postgresql/src/bin/pg_ctl/pg_ctl.c

2589 lines
66 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* pg_ctl --- start/stops/restarts the PostgreSQL server
*
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
*
2010-09-20 22:08:53 +02:00
* src/bin/pg_ctl/pg_ctl.c
*
*-------------------------------------------------------------------------
*/
#include "postgres_fe.h"
#include <fcntl.h>
#include <signal.h>
#include <time.h>
#include <sys/stat.h>
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
#include <sys/wait.h>
#include <unistd.h>
#ifdef HAVE_SYS_RESOURCE_H
#include <sys/time.h>
#include <sys/resource.h>
#endif
#include "catalog/pg_control.h"
#include "common/controldata_utils.h"
#include "common/file_perm.h"
#include "common/logging.h"
#include "common/string.h"
#include "getopt_long.h"
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
#include "utils/pidfile.h"
#ifdef WIN32 /* on Unix, we don't need libpq */
#include "pqexpbuffer.h"
#endif
2004-06-01 03:28:03 +02:00
/* PID can be negative for standalone backend */
typedef long pgpid_t;
typedef enum
{
SMART_MODE,
FAST_MODE,
IMMEDIATE_MODE
2004-08-29 07:07:03 +02:00
} ShutdownMode;
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
typedef enum
{
POSTMASTER_READY,
POSTMASTER_STILL_STARTING,
POSTMASTER_FAILED
} WaitPMResult;
typedef enum
{
NO_COMMAND = 0,
INIT_COMMAND,
START_COMMAND,
STOP_COMMAND,
RESTART_COMMAND,
RELOAD_COMMAND,
STATUS_COMMAND,
PROMOTE_COMMAND,
LOGROTATE_COMMAND,
KILL_COMMAND,
REGISTER_COMMAND,
UNREGISTER_COMMAND,
RUN_AS_SERVICE_COMMAND
2004-08-29 07:07:03 +02:00
} CtlCommand;
#define DEFAULT_WAIT 60
#define USEC_PER_SEC 1000000
#define WAITS_PER_SEC 10 /* should divide USEC_PER_SEC evenly */
static bool do_wait = true;
static int wait_seconds = DEFAULT_WAIT;
static bool wait_seconds_arg = false;
static bool silent_mode = false;
static ShutdownMode shutdown_mode = FAST_MODE;
static int sig = SIGINT; /* default */
static CtlCommand ctl_command = NO_COMMAND;
static char *pg_data = NULL;
static char *pg_config = NULL;
static char *pgdata_opt = NULL;
static char *post_opts = NULL;
static const char *progname;
static char *log_file = NULL;
static char *exec_path = NULL;
static char *event_source = NULL;
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
static char *register_servicename = "PostgreSQL"; /* FIXME: + version ID? */
static char *register_username = NULL;
static char *register_password = NULL;
static char *argv0 = NULL;
static bool allow_core_files = false;
static time_t start_time;
static char postopts_file[MAXPGPATH];
static char version_file[MAXPGPATH];
static char pid_file[MAXPGPATH];
static char backup_file[MAXPGPATH];
static char promote_file[MAXPGPATH];
static char logrotate_file[MAXPGPATH];
Detach postmaster process from pg_ctl's session at server startup. pg_ctl is supposed to daemonize the postmaster process, so that it's not affected by signals to the launching process group. Before this patch, if you had a shell script that used "pg_ctl start", and you interrupted the shell script after postmaster had been launched, postmaster was also killed. To fix, call setsid() after forking the postmaster process. Long time ago, we had a 'silent_mode' option, which daemonized the postmaster process by calling setsid(), but that was removed back in 2011 (commit f7ea6beaf4). We discussed bringing that back in some form, but pg_ctl is the documented way of launching postmaster to the background, so putting the setsid() call in pg_ctl itself seems appropriate. Just putting postmaster in a separate session would change the behavior when you interrupt "pg_ctl -w start", e.g. with CTRL-C, while it's waiting for postmaster to start. The historical behavior has been that interrupting pg_ctl aborts the server launch, which is handy if the server is stuck in recovery, for example, and won't fully start up. To keep that behavior, install a signal handler in pg_ctl, to explicitly kill postmaster, if pg_ctl is interrupted while it's waiting for the server to start up. This isn't 100% watertight, there is a small window after forking the postmaster process, where the signal handler doesn't know the postmaster's PID yet, but seems good enough. Arguably this is a long-standing bug, but I refrained from back-batching, out of fear of breaking someone's scripts that depended on the old behavior. Reviewed by Tom Lane. Report and original patch by Paul Guo, with feedback from Michael Paquier. Discussion: https://www.postgresql.org/message-id/CAEET0ZH5Bf7dhZB3mYy8zZQttJrdZg_0Wwaj0o1PuuBny1JkEw%40mail.gmail.com
2019-01-14 13:50:58 +01:00
static volatile pgpid_t postmasterPID = -1;
#ifdef WIN32
static DWORD pgctl_start_type = SERVICE_AUTO_START;
static SERVICE_STATUS status;
static SERVICE_STATUS_HANDLE hStatus = (SERVICE_STATUS_HANDLE) 0;
static HANDLE shutdownHandles[2];
#define shutdownEvent shutdownHandles[0]
#define postmasterProcess shutdownHandles[1]
#endif
static void write_stderr(const char *fmt,...) pg_attribute_printf(1, 2);
static void do_advice(void);
static void do_help(void);
static void set_mode(char *modeopt);
static void set_sig(char *signame);
static void do_init(void);
static void do_start(void);
static void do_stop(void);
static void do_restart(void);
static void do_reload(void);
static void do_status(void);
static void do_promote(void);
static void do_logrotate(void);
2004-06-01 03:28:03 +02:00
static void do_kill(pgpid_t pid);
static void print_msg(const char *msg);
static void adjust_data_dir(void);
2004-08-29 07:07:03 +02:00
#ifdef WIN32
#include <versionhelpers.h>
2004-08-29 07:07:03 +02:00
static bool pgwin32_IsInstalled(SC_HANDLE);
static char *pgwin32_CommandLine(bool);
static void pgwin32_doRegister(void);
static void pgwin32_doUnregister(void);
static void pgwin32_SetServiceStatus(DWORD);
static void WINAPI pgwin32_ServiceHandler(DWORD);
2004-08-29 07:07:03 +02:00
static void WINAPI pgwin32_ServiceMain(DWORD, LPTSTR *);
static void pgwin32_doRunAsService(void);
static int CreateRestrictedProcess(char *cmd, PROCESS_INFORMATION *processInfo, bool as_service);
static PTOKEN_PRIVILEGES GetPrivilegesToDelete(HANDLE hToken);
#endif
static pgpid_t get_pgpid(bool is_status_request);
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
static char **readfile(const char *path, int *numlines);
static void free_readfile(char **optlines);
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
static pgpid_t start_postmaster(void);
static void read_post_opts(void);
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
static WaitPMResult wait_for_postmaster(pgpid_t pm_pid, bool do_checkpoint);
static bool postmaster_is_alive(pid_t pid);
#if defined(HAVE_GETRLIMIT) && defined(RLIMIT_CORE)
static void unlimit_core_size(void);
#endif
static DBState get_control_dbstate(void);
#ifdef WIN32
static void
write_eventlog(int level, const char *line)
{
static HANDLE evtHandle = INVALID_HANDLE_VALUE;
if (silent_mode && level == EVENTLOG_INFORMATION_TYPE)
return;
2004-08-29 07:07:03 +02:00
if (evtHandle == INVALID_HANDLE_VALUE)
{
evtHandle = RegisterEventSource(NULL,
event_source ? event_source : DEFAULT_EVENT_SOURCE);
2004-08-29 07:07:03 +02:00
if (evtHandle == NULL)
{
evtHandle = INVALID_HANDLE_VALUE;
return;
}
}
ReportEvent(evtHandle,
level,
0,
2004-08-29 07:07:03 +02:00
0, /* All events are Id 0 */
NULL,
1,
0,
&line,
NULL);
}
#endif
/*
* Write errors to stderr (or by equal means when stderr is
* not available).
*/
static void
write_stderr(const char *fmt,...)
{
2004-08-29 07:07:03 +02:00
va_list ap;
va_start(ap, fmt);
#ifndef WIN32
/* On Unix, we just fprintf to stderr */
vfprintf(stderr, fmt, ap);
#else
2004-08-29 07:07:03 +02:00
/*
* On Win32, we print to stderr if running on a console, or write to
* eventlog if running as a service
*/
if (pgwin32_is_service()) /* Running as a service */
{
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
char errbuf[2048]; /* Arbitrary size? */
vsnprintf(errbuf, sizeof(errbuf), fmt, ap);
write_eventlog(EVENTLOG_ERROR_TYPE, errbuf);
}
2004-08-29 07:07:03 +02:00
else
/* Not running as service, write to stderr */
vfprintf(stderr, fmt, ap);
#endif
va_end(ap);
}
/*
* Given an already-localized string, print it to stdout unless the
* user has specified that no messages should be printed.
*/
static void
print_msg(const char *msg)
{
if (!silent_mode)
{
fputs(msg, stdout);
fflush(stdout);
}
}
2004-06-01 03:28:03 +02:00
static pgpid_t
get_pgpid(bool is_status_request)
{
FILE *pidf;
long pid;
struct stat statbuf;
if (stat(pg_data, &statbuf) != 0)
{
if (errno == ENOENT)
2014-05-15 04:19:18 +02:00
write_stderr(_("%s: directory \"%s\" does not exist\n"), progname,
pg_data);
else
2014-05-15 04:19:18 +02:00
write_stderr(_("%s: could not access directory \"%s\": %s\n"), progname,
pg_data, strerror(errno));
/*
* The Linux Standard Base Core Specification 3.1 says this should
* return '4, program or service status is unknown'
* https://refspecs.linuxbase.org/LSB_3.1.0/LSB-Core-generic/LSB-Core-generic/iniscrptact.html
*/
exit(is_status_request ? 4 : 1);
}
if (stat(version_file, &statbuf) != 0 && errno == ENOENT)
{
2014-05-15 04:19:18 +02:00
write_stderr(_("%s: directory \"%s\" is not a database cluster directory\n"),
progname, pg_data);
exit(is_status_request ? 4 : 1);
}
pidf = fopen(pid_file, "r");
if (pidf == NULL)
{
/* No pid file, not an error on startup */
if (errno == ENOENT)
return 0;
else
{
write_stderr(_("%s: could not open PID file \"%s\": %s\n"),
2004-10-12 23:54:45 +02:00
progname, pid_file, strerror(errno));
exit(1);
}
}
if (fscanf(pidf, "%ld", &pid) != 1)
{
/* Is the file empty? */
if (ftell(pidf) == 0 && feof(pidf))
write_stderr(_("%s: the PID file \"%s\" is empty\n"),
progname, pid_file);
else
write_stderr(_("%s: invalid data in PID file \"%s\"\n"),
progname, pid_file);
exit(1);
}
fclose(pidf);
return (pgpid_t) pid;
}
/*
* get the lines from a text file - return NULL if file can't be opened
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
*
* Trailing newlines are deleted from the lines (this is a change from pre-v10)
*
* *numlines is set to the number of line pointers returned; there is
* also an additional NULL pointer after the last real line.
*/
static char **
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
readfile(const char *path, int *numlines)
{
int fd;
int nlines;
char **result;
char *buffer;
char *linebegin;
int i;
int n;
int len;
struct stat statbuf;
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
*numlines = 0; /* in case of failure or empty file */
/*
* Slurp the file into memory.
*
* The file can change concurrently, so we read the whole file into memory
* with a single read() call. That's not guaranteed to get an atomic
* snapshot, but in practice, for a small file, it's close enough for the
* current use.
*/
fd = open(path, O_RDONLY | PG_BINARY, 0);
if (fd < 0)
return NULL;
if (fstat(fd, &statbuf) < 0)
{
close(fd);
return NULL;
}
if (statbuf.st_size == 0)
{
/* empty file */
close(fd);
result = (char **) pg_malloc(sizeof(char *));
*result = NULL;
return result;
}
buffer = pg_malloc(statbuf.st_size + 1);
len = read(fd, buffer, statbuf.st_size + 1);
close(fd);
if (len != statbuf.st_size)
{
/* oops, the file size changed between fstat and read */
free(buffer);
return NULL;
}
/*
* Count newlines. We expect there to be a newline after each full line,
* including one at the end of file. If there isn't a newline at the end,
* any characters after the last newline will be ignored.
*/
nlines = 0;
for (i = 0; i < len; i++)
{
if (buffer[i] == '\n')
nlines++;
}
/* set up the result buffer */
result = (char **) pg_malloc((nlines + 1) * sizeof(char *));
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
*numlines = nlines;
/* now split the buffer into lines */
linebegin = buffer;
n = 0;
for (i = 0; i < len; i++)
{
if (buffer[i] == '\n')
{
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
int slen = &buffer[i] - linebegin;
char *linebuf = pg_malloc(slen + 1);
memcpy(linebuf, linebegin, slen);
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
/* we already dropped the \n, but get rid of any \r too */
if (slen > 0 && linebuf[slen - 1] == '\r')
slen--;
linebuf[slen] = '\0';
result[n++] = linebuf;
linebegin = &buffer[i + 1];
}
}
result[n] = NULL;
free(buffer);
return result;
}
/*
* Free memory allocated for optlines through readfile()
*/
static void
free_readfile(char **optlines)
{
char *curr_line = NULL;
int i = 0;
if (!optlines)
return;
while ((curr_line = optlines[i++]))
free(curr_line);
2014-03-03 20:05:33 +01:00
free(optlines);
}
/*
* start/test/stop routines
*/
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
/*
* Start the postmaster and return its PID.
*
* Currently, on Windows what we return is the PID of the shell process
* that launched the postmaster (and, we trust, is waiting for it to exit).
* So the PID is usable for "is the postmaster still running" checks,
* but cannot be compared directly to postmaster.pid.
*
* On Windows, we also save aside a handle to the shell process in
* "postmasterProcess", which the caller should close when done with it.
*/
static pgpid_t
start_postmaster(void)
{
char cmd[MAXPGPATH];
2006-10-04 02:30:14 +02:00
#ifndef WIN32
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
pgpid_t pm_pid;
/* Flush stdio channels just before fork, to avoid double-output problems */
fflush(stdout);
fflush(stderr);
pm_pid = fork();
if (pm_pid < 0)
{
/* fork failed */
write_stderr(_("%s: could not start server: %s\n"),
progname, strerror(errno));
exit(1);
}
if (pm_pid > 0)
{
/* fork succeeded, in parent */
return pm_pid;
}
/* fork succeeded, in child */
2006-10-04 02:30:14 +02:00
Detach postmaster process from pg_ctl's session at server startup. pg_ctl is supposed to daemonize the postmaster process, so that it's not affected by signals to the launching process group. Before this patch, if you had a shell script that used "pg_ctl start", and you interrupted the shell script after postmaster had been launched, postmaster was also killed. To fix, call setsid() after forking the postmaster process. Long time ago, we had a 'silent_mode' option, which daemonized the postmaster process by calling setsid(), but that was removed back in 2011 (commit f7ea6beaf4). We discussed bringing that back in some form, but pg_ctl is the documented way of launching postmaster to the background, so putting the setsid() call in pg_ctl itself seems appropriate. Just putting postmaster in a separate session would change the behavior when you interrupt "pg_ctl -w start", e.g. with CTRL-C, while it's waiting for postmaster to start. The historical behavior has been that interrupting pg_ctl aborts the server launch, which is handy if the server is stuck in recovery, for example, and won't fully start up. To keep that behavior, install a signal handler in pg_ctl, to explicitly kill postmaster, if pg_ctl is interrupted while it's waiting for the server to start up. This isn't 100% watertight, there is a small window after forking the postmaster process, where the signal handler doesn't know the postmaster's PID yet, but seems good enough. Arguably this is a long-standing bug, but I refrained from back-batching, out of fear of breaking someone's scripts that depended on the old behavior. Reviewed by Tom Lane. Report and original patch by Paul Guo, with feedback from Michael Paquier. Discussion: https://www.postgresql.org/message-id/CAEET0ZH5Bf7dhZB3mYy8zZQttJrdZg_0Wwaj0o1PuuBny1JkEw%40mail.gmail.com
2019-01-14 13:50:58 +01:00
/*
* If possible, detach the postmaster process from the launching process
* group and make it a group leader, so that it doesn't get signaled along
* with the current group that launched it.
*/
#ifdef HAVE_SETSID
if (setsid() < 0)
{
write_stderr(_("%s: could not start server due to setsid() failure: %s\n"),
progname, strerror(errno));
exit(1);
}
#endif
/*
2005-10-15 04:49:52 +02:00
* Since there might be quotes to handle here, it is easier simply to pass
* everything to a shell to process them. Use exec so that the postmaster
* has the same PID as the current child process.
*/
if (log_file != NULL)
snprintf(cmd, MAXPGPATH, "exec \"%s\" %s%s < \"%s\" >> \"%s\" 2>&1",
exec_path, pgdata_opt, post_opts,
DEVNULL, log_file);
2006-10-04 02:30:14 +02:00
else
snprintf(cmd, MAXPGPATH, "exec \"%s\" %s%s < \"%s\" 2>&1",
exec_path, pgdata_opt, post_opts, DEVNULL);
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
(void) execl("/bin/sh", "/bin/sh", "-c", cmd, (char *) NULL);
/* exec failed */
write_stderr(_("%s: could not start server: %s\n"),
progname, strerror(errno));
exit(1);
return 0; /* keep dumb compilers quiet */
2006-10-04 02:30:14 +02:00
#else /* WIN32 */
/*
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
* As with the Unix case, it's easiest to use the shell (CMD.EXE) to
* handle redirection etc. Unfortunately CMD.EXE lacks any equivalent of
* "exec", so we don't get to find out the postmaster's PID immediately.
2006-10-04 02:30:14 +02:00
*/
PROCESS_INFORMATION pi;
const char *comspec;
/* Find CMD.EXE location using COMSPEC, if it's set */
comspec = getenv("COMSPEC");
if (comspec == NULL)
comspec = "CMD";
2006-10-04 02:30:14 +02:00
if (log_file != NULL)
{
/*
* First, open the log file if it exists. The idea is that if the
* file is still locked by a previous postmaster run, we'll wait until
* it comes free, instead of failing with ERROR_SHARING_VIOLATION.
* (It'd be better to open the file in a sharing-friendly mode, but we
* can't use CMD.EXE to do that, so work around it. Note that the
* previous postmaster will still have the file open for a short time
* after removing postmaster.pid.)
*
* If the log file doesn't exist, we *must not* create it here. If we
* were launched with higher privileges than the restricted process
* will have, the log file might end up with permissions settings that
* prevent the postmaster from writing on it.
*/
int fd = open(log_file, O_RDWR, 0);
if (fd == -1)
{
/*
* ENOENT is expectable since we didn't use O_CREAT. Otherwise
* complain. We could just fall through and let CMD.EXE report
* the problem, but its error reporting is pretty miserable.
*/
if (errno != ENOENT)
{
write_stderr(_("%s: could not open log file \"%s\": %s\n"),
progname, log_file, strerror(errno));
exit(1);
}
}
else
close(fd);
snprintf(cmd, MAXPGPATH, "\"%s\" /C \"\"%s\" %s%s < \"%s\" >> \"%s\" 2>&1\"",
comspec, exec_path, pgdata_opt, post_opts, DEVNULL, log_file);
}
2006-10-04 02:30:14 +02:00
else
snprintf(cmd, MAXPGPATH, "\"%s\" /C \"\"%s\" %s%s < \"%s\" 2>&1\"",
comspec, exec_path, pgdata_opt, post_opts, DEVNULL);
if (!CreateRestrictedProcess(cmd, &pi, false))
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
{
write_stderr(_("%s: could not start server: error code %lu\n"),
progname, (unsigned long) GetLastError());
exit(1);
}
/* Don't close command process handle here; caller must do so */
postmasterProcess = pi.hProcess;
2006-10-04 02:30:14 +02:00
CloseHandle(pi.hThread);
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
return pi.dwProcessId; /* Shell's PID, not postmaster's! */
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
#endif /* WIN32 */
}
/*
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
* Wait for the postmaster to become ready.
*
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
* On Unix, pm_pid is the PID of the just-launched postmaster. On Windows,
* it may be the PID of an ancestor shell process, so we can't check the
* contents of postmaster.pid quite as carefully.
*
* On Windows, the static variable postmasterProcess is an implicit argument
* to this routine; it contains a handle to the postmaster process or an
* ancestor shell process thereof.
*
* Note that the checkpoint parameter enables a Windows service control
* manager checkpoint, it's got nothing to do with database checkpoints!!
*/
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
static WaitPMResult
wait_for_postmaster(pgpid_t pm_pid, bool do_checkpoint)
{
int i;
for (i = 0; i < wait_seconds * WAITS_PER_SEC; i++)
{
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
char **optlines;
int numlines;
/*
* Try to read the postmaster.pid file. If it's not valid, or if the
* status line isn't there yet, just keep waiting.
*/
if ((optlines = readfile(pid_file, &numlines)) != NULL &&
numlines >= LOCK_FILE_LINE_PM_STATUS)
{
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
/* File is complete enough for us, parse it */
pgpid_t pmpid;
time_t pmstart;
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
/*
* Make sanity checks. If it's for the wrong PID, or the recorded
* start time is before pg_ctl started, then either we are looking
* at the wrong data directory, or this is a pre-existing pidfile
* that hasn't (yet?) been overwritten by our child postmaster.
* Allow 2 seconds slop for possible cross-process clock skew.
*/
pmpid = atol(optlines[LOCK_FILE_LINE_PID - 1]);
pmstart = atol(optlines[LOCK_FILE_LINE_START_TIME - 1]);
if (pmstart >= start_time - 2 &&
#ifndef WIN32
pmpid == pm_pid
#else
/* Windows can only reject standalone-backend PIDs */
pmpid > 0
#endif
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
)
{
/*
* OK, seems to be a valid pidfile from our child. Check the
* status line (this assumes a v10 or later server).
*/
char *pmstatus = optlines[LOCK_FILE_LINE_PM_STATUS - 1];
if (strcmp(pmstatus, PM_STATUS_READY) == 0 ||
strcmp(pmstatus, PM_STATUS_STANDBY) == 0)
{
/* postmaster is done starting up */
free_readfile(optlines);
return POSTMASTER_READY;
}
}
}
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
/*
* Free the results of readfile.
*
* This is safe to call even if optlines is NULL.
*/
free_readfile(optlines);
/*
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
* Check whether the child postmaster process is still alive. This
* lets us exit early if the postmaster fails during startup.
*
* On Windows, we may be checking the postmaster's parent shell, but
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
* that's fine for this purpose.
*/
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
#ifndef WIN32
{
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
int exitstatus;
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
if (waitpid((pid_t) pm_pid, &exitstatus, WNOHANG) == (pid_t) pm_pid)
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
return POSTMASTER_FAILED;
}
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
#else
if (WaitForSingleObject(postmasterProcess, 0) == WAIT_OBJECT_0)
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
return POSTMASTER_FAILED;
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
#endif
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
/* Startup still in process; wait, printing a dot once per second */
if (i % WAITS_PER_SEC == 0)
{
#ifdef WIN32
if (do_checkpoint)
{
/*
* Increment the wait hint by 6 secs (connection timeout +
* sleep). We must do this to indicate to the SCM that our
* startup time is changing, otherwise it'll usually send a
* stop signal after 20 seconds, despite incrementing the
* checkpoint counter.
*/
status.dwWaitHint += 6000;
status.dwCheckPoint++;
SetServiceStatus(hStatus, (LPSERVICE_STATUS) &status);
}
else
#endif
print_msg(".");
}
pg_usleep(USEC_PER_SEC / WAITS_PER_SEC);
}
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
/* out of patience; report that postmaster is still starting up */
return POSTMASTER_STILL_STARTING;
}
#if defined(HAVE_GETRLIMIT) && defined(RLIMIT_CORE)
2007-11-15 22:14:46 +01:00
static void
unlimit_core_size(void)
{
struct rlimit lim;
2007-11-15 22:14:46 +01:00
getrlimit(RLIMIT_CORE, &lim);
if (lim.rlim_max == 0)
{
2007-11-15 22:14:46 +01:00
write_stderr(_("%s: cannot set core file size limit; disallowed by hard limit\n"),
progname);
return;
}
else if (lim.rlim_max == RLIM_INFINITY || lim.rlim_cur < lim.rlim_max)
{
lim.rlim_cur = lim.rlim_max;
2007-11-15 22:14:46 +01:00
setrlimit(RLIMIT_CORE, &lim);
}
}
#endif
static void
read_post_opts(void)
{
if (post_opts == NULL)
{
post_opts = ""; /* default */
if (ctl_command == RESTART_COMMAND)
{
char **optlines;
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
int numlines;
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
optlines = readfile(postopts_file, &numlines);
if (optlines == NULL)
{
2004-10-12 23:54:45 +02:00
write_stderr(_("%s: could not read file \"%s\"\n"), progname, postopts_file);
exit(1);
}
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
else if (numlines != 1)
{
write_stderr(_("%s: option file \"%s\" must have exactly one line\n"),
progname, postopts_file);
exit(1);
}
else
{
char *optline;
char *arg1;
2004-08-29 07:07:03 +02:00
optline = optlines[0];
/*
* Are we at the first option, as defined by space and
* double-quote?
*/
if ((arg1 = strstr(optline, " \"")) != NULL)
{
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
*arg1 = '\0'; /* terminate so we get only program name */
post_opts = pg_strdup(arg1 + 1); /* point past whitespace */
}
if (exec_path == NULL)
exec_path = pg_strdup(optline);
}
/* Free the results of readfile. */
free_readfile(optlines);
}
}
}
Detach postmaster process from pg_ctl's session at server startup. pg_ctl is supposed to daemonize the postmaster process, so that it's not affected by signals to the launching process group. Before this patch, if you had a shell script that used "pg_ctl start", and you interrupted the shell script after postmaster had been launched, postmaster was also killed. To fix, call setsid() after forking the postmaster process. Long time ago, we had a 'silent_mode' option, which daemonized the postmaster process by calling setsid(), but that was removed back in 2011 (commit f7ea6beaf4). We discussed bringing that back in some form, but pg_ctl is the documented way of launching postmaster to the background, so putting the setsid() call in pg_ctl itself seems appropriate. Just putting postmaster in a separate session would change the behavior when you interrupt "pg_ctl -w start", e.g. with CTRL-C, while it's waiting for postmaster to start. The historical behavior has been that interrupting pg_ctl aborts the server launch, which is handy if the server is stuck in recovery, for example, and won't fully start up. To keep that behavior, install a signal handler in pg_ctl, to explicitly kill postmaster, if pg_ctl is interrupted while it's waiting for the server to start up. This isn't 100% watertight, there is a small window after forking the postmaster process, where the signal handler doesn't know the postmaster's PID yet, but seems good enough. Arguably this is a long-standing bug, but I refrained from back-batching, out of fear of breaking someone's scripts that depended on the old behavior. Reviewed by Tom Lane. Report and original patch by Paul Guo, with feedback from Michael Paquier. Discussion: https://www.postgresql.org/message-id/CAEET0ZH5Bf7dhZB3mYy8zZQttJrdZg_0Wwaj0o1PuuBny1JkEw%40mail.gmail.com
2019-01-14 13:50:58 +01:00
/*
* SIGINT signal handler used while waiting for postmaster to start up.
* Forwards the SIGINT to the postmaster process, asking it to shut down,
* before terminating pg_ctl itself. This way, if the user hits CTRL-C while
* waiting for the server to start up, the server launch is aborted.
*/
static void
trap_sigint_during_startup(int sig)
{
if (postmasterPID != -1)
{
if (kill(postmasterPID, SIGINT) != 0)
write_stderr(_("%s: could not send stop signal (PID: %ld): %s\n"),
progname, (pgpid_t) postmasterPID, strerror(errno));
}
/*
* Clear the signal handler, and send the signal again, to terminate the
* process as normal.
*/
pqsignal(SIGINT, SIG_DFL);
raise(SIGINT);
}
static char *
find_other_exec_or_die(const char *argv0, const char *target, const char *versionstr)
{
int ret;
char *found_path;
found_path = pg_malloc(MAXPGPATH);
if ((ret = find_other_exec(argv0, target, versionstr, found_path)) < 0)
{
char full_path[MAXPGPATH];
if (find_my_exec(argv0, full_path) < 0)
strlcpy(full_path, progname, sizeof(full_path));
if (ret == -1)
write_stderr(_("The program \"%s\" is needed by %s "
"but was not found in the\n"
"same directory as \"%s\".\n"
"Check your installation.\n"),
target, progname, full_path);
else
write_stderr(_("The program \"%s\" was found by \"%s\"\n"
"but was not the same version as %s.\n"
"Check your installation.\n"),
target, full_path, progname);
exit(1);
}
return found_path;
}
static void
do_init(void)
{
2010-02-26 03:01:40 +01:00
char cmd[MAXPGPATH];
if (exec_path == NULL)
exec_path = find_other_exec_or_die(argv0, "initdb", "initdb (PostgreSQL) " PG_VERSION "\n");
if (pgdata_opt == NULL)
pgdata_opt = "";
if (post_opts == NULL)
post_opts = "";
if (!silent_mode)
snprintf(cmd, MAXPGPATH, "\"%s\" %s%s",
exec_path, pgdata_opt, post_opts);
else
snprintf(cmd, MAXPGPATH, "\"%s\" %s%s > \"%s\"",
exec_path, pgdata_opt, post_opts, DEVNULL);
2010-02-26 03:01:40 +01:00
if (system(cmd) != 0)
{
write_stderr(_("%s: database system initialization failed\n"), progname);
exit(1);
}
}
static void
do_start(void)
{
pgpid_t old_pid = 0;
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
pgpid_t pm_pid;
if (ctl_command != RESTART_COMMAND)
{
old_pid = get_pgpid(false);
if (old_pid != 0)
write_stderr(_("%s: another server might be running; "
"trying to start server anyway\n"),
progname);
}
read_post_opts();
/* No -D or -D already added during server start */
if (ctl_command == RESTART_COMMAND || pgdata_opt == NULL)
2004-08-29 07:07:03 +02:00
pgdata_opt = "";
if (exec_path == NULL)
exec_path = find_other_exec_or_die(argv0, "postgres", PG_BACKEND_VERSIONSTR);
#if defined(HAVE_GETRLIMIT) && defined(RLIMIT_CORE)
if (allow_core_files)
unlimit_core_size();
#endif
/*
* If possible, tell the postmaster our parent shell's PID (see the
* comments in CreateLockFile() for motivation). Windows hasn't got
* getppid() unfortunately.
*/
#ifndef WIN32
{
static char env_var[32];
snprintf(env_var, sizeof(env_var), "PG_GRANDPARENT_PID=%d",
(int) getppid());
putenv(env_var);
}
#endif
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
pm_pid = start_postmaster();
if (do_wait)
{
Detach postmaster process from pg_ctl's session at server startup. pg_ctl is supposed to daemonize the postmaster process, so that it's not affected by signals to the launching process group. Before this patch, if you had a shell script that used "pg_ctl start", and you interrupted the shell script after postmaster had been launched, postmaster was also killed. To fix, call setsid() after forking the postmaster process. Long time ago, we had a 'silent_mode' option, which daemonized the postmaster process by calling setsid(), but that was removed back in 2011 (commit f7ea6beaf4). We discussed bringing that back in some form, but pg_ctl is the documented way of launching postmaster to the background, so putting the setsid() call in pg_ctl itself seems appropriate. Just putting postmaster in a separate session would change the behavior when you interrupt "pg_ctl -w start", e.g. with CTRL-C, while it's waiting for postmaster to start. The historical behavior has been that interrupting pg_ctl aborts the server launch, which is handy if the server is stuck in recovery, for example, and won't fully start up. To keep that behavior, install a signal handler in pg_ctl, to explicitly kill postmaster, if pg_ctl is interrupted while it's waiting for the server to start up. This isn't 100% watertight, there is a small window after forking the postmaster process, where the signal handler doesn't know the postmaster's PID yet, but seems good enough. Arguably this is a long-standing bug, but I refrained from back-batching, out of fear of breaking someone's scripts that depended on the old behavior. Reviewed by Tom Lane. Report and original patch by Paul Guo, with feedback from Michael Paquier. Discussion: https://www.postgresql.org/message-id/CAEET0ZH5Bf7dhZB3mYy8zZQttJrdZg_0Wwaj0o1PuuBny1JkEw%40mail.gmail.com
2019-01-14 13:50:58 +01:00
/*
* If the user interrupts the startup (e.g. with CTRL-C), we'd like to
* abort the server launch. Install a signal handler that will
* forward SIGINT to the postmaster process, while we wait.
*
* (We don't bother to reset the signal handler after the launch, as
* we're about to exit, anyway.)
*/
postmasterPID = pm_pid;
pqsignal(SIGINT, trap_sigint_during_startup);
print_msg(_("waiting for server to start..."));
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
switch (wait_for_postmaster(pm_pid, false))
{
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
case POSTMASTER_READY:
print_msg(_(" done\n"));
print_msg(_("server started\n"));
break;
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
case POSTMASTER_STILL_STARTING:
print_msg(_(" stopped waiting\n"));
write_stderr(_("%s: server did not start in time\n"),
progname);
exit(1);
break;
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
case POSTMASTER_FAILED:
print_msg(_(" stopped waiting\n"));
write_stderr(_("%s: could not start server\n"
"Examine the log output.\n"),
progname);
exit(1);
break;
}
}
else
print_msg(_("server starting\n"));
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
#ifdef WIN32
/* Now we don't need the handle to the shell process anymore */
CloseHandle(postmasterProcess);
postmasterProcess = INVALID_HANDLE_VALUE;
#endif
}
static void
do_stop(void)
{
int cnt;
2004-06-01 03:28:03 +02:00
pgpid_t pid;
struct stat statbuf;
pid = get_pgpid(false);
if (pid == 0) /* no pid file */
{
2004-10-12 23:54:45 +02:00
write_stderr(_("%s: PID file \"%s\" does not exist\n"), progname, pid_file);
write_stderr(_("Is server running?\n"));
exit(1);
}
else if (pid < 0) /* standalone backend, not postmaster */
{
pid = -pid;
write_stderr(_("%s: cannot stop server; "
"single-user server is running (PID: %ld)\n"),
progname, pid);
exit(1);
}
2004-06-01 03:28:03 +02:00
if (kill((pid_t) pid, sig) != 0)
{
2004-10-12 23:54:45 +02:00
write_stderr(_("%s: could not send stop signal (PID: %ld): %s\n"), progname, pid,
2004-08-29 07:07:03 +02:00
strerror(errno));
exit(1);
}
if (!do_wait)
{
print_msg(_("server shutting down\n"));
return;
}
else
{
/*
2011-04-10 17:42:00 +02:00
* If backup_label exists, an online backup is running. Warn the user
* that smart shutdown will wait for it to finish. However, if the
* server is in archive recovery, we're recovering from an online
* backup instead of performing one.
*/
if (shutdown_mode == SMART_MODE &&
stat(backup_file, &statbuf) == 0 &&
get_control_dbstate() != DB_IN_ARCHIVE_RECOVERY)
{
print_msg(_("WARNING: online backup mode is active\n"
"Shutdown will not complete until pg_stop_backup() is called.\n\n"));
}
print_msg(_("waiting for server to shut down..."));
2004-08-29 07:07:03 +02:00
for (cnt = 0; cnt < wait_seconds * WAITS_PER_SEC; cnt++)
{
if ((pid = get_pgpid(false)) != 0)
{
if (cnt % WAITS_PER_SEC == 0)
print_msg(".");
pg_usleep(USEC_PER_SEC / WAITS_PER_SEC);
}
else
break;
}
2004-08-29 07:07:03 +02:00
if (pid != 0) /* pid file still exists */
{
print_msg(_(" failed\n"));
2004-08-29 07:07:03 +02:00
write_stderr(_("%s: server does not shut down\n"), progname);
if (shutdown_mode == SMART_MODE)
write_stderr(_("HINT: The \"-m fast\" option immediately disconnects sessions rather than\n"
"waiting for session-initiated disconnection.\n"));
exit(1);
}
print_msg(_(" done\n"));
print_msg(_("server stopped\n"));
}
}
/*
* restart/reload routines
*/
static void
do_restart(void)
{
int cnt;
2004-06-01 03:28:03 +02:00
pgpid_t pid;
struct stat statbuf;
pid = get_pgpid(false);
if (pid == 0) /* no pid file */
{
write_stderr(_("%s: PID file \"%s\" does not exist\n"),
progname, pid_file);
write_stderr(_("Is server running?\n"));
write_stderr(_("trying to start server anyway\n"));
do_start();
return;
}
else if (pid < 0) /* standalone backend, not postmaster */
{
pid = -pid;
if (postmaster_is_alive((pid_t) pid))
{
write_stderr(_("%s: cannot restart server; "
"single-user server is running (PID: %ld)\n"),
progname, pid);
write_stderr(_("Please terminate the single-user server and try again.\n"));
exit(1);
}
}
if (postmaster_is_alive((pid_t) pid))
{
if (kill((pid_t) pid, sig) != 0)
{
write_stderr(_("%s: could not send stop signal (PID: %ld): %s\n"), progname, pid,
strerror(errno));
exit(1);
}
/*
2011-04-10 17:42:00 +02:00
* If backup_label exists, an online backup is running. Warn the user
* that smart shutdown will wait for it to finish. However, if the
* server is in archive recovery, we're recovering from an online
* backup instead of performing one.
*/
if (shutdown_mode == SMART_MODE &&
stat(backup_file, &statbuf) == 0 &&
get_control_dbstate() != DB_IN_ARCHIVE_RECOVERY)
{
print_msg(_("WARNING: online backup mode is active\n"
"Shutdown will not complete until pg_stop_backup() is called.\n\n"));
}
print_msg(_("waiting for server to shut down..."));
2005-10-15 04:49:52 +02:00
/* always wait for restart */
for (cnt = 0; cnt < wait_seconds * WAITS_PER_SEC; cnt++)
{
if ((pid = get_pgpid(false)) != 0)
{
if (cnt % WAITS_PER_SEC == 0)
print_msg(".");
pg_usleep(USEC_PER_SEC / WAITS_PER_SEC);
}
else
break;
}
2005-10-15 04:49:52 +02:00
if (pid != 0) /* pid file still exists */
{
print_msg(_(" failed\n"));
write_stderr(_("%s: server does not shut down\n"), progname);
if (shutdown_mode == SMART_MODE)
write_stderr(_("HINT: The \"-m fast\" option immediately disconnects sessions rather than\n"
"waiting for session-initiated disconnection.\n"));
exit(1);
}
print_msg(_(" done\n"));
print_msg(_("server stopped\n"));
}
else
{
write_stderr(_("%s: old server process (PID: %ld) seems to be gone\n"),
progname, pid);
write_stderr(_("starting server anyway\n"));
}
do_start();
}
static void
do_reload(void)
{
pgpid_t pid;
pid = get_pgpid(false);
if (pid == 0) /* no pid file */
{
write_stderr(_("%s: PID file \"%s\" does not exist\n"), progname, pid_file);
write_stderr(_("Is server running?\n"));
exit(1);
}
else if (pid < 0) /* standalone backend, not postmaster */
{
pid = -pid;
write_stderr(_("%s: cannot reload server; "
"single-user server is running (PID: %ld)\n"),
progname, pid);
write_stderr(_("Please terminate the single-user server and try again.\n"));
exit(1);
}
if (kill((pid_t) pid, sig) != 0)
{
write_stderr(_("%s: could not send reload signal (PID: %ld): %s\n"),
progname, pid, strerror(errno));
exit(1);
}
print_msg(_("server signaled\n"));
}
/*
* promote
*/
static void
do_promote(void)
{
FILE *prmfile;
pgpid_t pid;
pid = get_pgpid(false);
if (pid == 0) /* no pid file */
{
write_stderr(_("%s: PID file \"%s\" does not exist\n"), progname, pid_file);
write_stderr(_("Is server running?\n"));
exit(1);
}
else if (pid < 0) /* standalone backend, not postmaster */
{
pid = -pid;
write_stderr(_("%s: cannot promote server; "
"single-user server is running (PID: %ld)\n"),
progname, pid);
exit(1);
}
if (get_control_dbstate() != DB_IN_ARCHIVE_RECOVERY)
{
write_stderr(_("%s: cannot promote server; "
"server is not in standby mode\n"),
progname);
exit(1);
}
/*
* For 9.3 onwards, "fast" promotion is performed. Promotion with a full
* checkpoint is still possible by writing a file called
* "fallback_promote" instead of "promote"
*/
snprintf(promote_file, MAXPGPATH, "%s/promote", pg_data);
if ((prmfile = fopen(promote_file, "w")) == NULL)
{
write_stderr(_("%s: could not create promote signal file \"%s\": %s\n"),
progname, promote_file, strerror(errno));
exit(1);
}
if (fclose(prmfile))
{
write_stderr(_("%s: could not write promote signal file \"%s\": %s\n"),
progname, promote_file, strerror(errno));
exit(1);
}
sig = SIGUSR1;
if (kill((pid_t) pid, sig) != 0)
{
write_stderr(_("%s: could not send promote signal (PID: %ld): %s\n"),
progname, pid, strerror(errno));
if (unlink(promote_file) != 0)
write_stderr(_("%s: could not remove promote signal file \"%s\": %s\n"),
progname, promote_file, strerror(errno));
exit(1);
}
if (do_wait)
{
DBState state = DB_STARTUP;
int cnt;
print_msg(_("waiting for server to promote..."));
for (cnt = 0; cnt < wait_seconds * WAITS_PER_SEC; cnt++)
{
state = get_control_dbstate();
if (state == DB_IN_PRODUCTION)
break;
if (cnt % WAITS_PER_SEC == 0)
print_msg(".");
pg_usleep(USEC_PER_SEC / WAITS_PER_SEC);
}
if (state == DB_IN_PRODUCTION)
{
print_msg(_(" done\n"));
print_msg(_("server promoted\n"));
}
else
{
print_msg(_(" stopped waiting\n"));
write_stderr(_("%s: server did not promote in time\n"),
progname);
exit(1);
}
}
else
print_msg(_("server promoting\n"));
}
/*
* log rotate
*/
static void
do_logrotate(void)
{
FILE *logrotatefile;
pgpid_t pid;
pid = get_pgpid(false);
if (pid == 0) /* no pid file */
{
write_stderr(_("%s: PID file \"%s\" does not exist\n"), progname, pid_file);
write_stderr(_("Is server running?\n"));
exit(1);
}
else if (pid < 0) /* standalone backend, not postmaster */
{
pid = -pid;
write_stderr(_("%s: cannot rotate log file; "
"single-user server is running (PID: %ld)\n"),
progname, pid);
exit(1);
}
snprintf(logrotate_file, MAXPGPATH, "%s/logrotate", pg_data);
if ((logrotatefile = fopen(logrotate_file, "w")) == NULL)
{
write_stderr(_("%s: could not create log rotation signal file \"%s\": %s\n"),
progname, logrotate_file, strerror(errno));
exit(1);
}
if (fclose(logrotatefile))
{
write_stderr(_("%s: could not write log rotation signal file \"%s\": %s\n"),
progname, logrotate_file, strerror(errno));
exit(1);
}
sig = SIGUSR1;
if (kill((pid_t) pid, sig) != 0)
{
write_stderr(_("%s: could not send log rotation signal (PID: %ld): %s\n"),
progname, pid, strerror(errno));
if (unlink(logrotate_file) != 0)
write_stderr(_("%s: could not remove log rotation signal file \"%s\": %s\n"),
progname, logrotate_file, strerror(errno));
exit(1);
}
print_msg(_("server signaled to rotate log file\n"));
}
/*
* utility routines
*/
static bool
postmaster_is_alive(pid_t pid)
{
/*
* Test to see if the process is still there. Note that we do not
* consider an EPERM failure to mean that the process is still there;
2005-10-15 04:49:52 +02:00
* EPERM must mean that the given PID belongs to some other userid, and
* considering the permissions on $PGDATA, that means it's not the
* postmaster we are after.
*
* Don't believe that our own PID or parent shell's PID is the postmaster,
* either. (Windows hasn't got getppid(), though.)
*/
if (pid == getpid())
return false;
#ifndef WIN32
if (pid == getppid())
return false;
#endif
if (kill(pid, 0) == 0)
return true;
return false;
}
static void
do_status(void)
{
2004-06-01 03:28:03 +02:00
pgpid_t pid;
pid = get_pgpid(true);
/* Is there a pid file? */
if (pid != 0)
{
/* standalone backend? */
if (pid < 0)
{
pid = -pid;
if (postmaster_is_alive((pid_t) pid))
{
printf(_("%s: single-user server is running (PID: %ld)\n"),
progname, pid);
return;
}
}
2005-10-15 04:49:52 +02:00
else
/* must be a postmaster */
{
if (postmaster_is_alive((pid_t) pid))
{
char **optlines;
char **curr_line;
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
int numlines;
printf(_("%s: server is running (PID: %ld)\n"),
progname, pid);
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
optlines = readfile(postopts_file, &numlines);
if (optlines != NULL)
{
for (curr_line = optlines; *curr_line != NULL; curr_line++)
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
puts(*curr_line);
/* Free the results of readfile */
free_readfile(optlines);
}
return;
}
}
}
printf(_("%s: no server running\n"), progname);
/*
* The Linux Standard Base Core Specification 3.1 says this should return
* '3, program is not running'
* https://refspecs.linuxbase.org/LSB_3.1.0/LSB-Core-generic/LSB-Core-generic/iniscrptact.html
*/
exit(3);
}
static void
2004-06-01 03:28:03 +02:00
do_kill(pgpid_t pid)
{
2004-06-01 03:28:03 +02:00
if (kill((pid_t) pid, sig) != 0)
{
write_stderr(_("%s: could not send signal %d (PID: %ld): %s\n"),
progname, sig, pid, strerror(errno));
exit(1);
}
}
#ifdef WIN32
2004-08-29 07:07:03 +02:00
static bool
pgwin32_IsInstalled(SC_HANDLE hSCM)
{
2004-08-29 07:07:03 +02:00
SC_HANDLE hService = OpenService(hSCM, register_servicename, SERVICE_QUERY_CONFIG);
bool bResult = (hService != NULL);
if (bResult)
CloseServiceHandle(hService);
return bResult;
}
2004-08-29 07:07:03 +02:00
static char *
pgwin32_CommandLine(bool registration)
{
PQExpBuffer cmdLine = createPQExpBuffer();
char cmdPath[MAXPGPATH];
2004-08-29 07:07:03 +02:00
int ret;
2005-10-15 04:49:52 +02:00
if (registration)
{
ret = find_my_exec(argv0, cmdPath);
if (ret != 0)
{
write_stderr(_("%s: could not find own program executable\n"), progname);
exit(1);
}
}
else
{
ret = find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
cmdPath);
if (ret != 0)
{
write_stderr(_("%s: could not find postgres program executable\n"), progname);
exit(1);
}
}
/* if path does not end in .exe, append it */
if (strlen(cmdPath) < 4 ||
pg_strcasecmp(cmdPath + strlen(cmdPath) - 4, ".exe") != 0)
snprintf(cmdPath + strlen(cmdPath), sizeof(cmdPath) - strlen(cmdPath),
".exe");
/* use backslashes in path to avoid problems with some third-party tools */
make_native_path(cmdPath);
/* be sure to double-quote the executable's name in the command */
appendPQExpBuffer(cmdLine, "\"%s\"", cmdPath);
/* append assorted switches to the command line, as needed */
if (registration)
appendPQExpBuffer(cmdLine, " runservice -N \"%s\"",
register_servicename);
if (pg_config)
{
/* We need the -D path to be absolute */
char *dataDir;
if ((dataDir = make_absolute_path(pg_config)) == NULL)
{
/* make_absolute_path already reported the error */
exit(1);
}
make_native_path(dataDir);
appendPQExpBuffer(cmdLine, " -D \"%s\"", dataDir);
free(dataDir);
}
if (registration && event_source != NULL)
appendPQExpBuffer(cmdLine, " -e \"%s\"", event_source);
if (registration && do_wait)
appendPQExpBufferStr(cmdLine, " -w");
2004-08-29 07:07:03 +02:00
/* Don't propagate a value from an environment variable. */
if (registration && wait_seconds_arg && wait_seconds != DEFAULT_WAIT)
appendPQExpBuffer(cmdLine, " -t %d", wait_seconds);
if (registration && silent_mode)
appendPQExpBufferStr(cmdLine, " -s");
if (post_opts)
{
if (registration)
appendPQExpBuffer(cmdLine, " -o \"%s\"", post_opts);
else
appendPQExpBuffer(cmdLine, " %s", post_opts);
}
return cmdLine->data;
}
static void
pgwin32_doRegister(void)
{
2004-08-29 07:07:03 +02:00
SC_HANDLE hService;
SC_HANDLE hSCM = OpenSCManager(NULL, NULL, SC_MANAGER_ALL_ACCESS);
if (hSCM == NULL)
{
2004-10-12 23:54:45 +02:00
write_stderr(_("%s: could not open service manager\n"), progname);
exit(1);
}
if (pgwin32_IsInstalled(hSCM))
{
CloseServiceHandle(hSCM);
2004-10-12 23:54:45 +02:00
write_stderr(_("%s: service \"%s\" already registered\n"), progname, register_servicename);
exit(1);
}
if ((hService = CreateService(hSCM, register_servicename, register_servicename,
SERVICE_ALL_ACCESS, SERVICE_WIN32_OWN_PROCESS,
pgctl_start_type, SERVICE_ERROR_NORMAL,
pgwin32_CommandLine(true),
NULL, NULL, "RPCSS\0", register_username, register_password)) == NULL)
{
CloseServiceHandle(hSCM);
write_stderr(_("%s: could not register service \"%s\": error code %lu\n"),
progname, register_servicename,
(unsigned long) GetLastError());
exit(1);
}
CloseServiceHandle(hService);
CloseServiceHandle(hSCM);
}
static void
pgwin32_doUnregister(void)
{
2004-08-29 07:07:03 +02:00
SC_HANDLE hService;
SC_HANDLE hSCM = OpenSCManager(NULL, NULL, SC_MANAGER_ALL_ACCESS);
if (hSCM == NULL)
{
2004-10-12 23:54:45 +02:00
write_stderr(_("%s: could not open service manager\n"), progname);
exit(1);
}
if (!pgwin32_IsInstalled(hSCM))
{
CloseServiceHandle(hSCM);
2004-10-12 23:54:45 +02:00
write_stderr(_("%s: service \"%s\" not registered\n"), progname, register_servicename);
exit(1);
}
if ((hService = OpenService(hSCM, register_servicename, DELETE)) == NULL)
{
CloseServiceHandle(hSCM);
write_stderr(_("%s: could not open service \"%s\": error code %lu\n"),
progname, register_servicename,
(unsigned long) GetLastError());
exit(1);
}
2004-08-29 07:07:03 +02:00
if (!DeleteService(hService))
{
CloseServiceHandle(hService);
CloseServiceHandle(hSCM);
write_stderr(_("%s: could not unregister service \"%s\": error code %lu\n"),
progname, register_servicename,
(unsigned long) GetLastError());
exit(1);
}
CloseServiceHandle(hService);
CloseServiceHandle(hSCM);
}
2004-08-29 07:07:03 +02:00
static void
pgwin32_SetServiceStatus(DWORD currentState)
{
status.dwCurrentState = currentState;
SetServiceStatus(hStatus, (LPSERVICE_STATUS) &status);
}
2004-08-29 07:07:03 +02:00
static void WINAPI
pgwin32_ServiceHandler(DWORD request)
{
switch (request)
{
case SERVICE_CONTROL_STOP:
case SERVICE_CONTROL_SHUTDOWN:
2004-08-29 07:07:03 +02:00
/*
2005-10-15 04:49:52 +02:00
* We only need a short wait hint here as it just needs to wait
* for the next checkpoint. They occur every 5 seconds during
* shutdown
*/
2004-08-29 07:07:03 +02:00
status.dwWaitHint = 10000;
pgwin32_SetServiceStatus(SERVICE_STOP_PENDING);
SetEvent(shutdownEvent);
return;
case SERVICE_CONTROL_PAUSE:
/* Win32 config reloading */
status.dwWaitHint = 5000;
2004-08-29 07:07:03 +02:00
kill(postmasterPID, SIGHUP);
return;
2004-08-29 07:07:03 +02:00
/* FIXME: These could be used to replace other signals etc */
case SERVICE_CONTROL_CONTINUE:
case SERVICE_CONTROL_INTERROGATE:
default:
break;
}
}
2004-08-29 07:07:03 +02:00
static void WINAPI
pgwin32_ServiceMain(DWORD argc, LPTSTR *argv)
{
PROCESS_INFORMATION pi;
2004-08-29 07:07:03 +02:00
DWORD ret;
/* Initialize variables */
2004-08-29 07:07:03 +02:00
status.dwWin32ExitCode = S_OK;
status.dwCheckPoint = 0;
status.dwWaitHint = 60000;
status.dwServiceType = SERVICE_WIN32_OWN_PROCESS;
status.dwControlsAccepted = SERVICE_ACCEPT_STOP | SERVICE_ACCEPT_SHUTDOWN | SERVICE_ACCEPT_PAUSE_CONTINUE;
status.dwServiceSpecificExitCode = 0;
status.dwCurrentState = SERVICE_START_PENDING;
2004-08-29 07:07:03 +02:00
memset(&pi, 0, sizeof(pi));
2007-11-15 22:14:46 +01:00
read_post_opts();
/* Register the control request handler */
2004-08-29 07:07:03 +02:00
if ((hStatus = RegisterServiceCtrlHandler(register_servicename, pgwin32_ServiceHandler)) == (SERVICE_STATUS_HANDLE) 0)
return;
2004-08-29 07:07:03 +02:00
if ((shutdownEvent = CreateEvent(NULL, true, false, NULL)) == NULL)
return;
/* Start the postmaster */
pgwin32_SetServiceStatus(SERVICE_START_PENDING);
if (!CreateRestrictedProcess(pgwin32_CommandLine(false), &pi, true))
{
pgwin32_SetServiceStatus(SERVICE_STOPPED);
return;
}
2004-08-29 07:07:03 +02:00
postmasterPID = pi.dwProcessId;
postmasterProcess = pi.hProcess;
CloseHandle(pi.hThread);
if (do_wait)
{
write_eventlog(EVENTLOG_INFORMATION_TYPE, _("Waiting for server startup...\n"));
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
if (wait_for_postmaster(postmasterPID, true) != POSTMASTER_READY)
{
write_eventlog(EVENTLOG_ERROR_TYPE, _("Timed out waiting for server startup\n"));
2007-11-15 22:14:46 +01:00
pgwin32_SetServiceStatus(SERVICE_STOPPED);
return;
}
write_eventlog(EVENTLOG_INFORMATION_TYPE, _("Server started and accepting connections\n"));
}
pgwin32_SetServiceStatus(SERVICE_RUNNING);
/* Wait for quit... */
2004-08-29 07:07:03 +02:00
ret = WaitForMultipleObjects(2, shutdownHandles, FALSE, INFINITE);
pgwin32_SetServiceStatus(SERVICE_STOP_PENDING);
switch (ret)
{
2004-08-29 07:07:03 +02:00
case WAIT_OBJECT_0: /* shutdown event */
{
/*
* status.dwCheckPoint can be incremented by
Change pg_ctl to detect server-ready by watching status in postmaster.pid. Traditionally, "pg_ctl start -w" has waited for the server to become ready to accept connections by attempting a connection once per second. That has the major problem that connection issues (for instance, a kernel packet filter blocking traffic) can't be reliably told apart from server startup issues, and the minor problem that if server startup isn't quick, we accumulate "the database system is starting up" spam in the server log. We've hacked around many of the possible connection issues, but it resulted in ugly and complicated code in pg_ctl.c. In commit c61559ec3, I changed the probe rate to every tenth of a second. That prompted Jeff Janes to complain that the log-spam problem had become much worse. In the ensuing discussion, Andres Freund pointed out that we could dispense with connection attempts altogether if the postmaster were changed to report its status in postmaster.pid, which "pg_ctl start" already relies on being able to read. This patch implements that, teaching postmaster.c to report a status string into the pidfile at the same state-change points already identified as being of interest for systemd status reporting (cf commit 7d17e683f). pg_ctl no longer needs to link with libpq at all; all its functions now depend on reading server files. In support of this, teach AddToDataDirLockFile() to allow addition of postmaster.pid lines in not-necessarily-sequential order. This is needed on Windows where the SHMEM_KEY line will never be written at all. We still have the restriction that we don't want to truncate the pidfile; document the reasons for that a bit better. Also, fix the pg_ctl TAP tests so they'll notice if "start -w" mode is broken --- before, they'd just wait out the sixty seconds until the loop gives up, and then report success anyway. (Yes, I found that out the hard way.) While at it, arrange for pg_ctl to not need to #include miscadmin.h; as a rather low-level backend header, requiring that to be compilable client-side is pretty dubious. This requires moving the #define's associated with the pidfile into a new header file, and moving PG_BACKEND_VERSIONSTR someplace else. For lack of a clearly better "someplace else", I put it into port.h, beside the declaration of find_other_exec(), since most users of that macro are passing the value to find_other_exec(). (initdb still depends on miscadmin.h, but at least pg_ctl and pg_upgrade no longer do.) In passing, fix main.c so that PG_BACKEND_VERSIONSTR actually defines the output of "postgres -V", which remarkably it had never done before. Discussion: https://postgr.es/m/CAMkU=1xJW8e+CTotojOMBd-yzUvD0e_JZu2xHo=MnuZ4__m7Pg@mail.gmail.com
2017-06-28 23:31:24 +02:00
* wait_for_postmaster(), so it might not start from 0.
*/
Fix "pg_ctl start -w" to test child process status directly. pg_ctl start with -w previously relied on a heuristic that the postmaster would surely always manage to create postmaster.pid within five seconds. Unfortunately, that fails much more often than we would like on some of the slower, more heavily loaded buildfarm members. We have known for quite some time that we could remove the need for that heuristic on Unix by using fork/exec instead of system() to launch the postmaster. This allows us to know the exact PID of the postmaster, which allows near-certain verification that the postmaster.pid file is the one we want and not a leftover, and it also lets us use waitpid() to detect reliably whether the child postmaster has exited or not. What was blocking this change was not wanting to rewrite the Windows version of start_postmaster() to avoid use of CMD.EXE. That's doable in theory but would require fooling about with stdout/stderr redirection, and getting the handling of quote-containing postmaster switches to stay the same might be rather ticklish. However, we realized that we don't have to do that to fix the problem, because we can test whether the shell process has exited as a proxy for whether the postmaster is still alive. That doesn't allow an exact check of the PID in postmaster.pid, but we're no worse off than before in that respect; and we do get to get rid of the heuristic about how long the postmaster might take to create postmaster.pid. On Unix, this change means that a second "pg_ctl start -w" immediately after another such command will now reliably fail, whereas previously it would succeed if done within two seconds of the earlier command. Since that's a saner behavior anyway, it's fine. On Windows, the case can still succeed within the same time window, since pg_ctl can't tell that the earlier postmaster's postmaster.pid isn't the pidfile it is looking for. To ensure stable test results on Windows, we can insert a short sleep into the test script for pg_ctl, ensuring that the existing pidfile looks stale. This hack can be removed if we ever do rewrite start_postmaster(), but that no longer seems like a high-priority thing to do. Back-patch to all supported versions, both because the current behavior is buggy and because we must do that if we want the buildfarm failures to go away. Tom Lane and Michael Paquier
2015-10-13 00:30:36 +02:00
int maxShutdownCheckPoint = status.dwCheckPoint + 12;
2004-08-29 07:07:03 +02:00
kill(postmasterPID, SIGINT);
/*
* Increment the checkpoint and try again. Abort after 12
* checkpoints as the postmaster has probably hung.
*/
while (WaitForSingleObject(postmasterProcess, 5000) == WAIT_TIMEOUT && status.dwCheckPoint < maxShutdownCheckPoint)
{
status.dwCheckPoint++;
SetServiceStatus(hStatus, (LPSERVICE_STATUS) &status);
}
break;
}
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
case (WAIT_OBJECT_0 + 1): /* postmaster went down */
break;
default:
/* shouldn't get here? */
break;
}
CloseHandle(shutdownEvent);
CloseHandle(postmasterProcess);
pgwin32_SetServiceStatus(SERVICE_STOPPED);
}
2004-08-29 07:07:03 +02:00
static void
pgwin32_doRunAsService(void)
{
2004-08-29 07:07:03 +02:00
SERVICE_TABLE_ENTRY st[] = {{register_servicename, pgwin32_ServiceMain},
{NULL, NULL}};
2004-12-23 01:03:24 +01:00
if (StartServiceCtrlDispatcher(st) == 0)
{
write_stderr(_("%s: could not start service \"%s\": error code %lu\n"),
progname, register_servicename,
(unsigned long) GetLastError());
exit(1);
}
}
/*
* Mingw headers are incomplete, and so are the libraries. So we have to load
* a whole lot of API functions dynamically. Since we have to do this anyway,
* also load the couple of functions that *do* exist in mingw headers but not
* on NT4. That way, we don't break on NT4.
*/
typedef BOOL (WINAPI * __CreateRestrictedToken) (HANDLE, DWORD, DWORD, PSID_AND_ATTRIBUTES, DWORD, PLUID_AND_ATTRIBUTES, DWORD, PSID_AND_ATTRIBUTES, PHANDLE);
typedef BOOL (WINAPI * __IsProcessInJob) (HANDLE, HANDLE, PBOOL);
typedef HANDLE (WINAPI * __CreateJobObject) (LPSECURITY_ATTRIBUTES, LPCTSTR);
typedef BOOL (WINAPI * __SetInformationJobObject) (HANDLE, JOBOBJECTINFOCLASS, LPVOID, DWORD);
typedef BOOL (WINAPI * __AssignProcessToJobObject) (HANDLE, HANDLE);
typedef BOOL (WINAPI * __QueryInformationJobObject) (HANDLE, JOBOBJECTINFOCLASS, LPVOID, DWORD, LPDWORD);
/*
* Create a restricted token, a job object sandbox, and execute the specified
* process with it.
*
* Returns 0 on success, non-zero on failure, same as CreateProcess().
*
* On NT4, or any other system not containing the required functions, will
* launch the process under the current token without doing any modifications.
*
* NOTE! Job object will only work when running as a service, because it's
* automatically destroyed when pg_ctl exits.
*/
static int
CreateRestrictedProcess(char *cmd, PROCESS_INFORMATION *processInfo, bool as_service)
{
2006-10-04 02:30:14 +02:00
int r;
BOOL b;
STARTUPINFO si;
HANDLE origToken;
HANDLE restrictedToken;
SID_IDENTIFIER_AUTHORITY NtAuthority = {SECURITY_NT_AUTHORITY};
SID_AND_ATTRIBUTES dropSids[2];
PTOKEN_PRIVILEGES delPrivs;
2006-10-04 02:30:14 +02:00
/* Functions loaded dynamically */
__CreateRestrictedToken _CreateRestrictedToken = NULL;
__IsProcessInJob _IsProcessInJob = NULL;
__CreateJobObject _CreateJobObject = NULL;
__SetInformationJobObject _SetInformationJobObject = NULL;
__AssignProcessToJobObject _AssignProcessToJobObject = NULL;
__QueryInformationJobObject _QueryInformationJobObject = NULL;
HANDLE Kernel32Handle;
HANDLE Advapi32Handle;
ZeroMemory(&si, sizeof(si));
si.cb = sizeof(si);
Advapi32Handle = LoadLibrary("ADVAPI32.DLL");
if (Advapi32Handle != NULL)
{
_CreateRestrictedToken = (__CreateRestrictedToken) GetProcAddress(Advapi32Handle, "CreateRestrictedToken");
}
if (_CreateRestrictedToken == NULL)
{
/*
* NT4 doesn't have CreateRestrictedToken, so just call ordinary
* CreateProcess
*/
write_stderr(_("%s: WARNING: cannot create restricted tokens on this platform\n"), progname);
2006-10-04 02:30:14 +02:00
if (Advapi32Handle != NULL)
FreeLibrary(Advapi32Handle);
return CreateProcess(NULL, cmd, NULL, NULL, FALSE, 0, NULL, NULL, &si, processInfo);
}
/* Open the current token to use as a base for the restricted one */
if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ALL_ACCESS, &origToken))
{
/*
2016-06-10 00:02:36 +02:00
* Most Windows targets make DWORD a 32-bit unsigned long, but in case
* it doesn't cast DWORD before printing.
*/
write_stderr(_("%s: could not open process token: error code %lu\n"),
progname, (unsigned long) GetLastError());
2006-10-04 02:30:14 +02:00
return 0;
}
/* Allocate list of SIDs to remove */
ZeroMemory(&dropSids, sizeof(dropSids));
if (!AllocateAndInitializeSid(&NtAuthority, 2,
SECURITY_BUILTIN_DOMAIN_RID, DOMAIN_ALIAS_RID_ADMINS, 0, 0, 0, 0, 0,
2006-10-04 02:30:14 +02:00
0, &dropSids[0].Sid) ||
!AllocateAndInitializeSid(&NtAuthority, 2,
SECURITY_BUILTIN_DOMAIN_RID, DOMAIN_ALIAS_RID_POWER_USERS, 0, 0, 0, 0, 0,
2006-10-04 02:30:14 +02:00
0, &dropSids[1].Sid))
{
write_stderr(_("%s: could not allocate SIDs: error code %lu\n"),
progname, (unsigned long) GetLastError());
2006-10-04 02:30:14 +02:00
return 0;
}
/* Get list of privileges to remove */
delPrivs = GetPrivilegesToDelete(origToken);
if (delPrivs == NULL)
/* Error message already printed */
return 0;
2006-10-04 02:30:14 +02:00
b = _CreateRestrictedToken(origToken,
0,
2006-10-04 02:30:14 +02:00
sizeof(dropSids) / sizeof(dropSids[0]),
dropSids,
delPrivs->PrivilegeCount, delPrivs->Privileges,
2006-10-04 02:30:14 +02:00
0, NULL,
&restrictedToken);
free(delPrivs);
2006-10-04 02:30:14 +02:00
FreeSid(dropSids[1].Sid);
FreeSid(dropSids[0].Sid);
CloseHandle(origToken);
FreeLibrary(Advapi32Handle);
if (!b)
{
write_stderr(_("%s: could not create restricted token: error code %lu\n"),
progname, (unsigned long) GetLastError());
2006-10-04 02:30:14 +02:00
return 0;
}
AddUserToTokenDacl(restrictedToken);
2006-10-04 02:30:14 +02:00
r = CreateProcessAsUser(restrictedToken, NULL, cmd, NULL, NULL, TRUE, CREATE_SUSPENDED, NULL, NULL, &si, processInfo);
Kernel32Handle = LoadLibrary("KERNEL32.DLL");
if (Kernel32Handle != NULL)
{
_IsProcessInJob = (__IsProcessInJob) GetProcAddress(Kernel32Handle, "IsProcessInJob");
_CreateJobObject = (__CreateJobObject) GetProcAddress(Kernel32Handle, "CreateJobObjectA");
_SetInformationJobObject = (__SetInformationJobObject) GetProcAddress(Kernel32Handle, "SetInformationJobObject");
_AssignProcessToJobObject = (__AssignProcessToJobObject) GetProcAddress(Kernel32Handle, "AssignProcessToJobObject");
_QueryInformationJobObject = (__QueryInformationJobObject) GetProcAddress(Kernel32Handle, "QueryInformationJobObject");
}
/* Verify that we found all functions */
if (_IsProcessInJob == NULL || _CreateJobObject == NULL || _SetInformationJobObject == NULL || _AssignProcessToJobObject == NULL || _QueryInformationJobObject == NULL)
{
/*
* IsProcessInJob() is not available on < WinXP, so there is no need
* to log the error every time in that case
*/
if (IsWindowsXPOrGreater())
2006-10-04 02:30:14 +02:00
/*
* Log error if we can't get version, or if we're on WinXP/2003 or
* newer
*/
write_stderr(_("%s: WARNING: could not locate all job object functions in system API\n"), progname);
2006-10-04 02:30:14 +02:00
}
else
{
BOOL inJob;
if (_IsProcessInJob(processInfo->hProcess, NULL, &inJob))
{
if (!inJob)
{
/*
* Job objects are working, and the new process isn't in one,
* so we can create one safely. If any problems show up when
* setting it, we're going to ignore them.
*/
HANDLE job;
char jobname[128];
sprintf(jobname, "PostgreSQL_%lu",
(unsigned long) processInfo->dwProcessId);
2006-10-04 02:30:14 +02:00
job = _CreateJobObject(NULL, jobname);
if (job)
{
JOBOBJECT_BASIC_LIMIT_INFORMATION basicLimit;
JOBOBJECT_BASIC_UI_RESTRICTIONS uiRestrictions;
JOBOBJECT_SECURITY_LIMIT_INFORMATION securityLimit;
ZeroMemory(&basicLimit, sizeof(basicLimit));
ZeroMemory(&uiRestrictions, sizeof(uiRestrictions));
ZeroMemory(&securityLimit, sizeof(securityLimit));
basicLimit.LimitFlags = JOB_OBJECT_LIMIT_DIE_ON_UNHANDLED_EXCEPTION | JOB_OBJECT_LIMIT_PRIORITY_CLASS;
basicLimit.PriorityClass = NORMAL_PRIORITY_CLASS;
_SetInformationJobObject(job, JobObjectBasicLimitInformation, &basicLimit, sizeof(basicLimit));
uiRestrictions.UIRestrictionsClass = JOB_OBJECT_UILIMIT_DESKTOP | JOB_OBJECT_UILIMIT_DISPLAYSETTINGS |
JOB_OBJECT_UILIMIT_EXITWINDOWS | JOB_OBJECT_UILIMIT_READCLIPBOARD |
2006-10-04 02:30:14 +02:00
JOB_OBJECT_UILIMIT_SYSTEMPARAMETERS | JOB_OBJECT_UILIMIT_WRITECLIPBOARD;
if (as_service)
{
if (!IsWindows7OrGreater())
{
/*
* On Windows 7 (and presumably later),
* JOB_OBJECT_UILIMIT_HANDLES prevents us from
* starting as a service. So we only enable it on
* Vista and earlier (version <= 6.0)
*/
uiRestrictions.UIRestrictionsClass |= JOB_OBJECT_UILIMIT_HANDLES;
}
}
2006-10-04 02:30:14 +02:00
_SetInformationJobObject(job, JobObjectBasicUIRestrictions, &uiRestrictions, sizeof(uiRestrictions));
securityLimit.SecurityLimitFlags = JOB_OBJECT_SECURITY_NO_ADMIN | JOB_OBJECT_SECURITY_ONLY_TOKEN;
securityLimit.JobToken = restrictedToken;
_SetInformationJobObject(job, JobObjectSecurityLimitInformation, &securityLimit, sizeof(securityLimit));
_AssignProcessToJobObject(job, processInfo->hProcess);
}
}
}
}
2006-10-04 02:30:14 +02:00
CloseHandle(restrictedToken);
ResumeThread(processInfo->hThread);
FreeLibrary(Kernel32Handle);
/*
* We intentionally don't close the job object handle, because we want the
* object to live on until pg_ctl shuts down.
*/
2006-10-04 02:30:14 +02:00
return r;
}
/*
* Get a list of privileges to delete from the access token. We delete all privileges
* except SeLockMemoryPrivilege which is needed to use large pages, and
* SeChangeNotifyPrivilege which is enabled by default in DISABLE_MAX_PRIVILEGE.
*/
static PTOKEN_PRIVILEGES
GetPrivilegesToDelete(HANDLE hToken)
{
int i,
j;
DWORD length;
PTOKEN_PRIVILEGES tokenPrivs;
LUID luidLockPages;
LUID luidChangeNotify;
if (!LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &luidLockPages) ||
!LookupPrivilegeValue(NULL, SE_CHANGE_NOTIFY_NAME, &luidChangeNotify))
{
write_stderr(_("%s: could not get LUIDs for privileges: error code %lu\n"),
progname, (unsigned long) GetLastError());
return NULL;
}
if (!GetTokenInformation(hToken, TokenPrivileges, NULL, 0, &length) &&
GetLastError() != ERROR_INSUFFICIENT_BUFFER)
{
write_stderr(_("%s: could not get token information: error code %lu\n"),
progname, (unsigned long) GetLastError());
return NULL;
}
tokenPrivs = (PTOKEN_PRIVILEGES) pg_malloc_extended(length,
MCXT_ALLOC_NO_OOM);
if (tokenPrivs == NULL)
{
write_stderr(_("%s: out of memory\n"), progname);
return NULL;
}
if (!GetTokenInformation(hToken, TokenPrivileges, tokenPrivs, length, &length))
{
write_stderr(_("%s: could not get token information: error code %lu\n"),
progname, (unsigned long) GetLastError());
free(tokenPrivs);
return NULL;
}
for (i = 0; i < tokenPrivs->PrivilegeCount; i++)
{
if (memcmp(&tokenPrivs->Privileges[i].Luid, &luidLockPages, sizeof(LUID)) == 0 ||
memcmp(&tokenPrivs->Privileges[i].Luid, &luidChangeNotify, sizeof(LUID)) == 0)
{
for (j = i; j < tokenPrivs->PrivilegeCount - 1; j++)
tokenPrivs->Privileges[j] = tokenPrivs->Privileges[j + 1];
tokenPrivs->PrivilegeCount--;
}
}
return tokenPrivs;
}
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
#endif /* WIN32 */
static void
do_advice(void)
{
write_stderr(_("Try \"%s --help\" for more information.\n"), progname);
}
static void
do_help(void)
{
printf(_("%s is a utility to initialize, start, stop, or control a PostgreSQL server.\n\n"), progname);
printf(_("Usage:\n"));
printf(_(" %s init[db] [-D DATADIR] [-s] [-o OPTIONS]\n"), progname);
printf(_(" %s start [-D DATADIR] [-l FILENAME] [-W] [-t SECS] [-s]\n"
" [-o OPTIONS] [-p PATH] [-c]\n"), progname);
printf(_(" %s stop [-D DATADIR] [-m SHUTDOWN-MODE] [-W] [-t SECS] [-s]\n"), progname);
printf(_(" %s restart [-D DATADIR] [-m SHUTDOWN-MODE] [-W] [-t SECS] [-s]\n"
" [-o OPTIONS] [-c]\n"), progname);
printf(_(" %s reload [-D DATADIR] [-s]\n"), progname);
printf(_(" %s status [-D DATADIR]\n"), progname);
printf(_(" %s promote [-D DATADIR] [-W] [-t SECS] [-s]\n"), progname);
printf(_(" %s logrotate [-D DATADIR] [-s]\n"), progname);
printf(_(" %s kill SIGNALNAME PID\n"), progname);
#ifdef WIN32
printf(_(" %s register [-D DATADIR] [-N SERVICENAME] [-U USERNAME] [-P PASSWORD]\n"
" [-S START-TYPE] [-e SOURCE] [-W] [-t SECS] [-s] [-o OPTIONS]\n"), progname);
2004-10-12 23:54:45 +02:00
printf(_(" %s unregister [-N SERVICENAME]\n"), progname);
#endif
printf(_("\nCommon options:\n"));
printf(_(" -D, --pgdata=DATADIR location of the database storage area\n"));
#ifdef WIN32
printf(_(" -e SOURCE event source for logging when running as a service\n"));
#endif
printf(_(" -s, --silent only print errors, no informational messages\n"));
printf(_(" -t, --timeout=SECS seconds to wait when using -w option\n"));
printf(_(" -V, --version output version information, then exit\n"));
printf(_(" -w, --wait wait until operation completes (default)\n"));
printf(_(" -W, --no-wait do not wait until operation completes\n"));
printf(_(" -?, --help show this help, then exit\n"));
printf(_("If the -D option is omitted, the environment variable PGDATA is used.\n"));
printf(_("\nOptions for start or restart:\n"));
#if defined(HAVE_GETRLIMIT) && defined(RLIMIT_CORE)
printf(_(" -c, --core-files allow postgres to produce core files\n"));
#else
printf(_(" -c, --core-files not applicable on this platform\n"));
#endif
printf(_(" -l, --log=FILENAME write (or append) server log to FILENAME\n"));
printf(_(" -o, --options=OPTIONS command line options to pass to postgres\n"
" (PostgreSQL server executable) or initdb\n"));
printf(_(" -p PATH-TO-POSTGRES normally not necessary\n"));
printf(_("\nOptions for stop or restart:\n"));
printf(_(" -m, --mode=MODE MODE can be \"smart\", \"fast\", or \"immediate\"\n"));
printf(_("\nShutdown modes are:\n"));
printf(_(" smart quit after all clients have disconnected\n"));
printf(_(" fast quit directly, with proper shutdown (default)\n"));
printf(_(" immediate quit without complete shutdown; will lead to recovery on restart\n"));
printf(_("\nAllowed signal names for kill:\n"));
printf(" ABRT HUP INT KILL QUIT TERM USR1 USR2\n");
#ifdef WIN32
printf(_("\nOptions for register and unregister:\n"));
printf(_(" -N SERVICENAME service name with which to register PostgreSQL server\n"));
printf(_(" -P PASSWORD password of account to register PostgreSQL server\n"));
printf(_(" -U USERNAME user name of account to register PostgreSQL server\n"));
printf(_(" -S START-TYPE service start type to register PostgreSQL server\n"));
printf(_("\nStart types are:\n"));
printf(_(" auto start service automatically during system startup (default)\n"));
printf(_(" demand start service on demand\n"));
#endif
printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
}
static void
set_mode(char *modeopt)
{
if (strcmp(modeopt, "s") == 0 || strcmp(modeopt, "smart") == 0)
{
shutdown_mode = SMART_MODE;
sig = SIGTERM;
}
else if (strcmp(modeopt, "f") == 0 || strcmp(modeopt, "fast") == 0)
{
shutdown_mode = FAST_MODE;
sig = SIGINT;
}
else if (strcmp(modeopt, "i") == 0 || strcmp(modeopt, "immediate") == 0)
{
shutdown_mode = IMMEDIATE_MODE;
sig = SIGQUIT;
}
else
{
2004-10-12 23:54:45 +02:00
write_stderr(_("%s: unrecognized shutdown mode \"%s\"\n"), progname, modeopt);
do_advice();
exit(1);
}
}
static void
set_sig(char *signame)
{
if (strcmp(signame, "HUP") == 0)
sig = SIGHUP;
else if (strcmp(signame, "INT") == 0)
sig = SIGINT;
else if (strcmp(signame, "QUIT") == 0)
sig = SIGQUIT;
else if (strcmp(signame, "ABRT") == 0)
sig = SIGABRT;
else if (strcmp(signame, "KILL") == 0)
sig = SIGKILL;
else if (strcmp(signame, "TERM") == 0)
sig = SIGTERM;
else if (strcmp(signame, "USR1") == 0)
sig = SIGUSR1;
else if (strcmp(signame, "USR2") == 0)
sig = SIGUSR2;
else
{
2004-10-12 23:54:45 +02:00
write_stderr(_("%s: unrecognized signal name \"%s\"\n"), progname, signame);
do_advice();
exit(1);
}
}
#ifdef WIN32
static void
set_starttype(char *starttypeopt)
{
if (strcmp(starttypeopt, "a") == 0 || strcmp(starttypeopt, "auto") == 0)
pgctl_start_type = SERVICE_AUTO_START;
else if (strcmp(starttypeopt, "d") == 0 || strcmp(starttypeopt, "demand") == 0)
pgctl_start_type = SERVICE_DEMAND_START;
else
{
write_stderr(_("%s: unrecognized start type \"%s\"\n"), progname, starttypeopt);
do_advice();
exit(1);
}
}
#endif
/*
* adjust_data_dir
*
* If a configuration-only directory was specified, find the real data dir.
*/
2011-12-17 05:19:06 +01:00
static void
adjust_data_dir(void)
{
char cmd[MAXPGPATH],
filename[MAXPGPATH],
*my_exec_path;
FILE *fd;
/* do nothing if we're working without knowledge of data dir */
if (pg_config == NULL)
return;
/* If there is no postgresql.conf, it can't be a config-only dir */
snprintf(filename, sizeof(filename), "%s/postgresql.conf", pg_config);
if ((fd = fopen(filename, "r")) == NULL)
return;
fclose(fd);
/* If PG_VERSION exists, it can't be a config-only dir */
snprintf(filename, sizeof(filename), "%s/PG_VERSION", pg_config);
if ((fd = fopen(filename, "r")) != NULL)
{
fclose(fd);
return;
}
/* Must be a configuration directory, so find the data directory */
/* we use a private my_exec_path to avoid interfering with later uses */
if (exec_path == NULL)
my_exec_path = find_other_exec_or_die(argv0, "postgres", PG_BACKEND_VERSIONSTR);
else
my_exec_path = pg_strdup(exec_path);
/* it's important for -C to be the first option, see main.c */
snprintf(cmd, MAXPGPATH, "\"%s\" -C data_directory %s%s",
my_exec_path,
pgdata_opt ? pgdata_opt : "",
post_opts ? post_opts : "");
fd = popen(cmd, "r");
if (fd == NULL || fgets(filename, sizeof(filename), fd) == NULL)
{
2012-07-02 20:12:46 +02:00
write_stderr(_("%s: could not determine the data directory using command \"%s\"\n"), progname, cmd);
exit(1);
}
pclose(fd);
free(my_exec_path);
/* strip trailing newline and carriage return */
(void) pg_strip_crlf(filename);
free(pg_data);
pg_data = pg_strdup(filename);
canonicalize_path(pg_data);
}
static DBState
get_control_dbstate(void)
{
DBState ret;
bool crc_ok;
Unified logging system for command-line programs This unifies the various ad hoc logging (message printing, error printing) systems used throughout the command-line programs. Features: - Program name is automatically prefixed. - Message string does not end with newline. This removes a common source of inconsistencies and omissions. - Additionally, a final newline is automatically stripped, simplifying use of PQerrorMessage() etc., another common source of mistakes. - I converted error message strings to use %m where possible. - As a result of the above several points, more translatable message strings can be shared between different components and between frontends and backend, without gratuitous punctuation or whitespace differences. - There is support for setting a "log level". This is not meant to be user-facing, but can be used internally to implement debug or verbose modes. - Lazy argument evaluation, so no significant overhead if logging at some level is disabled. - Some color in the messages, similar to gcc and clang. Set PG_COLOR=auto to try it out. Some colors are predefined, but can be customized by setting PG_COLORS. - Common files (common/, fe_utils/, etc.) can handle logging much more simply by just using one API without worrying too much about the context of the calling program, requiring callbacks, or having to pass "progname" around everywhere. - Some programs called setvbuf() to make sure that stderr is unbuffered, even on Windows. But not all programs did that. This is now done centrally. Soft goals: - Reduces vertical space use and visual complexity of error reporting in the source code. - Encourages more deliberate classification of messages. For example, in some cases it wasn't clear without analyzing the surrounding code whether a message was meant as an error or just an info. - Concepts and terms are vaguely aligned with popular logging frameworks such as log4j and Python logging. This is all just about printing stuff out. Nothing affects program flow (e.g., fatal exits). The uses are just too varied to do that. Some existing code had wrappers that do some kind of print-and-exit, and I adapted those. I tried to keep the output mostly the same, but there is a lot of historical baggage to unwind and special cases to consider, and I might not always have succeeded. One significant change is that pg_rewind used to write all error messages to stdout. That is now changed to stderr. Reviewed-by: Donald Dong <xdong@csumb.edu> Reviewed-by: Arthur Zakirov <a.zakirov@postgrespro.ru> Discussion: https://www.postgresql.org/message-id/flat/6a609b43-4f57-7348-6480-bd022f924310@2ndquadrant.com
2019-04-01 14:24:37 +02:00
ControlFileData *control_file_data = get_controlfile(pg_data, &crc_ok);
if (!crc_ok)
{
write_stderr(_("%s: control file appears to be corrupt\n"), progname);
exit(1);
}
ret = control_file_data->state;
pfree(control_file_data);
return ret;
}
int
main(int argc, char **argv)
{
static struct option long_options[] = {
{"help", no_argument, NULL, '?'},
{"version", no_argument, NULL, 'V'},
{"log", required_argument, NULL, 'l'},
{"mode", required_argument, NULL, 'm'},
{"pgdata", required_argument, NULL, 'D'},
{"options", required_argument, NULL, 'o'},
{"silent", no_argument, NULL, 's'},
{"timeout", required_argument, NULL, 't'},
{"core-files", no_argument, NULL, 'c'},
{"wait", no_argument, NULL, 'w'},
{"no-wait", no_argument, NULL, 'W'},
{NULL, 0, NULL, 0}
};
char *env_wait;
int option_index;
int c;
2004-06-01 03:28:03 +02:00
pgpid_t killproc = 0;
2004-08-29 07:07:03 +02:00
Unified logging system for command-line programs This unifies the various ad hoc logging (message printing, error printing) systems used throughout the command-line programs. Features: - Program name is automatically prefixed. - Message string does not end with newline. This removes a common source of inconsistencies and omissions. - Additionally, a final newline is automatically stripped, simplifying use of PQerrorMessage() etc., another common source of mistakes. - I converted error message strings to use %m where possible. - As a result of the above several points, more translatable message strings can be shared between different components and between frontends and backend, without gratuitous punctuation or whitespace differences. - There is support for setting a "log level". This is not meant to be user-facing, but can be used internally to implement debug or verbose modes. - Lazy argument evaluation, so no significant overhead if logging at some level is disabled. - Some color in the messages, similar to gcc and clang. Set PG_COLOR=auto to try it out. Some colors are predefined, but can be customized by setting PG_COLORS. - Common files (common/, fe_utils/, etc.) can handle logging much more simply by just using one API without worrying too much about the context of the calling program, requiring callbacks, or having to pass "progname" around everywhere. - Some programs called setvbuf() to make sure that stderr is unbuffered, even on Windows. But not all programs did that. This is now done centrally. Soft goals: - Reduces vertical space use and visual complexity of error reporting in the source code. - Encourages more deliberate classification of messages. For example, in some cases it wasn't clear without analyzing the surrounding code whether a message was meant as an error or just an info. - Concepts and terms are vaguely aligned with popular logging frameworks such as log4j and Python logging. This is all just about printing stuff out. Nothing affects program flow (e.g., fatal exits). The uses are just too varied to do that. Some existing code had wrappers that do some kind of print-and-exit, and I adapted those. I tried to keep the output mostly the same, but there is a lot of historical baggage to unwind and special cases to consider, and I might not always have succeeded. One significant change is that pg_rewind used to write all error messages to stdout. That is now changed to stderr. Reviewed-by: Donald Dong <xdong@csumb.edu> Reviewed-by: Arthur Zakirov <a.zakirov@postgrespro.ru> Discussion: https://www.postgresql.org/message-id/flat/6a609b43-4f57-7348-6480-bd022f924310@2ndquadrant.com
2019-04-01 14:24:37 +02:00
pg_logging_init(argv[0]);
progname = get_progname(argv[0]);
set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_ctl"));
start_time = time(NULL);
/*
2005-10-15 04:49:52 +02:00
* save argv[0] so do_start() can look for the postmaster if necessary. we
* don't look for postmaster here because in many cases we won't need it.
*/
argv0 = argv[0];
/* Set restrictive mode mask until PGDATA permissions are checked */
umask(PG_MODE_MASK_OWNER);
/* support --help and --version even if invoked as root */
2004-08-29 07:07:03 +02:00
if (argc > 1)
{
if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
{
do_help();
exit(0);
}
else if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
{
puts("pg_ctl (PostgreSQL) " PG_VERSION);
exit(0);
}
}
/*
* Disallow running as root, to forestall any possible security holes.
*/
#ifndef WIN32
if (geteuid() == 0)
{
write_stderr(_("%s: cannot be run as root\n"
"Please log in (using, e.g., \"su\") as the "
"(unprivileged) user that will\n"
"own the server process.\n"),
progname);
exit(1);
}
#endif
env_wait = getenv("PGCTLTIMEOUT");
if (env_wait != NULL)
wait_seconds = atoi(env_wait);
/*
2004-08-29 07:07:03 +02:00
* 'Action' can be before or after args so loop over both. Some
2005-10-15 04:49:52 +02:00
* getopt_long() implementations will reorder argv[] to place all flags
* first (GNU?), but we don't rely on it. Our /port version doesn't do
* that.
*/
optind = 1;
2004-08-29 07:07:03 +02:00
/* process command-line options */
while (optind < argc)
{
while ((c = getopt_long(argc, argv, "cD:e:l:m:N:o:p:P:sS:t:U:wW",
long_options, &option_index)) != -1)
{
switch (c)
{
case 'D':
2004-08-29 07:07:03 +02:00
{
char *pgdata_D;
char *env_var;
2004-08-29 07:07:03 +02:00
pgdata_D = pg_strdup(optarg);
canonicalize_path(pgdata_D);
env_var = psprintf("PGDATA=%s", pgdata_D);
2004-08-29 07:07:03 +02:00
putenv(env_var);
/*
2005-10-15 04:49:52 +02:00
* We could pass PGDATA just in an environment
* variable but we do -D too for clearer postmaster
* 'ps' display
2004-08-29 07:07:03 +02:00
*/
pgdata_opt = psprintf("-D \"%s\" ", pgdata_D);
2004-08-29 07:07:03 +02:00
break;
}
case 'e':
event_source = pg_strdup(optarg);
break;
case 'l':
log_file = pg_strdup(optarg);
break;
case 'm':
set_mode(optarg);
break;
case 'N':
register_servicename = pg_strdup(optarg);
break;
case 'o':
/* append option? */
if (!post_opts)
post_opts = pg_strdup(optarg);
else
{
2015-05-24 03:35:49 +02:00
char *old_post_opts = post_opts;
post_opts = psprintf("%s %s", old_post_opts, optarg);
free(old_post_opts);
}
break;
case 'p':
exec_path = pg_strdup(optarg);
break;
case 'P':
register_password = pg_strdup(optarg);
break;
case 's':
silent_mode = true;
break;
case 'S':
#ifdef WIN32
set_starttype(optarg);
#else
write_stderr(_("%s: -S option not supported on this platform\n"),
progname);
exit(1);
#endif
break;
case 't':
wait_seconds = atoi(optarg);
wait_seconds_arg = true;
break;
case 'U':
2004-08-29 07:07:03 +02:00
if (strchr(optarg, '\\'))
register_username = pg_strdup(optarg);
2004-08-29 07:07:03 +02:00
else
2005-10-15 04:49:52 +02:00
/* Prepend .\ for local accounts */
register_username = psprintf(".\\%s", optarg);
break;
case 'w':
do_wait = true;
break;
case 'W':
do_wait = false;
break;
case 'c':
allow_core_files = true;
break;
default:
/* getopt_long already issued a suitable error message */
do_advice();
exit(1);
}
}
2004-08-29 07:07:03 +02:00
/* Process an action */
if (optind < argc)
{
if (ctl_command != NO_COMMAND)
{
write_stderr(_("%s: too many command-line arguments (first is \"%s\")\n"), progname, argv[optind]);
do_advice();
exit(1);
}
2004-08-29 07:07:03 +02:00
if (strcmp(argv[optind], "init") == 0
|| strcmp(argv[optind], "initdb") == 0)
ctl_command = INIT_COMMAND;
else if (strcmp(argv[optind], "start") == 0)
ctl_command = START_COMMAND;
else if (strcmp(argv[optind], "stop") == 0)
ctl_command = STOP_COMMAND;
else if (strcmp(argv[optind], "restart") == 0)
ctl_command = RESTART_COMMAND;
else if (strcmp(argv[optind], "reload") == 0)
ctl_command = RELOAD_COMMAND;
else if (strcmp(argv[optind], "status") == 0)
ctl_command = STATUS_COMMAND;
else if (strcmp(argv[optind], "promote") == 0)
ctl_command = PROMOTE_COMMAND;
else if (strcmp(argv[optind], "logrotate") == 0)
ctl_command = LOGROTATE_COMMAND;
else if (strcmp(argv[optind], "kill") == 0)
{
if (argc - optind < 3)
{
write_stderr(_("%s: missing arguments for kill mode\n"), progname);
do_advice();
exit(1);
}
ctl_command = KILL_COMMAND;
set_sig(argv[++optind]);
killproc = atol(argv[++optind]);
}
#ifdef WIN32
else if (strcmp(argv[optind], "register") == 0)
ctl_command = REGISTER_COMMAND;
else if (strcmp(argv[optind], "unregister") == 0)
ctl_command = UNREGISTER_COMMAND;
else if (strcmp(argv[optind], "runservice") == 0)
ctl_command = RUN_AS_SERVICE_COMMAND;
#endif
else
{
2004-10-12 23:54:45 +02:00
write_stderr(_("%s: unrecognized operation mode \"%s\"\n"), progname, argv[optind]);
do_advice();
exit(1);
}
optind++;
}
}
if (ctl_command == NO_COMMAND)
{
write_stderr(_("%s: no operation specified\n"), progname);
do_advice();
exit(1);
}
/* Note we put any -D switch into the env var above */
pg_config = getenv("PGDATA");
if (pg_config)
{
pg_config = pg_strdup(pg_config);
canonicalize_path(pg_config);
pg_data = pg_strdup(pg_config);
}
/* -D might point at config-only directory; if so find the real PGDATA */
adjust_data_dir();
/* Complain if -D needed and not provided */
if (pg_config == NULL &&
ctl_command != KILL_COMMAND && ctl_command != UNREGISTER_COMMAND)
{
write_stderr(_("%s: no database directory specified and environment variable PGDATA unset\n"),
progname);
do_advice();
exit(1);
}
if (ctl_command == RELOAD_COMMAND)
{
sig = SIGHUP;
do_wait = false;
}
2006-10-04 02:30:14 +02:00
if (pg_data)
{
snprintf(postopts_file, MAXPGPATH, "%s/postmaster.opts", pg_data);
snprintf(version_file, MAXPGPATH, "%s/PG_VERSION", pg_data);
2006-10-04 02:30:14 +02:00
snprintf(pid_file, MAXPGPATH, "%s/postmaster.pid", pg_data);
snprintf(backup_file, MAXPGPATH, "%s/backup_label", pg_data);
/*
* Set mask based on PGDATA permissions,
*
* Don't error here if the data directory cannot be stat'd. This is
* handled differently based on the command and we don't want to
* interfere with that logic.
*/
if (GetDataDirectoryCreatePerm(pg_data))
umask(pg_mode_mask);
2006-10-04 02:30:14 +02:00
}
switch (ctl_command)
{
case INIT_COMMAND:
do_init();
break;
case STATUS_COMMAND:
do_status();
break;
case START_COMMAND:
do_start();
break;
case STOP_COMMAND:
do_stop();
break;
case RESTART_COMMAND:
do_restart();
break;
case RELOAD_COMMAND:
do_reload();
break;
case PROMOTE_COMMAND:
do_promote();
break;
case LOGROTATE_COMMAND:
do_logrotate();
break;
case KILL_COMMAND:
do_kill(killproc);
break;
#ifdef WIN32
2004-08-29 07:07:03 +02:00
case REGISTER_COMMAND:
pgwin32_doRegister();
break;
case UNREGISTER_COMMAND:
pgwin32_doUnregister();
break;
case RUN_AS_SERVICE_COMMAND:
pgwin32_doRunAsService();
break;
#endif
default:
break;
}
exit(0);
}