From 92f33bb7afd373ed562e23077c14831944d1b0d4 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 7 Jun 2020 13:07:31 -0400 Subject: [PATCH] Rethink definition of cancel.c's CancelRequested flag. As it stands, this flag is only set when we've successfully sent a cancel request, not if we get SIGINT and then fail to send a cancel. However, for almost all callers, that's the Wrong Thing: we'd prefer to abort processing after control-C even if no cancel could be sent. As an example, since commit 1d468b9ad "pgbench -i" fails to give up sending COPY data even after control-C, if the postmaster has been stopped, which is clearly not what the code intends and not what anyone would want. (The fact that it keeps going at all is the fault of a separate bug in libpq, but not letting CancelRequested become set is clearly not what we want here.) The sole exception, as far as I can find, is that scripts_parallel.c's ParallelSlotsGetIdle tries to consume a query result after issuing a cancel, which of course might not terminate quickly if no cancel happened. But that behavior was poorly thought out too. No user of ParallelSlotsGetIdle tries to continue processing after a cancel, so there is really no point in trying to clear the connection's state. Moreover this has the same defect as for other users of cancel.c, that if the cancel request fails for some reason then we end up with control-C being completely ignored. (On top of that, select_loop failed to distinguish clearly between SIGINT and other reasons for select(2) failing, which means that it's possible that the existing code would think that a cancel has been sent when it hasn't.) Hence, redefine CancelRequested as simply meaning that SIGINT was received. We could add a second flag with the other meaning, but in the absence of any compelling argument why such a flag is needed, I think it would just offer an opportunity for future callers to get it wrong. Also remove the consumeQueryResult call in ParallelSlotsGetIdle's failure exit. In passing, simplify the API of select_loop. It would now be possible to re-unify psql's cancel_pressed with CancelRequested, partly undoing 5d43c3c54. But I'm not really convinced that that's worth the trouble, so I left psql alone, other than fixing a misleading comment. This code is new in v13 (cf a4fd3aa71), so no need for back-patch. Per investigation of a complaint from Andres Freund. Discussion: https://postgr.es/m/20200603201242.ofvm4jztpqytwfye@alap3.anarazel.de --- src/bin/psql/common.c | 2 +- src/bin/scripts/scripts_parallel.c | 32 +++++++++--------------------- src/fe_utils/cancel.c | 20 +++++++++---------- 3 files changed, 19 insertions(+), 35 deletions(-) diff --git a/src/bin/psql/common.c b/src/bin/psql/common.c index 06f801764b..6323a35c91 100644 --- a/src/bin/psql/common.c +++ b/src/bin/psql/common.c @@ -240,7 +240,7 @@ NoticeProcessor(void *arg, const char *message) * fgets are coded to handle possible interruption. * * On Windows, currently this does not work, so control-C is less useful - * there, and the callback is just a no-op. + * there. */ volatile bool sigint_interrupt_enabled = false; diff --git a/src/bin/scripts/scripts_parallel.c b/src/bin/scripts/scripts_parallel.c index 45c69b8d19..01bc6dfeff 100644 --- a/src/bin/scripts/scripts_parallel.c +++ b/src/bin/scripts/scripts_parallel.c @@ -28,7 +28,7 @@ #include "scripts_parallel.h" static void init_slot(ParallelSlot *slot, PGconn *conn); -static int select_loop(int maxFd, fd_set *workerset, bool *aborting); +static int select_loop(int maxFd, fd_set *workerset); static void init_slot(ParallelSlot *slot, PGconn *conn) @@ -39,25 +39,19 @@ init_slot(ParallelSlot *slot, PGconn *conn) } /* - * Loop on select() until a descriptor from the given set becomes readable. + * Wait until a file descriptor from the given set becomes readable. * - * If we get a cancel request while we're waiting, we forego all further - * processing and set the *aborting flag to true. The return value must be - * ignored in this case. Otherwise, *aborting is set to false. + * Returns the number of ready descriptors, or -1 on failure (including + * getting a cancel request). */ static int -select_loop(int maxFd, fd_set *workerset, bool *aborting) +select_loop(int maxFd, fd_set *workerset) { int i; fd_set saveSet = *workerset; if (CancelRequested) - { - *aborting = true; return -1; - } - else - *aborting = false; for (;;) { @@ -90,7 +84,7 @@ select_loop(int maxFd, fd_set *workerset, bool *aborting) if (i < 0 && errno == EINTR) continue; /* ignore this */ if (i < 0 || CancelRequested) - *aborting = true; /* but not this */ + return -1; /* but not this */ if (i == 0) continue; /* timeout (Win32 only) */ break; @@ -135,7 +129,6 @@ ParallelSlotsGetIdle(ParallelSlot *slots, int numslots) { fd_set slotset; int maxFd = 0; - bool aborting; /* We must reconstruct the fd_set for each call to select_loop */ FD_ZERO(&slotset); @@ -157,19 +150,12 @@ ParallelSlotsGetIdle(ParallelSlot *slots, int numslots) } SetCancelConn(slots->connection); - i = select_loop(maxFd, &slotset, &aborting); + i = select_loop(maxFd, &slotset); ResetCancelConn(); - if (aborting) - { - /* - * We set the cancel-receiving connection to the one in the zeroth - * slot above, so fetch the error from there. - */ - consumeQueryResult(slots->connection); + /* failure? */ + if (i < 0) return NULL; - } - Assert(i != 0); for (i = 0; i < numslots; i++) { diff --git a/src/fe_utils/cancel.c b/src/fe_utils/cancel.c index eb4056a9a6..51fb67d384 100644 --- a/src/fe_utils/cancel.c +++ b/src/fe_utils/cancel.c @@ -43,11 +43,11 @@ static PGcancel *volatile cancelConn = NULL; /* - * CancelRequested tracks if a cancellation request has completed after - * a signal interruption. Note that if cancelConn is not set, in short - * if SetCancelConn() was never called or if ResetCancelConn() freed - * the cancellation object, then CancelRequested is switched to true after - * all cancellation attempts. + * CancelRequested is set when we receive SIGINT (or local equivalent). + * There is no provision in this module for resetting it; but applications + * might choose to clear it after successfully recovering from a cancel. + * Note that there is no guarantee that we successfully sent a Cancel request, + * or that the request will have any effect if we did send it. */ volatile sig_atomic_t CancelRequested = false; @@ -148,6 +148,8 @@ handle_sigint(SIGNAL_ARGS) int save_errno = errno; char errbuf[256]; + CancelRequested = true; + if (cancel_callback != NULL) cancel_callback(); @@ -156,7 +158,6 @@ handle_sigint(SIGNAL_ARGS) { if (PQcancel(cancelConn, errbuf, sizeof(errbuf))) { - CancelRequested = true; write_stderr(_("Cancel request sent\n")); } else @@ -165,8 +166,6 @@ handle_sigint(SIGNAL_ARGS) write_stderr(errbuf); } } - else - CancelRequested = true; errno = save_errno; /* just in case the write changed it */ } @@ -193,6 +192,8 @@ consoleHandler(DWORD dwCtrlType) if (dwCtrlType == CTRL_C_EVENT || dwCtrlType == CTRL_BREAK_EVENT) { + CancelRequested = true; + if (cancel_callback != NULL) cancel_callback(); @@ -203,7 +204,6 @@ consoleHandler(DWORD dwCtrlType) if (PQcancel(cancelConn, errbuf, sizeof(errbuf))) { write_stderr(_("Cancel request sent\n")); - CancelRequested = true; } else { @@ -211,8 +211,6 @@ consoleHandler(DWORD dwCtrlType) write_stderr(errbuf); } } - else - CancelRequested = true; LeaveCriticalSection(&cancelConnLock);