Rethink definition of cancel.c's CancelRequested flag.

As it stands, this flag is only set when we've successfully sent a
cancel request, not if we get SIGINT and then fail to send a cancel.
However, for almost all callers, that's the Wrong Thing: we'd prefer
to abort processing after control-C even if no cancel could be sent.

As an example, since commit 1d468b9ad, "pgbench -i" fails to give up
sending COPY data even after control-C, if the postmaster has been
stopped, which is clearly not what the code intends and not what anyone
would want.  (The fact that it keeps going at all is the fault of a
separate bug in libpq, but not letting CancelRequested become set is
clearly not what we want here.)

The sole exception, as far as I can find, is that scripts_parallel.c's
ParallelSlotsGetIdle tries to consume a query result after issuing a
cancel, which of course might not terminate quickly if no cancel
happened.  But that behavior was poorly thought out too.  No user of
ParallelSlotsGetIdle tries to continue processing after a cancel,
so there is really no point in trying to clear the connection's state.
Moreover, this has the same defect as for other users of cancel.c:
if the cancel request fails for some reason then we end up with
control-C being completely ignored.  (On top of that, select_loop failed
to distinguish clearly between SIGINT and other reasons for select(2)
failing, which means that it's possible that the existing code would
think that a cancel has been sent when it hasn't.)

Hence, redefine CancelRequested as simply meaning that SIGINT was
received.  We could add a second flag with the other meaning, but
in the absence of any compelling argument why such a flag is needed,
I think it would just offer an opportunity for future callers to
get it wrong.  Also remove the consumeQueryResult call in
ParallelSlotsGetIdle's failure exit.  In passing, simplify the
API of select_loop.

It would now be possible to re-unify psql's cancel_pressed with
CancelRequested, partly undoing 5d43c3c54.  But I'm not really
convinced that that's worth the trouble, so I left psql alone,
other than fixing a misleading comment.

This code is new in v13 (cf. a4fd3aa71), so there is no need to back-patch.

Per investigation of a complaint from Andres Freund.

Discussion: https://postgr.es/m/20200603201242.ofvm4jztpqytwfye@alap3.anarazel.de
This commit is contained in:
Tom Lane 2020-06-07 13:07:31 -04:00
parent 1fbb6c93df
commit 92f33bb7af
3 changed files with 19 additions and 35 deletions

View File

@ -240,7 +240,7 @@ NoticeProcessor(void *arg, const char *message)
* fgets are coded to handle possible interruption. * fgets are coded to handle possible interruption.
* *
* On Windows, currently this does not work, so control-C is less useful * On Windows, currently this does not work, so control-C is less useful
* there, and the callback is just a no-op. * there.
*/ */
volatile bool sigint_interrupt_enabled = false; volatile bool sigint_interrupt_enabled = false;

View File

@ -28,7 +28,7 @@
#include "scripts_parallel.h" #include "scripts_parallel.h"
static void init_slot(ParallelSlot *slot, PGconn *conn); static void init_slot(ParallelSlot *slot, PGconn *conn);
static int select_loop(int maxFd, fd_set *workerset, bool *aborting); static int select_loop(int maxFd, fd_set *workerset);
static void static void
init_slot(ParallelSlot *slot, PGconn *conn) init_slot(ParallelSlot *slot, PGconn *conn)
@ -39,25 +39,19 @@ init_slot(ParallelSlot *slot, PGconn *conn)
} }
/* /*
* Loop on select() until a descriptor from the given set becomes readable. * Wait until a file descriptor from the given set becomes readable.
* *
* If we get a cancel request while we're waiting, we forego all further * Returns the number of ready descriptors, or -1 on failure (including
* processing and set the *aborting flag to true. The return value must be * getting a cancel request).
* ignored in this case. Otherwise, *aborting is set to false.
*/ */
static int static int
select_loop(int maxFd, fd_set *workerset, bool *aborting) select_loop(int maxFd, fd_set *workerset)
{ {
int i; int i;
fd_set saveSet = *workerset; fd_set saveSet = *workerset;
if (CancelRequested) if (CancelRequested)
{
*aborting = true;
return -1; return -1;
}
else
*aborting = false;
for (;;) for (;;)
{ {
@ -90,7 +84,7 @@ select_loop(int maxFd, fd_set *workerset, bool *aborting)
if (i < 0 && errno == EINTR) if (i < 0 && errno == EINTR)
continue; /* ignore this */ continue; /* ignore this */
if (i < 0 || CancelRequested) if (i < 0 || CancelRequested)
*aborting = true; /* but not this */ return -1; /* but not this */
if (i == 0) if (i == 0)
continue; /* timeout (Win32 only) */ continue; /* timeout (Win32 only) */
break; break;
@ -135,7 +129,6 @@ ParallelSlotsGetIdle(ParallelSlot *slots, int numslots)
{ {
fd_set slotset; fd_set slotset;
int maxFd = 0; int maxFd = 0;
bool aborting;
/* We must reconstruct the fd_set for each call to select_loop */ /* We must reconstruct the fd_set for each call to select_loop */
FD_ZERO(&slotset); FD_ZERO(&slotset);
@ -157,19 +150,12 @@ ParallelSlotsGetIdle(ParallelSlot *slots, int numslots)
} }
SetCancelConn(slots->connection); SetCancelConn(slots->connection);
i = select_loop(maxFd, &slotset, &aborting); i = select_loop(maxFd, &slotset);
ResetCancelConn(); ResetCancelConn();
if (aborting) /* failure? */
{ if (i < 0)
/*
* We set the cancel-receiving connection to the one in the zeroth
* slot above, so fetch the error from there.
*/
consumeQueryResult(slots->connection);
return NULL; return NULL;
}
Assert(i != 0);
for (i = 0; i < numslots; i++) for (i = 0; i < numslots; i++)
{ {

View File

@ -43,11 +43,11 @@
static PGcancel *volatile cancelConn = NULL; static PGcancel *volatile cancelConn = NULL;
/* /*
* CancelRequested tracks if a cancellation request has completed after * CancelRequested is set when we receive SIGINT (or local equivalent).
* a signal interruption. Note that if cancelConn is not set, in short * There is no provision in this module for resetting it; but applications
* if SetCancelConn() was never called or if ResetCancelConn() freed * might choose to clear it after successfully recovering from a cancel.
* the cancellation object, then CancelRequested is switched to true after * Note that there is no guarantee that we successfully sent a Cancel request,
* all cancellation attempts. * or that the request will have any effect if we did send it.
*/ */
volatile sig_atomic_t CancelRequested = false; volatile sig_atomic_t CancelRequested = false;
@ -148,6 +148,8 @@ handle_sigint(SIGNAL_ARGS)
int save_errno = errno; int save_errno = errno;
char errbuf[256]; char errbuf[256];
CancelRequested = true;
if (cancel_callback != NULL) if (cancel_callback != NULL)
cancel_callback(); cancel_callback();
@ -156,7 +158,6 @@ handle_sigint(SIGNAL_ARGS)
{ {
if (PQcancel(cancelConn, errbuf, sizeof(errbuf))) if (PQcancel(cancelConn, errbuf, sizeof(errbuf)))
{ {
CancelRequested = true;
write_stderr(_("Cancel request sent\n")); write_stderr(_("Cancel request sent\n"));
} }
else else
@ -165,8 +166,6 @@ handle_sigint(SIGNAL_ARGS)
write_stderr(errbuf); write_stderr(errbuf);
} }
} }
else
CancelRequested = true;
errno = save_errno; /* just in case the write changed it */ errno = save_errno; /* just in case the write changed it */
} }
@ -193,6 +192,8 @@ consoleHandler(DWORD dwCtrlType)
if (dwCtrlType == CTRL_C_EVENT || if (dwCtrlType == CTRL_C_EVENT ||
dwCtrlType == CTRL_BREAK_EVENT) dwCtrlType == CTRL_BREAK_EVENT)
{ {
CancelRequested = true;
if (cancel_callback != NULL) if (cancel_callback != NULL)
cancel_callback(); cancel_callback();
@ -203,7 +204,6 @@ consoleHandler(DWORD dwCtrlType)
if (PQcancel(cancelConn, errbuf, sizeof(errbuf))) if (PQcancel(cancelConn, errbuf, sizeof(errbuf)))
{ {
write_stderr(_("Cancel request sent\n")); write_stderr(_("Cancel request sent\n"));
CancelRequested = true;
} }
else else
{ {
@ -211,8 +211,6 @@ consoleHandler(DWORD dwCtrlType)
write_stderr(errbuf); write_stderr(errbuf);
} }
} }
else
CancelRequested = true;
LeaveCriticalSection(&cancelConnLock); LeaveCriticalSection(&cancelConnLock);