From 10f743389ca9a92720fb9c3d15f647888d82c297 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 17 Aug 2022 02:04:55 -0400 Subject: [PATCH 1/6] compat: add function to enable nonblocking pipes We'd like to be able to make some of our pipes nonblocking so that poll() can be used effectively, but O_NONBLOCK isn't portable. Let's introduce a compat wrapper so this can be abstracted for each platform. The interface is as narrow as possible to let platforms do what's natural there (rather than having to implement fcntl() and a fake O_NONBLOCK for example, or having to handle other types of descriptors). The next commit will add Windows support, at which point we should be covering all platforms in practice. But if we do find some other platform without O_NONBLOCK, we'll return ENOSYS. Arguably we could just trigger a build-time #error in this case, which would catch the problem earlier. But since we're not planning to use this compat wrapper in many code paths, a seldom-seen runtime error may be friendlier for such a platform than blocking compilation completely. Our test suite would still notice it. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Makefile | 1 + compat/nonblock.c | 23 +++++++++++++++++++++++ compat/nonblock.h | 9 +++++++++ 3 files changed, 33 insertions(+) create mode 100644 compat/nonblock.c create mode 100644 compat/nonblock.h diff --git a/Makefile b/Makefile index 04d0fd1fe6..17fdb16dd8 100644 --- a/Makefile +++ b/Makefile @@ -910,6 +910,7 @@ LIB_OBJS += combine-diff.o LIB_OBJS += commit-graph.o LIB_OBJS += commit-reach.o LIB_OBJS += commit.o +LIB_OBJS += compat/nonblock.o LIB_OBJS += compat/obstack.o LIB_OBJS += compat/terminal.o LIB_OBJS += compat/zlib-uncompress2.o diff --git a/compat/nonblock.c b/compat/nonblock.c new file mode 100644 index 0000000000..b08105a21d --- /dev/null +++ b/compat/nonblock.c @@ -0,0 +1,23 @@ +#include "git-compat-util.h" +#include "nonblock.h" + +#ifdef O_NONBLOCK + +int enable_pipe_nonblock(int fd) +{ + int flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -1; + flags |= O_NONBLOCK; + return fcntl(fd, F_SETFL, flags); +} + +#else + +int enable_pipe_nonblock(int fd) +{ + errno = ENOSYS; + return -1; +} + +#endif diff --git a/compat/nonblock.h b/compat/nonblock.h new file mode 100644 index 0000000000..af1a331301 --- /dev/null +++ b/compat/nonblock.h @@ -0,0 +1,9 @@ +#ifndef COMPAT_NONBLOCK_H +#define COMPAT_NONBLOCK_H + +/* + * Enable non-blocking I/O for the pipe specified by the passed-in descriptor. + */ +int enable_pipe_nonblock(int fd); + +#endif From 24b56ae4aecc937a246efb94d283f54a7f59c7f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Wed, 17 Aug 2022 02:05:25 -0400 Subject: [PATCH 2/6] nonblock: support Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement enable_pipe_nonblock() using the Windows API. This works only for pipes, but that is sufficient for this limited interface. Despite the API calls used, it handles both "named" and anonymous pipes from our pipe() emulation. 
Signed-off-by: René Scharfe Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- compat/nonblock.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/compat/nonblock.c b/compat/nonblock.c index b08105a21d..9694ebdb1d 100644 --- a/compat/nonblock.c +++ b/compat/nonblock.c @@ -12,6 +12,33 @@ int enable_pipe_nonblock(int fd) return fcntl(fd, F_SETFL, flags); } +#elif defined(GIT_WINDOWS_NATIVE) + +#include "win32.h" + +int enable_pipe_nonblock(int fd) +{ + HANDLE h = (HANDLE)_get_osfhandle(fd); + DWORD mode; + DWORD type = GetFileType(h); + if (type == FILE_TYPE_UNKNOWN && GetLastError() != NO_ERROR) { + errno = EBADF; + return -1; + } + if (type != FILE_TYPE_PIPE) + BUG("unsupported file type: %lu", type); + if (!GetNamedPipeHandleState(h, &mode, NULL, NULL, NULL, NULL, 0)) { + errno = err_win_to_posix(GetLastError()); + return -1; + } + mode |= PIPE_NOWAIT; + if (!SetNamedPipeHandleState(h, &mode, NULL, NULL)) { + errno = err_win_to_posix(GetLastError()); + return -1; + } + return 0; +} + #else int enable_pipe_nonblock(int fd) From ec4f39b2333db94096ec2d6b900eabaf4d1e3f1b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 17 Aug 2022 02:06:58 -0400 Subject: [PATCH 3/6] git-compat-util: make MAX_IO_SIZE define globally available We define MAX_IO_SIZE within wrapper.c, but it's useful for any code that wants to do a raw write() for whatever reason (say, because they want different EAGAIN handling). Let's make it available everywhere. The alternative would be adding xwrite_foo() variants to give callers more options. But there's really no reason MAX_IO_SIZE needs to be abstracted away, so this gives callers the most flexibility. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- git-compat-util.h | 22 ++++++++++++++++++++++ wrapper.c | 22 ---------------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/git-compat-util.h b/git-compat-util.h index 58d7708296..79977caed6 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -995,6 +995,28 @@ static inline unsigned long cast_size_t_to_ulong(size_t a) return (unsigned long)a; } +/* + * Limit size of IO chunks, because huge chunks only cause pain. OS X + * 64-bit is buggy, returning EINVAL if len >= INT_MAX; and even in + * the absence of bugs, large chunks can result in bad latencies when + * you decide to kill the process. + * + * We pick 8 MiB as our default, but if the platform defines SSIZE_MAX + * that is smaller than that, clip it to SSIZE_MAX, as a call to + * read(2) or write(2) larger than that is allowed to fail. As the last + * resort, we allow a port to pass via CFLAGS e.g. "-DMAX_IO_SIZE=value" + * to override this, if the definition of SSIZE_MAX given by the platform + * is broken. + */ +#ifndef MAX_IO_SIZE +# define MAX_IO_SIZE_DEFAULT (8*1024*1024) +# if defined(SSIZE_MAX) && (SSIZE_MAX < MAX_IO_SIZE_DEFAULT) +# define MAX_IO_SIZE SSIZE_MAX +# else +# define MAX_IO_SIZE MAX_IO_SIZE_DEFAULT +# endif +#endif + #ifdef HAVE_ALLOCA_H # include <alloca.h> # define xalloca(size) (alloca(size)) diff --git a/wrapper.c b/wrapper.c index 1c3c970080..f52f3e21cc 100644 --- a/wrapper.c +++ b/wrapper.c @@ -161,28 +161,6 @@ void xsetenv(const char *name, const char *value, int overwrite) die_errno(_("could not setenv '%s'"), name ? name : "(null)"); } -/* - * Limit size of IO chunks, because huge chunks only cause pain. OS X - * 64-bit is buggy, returning EINVAL if len >= INT_MAX; and even in - * the absence of bugs, large chunks can result in bad latencies when - * you decide to kill the process. 
- * - * We pick 8 MiB as our default, but if the platform defines SSIZE_MAX - * that is smaller than that, clip it to SSIZE_MAX, as a call to - * read(2) or write(2) larger than that is allowed to fail. As the last - * resort, we allow a port to pass via CFLAGS e.g. "-DMAX_IO_SIZE=value" - * to override this, if the definition of SSIZE_MAX given by the platform - * is broken. - */ -#ifndef MAX_IO_SIZE -# define MAX_IO_SIZE_DEFAULT (8*1024*1024) -# if defined(SSIZE_MAX) && (SSIZE_MAX < MAX_IO_SIZE_DEFAULT) -# define MAX_IO_SIZE SSIZE_MAX -# else -# define MAX_IO_SIZE MAX_IO_SIZE_DEFAULT -# endif -#endif - /** * xopen() is the same as open(), but it die()s if the open() fails. */ From 14eab817e499cb047dd8ba21e688257a06d043f0 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 17 Aug 2022 02:08:06 -0400 Subject: [PATCH 4/6] pipe_command(): avoid xwrite() for writing to pipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If xwrite() sees an EAGAIN response, it will loop forever until the write succeeds (or encounters a real error). This is due to ef1cf0167a (xwrite: poll on non-blocking FDs, 2016-06-26), with the idea that we won't be surprised by a descriptor unexpectedly set as non-blocking. But that will make things awkward when we do want a non-blocking descriptor, and a future patch will switch pipe_command() to using one. In that case, looping on EAGAIN is bad, because the process on the other end of the pipe may be waiting on us before doing another read() on the pipe, which would mean we deadlock. In practice we're not supposed to ever see EAGAIN here, since poll() will have just told us the descriptor is ready for writing. But our Windows emulation of poll() will always return "ready" for writing to a pipe descriptor! This is due to 94f4d01932 (mingw: workaround for hangs when sending STDIN, 2020-02-17). 
Our best bet in that case is to keep handling other descriptors, as any read() we do may allow the child command to make forward progress (i.e., its write() finishes, and then it read()s from its stdin, freeing up space in the pipe buffer). This means we might busy-loop between poll() and write() on Windows if the child command is slow to read our input, but it's much better than the alternative of deadlocking. In practice, this busy-looping should be rare: - for small inputs, we'll just write the whole thing in a single write() anyway, non-blocking or not - for larger inputs where the child reads input and then processes it before writing (e.g., gpg verifying a signature), we may make a few extra write() calls that get EAGAIN during the initial write, but once it has taken in the whole input, we'll correctly block waiting to read back the data. - for larger inputs where the child process is streaming output back (like a diff filter), we'll likewise see some extra EAGAINs, but most of them will be followed immediately by a read(), which will let the child command make forward progress. Of course it won't happen at all for now, since we don't yet use a non-blocking pipe. This is just preparation for when we do. Helped-by: René Scharfe Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- run-command.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/run-command.c b/run-command.c index 14f17830f5..e078c3046f 100644 --- a/run-command.c +++ b/run-command.c @@ -1364,12 +1364,24 @@ static int pump_io_round(struct io_pump *slots, int nr, struct pollfd *pfd) continue; if (io->type == POLLOUT) { - ssize_t len = xwrite(io->fd, - io->u.out.buf, io->u.out.len); + ssize_t len; + + /* + * Don't use xwrite() here. It loops forever on EAGAIN, + * and we're in our own poll() loop here. + * + * Note that we lose xwrite()'s handling of MAX_IO_SIZE + * and EINTR, so we have to implement those ourselves. 
+ */ + len = write(io->fd, io->u.out.buf, + io->u.out.len <= MAX_IO_SIZE ? + io->u.out.len : MAX_IO_SIZE); if (len < 0) { - io->error = errno; - close(io->fd); - io->fd = -1; + if (errno != EINTR && errno != EAGAIN) { + io->error = errno; + close(io->fd); + io->fd = -1; + } } else { io->u.out.buf += len; io->u.out.len -= len; From c6d3cce6f3c4d1a8d9ebc556c38f1335afdfeb6c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 17 Aug 2022 02:09:42 -0400 Subject: [PATCH 5/6] pipe_command(): handle ENOSPC when writing to a pipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When write() to a non-blocking pipe fails because the buffer is full, POSIX says we should see EAGAIN. But our mingw_write() compat layer on Windows actually returns ENOSPC for this case. This is probably something we want to correct, but given that we don't plan to use non-blocking descriptors in a lot of places, we can work around it by just catching ENOSPC alongside EAGAIN. If we ever do fix mingw_write(), then this patch can be reverted. We don't actually use a non-blocking pipe yet, so this is still just preparation. Helped-by: René Scharfe Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- run-command.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/run-command.c b/run-command.c index e078c3046f..5fbaa8b5ac 100644 --- a/run-command.c +++ b/run-command.c @@ -1377,7 +1377,8 @@ static int pump_io_round(struct io_pump *slots, int nr, struct pollfd *pfd) io->u.out.len <= MAX_IO_SIZE ? 
io->u.out.len : MAX_IO_SIZE); if (len < 0) { - if (errno != EINTR && errno != EAGAIN) { + if (errno != EINTR && errno != EAGAIN && + errno != ENOSPC) { io->error = errno; close(io->fd); io->fd = -1; From 716c1f649e372a0784b9826cd3839e7b373e2ea9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 17 Aug 2022 02:10:22 -0400 Subject: [PATCH 6/6] pipe_command(): mark stdin descriptor as non-blocking Our pipe_command() helper lets you both write to and read from a child process on its stdin/stdout. It's supposed to work without deadlocks because we use poll() to check when descriptors are ready for reading or writing. But there's a bug: if both the data to be written and the data to be read back exceed the pipe buffer, we'll deadlock. The issue is that the code assumes that if you have, say, a 2MB buffer to write and poll() tells you that the pipe descriptor is ready for writing, that calling: write(cmd->in, buf, 2*1024*1024); will do a partial write, filling the pipe buffer and then returning what it did write. And that is what it would do on a socket, but not for a pipe. When writing to a pipe, at least on Linux, it will block waiting for the child process to read() more. And now we have a potential deadlock, because the child may be writing back to us, waiting for us to read() ourselves. An easy way to trigger this is: git -c add.interactive.useBuiltin=true \ -c interactive.diffFilter=cat \ checkout -p HEAD~200 The diff against HEAD~200 will be big, and the filter wants to write all of it back to us (obviously this is a dummy filter, but in the real world something like diff-highlight would similarly stream back a big output). If you set add.interactive.useBuiltin to false, the problem goes away, because now we're not using pipe_command() anymore (instead, that part happens in perl). But this isn't a bug in the interactive code at all. It's the underlying pipe_command() code which is broken, and has been all along. 
We presumably didn't notice because most calls only do input _or_ output, not both. And the few that do both, like gpg calls, may have large inputs or outputs, but never both at the same time (e.g., consider signing, which has a large payload but a small signature comes back). The obvious fix is to put the descriptor into non-blocking mode, and indeed, that makes the problem go away. Callers shouldn't need to care, because they never see the descriptor (they hand us a buffer to feed into it). The included test fails reliably on Linux without this patch. Curiously, it doesn't fail in our Windows CI environment, but has been reported to do so for individual developers. It should pass in any environment after this patch (courtesy of the compat/ layers added in the last few commits). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- run-command.c | 10 ++++++++++ t/t3701-add-interactive.sh | 13 +++++++++++++ 2 files changed, 23 insertions(+) diff --git a/run-command.c b/run-command.c index 5fbaa8b5ac..5ec3a46dcc 100644 --- a/run-command.c +++ b/run-command.c @@ -10,6 +10,7 @@ #include "config.h" #include "packfile.h" #include "hook.h" +#include "compat/nonblock.h" void child_process_init(struct child_process *child) { @@ -1451,6 +1452,15 @@ int pipe_command(struct child_process *cmd, return -1; if (in) { + if (enable_pipe_nonblock(cmd->in) < 0) { + error_errno("unable to make pipe non-blocking"); + close(cmd->in); + if (out) + close(cmd->out); + if (err) + close(cmd->err); + return -1; + } io[nr].fd = cmd->in; io[nr].type = POLLOUT; io[nr].u.out.buf = in; diff --git a/t/t3701-add-interactive.sh b/t/t3701-add-interactive.sh index b354fb39de..3b7df9bed5 100755 --- a/t/t3701-add-interactive.sh +++ b/t/t3701-add-interactive.sh @@ -766,6 +766,19 @@ test_expect_success 'detect bogus diffFilter output' ' force_color test_must_fail git add -p test && + test_config interactive.diffFilter cat && + printf y >y && + force_color git add -p >output 2>&1