From 452efd11fbf607ad12854edf1488112a7e4790d2 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 10 Dec 2019 20:00:20 +0000 Subject: [PATCH 1/9] t3011: demonstrate directory traversal failures Add several tests demonstrating directory traversal failures of various sorts in dir.c (and one similar looking test that turns out to be a git_fnmatch bug). A lot of these tests look like near duplicates of each other, but an optimization path in dir.c to pre-descend into a common prefix and the specialized treatment of trailing slashes in dir.c mean the tiny differences are sometimes important and potentially cause different codepaths to be explored. Of the 7 failing tests, 2 are new to git-2.24.0 (tweaked by side effects of the en/clean-nested-with-ignored-topic); the other 5 also failed under git-2.23.0 and earlier. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- ...common-prefixes-and-directory-traversal.sh | 209 ++++++++++++++++++ 1 file changed, 209 insertions(+) create mode 100755 t/t3011-common-prefixes-and-directory-traversal.sh diff --git a/t/t3011-common-prefixes-and-directory-traversal.sh b/t/t3011-common-prefixes-and-directory-traversal.sh new file mode 100755 index 0000000000..54f80c62b8 --- /dev/null +++ b/t/t3011-common-prefixes-and-directory-traversal.sh @@ -0,0 +1,209 @@ +#!/bin/sh + +test_description='directory traversal handling, especially with common prefixes' + +. ./test-lib.sh + +test_expect_success 'setup' ' + test_commit hello && + + >empty && + mkdir untracked_dir && + >untracked_dir/empty && + git init untracked_repo && + >untracked_repo/empty && + + cat <<-EOF >.gitignore && + ignored + an_ignored_dir/ + EOF + mkdir an_ignored_dir && + mkdir an_untracked_dir && + >an_ignored_dir/ignored && + >an_ignored_dir/untracked && + >an_untracked_dir/ignored && + >an_untracked_dir/untracked +' + +test_expect_success 'git ls-files -o shows the right entries' ' + cat <<-EOF >expect && + .gitignore + actual + an_ignored_dir/ignored + an_ignored_dir/untracked + an_untracked_dir/ignored + an_untracked_dir/untracked + empty + expect + untracked_dir/empty + untracked_repo/ + EOF + git ls-files -o >actual && + test_cmp expect actual +' + +test_expect_success 'git ls-files -o --exclude-standard shows the right entries' ' + cat <<-EOF >expect && + .gitignore + actual + an_untracked_dir/untracked + empty + expect + untracked_dir/empty + untracked_repo/ + EOF + git ls-files -o --exclude-standard >actual && + test_cmp expect actual +' + +test_expect_success 'git ls-files -o untracked_dir recurses' ' + echo untracked_dir/empty >expect && + git ls-files -o untracked_dir >actual && + test_cmp expect actual +' + +test_expect_success 'git ls-files -o untracked_dir/ recurses' ' + echo untracked_dir/empty >expect && + git ls-files -o untracked_dir/ >actual && + test_cmp expect actual +' + +test_expect_success 'git ls-files -o --directory untracked_dir does not recurse' ' + echo untracked_dir/ >expect && + git ls-files -o --directory untracked_dir >actual && + test_cmp expect actual +' + +test_expect_failure 'git ls-files -o --directory untracked_dir/ does not recurse' ' + echo untracked_dir/ >expect && + git ls-files -o --directory untracked_dir/ >actual && + test_cmp expect actual +' + +test_expect_success 'git ls-files -o untracked_repo does not recurse' ' + echo untracked_repo/ >expect && + git ls-files -o untracked_repo >actual && + test_cmp expect actual +' + +test_expect_failure 'git ls-files -o untracked_repo/ does not recurse' ' + echo untracked_repo/ >expect && + git ls-files -o untracked_repo/ >actual && + test_cmp expect actual +' + +test_expect_failure 'git ls-files -o untracked_dir untracked_repo recurses into untracked_dir only' ' + cat <<-EOF >expect && + untracked_dir/empty + untracked_repo/ + EOF + git ls-files -o untracked_dir untracked_repo >actual && + test_cmp expect actual +' + +test_expect_success 'git ls-files -o untracked_dir/ untracked_repo/ recurses into untracked_dir only' ' + cat <<-EOF >expect && + untracked_dir/empty + untracked_repo/ + EOF + git ls-files -o untracked_dir/ untracked_repo/ >actual && + test_cmp expect actual +' + +test_expect_failure 'git ls-files -o --directory untracked_dir untracked_repo does not recurse' ' + cat <<-EOF >expect && + untracked_dir/ + untracked_repo/ + EOF + git ls-files -o --directory untracked_dir untracked_repo >actual && + test_cmp expect actual +' + +test_expect_success 'git ls-files -o --directory untracked_dir/ untracked_repo/ does not recurse' ' + cat <<-EOF >expect && + untracked_dir/ + untracked_repo/ + EOF + git ls-files -o --directory untracked_dir/ untracked_repo/ >actual && + test_cmp expect actual +' + +test_expect_success 'git ls-files -o .git shows nothing' ' + git ls-files -o .git >actual && + test_must_be_empty actual +' + +test_expect_failure 'git ls-files -o .git/ shows nothing' ' + git ls-files -o .git/ >actual && + test_must_be_empty actual +' + +test_expect_success FUNNYNAMES 'git ls-files -o untracked_* recurses appropriately' ' + mkdir "untracked_*" && + >"untracked_*/empty" && + + cat <<-EOF >expect && + untracked_*/empty + untracked_dir/empty + untracked_repo/ + EOF + git ls-files -o "untracked_*" >actual && + test_cmp expect actual +' + +# It turns out fill_directory returns the right paths, but ls-files' post-call +# filtering in show_dir_entry() via calling dir_path_match() which ends up +# in git_fnmatch() has logic for PATHSPEC_ONESTAR that assumes the pathspec +# must match the full path; it doesn't check it for matching a leading +# directory. +test_expect_failure FUNNYNAMES 'git ls-files -o untracked_*/ recurses appropriately' ' + cat <<-EOF >expect && + untracked_*/empty + untracked_dir/empty + untracked_repo/ + EOF + git ls-files -o "untracked_*/" >actual && + test_cmp expect actual +' + +test_expect_success FUNNYNAMES 'git ls-files -o --directory untracked_* does not recurse' ' + cat <<-EOF >expect && + untracked_*/ + untracked_dir/ + untracked_repo/ + EOF + git ls-files -o --directory "untracked_*" >actual && + test_cmp expect actual +' + +test_expect_success FUNNYNAMES 'git ls-files -o --directory untracked_*/ does not recurse' ' + cat <<-EOF >expect && + untracked_*/ + untracked_dir/ + untracked_repo/ + EOF + git ls-files -o --directory "untracked_*/" >actual && + test_cmp expect actual +' + +test_expect_success 'git ls-files -o consistent between one or two dirs' ' + git ls-files -o --exclude-standard an_ignored_dir/ an_untracked_dir/ >tmp && + ! grep ^an_ignored_dir/ tmp >expect && + git ls-files -o --exclude-standard an_ignored_dir/ >actual && + test_cmp expect actual +' + +# ls-files doesn't have a way to request showing both untracked and ignored +# files at the same time, so use `git status --ignored` +test_expect_failure 'git status --ignored shows same files under dir with or without pathspec' ' + cat <<-EOF >expect && + ?? an_untracked_dir/ + !! an_untracked_dir/ignored + EOF + git status --porcelain --ignored >output && + grep an_untracked_dir output >expect && + git status --porcelain --ignored an_untracked_dir/ >actual && + test_cmp expect actual +' + +test_done From a2b13367fe55bdeb10862f41aff3e2446b63e171 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 10 Dec 2019 20:00:21 +0000 Subject: [PATCH 2/9] Revert "dir.c: make 'git-status --ignored' work within leading directories" Commit be8a84c52669 ("dir.c: make 'git-status --ignored' work within leading directories", 2013-04-15) noted that git status --ignored would not list ignored files and directories within if was untracked, and modified the behavior to make it show them. However, it did so via a hack that broke consistency; it would show paths under differently than a simple git status --ignored | grep would show them. A correct fix is slightly more involved, and complicated slightly by this hack, so we revert this commit (but keep corrected versions of the testcases) and will later fix the original bug with a subsequent patch. Some history may be helpful: A very, very similar case to the commit we are reverting was raised in commit 48ffef966c76 ("ls-files: fix overeager pathspec optimization", 2010-01-08); but it actually went in somewhat the opposite direction. In that commit, it mentioned how git ls-files -o --exclude-standard t/ used to show untracked files under t/ even when t/ was ignored, and then changed the behavior to stop showing untracked files under an ignored directory. More importantly, this commit considered keeping this behavior but noted that it would be inconsistent with the behavior when multiple pathspecs were specified and thus rejected it. The reason for this whole inconsistency when one pathspec is specified versus zero or two is because common prefixes of pathspecs are sent through a different set of checks (in treat_leading_path()) than normal file/directory traversal (those go through read_directory_recursive() and treat_path()). As such, for consistency, one needs to check that both codepaths produce the same result. Revert commit be8a84c526691667fc04a8241d93a3de1de298ab, except instead of removing the testcase it added, modify it to check for correct and consistent behavior. A subsequent patch in this series will fix the testcase. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- dir.c | 3 --- t/t7061-wtstatus-ignore.sh | 9 +++++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/dir.c b/dir.c index 61f559f980..0dd5266629 100644 --- a/dir.c +++ b/dir.c @@ -2083,14 +2083,12 @@ static int treat_leading_path(struct dir_struct *dir, struct strbuf sb = STRBUF_INIT; int baselen, rc = 0; const char *cp; - int old_flags = dir->flags; while (len && path[len - 1] == '/') len--; if (!len) return 1; baselen = 0; - dir->flags &= ~DIR_SHOW_OTHER_DIRECTORIES; while (1) { cp = path + baselen + !!baselen; cp = memchr(cp, '/', path + len - cp); @@ -2113,7 +2111,6 @@ static int treat_leading_path(struct dir_struct *dir, } } strbuf_release(&sb); - dir->flags = old_flags; return rc; } diff --git a/t/t7061-wtstatus-ignore.sh b/t/t7061-wtstatus-ignore.sh index 0c394cf995..84366050da 100755 --- a/t/t7061-wtstatus-ignore.sh +++ b/t/t7061-wtstatus-ignore.sh @@ -43,11 +43,16 @@ test_expect_success 'status untracked directory with --ignored -u' ' test_cmp expected actual ' cat >expected <<\EOF -?? untracked/uncommitted +?? untracked/ !! untracked/ignored EOF -test_expect_success 'status prefixed untracked directory with --ignored' ' +test_expect_failure 'status of untracked directory with --ignored works with or without prefix' ' + git status --porcelain --ignored >tmp && + grep untracked/ tmp >actual && + rm tmp && + test_cmp expected actual && + git status --porcelain --ignored untracked/ >actual && test_cmp expected actual ' From 2f5d3847d4ed1b6d6c6d2a2e6726cfcda7d361e5 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 10 Dec 2019 20:00:22 +0000 Subject: [PATCH 3/9] dir: remove stray quote character in comment Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dir.c b/dir.c index 0dd5266629..5dacacd469 100644 --- a/dir.c +++ b/dir.c @@ -373,7 +373,7 @@ static int match_pathspec_item(const struct index_state *istate, !ps_strncmp(item, match, name, namelen)) return MATCHED_RECURSIVELY_LEADING_PATHSPEC; - /* name" doesn't match up to the first wild character */ + /* name doesn't match up to the first wild character */ if (item->nowildcard_len < item->len && ps_strncmp(item, match, name, item->nowildcard_len - prefix)) From 072a231016e5da347c3a8ff38afb72e7876dd1d7 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 10 Dec 2019 20:00:23 +0000 Subject: [PATCH 4/9] dir: exit before wildcard fall-through if there is no wildcard The DO_MATCH_LEADING_PATHSPEC had a fall-through case for if there was a wildcard, noting that we don't yet have enough information to determine if a further paths under the current directory might match due to the presence of wildcards. But if we have no wildcards in our pathspec, then we shouldn't get to that fall-through case. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- dir.c | 7 +++++++ t/t3011-common-prefixes-and-directory-traversal.sh | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/dir.c b/dir.c index 5dacacd469..517a569e10 100644 --- a/dir.c +++ b/dir.c @@ -379,6 +379,13 @@ static int match_pathspec_item(const struct index_state *istate, item->nowildcard_len - prefix)) return 0; + /* + * name has no wildcard, and it didn't match as a leading + * pathspec so return. + */ + if (item->nowildcard_len == item->len) + return 0; + /* * Here is where we would perform a wildmatch to check if * "name" can be matched as a directory (or a prefix) against diff --git a/t/t3011-common-prefixes-and-directory-traversal.sh b/t/t3011-common-prefixes-and-directory-traversal.sh index 54f80c62b8..d6e161ddd8 100755 --- a/t/t3011-common-prefixes-and-directory-traversal.sh +++ b/t/t3011-common-prefixes-and-directory-traversal.sh @@ -92,7 +92,7 @@ test_expect_failure 'git ls-files -o untracked_repo/ does not recurse' ' test_cmp expect actual ' -test_expect_failure 'git ls-files -o untracked_dir untracked_repo recurses into untracked_dir only' ' +test_expect_success 'git ls-files -o untracked_dir untracked_repo recurses into untracked_dir only' ' cat <<-EOF >expect && untracked_dir/empty untracked_repo/ @@ -110,7 +110,7 @@ test_expect_success 'git ls-files -o untracked_dir/ untracked_repo/ recurses int test_cmp expect actual ' -test_expect_failure 'git ls-files -o --directory untracked_dir untracked_repo does not recurse' ' +test_expect_success 'git ls-files -o --directory untracked_dir untracked_repo does not recurse' ' cat <<-EOF >expect && untracked_dir/ untracked_repo/ From c5c4eddd56ccd2ffd6a193856a660573993e9a41 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 10 Dec 2019 20:00:24 +0000 Subject: [PATCH 5/9] dir: break part of read_directory_recursive() out for reuse Create an add_path_to_appropriate_result_list() function from the code at the end of read_directory_recursive() so we can use it elsewhere. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- dir.c | 60 ++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/dir.c b/dir.c index 517a569e10..645b44ea64 100644 --- a/dir.c +++ b/dir.c @@ -1932,6 +1932,40 @@ static void close_cached_dir(struct cached_dir *cdir) } } +static void add_path_to_appropriate_result_list(struct dir_struct *dir, + struct untracked_cache_dir *untracked, + struct cached_dir *cdir, + struct index_state *istate, + struct strbuf *path, + int baselen, + const struct pathspec *pathspec, + enum path_treatment state) +{ + /* add the path to the appropriate result list */ + switch (state) { + case path_excluded: + if (dir->flags & DIR_SHOW_IGNORED) + dir_add_name(dir, istate, path->buf, path->len); + else if ((dir->flags & DIR_SHOW_IGNORED_TOO) || + ((dir->flags & DIR_COLLECT_IGNORED) && + exclude_matches_pathspec(path->buf, path->len, + pathspec))) + dir_add_ignored(dir, istate, path->buf, path->len); + break; + + case path_untracked: + if (dir->flags & DIR_SHOW_IGNORED) + break; + dir_add_name(dir, istate, path->buf, path->len); + if (cdir->fdir) + add_untracked(untracked, path->buf + baselen); + break; + + default: + break; + } +} + /* * Read a directory tree. We currently ignore anything but * directories, regular files and symlinks. That's because git @@ -2035,29 +2069,9 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, continue; } - /* add the path to the appropriate result list */ - switch (state) { - case path_excluded: - if (dir->flags & DIR_SHOW_IGNORED) - dir_add_name(dir, istate, path.buf, path.len); - else if ((dir->flags & DIR_SHOW_IGNORED_TOO) || - ((dir->flags & DIR_COLLECT_IGNORED) && - exclude_matches_pathspec(path.buf, path.len, - pathspec))) - dir_add_ignored(dir, istate, path.buf, path.len); - break; - - case path_untracked: - if (dir->flags & DIR_SHOW_IGNORED) - break; - dir_add_name(dir, istate, path.buf, path.len); - if (cdir.fdir) - add_untracked(untracked, path.buf + baselen); - break; - - default: - break; - } + add_path_to_appropriate_result_list(dir, untracked, &cdir, + istate, &path, baselen, + pathspec, state); } close_cached_dir(&cdir); out: From b9670c1f5e6b98837c489a03ac0d343d30e08505 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 19 Dec 2019 21:28:24 +0000 Subject: [PATCH 6/9] dir: fix checks on common prefix directory Many years ago, the directory traversing logic had an optimization that would always recurse into any directory that was a common prefix of all the pathspecs without walking the leading directories to get down to the desired directory. Thus, git ls-files -o .git/ # case A would notice that .git/ was a common prefix of all pathspecs (since it is the only pathspec listed), and then traverse into it and start showing unknown files under that directory. Unfortunately, .git/ is not a directory we should be traversing into, which made this optimization problematic. This also affected cases like git ls-files -o --exclude-standard t/ # case B where t/ was in the .gitignore file and thus isn't interesting and shouldn't be recursed into. It also affected cases like git ls-files -o --directory untracked_dir/ # case C where untracked_dir/ is indeed untracked and thus interesting, but the --directory flag means we only want to show the directory itself, not recurse into it and start listing untracked files below it. The case B class of bugs were noted and fixed in commits 16e2cfa90993 ("read_directory(): further split treat_path()", 2010-01-08) and 48ffef966c76 ("ls-files: fix overeager pathspec optimization", 2010-01-08), with the idea being that we first wanted to check whether the common prefix was interesting. The former patch noted that treat_path() couldn't be used when checking the common prefix because treat_path() requires a dir_entry() and we haven't read any directories at the point we are checking the common prefix. So, that patch split treat_one_path() out of treat_path(). The latter patch then created a new treat_leading_path() which duplicated by hand the bits of treat_path() that couldn't be broken out and then called treat_one_path() for the remainder. There were three problems with this approach: * The duplicated logic in treat_leading_path() accidentally missed the check for special paths (such as is_dot_or_dotdot and matching ".git"), causing case A types of bugs to continue to be an issue. * The treat_leading_path() logic assumed we should traverse into anything where path_treatment was not path_none, i.e. it perpetuated class C types of bugs. * It meant we had split logic that needed to kept in sync, running the risk that people introduced new inconsistencies (such as in commit be8a84c52669, which we reverted earlier in this series, or in commit df5bcdf83ae which we'll fix in a subsequent commit) Fix most these problems by making treat_leading_path() not only loop over each leading path component, but calling treat_path() directly on each. To do so, we have to create a synthetic dir_entry, but that only takes a few lines. Then, pay attention to the path_treatment result we get from treat_path() and don't treat path_excluded, path_untracked, and path_recurse all the same as path_recurse. This leaves one remaining problem, the new inconsistency from commit df5bcdf83ae. That will be addressed in a subsequent commit. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- dir.c | 67 ++++++++++++++++--- ...common-prefixes-and-directory-traversal.sh | 6 +- 2 files changed, 59 insertions(+), 14 deletions(-) diff --git a/dir.c b/dir.c index 645b44ea64..a42cc2aa8c 100644 --- a/dir.c +++ b/dir.c @@ -2102,37 +2102,82 @@ static int treat_leading_path(struct dir_struct *dir, const struct pathspec *pathspec) { struct strbuf sb = STRBUF_INIT; - int baselen, rc = 0; + int prevlen, baselen; const char *cp; + struct cached_dir cdir; + struct dirent *de; + enum path_treatment state = path_none; + + /* + * For each directory component of path, we are going to check whether + * that path is relevant given the pathspec. For example, if path is + * foo/bar/baz/ + * then we will ask treat_path() whether we should go into foo, then + * whether we should go into bar, then whether baz is relevant. + * Checking each is important because e.g. if path is + * .git/info/ + * then we need to check .git to know we shouldn't traverse it. + * If the return from treat_path() is: + * * path_none, for any path, we return false. + * * path_recurse, for all path components, we return true + * * for some intermediate component, we make sure + * to add that path to the relevant list but return false + * signifying that we shouldn't recurse into it. + */ while (len && path[len - 1] == '/') len--; if (!len) return 1; + + /* + * We need a manufactured dirent with sufficient space to store a + * leading directory component of path in its d_name. Here, we + * assume that the dirent's d_name is either declared as + * char d_name[BIG_ENOUGH] + * or that it is declared at the end of the struct as + * char d_name[] + * For either case, padding with len+1 bytes at the end will ensure + * sufficient storage space. + */ + de = xcalloc(1, sizeof(struct dirent)+len+1); + memset(&cdir, 0, sizeof(cdir)); + cdir.de = de; +#if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT) + de->d_type = DT_DIR; +#endif baselen = 0; + prevlen = 0; while (1) { - cp = path + baselen + !!baselen; + prevlen = baselen + !!baselen; + cp = path + prevlen; cp = memchr(cp, '/', path + len - cp); if (!cp) baselen = len; else baselen = cp - path; - strbuf_setlen(&sb, 0); + strbuf_reset(&sb); strbuf_add(&sb, path, baselen); if (!is_directory(sb.buf)) break; - if (simplify_away(sb.buf, sb.len, pathspec)) - break; - if (treat_one_path(dir, NULL, istate, &sb, baselen, pathspec, - DT_DIR, NULL) == path_none) + strbuf_reset(&sb); + strbuf_add(&sb, path, prevlen); + memcpy(de->d_name, path+prevlen, baselen-prevlen); + de->d_name[baselen-prevlen] = '\0'; + state = treat_path(dir, NULL, &cdir, istate, &sb, prevlen, + pathspec); + if (state != path_recurse) break; /* do not recurse into it */ - if (len <= baselen) { - rc = 1; + if (len <= baselen) break; /* finished checking */ - } } + add_path_to_appropriate_result_list(dir, NULL, &cdir, istate, + &sb, baselen, pathspec, + state); + + free(de); strbuf_release(&sb); - return rc; + return state == path_recurse; } static const char *get_ident_string(void) diff --git a/t/t3011-common-prefixes-and-directory-traversal.sh b/t/t3011-common-prefixes-and-directory-traversal.sh index d6e161ddd8..098fddc75b 100755 --- a/t/t3011-common-prefixes-and-directory-traversal.sh +++ b/t/t3011-common-prefixes-and-directory-traversal.sh @@ -74,7 +74,7 @@ test_expect_success 'git ls-files -o --directory untracked_dir does not recurse' test_cmp expect actual ' -test_expect_failure 'git ls-files -o --directory untracked_dir/ does not recurse' ' +test_expect_success 'git ls-files -o --directory untracked_dir/ does not recurse' ' echo untracked_dir/ >expect && git ls-files -o --directory untracked_dir/ >actual && test_cmp expect actual @@ -86,7 +86,7 @@ test_expect_success 'git ls-files -o untracked_repo does not recurse' ' test_cmp expect actual ' -test_expect_failure 'git ls-files -o untracked_repo/ does not recurse' ' +test_expect_success 'git ls-files -o untracked_repo/ does not recurse' ' echo untracked_repo/ >expect && git ls-files -o untracked_repo/ >actual && test_cmp expect actual @@ -133,7 +133,7 @@ test_expect_success 'git ls-files -o .git shows nothing' ' test_must_be_empty actual ' -test_expect_failure 'git ls-files -o .git/ shows nothing' ' +test_expect_success 'git ls-files -o .git/ shows nothing' ' git ls-files -o .git/ >actual && test_must_be_empty actual ' From 777b420347649f26022bb1a4bf7afe7c4fe0b090 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 19 Dec 2019 21:28:25 +0000 Subject: [PATCH 7/9] dir: synchronize treat_leading_path() and read_directory_recursive() Our optimization to avoid calling into read_directory_recursive() when all pathspecs have a common leading directory mean that we need to match the logic that read_directory_recursive() would use if we had just called it from the root. Since it does more than call treat_path() we need to copy that same logic. Alternatively, we could try to change treat_path to return path_recurse for an untracked directory under the given special circumstances that this logic checks for, but a simple switch results in many test failures such as 'git clean -d' not wiping out untracked but empty directories. To work around that, we'd need the caller of treat_path to check for path_recurse and sometimes special case it into path_untracked. In other words, we'd still have extra logic in both places. Needing to duplicate logic like this means it is guaranteed someone will eventually need to make further changes and forget to update both locations. It is tempting to just nuke the leading_directory special casing to avoid such bugs and simplify the code, but unpack_trees' verify_clean_subdirectory() also calls read_directory() and does so with a non-empty leading path, so I'm hesitant to try to restructure further. Add obnoxious warnings to treat_leading_path() and read_directory_recursive() to try to warn people of such problems. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- dir.c | 30 +++++++++++++++++++ ...common-prefixes-and-directory-traversal.sh | 2 +- t/t7061-wtstatus-ignore.sh | 2 +- 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/dir.c b/dir.c index a42cc2aa8c..357f9593c4 100644 --- a/dir.c +++ b/dir.c @@ -1990,6 +1990,15 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, struct untracked_cache_dir *untracked, int check_only, int stop_at_first_file, const struct pathspec *pathspec) { + /* + * WARNING WARNING WARNING: + * + * Any updates to the traversal logic here may need corresponding + * updates in treat_leading_path(). See the commit message for the + * commit adding this warning as well as the commit preceding it + * for details. + */ + struct cached_dir cdir; enum path_treatment state, subdir_state, dir_state = path_none; struct strbuf path = STRBUF_INIT; @@ -2101,6 +2110,15 @@ static int treat_leading_path(struct dir_struct *dir, const char *path, int len, const struct pathspec *pathspec) { + /* + * WARNING WARNING WARNING: + * + * Any updates to the traversal logic here may need corresponding + * updates in treat_leading_path(). See the commit message for the + * commit adding this warning as well as the commit preceding it + * for details. + */ + struct strbuf sb = STRBUF_INIT; int prevlen, baselen; const char *cp; @@ -2166,6 +2184,18 @@ static int treat_leading_path(struct dir_struct *dir, de->d_name[baselen-prevlen] = '\0'; state = treat_path(dir, NULL, &cdir, istate, &sb, prevlen, pathspec); + if (state == path_untracked && + get_dtype(cdir.de, istate, sb.buf, sb.len) == DT_DIR && + (dir->flags & DIR_SHOW_IGNORED_TOO || + do_match_pathspec(istate, pathspec, sb.buf, sb.len, + baselen, NULL, DO_MATCH_LEADING_PATHSPEC) == MATCHED_RECURSIVELY_LEADING_PATHSPEC)) { + add_path_to_appropriate_result_list(dir, NULL, &cdir, + istate, + &sb, baselen, + pathspec, state); + state = path_recurse; + } + if (state != path_recurse) break; /* do not recurse into it */ if (len <= baselen) diff --git a/t/t3011-common-prefixes-and-directory-traversal.sh b/t/t3011-common-prefixes-and-directory-traversal.sh index 098fddc75b..3da5b2b6e7 100755 --- a/t/t3011-common-prefixes-and-directory-traversal.sh +++ b/t/t3011-common-prefixes-and-directory-traversal.sh @@ -195,7 +195,7 @@ test_expect_success 'git ls-files -o consistent between one or two dirs' ' # ls-files doesn't have a way to request showing both untracked and ignored # files at the same time, so use `git status --ignored` -test_expect_failure 'git status --ignored shows same files under dir with or without pathspec' ' +test_expect_success 'git status --ignored shows same files under dir with or without pathspec' ' cat <<-EOF >expect && ?? an_untracked_dir/ !! an_untracked_dir/ignored diff --git a/t/t7061-wtstatus-ignore.sh b/t/t7061-wtstatus-ignore.sh index 84366050da..e4cf5484f9 100755 --- a/t/t7061-wtstatus-ignore.sh +++ b/t/t7061-wtstatus-ignore.sh @@ -47,7 +47,7 @@ cat >expected <<\EOF !! untracked/ignored EOF -test_expect_failure 'status of untracked directory with --ignored works with or without prefix' ' +test_expect_success 'status of untracked directory with --ignored works with or without prefix' ' git status --porcelain --ignored >tmp && grep untracked/ tmp >actual && rm tmp && From c847dfafeee8b0fe3e053ac307de88e04d1ad072 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 19 Dec 2019 21:28:26 +0000 Subject: [PATCH 8/9] dir: consolidate similar code in treat_directory() Both the DIR_SKIP_NESTED_GIT and DIR_NO_GITLINKS cases were checking for whether a path was actually a nonbare repository. That code could be shared, with just the result of how to act differing between the two cases. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- dir.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/dir.c b/dir.c index 357f9593c4..e1b74f6478 100644 --- a/dir.c +++ b/dir.c @@ -1461,6 +1461,8 @@ static enum path_treatment treat_directory(struct dir_struct *dir, const char *dirname, int len, int baselen, int exclude, const struct pathspec *pathspec) { + int nested_repo = 0; + /* The "len-1" is to strip the final '/' */ switch (directory_exists_in_index(istate, dirname, len-1)) { case index_directory: @@ -1470,15 +1472,16 @@ static enum path_treatment treat_directory(struct dir_struct *dir, return path_none; case index_nonexistent: - if (dir->flags & DIR_SKIP_NESTED_GIT) { - int nested_repo; + if ((dir->flags & DIR_SKIP_NESTED_GIT) || + !(dir->flags & DIR_NO_GITLINKS)) { struct strbuf sb = STRBUF_INIT; strbuf_addstr(&sb, dirname); nested_repo = is_nonbare_repository_dir(&sb); strbuf_release(&sb); - if (nested_repo) - return path_none; } + if (nested_repo) + return ((dir->flags & DIR_SKIP_NESTED_GIT) ? path_none : + (exclude ? path_excluded : path_untracked)); if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES) break; @@ -1506,13 +1509,6 @@ static enum path_treatment treat_directory(struct dir_struct *dir, return path_none; } - if (!(dir->flags & DIR_NO_GITLINKS)) { - struct strbuf sb = STRBUF_INIT; - strbuf_addstr(&sb, dirname); - if (is_nonbare_repository_dir(&sb)) - return exclude ? path_excluded : path_untracked; - strbuf_release(&sb); - } return path_recurse; } From 6836d2fe06cea750ba7364895f8f37c32a34408c Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 20 Dec 2019 09:55:53 -0800 Subject: [PATCH 9/9] dir.c: use st_add3() for allocation size When preparing a manufactured dirent instance, we add a length of path to the size of struct to decide how many bytes to allocate. Make sure this addition does not wrap-around to cause us underallocate. Suggested-by: Jeff King Signed-off-by: Junio C Hamano --- dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dir.c b/dir.c index e1b74f6478..113170aeb9 100644 --- a/dir.c +++ b/dir.c @@ -2154,7 +2154,7 @@ static int treat_leading_path(struct dir_struct *dir, * For either case, padding with len+1 bytes at the end will ensure * sufficient storage space. */ - de = xcalloc(1, sizeof(struct dirent)+len+1); + de = xcalloc(1, st_add3(sizeof(struct dirent), len, 1)); memset(&cdir, 0, sizeof(cdir)); cdir.de = de; #if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT)