From fb0882648e0d624f825974aa7030e56daf6de2af Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Sat, 23 Jan 2021 19:58:11 +0000 Subject: [PATCH 1/9] cache-tree: clean up cache_tree_update() Make the method safer by allocating a cache_tree member for the given index_state if it is not already present. This is preferrable to a BUG() statement or returning with an error because future callers will want to populate an empty cache-tree using this method. Callers can also remove their conditional allocations of cache_tree. Also drop local variables that can be found directly from the 'istate' parameter. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/checkout.c | 3 --- cache-tree.c | 17 +++++++++-------- sequencer.c | 3 --- unpack-trees.c | 2 -- 4 files changed, 9 insertions(+), 16 deletions(-) diff --git a/builtin/checkout.c b/builtin/checkout.c index c9ba23c279..2d6550bc3c 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -821,9 +821,6 @@ static int merge_working_tree(const struct checkout_opts *opts, } } - if (!active_cache_tree) - active_cache_tree = cache_tree(); - if (!cache_tree_fully_valid(active_cache_tree)) cache_tree_update(&the_index, WRITE_TREE_SILENT | WRITE_TREE_REPAIR); diff --git a/cache-tree.c b/cache-tree.c index 3f1a8d4f1b..60b6aefbf5 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -436,16 +436,20 @@ static int update_one(struct cache_tree *it, int cache_tree_update(struct index_state *istate, int flags) { - struct cache_tree *it = istate->cache_tree; - struct cache_entry **cache = istate->cache; - int entries = istate->cache_nr; - int skip, i = verify_cache(cache, entries, flags); + int skip, i; + + i = verify_cache(istate->cache, istate->cache_nr, flags); if (i) return i; + + if (!istate->cache_tree) + istate->cache_tree = cache_tree(); + trace_performance_enter(); trace2_region_enter("cache_tree", "update", the_repository); - i = update_one(it, cache, entries, "", 0, &skip, flags); + i = update_one(istate->cache_tree, istate->cache, istate->cache_nr, + "", 0, &skip, flags); trace2_region_leave("cache_tree", "update", the_repository); trace_performance_leave("cache_tree_update"); if (i < 0) @@ -635,9 +639,6 @@ static int write_index_as_tree_internal(struct object_id *oid, cache_tree_valid = 0; } - if (!index_state->cache_tree) - index_state->cache_tree = cache_tree(); - if (!cache_tree_valid && cache_tree_update(index_state, flags) < 0) return WRITE_TREE_UNMERGED_INDEX; diff --git a/sequencer.c b/sequencer.c index 8909a46770..aa3e4c81cf 100644 --- a/sequencer.c +++ b/sequencer.c @@ -679,9 +679,6 @@ static int do_recursive_merge(struct repository *r, static struct object_id *get_cache_tree_oid(struct index_state *istate) { - if (!istate->cache_tree) - istate->cache_tree = cache_tree(); - if (!cache_tree_fully_valid(istate->cache_tree)) if (cache_tree_update(istate, 0)) { error(_("unable to update cache tree")); diff --git a/unpack-trees.c b/unpack-trees.c index af6e9b9c2f..a810b79657 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1726,8 +1726,6 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options if (!ret) { if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0)) cache_tree_verify(the_repository, &o->result); - if (!o->result.cache_tree) - o->result.cache_tree = cache_tree(); if (!cache_tree_fully_valid(o->result.cache_tree)) cache_tree_update(&o->result, WRITE_TREE_SILENT | From 8d87e338e16e022545638a0e9a3e15c6bdb56111 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Sat, 23 Jan 2021 19:58:12 +0000 Subject: [PATCH 2/9] cache-tree: simplify verify_cache() prototype The verify_cache() method takes an array of cache entries and a count, but these are always provided directly from a struct index_state. Use a pointer to the full structure instead. There is a subtle point when istate->cache_nr is zero that subtracting one will underflow. This triggers a failure in t0000-basic.sh, among others. Use "i + 1 < istate->cache_nr" to avoid these strange comparisons. Convert i to be unsigned as well, which also removes the potential signed overflow in the unlikely case that cache_nr is over 2.1 billion entries. The 'funny' variable has a maximum value of 11, so making it unsigned does not change anything of importance. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- cache-tree.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/cache-tree.c b/cache-tree.c index 60b6aefbf5..acac6d58c3 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -151,16 +151,15 @@ void cache_tree_invalidate_path(struct index_state *istate, const char *path) istate->cache_changed |= CACHE_TREE_CHANGED; } -static int verify_cache(struct cache_entry **cache, - int entries, int flags) +static int verify_cache(struct index_state *istate, int flags) { - int i, funny; + unsigned i, funny; int silent = flags & WRITE_TREE_SILENT; /* Verify that the tree is merged */ funny = 0; - for (i = 0; i < entries; i++) { - const struct cache_entry *ce = cache[i]; + for (i = 0; i < istate->cache_nr; i++) { + const struct cache_entry *ce = istate->cache[i]; if (ce_stage(ce)) { if (silent) return -1; @@ -180,13 +179,13 @@ static int verify_cache(struct cache_entry **cache, * stage 0 entries. */ funny = 0; - for (i = 0; i < entries - 1; i++) { + for (i = 0; i + 1 < istate->cache_nr; i++) { /* path/file always comes after path because of the way * the cache is sorted. Also path can appear only once, * which means conflicting one would immediately follow. */ - const struct cache_entry *this_ce = cache[i]; - const struct cache_entry *next_ce = cache[i + 1]; + const struct cache_entry *this_ce = istate->cache[i]; + const struct cache_entry *next_ce = istate->cache[i + 1]; const char *this_name = this_ce->name; const char *next_name = next_ce->name; int this_len = ce_namelen(this_ce); @@ -438,7 +437,7 @@ int cache_tree_update(struct index_state *istate, int flags) { int skip, i; - i = verify_cache(istate->cache, istate->cache_nr, flags); + i = verify_cache(istate, flags); if (i) return i; From c80dd3967f28527dab49c8e9525524c7f33baa22 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Sat, 23 Jan 2021 19:58:13 +0000 Subject: [PATCH 3/9] cache-tree: extract subtree_pos() This method will be helpful to use outside of cache-tree.c in a later feature. The implementation is subtle due to subtree_name_cmp() sorting by length and then lexicographically. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- cache-tree.c | 6 +++--- cache-tree.h | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cache-tree.c b/cache-tree.c index acac6d58c3..2fb483d3c0 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -45,7 +45,7 @@ static int subtree_name_cmp(const char *one, int onelen, return memcmp(one, two, onelen); } -static int subtree_pos(struct cache_tree *it, const char *path, int pathlen) +int cache_tree_subtree_pos(struct cache_tree *it, const char *path, int pathlen) { struct cache_tree_sub **down = it->down; int lo, hi; @@ -72,7 +72,7 @@ static struct cache_tree_sub *find_subtree(struct cache_tree *it, int create) { struct cache_tree_sub *down; - int pos = subtree_pos(it, path, pathlen); + int pos = cache_tree_subtree_pos(it, path, pathlen); if (0 <= pos) return it->down[pos]; if (!create) @@ -123,7 +123,7 @@ static int do_invalidate_path(struct cache_tree *it, const char *path) it->entry_count = -1; if (!*slash) { int pos; - pos = subtree_pos(it, path, namelen); + pos = cache_tree_subtree_pos(it, path, namelen); if (0 <= pos) { cache_tree_free(&it->down[pos]->cache_tree); free(it->down[pos]); diff --git a/cache-tree.h b/cache-tree.h index 639bfa5340..8efeccebfc 100644 --- a/cache-tree.h +++ b/cache-tree.h @@ -27,6 +27,8 @@ void cache_tree_free(struct cache_tree **); void cache_tree_invalidate_path(struct index_state *, const char *); struct cache_tree_sub *cache_tree_sub(struct cache_tree *, const char *); +int cache_tree_subtree_pos(struct cache_tree *it, const char *path, int pathlen); + void cache_tree_write(struct strbuf *, struct cache_tree *root); struct cache_tree *cache_tree_read(const char *buffer, unsigned long size); From cae70acf2431f0c31e5c097b96fa5bd752526e6d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Sat, 23 Jan 2021 19:58:14 +0000 Subject: [PATCH 4/9] fsmonitor: de-duplicate BUG()s around dirty bits The index has an fsmonitor_dirty bitmap that records which index entries are "dirty" based on the response from the FSMonitor. If this bitmap ever grows larger than the index, then there was an error in how it was constructed, and it was probably a developer's bug. There are several BUG() statements that are very similar, so replace these uses with a simpler assert_index_minimum(). Since there is one caller that uses a custom 'pos' value instead of the bit_size member, we cannot simplify it too much. However, the error string is identical in each, so this simplifies things. Be sure to add one when checking if a position if valid, since the minimum is a bound on the expected size. The end result is that the code is simpler to read while also preserving these assertions for developers in the FSMonitor space. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- fsmonitor.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/fsmonitor.c b/fsmonitor.c index ca031c3abb..fe9e9d7baf 100644 --- a/fsmonitor.c +++ b/fsmonitor.c @@ -13,14 +13,19 @@ struct trace_key trace_fsmonitor = TRACE_KEY_INIT(FSMONITOR); +static void assert_index_minimum(struct index_state *istate, size_t pos) +{ + if (pos > istate->cache_nr) + BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %u)", + (uintmax_t)pos, istate->cache_nr); +} + static void fsmonitor_ewah_callback(size_t pos, void *is) { struct index_state *istate = (struct index_state *)is; struct cache_entry *ce; - if (pos >= istate->cache_nr) - BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" >= %u)", - (uintmax_t)pos, istate->cache_nr); + assert_index_minimum(istate, pos + 1); ce = istate->cache[pos]; ce->ce_flags &= ~CE_FSMONITOR_VALID; @@ -82,10 +87,8 @@ int read_fsmonitor_extension(struct index_state *istate, const void *data, } istate->fsmonitor_dirty = fsmonitor_dirty; - if (!istate->split_index && - istate->fsmonitor_dirty->bit_size > istate->cache_nr) - BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %u)", - (uintmax_t)istate->fsmonitor_dirty->bit_size, istate->cache_nr); + if (!istate->split_index) + assert_index_minimum(istate, istate->fsmonitor_dirty->bit_size); trace_printf_key(&trace_fsmonitor, "read fsmonitor extension successful"); return 0; @@ -110,10 +113,8 @@ void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate) uint32_t ewah_size = 0; int fixup = 0; - if (!istate->split_index && - istate->fsmonitor_dirty->bit_size > istate->cache_nr) - BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %u)", - (uintmax_t)istate->fsmonitor_dirty->bit_size, istate->cache_nr); + if (!istate->split_index) + assert_index_minimum(istate, istate->fsmonitor_dirty->bit_size); put_be32(&hdr_version, INDEX_EXTENSION_VERSION2); strbuf_add(sb, &hdr_version, sizeof(uint32_t)); @@ -335,9 +336,7 @@ void tweak_fsmonitor(struct index_state *istate) } /* Mark all previously saved entries as dirty */ - if (istate->fsmonitor_dirty->bit_size > istate->cache_nr) - BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %u)", - (uintmax_t)istate->fsmonitor_dirty->bit_size, istate->cache_nr); + assert_index_minimum(istate, istate->fsmonitor_dirty->bit_size); ewah_each_bit(istate->fsmonitor_dirty, fsmonitor_ewah_callback, istate); refresh_fsmonitor(istate); From 1fd9ae517c45294a01466aad0d38f99c7893d814 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Sat, 23 Jan 2021 19:58:15 +0000 Subject: [PATCH 5/9] repository: add repo reference to index_state It will be helpful to add behavior to index operations that might trigger an object lookup. Since each index belongs to a specific repository, add a 'repo' pointer to struct index_state that allows access to this repository. Add a BUG() statement if the repo already has an index, and the index already has a repo, but somehow the index points to a different repo. This will prevent future changes from needing to pass an additional 'struct repository *repo' parameter and instead rely only on the 'struct index_state *istate' parameter. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- cache.h | 1 + repository.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/cache.h b/cache.h index 7109765748..f9c7a60384 100644 --- a/cache.h +++ b/cache.h @@ -328,6 +328,7 @@ struct index_state { struct ewah_bitmap *fsmonitor_dirty; struct mem_pool *ce_mem_pool; struct progress *progress; + struct repository *repo; }; /* Name hashing */ diff --git a/repository.c b/repository.c index a4174ddb06..c98298acd0 100644 --- a/repository.c +++ b/repository.c @@ -264,6 +264,12 @@ int repo_read_index(struct repository *repo) if (!repo->index) repo->index = xcalloc(1, sizeof(*repo->index)); + /* Complete the double-reference */ + if (!repo->index->repo) + repo->index->repo = repo; + else if (repo->index->repo != repo) + BUG("repo's index should point back at itself"); + return read_index_from(repo->index, repo->index_file, repo->gitdir); } From 6a9372f4ef218d189120e969f6dc01f9fcd5317a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Sat, 23 Jan 2021 19:58:16 +0000 Subject: [PATCH 6/9] name-hash: use trace2 regions for init The lazy_init_name_hash() populates a hashset with all filenames and another with all directories represented in the index. This is run only if we need to use the hashsets to check for existence or case-folding renames. Place trace2 regions where there is already a performance trace. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- name-hash.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/name-hash.c b/name-hash.c index 5d3c7b12c1..4e03fac9bb 100644 --- a/name-hash.c +++ b/name-hash.c @@ -7,6 +7,7 @@ */ #include "cache.h" #include "thread-utils.h" +#include "trace2.h" struct dir_entry { struct hashmap_entry ent; @@ -577,6 +578,7 @@ static void lazy_init_name_hash(struct index_state *istate) if (istate->name_hash_initialized) return; trace_performance_enter(); + trace2_region_enter("index", "name-hash-init", istate->repo); hashmap_init(&istate->name_hash, cache_entry_cmp, NULL, istate->cache_nr); hashmap_init(&istate->dir_hash, dir_entry_cmp, NULL, istate->cache_nr); @@ -597,6 +599,7 @@ static void lazy_init_name_hash(struct index_state *istate) } istate->name_hash_initialized = 1; + trace2_region_leave("index", "name-hash-init", istate->repo); trace_performance_leave("initialize name hash"); } From dd23022acbf7433514e20a14a9f74e39fec9d340 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Sat, 23 Jan 2021 19:58:17 +0000 Subject: [PATCH 7/9] sparse-checkout: load sparse-checkout patterns A future feature will want to load the sparse-checkout patterns into a pattern_list, but the current mechanism to do so is a bit complicated. This is made difficult due to needing to find the sparse-checkout file in different ways throughout the codebase. The logic implemented in the new get_sparse_checkout_patterns() was duplicated in populate_from_existing_patterns() in unpack-trees.c. Use the new method instead, keeping the logic around handling the struct unpack_trees_options. The callers to get_sparse_checkout_filename() in builtin/sparse-checkout.c manipulate the sparse-checkout file directly, so it is not appropriate to replace logic in that file with get_sparse_checkout_patterns(). Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 5 ----- dir.c | 17 +++++++++++++++++ dir.h | 2 ++ unpack-trees.c | 6 +----- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index e3140db2a0..2306a9ad98 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -22,11 +22,6 @@ static char const * const builtin_sparse_checkout_usage[] = { NULL }; -static char *get_sparse_checkout_filename(void) -{ - return git_pathdup("info/sparse-checkout"); -} - static void write_patterns_to_file(FILE *fp, struct pattern_list *pl) { int i; diff --git a/dir.c b/dir.c index d637461da5..d153a63bbd 100644 --- a/dir.c +++ b/dir.c @@ -2998,6 +2998,23 @@ void setup_standard_excludes(struct dir_struct *dir) } } +char *get_sparse_checkout_filename(void) +{ + return git_pathdup("info/sparse-checkout"); +} + +int get_sparse_checkout_patterns(struct pattern_list *pl) +{ + int res; + char *sparse_filename = get_sparse_checkout_filename(); + + pl->use_cone_patterns = core_sparse_checkout_cone; + res = add_patterns_from_file_to_list(sparse_filename, "", 0, pl, NULL); + + free(sparse_filename); + return res; +} + int remove_path(const char *name) { char *slash; diff --git a/dir.h b/dir.h index a3c40dec51..facfae4740 100644 --- a/dir.h +++ b/dir.h @@ -448,6 +448,8 @@ int is_empty_dir(const char *dir); void setup_standard_excludes(struct dir_struct *dir); +char *get_sparse_checkout_filename(void); +int get_sparse_checkout_patterns(struct pattern_list *pl); /* Constants for remove_dir_recursively: */ diff --git a/unpack-trees.c b/unpack-trees.c index a810b79657..f5f668f532 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1549,14 +1549,10 @@ static void mark_new_skip_worktree(struct pattern_list *pl, static void populate_from_existing_patterns(struct unpack_trees_options *o, struct pattern_list *pl) { - char *sparse = git_pathdup("info/sparse-checkout"); - - pl->use_cone_patterns = core_sparse_checkout_cone; - if (add_patterns_from_file_to_list(sparse, "", 0, pl, NULL) < 0) + if (get_sparse_checkout_patterns(pl) < 0) o->skip_sparse_checkout = 1; else o->pl = pl; - free(sparse); } From 3b14436364f0a68167723541bef1a625c5c6f163 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Sat, 23 Jan 2021 16:07:08 -0500 Subject: [PATCH 8/9] test-lib: test_region looks for trace2 regions From ff15d509b89edd4830d85d53cea3079a6b0c1c08 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 11 Jan 2021 08:53:09 -0500 Subject: [PATCH 8/9] test-lib: test_region looks for trace2 regions Most test cases can verify Git's behavior using input/output expectations or changes to the .git directory. However, sometimes we want to check that Git did or did not run a certain section of code. This is particularly important for performance-only features that we want to ensure have been enabled in certain cases. Add a new 'test_region' function that checks if a trace2 region was entered and left in a given trace2 event log. There is one existing test (t0500-progress-display.sh) that performs this check already, so use the helper function instead. Note that this changes the expectations slightly. The old test (incorrectly) used two patterns for the 'grep' invocation, but this performs an OR of the patterns, not an AND. This means that as long as one region_enter event was logged, the test would succeed, even if it was not due to the progress category. More uses will be added in a later change. t6423-merge-rename-directories.sh also greps for region_enter lines, but it verifies the number of such lines, which is not the same as an existence check. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t0500-progress-display.sh | 3 +-- t/test-lib-functions.sh | 42 +++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/t/t0500-progress-display.sh b/t/t0500-progress-display.sh index 1ed1df351c..84cce345e7 100755 --- a/t/t0500-progress-display.sh +++ b/t/t0500-progress-display.sh @@ -303,8 +303,7 @@ test_expect_success 'progress generates traces' ' "Working hard" stderr && # t0212/parse_events.perl intentionally omits regions and data. - grep -e "region_enter" -e "\"category\":\"progress\"" trace.event && - grep -e "region_leave" -e "\"category\":\"progress\"" trace.event && + test_region progress "Working hard" trace.event && grep "\"key\":\"total_objects\",\"value\":\"40\"" trace.event && grep "\"key\":\"total_bytes\",\"value\":\"409600\"" trace.event ' diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh index 999982fe4a..9fc4cf8476 100644 --- a/t/test-lib-functions.sh +++ b/t/test-lib-functions.sh @@ -1655,3 +1655,45 @@ test_subcommand () { grep "\[$expr\]" fi } + +# Check that the given command was invoked as part of the +# trace2-format trace on stdin. +# +# test_region [!]