From a8437f3cb1994cb0c93b27fdefaa0163eddcf92a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Oct 2022 18:01:48 -0400 Subject: [PATCH 1/4] midx.c: fix whitespace typo This was unintentionally introduced via 893b563505 (midx: inline nth_midxed_pack_entry(), 2021-09-11) where "struct repository *r" became "struct repository * r". The latter does not adhere to our usual style conventions, so fix that up to look more like our usual declarations. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/midx.c b/midx.c index c27d0e5f15..ea24e30b6e 100644 --- a/midx.c +++ b/midx.c @@ -278,7 +278,7 @@ uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos) (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH); } -int fill_midx_entry(struct repository * r, +int fill_midx_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e, struct multi_pack_index *m) From 1dc4f1ef0d2528f405d71850d73d80ff1a571abd Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Oct 2022 18:01:52 -0400 Subject: [PATCH 2/4] midx.c: consider annotated tags during bitmap selection When generating a multi-pack bitmap without a `--refs-snapshot` (e.g., by running `git multi-pack-index write --bitmap` directly), we determine the set of bitmap-able commits by enumerating each reference, and adding the referrent as the tip of a reachability traversal when it appears somewhere in the MIDX. (Any commit we encounter during the reachability traversal then becomes a candidate for bitmap selection). But we incorrectly avoid peeling the object at the tip of each reference. So if we see some reference that points at an annotated tag (which in turn points through zero or more additional annotated tags at a commit), that we will not add it as a tip for the reachability traversal. This means that if some commit C is only referenced through one or more annotated tag(s), then C won't become a bitmap candidate. Correct this by peeling the reference tips as we enumerate them to ensure that we consider commits which are the targets of annotated tags, in addition to commits which are referenced directly. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 4 ++++ t/t5326-multi-pack-bitmaps.sh | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/midx.c b/midx.c index ea24e30b6e..a8d2111e96 100644 --- a/midx.c +++ b/midx.c @@ -980,6 +980,7 @@ static int add_ref_to_pending(const char *refname, int flag, void *cb_data) { struct rev_info *revs = (struct rev_info*)cb_data; + struct object_id peeled; struct object *object; if ((flag & REF_ISSYMREF) && (flag & REF_ISBROKEN)) { @@ -987,6 +988,9 @@ static int add_ref_to_pending(const char *refname, return 0; } + if (!peel_iterated_oid(oid, &peeled)) + oid = &peeled; + object = parse_object_or_die(oid, refname); if (object->type != OBJ_COMMIT) return 0; diff --git a/t/t5326-multi-pack-bitmaps.sh b/t/t5326-multi-pack-bitmaps.sh index ad6eea5fa0..0882cbb6e4 100755 --- a/t/t5326-multi-pack-bitmaps.sh +++ b/t/t5326-multi-pack-bitmaps.sh @@ -410,4 +410,28 @@ test_expect_success 'preferred pack change with existing MIDX bitmap' ' ) ' +test_expect_success 'tagged commits are selected for bitmapping' ' + rm -fr repo && + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit --annotate base && + git repack -d && + + # Remove refs/heads/main which points at the commit directly, + # leaving only a reference to the annotated tag. + git branch -M main && + git checkout base && + git branch -d main && + + git multi-pack-index write --bitmap && + + git rev-parse HEAD >want && + test-tool bitmap list-commits >actual && + grep $(cat want) actual + ) +' + test_done From 2dcff52524e611413cb0cb49015f0d105f5f8dfa Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Oct 2022 18:01:55 -0400 Subject: [PATCH 3/4] midx.c: instrument MIDX and bitmap generation with trace2 regions When debugging MIDX and MIDX-bitmap related issues, it is useful to figure out where Git is spending its time. GitHub has been using the below trace2 regions to instrument various components of generating a MIDX itself, as well time spent preparing to build a MIDX bitmap. These are limited to instrumenting the following functions: - midx.c::find_commits_for_midx_bitmap() - midx.c::midx_pack_order() - midx.c::prepare_midx_packing_data() - midx.c::write_midx_bitmap() - midx.c::write_midx_internal() - midx.c::write_midx_reverse_index() to start and end with a trace2_region_enter() and trace2_region_leave(), respectively. The category for all of these is "midx", which matches the existing convention. The region description matches the name of the function. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/midx.c b/midx.c index a8d2111e96..4df701ee11 100644 --- a/midx.c +++ b/midx.c @@ -913,6 +913,8 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx) uint32_t *pack_order; uint32_t i; + trace2_region_enter("midx", "midx_pack_order", the_repository); + ALLOC_ARRAY(data, ctx->entries_nr); for (i = 0; i < ctx->entries_nr; i++) { struct pack_midx_entry *e = &ctx->entries[i]; @@ -930,6 +932,8 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx) pack_order[i] = data[i].nr; free(data); + trace2_region_leave("midx", "midx_pack_order", the_repository); + return pack_order; } @@ -939,6 +943,8 @@ static void write_midx_reverse_index(char *midx_name, unsigned char *midx_hash, struct strbuf buf = STRBUF_INIT; const char *tmp_file; + trace2_region_enter("midx", "write_midx_reverse_index", the_repository); + strbuf_addf(&buf, "%s-%s.rev", midx_name, hash_to_hex(midx_hash)); tmp_file = write_rev_file_order(NULL, ctx->pack_order, ctx->entries_nr, @@ -948,6 +954,8 @@ static void write_midx_reverse_index(char *midx_name, unsigned char *midx_hash, die(_("cannot store reverse index file")); strbuf_release(&buf); + + trace2_region_leave("midx", "write_midx_reverse_index", the_repository); } static void clear_midx_files_ext(const char *object_dir, const char *ext, @@ -963,6 +971,8 @@ static void prepare_midx_packing_data(struct packing_data *pdata, { uint32_t i; + trace2_region_enter("midx", "prepare_midx_packing_data", the_repository); + memset(pdata, 0, sizeof(struct packing_data)); prepare_packing_data(the_repository, pdata); @@ -973,6 +983,8 @@ static void prepare_midx_packing_data(struct packing_data *pdata, oe_set_in_pack(pdata, to, ctx->info[ctx->pack_perm[from->pack_int_id]].p); } + + trace2_region_leave("midx", "prepare_midx_packing_data", the_repository); } static int add_ref_to_pending(const char *refname, @@ -1070,6 +1082,9 @@ static struct commit **find_commits_for_midx_bitmap(uint32_t *indexed_commits_nr struct rev_info revs; struct bitmap_commit_cb cb = {0}; + trace2_region_enter("midx", "find_commits_for_midx_bitmap", + the_repository); + cb.ctx = ctx; repo_init_revisions(the_repository, &revs, NULL); @@ -1103,6 +1118,10 @@ static struct commit **find_commits_for_midx_bitmap(uint32_t *indexed_commits_nr *indexed_commits_nr_p = cb.commits_nr; release_revisions(&revs); + + trace2_region_leave("midx", "find_commits_for_midx_bitmap", + the_repository); + return cb.commits; } @@ -1120,6 +1139,8 @@ static int write_midx_bitmap(const char *midx_name, char *bitmap_name = xstrfmt("%s-%s.bitmap", midx_name, hash_to_hex(midx_hash)); + trace2_region_enter("midx", "write_midx_bitmap", the_repository); + if (flags & MIDX_WRITE_BITMAP_HASH_CACHE) options |= BITMAP_OPT_HASH_CACHE; @@ -1165,6 +1186,9 @@ static int write_midx_bitmap(const char *midx_name, cleanup: free(index); free(bitmap_name); + + trace2_region_leave("midx", "write_midx_bitmap", the_repository); + return ret; } @@ -1211,6 +1235,8 @@ static int write_midx_internal(const char *object_dir, int result = 0; struct chunkfile *cf; + trace2_region_enter("midx", "write_midx_internal", the_repository); + get_midx_filename(&midx_name, object_dir); if (safe_create_leading_directories(midx_name.buf)) die_errno(_("unable to create leading directories of %s"), @@ -1552,6 +1578,8 @@ cleanup: free(ctx.pack_order); strbuf_release(&midx_name); + trace2_region_leave("midx", "write_midx_internal", the_repository); + return result; } From e9c383994493f3b775191aed13811a868aa639da Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Oct 2022 18:01:57 -0400 Subject: [PATCH 4/4] pack-bitmap-write.c: instrument number of reused bitmaps When debugging bitmap generation performance, it is useful to know how many bitmaps were generated from scratch, and how many were the result of permuting the bit-order of an existing bitmap. Keep track of the latter, and emit the count as a trace2_data line to aid in debugging. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index a213f5eddc..cfa67a510f 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -384,6 +384,8 @@ static int fill_bitmap_tree(struct bitmap *bitmap, return 0; } +static int reused_bitmaps_nr; + static int fill_bitmap_commit(struct bb_commit *ent, struct commit *commit, struct prio_queue *queue, @@ -409,8 +411,10 @@ static int fill_bitmap_commit(struct bb_commit *ent, * bitmap and add its bits to this one. No need to walk * parents or the tree for this commit. */ - if (old && !rebuild_bitmap(mapping, old, ent->bitmap)) + if (old && !rebuild_bitmap(mapping, old, ent->bitmap)) { + reused_bitmaps_nr++; continue; + } } /* @@ -526,6 +530,8 @@ int bitmap_writer_build(struct packing_data *to_pack) trace2_region_leave("pack-bitmap-write", "building_bitmaps_total", the_repository); + trace2_data_intmax("pack-bitmap-write", the_repository, + "building_bitmaps_reused", reused_bitmaps_nr); stop_progress(&writer.progress);