From 0dfed92dfdb95c2f12df7299bb1e606d79185626 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 5 Sep 2019 18:50:31 -0400 Subject: [PATCH 1/6] git-am: handle missing "author" when parsing commit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We try to parse the "author" line out of a commit buffer. We handle the case that split_ident_line() doesn't work, but we don't do any error checking that we found an "author" line in the first place! This would cause us to segfault on such a corrupt object. Let's put in an explicit NULL check (we can just die(), which is what a bogus split would do, too). As a bonus, this silences a warning when compiling with gcc 9.2.1 using "-flto -O3", which claims that ident_len may be uninitialized (it would only be if we had a NULL here). Reported-by: Stephan Beyer Helped-by: René Scharfe Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/am.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/builtin/am.c b/builtin/am.c index 1aea657a7f..ee7305eaa6 100644 --- a/builtin/am.c +++ b/builtin/am.c @@ -1272,7 +1272,9 @@ static void get_commit_info(struct am_state *state, struct commit *commit) buffer = logmsg_reencode(commit, NULL, get_commit_output_encoding()); ident_line = find_commit_header(buffer, "author", &ident_len); - + if (!ident_line) + die(_("missing author line in commit %s"), + oid_to_hex(&commit->object.oid)); if (split_ident_line(&id, ident_line, ident_len) < 0) die(_("invalid ident line: %.*s"), (int)ident_len, ident_line); From f1cbd033e201a18c7175bc6509b48d6243e79739 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 5 Sep 2019 18:52:25 -0400 Subject: [PATCH 2/6] pack-objects: use object_id in packlist_alloc() The only caller of packlist_alloc() already has a "struct object_id", and we immediately copy the hash they pass us into our own object_id. Let's avoid the unnecessary round-trip to a raw sha1 pointer. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 2 +- pack-objects.c | 4 ++-- pack-objects.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 76ce906946..dc2a7e9ac0 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1147,7 +1147,7 @@ static void create_object_entry(const struct object_id *oid, { struct object_entry *entry; - entry = packlist_alloc(&to_pack, oid->hash, index_pos); + entry = packlist_alloc(&to_pack, oid, index_pos); entry->hash = hash; oe_set_type(entry, type); if (exclude) diff --git a/pack-objects.c b/pack-objects.c index 52560293b6..c1df08df1a 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -153,7 +153,7 @@ void prepare_packing_data(struct repository *r, struct packing_data *pdata) } struct object_entry *packlist_alloc(struct packing_data *pdata, - const unsigned char *sha1, + const struct object_id *oid, uint32_t index_pos) { struct object_entry *new_entry; @@ -177,7 +177,7 @@ struct object_entry *packlist_alloc(struct packing_data *pdata, new_entry = pdata->objects + pdata->nr_objects++; memset(new_entry, 0, sizeof(*new_entry)); - hashcpy(new_entry->idx.oid.hash, sha1); + oidcpy(&new_entry->idx.oid, oid); if (pdata->index_size * 3 <= pdata->nr_objects * 4) rehash_objects(pdata); diff --git a/pack-objects.h b/pack-objects.h index 857d43850b..47bf7ebf86 100644 --- a/pack-objects.h +++ b/pack-objects.h @@ -183,7 +183,7 @@ static inline void packing_data_unlock(struct packing_data *pdata) } struct object_entry *packlist_alloc(struct packing_data *pdata, - const unsigned char *sha1, + const struct object_id *oid, uint32_t index_pos); struct object_entry *packlist_find(struct packing_data *pdata, From 7140414d8bd7ed1a05b83bc34d0d0e76ef8b12bd Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 5 Sep 2019 18:52:49 -0400 Subject: [PATCH 3/6] bulk-checkin: zero-initialize hashfile_checkpoint We declare a "struct hashfile_checkpoint" but only sometimes actually call hashfile_checkpoint() on it. That makes it not immediately obvious that it's valid when we later access its members. In fact, the code is fine: we fill it in unconditionally in the while(1) loop as long as "idx" is non-NULL. And then if "idx" is NULL, we exit early from the function (because we're just computing the hash, not actually writing), before we look at the struct. However, this does seem to confuse gcc 9.2.1's -Wmaybe-uninitialized when compiled with "-flto -O2" (probably because with LTO it can now realize that our call to hashfile_truncate() does not set the members either). Let's zero-initialize the struct to tell the compiler, as well as any readers of the code, that all is well. Reported-by: Stephan Beyer Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- bulk-checkin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bulk-checkin.c b/bulk-checkin.c index 39ee7d6107..583aacb9e3 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -197,7 +197,7 @@ static int deflate_to_pack(struct bulk_checkin_state *state, git_hash_ctx ctx; unsigned char obuf[16384]; unsigned header_len; - struct hashfile_checkpoint checkpoint; + struct hashfile_checkpoint checkpoint = {0}; struct pack_idx_entry *idx = NULL; seekback = lseek(fd, 0, SEEK_CUR); From e4b369069e4a7630233a045784d0b1e2425b0a05 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 5 Sep 2019 18:53:37 -0400 Subject: [PATCH 4/6] diff-delta: set size out-parameter to 0 for NULL delta When we cannot generate a delta, we return NULL but leave delta_size untouched. This is generally OK, as callers rely on NULL to decide if the output is usable or not. But it can confuse compilers; in particular, gcc 9.2.1 with "-flto -O3" complains in fast-import's store_object() that delta_len may be used uninitialized. Let's change the diff-delta code to set the size explicitly to 0 for a NULL return. That silences the compiler and makes it easier to reason about the result. Reported-by: Stephan Beyer Helped-by: Junio C Hamano Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- diff-delta.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/diff-delta.c b/diff-delta.c index e49643353b..77fea08dfb 100644 --- a/diff-delta.c +++ b/diff-delta.c @@ -326,6 +326,8 @@ create_delta(const struct delta_index *index, const unsigned char *ref_data, *ref_top, *data, *top; unsigned char *out; + *delta_size = 0; + if (!trg_buf || !trg_size) return NULL; From 7e6b96c73b24a797762440e7a1ef0eaf5e7a8dca Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 5 Sep 2019 18:54:31 -0400 Subject: [PATCH 5/6] test-read-cache: drop namelen variable Early in the function we set "namelen = strlen(name)" if "name" is non-NULL. Later, we use "namelen" only if "name" is non-NULL. However, it's hard to immediately see this, and it seems to confuse gcc 9.2.1 (with "-flto" interestingly, though all of the involved logic is in inline functions; it also triggers when building with ASan). Let's simplify the code and remove the variable entirely. There's only one use of namelen anyway, so we can just call strlen() then. It's true this is in a loop, so we might execute strlen() more often. But: - this is test code that only ever loops twice in our test suite (we do loop 1000 times in a t/perf test, but without using this option). - a decent compiler ought to be able to hoist that out of the loop anyway (though I wouldn't count on gcc 9.2.1 doing so!) Reported-by: Stephan Beyer Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/helper/test-read-cache.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/t/helper/test-read-cache.c b/t/helper/test-read-cache.c index 7e79b555de..244977a29b 100644 --- a/t/helper/test-read-cache.c +++ b/t/helper/test-read-cache.c @@ -4,11 +4,10 @@ int cmd__read_cache(int argc, const char **argv) { - int i, cnt = 1, namelen; + int i, cnt = 1; const char *name = NULL; if (argc > 1 && skip_prefix(argv[1], "--print-and-refresh=", &name)) { - namelen = strlen(name); argc--; argv++; } @@ -24,7 +23,7 @@ int cmd__read_cache(int argc, const char **argv) refresh_index(&the_index, REFRESH_QUIET, NULL, NULL, NULL); - pos = index_name_pos(&the_index, name, namelen); + pos = index_name_pos(&the_index, name, strlen(name)); if (pos < 0) die("%s not in index", name); printf("%s is%s up to date\n", name, From 3a37876b5dca4c18bda67bcdead9c1d79a59933d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 5 Sep 2019 21:36:05 -0400 Subject: [PATCH 6/6] pack-objects: drop packlist index_pos optimization Once upon a time, the code to add an object to our packing list in pack-objects all lived in a single function. It computed the position within the hash table once, then used it to check if the object was already present, and if not, to add it. Later, in 2834bc27c1 (pack-objects: refactor the packing list, 2013-10-24), this was split into two functions: packlist_find() and packlist_alloc(). We ended up with an "index_pos" variable that gets passed through several functions to make it from one to the other. The resulting code is rather confusing to follow. The "index_pos" variable is sometimes undefined, if we don't yet have a hash table. This works out in practice because in that case packlist_alloc() won't use it at all, since it will have to create/grow the hash table. But it's hard to verify that, and it does cause gcc 9.2.1's -Wmaybe-uninitialized to complain when compiled with "-flto -O3" (rightfully, since we do pass the uninitialized value as a function parameter, even if nobody ends up using it). All of this is to save computing the hash index again when we're inserting into the hash table, which I found doesn't make a measurable difference in the program runtime (which is not surprising, since we're doing all kinds of other heavyweight things for each object). Let's just drop this index_pos variable entirely, simplifying the code (and pleasing the compiler). We might be better still refactoring this custom hash table to use one of our existing implementations (an oidmap, or a kh_oid_map). I stopped short of that here, but this would be the likely first step towards that anyway. Reported-by: Stephan Beyer Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 33 ++++++++++++++------------------- pack-bitmap-write.c | 2 +- pack-bitmap.c | 2 +- pack-objects.c | 20 +++++++++++--------- pack-objects.h | 6 ++---- 5 files changed, 29 insertions(+), 34 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index dc2a7e9ac0..9a8d935700 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -610,12 +610,12 @@ static int mark_tagged(const char *path, const struct object_id *oid, int flag, void *cb_data) { struct object_id peeled; - struct object_entry *entry = packlist_find(&to_pack, oid, NULL); + struct object_entry *entry = packlist_find(&to_pack, oid); if (entry) entry->tagged = 1; if (!peel_ref(path, &peeled)) { - entry = packlist_find(&to_pack, &peeled, NULL); + entry = packlist_find(&to_pack, &peeled); if (entry) entry->tagged = 1; } @@ -996,12 +996,11 @@ static int no_try_delta(const char *path) * few lines later when we want to add the new entry. */ static int have_duplicate_entry(const struct object_id *oid, - int exclude, - uint32_t *index_pos) + int exclude) { struct object_entry *entry; - entry = packlist_find(&to_pack, oid, index_pos); + entry = packlist_find(&to_pack, oid); if (!entry) return 0; @@ -1141,13 +1140,12 @@ static void create_object_entry(const struct object_id *oid, uint32_t hash, int exclude, int no_try_delta, - uint32_t index_pos, struct packed_git *found_pack, off_t found_offset) { struct object_entry *entry; - entry = packlist_alloc(&to_pack, oid, index_pos); + entry = packlist_alloc(&to_pack, oid); entry->hash = hash; oe_set_type(entry, type); if (exclude) @@ -1171,11 +1169,10 @@ static int add_object_entry(const struct object_id *oid, enum object_type type, { struct packed_git *found_pack = NULL; off_t found_offset = 0; - uint32_t index_pos; display_progress(progress_state, ++nr_seen); - if (have_duplicate_entry(oid, exclude, &index_pos)) + if (have_duplicate_entry(oid, exclude)) return 0; if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) { @@ -1190,7 +1187,7 @@ static int add_object_entry(const struct object_id *oid, enum object_type type, create_object_entry(oid, type, pack_name_hash(name), exclude, name && no_try_delta(name), - index_pos, found_pack, found_offset); + found_pack, found_offset); return 1; } @@ -1199,17 +1196,15 @@ static int add_object_entry_from_bitmap(const struct object_id *oid, int flags, uint32_t name_hash, struct packed_git *pack, off_t offset) { - uint32_t index_pos; - display_progress(progress_state, ++nr_seen); - if (have_duplicate_entry(oid, 0, &index_pos)) + if (have_duplicate_entry(oid, 0)) return 0; if (!want_object_in_pack(oid, 0, &pack, &offset)) return 0; - create_object_entry(oid, type, name_hash, 0, 0, index_pos, pack, offset); + create_object_entry(oid, type, name_hash, 0, 0, pack, offset); return 1; } @@ -1507,7 +1502,7 @@ static int can_reuse_delta(const unsigned char *base_sha1, * First see if we're already sending the base (or it's explicitly in * our "excluded" list). */ - base = packlist_find(&to_pack, &base_oid, NULL); + base = packlist_find(&to_pack, &base_oid); if (base) { if (!in_same_island(&delta->idx.oid, &base->idx.oid)) return 0; @@ -2579,7 +2574,7 @@ static void add_tag_chain(const struct object_id *oid) * it was included via bitmaps, we would not have parsed it * previously). */ - if (packlist_find(&to_pack, oid, NULL)) + if (packlist_find(&to_pack, oid)) return; tag = lookup_tag(the_repository, oid); @@ -2603,7 +2598,7 @@ static int add_ref_tag(const char *path, const struct object_id *oid, int flag, if (starts_with(path, "refs/tags/") && /* is a tag? */ !peel_ref(path, &peeled) && /* peelable? */ - packlist_find(&to_pack, &peeled, NULL)) /* object packed? */ + packlist_find(&to_pack, &peeled)) /* object packed? */ add_tag_chain(oid); return 0; } @@ -2803,7 +2798,7 @@ static void show_object(struct object *obj, const char *name, void *data) for (p = strchr(name, '/'); p; p = strchr(p + 1, '/')) depth++; - ent = packlist_find(&to_pack, &obj->oid, NULL); + ent = packlist_find(&to_pack, &obj->oid); if (ent && depth > oe_tree_depth(&to_pack, ent)) oe_set_tree_depth(&to_pack, ent, depth); } @@ -3034,7 +3029,7 @@ static void loosen_unused_packed_objects(void) for (i = 0; i < p->num_objects; i++) { nth_packed_object_oid(&oid, p, i); - if (!packlist_find(&to_pack, &oid, NULL) && + if (!packlist_find(&to_pack, &oid) && !has_sha1_pack_kept_or_nonlocal(&oid) && !loosened_object_can_be_discarded(&oid, p->mtime)) if (force_object_loose(&oid, p->mtime)) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index fa78a460c9..a7a4964b50 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -144,7 +144,7 @@ static inline void reset_all_seen(void) static uint32_t find_object_pos(const struct object_id *oid) { - struct object_entry *entry = packlist_find(writer.to_pack, oid, NULL); + struct object_entry *entry = packlist_find(writer.to_pack, oid); if (!entry) { die("Failed to write bitmap index. Packfile doesn't have full closure " diff --git a/pack-bitmap.c b/pack-bitmap.c index ed2befaac6..84cd1bed4a 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1063,7 +1063,7 @@ int rebuild_existing_bitmaps(struct bitmap_index *bitmap_git, entry = &bitmap_git->pack->revindex[i]; nth_packed_object_oid(&oid, bitmap_git->pack, entry->nr); - oe = packlist_find(mapping, &oid, NULL); + oe = packlist_find(mapping, &oid); if (oe) reposition[i] = oe_in_pack_pos(mapping, oe) + 1; diff --git a/pack-objects.c b/pack-objects.c index c1df08df1a..c6250d77f4 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -68,8 +68,7 @@ static void rehash_objects(struct packing_data *pdata) } struct object_entry *packlist_find(struct packing_data *pdata, - const struct object_id *oid, - uint32_t *index_pos) + const struct object_id *oid) { uint32_t i; int found; @@ -79,9 +78,6 @@ struct object_entry *packlist_find(struct packing_data *pdata, i = locate_object_entry_hash(pdata, oid, &found); - if (index_pos) - *index_pos = i; - if (!found) return NULL; @@ -153,8 +149,7 @@ void prepare_packing_data(struct repository *r, struct packing_data *pdata) } struct object_entry *packlist_alloc(struct packing_data *pdata, - const struct object_id *oid, - uint32_t index_pos) + const struct object_id *oid) { struct object_entry *new_entry; @@ -181,8 +176,15 @@ struct object_entry *packlist_alloc(struct packing_data *pdata, if (pdata->index_size * 3 <= pdata->nr_objects * 4) rehash_objects(pdata); - else - pdata->index[index_pos] = pdata->nr_objects; + else { + int found; + uint32_t pos = locate_object_entry_hash(pdata, + &new_entry->idx.oid, + &found); + if (found) + BUG("duplicate object inserted into hash"); + pdata->index[pos] = pdata->nr_objects; + } if (pdata->in_pack) pdata->in_pack[pdata->nr_objects - 1] = NULL; diff --git a/pack-objects.h b/pack-objects.h index 47bf7ebf86..6fe6ae5ee8 100644 --- a/pack-objects.h +++ b/pack-objects.h @@ -183,12 +183,10 @@ static inline void packing_data_unlock(struct packing_data *pdata) } struct object_entry *packlist_alloc(struct packing_data *pdata, - const struct object_id *oid, - uint32_t index_pos); + const struct object_id *oid); struct object_entry *packlist_find(struct packing_data *pdata, - const struct object_id *oid, - uint32_t *index_pos); + const struct object_id *oid); static inline uint32_t pack_name_hash(const char *name) {