From 0763671b8e0b3ef873df13c741a911b809e6813d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 23 Feb 2020 23:27:36 -0500 Subject: [PATCH 01/10] nth_packed_object_oid(): use customary integer return Our nth_packed_object_sha1() function returns NULL for error. So when we wrapped it with nth_packed_object_oid(), we kept the same semantics. But it's a bit funny, because the caller actually passes in an out parameter, and the pointer we return is just that same struct they passed to us (or NULL). It's not too terrible, but it does make the interface a little non-idiomatic. Let's switch to our usual "0 for success, negative for error" return value. Most callers either don't check it, or are trivially converted. The one that requires the biggest change is actually improved, as we can ditch an extra aliased pointer variable. Since we are changing the interface in a subtle way that the compiler wouldn't catch, let's also change the name to catch any topics in flight. We can drop the 'o' and make it nth_packed_object_id(). That's slightly shorter, but also less redundant since the 'o' stands for "object" already. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 4 ++-- midx.c | 2 +- pack-bitmap.c | 4 ++-- packfile.c | 18 +++++++++--------- packfile.h | 5 ++--- sha1-name.c | 13 ++++++------- 6 files changed, 22 insertions(+), 24 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 940fbcb7b3..de8335e2bd 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3053,7 +3053,7 @@ static void add_objects_in_unpacked_packs(void) in_pack.alloc); for (i = 0; i < p->num_objects; i++) { - nth_packed_object_oid(&oid, p, i); + nth_packed_object_id(&oid, p, i); o = lookup_unknown_object(&oid); if (!(o->flags & OBJECT_ADDED)) mark_in_pack_object(o, p, &in_pack); @@ -3157,7 +3157,7 @@ static void loosen_unused_packed_objects(void) die(_("cannot open pack index")); for (i = 0; i < p->num_objects; i++) { - nth_packed_object_oid(&oid, p, i); + nth_packed_object_id(&oid, p, i); if (!packlist_find(&to_pack, &oid) && !has_sha1_pack_kept_or_nonlocal(&oid) && !loosened_object_can_be_discarded(&oid, p->mtime)) diff --git a/midx.c b/midx.c index 37ec28623a..1527e464a7 100644 --- a/midx.c +++ b/midx.c @@ -534,7 +534,7 @@ static void fill_pack_entry(uint32_t pack_int_id, uint32_t cur_object, struct pack_midx_entry *entry) { - if (!nth_packed_object_oid(&entry->oid, p, cur_object)) + if (nth_packed_object_id(&entry->oid, p, cur_object) < 0) die(_("failed to locate object %d in packfile"), cur_object); entry->pack_int_id = pack_int_id; diff --git a/pack-bitmap.c b/pack-bitmap.c index 5a8689cdf8..c81d323329 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -658,7 +658,7 @@ static void show_objects_for_type( offset += ewah_bit_ctz64(word >> offset); entry = &bitmap_git->pack->revindex[pos + offset]; - nth_packed_object_oid(&oid, bitmap_git->pack, entry->nr); + nth_packed_object_id(&oid, bitmap_git->pack, entry->nr); if (bitmap_git->hashes) hash = get_be32(bitmap_git->hashes + entry->nr); @@ -1136,7 +1136,7 @@ int rebuild_existing_bitmaps(struct bitmap_index *bitmap_git, struct object_entry *oe; entry = &bitmap_git->pack->revindex[i]; - nth_packed_object_oid(&oid, bitmap_git->pack, entry->nr); + nth_packed_object_id(&oid, bitmap_git->pack, entry->nr); oe = packlist_find(mapping, &oid); if (oe) diff --git a/packfile.c b/packfile.c index 99dd1a7d09..947c3f8e5d 100644 --- a/packfile.c +++ b/packfile.c @@ -1261,7 +1261,7 @@ static int retry_bad_packed_offset(struct repository *r, revidx = find_pack_revindex(p, obj_offset); if (!revidx) return OBJ_BAD; - nth_packed_object_oid(&oid, p, revidx->nr); + nth_packed_object_id(&oid, p, revidx->nr); mark_bad_packed_object(p, oid.hash); type = oid_object_info(r, &oid, NULL); if (type <= OBJ_NONE) @@ -1693,7 +1693,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, off_t len = revidx[1].offset - obj_offset; if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) { struct object_id oid; - nth_packed_object_oid(&oid, p, revidx->nr); + nth_packed_object_id(&oid, p, revidx->nr); error("bad packed object CRC for %s", oid_to_hex(&oid)); mark_bad_packed_object(p, oid.hash); @@ -1782,7 +1782,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, struct object_id base_oid; revidx = find_pack_revindex(p, obj_offset); if (revidx) { - nth_packed_object_oid(&base_oid, p, revidx->nr); + nth_packed_object_id(&base_oid, p, revidx->nr); error("failed to read delta base object %s" " at offset %"PRIuMAX" from %s", oid_to_hex(&base_oid), (uintmax_t)obj_offset, @@ -1890,15 +1890,15 @@ const unsigned char *nth_packed_object_sha1(struct packed_git *p, } } -const struct object_id *nth_packed_object_oid(struct object_id *oid, - struct packed_git *p, - uint32_t n) +int nth_packed_object_id(struct object_id *oid, + struct packed_git *p, + uint32_t n) { const unsigned char *hash = nth_packed_object_sha1(p, n); if (!hash) - return NULL; + return -1; hashcpy(oid->hash, hash); - return oid; + return 0; } void check_pack_index_ptr(const struct packed_git *p, const void *vptr) @@ -2077,7 +2077,7 @@ int for_each_object_in_pack(struct packed_git *p, else pos = i; - if (!nth_packed_object_oid(&oid, p, pos)) + if (nth_packed_object_id(&oid, p, pos) < 0) return error("unable to get sha1 of object %u in %s", pos, p->pack_name); diff --git a/packfile.h b/packfile.h index ec536a4ae5..95b83ba25b 100644 --- a/packfile.h +++ b/packfile.h @@ -129,10 +129,9 @@ int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32 const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t n); /* * Like nth_packed_object_sha1, but write the data into the object specified by - * the the first argument. Returns the first argument on success, and NULL on - * error. + * the the first argument. Returns 0 on success, negative otherwise. */ -const struct object_id *nth_packed_object_oid(struct object_id *, struct packed_git *, uint32_t n); +int nth_packed_object_id(struct object_id *, struct packed_git *, uint32_t n); /* * Return the offset of the nth object within the specified packfile. diff --git a/sha1-name.c b/sha1-name.c index f2e24ea666..5bb006e5a9 100644 --- a/sha1-name.c +++ b/sha1-name.c @@ -155,7 +155,6 @@ static void unique_in_pack(struct packed_git *p, struct disambiguate_state *ds) { uint32_t num, i, first = 0; - const struct object_id *current = NULL; if (p->multi_pack_index) return; @@ -173,10 +172,10 @@ static void unique_in_pack(struct packed_git *p, */ for (i = first; i < num && !ds->ambiguous; i++) { struct object_id oid; - current = nth_packed_object_oid(&oid, p, i); - if (!match_sha(ds->len, ds->bin_pfx.hash, current->hash)) + nth_packed_object_id(&oid, p, i); + if (!match_sha(ds->len, ds->bin_pfx.hash, oid.hash)) break; - update_candidates(ds, current); + update_candidates(ds, &oid); } } @@ -643,14 +642,14 @@ static void find_abbrev_len_for_pack(struct packed_git *p, */ mad->init_len = 0; if (!match) { - if (nth_packed_object_oid(&oid, p, first)) + if (!nth_packed_object_id(&oid, p, first)) extend_abbrev_len(&oid, mad); } else if (first < num - 1) { - if (nth_packed_object_oid(&oid, p, first + 1)) + if (!nth_packed_object_id(&oid, p, first + 1)) extend_abbrev_len(&oid, mad); } if (first > 0) { - if (nth_packed_object_oid(&oid, p, first - 1)) + if (!nth_packed_object_id(&oid, p, first - 1)) extend_abbrev_len(&oid, mad); } mad->init_len = mad->cur_len; From 3f83fd5e44c1f038c8a7033cb77399e9ef4f43a9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 23 Feb 2020 23:29:44 -0500 Subject: [PATCH 02/10] pack-objects: read delta base oid into object_id struct When we're considering reusing an on-disk delta, we get the oid of the base as a pointer to unsigned char bytes of the hash, either into the packfile itself (for REF_DELTA) or into the pack idx (using the revindex to convert the offset into an index entry). Instead, we'd prefer to use a more type-safe object_id as much as possible. We can get the pack idx using nth_packed_object_id() instead. For the packfile bytes, we can copy them out using oidread(). This doesn't even incur an extra copy overall, since the next thing we'd always do with that pointer is pass it to can_reuse_delta(), which needs an object_id anyway (and called oidread() itself). So this patch also converts that function to take the object_id directly. Note that we did previously use NULL as a sentinel value when the object isn't a delta. We could probably get away with using the null oid for this, but instead we'll use an explicit boolean flag, which should make things more obvious for people reading the code later. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index de8335e2bd..8692ab3fe6 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1618,23 +1618,17 @@ static void cleanup_preferred_base(void) * deltify other objects against, in order to avoid * circular deltas. */ -static int can_reuse_delta(const unsigned char *base_sha1, +static int can_reuse_delta(const struct object_id *base_oid, struct object_entry *delta, struct object_entry **base_out) { struct object_entry *base; - struct object_id base_oid; - - if (!base_sha1) - return 0; - - oidread(&base_oid, base_sha1); /* * First see if we're already sending the base (or it's explicitly in * our "excluded" list). */ - base = packlist_find(&to_pack, &base_oid); + base = packlist_find(&to_pack, base_oid); if (base) { if (!in_same_island(&delta->idx.oid, &base->idx.oid)) return 0; @@ -1647,9 +1641,9 @@ static int can_reuse_delta(const unsigned char *base_sha1, * even if it was buried too deep in history to make it into the * packing list. */ - if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, &base_oid)) { + if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, base_oid)) { if (use_delta_islands) { - if (!in_same_island(&delta->idx.oid, &base_oid)) + if (!in_same_island(&delta->idx.oid, base_oid)) return 0; } *base_out = NULL; @@ -1666,7 +1660,8 @@ static void check_object(struct object_entry *entry) if (IN_PACK(entry)) { struct packed_git *p = IN_PACK(entry); struct pack_window *w_curs = NULL; - const unsigned char *base_ref = NULL; + int have_base = 0; + struct object_id base_ref; struct object_entry *base_entry; unsigned long used, used_0; unsigned long avail; @@ -1707,9 +1702,13 @@ static void check_object(struct object_entry *entry) unuse_pack(&w_curs); return; case OBJ_REF_DELTA: - if (reuse_delta && !entry->preferred_base) - base_ref = use_pack(p, &w_curs, - entry->in_pack_offset + used, NULL); + if (reuse_delta && !entry->preferred_base) { + oidread(&base_ref, + use_pack(p, &w_curs, + entry->in_pack_offset + used, + NULL)); + have_base = 1; + } entry->in_pack_header_size = used + the_hash_algo->rawsz; break; case OBJ_OFS_DELTA: @@ -1739,13 +1738,15 @@ static void check_object(struct object_entry *entry) revidx = find_pack_revindex(p, ofs); if (!revidx) goto give_up; - base_ref = nth_packed_object_sha1(p, revidx->nr); + if (!nth_packed_object_id(&base_ref, p, revidx->nr)) + have_base = 1; } entry->in_pack_header_size = used + used_0; break; } - if (can_reuse_delta(base_ref, entry, &base_entry)) { + if (have_base && + can_reuse_delta(&base_ref, entry, &base_entry)) { oe_set_type(entry, entry->in_pack_type); SET_SIZE(entry, in_pack_size); /* delta size */ SET_DELTA_SIZE(entry, in_pack_size); @@ -1755,7 +1756,7 @@ static void check_object(struct object_entry *entry) entry->delta_sibling_idx = base_entry->delta_child_idx; SET_DELTA_CHILD(base_entry, entry); } else { - SET_DELTA_EXT(entry, base_ref); + SET_DELTA_EXT(entry, base_ref.hash); } unuse_pack(&w_curs); From a93c141ddef25dc999fff73c590b42d3af606ff3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 23 Feb 2020 23:30:07 -0500 Subject: [PATCH 03/10] pack-objects: convert oe_set_delta_ext() to use object_id We already store an object_id internally, and now our sole caller also has one. Let's stop passing around the internal hash array, which adds a bit of type safety. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 2 +- pack-objects.c | 4 ++-- pack-objects.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 8692ab3fe6..44f44fcb1a 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1756,7 +1756,7 @@ static void check_object(struct object_entry *entry) entry->delta_sibling_idx = base_entry->delta_child_idx; SET_DELTA_CHILD(base_entry, entry); } else { - SET_DELTA_EXT(entry, base_ref.hash); + SET_DELTA_EXT(entry, &base_ref); } unuse_pack(&w_curs); diff --git a/pack-objects.c b/pack-objects.c index 5e5a3c62d9..f2a433885a 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -203,14 +203,14 @@ struct object_entry *packlist_alloc(struct packing_data *pdata, void oe_set_delta_ext(struct packing_data *pdata, struct object_entry *delta, - const unsigned char *sha1) + const struct object_id *oid) { struct object_entry *base; ALLOC_GROW(pdata->ext_bases, pdata->nr_ext + 1, pdata->alloc_ext); base = &pdata->ext_bases[pdata->nr_ext++]; memset(base, 0, sizeof(*base)); - hashcpy(base->idx.oid.hash, sha1); + oidcpy(&base->idx.oid, oid); /* These flags mark that we are not part of the actual pack output. */ base->preferred_base = 1; diff --git a/pack-objects.h b/pack-objects.h index d3975e079b..9d88e3e518 100644 --- a/pack-objects.h +++ b/pack-objects.h @@ -292,7 +292,7 @@ static inline void oe_set_delta(struct packing_data *pack, void oe_set_delta_ext(struct packing_data *pack, struct object_entry *e, - const unsigned char *sha1); + const struct object_id *oid); static inline struct object_entry *oe_delta_child( const struct packing_data *pack, From f66d4e025059b734ba8da40ec059bb0fb8991306 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 23 Feb 2020 23:31:22 -0500 Subject: [PATCH 04/10] pack-objects: use object_id struct in pack-reuse code When the pack-reuse code is dumping an OFS_DELTA entry to a client that doesn't support it, we re-write it as a REF_DELTA. To do so, we use nth_packed_object_sha1() to get the oid, but that function is soon going away in favor of the more type-safe nth_packed_object_id(). Let's switch now in preparation. Note that this does incur an extra hash copy (from the pack idx mmap to the object_id and then to the output, rather than straight from mmap to the output). But this is not worth worrying about. It's probably not measurable even when it triggers, and this is fallback code that we expect to trigger very rarely (since everybody supports OFS_DELTA these days anyway). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 44f44fcb1a..73fca2cb17 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -872,14 +872,15 @@ static void write_reused_pack_one(size_t pos, struct hashfile *out, /* Convert to REF_DELTA if we must... */ if (!allow_ofs_delta) { int base_pos = find_revindex_position(reuse_packfile, base_offset); - const unsigned char *base_sha1 = - nth_packed_object_sha1(reuse_packfile, - reuse_packfile->revindex[base_pos].nr); + struct object_id base_oid; + + nth_packed_object_id(&base_oid, reuse_packfile, + reuse_packfile->revindex[base_pos].nr); len = encode_in_pack_object_header(header, sizeof(header), OBJ_REF_DELTA, size); hashwrite(out, header, len); - hashwrite(out, base_sha1, 20); + hashwrite(out, base_oid.hash, 20); copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur); return; } From 500e4f236606684467b0b34b86e319dfa40747c4 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 23 Feb 2020 23:32:27 -0500 Subject: [PATCH 05/10] pack-bitmap: use object_id when loading on-disk bitmaps A pack bitmap file contains the index position of the commit for each bitmap, which we then translate into an object id via nth_packed_object_sha1(). In preparation for that function going away, we can switch to the more type-safe nth_packed_object_id(). Note that even though the result ends up in an object_id this does incur an extra copy of the hash (into our temporary object_id, and then into the final malloc'd stored_bitmap struct). This shouldn't make any measurable difference. If it did, we could avoid this copy _and_ the copy of the rest of the items by allocating the stored_bitmap struct beforehand and reading directly into it from the bitmap file. Or better still, if this is a bottleneck, we could introduce an on-disk index to the bitmap file so we don't have to read every single entry to use just one of them. So it's not worth worrying about micro-optimizing out this one hash copy. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pack-bitmap.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pack-bitmap.c b/pack-bitmap.c index c81d323329..1a067885a1 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -169,7 +169,7 @@ static int load_bitmap_header(struct bitmap_index *index) static struct stored_bitmap *store_bitmap(struct bitmap_index *index, struct ewah_bitmap *root, - const unsigned char *hash, + const struct object_id *oid, struct stored_bitmap *xor_with, int flags) { @@ -181,7 +181,7 @@ static struct stored_bitmap *store_bitmap(struct bitmap_index *index, stored->root = root; stored->xor = xor_with; stored->flags = flags; - oidread(&stored->oid, hash); + oidcpy(&stored->oid, oid); hash_pos = kh_put_oid_map(index->bitmaps, stored->oid, &ret); @@ -189,7 +189,7 @@ static struct stored_bitmap *store_bitmap(struct bitmap_index *index, * because the SHA1 already existed on the map. this is bad, there * shouldn't be duplicated commits in the index */ if (ret == 0) { - error("Duplicate entry in bitmap index: %s", hash_to_hex(hash)); + error("Duplicate entry in bitmap index: %s", oid_to_hex(oid)); return NULL; } @@ -221,13 +221,13 @@ static int load_bitmap_entries_v1(struct bitmap_index *index) struct ewah_bitmap *bitmap = NULL; struct stored_bitmap *xor_bitmap = NULL; uint32_t commit_idx_pos; - const unsigned char *sha1; + struct object_id oid; commit_idx_pos = read_be32(index->map, &index->map_pos); xor_offset = read_u8(index->map, &index->map_pos); flags = read_u8(index->map, &index->map_pos); - sha1 = nth_packed_object_sha1(index->pack, commit_idx_pos); + nth_packed_object_id(&oid, index->pack, commit_idx_pos); bitmap = read_bitmap_1(index); if (!bitmap) @@ -244,7 +244,7 @@ static int load_bitmap_entries_v1(struct bitmap_index *index) } recent_bitmaps[i % MAX_XOR_OFFSET] = store_bitmap( - index, bitmap, sha1, xor_bitmap, flags); + index, bitmap, &oid, xor_bitmap, flags); } return 0; From e31c71083abef5dbe4b4112a1a1a24a90ce587f3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 23 Feb 2020 23:33:18 -0500 Subject: [PATCH 06/10] pack-check: convert "internal error" die to a BUG() If we fail to load the oid from the index of a packfile, we'll die() with an "internal error". But this should never happen: we'd fail here only if the idx needed to be lazily opened (but we've already opened it) or if we asked for an out-of-range index (but we're iterating using the same count that we'd check the range against). A corrupted index might have a bogus count (e.g., too large for its size), but we'd have complained and aborted already when opening the index initially. While we're here, we can add a few details so that if this bug ever _does_ trigger, we'll have a bit more information. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pack-check.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pack-check.c b/pack-check.c index e4ef71c673..39196ecfbc 100644 --- a/pack-check.c +++ b/pack-check.c @@ -99,7 +99,8 @@ static int verify_packfile(struct repository *r, for (i = 0; i < nr_objects; i++) { entries[i].oid.hash = nth_packed_object_sha1(p, i); if (!entries[i].oid.hash) - die("internal error pack-check nth-packed-object"); + BUG("unable to get oid of object %lu from %s", + (unsigned long)i, p->pack_name); entries[i].offset = nth_packed_object_offset(p, i); entries[i].nr = i; } From 63f4a7fc0107ec240f48605a4d4f8e41b91caa41 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 23 Feb 2020 23:36:31 -0500 Subject: [PATCH 07/10] pack-check: push oid lookup into loop When we're checking a pack with fsck or verify-pack, we first sort the idx entries by offset, since accessing them in pack order is more efficient. To do so, we loop over them and fill in an array of structs with the offset, object_id, and index position of each, sort the result, and only then do we iterate over the sorted array and process each entry. In order to avoid the memory cost of storing the hash of each object, we just store a pointer into the copy in the mmap'd pack index file. To keep that property even as the rest of the code converted to "struct object_id", commit 9fd750461b (Convert the verify_pack callback to struct object_id, 2017-05-06) introduced a union in order to type-pun the pointer-to-hash into an object_id struct. But we can make this even simpler by observing that the sort operation doesn't need the object id at all! We only need them one at a time while we actually process each entry. So we can just omit the oid from the struct entirely and load it on the fly into a local variable in the second loop. This gets rid of the type-punning, and lets us directly use the more type-safe nth_packed_object_id(), simplifying the code. And as a bonus, it saves 8 bytes of memory per object. Note that this does mean we'll do the offset lookup for each object before the oid lookup. The oid lookup has more safety checks in it (e.g., for looking past p->num_objects) which in theory protected the offset lookup. But since violating those checks was already a BUG() condition (as described in the previous commit), it's not worth worrying about. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pack-check.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/pack-check.c b/pack-check.c index 39196ecfbc..dad6d8ae7f 100644 --- a/pack-check.c +++ b/pack-check.c @@ -8,10 +8,6 @@ struct idx_entry { off_t offset; - union idx_entry_object { - const unsigned char *hash; - struct object_id *oid; - } oid; unsigned int nr; }; @@ -97,10 +93,6 @@ static int verify_packfile(struct repository *r, entries[nr_objects].offset = pack_sig_ofs; /* first sort entries by pack offset, since unpacking them is more efficient that way */ for (i = 0; i < nr_objects; i++) { - entries[i].oid.hash = nth_packed_object_sha1(p, i); - if (!entries[i].oid.hash) - BUG("unable to get oid of object %lu from %s", - (unsigned long)i, p->pack_name); entries[i].offset = nth_packed_object_offset(p, i); entries[i].nr = i; } @@ -108,11 +100,16 @@ static int verify_packfile(struct repository *r, for (i = 0; i < nr_objects; i++) { void *data; + struct object_id oid; enum object_type type; unsigned long size; off_t curpos; int data_valid; + if (nth_packed_object_id(&oid, p, entries[i].nr) < 0) + BUG("unable to get oid of object %lu from %s", + (unsigned long)entries[i].nr, p->pack_name); + if (p->index_version > 1) { off_t offset = entries[i].offset; off_t len = entries[i+1].offset - offset; @@ -120,7 +117,7 @@ static int verify_packfile(struct repository *r, if (check_pack_crc(p, w_curs, offset, len, nr)) err = error("index CRC mismatch for object %s " "from %s at offset %"PRIuMAX"", - oid_to_hex(entries[i].oid.oid), + oid_to_hex(&oid), p->pack_name, (uintmax_t)offset); } @@ -143,14 +140,14 @@ static int verify_packfile(struct repository *r, if (data_valid && !data) err = error("cannot unpack %s from %s at offset %"PRIuMAX"", - oid_to_hex(entries[i].oid.oid), p->pack_name, + oid_to_hex(&oid), p->pack_name, (uintmax_t)entries[i].offset); - else if (check_object_signature(r, entries[i].oid.oid, data, size, type_name(type))) + else if (check_object_signature(r, &oid, data, size, type_name(type))) err = error("packed %s from %s is corrupt", - oid_to_hex(entries[i].oid.oid), p->pack_name); + oid_to_hex(&oid), p->pack_name); else if (fn) { int eaten = 0; - err |= fn(entries[i].oid.oid, type, size, data, &eaten); + err |= fn(&oid, type, size, data, &eaten); if (eaten) data = NULL; } From b99b6bcc57faf5c989fc18c3b8d4d92df3407cec Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 23 Feb 2020 23:36:56 -0500 Subject: [PATCH 08/10] packed_object_info(): use object_id for returning delta base If a caller sets the object_info.delta_base_sha1 to a non-NULL pointer, we'll write the oid of the object's delta base to it. But we can increase our type safety by switching this to a real object_id struct. All of our callers are just pointing into the hash member of an object_id anyway, so there's no inconvenience. Note that we do still keep it as a pointer-to-struct, because the NULL sentinel value tells us whether the caller is even interested in the information. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 2 +- object-store.h | 2 +- packfile.c | 6 +++--- ref-filter.c | 4 ++-- sha1-file.c | 8 ++++---- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index d6a1aa74cd..272f9fc6d7 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -262,7 +262,7 @@ static void expand_atom(struct strbuf *sb, const char *atom, int len, strbuf_addstr(sb, data->rest); } else if (is_atom("deltabase", atom, len)) { if (data->mark_query) - data->info.delta_base_sha1 = data->delta_base_oid.hash; + data->info.delta_base_oid = &data->delta_base_oid; else strbuf_addstr(sb, oid_to_hex(&data->delta_base_oid)); diff --git a/object-store.h b/object-store.h index 5b047637e3..be72fee7d5 100644 --- a/object-store.h +++ b/object-store.h @@ -300,7 +300,7 @@ struct object_info { enum object_type *typep; unsigned long *sizep; off_t *disk_sizep; - unsigned char *delta_base_sha1; + struct object_id *delta_base_oid; struct strbuf *type_name; void **contentp; diff --git a/packfile.c b/packfile.c index 947c3f8e5d..ec7349bb86 100644 --- a/packfile.c +++ b/packfile.c @@ -1556,7 +1556,7 @@ int packed_object_info(struct repository *r, struct packed_git *p, } } - if (oi->delta_base_sha1) { + if (oi->delta_base_oid) { if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) { const unsigned char *base; @@ -1567,9 +1567,9 @@ int packed_object_info(struct repository *r, struct packed_git *p, goto out; } - hashcpy(oi->delta_base_sha1, base); + hashcpy(oi->delta_base_oid->hash, base); } else - hashclr(oi->delta_base_sha1); + oidclr(oi->delta_base_oid); } oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED : diff --git a/ref-filter.c b/ref-filter.c index 6867e33648..79bb520678 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -279,9 +279,9 @@ static int deltabase_atom_parser(const struct ref_format *format, struct used_at if (arg) return strbuf_addf_ret(err, -1, _("%%(deltabase) does not take arguments")); if (*atom->name == '*') - oi_deref.info.delta_base_sha1 = oi_deref.delta_base_oid.hash; + oi_deref.info.delta_base_oid = &oi_deref.delta_base_oid; else - oi.info.delta_base_sha1 = oi.delta_base_oid.hash; + oi.info.delta_base_oid = &oi.delta_base_oid; return 0; } diff --git a/sha1-file.c b/sha1-file.c index d785de8a85..616886799e 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -1354,8 +1354,8 @@ static int loose_object_info(struct repository *r, struct strbuf hdrbuf = STRBUF_INIT; unsigned long size_scratch; - if (oi->delta_base_sha1) - hashclr(oi->delta_base_sha1); + if (oi->delta_base_oid) + oidclr(oi->delta_base_oid); /* * If we don't care about type or size, then we don't @@ -1474,8 +1474,8 @@ static int do_oid_object_info_extended(struct repository *r, *(oi->sizep) = co->size; if (oi->disk_sizep) *(oi->disk_sizep) = 0; - if (oi->delta_base_sha1) - hashclr(oi->delta_base_sha1); + if (oi->delta_base_oid) + oidclr(oi->delta_base_oid); if (oi->type_name) strbuf_addstr(oi->type_name, type_name(co->type)); if (oi->contentp) From 6ac9760a30683a24e80a7aefe30e383046e810f0 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 23 Feb 2020 23:37:31 -0500 Subject: [PATCH 09/10] packed_object_info(): use object_id internally for delta base The previous commit changed the public interface of packed_object_info() to return a struct object_id rather than a bare hash. That enables us to convert our internal helper, as well. We can use nth_packed_object_id() directly for OFS_DELTA, but we'll still have to use oidread() to pull the hash for a REF_DELTA out of the packfile. There should be no additional cost, since we're copying directly into the object_id the caller provided us (just as we did before; it's just happening now via nth_packed_object_id()). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- packfile.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/packfile.c b/packfile.c index ec7349bb86..90cb5a05ac 100644 --- a/packfile.c +++ b/packfile.c @@ -1225,30 +1225,32 @@ off_t get_delta_base(struct packed_git *p, * the final object lookup), but more expensive for OFS deltas (we * have to load the revidx to convert the offset back into a sha1). */ -static const unsigned char *get_delta_base_sha1(struct packed_git *p, - struct pack_window **w_curs, - off_t curpos, - enum object_type type, - off_t delta_obj_offset) +static int get_delta_base_oid(struct packed_git *p, + struct pack_window **w_curs, + off_t curpos, + struct object_id *oid, + enum object_type type, + off_t delta_obj_offset) { if (type == OBJ_REF_DELTA) { unsigned char *base = use_pack(p, w_curs, curpos, NULL); - return base; + oidread(oid, base); + return 0; } else if (type == OBJ_OFS_DELTA) { struct revindex_entry *revidx; off_t base_offset = get_delta_base(p, w_curs, &curpos, type, delta_obj_offset); if (!base_offset) - return NULL; + return -1; revidx = find_pack_revindex(p, base_offset); if (!revidx) - return NULL; + return -1; - return nth_packed_object_sha1(p, revidx->nr); + return nth_packed_object_id(oid, p, revidx->nr); } else - return NULL; + return -1; } static int retry_bad_packed_offset(struct repository *r, @@ -1558,16 +1560,12 @@ int packed_object_info(struct repository *r, struct packed_git *p, if (oi->delta_base_oid) { if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) { - const unsigned char *base; - - base = get_delta_base_sha1(p, &w_curs, curpos, - type, obj_offset); - if (!base) { + if (get_delta_base_oid(p, &w_curs, curpos, + oi->delta_base_oid, + type, obj_offset) < 0) { type = OBJ_BAD; goto out; } - - hashcpy(oi->delta_base_oid->hash, base); } else oidclr(oi->delta_base_oid); } From 2fecc48cade44529dff2594eadfb294643cdc24d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 23 Feb 2020 23:37:54 -0500 Subject: [PATCH 10/10] packfile: drop nth_packed_object_sha1() Once upon a time, nth_packed_object_sha1() was the primary way to get the oid of a packfile's index position. But these days we have the more type-safe nth_packed_object_id() wrapper, and all callers have been converted. Let's drop the "sha1" version (turning the safer wrapper into a single function) so that nobody is tempted to introduce new callers. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- packfile.c | 23 +++++++---------------- packfile.h | 12 +++--------- 2 files changed, 10 insertions(+), 25 deletions(-) diff --git a/packfile.c b/packfile.c index 90cb5a05ac..f4e752996d 100644 --- a/packfile.c +++ b/packfile.c @@ -1867,35 +1867,26 @@ int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32 index_lookup, index_lookup_width, result); } -const unsigned char *nth_packed_object_sha1(struct packed_git *p, - uint32_t n) +int nth_packed_object_id(struct object_id *oid, + struct packed_git *p, + uint32_t n) { const unsigned char *index = p->index_data; const unsigned int hashsz = the_hash_algo->rawsz; if (!index) { if (open_pack_index(p)) - return NULL; + return -1; index = p->index_data; } if (n >= p->num_objects) - return NULL; + return -1; index += 4 * 256; if (p->index_version == 1) { - return index + (hashsz + 4) * n + 4; + oidread(oid, index + (hashsz + 4) * n + 4); } else { index += 8; - return index + hashsz * n; + oidread(oid, index + hashsz * n); } -} - -int nth_packed_object_id(struct object_id *oid, - struct packed_git *p, - uint32_t n) -{ - const unsigned char *hash = nth_packed_object_sha1(p, n); - if (!hash) - return -1; - hashcpy(oid->hash, hash); return 0; } diff --git a/packfile.h b/packfile.h index 95b83ba25b..240aa73b95 100644 --- a/packfile.h +++ b/packfile.h @@ -121,15 +121,9 @@ void check_pack_index_ptr(const struct packed_git *p, const void *ptr); int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32_t *result); /* - * Return the SHA-1 of the nth object within the specified packfile. - * Open the index if it is not already open. The return value points - * at the SHA-1 within the mmapped index. Return NULL if there is an - * error. - */ -const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t n); -/* - * Like nth_packed_object_sha1, but write the data into the object specified by - * the the first argument. Returns 0 on success, negative otherwise. + * Write the oid of the nth object within the specified packfile into the first + * parameter. Open the index if it is not already open. Returns 0 on success, + * negative otherwise. */ int nth_packed_object_id(struct object_id *, struct packed_git *, uint32_t n);