From 6d79e5ecb342586466e0321487f08f17cbccb2bb Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 2019 03:40:50 -0400 Subject: [PATCH 01/17] describe: fix accidental oid/hash type-punning The find_commit_name() function passes an object_id.hash as the key of a hashmap. That ends up in commit_name_neq(), which then feeds it to oideq(). Which means we should actually be passing the whole "struct object_id". It works anyway because pointers to the two are interchangeable. And because we're going through a layer of void pointers, the compiler doesn't notice the type mismatch. But it's worth cleaning up (especially since once we switch away from sha1hash() on the same line, accessing the hash member will look doubly out of place). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/describe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/describe.c b/builtin/describe.c index 1409cedce2..0a5cde00a2 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -76,7 +76,7 @@ static int commit_name_neq(const void *unused_cmp_data, static inline struct commit_name *find_commit_name(const struct object_id *peeled) { - return hashmap_get_from_hash(&names, sha1hash(peeled->hash), peeled->hash); + return hashmap_get_from_hash(&names, sha1hash(peeled->hash), peeled); } static int replace_name(struct commit_name *e, From 62b89d43e2eab692aaa65393de901e4119eddbb8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 2019 03:40:54 -0400 Subject: [PATCH 02/17] upload-pack: rename a "sha1" variable to "oid" This variable is a "struct object_id", but uses the old-style name "sha1". Let's call it oid to match more modern code (and make it clear that it can handle any algorithm, since it uses parse_oid_hex() properly). 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- upload-pack.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 4d2129e7fc..d9a62adef0 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -528,13 +528,13 @@ static int get_reachable_list(struct object_array *src, return -1; while ((i = read_in_full(cmd.out, namebuf, hexsz + 1)) == hexsz + 1) { - struct object_id sha1; + struct object_id oid; const char *p; - if (parse_oid_hex(namebuf, &sha1, &p) || *p != '\n') + if (parse_oid_hex(namebuf, &oid, &p) || *p != '\n') break; - o = lookup_object(the_repository, sha1.hash); + o = lookup_object(the_repository, oid.hash); if (o && o->type == OBJ_COMMIT) { o->flags &= ~TMP_MARK; } From 05805d7411a78b606c4bca2f0288fd842df6addd Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 2019 03:40:59 -0400 Subject: [PATCH 03/17] pack-bitmap-write: convert some helpers to use object_id A few functions take raw hash pointers, but all of their callers actually have a "struct object_id". Let's retain that extra type as long as possible (which will let future patches extend that further, and so on). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 802ed62677..59aa201043 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -142,13 +142,13 @@ static inline void reset_all_seen(void) seen_objects_nr = 0; } -static uint32_t find_object_pos(const unsigned char *hash) +static uint32_t find_object_pos(const struct object_id *oid) { - struct object_entry *entry = packlist_find(writer.to_pack, hash, NULL); + struct object_entry *entry = packlist_find(writer.to_pack, oid->hash, NULL); if (!entry) { die("Failed to write bitmap index. 
Packfile doesn't have full closure " - "(object %s is missing)", hash_to_hex(hash)); + "(object %s is missing)", oid_to_hex(oid)); } return oe_in_pack_pos(writer.to_pack, entry); @@ -157,7 +157,7 @@ static uint32_t find_object_pos(const unsigned char *hash) static void show_object(struct object *object, const char *name, void *data) { struct bitmap *base = data; - bitmap_set(base, find_object_pos(object->oid.hash)); + bitmap_set(base, find_object_pos(&object->oid)); mark_as_seen(object); } @@ -170,7 +170,7 @@ static int add_to_include_set(struct bitmap *base, struct commit *commit) { khiter_t hash_pos; - uint32_t bitmap_pos = find_object_pos(commit->object.oid.hash); + uint32_t bitmap_pos = find_object_pos(&commit->object.oid); if (bitmap_get(base, bitmap_pos)) return 0; @@ -375,14 +375,14 @@ void bitmap_writer_reuse_bitmaps(struct packing_data *to_pack) */ } -static struct ewah_bitmap *find_reused_bitmap(const unsigned char *sha1) +static struct ewah_bitmap *find_reused_bitmap(const struct object_id *oid) { khiter_t hash_pos; if (!writer.reused) return NULL; - hash_pos = kh_get_sha1(writer.reused, sha1); + hash_pos = kh_get_sha1(writer.reused, oid->hash); if (hash_pos >= kh_end(writer.reused)) return NULL; @@ -422,14 +422,14 @@ void bitmap_writer_select_commits(struct commit **indexed_commits, if (next == 0) { chosen = indexed_commits[i]; - reused_bitmap = find_reused_bitmap(chosen->object.oid.hash); + reused_bitmap = find_reused_bitmap(&chosen->object.oid); } else { chosen = indexed_commits[i + next]; for (j = 0; j <= next; ++j) { struct commit *cm = indexed_commits[i + j]; - reused_bitmap = find_reused_bitmap(cm->object.oid.hash); + reused_bitmap = find_reused_bitmap(&cm->object.oid); if (reused_bitmap || (cm->object.flags & NEEDS_BITMAP) != 0) { chosen = cm; break; From 3df28caefb2193fb7bbc87a427a620d96d508c8d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 2019 03:41:03 -0400 Subject: [PATCH 04/17] pack-objects: convert packlist_find() to use 
object_id We take a raw hash pointer, but most of our callers have a "struct object_id" already. Let's switch to taking the full struct, which will let us continue removing uses of raw sha1 buffers. There are two callers that do need special attention: - in rebuild_existing_bitmaps(), we need to switch to nth_packed_object_oid(). This incurs an extra hash copy over pointing straight to the mmap'd sha1, but it shouldn't be measurable compared to the rest of the operation. - in can_reuse_delta() we already spent the effort to copy the sha1 into a "struct object_id", but now we just have to do so a little earlier in the function (we can't easily convert that function's callers because they may be pointing at mmap'd REF_DELTA blocks). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 19 ++++++++++--------- pack-bitmap-write.c | 2 +- pack-bitmap.c | 6 +++--- pack-objects.c | 4 ++-- pack-objects.h | 2 +- 5 files changed, 17 insertions(+), 16 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index b2be8869c2..c95693fd4b 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -606,12 +606,12 @@ static int mark_tagged(const char *path, const struct object_id *oid, int flag, void *cb_data) { struct object_id peeled; - struct object_entry *entry = packlist_find(&to_pack, oid->hash, NULL); + struct object_entry *entry = packlist_find(&to_pack, oid, NULL); if (entry) entry->tagged = 1; if (!peel_ref(path, &peeled)) { - entry = packlist_find(&to_pack, peeled.hash, NULL); + entry = packlist_find(&to_pack, &peeled, NULL); if (entry) entry->tagged = 1; } @@ -996,7 +996,7 @@ static int have_duplicate_entry(const struct object_id *oid, { struct object_entry *entry; - entry = packlist_find(&to_pack, oid->hash, index_pos); + entry = packlist_find(&to_pack, oid, index_pos); if (!entry) return 0; @@ -1494,11 +1494,13 @@ static int can_reuse_delta(const unsigned char *base_sha1, if (!base_sha1) return 0; + 
oidread(&base_oid, base_sha1); + /* * First see if we're already sending the base (or it's explicitly in * our "excluded" list). */ - base = packlist_find(&to_pack, base_sha1, NULL); + base = packlist_find(&to_pack, &base_oid, NULL); if (base) { if (!in_same_island(&delta->idx.oid, &base->idx.oid)) return 0; @@ -1511,7 +1513,6 @@ static int can_reuse_delta(const unsigned char *base_sha1, * even if it was buried too deep in history to make it into the * packing list. */ - oidread(&base_oid, base_sha1); if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, &base_oid)) { if (use_delta_islands) { if (!in_same_island(&delta->idx.oid, &base_oid)) @@ -2571,7 +2572,7 @@ static void add_tag_chain(const struct object_id *oid) * it was included via bitmaps, we would not have parsed it * previously). */ - if (packlist_find(&to_pack, oid->hash, NULL)) + if (packlist_find(&to_pack, oid, NULL)) return; tag = lookup_tag(the_repository, oid); @@ -2595,7 +2596,7 @@ static int add_ref_tag(const char *path, const struct object_id *oid, int flag, if (starts_with(path, "refs/tags/") && /* is a tag? */ !peel_ref(path, &peeled) && /* peelable? */ - packlist_find(&to_pack, peeled.hash, NULL)) /* object packed? */ + packlist_find(&to_pack, &peeled, NULL)) /* object packed? 
*/ add_tag_chain(oid); return 0; } @@ -2795,7 +2796,7 @@ static void show_object(struct object *obj, const char *name, void *data) for (p = strchr(name, '/'); p; p = strchr(p + 1, '/')) depth++; - ent = packlist_find(&to_pack, obj->oid.hash, NULL); + ent = packlist_find(&to_pack, &obj->oid, NULL); if (ent && depth > oe_tree_depth(&to_pack, ent)) oe_set_tree_depth(&to_pack, ent, depth); } @@ -3026,7 +3027,7 @@ static void loosen_unused_packed_objects(void) for (i = 0; i < p->num_objects; i++) { nth_packed_object_oid(&oid, p, i); - if (!packlist_find(&to_pack, oid.hash, NULL) && + if (!packlist_find(&to_pack, &oid, NULL) && !has_sha1_pack_kept_or_nonlocal(&oid) && !loosened_object_can_be_discarded(&oid, p->mtime)) if (force_object_loose(&oid, p->mtime)) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 59aa201043..0637378533 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -144,7 +144,7 @@ static inline void reset_all_seen(void) static uint32_t find_object_pos(const struct object_id *oid) { - struct object_entry *entry = packlist_find(writer.to_pack, oid->hash, NULL); + struct object_entry *entry = packlist_find(writer.to_pack, oid, NULL); if (!entry) { die("Failed to write bitmap index. 
Packfile doesn't have full closure " diff --git a/pack-bitmap.c b/pack-bitmap.c index 6069b2fe55..ff1f07e249 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1057,13 +1057,13 @@ int rebuild_existing_bitmaps(struct bitmap_index *bitmap_git, reposition = xcalloc(num_objects, sizeof(uint32_t)); for (i = 0; i < num_objects; ++i) { - const unsigned char *sha1; + struct object_id oid; struct revindex_entry *entry; struct object_entry *oe; entry = &bitmap_git->pack->revindex[i]; - sha1 = nth_packed_object_sha1(bitmap_git->pack, entry->nr); - oe = packlist_find(mapping, sha1, NULL); + nth_packed_object_oid(&oid, bitmap_git->pack, entry->nr); + oe = packlist_find(mapping, &oid, NULL); if (oe) reposition[i] = oe_in_pack_pos(mapping, oe) + 1; diff --git a/pack-objects.c b/pack-objects.c index ce33b8906e..49fdf52ea6 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -68,7 +68,7 @@ static void rehash_objects(struct packing_data *pdata) } struct object_entry *packlist_find(struct packing_data *pdata, - const unsigned char *sha1, + const struct object_id *oid, uint32_t *index_pos) { uint32_t i; @@ -77,7 +77,7 @@ struct object_entry *packlist_find(struct packing_data *pdata, if (!pdata->index_size) return NULL; - i = locate_object_entry_hash(pdata, sha1, &found); + i = locate_object_entry_hash(pdata, oid->hash, &found); if (index_pos) *index_pos = i; diff --git a/pack-objects.h b/pack-objects.h index 6fde7ce27c..857d43850b 100644 --- a/pack-objects.h +++ b/pack-objects.h @@ -187,7 +187,7 @@ struct object_entry *packlist_alloc(struct packing_data *pdata, uint32_t index_pos); struct object_entry *packlist_find(struct packing_data *pdata, - const unsigned char *sha1, + const struct object_id *oid, uint32_t *index_pos); static inline uint32_t pack_name_hash(const char *name) From 5e7ac6802823ec23759b1b986315bd65b0881ff9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 2019 03:41:07 -0400 Subject: [PATCH 05/17] pack-objects: convert locate_object_entry_hash() to object_id 
There are no callers of locate_object_entry_hash() that aren't just passing us the "hash" member of a "struct object_id". Let's take the whole struct, which gets us closer to removing all raw sha1 variables. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pack-objects.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pack-objects.c b/pack-objects.c index 49fdf52ea6..00a5f6e0ec 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -6,17 +6,17 @@ #include "config.h" static uint32_t locate_object_entry_hash(struct packing_data *pdata, - const unsigned char *sha1, + const struct object_id *oid, int *found) { uint32_t i, mask = (pdata->index_size - 1); - i = sha1hash(sha1) & mask; + i = sha1hash(oid->hash) & mask; while (pdata->index[i] > 0) { uint32_t pos = pdata->index[i] - 1; - if (hasheq(sha1, pdata->objects[pos].idx.oid.hash)) { + if (oideq(oid, &pdata->objects[pos].idx.oid)) { *found = 1; return i; } @@ -56,7 +56,7 @@ static void rehash_objects(struct packing_data *pdata) for (i = 0; i < pdata->nr_objects; i++) { int found; uint32_t ix = locate_object_entry_hash(pdata, - entry->idx.oid.hash, + &entry->idx.oid, &found); if (found) @@ -77,7 +77,7 @@ struct object_entry *packlist_find(struct packing_data *pdata, if (!pdata->index_size) return NULL; - i = locate_object_entry_hash(pdata, oid->hash, &found); + i = locate_object_entry_hash(pdata, oid, &found); if (index_pos) *index_pos = i; From 0ebbcf70e672ef9ad46eb4975a34d3639190aeb2 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 2019 03:41:10 -0400 Subject: [PATCH 06/17] object: convert lookup_unknown_object() to use object_id There are no callers left of lookup_unknown_object() that aren't just passing us the "hash" member of a "struct object_id". Let's take the whole struct, which gets us closer to removing all raw sha1 variables. It also matches the existing conversions of lookup_blob(), etc. 
The conversions of callers were done by hand, but they're all mechanical one-liners. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fsck.c | 2 +- builtin/pack-objects.c | 2 +- fsck.c | 2 +- http-push.c | 2 +- object.c | 6 +++--- object.h | 2 +- refs.c | 2 +- t/helper/test-example-decorate.c | 6 +++--- upload-pack.c | 2 +- walker.c | 2 +- 10 files changed, 14 insertions(+), 14 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index d26fb0a044..e422c82465 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -756,7 +756,7 @@ static int fsck_cache_tree(struct cache_tree *it) static void mark_object_for_connectivity(const struct object_id *oid) { - struct object *obj = lookup_unknown_object(oid->hash); + struct object *obj = lookup_unknown_object(oid); obj->flags |= HAS_OBJ; } diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index c95693fd4b..3e8467aa23 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -2923,7 +2923,7 @@ static void add_objects_in_unpacked_packs(void) for (i = 0; i < p->num_objects; i++) { nth_packed_object_oid(&oid, p, i); - o = lookup_unknown_object(oid.hash); + o = lookup_unknown_object(&oid); if (!(o->flags & OBJECT_ADDED)) mark_in_pack_object(o, p, &in_pack); o->flags |= OBJECT_ADDED; diff --git a/fsck.c b/fsck.c index 4703f55561..117c4a978f 100644 --- a/fsck.c +++ b/fsck.c @@ -1092,7 +1092,7 @@ int fsck_finish(struct fsck_options *options) blob = lookup_blob(the_repository, oid); if (!blob) { - struct object *obj = lookup_unknown_object(oid->hash); + struct object *obj = lookup_unknown_object(oid); ret |= report(options, obj, FSCK_MSG_GITMODULES_BLOB, "non-blob found at .gitmodules"); diff --git a/http-push.c b/http-push.c index e36561a6db..96a98e1e61 100644 --- a/http-push.c +++ b/http-push.c @@ -1432,7 +1432,7 @@ static void one_remote_ref(const char *refname) * may be required for updating server info later. 
*/ if (repo->can_update_info_refs && !has_object_file(&ref->old_oid)) { - obj = lookup_unknown_object(ref->old_oid.hash); + obj = lookup_unknown_object(&ref->old_oid); fprintf(stderr, " fetch %s for %s\n", oid_to_hex(&ref->old_oid), refname); add_fetch_request(obj); diff --git a/object.c b/object.c index e81d47a79c..d5b1d8daaf 100644 --- a/object.c +++ b/object.c @@ -178,11 +178,11 @@ void *object_as_type(struct repository *r, struct object *obj, enum object_type } } -struct object *lookup_unknown_object(const unsigned char *sha1) +struct object *lookup_unknown_object(const struct object_id *oid) { - struct object *obj = lookup_object(the_repository, sha1); + struct object *obj = lookup_object(the_repository, oid->hash); if (!obj) - obj = create_object(the_repository, sha1, + obj = create_object(the_repository, oid->hash, alloc_object_node(the_repository)); return obj; } diff --git a/object.h b/object.h index 4526979ccf..5e0ccfe0e4 100644 --- a/object.h +++ b/object.h @@ -143,7 +143,7 @@ struct object *parse_object_or_die(const struct object_id *oid, const char *name struct object *parse_object_buffer(struct repository *r, const struct object_id *oid, enum object_type type, unsigned long size, void *buffer, int *eaten_p); /** Returns the object, with potentially excess memory allocated. 
**/ -struct object *lookup_unknown_object(const unsigned char *sha1); +struct object *lookup_unknown_object(const struct object_id *oid); struct object_list *object_list_insert(struct object *item, struct object_list **list_p); diff --git a/refs.c b/refs.c index b8a8430c96..cd297ee4bd 100644 --- a/refs.c +++ b/refs.c @@ -379,7 +379,7 @@ static int filter_refs(const char *refname, const struct object_id *oid, enum peel_status peel_object(const struct object_id *name, struct object_id *oid) { - struct object *o = lookup_unknown_object(name->hash); + struct object *o = lookup_unknown_object(name); if (o->type == OBJ_NONE) { int type = oid_object_info(the_repository, name, NULL); diff --git a/t/helper/test-example-decorate.c b/t/helper/test-example-decorate.c index a20a6161e4..c8a1cde7d2 100644 --- a/t/helper/test-example-decorate.c +++ b/t/helper/test-example-decorate.c @@ -26,8 +26,8 @@ int cmd__example_decorate(int argc, const char **argv) * Add 2 objects, one with a non-NULL decoration and one with a NULL * decoration. 
*/ - one = lookup_unknown_object(one_oid.hash); - two = lookup_unknown_object(two_oid.hash); + one = lookup_unknown_object(&one_oid); + two = lookup_unknown_object(&two_oid); ret = add_decoration(&n, one, &decoration_a); if (ret) BUG("when adding a brand-new object, NULL should be returned"); @@ -56,7 +56,7 @@ int cmd__example_decorate(int argc, const char **argv) ret = lookup_decoration(&n, two); if (ret != &decoration_b) BUG("lookup should return added declaration"); - three = lookup_unknown_object(three_oid.hash); + three = lookup_unknown_object(&three_oid); ret = lookup_decoration(&n, three); if (ret) BUG("lookup for unknown object should return NULL"); diff --git a/upload-pack.c b/upload-pack.c index d9a62adef0..ecc19641fe 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -960,7 +960,7 @@ static void receive_needs(struct packet_reader *reader, struct object_array *wan static int mark_our_ref(const char *refname, const char *refname_full, const struct object_id *oid) { - struct object *o = lookup_unknown_object(oid->hash); + struct object *o = lookup_unknown_object(oid); if (ref_is_hidden(refname, refname_full)) { o->flags |= HIDDEN_REF; diff --git a/walker.c b/walker.c index d74ae59c77..06cd2bd569 100644 --- a/walker.c +++ b/walker.c @@ -285,7 +285,7 @@ int walker_fetch(struct walker *walker, int targets, char **target, error("Could not interpret response from server '%s' as something to pull", target[i]); goto done; } - if (process(walker, lookup_unknown_object(oids[i].hash))) + if (process(walker, lookup_unknown_object(&oids[i]))) goto done; } From d0229abd93e1115d935b0e55067e29bcc9815ce8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 2019 03:41:14 -0400 Subject: [PATCH 07/17] object: convert lookup_object() to use object_id There are no callers left of lookup_object() that aren't just passing us the "hash" member of a "struct object_id". Let's take the whole struct, which gets us closer to removing all raw sha1 variables. 
It also matches the existing conversions of lookup_blob(), etc. The conversions of callers were done by hand, but they're all mechanical one-liners. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- blob.c | 2 +- builtin/fast-export.c | 4 ++-- builtin/fsck.c | 6 +++--- builtin/name-rev.c | 3 +-- builtin/prune.c | 2 +- builtin/unpack-objects.c | 2 +- commit.c | 2 +- delta-islands.c | 2 +- fetch-pack.c | 12 ++++++------ http-push.c | 2 +- object.c | 12 ++++++------ object.h | 2 +- reachable.c | 4 ++-- tag.c | 2 +- tree.c | 2 +- upload-pack.c | 2 +- 16 files changed, 30 insertions(+), 31 deletions(-) diff --git a/blob.c b/blob.c index 342bdbb1bb..b9c7180b7c 100644 --- a/blob.c +++ b/blob.c @@ -7,7 +7,7 @@ const char *blob_type = "blob"; struct blob *lookup_blob(struct repository *r, const struct object_id *oid) { - struct object *obj = lookup_object(r, oid->hash); + struct object *obj = lookup_object(r, oid); if (!obj) return create_object(r, oid->hash, alloc_blob_node(r)); diff --git a/builtin/fast-export.c b/builtin/fast-export.c index c22cef3b2f..f541f55d33 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -275,7 +275,7 @@ static void export_blob(const struct object_id *oid) if (is_null_oid(oid)) return; - object = lookup_object(the_repository, oid->hash); + object = lookup_object(the_repository, oid); if (object && object->flags & SHOWN) return; @@ -453,7 +453,7 @@ static void show_filemodify(struct diff_queue_struct *q, &spec->oid)); else { struct object *object = lookup_object(the_repository, - spec->oid.hash); + &spec->oid); printf("M %06o :%d ", spec->mode, get_object_mark(object)); } diff --git a/builtin/fsck.c b/builtin/fsck.c index e422c82465..18403a94fa 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -238,7 +238,7 @@ static int mark_used(struct object *obj, int type, void *data, struct fsck_optio static void mark_unreachable_referents(const struct object_id *oid) { struct fsck_options options = FSCK_OPTIONS_DEFAULT; - struct 
object *obj = lookup_object(the_repository, oid->hash); + struct object *obj = lookup_object(the_repository, oid); if (!obj || !(obj->flags & HAS_OBJ)) return; /* not part of our original set */ @@ -497,7 +497,7 @@ static void fsck_handle_reflog_oid(const char *refname, struct object_id *oid, struct object *obj; if (!is_null_oid(oid)) { - obj = lookup_object(the_repository, oid->hash); + obj = lookup_object(the_repository, oid); if (obj && (obj->flags & HAS_OBJ)) { if (timestamp && name_objects) add_decoration(fsck_walk_options.object_names, @@ -879,7 +879,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) struct object_id oid; if (!get_oid(arg, &oid)) { struct object *obj = lookup_object(the_repository, - oid.hash); + &oid); if (!obj || !(obj->flags & HAS_OBJ)) { if (is_promisor_object(&oid)) diff --git a/builtin/name-rev.c b/builtin/name-rev.c index 16df43473a..c785fe16ba 100644 --- a/builtin/name-rev.c +++ b/builtin/name-rev.c @@ -378,8 +378,7 @@ static void name_rev_line(char *p, struct name_ref_data *data) *(p+1) = 0; if (!get_oid(p - (hexsz - 1), &oid)) { struct object *o = - lookup_object(the_repository, - oid.hash); + lookup_object(the_repository, &oid); if (o) name = get_rev_name(o, &buf); } diff --git a/builtin/prune.c b/builtin/prune.c index 97613eccb5..2b76872ad2 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -53,7 +53,7 @@ static int is_object_reachable(const struct object_id *oid, perform_reachability_traversal(revs); - obj = lookup_object(the_repository, oid->hash); + obj = lookup_object(the_repository, oid); return obj && (obj->flags & SEEN); } diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 80478808b3..a87a4bfd2c 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -332,7 +332,7 @@ static int resolve_against_held(unsigned nr, const struct object_id *base, { struct object *obj; struct obj_buffer *obj_buffer; - obj = lookup_object(the_repository, base->hash); + obj = 
lookup_object(the_repository, base); if (!obj) return 0; obj_buffer = lookup_object_buffer(obj); diff --git a/commit.c b/commit.c index 8fa1883c61..f47c75afae 100644 --- a/commit.c +++ b/commit.c @@ -57,7 +57,7 @@ struct commit *lookup_commit_or_die(const struct object_id *oid, const char *ref struct commit *lookup_commit(struct repository *r, const struct object_id *oid) { - struct object *obj = lookup_object(r, oid->hash); + struct object *obj = lookup_object(r, oid); if (!obj) return create_object(r, oid->hash, alloc_commit_node(r)); diff --git a/delta-islands.c b/delta-islands.c index 2186bd0738..5f3ab914f5 100644 --- a/delta-islands.c +++ b/delta-islands.c @@ -296,7 +296,7 @@ void resolve_tree_islands(struct repository *r, if (S_ISGITLINK(entry.mode)) continue; - obj = lookup_object(r, entry.oid.hash); + obj = lookup_object(r, &entry.oid); if (!obj) continue; diff --git a/fetch-pack.c b/fetch-pack.c index 1c10f54e78..07bc48a1a5 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -286,7 +286,7 @@ static int find_common(struct fetch_negotiator *negotiator, * we cannot trust the object flags). 
*/ if (!args->no_dependents && - ((o = lookup_object(the_repository, remote->hash)) != NULL) && + ((o = lookup_object(the_repository, remote)) != NULL) && (o->flags & COMPLETE)) { continue; } @@ -364,7 +364,7 @@ static int find_common(struct fetch_negotiator *negotiator, if (skip_prefix(reader.line, "unshallow ", &arg)) { if (get_oid_hex(arg, &oid)) die(_("invalid unshallow line: %s"), reader.line); - if (!lookup_object(the_repository, oid.hash)) + if (!lookup_object(the_repository, &oid)) die(_("object not found: %s"), reader.line); /* make sure that it is parsed as shallow */ if (!parse_object(the_repository, &oid)) @@ -707,7 +707,7 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator, for (ref = *refs; ref; ref = ref->next) { struct object *o = deref_tag(the_repository, lookup_object(the_repository, - ref->old_oid.hash), + &ref->old_oid), NULL, 0); if (!o || o->type != OBJ_COMMIT || !(o->flags & COMPLETE)) @@ -734,7 +734,7 @@ static int everything_local(struct fetch_pack_args *args, const struct object_id *remote = &ref->old_oid; struct object *o; - o = lookup_object(the_repository, remote->hash); + o = lookup_object(the_repository, remote); if (!o || !(o->flags & COMPLETE)) { retval = 0; print_verbose(args, "want %s (%s)", oid_to_hex(remote), @@ -1048,7 +1048,7 @@ static void add_wants(int no_dependents, const struct ref *wants, struct strbuf * we cannot trust the object flags). 
*/ if (!no_dependents && - ((o = lookup_object(the_repository, remote->hash)) != NULL) && + ((o = lookup_object(the_repository, remote)) != NULL) && (o->flags & COMPLETE)) { continue; } @@ -1275,7 +1275,7 @@ static void receive_shallow_info(struct fetch_pack_args *args, if (skip_prefix(reader->line, "unshallow ", &arg)) { if (get_oid_hex(arg, &oid)) die(_("invalid unshallow line: %s"), reader->line); - if (!lookup_object(the_repository, oid.hash)) + if (!lookup_object(the_repository, &oid)) die(_("object not found: %s"), reader->line); /* make sure that it is parsed as shallow */ if (!parse_object(the_repository, &oid)) diff --git a/http-push.c b/http-push.c index 96a98e1e61..0353f9f514 100644 --- a/http-push.c +++ b/http-push.c @@ -723,7 +723,7 @@ static void one_remote_object(const struct object_id *oid) { struct object *obj; - obj = lookup_object(the_repository, oid->hash); + obj = lookup_object(the_repository, oid); if (!obj) obj = parse_object(the_repository, oid); diff --git a/object.c b/object.c index d5b1d8daaf..34c1d0dc8f 100644 --- a/object.c +++ b/object.c @@ -85,7 +85,7 @@ static void insert_obj_hash(struct object *obj, struct object **hash, unsigned i * Look up the record for the given sha1 in the hash map stored in * obj_hash. Return NULL if it was not found. 
*/ -struct object *lookup_object(struct repository *r, const unsigned char *sha1) +struct object *lookup_object(struct repository *r, const struct object_id *oid) { unsigned int i, first; struct object *obj; @@ -93,9 +93,9 @@ struct object *lookup_object(struct repository *r, const unsigned char *sha1) if (!r->parsed_objects->obj_hash) return NULL; - first = i = hash_obj(sha1, r->parsed_objects->obj_hash_size); + first = i = hash_obj(oid->hash, r->parsed_objects->obj_hash_size); while ((obj = r->parsed_objects->obj_hash[i]) != NULL) { - if (hasheq(sha1, obj->oid.hash)) + if (oideq(oid, &obj->oid)) break; i++; if (i == r->parsed_objects->obj_hash_size) @@ -180,7 +180,7 @@ void *object_as_type(struct repository *r, struct object *obj, enum object_type struct object *lookup_unknown_object(const struct object_id *oid) { - struct object *obj = lookup_object(the_repository, oid->hash); + struct object *obj = lookup_object(the_repository, oid); if (!obj) obj = create_object(the_repository, oid->hash, alloc_object_node(the_repository)); @@ -256,7 +256,7 @@ struct object *parse_object(struct repository *r, const struct object_id *oid) void *buffer; struct object *obj; - obj = lookup_object(r, oid->hash); + obj = lookup_object(r, oid); if (obj && obj->parsed) return obj; @@ -268,7 +268,7 @@ struct object *parse_object(struct repository *r, const struct object_id *oid) return NULL; } parse_blob_buffer(lookup_blob(r, oid), NULL, 0); - return lookup_object(r, oid->hash); + return lookup_object(r, oid); } buffer = repo_read_object_file(r, oid, &type, &size); diff --git a/object.h b/object.h index 5e0ccfe0e4..47301186a4 100644 --- a/object.h +++ b/object.h @@ -116,7 +116,7 @@ struct object *get_indexed_object(unsigned int); * half-initialised objects, the caller is expected to initialize them * by calling parse_object() on them. 
*/ -struct object *lookup_object(struct repository *r, const unsigned char *sha1); +struct object *lookup_object(struct repository *r, const struct object_id *oid); void *create_object(struct repository *r, const unsigned char *sha1, void *obj); diff --git a/reachable.c b/reachable.c index 0d00a91de4..8f50235b28 100644 --- a/reachable.c +++ b/reachable.c @@ -109,7 +109,7 @@ static int add_recent_loose(const struct object_id *oid, const char *path, void *data) { struct stat st; - struct object *obj = lookup_object(the_repository, oid->hash); + struct object *obj = lookup_object(the_repository, oid); if (obj && obj->flags & SEEN) return 0; @@ -134,7 +134,7 @@ static int add_recent_packed(const struct object_id *oid, struct packed_git *p, uint32_t pos, void *data) { - struct object *obj = lookup_object(the_repository, oid->hash); + struct object *obj = lookup_object(the_repository, oid); if (obj && obj->flags & SEEN) return 0; diff --git a/tag.c b/tag.c index 7445b8f6ea..3ae00ba1ab 100644 --- a/tag.c +++ b/tag.c @@ -100,7 +100,7 @@ struct object *deref_tag_noverify(struct object *o) struct tag *lookup_tag(struct repository *r, const struct object_id *oid) { - struct object *obj = lookup_object(r, oid->hash); + struct object *obj = lookup_object(r, oid); if (!obj) return create_object(r, oid->hash, alloc_tag_node(r)); diff --git a/tree.c b/tree.c index f416afc57d..0ebb8c5b02 100644 --- a/tree.c +++ b/tree.c @@ -197,7 +197,7 @@ int read_tree(struct repository *r, struct tree *tree, int stage, struct tree *lookup_tree(struct repository *r, const struct object_id *oid) { - struct object *obj = lookup_object(r, oid->hash); + struct object *obj = lookup_object(r, oid); if (!obj) return create_object(r, oid->hash, alloc_tree_node(r)); diff --git a/upload-pack.c b/upload-pack.c index ecc19641fe..a0f170b5b5 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -534,7 +534,7 @@ static int get_reachable_list(struct object_array *src, if (parse_oid_hex(namebuf, &oid, &p) || *p != 
'\n') break; - o = lookup_object(the_repository, oid.hash); + o = lookup_object(the_repository, &oid); if (o && o->type == OBJ_COMMIT) { o->flags &= ~TMP_MARK; } From 46931d39389f2886e6c159674923345f024e1c64 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 2019 03:41:17 -0400 Subject: [PATCH 08/17] object: convert internal hash_obj() to object_id Now that lookup_object() has an object_id, we can consistently pass that around instead of a raw sha1. We still convert to a hash to pass to sha1hash(), but the goal is for that to go away shortly. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- object.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/object.c b/object.c index 34c1d0dc8f..dbfdbe504d 100644 --- a/object.c +++ b/object.c @@ -59,9 +59,9 @@ int type_from_string_gently(const char *str, ssize_t len, int gentle) * the specified sha1. n must be a power of 2. Please note that the * return value is *not* consistent across computer architectures. 
*/ -static unsigned int hash_obj(const unsigned char *sha1, unsigned int n) +static unsigned int hash_obj(const struct object_id *oid, unsigned int n) { - return sha1hash(sha1) & (n - 1); + return sha1hash(oid->hash) & (n - 1); } /* @@ -71,7 +71,7 @@ static unsigned int hash_obj(const unsigned char *sha1, unsigned int n) */ static void insert_obj_hash(struct object *obj, struct object **hash, unsigned int size) { - unsigned int j = hash_obj(obj->oid.hash, size); + unsigned int j = hash_obj(&obj->oid, size); while (hash[j]) { j++; @@ -93,7 +93,7 @@ struct object *lookup_object(struct repository *r, const struct object_id *oid) if (!r->parsed_objects->obj_hash) return NULL; - first = i = hash_obj(oid->hash, r->parsed_objects->obj_hash_size); + first = i = hash_obj(oid, r->parsed_objects->obj_hash_size); while ((obj = r->parsed_objects->obj_hash[i]) != NULL) { if (oideq(oid, &obj->oid)) break; From a378509e1c8d817b3abe42bd8b3c8aa2a6f9af8a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 2019 03:41:21 -0400 Subject: [PATCH 09/17] object: convert create_object() to use object_id There are no callers left of create_object() that aren't just passing us the "hash" member of a "struct object_id". Let's take the whole struct, which gets us closer to removing all raw sha1 variables. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- blob.c | 3 +-- commit-graph.c | 2 +- commit.c | 3 +-- object.c | 6 +++--- object.h | 2 +- tag.c | 3 +-- tree.c | 3 +-- 7 files changed, 9 insertions(+), 13 deletions(-) diff --git a/blob.c b/blob.c index b9c7180b7c..36f9abda19 100644 --- a/blob.c +++ b/blob.c @@ -9,8 +9,7 @@ struct blob *lookup_blob(struct repository *r, const struct object_id *oid) { struct object *obj = lookup_object(r, oid); if (!obj) - return create_object(r, oid->hash, - alloc_blob_node(r)); + return create_object(r, oid, alloc_blob_node(r)); return object_as_type(r, obj, OBJ_BLOB, 0); } diff --git a/commit-graph.c b/commit-graph.c index 7c5e54875f..5a62131d68 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1214,7 +1214,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i); graph_commit = lookup_commit(r, &cur_oid); - odb_commit = (struct commit *)create_object(r, cur_oid.hash, alloc_commit_node(r)); + odb_commit = (struct commit *)create_object(r, &cur_oid, alloc_commit_node(r)); if (parse_commit_internal(odb_commit, 0, 0)) { graph_report(_("failed to parse commit %s from object database for commit-graph"), oid_to_hex(&cur_oid)); diff --git a/commit.c b/commit.c index f47c75afae..b71ac195d4 100644 --- a/commit.c +++ b/commit.c @@ -59,8 +59,7 @@ struct commit *lookup_commit(struct repository *r, const struct object_id *oid) { struct object *obj = lookup_object(r, oid); if (!obj) - return create_object(r, oid->hash, - alloc_commit_node(r)); + return create_object(r, oid, alloc_commit_node(r)); return object_as_type(r, obj, OBJ_COMMIT, 0); } diff --git a/object.c b/object.c index dbfdbe504d..317647da3e 100644 --- a/object.c +++ b/object.c @@ -141,13 +141,13 @@ static void grow_object_hash(struct repository *r) r->parsed_objects->obj_hash_size = new_hash_size; } -void *create_object(struct repository *r, const unsigned char *sha1, void *o) +void 
*create_object(struct repository *r, const struct object_id *oid, void *o) { struct object *obj = o; obj->parsed = 0; obj->flags = 0; - hashcpy(obj->oid.hash, sha1); + oidcpy(&obj->oid, oid); if (r->parsed_objects->obj_hash_size - 1 <= r->parsed_objects->nr_objs * 2) grow_object_hash(r); @@ -182,7 +182,7 @@ struct object *lookup_unknown_object(const struct object_id *oid) { struct object *obj = lookup_object(the_repository, oid); if (!obj) - obj = create_object(the_repository, oid->hash, + obj = create_object(the_repository, oid, alloc_object_node(the_repository)); return obj; } diff --git a/object.h b/object.h index 47301186a4..0120892bbd 100644 --- a/object.h +++ b/object.h @@ -118,7 +118,7 @@ struct object *get_indexed_object(unsigned int); */ struct object *lookup_object(struct repository *r, const struct object_id *oid); -void *create_object(struct repository *r, const unsigned char *sha1, void *obj); +void *create_object(struct repository *r, const struct object_id *oid, void *obj); void *object_as_type(struct repository *r, struct object *obj, enum object_type type, int quiet); diff --git a/tag.c b/tag.c index 3ae00ba1ab..5db870edb9 100644 --- a/tag.c +++ b/tag.c @@ -102,8 +102,7 @@ struct tag *lookup_tag(struct repository *r, const struct object_id *oid) { struct object *obj = lookup_object(r, oid); if (!obj) - return create_object(r, oid->hash, - alloc_tag_node(r)); + return create_object(r, oid, alloc_tag_node(r)); return object_as_type(r, obj, OBJ_TAG, 0); } diff --git a/tree.c b/tree.c index 0ebb8c5b02..4720945e6a 100644 --- a/tree.c +++ b/tree.c @@ -199,8 +199,7 @@ struct tree *lookup_tree(struct repository *r, const struct object_id *oid) { struct object *obj = lookup_object(r, oid); if (!obj) - return create_object(r, oid->hash, - alloc_tree_node(r)); + return create_object(r, oid, alloc_tree_node(r)); return object_as_type(r, obj, OBJ_TREE, 0); } From 4ed43d16d773ae5f717a258ce81a18ab3fb29435 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 
2019 03:41:25 -0400 Subject: [PATCH 10/17] khash: drop broken oid_map typedef Commit 5a8643eff1 (khash: move oid hash table definition, 2019-02-19) added a khash "oid_map" type to match the existing "oid" type, which is a simple set (i.e., just keys, no values). But in setting up the khash_oid_map typedef, it accidentally referred to "kh_oid_t", which is the set type. Nobody noticed the breakage because there are not yet any callers; the type was added just as a match to the existing sha1 types (whose map type confusingly _is_ called khash_sha1, and it has no matching set type). We could easily fix this with s/oid/oid_map/ in the typedef. But let's take this a step further, and just drop the typedef entirely. These typedefs were added by 5a8643eff1 to match the khash_sha1 typedefs. But the actual khash-derived type names are descriptive enough; this is just adding an extra layer of indirection. The khash names do not quite follow our usual style (e.g., they end in "_t"), but since we end up using other khash names (e.g., khiter_t, kh_get_oid()) anyway, just typedef-ing the struct name is not really helping much. And there are already many cases where we use the raw khash type names anyway (e.g., the "set" variant defined just above us does not have such a typedef!). So let's drop this typedef, and the matching oid_pos one (which actually _does_ have a user, but we can easily convert it). We'll leave the khash_sha1 typedef around. The ultimate fate of its callers should be conversion to kh_oid_map_t, so there's no point in going through the noise of changing the names now. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- khash.h | 2 -- pack-bitmap.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/khash.h b/khash.h index af747a683f..64d4eeb2bd 100644 --- a/khash.h +++ b/khash.h @@ -345,9 +345,7 @@ static inline int oid_equal(struct object_id a, struct object_id b) KHASH_INIT(oid, struct object_id, int, 0, oid_hash, oid_equal) KHASH_INIT(oid_map, struct object_id, void *, 1, oid_hash, oid_equal) -typedef kh_oid_t khash_oid_map; KHASH_INIT(oid_pos, struct object_id, int, 1, oid_hash, oid_equal) -typedef kh_oid_pos_t khash_oid_pos; #endif /* __AC_KHASH_H */ diff --git a/pack-bitmap.c b/pack-bitmap.c index ff1f07e249..998133588f 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -365,7 +365,7 @@ struct include_data { static inline int bitmap_position_extended(struct bitmap_index *bitmap_git, const struct object_id *oid) { - khash_oid_pos *positions = bitmap_git->ext_index.positions; + kh_oid_pos_t *positions = bitmap_git->ext_index.positions; khiter_t pos = kh_get_oid_pos(positions, *oid); if (pos < kh_end(positions)) { From 8fbb558af4e911a9507295809a4d1d7d6687b6e1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 2019 03:41:28 -0400 Subject: [PATCH 11/17] khash: rename kh_oid_t to kh_oid_set khash lets us define a hash as either a map or a set (i.e., with no "value" type). For the oid maps we define, "oid" is the set and "oid_map" is the map. As the bug in the previous commit shows, it's easy to pick the wrong one. So let's make the names more distinct: "oid_set" and "oid_map". An alternative naming scheme would be to actually name the type after the key/value types. So e.g., "oid" _would_ be the set, since it has no value type. And "oid_map" would become "oid_void" or similar (and "oid_pos" becomes "oid_int"). That's better in some ways: it's more regular, and a given map type can be more easily reused in multiple contexts (e.g., something storing an "int" that isn't a "pos").
But it's also slightly less descriptive. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- khash.h | 2 +- oidset.c | 12 ++++++------ oidset.h | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/khash.h b/khash.h index 64d4eeb2bd..ae9f78347f 100644 --- a/khash.h +++ b/khash.h @@ -342,7 +342,7 @@ static inline int oid_equal(struct object_id a, struct object_id b) return oideq(&a, &b); } -KHASH_INIT(oid, struct object_id, int, 0, oid_hash, oid_equal) +KHASH_INIT(oid_set, struct object_id, int, 0, oid_hash, oid_equal) KHASH_INIT(oid_map, struct object_id, void *, 1, oid_hash, oid_equal) diff --git a/oidset.c b/oidset.c index fe4eb921df..8bdecb13de 100644 --- a/oidset.c +++ b/oidset.c @@ -5,33 +5,33 @@ void oidset_init(struct oidset *set, size_t initial_size) { memset(&set->set, 0, sizeof(set->set)); if (initial_size) - kh_resize_oid(&set->set, initial_size); + kh_resize_oid_set(&set->set, initial_size); } int oidset_contains(const struct oidset *set, const struct object_id *oid) { - khiter_t pos = kh_get_oid(&set->set, *oid); + khiter_t pos = kh_get_oid_set(&set->set, *oid); return pos != kh_end(&set->set); } int oidset_insert(struct oidset *set, const struct object_id *oid) { int added; - kh_put_oid(&set->set, *oid, &added); + kh_put_oid_set(&set->set, *oid, &added); return !added; } int oidset_remove(struct oidset *set, const struct object_id *oid) { - khiter_t pos = kh_get_oid(&set->set, *oid); + khiter_t pos = kh_get_oid_set(&set->set, *oid); if (pos == kh_end(&set->set)) return 0; - kh_del_oid(&set->set, pos); + kh_del_oid_set(&set->set, pos); return 1; } void oidset_clear(struct oidset *set) { - kh_release_oid(&set->set); + kh_release_oid_set(&set->set); oidset_init(set, 0); } diff --git a/oidset.h b/oidset.h index 14f18f791f..505fad578b 100644 --- a/oidset.h +++ b/oidset.h @@ -20,7 +20,7 @@ * A single oidset; should be zero-initialized (or use OIDSET_INIT). 
*/ struct oidset { - kh_oid_t set; + kh_oid_set_t set; }; #define OIDSET_INIT { { 0 } } @@ -62,7 +62,7 @@ int oidset_remove(struct oidset *set, const struct object_id *oid); void oidset_clear(struct oidset *set); struct oidset_iter { - kh_oid_t *set; + kh_oid_set_t *set; khiter_t iter; }; From f8e56da97df846e67508eaf26d11fd007e1b75c1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 2019 03:41:32 -0400 Subject: [PATCH 12/17] delta-islands: convert island_marks khash to use oids All of the users of this map have an actual "struct object_id" rather than a bare sha1. Let's use the more descriptive type (and get one step closer to dropping khash_sha1 entirely). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- delta-islands.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/delta-islands.c b/delta-islands.c index 5f3ab914f5..88d102298c 100644 --- a/delta-islands.c +++ b/delta-islands.c @@ -22,7 +22,7 @@ KHASH_INIT(str, const char *, void *, 1, kh_str_hash_func, kh_str_hash_equal) -static khash_sha1 *island_marks; +static kh_oid_map_t *island_marks; static unsigned island_counter; static unsigned island_counter_core; @@ -105,7 +105,7 @@ int in_same_island(const struct object_id *trg_oid, const struct object_id *src_ * If we don't have a bitmap for the target, we can delta it * against anything -- it's not an important object */ - trg_pos = kh_get_sha1(island_marks, trg_oid->hash); + trg_pos = kh_get_oid_map(island_marks, *trg_oid); if (trg_pos >= kh_end(island_marks)) return 1; @@ -113,7 +113,7 @@ int in_same_island(const struct object_id *trg_oid, const struct object_id *src_ * if the source (our delta base) doesn't have a bitmap, * we don't want to base any deltas on it! 
*/ - src_pos = kh_get_sha1(island_marks, src_oid->hash); + src_pos = kh_get_oid_map(island_marks, *src_oid); if (src_pos >= kh_end(island_marks)) return 0; @@ -129,11 +129,11 @@ int island_delta_cmp(const struct object_id *a, const struct object_id *b) if (!island_marks) return 0; - a_pos = kh_get_sha1(island_marks, a->hash); + a_pos = kh_get_oid_map(island_marks, *a); if (a_pos < kh_end(island_marks)) a_bitmap = kh_value(island_marks, a_pos); - b_pos = kh_get_sha1(island_marks, b->hash); + b_pos = kh_get_oid_map(island_marks, *b); if (b_pos < kh_end(island_marks)) b_bitmap = kh_value(island_marks, b_pos); @@ -154,7 +154,7 @@ static struct island_bitmap *create_or_get_island_marks(struct object *obj) khiter_t pos; int hash_ret; - pos = kh_put_sha1(island_marks, obj->oid.hash, &hash_ret); + pos = kh_put_oid_map(island_marks, obj->oid, &hash_ret); if (hash_ret) kh_value(island_marks, pos) = island_bitmap_new(NULL); @@ -167,7 +167,7 @@ static void set_island_marks(struct object *obj, struct island_bitmap *marks) khiter_t pos; int hash_ret; - pos = kh_put_sha1(island_marks, obj->oid.hash, &hash_ret); + pos = kh_put_oid_map(island_marks, obj->oid, &hash_ret); if (hash_ret) { /* * We don't have one yet; make a copy-on-write of the @@ -279,7 +279,7 @@ void resolve_tree_islands(struct repository *r, struct name_entry entry; khiter_t pos; - pos = kh_get_sha1(island_marks, ent->idx.oid.hash); + pos = kh_get_oid_map(island_marks, ent->idx.oid); if (pos >= kh_end(island_marks)) continue; @@ -456,7 +456,7 @@ static void deduplicate_islands(struct repository *r) void load_delta_islands(struct repository *r) { - island_marks = kh_init_sha1(); + island_marks = kh_init_oid_map(); remote_islands = kh_init_str(); git_config(island_config_callback, NULL); @@ -468,7 +468,7 @@ void load_delta_islands(struct repository *r) void propagate_island_marks(struct commit *commit) { - khiter_t pos = kh_get_sha1(island_marks, commit->object.oid.hash); + khiter_t pos = kh_get_oid_map(island_marks, 
commit->object.oid); if (pos < kh_end(island_marks)) { struct commit_list *p; @@ -490,7 +490,7 @@ int compute_pack_layers(struct packing_data *to_pack) for (i = 0; i < to_pack->nr_objects; ++i) { struct object_entry *entry = &to_pack->objects[i]; - khiter_t pos = kh_get_sha1(island_marks, entry->idx.oid.hash); + khiter_t pos = kh_get_oid_map(island_marks, entry->idx.oid); oe_set_layer(to_pack, entry, 1); From d2bc62b1fa7f2df247199ed88edff30875ee19bc Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 2019 03:41:35 -0400 Subject: [PATCH 13/17] pack-bitmap: convert khash_sha1 maps into kh_oid_map All of the users of our khash_sha1 maps actually have a "struct object_id". Let's use the more descriptive type. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 14 +++++++------- pack-bitmap.c | 8 ++++---- pack-bitmap.h | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 0637378533..fa78a460c9 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -28,8 +28,8 @@ struct bitmap_writer { struct ewah_bitmap *blobs; struct ewah_bitmap *tags; - khash_sha1 *bitmaps; - khash_sha1 *reused; + kh_oid_map_t *bitmaps; + kh_oid_map_t *reused; struct packing_data *to_pack; struct bitmapped_commit *selected; @@ -175,7 +175,7 @@ add_to_include_set(struct bitmap *base, struct commit *commit) if (bitmap_get(base, bitmap_pos)) return 0; - hash_pos = kh_get_sha1(writer.bitmaps, commit->object.oid.hash); + hash_pos = kh_get_oid_map(writer.bitmaps, commit->object.oid); if (hash_pos < kh_end(writer.bitmaps)) { struct bitmapped_commit *bc = kh_value(writer.bitmaps, hash_pos); bitmap_or_ewah(base, bc->bitmap); @@ -256,7 +256,7 @@ void bitmap_writer_build(struct packing_data *to_pack) struct bitmap *base = bitmap_new(); struct rev_info revs; - writer.bitmaps = kh_init_sha1(); + writer.bitmaps = kh_init_oid_map(); writer.to_pack = to_pack; if (writer.show_progress) @@ -311,7 +311,7 @@ void 
bitmap_writer_build(struct packing_data *to_pack) if (i >= reuse_after) stored->flags |= BITMAP_FLAG_REUSE; - hash_pos = kh_put_sha1(writer.bitmaps, object->oid.hash, &hash_ret); + hash_pos = kh_put_oid_map(writer.bitmaps, object->oid, &hash_ret); if (hash_ret == 0) die("Duplicate entry when writing index: %s", oid_to_hex(&object->oid)); @@ -366,7 +366,7 @@ void bitmap_writer_reuse_bitmaps(struct packing_data *to_pack) if (!(bitmap_git = prepare_bitmap_git(to_pack->repo))) return; - writer.reused = kh_init_sha1(); + writer.reused = kh_init_oid_map(); rebuild_existing_bitmaps(bitmap_git, to_pack, writer.reused, writer.show_progress); /* @@ -382,7 +382,7 @@ static struct ewah_bitmap *find_reused_bitmap(const struct object_id *oid) if (!writer.reused) return NULL; - hash_pos = kh_get_sha1(writer.reused, oid->hash); + hash_pos = kh_get_oid_map(writer.reused, *oid); if (hash_pos >= kh_end(writer.reused)) return NULL; diff --git a/pack-bitmap.c b/pack-bitmap.c index 998133588f..ed2befaac6 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1041,7 +1041,7 @@ static int rebuild_bitmap(uint32_t *reposition, int rebuild_existing_bitmaps(struct bitmap_index *bitmap_git, struct packing_data *mapping, - khash_sha1 *reused_bitmaps, + kh_oid_map_t *reused_bitmaps, int show_progress) { uint32_t i, num_objects; @@ -1080,9 +1080,9 @@ int rebuild_existing_bitmaps(struct bitmap_index *bitmap_git, if (!rebuild_bitmap(reposition, lookup_stored_bitmap(stored), rebuild)) { - hash_pos = kh_put_sha1(reused_bitmaps, - stored->oid.hash, - &hash_ret); + hash_pos = kh_put_oid_map(reused_bitmaps, + stored->oid, + &hash_ret); kh_value(reused_bitmaps, hash_pos) = bitmap_to_ewah(rebuild); } diff --git a/pack-bitmap.h b/pack-bitmap.h index ee9792264c..00de3ec8e4 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -51,7 +51,7 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *, struct packed_git **packfile, uint32_t *entries, off_t *up_to); int rebuild_existing_bitmaps(struct bitmap_index *, 
struct packing_data *mapping, - khash_sha1 *reused_bitmaps, int show_progress); + kh_oid_map_t *reused_bitmaps, int show_progress); void free_bitmap_index(struct bitmap_index *); /* From 685d34a96e856fd0d7f3be99cc3ca60b3b7ce185 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 2019 03:41:38 -0400 Subject: [PATCH 14/17] khash: drop sha1-specific map types All of the callers of khash_sha1 and khash_sha1_pos have been removed, in favor of using maps that use "struct object_id" as their keys. Let's drop these now-obsolete types. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- khash.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/khash.h b/khash.h index ae9f78347f..cb2cd3d7e4 100644 --- a/khash.h +++ b/khash.h @@ -324,14 +324,6 @@ static const double __ac_HASH_UPPER = 0.77; code; \ } } -#define __kh_oid_cmp(a, b) (hashcmp(a, b) == 0) - -KHASH_INIT(sha1, const unsigned char *, void *, 1, sha1hash, __kh_oid_cmp) -typedef kh_sha1_t khash_sha1; - -KHASH_INIT(sha1_pos, const unsigned char *, int, 1, sha1hash, __kh_oid_cmp) -typedef kh_sha1_pos_t khash_sha1_pos; - static inline unsigned int oid_hash(struct object_id oid) { return sha1hash(oid.hash); From e465e7c41d5104b48a16b122a666cbbfa184c25d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 2019 03:41:42 -0400 Subject: [PATCH 15/17] khash: rename oid helper functions For use in object_id hash tables, we have oid_hash() and oid_equal(). But these are confusingly similar to the existing oideq() and the oidhash() we plan to add to replace sha1hash(). The big difference from those functions is that rather than accepting a const pointer to the "struct object_id", we take the arguments by value (which is a khash internal convention). So let's make that obvious by calling them oidhash_by_value() and oideq_by_value(). Those names are fairly horrendous to type, but we rarely need to do so; they are passed to the khash implementation macro and then only used internally. 
Callers get to use the nice kh_put_oid_map(), etc. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- khash.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/khash.h b/khash.h index cb2cd3d7e4..f911d2b005 100644 --- a/khash.h +++ b/khash.h @@ -324,20 +324,20 @@ static const double __ac_HASH_UPPER = 0.77; code; \ } } -static inline unsigned int oid_hash(struct object_id oid) +static inline unsigned int oidhash_by_value(struct object_id oid) { return sha1hash(oid.hash); } -static inline int oid_equal(struct object_id a, struct object_id b) +static inline int oideq_by_value(struct object_id a, struct object_id b) { return oideq(&a, &b); } -KHASH_INIT(oid_set, struct object_id, int, 0, oid_hash, oid_equal) +KHASH_INIT(oid_set, struct object_id, int, 0, oidhash_by_value, oideq_by_value) -KHASH_INIT(oid_map, struct object_id, void *, 1, oid_hash, oid_equal) +KHASH_INIT(oid_map, struct object_id, void *, 1, oidhash_by_value, oideq_by_value) -KHASH_INIT(oid_pos, struct object_id, int, 1, oid_hash, oid_equal) +KHASH_INIT(oid_pos, struct object_id, int, 1, oidhash_by_value, oideq_by_value) #endif /* __AC_KHASH_H */ From c0566d78aac1d5f8ff90c75f2d936487bb5a8d84 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 2019 03:41:45 -0400 Subject: [PATCH 16/17] hash.h: move object_id definition from cache.h Our hashmap.h helpfully defines a sha1hash() function. But it cannot define a similar oidhash() without including all of cache.h, which itself wants to include hashmap.h! Let's break this circular dependency by moving the definition to hash.h, along with the remaining RAWSZ macros, etc. That will put them with the existing git_hash_algo definition. One alternative would be to move oidhash() into cache.h, but it's already quite bloated. We're better off moving things out than in. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- cache.h | 24 ------------------------ hash.h | 24 ++++++++++++++++++++++++ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/cache.h b/cache.h index bf20337ef4..37e0b82064 100644 --- a/cache.h +++ b/cache.h @@ -43,30 +43,6 @@ int git_deflate_end_gently(git_zstream *); int git_deflate(git_zstream *, int flush); unsigned long git_deflate_bound(git_zstream *, unsigned long); -/* The length in bytes and in hex digits of an object name (SHA-1 value). */ -#define GIT_SHA1_RAWSZ 20 -#define GIT_SHA1_HEXSZ (2 * GIT_SHA1_RAWSZ) -/* The block size of SHA-1. */ -#define GIT_SHA1_BLKSZ 64 - -/* The length in bytes and in hex digits of an object name (SHA-256 value). */ -#define GIT_SHA256_RAWSZ 32 -#define GIT_SHA256_HEXSZ (2 * GIT_SHA256_RAWSZ) -/* The block size of SHA-256. */ -#define GIT_SHA256_BLKSZ 64 - -/* The length in byte and in hex digits of the largest possible hash value. */ -#define GIT_MAX_RAWSZ GIT_SHA256_RAWSZ -#define GIT_MAX_HEXSZ GIT_SHA256_HEXSZ -/* The largest possible block size for any supported hash. */ -#define GIT_MAX_BLKSZ GIT_SHA256_BLKSZ - -struct object_id { - unsigned char hash[GIT_MAX_RAWSZ]; -}; - -#define the_hash_algo the_repository->hash_algo - #if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT) #define DTYPE(de) ((de)->d_type) #else diff --git a/hash.h b/hash.h index 661c9f2281..52a4f1a3f4 100644 --- a/hash.h +++ b/hash.h @@ -139,4 +139,28 @@ static inline int hash_algo_by_ptr(const struct git_hash_algo *p) return p - hash_algos; } +/* The length in bytes and in hex digits of an object name (SHA-1 value). */ +#define GIT_SHA1_RAWSZ 20 +#define GIT_SHA1_HEXSZ (2 * GIT_SHA1_RAWSZ) +/* The block size of SHA-1. */ +#define GIT_SHA1_BLKSZ 64 + +/* The length in bytes and in hex digits of an object name (SHA-256 value). */ +#define GIT_SHA256_RAWSZ 32 +#define GIT_SHA256_HEXSZ (2 * GIT_SHA256_RAWSZ) +/* The block size of SHA-256. 
*/ +#define GIT_SHA256_BLKSZ 64 + +/* The length in byte and in hex digits of the largest possible hash value. */ +#define GIT_MAX_RAWSZ GIT_SHA256_RAWSZ +#define GIT_MAX_HEXSZ GIT_SHA256_HEXSZ +/* The largest possible block size for any supported hash. */ +#define GIT_MAX_BLKSZ GIT_SHA256_BLKSZ + +struct object_id { + unsigned char hash[GIT_MAX_RAWSZ]; +}; + +#define the_hash_algo the_repository->hash_algo + #endif From d40abc8e95f75b529feb140178b69a3783c2d108 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 2019 03:41:49 -0400 Subject: [PATCH 17/17] hashmap: convert sha1hash() to oidhash() There are no callers left of sha1hash() that do not simply pass the "hash" member of a "struct object_id". Let's get rid of the outdated sha1-specific function and provide one that operates on the whole struct (even though the technique, taking the first few bytes of the hash, will remain the same). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/describe.c | 4 ++-- decorate.c | 2 +- diffcore-rename.c | 2 +- hashmap.h | 8 +++++--- khash.h | 2 +- object.c | 2 +- pack-objects.c | 2 +- patch-ids.c | 2 +- 8 files changed, 13 insertions(+), 11 deletions(-) diff --git a/builtin/describe.c b/builtin/describe.c index 0a5cde00a2..200154297d 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -76,7 +76,7 @@ static int commit_name_neq(const void *unused_cmp_data, static inline struct commit_name *find_commit_name(const struct object_id *peeled) { - return hashmap_get_from_hash(&names, sha1hash(peeled->hash), peeled); + return hashmap_get_from_hash(&names, oidhash(peeled), peeled); } static int replace_name(struct commit_name *e, @@ -123,7 +123,7 @@ static void add_to_known_names(const char *path, if (!e) { e = xmalloc(sizeof(struct commit_name)); oidcpy(&e->peeled, peeled); - hashmap_entry_init(e, sha1hash(peeled->hash)); + hashmap_entry_init(e, oidhash(peeled)); hashmap_add(&names, e); e->path = NULL; } diff --git a/decorate.c b/decorate.c index 
de31331fa4..a605b1b5f4 100644 --- a/decorate.c +++ b/decorate.c @@ -8,7 +8,7 @@ static unsigned int hash_obj(const struct object *obj, unsigned int n) { - return sha1hash(obj->oid.hash) % n; + return oidhash(&obj->oid) % n; } static void *insert_decoration(struct decoration *n, const struct object *base, void *decoration) diff --git a/diffcore-rename.c b/diffcore-rename.c index 07bd34b631..1e50d491c1 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -266,7 +266,7 @@ static unsigned int hash_filespec(struct repository *r, hash_object_file(filespec->data, filespec->size, "blob", &filespec->oid); } - return sha1hash(filespec->oid.hash); + return oidhash(&filespec->oid); } static int find_identical_files(struct hashmap *srcs, diff --git a/hashmap.h b/hashmap.h index f95593b6cf..8424911566 100644 --- a/hashmap.h +++ b/hashmap.h @@ -1,6 +1,8 @@ #ifndef HASHMAP_H #define HASHMAP_H +#include "hash.h" + /* * Generic implementation of hash-based key-value mappings. * @@ -118,14 +120,14 @@ unsigned int memihash_cont(unsigned int hash_seed, const void *buf, size_t len); * the results will be different on big-endian and little-endian * platforms, so they should not be stored or transferred over the net. */ -static inline unsigned int sha1hash(const unsigned char *sha1) +static inline unsigned int oidhash(const struct object_id *oid) { /* - * Equivalent to 'return *(unsigned int *)sha1;', but safe on + * Equivalent to 'return *(unsigned int *)oid->hash;', but safe on * platforms that don't support unaligned reads. 
*/ unsigned int hash; - memcpy(&hash, sha1, sizeof(hash)); + memcpy(&hash, oid->hash, sizeof(hash)); return hash; } diff --git a/khash.h b/khash.h index f911d2b005..21c2095216 100644 --- a/khash.h +++ b/khash.h @@ -326,7 +326,7 @@ static const double __ac_HASH_UPPER = 0.77; static inline unsigned int oidhash_by_value(struct object_id oid) { - return sha1hash(oid.hash); + return oidhash(&oid); } static inline int oideq_by_value(struct object_id a, struct object_id b) diff --git a/object.c b/object.c index 317647da3e..94db02214a 100644 --- a/object.c +++ b/object.c @@ -61,7 +61,7 @@ int type_from_string_gently(const char *str, ssize_t len, int gentle) */ static unsigned int hash_obj(const struct object_id *oid, unsigned int n) { - return sha1hash(oid->hash) & (n - 1); + return oidhash(oid) & (n - 1); } /* diff --git a/pack-objects.c b/pack-objects.c index 00a5f6e0ec..52560293b6 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -11,7 +11,7 @@ static uint32_t locate_object_entry_hash(struct packing_data *pdata, { uint32_t i, mask = (pdata->index_size - 1); - i = sha1hash(oid->hash) & mask; + i = oidhash(oid) & mask; while (pdata->index[i] > 0) { uint32_t pos = pdata->index[i] - 1; diff --git a/patch-ids.c b/patch-ids.c index f70d396654..e8c150d0c9 100644 --- a/patch-ids.c +++ b/patch-ids.c @@ -83,7 +83,7 @@ static int init_patch_id_entry(struct patch_id *patch, if (commit_patch_id(commit, &ids->diffopts, &header_only_patch_id, 1, 0)) return -1; - hashmap_entry_init(patch, sha1hash(header_only_patch_id.hash)); + hashmap_entry_init(patch, oidhash(&header_only_patch_id)); return 0; }