From 76c23892bcbc5e8fba1f6d844b05752082d0542e Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:04:52 +0000 Subject: [PATCH 01/35] t/lib-submodule-update: use appropriate length constant Instead of using a specific invalid hard-coded object ID, produce one of the appropriate length by using test_oid. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/lib-submodule-update.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/t/lib-submodule-update.sh b/t/lib-submodule-update.sh index 5b56b23166..1dd17fc03e 100755 --- a/t/lib-submodule-update.sh +++ b/t/lib-submodule-update.sh @@ -139,7 +139,7 @@ create_lib_submodule_repo () { git revert HEAD && git checkout -b invalid_sub1 add_sub1 && - git update-index --cacheinfo 160000 0123456789012345678901234567890123456789 sub1 && + git update-index --cacheinfo 160000 $(test_oid numeric) sub1 && git commit -m "Invalid sub1 commit" && git checkout -b valid_sub1 && git revert HEAD && @@ -196,6 +196,7 @@ test_git_directory_exists() { # the submodule repo if it doesn't exist and configures the most problematic # settings for diff.ignoreSubmodules. prolog () { + test_oid_init && (test -d submodule_update_repo || create_lib_submodule_repo) && test_config_global diff.ignoreSubmodules all && test_config diff.ignoreSubmodules all From 5a8643eff1e59f1cf96da9875d5716e88f480710 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:04:53 +0000 Subject: [PATCH 02/35] khash: move oid hash table definition Move the oid khash table definition to khash.h and define a typedef for it, similar to the one we have for unsigned char pointers. Define variants that are maps as well. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- khash.h | 18 ++++++++++++++++++ oidset.h | 12 ------------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/khash.h b/khash.h index 532109c87f..a09163b3e3 100644 --- a/khash.h +++ b/khash.h @@ -332,4 +332,22 @@ typedef kh_sha1_t khash_sha1; KHASH_INIT(sha1_pos, const unsigned char *, int, 1, sha1hash, __kh_oid_cmp) typedef kh_sha1_pos_t khash_sha1_pos; +static inline unsigned int oid_hash(struct object_id oid) +{ + return sha1hash(oid.hash); +} + +static inline int oid_equal(struct object_id a, struct object_id b) +{ + return oideq(&a, &b); +} + +KHASH_INIT(oid, struct object_id, int, 0, oid_hash, oid_equal) + +KHASH_INIT(oid_map, struct object_id, void *, 1, oid_hash, oid_equal) +typedef kh_oid_map_t khash_oid_map; + +KHASH_INIT(oid_pos, struct object_id, int, 1, oid_hash, oid_equal) +typedef kh_oid_pos_t khash_oid_pos; + #endif /* __AC_KHASH_H */ diff --git a/oidset.h b/oidset.h index c9d0f6d3cc..14f18f791f 100644 --- a/oidset.h +++ b/oidset.h @@ -16,18 +16,6 @@ * table overhead. */ -static inline unsigned int oid_hash(struct object_id oid) -{ - return sha1hash(oid.hash); -} - -static inline int oid_equal(struct object_id a, struct object_id b) -{ - return oideq(&a, &b); -} - -KHASH_INIT(oid, struct object_id, int, 0, oid_hash, oid_equal) - /** * A single oidset; should be zero-initialized (or use OIDSET_INIT). */ From 0f4d6cada83dc6bd6b6b24dc0d2b3e6460c645cb Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:04:54 +0000 Subject: [PATCH 03/35] pack-bitmap: make bitmap header handling hash agnostic Increase the checksum field in struct bitmap_disk_header to be GIT_MAX_RAWSZ bytes in length and ensure that we hash the proper number of bytes out when computing the bitmap checksum. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 2 +- pack-bitmap.c | 2 +- pack-bitmap.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 5566e94abe..c82fb01fd7 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -535,7 +535,7 @@ void bitmap_writer_finish(struct pack_idx_entry **index, header.entry_count = htonl(writer.selected_nr); hashcpy(header.checksum, writer.pack_checksum); - hashwrite(f, &header, sizeof(header)); + hashwrite(f, &header, sizeof(header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz); dump_bitmap(f, writer.commits); dump_bitmap(f, writer.trees); dump_bitmap(f, writer.blobs); diff --git a/pack-bitmap.c b/pack-bitmap.c index 4695aaf6b4..b53f37243c 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -163,7 +163,7 @@ static int load_bitmap_header(struct bitmap_index *index) } index->entry_count = ntohl(header->entry_count); - index->map_pos += sizeof(*header); + index->map_pos += sizeof(*header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz; return 0; } diff --git a/pack-bitmap.h b/pack-bitmap.h index 8418ba8c79..344ba23af9 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -14,7 +14,7 @@ struct bitmap_disk_header { uint16_t version; uint16_t options; uint32_t entry_count; - unsigned char checksum[20]; + unsigned char checksum[GIT_MAX_RAWSZ]; }; static const char BITMAP_IDX_SIGNATURE[] = {'B', 'I', 'T', 'M'}; From 53636539d37007099f14249a5dae4c05dc72ace4 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:04:55 +0000 Subject: [PATCH 04/35] pack-bitmap: convert struct stored_bitmap to object_id Convert struct stored_bitmap to use struct object_id. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- pack-bitmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pack-bitmap.c b/pack-bitmap.c index b53f37243c..c760913cea 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -18,7 +18,7 @@ * commit. 
*/ struct stored_bitmap { - unsigned char sha1[20]; + struct object_id oid; struct ewah_bitmap *root; struct stored_bitmap *xor; int flags; @@ -181,9 +181,9 @@ static struct stored_bitmap *store_bitmap(struct bitmap_index *index, stored->root = root; stored->xor = xor_with; stored->flags = flags; - hashcpy(stored->sha1, sha1); + oidread(&stored->oid, sha1); - hash_pos = kh_put_sha1(index->bitmaps, stored->sha1, &ret); + hash_pos = kh_put_sha1(index->bitmaps, stored->oid.hash, &ret); /* a 0 return code means the insertion succeeded with no changes, * because the SHA1 already existed on the map. this is bad, there @@ -1080,7 +1080,7 @@ int rebuild_existing_bitmaps(struct bitmap_index *bitmap_git, lookup_stored_bitmap(stored), rebuild)) { hash_pos = kh_put_sha1(reused_bitmaps, - stored->sha1, + stored->oid.hash, &hash_ret); kh_value(reused_bitmaps, hash_pos) = bitmap_to_ewah(rebuild); From 0dd4924891664e9e9970e427e84f902491fcd3d3 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:04:56 +0000 Subject: [PATCH 05/35] pack-bitmap: replace sha1_to_hex Replace the uses of sha1_to_hex in the pack bitmap code with hash_to_hex to allow the use of SHA-256 as well. Rename a few variables since they are no longer limited to SHA-1. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 6 +++--- pack-bitmap.c | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index c82fb01fd7..802ed62677 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -142,13 +142,13 @@ static inline void reset_all_seen(void) seen_objects_nr = 0; } -static uint32_t find_object_pos(const unsigned char *sha1) +static uint32_t find_object_pos(const unsigned char *hash) { - struct object_entry *entry = packlist_find(writer.to_pack, sha1, NULL); + struct object_entry *entry = packlist_find(writer.to_pack, hash, NULL); if (!entry) { die("Failed to write bitmap index. 
Packfile doesn't have full closure " - "(object %s is missing)", sha1_to_hex(sha1)); + "(object %s is missing)", hash_to_hex(hash)); } return oe_in_pack_pos(writer.to_pack, entry); diff --git a/pack-bitmap.c b/pack-bitmap.c index c760913cea..6d6fa68563 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -169,7 +169,7 @@ static int load_bitmap_header(struct bitmap_index *index) static struct stored_bitmap *store_bitmap(struct bitmap_index *index, struct ewah_bitmap *root, - const unsigned char *sha1, + const unsigned char *hash, struct stored_bitmap *xor_with, int flags) { @@ -181,7 +181,7 @@ static struct stored_bitmap *store_bitmap(struct bitmap_index *index, stored->root = root; stored->xor = xor_with; stored->flags = flags; - oidread(&stored->oid, sha1); + oidread(&stored->oid, hash); hash_pos = kh_put_sha1(index->bitmaps, stored->oid.hash, &ret); @@ -189,7 +189,7 @@ static struct stored_bitmap *store_bitmap(struct bitmap_index *index, * because the SHA1 already existed on the map. this is bad, there * shouldn't be duplicated commits in the index */ if (ret == 0) { - error("Duplicate entry in bitmap index: %s", sha1_to_hex(sha1)); + error("Duplicate entry in bitmap index: %s", hash_to_hex(hash)); return NULL; } @@ -805,7 +805,7 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, fprintf(stderr, "Failed to reuse at %d (%016llx)\n", reuse_objects, result->words[i]); - fprintf(stderr, " %s\n", sha1_to_hex(sha1)); + fprintf(stderr, " %s\n", hash_to_hex(sha1)); } #endif From 9941e920e0b5a79c5b7859cb59b9ab866a77b25f Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:04:57 +0000 Subject: [PATCH 06/35] pack-bitmap: switch hard-coded constants to the_hash_algo Switch two hard-coded uses of 20 to references to the_hash_algo. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- pack-bitmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pack-bitmap.c b/pack-bitmap.c index 6d6fa68563..603492c237 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -138,7 +138,7 @@ static int load_bitmap_header(struct bitmap_index *index) { struct bitmap_disk_header *header = (void *)index->map; - if (index->map_size < sizeof(*header) + 20) + if (index->map_size < sizeof(*header) + the_hash_algo->rawsz) return error("Corrupted bitmap index (missing header data)"); if (memcmp(header->magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)) != 0) @@ -157,7 +157,7 @@ static int load_bitmap_header(struct bitmap_index *index) "(Git requires BITMAP_OPT_FULL_DAG)"); if (flags & BITMAP_OPT_HASH_CACHE) { - unsigned char *end = index->map + index->map_size - 20; + unsigned char *end = index->map + index->map_size - the_hash_algo->rawsz; index->hashes = ((uint32_t *)end) - index->pack->num_objects; } } From 3c7714485dc8adc810b6c52058992cfc767dfcb5 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:04:58 +0000 Subject: [PATCH 07/35] pack-bitmap: switch hash tables to use struct object_id Instead of storing unsigned char pointers in the hash tables, switch to storing instances of struct object_id. Update several internal functions and one external function to take pointers to struct object_id. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 6 ++--- pack-bitmap.c | 58 +++++++++++++++++++++--------------------- pack-bitmap.h | 2 +- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index a154fc29f6..2d9a3bdc9d 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1487,6 +1487,7 @@ static int can_reuse_delta(const unsigned char *base_sha1, struct object_entry **base_out) { struct object_entry *base; + struct object_id base_oid; if (!base_sha1) return 0; @@ -1508,10 +1509,9 @@ static int can_reuse_delta(const unsigned char *base_sha1, * even if it was buried too deep in history to make it into the * packing list. */ - if (thin && bitmap_has_sha1_in_uninteresting(bitmap_git, base_sha1)) { + oidread(&base_oid, base_sha1); + if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, &base_oid)) { if (use_delta_islands) { - struct object_id base_oid; - hashcpy(base_oid.hash, base_sha1); if (!in_same_island(&delta->idx.oid, &base_oid)) return 0; } diff --git a/pack-bitmap.c b/pack-bitmap.c index 603492c237..70d51f4f50 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -60,8 +60,8 @@ struct bitmap_index { struct ewah_bitmap *blobs; struct ewah_bitmap *tags; - /* Map from SHA1 -> `stored_bitmap` for all the bitmapped commits */ - khash_sha1 *bitmaps; + /* Map from object ID -> `stored_bitmap` for all the bitmapped commits */ + kh_oid_map_t *bitmaps; /* Number of bitmapped commits */ uint32_t entry_count; @@ -80,7 +80,7 @@ struct bitmap_index { struct object **objects; uint32_t *hashes; uint32_t count, alloc; - khash_sha1_pos *positions; + kh_oid_pos_t *positions; } ext_index; /* Bitmap result of the last performed walk */ @@ -183,7 +183,7 @@ static struct stored_bitmap *store_bitmap(struct bitmap_index *index, stored->flags = flags; oidread(&stored->oid, hash); - hash_pos = kh_put_sha1(index->bitmaps, stored->oid.hash, &ret); + hash_pos = kh_put_oid_map(index->bitmaps, 
stored->oid, &ret); /* a 0 return code means the insertion succeeded with no changes, * because the SHA1 already existed on the map. this is bad, there @@ -306,8 +306,8 @@ static int load_pack_bitmap(struct bitmap_index *bitmap_git) { assert(bitmap_git->map); - bitmap_git->bitmaps = kh_init_sha1(); - bitmap_git->ext_index.positions = kh_init_sha1_pos(); + bitmap_git->bitmaps = kh_init_oid_map(); + bitmap_git->ext_index.positions = kh_init_oid_pos(); load_pack_revindex(bitmap_git->pack); if (!(bitmap_git->commits = read_bitmap_1(bitmap_git)) || @@ -362,10 +362,10 @@ struct include_data { }; static inline int bitmap_position_extended(struct bitmap_index *bitmap_git, - const unsigned char *sha1) + const struct object_id *oid) { - khash_sha1_pos *positions = bitmap_git->ext_index.positions; - khiter_t pos = kh_get_sha1_pos(positions, sha1); + khash_oid_pos *positions = bitmap_git->ext_index.positions; + khiter_t pos = kh_get_oid_pos(positions, *oid); if (pos < kh_end(positions)) { int bitmap_pos = kh_value(positions, pos); @@ -376,9 +376,9 @@ static inline int bitmap_position_extended(struct bitmap_index *bitmap_git, } static inline int bitmap_position_packfile(struct bitmap_index *bitmap_git, - const unsigned char *sha1) + const struct object_id *oid) { - off_t offset = find_pack_entry_one(sha1, bitmap_git->pack); + off_t offset = find_pack_entry_one(oid->hash, bitmap_git->pack); if (!offset) return -1; @@ -386,10 +386,10 @@ static inline int bitmap_position_packfile(struct bitmap_index *bitmap_git, } static int bitmap_position(struct bitmap_index *bitmap_git, - const unsigned char *sha1) + const struct object_id *oid) { - int pos = bitmap_position_packfile(bitmap_git, sha1); - return (pos >= 0) ? pos : bitmap_position_extended(bitmap_git, sha1); + int pos = bitmap_position_packfile(bitmap_git, oid); + return (pos >= 0) ? 
pos : bitmap_position_extended(bitmap_git, oid); } static int ext_index_add_object(struct bitmap_index *bitmap_git, @@ -401,7 +401,7 @@ static int ext_index_add_object(struct bitmap_index *bitmap_git, int hash_ret; int bitmap_pos; - hash_pos = kh_put_sha1_pos(eindex->positions, object->oid.hash, &hash_ret); + hash_pos = kh_put_oid_pos(eindex->positions, object->oid, &hash_ret); if (hash_ret > 0) { if (eindex->count >= eindex->alloc) { eindex->alloc = (eindex->alloc + 16) * 3 / 2; @@ -431,7 +431,7 @@ static void show_object(struct object *object, const char *name, void *data_) struct bitmap_show_data *data = data_; int bitmap_pos; - bitmap_pos = bitmap_position(data->bitmap_git, object->oid.hash); + bitmap_pos = bitmap_position(data->bitmap_git, &object->oid); if (bitmap_pos < 0) bitmap_pos = ext_index_add_object(data->bitmap_git, object, @@ -446,7 +446,7 @@ static void show_commit(struct commit *commit, void *data) static int add_to_include_set(struct bitmap_index *bitmap_git, struct include_data *data, - const unsigned char *sha1, + const struct object_id *oid, int bitmap_pos) { khiter_t hash_pos; @@ -457,7 +457,7 @@ static int add_to_include_set(struct bitmap_index *bitmap_git, if (bitmap_get(data->base, bitmap_pos)) return 0; - hash_pos = kh_get_sha1(bitmap_git->bitmaps, sha1); + hash_pos = kh_get_oid_map(bitmap_git->bitmaps, *oid); if (hash_pos < kh_end(bitmap_git->bitmaps)) { struct stored_bitmap *st = kh_value(bitmap_git->bitmaps, hash_pos); bitmap_or_ewah(data->base, lookup_stored_bitmap(st)); @@ -473,13 +473,13 @@ static int should_include(struct commit *commit, void *_data) struct include_data *data = _data; int bitmap_pos; - bitmap_pos = bitmap_position(data->bitmap_git, commit->object.oid.hash); + bitmap_pos = bitmap_position(data->bitmap_git, &commit->object.oid); if (bitmap_pos < 0) bitmap_pos = ext_index_add_object(data->bitmap_git, (struct object *)commit, NULL); - if (!add_to_include_set(data->bitmap_git, data, commit->object.oid.hash, + if 
(!add_to_include_set(data->bitmap_git, data, &commit->object.oid, bitmap_pos)) { struct commit_list *parent = commit->parents; @@ -517,7 +517,7 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, roots = roots->next; if (object->type == OBJ_COMMIT) { - khiter_t pos = kh_get_sha1(bitmap_git->bitmaps, object->oid.hash); + khiter_t pos = kh_get_oid_map(bitmap_git->bitmaps, object->oid); if (pos < kh_end(bitmap_git->bitmaps)) { struct stored_bitmap *st = kh_value(bitmap_git->bitmaps, pos); @@ -559,7 +559,7 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, int pos; roots = roots->next; - pos = bitmap_position(bitmap_git, object->oid.hash); + pos = bitmap_position(bitmap_git, &object->oid); if (pos < 0 || base == NULL || !bitmap_get(base, pos)) { object->flags &= ~UNINTERESTING; @@ -925,7 +925,7 @@ static void test_show_object(struct object *object, const char *name, struct bitmap_test_data *tdata = data; int bitmap_pos; - bitmap_pos = bitmap_position(tdata->bitmap_git, object->oid.hash); + bitmap_pos = bitmap_position(tdata->bitmap_git, &object->oid); if (bitmap_pos < 0) die("Object not in bitmap: %s\n", oid_to_hex(&object->oid)); @@ -939,7 +939,7 @@ static void test_show_commit(struct commit *commit, void *data) int bitmap_pos; bitmap_pos = bitmap_position(tdata->bitmap_git, - commit->object.oid.hash); + &commit->object.oid); if (bitmap_pos < 0) die("Object not in bitmap: %s\n", oid_to_hex(&commit->object.oid)); @@ -966,7 +966,7 @@ void test_bitmap_walk(struct rev_info *revs) bitmap_git->version, bitmap_git->entry_count); root = revs->pending.objects[0].item; - pos = kh_get_sha1(bitmap_git->bitmaps, root->oid.hash); + pos = kh_get_oid_map(bitmap_git->bitmaps, root->oid); if (pos < kh_end(bitmap_git->bitmaps)) { struct stored_bitmap *st = kh_value(bitmap_git->bitmaps, pos); @@ -1108,7 +1108,7 @@ void free_bitmap_index(struct bitmap_index *b) ewah_pool_free(b->trees); ewah_pool_free(b->blobs); ewah_pool_free(b->tags); - 
kh_destroy_sha1(b->bitmaps); + kh_destroy_oid_map(b->bitmaps); free(b->ext_index.objects); free(b->ext_index.hashes); bitmap_free(b->result); @@ -1116,8 +1116,8 @@ void free_bitmap_index(struct bitmap_index *b) free(b); } -int bitmap_has_sha1_in_uninteresting(struct bitmap_index *bitmap_git, - const unsigned char *sha1) +int bitmap_has_oid_in_uninteresting(struct bitmap_index *bitmap_git, + const struct object_id *oid) { int pos; @@ -1126,7 +1126,7 @@ int bitmap_has_sha1_in_uninteresting(struct bitmap_index *bitmap_git, if (!bitmap_git->haves) return 0; /* walk had no "haves" */ - pos = bitmap_position_packfile(bitmap_git, sha1); + pos = bitmap_position_packfile(bitmap_git, oid); if (pos < 0) return 0; diff --git a/pack-bitmap.h b/pack-bitmap.h index 344ba23af9..ee9792264c 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -59,7 +59,7 @@ void free_bitmap_index(struct bitmap_index *); * queried to see if a particular object was reachable from any of the * objects flagged as UNINTERESTING. */ -int bitmap_has_sha1_in_uninteresting(struct bitmap_index *, const unsigned char *sha1); +int bitmap_has_oid_in_uninteresting(struct bitmap_index *, const struct object_id *oid); void bitmap_writer_show_progress(int show); void bitmap_writer_set_checksum(unsigned char *sha1); From db1ba2a2302e7942981c70f9356c70e21e3f7bc7 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:04:59 +0000 Subject: [PATCH 08/35] submodule: avoid hard-coded constants Instead of using hard-coded 40-based constants, express these values in terms of the_hash_algo and GIT_MAX_HEXSZ. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- merge-recursive.c | 2 +- submodule.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/merge-recursive.c b/merge-recursive.c index 6c40c61c47..6126773a7b 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -1122,7 +1122,7 @@ static int find_first_merges(struct repository *repo, struct commit *commit; int contains_another; - char merged_revision[42]; + char merged_revision[GIT_MAX_HEXSZ + 2]; const char *rev_args[] = { "rev-list", "--merges", "--ancestry-path", "--all", merged_revision, NULL }; struct rev_info revs; diff --git a/submodule.c b/submodule.c index 21cf50ca15..150d955899 100644 --- a/submodule.c +++ b/submodule.c @@ -994,7 +994,7 @@ static int submodule_needs_pushing(struct repository *r, if (start_command(&cp)) die("Could not run 'git rev-list --not --remotes -n 1' command in submodule %s", path); - if (strbuf_read(&buf, cp.out, 41)) + if (strbuf_read(&buf, cp.out, the_hash_algo->hexsz + 1)) needs_pushing = 1; finish_command(&cp); close(cp.out); From 22350307550c8503ef1738f911cabba63c3a4901 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:00 +0000 Subject: [PATCH 09/35] notes-merge: switch to use the_hash_algo Switch from using GIT_SHA1_HEXSZ to GIT_MAX_HEXSZ and the_hash_algo so that the code works with any hash algorithm. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- notes-merge.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/notes-merge.c b/notes-merge.c index 280aa8e6c1..2fe724f1cf 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -29,14 +29,14 @@ void init_notes_merge_options(struct repository *r, static int path_to_oid(const char *path, struct object_id *oid) { - char hex_oid[GIT_SHA1_HEXSZ]; + char hex_oid[GIT_MAX_HEXSZ]; int i = 0; - while (*path && i < GIT_SHA1_HEXSZ) { + while (*path && i < the_hash_algo->hexsz) { if (*path != '/') hex_oid[i++] = *path; path++; } - if (*path || i != GIT_SHA1_HEXSZ) + if (*path || i != the_hash_algo->hexsz) return -1; return get_oid_hex(hex_oid, oid); } From dd43745131ca20f728746c37ffbbf1f8946052d1 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:01 +0000 Subject: [PATCH 10/35] notes: make hash size independent Switch out various uses of the GIT_SHA1_* constants with GIT_MAX_* constants for allocations and the_hash_algo for general parsing. Update a comment to no longer be SHA-1 specific. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- notes.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/notes.c b/notes.c index 7f7cc4d511..5457bd70c6 100644 --- a/notes.c +++ b/notes.c @@ -67,8 +67,9 @@ struct non_note { #define GET_NIBBLE(n, sha1) ((((sha1)[(n) >> 1]) >> ((~(n) & 0x01) << 2)) & 0x0f) -#define KEY_INDEX (GIT_SHA1_RAWSZ - 1) -#define FANOUT_PATH_SEPARATORS ((GIT_SHA1_HEXSZ / 2) - 1) +#define KEY_INDEX (the_hash_algo->rawsz - 1) +#define FANOUT_PATH_SEPARATORS (the_hash_algo->rawsz - 1) +#define FANOUT_PATH_SEPARATORS_MAX ((GIT_MAX_HEXSZ / 2) - 1) #define SUBTREE_SHA1_PREFIXCMP(key_sha1, subtree_sha1) \ (memcmp(key_sha1, subtree_sha1, subtree_sha1[KEY_INDEX])) @@ -198,7 +199,7 @@ static void note_tree_remove(struct notes_tree *t, struct leaf_node *entry) { struct leaf_node *l; - struct int_node *parent_stack[GIT_SHA1_RAWSZ]; + struct int_node *parent_stack[GIT_MAX_RAWSZ]; unsigned char i, j; void **p = note_tree_search(t, &tree, &n, entry->key_oid.hash); @@ -394,6 +395,7 @@ static void load_subtree(struct notes_tree *t, struct leaf_node *subtree, void *buf; struct tree_desc desc; struct name_entry entry; + const unsigned hashsz = the_hash_algo->rawsz; buf = fill_tree_descriptor(&desc, &subtree->val_oid); if (!buf) @@ -401,7 +403,7 @@ static void load_subtree(struct notes_tree *t, struct leaf_node *subtree, oid_to_hex(&subtree->val_oid)); prefix_len = subtree->key_oid.hash[KEY_INDEX]; - if (prefix_len >= GIT_SHA1_RAWSZ) + if (prefix_len >= hashsz) BUG("prefix_len (%"PRIuMAX") is out of range", (uintmax_t)prefix_len); if (prefix_len * 2 < n) BUG("prefix_len (%"PRIuMAX") is too small", (uintmax_t)prefix_len); @@ -411,7 +413,7 @@ static void load_subtree(struct notes_tree *t, struct leaf_node *subtree, struct leaf_node *l; size_t path_len = strlen(entry.path); - if (path_len == 2 * (GIT_SHA1_RAWSZ - prefix_len)) { + if (path_len == 2 * (hashsz - prefix_len)) { /* This is potentially the remainder of the 
SHA-1 */ if (!S_ISREG(entry.mode)) @@ -419,7 +421,7 @@ static void load_subtree(struct notes_tree *t, struct leaf_node *subtree, goto handle_non_note; if (hex_to_bytes(object_oid.hash + prefix_len, entry.path, - GIT_SHA1_RAWSZ - prefix_len)) + hashsz - prefix_len)) goto handle_non_note; /* entry.path is not a SHA1 */ type = PTR_TYPE_NOTE; @@ -439,7 +441,7 @@ static void load_subtree(struct notes_tree *t, struct leaf_node *subtree, * except for the last byte, where we write * the length: */ - memset(object_oid.hash + len, 0, GIT_SHA1_RAWSZ - len - 1); + memset(object_oid.hash + len, 0, hashsz - len - 1); object_oid.hash[KEY_INDEX] = (unsigned char)len; type = PTR_TYPE_SUBTREE; @@ -527,15 +529,15 @@ static unsigned char determine_fanout(struct int_node *tree, unsigned char n, return fanout + 1; } -/* hex SHA1 + 19 * '/' + NUL */ -#define FANOUT_PATH_MAX GIT_SHA1_HEXSZ + FANOUT_PATH_SEPARATORS + 1 +/* hex oid + '/' between each pair of hex digits + NUL */ +#define FANOUT_PATH_MAX GIT_MAX_HEXSZ + FANOUT_PATH_SEPARATORS_MAX + 1 static void construct_path_with_fanout(const unsigned char *sha1, unsigned char fanout, char *path) { unsigned int i = 0, j = 0; const char *hex_sha1 = sha1_to_hex(sha1); - assert(fanout < GIT_SHA1_RAWSZ); + assert(fanout < the_hash_algo->rawsz); while (fanout) { path[i++] = hex_sha1[j++]; path[i++] = hex_sha1[j++]; @@ -637,10 +639,10 @@ static inline int matches_tree_write_stack(struct tree_write_stack *tws, static void write_tree_entry(struct strbuf *buf, unsigned int mode, const char *path, unsigned int path_len, const - unsigned char *sha1) + unsigned char *hash) { strbuf_addf(buf, "%o %.*s%c", mode, path_len, path, '\0'); - strbuf_add(buf, sha1, GIT_SHA1_RAWSZ); + strbuf_add(buf, hash, the_hash_algo->rawsz); } static void tree_write_stack_init_subtree(struct tree_write_stack *tws, @@ -652,7 +654,7 @@ static void tree_write_stack_init_subtree(struct tree_write_stack *tws, n = (struct tree_write_stack *) xmalloc(sizeof(struct 
tree_write_stack)); n->next = NULL; - strbuf_init(&n->buf, 256 * (32 + GIT_SHA1_HEXSZ)); /* assume 256 entries per tree */ + strbuf_init(&n->buf, 256 * (32 + the_hash_algo->hexsz)); /* assume 256 entries per tree */ n->path[0] = n->path[1] = '\0'; tws->next = n; tws->path[0] = path[0]; @@ -757,7 +759,7 @@ static int write_each_note(const struct object_id *object_oid, note_path[note_path_len] = '\0'; mode = 040000; } - assert(note_path_len <= GIT_SHA1_HEXSZ + FANOUT_PATH_SEPARATORS); + assert(note_path_len <= GIT_MAX_HEXSZ + FANOUT_PATH_SEPARATORS); /* Weave non-note entries into note entries */ return write_each_non_note_until(note_path, d) || @@ -1137,7 +1139,7 @@ int write_notes_tree(struct notes_tree *t, struct object_id *result) /* Prepare for traversal of current notes tree */ root.next = NULL; /* last forward entry in list is grounded */ - strbuf_init(&root.buf, 256 * (32 + GIT_SHA1_HEXSZ)); /* assume 256 entries */ + strbuf_init(&root.buf, 256 * (32 + the_hash_algo->hexsz)); /* assume 256 entries */ root.path[0] = root.path[1] = '\0'; cb_data.root = &root; cb_data.next_non_note = t->first_non_note; From 0dbc6462ee8853da1ecca7b50cd0c0c9bc62e25b Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:02 +0000 Subject: [PATCH 11/35] notes: replace sha1_to_hex Replace the uses of sha1_to_hex in this function with hash_to_hex to allow the use of SHA-256 as well. Rename some variables since this code is no longer limited to SHA-1. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- notes.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/notes.c b/notes.c index 5457bd70c6..be72780a8f 100644 --- a/notes.c +++ b/notes.c @@ -532,19 +532,19 @@ static unsigned char determine_fanout(struct int_node *tree, unsigned char n, /* hex oid + '/' between each pair of hex digits + NUL */ #define FANOUT_PATH_MAX GIT_MAX_HEXSZ + FANOUT_PATH_SEPARATORS_MAX + 1 -static void construct_path_with_fanout(const unsigned char *sha1, +static void construct_path_with_fanout(const unsigned char *hash, unsigned char fanout, char *path) { unsigned int i = 0, j = 0; - const char *hex_sha1 = sha1_to_hex(sha1); + const char *hex_hash = hash_to_hex(hash); assert(fanout < the_hash_algo->rawsz); while (fanout) { - path[i++] = hex_sha1[j++]; - path[i++] = hex_sha1[j++]; + path[i++] = hex_hash[j++]; + path[i++] = hex_hash[j++]; path[i++] = '/'; fanout--; } - xsnprintf(path + i, FANOUT_PATH_MAX - i, "%s", hex_sha1 + j); + xsnprintf(path + i, FANOUT_PATH_MAX - i, "%s", hex_hash + j); } static int for_each_note_helper(struct notes_tree *t, struct int_node *tree, @@ -1167,7 +1167,7 @@ void prune_notes(struct notes_tree *t, int flags) while (l) { if (flags & NOTES_PRUNE_VERBOSE) - printf("%s\n", sha1_to_hex(l->sha1)); + printf("%s\n", hash_to_hex(l->sha1)); if (!(flags & NOTES_PRUNE_DRYRUN)) remove_note(t, l->sha1); l = l->next; From 538b1523246ba0845564a6b703c6e4ff1921c16a Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:03 +0000 Subject: [PATCH 12/35] object-store: rename and expand packed_git's sha1 member This member is used to represent the pack checksum of the pack in question. Expand this member to be GIT_MAX_RAWSZ bytes in length so it works with longer hashes and rename it to be "hash" instead of "sha1". 
This transformation was made with a change to the definition and the following semantic patch: @@ struct packed_git *E1; @@ - E1->sha1 + E1->hash @@ struct packed_git E1; @@ - E1.sha1 + E1.hash Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- builtin/pack-redundant.c | 2 +- fast-import.c | 17 +++++++++-------- http-push.c | 3 ++- http-walker.c | 2 +- http.c | 13 +++++++------ object-store.h | 2 +- packfile.c | 6 +++--- 7 files changed, 24 insertions(+), 21 deletions(-) diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c index 68c1e547c2..178e3409b7 100644 --- a/builtin/pack-redundant.c +++ b/builtin/pack-redundant.c @@ -641,7 +641,7 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix) pl = red = pack_list_difference(local_packs, min); while (pl) { printf("%s\n%s\n", - sha1_pack_index_name(pl->pack->sha1), + sha1_pack_index_name(pl->pack->hash), pl->pack->pack_name); pl = pl->next; } diff --git a/fast-import.c b/fast-import.c index b7ba755c2b..7c9a10a77b 100644 --- a/fast-import.c +++ b/fast-import.c @@ -742,7 +742,8 @@ static const char *create_index(void) if (c != last) die("internal consistency error creating the index"); - tmpfile = write_idx_file(NULL, idx, object_count, &pack_idx_opts, pack_data->sha1); + tmpfile = write_idx_file(NULL, idx, object_count, &pack_idx_opts, + pack_data->hash); free(idx); return tmpfile; } @@ -753,7 +754,7 @@ static char *keep_pack(const char *curr_index_name) struct strbuf name = STRBUF_INIT; int keep_fd; - odb_pack_name(&name, pack_data->sha1, "keep"); + odb_pack_name(&name, pack_data->hash, "keep"); keep_fd = odb_pack_keep(name.buf); if (keep_fd < 0) die_errno("cannot create keep file"); @@ -761,11 +762,11 @@ static char *keep_pack(const char *curr_index_name) if (close(keep_fd)) die_errno("failed to write keep file"); - odb_pack_name(&name, pack_data->sha1, "pack"); + odb_pack_name(&name, pack_data->hash, "pack"); if (finalize_object_file(pack_data->pack_name, name.buf)) 
die("cannot store pack file"); - odb_pack_name(&name, pack_data->sha1, "idx"); + odb_pack_name(&name, pack_data->hash, "idx"); if (finalize_object_file(curr_index_name, name.buf)) die("cannot store index file"); free((void *)curr_index_name); @@ -779,7 +780,7 @@ static void unkeep_all_packs(void) for (k = 0; k < pack_id; k++) { struct packed_git *p = all_packs[k]; - odb_pack_name(&name, p->sha1, "keep"); + odb_pack_name(&name, p->hash, "keep"); unlink_or_warn(name.buf); } strbuf_release(&name); @@ -821,9 +822,9 @@ static void end_packfile(void) close_pack_windows(pack_data); finalize_hashfile(pack_file, cur_pack_oid.hash, 0); - fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1, - pack_data->pack_name, object_count, - cur_pack_oid.hash, pack_size); + fixup_pack_header_footer(pack_data->pack_fd, pack_data->hash, + pack_data->pack_name, object_count, + cur_pack_oid.hash, pack_size); if (object_count <= unpack_limit) { if (!loosen_small_pack(pack_data)) { diff --git a/http-push.c b/http-push.c index b22c7caea0..b313ada515 100644 --- a/http-push.c +++ b/http-push.c @@ -315,7 +315,8 @@ static void start_fetch_packed(struct transfer_request *request) return; } - fprintf(stderr, "Fetching pack %s\n", sha1_to_hex(target->sha1)); + fprintf(stderr, "Fetching pack %s\n", + sha1_to_hex(target->hash)); fprintf(stderr, " which contains %s\n", oid_to_hex(&request->obj->oid)); preq = new_http_pack_request(target, repo->url); diff --git a/http-walker.c b/http-walker.c index 8ae5d76c6a..8063896cf6 100644 --- a/http-walker.c +++ b/http-walker.c @@ -434,7 +434,7 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigne if (walker->get_verbosely) { fprintf(stderr, "Getting pack %s\n", - sha1_to_hex(target->sha1)); + sha1_to_hex(target->hash)); fprintf(stderr, " which contains %s\n", sha1_to_hex(sha1)); } diff --git a/http.c b/http.c index a32ad36ddf..a09adc518f 100644 --- a/http.c +++ b/http.c @@ -2236,10 +2236,10 @@ int 
finish_http_pack_request(struct http_pack_request *preq) return -1; } - unlink(sha1_pack_index_name(p->sha1)); + unlink(sha1_pack_index_name(p->hash)); - if (finalize_object_file(preq->tmpfile.buf, sha1_pack_name(p->sha1)) - || finalize_object_file(tmp_idx, sha1_pack_index_name(p->sha1))) { + if (finalize_object_file(preq->tmpfile.buf, sha1_pack_name(p->hash)) + || finalize_object_file(tmp_idx, sha1_pack_index_name(p->hash))) { free(tmp_idx); return -1; } @@ -2262,10 +2262,10 @@ struct http_pack_request *new_http_pack_request( end_url_with_slash(&buf, base_url); strbuf_addf(&buf, "objects/pack/pack-%s.pack", - sha1_to_hex(target->sha1)); + sha1_to_hex(target->hash)); preq->url = strbuf_detach(&buf, NULL); - strbuf_addf(&preq->tmpfile, "%s.temp", sha1_pack_name(target->sha1)); + strbuf_addf(&preq->tmpfile, "%s.temp", sha1_pack_name(target->hash)); preq->packfile = fopen(preq->tmpfile.buf, "a"); if (!preq->packfile) { error("Unable to open local file %s for pack", @@ -2289,7 +2289,8 @@ struct http_pack_request *new_http_pack_request( if (http_is_verbose) fprintf(stderr, "Resuming fetch of pack %s at byte %"PRIuMAX"\n", - sha1_to_hex(target->sha1), (uintmax_t)prev_posn); + sha1_to_hex(target->hash), + (uintmax_t)prev_posn); http_opt_request_remainder(preq->slot->curl, prev_posn); } diff --git a/object-store.h b/object-store.h index 14fc935bd1..56f8aea1cc 100644 --- a/object-store.h +++ b/object-store.h @@ -77,7 +77,7 @@ struct packed_git { freshened:1, do_not_close:1, pack_promisor:1; - unsigned char sha1[20]; + unsigned char hash[GIT_MAX_RAWSZ]; struct revindex_entry *revindex; /* something like ".git/objects/pack/xxxxx.pack" */ char pack_name[FLEX_ARRAY]; /* more */ diff --git a/packfile.c b/packfile.c index 16bcb75262..e6e8861650 100644 --- a/packfile.c +++ b/packfile.c @@ -235,7 +235,7 @@ struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path) struct packed_git *p = alloc_packed_git(alloc); memcpy(p->pack_name, path, alloc); /* includes NUL 
*/ - hashcpy(p->sha1, sha1); + hashcpy(p->hash, sha1); if (check_packed_git_idx(idx_path, p)) { free(p); return NULL; @@ -722,8 +722,8 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local) p->pack_local = local; p->mtime = st.st_mtime; if (path_len < the_hash_algo->hexsz || - get_sha1_hex(path + path_len - the_hash_algo->hexsz, p->sha1)) - hashclr(p->sha1); + get_sha1_hex(path + path_len - the_hash_algo->hexsz, p->hash)) + hashclr(p->hash); return p; } From 1c4675dc57b96f108adcfebb1fcfd67128ae856e Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:04 +0000 Subject: [PATCH 13/35] builtin/name-rev: make hash-size independent Use the_hash_algo when parsing instead of GIT_SHA1_HEXSZ so that this function works with any size hash. Rename the variable forty to counter, as this is a better name and is independent of the hash size. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- builtin/name-rev.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/builtin/name-rev.c b/builtin/name-rev.c index f1cb45c227..05ccf53e00 100644 --- a/builtin/name-rev.c +++ b/builtin/name-rev.c @@ -361,23 +361,25 @@ static char const * const name_rev_usage[] = { static void name_rev_line(char *p, struct name_ref_data *data) { struct strbuf buf = STRBUF_INIT; - int forty = 0; + int counter = 0; char *p_start; + const unsigned hexsz = the_hash_algo->hexsz; + for (p_start = p; *p; p++) { #define ishex(x) (isdigit((x)) || ((x) >= 'a' && (x) <= 'f')) if (!ishex(*p)) - forty = 0; - else if (++forty == GIT_SHA1_HEXSZ && + counter = 0; + else if (++counter == hexsz && !ishex(*(p+1))) { struct object_id oid; const char *name = NULL; char c = *(p+1); int p_len = p - p_start + 1; - forty = 0; + counter = 0; *(p+1) = 0; - if (!get_oid(p - (GIT_SHA1_HEXSZ - 1), &oid)) { + if (!get_oid(p - (hexsz - 1), &oid)) { struct object *o = lookup_object(the_repository, oid.hash); @@ -390,7 +392,7 @@ static void 
name_rev_line(char *p, struct name_ref_data *data) continue; if (data->name_only) - printf("%.*s%s", p_len - GIT_SHA1_HEXSZ, p_start, name); + printf("%.*s%s", p_len - hexsz, p_start, name); else printf("%.*s (%s)", p_len, p_start, name); p_start = p + 1; From 28d055bde9436dc1180d58aec2406579ab6d6307 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:05 +0000 Subject: [PATCH 14/35] fast-import: make hash-size independent Replace several uses of GIT_SHA1_HEXSZ and 40-based constants with references to the_hash_algo. Update the note handling code here to compute path sizes based on GIT_MAX_RAWSZ as well. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- fast-import.c | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/fast-import.c b/fast-import.c index 7c9a10a77b..464db71c75 100644 --- a/fast-import.c +++ b/fast-import.c @@ -29,6 +29,13 @@ */ #define NO_DELTA S_ISUID +/* + * The amount of additional space required in order to write an object into the + * current pack. This is the hash lengths at the end of the pack, plus the + * length of one object ID. + */ +#define PACK_SIZE_THRESHOLD (the_hash_algo->rawsz * 3) + struct object_entry { struct pack_idx_entry idx; struct object_entry *next; @@ -949,8 +956,9 @@ static int store_object( git_deflate_end(&s); /* Determine if we should auto-checkpoint. */ - if ((max_packsize && (pack_size + 60 + s.total_out) > max_packsize) - || (pack_size + 60 + s.total_out) < pack_size) { + if ((max_packsize + && (pack_size + PACK_SIZE_THRESHOLD + s.total_out) > max_packsize) + || (pack_size + PACK_SIZE_THRESHOLD + s.total_out) < pack_size) { /* This new object needs to *not* have the current pack_id. */ e->pack_id = pack_id + 1; @@ -1045,8 +1053,9 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) int status = Z_OK; /* Determine if we should auto-checkpoint. 
*/ - if ((max_packsize && (pack_size + 60 + len) > max_packsize) - || (pack_size + 60 + len) < pack_size) + if ((max_packsize + && (pack_size + PACK_SIZE_THRESHOLD + len) > max_packsize) + || (pack_size + PACK_SIZE_THRESHOLD + len) < pack_size) cycle_packfile(); hashfile_checkpoint(pack_file, &checkpoint); @@ -1241,7 +1250,7 @@ static void load_tree(struct tree_entry *root) c += e->name->str_len + 1; hashcpy(e->versions[0].oid.hash, (unsigned char *)c); hashcpy(e->versions[1].oid.hash, (unsigned char *)c); - c += GIT_SHA1_RAWSZ; + c += the_hash_algo->rawsz; } free(buf); } @@ -1288,7 +1297,7 @@ static void mktree(struct tree_content *t, int v, struct strbuf *b) strbuf_addf(b, "%o %s%c", (unsigned int)(e->versions[v].mode & ~NO_DELTA), e->name->str_dat, '\0'); - strbuf_add(b, e->versions[v].oid.hash, GIT_SHA1_RAWSZ); + strbuf_add(b, e->versions[v].oid.hash, the_hash_algo->rawsz); } } @@ -2047,7 +2056,9 @@ static uintmax_t do_change_note_fanout( unsigned int i, tmp_hex_oid_len, tmp_fullpath_len; uintmax_t num_notes = 0; struct object_id oid; - char realpath[60]; + /* hex oid + '/' between each pair of hex digits + NUL */ + char realpath[GIT_MAX_HEXSZ + ((GIT_MAX_HEXSZ / 2) - 1) + 1]; + const unsigned hexsz = the_hash_algo->hexsz; if (!root->tree) load_tree(root); @@ -2067,7 +2078,7 @@ static uintmax_t do_change_note_fanout( * of 2 chars. 
*/ if (!e->versions[1].mode || - tmp_hex_oid_len > GIT_SHA1_HEXSZ || + tmp_hex_oid_len > hexsz || e->name->str_len % 2) continue; @@ -2081,7 +2092,7 @@ static uintmax_t do_change_note_fanout( tmp_fullpath_len += e->name->str_len; fullpath[tmp_fullpath_len] = '\0'; - if (tmp_hex_oid_len == GIT_SHA1_HEXSZ && !get_oid_hex(hex_oid, &oid)) { + if (tmp_hex_oid_len == hexsz && !get_oid_hex(hex_oid, &oid)) { /* This is a note entry */ if (fanout == 0xff) { /* Counting mode, no rename */ @@ -2352,7 +2363,7 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa struct object_entry *oe; struct branch *s; struct object_id oid, commit_oid; - char path[60]; + char path[GIT_MAX_RAWSZ * 3]; uint16_t inline_data = 0; unsigned char new_fanout; @@ -2405,7 +2416,7 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa char *buf = read_object_with_reference(&commit_oid, commit_type, &size, &commit_oid); - if (!buf || size < 46) + if (!buf || size < the_hash_algo->hexsz + 6) die("Not a valid commit: %s", p); free(buf); } else @@ -2456,7 +2467,7 @@ static void file_change_deleteall(struct branch *b) static void parse_from_commit(struct branch *b, char *buf, unsigned long size) { - if (!buf || size < GIT_SHA1_HEXSZ + 6) + if (!buf || size < the_hash_algo->hexsz + 6) die("Not a valid commit: %s", oid_to_hex(&b->oid)); if (memcmp("tree ", buf, 5) || get_oid_hex(buf + 5, &b->branch_tree.versions[1].oid)) @@ -2555,7 +2566,7 @@ static struct hash_list *parse_merge(unsigned int *count) char *buf = read_object_with_reference(&n->oid, commit_type, &size, &n->oid); - if (!buf || size < 46) + if (!buf || size < the_hash_algo->hexsz + 6) die("Not a valid commit: %s", from); free(buf); } else @@ -2842,7 +2853,7 @@ static void parse_get_mark(const char *p) die("Unknown mark: %s", command_buf.buf); xsnprintf(output, sizeof(output), "%s\n", oid_to_hex(&oe->idx.oid)); - cat_blob_write(output, GIT_SHA1_HEXSZ + 1); + cat_blob_write(output, 
the_hash_algo->hexsz + 1); } static void parse_cat_blob(const char *p) @@ -2872,6 +2883,8 @@ static struct object_entry *dereference(struct object_entry *oe, { unsigned long size; char *buf = NULL; + const unsigned hexsz = the_hash_algo->hexsz; + if (!oe) { enum object_type type = oid_object_info(the_repository, oid, NULL); @@ -2905,12 +2918,12 @@ static struct object_entry *dereference(struct object_entry *oe, /* Peel one layer. */ switch (oe->type) { case OBJ_TAG: - if (size < GIT_SHA1_HEXSZ + strlen("object ") || + if (size < hexsz + strlen("object ") || get_oid_hex(buf + strlen("object "), oid)) die("Invalid SHA1 in tag: %s", command_buf.buf); break; case OBJ_COMMIT: - if (size < GIT_SHA1_HEXSZ + strlen("tree ") || + if (size < hexsz + strlen("tree ") || get_oid_hex(buf + strlen("tree "), oid)) die("Invalid SHA1 in commit: %s", command_buf.buf); } From ef479a12bd9e2891c7a1262d95f7d540f79e4a81 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:06 +0000 Subject: [PATCH 15/35] fast-import: replace sha1_to_hex Replace the uses of sha1_to_hex in this function with hash_to_hex to allow the use of SHA-256 as well. Rename a variable since it is no longer limited to SHA-1. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- fast-import.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fast-import.c b/fast-import.c index 464db71c75..35ba2629b5 100644 --- a/fast-import.c +++ b/fast-import.c @@ -2955,7 +2955,7 @@ static struct object_entry *parse_treeish_dataref(const char **p) return e; } -static void print_ls(int mode, const unsigned char *sha1, const char *path) +static void print_ls(int mode, const unsigned char *hash, const char *path) { static struct strbuf line = STRBUF_INIT; @@ -2975,7 +2975,7 @@ static void print_ls(int mode, const unsigned char *sha1, const char *path) /* mode SP type SP object_name TAB path LF */ strbuf_reset(&line); strbuf_addf(&line, "%06o %s %s\t", - mode & ~NO_DELTA, type, sha1_to_hex(sha1)); + mode & ~NO_DELTA, type, hash_to_hex(hash)); quote_c_style(path, &line, NULL, 0); strbuf_addch(&line, '\n'); } From 24dd363ed586f5edbdea96689d4e0e40a7d3f7fa Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:07 +0000 Subject: [PATCH 16/35] builtin/am: make hash size independent Instead of using GIT_SHA1_HEXSZ, switch to using the_hash_algo and parse_oid_hex to parse the lines involved in rebasing notes. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- builtin/am.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/builtin/am.c b/builtin/am.c index 4fb107a9d1..5556a0651e 100644 --- a/builtin/am.c +++ b/builtin/am.c @@ -486,23 +486,24 @@ static int copy_notes_for_rebase(const struct am_state *state) while (!strbuf_getline_lf(&sb, fp)) { struct object_id from_obj, to_obj; + const char *p; - if (sb.len != GIT_SHA1_HEXSZ * 2 + 1) { + if (sb.len != the_hash_algo->hexsz * 2 + 1) { ret = error(invalid_line, sb.buf); goto finish; } - if (get_oid_hex(sb.buf, &from_obj)) { + if (parse_oid_hex(sb.buf, &from_obj, &p)) { ret = error(invalid_line, sb.buf); goto finish; } - if (sb.buf[GIT_SHA1_HEXSZ] != ' ') { + if (*p != ' ') { ret = error(invalid_line, sb.buf); goto finish; } - if (get_oid_hex(sb.buf + GIT_SHA1_HEXSZ + 1, &to_obj)) { + if (get_oid_hex(p + 1, &to_obj)) { ret = error(invalid_line, sb.buf); goto finish; } From fbfc089d913772f96f9562a3dbddaed28809fe72 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:08 +0000 Subject: [PATCH 17/35] builtin/pull: make hash-size independent Instead of using get_oid_hex and GIT_SHA1_HEXSZ, use parse_oid_hex to avoid the need for a constant and simplify the code. Additionally, fix some comments to refer to object IDs instead of SHA-1 and update a constant used to provide an allocation hint. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- builtin/pull.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/builtin/pull.c b/builtin/pull.c index 33db889955..9bd6a78081 100644 --- a/builtin/pull.c +++ b/builtin/pull.c @@ -369,9 +369,10 @@ static void get_merge_heads(struct oid_array *merge_heads) fp = xfopen(filename, "r"); while (strbuf_getline_lf(&sb, fp) != EOF) { - if (get_oid_hex(sb.buf, &oid)) - continue; /* invalid line: does not start with SHA1 */ - if (starts_with(sb.buf + GIT_SHA1_HEXSZ, "\tnot-for-merge\t")) + const char *p; + if (parse_oid_hex(sb.buf, &oid, &p)) + continue; /* invalid line: does not start with object ID */ + if (starts_with(p, "\tnot-for-merge\t")) continue; /* ref is not-for-merge */ oid_array_append(merge_heads, &oid); } @@ -760,7 +761,7 @@ static int get_rebase_fork_point(struct object_id *fork_point, const char *repo, cp.no_stderr = 1; cp.git_cmd = 1; - ret = capture_command(&cp, &sb, GIT_SHA1_HEXSZ); + ret = capture_command(&cp, &sb, GIT_MAX_HEXSZ); if (ret) goto cleanup; @@ -805,7 +806,7 @@ static int get_octopus_merge_base(struct object_id *merge_base, } /** - * Given the current HEAD SHA1, the merge head returned from git-fetch and the + * Given the current HEAD oid, the merge head returned from git-fetch and the * fork point calculated by get_rebase_fork_point(), runs git-rebase with the * appropriate arguments and returns its exit status. */ From f024b87a086643bbe6c869af11eee27aaaebb074 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:09 +0000 Subject: [PATCH 18/35] http-push: convert to use the_hash_algo Switch the lock token code to use the_hash_algo and increase its buffers to be allocated using GIT_MAX_* constants. Update the parsing of object paths to use the_hash_algo as well. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- http-push.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/http-push.c b/http-push.c index b313ada515..c4e5142bc9 100644 --- a/http-push.c +++ b/http-push.c @@ -145,7 +145,7 @@ struct remote_lock { char *url; char *owner; char *token; - char tmpfile_suffix[41]; + char tmpfile_suffix[GIT_MAX_HEXSZ + 1]; time_t start_time; long timeout; int refreshing; @@ -399,7 +399,7 @@ static void start_put(struct transfer_request *request) request->dest = strbuf_detach(&buf, NULL); append_remote_object_url(&buf, repo->url, hex, 0); - strbuf_add(&buf, request->lock->tmpfile_suffix, 41); + strbuf_add(&buf, request->lock->tmpfile_suffix, the_hash_algo->hexsz + 1); request->url = strbuf_detach(&buf, NULL); slot = get_active_slot(); @@ -758,8 +758,8 @@ static void handle_lockprop_ctx(struct xml_ctx *ctx, int tag_closed) static void handle_new_lock_ctx(struct xml_ctx *ctx, int tag_closed) { struct remote_lock *lock = (struct remote_lock *)ctx->userData; - git_SHA_CTX sha_ctx; - unsigned char lock_token_sha1[20]; + git_hash_ctx hash_ctx; + unsigned char lock_token_hash[GIT_MAX_RAWSZ]; if (tag_closed && ctx->cdata) { if (!strcmp(ctx->name, DAV_ACTIVELOCK_OWNER)) { @@ -771,12 +771,12 @@ static void handle_new_lock_ctx(struct xml_ctx *ctx, int tag_closed) } else if (!strcmp(ctx->name, DAV_ACTIVELOCK_TOKEN)) { lock->token = xstrdup(ctx->cdata); - git_SHA1_Init(&sha_ctx); - git_SHA1_Update(&sha_ctx, lock->token, strlen(lock->token)); - git_SHA1_Final(lock_token_sha1, &sha_ctx); + the_hash_algo->init_fn(&hash_ctx); + the_hash_algo->update_fn(&hash_ctx, lock->token, strlen(lock->token)); + the_hash_algo->final_fn(lock_token_hash, &hash_ctx); lock->tmpfile_suffix[0] = '_'; - memcpy(lock->tmpfile_suffix + 1, sha1_to_hex(lock_token_sha1), 40); + memcpy(lock->tmpfile_suffix + 1, hash_to_hex(lock_token_hash), the_hash_algo->hexsz); } } } @@ -1018,7 +1018,7 @@ static void remote_ls(const char *path, int flags, /* 
extract hex from sharded "xx/x{38}" filename */ static int get_oid_hex_from_objpath(const char *path, struct object_id *oid) { - if (strlen(path) != GIT_SHA1_HEXSZ + 1) + if (strlen(path) != the_hash_algo->hexsz + 1) return -1; if (hex_to_bytes(oid->hash, path, 1)) @@ -1026,7 +1026,7 @@ static int get_oid_hex_from_objpath(const char *path, struct object_id *oid) path += 2; path++; /* skip '/' */ - return hex_to_bytes(oid->hash + 1, path, GIT_SHA1_RAWSZ - 1); + return hex_to_bytes(oid->hash + 1, path, the_hash_algo->rawsz - 1); } static void process_ls_object(struct remote_ls_ctx *ls) From f786ae9ff91e356960521b3a0db95c5a66b59809 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:10 +0000 Subject: [PATCH 19/35] http-backend: allow 64-character hex names In an SHA-256-backed repository using the http-backend handler for dumb protocol clients, it may be necessary to access the raw packs using their full SHA-256-specified names. Allow packs and loose objects to be accessed using their full SHA-256-specified 64-character hex names. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- http-backend.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/http-backend.c b/http-backend.c index 29e68e38b5..ec3144b444 100644 --- a/http-backend.c +++ b/http-backend.c @@ -711,8 +711,11 @@ static struct service_cmd { {"GET", "/objects/info/http-alternates$", get_text_file}, {"GET", "/objects/info/packs$", get_info_packs}, {"GET", "/objects/[0-9a-f]{2}/[0-9a-f]{38}$", get_loose_object}, + {"GET", "/objects/[0-9a-f]{2}/[0-9a-f]{62}$", get_loose_object}, {"GET", "/objects/pack/pack-[0-9a-f]{40}\\.pack$", get_pack_file}, + {"GET", "/objects/pack/pack-[0-9a-f]{64}\\.pack$", get_pack_file}, {"GET", "/objects/pack/pack-[0-9a-f]{40}\\.idx$", get_idx_file}, + {"GET", "/objects/pack/pack-[0-9a-f]{64}\\.idx$", get_idx_file}, {"POST", "/git-upload-pack$", service_rpc}, {"POST", "/git-receive-pack$", service_rpc} From 1cb158b6e65fbfd12099dc0d39642ad146dd887f Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:11 +0000 Subject: [PATCH 20/35] http-push: remove remaining uses of sha1_to_hex Since sha1_to_hex is limited to SHA-1, switch all remaining uses of it in this file to hash_to_hex or oid_to_hex. Modify update_remote to take a pointer to struct object_id, and since we don't modify that parameter in the function, set it to be const as well. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- http-push.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/http-push.c b/http-push.c index c4e5142bc9..f675a96316 100644 --- a/http-push.c +++ b/http-push.c @@ -316,7 +316,7 @@ static void start_fetch_packed(struct transfer_request *request) } fprintf(stderr, "Fetching pack %s\n", - sha1_to_hex(target->hash)); + hash_to_hex(target->hash)); fprintf(stderr, " which contains %s\n", oid_to_hex(&request->obj->oid)); preq = new_http_pack_request(target, repo->url); @@ -1374,7 +1374,7 @@ static int get_delta(struct rev_info *revs, struct remote_lock *lock) return count; } -static int update_remote(unsigned char *sha1, struct remote_lock *lock) +static int update_remote(const struct object_id *oid, struct remote_lock *lock) { struct active_request_slot *slot; struct slot_results results; @@ -1383,7 +1383,7 @@ static int update_remote(unsigned char *sha1, struct remote_lock *lock) dav_headers = get_dav_token_headers(lock, DAV_HEADER_IF); - strbuf_addf(&out_buffer.buf, "%s\n", sha1_to_hex(sha1)); + strbuf_addf(&out_buffer.buf, "%s\n", oid_to_hex(oid)); slot = get_active_slot(); slot->results = &results; @@ -1948,7 +1948,7 @@ int cmd_main(int argc, const char **argv) run_request_queue(); /* Update the remote branch if all went well */ - if (aborted || !update_remote(ref->new_oid.hash, ref_lock)) + if (aborted || !update_remote(&ref->new_oid, ref_lock)) rc = 1; if (!rc) From 2bf1db786273e22070227a09d346c49026659f1e Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:12 +0000 Subject: [PATCH 21/35] http-walker: replace sha1_to_hex Since sha1_to_hex is limited to SHA-1, replace the uses of it in this file with hash_to_hex. Rename several variables accordingly to reflect that they are no longer limited to SHA-1. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- http-walker.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/http-walker.c b/http-walker.c index 8063896cf6..e11670eee2 100644 --- a/http-walker.c +++ b/http-walker.c @@ -434,9 +434,9 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigne if (walker->get_verbosely) { fprintf(stderr, "Getting pack %s\n", - sha1_to_hex(target->hash)); + hash_to_hex(target->hash)); fprintf(stderr, " which contains %s\n", - sha1_to_hex(sha1)); + hash_to_hex(sha1)); } preq = new_http_pack_request(target, repo->base); @@ -473,9 +473,9 @@ static void abort_object_request(struct object_request *obj_req) release_object_request(obj_req); } -static int fetch_object(struct walker *walker, unsigned char *sha1) +static int fetch_object(struct walker *walker, unsigned char *hash) { - char *hex = sha1_to_hex(sha1); + char *hex = hash_to_hex(hash); int ret = 0; struct object_request *obj_req = NULL; struct http_object_request *req; @@ -483,7 +483,7 @@ static int fetch_object(struct walker *walker, unsigned char *sha1) list_for_each(pos, head) { obj_req = list_entry(pos, struct object_request, node); - if (hasheq(obj_req->oid.hash, sha1)) + if (hasheq(obj_req->oid.hash, hash)) break; } if (obj_req == NULL) @@ -557,20 +557,20 @@ static int fetch_object(struct walker *walker, unsigned char *sha1) return ret; } -static int fetch(struct walker *walker, unsigned char *sha1) +static int fetch(struct walker *walker, unsigned char *hash) { struct walker_data *data = walker->data; struct alt_base *altbase = data->alt; - if (!fetch_object(walker, sha1)) + if (!fetch_object(walker, hash)) return 0; while (altbase) { - if (!http_fetch_pack(walker, altbase, sha1)) + if (!http_fetch_pack(walker, altbase, hash)) return 0; fetch_alternates(walker, data->alt->base); altbase = altbase->next; } - return error("Unable to find %s under %s", sha1_to_hex(sha1), + return error("Unable to find %s under %s", 
hash_to_hex(hash), data->alt->base); } From ae041a0f9a8f5bd236441a9384a0119cd5a8f791 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:13 +0000 Subject: [PATCH 22/35] http: replace hard-coded constant with the_hash_algo Replace a hard-coded 40 with a reference to the_hash_algo. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- http.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/http.c b/http.c index a09adc518f..993ddc956a 100644 --- a/http.c +++ b/http.c @@ -2065,7 +2065,7 @@ int http_fetch_ref(const char *base, struct ref *ref) url = quote_ref_url(base, ref->name); if (http_get_strbuf(url, &buffer, &options) == HTTP_OK) { strbuf_rtrim(&buffer); - if (buffer.len == 40) + if (buffer.len == the_hash_algo->hexsz) ret = get_oid_hex(buffer.buf, &ref->old_oid); else if (starts_with(buffer.buf, "ref: ")) { ref->symref = xstrdup(buffer.buf + 5); From eed0e60f02bea5cc87830c203de86207c43cc57c Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:14 +0000 Subject: [PATCH 23/35] http: compute hash of downloaded objects using the_hash_algo Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- http.c | 10 +++++----- http.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/http.c b/http.c index 993ddc956a..458d07fabb 100644 --- a/http.c +++ b/http.c @@ -2337,8 +2337,8 @@ static size_t fwrite_sha1_file(char *ptr, size_t eltsize, size_t nmemb, freq->stream.next_out = expn; freq->stream.avail_out = sizeof(expn); freq->zret = git_inflate(&freq->stream, Z_SYNC_FLUSH); - git_SHA1_Update(&freq->c, expn, - sizeof(expn) - freq->stream.avail_out); + the_hash_algo->update_fn(&freq->c, expn, + sizeof(expn) - freq->stream.avail_out); } while (freq->stream.avail_in && freq->zret == Z_OK); return size; } @@ -2396,7 +2396,7 @@ struct http_object_request *new_http_object_request(const char *base_url, git_inflate_init(&freq->stream); - git_SHA1_Init(&freq->c); + the_hash_algo->init_fn(&freq->c); freq->url = get_remote_object_url(base_url, hex, 0); @@ -2431,7 +2431,7 @@ struct http_object_request *new_http_object_request(const char *base_url, if (prev_read == -1) { memset(&freq->stream, 0, sizeof(freq->stream)); git_inflate_init(&freq->stream); - git_SHA1_Init(&freq->c); + the_hash_algo->init_fn(&freq->c); if (prev_posn>0) { prev_posn = 0; lseek(freq->localfile, 0, SEEK_SET); @@ -2502,7 +2502,7 @@ int finish_http_object_request(struct http_object_request *freq) } git_inflate_end(&freq->stream); - git_SHA1_Final(freq->real_oid.hash, &freq->c); + the_hash_algo->final_fn(freq->real_oid.hash, &freq->c); if (freq->zret != Z_STREAM_END) { unlink_or_warn(freq->tmpfile.buf); return -1; diff --git a/http.h b/http.h index 4eb4e808e5..10d3cfdb80 100644 --- a/http.h +++ b/http.h @@ -225,7 +225,7 @@ struct http_object_request { long http_code; struct object_id oid; struct object_id real_oid; - git_SHA_CTX c; + git_hash_ctx c; git_zstream stream; int zret; int rename; From 05dfc7cac4a0e0dcd6c7b2f63009c03f96dba1c0 Mon Sep 17 00:00:00 2001 From: "brian m. 
carlson" Date: Tue, 19 Feb 2019 00:05:15 +0000 Subject: [PATCH 24/35] http: replace sha1_to_hex Since sha1_to_hex is limited to SHA-1, replace it with hash_to_hex. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- http.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/http.c b/http.c index 458d07fabb..8ab07458e4 100644 --- a/http.c +++ b/http.c @@ -2079,19 +2079,19 @@ int http_fetch_ref(const char *base, struct ref *ref) } /* Helpers for fetching packs */ -static char *fetch_pack_index(unsigned char *sha1, const char *base_url) +static char *fetch_pack_index(unsigned char *hash, const char *base_url) { char *url, *tmp; struct strbuf buf = STRBUF_INIT; if (http_is_verbose) - fprintf(stderr, "Getting index for pack %s\n", sha1_to_hex(sha1)); + fprintf(stderr, "Getting index for pack %s\n", hash_to_hex(hash)); end_url_with_slash(&buf, base_url); - strbuf_addf(&buf, "objects/pack/pack-%s.idx", sha1_to_hex(sha1)); + strbuf_addf(&buf, "objects/pack/pack-%s.idx", hash_to_hex(hash)); url = strbuf_detach(&buf, NULL); - strbuf_addf(&buf, "%s.temp", sha1_pack_index_name(sha1)); + strbuf_addf(&buf, "%s.temp", sha1_pack_index_name(hash)); tmp = strbuf_detach(&buf, NULL); if (http_get_file(url, tmp, NULL) != HTTP_OK) { @@ -2262,7 +2262,7 @@ struct http_pack_request *new_http_pack_request( end_url_with_slash(&buf, base_url); strbuf_addf(&buf, "objects/pack/pack-%s.pack", - sha1_to_hex(target->hash)); + hash_to_hex(target->hash)); preq->url = strbuf_detach(&buf, NULL); strbuf_addf(&preq->tmpfile, "%s.temp", sha1_pack_name(target->hash)); @@ -2289,7 +2289,7 @@ struct http_pack_request *new_http_pack_request( if (http_is_verbose) fprintf(stderr, "Resuming fetch of pack %s at byte %"PRIuMAX"\n", - sha1_to_hex(target->hash), + hash_to_hex(target->hash), (uintmax_t)prev_posn); http_opt_request_remainder(preq->slot->curl, prev_posn); } From 9c9492e8aafdcc2ec464e9261b78619b05a835f8 Mon Sep 17 00:00:00 2001 From: "brian m. 
carlson" Date: Tue, 19 Feb 2019 00:05:16 +0000 Subject: [PATCH 25/35] remote-curl: make hash size independent Change one hard-coded use of the constant 40 to a reference to the_hash_algo. In addition, switch a use of get_oid_hex to parse_oid_hex to avoid the need to use a constant. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- remote-curl.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/remote-curl.c b/remote-curl.c index 5b44794922..cc74ff3ec1 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -249,7 +249,7 @@ static struct ref *parse_info_refs(struct discovery *heads) if (data[i] == '\t') mid = &data[i]; if (data[i] == '\n') { - if (mid - start != 40) + if (mid - start != the_hash_algo->hexsz) die("%sinfo/refs not valid: is this a git repository?", url.buf); data[i] = 0; @@ -1107,12 +1107,13 @@ static void parse_fetch(struct strbuf *buf) const char *name; struct ref *ref; struct object_id old_oid; + const char *q; - if (get_oid_hex(p, &old_oid)) + if (parse_oid_hex(p, &old_oid, &q)) die("protocol error: expected sha/ref, got %s'", p); - if (p[GIT_SHA1_HEXSZ] == ' ') - name = p + GIT_SHA1_HEXSZ + 1; - else if (!p[GIT_SHA1_HEXSZ]) + if (*q == ' ') + name = q + 1; + else if (!*q) name = ""; else die("protocol error: expected sha/ref, got %s'", p); From 95399788d158e4628e25db20569d44af21a40906 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:17 +0000 Subject: [PATCH 26/35] hash: add a function to lookup hash algorithm by length There are some cases, such as the dumb HTTP transport and bundles, where we can only determine the hash algorithm in use by the length of the object IDs. Provide a function that looks up the algorithm by length. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- hash.h | 2 ++ sha1-file.c | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/hash.h b/hash.h index adde708cf2..661c9f2281 100644 --- a/hash.h +++ b/hash.h @@ -131,6 +131,8 @@ extern const struct git_hash_algo hash_algos[GIT_HASH_NALGOS]; int hash_algo_by_name(const char *name); /* Identical, except based on the format ID. */ int hash_algo_by_id(uint32_t format_id); +/* Identical, except based on the length. */ +int hash_algo_by_length(int len); /* Identical, except for a pointer to struct git_hash_algo. */ static inline int hash_algo_by_ptr(const struct git_hash_algo *p) { diff --git a/sha1-file.c b/sha1-file.c index 494606f771..bcd9470bce 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -189,6 +189,14 @@ int hash_algo_by_id(uint32_t format_id) return GIT_HASH_UNKNOWN; } +int hash_algo_by_length(int len) +{ + int i; + for (i = 1; i < GIT_HASH_NALGOS; i++) + if (len == hash_algos[i].rawsz) + return i; + return GIT_HASH_UNKNOWN; +} /* * This is meant to hold a *small* number of objects that you would From 3548726cd7f5c5d2d8a1359ac21ae0354bd4c895 Mon Sep 17 00:00:00 2001 From: Rene Scharfe Date: Tue, 19 Feb 2019 00:05:18 +0000 Subject: [PATCH 27/35] get-tar-commit-id: parse comment record Parse pax comment records properly and get rid of magic numbers for acceptable comment length. This simplifies a later change to handle longer hashes. Signed-off-by: Rene Scharfe Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- builtin/get-tar-commit-id.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/builtin/get-tar-commit-id.c b/builtin/get-tar-commit-id.c index 2706fcfaf2..312e44ed05 100644 --- a/builtin/get-tar-commit-id.c +++ b/builtin/get-tar-commit-id.c @@ -21,6 +21,8 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix) char *content = buffer + RECORDSIZE; const char *comment; ssize_t n; + long len; + char *end; if (argc != 1) usage(builtin_get_tar_commit_id_usage); @@ -32,10 +34,17 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix) die_errno("git get-tar-commit-id: EOF before reading tar header"); if (header->typeflag[0] != 'g') return 1; - if (!skip_prefix(content, "52 comment=", &comment)) + + len = strtol(content, &end, 10); + if (errno == ERANGE || end == content || len < 0) + return 1; + if (!skip_prefix(end, " comment=", &comment)) + return 1; + len -= comment - content; + if (len != GIT_SHA1_HEXSZ + 1) return 1; - if (write_in_full(1, comment, 41) < 0) + if (write_in_full(1, comment, len) < 0) die_errno("git get-tar-commit-id: write error"); return 0; From 87003d2c94985f9b3c5ef23b9c042ae3b4ede6bb Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:19 +0000 Subject: [PATCH 28/35] builtin/get-tar-commit-id: make hash size independent To make this code independent of the hash size, verify that the length of the comment is equal to that of any supported hash algorithm. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- builtin/get-tar-commit-id.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/builtin/get-tar-commit-id.c b/builtin/get-tar-commit-id.c index 312e44ed05..491af9202d 100644 --- a/builtin/get-tar-commit-id.c +++ b/builtin/get-tar-commit-id.c @@ -41,7 +41,8 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix) if (!skip_prefix(end, " comment=", &comment)) return 1; len -= comment - content; - if (len != GIT_SHA1_HEXSZ + 1) + if (len < 1 || !(len % 2) || + hash_algo_by_length((len - 1) / 2) == GIT_HASH_UNKNOWN) return 1; if (write_in_full(1, comment, len) < 0) From bbf05cf70e5d0ba113ea4ed4c7c77cb890707911 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:20 +0000 Subject: [PATCH 29/35] archive: convert struct archiver_args to object_id Change the commit_sha1 member to be called "commit_oid" and change it to be a pointer to struct object_id. Additionally, update some uses of GIT_SHA1_HEXSZ and hard-coded values to use the_hash_algo instead. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- archive-tar.c | 7 ++++--- archive-zip.c | 10 +++++----- archive.c | 8 ++++---- archive.h | 2 +- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index 4aabd566fb..3e53aac1e6 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -326,14 +326,15 @@ static int write_tar_entry(struct archiver_args *args, static void write_global_extended_header(struct archiver_args *args) { - const unsigned char *sha1 = args->commit_sha1; + const struct object_id *oid = args->commit_oid; struct strbuf ext_header = STRBUF_INIT; struct ustar_header header; unsigned int mode; - if (sha1) + if (oid) strbuf_append_ext_header(&ext_header, "comment", - sha1_to_hex(sha1), 40); + oid_to_hex(oid), + the_hash_algo->hexsz); if (args->time > USTAR_MAX_MTIME) { strbuf_append_ext_header_uint(&ext_header, "mtime", args->time); diff --git a/archive-zip.c b/archive-zip.c index 155ee4a779..4d66b5be6e 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -577,7 +577,7 @@ static void write_zip64_trailer(void) write_or_die(1, &locator64, ZIP64_DIR_TRAILER_LOCATOR_SIZE); } -static void write_zip_trailer(const unsigned char *sha1) +static void write_zip_trailer(const struct object_id *oid) { struct zip_dir_trailer trailer; int clamped = 0; @@ -590,14 +590,14 @@ static void write_zip_trailer(const unsigned char *sha1) copy_le16_clamp(trailer.entries, zip_dir_entries, &clamped); copy_le32(trailer.size, zip_dir.len); copy_le32_clamp(trailer.offset, zip_offset, &clamped); - copy_le16(trailer.comment_length, sha1 ? GIT_SHA1_HEXSZ : 0); + copy_le16(trailer.comment_length, oid ? 
the_hash_algo->hexsz : 0); write_or_die(1, zip_dir.buf, zip_dir.len); if (clamped) write_zip64_trailer(); write_or_die(1, &trailer, ZIP_DIR_TRAILER_SIZE); - if (sha1) - write_or_die(1, sha1_to_hex(sha1), GIT_SHA1_HEXSZ); + if (oid) + write_or_die(1, oid_to_hex(oid), the_hash_algo->hexsz); } static void dos_time(timestamp_t *timestamp, int *dos_date, int *dos_time) @@ -635,7 +635,7 @@ static int write_zip_archive(const struct archiver *ar, err = write_archive_entries(args, write_zip_entry); if (!err) - write_zip_trailer(args->commit_sha1); + write_zip_trailer(args->commit_oid); strbuf_release(&zip_dir); diff --git a/archive.c b/archive.c index 1f98324a93..f2c78a2712 100644 --- a/archive.c +++ b/archive.c @@ -380,7 +380,7 @@ static void parse_treeish_arg(const char **argv, int remote) { const char *name = argv[0]; - const unsigned char *commit_sha1; + const struct object_id *commit_oid; time_t archive_time; struct tree *tree; const struct commit *commit; @@ -402,10 +402,10 @@ static void parse_treeish_arg(const char **argv, commit = lookup_commit_reference_gently(ar_args->repo, &oid, 1); if (commit) { - commit_sha1 = commit->object.oid.hash; + commit_oid = &commit->object.oid; archive_time = commit->date; } else { - commit_sha1 = NULL; + commit_oid = NULL; archive_time = time(NULL); } @@ -426,7 +426,7 @@ static void parse_treeish_arg(const char **argv, tree = parse_tree_indirect(&tree_oid); } ar_args->tree = tree; - ar_args->commit_sha1 = commit_sha1; + ar_args->commit_oid = commit_oid; ar_args->commit = commit; ar_args->time = archive_time; } diff --git a/archive.h b/archive.h index 21ac010699..dd022a6b46 100644 --- a/archive.h +++ b/archive.h @@ -11,7 +11,7 @@ struct archiver_args { const char *base; size_t baselen; struct tree *tree; - const unsigned char *commit_sha1; + const struct object_id *commit_oid; const struct commit *commit; timestamp_t time; struct pathspec pathspec; From b8d45d035536b5635929d281f1b4aae004d8891b Mon Sep 17 00:00:00 2001 From: "brian m. 
carlson" Date: Tue, 19 Feb 2019 00:05:21 +0000 Subject: [PATCH 30/35] refspec: make hash size independent Switch a use of GIT_SHA1_HEXSZ to use the_hash_algo. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- refspec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refspec.c b/refspec.c index f529092fd6..9a9bf21934 100644 --- a/refspec.c +++ b/refspec.c @@ -72,7 +72,7 @@ static int parse_refspec(struct refspec_item *item, const char *refspec, int fet /* LHS */ if (!*item->src) ; /* empty is ok; it means "HEAD" */ - else if (llen == GIT_SHA1_HEXSZ && !get_oid_hex(item->src, &unused)) + else if (llen == the_hash_algo->hexsz && !get_oid_hex(item->src, &unused)) item->exact_sha1 = 1; /* ok */ else if (!check_refname_format(item->src, flags)) ; /* valid looking ref is ok */ From ebe4df59c45ea95ce51fd8fede2a08d9009e990f Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:22 +0000 Subject: [PATCH 31/35] builtin/difftool: use parse_oid_hex Instead of using get_oid_hex and adding constants to the result, use parse_oid_hex to make this code independent of the hash size. Additionally, correct a typo that would cause us to print one too few characters on error, since we will already have incremented the pointer to point to the beginning of the object ID before we get to printing the error message. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- builtin/difftool.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/builtin/difftool.c b/builtin/difftool.c index a3ea60ea71..5fa83f481e 100644 --- a/builtin/difftool.c +++ b/builtin/difftool.c @@ -65,14 +65,12 @@ static int parse_index_info(char *p, int *mode1, int *mode2, *mode2 = (int)strtol(p + 1, &p, 8); if (*p != ' ') return error("expected ' ', got '%c'", *p); - if (get_oid_hex(++p, oid1)) - return error("expected object ID, got '%s'", p + 1); - p += GIT_SHA1_HEXSZ; + if (parse_oid_hex(++p, oid1, (const char **)&p)) + return error("expected object ID, got '%s'", p); if (*p != ' ') return error("expected ' ', got '%c'", *p); - if (get_oid_hex(++p, oid2)) - return error("expected object ID, got '%s'", p + 1); - p += GIT_SHA1_HEXSZ; + if (parse_oid_hex(++p, oid2, (const char **)&p)) + return error("expected object ID, got '%s'", p); if (*p != ' ') return error("expected ' ', got '%c'", *p); *status = *++p; From 3899b88b49c03ae76f8834a277cbd45bc6bde830 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:23 +0000 Subject: [PATCH 32/35] dir: make untracked cache extension hash size independent Instead of using a struct with a flex array member to read and write the untracked cache extension, use a shorter, fixed-length struct and add the name and hash data explicitly. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- dir.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/dir.c b/dir.c index b2cabadf25..7503b56918 100644 --- a/dir.c +++ b/dir.c @@ -2545,13 +2545,9 @@ struct ondisk_untracked_cache { struct stat_data info_exclude_stat; struct stat_data excludes_file_stat; uint32_t dir_flags; - unsigned char info_exclude_sha1[20]; - unsigned char excludes_file_sha1[20]; - char exclude_per_dir[FLEX_ARRAY]; }; #define ouc_offset(x) offsetof(struct ondisk_untracked_cache, x) -#define ouc_size(len) (ouc_offset(exclude_per_dir) + len + 1) struct write_data { int index; /* number of written untracked_cache_dir */ @@ -2634,20 +2630,21 @@ void write_untracked_extension(struct strbuf *out, struct untracked_cache *untra struct write_data wd; unsigned char varbuf[16]; int varint_len; - size_t len = strlen(untracked->exclude_per_dir); + const unsigned hashsz = the_hash_algo->rawsz; - FLEX_ALLOC_MEM(ouc, exclude_per_dir, untracked->exclude_per_dir, len); + ouc = xcalloc(1, sizeof(*ouc)); stat_data_to_disk(&ouc->info_exclude_stat, &untracked->ss_info_exclude.stat); stat_data_to_disk(&ouc->excludes_file_stat, &untracked->ss_excludes_file.stat); - hashcpy(ouc->info_exclude_sha1, untracked->ss_info_exclude.oid.hash); - hashcpy(ouc->excludes_file_sha1, untracked->ss_excludes_file.oid.hash); ouc->dir_flags = htonl(untracked->dir_flags); varint_len = encode_varint(untracked->ident.len, varbuf); strbuf_add(out, varbuf, varint_len); strbuf_addbuf(out, &untracked->ident); - strbuf_add(out, ouc, ouc_size(len)); + strbuf_add(out, ouc, sizeof(*ouc)); + strbuf_add(out, untracked->ss_info_exclude.oid.hash, hashsz); + strbuf_add(out, untracked->ss_excludes_file.oid.hash, hashsz); + strbuf_add(out, untracked->exclude_per_dir, strlen(untracked->exclude_per_dir) + 1); FREE_AND_NULL(ouc); if (!untracked->root) { @@ -2834,6 +2831,9 @@ struct untracked_cache *read_untracked_extension(const void *data, unsigned long int 
ident_len; ssize_t len; const char *exclude_per_dir; + const unsigned hashsz = the_hash_algo->rawsz; + const unsigned offset = sizeof(struct ondisk_untracked_cache); + const unsigned exclude_per_dir_offset = offset + 2 * hashsz; if (sz <= 1 || end[-1] != '\0') return NULL; @@ -2845,7 +2845,7 @@ struct untracked_cache *read_untracked_extension(const void *data, unsigned long ident = (const char *)next; next += ident_len; - if (next + ouc_size(0) > end) + if (next + exclude_per_dir_offset + 1 > end) return NULL; uc = xcalloc(1, sizeof(*uc)); @@ -2853,15 +2853,15 @@ struct untracked_cache *read_untracked_extension(const void *data, unsigned long strbuf_add(&uc->ident, ident, ident_len); load_oid_stat(&uc->ss_info_exclude, next + ouc_offset(info_exclude_stat), - next + ouc_offset(info_exclude_sha1)); + next + offset); load_oid_stat(&uc->ss_excludes_file, next + ouc_offset(excludes_file_stat), - next + ouc_offset(excludes_file_sha1)); + next + offset + hashsz); uc->dir_flags = get_be32(next + ouc_offset(dir_flags)); - exclude_per_dir = (const char *)next + ouc_offset(exclude_per_dir); + exclude_per_dir = (const char *)next + exclude_per_dir_offset; uc->exclude_per_dir = xstrdup(exclude_per_dir); /* NUL after exclude_per_dir is covered by sizeof(*ouc) */ - next += ouc_size(strlen(exclude_per_dir)); + next += exclude_per_dir_offset + strlen(exclude_per_dir) + 1; if (next >= end) goto done2; From 575fa8a3ed5828f01a934fc3337f05a43ccf7679 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:24 +0000 Subject: [PATCH 33/35] read-cache: read data in a hash-independent way Index entries are structured with a variety of fields up front, followed by a hash and one or two flags fields. Because the hash field is stored in the middle of the structure, it's difficult to use one fixed-size structure that easily allows access to the hash and flags fields. 
Adjust the structure to hold the maximum amount of data that may be needed using a member called "data" and read and write this field independently in the various places that need to read and write the structure. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- read-cache.c | 74 ++++++++++++++++++++-------------------------------- 1 file changed, 29 insertions(+), 45 deletions(-) diff --git a/read-cache.c b/read-cache.c index 4dc6de1b55..da0e411c55 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1640,39 +1640,24 @@ struct ondisk_cache_entry { uint32_t uid; uint32_t gid; uint32_t size; - unsigned char sha1[20]; - uint16_t flags; - char name[FLEX_ARRAY]; /* more */ -}; - -/* - * This struct is used when CE_EXTENDED bit is 1 - * The struct must match ondisk_cache_entry exactly from - * ctime till flags - */ -struct ondisk_cache_entry_extended { - struct cache_time ctime; - struct cache_time mtime; - uint32_t dev; - uint32_t ino; - uint32_t mode; - uint32_t uid; - uint32_t gid; - uint32_t size; - unsigned char sha1[20]; - uint16_t flags; - uint16_t flags2; - char name[FLEX_ARRAY]; /* more */ + /* + * unsigned char hash[hashsz]; + * uint16_t flags; + * if (flags & CE_EXTENDED) + * uint16_t flags2; + */ + unsigned char data[GIT_MAX_RAWSZ + 2 * sizeof(uint16_t)]; + char name[FLEX_ARRAY]; }; /* These are only used for v3 or lower */ #define align_padding_size(size, len) ((size + (len) + 8) & ~7) - (size + len) -#define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,name) + (len) + 8) & ~7) +#define align_flex_name(STRUCT,len) ((offsetof(struct STRUCT,data) + (len) + 8) & ~7) #define ondisk_cache_entry_size(len) align_flex_name(ondisk_cache_entry,len) -#define ondisk_cache_entry_extended_size(len) align_flex_name(ondisk_cache_entry_extended,len) -#define ondisk_ce_size(ce) (((ce)->ce_flags & CE_EXTENDED) ? 
\ - ondisk_cache_entry_extended_size(ce_namelen(ce)) : \ - ondisk_cache_entry_size(ce_namelen(ce))) +#define ondisk_data_size(flags, len) (the_hash_algo->rawsz + \ + ((flags & CE_EXTENDED) ? 2 : 1) * sizeof(uint16_t) + len) +#define ondisk_data_size_max(len) (ondisk_data_size(CE_EXTENDED, len)) +#define ondisk_ce_size(ce) (ondisk_cache_entry_size(ondisk_data_size((ce)->ce_flags, ce_namelen(ce)))) /* Allow fsck to force verification of the index checksum. */ int verify_index_checksum; @@ -1746,6 +1731,8 @@ static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool, struct cache_entry *ce; size_t len; const char *name; + const unsigned hashsz = the_hash_algo->rawsz; + const uint16_t *flagsp = (const uint16_t *)(ondisk->data + hashsz); unsigned int flags; size_t copy_len = 0; /* @@ -1758,22 +1745,20 @@ static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool, int expand_name_field = version == 4; /* On-disk flags are just 16 bits */ - flags = get_be16(&ondisk->flags); + flags = get_be16(flagsp); len = flags & CE_NAMEMASK; if (flags & CE_EXTENDED) { - struct ondisk_cache_entry_extended *ondisk2; int extended_flags; - ondisk2 = (struct ondisk_cache_entry_extended *)ondisk; - extended_flags = get_be16(&ondisk2->flags2) << 16; + extended_flags = get_be16(flagsp + 1) << 16; /* We do not yet understand any bit out of CE_EXTENDED_FLAGS */ if (extended_flags & ~CE_EXTENDED_FLAGS) die(_("unknown index entry format 0x%08x"), extended_flags); flags |= extended_flags; - name = ondisk2->name; + name = (const char *)(flagsp + 2); } else - name = ondisk->name; + name = (const char *)(flagsp + 1); if (expand_name_field) { const unsigned char *cp = (const unsigned char *)name; @@ -1812,7 +1797,9 @@ static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool, ce->ce_flags = flags & ~CE_NAMEMASK; ce->ce_namelen = len; ce->index = 0; - hashcpy(ce->oid.hash, ondisk->sha1); + hashcpy(ce->oid.hash, ondisk->data); + memcpy(ce->name, name, len); + 
ce->name[len] = '\0'; if (expand_name_field) { if (copy_len) @@ -2556,6 +2543,8 @@ static void copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk, struct cache_entry *ce) { short flags; + const unsigned hashsz = the_hash_algo->rawsz; + uint16_t *flagsp = (uint16_t *)(ondisk->data + hashsz); ondisk->ctime.sec = htonl(ce->ce_stat_data.sd_ctime.sec); ondisk->mtime.sec = htonl(ce->ce_stat_data.sd_mtime.sec); @@ -2567,15 +2556,13 @@ static void copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk, ondisk->uid = htonl(ce->ce_stat_data.sd_uid); ondisk->gid = htonl(ce->ce_stat_data.sd_gid); ondisk->size = htonl(ce->ce_stat_data.sd_size); - hashcpy(ondisk->sha1, ce->oid.hash); + hashcpy(ondisk->data, ce->oid.hash); flags = ce->ce_flags & ~CE_NAMEMASK; flags |= (ce_namelen(ce) >= CE_NAMEMASK ? CE_NAMEMASK : ce_namelen(ce)); - ondisk->flags = htons(flags); + flagsp[0] = htons(flags); if (ce->ce_flags & CE_EXTENDED) { - struct ondisk_cache_entry_extended *ondisk2; - ondisk2 = (struct ondisk_cache_entry_extended *)ondisk; - ondisk2->flags2 = htons((ce->ce_flags & CE_EXTENDED_FLAGS) >> 16); + flagsp[1] = htons((ce->ce_flags & CE_EXTENDED_FLAGS) >> 16); } } @@ -2594,10 +2581,7 @@ static int ce_write_entry(git_hash_ctx *c, int fd, struct cache_entry *ce, stripped_name = 1; } - if (ce->ce_flags & CE_EXTENDED) - size = offsetof(struct ondisk_cache_entry_extended, name); - else - size = offsetof(struct ondisk_cache_entry, name); + size = offsetof(struct ondisk_cache_entry,data) + ondisk_data_size(ce->ce_flags, 0); if (!previous_name) { int len = ce_namelen(ce); @@ -2755,7 +2739,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, struct cache_entry **cache = istate->cache; int entries = istate->cache_nr; struct stat st; - struct ondisk_cache_entry_extended ondisk; + struct ondisk_cache_entry ondisk; struct strbuf previous_name_buf = STRBUF_INIT, *previous_name; int drop_cache_tree = istate->drop_cache_tree; off_t offset; From 
bcbb44ba053988fcc0d3ddb876c007898f345992 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:25 +0000 Subject: [PATCH 34/35] Git.pm: make hash size independent The cat_blob function was matching on exactly 40 hex characters. This won't work with SHA-256, which uses 64-character hex object IDs. While it should be fine to simply match any number of hex characters since the output is space delimited, be extra safe by matching either exactly 40 or exactly 64 hex characters. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- perl/Git.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perl/Git.pm b/perl/Git.pm index d856930b2e..62c472e0ce 100644 --- a/perl/Git.pm +++ b/perl/Git.pm @@ -980,7 +980,7 @@ sub cat_blob { return -1; } - if ($description !~ /^[0-9a-fA-F]{40} \S+ (\d+)$/) { + if ($description !~ /^[0-9a-fA-F]{40}(?:[0-9a-fA-F]{24})? \S+ (\d+)$/) { carp "Unexpected result returned from git cat-file"; return -1; } From cfb049119c9e8b8f66a2e1faf1621ac48f714412 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 19 Feb 2019 00:05:26 +0000 Subject: [PATCH 35/35] gitweb: make hash size independent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gitweb has several hard-coded 40 values throughout it to check for values that are passed in or acquired from Git. To simplify the code, introduce a regex variable that matches either exactly 40 or exactly 64 hex characters, and use this variable anywhere we would have previously hard-coded a 40 in a regex. Add some helper functions which allow us to write tighter regexes that match exactly the number of hex characters we're expecting. Similarly, switch the code that looks for deleted diffinfo information to look for either 40 or 64 zeros, and update one piece of code to use this function. Finally, when formatting a log line, allow an abbreviated describe output to contain up to 64 characters. 
Helped-by: Ævar Arnfjörð Bjarmason Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- gitweb/gitweb.perl | 97 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 67 insertions(+), 30 deletions(-) diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl index 2594a4badb..7fef19fe59 100755 --- a/gitweb/gitweb.perl +++ b/gitweb/gitweb.perl @@ -788,6 +788,38 @@ sub check_loadavg { # ====================================================================== # input validation and dispatch +# Various hash size-related values. +my $sha1_len = 40; +my $sha256_extra_len = 24; +my $sha256_len = $sha1_len + $sha256_extra_len; + +# A regex matching $len hex characters. $len may be a range (e.g. 7,64). +sub oid_nlen_regex { + my $len = shift; + my $hchr = qr/[0-9a-fA-F]/; + return qr/(?:(?:$hchr){$len})/; +} + +# A regex matching two sets of $nlen hex characters, prefixed by the literal +# string $prefix and with the literal string $infix between them. +sub oid_nlen_prefix_infix_regex { + my $nlen = shift; + my $prefix = shift; + my $infix = shift; + + my $rx = oid_nlen_regex($nlen); + + return qr/^\Q$prefix\E$rx\Q$infix\E$rx$/; +} + +# A regex matching a valid object ID. +our $oid_regex; +{ + my $x = oid_nlen_regex($sha1_len); + my $y = oid_nlen_regex($sha256_extra_len); + $oid_regex = qr/(?:$x(?:$y)?)/; +} + # input parameters can be collected from a variety of sources (presently, CGI # and PATH_INFO), so we define an %input_params hash that collects them all # together during validation: this allows subsequent uses (e.g. href()) to be @@ -1516,7 +1548,7 @@ sub is_valid_refname { return undef unless defined $input; # textual hashes are O.K. - if ($input =~ m/^[0-9a-fA-F]{40}$/) { + if ($input =~ m/^$oid_regex$/) { return 1; } # it must be correct pathname @@ -2028,6 +2060,9 @@ sub file_type_long { sub format_log_line_html { my $line = shift; + # Potentially abbreviated OID. 
+ my $regex = oid_nlen_regex("7,64"); + $line = esc_html($line, -nbsp=>1); $line =~ s{ \b @@ -2037,10 +2072,10 @@ sub format_log_line_html { (?'; } # match - if ($line =~ m/^index [0-9a-fA-F]{40},[0-9a-fA-F]{40}/) { + if ($line =~ oid_nlen_prefix_infix_regex($sha1_len, "index ", ",") | + $line =~ oid_nlen_prefix_infix_regex($sha256_len, "index ", ",")) { # can match only for combined diff $line = 'index '; for (my $i = 0; $i < $diffinfo->{'nparents'}; $i++) { @@ -2308,7 +2344,8 @@ sub format_extended_diff_header_line { $line .= '0' x 7; } - } elsif ($line =~ m/^index [0-9a-fA-F]{40}..[0-9a-fA-F]{40}/) { + } elsif ($line =~ oid_nlen_prefix_infix_regex($sha1_len, "index ", "..") | + $line =~ oid_nlen_prefix_infix_regex($sha256_len, "index ", "..")) { # can match only for ordinary diff my ($from_link, $to_link); if ($from->{'href'}) { @@ -2834,7 +2871,7 @@ sub git_get_hash_by_path { } #'100644 blob 0fa3f3a66fb6a137f6ec2c19351ed4d807070ffa panic.c' - $line =~ m/^([0-9]+) (.+) ([0-9a-fA-F]{40})\t/; + $line =~ m/^([0-9]+) (.+) ($oid_regex)\t/; if (defined $type && $type ne $2) { # type doesn't match return undef; @@ -3333,7 +3370,7 @@ sub git_get_references { while (my $line = <$fd>) { chomp $line; - if ($line =~ m!^([0-9a-fA-F]{40})\srefs/($type.*)$!) { + if ($line =~ m!^($oid_regex)\srefs/($type.*)$!) 
{ if (defined $refs{$1}) { push @{$refs{$1}}, $2; } else { @@ -3407,7 +3444,7 @@ sub parse_tag { $tag{'id'} = $tag_id; while (my $line = <$fd>) { chomp $line; - if ($line =~ m/^object ([0-9a-fA-F]{40})$/) { + if ($line =~ m/^object ($oid_regex)$/) { $tag{'object'} = $1; } elsif ($line =~ m/^type (.+)$/) { $tag{'type'} = $1; @@ -3451,15 +3488,15 @@ sub parse_commit_text { } my $header = shift @commit_lines; - if ($header !~ m/^[0-9a-fA-F]{40}/) { + if ($header !~ m/^$oid_regex/) { return; } ($co{'id'}, my @parents) = split ' ', $header; while (my $line = shift @commit_lines) { last if $line eq "\n"; - if ($line =~ m/^tree ([0-9a-fA-F]{40})$/) { + if ($line =~ m/^tree ($oid_regex)$/) { $co{'tree'} = $1; - } elsif ((!defined $withparents) && ($line =~ m/^parent ([0-9a-fA-F]{40})$/)) { + } elsif ((!defined $withparents) && ($line =~ m/^parent ($oid_regex)$/)) { push @parents, $1; } elsif ($line =~ m/^author (.*) ([0-9]+) (.*)$/) { $co{'author'} = to_utf8($1); @@ -3591,7 +3628,7 @@ sub parse_difftree_raw_line { # ':100644 100644 03b218260e99b78c6df0ed378e59ed9205ccc96d 3b93d5e7cc7f7dd4ebed13a5cc1a4ad976fc94d8 M ls-files.c' # ':100644 100644 7f9281985086971d3877aca27704f2aaf9c448ce bc190ebc71bbd923f2b728e505408f5e54bd073a M rev-tree.c' - if ($line =~ m/^:([0-7]{6}) ([0-7]{6}) ([0-9a-fA-F]{40}) ([0-9a-fA-F]{40}) (.)([0-9]{0,3})\t(.*)$/) { + if ($line =~ m/^:([0-7]{6}) ([0-7]{6}) ($oid_regex) ($oid_regex) (.)([0-9]{0,3})\t(.*)$/) { $res{'from_mode'} = $1; $res{'to_mode'} = $2; $res{'from_id'} = $3; @@ -3606,7 +3643,7 @@ sub parse_difftree_raw_line { } # '::100755 100755 100755 60e79ca1b01bc8b057abe17ddab484699a7f5fdb 94067cc5f73388f33722d52ae02f44692bc07490 94067cc5f73388f33722d52ae02f44692bc07490 MR git-gui/git-gui.sh' # combined diff (for merge commit) - elsif ($line =~ s/^(::+)((?:[0-7]{6} )+)((?:[0-9a-fA-F]{40} )+)([a-zA-Z]+)\t(.*)$//) { + elsif ($line =~ s/^(::+)((?:[0-7]{6} )+)((?:$oid_regex )+)([a-zA-Z]+)\t(.*)$//) { $res{'nparents'} = length($1); $res{'from_mode'} 
= [ split(' ', $2) ]; $res{'to_mode'} = pop @{$res{'from_mode'}}; @@ -3616,7 +3653,7 @@ sub parse_difftree_raw_line { $res{'to_file'} = unquote($5); } # 'c512b523472485aef4fff9e57b229d9d243c967f' - elsif ($line =~ m/^([0-9a-fA-F]{40})$/) { + elsif ($line =~ m/^($oid_regex)$/) { $res{'commit'} = $1; } @@ -3644,7 +3681,7 @@ sub parse_ls_tree_line { if ($opts{'-l'}) { #'100644 blob 0fa3f3a66fb6a137f6ec2c19351ed4d807070ffa 16717 panic.c' - $line =~ m/^([0-9]+) (.+) ([0-9a-fA-F]{40}) +(-|[0-9]+)\t(.+)$/s; + $line =~ m/^([0-9]+) (.+) ($oid_regex) +(-|[0-9]+)\t(.+)$/s; $res{'mode'} = $1; $res{'type'} = $2; @@ -3657,7 +3694,7 @@ sub parse_ls_tree_line { } } else { #'100644 blob 0fa3f3a66fb6a137f6ec2c19351ed4d807070ffa panic.c' - $line =~ m/^([0-9]+) (.+) ([0-9a-fA-F]{40})\t(.+)$/s; + $line =~ m/^([0-9]+) (.+) ($oid_regex)\t(.+)$/s; $res{'mode'} = $1; $res{'type'} = $2; @@ -4799,7 +4836,7 @@ sub fill_from_file_info { sub is_deleted { my $diffinfo = shift; - return $diffinfo->{'to_id'} eq ('0' x 40); + return $diffinfo->{'to_id'} eq ('0' x 40) || $diffinfo->{'to_id'} eq ('0' x 64); } # does patch correspond to [previous] difftree raw line @@ -6285,7 +6322,7 @@ sub git_search_changes { -class => "list subject"}, chop_and_escape_str($co{'title'}, 50) . "
"); } elsif (defined $set{'to_id'}) { - next if ($set{'to_id'} =~ m/^0{40}$/); + next if is_deleted(\%set); print $cgi->a({-href => href(action=>"blob", hash_base=>$co{'id'}, hash=>$set{'to_id'}, file_name=>$set{'to_file'}), @@ -6829,7 +6866,7 @@ sub git_blame_common { # the header: [] # no for subsequent lines in group of lines my ($full_rev, $orig_lineno, $lineno, $group_size) = - ($line =~ /^([0-9a-f]{40}) (\d+) (\d+)(?: (\d+))?$/); + ($line =~ /^($oid_regex) (\d+) (\d+)(?: (\d+))?$/); if (!exists $metainfo{$full_rev}) { $metainfo{$full_rev} = { 'nprevious' => 0 }; } @@ -6879,7 +6916,7 @@ sub git_blame_common { } # 'previous' if (exists $meta->{'previous'} && - $meta->{'previous'} =~ /^([a-fA-F0-9]{40}) (.*)$/) { + $meta->{'previous'} =~ /^($oid_regex) (.*)$/) { $meta->{'parent'} = $1; $meta->{'file_parent'} = unquote($2); } @@ -6996,7 +7033,7 @@ sub git_blob_plain { } else { die_error(400, "No file name defined"); } - } elsif ($hash =~ m/^[0-9a-fA-F]{40}$/) { + } elsif ($hash =~ m/^$oid_regex$/) { # blobs defined by non-textual hash id's can be cached $expires = "+1d"; } @@ -7057,7 +7094,7 @@ sub git_blob { } else { die_error(400, "No file name defined"); } - } elsif ($hash =~ m/^[0-9a-fA-F]{40}$/) { + } elsif ($hash =~ m/^$oid_regex$/) { # blobs defined by non-textual hash id's can be cached $expires = "+1d"; } @@ -7515,7 +7552,7 @@ sub git_commit { # non-textual hash id's can be cached my $expires; - if ($hash =~ m/^[0-9a-fA-F]{40}$/) { + if ($hash =~ m/^$oid_regex$/) { $expires = "+1d"; } my $refs = git_get_references(); @@ -7609,7 +7646,7 @@ sub git_object { close $fd; #'100644 blob 0fa3f3a66fb6a137f6ec2c19351ed4d807070ffa panic.c' - unless ($line && $line =~ m/^([0-9]+) (.+) ([0-9a-fA-F]{40})\t/) { + unless ($line && $line =~ m/^([0-9]+) (.+) ($oid_regex)\t/) { die_error(404, "File or directory for given base does not exist"); } $type = $2; @@ -7649,7 +7686,7 @@ sub git_blobdiff { or die_error(404, "Blob diff not found"); } elsif (defined $hash && - $hash 
=~ /[0-9a-fA-F]{40}/) { + $hash =~ $oid_regex) { # try to find filename from $hash # read filtered raw output @@ -7659,7 +7696,7 @@ sub git_blobdiff { @difftree = # ':100644 100644 03b21826... 3b93d5e7... M ls-files.c' # $hash == to_id - grep { /^:[0-7]{6} [0-7]{6} [0-9a-fA-F]{40} $hash/ } + grep { /^:[0-7]{6} [0-7]{6} $oid_regex $hash/ } map { chomp; $_ } <$fd>; close $fd or die_error(404, "Reading git-diff-tree failed"); @@ -7682,8 +7719,8 @@ sub git_blobdiff { $hash ||= $diffinfo{'to_id'}; # non-textual hash id's can be cached - if ($hash_base =~ m/^[0-9a-fA-F]{40}$/ && - $hash_parent_base =~ m/^[0-9a-fA-F]{40}$/) { + if ($hash_base =~ m/^$oid_regex$/ && + $hash_parent_base =~ m/^$oid_regex$/) { $expires = '+1d'; } @@ -7819,7 +7856,7 @@ sub git_commitdiff { $hash_parent ne '-c' && $hash_parent ne '--cc') { # commitdiff with two commits given my $hash_parent_short = $hash_parent; - if ($hash_parent =~ m/^[0-9a-fA-F]{40}$/) { + if ($hash_parent =~ m/^$oid_regex$/) { $hash_parent_short = substr($hash_parent, 0, 7); } $formats_nav .= @@ -7928,7 +7965,7 @@ sub git_commitdiff { # non-textual hash id's can be cached my $expires; - if ($hash =~ m/^[0-9a-fA-F]{40}$/) { + if ($hash =~ m/^$oid_regex$/) { $expires = "+1d"; }