From 1d8d9cb62099e1524ce1269ea88faad871c2197f Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 5 Aug 2020 16:06:49 -0700 Subject: [PATCH 1/4] sha1-file: introduce no-lazy-fetch has_object() There have been a few bugs wherein Git fetches missing objects whenever the existence of an object is checked, even though it does not need to perform such a fetch. To resolve these bugs, we could look at all the places that has_object_file() (or a similar function) is used. As a first step, introduce a new function has_object() that checks for the existence of an object, with a default behavior of not fetching if the object is missing and the repository is a partial clone. As we verify each has_object_file() (or similar) usage, we can replace it with has_object(), and we will know that we are done when we can delete has_object_file() (and the other similar functions). Also, the new function has_object() has more appropriate defaults: besides not fetching, it also does not recheck packed storage. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- object-store.h | 25 +++++++++++++++++++++++-- sha1-file.c | 12 ++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/object-store.h b/object-store.h index f439d47af8..c4fc9dd74e 100644 --- a/object-store.h +++ b/object-store.h @@ -239,12 +239,33 @@ int read_loose_object(const char *path, unsigned long *size, void **contents); +/* Retry packed storage after checking packed and loose storage */ +#define HAS_OBJECT_RECHECK_PACKED 1 + +/* + * Returns 1 if the object exists. This function will not lazily fetch objects + * in a partial clone. + */ +int has_object(struct repository *r, const struct object_id *oid, + unsigned flags); + +/* + * These macros and functions are deprecated. If checking existence for an + * object that is likely to be missing and/or whose absence is relatively + * inconsequential (or is consequential but the caller is prepared to handle + * it), use has_object(), which has better defaults (no lazy fetch in a partial + * clone and no rechecking of packed storage). In the unlikely event that a + * caller needs to assert existence of an object that it fully expects to + * exist, and wants to trigger a lazy fetch in a partial clone, use + * oid_object_info_extended() with a NULL struct object_info. + * + * These functions can be removed once all callers have migrated to + * has_object() and/or oid_object_info_extended(). + */ #ifndef NO_THE_REPOSITORY_COMPATIBILITY_MACROS #define has_sha1_file_with_flags(sha1, flags) repo_has_sha1_file_with_flags(the_repository, sha1, flags) #define has_sha1_file(sha1) repo_has_sha1_file(the_repository, sha1) #endif - -/* Same as the above, except for struct object_id. */ int repo_has_object_file(struct repository *r, const struct object_id *oid); int repo_has_object_file_with_flags(struct repository *r, const struct object_id *oid, int flags); diff --git a/sha1-file.c b/sha1-file.c index 45fdb8415c..5866de0f65 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -1989,6 +1989,18 @@ int force_object_loose(const struct object_id *oid, time_t mtime) return ret; } +int has_object(struct repository *r, const struct object_id *oid, + unsigned flags) +{ + int quick = !(flags & HAS_OBJECT_RECHECK_PACKED); + unsigned object_info_flags = OBJECT_INFO_SKIP_FETCH_OBJECT | + (quick ? OBJECT_INFO_QUICK : 0); + + if (!startup_info->have_repository) + return 0; + return oid_object_info_extended(r, oid, NULL, object_info_flags) >= 0; +} + int repo_has_object_file_with_flags(struct repository *r, const struct object_id *oid, int flags) { From 3318238db9498749db6d4feb7a804d366eccfa82 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 5 Aug 2020 16:06:50 -0700 Subject: [PATCH 2/4] apply: do not lazy fetch when applying binary When applying a binary patch, as an optimization, "apply" checks if the postimage is already present. During this fetch, it is perfectly expected for the postimage not to be present, so there is no need to lazy-fetch missing objects. Teach "apply" not to lazy-fetch in this case. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- apply.c | 2 +- t/t4150-am.sh | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/apply.c b/apply.c index 8bff604dbe..402d80602a 100644 --- a/apply.c +++ b/apply.c @@ -3178,7 +3178,7 @@ static int apply_binary(struct apply_state *state, return 0; /* deletion patch */ } - if (has_object_file(&oid)) { + if (has_object(the_repository, &oid, 0)) { /* We already have the postimage */ enum object_type type; unsigned long size; diff --git a/t/t4150-am.sh b/t/t4150-am.sh index bda4586a79..94a2c76522 100755 --- a/t/t4150-am.sh +++ b/t/t4150-am.sh @@ -1133,4 +1133,20 @@ test_expect_success 'am and .gitattibutes' ' ) ' +test_expect_success 'apply binary blob in partial clone' ' + printf "\\000" >binary && + git add binary && + git commit -m "binary blob" && + git format-patch --stdout -m HEAD^ >patch && + + test_create_repo server && + test_config -C server uploadpack.allowfilter 1 && + test_config -C server uploadpack.allowanysha1inwant 1 && + git clone --filter=blob:none "file://$(pwd)/server" client && + test_when_finished "rm -rf client" && + + # Exercise to make sure that it works + git -C client am ../patch +' + test_done From ee47243d7636d3d54b727ad24027a9167b68ebb1 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 5 Aug 2020 16:06:51 -0700 Subject: [PATCH 3/4] pack-objects: no fetch when allow-{any,promisor} The options --missing=allow-{any,promisor} were introduced in caf3827e2f ("rev-list: add list-objects filtering support", 2017-11-22) with the following note in the commit message: This patch introduces handling of missing objects to help debugging and development of the "partial clone" mechanism, and once the mechanism is implemented, for a power user to perform operations that are missing-object aware without incurring the cost of checking if a missing link is expected. The idea that these options are missing-object aware (and thus do not need to lazily fetch objects, unlike unaware commands that assume that all objects are present) are assumed in later commits such as 07ef3c6604 ("fetch test: use more robust test for filtered objects", 2020-01-15). However, the current implementations of these options use has_object_file(), which indeed lazily fetches missing objects. Teach these implementations not to do so. Also, update the documentation of these options to be clearer. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- Documentation/git-pack-objects.txt | 11 +++++++---- builtin/pack-objects.c | 4 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt index eaa2f2a404..54d715ead1 100644 --- a/Documentation/git-pack-objects.txt +++ b/Documentation/git-pack-objects.txt @@ -270,15 +270,18 @@ So does `git bundle` (see linkgit:git-bundle[1]) when it creates a bundle. This option specifies how missing objects are handled. + The form '--missing=error' requests that pack-objects stop with an error if -a missing object is encountered. This is the default action. +a missing object is encountered. If the repository is a partial clone, an +attempt to fetch missing objects will be made before declaring them missing. +This is the default action. + The form '--missing=allow-any' will allow object traversal to continue -if a missing object is encountered. Missing objects will silently be -omitted from the results. +if a missing object is encountered. No fetch of a missing object will occur. +Missing objects will silently be omitted from the results. + The form '--missing=allow-promisor' is like 'allow-any', but will only allow object traversal to continue for EXPECTED promisor missing objects. -Unexpected missing object will raise an error. +No fetch of a missing object will occur. An unexpected missing object will +raise an error. --exclude-promisor-objects:: Omit objects that are known to be in the promisor remote. (This diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index ecef5cda44..7d07014e7c 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3048,7 +3048,7 @@ static void show_object__ma_allow_any(struct object *obj, const char *name, void * Quietly ignore ALL missing objects. This avoids problems with * staging them now and getting an odd error later. */ - if (!has_object_file(&obj->oid)) + if (!has_object(the_repository, &obj->oid, 0)) return; show_object(obj, name, data); @@ -3062,7 +3062,7 @@ static void show_object__ma_allow_promisor(struct object *obj, const char *name, * Quietly ignore EXPECTED missing objects. This avoids problems with * staging them now and getting an odd error later. */ - if (!has_object_file(&obj->oid) && is_promisor_object(&obj->oid)) + if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid)) return; show_object(obj, name, data); From 9eb86f41de7d229bb3a073e16e735cf71e2ffe3b Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 5 Aug 2020 16:06:52 -0700 Subject: [PATCH 4/4] fsck: do not lazy fetch known non-promisor object There is a call to has_object_file(), which lazily fetches missing objects in a partial clone, when the object is known to not be a promisor object. Change that call to has_object(), which does not do any lazy fetching. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- builtin/fsck.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 37aa07da78..fbf26cafcf 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -168,7 +168,7 @@ static int mark_object(struct object *obj, int type, void *data, struct fsck_opt return 0; if (!(obj->flags & HAS_OBJ)) { - if (parent && !has_object_file(&obj->oid)) { + if (parent && !has_object(the_repository, &obj->oid, 1)) { printf_ln(_("broken link from %7s %s\n" " to %7s %s"), printable_type(&parent->oid, parent->type),