Merge branch 'ps/do-not-trust-commit-graph-blindly-for-existence'

The codepath to traverse the commit-graph learned to notice that a
commit is missing (e.g., corrupt repository lost an object), even
though it knows something about the commit (like its parents) from
what is in commit-graph.

* ps/do-not-trust-commit-graph-blindly-for-existence:
  commit: detect commits that exist in commit-graph but not in the ODB
  commit-graph: introduce envvar to disable commit existence checks
This commit is contained in:
Junio C Hamano 2023-11-08 11:03:59 +09:00
commit 42b87f7ee6
5 changed files with 84 additions and 2 deletions

View File

@ -911,6 +911,16 @@ for full details.
should not normally need to set this to `0`, but it may be
useful when trying to salvage data from a corrupted repository.
`GIT_COMMIT_GRAPH_PARANOIA`::
When loading a commit object from the commit-graph, Git performs an
existence check on the object in the object database. This is done to
avoid issues with stale commit-graphs that contain references to
already-deleted commits, but comes with a performance penalty.
+
The default is "true", which enables the aforementioned behavior.
Setting this to "false" disables the existence check. This can lead to
a performance improvement at the cost of consistency.
`GIT_ALLOW_PROTOCOL`::
If set to a colon-separated list of protocols, behave as if
`protocol.allow` is set to `never`, and each of the listed

View File

@ -1024,14 +1024,18 @@ int repo_find_commit_pos_in_graph(struct repository *r, struct commit *c,
struct commit *lookup_commit_in_graph(struct repository *repo, const struct object_id *id)
{
static int commit_graph_paranoia = -1;
struct commit *commit;
uint32_t pos;
if (commit_graph_paranoia == -1)
commit_graph_paranoia = git_env_bool(GIT_COMMIT_GRAPH_PARANOIA, 1);
if (!prepare_commit_graph(repo))
return NULL;
if (!search_commit_pos_in_graph(id, repo->objects->commit_graph, &pos))
return NULL;
if (!has_object(repo, id, 0))
if (commit_graph_paranoia && !has_object(repo, id, 0))
return NULL;
commit = lookup_commit(repo, id);

View File

@ -8,6 +8,12 @@
#define GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE "GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE"
#define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS"
/*
* This environment variable controls whether commits looked up via the
* commit graph will be double checked to exist in the object database.
*/
#define GIT_COMMIT_GRAPH_PARANOIA "GIT_COMMIT_GRAPH_PARANOIA"
/*
* This method is only used to enhance coverage of the commit-graph
* feature in the test suite with the GIT_TEST_COMMIT_GRAPH and

View File

@ -28,6 +28,7 @@
#include "shallow.h"
#include "tree.h"
#include "hook.h"
#include "parse.h"
static struct commit_extra_header *read_commit_extra_header_lines(const char *buf, size_t len, const char **);
@ -572,8 +573,21 @@ int repo_parse_commit_internal(struct repository *r,
return -1;
if (item->object.parsed)
return 0;
if (use_commit_graph && parse_commit_in_graph(r, item))
if (use_commit_graph && parse_commit_in_graph(r, item)) {
static int commit_graph_paranoia = -1;
if (commit_graph_paranoia == -1)
commit_graph_paranoia = git_env_bool(GIT_COMMIT_GRAPH_PARANOIA, 1);
if (commit_graph_paranoia && !has_object(r, &item->object.oid, 0)) {
unparse_commit(r, &item->object.oid);
return quiet_on_missing ? -1 :
error(_("commit %s exists in commit-graph but not in the object database"),
oid_to_hex(&item->object.oid));
}
return 0;
}
if (oid_object_info_extended(r, &item->object.oid, &oi, flags) < 0)
return quiet_on_missing ? -1 :

View File

@ -895,4 +895,52 @@ test_expect_success 'reader notices too-small generations chunk' '
test_cmp expect.err err
'
test_expect_success 'stale commit cannot be parsed when given directly' '
test_when_finished "rm -rf repo" &&
git init repo &&
(
cd repo &&
test_commit A &&
test_commit B &&
git commit-graph write --reachable &&
oid=$(git rev-parse B) &&
rm .git/objects/"$(test_oid_to_path "$oid")" &&
# Verify that it is possible to read the commit from the
# commit graph when not being paranoid, ...
GIT_COMMIT_GRAPH_PARANOIA=false git rev-list B &&
# ... but parsing the commit when double checking that
# it actually exists in the object database should fail.
test_must_fail git rev-list -1 B
)
'
test_expect_success 'stale commit cannot be parsed when traversing graph' '
test_when_finished "rm -rf repo" &&
git init repo &&
(
cd repo &&
test_commit A &&
test_commit B &&
test_commit C &&
git commit-graph write --reachable &&
# Corrupt the repository by deleting the intermediate commit
# object. Commands should notice that this object is absent and
# thus that the repository is corrupt even if the commit graph
# exists.
oid=$(git rev-parse B) &&
rm .git/objects/"$(test_oid_to_path "$oid")" &&
# Again, we should be able to parse the commit when not
# being paranoid about commit graph staleness...
GIT_COMMIT_GRAPH_PARANOIA=false git rev-parse HEAD~2 &&
# ... but fail when we are paranoid.
test_must_fail git rev-parse HEAD~2 2>error &&
grep "error: commit $oid exists in commit-graph but not in the object database" error
)
'
test_done