Merge branch 'tb/commit-graph-split-strategy'

"git commit-graph write" learned different ways to write out split
files.

* tb/commit-graph-split-strategy:
  Revert "commit-graph.c: introduce '--[no-]check-oids'"
  commit-graph.c: introduce '--[no-]check-oids'
  commit-graph.h: replace 'commit_hex' with 'commits'
  oidset: introduce 'oidset_size'
  builtin/commit-graph.c: introduce split strategy 'replace'
  builtin/commit-graph.c: introduce split strategy 'no-merge'
  builtin/commit-graph.c: support for '--split[=<strategy>]'
  t/helper/test-read-graph.c: support commit-graph chains
This commit is contained in:
Junio C Hamano 2020-05-01 13:39:52 -07:00
commit 6a1c17d05b
9 changed files with 191 additions and 72 deletions

View File

@ -57,11 +57,18 @@ or `--stdin-packs`.)
With the `--append` option, include all commits that are present in the
existing commit-graph file.
+
With the `--split` option, write the commit-graph as a chain of multiple
commit-graph files stored in `<dir>/info/commit-graphs`. The new commits
not already in the commit-graph are added in a new "tip" file. This file
is merged with the existing file if the following merge conditions are
met:
With the `--split[=<strategy>]` option, write the commit-graph as a
chain of multiple commit-graph files stored in
`<dir>/info/commit-graphs`. Commit-graph layers are merged based on the
strategy and other splitting options. The new commits not already in the
commit-graph are added in a new "tip" file. This file is merged with the
existing file if the following merge conditions are met:
* If `--split=no-merge` is specified, a merge is never performed, and
the remaining options are ignored. `--split=replace` overwrites the
existing chain with a new one. A bare `--split` defers to the remaining
options. (Note that merging a chain of commit graphs replaces the
existing chain with a length-1 chain where the first and only
incremental holds the entire graph.)
+
* If `--size-multiple=<X>` is not specified, let `X` equal 2. If the new
tip file would have `N` commits and the previous tip has `M` commits and

View File

@ -9,7 +9,9 @@
static char const * const builtin_commit_graph_usage[] = {
N_("git commit-graph verify [--object-dir <objdir>] [--shallow] [--[no-]progress]"),
N_("git commit-graph write [--object-dir <objdir>] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] [--[no-]progress] <split options>"),
N_("git commit-graph write [--object-dir <objdir>] [--append] "
"[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
"[--[no-]progress] <split options>"),
NULL
};
@ -19,7 +21,9 @@ static const char * const builtin_commit_graph_verify_usage[] = {
};
static const char * const builtin_commit_graph_write_usage[] = {
N_("git commit-graph write [--object-dir <objdir>] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] [--[no-]progress] <split options>"),
N_("git commit-graph write [--object-dir <objdir>] [--append] "
"[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
"[--[no-]progress] <split options>"),
NULL
};
@ -114,10 +118,29 @@ static int graph_verify(int argc, const char **argv)
extern int read_replace_refs;
static struct split_commit_graph_opts split_opts;
/*
 * parse-options callback for `--split[=<strategy>]`.
 *
 * Every invocation switches on opts.split. When a strategy string is
 * supplied, it is translated into the enum commit_graph_split_flags
 * that opt->value points at:
 *
 *   "no-merge" -> COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED
 *   "replace"  -> COMMIT_GRAPH_SPLIT_REPLACE
 *
 * Any other string is fatal (die()). A bare `--split` (arg == NULL)
 * leaves the pointed-to flags untouched. `unset` is not consulted.
 *
 * Returns 0 whenever it returns at all.
 */
static int write_option_parse_split(const struct option *opt, const char *arg,
				    int unset)
{
	enum commit_graph_split_flags *strategy = opt->value;

	/* Split mode is enabled before the argument is examined. */
	opts.split = 1;

	if (arg) {
		if (strcmp(arg, "no-merge") == 0)
			*strategy = COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED;
		else if (strcmp(arg, "replace") == 0)
			*strategy = COMMIT_GRAPH_SPLIT_REPLACE;
		else
			die(_("unrecognized --split argument, %s"), arg);
	}

	return 0;
}
static int graph_write(int argc, const char **argv)
{
struct string_list *pack_indexes = NULL;
struct string_list *commit_hex = NULL;
struct oidset commits = OIDSET_INIT;
struct object_directory *odb = NULL;
struct string_list lines;
int result = 0;
@ -136,8 +159,10 @@ static int graph_write(int argc, const char **argv)
OPT_BOOL(0, "append", &opts.append,
N_("include all commits already in the commit-graph file")),
OPT_BOOL(0, "progress", &opts.progress, N_("force progress reporting")),
OPT_BOOL(0, "split", &opts.split,
N_("allow writing an incremental commit-graph file")),
OPT_CALLBACK_F(0, "split", &split_opts.flags, NULL,
N_("allow writing an incremental commit-graph file"),
PARSE_OPT_OPTARG | PARSE_OPT_NONEG,
write_option_parse_split),
OPT_INTEGER(0, "max-commits", &split_opts.max_commits,
N_("maximum number of commits in a non-base split commit-graph")),
OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple,
@ -188,7 +213,20 @@ static int graph_write(int argc, const char **argv)
if (opts.stdin_packs)
pack_indexes = &lines;
if (opts.stdin_commits) {
commit_hex = &lines;
struct string_list_item *item;
oidset_init(&commits, lines.nr);
for_each_string_list_item(item, &lines) {
struct object_id oid;
const char *end;
if (parse_oid_hex(item->string, &oid, &end)) {
error(_("unexpected non-hex object ID: "
"%s"), item->string);
return 1;
}
oidset_insert(&commits, &oid);
}
flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS;
}
@ -197,7 +235,7 @@ static int graph_write(int argc, const char **argv)
if (write_commit_graph(odb,
pack_indexes,
commit_hex,
opts.stdin_commits ? &commits : NULL,
flags,
&split_opts))
result = 1;

View File

@ -866,7 +866,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
if (edge_value >= 0)
edge_value += ctx->new_num_commits_in_base;
else {
else if (ctx->new_base_graph) {
uint32_t pos;
if (find_commit_in_graph(parent->item,
ctx->new_base_graph,
@ -897,7 +897,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
if (edge_value >= 0)
edge_value += ctx->new_num_commits_in_base;
else {
else if (ctx->new_base_graph) {
uint32_t pos;
if (find_commit_in_graph(parent->item,
ctx->new_base_graph,
@ -964,7 +964,7 @@ static void write_graph_chunk_extra_edges(struct hashfile *f,
if (edge_value >= 0)
edge_value += ctx->new_num_commits_in_base;
else {
else if (ctx->new_base_graph) {
uint32_t pos;
if (find_commit_in_graph(parent->item,
ctx->new_base_graph,
@ -1037,6 +1037,8 @@ static void close_reachable(struct write_commit_graph_context *ctx)
{
int i;
struct commit *commit;
enum commit_graph_split_flags flags = ctx->split_opts ?
ctx->split_opts->flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED;
if (ctx->report_progress)
ctx->progress = start_delayed_progress(
@ -1066,8 +1068,9 @@ static void close_reachable(struct write_commit_graph_context *ctx)
if (!commit)
continue;
if (ctx->split) {
if (!parse_commit(commit) &&
commit->graph_pos == COMMIT_NOT_FROM_GRAPH)
if ((!parse_commit(commit) &&
commit->graph_pos == COMMIT_NOT_FROM_GRAPH) ||
flags == COMMIT_GRAPH_SPLIT_REPLACE)
add_missing_parents(ctx, commit);
} else if (!parse_commit_no_graph(commit))
add_missing_parents(ctx, commit);
@ -1133,13 +1136,13 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
stop_progress(&ctx->progress);
}
static int add_ref_to_list(const char *refname,
const struct object_id *oid,
int flags, void *cb_data)
static int add_ref_to_set(const char *refname,
const struct object_id *oid,
int flags, void *cb_data)
{
struct string_list *list = (struct string_list *)cb_data;
struct oidset *commits = (struct oidset *)cb_data;
string_list_append(list, oid_to_hex(oid));
oidset_insert(commits, oid);
return 0;
}
@ -1147,14 +1150,14 @@ int write_commit_graph_reachable(struct object_directory *odb,
enum commit_graph_write_flags flags,
const struct split_commit_graph_opts *split_opts)
{
struct string_list list = STRING_LIST_INIT_DUP;
struct oidset commits = OIDSET_INIT;
int result;
for_each_ref(add_ref_to_list, &list);
result = write_commit_graph(odb, NULL, &list,
for_each_ref(add_ref_to_set, &commits);
result = write_commit_graph(odb, NULL, &commits,
flags, split_opts);
string_list_clear(&list, 0);
oidset_clear(&commits);
return result;
}
@ -1203,39 +1206,46 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
return 0;
}
static int fill_oids_from_commit_hex(struct write_commit_graph_context *ctx,
struct string_list *commit_hex)
static int fill_oids_from_commits(struct write_commit_graph_context *ctx,
struct oidset *commits)
{
uint32_t i;
uint32_t i = 0;
struct strbuf progress_title = STRBUF_INIT;
struct oidset_iter iter;
struct object_id *oid;
if (!oidset_size(commits))
return 0;
if (ctx->report_progress) {
strbuf_addf(&progress_title,
Q_("Finding commits for commit graph from %d ref",
"Finding commits for commit graph from %d refs",
commit_hex->nr),
commit_hex->nr);
oidset_size(commits)),
oidset_size(commits));
ctx->progress = start_delayed_progress(
progress_title.buf,
commit_hex->nr);
oidset_size(commits));
}
for (i = 0; i < commit_hex->nr; i++) {
const char *end;
struct object_id oid;
oidset_iter_init(commits, &iter);
while ((oid = oidset_iter_next(&iter))) {
struct commit *result;
display_progress(ctx->progress, i + 1);
if (!parse_oid_hex(commit_hex->items[i].string, &oid, &end) &&
(result = lookup_commit_reference_gently(ctx->r, &oid, 1))) {
display_progress(ctx->progress, ++i);
result = lookup_commit_reference_gently(ctx->r, oid, 1);
if (result) {
ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc);
oidcpy(&ctx->oids.list[ctx->oids.nr], &(result->object.oid));
ctx->oids.nr++;
} else if (ctx->check_oids) {
error(_("invalid commit object id: %s"),
commit_hex->items[i].string);
oid_to_hex(oid));
return -1;
}
}
stop_progress(&ctx->progress);
strbuf_release(&progress_title);
@ -1287,6 +1297,8 @@ static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx)
static void copy_oids_to_commits(struct write_commit_graph_context *ctx)
{
uint32_t i;
enum commit_graph_split_flags flags = ctx->split_opts ?
ctx->split_opts->flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED;
ctx->num_extra_edges = 0;
if (ctx->report_progress)
@ -1303,11 +1315,14 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx)
ALLOC_GROW(ctx->commits.list, ctx->commits.nr + 1, ctx->commits.alloc);
ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.list[i]);
if (ctx->split &&
if (ctx->split && flags != COMMIT_GRAPH_SPLIT_REPLACE &&
ctx->commits.list[ctx->commits.nr]->graph_pos != COMMIT_NOT_FROM_GRAPH)
continue;
parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]);
if (ctx->split && flags == COMMIT_GRAPH_SPLIT_REPLACE)
parse_commit(ctx->commits.list[ctx->commits.nr]);
else
parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]);
num_parents = commit_list_count(ctx->commits.list[ctx->commits.nr]->parents);
if (num_parents > 2)
@ -1488,8 +1503,12 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
}
if (ctx->base_graph_name) {
const char *dest = ctx->commit_graph_filenames_after[
ctx->num_commit_graphs_after - 2];
const char *dest;
int idx = ctx->num_commit_graphs_after - 1;
if (ctx->num_commit_graphs_after > 1)
idx--;
dest = ctx->commit_graph_filenames_after[idx];
if (strcmp(ctx->base_graph_name, dest)) {
result = rename(ctx->base_graph_name, dest);
@ -1529,6 +1548,7 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx)
{
struct commit_graph *g;
uint32_t num_commits;
enum commit_graph_split_flags flags = COMMIT_GRAPH_SPLIT_UNSPECIFIED;
uint32_t i;
int max_commits = 0;
@ -1539,24 +1559,36 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx)
if (ctx->split_opts->size_multiple)
size_mult = ctx->split_opts->size_multiple;
flags = ctx->split_opts->flags;
}
g = ctx->r->objects->commit_graph;
num_commits = ctx->commits.nr;
ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1;
if (flags == COMMIT_GRAPH_SPLIT_REPLACE)
ctx->num_commit_graphs_after = 1;
else
ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1;
while (g && (g->num_commits <= size_mult * num_commits ||
(max_commits && num_commits > max_commits))) {
if (g->odb != ctx->odb)
break;
if (flags != COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED &&
flags != COMMIT_GRAPH_SPLIT_REPLACE) {
while (g && (g->num_commits <= size_mult * num_commits ||
(max_commits && num_commits > max_commits))) {
if (g->odb != ctx->odb)
break;
num_commits += g->num_commits;
g = g->base_graph;
num_commits += g->num_commits;
g = g->base_graph;
ctx->num_commit_graphs_after--;
ctx->num_commit_graphs_after--;
}
}
ctx->new_base_graph = g;
if (flags != COMMIT_GRAPH_SPLIT_REPLACE)
ctx->new_base_graph = g;
else if (ctx->num_commit_graphs_after != 1)
BUG("split_graph_merge_strategy: num_commit_graphs_after "
"should be 1 with --split=replace");
if (ctx->num_commit_graphs_after == 2) {
char *old_graph_name = get_commit_graph_filename(g->odb);
@ -1756,13 +1788,14 @@ out:
int write_commit_graph(struct object_directory *odb,
struct string_list *pack_indexes,
struct string_list *commit_hex,
struct oidset *commits,
enum commit_graph_write_flags flags,
const struct split_commit_graph_opts *split_opts)
{
struct write_commit_graph_context *ctx;
uint32_t i, count_distinct = 0;
int res = 0;
int replace = 0;
if (!commit_graph_compatible(the_repository))
return 0;
@ -1797,6 +1830,9 @@ int write_commit_graph(struct object_directory *odb,
g = g->base_graph;
}
}
if (ctx->split_opts)
replace = ctx->split_opts->flags & COMMIT_GRAPH_SPLIT_REPLACE;
}
ctx->approx_nr_objects = approximate_object_count();
@ -1828,12 +1864,12 @@ int write_commit_graph(struct object_directory *odb,
goto cleanup;
}
if (commit_hex) {
if ((res = fill_oids_from_commit_hex(ctx, commit_hex)))
if (commits) {
if ((res = fill_oids_from_commits(ctx, commits)))
goto cleanup;
}
if (!pack_indexes && !commit_hex)
if (!pack_indexes && !commits)
fill_oids_from_all_packs(ctx);
close_reachable(ctx);
@ -1857,13 +1893,14 @@ int write_commit_graph(struct object_directory *odb,
goto cleanup;
}
if (!ctx->commits.nr)
if (!ctx->commits.nr && !replace)
goto cleanup;
if (ctx->split) {
split_graph_merge_strategy(ctx);
merge_commit_graphs(ctx);
if (!replace)
merge_commit_graphs(ctx);
} else
ctx->num_commit_graphs_after = 1;

View File

@ -6,6 +6,7 @@
#include "string-list.h"
#include "cache.h"
#include "object-store.h"
#include "oidset.h"
#define GIT_TEST_COMMIT_GRAPH "GIT_TEST_COMMIT_GRAPH"
#define GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD "GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD"
@ -82,10 +83,17 @@ enum commit_graph_write_flags {
COMMIT_GRAPH_WRITE_CHECK_OIDS = (1 << 3)
};
/*
 * Strategy requested through `--split[=<strategy>]`, stored in
 * split_commit_graph_opts.flags.
 */
enum commit_graph_split_flags {
/* No strategy named; also used by writers when no split options are passed. */
COMMIT_GRAPH_SPLIT_UNSPECIFIED = 0,
/* `--split=no-merge`: the layer-merging loop is skipped entirely. */
COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED = 1,
/* `--split=replace`: the resulting chain is forced to a single layer. */
COMMIT_GRAPH_SPLIT_REPLACE = 2
};
/*
 * Options controlling how a split (chained) commit-graph is written.
 */
struct split_commit_graph_opts {
/* From `--size-multiple`; a zero value leaves the writer's own multiple in place. */
int size_multiple;
/* From `--max-commits`: maximum number of commits in a non-base split commit-graph. */
int max_commits;
/* NOTE(review): not exercised in the code visible here; presumably an expiry cutoff for old layers -- confirm. */
timestamp_t expire_time;
/* Strategy chosen with `--split[=<strategy>]`. */
enum commit_graph_split_flags flags;
};
/*
@ -99,7 +107,7 @@ int write_commit_graph_reachable(struct object_directory *odb,
const struct split_commit_graph_opts *split_opts);
int write_commit_graph(struct object_directory *odb,
struct string_list *pack_indexes,
struct string_list *commit_hex,
struct oidset *commits,
enum commit_graph_write_flags flags,
const struct split_commit_graph_opts *split_opts);

View File

@ -36,6 +36,11 @@ void oidset_clear(struct oidset *set)
oidset_init(set, 0);
}
/*
 * Report how many object IDs `set` currently holds, as given by
 * kh_size() on the set's underlying table.
 */
int oidset_size(struct oidset *set)
{
	int count = kh_size(&set->set);

	return count;
}
void oidset_parse_file(struct oidset *set, const char *path)
{
FILE *fp;

View File

@ -54,6 +54,11 @@ int oidset_insert(struct oidset *set, const struct object_id *oid);
*/
int oidset_remove(struct oidset *set, const struct object_id *oid);
/**
* Returns the number of oids in the set.
*/
int oidset_size(struct oidset *set);
/**
* Remove all entries from the oidset, freeing any resources associated with
* it.

View File

@ -7,26 +7,15 @@
int cmd__read_graph(int argc, const char **argv)
{
struct commit_graph *graph = NULL;
char *graph_name;
int open_ok;
int fd;
struct stat st;
struct object_directory *odb;
setup_git_directory();
odb = the_repository->objects->odb;
graph_name = get_commit_graph_filename(odb);
open_ok = open_commit_graph(graph_name, &fd, &st);
if (!open_ok)
die_errno(_("Could not open commit-graph '%s'"), graph_name);
graph = load_commit_graph_one_fd_st(fd, &st, odb);
graph = read_commit_graph_one(the_repository, odb);
if (!graph)
return 1;
FREE_AND_NULL(graph_name);
printf("header: %08x %d %d %d %d\n",
ntohl(*(uint32_t*)graph->data),

View File

@ -43,7 +43,7 @@ test_expect_success 'create commits and repack' '
test_expect_success 'exit with correct error on bad input to --stdin-commits' '
cd "$TRASH_DIRECTORY/full" &&
echo HEAD | test_expect_code 1 git commit-graph write --stdin-commits 2>stderr &&
test_i18ngrep "invalid commit object id" stderr &&
test_i18ngrep "unexpected non-hex object ID: HEAD" stderr &&
# valid tree OID, but not a commit OID
git rev-parse HEAD^{tree} | test_expect_code 1 git commit-graph write --stdin-commits 2>stderr &&
test_i18ngrep "invalid commit object id" stderr

View File

@ -357,4 +357,34 @@ test_expect_success 'split across alternate where alternate is not split' '
test_cmp commit-graph .git/objects/info/commit-graph
'
# Write one split layer from HEAD~1, then a second one from HEAD with
# --split=no-merge, and check that the chain file ends up with two lines
# (presumably one line per layer -- confirm against the chain file format).
test_expect_success '--split=no-merge always writes an incremental' '
test_when_finished rm -rf a b &&
# remove any graph data left over from earlier tests
rm -rf $graphdir $infodir/commit-graph &&
git reset --hard commits/2 &&
# a: commits listed from HEAD~1, b: commits listed from HEAD
git rev-list HEAD~1 >a &&
git rev-list HEAD >b &&
git commit-graph write --split --stdin-commits <a &&
git commit-graph write --split=no-merge --stdin-commits <b &&
test_line_count = 2 $graphdir/commit-graph-chain
'
# Build a three-line chain with three --split=no-merge writes, then write
# with --split=replace and check that exactly one graph-*.graph file is left.
test_expect_success '--split=replace replaces the chain' '
# remove any graph data left over from earlier tests
rm -rf $graphdir $infodir/commit-graph &&
git reset --hard commits/3 &&
# a, b and c each hold a single commit ID (rev-list -1)
git rev-list -1 HEAD~2 >a &&
git rev-list -1 HEAD~1 >b &&
git rev-list -1 HEAD >c &&
git commit-graph write --split=no-merge --stdin-commits <a &&
git commit-graph write --split=no-merge --stdin-commits <b &&
git commit-graph write --split=no-merge --stdin-commits <c &&
test_line_count = 3 $graphdir/commit-graph-chain &&
# replace the whole chain, feeding only the commit from b
git commit-graph write --stdin-commits --split=replace <b &&
# no single-file graph may appear; the chain file must still exist
test_path_is_missing $infodir/commit-graph &&
test_path_is_file $graphdir/commit-graph-chain &&
ls $graphdir/graph-*.graph >graph-files &&
test_line_count = 1 graph-files &&
verify_chain_files_exist $graphdir &&
# graph_read_expect is defined elsewhere in the test script; presumably
# it checks the number of commits in the written graph -- confirm
graph_read_expect 2
'
test_done