Merge branch 'sg/commit-graph-cleanups' into master

The changed-path Bloom filter is improved using ideas from an
independent implementation.

* sg/commit-graph-cleanups:
  commit-graph: simplify write_commit_graph_file() #2
  commit-graph: simplify write_commit_graph_file() #1
  commit-graph: simplify parse_commit_graph() #2
  commit-graph: simplify parse_commit_graph() #1
  commit-graph: clean up #includes
  diff.h: drop diff_tree_oid() & friends' return value
  commit-slab: add a function to deep free entries on the slab
  commit-graph-format.txt: all multi-byte numbers are in network byte order
  commit-graph: fix parsing the Chunk Lookup table
  tree-walk.c: don't match submodule entries for 'submod/anything'
This commit is contained in:
Junio C Hamano 2020-07-30 13:20:30 -07:00
commit de6dda0dc3
13 changed files with 117 additions and 108 deletions

View File

@ -32,7 +32,7 @@ the body into "chunks" and provide a binary lookup table at the beginning
of the body. The header includes certain values, such as number of chunks of the body. The header includes certain values, such as number of chunks
and hash type. and hash type.
All 4-byte numbers are in network order. All multi-byte numbers are in network byte order.
HEADER: HEADER:

View File

@ -1,7 +1,5 @@
#include "cache.h"
#include "config.h"
#include "dir.h"
#include "git-compat-util.h" #include "git-compat-util.h"
#include "config.h"
#include "lockfile.h" #include "lockfile.h"
#include "pack.h" #include "pack.h"
#include "packfile.h" #include "packfile.h"
@ -285,8 +283,7 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
const unsigned char *data, *chunk_lookup; const unsigned char *data, *chunk_lookup;
uint32_t i; uint32_t i;
struct commit_graph *graph; struct commit_graph *graph;
uint64_t last_chunk_offset; uint64_t next_chunk_offset;
uint32_t last_chunk_id;
uint32_t graph_signature; uint32_t graph_signature;
unsigned char graph_version, hash_version; unsigned char graph_version, hash_version;
@ -326,24 +323,26 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
graph->data = graph_map; graph->data = graph_map;
graph->data_len = graph_size; graph->data_len = graph_size;
last_chunk_id = 0; if (graph_size < GRAPH_HEADER_SIZE +
last_chunk_offset = 8; (graph->num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH +
GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) {
error(_("commit-graph file is too small to hold %u chunks"),
graph->num_chunks);
free(graph);
return NULL;
}
chunk_lookup = data + 8; chunk_lookup = data + 8;
next_chunk_offset = get_be64(chunk_lookup + 4);
for (i = 0; i < graph->num_chunks; i++) { for (i = 0; i < graph->num_chunks; i++) {
uint32_t chunk_id; uint32_t chunk_id;
uint64_t chunk_offset; uint64_t chunk_offset = next_chunk_offset;
int chunk_repeated = 0; int chunk_repeated = 0;
if (data + graph_size - chunk_lookup <
GRAPH_CHUNKLOOKUP_WIDTH) {
error(_("commit-graph chunk lookup table entry missing; file may be incomplete"));
goto free_and_return;
}
chunk_id = get_be32(chunk_lookup + 0); chunk_id = get_be32(chunk_lookup + 0);
chunk_offset = get_be64(chunk_lookup + 4);
chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH; chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
next_chunk_offset = get_be64(chunk_lookup + 4);
if (chunk_offset > graph_size - the_hash_algo->rawsz) { if (chunk_offset > graph_size - the_hash_algo->rawsz) {
error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32), error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
@ -362,8 +361,11 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
case GRAPH_CHUNKID_OIDLOOKUP: case GRAPH_CHUNKID_OIDLOOKUP:
if (graph->chunk_oid_lookup) if (graph->chunk_oid_lookup)
chunk_repeated = 1; chunk_repeated = 1;
else else {
graph->chunk_oid_lookup = data + chunk_offset; graph->chunk_oid_lookup = data + chunk_offset;
graph->num_commits = (next_chunk_offset - chunk_offset)
/ graph->hash_len;
}
break; break;
case GRAPH_CHUNKID_DATA: case GRAPH_CHUNKID_DATA:
@ -417,15 +419,6 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
error(_("commit-graph chunk id %08x appears multiple times"), chunk_id); error(_("commit-graph chunk id %08x appears multiple times"), chunk_id);
goto free_and_return; goto free_and_return;
} }
if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP)
{
graph->num_commits = (chunk_offset - last_chunk_offset)
/ graph->hash_len;
}
last_chunk_id = chunk_id;
last_chunk_offset = chunk_offset;
} }
if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) { if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) {
@ -1586,17 +1579,22 @@ static int write_graph_chunk_base(struct hashfile *f,
return 0; return 0;
} }
struct chunk_info {
uint32_t id;
uint64_t size;
};
static int write_commit_graph_file(struct write_commit_graph_context *ctx) static int write_commit_graph_file(struct write_commit_graph_context *ctx)
{ {
uint32_t i; uint32_t i;
int fd; int fd;
struct hashfile *f; struct hashfile *f;
struct lock_file lk = LOCK_INIT; struct lock_file lk = LOCK_INIT;
uint32_t chunk_ids[MAX_NUM_CHUNKS + 1]; struct chunk_info chunks[MAX_NUM_CHUNKS + 1];
uint64_t chunk_offsets[MAX_NUM_CHUNKS + 1];
const unsigned hashsz = the_hash_algo->rawsz; const unsigned hashsz = the_hash_algo->rawsz;
struct strbuf progress_title = STRBUF_INIT; struct strbuf progress_title = STRBUF_INIT;
int num_chunks = 3; int num_chunks = 3;
uint64_t chunk_offset;
struct object_id file_hash; struct object_id file_hash;
const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS; const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
@ -1644,51 +1642,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
} }
chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; chunks[0].id = GRAPH_CHUNKID_OIDFANOUT;
chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; chunks[0].size = GRAPH_FANOUT_SIZE;
chunk_ids[2] = GRAPH_CHUNKID_DATA; chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP;
chunks[1].size = hashsz * ctx->commits.nr;
chunks[2].id = GRAPH_CHUNKID_DATA;
chunks[2].size = (hashsz + 16) * ctx->commits.nr;
if (ctx->num_extra_edges) { if (ctx->num_extra_edges) {
chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES; chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES;
chunks[num_chunks].size = 4 * ctx->num_extra_edges;
num_chunks++; num_chunks++;
} }
if (ctx->changed_paths) { if (ctx->changed_paths) {
chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMINDEXES; chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES;
chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr;
num_chunks++; num_chunks++;
chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMDATA; chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA;
chunks[num_chunks].size = sizeof(uint32_t) * 3
+ ctx->total_bloom_filter_data_size;
num_chunks++; num_chunks++;
} }
if (ctx->num_commit_graphs_after > 1) { if (ctx->num_commit_graphs_after > 1) {
chunk_ids[num_chunks] = GRAPH_CHUNKID_BASE; chunks[num_chunks].id = GRAPH_CHUNKID_BASE;
chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1);
num_chunks++; num_chunks++;
} }
chunk_ids[num_chunks] = 0; chunks[num_chunks].id = 0;
chunks[num_chunks].size = 0;
chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE;
chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr;
chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr;
num_chunks = 3;
if (ctx->num_extra_edges) {
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
4 * ctx->num_extra_edges;
num_chunks++;
}
if (ctx->changed_paths) {
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
sizeof(uint32_t) * ctx->commits.nr;
num_chunks++;
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
sizeof(uint32_t) * 3 + ctx->total_bloom_filter_data_size;
num_chunks++;
}
if (ctx->num_commit_graphs_after > 1) {
chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
hashsz * (ctx->num_commit_graphs_after - 1);
num_chunks++;
}
hashwrite_be32(f, GRAPH_SIGNATURE); hashwrite_be32(f, GRAPH_SIGNATURE);
@ -1697,13 +1678,16 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
hashwrite_u8(f, num_chunks); hashwrite_u8(f, num_chunks);
hashwrite_u8(f, ctx->num_commit_graphs_after - 1); hashwrite_u8(f, ctx->num_commit_graphs_after - 1);
chunk_offset = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
for (i = 0; i <= num_chunks; i++) { for (i = 0; i <= num_chunks; i++) {
uint32_t chunk_write[3]; uint32_t chunk_write[3];
chunk_write[0] = htonl(chunk_ids[i]); chunk_write[0] = htonl(chunks[i].id);
chunk_write[1] = htonl(chunk_offsets[i] >> 32); chunk_write[1] = htonl(chunk_offset >> 32);
chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff); chunk_write[2] = htonl(chunk_offset & 0xffffffff);
hashwrite(f, chunk_write, 12); hashwrite(f, chunk_write, 12);
chunk_offset += chunks[i].size;
} }
if (ctx->report_progress) { if (ctx->report_progress) {

View File

@ -2,9 +2,6 @@
#define COMMIT_GRAPH_H #define COMMIT_GRAPH_H
#include "git-compat-util.h" #include "git-compat-util.h"
#include "repository.h"
#include "string-list.h"
#include "cache.h"
#include "object-store.h" #include "object-store.h"
#include "oidset.h" #include "oidset.h"
@ -23,6 +20,9 @@ void git_test_write_commit_graph_or_die(void);
struct commit; struct commit;
struct bloom_filter_settings; struct bloom_filter_settings;
struct repository;
struct raw_object_store;
struct string_list;
char *get_commit_graph_filename(struct object_directory *odb); char *get_commit_graph_filename(struct object_directory *odb);
int open_commit_graph(const char *graph_file, int *fd, struct stat *st); int open_commit_graph(const char *graph_file, int *fd, struct stat *st);

View File

@ -32,6 +32,7 @@ struct slabname { \
void init_ ##slabname## _with_stride(struct slabname *s, unsigned stride); \ void init_ ##slabname## _with_stride(struct slabname *s, unsigned stride); \
void init_ ##slabname(struct slabname *s); \ void init_ ##slabname(struct slabname *s); \
void clear_ ##slabname(struct slabname *s); \ void clear_ ##slabname(struct slabname *s); \
void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *ptr)); \
elemtype *slabname## _at_peek(struct slabname *s, const struct commit *c, int add_if_missing); \ elemtype *slabname## _at_peek(struct slabname *s, const struct commit *c, int add_if_missing); \
elemtype *slabname## _at(struct slabname *s, const struct commit *c); \ elemtype *slabname## _at(struct slabname *s, const struct commit *c); \
elemtype *slabname## _peek(struct slabname *s, const struct commit *c) elemtype *slabname## _peek(struct slabname *s, const struct commit *c)

View File

@ -38,6 +38,19 @@ scope void clear_ ##slabname(struct slabname *s) \
FREE_AND_NULL(s->slab); \ FREE_AND_NULL(s->slab); \
} \ } \
\ \
scope void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *)) \
{ \
unsigned int i; \
for (i = 0; i < s->slab_count; i++) { \
unsigned int j; \
if (!s->slab[i]) \
continue; \
for (j = 0; j < s->slab_size; j++) \
free_fn(&s->slab[i][j * s->stride]); \
} \
clear_ ##slabname(s); \
} \
\
scope elemtype *slabname## _at_peek(struct slabname *s, \ scope elemtype *slabname## _at_peek(struct slabname *s, \
const struct commit *c, \ const struct commit *c, \
int add_if_missing) \ int add_if_missing) \

View File

@ -47,6 +47,16 @@
* *
* Call this function before the slab falls out of scope to avoid * Call this function before the slab falls out of scope to avoid
* leaking memory. * leaking memory.
*
* - void deep_clear_indegree(struct indegree *, void (*free_fn)(int*))
*
* Empties the slab, similar to clear_indegree(), but in addition it
* calls the given 'free_fn' for each slab entry to release any
* additional memory that might be owned by the entry (but not the
* entry itself!).
* Note that 'free_fn' might be called even for entries for which no
* indegree_at() call has been made; in this case 'free_fn' is invoked
* with a pointer to a zero-initialized location.
*/ */
#define define_commit_slab(slabname, elemtype) \ #define define_commit_slab(slabname, elemtype) \

10
diff.h
View File

@ -431,11 +431,11 @@ struct combine_diff_path *diff_tree_paths(
struct combine_diff_path *p, const struct object_id *oid, struct combine_diff_path *p, const struct object_id *oid,
const struct object_id **parents_oid, int nparent, const struct object_id **parents_oid, int nparent,
struct strbuf *base, struct diff_options *opt); struct strbuf *base, struct diff_options *opt);
int diff_tree_oid(const struct object_id *old_oid, void diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid, const struct object_id *new_oid,
const char *base, struct diff_options *opt); const char *base, struct diff_options *opt);
int diff_root_tree_oid(const struct object_id *new_oid, const char *base, void diff_root_tree_oid(const struct object_id *new_oid, const char *base,
struct diff_options *opt); struct diff_options *opt);
struct combine_diff_path { struct combine_diff_path {
struct combine_diff_path *next; struct combine_diff_path *next;

View File

@ -791,9 +791,7 @@ static int rev_compare_tree(struct rev_info *revs,
tree_difference = REV_TREE_SAME; tree_difference = REV_TREE_SAME;
revs->pruning.flags.has_changes = 0; revs->pruning.flags.has_changes = 0;
if (diff_tree_oid(&t1->object.oid, &t2->object.oid, "", diff_tree_oid(&t1->object.oid, &t2->object.oid, "", &revs->pruning);
&revs->pruning) < 0)
return REV_TREE_DIFFERENT;
if (!nth_parent) if (!nth_parent)
if (bloom_ret == 1 && tree_difference == REV_TREE_SAME) if (bloom_ret == 1 && tree_difference == REV_TREE_SAME)
@ -804,7 +802,6 @@ static int rev_compare_tree(struct rev_info *revs,
static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit) static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
{ {
int retval;
struct tree *t1 = get_commit_tree(commit); struct tree *t1 = get_commit_tree(commit);
if (!t1) if (!t1)
@ -812,9 +809,9 @@ static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
tree_difference = REV_TREE_SAME; tree_difference = REV_TREE_SAME;
revs->pruning.flags.has_changes = 0; revs->pruning.flags.has_changes = 0;
retval = diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning); diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning);
return retval >= 0 && (tree_difference == REV_TREE_SAME); return tree_difference == REV_TREE_SAME;
} }
struct treesame_state { struct treesame_state {

View File

@ -110,6 +110,10 @@ void rollback_shallow_file(struct repository *r, struct shallow_lock *lk)
* supports a "valid" flag. * supports a "valid" flag.
*/ */
define_commit_slab(commit_depth, int *); define_commit_slab(commit_depth, int *);
static void free_depth_in_slab(int **ptr)
{
FREE_AND_NULL(*ptr);
}
struct commit_list *get_shallow_commits(struct object_array *heads, int depth, struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
int shallow_flag, int not_shallow_flag) int shallow_flag, int not_shallow_flag)
{ {
@ -176,15 +180,7 @@ struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
} }
} }
} }
for (i = 0; i < depths.slab_count; i++) { deep_clear_commit_depth(&depths, free_depth_in_slab);
int j;
if (!depths.slab[i])
continue;
for (j = 0; j < depths.slab_size; j++)
free(depths.slab[i][j]);
}
clear_commit_depth(&depths);
return result; return result;
} }

View File

@ -125,7 +125,9 @@ test_expect_success 'setup submodules' '
test_expect_success 'diff-tree ignores trailing slash on submodule path' ' test_expect_success 'diff-tree ignores trailing slash on submodule path' '
git diff --name-only HEAD^ HEAD submod >expect && git diff --name-only HEAD^ HEAD submod >expect &&
git diff --name-only HEAD^ HEAD submod/ >actual && git diff --name-only HEAD^ HEAD submod/ >actual &&
test_cmp expect actual test_cmp expect actual &&
git diff --name-only HEAD^ HEAD -- submod/whatever >actual &&
test_must_be_empty actual
' '
test_expect_success 'diff multiple wildcard pathspecs' ' test_expect_success 'diff multiple wildcard pathspecs' '

View File

@ -529,7 +529,7 @@ test_expect_success 'detect bad hash version' '
' '
test_expect_success 'detect low chunk count' ' test_expect_success 'detect low chunk count' '
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\02" \ corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\01" \
"missing the .* chunk" "missing the .* chunk"
' '
@ -615,7 +615,8 @@ test_expect_success 'detect invalid checksum hash' '
test_expect_success 'detect incorrect chunk count' ' test_expect_success 'detect incorrect chunk count' '
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\377" \ corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\377" \
"chunk lookup table entry missing" $GRAPH_CHUNK_LOOKUP_OFFSET "commit-graph file is too small to hold [0-9]* chunks" \
$GRAPH_CHUNK_LOOKUP_OFFSET
' '
test_expect_success 'git fsck (checks commit-graph)' ' test_expect_success 'git fsck (checks commit-graph)' '

View File

@ -29,9 +29,9 @@ static struct combine_diff_path *ll_diff_tree_paths(
struct combine_diff_path *p, const struct object_id *oid, struct combine_diff_path *p, const struct object_id *oid,
const struct object_id **parents_oid, int nparent, const struct object_id **parents_oid, int nparent,
struct strbuf *base, struct diff_options *opt); struct strbuf *base, struct diff_options *opt);
static int ll_diff_tree_oid(const struct object_id *old_oid, static void ll_diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid, const struct object_id *new_oid,
struct strbuf *base, struct diff_options *opt); struct strbuf *base, struct diff_options *opt);
/* /*
* Compare two tree entries, taking into account only path/S_ISDIR(mode), * Compare two tree entries, taking into account only path/S_ISDIR(mode),
@ -679,9 +679,9 @@ static void try_to_follow_renames(const struct object_id *old_oid,
q->nr = 1; q->nr = 1;
} }
static int ll_diff_tree_oid(const struct object_id *old_oid, static void ll_diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid, const struct object_id *new_oid,
struct strbuf *base, struct diff_options *opt) struct strbuf *base, struct diff_options *opt)
{ {
struct combine_diff_path phead, *p; struct combine_diff_path phead, *p;
pathchange_fn_t pathchange_old = opt->pathchange; pathchange_fn_t pathchange_old = opt->pathchange;
@ -697,29 +697,27 @@ static int ll_diff_tree_oid(const struct object_id *old_oid,
} }
opt->pathchange = pathchange_old; opt->pathchange = pathchange_old;
return 0;
} }
int diff_tree_oid(const struct object_id *old_oid, void diff_tree_oid(const struct object_id *old_oid,
const struct object_id *new_oid, const struct object_id *new_oid,
const char *base_str, struct diff_options *opt) const char *base_str, struct diff_options *opt)
{ {
struct strbuf base; struct strbuf base;
int retval;
strbuf_init(&base, PATH_MAX); strbuf_init(&base, PATH_MAX);
strbuf_addstr(&base, base_str); strbuf_addstr(&base, base_str);
retval = ll_diff_tree_oid(old_oid, new_oid, &base, opt); ll_diff_tree_oid(old_oid, new_oid, &base, opt);
if (!*base_str && opt->flags.follow_renames && diff_might_be_rename()) if (!*base_str && opt->flags.follow_renames && diff_might_be_rename())
try_to_follow_renames(old_oid, new_oid, &base, opt); try_to_follow_renames(old_oid, new_oid, &base, opt);
strbuf_release(&base); strbuf_release(&base);
return retval;
} }
int diff_root_tree_oid(const struct object_id *new_oid, const char *base, struct diff_options *opt) void diff_root_tree_oid(const struct object_id *new_oid,
const char *base,
struct diff_options *opt)
{ {
return diff_tree_oid(NULL, new_oid, base, opt); diff_tree_oid(NULL, new_oid, base, opt);
} }

View File

@ -851,7 +851,14 @@ static int match_entry(const struct pathspec_item *item,
if (matchlen > pathlen) { if (matchlen > pathlen) {
if (match[pathlen] != '/') if (match[pathlen] != '/')
return 0; return 0;
if (!S_ISDIR(entry->mode) && !S_ISGITLINK(entry->mode)) /*
* Reject non-directories as partial pathnames, except
* when match is a submodule with a trailing slash and
* nothing else (to handle 'submod/' and 'submod'
* uniformly).
*/
if (!S_ISDIR(entry->mode) &&
(!S_ISGITLINK(entry->mode) || matchlen > pathlen + 1))
return 0; return 0;
} }