Merge branch 'jt/transfer-fsck-across-packs'

The approach to "fsck" the incoming objects in "index-pack" is
attractive for performance reasons (we have them already in core,
inflated and ready to be inspected), but fundamentally cannot be
applied fully when we receive more than one pack stream, as a tree
object in one pack may refer to a blob object in another pack as
".gitmodules", when we want to inspect blobs that are used as
".gitmodules" file, for example.  Teach "index-pack" to emit
objects that must be inspected later and check them in the calling
"fetch-pack" process.

* jt/transfer-fsck-across-packs:
  fetch-pack: print and use dangling .gitmodules
  fetch-pack: with packfile URIs, use index-pack arg
  http-fetch: allow custom index-pack args
  http: allow custom index-pack args
This commit is contained in:
Junio C Hamano 2021-03-01 14:02:57 -08:00
commit 6ee353d42f
14 changed files with 229 additions and 51 deletions

View File

@ -41,11 +41,17 @@ commit-id::
<commit-id>['\t'<filename-as-in--w>]
--packfile=<hash>::
Instead of a commit id on the command line (which is not expected in
For internal use only. Instead of a commit id on the command
line (which is not expected in
this case), 'git http-fetch' fetches the packfile directly at the given
URL and uses index-pack to generate corresponding .idx and .keep files.
The hash is used to determine the name of the temporary file and is
arbitrary. The output of index-pack is printed to stdout.
arbitrary. The output of index-pack is printed to stdout. Requires
--index-pack-args.
--index-pack-args=<args>::
For internal use only. The command to run on the contents of the
downloaded pack. Arguments are URL-encoded separated by spaces.
--recover::
Verify that everything reachable from target is fetched. Used after

View File

@ -86,7 +86,12 @@ OPTIONS
Die if the pack contains broken links. For internal use only.
--fsck-objects::
Die if the pack contains broken objects. For internal use only.
For internal use only.
+
Die if the pack contains broken objects. If the pack contains a tree
pointing to a .gitmodules blob that does not exist, prints the hash of
that blob (for the caller to check) after the hash that goes into the
name of the pack/idx file (see "Notes").
--threads=<n>::
Specifies the number of threads to spawn when resolving

View File

@ -1712,6 +1712,22 @@ static void show_pack_info(int stat_only)
}
}
static int print_dangling_gitmodules(struct fsck_options *o,
const struct object_id *oid,
enum object_type object_type,
int msg_type, const char *message)
{
/*
* NEEDSWORK: Plumb the MSG_ID (from fsck.c) here and use it
* instead of relying on this string check.
*/
if (starts_with(message, "gitmodulesMissing")) {
printf("%s\n", oid_to_hex(oid));
return 0;
}
return fsck_error_function(o, oid, object_type, msg_type, message);
}
int cmd_index_pack(int argc, const char **argv, const char *prefix)
{
int i, fix_thin_pack = 0, verify = 0, stat_only = 0, rev_index;
@ -1932,8 +1948,13 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
else
close(input_fd);
if (do_fsck_object && fsck_finish(&fsck_options))
die(_("fsck error in pack objects"));
if (do_fsck_object) {
struct fsck_options fo = fsck_options;
fo.error_func = print_dangling_gitmodules;
if (fsck_finish(&fo))
die(_("fsck error in pack objects"));
}
free(objects);
strbuf_release(&index_name_buf);

View File

@ -2275,7 +2275,7 @@ static const char *unpack(int err_fd, struct shallow_info *si)
status = start_command(&child);
if (status)
return "index-pack fork failed";
pack_lockfile = index_pack_lockfile(child.out);
pack_lockfile = index_pack_lockfile(child.out, NULL);
close(child.out);
status = finish_command(&child);
if (status)

View File

@ -790,14 +790,36 @@ static void create_promisor_file(const char *keep_name,
strbuf_release(&promisor_name);
}
static void parse_gitmodules_oids(int fd, struct oidset *gitmodules_oids)
{
int len = the_hash_algo->hexsz + 1; /* hash + NL */
do {
char hex_hash[GIT_MAX_HEXSZ + 1];
int read_len = read_in_full(fd, hex_hash, len);
struct object_id oid;
const char *end;
if (!read_len)
return;
if (read_len != len)
die("invalid length read %d", read_len);
if (parse_oid_hex(hex_hash, &oid, &end) || *end != '\n')
die("invalid hash");
oidset_insert(gitmodules_oids, &oid);
} while (1);
}
/*
* Pass 1 as "only_packfile" if the pack received is the only pack in this
* fetch request (that is, if there were no packfile URIs provided).
* If packfile URIs were provided, pass a non-NULL pointer to index_pack_args.
* The strings to pass as the --index-pack-arg arguments to http-fetch will be
* stored there. (It must be freed by the caller.)
*/
static int get_pack(struct fetch_pack_args *args,
int xd[2], struct string_list *pack_lockfiles,
int only_packfile,
struct ref **sought, int nr_sought)
struct strvec *index_pack_args,
struct ref **sought, int nr_sought,
struct oidset *gitmodules_oids)
{
struct async demux;
int do_keep = args->keep_pack;
@ -805,6 +827,7 @@ static int get_pack(struct fetch_pack_args *args,
struct pack_header header;
int pass_header = 0;
struct child_process cmd = CHILD_PROCESS_INIT;
int fsck_objects = 0;
int ret;
memset(&demux, 0, sizeof(demux));
@ -839,8 +862,15 @@ static int get_pack(struct fetch_pack_args *args,
strvec_push(&cmd.args, alternate_shallow_file);
}
if (do_keep || args->from_promisor) {
if (pack_lockfiles)
if (fetch_fsck_objects >= 0
? fetch_fsck_objects
: transfer_fsck_objects >= 0
? transfer_fsck_objects
: 0)
fsck_objects = 1;
if (do_keep || args->from_promisor || index_pack_args || fsck_objects) {
if (pack_lockfiles || fsck_objects)
cmd.out = -1;
cmd_name = "index-pack";
strvec_push(&cmd.args, cmd_name);
@ -857,7 +887,7 @@ static int get_pack(struct fetch_pack_args *args,
"--keep=fetch-pack %"PRIuMAX " on %s",
(uintmax_t)getpid(), hostname);
}
if (only_packfile && args->check_self_contained_and_connected)
if (!index_pack_args && args->check_self_contained_and_connected)
strvec_push(&cmd.args, "--check-self-contained-and-connected");
else
/*
@ -890,12 +920,8 @@ static int get_pack(struct fetch_pack_args *args,
strvec_pushf(&cmd.args, "--pack_header=%"PRIu32",%"PRIu32,
ntohl(header.hdr_version),
ntohl(header.hdr_entries));
if (fetch_fsck_objects >= 0
? fetch_fsck_objects
: transfer_fsck_objects >= 0
? transfer_fsck_objects
: 0) {
if (args->from_promisor || !only_packfile)
if (fsck_objects) {
if (args->from_promisor || index_pack_args)
/*
* We cannot use --strict in index-pack because it
* checks both broken objects and links, but we only
@ -907,14 +933,26 @@ static int get_pack(struct fetch_pack_args *args,
fsck_msg_types.buf);
}
if (index_pack_args) {
int i;
for (i = 0; i < cmd.args.nr; i++)
strvec_push(index_pack_args, cmd.args.v[i]);
}
cmd.in = demux.out;
cmd.git_cmd = 1;
if (start_command(&cmd))
die(_("fetch-pack: unable to fork off %s"), cmd_name);
if (do_keep && pack_lockfiles) {
char *pack_lockfile = index_pack_lockfile(cmd.out);
if (do_keep && (pack_lockfiles || fsck_objects)) {
int is_well_formed;
char *pack_lockfile = index_pack_lockfile(cmd.out, &is_well_formed);
if (!is_well_formed)
die(_("fetch-pack: invalid index-pack output"));
if (pack_lockfile)
string_list_append_nodup(pack_lockfiles, pack_lockfile);
parse_gitmodules_oids(cmd.out, gitmodules_oids);
close(cmd.out);
}
@ -949,6 +987,22 @@ static int cmp_ref_by_name(const void *a_, const void *b_)
return strcmp(a->name, b->name);
}
static void fsck_gitmodules_oids(struct oidset *gitmodules_oids)
{
struct oidset_iter iter;
const struct object_id *oid;
struct fsck_options fo = FSCK_OPTIONS_STRICT;
if (!oidset_size(gitmodules_oids))
return;
oidset_iter_init(gitmodules_oids, &iter);
while ((oid = oidset_iter_next(&iter)))
register_found_gitmodules(oid);
if (fsck_finish(&fo))
die("fsck failed");
}
static struct ref *do_fetch_pack(struct fetch_pack_args *args,
int fd[2],
const struct ref *orig_ref,
@ -963,6 +1017,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
int agent_len;
struct fetch_negotiator negotiator_alloc;
struct fetch_negotiator *negotiator;
struct oidset gitmodules_oids = OIDSET_INIT;
negotiator = &negotiator_alloc;
fetch_negotiator_init(r, negotiator);
@ -1078,8 +1133,10 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
alternate_shallow_file = setup_temporary_shallow(si->shallow);
else
alternate_shallow_file = NULL;
if (get_pack(args, fd, pack_lockfiles, 1, sought, nr_sought))
if (get_pack(args, fd, pack_lockfiles, NULL, sought, nr_sought,
&gitmodules_oids))
die(_("git fetch-pack: fetch failed."));
fsck_gitmodules_oids(&gitmodules_oids);
all_done:
if (negotiator)
@ -1529,6 +1586,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
int seen_ack = 0;
struct string_list packfile_uris = STRING_LIST_INIT_DUP;
int i;
struct strvec index_pack_args = STRVEC_INIT;
struct oidset gitmodules_oids = OIDSET_INIT;
negotiator = &negotiator_alloc;
fetch_negotiator_init(r, negotiator);
@ -1618,7 +1677,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
receive_packfile_uris(&reader, &packfile_uris);
process_section_header(&reader, "packfile", 0);
if (get_pack(args, fd, pack_lockfiles,
!packfile_uris.nr, sought, nr_sought))
packfile_uris.nr ? &index_pack_args : NULL,
sought, nr_sought, &gitmodules_oids))
die(_("git fetch-pack: fetch failed."));
do_check_stateless_delimiter(args, &reader);
@ -1630,6 +1690,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
}
for (i = 0; i < packfile_uris.nr; i++) {
int j;
struct child_process cmd = CHILD_PROCESS_INIT;
char packname[GIT_MAX_HEXSZ + 1];
const char *uri = packfile_uris.items[i].string +
@ -1639,6 +1700,9 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
strvec_pushf(&cmd.args, "--packfile=%.*s",
(int) the_hash_algo->hexsz,
packfile_uris.items[i].string);
for (j = 0; j < index_pack_args.nr; j++)
strvec_pushf(&cmd.args, "--index-pack-arg=%s",
index_pack_args.v[j]);
strvec_push(&cmd.args, uri);
cmd.git_cmd = 1;
cmd.no_stdin = 1;
@ -1657,6 +1721,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
packname[the_hash_algo->hexsz] = '\0';
parse_gitmodules_oids(cmd.out, &gitmodules_oids);
close(cmd.out);
if (finish_command(&cmd))
@ -1674,6 +1740,9 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
packname));
}
string_list_clear(&packfile_uris, 0);
strvec_clear(&index_pack_args);
fsck_gitmodules_oids(&gitmodules_oids);
if (negotiator)
negotiator->release(negotiator);

5
fsck.c
View File

@ -1276,6 +1276,11 @@ int fsck_error_function(struct fsck_options *o,
return 1;
}
void register_found_gitmodules(const struct object_id *oid)
{
oidset_insert(&gitmodules_found, oid);
}
int fsck_finish(struct fsck_options *options)
{
int ret = 0;

2
fsck.h
View File

@ -62,6 +62,8 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options);
int fsck_object(struct object *obj, void *data, unsigned long size,
struct fsck_options *options);
void register_found_gitmodules(const struct object_id *oid);
/*
* fsck a tag, and pass info about it back to the caller. This is
* exposed fsck_object() internals for git-mktag(1).

View File

@ -3,6 +3,7 @@
#include "exec-cmd.h"
#include "http.h"
#include "walker.h"
#include "strvec.h"
static const char http_fetch_usage[] = "git http-fetch "
"[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin | --packfile=hash | commit-id] url";
@ -44,7 +45,8 @@ static int fetch_using_walker(const char *raw_url, int get_verbosely,
}
static void fetch_single_packfile(struct object_id *packfile_hash,
const char *url) {
const char *url,
const char **index_pack_args) {
struct http_pack_request *preq;
struct slot_results results;
int ret;
@ -55,7 +57,8 @@ static void fetch_single_packfile(struct object_id *packfile_hash,
if (preq == NULL)
die("couldn't create http pack request");
preq->slot->results = &results;
preq->generate_keep = 1;
preq->index_pack_args = index_pack_args;
preq->preserve_index_pack_stdout = 1;
if (start_active_slot(preq->slot)) {
run_active_slot(preq->slot);
@ -86,6 +89,7 @@ int cmd_main(int argc, const char **argv)
int packfile = 0;
int nongit;
struct object_id packfile_hash;
struct strvec index_pack_args = STRVEC_INIT;
setup_git_directory_gently(&nongit);
@ -112,6 +116,8 @@ int cmd_main(int argc, const char **argv)
packfile = 1;
if (parse_oid_hex(p, &packfile_hash, &end) || *end)
die(_("argument to --packfile must be a valid hash (got '%s')"), p);
} else if (skip_prefix(argv[arg], "--index-pack-arg=", &p)) {
strvec_push(&index_pack_args, p);
}
arg++;
}
@ -124,10 +130,18 @@ int cmd_main(int argc, const char **argv)
git_config(git_default_config, NULL);
if (packfile) {
fetch_single_packfile(&packfile_hash, argv[arg]);
if (!index_pack_args.nr)
die(_("--packfile requires --index-pack-args"));
fetch_single_packfile(&packfile_hash, argv[arg],
index_pack_args.v);
return 0;
}
if (index_pack_args.nr)
die(_("--index-pack-args can only be used with --packfile"));
if (commits_on_stdin) {
commits = walker_targets_stdin(&commit_id, &write_ref);
} else {

15
http.c
View File

@ -2259,6 +2259,9 @@ void release_http_pack_request(struct http_pack_request *preq)
free(preq);
}
static const char *default_index_pack_args[] =
{"index-pack", "--stdin", NULL};
int finish_http_pack_request(struct http_pack_request *preq)
{
struct child_process ip = CHILD_PROCESS_INIT;
@ -2270,17 +2273,15 @@ int finish_http_pack_request(struct http_pack_request *preq)
tmpfile_fd = xopen(preq->tmpfile.buf, O_RDONLY);
strvec_push(&ip.args, "index-pack");
strvec_push(&ip.args, "--stdin");
ip.git_cmd = 1;
ip.in = tmpfile_fd;
if (preq->generate_keep) {
strvec_pushf(&ip.args, "--keep=git %"PRIuMAX,
(uintmax_t)getpid());
ip.argv = preq->index_pack_args ? preq->index_pack_args
: default_index_pack_args;
if (preq->preserve_index_pack_stdout)
ip.out = 0;
} else {
else
ip.no_stdout = 1;
}
if (run_command(&ip)) {
ret = -1;

10
http.h
View File

@ -218,12 +218,12 @@ struct http_pack_request {
char *url;
/*
* If this is true, finish_http_pack_request() will pass "--keep" to
* index-pack, resulting in the creation of a keep file, and will not
* suppress its stdout (that is, the "keep\t<hash>\n" line will be
* printed to stdout).
* index-pack command to run. Must be terminated by NULL.
*
* If NULL, defaults to {"index-pack", "--stdin", NULL}.
*/
unsigned generate_keep : 1;
const char **index_pack_args;
unsigned preserve_index_pack_stdout : 1;
FILE *packfile;
struct strbuf tmpfile;

View File

@ -380,7 +380,7 @@ void fixup_pack_header_footer(int pack_fd,
fsync_or_die(pack_fd, pack_name);
}
char *index_pack_lockfile(int ip_out)
char *index_pack_lockfile(int ip_out, int *is_well_formed)
{
char packname[GIT_MAX_HEXSZ + 6];
const int len = the_hash_algo->hexsz + 6;
@ -394,11 +394,17 @@ char *index_pack_lockfile(int ip_out)
*/
if (read_in_full(ip_out, packname, len) == len && packname[len-1] == '\n') {
const char *name;
if (is_well_formed)
*is_well_formed = 1;
packname[len-1] = 0;
if (skip_prefix(packname, "keep\t", &name))
return xstrfmt("%s/pack/pack-%s.keep",
get_object_directory(), name);
return NULL;
}
if (is_well_formed)
*is_well_formed = 0;
return NULL;
}

2
pack.h
View File

@ -87,7 +87,7 @@ int verify_pack_index(struct packed_git *);
int verify_pack(struct repository *, struct packed_git *, verify_fn fn, struct progress *, uint32_t);
off_t write_pack_header(struct hashfile *f, uint32_t);
void fixup_pack_header_footer(int, unsigned char *, const char *, uint32_t, unsigned char *, off_t);
char *index_pack_lockfile(int fd);
char *index_pack_lockfile(int fd, int *is_well_formed);
struct ref;

View File

@ -227,7 +227,10 @@ test_expect_success 'http-fetch --packfile' '
git init packfileclient &&
p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git && ls objects/pack/pack-*.pack) &&
git -C packfileclient http-fetch --packfile=$ARBITRARY "$HTTPD_URL"/dumb/repo_pack.git/$p >out &&
git -C packfileclient http-fetch --packfile=$ARBITRARY \
--index-pack-arg=index-pack --index-pack-arg=--stdin \
--index-pack-arg=--keep \
"$HTTPD_URL"/dumb/repo_pack.git/$p >out &&
grep "^keep.[0-9a-f]\{16,\}$" out &&
cut -c6- out >packhash &&

View File

@ -875,11 +875,10 @@ test_expect_success 'part of packfile response provided as URI' '
test -f hfound &&
test -f h2found &&
# Ensure that there are exactly 6 files (3 .pack and 3 .idx).
ls http_child/.git/objects/pack/*.pack >packlist &&
ls http_child/.git/objects/pack/*.idx >idxlist &&
test_line_count = 3 idxlist &&
test_line_count = 3 packlist
# Ensure that there are exactly 3 packfiles with associated .idx
ls http_child/.git/objects/pack/*.pack \
http_child/.git/objects/pack/*.idx >filelist &&
test_line_count = 6 filelist
'
test_expect_success 'fetching with valid packfile URI but invalid hash fails' '
@ -931,11 +930,10 @@ test_expect_success 'packfile-uri with transfer.fsckobjects' '
-c fetch.uriprotocols=http,https \
clone "$HTTPD_URL/smart/http_parent" http_child &&
# Ensure that there are exactly 4 files (2 .pack and 2 .idx).
ls http_child/.git/objects/pack/*.pack >packlist &&
ls http_child/.git/objects/pack/*.idx >idxlist &&
test_line_count = 2 idxlist &&
test_line_count = 2 packlist
# Ensure that there are exactly 2 packfiles with associated .idx
ls http_child/.git/objects/pack/*.pack \
http_child/.git/objects/pack/*.idx >filelist &&
test_line_count = 4 filelist
'
test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object' '
@ -968,6 +966,54 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object'
test_i18ngrep "invalid author/committer line - missing email" error
'
test_expect_success 'packfile-uri with transfer.fsckobjects succeeds when .gitmodules is separate from tree' '
P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
rm -rf "$P" http_child &&
git init "$P" &&
git -C "$P" config "uploadpack.allowsidebandall" "true" &&
echo "[submodule libfoo]" >"$P/.gitmodules" &&
echo "path = include/foo" >>"$P/.gitmodules" &&
echo "url = git://example.com/git/lib.git" >>"$P/.gitmodules" &&
git -C "$P" add .gitmodules &&
git -C "$P" commit -m x &&
configure_exclusion "$P" .gitmodules >h &&
sane_unset GIT_TEST_SIDEBAND_ALL &&
git -c protocol.version=2 -c transfer.fsckobjects=1 \
-c fetch.uriprotocols=http,https \
clone "$HTTPD_URL/smart/http_parent" http_child &&
# Ensure that there are exactly 2 packfiles with associated .idx
ls http_child/.git/objects/pack/*.pack \
http_child/.git/objects/pack/*.idx >filelist &&
test_line_count = 4 filelist
'
test_expect_success 'packfile-uri with transfer.fsckobjects fails when .gitmodules separate from tree is invalid' '
P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
rm -rf "$P" http_child err &&
git init "$P" &&
git -C "$P" config "uploadpack.allowsidebandall" "true" &&
echo "[submodule \"..\"]" >"$P/.gitmodules" &&
echo "path = include/foo" >>"$P/.gitmodules" &&
echo "url = git://example.com/git/lib.git" >>"$P/.gitmodules" &&
git -C "$P" add .gitmodules &&
git -C "$P" commit -m x &&
configure_exclusion "$P" .gitmodules >h &&
sane_unset GIT_TEST_SIDEBAND_ALL &&
test_must_fail git -c protocol.version=2 -c transfer.fsckobjects=1 \
-c fetch.uriprotocols=http,https \
clone "$HTTPD_URL/smart/http_parent" http_child 2>err &&
test_i18ngrep "disallowed submodule name" err
'
# DO NOT add non-httpd-specific tests here, because the last part of this
# test script is only executed when httpd is available and enabled.