diff --git a/Documentation/git-http-fetch.txt b/Documentation/git-http-fetch.txt index 4deb4893f5..9fa17b60e4 100644 --- a/Documentation/git-http-fetch.txt +++ b/Documentation/git-http-fetch.txt @@ -41,11 +41,17 @@ commit-id:: ['\t'] --packfile=:: - Instead of a commit id on the command line (which is not expected in + For internal use only. Instead of a commit id on the command + line (which is not expected in this case), 'git http-fetch' fetches the packfile directly at the given URL and uses index-pack to generate corresponding .idx and .keep files. The hash is used to determine the name of the temporary file and is - arbitrary. The output of index-pack is printed to stdout. + arbitrary. The output of index-pack is printed to stdout. Requires + --index-pack-args. + +--index-pack-args=:: + For internal use only. The command to run on the contents of the + downloaded pack. Arguments are URL-encoded separated by spaces. --recover:: Verify that everything reachable from target is fetched. Used after diff --git a/Documentation/git-index-pack.txt b/Documentation/git-index-pack.txt index 69ba904d44..7fa74b9e79 100644 --- a/Documentation/git-index-pack.txt +++ b/Documentation/git-index-pack.txt @@ -86,7 +86,12 @@ OPTIONS Die if the pack contains broken links. For internal use only. --fsck-objects:: - Die if the pack contains broken objects. For internal use only. + For internal use only. ++ +Die if the pack contains broken objects. If the pack contains a tree +pointing to a .gitmodules blob that does not exist, prints the hash of +that blob (for the caller to check) after the hash that goes into the +name of the pack/idx file (see "Notes"). --threads=:: Specifies the number of threads to spawn when resolving diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 54f74c4874..bad5748807 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1712,6 +1712,22 @@ static void show_pack_info(int stat_only) } } +static int print_dangling_gitmodules(struct fsck_options *o, + const struct object_id *oid, + enum object_type object_type, + int msg_type, const char *message) +{ + /* + * NEEDSWORK: Plumb the MSG_ID (from fsck.c) here and use it + * instead of relying on this string check. + */ + if (starts_with(message, "gitmodulesMissing")) { + printf("%s\n", oid_to_hex(oid)); + return 0; + } + return fsck_error_function(o, oid, object_type, msg_type, message); +} + int cmd_index_pack(int argc, const char **argv, const char *prefix) { int i, fix_thin_pack = 0, verify = 0, stat_only = 0, rev_index; @@ -1932,8 +1948,13 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) else close(input_fd); - if (do_fsck_object && fsck_finish(&fsck_options)) - die(_("fsck error in pack objects")); + if (do_fsck_object) { + struct fsck_options fo = fsck_options; + + fo.error_func = print_dangling_gitmodules; + if (fsck_finish(&fo)) + die(_("fsck error in pack objects")); + } free(objects); strbuf_release(&index_name_buf); diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index b89ce31bf2..d26040c477 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -2275,7 +2275,7 @@ static const char *unpack(int err_fd, struct shallow_info *si) status = start_command(&child); if (status) return "index-pack fork failed"; - pack_lockfile = index_pack_lockfile(child.out); + pack_lockfile = index_pack_lockfile(child.out, NULL); close(child.out); status = finish_command(&child); if (status) diff --git a/fetch-pack.c b/fetch-pack.c index 1eaedcb5dc..0cb59acc48 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -790,14 +790,36 @@ static void create_promisor_file(const char *keep_name, strbuf_release(&promisor_name); } +static void parse_gitmodules_oids(int fd, struct oidset *gitmodules_oids) +{ + int len = the_hash_algo->hexsz + 1; /* hash + NL */ + + do { + char hex_hash[GIT_MAX_HEXSZ + 1]; + int read_len = read_in_full(fd, hex_hash, len); + struct object_id oid; + const char *end; + + if (!read_len) + return; + if (read_len != len) + die("invalid length read %d", read_len); + if (parse_oid_hex(hex_hash, &oid, &end) || *end != '\n') + die("invalid hash"); + oidset_insert(gitmodules_oids, &oid); + } while (1); +} + /* - * Pass 1 as "only_packfile" if the pack received is the only pack in this - * fetch request (that is, if there were no packfile URIs provided). + * If packfile URIs were provided, pass a non-NULL pointer to index_pack_args. + * The strings to pass as the --index-pack-arg arguments to http-fetch will be + * stored there. (It must be freed by the caller.) */ static int get_pack(struct fetch_pack_args *args, int xd[2], struct string_list *pack_lockfiles, - int only_packfile, - struct ref **sought, int nr_sought) + struct strvec *index_pack_args, + struct ref **sought, int nr_sought, + struct oidset *gitmodules_oids) { struct async demux; int do_keep = args->keep_pack; @@ -805,6 +827,7 @@ static int get_pack(struct fetch_pack_args *args, struct pack_header header; int pass_header = 0; struct child_process cmd = CHILD_PROCESS_INIT; + int fsck_objects = 0; int ret; memset(&demux, 0, sizeof(demux)); @@ -839,8 +862,15 @@ static int get_pack(struct fetch_pack_args *args, strvec_push(&cmd.args, alternate_shallow_file); } - if (do_keep || args->from_promisor) { - if (pack_lockfiles) + if (fetch_fsck_objects >= 0 + ? fetch_fsck_objects + : transfer_fsck_objects >= 0 + ? transfer_fsck_objects + : 0) + fsck_objects = 1; + + if (do_keep || args->from_promisor || index_pack_args || fsck_objects) { + if (pack_lockfiles || fsck_objects) cmd.out = -1; cmd_name = "index-pack"; strvec_push(&cmd.args, cmd_name); @@ -857,7 +887,7 @@ static int get_pack(struct fetch_pack_args *args, "--keep=fetch-pack %"PRIuMAX " on %s", (uintmax_t)getpid(), hostname); } - if (only_packfile && args->check_self_contained_and_connected) + if (!index_pack_args && args->check_self_contained_and_connected) strvec_push(&cmd.args, "--check-self-contained-and-connected"); else /* @@ -890,12 +920,8 @@ static int get_pack(struct fetch_pack_args *args, strvec_pushf(&cmd.args, "--pack_header=%"PRIu32",%"PRIu32, ntohl(header.hdr_version), ntohl(header.hdr_entries)); - if (fetch_fsck_objects >= 0 - ? fetch_fsck_objects - : transfer_fsck_objects >= 0 - ? transfer_fsck_objects - : 0) { - if (args->from_promisor || !only_packfile) + if (fsck_objects) { + if (args->from_promisor || index_pack_args) /* * We cannot use --strict in index-pack because it * checks both broken objects and links, but we only @@ -907,14 +933,26 @@ static int get_pack(struct fetch_pack_args *args, fsck_msg_types.buf); } + if (index_pack_args) { + int i; + + for (i = 0; i < cmd.args.nr; i++) + strvec_push(index_pack_args, cmd.args.v[i]); + } + cmd.in = demux.out; cmd.git_cmd = 1; if (start_command(&cmd)) die(_("fetch-pack: unable to fork off %s"), cmd_name); - if (do_keep && pack_lockfiles) { - char *pack_lockfile = index_pack_lockfile(cmd.out); + if (do_keep && (pack_lockfiles || fsck_objects)) { + int is_well_formed; + char *pack_lockfile = index_pack_lockfile(cmd.out, &is_well_formed); + + if (!is_well_formed) + die(_("fetch-pack: invalid index-pack output")); if (pack_lockfile) string_list_append_nodup(pack_lockfiles, pack_lockfile); + parse_gitmodules_oids(cmd.out, gitmodules_oids); close(cmd.out); } @@ -949,6 +987,22 @@ static int cmp_ref_by_name(const void *a_, const void *b_) return strcmp(a->name, b->name); } +static void fsck_gitmodules_oids(struct oidset *gitmodules_oids) +{ + struct oidset_iter iter; + const struct object_id *oid; + struct fsck_options fo = FSCK_OPTIONS_STRICT; + + if (!oidset_size(gitmodules_oids)) + return; + + oidset_iter_init(gitmodules_oids, &iter); + while ((oid = oidset_iter_next(&iter))) + register_found_gitmodules(oid); + if (fsck_finish(&fo)) + die("fsck failed"); +} + static struct ref *do_fetch_pack(struct fetch_pack_args *args, int fd[2], const struct ref *orig_ref, @@ -963,6 +1017,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, int agent_len; struct fetch_negotiator negotiator_alloc; struct fetch_negotiator *negotiator; + struct oidset gitmodules_oids = OIDSET_INIT; negotiator = &negotiator_alloc; fetch_negotiator_init(r, negotiator); @@ -1078,8 +1133,10 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, alternate_shallow_file = setup_temporary_shallow(si->shallow); else alternate_shallow_file = NULL; - if (get_pack(args, fd, pack_lockfiles, 1, sought, nr_sought)) + if (get_pack(args, fd, pack_lockfiles, NULL, sought, nr_sought, + &gitmodules_oids)) die(_("git fetch-pack: fetch failed.")); + fsck_gitmodules_oids(&gitmodules_oids); all_done: if (negotiator) @@ -1529,6 +1586,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, int seen_ack = 0; struct string_list packfile_uris = STRING_LIST_INIT_DUP; int i; + struct strvec index_pack_args = STRVEC_INIT; + struct oidset gitmodules_oids = OIDSET_INIT; negotiator = &negotiator_alloc; fetch_negotiator_init(r, negotiator); @@ -1618,7 +1677,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, receive_packfile_uris(&reader, &packfile_uris); process_section_header(&reader, "packfile", 0); if (get_pack(args, fd, pack_lockfiles, - !packfile_uris.nr, sought, nr_sought)) + packfile_uris.nr ? &index_pack_args : NULL, + sought, nr_sought, &gitmodules_oids)) die(_("git fetch-pack: fetch failed.")); do_check_stateless_delimiter(args, &reader); @@ -1630,6 +1690,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, } for (i = 0; i < packfile_uris.nr; i++) { + int j; struct child_process cmd = CHILD_PROCESS_INIT; char packname[GIT_MAX_HEXSZ + 1]; const char *uri = packfile_uris.items[i].string + @@ -1639,6 +1700,9 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, strvec_pushf(&cmd.args, "--packfile=%.*s", (int) the_hash_algo->hexsz, packfile_uris.items[i].string); + for (j = 0; j < index_pack_args.nr; j++) + strvec_pushf(&cmd.args, "--index-pack-arg=%s", + index_pack_args.v[j]); strvec_push(&cmd.args, uri); cmd.git_cmd = 1; cmd.no_stdin = 1; @@ -1657,6 +1721,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, packname[the_hash_algo->hexsz] = '\0'; + parse_gitmodules_oids(cmd.out, &gitmodules_oids); + close(cmd.out); if (finish_command(&cmd)) @@ -1674,6 +1740,9 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, packname)); } string_list_clear(&packfile_uris, 0); + strvec_clear(&index_pack_args); + + fsck_gitmodules_oids(&gitmodules_oids); if (negotiator) negotiator->release(negotiator); diff --git a/fsck.c b/fsck.c index 71134fdefa..e3030f3b35 100644 --- a/fsck.c +++ b/fsck.c @@ -1276,6 +1276,11 @@ int fsck_error_function(struct fsck_options *o, return 1; } +void register_found_gitmodules(const struct object_id *oid) +{ + oidset_insert(&gitmodules_found, oid); +} + int fsck_finish(struct fsck_options *options) { int ret = 0; diff --git a/fsck.h b/fsck.h index 423c467feb..733378f126 100644 --- a/fsck.h +++ b/fsck.h @@ -62,6 +62,8 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options); int fsck_object(struct object *obj, void *data, unsigned long size, struct fsck_options *options); +void register_found_gitmodules(const struct object_id *oid); + /* * fsck a tag, and pass info about it back to the caller. This is * exposed fsck_object() internals for git-mktag(1). diff --git a/http-fetch.c b/http-fetch.c index c4ccc5fea9..fa642462a9 100644 --- a/http-fetch.c +++ b/http-fetch.c @@ -3,6 +3,7 @@ #include "exec-cmd.h" #include "http.h" #include "walker.h" +#include "strvec.h" static const char http_fetch_usage[] = "git http-fetch " "[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin | --packfile=hash | commit-id] url"; @@ -44,7 +45,8 @@ static int fetch_using_walker(const char *raw_url, int get_verbosely, } static void fetch_single_packfile(struct object_id *packfile_hash, - const char *url) { + const char *url, + const char **index_pack_args) { struct http_pack_request *preq; struct slot_results results; int ret; @@ -55,7 +57,8 @@ static void fetch_single_packfile(struct object_id *packfile_hash, if (preq == NULL) die("couldn't create http pack request"); preq->slot->results = &results; - preq->generate_keep = 1; + preq->index_pack_args = index_pack_args; + preq->preserve_index_pack_stdout = 1; if (start_active_slot(preq->slot)) { run_active_slot(preq->slot); @@ -86,6 +89,7 @@ int cmd_main(int argc, const char **argv) int packfile = 0; int nongit; struct object_id packfile_hash; + struct strvec index_pack_args = STRVEC_INIT; setup_git_directory_gently(&nongit); @@ -112,6 +116,8 @@ int cmd_main(int argc, const char **argv) packfile = 1; if (parse_oid_hex(p, &packfile_hash, &end) || *end) die(_("argument to --packfile must be a valid hash (got '%s')"), p); + } else if (skip_prefix(argv[arg], "--index-pack-arg=", &p)) { + strvec_push(&index_pack_args, p); } arg++; } @@ -124,10 +130,18 @@ int cmd_main(int argc, const char **argv) git_config(git_default_config, NULL); if (packfile) { - fetch_single_packfile(&packfile_hash, argv[arg]); + if (!index_pack_args.nr) + die(_("--packfile requires --index-pack-args")); + + fetch_single_packfile(&packfile_hash, argv[arg], + index_pack_args.v); + return 0; } + if (index_pack_args.nr) + die(_("--index-pack-args can only be used with --packfile")); + if (commits_on_stdin) { commits = walker_targets_stdin(&commit_id, &write_ref); } else { diff --git a/http.c b/http.c index 8b23a546af..f8ea28bb2e 100644 --- a/http.c +++ b/http.c @@ -2259,6 +2259,9 @@ void release_http_pack_request(struct http_pack_request *preq) free(preq); } +static const char *default_index_pack_args[] = + {"index-pack", "--stdin", NULL}; + int finish_http_pack_request(struct http_pack_request *preq) { struct child_process ip = CHILD_PROCESS_INIT; @@ -2270,17 +2273,15 @@ int finish_http_pack_request(struct http_pack_request *preq) tmpfile_fd = xopen(preq->tmpfile.buf, O_RDONLY); - strvec_push(&ip.args, "index-pack"); - strvec_push(&ip.args, "--stdin"); ip.git_cmd = 1; ip.in = tmpfile_fd; - if (preq->generate_keep) { - strvec_pushf(&ip.args, "--keep=git %"PRIuMAX, - (uintmax_t)getpid()); + ip.argv = preq->index_pack_args ? preq->index_pack_args + : default_index_pack_args; + + if (preq->preserve_index_pack_stdout) ip.out = 0; - } else { + else ip.no_stdout = 1; - } if (run_command(&ip)) { ret = -1; diff --git a/http.h b/http.h index 5de792ef3f..bf3d1270ad 100644 --- a/http.h +++ b/http.h @@ -218,12 +218,12 @@ struct http_pack_request { char *url; /* - * If this is true, finish_http_pack_request() will pass "--keep" to - * index-pack, resulting in the creation of a keep file, and will not - * suppress its stdout (that is, the "keep\t\n" line will be - * printed to stdout). + * index-pack command to run. Must be terminated by NULL. + * + * If NULL, defaults to {"index-pack", "--stdin", NULL}. */ - unsigned generate_keep : 1; + const char **index_pack_args; + unsigned preserve_index_pack_stdout : 1; FILE *packfile; struct strbuf tmpfile; diff --git a/pack-write.c b/pack-write.c index 680c36755d..2ca85a9d16 100644 --- a/pack-write.c +++ b/pack-write.c @@ -380,7 +380,7 @@ void fixup_pack_header_footer(int pack_fd, fsync_or_die(pack_fd, pack_name); } -char *index_pack_lockfile(int ip_out) +char *index_pack_lockfile(int ip_out, int *is_well_formed) { char packname[GIT_MAX_HEXSZ + 6]; const int len = the_hash_algo->hexsz + 6; @@ -394,11 +394,17 @@ char *index_pack_lockfile(int ip_out) */ if (read_in_full(ip_out, packname, len) == len && packname[len-1] == '\n') { const char *name; + + if (is_well_formed) + *is_well_formed = 1; packname[len-1] = 0; if (skip_prefix(packname, "keep\t", &name)) return xstrfmt("%s/pack/pack-%s.keep", get_object_directory(), name); + return NULL; } + if (is_well_formed) + *is_well_formed = 0; return NULL; } diff --git a/pack.h b/pack.h index afdcf8f5c7..857cbd5bd4 100644 --- a/pack.h +++ b/pack.h @@ -87,7 +87,7 @@ int verify_pack_index(struct packed_git *); int verify_pack(struct repository *, struct packed_git *, verify_fn fn, struct progress *, uint32_t); off_t write_pack_header(struct hashfile *f, uint32_t); void fixup_pack_header_footer(int, unsigned char *, const char *, uint32_t, unsigned char *, off_t); -char *index_pack_lockfile(int fd); +char *index_pack_lockfile(int fd, int *is_well_formed); struct ref; diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh index 2ecb06bb63..6d9142afc3 100755 --- a/t/t5550-http-fetch-dumb.sh +++ b/t/t5550-http-fetch-dumb.sh @@ -227,7 +227,10 @@ test_expect_success 'http-fetch --packfile' ' git init packfileclient && p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git && ls objects/pack/pack-*.pack) && - git -C packfileclient http-fetch --packfile=$ARBITRARY "$HTTPD_URL"/dumb/repo_pack.git/$p >out && + git -C packfileclient http-fetch --packfile=$ARBITRARY \ + --index-pack-arg=index-pack --index-pack-arg=--stdin \ + --index-pack-arg=--keep \ + "$HTTPD_URL"/dumb/repo_pack.git/$p >out && grep "^keep.[0-9a-f]\{16,\}$" out && cut -c6- out >packhash && diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh index 9113d209c5..994a76ca3c 100755 --- a/t/t5702-protocol-v2.sh +++ b/t/t5702-protocol-v2.sh @@ -875,11 +875,10 @@ test_expect_success 'part of packfile response provided as URI' ' test -f hfound && test -f h2found && - # Ensure that there are exactly 6 files (3 .pack and 3 .idx). - ls http_child/.git/objects/pack/*.pack >packlist && - ls http_child/.git/objects/pack/*.idx >idxlist && - test_line_count = 3 idxlist && - test_line_count = 3 packlist + # Ensure that there are exactly 3 packfiles with associated .idx + ls http_child/.git/objects/pack/*.pack \ + http_child/.git/objects/pack/*.idx >filelist && + test_line_count = 6 filelist ' test_expect_success 'fetching with valid packfile URI but invalid hash fails' ' @@ -931,11 +930,10 @@ test_expect_success 'packfile-uri with transfer.fsckobjects' ' -c fetch.uriprotocols=http,https \ clone "$HTTPD_URL/smart/http_parent" http_child && - # Ensure that there are exactly 4 files (2 .pack and 2 .idx). - ls http_child/.git/objects/pack/*.pack >packlist && - ls http_child/.git/objects/pack/*.idx >idxlist && - test_line_count = 2 idxlist && - test_line_count = 2 packlist + # Ensure that there are exactly 2 packfiles with associated .idx + ls http_child/.git/objects/pack/*.pack \ + http_child/.git/objects/pack/*.idx >filelist && + test_line_count = 4 filelist ' test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object' ' @@ -968,6 +966,54 @@ test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object' test_i18ngrep "invalid author/committer line - missing email" error ' +test_expect_success 'packfile-uri with transfer.fsckobjects succeeds when .gitmodules is separate from tree' ' + P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + rm -rf "$P" http_child && + + git init "$P" && + git -C "$P" config "uploadpack.allowsidebandall" "true" && + + echo "[submodule libfoo]" >"$P/.gitmodules" && + echo "path = include/foo" >>"$P/.gitmodules" && + echo "url = git://example.com/git/lib.git" >>"$P/.gitmodules" && + git -C "$P" add .gitmodules && + git -C "$P" commit -m x && + + configure_exclusion "$P" .gitmodules >h && + + sane_unset GIT_TEST_SIDEBAND_ALL && + git -c protocol.version=2 -c transfer.fsckobjects=1 \ + -c fetch.uriprotocols=http,https \ + clone "$HTTPD_URL/smart/http_parent" http_child && + + # Ensure that there are exactly 2 packfiles with associated .idx + ls http_child/.git/objects/pack/*.pack \ + http_child/.git/objects/pack/*.idx >filelist && + test_line_count = 4 filelist +' + +test_expect_success 'packfile-uri with transfer.fsckobjects fails when .gitmodules separate from tree is invalid' ' + P="$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && + rm -rf "$P" http_child err && + + git init "$P" && + git -C "$P" config "uploadpack.allowsidebandall" "true" && + + echo "[submodule \"..\"]" >"$P/.gitmodules" && + echo "path = include/foo" >>"$P/.gitmodules" && + echo "url = git://example.com/git/lib.git" >>"$P/.gitmodules" && + git -C "$P" add .gitmodules && + git -C "$P" commit -m x && + + configure_exclusion "$P" .gitmodules >h && + + sane_unset GIT_TEST_SIDEBAND_ALL && + test_must_fail git -c protocol.version=2 -c transfer.fsckobjects=1 \ + -c fetch.uriprotocols=http,https \ + clone "$HTTPD_URL/smart/http_parent" http_child 2>err && + test_i18ngrep "disallowed submodule name" err +' + # DO NOT add non-httpd-specific tests here, because the last part of this # test script is only executed when httpd is available and enabled.