http-fetch: support fetching packfiles by URL

Teach http-fetch the ability to download packfiles directly, given a
URL, and to verify them.

The http_pack_request suite has been augmented with a function that
takes a URL directly. With this function, the hash is only used to
determine the name of the temporary file.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Jonathan Tan 2020-06-10 13:57:18 -07:00 committed by Junio C Hamano
parent 8e6adb69e1
commit 8d5d2a34df
5 changed files with 123 additions and 18 deletions

View File

@ -9,7 +9,7 @@ git-http-fetch - Download from a remote Git repository via HTTP
SYNOPSIS
--------
[verse]
'git http-fetch' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] [--stdin] <commit> <url>
'git http-fetch' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] [--stdin | --packfile=<hash> | <commit>] <url>
DESCRIPTION
-----------
@ -40,6 +40,13 @@ commit-id::
<commit-id>['\t'<filename-as-in--w>]
--packfile=<hash>::
Instead of a commit id on the command line (which is not expected in
this case), 'git http-fetch' fetches the packfile directly at the given
URL and uses index-pack to generate corresponding .idx and .keep files.
The hash is used only to determine the name of the temporary file; it is
otherwise arbitrary and need not match the contents of the packfile.
The output of index-pack is printed to stdout.
--recover::
Verify that everything reachable from target is fetched. Used after
an earlier fetch is interrupted.

View File

@ -5,7 +5,7 @@
#include "walker.h"
static const char http_fetch_usage[] = "git http-fetch "
"[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url";
"[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin | --packfile=hash | commit-id] url";
static int fetch_using_walker(const char *raw_url, int get_verbosely,
int get_recover, int commits, char **commit_id,
@ -43,6 +43,37 @@ static int fetch_using_walker(const char *raw_url, int get_verbosely,
return rc;
}
/*
 * Download the single packfile found at `url` and hand it to
 * finish_http_pack_request() with keep-file generation requested.
 *
 * `packfile_hash` is passed through to new_direct_http_pack_request()
 * together with a heap copy of `url`. Every failure along the way
 * (request creation, starting the transfer, a non-OK curl result, or a
 * non-zero result from finish_http_pack_request()) terminates the
 * process via die().
 */
static void fetch_single_packfile(struct object_id *packfile_hash,
				  const char *url)
{
	struct slot_results results;
	struct http_pack_request *preq;
	int ret;

	http_init(NULL, url, 0);

	preq = new_direct_http_pack_request(packfile_hash->hash, xstrdup(url));
	if (!preq)
		die("couldn't create http pack request");

	/* Ask for a .keep file; index-pack's stdout is then not suppressed. */
	preq->generate_keep = 1;
	preq->slot->results = &results;

	if (!start_active_slot(preq->slot))
		die("Unable to start request");

	run_active_slot(preq->slot);
	if (results.curl_result != CURLE_OK)
		die("Unable to get pack file %s\n%s", preq->url,
		    curl_errorstr);

	ret = finish_http_pack_request(preq);
	if (ret)
		die("finish_http_pack_request gave result %d", ret);

	release_http_pack_request(preq);
	http_cleanup();
}
int cmd_main(int argc, const char **argv)
{
int commits_on_stdin = 0;
@ -52,8 +83,12 @@ int cmd_main(int argc, const char **argv)
int arg = 1;
int get_verbosely = 0;
int get_recover = 0;
int packfile = 0;
struct object_id packfile_hash;
while (arg < argc && argv[arg][0] == '-') {
const char *p;
if (argv[arg][1] == 't') {
} else if (argv[arg][1] == 'c') {
} else if (argv[arg][1] == 'a') {
@ -68,25 +103,33 @@ int cmd_main(int argc, const char **argv)
get_recover = 1;
} else if (!strcmp(argv[arg], "--stdin")) {
commits_on_stdin = 1;
} else if (skip_prefix(argv[arg], "--packfile=", &p)) {
const char *end;
packfile = 1;
if (parse_oid_hex(p, &packfile_hash, &end) || *end)
die(_("argument to --packfile must be a valid hash (got '%s')"), p);
}
arg++;
}
if (argc != arg + 2 - commits_on_stdin)
if (argc != arg + 2 - (commits_on_stdin || packfile))
usage(http_fetch_usage);
setup_git_directory();
git_config(git_default_config, NULL);
if (packfile) {
fetch_single_packfile(&packfile_hash, argv[arg]);
return 0;
}
if (commits_on_stdin) {
commits = walker_targets_stdin(&commit_id, &write_ref);
} else {
commit_id = (char **) &argv[arg++];
commits = 1;
}
setup_git_directory();
git_config(git_default_config, NULL);
if (!argv[arg])
BUG("must have one arg remaining");
return fetch_using_walker(argv[arg], get_verbosely, get_recover,
commits, commit_id, write_ref,
commits_on_stdin);

28
http.c
View File

@ -2281,7 +2281,13 @@ int finish_http_pack_request(struct http_pack_request *preq)
argv_array_push(&ip.args, "--stdin");
ip.git_cmd = 1;
ip.in = tmpfile_fd;
ip.no_stdout = 1;
if (preq->generate_keep) {
argv_array_pushf(&ip.args, "--keep=git %"PRIuMAX,
(uintmax_t)getpid());
ip.out = 0;
} else {
ip.no_stdout = 1;
}
if (run_command(&ip)) {
ret = -1;
@ -2307,19 +2313,27 @@ void http_install_packfile(struct packed_git *p,
}
/*
 * Create a pack request for the pack named by packed_git_hash below
 * base_url. The URL that gets fetched is base_url, passed through
 * end_url_with_slash(), followed by "objects/pack/pack-<hex>.pack".
 * The detached URL buffer is handed to new_direct_http_pack_request(),
 * which stores the pointer in the returned request.
 */
struct http_pack_request *new_http_pack_request(
const unsigned char *packed_git_hash, const char *base_url)
const unsigned char *packed_git_hash, const char *base_url) {
struct strbuf buf = STRBUF_INIT;
end_url_with_slash(&buf, base_url);
strbuf_addf(&buf, "objects/pack/pack-%s.pack",
hash_to_hex(packed_git_hash));
return new_direct_http_pack_request(packed_git_hash,
strbuf_detach(&buf, NULL));
}
struct http_pack_request *new_direct_http_pack_request(
const unsigned char *packed_git_hash, char *url)
{
off_t prev_posn = 0;
struct strbuf buf = STRBUF_INIT;
struct http_pack_request *preq;
preq = xcalloc(1, sizeof(*preq));
strbuf_init(&preq->tmpfile, 0);
end_url_with_slash(&buf, base_url);
strbuf_addf(&buf, "objects/pack/pack-%s.pack",
hash_to_hex(packed_git_hash));
preq->url = strbuf_detach(&buf, NULL);
preq->url = url;
strbuf_addf(&preq->tmpfile, "%s.temp", sha1_pack_name(packed_git_hash));
preq->packfile = fopen(preq->tmpfile.buf, "a");

11
http.h
View File

@ -216,6 +216,15 @@ int http_get_info_packs(const char *base_url,
struct http_pack_request {
char *url;
/*
* If this is true, finish_http_pack_request() will pass "--keep" to
* index-pack, resulting in the creation of a keep file, and will not
* suppress its stdout (that is, the "keep\t<hash>\n" line will be
* printed to stdout).
*/
unsigned generate_keep : 1;
FILE *packfile;
struct strbuf tmpfile;
struct active_request_slot *slot;
@ -223,6 +232,8 @@ struct http_pack_request {
struct http_pack_request *new_http_pack_request(
const unsigned char *packed_git_hash, const char *base_url);
/*
 * Like new_http_pack_request(), but the pack is fetched from `url` as
 * given instead of from a URL derived from a repository base URL.
 * packed_git_hash is only used to name the temporary download file.
 *
 * The `url` pointer is stored in the request rather than copied, so the
 * caller must pass a heap-allocated string that the request may keep.
 * NOTE(review): presumably freed by release_http_pack_request() -- confirm.
 */
struct http_pack_request *new_direct_http_pack_request(
const unsigned char *packed_git_hash, char *url);
int finish_http_pack_request(struct http_pack_request *preq);
void release_http_pack_request(struct http_pack_request *preq);

View File

@ -199,6 +199,28 @@ test_expect_success 'fetch packed objects' '
git clone $HTTPD_URL/dumb/repo_pack.git
'
test_expect_success 'http-fetch --packfile' '
# Arbitrary hash. Use rev-parse so that we get one of the correct
# length.
ARBITRARY=$(git -C "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git rev-parse HEAD) &&
git init packfileclient &&
# Path of the pack inside the served repository, relative to its root
p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git && ls objects/pack/pack-*.pack) &&
git -C packfileclient http-fetch --packfile=$ARBITRARY "$HTTPD_URL"/dumb/repo_pack.git/$p >out &&
# stdout must carry the "keep<TAB><hash>" line produced by index-pack
grep "^keep.[0-9a-f]\{16,\}$" out &&
# Drop the 5-character "keep<TAB>" prefix, leaving only the pack hash
cut -c6- out >packhash &&
# Ensure that the expected files are generated
test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).pack" &&
test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).idx" &&
test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).keep" &&
# Ensure that it has the HEAD of repo_pack, at least
HASH=$(git -C "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git rev-parse HEAD) &&
git -C packfileclient cat-file -e "$HASH"
'
test_expect_success 'fetch notices corrupt pack' '
cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git &&
(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git &&
@ -214,6 +236,14 @@ test_expect_success 'fetch notices corrupt pack' '
)
'
test_expect_success 'http-fetch --packfile with corrupt pack' '
	# The option must be spelled "--packfile=<hash>". A bare "--packfile"
	# is not recognized by the option parser, so http-fetch would exit
	# with a usage error and this test would pass without the corrupt
	# pack ever being downloaded. The hash only names the temporary
	# file, so any hash of the correct length will do.
	ARBITRARY=$(git -C "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git rev-parse HEAD) &&
	rm -rf packfileclient &&
	git init packfileclient &&
	p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git && ls objects/pack/pack-*.pack) &&
	test_must_fail git -C packfileclient http-fetch --packfile=$ARBITRARY \
		"$HTTPD_URL"/dumb/repo_bad1.git/$p
'
test_expect_success 'fetch notices corrupt idx' '
cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad2.git &&
(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad2.git &&