From 23b6d00ba7fd4b3446319395cfe3791710e81d92 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <derrickstolee@github.com>
Date: Wed, 12 Oct 2022 12:52:28 +0000
Subject: [PATCH 01/12] bundle-uri: use plain string in find_temp_filename()

The find_temp_filename() method was created in 53a50892be2 (bundle-uri:
create basic file-copy logic, 2022-08-09) and uses odb_mkstemp() to
create a temporary filename. The odb_mkstemp() method uses a strbuf in
its interface, but we do not need to continue carrying a strbuf
throughout the bundle URI code.

Convert the find_temp_filename() method to return a 'char *' and modify
its only caller. This makes sense because we never need to modify this
filename later, so using a strbuf is overkill.

This change will simplify the data structure for tracking a bundle list
to use plain strings instead of strbufs.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 bundle-uri.c | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/bundle-uri.c b/bundle-uri.c
index 4a8cc74ed0..8b2f4e08c9 100644
--- a/bundle-uri.c
+++ b/bundle-uri.c
@@ -5,22 +5,23 @@
 #include "refs.h"
 #include "run-command.h"
 
-static int find_temp_filename(struct strbuf *name)
+static char *find_temp_filename(void)
 {
 	int fd;
+	struct strbuf name = STRBUF_INIT;
 	/*
 	 * Find a temporary filename that is available. This is briefly
 	 * racy, but unlikely to collide.
 	 */
-	fd = odb_mkstemp(name, "bundles/tmp_uri_XXXXXX");
+	fd = odb_mkstemp(&name, "bundles/tmp_uri_XXXXXX");
 	if (fd < 0) {
 		warning(_("failed to create temporary file"));
-		return -1;
+		return NULL;
 	}
 
 	close(fd);
-	unlink(name->buf);
-	return 0;
+	unlink(name.buf);
+	return strbuf_detach(&name, NULL);
 }
 
 static int download_https_uri_to_file(const char *file, const char *uri)
@@ -141,28 +142,31 @@ static int unbundle_from_file(struct repository *r, const char *file)
 int fetch_bundle_uri(struct repository *r, const char *uri)
 {
 	int result = 0;
-	struct strbuf filename = STRBUF_INIT;
+	char *filename;
 
-	if ((result = find_temp_filename(&filename)))
+	if (!(filename = find_temp_filename())) {
+		result = -1;
 		goto cleanup;
+	}
 
-	if ((result = copy_uri_to_file(filename.buf, uri))) {
+	if ((result = copy_uri_to_file(filename, uri))) {
 		warning(_("failed to download bundle from URI '%s'"), uri);
 		goto cleanup;
 	}
 
-	if ((result = !is_bundle(filename.buf, 0))) {
+	if ((result = !is_bundle(filename, 0))) {
 		warning(_("file at URI '%s' is not a bundle"), uri);
 		goto cleanup;
 	}
 
-	if ((result = unbundle_from_file(r, filename.buf))) {
+	if ((result = unbundle_from_file(r, filename))) {
 		warning(_("failed to unbundle bundle from URI '%s'"), uri);
 		goto cleanup;
 	}
 
 cleanup:
-	unlink(filename.buf);
-	strbuf_release(&filename);
+	if (filename)
+		unlink(filename);
+	free(filename);
 	return result;
 }

From 0634f717a3e3c57ee5d965882366df1ada1ad11b Mon Sep 17 00:00:00 2001
From: Derrick Stolee <derrickstolee@github.com>
Date: Wed, 12 Oct 2022 12:52:29 +0000
Subject: [PATCH 02/12] bundle-uri: create bundle_list struct and helpers

It will likely be rare for a user to provide a single bundle URI and
expect that URI to point to a single bundle. Instead, that URI will
likely point to a list of bundles provided in some format.
Alternatively, the Git server could advertise a list of bundles.

In anticipation of these two ways of advertising multiple bundles,
create a data structure that represents such a list. This will be
populated using a common API, but for now focus on what data can be
represented.

Each list contains a number of remote_bundle_info structs. These contain
an 'id' that is used to uniquely identify them in the list, and also a
'uri' that contains the location of its data. Finally, there is a strbuf
containing the filename used when Git downloads the contents to disk.

The list itself stores these remote_bundle_info structs in a hashtable
using 'id' as the key. The order of the structs in the input is
considered unimportant, but future modifications to the format and these
data structures will place ordering possibilities on the set. The list
also has a few "global" properties, including the version (used when
parsing the list) and the mode. The mode is one of these two options:

1. BUNDLE_MODE_ALL: all listed URIs are intended to be combined
   together. The client should download all of the advertised data to
   have a complete copy of the data.

2. BUNDLE_MODE_ANY: any one listed item is sufficient to have a complete
   copy of the data. The client can choose arbitrarily from these
   options. In the future, the client may use pings to find the closest
   URI among geodistributed replicas, or use some other heuristic
   information added to the format.

This API is currently unused, but will soon be expanded with parsing
logic and then be consumed by the bundle URI download logic.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 bundle-uri.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 bundle-uri.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 116 insertions(+)

diff --git a/bundle-uri.c b/bundle-uri.c
index 8b2f4e08c9..f9a8db221b 100644
--- a/bundle-uri.c
+++ b/bundle-uri.c
@@ -4,6 +4,66 @@
 #include "object-store.h"
 #include "refs.h"
 #include "run-command.h"
+#include "hashmap.h"
+#include "pkt-line.h"
+
+static int compare_bundles(const void *hashmap_cmp_fn_data,
+			   const struct hashmap_entry *he1,
+			   const struct hashmap_entry *he2,
+			   const void *id)
+{
+	const struct remote_bundle_info *e1 =
+		container_of(he1, const struct remote_bundle_info, ent);
+	const struct remote_bundle_info *e2 =
+		container_of(he2, const struct remote_bundle_info, ent);
+
+	return strcmp(e1->id, id ? (const char *)id : e2->id);
+}
+
+void init_bundle_list(struct bundle_list *list)
+{
+	memset(list, 0, sizeof(*list));
+
+	/* Implied defaults. */
+	list->mode = BUNDLE_MODE_ALL;
+	list->version = 1;
+
+	hashmap_init(&list->bundles, compare_bundles, NULL, 0);
+}
+
+static int clear_remote_bundle_info(struct remote_bundle_info *bundle,
+				    void *data)
+{
+	FREE_AND_NULL(bundle->id);
+	FREE_AND_NULL(bundle->uri);
+	return 0;
+}
+
+void clear_bundle_list(struct bundle_list *list)
+{
+	if (!list)
+		return;
+
+	for_all_bundles_in_list(list, clear_remote_bundle_info, NULL);
+	hashmap_clear_and_free(&list->bundles, struct remote_bundle_info, ent);
+}
+
+int for_all_bundles_in_list(struct bundle_list *list,
+			    bundle_iterator iter,
+			    void *data)
+{
+	struct remote_bundle_info *info;
+	struct hashmap_iter i;
+
+	hashmap_for_each_entry(&list->bundles, &i, info, ent) {
+		int result = iter(info, data);
+
+		if (result)
+			return result;
+	}
+
+	return 0;
+}
 
 static char *find_temp_filename(void)
 {
diff --git a/bundle-uri.h b/bundle-uri.h
index 8a152f1ef1..ff7e3fd3fb 100644
--- a/bundle-uri.h
+++ b/bundle-uri.h
@@ -1,7 +1,63 @@
 #ifndef BUNDLE_URI_H
 #define BUNDLE_URI_H
 
+#include "hashmap.h"
+#include "strbuf.h"
+
 struct repository;
+struct string_list;
+
+/**
+ * The remote_bundle_info struct contains information for a single bundle
+ * URI. This may be initialized simply by a given URI or might have
+ * additional metadata associated with it if the bundle was advertised by
+ * a bundle list.
+ */
+struct remote_bundle_info {
+	struct hashmap_entry ent;
+
+	/**
+	 * The 'id' is a name given to the bundle for reference
+	 * by other bundle infos.
+	 */
+	char *id;
+
+	/**
+	 * The 'uri' is the location of the remote bundle so
+	 * it can be downloaded on-demand. This will be NULL
+	 * if there was no table of contents.
+	 */
+	char *uri;
+};
+
+#define REMOTE_BUNDLE_INFO_INIT { 0 }
+
+enum bundle_list_mode {
+	BUNDLE_MODE_NONE = 0,
+	BUNDLE_MODE_ALL,
+	BUNDLE_MODE_ANY
+};
+
+/**
+ * A bundle_list contains an unordered set of remote_bundle_info structs,
+ * as well as information about the bundle listing, such as version and
+ * mode.
+ */
+struct bundle_list {
+	int version;
+	enum bundle_list_mode mode;
+	struct hashmap bundles;
+};
+
+void init_bundle_list(struct bundle_list *list);
+void clear_bundle_list(struct bundle_list *list);
+
+typedef int (*bundle_iterator)(struct remote_bundle_info *bundle,
+			       void *data);
+
+int for_all_bundles_in_list(struct bundle_list *list,
+			    bundle_iterator iter,
+			    void *data);
 
 /**
  * Fetch data from the given 'uri' and unbundle the bundle data found

From bff03c47f7342c2a08fac6c0af7229b1579fea15 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <derrickstolee@github.com>
Date: Wed, 12 Oct 2022 12:52:30 +0000
Subject: [PATCH 03/12] bundle-uri: create base key-value pair parsing

There will be two primary ways to advertise a bundle list: as a list of
packet lines in Git's protocol v2 and as a config file served from a
bundle URI. Both of these fundamentally use a list of key-value pairs.
We will use the same set of key-value pairs across these formats.

Create a new bundle_list_update() method that is currently unused, but
will be used in the next change. It inspects each key to see if it is
understood and then applies it to the given bundle_list. Here are the
keys that we teach Git to understand:

* bundle.version: This value should be an integer. Git currently
  understands only version 1 and will ignore the list if the version is
  any other value. This version can be increased in the future if we
  need to add new keys that Git should not ignore. We can add new
  "heuristic" keys without incrementing the version.

* bundle.mode: This value should be one of "all" or "any". If this
  mode is not understood, then Git will ignore the list. This mode
  indicates whether Git needs all of the bundle list items to make a
  complete view of the content or if any single item is sufficient.

The rest of the keys use a bundle identifier "<id>" as part of the key
name. Keys using the same "<id>" describe a single bundle list item.

* bundle.<id>.uri: This stores the URI of the bundle item. This
  currently is expected to be an absolute URI, but will be relaxed to be
  a relative URI in the future.

While parsing, return an error if a URI key is repeated, since we can
make that restriction with bundle lists.

Make the git_parse_int() method global so we can parse the integer
version value carefully.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/config.txt        |  2 +
 Documentation/config/bundle.txt | 24 +++++++++++
 bundle-uri.c                    | 76 +++++++++++++++++++++++++++++++++
 config.c                        |  2 +-
 config.h                        |  1 +
 5 files changed, 104 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/config/bundle.txt

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 5b5b976569..4f9002efd6 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -387,6 +387,8 @@ include::config/branch.txt[]
 
 include::config/browser.txt[]
 
+include::config/bundle.txt[]
+
 include::config/checkout.txt[]
 
 include::config/clean.txt[]
diff --git a/Documentation/config/bundle.txt b/Documentation/config/bundle.txt
new file mode 100644
index 0000000000..daa21eb674
--- /dev/null
+++ b/Documentation/config/bundle.txt
@@ -0,0 +1,24 @@
+bundle.*::
+	The `bundle.*` keys may appear in a bundle list file found via the
+	`git clone --bundle-uri` option. These keys currently have no effect
+	if placed in a repository config file, though this will change in the
+	future. See link:technical/bundle-uri.html[the bundle URI design
+	document] for more details.
+
+bundle.version::
+	This integer value advertises the version of the bundle list format
+	used by the bundle list. Currently, the only accepted value is `1`.
+
+bundle.mode::
+	This string value should be either `all` or `any`. This value describes
+	whether all of the advertised bundles are required to unbundle a
+	complete understanding of the bundled information (`all`) or if any one
+	of the listed bundle URIs is sufficient (`any`).
+
+bundle.<id>.*::
+	The `bundle.<id>.*` keys are used to describe a single item in the
+	bundle list, grouped under `<id>` for identification purposes.
+
+bundle.<id>.uri::
+	This string value defines the URI by which Git can reach the contents
+	of this `<id>`. This URI may be a bundle file or another bundle list.
diff --git a/bundle-uri.c b/bundle-uri.c
index f9a8db221b..0bc59dd9c3 100644
--- a/bundle-uri.c
+++ b/bundle-uri.c
@@ -6,6 +6,7 @@
 #include "run-command.h"
 #include "hashmap.h"
 #include "pkt-line.h"
+#include "config.h"
 
 static int compare_bundles(const void *hashmap_cmp_fn_data,
 			   const struct hashmap_entry *he1,
@@ -65,6 +66,81 @@ int for_all_bundles_in_list(struct bundle_list *list,
 	return 0;
 }
 
+/**
+ * Given a key-value pair, update the state of the given bundle list.
+ * Returns 0 if the key-value pair is understood. Returns -1 if the key
+ * is not understood or the value is malformed.
+ */
+MAYBE_UNUSED
+static int bundle_list_update(const char *key, const char *value,
+			      struct bundle_list *list)
+{
+	struct strbuf id = STRBUF_INIT;
+	struct remote_bundle_info lookup = REMOTE_BUNDLE_INFO_INIT;
+	struct remote_bundle_info *bundle;
+	const char *subsection, *subkey;
+	size_t subsection_len;
+
+	if (parse_config_key(key, "bundle", &subsection, &subsection_len, &subkey))
+		return -1;
+
+	if (!subsection_len) {
+		if (!strcmp(subkey, "version")) {
+			int version;
+			if (!git_parse_int(value, &version))
+				return -1;
+			if (version != 1)
+				return -1;
+
+			list->version = version;
+			return 0;
+		}
+
+		if (!strcmp(subkey, "mode")) {
+			if (!strcmp(value, "all"))
+				list->mode = BUNDLE_MODE_ALL;
+			else if (!strcmp(value, "any"))
+				list->mode = BUNDLE_MODE_ANY;
+			else
+				return -1;
+			return 0;
+		}
+
+		/* Ignore other unknown global keys. */
+		return 0;
+	}
+
+	strbuf_add(&id, subsection, subsection_len);
+
+	/*
+	 * Check for an existing bundle with this <id>, or create one
+	 * if necessary.
+	 */
+	lookup.id = id.buf;
+	hashmap_entry_init(&lookup.ent, strhash(lookup.id));
+	if (!(bundle = hashmap_get_entry(&list->bundles, &lookup, ent, NULL))) {
+		CALLOC_ARRAY(bundle, 1);
+		bundle->id = strbuf_detach(&id, NULL);
+		hashmap_entry_init(&bundle->ent, strhash(bundle->id));
+		hashmap_add(&list->bundles, &bundle->ent);
+	}
+	strbuf_release(&id);
+
+	if (!strcmp(subkey, "uri")) {
+		if (bundle->uri)
+			return -1;
+		bundle->uri = xstrdup(value);
+		return 0;
+	}
+
+	/*
+	 * At this point, we ignore any information that we don't
+	 * understand, assuming it to be hints for a heuristic the client
+	 * does not currently understand.
+	 */
+	return 0;
+}
+
 static char *find_temp_filename(void)
 {
 	int fd;
diff --git a/config.c b/config.c
index e8ebef77d5..1cb35bea2f 100644
--- a/config.c
+++ b/config.c
@@ -1214,7 +1214,7 @@ static int git_parse_unsigned(const char *value, uintmax_t *ret, uintmax_t max)
 	return 0;
 }
 
-static int git_parse_int(const char *value, int *ret)
+int git_parse_int(const char *value, int *ret)
 {
 	intmax_t tmp;
 	if (!git_parse_signed(value, &tmp, maximum_signed_value_of_type(int)))
diff --git a/config.h b/config.h
index ca994d7714..ef9eade641 100644
--- a/config.h
+++ b/config.h
@@ -206,6 +206,7 @@ int config_with_options(config_fn_t fn, void *,
 
 int git_parse_ssize_t(const char *, ssize_t *);
 int git_parse_ulong(const char *, unsigned long *);
+int git_parse_int(const char *value, int *ret);
 
 /**
  * Same as `git_config_bool`, except that it returns -1 on error rather

From 9424e373fd2136aa7f5cec23c8cafc272996ecd6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Wed, 12 Oct 2022 12:52:31 +0000
Subject: [PATCH 04/12] bundle-uri: create "key=value" line parsing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When advertising a bundle list over Git's protocol v2, we will use
packet lines. Each line will be of the form "key=value" representing a
bundle list. Connect the API necessary for Git's transport to the
key-value pair parsing created in the previous change.

We are not currently implementing this protocol v2 functionality, but
instead preparing to expose this parsing to be unit-testable.

Co-authored-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 bundle-uri.c | 27 ++++++++++++++++++++++++++-
 bundle-uri.h | 12 ++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/bundle-uri.c b/bundle-uri.c
index 0bc59dd9c3..372e6fac5c 100644
--- a/bundle-uri.c
+++ b/bundle-uri.c
@@ -71,7 +71,6 @@ int for_all_bundles_in_list(struct bundle_list *list,
  * Returns 0 if the key-value pair is understood. Returns -1 if the key
  * is not understood or the value is malformed.
  */
-MAYBE_UNUSED
 static int bundle_list_update(const char *key, const char *value,
 			      struct bundle_list *list)
 {
@@ -306,3 +305,29 @@ cleanup:
 	free(filename);
 	return result;
 }
+
+/**
+ * General API for {transport,connect}.c etc.
+ */
+int bundle_uri_parse_line(struct bundle_list *list, const char *line)
+{
+	int result;
+	const char *equals;
+	struct strbuf key = STRBUF_INIT;
+
+	if (!strlen(line))
+		return error(_("bundle-uri: got an empty line"));
+
+	equals = strchr(line, '=');
+
+	if (!equals)
+		return error(_("bundle-uri: line is not of the form 'key=value'"));
+	if (line == equals || !*(equals + 1))
+		return error(_("bundle-uri: line has empty key or value"));
+
+	strbuf_add(&key, line, equals - line);
+	result = bundle_list_update(key.buf, equals + 1, list);
+	strbuf_release(&key);
+
+	return result;
+}
diff --git a/bundle-uri.h b/bundle-uri.h
index ff7e3fd3fb..9058346192 100644
--- a/bundle-uri.h
+++ b/bundle-uri.h
@@ -67,4 +67,16 @@ int for_all_bundles_in_list(struct bundle_list *list,
  */
 int fetch_bundle_uri(struct repository *r, const char *uri);
 
+/**
+ * General API for {transport,connect}.c etc.
+ */
+
+/**
+ * Parse a "key=value" packet line from the bundle-uri verb.
+ *
+ * Returns 0 on success and non-zero on error.
+ */
+int bundle_uri_parse_line(struct bundle_list *list,
+			  const char *line);
+
 #endif

From d796cedbe8ca77310a7dabcafe60b040aa0e2b67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?=
 <avarab@gmail.com>
Date: Wed, 12 Oct 2022 12:52:32 +0000
Subject: [PATCH 05/12] bundle-uri: unit test "key=value" parsing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Create a new 'test-tool bundle-uri' test helper. This helper will assist
in testing logic deep in the bundle URI feature.

This change introduces the 'parse-key-values' subcommand, which parses
an input file as a list of lines. These are fed into
bundle_uri_parse_line() to test how we construct a 'struct bundle_list'
from that data. The list is then output to stdout as if the key-value
pairs were a Git config file.

We use an input file instead of stdin because a future change will
parse input in config-file format, which works better with a file.

Co-authored-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Makefile                    |   1 +
 bundle-uri.c                |  33 ++++++++++
 bundle-uri.h                |   3 +
 t/helper/test-bundle-uri.c  |  70 +++++++++++++++++++++
 t/helper/test-tool.c        |   1 +
 t/helper/test-tool.h        |   1 +
 t/t5750-bundle-uri-parse.sh | 121 ++++++++++++++++++++++++++++++++++++
 t/test-lib-functions.sh     |  11 ++++
 8 files changed, 241 insertions(+)
 create mode 100644 t/helper/test-bundle-uri.c
 create mode 100755 t/t5750-bundle-uri-parse.sh

diff --git a/Makefile b/Makefile
index 7320bee9c9..4a19d9cb72 100644
--- a/Makefile
+++ b/Makefile
@@ -722,6 +722,7 @@ PROGRAMS += $(patsubst %.o,git-%$X,$(PROGRAM_OBJS))
 TEST_BUILTINS_OBJS += test-advise.o
 TEST_BUILTINS_OBJS += test-bitmap.o
 TEST_BUILTINS_OBJS += test-bloom.o
+TEST_BUILTINS_OBJS += test-bundle-uri.o
 TEST_BUILTINS_OBJS += test-chmtime.o
 TEST_BUILTINS_OBJS += test-config.o
 TEST_BUILTINS_OBJS += test-crontab.o
diff --git a/bundle-uri.c b/bundle-uri.c
index 372e6fac5c..c02e7f62eb 100644
--- a/bundle-uri.c
+++ b/bundle-uri.c
@@ -66,6 +66,39 @@ int for_all_bundles_in_list(struct bundle_list *list,
 	return 0;
 }
 
+static int summarize_bundle(struct remote_bundle_info *info, void *data)
+{
+	FILE *fp = data;
+	fprintf(fp, "[bundle \"%s\"]\n", info->id);
+	fprintf(fp, "\turi = %s\n", info->uri);
+	return 0;
+}
+
+void print_bundle_list(FILE *fp, struct bundle_list *list)
+{
+	const char *mode;
+
+	switch (list->mode) {
+	case BUNDLE_MODE_ALL:
+		mode = "all";
+		break;
+
+	case BUNDLE_MODE_ANY:
+		mode = "any";
+		break;
+
+	case BUNDLE_MODE_NONE:
+	default:
+		mode = "<unknown>";
+	}
+
+	fprintf(fp, "[bundle]\n");
+	fprintf(fp, "\tversion = %d\n", list->version);
+	fprintf(fp, "\tmode = %s\n", mode);
+
+	for_all_bundles_in_list(list, summarize_bundle, fp);
+}
+
 /**
  * Given a key-value pair, update the state of the given bundle list.
  * Returns 0 if the key-value pair is understood. Returns -1 if the key
diff --git a/bundle-uri.h b/bundle-uri.h
index 9058346192..0e56ab2ae5 100644
--- a/bundle-uri.h
+++ b/bundle-uri.h
@@ -59,6 +59,9 @@ int for_all_bundles_in_list(struct bundle_list *list,
 			    bundle_iterator iter,
 			    void *data);
 
+struct FILE;
+void print_bundle_list(FILE *fp, struct bundle_list *list);
+
 /**
  * Fetch data from the given 'uri' and unbundle the bundle data found
  * based on that information.
diff --git a/t/helper/test-bundle-uri.c b/t/helper/test-bundle-uri.c
new file mode 100644
index 0000000000..0329c56544
--- /dev/null
+++ b/t/helper/test-bundle-uri.c
@@ -0,0 +1,70 @@
+#include "test-tool.h"
+#include "parse-options.h"
+#include "bundle-uri.h"
+#include "strbuf.h"
+#include "string-list.h"
+
+static int cmd__bundle_uri_parse(int argc, const char **argv)
+{
+	const char *key_value_usage[] = {
+		"test-tool bundle-uri parse-key-values <input>",
+		NULL
+	};
+	const char **usage = key_value_usage;
+	struct option options[] = {
+		OPT_END(),
+	};
+	struct strbuf sb = STRBUF_INIT;
+	struct bundle_list list;
+	int err = 0;
+	FILE *fp;
+
+	argc = parse_options(argc, argv, NULL, options, usage, 0);
+	if (argc != 1)
+		goto usage;
+
+	init_bundle_list(&list);
+	fp = fopen(argv[0], "r");
+	if (!fp)
+		die("failed to open '%s'", argv[0]);
+
+	while (strbuf_getline(&sb, fp) != EOF) {
+		if (bundle_uri_parse_line(&list, sb.buf))
+			err = error("bad line: '%s'", sb.buf);
+	}
+	strbuf_release(&sb);
+	fclose(fp);
+
+	print_bundle_list(stdout, &list);
+
+	clear_bundle_list(&list);
+
+	return !!err;
+
+usage:
+	usage_with_options(usage, options);
+}
+
+int cmd__bundle_uri(int argc, const char **argv)
+{
+	const char *usage[] = {
+		"test-tool bundle-uri <subcommand> [<options>]",
+		NULL
+	};
+	struct option options[] = {
+		OPT_END(),
+	};
+
+	argc = parse_options(argc, argv, NULL, options, usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION |
+			     PARSE_OPT_KEEP_ARGV0);
+	if (argc == 1)
+		goto usage;
+
+	if (!strcmp(argv[1], "parse-key-values"))
+		return cmd__bundle_uri_parse(argc - 1, argv + 1);
+	error("there is no test-tool bundle-uri tool '%s'", argv[1]);
+
+usage:
+	usage_with_options(usage, options);
+}
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index 318fdbab0c..fbe2d9d810 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -17,6 +17,7 @@ static struct test_cmd cmds[] = {
 	{ "advise", cmd__advise_if_enabled },
 	{ "bitmap", cmd__bitmap },
 	{ "bloom", cmd__bloom },
+	{ "bundle-uri", cmd__bundle_uri },
 	{ "chmtime", cmd__chmtime },
 	{ "config", cmd__config },
 	{ "crontab", cmd__crontab },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index bb79927163..b2aa1f39a8 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -7,6 +7,7 @@
 int cmd__advise_if_enabled(int argc, const char **argv);
 int cmd__bitmap(int argc, const char **argv);
 int cmd__bloom(int argc, const char **argv);
+int cmd__bundle_uri(int argc, const char **argv);
 int cmd__chmtime(int argc, const char **argv);
 int cmd__config(int argc, const char **argv);
 int cmd__crontab(int argc, const char **argv);
diff --git a/t/t5750-bundle-uri-parse.sh b/t/t5750-bundle-uri-parse.sh
new file mode 100755
index 0000000000..fd142a66ad
--- /dev/null
+++ b/t/t5750-bundle-uri-parse.sh
@@ -0,0 +1,121 @@
+#!/bin/sh
+
+test_description="Test bundle-uri bundle_uri_parse_line()"
+
+TEST_NO_CREATE_REPO=1
+TEST_PASSES_SANITIZE_LEAK=true
+. ./test-lib.sh
+
+test_expect_success 'bundle_uri_parse_line() just URIs' '
+	cat >in <<-\EOF &&
+	bundle.one.uri=http://example.com/bundle.bdl
+	bundle.two.uri=https://example.com/bundle.bdl
+	bundle.three.uri=file:///usr/share/git/bundle.bdl
+	EOF
+
+	cat >expect <<-\EOF &&
+	[bundle]
+		version = 1
+		mode = all
+	[bundle "one"]
+		uri = http://example.com/bundle.bdl
+	[bundle "two"]
+		uri = https://example.com/bundle.bdl
+	[bundle "three"]
+		uri = file:///usr/share/git/bundle.bdl
+	EOF
+
+	test-tool bundle-uri parse-key-values in >actual 2>err &&
+	test_must_be_empty err &&
+	test_cmp_config_output expect actual
+'
+
+test_expect_success 'bundle_uri_parse_line() parsing edge cases: empty key or value' '
+	cat >in <<-\EOF &&
+	=bogus-value
+	bogus-key=
+	EOF
+
+	cat >err.expect <<-EOF &&
+	error: bundle-uri: line has empty key or value
+	error: bad line: '\''=bogus-value'\''
+	error: bundle-uri: line has empty key or value
+	error: bad line: '\''bogus-key='\''
+	EOF
+
+	cat >expect <<-\EOF &&
+	[bundle]
+		version = 1
+		mode = all
+	EOF
+
+	test_must_fail test-tool bundle-uri parse-key-values in >actual 2>err &&
+	test_cmp err.expect err &&
+	test_cmp_config_output expect actual
+'
+
+test_expect_success 'bundle_uri_parse_line() parsing edge cases: empty lines' '
+	cat >in <<-\EOF &&
+	bundle.one.uri=http://example.com/bundle.bdl
+
+	bundle.two.uri=https://example.com/bundle.bdl
+
+	bundle.three.uri=file:///usr/share/git/bundle.bdl
+	EOF
+
+	cat >err.expect <<-\EOF &&
+	error: bundle-uri: got an empty line
+	error: bad line: '\'''\''
+	error: bundle-uri: got an empty line
+	error: bad line: '\'''\''
+	EOF
+
+	# We fail, but try to continue parsing regardless
+	cat >expect <<-\EOF &&
+	[bundle]
+		version = 1
+		mode = all
+	[bundle "one"]
+		uri = http://example.com/bundle.bdl
+	[bundle "two"]
+		uri = https://example.com/bundle.bdl
+	[bundle "three"]
+		uri = file:///usr/share/git/bundle.bdl
+	EOF
+
+	test_must_fail test-tool bundle-uri parse-key-values in >actual 2>err &&
+	test_cmp err.expect err &&
+	test_cmp_config_output expect actual
+'
+
+test_expect_success 'bundle_uri_parse_line() parsing edge cases: duplicate lines' '
+	cat >in <<-\EOF &&
+	bundle.one.uri=http://example.com/bundle.bdl
+	bundle.two.uri=https://example.com/bundle.bdl
+	bundle.one.uri=https://example.com/bundle-2.bdl
+	bundle.three.uri=file:///usr/share/git/bundle.bdl
+	EOF
+
+	cat >err.expect <<-\EOF &&
+	error: bad line: '\''bundle.one.uri=https://example.com/bundle-2.bdl'\''
+	EOF
+
+	# We fail, but try to continue parsing regardless
+	cat >expect <<-\EOF &&
+	[bundle]
+		version = 1
+		mode = all
+	[bundle "one"]
+		uri = http://example.com/bundle.bdl
+	[bundle "two"]
+		uri = https://example.com/bundle.bdl
+	[bundle "three"]
+		uri = file:///usr/share/git/bundle.bdl
+	EOF
+
+	test_must_fail test-tool bundle-uri parse-key-values in >actual 2>err &&
+	test_cmp err.expect err &&
+	test_cmp_config_output expect actual
+'
+
+test_done
diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index c6479f24eb..5d2f20983b 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -1868,3 +1868,14 @@ test_is_magic_mtime () {
 	rm -f .git/test-mtime-actual
 	return $ret
 }
+
+# Given two filenames, parse both using 'git config --list --file'
+# and compare the sorted output of those commands. Useful when
+# wanting to ignore whitespace differences and sorting concerns.
+test_cmp_config_output () {
+	git config --list --file="$1" >config-expect &&
+	git config --list --file="$2" >config-actual &&
+	sort config-expect >sorted-expect &&
+	sort config-actual >sorted-actual &&
+	test_cmp sorted-expect sorted-actual
+}

From 738e5245fa423fc43495e2e17e053365dc6b2fc0 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <derrickstolee@github.com>
Date: Wed, 12 Oct 2022 12:52:33 +0000
Subject: [PATCH 06/12] bundle-uri: parse bundle list in config format

When a bundle provider wants to operate independently from a Git remote,
they want to provide a single, consistent URI that users can use in
their 'git clone --bundle-uri' commands. At this point, the Git client
expects that URI to be a single bundle that can be unbundled and used to
bootstrap the rest of the clone from the Git server. This single bundle
cannot be re-used to assist with future incremental fetches.

To allow for the incremental fetch case, teach Git to understand a
bundle list that could be advertised at an independent bundle URI. Such
a bundle list is likely to be inspected by human readers, even if only
by the bundle provider creating the list. For this reason, we can take
our expected "key=value" pairs and instead format them using Git config
format.

Create bundle_uri_parse_config_format() to parse a file in config format
and convert that into a 'struct bundle_list' filled with its
understanding of the contents.

Be careful to use error_action CONFIG_ERROR_ERROR when calling
git_config_from_file_with_options() because the default action for
git_config_from_file() is to die() on a parsing error.  The current
warning isn't particularly helpful if it is shown to a user, but it will
be made more verbose at a higher layer later.

Update 'test-tool bundle-uri' to take this config file format as input.
It uses a filename instead of stdin because there is no existing way to
parse a FILE pointer in the config machinery. Using
git_config_from_mem() is overly complicated and more likely to introduce
bugs than this simpler version.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 bundle-uri.c                | 27 ++++++++++++++++++++
 bundle-uri.h                |  9 +++++++
 t/helper/test-bundle-uri.c  | 49 +++++++++++++++++++++++++++---------
 t/t5750-bundle-uri-parse.sh | 50 +++++++++++++++++++++++++++++++++++++
 4 files changed, 123 insertions(+), 12 deletions(-)

diff --git a/bundle-uri.c b/bundle-uri.c
index c02e7f62eb..3d44ec2b1e 100644
--- a/bundle-uri.c
+++ b/bundle-uri.c
@@ -173,6 +173,33 @@ static int bundle_list_update(const char *key, const char *value,
 	return 0;
 }
 
+static int config_to_bundle_list(const char *key, const char *value, void *data)
+{
+	struct bundle_list *list = data;
+	return bundle_list_update(key, value, list);
+}
+
+int bundle_uri_parse_config_format(const char *uri,
+				   const char *filename,
+				   struct bundle_list *list)
+{
+	int result;
+	struct config_options opts = {
+		.error_action = CONFIG_ERROR_ERROR,
+	};
+
+	result = git_config_from_file_with_options(config_to_bundle_list,
+						   filename, list,
+						   &opts);
+
+	if (!result && list->mode == BUNDLE_MODE_NONE) {
+		warning(_("bundle list at '%s' has no mode"), uri);
+		result = 1;
+	}
+
+	return result;
+}
+
 static char *find_temp_filename(void)
 {
 	int fd;
diff --git a/bundle-uri.h b/bundle-uri.h
index 0e56ab2ae5..bc13d4c992 100644
--- a/bundle-uri.h
+++ b/bundle-uri.h
@@ -62,6 +62,15 @@ int for_all_bundles_in_list(struct bundle_list *list,
 struct FILE;
 void print_bundle_list(FILE *fp, struct bundle_list *list);
 
+/**
+ * A bundle URI may point to a bundle list where the key=value
+ * pairs are provided in config file format. This method is
+ * exposed publicly for testing purposes.
+ */
+int bundle_uri_parse_config_format(const char *uri,
+				   const char *filename,
+				   struct bundle_list *list);
+
 /**
  * Fetch data from the given 'uri' and unbundle the bundle data found
  * based on that information.
diff --git a/t/helper/test-bundle-uri.c b/t/helper/test-bundle-uri.c
index 0329c56544..25afd39342 100644
--- a/t/helper/test-bundle-uri.c
+++ b/t/helper/test-bundle-uri.c
@@ -4,12 +4,21 @@
 #include "strbuf.h"
 #include "string-list.h"
 
-static int cmd__bundle_uri_parse(int argc, const char **argv)
+enum input_mode {
+	KEY_VALUE_PAIRS,
+	CONFIG_FILE,
+};
+
+static int cmd__bundle_uri_parse(int argc, const char **argv, enum input_mode mode)
 {
 	const char *key_value_usage[] = {
 		"test-tool bundle-uri parse-key-values <input>",
 		NULL
 	};
+	const char *config_usage[] = {
+		"test-tool bundle-uri parse-config <input>",
+		NULL
+	};
 	const char **usage = key_value_usage;
 	struct option options[] = {
 		OPT_END(),
@@ -19,21 +28,35 @@ static int cmd__bundle_uri_parse(int argc, const char **argv)
 	int err = 0;
 	FILE *fp;
 
-	argc = parse_options(argc, argv, NULL, options, usage, 0);
-	if (argc != 1)
-		goto usage;
+	if (mode == CONFIG_FILE)
+		usage = config_usage;
+
+	argc = parse_options(argc, argv, NULL, options, usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
 
 	init_bundle_list(&list);
-	fp = fopen(argv[0], "r");
-	if (!fp)
-		die("failed to open '%s'", argv[0]);
 
-	while (strbuf_getline(&sb, fp) != EOF) {
-		if (bundle_uri_parse_line(&list, sb.buf))
-			err = error("bad line: '%s'", sb.buf);
+	switch (mode) {
+	case KEY_VALUE_PAIRS:
+		if (argc != 1)
+			goto usage;
+		fp = fopen(argv[0], "r");
+		if (!fp)
+			die("failed to open '%s'", argv[0]);
+		while (strbuf_getline(&sb, fp) != EOF) {
+			if (bundle_uri_parse_line(&list, sb.buf))
+				err = error("bad line: '%s'", sb.buf);
+		}
+		fclose(fp);
+		break;
+
+	case CONFIG_FILE:
+		if (argc != 1)
+			goto usage;
+		err = bundle_uri_parse_config_format("<uri>", argv[0], &list);
+		break;
 	}
 	strbuf_release(&sb);
-	fclose(fp);
 
 	print_bundle_list(stdout, &list);
 
@@ -62,7 +85,9 @@ int cmd__bundle_uri(int argc, const char **argv)
 		goto usage;
 
 	if (!strcmp(argv[1], "parse-key-values"))
-		return cmd__bundle_uri_parse(argc - 1, argv + 1);
+		return cmd__bundle_uri_parse(argc - 1, argv + 1, KEY_VALUE_PAIRS);
+	if (!strcmp(argv[1], "parse-config"))
+		return cmd__bundle_uri_parse(argc - 1, argv + 1, CONFIG_FILE);
 	error("there is no test-tool bundle-uri tool '%s'", argv[1]);
 
 usage:
diff --git a/t/t5750-bundle-uri-parse.sh b/t/t5750-bundle-uri-parse.sh
index fd142a66ad..c2fe3f9c5a 100755
--- a/t/t5750-bundle-uri-parse.sh
+++ b/t/t5750-bundle-uri-parse.sh
@@ -118,4 +118,54 @@ test_expect_success 'bundle_uri_parse_line() parsing edge cases: duplicate lines
 	test_cmp_config_output expect actual
 '
 
+test_expect_success 'parse config format: just URIs' '
+	cat >expect <<-\EOF &&
+	[bundle]
+		version = 1
+		mode = all
+	[bundle "one"]
+		uri = http://example.com/bundle.bdl
+	[bundle "two"]
+		uri = https://example.com/bundle.bdl
+	[bundle "three"]
+		uri = file:///usr/share/git/bundle.bdl
+	EOF
+
+	test-tool bundle-uri parse-config expect >actual 2>err &&
+	test_must_be_empty err &&
+	test_cmp_config_output expect actual
+'
+
+test_expect_success 'parse config format edge cases: empty key or value' '
+	cat >in1 <<-\EOF &&
+	= bogus-value
+	EOF
+
+	cat >err1 <<-EOF &&
+	error: bad config line 1 in file in1
+	EOF
+
+	cat >expect <<-\EOF &&
+	[bundle]
+		version = 1
+		mode = all
+	EOF
+
+	test_must_fail test-tool bundle-uri parse-config in1 >actual 2>err &&
+	test_cmp err1 err &&
+	test_cmp_config_output expect actual &&
+
+	cat >in2 <<-\EOF &&
+	bogus-key =
+	EOF
+
+	cat >err2 <<-EOF &&
+	error: bad config line 1 in file in2
+	EOF
+
+	test_must_fail test-tool bundle-uri parse-config in2 >actual 2>err &&
+	test_cmp err2 err &&
+	test_cmp_config_output expect actual
+'
+
 test_done

From 20c1e2a68bfcb85dd919c92a82c129cee215c23a Mon Sep 17 00:00:00 2001
From: Derrick Stolee <derrickstolee@github.com>
Date: Wed, 12 Oct 2022 12:52:34 +0000
Subject: [PATCH 07/12] bundle-uri: limit recursion depth for bundle lists

The next change will start allowing us to parse bundle lists that are
downloaded from a provided bundle URI. Those lists might point to other
lists, which could proceed to an arbitrary depth (and even create
cycles). Restructure fetch_bundle_uri() to have an internal version that
has a recursion depth. Compare that to a new max_bundle_uri_depth
constant that is twice as high as we expect this depth to be for any
legitimate use of bundle list linking.

We can consider making max_bundle_uri_depth a configurable value if
there is demonstrated value in the future.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 bundle-uri.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/bundle-uri.c b/bundle-uri.c
index 3d44ec2b1e..8a7c11c639 100644
--- a/bundle-uri.c
+++ b/bundle-uri.c
@@ -334,11 +334,25 @@ static int unbundle_from_file(struct repository *r, const char *file)
 	return result;
 }
 
-int fetch_bundle_uri(struct repository *r, const char *uri)
+/**
+ * This limits the recursion on fetch_bundle_uri_internal() when following
+ * bundle lists.
+ */
+static int max_bundle_uri_depth = 4;
+
+static int fetch_bundle_uri_internal(struct repository *r,
+				     const char *uri,
+				     int depth)
 {
 	int result = 0;
 	char *filename;
 
+	if (depth >= max_bundle_uri_depth) {
+		warning(_("exceeded bundle URI recursion limit (%d)"),
+			max_bundle_uri_depth);
+		return -1;
+	}
+
 	if (!(filename = find_temp_filename())) {
 		result = -1;
 		goto cleanup;
@@ -366,6 +380,11 @@ cleanup:
 	return result;
 }
 
+int fetch_bundle_uri(struct repository *r, const char *uri)
+{
+	return fetch_bundle_uri_internal(r, uri, 0);
+}
+
 /**
  * General API for {transport,connect}.c etc.
  */

From c96060b0cef79c9d76eb97965e700beb9651f35b Mon Sep 17 00:00:00 2001
From: Derrick Stolee <derrickstolee@github.com>
Date: Wed, 12 Oct 2022 12:52:35 +0000
Subject: [PATCH 08/12] bundle: properly clear all revision flags

The verify_bundle() method checks two things for a bundle's
prerequisites:

 1. Are these objects in the object store?
 2. Are these objects reachable from our references?

In this second question, multiple uses of verify_bundle() in the same
process can report an invalid bundle even though it is correct. The
reason is that not all of the commit marks on the previously walked
commits are cleared.

The revision walk machinery was first introduced in-process by
fb9a54150d3 (git-bundle: avoid fork() in verify_bundle(), 2007-02-22).
This implementation used "-1" as the set of flags to clear. The next
meaningful change came in 2b064697a5b (revision traversal: retire
BOUNDARY_SHOW, 2007-03-05), which introduced the PREREQ_MARK flag
instead of a flag normally controlled by the revision-walk machinery.

In 86a0a408b90 (commit: factor out
clear_commit_marks_for_object_array, 2011-10-01), the loop over the
array of commits was replaced with a new
clear_commit_marks_for_object_array(), but simultaneously the "-1" value
was replaced with "ALL_REV_FLAGS", which stopped un-setting the
PREREQ_MARK flag. This means that if multiple commits were marked by the
PREREQ_MARK in a previous run of verify_bundle(), then this loop could
terminate early due to 'i' going to zero:

	while (i && (commit = get_revision(&revs)))
		if (commit->object.flags & PREREQ_MARK)
			i--;

The flag clearing work was changed again in 63647391e6c (bundle: avoid
using the rev_info flag leak_pending, 2017-12-25), but that was only
cosmetic and did not change the behavior.

It may seem that it would be sufficient to add the PREREQ_MARK flag to
the clear_commit_marks() call in its current location. However, we
actually need to do it in the "cleanup:" step, since the first loop
checking "Are these objects in the object store?" might add the
PREREQ_MARK flag to some objects and then terminate without performing a
walk due to one missing object. By clearing the flags in all cases, we
avoid this issue when running verify_bundle() multiple times in the same
process.

Moving this loop to the cleanup step alone would cause a segfault when
running 'git bundle verify' outside of a repository, but only because
that error condition uses "goto cleanup" where returning directly is
perfectly safe. Nothing has been initialized at that point, so we can
return immediately without causing any leaks.

This behavior is verified carefully by a test that will be added soon
when Git learns to download bundle lists in a 'git clone --bundle-uri'
command.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 bundle.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/bundle.c b/bundle.c
index 0208e6d90d..c277f3b936 100644
--- a/bundle.c
+++ b/bundle.c
@@ -202,10 +202,8 @@ int verify_bundle(struct repository *r,
 	int i, ret = 0, req_nr;
 	const char *message = _("Repository lacks these prerequisite commits:");
 
-	if (!r || !r->objects || !r->objects->odb) {
-		ret = error(_("need a repository to verify a bundle"));
-		goto cleanup;
-	}
+	if (!r || !r->objects || !r->objects->odb)
+		return error(_("need a repository to verify a bundle"));
 
 	repo_init_revisions(r, &revs, NULL);
 	for (i = 0; i < p->nr; i++) {
@@ -250,15 +248,6 @@ int verify_bundle(struct repository *r,
 		error("%s %s", oid_to_hex(oid), name);
 	}
 
-	/* Clean up objects used, as they will be reused. */
-	for (i = 0; i < p->nr; i++) {
-		struct string_list_item *e = p->items + i;
-		struct object_id *oid = e->util;
-		commit = lookup_commit_reference_gently(r, oid, 1);
-		if (commit)
-			clear_commit_marks(commit, ALL_REV_FLAGS);
-	}
-
 	if (verbose) {
 		struct string_list *r;
 
@@ -287,6 +276,14 @@ int verify_bundle(struct repository *r,
 				  list_objects_filter_spec(&header->filter));
 	}
 cleanup:
+	/* Clean up objects used, as they will be reused. */
+	for (i = 0; i < p->nr; i++) {
+		struct string_list_item *e = p->items + i;
+		struct object_id *oid = e->util;
+		commit = lookup_commit_reference_gently(r, oid, 1);
+		if (commit)
+			clear_commit_marks(commit, ALL_REV_FLAGS | PREREQ_MARK);
+	}
 	release_revisions(&revs);
 	return ret;
 }

From c23f592117bac30765ca22545386c3e9304da803 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <derrickstolee@github.com>
Date: Wed, 12 Oct 2022 12:52:36 +0000
Subject: [PATCH 09/12] bundle-uri: fetch a list of bundles

When the content at a given bundle URI is not understood as a bundle
(based on inspecting the initial content), then Git currently gives up
and ignores that content. Independent bundle providers may want to split
up the bundle content into multiple bundles, but still make them
available from a single URI.

Teach Git to attempt parsing the bundle URI content as a Git config file
providing the key=value pairs for a bundle list. Git then looks at the
mode of the list to see if ANY single bundle is sufficient or if ALL
bundles are required. The content at the selected URIs is downloaded
and inspected again, creating a recursive process.

To guard the recursion against malformed or malicious content, limit the
recursion depth to a reasonable four for now. This can be converted to a
configured value in the future if necessary. The value of four is twice
as high as expected to be useful (a bundle list is unlikely to point to
more bundle lists).

To test this scenario, create an interesting bundle topology where three
incremental bundles are built on top of a single full bundle. By using a
merge commit, the two middle bundles are "independent" in that they do
not require each other in order to unbundle themselves. They each only
need the base bundle. The bundle containing the merge commit requires
both of the middle bundles, though. This leads to some interesting
decisions when unbundling, especially when we later implement heuristics
that promote downloading bundles until the prerequisite commits are
satisfied.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 bundle-uri.c                | 203 ++++++++++++++++++++++++++---
 bundle-uri.h                |  13 ++
 t/t5558-clone-bundle-uri.sh | 248 ++++++++++++++++++++++++++++++++++++
 3 files changed, 448 insertions(+), 16 deletions(-)

diff --git a/bundle-uri.c b/bundle-uri.c
index 8a7c11c639..70bfd2defe 100644
--- a/bundle-uri.c
+++ b/bundle-uri.c
@@ -37,6 +37,8 @@ static int clear_remote_bundle_info(struct remote_bundle_info *bundle,
 {
 	FREE_AND_NULL(bundle->id);
 	FREE_AND_NULL(bundle->uri);
+	FREE_AND_NULL(bundle->file);
+	bundle->unbundled = 0;
 	return 0;
 }
 
@@ -334,18 +336,117 @@ static int unbundle_from_file(struct repository *r, const char *file)
 	return result;
 }
 
+struct bundle_list_context {
+	struct repository *r;
+	struct bundle_list *list;
+	enum bundle_list_mode mode;
+	int count;
+	int depth;
+};
+
+/*
+ * This early definition is necessary because we use indirect recursion:
+ *
+ * While iterating through a bundle list that was downloaded as part
+ * of fetch_bundle_uri_internal(), iterator methods eventually call it
+ * again, but with depth + 1.
+ */
+static int fetch_bundle_uri_internal(struct repository *r,
+				     struct remote_bundle_info *bundle,
+				     int depth,
+				     struct bundle_list *list);
+
+static int download_bundle_to_file(struct remote_bundle_info *bundle, void *data)
+{
+	int res;
+	struct bundle_list_context *ctx = data;
+
+	if (ctx->mode == BUNDLE_MODE_ANY && ctx->count)
+		return 0;
+
+	res = fetch_bundle_uri_internal(ctx->r, bundle, ctx->depth + 1, ctx->list);
+
+	/*
+	 * Only increment count if the download succeeded. If our mode is
+	 * BUNDLE_MODE_ANY, then we will want to try other URIs in the
+	 * list in case they work instead.
+	 */
+	if (!res)
+		ctx->count++;
+
+	/*
+	 * To be opportunistic as possible, we continue iterating and
+	 * download as many bundles as we can, so we can apply the ones
+	 * that work, even in BUNDLE_MODE_ALL mode.
+	 */
+	return 0;
+}
+
+static int download_bundle_list(struct repository *r,
+				struct bundle_list *local_list,
+				struct bundle_list *global_list,
+				int depth)
+{
+	struct bundle_list_context ctx = {
+		.r = r,
+		.list = global_list,
+		.depth = depth + 1,
+		.mode = local_list->mode,
+	};
+
+	return for_all_bundles_in_list(local_list, download_bundle_to_file, &ctx);
+}
+
+static int fetch_bundle_list_in_config_format(struct repository *r,
+					      struct bundle_list *global_list,
+					      struct remote_bundle_info *bundle,
+					      int depth)
+{
+	int result;
+	struct bundle_list list_from_bundle;
+
+	init_bundle_list(&list_from_bundle);
+
+	if ((result = bundle_uri_parse_config_format(bundle->uri,
+						     bundle->file,
+						     &list_from_bundle)))
+		goto cleanup;
+
+	if (list_from_bundle.mode == BUNDLE_MODE_NONE) {
+		warning(_("unrecognized bundle mode from URI '%s'"),
+			bundle->uri);
+		result = -1;
+		goto cleanup;
+	}
+
+	if ((result = download_bundle_list(r, &list_from_bundle,
+					   global_list, depth)))
+		goto cleanup;
+
+cleanup:
+	clear_bundle_list(&list_from_bundle);
+	return result;
+}
+
 /**
  * This limits the recursion on fetch_bundle_uri_internal() when following
  * bundle lists.
  */
 static int max_bundle_uri_depth = 4;
 
+/**
+ * Recursively download all bundles advertised at the given URI
+ * to files. If the file is a bundle, then add it to the given
+ * 'list'. Otherwise, expect a bundle list and recurse on the
+ * URIs in that list according to the list mode (ANY or ALL).
+ */
 static int fetch_bundle_uri_internal(struct repository *r,
-				     const char *uri,
-				     int depth)
+				     struct remote_bundle_info *bundle,
+				     int depth,
+				     struct bundle_list *list)
 {
 	int result = 0;
-	char *filename;
+	struct remote_bundle_info *bcopy;
 
 	if (depth >= max_bundle_uri_depth) {
 		warning(_("exceeded bundle URI recursion limit (%d)"),
@@ -353,36 +454,106 @@ static int fetch_bundle_uri_internal(struct repository *r,
 		return -1;
 	}
 
-	if (!(filename = find_temp_filename())) {
+	if (!bundle->file &&
+	    !(bundle->file = find_temp_filename())) {
 		result = -1;
 		goto cleanup;
 	}
 
-	if ((result = copy_uri_to_file(filename, uri))) {
-		warning(_("failed to download bundle from URI '%s'"), uri);
+	if ((result = copy_uri_to_file(bundle->file, bundle->uri))) {
+		warning(_("failed to download bundle from URI '%s'"), bundle->uri);
 		goto cleanup;
 	}
 
-	if ((result = !is_bundle(filename, 0))) {
-		warning(_("file at URI '%s' is not a bundle"), uri);
+	if ((result = !is_bundle(bundle->file, 1))) {
+		result = fetch_bundle_list_in_config_format(
+				r, list, bundle, depth);
+		if (result)
+			warning(_("file at URI '%s' is not a bundle or bundle list"),
+				bundle->uri);
 		goto cleanup;
 	}
 
-	if ((result = unbundle_from_file(r, filename))) {
-		warning(_("failed to unbundle bundle from URI '%s'"), uri);
-		goto cleanup;
-	}
+	/* Copy the bundle and insert it into the global list. */
+	CALLOC_ARRAY(bcopy, 1);
+	bcopy->id = xstrdup(bundle->id);
+	bcopy->file = xstrdup(bundle->file);
+	hashmap_entry_init(&bcopy->ent, strhash(bcopy->id));
+	hashmap_add(&list->bundles, &bcopy->ent);
 
 cleanup:
-	if (filename)
-		unlink(filename);
-	free(filename);
+	if (result && bundle->file)
+		unlink(bundle->file);
 	return result;
 }
 
+/**
+ * This loop iterator breaks the loop with nonzero return code on the
+ * first successful unbundling of a bundle.
+ */
+static int attempt_unbundle(struct remote_bundle_info *info, void *data)
+{
+	struct repository *r = data;
+
+	if (!info->file || info->unbundled)
+		return 0;
+
+	if (!unbundle_from_file(r, info->file)) {
+		info->unbundled = 1;
+		return 1;
+	}
+
+	return 0;
+}
+
+static int unbundle_all_bundles(struct repository *r,
+				struct bundle_list *list)
+{
+	/*
+	 * Iterate through all bundles looking for ones that can
+	 * successfully unbundle. If any succeed, then perhaps another
+	 * will succeed in the next attempt.
+	 *
+	 * Keep in mind that a non-zero result for the loop here means
+	 * the loop terminated early on a successful unbundling, which
+	 * signals that we can try again.
+	 */
+	while (for_all_bundles_in_list(list, attempt_unbundle, r)) ;
+
+	return 0;
+}
+
+static int unlink_bundle(struct remote_bundle_info *info, void *data)
+{
+	if (info->file)
+		unlink_or_warn(info->file);
+	return 0;
+}
+
 int fetch_bundle_uri(struct repository *r, const char *uri)
 {
-	return fetch_bundle_uri_internal(r, uri, 0);
+	int result;
+	struct bundle_list list;
+	struct remote_bundle_info bundle = {
+		.uri = xstrdup(uri),
+		.id = xstrdup(""),
+	};
+
+	init_bundle_list(&list);
+
+	/* If a bundle is added to this global list, then it is required. */
+	list.mode = BUNDLE_MODE_ALL;
+
+	if ((result = fetch_bundle_uri_internal(r, &bundle, 0, &list)))
+		goto cleanup;
+
+	result = unbundle_all_bundles(r, &list);
+
+cleanup:
+	for_all_bundles_in_list(&list, unlink_bundle, NULL);
+	clear_bundle_list(&list);
+	clear_remote_bundle_info(&bundle, NULL);
+	return result;
 }
 
 /**
diff --git a/bundle-uri.h b/bundle-uri.h
index bc13d4c992..4dbc269823 100644
--- a/bundle-uri.h
+++ b/bundle-uri.h
@@ -28,6 +28,19 @@ struct remote_bundle_info {
 	 * if there was no table of contents.
 	 */
 	char *uri;
+
+	/**
+	 * If the bundle has been downloaded, then 'file' is a
+	 * filename storing its contents. Otherwise, 'file' is
+	 * NULL.
+	 */
+	char *file;
+
+	/**
+	 * If the bundle has been unbundled successfully, then
+	 * this boolean is true.
+	 */
+	unsigned unbundled:1;
 };
 
 #define REMOTE_BUNDLE_INFO_INIT { 0 }
diff --git a/t/t5558-clone-bundle-uri.sh b/t/t5558-clone-bundle-uri.sh
index ad666a2d28..a86dc04f52 100755
--- a/t/t5558-clone-bundle-uri.sh
+++ b/t/t5558-clone-bundle-uri.sh
@@ -41,6 +41,195 @@ test_expect_success 'clone with file:// bundle' '
 	test_cmp expect actual
 '
 
+# To get interesting tests for bundle lists, we need to construct a
+# somewhat-interesting commit history.
+#
+# ---------------- bundle-4
+#
+#       4
+#      / \
+# ----|---|------- bundle-3
+#     |   |
+#     |   3
+#     |   |
+# ----|---|------- bundle-2
+#     |   |
+#     2   |
+#     |   |
+# ----|---|------- bundle-1
+#      \ /
+#       1
+#       |
+# (previous commits)
+test_expect_success 'construct incremental bundle list' '
+	(
+		cd clone-from &&
+		git checkout -b base &&
+		test_commit 1 &&
+		git checkout -b left &&
+		test_commit 2 &&
+		git checkout -b right base &&
+		test_commit 3 &&
+		git checkout -b merge left &&
+		git merge right -m "4" &&
+
+		git bundle create bundle-1.bundle base &&
+		git bundle create bundle-2.bundle base..left &&
+		git bundle create bundle-3.bundle base..right &&
+		git bundle create bundle-4.bundle merge --not left right
+	)
+'
+
+test_expect_success 'clone bundle list (file, no heuristic)' '
+	cat >bundle-list <<-EOF &&
+	[bundle]
+		version = 1
+		mode = all
+
+	[bundle "bundle-1"]
+		uri = file://$(pwd)/clone-from/bundle-1.bundle
+
+	[bundle "bundle-2"]
+		uri = file://$(pwd)/clone-from/bundle-2.bundle
+
+	[bundle "bundle-3"]
+		uri = file://$(pwd)/clone-from/bundle-3.bundle
+
+	[bundle "bundle-4"]
+		uri = file://$(pwd)/clone-from/bundle-4.bundle
+	EOF
+
+	git clone --bundle-uri="file://$(pwd)/bundle-list" clone-from clone-list-file &&
+	git -C clone-from for-each-ref --format="%(objectname)" >oids &&
+	git -C clone-list-file cat-file --batch-check <oids &&
+
+	git -C clone-list-file for-each-ref --format="%(refname)" >refs &&
+	grep "refs/bundles/" refs >actual &&
+	cat >expect <<-\EOF &&
+	refs/bundles/base
+	refs/bundles/left
+	refs/bundles/merge
+	refs/bundles/right
+	EOF
+	test_cmp expect actual
+'
+
+test_expect_success 'clone bundle list (file, all mode, some failures)' '
+	cat >bundle-list <<-EOF &&
+	[bundle]
+		version = 1
+		mode = all
+
+	# Does not exist. Should be skipped.
+	[bundle "bundle-0"]
+		uri = file://$(pwd)/clone-from/bundle-0.bundle
+
+	[bundle "bundle-1"]
+		uri = file://$(pwd)/clone-from/bundle-1.bundle
+
+	[bundle "bundle-2"]
+		uri = file://$(pwd)/clone-from/bundle-2.bundle
+
+	# No bundle-3 means bundle-4 will not apply.
+
+	[bundle "bundle-4"]
+		uri = file://$(pwd)/clone-from/bundle-4.bundle
+
+	# Does not exist. Should be skipped.
+	[bundle "bundle-5"]
+		uri = file://$(pwd)/clone-from/bundle-5.bundle
+	EOF
+
+	GIT_TRACE2_PERF=1 \
+	git clone --bundle-uri="file://$(pwd)/bundle-list" clone-from clone-all-some &&
+	git -C clone-from for-each-ref --format="%(objectname)" >oids &&
+	git -C clone-all-some cat-file --batch-check <oids &&
+
+	git -C clone-all-some for-each-ref --format="%(refname)" >refs &&
+	grep "refs/bundles/" refs >actual &&
+	cat >expect <<-\EOF &&
+	refs/bundles/base
+	refs/bundles/left
+	EOF
+	test_cmp expect actual
+'
+
+test_expect_success 'clone bundle list (file, all mode, all failures)' '
+	cat >bundle-list <<-EOF &&
+	[bundle]
+		version = 1
+		mode = all
+
+	# Does not exist. Should be skipped.
+	[bundle "bundle-0"]
+		uri = file://$(pwd)/clone-from/bundle-0.bundle
+
+	# Does not exist. Should be skipped.
+	[bundle "bundle-5"]
+		uri = file://$(pwd)/clone-from/bundle-5.bundle
+	EOF
+
+	git clone --bundle-uri="file://$(pwd)/bundle-list" clone-from clone-all-fail &&
+	git -C clone-from for-each-ref --format="%(objectname)" >oids &&
+	git -C clone-all-fail cat-file --batch-check <oids &&
+
+	git -C clone-all-fail for-each-ref --format="%(refname)" >refs &&
+	! grep "refs/bundles/" refs
+'
+
+test_expect_success 'clone bundle list (file, any mode)' '
+	cat >bundle-list <<-EOF &&
+	[bundle]
+		version = 1
+		mode = any
+
+	# Does not exist. Should be skipped.
+	[bundle "bundle-0"]
+		uri = file://$(pwd)/clone-from/bundle-0.bundle
+
+	[bundle "bundle-1"]
+		uri = file://$(pwd)/clone-from/bundle-1.bundle
+
+	# Does not exist. Should be skipped.
+	[bundle "bundle-5"]
+		uri = file://$(pwd)/clone-from/bundle-5.bundle
+	EOF
+
+	git clone --bundle-uri="file://$(pwd)/bundle-list" clone-from clone-any-file &&
+	git -C clone-from for-each-ref --format="%(objectname)" >oids &&
+	git -C clone-any-file cat-file --batch-check <oids &&
+
+	git -C clone-any-file for-each-ref --format="%(refname)" >refs &&
+	grep "refs/bundles/" refs >actual &&
+	cat >expect <<-\EOF &&
+	refs/bundles/base
+	EOF
+	test_cmp expect actual
+'
+
+test_expect_success 'clone bundle list (file, any mode, all failures)' '
+	cat >bundle-list <<-EOF &&
+	[bundle]
+		version = 1
+		mode = any
+
+	# Does not exist. Should be skipped.
+	[bundle "bundle-0"]
+		uri = $HTTPD_URL/bundle-0.bundle
+
+	# Does not exist. Should be skipped.
+	[bundle "bundle-5"]
+		uri = $HTTPD_URL/bundle-5.bundle
+	EOF
+
+	git clone --bundle-uri="file://$(pwd)/bundle-list" clone-from clone-any-fail &&
+	git -C clone-from for-each-ref --format="%(objectname)" >oids &&
+	git -C clone-any-fail cat-file --batch-check <oids &&
+
+	git -C clone-any-fail for-each-ref --format="%(refname)" >refs &&
+	! grep "refs/bundles/" refs
+'
+
 #########################################################################
 # HTTP tests begin here
 
@@ -75,6 +264,65 @@ test_expect_success 'clone HTTP bundle' '
 	test_config -C clone-http log.excludedecoration refs/bundle/
 '
 
+test_expect_success 'clone bundle list (HTTP, no heuristic)' '
+	cp clone-from/bundle-*.bundle "$HTTPD_DOCUMENT_ROOT_PATH/" &&
+	cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF &&
+	[bundle]
+		version = 1
+		mode = all
+
+	[bundle "bundle-1"]
+		uri = $HTTPD_URL/bundle-1.bundle
+
+	[bundle "bundle-2"]
+		uri = $HTTPD_URL/bundle-2.bundle
+
+	[bundle "bundle-3"]
+		uri = $HTTPD_URL/bundle-3.bundle
+
+	[bundle "bundle-4"]
+		uri = $HTTPD_URL/bundle-4.bundle
+	EOF
+
+	git clone --bundle-uri="$HTTPD_URL/bundle-list" clone-from clone-list-http &&
+	git -C clone-from for-each-ref --format="%(objectname)" >oids &&
+	git -C clone-list-http cat-file --batch-check <oids
+'
+
+test_expect_success 'clone bundle list (HTTP, any mode)' '
+	cp clone-from/bundle-*.bundle "$HTTPD_DOCUMENT_ROOT_PATH/" &&
+	cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF &&
+	[bundle]
+		version = 1
+		mode = any
+
+	# Does not exist. Should be skipped.
+	[bundle "bundle-0"]
+		uri = $HTTPD_URL/bundle-0.bundle
+
+	[bundle "bundle-1"]
+		uri = $HTTPD_URL/bundle-1.bundle
+
+	# Does not exist. Should be skipped.
+	[bundle "bundle-5"]
+		uri = $HTTPD_URL/bundle-5.bundle
+	EOF
+
+	git clone --bundle-uri="$HTTPD_URL/bundle-list" clone-from clone-any-http &&
+	git -C clone-from for-each-ref --format="%(objectname)" >oids &&
+	git -C clone-any-http cat-file --batch-check <oids &&
+
+	git -C clone-list-file for-each-ref --format="%(refname)" >refs &&
+	grep "refs/bundles/" refs >actual &&
+	cat >expect <<-\EOF &&
+	refs/bundles/base
+	refs/bundles/left
+	refs/bundles/merge
+	refs/bundles/right
+	EOF
+	test_cmp expect actual
+'
+
 # Do not add tests here unless they use the HTTP server, as they will
 # not run unless the HTTP dependencies exist.
 

From 89bd7fedf947484da08e2722d663fdac23a431be Mon Sep 17 00:00:00 2001
From: Derrick Stolee <derrickstolee@github.com>
Date: Wed, 12 Oct 2022 12:52:37 +0000
Subject: [PATCH 10/12] bundle: add flags to verify_bundle()

The verify_bundle() method has a 'verbose' option, but we will want to
extend this method to have more granular control over its output. First,
replace this 'verbose' option with a new 'flags' option with a single
possible value: VERIFY_BUNDLE_VERBOSE.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/bundle.c |  5 +++--
 bundle-uri.c     |  7 ++++++-
 bundle.c         |  9 +++++----
 bundle.h         | 14 ++++++++++++--
 transport.c      |  2 +-
 5 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/builtin/bundle.c b/builtin/bundle.c
index 2adad545a2..7d983a238f 100644
--- a/builtin/bundle.c
+++ b/builtin/bundle.c
@@ -119,7 +119,8 @@ static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
 		goto cleanup;
 	}
 	close(bundle_fd);
-	if (verify_bundle(the_repository, &header, !quiet)) {
+	if (verify_bundle(the_repository, &header,
+			  quiet ? 0 : VERIFY_BUNDLE_VERBOSE)) {
 		ret = 1;
 		goto cleanup;
 	}
@@ -185,7 +186,7 @@ static int cmd_bundle_unbundle(int argc, const char **argv, const char *prefix)
 		strvec_pushl(&extra_index_pack_args, "-v", "--progress-title",
 			     _("Unbundling objects"), NULL);
 	ret = !!unbundle(the_repository, &header, bundle_fd,
-			 &extra_index_pack_args) ||
+			 &extra_index_pack_args, 0) ||
 		list_bundle_refs(&header, argc, argv);
 	bundle_header_release(&header);
 cleanup:
diff --git a/bundle-uri.c b/bundle-uri.c
index 70bfd2defe..d9060be707 100644
--- a/bundle-uri.c
+++ b/bundle-uri.c
@@ -303,7 +303,12 @@ static int unbundle_from_file(struct repository *r, const char *file)
 	if ((bundle_fd = read_bundle_header(file, &header)) < 0)
 		return 1;
 
-	if ((result = unbundle(r, &header, bundle_fd, NULL)))
+	/*
+	 * Skip the reachability walk here, since we will be adding
+	 * a reachable ref pointing to the new tips, which will reach
+	 * the prerequisite commits.
+	 */
+	if ((result = unbundle(r, &header, bundle_fd, NULL, 0)))
 		return 1;
 
 	/*
diff --git a/bundle.c b/bundle.c
index c277f3b936..1f6a7f782e 100644
--- a/bundle.c
+++ b/bundle.c
@@ -189,7 +189,7 @@ static int list_refs(struct string_list *r, int argc, const char **argv)
 
 int verify_bundle(struct repository *r,
 		  struct bundle_header *header,
-		  int verbose)
+		  enum verify_bundle_flags flags)
 {
 	/*
 	 * Do fast check, then if any prereqs are missing then go line by line
@@ -248,7 +248,7 @@ int verify_bundle(struct repository *r,
 		error("%s %s", oid_to_hex(oid), name);
 	}
 
-	if (verbose) {
+	if (flags & VERIFY_BUNDLE_VERBOSE) {
 		struct string_list *r;
 
 		r = &header->references;
@@ -617,7 +617,8 @@ err:
 }
 
 int unbundle(struct repository *r, struct bundle_header *header,
-	     int bundle_fd, struct strvec *extra_index_pack_args)
+	     int bundle_fd, struct strvec *extra_index_pack_args,
+	     enum verify_bundle_flags flags)
 {
 	struct child_process ip = CHILD_PROCESS_INIT;
 	strvec_pushl(&ip.args, "index-pack", "--fix-thin", "--stdin", NULL);
@@ -631,7 +632,7 @@ int unbundle(struct repository *r, struct bundle_header *header,
 		strvec_clear(extra_index_pack_args);
 	}
 
-	if (verify_bundle(r, header, 0))
+	if (verify_bundle(r, header, flags))
 		return -1;
 	ip.in = bundle_fd;
 	ip.no_stdout = 1;
diff --git a/bundle.h b/bundle.h
index 0c052f5496..6652e81998 100644
--- a/bundle.h
+++ b/bundle.h
@@ -29,7 +29,13 @@ int read_bundle_header_fd(int fd, struct bundle_header *header,
 int create_bundle(struct repository *r, const char *path,
 		  int argc, const char **argv, struct strvec *pack_options,
 		  int version);
-int verify_bundle(struct repository *r, struct bundle_header *header, int verbose);
+
+enum verify_bundle_flags {
+	VERIFY_BUNDLE_VERBOSE = (1 << 0),
+};
+
+int verify_bundle(struct repository *r, struct bundle_header *header,
+		  enum verify_bundle_flags flags);
 
 /**
  * Unbundle after reading the header with read_bundle_header().
@@ -40,9 +46,13 @@ int verify_bundle(struct repository *r, struct bundle_header *header, int verbos
  * Provide "extra_index_pack_args" to pass any extra arguments
  * (e.g. "-v" for verbose/progress), NULL otherwise. The provided
  * "extra_index_pack_args" (if any) will be strvec_clear()'d for you.
+ *
+ * Before unbundling, this method will call verify_bundle() with the
+ * given 'flags'.
  */
 int unbundle(struct repository *r, struct bundle_header *header,
-	     int bundle_fd, struct strvec *extra_index_pack_args);
+	     int bundle_fd, struct strvec *extra_index_pack_args,
+	     enum verify_bundle_flags flags);
 int list_bundle_refs(struct bundle_header *header,
 		int argc, const char **argv);
 
diff --git a/transport.c b/transport.c
index b51e991e44..de4d88687c 100644
--- a/transport.c
+++ b/transport.c
@@ -178,7 +178,7 @@ static int fetch_refs_from_bundle(struct transport *transport,
 	if (!data->get_refs_from_bundle_called)
 		get_refs_from_bundle_inner(transport);
 	ret = unbundle(the_repository, &data->header, data->fd,
-		       &extra_index_pack_args);
+		       &extra_index_pack_args, 0);
 	transport->hash_algo = data->header.hash_algo;
 	return ret;
 }

From 70334fc3ebf1c6199014d82bbbf0595b64a8fa90 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <derrickstolee@github.com>
Date: Wed, 12 Oct 2022 12:52:38 +0000
Subject: [PATCH 11/12] bundle-uri: quiet failed unbundlings

When downloading a list of bundles in "all" mode, Git has no
understanding of the dependencies between the bundles. Git attempts to
unbundle the bundles in some order, but some may not pass the
verify_bundle() step because of missing prerequisites. These failures
are reported as error messages to the user, even when the bundles
eventually succeed in later attempts, after the bundles they depend on
have been unbundled.

Add a new VERIFY_BUNDLE_QUIET flag to verify_bundle() that avoids the
error messages from the missing prerequisite commits. The method still
returns the number of missing prerequisite commits, allowing callers of
unbundle() to notice that the bundle failed to apply.

Use this flag in bundle-uri.c and test that the messages go away for
'git clone --bundle-uri' commands.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/bundle.c            |  2 +-
 bundle-uri.c                |  3 ++-
 bundle.c                    | 10 ++++++++--
 bundle.h                    |  1 +
 t/t5558-clone-bundle-uri.sh | 25 ++++++++++++++++++++-----
 5 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/builtin/bundle.c b/builtin/bundle.c
index 7d983a238f..fd4586b09e 100644
--- a/builtin/bundle.c
+++ b/builtin/bundle.c
@@ -120,7 +120,7 @@ static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) {
 	}
 	close(bundle_fd);
 	if (verify_bundle(the_repository, &header,
-			  quiet ? 0 : VERIFY_BUNDLE_VERBOSE)) {
+			  quiet ? VERIFY_BUNDLE_QUIET : VERIFY_BUNDLE_VERBOSE)) {
 		ret = 1;
 		goto cleanup;
 	}
diff --git a/bundle-uri.c b/bundle-uri.c
index d9060be707..d872acf5ab 100644
--- a/bundle-uri.c
+++ b/bundle-uri.c
@@ -308,7 +308,8 @@ static int unbundle_from_file(struct repository *r, const char *file)
 	 * a reachable ref pointing to the new tips, which will reach
 	 * the prerequisite commits.
 	 */
-	if ((result = unbundle(r, &header, bundle_fd, NULL, 0)))
+	if ((result = unbundle(r, &header, bundle_fd, NULL,
+			       VERIFY_BUNDLE_QUIET)))
 		return 1;
 
 	/*
diff --git a/bundle.c b/bundle.c
index 1f6a7f782e..4ef7256aa1 100644
--- a/bundle.c
+++ b/bundle.c
@@ -216,7 +216,10 @@ int verify_bundle(struct repository *r,
 			add_pending_object(&revs, o, name);
 			continue;
 		}
-		if (++ret == 1)
+		ret++;
+		if (flags & VERIFY_BUNDLE_QUIET)
+			continue;
+		if (ret == 1)
 			error("%s", message);
 		error("%s %s", oid_to_hex(oid), name);
 	}
@@ -243,7 +246,10 @@ int verify_bundle(struct repository *r,
 		assert(o); /* otherwise we'd have returned early */
 		if (o->flags & SHOWN)
 			continue;
-		if (++ret == 1)
+		ret++;
+		if (flags & VERIFY_BUNDLE_QUIET)
+			continue;
+		if (ret == 1)
 			error("%s", message);
 		error("%s %s", oid_to_hex(oid), name);
 	}
diff --git a/bundle.h b/bundle.h
index 6652e81998..575c34245d 100644
--- a/bundle.h
+++ b/bundle.h
@@ -32,6 +32,7 @@ int create_bundle(struct repository *r, const char *path,
 
 enum verify_bundle_flags {
 	VERIFY_BUNDLE_VERBOSE = (1 << 0),
+	VERIFY_BUNDLE_QUIET = (1 << 1),
 };
 
 int verify_bundle(struct repository *r, struct bundle_header *header,
diff --git a/t/t5558-clone-bundle-uri.sh b/t/t5558-clone-bundle-uri.sh
index a86dc04f52..9b15907838 100755
--- a/t/t5558-clone-bundle-uri.sh
+++ b/t/t5558-clone-bundle-uri.sh
@@ -99,7 +99,10 @@ test_expect_success 'clone bundle list (file, no heuristic)' '
 		uri = file://$(pwd)/clone-from/bundle-4.bundle
 	EOF
 
-	git clone --bundle-uri="file://$(pwd)/bundle-list" clone-from clone-list-file &&
+	git clone --bundle-uri="file://$(pwd)/bundle-list" \
+		clone-from clone-list-file 2>err &&
+	! grep "Repository lacks these prerequisite commits" err &&
+
 	git -C clone-from for-each-ref --format="%(objectname)" >oids &&
 	git -C clone-list-file cat-file --batch-check <oids &&
 
@@ -141,7 +144,10 @@ test_expect_success 'clone bundle list (file, all mode, some failures)' '
 	EOF
 
 	GIT_TRACE2_PERF=1 \
-	git clone --bundle-uri="file://$(pwd)/bundle-list" clone-from clone-all-some &&
+	git clone --bundle-uri="file://$(pwd)/bundle-list" \
+		clone-from clone-all-some 2>err &&
+	! grep "Repository lacks these prerequisite commits" err &&
+
 	git -C clone-from for-each-ref --format="%(objectname)" >oids &&
 	git -C clone-all-some cat-file --batch-check <oids &&
 
@@ -169,7 +175,10 @@ test_expect_success 'clone bundle list (file, all mode, all failures)' '
 		uri = file://$(pwd)/clone-from/bundle-5.bundle
 	EOF
 
-	git clone --bundle-uri="file://$(pwd)/bundle-list" clone-from clone-all-fail &&
+	git clone --bundle-uri="file://$(pwd)/bundle-list" \
+		clone-from clone-all-fail 2>err &&
+	! grep "Repository lacks these prerequisite commits" err &&
+
 	git -C clone-from for-each-ref --format="%(objectname)" >oids &&
 	git -C clone-all-fail cat-file --batch-check <oids &&
 
@@ -195,7 +204,10 @@ test_expect_success 'clone bundle list (file, any mode)' '
 		uri = file://$(pwd)/clone-from/bundle-5.bundle
 	EOF
 
-	git clone --bundle-uri="file://$(pwd)/bundle-list" clone-from clone-any-file &&
+	git clone --bundle-uri="file://$(pwd)/bundle-list" \
+		clone-from clone-any-file 2>err &&
+	! grep "Repository lacks these prerequisite commits" err &&
+
 	git -C clone-from for-each-ref --format="%(objectname)" >oids &&
 	git -C clone-any-file cat-file --batch-check <oids &&
 
@@ -284,7 +296,10 @@ test_expect_success 'clone bundle list (HTTP, no heuristic)' '
 		uri = $HTTPD_URL/bundle-4.bundle
 	EOF
 
-	git clone --bundle-uri="$HTTPD_URL/bundle-list" clone-from clone-list-http &&
+	git clone --bundle-uri="$HTTPD_URL/bundle-list" \
+		clone-from clone-list-http  2>err &&
+	! grep "Repository lacks these prerequisite commits" err &&
+
 	git -C clone-from for-each-ref --format="%(objectname)" >oids &&
 	git -C clone-list-http cat-file --batch-check <oids
 '

From 8628a842bddda7723ad7548b7f6d141123a164a0 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <derrickstolee@github.com>
Date: Wed, 12 Oct 2022 12:52:39 +0000
Subject: [PATCH 12/12] bundle-uri: suppress stderr from remote-https

When downloading bundles from a git-remote-https subprocess, the bundle
URI logic wants to be opportunistic and download as much as possible and
work with what did succeed. This is particularly important in the "any"
mode, where any single bundle success will work.

If the URI is not available, the git-remote-https process will die()
with a "fatal:" error message, even though that error is not actually
fatal to the super process. Since stderr is passed through, it looks
like a fatal error to the user.

Suppress stderr to avoid these errors from bubbling to the surface. The
bundle URI API adds its own warning() messages on these failures.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 bundle-uri.c                |  1 +
 t/t5558-clone-bundle-uri.sh | 16 ++++++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/bundle-uri.c b/bundle-uri.c
index d872acf5ab..79a914f961 100644
--- a/bundle-uri.c
+++ b/bundle-uri.c
@@ -230,6 +230,7 @@ static int download_https_uri_to_file(const char *file, const char *uri)
 	int found_get = 0;
 
 	strvec_pushl(&cp.args, "git-remote-https", uri, NULL);
+	cp.err = -1;
 	cp.in = -1;
 	cp.out = -1;
 
diff --git a/t/t5558-clone-bundle-uri.sh b/t/t5558-clone-bundle-uri.sh
index 9b15907838..9155f31fa2 100755
--- a/t/t5558-clone-bundle-uri.sh
+++ b/t/t5558-clone-bundle-uri.sh
@@ -147,6 +147,8 @@ test_expect_success 'clone bundle list (file, all mode, some failures)' '
 	git clone --bundle-uri="file://$(pwd)/bundle-list" \
 		clone-from clone-all-some 2>err &&
 	! grep "Repository lacks these prerequisite commits" err &&
+	! grep "fatal" err &&
+	grep "warning: failed to download bundle from URI" err &&
 
 	git -C clone-from for-each-ref --format="%(objectname)" >oids &&
 	git -C clone-all-some cat-file --batch-check <oids &&
@@ -178,6 +180,8 @@ test_expect_success 'clone bundle list (file, all mode, all failures)' '
 	git clone --bundle-uri="file://$(pwd)/bundle-list" \
 		clone-from clone-all-fail 2>err &&
 	! grep "Repository lacks these prerequisite commits" err &&
+	! grep "fatal" err &&
+	grep "warning: failed to download bundle from URI" err &&
 
 	git -C clone-from for-each-ref --format="%(objectname)" >oids &&
 	git -C clone-all-fail cat-file --batch-check <oids &&
@@ -234,7 +238,11 @@ test_expect_success 'clone bundle list (file, any mode, all failures)' '
 		uri = $HTTPD_URL/bundle-5.bundle
 	EOF
 
-	git clone --bundle-uri="file://$(pwd)/bundle-list" clone-from clone-any-fail &&
+	git clone --bundle-uri="file://$(pwd)/bundle-list" \
+		clone-from clone-any-fail 2>err &&
+	! grep "fatal" err &&
+	grep "warning: failed to download bundle from URI" err &&
+
 	git -C clone-from for-each-ref --format="%(objectname)" >oids &&
 	git -C clone-any-fail cat-file --batch-check <oids &&
 
@@ -323,7 +331,11 @@ test_expect_success 'clone bundle list (HTTP, any mode)' '
 		uri = $HTTPD_URL/bundle-5.bundle
 	EOF
 
-	git clone --bundle-uri="$HTTPD_URL/bundle-list" clone-from clone-any-http &&
+	git clone --bundle-uri="$HTTPD_URL/bundle-list" \
+		clone-from clone-any-http 2>err &&
+	! grep "fatal" err &&
+	grep "warning: failed to download bundle from URI" err &&
+
 	git -C clone-from for-each-ref --format="%(objectname)" >oids &&
 	git -C clone-any-http cat-file --batch-check <oids &&