From c6909f9959d394db8b76f08a6e59e5a82dade07a Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Thu, 18 Apr 2019 17:17:02 -0400
Subject: [PATCH 1/3] untracked-cache: be defensive about missing NULs in index

The on-disk format for the untracked-cache extension contains
NUL-terminated filenames. We parse these from the mmap'd file using
string functions like strlen(). This works fine in the normal case, but
if we see a malformed or corrupted index, we might read off the end of
our mmap.

Instead, let's use memchr() to find the trailing NUL within the bytes we
know are available, and return an error if it's missing.

Note that we can further simplify by folding another range check into
our conditional. After we find the end of the string, we set "next" to
the byte after the string and treat it as an error if there are no such
bytes left. That saves us from having to do a range check at the
beginning of each subsequent string (and works because there is always
data after each string). We can do both range checks together by
checking "!eos" (we didn't find a NUL) and "eos == end" (it was on the
last available byte, meaning there's nothing after). This replaces the
existing "next > end" checks.

Note also that the decode_varint() calls have a similar problem (we
don't even pass them "end"; they just keep parsing). These are probably
OK in practice since varints have a finite length (we stop parsing when
we'd overflow a uintmax_t), so the worst case is that we'd read past the
end of the extension and into the trailing bytes of the index.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 dir.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/dir.c b/dir.c
index f5293a6536..7b0513c476 100644
--- a/dir.c
+++ b/dir.c
@@ -2733,6 +2733,7 @@ static int read_one_dir(struct untracked_cache_dir **untracked_,
 {
 	struct untracked_cache_dir ud, *untracked;
 	const unsigned char *next, *data = rd->data, *end = rd->end;
+	const unsigned char *eos;
 	unsigned int value;
 	int i, len;
 
@@ -2756,21 +2757,24 @@ static int read_one_dir(struct untracked_cache_dir **untracked_,
 	ALLOC_ARRAY(ud.dirs, ud.dirs_nr);
 	data = next;
 
-	len = strlen((const char *)data);
-	next = data + len + 1;
-	if (next > rd->end)
+	eos = memchr(data, '\0', end - data);
+	if (!eos || eos == end)
 		return -1;
+	len = eos - data;
+	next = eos + 1;
+
 	*untracked_ = untracked = xmalloc(st_add3(sizeof(*untracked), len, 1));
 	memcpy(untracked, &ud, sizeof(ud));
 	memcpy(untracked->name, data, len + 1);
 	data = next;
 
 	for (i = 0; i < untracked->untracked_nr; i++) {
-		len = strlen((const char *)data);
-		next = data + len + 1;
-		if (next > rd->end)
+		eos = memchr(data, '\0', end - data);
+		if (!eos || eos == end)
 			return -1;
-		untracked->untracked[i] = xstrdup((const char*)data);
+		len = eos - data;
+		next = eos + 1;
+		untracked->untracked[i] = xmemdupz(data, len);
 		data = next;
 	}
 

From b511d6d569ce8baee888700ebd12f82d991a5250 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Thu, 18 Apr 2019 17:17:38 -0400
Subject: [PATCH 2/3] untracked-cache: simplify parsing by dropping "next"

When we parse an on-disk untracked cache, we have two pointers, "data"
and "next". As we parse, we point "next" to the end of an element, and
then later update "data" to match.

But we actually don't need two pointers. Each parsing step can just
update "data" directly from other variables we hold (and we don't have
to worry about bailing in an intermediate state, since any parsing
failure causes us to immediately discard "data" and return).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 dir.c | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/dir.c b/dir.c
index 7b0513c476..17865f44df 100644
--- a/dir.c
+++ b/dir.c
@@ -2732,50 +2732,44 @@ static int read_one_dir(struct untracked_cache_dir **untracked_,
 			struct read_data *rd)
 {
 	struct untracked_cache_dir ud, *untracked;
-	const unsigned char *next, *data = rd->data, *end = rd->end;
+	const unsigned char *data = rd->data, *end = rd->end;
 	const unsigned char *eos;
 	unsigned int value;
 	int i, len;
 
 	memset(&ud, 0, sizeof(ud));
 
-	next = data;
-	value = decode_varint(&next);
-	if (next > end)
+	value = decode_varint(&data);
+	if (data > end)
 		return -1;
 	ud.recurse	   = 1;
 	ud.untracked_alloc = value;
 	ud.untracked_nr	   = value;
 	if (ud.untracked_nr)
 		ALLOC_ARRAY(ud.untracked, ud.untracked_nr);
-	data = next;
 
-	next = data;
-	ud.dirs_alloc = ud.dirs_nr = decode_varint(&next);
-	if (next > end)
+	ud.dirs_alloc = ud.dirs_nr = decode_varint(&data);
+	if (data > end)
 		return -1;
 	ALLOC_ARRAY(ud.dirs, ud.dirs_nr);
-	data = next;
 
 	eos = memchr(data, '\0', end - data);
 	if (!eos || eos == end)
 		return -1;
 	len = eos - data;
-	next = eos + 1;
 
 	*untracked_ = untracked = xmalloc(st_add3(sizeof(*untracked), len, 1));
 	memcpy(untracked, &ud, sizeof(ud));
 	memcpy(untracked->name, data, len + 1);
-	data = next;
+	data = eos + 1;
 
 	for (i = 0; i < untracked->untracked_nr; i++) {
 		eos = memchr(data, '\0', end - data);
 		if (!eos || eos == end)
 			return -1;
 		len = eos - data;
-		next = eos + 1;
 		untracked->untracked[i] = xmemdupz(data, len);
-		data = next;
+		data = eos + 1;
 	}
 
 	rd->ucd[rd->index++] = untracked;

From 08bf354de71a806bad319ec236740ac698b58a5b Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Thu, 18 Apr 2019 17:18:35 -0400
Subject: [PATCH 3/3] untracked-cache: simplify parsing by dropping "len"

The code which parses untracked-cache extensions from disk keeps a "len"
variable, which is the size of the string we are parsing. But since we
now have an "end of string" variable, we can just use that to get the
length when we need it. This eliminates the need to keep "len" up to
date (and removes the possibility of any errors where "len" and "eos"
get out of sync).

As a bonus, it means we are not storing a string length in an "int",
which is a potential source of overflows (though in this case it seems
fairly unlikely for that to cause any memory problems).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 dir.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/dir.c b/dir.c
index 17865f44df..60438b2cdc 100644
--- a/dir.c
+++ b/dir.c
@@ -2735,7 +2735,7 @@ static int read_one_dir(struct untracked_cache_dir **untracked_,
 	const unsigned char *data = rd->data, *end = rd->end;
 	const unsigned char *eos;
 	unsigned int value;
-	int i, len;
+	int i;
 
 	memset(&ud, 0, sizeof(ud));
 
@@ -2756,19 +2756,17 @@ static int read_one_dir(struct untracked_cache_dir **untracked_,
 	eos = memchr(data, '\0', end - data);
 	if (!eos || eos == end)
 		return -1;
-	len = eos - data;
 
-	*untracked_ = untracked = xmalloc(st_add3(sizeof(*untracked), len, 1));
+	*untracked_ = untracked = xmalloc(st_add3(sizeof(*untracked), eos - data, 1));
 	memcpy(untracked, &ud, sizeof(ud));
-	memcpy(untracked->name, data, len + 1);
+	memcpy(untracked->name, data, eos - data + 1);
 	data = eos + 1;
 
 	for (i = 0; i < untracked->untracked_nr; i++) {
 		eos = memchr(data, '\0', end - data);
 		if (!eos || eos == end)
 			return -1;
-		len = eos - data;
-		untracked->untracked[i] = xmemdupz(data, len);
+		untracked->untracked[i] = xmemdupz(data, eos - data);
 		data = eos + 1;
 	}
 
@@ -2776,8 +2774,7 @@ static int read_one_dir(struct untracked_cache_dir **untracked_,
 	rd->data = data;
 
 	for (i = 0; i < untracked->dirs_nr; i++) {
-		len = read_one_dir(untracked->dirs + i, rd);
-		if (len < 0)
+		if (read_one_dir(untracked->dirs + i, rd) < 0)
 			return -1;
 	}
 	return 0;