diff --git a/fsck.c b/fsck.c index c2c8facd2d..2b18717ee8 100644 --- a/fsck.c +++ b/fsck.c @@ -748,6 +748,23 @@ static int fsck_tree(const struct object_id *tree_oid, return retval; } +/* + * Confirm that the headers of a commit or tag object end in a reasonable way, + * either with the usual "\n\n" separator, or at least with a trailing newline + * on the final header line. + * + * This property is important for the memory safety of our callers. It allows + * them to scan the buffer linewise without constantly checking the remaining + * size as long as: + * + * - they check that there are bytes left in the buffer at the start of any + * line (i.e., that the last newline they saw was not the final one we + * found here) + * + * - any intra-line scanning they do will stop at a newline, which will worst + * case hit the newline we found here as the end-of-header. This makes it + * OK for them to use helpers like parse_oid_hex(), or even skip_prefix(). + */ static int verify_headers(const void *data, unsigned long size, const struct object_id *oid, enum object_type type, struct fsck_options *options) @@ -808,6 +825,20 @@ static int fsck_ident(const char **ident, if (*p != ' ') return report(options, oid, type, FSCK_MSG_MISSING_SPACE_BEFORE_DATE, "invalid author/committer line - missing space before date"); p++; + /* + * Our timestamp parser is based on the C strto*() functions, which + * will happily eat whitespace, including the newline that is supposed + * to prevent us walking past the end of the buffer. So do our own + * scan, skipping linear whitespace but not newlines, and then + * confirming we found a digit. We _could_ be even more strict here, + * as we really expect only a single space, but since we have + * traditionally allowed extra whitespace, we'll continue to do so. + */ + while (*p == ' ' || *p == '\t') + p++; + if (!isdigit(*p)) + return report(options, oid, type, FSCK_MSG_BAD_DATE, + "invalid author/committer line - bad date"); if (*p == '0' && p[1] != ' ') return report(options, oid, type, FSCK_MSG_ZERO_PADDED_DATE, "invalid author/committer line - zero-padded date"); if (date_overflows(parse_timestamp(p, &end, 10))) @@ -834,12 +865,18 @@ static int fsck_commit(const struct object_id *oid, unsigned author_count; int err; const char *buffer_begin = buffer; + const char *buffer_end = buffer + size; const char *p; + /* + * We _must_ stop parsing immediately if this reports failure, as the + * memory safety of the rest of the function depends on it. See the + * comment above the definition of verify_headers() for more details. + */ if (verify_headers(buffer, size, oid, OBJ_COMMIT, options)) return -1; - if (!skip_prefix(buffer, "tree ", &buffer)) + if (buffer >= buffer_end || !skip_prefix(buffer, "tree ", &buffer)) return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_TREE, "invalid format - expected 'tree' line"); if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') { err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1"); @@ -847,7 +884,7 @@ static int fsck_commit(const struct object_id *oid, return err; } buffer = p + 1; - while (skip_prefix(buffer, "parent ", &buffer)) { + while (buffer < buffer_end && skip_prefix(buffer, "parent ", &buffer)) { if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n') { err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1"); if (err) @@ -856,7 +893,7 @@ static int fsck_commit(const struct object_id *oid, buffer = p + 1; } author_count = 0; - while (skip_prefix(buffer, "author ", &buffer)) { + while (buffer < buffer_end && skip_prefix(buffer, "author ", &buffer)) { author_count++; err = fsck_ident(&buffer, oid, OBJ_COMMIT, options); if (err) @@ -868,7 +905,7 @@ static int fsck_commit(const struct object_id *oid, err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines"); if (err) return err; - if (!skip_prefix(buffer, "committer ", &buffer)) + if (buffer >= buffer_end || !skip_prefix(buffer, "committer ", &buffer)) return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_COMMITTER, "invalid format - expected 'committer' line"); err = fsck_ident(&buffer, oid, OBJ_COMMIT, options); if (err) @@ -899,13 +936,19 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer, int ret = 0; char *eol; struct strbuf sb = STRBUF_INIT; + const char *buffer_end = buffer + size; const char *p; + /* + * We _must_ stop parsing immediately if this reports failure, as the + * memory safety of the rest of the function depends on it. See the + * comment above the definition of verify_headers() for more details. + */ ret = verify_headers(buffer, size, oid, OBJ_TAG, options); if (ret) goto done; - if (!skip_prefix(buffer, "object ", &buffer)) { + if (buffer >= buffer_end || !skip_prefix(buffer, "object ", &buffer)) { ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_OBJECT, "invalid format - expected 'object' line"); goto done; } @@ -916,11 +959,11 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer, } buffer = p + 1; - if (!skip_prefix(buffer, "type ", &buffer)) { + if (buffer >= buffer_end || !skip_prefix(buffer, "type ", &buffer)) { ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TYPE_ENTRY, "invalid format - expected 'type' line"); goto done; } - eol = strchr(buffer, '\n'); + eol = memchr(buffer, '\n', buffer_end - buffer); if (!eol) { ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TYPE, "invalid format - unexpected end after 'type' line"); goto done; @@ -932,11 +975,11 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer, goto done; buffer = eol + 1; - if (!skip_prefix(buffer, "tag ", &buffer)) { + if (buffer >= buffer_end || !skip_prefix(buffer, "tag ", &buffer)) { ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAG_ENTRY, "invalid format - expected 'tag' line"); goto done; } - eol = strchr(buffer, '\n'); + eol = memchr(buffer, '\n', buffer_end - buffer); if (!eol) { ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAG, "invalid format - unexpected end after 'type' line"); goto done; @@ -952,7 +995,7 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer, } buffer = eol + 1; - if (!skip_prefix(buffer, "tagger ", &buffer)) { + if (buffer >= buffer_end || !skip_prefix(buffer, "tagger ", &buffer)) { /* early tags do not contain 'tagger' lines; warn only */ ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAGGER_ENTRY, "invalid format - expected 'tagger' line"); if (ret) @@ -960,10 +1003,8 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer, } else ret = fsck_ident(&buffer, oid, OBJ_TAG, options); - if (!*buffer) - goto done; - if (!starts_with(buffer, "\n")) { + if (buffer < buffer_end && !starts_with(buffer, "\n")) { /* * The verify_headers() check will allow * e.g. "[...]tagger \nsome diff --git a/t/t1451-fsck-buffer.sh b/t/t1451-fsck-buffer.sh new file mode 100755 index 0000000000..9ac270abab --- /dev/null +++ b/t/t1451-fsck-buffer.sh @@ -0,0 +1,140 @@ +#!/bin/sh + +test_description='fsck on buffers without NUL termination + +The goal here is to make sure that the various fsck parsers never look +past the end of the buffer they are given, even when encountering broken +or truncated objects. + +We have to use "hash-object" for this because most code paths that read objects +append an extra NUL for safety after the buffer. But hash-object, since it is +reading straight from a file (and possibly even mmap-ing it) cannot always do +so. + +These tests _might_ catch such overruns in normal use, but should be run with +ASan or valgrind for more confidence. +' +. ./test-lib.sh + +# the general idea for tags and commits is to build up the "base" file +# progressively, and then test new truncations on top of it. +reset () { + test_expect_success 'reset input to empty' ' + >base + ' +} + +add () { + content="$1" + type=${content%% *} + test_expect_success "add $type line" ' + echo "$content" >>base + ' +} + +check () { + type=$1 + fsck=$2 + content=$3 + test_expect_success "truncated $type ($fsck, \"$content\")" ' + # do not pipe into hash-object here; we want to increase + # the chance that it uses a fixed-size buffer or mmap, + # and a pipe would be read into a strbuf. + { + cat base && + echo "$content" + } >input && + test_must_fail git hash-object -t "$type" input 2>err && + grep "$fsck" err + ' +} + +test_expect_success 'create valid objects' ' + git commit --allow-empty -m foo && + commit=$(git rev-parse --verify HEAD) && + tree=$(git rev-parse --verify HEAD^{tree}) +' + +reset +check commit missingTree "" +check commit missingTree "tr" +check commit missingTree "tree" +check commit badTreeSha1 "tree " +check commit badTreeSha1 "tree 1234" +add "tree $tree" + +# these expect missingAuthor because "parent" is optional +check commit missingAuthor "" +check commit missingAuthor "par" +check commit missingAuthor "parent" +check commit badParentSha1 "parent " +check commit badParentSha1 "parent 1234" +add "parent $commit" + +check commit missingAuthor "" +check commit missingAuthor "au" +check commit missingAuthor "author" +ident_checks () { + check $1 missingEmail "$2 " + check $1 missingEmail "$2 name" + check $1 badEmail "$2 name <" + check $1 badEmail "$2 name " + check $1 badDate "$2 name " + check $1 badDate "$2 name 1234" + check $1 badTimezone "$2 name 1234 " + check $1 badTimezone "$2 name 1234 +" +} +ident_checks commit author +add "author name 1234 +0000" + +check commit missingCommitter "" +check commit missingCommitter "co" +check commit missingCommitter "committer" +ident_checks commit committer +add "committer name 1234 +0000" + +reset +check tag missingObject "" +check tag missingObject "obj" +check tag missingObject "object" +check tag badObjectSha1 "object " +check tag badObjectSha1 "object 1234" +add "object $commit" + +check tag missingType "" +check tag missingType "ty" +check tag missingType "type" +check tag badType "type " +check tag badType "type com" +add "type commit" + +check tag missingTagEntry "" +check tag missingTagEntry "ta" +check tag missingTagEntry "tag" +check tag badTagName "tag " +add "tag foo" + +check tag missingTagger "" +check tag missingTagger "ta" +check tag missingTagger "tagger" +ident_checks tag tagger + +# trees are a binary format and can't use our earlier helpers +test_expect_success 'truncated tree (short hash)' ' + printf "100644 foo\0\1\1\1\1" >input && + test_must_fail git hash-object -t tree input 2>err && + grep badTree err +' + +test_expect_success 'truncated tree (missing nul)' ' + # these two things are indistinguishable to the parser. The important + # thing about this is example is that there are enough bytes to + # make up a hash, and that there is no NUL (and we confirm that the + # parser does not walk past the end of the buffer). + printf "100644 a long filename, or a hash with missing nul?" >input && + test_must_fail git hash-object -t tree input 2>err && + grep badTree err +' + +test_done