From 947208b725188eb499625ebc5c6e43d54c97e4fc Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 31 Jul 2019 00:38:11 -0400 Subject: [PATCH 1/6] setup_traverse_info(): stop copying oid We assume that if setup_traverse_info() is passed a non-empty "base" string, that string is pointing into a tree object and we can read the object oid by skipping past the trailing NUL. As it turns out, this is not true for either of the two calls, and we may end up reading garbage bytes: 1. In git-merge-tree, our base string is either empty (in which case we'd never run this code), or it comes from our traverse_path() helper. The latter overallocates a buffer by the_hash_algo->rawsz bytes, but then fills it with only make_traverse_path(), leaving those extra bytes uninitialized (but part of a legitimate heap buffer). 2. In unpack_trees(), we pass o->prefix, which is some arbitrary string from the caller. In "git read-tree --prefix=foo", for instance, it will point to the command-line parameter, and we'll read 20 bytes past the end of the string. Interestingly, tools like ASan do not detect (2) because the process argv is part of a big pre-allocated buffer. So we're reading trash, but it's trash that's probably part of the next argument, or the environment. You can convince it to fail by putting something like this at the beginning of common-main.c's main() function: { int i; for (i = 0; i < argc; i++) argv[i] = xstrdup_or_null(argv[i]); } That puts the arguments into their own heap buffers, so running: make SANITIZE=address test will find problems when "read-tree --prefix" is used (e.g., in t3030). Doubly interesting, even with the hackery above, this does not fail prior to ea82b2a085 (tree-walk: store object_id in a separate member, 2019-01-15). That commit switched setup_traverse_info() to actually copying the hash, rather than simply pointing to it. That pointer was always pointing to garbage memory, but that commit started actually dereferencing the bytes, which is what triggers ASan. That also implies that nobody actually cares about reading these oid bytes anyway (or at least no path covered by our tests). And manual inspection of the code backs that up (I'll follow this patch with some cleanups that show definitively this is the case, but they're quite invasive, so it's worth doing this fix on its own). So let's drop the bogus hashcpy(), along with the confusing oversizing in merge-tree. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Documentation/technical/api-tree-walking.txt | 4 +--- builtin/merge-tree.c | 2 +- tree-walk.c | 4 +--- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/Documentation/technical/api-tree-walking.txt b/Documentation/technical/api-tree-walking.txt index bde18622a8..59d78e0362 100644 --- a/Documentation/technical/api-tree-walking.txt +++ b/Documentation/technical/api-tree-walking.txt @@ -62,9 +62,7 @@ Initializing `setup_traverse_info`:: Initialize a `traverse_info` given the pathname of the tree to start - traversing from. The `base` argument is assumed to be the `path` - member of the `name_entry` being recursed into unless the tree is a - top-level tree in which case the empty string ("") is used. + traversing from. Walking ------- diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c index 34ca0258b1..8ac6270836 100644 --- a/builtin/merge-tree.c +++ b/builtin/merge-tree.c @@ -180,7 +180,7 @@ static struct merge_list *create_entry(unsigned stage, unsigned mode, const stru static char *traverse_path(const struct traverse_info *info, const struct name_entry *n) { - char *path = xmallocz(traverse_path_len(info, n) + the_hash_algo->rawsz); + char *path = xmallocz(traverse_path_len(info, n)); return make_traverse_path(path, info, n); } diff --git a/tree-walk.c b/tree-walk.c index ec32a47b2e..ba106152ef 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -177,10 +177,8 @@ void setup_traverse_info(struct traverse_info *info, const char *base) info->pathlen = pathlen ? pathlen + 1 : 0; info->name.path = base; info->name.pathlen = pathlen; - if (pathlen) { - hashcpy(info->name.oid.hash, (const unsigned char *)base + pathlen + 1); + if (pathlen) info->prev = &dummy; - } } char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n) From 9055384710dd8963b125f4f87c24d8f67d9fa24f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 31 Jul 2019 00:38:15 -0400 Subject: [PATCH 2/6] tree-walk: drop oid from traverse_info As the previous commit shows, the presence of an oid in each level of the traverse_info is confusing and ultimately not necessary. Let's drop it to make it clear that it will not always be set (as well as convince us that it's unused, and let the compiler catch any merges with other branches that do add new uses). Since the oid is part of name_entry, we'll actually stop embedding a name_entry entirely, and instead just separately hold the pathname, its length, and the mode. This makes the resulting code slightly more verbose as we have to pass those elements around individually. But it also makes it more clear what each code path is going to use (and in most of the paths, we really only care about the pathname itself). A few of these conversions are noisier than they need to be, as they also take the opportunity to rename "len" to "namelen" for clarity (especially where we also have "pathlen" or "ce_len" alongside). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/merge-tree.c | 2 +- cache-tree.c | 2 +- tree-walk.c | 23 ++++++++++--------- tree-walk.h | 8 +++++-- unpack-trees.c | 53 ++++++++++++++++++++++++-------------------- 5 files changed, 49 insertions(+), 39 deletions(-) diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c index 8ac6270836..f0e4cfefaa 100644 --- a/builtin/merge-tree.c +++ b/builtin/merge-tree.c @@ -181,7 +181,7 @@ static struct merge_list *create_entry(unsigned stage, unsigned mode, const stru static char *traverse_path(const struct traverse_info *info, const struct name_entry *n) { char *path = xmallocz(traverse_path_len(info, n)); - return make_traverse_path(path, info, n); + return make_traverse_path(path, info, n->path, n->pathlen); } static void resolve(const struct traverse_info *info, struct name_entry *ours, struct name_entry *result) diff --git a/cache-tree.c b/cache-tree.c index b13bfaf71e..badf5669f1 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -713,7 +713,7 @@ static struct cache_tree *find_cache_tree_from_traversal(struct cache_tree *root if (!info->prev) return root; our_parent = find_cache_tree_from_traversal(root, info->prev); - return cache_tree_find(our_parent, info->name.path); + return cache_tree_find(our_parent, info->name); } int cache_tree_matches_traversal(struct cache_tree *root, diff --git a/tree-walk.c b/tree-walk.c index ba106152ef..4610f77383 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -175,27 +175,27 @@ void setup_traverse_info(struct traverse_info *info, const char *base) if (pathlen && base[pathlen-1] == '/') pathlen--; info->pathlen = pathlen ? pathlen + 1 : 0; - info->name.path = base; - info->name.pathlen = pathlen; + info->name = base; + info->namelen = pathlen; if (pathlen) info->prev = &dummy; } -char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n) +char *make_traverse_path(char *path, const struct traverse_info *info, + const char *name, size_t namelen) { - int len = tree_entry_len(n); int pathlen = info->pathlen; - path[pathlen + len] = 0; + path[pathlen + namelen] = 0; for (;;) { - memcpy(path + pathlen, n->path, len); + memcpy(path + pathlen, name, namelen); if (!pathlen) break; path[--pathlen] = '/'; - n = &info->name; - len = tree_entry_len(n); + name = info->name; + namelen = info->namelen; info = info->prev; - pathlen -= len; + pathlen -= namelen; } return path; } @@ -397,12 +397,13 @@ int traverse_trees(struct index_state *istate, if (info->prev) { strbuf_grow(&base, info->pathlen); - make_traverse_path(base.buf, info->prev, &info->name); + make_traverse_path(base.buf, info->prev, info->name, + info->namelen); base.buf[info->pathlen-1] = '/'; strbuf_setlen(&base, info->pathlen); traverse_path = xstrndup(base.buf, info->pathlen); } else { - traverse_path = xstrndup(info->name.path, info->pathlen); + traverse_path = xstrndup(info->name, info->pathlen); } info->traverse_path = traverse_path; for (;;) { diff --git a/tree-walk.h b/tree-walk.h index 161e2400f4..baa2aa62c7 100644 --- a/tree-walk.h +++ b/tree-walk.h @@ -56,7 +56,10 @@ enum get_oid_result get_tree_entry_follow_symlinks(struct object_id *tree_oid, c struct traverse_info { const char *traverse_path; struct traverse_info *prev; - struct name_entry name; + const char *name; + size_t namelen; + unsigned mode; + int pathlen; struct pathspec *pathspec; @@ -67,7 +70,8 @@ struct traverse_info { }; int get_tree_entry(const struct object_id *, const char *, struct object_id *, unsigned short *); -char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n); +char *make_traverse_path(char *path, const struct traverse_info *info, + const char *name, size_t namelen); void setup_traverse_info(struct traverse_info *info, const char *base); static inline int traverse_path_len(const struct traverse_info *info, const struct name_entry *n) diff --git a/unpack-trees.c b/unpack-trees.c index 50189909b8..26f971f7ff 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -632,7 +632,7 @@ static int unpack_index_entry(struct cache_entry *ce, return ret; } -static int find_cache_pos(struct traverse_info *, const struct name_entry *); +static int find_cache_pos(struct traverse_info *, const char *p, size_t len); static void restore_cache_bottom(struct traverse_info *info, int bottom) { @@ -651,7 +651,7 @@ static int switch_cache_bottom(struct traverse_info *info) if (o->diff_index_cached) return 0; ret = o->cache_bottom; - pos = find_cache_pos(info->prev, &info->name); + pos = find_cache_pos(info->prev, info->name, info->namelen); if (pos < -1) o->cache_bottom = -2 - pos; @@ -690,7 +690,7 @@ static int index_pos_by_traverse_info(struct name_entry *names, char *name = xmalloc(len + 1 /* slash */ + 1 /* NUL */); int pos; - make_traverse_path(name, info, names); + make_traverse_path(name, info, names->path, names->pathlen); name[len++] = '/'; name[len] = '\0'; pos = index_name_pos(o->src_index, name, len); @@ -811,7 +811,9 @@ static int traverse_trees_recursive(int n, unsigned long dirmask, newinfo = *info; newinfo.prev = info; newinfo.pathspec = info->pathspec; - newinfo.name = *p; + newinfo.name = p->path; + newinfo.namelen = p->pathlen; + newinfo.mode = p->mode; newinfo.pathlen += tree_entry_len(p) + 1; newinfo.df_conflicts |= df_conflicts; @@ -863,14 +865,18 @@ static int traverse_trees_recursive(int n, unsigned long dirmask, * itself - the caller needs to do the final check for the cache * entry having more data at the end! */ -static int do_compare_entry_piecewise(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n) +static int do_compare_entry_piecewise(const struct cache_entry *ce, + const struct traverse_info *info, + const char *name, size_t namelen, + unsigned mode) { - int len, pathlen, ce_len; + int pathlen, ce_len; const char *ce_name; if (info->prev) { int cmp = do_compare_entry_piecewise(ce, info->prev, - &info->name); + info->name, info->namelen, + info->mode); if (cmp) return cmp; } @@ -884,15 +890,15 @@ static int do_compare_entry_piecewise(const struct cache_entry *ce, const struct ce_len -= pathlen; ce_name = ce->name + pathlen; - len = tree_entry_len(n); - return df_name_compare(ce_name, ce_len, S_IFREG, n->path, len, n->mode); + return df_name_compare(ce_name, ce_len, S_IFREG, name, namelen, mode); } static int do_compare_entry(const struct cache_entry *ce, const struct traverse_info *info, - const struct name_entry *n) + const char *name, size_t namelen, + unsigned mode) { - int len, pathlen, ce_len; + int pathlen, ce_len; const char *ce_name; int cmp; @@ -902,7 +908,7 @@ static int do_compare_entry(const struct cache_entry *ce, * it is quicker to use the precomputed version. */ if (!info->traverse_path) - return do_compare_entry_piecewise(ce, info, n); + return do_compare_entry_piecewise(ce, info, name, namelen, mode); cmp = strncmp(ce->name, info->traverse_path, info->pathlen); if (cmp) @@ -917,13 +923,12 @@ static int do_compare_entry(const struct cache_entry *ce, ce_len -= pathlen; ce_name = ce->name + pathlen; - len = tree_entry_len(n); - return df_name_compare(ce_name, ce_len, S_IFREG, n->path, len, n->mode); + return df_name_compare(ce_name, ce_len, S_IFREG, name, namelen, mode); } static int compare_entry(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n) { - int cmp = do_compare_entry(ce, info, n); + int cmp = do_compare_entry(ce, info, n->path, n->pathlen, n->mode); if (cmp) return cmp; @@ -939,7 +944,8 @@ static int ce_in_traverse_path(const struct cache_entry *ce, { if (!info->prev) return 1; - if (do_compare_entry(ce, info->prev, &info->name)) + if (do_compare_entry(ce, info->prev, + info->name, info->namelen, info->mode)) return 0; /* * If ce (blob) is the same name as the path (which is a tree @@ -964,7 +970,7 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info, ce->ce_flags = create_ce_flags(stage); ce->ce_namelen = len; oidcpy(&ce->oid, &n->oid); - make_traverse_path(ce->name, info, n); + make_traverse_path(ce->name, info, n->path, n->pathlen); return ce; } @@ -1057,13 +1063,12 @@ static int unpack_failed(struct unpack_trees_options *o, const char *message) * the directory. */ static int find_cache_pos(struct traverse_info *info, - const struct name_entry *p) + const char *p, size_t p_len) { int pos; struct unpack_trees_options *o = info->data; struct index_state *index = o->src_index; int pfxlen = info->pathlen; - int p_len = tree_entry_len(p); for (pos = o->cache_bottom; pos < index->cache_nr; pos++) { const struct cache_entry *ce = index->cache[pos]; @@ -1099,7 +1104,7 @@ static int find_cache_pos(struct traverse_info *info, ce_len = ce_slash - ce_name; else ce_len = ce_namelen(ce) - pfxlen; - cmp = name_compare(p->path, p_len, ce_name, ce_len); + cmp = name_compare(p, p_len, ce_name, ce_len); /* * Exact match; if we have a directory we need to * delay returning it. @@ -1114,7 +1119,7 @@ static int find_cache_pos(struct traverse_info *info, * E.g. ce_name == "t-i", and p->path == "t"; we may * have "t/a" in the index. */ - if (p_len < ce_len && !memcmp(ce_name, p->path, p_len) && + if (p_len < ce_len && !memcmp(ce_name, p, p_len) && ce_name[p_len] < '/') continue; /* keep looking */ break; @@ -1125,7 +1130,7 @@ static int find_cache_pos(struct traverse_info *info, static struct cache_entry *find_cache_entry(struct traverse_info *info, const struct name_entry *p) { - int pos = find_cache_pos(info, p); + int pos = find_cache_pos(info, p->path, p->pathlen); struct unpack_trees_options *o = info->data; if (0 <= pos) @@ -1138,10 +1143,10 @@ static void debug_path(struct traverse_info *info) { if (info->prev) { debug_path(info->prev); - if (*info->prev->name.path) + if (*info->prev->name) putchar('/'); } - printf("%s", info->name.path); + printf("%s", info->name); } static void debug_name_entry(int i, struct name_entry *n) From 37806080d7be1ab5b2fa918f6a528652596ea2c1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 31 Jul 2019 00:38:18 -0400 Subject: [PATCH 3/6] tree-walk: use size_t consistently We store and manipulate the cumulative traverse_info.pathlen as an "int", which can overflow when we are fed ridiculously long pathnames (e.g., ones at the edge of 2GB or 4GB, even if the individual tree entry names are smaller than that). The results can be confusing, though after some prodding I was not able to use this integer overflow to cause an under-allocated buffer. Let's consistently use size_t to generate and store these, and make sure our addition doesn't overflow. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- tree-walk.c | 4 ++-- tree-walk.h | 6 +++--- unpack-trees.c | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tree-walk.c b/tree-walk.c index 4610f77383..70f9eb5f1b 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -168,7 +168,7 @@ int tree_entry_gently(struct tree_desc *desc, struct name_entry *entry) void setup_traverse_info(struct traverse_info *info, const char *base) { - int pathlen = strlen(base); + size_t pathlen = strlen(base); static struct traverse_info dummy; memset(info, 0, sizeof(*info)); @@ -184,7 +184,7 @@ void setup_traverse_info(struct traverse_info *info, const char *base) char *make_traverse_path(char *path, const struct traverse_info *info, const char *name, size_t namelen) { - int pathlen = info->pathlen; + size_t pathlen = info->pathlen; path[pathlen + namelen] = 0; for (;;) { diff --git a/tree-walk.h b/tree-walk.h index baa2aa62c7..47bf85d282 100644 --- a/tree-walk.h +++ b/tree-walk.h @@ -60,7 +60,7 @@ struct traverse_info { size_t namelen; unsigned mode; - int pathlen; + size_t pathlen; struct pathspec *pathspec; unsigned long df_conflicts; @@ -74,9 +74,9 @@ char *make_traverse_path(char *path, const struct traverse_info *info, const char *name, size_t namelen); void setup_traverse_info(struct traverse_info *info, const char *base); -static inline int traverse_path_len(const struct traverse_info *info, const struct name_entry *n) +static inline size_t traverse_path_len(const struct traverse_info *info, const struct name_entry *n) { - return info->pathlen + tree_entry_len(n); + return st_add(info->pathlen, tree_entry_len(n)); } /* in general, positive means "kind of interesting" */ diff --git a/unpack-trees.c b/unpack-trees.c index 26f971f7ff..8dbfb22770 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -686,7 +686,7 @@ static int index_pos_by_traverse_info(struct name_entry *names, struct traverse_info *info) { struct unpack_trees_options *o = info->data; - int len = traverse_path_len(info, names); + size_t len = traverse_path_len(info, names); char *name = xmalloc(len + 1 /* slash */ + 1 /* NUL */); int pos; @@ -814,7 +814,7 @@ static int traverse_trees_recursive(int n, unsigned long dirmask, newinfo.name = p->path; newinfo.namelen = p->pathlen; newinfo.mode = p->mode; - newinfo.pathlen += tree_entry_len(p) + 1; + newinfo.pathlen = st_add3(newinfo.pathlen, tree_entry_len(p), 1); newinfo.df_conflicts |= df_conflicts; /* @@ -960,7 +960,7 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info, struct index_state *istate, int is_transient) { - int len = traverse_path_len(info, n); + size_t len = traverse_path_len(info, n); struct cache_entry *ce = is_transient ? make_empty_transient_cache_entry(len) : From b3b3cbcbf246b1051ad453bc02e24a89573e2911 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 31 Jul 2019 00:38:20 -0400 Subject: [PATCH 4/6] tree-walk: accept a raw length for traverse_path_len() We take a "struct name_entry", but only care about the length of the path name. Let's just take that length directly, making it easier to use the function from callers that sometimes do not have a name_entry at all. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/merge-tree.c | 2 +- tree-walk.h | 5 +++-- unpack-trees.c | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c index f0e4cfefaa..0629c87b19 100644 --- a/builtin/merge-tree.c +++ b/builtin/merge-tree.c @@ -180,7 +180,7 @@ static struct merge_list *create_entry(unsigned stage, unsigned mode, const stru static char *traverse_path(const struct traverse_info *info, const struct name_entry *n) { - char *path = xmallocz(traverse_path_len(info, n)); + char *path = xmallocz(traverse_path_len(info, tree_entry_len(n))); return make_traverse_path(path, info, n->path, n->pathlen); } diff --git a/tree-walk.h b/tree-walk.h index 47bf85d282..a25c751c1e 100644 --- a/tree-walk.h +++ b/tree-walk.h @@ -74,9 +74,10 @@ char *make_traverse_path(char *path, const struct traverse_info *info, const char *name, size_t namelen); void setup_traverse_info(struct traverse_info *info, const char *base); -static inline size_t traverse_path_len(const struct traverse_info *info, const struct name_entry *n) +static inline size_t traverse_path_len(const struct traverse_info *info, + size_t namelen) { - return st_add(info->pathlen, tree_entry_len(n)); + return st_add(info->pathlen, namelen); } /* in general, positive means "kind of interesting" */ diff --git a/unpack-trees.c b/unpack-trees.c index 8dbfb22770..492eff666a 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -686,7 +686,7 @@ static int index_pos_by_traverse_info(struct name_entry *names, struct traverse_info *info) { struct unpack_trees_options *o = info->data; - size_t len = traverse_path_len(info, names); + size_t len = traverse_path_len(info, tree_entry_len(names)); char *name = xmalloc(len + 1 /* slash */ + 1 /* NUL */); int pos; @@ -936,7 +936,7 @@ static int compare_entry(const struct cache_entry *ce, const struct traverse_inf * Even if the beginning compared identically, the ce should * compare as bigger than a directory leading up to it! */ - return ce_namelen(ce) > traverse_path_len(info, n); + return ce_namelen(ce) > traverse_path_len(info, tree_entry_len(n)); } static int ce_in_traverse_path(const struct cache_entry *ce, @@ -960,7 +960,7 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info, struct index_state *istate, int is_transient) { - size_t len = traverse_path_len(info, n); + size_t len = traverse_path_len(info, tree_entry_len(n)); struct cache_entry *ce = is_transient ? make_empty_transient_cache_entry(len) : From c43ab062598d0299ea6e0d115a6018189a7793bf Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 31 Jul 2019 00:38:23 -0400 Subject: [PATCH 5/6] tree-walk: add a strbuf wrapper for make_traverse_path() All but one of the callers of make_traverse_path() allocate a new heap buffer to store the path. Let's give them an easy way to write to a strbuf, which saves them from computing the length themselves (which is especially tricky when they want to add to the path). It will also make it easier for us to change the make_traverse_path() interface in a future patch to improve its bounds-checking. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Documentation/technical/api-tree-walking.txt | 4 ++++ builtin/merge-tree.c | 5 +++-- tree-walk.c | 21 ++++++++++++++------ tree-walk.h | 3 +++ unpack-trees.c | 16 +++++++-------- 5 files changed, 32 insertions(+), 17 deletions(-) diff --git a/Documentation/technical/api-tree-walking.txt b/Documentation/technical/api-tree-walking.txt index 59d78e0362..7962e32854 100644 --- a/Documentation/technical/api-tree-walking.txt +++ b/Documentation/technical/api-tree-walking.txt @@ -138,6 +138,10 @@ same in the next callback invocation. This utilizes the memory structure of a tree entry to avoid the overhead of using a generic strlen(). +`strbuf_make_traverse_path`:: + + Convenience wrapper to `make_traverse_path` into a strbuf. + Authors ------- diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c index 0629c87b19..87d949cf88 100644 --- a/builtin/merge-tree.c +++ b/builtin/merge-tree.c @@ -180,8 +180,9 @@ static struct merge_list *create_entry(unsigned stage, unsigned mode, const stru static char *traverse_path(const struct traverse_info *info, const struct name_entry *n) { - char *path = xmallocz(traverse_path_len(info, tree_entry_len(n))); - return make_traverse_path(path, info, n->path, n->pathlen); + struct strbuf buf = STRBUF_INIT; + strbuf_make_traverse_path(&buf, info, n->path, n->pathlen); + return strbuf_detach(&buf, NULL); } static void resolve(const struct traverse_info *info, struct name_entry *ours, struct name_entry *result) diff --git a/tree-walk.c b/tree-walk.c index 70f9eb5f1b..c2952f3793 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -200,6 +200,17 @@ char *make_traverse_path(char *path, const struct traverse_info *info, return path; } +void strbuf_make_traverse_path(struct strbuf *out, + const struct traverse_info *info, + const char *name, size_t namelen) +{ + size_t len = traverse_path_len(info, namelen); + + strbuf_grow(out, len); + make_traverse_path(out->buf + out->len, info, name, namelen); + strbuf_setlen(out, out->len + len); +} + struct tree_desc_skip { struct tree_desc_skip *prev; const void *ptr; @@ -396,12 +407,10 @@ int traverse_trees(struct index_state *istate, tx[i].d = t[i]; if (info->prev) { - strbuf_grow(&base, info->pathlen); - make_traverse_path(base.buf, info->prev, info->name, - info->namelen); - base.buf[info->pathlen-1] = '/'; - strbuf_setlen(&base, info->pathlen); - traverse_path = xstrndup(base.buf, info->pathlen); + strbuf_make_traverse_path(&base, info->prev, + info->name, info->namelen); + strbuf_addch(&base, '/'); + traverse_path = xstrndup(base.buf, base.len); } else { traverse_path = xstrndup(info->name, info->pathlen); } diff --git a/tree-walk.h b/tree-walk.h index a25c751c1e..994c14a499 100644 --- a/tree-walk.h +++ b/tree-walk.h @@ -72,6 +72,9 @@ struct traverse_info { int get_tree_entry(const struct object_id *, const char *, struct object_id *, unsigned short *); char *make_traverse_path(char *path, const struct traverse_info *info, const char *name, size_t namelen); +void strbuf_make_traverse_path(struct strbuf *out, + const struct traverse_info *info, + const char *name, size_t namelen); void setup_traverse_info(struct traverse_info *info, const char *base); static inline size_t traverse_path_len(const struct traverse_info *info, diff --git a/unpack-trees.c b/unpack-trees.c index 492eff666a..c3059c2440 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -686,21 +686,19 @@ static int index_pos_by_traverse_info(struct name_entry *names, struct traverse_info *info) { struct unpack_trees_options *o = info->data; - size_t len = traverse_path_len(info, tree_entry_len(names)); - char *name = xmalloc(len + 1 /* slash */ + 1 /* NUL */); + struct strbuf name = STRBUF_INIT; int pos; - make_traverse_path(name, info, names->path, names->pathlen); - name[len++] = '/'; - name[len] = '\0'; - pos = index_name_pos(o->src_index, name, len); + strbuf_make_traverse_path(&name, info, names->path, names->pathlen); + strbuf_addch(&name, '/'); + pos = index_name_pos(o->src_index, name.buf, name.len); if (pos >= 0) BUG("This is a directory and should not exist in index"); pos = -pos - 1; - if (!starts_with(o->src_index->cache[pos]->name, name) || - (pos > 0 && starts_with(o->src_index->cache[pos-1]->name, name))) + if (!starts_with(o->src_index->cache[pos]->name, name.buf) || + (pos > 0 && starts_with(o->src_index->cache[pos-1]->name, name.buf))) BUG("pos must point at the first entry in this directory"); - free(name); + strbuf_release(&name); return pos; } From 5aa02f98685d78666293149087d3f69b97528cfb Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 31 Jul 2019 00:38:25 -0400 Subject: [PATCH 6/6] tree-walk: harden make_traverse_path() length computations The make_traverse_path() function isn't very careful about checking its output buffer boundaries. In fact, it doesn't even _know_ the size of the buffer it's writing to, and just assumes that the caller used traverse_path_len() correctly. And even then we assume that our traverse_info.pathlen components are all correct, and just blindly write into the buffer. Let's improve this situation a bit: - have the caller pass in their allocated buffer length, which we'll check against our own computations - check for integer underflow as we do our backwards-insertion of pathnames into the buffer - check that we do not run out items in our list to traverse before we've filled the expected number of bytes None of these should be triggerable in practice (especially since our switch to size_t everywhere in a previous commit), but it doesn't hurt to check our assumptions. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- tree-walk.c | 28 ++++++++++++++++++++-------- tree-walk.h | 2 +- unpack-trees.c | 3 ++- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/tree-walk.c b/tree-walk.c index c2952f3793..4f1e9d79ab 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -181,21 +181,32 @@ void setup_traverse_info(struct traverse_info *info, const char *base) info->prev = &dummy; } -char *make_traverse_path(char *path, const struct traverse_info *info, +char *make_traverse_path(char *path, size_t pathlen, + const struct traverse_info *info, const char *name, size_t namelen) { - size_t pathlen = info->pathlen; + /* Always points to the end of the name we're about to add */ + size_t pos = st_add(info->pathlen, namelen); - path[pathlen + namelen] = 0; + if (pos >= pathlen) + BUG("too small buffer passed to make_traverse_path"); + + path[pos] = 0; for (;;) { - memcpy(path + pathlen, name, namelen); - if (!pathlen) + if (pos < namelen) + BUG("traverse_info pathlen does not match strings"); + pos -= namelen; + memcpy(path + pos, name, namelen); + + if (!pos) break; - path[--pathlen] = '/'; + path[--pos] = '/'; + + if (!info) + BUG("traverse_info ran out of list items"); name = info->name; namelen = info->namelen; info = info->prev; - pathlen -= namelen; } return path; } @@ -207,7 +218,8 @@ void strbuf_make_traverse_path(struct strbuf *out, size_t len = traverse_path_len(info, namelen); strbuf_grow(out, len); - make_traverse_path(out->buf + out->len, info, name, namelen); + make_traverse_path(out->buf + out->len, out->alloc - out->len, + info, name, namelen); strbuf_setlen(out, out->len + len); } diff --git a/tree-walk.h b/tree-walk.h index 994c14a499..a3ad54e6ce 100644 --- a/tree-walk.h +++ b/tree-walk.h @@ -70,7 +70,7 @@ struct traverse_info { }; int get_tree_entry(const struct object_id *, const char *, struct object_id *, unsigned short *); -char *make_traverse_path(char *path, const struct traverse_info *info, +char *make_traverse_path(char *path, size_t pathlen, const struct traverse_info *info, const char *name, size_t namelen); void strbuf_make_traverse_path(struct strbuf *out, const struct traverse_info *info, diff --git a/unpack-trees.c b/unpack-trees.c index c3059c2440..65c4677578 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -968,7 +968,8 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info, ce->ce_flags = create_ce_flags(stage); ce->ce_namelen = len; oidcpy(&ce->oid, &n->oid); - make_traverse_path(ce->name, info, n->path, n->pathlen); + /* len+1 because the cache_entry allocates space for NUL */ + make_traverse_path(ce->name, len + 1, info, n->path, n->pathlen); return ce; }