From 1de16aecf51daf5794aa074f6dd133e088a12690 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Fri, 8 Mar 2019 16:28:34 +0700 Subject: [PATCH] worktree add: sanitize worktree names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Worktree names are based on $(basename $GIT_WORK_TREE). They weren't significant until 3a3b9d8cde (refs: new ref types to make per-worktree refs visible to all worktrees - 2018-10-21), after which a worktree name could be part of a refname and must follow refname rules. Update the 'worktree add' code to replace or remove special characters so that generated names follow these rules. In the future, users will be able to specify the worktree name themselves if they're not happy with this dumb character substitution. Reported-by: Konstantin Kharlamov Helped-by: Jeff King Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- builtin/worktree.c | 10 +++- refs.c | 110 ++++++++++++++++++++++++++++++++-------- refs.h | 6 +++ t/t2400-worktree-add.sh | 5 ++ 4 files changed, 110 insertions(+), 21 deletions(-) diff --git a/builtin/worktree.c b/builtin/worktree.c index 3f9907fcc9..b0266f2d44 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -275,6 +275,7 @@ static int add_worktree(const char *path, const char *refname, struct strbuf symref = STRBUF_INIT; struct commit *commit = NULL; int is_branch = 0; + struct strbuf sb_name = STRBUF_INIT; validate_worktree_add(path, opts); @@ -290,7 +291,13 @@ static int add_worktree(const char *path, const char *refname, die(_("invalid reference: %s"), refname); name = worktree_basename(path, &len); - git_path_buf(&sb_repo, "worktrees/%.*s", (int)(path + len - name), name); + strbuf_add(&sb, name, path + len - name); + sanitize_refname_component(sb.buf, &sb_name); + if (!sb_name.len) + BUG("How come '%s' becomes empty after sanitization?", sb.buf); + strbuf_reset(&sb); + name = sb_name.buf; + git_path_buf(&sb_repo, "worktrees/%s", name); len 
= sb_repo.len; if (safe_create_leading_directories_const(sb_repo.buf)) die_errno(_("could not create leading directories of '%s'"), @@ -415,6 +422,7 @@ done: strbuf_release(&symref); strbuf_release(&sb_repo); strbuf_release(&sb_git); + strbuf_release(&sb_name); return ret; } diff --git a/refs.c b/refs.c index 142888a40a..94e8d461c7 100644 --- a/refs.c +++ b/refs.c @@ -63,7 +63,7 @@ static unsigned char refname_disposition[256] = { * not legal. It is legal if it is something reasonable to have under * ".git/refs/"; We do not like it if: * - * - any path component of it begins with ".", or + * - it begins with ".", or * - it has double dots "..", or * - it has ASCII control characters, or * - it has ":", "?", "[", "\", "^", "~", SP, or TAB anywhere, or @@ -71,31 +71,63 @@ static unsigned char refname_disposition[256] = { * - it ends with a "/", or * - it ends with ".lock", or * - it contains a "@{" portion + * + * When sanitized is not NULL, instead of rejecting the input refname + * as an error, try to come up with a usable replacement for the input + * refname in it. */ -static int check_refname_component(const char *refname, int *flags) +static int check_refname_component(const char *refname, int *flags, + struct strbuf *sanitized) { const char *cp; char last = '\0'; + size_t component_start = 0; /* garbage - not a reasonable initial value */ + + if (sanitized) + component_start = sanitized->len; for (cp = refname; ; cp++) { int ch = *cp & 255; unsigned char disp = refname_disposition[ch]; + + if (sanitized && disp != 1) + strbuf_addch(sanitized, ch); + switch (disp) { case 1: goto out; case 2: - if (last == '.') - return -1; /* Refname contains "..". */ + if (last == '.') { /* Refname contains "..". */ + if (sanitized) + /* collapse ".." to single "." */ + strbuf_setlen(sanitized, sanitized->len - 1); + else + return -1; + } break; case 3: - if (last == '@') - return -1; /* Refname contains "@{". */ + if (last == '@') { /* Refname contains "@{". 
*/ + if (sanitized) + sanitized->buf[sanitized->len-1] = '-'; + else + return -1; + } break; case 4: - return -1; + /* forbidden char */ + if (sanitized) + sanitized->buf[sanitized->len-1] = '-'; + else + return -1; + break; case 5: - if (!(*flags & REFNAME_REFSPEC_PATTERN)) - return -1; /* refspec can't be a pattern */ + if (!(*flags & REFNAME_REFSPEC_PATTERN)) { + /* refspec can't be a pattern */ + if (sanitized) + sanitized->buf[sanitized->len-1] = '-'; + else + return -1; + } /* * Unset the pattern flag so that we only accept @@ -109,26 +141,48 @@ static int check_refname_component(const char *refname, int *flags) out: if (cp == refname) return 0; /* Component has zero length. */ - if (refname[0] == '.') - return -1; /* Component starts with '.'. */ + + if (refname[0] == '.') { /* Component starts with '.'. */ + if (sanitized) + sanitized->buf[component_start] = '-'; + else + return -1; + } if (cp - refname >= LOCK_SUFFIX_LEN && - !memcmp(cp - LOCK_SUFFIX_LEN, LOCK_SUFFIX, LOCK_SUFFIX_LEN)) - return -1; /* Refname ends with ".lock". */ + !memcmp(cp - LOCK_SUFFIX_LEN, LOCK_SUFFIX, LOCK_SUFFIX_LEN)) { + if (!sanitized) + return -1; + /* Refname ends with ".lock". */ + while (strbuf_strip_suffix(sanitized, LOCK_SUFFIX)) { + /* try again in case we have .lock.lock */ + } + } return cp - refname; } -int check_refname_format(const char *refname, int flags) +static int check_or_sanitize_refname(const char *refname, int flags, + struct strbuf *sanitized) { int component_len, component_count = 0; - if (!strcmp(refname, "@")) + if (!strcmp(refname, "@")) { /* Refname is a single character '@'. */ - return -1; + if (sanitized) + strbuf_addch(sanitized, '-'); + else + return -1; + } while (1) { + if (sanitized && sanitized->len) + strbuf_complete(sanitized, '/'); + /* We are at the start of a path component. 
*/ - component_len = check_refname_component(refname, &flags); - if (component_len <= 0) + component_len = check_refname_component(refname, &flags, + sanitized); + if (sanitized && component_len == 0) + ; /* OK, omit empty component */ + else if (component_len <= 0) return -1; component_count++; @@ -138,13 +192,29 @@ int check_refname_format(const char *refname, int flags) refname += component_len + 1; } - if (refname[component_len - 1] == '.') - return -1; /* Refname ends with '.'. */ + if (refname[component_len - 1] == '.') { + /* Refname ends with '.'. */ + if (sanitized) + ; /* omit ending dot */ + else + return -1; + } if (!(flags & REFNAME_ALLOW_ONELEVEL) && component_count < 2) return -1; /* Refname has only one component. */ return 0; } +int check_refname_format(const char *refname, int flags) +{ + return check_or_sanitize_refname(refname, flags, NULL); +} + +void sanitize_refname_component(const char *refname, struct strbuf *out) +{ + if (check_or_sanitize_refname(refname, REFNAME_ALLOW_ONELEVEL, out)) + BUG("sanitizing refname '%s' check returned error", refname); +} + int refname_is_safe(const char *refname) { const char *rest; diff --git a/refs.h b/refs.h index 308fa1f03b..4d8c5465c3 100644 --- a/refs.h +++ b/refs.h @@ -460,6 +460,12 @@ int for_each_reflog(each_ref_fn fn, void *cb_data); */ int check_refname_format(const char *refname, int flags); +/* + * Apply the rules from check_refname_format, but mutate the result until it + * is acceptable, and place the result in "out". 
+ */ +void sanitize_refname_component(const char *refname, struct strbuf *out); + const char *prettify_refname(const char *refname); char *shorten_unambiguous_ref(const char *refname, int strict); diff --git a/t/t2400-worktree-add.sh b/t/t2400-worktree-add.sh index 286bba35d8..c989dbe321 100755 --- a/t/t2400-worktree-add.sh +++ b/t/t2400-worktree-add.sh @@ -570,4 +570,9 @@ test_expect_success '"add" an existing locked but missing worktree' ' git worktree add --force --force --detach gnoo ' +test_expect_success FUNNYNAMES 'sanitize generated worktree name' ' + git worktree add --detach ". weird*..?.lock.lock" && + test -d .git/worktrees/---weird-.- +' + test_done