diff --git a/Documentation/git-patch-id.txt b/Documentation/git-patch-id.txt index 442caff8a9..1d15fa45d5 100644 --- a/Documentation/git-patch-id.txt +++ b/Documentation/git-patch-id.txt @@ -8,18 +8,18 @@ git-patch-id - Compute unique ID for a patch SYNOPSIS -------- [verse] -'git patch-id' [--stable | --unstable] +'git patch-id' [--stable | --unstable | --verbatim] DESCRIPTION ----------- Read a patch from the standard input and compute the patch ID for it. A "patch ID" is nothing but a sum of SHA-1 of the file diffs associated with a -patch, with whitespace and line numbers ignored. As such, it's "reasonably -stable", but at the same time also reasonably unique, i.e., two patches that -have the same "patch ID" are almost guaranteed to be the same thing. +patch, with line numbers ignored. As such, it's "reasonably stable", but at +the same time also reasonably unique, i.e., two patches that have the same +"patch ID" are almost guaranteed to be the same thing. -IOW, you can use this thing to look for likely duplicate commits. +The main usecase for this command is to look for likely duplicate commits. When dealing with 'git diff-tree' output, it takes advantage of the fact that the patch is prefixed with the object name of the @@ -30,6 +30,12 @@ This can be used to make a mapping from patch ID to commit ID. OPTIONS ------- +--verbatim:: + Calculate the patch-id of the input as it is given, do not strip + any whitespace. + + This is the default if patchid.verbatim is true. + --stable:: Use a "stable" sum of hashes as the patch ID. With this option: - Reordering file diffs that make up a patch does not affect the ID. @@ -45,14 +51,16 @@ OPTIONS of "-O", thereby making existing databases storing such "unstable" or historical patch-ids unusable. + - All whitespace within the patch is ignored and does not affect the id. + This is the default if patchid.stable is set to true. --unstable:: Use an "unstable" hash as the patch ID. With this option, the result produced is compatible with the patch-id value produced - by git 1.9 and older. Users with pre-existing databases storing - patch-ids produced by git 1.9 and older (who do not deal with reordered - patches) may want to use this option. + by git 1.9 and older and whitespace is ignored. Users with pre-existing + databases storing patch-ids produced by git 1.9 and older (who do not deal + with reordered patches) may want to use this option. This is the default. diff --git a/builtin/log.c b/builtin/log.c index ee19dc5d45..e72869afb3 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -1763,7 +1763,7 @@ static void prepare_bases(struct base_tree_info *bases, struct object_id *patch_id; if (*commit_base_at(&commit_base, commit)) continue; - if (commit_patch_id(commit, &diffopt, &oid, 0, 1)) + if (commit_patch_id(commit, &diffopt, &oid, 0)) die(_("cannot get patch id")); ALLOC_GROW(bases->patch_id, bases->nr_patch_id + 1, bases->alloc_patch_id); patch_id = bases->patch_id + bases->nr_patch_id; diff --git a/builtin/patch-id.c b/builtin/patch-id.c index 881fcf3273..f840fbf1c7 100644 --- a/builtin/patch-id.c +++ b/builtin/patch-id.c @@ -2,6 +2,7 @@ #include "builtin.h" #include "config.h" #include "diff.h" +#include "parse-options.h" static void flush_current_id(int patchlen, struct object_id *id, struct object_id *result) { @@ -57,10 +58,12 @@ static int scan_hunk_header(const char *p, int *p_before, int *p_after) } static int get_one_patchid(struct object_id *next_oid, struct object_id *result, - struct strbuf *line_buf, int stable) + struct strbuf *line_buf, int stable, int verbatim) { int patchlen = 0, found_next = 0; int before = -1, after = -1; + int diff_is_binary = 0; + char pre_oid_str[GIT_MAX_HEXSZ + 1], post_oid_str[GIT_MAX_HEXSZ + 1]; git_hash_ctx ctx; the_hash_algo->init_fn(&ctx); @@ -71,11 +74,14 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result, const char *p = line; int len; - if (!skip_prefix(line, "diff-tree ", &p) && - !skip_prefix(line, "commit ", &p) && + /* Possibly skip over the prefix added by "log" or "format-patch" */ + if (!skip_prefix(line, "commit ", &p) && !skip_prefix(line, "From ", &p) && - starts_with(line, "\\ ") && 12 < strlen(line)) + starts_with(line, "\\ ") && 12 < strlen(line)) { + if (verbatim) + the_hash_algo->update_fn(&ctx, line, strlen(line)); continue; + } if (!get_oid_hex(p, next_oid)) { found_next = 1; @@ -88,14 +94,44 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result, /* Parsing diff header? */ if (before == -1) { - if (starts_with(line, "index ")) + if (starts_with(line, "GIT binary patch") || + starts_with(line, "Binary files")) { + diff_is_binary = 1; + before = 0; + the_hash_algo->update_fn(&ctx, pre_oid_str, + strlen(pre_oid_str)); + the_hash_algo->update_fn(&ctx, post_oid_str, + strlen(post_oid_str)); + if (stable) + flush_one_hunk(result, &ctx); continue; - else if (starts_with(line, "--- ")) + } else if (skip_prefix(line, "index ", &p)) { + char *oid1_end = strstr(line, ".."); + char *oid2_end = NULL; + if (oid1_end) + oid2_end = strstr(oid1_end, " "); + if (!oid2_end) + oid2_end = line + strlen(line) - 1; + if (oid1_end != NULL && oid2_end != NULL) { + *oid1_end = *oid2_end = '\0'; + strlcpy(pre_oid_str, p, GIT_MAX_HEXSZ + 1); + strlcpy(post_oid_str, oid1_end + 2, GIT_MAX_HEXSZ + 1); + } + continue; + } else if (starts_with(line, "--- ")) before = after = 1; else if (!isalpha(line[0])) break; } + if (diff_is_binary) { + if (starts_with(line, "diff ")) { + diff_is_binary = 0; + before = -1; + } + continue; + } + /* Looking for a valid hunk header? */ if (before == 0 && after == 0) { if (starts_with(line, "@@ -")) { @@ -120,8 +156,8 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result, if (line[0] == '+' || line[0] == ' ') after--; - /* Compute the sha without whitespace */ - len = remove_space(line); + /* Add line to hash algo (possibly removing whitespace) */ + len = verbatim ? strlen(line) : remove_space(line); patchlen += len; the_hash_algo->update_fn(&ctx, line, len); } @@ -134,7 +170,7 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result, return patchlen; } -static void generate_id_list(int stable) +static void generate_id_list(int stable, int verbatim) { struct object_id oid, n, result; int patchlen; @@ -142,21 +178,32 @@ static void generate_id_list(int stable) oidclr(&oid); while (!feof(stdin)) { - patchlen = get_one_patchid(&n, &result, &line_buf, stable); + patchlen = get_one_patchid(&n, &result, &line_buf, stable, verbatim); flush_current_id(patchlen, &oid, &result); oidcpy(&oid, &n); } strbuf_release(&line_buf); } -static const char patch_id_usage[] = "git patch-id [--stable | --unstable]"; +static const char *const patch_id_usage[] = { + N_("git patch-id [--stable | --unstable | --verbatim]"), NULL +}; + +struct patch_id_opts { + int stable; + int verbatim; +}; static int git_patch_id_config(const char *var, const char *value, void *cb) { - int *stable = cb; + struct patch_id_opts *opts = cb; if (!strcmp(var, "patchid.stable")) { - *stable = git_config_bool(var, value); + opts->stable = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "patchid.verbatim")) { + opts->verbatim = git_config_bool(var, value); return 0; } @@ -165,21 +212,29 @@ static int git_patch_id_config(const char *var, const char *value, void *cb) int cmd_patch_id(int argc, const char **argv, const char *prefix) { - int stable = -1; + /* if nothing is set, default to unstable */ + struct patch_id_opts config = {0, 0}; + int opts = 0; + struct option builtin_patch_id_options[] = { + OPT_CMDMODE(0, "unstable", &opts, + N_("use the unstable patch-id algorithm"), 1), + OPT_CMDMODE(0, "stable", &opts, + N_("use the stable patch-id algorithm"), 2), + OPT_CMDMODE(0, "verbatim", &opts, + N_("don't strip whitespace from the patch"), 3), + OPT_END() + }; - git_config(git_patch_id_config, &stable); + git_config(git_patch_id_config, &config); - /* If nothing is set, default to unstable. */ - if (stable < 0) - stable = 0; + /* verbatim implies stable */ + if (config.verbatim) + config.stable = 1; - if (argc == 2 && !strcmp(argv[1], "--stable")) - stable = 1; - else if (argc == 2 && !strcmp(argv[1], "--unstable")) - stable = 0; - else if (argc != 1) - usage(patch_id_usage); + argc = parse_options(argc, argv, prefix, builtin_patch_id_options, + patch_id_usage, 0); - generate_id_list(stable); + generate_id_list(opts ? opts > 1 : config.stable, + opts ? opts == 3 : config.verbatim); return 0; } diff --git a/diff.c b/diff.c index 285d6e2d57..35e46dd968 100644 --- a/diff.c +++ b/diff.c @@ -6229,7 +6229,7 @@ static void patch_id_add_mode(git_hash_ctx *ctx, unsigned mode) } /* returns 0 upon success, and writes result into oid */ -static int diff_get_patch_id(struct diff_options *options, struct object_id *oid, int diff_header_only, int stable) +static int diff_get_patch_id(struct diff_options *options, struct object_id *oid, int diff_header_only) { struct diff_queue_struct *q = &diff_queued_diff; int i; @@ -6276,61 +6276,62 @@ static int diff_get_patch_id(struct diff_options *options, struct object_id *oid if (p->one->mode == 0) { patch_id_add_string(&ctx, "newfilemode"); patch_id_add_mode(&ctx, p->two->mode); - patch_id_add_string(&ctx, "---/dev/null"); - patch_id_add_string(&ctx, "+++b/"); - the_hash_algo->update_fn(&ctx, p->two->path, len2); } else if (p->two->mode == 0) { patch_id_add_string(&ctx, "deletedfilemode"); patch_id_add_mode(&ctx, p->one->mode); - patch_id_add_string(&ctx, "---a/"); - the_hash_algo->update_fn(&ctx, p->one->path, len1); - patch_id_add_string(&ctx, "+++/dev/null"); - } else { - patch_id_add_string(&ctx, "---a/"); - the_hash_algo->update_fn(&ctx, p->one->path, len1); - patch_id_add_string(&ctx, "+++b/"); - the_hash_algo->update_fn(&ctx, p->two->path, len2); + } else if (p->one->mode != p->two->mode) { + patch_id_add_string(&ctx, "oldmode"); + patch_id_add_mode(&ctx, p->one->mode); + patch_id_add_string(&ctx, "newmode"); + patch_id_add_mode(&ctx, p->two->mode); } - if (diff_header_only) - continue; - - if (fill_mmfile(options->repo, &mf1, p->one) < 0 || - fill_mmfile(options->repo, &mf2, p->two) < 0) - return error("unable to read files to diff"); - - if (diff_filespec_is_binary(options->repo, p->one) || + if (diff_header_only) { + /* don't do anything since we're only populating header info */ + } else if (diff_filespec_is_binary(options->repo, p->one) || diff_filespec_is_binary(options->repo, p->two)) { the_hash_algo->update_fn(&ctx, oid_to_hex(&p->one->oid), the_hash_algo->hexsz); the_hash_algo->update_fn(&ctx, oid_to_hex(&p->two->oid), the_hash_algo->hexsz); - continue; + } else { + if (p->one->mode == 0) { + patch_id_add_string(&ctx, "---/dev/null"); + patch_id_add_string(&ctx, "+++b/"); + the_hash_algo->update_fn(&ctx, p->two->path, len2); + } else if (p->two->mode == 0) { + patch_id_add_string(&ctx, "---a/"); + the_hash_algo->update_fn(&ctx, p->one->path, len1); + patch_id_add_string(&ctx, "+++/dev/null"); + } else { + patch_id_add_string(&ctx, "---a/"); + the_hash_algo->update_fn(&ctx, p->one->path, len1); + patch_id_add_string(&ctx, "+++b/"); + the_hash_algo->update_fn(&ctx, p->two->path, len2); + } + + if (fill_mmfile(options->repo, &mf1, p->one) < 0 || + fill_mmfile(options->repo, &mf2, p->two) < 0) + return error("unable to read files to diff"); + xpp.flags = 0; + xecfg.ctxlen = 3; + xecfg.flags = XDL_EMIT_NO_HUNK_HDR; + if (xdi_diff_outf(&mf1, &mf2, NULL, + patch_id_consume, &data, &xpp, &xecfg)) + return error("unable to generate patch-id diff for %s", + p->one->path); } - - xpp.flags = 0; - xecfg.ctxlen = 3; - xecfg.flags = XDL_EMIT_NO_HUNK_HDR; - if (xdi_diff_outf(&mf1, &mf2, NULL, - patch_id_consume, &data, &xpp, &xecfg)) - return error("unable to generate patch-id diff for %s", - p->one->path); - - if (stable) - flush_one_hunk(oid, &ctx); + flush_one_hunk(oid, &ctx); } - if (!stable) - the_hash_algo->final_oid_fn(oid, &ctx); - return 0; } -int diff_flush_patch_id(struct diff_options *options, struct object_id *oid, int diff_header_only, int stable) +int diff_flush_patch_id(struct diff_options *options, struct object_id *oid, int diff_header_only) { struct diff_queue_struct *q = &diff_queued_diff; int i; - int result = diff_get_patch_id(options, oid, diff_header_only, stable); + int result = diff_get_patch_id(options, oid, diff_header_only); for (i = 0; i < q->nr; i++) diff_free_filepair(q->queue[i]); diff --git a/diff.h b/diff.h index 8ae18e5ab1..fd33caeb25 100644 --- a/diff.h +++ b/diff.h @@ -634,7 +634,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option); int run_diff_index(struct rev_info *revs, unsigned int option); int do_diff_cache(const struct object_id *, struct diff_options *); -int diff_flush_patch_id(struct diff_options *, struct object_id *, int, int); +int diff_flush_patch_id(struct diff_options *, struct object_id *, int); void flush_one_hunk(struct object_id *result, git_hash_ctx *ctx); int diff_result_code(struct diff_options *, int); diff --git a/patch-ids.c b/patch-ids.c index 46c6a8f3ea..3153446626 100644 --- a/patch-ids.c +++ b/patch-ids.c @@ -11,7 +11,7 @@ static int patch_id_defined(struct commit *commit) } int commit_patch_id(struct commit *commit, struct diff_options *options, - struct object_id *oid, int diff_header_only, int stable) + struct object_id *oid, int diff_header_only) { if (!patch_id_defined(commit)) return -1; @@ -22,7 +22,7 @@ int commit_patch_id(struct commit *commit, struct diff_options *options, else diff_root_tree_oid(&commit->object.oid, "", options); diffcore_std(options); - return diff_flush_patch_id(options, oid, diff_header_only, stable); + return diff_flush_patch_id(options, oid, diff_header_only); } /* @@ -48,11 +48,11 @@ static int patch_id_neq(const void *cmpfn_data, b = container_of(entry_or_key, struct patch_id, ent); if (is_null_oid(&a->patch_id) && - commit_patch_id(a->commit, opt, &a->patch_id, 0, 0)) + commit_patch_id(a->commit, opt, &a->patch_id, 0)) return error("Could not get patch ID for %s", oid_to_hex(&a->commit->object.oid)); if (is_null_oid(&b->patch_id) && - commit_patch_id(b->commit, opt, &b->patch_id, 0, 0)) + commit_patch_id(b->commit, opt, &b->patch_id, 0)) return error("Could not get patch ID for %s", oid_to_hex(&b->commit->object.oid)); return !oideq(&a->patch_id, &b->patch_id); @@ -82,7 +82,7 @@ static int init_patch_id_entry(struct patch_id *patch, struct object_id header_only_patch_id; patch->commit = commit; - if (commit_patch_id(commit, &ids->diffopts, &header_only_patch_id, 1, 0)) + if (commit_patch_id(commit, &ids->diffopts, &header_only_patch_id, 1)) return -1; hashmap_entry_init(&patch->ent, oidhash(&header_only_patch_id)); diff --git a/patch-ids.h b/patch-ids.h index ab6c6a6804..490d739371 100644 --- a/patch-ids.h +++ b/patch-ids.h @@ -20,7 +20,7 @@ struct patch_ids { }; int commit_patch_id(struct commit *commit, struct diff_options *options, - struct object_id *oid, int, int); + struct object_id *oid, int); int init_patch_ids(struct repository *, struct patch_ids *); int free_patch_ids(struct patch_ids *); diff --git a/t/t3419-rebase-patch-id.sh b/t/t3419-rebase-patch-id.sh index 295040f2fe..7181f176b8 100755 --- a/t/t3419-rebase-patch-id.sh +++ b/t/t3419-rebase-patch-id.sh @@ -43,15 +43,26 @@ test_expect_success 'setup: 500 lines' ' git add newfile && git commit -q -m "add small file" && - git cherry-pick main >/dev/null 2>&1 -' + git cherry-pick main >/dev/null 2>&1 && -test_expect_success 'setup attributes' ' - echo "file binary" >.gitattributes + git branch -f squashed main && + git checkout -q -f squashed && + git reset -q --soft HEAD~2 && + git commit -q -m squashed && + + git branch -f mode main && + git checkout -q -f mode && + test_chmod +x file && + git commit -q -a --amend && + + git branch -f modeother other && + git checkout -q -f modeother && + test_chmod +x file && + git commit -q -a --amend ' test_expect_success 'detect upstream patch' ' - git checkout -q main && + git checkout -q main^{} && scramble file && git add file && git commit -q -m "change big file again" && @@ -61,14 +72,46 @@ test_expect_success 'detect upstream patch' ' test_must_be_empty revs ' +test_expect_success 'detect upstream patch binary' ' + echo "file binary" >.gitattributes && + git checkout -q other^{} && + git rebase main && + git rev-list main...HEAD~ >revs && + test_must_be_empty revs && + test_when_finished "rm .gitattributes" +' + +test_expect_success 'detect upstream patch modechange' ' + git checkout -q modeother^{} && + git rebase mode && + git rev-list mode...HEAD~ >revs && + test_must_be_empty revs +' + test_expect_success 'do not drop patch' ' - git branch -f squashed main && - git checkout -q -f squashed && - git reset -q --soft HEAD~2 && - git commit -q -m squashed && git checkout -q other^{} && test_must_fail git rebase squashed && - git rebase --quit + test_when_finished "git rebase --abort" +' + +test_expect_success 'do not drop patch binary' ' + echo "file binary" >.gitattributes && + git checkout -q other^{} && + test_must_fail git rebase squashed && + test_when_finished "git rebase --abort" && + test_when_finished "rm .gitattributes" +' + +test_expect_success 'do not drop patch modechange' ' + git checkout -q modeother^{} && + git rebase other && + cat >expected <<-\EOF && + diff --git a/file b/file + old mode 100644 + new mode 100755 + EOF + git diff HEAD~ >modediff && + test_cmp expected modediff ' test_done diff --git a/t/t4204-patch-id.sh b/t/t4204-patch-id.sh index a730c0db98..a7fa94ce0a 100755 --- a/t/t4204-patch-id.sh +++ b/t/t4204-patch-id.sh @@ -8,13 +8,13 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . ./test-lib.sh test_expect_success 'setup' ' - as="a a a a a a a a" && # eight a - test_write_lines $as >foo && - test_write_lines $as >bar && + str="ab cd ef gh ij kl mn op" && + test_write_lines $str >foo && + test_write_lines $str >bar && git add foo bar && git commit -a -m initial && - test_write_lines $as b >foo && - test_write_lines $as b >bar && + test_write_lines $str b >foo && + test_write_lines $str b >bar && git commit -a -m first && git checkout -b same main && git commit --amend -m same-msg && @@ -22,8 +22,23 @@ test_expect_success 'setup' ' echo c >foo && echo c >bar && git commit --amend -a -m notsame-msg && + git checkout -b with_space main~ && + cat >foo <<-\EOF && + a b + c d + e f + g h + i j + k l + m n + op + EOF + cp foo bar && + git add foo bar && + git commit --amend -m "with spaces" && test_write_lines bar foo >bar-then-foo && test_write_lines foo bar >foo-then-bar + ' test_expect_success 'patch-id output is well-formed' ' @@ -42,7 +57,7 @@ calc_patch_id () { } get_top_diff () { - git log -p -1 "$@" -O bar-then-foo -- + git log -p -1 "$@" -O bar-then-foo --full-index -- } get_patch_id () { @@ -61,6 +76,33 @@ test_expect_success 'patch-id detects inequality' ' get_patch_id notsame && ! test_cmp patch-id_main patch-id_notsame ' +test_expect_success 'patch-id detects equality binary' ' + cat >.gitattributes <<-\EOF && + foo binary + bar binary + EOF + get_patch_id main && + get_patch_id same && + git log -p -1 --binary main >top-diff.output && + calc_patch_id top-diff.output && + calc_patch_id .gitattributes <<-\EOF && + foo binary + bar binary + EOF + get_patch_id main && + get_patch_id notsame && + ! test_cmp patch-id_main patch-id_notsame && + test_when_finished "rm .gitattributes" +' test_expect_success 'patch-id supports git-format-patch output' ' get_patch_id main && @@ -101,9 +143,21 @@ test_patch_id_file_order () { git format-patch -1 --stdout -O foo-then-bar >format-patch.output && calc_patch_id top-diff.output && + calc_patch_id top-diff.output && + calc_patch_id