From 0570be79ead35e47e29ee2587e2c8ea47c091d49 Mon Sep 17 00:00:00 2001 From: Jerry Zhang Date: Mon, 24 Oct 2022 20:07:39 +0000 Subject: [PATCH 1/6] patch-id: fix stable patch id for binary / header-only Patch-ids for binary patches are found by hashing the object ids of the before and after objects in succession. However in the --stable case, there is a bug where hunks are not flushed for binary and header-only patch ids, which would always result in a patch-id of 0000. The --unstable case is currently correct. Reorder the logic to branch into 3 cases for populating the patch body: header-only which populates nothing, binary which populates the object ids, and normal which populates the text diff. All branches will end up flushing the hunk. Don't populate the ---a/ and +++b/ lines for binary diffs, to correspond to those lines not being present in the "git diff" text output. This is necessary because we advertise that the patch-id calculated internally and used in format-patch is the same that what the builtin "git patch-id" would produce when piped from a diff. Update the test to run on both binary and normal files. Signed-off-by: Jerry Zhang Signed-off-by: Junio C Hamano --- diff.c | 58 +++++++++++++++++++------------------- t/t3419-rebase-patch-id.sh | 34 +++++++++++++++------- 2 files changed, 53 insertions(+), 39 deletions(-) diff --git a/diff.c b/diff.c index e71cf75886..2aee15f2f1 100644 --- a/diff.c +++ b/diff.c @@ -6221,46 +6221,46 @@ static int diff_get_patch_id(struct diff_options *options, struct object_id *oid if (p->one->mode == 0) { patch_id_add_string(&ctx, "newfilemode"); patch_id_add_mode(&ctx, p->two->mode); - patch_id_add_string(&ctx, "---/dev/null"); - patch_id_add_string(&ctx, "+++b/"); - the_hash_algo->update_fn(&ctx, p->two->path, len2); } else if (p->two->mode == 0) { patch_id_add_string(&ctx, "deletedfilemode"); patch_id_add_mode(&ctx, p->one->mode); - patch_id_add_string(&ctx, "---a/"); - the_hash_algo->update_fn(&ctx, p->one->path, len1); - patch_id_add_string(&ctx, "+++/dev/null"); - } else { - patch_id_add_string(&ctx, "---a/"); - the_hash_algo->update_fn(&ctx, p->one->path, len1); - patch_id_add_string(&ctx, "+++b/"); - the_hash_algo->update_fn(&ctx, p->two->path, len2); } - if (diff_header_only) - continue; - - if (fill_mmfile(options->repo, &mf1, p->one) < 0 || - fill_mmfile(options->repo, &mf2, p->two) < 0) - return error("unable to read files to diff"); - - if (diff_filespec_is_binary(options->repo, p->one) || + if (diff_header_only) { + /* don't do anything since we're only populating header info */ + } else if (diff_filespec_is_binary(options->repo, p->one) || diff_filespec_is_binary(options->repo, p->two)) { the_hash_algo->update_fn(&ctx, oid_to_hex(&p->one->oid), the_hash_algo->hexsz); the_hash_algo->update_fn(&ctx, oid_to_hex(&p->two->oid), the_hash_algo->hexsz); - continue; + } else { + if (p->one->mode == 0) { + patch_id_add_string(&ctx, "---/dev/null"); + patch_id_add_string(&ctx, "+++b/"); + the_hash_algo->update_fn(&ctx, p->two->path, len2); + } else if (p->two->mode == 0) { + patch_id_add_string(&ctx, "---a/"); + the_hash_algo->update_fn(&ctx, p->one->path, len1); + patch_id_add_string(&ctx, "+++/dev/null"); + } else { + patch_id_add_string(&ctx, "---a/"); + the_hash_algo->update_fn(&ctx, p->one->path, len1); + patch_id_add_string(&ctx, "+++b/"); + the_hash_algo->update_fn(&ctx, p->two->path, len2); + } + + if (fill_mmfile(options->repo, &mf1, p->one) < 0 || + fill_mmfile(options->repo, &mf2, p->two) < 0) + return error("unable to read files to diff"); + xpp.flags = 0; + xecfg.ctxlen = 3; + xecfg.flags = XDL_EMIT_NO_HUNK_HDR; + if (xdi_diff_outf(&mf1, &mf2, NULL, + patch_id_consume, &data, &xpp, &xecfg)) + return error("unable to generate patch-id diff for %s", + p->one->path); } - - xpp.flags = 0; - xecfg.ctxlen = 3; - xecfg.flags = XDL_EMIT_NO_HUNK_HDR; - if (xdi_diff_outf(&mf1, &mf2, NULL, - patch_id_consume, &data, &xpp, &xecfg)) - return error("unable to generate patch-id diff for %s", - p->one->path); - if (stable) flush_one_hunk(oid, &ctx); } diff --git a/t/t3419-rebase-patch-id.sh b/t/t3419-rebase-patch-id.sh index 295040f2fe..d24e55aac8 100755 --- a/t/t3419-rebase-patch-id.sh +++ b/t/t3419-rebase-patch-id.sh @@ -43,15 +43,16 @@ test_expect_success 'setup: 500 lines' ' git add newfile && git commit -q -m "add small file" && - git cherry-pick main >/dev/null 2>&1 -' + git cherry-pick main >/dev/null 2>&1 && -test_expect_success 'setup attributes' ' - echo "file binary" >.gitattributes + git branch -f squashed main && + git checkout -q -f squashed && + git reset -q --soft HEAD~2 && + git commit -q -m squashed ' test_expect_success 'detect upstream patch' ' - git checkout -q main && + git checkout -q main^{} && scramble file && git add file && git commit -q -m "change big file again" && @@ -61,14 +62,27 @@ test_expect_success 'detect upstream patch' ' test_must_be_empty revs ' +test_expect_success 'detect upstream patch binary' ' + echo "file binary" >.gitattributes && + git checkout -q other^{} && + git rebase main && + git rev-list main...HEAD~ >revs && + test_must_be_empty revs && + test_when_finished "rm .gitattributes" +' + test_expect_success 'do not drop patch' ' - git branch -f squashed main && - git checkout -q -f squashed && - git reset -q --soft HEAD~2 && - git commit -q -m squashed && git checkout -q other^{} && test_must_fail git rebase squashed && - git rebase --quit + test_when_finished "git rebase --abort" +' + +test_expect_success 'do not drop patch binary' ' + echo "file binary" >.gitattributes && + git checkout -q other^{} && + test_must_fail git rebase squashed && + test_when_finished "git rebase --abort" && + test_when_finished "rm .gitattributes" ' test_done From 51276c1832d64d3b8f4dfc06c3ef21bf74f1916e Mon Sep 17 00:00:00 2001 From: Jerry Zhang Date: Mon, 24 Oct 2022 20:07:40 +0000 Subject: [PATCH 2/6] patch-id: use stable patch-id for rebases Git doesn't persist patch-ids during the rebase process, so there is no need to specifically invoke the unstable variant. Use the stable logic for all internal patch-id calculations to minimize the number of code paths and improve test coverage. Signed-off-by: Jerry Zhang Signed-off-by: Junio C Hamano --- builtin/log.c | 2 +- diff.c | 12 ++++-------- diff.h | 2 +- patch-ids.c | 10 +++++----- patch-ids.h | 2 +- 5 files changed, 12 insertions(+), 16 deletions(-) diff --git a/builtin/log.c b/builtin/log.c index 88a5e98875..306103f302 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -1694,7 +1694,7 @@ static void prepare_bases(struct base_tree_info *bases, struct object_id *patch_id; if (*commit_base_at(&commit_base, commit)) continue; - if (commit_patch_id(commit, &diffopt, &oid, 0, 1)) + if (commit_patch_id(commit, &diffopt, &oid, 0)) die(_("cannot get patch id")); ALLOC_GROW(bases->patch_id, bases->nr_patch_id + 1, bases->alloc_patch_id); patch_id = bases->patch_id + bases->nr_patch_id; diff --git a/diff.c b/diff.c index 2aee15f2f1..c86688794f 100644 --- a/diff.c +++ b/diff.c @@ -6174,7 +6174,7 @@ static void patch_id_add_mode(git_hash_ctx *ctx, unsigned mode) } /* returns 0 upon success, and writes result into oid */ -static int diff_get_patch_id(struct diff_options *options, struct object_id *oid, int diff_header_only, int stable) +static int diff_get_patch_id(struct diff_options *options, struct object_id *oid, int diff_header_only) { struct diff_queue_struct *q = &diff_queued_diff; int i; @@ -6261,21 +6261,17 @@ static int diff_get_patch_id(struct diff_options *options, struct object_id *oid return error("unable to generate patch-id diff for %s", p->one->path); } - if (stable) - flush_one_hunk(oid, &ctx); + flush_one_hunk(oid, &ctx); } - if (!stable) - the_hash_algo->final_oid_fn(oid, &ctx); - return 0; } -int diff_flush_patch_id(struct diff_options *options, struct object_id *oid, int diff_header_only, int stable) +int diff_flush_patch_id(struct diff_options *options, struct object_id *oid, int diff_header_only) { struct diff_queue_struct *q = &diff_queued_diff; int i; - int result = diff_get_patch_id(options, oid, diff_header_only, stable); + int result = diff_get_patch_id(options, oid, diff_header_only); for (i = 0; i < q->nr; i++) diff_free_filepair(q->queue[i]); diff --git a/diff.h b/diff.h index 8ae18e5ab1..fd33caeb25 100644 --- a/diff.h +++ b/diff.h @@ -634,7 +634,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option); int run_diff_index(struct rev_info *revs, unsigned int option); int do_diff_cache(const struct object_id *, struct diff_options *); -int diff_flush_patch_id(struct diff_options *, struct object_id *, int, int); +int diff_flush_patch_id(struct diff_options *, struct object_id *, int); void flush_one_hunk(struct object_id *result, git_hash_ctx *ctx); int diff_result_code(struct diff_options *, int); diff --git a/patch-ids.c b/patch-ids.c index 8bf425555d..fefddc487e 100644 --- a/patch-ids.c +++ b/patch-ids.c @@ -11,7 +11,7 @@ static int patch_id_defined(struct commit *commit) } int commit_patch_id(struct commit *commit, struct diff_options *options, - struct object_id *oid, int diff_header_only, int stable) + struct object_id *oid, int diff_header_only) { if (!patch_id_defined(commit)) return -1; @@ -22,7 +22,7 @@ int commit_patch_id(struct commit *commit, struct diff_options *options, else diff_root_tree_oid(&commit->object.oid, "", options); diffcore_std(options); - return diff_flush_patch_id(options, oid, diff_header_only, stable); + return diff_flush_patch_id(options, oid, diff_header_only); } /* @@ -48,11 +48,11 @@ static int patch_id_neq(const void *cmpfn_data, b = container_of(entry_or_key, struct patch_id, ent); if (is_null_oid(&a->patch_id) && - commit_patch_id(a->commit, opt, &a->patch_id, 0, 0)) + commit_patch_id(a->commit, opt, &a->patch_id, 0)) return error("Could not get patch ID for %s", oid_to_hex(&a->commit->object.oid)); if (is_null_oid(&b->patch_id) && - commit_patch_id(b->commit, opt, &b->patch_id, 0, 0)) + commit_patch_id(b->commit, opt, &b->patch_id, 0)) return error("Could not get patch ID for %s", oid_to_hex(&b->commit->object.oid)); return !oideq(&a->patch_id, &b->patch_id); @@ -82,7 +82,7 @@ static int init_patch_id_entry(struct patch_id *patch, struct object_id header_only_patch_id; patch->commit = commit; - if (commit_patch_id(commit, &ids->diffopts, &header_only_patch_id, 1, 0)) + if (commit_patch_id(commit, &ids->diffopts, &header_only_patch_id, 1)) return -1; hashmap_entry_init(&patch->ent, oidhash(&header_only_patch_id)); diff --git a/patch-ids.h b/patch-ids.h index ab6c6a6804..490d739371 100644 --- a/patch-ids.h +++ b/patch-ids.h @@ -20,7 +20,7 @@ struct patch_ids { }; int commit_patch_id(struct commit *commit, struct diff_options *options, - struct object_id *oid, int, int); + struct object_id *oid, int); int init_patch_ids(struct repository *, struct patch_ids *); int free_patch_ids(struct patch_ids *); From 0df19eb9d905ff9b6115139106d98d8f0a8a9046 Mon Sep 17 00:00:00 2001 From: Jerry Zhang Date: Mon, 24 Oct 2022 20:07:41 +0000 Subject: [PATCH 3/6] builtin: patch-id: fix patch-id with binary diffs "git patch-id" currently doesn't produce correct output if the incoming diff has any binary files. Add logic to get_one_patchid to handle the different possible styles of binary diff. This attempts to keep resulting patch-ids identical to what would be produced by the counterpart logic in diff.c, that is it produces the id by hashing the a and b oids in succession. In general we handle binary diffs by first caching the object ids from the "index" line and using those if we then find an indication that the diff is binary. The input could contain patches generated with "git diff --binary". This currently breaks the parse logic and results in multiple patch-ids output for a single commit. Here we have to skip the contents of the patch itself since those do not go into the patch id. --binary implies --full-index so the object ids are always available. When the diff is generated with --full-index there is no patch content to skip over. When a diff is generated without --full-index or --binary, it will contain abbreviated object ids. This will still result in a sufficiently unique patch-id when hashed, but does not match internal patch id output. We'll call this ok for now as we already need specialized arguments to diff in order to match internal patch id (namely -U3). Signed-off-by: Jerry Zhang Signed-off-by: Junio C Hamano --- builtin/patch-id.c | 36 ++++++++++++++++++++++++++++++++++-- t/t4204-patch-id.sh | 29 ++++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/builtin/patch-id.c b/builtin/patch-id.c index 881fcf3273..e7a3112314 100644 --- a/builtin/patch-id.c +++ b/builtin/patch-id.c @@ -61,6 +61,8 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result, { int patchlen = 0, found_next = 0; int before = -1, after = -1; + int diff_is_binary = 0; + char pre_oid_str[GIT_MAX_HEXSZ + 1], post_oid_str[GIT_MAX_HEXSZ + 1]; git_hash_ctx ctx; the_hash_algo->init_fn(&ctx); @@ -88,14 +90,44 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result, /* Parsing diff header? */ if (before == -1) { - if (starts_with(line, "index ")) + if (starts_with(line, "GIT binary patch") || + starts_with(line, "Binary files")) { + diff_is_binary = 1; + before = 0; + the_hash_algo->update_fn(&ctx, pre_oid_str, + strlen(pre_oid_str)); + the_hash_algo->update_fn(&ctx, post_oid_str, + strlen(post_oid_str)); + if (stable) + flush_one_hunk(result, &ctx); continue; - else if (starts_with(line, "--- ")) + } else if (skip_prefix(line, "index ", &p)) { + char *oid1_end = strstr(line, ".."); + char *oid2_end = NULL; + if (oid1_end) + oid2_end = strstr(oid1_end, " "); + if (!oid2_end) + oid2_end = line + strlen(line) - 1; + if (oid1_end != NULL && oid2_end != NULL) { + *oid1_end = *oid2_end = '\0'; + strlcpy(pre_oid_str, p, GIT_MAX_HEXSZ + 1); + strlcpy(post_oid_str, oid1_end + 2, GIT_MAX_HEXSZ + 1); + } + continue; + } else if (starts_with(line, "--- ")) before = after = 1; else if (!isalpha(line[0])) break; } + if (diff_is_binary) { + if (starts_with(line, "diff ")) { + diff_is_binary = 0; + before = -1; + } + continue; + } + /* Looking for a valid hunk header? */ if (before == 0 && after == 0) { if (starts_with(line, "@@ -")) { diff --git a/t/t4204-patch-id.sh b/t/t4204-patch-id.sh index a730c0db98..cdc5191aa8 100755 --- a/t/t4204-patch-id.sh +++ b/t/t4204-patch-id.sh @@ -42,7 +42,7 @@ calc_patch_id () { } get_top_diff () { - git log -p -1 "$@" -O bar-then-foo -- + git log -p -1 "$@" -O bar-then-foo --full-index -- } get_patch_id () { @@ -61,6 +61,33 @@ test_expect_success 'patch-id detects inequality' ' get_patch_id notsame && ! test_cmp patch-id_main patch-id_notsame ' +test_expect_success 'patch-id detects equality binary' ' + cat >.gitattributes <<-\EOF && + foo binary + bar binary + EOF + get_patch_id main && + get_patch_id same && + git log -p -1 --binary main >top-diff.output && + calc_patch_id top-diff.output && + calc_patch_id .gitattributes <<-\EOF && + foo binary + bar binary + EOF + get_patch_id main && + get_patch_id notsame && + ! test_cmp patch-id_main patch-id_notsame && + test_when_finished "rm .gitattributes" +' test_expect_success 'patch-id supports git-format-patch output' ' get_patch_id main && From 93105aba6c4c8608b10c8ebe14b2313b3d347124 Mon Sep 17 00:00:00 2001 From: Jerry Zhang Date: Mon, 24 Oct 2022 20:07:42 +0000 Subject: [PATCH 4/6] patch-id: fix patch-id for mode changes Currently patch-id as used in rebase and cherry-pick does not account for file modes if the file is modified. One consequence of this is that if you have a local patch that changes modes, but upstream has applied an outdated version of the patch that doesn't include that mode change, "git rebase" will drop your local version of the patch along with your mode changes. It also means that internal patch-id doesn't produce the same output as the builtin, which does account for mode changes due to them being part of diff output. Fix by adding mode to the patch-id if it has changed, in the same format that would be produced by diff, so that it is compatible with builtin patch-id. Signed-off-by: Jerry Zhang Signed-off-by: Junio C Hamano --- diff.c | 5 +++++ t/t3419-rebase-patch-id.sh | 31 ++++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/diff.c b/diff.c index c86688794f..20b121637d 100644 --- a/diff.c +++ b/diff.c @@ -6224,6 +6224,11 @@ static int diff_get_patch_id(struct diff_options *options, struct object_id *oid } else if (p->two->mode == 0) { patch_id_add_string(&ctx, "deletedfilemode"); patch_id_add_mode(&ctx, p->one->mode); + } else if (p->one->mode != p->two->mode) { + patch_id_add_string(&ctx, "oldmode"); + patch_id_add_mode(&ctx, p->one->mode); + patch_id_add_string(&ctx, "newmode"); + patch_id_add_mode(&ctx, p->two->mode); } if (diff_header_only) { diff --git a/t/t3419-rebase-patch-id.sh b/t/t3419-rebase-patch-id.sh index d24e55aac8..7181f176b8 100755 --- a/t/t3419-rebase-patch-id.sh +++ b/t/t3419-rebase-patch-id.sh @@ -48,7 +48,17 @@ test_expect_success 'setup: 500 lines' ' git branch -f squashed main && git checkout -q -f squashed && git reset -q --soft HEAD~2 && - git commit -q -m squashed + git commit -q -m squashed && + + git branch -f mode main && + git checkout -q -f mode && + test_chmod +x file && + git commit -q -a --amend && + + git branch -f modeother other && + git checkout -q -f modeother && + test_chmod +x file && + git commit -q -a --amend ' test_expect_success 'detect upstream patch' ' @@ -71,6 +81,13 @@ test_expect_success 'detect upstream patch binary' ' test_when_finished "rm .gitattributes" ' +test_expect_success 'detect upstream patch modechange' ' + git checkout -q modeother^{} && + git rebase mode && + git rev-list mode...HEAD~ >revs && + test_must_be_empty revs +' + test_expect_success 'do not drop patch' ' git checkout -q other^{} && test_must_fail git rebase squashed && @@ -85,4 +102,16 @@ test_expect_success 'do not drop patch binary' ' test_when_finished "rm .gitattributes" ' +test_expect_success 'do not drop patch modechange' ' + git checkout -q modeother^{} && + git rebase other && + cat >expected <<-\EOF && + diff --git a/file b/file + old mode 100644 + new mode 100755 + EOF + git diff HEAD~ >modediff && + test_cmp expected modediff +' + test_done From 2871f4d447214874e13cf764ab3a170c9d844ca2 Mon Sep 17 00:00:00 2001 From: Jerry Zhang Date: Mon, 24 Oct 2022 20:07:43 +0000 Subject: [PATCH 5/6] builtin: patch-id: add --verbatim as a command mode There are situations where the user might not want the default setting where patch-id strips all whitespace. They might be working in a language where white space is syntactically important, or they might have CI testing that enforces strict whitespace linting. In these cases, a whitespace change would result in the patch fundamentally changing, and thus deserving of a different id. Add a new mode that is exclusive of --stable and --unstable called --verbatim. It also corresponds to the config patchid.verbatim = true. In this mode, the stable algorithm is used and whitespace is not stripped from the patch text. Users of --unstable mainly care about compatibility with old git versions, which unstripping the whitespace would break. Thus there isn't a usecase for the combination of --verbatim and --unstable, and we don't expose this so as to not add maintainence burden. Signed-off-by: Jerry Zhang fixes https://github.com/Skydio/revup/issues/2 Signed-off-by: Junio C Hamano --- Documentation/git-patch-id.txt | 24 ++++++++---- builtin/patch-id.c | 65 +++++++++++++++++++++----------- t/t4204-patch-id.sh | 68 ++++++++++++++++++++++++++++++---- 3 files changed, 121 insertions(+), 36 deletions(-) diff --git a/Documentation/git-patch-id.txt b/Documentation/git-patch-id.txt index 442caff8a9..1d15fa45d5 100644 --- a/Documentation/git-patch-id.txt +++ b/Documentation/git-patch-id.txt @@ -8,18 +8,18 @@ git-patch-id - Compute unique ID for a patch SYNOPSIS -------- [verse] -'git patch-id' [--stable | --unstable] +'git patch-id' [--stable | --unstable | --verbatim] DESCRIPTION ----------- Read a patch from the standard input and compute the patch ID for it. A "patch ID" is nothing but a sum of SHA-1 of the file diffs associated with a -patch, with whitespace and line numbers ignored. As such, it's "reasonably -stable", but at the same time also reasonably unique, i.e., two patches that -have the same "patch ID" are almost guaranteed to be the same thing. +patch, with line numbers ignored. As such, it's "reasonably stable", but at +the same time also reasonably unique, i.e., two patches that have the same +"patch ID" are almost guaranteed to be the same thing. -IOW, you can use this thing to look for likely duplicate commits. +The main usecase for this command is to look for likely duplicate commits. When dealing with 'git diff-tree' output, it takes advantage of the fact that the patch is prefixed with the object name of the @@ -30,6 +30,12 @@ This can be used to make a mapping from patch ID to commit ID. OPTIONS ------- +--verbatim:: + Calculate the patch-id of the input as it is given, do not strip + any whitespace. + + This is the default if patchid.verbatim is true. + --stable:: Use a "stable" sum of hashes as the patch ID. With this option: - Reordering file diffs that make up a patch does not affect the ID. @@ -45,14 +51,16 @@ OPTIONS of "-O", thereby making existing databases storing such "unstable" or historical patch-ids unusable. + - All whitespace within the patch is ignored and does not affect the id. + This is the default if patchid.stable is set to true. --unstable:: Use an "unstable" hash as the patch ID. With this option, the result produced is compatible with the patch-id value produced - by git 1.9 and older. Users with pre-existing databases storing - patch-ids produced by git 1.9 and older (who do not deal with reordered - patches) may want to use this option. + by git 1.9 and older and whitespace is ignored. Users with pre-existing + databases storing patch-ids produced by git 1.9 and older (who do not deal + with reordered patches) may want to use this option. This is the default. diff --git a/builtin/patch-id.c b/builtin/patch-id.c index e7a3112314..afdd472369 100644 --- a/builtin/patch-id.c +++ b/builtin/patch-id.c @@ -2,6 +2,7 @@ #include "builtin.h" #include "config.h" #include "diff.h" +#include "parse-options.h" static void flush_current_id(int patchlen, struct object_id *id, struct object_id *result) { @@ -57,7 +58,7 @@ static int scan_hunk_header(const char *p, int *p_before, int *p_after) } static int get_one_patchid(struct object_id *next_oid, struct object_id *result, - struct strbuf *line_buf, int stable) + struct strbuf *line_buf, int stable, int verbatim) { int patchlen = 0, found_next = 0; int before = -1, after = -1; @@ -76,8 +77,11 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result, if (!skip_prefix(line, "diff-tree ", &p) && !skip_prefix(line, "commit ", &p) && !skip_prefix(line, "From ", &p) && - starts_with(line, "\\ ") && 12 < strlen(line)) + starts_with(line, "\\ ") && 12 < strlen(line)) { + if (verbatim) + the_hash_algo->update_fn(&ctx, line, strlen(line)); continue; + } if (!get_oid_hex(p, next_oid)) { found_next = 1; @@ -152,8 +156,8 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result, if (line[0] == '+' || line[0] == ' ') after--; - /* Compute the sha without whitespace */ - len = remove_space(line); + /* Add line to hash algo (possibly removing whitespace) */ + len = verbatim ? strlen(line) : remove_space(line); patchlen += len; the_hash_algo->update_fn(&ctx, line, len); } @@ -166,7 +170,7 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result, return patchlen; } -static void generate_id_list(int stable) +static void generate_id_list(int stable, int verbatim) { struct object_id oid, n, result; int patchlen; @@ -174,21 +178,32 @@ static void generate_id_list(int stable) oidclr(&oid); while (!feof(stdin)) { - patchlen = get_one_patchid(&n, &result, &line_buf, stable); + patchlen = get_one_patchid(&n, &result, &line_buf, stable, verbatim); flush_current_id(patchlen, &oid, &result); oidcpy(&oid, &n); } strbuf_release(&line_buf); } -static const char patch_id_usage[] = "git patch-id [--stable | --unstable]"; +static const char *const patch_id_usage[] = { + N_("git patch-id [--stable | --unstable | --verbatim]"), NULL +}; + +struct patch_id_opts { + int stable; + int verbatim; +}; static int git_patch_id_config(const char *var, const char *value, void *cb) { - int *stable = cb; + struct patch_id_opts *opts = cb; if (!strcmp(var, "patchid.stable")) { - *stable = git_config_bool(var, value); + opts->stable = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "patchid.verbatim")) { + opts->verbatim = git_config_bool(var, value); return 0; } @@ -197,21 +212,29 @@ static int git_patch_id_config(const char *var, const char *value, void *cb) int cmd_patch_id(int argc, const char **argv, const char *prefix) { - int stable = -1; + /* if nothing is set, default to unstable */ + struct patch_id_opts config = {0, 0}; + int opts = 0; + struct option builtin_patch_id_options[] = { + OPT_CMDMODE(0, "unstable", &opts, + N_("use the unstable patch-id algorithm"), 1), + OPT_CMDMODE(0, "stable", &opts, + N_("use the stable patch-id algorithm"), 2), + OPT_CMDMODE(0, "verbatim", &opts, + N_("don't strip whitespace from the patch"), 3), + OPT_END() + }; - git_config(git_patch_id_config, &stable); + git_config(git_patch_id_config, &config); - /* If nothing is set, default to unstable. */ - if (stable < 0) - stable = 0; + /* verbatim implies stable */ + if (config.verbatim) + config.stable = 1; - if (argc == 2 && !strcmp(argv[1], "--stable")) - stable = 1; - else if (argc == 2 && !strcmp(argv[1], "--unstable")) - stable = 0; - else if (argc != 1) - usage(patch_id_usage); + argc = parse_options(argc, argv, prefix, builtin_patch_id_options, + patch_id_usage, 0); - generate_id_list(stable); + generate_id_list(opts ? opts > 1 : config.stable, + opts ? opts == 3 : config.verbatim); return 0; } diff --git a/t/t4204-patch-id.sh b/t/t4204-patch-id.sh index cdc5191aa8..a7fa94ce0a 100755 --- a/t/t4204-patch-id.sh +++ b/t/t4204-patch-id.sh @@ -8,13 +8,13 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . ./test-lib.sh test_expect_success 'setup' ' - as="a a a a a a a a" && # eight a - test_write_lines $as >foo && - test_write_lines $as >bar && + str="ab cd ef gh ij kl mn op" && + test_write_lines $str >foo && + test_write_lines $str >bar && git add foo bar && git commit -a -m initial && - test_write_lines $as b >foo && - test_write_lines $as b >bar && + test_write_lines $str b >foo && + test_write_lines $str b >bar && git commit -a -m first && git checkout -b same main && git commit --amend -m same-msg && @@ -22,8 +22,23 @@ test_expect_success 'setup' ' echo c >foo && echo c >bar && git commit --amend -a -m notsame-msg && + git checkout -b with_space main~ && + cat >foo <<-\EOF && + a b + c d + e f + g h + i j + k l + m n + op + EOF + cp foo bar && + git add foo bar && + git commit --amend -m "with spaces" && test_write_lines bar foo >bar-then-foo && test_write_lines foo bar >foo-then-bar + ' test_expect_success 'patch-id output is well-formed' ' @@ -128,9 +143,21 @@ test_patch_id_file_order () { git format-patch -1 --stdout -O foo-then-bar >format-patch.output && calc_patch_id top-diff.output && + calc_patch_id top-diff.output && + calc_patch_id Date: Mon, 24 Oct 2022 20:07:44 +0000 Subject: [PATCH 6/6] builtin: patch-id: remove unused diff-tree prefix The last git version that had "diff-tree" in the header text of "git diff-tree" output was v1.3.0 from 2006. The header text was changed from "diff-tree" to "commit" in 91539833 ("Log message printout cleanups"). Given how long ago this change was made, it is highly unlikely that anyone is still feeding in outputs from that git version. Remove the handling of the "diff-tree" prefix and document the source of the other prefixes so that the overall functionality is more clear. Signed-off-by: Jerry Zhang Signed-off-by: Junio C Hamano --- builtin/patch-id.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/builtin/patch-id.c b/builtin/patch-id.c index afdd472369..f840fbf1c7 100644 --- a/builtin/patch-id.c +++ b/builtin/patch-id.c @@ -74,8 +74,8 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result, const char *p = line; int len; - if (!skip_prefix(line, "diff-tree ", &p) && - !skip_prefix(line, "commit ", &p) && + /* Possibly skip over the prefix added by "log" or "format-patch" */ + if (!skip_prefix(line, "commit ", &p) && !skip_prefix(line, "From ", &p) && starts_with(line, "\\ ") && 12 < strlen(line)) { if (verbatim)