diff --git a/diffcore-rename.c b/diffcore-rename.c index 266d4fae48..41558185ae 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -379,7 +379,6 @@ static const char *get_basename(const char *filename) return base ? base + 1 : filename; } -MAYBE_UNUSED static int find_basename_matches(struct diff_options *options, int minimum_score) { @@ -716,11 +715,55 @@ void diffcore_rename(struct diff_options *options) if (minimum_score == MAX_SCORE) goto cleanup; - /* Calculate how many renames are left */ - num_destinations = (rename_dst_nr - rename_count); - remove_unneeded_paths_from_src(want_copies); num_sources = rename_src_nr; + if (want_copies || break_idx) { + /* + * Cull sources: + * - remove ones corresponding to exact renames + */ + trace2_region_enter("diff", "cull after exact", options->repo); + remove_unneeded_paths_from_src(want_copies); + trace2_region_leave("diff", "cull after exact", options->repo); + } else { + /* Determine minimum score to match basenames */ + double factor = 0.5; + char *basename_factor = getenv("GIT_BASENAME_FACTOR"); + int min_basename_score; + + if (basename_factor) + factor = strtol(basename_factor, NULL, 10)/100.0; + assert(factor >= 0.0 && factor <= 1.0); + min_basename_score = minimum_score + + (int)(factor * (MAX_SCORE - minimum_score)); + + /* + * Cull sources: + * - remove ones involved in renames (found via exact match) + */ + trace2_region_enter("diff", "cull after exact", options->repo); + remove_unneeded_paths_from_src(want_copies); + trace2_region_leave("diff", "cull after exact", options->repo); + + /* Utilize file basenames to quickly find renames. */ + trace2_region_enter("diff", "basename matches", options->repo); + rename_count += find_basename_matches(options, + min_basename_score); + trace2_region_leave("diff", "basename matches", options->repo); + + /* + * Cull sources, again: + * - remove ones involved in renames (found via basenames) + */ + trace2_region_enter("diff", "cull basename", options->repo); + remove_unneeded_paths_from_src(want_copies); + trace2_region_leave("diff", "cull basename", options->repo); + } + + /* Calculate how many rename destinations are left */ + num_destinations = (rename_dst_nr - rename_count); + num_sources = rename_src_nr; /* rename_src_nr reflects lower number */ + /* All done? */ if (!num_destinations || !num_sources) goto cleanup; @@ -751,7 +794,7 @@ void diffcore_rename(struct diff_options *options) struct diff_score *m; if (rename_dst[i].is_rename) - continue; /* dealt with exact match already. */ + continue; /* exact or basename match already handled */ m = &mx[dst_cnt * NUM_CANDIDATE_PER_DST]; for (j = 0; j < NUM_CANDIDATE_PER_DST; j++) diff --git a/t/t4001-diff-rename.sh b/t/t4001-diff-rename.sh index 0f97858197..99a5d1bd1c 100755 --- a/t/t4001-diff-rename.sh +++ b/t/t4001-diff-rename.sh @@ -277,10 +277,11 @@ test_expect_success 'basename similarity vs best similarity' ' git add file.txt file.md && git commit -a -m "rename" && git diff-tree -r -M --name-status HEAD^ HEAD >actual && - # subdir/file.txt is 88% similar to file.md and 78% similar to file.txt + # subdir/file.txt is 88% similar to file.md, 78% similar to file.txt, + # but since same basenames are checked first... cat >expected <<-\EOF && - R088 subdir/file.txt file.md - A file.txt + A file.md + R078 subdir/file.txt file.txt EOF test_cmp expected actual '