Merge branch 'dd/sequencer-utf8'

Handling of commit objects that use non UTF-8 encoding during
"rebase -i" has been improved.

* dd/sequencer-utf8:
  sequencer: reencode commit message for am/rebase --show-current-patch
  sequencer: reencode old merge-commit message
  sequencer: reencode squashing commit's message
  sequencer: reencode revert/cherry-pick's todo list
  sequencer: reencode to utf-8 before arrange rebase's todo list
  t3900: demonstrate git-rebase problem with multi encoding
  configure.ac: define ICONV_OMITS_BOM if necessary
  t0028: eliminate non-standard usage of printf
This commit is contained in:
Junio C Hamano 2019-12-01 09:04:36 -08:00
commit 6511cb33c9
7 changed files with 193 additions and 9 deletions

View File

@ -844,12 +844,61 @@ AC_MSG_CHECKING([for old iconv()])
AC_COMPILE_IFELSE([OLDICONVTEST_SRC],
[AC_MSG_RESULT([no])],
[AC_MSG_RESULT([yes])
AC_DEFINE(HAVE_OLD_ICONV, 1)
OLD_ICONV=UnfortunatelyYes])
GIT_UNSTASH_FLAGS($ICONVDIR)
GIT_CONF_SUBST([OLD_ICONV])
#
# Define ICONV_OMITS_BOM if you are on a system whose
# iconv omits the BOM for UTF-{16,32}.
if test -z "$NO_ICONV"; then
	# Convert "a" to UTF-16 and inspect the first two bytes of the output.
	# Only UTF-16 is probed even though the message also names UTF-32.
	AC_CACHE_CHECK([whether iconv omits bom for utf-16 and utf-32],
	 [ac_cv_iconv_omits_bom],
	 [
		old_LIBS="$LIBS"
		if test -n "$NEEDS_LIBICONV"; then
			LIBS="$LIBS -liconv"
		fi
		# The probe must be executed, which is impossible when cross
		# compiling.  In that case assume the BOM is written instead of
		# aborting configure; preset ac_cv_iconv_omits_bom to override.
		AC_RUN_IFELSE(
			[AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT
#include <iconv.h>
#ifdef HAVE_OLD_ICONV
typedef const char *iconv_ibp;
#else
typedef char *iconv_ibp;
#endif
			],
			[[
				int v;
				iconv_t conv;
				char in[] = "a"; iconv_ibp pin = in;
				char out[20] = ""; char *pout = out;
				size_t isz = sizeof in;
				size_t osz = sizeof out;
				conv = iconv_open("UTF-16", "UTF-8");
				iconv(conv, &pin, &isz, &pout, &osz);
				iconv_close(conv);
				/*
				 * The BOM is FE FF or FF FE depending on byte
				 * order, so the sum of the first two bytes is
				 * the same for both.
				 */
				v = (unsigned char)(out[0]) + (unsigned char)(out[1]);
				return v != 0xfe + 0xff;
			]])],
			[ac_cv_iconv_omits_bom=no],
			[ac_cv_iconv_omits_bom=yes],
			[ac_cv_iconv_omits_bom=no])
		LIBS="$old_LIBS"
	 ])
	if test "x$ac_cv_iconv_omits_bom" = xyes; then
		ICONV_OMITS_BOM=Yes
	else
		ICONV_OMITS_BOM=
	fi
	GIT_CONF_SUBST([ICONV_OMITS_BOM])
fi
## Checks for typedefs, structures, and compiler characteristics.
AC_MSG_NOTICE([CHECKS for typedefs, structures, and compiler characteristics])
#

View File

@ -1574,6 +1574,7 @@ static int update_squash_messages(struct repository *r,
struct strbuf buf = STRBUF_INIT;
int res;
const char *message, *body;
const char *encoding = get_commit_output_encoding();
if (opts->current_fixup_count > 0) {
struct strbuf header = STRBUF_INIT;
@ -1600,7 +1601,7 @@ static int update_squash_messages(struct repository *r,
return error(_("need a HEAD to fixup"));
if (!(head_commit = lookup_commit_reference(r, &head)))
return error(_("could not read HEAD"));
if (!(head_message = get_commit_buffer(head_commit, NULL)))
if (!(head_message = logmsg_reencode(head_commit, NULL, encoding)))
return error(_("could not read HEAD's commit message"));
find_commit_subject(head_message, &body);
@ -1621,7 +1622,7 @@ static int update_squash_messages(struct repository *r,
unuse_commit_buffer(head_commit, head_message);
}
if (!(message = get_commit_buffer(commit, NULL)))
if (!(message = logmsg_reencode(commit, NULL, encoding)))
return error(_("could not read commit message of %s"),
oid_to_hex(&commit->object.oid));
find_commit_subject(message, &body);
@ -2562,14 +2563,17 @@ static int walk_revs_populate_todo(struct todo_list *todo_list,
enum todo_command command = opts->action == REPLAY_PICK ?
TODO_PICK : TODO_REVERT;
const char *command_string = todo_command_info[command].str;
const char *encoding;
struct commit *commit;
if (prepare_revs(opts))
return -1;
encoding = get_log_output_encoding();
while ((commit = get_revision(opts->revs))) {
struct todo_item *item = append_new_todo(todo_list);
const char *commit_buffer = get_commit_buffer(commit, NULL);
const char *commit_buffer = logmsg_reencode(commit, NULL, encoding);
const char *subject;
int subject_len;
@ -2966,7 +2970,8 @@ static int make_patch(struct repository *r,
strbuf_addf(&buf, "%s/message", get_dir(opts));
if (!file_exists(buf.buf)) {
const char *commit_buffer = get_commit_buffer(commit, NULL);
const char *encoding = get_commit_output_encoding();
const char *commit_buffer = logmsg_reencode(commit, NULL, encoding);
find_commit_subject(commit_buffer, &subject);
res |= write_message(subject, strlen(subject), buf.buf, 1);
unuse_commit_buffer(commit, commit_buffer);
@ -3368,7 +3373,8 @@ static int do_merge(struct repository *r,
}
if (commit) {
const char *message = get_commit_buffer(commit, NULL);
const char *encoding = get_commit_output_encoding();
const char *message = logmsg_reencode(commit, NULL, encoding);
const char *body;
int len;
@ -4149,9 +4155,10 @@ static int commit_staged_changes(struct repository *r,
*/
struct commit *commit;
const char *path = rebase_path_squash_msg();
const char *encoding = get_commit_output_encoding();
if (parse_head(r, &commit) ||
!(p = get_commit_buffer(commit, NULL)) ||
!(p = logmsg_reencode(commit, NULL, encoding)) ||
write_message(p, strlen(p), path, 0)) {
unuse_commit_buffer(commit, p);
return error(_("could not write file: "
@ -5167,7 +5174,7 @@ int todo_list_rearrange_squash(struct todo_list *todo_list)
*commit_todo_item_at(&commit_todo, item->commit) = item;
parse_commit(item->commit);
commit_buffer = get_commit_buffer(item->commit, NULL);
commit_buffer = logmsg_reencode(item->commit, NULL, "UTF-8");
find_commit_subject(commit_buffer, &subject);
format_subject(&buf, subject, " ");
subject = subjects[i] = strbuf_detach(&buf, &subject_len);

View File

@ -17,7 +17,7 @@ test_lazy_prereq NO_UTF32_BOM '
write_utf16 () {
if test_have_prereq NO_UTF16_BOM
then
printf '\xfe\xff'
printf '\376\377'
fi &&
iconv -f UTF-8 -t UTF-16
}
@ -25,7 +25,7 @@ write_utf16 () {
write_utf32 () {
if test_have_prereq NO_UTF32_BOM
then
printf '\x00\x00\xfe\xff'
printf '\0\0\376\377'
fi &&
iconv -f UTF-8 -t UTF-32
}

84
t/t3434-rebase-i18n.sh Executable file
View File

@ -0,0 +1,84 @@
#!/bin/sh
#
# Copyright (c) 2019 Doan Tran Cong Danh
#
test_description='rebase with changing encoding
Initial setup:
1 - 2 master
\
3 - 4 first
\
5 - 6 second
'
. ./test-lib.sh
# Verify that the message of HEAD matches a reference file after
# converting that file between encodings.
#   $1 - reference file name below $TEST_DIRECTORY/t3434
#   $2 - encoding the reference file is converted from
#   $3 - encoding the reference file is converted to
# Leaves the scratch files expect, raw and actual in the current directory.
compare_msg () {
iconv -f "$2" -t "$3" "$TEST_DIRECTORY/t3434/$1" >expect &&
git cat-file commit HEAD >raw &&
# Drop everything up to and including the first empty line.
sed "1,/^$/d" raw >actual &&
test_cmp expect actual
}
# Build the history sketched in test_description: "first" is branched
# after commit one, "second" is branched after commit three, and each
# branch then receives further commits of its own.
test_expect_success setup '
test_commit one &&
git branch first &&
test_commit two &&
git switch first &&
test_commit three &&
git branch second &&
test_commit four &&
git switch second &&
test_commit five &&
test_commit six
'
# Create a merge commit whose message is recorded while
# i18n.commitencoding is eucJP, switch the setting to UTF-8, rebase the
# merge onto master and check the resulting message against the
# reference file converted to UTF-8.
test_expect_success 'rebase --rebase-merges update encoding eucJP to UTF-8' '
git switch -c merge-eucJP-UTF-8 first &&
git config i18n.commitencoding eucJP &&
git merge -F "$TEST_DIRECTORY/t3434/eucJP.txt" second &&
git config i18n.commitencoding UTF-8 &&
git rebase --rebase-merges master &&
compare_msg eucJP.txt eucJP UTF-8
'
# Same scenario with ISO-2022-JP as the new encoding.  It is declared
# with test_expect_failure rather than test_expect_success.
test_expect_failure 'rebase --rebase-merges update encoding eucJP to ISO-2022-JP' '
git switch -c merge-eucJP-ISO-2022-JP first &&
git config i18n.commitencoding eucJP &&
git merge -F "$TEST_DIRECTORY/t3434/eucJP.txt" second &&
git config i18n.commitencoding ISO-2022-JP &&
git rebase --rebase-merges master &&
compare_msg eucJP.txt eucJP ISO-2022-JP
'
# Define one test case: a commit created under one i18n.commitencoding
# is rebased with "rebase -m" under another, the rebase is expected to
# stop, and after "rebase --continue" the commit message is compared
# with the reference file converted to the new encoding.
#   $1 - encoding in effect when the commit is created
#   $2 - encoding in effect while rebasing
#   $3 - message file below $TEST_DIRECTORY/t3434
# The saved .git/rebase-merge/message is passed through git stripspace
# into two.t, which becomes the resolved content and is finally compared
# with the same expectation as the commit message.
test_rebase_continue_update_encode () {
old=$1
new=$2
msgfile=$3
# $old, $new and $msgfile are expanded when the single-quoted body is
# evaluated, not when this function is defined.
test_expect_success "rebase --continue update from $old to $new" '
(git rebase --abort || : abort current git-rebase failure) &&
git switch -c conflict-$old-$new one &&
echo for-conflict >two.t &&
git add two.t &&
git config i18n.commitencoding $old &&
git commit -F "$TEST_DIRECTORY/t3434/$msgfile" &&
git config i18n.commitencoding $new &&
test_must_fail git rebase -m master &&
test -f .git/rebase-merge/message &&
git stripspace <.git/rebase-merge/message >two.t &&
git add two.t &&
git rebase --continue &&
compare_msg $msgfile $old $new &&
: git-commit assume invalid utf-8 is latin1 &&
test_cmp expect two.t
'
}
test_rebase_continue_update_encode ISO-8859-1 UTF-8 ISO8859-1.txt
test_rebase_continue_update_encode eucJP UTF-8 eucJP.txt
test_rebase_continue_update_encode eucJP ISO-2022-JP eucJP.txt
test_done

3
t/t3434/ISO8859-1.txt Normal file
View File

@ -0,0 +1,3 @@
トヒムマヨ
チb軼鑁g

4
t/t3434/eucJP.txt Normal file
View File

@ -0,0 +1,4 @@
はれひほふ
しているのが、いるので。
濱浜ほれぷりぽれまびぐりろへ。

View File

@ -204,4 +204,41 @@ test_commit_autosquash_flags eucJP fixup
test_commit_autosquash_flags ISO-2022-JP squash
# Define one test case for "rebase --autosquash" where the squash!/fixup!
# commit is created under a different i18n.commitencoding than the
# commit it refers to.
#   $1 - option passed to "git commit": squash or fixup
#   $2 - encoding in effect when the target commit is created
#   $3 - encoding in effect when the squash/fixup commit is created
#   $4 - message file below $TEST_DIRECTORY/t3900 for the target commit
# After the rebase the history must hold three commits, and the message
# of HEAD^, read as $3, must equal the original message converted from
# $2 to UTF-8 (plus the "squash! <subject>" paragraph for squash).
test_commit_autosquash_multi_encoding () {
	flag=$1
	old=$2
	new=$3
	msg=$4
	test_expect_success "commit --$flag into $old from $new" '
		git checkout -b $flag-$old-$new C0 &&
		git config i18n.commitencoding $old &&
		echo $old >>F &&
		git commit -a -F "$TEST_DIRECTORY"/t3900/$msg &&
		test_tick &&
		echo intermediate stuff >>G &&
		git add G &&
		git commit -a -m "intermediate commit" &&
		test_tick &&
		git config i18n.commitencoding $new &&
		echo $new-$flag >>F &&
		git commit -a --$flag HEAD^ &&
		git rebase --autosquash -i HEAD^^^ &&
		git rev-list HEAD >actual &&
		test_line_count = 3 actual &&
		iconv -f $old -t UTF-8 "$TEST_DIRECTORY"/t3900/$msg >expect &&
		if test $flag = squash
		then
			subject="$(head -n 1 expect)" &&
			printf "\nsquash! %s\n" "$subject" >>expect
		fi &&
		git cat-file commit HEAD^ >raw &&
		(sed "1,/^$/d" raw | iconv -f $new -t utf-8) >actual &&
		test_cmp expect actual
	'
}
test_commit_autosquash_multi_encoding fixup UTF-8 ISO-8859-1 1-UTF-8.txt
test_commit_autosquash_multi_encoding squash ISO-8859-1 UTF-8 ISO8859-1.txt
test_commit_autosquash_multi_encoding squash eucJP ISO-2022-JP eucJP.txt
test_commit_autosquash_multi_encoding fixup ISO-2022-JP UTF-8 ISO-2022-JP.txt
test_done