postgresql/src/tools/git_changelog

420 lines
11 KiB
Perl
Executable File

#!/usr/bin/perl
# Copyright (c) 2021-2024, PostgreSQL Global Development Group
#
# src/tools/git_changelog
#
# Display all commits on active branches, merging together commits from
# different branches that occur close together in time and with identical
# log messages.
#
# By default, commits are annotated with branch and release info thus:
# Branch: REL8_3_STABLE Release: REL8_3_2 [92c3a8004] 2008-03-29 00:15:37 +0000
# This shows that the commit on REL8_3_STABLE was released in 8.3.2.
# Commits on master will usually instead have notes like
# Branch: master Release: REL8_4_BR [6fc9d4272] 2008-03-29 00:15:28 +0000
# showing that this commit is ancestral to release branches 8.4 and later.
# If no Release: marker appears, the commit hasn't yet made it into any
# release.
#
# The --brief option shortens that to a format like:
# YYYY-MM-DD [hash] abbreviated commit subject line
# Since the branch isn't shown, this is mainly useful in conjunction
# with --master-only.
#
# Most of the time, matchable commits occur in the same order on all branches,
# and we print them out in that order. However, if commit A occurs before
# commit B on branch X and commit B occurs before commit A on branch Y, then
# there's no ordering which is consistent with both branches. In such cases
# we sort a merged commit according to its timestamp on the newest branch
# it appears in.
#
# The default output of this script is meant for generating minor release
# notes, where we need to know which branches a merged commit affects.
#
# To generate major release notes, use:
# git_changelog --master-only --brief --oldest-first --since='start-date'
# To find the appropriate start date, use:
# git show --summary $(git merge-base REL_12_STABLE master)
# where the branch to mention is the previously forked-off branch. This
# shows the last commit before that branch was made.
#
# Note that --master-only is an imperfect filter, since it will not detect
# cases where a master patch was back-patched a while later or with a slightly
# different commit message. To find such cases, it's a good idea to look
# through the output of
# git_changelog --non-master-only --oldest-first --since='start-date'
# and then remove anything from the --master-only output that would be
# duplicative.
use strict;
use warnings FATAL => 'all';
require Time::Local;
require Getopt::Long;
require IPC::Open2;
# Branches to examine, listed from newest to oldest.  Update this list
# whenever the set of interesting branches changes.  (It could be derived
# from "git branch", but that is not worth the trouble.)
# NB: master must be the first entry!
my @BRANCHES = qw(
  master
  REL_16_STABLE REL_15_STABLE REL_14_STABLE REL_13_STABLE
  REL_12_STABLE REL_11_STABLE REL_10_STABLE REL9_6_STABLE REL9_5_STABLE
  REL9_4_STABLE REL9_3_STABLE REL9_2_STABLE REL9_1_STABLE REL9_0_STABLE
  REL8_4_STABLE REL8_3_STABLE REL8_2_STABLE REL8_1_STABLE REL8_0_STABLE
  REL7_4_STABLE REL7_3_STABLE REL7_2_STABLE REL7_1_STABLE REL7_0_PATCHES
  REL6_5_PATCHES REL6_4
);

# Maximum distance, in seconds, between the timestamps of two commits that
# may still be merged into one entry (one day).  Might want to make this
# parameter user-settable.
my $timestamp_slop = 86_400;

# Command-line switches; all default to "off".
my $brief           = 0;
my $details_after   = 0;
my $master_only     = 0;
my $non_master_only = 0;
my $oldest_first    = 0;
my $post_date       = 0;
my $since;

# With --oldest-first nothing is printed immediately: the text of the entry
# being formatted accumulates in $output_line, and finished entries are
# collected in @output_buffer.
my $output_line = '';
my @output_buffer;

Getopt::Long::GetOptions(
    'brief'           => \$brief,
    'details-after'   => \$details_after,
    'master-only'     => \$master_only,
    'non-master-only' => \$non_master_only,
    'oldest-first'    => \$oldest_first,
    'post-date'       => \$post_date,
    'since=s'         => \$since,
) or usage();

# No non-option arguments are accepted.
usage() if @ARGV;

# Base "git log" command line; the revision range is appended per branch.
my @git = qw(git log --format=fuller --date=iso);
push @git, "--since=$since" if defined $since;
# Collect the release tag data: %rel_tags maps the hash of each tagged
# commit to its release tag name.  Tags whose names don't look like release
# tags are ignored.
my %rel_tags;

{
    my $cmd = "git for-each-ref refs/tags";

    # open2 raises an exception by itself if the command cannot be started,
    # so its return value needs no separate check.
    my $pid = IPC::Open2::open2(my $git_out, my $git_in, $cmd);

    while (my $line = <$git_out>)
    {
        # Only lines of the form "<hash> commit refs/tags/<tag>" are used.
        if ($line =~ m|^([a-f0-9]+)\s+commit\s+refs/tags/(\S+)|)
        {
            my $commit = $1;
            my $tag    = $2;
            if (   $tag =~ /^REL_\d+_\d+$/
                || $tag =~ /^REL\d+_\d+$/
                || $tag =~ /^REL\d+_\d+_\d+$/)
            {
                $rel_tags{$commit} = $tag;
            }
        }
    }

    waitpid($pid, 0);

    # Test the complete wait status rather than just the exit code held in
    # its high byte, so that a child killed by a signal is also reported as
    # a failure.
    die "$cmd failed" if $? != 0;
}
# Collect the commit data

# Merged commit entries, keyed by the hash_commit() value of the commits
# they contain; each value is a list of entries (see push_commit).
my %all_commits;

# For each branch, the list of merged entries in the order in which
# "git log" reported that branch's commits.
my %all_commits_by_branch;

# This remembers where each branch sprouted from master.  Note the values
# will be wrong if --since terminates the log listing before the branch
# sprouts; but in that case it doesn't matter since we also won't reach
# the part of master where it would matter.
my %sprout_tags;

for my $branch (@BRANCHES)
{
    my @cmd = @git;
    if ($branch eq "master")
    {
        push @cmd, "origin/$branch";
    }
    else
    {
        # List only the commits that are not on master, and ask for the
        # parent hashes so that the branch's sprout point can be found.
        push @cmd, "--parents";
        push @cmd, "master..origin/$branch";
    }

    # open2 raises an exception by itself if the command cannot be started,
    # so its return value needs no separate check.
    my $pid = IPC::Open2::open2(my $git_out, my $git_in, @cmd);

    # Most recent release tag seen so far in this branch's listing; it is
    # recorded in every commit read from that point on.
    my $last_tag = undef;

    # First parent hash shown on the most recently read "commit" line.
    my $last_parent;

    # Fields of the commit currently being read.
    my %commit;

    while (my $line = <$git_out>)
    {
        if ($line =~ /^commit\s+(\S+)/)
        {
            # A new commit starts: file the previous one, if any.
            push_commit(\%commit) if %commit;
            $last_tag = $rel_tags{$1} if defined $rel_tags{$1};
            %commit = (
                'branch'   => $branch,
                'commit'   => $1,
                'last_tag' => $last_tag,
                'message'  => '',);
            if ($line =~ /^commit\s+\S+\s+(\S+)/)
            {
                $last_parent = $1;
            }
            else
            {
                $last_parent = undef;
            }
        }
        elsif ($line =~ /^Author:\s+(.*)/)
        {
            $commit{'author'} = $1;
        }
        elsif ($line =~ /^CommitDate:\s+(.*)/)
        {
            $commit{'date'} = $1;
        }
        elsif ($line =~ /^\s\s/)
        {
            # Lines starting with at least two whitespace characters are
            # taken to be commit message text.
            $commit{'message'} .= $line;
        }
    }
    push_commit(\%commit) if %commit;

    # The parent of the last commit listed for the branch is recorded as
    # the point where this branch sprouted.
    $sprout_tags{$last_parent} = $branch if defined $last_parent;

    waitpid($pid, 0);

    # Test the complete wait status rather than just the exit code held in
    # its high byte, so that a child killed by a signal is also reported as
    # a failure.
    die "@cmd failed" if $? != 0;
}
# Walk the master branch and fill in its release tags.  The other branches
# were tagged while they were being read, but master can only be handled
# once %sprout_tags is complete.  In post-date mode we additionally insert
# phony per-branch entries for every branch that sprouted after a given
# master commit was made.
{
    my %sprouted_branches;
    my $last_tag;

    for my $merged (@{ $all_commits_by_branch{'master'} })
    {
        my $hash         = $merged->{'commit'};
        my $master_entry = $merged->{'commits'}->[0];
        my $sprout       = $sprout_tags{$hash};

        $last_tag = $rel_tags{$hash} if defined $rel_tags{$hash};
        if (defined $sprout)
        {
            # Turn the branch name into a sprout tag: REL_nn_BR for the
            # REL_nn_* branches, RELn_n_BR for the older naming scheme.
            $last_tag = $sprout;
            $last_tag =~ s/^(REL_\d+).*/${1}_BR/;
            $last_tag =~ s/^(REL\d+_\d+).*/${1}_BR/;
        }
        $master_entry->{'last_tag'} = $last_tag;

        next if !$post_date;

        $sprouted_branches{$sprout} = 1 if defined $sprout;

        # Place copies of the master entry, one per sprouted branch,
        # directly after the master entry and ahead of any other commits.
        my ($first, @others) = @{ $merged->{'commits'} };
        my @phony =
          map { +{ %{$master_entry}, 'branch' => $_ } }
          reverse sort keys %sprouted_branches;
        $merged->{'commits'} = [ $first, @phony, @others ];
    }
}
# Merge the per-branch lists into a single output stream.  %position holds,
# for each branch, the index of its first entry that is not yet done.
my %position = map { $_ => 0 } @BRANCHES;

MERGE:
while (1)
{
    # Among the leading entries of all branches pick the one with the
    # largest timestamp; on a tie the branch listed first in @BRANCHES wins.
    my ($best_branch, $best_timestamp);
    for my $branch (@BRANCHES)
    {
        my $leader = $all_commits_by_branch{$branch}->[ $position{$branch} ];
        next if !defined $leader;
        next
          if defined $best_branch
          && !($leader->{'timestamp'} > $best_timestamp);
        $best_branch    = $branch;
        $best_timestamp = $leader->{'timestamp'};
    }

    # Every branch is exhausted: we are finished.
    last MERGE if !defined $best_branch;

    my $winner =
      $all_commits_by_branch{$best_branch}->[ $position{$best_branch} ];

    # Apply the --master-only / --non-master-only filters.
    my @entries   = @{ $winner->{'commits'} };
    my $on_master = grep { $_->{'branch'} eq 'master' } @entries;
    my $print_it  = 1;
    if ($master_only)
    {
        $print_it = (@entries == 1 && $on_master);
    }
    elsif ($non_master_only)
    {
        $print_it = !$on_master;
    }

    if ($print_it)
    {
        if ($details_after)
        {
            output_str("%s", $winner->{'message'} . "\n");
            output_details($winner);
        }
        else
        {
            output_details($winner);
            output_str("%s", $winner->{'message'} . "\n");
        }

        # In oldest-first mode the entry was accumulated in $output_line;
        # put it at the front so the final listing comes out reversed.
        if ($oldest_first)
        {
            unshift(@output_buffer, $output_line);
        }
        $output_line = '';
    }

    $winner->{'done'} = 1;

    # Step every branch past all leading entries that are already done.
    for my $branch (@BRANCHES)
    {
      SKIP_DONE:
        while (1)
        {
            my $leader =
              $all_commits_by_branch{$branch}->[ $position{$branch} ];
            last SKIP_DONE if !(defined $leader && $leader->{'done'});
            ++$position{$branch};
        }
    }
}

print @output_buffer if ($oldest_first);
# Record one commit read from "git log".
#
# $c is a hash reference with the keys 'branch', 'commit', 'date',
# 'last_tag', 'author' and 'message'.  The commit is attached to an existing
# merged entry with the same author and message and a timestamp less than
# $timestamp_slop seconds away, or else a new merged entry is created for
# it.  Either way the entry is appended to the branch's list in
# %all_commits_by_branch.
sub push_commit
{
    my ($c) = @_;
    my $key    = hash_commit($c);
    my $when   = parse_datetime($c->{'date'});
    my $branch = $c->{'branch'};

    # Two commits of the same branch are never merged, even when author,
    # message and timestamps would allow it.  A commit from another branch
    # may therefore have several possible partners; take the one that is
    # closest in time.
    my ($match, $match_gap);
    for my $candidate (@{ $all_commits{$key} })
    {
        my $gap = abs($when - $candidate->{'timestamp'});
        next if !($gap < $timestamp_slop);
        next if exists $candidate->{'branch_position'}{$branch};
        next if defined $match && !($gap < $match_gap);
        $match     = $candidate;
        $match_gap = $gap;
    }

    # No suitable partner: start a new merged entry.
    if (!defined $match)
    {
        $match = {
            'author'    => $c->{'author'},
            'message'   => $c->{'message'},
            'commit'    => $c->{'commit'},
            'commits'   => [],
            'timestamp' => $when
        };
        push @{ $all_commits{$key} }, $match;
    }

    # Keep only the per-branch fields that are needed later on.
    my @kept = ('branch', 'commit', 'date', 'last_tag');
    my %small;
    @small{@kept} = @{$c}{@kept};
    push @{ $match->{'commits'} }, \%small;

    # Append to the branch's list and remember the index it got there.
    push @{ $all_commits_by_branch{$branch} }, $match;
    $match->{'branch_position'}{$branch} =
      $#{ $all_commits_by_branch{$branch} };
    return;
}
# Build the key under which a commit is looked up for merging: its author
# and its message, separated by a NUL character.
sub hash_commit
{
    my ($c) = @_;
    return join("\0", $c->{'author'}, $c->{'message'});
}
# Convert a date of the form "YYYY-MM-DD HH:MM:SS +ZZZZ" (the format
# requested from git with --date=iso) into seconds since the Unix epoch.
#
# Dies if the string does not have that form.  Without this check a failed
# match would leave the capture variables holding whatever an earlier match
# set, and a bogus timestamp would be computed from them.
sub parse_datetime
{
    my ($dt) = @_;

    die "missing date\n" if !defined $dt;

    my ($year, $mon, $mday, $hour, $min, $sec, $sign, $tzh, $tzm) =
      $dt =~
      /^(\d\d\d\d)-(\d\d)-(\d\d)\s+(\d\d):(\d\d):(\d\d)\s+([-+])(\d\d)(\d\d)$/
      or die "unrecognized date format: \"$dt\"\n";

    # Interpret the broken-down time as if it were UTC ...
    my $gm = Time::Local::timegm($sec, $min, $hour, $mday, $mon - 1, $year);

    # ... then remove the zone offset to get the real UTC time.
    my $tzoffset = ($tzh * 60 + $tzm) * 60;
    $tzoffset = -$tzoffset if $sign eq '-';
    return $gm - $tzoffset;
}
# printf-style output routine.  The first argument is the format, the rest
# are its values.  In oldest-first mode the formatted text is appended to
# $output_line instead of being printed.
sub output_str
{
    my ($format, @values) = @_;

    if ($oldest_first)
    {
        $output_line .= sprintf($format, @values);
    }
    else
    {
        printf($format, @values);
    }
    return;
}
# Emit the author line and the per-branch detail lines of one merged entry.
#
# $item is a merged entry as built by push_commit: it supplies 'author',
# 'message' and the list 'commits' whose elements carry 'branch', 'commit',
# 'date' and 'last_tag'.  All output goes through output_str.
sub output_details
{
    my $item = shift;

    if ($details_after)
    {
        # output only author name, not email address; if the author string
        # has no trailing <address> part, show it unchanged instead of
        # relying on capture variables left over from some earlier match
        my $name = $item->{'author'};
        $name = $1 if $name =~ m{^(.*?)\s*<[^>]*>$};
        output_str("(%s)\n", $name);
    }
    else
    {
        output_str("Author: %s\n", $item->{'author'});
    }

    # The subject shown in brief mode is the first non-blank line of the
    # message without its leading whitespace.  It is the same for every
    # commit of the entry, so extract it once.
    my $subject;
    if ($brief)
    {
        ($subject) = $item->{'message'} =~ m/^\s*(.*)/;
    }

    foreach my $c (@{ $item->{'commits'} })
    {
        if ($brief)
        {
            output_str(
                "%s [%s] %s\n",
                substr($c->{'date'},   0, 10),
                substr($c->{'commit'}, 0, 9),
                substr($subject,       0, 56));
        }
        else
        {
            output_str("Branch: %s ", $c->{'branch'})
              if (!$master_only);
            output_str("Release: %s ", $c->{'last_tag'})
              if (defined $c->{'last_tag'});
            output_str("[%s] %s\n", substr($c->{'commit'}, 0, 9),
                $c->{'date'});
        }
    }
    output_str("\n");
    return;
}
# Print the command-line synopsis on standard error and terminate the
# script with exit status 1.
sub usage
{
    my $help = <<'EOM';
Usage: git_changelog [--brief/-b] [--details-after/-d] [--master-only/-m] [--non-master-only/-n] [--oldest-first/-o] [--post-date/-p] [--since=SINCE]
--brief Shorten commit descriptions, omitting branch identification
--details-after Show branch and author info after the commit description
--master-only Show only commits made just in the master branch
--non-master-only Show only commits made just in back branches
--oldest-first Show oldest commits first
--post-date Show branches made after a commit occurred
--since Show only commits dated since SINCE
EOM
    print STDERR $help;
    exit 1;
}