# Origin: patch a9f72b5a59cdda08b6d05239f1df647462b5f047
#   From: Robin Jarry, Sun, 28 Nov 2021 21:07:35 +0100
#   Subject: git: add function to parse message trailers
#   Signed-off-by: Robin Jarry
#   Adds hubsrht/trailers.py
"""
Pure python implementation of the message trailer parsing algorithm used by
git (and libgit2).

It is intended for use on finalized commit messages only: lines starting with
a comment character are not ignored.

Link: https://git-scm.com/docs/git-interpret-trailers
Link: https://github.com/git/git/blob/master/trailer.c
Link: https://github.com/libgit2/libgit2/blob/main/src/trailer.c
"""

import re
from typing import List, Tuple

_git_generated_prefixes = (
    "Signed-off-by: ",
    "(cherry picked from commit ",
)

# "<token> : <value>" where the token is made of alphanumerics and dashes and
# starts at the very beginning of the line. Compiled once at import time.
_trailer_re = re.compile(r"^([A-Za-z\d][A-Za-z\d-]*)\s*:\s*(.*)$")


def _find_trailer_start(lines: List[str]) -> int:
    """
    Return the index of the first line of the trailer block in *lines*, or
    ``len(lines)`` when the last paragraph does not qualify as trailers.

    The lines are scanned from the end, looking for a blank line before a set
    of non-blank lines that (i) are all trailers, or (ii) contains at least
    one Git-generated trailer and consists of at least 25% trailers.
    """
    recognized_prefix = False
    only_spaces = True
    trailer_lines = non_trailer_lines = 0
    possible_continuation_lines = 0

    for i in range(len(lines) - 1, -1, -1):
        line = lines[i]

        if not line.strip():
            # blank line
            if only_spaces:
                # trailing blank lines are skipped
                continue
            # Indented lines sitting directly below the blank line have no
            # trailer to continue, so they count as regular text.
            non_trailer_lines += possible_continuation_lines
            if recognized_prefix and trailer_lines * 3 >= non_trailer_lines:
                return i + 1
            if trailer_lines > 0 and non_trailer_lines == 0:
                return i + 1
            return len(lines)

        only_spaces = False

        if line.startswith(_git_generated_prefixes):
            trailer_lines += 1
            possible_continuation_lines = 0
            recognized_prefix = True
        elif _trailer_re.match(line):
            trailer_lines += 1
            possible_continuation_lines = 0
        elif line[0] in (" ", "\t"):
            # May be the continuation of a trailer found further up; this is
            # only known once the line above has been examined.
            possible_continuation_lines += 1
        else:
            # Regular text: the indented lines seen below it belong to it.
            non_trailer_lines += 1 + possible_continuation_lines
            possible_continuation_lines = 0

    # No blank line found before running out of lines: no trailer block.
    return len(lines)


def commit_trailers(message: str) -> List[Tuple[str, str]]:
    """
    Extract the trailers from a commit message. Return a list of pairs of
    (name, value), in the order they appear in the message.

    Trailer values may span multiple lines: a line that starts with a space
    or a tab and directly follows a trailer (or one of its continuation
    lines) is appended to that trailer value, separated by a newline and with
    its leading whitespace preserved.

    An empty list is returned when the message has no body or when its last
    paragraph is not recognized as a trailer block.

    This borrows a large amount of logic from git core (trailer.c).
    """
    lines = message.strip().splitlines()

    # The first paragraph is the title and cannot be trailers. A line that
    # only contains whitespace ends the title, consistently with the blank
    # line test used when looking for the start of the trailer block.
    title_end = next(
        (idx for idx, line in enumerate(lines) if not line.strip()),
        len(lines),
    )
    lines = lines[title_end:]

    start = _find_trailer_start(lines)

    # Collect trailer names and values as mutable [name, value] pairs so that
    # continuation lines can be folded into the most recent one.
    parsed: List[List[str]] = []
    foldable = False  # True while the previous line belongs to a trailer
    for line in lines[start:]:
        match = _trailer_re.match(line)
        if match:
            parsed.append([match[1], match[2]])
            foldable = True
        elif foldable and line[:1] in (" ", "\t"):
            # continuation line
            parsed[-1][1] += "\n" + line
        else:
            # A non-trailer line ends the current trailer: indented lines
            # that follow it must not be glued onto an earlier trailer.
            foldable = False

    return [(name, value) for name, value in parsed]