146 lines
4.5 KiB
Python
Executable File
146 lines
4.5 KiB
Python
Executable File
#!/usr/bin/python3
|
|
# generate debian/copyright from tools/debian/copyright.template and the
# *.LEGAL.txt license summaries of the bundles in dist/
|
|
# Author: Martin Pitt <mpitt@debian.org>
|
|
# Allison Karlitskaya <allison.karlitskaya@redhat.com>
|
|
|
|
import argparse
|
|
import gzip
|
|
import os
|
|
import re
|
|
import sys
|
|
import time
|
|
from typing import Dict, Set
|
|
|
|
# Two levels above this script: the directory that contains dist/ and tools/.
BASE_DIR = os.path.realpath(os.path.join(__file__, os.pardir, os.pardir))

# Hand-maintained skeleton of debian/copyright; its "#NPM" line is replaced
# with the paragraphs generated by this script.
TEMPLATE_FILE = f'{BASE_DIR}/tools/debian/copyright.template'

# Copyright line for our own code; it is added to every bundle's paragraph.
own_copyright = f"Copyright (C) 2013 - {time.strftime('%Y')} Red Hat, Inc."
|
|
|
|
# Maps a regular expression (searched in each license comment) to the list of
# License: short names that a match implies.
license_patterns = {
    # Common patterns
    r"\bMIT\b": ["MIT"],

    # focus-trap's comment does not spell out its license; see
    # https://github.com/focus-trap/focus-trap/blob/master/LICENSE
    r"\bfocus-trap\b": ["MIT"],
}
|
|
|
|
# Maps a regular expression (searched in each license comment) to the list of
# copyright templates that a match produces; templates may refer to groups of
# the match (e.g. \1).
copyright_patterns = {
    # Common patterns
    r"Copyright (.*)$": [r"\1"],
    r"@copyright (.*)$": [r"\1"],
    r"\(c\) (.*)$": [r"\1"],

    # focus-trap's comment carries no copyright line; see
    # https://github.com/focus-trap/focus-trap/blob/master/LICENSE
    r"\bfocus-trap\b": ["2015-2016 David Clark"],
}
|
|
|
|
# Every pattern that matched at least once; filled in by find_patterns() and
# used afterwards to detect stale entries in the pattern tables.
used_patterns: Set[str] = set()
|
|
|
|
|
|
def parse_args():
    """Parse the command line and return the resulting namespace.

    No options are defined here; running the parser still provides --help
    and rejects unexpected arguments.
    """
    parser = argparse.ArgumentParser(
        description='Generate debian/copyright file from template and node_modules',
    )
    return parser.parse_args()
|
|
|
|
|
|
def template_licenses(template):
    """Return set of existing License: short names

    Every line of `template` that starts with "License:" contributes the
    lower-cased text after the colon, with surrounding whitespace removed.
    Lines that carry no value after the colon are ignored.
    """
    prefix = 'License:'
    ids = set()
    for line in template.splitlines():
        if not line.startswith(prefix):
            continue
        # Slice instead of split(): an empty value or a missing space after
        # the colon must not raise IndexError.
        name = line[len(prefix):].strip()
        if name:
            ids.add(name.lower())
    return ids
|
|
|
|
|
|
def find_patterns(patterns, text):
    """Return the set of expanded templates for every pattern match in text.

    `patterns` maps a regular expression to a list of templates. Each
    expression is searched in `text` with re.MULTILINE; every match is
    expanded with each of its templates. Expressions that matched at least
    once are recorded in the global `used_patterns`.
    """
    found = set()

    for regex, expansions in patterns.items():
        hits = list(re.finditer(regex, text, re.MULTILINE))
        if not hits:
            continue

        used_patterns.add(regex)
        for hit in hits:
            for expansion in expansions:
                found.add(hit.expand(expansion))

    return found
|
|
|
|
#
|
|
# main
|
|
#
|
|
|
|
|
|
def _read_legal_file(path):
    """Return the text of a .LEGAL.txt file, transparently handling .gz.

    The file is always decoded as UTF-8, like the template and the generated
    output, so the result does not depend on the locale of the build.
    """
    opener = gzip.open if path.endswith('.gz') else open
    with opener(path, 'rt', encoding='UTF-8') as legal_file:
        return legal_file.read()


def _scan_dist_bundles(license_ids):
    """Scan dist/ bundles for third-party copyrights and licenses.

    Every file below BASE_DIR/dist whose name contains ".LEGAL.txt" is split
    into blank-line separated comments. Each comment must match at least one
    license pattern and at least one copyright pattern.

    `license_ids` is the set of lower-cased License: short names known to the
    template.

    Returns a (copyrights, licenses) pair of dicts, both keyed by the Files:
    glob of the bundle directory (relative to BASE_DIR).

    Raises SystemError if a comment matches no license or no copyright
    pattern, and KeyError if a detected license is not in `license_ids`.
    """
    dist_copyrights: Dict[str, Set[str]] = {}  # Files: dirglob → set(copyrights)
    dist_licenses: Dict[str, Set[str]] = {}  # Files: dirglob → set(licenses)

    for directory, _subdirs, files in os.walk(f'{BASE_DIR}/dist'):
        for file in files:
            if '.LEGAL.txt' not in file:
                continue

            contents = _read_legal_file(os.path.join(directory, file))
            directory_glob = os.path.relpath(directory, start=BASE_DIR) + '/*'

            for comment in contents.split('\n\n'):
                if comment.strip() == "" or "Bundled license information:" in comment:
                    continue

                licenses = find_patterns(license_patterns, comment)
                if not licenses:
                    raise SystemError('Can not determine licenses of:\n%s' % comment)
                for license_id in licenses:
                    if license_id.lower() not in license_ids:
                        raise KeyError(f'License {license_id} not found in {TEMPLATE_FILE}')

                # All bundles also contain our own code
                licenses.add("LGPL-2.1-or-later")
                dist_licenses.setdefault(directory_glob, set()).update(licenses)

                copyrights = find_patterns(copyright_patterns, comment)
                if not copyrights:
                    raise SystemError('Did not find any copyrights in:\n%s' % comment)

                # All bundles also contain our own code
                copyrights.add(own_copyright)
                dist_copyrights.setdefault(directory_glob, set()).update(copyrights)

    return dist_copyrights, dist_licenses


def _check_unused_patterns():
    """Exit with an error if a license or copyright pattern never matched."""
    unused = (set(license_patterns) | set(copyright_patterns)) - used_patterns
    if not unused:
        return

    # We'll have no LEGAL.txt files in dev builds,
    # so of course we won't use any of the patterns
    if os.getenv('NODE_ENV') == 'development' or os.getenv('IGNORE_UNUSED_PATTERNS'):
        return

    # Report the first pattern in sorted order so the message is reproducible.
    sys.exit(f'build-debian-copyright: Unused pattern: {sorted(unused)[0]}')


def main():
    """Write debian/copyright to stdout.

    The template is copied line by line; the line containing "#NPM" is
    replaced with one Files:/Copyright:/License: paragraph per scanned
    bundle directory.
    """
    parse_args()

    with open(TEMPLATE_FILE, encoding='UTF-8') as f:
        template = f.read()

    license_ids = template_licenses(template)
    dist_copyrights, dist_licenses = _scan_dist_bundles(license_ids)
    _check_unused_patterns()

    paragraphs = [
        "Files: {0}\nCopyright: {1}\nLicense: {2}".format(
            dirglob,
            '\n '.join(sorted(dist_copyrights[dirglob])),
            ' and '.join(sorted(dist_licenses[dirglob])))
        for dirglob in sorted(dist_copyrights)
    ]
    npm_section = '\n\n'.join(paragraphs)

    # force UTF-8 output, even when running in C locale
    output = sys.stdout.buffer
    for line in template.splitlines():
        text = npm_section if '#NPM' in line else line
        output.write(text.encode('UTF-8'))
        output.write(b'\n')


if __name__ == '__main__':
    main()
|