cockpit/bots/tests-scan

#!/usr/bin/env python3

# This file is part of Cockpit.
#
# Copyright (C) 2015 Red Hat, Inc.
#
# Cockpit is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Cockpit is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Cockpit; If not, see <http://www.gnu.org/licenses/>.

# Additional command line options handed to tests-invoke, keyed by the
# repository ("owner/name") the test runs for
REPO_EXTRA_INVOKE_OPTIONS = {
    "mvollmer/subscription-manager": ["--html-logs"],
}

# Name of the pull request label that says: trigger external tests
LABEL_TEST_EXTERNAL = "test-external"

import argparse
import itertools
import json
import logging
import os
import pipes
import shlex
import sys
import time
import urllib.request

# Keep Python from writing .pyc files; this is set before the project-local
# import below so that it already applies to it
sys.dont_write_bytecode = True
logging.basicConfig(level=logging.INFO)

from task import github, label, redhat_network, labels_of_pull, distributed_queue, testmap

# pika is optional: remember whether it is missing so that main() can refuse
# the --amqp option with an error instead of failing later
no_amqp = False
try:
    import pika
except ImportError:
    no_amqp = True

def main():
    """Parse the command line, scan pull requests and print the resulting tasks.

    Returns the process exit status: 0 on success, 1 when --amqp was given
    without the pika module being importable or when scanning raised a
    RuntimeError. Invalid option combinations exit through parser.error().
    """
    parser = argparse.ArgumentParser(description='Bot: scan and update status of pull requests on GitHub')
    parser.add_argument('-v', '--human-readable', action="store_true", default=False,
                        help='Display human readable output rather than tasks')
    # Double quotes on purpose: 'Don''t' is two adjacent literals and reads "Dont"
    parser.add_argument('-d', '--dry', action="store_true", default=False,
                        help="Don't actually change anything on GitHub")
    parser.add_argument('--repo', default=None,
                        help='Repository to scan and checkout.')
    parser.add_argument('-c', '--context', action="append", default=[ ],
                        help='Test contexts to use.')
    parser.add_argument('-p', '--pull-number', default=None,
                        help='Single pull request to scan for tasks')
    parser.add_argument('--pull-data', default=None,
                        help='pull_request event GitHub JSON data to evaluate; mutually exclusive with -p and -s')
    parser.add_argument('-s', '--sha', default=None,
                        help='SHA belonging to pull request to scan for tasks')
    parser.add_argument('--amqp', default=None,
                        help='The host:port of the AMQP server to publish to (format host:port)')

    opts = parser.parse_args()
    if opts.amqp and no_amqp:
        logging.error("AMQP host:port specified but python-amqp not available")
        return 1
    if opts.pull_data and (opts.pull_number or opts.sha):
        parser.error("--pull-data and --pull-number/--sha are mutually exclusive")

    api = github.GitHub(repo=opts.repo)

    # HACK: The `repo` option is used throughout the code, for example repo from
    # opts is needed in `tests_invoke`, `tests_human`, `queue_test` etc.
    # Would be better to use api.repo everywhere
    opts.repo = api.repo

    try:
        policy = testmap.tests_for_project(opts.repo)
        if opts.context:
            # Contexts may be given as "context@project"; only the part before
            # the "@" is matched against the project's test map
            short_contexts = [context.split("@")[0] for context in opts.context]
            filtered = {}
            for (branch, contexts) in policy.items():
                branch_context = [context for context in short_contexts if context in contexts]
                # Branches without any requested context are dropped entirely
                if branch_context:
                    filtered[branch] = branch_context
            policy = filtered
        results = scan_for_pull_tasks(api, policy, opts, opts.repo)

        # When run from c-p/cockpit checkout without PR number or PR data we want to scan also all
        # external projects. E.g. `bots/tests-scan` in c-p/c checkout will scan all projects
        if opts.repo == "cockpit-project/cockpit" and not opts.pull_data and not opts.pull_number:
            results += scan_external_projects(opts)
    except RuntimeError as ex:
        logging.error("tests-scan: " + str(ex))
        return 1

    # Skipped tasks show up as empty/None results and are not printed
    for result in results:
        if result:
            sys.stdout.write(result + "\n")

    return 0

# Prepare a human readable output
def tests_human(priority, name, number, revision, ref, context, base, original_base, repo, bots_ref):
    if not priority:
        return
    try:
        priority = int(priority)
    except (ValueError, TypeError):
        pass
    return "{name:11} {context:25} {revision:10} {priority:2}{repo}{bots_ref}{branches}".format(
        priority=priority,
        revision=revision[0:7],
        context=context,
        name=name,
        repo=repo and "  (%s)" % repo or "",
        bots_ref=bots_ref and (" [bots@%s]" % bots_ref) or "",
        branches=base != original_base and ("   {%s...%s}" % (original_base, base)) or ""
    )

def is_internal_context(context):
    """Tell whether `context` contains one of the internal-only markers."""
    internal_markers = ("rhel", "edge", "vmware", "openstack")
    return any(marker in context for marker in internal_markers)

# Prepare a test invocation command
def tests_invoke(priority, name, number, revision, ref, context, base, original_base, repo, bots_ref, options):
    if not options.amqp and not redhat_network() and is_internal_context(context):
        return ''

    try:
        priority = int(priority)
    except (ValueError, TypeError):
        priority = 0
    if priority <= 0:
        return
    current = time.strftime('%Y%m%d-%H%M%M')

    checkout = "PRIORITY={priority:04d} bots/make-checkout --verbose"
    cmd = "TEST_PROJECT={repo} TEST_NAME={name}-{current} TEST_REVISION={revision} bots/tests-invoke --pull-number={pull_number} "
    if base:
        if base != ref:
            cmd += " --rebase={base}"
        checkout += " --base={base}"

    checkout += " --repo={repo}"

    if bots_ref:
        checkout += " --bots-ref={bots_ref}"

    # The repo of this test differs from the PR's repo
    if options.repo != repo:
        cmd = "GITHUB_BASE={github_base} " + cmd

    if repo in REPO_EXTRA_INVOKE_OPTIONS:
        cmd += " " + " ".join(REPO_EXTRA_INVOKE_OPTIONS[repo])

    # Let tests-invoke know that we are triggering different branch - it needs to post correct status
    if base != original_base:
        cmd = "TEST_BRANCH={base} " + cmd

    cmd += " {context}"
    if bots_ref:
        # we are checking the external repo on a cockpit PR, so stay on the project's master
        checkout += " {ref} && "
    else:
        # we are testing the repo itself, checkout revision from the PR
        checkout += " {ref} {revision} && "

    return (checkout + "cd bots/make-checkout-workdir && " + cmd + " ; cd ../..").format(
        priority=priority,
        name=pipes.quote(name),
        revision=pipes.quote(revision),
        base=pipes.quote(str(base)),
        ref=pipes.quote(ref),
        bots_ref=pipes.quote(bots_ref),
        context=pipes.quote(context),
        current=current,
        pull_number=number,
        repo=pipes.quote(repo),
        github_base=pipes.quote(options.repo),
    )

def queue_test(priority, name, number, revision, ref, context, base, original_base, repo, bots_ref, channel, options):
    """Publish one test task on the AMQP `channel`.

    Nothing is published when tests_invoke() yields no command for the task.
    The message priority is capped at distributed_queue.MAX_PRIORITY and the
    task goes to the 'rhel' queue for internal contexts, 'public' otherwise.
    """
    command = tests_invoke(priority, name, number, revision, ref, context, base, original_base, repo, bots_ref, options)
    if not command:
        return

    priority = min(priority, distributed_queue.MAX_PRIORITY)
    queue = 'rhel' if is_internal_context(context) else 'public'
    body = {
        "command": command,
        "type": "test",
        "sha": revision,
        "ref": ref,
        "name": name,
    }
    message_properties = pika.BasicProperties(priority=priority)
    channel.basic_publish('', queue, json.dumps(body), properties=message_properties)
    logging.info("Published '{0}' on '{1}' with command: '{2}'".format(name, revision, command))

def prioritize(status, title, labels, priority, context, number):
    """Work out the priority of one test context and the status change to post.

    status: the last known status of the context (may be an empty dict)
    title, labels: title and labels of the pull request
    priority: the baseline priority to start from
    context, number: only used for log messages

    Returns a two element list [priority, update]. `priority` is a number, or
    the status description when the test is already running. `update` is the
    status dict to post to GitHub, or None when nothing should be posted.
    """
    state = status.get("state", None)
    update = { "state": "pending" }

    if state in ("success", "failure"):
        # This commit definitively succeeded or failed
        logging.info("Skipping '{0}' on #{1} because it has already finished".format(context, number))
        priority = 0
        update = None
    elif state == "error":
        # This test errored, we try again but low priority
        priority -= 2
    elif state == "pending":
        logging.info("Not updating status for '{0}' on #{1} because it is pending".format(context, number))
        update = None

    # Ignore context when the PR has [no-test] in the title or as label, unless
    # the context was directly triggered
    wants_no_test = 'no-test' in labels or '[no-test]' in title
    if wants_no_test and status.get("description", "") != github.NOT_TESTED_DIRECT:
        logging.info("Skipping '{0}' on #{1} because it is no-test".format(context, number))
        priority = 0
        update = None

    if priority > 0:
        # Label based adjustments
        if "priority" in labels:
            priority += 2
        if "blocked" in labels:
            priority -= 1

        # Pull requests where the title starts with WIP get penalized
        if title.startswith("WIP") or "needswork" in labels:
            priority -= 1

        # Is testing already in progress? Then report its description
        # in place of a numeric priority
        description = status.get("description", "")
        if description.startswith(github.TESTING):
            logging.info("Skipping '{0}' on #{1} because it is already running".format(context, number))
            priority = description
            update = None

    if update and priority <= 0:
        logging.info("Not updating status for '{0}' on #{1} because of low priority".format(context, number))
        update = None
    elif update:
        update["description"] = github.NOT_TESTED

    return [priority, update]

def dict_is_subset(full, check):
    """Return True when every key of `check` is in `full` with an equal value."""
    return not any(
        key not in full or full[key] != value
        for (key, value) in check.items()
    )

def update_status(api, revision, context, last, changes):
    """Post the status `changes` for `revision` unless they are already in place.

    When `changes` is non-empty its "context" key is set to `context`, and it
    is posted to "statuses/<revision>" only if `last` does not already contain
    all of it. Reported errors are written to stderr.

    Returns False when the response lists errors, True in every other case.
    """
    if not changes:
        return True

    changes["context"] = context
    if dict_is_subset(last, changes):
        return True

    response = api.post("statuses/" + revision, changes, accept=[ 422 ]) # 422 Unprocessable Entity
    errors = response.get("errors", None)
    if not errors:
        return True

    for error in errors:
        message = error.get('message', json.dumps(error))
        sys.stderr.write("{0}: {1}\n".format(revision, message))
        sys.stderr.write(json.dumps(changes))
    return False

def cockpit_tasks(api, update, branch_contexts, repo, pull_data, pull_number, sha, amqp):
    """Collect the test tasks for the pull requests of one repository.

    api: GitHub API object for `repo`; get(), pulls(), whitelist(), statuses()
        and (through update_status) post() are used
    update: when true, status changes and labels are written to GitHub
    branch_contexts: mapping of base branch to the list of its test contexts
    repo: "owner/name" of the repository being scanned
    pull_data: JSON text of a pull_request event, or None
    pull_number: number of the single pull request to scan, or None
    sha: when given, only pull requests whose head revision starts with it
    amqp: truthy when tasks are published to AMQP

    Returns a list of tuples (priority, name, number, revision, ref, context,
    base, original_base, repo, bots_ref), in the positional order expected by
    tests_human(), tests_invoke() and queue_test().

    Raises RuntimeError when `pull_number` is given but no such pull request
    is found.
    """
    results = []
    pulls = []
    contexts = set(itertools.chain(*branch_contexts.values()))

    if pull_data:
        pulls.append(json.loads(pull_data)['pull_request'])
    elif pull_number:
        pull = api.get("pulls/{0}".format(pull_number))
        if not pull:
            # Callers process the return value as a list, so a failure has to
            # travel as an exception; main() reports RuntimeError and exits 1
            raise RuntimeError("Can't find pull request {0}".format(pull_number))
        pulls.append(pull)
    else:
        pulls = api.pulls()

    # The two helpers below do not depend on the pull request being processed

    def get_externals():
        # "context@project" for every master context of every non-cockpit project
        result = []
        for proj_repo in testmap.projects():
            if proj_repo == "cockpit-project/cockpit":
                continue
            for context in testmap.tests_for_project(proj_repo).get("master", []):
                result.append(context + "@" + proj_repo)
        return result

    def is_valid_context(context):
        # "context@owner/name[/branch]" is checked against that project's
        # test map, a plain context against the contexts of this scan
        (os_scenario, _, repo_branch) = context.partition("@")
        if repo_branch:
            repo_branch = "/".join(repo_branch.split("/")[:2])
            repo_contexts = testmap.tests_for_project(repo_branch).values()
            return os_scenario in set(itertools.chain(*repo_contexts))
        else:
            return os_scenario in contexts

    whitelist = api.whitelist()
    for pull in pulls:
        title = pull["title"]
        number = pull["number"]
        revision = pull["head"]["sha"]

        logging.info("Processing #{0} titled '{1}' on revision {2}".format(number, title, revision))

        # If sha is present only scan PR with selected sha. Checked before
        # the statuses are fetched, so skipped pull requests cost no API call
        if sha and not revision.startswith(sha):
            continue

        statuses = api.statuses(revision)
        login = pull["head"]["user"]["login"]
        base = pull["base"]["ref"]  # The branch this pull request targets

        labels = labels_of_pull(pull)

        baseline = distributed_queue.BASELINE_PRIORITY
        # amqp automatically prioritizes on age
        if not amqp:
            # modify the baseline slightly to favor older pull requests, so that we don't
            # end up with a bunch of half tested pull requests
            baseline += 1.0 - (min(100000, float(number)) / 100000)

        def trigger_externals():
            if repo != "cockpit-project/cockpit":  # already a non-cockpit project
                return False
            if base != "master":  # bots/ is always taken from master branch
                return False
            if LABEL_TEST_EXTERNAL in labels:  # already checked before?
                return True

            if not statuses:
                # this is the first time tests-scan looks at a PR, so determine if it changes bots/
                with urllib.request.urlopen(pull["patch_url"]) as f:
                    # enough to look at the git commit header, it lists all changed files
                    if b"bots/" in f.read(4000):
                        if update:
                            # remember for next run, to avoid downloading the patch multiple times
                            label(number, [LABEL_TEST_EXTERNAL])
                        return True

            return False

        # Create list of statuses to process
        todos = {}
        for existing in statuses: # Firstly add all valid contexts that already exist in github
            if is_valid_context(existing):
                todos[existing] = statuses[existing]
        if not statuses: # If none defined in github add basic set of contexts
            for context in branch_contexts.get(base, contexts):
                todos[context] = {}
        if trigger_externals():
            for context in get_externals():
                if context not in todos:
                    todos[context] = {}

        # The same for every context of this pull request
        ref = "pull/%d/head" % number

        for context in todos:
            # Get correct project and branch. Ones from test name have priority
            project = repo
            branch = base
            (os_scenario, _, repo_branch) = context.partition("@")
            repo_branch = repo_branch.split("/")
            if len(repo_branch) == 2:
                project = "/".join(repo_branch)
                branch = "master"
            elif len(repo_branch) == 3:
                project = "/".join(repo_branch[:2])
                branch = repo_branch[2]

            # For unmarked and untested status, user must be in whitelist
            # Note this only applies to this specific commit. A new status
            # will apply if the user pushes a new commit.
            status = todos[context]
            if login not in whitelist and status.get("description", github.NO_TESTING) == github.NO_TESTING:
                priority = github.NO_TESTING
                changes = { "description": github.NO_TESTING, "context": context, "state": "pending" }
            else:
                (priority, changes) = prioritize(status, title, labels, baseline, context, number)
            if not update or update_status(api, revision, context, status, changes):
                # What to check out: the pull request itself, or the branch of
                # the other project/branch this context is about
                checkout_ref = ref
                if project != repo:
                    checkout_ref = "master"
                if base != branch:
                    checkout_ref = branch
                results.append((priority,
                                "pull-%d" % number,
                                number,
                                revision,
                                checkout_ref,
                                os_scenario,
                                branch,
                                base,
                                project,
                                ref if project != repo or base != branch else None))

    return results

def scan_for_pull_tasks(api, policy, opts, repo):
    """Scan the pull requests of `repo` and turn the tasks into output.

    Without read/write access to /dev/kvm an error is logged and an empty
    list is returned. Otherwise each task becomes, depending on `opts`:
    a human readable line (--human-readable, sorted in descending order of
    the tasks' string form), the result of publishing it to AMQP (--amqp),
    or a shell command.
    """
    if not os.access("/dev/kvm", os.R_OK | os.W_OK):
        logging.error("tests-scan: No /dev/kvm access, not running tests here")
        return []

    tasks = cockpit_tasks(api, not opts.dry, policy, repo, opts.pull_data, opts.pull_number, opts.sha, opts.amqp)

    if opts.human_readable:
        tasks.sort(reverse=True, key=str)
        return [tests_human(*task) for task in tasks]

    if not opts.amqp:
        return [tests_invoke(*task, options=opts) for task in tasks]

    with distributed_queue.DistributedQueue(opts.amqp, ['rhel', 'public']) as q:
        return [queue_test(*task, channel=q.channel, options=opts) for task in tasks]

def scan_external_projects(opts):
    """Scan every project of the test map except cockpit-project/cockpit.

    Returns the concatenated results of scan_for_pull_tasks() for each of them.
    """
    tests = []
    for repo in testmap.projects():
        if repo == "cockpit-project/cockpit":
            continue
        project_api = github.GitHub(repo=repo)
        project_policy = testmap.tests_for_project(repo)
        tests.extend(scan_for_pull_tasks(project_api, project_policy, opts, repo))
    return tests

# Executed as a script: main()'s return value becomes the process exit status
if __name__ == '__main__':
    sys.exit(main())