Add timeout support to ansible-test. (#53302)

* Add timeout support to ansible-test.
* Fix ansible-test tar filename filter bug.
* Update timeouts used on Shippable.
* Kill subprocesses when parent process terminates.
* Require explicit use of env --show option.
This commit is contained in:
Matt Clay 2019-03-05 11:58:13 -08:00 committed by GitHub
parent 44b347aef5
commit a8d829d9c3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 218 additions and 20 deletions

1
.gitignore vendored
View File

@ -105,3 +105,4 @@ test/units/.coverage.*
/test/integration/cloud-config-azure.yml /test/integration/cloud-config-azure.yml
/SYMLINK_CACHE.json /SYMLINK_CACHE.json
changelogs/.plugin-cache.yaml changelogs/.plugin-cache.yaml
.ansible-test-timeout.json

View File

@ -52,6 +52,7 @@ from lib.config import (
from lib.env import ( from lib.env import (
EnvConfig, EnvConfig,
command_env, command_env,
configure_timeout,
) )
from lib.sanity import ( from lib.sanity import (
@ -95,6 +96,7 @@ def main():
display.color = config.color display.color = config.color
display.info_stderr = (isinstance(config, SanityConfig) and config.lint) or (isinstance(config, IntegrationConfig) and config.list_targets) display.info_stderr = (isinstance(config, SanityConfig) and config.lint) or (isinstance(config, IntegrationConfig) and config.list_targets)
check_startup() check_startup()
configure_timeout(config)
display.info('RLIMIT_NOFILE: %s' % (CURRENT_RLIMIT_NOFILE,), verbosity=2) display.info('RLIMIT_NOFILE: %s' % (CURRENT_RLIMIT_NOFILE,), verbosity=2)
display.info('MAXFD: %d' % MAXFD, verbosity=2) display.info('MAXFD: %d' % MAXFD, verbosity=2)
@ -509,6 +511,11 @@ def parse_args():
action='store_true', action='store_true',
help='dump environment to disk') help='dump environment to disk')
env.add_argument('--timeout',
type=int,
metavar='MINUTES',
help='timeout for future ansible-test commands (0 clears)')
if argcomplete: if argcomplete:
argcomplete.autocomplete(parser, always_complete_options=False, validator=lambda i, k: True) argcomplete.autocomplete(parser, always_complete_options=False, validator=lambda i, k: True)

View File

@ -3,3 +3,6 @@
# Setting a low soft RLIMIT_NOFILE value will improve the performance of subprocess.Popen on Python 2.x when close_fds=True. # Setting a low soft RLIMIT_NOFILE value will improve the performance of subprocess.Popen on Python 2.x when close_fds=True.
# This will affect all Python subprocesses. It will also affect the current Python process if set before subprocess is imported for the first time. # This will affect all Python subprocesses. It will also affect the current Python process if set before subprocess is imported for the first time.
SOFT_RLIMIT_NOFILE = 1024 SOFT_RLIMIT_NOFILE = 1024
# File used to track the ansible-test test execution timeout.
TIMEOUT_PATH = '.ansible-test-timeout.json'

View File

@ -4,13 +4,17 @@ from __future__ import absolute_import, print_function
import datetime import datetime
import json import json
import functools
import os import os
import platform import platform
import re import re
import signal
import sys import sys
import time
from lib.config import ( from lib.config import (
CommonConfig, CommonConfig,
TestConfig,
) )
from lib.util import ( from lib.util import (
@ -34,6 +38,18 @@ from lib.docker_util import (
docker_version docker_version
) )
from lib.thread import (
WrappedThread,
)
from lib.constants import (
TIMEOUT_PATH,
)
from lib.test import (
TestTimeout,
)
class EnvConfig(CommonConfig): class EnvConfig(CommonConfig):
"""Configuration for the tools command.""" """Configuration for the tools command."""
@ -43,14 +59,26 @@ class EnvConfig(CommonConfig):
""" """
super(EnvConfig, self).__init__(args, 'env') super(EnvConfig, self).__init__(args, 'env')
self.show = args.show or not args.dump self.show = args.show
self.dump = args.dump self.dump = args.dump
self.timeout = args.timeout
def command_env(args): def command_env(args):
""" """
:type args: EnvConfig :type args: EnvConfig
""" """
show_dump_env(args)
set_timeout(args)
def show_dump_env(args):
"""
:type args: EnvConfig
"""
if not args.show and not args.dump:
return
data = dict( data = dict(
ansible=dict( ansible=dict(
version=get_ansible_version(args), version=get_ansible_version(args),
@ -84,6 +112,105 @@ def command_env(args):
results_fd.write(json.dumps(data, sort_keys=True)) results_fd.write(json.dumps(data, sort_keys=True))
def set_timeout(args):
"""
:type args: EnvConfig
"""
if args.timeout is None:
return
if args.timeout:
deadline = (datetime.datetime.utcnow() + datetime.timedelta(minutes=args.timeout)).strftime('%Y-%m-%dT%H:%M:%SZ')
display.info('Setting a %d minute test timeout which will end at: %s' % (args.timeout, deadline), verbosity=1)
else:
deadline = None
display.info('Clearing existing test timeout.', verbosity=1)
if args.explain:
return
if deadline:
data = dict(
duration=args.timeout,
deadline=deadline,
)
with open(TIMEOUT_PATH, 'w') as timeout_fd:
json.dump(data, timeout_fd, indent=4, sort_keys=True)
elif os.path.exists(TIMEOUT_PATH):
os.remove(TIMEOUT_PATH)
def get_timeout():
"""
:rtype: dict[str, any] | None
"""
if not os.path.exists(TIMEOUT_PATH):
return None
with open(TIMEOUT_PATH, 'r') as timeout_fd:
data = json.load(timeout_fd)
data['deadline'] = datetime.datetime.strptime(data['deadline'], '%Y-%m-%dT%H:%M:%SZ')
return data
def configure_timeout(args):
"""
:type args: CommonConfig
"""
if isinstance(args, TestConfig):
configure_test_timeout(args) # only tests are subject to the timeout
def configure_test_timeout(args):
"""
:type args: TestConfig
"""
timeout = get_timeout()
if not timeout:
return
timeout_start = datetime.datetime.utcnow()
timeout_duration = timeout['duration']
timeout_deadline = timeout['deadline']
timeout_remaining = timeout_deadline - timeout_start
test_timeout = TestTimeout(timeout_duration)
if timeout_remaining <= datetime.timedelta():
test_timeout.write(args)
raise ApplicationError('The %d minute test timeout expired %s ago at %s.' % (
timeout_duration, timeout_remaining * -1, timeout_deadline))
display.info('The %d minute test timeout expires in %s at %s.' % (
timeout_duration, timeout_remaining, timeout_deadline), verbosity=1)
def timeout_handler(_dummy1, _dummy2):
"""Runs when SIGUSR1 is received."""
test_timeout.write(args)
raise ApplicationError('Tests aborted after exceeding the %d minute time limit.' % timeout_duration)
def timeout_waiter(timeout_seconds):
"""
:type timeout_seconds: int
"""
time.sleep(timeout_seconds)
os.kill(os.getpid(), signal.SIGUSR1)
signal.signal(signal.SIGUSR1, timeout_handler)
instance = WrappedThread(functools.partial(timeout_waiter, timeout_remaining.seconds))
instance.daemon = True
instance.start()
def show_dict(data, verbose, root_verbosity=0, path=None): def show_dict(data, verbose, root_verbosity=0, path=None):
""" """
:type data: dict[str, any] :type data: dict[str, any]

View File

@ -11,6 +11,10 @@ from lib.util import (
ABC, ABC,
) )
from lib.constants import (
TIMEOUT_PATH,
)
# improve performance by disabling uid/gid lookups # improve performance by disabling uid/gid lookups
tarfile.pwd = None tarfile.pwd = None
tarfile.grp = None tarfile.grp = None
@ -45,6 +49,7 @@ class DefaultTarFilter(TarFilter):
self.ignore_files = ( self.ignore_files = (
'.gitignore', '.gitignore',
'.gitdir', '.gitdir',
TIMEOUT_PATH,
) )
self.ignore_extensions = ( self.ignore_extensions = (
@ -58,7 +63,7 @@ class DefaultTarFilter(TarFilter):
:rtype: tarfile.TarInfo | None :rtype: tarfile.TarInfo | None
""" """
filename = os.path.basename(item.path) filename = os.path.basename(item.path)
name, ext = os.path.splitext(filename) ext = os.path.splitext(filename)[1]
dirs = os.path.split(item.path) dirs = os.path.split(item.path)
if not item.isdir(): if not item.isdir():
@ -68,7 +73,7 @@ class DefaultTarFilter(TarFilter):
if item.path.startswith('./docs/docsite/_build/'): if item.path.startswith('./docs/docsite/_build/'):
return None return None
if name in self.ignore_files: if filename in self.ignore_files:
return None return None
if ext in self.ignore_extensions: if ext in self.ignore_extensions:

View File

@ -57,15 +57,17 @@ def calculate_confidence(path, line, metadata):
class TestResult(object): class TestResult(object):
"""Base class for test results.""" """Base class for test results."""
def __init__(self, command, test, python_version=None): def __init__(self, command, test, python_version=None, enable_junit=False):
""" """
:type command: str :type command: str
:type test: str :type test: str
:type python_version: str :type python_version: str
:type enable_junit: bool
""" """
self.command = command self.command = command
self.test = test self.test = test
self.python_version = python_version self.python_version = python_version
self.enable_junit = enable_junit
self.name = self.test or self.command self.name = self.test or self.command
if self.python_version: if self.python_version:
@ -88,7 +90,7 @@ class TestResult(object):
if args.lint: if args.lint:
self.write_lint() self.write_lint()
if args.junit: if args.junit or self.enable_junit:
if self.junit: if self.junit:
self.write_junit(args) self.write_junit(args)
else: else:
@ -159,6 +161,38 @@ class TestResult(object):
xml.write(report.encode('utf-8', 'strict')) xml.write(report.encode('utf-8', 'strict'))
class TestTimeout(TestResult):
"""Test timeout."""
def __init__(self, timeout_duration):
"""
:type timeout_duration: int
"""
super(TestTimeout, self).__init__(command='timeout', test='', enable_junit=True)
self.timeout_duration = timeout_duration
def write_junit(self, args):
"""
:type args: TestConfig
"""
message = 'Tests were aborted after exceeding the %d minute time limit.' % self.timeout_duration
output = '''One or more of the following situations may be responsible:
- Code changes have resulted in tests that hang or run for an excessive amount of time.
- Tests have been added which exceed the time limit when combined with existing tests.
- Test infrastructure and/or external dependencies are operating slower than normal.'''
if args.coverage:
output += '\n- Additional overhead from collecting code coverage has resulted in tests exceeding the time limit.'
output += '\n\nConsult the console log for additional details on where the timeout occurred.'
test_case = self.junit.TestCase(classname=self.command, name=self.name)
test_case.add_error_info(message, output)
self.save_junit(args, test_case)
class TestSuccess(TestResult): class TestSuccess(TestResult):
"""Test success.""" """Test success."""
def write_junit(self, args): def write_junit(self, args):

View File

@ -380,23 +380,30 @@ def raw_command(cmd, capture=False, env=None, data=None, cwd=None, explain=False
stderr = None stderr = None
start = time.time() start = time.time()
process = None
try: try:
process = subprocess.Popen(cmd, env=env, stdin=stdin, stdout=stdout, stderr=stderr, cwd=cwd) try:
except OSError as ex: process = subprocess.Popen(cmd, env=env, stdin=stdin, stdout=stdout, stderr=stderr, cwd=cwd)
if ex.errno == errno.ENOENT: except OSError as ex:
raise ApplicationError('Required program "%s" not found.' % cmd[0]) if ex.errno == errno.ENOENT:
raise raise ApplicationError('Required program "%s" not found.' % cmd[0])
raise
if communicate: if communicate:
encoding = 'utf-8' encoding = 'utf-8'
data_bytes = data.encode(encoding, 'surrogateescape') if data else None data_bytes = data.encode(encoding, 'surrogateescape') if data else None
stdout_bytes, stderr_bytes = process.communicate(data_bytes) stdout_bytes, stderr_bytes = process.communicate(data_bytes)
stdout_text = stdout_bytes.decode(encoding, str_errors) if stdout_bytes else u'' stdout_text = stdout_bytes.decode(encoding, str_errors) if stdout_bytes else u''
stderr_text = stderr_bytes.decode(encoding, str_errors) if stderr_bytes else u'' stderr_text = stderr_bytes.decode(encoding, str_errors) if stderr_bytes else u''
else: else:
process.wait() process.wait()
stdout_text, stderr_text = None, None stdout_text, stderr_text = None, None
finally:
if process and process.returncode is None:
process.kill()
display.info('') # the process we're interrupting may have completed a partial line of output
display.notice('Killed command to avoid an orphaned child process during handling of an unexpected exception.')
status = process.returncode status = process.returncode
runtime = time.time() - start runtime = time.time() - start

View File

@ -116,6 +116,12 @@ function cleanup
trap cleanup EXIT trap cleanup EXIT
ansible-test env --dump --show --color -v if [[ "${COVERAGE:-}" ]]; then
timeout=60
else
timeout=45
fi
ansible-test env --dump --show --timeout "${timeout}" --color -v
"test/utils/shippable/${script}.sh" "${test}" "test/utils/shippable/${script}.sh" "${test}"

View File

@ -7,5 +7,13 @@ IFS='/:' read -ra args <<< "$1"
version="${args[1]}" version="${args[1]}"
if [[ "${COVERAGE:-}" ]]; then
timeout=90
else
timeout=10
fi
ansible-test env --timeout "${timeout}" --color -v
# shellcheck disable=SC2086 # shellcheck disable=SC2086
ansible-test units --color -v --docker default --python "${version}" ${COVERAGE:+"$COVERAGE"} ${CHANGED:+"$CHANGED"} \ ansible-test units --color -v --docker default --python "${version}" ${COVERAGE:+"$COVERAGE"} ${CHANGED:+"$CHANGED"} \