# Copyright 2019 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Recipe for running zbi tests."""
import copy
import os
from recipe_engine.config import Enum, List
from recipe_engine.post_process import StatusSuccess, StatusFailure
from recipe_engine.recipe_api import Property

DEPS = [
    'fuchsia/build',
    'fuchsia/checkout',
    'fuchsia/emu',
    'fuchsia/status_check',
    'fuchsia/swarming_retry',
    'fuchsia/symbolize',
    'fuchsia/testing',
    'fuchsia/testing_requests',
    'recipe_engine/buildbucket',
    'recipe_engine/cipd',
    'recipe_engine/context',
    'recipe_engine/file',
    'recipe_engine/isolated',
    'recipe_engine/json',
    'recipe_engine/path',
    'recipe_engine/platform',
    'recipe_engine/properties',
    'recipe_engine/step',
    'recipe_engine/swarming',
]

TARGETS = ['x64', 'arm64']

# How long to wait (in seconds) before killing the test swarming task if
# there's no output being produced.
TEST_IO_TIMEOUT_SECS = 180

# How long a pending test swarming task waits to be scheduled on a bot.
# We should never expire a test task. This is currently 5 hours, but
# should be treated as infinite.
TEST_EXPIRATION_TIMEOUT_SECS = 18000

# How long the test is allowed to run before swarming kills it.
TEST_EXECUTION_TIMEOUT_SECS = 600

BOTANIST_DEVICE_CONFIG = '/etc/botanist/config.json'
CATALYST_DEVICE_CONFIG = '/etc/catalyst/config.json'

IMAGES_JSON = 'images.json'
QEMU_KERNEL_NAME = 'qemu-kernel'

# The log level to use for botanist invocations in test tasks. Can be one of
# "fatal", "error", "warning", "info", "debug", or "trace", where "trace" is
# the most verbose and "fatal" is the least.
BOTANIST_LOG_LEVEL = 'debug'

# The version of catalyst to use.
CATALYST_CIPD_REVISION = "git_revision:12ba48e4eea05053e01e834502042a8c710bdc99"

PROPERTIES = {
    'manifest':
        Property(kind=str, help='Jiri manifest to use'),
    'remote':
        Property(kind=str, help='Remote manifest repository'),
    'target_cpu':
        Property(kind=Enum(*TARGETS), help='Target to build'),
    'variants':
        Property(kind=List(basestring), help='Variants to use'),
    'allowed_device_types':
        Property(kind=List(basestring), help='Allowed device types to run on'),
}
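
# For reference, a set of input properties like the one used in GenTests below
# looks roughly like this (values are illustrative, not required):
#   manifest: 'manifest'
#   remote: 'https://fuchsia.googlesource.com/fuchsia'
#   target_cpu: 'x64'
#   variants: ['clang']
#   allowed_device_types: ['QEMU']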


def prebuilt_path(api, checkout_root, *path):
  """Returns the Path to the host-platform subdir under the given subdirs."""
  path = list(path)
  path.append('{os}-{arch}'.format(
      os=api.platform.name,
      arch={'intel': 'x64'}[api.platform.arch],
  ))
  return checkout_root.join('prebuilt', *path)
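
# Example (host platform is an assumption): on a linux-x64 builder,
#   prebuilt_path(api, checkout_root, 'third_party', 'clang')
# resolves to <checkout_root>/prebuilt/third_party/clang/linux-x64, which is
# how process_result() below finds the prebuilt clang debug symbols.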


def create_task(api, **kwargs):
  """Create a Task object.

  The Task class's base class lives on the api object, so the class can't be
  defined at the top level or otherwise at module load time; it is defined
  inside this function instead.

  For the full argument list, see Task.__init__ below.
  """

  class Task(api.swarming_retry.TriggeredTask):

    def __init__(self, api, name, zbi_test, checkout_root, gn_results,
                 target_cpu, device_type, **kwargs):
      super(Task, self).__init__(api=api, name=name, request=None, **kwargs)
      self._checkout_root = checkout_root
      self._gn_results = gn_results
      self._target_cpu = target_cpu
      self._zbi_test = copy.deepcopy(zbi_test)
      self._device_type = device_type
      if api.emu.is_emulator_type(device_type):
        self._create_emu_request()
      else:
        self._create_device_request()

    def _create_emu_request(self):
      isolate_tree = api.file.symlink_tree(
          root=self._api.path.mkdtemp('isolate'))
      isolate_tree.register_link(
          target=self._gn_results.fuchsia_build_dir.join(
              self._zbi_test['path']),
          linkname=isolate_tree.root.join(
              os.path.basename(self._zbi_test['path'])))
      qemu_kernel = copy.deepcopy([
          image for image in self._gn_results.image_manifest
          if image['name'] == QEMU_KERNEL_NAME
      ][0])
      qemu_kernel_basename = api.path.basename(qemu_kernel['path'])
      isolate_tree.register_link(
          target=self._gn_results.fuchsia_build_dir.join(qemu_kernel['path']),
          linkname=isolate_tree.root.join(qemu_kernel_basename))
      qemu_kernel['path'] = qemu_kernel_basename
      isolate_tree.register_link(
          target=self._gn_results.tool('botanist', self._target_cpu),
          linkname=isolate_tree.root.join('botanist'),
      )
      isolate_tree.create_links('create tree of images')
      # TODO(IN-1420): botanist expects the test to be named 'zircon-a'.
      self._zbi_test['name'] = 'zircon-a'
      self._zbi_test['path'] = os.path.basename(self._zbi_test['path'])
      image_manifest_path = isolate_tree.root.join(IMAGES_JSON)
      image_manifest = [self._zbi_test, qemu_kernel]
      self._api.file.write_json(
          'write image manifest', image_manifest_path, image_manifest,
          indent=2)
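      # The manifest written above has two entries, roughly (field values are
      # illustrative and depend on the build):
      #   [{"name": "zircon-a", "path": "<zbi basename>", ...},
      #    {"name": "qemu-kernel", "path": "<kernel basename>", ...}]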
      isolated = self._api.isolated.isolated(isolate_tree.root)
      isolated.add_dir(isolate_tree.root)
      isolated_hash = isolated.archive('isolate images')
      ensure_file = self._api.cipd.EnsureFile()
      if self._device_type == 'QEMU':
        self._api.emu.add_qemu_to_ensure_file(ensure_file, subdir='qemu')
      dimensions = {
          'pool': 'fuchsia.tests',
          'os': 'Debian',
          'cpu': self._zbi_test['cpu'],
          'kvm': '1',
      }
      cmd = [
          './botanist',
          '-level', BOTANIST_LOG_LEVEL,
          'qemu',
          '-type', '%s' % self._device_type.lower(),
          '-qemu-dir', './%s/bin' % self._device_type.lower(),
          '-images', IMAGES_JSON,
          '-arch', self._zbi_test['cpu'],
          '-use-kvm',
      ]  # yapf: disable
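      # For example, for an x64 QEMU task the command above comes out roughly
      # as:
      #   ./botanist -level debug qemu -type qemu -qemu-dir ./qemu/bin \
      #       -images images.json -arch x64 -use-kvm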
      env_name = '%s-%s' % (self._device_type, self._zbi_test['cpu'])
      tags = {'test_environment_name': [env_name]}
      request = self._api.swarming.task_request().with_name(
          self.name).with_tags(tags)
      self._request = request.with_slice(0, request[0]
          .with_command(cmd)
          .with_isolated(isolated_hash)
          .with_dimensions(**dimensions)
          .with_execution_timeout_secs(TEST_EXECUTION_TIMEOUT_SECS)
          .with_expiration_secs(TEST_EXPIRATION_TIMEOUT_SECS)
          .with_io_timeout_secs(TEST_IO_TIMEOUT_SECS)
          .with_cipd_ensure_file(ensure_file)
      )  # yapf: disable

    def _create_device_request(self):
      isolate_tree = api.file.symlink_tree(
          root=self._api.path.mkdtemp('isolate'))
      isolate_tree.register_link(
          target=self._gn_results.fuchsia_build_dir.join(
              self._zbi_test['path']),
          linkname=isolate_tree.root.join(
              os.path.basename(self._zbi_test['path'])))
      # We isolate the ZBI to the root of the directory to be isolated.
      self._zbi_test['path'] = os.path.basename(self._zbi_test['path'])
      for tool in ('botanist', 'seriallistener', 'bootserver_new'):
        isolate_tree.register_link(
            target=self._gn_results.tool(tool, 'x64'),
            linkname=isolate_tree.root.join(tool),
        )
      image_manifest_path = isolate_tree.root.join(IMAGES_JSON)
      image_manifest = [self._zbi_test]
      self._api.file.write_json(
          'write image manifest', image_manifest_path, image_manifest,
          indent=2)
      ensure_file = self._api.cipd.EnsureFile()
      config = BOTANIST_DEVICE_CONFIG
      dimensions = {
          'pool': 'fuchsia.tests',
          'device_type': self._device_type,
          'serial': '1',
      }
      ensure_file.add_package('fuchsia/infra/catalyst/${platform}',
                              CATALYST_CIPD_REVISION)
      cmd = [
          './catalyst',
          '-images', IMAGES_JSON,
          '-bootserver', './bootserver_new',
          '-config', CATALYST_DEVICE_CONFIG,
      ]  # yapf: disable
      # Construct the botanist command.
      cmd.extend([
          './botanist',
          '-level', BOTANIST_LOG_LEVEL,
          'run',
          '-images', IMAGES_JSON,
          '-serial-log', self._api.testing_requests.SERIAL_LOG_NAME,
          '-config', config,
          '-netboot',
          './seriallistener',
          '-timeout', '5m',
          '-stdout=true',
          '-success-str', self._zbi_test['success_string'],
      ])  # yapf: disable
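      # The assembled argv nests three tools: catalyst (device provisioning)
      # hands off to the trailing botanist invocation, whose `run` subcommand
      # in turn launches ./seriallistener to watch serial output for the ZBI
      # test's success string. (Hand-off order as suggested by the flag
      # layout above.)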
      isolate_tree.create_links('create tree of images')
      isolated = self._api.isolated.isolated(isolate_tree.root)
      isolated.add_dir(isolate_tree.root)
      isolated_hash = isolated.archive('isolate images')
      outputs = [self._api.testing_requests.SERIAL_LOG_NAME]
      env_name = '%s-%s' % (self._device_type, self._zbi_test['cpu'])
      tags = {'test_environment_name': [env_name]}
      request = api.swarming.task_request().with_name(
          self.name).with_tags(tags)
      self._request = request.with_slice(0, request[0].
          with_command(cmd).
          with_isolated(isolated_hash).
          with_dimensions(**dimensions).
          with_execution_timeout_secs(TEST_EXECUTION_TIMEOUT_SECS).
          with_expiration_secs(TEST_EXPIRATION_TIMEOUT_SECS).
          with_io_timeout_secs(TEST_IO_TIMEOUT_SECS).
          with_cipd_ensure_file(ensure_file).
          with_outputs(outputs)
      )  # yapf: disable

    def process_result(self):
      attempt = self.attempts[-1]
      assert attempt.result
      result = attempt.result
      symbolize_tool = self._gn_results.tool('symbolize')
      clang_dir = prebuilt_path(self._api, self._checkout_root, 'third_party',
                                'clang')
      llvm_symbolizer = self._gn_results.tool('llvm-symbolizer')
      build_id_dirs = (
          self._gn_results.zircon_build_dir.join('.build-id'),
          clang_dir.join('lib', 'debug', '.build-id'),
      )
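      # Both the zircon build's .build-id directory and the prebuilt clang
      # debug symbols are handed to the symbolizer, so frames from the kernel
      # and from toolchain-provided runtimes can both be resolved.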
      with self._api.step.nest(result.name) as presentation:
        attempt.logs['symbolized log'] = self._api.symbolize(
            symbolize_tool=symbolize_tool,
            build_id_dirs=build_id_dirs,
            llvm_symbolizer=llvm_symbolizer,
            data=result.output,
            presentation=presentation)
      # A kernel panic may be present in the logs even if the task timed
      # out, so check for that first.
      if 'KERNEL PANIC' in result.output:
        attempt.failure_reason = 'kernel panic'
      # Because of the way these tests run (they are the only user-mode
      # process in the system, and then the system shuts down) we can't
      # collect an exit code or nicely structured output, so we have to
      # search the output for a hard-coded string to detect success.
      is_emu_type = self._api.emu.is_emulator_type(self._device_type)
      if (is_emu_type and
          self._zbi_test['success_string'] not in result.output):
        attempt.failure_reason = 'failed to find success string'

    def present_attempt(self, task_step, attempt, **kwargs):
      del task_step, kwargs  # Unused.
      name = '%s (%s)' % (attempt.name, 'pass' if attempt.success else 'fail')
      step = api.step(name, None)
      step.presentation.step_summary_text = attempt.failure_reason
      step.presentation.links['task UI'] = attempt.task_ui_link
      for log, data in attempt.logs.iteritems():
        step.presentation.logs[log] = data

  return Task(api=api, **kwargs)


def RunSteps(api, manifest, remote, target_cpu, variants,
             allowed_device_types):
  """Builds ZBI tests and executes them on emulators or hardware via Swarming."""
  with api.context(infra_steps=True):
    assert manifest
    assert remote
    checkout = api.checkout.fuchsia_with_options(
        path=api.path['start_dir'].join('fuchsia'),
        build=api.buildbucket.build,
        manifest=manifest,
        remote=remote,
    )
  with api.step.nest('build'):
    gn_results = api.build.gen(
        checkout_root=checkout.root_dir,
        fuchsia_build_dir=checkout.root_dir.join('out', 'default'),
        target=target_cpu,
        build_type='debug',
        product='products/bringup.gni',
        variants=variants,
        # This forces GN to evaluate //garnet/tests/zircon/BUILD.gn, which
        # is necessary in order to generate the zbi test build API.
        # //bundles:infratools is necessary to build botanist.
        packages=['//garnet/tests/zircon:zbi_tests', '//bundles:infratools'],
    )
    api.build.ninja(
        gn_results=gn_results,
        targets=['bundles:infratools'],
        build_zbi_tests=True,
        build_images=True,
        image_filter=lambda image: image['name'] == QEMU_KERNEL_NAME,
    )
  tasks = []
  for name, zbi_test in gn_results.zbi_tests.iteritems():
    allowed = set(allowed_device_types)
    specified = set(zbi_test.get('device_types', ['QEMU']))
    device_types = allowed.intersection(specified)
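    # Only device types that are both allowed by this builder and requested by
    # the ZBI test are used; e.g. allowed {'QEMU'} intersected with specified
    # {'QEMU', 'AEMU'} runs the test only on QEMU.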
    for device_type in device_types:
      task_name = '%s - %s' % (name, device_type)
      with api.step.nest('prepare test: %s' % task_name):
        tasks.append(
            create_task(
                api,
                name=task_name,
                zbi_test=zbi_test,
                checkout_root=checkout.root_dir,
                gn_results=gn_results,
                target_cpu=target_cpu,
                device_type=device_type,
            ))
  output_dir = api.path.mkdtemp('swarming')
  with api.swarming_retry.retry(tasks) as retry:
    retry.run_tasks(collect_output_dir=output_dir)
    retry.present_tasks()
    retry.raise_failures()


def GenTests(api):

  def test(name, zbi_test, status, output):
    device_types = zbi_test.get('device_types', ['QEMU'])
    test = api.status_check.test(name, status=status)
    test += api.buildbucket.ci_build(
        git_repo='https://fuchsia.googlesource.com/fuchsia')
    test += api.properties(
        manifest='manifest',
        remote='https://fuchsia.googlesource.com/fuchsia',
        target_cpu=zbi_test['cpu'],
        variants=['clang'],
        allowed_device_types=device_types,
    )
    test += api.step_data('build.read zbi test manifest',
                          api.json.output([zbi_test]))

    def get_task_data(task_name, device_type, task_id, output):
      if (device_type not in ['QEMU', 'AEMU'] and
          zbi_test['success_string'] not in output):
        return api.swarming_retry.failed_task(
            task_name, task_id=task_id, output=output)
      return api.swarming_retry.passed_task(
          task_name, task_id=task_id, output=output)

    task_id = 123
    task_data = []
    task_retry_data = []
    failed_first_attempt = len(output) > 1
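    # Providing two outputs simulates a retry: the first output feeds the
    # initial attempt and the second feeds the re-triggered task.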
    for device in device_types:
      task_name = '%s - %s' % (zbi_test['name'], device)
      test += api.swarming_retry.trigger_data(
          name=task_name, task_id=task_id, iteration=0)
      task_data.append(
          get_task_data(
              task_name, device, task_id=task_id, output=output[0]))
      if failed_first_attempt:
        task_id += 1
        test += api.swarming_retry.trigger_data(
            name=task_name, task_id=task_id, iteration=1)
        task_retry_data.append(
            get_task_data(
                task_name, device, task_id=task_id, output=output[1]))
      task_id += 1
    test += api.swarming_retry.collect_data(task_data, iteration=0)
    if failed_first_attempt:
      test += api.swarming_retry.collect_data(task_retry_data, iteration=1)
    return test

  for cpu in ['arm64', 'x64']:
    zbi_test = api.build.mock_zbi_test(cpu)
    success_string = zbi_test['success_string']
    yield (
        test(cpu + '-pass', zbi_test, 'success', [success_string]) +
        api.post_process(StatusSuccess)
    )  # yapf: disable
    yield (
        test(cpu + '-fail', zbi_test, 'failure',
             ['not success', 'not success']) +
        api.post_process(StatusFailure)
    )  # yapf: disable
    yield (
        test(cpu + '-flake', zbi_test, 'success',
             ['not success', success_string]) +
        api.post_process(StatusSuccess)
    )  # yapf: disable
    # Include the success string and "KERNEL PANIC" in the output to clarify
    # that this fails because of the kernel panic and not because the
    # success string is missing.
    output = zbi_test['success_string'] + 'KERNEL PANIC'
    yield (
        test(cpu + '-kernel_panic', zbi_test, 'failure', [output, output]) +
        api.post_process(StatusFailure)
    )  # yapf: disable