# blob: df30b75015e76cbd0e384ecddd89d9613a1d615d [file] [log] [blame]
# Copyright 2019 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Recipe for running zbi tests."""
import copy
import re
from recipe_engine.config import Enum, List
from recipe_engine.post_process import StatusSuccess, StatusFailure
from recipe_engine.recipe_api import Property
from RECIPE_MODULES.fuchsia.swarming_retry import api as swarming_retry_api
from RECIPE_MODULES.fuchsia.testing import api as testing_api
from RECIPE_MODULES.fuchsia.testing_requests import api as testing_requests_api
# Recipe modules this recipe depends on; resolved by the recipe engine.
DEPS = [
    "fuchsia/build",
    "fuchsia/checkout",
    "fuchsia/emu",
    "fuchsia/recipe_testing",
    "fuchsia/status_check",
    "fuchsia/swarming_retry",
    "fuchsia/symbolize",
    "fuchsia/testing_requests",
    "recipe_engine/buildbucket",
    "recipe_engine/cipd",
    "recipe_engine/context",
    "recipe_engine/file",
    "recipe_engine/isolated",
    "recipe_engine/json",
    "recipe_engine/path",
    "recipe_engine/platform",
    "recipe_engine/properties",
    "recipe_engine/step",
    "recipe_engine/swarming",
]

# CPU architectures accepted for the `target_cpu` property.
TARGETS = ["x64", "arm64"]

# How long to wait (in seconds) before killing the test swarming task if there's
# no output being produced.
TEST_IO_TIMEOUT_SECS = 180

# How long a pending test swarming task waits to be scheduled on a bot.
# We should never expire a test task. This is currently 5 hours, but
# should be treated as infinite.
TEST_EXPIRATION_TIMEOUT_SECS = 18000

# How long the test is allowed to run before swarming kills it.
TEST_EXECUTION_TIMEOUT_SECS = 600

# Path (on the test bot) of the botanist device config used for hardware
# device types; emulated device types get a generated qemu.json instead.
BOTANIST_DEVICE_CONFIG = "/etc/botanist/config.json"

# Name of the image manifest file written into the isolate for the test task.
IMAGES_JSON = "images.json"

# Image name botanist expects for the kernel used to boot an emulator.
QEMU_KERNEL_NAME = "qemu-kernel"

# The log level to use for botanist invocations in test tasks. Can be one of
# "fatal", "error", "warning", "info", "debug", or "trace", where "trace" is
# the most verbose, and fatal is the least.
BOTANIST_LOG_LEVEL = "debug"

# Substrings in task output that indicate a "special" failure (i.e. a failure
# of the system under test rather than of the test itself).
SPECIAL_FAILURE_LOG_STRINGS = (
    "ZIRCON KERNEL PANIC",
    "ZIRCON KERNEL OOPS",
    "DEVICE SUSPEND TIMED OUT",
    "ASSERT FAILED",
)

# Subset of the above that may legitimately appear in test output (e.g. from
# an intentional assertion or death-test case), and so must be ignored when
# scanning output known to contain test output.
SPECIAL_FAILURE_LOG_STRINGS_POSSIBLY_FOUND_IN_TESTS = (
    "DEVICE SUSPEND TIMED OUT",
    "ASSERT FAILED",
)

# Regular expressions matched against output to detect special failures
# (currently: any sanitizer error report).
SPECIAL_FAILURE_LOG_PATTERNS = (re.compile(r"ERROR: [A-Za-z]+Sanitizer"),)

# These files may contain the output of tests, and thus
# SPECIAL_FAILURE_LOG_STRINGS_POSSIBLY_FOUND_IN_TESTS applies.
LOG_FILES_WITH_TEST_OUTPUT = (
    testing_api.TEST_TASK_OUTPUT_FILE,
    testing_requests_api.SERIAL_LOG_NAME,
)

# Input properties for this recipe.
# NOTE: the `basestring`/`iteritems` usage in this file means it targets
# Python 2.
PROPERTIES = {
    "manifest": Property(kind=str, help="Jiri manifest to use"),
    "remote": Property(kind=str, help="Remote manifest repository"),
    "target_cpu": Property(kind=Enum(*TARGETS), help="Target to build"),
    "variants": Property(kind=List(basestring), help="Variants to use"),
    "allowed_device_types": Property(
        kind=List(basestring), help="Allowed device types to run on"
    ),
    "test_pool": Property(
        kind=str,
        help="The swarming pool to run test tasks in",
        default="fuchsia.tests",
    ),
    "gn_args": Property(
        kind=List(basestring), help="GN args to pass to build", default=[]
    ),
    "use_goma": Property(
        kind=bool, help="Whether to use goma for the build", default=True
    ),
}
def prebuilt_path(api, checkout_root, *path):
    """Returns the Path to the host-platform subdir under the given subdirs."""
    # Compose e.g. "linux-x64" from the current host platform; only Intel
    # hosts are expected here (the arch map has a single entry).
    host_subdir = "{os}-{arch}".format(
        os=api.platform.name, arch={"intel": "x64"}[api.platform.arch],
    )
    subdirs = list(path) + [host_subdir]
    return checkout_root.join("prebuilt", *subdirs)
def match_special_failure(output, test_output=False):
    """Check the given string for known special failure patterns.

    Args:
      output (str): the output to search.
      test_output (bool): whether the output is that from a test, in which case
        specific failure modes are excluded, as they may result from an
        intentional assertion or death test case.

    Returns:
      None or a string that can be used in `failure_reason`.
    """
    failure_strings = SPECIAL_FAILURE_LOG_STRINGS
    if test_output:
        # Drop failure strings that a test may emit on purpose (e.g. a death
        # test asserting); use a tuple comprehension instead of
        # filter(lambda ...) for idiomatic, py2/py3-safe code.
        failure_strings = tuple(
            s
            for s in failure_strings
            if s not in SPECIAL_FAILURE_LOG_STRINGS_POSSIBLY_FOUND_IN_TESTS
        )
    # Cheap substring checks first.
    for s in failure_strings:
        if s in output:
            return s
    # Then regex patterns (e.g. sanitizer reports).
    for regex in SPECIAL_FAILURE_LOG_PATTERNS:  # pragma: nocover
        match = regex.search(output)
        if match:
            return match.group(0)
    return None
class Task(swarming_retry_api.TriggeredTask):
    """A swarming task that boots one ZBI test and watches its serial output.

    The task command line runs botanist, which boots the ZBI (in an emulator
    or on a hardware device) and spawns seriallistener as a subprocess;
    seriallistener exits 0 if the test's success string appears on serial
    within the timeout, and fails otherwise.
    """

    # TODO(fxbug.dev/50072) The task request construction logic in this method
    # should be abstracted into a helper in api.testing_requests so, for example,
    # the QEMU configuration here does not diverge from the QEMU configurations
    # elsewhere.
    def __init__(
        self,
        api,
        name,
        build_results,
        zbi_test,
        qemu_kernel,
        zedboot_images,
        device_type,
        test_pool,
        **kwargs
    ):
        """Constructs the swarming task request for one (zbi_test, device) pair.

        Args:
          api (RecipeApi): the recipe API object.
          name (str): name for the swarming task.
          build_results: build results object; mutated below to record the
            image metadata used by this task (`build_results.images`).
          zbi_test (dict): image-manifest entry for the ZBI test; mutated
            below (name/type/path rewrites), so callers should pass a copy.
          qemu_kernel (dict or None): image-manifest entry for the kernel to
            boot an emulator with; only consulted for emulated device types.
          zedboot_images (seq of dict): zedboot pave image entries to include
            in the image manifest.
          device_type (str): device type to run on (e.g. "QEMU", "AEMU", or a
            hardware device type).
          test_pool (str): swarming pool in which to run the task.
          **kwargs: forwarded to swarming_retry.TriggeredTask.
        """
        self._request = api.swarming.task_request().with_name(name)
        super(Task, self).__init__(self._request, api, **kwargs)
        self._checkout = build_results.checkout
        self._gn_results = build_results.gn_results
        is_emu_type = api.emu.is_emulator_type(device_type)
        images = [zbi_test]
        if is_emu_type:
            images.append(qemu_kernel)
            # Hack. Botanist's QEMU codepath currently expects to run something with
            # a name of "zircon-a" and a type of "zbi".
            zbi_test["name"] = "zircon-a"
            zbi_test["type"] = "zbi"
        images.extend(zedboot_images)
        isolate_tree = api.file.symlink_tree(root=api.path.mkdtemp("isolate"))
        # The follow images might be coming from zircon build and thus be given at
        # relative paths that leave the fuchsia build directory. This will not make
        # sense when we translate these paths over to the test task, so we link
        # these entries into the root of isolate tree to ensure that these paths
        # remain sensible in any case.
        for img in images:
            new_path = api.path.basename(img["path"])
            isolate_tree.register_link(
                target=self._gn_results.fuchsia_build_dir.join(img["path"]),
                linkname=isolate_tree.root.join(new_path),
            )
            img["path"] = new_path
        build_results.images = {image["name"]: image for image in images}
        # We emulate arm64 guests on arm64 hosts; in every other case we use an x64
        # host in the test task.
        host_cpu = "x64"
        if is_emu_type and build_results.target == "arm64":
            host_cpu = "arm64"
        # Link the host tools the task's command line needs into the isolate
        # root, so they are referenceable as "./<tool>".
        for tool in ("bootserver_new", "botanist", "seriallistener"):
            isolate_tree.register_link(
                target=self._gn_results.tool(tool, host_cpu),
                linkname=isolate_tree.root.join(tool),
            )
        image_manifest_path = isolate_tree.root.join(IMAGES_JSON)
        api.file.write_json(
            "write image manifest", image_manifest_path, images, indent=2
        )
        # Emulator binaries are delivered to the task via CIPD packages.
        ensure_file = api.cipd.EnsureFile()
        if device_type == "QEMU":
            api.emu.add_qemu_to_ensure_file(
                ensure_file, checkout=self._checkout.root_dir, subdir="qemu"
            )
        if device_type == "AEMU":
            api.emu.add_aemu_to_ensure_file(
                ensure_file, checkout=self._checkout.root_dir, subdir="aemu/bin"
            )
        if is_emu_type:
            # Emulated targets are scheduled onto Linux bots with KVM.
            dimensions = {
                "pool": test_pool,
                "os": "Debian",
                "cpu": zbi_test["cpu"],
                "kvm": "1",
            }
            # Botanist config for the emulator, written into the isolate.
            config = "./qemu.json"
            qemu_config = [
                {
                    "type": device_type.lower(),
                    "path": "./%s/bin" % device_type.lower(),
                    "target": zbi_test["cpu"],
                    "cpu": 4,
                    "memory": 4096,
                    "kvm": True,
                    "serial": True,
                }
            ]
            api.file.write_json(
                "write qemu config",
                isolate_tree.root.join("qemu.json"),
                qemu_config,
                indent=2,
            )
        else:
            # Hardware targets use the device config already present on the
            # bot, and must expose a serial connection.
            config = BOTANIST_DEVICE_CONFIG
            dimensions = {
                "pool": test_pool,
                "device_type": device_type,
                "serial": "1",
            }
        cmd = [
            "./botanist",
            "-level",
            BOTANIST_LOG_LEVEL,
            "run",
            "-images",
            IMAGES_JSON,
            "-serial-log",
            api.testing_requests.SERIAL_LOG_NAME,
            "-config",
            config,
            "-netboot",
            # botanist will run the following as a subprocess. seriallistener
            # is responsible for reading in the serial output that botanist
            # forwards to a socket; it will exit(0) if it sees it or else it
            # will time out and fail.
            "./seriallistener",
            "-timeout",
            "%ss" % zbi_test.get("timeout", TEST_EXECUTION_TIMEOUT_SECS),
            # If of emulator_type, then we are already redirecting serial to
            # stdout. Do not double redirect.
            "-stdout=%s" % (not is_emu_type),
            "-success-str",
            zbi_test["success_string"],
        ]
        isolate_tree.create_links("create tree of images")
        isolated = api.isolated.isolated(isolate_tree.root)
        isolated.add_dir(isolate_tree.root)
        isolated_hash = isolated.archive("isolate images")
        outputs = [api.testing_requests.SERIAL_LOG_NAME]
        env_name = "%s-%s" % (device_type, zbi_test["cpu"])
        # Tags make the task attributable/searchable in the swarming UI.
        tags = {
            "board": build_results.target,
            "build_type": build_results.build_type,
            "buildbucket_bucket": api.buildbucket.build.builder.bucket,
            "buildbucket_builder": api.buildbucket.build.builder.builder,
            "product": build_results.product,
            "role": "tester",
            "task_name": self.name,
            api.testing_requests.TEST_ENVIRONMENT_TAG_NAME: env_name,
            "variants": build_results.variants,
        }
        self._request = self._request.with_tags(
            api.testing_requests.create_swarming_tags(tags)
        )
        # Fill in the single task slice: command, inputs, dimensions,
        # timeouts, CIPD packages, expected outputs, and environment.
        self._request = self._request.with_slice(
            0,
            self._request[0]
            .with_command(cmd)
            .with_isolated(isolated_hash)
            .with_dimensions(**dimensions)
            .with_execution_timeout_secs(TEST_EXECUTION_TIMEOUT_SECS)
            .with_expiration_secs(TEST_EXPIRATION_TIMEOUT_SECS)
            .with_io_timeout_secs(TEST_IO_TIMEOUT_SECS)
            .with_cipd_ensure_file(ensure_file)
            .with_outputs(outputs)
            .with_env_vars(
                **api.testing_requests.test_task_env_vars(
                    api.buildbucket.build,
                    device_type,
                    build_results,
                    image_manifest=IMAGES_JSON,
                )
            ),
        )

    def process_result(self, attempt):
        """Symbolizes the attempt's output and scans it for special failures.

        Args:
          attempt: a completed swarming_retry attempt; must have a result.
        """
        assert attempt.result
        result = attempt.result
        symbolize_tool = self._gn_results.tool("symbolize")
        clang_dir = prebuilt_path(
            self._api, self._checkout.root_dir, "third_party", "clang"
        )
        llvm_symbolizer = self._gn_results.tool("llvm-symbolizer")
        # Debug symbols come from both the zircon build and the clang prebuilt.
        build_id_dirs = (
            self._gn_results.zircon_build_dir.join(".build-id"),
            clang_dir.join("lib", "debug", ".build-id"),
        )
        with self._api.step.nest(result.name):
            attempt.logs["symbolized log"] = self._api.symbolize(
                symbolize_tool=symbolize_tool,
                build_id_dirs=build_id_dirs,
                llvm_symbolizer=llvm_symbolizer,
                data=result.output,
            )
            # A kernel panic may be present in the logs even if the task timed
            # out, so check for that first.
            special_failure = match_special_failure(result.output, test_output=True)
            if special_failure is not None:
                attempt.failure_reason = special_failure

    def present_attempt(self, task_step, attempt, **kwargs):
        """Presents one attempt as a pass/fail step with its logs and links."""
        del task_step, kwargs  # Unused.
        name = "%s (%s)" % (attempt.name, "pass" if attempt.success else "fail")
        step = self._api.step(name, None)
        step.presentation.step_summary_text = attempt.failure_reason
        step.presentation.links["task UI"] = attempt.task_ui_link
        for log, data in attempt.logs.iteritems():
            step.presentation.logs[log] = data
def get_qemu_kernel(api, zbi_test, gn_results):
    """Returns a copy of the image-manifest entry to boot QEMU with.

    Raises a StepFailure unless exactly one manifest entry matches.
    """

    def is_eligible_img(img):
        # A ZBI test may specify a `qemu_kernel_label` to point to an override
        # of the standard QEMU kernel.
        if "qemu_kernel_label" in zbi_test:
            return img.get("label") == zbi_test["qemu_kernel_label"]
        return img["name"] == QEMU_KERNEL_NAME

    options = [img for img in gn_results.image_manifest if is_eligible_img(img)]
    if len(options) != 1:  # pragma: no cover
        raise api.step.StepFailure(
            "no QEMU kernel match found for %s. If 'qemu_kernel_label' is \
specified precisely one image with that label must exist; else, \
precisely one with the name of %s must exist"
            % (zbi_test["name"], QEMU_KERNEL_NAME)
        )
    qemu_kernel = copy.deepcopy(options[0])
    # We override the metadata as botanist explicitly looks for an
    # image of name |QEMU_KERNEL_NAME| and type |kernel| to start a QEMU instance.
    qemu_kernel["name"] = QEMU_KERNEL_NAME
    qemu_kernel["type"] = "kernel"
    return qemu_kernel
def zedboot_images(api, gn_results):
    """Returns deep copies of the manifest entries used to pave zedboot.

    Raises a StepFailure if the image manifest contains no such entries.
    """
    pave_imgs = []
    for img in gn_results.image_manifest:
        # An entry participates in zedboot paving iff it carries a non-empty
        # "bootserver_pave_zedboot" argument list.
        if img.get("bootserver_pave_zedboot", []):
            pave_imgs.append(copy.deepcopy(img))
    if not pave_imgs:  # pragma: no cover
        raise api.step.StepFailure("missing zedboot pave images")
    return pave_imgs
def RunSteps(
    api,
    manifest,
    remote,
    target_cpu,
    variants,
    allowed_device_types,
    test_pool,
    gn_args,
    use_goma,
):
    """Builds and executes Zircon tests in QEMU on a different machine.

    Checks out Fuchsia, builds the bringup product plus any QEMU-kernel
    overrides and zedboot images the ZBI tests need, then triggers one
    swarming task per (zbi_test, allowed device type) pair and presents the
    results via swarming_retry.
    """
    with api.context(infra_steps=True):
        assert manifest
        assert remote
        checkout = api.checkout.fuchsia_with_options(
            path=api.path["start_dir"].join("fuchsia"),
            build=api.buildbucket.build,
            manifest=manifest,
            remote=remote,
        )
    build_type = "debug"
    product = "products/bringup.gni"
    with api.step.nest("build"):
        gn_results = api.build.gen(
            checkout=checkout,
            fuchsia_build_dir=checkout.root_dir.join("out", "default"),
            target=target_cpu,
            build_type=build_type,
            product=product,
            variants=variants,
            # //bundles:infratools is necessary to build botanist.
            packages=["//bundles:infratools"],
            args=gn_args,
            use_goma=use_goma,
        )
        # ZBI tests may specify another image to be run as the QEMU kernel.
        # Ensure that it is built.
        ninja_targets = set(["bundles:infratools"])
        path_targets = set()
        # Maps zbi test name -> its QEMU kernel entry (None for tests that
        # only run on non-emulated device types).
        qemu_kernels = {}
        for name, zbi_test in gn_results.zbi_tests.iteritems():
            device_types = zbi_test.get("device_types", ["QEMU"])
            qemu_kernel = None
            if any(
                api.emu.is_emulator_type(device_type) for device_type in device_types
            ):
                qemu_kernel = get_qemu_kernel(api, zbi_test, gn_results)
                path_targets.add(qemu_kernel["path"])
            qemu_kernels[name] = qemu_kernel
        # Ensure that zedboot image is built for flashing with fastboot prior to
        # running task.
        zedboot_imgs = zedboot_images(api, gn_results)
        for img in zedboot_imgs:
            path_targets.add(img["path"])
        zircon_targets, fuchsia_targets = api.build.ninja(
            gn_results=gn_results,
            targets=ninja_targets,
            path_targets=path_targets,
            build_zbi_tests=True,
            use_goma=use_goma,
        )
    build_results = api.build.build_results(
        board=None,
        target=target_cpu,
        variants=variants,
        build_type=build_type,
        fuchsia_build_dir=gn_results.fuchsia_build_dir,
        zircon_build_dir=gn_results.zircon_build_dir,
        checkout=checkout,
        product=product,
        gn_results=gn_results,
        fuchsia_targets=fuchsia_targets,
        zircon_targets=zircon_targets,
    )
    # Skip testing when the change affects no tests, unless we are exercising
    # the recipe itself.
    _, no_work = build_results.calculate_affected_tests(api.buildbucket.build.input)
    with api.step.nest("record affected_tests_no_work") as presentation:
        presentation.properties["affected_tests_no_work"] = no_work
    if no_work and not api.recipe_testing.enabled:
        return
    tasks = []
    for name, zbi_test in gn_results.zbi_tests.iteritems():
        if zbi_test.get("disabled", False):
            continue  # pragma: no cover
        # Run only on device types both allowed by properties and supported
        # by the test (default: QEMU).
        allowed = set(allowed_device_types)
        specified = set(zbi_test.get("device_types", ["QEMU"]))
        device_types = allowed.intersection(specified)
        for device_type in device_types:
            task_name = "%s - %s" % (name, device_type)
            with api.step.nest("prepare test: %s" % task_name):
                tasks.append(
                    Task(
                        api,
                        name=task_name,
                        build_results=build_results,
                        # Copy the image objects, as they are shared across different
                        # task requests and each may need to modify its object.
                        zbi_test=copy.deepcopy(zbi_test),
                        qemu_kernel=copy.deepcopy(qemu_kernels[name]),
                        zedboot_images=zedboot_imgs,
                        device_type=device_type,
                        test_pool=test_pool,
                    )
                )
    api.swarming_retry.run_and_present_tasks(
        tasks, collect_output_dir=api.path.mkdtemp("swarming")
    )
def GenTests(api):
    """Generates recipe-simulation test cases."""

    def test(name, zbi_test, status, output):
        """Builds one simulation test case.

        Args:
          name (str): test case name.
          zbi_test (dict): mocked ZBI test manifest entry.
          status (str): expected overall status ("success" or "failure").
          output (list of str): per-attempt task output; a second element
            simulates a retry after a failed first attempt.
        """
        device_types = zbi_test.get("device_types", ["QEMU"])
        test = api.status_check.test(name, status=status)
        test += api.buildbucket.ci_build(
            git_repo="https://fuchsia.googlesource.com/fuchsia"
        )
        test += api.properties(
            manifest="manifest",
            remote="https://fuchsia.googlesource.com/fuchsia",
            target_cpu=zbi_test["cpu"],
            variants=["clang"],
            allowed_device_types=device_types,
            test_pool="fuchsia.tests",
        )
        test += api.step_data(
            "build.read zbi test manifest", api.json.output([zbi_test])
        )

        def get_task_data(task_name, device_type, task_id, output):
            # A task passes iff its output contains the test's success string.
            if zbi_test["success_string"] not in output:
                return api.swarming_retry.failed_task(
                    task_name, task_id=task_id, output=output
                )
            return api.swarming_retry.passed_task(
                task_name, task_id=task_id, output=output
            )

        task_id = 123
        task_data = []
        task_retry_data = []
        # Two output entries mean the first attempt fails and is retried.
        failed_first_attempt = len(output) > 1
        for device in device_types:
            task_name = "%s - %s" % (zbi_test["name"], device)
            test += api.swarming_retry.trigger_data(
                name=task_name, task_id=task_id, iteration=0
            )
            task_data.append(
                get_task_data(task_name, device, task_id=task_id, output=output[0])
            )
            if failed_first_attempt:
                task_id += 1
                test += api.swarming_retry.trigger_data(
                    name=task_name, task_id=task_id, iteration=1
                )
                task_retry_data.append(
                    get_task_data(task_name, device, task_id=task_id, output=output[1])
                )
            task_id += 1
        test += api.swarming_retry.collect_data(task_data, iteration=0)
        if failed_first_attempt:
            test += api.swarming_retry.collect_data(task_retry_data, iteration=1)
        return test

    # Pass/fail/flake/panic cases for each target architecture.
    for cpu in ["arm64", "x64"]:
        zbi_test = api.build.mock_zbi_test(cpu, zircon=True)
        success_string = zbi_test["success_string"]
        yield (
            test(cpu + "-pass", zbi_test, "success", [success_string])
            + api.post_process(StatusSuccess)
        )
        yield (
            test(cpu + "-fail", zbi_test, "failure", ["not success", "not success"])
            + api.post_process(StatusFailure)
        )
        yield (
            test(cpu + "-flake", zbi_test, "success", ["not success", success_string])
            + api.post_process(StatusSuccess)
        )
        # Including success string and "ZIRCON KERNEL PANIC" in output to clarify
        # that this fails because of the kernel panic and not because the
        # success string is not present.
        output = zbi_test["success_string"] + "ZIRCON KERNEL PANIC"
        yield (
            test(cpu + "-kernel_panic", zbi_test, "failure", [output, output])
            + api.post_process(StatusFailure)
        )
    # Cases where the ZBI test overrides the QEMU kernel.
    for cpu in ["arm64", "x64"]:
        for zircon in [True, False]:
            zbi_test = api.build.mock_zbi_test(cpu, override=True, zircon=zircon)
            success_string = zbi_test["success_string"]
            name = ("zircon-" if zircon else "fuchsia-") + cpu + "-override"
            yield (
                test(name, zbi_test, "success", [success_string])
                + api.post_process(StatusSuccess)
            )
    # Early-exit case: the change affects no tests.
    yield (
        api.status_check.test("affected_tests_no_work", "success")
        + api.properties(
            manifest="manifest",
            remote="https://fuchsia.googlesource.com/fuchsia",
            target_cpu="x64",
            variants=["clang"],
            allowed_device_types=["QEMU"],
            test_pool="fuchsia.tests",
        )
        + api.buildbucket.try_build()
        + api.step_data("affected tests.read no work status", api.json.output(True))
    )