blob: bed80106f79514087eb354b411d75056fc347df7 [file] [log] [blame]
# Copyright 2019 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import attr
import hashlib
import os.path
from recipe_engine import recipe_api
from RECIPE_MODULES.fuchsia.gce import api as gce_api
# The path to the botanist config on the host.
BOTANIST_DEVICE_CONFIG = "/etc/botanist/config.json"
# The log level to use for botanist invocations in test tasks. Can be one of
# "fatal", "error", "warning", "info", "debug", or "trace", where "trace" is
# the most verbose, and fatal is the least.
BOTANIST_LOG_LEVEL = "debug"
# Name of image manifest produced by the build.
IMAGES_JSON = "images.json"
# Name of the manifest of blobs downloaded by botanist (passed to botanist
# via `-download-manifest`) and declared as an expected task output for
# shards that carry a package repo.
BLOB_DOWNLOAD_MANIFEST = "blob_downloads.json"
# Pinned gcsproxy CIPD version.
# NOTE(review): not referenced anywhere in this module as visible here —
# confirm a caller still needs it before relying on (or removing) it.
GCSPROXY_CIPD_REVISION = "git_revision:9f8534bc114000de0b4e3a85738bcb0e7af06109"
class TestingRequestsApi(recipe_api.RecipeApi):
    """APIs for constructing Swarming task requests to test Fuchsia."""

    # Name of the zipped debug snapshot collected by a test task; passed to
    # testrunner via `-snapshot-output`.
    SNAPSHOT_NAME = "snapshot.zip"
    # Task output directory that serial logs are written into (passed to
    # botanist via `-serial-log-dir` for non-emulator shards).
    SERIAL_LOG_DIR = "serial_logs"
    # Task output directory that syslogs are written into (passed to
    # botanist via `-syslog-dir` when ssh access is needed).
    SYSLOG_DIR = "syslogs"
    # What to name the file that contains the swarming task output. Not used
    # directly by this recipe module, but it is used by most clients of this
    # recipe module so it makes sense to define it here.
    TEST_TASK_OUTPUT_FILE = "infra_and_test_std_and_klog.txt"
    # Directory test results land in; passed to testrunner via `-out-dir`.
    TEST_RESULTS_DIR_NAME = "out"
    # The name of the tag to set on every task request that contains the name
    # of the shard's environment (device type/OS and architecture).
    TEST_ENVIRONMENT_TAG_NAME = "test_environment_name"
def task_requests(
    self,
    shards,
    build_results,
    buildbucket_build,
    test_spec,
):
    """Returns a swarming.TaskRequest for each runnable shard.

    GCE shards are skipped entirely unless GCE testing is enabled in the
    test spec; all other shards are processed concurrently.

    Args:
        shards (list of testsharder.Shard): Test shards.
        build_results (FuchsiaBuildResults): The Fuchsia build results to
            test.
        buildbucket_build (build_pb2.Build): The buildbucket build that is
            going to orchestrate testing.
        test_spec (fuchsia_pb2.Fuchsia.Test): Testing configuration.
    """

    def build_one(shard):
        # Nest the request-construction steps under a per-shard heading so
        # the build page groups them legibly.
        with self.m.step.nest(f"shard {shard.name}"):
            return self._create_request(
                shard, build_results, buildbucket_build, test_spec
            )

    spawned = []
    for shard in shards:
        if shard.device_type == "GCE" and not test_spec.test_on_gce:
            # Do not generate a task request for GCE shards if the
            # test_on_gce flag is disabled.
            continue
        spawned.append(self.m.futures.spawn(build_one, shard))
    self.m.futures.wait(spawned)
    return [fut.result() for fut in spawned]
def _create_request(self, shard, build_results, buildbucket_build, test_spec):
    """Returns a swarming.TaskRequest for the given shard.

    Args:
        shard (testsharder.Shard): Test shard to generate a task request
            for.
        build_results (FuchsiaBuildResults): The Fuchsia build results to
            test.
        buildbucket_build (build_pb2.Build): The buildbucket build that is
            going to orchestrate testing.
        test_spec (fuchsia_pb2.Fuchsia.Test): Testing configuration.
    """
    # Some artifacts are within the checkout root directory but not in the
    # build directory. Thus we need to map the task input tree root to the
    # checkout root directory instead. However, since the paths in the test
    # manifest are relative to the build directory, we use the relative
    # build directory as the relative cwd of the swarming task.
    #
    # Additionally, toolchains under test may be located outside the
    # checkout entirely, in which case we must use the common ancestor
    # directory of the checkout directory and any toolchain directories as
    # the root.
    root_dir_descendants = [
        build_results.checkout.root_dir,
        self.m.build.clang_toolchain_dir,
        self.m.build.gcc_toolchain_dir,
        self.m.build.rust_toolchain_dir,
    ]
    # Falsy entries correspond to toolchain dirs not in use for this build.
    root_dir = os.path.commonpath([str(d) for d in root_dir_descendants if d])
    relative_cwd = self.m.path.relpath(build_results.build_dir, root_dir)
    # Files to include in the task's CAS inputs.
    upload_paths = []
    # Files to include in the CAS inputs AND delete from the build
    # directory after uploading (temporary, per-shard artifacts).
    cleanup_upload_paths = []

    def register_tool(tool_name, relative_to=build_results.build_dir):
        """Register the named tool to be uploaded to CAS.

        Returns the path at which the tool can be accessed on the testing
        bot, relative to the task's working directory.

        NOTE(review): `test_bot_cpu` is a free variable assigned (twice)
        further down in the enclosing function; this closure relies on
        being called only after the relevant assignment has run.
        """
        path = build_results.tool(tool_name, cpu=test_bot_cpu)
        upload_paths.append(path)
        return self.m.path.relpath(path, start=relative_to)

    # Write this shard's subset of tests to a manifest for testrunner to
    # consume on the bot.
    test_manifest = build_results.build_dir.join(shard.name + "_tests.json")
    self.m.file.write_json(
        "write test manifest",
        test_manifest,
        shard.tests,
        indent=2,
    )
    cleanup_upload_paths.append(test_manifest)
    # testrunner flags; appended to `cmd` after the botanist arguments.
    flags = [
        "-out-dir",
        # testrunner writes the out-dir directly to the task outputs dir,
        # so no need to add it to the expected outputs. In fact, the
        # out-dir name is the same as a subdirectory of the task inputs
        # which gets mapped to the swarming task root directory, so adding
        # this out-dir to the expected outputs will cause the directory from
        # the root directory to get symlinked into the outputs as well.
        self.TEST_RESULTS_DIR_NAME,
        "-snapshot-output",
        self.SNAPSHOT_NAME,
    ]
    image_manifest = f"{self.m.artifacts.image_url()}/{IMAGES_JSON}"
    # If we're using CAS for delivery, then the testsharder will have
    # provisioned the images as part of the CAS inputs, so the image
    # manifest will be present in the working directory.
    if test_spec.use_cas or test_spec.use_cas_for_images:
        image_manifest = IMAGES_JSON
        flags.append("-prefetch-packages")
    if "bringup" in build_results.set_metadata.product:
        flags.append("-use-serial")
    is_emu_type = self.m.emu.is_emulator_type(shard.device_type)
    # NOTE(review): `test_bot_cpu` is recomputed below with `use_tcg` taken
    # into account; this first value (TCG-unaware) is what the sandboxing
    # flag check just below sees — confirm the discrepancy is intentional.
    test_bot_cpu = build_results.set_metadata.target_arch if is_emu_type else "x64"
    is_linux = not shard.os or shard.os.lower() == "linux"
    # TODO(rudymathu): Eventually we may want to distribute nsjail to ARM64
    # bots, but we don't have an nsjail build on ARM set up yet.
    # We also may want to eventually support doing this on device shards,
    # but our on-device testing infrastructure doesn't support nsjail
    # sandboxing yet.
    if (
        test_spec.enable_sandboxing
        and (is_emu_type or not shard.device_type)
        and test_bot_cpu == "x64"
        and is_linux
    ):
        flags.extend(
            [
                "-nsjail",
                "./nsjail",
                "-nsjail-root",
                # nsjail is run within relative_cwd but needs the root
                # directory containing all the task inputs to mount.
                self.m.path.relpath(root_dir, start=build_results.build_dir),
            ]
        )
    # The positional argument to testrunner: the shard's test manifest.
    flags.append(self.m.path.relpath(test_manifest, start=build_results.build_dir))
    cmd = []
    outputs = []
    ensure_file = self.m.cipd.EnsureFile()
    dimensions = {"pool": test_spec.pool}
    is_gce_type = shard.device_type == "GCE"
    # To take advantage of KVM, we execute emu-arm tasks on arm hardware.
    test_bot_cpu = (
        build_results.set_metadata.target_arch
        if (is_emu_type and not test_spec.use_tcg)
        else "x64"
    )
    # Non-bringup products require ssh access to the target. The bringup
    # product and zbi tests require serial. A zbi test shard is
    # distinguished by a non-empty image_overrides field.
    need_ssh = "bringup" not in build_results.set_metadata.product
    need_serial = not need_ssh or bool(shard.image_overrides)
    # This command spins up a metadata server that allows its subcommands to
    # automagically authenticate with LUCI auth, provided the sub-exec'ed tool
    # was written in go or dart and respectively makes use of the standard
    # cloud.google.com/go/compute/metadata or
    # github.com/dart-lang/googleapis_auth authentication libraries. Such
    # libraries look for a metadata server under environment variables
    # like $GCE_METADATA_HOST, which LUCI emulates.
    service_account = shard.service_account or test_spec.default_service_account
    if service_account:
        # TODO(fxbug.dev/37142): Find a way to use the version that LUCI is
        # currently using, instead of 'latest'.
        ensure_file.add_package(
            "infra/tools/luci-auth/${platform}", "latest", subdir=relative_cwd
        )
        # We specify -scopes in order to append "https://www.googleapis.com/auth/nest-account".
        # Default Scopes when using 'luci-auth context':
        # https://source.chromium.org/chromium/infra/infra/+/main:go/src/go.chromium.org/luci/auth/client/authcli/authcli.go;l=229;drc=8e944005719b0d612a63263176ec2a75ee78a850
        scopes = [
            "https://www.googleapis.com/auth/cloud-platform",
            "https://www.googleapis.com/auth/firebase",
            "https://www.googleapis.com/auth/gerritcodereview",
            "https://www.googleapis.com/auth/userinfo.email",
            "https://www.googleapis.com/auth/nest-account",
        ]
        cmd.extend(["./luci-auth", "context", "-scopes", " ".join(scopes), "--"])
    if is_emu_type:
        dimensions.update(os="Debian")
        if test_spec.use_tcg:
            # TCG (pure software emulation) runs on plain x64 bots.
            dimensions.update(cpu="x64")
        else:
            # KVM requires the bot CPU to match the emulated target arch.
            dimensions.update(kvm="1", cpu=build_results.set_metadata.target_arch)
    elif is_gce_type:
        # Have any GCE shards target the GCE executors, which are e2-2
        # machines running Linux.
        dimensions.update(os="Linux", cores="2", gce="1")
    else:
        # No device -> no serial.
        if test_spec.targets_serial and shard.device_type:
            dimensions["serial"] = "1"
        dimensions.update(shard.dimensions)
    # Ensure we use GCE VMs whenever possible.
    # NOTE(review): `is_linux` was already computed above with the exact
    # same expression; this re-assignment is redundant.
    is_linux = not shard.os or shard.os.lower() == "linux"
    if is_linux and not is_gce_type and test_bot_cpu == "x64":
        if test_spec.enable_sandboxing:
            # NOTE(review): this uses the recomputed (TCG-aware)
            # `test_bot_cpu`, while the `-nsjail` flags above used the
            # earlier TCG-unaware value — under TCG the package may be
            # installed without the matching flags. Confirm intentional.
            ensure_file.add_package(
                self.m.nsjail.tool_metadata.path,
                self.m.nsjail.tool_metadata.version,
                subdir=relative_cwd,
            )
        if not test_spec.use_tcg:
            dimensions["kvm"] = "1"
    if (
        (is_emu_type or not shard.device_type)
        and test_bot_cpu == "x64"
        and is_linux
    ):
        dimensions["gce"] = "1"
        dimensions["cores"] = "8"
    # Default to the dynamically computed shard timeout, but allow
    # overriding in case the dynamically computed timeouts are too short for
    # some reason.
    shard_timeout_secs = shard.timeout_secs
    if test_spec.timeout_secs:
        shard_timeout_secs = test_spec.timeout_secs
    cmd.extend(
        [
            "./" + register_tool("botanist"),
            "-level",
            BOTANIST_LOG_LEVEL,
            "run",
        ]
    )
    if shard.targets_fuchsia:
        # EMU and non-EMU tasks all use the same serial log name and directory
        # so add it to the expected outputs for all tasks that target Fuchsia.
        outputs.append(self.SERIAL_LOG_DIR)
        # We expect the serial log and directory to be in the swarming task
        # root directory, but the botanist command is run within
        # relative_cwd, so the path provided to botanist should be relative
        # to the relative_cwd.
        serial_log_path = self.m.path.relpath(
            self.SERIAL_LOG_DIR + "/serial_log.txt",
            relative_cwd,
        )
        serial_log_dir = self.m.path.relpath(self.SERIAL_LOG_DIR, relative_cwd)
        cmd.extend(
            [
                "-images",
                image_manifest,
                "-timeout",
                f"{int(shard_timeout_secs)}s",
            ]
        )
        if shard.image_overrides:
            cmd.extend(
                ["-image-overrides", self.m.json.dumps(shard.image_overrides)]
            )
        if test_spec.use_ffx or test_spec.ffx_experiment_level > 0:
            cmd.extend(["-ffx", "./" + register_tool("ffx")])
            if test_spec.ffx_experiment_level > 0:
                cmd.extend(
                    ["-ffx-experiment-level", str(test_spec.ffx_experiment_level)]
                )
        if shard.pkg_repo:
            outputs.append(BLOB_DOWNLOAD_MANIFEST)
            blob_manifest_relpath = self.m.path.relpath(
                BLOB_DOWNLOAD_MANIFEST,
                relative_cwd,
            )
            cmd.extend(
                [
                    "-repo",
                    self.m.artifacts.package_repo_url(),
                    "-blobs",
                    self.m.artifacts.package_blob_url(),
                    "-local-repo",
                    shard.pkg_repo,
                    "-download-manifest",
                    blob_manifest_relpath,
                ]
            )
        # In the emulator case, serial is redirected to stdio.
        if not is_emu_type:
            cmd.extend(["-serial-log-dir", serial_log_dir])
        if need_ssh:
            # We expect the syslogs to be in the swarming task root
            # directory, but the path provided to botanist should be
            # relative to the relative_cwd.
            cmd.extend(
                [
                    "-syslog-dir",
                    self.m.path.relpath(self.SYSLOG_DIR, relative_cwd),
                ]
            )
            outputs.append(self.SYSLOG_DIR)
            # If targeting emu we include a private key corresponding to an
            # authorized key already in the boot image; this is needed as we
            # do not pave emu.
            if is_emu_type:
                upload_paths.append(build_results.private_key)
                cmd.extend(
                    [
                        "-ssh",
                        self.m.path.relpath(
                            build_results.private_key, build_results.build_dir
                        ),
                    ]
                )
        if not test_spec.pave:
            cmd.append("-netboot")
        for arg in test_spec.zircon_args:
            cmd.extend(["-zircon-args", arg])
        # Hardware shards fall back to the canonical on-bot config; emu and
        # GCE shards overwrite `config` with a generated per-shard config.
        config = BOTANIST_DEVICE_CONFIG
        # TODO(rudymathu): Remove this awful hack once the build graph correctly sets the
        # target CPU for RISC-V builds. We need this hack to ensure that the
        # bringup.x64-riscv builder, which adds a risc-v sidecar to the
        # regular x64 build, continues to work.
        is_riscv = (
            test_spec.use_tcg and build_results.set_metadata.target_arch == "x64"
        ) or build_results.set_metadata.target_arch == "riscv64"
        if is_emu_type:
            qemu_config = [
                {
                    "type": shard.device_type.lower(),
                    "path": f"./{shard.device_type.lower()}/bin",
                    "edk2_dir": "./edk2",
                    "target": "riscv64"
                    if is_riscv
                    else build_results.set_metadata.target_arch,
                    "cpu": 4,
                    "memory": 8192
                    if is_riscv
                    else self.m.emu.get_memory_for_variant(build_results),
                    "kvm": not test_spec.use_tcg,
                    # Is a directive to run the emu process in a way in which we can
                    # synthesize a 'serial device'. We need only do this in the bringup
                    # case, this being used for executing tests at that level;
                    # restriction to the minimal case is especially important as this
                    # mode shows tendencies to slow certain processes down.
                    "serial": need_serial,
                    # Used to dynamically extend fvm.blk to fit downloaded
                    # test packages.
                    "fvm_tool": register_tool("fvm") if test_spec.pave else "",
                    "logfile": serial_log_path,
                }
            ]
            if need_ssh:
                # Used to embed the ssh key into the zbi.
                qemu_config[0]["zbi_tool"] = register_tool("zbi")
            # UEFI-related emulator firmware.
            self.m.emu.add_edk2_to_ensure_file(
                ensure_file,
                checkout=build_results.checkout.root_dir,
                subdir=self.m.path.join(relative_cwd, "edk2"),
            )
            if shard.device_type == "AEMU":
                self.m.emu.add_aemu_to_ensure_file(
                    ensure_file,
                    checkout=build_results.checkout.root_dir,
                    subdir=self.m.path.join(relative_cwd, "aemu/bin"),
                )
            elif shard.device_type == "QEMU":
                self.m.emu.add_qemu_to_ensure_file(
                    ensure_file,
                    checkout=build_results.checkout.root_dir,
                    subdir=self.m.path.join(relative_cwd, "qemu"),
                )
            config_basename = shard.name + ".botanist.json"
            config = "./" + config_basename
            self.m.file.write_json(
                "write qemu config",
                build_results.build_dir.join(config_basename),
                qemu_config,
                indent=2,
            )
            cleanup_upload_paths.append(
                build_results.build_dir.join(config_basename)
            )
        elif is_gce_type:
            ensure_file.add_package(
                gce_api.GCEM_CLIENT_CIPD_PATH,
                gce_api.GCEM_CLIENT_CIPD_REVISION,
                subdir=relative_cwd,
            )
            config_basename = shard.name + ".botanist.json"
            config = "./" + config_basename
            self.m.gce.create_botanist_config(
                test_spec.gce_mediator.endpoint,
                test_spec.gce_mediator.cloud_project,
                test_spec.gce_mediator.machine_shape,
                self.m.buildbucket.build.infra.swarming.parent_run_id,
                build_results.build_dir.join(config_basename),
            )
            cleanup_upload_paths.append(
                build_results.build_dir.join(config_basename)
            )
        cmd.extend(["-config", config])
    else:
        # Host-only shard: nothing to boot or provision.
        cmd.append("-skip-setup")
    cmd.extend(flags)
    # NOTE(review): `shard` looks always-truthy at this point (it was
    # dereferenced above); this guard appears redundant — confirm.
    if shard:
        for dep in shard.deps:
            # Use abspath() to collapse ".." elements in the dep path.
            dep_abspath = self.m.path.abs_to_path(
                self.m.path.abspath(build_results.build_dir.join(dep))
            )
            # Presumably registers the path as existing for recipe
            # simulation tests — verify against the path module.
            self.m.path.mock_add_paths(dep_abspath)
            if not self.m.path.exists(dep_abspath):  # pragma: no cover
                raise self.m.step.StepFailure(
                    f"Dependency for shard {shard.name} was not built: {dep}"
                )
            upload_paths.append(dep_abspath)
    env_prefixes = {}
    if not shard.os or shard.os.lower() != "mac":
        # Add ssh to $PATH.
        env_prefixes["PATH"] = [
            self.m.path.normpath(
                self.m.path.join(
                    relative_cwd, self.m.path.dirname(register_tool("ssh"))
                )
            )
        ]
    env_vars = self._test_task_env_vars(
        buildbucket_build,
        shard.device_type,
        catapult_dashboard_master=test_spec.catapult_dashboard_master,
        catapult_dashboard_bot=test_spec.catapult_dashboard_bot,
        release_branch=build_results.checkout.release_branch,
        release_version=build_results.checkout.release_version,
        image_manifest=image_manifest,
        llvm_symbolizer_path=(
            # Relevant for automatic symbolization of things running on
            # host. Only the x64 variation is available in the checkout and
            # we have nothing that runs on an arm host that needs
            # symbolizing.
            register_tool("llvm-symbolizer")
            if test_bot_cpu == "x64"
            else ""
        ),
        # TODO(fxb/38517): s/bootserver_new/bootserver.
        bootserver_path=register_tool("bootserver_new"),
    )
    cas_input_root = self.m.cas_util.upload(
        root_dir,
        upload_paths=sorted(set(upload_paths + cleanup_upload_paths)),
        step_name="upload task inputs to CAS",
    )
    # Clean up to avoid leaving temporary files in the build directory.
    for path in cleanup_upload_paths:
        self.m.file.remove(f"remove {self.m.path.basename(path)}", path)
    # These variables are no longer usable now that the task inputs have
    # been uploaded.
    del upload_paths, cleanup_upload_paths, register_tool
    request = (
        self.m.swarming.task_request()
        .with_name(shard.name)
        .with_tags(self._test_task_tags(buildbucket_build, build_results, shard))
    )
    if service_account:
        request = request.with_service_account(service_account)
    task_slice = (
        request[0]
        .with_command(cmd)
        .with_cas_input_root(cas_input_root)
        .with_relative_cwd(relative_cwd)
        .with_dimensions(**dimensions)
        .with_expiration_secs(test_spec.swarming_expiration_timeout_secs)
        .with_io_timeout_secs(test_spec.swarming_io_timeout_secs)
        # Use a slightly longer timeout for the swarming task execution
        # timeout to allow botanist to handle the timeout itself.
        .with_execution_timeout_secs(shard_timeout_secs + 60)
        .with_outputs(outputs)
        .with_cipd_ensure_file(ensure_file)
        .with_env_vars(**env_vars)
        .with_env_prefixes(**env_prefixes)
    )
    return request.with_slice(0, task_slice)
def _test_task_env_vars(
self,
build,
device_type,
image_manifest,
catapult_dashboard_master,
catapult_dashboard_bot,
release_branch,
release_version,
llvm_symbolizer_path,
bootserver_path,
):
# Note that this will sometimes point to the wrong commit for tryjobs,
# which re-resolve HEAD on the fly rather than respecting the input
# commit. Therefore its `id` field should not be used.
commit = build.input.gitiles_commit
commit_host = commit.host
commit_ref = commit.ref
del commit
env_vars = dict(
# `${ISOLATED_OUTDIR}` is a magic string that Swarming will replace
# with a temporary directory whose contents will be automatically
# uploaded to CAS upon exit of a task.
FUCHSIA_TEST_OUTDIR="${ISOLATED_OUTDIR}",
# Used by performance tests and OTA tests.
BUILDBUCKET_ID=str(build.id) if build.id else None,
# Used by performance and e2e tests.
# TODO(fxbug.dev/50210): Don't fall back to time.time() once led
# starts setting create_time again.
BUILD_CREATE_TIME=str(build.create_time.seconds or int(self.m.time.time())),
# Used by e2e tests.
BUILDER_NAME=build.builder.builder,
# Used by e2e tests.
FUCHSIA_DEVICE_TYPE=device_type,
# Used by e2e tests.
INPUT_COMMIT_HOST=commit_host,
RELEASE_BRANCH=release_branch,
# Used by performance tests.
RELEASE_VERSION=str(release_version) if release_version else None,
# Used by the fuchsia-specific Swarming pre-task hook.
BOOTSERVER_PATH="./" + bootserver_path,
# Used by the fuchsia-specific Swarming pre-task hook.
IMAGE_MANIFEST_PATH=image_manifest,
SWARMING_BOT_FILE="${SWARMING_BOT_FILE}",
)
if llvm_symbolizer_path:
# Used for symbolization.
env_vars.update(
ASAN_SYMBOLIZER_PATH=llvm_symbolizer_path,
LSAN_SYMBOLIZER_PATH=llvm_symbolizer_path,
TSAN_OPTIONS="external_symbolizer_path=" + llvm_symbolizer_path,
UBSAN_SYMBOLIZER_PATH=llvm_symbolizer_path,
)
env_vars.update(
self._get_catapult_dashboard_env_vars(
catapult_dashboard_master, catapult_dashboard_bot, commit_ref
)
)
# For some reason, empty string environment variables sent to the swarming
# API get interpreted as null and rejected. So don't bother sending them to
# avoid breaking the task request.
# TODO(olivernewman): Figure out whether this logic should be moved into
# the upstream swarming module (or obviated by fixing the "" -> null
# behavior).
return {k: v for k, v in env_vars.items() if v}
def _test_task_tags(self, buildbucket_build, build_results, shard):
    """Returns the Swarming tags (dict of str -> list of str) for a shard.

    Every value is normalized to a list of strings, since Swarming tags
    are multi-valued; falsy values become empty lists.
    """
    metadata = build_results.set_metadata
    # Create unique hash for every test shard within the same build.
    shard_hash = hashlib.sha256(
        (
            str(buildbucket_build.id) + self.m.json.dumps(attr.asdict(shard))
        ).encode("utf-8")
    ).hexdigest()
    raw_tags = {
        "board": (
            metadata.board
            or metadata.target_arch
        ),
        "build_type": metadata.optimize,
        "buildbucket_bucket": buildbucket_build.builder.bucket,
        "buildbucket_builder": buildbucket_build.builder.builder,
        "product": metadata.product,
        "role": "tester",
        "task_name": shard.name,
        "test_shard_hash": shard_hash,
        # Consumed by google3 results uploader, and by the orchestrator
        # when uploading to resultdb.
        self.TEST_ENVIRONMENT_TAG_NAME: (
            f"{shard.device_type or shard.os}-{metadata.target_arch}"
        ),
        "variants": metadata.variants,
    }

    def as_list(value):
        # Normalize a tag value: scalars become single-element lists and
        # falsy values become empty lists.
        if not value:
            return []
        if isinstance(value, str):
            return [value]
        return value

    return {
        str(key): [str(item) for item in as_list(value)]
        for key, value in raw_tags.items()
    }
def _get_catapult_dashboard_env_vars(self, master_name, bot_name, commit_ref):
if not master_name and not bot_name:
# Uploading to Catapult is disabled.
return {}
if not (master_name and bot_name):
raise ValueError(
f"Catapult master and bot names not set consistently: {master_name!r}, {bot_name!r}"
)
prefix = "refs/heads/releases/"
if commit_ref.startswith(prefix):
branch_name = commit_ref[len(prefix) :]
master_name += "." + branch_name
elif commit_ref != "refs/heads/main":
# Unrecognized Git branch/tag name. Disable uploading to Catapult
# by not setting the env vars.
return {}
return dict(
CATAPULT_DASHBOARD_MASTER=master_name, CATAPULT_DASHBOARD_BOT=bot_name
)