# Copyright 2019 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import attr
import hashlib
from recipe_engine import recipe_api
from RECIPE_MODULES.fuchsia.gce import api as gce_api
# The path to the botanist config on the host.
BOTANIST_DEVICE_CONFIG = "/etc/botanist/config.json"
# The log level to use for botanist invocations in test tasks. Can be one of
# "fatal", "error", "warning", "info", "debug", or "trace", where "trace" is
# the most verbose and "fatal" is the least.
BOTANIST_LOG_LEVEL = "debug"
# Name of image manifest produced by the build.
IMAGES_JSON = "images.json"
# Name of product bundles manifest produced by the build.
PRODUCT_BUNDLES_JSON = "product_bundles.json"
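# Name of the manifest of blobs downloaded by the task (written by botanist
# via its -download-manifest flag).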
BLOB_DOWNLOAD_MANIFEST = "blob_downloads.json"
GCSPROXY_CIPD_REVISION = "git_revision:9f8534bc114000de0b4e3a85738bcb0e7af06109"
class TestingRequestsApi(recipe_api.RecipeApi):
"""APIs for constructing Swarming task requests to test Fuchsia."""
SNAPSHOT_NAME = "snapshot.zip"
SERIAL_LOG_DIR = "serial_logs"
SYSLOG_DIR = "syslogs"
    # What to name the file that contains the swarming task output. Not used
    # directly by this recipe module, but most clients of this module use it,
    # so it makes sense to define it here.
TEST_TASK_OUTPUT_FILE = "infra_and_test_std_and_klog.txt"
TEST_RESULTS_DIR_NAME = "out"
# The name of the tag to set on every task request that contains the name
# of the shard's environment (device type/OS and architecture).
TEST_ENVIRONMENT_TAG_NAME = "test_environment_name"
def task_requests(
self,
shards,
build_results,
buildbucket_build,
test_spec,
):
"""Returns a swarming.TaskRequest for each shard in build_artifact.shards.
Args:
shards (list of testsharder.Shard): Test shards.
build_results (FuchsiaBuildResults): The Fuchsia build results to
test.
buildbucket_build (build_pb2.Build): The buildbucket build that is
going to orchestrate testing.
test_spec (fuchsia_pb2.Fuchsia.Test): Testing configuration.
"""
futures = []
for shard in shards:
if shard.device_type == "GCE" and not test_spec.test_on_gce:
# Do not generate a task request for GCE shards if the
# test_on_gce flag is disabled.
continue
def create_request(shard):
with self.m.step.nest(f"shard {shard.name}"):
return self._create_request(
shard, build_results, buildbucket_build, test_spec
)
futures.append(self.m.futures.spawn(create_request, shard))
self.m.futures.wait(futures)
return [f.result() for f in futures]
def _create_request(self, shard, build_results, buildbucket_build, test_spec):
"""Returns a swarming.TaskRequest for the given shard.
Args:
shard (testsharder.Shard): Test shard to generate a task request
for.
build_results (FuchsiaBuildResults): The Fuchsia build results to
test.
buildbucket_build (build_pb2.Build): The buildbucket build that is
going to orchestrate testing.
test_spec (fuchsia_pb2.Fuchsia.Test): Testing configuration.
"""
# Some artifacts are within the checkout root directory but not in the
# build directory. Thus we need to map the task input tree root to the
# checkout root directory instead. However, since the paths in the test
# manifest are relative to the build directory, we use the relative
# build directory as the relative cwd of the swarming task.
root_dir = build_results.checkout.root_dir
relative_cwd = self.m.path.relpath(build_results.build_dir, root_dir)
upload_paths = []
cleanup_upload_paths = []
def register_tool(tool_name, relative_to=build_results.build_dir):
"""Register the named tool to be uploaded to CAS.
Returns the path at which the tool can be accessed on the testing
bot, relative to the task's working directory.
"""
path = build_results.tool(tool_name, cpu=test_bot_cpu)
upload_paths.append(path)
return self.m.path.relpath(path, start=relative_to)
test_manifest = build_results.build_dir.joinpath(shard.name + "_tests.json")
self.m.file.write_json(
"write test manifest",
test_manifest,
shard.tests,
indent=2,
)
cleanup_upload_paths.append(test_manifest)
flags = [
"-out-dir",
            # testrunner writes the out-dir directly to the task outputs dir,
            # so there's no need to add it to the expected outputs. In fact,
            # the out-dir shares its name with a subdirectory of the task
            # inputs that gets mapped to the swarming task root directory, so
            # adding it to the expected outputs would also cause that
            # directory to be symlinked into the outputs.
self.TEST_RESULTS_DIR_NAME,
"-snapshot-output",
self.SNAPSHOT_NAME,
]
image_manifest = f"{self.m.artifacts.image_url()}/{IMAGES_JSON}"
# If we're using CAS for delivery, then the testsharder will have
# provisioned the images as part of the CAS inputs, so the image
# manifest will be present in the working directory.
if test_spec.use_cas or test_spec.use_cas_for_images:
image_manifest = IMAGES_JSON
flags.append("-prefetch-packages")
if "bringup" in build_results.set_metadata.product:
flags.append("-use-serial")
is_emu_type = self.m.emu.is_emulator_type(shard.device_type)
test_bot_cpu = self._bot_cpu_type(shard, build_results, test_spec)
is_linux = not shard.os or shard.os.lower() == "linux"
# TODO(rudymathu): Eventually we may want to distribute nsjail to ARM64
# bots, but we don't have an nsjail build on ARM set up yet.
# We also may want to eventually support doing this on device shards,
# but our on-device testing infrastructure doesn't support nsjail
# sandboxing yet.
if (
test_spec.enable_sandboxing
and (is_emu_type or not shard.device_type)
and test_bot_cpu == "x64"
and is_linux
):
flags.extend(
[
"-nsjail",
"./nsjail",
"-nsjail-root",
# nsjail is run within relative_cwd but needs the root
# directory containing all the task inputs to mount.
self.m.path.relpath(root_dir, start=build_results.build_dir),
]
)
flags.append(self.m.path.relpath(test_manifest, start=build_results.build_dir))
cmd = []
ensure_file = self.m.cipd.EnsureFile()
rdb_package = "infra/tools/rdb/${platform}"
rdb_version = self.m.cipd_ensure.get_packages(
"read rdb package",
self.resource("rdb/cipd.ensure"),
test_data=f"{rdb_package} version:pinned-version",
)[rdb_package]
ensure_file.add_package(rdb_package, rdb_version, subdir=relative_cwd)
cmd.extend(
[
"./rdb",
"stream",
"-var",
f"builder:{buildbucket_build.builder.builder}",
"--",
]
)
outputs = []
dimensions = {"pool": test_spec.pool}
is_gce_type = shard.device_type == "GCE"
        # Non-bringup products require SSH access to the target. The bringup
        # product requires serial, but boot tests, which only read from
        # serial, don't need write access to it. A boot test shard is
        # distinguished by a non-empty product_bundle field.
need_ssh = "bringup" not in build_results.set_metadata.product
need_serial = not need_ssh and not bool(shard.product_bundle)
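        # e.g. (illustrative): a non-bringup product -> need_ssh, no serial;
        # a bringup boot test (product_bundle set) -> neither; any other
        # bringup shard -> need_serial.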
        # This command spins up a metadata server that allows its subcommands to
        # automagically authenticate with LUCI auth, provided the sub-exec'ed tool
        # was written in Go or Dart and respectively makes use of the standard
        # cloud.google.com/go/compute/metadata or
        # github.com/dart-lang/googleapis_auth authentication libraries. Such
        # libraries look for a metadata server under environment variables
        # like $GCE_METADATA_HOST, which LUCI emulates.
service_account = shard.service_account or test_spec.default_service_account
if service_account:
# We specify -scopes in order to append "https://www.googleapis.com/auth/nest-account".
# Default Scopes when using 'luci-auth context':
# https://source.chromium.org/chromium/infra/infra/+/main:go/src/go.chromium.org/luci/auth/client/authcli/authcli.go;l=229;drc=8e944005719b0d612a63263176ec2a75ee78a850
scopes = [
"https://www.googleapis.com/auth/cloud-platform",
"https://www.googleapis.com/auth/firebase",
"https://www.googleapis.com/auth/gerritcodereview",
"https://www.googleapis.com/auth/userinfo.email",
"https://www.googleapis.com/auth/nest-account",
]
luci_auth_package = "infra/tools/luci-auth/${platform}"
luci_auth_version = self.m.cipd_ensure.get_packages(
"read luci-auth package",
self.resource("luci-auth/cipd.ensure"),
test_data=f"{luci_auth_package} version:pinned-version",
)[luci_auth_package]
ensure_file.add_package(
luci_auth_package,
luci_auth_version,
subdir=relative_cwd,
)
cmd.extend(
[
"./luci-auth",
"context",
"-scopes",
" ".join(scopes),
"--",
]
)
if is_emu_type:
dimensions.update(os="Debian")
if test_spec.use_tcg:
dimensions.update(cpu="x64")
else:
dimensions.update(kvm="1", cpu=build_results.set_metadata.target_arch)
elif is_gce_type:
# Have any GCE shards target the GCE executors, which are e2-2
# machines running Linux.
dimensions.update(os="Linux", cores="2", gce="1")
else:
# No device -> no serial.
if test_spec.targets_serial and shard.device_type:
dimensions["serial"] = "1"
dimensions.update(shard.dimensions)
# Ensure we use GCE VMs whenever possible.
is_linux = not shard.os or shard.os.lower() == "linux"
if is_linux and not is_gce_type and test_bot_cpu == "x64":
if test_spec.enable_sandboxing:
ensure_file.add_package(
*self.m.nsjail.tool_metadata,
subdir=relative_cwd,
)
if not test_spec.use_tcg:
dimensions["kvm"] = "1"
if (
(is_emu_type or not shard.device_type)
and test_bot_cpu == "x64"
and is_linux
):
dimensions["gce"] = "1"
dimensions["cores"] = "8"
# Default to the dynamically computed shard timeout, but allow
# overriding in case the dynamically computed timeouts are too short for
# some reason.
shard_timeout_secs = shard.timeout_secs
if test_spec.timeout_secs:
shard_timeout_secs = test_spec.timeout_secs
cmd.extend(
[
"./" + register_tool("botanist"),
"-level",
BOTANIST_LOG_LEVEL,
"run",
]
)
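        # At this point the composed command looks roughly like this
        # (illustrative; the luci-auth segment is only present when a
        # service account is configured):
        #   ./rdb stream -var builder:<builder> -- \
        #     ./luci-auth context -scopes "<scopes>" -- \
        #     ./botanist -level debug run <flags...>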
if shard.targets_fuchsia:
# EMU and non-EMU tasks all use the same serial log name and directory
# so add it to the expected outputs for all tasks that target Fuchsia.
outputs.append(self.SERIAL_LOG_DIR)
# We expect the serial log and directory to be in the swarming task
# root directory, but the botanist command is run within
# relative_cwd, so the path provided to botanist should be relative
# to the relative_cwd.
serial_log_path = self.m.path.relpath(
self.SERIAL_LOG_DIR + "/serial_log.txt",
relative_cwd,
)
serial_log_dir = self.m.path.relpath(self.SERIAL_LOG_DIR, relative_cwd)
cmd.extend(
[
"-images",
image_manifest,
"-timeout",
f"{int(shard_timeout_secs)}s",
]
)
cmd.extend(["-ffx", "./" + register_tool("ffx")])
if test_spec.ffx_experiment_level > 0:
cmd.extend(
["-ffx-experiment-level", str(test_spec.ffx_experiment_level)]
)
product_bundle_name = (
shard.product_bundle or build_results.product_bundle_name
)
if not product_bundle_name: # pragma: nocover
raise self.m.step.StepFailure("missing product bundle name")
cmd.extend(
[
"-product-bundles",
PRODUCT_BUNDLES_JSON,
"-product-bundle-name",
product_bundle_name,
]
)
if shard.product_bundle:
cmd.append("-boot-test")
if shard.bootup_timeout_secs:
cmd.extend(["-bootup-timeout", f"{shard.bootup_timeout_secs}s"])
if shard.pkg_repo:
outputs.append(BLOB_DOWNLOAD_MANIFEST)
blob_manifest_relpath = self.m.path.relpath(
BLOB_DOWNLOAD_MANIFEST,
relative_cwd,
)
cmd.extend(
[
"-repo",
self.m.artifacts.package_repo_url(),
"-blobs",
self.m.artifacts.package_blob_url(),
"-local-repo",
shard.pkg_repo,
"-download-manifest",
blob_manifest_relpath,
]
)
# In the emulator case where we don't need to read/write to serial,
# serial output is redirected to a file through the qemu config.
if not is_emu_type or need_serial:
cmd.extend(["-serial-log-dir", serial_log_dir])
if need_ssh:
# We expect the syslogs to be in the swarming task root
# directory, but the path provided to botanist should be
# relative to the relative_cwd.
cmd.extend(
[
"-syslog-dir",
self.m.path.relpath(self.SYSLOG_DIR, relative_cwd),
]
)
outputs.append(self.SYSLOG_DIR)
if not test_spec.pave:
cmd.append("-netboot")
for arg in test_spec.zircon_args:
cmd.extend(["-zircon-args", arg])
if test_spec.upload_results_from_swarming:
cmd.extend("-upload-to-resultdb")
config = BOTANIST_DEVICE_CONFIG
if is_emu_type:
qemu_config = [
{
"type": shard.device_type.lower(),
"path": f"./{shard.device_type.lower()}/bin",
"edk2_dir": "./edk2",
"target": build_results.set_metadata.target_arch,
"cpu": 4,
"memory": (
8192
if build_results.set_metadata.target_arch == "riscv64"
else self.m.emu.get_memory_for_variant(build_results)
),
"kvm": not test_spec.use_tcg,
                        # Directs the emulator process to run in a way that
                        # lets us synthesize a 'serial device'. We only need
                        # this in the bringup case, where tests execute over
                        # serial; restricting it to that minimal case matters
                        # because this mode tends to slow some processes down.
"serial": need_serial,
# Used to dynamically extend fvm.blk to fit downloaded
# test packages.
"fvm_tool": register_tool("fvm") if test_spec.pave else "",
"logfile": serial_log_path if not need_serial else "",
}
]
if need_ssh:
# Used to embed the ssh key into the zbi.
qemu_config[0]["zbi_tool"] = register_tool("zbi")
# UEFI-related emulator firmware.
self.m.emu.add_edk2_to_ensure_file(
ensure_file,
checkout=build_results.checkout.root_dir,
subdir=self.m.path.join(relative_cwd, "edk2"),
)
if shard.device_type == "AEMU":
self.m.emu.add_aemu_to_ensure_file(
ensure_file,
checkout=build_results.checkout.root_dir,
subdir=self.m.path.join(relative_cwd, "aemu/bin"),
)
elif shard.device_type == "QEMU":
self.m.emu.add_qemu_to_ensure_file(
ensure_file,
checkout=build_results.checkout.root_dir,
subdir=self.m.path.join(relative_cwd, "qemu"),
)
config_basename = shard.name + ".botanist.json"
config = "./" + config_basename
self.m.file.write_json(
"write qemu config",
build_results.build_dir / config_basename,
qemu_config,
indent=2,
)
cleanup_upload_paths.append(build_results.build_dir / config_basename)
elif is_gce_type:
ensure_file.add_package(
gce_api.GCEM_CLIENT_CIPD_PATH,
gce_api.GCEM_CLIENT_CIPD_REVISION,
subdir=relative_cwd,
)
config_basename = shard.name + ".botanist.json"
config = "./" + config_basename
self.m.gce.create_botanist_config(
test_spec.gce_mediator.endpoint,
test_spec.gce_mediator.cloud_project,
test_spec.gce_mediator.machine_shape,
self.m.buildbucket.backend_task_id_from_build(buildbucket_build),
build_results.build_dir / config_basename,
)
cleanup_upload_paths.append(build_results.build_dir / config_basename)
cmd.extend(["-config", config])
else:
cmd.append("-skip-setup")
cmd.extend(flags)
if shard:
upload_paths.extend(
self.m.path.abs_to_path(
# Use abspath() to collapse ".." elements in the dep path.
self.m.path.abspath(build_results.build_dir / dep)
)
for dep in shard.deps
)
env_prefixes = {}
if not shard.os or shard.os.lower() != "mac":
# Add ssh to $PATH.
env_prefixes["PATH"] = [
self.m.path.normpath(
self.m.path.join(
relative_cwd, self.m.path.dirname(register_tool("ssh"))
)
)
]
env_vars = self._test_task_env_vars(
buildbucket_build,
shard.device_type,
catapult_dashboard_master=test_spec.catapult_dashboard_master,
catapult_dashboard_bot=test_spec.catapult_dashboard_bot,
release_branch=build_results.checkout.release_branch,
release_version=build_results.checkout.release_version,
image_manifest=image_manifest,
llvm_symbolizer_path=(
# Relevant for automatic symbolization of things running on
# host. Only the x64 variation is available in the checkout and
# we have nothing that runs on an arm host that needs
# symbolizing.
register_tool("llvm-symbolizer")
if test_bot_cpu == "x64"
else ""
),
# TODO(fxb/38517): s/bootserver_new/bootserver.
bootserver_path=register_tool("bootserver_new"),
integration_revision=build_results.checkout.integration_revision,
)
cas_input_root = self.m.cas_util.upload(
root_dir,
upload_paths=sorted(set(upload_paths + cleanup_upload_paths)),
step_name="upload task inputs to CAS",
)
# Clean up to avoid leaving temporary files in the build directory.
for path in cleanup_upload_paths:
self.m.file.remove(f"remove {self.m.path.basename(path)}", path)
# These variables are no longer usable now that the task inputs have
# been uploaded.
del upload_paths, cleanup_upload_paths, register_tool
request = (
self.m.swarming.task_request()
.with_name(shard.name)
.with_tags(self._test_task_tags(buildbucket_build, build_results, shard))
.with_resultdb()
)
if service_account:
request = request.with_service_account(service_account)
task_slice = (
request[0]
.with_command(cmd)
.with_cas_input_root(cas_input_root)
.with_relative_cwd(relative_cwd)
.with_dimensions(**dimensions)
.with_expiration_secs(test_spec.swarming_expiration_timeout_secs)
.with_io_timeout_secs(test_spec.swarming_io_timeout_secs)
.with_grace_period_secs(test_spec.swarming_grace_period_secs)
.with_execution_timeout_secs(
shard_timeout_secs + test_spec.botanist_grace_period_secs
)
.with_outputs(outputs)
.with_cipd_ensure_file(ensure_file)
.with_env_vars(**env_vars)
.with_env_prefixes(**env_prefixes)
)
return request.with_slice(0, task_slice)
def _bot_cpu_type(
self,
shard,
build_results,
test_spec,
):
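        """Returns the CPU type that this shard's task should run on.

        A rough decision table (an illustrative summary of the logic below):
            host-only shard (no device_type)  -> build target arch
            physical device or GCE shard      -> x64
            emulator shard using KVM          -> build target arch
            emulator shard under TCG          -> x64
        """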
        # Determine what system architecture we want this test task to run on.
        # First, pure host tests should always run on the architecture that
        # they were built for. We can identify this case by the absence of a
        # device_type in the shard.
if not shard.device_type:
return build_results.set_metadata.target_arch
        # Second, we use x64 controllers for all the physical hardware
        # that runs in labs, so those should always be x64. We also use x64
        # cloud instances to drive testing against GCE, regardless of whether
        # the GCE instance is x64 or arm64. So all non-emulator shards with a
        # device_type set should run on x64.
is_emu_type = self.m.emu.is_emulator_type(shard.device_type)
if not is_emu_type:
return "x64"
# For emulator shards, we want to take advantage of KVM where possible,
# so we want to use the build's target arch, unless we're using TCG to
# fully emulate hardware, in which case we'd rather that run on big x64
# machines (since we don't have any riscv64 hardware capable of running
# that workload).
if test_spec.use_tcg:
return "x64"
return build_results.set_metadata.target_arch
def _test_task_env_vars(
self,
build,
device_type,
image_manifest,
catapult_dashboard_master,
catapult_dashboard_bot,
release_branch,
release_version,
llvm_symbolizer_path,
bootserver_path,
integration_revision,
):
# Note that this will sometimes point to the wrong commit for tryjobs,
# which re-resolve HEAD on the fly rather than respecting the input
# commit. Therefore its `id` field should not be used.
commit = build.input.gitiles_commit
commit_host = commit.host
commit_ref = commit.ref
del commit
env_vars = dict(
# `${ISOLATED_OUTDIR}` is a magic string that Swarming will replace
# with a temporary directory whose contents will be automatically
# uploaded to CAS upon exit of a task.
FUCHSIA_TEST_OUTDIR="${ISOLATED_OUTDIR}",
# Used by performance tests and OTA tests.
BUILDBUCKET_ID=str(build.id) if build.id else None,
# Used by performance and e2e tests.
# TODO(fxbug.dev/50210): Don't fall back to time.time() once led
# starts setting create_time again.
BUILD_CREATE_TIME=str(build.create_time.seconds or int(self.m.time.time())),
# Used by e2e tests.
BUILDER_NAME=build.builder.builder,
# Used by e2e tests.
FUCHSIA_DEVICE_TYPE=device_type,
# Used by e2e tests.
INPUT_COMMIT_HOST=commit_host,
RELEASE_BRANCH=release_branch,
# Used by performance tests.
RELEASE_VERSION=str(release_version) if release_version else None,
# Used by the fuchsia-specific Swarming pre-task hook.
BOOTSERVER_PATH="./" + bootserver_path,
# Used by the fuchsia-specific Swarming pre-task hook.
IMAGE_MANIFEST_PATH=image_manifest,
SWARMING_BOT_FILE="${SWARMING_BOT_FILE}",
)
if llvm_symbolizer_path:
# Used for symbolization.
env_vars.update(
ASAN_SYMBOLIZER_PATH=llvm_symbolizer_path,
LSAN_SYMBOLIZER_PATH=llvm_symbolizer_path,
TSAN_OPTIONS="external_symbolizer_path=" + llvm_symbolizer_path,
UBSAN_SYMBOLIZER_PATH=llvm_symbolizer_path,
)
is_internal = commit_host == "turquoise-internal.googlesource.com"
if integration_revision and is_internal:
            # Used for uploading to Chromeperf/Catapult in a way that is
            # compatible with Skia Perf.
env_vars.update(INTEGRATION_INTERNAL_GIT_COMMIT=integration_revision)
env_vars.update(
self._get_catapult_dashboard_env_vars(
catapult_dashboard_master, catapult_dashboard_bot, commit_ref
)
)
# For some reason, empty string environment variables sent to the swarming
# API get interpreted as null and rejected. So don't bother sending them to
# avoid breaking the task request.
# TODO(olivernewman): Figure out whether this logic should be moved into
# the upstream swarming module (or obviated by fixing the "" -> null
# behavior).
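        # e.g. (illustrative) {"FOO": "bar", "EMPTY": ""} -> {"FOO": "bar"}.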
return {k: v for k, v in env_vars.items() if v}
def _test_task_tags(self, buildbucket_build, build_results, shard):
env_name = (
f"{shard.device_type or shard.os}-{build_results.set_metadata.target_arch}"
)
tags = {
"board": (
build_results.set_metadata.board
or build_results.set_metadata.target_arch
),
"build_type": build_results.set_metadata.optimize,
"buildbucket_bucket": buildbucket_build.builder.bucket,
"buildbucket_builder": buildbucket_build.builder.builder,
"product": build_results.set_metadata.product,
"role": "tester",
"task_name": shard.name,
            # Create a unique hash for every test shard within the same build.
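            # (The hash input is the buildbucket build ID concatenated with
            # the JSON-serialized shard, so identical shards from different
            # builds hash differently.)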
"test_shard_hash": hashlib.sha256(
(
str(buildbucket_build.id) + self.m.json.dumps(attr.asdict(shard))
).encode("utf-8")
).hexdigest(),
# Consumed by google3 results uploader, and by the orchestrator
# when uploading to resultdb.
self.TEST_ENVIRONMENT_TAG_NAME: env_name,
"variants": build_results.set_metadata.variants,
}
formatted_tags = {}
for k, val in tags.items():
if not val:
val = []
elif isinstance(val, str):
val = [val]
formatted_tags[str(k)] = [str(i) for i in val]
return formatted_tags
def _get_catapult_dashboard_env_vars(self, master_name, bot_name, commit_ref):
if not master_name and not bot_name:
# Uploading to Catapult is disabled.
return {}
if not (master_name and bot_name):
raise ValueError(
f"Catapult master and bot names not set consistently: {master_name!r}, {bot_name!r}"
)
prefix = "refs/heads/releases/"
if commit_ref.startswith(prefix):
branch_name = commit_ref.removeprefix(prefix)
master_name += "." + branch_name
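            # e.g. a commit_ref of "refs/heads/releases/f12" extends a
            # master_name of "some.master" to "some.master.f12" (names
            # illustrative).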
elif commit_ref != "refs/heads/main":
# Unrecognized Git branch/tag name. Disable uploading to Catapult
# by not setting the env vars.
return {}
return dict(
CATAPULT_DASHBOARD_MASTER=master_name, CATAPULT_DASHBOARD_BOT=bot_name
)