| # Copyright 2019 The Fuchsia Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Recipe for running zbi tests.""" |
| |
| import copy |
| import re |
| |
| from recipe_engine.config import Enum, List |
| from recipe_engine.post_process import StatusSuccess, StatusFailure |
| from recipe_engine.recipe_api import Property |
| |
| from RECIPE_MODULES.fuchsia.swarming_retry import api as swarming_retry_api |
| from RECIPE_MODULES.fuchsia.testing import api as testing_api |
| from RECIPE_MODULES.fuchsia.testing_requests import api as testing_requests_api |
| |
# Recipe modules available on `api` at runtime; resolved by the recipe engine.
DEPS = [
    "fuchsia/build",
    "fuchsia/checkout",
    "fuchsia/emu",
    "fuchsia/recipe_testing",
    "fuchsia/status_check",
    "fuchsia/swarming_retry",
    "fuchsia/symbolize",
    "fuchsia/testing_requests",
    "recipe_engine/buildbucket",
    "recipe_engine/cipd",
    "recipe_engine/context",
    "recipe_engine/file",
    "recipe_engine/isolated",
    "recipe_engine/json",
    "recipe_engine/path",
    "recipe_engine/platform",
    "recipe_engine/properties",
    "recipe_engine/step",
    "recipe_engine/swarming",
]

# Target CPU architectures accepted by the `target_cpu` property.
TARGETS = ["x64", "arm64"]

# How long to wait (in seconds) before killing the test swarming task if there's
# no output being produced.
TEST_IO_TIMEOUT_SECS = 180

# How long a pending test swarming task waits to be scheduled on a bot.
# We should never expire a test task. This is currently 5 hours, but
# should be treated as infinite.
TEST_EXPIRATION_TIMEOUT_SECS = 18000

# How long the test is allowed to run before swarming kills it.
TEST_EXECUTION_TIMEOUT_SECS = 600

# Path on hardware testbeds where botanist finds its device configuration.
BOTANIST_DEVICE_CONFIG = "/etc/botanist/config.json"
# Name of the image manifest file written into the isolated task inputs.
IMAGES_JSON = "images.json"
# Image name botanist looks for to locate the kernel to boot QEMU with.
QEMU_KERNEL_NAME = "qemu-kernel"

# The log level to use for botanist invocations in test tasks. Can be one of
# "fatal", "error", "warning", "info", "debug", or "trace", where "trace" is
# the most verbose, and fatal is the least.
BOTANIST_LOG_LEVEL = "debug"

# Log signatures that indicate a catastrophic failure even if the expected
# success string was also emitted.
SPECIAL_FAILURE_LOG_STRINGS = (
    "ZIRCON KERNEL PANIC",
    "ZIRCON KERNEL OOPS",
    "DEVICE SUSPEND TIMED OUT",
    "ASSERT FAILED",
)
# Subset of the above that a test may emit intentionally (e.g. an assertion or
# death-test case), and so must be ignored when scanning test output.
SPECIAL_FAILURE_LOG_STRINGS_POSSIBLY_FOUND_IN_TESTS = (
    "DEVICE SUSPEND TIMED OUT",
    "ASSERT FAILED",
)
SPECIAL_FAILURE_LOG_PATTERNS = (re.compile(r"ERROR: [A-Za-z]+Sanitizer"),)
# These files may contain the output of tests, and thus
# SPECIAL_FAILURE_LOG_STRINGS_POSSIBLY_FOUND_IN_TESTS applies.
LOG_FILES_WITH_TEST_OUTPUT = (
    testing_api.TEST_TASK_OUTPUT_FILE,
    testing_requests_api.SERIAL_LOG_NAME,
)

# Input properties of this recipe. NOTE: `basestring` (and `iteritems` below)
# mark this file as Python 2 recipe code.
PROPERTIES = {
    "manifest": Property(kind=str, help="Jiri manifest to use"),
    "remote": Property(kind=str, help="Remote manifest repository"),
    "target_cpu": Property(kind=Enum(*TARGETS), help="Target to build"),
    "variants": Property(kind=List(basestring), help="Variants to use"),
    "allowed_device_types": Property(
        kind=List(basestring), help="Allowed device types to run on"
    ),
    "test_pool": Property(
        kind=str,
        help="The swarming pool to run test tasks in",
        default="fuchsia.tests",
    ),
    "gn_args": Property(
        kind=List(basestring), help="GN args to pass to build", default=[]
    ),
    "use_goma": Property(
        kind=bool, help="Whether to use goma for the build", default=True
    ),
}
| |
| |
def prebuilt_path(api, checkout_root, *path):
    """Returns the Path to the host-platform subdir under the given subdirs."""
    # Map the recipe engine's host architecture name onto the Fuchsia
    # prebuilt directory scheme ("intel" hosts are published as "x64").
    host_arch = {"intel": "x64"}[api.platform.arch]
    platform_dir = "%s-%s" % (api.platform.name, host_arch)
    segments = list(path) + [platform_dir]
    return checkout_root.join("prebuilt", *segments)
| |
| |
def match_special_failure(output, test_output=False):
    """Scans `output` for known special failure signatures.

    Args:
      output (str): the text to search.
      test_output (bool): if True, skip signatures that a test might emit
        intentionally (e.g. an assertion or death-test case), so they are
        not misreported as failures.

    Returns:
      The matched signature (usable as a `failure_reason`), or None if no
      known pattern was found.
    """
    candidates = SPECIAL_FAILURE_LOG_STRINGS
    if test_output:
        candidates = tuple(
            s
            for s in candidates
            if s not in SPECIAL_FAILURE_LOG_STRINGS_POSSIBLY_FOUND_IN_TESTS
        )

    for needle in candidates:
        if needle in output:
            return needle
    for pattern in SPECIAL_FAILURE_LOG_PATTERNS:  # pragma: nocover
        hit = pattern.search(output)
        if hit:
            return hit.group(0)
    return None
| |
| |
class Task(swarming_retry_api.TriggeredTask):
    """A retryable swarming task that boots and watches a single ZBI test.

    Construction assembles the full swarming task request: the isolated
    inputs (images, host tools, manifests, configs), the botanist command
    line, CIPD packages, dimensions, and tags. Result processing symbolizes
    the task output and scans it for special failure signatures.
    """

    # TODO(fxbug.dev/50072) The task request construction logic in this method
    # should be abstracted into a helper in api.testing_requests so, for example,
    # the QEMU configuration here does not diverge from the QEMU configurations
    # elsewhere.
    def __init__(
        self,
        api,
        name,
        build_results,
        zbi_test,
        qemu_kernel,
        zedboot_images,
        device_type,
        test_pool,
        **kwargs
    ):
        """Builds the swarming task request for one ZBI test on one device.

        Args:
          api: the recipe API object.
          name (str): the swarming task name.
          build_results: build results from api.build.
          zbi_test (dict): image-manifest entry for the ZBI test; mutated
            here, so callers should pass a copy.
          qemu_kernel (dict or None): QEMU kernel image entry; used when
            device_type is an emulator type.
          zedboot_images (seq[dict]): zedboot pave image entries.
          device_type (str): device type to run on (e.g. "QEMU", "AEMU", or
            a hardware type).
          test_pool (str): the swarming pool to run the task in.
          **kwargs: forwarded to swarming_retry_api.TriggeredTask.
        """
        self._request = api.swarming.task_request().with_name(name)
        super(Task, self).__init__(self._request, api, **kwargs)
        self._checkout = build_results.checkout
        self._gn_results = build_results.gn_results

        is_emu_type = api.emu.is_emulator_type(device_type)
        images = [zbi_test]
        if is_emu_type:
            images.append(qemu_kernel)
            # Hack. Botanist's QEMU codepath currently expects to run something with
            # a name of "zircon-a" and a type of "zbi".
            zbi_test["name"] = "zircon-a"
            zbi_test["type"] = "zbi"
        images.extend(zedboot_images)

        isolate_tree = api.file.symlink_tree(root=api.path.mkdtemp("isolate"))
        # The following images might be coming from the zircon build and thus be
        # given at relative paths that leave the fuchsia build directory. This
        # will not make sense when we translate these paths over to the test
        # task, so we link these entries into the root of the isolate tree to
        # ensure that these paths remain sensible in any case.
        for img in images:
            new_path = api.path.basename(img["path"])
            isolate_tree.register_link(
                target=self._gn_results.fuchsia_build_dir.join(img["path"]),
                linkname=isolate_tree.root.join(new_path),
            )
            # Rewrite the manifest entry so the path is task-relative.
            img["path"] = new_path

        build_results.images = {image["name"]: image for image in images}

        # We emulate arm64 guests on arm64 hosts; in every other case we use an x64
        # host in the test task.
        host_cpu = "x64"
        if is_emu_type and build_results.target == "arm64":
            host_cpu = "arm64"
        # Host tools invoked by the task command line below; linked into the
        # isolate root so they are addressable as ./<tool>.
        for tool in ("bootserver_new", "botanist", "seriallistener"):
            isolate_tree.register_link(
                target=self._gn_results.tool(tool, host_cpu),
                linkname=isolate_tree.root.join(tool),
            )

        image_manifest_path = isolate_tree.root.join(IMAGES_JSON)
        api.file.write_json(
            "write image manifest", image_manifest_path, images, indent=2
        )

        # The emulator binaries themselves are delivered via CIPD.
        ensure_file = api.cipd.EnsureFile()
        if device_type == "QEMU":
            api.emu.add_qemu_to_ensure_file(
                ensure_file, checkout=self._checkout.root_dir, subdir="qemu"
            )

        if device_type == "AEMU":
            api.emu.add_aemu_to_ensure_file(
                ensure_file, checkout=self._checkout.root_dir, subdir="aemu/bin"
            )

        if is_emu_type:
            # Emulator runs: any Debian bot with KVM in the pool will do.
            dimensions = {
                "pool": test_pool,
                "os": "Debian",
                "cpu": zbi_test["cpu"],
                "kvm": "1",
            }
            config = "./qemu.json"
            qemu_config = [
                {
                    "type": device_type.lower(),
                    "path": "./%s/bin" % device_type.lower(),
                    "target": zbi_test["cpu"],
                    "cpu": 4,
                    "memory": 4096,
                    "kvm": True,
                    "serial": True,
                }
            ]
            api.file.write_json(
                "write qemu config",
                isolate_tree.root.join("qemu.json"),
                qemu_config,
                indent=2,
            )
        else:
            # Hardware runs: target a bot attached to the requested device
            # type with serial access; botanist reads the on-bot config.
            config = BOTANIST_DEVICE_CONFIG
            dimensions = {
                "pool": test_pool,
                "device_type": device_type,
                "serial": "1",
            }

        cmd = [
            "./botanist",
            "-level",
            BOTANIST_LOG_LEVEL,
            "run",
            "-images",
            IMAGES_JSON,
            "-serial-log",
            api.testing_requests.SERIAL_LOG_NAME,
            "-config",
            config,
            "-netboot",
            # botanist will run the following as a subprocess. seriallistener
            # is responsible for reading in the serial output that botanist
            # forwards to a socket; it will exit(0) if it sees it or else it
            # will time out and fail.
            "./seriallistener",
            "-timeout",
            "%ss" % zbi_test.get("timeout", TEST_EXECUTION_TIMEOUT_SECS),
            # If of emulator_type, then we are already redirecting serial to
            # stdout. Do not double redirect.
            "-stdout=%s" % (not is_emu_type),
            "-success-str",
            zbi_test["success_string"],
        ]

        isolate_tree.create_links("create tree of images")
        isolated = api.isolated.isolated(isolate_tree.root)
        isolated.add_dir(isolate_tree.root)
        isolated_hash = isolated.archive("isolate images")

        outputs = [api.testing_requests.SERIAL_LOG_NAME]

        env_name = "%s-%s" % (device_type, zbi_test["cpu"])
        tags = {
            "board": build_results.target,
            "build_type": build_results.build_type,
            "buildbucket_bucket": api.buildbucket.build.builder.bucket,
            "buildbucket_builder": api.buildbucket.build.builder.builder,
            "product": build_results.product,
            "role": "tester",
            "task_name": self.name,
            api.testing_requests.TEST_ENVIRONMENT_TAG_NAME: env_name,
            "variants": build_results.variants,
        }
        self._request = self._request.with_tags(
            api.testing_requests.create_swarming_tags(tags)
        )
        self._request = self._request.with_slice(
            0,
            self._request[0]
            .with_command(cmd)
            .with_isolated(isolated_hash)
            .with_dimensions(**dimensions)
            .with_execution_timeout_secs(TEST_EXECUTION_TIMEOUT_SECS)
            .with_expiration_secs(TEST_EXPIRATION_TIMEOUT_SECS)
            .with_io_timeout_secs(TEST_IO_TIMEOUT_SECS)
            .with_cipd_ensure_file(ensure_file)
            .with_outputs(outputs)
            .with_env_vars(
                **api.testing_requests.test_task_env_vars(
                    api.buildbucket.build,
                    device_type,
                    build_results,
                    image_manifest=IMAGES_JSON,
                )
            ),
        )

    def process_result(self, attempt):
        """Symbolizes the attempt's output and flags special failures.

        Attaches a "symbolized log" to the attempt and, if the raw output
        matches a known catastrophic signature, records it as the attempt's
        failure_reason.
        """
        assert attempt.result
        result = attempt.result

        symbolize_tool = self._gn_results.tool("symbolize")
        clang_dir = prebuilt_path(
            self._api, self._checkout.root_dir, "third_party", "clang"
        )
        llvm_symbolizer = self._gn_results.tool("llvm-symbolizer")
        build_id_dirs = (
            self._gn_results.zircon_build_dir.join(".build-id"),
            clang_dir.join("lib", "debug", ".build-id"),
        )

        with self._api.step.nest(result.name):
            attempt.logs["symbolized log"] = self._api.symbolize(
                symbolize_tool=symbolize_tool,
                build_id_dirs=build_id_dirs,
                llvm_symbolizer=llvm_symbolizer,
                data=result.output,
            )

        # A kernel panic may be present in the logs even if the task timed
        # out, so check for that first.
        special_failure = match_special_failure(result.output, test_output=True)
        if special_failure is not None:
            attempt.failure_reason = special_failure

    def present_attempt(self, task_step, attempt, **kwargs):
        """Renders one attempt as a step with its logs and task UI link."""
        del task_step, kwargs  # Unused.
        name = "%s (%s)" % (attempt.name, "pass" if attempt.success else "fail")
        step = self._api.step(name, None)
        step.presentation.step_summary_text = attempt.failure_reason
        step.presentation.links["task UI"] = attempt.task_ui_link

        for log, data in attempt.logs.iteritems():
            step.presentation.logs[log] = data
| |
| |
def get_qemu_kernel(api, zbi_test, gn_results):
    """Returns the image manifest entry to boot QEMU with for a ZBI test.

    Args:
      api: the recipe API object (used only to raise a StepFailure).
      zbi_test (dict): a ZBI test spec; may carry a `qemu_kernel_label` key
        selecting an override of the standard QEMU kernel.
      gn_results: GN gen results providing `image_manifest`.

    Returns:
      A deep copy of the matching image entry, renamed and retyped so that
      botanist recognizes it as the QEMU kernel.

    Raises:
      StepFailure: if there is not exactly one matching image.
    """
    is_eligible_img = lambda img: img["name"] == QEMU_KERNEL_NAME
    # A ZBI test may specify a `qemu_kernel_label` to point to an override of
    # the standard QEMU kernel.
    if "qemu_kernel_label" in zbi_test:
        is_eligible_img = lambda img: img.get("label") == zbi_test["qemu_kernel_label"]
    options = list(filter(is_eligible_img, gn_results.image_manifest))
    if len(options) != 1:  # pragma: no cover
        # Use implicit string concatenation rather than backslash
        # continuations inside the literal; the latter embedded runs of
        # source indentation in the raised message.
        raise api.step.StepFailure(
            "no QEMU kernel match found for %s. If 'qemu_kernel_label' is "
            "specified precisely one image with that label must exist; else, "
            "precisely one with the name of %s must exist"
            % (zbi_test["name"], QEMU_KERNEL_NAME)
        )
    qemu_kernel = copy.deepcopy(options[0])
    # We override the metadata as botanist explicitly looks for an
    # image of name |QEMU_KERNEL_NAME| and type |kernel| to start a QEMU instance.
    qemu_kernel["name"] = QEMU_KERNEL_NAME
    qemu_kernel["type"] = "kernel"
    return qemu_kernel
| |
| |
def zedboot_images(api, gn_results):
    """Returns deep copies of the zedboot pave images from the manifest.

    Raises:
      StepFailure: if the image manifest contains no zedboot pave images.
    """
    paveable = [
        copy.deepcopy(img)
        for img in gn_results.image_manifest
        if img.get("bootserver_pave_zedboot", [])
    ]
    if not paveable:  # pragma: no cover
        raise api.step.StepFailure("missing zedboot pave images")
    return paveable
| |
| |
def RunSteps(
    api,
    manifest,
    remote,
    target_cpu,
    variants,
    allowed_device_types,
    test_pool,
    gn_args,
    use_goma,
):
    """Builds and executes Zircon tests in QEMU on a different machine.

    Checks out fuchsia, GN-gens and ninja-builds the ZBI tests (plus the
    infra tools and any override QEMU kernels they need), then triggers one
    swarming task per (zbi_test, allowed device type) pair and presents the
    results with retry handling.
    """

    with api.context(infra_steps=True):
        assert manifest
        assert remote
        checkout = api.checkout.fuchsia_with_options(
            path=api.path["start_dir"].join("fuchsia"),
            build=api.buildbucket.build,
            manifest=manifest,
            remote=remote,
        )

    build_type = "debug"
    product = "products/bringup.gni"
    with api.step.nest("build"):
        gn_results = api.build.gen(
            checkout=checkout,
            fuchsia_build_dir=checkout.root_dir.join("out", "default"),
            target=target_cpu,
            build_type=build_type,
            product=product,
            variants=variants,
            # //bundles:infratools is necessary to build botanist.
            packages=["//bundles:infratools"],
            args=gn_args,
            use_goma=use_goma,
        )

        # ZBI tests may specify another image to be run as the QEMU kernel.
        # Ensure that it is built.
        ninja_targets = set(["bundles:infratools"])
        path_targets = set()
        # Maps zbi test name -> its QEMU kernel entry (None for tests that
        # only run on hardware).
        qemu_kernels = {}
        for name, zbi_test in gn_results.zbi_tests.iteritems():
            device_types = zbi_test.get("device_types", ["QEMU"])
            qemu_kernel = None
            if any(
                api.emu.is_emulator_type(device_type) for device_type in device_types
            ):
                qemu_kernel = get_qemu_kernel(api, zbi_test, gn_results)
                path_targets.add(qemu_kernel["path"])
            qemu_kernels[name] = qemu_kernel

        # Ensure that zedboot image is built for flashing with fastboot prior to
        # running task.
        zedboot_imgs = zedboot_images(api, gn_results)
        for img in zedboot_imgs:
            path_targets.add(img["path"])

        zircon_targets, fuchsia_targets = api.build.ninja(
            gn_results=gn_results,
            targets=ninja_targets,
            path_targets=path_targets,
            build_zbi_tests=True,
            use_goma=use_goma,
        )

        build_results = api.build.build_results(
            board=None,
            target=target_cpu,
            variants=variants,
            build_type=build_type,
            fuchsia_build_dir=gn_results.fuchsia_build_dir,
            zircon_build_dir=gn_results.zircon_build_dir,
            checkout=checkout,
            product=product,
            gn_results=gn_results,
            fuchsia_targets=fuchsia_targets,
            zircon_targets=zircon_targets,
        )

    # Skip testing entirely if the change affects no tests (unless running
    # under recipe testing, where we want full coverage of the code below).
    _, no_work = build_results.calculate_affected_tests(api.buildbucket.build.input)
    with api.step.nest("record affected_tests_no_work") as presentation:
        presentation.properties["affected_tests_no_work"] = no_work
    if no_work and not api.recipe_testing.enabled:
        return

    tasks = []
    for name, zbi_test in gn_results.zbi_tests.iteritems():
        if zbi_test.get("disabled", False):
            continue  # pragma: no cover
        # Only run on device types that are both requested by the test and
        # allowed by this builder's configuration.
        allowed = set(allowed_device_types)
        specified = set(zbi_test.get("device_types", ["QEMU"]))
        device_types = allowed.intersection(specified)

        for device_type in device_types:
            task_name = "%s - %s" % (name, device_type)
            with api.step.nest("prepare test: %s" % task_name):
                tasks.append(
                    Task(
                        api,
                        name=task_name,
                        build_results=build_results,
                        # Copy the image objects, as they are shared across different
                        # task requests and each may need to modify its object.
                        zbi_test=copy.deepcopy(zbi_test),
                        qemu_kernel=copy.deepcopy(qemu_kernels[name]),
                        zedboot_images=zedboot_imgs,
                        device_type=device_type,
                        test_pool=test_pool,
                    )
                )

    api.swarming_retry.run_and_present_tasks(
        tasks, collect_output_dir=api.path.mkdtemp("swarming")
    )
| |
| |
def GenTests(api):
    """Generates recipe-engine simulation tests for this recipe."""

    def test(name, zbi_test, status, output):
        """Builds one simulation test case.

        Args:
          name (str): test case name.
          zbi_test (dict): mocked ZBI test spec to feed the build step.
          status (str): expected overall build status ("success"/"failure").
          output (list[str]): per-attempt task output; a second element
            simulates a retry after a failed first attempt.
        """
        device_types = zbi_test.get("device_types", ["QEMU"])

        test = api.status_check.test(name, status=status)
        test += api.buildbucket.ci_build(
            git_repo="https://fuchsia.googlesource.com/fuchsia"
        )

        test += api.properties(
            manifest="manifest",
            remote="https://fuchsia.googlesource.com/fuchsia",
            target_cpu=zbi_test["cpu"],
            variants=["clang"],
            allowed_device_types=device_types,
            test_pool="fuchsia.tests",
        )

        test += api.step_data(
            "build.read zbi test manifest", api.json.output([zbi_test])
        )

        def get_task_data(task_name, device_type, task_id, output):
            # The task passes iff the output contains the success string.
            if zbi_test["success_string"] not in output:
                return api.swarming_retry.failed_task(
                    task_name, task_id=task_id, output=output
                )
            return api.swarming_retry.passed_task(
                task_name, task_id=task_id, output=output
            )

        task_id = 123
        task_data = []
        task_retry_data = []
        failed_first_attempt = len(output) > 1
        for device in device_types:
            task_name = "%s - %s" % (zbi_test["name"], device)
            test += api.swarming_retry.trigger_data(
                name=task_name, task_id=task_id, iteration=0
            )
            task_data.append(
                get_task_data(task_name, device, task_id=task_id, output=output[0])
            )
            if failed_first_attempt:
                # Simulate the retry attempt with a fresh task id.
                task_id += 1
                test += api.swarming_retry.trigger_data(
                    name=task_name, task_id=task_id, iteration=1
                )
                task_retry_data.append(
                    get_task_data(task_name, device, task_id=task_id, output=output[1])
                )
            task_id += 1

        test += api.swarming_retry.collect_data(task_data, iteration=0)
        if failed_first_attempt:
            test += api.swarming_retry.collect_data(task_retry_data, iteration=1)
        return test

    for cpu in ["arm64", "x64"]:
        zbi_test = api.build.mock_zbi_test(cpu, zircon=True)
        success_string = zbi_test["success_string"]
        yield (
            test(cpu + "-pass", zbi_test, "success", [success_string])
            + api.post_process(StatusSuccess)
        )

        yield (
            test(cpu + "-fail", zbi_test, "failure", ["not success", "not success"])
            + api.post_process(StatusFailure)
        )

        yield (
            test(cpu + "-flake", zbi_test, "success", ["not success", success_string])
            + api.post_process(StatusSuccess)
        )

        # Including success string and "ZIRCON KERNEL PANIC" in output to clarify
        # that this fails because of the kernel panic and not because the
        # success string is not present.
        output = zbi_test["success_string"] + "ZIRCON KERNEL PANIC"
        yield (
            test(cpu + "-kernel_panic", zbi_test, "failure", [output, output])
            + api.post_process(StatusFailure)
        )

    # Cover ZBI tests that override the standard QEMU kernel, from both the
    # zircon and fuchsia builds.
    for cpu in ["arm64", "x64"]:
        for zircon in [True, False]:
            zbi_test = api.build.mock_zbi_test(cpu, override=True, zircon=zircon)
            success_string = zbi_test["success_string"]
            name = ("zircon-" if zircon else "fuchsia-") + cpu + "-override"
            yield (
                test(name, zbi_test, "success", [success_string])
                + api.post_process(StatusSuccess)
            )

    # Early-exit path: the change affects no tests, so the recipe returns
    # before triggering any tasks.
    yield (
        api.status_check.test("affected_tests_no_work", "success")
        + api.properties(
            manifest="manifest",
            remote="https://fuchsia.googlesource.com/fuchsia",
            target_cpu="x64",
            variants=["clang"],
            allowed_device_types=["QEMU"],
            test_pool="fuchsia.tests",
        )
        + api.buildbucket.try_build()
        + api.step_data("affected tests.read no work status", api.json.output(True))
    )