| # Copyright 2019 The Fuchsia Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Recipe for running zbi tests.""" |
| |
| import copy |
| import re |
| |
| from RECIPE_MODULES.fuchsia.swarming_retry import api as swarming_retry_api |
| |
| from PB.recipes.fuchsia.zbi_test import InputProperties |
| |
PYTHON_VERSION_COMPATIBILITY = "PY3"

# Recipe modules used by this recipe; resolved by the recipe engine.
DEPS = [
    "fuchsia/artifacts",
    "fuchsia/autocorrelator",
    "fuchsia/build",
    "fuchsia/buildbucket_util",
    "fuchsia/cas_util",
    "fuchsia/checkout",
    "fuchsia/emu",
    "fuchsia/swarming_retry",
    "fuchsia/symbolize",
    "fuchsia/testing_requests",
    "fuchsia/testsharder",
    "recipe_engine/buildbucket",
    "recipe_engine/file",
    "recipe_engine/path",
    "recipe_engine/platform",
    "recipe_engine/properties",
    "recipe_engine/step",
    "recipe_engine/swarming",
]

# Input properties proto for this recipe (PB.recipes.fuchsia.zbi_test).
PROPERTIES = InputProperties

# How long to wait (in seconds) before forcibly terminating the test swarming
# task if there's no output being produced.
TEST_IO_TIMEOUT_SECS = 300

# How long a pending test swarming task waits to be scheduled on a bot.
# We should never expire a test task. This is currently 5 hours, but
# should be treated as infinite.
TEST_EXPIRATION_TIMEOUT_SECS = 18000

# How long the test is allowed to run before swarming cancels it. Also used
# as the default for seriallistener's -timeout flag when a zbi test does not
# specify its own "timeout" (see Task.__init__).
TEST_EXECUTION_TIMEOUT_SECS = 600

# Path of the botanist device config file on test bots.
# NOTE(review): unused in this file as shown — verify external use.
BOTANIST_DEVICE_CONFIG = "/etc/botanist/config.json"
# File name of the image manifest written into each test task's input tree.
IMAGES_JSON = "images.json"
# Image-manifest name for QEMU kernel images.
# NOTE(review): unused in this file as shown — verify external use.
QEMU_KERNEL_NAME = "qemu-kernel"

# The log level to use for botanist invocations in test tasks. Can be one of
# "fatal", "error", "warning", "info", "debug", or "trace", where "trace" is
# the most verbose, and fatal is the least.
BOTANIST_LOG_LEVEL = "debug"

# Substrings whose presence in task output indicates a catastrophic failure.
SPECIAL_FAILURE_LOG_STRINGS = (
    "ZIRCON KERNEL PANIC",
    "ZIRCON KERNEL OOPS",
    "DEVICE SUSPEND TIMED OUT",
    "ASSERT FAILED",
)
# Subset of the above that may legitimately appear in test output (e.g. from
# an intentional assertion or death-test case), so they are ignored when the
# output being scanned came from a test (see match_special_failure).
SPECIAL_FAILURE_LOG_STRINGS_POSSIBLY_FOUND_IN_TESTS = (
    "DEVICE SUSPEND TIMED OUT",
    "ASSERT FAILED",
)
# Regex patterns treated the same way as SPECIAL_FAILURE_LOG_STRINGS.
SPECIAL_FAILURE_LOG_PATTERNS = (re.compile(r"ERROR: [A-Za-z]+Sanitizer"),)
| |
| |
def prebuilt_path(api, checkout_root, *path):
    """Returns the Path to the host-platform subdir under the given subdirs.

    The final path component is "<os>-<arch>", e.g. "linux-x64".
    """
    # Map the recipe engine's platform arch to the prebuilt dir's arch name.
    # A KeyError here means the host architecture is unsupported.
    host_arch = {"intel": "x64"}[api.platform.arch]
    host_subdir = "%s-%s" % (api.platform.name, host_arch)
    return checkout_root.join("prebuilt", *path, host_subdir)
| |
| |
def match_special_failure(output, test_output=False):
    """Check the given string for known special failure patterns.

    Args:
      output (str): the output to search.
      test_output (bool): whether the output is that from a test, in which
        case specific failure modes are excluded, as they may result from an
        intentional assertion or death test case.

    Returns:
      None or a string that can be used in `failure_reason`.
    """
    excluded = (
        SPECIAL_FAILURE_LOG_STRINGS_POSSIBLY_FOUND_IN_TESTS if test_output else ()
    )

    # Fixed substrings first, in declaration order.
    for candidate in SPECIAL_FAILURE_LOG_STRINGS:
        if candidate not in excluded and candidate in output:
            return candidate

    # Then the regex-based patterns.
    for pattern in SPECIAL_FAILURE_LOG_PATTERNS:  # pragma: nocover
        found = pattern.search(output)
        if found:
            return found.group(0)
    return None
| |
| |
class Task(swarming_retry_api.TriggeredTask):
    """A retryable swarming task that boots one ZBI test on one device type.

    The task's input tree holds the relevant images plus an image manifest;
    the task command is `seriallistener`, which reads the serial output that
    botanist forwards and exits 0 iff the test's success string appears
    before the timeout.
    """

    # TODO(fxbug.dev/50072) The task request construction logic in this method
    # should be abstracted into a helper in api.testing_requests so, for example,
    # the QEMU configuration here does not diverge from the QEMU configurations
    # elsewhere.
    def __init__(
        self,
        api,
        name,
        task_requester,
        build_results,
        zbi_test,
        qemu_kernel,
        zedboot_images,
        device_type,
        **kwargs
    ):
        """Constructs the swarming task request for (zbi_test, device_type).

        Args:
          api: the recipe API object.
          name (str): the swarming task name.
          task_requester: the object returned by
            api.testing_requests.get_task_requester(), used to build the
            final task request.
          build_results: fuchsia build results; provides `checkout` and
            `gn_results`.
          zbi_test (dict): the ZBI test's image-manifest entry. Mutated for
            emulator device types (renamed to "zircon-a"/"zbi" below), so
            callers should pass a private copy.
          qemu_kernel (dict or None): QEMU kernel image entry; only added to
            the manifest for emulator device types.
          zedboot_images (list[dict]): zedboot image entries to include in
            the manifest.
          device_type (str): the swarming "device_type" dimension value.
          **kwargs: forwarded to swarming_retry's TriggeredTask.
        """
        # Placeholder request; replaced with the fully configured request at
        # the end of this method.
        self._request = api.swarming.task_request().with_name(name)
        super(Task, self).__init__(self._request, api, **kwargs)
        self._checkout = build_results.checkout
        self._gn_results = build_results.gn_results

        is_emu_type = api.emu.is_emulator_type(device_type)
        images = [zbi_test]
        if is_emu_type:
            images.append(qemu_kernel)
            # Hack. Botanist's QEMU codepath currently expects to run something with
            # a name of "zircon-a" and a type of "zbi".
            zbi_test["name"] = "zircon-a"
            zbi_test["type"] = "zbi"
        images.extend(zedboot_images)

        # Lay out the task inputs under a temp dir, mirroring the build dir's
        # position relative to the checkout root so the images' relative
        # paths resolve inside the task.
        task_input_tree = api.cas_util.tree(root=api.path.mkdtemp("isolate"))
        relative_cwd = api.path.relpath(
            self._gn_results.build_dir, self._checkout.root_dir
        )
        api.file.ensure_directory(
            "ensure relative cwd", task_input_tree.root.join(relative_cwd)
        )
        for img in images:
            # Link (rather than copy) each image into the input tree.
            task_input_tree.register_link(
                target=self._gn_results.build_dir.join(img["path"]),
                linkname=task_input_tree.root.join(relative_cwd, img["path"]),
            )

        build_results.images = images
        image_manifest_path = task_input_tree.root.join(relative_cwd, IMAGES_JSON)
        api.file.write_json(
            "write image manifest", image_manifest_path, images, indent=2
        )

        cmd = [
            # botanist will run the following as a subprocess. seriallistener
            # is responsible for reading in the serial output that botanist
            # forwards to a socket; it will exit(0) if it sees it or else it
            # will time out and fail.
            "./seriallistener",
            "-timeout",
            "%ss" % zbi_test.get("timeout", TEST_EXECUTION_TIMEOUT_SECS),
            # If of emulator_type, then we are already redirecting serial to
            # stdout. Do not double redirect.
            "-stdout=%s" % (not is_emu_type),
            "-success-str",
            zbi_test["success_string"],
        ]

        zbi_shard = api.testsharder.Shard(
            name, [], dimensions={"device_type": device_type}
        )
        # NOTE(review): this overwrites the placeholder request assigned
        # above; relies on TriggeredTask reading self._request after
        # construction — verify against the swarming_retry module.
        self._request = task_requester.request(
            zbi_shard,
            build_results,
            cmd,
            task_input_tree,
            ["seriallistener"],
            IMAGES_JSON,
            BOTANIST_LOG_LEVEL,
            relative_cwd,
        )

    def process_result(self, attempt):
        """Symbolizes the attempt's logs and flags special failures.

        Attaches the symbolized task output (and the serial log, when
        present) to `attempt.logs`, and sets `attempt.failure_reason` when a
        known catastrophic failure signature appears in the output.
        """
        assert attempt.result
        result = attempt.result

        symbolizer_tool = self._gn_results.tool("symbolizer")
        clang_dir = prebuilt_path(
            self._api, self._checkout.root_dir, "third_party", "clang"
        )
        # Debug symbols come from both the build output and the clang
        # prebuilts.
        build_id_dirs = (
            self._gn_results.build_dir.join(".build-id"),
            clang_dir.join("lib", "debug", ".build-id"),
        )

        attempt.logs[
            self._api.testing_requests.TEST_TASK_OUTPUT_FILE
        ] = self._api.symbolize(
            symbolizer_tool=symbolizer_tool,
            build_id_dirs=build_id_dirs,
            data=result.output,
            name="symbolize %s" % self._api.testing_requests.TEST_TASK_OUTPUT_FILE,
        )

        serial_log_name = self._api.testing_requests.SERIAL_LOG_NAME
        if serial_log_name in result.outputs:
            serial_log_contents = self._api.file.read_text(
                "read %s" % serial_log_name,
                result.outputs[serial_log_name],
                include_log=False,
                test_data="[00004.791] 00000.01025> CPU 1: 01:00:02:02:02:02",
            )
            attempt.logs[serial_log_name] = self._api.symbolize(
                symbolizer_tool=symbolizer_tool,
                build_id_dirs=build_id_dirs,
                data=serial_log_contents,
                name="symbolize %s" % serial_log_name,
            )

        # A kernel panic may be present in the logs even if the task timed
        # out, so check for that first.
        special_failure = match_special_failure(result.output, test_output=True)
        if special_failure is not None:
            attempt.failure_reason = special_failure

    def present_attempt(self, task_step, attempt, **kwargs):
        """Presents one attempt as its own step with logs and a task link."""
        del task_step, kwargs  # Unused.
        name = "%s (%s)" % (attempt.name, "pass" if attempt.success else "fail")
        step = self._api.step.empty(name)
        step.presentation.step_summary_text = attempt.failure_reason
        step.presentation.links["task UI"] = attempt.task_ui_link

        for log, data in sorted(attempt.logs.items()):
            step.presentation.logs[log] = data
| |
| |
def RunSteps(api, props):
    """Builds and executes Zircon tests in QEMU on a different machine.

    One swarming task is created per (zbi test, device type) pair; the tasks
    are run with retries via swarming_retry, and on tryjobs any failure is
    cross-referenced against similar builds via the autocorrelator module.
    """

    assert props.manifest
    assert props.remote
    checkout = api.checkout.fuchsia_with_options(
        manifest=props.manifest,
        remote=props.remote,
    )

    build_results = api.build.with_options(
        checkout=checkout,
        fint_params_path=props.fint_params_path,
    )

    # A falsy result means the build was skipped because the change under
    # test does not affect it; record that and exit successfully.
    if not build_results:
        step = api.step.empty("build is unaffected")
        step.presentation.properties["skipped_because_unaffected"] = True
        return

    zedboot_imgs = build_results.zedboot_images
    qemu_kernels = build_results.zbi_test_qemu_kernel_images

    # Record whether the affected-test analysis found nothing to run, and
    # bail out early if so.
    with api.step.nest("record affected_tests_no_work") as presentation:
        presentation.properties["affected_tests_no_work"] = build_results.no_work
        if build_results.no_work:
            return

    if props.artifact_gcs_bucket:
        api.artifacts.gcs_bucket = props.artifact_gcs_bucket
        # Namespace uploaded artifacts by build ID to avoid collisions.
        api.artifacts.namespace = api.buildbucket_util.id
        api.artifacts.upload("upload artifacts", build_results)

    task_requester = api.testing_requests.get_task_requester(
        buildbucket_build=api.buildbucket.build,
        pave=False,
        pool=props.test_pool,
        swarming_expiration_timeout_secs=TEST_EXPIRATION_TIMEOUT_SECS,
        swarming_io_timeout_secs=TEST_IO_TIMEOUT_SECS,
        timeout_secs=TEST_EXECUTION_TIMEOUT_SECS,
        use_runtests=False,
        default_service_account="",
        targets_serial=True,
        catapult_dashboard_master="",
        catapult_dashboard_bot="",
        release_branch="",
        release_version="",
        zircon_args=[],
    )

    tasks = []
    with api.step.nest("prepare tests"):
        # Sort by name so step ordering is deterministic.
        for zbi_test in sorted(
            build_results.gn_results.zbi_tests, key=lambda t: t["name"]
        ):
            name = zbi_test["name"]
            if zbi_test.get("disabled", False):
                continue  # pragma: no cover
            # Run only on device types both allowed by properties and
            # requested by the test itself (default: QEMU).
            allowed = set(props.allowed_device_types)
            specified = set(zbi_test.get("device_types", ["QEMU"]))
            device_types = allowed.intersection(specified)

            for device_type in sorted(device_types):
                task_name = "%s - %s" % (name, device_type)
                with api.step.nest(task_name):
                    tasks.append(
                        Task(
                            api,
                            name=task_name,
                            task_requester=task_requester,
                            build_results=build_results,
                            # Copy the image objects, as they are shared across different
                            # task requests and each may need to modify its object.
                            zbi_test=copy.deepcopy(zbi_test),
                            # Non-emulator tasks won't have a QEMU kernel
                            # image, hence get().
                            qemu_kernel=copy.deepcopy(qemu_kernels.get(name)),
                            zedboot_images=zedboot_imgs,
                            device_type=device_type,
                        )
                    )

    try:
        api.swarming_retry.run_and_present_tasks(
            tasks,
            collect_output_dir=api.path.mkdtemp("swarming"),
            max_attempts=props.max_attempts_per_test,
        )
    except api.step.StepFailure as exc:
        # On tryjobs, check whether similar try/CI builds saw the same
        # failure, which suggests the failure is not this change's fault.
        if api.buildbucket_util.is_tryjob:
            with api.step.nest("check for correlated failures") as parent_step:
                api.autocorrelator.check_try(
                    "check try",
                    exc,
                    exc.reason,
                    ignore_failed_build=True,
                    ignore_skipped_tests=True,
                )
                api.autocorrelator.check_ci(
                    "check ci",
                    checkout.integration_revision,
                    exc,
                    exc.reason,
                )
                api.autocorrelator.set_properties(parent_step)
        raise api.autocorrelator.compose_exception(exc)
| |
| |
def GenTests(api):
    """Yields recipe expectation test cases."""

    def test(name, zbi_test=None, output=(), tryjob=False, status="success"):
        """Builds one test case with checkout/build/task mock data.

        Args:
          name (str): the test case name.
          zbi_test (dict or None): the zbi test manifest entry to simulate;
            when None, no test tasks are mocked.
          output (sequence[str]): mock task output per attempt; providing two
            elements simulates a failed first attempt plus a retry.
          tryjob (bool): whether to simulate a tryjob.
          status (str): the expected final build status.
        """
        test = api.buildbucket_util.test(name, tryjob=tryjob, status=status)
        test += api.checkout.source_info(
            [
                {
                    "name": "integration",
                    "remote": "https://fuchsia.googlesource.com/integration",
                    "revision": "a491082dc1b632bbcd60ba3618d20b503c2de738",
                    "relativePath": "integration",
                },
                {
                    "name": "fuchsia",
                    "remote": "https://fuchsia.googlesource.com/fuchsia",
                    "revision": "a491082dc1b632bbcd60ba3618d20b503c2de738",
                    "relativePath": ".",
                },
            ]
        )
        test += api.properties(
            manifest="manifest",
            remote="https://fuchsia.googlesource.com/fuchsia",
            fint_params_path="fint_params/zbi_test-arm64.textproto",
            ninja_targets=["bundles:infratools"],
            allowed_device_types=["QEMU"],
            test_pool="fuchsia.tests",
            artifact_gcs_bucket="fuchsia-artifacts",
        )
        if not zbi_test:
            return test

        device_types = zbi_test.get("device_types", ["QEMU"])
        test += api.properties(allowed_device_types=device_types)
        test += api.build.fint_set_artifacts(
            metadata=dict(
                optimize="debug",
                product="products/bringup.gni",
                target_arch=zbi_test["cpu"],
                variants=["clang"],
            )
        )
        test += api.build.fint_build_artifacts(
            zbi_test_qemu_kernel_images={
                "zbi-test-arm64": {"path": "zbi-test-arm64.bin", "type": "bin"},
            }
        )
        test += api.step_data(
            "prepare tests.read zbi test manifest",
            api.file.read_json([zbi_test]),
        )

        def get_task_data(task_name, task_id, output):
            """Returns mock task results: passed iff `output` contains the
            test's success string, failed otherwise."""
            if zbi_test["success_string"] not in output:
                return api.swarming_retry.failed_task(
                    task_name, task_id=task_id, output=output
                )
            return api.swarming_retry.passed_task(
                task_name,
                task_id=task_id,
                output=output,
                outputs=[api.testing_requests.SERIAL_LOG_NAME],
            )

        task_id = 123
        task_data = []
        task_retry_data = []
        # Two output entries means the first attempt failed and a retry runs.
        failed_first_attempt = len(output) > 1
        for device in device_types:
            task_name = "%s - %s" % (zbi_test["name"], device)
            test += api.swarming_retry.trigger_data(
                name=task_name, task_id=task_id, iteration=0
            )
            task_data.append(
                get_task_data(task_name, task_id=task_id, output=output[0])
            )
            if failed_first_attempt:
                # The retry gets its own task ID in iteration 1.
                task_id += 1
                test += api.swarming_retry.trigger_data(
                    name=task_name, task_id=task_id, iteration=1
                )
                task_retry_data.append(
                    get_task_data(task_name, task_id=task_id, output=output[1])
                )
            task_id += 1

        test += api.swarming_retry.collect_data(task_data, iteration=0)
        if failed_first_attempt:
            test += api.swarming_retry.collect_data(task_retry_data, iteration=1)
        return test

    zbi_test = {
        "cpu": "arm64",
        "name": "zbi-test-arm64",
        "type": "zbi",
        "success_string": "purple monkey dishwasher",
        "path": "zbi-test-arm64.zbi",
        "device_types": ["QEMU", "AEMU", "DEVICE_TYPE"],
        "timeout": 60,
        "qemu_kernel_label": "//build/images:qemu_kernel_override",
    }
    yield test("pass", zbi_test, [zbi_test["success_string"]])

    yield (
        test(
            "fail",
            zbi_test,
            ["not success", "not success"],
            tryjob=True,
            status="failure",
        )
        + api.autocorrelator.check_try(
            "check for correlated failures.check try",
            test_data=[{"build_id": "456", "score": 0.98, "is_green": False}],
        )
        + api.autocorrelator.check_ci(
            "check for correlated failures.check ci",
            test_data={
                "build_id": "789",
                "score": 0.95,
                "is_green": False,
                "commit_dist": 0,
            },
        )
        + api.properties(**{"$fuchsia/autocorrelator": {"ci_bucket": "ci"}})
    )

    yield test("flake", zbi_test, ["not success", zbi_test["success_string"]])

    # Including success string and "ZIRCON KERNEL PANIC" in output to clarify
    # that this fails because of the kernel panic and not because the
    # success string is not present.
    output = zbi_test["success_string"] + "ZIRCON KERNEL PANIC"
    yield test("kernel_panic", zbi_test, [output, output], status="failure")

    yield (
        test("affected_tests_no_work", tryjob=True)
        + api.properties(fint_params_path="specs/zbi_test-x64.textproto")
        + api.build.fint_set_artifacts(skip_build=False)
        + api.build.fint_build_artifacts(build_not_affected=True)
    )

    yield (
        test("skip_if_unaffected", tryjob=True)
        + api.build.fint_set_artifacts(skip_build=True)
    )