| # Copyright 2020 The Fuchsia Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import attr |
| |
| import collections |
| import datetime |
| |
| from recipe_engine import recipe_api |
| from PB.go.chromium.org.luci.buildbucket.proto import build as build_pb2 |
| from PB.go.chromium.org.luci.buildbucket.proto import common as common_pb2 |
| |
| from RECIPE_MODULES.fuchsia.utils import pluralize |
| |
# As of 2021-10-19, `api.buildbucket.collect()` defaults to a timeout that is
# too short for our purposes. Subbuild collection should never time out on its
# own; if anything times out, it should be the build as a whole.
COLLECT_TIMEOUT = datetime.timedelta(days=1)

# The allowlist of properties that are implicitly forwarded from the parent
# build to its subbuilds.
# NOTE: Only dynamically computed properties belong here. Static properties
# (those that don't vary between builds) should instead be configured
# explicitly on the subbuild's builder rather than implicitly passed through.
PASS_THROUGH_PROPERTIES = {
    # Lets toolchain recipes pass custom toolchain info.
    "$fuchsia/build",
    # Lets toolchain recipes pass commit info.
    "$fuchsia/checkout",
    # recipe_bootstrap may use this field to hand properties to subbuilds.
    "$fuchsia/recipe_bootstrap",
    # If the parent build runs under recipe testing, the subbuild does too,
    # and it needs to be made aware of that.
    "$fuchsia/recipe_testing",
    # Lets subbuilds determine how the parent was invoked in CQ (e.g.
    # whether it's a dry run or a full run).
    "$recipe_engine/cq",
    # Set by recipe_bootstrap. A subbuild inherits the parent's integration
    # base revision so both builds use the same version of recipes and
    # properties.
    "integration_base_revision",
    # Set by `led edit-recipe-bundle -property-only` to point to the uploaded
    # recipe bundle. It's then up to recipe_bootstrap to download the recipe
    # bundle and execute it. A led subbuild should use the same recipe bundle
    # version as its parent.
    "led_cas_recipe_bundle",
}
| |
| |
@attr.s
class SubbuildResult(object):
    """Metadata describing a single launched subbuild."""

    # Name of the builder that was launched.
    builder = attr.attrib(type=str)
    # Buildbucket build ID, or Swarming task ID for led-launched subbuilds.
    build_id = attr.attrib(type=str)
    # Link to the build/task page; set at launch time, if known.
    url = attr.attrib(type=str, default=None)
    # Final Build proto; populated once the subbuild has been collected.
    build_proto = attr.attrib(type=build_pb2.Build, default=None)
| |
| |
class SubbuildApi(recipe_api.RecipeApi):
    """API for launching subbuilds and collecting the results."""

    def launch(
        self,
        builder_names,
        presentation,
        extra_properties=None,
        set_swarming_parent_run_id=True,
        hide_in_gerrit=True,
    ):
        """Launches builds with buildbucket or led.

        If the current task was launched with led, then subbuilds will also be
        launched with led.

        Args:
          builder_names (list(str)): The names of the builders to launch.
          presentation (StepPresentation): The presentation to add links to
            the launched builds to.
          extra_properties (dict): The extra set of properties to launch the
            builders with. These will override the parent properties that will
            be passed to the children by default.
          set_swarming_parent_run_id (bool): Whether to set swarming parent run
            ID on buildbucket builds.
          hide_in_gerrit (bool): Hide buildbucket subbuilds in the Gerrit UI.

        Returns:
          launched_builds (dict): The launched_builds is a map from builder name
            to the corresponding SubbuildResult.
        """
        parent_properties = self.m.properties.thaw()
        # Forward only the allowlisted dynamic properties from the parent
        # build; everything else must be configured statically on the
        # subbuild's builder (see PASS_THROUGH_PROPERTIES).
        properties = {
            key: val
            for key, val in parent_properties.items()
            if key and key in PASS_THROUGH_PROPERTIES
        }
        # Explicitly requested properties take precedence over pass-throughs.
        if extra_properties:
            properties.update(extra_properties)

        # If this task was launched by led, we launch the child with led as well.
        # This lets us ensure that the parent and child use the same version of
        # the recipes code. That is a requirement for testing, as well as for
        # avoiding the need to do soft transitions when updating the interface
        # between the parent and child recipes.
        if self.m.led.launched_by_led:
            builds = self._launch_with_led(builder_names, properties)
        else:
            builds = self._launch_with_buildbucket(
                builder_names,
                properties,
                set_swarming_parent_run_id=set_swarming_parent_run_id,
                hide_in_gerrit=hide_in_gerrit,
            )
        # Surface a link to each launched build on the given step.
        for builder, build in builds.items():
            presentation.links[builder] = build.url
        return builds

    def _launch_with_led(self, builder_names, properties):
        """Launches one led task per builder.

        Each builder is looked up in the same LUCI project and bucket as the
        parent build.

        Args:
          builder_names (list(str)): The names of the builders to launch.
          properties (dict): Input properties to set on each subbuild.

        Returns:
          dict: A map from builder name to SubbuildResult, where `build_id`
            is the Swarming task ID of the launched led task.
        """
        parent = self.m.buildbucket.build.builder
        edit_args = []
        # Each property becomes a `-p key=<json value>` flag for `led edit`;
        # sorting keeps the command line deterministic.
        for k, v in sorted(properties.items()):
            edit_args.extend(["-p", "%s=%s" % (k, self.m.json.dumps(v))])
        edit_cr_cl_arg = None
        bb_input = self.m.buildbucket.build_input
        # If the parent was triggered by a Gerrit change, apply the same
        # change (the first one) to the subbuild via `led edit-cr-cl`.
        if bb_input.gerrit_changes:
            gerrit_change = bb_input.gerrit_changes[0]
            edit_cr_cl_arg = "https://%s/c/%s/+/%d" % (
                gerrit_change.host,
                gerrit_change.project,
                gerrit_change.change,
            )

        builds = {}
        for builder_name in builder_names:
            led_data = self.m.led(
                "get-builder",
                # By default, led reduces the priority of tasks from their
                # values in buildbucket which we do not want.
                "-adjust-priority",
                "0",
                "luci.%s.%s:%s" % (parent.project, parent.bucket, builder_name),
            )
            led_data = led_data.then("edit", *edit_args)
            if edit_cr_cl_arg:
                led_data = led_data.then("edit-cr-cl", edit_cr_cl_arg)
            # Inject the parent's input recipes into the led job definition.
            led_data = self.m.led.inject_input_recipes(led_data)
            launch_res = led_data.then("launch", "-modernize")
            task_id = launch_res.launch_result.task_id
            build_url = "https://ci.chromium.org/swarming/task/%s?server=%s" % (
                task_id,
                launch_res.launch_result.swarming_hostname,
            )
            builds[builder_name] = SubbuildResult(
                builder=builder_name, build_id=task_id, url=build_url
            )
        return builds

    def _launch_with_buildbucket(
        self,
        builder_names,
        properties,
        set_swarming_parent_run_id=True,
        hide_in_gerrit=True,
    ):
        """Schedules one buildbucket build per builder.

        Args:
          builder_names (list(str)): The names of the builders to launch.
          properties (dict): Input properties to set on each subbuild.
          set_swarming_parent_run_id (bool): Whether to mark the current
            Swarming task as the parent run of the scheduled builds.
          hide_in_gerrit (bool): Whether to tag the builds so that Gerrit
            hides them in its UI.

        Returns:
          dict: A map from builder name to SubbuildResult.
        """
        reqs = []
        swarming_parent_run_id = (
            self.m.swarming.task_id if set_swarming_parent_run_id else None
        )
        bb_tags = {"skip-retry-in-gerrit": "subbuild"}
        if hide_in_gerrit:
            bb_tags["hide-in-gerrit"] = "subbuild"
        for builder_name in builder_names:
            reqs.append(
                self.m.buildbucket.schedule_request(
                    builder=builder_name,
                    properties=properties,
                    swarming_parent_run_id=swarming_parent_run_id,
                    priority=None,  # Leave unset to avoid overriding priority from configs.
                    tags=self.m.buildbucket.tags(**bb_tags),
                )
            )

        def schedule():
            # Wrapped in a function so it can be passed to utils.retry below.
            return self.m.buildbucket.schedule(reqs, step_name="schedule")

        # TODO(crbug.com/1254455): Stop retrying when the bb tool does its own
        # internal retries for all transient failures.
        #
        # We shouldn't retry if we're scheduling multiple builds, because some
        # of the builds might have successfully been scheduled on the first
        # attempt even if others failed, and we don't want to launch multiple
        # copies of each.
        max_attempts = 1 if len(reqs) > 1 else 3
        scheduled_builds = self.m.utils.retry(schedule, max_attempts=max_attempts)

        builds = {}
        for build in scheduled_builds:
            build_url = "https://ci.chromium.org/b/%s" % build.id
            builds[build.builder.builder] = SubbuildResult(
                builder=build.builder.builder, build_id=build.id, url=build_url
            )
        return builds

    def collect(self, build_ids, launched_by_led=None, extra_fields=frozenset()):
        """Collects builds with the provided build_ids.

        Args:
          build_ids (list(str)): The list of build ids to collect results for.
          launched_by_led (bool|None): Whether the builds to collect were
            launched by led. If None, then this value will be determined by
            whether the current task was launched by led.
          extra_fields (set): Extra fields to include in the buildbucket
            response.

        Returns:
          A map from build IDs to the corresponding SubbuildResult.
        """
        if launched_by_led is None:
            launched_by_led = self.m.led.launched_by_led
        if launched_by_led:
            builds = self._collect_from_led(build_ids)
        else:
            builds = self._collect_from_buildbucket(build_ids, extra_fields)
        # Return in a deterministic order: by builder name, then build ID.
        return collections.OrderedDict(
            sorted(builds.items(), key=lambda item: (item[1].builder, item[0]))
        )

    def get_property(self, build_proto, property_name):
        """Retrieve an output property from a subbuild's Build proto.

        Ensures a clear and unified missing property error message across all
        builders that use this recipe module.

        Args:
          build_proto (build_pb2.Build): The collected subbuild's Build proto.
          property_name (str): The name of the output property to look up.

        Returns:
          The value of the requested output property.

        Raises:
          InfraFailure: If the subbuild did not set the property.
        """
        try:
            return build_proto.output.properties[property_name]
        # The proto Struct wrapper raises ValueError for missing keys.
        except ValueError:
            raise self.m.step.InfraFailure(
                "Subbuild did not set the %r output property" % property_name
            )

    def _collect_from_led(self, task_ids):
        """Waits for led subbuilds (Swarming tasks) to complete.

        Args:
          task_ids (list(str)): Swarming task IDs of the led subbuilds.

        Returns:
          dict: A map from task ID to SubbuildResult, with `build_proto`
            populated from the task's output build.proto.json file.
        """
        swarming_results = self.m.swarming.collect(
            "collect", task_ids, output_dir=self.m.path["cleanup"]
        )
        builds = {}
        for result in swarming_results:
            task_id = result.id
            # Led launch ensures this file is present in the task root dir.
            build_proto_path = result.output_dir.join("build.proto.json")
            build_proto = self.m.file.read_proto(
                "read build.proto.json", build_proto_path, build_pb2.Build, "JSONPB"
            )
            builds[task_id] = SubbuildResult(
                builder=build_proto.builder.builder,
                build_id=task_id,
                build_proto=build_proto,
            )
        return builds

    def _collect_from_buildbucket(self, build_ids, extra_fields):
        """Waits for buildbucket subbuilds to complete.

        Args:
          build_ids (list(str)): Buildbucket build IDs of the subbuilds.
          extra_fields (set): Extra Build proto fields to request, on top of
            the defaults plus the Swarming task ID and summary markdown.

        Returns:
          dict: A map from build ID to SubbuildResult, with `build_proto`
            populated from the collected buildbucket Build.
        """
        bb_fields = self.m.buildbucket.DEFAULT_FIELDS.union(
            {"infra.swarming.task_id", "summary_markdown"}
        ).union(extra_fields)

        def collect():
            # Wrapped in a function so it can be passed to utils.retry below.
            return self.m.buildbucket.collect_builds(
                [int(build_id) for build_id in build_ids],
                interval=20,  # Lower from default of 60 b/c we're impatient.
                timeout=COLLECT_TIMEOUT,
                step_name="collect",
                fields=bb_fields,
            )

        # TODO(crbug.com/1254455): Stop retrying when the bb tool does its own
        # internal retries for all transient failures.
        builds = self.m.utils.retry(collect, max_attempts=3)

        failed_builds = [b for b in builds.values() if b.status != common_pb2.SUCCESS]
        if failed_builds:
            task_ids = [b.infra.swarming.task_id for b in failed_builds]
            # Make sure task IDs are non-empty.
            assert all(task_ids), task_ids

            # Wait for the underlying Swarming tasks to complete. The Swarming
            # task for a Buildbucket build can take significantly longer to
            # complete than the build itself due to post-processing outside the
            # scope of the build's recipe (e.g. cache pruning). If the parent
            # build and its Swarming task both complete before the subbuild's
            # Swarming task finishes post-processing, then the subbuild's
            # Swarming task will be killed by Swarming due to the parent being
            # complete.
            #
            # That is actually working as intended. However, it's confusing for
            # a subbuild to be marked as killed when the recipe actually exited
            # normally; "killed" usually only happens for CQ builds, when a
            # build is canceled by CQ because a new patchset of the triggering
            # CL is uploaded. So it's convenient to have dashboards and queries
            # ignore "killed" tasks. We use this workaround to ensure that
            # failed subbuilds with long post-processing steps have time to
            # complete and exit cleanly with a plain old "COMPLETED (FAILURE)"
            # status.
            #
            # We only do this if the subbuild failed as a latency optimization.
            # If all subbuilds passed, the parent will go on to do some more
            # steps using the results of the subbuilds, leaving time for the
            # subbuilds' tasks to complete asynchronously, so we don't want to
            # block here while the tasks complete.
            self.m.swarming.collect(
                "wait for %s to complete" % pluralize("task", task_ids), task_ids
            )
        # Wrap each raw Build proto in a SubbuildResult keyed by its ID.
        for build_id, build in builds.items():
            builds[build_id] = SubbuildResult(
                builder=build.builder.builder, build_id=build_id, build_proto=build
            )
        return builds