| # Copyright 2020 The Fuchsia Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import attr |
| |
| import collections |
| import datetime |
| |
| from recipe_engine import recipe_api |
| from PB.go.chromium.org.luci.buildbucket.proto import build as build_pb2 |
| from PB.go.chromium.org.luci.buildbucket.proto import common as common_pb2 |
| |
| from RECIPE_MODULES.fuchsia.utils import pluralize |
| |
# As of 2021-10-19, `api.buildbucket.collect()` defaults to a timeout that is
# too short for our purposes. Subbuild collection should never time out on its
# own; if anything times out, it should be the build as a whole.
COLLECT_TIMEOUT = datetime.timedelta(days=1)

# The allowlist of properties that are implicitly forwarded from the parent
# build to its subbuilds.
# NOTE: Only dynamically computed properties belong here. Static properties
# (those that don't vary between builds) should instead be configured
# explicitly on the subbuild's builder rather than implicitly passed through.
PASS_THROUGH_PROPERTIES = {
    # Lets toolchain recipes pass custom toolchain info.
    "$fuchsia/build",
    # Lets toolchain recipes pass commit info.
    "$fuchsia/checkout",
    # recipe_bootstrap may use this field to hand properties to subbuilds.
    "$fuchsia/recipe_bootstrap",
    # If the parent build runs under recipe testing, the subbuild does too,
    # and it needs to be made aware of that.
    "$fuchsia/recipe_testing",
    # Lets subbuilds determine how the parent was invoked in CQ (e.g.
    # whether it's a dry run or a full run).
    "$recipe_engine/cq",
    # Set by recipe_bootstrap. A subbuild inherits the parent's integration
    # base revision so both builds use the same version of recipes and
    # properties.
    "integration_base_revision",
    # Set by `led edit-recipe-bundle -property-only` to point to the uploaded
    # recipe bundle. It's then up to recipe_bootstrap to download the recipe
    # bundle and execute it. A led subbuild should use the same recipe bundle
    # version as its parent.
    "led_cas_recipe_bundle",
}
| |
| |
@attr.s
class SubbuildResult(object):
    """Metadata describing a single launched subbuild."""

    # Name of the builder that was launched.
    builder = attr.attrib(type=str)
    # Buildbucket build ID, or Swarming task ID for led-launched subbuilds.
    build_id = attr.attrib(type=str)
    # Link to the build/task page; set at launch time, if known.
    url = attr.attrib(type=str, default=None)
    # Final Build proto; populated once the subbuild has been collected.
    build_proto = attr.attrib(type=build_pb2.Build, default=None)
| |
| |
class SubbuildApi(recipe_api.RecipeApi):
    """API for launching subbuilds and collecting the results."""

    def launch(
        self,
        builder_names,
        presentation,
        extra_properties=None,
        set_swarming_parent_run_id=True,
        hide_in_gerrit=True,
    ):
        """Launches builds with buildbucket or led.

        If the current task was launched with led, then subbuilds will also be
        launched with led.

        Args:
          builder_names (list(str)): The names of the builders to launch.
          presentation (StepPresentation): The presentation to add links to
            the launched builds to.
          extra_properties (dict): The extra set of properties to launch the
            builders with. These will override the parent properties that will
            be passed to the children by default.
          set_swarming_parent_run_id (bool): Whether to set swarming parent run
            ID on buildbucket builds.
          hide_in_gerrit (bool): Hide buildbucket subbuilds in the Gerrit UI.

        Returns:
          launched_builds (dict): The launched_builds is a map from builder name
            to the corresponding SubbuildResult.
        """
        parent_properties = self.m.properties.thaw()
        # Forward only the allowlisted dynamic properties from the parent
        # build; everything else must be configured statically on the
        # subbuild's builder (see PASS_THROUGH_PROPERTIES).
        properties = {
            key: val
            for key, val in parent_properties.items()
            if key and key in PASS_THROUGH_PROPERTIES
        }
        # Explicitly requested properties take precedence over pass-throughs.
        if extra_properties:
            properties.update(extra_properties)

        # If this task was launched by led, we launch the child with led as well.
        # This lets us ensure that the parent and child use the same version of
        # the recipes code. That is a requirement for testing, as well as for
        # avoiding the need to do soft transitions when updating the interface
        # between the parent and child recipes.
        if self.m.led.launched_by_led:
            builds = self._launch_with_led(builder_names, properties)
        else:
            builds = self._launch_with_buildbucket(
                builder_names,
                properties,
                set_swarming_parent_run_id=set_swarming_parent_run_id,
                hide_in_gerrit=hide_in_gerrit,
            )
        # Surface a link to each launched build on the given step.
        for builder, build in builds.items():
            presentation.links[builder] = build.url
        return builds

    def _launch_with_led(self, builder_names, properties):
        """Launches one led task per builder.

        Each builder is looked up in the same LUCI project and bucket as the
        parent build.

        Args:
          builder_names (list(str)): The names of the builders to launch.
          properties (dict): Input properties to set on each subbuild.

        Returns:
          dict: A map from builder name to SubbuildResult, where `build_id`
            is the Swarming task ID of the launched led task.
        """
        parent = self.m.buildbucket.build.builder
        edit_args = []
        # Each property becomes a `-p key=<json value>` flag for `led edit`;
        # sorting keeps the command line deterministic.
        for k, v in sorted(properties.items()):
            edit_args.extend(["-p", "%s=%s" % (k, self.m.json.dumps(v))])
        edit_cr_cl_arg = None
        bb_input = self.m.buildbucket.build_input
        # If the parent was triggered by a Gerrit change, apply the same
        # change (the first one) to the subbuild via `led edit-cr-cl`.
        if bb_input.gerrit_changes:
            gerrit_change = bb_input.gerrit_changes[0]
            edit_cr_cl_arg = "https://%s/c/%s/+/%d" % (
                gerrit_change.host,
                gerrit_change.project,
                gerrit_change.change,
            )

        builds = {}
        for builder_name in builder_names:
            led_data = self.m.led(
                "get-builder",
                # By default, led reduces the priority of tasks from their
                # values in buildbucket which we do not want.
                "-adjust-priority",
                "0",
                "luci.%s.%s:%s" % (parent.project, parent.bucket, builder_name),
            )
            led_data = led_data.then("edit", *edit_args)
            if edit_cr_cl_arg:
                led_data = led_data.then("edit-cr-cl", edit_cr_cl_arg)
            # Inject the parent's input recipes into the led job definition.
            led_data = self.m.led.inject_input_recipes(led_data)
            launch_res = led_data.then("launch", "-modernize")
            task_id = launch_res.launch_result.task_id
            build_url = "https://ci.chromium.org/swarming/task/%s?server=%s" % (
                task_id,
                launch_res.launch_result.swarming_hostname,
            )
            builds[builder_name] = SubbuildResult(
                builder=builder_name, build_id=task_id, url=build_url
            )
        return builds

    def _launch_with_buildbucket(
        self,
        builder_names,
        properties,
        set_swarming_parent_run_id=True,
        hide_in_gerrit=True,
    ):
        """Schedules one buildbucket build per builder.

        Args:
          builder_names (list(str)): The names of the builders to launch.
          properties (dict): Input properties to set on each subbuild.
          set_swarming_parent_run_id (bool): Whether to mark the current
            Swarming task as the parent run of the scheduled builds.
          hide_in_gerrit (bool): Whether to tag the builds so that Gerrit
            hides them in its UI.

        Returns:
          dict: A map from builder name to SubbuildResult.
        """
        reqs = []
        swarming_parent_run_id = (
            self.m.swarming.task_id if set_swarming_parent_run_id else None
        )
        bb_tags = {"skip-retry-in-gerrit": "subbuild"}
        if hide_in_gerrit:
            bb_tags["hide-in-gerrit"] = "subbuild"
        for builder_name in builder_names:
            reqs.append(
                self.m.buildbucket.schedule_request(
                    builder=builder_name,
                    properties=properties,
                    swarming_parent_run_id=swarming_parent_run_id,
                    priority=None,  # Leave unset to avoid overriding priority from configs.
                    tags=self.m.buildbucket.tags(**bb_tags),
                )
            )

        def schedule():
            # Wrapped in a function so it can be passed to utils.retry below.
            return self.m.buildbucket.schedule(reqs, step_name="schedule")

        # TODO(crbug.com/1254455): Stop retrying when the bb tool does its own
        # internal retries for all transient failures.
        #
        # We shouldn't retry if we're scheduling multiple builds, because some
        # of the builds might have successfully been scheduled on the first
        # attempt even if others failed, and we don't want to launch multiple
        # copies of each.
        max_attempts = 1 if len(reqs) > 1 else 3
        scheduled_builds = self.m.utils.retry(schedule, max_attempts=max_attempts)

        builds = {}
        for build in scheduled_builds:
            build_url = "https://ci.chromium.org/b/%s" % build.id
            builds[build.builder.builder] = SubbuildResult(
                builder=build.builder.builder, build_id=build.id, url=build_url
            )
        return builds

    def collect(self, build_ids, launched_by_led=None, extra_fields=frozenset()):
        """Collects builds with the provided build_ids.

        Args:
          build_ids (list(str)): The list of build ids to collect results for.
          launched_by_led (bool|None): Whether the builds to collect were
            launched by led. If None, then this value will be determined by
            whether the current task was launched by led.
          extra_fields (set): Extra fields to include in the buildbucket
            response.

        Returns:
          A map from build IDs to the corresponding SubbuildResult.
        """
        if launched_by_led is None:
            launched_by_led = self.m.led.launched_by_led
        if launched_by_led:
            builds = self._collect_from_led(build_ids)
        else:
            builds = self._collect_from_buildbucket(build_ids, extra_fields)
        # Return in a deterministic order: by builder name, then build ID.
        return collections.OrderedDict(
            sorted(builds.items(), key=lambda item: (item[1].builder, item[0]))
        )

    def get_property(self, build_proto, property_name):
        """Retrieve an output property from a subbuild's Build proto.

        Ensures a clear and unified missing property error message across all
        builders that use this recipe module.

        Args:
          build_proto (build_pb2.Build): The collected subbuild's Build proto.
          property_name (str): The name of the output property to look up.

        Returns:
          The value of the requested output property.

        Raises:
          InfraFailure: If the subbuild did not set the property.
        """
        try:
            return build_proto.output.properties[property_name]
        # The proto Struct wrapper raises ValueError for missing keys.
        except ValueError:
            raise self.m.step.InfraFailure(
                "Subbuild did not set the %r output property" % property_name
            )

    def _collect_from_led(self, task_ids):
        """Waits for led subbuilds (Swarming tasks) to complete.

        Args:
          task_ids (list(str)): Swarming task IDs of the led subbuilds.

        Returns:
          dict: A map from task ID to SubbuildResult, with `build_proto`
            populated from the task's output build.proto.json file.
        """
        swarming_results = self.m.swarming.collect(
            "collect", task_ids, output_dir=self.m.path["cleanup"]
        )
        builds = {}
        for result in swarming_results:
            task_id = result.id
            # Led launch ensures this file is present in the task root dir.
            build_proto_path = result.output_dir.join("build.proto.json")
            build_proto = self.m.file.read_proto(
                "read build.proto.json", build_proto_path, build_pb2.Build, "JSONPB"
            )
            builds[task_id] = SubbuildResult(
                builder=build_proto.builder.builder,
                build_id=task_id,
                build_proto=build_proto,
            )
        return builds

    def _collect_from_buildbucket(self, build_ids, extra_fields):
        """Waits for buildbucket subbuilds to complete.

        Args:
          build_ids (list(str)): Buildbucket build IDs of the subbuilds.
          extra_fields (set): Extra Build proto fields to request, on top of
            the defaults plus the Swarming task ID and summary markdown.

        Returns:
          dict: A map from build ID to SubbuildResult, with `build_proto`
            populated from the collected buildbucket Build.
        """
        bb_fields = self.m.buildbucket.DEFAULT_FIELDS.union(
            {"infra.swarming.task_id", "summary_markdown"}
        ).union(extra_fields)

        def collect():
            # Wrapped in a function so it can be passed to utils.retry below.
            return self.m.buildbucket.collect_builds(
                [int(build_id) for build_id in build_ids],
                interval=20,  # Lower from default of 60 b/c we're impatient.
                timeout=COLLECT_TIMEOUT,
                step_name="collect",
                fields=bb_fields,
            )

        # TODO(crbug.com/1254455): Stop retrying when the bb tool does its own
        # internal retries for all transient failures.
        builds = self.m.utils.retry(collect, max_attempts=3)

        failed_builds = [b for b in builds.values() if b.status != common_pb2.SUCCESS]
        if failed_builds:
            task_ids = [b.infra.swarming.task_id for b in failed_builds]
            # Make sure task IDs are non-empty.
            assert all(task_ids), task_ids

            # Wait for the underlying Swarming tasks to complete. The Swarming
            # task for a Buildbucket build can take significantly longer to
            # complete than the build itself due to post-processing outside the
            # scope of the build's recipe (e.g. cache pruning). If the parent
            # build and its Swarming task both complete before the subbuild's
            # Swarming task finishes post-processing, then the subbuild's
            # Swarming task will be killed by Swarming due to the parent being
            # complete.
            #
            # That is actually working as intended. However, it's confusing for
            # a subbuild to be marked as killed when the recipe actually exited
            # normally; "killed" usually only happens for CQ builds, when a
            # build is canceled by CQ because a new patchset of the triggering
            # CL is uploaded. So it's convenient to have dashboards and queries
            # ignore "killed" tasks. We use this workaround to ensure that
            # failed subbuilds with long post-processing steps have time to
            # complete and exit cleanly with a plain old "COMPLETED (FAILURE)"
            # status.
            #
            # We only do this if the subbuild failed as a latency optimization.
            # If all subbuilds passed, the parent will go on to do some more
            # steps using the results of the subbuilds, leaving time for the
            # subbuilds' tasks to complete asynchronously, so we don't want to
            # block here while the tasks complete.
            self.m.swarming.collect(
                "wait for %s to complete" % pluralize("task", task_ids), task_ids
            )
        # Wrap each raw Build proto in a SubbuildResult keyed by its ID.
        for build_id, build in builds.items():
            builds[build_id] = SubbuildResult(
                builder=build.builder.builder, build_id=build_id, build_proto=build
            )
        return builds