# Copyright 2020 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from recipe_engine import recipe_api

from PB.go.chromium.org.luci.buildbucket.proto import (
    builder_common as builder_common_pb2,
)
from RECIPE_MODULES.fuchsia.validate_lucicfg import (
    NEW_BUILDERS_IN_CQ_MSG,
    CQ_BUILDERS_DELETED_MSG,
)

# lucicfg version used when the `lucicfg_ensure_file` option is not set.
# TODO(olivernewman): Convert each repository that uses this recipe to declare a
# compatible lucicfg version in a cipd.ensure checked into the repo that this
# recipe can use. Then remove support for installing a default version.
DEFAULT_LUCICFG_VERSION = "git_revision:23d142175f4dc17791ff85df9b8af144377d73a1"
LUCICFG_CIPD_PACKAGE = "infra/tools/luci/lucicfg/${platform}"


class ValidateLucicfgApi(recipe_api.RecipeApi):
    """Module for polling, parsing, and validating LUCI config files."""

    NEW_BUILDERS_IN_CQ_MSG = NEW_BUILDERS_IN_CQ_MSG
    CQ_BUILDERS_DELETED_MSG = CQ_BUILDERS_DELETED_MSG

    def __call__(self, checkout_root, opts):
        """Validates a repo's LUCI config files.

        Args:
            checkout_root: Path to a checkout of the config repo, with any
                triggering CLs applied.
            opts: validate_lucicfg module options (see options.proto).
        """
assert opts.starlark_paths, "starlark_paths must not be empty"
if opts.lucicfg_ensure_file:
# TODO(olivernewman): The cleanup directory is supposed to already
# exist, so we shouldn't need to re-ensure it in order to call
# `mkdtemp()`. Figure out why the cleanup directory doesn't already
# exist, and remove this step if possible.
self.m.file.ensure_directory("ensure cleanup dir", self.m.path["cleanup"])
lucicfg_dir = self.m.path.mkdtemp("lucicfg")
self.m.cipd.ensure(
lucicfg_dir,
checkout_root.join(opts.lucicfg_ensure_file),
"install lucicfg",
)
# Assumes that the ensure file installs lucicfg in the root of the CIPD
# directory.
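            # A compatible ensure file would contain a single line along the
            # lines of the following (the pinned version is illustrative):
            #   infra/tools/luci/lucicfg/${platform}  git_revision:<revision>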
lucicfg_path = lucicfg_dir.join("lucicfg")
else:
lucicfg_path = self.m.cipd.ensure_tool(
LUCICFG_CIPD_PACKAGE, DEFAULT_LUCICFG_VERSION
)
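
        # Each path in opts.starlark_paths is validated with a command
        # roughly equivalent to:
        #   lucicfg validate -strict -fail-on-warnings <checkout_root>/<path>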
with self.m.step.nest("validate"), self.m.step.defer_results():
for relative_path in opts.starlark_paths:
abs_path = checkout_root.join(relative_path)
self.m.step(
str(relative_path),
[
lucicfg_path,
"validate",
"-strict",
"-fail-on-warnings",
abs_path,
],
)

        # The LUCI Config service tracks the "main" branch of all config repos, so
# we should skip the race condition check if we're running on a branch other
# than "main" because:
# 1. Comparing the local (non-main) configs to the deployed (main) configs
# will likely return spurious failures, e.g. if the local
# commit-queue.cfg file references builders that have been deleted at
# HEAD.
# 2. It doesn't matter if there are any race conditions in config changes
# on non-main branches, because the changes won't be deployed to LUCI
# Config anyway; config changes on non-main branches are only necessary
# to modify generated files that are consumed by other parts of the
# infrastructure.
if opts.generated_dir and self.m.buildbucket_util.triggering_branch == "main":
with self.m.step.nest("check for config deployment race conditions"):
self._check_for_deployment_race_conditions(
checkout_root.join(opts.generated_dir)
)

    def _check_for_deployment_race_conditions(self, generated_dir):
        """Fails if the pending config changes could race during deployment.

        There are some types of LUCI config changes that introduce race
        conditions in the process of deploying the updated config files to
        their corresponding services.

        For example, it's not safe to simultaneously create a new builder and
        add it to CQ, because CQ might ingest the updated configs before
        Buildbucket and try to trigger the builder before Buildbucket even
        knows it exists. This causes CQ to error out and add confusing
        comments to CLs.

        To avoid this specific race condition, we compare the modified version
        of commit-queue.cfg with the live version of cr-buildbucket.cfg to
        make sure that even if CQ ingests the updated configs before
        Buildbucket does, CQ won't try to trigger any builders that don't yet
        exist.

        We also check that the live version of commit-queue.cfg doesn't
        reference any builders that will soon be deleted, which would cause
        the same issue if Buildbucket ingests the config changes before CQ
        does.

        There are other race conditions that we don't bother checking here;
        for example, it's technically not safe to immediately start running a
        new builder in CI by adding it to luci-scheduler.cfg. However, LUCI
        Scheduler trigger failures don't produce user-visible error messages
        the way CQ trigger failures do, so they're not as big a deal. Also,
        adding new builders to CI is a very common workflow (adding builders
        to CQ is somewhat rarer), and it would be annoying if that always
        required two separate changes.
        """
self.m.path.mock_add_directory(generated_dir.join("fuchsia"))
self.m.path.mock_add_directory(generated_dir.join("fuchsia_internal"))
        # Our LUCI configs are set up such that each LUCI project's config
        # files are in a subdirectory of the "generated" directory whose name
        # is the same as the project name.
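        # For example (hypothetical checkout layout):
        #   <generated_dir>/fuchsia/luci/cr-buildbucket.cfg
        #   <generated_dir>/fuchsia/luci/commit-queue.cfg
        #   <generated_dir>/fuchsia_internal/luci/...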
project_dirs = [
path
for path in self.m.file.listdir(
"find projects",
generated_dir,
test_data=["fuchsia", "fuchsia_internal", "foo.txt"],
)
if self.m.path.isdir(path)
]
all_live_builders = set()
all_local_builders = set()
for project_dir in project_dirs:
project = self.m.path.basename(project_dir)
live_bb_config = self.m.luci_config.buildbucket(project=project)
all_live_builders.update(self._all_builder_names(project, live_bb_config))
local_bb_config = self.m.luci_config.buildbucket(
project=project, local_dir=project_dir.join("luci")
)
all_local_builders.update(self._all_builder_names(project, local_bb_config))
for project_dir in project_dirs:
project = self.m.path.basename(project_dir)
cq_config_path = project_dir.join("luci", "commit-queue.cfg")
self.m.path.mock_add_paths(cq_config_path)
if not self.m.path.exists(cq_config_path): # pragma: no cover
# Skip projects that don't have CQ set up.
continue
local_cq_config = self.m.luci_config.commit_queue(
project=project, local_dir=project_dir.join("luci")
)
# Make sure that the new CQ config won't try to trigger non-existent builders.
nonexistent_builders = self._nonexistent_cq_builders(
local_cq_config, all_live_builders
)
if nonexistent_builders:
msg = NEW_BUILDERS_IN_CQ_MSG + "".join(
[f"\n- {b}" for b in nonexistent_builders]
)
raise self.m.step.StepFailure(msg)
live_cq_config = self.m.luci_config.commit_queue(project=project)
# Make sure that the existing CQ config won't try to trigger builders
# that we're deleting.
nonexistent_builders = self._nonexistent_cq_builders(
live_cq_config, all_local_builders
)
if nonexistent_builders:
msg = CQ_BUILDERS_DELETED_MSG + "".join(
[f"\n- {b}" for b in nonexistent_builders]
)
raise self.m.step.StepFailure(msg)

    def _all_builder_names(self, project_name, bb_config):
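        """Yields the full name of every builder in a Buildbucket config.

        Assuming buildbucket_util.full_builder_name() follows the common
        "<project>/<bucket>/<builder>" convention, this yields names like
        "fuchsia/try/foo".
        """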
for bucket in bb_config.buckets:
for builder in bucket.swarming.builders:
yield self.m.buildbucket_util.full_builder_name(
builder_common_pb2.BuilderID(
project=project_name,
bucket=bucket.name,
builder=builder.name,
)
)

    def _nonexistent_cq_builders(self, cq_config, all_known_builders):
        """Checks that all builders in cq_config are known to Buildbucket.

        Returns a sorted list of the names of any CQ builders that aren't in
        the set of builders known to Buildbucket.
        """
nonexistent_builders = set()
for group in cq_config.config_groups:
for builder in group.verifiers.tryjob.builders:
# Includable-only builders are only run if a special footer is in a
# change's commit message. Usage of the footer is rare enough that
# we're willing to allow errors in that case, in exchange for making
# it easier to add and remove builders from CQ.
if builder.includable_only:
continue
if builder.name not in all_known_builders:
nonexistent_builders.add(builder.name)
return sorted(nonexistent_builders)
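

# Example usage from a recipe (hypothetical names; assumes this module is
# listed in the recipe's DEPS and the options arrive via properties):
#
#   def RunSteps(api, props):
#       checkout_root = api.path["start_dir"].join("config-repo")
#       api.validate_lucicfg(checkout_root, props.validate_lucicfg)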