# Copyright 2020 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from recipe_engine import recipe_api

from PB.go.chromium.org.luci.buildbucket.proto import (
    builder_common as builder_common_pb2,
)
from RECIPE_MODULES.fuchsia.validate_lucicfg import (
    NEW_BUILDERS_IN_CQ_MSG,
    CQ_BUILDERS_DELETED_MSG,
)

# lucicfg version used when the `lucicfg_ensure_file` option is not set.
# TODO(olivernewman): Convert each repository that uses this recipe to declare a
# compatible lucicfg version in a cipd.ensure checked into the repo that this
# recipe can use. Then remove support for installing a default version.
DEFAULT_LUCICFG_VERSION = "git_revision:23d142175f4dc17791ff85df9b8af144377d73a1"
LUCICFG_CIPD_PACKAGE = "infra/tools/luci/lucicfg/${platform}"


class ValidateLucicfgApi(recipe_api.RecipeApi):
    """Module for polling, parsing, and validating LUCI config files."""

    NEW_BUILDERS_IN_CQ_MSG = NEW_BUILDERS_IN_CQ_MSG
    CQ_BUILDERS_DELETED_MSG = CQ_BUILDERS_DELETED_MSG

    def __call__(self, checkout_root, opts):
        """Validates a repo's LUCI config files.

        Args:
            checkout_root: Path to a checkout of the config repo, with any
                triggering CLs applied.
            opts: validate_lucicfg module options (see options.proto).
        """
assert opts.starlark_paths, "starlark_paths must not be empty"
if opts.lucicfg_ensure_file:
# TODO(olivernewman): The cleanup directory is supposed to already
# exist, so we shouldn't need to re-ensure it in order to call
# `mkdtemp()`. Figure out why the cleanup directory doesn't already
# exist, and remove this step if possible.
self.m.file.ensure_directory("ensure cleanup dir", self.m.path["cleanup"])
lucicfg_dir = self.m.path.mkdtemp("lucicfg")
self.m.cipd.ensure(
lucicfg_dir,
checkout_root.join(opts.lucicfg_ensure_file),
"install lucicfg",
)
# Assumes that the ensure file installs lucicfg in the root of the CIPD
# directory.
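            # A compatible ensure file would contain a single line along the
            # lines of the following (the pinned version is illustrative):
            #   infra/tools/luci/lucicfg/${platform}  git_revision:<revision>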
lucicfg_path = lucicfg_dir.join("lucicfg")
else:
lucicfg_path = self.m.cipd.ensure_tool(
LUCICFG_CIPD_PACKAGE, DEFAULT_LUCICFG_VERSION
)
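
        # Each path in opts.starlark_paths is validated with a command
        # roughly equivalent to:
        #   lucicfg validate -strict -fail-on-warnings <checkout_root>/<path>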
with self.m.step.nest("validate"), self.m.step.defer_results():
for relative_path in opts.starlark_paths:
abs_path = checkout_root.join(relative_path)
self.m.step(
str(relative_path),
[
lucicfg_path,
"validate",
"-strict",
"-fail-on-warnings",
abs_path,
],
)

        # The LUCI Config service tracks the "main" branch of all config repos, so
# we should skip the race condition check if we're running on a branch other
# than "main" because:
# 1. Comparing the local (non-main) configs to the deployed (main) configs
# will likely return spurious failures, e.g. if the local
# commit-queue.cfg file references builders that have been deleted at
# HEAD.
# 2. It doesn't matter if there are any race conditions in config changes
# on non-main branches, because the changes won't be deployed to LUCI
# Config anyway; config changes on non-main branches are only necessary
# to modify generated files that are consumed by other parts of the
# infrastructure.
if opts.generated_dir and self.m.buildbucket_util.triggering_branch == "main":
with self.m.step.nest("check for config deployment race conditions"):
self._check_for_deployment_race_conditions(
checkout_root.join(opts.generated_dir)
)

    def _check_for_deployment_race_conditions(self, generated_dir):
        """Fails if the pending config changes could race during deployment.

        There are some types of LUCI config changes that introduce race
        conditions in the process of deploying the updated config files to
        their corresponding services.

        For example, it's not safe to simultaneously create a new builder and
        add it to CQ, because CQ might ingest the updated configs before
        Buildbucket and try to trigger the builder before Buildbucket even
        knows it exists. This causes CQ to error out and add confusing
        comments to CLs.

        To avoid this specific race condition, we compare the modified version
        of commit-queue.cfg with the live version of cr-buildbucket.cfg to
        make sure that even if CQ ingests the updated configs before
        Buildbucket does, CQ won't try to trigger any builders that don't yet
        exist.

        We also check that the live version of commit-queue.cfg doesn't
        reference any builders that will soon be deleted, which would cause
        the same issue if Buildbucket ingests the config changes before CQ
        does.

        There are other race conditions that we don't bother checking here;
        for example, it's technically not safe to immediately start running a
        new builder in CI by adding it to luci-scheduler.cfg. However, LUCI
        Scheduler trigger failures don't produce user-visible error messages
        the way CQ trigger failures do, so they're not as big a deal. Also,
        adding new builders to CI is a very common workflow (adding builders
        to CQ is somewhat rarer), and it would be annoying if that always
        required two separate changes.
        """
self.m.path.mock_add_directory(generated_dir.join("fuchsia"))
self.m.path.mock_add_directory(generated_dir.join("fuchsia_internal"))
        # Our LUCI configs are set up such that each LUCI project's config
        # files are in a subdirectory of the "generated" directory whose name
        # is the same as the project name.
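        # For example (hypothetical checkout layout):
        #   <generated_dir>/fuchsia/luci/cr-buildbucket.cfg
        #   <generated_dir>/fuchsia/luci/commit-queue.cfg
        #   <generated_dir>/fuchsia_internal/luci/...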
project_dirs = [
path
for path in self.m.file.listdir(
"find projects",
generated_dir,
test_data=["fuchsia", "fuchsia_internal", "foo.txt"],
)
if self.m.path.isdir(path)
]
all_live_builders = set()
all_local_builders = set()
for project_dir in project_dirs:
project = self.m.path.basename(project_dir)
live_bb_config = self.m.luci_config.buildbucket(project=project)
all_live_builders.update(self._all_builder_names(project, live_bb_config))
local_bb_config = self.m.luci_config.buildbucket(
project=project, local_dir=project_dir.join("luci")
)
all_local_builders.update(self._all_builder_names(project, local_bb_config))
for project_dir in project_dirs:
project = self.m.path.basename(project_dir)
cq_config_path = project_dir.join("luci", "commit-queue.cfg")
self.m.path.mock_add_paths(cq_config_path)
if not self.m.path.exists(cq_config_path): # pragma: no cover
# Skip projects that don't have CQ set up.
continue
local_cq_config = self.m.luci_config.commit_queue(
project=project, local_dir=project_dir.join("luci")
)
# Make sure that the new CQ config won't try to trigger non-existent builders.
nonexistent_builders = self._nonexistent_cq_builders(
local_cq_config, all_live_builders
)
if nonexistent_builders:
msg = NEW_BUILDERS_IN_CQ_MSG + "".join(
[f"\n- {b}" for b in nonexistent_builders]
)
raise self.m.step.StepFailure(msg)
live_cq_config = self.m.luci_config.commit_queue(project=project)
# Make sure that the existing CQ config won't try to trigger builders
# that we're deleting.
nonexistent_builders = self._nonexistent_cq_builders(
live_cq_config, all_local_builders
)
if nonexistent_builders:
msg = CQ_BUILDERS_DELETED_MSG + "".join(
[f"\n- {b}" for b in nonexistent_builders]
)
raise self.m.step.StepFailure(msg)

    def _all_builder_names(self, project_name, bb_config):
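        """Yields the full name of every builder in a Buildbucket config.

        Assuming buildbucket_util.full_builder_name() follows the common
        "<project>/<bucket>/<builder>" convention, this yields names like
        "fuchsia/try/foo".
        """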
for bucket in bb_config.buckets:
for builder in bucket.swarming.builders:
yield self.m.buildbucket_util.full_builder_name(
builder_common_pb2.BuilderID(
project=project_name,
bucket=bucket.name,
builder=builder.name,
)
)

    def _nonexistent_cq_builders(self, cq_config, all_known_builders):
        """Checks that all builders in cq_config are known to Buildbucket.

        Returns a sorted list of the names of any CQ builders that aren't in
        the set of builders known to Buildbucket.
        """
nonexistent_builders = set()
for group in cq_config.config_groups:
for builder in group.verifiers.tryjob.builders:
# Includable-only builders are only run if a special footer is in a
# change's commit message. Usage of the footer is rare enough that
# we're willing to allow errors in that case, in exchange for making
# it easier to add and remove builders from CQ.
if builder.includable_only:
continue
if builder.name not in all_known_builders:
nonexistent_builders.add(builder.name)
return sorted(nonexistent_builders)
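

# Example usage from a recipe (hypothetical names; assumes this module is
# listed in the recipe's DEPS and the options arrive via properties):
#
#   def RunSteps(api, props):
#       checkout_root = api.path["start_dir"].join("config-repo")
#       api.validate_lucicfg(checkout_root, props.validate_lucicfg)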