# blob: 9b9358ca744eb132ad9de5599ddea46165943d63 [file] [log] [blame]
# Copyright 2018 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Recipe for testing LUCI configs."""
import re
from recipe_engine import post_process
from PB.go.chromium.org.luci.buildbucket.proto import builder as builder_pb2
from PB.go.chromium.org.luci.buildbucket.proto import project_config as bb_pb2
from PB.go.chromium.org.luci.cq.api.config.v2 import cq as cq_pb2
from PB.recipes.fuchsia.luci_config import InputProperties
# Declares the Python version this recipe supports.
PYTHON_VERSION_COMPATIBILITY = "PY3"

# Recipe modules this recipe depends on. Each one is reached through the `api`
# object under its unqualified name (e.g. `api.git`, `api.step`).
DEPS = [
    "fuchsia/buildbucket_util",
    "fuchsia/git",
    "fuchsia/luci_config",
    "fuchsia/sso",
    "recipe_engine/cipd",
    "recipe_engine/file",
    "recipe_engine/path",
    "recipe_engine/properties",
    "recipe_engine/step",
]

# Input property schema; `RunSteps` receives an instance of it as `props`.
PROPERTIES = InputProperties
# Failure header used when the local commit-queue.cfg references builders that
# are missing from the live Buildbucket config. The offending builder names are
# appended as a bulleted list by the caller.
NEW_BUILDERS_IN_CQ_MSG = (
    "The following builder(s) are referenced in commit-queue.cfg but aren't yet\n"
    "known to Buildbucket. Create the builder(s) in a separate change before adding\n"
    "them to commit-queue.cfg.\n"
)

# Failure header used when the live commit-queue.cfg references builders that
# are missing from the local cr-buildbucket.cfg. The offending builder names
# are appended as a bulleted list by the caller.
CQ_BUILDERS_DELETED_MSG = (
    "The following builder(s) are running in CQ but missing from\n"
    "cr-buildbucket.cfg. Remove the builder(s) from CQ in a separate change before\n"
    "deleting them.\n"
)

# lucicfg version installed when the `lucicfg_ensure_file` property is not set.
# TODO(olivernewman): Convert each repository that uses this recipe to declare
# a compatible lucicfg version in a cipd.ensure file checked into the repo, so
# this recipe can use it. Then remove support for installing a default version.
DEFAULT_LUCICFG_VERSION = "git_revision:23d142175f4dc17791ff85df9b8af144377d73a1"

# CIPD package that provides the lucicfg binary for the current platform.
LUCICFG_CIPD_PACKAGE = "infra/tools/luci/lucicfg/${platform}"
def RunSteps(api, props):
    """Validate a repo's lucicfg Starlark files and check deployment safety.

    Checks out the config repository, obtains a lucicfg binary, runs
    `lucicfg validate` on every configured Starlark file and, for builds
    triggered on the "main" branch, checks the generated configs for
    deployment race conditions.

    Args:
        api: Recipe API object exposing the modules listed in DEPS.
        props: Input properties for the build. The fields read here are
            `starlark_paths`, `remote`, `lucicfg_ensure_file` and
            `generated_dir`.
    """
    assert props.starlark_paths, "starlark_paths must not be empty"

    https_remote = api.sso.sso_to_https(props.remote)
    checkout_root, _ = api.git.checkout_from_build_input(https_remote)

    if not props.lucicfg_ensure_file:
        # The repo does not pin its own lucicfg, so fall back to the default
        # version baked into this recipe.
        lucicfg_path = api.cipd.ensure_tool(
            LUCICFG_CIPD_PACKAGE, DEFAULT_LUCICFG_VERSION
        )
    else:
        # TODO(olivernewman): The cleanup directory is supposed to already
        # exist, so we shouldn't need to re-ensure it in order to call
        # `mkdtemp()`. Figure out why the cleanup directory doesn't already
        # exist, and remove this step if possible.
        api.file.ensure_directory("ensure cleanup dir", api.path["cleanup"])
        install_dir = api.path.mkdtemp("lucicfg")
        ensure_file = checkout_root.join(props.lucicfg_ensure_file)
        api.cipd.ensure(install_dir, ensure_file, "install lucicfg")
        # Assumes that the ensure file installs lucicfg in the root of the
        # CIPD directory.
        lucicfg_path = install_dir.join("lucicfg")

    # Deferring results lets every file be validated even when an earlier one
    # fails.
    with api.step.nest("validate"), api.step.defer_results():
        for relative_path in props.starlark_paths:
            validate_cmd = [
                lucicfg_path,
                "validate",
                "-strict",
                "-fail-on-warnings",
                checkout_root.join(relative_path),
            ]
            api.step(str(relative_path), validate_cmd)

    # The LUCI Config service tracks the "main" branch of all config repos, so
    # the race condition check is skipped on any other branch because:
    # 1. Comparing the local (non-main) configs to the deployed (main) configs
    #    will likely return spurious failures, e.g. if the local
    #    commit-queue.cfg file references builders that have been deleted at
    #    HEAD.
    # 2. Race conditions in config changes on non-main branches don't matter,
    #    because those changes won't be deployed to LUCI Config anyway; config
    #    changes on non-main branches are only necessary to modify generated
    #    files that are consumed by other parts of the infrastructure.
    if not props.generated_dir or api.buildbucket_util.triggering_branch != "main":
        return

    with api.step.nest("check for config deployment race conditions"):
        check_for_deployment_race_conditions(
            api, checkout_root.join(props.generated_dir)
        )
def check_for_deployment_race_conditions(api, generated_dir):
    """Fail if deploying the local configs could race between LUCI services.

    Some kinds of LUCI config change introduce race conditions while the
    updated config files are being deployed to their corresponding services.

    For example, it's not safe to create a new builder and add it to CQ in the
    same change: CQ might ingest the updated configs before Buildbucket does
    and try to trigger the builder before Buildbucket even knows it exists.
    That causes CQ to error out and add confusing comments to CLs.

    To avoid that specific race, the modified commit-queue.cfg is compared
    with the live cr-buildbucket.cfg, so that even if CQ ingests the updated
    configs first it won't try to trigger builders that don't yet exist.

    The live commit-queue.cfg is also checked against the local builders, so
    that it doesn't reference builders that are about to be deleted; that
    would cause the same issue if Buildbucket ingests the change before CQ.

    Other race conditions are deliberately not checked. For example, it's
    technically not safe to immediately start running a new builder in CI by
    adding it to luci-scheduler.cfg. However, LUCI scheduler trigger failures
    don't produce user-visible error messages the way CQ trigger failures do,
    so it's not as big a deal. Adding new builders to CI is also a very common
    workflow (adding builders to CQ is somewhat more rare), and it would be
    annoying if that always required two separate changes.

    Args:
        api: Recipe API object.
        generated_dir: Path to the directory of generated config files, with
            one subdirectory per LUCI project.

    Raises:
        api.step.StepFailure: If a project's local CQ config references a
            builder missing from the live Buildbucket configs, or its live CQ
            config references a builder missing from the local Buildbucket
            configs.
    """

    def fail_if_any(header, missing_builders):
        # Raise a failure listing each offending builder as a bullet.
        if missing_builders:
            bullets = "".join("\n- %s" % name for name in missing_builders)
            raise api.step.StepFailure(header + bullets)

    for mock_project in ("fuchsia", "fuchsia_internal"):
        api.path.mock_add_directory(generated_dir.join(mock_project))

    # Our luci configs are set up such that each LUCI project's config files
    # live in a subdirectory of the "generated" directory that is named after
    # the project.
    entries = api.file.listdir(
        "find projects",
        generated_dir,
        test_data=["fuchsia", "fuchsia_internal", "foo.txt"],
    )
    project_dirs = [entry for entry in entries if api.path.isdir(entry)]

    # First pass: collect builder names across every project, so the CQ checks
    # below see the builders of all projects rather than only their own.
    live_builders = set()
    local_builders = set()
    for project_dir in project_dirs:
        project = api.path.basename(project_dir)
        live_builders.update(
            all_builder_names(
                api, project, api.luci_config.buildbucket(project=project)
            )
        )
        local_builders.update(
            all_builder_names(
                api,
                project,
                api.luci_config.buildbucket(
                    project=project, local_dir=project_dir.join("luci")
                ),
            )
        )

    # Second pass: compare each project's CQ configs against those builders.
    for project_dir in project_dirs:
        project = api.path.basename(project_dir)
        cq_config_path = project_dir.join("luci", "commit-queue.cfg")
        api.path.mock_add_paths(cq_config_path)
        if not api.path.exists(cq_config_path):  # pragma: no cover
            # Skip projects that don't have CQ set up.
            continue

        # Make sure that the new CQ config won't try to trigger non-existent
        # builders.
        local_cq_config = api.luci_config.commit_queue(
            project=project, local_dir=project_dir.join("luci")
        )
        fail_if_any(
            NEW_BUILDERS_IN_CQ_MSG,
            nonexistent_cq_builders(local_cq_config, live_builders),
        )

        # Make sure that the existing CQ config won't try to trigger builders
        # that we're deleting.
        live_cq_config = api.luci_config.commit_queue(project=project)
        fail_if_any(
            CQ_BUILDERS_DELETED_MSG,
            nonexistent_cq_builders(live_cq_config, local_builders),
        )
def all_builder_names(api, project_name, bb_config):
    """Yield the full name of every builder declared in a Buildbucket config.

    Args:
        api: Recipe API object; `api.buildbucket_util.full_builder_name` is
            used to format each name.
        project_name: Name of the LUCI project that owns `bb_config`.
        bb_config: Buildbucket project config whose buckets' swarming builders
            are enumerated.

    Yields:
        One formatted name per builder, in bucket order. The format is
        presumably `project/bucket/builder`, as used in commit-queue.cfg;
        confirm against `buildbucket_util`.
    """
    builder_ids = (
        builder_pb2.BuilderID(
            project=project_name,
            bucket=bucket.name,
            builder=builder.name,
        )
        for bucket in bb_config.buckets
        for builder in bucket.swarming.builders
    )
    for builder_id in builder_ids:
        yield api.buildbucket_util.full_builder_name(builder_id)
def nonexistent_cq_builders(cq_config, all_known_builders):
    """Return the CQ builders that Buildbucket does not know about.

    Args:
        cq_config: CQ config whose config groups' tryjob builders are checked.
        all_known_builders: Collection of builder names known to Buildbucket.

    Returns:
        A sorted, de-duplicated list of the names of CQ builders that are not
        in `all_known_builders`. Includable-only builders are never reported.
    """
    unknown = {
        builder.name
        for group in cq_config.config_groups
        for builder in group.verifiers.tryjob.builders
        # Includable-only builders are only run if a special footer is in a
        # change's commit message. Usage of the footer is rare enough that
        # we're willing to allow errors in that case, in exchange for making
        # it easier to add and remove builders from CQ.
        if not builder.includable_only and builder.name not in all_known_builders
    }
    return sorted(unknown)
def GenTests(api):
    """Yield the recipe's test cases.

    The cases cover lucicfg installation from an ensure file, validation
    continuing after a failing file, both race-condition failures, the
    includable-only exemption, and skipping the race check off "main".
    """
    remote = "https://fuchsia.googlesource.com/infra/config"
    race_check_step = "check for config deployment race conditions"
    asan_try_builder = "fuchsia/try/core.x64-asan"

    def input_props(**overrides):
        # Baseline input properties shared by every test case.
        props = InputProperties(
            remote=remote,
            starlark_paths=["main.star", "dev.star"],
            generated_dir="generated",
            **overrides
        )
        return api.properties(props)

    def make_cq_config(builder_names, includable_only=False):
        # CQ config with a single config group holding the given tryjobs.
        tryjob_builders = [
            {"name": builder_name, "includable_only": includable_only}
            for builder_name in builder_names
        ]
        return cq_pb2.Config(
            config_groups=[{"verifiers": {"tryjob": {"builders": tryjob_builders}}}]
        )

    def failing_tryjob(name):
        # Tryjob test case that is expected to end in failure.
        return api.buildbucket_util.test(
            name, tryjob=True, git_repo=remote, status="failure"
        )

    def expect_failure_reason(message):
        # Assert that the build's failure reason contains `message` verbatim.
        return api.post_process(post_process.ResultReasonRE, re.escape(message))

    local_bb_config = bb_pb2.BuildbucketCfg(
        buckets=[
            {
                "name": "try",
                "swarming": {"builders": [{"name": "core.x64-asan"}]},
            }
        ]
    )
    yield (
        api.buildbucket_util.test("starlark", git_repo=remote)
        # Cover the logic for installing lucicfg from a checked-in CIPD ensure
        # file.
        + input_props(lucicfg_ensure_file="cipd.ensure")
        # Cover the logic that reads builder names from cr-buildbucket.cfg.
        + api.luci_config.mock_local_config(
            "fuchsia",
            "cr-buildbucket.cfg",
            local_bb_config,
            nesting=race_check_step,
        )
    )

    # The second file should be validated even if the first one fails.
    yield (
        failing_tryjob("first_file_invalid")
        + input_props()
        + api.step_data("validate.main.star", retcode=1)
        + api.post_process(post_process.MustRun, "validate.dev.star")
        # Make sure we patch in the triggering change, since this is a tryjob.
        + api.post_process(post_process.MustRun, "checkout.git rebase")
    )

    # A builder referenced by the local CQ config but unknown to Buildbucket.
    yield (
        failing_tryjob("cq_new_builder")
        + input_props()
        + api.luci_config.mock_local_config(
            "fuchsia",
            "commit-queue.cfg",
            make_cq_config([asan_try_builder]),
            nesting=race_check_step,
        )
        + expect_failure_reason(NEW_BUILDERS_IN_CQ_MSG)
    )

    # A builder referenced by the live CQ config but absent locally.
    yield (
        failing_tryjob("deleting_cq_builder")
        + input_props()
        + api.luci_config.mock_config(
            "fuchsia",
            "commit-queue.cfg",
            make_cq_config([asan_try_builder]),
            nesting=race_check_step,
        )
        + expect_failure_reason(CQ_BUILDERS_DELETED_MSG)
    )

    # It should be considered safe to delete an includable_only builder even
    # if it's still referenced in commit-queue.cfg.
    yield (
        api.buildbucket_util.test(
            "deleting_includable_only_builder", tryjob=True, git_repo=remote
        )
        + input_props()
        + api.luci_config.mock_config(
            "fuchsia",
            "commit-queue.cfg",
            make_cq_config([asan_try_builder], includable_only=True),
            nesting=race_check_step,
        )
    )

    yield (
        api.buildbucket_util.test("non_main_branch", tryjob=True)
        + input_props()
        + api.buildbucket_util.tryjob_branch("non-main-branch")
        # Race condition check should be skipped on branches other than "main".
        + api.post_process(post_process.DoesNotRun, race_check_step)
    )