# Copyright 2020 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Recipe for closing the tree when builders in a specified Milo console fail."""
import datetime
import hashlib
import re
import textwrap
import attr
from recipe_engine import post_process
from PB.go.chromium.org.luci.buildbucket.proto import (
build as build_pb2,
builder_common as builder_common_pb2,
builds_service as builds_service_pb2,
common as common_pb2,
)
from PB.go.chromium.org.luci.milo.api.config import project as milo_pb2
from PB.recipe_engine.result import RawResult
from PB.recipes.fuchsia.tree_closer import InputProperties
DEPS = [
"fuchsia/buildbucket_util",
"fuchsia/cipd_ensure",
"fuchsia/issuetracker",
"fuchsia/luci_config",
"fuchsia/tree_status",
"recipe_engine/buildbucket",
"recipe_engine/context",
"recipe_engine/json",
"recipe_engine/properties",
"recipe_engine/raw_io",
"recipe_engine/step",
"recipe_engine/time",
]
PROPERTIES = InputProperties
DEFAULT_GRACE_PERIOD = datetime.timedelta(hours=2)
# The format of the "date" field in HTTP responses from the old tree
# status app.
OLD_TREE_STATUS_DATE_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
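# The format of the "createTime" field in responses from the new tree status app.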
TREE_STATUS_DATE_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
MESSAGE_SUFFIX = " (automatic)"
def RunSteps(api, props):
now = los_angeles_now(api)
closing_disabled_reason = None
if not during_working_hours(now):
closing_disabled_reason = "outside working hours"
for date in props.disable_on_dates:
if now.date() == datetime.date.fromisoformat(date):
closing_disabled_reason = f"disabled today ({date})"
break
grace_period = (
datetime.timedelta(seconds=props.grace_period_seconds) or DEFAULT_GRACE_PERIOD
)
auto_closer_username = api.buildbucket.build.builder.builder
with api.context(infra_steps=True):
status = api.tree_status.get(props.tree_status_host)
if props.tree_name:
old_status = status
status = api.tree_status.get("", tree_name=props.tree_name)
# If the tree status was manually set, then the two apps will be out
# of sync, so copy the newer status to the other app.
# TODO(https://fxbug.dev/332741591): Remove when we no longer need
# the old app for the stats dashboards.
if old_status.message != status.message:
if old_status.date > status.date:
api.tree_status.update(
message=old_status.message,
hostname=props.tree_status_host,
admin_hostname=props.tree_status_admin_host,
tree_name=props.tree_name,
username=auto_closer_username,
state=old_status.state.value,
step_name="copy tree status to new app",
last_status=status,
update_all=False,
)
# Retrieve the newly copied status.
status = api.tree_status.get("", tree_name=props.tree_name)
else:
api.tree_status.update(
message=status.message,
hostname=props.tree_status_host,
admin_hostname=props.tree_status_admin_host,
username=auto_closer_username,
state=status.state.value,
step_name="copy tree status to old app",
last_status=old_status,
update_all=False,
)
# Under some conditions the auto-closer is guaranteed not to modify the
# tree status regardless of the console health, in which case we can
# exit early.
if status.open and closing_disabled_reason:
# If the tree is open and auto-closing is currently disabled, we
# should never close the tree.
return RawResult(
summary_markdown=closing_disabled_reason,
status=common_pb2.SUCCESS,
)
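# The current status was set manually (the message lacks the automatic
# suffix, or the username isn't the auto-closer), so be careful not to
# override a human's decision.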
if not (status.message.endswith(MESSAGE_SUFFIX)) or (
status.username and status.username != auto_closer_username
):
# If the tree is already closed by a human we shouldn't auto-reopen.
if not status.open:
return RawResult(
summary_markdown="tree is already manually closed",
status=common_pb2.SUCCESS,
)
open_duration = api.time.utcnow() - status.date
# If the tree was manually reopened recently it shouldn't be closed
# again, to avoid stepping on build gardeners' toes.
if open_duration < grace_period:
return RawResult(
summary_markdown="tree is recently opened",
status=common_pb2.SUCCESS,
)
consoles = []
milo_cfg = api.luci_config.milo(project=props.project)
for console_name in props.console_names:
matches = [c for c in milo_cfg.consoles if c.id == console_name]
assert matches, f"no console with name {console_name!r}"
consoles.append(matches[0])
with api.step.nest("check console health"):
summary_regexp_ignore = [re.compile(r) for r in props.summary_regexp_ignore]
closure_reason = get_tree_closure_reason(
api, consoles, props.rules, summary_regexp_ignore
)
if not closure_reason:
if not status.open:
api.tree_status.update(
message="Tree is open" + MESSAGE_SUFFIX,
hostname=props.tree_status_host,
admin_hostname=props.tree_status_admin_host,
tree_name=props.tree_name,
username=auto_closer_username,
state="OPEN",
step_name="open tree",
last_status=status,
)
# TODO(fxbug.dev/97321): Also close the tree closure bug when
# reopening the tree.
# Builders are green enough, so no need to close the tree.
return RawResult(
summary_markdown="console is healthy", status=common_pb2.SUCCESS
)
# Log the bug title so someone looking at the build results page can
# understand why the builder is closing the tree.
api.step.empty(
closure_reason.bug_title(), status=api.step.FAILURE, raise_on_failure=False
)
# The console is unhealthy but the tree is already closed, so no action
# is necessary.
if not status.open:
return RawResult(
summary_markdown=f"{closure_reason.bug_title()} (tree is already closed)",
status=common_pb2.SUCCESS,
)
try:
culprit_ranking_text = run_autogardener(
api, [b.id for b in closure_reason.failed_builds()]
)
except api.step.StepFailure: # pragma: no cover
# TODO(olivernewman): Raise autogardener exceptions at the end of
# the build after closing the tree.
culprit_ranking_text = ""
with api.step.nest("emit tree_closing_builders") as presentation:
presentation.properties["tree_closing_builders"] = sorted(
{b.builder.builder for b in closure_reason.failed_builds()}
)
bug_link = file_bug(
api, closure_reason, props.bug_component_id, culprit_ranking_text
)
message = f"Tree is closed: {bug_link}"
api.tree_status.update(
message=message + MESSAGE_SUFFIX,
hostname=props.tree_status_host,
admin_hostname=props.tree_status_admin_host,
tree_name=props.tree_name,
username=auto_closer_username,
state="CLOSED",
# Trim the URL to just the ID.
issuetracker_id=bug_link.replace("https://fxbug.dev/", ""),
step_name="close tree",
last_status=status,
)
return RawResult(
summary_markdown=closure_reason.bug_title(), status=common_pb2.FAILURE
)
class TreeClosureReason: # pragma: no cover
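"""Base class describing why the tree should be closed."""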
def bug_title(self):
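"""Returns the title of the bug to file."""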
raise NotImplementedError()
def bug_description(self):
"""Returns the description of the bug to file."""
raise NotImplementedError()
def failed_builds(self):
"""Failed Buildbucket builds that triggered the closure.
Returns: A list of build_pb2.Build protos.
"""
raise NotImplementedError()
@attr.s
class ReasonConsecutiveFailures(TreeClosureReason):
_builder = attr.ib(type=str)
_failed_builds = attr.ib(type=[build_pb2.Build])
def bug_title(self):
return f"{self._builder} failed {len(self._failed_builds)} times in a row"
def bug_description(self):
description = f""" The tree was closed because {self._builder} failed {len(self._failed_builds)} times in a row.
Full builder history: {builder_link(self._failed_builds[0].builder, limit=200)}
Failed builds:
"""
lines = textwrap.dedent(description).splitlines()
for build in self._failed_builds:
lines.append(f"- {build_link(build)}")
if build.summary_markdown:
lines.extend(formatted_summary_lines(build))
return "\n".join(lines)
def failed_builds(self):
return self._failed_builds
@attr.s
class ReasonConcurrentFailures(TreeClosureReason):
_failed_builds_by_builder = attr.ib(type={str: build_pb2.Build})
def bug_title(self):
return f"{len(self._failed_builds_by_builder)} builders are failing"
def bug_description(self):
description = f""" The tree was closed because {len(self._failed_builds_by_builder)} builders are failing.
Failing builders:
"""
lines = textwrap.dedent(description).splitlines()
for builder, latest_build in sorted(self._failed_builds_by_builder.items()):
lines.append(f"- {builder}: {build_link(latest_build)}")
if latest_build.summary_markdown:
lines.extend(formatted_summary_lines(latest_build))
return "\n".join(lines)
def failed_builds(self):
return self._failed_builds_by_builder.values()
def build_link(build):
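"""Returns a link to the build's page in the LUCI UI."""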
return f"{builder_link(build.builder)}/b{int(build.id)}"
def builder_link(builder, limit=None):
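"""Returns a link to the builder's build history page, optionally limited to `limit` builds."""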
url = f"https://ci.chromium.org/p/{builder.project}/builders/{builder.bucket}/{builder.builder}"
if limit:
url += f"?limit={int(limit)}"
return url
def formatted_summary_lines(build):
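"""Formats a build's summary markdown as an indented blockquote."""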
lines = [""]
lines.extend(" > " + line for line in build.summary_markdown.split("\n"))
lines.append("")
return lines
def get_tree_closure_reason(api, consoles, rules, summary_regexp_ignore):
"""Determines if and why the tree should be closed.
Args:
consoles (seq of milo_pb2.Console): Milo consoles to check the builders of.
rules (seq of Rule proto): The conditions under which the tree should
be closed.
summary_regexp_ignore (seq of re.Pattern): Any build whose summary matches
one of these regexes will not be considered for the purpose of
closing the tree.
Returns:
A TreeClosureReason if the tree should be closed, otherwise None.
"""
failing_builders = {}
concurrent_failures_rules = [r for r in rules if r.concurrent_failures > 0]
consecutive_failures_rules = [r for r in rules if r.consecutive_failures > 0]
# Search back in history by the maximum number of builds required by any
# rule. If no rule specifies a `consecutive_failures` value, then we only
# need to check the most recent build of each builder for each concurrent
# failure rule.
if consecutive_failures_rules:
count_to_check = max(r.consecutive_failures for r in consecutive_failures_rules)
else:
count_to_check = 1
# Fetch more builds than we actually need in case some match one of
# `summary_regexp_ignore`.
count_to_fetch = max(5, count_to_check * 2)
# TODO(olivernewman): Combine the search requests into a single batch RPC
# request rather than doing them serially. This will likely be
# significantly more efficient.
for console in consoles:
for builder in console.builders:
builds = last_n_builds(api, builder, count_to_fetch)
# Rules only apply to builds that failed, so there's no need to iterate
# over the rules if all of the builder's recent builds passed.
if all(build.status == common_pb2.SUCCESS for build in builds):
continue
# The buildbucket API is supposed to return builds newest-first, but
# we'll sort again here just to be safe.
builds.sort(key=lambda b: b.start_time.seconds, reverse=True)
builds = [
b
for b in builds
if not any(r.search(b.summary_markdown) for r in summary_regexp_ignore)
][:count_to_check]
if not builds:
# Either the builder has no history, or all of its recent builds match
# one of summary_regexp_ignore.
continue
builder_name = builder.name.split("/")[-1]
latest_build = builds[0]
# Likewise, if a builder's most recent build passed, then there's no
# need to close the tree for this builder.
if latest_build.status == common_pb2.SUCCESS:
continue
failing_builders[builder_name] = latest_build
for rule in consecutive_failures_rules:
if (
rule.builders_to_check
and builder_name not in rule.builders_to_check
):
# We're only checking this rule for a subset of builders, and
# this builder is not in that subset.
continue
if len(builds) < rule.consecutive_failures:
# This builder doesn't yet have enough history to determine whether
# the tree should be closed.
continue
builds_to_check = builds[: rule.consecutive_failures]
rule_matched = True
for build in builds_to_check:
if build.status == common_pb2.SUCCESS:
rule_matched = False
break
failed_step = find_failed_step_match(build, rule.failed_step_regexp)
if not failed_step:
rule_matched = False
break
if rule_matched:
return ReasonConsecutiveFailures(
builder=builder_name,
failed_builds=builds_to_check,
)
for rule in concurrent_failures_rules:
matched_builders = {}
# We only consider the most recent build of each builder when checking
# for concurrently failing builders.
for builder, latest_build in failing_builders.items():
failed_step = find_failed_step_match(latest_build, rule.failed_step_regexp)
if failed_step:
matched_builders[builder] = latest_build
if len(matched_builders) >= rule.concurrent_failures:
return ReasonConcurrentFailures(matched_builders)
return None
def find_failed_step_match(build, step_regex):
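"""Returns the first non-successful step whose name matches step_regex, or None."""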
for step in build.steps:
if step.status != common_pb2.SUCCESS and re.search(step_regex, step.name):
return step
return None
def last_n_builds(api, console_builder, n):
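"""Returns the n most recent completed builds for a console builder."""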
# builder.name is of the form
# "buildbucket/luci.{project}.{bucket}/{builder}".
_, full_bucket, builder_name = console_builder.name.split("/")
_, project, bucket = full_bucket.split(".", 2)
predicate = builds_service_pb2.BuildPredicate(
builder=builder_common_pb2.BuilderID(
builder=builder_name,
bucket=bucket,
project=project,
),
status=common_pb2.ENDED_MASK, # Include only completed builds.
)
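# Include steps and summary_markdown so that failed_step_regexp and
# summary_regexp_ignore can be evaluated against the results.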
fields = api.buildbucket.DEFAULT_FIELDS.union({"steps", "summary_markdown"})
builds = api.buildbucket.search(
predicate, limit=n, step_name=builder_name, fields=fields
)
return builds
def run_autogardener(api, build_ids):
"""Given IDs of failed buildbucket builds, run culprit analysis.
Args:
build_ids (seq of int): List of failing build IDs.
Returns: A markdown-formatted string containing a ranking of potential
culprit changes.
"""
exe = api.cipd_ensure(
api.resource("cipd.ensure"),
"fuchsia/infra/autogardener/${platform}",
)
cmd = [
exe,
"culprit",
"-json-output",
api.json.output(),
]
cmd.extend(build_ids)
return api.step(
"find culprits",
cmd,
step_test_data=lambda: api.json.test_api.output(
{"markdown_output": "1. foo\n2. bar"}
),
timeout=datetime.timedelta(minutes=10),
).json.output["markdown_output"]
def file_bug(api, closure_reason, bug_component_id, culprit_ranking):
"""Files a bug tracking the tree closure.
Every tree closure needs a tracking bug linked from the tree status.
Returns:
The bug link.
"""
description = closure_reason.bug_description()
if culprit_ranking:
description += "\n\nCulprit analysis:\n\n" + culprit_ranking
return api.issuetracker.file_bug(
"create issuetracker bug",
closure_reason.bug_title(),
description,
component_id=bug_component_id,
)
def los_angeles_now(api):
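"""Returns the current time in the America/Los_Angeles timezone."""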
# The `time` recipe module and Python's stdlib don't have good timezone
# support, so shell out to unix `date` to get the timezone-aware time.
with api.context(env={"TZ": "America/Los_Angeles"}):
step = api.step(
"get current time",
["date", "--iso-8601=minutes"],
stdout=api.raw_io.output_text(add_output_log=True),
step_test_data=lambda: api.raw_io.test_api.stream_output_text(
"2021-12-01T09:53"
),
)
return datetime.datetime.fromisoformat(step.stdout.strip())
def during_working_hours(now):
"""Checks if the current time is within West Coast working hours.
Working hours are 9 AM to 5 PM, Monday-Friday.
"""
return 0 <= now.weekday() <= 4 and 9 <= now.hour < 17
def GenTests(api):
def pseudo_random_build_id(builder_string):
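"""Derives a deterministic fake build ID from a builder-specific string."""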
return int(int(hashlib.sha256(builder_string.encode()).hexdigest(), 16) % 1e8)
builder_name = "auto-closer"
def test(
name,
builder_history=None,
should_check_tree=True,
should_check_console=True,
should_close_tree=False,
should_open_tree=False,
rules=None,
summary_regexp_ignore=(),
tree_status="OPEN",
tree_status_user="human@example.com",
tree_status_age=DEFAULT_GRACE_PERIOD + datetime.timedelta(minutes=1),
disable_on_dates=(),
bug_component_id=None,
tree_name=None,
):
"""Create a test case for running this recipe.
Args:
name (str): Name of the test case.
builder_history (dict): Mapping from builder base name to a list
of Build protos corresponding to the builder's most recent
builds in reverse chronological order (latest builds first).
should_check_tree (bool): Whether the recipe is expected to
check the current tree status.
should_check_console (bool): Whether the recipe is expected to
query luci-config and Buildbucket for builders and build results.
should_close_tree (bool): Whether the recipe is expected to close
the tree. Ignored if should_check_console is False.
should_open_tree (bool): Whether the recipe is expected to open
the tree. Ignored if should_check_console is False.
rules (seq of Rule): Passed as a recipe property.
summary_regexp_ignore (seq of str): Passed as a recipe property.
tree_status (str): Mock current status for the tree.
tree_status_user (str): Username associated with the last tree
status.
tree_status_age (datetime.timedelta): Mocked duration since the last
tree status update.
disable_on_dates (seq of str): Passed as a recipe property.
bug_component_id (int): Passed as a recipe property.
tree_name (str): Passed as a recipe property.
"""
if not builder_history:
builder_history = {}
if rules is None:
rules = [
dict(concurrent_failures=2, consecutive_failures=2),
]
project = "fuchsia"
bucket = "global.ci"
builder_name = "auto-closer"
res = api.buildbucket_util.test(
name,
status="FAILURE" if should_close_tree else "SUCCESS",
project=project,
builder=builder_name,
) + api.properties(
console_names=["global_ci", "another_ci"],
rules=rules,
summary_regexp_ignore=list(summary_regexp_ignore),
tree_status_host="example.com",
tree_status_admin_host="admin.example.com",
bug_component_id=bug_component_id,
disable_on_dates=list(disable_on_dates),
tree_name=tree_name,
)
if not should_check_tree:
return res
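# Arbitrary fixed timestamp used to seed the mocked time module.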
now = 1592654400
res += api.time.seed(now)
status_date = datetime.datetime.utcfromtimestamp(now) - tree_status_age
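# Treat the old app's status as the more recent one whenever the last
# update came from a human rather than the auto-closer.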
old_app_more_recent = tree_status_user != builder_name
old_status = tree_status
if tree_name:
old_status = "CLOSED" if tree_status == "OPEN" else "OPEN"
res += api.step_data(
"get current tree status",
stdout=api.json.output(
{
"general_state": (
tree_status.lower()
if old_app_more_recent
else old_status.lower()
),
"username": tree_status_user,
"key": 12345,
"date": status_date.strftime(OLD_TREE_STATUS_DATE_FORMAT),
"message": f"Tree is {tree_status if old_app_more_recent else old_status}{MESSAGE_SUFFIX}",
}
),
)
if tree_name:
if old_app_more_recent:
status_date -= datetime.timedelta(seconds=5)
else:
status_date += datetime.timedelta(seconds=5)
res += api.step_data(
"get current tree status (2)",
stdout=api.json.output(
{
"generalState": (
old_status if old_app_more_recent else tree_status
),
"name": f"trees/{tree_name}/status/12345",
"createTime": status_date.strftime(TREE_STATUS_DATE_FORMAT),
"message": f"Tree is {old_status if old_app_more_recent else tree_status}{MESSAGE_SUFFIX}",
}
),
)
if old_app_more_recent:
res += api.step_data(
"check for tree status collision",
stdout=api.json.output(
{
"generalState": old_status,
"name": f"trees/{tree_name}/status/12345",
"createTime": status_date.strftime(TREE_STATUS_DATE_FORMAT),
"message": f"Tree is {old_status}{MESSAGE_SUFFIX}",
}
),
)
res += api.step_data(
"get current tree status (3)",
stdout=api.json.output(
{
"generalState": tree_status,
"name": f"trees/{tree_name}/status/12345",
"createTime": status_date.strftime(TREE_STATUS_DATE_FORMAT),
"message": f"Tree is {tree_status}{MESSAGE_SUFFIX}",
}
),
)
else:
res += api.step_data(
"check for tree status collision",
stdout=api.json.output(
{
"general_state": old_status.lower(),
"username": tree_status_user,
"key": 12345,
"date": (
status_date - datetime.timedelta(seconds=5)
).strftime(OLD_TREE_STATUS_DATE_FORMAT),
"message": f"Tree is {old_status}{MESSAGE_SUFFIX}",
}
),
)
if should_close_tree:
res += api.step_data(
"check for tree status collision (2)",
stdout=api.json.output(
{
"generalState": tree_status,
"name": f"trees/{tree_name}/status/12345",
"createTime": status_date.strftime(TREE_STATUS_DATE_FORMAT),
"message": f"Tree is {tree_status}{MESSAGE_SUFFIX}",
}
),
)
if should_check_console:
res += api.post_process(post_process.MustRun, "check console health")
else:
res += api.post_process(post_process.DoesNotRun, "check console health")
return res
checker = post_process.MustRun if should_close_tree else post_process.DoesNotRun
res += api.post_process(checker, "create issuetracker bug")
res += api.post_process(checker, "close tree")
res += api.post_process(
post_process.MustRun if should_open_tree else post_process.DoesNotRun,
"open tree",
)
console_builders = []
for builder in builder_history:
console_builders.append(
milo_pb2.Builder(name=f"buildbucket/luci.{project}.{bucket}/{builder}")
)
milo_cfg = milo_pb2.Project(
consoles=[
milo_pb2.Console(
id="global_ci",
name="global integration ci",
repo_url="https://fuchsia.googlesource.com/integration",
refs=["regexp:refs/heads/main"],
# Split builders between the two consoles to test the
# multi-console logic.
builders=[console_builders[0]],
),
milo_pb2.Console(
id="another_ci",
name="another ci",
repo_url="https://fuchsia.googlesource.com/other-repo",
refs=["regexp:refs/heads/main"],
builders=console_builders[1:],
),
]
)
res += api.luci_config.mock_config(project, "luci-milo.cfg", milo_cfg)
# Each history entry is a list of Build protos, most recent first.
for builder, history in builder_history.items():
search_results = []
for i, build in enumerate(history):
bp = build.builder
bp.builder, bp.bucket, bp.project = builder, bucket, project
build.id = pseudo_random_build_id(builder + str(i))
build.start_time.seconds = int(
now - datetime.timedelta(days=i).total_seconds()
)
search_results.append(build)
res += api.buildbucket.simulated_search_results(
search_results,
step_name=f"check console health.{builder}",
)
return res
def _build(build_status, steps=None, summary_markdown=""):
"""Returns a Build proto for mocking buildbucket.search requests.
Args:
build_status (str): A string corresponding to a common_pb2.Status
value.
steps (dict): A mapping from step names (with nestings indicated
by | pipes) to status strings, also corresponding to
common_pb2.Status values.
summary_markdown (str): The summary to set for the build.
"""
steps = steps or {}
b = api.buildbucket.ci_build_message(status=build_status)
b.summary_markdown = summary_markdown
for step_name, status in steps.items():
step = b.steps.add()
step.name = step_name
step.status = common_pb2.Status.Value(status)
return b
def success(**kwargs):
"""Returns a Build proto with a successful status."""
return _build("SUCCESS", **kwargs)
def failure(summary_markdown="5 tests failed", **kwargs):
"""Returns a Build proto with a failure status."""
return _build("FAILURE", summary_markdown=summary_markdown, **kwargs)
# As long as the most recent build for each builder is green, we shouldn't
# close the tree.
yield test(
name="all_green",
builder_history={
"core.arm64-asan": [success(), failure()],
"core.x64-asan": [success(), failure()],
},
# Non-current dates should be ignored.
disable_on_dates=["2019-05-06"],
)
# If any builder's most recent `consecutive_failures` builds have all failed,
# close the tree.
yield test(
name="consecutive_failures",
builder_history={
"core.arm64-asan": [failure(steps={"foo": "FAILURE"}), success()],
"core.x64-asan": [
failure(steps={"foo": "FAILURE"}),
failure(steps={"bar": "SUCCESS", "foo": "FAILURE"}),
],
},
rules=[
dict(
consecutive_failures=2,
failed_step_regexp="foo",
builders_to_check=["core.x64-asan"],
)
],
should_close_tree=True,
bug_component_id=12345,
tree_name="fuchsia-stem",
)
# Even if there are consecutive failures, ignore them if we're only checking
# the rule for a subset of builders, and the failing builder is not in that
# subset.
yield test(
name="ignored_consecutive_failures",
builder_history={
"core.arm64-asan": [failure(steps={"foo": "FAILURE"}), success()],
"core.x64-asan": [
failure(steps={"foo": "FAILURE"}),
failure(steps={"bar": "SUCCESS", "foo": "FAILURE"}),
],
},
rules=[
dict(
consecutive_failures=2,
failed_step_regexp="foo",
builders_to_check=["core.arm64-asan"],
)
],
should_close_tree=False,
)
yield test(
name="summary_regexp_ignore",
builder_history={
"core.x64-asan": [
failure(
summary_markdown="step failed: checkout.jiri update",
steps={"foo": "FAILURE"},
),
],
},
rules=[dict(consecutive_failures=1)],
summary_regexp_ignore=["checkout"],
should_close_tree=False,
)
# Even if a builder has failed more than `consecutive_failures` times in a
# row, don't close the tree unless all the failed builds have a failed step
# matching `failed_step_regexp`.
yield test(
name="consecutive_failures_no_matched_step",
builder_history={
"core.x64-asan": [
failure(steps={"foo": "FAILURE"}),
failure(steps={"bar": "FAILURE"}),
]
},
rules=[dict(consecutive_failures=2, failed_step_regexp="foo")],
should_close_tree=False,
)
# If `concurrent_failures` or more builders' most recent builds have all
# failed, close the tree.
yield test(
name="concurrent_failures",
builder_history={
"bringup.arm64-asan": [success()],
"core.arm64-asan": [failure(steps={"foo": "FAILURE"})],
"core.x64-asan": [failure(steps={"bar": "SUCCESS", "foo|x": "FAILURE"})],
},
rules=[dict(concurrent_failures=2, failed_step_regexp="foo")],
should_close_tree=True,
)
# Even if `concurrent_failures` or more builders' most recent builds have
# all failed, don't close the tree if they don't all have a failed step
# matching `failed_step_regexp`.
yield test(
name="concurrent_failures_no_matched_step",
builder_history={
"bringup.arm64-asan": [failure()],
"core.arm64-asan": [failure(steps={"bar": "FAILURE"})],
# Only this builder has a failed step matching `failed_step_regexp`.
"core.x64-asan": [failure(steps={"bar": "SUCCESS", "foo|x": "FAILURE"})],
},
rules=[dict(concurrent_failures=2, failed_step_regexp="foo")],
should_close_tree=False,
)
# The tree status shouldn't be changed if it was only recently opened.
yield test(
name="tree_recently_opened",
tree_status_age=DEFAULT_GRACE_PERIOD - datetime.timedelta(minutes=1),
should_check_console=False,
)
# The build should exit early if the tree is already closed by a human.
yield test(
name="tree_closed_by_human", tree_status="CLOSED", should_check_console=False
)
# We should not take any action if the tree is already auto-closed and the
# console remains unhealthy.
yield test(
name="tree_already_closed",
tree_status="CLOSED",
builder_history={
"core.x64-asan": [
failure(steps={"foo": "FAILURE"}),
failure(steps={"bar": "FAILURE"}),
]
},
rules=[dict(consecutive_failures=2)],
tree_status_user=builder_name,
should_close_tree=False,
tree_name="fuchsia",
)
# The tree should be reopened if it is currently auto-closed and the console
# is healthy.
yield test(
name="reopen",
tree_status="CLOSED",
tree_status_user=builder_name,
builder_history={
"bringup.arm64-asan": [success()],
"core.arm64-asan": [success()],
},
should_open_tree=True,
)
# Builders with no builds shouldn't affect the tree status.
yield test(
name="throttled__no_history",
# If the tree is "throttled" then it's open, so the closer should still
# check the console for failures.
tree_status="THROTTLED",
builder_history={"core.arm64-asan": []},
should_close_tree=False,
)
# Builders with fewer than consecutive_failures builds shouldn't affect the
# tree status.
yield test(
name="not_enough_history",
builder_history={"core.arm64-asan": [failure()]},
should_close_tree=False,
)
weekday_nighttime = "2021-12-03T23:53" # Friday at 11:53 PM
weekend_daytime = "2021-12-04T11:01" # Saturday at 11:01 AM
# Tree closer should exit early if the time is outside West Coast working
# hours and there are no off-hour rules configured.
yield test("weekend", should_check_tree=False) + api.step_data(
"get current time", api.raw_io.stream_output_text(weekend_daytime)
)
yield test("nighttime", should_check_tree=False) + api.step_data(
"get current time", api.raw_io.stream_output_text(weekday_nighttime)
)
# Tree closer should reopen the tree after an automatic closure, even if
# it's outside working hours.
yield (
test(
"nighttime_reopen_tree",
should_check_tree=True,
tree_status="CLOSED",
tree_status_user=builder_name,
builder_history={
"bringup.arm64-asan": [success()],
"core.arm64-asan": [success()],
},
should_open_tree=True,
)
+ api.step_data(
"get current time", api.raw_io.stream_output_text(weekday_nighttime)
)
)
yield test(
"disable_dates", should_check_tree=False, disable_on_dates=["2021-12-01"]
) + api.step_data(
"get current time", api.raw_io.stream_output_text("2021-12-01T12:00")
)