| # Copyright 2020 The Fuchsia Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Recipe for closing the tree when builders in a specified Milo console fail.""" |
| |
| import datetime |
| import hashlib |
| import re |
| import textwrap |
| |
| import attr |
| |
| from recipe_engine import post_process |
| |
| from PB.go.chromium.org.luci.buildbucket.proto import ( |
| build as build_pb2, |
| builder_common as builder_common_pb2, |
| builds_service as builds_service_pb2, |
| common as common_pb2, |
| ) |
| from PB.go.chromium.org.luci.milo.api.config import project as milo_pb2 |
| from PB.recipe_engine.result import RawResult |
| from PB.recipes.fuchsia.tree_closer import InputProperties |
| |
# Recipe modules that this recipe uses through `api.<module>`.
DEPS = [
    "fuchsia/buildbucket_util",
    "fuchsia/cipd_ensure",
    "fuchsia/issuetracker",
    "fuchsia/luci_config",
    "fuchsia/tree_status",
    "recipe_engine/buildbucket",
    "recipe_engine/context",
    "recipe_engine/json",
    "recipe_engine/properties",
    "recipe_engine/raw_io",
    "recipe_engine/step",
    "recipe_engine/time",
]

# The recipe's input properties are described by the InputProperties proto.
PROPERTIES = InputProperties

# Grace period applied after a manual reopen when the `grace_period_seconds`
# property is unset (a zero-length timedelta is falsy, so this value is used).
DEFAULT_GRACE_PERIOD = datetime.timedelta(hours=2)

# The formats used for timestamps in HTTP responses from requests to the tree
# status page. In the GenTests mocks the old format is used for the "date"
# field of the host-based status app and the other one for the "createTime"
# field of the tree-name-based status app.
OLD_TREE_STATUS_DATE_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
TREE_STATUS_DATE_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"

# Suffix appended to every status message written by this recipe. RunSteps
# treats a status whose message lacks this suffix as manually set.
MESSAGE_SUFFIX = " (automatic)"
| |
| |
def RunSteps(api, props):
    """Opens or closes the tree based on the health of the configured consoles.

    Args:
        api: Recipe API object providing the modules listed in DEPS.
        props (InputProperties): The recipe's input properties.

    Returns:
        A RawResult. The status is FAILURE only when this run closed the
        tree; every other outcome (early exit, healthy console, reopening the
        tree, tree already closed) is reported as SUCCESS.
    """
    now = los_angeles_now(api)

    closing_disabled_reason = None

    if not during_working_hours(now):
        closing_disabled_reason = "outside working hours"

    # An explicitly disabled date takes precedence over the working-hours
    # reason in the summary.
    for date in props.disable_on_dates:
        if now.date() == datetime.date.fromisoformat(date):
            closing_disabled_reason = f"disabled today ({date})"
            break

    # A zero-length timedelta is falsy, so an unset property falls back to the
    # default grace period.
    grace_period = (
        datetime.timedelta(seconds=props.grace_period_seconds) or DEFAULT_GRACE_PERIOD
    )

    # Status updates are attributed to the name of the builder running this
    # recipe.
    auto_closer_username = api.buildbucket.build.builder.builder

    with api.context(infra_steps=True):
        status = api.tree_status.get(props.tree_status_host)
        if props.tree_name:
            old_status = status
            status = api.tree_status.get("", tree_name=props.tree_name)
            # If the tree status was manually set, then the two apps will be out
            # of sync, so copy the newer status to the other app.
            # TODO(https://fxbug.dev/332741591): Remove when we no longer need
            # the old app for the stats dashboards.
            if old_status.message != status.message:
                if old_status.date > status.date:
                    api.tree_status.update(
                        message=old_status.message,
                        hostname=props.tree_status_host,
                        admin_hostname=props.tree_status_admin_host,
                        tree_name=props.tree_name,
                        username=auto_closer_username,
                        state=old_status.state.value,
                        step_name="copy tree status to new app",
                        last_status=status,
                        update_all=False,
                    )
                    # Retrieve the newly copied status.
                    status = api.tree_status.get("", tree_name=props.tree_name)
                else:
                    api.tree_status.update(
                        message=status.message,
                        hostname=props.tree_status_host,
                        admin_hostname=props.tree_status_admin_host,
                        username=auto_closer_username,
                        state=status.state.value,
                        step_name="copy tree status to old app",
                        last_status=old_status,
                        update_all=False,
                    )

    # Under some conditions the auto-closer is guaranteed not to modify the
    # tree status regardless of the console health, in which case we can
    # exit early.

    if status.open and closing_disabled_reason:
        # If the tree is open and we're during a time that auto-closing is
        # disabled, we should never close the tree.
        return RawResult(
            summary_markdown=closing_disabled_reason,
            status=common_pb2.SUCCESS,
        )

    # The status counts as manually set if its message lacks the automatic
    # suffix, or if it records a username other than this builder's name.
    if not (status.message.endswith(MESSAGE_SUFFIX)) or (
        status.username and status.username != auto_closer_username
    ):
        # If the tree is already closed by a human we shouldn't auto-reopen.
        if not status.open:
            return RawResult(
                summary_markdown="tree is already manually closed",
                status=common_pb2.SUCCESS,
            )
        open_duration = api.time.utcnow() - status.date
        # If the tree was manually reopened recently it shouldn't be closed
        # again, to avoid stepping on build gardeners' toes.
        if open_duration < grace_period:
            return RawResult(
                summary_markdown="tree is recently opened",
                status=common_pb2.SUCCESS,
            )

    # Resolve each configured console name to its Milo console config; a
    # missing console fails the assertion below.
    consoles = []
    milo_cfg = api.luci_config.milo(project=props.project)
    for console_name in props.console_names:
        matches = [c for c in milo_cfg.consoles if c.id == console_name]
        assert matches, f"no console with name {console_name!r}"
        consoles.append(matches[0])

    with api.step.nest("check console health"):
        summary_regexp_ignore = [re.compile(r) for r in props.summary_regexp_ignore]
        closure_reason = get_tree_closure_reason(
            api, consoles, props.rules, summary_regexp_ignore
        )

    if not closure_reason:
        if not status.open:
            api.tree_status.update(
                message="Tree is open" + MESSAGE_SUFFIX,
                hostname=props.tree_status_host,
                admin_hostname=props.tree_status_admin_host,
                tree_name=props.tree_name,
                username=auto_closer_username,
                state="OPEN",
                step_name="open tree",
                last_status=status,
            )
            # TODO(fxbug.dev/97321): Also close the tree closure bug when
            # reopening the tree.

        # Builders are green enough, so no need to close the tree.
        return RawResult(
            summary_markdown="console is healthy", status=common_pb2.SUCCESS
        )

    # Log the bug title so someone looking at the build results page can
    # understand why the builder is closing the tree.
    api.step.empty(
        closure_reason.bug_title(), status=api.step.FAILURE, raise_on_failure=False
    )

    # The console is unhealthy but the tree is already closed, so no action
    # is necessary.
    if not status.open:
        return RawResult(
            summary_markdown=f"{closure_reason.bug_title()} (tree is already closed)",
            status=common_pb2.SUCCESS,
        )

    # Culprit analysis is best-effort: a failed autogardener step must not
    # prevent the tree from being closed.
    try:
        culprit_ranking_text = run_autogardener(
            api, [b.id for b in closure_reason.failed_builds()]
        )
    except api.step.StepFailure:  # pragma: no cover
        # TODO(olivernewman): Raise autogardener exceptions at the end of
        # the build after closing the tree.
        culprit_ranking_text = ""

    # Expose the sorted, de-duplicated names of the failing builders as an
    # output property.
    with api.step.nest("emit tree_closing_builders") as presentation:
        presentation.properties["tree_closing_builders"] = sorted(
            {b.builder.builder for b in closure_reason.failed_builds()}
        )

    bug_link = file_bug(
        api, closure_reason, props.bug_component_id, culprit_ranking_text
    )

    message = f"Tree is closed: {bug_link}"
    api.tree_status.update(
        message=message + MESSAGE_SUFFIX,
        hostname=props.tree_status_host,
        admin_hostname=props.tree_status_admin_host,
        tree_name=props.tree_name,
        username=auto_closer_username,
        state="CLOSED",
        # Trim the URL to just the ID.
        issuetracker_id=bug_link.replace("https://fxbug.dev/", ""),
        step_name="close tree",
        last_status=status,
    )

    # Closing the tree is reported as a failed build of the auto-closer.
    return RawResult(
        summary_markdown=closure_reason.bug_title(), status=common_pb2.FAILURE
    )
| |
| |
class TreeClosureReason:  # pragma: no cover
    """Interface for an explanation of why the tree must be closed.

    Subclasses override every method; the base implementations only raise.
    """

    def bug_title(self):
        """Returns the title of the bug to file."""
        raise NotImplementedError

    def bug_description(self):
        """Returns the description of the bug to file."""
        raise NotImplementedError

    def failed_builds(self):
        """Failed Buildbucket builds that triggered the closure.

        Returns: list of build.proto.
        """
        raise NotImplementedError
| |
| |
@attr.s
class ReasonConsecutiveFailures(TreeClosureReason):
    """Closure reason: a single builder failed several times in a row.

    Constructor args (attrs strips the leading underscore):
        builder (str): Base name of the failing builder.
        failed_builds (list of build_pb2.Build): The consecutive failed
            builds; the caller passes them newest first.
    """

    _builder = attr.ib(type=str)
    _failed_builds = attr.ib(type=[build_pb2.Build])

    def bug_title(self):
        """Returns the title of the bug to file."""
        return f"{self._builder} failed {len(self._failed_builds)} times in a row"

    def bug_description(self):
        """Returns the description of the bug to file.

        The header lines are built explicitly instead of dedenting a
        triple-quoted literal: the literal's first line started on the same
        line as the opening quotes, so `textwrap.dedent` could not compute a
        consistent margin and left stray leading whitespace in the output.
        """
        history_url = builder_link(self._failed_builds[0].builder, limit=200)
        lines = [
            f"The tree was closed because {self._builder} failed {len(self._failed_builds)} times in a row.",
            "",
            f"Full builder history: {history_url}",
            "",
            "Failed builds:",
            "",
        ]

        for build in self._failed_builds:
            lines.append(f"- {build_link(build)}")
            # Quote the build's own summary under its link when it has one.
            if build.summary_markdown:
                lines.extend(formatted_summary_lines(build))

        return "\n".join(lines)

    def failed_builds(self):
        """Returns the list of failed builds that triggered the closure."""
        return self._failed_builds
| |
| |
@attr.s
class ReasonConcurrentFailures(TreeClosureReason):
    """Closure reason: several builders are failing at the same time.

    Constructor args (attrs strips the leading underscore):
        failed_builds_by_builder (dict): Maps each failing builder's base
            name to the build_pb2.Build that is its latest failed build.
    """

    _failed_builds_by_builder = attr.ib(type={str: build_pb2.Build})

    def bug_title(self):
        """Returns the title of the bug to file."""
        return f"{len(self._failed_builds_by_builder)} builders are failing"

    def bug_description(self):
        """Returns the description of the bug to file.

        The header lines are built explicitly instead of dedenting a
        triple-quoted literal: the literal's first line started on the same
        line as the opening quotes, so `textwrap.dedent` could not compute a
        consistent margin and left stray leading whitespace in the output.
        """
        lines = [
            f"The tree was closed because {len(self._failed_builds_by_builder)} builders are failing.",
            "",
            "Failing builders:",
            "",
        ]
        # Sort by builder name so the description is deterministic.
        for builder, latest_build in sorted(self._failed_builds_by_builder.items()):
            lines.append(f"- {builder}: {build_link(latest_build)}")
            if latest_build.summary_markdown:
                lines.extend(formatted_summary_lines(latest_build))
        return "\n".join(lines)

    def failed_builds(self):
        """Returns the latest failed build of every failing builder.

        A real list is returned (not a dict view) to match the contract
        documented on the base class.
        """
        return list(self._failed_builds_by_builder.values())
| |
| |
def build_link(build):
    """Returns the Milo URL of `build`: its builder's URL plus `/b<id>`."""
    builder_url = builder_link(build.builder)
    return f"{builder_url}/b{int(build.id)}"
| |
| |
def builder_link(builder, limit=None):
    """Returns the Milo URL of a builder.

    Args:
        builder: Object with `project`, `bucket` and `builder` attributes.
        limit: Optional number appended as a `?limit=` query parameter;
            omitted when falsy.
    """
    query = f"?limit={int(limit)}" if limit else ""
    return (
        f"https://ci.chromium.org/p/{builder.project}/builders/{builder.bucket}/{builder.builder}"
        + query
    )
| |
| |
def formatted_summary_lines(build):
    """Returns the build's summary as quoted lines framed by blank lines.

    Every line of `build.summary_markdown` is prefixed with " > "; an empty
    string is placed before and after the quoted lines.
    """
    quoted = [" > " + text for text in build.summary_markdown.split("\n")]
    return ["", *quoted, ""]
| |
| |
def get_tree_closure_reason(api, consoles, rules, summary_regexp_ignore):
    """Determines if and why the tree should be closed.

    Args:
        api: Recipe API object, used to search Buildbucket for builds.
        consoles (seq of milo_pb2.Console): Milo consoles to check the builders of.
        rules (seq of Rule proto): The conditions under which the tree should
            be closed.
        summary_regexp_ignore (seq of re.Pattern): Any build whose summary matches
            one of these regexes will not be considered for the purpose of
            closing the tree.

    Returns:
        A TreeClosureReason if the tree should be closed, otherwise None.
    """
    # Maps builder base name -> latest build, for every builder whose latest
    # considered build did not succeed.
    failing_builders = {}

    concurrent_failures_rules = [r for r in rules if r.concurrent_failures > 0]
    consecutive_failures_rules = [r for r in rules if r.consecutive_failures > 0]

    # Search back in history by the maximum number of builds required by any
    # rule. If no rule specifies a `consecutive_failures` value, then we only
    # need to check the most recent build of each builder for each concurrent
    # failure rule.
    count_to_check = max(
        (r.consecutive_failures for r in consecutive_failures_rules), default=1
    )

    # Fetch more builds than we actually need in case some match one of
    # `summary_regexp_ignore`.
    count_to_fetch = max(5, count_to_check * 2)

    # TODO(olivernewman): Combine the search requests into a single batch RPC
    # request rather than doing them serially. This will likely be
    # significantly more efficient.
    for console in consoles:
        for builder in console.builders:
            builds = last_n_builds(api, builder, count_to_fetch)
            # Rules only apply to builds that failed, so no need to even iterate
            # over the rules if all of its builds passed.
            if all(build.status == common_pb2.SUCCESS for build in builds):
                continue

            # The buildbucket API is supposed to return builds newest-first, but
            # we'll sort again here just to be safe.
            builds.sort(key=lambda b: b.start_time.seconds, reverse=True)

            builds = [
                b
                for b in builds
                if not any(r.search(b.summary_markdown) for r in summary_regexp_ignore)
            ][:count_to_check]
            if not builds:
                # Either builder has no history, or all of its recent builds match
                # one of summary_regexp_ignore.
                continue

            latest_build = builds[0]

            # Likewise, if a builder's most recent build passed, then there's no
            # need to close the tree for this builder.
            if latest_build.status == common_pb2.SUCCESS:
                continue

            # Console builder names are slash-separated; the last component is
            # the builder's base name.
            builder_name = builder.name.split("/")[-1]
            failing_builders[builder_name] = latest_build

            for rule in consecutive_failures_rules:
                if (
                    rule.builders_to_check
                    and builder_name not in rule.builders_to_check
                ):
                    # We're only checking this rule for a subset of builders, and
                    # this builder is not in that subset.
                    continue

                if len(builds) < rule.consecutive_failures:
                    # This builder doesn't yet have enough history to determine whether
                    # it should be closed.
                    continue

                builds_to_check = builds[: rule.consecutive_failures]
                # The rule matches only if every one of the builds failed AND
                # has a failed step matching the rule's step regex.
                rule_matched = all(
                    build.status != common_pb2.SUCCESS
                    and find_failed_step_match(build, rule.failed_step_regexp)
                    for build in builds_to_check
                )

                if rule_matched:
                    return ReasonConsecutiveFailures(
                        builder=builder_name,
                        failed_builds=builds_to_check,
                    )

    for rule in concurrent_failures_rules:
        # We only consider the most recent build of each builder when checking
        # for concurrently failing builders.
        matched_builders = {
            name: latest_build
            for name, latest_build in failing_builders.items()
            if find_failed_step_match(latest_build, rule.failed_step_regexp)
        }

        if len(matched_builders) >= rule.concurrent_failures:
            return ReasonConcurrentFailures(matched_builders)

    return None
| |
| |
def find_failed_step_match(build, step_regex):
    """Returns the first non-successful step whose name matches `step_regex`.

    Args:
        build: Build whose `steps` are scanned in order.
        step_regex (str): Pattern searched for (not anchored) in step names.

    Returns:
        The first matching step, or None if no unsuccessful step matches.
    """
    unsuccessful_matches = (
        candidate
        for candidate in build.steps
        if candidate.status != common_pb2.SUCCESS
        and re.search(step_regex, candidate.name)
    )
    return next(unsuccessful_matches, None)
| |
| |
def last_n_builds(api, console_builder, n):
    """Searches Buildbucket for up to `n` completed builds of a console builder.

    Args:
        api: Recipe API object.
        console_builder: Milo console builder whose `name` is of the form
            "buildbucket/luci.{project}.{bucket}/{builder}".
        n (int): Maximum number of builds to request.

    Returns:
        The builds returned by `api.buildbucket.search`, including their steps
        and summary markdown.
    """
    _, full_bucket, builder_name = console_builder.name.split("/")
    # The bucket itself may contain dots, so split at most twice.
    _, project, bucket = full_bucket.split(".", 2)

    builder_id = builder_common_pb2.BuilderID(
        builder=builder_name,
        bucket=bucket,
        project=project,
    )
    # Include only completed builds.
    completed_builds = builds_service_pb2.BuildPredicate(
        builder=builder_id,
        status=common_pb2.ENDED_MASK,
    )
    wanted_fields = api.buildbucket.DEFAULT_FIELDS.union({"steps", "summary_markdown"})
    return api.buildbucket.search(
        completed_builds, limit=n, step_name=builder_name, fields=wanted_fields
    )
| |
| |
def run_autogardener(api, build_ids):
    """Given IDs of failed buildbucket builds, run culprit analysis.

    Args:
        api: Recipe API object.
        build_ids (seq of int): List of failing build IDs.

    Returns: A markdown-formatted string containing a ranking of potential
        culprit changes.
    """
    autogardener = api.cipd_ensure(
        api.resource("cipd.ensure"),
        "fuchsia/infra/autogardener/${platform}",
    )
    # The build IDs are passed as trailing positional arguments.
    culprit_cmd = [
        autogardener,
        "culprit",
        "-json-output",
        api.json.output(),
        *build_ids,
    ]
    culprit_step = api.step(
        "find culprits",
        culprit_cmd,
        step_test_data=lambda: api.json.test_api.output(
            {"markdown_output": "1. foo\n2. bar"}
        ),
        timeout=datetime.timedelta(minutes=10),
    )
    return culprit_step.json.output["markdown_output"]
| |
| |
def file_bug(api, closure_reason, bug_component_id, culprit_ranking):
    """Files the tracking bug that every tree closure links from its status.

    Args:
        api: Recipe API object.
        closure_reason (TreeClosureReason): Supplies the bug title and
            description.
        bug_component_id: Component ID passed through to the issue tracker.
        culprit_ranking (str): Culprit analysis text appended to the
            description; skipped when empty.

    Returns:
        The bug link.
    """
    body = closure_reason.bug_description()
    if culprit_ranking:
        body = body + "\n\nCulprit analysis:\n\n" + culprit_ranking
    title = closure_reason.bug_title()
    return api.issuetracker.file_bug(
        "create issuetracker bug",
        title,
        body,
        component_id=bug_component_id,
    )
| |
| |
def los_angeles_now(api):
    """Returns the current time in the America/Los_Angeles timezone.

    The time is parsed from the stdout of a "get current time" step.
    """
    # The `time` recipe module and Python's stdlib don't have good timezone
    # support, so shell out to unix `date` to get the timezone-aware time.
    la_env = {"TZ": "America/Los_Angeles"}
    with api.context(env=la_env):
        date_step = api.step(
            "get current time",
            ["date", "--iso-8601=minutes"],
            stdout=api.raw_io.output_text(add_output_log=True),
            step_test_data=lambda: api.raw_io.test_api.stream_output_text(
                "2021-12-01T09:53"
            ),
        )
    timestamp = date_step.stdout.strip()
    return datetime.datetime.fromisoformat(timestamp)
| |
| |
def during_working_hours(now):
    """Checks if the current time is within West Coast working hours.

    Working hours are 9 AM to 5 PM, Monday-Friday.
    """
    # weekday() numbers Monday as 0, so 0-4 covers Monday through Friday.
    if not 0 <= now.weekday() <= 4:
        return False
    # 17:00 itself is already outside working hours.
    return 9 <= now.hour < 17
| |
| |
def GenTests(api):
    """Yields the test cases for this recipe."""

    def pseudo_random_build_id(builder_string):
        # Derive a stable build ID below 1e8 from a hash of the given string.
        return int(int(hashlib.sha256(builder_string.encode()).hexdigest(), 16) % 1e8)

    # Name of the builder running the recipe in tests; test cases pass it as
    # `tree_status_user` to simulate a status set by the auto-closer itself.
    builder_name = "auto-closer"

    def test(
        name,
        builder_history=None,
        should_check_tree=True,
        should_check_console=True,
        should_close_tree=False,
        should_open_tree=False,
        rules=None,
        summary_regexp_ignore=(),
        tree_status="OPEN",
        tree_status_user="human@example.com",
        tree_status_age=DEFAULT_GRACE_PERIOD + datetime.timedelta(minutes=1),
        disable_on_dates=(),
        bug_component_id=None,
        tree_name=None,
    ):
        """Create a test case for running this recipe.

        Args:
            name (str): Name of the test case.
            builder_history (dict): Mapping from builder base name to a list
                of Build protos (see `success()` and `failure()`),
                corresponding to the builder's most recent builds in reverse
                chronological order (latest builds first).
            should_check_tree (bool): Whether to mock the current tree
                status. If False, the test case is returned with only the
                build and property inputs: no tree status step data and no
                post-process checks are added.
            should_check_console (bool): Whether the recipe is expected to
                query luci-config and Buildbucket for builders and build results.
            should_close_tree (bool): Whether the recipe is expected to close
                the tree. Ignored if should_check_console is False.
            should_open_tree (bool): Whether the recipe is expected to open
                the tree. Ignored if should_check_console is False.
            rules (seq of Rule): Passed as a recipe property.
            summary_regexp_ignore (seq of str): Passed as a recipe property.
            tree_status (str): Mock current status for the tree.
            tree_status_user (str): Username associated with the last tree
                status.
            tree_status_age (datetime.timedelta): Mocked duration since the
                last tree status update.
            disable_on_dates (seq of str): Passed as a recipe property.
            bug_component_id (int): Passed as a recipe property.
            tree_name (str): Passed as a recipe property. When set, responses
                of the tree-name-based status app are mocked as well, with a
                status that differs from the host-based app's.
        """
        if not builder_history:
            builder_history = {}

        if rules is None:
            rules = [
                dict(concurrent_failures=2, consecutive_failures=2),
            ]

        project = "fuchsia"
        bucket = "global.ci"
        builder_name = "auto-closer"

        res = api.buildbucket_util.test(
            name,
            status="FAILURE" if should_close_tree else "SUCCESS",
            project=project,
            builder=builder_name,
        ) + api.properties(
            console_names=["global_ci", "another_ci"],
            rules=rules,
            summary_regexp_ignore=list(summary_regexp_ignore),
            tree_status_host="example.com",
            tree_status_admin_host="admin.example.com",
            bug_component_id=bug_component_id,
            disable_on_dates=list(disable_on_dates),
            tree_name=tree_name,
        )

        if not should_check_tree:
            return res

        now = 1592654400
        res += api.time.seed(now)

        status_date = datetime.datetime.utcfromtimestamp(now) - tree_status_age
        # A status attributed to a human is mocked as the more recent one in
        # the old (host-based) app; one attributed to the auto-closer is
        # mocked as more recent in the new (tree-name-based) app.
        old_app_more_recent = tree_status_user != builder_name
        old_status = tree_status
        if tree_name:
            # Make the two apps disagree so the recipe's sync logic runs.
            old_status = "CLOSED" if tree_status == "OPEN" else "OPEN"
        res += api.step_data(
            "get current tree status",
            stdout=api.json.output(
                {
                    "general_state": (
                        tree_status.lower()
                        if old_app_more_recent
                        else old_status.lower()
                    ),
                    "username": tree_status_user,
                    "key": 12345,
                    "date": status_date.strftime(OLD_TREE_STATUS_DATE_FORMAT),
                    "message": f"Tree is {tree_status if old_app_more_recent else old_status}{MESSAGE_SUFFIX}",
                }
            ),
        )
        if tree_name:
            # Offset the new app's timestamp so that the intended app holds
            # the newer status.
            if old_app_more_recent:
                status_date -= datetime.timedelta(seconds=5)
            else:
                status_date += datetime.timedelta(seconds=5)
            res += api.step_data(
                "get current tree status (2)",
                stdout=api.json.output(
                    {
                        "generalState": (
                            old_status if old_app_more_recent else tree_status
                        ),
                        "name": f"trees/{tree_name}/status/12345",
                        "createTime": status_date.strftime(TREE_STATUS_DATE_FORMAT),
                        "message": f"Tree is {old_status if old_app_more_recent else tree_status}{MESSAGE_SUFFIX}",
                    }
                ),
            )
            if old_app_more_recent:
                res += api.step_data(
                    "check for tree status collision",
                    stdout=api.json.output(
                        {
                            "generalState": old_status,
                            "name": f"trees/{tree_name}/status/12345",
                            "createTime": status_date.strftime(TREE_STATUS_DATE_FORMAT),
                            "message": f"Tree is {old_status}{MESSAGE_SUFFIX}",
                        }
                    ),
                )
                res += api.step_data(
                    "get current tree status (3)",
                    stdout=api.json.output(
                        {
                            "generalState": tree_status,
                            "name": f"trees/{tree_name}/status/12345",
                            "createTime": status_date.strftime(TREE_STATUS_DATE_FORMAT),
                            "message": f"Tree is {tree_status}{MESSAGE_SUFFIX}",
                        }
                    ),
                )
            else:
                res += api.step_data(
                    "check for tree status collision",
                    stdout=api.json.output(
                        {
                            "general_state": old_status.lower(),
                            "username": tree_status_user,
                            "key": 12345,
                            "date": (
                                status_date - datetime.timedelta(seconds=5)
                            ).strftime(OLD_TREE_STATUS_DATE_FORMAT),
                            "message": f"Tree is {old_status}{MESSAGE_SUFFIX}",
                        }
                    ),
                )
            if should_close_tree:
                res += api.step_data(
                    "check for tree status collision (2)",
                    stdout=api.json.output(
                        {
                            "generalState": tree_status,
                            "name": f"trees/{tree_name}/status/12345",
                            "createTime": status_date.strftime(TREE_STATUS_DATE_FORMAT),
                            "message": f"Tree is {tree_status}{MESSAGE_SUFFIX}",
                        }
                    ),
                )

        if should_check_console:
            res += api.post_process(post_process.MustRun, "check console health")
        else:
            res += api.post_process(post_process.DoesNotRun, "check console health")
            return res

        checker = post_process.MustRun if should_close_tree else post_process.DoesNotRun
        res += api.post_process(checker, "create issuetracker bug")
        res += api.post_process(checker, "close tree")

        res += api.post_process(
            post_process.MustRun if should_open_tree else post_process.DoesNotRun,
            "open tree",
        )

        console_builders = []
        for builder in builder_history:
            console_builders.append(
                milo_pb2.Builder(name=f"buildbucket/luci.{project}.{bucket}/{builder}")
            )

        # NOTE: indexing console_builders[0] below requires builder_history
        # to be non-empty whenever should_check_console is True.
        milo_cfg = milo_pb2.Project(
            consoles=[
                milo_pb2.Console(
                    id="global_ci",
                    name="global integration ci",
                    repo_url="https://fuchsia.googlesource.com/integration",
                    refs=["regexp:refs/heads/main"],
                    # Split builders between the two consoles to test the
                    # multi-console logic.
                    builders=[console_builders[0]],
                ),
                milo_pb2.Console(
                    id="another_ci",
                    name="another ci",
                    repo_url="https://fuchsia.googlesource.com/other-repo",
                    refs=["regexp:refs/heads/main"],
                    builders=console_builders[1:],
                ),
            ]
        )
        res += api.luci_config.mock_config(project, "luci-milo.cfg", milo_cfg)

        # History is a list of builder statuses, most recent first.
        for builder, history in builder_history.items():
            search_results = []
            for i, build in enumerate(history):
                bp = build.builder
                bp.builder, bp.bucket, bp.project = builder, bucket, project
                build.id = pseudo_random_build_id(builder + str(i))
                # Space the builds one day apart, newest first.
                build.start_time.seconds = int(
                    now - datetime.timedelta(days=i).total_seconds()
                )
                search_results.append(build)
            res += api.buildbucket.simulated_search_results(
                search_results,
                step_name=f"check console health.{builder}",
            )

        return res

    def _build(build_status, steps=None, summary_markdown=""):
        """Returns a Build proto for mocking buildbucket.search requests.

        Args:
            build_status (str): A string corresponding to a common_pb2.Status
                value.
            steps (dict): A mapping from step names (with nestings indicated
                by | pipes) to status strings, also corresponding to
                common_pb2.Status values.
            summary_markdown (str): The summary to set for the build.
        """
        steps = steps or {}
        b = api.buildbucket.ci_build_message(status=build_status)
        b.summary_markdown = summary_markdown
        for step_name, status in steps.items():
            step = b.steps.add()
            step.name = step_name
            step.status = common_pb2.Status.Value(status)
        return b

    def success(**kwargs):
        """Returns a Build proto with a successful status."""
        return _build("SUCCESS", **kwargs)

    def failure(summary_markdown="5 tests failed", **kwargs):
        """Returns a Build proto with a failure status."""
        return _build("FAILURE", summary_markdown=summary_markdown, **kwargs)

    # As long as the most recent build for each builder is green, we shouldn't
    # close the tree.
    yield test(
        name="all_green",
        builder_history={
            "core.arm64-asan": [success(), failure()],
            "core.x64-asan": [success(), failure()],
        },
        # Non-current dates should be ignored.
        disable_on_dates=["2019-05-06"],
    )

    # If any builder's most recent `consecutive_failures` builds have all failed,
    # close the tree.
    yield test(
        name="consecutive_failures",
        builder_history={
            "core.arm64-asan": [failure(steps={"foo": "FAILURE"}), success()],
            "core.x64-asan": [
                failure(steps={"foo": "FAILURE"}),
                failure(steps={"bar": "SUCCESS", "foo": "FAILURE"}),
            ],
        },
        rules=[
            dict(
                consecutive_failures=2,
                failed_step_regexp="foo",
                builders_to_check=["core.x64-asan"],
            )
        ],
        should_close_tree=True,
        bug_component_id=12345,
        tree_name="fuchsia-stem",
    )

    # Even if there are consecutive failures, ignore them if we're only checking
    # the rule for a subset of builders, and the failing builder is not in that
    # subset.
    yield test(
        name="ignored_consecutive_failures",
        builder_history={
            "core.arm64-asan": [failure(steps={"foo": "FAILURE"}), success()],
            "core.x64-asan": [
                failure(steps={"foo": "FAILURE"}),
                failure(steps={"bar": "SUCCESS", "foo": "FAILURE"}),
            ],
        },
        rules=[
            dict(
                consecutive_failures=2,
                failed_step_regexp="foo",
                builders_to_check=["core.arm64-asan"],
            )
        ],
        should_close_tree=False,
    )

    # Builds whose summary matches one of `summary_regexp_ignore` are not
    # considered, so a builder whose only failure is ignored shouldn't close
    # the tree.
    yield test(
        name="summary_regexp_ignore",
        builder_history={
            "core.x64-asan": [
                failure(
                    summary_markdown="step failed: checkout.jiri update",
                    steps={"foo": "FAILURE"},
                ),
            ],
        },
        rules=[dict(consecutive_failures=1)],
        summary_regexp_ignore=["checkout"],
        should_close_tree=False,
    )

    # Even if a builder has failed more than `consecutive_failures` times in a
    # row, don't close the tree unless all the failed builds have a failed step
    # matching `failed_step_regexp`.
    yield test(
        name="consecutive_failures_no_matched_step",
        builder_history={
            "core.x64-asan": [
                failure(steps={"foo": "FAILURE"}),
                failure(steps={"bar": "FAILURE"}),
            ]
        },
        rules=[dict(consecutive_failures=2, failed_step_regexp="foo")],
        should_close_tree=False,
    )

    # If `concurrent_failures` or more builders' most recent builds have all
    # failed, close the tree.
    yield test(
        name="concurrent_failures",
        builder_history={
            "bringup.arm64-asan": [success()],
            "core.arm64-asan": [failure(steps={"foo": "FAILURE"})],
            "core.x64-asan": [failure(steps={"bar": "SUCCESS", "foo|x": "FAILURE"})],
        },
        rules=[dict(concurrent_failures=2, failed_step_regexp="foo")],
        should_close_tree=True,
    )

    # Even if `concurrent_failures` or more builders' most recent builds have
    # all failed, don't close the tree if they don't all have a failed step
    # matching `failed_step_regexp`.
    yield test(
        name="concurrent_failures_no_matched_step",
        builder_history={
            "bringup.arm64-asan": [failure()],
            "core.arm64-asan": [failure(steps={"bar": "FAILURE"})],
            # Only this builder has a failed step matching `failed_step_regexp`.
            "core.x64-asan": [failure(steps={"bar": "SUCCESS", "foo|x": "FAILURE"})],
        },
        rules=[dict(concurrent_failures=2, failed_step_regexp="foo")],
        should_close_tree=False,
    )

    # The tree status shouldn't be changed if it was only recently opened.
    yield test(
        name="tree_recently_opened",
        tree_status_age=DEFAULT_GRACE_PERIOD - datetime.timedelta(minutes=1),
        should_check_console=False,
    )

    # The build should exit early if the tree is already closed by a human.
    yield test(
        name="tree_closed_by_human", tree_status="CLOSED", should_check_console=False
    )

    # We should not take any action if the tree is already auto-closed and the
    # console remains unhealthy.
    yield test(
        name="tree_already_closed",
        tree_status="CLOSED",
        builder_history={
            "core.x64-asan": [
                failure(steps={"foo": "FAILURE"}),
                failure(steps={"bar": "FAILURE"}),
            ]
        },
        rules=[dict(consecutive_failures=2)],
        tree_status_user=builder_name,
        should_close_tree=False,
        tree_name="fuchsia",
    )

    # The tree should be reopened if it is currently auto-closed and the console
    # is healthy.
    yield test(
        name="reopen",
        tree_status="CLOSED",
        tree_status_user=builder_name,
        builder_history={
            "bringup.arm64-asan": [success()],
            "core.arm64-asan": [success()],
        },
        should_open_tree=True,
    )

    # Builders with no builds shouldn't affect the tree status.
    yield test(
        name="throttled__no_history",
        # If the tree is "throttled" then it's open, so the closer should still
        # check the console for failures.
        tree_status="THROTTLED",
        builder_history={"core.arm64-asan": []},
        should_close_tree=False,
    )

    # Builders with fewer than consecutive_failures builds shouldn't affect the
    # tree status.
    yield test(
        name="not_enough_history",
        builder_history={"core.arm64-asan": [failure()]},
        should_close_tree=False,
    )

    weekday_nighttime = "2021-12-03T23:53"  # Friday at 11:53 PM
    weekend_daytime = "2021-12-04T11:01"  # Saturday at 11:01 AM

    # Tree closer should exit early if the time is outside West Coast working
    # hours and there are no off-hour rules configured.
    yield test("weekend", should_check_tree=False) + api.step_data(
        "get current time", api.raw_io.stream_output_text(weekend_daytime)
    )
    yield test("nighttime", should_check_tree=False) + api.step_data(
        "get current time", api.raw_io.stream_output_text(weekday_nighttime)
    )

    # Tree closer should reopen the tree after an automatic closure, even if
    # it's outside working hours.
    yield (
        test(
            "nighttime_reopen_tree",
            should_check_tree=True,
            tree_status="CLOSED",
            tree_status_user=builder_name,
            builder_history={
                "bringup.arm64-asan": [success()],
                "core.arm64-asan": [success()],
            },
            should_open_tree=True,
        )
        + api.step_data(
            "get current time", api.raw_io.stream_output_text(weekday_nighttime)
        )
    )

    # Tree closer should exit early when the current date is listed in
    # `disable_on_dates`, even during working hours.
    yield test(
        "disable_dates", should_check_tree=False, disable_on_dates=["2021-12-01"]
    ) + api.step_data(
        "get current time", api.raw_io.stream_output_text("2021-12-01T12:00")
    )