# blob: e8f922bf49d8eb63c71884e66dfb24dcadb6065d [file] [log] [blame]
# Copyright 2021 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import contextlib
from recipe_engine import recipe_api
from RECIPE_MODULES.fuchsia.utils import pluralize
from PB.go.chromium.org.luci.buildbucket.proto import (
builder_common as builder_common_pb2,
)
# Total size budget for the autocorrelator section of the summary markdown.
# `_compose_exception` subtracts the header, footer and separator lengths from
# this value to obtain the truncation length for the findings text.
AUTOCORRELATOR_MAX_SUMMARY_SIZE = 750
# Text placed before the findings in the composed summary markdown. Also passed
# to the `check-try` subcommand as `-scrub-header` (presumably so that text
# added by an earlier autocorrelator run is ignored when comparing summaries;
# confirm against the tool).
AUTOCORRELATOR_HEADER = "This failure may be a false rejection."
# Text placed between the findings and the original failure reason in the
# composed summary markdown. Also passed to `check-try` as `-scrub-footer`.
AUTOCORRELATOR_FOOTER = "Original failure continues below."
class AutocorrelatorApi(recipe_api.RecipeApi):
    """APIs for finding correlations between failures in Fuchsia CI/CQ."""

    def __init__(self, props, *args, **kwargs):
        """Store the module properties and start with no findings.

        Args:
            props: Module properties; `ci_bucket` and `ci_builder` are read.
            *args: Forwarded to `RecipeApi.__init__`.
            **kwargs: Forwarded to `RecipeApi.__init__`.
        """
        super().__init__(*args, **kwargs)
        self.ci_bucket = props.ci_bucket
        self.ci_builder = props.ci_builder
        # Human-readable markdown snippets, one per recorded correlation.
        self.findings = []

    @contextlib.contextmanager
    def context(
        self,
        ci_base_commit,
        ignore_failed_build=False,
        ignore_skipped_build=False,
        ignore_skipped_tests=False,
    ):
        """Context manager that analyzes failures using autocorrelator.

        If the wrapped code raises a StepFailure that matches the summary
        markdown of recent CI or CQ builds, raises a new wrapper StepFailure
        produced by `_compose_exception`.

        Most recipes that use the autocorrelator should access it through this
        high-level method.

        Usage:

            with api.autocorrelator.context(ci_base_commit="abc123"):
                run_steps_that_might_fail()

        Args:
            ci_base_commit (str): Passed through to `_check_ci`.
            ignore_failed_build (bool): Passed through to `_check_try`.
            ignore_skipped_build (bool): Passed through to `_check_try`.
            ignore_skipped_tests (bool): Passed through to `_check_try`.

        Raises:
            StepFailure: The original failure when there are no findings (or
                the build is shutting down), otherwise a failure of the same
                kind whose reason is prefixed with the findings.
        """
        try:
            yield
        except self.m.step.StepFailure as exc:
            if self.m.runtime.in_global_shutdown:
                # The build was cancelled, likely manually or by CQ. This is
                # probably an expected result so we don't need to try to
                # correlate it, plus the recipe engine doesn't allow running
                # more steps after a cancellation anyway.
                raise

            # Correlation is only attempted for try builds; other builds fall
            # straight through to composing the exception.
            if self.m.buildbucket_util.is_tryjob:
                with self.m.step.nest("check for correlated failures") as presentation:
                    self._check_try(
                        "check try",
                        exc,
                        exc.reason,
                        ignore_skipped_build=ignore_skipped_build,
                        ignore_failed_build=ignore_failed_build,
                        ignore_skipped_tests=ignore_skipped_tests,
                    )
                    self._check_ci(
                        "check ci",
                        ci_base_commit,
                        exc,
                        exc.reason,
                    )
                    presentation.properties["num_autocorrelator_findings"] = len(
                        self.findings
                    )

            composed = self._compose_exception(exc)
            if composed is exc:
                # Nothing to add: re-raise the original failure untouched.
                raise
            # Chain explicitly so tracebacks show the wrapper as a consequence
            # of the original failure rather than an error during handling.
            raise composed from exc

    def _check_ci(
        self,
        step_name,
        base_commit,
        exception,
        summary_markdown,
        score_threshold=0.95,
    ):
        """Compare summary markdown similarity to a CI-like builder. Record a
        finding if it meets the score threshold.

        Does nothing when no `ci_bucket` property is configured.

        Args:
            step_name (str): Name of the step.
            base_commit (str): Base commit as sha1 to check against `builder`.
            exception (Exception): The exception corresponding to the summary
                markdown.
            summary_markdown (str): Summary markdown to compare.
            score_threshold (float): Record a finding if it meets this
                threshold.
        """
        assert self.m.buildbucket_util.is_tryjob, "invalid command in non-try"
        if not self.ci_bucket:
            return

        with self.m.step.nest(step_name):
            # The CI builder lives in the configured CI bucket of the current
            # project; its name defaults to the current builder's name.
            ci_builder_name = self.m.buildbucket_util.full_builder_name(
                builder_common_pb2.BuilderID(
                    project=self.m.buildbucket.build.builder.project,
                    bucket=self.ci_bucket,
                    builder=self.ci_builder
                    or self.m.buildbucket.build.builder.builder,
                )
            )
            args = [
                "check-ci",
                "-base-commit",
                base_commit,
                "-builder",
                ci_builder_name,
                "-build-status",
                self.m.buildbucket_util.build_status(exception),
                "-summary-markdown-path",
                self.m.raw_io.input_text(summary_markdown),
                "-json-output",
                self.m.json.output(),
            ]
            step = self._run("run autocorrelator", args)

            finding = step.json.output
            # No output, or a green CI build, means there is nothing to
            # correlate with.
            if not finding or finding["is_green"]:
                return

            score = finding["score"]
            if score < score_threshold:
                return

            ci_build_link = self.m.buildbucket.build_url(build_id=finding["build_id"])
            step.presentation.links[finding["build_id"]] = ci_build_link
            self.findings.append(
                f"Correlated failure found in CI: {score:.2f} similarity, {finding['commit_dist']} git commit distance\n\n- {ci_build_link}"
            )

    def _check_try(
        self,
        step_name,
        exception,
        summary_markdown,
        builder=None,
        change_num=None,
        ignore_failed_build=False,
        ignore_skipped_build=False,
        ignore_skipped_tests=False,
        score_threshold=0.95,
        build_frequency_threshold=0.6,
    ):
        """Compare summary markdown similarity to a try-like builder. Record an
        aggregate finding if there are one or more findings that meet the score
        threshold, and the frequency of findings meets the build frequency
        threshold.

        Args:
            step_name (str): Name of the step.
            exception (Exception): The exception corresponding to the summary
                markdown.
            summary_markdown (str): Summary markdown to compare.
            builder (str or None): Fully-qualified Buildbucket builder name of
                the try-like builder to check. If None, use the current build's
                builder name.
            change_num (int or None): Gerrit change number. If None, use the
                current build's change number.
            ignore_failed_build (bool): Whether to ignore try builds which
                failed to build.
            ignore_skipped_build (bool): Whether to ignore try builds with
                unaffected build graphs.
            ignore_skipped_tests (bool): Whether to ignore builds which skipped
                testing.
            score_threshold (float): Count a finding as part of the aggregate if
                its score meets this threshold.
            build_frequency_threshold (float): Record an aggregate finding if
                the frequency of findings meets this threshold, i.e. a 0.5
                threshold means that a finding will be recorded if
                num_findings / num_inspected_builds >= 0.5.
        """
        assert self.m.buildbucket_util.is_tryjob, "invalid command in non-try"
        with self.m.step.nest(step_name) as presentation:
            gerrit_changes = self.m.buildbucket.build.input.gerrit_changes
            if not gerrit_changes:
                presentation.step_text = "no triggering gerrit_changes, skipping checks"
                return

            args = [
                "check-try",
                "-builder",
                builder or self.m.buildbucket_util.full_builder_name(),
                "-change-num",
                change_num or gerrit_changes[0].change,
                "-build-status",
                self.m.buildbucket_util.build_status(exception),
                "-summary-markdown-path",
                self.m.raw_io.input_text(summary_markdown),
                "-scrub-header",
                AUTOCORRELATOR_HEADER,
                "-scrub-footer",
                AUTOCORRELATOR_FOOTER,
                "-json-output",
                self.m.json.output(),
            ]
            if ignore_failed_build:
                args.append("-ignore-failed-build")
            if ignore_skipped_build:
                args.append("-ignore-skipped-build")
            if ignore_skipped_tests:
                args.append("-ignore-skipped-tests")
            step = self._run("run autocorrelator", args)

            findings = step.json.output
            if not findings:
                return

            matching = [
                finding for finding in findings if finding["score"] >= score_threshold
            ]
            # An aggregate finding needs at least one matching build. Bailing
            # out here also keeps the average below from dividing by zero when
            # a caller passes a build_frequency_threshold <= 0.
            if not matching:
                return

            # Keyed by build id so a build reported twice is only linked once.
            build_links = {}
            for finding in matching:
                build_id = finding["build_id"]
                link = self.m.buildbucket.build_url(build_id=build_id)
                build_links[build_id] = link
                step.presentation.links[build_id] = link

            build_frequency = len(matching) / len(findings)
            if build_frequency < build_frequency_threshold:
                return

            avg_score = sum(finding["score"] for finding in matching) / len(matching)
            build_count = pluralize("build", len(matching))
            links_markdown = "\n".join(f"- {link}" for link in build_links.values())
            self.findings.append(
                f"Similar failures found in try: {avg_score:0.2f} avg similarity in {build_count}\n\n{links_markdown}"
            )

    def _compose_exception(self, original_exc):
        """Compose a modified StepFailure with the current autocorrelator
        findings.

        Args:
            original_exc (Exception): Exception to modify.

        Returns:
            Exception: `original_exc` itself when it is not a StepFailure or
                when there are no findings; otherwise a new InfraFailure,
                StepWarning or StepFailure (matching the kind of
                `original_exc`) containing the modified summary markdown
                including autocorrelator findings.
        """
        # If the exception is not a StepFailure then there's no consistent way
        # to obtain the original summary markdown. Likewise, if there are no
        # findings then there's no context to add to the exception.
        if not isinstance(original_exc, self.m.step.StepFailure) or not self.findings:
            return original_exc

        # Construct a summary markdown for current findings.
        autocorrelator_summary = self.m.buildbucket_util.summary_message(
            "\n\n".join(self.findings),
            "findings truncated, see autocorrelator steps for full details",
            truncate_length=(
                AUTOCORRELATOR_MAX_SUMMARY_SIZE
                # Include space for header and footer text.
                - len(AUTOCORRELATOR_HEADER)
                - len(AUTOCORRELATOR_FOOTER)
                # Include space for newline characters between each component.
                # See join() below.
                - 2 * 3
            ),
            escape_markdown=False,
        )

        # Stitch together the full summary markdown.
        summary_markdown = "\n\n".join(
            (
                AUTOCORRELATOR_HEADER,
                autocorrelator_summary,
                AUTOCORRELATOR_FOOTER,
                original_exc.reason,
            )
        )

        # Preserve the failure kind so the wrapper is treated the same way as
        # the original exception.
        if isinstance(original_exc, self.m.step.InfraFailure):
            return self.m.step.InfraFailure(summary_markdown)
        if isinstance(original_exc, self.m.step.StepWarning):
            return self.m.step.StepWarning(summary_markdown)
        return self.m.step.StepFailure(summary_markdown)

    def _run(self, step_name, args):
        """Run the autocorrelator tool as an infra step.

        Args:
            step_name (str): Name of the step.
            args (list): Arguments appended after the tool path.

        Returns:
            The result of `self.m.step(...)` for the invocation.
        """
        return self.m.step(
            step_name, [self._autocorrelator_tool] + args, infra_step=True
        )

    @property
    def _autocorrelator_tool(self):
        """The autocorrelator tool, obtained via `ensure_tool` using this
        module's `tool_manifest.json` resource."""
        return self.m.ensure_tool("autocorrelator", self.resource("tool_manifest.json"))