recipe_modules/autocorrelator/api.py - infra/recipes - Git at Google

 # Copyright 2021 The Fuchsia Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 import contextlib

 from recipe_engine import recipe_api

 from RECIPE_MODULES.fuchsia.utils import pluralize

 from PB.go.chromium.org.luci.buildbucket.proto import (
     builder_common as builder_common_pb2,
 )


 # Budget from which `_compose_exception` derives the truncation length for the
 # findings text: the header, footer and separator lengths are subtracted from
 # this value before it is passed as `truncate_length`.
 AUTOCORRELATOR_MAX_SUMMARY_SIZE = 750
 # First component of the composed summary markdown, placed before the findings.
 # Also passed to the `check-try` subcommand as `-scrub-header`.
 AUTOCORRELATOR_HEADER = "This failure may be a false rejection."
 # Component placed between the findings and the original failure reason.
 # Also passed to the `check-try` subcommand as `-scrub-footer`.
 AUTOCORRELATOR_FOOTER = "Original failure continues below."


 class AutocorrelatorApi(recipe_api.RecipeApi):
     """APIs for finding correlations between failures in Fuchsia CI/CQ."""

     def __init__(self, props, *args, **kwargs):
         """Set up the module from its recipe properties.

         Args:
             props: Module properties; `ci_bucket` and `ci_builder` are read
                 from it.
             *args: Forwarded unchanged to the base class initializer.
             **kwargs: Forwarded unchanged to the base class initializer.
         """
         super().__init__(*args, **kwargs)
         # Human-readable finding messages, appended to by `_check_ci` and
         # `_check_try` and consumed by `_compose_exception`.
         self.findings = []
         self.ci_bucket, self.ci_builder = props.ci_bucket, props.ci_builder

     @contextlib.contextmanager
     def context(
         self,
         ci_base_commit,
         ignore_failed_build=False,
         ignore_skipped_build=False,
         ignore_skipped_tests=False,
     ):
         """Wrap steps so that their failures are checked for correlations.

         When the wrapped code raises a StepFailure during a try build, the
         failure's reason is compared against other builds by `_check_try`
         and then `_check_ci`. Afterwards the exception returned by
         `_compose_exception` is raised; that is either the original
         exception or a new one whose summary also lists the findings.

         In non-try builds no checks are run. If the build is in global
         shutdown the original exception is re-raised untouched.

         Most recipes that use the autocorrelator should go through this
         high-level method.

         Usage:

             with api.autocorrelator.context(ci_base_commit="abc123"):
                 run_steps_that_might_fail()

         Args:
             ci_base_commit (str): Forwarded to `_check_ci` as `base_commit`.
             ignore_failed_build (bool): Forwarded to `_check_try`.
             ignore_skipped_build (bool): Forwarded to `_check_try`.
             ignore_skipped_tests (bool): Forwarded to `_check_try`.
         """
         try:
             yield
         except self.m.step.StepFailure as failure:
             if self.m.runtime.in_global_shutdown:
                 # A cancelled build (manually or by CQ) is most likely an
                 # expected outcome, so correlating it adds nothing. The
                 # recipe engine also refuses to run further steps once a
                 # build has been cancelled.
                 raise
             if self.m.buildbucket_util.is_tryjob:
                 try_check_options = dict(
                     ignore_skipped_build=ignore_skipped_build,
                     ignore_failed_build=ignore_failed_build,
                     ignore_skipped_tests=ignore_skipped_tests,
                 )
                 with self.m.step.nest(
                     "check for correlated failures"
                 ) as nest_presentation:
                     self._check_try(
                         "check try", failure, failure.reason, **try_check_options
                     )
                     self._check_ci(
                         step_name="check ci",
                         base_commit=ci_base_commit,
                         exception=failure,
                         summary_markdown=failure.reason,
                     )
                     finding_count = len(self.findings)
                     nest_presentation.properties[
                         "num_autocorrelator_findings"
                     ] = finding_count
             raise self._compose_exception(failure)

     def _check_ci(
         self,
         step_name,
         base_commit,
         exception,
         summary_markdown,
         score_threshold=0.95,
     ):
         """Look for a matching failure in a CI-like builder.

         Runs the autocorrelator `check-ci` subcommand and, when it reports a
         non-green build whose similarity score reaches `score_threshold`,
         links that build on the step and appends a message to
         `self.findings`. Does nothing when no CI bucket is configured.

         Args:
             step_name (str): Name of the nesting step.
             base_commit (str): Base commit as sha1 to check against the CI
                 builder.
             exception (Exception): The exception the summary markdown belongs
                 to; used to derive the `-build-status` argument.
             summary_markdown (str): Summary markdown to compare.
             score_threshold (float): Minimum score for recording a finding.
         """
         assert self.m.buildbucket_util.is_tryjob, "invalid command in non-try"
         if not self.ci_bucket:
             return
         with self.m.step.nest(step_name):
             current_builder = self.m.buildbucket.build.builder
             # Same project as the current build, in the configured CI bucket;
             # fall back to the current builder's name if none was configured.
             ci_builder_id = builder_common_pb2.BuilderID(
                 project=current_builder.project,
                 bucket=self.ci_bucket,
                 builder=self.ci_builder or current_builder.builder,
             )
             cmd_args = ["check-ci", "-base-commit", base_commit]
             cmd_args += [
                 "-builder",
                 self.m.buildbucket_util.full_builder_name(ci_builder_id),
             ]
             cmd_args += [
                 "-build-status",
                 self.m.buildbucket_util.build_status(exception),
             ]
             cmd_args += [
                 "-summary-markdown-path",
                 self.m.raw_io.input_text(summary_markdown),
             ]
             cmd_args += ["-json-output", self.m.json.output()]

             result = self._run("run autocorrelator", cmd_args)
             ci_finding = result.json.output
             if not ci_finding or ci_finding["is_green"]:
                 return
             similarity = ci_finding["score"]
             if not similarity >= score_threshold:
                 return
             build_id = ci_finding["build_id"]
             build_url = self.m.buildbucket.build_url(build_id=build_id)
             result.presentation.links[build_id] = build_url
             commit_dist = ci_finding["commit_dist"]
             self.findings.append(
                 f"Correlated failure found in CI: {similarity:.2f} similarity, "
                 f"{commit_dist} git commit distance\n\n- {build_url}"
             )

     def _check_try(
         self,
         step_name,
         exception,
         summary_markdown,
         builder=None,
         change_num=None,
         ignore_failed_build=False,
         ignore_skipped_build=False,
         ignore_skipped_tests=False,
         score_threshold=0.95,
         build_frequency_threshold=0.6,
     ):
         """Compare summary markdown similarity to a try-like builder. Record an
         aggregate finding if there are one or more findings that meet the score
         threshold, and the frequency of findings meets the build frequency
         threshold.

         The check is skipped (with an explanatory step text) when the current
         build has no triggering Gerrit changes. Every build whose score meets
         the threshold is linked on the autocorrelator step, whether or not an
         aggregate finding ends up being recorded.

         Args:
             step_name (str): Name of the step.
             exception (Exception): The exception corresponding to the summary
                 markdown.
             summary_markdown (str): Summary markdown to compare.
             builder (str or None): Fully-qualified Buildbucket builder name of
                 the try-like builder to check. If None, use the current build's
                 builder name.
             change_num (int or None): Gerrit change number. If None, use the
                 current build's change number.
             ignore_failed_build (bool): Whether to ignore try builds which
                 failed to build.
             ignore_skipped_build (bool): Whether to ignore try builds with
                 unaffected build graphs.
             ignore_skipped_tests (bool): Whether to ignore builds which skipped
                 testing.
             score_threshold (float): Count a finding as part of the aggregate if
                 its score meets this threshold.
             build_frequency_threshold (float): Record an aggregate finding if
                 the frequency of findings meets this threshold, i.e. a 0.5
                 threshold means that a finding will be recorded if
                 num_findings / num_inspected_builds >= 0.5.
         """
         assert self.m.buildbucket_util.is_tryjob, "invalid command in non-try"
         with self.m.step.nest(step_name) as presentation:
             if not self.m.buildbucket.build.input.gerrit_changes:
                 presentation.step_text = "no triggering gerrit_changes, skipping checks"
                 return
             args = [
                 "check-try",
                 "-builder",
                 builder or self.m.buildbucket_util.full_builder_name(),
                 "-change-num",
                 change_num or self.m.buildbucket.build.input.gerrit_changes[0].change,
                 "-build-status",
                 self.m.buildbucket_util.build_status(exception),
                 "-summary-markdown-path",
                 self.m.raw_io.input_text(summary_markdown),
                 # Strip text that a previous autocorrelator run may have added
                 # to the summaries being compared.
                 "-scrub-header",
                 AUTOCORRELATOR_HEADER,
                 "-scrub-footer",
                 AUTOCORRELATOR_FOOTER,
                 "-json-output",
                 self.m.json.output(),
             ]
             if ignore_failed_build:
                 args.append("-ignore-failed-build")
             if ignore_skipped_build:
                 args.append("-ignore-skipped-build")
             if ignore_skipped_tests:
                 args.append("-ignore-skipped-tests")
             step = self._run("run autocorrelator", args)
             findings = step.json.output
             if not findings:
                 return

             findings_meeting_threshold = [
                 finding for finding in findings if finding["score"] >= score_threshold
             ]
             if not findings_meeting_threshold:
                 # Nothing to aggregate. Returning here also avoids dividing by
                 # zero below when `build_frequency_threshold` is <= 0.
                 return

             # Track the links locally (keyed by build ID, so repeated IDs
             # collapse) rather than reading them back from the presentation,
             # so the message lists exactly the builds matched here.
             build_links = {}
             for finding in findings_meeting_threshold:
                 build_id = finding["build_id"]
                 build_links[build_id] = self.m.buildbucket.build_url(build_id=build_id)
                 step.presentation.links[build_id] = build_links[build_id]

             num_matches = len(findings_meeting_threshold)
             build_frequency = num_matches / len(findings)
             if build_frequency >= build_frequency_threshold:
                 avg_score = (
                     sum(finding["score"] for finding in findings_meeting_threshold)
                     / num_matches
                 )
                 bullet_list = "\n".join(f"- {link}" for link in build_links.values())
                 self.findings.append(
                     f"Similar failures found in try: {avg_score:.2f} avg similarity "
                     f"in {pluralize('build', num_matches)}\n\n{bullet_list}"
                 )

     def _compose_exception(self, original_exc):
         """Build the exception to raise, folding in any recorded findings.

         Args:
             original_exc (Exception): Exception to modify.

         Returns:
             Exception: `original_exc` itself when it is not a StepFailure or
             when there are no findings. Otherwise a new InfraFailure,
             StepWarning or StepFailure (chosen to match the original's type)
             whose summary markdown is the header, the findings, the footer
             and the original reason.
         """
         # Only a StepFailure gives a consistent way to read the original
         # summary markdown.
         if not isinstance(original_exc, self.m.step.StepFailure):
             return original_exc
         # Without findings there is no extra context worth attaching.
         if not self.findings:
             return original_exc

         separator = "\n\n"
         # Room taken by the header, the footer and the three separators that
         # the final join inserts between its four components.
         reserved = (
             len(AUTOCORRELATOR_HEADER)
             + len(AUTOCORRELATOR_FOOTER)
             + 3 * len(separator)
         )
         findings_summary = self.m.buildbucket_util.summary_message(
             separator.join(self.findings),
             "findings truncated, see autocorrelator steps for full details",
             truncate_length=AUTOCORRELATOR_MAX_SUMMARY_SIZE - reserved,
             escape_markdown=False,
         )
         components = [
             AUTOCORRELATOR_HEADER,
             findings_summary,
             AUTOCORRELATOR_FOOTER,
             original_exc.reason,
         ]
         composed_markdown = separator.join(components)

         # Keep the failure category of the original exception.
         step_module = self.m.step
         if isinstance(original_exc, step_module.InfraFailure):
             failure_cls = step_module.InfraFailure
         elif isinstance(original_exc, step_module.StepWarning):
             failure_cls = step_module.StepWarning
         else:
             failure_cls = step_module.StepFailure
         return failure_cls(composed_markdown)

     def _run(self, step_name, args):
         """Run the autocorrelator tool as an infra step.

         Args:
             step_name (str): Name of the step.
             args (list): Command-line arguments placed after the tool itself.

         Returns:
             Whatever `self.m.step` returns for the executed command.
         """
         tool = self._autocorrelator_tool
         command = [tool] + args
         return self.m.step(step_name, command, infra_step=True)

     @property
     def _autocorrelator_tool(self):
         """Return the autocorrelator tool obtained through `ensure_tool`.

         The tool is requested by the name "autocorrelator" using this module's
         `tool_manifest.json` resource. The returned value is used as the first
         element of the command line in `_run`.
         """
         manifest = self.resource("tool_manifest.json")
         return self.m.ensure_tool("autocorrelator", manifest)
	# Copyright 2021 The Fuchsia Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	import contextlib

	from recipe_engine import recipe_api

	from RECIPE_MODULES.fuchsia.utils import pluralize

	from PB.go.chromium.org.luci.buildbucket.proto import (
	builder_common as builder_common_pb2,
	)


	# Budget from which `_compose_exception` derives the truncation length for the
	# findings text: the header, footer and separator lengths are subtracted from
	# this value before it is passed as `truncate_length`.
	AUTOCORRELATOR_MAX_SUMMARY_SIZE = 750
	# First component of the composed summary markdown, placed before the findings.
	# Also passed to the `check-try` subcommand as `-scrub-header`.
	AUTOCORRELATOR_HEADER = "This failure may be a false rejection."
	# Component placed between the findings and the original failure reason.
	# Also passed to the `check-try` subcommand as `-scrub-footer`.
	AUTOCORRELATOR_FOOTER = "Original failure continues below."


	class AutocorrelatorApi(recipe_api.RecipeApi):
	    """APIs for finding correlations between failures in Fuchsia CI/CQ."""

	    def __init__(self, props, *args, **kwargs):
	        """Set up the module from its recipe properties.

	        Args:
	            props: Module properties; `ci_bucket` and `ci_builder` are read
	                from it.
	            *args: Forwarded unchanged to the base class initializer.
	            **kwargs: Forwarded unchanged to the base class initializer.
	        """
	        super().__init__(*args, **kwargs)
	        self.ci_bucket = props.ci_bucket
	        self.ci_builder = props.ci_builder
	        # Human-readable finding messages, appended to by `_check_ci` and
	        # `_check_try` and consumed by `_compose_exception`.
	        self.findings = []

	    @contextlib.contextmanager
	    def context(
	        self,
	        ci_base_commit,
	        ignore_failed_build=False,
	        ignore_skipped_build=False,
	        ignore_skipped_tests=False,
	    ):
	        """Context manager that analyzes failures using autocorrelator.

	        If the wrapped code raises a StepFailure that matches the summary
	        markdown of recent CI or CQ builds, raises a new wrapper StepFailure
	        passed through `_compose_exception`.

	        Most recipes that use the autocorrelator should access it through this
	        high-level method.

	        Usage:

	            with api.autocorrelator.context(ci_base_commit="abc123"):
	                run_steps_that_might_fail()

	        Args:
	            ci_base_commit (str): Passed through to `_check_ci`.
	            ignore_failed_build (bool): Passed through to `_check_try`.
	            ignore_skipped_build (bool): Passed through to `_check_try`.
	            ignore_skipped_tests (bool): Passed through to `_check_try`.
	        """
	        try:
	            yield
	        except self.m.step.StepFailure as exc:
	            if self.m.runtime.in_global_shutdown:
	                # The build was cancelled, likely manually or by CQ. This is
	                # probably an expected result so we don't need to try to
	                # correlate it, plus the recipe engine doesn't allow running
	                # more steps after a cancellation anyway.
	                raise
	            if self.m.buildbucket_util.is_tryjob:
	                with self.m.step.nest("check for correlated failures") as presentation:
	                    self._check_try(
	                        "check try",
	                        exc,
	                        exc.reason,
	                        ignore_skipped_build=ignore_skipped_build,
	                        ignore_failed_build=ignore_failed_build,
	                        ignore_skipped_tests=ignore_skipped_tests,
	                    )
	                    self._check_ci(
	                        "check ci",
	                        ci_base_commit,
	                        exc,
	                        exc.reason,
	                    )
	                    presentation.properties["num_autocorrelator_findings"] = len(
	                        self.findings
	                    )
	            raise self._compose_exception(exc)

	    def _check_ci(
	        self,
	        step_name,
	        base_commit,
	        exception,
	        summary_markdown,
	        score_threshold=0.95,
	    ):
	        """Compare summary markdown similarity to a CI-like builder. Record a
	        finding if it meets the score threshold.

	        Args:
	            step_name (str): Name of the step.
	            base_commit (str): Base commit as sha1 to check against `builder`.
	            exception (Exception): The exception corresponding to the summary
	                markdown.
	            summary_markdown (str): Summary markdown to compare.
	            score_threshold (float): Record a finding if it meets this
	                threshold.
	        """
	        assert self.m.buildbucket_util.is_tryjob, "invalid command in non-try"
	        if not self.ci_bucket:
	            return
	        with self.m.step.nest(step_name):
	            args = [
	                "check-ci",
	                "-base-commit",
	                base_commit,
	                "-builder",
	                self.m.buildbucket_util.full_builder_name(
	                    builder_common_pb2.BuilderID(
	                        project=self.m.buildbucket.build.builder.project,
	                        bucket=self.ci_bucket,
	                        builder=self.ci_builder
	                        or self.m.buildbucket.build.builder.builder,
	                    )
	                ),
	                "-build-status",
	                self.m.buildbucket_util.build_status(exception),
	                "-summary-markdown-path",
	                self.m.raw_io.input_text(summary_markdown),
	                "-json-output",
	                self.m.json.output(),
	            ]
	            step = self._run("run autocorrelator", args)
	            finding = step.json.output
	            if not finding or finding["is_green"]:
	                return
	            score = finding["score"]
	            if score >= score_threshold:
	                ci_build_link = self.m.buildbucket.build_url(
	                    build_id=finding["build_id"]
	                )
	                step.presentation.links[finding["build_id"]] = ci_build_link
	                self.findings.append(
	                    f"Correlated failure found in CI: {score:.2f} similarity, {finding['commit_dist']} git commit distance\n\n- {ci_build_link}"
	                )

	    def _check_try(
	        self,
	        step_name,
	        exception,
	        summary_markdown,
	        builder=None,
	        change_num=None,
	        ignore_failed_build=False,
	        ignore_skipped_build=False,
	        ignore_skipped_tests=False,
	        score_threshold=0.95,
	        build_frequency_threshold=0.6,
	    ):
	        """Compare summary markdown similarity to a try-like builder. Record an
	        aggregate finding if there are one or more findings that meet the score
	        threshold, and the frequency of findings meets the build frequency
	        threshold.

	        Args:
	            step_name (str): Name of the step.
	            exception (Exception): The exception corresponding to the summary
	                markdown.
	            summary_markdown (str): Summary markdown to compare.
	            builder (str or None): Fully-qualified Buildbucket builder name of
	                the try-like builder to check. If None, use the current build's
	                builder name.
	            change_num (int or None): Gerrit change number. If None, use the
	                current build's change number.
	            ignore_failed_build (bool): Whether to ignore try builds which
	                failed to build.
	            ignore_skipped_build (bool): Whether to ignore try builds with
	                unaffected build graphs.
	            ignore_skipped_tests (bool): Whether to ignore builds which skipped
	                testing.
	            score_threshold (float): Count a finding as part of the aggregate if
	                its score meets this threshold.
	            build_frequency_threshold (float): Record an aggregate finding if
	                the frequency of findings meets this threshold, i.e. a 0.5
	                threshold means that a finding will be recorded if
	                num_findings / num_inspected_builds >= 0.5.
	        """
	        assert self.m.buildbucket_util.is_tryjob, "invalid command in non-try"
	        with self.m.step.nest(step_name) as presentation:
	            if not self.m.buildbucket.build.input.gerrit_changes:
	                presentation.step_text = "no triggering gerrit_changes, skipping checks"
	                return
	            args = [
	                "check-try",
	                "-builder",
	                builder or self.m.buildbucket_util.full_builder_name(),
	                "-change-num",
	                change_num or self.m.buildbucket.build.input.gerrit_changes[0].change,
	                "-build-status",
	                self.m.buildbucket_util.build_status(exception),
	                "-summary-markdown-path",
	                self.m.raw_io.input_text(summary_markdown),
	                "-scrub-header",
	                AUTOCORRELATOR_HEADER,
	                "-scrub-footer",
	                AUTOCORRELATOR_FOOTER,
	                "-json-output",
	                self.m.json.output(),
	            ]
	            if ignore_failed_build:
	                args.append("-ignore-failed-build")
	            if ignore_skipped_build:
	                args.append("-ignore-skipped-build")
	            if ignore_skipped_tests:
	                args.append("-ignore-skipped-tests")
	            step = self._run("run autocorrelator", args)
	            findings = step.json.output
	            if not findings:
	                return
	            findings_meeting_threshold = []
	            for finding in findings:
	                if finding["score"] >= score_threshold:
	                    findings_meeting_threshold.append(finding)
	                    try_build_link = self.m.buildbucket.build_url(
	                        build_id=finding["build_id"]
	                    )
	                    step.presentation.links[finding["build_id"]] = try_build_link
	            if not findings_meeting_threshold:
	                # Nothing to aggregate. Returning here also avoids dividing by
	                # zero below when `build_frequency_threshold` is <= 0.
	                return
	            build_frequency = len(findings_meeting_threshold) / float(len(findings))
	            if build_frequency >= build_frequency_threshold:
	                self.findings.append(
	                    "Similar failures found in try: %0.2f avg similarity in %s\n\n%s"
	                    % (
	                        sum(
	                            (finding["score"] for finding in findings_meeting_threshold)
	                        )
	                        / len(findings_meeting_threshold),
	                        pluralize("build", len(findings_meeting_threshold)),
	                        "\n".join(
	                            "- %s" % build_link
	                            for build_link in step.presentation.links.values()
	                        ),
	                    ),
	                )

	    def _compose_exception(self, original_exc):
	        """Compose a modified StepFailure with the current autocorrelator
	        findings.

	        Args:
	            original_exc (Exception): Exception to modify.

	        Returns:
	            StepFailure: containing the modified summary markdown
	            including autocorrelator findings.
	        """
	        # If the exception is not a StepFailure then there's no consistent way
	        # to obtain the original summary markdown. Likewise, if there are no
	        # findings then there's no context to add to the exception.
	        if not isinstance(original_exc, self.m.step.StepFailure) or not self.findings:
	            return original_exc

	        # Construct a summary markdown for current findings.
	        autocorrelator_summary = self.m.buildbucket_util.summary_message(
	            "\n\n".join(self.findings),
	            "findings truncated, see autocorrelator steps for full details",
	            truncate_length=(
	                AUTOCORRELATOR_MAX_SUMMARY_SIZE
	                # Include space for header and footer text.
	                - len(AUTOCORRELATOR_HEADER)
	                - len(AUTOCORRELATOR_FOOTER)
	                # Include space for newline characters between each component.
	                # See join() below.
	                - 2 * 3
	            ),
	            escape_markdown=False,
	        )
	        # Stitch together the full summary markdown.
	        summary_markdown = "\n\n".join(
	            (
	                AUTOCORRELATOR_HEADER,
	                autocorrelator_summary,
	                AUTOCORRELATOR_FOOTER,
	                original_exc.reason,
	            )
	        )

	        if isinstance(original_exc, self.m.step.InfraFailure):
	            return self.m.step.InfraFailure(summary_markdown)
	        elif isinstance(original_exc, self.m.step.StepWarning):
	            return self.m.step.StepWarning(summary_markdown)
	        return self.m.step.StepFailure(summary_markdown)

	    def _run(self, step_name, args):
	        """Run the autocorrelator tool with `args` as an infra step."""
	        return self.m.step(
	            step_name, [self._autocorrelator_tool] + args, infra_step=True
	        )

	    @property
	    def _autocorrelator_tool(self):
	        """Return the autocorrelator tool obtained through `ensure_tool`."""
	        return self.m.ensure_tool("autocorrelator", self.resource("tool_manifest.json"))