cmd/autogardener/blamelist.go - infra/infra - Git at Google

 // Copyright 2022 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 package main

 import (
 	"context"
 	_ "embed"
 	"time"

 	"cloud.google.com/go/bigquery"
 	"go.fuchsia.dev/infra/functools"
 )

 //go:embed queries/nearby_test_results.sql
 var nearbyTestResultsQuery string

 type nearbyTestResult struct {
 	// Absolute builder name, in the form "project/bucket/builder".
 	Builder        string
 	Failed         bool
 	CommitPosition int
 }

 func getNearbyTestResults(
 	ctx context.Context,
 	bqClient *bigquery.Client,
 	sig failureSignature,
 	windowEnd time.Time,
 ) ([]nearbyTestResult, error) {
 	return runQuery[nearbyTestResult](ctx, bqClient, nearbyTestResultsQuery,
 		map[string]any{
 			"test_id": sig.FailedTest,
 			// Use a smaller window to decrease the likelihood of treating
 			// failures from separate old breakages as the first failure of the
 			// current breakage.
 			"earliest_time": windowEnd.Add(-20 * time.Hour),
 			"latest_time":   windowEnd,
 			// TODO(olivernewman): don't hardcode this.
 			"repo": "turquoise-internal.googlesource.com/integration",
 		},
 	)
 }

 // calculateBlamelistDistances computes, for each builder with a certain failure
 // mode, the number of builds between each suspect commit and the first build
 // (within the time window used by the query) that had that failure mode.
 //
 // This is analogous to the manual process of "lining up" CI builder blamelists
 // to find a culprit.
 func calculateBlamelistDistances(results []nearbyTestResult, suspects []suspectCommit) error {
 	byBuilder := make(map[string][]nearbyTestResult)
 	for _, tr := range results {
 		byBuilder[tr.Builder] = append(byBuilder[tr.Builder], tr)
 	}
 	// Sort results in chronological order (earliest first).
 	for _, results := range byBuilder {
 		functools.SortBy(results, func(tr nearbyTestResult) int {
 			return tr.CommitPosition
 		})
 	}

 	// TODO(olivernewman): handle the case where a test has broken on separate
 	// occasions within the time window. It's not easy to distinguish this from
 	// flakiness, but we can make a best effort at least for high-frequency
 	// failure modes.
 	for builder, results := range byBuilder {
 		firstFailureIdx := -1
 		for i, result := range results {
 			if result.Failed {
 				firstFailureIdx = i
 				break
 			}
 		}
 		// Skip builders where the failure mode didn't occur at all.
 		if firstFailureIdx == -1 {
 			continue
 		}
 		for i, suspect := range suspects {
 			for buildIdx, result := range results {
 				if result.CommitPosition >= suspect.CommitPosition {
 					// TODO(olivernewman): Also take blamelist size into
 					// account. If we are X% confident that a culprit falls
 					// within a given blamelist of length N, then we're only
 					// X/N% confident in each member of the blamelist. So that
 					// confidence will increase as the blamelist size decreases.
 					dist := firstFailureIdx - buildIdx
 					suspects[i].BlamelistDistances[builder] = dist
 					break
 				}
 			}
 		}
 	}
 	return nil
 }

 // scoreBlamelistDistances computes a 0-100 likelihood score for a potential
 // culprit commit based on a list of CI builder first-failure blamelist
 // distances. It takes the amount of data points into account by incorporating a
 // uncertainty level.
 func scoreBlamelistDistances(distances []int) int {
 	if len(distances) == 0 {
 		return 0
 	}
 	var weightedDistances []int
 	for _, dist := range distances {
 		if dist < 0 {
 			// If the commit landed *after* the first failure that's an
 			// especially good indicator that it's unlikely to be the culprit,
 			// so apply a large multiplier to that data point (and negate it, so
 			// the weighted distances are all positive). We simply downweight
 			// the suspect instead of completely discarding it (by returning
 			// zero) because it's possible the first failure is a different
 			// failure mode (e.g. latent flakiness) than the failure mode we're
 			// trying to diagnose.
 			dist *= -10
 		}
 		weightedDistances = append(weightedDistances, dist)
 	}
 	avg := average(weightedDistances)

 	// Calculate an uncertainty level based on the number of builders that are
 	// providing data points. If a commit was in the first failure blamelist of
 	// only builder it may be a coincidence, whereas we get a stronger signal if
 	// it's in the first failure blamelist of multiple builders.
 	uncertainty := 12 - 4*(len(distances)-1)
 	if uncertainty < 0 {
 		uncertainty = 0
 	}

 	score := 100/(avg/2+1) - float64(uncertainty)
 	if score < 0 {
 		return 0
 	}
 	return int(score)
 }
	// Copyright 2022 The Fuchsia Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	package main

	import (
	"context"
	_ "embed"
	"time"

	"cloud.google.com/go/bigquery"
	"go.fuchsia.dev/infra/functools"
	)

	//go:embed queries/nearby_test_results.sql
	var nearbyTestResultsQuery string

	type nearbyTestResult struct {
	// Absolute builder name, in the form "project/bucket/builder".
	Builder string
	Failed bool
	CommitPosition int
	}

	func getNearbyTestResults(
	ctx context.Context,
	bqClient *bigquery.Client,
	sig failureSignature,
	windowEnd time.Time,
	) ([]nearbyTestResult, error) {
	return runQuery[nearbyTestResult](ctx, bqClient, nearbyTestResultsQuery,
	map[string]any{
	"test_id": sig.FailedTest,
	// Use a smaller window to decrease the likelihood of treating
	// failures from separate old breakages as the first failure of the
	// current breakage.
	"earliest_time": windowEnd.Add(-20 * time.Hour),
	"latest_time": windowEnd,
	// TODO(olivernewman): don't hardcode this.
	"repo": "turquoise-internal.googlesource.com/integration",
	},
	)
	}

	// calculateBlamelistDistances computes, for each builder with a certain failure
	// mode, the number of builds between each suspect commit and the first build
	// (within the time window used by the query) that had that failure mode.
	//
	// This is analogous to the manual process of "lining up" CI builder blamelists
	// to find a culprit.
	func calculateBlamelistDistances(results []nearbyTestResult, suspects []suspectCommit) error {
	byBuilder := make(map[string][]nearbyTestResult)
	for _, tr := range results {
	byBuilder[tr.Builder] = append(byBuilder[tr.Builder], tr)
	}
	// Sort results in chronological order (earliest first).
	for _, results := range byBuilder {
	functools.SortBy(results, func(tr nearbyTestResult) int {
	return tr.CommitPosition
	})
	}

	// TODO(olivernewman): handle the case where a test has broken on separate
	// occasions within the time window. It's not easy to distinguish this from
	// flakiness, but we can make a best effort at least for high-frequency
	// failure modes.
	for builder, results := range byBuilder {
	firstFailureIdx := -1
	for i, result := range results {
	if result.Failed {
	firstFailureIdx = i
	break
	}
	}
	// Skip builders where the failure mode didn't occur at all.
	if firstFailureIdx == -1 {
	continue
	}
	for i, suspect := range suspects {
	for buildIdx, result := range results {
	if result.CommitPosition >= suspect.CommitPosition {
	// TODO(olivernewman): Also take blamelist size into
	// account. If we are X% confident that a culprit falls
	// within a given blamelist of length N, then we're only
	// X/N% confident in each member of the blamelist. So that
	// confidence will increase as the blamelist size decreases.
	dist := firstFailureIdx - buildIdx
	suspects[i].BlamelistDistances[builder] = dist
	break
	}
	}
	}
	}
	return nil
	}

	// scoreBlamelistDistances computes a 0-100 likelihood score for a potential
	// culprit commit based on a list of CI builder first-failure blamelist
	// distances. It takes the amount of data points into account by incorporating a
	// uncertainty level.
	func scoreBlamelistDistances(distances []int) int {
	if len(distances) == 0 {
	return 0
	}
	var weightedDistances []int
	for _, dist := range distances {
	if dist < 0 {
	// If the commit landed after the first failure that's an
	// especially good indicator that it's unlikely to be the culprit,
	// so apply a large multiplier to that data point (and negate it, so
	// the weighted distances are all positive). We simply downweight
	// the suspect instead of completely discarding it (by returning
	// zero) because it's possible the first failure is a different
	// failure mode (e.g. latent flakiness) than the failure mode we're
	// trying to diagnose.
	dist *= -10
	}
	weightedDistances = append(weightedDistances, dist)
	}
	avg := average(weightedDistances)

	// Calculate an uncertainty level based on the number of builders that are
	// providing data points. If a commit was in the first failure blamelist of
	// only builder it may be a coincidence, whereas we get a stronger signal if
	// it's in the first failure blamelist of multiple builders.
	uncertainty := 12 - 4*(len(distances)-1)
	if uncertainty < 0 {
	uncertainty = 0
	}

	score := 100/(avg/2+1) - float64(uncertainty)
	if score < 0 {
	return 0
	}
	return int(score)
	}