blob: 56da838d2a9aceb6e89ccf508cd08084c81c59aa [file] [log] [blame]
// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package catapult
import (
"errors"
"log"
"math"
schema "fuchsia.googlesource.com/infra/infra/perf/schema/v1"
uuid "github.com/satori/go.uuid"
)
// Histogram is a Catapult histogram object.
//
// See https://github.com/catapult-project/catapult/blob/master/docs/histogram-set-json-format.md
// for more information on the format.
//
// TODO(kjharland): Add these missing fields as needed
// ShortName
// BinBoundaries
// NanDiagnostics
// AllBins
// SummaryOptions
// SampleValues
type Histogram struct {
	// Name is the histogram's name. createHistogram sets it to the name it is
	// given, which is either a BenchmarkData label or
	// "{BenchmarkData.Label}_{Sample.Label}".
	Name string `json:"name"`

	// GUID identifies this Histogram. createHistogram fills it with the string
	// form of uuid.NewV4().
	GUID string `json:"guid"`

	// Unit is the unit of the sample values. createHistogram always uses
	// "ms_smallerIsBetter".
	Unit string `json:"unit"`

	// Description is not populated by createHistogram.
	Description string `json:"description"`

	// MaxNumSampleValues is set by createHistogram to the number of sample
	// values the Histogram was built from.
	MaxNumSampleValues int `json:"maxNumSampleValues"`

	// NumNans is always set to 0 by createHistogram.
	NumNans int `json:"numNans"`

	// Running holds the statistics returned by computeRunningStatistics, in
	// this order: count, max, meanlogs, mean, min, sum, variance.
	Running []float64 `json:"running"`

	// Diagnostics maps a Diagnostic's name to its GUID.
	//
	// These map entries communicate that the diagnostic with the given
	// name and GUID contains metadata that can help debug regressions and
	// other issues with this Histogram in the Catapult Dashboard.
	//
	// The map is nil until AddDiagnostic is first called.
	Diagnostics map[string]string `json:"diagnostics"`
}
// AddDiagnostic records that the Diagnostic called name, identified by guid,
// applies to this Histogram.
//
// The Diagnostics map is allocated on first use. If name is already mapped to
// a different GUID, a warning is logged before the old GUID is replaced.
// Adding the same name and GUID again is silent.
func (h *Histogram) AddDiagnostic(name string, guid string) {
	if h.Diagnostics == nil {
		h.Diagnostics = map[string]string{}
	}

	previous, found := h.Diagnostics[name]
	if found && previous != guid {
		log.Printf(
			"Overwriting shared Diagnostic %v in Histogram %v.($old, $new) = (%v, %v)",
			name, h.Name, previous, guid)
	}

	h.Diagnostics[name] = guid
}
// ConvertBenchmarkDataToHistograms converts BenchmarkData to Histograms.
//
// A BenchmarkData contains one or more Sample objects. This conversion works
// differently based on the labels of those samples:
//
//   - If all sample labels are empty, a single Histogram is created for the
//     BenchmarkData. It contains all sample values and its name is set to
//     BenchmarkData.Label. Zircon benchmarks results are an example of samples
//     without labels.
//   - If all sample labels are non-empty, a Histogram is created for each
//     sample. It contains only that sample's values and its name is set to
//     "{BenchmarkData.Label}_{Sample.Label}". Some tracing-based benchmark
//     results are examples of samples with labels.
//
// It does not make sense for some labels to be non-empty while others are empty
// because there is no way to determine how the benchmark author really wants
// this information to be organized in the Catapult dashboard. An error is
// returned in this case.
//
// An error is also returned if d has no samples, or if a Histogram would have
// to be created from zero values. Whenever the returned error is non-nil the
// returned slice is nil.
//
// This function assumes that all non-empty sample labels are unique to their
// parent BenchmarkData. Non-unique names may result in confusing data in the
// Catapult dashboard.
//
// TODO(IN-330): We should have a schema that removes this ambiguity in sample
// labelling.
func ConvertBenchmarkDataToHistograms(d schema.BenchmarkData) ([]Histogram, error) {
	if len(d.Samples) == 0 {
		return nil, errors.New("BenchmarkData has no samples")
	}

	samplesHaveLabels, err := checkSampleLabels(d.Samples)
	if err != nil {
		return nil, err
	}

	if !samplesHaveLabels {
		// Samples are unlabeled. Concat all data into a single Histogram.
		var sampleValues []float64
		for _, sample := range d.Samples {
			sampleValues = append(sampleValues, sample.Values...)
		}
		histogram, err := createHistogram(d.Label, sampleValues)
		if err != nil {
			// Return nil rather than a slice wrapping a zero Histogram, so
			// that both paths report failure the same way.
			return nil, err
		}
		return []Histogram{histogram}, nil
	}

	// Samples are labeled. Create a Histogram for each one.
	histograms := make([]Histogram, 0, len(d.Samples))
	for _, sample := range d.Samples {
		histogram, err := createHistogram(d.Label+"_"+sample.Label, sample.Values)
		if err != nil {
			return nil, err
		}
		histograms = append(histograms, histogram)
	}
	return histograms, nil
}
// createHistogram builds the Histogram called name from values.
//
// values are assumed to be nanosecond measurements. Each one is converted to
// milliseconds, and the Histogram's running statistics are computed from the
// converted values. The GUID is taken from uuid.NewV4.
//
// An error is returned, along with a zero Histogram, if values is empty.
func createHistogram(name string, values []float64) (Histogram, error) {
	if len(values) == 0 {
		return Histogram{}, errors.New("at least one sample value required")
	}

	// Fuchsia benchmarks report nanoseconds, a unit Catapult doesn't support,
	// so rescale every sample to milliseconds.
	const nanosPerMilli = 1e6
	millis := make([]float64, len(values))
	for i, nanos := range values {
		millis[i] = nanos / nanosPerMilli
	}

	// NumNans is 0 because all samples are treated as numeric values.
	// TODO(kjharland): Compute AllBins.
	histogram := Histogram{
		Name:               name,
		GUID:               uuid.NewV4().String(),
		Unit:               "ms_smallerIsBetter",
		MaxNumSampleValues: len(millis),
		NumNans:            0,
		Running:            computeRunningStatistics(millis),
	}
	return histogram, nil
}
// computeRunningStatistics returns seven statistics describing values, in this
// fixed order:
//
//	count, max, meanlogs, mean, min, sum, variance
//
// meanlogs is the mean of the base-10 logs of the absolute values of the given
// values. Because math.Log10(0) is -Inf, a zero sample drives meanlogs to -Inf,
// and to NaN once any further sample is folded in.
//
// NOTE(review): Catapult's own RunningStatistics is believed to track meanlogs
// with the natural log; confirm that base 10 is what the dashboard expects.
//
// https://github.com/catapult-project/catapult/issues/4150
func computeRunningStatistics(values []float64) []float64 {
	var (
		lowest   = math.Inf(1)
		highest  = math.Inf(-1)
		total    float64
		meanlogs float64
	)

	for i, value := range values {
		lowest = math.Min(lowest, value)
		highest = math.Max(highest, value)
		total += value

		// Fold this sample's log into meanlogs as a cumulative moving average:
		// https://en.wikipedia.org/wiki/Moving_average
		logOfAbs := math.Log10(math.Abs(value))
		meanlogs += (logOfAbs - meanlogs) / float64(i+1)
	}

	mean, variance := meanVariance(values)

	return []float64{
		float64(len(values)), // count
		highest,              // max
		meanlogs,             // meanlogs
		mean,                 // mean
		lowest,               // min
		total,                // sum
		variance,             // variance
	}
}
// meanVariance returns the arithmetic mean and the sample variance (computed
// with an n-1 denominator) of vals.
//
// With fewer than two values the variance is reported as 0. With no values at
// all the mean is NaN, since it is computed as 0/0.
func meanVariance(vals []float64) (mean, variance float64) {
	n := float64(len(vals))

	// First pass: the mean.
	total := 0.0
	for _, v := range vals {
		total += v
	}
	mean = total / n

	// A single value has no variance, so report 0 unless there are several.
	if len(vals) < 2 {
		return mean, 0
	}

	// Second pass of the "Two-pass algorithm": accumulate squared deviations
	// from the mean. See
	// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
	squaredDeviations := 0.0
	for _, v := range vals {
		deviation := v - mean
		squaredDeviations += deviation * deviation
	}
	return mean, squaredDeviations / (n - 1)
}
// checkSampleLabels reports whether the given samples carry non-empty labels.
//
// It returns true when every sample is labeled and false when none is. An
// error is returned if samples is empty, or if labeled and unlabeled samples
// are mixed.
func checkSampleLabels(samples []schema.Sample) (bool, error) {
	if len(samples) == 0 {
		return false, errors.New("sample list is empty")
	}

	// The first sample sets the expectation; every later one must agree.
	labeled := samples[0].Label != ""
	for i := 1; i < len(samples); i++ {
		if (samples[i].Label != "") != labeled {
			return false, errors.New("some samples are missing labels")
		}
	}
	return labeled, nil
}