blob: 0a181c5d4fb7a939cb3790e235e76a3a57301fde [file] [log] [blame]
// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file
package catapult
import (
	"errors"
	"log"
	"math"
	"strings"

	// NOTE(review): the import paths of the two aliased packages below are
	// empty in this copy of the file; restore the original package paths.
	schema ""
	uuid ""
)
// Histogram is a Catapult histogram object.
//
// See the Catapult HistogramSet JSON format documentation for more
// information on the format.
//
// TODO(kjharland): Add these missing fields as needed:
//   - ShortName
//   - BinBoundaries
//   - NanDiagnostics
//   - AllBins
//   - SummaryOptions
//   - SampleValues
type Histogram struct {
	Name        string `json:"name"`
	GUID        string `json:"guid"`
	Unit        string `json:"unit"`
	Description string `json:"description"`

	MaxNumSampleValues int `json:"maxNumSampleValues"`
	NumNans            int `json:"numNans"`

	// Running holds the summary statistics of the samples, in the order
	// count, max, meanlogs, mean, min, sum, variance.
	Running []float64 `json:"running"`

	// Diagnostics maps a Diagnostic's name to its GUID.
	//
	// These map entries communicate that the diagnostic with the given
	// name and GUID contains metadata that can help debug regressions and
	// other issues with this Histogram in the Catapult Dashboard.
	Diagnostics map[string]string `json:"diagnostics"`
}

// AddDiagnostic associates name with the given GUID in this Histogram's
// Diagnostics map, allocating the map on first use.
//
// If the new entry overwrites an existing entry that holds a different GUID,
// a warning is logged. Re-adding the same name/GUID pair is silent.
func (h *Histogram) AddDiagnostic(name string, guid string) {
	if h.Diagnostics == nil {
		// Writing to a nil map panics, so create it lazily.
		h.Diagnostics = make(map[string]string)
	}

	if existing, ok := h.Diagnostics[name]; ok && existing != guid {
		log.Printf(
			"Overwriting shared Diagnostic %v in Histogram %v."+
				"($old, $new) = (%v, %v)",
			name, h.Name, existing, guid)
	}

	h.Diagnostics[name] = guid
}
// ConvertBenchmarkDataToHistograms converts BenchmarkData to Histograms.
// A BenchmarkData contains one or more Sample objects. This conversion works
// differently based on the labels of those samples:
// * If all sample labels are empty, a single Histogram is created for the
// BenchmarkData. It contains all sample values and its name is set to
// BenchmarkData.Label. Zircon benchmarks results are an example of samples
// without labels.
// * If all sample labels are non-empty, a Histogram is created for each sample.
// It contains only that sample's values and its name is set to
// "{BenchmarkData.Label}_{Sample.Label}". Some tracing-based benchmark
// results are examples of samples with labels.
// It does not make sense for some labels to be non-empty while others are empty
// because there is no way to determine how the benchmark author really wants
// this information to be organized in the Catapult dashboard. An error is
// returned in this case.
// This function assumes that all non-empty sample labels are unique to their
// parent BenchmarkData. Non-unique names may result in confusing data in the
// Catapult dashboard.
// TODO(IN-330): We should have a schema that removes this ambiguity in sample
// labelling.
func ConvertBenchmarkDataToHistograms(d schema.BenchmarkData) ([]Histogram, error) {
if len(d.Samples) == 0 {
return nil, errors.New("BenchmarkData has no samples")
samplesHaveLabels, err := checkSampleLabels(d.Samples)
if err != nil {
return nil, err
if samplesHaveLabels {
// Samples are labeled. Create a Histogram for each one.
var histograms []Histogram
for _, sample := range d.Samples {
histogram, err := createHistogram(d.Label+"_"+sample.Label, sample.Values)
if err != nil {
return nil, err
histograms = append(histograms, histogram)
return histograms, nil
} else {
// Samples are unlabeled. Concat all data into a single Histogram.
var sampleValues []float64
for _, sample := range d.Samples {
sampleValues = append(sampleValues, sample.Values...)
histogram, err := createHistogram(d.Label, sampleValues)
return []Histogram{histogram}, err
// createHistogram creates a Histogram with the given name.
// The histogram's statistics are computed from the given slice of values, which
// are assumed to be nanosecond measurements.
// This also performs the following normalizations on the input:
// * Converts values to milliseconds: Catapult doesn't support nanoseconds.
// * Converts label whitespace to underscores: Catpapult forms a unique key
// for fetching graph data using the Histogram name. Whitespace breaks this
// key and causes Catapult to incorrectly process the data.
// Returns an error if values is empty.
func createHistogram(name string, values []float64) (Histogram, error) {
var sampleValues []float64
if len(values) == 0 {
return Histogram{}, errors.New("at least one sample value required")
// Fuchsia benchmarks use nanoseconds. Catapult doesn't support this,
// so convert to milliseconds instead.
for _, value := range values {
sampleValues = append(sampleValues, value/1e6)
// Catapult does not support whitespace in the Histogram name. Replace with
// underscores. -1 specifies no limit on the number of replacements.
name = strings.Replace(name, " ", "_", -1)
return Histogram{
Name: name,
Unit: "ms_smallerIsBetter",
GUID: uuid.NewV4().String(),
NumNans: 0, // All samples are numeric values
// TODO(kjharland): Compute AllBins.
MaxNumSampleValues: len(sampleValues),
Running: computeRunningStatistics(sampleValues),
}, nil
// Computes an ordered set of 7 statistics for the given set of values:
// count, max, meanlogs, mean, min, sum, variance
// meanlogs is the mean of the logs of the absolute values of the given values.
func computeRunningStatistics(values []float64) []float64 {
count := float64(len(values))
min := math.Inf(1)
max := math.Inf(-1)
var sum float64
var meanlogs float64
for i, v := range values {
min = math.Min(min, v)
max = math.Max(max, v)
sum += v
// Compute meanlogs as a cumulative moving average:
meanlogs += (math.Log(math.Abs(v)) - meanlogs) / float64(i+1)
mean, variance := meanVariance(values)
return []float64{count, max, meanlogs, mean, min, sum, variance}
// meanVariance computes the mean and the sample variance (divisor n-1) of the
// samples in vals.
//
// A single value has no variance, so variance is 0 when vals holds fewer than
// two values. For an empty slice the mean is 0/0, i.e. NaN.
func meanVariance(vals []float64) (mean, variance float64) {
	// First compute the mean.
	var sum float64
	for _, val := range vals {
		sum += val
	}
	valsCount := float64(len(vals))
	mean = sum / valsCount

	// A single value has no variance. Leave it as 0 unless there are
	// multiple values.
	if valsCount < 2 {
		return mean, 0
	}

	// Compute variance using the two-pass algorithm: with the mean known,
	// make a second pass accumulating the squared deviations from it.
	var sumOfSquaredDiffs float64
	for _, val := range vals {
		diff := val - mean
		sumOfSquaredDiffs += diff * diff
	}
	variance = sumOfSquaredDiffs / (valsCount - 1)
	return mean, variance
}
// checkSampleLabels checks whether the given samples have non-empty labels.
// Returns true iff all samples are labeled. Returns an error if samples are
// inconsistently labeled or samples is empty.
func checkSampleLabels(samples []schema.Sample) (bool, error) {
if len(samples) == 0 {
return false, errors.New("sample list is empty")
samplesShouldHaveLabels := samples[0].Label != ""
// Verify that all samples are consistently labeled.
for _, sample := range samples {
// Return an error if the samples are inconsistently labeled.
sampleHasLabel := sample.Label != ""
if samplesShouldHaveLabels != sampleHasLabel {
return false, errors.New("some samples are missing labels")
return samplesShouldHaveLabels, nil