| // Copyright 2018 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file |
| |
| package catapult |
| |
| import ( |
| "errors" |
| "log" |
| "math" |
| "strings" |
| |
| schema "fuchsia.googlesource.com/infra/infra/perf/schema/v1" |
| uuid "github.com/satori/go.uuid" |
| ) |
| |
// Histogram models a single histogram object in Catapult's HistogramSet JSON
// format.
//
// The format is documented at
// https://github.com/catapult-project/catapult/blob/master/docs/histogram-set-json-format.md
//
// TODO(kjharland): Add these missing fields as needed:
//   - ShortName
//   - BinBoundaries
//   - NanDiagnostics
//   - AllBins
//   - SummaryOptions
//   - SampleValues
type Histogram struct {
	// Name is the histogram's name.
	Name string `json:"name"`
	// GUID identifies this histogram; createHistogram fills it with a freshly
	// generated UUID.
	GUID string `json:"guid"`
	// Unit is the Catapult unit string, e.g. "ms_smallerIsBetter".
	Unit string `json:"unit"`
	// Description is free-form text describing the histogram.
	Description string `json:"description"`
	// MaxNumSampleValues is set by createHistogram to the number of sample
	// values the histogram was built from.
	MaxNumSampleValues int `json:"maxNumSampleValues"`
	// NumNans is the number of NaN samples.
	NumNans int `json:"numNans"`
	// Running holds the summary statistics produced by
	// computeRunningStatistics, in this order:
	// count, max, meanlogs, mean, min, sum, variance.
	Running []float64 `json:"running"`
	// Diagnostics maps a Diagnostic's name to its GUID.
	//
	// Each entry states that the diagnostic with that name and GUID carries
	// metadata that can help debug regressions and other issues with this
	// Histogram in the Catapult Dashboard.
	Diagnostics map[string]string `json:"diagnostics"`
}
| |
| // AddDiagnostic associates name with the given GUID in this Histogram's |
| // Diagnostics map. |
| // |
| // If the the new entry overwrites an existing entry, a warning is logged. |
| func (h *Histogram) AddDiagnostic(name string, guid string) { |
| |
| if h.Diagnostics == nil { |
| h.Diagnostics = make(map[string]string) |
| } |
| |
| if existing, ok := h.Diagnostics[name]; ok && existing != guid { |
| log.Printf( |
| "Overwriting shared Diagnostic %v in Histogram %v."+ |
| "($old, $new) = (%v, %v)", |
| name, h.Name, existing, guid) |
| } |
| |
| h.Diagnostics[name] = guid |
| } |
| |
| // ConvertBenchmarkDataToHistograms converts BenchmarkData to Histograms. |
| // |
| // A BenchmarkData contains one or more Sample objects. This conversion works |
| // differently based on the labels of those samples: |
| // |
| // * If all sample labels are empty, a single Histogram is created for the |
| // BenchmarkData. It contains all sample values and its name is set to |
| // BenchmarkData.Label. Zircon benchmarks results are an example of samples |
| // without labels. |
| // * If all sample labels are non-empty, a Histogram is created for each sample. |
| // It contains only that sample's values and its name is set to |
| // "{BenchmarkData.Label}_{Sample.Label}". Some tracing-based benchmark |
| // results are examples of samples with labels. |
| // |
| // It does not make sense for some labels to be non-empty while others are empty |
| // because there is no way to determine how the benchmark author really wants |
| // this information to be organized in the Catapult dashboard. An error is |
| // returned in this case. |
| // |
| // This function assumes that all non-empty sample labels are unique to their |
| // parent BenchmarkData. Non-unique names may result in confusing data in the |
| // Catapult dashboard. |
| // |
| // TODO(IN-330): We should have a schema that removes this ambiguity in sample |
| // labelling. |
| func ConvertBenchmarkDataToHistograms(d schema.BenchmarkData) ([]Histogram, error) { |
| if len(d.Samples) == 0 { |
| return nil, errors.New("BenchmarkData has no samples") |
| } |
| |
| samplesHaveLabels, err := checkSampleLabels(d.Samples) |
| if err != nil { |
| return nil, err |
| } |
| |
| if samplesHaveLabels { |
| // Samples are labeled. Create a Histogram for each one. |
| var histograms []Histogram |
| for _, sample := range d.Samples { |
| histogram, err := createHistogram(d.Label+"_"+sample.Label, sample.Values) |
| if err != nil { |
| return nil, err |
| } |
| histograms = append(histograms, histogram) |
| } |
| return histograms, nil |
| } else { |
| // Samples are unlabeled. Concat all data into a single Histogram. |
| var sampleValues []float64 |
| for _, sample := range d.Samples { |
| sampleValues = append(sampleValues, sample.Values...) |
| } |
| histogram, err := createHistogram(d.Label, sampleValues) |
| return []Histogram{histogram}, err |
| } |
| } |
| |
| // createHistogram creates a Histogram with the given name. |
| // |
| // The histogram's statistics are computed from the given slice of values, which |
| // are assumed to be nanosecond measurements. |
| // |
| // This also performs the following normalizations on the input: |
| // |
| // * Converts values to milliseconds: Catapult doesn't support nanoseconds. |
| // * Converts label whitespace to underscores: Catpapult forms a unique key |
| // for fetching graph data using the Histogram name. Whitespace breaks this |
| // key and causes Catapult to incorrectly process the data. |
| // |
| // Returns an error if values is empty. |
| func createHistogram(name string, values []float64) (Histogram, error) { |
| var sampleValues []float64 |
| |
| if len(values) == 0 { |
| return Histogram{}, errors.New("at least one sample value required") |
| } |
| |
| // Fuchsia benchmarks use nanoseconds. Catapult doesn't support this, |
| // so convert to milliseconds instead. |
| for _, value := range values { |
| sampleValues = append(sampleValues, value/1e6) |
| } |
| |
| // Catapult does not support whitespace in the Histogram name. Replace with |
| // underscores. -1 specifies no limit on the number of replacements. |
| name = strings.Replace(name, " ", "_", -1) |
| |
| return Histogram{ |
| Name: name, |
| Unit: "ms_smallerIsBetter", |
| GUID: uuid.NewV4().String(), |
| NumNans: 0, // All samples are numeric values |
| // TODO(kjharland): Compute AllBins. |
| MaxNumSampleValues: len(sampleValues), |
| Running: computeRunningStatistics(sampleValues), |
| }, nil |
| } |
| |
| // Computes an ordered set of 7 statistics for the given set of values: |
| // |
| // count, max, meanlogs, mean, min, sum, variance |
| // |
| // meanlogs is the mean of the logs of the absolute values of the given values. |
| // |
| // https://github.com/catapult-project/catapult/issues/4150 |
| func computeRunningStatistics(values []float64) []float64 { |
| count := float64(len(values)) |
| min := math.Inf(1) |
| max := math.Inf(-1) |
| var sum float64 |
| var meanlogs float64 |
| |
| for i, v := range values { |
| min = math.Min(min, v) |
| max = math.Max(max, v) |
| sum += v |
| // Compute meanlogs as a cumulative moving average: |
| // https://en.wikipedia.org/wiki/Moving_average |
| meanlogs += (math.Log(math.Abs(v)) - meanlogs) / float64(i+1) |
| } |
| |
| mean, variance := meanVariance(values) |
| return []float64{count, max, meanlogs, mean, min, sum, variance} |
| } |
| |
// meanVariance returns the arithmetic mean and the sample variance (divisor
// n-1) of vals.
//
// With fewer than two values the variance is reported as 0. For an empty
// slice the mean is the result of dividing zero by zero, i.e. NaN.
func meanVariance(vals []float64) (mean, variance float64) {
	n := float64(len(vals))

	// Pass one: the mean.
	var total float64
	for i := range vals {
		total += vals[i]
	}
	mean = total / n

	// A single value has no variance.
	if len(vals) < 2 {
		return mean, 0
	}

	// Pass two: sum of squared deviations from the mean (the "two-pass
	// algorithm"). See
	// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
	var squaredDiffs float64
	for _, v := range vals {
		delta := v - mean
		squaredDiffs += delta * delta
	}

	return mean, squaredDiffs / (n - 1)
}
| |
| // checkSampleLabels checks whether the given samples have non-empty labels. |
| // |
| // Returns true iff all samples are labeled. Returns an error if samples are |
| // inconsistently labeled or samples is empty. |
| func checkSampleLabels(samples []schema.Sample) (bool, error) { |
| if len(samples) == 0 { |
| return false, errors.New("sample list is empty") |
| } |
| |
| samplesShouldHaveLabels := samples[0].Label != "" |
| |
| // Verify that all samples are consistently labeled. |
| for _, sample := range samples { |
| // Return an error if the samples are inconsistently labeled. |
| sampleHasLabel := sample.Label != "" |
| if samplesShouldHaveLabels != sampleHasLabel { |
| return false, errors.New("some samples are missing labels") |
| } |
| } |
| |
| return samplesShouldHaveLabels, nil |
| } |