catapult/histogram.go - testing - Git at Google

 // Copyright 2018 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file

 package catapult

 import (
 	"fmt"
 	"log"
 	"math"

 	schema "fuchsia.googlesource.com/testing/perf/schema/v1"
 	uuid "github.com/satori/go.uuid"
 	"gonum.org/v1/gonum/stat"
 )

 // Histogram is a Catapult histogram object.
 //
 // See https://github.com/catapult-project/catapult/blob/master/docs/histogram-set-json-format.md
 // for more information on the format.
 //
 // The struct tags give the JSON key each field is serialized under.
 //
 // TODO(kjharland): Add these missing fields as needed
 //   ShortName
 //   BinBoundaries
 //   NanDiagnostics
 //   AllBins
 //   SummaryOptions
 type Histogram struct {
 	// Name identifies the histogram. createHistogram builds it as
 	// "<variant description>, <benchmark label>".
 	Name               string    `json:"name"`
 	// GUID is this histogram's identifier. createHistogram assigns a
 	// freshly generated version 4 UUID string.
 	GUID               string    `json:"guid"`
 	// Unit is the unit string for the sample values. createHistogram
 	// always uses "ms_smallerIsBetter".
 	Unit               string    `json:"unit"`
 	// Description is free-form text. createHistogram leaves it empty.
 	Description        string    `json:"description"`
 	// SampleValues holds the samples themselves. createHistogram stores
 	// them after dividing the raw benchmark values by 1e6.
 	SampleValues       []float64 `json:"sampleValues"`
 	// MaxNumSampleValues is set by createHistogram to len(SampleValues).
 	MaxNumSampleValues int       `json:"maxNumSampleValues"`
 	// NumNans is the number of non-numeric samples. createHistogram
 	// always sets it to 0.
 	NumNans            int       `json:"numNans"`
 	// Running holds the seven summary statistics produced by
 	// computeRunningStatistics, in this order:
 	// count, max, meanlogs, mean, min, sum, variance.
 	Running            []float64 `json:"running"`
 	// Diagnostics maps a Diagnostic's name to its GUID.
 	//
 	// These map entries communicate that the diagnostic with the given
 	// name and GUID contains metadata that can help debug regressions and
 	// other issues with this Histogram in the Catapult Dashboard.
 	//
 	// The map stays nil until AddDiagnostic is called for the first time.
 	Diagnostics map[string]string `json:"diagnostics"`
 }

 // AddDiagnostic associates name with the given GUID in this Histogram's
 // Diagnostics map, allocating the map on first use.
 //
 // If name is already mapped to a different GUID, the entry is overwritten
 // and a warning with the old and new GUIDs is logged. Re-adding the same
 // (name, guid) pair is silent.
 func (h *Histogram) AddDiagnostic(name string, guid string) {
 	// Writing to a nil map panics, so create it lazily.
 	if h.Diagnostics == nil {
 		h.Diagnostics = make(map[string]string)
 	}

 	if existing, ok := h.Diagnostics[name]; ok && existing != guid {
 		// The space after the period separates the sentence from the
 		// (old, new) pair in the concatenated format string.
 		log.Printf(
 			"Overwriting shared Diagnostic %v in Histogram %v. "+
 				"($old, $new) = (%v, %v)",
 			name, h.Name, existing, guid)
 	}

 	h.Diagnostics[name] = guid
 }

 // ConvertVariantsToHistograms flattens a collection of Fuchsia benchmark
 // variants into Catapult Histograms.
 //
 // One Histogram is produced per benchmark data set of each variant, in the
 // order the variants and their data sets appear. The result is nil when
 // there is no benchmark data at all.
 func ConvertVariantsToHistograms(variants []schema.Variant) []Histogram {
 	var result []Histogram
 	for i := range variants {
 		current := variants[i]
 		for j := range current.FBenchmarksData {
 			data := current.FBenchmarksData[j]
 			result = append(result, createHistogram(current, data))
 		}
 	}
 	return result
 }

 // createHistogram builds a single Catapult Histogram from one benchmark data
 // set of the given variant.
 //
 // The histogram is named "<variant description>, <benchmark label>", gets a
 // freshly generated GUID, and carries the (truncated) samples converted to
 // milliseconds together with their running statistics.
 //
 // TODO(kjharland): Generalize to support non-zircon benchmarks once I have
 // a better idea of how other benchmarks will be converted.
 func createHistogram(v schema.Variant, d schema.BenchmarkData) Histogram {
 	// Upper bound on the points taken from each sample. Catapult will
 	// reject sample sets that are too large. Fuchsia tests usually generate
 	// ~10,000 - 20,000 sample points per test-run, but only a handful of
 	// these are needed.
 	//
 	// TODO(kjharland): Use a sane max of the first 1000 points for now.
 	// Find out how to properly split samples so that data is not thrown
 	// away.
 	const maxPointsPerSample = 1000

 	var millis []float64
 	for _, sample := range d.Samples {
 		limit := len(sample.Values)
 		if limit > maxPointsPerSample {
 			limit = maxPointsPerSample
 		}

 		// All zircon benchmarks use nanoseconds. Catapult doesn't support
 		// this, so convert to milliseconds instead.
 		for _, nanos := range sample.Values[:limit] {
 			millis = append(millis, nanos/1e6)
 		}
 	}

 	hist := Histogram{
 		Name:               fmt.Sprintf("%v, %v", v.VariantDesc, d.Label),
 		Unit:               "ms_smallerIsBetter",
 		GUID:               uuid.NewV4().String(),
 		NumNans:            0, // All samples are numeric values
 		SampleValues:       millis,
 		MaxNumSampleValues: len(millis),
 		Running:            computeRunningStatistics(millis),
 	}
 	return hist
 }

 // computeRunningStatistics returns an ordered set of 7 statistics for the
 // given values:
 //
 //   count, max, meanlogs, mean, min, sum, variance
 //
 // meanlogs is the mean of the base-10 logs of the absolute values of the
 // given values. If any value is zero, meanlogs is -Inf regardless of where
 // the zero appears in the input.
 //
 // mean is 0 for empty input, and variance (the sample variance) is 0 when
 // there are fewer than two values, instead of NaN. For empty input, max and
 // min keep their initial values of -Inf and +Inf.
 //
 // NOTE(review): base-10 logs are used here; confirm this is the base Catapult
 // expects for meanlogs. Infinite entries cannot be encoded by encoding/json,
 // so callers may want to avoid serializing statistics for such inputs.
 //
 // https://github.com/catapult-project/catapult/issues/4150
 func computeRunningStatistics(values []float64) []float64 {
 	count := float64(len(values))
 	lo := math.Inf(1)
 	hi := math.Inf(-1)
 	var sum float64
 	var meanlogs float64

 	for i, v := range values {
 		lo = math.Min(lo, v)
 		hi = math.Max(hi, v)
 		sum += v

 		logv := math.Log10(math.Abs(v))
 		// log10(0) is -Inf. Feeding -Inf through the incremental update
 		// below would yield -Inf + Inf = NaN on the next sample, so pin
 		// the result at -Inf once a zero has been seen.
 		if math.IsInf(logv, -1) || math.IsInf(meanlogs, -1) {
 			meanlogs = math.Inf(-1)
 			continue
 		}
 		// Compute meanlogs as a cumulative moving average:
 		// https://en.wikipedia.org/wiki/Moving_average
 		meanlogs += (logv - meanlogs) / float64(i+1)
 	}

 	// The mean of no values and the sample variance of fewer than two
 	// values are 0/0; report 0 rather than NaN in those cases.
 	var mean, variance float64
 	if len(values) > 0 {
 		mean = stat.Mean(values, nil)
 	}
 	if len(values) > 1 {
 		variance = stat.Variance(values, nil)
 	}
 	return []float64{count, hi, meanlogs, mean, lo, sum, variance}
 }
	// Copyright 2018 The Fuchsia Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file

	package catapult

	import (
	"fmt"
	"log"
	"math"

	schema "fuchsia.googlesource.com/testing/perf/schema/v1"
	uuid "github.com/satori/go.uuid"
	"gonum.org/v1/gonum/stat"
	)

	// Histogram is a Catapult histogram object.
	//
	// See https://github.com/catapult-project/catapult/blob/master/docs/histogram-set-json-format.md
	// for more information on the format.
	//
	// The struct tags give the JSON key each field is serialized under.
	//
	// TODO(kjharland): Add these missing fields as needed
	// ShortName
	// BinBoundaries
	// NanDiagnostics
	// AllBins
	// SummaryOptions
	type Histogram struct {
	// Name identifies the histogram. createHistogram builds it as
	// "<variant description>, <benchmark label>".
	Name string `json:"name"`
	// GUID is this histogram's identifier. createHistogram assigns a
	// freshly generated version 4 UUID string.
	GUID string `json:"guid"`
	// Unit is the unit string for the sample values. createHistogram
	// always uses "ms_smallerIsBetter".
	Unit string `json:"unit"`
	// Description is free-form text. createHistogram leaves it empty.
	Description string `json:"description"`
	// SampleValues holds the samples themselves. createHistogram stores
	// them after dividing the raw benchmark values by 1e6.
	SampleValues []float64 `json:"sampleValues"`
	// MaxNumSampleValues is set by createHistogram to len(SampleValues).
	MaxNumSampleValues int `json:"maxNumSampleValues"`
	// NumNans is the number of non-numeric samples. createHistogram
	// always sets it to 0.
	NumNans int `json:"numNans"`
	// Running holds the seven summary statistics produced by
	// computeRunningStatistics, in this order:
	// count, max, meanlogs, mean, min, sum, variance.
	Running []float64 `json:"running"`
	// Diagnostics maps a Diagnostic's name to its GUID.
	//
	// These map entries communicate that the diagnostic with the given
	// name and GUID contains metadata that can help debug regressions and
	// other issues with this Histogram in the Catapult Dashboard.
	//
	// The map stays nil until AddDiagnostic is called for the first time.
	Diagnostics map[string]string `json:"diagnostics"`
	}

	// AddDiagnostic associates name with the given GUID in this Histogram's
	// Diagnostics map, allocating the map on first use.
	//
	// If name is already mapped to a different GUID, the entry is overwritten
	// and a warning with the old and new GUIDs is logged. Re-adding the same
	// (name, guid) pair is silent.
	func (h *Histogram) AddDiagnostic(name string, guid string) {
		// Writing to a nil map panics, so create it lazily.
		if h.Diagnostics == nil {
			h.Diagnostics = make(map[string]string)
		}

		if existing, ok := h.Diagnostics[name]; ok && existing != guid {
			// The space after the period separates the sentence from the
			// (old, new) pair in the concatenated format string.
			log.Printf(
				"Overwriting shared Diagnostic %v in Histogram %v. "+
					"($old, $new) = (%v, %v)",
				name, h.Name, existing, guid)
		}

		h.Diagnostics[name] = guid
	}

	// ConvertVariantsToHistograms flattens a collection of Fuchsia benchmark
	// variants into Catapult Histograms.
	//
	// One Histogram is produced per benchmark data set of each variant, in the
	// order the variants and their data sets appear. The result is nil when
	// there is no benchmark data at all.
	func ConvertVariantsToHistograms(variants []schema.Variant) []Histogram {
		var result []Histogram
		for i := range variants {
			current := variants[i]
			for j := range current.FBenchmarksData {
				data := current.FBenchmarksData[j]
				result = append(result, createHistogram(current, data))
			}
		}
		return result
	}

	// createHistogram builds a single Catapult Histogram from one benchmark data
	// set of the given variant.
	//
	// The histogram is named "<variant description>, <benchmark label>", gets a
	// freshly generated GUID, and carries the (truncated) samples converted to
	// milliseconds together with their running statistics.
	//
	// TODO(kjharland): Generalize to support non-zircon benchmarks once I have
	// a better idea of how other benchmarks will be converted.
	func createHistogram(v schema.Variant, d schema.BenchmarkData) Histogram {
		// Upper bound on the points taken from each sample. Catapult will
		// reject sample sets that are too large. Fuchsia tests usually generate
		// ~10,000 - 20,000 sample points per test-run, but only a handful of
		// these are needed.
		//
		// TODO(kjharland): Use a sane max of the first 1000 points for now.
		// Find out how to properly split samples so that data is not thrown
		// away.
		const maxPointsPerSample = 1000

		var millis []float64
		for _, sample := range d.Samples {
			limit := len(sample.Values)
			if limit > maxPointsPerSample {
				limit = maxPointsPerSample
			}

			// All zircon benchmarks use nanoseconds. Catapult doesn't support
			// this, so convert to milliseconds instead.
			for _, nanos := range sample.Values[:limit] {
				millis = append(millis, nanos/1e6)
			}
		}

		hist := Histogram{
			Name:               fmt.Sprintf("%v, %v", v.VariantDesc, d.Label),
			Unit:               "ms_smallerIsBetter",
			GUID:               uuid.NewV4().String(),
			NumNans:            0, // All samples are numeric values
			SampleValues:       millis,
			MaxNumSampleValues: len(millis),
			Running:            computeRunningStatistics(millis),
		}
		return hist
	}

	// computeRunningStatistics returns an ordered set of 7 statistics for the
	// given values:
	//
	//   count, max, meanlogs, mean, min, sum, variance
	//
	// meanlogs is the mean of the base-10 logs of the absolute values of the
	// given values. If any value is zero, meanlogs is -Inf regardless of where
	// the zero appears in the input.
	//
	// mean is 0 for empty input, and variance (the sample variance) is 0 when
	// there are fewer than two values, instead of NaN. For empty input, max and
	// min keep their initial values of -Inf and +Inf.
	//
	// NOTE(review): base-10 logs are used here; confirm this is the base Catapult
	// expects for meanlogs. Infinite entries cannot be encoded by encoding/json,
	// so callers may want to avoid serializing statistics for such inputs.
	//
	// https://github.com/catapult-project/catapult/issues/4150
	func computeRunningStatistics(values []float64) []float64 {
		count := float64(len(values))
		lo := math.Inf(1)
		hi := math.Inf(-1)
		var sum float64
		var meanlogs float64

		for i, v := range values {
			lo = math.Min(lo, v)
			hi = math.Max(hi, v)
			sum += v

			logv := math.Log10(math.Abs(v))
			// log10(0) is -Inf. Feeding -Inf through the incremental update
			// below would yield -Inf + Inf = NaN on the next sample, so pin
			// the result at -Inf once a zero has been seen.
			if math.IsInf(logv, -1) || math.IsInf(meanlogs, -1) {
				meanlogs = math.Inf(-1)
				continue
			}
			// Compute meanlogs as a cumulative moving average:
			// https://en.wikipedia.org/wiki/Moving_average
			meanlogs += (logv - meanlogs) / float64(i+1)
		}

		// The mean of no values and the sample variance of fewer than two
		// values are 0/0; report 0 rather than NaN in those cases.
		var mean, variance float64
		if len(values) > 0 {
			mean = stat.Mean(values, nil)
		}
		if len(values) > 1 {
			variance = stat.Variance(values, nil)
		}
		return []float64{count, hi, meanlogs, mean, lo, sum, variance}
	}