blob: d97c147301d405435bc0fab5a21b99b402ab89ed [file] [log] [blame]
// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file
package catapult
import "fmt"
// BinBoundaries maps values to adjacent regions of real numbers, and is useful
// for building histograms.
//
// The boundaries themselves are real values, defined on some range. There are
// always at least two boundaries, which are the minimum and maximum values of
// the range. Optionally, the range is also partitioned to create several
// intermediate boundaries between these min and max values.
//
// Each adjacent pair of boundaries form a region, or "bin". Let N be the
// number of boundaries, then there are always N+1 bins in total; N-1 between
// the min and max boundaries, an underflow bin, and an overflow bin. The
// underflow bin contains all values that are less than the min boundary, and
// the overflow bin contains all values greater than or equal to the max
// boundary. Bins are indexed from 0 to N. This is illustrated in the chart
// below:
//
// min max
// | | | | |
// | | | | |
// -Inf <-------|---------|---------|-----|---------|---------> +Inf
// : underflow : central : central : ... : central : overflow :
// : bin : bin : bin : : bin : bin :
// : 0 : 1 : 2 : : N-1 : N :
//
// The index of the bin containing a given value can be obtained using
// `GetBinIndex(value)`.
//
// BinBoundaries are immutable and can be used to partition multiple collections
// of values into multiple histogram bins.
//
// Example Usage:
// // We'll partition the range 10..20 into 3 bins to create a histogram.
// bb, err := NewLinearBinBoundaries(3, 10.0, 20.0)
// if err != nil {
// log.Fatal(err)
// }
//
// // The BinBoundaries contains these 3 bins, plus the underflow and
// // overflow bins for a total of five.
// if bb.BinCount() != 5 {
// log.Fatal(...)
// }
//
// // Use an array of float64 arrays to represent the historam bins.
// bins := make([][]float64, bb.BinCount())
//
// // Partition values into our histogram bins.
// for _, val := range values {
// binIndex := bb.GetBinIndex(value)
// bins[binIndex] = append(bins[binIndex], value)
// }
//
//
// Differences between this and Catapult's BinBoundaries API:
// * These BinBoundaries do not support non-numeric bins because Fuchsia
// performance tests only collect numeric sample values.
//
// * Catapult's BinBoundaries provide a builder-like API for progressively
// creating bin boundaries with a mix of linear and exponential bins, like so:
//
// | bin_1(1..5) | bin_2(6..10) | bin_3(11..100) | bin_4(101...1000) | ...
//
// Fuchsia's needs are not yet so specific, so we opt to create linearly
// spaced bins all-at-once and have the option to use exponential bins and/or
// move to a more complex interface such as Catapult's if necessary.
type BinBoundaries struct {
boundaries []float64
}
// GetBinIndex returns the index of the bin containing the given value.
//
// A return value of 0 always indicates the underflow bin. A return value of
// BinCount() - 1 always indicates the overflow bin. Any value V where 0 < V
// < BinCount() - 1 indicates a central bin.
func (b *BinBoundaries) GetBinIndex(value float64) uint {
for bin, binMax := range b.boundaries {
if value < binMax {
return uint(bin)
}
}
// The value falls into the overflow bin.
return b.BinCount() - 1
}
// BinCount is the number of bins including the underflow and overflow bins.
func (b BinBoundaries) BinCount() uint {
return uint(len(b.boundaries) + 1)
}
// GetBoundaries returns a copy of the boundary values in this BinBoundaries.
//
// The ith boundary value is the "min" value for the `i-1`th bin. This means if
// GetBoundaries()[i] == N and GetBoundaries()[i-1] == M, then the ith bin
// contains all values greater than or equal to M and less than N.
//
// GetBoundaries()[0] is always the minimum value for the first central bin.
// There is no boundary value (min value) for the underflow bin, which contains
// all values less than the first element in GetBoundaries().
func (b *BinBoundaries) GetBoundaries() []float64 {
boundariesCopy := make([]float64, len(b.boundaries))
copy(boundariesCopy, b.boundaries)
return boundariesCopy
}
// NewLinearBinBoundaries returns BinBoundaries for linearly spaced bins.
//
// If centralBinCount < 1, an error is returned along with a nil pointer for the
// BinBoundaries value.
func NewLinearBinBoundaries(centralBinCount uint, min float64, max float64) (*BinBoundaries, error) {
if centralBinCount < 1 {
return nil, fmt.Errorf("must specify at least 1 central bin. Got %v", centralBinCount)
}
if min >= max {
return nil, fmt.Errorf(
"min must be smaller than max. Got min=%v, max=%v",
min, max)
}
// Initialize the list of boundaries with the min-value for the first central bin.
// There is no min-value for the underflow bin.
boundaries := []float64{min}
// Add the min-value boundaries for the remaining central bins.
for i := uint(1); i < centralBinCount; i++ {
boundary := min + (max-min)*float64(i)/float64(centralBinCount)
boundaries = append(boundaries, boundary)
}
// Manually add the min-value for the overflow bin to avoid rounding errors.
boundaries = append(boundaries, max)
// No max-value added for the overflow bin, whose upper bound is infinity.
return &BinBoundaries{boundaries}, nil
}