| // Copyright 2018 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file |
| |
| package catapult |
| |
| import "fmt" |
| |
| // BinBoundaries maps values to adjacent regions of real numbers, and is useful |
| // for building histograms. |
| // |
| // The boundaries themselves are real values, defined on some range. There are |
| // always at least two boundaries, which are the minimum and maximum values of |
| // the range. Optionally, the range is also partitioned to create several |
| // intermediate boundaries between these min and max values. |
| // |
| // Each adjacent pair of boundaries form a region, or "bin". Let N be the |
| // number of boundaries, then there are always N+1 bins in total; N-1 between |
| // the min and max boundaries, an underflow bin, and an overflow bin. The |
| // underflow bin contains all values that are less than the min boundary, and |
| // the overflow bin contains all values greater than or equal to the max |
| // boundary. Bins are indexed from 0 to N. This is illustrated in the chart |
| // below: |
| // |
| // min max |
| // | | | | | |
| // | | | | | |
| // -Inf <-------|---------|---------|-----|---------|---------> +Inf |
| // : underflow : central : central : ... : central : overflow : |
| // : bin : bin : bin : : bin : bin : |
| // : 0 : 1 : 2 : : N-1 : N : |
| // |
| // The index of the bin containing a given value can be obtained using |
| // `GetBinIndex(value)`. |
| // |
| // BinBoundaries are immutable and can be used to partition multiple collections |
| // of values into multiple histogram bins. |
| // |
| // Example Usage: |
| // // We'll partition the range 10..20 into 3 bins to create a histogram. |
| // bb, err := NewLinearBinBoundaries(3, 10.0, 20.0) |
| // if err != nil { |
| // log.Fatal(err) |
| // } |
| // |
| // // The BinBoundaries contains these 3 bins, plus the underflow and |
| // // overflow bins for a total of five. |
| // if bb.BinCount() != 5 { |
| // log.Fatal(...) |
| // } |
| // |
| // // Use an array of float64 arrays to represent the historam bins. |
| // bins := make([][]float64, bb.BinCount()) |
| // |
| // // Partition values into our histogram bins. |
| // for _, val := range values { |
| // binIndex := bb.GetBinIndex(value) |
| // bins[binIndex] = append(bins[binIndex], value) |
| // } |
| // |
| // |
| // Differences between this and Catapult's BinBoundaries API: |
| // * These BinBoundaries do not support non-numeric bins because Fuchsia |
| // performance tests only collect numeric sample values. |
| // |
| // * Catapult's BinBoundaries provide a builder-like API for progressively |
| // creating bin boundaries with a mix of linear and exponential bins, like so: |
| // |
| // | bin_1(1..5) | bin_2(6..10) | bin_3(11..100) | bin_4(101...1000) | ... |
| // |
| // Fuchsia's needs are not yet so specific, so we opt to create linearly |
| // spaced bins all-at-once and have the option to use exponential bins and/or |
| // move to a more complex interface such as Catapult's if necessary. |
| type BinBoundaries struct { |
| boundaries []float64 |
| } |
| |
| // GetBinIndex returns the index of the bin containing the given value. |
| // |
| // A return value of 0 always indicates the underflow bin. A return value of |
| // BinCount() - 1 always indicates the overflow bin. Any value V where 0 < V |
| // < BinCount() - 1 indicates a central bin. |
| func (b *BinBoundaries) GetBinIndex(value float64) uint { |
| for bin, binMax := range b.boundaries { |
| if value < binMax { |
| return uint(bin) |
| } |
| } |
| // The value falls into the overflow bin. |
| return b.BinCount() - 1 |
| } |
| |
| // BinCount is the number of bins including the underflow and overflow bins. |
| func (b BinBoundaries) BinCount() uint { |
| return uint(len(b.boundaries) + 1) |
| } |
| |
| // GetBoundaries returns a copy of the boundary values in this BinBoundaries. |
| // |
| // The ith boundary value is the "min" value for the `i-1`th bin. This means if |
| // GetBoundaries()[i] == N and GetBoundaries()[i-1] == M, then the ith bin |
| // contains all values greater than or equal to M and less than N. |
| // |
| // GetBoundaries()[0] is always the minimum value for the first central bin. |
| // There is no boundary value (min value) for the underflow bin, which contains |
| // all values less than the first element in GetBoundaries(). |
| func (b *BinBoundaries) GetBoundaries() []float64 { |
| boundariesCopy := make([]float64, len(b.boundaries)) |
| copy(boundariesCopy, b.boundaries) |
| return boundariesCopy |
| } |
| |
| // NewLinearBinBoundaries returns BinBoundaries for linearly spaced bins. |
| // |
| // If centralBinCount < 1, an error is returned along with a nil pointer for the |
| // BinBoundaries value. |
| func NewLinearBinBoundaries(centralBinCount uint, min float64, max float64) (*BinBoundaries, error) { |
| if centralBinCount < 1 { |
| return nil, fmt.Errorf("must specify at least 1 central bin. Got %v", centralBinCount) |
| } |
| |
| if min >= max { |
| return nil, fmt.Errorf( |
| "min must be smaller than max. Got min=%v, max=%v", |
| min, max) |
| } |
| |
| // Initialize the list of boundaries with the min-value for the first central bin. |
| // There is no min-value for the underflow bin. |
| boundaries := []float64{min} |
| |
| // Add the min-value boundaries for the remaining central bins. |
| for i := uint(1); i < centralBinCount; i++ { |
| boundary := min + (max-min)*float64(i)/float64(centralBinCount) |
| boundaries = append(boundaries, boundary) |
| } |
| |
| // Manually add the min-value for the overflow bin to avoid rounding errors. |
| boundaries = append(boundaries, max) |
| |
| // No max-value added for the overflow bin, whose upper bound is infinity. |
| return &BinBoundaries{boundaries}, nil |
| } |