| // Copyright ©2015 The gonum Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package distuv |
| |
| import ( |
| "math" |
| "sort" |
| "testing" |
| |
| "gonum.org/v1/gonum/floats" |
| "gonum.org/v1/gonum/integrate/quad" |
| "gonum.org/v1/gonum/stat" |
| ) |
| |
| type meaner interface { |
| Mean() float64 |
| } |
| |
| type quantiler interface { |
| Quantile(float64) float64 |
| } |
| |
| type medianer interface { |
| quantiler |
| Median() float64 |
| } |
| |
| type varStder interface { |
| StdDev() float64 |
| Variance() float64 |
| } |
| |
| type entropyer interface { |
| LogProber |
| Entropy() float64 |
| } |
| |
| type exKurtosiser interface { |
| ExKurtosis() float64 |
| Mean() float64 |
| } |
| |
| type skewnesser interface { |
| StdDev() float64 |
| Mean() float64 |
| Skewness() float64 |
| } |
| |
| type cumulanter interface { |
| Quantiler |
| CDF(x float64) float64 |
| Survival(x float64) float64 |
| } |
| |
| func generateSamples(x []float64, r Rander) { |
| for i := range x { |
| x[i] = r.Rand() |
| } |
| } |
| |
| type probLogprober interface { |
| Prob(x float64) float64 |
| LogProb(x float64) float64 |
| } |
| |
| type cumulantProber interface { |
| cumulanter |
| probLogprober |
| } |
| |
| func checkMean(t *testing.T, i int, x []float64, m meaner, tol float64) { |
| mean := stat.Mean(x, nil) |
| if !floats.EqualWithinAbsOrRel(mean, m.Mean(), tol, tol) { |
| t.Errorf("Mean mismatch case %v: want: %v, got: %v", i, mean, m.Mean()) |
| } |
| } |
| |
| func checkMedian(t *testing.T, i int, x []float64, m medianer, tol float64) { |
| median := stat.Quantile(0.5, stat.Empirical, x, nil) |
| if !floats.EqualWithinAbsOrRel(median, m.Median(), tol, tol) { |
| t.Errorf("Median mismatch case %v: want: %v, got: %v", i, median, m.Median()) |
| } |
| } |
| |
| func checkVarAndStd(t *testing.T, i int, x []float64, v varStder, tol float64) { |
| variance := stat.Variance(x, nil) |
| if !floats.EqualWithinAbsOrRel(variance, v.Variance(), tol, tol) { |
| t.Errorf("Variance mismatch case %v: want: %v, got: %v", i, variance, v.Variance()) |
| } |
| std := math.Sqrt(variance) |
| if !floats.EqualWithinAbsOrRel(std, v.StdDev(), tol, tol) { |
| t.Errorf("StdDev mismatch case %v: want: %v, got: %v", i, std, v.StdDev()) |
| } |
| } |
| |
| func checkEntropy(t *testing.T, i int, x []float64, e entropyer, tol float64) { |
| tmp := make([]float64, len(x)) |
| for i, v := range x { |
| tmp[i] = -e.LogProb(v) |
| } |
| entropy := stat.Mean(tmp, nil) |
| if !floats.EqualWithinAbsOrRel(entropy, e.Entropy(), tol, tol) { |
| t.Errorf("Entropy mismatch case %v: want: %v, got: %v", i, entropy, e.Entropy()) |
| } |
| } |
| |
| func checkExKurtosis(t *testing.T, i int, x []float64, e exKurtosiser, tol float64) { |
| mean := e.Mean() |
| tmp := make([]float64, len(x)) |
| for i, x := range x { |
| tmp[i] = math.Pow(x-mean, 4) |
| } |
| variance := stat.Variance(x, nil) |
| mu4 := stat.Mean(tmp, nil) |
| kurtosis := mu4/(variance*variance) - 3 |
| if !floats.EqualWithinAbsOrRel(kurtosis, e.ExKurtosis(), tol, tol) { |
| t.Errorf("ExKurtosis mismatch case %v: want: %v, got: %v", i, kurtosis, e.ExKurtosis()) |
| } |
| } |
| |
| func checkSkewness(t *testing.T, i int, x []float64, s skewnesser, tol float64) { |
| mean := s.Mean() |
| std := s.StdDev() |
| tmp := make([]float64, len(x)) |
| for i, v := range x { |
| tmp[i] = math.Pow(v-mean, 3) |
| } |
| mu3 := stat.Mean(tmp, nil) |
| skewness := mu3 / math.Pow(std, 3) |
| if !floats.EqualWithinAbsOrRel(skewness, s.Skewness(), tol, tol) { |
| t.Errorf("Skewness mismatch case %v: want: %v, got: %v", i, skewness, s.Skewness()) |
| } |
| } |
| |
| func checkQuantileCDFSurvival(t *testing.T, i int, xs []float64, c cumulanter, tol float64) { |
| // Quantile, CDF, and survival check. |
| for i, p := range []float64{0.1, 0.25, 0.5, 0.75, 0.9} { |
| x := c.Quantile(p) |
| cdf := c.CDF(x) |
| estCDF := stat.CDF(x, stat.Empirical, xs, nil) |
| if !floats.EqualWithinAbsOrRel(cdf, estCDF, tol, tol) { |
| t.Errorf("CDF mismatch case %v: want: %v, got: %v", i, estCDF, cdf) |
| } |
| if !floats.EqualWithinAbsOrRel(cdf, p, tol, tol) { |
| t.Errorf("Quantile/CDF mismatch case %v: want: %v, got: %v", i, p, cdf) |
| } |
| if math.Abs(1-cdf-c.Survival(x)) > 1e-14 { |
| t.Errorf("Survival/CDF mismatch case %v: want: %v, got: %v", i, 1-cdf, c.Survival(x)) |
| } |
| } |
| } |
| |
| func checkProbContinuous(t *testing.T, i int, x []float64, p probLogprober, tol float64) { |
| // Check that the PDF is consistent (integrates to 1). |
| q := quad.Fixed(p.Prob, math.Inf(-1), math.Inf(1), 1000000, nil, 0) |
| if math.Abs(q-1) > tol { |
| t.Errorf("Probability distribution doesn't integrate to 1. Case %v: Got %v", i, q) |
| } |
| |
| // Check that PDF and LogPDF are consistent. |
| for i, v := range x { |
| if math.Abs(math.Log(p.Prob(v))-p.LogProb(v)) > 1e-14 { |
| t.Errorf("Prob and LogProb mismatch case %v at %v: want %v, got %v", i, v, math.Log(v), p.LogProb(v)) |
| break |
| } |
| } |
| } |
| |
| // checkProbQuantContinuous checks that the Prob, Rand, and Quantile are all consistent. |
| // checkProbContinuous only checks that Prob is a valid distribution (integrates |
| // to 1 and greater than 0). However, this is also true if the PDF of a different |
| // distribution is used. This checks that PDF is also consistent with the |
| // CDF implementation and the random samples. |
| func checkProbQuantContinuous(t *testing.T, i int, xs []float64, c cumulantProber, tol float64) { |
| ps := make([]float64, 101) |
| floats.Span(ps, 0, 1) |
| |
| var xp, x float64 |
| for i, p := range ps { |
| x = c.Quantile(p) |
| if p == 0 { |
| xp = x |
| if floats.Min(xs) < x { |
| t.Errorf("Sample of x less than Quantile(0). Case %v.", i) |
| break |
| } |
| continue |
| } |
| if p == 1 { |
| if floats.Max(xs) > x { |
| t.Errorf("Sample of x greater than Quantile(1). Case %v.", i) |
| break |
| } |
| } |
| |
| // The integral of the PDF between xp and x should be the difference in |
| // the quantiles. |
| q := quad.Fixed(c.Prob, xp, x, 1000, nil, 0) |
| if math.Abs(q-(p-ps[i-1])) > 1e-5 { |
| t.Errorf("Integral of PDF doesn't match quantile. Case %v. Want %v, got %v.", i, p-ps[i-1], q) |
| break |
| } |
| |
| pEst := stat.CDF(x, stat.Empirical, xs, nil) |
| if math.Abs(pEst-p) > tol { |
| t.Errorf("Empirical CDF doesn't match quantile. Case %v.", i) |
| } |
| xp = x |
| } |
| } |
| |
| // checkProbDiscrete confirms that PDF and Rand are consistent for discrete distributions. |
| func checkProbDiscrete(t *testing.T, i int, xs []float64, p probLogprober, tol float64) { |
| // Make a map of all of the unique samples. |
| m := make(map[float64]int) |
| for _, v := range xs { |
| m[v]++ |
| } |
| for x, count := range m { |
| prob := float64(count) / float64(len(xs)) |
| if math.Abs(prob-p.Prob(x)) > tol { |
| t.Errorf("PDF mismatch case %v at %v: want %v, got %v", i, x, prob, p.Prob(x)) |
| } |
| if math.Abs(math.Log(p.Prob(x))-p.LogProb(x)) > 1e-14 { |
| t.Errorf("Prob and LogProb mismatch case %v at %v: want %v, got %v", i, x, math.Log(x), p.LogProb(x)) |
| } |
| } |
| } |
| |
| // dist is a type that implements the standard set of routines. |
| type fullDist interface { |
| CDF(x float64) float64 |
| Entropy() float64 |
| ExKurtosis() float64 |
| LogProb(x float64) float64 |
| Mean() float64 |
| Median() float64 |
| NumParameters() int |
| Prob(x float64) float64 |
| Quantile(p float64) float64 |
| Rand() float64 |
| Skewness() float64 |
| StdDev() float64 |
| Survival(x float64) float64 |
| Variance() float64 |
| } |
| |
| // testFullDist tests all of the functions of a fullDist. |
| func testFullDist(t *testing.T, f fullDist, i int, continuous bool) { |
| tol := 1e-2 |
| const n = 1e6 |
| x := make([]float64, n) |
| generateSamples(x, f) |
| sort.Float64s(x) |
| |
| checkMean(t, i, x, f, tol) |
| checkVarAndStd(t, i, x, f, tol) |
| checkEntropy(t, i, x, f, tol) |
| checkExKurtosis(t, i, x, f, tol) |
| checkSkewness(t, i, x, f, tol) |
| if continuous { |
| // In a discrete distribution, the median may not have positive probability. |
| checkMedian(t, i, x, f, tol) |
| // In a discrete distribution, the CDF and Quantile may not be perfect mappings. |
| checkQuantileCDFSurvival(t, i, x, f, tol) |
| // Integrate over the PDF |
| checkProbContinuous(t, i, x, f, 1e-10) |
| checkProbQuantContinuous(t, i, x, f, tol) |
| } else { |
| // Check against empirical PDF. |
| checkProbDiscrete(t, i, x, f, tol) |
| } |
| } |
| |
| // testRandLogProb tests that LogProb and Rand give consistent results. This |
| // can be used when the distribution does not implement CDF. |
| func testRandLogProbContinuous(t *testing.T, i int, min float64, x []float64, f LogProber, tol float64, bins int) { |
| for cdf := 1 / float64(bins); cdf <= 1-1/float64(bins); cdf += 1 / float64(bins) { |
| // Get the estimated CDF from the samples |
| pt := stat.Quantile(cdf, stat.Empirical, x, nil) |
| |
| prob := func(x float64) float64 { |
| return math.Exp(f.LogProb(x)) |
| } |
| // Integrate the PDF to find the CDF |
| estCDF := quad.Fixed(prob, min, pt, 1000, nil, 0) |
| if !floats.EqualWithinAbsOrRel(cdf, estCDF, tol, tol) { |
| t.Errorf("Mismatch between integral of PDF and empirical CDF. Case %v. Want %v, got %v", i, cdf, estCDF) |
| } |
| } |
| } |