stat/distuv/distribution_test.go - third_party/github.com/gonum/gonum - Git at Google

 // Copyright ©2015 The gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package distuv

 import (
 	"math"
 	"sort"
 	"testing"

 	"gonum.org/v1/gonum/floats"
 	"gonum.org/v1/gonum/integrate/quad"
 	"gonum.org/v1/gonum/stat"
 )

 type meaner interface {
 	Mean() float64
 }

 type quantiler interface {
 	Quantile(float64) float64
 }

 type medianer interface {
 	quantiler
 	Median() float64
 }

 type varStder interface {
 	StdDev() float64
 	Variance() float64
 }

 type entropyer interface {
 	LogProber
 	Entropy() float64
 }

 type exKurtosiser interface {
 	ExKurtosis() float64
 	Mean() float64
 }

 type skewnesser interface {
 	StdDev() float64
 	Mean() float64
 	Skewness() float64
 }

 type cumulanter interface {
 	Quantiler
 	CDF(x float64) float64
 	Survival(x float64) float64
 }

 func generateSamples(x []float64, r Rander) {
 	for i := range x {
 		x[i] = r.Rand()
 	}
 }

 type probLogprober interface {
 	Prob(x float64) float64
 	LogProb(x float64) float64
 }

 type cumulantProber interface {
 	cumulanter
 	probLogprober
 }

 func checkMean(t *testing.T, i int, x []float64, m meaner, tol float64) {
 	mean := stat.Mean(x, nil)
 	if !floats.EqualWithinAbsOrRel(mean, m.Mean(), tol, tol) {
 		t.Errorf("Mean mismatch case %v: want: %v, got: %v", i, mean, m.Mean())
 	}
 }

 func checkMedian(t *testing.T, i int, x []float64, m medianer, tol float64) {
 	median := stat.Quantile(0.5, stat.Empirical, x, nil)
 	if !floats.EqualWithinAbsOrRel(median, m.Median(), tol, tol) {
 		t.Errorf("Median mismatch case %v: want: %v, got: %v", i, median, m.Median())
 	}
 }

 func checkVarAndStd(t *testing.T, i int, x []float64, v varStder, tol float64) {
 	variance := stat.Variance(x, nil)
 	if !floats.EqualWithinAbsOrRel(variance, v.Variance(), tol, tol) {
 		t.Errorf("Variance mismatch case %v: want: %v, got: %v", i, variance, v.Variance())
 	}
 	std := math.Sqrt(variance)
 	if !floats.EqualWithinAbsOrRel(std, v.StdDev(), tol, tol) {
 		t.Errorf("StdDev mismatch case %v: want: %v, got: %v", i, std, v.StdDev())
 	}
 }

 func checkEntropy(t *testing.T, i int, x []float64, e entropyer, tol float64) {
 	tmp := make([]float64, len(x))
 	for i, v := range x {
 		tmp[i] = -e.LogProb(v)
 	}
 	entropy := stat.Mean(tmp, nil)
 	if !floats.EqualWithinAbsOrRel(entropy, e.Entropy(), tol, tol) {
 		t.Errorf("Entropy mismatch case %v: want: %v, got: %v", i, entropy, e.Entropy())
 	}
 }

 func checkExKurtosis(t *testing.T, i int, x []float64, e exKurtosiser, tol float64) {
 	mean := e.Mean()
 	tmp := make([]float64, len(x))
 	for i, x := range x {
 		tmp[i] = math.Pow(x-mean, 4)
 	}
 	variance := stat.Variance(x, nil)
 	mu4 := stat.Mean(tmp, nil)
 	kurtosis := mu4/(variance*variance) - 3
 	if !floats.EqualWithinAbsOrRel(kurtosis, e.ExKurtosis(), tol, tol) {
 		t.Errorf("ExKurtosis mismatch case %v: want: %v, got: %v", i, kurtosis, e.ExKurtosis())
 	}
 }

 func checkSkewness(t *testing.T, i int, x []float64, s skewnesser, tol float64) {
 	mean := s.Mean()
 	std := s.StdDev()
 	tmp := make([]float64, len(x))
 	for i, v := range x {
 		tmp[i] = math.Pow(v-mean, 3)
 	}
 	mu3 := stat.Mean(tmp, nil)
 	skewness := mu3 / math.Pow(std, 3)
 	if !floats.EqualWithinAbsOrRel(skewness, s.Skewness(), tol, tol) {
 		t.Errorf("Skewness mismatch case %v: want: %v, got: %v", i, skewness, s.Skewness())
 	}
 }

 func checkQuantileCDFSurvival(t *testing.T, i int, xs []float64, c cumulanter, tol float64) {
 	// Quantile, CDF, and survival check.
 	for i, p := range []float64{0.1, 0.25, 0.5, 0.75, 0.9} {
 		x := c.Quantile(p)
 		cdf := c.CDF(x)
 		estCDF := stat.CDF(x, stat.Empirical, xs, nil)
 		if !floats.EqualWithinAbsOrRel(cdf, estCDF, tol, tol) {
 			t.Errorf("CDF mismatch case %v: want: %v, got: %v", i, estCDF, cdf)
 		}
 		if !floats.EqualWithinAbsOrRel(cdf, p, tol, tol) {
 			t.Errorf("Quantile/CDF mismatch case %v: want: %v, got: %v", i, p, cdf)
 		}
 		if math.Abs(1-cdf-c.Survival(x)) > 1e-14 {
 			t.Errorf("Survival/CDF mismatch case %v: want: %v, got: %v", i, 1-cdf, c.Survival(x))
 		}
 	}
 }

 func checkProbContinuous(t *testing.T, i int, x []float64, p probLogprober, tol float64) {
 	// Check that the PDF is consistent (integrates to 1).
 	q := quad.Fixed(p.Prob, math.Inf(-1), math.Inf(1), 1000000, nil, 0)
 	if math.Abs(q-1) > tol {
 		t.Errorf("Probability distribution doesn't integrate to 1. Case %v: Got %v", i, q)
 	}

 	// Check that PDF and LogPDF are consistent.
 	for i, v := range x {
 		if math.Abs(math.Log(p.Prob(v))-p.LogProb(v)) > 1e-14 {
 			t.Errorf("Prob and LogProb mismatch case %v at %v: want %v, got %v", i, v, math.Log(v), p.LogProb(v))
 			break
 		}
 	}
 }

 // checkProbQuantContinuous checks that the Prob, Rand, and Quantile are all consistent.
 // checkProbContinuous only checks that Prob is a valid distribution (integrates
 // to 1 and greater than 0). However, this is also true if the PDF of a different
 // distribution is used. This checks that PDF is also consistent with the
 // CDF implementation and the random samples.
 func checkProbQuantContinuous(t *testing.T, i int, xs []float64, c cumulantProber, tol float64) {
 	ps := make([]float64, 101)
 	floats.Span(ps, 0, 1)

 	var xp, x float64
 	for i, p := range ps {
 		x = c.Quantile(p)
 		if p == 0 {
 			xp = x
 			if floats.Min(xs) < x {
 				t.Errorf("Sample of x less than Quantile(0). Case %v.", i)
 				break
 			}
 			continue
 		}
 		if p == 1 {
 			if floats.Max(xs) > x {
 				t.Errorf("Sample of x greater than Quantile(1). Case %v.", i)
 				break
 			}
 		}

 		// The integral of the PDF between xp and x should be the difference in
 		// the quantiles.
 		q := quad.Fixed(c.Prob, xp, x, 1000, nil, 0)
 		if math.Abs(q-(p-ps[i-1])) > 1e-5 {
 			t.Errorf("Integral of PDF doesn't match quantile. Case %v. Want %v, got %v.", i, p-ps[i-1], q)
 			break
 		}

 		pEst := stat.CDF(x, stat.Empirical, xs, nil)
 		if math.Abs(pEst-p) > tol {
 			t.Errorf("Empirical CDF doesn't match quantile. Case %v.", i)
 		}
 		xp = x
 	}
 }

 // checkProbDiscrete confirms that PDF and Rand are consistent for discrete distributions.
 func checkProbDiscrete(t *testing.T, i int, xs []float64, p probLogprober, tol float64) {
 	// Make a map of all of the unique samples.
 	m := make(map[float64]int)
 	for _, v := range xs {
 		m[v]++
 	}
 	for x, count := range m {
 		prob := float64(count) / float64(len(xs))
 		if math.Abs(prob-p.Prob(x)) > tol {
 			t.Errorf("PDF mismatch case %v at %v: want %v, got %v", i, x, prob, p.Prob(x))
 		}
 		if math.Abs(math.Log(p.Prob(x))-p.LogProb(x)) > 1e-14 {
 			t.Errorf("Prob and LogProb mismatch case %v at %v: want %v, got %v", i, x, math.Log(x), p.LogProb(x))
 		}
 	}
 }

 // dist is a type that implements the standard set of routines.
 type fullDist interface {
 	CDF(x float64) float64
 	Entropy() float64
 	ExKurtosis() float64
 	LogProb(x float64) float64
 	Mean() float64
 	Median() float64
 	NumParameters() int
 	Prob(x float64) float64
 	Quantile(p float64) float64
 	Rand() float64
 	Skewness() float64
 	StdDev() float64
 	Survival(x float64) float64
 	Variance() float64
 }

 // testFullDist tests all of the functions of a fullDist.
 func testFullDist(t *testing.T, f fullDist, i int, continuous bool) {
 	tol := 1e-2
 	const n = 1e6
 	x := make([]float64, n)
 	generateSamples(x, f)
 	sort.Float64s(x)

 	checkMean(t, i, x, f, tol)
 	checkVarAndStd(t, i, x, f, tol)
 	checkEntropy(t, i, x, f, tol)
 	checkExKurtosis(t, i, x, f, tol)
 	checkSkewness(t, i, x, f, tol)
 	if continuous {
 		// In a discrete distribution, the median may not have positive probability.
 		checkMedian(t, i, x, f, tol)
 		// In a discrete distribution, the CDF and Quantile may not be perfect mappings.
 		checkQuantileCDFSurvival(t, i, x, f, tol)
 		// Integrate over the PDF
 		checkProbContinuous(t, i, x, f, 1e-10)
 		checkProbQuantContinuous(t, i, x, f, tol)
 	} else {
 		// Check against empirical PDF.
 		checkProbDiscrete(t, i, x, f, tol)
 	}
 }

 // testRandLogProb tests that LogProb and Rand give consistent results. This
 // can be used when the distribution does not implement CDF.
 func testRandLogProbContinuous(t *testing.T, i int, min float64, x []float64, f LogProber, tol float64, bins int) {
 	for cdf := 1 / float64(bins); cdf <= 1-1/float64(bins); cdf += 1 / float64(bins) {
 		// Get the estimated CDF from the samples
 		pt := stat.Quantile(cdf, stat.Empirical, x, nil)

 		prob := func(x float64) float64 {
 			return math.Exp(f.LogProb(x))
 		}
 		// Integrate the PDF to find the CDF
 		estCDF := quad.Fixed(prob, min, pt, 1000, nil, 0)
 		if !floats.EqualWithinAbsOrRel(cdf, estCDF, tol, tol) {
 			t.Errorf("Mismatch between integral of PDF and empirical CDF. Case %v. Want %v, got %v", i, cdf, estCDF)
 		}
 	}
 }
	// Copyright ©2015 The gonum Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package distuv

	import (
	"math"
	"sort"
	"testing"

	"gonum.org/v1/gonum/floats"
	"gonum.org/v1/gonum/integrate/quad"
	"gonum.org/v1/gonum/stat"
	)

	type meaner interface {
	Mean() float64
	}

	type quantiler interface {
	Quantile(float64) float64
	}

	type medianer interface {
	quantiler
	Median() float64
	}

	type varStder interface {
	StdDev() float64
	Variance() float64
	}

	type entropyer interface {
	LogProber
	Entropy() float64
	}

	type exKurtosiser interface {
	ExKurtosis() float64
	Mean() float64
	}

	type skewnesser interface {
	StdDev() float64
	Mean() float64
	Skewness() float64
	}

	type cumulanter interface {
	Quantiler
	CDF(x float64) float64
	Survival(x float64) float64
	}

	func generateSamples(x []float64, r Rander) {
	for i := range x {
	x[i] = r.Rand()
	}
	}

	type probLogprober interface {
	Prob(x float64) float64
	LogProb(x float64) float64
	}

	type cumulantProber interface {
	cumulanter
	probLogprober
	}

	func checkMean(t *testing.T, i int, x []float64, m meaner, tol float64) {
	mean := stat.Mean(x, nil)
	if !floats.EqualWithinAbsOrRel(mean, m.Mean(), tol, tol) {
	t.Errorf("Mean mismatch case %v: want: %v, got: %v", i, mean, m.Mean())
	}
	}

	func checkMedian(t *testing.T, i int, x []float64, m medianer, tol float64) {
	median := stat.Quantile(0.5, stat.Empirical, x, nil)
	if !floats.EqualWithinAbsOrRel(median, m.Median(), tol, tol) {
	t.Errorf("Median mismatch case %v: want: %v, got: %v", i, median, m.Median())
	}
	}

	func checkVarAndStd(t *testing.T, i int, x []float64, v varStder, tol float64) {
	variance := stat.Variance(x, nil)
	if !floats.EqualWithinAbsOrRel(variance, v.Variance(), tol, tol) {
	t.Errorf("Variance mismatch case %v: want: %v, got: %v", i, variance, v.Variance())
	}
	std := math.Sqrt(variance)
	if !floats.EqualWithinAbsOrRel(std, v.StdDev(), tol, tol) {
	t.Errorf("StdDev mismatch case %v: want: %v, got: %v", i, std, v.StdDev())
	}
	}

	func checkEntropy(t *testing.T, i int, x []float64, e entropyer, tol float64) {
	tmp := make([]float64, len(x))
	for i, v := range x {
	tmp[i] = -e.LogProb(v)
	}
	entropy := stat.Mean(tmp, nil)
	if !floats.EqualWithinAbsOrRel(entropy, e.Entropy(), tol, tol) {
	t.Errorf("Entropy mismatch case %v: want: %v, got: %v", i, entropy, e.Entropy())
	}
	}

	func checkExKurtosis(t *testing.T, i int, x []float64, e exKurtosiser, tol float64) {
	mean := e.Mean()
	tmp := make([]float64, len(x))
	for i, x := range x {
	tmp[i] = math.Pow(x-mean, 4)
	}
	variance := stat.Variance(x, nil)
	mu4 := stat.Mean(tmp, nil)
	kurtosis := mu4/(variance*variance) - 3
	if !floats.EqualWithinAbsOrRel(kurtosis, e.ExKurtosis(), tol, tol) {
	t.Errorf("ExKurtosis mismatch case %v: want: %v, got: %v", i, kurtosis, e.ExKurtosis())
	}
	}

	func checkSkewness(t *testing.T, i int, x []float64, s skewnesser, tol float64) {
	mean := s.Mean()
	std := s.StdDev()
	tmp := make([]float64, len(x))
	for i, v := range x {
	tmp[i] = math.Pow(v-mean, 3)
	}
	mu3 := stat.Mean(tmp, nil)
	skewness := mu3 / math.Pow(std, 3)
	if !floats.EqualWithinAbsOrRel(skewness, s.Skewness(), tol, tol) {
	t.Errorf("Skewness mismatch case %v: want: %v, got: %v", i, skewness, s.Skewness())
	}
	}

	func checkQuantileCDFSurvival(t *testing.T, i int, xs []float64, c cumulanter, tol float64) {
	// Quantile, CDF, and survival check.
	for i, p := range []float64{0.1, 0.25, 0.5, 0.75, 0.9} {
	x := c.Quantile(p)
	cdf := c.CDF(x)
	estCDF := stat.CDF(x, stat.Empirical, xs, nil)
	if !floats.EqualWithinAbsOrRel(cdf, estCDF, tol, tol) {
	t.Errorf("CDF mismatch case %v: want: %v, got: %v", i, estCDF, cdf)
	}
	if !floats.EqualWithinAbsOrRel(cdf, p, tol, tol) {
	t.Errorf("Quantile/CDF mismatch case %v: want: %v, got: %v", i, p, cdf)
	}
	if math.Abs(1-cdf-c.Survival(x)) > 1e-14 {
	t.Errorf("Survival/CDF mismatch case %v: want: %v, got: %v", i, 1-cdf, c.Survival(x))
	}
	}
	}

	func checkProbContinuous(t *testing.T, i int, x []float64, p probLogprober, tol float64) {
	// Check that the PDF is consistent (integrates to 1).
	q := quad.Fixed(p.Prob, math.Inf(-1), math.Inf(1), 1000000, nil, 0)
	if math.Abs(q-1) > tol {
	t.Errorf("Probability distribution doesn't integrate to 1. Case %v: Got %v", i, q)
	}

	// Check that PDF and LogPDF are consistent.
	for i, v := range x {
	if math.Abs(math.Log(p.Prob(v))-p.LogProb(v)) > 1e-14 {
	t.Errorf("Prob and LogProb mismatch case %v at %v: want %v, got %v", i, v, math.Log(v), p.LogProb(v))
	break
	}
	}
	}

	// checkProbQuantContinuous checks that the Prob, Rand, and Quantile are all consistent.
	// checkProbContinuous only checks that Prob is a valid distribution (integrates
	// to 1 and greater than 0). However, this is also true if the PDF of a different
	// distribution is used. This checks that PDF is also consistent with the
	// CDF implementation and the random samples.
	func checkProbQuantContinuous(t *testing.T, i int, xs []float64, c cumulantProber, tol float64) {
	ps := make([]float64, 101)
	floats.Span(ps, 0, 1)

	var xp, x float64
	for i, p := range ps {
	x = c.Quantile(p)
	if p == 0 {
	xp = x
	if floats.Min(xs) < x {
	t.Errorf("Sample of x less than Quantile(0). Case %v.", i)
	break
	}
	continue
	}
	if p == 1 {
	if floats.Max(xs) > x {
	t.Errorf("Sample of x greater than Quantile(1). Case %v.", i)
	break
	}
	}

	// The integral of the PDF between xp and x should be the difference in
	// the quantiles.
	q := quad.Fixed(c.Prob, xp, x, 1000, nil, 0)
	if math.Abs(q-(p-ps[i-1])) > 1e-5 {
	t.Errorf("Integral of PDF doesn't match quantile. Case %v. Want %v, got %v.", i, p-ps[i-1], q)
	break
	}

	pEst := stat.CDF(x, stat.Empirical, xs, nil)
	if math.Abs(pEst-p) > tol {
	t.Errorf("Empirical CDF doesn't match quantile. Case %v.", i)
	}
	xp = x
	}
	}

	// checkProbDiscrete confirms that PDF and Rand are consistent for discrete distributions.
	func checkProbDiscrete(t *testing.T, i int, xs []float64, p probLogprober, tol float64) {
	// Make a map of all of the unique samples.
	m := make(map[float64]int)
	for _, v := range xs {
	m[v]++
	}
	for x, count := range m {
	prob := float64(count) / float64(len(xs))
	if math.Abs(prob-p.Prob(x)) > tol {
	t.Errorf("PDF mismatch case %v at %v: want %v, got %v", i, x, prob, p.Prob(x))
	}
	if math.Abs(math.Log(p.Prob(x))-p.LogProb(x)) > 1e-14 {
	t.Errorf("Prob and LogProb mismatch case %v at %v: want %v, got %v", i, x, math.Log(x), p.LogProb(x))
	}
	}
	}

	// dist is a type that implements the standard set of routines.
	type fullDist interface {
	CDF(x float64) float64
	Entropy() float64
	ExKurtosis() float64
	LogProb(x float64) float64
	Mean() float64
	Median() float64
	NumParameters() int
	Prob(x float64) float64
	Quantile(p float64) float64
	Rand() float64
	Skewness() float64
	StdDev() float64
	Survival(x float64) float64
	Variance() float64
	}

	// testFullDist tests all of the functions of a fullDist.
	func testFullDist(t *testing.T, f fullDist, i int, continuous bool) {
	tol := 1e-2
	const n = 1e6
	x := make([]float64, n)
	generateSamples(x, f)
	sort.Float64s(x)

	checkMean(t, i, x, f, tol)
	checkVarAndStd(t, i, x, f, tol)
	checkEntropy(t, i, x, f, tol)
	checkExKurtosis(t, i, x, f, tol)
	checkSkewness(t, i, x, f, tol)
	if continuous {
	// In a discrete distribution, the median may not have positive probability.
	checkMedian(t, i, x, f, tol)
	// In a discrete distribution, the CDF and Quantile may not be perfect mappings.
	checkQuantileCDFSurvival(t, i, x, f, tol)
	// Integrate over the PDF
	checkProbContinuous(t, i, x, f, 1e-10)
	checkProbQuantContinuous(t, i, x, f, tol)
	} else {
	// Check against empirical PDF.
	checkProbDiscrete(t, i, x, f, tol)
	}
	}

	// testRandLogProb tests that LogProb and Rand give consistent results. This
	// can be used when the distribution does not implement CDF.
	func testRandLogProbContinuous(t *testing.T, i int, min float64, x []float64, f LogProber, tol float64, bins int) {
	for cdf := 1 / float64(bins); cdf <= 1-1/float64(bins); cdf += 1 / float64(bins) {
	// Get the estimated CDF from the samples
	pt := stat.Quantile(cdf, stat.Empirical, x, nil)

	prob := func(x float64) float64 {
	return math.Exp(f.LogProb(x))
	}
	// Integrate the PDF to find the CDF
	estCDF := quad.Fixed(prob, min, pt, 1000, nil, 0)
	if !floats.EqualWithinAbsOrRel(cdf, estCDF, tol, tol) {
	t.Errorf("Mismatch between integral of PDF and empirical CDF. Case %v. Want %v, got %v", i, cdf, estCDF)
	}
	}
	}