blob: 31113d8d9cd30d0d230ba5d7192e6ccc41855dca [file] [log] [blame]
// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package privacy
import (
"config"
"fmt"
"math"
"gonum.org/v1/gonum/integrate/quad"
)
type ErrorCalculator struct {
ParamsCalc PrivacyEncodingParamsCalculator
}
// Public factory method for creating an ErrorCalculator given a
// PrivacyEncodingParamsCalculator.
func NewErrorCalculator(paramsCalc PrivacyEncodingParamsCalculator) *ErrorCalculator {
return &ErrorCalculator{paramsCalc}
}
// Public factory method for creating an ErrorCalculator given the file path
// of the PrivacyEncodingParams.
func NewErrorCalculatorFromPrivacyParams(privacyParamsPath string) (*ErrorCalculator, error) {
paramsCalculator, err := NewPrivacyEncodingParamsCalculator(privacyParamsPath)
if err != nil {
return nil, err
}
errorCalculator := NewErrorCalculator(*paramsCalculator)
if err != nil {
return nil, err
}
return errorCalculator, nil
}
// Given a |metric|, |report|, and |params|, estimates the report row error.
func (e *ErrorCalculator) Estimate(metric *config.MetricDefinition, report *config.ReportDefinition, epsilon float64, population uint64, minDenominatorEstimate uint64) (estimate float64, err error) {
if epsilon == 0 {
return 0, nil
}
sparsity, err := getSparsityForReport(metric, report)
if err != nil {
return -1, err
}
rangeSize, err := GetIntegerRangeSizeForReport(report)
if err != nil {
return -1, err
}
privacyEncodingParams, err := e.ParamsCalc.GetPrivacyEncodingParams(epsilon, population, sparsity, rangeSize)
if err != nil {
return -1, err
}
probBitFlip := privacyEncodingParams.ProbBitFlip
discretization := uint64(privacyEncodingParams.NumIndexPoints)
var errorEstimate float64
switch report.GetReportType() {
case config.ReportDefinition_UNIQUE_DEVICE_HISTOGRAMS:
fallthrough // Calculate RMSE Error for each bucket.
case config.ReportDefinition_UNIQUE_DEVICE_COUNTS:
errorEstimate = singleContributionRapporRMSE(population, probBitFlip)
case config.ReportDefinition_HOURLY_VALUE_HISTOGRAMS:
population *= 24 // Hourly contribution interval: 24*population pseudo-users.
errorEstimate = singleContributionRapporRMSE(population, probBitFlip)
case config.ReportDefinition_FLEETWIDE_OCCURRENCE_COUNTS:
contributionRange := uint64(report.MaxValue - report.MinValue)
if contributionRange <= 0 {
return -1, fmt.Errorf("contribution range must be positive to estimate error")
}
population *= 24 // Hourly contribution interval: 24*population pseudo-users.
errorEstimate = multipleContributionRapporRMSE(population, probBitFlip, contributionRange, discretization)
case config.ReportDefinition_FLEETWIDE_HISTOGRAMS:
contributionRange := report.MaxCount
if contributionRange <= 0 {
return -1, fmt.Errorf("contribution range must be positive to estimate error")
}
population *= 24 // Hourly contribution interval: 24*population pseudo-users.
errorEstimate = multipleContributionRapporRMSE(population, probBitFlip, contributionRange, discretization)
case config.ReportDefinition_UNIQUE_DEVICE_NUMERIC_STATS:
if err := meanReportConfigurationError(report, minDenominatorEstimate); err != nil {
return -1, err
}
errorEstimate = numericStatsRapporRMSE(population, probBitFlip, minDenominatorEstimate, discretization, report)
case config.ReportDefinition_HOURLY_VALUE_NUMERIC_STATS:
if err := meanReportConfigurationError(report, minDenominatorEstimate); err != nil {
return -1, err
}
population *= 24 // Hourly contribution interval: 24*population pseudo-users.
errorEstimate = numericStatsRapporRMSE(population, probBitFlip, minDenominatorEstimate, discretization, report)
case config.ReportDefinition_FLEETWIDE_MEANS:
if err := meanReportConfigurationError(report, minDenominatorEstimate); err != nil {
return -1, err
}
population *= 24 // Hourly contribution interval: 24*population pseudo-users.
errorEstimate, err = fleetwideMeansRapporRMSE(population, probBitFlip, minDenominatorEstimate, discretization, report)
if err != nil {
return -1, err
}
default:
reportType := config.ReportDefinition_ReportType_name[int32(report.GetReportType())]
return -1, fmt.Errorf("error estimation is not supported for reports of type %s", reportType)
}
if math.IsNaN(errorEstimate) || math.IsInf(errorEstimate, 0) {
return errorEstimate, fmt.Errorf("error estimation failed to return valid result due to an invalid or missing field")
}
return errorEstimate, nil
}
// Compute the 2D-RAPPOR RMSE for reports with single contributions.
//
// See Proposition 1 and 2 of go/histogram-aggregation-privacy-guarantee.
func singleContributionRapporRMSE(population uint64, probBitFlip float64) float64 {
return math.Sqrt(float64(population)*probBitFlip*(1-probBitFlip)) / (1 - 2*probBitFlip)
}
// Compute 2D-RAPPOR RMSE for reports with multiple or real-number contributions.
//
// See Proposition 1, 2, and 3 of go/histogram-aggregation-privacy-guarantee.
func multipleContributionRapporRMSE(population uint64, probBitFlip float64, contributionRange uint64, discretization uint64) float64 {
var n = float64(population)
var p = probBitFlip
var m = float64(contributionRange)
var r = float64(discretization)
var estimate = n * p * (1 - p) / math.Pow((1-2*p), 2)
estimate = estimate * (2*math.Pow(r, 3) + 3*math.Pow(r, 2) + r) / 6
if contributionRange > discretization {
// Add rounding error.
estimate += n / 4
}
estimate = m / r * math.Sqrt(estimate)
return estimate
}
// Compute 2D-RAPPOR RMSE for NumericStats reports.
func numericStatsRapporRMSE(population uint64, probBitFlip float64, minDenominatorEstimate uint64, discretization uint64, report *config.ReportDefinition) float64 {
T := float64(math.Max(float64(-report.MinValue), float64(report.MaxValue)))
M := uint64(report.MaxValue - report.MinValue)
sigmaNum := multipleContributionRapporRMSE(population, probBitFlip, M, discretization)
alphaNum := probBitFlip * float64(M) / (1 - 2*probBitFlip)
sigmaDenom := singleContributionRapporRMSE(population, probBitFlip)
alphaDenom := probBitFlip / (1 - 2*probBitFlip)
return eThree(T, float64(minDenominatorEstimate), sigmaNum, alphaNum, sigmaDenom, alphaDenom)
}
// Compute 2D-RAPPOR RMSE for the FleetwideMeans report.
func fleetwideMeansRapporRMSE(population uint64, probBitFlip float64, minDenominatorEstimate uint64, discretization uint64, report *config.ReportDefinition) (float64, error) {
t := report.MaxCount
a := probBitFlip * float64(t) / (1 - 2*probBitFlip)
T := float64(math.Max(float64(-report.MinValue), float64(report.MaxValue)))
// Numerator is aggregated like FleetwideOccurrenceCounts.
numeratorContributionRange := uint64(report.MaxValue - report.MinValue)
rmseNumerator := multipleContributionRapporRMSE(population, probBitFlip, numeratorContributionRange, discretization)
mseNumerator := math.Pow(rmseNumerator, 2)
// Denominator RMSE aggregated like FleetwideHistograms.
sigma := multipleContributionRapporRMSE(population, probBitFlip, t, discretization)
// Find the max over a geometric sequence of minDenominator > minDenominatorEstimate.
ratio := 1.5
maxSequence := 20
previousError := 0.0
for n := 0; n < maxSequence; n++ {
minDenominator := float64(minDenominatorEstimate) * math.Pow(ratio, float64(n))
currentError := meanRapporRMSE(mseNumerator, sigma, a, T, minDenominator)
// Assume meanRapporRMSE is concave. This is not proven but is observed for
// significant values of minDenominatorEstimate (roughly estimate > 500)
if currentError <= previousError {
return previousError, nil
}
previousError = currentError
}
// No maximum found
return -1, fmt.Errorf("maximum not found after %v iterations", maxSequence)
}
func meanRapporRMSE(mseNum float64, sigma float64, a float64, T float64, B float64) float64 {
return math.Sqrt(mseNum*eOne(B, sigma, a, 1) +
math.Pow(T, 2)*eTwo(B, sigma, a, 1) +
math.Pow(T/B, 2)*math.Pow(sigma, 2))
}
// E1 term defined in Lemma 10.
func eOne(B float64, sigma float64, a float64, l float64) float64 {
f := func(y float64) float64 {
return (2 / math.Pow(B+y, 3)) *
math.Exp(-math.Pow(sigma, 2)/
math.Pow(a, 2)*
h(-a*y/math.Pow(sigma, 2)))
}
return 1/math.Pow(B, 2) + quad.Fixed(f, l-B, 0, 10000, nil, 0)
}
// E2 term defined in Lemma 10.
func eTwo(B float64, sigma float64, a float64, l float64) float64 {
f := func(y float64) float64 {
return (2 * B * y /
math.Pow(B+y, 3)) *
math.Exp(-math.Pow(sigma, 2)/
math.Pow(a, 2)*
h(-a*y/math.Pow(sigma, 2)))
}
return -quad.Fixed(f, l-B, 0, 10000, nil, 0)
}
func eThree(T float64, B float64, sigmaNum float64, alphaNum float64, sigmaDenom float64, alphaDenom float64) float64 {
f := func(y float64) float64 {
tOne := math.Sqrt(y) / (T + 1)
tTwo := math.Sqrt(y) * B / (math.Sqrt(y) + T + 1)
t := math.Max(tOne, tTwo)
return 2*math.Exp(-math.Pow(sigmaNum, 2)/math.Pow(alphaNum, 2)*
h(alphaNum*t/math.Pow(sigmaNum, 2))) +
2*math.Exp(-math.Pow(sigmaDenom, 2)/math.Pow(alphaDenom, 2)*
h(alphaDenom*t/math.Pow(sigmaDenom, 2)))
}
return quad.Fixed(f, 0, math.Inf(1), 10000, nil, 0)
}
func h(u float64) float64 {
return (1+u)*math.Log(1+u) - u
}
func meanReportConfigurationError(report *config.ReportDefinition, minDenominatorEstimate uint64) error {
if minDenominatorEstimate == 0 {
return fmt.Errorf("user estimate for lower bound on unnoised denominator required for %s", report.GetReportType())
}
if report.MaxValue == 0 && report.MinValue == 0 {
return fmt.Errorf("MinValue and MaxValue required to estimate error for %s", report.GetReportType())
}
return nil
}