Fix bugs in error estimation formulas
This fixes CL fixes three issues in the error estimation formulas.
go/tq-cobalt-error-formulae
1. FLEETWIDE_OCCURRENCE_COUNTS incorrectly used the MaxCount field as
an upper bound on the user's contribution. It has been updated to be
the difference between MinValue and MaxValue.
2. FLEETWIDE_HISTOGRAMS incorrectly assumed a user's contribution was in
[0, 1]. This has been updated to account for user contributions up to a
value of MaxCount.
3. FLEETWIDE_MEANS fully updated to match new formula.
Change-Id: I0969eba26cf5bd4e6de6dbd71e04d2b801dcdb84
Reviewed-on: https://fuchsia-review.googlesource.com/c/cobalt/+/472819
Commit-Queue: Jared Weinstein <jaredweinstein@google.com>
Reviewed-by: Pasin Manurangsi <pasin@google.com>
diff --git a/src/bin/config_parser/src/privacy/error_calculator.go b/src/bin/config_parser/src/privacy/error_calculator.go
index f0e9c50..6c78165 100644
--- a/src/bin/config_parser/src/privacy/error_calculator.go
+++ b/src/bin/config_parser/src/privacy/error_calculator.go
@@ -50,18 +50,22 @@
}
// For report types with an hourly contribution interval, the population is set to (24*population) pseudo-users.
+ probBitFlip := privacyEncodingParams.ProbBitFlip
+ discretization := uint64(privacyEncodingParams.NumIndexPoints)
var errorEstimate float64
switch report.GetReportType() {
case config.ReportDefinition_UNIQUE_DEVICE_HISTOGRAMS:
fallthrough // Calculate RMSE Error for each bucket.
case config.ReportDefinition_UNIQUE_DEVICE_COUNTS:
- errorEstimate = SingleContributionRapporRMSE(population, privacyEncodingParams)
+ errorEstimate = SingleContributionRapporRMSE(population, probBitFlip)
case config.ReportDefinition_HOURLY_VALUE_HISTOGRAMS:
- errorEstimate = SingleContributionRapporRMSE(24*population, privacyEncodingParams)
+ errorEstimate = SingleContributionRapporRMSE(24*population, probBitFlip)
case config.ReportDefinition_FLEETWIDE_OCCURRENCE_COUNTS:
- errorEstimate = MultipleContributionRapporRMSE(24*population, privacyEncodingParams, report.MaxCount)
+ contributionRange := uint64(report.MaxValue - report.MinValue)
+ errorEstimate = MultipleContributionRapporRMSE(24*population, probBitFlip, contributionRange, discretization)
case config.ReportDefinition_FLEETWIDE_HISTOGRAMS:
- errorEstimate = MultipleContributionRapporRMSE(24*population, privacyEncodingParams, 1)
+ contributionRange := report.MaxCount
+ errorEstimate = MultipleContributionRapporRMSE(24*population, probBitFlip, contributionRange, discretization)
// TODO(jaredweinstein): Enable NumericStats once the formula is resolved.
// case config.ReportDefinition_UNIQUE_DEVICE_NUMERIC_STATS:
// if err := meanReportConfigurationError(report, minDenominatorEstimate); err != nil {
@@ -77,7 +81,7 @@
if err := meanReportConfigurationError(report, minDenominatorEstimate); err != nil {
return -1, err
}
- errorEstimate = FleetwideMeansRapporRMSE(24*population, privacyEncodingParams, minDenominatorEstimate, report)
+ errorEstimate = FleetwideMeansRapporRMSE(24*population, probBitFlip, minDenominatorEstimate, discretization, report)
default:
reportType := config.ReportDefinition_ReportType_name[int32(report.GetReportType())]
return -1, fmt.Errorf("Error estimation is not supported for reports of type %s.", reportType)
@@ -92,19 +96,18 @@
// Compute the 2D-RAPPOR RMSE for reports with single contributions.
//
// See Proposition 1 and 2 of go/histogram-aggregation-privacy-guarantee.
-func SingleContributionRapporRMSE(population uint64, privacyParams PrivacyEncodingParams) float64 {
- probBitFlip := privacyParams.ProbBitFlip
+func SingleContributionRapporRMSE(population uint64, probBitFlip float64) float64 {
return math.Sqrt(float64(population)*probBitFlip*(1-probBitFlip)) / (1 - 2*probBitFlip)
}
// Compute 2D-RAPPOR RMSE for reports with multiple or real-number contributions.
//
// See Proposition 1, 2, and 3 of go/histogram-aggregation-privacy-guarantee.
-func MultipleContributionRapporRMSE(population uint64, privacyParams PrivacyEncodingParams, maxUserContribution uint64) float64 {
+func MultipleContributionRapporRMSE(population uint64, probBitFlip float64, contributionRange uint64, discretization uint64) float64 {
var n = float64(population)
- var p = privacyParams.ProbBitFlip
- var m = float64(maxUserContribution)
- var r = float64(privacyParams.NumIndexPoints)
+ var p = probBitFlip
+ var m = float64(contributionRange)
+ var r = float64(discretization)
var estimate = n * p * (1 - p) / math.Pow((1-2*p), 2)
estimate = estimate * (2*math.Pow(r, 3) + 3*math.Pow(r, 2) + r) / 6
@@ -130,28 +133,26 @@
// return meanRapporRMSE(mseNumerator, mseDenominator, sigma, a, float64(M), float64(minDenominatorEstimate))
// }
-func FleetwideMeansRapporRMSE(population uint64, params PrivacyEncodingParams, minDenominatorEstimate uint64, report *config.ReportDefinition) float64 {
- t := float64(report.MaxCount)
- sigma := SingleContributionRapporRMSE(population, params) *
- math.Sqrt(float64(t*(t+1)*(2*t+1)/6))
- a := params.ProbBitFlip * t / (1 - 2*params.ProbBitFlip)
- M := uint64(math.Max(math.Abs(float64(report.MinValue)), math.Abs(float64(report.MaxValue))))
+func FleetwideMeansRapporRMSE(population uint64, probBitFlip float64, minDenominatorEstimate uint64, discretization uint64, report *config.ReportDefinition) float64 {
+ t := report.MaxCount
+ a := probBitFlip * float64(t) / (1 - 2*probBitFlip)
+ T := float64(math.Max(float64(-report.MinValue), float64(report.MaxValue)))
// Numerator is aggregated like FleetwideOccurrenceCounts.
- rmseNumerator := MultipleContributionRapporRMSE(population, params, M)
+ numeratorContributionRange := uint64(report.MaxValue - report.MinValue)
+ rmseNumerator := MultipleContributionRapporRMSE(population, probBitFlip, numeratorContributionRange, discretization)
mseNumerator := math.Pow(rmseNumerator, 2)
- // Denominator is aggregated like UniqueDeviceCounts.
- rmseDenominator := SingleContributionRapporRMSE(population, params)
- mseDenominator := math.Pow(rmseDenominator, 2)
+ // Denominator RMSE aggregated like FleetwideHistograms.
+ sigma := MultipleContributionRapporRMSE(population, probBitFlip, t, discretization)
- return meanRapporRMSE(mseNumerator, mseDenominator, sigma, a, float64(M), float64(minDenominatorEstimate))
+ return meanRapporRMSE(mseNumerator, sigma, a, T, float64(minDenominatorEstimate))
}
-func meanRapporRMSE(mseNum float64, mseDenom float64, sigma float64, a float64, T float64, B float64) float64 {
+func meanRapporRMSE(mseNum float64, sigma float64, a float64, T float64, B float64) float64 {
return math.Sqrt(mseNum*eOne(B, sigma, a, 1) +
math.Pow(T, 2)*eTwo(B, sigma, a, 1) +
- math.Pow(T/B, 2)*mseDenom)
+ math.Pow(T/B, 2)*math.Pow(sigma, 2))
}
// E1 term defined in Lemma 10.
diff --git a/src/bin/config_parser/src/privacy/error_calculator_test.go b/src/bin/config_parser/src/privacy/error_calculator_test.go
index b7c8736..96700ad 100644
--- a/src/bin/config_parser/src/privacy/error_calculator_test.go
+++ b/src/bin/config_parser/src/privacy/error_calculator_test.go
@@ -48,18 +48,34 @@
ReportName: "FleetwideOccurrenceCounts",
ReportType: config.ReportDefinition_FLEETWIDE_OCCURRENCE_COUNTS,
LocalAggregationProcedure: config.ReportDefinition_SELECT_FIRST,
- MaxCount: 1,
+ MaxValue: 5,
+ MinValue: 1,
}
fleetwideOccurrenceCountHighMax := config.ReportDefinition{
- ReportName: "FleetwideOccurrenceCountsWithHighMaxCount",
+ ReportName: "FleetwideOccurrenceCountsWithHighMaxValue",
ReportType: config.ReportDefinition_FLEETWIDE_OCCURRENCE_COUNTS,
LocalAggregationProcedure: config.ReportDefinition_SELECT_FIRST,
- MaxCount: 4,
+ MaxValue: 20,
+ MinValue: 1,
+ }
+ fleetwideOccurrenceCountNegativeValue := config.ReportDefinition{
+ ReportName: "FleetwideOccurrenceCountsNegativeValue",
+ ReportType: config.ReportDefinition_FLEETWIDE_OCCURRENCE_COUNTS,
+ LocalAggregationProcedure: config.ReportDefinition_SELECT_FIRST,
+ MaxValue: -1,
+ MinValue: -5,
}
fleetwideHistogram := config.ReportDefinition{
ReportName: "FleetwideHistograms",
ReportType: config.ReportDefinition_FLEETWIDE_HISTOGRAMS,
LocalAggregationProcedure: config.ReportDefinition_SELECT_FIRST,
+ MaxCount: 5,
+ }
+ fleetwideHistogramHighMax := config.ReportDefinition{
+ ReportName: "FleetwideHistogramsWithHighMaxCount",
+ ReportType: config.ReportDefinition_FLEETWIDE_HISTOGRAMS,
+ LocalAggregationProcedure: config.ReportDefinition_SELECT_FIRST,
+ MaxCount: 20,
}
// uniqueDeviceNumericStats := config.ReportDefinition{
// ReportName: "UniqueDeviceNumericStats",
@@ -78,14 +94,16 @@
fleetwideMeans := config.ReportDefinition{
ReportName: "FleetwideMeans",
ReportType: config.ReportDefinition_FLEETWIDE_MEANS,
- MaxValue: 1,
- MaxCount: 1,
+ MinValue: 0,
+ MaxValue: 5,
+ MaxCount: 5,
}
fleetwideMeansHighMaxCount := config.ReportDefinition{
ReportName: "FleetwideMeansHighMaxCount",
ReportType: config.ReportDefinition_FLEETWIDE_MEANS,
- MaxValue: 1,
- MaxCount: 4,
+ MinValue: 0,
+ MaxValue: 5,
+ MaxCount: 20,
}
fleetwideMeansMissingMaxValue := config.ReportDefinition{
ReportName: "FleetwideMeansMissingMaxValue",
@@ -119,13 +137,15 @@
{args{&testMetric, &hourlyValueHistogram, 1, 20000, 0}, true, 34.19422624116276},
// Multi-contribution reports
- {args{&testMetric, &fleetwideOccurrenceCount, 1, 10000, 0}, true, 52.893723859652496},
- {args{&testMetric, &fleetwideOccurrenceCount, 10, 10000, 0}, true, 37.430720721998675},
- {args{&testMetric, &fleetwideOccurrenceCount, 1, 20000, 0}, true, 74.80302164673793},
- {args{&testMetric, &fleetwideOccurrenceCountHighMax, 1, 10000, 0}, true, 211.57489543860999},
- {args{&testMetric, &fleetwideHistogram, 1, 10000, 0}, true, 52.893723859652496},
- {args{&testMetric, &fleetwideHistogram, 10, 10000, 0}, true, 37.430720721998675},
- {args{&testMetric, &fleetwideHistogram, 1, 20000, 0}, true, 74.80302164673793},
+ {args{&testMetric, &fleetwideOccurrenceCount, 1, 10000, 0}, true, 211.57489543860999},
+ {args{&testMetric, &fleetwideOccurrenceCount, 10, 10000, 0}, true, 149.7228828879947},
+ {args{&testMetric, &fleetwideOccurrenceCount, 1, 20000, 0}, true, 299.2120865869517},
+ {args{&testMetric, &fleetwideOccurrenceCountHighMax, 1, 10000, 0}, true, 1004.9807533333975},
+ {args{&testMetric, &fleetwideOccurrenceCountNegativeValue, 1, 10000, 0}, true, 211.57489543860999},
+ {args{&testMetric, &fleetwideHistogram, 1, 10000, 0}, true, 264.4686192982625},
+ {args{&testMetric, &fleetwideHistogram, 10, 10000, 0}, true, 187.15360360999338},
+ {args{&testMetric, &fleetwideHistogram, 1, 20000, 0}, true, 374.0151082336897},
+ {args{&testMetric, &fleetwideHistogramHighMax, 1, 10000, 0}, true, 1057.87447719305},
// Mean reports
// {args{&testMetric, &uniqueDeviceNumericStats, 1, 10000, 500}, true, 0.027891803930152177},
@@ -136,17 +156,17 @@
// {args{&testMetric, &hourlyValueNumericStats, 10, 10000, 500}, true, 0.09424308127635954},
// {args{&testMetric, &hourlyValueNumericStats, 1, 20000, 500}, true, 0.20996453488325503},
// {args{&testMetric, &hourlyValueNumericStats, 1, 10000, 2000}, true, 0.03425744055252169},
- {args{&testMetric, &fleetwideMeans, 1, 10000, 500}, true, 0.14403990551821316},
- {args{&testMetric, &fleetwideMeans, 10, 10000, 500}, true, 0.09424308127635954},
- {args{&testMetric, &fleetwideMeans, 1, 20000, 500}, true, 0.20996453488325503},
- {args{&testMetric, &fleetwideMeans, 1, 10000, 2000}, true, 0.03425744055252169},
- {args{&testMetric, &fleetwideMeansHighMaxCount, 1, 10000, 500}, true, 14.67564563395404},
- {args{&testMetric, &fleetwideMeansHighMaxCount, 10, 10000, 500}, true, 0.3612570115350953},
- {args{&testMetric, &fleetwideMeansHighMaxCount, 1, 20000, 500}, true, 86.18119817858194},
- {args{&testMetric, &fleetwideMeansHighMaxCount, 1, 10000, 2000}, true, 0.11189363618734707},
+
+ {args{&testMetric, &fleetwideMeans, 1, 10000, 500}, true, 1034.0131324817698},
+ {args{&testMetric, &fleetwideMeans, 10, 10000, 500}, true, 426.22967217469716},
+ {args{&testMetric, &fleetwideMeans, 1, 20000, 500}, true, 1619.5676003943095},
+ {args{&testMetric, &fleetwideMeans, 1, 10000, 2000}, true, 1.4456690853472534},
+ {args{&testMetric, &fleetwideMeansHighMaxCount, 1, 10000, 500}, true, 2373.764851498005},
+ {args{&testMetric, &fleetwideMeansHighMaxCount, 10, 10000, 500}, true, 2239.957602630985},
+ {args{&testMetric, &fleetwideMeansHighMaxCount, 1, 20000, 500}, true, 2453.988452255676},
+ {args{&testMetric, &fleetwideMeansHighMaxCount, 1, 10000, 2000}, true, 4098.700976333229},
// Invalid input
- // Mean Report missing minDenominatorEstimate
// {args{&testMetric, &uniqueDeviceNumericStats, 1, 10000, 0}, false, 0},
// {args{&testMetric, &hourlyValueNumericStats, 1, 10000, 0}, false, 0},
// {args{&testMetric, &uniqueDeviceNumericStatsMissingMaxValue, 1, 10000, 500}, false, 0},