[Cobalt 1.1 privacy] Calculate privacy params for StringCounts reports
Adds support for StringCounts reports to the registry parser's
privacy noise parameter calculator.
For a StringCounts report, the per-event-vector bucket count that feeds
into the report's sparsity is the smaller of:
- the total number of cells in a CountMin sketch for the report
  (currently hard-coded to 50: 5 hashes with 10 cells per hash)
- the parent metric's string_buffer_max (if set) times the
  number of hashes in a CountMin sketch for the report.
The tests expect the sparsity to be event_code_buffer_max times this count.
Change-Id: I61e1b5db0a298841cc9f4ed35a105146b572e890
Reviewed-on: https://fuchsia-review.googlesource.com/c/cobalt/+/507063
Fuchsia-Auto-Submit: Laura Peskin <pesk@google.com>
Commit-Queue: Auto-Submit <auto-submit@fuchsia-infra.iam.gserviceaccount.com>
Reviewed-by: Alexandre Zani <azani@google.com>
diff --git a/src/bin/config_parser/src/config_parser/populate_privacy_params_test.go b/src/bin/config_parser/src/config_parser/populate_privacy_params_test.go
index dbbe5f6..d790517 100644
--- a/src/bin/config_parser/src/config_parser/populate_privacy_params_test.go
+++ b/src/bin/config_parser/src/config_parser/populate_privacy_params_test.go
@@ -31,6 +31,8 @@
// points given by |testParamRecords|.
func TestPopulateParamsForReport(t *testing.T) {
var eventCodeBufferMax uint64 = 10
+ var smallEventCodeBufferMax uint64 = 2
+ var stringBufferMax uint32 = 1
var minValue int64 = 0
var maxValue int64 = 100
@@ -42,8 +44,10 @@
EventCodeBufferMax: eventCodeBufferMax,
}
stringMetric := config.MetricDefinition{
- MetricName: "StringMetric",
- MetricType: config.MetricDefinition_STRING,
+ MetricName: "StringMetric",
+ MetricType: config.MetricDefinition_STRING,
+ EventCodeBufferMax: smallEventCodeBufferMax,
+ StringBufferMax: stringBufferMax,
}
highPrivacyFleetwideOccurrenceCountsReport := config.ReportDefinition{
ReportName: "HighPrivacyFleetwideOccurrenceCounts",
@@ -102,7 +106,6 @@
}
var tests = []struct {
input args
- valid bool
expected expectedParams
}{
// Valid inputs:
@@ -110,30 +113,26 @@
// population is 15000.
//
// The best-match record has key (1.0, 10000, 10).
- {args{&occurrenceMetric, &highPrivacyFleetwideOccurrenceCountsReport}, true, expectedParams{0.019537480548024178, 4}},
+ {args{&occurrenceMetric, &highPrivacyFleetwideOccurrenceCountsReport}, expectedParams{0.019537480548024178, 4}},
// The best-match record has key (5.0, 10000, 10).
- {args{&occurrenceMetric, &mediumPrivacyFleetwideOccurrenceCountsReport}, true, expectedParams{0.0014028092846274376, 10}},
+ {args{&occurrenceMetric, &mediumPrivacyFleetwideOccurrenceCountsReport}, expectedParams{0.0014028092846274376, 10}},
// The best-match record has key (10.0, 10000, 10).
- {args{&occurrenceMetric, &lowPrivacyFleetwideOccurrenceCountsReport}, true, expectedParams{0.0008652620017528534, 12}},
+ {args{&occurrenceMetric, &lowPrivacyFleetwideOccurrenceCountsReport}, expectedParams{0.0008652620017528534, 12}},
+ // The best-match record has key (1.0, 10000, 10).
+ {args{&stringMetric, &highPrivacyStringCountsReport}, expectedParams{0.019537480548024178, 4}},
// The reports have no added privacy, so both ProbBitFlip and NumIndexPoints should remain 0.
// STRING_COUNTS with no added privacy are supported.
- {args{&occurrenceMetric, &noAddedPrivacyFleetwideOccurrenceCountsReport}, true, expectedParams{0.0, 0}},
- {args{&stringMetric, &noAddedPrivacyStringCountsReport}, true, expectedParams{0.0, 0}},
+ {args{&occurrenceMetric, &noAddedPrivacyFleetwideOccurrenceCountsReport}, expectedParams{0.0, 0}},
+ {args{&stringMetric, &noAddedPrivacyStringCountsReport}, expectedParams{0.0, 0}},
// The report has no privacy level set, so both ProbBitFlip and NumIndexPoints should remain 0.
// (All real reports should have a privacy level, enforced by the config validator.
// However, we want to allow test reports with an unset privacy level.)
- {args{&occurrenceMetric, &unsetPrivacyLevelReport}, true, expectedParams{0.0, 0}},
- //
- // Invalid inputs:
- // STRING_COUNTS reports with added privacy are not supported yet.
- {args{&stringMetric, &highPrivacyStringCountsReport}, false, expectedParams{}},
+ {args{&occurrenceMetric, &unsetPrivacyLevelReport}, expectedParams{0.0, 0}},
}
for _, test := range tests {
err := populateParamsForReport(calc, test.input.metric, test.input.report)
- if test.valid && err != nil {
+ if err != nil {
t.Errorf("populateParamsForReport() failed for report %s: %v", test.input.report.ReportName, err)
- } else if !test.valid && err == nil {
- t.Errorf("populateParamsForReport() accepted invalid input: metric %s, report %s", test.input.metric.MetricName, test.input.report.ReportName)
} else {
if test.input.report.ProbBitFlip != test.expected.probBitFlip {
t.Errorf("populateParamsForReport() wrote incorrect ProbBitFlip for report %s: expected %f, got %f",
diff --git a/src/bin/config_parser/src/privacy/privacy_encoding_params.go b/src/bin/config_parser/src/privacy/privacy_encoding_params.go
index 7b8f5f1..8e2c577 100644
--- a/src/bin/config_parser/src/privacy/privacy_encoding_params.go
+++ b/src/bin/config_parser/src/privacy/privacy_encoding_params.go
@@ -411,7 +411,15 @@
case config.MetricDefinition_INTEGER_HISTOGRAM:
numBuckets, err = getNumHistogramBuckets(metric.IntBuckets)
case config.MetricDefinition_STRING:
- numBuckets, err = numBuckets, fmt.Errorf("STRING metrics are not supported yet")
+ numCellsPerHash, numHashes, err := getCountMinSketchDimensionsForReport(report)
+ if err != nil {
+ return numBuckets, err
+ }
+ if metric.StringBufferMax != 0 && uint64(metric.StringBufferMax) < numCellsPerHash {
+ numBuckets, err = uint64(metric.StringBufferMax)*numHashes, nil
+ } else {
+ numBuckets, err = numCellsPerHash*numHashes, nil
+ }
default:
err = fmt.Errorf("unsupported metric type %v", metric.MetricType)
}
@@ -433,6 +441,17 @@
return numBuckets, err
}
+// getCountMinSketchDimensionsForReport returns the CountMin sketch dimensions for a STRING_COUNTS report:
+// 10 cells per hash and 5 hashes, both currently hard-coded. Any other report type yields zero dimensions and an error.
+func getCountMinSketchDimensionsForReport(report *config.ReportDefinition) (numCellsPerHash uint64, numHashes uint64, err error) {
+ if report.ReportType != config.ReportDefinition_STRING_COUNTS {
+ return numCellsPerHash, numHashes, fmt.Errorf("expected report of type StringCounts, found %v", report.ReportType)
+ }
+ numCellsPerHash = 10
+ numHashes = 5
+ return numCellsPerHash, numHashes, nil
+}
+
// Returns the paramsMapKey{e, p, s} with the following properties:
// - |e| is the greatest mapped epsilon value which is *less than or equal* to |epsilon|
// - |p| is the greatest mapped population value which is *less than or equal to* |population|
diff --git a/src/bin/config_parser/src/privacy/privacy_encoding_params_test.go b/src/bin/config_parser/src/privacy/privacy_encoding_params_test.go
index d084d70..68fafb2 100644
--- a/src/bin/config_parser/src/privacy/privacy_encoding_params_test.go
+++ b/src/bin/config_parser/src/privacy/privacy_encoding_params_test.go
@@ -230,6 +230,10 @@
var numLinearBuckets uint32 = 7
var maxEventCode uint32 = 2
+ // The hard-coded dimensions of a CountMin sketch for StringCounts reports.
+ var numCellsPerHash uint64 = 10
+ var numHashes uint64 = 5
+
linearBuckets := config.LinearIntegerBuckets{NumBuckets: numLinearBuckets}
buckets := config.IntegerBuckets{
Buckets: &config.IntegerBuckets_Linear{&linearBuckets},
@@ -242,23 +246,33 @@
// Metrics
occurrenceMetric := config.MetricDefinition{
+ MetricName: "OccurrenceMetric",
MetricType: config.MetricDefinition_OCCURRENCE,
EventCodeBufferMax: eventCodeBufferMax,
}
occurrenceMetricEventCodeBufferMaxUnset := config.MetricDefinition{
+ MetricName: "OccurenceMetricEventCodeBufferMaxUnset",
MetricType: config.MetricDefinition_OCCURRENCE,
MetricDimensions: []*config.MetricDefinition_MetricDimension{&dimension},
}
integerMetric := config.MetricDefinition{
+ MetricName: "IntegerMetric",
MetricType: config.MetricDefinition_INTEGER,
EventCodeBufferMax: eventCodeBufferMax,
}
integerHistogramMetric := config.MetricDefinition{
+ MetricName: "IntegerHistogramMetric",
MetricType: config.MetricDefinition_INTEGER_HISTOGRAM,
EventCodeBufferMax: eventCodeBufferMax,
IntBuckets: &buckets,
}
- stringMetric := config.MetricDefinition{
+ stringMetricNoStringBufferMax := config.MetricDefinition{
+ MetricName: "StringMetricNoStringBufferMax",
+ MetricType: config.MetricDefinition_STRING,
+ EventCodeBufferMax: eventCodeBufferMax,
+ }
+ stringMetricWithStringBufferMax := config.MetricDefinition{
+ MetricName: "StringMetricWithStringBufferMax",
MetricType: config.MetricDefinition_STRING,
EventCodeBufferMax: eventCodeBufferMax,
StringBufferMax: stringBufferMax,
@@ -350,11 +364,13 @@
{args{&integerMetric, &uniqueDeviceNumericStatsReport}, true, eventCodeBufferMax},
{args{&integerMetric, &hourlyValueNumericStatsReport}, true, eventCodeBufferMax},
- {args{&integerHistogramMetric, &fleetwideHistogramsForIntHistogramReport}, true, eventCodeBufferMax * uint64(numLinearBuckets)},
+ {args{&integerHistogramMetric, &fleetwideHistogramsForIntHistogramReport}, true,
+ eventCodeBufferMax * uint64(numLinearBuckets)},
+
+ {args{&stringMetricNoStringBufferMax, &stringCountsReport}, true, eventCodeBufferMax * numCellsPerHash * numHashes},
+ {args{&stringMetricWithStringBufferMax, &stringCountsReport}, true, eventCodeBufferMax * uint64(stringBufferMax) * numHashes},
// Invalid input:
- // STRING metrics are not supported yet.
- {args{&stringMetric, &stringCountsReport}, false, 0},
// This report does not have a report type set.
{args{&occurrenceMetric, &unsetReportTypeReport}, false, 0},
}
@@ -365,7 +381,8 @@
} else if !test.valid && err == nil {
t.Errorf("getSparsityForReport() accepted invalid report: %s", test.input.report.ReportName)
} else if test.valid && result != test.expected {
- t.Errorf("getSparsityForReport() for report %s: expected %d, got %d", test.input.report.ReportName, test.expected, result)
+ t.Errorf("getSparsityForReport() for metric %s and report %s: expected %d, got %d",
+ test.input.metric.MetricName, test.input.report.ReportName, test.expected, result)
}
}
}