[Cobalt 1.1 privacy] Calculate privacy params for StringCounts reports

Adds support for StringCounts reports to the registry parser's
privacy noise parameter calculator.

The number of buckets for a StringCounts report is the smaller of:
- the total number of cells in a CountMin sketch for the report
  (currently hard-coded to 50: 5 hashes with 10 cells per hash)
- the parent metric's string_buffer_max (if set) times the
  number of hashes in a CountMin sketch for the report.
As the updated tests show, the resulting sparsity is this number of
buckets times the metric's event_code_buffer_max.

Change-Id: I61e1b5db0a298841cc9f4ed35a105146b572e890
Reviewed-on: https://fuchsia-review.googlesource.com/c/cobalt/+/507063
Fuchsia-Auto-Submit: Laura Peskin <pesk@google.com>
Commit-Queue: Auto-Submit <auto-submit@fuchsia-infra.iam.gserviceaccount.com>
Reviewed-by: Alexandre Zani <azani@google.com>
diff --git a/src/bin/config_parser/src/config_parser/populate_privacy_params_test.go b/src/bin/config_parser/src/config_parser/populate_privacy_params_test.go
index dbbe5f6..d790517 100644
--- a/src/bin/config_parser/src/config_parser/populate_privacy_params_test.go
+++ b/src/bin/config_parser/src/config_parser/populate_privacy_params_test.go
@@ -31,6 +31,8 @@
 // points given by |testParamRecords|.
 func TestPopulateParamsForReport(t *testing.T) {
 	var eventCodeBufferMax uint64 = 10
+	var smallEventCodeBufferMax uint64 = 2
+	var stringBufferMax uint32 = 1
 
 	var minValue int64 = 0
 	var maxValue int64 = 100
@@ -42,8 +44,10 @@
 		EventCodeBufferMax: eventCodeBufferMax,
 	}
 	stringMetric := config.MetricDefinition{
-		MetricName: "StringMetric",
-		MetricType: config.MetricDefinition_STRING,
+		MetricName:         "StringMetric",
+		MetricType:         config.MetricDefinition_STRING,
+		EventCodeBufferMax: smallEventCodeBufferMax,
+		StringBufferMax:    stringBufferMax,
 	}
 	highPrivacyFleetwideOccurrenceCountsReport := config.ReportDefinition{
 		ReportName:   "HighPrivacyFleetwideOccurrenceCounts",
@@ -102,7 +106,6 @@
 	}
 	var tests = []struct {
 		input    args
-		valid    bool
 		expected expectedParams
 	}{
 		// Valid inputs:
@@ -110,30 +113,26 @@
 		// population is 15000.
 		//
 		// The best-match record has key (1.0, 10000, 10).
-		{args{&occurrenceMetric, &highPrivacyFleetwideOccurrenceCountsReport}, true, expectedParams{0.019537480548024178, 4}},
+		{args{&occurrenceMetric, &highPrivacyFleetwideOccurrenceCountsReport}, expectedParams{0.019537480548024178, 4}},
 		// The best-match record has key (5.0, 10000, 10).
-		{args{&occurrenceMetric, &mediumPrivacyFleetwideOccurrenceCountsReport}, true, expectedParams{0.0014028092846274376, 10}},
+		{args{&occurrenceMetric, &mediumPrivacyFleetwideOccurrenceCountsReport}, expectedParams{0.0014028092846274376, 10}},
 		// The best-match record has key (10.0, 10000, 10).
-		{args{&occurrenceMetric, &lowPrivacyFleetwideOccurrenceCountsReport}, true, expectedParams{0.0008652620017528534, 12}},
+		{args{&occurrenceMetric, &lowPrivacyFleetwideOccurrenceCountsReport}, expectedParams{0.0008652620017528534, 12}},
+		// The best-match record has key (1.0, 10000, 10).
+		{args{&stringMetric, &highPrivacyStringCountsReport}, expectedParams{0.019537480548024178, 4}},
 		// The reports have no added privacy, so both ProbBitFlip and NumIndexPoints should remain 0.
 		// STRING_COUNTS with no added privacy are supported.
-		{args{&occurrenceMetric, &noAddedPrivacyFleetwideOccurrenceCountsReport}, true, expectedParams{0.0, 0}},
-		{args{&stringMetric, &noAddedPrivacyStringCountsReport}, true, expectedParams{0.0, 0}},
+		{args{&occurrenceMetric, &noAddedPrivacyFleetwideOccurrenceCountsReport}, expectedParams{0.0, 0}},
+		{args{&stringMetric, &noAddedPrivacyStringCountsReport}, expectedParams{0.0, 0}},
 		// The report has no privacy level set, so both ProbBitFlip and NumIndexPoints should remain 0.
 		// (All real reports should have a privacy level, enforced by the config validator.
 		// However, we want to allow test reports with an unset privacy level.)
-		{args{&occurrenceMetric, &unsetPrivacyLevelReport}, true, expectedParams{0.0, 0}},
-		//
-		// Invalid inputs:
-		// STRING_COUNTS reports with added privacy are not supported yet.
-		{args{&stringMetric, &highPrivacyStringCountsReport}, false, expectedParams{}},
+		{args{&occurrenceMetric, &unsetPrivacyLevelReport}, expectedParams{0.0, 0}},
 	}
 	for _, test := range tests {
 		err := populateParamsForReport(calc, test.input.metric, test.input.report)
-		if test.valid && err != nil {
+		if err != nil {
 			t.Errorf("populateParamsForReport() failed for report %s: %v", test.input.report.ReportName, err)
-		} else if !test.valid && err == nil {
-			t.Errorf("populateParamsForReport() accepted invalid input: metric %s, report %s", test.input.metric.MetricName, test.input.report.ReportName)
 		} else {
 			if test.input.report.ProbBitFlip != test.expected.probBitFlip {
 				t.Errorf("populateParamsForReport() wrote incorrect ProbBitFlip for report %s: expected %f, got %f",
diff --git a/src/bin/config_parser/src/privacy/privacy_encoding_params.go b/src/bin/config_parser/src/privacy/privacy_encoding_params.go
index 7b8f5f1..8e2c577 100644
--- a/src/bin/config_parser/src/privacy/privacy_encoding_params.go
+++ b/src/bin/config_parser/src/privacy/privacy_encoding_params.go
@@ -411,7 +411,15 @@
 	case config.MetricDefinition_INTEGER_HISTOGRAM:
 		numBuckets, err = getNumHistogramBuckets(metric.IntBuckets)
 	case config.MetricDefinition_STRING:
-		numBuckets, err = numBuckets, fmt.Errorf("STRING metrics are not supported yet")
+		numCellsPerHash, numHashes, err := getCountMinSketchDimensionsForReport(report)
+		if err != nil {
+			return numBuckets, err
+		}
+		if metric.StringBufferMax != 0 && uint64(metric.StringBufferMax) < numCellsPerHash {
+			numBuckets, err = uint64(metric.StringBufferMax)*numHashes, nil
+		} else {
+			numBuckets, err = numCellsPerHash*numHashes, nil
+		}
 	default:
 		err = fmt.Errorf("unsupported metric type %v", metric.MetricType)
 	}
@@ -433,6 +441,17 @@
 	return numBuckets, err
 }
 
+// getCountMinSketchDimensionsForReport returns the CountMin sketch dimensions for |report|: 10 cells per hash
+// and 5 hashes, both currently hard-coded. It returns an error if |report| is not a STRING_COUNTS report.
+func getCountMinSketchDimensionsForReport(report *config.ReportDefinition) (numCellsPerHash uint64, numHashes uint64, err error) {
+	if report.ReportType != config.ReportDefinition_STRING_COUNTS {
+		return numCellsPerHash, numHashes, fmt.Errorf("expected report of type StringCounts, found %v", report.ReportType)
+	}
+	numCellsPerHash = 10
+	numHashes = 5
+	return numCellsPerHash, numHashes, nil
+}
+
 // Returns the paramsMapKey{e, p, s} with the following properties:
 // - |e| is the greatest mapped epsilon value which is *less than or equal* to |epsilon|
 // - |p| is the greatest mapped population value which is *less than or equal to* |population|
diff --git a/src/bin/config_parser/src/privacy/privacy_encoding_params_test.go b/src/bin/config_parser/src/privacy/privacy_encoding_params_test.go
index d084d70..68fafb2 100644
--- a/src/bin/config_parser/src/privacy/privacy_encoding_params_test.go
+++ b/src/bin/config_parser/src/privacy/privacy_encoding_params_test.go
@@ -230,6 +230,10 @@
 	var numLinearBuckets uint32 = 7
 	var maxEventCode uint32 = 2
 
+	// The hard-coded dimensions of a CountMin sketch for StringCounts reports.
+	var numCellsPerHash uint64 = 10
+	var numHashes uint64 = 5
+
 	linearBuckets := config.LinearIntegerBuckets{NumBuckets: numLinearBuckets}
 	buckets := config.IntegerBuckets{
 		Buckets: &config.IntegerBuckets_Linear{&linearBuckets},
@@ -242,23 +246,33 @@
 
 	// Metrics
 	occurrenceMetric := config.MetricDefinition{
+		MetricName:         "OccurrenceMetric",
 		MetricType:         config.MetricDefinition_OCCURRENCE,
 		EventCodeBufferMax: eventCodeBufferMax,
 	}
 	occurrenceMetricEventCodeBufferMaxUnset := config.MetricDefinition{
+		MetricName:       "OccurenceMetricEventCodeBufferMaxUnset",
 		MetricType:       config.MetricDefinition_OCCURRENCE,
 		MetricDimensions: []*config.MetricDefinition_MetricDimension{&dimension},
 	}
 	integerMetric := config.MetricDefinition{
+		MetricName:         "IntegerMetric",
 		MetricType:         config.MetricDefinition_INTEGER,
 		EventCodeBufferMax: eventCodeBufferMax,
 	}
 	integerHistogramMetric := config.MetricDefinition{
+		MetricName:         "IntegerHistogramMetric",
 		MetricType:         config.MetricDefinition_INTEGER_HISTOGRAM,
 		EventCodeBufferMax: eventCodeBufferMax,
 		IntBuckets:         &buckets,
 	}
-	stringMetric := config.MetricDefinition{
+	stringMetricNoStringBufferMax := config.MetricDefinition{
+		MetricName:         "StringMetricNoStringBufferMax",
+		MetricType:         config.MetricDefinition_STRING,
+		EventCodeBufferMax: eventCodeBufferMax,
+	}
+	stringMetricWithStringBufferMax := config.MetricDefinition{
+		MetricName:         "StringMetricWithStringBufferMax",
 		MetricType:         config.MetricDefinition_STRING,
 		EventCodeBufferMax: eventCodeBufferMax,
 		StringBufferMax:    stringBufferMax,
@@ -350,11 +364,13 @@
 		{args{&integerMetric, &uniqueDeviceNumericStatsReport}, true, eventCodeBufferMax},
 		{args{&integerMetric, &hourlyValueNumericStatsReport}, true, eventCodeBufferMax},
 
-		{args{&integerHistogramMetric, &fleetwideHistogramsForIntHistogramReport}, true, eventCodeBufferMax * uint64(numLinearBuckets)},
+		{args{&integerHistogramMetric, &fleetwideHistogramsForIntHistogramReport}, true,
+			eventCodeBufferMax * uint64(numLinearBuckets)},
+
+		{args{&stringMetricNoStringBufferMax, &stringCountsReport}, true, eventCodeBufferMax * numCellsPerHash * numHashes},
+		{args{&stringMetricWithStringBufferMax, &stringCountsReport}, true, eventCodeBufferMax * uint64(stringBufferMax) * numHashes},
 
 		// Invalid input:
-		// STRING metrics are not supported yet.
-		{args{&stringMetric, &stringCountsReport}, false, 0},
 		// This report does not have a report type set.
 		{args{&occurrenceMetric, &unsetReportTypeReport}, false, 0},
 	}
@@ -365,7 +381,8 @@
 		} else if !test.valid && err == nil {
 			t.Errorf("getSparsityForReport() accepted invalid report: %s", test.input.report.ReportName)
 		} else if test.valid && result != test.expected {
-			t.Errorf("getSparsityForReport() for report %s: expected %d, got %d", test.input.report.ReportName, test.expected, result)
+			t.Errorf("getSparsityForReport() for metric %s and report %s: expected %d, got %d",
+				test.input.metric.MetricName, test.input.report.ReportName, test.expected, result)
 		}
 	}
 }