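Use the report's configured string sketch parameters (num_cells_per_hash and num_hashes), instead of hard-coded CountMin sketch dimensions, when encoding private string observations.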
Bug: b/288965410
Change-Id: Ide9591f4aac1b60a2f516cf90c10cd78db1d0786
Reviewed-on: https://fuchsia-review.googlesource.com/c/cobalt/+/1011073
Reviewed-by: Alex Pankhurst <pankhurst@google.com>
Commit-Queue: Alexandre Zani <azani@google.com>
diff --git a/src/logger/privacy_encoder.cc b/src/logger/privacy_encoder.cc
index 245517e..f83d7ca 100644
--- a/src/logger/privacy_encoder.cc
+++ b/src/logger/privacy_encoder.cc
@@ -12,13 +12,6 @@
using google::protobuf::RepeatedPtrField;
namespace cobalt::logger {
-namespace {
-
-// The dimensions of a CountMin sketch for a report of type StringCounts.
-const size_t kNumCountMinCellsPerHash = 10;
-const size_t kNumCountMinHashes = 5;
-
-} // namespace
PrivacyEncoder::PrivacyEncoder(std::unique_ptr<SecureBitGeneratorInterface<uint32_t>> secure_gen,
std::unique_ptr<BitGeneratorInterface<uint32_t>> gen)
@@ -147,7 +140,7 @@
switch (report_def.report_type()) {
case ReportDefinition::STRING_COUNTS:
case ReportDefinition::UNIQUE_DEVICE_STRING_COUNTS: {
- return kNumCountMinCellsPerHash;
+ return report_def.string_sketch_params().num_cells_per_hash();
}
default: {
return Status(StatusCode::INVALID_ARGUMENT,
@@ -161,7 +154,7 @@
switch (report_def.report_type()) {
case ReportDefinition::STRING_COUNTS:
case ReportDefinition::UNIQUE_DEVICE_STRING_COUNTS: {
- return kNumCountMinHashes;
+ return report_def.string_sketch_params().num_hashes();
}
default: {
return Status(StatusCode::INVALID_ARGUMENT,
diff --git a/src/logger/privacy_encoder_test.cc b/src/logger/privacy_encoder_test.cc
index 1e21f0d..1a83374 100644
--- a/src/logger/privacy_encoder_test.cc
+++ b/src/logger/privacy_encoder_test.cc
@@ -1144,6 +1144,8 @@
TEST_F(PrivacyEncoderTest, MaxIndexForReportStringCounts) {
uint64_t max_event_code = 9;
uint32_t num_index_points = 6;
+ int32_t num_cells_per_hash = 15;
+ int32_t num_hashes = 11;
// |metric_def| has 10 valid event vectors.
MetricDefinition metric_def;
@@ -1156,10 +1158,10 @@
ReportDefinition report_def;
report_def.set_report_type(ReportDefinition::STRING_COUNTS);
report_def.set_num_index_points(num_index_points);
-
- CB_ASSERT_OK_AND_ASSIGN(size_t num_cells_per_hash,
- PrivacyEncoder::GetNumCountMinCellsPerHash(report_def));
- CB_ASSERT_OK_AND_ASSIGN(size_t num_hashes, PrivacyEncoder::GetNumCountMinHashes(report_def));
+ StringSketchParameters sketch_params;
+ sketch_params.set_num_cells_per_hash(num_cells_per_hash);
+ sketch_params.set_num_hashes(num_hashes);
+ *report_def.mutable_string_sketch_params() = sketch_params;
// The expected max index is:
// (# of valid event vectors) * (# valid count values) * (size of count min sketch) - 1
@@ -1174,6 +1176,8 @@
TEST_F(PrivacyEncoderTest, MaxIndexForReportUniqueDeviceStringCounts) {
uint64_t max_event_code = 9;
+ int32_t num_cells_per_hash = 15;
+ int32_t num_hashes = 11;
// |metric_def| has 10 valid event vectors.
MetricDefinition metric_def;
@@ -1184,10 +1188,10 @@
ReportDefinition report_def;
report_def.set_report_type(ReportDefinition::UNIQUE_DEVICE_STRING_COUNTS);
-
- CB_ASSERT_OK_AND_ASSIGN(size_t num_cells_per_hash,
- PrivacyEncoder::GetNumCountMinCellsPerHash(report_def));
- CB_ASSERT_OK_AND_ASSIGN(size_t num_hashes, PrivacyEncoder::GetNumCountMinHashes(report_def));
+ StringSketchParameters sketch_params;
+ sketch_params.set_num_cells_per_hash(num_cells_per_hash);
+ sketch_params.set_num_hashes(num_hashes);
+ *report_def.mutable_string_sketch_params() = sketch_params;
// The expected max index is: (# of valid event vectors) * (size of count min sketch) - 1 = 10 *
// 50 - 1 = 499.