[Cobalt 1.1 privacy] Implement MakePrivateObservations for FleetwideHistograms

Implements PrepareIndexVector() and MakePrivateObservations()
for FLEETWIDE_HISTOGRAMS reports using the existing encoding
for histogram (bucket_index, bucket_count) pairs, and adds the
matching FLEETWIDE_HISTOGRAMS case to the max-index computation.

Change-Id: I2f2c3738d5eff89a0a6f5a9d34638d4742247f1b
Reviewed-on: https://fuchsia-review.googlesource.com/c/cobalt/+/442903
Commit-Queue: Laura Peskin <pesk@google.com>
Reviewed-by: Alexandre Zani <azani@google.com>
diff --git a/src/logger/privacy_encoder.cc b/src/logger/privacy_encoder.cc
index 3e98aaa..949f696 100644
--- a/src/logger/privacy_encoder.cc
+++ b/src/logger/privacy_encoder.cc
@@ -97,6 +97,26 @@
       }
       return (GetNumEventVectors(metric_def.metric_dimensions()) * num_buckets.ValueOrDie()) - 1;
     }
+    case ReportDefinition::FLEETWIDE_HISTOGRAMS: {
+      lib::statusor::StatusOr<uint32_t> num_buckets =
+          GetNumIntegerBuckets(report_def.int_buckets());
+      switch (metric_def.metric_type()) {
+        case MetricDefinition::INTEGER: {
+          num_buckets = GetNumIntegerBuckets(report_def.int_buckets());
+          break;
+        }
+        case MetricDefinition::INTEGER_HISTOGRAM: {
+          num_buckets = GetNumIntegerBuckets(metric_def.int_buckets());
+          break;
+        }
+        default:
+          return util::Status(util::INVALID_ARGUMENT,
+                              "invalid metric type with FLEETWIDE_HISTOGRAMS report.");
+      }
+      return (GetNumEventVectors(metric_def.metric_dimensions()) * num_buckets.ValueOrDie() *
+              report_def.num_index_points()) -
+             1;
+    }
 
     default:
       return util::Status(util::UNIMPLEMENTED, "this is not yet implemented");
@@ -130,6 +150,11 @@
                                        observation, metric_def, report_def));
       break;
     }
+    case ReportDefinition::FLEETWIDE_HISTOGRAMS: {
+      CB_ASSIGN_OR_RETURN(indices, PrepareIndexVectorForFleetwideHistogramsReport(
+                                       observation, metric_def, report_def));
+      break;
+    }
 
     default:
       return util::Status(util::UNIMPLEMENTED, "this is not yet implemented");
@@ -260,4 +285,47 @@
   return occurred_indices;
 }
 
+lib::statusor::StatusOr<std::vector<uint64_t>>
+PrivacyEncoder::PrepareIndexVectorForFleetwideHistogramsReport(const Observation &observation,
+                                                               const MetricDefinition &metric_def,
+                                                               const ReportDefinition &report_def) {
+  std::vector<uint64_t> occurred_indices;
+  if (!observation.has_index_histogram()) {
+    return util::Status(util::INVALID_ARGUMENT,
+                        "observation type is not IndexHistogramObservation.");
+  }
+
+  for (const auto &histogram : observation.index_histogram().index_histograms()) {
+    std::vector<uint32_t> event_codes(histogram.event_codes().begin(),
+                                      histogram.event_codes().end());
+    CB_ASSIGN_OR_RETURN(auto event_vector_index, EventVectorToIndex(event_codes, metric_def));
+
+    // If histogram.bucket_indices() is empty, histogram.bucket_counts(i) is the count for the
+    // i-th index in the histogram.
+    if (histogram.bucket_indices_size() == 0) {
+      for (int64_t bucket_index = 0; bucket_index < histogram.bucket_counts_size();
+           ++bucket_index) {
+        uint64_t histogram_index = HistogramBucketAndCountToIndex(
+            histogram.bucket_counts(bucket_index), bucket_index, report_def.max_count(),
+            report_def.num_index_points(), gen_.get());
+        occurred_indices.push_back(ValueAndEventVectorIndicesToIndex(
+            histogram_index, event_vector_index,
+            GetNumEventVectors(metric_def.metric_dimensions()) - 1));
+      }
+    } else {
+      // If histogram.bucket_indices() is not empty, histogram.bucket_counts(i) is the count for
+      // the histogram.bucket_indices(i)-th index in the histogram.
+      for (int i = 0; i < histogram.bucket_indices_size(); ++i) {
+        uint64_t histogram_index = HistogramBucketAndCountToIndex(
+            histogram.bucket_counts(i), histogram.bucket_indices(i), report_def.max_count(),
+            report_def.num_index_points(), gen_.get());
+        occurred_indices.push_back(ValueAndEventVectorIndicesToIndex(
+            histogram_index, event_vector_index,
+            GetNumEventVectors(metric_def.metric_dimensions()) - 1));
+      }
+    }
+  }
+  return occurred_indices;
+}
+
 }  // namespace cobalt::logger
diff --git a/src/logger/privacy_encoder.h b/src/logger/privacy_encoder.h
index e38e3b8..2a8657f 100644
--- a/src/logger/privacy_encoder.h
+++ b/src/logger/privacy_encoder.h
@@ -76,6 +76,10 @@
                                                  const MetricDefinition &metric_def,
                                                  const ReportDefinition &report_def);
 
+  lib::statusor::StatusOr<std::vector<uint64_t>> PrepareIndexVectorForFleetwideHistogramsReport(
+      const Observation &observation, const MetricDefinition &metric_def,
+      const ReportDefinition &report_def);
+
   static std::vector<std::unique_ptr<Observation>> ObservationsFromIndices(
       const std::vector<uint64_t> &indices);
 
diff --git a/src/logger/privacy_encoder_test.cc b/src/logger/privacy_encoder_test.cc
index 153e633..7e6acc8 100644
--- a/src/logger/privacy_encoder_test.cc
+++ b/src/logger/privacy_encoder_test.cc
@@ -46,6 +46,13 @@
                                                                             report_def);
   }
 
+  lib::statusor::StatusOr<std::vector<uint64_t>> PrepareIndexVectorForFleetwideHistogramsReport(
+      const Observation &observation, const MetricDefinition &metric_def,
+      const ReportDefinition &report_def) {
+    return privacy_encoder_->PrepareIndexVectorForFleetwideHistogramsReport(observation, metric_def,
+                                                                            report_def);
+  }
+
   static std::vector<std::unique_ptr<Observation>> ObservationsFromIndices(
       const std::vector<uint64_t> &indices) {
     return PrivacyEncoder::ObservationsFromIndices(indices);
@@ -111,7 +118,8 @@
       ReportDefinition::UNIQUE_DEVICE_NUMERIC_STATS,
       ReportDefinition::FLEETWIDE_MEANS,
       ReportDefinition::HOURLY_VALUE_HISTOGRAMS,
-      ReportDefinition::UNIQUE_DEVICE_HISTOGRAMS};
+      ReportDefinition::UNIQUE_DEVICE_HISTOGRAMS,
+      ReportDefinition::FLEETWIDE_HISTOGRAMS};
 
   MetricDefinition metric_def;
   ReportDefinition report_def;
@@ -129,7 +137,7 @@
 // MakePrivateObservationsImplemented test as they are implemented.
 TEST_F(PrivacyEncoderTest, MakePrivateObservationsUnimplemented) {
   std::vector<ReportDefinition::ReportType> unimplemented_report_types = {
-      ReportDefinition::FLEETWIDE_HISTOGRAMS, ReportDefinition::STRING_COUNTS};
+      ReportDefinition::STRING_COUNTS};
 
   MetricDefinition metric_def;
   ReportDefinition report_def;
@@ -319,6 +327,111 @@
   EXPECT_EQ(status_or_indices.status().error_code(), util::INVALID_ARGUMENT);
 }
 
+TEST_F(PrivacyEncoderTest, FleetwideHistogramsIntegerMetric) {
+  // |metric_def| has 11 valid event vectors.
+  MetricDefinition metric_def;
+  metric_def.set_metric_type(MetricDefinition::INTEGER);
+  auto metric_dim = metric_def.add_metric_dimensions();
+  metric_dim->set_dimension("dimension 0");
+  metric_dim->set_max_event_code(10);
+
+  // Counting the underflow and overflow buckets, |int_buckets| has 8 + 2 = 10 valid bucket indices.
+  LinearIntegerBuckets int_buckets;
+  int_buckets.set_floor(0);
+  int_buckets.set_num_buckets(8);
+  int_buckets.set_step_size(2);
+
+  ReportDefinition report_def;
+  *report_def.mutable_int_buckets()->mutable_linear() = int_buckets;
+  // The numeric index cutoffs are {0, 2, 4, 6, 8, 10}.
+  report_def.set_max_count(10);
+  report_def.set_num_index_points(6);
+
+  // The general formula for the expected index is:
+  // (numeric_index(count) + num_index_points * bucket_index) * (num_event_vectors) +
+  // event_vector_index.
+  std::vector<uint64_t> expected_indices = {12, 89, 222, 365};
+  Observation observation;
+  IndexHistogramObservation *histogram_obs = observation.mutable_index_histogram();
+
+  // A histogram with event vector {1}, and (bucket_index, bucket_count) pairs (0, 2) and (1, 4).
+  //
+  // The numeric index of a count of 2 is 1, so the expected index for ({1}, (0, 2)) is 12.
+  // The numeric index of a count of 4 is 2, so the expected index for ({1}, (1, 4)) is 89.
+  IndexHistogram *histogram_no_indices = histogram_obs->add_index_histograms();
+  histogram_no_indices->add_event_codes(1u);
+  histogram_no_indices->add_bucket_counts(2u);
+  histogram_no_indices->add_bucket_counts(4u);
+
+  // A histogram with event vector {2}, and (bucket_index, bucket_count) pairs (3, 4) and (5, 6).
+  //
+  // The numeric index for a count of 4 is 2, so the expected index for ({2}, (3, 4)) is 222.
+  // The numeric index for a count of 6 is 3, so the expected index for ({2}, (5, 6)) is 365.
+  IndexHistogram *histogram_with_indices = histogram_obs->add_index_histograms();
+  histogram_with_indices->add_event_codes(2u);
+  histogram_with_indices->add_bucket_indices(3u);
+  histogram_with_indices->add_bucket_counts(4u);
+  histogram_with_indices->add_bucket_indices(5u);
+  histogram_with_indices->add_bucket_counts(6u);
+
+  lib::statusor::StatusOr<std::vector<uint64_t>> indices =
+      PrepareIndexVectorForFleetwideHistogramsReport(observation, metric_def, report_def);
+  ASSERT_TRUE(indices.ok());
+  EXPECT_EQ(indices.ValueOrDie(), expected_indices);
+}
+
+TEST_F(PrivacyEncoderTest, FleetwideHistogramsIntegerHistogramMetric) {
+  // |metric_def| has 11 valid event vectors.
+  MetricDefinition metric_def;
+  metric_def.set_metric_type(MetricDefinition::INTEGER_HISTOGRAM);
+  auto metric_dim = metric_def.add_metric_dimensions();
+  metric_dim->set_dimension("dimension 0");
+  metric_dim->set_max_event_code(10);
+
+  // Counting the underflow and overflow buckets, |int_buckets| has 8 + 2 = 10 valid bucket indices.
+  LinearIntegerBuckets int_buckets;
+  int_buckets.set_floor(0);
+  int_buckets.set_num_buckets(8);
+  int_buckets.set_step_size(2);
+  *metric_def.mutable_int_buckets()->mutable_linear() = int_buckets;
+
+  ReportDefinition report_def;
+  report_def.set_max_count(10);
+  report_def.set_num_index_points(6);
+
+  // The general formula for the expected index is:
+  // (numeric_index(count) + num_index_points * bucket_index) * (num_event_vectors) +
+  // event_vector_index.
+  std::vector<uint64_t> expected_indices = {12, 89, 222, 365};
+  Observation observation;
+  IndexHistogramObservation *histogram_obs = observation.mutable_index_histogram();
+
+  // A histogram with event vector {1}, and (bucket_index, bucket_count) pairs (0, 2) and (1, 4).
+  //
+  // The numeric index of a count of 2 is 1, so the expected index for ({1}, (0, 2)) is 12.
+  // The numeric index of a count of 4 is 2, so the expected index for ({1}, (1, 4)) is 89.
+  IndexHistogram *histogram_no_indices = histogram_obs->add_index_histograms();
+  histogram_no_indices->add_event_codes(1u);
+  histogram_no_indices->add_bucket_counts(2u);
+  histogram_no_indices->add_bucket_counts(4u);
+
+  // A histogram with event vector {2}, and (bucket_index, bucket_count) pairs (3, 4) and (5, 6).
+  //
+  // The numeric index for a count of 4 is 2, so the expected index for ({2}, (3, 4)) is 222.
+  // The numeric index for a count of 6 is 3, so the expected index for ({2}, (5, 6)) is 365.
+  IndexHistogram *histogram_with_indices = histogram_obs->add_index_histograms();
+  histogram_with_indices->add_event_codes(2u);
+  histogram_with_indices->add_bucket_indices(3u);
+  histogram_with_indices->add_bucket_counts(4u);
+  histogram_with_indices->add_bucket_indices(5u);
+  histogram_with_indices->add_bucket_counts(6u);
+
+  lib::statusor::StatusOr<std::vector<uint64_t>> indices =
+      PrepareIndexVectorForFleetwideHistogramsReport(observation, metric_def, report_def);
+  ASSERT_TRUE(indices.ok());
+  EXPECT_EQ(indices.ValueOrDie(), expected_indices);
+}
+
 TEST_F(PrivacyEncoderTest, ObservationsFromIndicesNoIndices) {
   std::vector<uint64_t> indices;
   auto observations = ObservationsFromIndices(indices);
@@ -444,6 +557,84 @@
   }
 }
 
+TEST_F(PrivacyEncoderTest, MaxIndexForReportFleetwideHistogramsIntegerMetric) {
+  MetricDefinition metric_def;
+  metric_def.set_metric_type(MetricDefinition::INTEGER);
+
+  uint32_t max_event_code = 10;
+  auto dim = metric_def.add_metric_dimensions();
+  dim->set_dimension("dimension 1");
+  dim->set_max_event_code(max_event_code);
+
+  IntegerBuckets linear_buckets;
+  linear_buckets.mutable_linear()->set_floor(0);
+  linear_buckets.mutable_linear()->set_num_buckets(3);
+
+  IntegerBuckets exp_buckets;
+  exp_buckets.mutable_exponential()->set_floor(0);
+  exp_buckets.mutable_exponential()->set_num_buckets(3);
+
+  std::vector<IntegerBuckets> bucket_variants = {linear_buckets, exp_buckets};
+
+  ReportDefinition report_def;
+  report_def.set_report_type(ReportDefinition::FLEETWIDE_HISTOGRAMS);
+  report_def.set_num_index_points(6);
+
+  // There are 11 event codes, 5 histogram buckets (3 registered + 1 underflow + 1 overflow), and 6
+  // numeric index points, for a total of 330 possible indices.
+  uint64_t expected_max_index = 329;
+
+  for (const auto &int_buckets : bucket_variants) {
+    *report_def.mutable_int_buckets() = int_buckets;
+
+    auto max_index = PrivacyEncoder::MaxIndexForReport(metric_def, report_def);
+    ASSERT_TRUE(max_index.ok()) << "Failed to get max index with status "
+                                << max_index.status().error_code() << ", "
+                                << max_index.status().error_message();
+
+    EXPECT_EQ(expected_max_index, max_index.ValueOrDie());
+  }
+}
+
+TEST_F(PrivacyEncoderTest, MaxIndexForReportFleetwideHistogramsIntegerHistogramMetric) {
+  MetricDefinition metric_def;
+  metric_def.set_metric_type(MetricDefinition::INTEGER_HISTOGRAM);
+
+  uint32_t max_event_code = 10;
+  auto dim = metric_def.add_metric_dimensions();
+  dim->set_dimension("dimension 1");
+  dim->set_max_event_code(max_event_code);
+
+  IntegerBuckets linear_buckets;
+  linear_buckets.mutable_linear()->set_floor(0);
+  linear_buckets.mutable_linear()->set_num_buckets(3);
+
+  IntegerBuckets exp_buckets;
+  exp_buckets.mutable_exponential()->set_floor(0);
+  exp_buckets.mutable_exponential()->set_num_buckets(3);
+
+  std::vector<IntegerBuckets> bucket_variants = {linear_buckets, exp_buckets};
+
+  ReportDefinition report_def;
+  report_def.set_report_type(ReportDefinition::FLEETWIDE_HISTOGRAMS);
+  report_def.set_num_index_points(6);
+
+  // There are 11 event codes, 5 histogram buckets (3 registered + 1 underflow + 1 overflow), and 6
+  // numeric index points, for a total of 330 possible indices.
+  uint64_t expected_max_index = 329;
+
+  for (const auto &int_buckets : bucket_variants) {
+    *metric_def.mutable_int_buckets() = int_buckets;
+
+    auto max_index = PrivacyEncoder::MaxIndexForReport(metric_def, report_def);
+    ASSERT_TRUE(max_index.ok()) << "Failed to get max index with status "
+                                << max_index.status().error_code() << ", "
+                                << max_index.status().error_message();
+
+    EXPECT_EQ(expected_max_index, max_index.ValueOrDie());
+  }
+}
+
 TEST_F(PrivacyEncoderTest, MaxIndexForReportUnimplemented) {
   MetricDefinition metric_def;
   ReportDefinition report_def;