[cobalt 1.1] Implement HistogramBuckets encoding with privacy.
Change-Id: I5c8e8e57d729ed88151cfc5d3749170e93f3af80
Reviewed-on: https://fuchsia-review.googlesource.com/c/cobalt/+/436075
Commit-Queue: Alexandre Zani <azani@google.com>
Reviewed-by: Laura Peskin <pesk@google.com>
diff --git a/src/algorithms/privacy/numeric_encoding.cc b/src/algorithms/privacy/numeric_encoding.cc
index d5107b7..9c496af 100644
--- a/src/algorithms/privacy/numeric_encoding.cc
+++ b/src/algorithms/privacy/numeric_encoding.cc
@@ -34,6 +34,13 @@
num_index_points;
}
+uint64_t HistogramBucketToIndex(uint64_t bucket_count, uint32_t bucket_index, uint64_t max_count,
+ uint64_t num_index_points, BitGeneratorInterface<uint32_t>* gen) {
+ return IntegerToIndex(static_cast<int64_t>(bucket_count), 0, static_cast<int64_t>(max_count),
+ num_index_points, gen) +
+ num_index_points * static_cast<uint64_t>(bucket_index);
+}
+
double DoubleFromIndex(uint64_t index, double min_value, double max_value,
uint64_t num_index_points) {
double interval_size = (max_value - min_value) / static_cast<double>(num_index_points - 1);
@@ -51,6 +58,14 @@
static_cast<int64_t>(max_count), num_index_points));
}
+void HistogramBucketFromIndex(uint64_t index, uint64_t max_count, uint64_t num_index_points,
+ uint64_t* bucket_count, uint32_t* bucket_index) {
+ *bucket_index = static_cast<uint32_t>(index / num_index_points);
+ uint64_t numeric_index = index - (*bucket_index * num_index_points);
+ *bucket_count = static_cast<uint64_t>(
+ IntegerFromIndex(numeric_index, 0, static_cast<int64_t>(max_count), num_index_points));
+}
+
uint64_t ValueAndEventVectorIndicesToIndex(uint64_t value_index, uint64_t event_vector_index,
uint64_t max_event_vector_index) {
return value_index * (max_event_vector_index + 1) + event_vector_index;
diff --git a/src/algorithms/privacy/numeric_encoding.h b/src/algorithms/privacy/numeric_encoding.h
index 415f0cd..83e16d7 100644
--- a/src/algorithms/privacy/numeric_encoding.h
+++ b/src/algorithms/privacy/numeric_encoding.h
@@ -30,6 +30,11 @@
uint64_t CountToIndex(uint64_t count, uint64_t max_count, uint64_t num_index_points,
BitGeneratorInterface<uint32_t>* gen);
+// Encodes a |bucket_count|, |bucket_index| pair by encoding the |bucket_count| as per the
+// DoubleToIndex encoding scheme plus an offset equal to |bucket_index| * |num_index_points|.
+uint64_t HistogramBucketToIndex(uint64_t bucket_count, uint32_t bucket_index, uint64_t max_count,
+ uint64_t num_index_points, BitGeneratorInterface<uint32_t>* gen);
+
// DoubleFromIndex maps |index| to the numeric value corresponding to the boundary point with index
// |index|.
double DoubleFromIndex(uint64_t index, double min_value, double max_value,
@@ -42,6 +47,10 @@
// Decodes an |index| that was encoded with CountToIndex.
uint64_t CountFromIndex(uint64_t index, uint64_t max_count, uint64_t num_index_points);
+// Decodes an |index| that was encoded with HistogramBucketFromIndex.
+void HistogramBucketFromIndex(uint64_t index, uint64_t max_count, uint64_t num_index_points,
+ uint64_t* bucket_count, uint32_t* bucket_index);
+
// ValueAndEventVectorIndicesToIndex uniquely maps a |value_index|, |event_vector_index| pair to a
// single index in the range [0, |max_event_vector_index + 1| * |num_index_points| - 1].
//
@@ -55,6 +64,7 @@
// Returns true if |index| was encoded using CountToIndex.
bool IsCountIndex(uint64_t index, uint64_t num_index_points);
+
} // namespace cobalt
#endif // COBALT_SRC_ALGORITHMS_PRIVACY_NUMERIC_ENCODING_H_
diff --git a/src/algorithms/privacy/numeric_encoding_test.cc b/src/algorithms/privacy/numeric_encoding_test.cc
index 84bd048..c86c1b8 100644
--- a/src/algorithms/privacy/numeric_encoding_test.cc
+++ b/src/algorithms/privacy/numeric_encoding_test.cc
@@ -85,6 +85,19 @@
EXPECT_EQ(CountToIndex(count, max_count, num_index_points, &gen), expected);
}
+TEST(NumericEncodingTest, HistogramBucketToIndex) {
+ uint64_t max_count = 10u;
+ uint64_t num_index_points = 6u;
+ uint64_t bucket_count = 4u;
+ uint32_t bucket_index = 7u;
+ RandomNumberGenerator gen;
+
+ uint64_t expected = 44u;
+
+ EXPECT_EQ(HistogramBucketToIndex(bucket_count, bucket_index, max_count, num_index_points, &gen),
+ expected);
+}
+
TEST(NumericEncodingTest, DoubleFromIndex) {
double min_value = -4.0;
double max_value = 6.0;
@@ -121,6 +134,30 @@
}
}
+TEST(NumericEncodingTest, HistogramBucketFromIndex) {
+ uint64_t max_count = 10u;
+ uint64_t num_index_points = 6u;
+ uint64_t num_buckets = 10u;
+
+ uint64_t expected_bucket_count = 0u;
+ uint64_t expected_bucket_index = 0u;
+
+ for (uint64_t index = 0u; index < num_index_points * num_buckets; ++index) {
+ uint64_t bucket_count;
+ uint32_t bucket_index;
+
+ HistogramBucketFromIndex(index, max_count, num_index_points, &bucket_count, &bucket_index);
+ EXPECT_EQ(bucket_count, expected_bucket_count);
+ EXPECT_EQ(bucket_index, expected_bucket_index);
+
+ expected_bucket_count += 2;
+ if (expected_bucket_count > max_count) {
+ expected_bucket_count = 0;
+ ++expected_bucket_index;
+ }
+ }
+}
+
TEST(NumericEncodingTest, ValueAndEventVectorIndicesToIndex) {
uint64_t num_index_points = 6;
uint64_t max_event_vector_index = 9;