// blob: 85bf7171922ae5a196d918bdcda996eb7910f540 [file] [log] [blame]
#include "src/logger/privacy_encoder.h"
#include <algorithm>
#include <gtest/gtest.h>
#include "src/algorithms/privacy/count_min.h"
#include "src/algorithms/random/test_secure_random.h"
#include "src/lib/util/hash.h"
#include "src/pb/observation.pb.h"
#include "src/public/lib/statusor/status_macros.h"
#include "src/public/lib/statusor/statusor.h"
#include "src/registry/metric_definition.pb.h"
#include "src/registry/report_definition.pb.h"
namespace cobalt::logger {
class PrivacyEncoderTest : public testing::Test {
protected:
void SetUp() override {
auto secure_gen = std::make_unique<TestSecureRandomNumberGenerator>(0);
auto gen = std::make_unique<RandomNumberGenerator>(0);
privacy_encoder_ = std::make_unique<PrivacyEncoder>(std::move(secure_gen), std::move(gen));
}
[[nodiscard]] PrivacyEncoder *GetPrivacyEncoder() const { return privacy_encoder_.get(); }
static lib::statusor::StatusOr<std::vector<uint64_t>> PrepareIndexVectorForUniqueDeviceCount(
const Observation &observation, const MetricDefinition &metric_def) {
return PrivacyEncoder::PrepareIndexVectorForUniqueDeviceCount(observation, metric_def);
}
lib::statusor::StatusOr<std::vector<uint64_t>> PrepareIndexVectorForPerDeviceIntegerReport(
const Observation &observation, const MetricDefinition &metric_def,
const ReportDefinition &report_def) {
return privacy_encoder_->PrepareIndexVectorForPerDeviceIntegerReport(observation, metric_def,
report_def);
}
lib::statusor::StatusOr<std::vector<uint64_t>> PrepareIndexVectorForFleetwideMeansReport(
const Observation &observation, const MetricDefinition &metric_def,
const ReportDefinition &report_def) {
return privacy_encoder_->PrepareIndexVectorForFleetwideMeansReport(observation, metric_def,
report_def);
}
lib::statusor::StatusOr<std::vector<uint64_t>> PrepareIndexVectorForPerDeviceHistogramsReport(
const Observation &observation, const MetricDefinition &metric_def,
const ReportDefinition &report_def) {
return privacy_encoder_->PrepareIndexVectorForPerDeviceHistogramsReport(observation, metric_def,
report_def);
}
lib::statusor::StatusOr<std::vector<uint64_t>> PrepareIndexVectorForFleetwideHistogramsReport(
const Observation &observation, const MetricDefinition &metric_def,
const ReportDefinition &report_def) {
return privacy_encoder_->PrepareIndexVectorForFleetwideHistogramsReport(observation, metric_def,
report_def);
}
lib::statusor::StatusOr<std::vector<uint64_t>> PrepareIndexVectorForStringCountsReport(
const Observation &observation, const MetricDefinition &metric_def,
const ReportDefinition &report_def, size_t num_cells_per_hash, size_t num_hashes) {
return privacy_encoder_->PrepareIndexVectorForStringCountsReport(
observation, metric_def, report_def, num_cells_per_hash, num_hashes);
}
lib::statusor::StatusOr<std::vector<uint64_t>>
PrepareIndexVectorForUniqueDeviceStringCountsReport(const Observation &observation,
const MetricDefinition &metric_def,
const ReportDefinition &report_def,
size_t num_cells_per_hash,
size_t num_hashes) {
return privacy_encoder_->PrepareIndexVectorForUniqueDeviceStringCountsReport(
observation, metric_def, report_def, num_cells_per_hash, num_hashes);
}
static std::vector<std::unique_ptr<Observation>> ObservationsFromIndices(
const std::vector<uint64_t> &indices) {
return PrivacyEncoder::ObservationsFromIndices(indices);
}
lib::statusor::StatusOr<std::vector<uint64_t>> AddNoise(const std::vector<uint64_t> &indices,
const MetricDefinition &metric_def,
const ReportDefinition &report_def) {
return privacy_encoder_->AddNoise(indices, metric_def, report_def);
}
lib::statusor::StatusOr<std::vector<std::unique_ptr<Observation>>> MakePrivateObservations(
const Observation *observation, const MetricDefinition &metric_def,
const ReportDefinition &report_def) {
return privacy_encoder_->MakePrivateObservations(observation, metric_def, report_def);
}
static int64_t ClipValue(int64_t value, const ReportDefinition &report_def) {
return PrivacyEncoder::ClipValue(value, report_def);
}
static uint64_t ClipCount(uint64_t count, const ReportDefinition &report_def) {
return PrivacyEncoder::ClipCount(count, report_def);
}
private:
std::unique_ptr<PrivacyEncoder> privacy_encoder_;
};
// For a NO_ADDED_PRIVACY report, MaybeMakePrivateObservations is expected to return exactly one
// observation, and it must be the very same object that was passed in.
TEST_F(PrivacyEncoderTest, MaybeMakePrivateObservationsNoAddedPrivacyReport) {
  ReportDefinition report_def;
  report_def.set_privacy_level(ReportDefinition::NO_ADDED_PRIVACY);
  MetricDefinition metric_def;

  auto input = std::make_unique<Observation>();
  // Remember the address before ownership is handed over, so it can be compared afterwards.
  Observation *input_address = input.get();

  auto result =
      GetPrivacyEncoder()->MaybeMakePrivateObservations(std::move(input), metric_def, report_def);
  ASSERT_TRUE(result.ok());

  auto private_observations = std::move(result.ValueOrDie());
  ASSERT_EQ(private_observations.size(), 1u);
  EXPECT_EQ(private_observations[0].get(), input_address);
}
// For a NO_ADDED_PRIVACY report, a null input observation is expected to come back as a single
// null entry rather than as an error.
TEST_F(PrivacyEncoderTest, MaybeMakePrivateObservationsNoAddedPrivacyReportNoAggregateData) {
  ReportDefinition report_def;
  report_def.set_privacy_level(ReportDefinition::NO_ADDED_PRIVACY);
  MetricDefinition metric_def;

  auto result = GetPrivacyEncoder()->MaybeMakePrivateObservations(nullptr, metric_def, report_def);
  ASSERT_TRUE(result.ok());

  auto private_observations = result.ConsumeValueOrDie();
  ASSERT_EQ(private_observations.size(), 1u);
  EXPECT_EQ(private_observations.front().get(), nullptr);
}
// Calling MakePrivateObservations directly with a NO_ADDED_PRIVACY report is expected to fail with
// INVALID_ARGUMENT.
TEST_F(PrivacyEncoderTest, MakePrivateObservationsNoAddedPrivacyReport) {
  ReportDefinition report_def;
  report_def.set_privacy_level(ReportDefinition::NO_ADDED_PRIVACY);
  MetricDefinition metric_def;
  Observation input;

  auto result = MakePrivateObservations(&input, metric_def, report_def);
  ASSERT_FALSE(result.ok());
  EXPECT_EQ(result.status().error_code(), StatusCode::INVALID_ARGUMENT);
}
// Checks that MakePrivateObservations does not report UNIMPLEMENTED for any of the report types
// listed below. Other error codes are tolerated here: the default-constructed metric, report and
// observation are not meant to be a valid input for every report type.
TEST_F(PrivacyEncoderTest, MakePrivateObservationsImplemented) {
  const std::vector<ReportDefinition::ReportType> implemented_report_types = {
      ReportDefinition::UNIQUE_DEVICE_COUNTS,
      ReportDefinition::FLEETWIDE_OCCURRENCE_COUNTS,
      ReportDefinition::HOURLY_VALUE_NUMERIC_STATS,
      ReportDefinition::UNIQUE_DEVICE_NUMERIC_STATS,
      ReportDefinition::FLEETWIDE_MEANS,
      ReportDefinition::HOURLY_VALUE_HISTOGRAMS,
      ReportDefinition::UNIQUE_DEVICE_HISTOGRAMS,
      ReportDefinition::FLEETWIDE_HISTOGRAMS,
      ReportDefinition::STRING_COUNTS,
      ReportDefinition::UNIQUE_DEVICE_STRING_COUNTS};
  MetricDefinition metric_def;
  ReportDefinition report_def;
  report_def.set_privacy_level(ReportDefinition::LOW_PRIVACY);
  Observation observation;
  for (const ReportDefinition::ReportType report_type : implemented_report_types) {
    report_def.set_report_type(report_type);
    // The assertion runs once per report type, so name the type in the failure message; without
    // it a failure does not say which iteration regressed.
    EXPECT_NE(MakePrivateObservations(&observation, metric_def, report_def).status().error_code(),
              StatusCode::UNIMPLEMENTED)
        << "MakePrivateObservations returned UNIMPLEMENTED for report type "
        << static_cast<int>(report_type);
  }
}
// A null observation is accepted by MakePrivateObservations for a LOW_PRIVACY
// UNIQUE_DEVICE_COUNTS report: the call must succeed.
TEST_F(PrivacyEncoderTest, MakePrivateObservationsNullObservation) {
  ReportDefinition report_def;
  report_def.set_report_type(ReportDefinition::UNIQUE_DEVICE_COUNTS);
  report_def.set_privacy_level(ReportDefinition::LOW_PRIVACY);
  MetricDefinition metric_def;

  auto result = MakePrivateObservations(nullptr, metric_def, report_def);
  ASSERT_TRUE(result.ok());
}
// For a unique-device-count report the prepared index vector is expected to be exactly the list
// of event codes present in the observation.
TEST_F(PrivacyEncoderTest, UniqueDeviceCount) {
  // One dimension whose largest event code is 10.
  MetricDefinition metric_def;
  metric_def.set_metric_type(MetricDefinition::OCCURRENCE);
  MetricDefinition::MetricDimension *dimension = metric_def.add_metric_dimensions();
  dimension->set_dimension("dimension 0");
  dimension->set_max_event_code(10);

  // Each listed event code is recorded once with value 1; the same list doubles as the expected
  // result.
  const std::vector<uint64_t> event_codes = {2, 4, 6};
  Observation observation;
  IntegerObservation *integer_obs = observation.mutable_integer();
  for (const uint64_t event_code : event_codes) {
    IntegerObservation::Value *entry = integer_obs->add_values();
    entry->add_event_codes(event_code);
    entry->set_value(1);
  }

  auto result = PrepareIndexVectorForUniqueDeviceCount(observation, metric_def);
  ASSERT_TRUE(result.ok());
  EXPECT_EQ(result.ValueOrDie(), event_codes);
}
// An Observation with no observation type set is rejected with INVALID_ARGUMENT.
TEST_F(PrivacyEncoderTest, UniqueDeviceCountInvalidObservationType) {
  MetricDefinition metric_def;
  metric_def.set_metric_type(MetricDefinition::OCCURRENCE);
  MetricDefinition::MetricDimension *dimension = metric_def.add_metric_dimensions();
  dimension->set_dimension("dimension 0");
  dimension->set_max_event_code(10);

  // Deliberately left empty.
  Observation empty_observation;

  auto result = PrepareIndexVectorForUniqueDeviceCount(empty_observation, metric_def);
  ASSERT_FALSE(result.ok());
  EXPECT_EQ(result.status().error_code(), StatusCode::INVALID_ARGUMENT);
}
// Checks the index vector prepared for a per-device integer report from an IntegerObservation
// holding two (event code, value) pairs.
TEST_F(PrivacyEncoderTest, PerDeviceInteger) {
  // One dimension whose largest event code is 10.
  MetricDefinition metric_def;
  metric_def.set_metric_type(MetricDefinition::OCCURRENCE);
  MetricDefinition::MetricDimension *dimension = metric_def.add_metric_dimensions();
  dimension->set_dimension("dimension 0");
  dimension->set_max_event_code(10);

  // min_value = -4, max_value = 6, with 6 index points.
  ReportDefinition report_def;
  report_def.set_min_value(-4);
  report_def.set_max_value(6);
  report_def.set_num_index_points(6);

  Observation observation;
  IntegerObservation *integer_obs = observation.mutable_integer();
  // Event code 3 with value -2.
  IntegerObservation::Value *first = integer_obs->add_values();
  first->add_event_codes(3u);
  first->set_value(-2);
  // Event code 7 with value 2.
  IntegerObservation::Value *second = integer_obs->add_values();
  second->add_event_codes(7u);
  second->set_value(2);

  auto result = PrepareIndexVectorForPerDeviceIntegerReport(observation, metric_def, report_def);
  ASSERT_TRUE(result.ok());

  const std::vector<uint64_t> expected_indices = {14, 40};
  EXPECT_EQ(result.ValueOrDie(), expected_indices);
}
// An Observation with no observation type set is rejected with INVALID_ARGUMENT.
TEST_F(PrivacyEncoderTest, PerDeviceIntegerInvalidObservationType) {
  // Everything is left default-constructed; in particular the observation is empty.
  Observation empty_observation;
  ReportDefinition report_def;
  MetricDefinition metric_def;

  auto result =
      PrepareIndexVectorForPerDeviceIntegerReport(empty_observation, metric_def, report_def);
  ASSERT_FALSE(result.ok());
  EXPECT_EQ(result.status().error_code(), StatusCode::INVALID_ARGUMENT);
}
// Checks the index vector prepared for a fleetwide-means report from a SumAndCountObservation
// with a single (event code, sum, count) entry. Two indices are expected for that one entry.
TEST_F(PrivacyEncoderTest, FleetwideMeans) {
  // One dimension whose largest event code is 10.
  MetricDefinition metric_def;
  metric_def.set_metric_type(MetricDefinition::OCCURRENCE);
  MetricDefinition::MetricDimension *dimension = metric_def.add_metric_dimensions();
  dimension->set_dimension("dimension 0");
  dimension->set_max_event_code(10);

  // min_value = -4, max_value = 6, max_count = 10, with 6 index points.
  ReportDefinition report_def;
  report_def.set_min_value(-4);
  report_def.set_max_value(6);
  report_def.set_max_count(10);
  report_def.set_num_index_points(6);

  // Event code 3 with sum -2 and count 4.
  Observation observation;
  SumAndCountObservation *sum_and_count_obs = observation.mutable_sum_and_count();
  SumAndCountObservation::SumAndCount *entry = sum_and_count_obs->add_sums_and_counts();
  entry->add_event_codes(3u);
  entry->set_sum(-2);
  entry->set_count(4);

  auto result = PrepareIndexVectorForFleetwideMeansReport(observation, metric_def, report_def);
  ASSERT_TRUE(result.ok());

  const std::vector<uint64_t> expected_indices = {14, 91};
  EXPECT_EQ(result.ValueOrDie(), expected_indices);
}
// An Observation with no observation type set is rejected with INVALID_ARGUMENT.
TEST_F(PrivacyEncoderTest, FleetwideMeansInvalidObservationType) {
  // Everything is left default-constructed; in particular the observation is empty.
  Observation empty_observation;
  ReportDefinition report_def;
  MetricDefinition metric_def;

  auto result =
      PrepareIndexVectorForFleetwideMeansReport(empty_observation, metric_def, report_def);
  ASSERT_FALSE(result.ok());
  EXPECT_EQ(result.status().error_code(), StatusCode::INVALID_ARGUMENT);
}
// Checks the index vector prepared for a per-device histograms report. Each expected index is
//   num_event_vectors * bucket_index + event_code.
TEST_F(PrivacyEncoderTest, PerDeviceHistograms) {
  // |metric_def| has 11 valid event vectors.
  MetricDefinition metric_def;
  metric_def.set_metric_type(MetricDefinition::OCCURRENCE);
  MetricDefinition::MetricDimension *dimension = metric_def.add_metric_dimensions();
  dimension->set_dimension("dimension 0");
  dimension->set_max_event_code(10);

  // Linear buckets configured on the report. Counting the underflow and overflow buckets, there
  // are 8 + 2 = 10 valid bucket indices.
  ReportDefinition report_def;
  LinearIntegerBuckets *linear = report_def.mutable_int_buckets()->mutable_linear();
  linear->set_floor(0);
  linear->set_num_buckets(8);
  linear->set_step_size(2);

  Observation observation;
  IntegerObservation *integer_obs = observation.mutable_integer();

  // Event code 3, value 0 -> bucket index 1, so the expected index is 11 * 1 + 3 = 14.
  IntegerObservation::Value *first = integer_obs->add_values();
  first->add_event_codes(3u);
  first->set_value(0);

  // Event code 7, value 5 -> bucket index 3, so the expected index is 11 * 3 + 7 = 40.
  IntegerObservation::Value *second = integer_obs->add_values();
  second->add_event_codes(7u);
  second->set_value(5);

  auto result = PrepareIndexVectorForPerDeviceHistogramsReport(observation, metric_def, report_def);
  ASSERT_TRUE(result.ok());

  const std::vector<uint64_t> expected_indices = {14, 40};
  EXPECT_EQ(result.ValueOrDie(), expected_indices);
}
// An Observation with no observation type set is rejected with INVALID_ARGUMENT.
TEST_F(PrivacyEncoderTest, PerDeviceHistogramsInvalidObservationType) {
  // Everything is left default-constructed; in particular the observation is empty.
  Observation empty_observation;
  ReportDefinition report_def;
  MetricDefinition metric_def;

  auto result =
      PrepareIndexVectorForPerDeviceHistogramsReport(empty_observation, metric_def, report_def);
  ASSERT_FALSE(result.ok());
  EXPECT_EQ(result.status().error_code(), StatusCode::INVALID_ARGUMENT);
}
// Checks the index vector prepared for a fleetwide histograms report on an INTEGER metric, where
// the bucket configuration lives on the report definition.
//
// The general formula for an expected index is:
//   (numeric_index(count) + num_index_points * bucket_index) * num_event_vectors +
//   event_vector_index.
TEST_F(PrivacyEncoderTest, FleetwideHistogramsIntegerMetric) {
  // |metric_def| has 11 valid event vectors.
  MetricDefinition metric_def;
  metric_def.set_metric_type(MetricDefinition::INTEGER);
  MetricDefinition::MetricDimension *dimension = metric_def.add_metric_dimensions();
  dimension->set_dimension("dimension 0");
  dimension->set_max_event_code(10);

  // Counting the underflow and overflow buckets, the linear buckets give 8 + 2 = 10 valid bucket
  // indices.
  ReportDefinition report_def;
  LinearIntegerBuckets *linear = report_def.mutable_int_buckets()->mutable_linear();
  linear->set_floor(0);
  linear->set_num_buckets(8);
  linear->set_step_size(2);
  // The numeric index cutoffs are {0, 2, 4, 6, 8, 10}.
  report_def.set_max_count(10);
  report_def.set_num_index_points(6);

  Observation observation;
  IndexHistogramObservation *histogram_obs = observation.mutable_index_histogram();

  // Event vector {1} with (bucket_index, bucket_count) pairs (0, 2) and (1, 4).
  //   count 2 has numeric index 1 -> expected index 12.
  //   count 4 has numeric index 2 -> expected index 89.
  IndexHistogram *first_histogram = histogram_obs->add_index_histograms();
  first_histogram->add_event_codes(1u);
  first_histogram->add_bucket_indices(0u);
  first_histogram->add_bucket_counts(2u);
  first_histogram->add_bucket_indices(1u);
  first_histogram->add_bucket_counts(4u);

  // Event vector {2} with (bucket_index, bucket_count) pairs (3, 4) and (5, 6).
  //   count 4 has numeric index 2 -> expected index 222.
  //   count 6 has numeric index 3 -> expected index 365.
  IndexHistogram *second_histogram = histogram_obs->add_index_histograms();
  second_histogram->add_event_codes(2u);
  second_histogram->add_bucket_indices(3u);
  second_histogram->add_bucket_counts(4u);
  second_histogram->add_bucket_indices(5u);
  second_histogram->add_bucket_counts(6u);

  auto result = PrepareIndexVectorForFleetwideHistogramsReport(observation, metric_def, report_def);
  ASSERT_TRUE(result.ok());

  const std::vector<uint64_t> expected_indices = {12, 89, 222, 365};
  EXPECT_EQ(result.ValueOrDie(), expected_indices);
}
// Checks the index vector prepared for a fleetwide histograms report on an INTEGER_HISTOGRAM
// metric, where the bucket configuration lives on the metric definition.
//
// The general formula for an expected index is:
//   (numeric_index(count) + num_index_points * bucket_index) * num_event_vectors +
//   event_vector_index.
TEST_F(PrivacyEncoderTest, FleetwideHistogramsIntegerHistogramMetric) {
  // |metric_def| has 11 valid event vectors.
  MetricDefinition metric_def;
  metric_def.set_metric_type(MetricDefinition::INTEGER_HISTOGRAM);
  MetricDefinition::MetricDimension *dimension = metric_def.add_metric_dimensions();
  dimension->set_dimension("dimension 0");
  dimension->set_max_event_code(10);
  // Counting the underflow and overflow buckets, the linear buckets give 8 + 2 = 10 valid bucket
  // indices.
  LinearIntegerBuckets *linear = metric_def.mutable_int_buckets()->mutable_linear();
  linear->set_floor(0);
  linear->set_num_buckets(8);
  linear->set_step_size(2);

  ReportDefinition report_def;
  report_def.set_max_count(10);
  report_def.set_num_index_points(6);

  Observation observation;
  IndexHistogramObservation *histogram_obs = observation.mutable_index_histogram();

  // Event vector {1} with (bucket_index, bucket_count) pairs (0, 2) and (1, 4).
  //   count 2 has numeric index 1 -> expected index 12.
  //   count 4 has numeric index 2 -> expected index 89.
  IndexHistogram *first_histogram = histogram_obs->add_index_histograms();
  first_histogram->add_event_codes(1u);
  first_histogram->add_bucket_indices(0u);
  first_histogram->add_bucket_counts(2u);
  first_histogram->add_bucket_indices(1u);
  first_histogram->add_bucket_counts(4u);

  // Event vector {2} with (bucket_index, bucket_count) pairs (3, 4) and (5, 6).
  //   count 4 has numeric index 2 -> expected index 222.
  //   count 6 has numeric index 3 -> expected index 365.
  IndexHistogram *second_histogram = histogram_obs->add_index_histograms();
  second_histogram->add_event_codes(2u);
  second_histogram->add_bucket_indices(3u);
  second_histogram->add_bucket_counts(4u);
  second_histogram->add_bucket_indices(5u);
  second_histogram->add_bucket_counts(6u);

  auto result = PrepareIndexVectorForFleetwideHistogramsReport(observation, metric_def, report_def);
  ASSERT_TRUE(result.ok());

  const std::vector<uint64_t> expected_indices = {12, 89, 222, 365};
  EXPECT_EQ(result.ValueOrDie(), expected_indices);
}
// Checks the index vector prepared for a string counts report. The expected indices are derived
// from the cell indices that a CountMin sketch of the same dimensions assigns to each string hash.
TEST_F(PrivacyEncoderTest, StringCounts) {
  const size_t num_cells_per_hash = 2;
  const size_t num_hashes = 2;
  const uint32_t max_event_code = 1;
  const uint32_t max_count = 2;
  const uint32_t num_index_points = max_count + 1;

  // |metric_def| has 2 valid event vectors.
  MetricDefinition metric_def;
  metric_def.set_metric_type(MetricDefinition::STRING);
  metric_def.set_string_buffer_max(10);
  MetricDefinition::MetricDimension *dimension = metric_def.add_metric_dimensions();
  dimension->set_dimension("dimension 0");
  dimension->set_max_event_code(max_event_code);

  // |report_def| has 3 valid count values: {0, 1, 2}.
  ReportDefinition report_def;
  report_def.set_max_count(max_count);
  report_def.set_num_index_points(num_index_points);

  // Build a StringHistogramObservation with 2 IndexHistograms:
  // - event vector {0}: ("blobfs", count = 1), ("thinfs", count = 2)
  // - event vector {1}: ("thinfs", count = 2)
  // Bucket index 0 refers to "blobfs" and bucket index 1 to "thinfs" (their positions in the
  // string_hashes list).
  Observation observation;
  StringHistogramObservation *string_histogram_obs = observation.mutable_string_histogram();
  string_histogram_obs->add_string_hashes(util::FarmhashFingerprint("blobfs"));
  string_histogram_obs->add_string_hashes(util::FarmhashFingerprint("thinfs"));

  IndexHistogram *event_0_histogram = string_histogram_obs->add_string_histograms();
  event_0_histogram->add_event_codes(0u);
  event_0_histogram->add_bucket_indices(0u);
  event_0_histogram->add_bucket_counts(1u);
  event_0_histogram->add_bucket_indices(1u);
  event_0_histogram->add_bucket_counts(2u);

  IndexHistogram *event_1_histogram = string_histogram_obs->add_string_histograms();
  event_1_histogram->add_event_codes(1u);
  event_1_histogram->add_bucket_indices(1u);
  event_1_histogram->add_bucket_counts(2u);

  // The general formula for an expected index is:
  //   (count + num_index_points * sketch cell index) * num_event_vectors + event_vector_index.
  // Cells with count 0 are omitted.
  const auto private_index = [&](size_t count, size_t cell_index, size_t event_vector_index) {
    return (count + num_index_points * cell_index) * (max_event_code + 1) + event_vector_index;
  };

  auto count_min = CountMin<uint64_t>::MakeSketch(num_cells_per_hash, num_hashes);
  const std::vector<size_t> blobfs_cells =
      count_min.GetCellIndices(string_histogram_obs->string_hashes(0));
  const std::vector<size_t> thinfs_cells =
      count_min.GetCellIndices(string_histogram_obs->string_hashes(1));

  std::vector<uint64_t> expected_private_indices;
  expected_private_indices.reserve(6);
  for (const size_t cell : blobfs_cells) {
    // "blobfs": count 1 with event vector {0}.
    expected_private_indices.push_back(private_index(1, cell, 0));
  }
  for (const size_t cell : thinfs_cells) {
    // "thinfs": count 2 with event vector {0} ...
    expected_private_indices.push_back(private_index(2, cell, 0));
    // ... and count 2 with event vector {1}.
    expected_private_indices.push_back(private_index(2, cell, 1));
  }

  CB_ASSERT_OK_AND_ASSIGN(std::vector<uint64_t> indices,
                          PrepareIndexVectorForStringCountsReport(
                              observation, metric_def, report_def, num_cells_per_hash, num_hashes));

  // Only the set of indices matters here, so compare both sides in sorted order.
  std::sort(expected_private_indices.begin(), expected_private_indices.end());
  std::sort(indices.begin(), indices.end());
  EXPECT_EQ(indices, expected_private_indices);
}
// Checks that the `max_count` bound of a StringCounts report is enforced on each
// cell of the CountMin sketch before encoding. In particular, if two strings hash
// to the same cell of the sketch, then the total count for that sketch cell must
// be clipped to `max_count`.
TEST_F(PrivacyEncoderTest, StringCountsEnforceMaxCount) {
  // Use a 1x1 CountMin sketch so that all strings hash to the same cell.
  const size_t num_cells_per_hash = 1;
  const size_t num_hashes = 1;
  // |max_count| is a uint32_t, as in the StringCounts test, so that deriving |num_index_points|
  // from it involves no implicit 64-bit to 32-bit narrowing.
  const uint32_t max_count = 2;
  const uint32_t num_index_points = max_count + 1;
  const uint32_t max_event_code = 1;

  // |metric_def| has 2 valid event vectors.
  MetricDefinition metric_def;
  metric_def.set_metric_type(MetricDefinition::STRING);
  metric_def.set_string_buffer_max(10);
  MetricDefinition::MetricDimension *metric_dim = metric_def.add_metric_dimensions();
  metric_dim->set_dimension("dimension 0");
  metric_dim->set_max_event_code(max_event_code);

  // |report_def| has 3 valid count values: {0, 1, 2}.
  ReportDefinition report_def;
  report_def.set_max_count(max_count);
  report_def.set_num_index_points(num_index_points);

  // Prepare a StringHistogramObservation with 1 IndexHistogram containing two string counts
  // (`max_count` for "blobfs" and 1 for "minfs") such that the sum of the counts is greater than
  // `max_count`.
  //
  // The sum of the counts should be clipped to `max_count` when the CountMin sketch is prepared.
  Observation observation;
  StringHistogramObservation *string_histogram_obs = observation.mutable_string_histogram();
  string_histogram_obs->add_string_hashes(util::FarmhashFingerprint("blobfs"));
  string_histogram_obs->add_string_hashes(util::FarmhashFingerprint("minfs"));
  IndexHistogram *histogram = string_histogram_obs->add_string_histograms();
  histogram->add_event_codes(0u);
  histogram->add_bucket_indices(0u);
  histogram->add_bucket_counts(max_count);
  histogram->add_bucket_indices(1u);
  histogram->add_bucket_counts(1u);

  // The general formula for an expected index is:
  //   (count + num_index_points * sketch cell index) * (num_event_vectors) + event_vector_index.
  //
  // The expected index is of this form with count = `max_count`, sketch cell index = 0, and
  // event_vector_index = 0.
  const uint64_t expected_index = static_cast<uint64_t>(max_count) * (max_event_code + 1);

  CB_ASSERT_OK_AND_ASSIGN(std::vector<uint64_t> indices,
                          PrepareIndexVectorForStringCountsReport(
                              observation, metric_def, report_def, num_cells_per_hash, num_hashes));
  EXPECT_EQ(indices, std::vector<uint64_t>({expected_index}));
}
// Checks the index vector prepared for a unique-device string counts report. The expected indices
// are derived from the cell indices that a CountMin sketch of the same dimensions assigns to each
// string hash.
TEST_F(PrivacyEncoderTest, UniqueDeviceStringCounts) {
  const size_t num_cells_per_hash = 2;
  const size_t num_hashes = 2;
  const uint32_t max_event_code = 1;

  // |metric_def| has 2 valid event vectors.
  MetricDefinition metric_def;
  metric_def.set_metric_type(MetricDefinition::STRING);
  MetricDefinition::MetricDimension *dimension = metric_def.add_metric_dimensions();
  dimension->set_dimension("dimension 0");
  dimension->set_max_event_code(max_event_code);

  ReportDefinition report_def;

  // Build a StringHistogramObservation with 2 IndexHistograms, with count 1 for each included
  // string:
  // - event vector {0}: ("blobfs", count = 1), ("thinfs", count = 1)
  // - event vector {1}: ("thinfs", count = 1)
  // Bucket index 0 refers to "blobfs" and bucket index 1 to "thinfs" (their positions in the
  // string_hashes list).
  Observation observation;
  StringHistogramObservation *string_histogram_obs = observation.mutable_string_histogram();
  string_histogram_obs->add_string_hashes(util::FarmhashFingerprint("blobfs"));
  string_histogram_obs->add_string_hashes(util::FarmhashFingerprint("thinfs"));

  IndexHistogram *event_0_histogram = string_histogram_obs->add_string_histograms();
  event_0_histogram->add_event_codes(0u);
  event_0_histogram->add_bucket_indices(0u);
  event_0_histogram->add_bucket_counts(1u);
  event_0_histogram->add_bucket_indices(1u);
  event_0_histogram->add_bucket_counts(1u);

  IndexHistogram *event_1_histogram = string_histogram_obs->add_string_histograms();
  event_1_histogram->add_event_codes(1u);
  event_1_histogram->add_bucket_indices(1u);
  event_1_histogram->add_bucket_counts(1u);

  // The general formula for an expected index is:
  //   sketch cell index * num_event_vectors + event_vector_index.
  // Cells with count 0 are omitted.
  const auto private_index = [&](size_t cell_index, size_t event_vector_index) {
    return cell_index * (max_event_code + 1) + event_vector_index;
  };

  CountMin<uint64_t> count_min = CountMin<uint64_t>::MakeSketch(num_cells_per_hash, num_hashes);
  const std::vector<size_t> blobfs_cells =
      count_min.GetCellIndices(string_histogram_obs->string_hashes(0));
  const std::vector<size_t> thinfs_cells =
      count_min.GetCellIndices(string_histogram_obs->string_hashes(1));

  std::vector<uint64_t> expected_private_indices;
  expected_private_indices.reserve(6);
  for (const size_t cell : blobfs_cells) {
    // "blobfs": count 1 with event vector {0}.
    expected_private_indices.push_back(private_index(cell, 0));
  }
  for (const size_t cell : thinfs_cells) {
    // "thinfs": count 1 with event vector {0} ...
    expected_private_indices.push_back(private_index(cell, 0));
    // ... and count 1 with event vector {1}.
    expected_private_indices.push_back(private_index(cell, 1));
  }

  CB_ASSERT_OK_AND_ASSIGN(std::vector<uint64_t> indices,
                          PrepareIndexVectorForUniqueDeviceStringCountsReport(
                              observation, metric_def, report_def, num_cells_per_hash, num_hashes));

  // Only the set of indices matters here, so compare both sides in sorted order.
  std::sort(expected_private_indices.begin(), expected_private_indices.end());
  std::sort(indices.begin(), indices.end());
  EXPECT_EQ(indices, expected_private_indices);
}
// With no indices at all, the result still contains exactly one observation: a
// ReportParticipationObservation.
TEST_F(PrivacyEncoderTest, ObservationsFromIndicesNoIndices) {
  const std::vector<uint64_t> no_indices;
  auto observations = ObservationsFromIndices(no_indices);

  ASSERT_EQ(observations.size(), 1u);
  EXPECT_EQ(observations[0]->observation_type_case(), Observation::kReportParticipation);
}
// Each input index yields one PrivateIndexObservation, in input order, followed by a single
// trailing ReportParticipationObservation.
TEST_F(PrivacyEncoderTest, ObservationsFromIndices) {
  const std::vector<uint64_t> indices = {1, 2, 20, 50};
  auto observations = ObservationsFromIndices(indices);

  // One observation per index, plus the participation observation.
  ASSERT_EQ(observations.size(), indices.size() + 1);

  // The first indices.size() entries carry the indices themselves.
  for (size_t pos = 0; pos < indices.size(); ++pos) {
    ASSERT_EQ(observations[pos]->observation_type_case(), Observation::kPrivateIndex);
    EXPECT_EQ(observations[pos]->private_index().index(), indices[pos]);
  }

  // The final entry is the participation marker.
  EXPECT_EQ(observations.back()->observation_type_case(), Observation::kReportParticipation);
}
// For a UNIQUE_DEVICE_COUNTS report on a metric with one dimension, the maximum index is expected
// to equal that dimension's max event code.
TEST_F(PrivacyEncoderTest, MaxIndexForReportUniqueDeviceCount) {
  const uint32_t max_event_code = 10;

  MetricDefinition metric_def;
  MetricDefinition::MetricDimension *dimension = metric_def.add_metric_dimensions();
  dimension->set_dimension("dimension 1");
  dimension->set_max_event_code(max_event_code);

  ReportDefinition report_def;
  report_def.set_report_type(ReportDefinition::UNIQUE_DEVICE_COUNTS);

  auto max_index = PrivacyEncoder::MaxIndexForReport(metric_def, report_def);
  ASSERT_TRUE(max_index.ok());
  EXPECT_EQ(max_index.ValueOrDie(), static_cast<uint64_t>(max_event_code));
}
// Checks MaxIndexForReport for every report type in |per_device_integer_report_types|: with 11
// event codes and 5 index points, the maximum index is expected to be 11 * 5 - 1 = 54 for each.
TEST_F(PrivacyEncoderTest, MaxIndexForReportPerDeviceInteger) {
  MetricDefinition metric_def;
  const uint32_t max_event_code = 10;
  MetricDefinition::MetricDimension *dim = metric_def.add_metric_dimensions();
  dim->set_dimension("dimension 1");
  dim->set_max_event_code(max_event_code);

  const std::vector<ReportDefinition::ReportType> per_device_integer_report_types = {
      ReportDefinition::FLEETWIDE_OCCURRENCE_COUNTS, ReportDefinition::HOURLY_VALUE_NUMERIC_STATS,
      ReportDefinition::UNIQUE_DEVICE_NUMERIC_STATS};

  ReportDefinition report_def;
  const uint32_t num_index_points = 5;
  report_def.set_num_index_points(num_index_points);

  // There are 11 event codes and 5 index points for a total of 55 possible indices.
  const uint64_t expected_max_index = 54;

  for (const auto report_type : per_device_integer_report_types) {
    report_def.set_report_type(report_type);
    auto max_index = PrivacyEncoder::MaxIndexForReport(metric_def, report_def);
    // The assertions run once per report type, so include the type and the returned status in
    // the failure output; otherwise a failure does not say which iteration broke or why.
    ASSERT_TRUE(max_index.ok()) << "Failed to get max index for report type "
                                << static_cast<int>(report_type) << " with status "
                                << max_index.status().error_code() << ", "
                                << max_index.status().error_message();
    EXPECT_EQ(max_index.ValueOrDie(), expected_max_index)
        << "Unexpected max index for report type " << static_cast<int>(report_type);
  }
}
// For a FLEETWIDE_MEANS report the maximum index is expected to be
//   (max_event_code + 1) * num_index_points * 2 - 1.
TEST_F(PrivacyEncoderTest, MaxIndexForReportFleetwideMeans) {
  const uint32_t max_event_code = 10;
  const uint32_t num_index_points = 5;

  MetricDefinition metric_def;
  MetricDefinition::MetricDimension *dimension = metric_def.add_metric_dimensions();
  dimension->set_dimension("dimension 1");
  dimension->set_max_event_code(max_event_code);

  ReportDefinition report_def;
  report_def.set_report_type(ReportDefinition::FLEETWIDE_MEANS);
  report_def.set_num_index_points(num_index_points);

  auto max_index = PrivacyEncoder::MaxIndexForReport(metric_def, report_def);
  ASSERT_TRUE(max_index.ok());

  // NOTE(review): the factor of 2 presumably reflects separate index ranges for sums and counts --
  // confirm against PrivacyEncoder::MaxIndexForReport.
  const uint64_t expected_max_index = (max_event_code + 1) * num_index_points * 2 - 1;
  EXPECT_EQ(max_index.ValueOrDie(), expected_max_index);
}
// Checks MaxIndexForReport for each per-device histogram report type, combined with both a linear
// and an exponential bucket configuration on the report.
TEST_F(PrivacyEncoderTest, MaxIndexForReportPerDeviceHistogram) {
  const uint32_t max_event_code = 10;
  MetricDefinition metric_def;
  MetricDefinition::MetricDimension *dimension = metric_def.add_metric_dimensions();
  dimension->set_dimension("dimension 1");
  dimension->set_max_event_code(max_event_code);

  const std::vector<ReportDefinition::ReportType> report_types = {
      ReportDefinition::HOURLY_VALUE_HISTOGRAMS, ReportDefinition::UNIQUE_DEVICE_HISTOGRAMS};

  // Two bucket configurations, each with floor 0 and 3 registered buckets.
  IntegerBuckets linear_buckets;
  auto *linear = linear_buckets.mutable_linear();
  linear->set_floor(0);
  linear->set_num_buckets(3);
  IntegerBuckets exp_buckets;
  auto *exponential = exp_buckets.mutable_exponential();
  exponential->set_floor(0);
  exponential->set_num_buckets(3);
  const std::vector<IntegerBuckets> bucket_variants = {linear_buckets, exp_buckets};

  // There are 11 event codes and 5 histogram buckets (3 registered + 1 underflow + 1 overflow)
  // for a total of 55 possible indices.
  const uint64_t expected_max_index = 54;

  ReportDefinition report_def;
  for (const auto report_type : report_types) {
    report_def.set_report_type(report_type);
    for (const auto &buckets : bucket_variants) {
      *report_def.mutable_int_buckets() = buckets;
      auto max_index = PrivacyEncoder::MaxIndexForReport(metric_def, report_def);
      ASSERT_TRUE(max_index.ok())
          << "Failed to get max index with status " << max_index.status().error_code() << ", "
          << max_index.status().error_message();
      EXPECT_EQ(expected_max_index, max_index.ValueOrDie());
    }
  }
}
// Checks MaxIndexForReport for a FLEETWIDE_HISTOGRAMS report on an INTEGER metric, with the bucket
// configuration (linear or exponential) set on the report definition.
TEST_F(PrivacyEncoderTest, MaxIndexForReportFleetwideHistogramsIntegerMetric) {
  const uint32_t max_event_code = 10;
  MetricDefinition metric_def;
  metric_def.set_metric_type(MetricDefinition::INTEGER);
  MetricDefinition::MetricDimension *dimension = metric_def.add_metric_dimensions();
  dimension->set_dimension("dimension 1");
  dimension->set_max_event_code(max_event_code);

  // Two bucket configurations, each with floor 0 and 3 registered buckets.
  IntegerBuckets linear_buckets;
  auto *linear = linear_buckets.mutable_linear();
  linear->set_floor(0);
  linear->set_num_buckets(3);
  IntegerBuckets exp_buckets;
  auto *exponential = exp_buckets.mutable_exponential();
  exponential->set_floor(0);
  exponential->set_num_buckets(3);
  const std::vector<IntegerBuckets> bucket_variants = {linear_buckets, exp_buckets};

  ReportDefinition report_def;
  report_def.set_report_type(ReportDefinition::FLEETWIDE_HISTOGRAMS);
  report_def.set_num_index_points(6);

  // There are 11 event codes, 5 histogram buckets (3 registered + 1 underflow + 1 overflow), and
  // 6 numeric index points, for a total of 330 possible indices.
  const uint64_t expected_max_index = 329;

  for (const auto &buckets : bucket_variants) {
    *report_def.mutable_int_buckets() = buckets;
    auto max_index = PrivacyEncoder::MaxIndexForReport(metric_def, report_def);
    ASSERT_TRUE(max_index.ok())
        << "Failed to get max index with status " << max_index.status().error_code() << ", "
        << max_index.status().error_message();
    EXPECT_EQ(expected_max_index, max_index.ValueOrDie());
  }
}
// Checks PrivacyEncoder::MaxIndexForReport for a FLEETWIDE_HISTOGRAMS report of an
// INTEGER_HISTOGRAM metric. Here the histogram buckets are set on the metric definition, and the
// check is repeated for a linear and an exponential bucket configuration.
TEST_F(PrivacyEncoderTest, MaxIndexForReportFleetwideHistogramsIntegerHistogramMetric) {
  ReportDefinition report_def;
  report_def.set_report_type(ReportDefinition::FLEETWIDE_HISTOGRAMS);
  report_def.set_num_index_points(6);

  // One metric dimension with a max event code of 10, which the expectation below counts as 11
  // event codes.
  const uint32_t largest_event_code = 10;
  MetricDefinition metric_def;
  metric_def.set_metric_type(MetricDefinition::INTEGER_HISTOGRAM);
  MetricDefinition::MetricDimension *dimension = metric_def.add_metric_dimensions();
  dimension->set_dimension("dimension 1");
  dimension->set_max_event_code(largest_event_code);

  // Element 0 is the linear configuration and element 1 the exponential one; both register
  // 3 buckets with a floor of 0.
  std::vector<IntegerBuckets> bucket_configs(2);
  bucket_configs[0].mutable_linear()->set_floor(0);
  bucket_configs[0].mutable_linear()->set_num_buckets(3);
  bucket_configs[1].mutable_exponential()->set_floor(0);
  bucket_configs[1].mutable_exponential()->set_num_buckets(3);

  // 11 event codes x 5 histogram buckets (3 registered + 1 underflow + 1 overflow) x 6 numeric
  // index points gives 330 possible indices, so the largest index is 329.
  const uint64_t expected_max_index = 329;
  for (const IntegerBuckets &buckets : bucket_configs) {
    *metric_def.mutable_int_buckets() = buckets;
    auto result = PrivacyEncoder::MaxIndexForReport(metric_def, report_def);
    ASSERT_TRUE(result.ok()) << "Failed to get max index with status "
                             << result.status().error_code() << ", "
                             << result.status().error_message();
    EXPECT_EQ(expected_max_index, result.ValueOrDie());
  }
}
// Checks PrivacyEncoder::MaxIndexForReport for a STRING_COUNTS report of a STRING metric. The
// expectation is derived from the count-min sketch dimensions that PrivacyEncoder itself reports
// for |report_def|.
TEST_F(PrivacyEncoderTest, MaxIndexForReportStringCounts) {
  const uint64_t largest_event_code = 9;
  const uint32_t index_point_count = 6;

  // A max event code of 9 on a single dimension is counted as 10 valid event vectors.
  MetricDefinition metric_def;
  metric_def.set_metric_type(MetricDefinition::STRING);
  MetricDefinition::MetricDimension *dimension = metric_def.add_metric_dimensions();
  dimension->set_dimension("dimension1");
  dimension->set_max_event_code(largest_event_code);

  // The report has 6 index points, i.e. 6 valid count values.
  ReportDefinition report_def;
  report_def.set_report_type(ReportDefinition::STRING_COUNTS);
  report_def.set_num_index_points(index_point_count);

  CB_ASSERT_OK_AND_ASSIGN(size_t cells_per_hash,
                          PrivacyEncoder::GetNumCountMinCellsPerHash(report_def));
  CB_ASSERT_OK_AND_ASSIGN(size_t hash_count, PrivacyEncoder::GetNumCountMinHashes(report_def));

  // Expected max index:
  //   (# of valid event vectors) * (# valid count values) * (size of count min sketch) - 1
  // where the sketch size is (cells per hash) * (number of hashes). For a sketch of 50 cells in
  // total this is 10 * 6 * 50 - 1 = 2999.
  const uint64_t event_vector_count = largest_event_code + 1;
  const uint64_t sketch_size = static_cast<uint64_t>(cells_per_hash) * hash_count;
  const uint64_t expected_max_index = event_vector_count * index_point_count * sketch_size - 1;

  auto result = PrivacyEncoder::MaxIndexForReport(metric_def, report_def);
  ASSERT_TRUE(result.ok()) << "Failed to get max index with status "
                           << result.status().error_code() << ", "
                           << result.status().error_message();
  EXPECT_EQ(expected_max_index, result.ValueOrDie());
}
// Checks that PrivacyEncoder::MaxIndexForReport fails with UNIMPLEMENTED when the report type is
// CUSTOM_RAW_DUMP. The metric definition is left at its defaults.
TEST_F(PrivacyEncoderTest, MaxIndexForReportUnimplemented) {
  ReportDefinition report_def;
  report_def.set_report_type(ReportDefinition::CUSTOM_RAW_DUMP);
  MetricDefinition metric_def;

  auto result = PrivacyEncoder::MaxIndexForReport(metric_def, report_def);

  // The status code is only inspected once the call is known to have failed.
  ASSERT_FALSE(result.ok());
  EXPECT_EQ(result.status().error_code(), StatusCode::UNIMPLEMENTED);
}
// Checks that AddNoise rejects bit-flip probabilities outside of [0, 1] with INVALID_ARGUMENT.
// The index vector is empty and the metric definition is left at its defaults.
TEST_F(PrivacyEncoderTest, AddNoisePOutOfRange) {
  MetricDefinition metric_def;
  ReportDefinition report_def;
  report_def.set_report_type(ReportDefinition::UNIQUE_DEVICE_COUNTS);
  std::vector<uint64_t> no_indices;

  // A negative probability must be rejected.
  report_def.set_prob_bit_flip(-1.0);
  auto negative_p_result = AddNoise(no_indices, metric_def, report_def);
  ASSERT_FALSE(negative_p_result.ok());
  EXPECT_EQ(negative_p_result.status().error_code(), StatusCode::INVALID_ARGUMENT);

  // A probability greater than 1 must be rejected as well.
  report_def.set_prob_bit_flip(2.0);
  auto oversized_p_result = AddNoise(no_indices, metric_def, report_def);
  ASSERT_FALSE(oversized_p_result.ok());
  EXPECT_EQ(oversized_p_result.status().error_code(), StatusCode::INVALID_ARGUMENT);
}
// Checks that AddNoise fails with INVALID_ARGUMENT when handed an index that is one larger than
// the metric's max event code, for a UNIQUE_DEVICE_COUNTS report.
TEST_F(PrivacyEncoderTest, AddNoiseIndexOutOfRange) {
  const uint32_t largest_event_code = 10;

  MetricDefinition metric_def;
  MetricDefinition::MetricDimension *dimension = metric_def.add_metric_dimensions();
  dimension->set_dimension("dimension 1");
  dimension->set_max_event_code(largest_event_code);

  ReportDefinition report_def;
  report_def.set_report_type(ReportDefinition::UNIQUE_DEVICE_COUNTS);
  report_def.set_prob_bit_flip(1.0);

  // The only index supplied lies just past the max event code.
  std::vector<uint64_t> out_of_range_indices = {largest_event_code + 1};

  auto result = AddNoise(out_of_range_indices, metric_def, report_def);
  ASSERT_FALSE(result.ok());
  EXPECT_EQ(result.status().error_code(), StatusCode::INVALID_ARGUMENT);
}
// Checks that AddNoise succeeds for a UNIQUE_DEVICE_COUNTS report when every supplied index is no
// larger than the metric's max event code and the bit-flip probability is 1.0. Only the returned
// status is inspected; the noised indices themselves are not checked.
TEST_F(PrivacyEncoderTest, AddNoise) {
  const uint32_t largest_event_code = 10;

  MetricDefinition metric_def;
  MetricDefinition::MetricDimension *dimension = metric_def.add_metric_dimensions();
  dimension->set_dimension("dimension 1");
  dimension->set_max_event_code(largest_event_code);

  ReportDefinition report_def;
  report_def.set_report_type(ReportDefinition::UNIQUE_DEVICE_COUNTS);
  report_def.set_prob_bit_flip(1.0);

  std::vector<uint64_t> valid_indices = {1, 2, 3};

  auto result = AddNoise(valid_indices, metric_def, report_def);
  EXPECT_TRUE(result.ok());
}
// Checks ClipValue against a report whose min_value is 25 and max_value is 100: a value inside
// that range comes back unchanged, a smaller one comes back as the minimum and a larger one as the
// maximum.
TEST_F(PrivacyEncoderTest, ClipValue) {
  const int64_t lower_bound = 25;
  const int64_t upper_bound = 100;

  ReportDefinition report_def;
  report_def.set_min_value(lower_bound);
  report_def.set_max_value(upper_bound);

  // Inside the range: returned as-is.
  const int64_t within_range = 50;
  EXPECT_EQ(ClipValue(within_range, report_def), within_range);

  // Below the range: raised to the minimum.
  const int64_t below_range = 10;
  EXPECT_EQ(ClipValue(below_range, report_def), lower_bound);

  // Above the range: lowered to the maximum.
  const int64_t above_range = 150;
  EXPECT_EQ(ClipValue(above_range, report_def), upper_bound);
}
// Checks ClipCount against a report whose max_count is 100: a count below the maximum comes back
// unchanged, while a count above it comes back as the maximum.
TEST_F(PrivacyEncoderTest, ClipCount) {
  const uint64_t count_limit = 100;

  ReportDefinition report_def;
  report_def.set_max_count(count_limit);

  // Below the limit: returned as-is.
  const uint64_t small_count = 50;
  EXPECT_EQ(ClipCount(small_count, report_def), small_count);

  // Above the limit: capped at the limit.
  const uint64_t large_count = 150;
  EXPECT_EQ(ClipCount(large_count, report_def), count_limit);
}
} // namespace cobalt::logger