| #include "src/algorithms/experimental/archived/bucket_wise_histogram_encoder.h" |
| |
| #include <gmock/gmock.h> |
| #include <gtest/gtest.h> |
| |
| #include "src/algorithms/experimental/archived/krr_integer_encoder.h" |
| #include "src/algorithms/random/random.h" |
| |
| using testing::DoubleNear; |
| using testing::ElementsAre; |
| using testing::Pointwise; |
| |
| namespace cobalt { |
| |
| class ArchivedBucketWiseHistogramEncoderTest : public ::testing::Test { |
| protected: |
| void SetUp() override { gen_ = std::make_unique<RandomNumberGenerator>(0); } |
| |
| RandomNumberGenerator* GetGenerator() { return gen_.get(); } |
| |
| // Create an ArchivedKrrIntegerEncoder for the range [0, |max_count|] where the number of |
| // partitions is equal to |max_count|, and where the probability of a data-independent response is |
| // 0. Both the initial fixed-point encoding and the final output of the encoder are deterministic |
| // in this case. |
| std::unique_ptr<ArchivedKrrIntegerEncoder> MakeDeterministicIntegerEncoder(int64_t max_count) { |
| return std::make_unique<ArchivedKrrIntegerEncoder>(GetGenerator(), /*min_int=*/0, |
| /*max_int=*/max_count, |
| /*partitions=*/max_count, Probability(0.0)); |
| } |
| |
| // Create an ArchivedKrrIntegerSumEstimator for an ArchivedKrrIntegerEncoder with range [0, |
| // |max_count|] and where the number of partitions is equal to |max_count|. |
| static std::unique_ptr<ArchivedKrrIntegerSumEstimator> MakeIntegerSumEstimator(int64_t max_count, |
| Probability p) { |
| return std::make_unique<ArchivedKrrIntegerSumEstimator>(/*min_int=*/0, /*max_int=*/max_count, |
| /*partitions=*/max_count, p); |
| } |
| |
| private: |
| std::unique_ptr<RandomNumberGenerator> gen_; |
| }; |
| |
| // Bucket-wise encoding with p = 0. The encoder should snap negative counts to 0 and overflow |
| // counts to |max_count|, but otherwise leave counts unchanged. |
| TEST_F(ArchivedBucketWiseHistogramEncoderTest, Encode) { |
| // Parameters for encoding bucket counts |
| int64_t max_count = 5; |
| auto integer_encoder = MakeDeterministicIntegerEncoder(max_count); |
| // Number of buckets in the top-level histogram |
| uint32_t num_buckets = 10; |
| auto histogram_encoder = ArchivedBucketWiseHistogramEncoder(num_buckets, integer_encoder.get()); |
| |
| std::vector<int64_t> histogram = {-1, 0, 1, 2, 3, 4, 5, 6, 0, 1}; |
| std::vector<uint32_t> encoded = histogram_encoder.Encode(histogram); |
| EXPECT_THAT(encoded, ElementsAre(0u, 0u, 1u, 2u, 3u, 4u, 5u, 5u, 0u, 1u)); |
| } |
| |
| // Bucket-wise encoding with p > 0. The expected result depends on the seed passed to |gen_|. |
| TEST_F(ArchivedBucketWiseHistogramEncoderTest, EncodeNonzeroP) { |
| // Parameters for encoding occurrences |
| int64_t max_count = 5; |
| Probability p(0.1); |
| auto integer_encoder = |
| std::make_unique<ArchivedKrrIntegerEncoder>(GetGenerator(), 0, max_count, max_count, p); |
| // Number of buckets in the top-level histogram |
| uint32_t num_buckets = 10; |
| auto histogram_encoder = ArchivedBucketWiseHistogramEncoder(num_buckets, integer_encoder.get()); |
| |
| std::vector<int64_t> histogram = {0, 0, 1, 2, 3, 4, 5, 6, 0, 1}; |
| std::vector<uint32_t> encoded = histogram_encoder.Encode(histogram); |
| EXPECT_THAT(encoded, ElementsAre(0u, 0u, 1u, 2u, 3u, 4u, 5u, 5u, 0u, 1u)); |
| } |
| |
| TEST_F(ArchivedBucketWiseHistogramEncoderTest, EstimateSum) { |
| // Parameters for encoding bucket counts |
| int64_t max_count = 5; |
| Probability p(0.0); |
| auto integer_sum_estimator = MakeIntegerSumEstimator(max_count, p); |
| // Number of buckets in the top-level histogram |
| uint32_t num_buckets = 10; |
| auto histogram_sum_estimator = |
| ArchivedBucketWiseHistogramSumEstimator(num_buckets, integer_sum_estimator.get()); |
| |
| // Since |partitions| is 5, the elements of the encoded histograms have values between 0 and 5 |
| // inclusive. |
| std::vector<uint32_t> encoded_histogram_1 = {0, 1, 2, 3, 4, 5, 4, 3, 2, 1}; |
| std::vector<uint32_t> encoded_histogram_2 = {1, 2, 3, 4, 5, 5, 4, 3, 2, 1}; |
| std::vector<uint32_t> encoded_histogram_3 = {4, 4, 4, 4, 4, 4, 4, 4, 4, 4}; |
| |
| std::vector<std::vector<uint32_t>> encoded_histograms = {encoded_histogram_1, encoded_histogram_2, |
| encoded_histogram_3}; |
| |
| auto estimate = histogram_sum_estimator.ComputeSum(encoded_histograms); |
| std::vector<double> expected_sums = {5.0, 7.0, 9.0, 11.0, 13.0, 14.0, 12.0, 10.0, 8.0, 6.0}; |
| EXPECT_THAT(estimate, Pointwise(DoubleNear(0.001), expected_sums)); |
| } |
| |
| TEST_F(ArchivedBucketWiseHistogramEncoderTest, EstimateSumNonzeroP) { |
| // Parameters for encoding bucket counts |
| int64_t max_count = 5; |
| Probability p(0.1); |
| auto integer_sum_estimator = MakeIntegerSumEstimator(max_count, p); |
| // Number of buckets in the top-level histogram |
| uint32_t num_buckets = 10; |
| auto histogram_sum_estimator = |
| ArchivedBucketWiseHistogramSumEstimator(num_buckets, integer_sum_estimator.get()); |
| |
| // Since |max_count| is 5 and this is equal to the number of partitions, the elements of the |
| // encoded histograms have values between 0 and 5 inclusive. |
| std::vector<uint32_t> encoded_histogram_1 = {0, 1, 2, 3, 4, 5, 4, 3, 2, 1}; |
| std::vector<uint32_t> encoded_histogram_2 = {1, 2, 3, 4, 5, 5, 4, 3, 2, 1}; |
| std::vector<uint32_t> encoded_histogram_3 = {4, 4, 4, 4, 4, 4, 4, 4, 4, 4}; |
| |
| std::vector<std::vector<uint32_t>> encoded_histograms = {encoded_histogram_1, encoded_histogram_2, |
| encoded_histogram_3}; |
| |
| auto estimate = histogram_sum_estimator.ComputeSum(encoded_histograms); |
| std::vector<double> expected_sums = {4.722, 6.944, 9.166, 11.388, 13.611, |
| 14.722, 12.500, 10.277, 8.0555, 5.833}; |
| EXPECT_THAT(estimate, Pointwise(DoubleNear(0.001), expected_sums)); |
| } |
| |
| } // namespace cobalt |