blob: cb682a101a0811ab60c9cc97858edee4ed46019d [file] [log] [blame]
#include "src/algorithms/experimental/archived/bucket_wise_histogram_encoder.h"
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "src/algorithms/experimental/archived/krr_integer_encoder.h"
#include "src/algorithms/random/random.h"
using testing::DoubleNear;
using testing::ElementsAre;
using testing::Pointwise;
namespace cobalt {
class ArchivedBucketWiseHistogramEncoderTest : public ::testing::Test {
protected:
void SetUp() override { gen_ = std::make_unique<RandomNumberGenerator>(0); }
RandomNumberGenerator* GetGenerator() { return gen_.get(); }
// Create an ArchivedKrrIntegerEncoder for the range [0, |max_count|] where the number of
// partitions is equal to |max_count|, and where the probability of a data-independent response is
// 0. Both the initial fixed-point encoding and the final output of the encoder are deterministic
// in this case.
std::unique_ptr<ArchivedKrrIntegerEncoder> MakeDeterministicIntegerEncoder(int64_t max_count) {
return std::make_unique<ArchivedKrrIntegerEncoder>(GetGenerator(), /*min_int=*/0,
/*max_int=*/max_count,
/*partitions=*/max_count, Probability(0.0));
}
// Create an ArchivedKrrIntegerSumEstimator for an ArchivedKrrIntegerEncoder with range [0,
// |max_count|] and where the number of partitions is equal to |max_count|.
static std::unique_ptr<ArchivedKrrIntegerSumEstimator> MakeIntegerSumEstimator(int64_t max_count,
Probability p) {
return std::make_unique<ArchivedKrrIntegerSumEstimator>(/*min_int=*/0, /*max_int=*/max_count,
/*partitions=*/max_count, p);
}
private:
std::unique_ptr<RandomNumberGenerator> gen_;
};
// Bucket-wise encoding with p = 0. The encoder should snap negative counts to 0 and overflow
// counts to |max_count|, but otherwise leave counts unchanged.
TEST_F(ArchivedBucketWiseHistogramEncoderTest, Encode) {
// Parameters for encoding bucket counts
int64_t max_count = 5;
auto integer_encoder = MakeDeterministicIntegerEncoder(max_count);
// Number of buckets in the top-level histogram
uint32_t num_buckets = 10;
auto histogram_encoder = ArchivedBucketWiseHistogramEncoder(num_buckets, integer_encoder.get());
std::vector<int64_t> histogram = {-1, 0, 1, 2, 3, 4, 5, 6, 0, 1};
std::vector<uint32_t> encoded = histogram_encoder.Encode(histogram);
EXPECT_THAT(encoded, ElementsAre(0u, 0u, 1u, 2u, 3u, 4u, 5u, 5u, 0u, 1u));
}
// Bucket-wise encoding with p > 0. The expected result depends on the seed passed to |gen_|.
TEST_F(ArchivedBucketWiseHistogramEncoderTest, EncodeNonzeroP) {
// Parameters for encoding occurrences
int64_t max_count = 5;
Probability p(0.1);
auto integer_encoder =
std::make_unique<ArchivedKrrIntegerEncoder>(GetGenerator(), 0, max_count, max_count, p);
// Number of buckets in the top-level histogram
uint32_t num_buckets = 10;
auto histogram_encoder = ArchivedBucketWiseHistogramEncoder(num_buckets, integer_encoder.get());
std::vector<int64_t> histogram = {0, 0, 1, 2, 3, 4, 5, 6, 0, 1};
std::vector<uint32_t> encoded = histogram_encoder.Encode(histogram);
EXPECT_THAT(encoded, ElementsAre(0u, 0u, 1u, 2u, 3u, 4u, 5u, 5u, 0u, 1u));
}
TEST_F(ArchivedBucketWiseHistogramEncoderTest, EstimateSum) {
// Parameters for encoding bucket counts
int64_t max_count = 5;
Probability p(0.0);
auto integer_sum_estimator = MakeIntegerSumEstimator(max_count, p);
// Number of buckets in the top-level histogram
uint32_t num_buckets = 10;
auto histogram_sum_estimator =
ArchivedBucketWiseHistogramSumEstimator(num_buckets, integer_sum_estimator.get());
// Since |partitions| is 5, the elements of the encoded histograms have values between 0 and 5
// inclusive.
std::vector<uint32_t> encoded_histogram_1 = {0, 1, 2, 3, 4, 5, 4, 3, 2, 1};
std::vector<uint32_t> encoded_histogram_2 = {1, 2, 3, 4, 5, 5, 4, 3, 2, 1};
std::vector<uint32_t> encoded_histogram_3 = {4, 4, 4, 4, 4, 4, 4, 4, 4, 4};
std::vector<std::vector<uint32_t>> encoded_histograms = {encoded_histogram_1, encoded_histogram_2,
encoded_histogram_3};
auto estimate = histogram_sum_estimator.ComputeSum(encoded_histograms);
std::vector<double> expected_sums = {5.0, 7.0, 9.0, 11.0, 13.0, 14.0, 12.0, 10.0, 8.0, 6.0};
EXPECT_THAT(estimate, Pointwise(DoubleNear(0.001), expected_sums));
}
TEST_F(ArchivedBucketWiseHistogramEncoderTest, EstimateSumNonzeroP) {
// Parameters for encoding bucket counts
int64_t max_count = 5;
Probability p(0.1);
auto integer_sum_estimator = MakeIntegerSumEstimator(max_count, p);
// Number of buckets in the top-level histogram
uint32_t num_buckets = 10;
auto histogram_sum_estimator =
ArchivedBucketWiseHistogramSumEstimator(num_buckets, integer_sum_estimator.get());
// Since |max_count| is 5 and this is equal to the number of partitions, the elements of the
// encoded histograms have values between 0 and 5 inclusive.
std::vector<uint32_t> encoded_histogram_1 = {0, 1, 2, 3, 4, 5, 4, 3, 2, 1};
std::vector<uint32_t> encoded_histogram_2 = {1, 2, 3, 4, 5, 5, 4, 3, 2, 1};
std::vector<uint32_t> encoded_histogram_3 = {4, 4, 4, 4, 4, 4, 4, 4, 4, 4};
std::vector<std::vector<uint32_t>> encoded_histograms = {encoded_histogram_1, encoded_histogram_2,
encoded_histogram_3};
auto estimate = histogram_sum_estimator.ComputeSum(encoded_histograms);
std::vector<double> expected_sums = {4.722, 6.944, 9.166, 11.388, 13.611,
14.722, 12.500, 10.277, 8.0555, 5.833};
EXPECT_THAT(estimate, Pointwise(DoubleNear(0.001), expected_sums));
}
} // namespace cobalt