blob: 003557d96b44abb48749dde727f3f1c89154a0ef [file] [log] [blame]
#include "src/algorithms/experimental/integer_encoder.h"
#include "src/algorithms/experimental/distributions.h"
#include "src/algorithms/experimental/random.h"
#include "src/algorithms/experimental/randomized_response.h"
#include "src/registry/buckets_config.h"
namespace cobalt {
using config::IntegerBucketConfig;
namespace {
IntegerBuckets MakeIntegerBuckets(int64_t min_int, uint32_t num_buckets, uint32_t step_size) {
IntegerBuckets buckets;
auto linear_buckets = buckets.mutable_linear();
linear_buckets->set_floor(min_int);
linear_buckets->set_num_buckets(num_buckets);
linear_buckets->set_step_size(step_size);
return buckets;
}
double GetMultiplierSum(const std::vector<double>& multipliers) {
double sum = 0;
for (const double m : multipliers) {
sum += m;
}
return sum;
}
} // namespace
IntegerEncoder::IntegerEncoder(BitGeneratorInterface<uint32_t>* gen, const IntegerBuckets& buckets,
double p, const RoundingStrategy& rounding_strategy)
: gen_(gen), rounding_strategy_(rounding_strategy) {
bucket_config_ = IntegerBucketConfig::CreateFromProto(buckets);
randomizer_ = std::make_unique<ResponseRandomizer>(gen, bucket_config_->OverflowBucket(), p);
}
IntegerEncoder::IntegerEncoder(BitGeneratorInterface<uint32_t>* gen, int64_t min_int,
int64_t max_int, uint32_t partitions, double p,
const RoundingStrategy& rounding_strategy)
: gen_(gen), rounding_strategy_(rounding_strategy) {
int64_t step_size = (max_int - min_int) / partitions;
auto buckets = MakeIntegerBuckets(min_int, partitions, step_size);
bucket_config_ = IntegerBucketConfig::CreateFromProto(buckets);
randomizer_ = std::make_unique<ResponseRandomizer>(gen, bucket_config_->OverflowBucket(), p);
}
uint32_t IntegerEncoder::Encode(int64_t val) {
uint32_t bucket_index = bucket_config_->BucketIndex(val);
if (rounding_strategy_ == IntegerEncoder::kRandom) {
bucket_index = RandomRound(val, bucket_index);
}
return randomizer_->Encode(bucket_index);
}
uint32_t IntegerEncoder::RandomRound(int64_t val, uint32_t bucket_index) {
if (bucket_index == bucket_config_->UnderflowBucket() ||
bucket_index == bucket_config_->OverflowBucket()) {
return bucket_index;
}
int64_t lower_bound = bucket_config_->BucketFloor(bucket_index);
int64_t width = (bucket_config_->BucketFloor(bucket_index + 1) - lower_bound);
double pos = static_cast<double>(val - lower_bound) / width;
return bucket_index += BernoulliDistribution(gen_, pos).Sample();
}
IntegerSumEstimator::IntegerSumEstimator(const IntegerBuckets& buckets, double p,
const OutOfBoundsStrategy& underflow_strategy,
const OutOfBoundsStrategy& overflow_strategy)
: p_(p) {
bucket_config_ = IntegerBucketConfig::CreateFromProto(buckets);
frequency_estimator_ = std::make_unique<FrequencyEstimator>(bucket_config_->OverflowBucket());
GetBucketMultipliers(&multipliers_, underflow_strategy, overflow_strategy);
multiplier_sum_ = GetMultiplierSum(multipliers_);
}
IntegerSumEstimator::IntegerSumEstimator(int64_t min_int, int64_t max_int, uint32_t partitions,
double p, const OutOfBoundsStrategy& underflow_strategy,
const OutOfBoundsStrategy& overflow_strategy)
: p_(p) {
int64_t step_size = (max_int - min_int) / partitions;
auto buckets = MakeIntegerBuckets(min_int, partitions, step_size);
bucket_config_ = IntegerBucketConfig::CreateFromProto(buckets);
frequency_estimator_ = std::make_unique<FrequencyEstimator>(bucket_config_->OverflowBucket());
GetBucketMultipliers(&multipliers_, underflow_strategy, overflow_strategy);
multiplier_sum_ = GetMultiplierSum(multipliers_);
}
std::tuple<double, uint64_t, uint64_t> IntegerSumEstimator::ComputeSum(
const std::vector<uint32_t>& encoded_vals) {
std::vector<uint64_t> frequencies = frequency_estimator_->GetFrequencies(encoded_vals);
double raw_sum = 0.0;
for (uint32_t index = 0; index < frequencies.size(); index++) {
raw_sum += frequencies[index] * multipliers_[index];
}
auto underflow_count = frequencies[bucket_config_->UnderflowBucket()];
auto overflow_count = frequencies[bucket_config_->OverflowBucket()];
return {DebiasSum(raw_sum, encoded_vals.size()), underflow_count, overflow_count};
}
void IntegerSumEstimator::GetBucketMultipliers(std::vector<double>* multipliers,
const OutOfBoundsStrategy& underflow_strategy,
const OutOfBoundsStrategy& overflow_strategy) {
uint32_t underflow_index = bucket_config_->UnderflowBucket();
uint32_t overflow_index = bucket_config_->OverflowBucket();
multipliers->resize(overflow_index + 1);
switch (underflow_strategy) {
case kClamp: {
(*multipliers)[underflow_index] = bucket_config_->BucketFloor(1);
break;
}
case kDiscard:
default: {
(*multipliers)[underflow_index] = 0;
}
}
switch (overflow_strategy) {
case kClamp: {
(*multipliers)[overflow_index] = bucket_config_->BucketFloor(overflow_index);
break;
}
case kDiscard:
default: {
(*multipliers)[overflow_index] = 0;
}
}
for (uint32_t index = 1; index < overflow_index; index++) {
(*multipliers)[index] = bucket_config_->BucketFloor(index);
}
}
double IntegerSumEstimator::DebiasSum(int64_t raw_sum, uint64_t input_size) {
double coeff = (static_cast<double>(input_size) * p_) / (bucket_config_->OverflowBucket() + 1);
return static_cast<double>(raw_sum) - (coeff * multiplier_sum_);
}
} // namespace cobalt