src/algorithms/experimental/integer_encoder.cc - cobalt - Git at Google

 #include "src/algorithms/experimental/integer_encoder.h"

 #include "src/algorithms/experimental/distributions.h"
 #include "src/algorithms/experimental/random.h"
 #include "src/algorithms/experimental/randomized_response.h"
 #include "src/registry/buckets_config.h"

 namespace cobalt {

 using config::IntegerBucketConfig;

 namespace {

 IntegerBuckets MakeIntegerBuckets(int64_t min_int, uint32_t num_buckets, uint32_t step_size) {
   IntegerBuckets buckets;
   auto linear_buckets = buckets.mutable_linear();
   linear_buckets->set_floor(min_int);
   linear_buckets->set_num_buckets(num_buckets);
   linear_buckets->set_step_size(step_size);
   return buckets;
 }

 double GetMultiplierSum(const std::vector<double>& multipliers) {
   double sum = 0;
   for (const double m : multipliers) {
     sum += m;
   }
   return sum;
 }

 }  // namespace

 IntegerEncoder::IntegerEncoder(BitGeneratorInterface<uint32_t>* gen, const IntegerBuckets& buckets,
                                double p, const RoundingStrategy& rounding_strategy)
     : gen_(gen), rounding_strategy_(rounding_strategy) {
   bucket_config_ = IntegerBucketConfig::CreateFromProto(buckets);
   randomizer_ = std::make_unique<ResponseRandomizer>(gen, bucket_config_->OverflowBucket(), p);
 }

 IntegerEncoder::IntegerEncoder(BitGeneratorInterface<uint32_t>* gen, int64_t min_int,
                                int64_t max_int, uint32_t partitions, double p,
                                const RoundingStrategy& rounding_strategy)
     : gen_(gen), rounding_strategy_(rounding_strategy) {
   int64_t step_size = (max_int - min_int) / partitions;
   auto buckets = MakeIntegerBuckets(min_int, partitions, step_size);
   bucket_config_ = IntegerBucketConfig::CreateFromProto(buckets);
   randomizer_ = std::make_unique<ResponseRandomizer>(gen, bucket_config_->OverflowBucket(), p);
 }

 uint32_t IntegerEncoder::Encode(int64_t val) {
   uint32_t bucket_index = bucket_config_->BucketIndex(val);
   if (rounding_strategy_ == IntegerEncoder::kRandom) {
     bucket_index = RandomRound(val, bucket_index);
   }
   return randomizer_->Encode(bucket_index);
 }

 uint32_t IntegerEncoder::RandomRound(int64_t val, uint32_t bucket_index) {
   if (bucket_index == bucket_config_->UnderflowBucket() ||
       bucket_index == bucket_config_->OverflowBucket()) {
     return bucket_index;
   }
   int64_t lower_bound = bucket_config_->BucketFloor(bucket_index);
   int64_t width = (bucket_config_->BucketFloor(bucket_index + 1) - lower_bound);
   double pos = static_cast<double>(val - lower_bound) / width;
   return bucket_index += BernoulliDistribution(gen_, pos).Sample();
 }

 IntegerSumEstimator::IntegerSumEstimator(const IntegerBuckets& buckets, double p,
                                          const OutOfBoundsStrategy& underflow_strategy,
                                          const OutOfBoundsStrategy& overflow_strategy)
     : p_(p) {
   bucket_config_ = IntegerBucketConfig::CreateFromProto(buckets);
   frequency_estimator_ = std::make_unique<FrequencyEstimator>(bucket_config_->OverflowBucket());
   GetBucketMultipliers(&multipliers_, underflow_strategy, overflow_strategy);
   multiplier_sum_ = GetMultiplierSum(multipliers_);
 }

 IntegerSumEstimator::IntegerSumEstimator(int64_t min_int, int64_t max_int, uint32_t partitions,
                                          double p, const OutOfBoundsStrategy& underflow_strategy,
                                          const OutOfBoundsStrategy& overflow_strategy)
     : p_(p) {
   int64_t step_size = (max_int - min_int) / partitions;
   auto buckets = MakeIntegerBuckets(min_int, partitions, step_size);
   bucket_config_ = IntegerBucketConfig::CreateFromProto(buckets);
   frequency_estimator_ = std::make_unique<FrequencyEstimator>(bucket_config_->OverflowBucket());
   GetBucketMultipliers(&multipliers_, underflow_strategy, overflow_strategy);
   multiplier_sum_ = GetMultiplierSum(multipliers_);
 }

 std::tuple<double, uint64_t, uint64_t> IntegerSumEstimator::ComputeSum(
     const std::vector<uint32_t>& encoded_vals) {
   std::vector<uint64_t> frequencies = frequency_estimator_->GetFrequencies(encoded_vals);
   double raw_sum = 0.0;
   for (uint32_t index = 0; index < frequencies.size(); index++) {
     raw_sum += frequencies[index] * multipliers_[index];
   }
   auto underflow_count = frequencies[bucket_config_->UnderflowBucket()];
   auto overflow_count = frequencies[bucket_config_->OverflowBucket()];
   return {DebiasSum(raw_sum, encoded_vals.size()), underflow_count, overflow_count};
 }

 void IntegerSumEstimator::GetBucketMultipliers(std::vector<double>* multipliers,
                                                const OutOfBoundsStrategy& underflow_strategy,
                                                const OutOfBoundsStrategy& overflow_strategy) {
   uint32_t underflow_index = bucket_config_->UnderflowBucket();
   uint32_t overflow_index = bucket_config_->OverflowBucket();
   multipliers->resize(overflow_index + 1);
   switch (underflow_strategy) {
     case kClamp: {
       (*multipliers)[underflow_index] = bucket_config_->BucketFloor(1);
       break;
     }
     case kDiscard:
     default: {
       (*multipliers)[underflow_index] = 0;
     }
   }
   switch (overflow_strategy) {
     case kClamp: {
       (*multipliers)[overflow_index] = bucket_config_->BucketFloor(overflow_index);
       break;
     }
     case kDiscard:
     default: {
       (*multipliers)[overflow_index] = 0;
     }
   }
   for (uint32_t index = 1; index < overflow_index; index++) {
     (*multipliers)[index] = bucket_config_->BucketFloor(index);
   }
 }

 double IntegerSumEstimator::DebiasSum(int64_t raw_sum, uint64_t input_size) {
   double coeff = (static_cast<double>(input_size) * p_) / (bucket_config_->OverflowBucket() + 1);
   return static_cast<double>(raw_sum) - (coeff * multiplier_sum_);
 }

 }  // namespace cobalt
	#include "src/algorithms/experimental/integer_encoder.h"

	#include "src/algorithms/experimental/distributions.h"
	#include "src/algorithms/experimental/random.h"
	#include "src/algorithms/experimental/randomized_response.h"
	#include "src/registry/buckets_config.h"

	namespace cobalt {

	using config::IntegerBucketConfig;

	namespace {

	IntegerBuckets MakeIntegerBuckets(int64_t min_int, uint32_t num_buckets, uint32_t step_size) {
	IntegerBuckets buckets;
	auto linear_buckets = buckets.mutable_linear();
	linear_buckets->set_floor(min_int);
	linear_buckets->set_num_buckets(num_buckets);
	linear_buckets->set_step_size(step_size);
	return buckets;
	}

	double GetMultiplierSum(const std::vector<double>& multipliers) {
	double sum = 0;
	for (const double m : multipliers) {
	sum += m;
	}
	return sum;
	}

	} // namespace

	IntegerEncoder::IntegerEncoder(BitGeneratorInterface<uint32_t>* gen, const IntegerBuckets& buckets,
	double p, const RoundingStrategy& rounding_strategy)
	: gen_(gen), rounding_strategy_(rounding_strategy) {
	bucket_config_ = IntegerBucketConfig::CreateFromProto(buckets);
	randomizer_ = std::make_unique<ResponseRandomizer>(gen, bucket_config_->OverflowBucket(), p);
	}

	IntegerEncoder::IntegerEncoder(BitGeneratorInterface<uint32_t>* gen, int64_t min_int,
	int64_t max_int, uint32_t partitions, double p,
	const RoundingStrategy& rounding_strategy)
	: gen_(gen), rounding_strategy_(rounding_strategy) {
	int64_t step_size = (max_int - min_int) / partitions;
	auto buckets = MakeIntegerBuckets(min_int, partitions, step_size);
	bucket_config_ = IntegerBucketConfig::CreateFromProto(buckets);
	randomizer_ = std::make_unique<ResponseRandomizer>(gen, bucket_config_->OverflowBucket(), p);
	}

	uint32_t IntegerEncoder::Encode(int64_t val) {
	uint32_t bucket_index = bucket_config_->BucketIndex(val);
	if (rounding_strategy_ == IntegerEncoder::kRandom) {
	bucket_index = RandomRound(val, bucket_index);
	}
	return randomizer_->Encode(bucket_index);
	}

	uint32_t IntegerEncoder::RandomRound(int64_t val, uint32_t bucket_index) {
	if (bucket_index == bucket_config_->UnderflowBucket() \|\|
	bucket_index == bucket_config_->OverflowBucket()) {
	return bucket_index;
	}
	int64_t lower_bound = bucket_config_->BucketFloor(bucket_index);
	int64_t width = (bucket_config_->BucketFloor(bucket_index + 1) - lower_bound);
	double pos = static_cast<double>(val - lower_bound) / width;
	return bucket_index += BernoulliDistribution(gen_, pos).Sample();
	}

	IntegerSumEstimator::IntegerSumEstimator(const IntegerBuckets& buckets, double p,
	const OutOfBoundsStrategy& underflow_strategy,
	const OutOfBoundsStrategy& overflow_strategy)
	: p_(p) {
	bucket_config_ = IntegerBucketConfig::CreateFromProto(buckets);
	frequency_estimator_ = std::make_unique<FrequencyEstimator>(bucket_config_->OverflowBucket());
	GetBucketMultipliers(&multipliers_, underflow_strategy, overflow_strategy);
	multiplier_sum_ = GetMultiplierSum(multipliers_);
	}

	IntegerSumEstimator::IntegerSumEstimator(int64_t min_int, int64_t max_int, uint32_t partitions,
	double p, const OutOfBoundsStrategy& underflow_strategy,
	const OutOfBoundsStrategy& overflow_strategy)
	: p_(p) {
	int64_t step_size = (max_int - min_int) / partitions;
	auto buckets = MakeIntegerBuckets(min_int, partitions, step_size);
	bucket_config_ = IntegerBucketConfig::CreateFromProto(buckets);
	frequency_estimator_ = std::make_unique<FrequencyEstimator>(bucket_config_->OverflowBucket());
	GetBucketMultipliers(&multipliers_, underflow_strategy, overflow_strategy);
	multiplier_sum_ = GetMultiplierSum(multipliers_);
	}

	std::tuple<double, uint64_t, uint64_t> IntegerSumEstimator::ComputeSum(
	const std::vector<uint32_t>& encoded_vals) {
	std::vector<uint64_t> frequencies = frequency_estimator_->GetFrequencies(encoded_vals);
	double raw_sum = 0.0;
	for (uint32_t index = 0; index < frequencies.size(); index++) {
	raw_sum += frequencies[index] * multipliers_[index];
	}
	auto underflow_count = frequencies[bucket_config_->UnderflowBucket()];
	auto overflow_count = frequencies[bucket_config_->OverflowBucket()];
	return {DebiasSum(raw_sum, encoded_vals.size()), underflow_count, overflow_count};
	}

	void IntegerSumEstimator::GetBucketMultipliers(std::vector<double>* multipliers,
	const OutOfBoundsStrategy& underflow_strategy,
	const OutOfBoundsStrategy& overflow_strategy) {
	uint32_t underflow_index = bucket_config_->UnderflowBucket();
	uint32_t overflow_index = bucket_config_->OverflowBucket();
	multipliers->resize(overflow_index + 1);
	switch (underflow_strategy) {
	case kClamp: {
	(*multipliers)[underflow_index] = bucket_config_->BucketFloor(1);
	break;
	}
	case kDiscard:
	default: {
	(*multipliers)[underflow_index] = 0;
	}
	}
	switch (overflow_strategy) {
	case kClamp: {
	(*multipliers)[overflow_index] = bucket_config_->BucketFloor(overflow_index);
	break;
	}
	case kDiscard:
	default: {
	(*multipliers)[overflow_index] = 0;
	}
	}
	for (uint32_t index = 1; index < overflow_index; index++) {
	(*multipliers)[index] = bucket_config_->BucketFloor(index);
	}
	}

	double IntegerSumEstimator::DebiasSum(int64_t raw_sum, uint64_t input_size) {
	double coeff = (static_cast<double>(input_size) * p_) / (bucket_config_->OverflowBucket() + 1);
	return static_cast<double>(raw_sum) - (coeff * multiplier_sum_);
	}

	} // namespace cobalt