src/algorithms/experimental/histogram_encoder.cc - cobalt - Git at Google

 #include "src/algorithms/experimental/histogram_encoder.h"

 #include <algorithm>
 #include <map>

 #include "src/algorithms/experimental/integer_encoder.h"
 #include "src/algorithms/experimental/randomized_response.h"
 #include "src/algorithms/random/random.h"

 namespace cobalt {

 BucketWiseHistogramEncoder::BucketWiseHistogramEncoder(uint32_t num_buckets,
                                                        IntegerEncoder* integer_encoder)
     : num_buckets_(num_buckets), integer_encoder_(integer_encoder) {}

 std::vector<uint32_t> BucketWiseHistogramEncoder::Encode(const std::vector<int64_t>& histogram) {
   std::vector<uint32_t> encoded(num_buckets_);
   for (uint32_t index = 0; index < num_buckets_; index++) {
     encoded[index] = integer_encoder_->Encode(histogram[index]);
   }
   return encoded;
 }

 BucketWiseHistogramSumEstimator::BucketWiseHistogramSumEstimator(
     uint32_t num_buckets, IntegerSumEstimator* integer_sum_estimator)
     : num_buckets_(num_buckets), integer_sum_estimator_(integer_sum_estimator) {}

 std::vector<double> BucketWiseHistogramSumEstimator::ComputeSum(
     const std::vector<std::vector<uint32_t>>& encoded_histograms) {
   std::vector<double> decoded(num_buckets_);
   for (uint32_t index = 0; index < num_buckets_; index++) {
     std::vector<uint32_t> encoded_counts(encoded_histograms.size());
     for (size_t k = 0; k < encoded_histograms.size(); k++) {
       encoded_counts[k] = encoded_histograms[k][index];
     }
     decoded[index] = integer_sum_estimator_->ComputeSum(encoded_counts);
   }
   return decoded;
 }

 OccurrenceWiseHistogramEncoder::OccurrenceWiseHistogramEncoder(BitGeneratorInterface<uint32_t>* gen,
                                                                uint32_t num_buckets,
                                                                uint64_t max_count, double p)
     : num_buckets_(num_buckets), max_count_(max_count) {
   randomizer_ = std::make_unique<ResponseRandomizer>(gen, num_buckets - 1, p);
 }

 std::vector<uint64_t> OccurrenceWiseHistogramEncoder::Encode(
     const std::vector<uint64_t>& histogram) {
   std::vector<uint64_t> encoded_histogram(num_buckets_);
   for (uint32_t bucket_index = 0u; bucket_index < histogram.size(); bucket_index++) {
     uint64_t bucket_count = std::min(histogram[bucket_index], max_count_);
     for (uint64_t i = 0u; i < bucket_count; i++) {
       encoded_histogram[randomizer_->Encode(bucket_index)]++;
     }
   }
   return encoded_histogram;
 }

 OccurrenceWiseHistogramSumEstimator::OccurrenceWiseHistogramSumEstimator(uint32_t num_buckets,
                                                                          double p) {
   frequency_estimator_ = std::make_unique<FrequencyEstimator>(num_buckets - 1, p);
 }

 std::vector<double> OccurrenceWiseHistogramSumEstimator::ComputeSum(
     const std::vector<std::vector<uint64_t>>& encoded_histograms) {
   return frequency_estimator_->GetFrequenciesFromHistograms(encoded_histograms);
 }

 TwoDimRapporHistogramEncoder::TwoDimRapporHistogramEncoder(BitGeneratorInterface<uint32_t>* gen,
                                                            uint32_t num_buckets, uint64_t max_count,
                                                            double p)
     : gen_(gen), num_buckets_(num_buckets), max_count_(max_count), p_(p) {}

 std::vector<std::pair<uint32_t, uint64_t>> TwoDimRapporHistogramEncoder::Encode(
     const std::vector<uint64_t>& histogram) {
   std::vector<uint64_t> clipped_histogram(num_buckets_);
   for (uint32_t bucket_index = 0u; bucket_index < num_buckets_; bucket_index++) {
     clipped_histogram[bucket_index] = std::min(histogram[bucket_index], max_count_);
   }
   std::vector<std::pair<uint32_t, uint64_t>> encoded;
   auto dist_if_absent = BinomialDistribution(gen_, num_buckets_, p_);
   auto dist_0_to_1_if_present = BinomialDistribution(gen_, num_buckets_ - 1, p_);
   auto dist_1_to_1_if_present = BernoulliDistribution(gen_, 1.0 - p_);
   for (uint32_t bucket_index = 0u; bucket_index < num_buckets_; bucket_index++) {
     // We skip encoding and sending bucket counts of 0.
     for (uint64_t bucket_count = 1u; bucket_count <= max_count_; bucket_count++) {
       uint64_t encoded_count =
           (bucket_count == clipped_histogram[bucket_index]
                ? dist_0_to_1_if_present.Sample() + dist_1_to_1_if_present.Sample()
                : dist_if_absent.Sample());
       for (uint64_t num_occurrences = 0u; num_occurrences < encoded_count; num_occurrences++) {
         encoded.emplace_back(std::pair<uint32_t, uint64_t>(bucket_index, bucket_count));
       }
     }
   }
   return encoded;
 }

 TwoDimRapporHistogramSumEstimator::TwoDimRapporHistogramSumEstimator(uint32_t num_buckets,
                                                                      uint64_t max_count, double p)
     : num_buckets_(num_buckets), max_count_(max_count), p_(p) {}

 std::vector<double> TwoDimRapporHistogramSumEstimator::ComputeSum(
     const std::vector<std::vector<std::pair<uint32_t, uint64_t>>>& encoded_histograms,
     uint64_t num_participants) {
   // Compute the number of occurrences of each bit ID in |encoded_histograms|.
   std::map<std::pair<uint32_t, uint64_t>, uint64_t> raw_bit_counts;
   for (const auto& hist : encoded_histograms) {
     for (auto bit_id : hist) {
       raw_bit_counts[bit_id]++;
     }
   }

   // The total number of encoded 1-hot bitvectors which contributed to |encoded_histograms|.
   auto num_bitvectors = static_cast<double>(num_participants * num_buckets_);

   std::vector<double> sum(num_buckets_);
   for (uint32_t bucket_index = 0; bucket_index < num_buckets_; bucket_index++) {
     double total_count_for_bucket = 0.0;
     for (uint64_t bucket_count = 0; bucket_count <= max_count_; bucket_count++) {
       // Estimate the true number of occurrences of the bit with ID (|bucket_count|, |bucket_index|)
       // among the |num_bitvectors| encoded bitvectors. (See Erlingsson, Pihur, Korolova, "RAPPOR:
       // Randomized Aggregatable Privacy-Preserving Ordinal Response" section 4, taking f = 1.)
       double estimated_bit_count =
           (static_cast<double>(raw_bit_counts[{bucket_index, bucket_count}]) -
            p_ * num_bitvectors) /
           (1.0 - 2 * p_);
       // Clip the estimate into the range of possible true values.
       if (estimated_bit_count < 0.0) {
         estimated_bit_count = 0.0;
       }
       if (estimated_bit_count > num_bitvectors) {
         estimated_bit_count = num_bitvectors;
       }
       // Compute the contribution of the (estimated number of) bits with true ID (|bucket_index|,
       // |bucket_count|) to the total count for bucket |bucket_index|.
       total_count_for_bucket += static_cast<double>(bucket_count) * estimated_bit_count;
     }
     sum[bucket_index] = total_count_for_bucket;
   }
   return sum;
 }

 SinglePassTwoDimRapporHistogramEncoder::SinglePassTwoDimRapporHistogramEncoder(
     BitGeneratorInterface<uint32_t>* gen, uint32_t num_buckets, uint64_t max_count, double p)
     : gen_(gen), num_buckets_(num_buckets), max_count_(max_count), p_(p) {}

 std::vector<std::pair<uint32_t, uint64_t>> SinglePassTwoDimRapporHistogramEncoder::Encode(
     const std::vector<std::pair<uint32_t, uint64_t>>& histogram) {
   std::vector<std::pair<uint32_t, uint64_t>> clipped_histogram;
   clipped_histogram.reserve(histogram.size());
   for (std::pair<uint32_t, uint64_t> bucket_index_and_count : histogram) {
     clipped_histogram.emplace_back(bucket_index_and_count.first,
                                    std::min(bucket_index_and_count.second, max_count_));
   }
   if (p_ == 0.0) {
     return clipped_histogram;
   }
   std::vector<std::pair<uint32_t, uint64_t>> encoded;
   auto geom_dist = GeometricDistribution(gen_, p_);

   // Current index into an enumeration of the set [0,...,|num_buckets|-1] x [1,...,|max_count|]. We
   // will draw a random sample to decide whether to include the pair (bucket_index, count) at this
   // index.
   uint64_t current_histogram_index = 0;

   // Next index to be flipped.
   uint64_t flipped_index = geom_dist.Sample();

   while (flipped_index < num_buckets_ * max_count_) {
     auto flipped_bucket_index_and_count =
         std::pair<uint32_t, uint64_t>(flipped_index / max_count_, 1 + (flipped_index % max_count_));

     while (current_histogram_index < clipped_histogram.size() &&
            clipped_histogram[current_histogram_index] < flipped_bucket_index_and_count) {
       // The bit at clipped_histogram[current_histogram_index] is not flipped.
       // Add it to the output.
       encoded.emplace_back(clipped_histogram[current_histogram_index]);
       current_histogram_index++;
     }

     if (current_histogram_index < clipped_histogram.size() &&
         clipped_histogram[current_histogram_index] == flipped_bucket_index_and_count) {
       // The bit at histogram[current_histogram_index] is flipped. Do not add it to the output.
       current_histogram_index++;
     } else {
       // The bit at clipped_histogram[flipped_bucket_index_and_count] is flipped.
       // Add it to the output.
       encoded.emplace_back(flipped_bucket_index_and_count);
     }

     // Compute the next index to be flipped.
     flipped_index += (geom_dist.Sample() + 1);
   }

   // Add the remaining pairs in the input histogram to the output, as they are not flipped.
   while (current_histogram_index < clipped_histogram.size()) {
     encoded.emplace_back(clipped_histogram[current_histogram_index]);
     current_histogram_index++;
   }

   return encoded;
 }

 SinglePassTwoDimRapporHistogramSumEstimator::SinglePassTwoDimRapporHistogramSumEstimator(
     uint32_t num_buckets, uint64_t max_count, double p)
     : num_buckets_(num_buckets), max_count_(max_count), p_(p) {}

 std::vector<double> SinglePassTwoDimRapporHistogramSumEstimator::ComputeSum(
     const std::vector<std::vector<std::pair<uint32_t, uint64_t>>>& encoded_histograms,
     uint64_t num_participants) {
   // Compute the number of occurrences of each bit ID in |encoded_histograms|.
   std::map<std::pair<uint32_t, uint64_t>, uint64_t> raw_bit_counts;
   for (const auto& hist : encoded_histograms) {
     for (auto bit_id : hist) {
       raw_bit_counts[bit_id]++;
     }
   }

   std::vector<double> sum(num_buckets_);
   for (uint32_t bucket_index = 0; bucket_index < num_buckets_; bucket_index++) {
     double total_count_for_bucket = 0.0;
     for (uint64_t bucket_count = 0; bucket_count <= max_count_; bucket_count++) {
       // Estimate the true number of occurrences of the bit with ID (|bucket_count|, |bucket_index|)
       // among the |num_participants| encoded bitvectors. (See Erlingsson, Pihur, Korolova, "RAPPOR:
       // Randomized Aggregatable Privacy-Preserving Ordinal Response" section 4, taking f = 1.)
       double estimated_bit_count =
           (static_cast<double>(raw_bit_counts[{bucket_index, bucket_count}]) -
            p_ * static_cast<double>(num_participants)) /
           (1.0 - 2 * p_);
       // Clip the estimate into the range of possible true values.
       if (estimated_bit_count < 0.0) {
         estimated_bit_count = 0.0;
       }
       if (estimated_bit_count > static_cast<double>(num_participants)) {
         estimated_bit_count = static_cast<double>(num_participants);
       }
       // Compute the contribution of the (estimated number of) bits with true ID (|bucket_index|,
       // |bucket_count|) to the total count for bucket |bucket_index|.
       total_count_for_bucket += static_cast<double>(bucket_count) * estimated_bit_count;
     }
     sum[bucket_index] = total_count_for_bucket;
   }

   return sum;
 }

 TwoDimBinomialHistogramEncoder::TwoDimBinomialHistogramEncoder(BitGeneratorInterface<uint32_t>* gen,
                                                                uint32_t num_buckets,
                                                                uint64_t max_count, double p)
     : gen_(gen), num_buckets_(num_buckets), max_count_(max_count), p_(p) {}

 std::vector<std::pair<uint32_t, uint64_t>> TwoDimBinomialHistogramEncoder::Encode(
     const std::vector<std::pair<uint32_t, uint64_t>>& histogram) {
   std::vector<std::pair<uint32_t, uint64_t>> encoded;
   encoded.reserve(histogram.size());

   // Add all input pairs to the output.
   for (std::pair<uint32_t, uint64_t> bucket_index_and_count : histogram) {
     encoded.emplace_back(bucket_index_and_count.first,
                          std::min(bucket_index_and_count.second, max_count_));
   }

   if (p_ == 0.0) {
     return encoded;
   }
   // Add noise to the output; each pair is added with probability p_.
   auto geom_dist = GeometricDistribution(gen_, p_);
   uint64_t flipped_index = geom_dist.Sample();
   while (flipped_index < num_buckets_ * max_count_) {
     encoded.emplace_back(flipped_index / max_count_, 1 + flipped_index % max_count_);
     flipped_index += (geom_dist.Sample() + 1);
   }

   return encoded;
 }

 TwoDimBinomialHistogramSumEstimator::TwoDimBinomialHistogramSumEstimator(uint32_t num_buckets,
                                                                          uint64_t max_count,
                                                                          double p)
     : num_buckets_(num_buckets), max_count_(max_count), p_(p) {}

 std::vector<double> TwoDimBinomialHistogramSumEstimator::ComputeSum(
     const std::vector<std::vector<std::pair<uint32_t, uint64_t>>>& encoded_histograms,
     uint64_t num_participants) {
   // Compute the number of occurrences of each bit ID in |encoded_histograms|.
   std::map<std::pair<uint32_t, uint64_t>, uint64_t> raw_bit_counts;
   for (const auto& hist : encoded_histograms) {
     for (auto bit_id : hist) {
       raw_bit_counts[bit_id]++;
     }
   }

   std::vector<double> sum(num_buckets_);
   for (uint32_t bucket_index = 0; bucket_index < num_buckets_; bucket_index++) {
     double total_count_for_bucket = 0.0;
     for (uint64_t bucket_count = 0; bucket_count <= max_count_; bucket_count++) {
       // Estimate the true number of occurrences of the bit with ID (|bucket_count|, |bucket_index|)
       double estimated_bit_count =
           (static_cast<double>(raw_bit_counts[{bucket_index, bucket_count}]) -
            p_ * static_cast<double>(num_participants));
       // Clip the estimate into the range of possible true values.
       if (estimated_bit_count < 0.0) {
         estimated_bit_count = 0.0;
       }
       if (estimated_bit_count > static_cast<double>(num_participants)) {
         estimated_bit_count = static_cast<double>(num_participants);
       }
       // Compute the contribution of the (estimated number of) bits with true ID (|bucket_index|,
       // |bucket_count|) to the total count for bucket |bucket_index|.
       total_count_for_bucket += static_cast<double>(bucket_count) * estimated_bit_count;
     }
     sum[bucket_index] = total_count_for_bucket;
   }

   return sum;
 }

 }  // namespace cobalt
	#include "src/algorithms/experimental/histogram_encoder.h"

	#include <algorithm>
	#include <map>

	#include "src/algorithms/experimental/integer_encoder.h"
	#include "src/algorithms/experimental/randomized_response.h"
	#include "src/algorithms/random/random.h"

	namespace cobalt {

	BucketWiseHistogramEncoder::BucketWiseHistogramEncoder(uint32_t num_buckets,
	IntegerEncoder* integer_encoder)
	: num_buckets_(num_buckets), integer_encoder_(integer_encoder) {}

	std::vector<uint32_t> BucketWiseHistogramEncoder::Encode(const std::vector<int64_t>& histogram) {
	std::vector<uint32_t> encoded(num_buckets_);
	for (uint32_t index = 0; index < num_buckets_; index++) {
	encoded[index] = integer_encoder_->Encode(histogram[index]);
	}
	return encoded;
	}

	BucketWiseHistogramSumEstimator::BucketWiseHistogramSumEstimator(
	uint32_t num_buckets, IntegerSumEstimator* integer_sum_estimator)
	: num_buckets_(num_buckets), integer_sum_estimator_(integer_sum_estimator) {}

	std::vector<double> BucketWiseHistogramSumEstimator::ComputeSum(
	const std::vector<std::vector<uint32_t>>& encoded_histograms) {
	std::vector<double> decoded(num_buckets_);
	for (uint32_t index = 0; index < num_buckets_; index++) {
	std::vector<uint32_t> encoded_counts(encoded_histograms.size());
	for (size_t k = 0; k < encoded_histograms.size(); k++) {
	encoded_counts[k] = encoded_histograms[k][index];
	}
	decoded[index] = integer_sum_estimator_->ComputeSum(encoded_counts);
	}
	return decoded;
	}

	OccurrenceWiseHistogramEncoder::OccurrenceWiseHistogramEncoder(BitGeneratorInterface<uint32_t>* gen,
	uint32_t num_buckets,
	uint64_t max_count, double p)
	: num_buckets_(num_buckets), max_count_(max_count) {
	randomizer_ = std::make_unique<ResponseRandomizer>(gen, num_buckets - 1, p);
	}

	std::vector<uint64_t> OccurrenceWiseHistogramEncoder::Encode(
	const std::vector<uint64_t>& histogram) {
	std::vector<uint64_t> encoded_histogram(num_buckets_);
	for (uint32_t bucket_index = 0u; bucket_index < histogram.size(); bucket_index++) {
	uint64_t bucket_count = std::min(histogram[bucket_index], max_count_);
	for (uint64_t i = 0u; i < bucket_count; i++) {
	encoded_histogram[randomizer_->Encode(bucket_index)]++;
	}
	}
	return encoded_histogram;
	}

	OccurrenceWiseHistogramSumEstimator::OccurrenceWiseHistogramSumEstimator(uint32_t num_buckets,
	double p) {
	frequency_estimator_ = std::make_unique<FrequencyEstimator>(num_buckets - 1, p);
	}

	std::vector<double> OccurrenceWiseHistogramSumEstimator::ComputeSum(
	const std::vector<std::vector<uint64_t>>& encoded_histograms) {
	return frequency_estimator_->GetFrequenciesFromHistograms(encoded_histograms);
	}

	TwoDimRapporHistogramEncoder::TwoDimRapporHistogramEncoder(BitGeneratorInterface<uint32_t>* gen,
	uint32_t num_buckets, uint64_t max_count,
	double p)
	: gen_(gen), num_buckets_(num_buckets), max_count_(max_count), p_(p) {}

	std::vector<std::pair<uint32_t, uint64_t>> TwoDimRapporHistogramEncoder::Encode(
	const std::vector<uint64_t>& histogram) {
	std::vector<uint64_t> clipped_histogram(num_buckets_);
	for (uint32_t bucket_index = 0u; bucket_index < num_buckets_; bucket_index++) {
	clipped_histogram[bucket_index] = std::min(histogram[bucket_index], max_count_);
	}
	std::vector<std::pair<uint32_t, uint64_t>> encoded;
	auto dist_if_absent = BinomialDistribution(gen_, num_buckets_, p_);
	auto dist_0_to_1_if_present = BinomialDistribution(gen_, num_buckets_ - 1, p_);
	auto dist_1_to_1_if_present = BernoulliDistribution(gen_, 1.0 - p_);
	for (uint32_t bucket_index = 0u; bucket_index < num_buckets_; bucket_index++) {
	// We skip encoding and sending bucket counts of 0.
	for (uint64_t bucket_count = 1u; bucket_count <= max_count_; bucket_count++) {
	uint64_t encoded_count =
	(bucket_count == clipped_histogram[bucket_index]
	? dist_0_to_1_if_present.Sample() + dist_1_to_1_if_present.Sample()
	: dist_if_absent.Sample());
	for (uint64_t num_occurrences = 0u; num_occurrences < encoded_count; num_occurrences++) {
	encoded.emplace_back(std::pair<uint32_t, uint64_t>(bucket_index, bucket_count));
	}
	}
	}
	return encoded;
	}

	TwoDimRapporHistogramSumEstimator::TwoDimRapporHistogramSumEstimator(uint32_t num_buckets,
	uint64_t max_count, double p)
	: num_buckets_(num_buckets), max_count_(max_count), p_(p) {}

	std::vector<double> TwoDimRapporHistogramSumEstimator::ComputeSum(
	const std::vector<std::vector<std::pair<uint32_t, uint64_t>>>& encoded_histograms,
	uint64_t num_participants) {
	// Compute the number of occurrences of each bit ID in \|encoded_histograms\|.
	std::map<std::pair<uint32_t, uint64_t>, uint64_t> raw_bit_counts;
	for (const auto& hist : encoded_histograms) {
	for (auto bit_id : hist) {
	raw_bit_counts[bit_id]++;
	}
	}

	// The total number of encoded 1-hot bitvectors which contributed to \|encoded_histograms\|.
	auto num_bitvectors = static_cast<double>(num_participants * num_buckets_);

	std::vector<double> sum(num_buckets_);
	for (uint32_t bucket_index = 0; bucket_index < num_buckets_; bucket_index++) {
	double total_count_for_bucket = 0.0;
	for (uint64_t bucket_count = 0; bucket_count <= max_count_; bucket_count++) {
	// Estimate the true number of occurrences of the bit with ID (\|bucket_count\|, \|bucket_index\|)
	// among the \|num_bitvectors\| encoded bitvectors. (See Erlingsson, Pihur, Korolova, "RAPPOR:
	// Randomized Aggregatable Privacy-Preserving Ordinal Response" section 4, taking f = 1.)
	double estimated_bit_count =
	(static_cast<double>(raw_bit_counts[{bucket_index, bucket_count}]) -
	p_ * num_bitvectors) /
	(1.0 - 2 * p_);
	// Clip the estimate into the range of possible true values.
	if (estimated_bit_count < 0.0) {
	estimated_bit_count = 0.0;
	}
	if (estimated_bit_count > num_bitvectors) {
	estimated_bit_count = num_bitvectors;
	}
	// Compute the contribution of the (estimated number of) bits with true ID (\|bucket_index\|,
	// \|bucket_count\|) to the total count for bucket \|bucket_index\|.
	total_count_for_bucket += static_cast<double>(bucket_count) * estimated_bit_count;
	}
	sum[bucket_index] = total_count_for_bucket;
	}
	return sum;
	}

	SinglePassTwoDimRapporHistogramEncoder::SinglePassTwoDimRapporHistogramEncoder(
	BitGeneratorInterface<uint32_t>* gen, uint32_t num_buckets, uint64_t max_count, double p)
	: gen_(gen), num_buckets_(num_buckets), max_count_(max_count), p_(p) {}

	std::vector<std::pair<uint32_t, uint64_t>> SinglePassTwoDimRapporHistogramEncoder::Encode(
	const std::vector<std::pair<uint32_t, uint64_t>>& histogram) {
	std::vector<std::pair<uint32_t, uint64_t>> clipped_histogram;
	clipped_histogram.reserve(histogram.size());
	for (std::pair<uint32_t, uint64_t> bucket_index_and_count : histogram) {
	clipped_histogram.emplace_back(bucket_index_and_count.first,
	std::min(bucket_index_and_count.second, max_count_));
	}
	if (p_ == 0.0) {
	return clipped_histogram;
	}
	std::vector<std::pair<uint32_t, uint64_t>> encoded;
	auto geom_dist = GeometricDistribution(gen_, p_);

	// Current index into an enumeration of the set [0,...,\|num_buckets\|-1] x [1,...,\|max_count\|]. We
	// will draw a random sample to decide whether to include the pair (bucket_index, count) at this
	// index.
	uint64_t current_histogram_index = 0;

	// Next index to be flipped.
	uint64_t flipped_index = geom_dist.Sample();

	while (flipped_index < num_buckets_ * max_count_) {
	auto flipped_bucket_index_and_count =
	std::pair<uint32_t, uint64_t>(flipped_index / max_count_, 1 + (flipped_index % max_count_));

	while (current_histogram_index < clipped_histogram.size() &&
	clipped_histogram[current_histogram_index] < flipped_bucket_index_and_count) {
	// The bit at clipped_histogram[current_histogram_index] is not flipped.
	// Add it to the output.
	encoded.emplace_back(clipped_histogram[current_histogram_index]);
	current_histogram_index++;
	}

	if (current_histogram_index < clipped_histogram.size() &&
	clipped_histogram[current_histogram_index] == flipped_bucket_index_and_count) {
	// The bit at histogram[current_histogram_index] is flipped. Do not add it to the output.
	current_histogram_index++;
	} else {
	// The bit at clipped_histogram[flipped_bucket_index_and_count] is flipped.
	// Add it to the output.
	encoded.emplace_back(flipped_bucket_index_and_count);
	}

	// Compute the next index to be flipped.
	flipped_index += (geom_dist.Sample() + 1);
	}

	// Add the remaining pairs in the input histogram to the output, as they are not flipped.
	while (current_histogram_index < clipped_histogram.size()) {
	encoded.emplace_back(clipped_histogram[current_histogram_index]);
	current_histogram_index++;
	}

	return encoded;
	}

	SinglePassTwoDimRapporHistogramSumEstimator::SinglePassTwoDimRapporHistogramSumEstimator(
	uint32_t num_buckets, uint64_t max_count, double p)
	: num_buckets_(num_buckets), max_count_(max_count), p_(p) {}

	std::vector<double> SinglePassTwoDimRapporHistogramSumEstimator::ComputeSum(
	const std::vector<std::vector<std::pair<uint32_t, uint64_t>>>& encoded_histograms,
	uint64_t num_participants) {
	// Compute the number of occurrences of each bit ID in \|encoded_histograms\|.
	std::map<std::pair<uint32_t, uint64_t>, uint64_t> raw_bit_counts;
	for (const auto& hist : encoded_histograms) {
	for (auto bit_id : hist) {
	raw_bit_counts[bit_id]++;
	}
	}

	std::vector<double> sum(num_buckets_);
	for (uint32_t bucket_index = 0; bucket_index < num_buckets_; bucket_index++) {
	double total_count_for_bucket = 0.0;
	for (uint64_t bucket_count = 0; bucket_count <= max_count_; bucket_count++) {
	// Estimate the true number of occurrences of the bit with ID (\|bucket_count\|, \|bucket_index\|)
	// among the \|num_participants\| encoded bitvectors. (See Erlingsson, Pihur, Korolova, "RAPPOR:
	// Randomized Aggregatable Privacy-Preserving Ordinal Response" section 4, taking f = 1.)
	double estimated_bit_count =
	(static_cast<double>(raw_bit_counts[{bucket_index, bucket_count}]) -
	p_ * static_cast<double>(num_participants)) /
	(1.0 - 2 * p_);
	// Clip the estimate into the range of possible true values.
	if (estimated_bit_count < 0.0) {
	estimated_bit_count = 0.0;
	}
	if (estimated_bit_count > static_cast<double>(num_participants)) {
	estimated_bit_count = static_cast<double>(num_participants);
	}
	// Compute the contribution of the (estimated number of) bits with true ID (\|bucket_index\|,
	// \|bucket_count\|) to the total count for bucket \|bucket_index\|.
	total_count_for_bucket += static_cast<double>(bucket_count) * estimated_bit_count;
	}
	sum[bucket_index] = total_count_for_bucket;
	}

	return sum;
	}

	TwoDimBinomialHistogramEncoder::TwoDimBinomialHistogramEncoder(BitGeneratorInterface<uint32_t>* gen,
	uint32_t num_buckets,
	uint64_t max_count, double p)
	: gen_(gen), num_buckets_(num_buckets), max_count_(max_count), p_(p) {}

	std::vector<std::pair<uint32_t, uint64_t>> TwoDimBinomialHistogramEncoder::Encode(
	const std::vector<std::pair<uint32_t, uint64_t>>& histogram) {
	std::vector<std::pair<uint32_t, uint64_t>> encoded;
	encoded.reserve(histogram.size());

	// Add all input pairs to the output.
	for (std::pair<uint32_t, uint64_t> bucket_index_and_count : histogram) {
	encoded.emplace_back(bucket_index_and_count.first,
	std::min(bucket_index_and_count.second, max_count_));
	}

	if (p_ == 0.0) {
	return encoded;
	}
	// Add noise to the output; each pair is added with probability p_.
	auto geom_dist = GeometricDistribution(gen_, p_);
	uint64_t flipped_index = geom_dist.Sample();
	while (flipped_index < num_buckets_ * max_count_) {
	encoded.emplace_back(flipped_index / max_count_, 1 + flipped_index % max_count_);
	flipped_index += (geom_dist.Sample() + 1);
	}

	return encoded;
	}

	TwoDimBinomialHistogramSumEstimator::TwoDimBinomialHistogramSumEstimator(uint32_t num_buckets,
	uint64_t max_count,
	double p)
	: num_buckets_(num_buckets), max_count_(max_count), p_(p) {}

	std::vector<double> TwoDimBinomialHistogramSumEstimator::ComputeSum(
	const std::vector<std::vector<std::pair<uint32_t, uint64_t>>>& encoded_histograms,
	uint64_t num_participants) {
	// Compute the number of occurrences of each bit ID in \|encoded_histograms\|.
	std::map<std::pair<uint32_t, uint64_t>, uint64_t> raw_bit_counts;
	for (const auto& hist : encoded_histograms) {
	for (auto bit_id : hist) {
	raw_bit_counts[bit_id]++;
	}
	}

	std::vector<double> sum(num_buckets_);
	for (uint32_t bucket_index = 0; bucket_index < num_buckets_; bucket_index++) {
	double total_count_for_bucket = 0.0;
	for (uint64_t bucket_count = 0; bucket_count <= max_count_; bucket_count++) {
	// Estimate the true number of occurrences of the bit with ID (\|bucket_count\|, \|bucket_index\|)
	double estimated_bit_count =
	(static_cast<double>(raw_bit_counts[{bucket_index, bucket_count}]) -
	p_ * static_cast<double>(num_participants));
	// Clip the estimate into the range of possible true values.
	if (estimated_bit_count < 0.0) {
	estimated_bit_count = 0.0;
	}
	if (estimated_bit_count > static_cast<double>(num_participants)) {
	estimated_bit_count = static_cast<double>(num_participants);
	}
	// Compute the contribution of the (estimated number of) bits with true ID (\|bucket_index\|,
	// \|bucket_count\|) to the total count for bucket \|bucket_index\|.
	total_count_for_bucket += static_cast<double>(bucket_count) * estimated_bit_count;
	}
	sum[bucket_index] = total_count_for_bucket;
	}

	return sum;
	}

	} // namespace cobalt