blob: 6caad81e09079e3034d2111b1fb54a37c44df947 [file] [log] [blame]
// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COBALT_SRC_ALGORITHMS_EXPERIMENTAL_ARCHIVED_OCCURRENCE_WISE_HISTOGRAM_ENCODER_H_
#define COBALT_SRC_ALGORITHMS_EXPERIMENTAL_ARCHIVED_OCCURRENCE_WISE_HISTOGRAM_ENCODER_H_
#include "src/algorithms/experimental/archived/response_randomizer.h"
#include "src/algorithms/random/random.h"
namespace cobalt {
// An encoder that operates on histograms. Each occurrence of a bucket index is encoded using
// randomized response on the set of buckets, and the resulting 1-hot histograms are summed to form
// the encoded histogram.
class ArchivedOccurrenceWiseHistogramEncoder {
public:
// |num_buckets| is the number of buckets in each input histogram, |max_count| is the maximum
// count that should be reported for any bucket, and |p| is the noise parameter for each instance
// of randomized response.
ArchivedOccurrenceWiseHistogramEncoder(BitGeneratorInterface<uint32_t>* gen, uint32_t num_buckets,
uint64_t max_count, Probability p);
// |histogram| is a vector of length |num_buckets|, where the n-th element is the count for that
// bucket. Counts should be in the range [0, |max_count|] inclusive; larger bucket counts are
// clipped to |max_count| before encoding.
std::vector<uint64_t> Encode(const std::vector<uint64_t>& histogram);
private:
uint32_t num_buckets_;
uint64_t max_count_;
std::unique_ptr<ArchivedResponseRandomizer> randomizer_;
};
// Estimates the true sum of a collection of histograms, each of which was encoded with an
// OccurrenceWiseHistogramSumEstimator.
class ArchivedOccurrenceWiseHistogramSumEstimator {
public:
// |num_buckets| is the number of histogram buckets, and |p| is a noise parameter. These should be
// equal to the arguments that were used to construct the ArchivedOccurrenceWiseHistogramEncoder
// which produced the encoded histograms.
explicit ArchivedOccurrenceWiseHistogramSumEstimator(uint32_t num_buckets, Probability p);
// |encoded_histograms| is a vector of encoded histograms produced by an
// ArchivedOccurrenceWiseHistogramEncoder. For each index n in the range [0, |num_buckets| - 1],
// ComputeSum() returns an unbiased estimate of the sum of the counts in the n-th buckets of the
// encoded histograms.
std::vector<double> ComputeSum(const std::vector<std::vector<uint64_t>>& encoded_histograms);
private:
std::unique_ptr<ArchivedFrequencyEstimator> frequency_estimator_;
};
} // namespace cobalt
#endif // COBALT_SRC_ALGORITHMS_EXPERIMENTAL_ARCHIVED_OCCURRENCE_WISE_HISTOGRAM_ENCODER_H_