blob: f3886e4bd38195e7d69dd01db7dd8e3c170b525c [file] [log] [blame]
//
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef DIFFERENTIAL_PRIVACY_ALGORITHMS_BOUNDED_VARIANCE_H_
#define DIFFERENTIAL_PRIVACY_ALGORITHMS_BOUNDED_VARIANCE_H_
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <limits>
#include <memory>
#include <optional>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
#include <cstdint>
#include "base/logging.h"
#include "google/protobuf/any.pb.h"
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_cat.h"
#include "algorithms/algorithm.h"
#include "algorithms/approx-bounds.h"
#include "algorithms/bounded-algorithm.h"
#include "algorithms/numerical-mechanisms.h"
#include "algorithms/util.h"
#include "proto/util.h"
#include "proto/data.pb.h"
#include "proto/summary.pb.h"
#include "base/status_macros.h"
namespace differential_privacy {
// Incrementally provides a differentially private variance for values in the
// range [lower..upper]. Values outside of this range will be clamped so they
// lie in the range. The output will also be clamped between 0 and (upper -
// lower)^2 / 4. Since the result is guaranteed to be positive, this algorithm
// can be used to compute a differentially private standard deviation.
//
// The algorithm uses O(1) memory and runs in O(n) time where n is the size of
// the dataset, making it a fast and efficient. The amount of noise added grows
// quadratically in (upper - lower) and decreases linearly in n, so it might not
// produce good results unless n >> (upper - lower)^2.
//
// The algorithm is a variation of the algorithm for differentially private mean
// from "Differential Privacy: From Theory to Practice", section 2.5.5:
// https://books.google.com/books?id=WFttDQAAQBAJ&pg=PA24#v=onepage&q&f=false
template <typename T>
class BoundedVariance : public Algorithm<T> {
static_assert(std::is_arithmetic<T>::value,
"BoundedVariance can only be used for arithmetic types");
public:
// Builder for BoundedVariance algorithm.
class Builder;
BoundedVariance(const double epsilon) : Algorithm<T>(epsilon) {}
virtual ~BoundedVariance() = default;
// Returns the epsilon used to calculate approximate bounds. If approximate
// bounds are not used, returns 0.
virtual double GetBoundingEpsilon() const = 0;
// Returns the epsilon used to calculate the noisy mean. If bounds are
// specified explicitly, this will be the total epsilon used by the algorithm.
virtual double GetAggregationEpsilon() const = 0;
protected:
virtual void AddMultipleEntries(const T& input, int64_t num_of_entries) = 0;
// For integral type, check for overflow in subtraction squared.
template <typename T2 = T,
std::enable_if_t<std::is_integral<T2>::value>* = nullptr>
static absl::Status CheckBounds(T lower, T upper) {
if (lower > upper) {
return absl::InvalidArgumentError("Lower cannot be greater than upper.");
}
SafeOpResult<T> subtract_result = SafeSubtract(upper, lower);
SafeOpResult<T> safe_square_result = SafeSquare(subtract_result.value);
if (subtract_result.overflow || safe_square_result.overflow) {
return absl::InvalidArgumentError(
"Sensitivity calculation caused integer overflow.");
}
if (upper > std::sqrt(std::numeric_limits<T>::max()) ||
lower < -1 * std::sqrt(std::numeric_limits<T>::max())) {
return absl::InvalidArgumentError("Squaring the bounds caused overflow.");
}
return absl::OkStatus();
}
template <typename T2 = T,
std::enable_if_t<std::is_floating_point<T2>::value>* = nullptr>
static absl::Status CheckBounds(T lower, T upper) {
if (lower > upper) {
return absl::InvalidArgumentError("Lower cannot be greater than upper.");
}
return absl::OkStatus();
}
// Returns the width of the range of f(x) = x^2 where the domain of f is
// [lower, upper].
static double RangeOfSquares(T lower, T upper) {
if (0 > lower && 0 < upper) {
return std::max(lower * lower, upper * upper);
}
return std::abs(upper * upper - lower * lower);
}
static absl::StatusOr<std::unique_ptr<NumericalMechanism>> BuildSumMechanism(
std::unique_ptr<NumericalMechanismBuilder> mechanism_builder,
const double epsilon, const double l0_sensitivity,
const double max_contributions_per_partition, const T lower,
const T upper) {
return mechanism_builder->SetEpsilon(epsilon)
.SetL0Sensitivity(l0_sensitivity)
.SetLInfSensitivity(max_contributions_per_partition *
static_cast<double>(upper - lower) / 2.0)
.Build();
}
static absl::StatusOr<std::unique_ptr<NumericalMechanism>>
BuildSumOfSquaresMechanism(
std::unique_ptr<NumericalMechanismBuilder> mechanism_builder,
const double epsilon, const double l0_sensitivity,
const double max_contributions_per_partition, const T lower,
const T upper) {
return mechanism_builder->SetEpsilon(epsilon)
.SetL0Sensitivity(l0_sensitivity)
.SetLInfSensitivity(max_contributions_per_partition *
(RangeOfSquares(lower, upper) / 2))
.Build();
}
static double IntervalLengthSquared(T lower, T upper) {
return std::pow(static_cast<double>(upper - lower), 2);
}
// Returns the midpoint of the range of f(x) = x^2 where the domains of f is
// [lower, upper].
static double MidpointOfSquares(T lower, T upper) {
DCHECK_GE(upper, lower);
if (0 > lower && 0 < upper) {
return std::max(lower * lower, upper * upper) / 2;
}
return lower * lower + (upper * upper - lower * lower) / 2;
}
// Friend class for testing only
friend class BoundedVarianceTestPeer;
};
// Bounded variance implementation that uses fixed bounds.
template <typename T>
class BoundedVarianceWithFixedBounds : public BoundedVariance<T> {
public:
BoundedVarianceWithFixedBounds(
const double epsilon, const T lower, const T upper,
std::unique_ptr<NumericalMechanism> count_mechanism,
std::unique_ptr<NumericalMechanism> sum_mechanism,
std::unique_ptr<NumericalMechanism> sum_of_squares_mechanism)
: BoundedVariance<T>(epsilon),
lower_(lower),
upper_(upper),
count_mechanism_(std::move(count_mechanism)),
sum_mechanism_(std::move(sum_mechanism)),
sum_of_squares_mechanism_(std::move(sum_of_squares_mechanism)) {}
void AddEntry(const T& t) override { AddMultipleEntries(t, 1); }
Summary Serialize() const override {
BoundedVarianceSummary variance_summary;
variance_summary.set_count(partial_count_);
SetValue(variance_summary.add_pos_sum(), partial_sum_);
variance_summary.add_pos_sum_of_squares(partial_sum_of_squares_);
// Pack variance summary into summary.
Summary summary;
summary.mutable_data()->PackFrom(variance_summary);
return summary;
}
absl::Status Merge(const Summary& summary) override {
if (!summary.has_data()) {
return absl::InternalError(
"Cannot merge summary with no bounded variance data.");
}
// Unpack bounded variance summary.
BoundedVarianceSummary variance_summary;
if (!summary.data().UnpackTo(&variance_summary)) {
return absl::InternalError(
"Bounded variance summary unable to be unpacked.");
}
// Check for expected sizes of repeated fields.
if (variance_summary.pos_sum_size() != 1) {
return absl::InternalError(
absl::StrCat("Expected positive sums of size exactly 1 but got ",
variance_summary.pos_sum_size()));
}
if (variance_summary.pos_sum_of_squares_size() != 1) {
return absl::InternalError(absl::StrCat(
"Expected positive sum of squares of size exactly 1 but got ",
variance_summary.pos_sum_of_squares_size()));
}
// Verification successful. Merge fields.
partial_count_ += variance_summary.count();
partial_sum_ += GetValue<T>(variance_summary.pos_sum(0));
partial_sum_of_squares_ += variance_summary.pos_sum_of_squares(0);
return absl::OkStatus();
}
int64_t MemoryUsed() override {
return sizeof(BoundedVarianceWithFixedBounds) +
count_mechanism_->MemoryUsed() + sum_mechanism_->MemoryUsed() +
sum_of_squares_mechanism_->MemoryUsed();
}
double GetBoundingEpsilon() const override { return 0; }
double GetAggregationEpsilon() const override {
return Algorithm<T>::GetEpsilon();
}
protected:
absl::StatusOr<Output> GenerateResult(double noise_interval_level) override {
const double sum_midpoint = lower_ + ((upper_ - lower_) / 2);
const double sum_of_squares_midpoint =
BoundedVariance<T>::MidpointOfSquares(lower_, upper_);
const double noised_count = count_mechanism_->AddNoise(partial_count_);
const double noised_normalized_sum = sum_mechanism_->AddNoise(
partial_sum_ - (partial_count_ * sum_midpoint));
const double noised_normalized_sum_of_squares =
sum_of_squares_mechanism_->AddNoise(
partial_sum_of_squares_ -
(partial_count_ * sum_of_squares_midpoint));
double mean;
double mean_of_squares;
if (noised_count <= 1) {
mean = sum_midpoint;
mean_of_squares = sum_of_squares_midpoint;
} else {
mean = (noised_normalized_sum / noised_count) + sum_midpoint;
mean_of_squares = (noised_normalized_sum_of_squares / noised_count) +
sum_of_squares_midpoint;
}
const double noised_variance = mean_of_squares - (mean * mean);
Output output;
AddToOutput<double>(
&output,
Clamp<double>(
0, BoundedVariance<T>::IntervalLengthSquared(lower_, upper_) / 4,
noised_variance));
return output;
}
void ResetState() override {
partial_count_ = 0;
partial_sum_ = 0;
partial_sum_of_squares_ = 0;
}
void AddMultipleEntries(const T& input, int64_t num_of_entries) override {
absl::Status status =
ValidateIsPositive(num_of_entries, "Number of entries");
if (std::isnan(static_cast<double>(input)) || !status.ok()) {
return;
}
partial_count_ += num_of_entries;
const T clamped_input = Clamp<T>(lower_, upper_, input);
partial_sum_ += clamped_input * num_of_entries;
partial_sum_of_squares_ += std::pow(clamped_input, 2) * num_of_entries;
}
private:
const T lower_;
const T upper_;
std::unique_ptr<NumericalMechanism> count_mechanism_;
std::unique_ptr<NumericalMechanism> sum_mechanism_;
std::unique_ptr<NumericalMechanism> sum_of_squares_mechanism_;
int64_t partial_count_ = 0;
T partial_sum_ = 0;
double partial_sum_of_squares_ = 0;
};
template <typename T>
class BoundedVarianceWithApproxBounds : public BoundedVariance<T> {
public:
BoundedVarianceWithApproxBounds(
const double epsilon, const double epsilon_for_sum,
const double epsilon_for_squares, const double l0_sensitivity,
const int max_contributions_per_partition,
std::unique_ptr<NumericalMechanismBuilder> mechanism_builder,
std::unique_ptr<NumericalMechanism> count_mechanism,
std::unique_ptr<ApproxBounds<T>> approx_bounds)
: BoundedVariance<T>(epsilon),
epsilon_for_sum_(epsilon_for_sum),
epsilon_for_squares_(epsilon_for_squares),
mechanism_builder_(std::move(mechanism_builder)),
l0_sensitivity_(l0_sensitivity),
max_contributions_per_partition_(max_contributions_per_partition),
count_mechanism_(std::move(count_mechanism)),
approx_bounds_(std::move(approx_bounds)) {
// To determining bounds, we need partial values for each bin of the
// ApproxBounds logarithmic histogram.
pos_sum_.resize(approx_bounds_->NumPositiveBins(), 0);
neg_sum_.resize(approx_bounds_->NumPositiveBins(), 0);
pos_sum_of_squares_.resize(approx_bounds_->NumPositiveBins(), 0);
neg_sum_of_squares_.resize(approx_bounds_->NumPositiveBins(), 0);
}
void AddEntry(const T& t) override { AddMultipleEntries(t, 1); }
Summary Serialize() const override {
// Create BoundedVarianceSummary.
BoundedVarianceSummary bv_summary;
bv_summary.set_count(partial_count_);
for (T x : pos_sum_) {
SetValue(bv_summary.add_pos_sum(), x);
}
for (T x : neg_sum_) {
SetValue(bv_summary.add_neg_sum(), x);
}
for (double x : pos_sum_of_squares_) {
bv_summary.add_pos_sum_of_squares(x);
}
for (double x : neg_sum_of_squares_) {
bv_summary.add_neg_sum_of_squares(x);
}
// Serialize approx bounds data.
Summary approx_bounds_summary = approx_bounds_->Serialize();
approx_bounds_summary.data().UnpackTo(bv_summary.mutable_bounds_summary());
// Create Summary.
Summary summary;
summary.mutable_data()->PackFrom(bv_summary);
return summary;
}
absl::Status Merge(const Summary& summary) override {
if (!summary.has_data()) {
return absl::InternalError(
"Cannot merge summary with no bounded variance data.");
}
// Unpack bounded variance summary.
BoundedVarianceSummary bv_summary;
if (!summary.data().UnpackTo(&bv_summary)) {
return absl::InternalError(
"Bounded variance summary unable to be unpacked.");
}
if ((approx_bounds_ != nullptr) != bv_summary.has_bounds_summary()) {
return absl::InternalError(
"Merged BoundedVariance must have the same bounding strategy.");
}
if (pos_sum_.size() != bv_summary.pos_sum_size() ||
neg_sum_.size() != bv_summary.neg_sum_size() ||
pos_sum_of_squares_.size() != bv_summary.pos_sum_of_squares_size() ||
neg_sum_of_squares_.size() != bv_summary.neg_sum_of_squares_size()) {
return absl::InternalError(
"Merged BoundedVariance must have the same amount of partial "
"sum or sum of squares values as this BoundedVariance.");
}
// Merge approx bounds
Summary approx_bounds_summary;
approx_bounds_summary.mutable_data()->PackFrom(bv_summary.bounds_summary());
RETURN_IF_ERROR(approx_bounds_->Merge(approx_bounds_summary));
// Add count and partial values to current ones.
partial_count_ += bv_summary.count();
for (int i = 0; i < pos_sum_.size(); ++i) {
pos_sum_[i] += GetValue<T>(bv_summary.pos_sum(i));
pos_sum_of_squares_[i] += bv_summary.pos_sum_of_squares(i);
}
for (int i = 0; i < neg_sum_.size(); ++i) {
neg_sum_[i] += GetValue<T>(bv_summary.neg_sum(i));
neg_sum_of_squares_[i] += bv_summary.neg_sum_of_squares(i);
}
return absl::OkStatus();
}
int64_t MemoryUsed() override {
int64_t memory = sizeof(BoundedVarianceWithApproxBounds<T>);
memory += sizeof(T) * (pos_sum_.capacity() + neg_sum_.capacity());
memory += sizeof(double) *
(pos_sum_of_squares_.capacity() + neg_sum_of_squares_.capacity());
memory += sizeof(*mechanism_builder_);
memory += approx_bounds_->MemoryUsed();
return memory;
}
// Returns the epsilon used to calculate approximate bounds.
double GetBoundingEpsilon() const override {
return approx_bounds_->GetEpsilon();
}
// Returns the epsilon used to calculate the noisy mean.
double GetAggregationEpsilon() const override {
return Algorithm<T>::GetEpsilon() - GetBoundingEpsilon();
}
// Returns a pointer to the ApproxBounds object. Does not transfer
// ownsership. Only use for testing.
ApproxBounds<T>* GetApproxBoundsForTesting() { return approx_bounds_.get(); }
private:
absl::StatusOr<Output> GenerateResult(double noise_interval_level) override {
Output output;
ASSIGN_OR_RETURN(Output bounds,
approx_bounds_->PartialResult(noise_interval_level));
const T lower = GetValue<T>(bounds.elements(0).value());
const T upper = GetValue<T>(bounds.elements(1).value());
RETURN_IF_ERROR(BoundedVariance<T>::CheckBounds(lower, upper));
// To find the sum, pass the identity function as the transform.
ASSIGN_OR_RETURN(const double sum,
approx_bounds_->template ComputeFromPartials<T>(
pos_sum_, neg_sum_, [](T x) { return x; }, lower,
upper, partial_count_));
// To find sum of squares, pass the square function.
ASSIGN_OR_RETURN(
const double sum_of_squares,
approx_bounds_->template ComputeFromPartials<double>(
pos_sum_of_squares_, neg_sum_of_squares_, [](T x) { return x * x; },
lower, upper, partial_count_));
// Populate the bounding report with ApproxBounds information.
*(output.mutable_error_report()->mutable_bounding_report()) =
approx_bounds_->GetBoundingReport(lower, upper);
const double noised_count = count_mechanism_->AddNoise(partial_count_);
// Calculate noised normalized sum
const T sum_midpoint = lower + ((upper - lower) / 2);
ASSIGN_OR_RETURN(
std::unique_ptr<NumericalMechanism> sum_mechanism,
BoundedVariance<T>::BuildSumMechanism(
mechanism_builder_->Clone(), epsilon_for_sum_, l0_sensitivity_,
max_contributions_per_partition_, lower, upper));
const double noised_normalized_sum =
sum_mechanism->AddNoise(sum - (partial_count_ * sum_midpoint));
// Calculate noised normalized sum of squares.
const double sum_of_squares_midpoint =
BoundedVariance<T>::MidpointOfSquares(lower, upper);
ASSIGN_OR_RETURN(
std::unique_ptr<NumericalMechanism> sum_of_squares_mechanism,
BoundedVariance<T>::BuildSumOfSquaresMechanism(
mechanism_builder_->Clone(), epsilon_for_squares_, l0_sensitivity_,
max_contributions_per_partition_, lower, upper));
const double noised_normalized_sum_of_squares =
sum_of_squares_mechanism->AddNoise(
sum_of_squares - (partial_count_ * sum_of_squares_midpoint));
// Calculate the result from the noised values. From this point everything
// should be post-processing.
double mean = sum_midpoint;
double mean_of_square = sum_of_squares_midpoint;
if (noised_count > 1) {
mean = noised_normalized_sum / noised_count + sum_midpoint;
mean_of_square = noised_normalized_sum_of_squares / noised_count +
sum_of_squares_midpoint;
}
const double noised_variance = mean_of_square - std::pow(mean, 2);
AddToOutput<double>(
&output,
Clamp<double>(
0.0, BoundedVariance<T>::IntervalLengthSquared(lower, upper) / 4,
noised_variance));
return output;
}
void ResetState() override {
std::fill(pos_sum_.begin(), pos_sum_.end(), 0);
std::fill(pos_sum_of_squares_.begin(), pos_sum_of_squares_.end(), 0);
std::fill(neg_sum_.begin(), neg_sum_.end(), 0);
std::fill(neg_sum_of_squares_.begin(), neg_sum_of_squares_.end(), 0);
partial_count_ = 0;
approx_bounds_->Reset();
}
void AddMultipleEntries(const T& input, int64_t num_of_entries) override {
// Drop value if it is NaN.
// REF:
// https://stackoverflow.com/questions/61646166/how-to-resolve-fpclassify-ambiguous-call-to-overloaded-function
absl::Status status =
ValidateIsPositive(num_of_entries, "Number of entries");
if (std::isnan(static_cast<double>(input)) || !status.ok()) {
return;
}
// Count is unaffected by clamping.
partial_count_ += num_of_entries;
// Store partial results and feed input into ApproxBounds algorithm.
approx_bounds_->AddMultipleEntries(input, num_of_entries);
// Add to partial sums and sum of squares.
auto difference_of_squares = [](T val1, T val2) {
// Lessen the chance of becoming inf/-inf by calculating it like this.
return (static_cast<double>(val1) + val2) *
(static_cast<double>(val1) - val2);
};
if (input >= 0) {
approx_bounds_->template AddMultipleEntriesToPartialSums<T>(
&pos_sum_, input, num_of_entries);
approx_bounds_->template AddMultipleEntriesToPartials<double>(
&pos_sum_of_squares_, input, num_of_entries, difference_of_squares);
} else {
approx_bounds_->template AddMultipleEntriesToPartialSums<T>(
&neg_sum_, input, num_of_entries);
approx_bounds_->template AddMultipleEntriesToPartials<double>(
&neg_sum_of_squares_, input, num_of_entries, difference_of_squares);
}
}
// Vectors of partial values stored for automatic clamping.
std::vector<T> pos_sum_, neg_sum_;
std::vector<double> pos_sum_of_squares_, neg_sum_of_squares_;
int64_t partial_count_ = 0;
// Used to construct mechanism once bounds are obtained.
const double epsilon_for_sum_;
const double epsilon_for_squares_;
std::unique_ptr<NumericalMechanismBuilder> mechanism_builder_;
const double l0_sensitivity_;
const int max_contributions_per_partition_;
std::unique_ptr<NumericalMechanism> count_mechanism_;
// If this is not nullptr, we are automatically determining bounds. Otherwise,
// lower and upper contain the manually set bounds.
std::unique_ptr<ApproxBounds<T>> approx_bounds_;
};
template <typename T>
class BoundedVariance<T>::Builder {
public:
BoundedVariance<T>::Builder& SetEpsilon(double epsilon) {
epsilon_ = epsilon;
return *this;
}
BoundedVariance<T>::Builder& SetDelta(double delta) {
delta_ = delta;
return *this;
}
BoundedVariance<T>::Builder& SetMaxPartitionsContributed(
int max_partitions_contributed) {
max_partitions_contributed_ = max_partitions_contributed;
return *this;
}
BoundedVariance<T>::Builder& SetMaxContributionsPerPartition(
int max_contributions_per_partition) {
max_contributions_per_partition_ = max_contributions_per_partition;
return *this;
}
BoundedVariance<T>::Builder& SetUpper(T upper) {
upper_ = upper;
return *this;
}
BoundedVariance<T>::Builder& SetLower(T lower) {
lower_ = lower;
return *this;
}
BoundedVariance<T>::Builder& SetApproxBounds(
std::unique_ptr<ApproxBounds<T>> approx_bounds) {
approx_bounds_ = std::move(approx_bounds);
return *this;
}
BoundedVariance<T>::Builder& SetLaplaceMechanism(
std::unique_ptr<NumericalMechanismBuilder> builder) {
mechanism_builder_ = std::move(builder);
return *this;
}
absl::StatusOr<std::unique_ptr<BoundedVariance<T>>> Build() {
if (!epsilon_.has_value()) {
epsilon_ = DefaultEpsilon();
LOG(WARNING) << "Default epsilon of " << epsilon_.value()
<< " is being used. Consider setting your own epsilon based "
"on privacy considerations.";
}
RETURN_IF_ERROR(ValidateEpsilon(epsilon_));
RETURN_IF_ERROR(ValidateDelta(delta_));
RETURN_IF_ERROR(ValidateBounds(lower_, upper_));
RETURN_IF_ERROR(
ValidateMaxPartitionsContributed(max_partitions_contributed_));
RETURN_IF_ERROR(
ValidateMaxContributionsPerPartition(max_contributions_per_partition_));
if (upper_.has_value() && lower_.has_value()) {
return BuildVarianceWithFixedBounds();
}
return BuildVarianceWithApproxBounds();
}
private:
absl::optional<double> epsilon_;
double delta_ = 0;
absl::optional<T> upper_;
absl::optional<T> lower_;
int max_partitions_contributed_ = 1;
int max_contributions_per_partition_ = 1;
std::unique_ptr<NumericalMechanismBuilder> mechanism_builder_ =
absl::make_unique<LaplaceMechanism::Builder>();
std::unique_ptr<ApproxBounds<T>> approx_bounds_;
absl::StatusOr<std::unique_ptr<BoundedVariance<T>>>
BuildVarianceWithFixedBounds() {
RETURN_IF_ERROR(CheckBounds(lower_.value(), upper_.value()));
ASSIGN_OR_RETURN(std::unique_ptr<NumericalMechanism> count_mechanism,
mechanism_builder_->Clone()
->SetEpsilon(epsilon_.value() / 3)
.SetL0Sensitivity(max_partitions_contributed_)
.SetLInfSensitivity(max_contributions_per_partition_)
.Build());
ASSIGN_OR_RETURN(
std::unique_ptr<NumericalMechanism> sum_mechanism,
BoundedVariance<T>::BuildSumMechanism(
mechanism_builder_->Clone(), epsilon_.value() / 3,
max_partitions_contributed_, max_contributions_per_partition_,
lower_.value(), upper_.value()));
ASSIGN_OR_RETURN(
std::unique_ptr<NumericalMechanism> sos_mechanism,
BoundedVariance<T>::BuildSumOfSquaresMechanism(
mechanism_builder_->Clone(), epsilon_.value() / 3,
max_partitions_contributed_, max_contributions_per_partition_,
lower_.value(), upper_.value()));
return absl::StatusOr<std::unique_ptr<BoundedVariance<T>>>(
absl::make_unique<BoundedVarianceWithFixedBounds<T>>(
epsilon_.value(), lower_.value(), upper_.value(),
std::move(count_mechanism), std::move(sum_mechanism),
std::move(sos_mechanism)));
}
absl::StatusOr<std::unique_ptr<BoundedVariance<T>>>
BuildVarianceWithApproxBounds() {
if (!approx_bounds_) {
ASSIGN_OR_RETURN(
approx_bounds_,
typename ApproxBounds<T>::Builder()
.SetEpsilon(epsilon_.value() / 2)
.SetLaplaceMechanism(mechanism_builder_->Clone())
.SetMaxContributionsPerPartition(max_contributions_per_partition_)
.SetMaxPartitionsContributed(max_partitions_contributed_)
.Build());
}
if (epsilon_.value() <= approx_bounds_->GetEpsilon()) {
return absl::InvalidArgumentError(absl::StrCat(
"Approx Bounds consumes more epsilon budget than available. Total "
"Epsilon: ",
epsilon_.value(),
" Approx Bounds Epsilon: ", approx_bounds_->GetEpsilon()));
}
// Budget calculation.
const double remaining_epsilon =
epsilon_.value() - approx_bounds_->GetEpsilon();
const double epsilon_for_count = remaining_epsilon / 3;
const double epsilon_for_sum = remaining_epsilon / 3;
const double epsilon_for_squares =
remaining_epsilon - epsilon_for_count - epsilon_for_sum;
ASSIGN_OR_RETURN(std::unique_ptr<NumericalMechanism> count_mechanism,
mechanism_builder_->Clone()
->SetEpsilon(epsilon_for_count)
.SetL0Sensitivity(max_partitions_contributed_)
.SetLInfSensitivity(max_contributions_per_partition_)
.Build());
return absl::StatusOr<std::unique_ptr<BoundedVariance<T>>>(
absl::make_unique<BoundedVarianceWithApproxBounds<T>>(
epsilon_.value(), epsilon_for_sum, epsilon_for_squares,
max_partitions_contributed_, max_contributions_per_partition_,
mechanism_builder_->Clone(), std::move(count_mechanism),
std::move(approx_bounds_)));
}
};
} // namespace differential_privacy
#endif // DIFFERENTIAL_PRIVACY_ALGORITHMS_BOUNDED_VARIANCE_H_