| // |
| // Copyright 2019 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| |
| #ifndef DIFFERENTIAL_PRIVACY_ALGORITHMS_BOUNDED_VARIANCE_H_ |
| #define DIFFERENTIAL_PRIVACY_ALGORITHMS_BOUNDED_VARIANCE_H_ |
| |
| #include <algorithm> |
| #include <cmath> |
| #include <cstdlib> |
| #include <limits> |
| #include <memory> |
| #include <optional> |
| #include <string> |
| #include <type_traits> |
| #include <utility> |
| #include <vector> |
| |
| #include <cstdint> |
| #include "base/logging.h" |
| #include "google/protobuf/any.pb.h" |
| #include "absl/memory/memory.h" |
| #include "absl/status/status.h" |
| #include "absl/status/statusor.h" |
| #include "absl/strings/str_cat.h" |
| #include "algorithms/algorithm.h" |
| #include "algorithms/approx-bounds.h" |
| #include "algorithms/bounded-algorithm.h" |
| #include "algorithms/numerical-mechanisms.h" |
| #include "algorithms/util.h" |
| #include "proto/util.h" |
| #include "proto/data.pb.h" |
| #include "proto/summary.pb.h" |
| #include "base/status_macros.h" |
| |
| namespace differential_privacy { |
| |
| // Incrementally provides a differentially private variance for values in the |
| // range [lower..upper]. Values outside of this range will be clamped so they |
| // lie in the range. The output will also be clamped between 0 and (upper - |
| // lower)^2 / 4. Since the result is guaranteed to be positive, this algorithm |
| // can be used to compute a differentially private standard deviation. |
| // |
| // The algorithm uses O(1) memory and runs in O(n) time where n is the size of |
| // the dataset, making it a fast and efficient. The amount of noise added grows |
| // quadratically in (upper - lower) and decreases linearly in n, so it might not |
| // produce good results unless n >> (upper - lower)^2. |
| // |
| // The algorithm is a variation of the algorithm for differentially private mean |
| // from "Differential Privacy: From Theory to Practice", section 2.5.5: |
| // https://books.google.com/books?id=WFttDQAAQBAJ&pg=PA24#v=onepage&q&f=false |
| template <typename T> |
| class BoundedVariance : public Algorithm<T> { |
| static_assert(std::is_arithmetic<T>::value, |
| "BoundedVariance can only be used for arithmetic types"); |
| |
| public: |
| // Builder for BoundedVariance algorithm. |
| class Builder; |
| |
| BoundedVariance(const double epsilon) : Algorithm<T>(epsilon) {} |
| |
| virtual ~BoundedVariance() = default; |
| |
| // Returns the epsilon used to calculate approximate bounds. If approximate |
| // bounds are not used, returns 0. |
| virtual double GetBoundingEpsilon() const = 0; |
| |
| // Returns the epsilon used to calculate the noisy mean. If bounds are |
| // specified explicitly, this will be the total epsilon used by the algorithm. |
| virtual double GetAggregationEpsilon() const = 0; |
| |
| protected: |
| virtual void AddMultipleEntries(const T& input, int64_t num_of_entries) = 0; |
| |
| // For integral type, check for overflow in subtraction squared. |
| template <typename T2 = T, |
| std::enable_if_t<std::is_integral<T2>::value>* = nullptr> |
| static absl::Status CheckBounds(T lower, T upper) { |
| if (lower > upper) { |
| return absl::InvalidArgumentError("Lower cannot be greater than upper."); |
| } |
| SafeOpResult<T> subtract_result = SafeSubtract(upper, lower); |
| SafeOpResult<T> safe_square_result = SafeSquare(subtract_result.value); |
| if (subtract_result.overflow || safe_square_result.overflow) { |
| return absl::InvalidArgumentError( |
| "Sensitivity calculation caused integer overflow."); |
| } |
| if (upper > std::sqrt(std::numeric_limits<T>::max()) || |
| lower < -1 * std::sqrt(std::numeric_limits<T>::max())) { |
| return absl::InvalidArgumentError("Squaring the bounds caused overflow."); |
| } |
| return absl::OkStatus(); |
| } |
| |
| template <typename T2 = T, |
| std::enable_if_t<std::is_floating_point<T2>::value>* = nullptr> |
| static absl::Status CheckBounds(T lower, T upper) { |
| if (lower > upper) { |
| return absl::InvalidArgumentError("Lower cannot be greater than upper."); |
| } |
| return absl::OkStatus(); |
| } |
| |
| // Returns the width of the range of f(x) = x^2 where the domain of f is |
| // [lower, upper]. |
| static double RangeOfSquares(T lower, T upper) { |
| if (0 > lower && 0 < upper) { |
| return std::max(lower * lower, upper * upper); |
| } |
| return std::abs(upper * upper - lower * lower); |
| } |
| |
| static absl::StatusOr<std::unique_ptr<NumericalMechanism>> BuildSumMechanism( |
| std::unique_ptr<NumericalMechanismBuilder> mechanism_builder, |
| const double epsilon, const double l0_sensitivity, |
| const double max_contributions_per_partition, const T lower, |
| const T upper) { |
| return mechanism_builder->SetEpsilon(epsilon) |
| .SetL0Sensitivity(l0_sensitivity) |
| .SetLInfSensitivity(max_contributions_per_partition * |
| static_cast<double>(upper - lower) / 2.0) |
| .Build(); |
| } |
| |
| static absl::StatusOr<std::unique_ptr<NumericalMechanism>> |
| BuildSumOfSquaresMechanism( |
| std::unique_ptr<NumericalMechanismBuilder> mechanism_builder, |
| const double epsilon, const double l0_sensitivity, |
| const double max_contributions_per_partition, const T lower, |
| const T upper) { |
| return mechanism_builder->SetEpsilon(epsilon) |
| .SetL0Sensitivity(l0_sensitivity) |
| .SetLInfSensitivity(max_contributions_per_partition * |
| (RangeOfSquares(lower, upper) / 2)) |
| .Build(); |
| } |
| |
| static double IntervalLengthSquared(T lower, T upper) { |
| return std::pow(static_cast<double>(upper - lower), 2); |
| } |
| |
| // Returns the midpoint of the range of f(x) = x^2 where the domains of f is |
| // [lower, upper]. |
| static double MidpointOfSquares(T lower, T upper) { |
| DCHECK_GE(upper, lower); |
| if (0 > lower && 0 < upper) { |
| return std::max(lower * lower, upper * upper) / 2; |
| } |
| return lower * lower + (upper * upper - lower * lower) / 2; |
| } |
| |
| // Friend class for testing only |
| friend class BoundedVarianceTestPeer; |
| }; |
| |
| // Bounded variance implementation that uses fixed bounds. |
| template <typename T> |
| class BoundedVarianceWithFixedBounds : public BoundedVariance<T> { |
| public: |
| BoundedVarianceWithFixedBounds( |
| const double epsilon, const T lower, const T upper, |
| std::unique_ptr<NumericalMechanism> count_mechanism, |
| std::unique_ptr<NumericalMechanism> sum_mechanism, |
| std::unique_ptr<NumericalMechanism> sum_of_squares_mechanism) |
| : BoundedVariance<T>(epsilon), |
| lower_(lower), |
| upper_(upper), |
| count_mechanism_(std::move(count_mechanism)), |
| sum_mechanism_(std::move(sum_mechanism)), |
| sum_of_squares_mechanism_(std::move(sum_of_squares_mechanism)) {} |
| |
| void AddEntry(const T& t) override { AddMultipleEntries(t, 1); } |
| |
| Summary Serialize() const override { |
| BoundedVarianceSummary variance_summary; |
| variance_summary.set_count(partial_count_); |
| SetValue(variance_summary.add_pos_sum(), partial_sum_); |
| variance_summary.add_pos_sum_of_squares(partial_sum_of_squares_); |
| |
| // Pack variance summary into summary. |
| Summary summary; |
| summary.mutable_data()->PackFrom(variance_summary); |
| return summary; |
| } |
| |
| absl::Status Merge(const Summary& summary) override { |
| if (!summary.has_data()) { |
| return absl::InternalError( |
| "Cannot merge summary with no bounded variance data."); |
| } |
| |
| // Unpack bounded variance summary. |
| BoundedVarianceSummary variance_summary; |
| if (!summary.data().UnpackTo(&variance_summary)) { |
| return absl::InternalError( |
| "Bounded variance summary unable to be unpacked."); |
| } |
| |
| // Check for expected sizes of repeated fields. |
| if (variance_summary.pos_sum_size() != 1) { |
| return absl::InternalError( |
| absl::StrCat("Expected positive sums of size exactly 1 but got ", |
| variance_summary.pos_sum_size())); |
| } |
| if (variance_summary.pos_sum_of_squares_size() != 1) { |
| return absl::InternalError(absl::StrCat( |
| "Expected positive sum of squares of size exactly 1 but got ", |
| variance_summary.pos_sum_of_squares_size())); |
| } |
| |
| // Verification successful. Merge fields. |
| partial_count_ += variance_summary.count(); |
| partial_sum_ += GetValue<T>(variance_summary.pos_sum(0)); |
| partial_sum_of_squares_ += variance_summary.pos_sum_of_squares(0); |
| |
| return absl::OkStatus(); |
| } |
| |
| int64_t MemoryUsed() override { |
| return sizeof(BoundedVarianceWithFixedBounds) + |
| count_mechanism_->MemoryUsed() + sum_mechanism_->MemoryUsed() + |
| sum_of_squares_mechanism_->MemoryUsed(); |
| } |
| |
| double GetBoundingEpsilon() const override { return 0; } |
| |
| double GetAggregationEpsilon() const override { |
| return Algorithm<T>::GetEpsilon(); |
| } |
| |
| protected: |
| absl::StatusOr<Output> GenerateResult(double noise_interval_level) override { |
| const double sum_midpoint = lower_ + ((upper_ - lower_) / 2); |
| const double sum_of_squares_midpoint = |
| BoundedVariance<T>::MidpointOfSquares(lower_, upper_); |
| |
| const double noised_count = count_mechanism_->AddNoise(partial_count_); |
| const double noised_normalized_sum = sum_mechanism_->AddNoise( |
| partial_sum_ - (partial_count_ * sum_midpoint)); |
| const double noised_normalized_sum_of_squares = |
| sum_of_squares_mechanism_->AddNoise( |
| partial_sum_of_squares_ - |
| (partial_count_ * sum_of_squares_midpoint)); |
| |
| double mean; |
| double mean_of_squares; |
| if (noised_count <= 1) { |
| mean = sum_midpoint; |
| mean_of_squares = sum_of_squares_midpoint; |
| } else { |
| mean = (noised_normalized_sum / noised_count) + sum_midpoint; |
| mean_of_squares = (noised_normalized_sum_of_squares / noised_count) + |
| sum_of_squares_midpoint; |
| } |
| |
| const double noised_variance = mean_of_squares - (mean * mean); |
| |
| Output output; |
| AddToOutput<double>( |
| &output, |
| Clamp<double>( |
| 0, BoundedVariance<T>::IntervalLengthSquared(lower_, upper_) / 4, |
| noised_variance)); |
| return output; |
| } |
| |
| void ResetState() override { |
| partial_count_ = 0; |
| partial_sum_ = 0; |
| partial_sum_of_squares_ = 0; |
| } |
| |
| void AddMultipleEntries(const T& input, int64_t num_of_entries) override { |
| absl::Status status = |
| ValidateIsPositive(num_of_entries, "Number of entries"); |
| if (std::isnan(static_cast<double>(input)) || !status.ok()) { |
| return; |
| } |
| partial_count_ += num_of_entries; |
| const T clamped_input = Clamp<T>(lower_, upper_, input); |
| partial_sum_ += clamped_input * num_of_entries; |
| partial_sum_of_squares_ += std::pow(clamped_input, 2) * num_of_entries; |
| } |
| |
| private: |
| const T lower_; |
| const T upper_; |
| |
| std::unique_ptr<NumericalMechanism> count_mechanism_; |
| std::unique_ptr<NumericalMechanism> sum_mechanism_; |
| std::unique_ptr<NumericalMechanism> sum_of_squares_mechanism_; |
| |
| int64_t partial_count_ = 0; |
| T partial_sum_ = 0; |
| double partial_sum_of_squares_ = 0; |
| }; |
| |
| template <typename T> |
| class BoundedVarianceWithApproxBounds : public BoundedVariance<T> { |
| public: |
| BoundedVarianceWithApproxBounds( |
| const double epsilon, const double epsilon_for_sum, |
| const double epsilon_for_squares, const double l0_sensitivity, |
| const int max_contributions_per_partition, |
| std::unique_ptr<NumericalMechanismBuilder> mechanism_builder, |
| std::unique_ptr<NumericalMechanism> count_mechanism, |
| std::unique_ptr<ApproxBounds<T>> approx_bounds) |
| : BoundedVariance<T>(epsilon), |
| epsilon_for_sum_(epsilon_for_sum), |
| epsilon_for_squares_(epsilon_for_squares), |
| mechanism_builder_(std::move(mechanism_builder)), |
| l0_sensitivity_(l0_sensitivity), |
| max_contributions_per_partition_(max_contributions_per_partition), |
| count_mechanism_(std::move(count_mechanism)), |
| approx_bounds_(std::move(approx_bounds)) { |
| // To determining bounds, we need partial values for each bin of the |
| // ApproxBounds logarithmic histogram. |
| pos_sum_.resize(approx_bounds_->NumPositiveBins(), 0); |
| neg_sum_.resize(approx_bounds_->NumPositiveBins(), 0); |
| pos_sum_of_squares_.resize(approx_bounds_->NumPositiveBins(), 0); |
| neg_sum_of_squares_.resize(approx_bounds_->NumPositiveBins(), 0); |
| } |
| |
| void AddEntry(const T& t) override { AddMultipleEntries(t, 1); } |
| |
| Summary Serialize() const override { |
| // Create BoundedVarianceSummary. |
| BoundedVarianceSummary bv_summary; |
| bv_summary.set_count(partial_count_); |
| for (T x : pos_sum_) { |
| SetValue(bv_summary.add_pos_sum(), x); |
| } |
| for (T x : neg_sum_) { |
| SetValue(bv_summary.add_neg_sum(), x); |
| } |
| for (double x : pos_sum_of_squares_) { |
| bv_summary.add_pos_sum_of_squares(x); |
| } |
| for (double x : neg_sum_of_squares_) { |
| bv_summary.add_neg_sum_of_squares(x); |
| } |
| |
| // Serialize approx bounds data. |
| Summary approx_bounds_summary = approx_bounds_->Serialize(); |
| approx_bounds_summary.data().UnpackTo(bv_summary.mutable_bounds_summary()); |
| |
| // Create Summary. |
| Summary summary; |
| summary.mutable_data()->PackFrom(bv_summary); |
| return summary; |
| } |
| |
| absl::Status Merge(const Summary& summary) override { |
| if (!summary.has_data()) { |
| return absl::InternalError( |
| "Cannot merge summary with no bounded variance data."); |
| } |
| |
| // Unpack bounded variance summary. |
| BoundedVarianceSummary bv_summary; |
| if (!summary.data().UnpackTo(&bv_summary)) { |
| return absl::InternalError( |
| "Bounded variance summary unable to be unpacked."); |
| } |
| if ((approx_bounds_ != nullptr) != bv_summary.has_bounds_summary()) { |
| return absl::InternalError( |
| "Merged BoundedVariance must have the same bounding strategy."); |
| } |
| if (pos_sum_.size() != bv_summary.pos_sum_size() || |
| neg_sum_.size() != bv_summary.neg_sum_size() || |
| pos_sum_of_squares_.size() != bv_summary.pos_sum_of_squares_size() || |
| neg_sum_of_squares_.size() != bv_summary.neg_sum_of_squares_size()) { |
| return absl::InternalError( |
| "Merged BoundedVariance must have the same amount of partial " |
| "sum or sum of squares values as this BoundedVariance."); |
| } |
| |
| // Merge approx bounds |
| Summary approx_bounds_summary; |
| approx_bounds_summary.mutable_data()->PackFrom(bv_summary.bounds_summary()); |
| RETURN_IF_ERROR(approx_bounds_->Merge(approx_bounds_summary)); |
| |
| // Add count and partial values to current ones. |
| partial_count_ += bv_summary.count(); |
| for (int i = 0; i < pos_sum_.size(); ++i) { |
| pos_sum_[i] += GetValue<T>(bv_summary.pos_sum(i)); |
| pos_sum_of_squares_[i] += bv_summary.pos_sum_of_squares(i); |
| } |
| for (int i = 0; i < neg_sum_.size(); ++i) { |
| neg_sum_[i] += GetValue<T>(bv_summary.neg_sum(i)); |
| neg_sum_of_squares_[i] += bv_summary.neg_sum_of_squares(i); |
| } |
| |
| return absl::OkStatus(); |
| } |
| |
| int64_t MemoryUsed() override { |
| int64_t memory = sizeof(BoundedVarianceWithApproxBounds<T>); |
| memory += sizeof(T) * (pos_sum_.capacity() + neg_sum_.capacity()); |
| memory += sizeof(double) * |
| (pos_sum_of_squares_.capacity() + neg_sum_of_squares_.capacity()); |
| memory += sizeof(*mechanism_builder_); |
| memory += approx_bounds_->MemoryUsed(); |
| return memory; |
| } |
| |
| // Returns the epsilon used to calculate approximate bounds. |
| double GetBoundingEpsilon() const override { |
| return approx_bounds_->GetEpsilon(); |
| } |
| |
| // Returns the epsilon used to calculate the noisy mean. |
| double GetAggregationEpsilon() const override { |
| return Algorithm<T>::GetEpsilon() - GetBoundingEpsilon(); |
| } |
| |
| // Returns a pointer to the ApproxBounds object. Does not transfer |
| // ownsership. Only use for testing. |
| ApproxBounds<T>* GetApproxBoundsForTesting() { return approx_bounds_.get(); } |
| |
| private: |
| absl::StatusOr<Output> GenerateResult(double noise_interval_level) override { |
| Output output; |
| |
| ASSIGN_OR_RETURN(Output bounds, |
| approx_bounds_->PartialResult(noise_interval_level)); |
| const T lower = GetValue<T>(bounds.elements(0).value()); |
| const T upper = GetValue<T>(bounds.elements(1).value()); |
| RETURN_IF_ERROR(BoundedVariance<T>::CheckBounds(lower, upper)); |
| |
| // To find the sum, pass the identity function as the transform. |
| ASSIGN_OR_RETURN(const double sum, |
| approx_bounds_->template ComputeFromPartials<T>( |
| pos_sum_, neg_sum_, [](T x) { return x; }, lower, |
| upper, partial_count_)); |
| |
| // To find sum of squares, pass the square function. |
| ASSIGN_OR_RETURN( |
| const double sum_of_squares, |
| approx_bounds_->template ComputeFromPartials<double>( |
| pos_sum_of_squares_, neg_sum_of_squares_, [](T x) { return x * x; }, |
| lower, upper, partial_count_)); |
| |
| // Populate the bounding report with ApproxBounds information. |
| *(output.mutable_error_report()->mutable_bounding_report()) = |
| approx_bounds_->GetBoundingReport(lower, upper); |
| |
| const double noised_count = count_mechanism_->AddNoise(partial_count_); |
| |
| // Calculate noised normalized sum |
| const T sum_midpoint = lower + ((upper - lower) / 2); |
| ASSIGN_OR_RETURN( |
| std::unique_ptr<NumericalMechanism> sum_mechanism, |
| BoundedVariance<T>::BuildSumMechanism( |
| mechanism_builder_->Clone(), epsilon_for_sum_, l0_sensitivity_, |
| max_contributions_per_partition_, lower, upper)); |
| const double noised_normalized_sum = |
| sum_mechanism->AddNoise(sum - (partial_count_ * sum_midpoint)); |
| |
| // Calculate noised normalized sum of squares. |
| const double sum_of_squares_midpoint = |
| BoundedVariance<T>::MidpointOfSquares(lower, upper); |
| ASSIGN_OR_RETURN( |
| std::unique_ptr<NumericalMechanism> sum_of_squares_mechanism, |
| BoundedVariance<T>::BuildSumOfSquaresMechanism( |
| mechanism_builder_->Clone(), epsilon_for_squares_, l0_sensitivity_, |
| max_contributions_per_partition_, lower, upper)); |
| const double noised_normalized_sum_of_squares = |
| sum_of_squares_mechanism->AddNoise( |
| sum_of_squares - (partial_count_ * sum_of_squares_midpoint)); |
| |
| // Calculate the result from the noised values. From this point everything |
| // should be post-processing. |
| double mean = sum_midpoint; |
| double mean_of_square = sum_of_squares_midpoint; |
| if (noised_count > 1) { |
| mean = noised_normalized_sum / noised_count + sum_midpoint; |
| mean_of_square = noised_normalized_sum_of_squares / noised_count + |
| sum_of_squares_midpoint; |
| } |
| |
| const double noised_variance = mean_of_square - std::pow(mean, 2); |
| |
| AddToOutput<double>( |
| &output, |
| Clamp<double>( |
| 0.0, BoundedVariance<T>::IntervalLengthSquared(lower, upper) / 4, |
| noised_variance)); |
| return output; |
| } |
| |
| void ResetState() override { |
| std::fill(pos_sum_.begin(), pos_sum_.end(), 0); |
| std::fill(pos_sum_of_squares_.begin(), pos_sum_of_squares_.end(), 0); |
| std::fill(neg_sum_.begin(), neg_sum_.end(), 0); |
| std::fill(neg_sum_of_squares_.begin(), neg_sum_of_squares_.end(), 0); |
| partial_count_ = 0; |
| approx_bounds_->Reset(); |
| } |
| |
| void AddMultipleEntries(const T& input, int64_t num_of_entries) override { |
| // Drop value if it is NaN. |
| // REF: |
| // https://stackoverflow.com/questions/61646166/how-to-resolve-fpclassify-ambiguous-call-to-overloaded-function |
| absl::Status status = |
| ValidateIsPositive(num_of_entries, "Number of entries"); |
| if (std::isnan(static_cast<double>(input)) || !status.ok()) { |
| return; |
| } |
| |
| // Count is unaffected by clamping. |
| partial_count_ += num_of_entries; |
| |
| // Store partial results and feed input into ApproxBounds algorithm. |
| approx_bounds_->AddMultipleEntries(input, num_of_entries); |
| |
| // Add to partial sums and sum of squares. |
| auto difference_of_squares = [](T val1, T val2) { |
| // Lessen the chance of becoming inf/-inf by calculating it like this. |
| return (static_cast<double>(val1) + val2) * |
| (static_cast<double>(val1) - val2); |
| }; |
| |
| if (input >= 0) { |
| approx_bounds_->template AddMultipleEntriesToPartialSums<T>( |
| &pos_sum_, input, num_of_entries); |
| approx_bounds_->template AddMultipleEntriesToPartials<double>( |
| &pos_sum_of_squares_, input, num_of_entries, difference_of_squares); |
| } else { |
| approx_bounds_->template AddMultipleEntriesToPartialSums<T>( |
| &neg_sum_, input, num_of_entries); |
| approx_bounds_->template AddMultipleEntriesToPartials<double>( |
| &neg_sum_of_squares_, input, num_of_entries, difference_of_squares); |
| } |
| } |
| |
| // Vectors of partial values stored for automatic clamping. |
| std::vector<T> pos_sum_, neg_sum_; |
| std::vector<double> pos_sum_of_squares_, neg_sum_of_squares_; |
| int64_t partial_count_ = 0; |
| |
| // Used to construct mechanism once bounds are obtained. |
| const double epsilon_for_sum_; |
| const double epsilon_for_squares_; |
| std::unique_ptr<NumericalMechanismBuilder> mechanism_builder_; |
| const double l0_sensitivity_; |
| const int max_contributions_per_partition_; |
| |
| std::unique_ptr<NumericalMechanism> count_mechanism_; |
| |
| // If this is not nullptr, we are automatically determining bounds. Otherwise, |
| // lower and upper contain the manually set bounds. |
| std::unique_ptr<ApproxBounds<T>> approx_bounds_; |
| }; |
| |
| template <typename T> |
| class BoundedVariance<T>::Builder { |
| public: |
| BoundedVariance<T>::Builder& SetEpsilon(double epsilon) { |
| epsilon_ = epsilon; |
| return *this; |
| } |
| |
| BoundedVariance<T>::Builder& SetDelta(double delta) { |
| delta_ = delta; |
| return *this; |
| } |
| |
| BoundedVariance<T>::Builder& SetMaxPartitionsContributed( |
| int max_partitions_contributed) { |
| max_partitions_contributed_ = max_partitions_contributed; |
| return *this; |
| } |
| |
| BoundedVariance<T>::Builder& SetMaxContributionsPerPartition( |
| int max_contributions_per_partition) { |
| max_contributions_per_partition_ = max_contributions_per_partition; |
| return *this; |
| } |
| |
| BoundedVariance<T>::Builder& SetUpper(T upper) { |
| upper_ = upper; |
| return *this; |
| } |
| |
| BoundedVariance<T>::Builder& SetLower(T lower) { |
| lower_ = lower; |
| return *this; |
| } |
| |
| BoundedVariance<T>::Builder& SetApproxBounds( |
| std::unique_ptr<ApproxBounds<T>> approx_bounds) { |
| approx_bounds_ = std::move(approx_bounds); |
| return *this; |
| } |
| |
| BoundedVariance<T>::Builder& SetLaplaceMechanism( |
| std::unique_ptr<NumericalMechanismBuilder> builder) { |
| mechanism_builder_ = std::move(builder); |
| return *this; |
| } |
| |
| absl::StatusOr<std::unique_ptr<BoundedVariance<T>>> Build() { |
| if (!epsilon_.has_value()) { |
| epsilon_ = DefaultEpsilon(); |
| LOG(WARNING) << "Default epsilon of " << epsilon_.value() |
| << " is being used. Consider setting your own epsilon based " |
| "on privacy considerations."; |
| } |
| RETURN_IF_ERROR(ValidateEpsilon(epsilon_)); |
| RETURN_IF_ERROR(ValidateDelta(delta_)); |
| RETURN_IF_ERROR(ValidateBounds(lower_, upper_)); |
| RETURN_IF_ERROR( |
| ValidateMaxPartitionsContributed(max_partitions_contributed_)); |
| RETURN_IF_ERROR( |
| ValidateMaxContributionsPerPartition(max_contributions_per_partition_)); |
| if (upper_.has_value() && lower_.has_value()) { |
| return BuildVarianceWithFixedBounds(); |
| } |
| return BuildVarianceWithApproxBounds(); |
| } |
| |
| private: |
| absl::optional<double> epsilon_; |
| double delta_ = 0; |
| absl::optional<T> upper_; |
| absl::optional<T> lower_; |
| int max_partitions_contributed_ = 1; |
| int max_contributions_per_partition_ = 1; |
| std::unique_ptr<NumericalMechanismBuilder> mechanism_builder_ = |
| absl::make_unique<LaplaceMechanism::Builder>(); |
| std::unique_ptr<ApproxBounds<T>> approx_bounds_; |
| |
| absl::StatusOr<std::unique_ptr<BoundedVariance<T>>> |
| BuildVarianceWithFixedBounds() { |
| RETURN_IF_ERROR(CheckBounds(lower_.value(), upper_.value())); |
| |
| ASSIGN_OR_RETURN(std::unique_ptr<NumericalMechanism> count_mechanism, |
| mechanism_builder_->Clone() |
| ->SetEpsilon(epsilon_.value() / 3) |
| .SetL0Sensitivity(max_partitions_contributed_) |
| .SetLInfSensitivity(max_contributions_per_partition_) |
| .Build()); |
| ASSIGN_OR_RETURN( |
| std::unique_ptr<NumericalMechanism> sum_mechanism, |
| BoundedVariance<T>::BuildSumMechanism( |
| mechanism_builder_->Clone(), epsilon_.value() / 3, |
| max_partitions_contributed_, max_contributions_per_partition_, |
| lower_.value(), upper_.value())); |
| ASSIGN_OR_RETURN( |
| std::unique_ptr<NumericalMechanism> sos_mechanism, |
| BoundedVariance<T>::BuildSumOfSquaresMechanism( |
| mechanism_builder_->Clone(), epsilon_.value() / 3, |
| max_partitions_contributed_, max_contributions_per_partition_, |
| lower_.value(), upper_.value())); |
| |
| return absl::StatusOr<std::unique_ptr<BoundedVariance<T>>>( |
| absl::make_unique<BoundedVarianceWithFixedBounds<T>>( |
| epsilon_.value(), lower_.value(), upper_.value(), |
| std::move(count_mechanism), std::move(sum_mechanism), |
| std::move(sos_mechanism))); |
| } |
| |
| absl::StatusOr<std::unique_ptr<BoundedVariance<T>>> |
| BuildVarianceWithApproxBounds() { |
| if (!approx_bounds_) { |
| ASSIGN_OR_RETURN( |
| approx_bounds_, |
| typename ApproxBounds<T>::Builder() |
| .SetEpsilon(epsilon_.value() / 2) |
| .SetLaplaceMechanism(mechanism_builder_->Clone()) |
| .SetMaxContributionsPerPartition(max_contributions_per_partition_) |
| .SetMaxPartitionsContributed(max_partitions_contributed_) |
| .Build()); |
| } |
| |
| if (epsilon_.value() <= approx_bounds_->GetEpsilon()) { |
| return absl::InvalidArgumentError(absl::StrCat( |
| "Approx Bounds consumes more epsilon budget than available. Total " |
| "Epsilon: ", |
| epsilon_.value(), |
| " Approx Bounds Epsilon: ", approx_bounds_->GetEpsilon())); |
| } |
| |
| // Budget calculation. |
| const double remaining_epsilon = |
| epsilon_.value() - approx_bounds_->GetEpsilon(); |
| |
| const double epsilon_for_count = remaining_epsilon / 3; |
| const double epsilon_for_sum = remaining_epsilon / 3; |
| const double epsilon_for_squares = |
| remaining_epsilon - epsilon_for_count - epsilon_for_sum; |
| |
| ASSIGN_OR_RETURN(std::unique_ptr<NumericalMechanism> count_mechanism, |
| mechanism_builder_->Clone() |
| ->SetEpsilon(epsilon_for_count) |
| .SetL0Sensitivity(max_partitions_contributed_) |
| .SetLInfSensitivity(max_contributions_per_partition_) |
| .Build()); |
| |
| return absl::StatusOr<std::unique_ptr<BoundedVariance<T>>>( |
| absl::make_unique<BoundedVarianceWithApproxBounds<T>>( |
| epsilon_.value(), epsilon_for_sum, epsilon_for_squares, |
| max_partitions_contributed_, max_contributions_per_partition_, |
| mechanism_builder_->Clone(), std::move(count_mechanism), |
| std::move(approx_bounds_))); |
| } |
| }; |
| |
| } // namespace differential_privacy |
| |
| #endif // DIFFERENTIAL_PRIVACY_ALGORITHMS_BOUNDED_VARIANCE_H_ |