| // |
| // Copyright 2019 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| |
| #ifndef DIFFERENTIAL_PRIVACY_ALGORITHMS_BOUNDED_SUM_H_ |
| #define DIFFERENTIAL_PRIVACY_ALGORITHMS_BOUNDED_SUM_H_ |
| |
| #include <stdlib.h> |
| |
| #include <algorithm> |
| #include <cmath> |
| #include <limits> |
| #include <memory> |
| #include <optional> |
| #include <string> |
| #include <type_traits> |
| #include <utility> |
| #include <vector> |
| |
| #include <cstdint> |
| #include "base/logging.h" |
| #include "google/protobuf/any.pb.h" |
| #include "absl/memory/memory.h" |
| #include "absl/status/status.h" |
| #include "absl/status/statusor.h" |
| #include "absl/strings/str_cat.h" |
| #include "algorithms/algorithm.h" |
| #include "algorithms/approx-bounds.h" |
| #include "algorithms/bounded-algorithm.h" |
| #include "algorithms/numerical-mechanisms.h" |
| #include "algorithms/util.h" |
| #include "proto/util.h" |
| #include "proto/confidence-interval.pb.h" |
| #include "proto/data.pb.h" |
| #include "proto/summary.pb.h" |
| #include "base/status_macros.h" |
| |
| namespace differential_privacy { |
| |
| template <typename T> |
| class BoundedSum : public Algorithm<T> { |
| static_assert(std::is_arithmetic<T>::value, |
| "BoundedSum can only be used for arithmetic types"); |
| static_assert(std::numeric_limits<T>::lowest() < 0, |
| "BoundedSum can only be used for signed types"); |
| |
| public: |
| // Builder class that should be used to construct BoundedSum algorithms. |
| class Builder; |
| |
| BoundedSum(double epsilon, double delta) : Algorithm<T>(epsilon, delta) {} |
| |
| virtual ~BoundedSum() = default; |
| |
| // Returns the lower bound when it has been set. |
| virtual std::optional<T> lower() const = 0; |
| |
| // Returns the upper bound when it has been set. |
| virtual std::optional<T> upper() const = 0; |
| |
| protected: |
| // Check that bounds are appropriate. |
| static absl::Status CheckLowerBound(T lower) { |
| if (lower < -1 * std::numeric_limits<T>::max()) { |
| return absl::InvalidArgumentError( |
| "Lower bound cannot be higher in magnitude than the max " |
| "numeric limit. If manually bounding, please increase it by " |
| "at least 1."); |
| } |
| return absl::OkStatus(); |
| } |
| |
| // Build a numerical mechanism that will return adequate noise for the raw |
| // sum to make the result DP. |
| static absl::StatusOr<std::unique_ptr<NumericalMechanism>> BuildMechanism( |
| std::unique_ptr<NumericalMechanismBuilder> mechanism_builder, |
| const double epsilon, const double delta, const double l0_sensitivity, |
| const double max_contributions_per_partition, const T lower, |
| const T upper) { |
| return mechanism_builder->SetEpsilon(epsilon) |
| .SetDelta(delta) |
| .SetL0Sensitivity(l0_sensitivity) |
| .SetLInfSensitivity(max_contributions_per_partition * |
| std::max(std::abs(static_cast<double>(lower)), |
| std::abs(static_cast<double>(upper)))) |
| .Build(); |
| } |
| }; |
| |
| // Bounded sum implementation that uses fixed bounds. |
| template <typename T> |
| class BoundedSumWithFixedBounds : public BoundedSum<T> { |
| public: |
| BoundedSumWithFixedBounds(const double epsilon, const double delta, |
| const T lower, const T upper, |
| std::unique_ptr<NumericalMechanism> mechanism) |
| : BoundedSum<T>(epsilon, delta), |
| lower_(lower), |
| upper_(upper), |
| mechanism_(std::move(mechanism)) {} |
| |
| void AddEntry(const T& t) override { |
| if (std::isnan(static_cast<double>(t))) { |
| return; |
| } |
| partial_sum_ += Clamp<T>(lower_, upper_, t); |
| } |
| |
| Summary Serialize() const override { |
| BoundedSumSummary sum_summary; |
| // TODO: Use the partial_sum field of the proto. |
| SetValue(sum_summary.add_pos_sum(), partial_sum_); |
| |
| Summary result; |
| result.mutable_data()->PackFrom(sum_summary); |
| return result; |
| } |
| |
| absl::Status Merge(const Summary& summary) override { |
| if (!summary.has_data()) { |
| return absl::InternalError("Cannot merge summary with no data."); |
| } |
| |
| // Unpack sum summary |
| BoundedSumSummary sum_summary; |
| if (!summary.data().UnpackTo(&sum_summary)) { |
| return absl::InternalError("Bounded sum summary unable to be unpacked."); |
| } |
| |
| // Get required partial sum |
| // TODO: Use the partial_sum field of the proto. |
| if (sum_summary.pos_sum_size() != 1) { |
| return absl::InternalError(absl::StrCat( |
| "Bounded sum summary must have exactly one pos_sum but got ", |
| sum_summary.pos_sum_size())); |
| } |
| partial_sum_ += GetValue<T>(sum_summary.pos_sum(0)); |
| |
| return absl::Status(); |
| } |
| |
| int64_t MemoryUsed() override { |
| return sizeof(BoundedSumWithFixedBounds) + mechanism_->MemoryUsed(); |
| } |
| |
| std::optional<T> upper() const override { return upper_; } |
| |
| std::optional<T> lower() const override { return lower_; } |
| |
| absl::StatusOr<ConfidenceInterval> NoiseConfidenceInterval( |
| double confidence_level) override { |
| return mechanism_->NoiseConfidenceInterval(confidence_level); |
| } |
| |
| protected: |
| absl::StatusOr<Output> GenerateResult(double noise_interval_level) override { |
| Output output; |
| |
| // Add noise to the sum. |
| double noisy_sum = mechanism_->AddNoise(partial_sum_); |
| // Add noise confidence interval. |
| absl::StatusOr<ConfidenceInterval> interval = |
| NoiseConfidenceInterval(noise_interval_level); |
| |
| if (std::is_integral<T>::value) { |
| SafeOpResult<T> cast_result = |
| SafeCastFromDouble<T>(std::round(noisy_sum)); |
| if (interval.ok()) { |
| output = MakeOutput<T>(cast_result.value, interval.value()); |
| } else { |
| output = MakeOutput<T>(cast_result.value); |
| } |
| } else { |
| if (interval.ok()) { |
| output = MakeOutput<T>(noisy_sum, interval.value()); |
| } else { |
| output = MakeOutput<T>(noisy_sum); |
| } |
| } |
| |
| return output; |
| } |
| |
| void ResetState() override { partial_sum_ = 0; } |
| |
| private: |
| // Bounds |
| const T lower_; |
| const T upper_; |
| |
| // (Partially) aggregated sum |
| T partial_sum_ = 0; |
| |
| // Mechanism to add noise. |
| std::unique_ptr<NumericalMechanism> mechanism_; |
| }; |
| |
| // Bounded sum implementation using privately inferred bounds as a single-pass |
| // algorithm using ApproxBounds. |
| template <typename T> |
| class BoundedSumWithApproxBounds : public BoundedSum<T> { |
| public: |
| BoundedSumWithApproxBounds( |
| const double epsilon, const double delta, const double l0_sensitivity, |
| const double max_contributions_per_partition, |
| std::unique_ptr<NumericalMechanismBuilder> mechanism_builder, |
| std::unique_ptr<ApproxBounds<T>> approx_bounds) |
| : BoundedSum<T>(epsilon, delta), |
| mechanism_builder_(std::move(mechanism_builder)), |
| l0_sensitivity_(l0_sensitivity), |
| max_contributions_per_partition_(max_contributions_per_partition), |
| approx_bounds_(std::move(approx_bounds)) { |
| // We use partial values for each bin of the ApproxBounds logarithmic |
| // histogram. |
| pos_sum_.resize(approx_bounds_->NumPositiveBins(), 0); |
| neg_sum_.resize(approx_bounds_->NumPositiveBins(), 0); |
| } |
| |
| void AddEntry(const T& t) override { |
| // REF: |
| // https://stackoverflow.com/questions/61646166/how-to-resolve-fpclassify-ambiguous-call-to-overloaded-function |
| if (std::isnan(static_cast<double>(t))) { |
| return; |
| } |
| |
| approx_bounds_->AddEntry(t); |
| |
| // Find partial sums. |
| if (t >= 0) { |
| approx_bounds_->template AddToPartialSums<T>(&pos_sum_, t); |
| } else { |
| approx_bounds_->template AddToPartialSums<T>(&neg_sum_, t); |
| } |
| } |
| |
| // Noise confidence interval is not known before finalizing the algorithm as |
| // we are using approx bounds. |
| absl::StatusOr<ConfidenceInterval> NoiseConfidenceInterval( |
| double confidence_level) override { |
| return absl::InvalidArgumentError( |
| "NoiseConfidenceInterval changes per result generation for " |
| "automatically-determined sensitivity."); |
| } |
| |
| std::optional<T> lower() const override { return std::nullopt; } |
| std::optional<T> upper() const override { return std::nullopt; } |
| |
| Summary Serialize() const override { |
| // Create BoundedSumSummary. |
| BoundedSumSummary bs_summary; |
| for (T x : pos_sum_) { |
| SetValue(bs_summary.add_pos_sum(), x); |
| } |
| for (T x : neg_sum_) { |
| SetValue(bs_summary.add_neg_sum(), x); |
| } |
| Summary approx_bounds_summary = approx_bounds_->Serialize(); |
| approx_bounds_summary.data().UnpackTo(bs_summary.mutable_bounds_summary()); |
| |
| // Create Summary. |
| Summary summary; |
| summary.mutable_data()->PackFrom(bs_summary); |
| return summary; |
| } |
| |
| absl::Status Merge(const Summary& summary) override { |
| if (!summary.has_data()) { |
| return absl::InternalError( |
| "Cannot merge summary with no bounded sum data."); |
| } |
| |
| // Add bounded sum partial values. |
| BoundedSumSummary bs_summary; |
| if (!summary.data().UnpackTo(&bs_summary)) { |
| return absl::InternalError("Bounded sum summary unable to be unpacked."); |
| } |
| if (pos_sum_.size() != bs_summary.pos_sum_size() || |
| neg_sum_.size() != bs_summary.neg_sum_size()) { |
| return absl::InternalError( |
| "Merged BoundedSum must have the same amount of partial sum " |
| "values as this BoundedSum."); |
| } |
| for (int i = 0; i < pos_sum_.size(); ++i) { |
| pos_sum_[i] += GetValue<T>(bs_summary.pos_sum(i)); |
| } |
| for (int i = 0; i < neg_sum_.size(); ++i) { |
| neg_sum_[i] += GetValue<T>(bs_summary.neg_sum(i)); |
| } |
| |
| // Merge approx bounds summary. |
| Summary approx_bounds_summary; |
| approx_bounds_summary.mutable_data()->PackFrom(bs_summary.bounds_summary()); |
| RETURN_IF_ERROR(approx_bounds_->Merge(approx_bounds_summary)); |
| |
| return absl::OkStatus(); |
| } |
| |
| // Returns the epsilon used to calculate approximate bounds. |
| double GetBoundingEpsilon() const { return approx_bounds_->GetEpsilon(); } |
| |
| // Returns the epsilon used to calculate the noisy sum. The overall algorithm |
| // also uses epsilon for privately inferred bounds using approx bounds. |
| double GetAggregationEpsilon() const { |
| return Algorithm<T>::GetEpsilon() - approx_bounds_->GetEpsilon(); |
| } |
| |
| int64_t MemoryUsed() override { |
| int64_t memory = sizeof(BoundedSum<T>); |
| memory += sizeof(T) * (pos_sum_.capacity() + neg_sum_.capacity()); |
| memory += approx_bounds_->MemoryUsed(); |
| memory += sizeof(*mechanism_builder_); |
| return memory; |
| } |
| |
| // Use the following methods only for testing. |
| int GetMaxContributionsPerPartitionForTesting() { |
| return max_contributions_per_partition_; |
| } |
| |
| double GetL0SensitivityForTesting() { return l0_sensitivity_; } |
| |
| ApproxBounds<T>* GetApproxBoundsForTesting() { return approx_bounds_.get(); } |
| |
| protected: |
| absl::StatusOr<Output> GenerateResult(double noise_interval_level) override { |
| // Get results of approximate bounds. |
| ASSIGN_OR_RETURN(Output bounds, |
| approx_bounds_->PartialResult(noise_interval_level)); |
| const T approx_bounds_lower = GetValue<T>(bounds.elements(0).value()); |
| const T approx_bounds_upper = GetValue<T>(bounds.elements(1).value()); |
| |
| // Since sensitivity is determined only by the larger-magnitude bound, |
| // set the smaller-magnitude bound to be the negative of the larger. This |
| // minimizes clamping and so maximizes accuracy. We need to be careful with |
| // the numerical limits since -max == lowest + 1 for integers. |
| T lower = approx_bounds_lower; |
| T upper = approx_bounds_upper; |
| if (approx_bounds_lower == std::numeric_limits<T>::lowest()) { |
| upper = std::numeric_limits<T>::max(); |
| } else { |
| lower = std::min(approx_bounds_lower, -1 * approx_bounds_upper); |
| upper = std::max(approx_bounds_upper, -1 * approx_bounds_lower); |
| } |
| |
| // Construct NumericalMechanism. |
| ASSIGN_OR_RETURN(std::unique_ptr<NumericalMechanism> mechanism, |
| BoundedSum<T>::BuildMechanism( |
| mechanism_builder_->Clone(), GetAggregationEpsilon(), |
| Algorithm<T>::GetDelta(), l0_sensitivity_, |
| max_contributions_per_partition_, lower, upper)); |
| |
| // To find the sum, pass the identity function as the transform. We pass |
| // count = 0 because the count should never be used. |
| ASSIGN_OR_RETURN( |
| T sum, approx_bounds_->template ComputeFromPartials<T>( |
| pos_sum_, neg_sum_, [](T x) { return x; }, lower, upper, 0)); |
| |
| // Add noise and confidence interval to the sum output. Use the remaining |
| // privacy budget. |
| T noisy_sum = mechanism->AddNoise(sum); |
| absl::StatusOr<ConfidenceInterval> interval = |
| mechanism->NoiseConfidenceInterval(noise_interval_level); |
| |
| Output output; |
| |
| if (interval.ok()) { |
| output = MakeOutput<T>(noisy_sum, interval.value()); |
| } else { |
| output = MakeOutput<T>(noisy_sum); |
| } |
| |
| // Populate the bounding report with ApproxBounds information. |
| output.mutable_error_report()->set_allocated_bounding_report( |
| new BoundingReport(approx_bounds_->GetBoundingReport(lower, upper))); |
| |
| return output; |
| } |
| |
| void ResetState() override { |
| std::fill(pos_sum_.begin(), pos_sum_.end(), 0); |
| std::fill(neg_sum_.begin(), neg_sum_.end(), 0); |
| approx_bounds_->Reset(); |
| } |
| |
| private: |
| // Vectors of partial values stored for automatic clamping. |
| std::vector<T> pos_sum_, neg_sum_; |
| |
| // Used to construct the numerical mechanism once bounds are obtained. |
| std::unique_ptr<NumericalMechanismBuilder> mechanism_builder_; |
| const double l0_sensitivity_; |
| const int max_contributions_per_partition_; |
| |
| // Algorithm to privately infer bounds. |
| std::unique_ptr<ApproxBounds<T>> approx_bounds_; |
| }; |
| |
| template <typename T> |
| class BoundedSum<T>::Builder { |
| public: |
| BoundedSum<T>::Builder& SetEpsilon(double epsilon) { |
| epsilon_ = epsilon; |
| return *this; |
| } |
| |
| BoundedSum<T>::Builder& SetDelta(double delta) { |
| delta_ = delta; |
| return *this; |
| } |
| |
| BoundedSum<T>::Builder& SetMaxPartitionsContributed( |
| int max_partitions_contributed) { |
| max_partitions_contributed_ = max_partitions_contributed; |
| return *this; |
| } |
| |
| BoundedSum<T>::Builder& SetMaxContributionsPerPartition( |
| int max_contributions_per_partition) { |
| max_contributions_per_partition_ = max_contributions_per_partition; |
| return *this; |
| } |
| |
| BoundedSum<T>::Builder& SetUpper(T upper) { |
| upper_ = upper; |
| return *this; |
| } |
| |
| BoundedSum<T>::Builder& SetLower(T lower) { |
| lower_ = lower; |
| return *this; |
| } |
| |
| BoundedSum<T>::Builder& SetApproxBounds( |
| std::unique_ptr<ApproxBounds<T>> approx_bounds) { |
| approx_bounds_ = std::move(approx_bounds); |
| return *this; |
| } |
| |
| BoundedSum<T>::Builder& SetLaplaceMechanism( |
| std::unique_ptr<NumericalMechanismBuilder> builder) { |
| mechanism_builder_ = std::move(builder); |
| return *this; |
| } |
| |
| absl::StatusOr<std::unique_ptr<BoundedSum<T>>> Build() { |
| if (!epsilon_.has_value()) { |
| epsilon_ = DefaultEpsilon(); |
| LOG(WARNING) << "Default epsilon of " << epsilon_.value() |
| << " is being used. Consider setting your own epsilon based " |
| "on privacy considerations."; |
| } |
| RETURN_IF_ERROR(ValidateEpsilon(epsilon_)); |
| RETURN_IF_ERROR(ValidateDelta(delta_)); |
| RETURN_IF_ERROR(ValidateBounds(lower_, upper_)); |
| if (lower_.has_value()) { |
| RETURN_IF_ERROR(CheckLowerBound(lower_.value())); |
| } |
| RETURN_IF_ERROR( |
| ValidateMaxPartitionsContributed(max_partitions_contributed_)); |
| RETURN_IF_ERROR( |
| ValidateMaxContributionsPerPartition(max_contributions_per_partition_)); |
| if (upper_.has_value() && lower_.has_value()) { |
| return BuildSumWithFixedBounds(); |
| } |
| return BuildSumWithApproxBounds(); |
| } |
| |
| private: |
| absl::optional<double> epsilon_; |
| double delta_ = 0; |
| absl::optional<T> upper_; |
| absl::optional<T> lower_; |
| int max_partitions_contributed_ = 1; |
| int max_contributions_per_partition_ = 1; |
| std::unique_ptr<NumericalMechanismBuilder> mechanism_builder_ = |
| absl::make_unique<LaplaceMechanism::Builder>(); |
| std::unique_ptr<ApproxBounds<T>> approx_bounds_; |
| |
| absl::StatusOr<std::unique_ptr<BoundedSum<T>>> BuildSumWithFixedBounds() { |
| ASSIGN_OR_RETURN( |
| std::unique_ptr<NumericalMechanism> mechanism, |
| BuildMechanism(mechanism_builder_->Clone(), epsilon_.value(), delta_, |
| max_partitions_contributed_, |
| max_contributions_per_partition_, lower_.value(), |
| upper_.value())); |
| |
| return absl::StatusOr<std::unique_ptr<BoundedSum<T>>>( |
| absl::make_unique<BoundedSumWithFixedBounds<T>>( |
| epsilon_.value(), delta_, lower_.value(), upper_.value(), |
| std::move(mechanism))); |
| } |
| |
| absl::StatusOr<std::unique_ptr<BoundedSum<T>>> BuildSumWithApproxBounds() { |
| if (!approx_bounds_) { |
| ASSIGN_OR_RETURN( |
| approx_bounds_, |
| typename ApproxBounds<T>::Builder() |
| .SetEpsilon(epsilon_.value() / 2) |
| .SetLaplaceMechanism(mechanism_builder_->Clone()) |
| .SetMaxContributionsPerPartition(max_contributions_per_partition_) |
| .SetMaxPartitionsContributed(max_partitions_contributed_) |
| .Build()); |
| } |
| if (epsilon_.value() <= approx_bounds_->GetEpsilon()) { |
| return absl::InvalidArgumentError(absl::StrCat( |
| "Approx Bounds consumes more epsilon budget than available. Total " |
| "Epsilon: ", |
| epsilon_.value(), |
| " Approx Bounds Epsilon: ", approx_bounds_->GetEpsilon())); |
| } |
| |
| return absl::StatusOr<std::unique_ptr<BoundedSum<T>>>( |
| absl::make_unique<BoundedSumWithApproxBounds<T>>( |
| epsilon_.value(), delta_, max_partitions_contributed_, |
| max_contributions_per_partition_, mechanism_builder_->Clone(), |
| std::move(approx_bounds_))); |
| } |
| }; |
| |
| } // namespace differential_privacy |
| |
| #endif // DIFFERENTIAL_PRIVACY_ALGORITHMS_BOUNDED_SUM_H_ |