| // |
| // Copyright 2019 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| |
| #ifndef DIFFERENTIAL_PRIVACY_ALGORITHMS_BOUNDED_SUM_H_ |
| #define DIFFERENTIAL_PRIVACY_ALGORITHMS_BOUNDED_SUM_H_ |
| |
| #include <limits> |
| #include <type_traits> |
| |
| #include "google/protobuf/any.pb.h" |
| #include "absl/memory/memory.h" |
| #include "absl/status/status.h" |
| #include "base/statusor.h" |
| #include "algorithms/algorithm.h" |
| #include "algorithms/approx-bounds.h" |
| #include "algorithms/bounded-algorithm.h" |
| #include "algorithms/numerical-mechanisms.h" |
| #include "algorithms/util.h" |
| #include "proto/summary.pb.h" |
| #include "base/canonical_errors.h" |
| |
| namespace differential_privacy { |
| |
| // Incrementally provides a differentially private sum, clamped between upper |
| // and lower values. Bounds can be manually set or privately inferred. |
| template <typename T, std::enable_if_t<std::is_arithmetic<T>::value>* = nullptr> |
| class BoundedSum : public Algorithm<T> { |
| public: |
| // Builder for BoundedSum algorithm. |
| class Builder : public BoundedAlgorithmBuilder<T, BoundedSum<T>, Builder> { |
| using AlgorithmBuilder = |
| differential_privacy::AlgorithmBuilder<T, BoundedSum<T>, Builder>; |
| using BoundedBuilder = BoundedAlgorithmBuilder<T, BoundedSum<T>, Builder>; |
| |
| public: |
| // Check that bounds are appropriate. |
| static absl::Status CheckLowerBound(T lower) { |
| if (lower < -1 * std::numeric_limits<T>::max()) { |
| return absl::InvalidArgumentError( |
| "Lower bound cannot be higher in magnitude than the max " |
| "numeric limit. If manually bounding, please increase it by " |
| "at least 1."); |
| } |
| return absl::OkStatus(); |
| } |
| |
| private: |
| base::StatusOr<std::unique_ptr<BoundedSum<T>>> BuildBoundedAlgorithm() |
| override { |
| // We have to check epsilon now, otherwise the split during ApproxBounds |
| // construction might make the error message confusing. |
| RETURN_IF_ERROR( |
| GetValueIfSetAndPositive(AlgorithmBuilder::GetEpsilon(), "Epsilon") |
| .status()); |
| |
| // Ensure that either bounds are manually set or ApproxBounds is made. |
| RETURN_IF_ERROR(BoundedBuilder::BoundsSetup()); |
| |
| // If manual bounding, construct mechanism so we can fail on build if |
| // sensitivity is inappropriate. |
| std::unique_ptr<NumericalMechanism> mechanism = nullptr; |
| if (BoundedBuilder::BoundsAreSet()) { |
| RETURN_IF_ERROR(CheckLowerBound(BoundedBuilder::GetLower().value())); |
| ASSIGN_OR_RETURN( |
| mechanism, |
| BuildMechanism( |
| AlgorithmBuilder::GetMechanismBuilderClone(), |
| BoundedBuilder::GetRemainingEpsilon().value(), |
| AlgorithmBuilder::GetMaxPartitionsContributed().value_or(1), |
| AlgorithmBuilder::GetMaxContributionsPerPartition().value_or(1), |
| BoundedBuilder::GetLower().value(), |
| BoundedBuilder::GetUpper().value())); |
| } |
| |
| // Construct BoundedSum. |
| auto mech_builder = AlgorithmBuilder::GetMechanismBuilderClone(); |
| return absl::WrapUnique(new BoundedSum( |
| BoundedBuilder::GetRemainingEpsilon().value(), |
| BoundedBuilder::GetLower().value_or(0), |
| BoundedBuilder::GetUpper().value_or(0), |
| AlgorithmBuilder::GetMaxPartitionsContributed().value_or(1), |
| AlgorithmBuilder::GetMaxContributionsPerPartition().value_or(1), |
| std::move(mech_builder), std::move(mechanism), |
| std::move(BoundedBuilder::MoveApproxBoundsPointer()))); |
| } |
| }; |
| |
| void AddEntry(const T& t) override { |
| // REF: |
| // https://stackoverflow.com/questions/61646166/how-to-resolve-fpclassify-ambiguous-call-to-overloaded-function |
| if (std::isnan(static_cast<double>(t))) { |
| return; |
| } |
| |
| // If manual bounds are set, clamp immediately and store sum. Otherwise, |
| // feed inputs into ApproxBounds and store temporary partial sums. |
| if (!approx_bounds_) { |
| SafeAdd(pos_sum_[0], Clamp<T>(lower_, upper_, t), &pos_sum_[0]); |
| } else { |
| approx_bounds_->AddEntry(t); |
| |
| // Find partial sums. |
| if (t >= 0) { |
| approx_bounds_->template AddToPartialSums<T>(&pos_sum_, t); |
| } else { |
| approx_bounds_->template AddToPartialSums<T>(&neg_sum_, t); |
| } |
| } |
| } |
| |
| // Only return noise confidence interval for manually set bounds, since it is |
| // dynamic upon result generation for auto-bounds. |
| base::StatusOr<ConfidenceInterval> NoiseConfidenceInterval( |
| double confidence_level, double privacy_budget = 1) override { |
| if (approx_bounds_) { |
| return absl::InvalidArgumentError( |
| "NoiseConfidenceInterval changes per result generation for " |
| "automatically-determined sensitivity."); |
| } |
| return NoiseConfidenceIntervalImpl(confidence_level, privacy_budget); |
| } |
| |
| T lower() { return lower_; } |
| T upper() { return upper_; } |
| |
| Summary Serialize() override { |
| // Create BoundedSumSummary. |
| BoundedSumSummary bs_summary; |
| for (T x : pos_sum_) { |
| SetValue(bs_summary.add_pos_sum(), x); |
| } |
| for (T x : neg_sum_) { |
| SetValue(bs_summary.add_neg_sum(), x); |
| } |
| if (approx_bounds_) { |
| Summary approx_bounds_summary = approx_bounds_->Serialize(); |
| approx_bounds_summary.data().UnpackTo( |
| bs_summary.mutable_bounds_summary()); |
| } |
| |
| // Create Summary. |
| Summary summary; |
| summary.mutable_data()->PackFrom(bs_summary); |
| return summary; |
| } |
| |
| absl::Status Merge(const Summary& summary) override { |
| if (!summary.has_data()) { |
| return absl::InternalError( |
| "Cannot merge summary with no bounded sum data."); |
| } |
| |
| // Add bounded sum partial values. |
| BoundedSumSummary bs_summary; |
| if (!summary.data().UnpackTo(&bs_summary)) { |
| return absl::InternalError("Bounded sum summary unable to be unpacked."); |
| } |
| if (pos_sum_.size() != bs_summary.pos_sum_size() || |
| neg_sum_.size() != bs_summary.neg_sum_size()) { |
| return absl::InternalError( |
| "Merged BoundedSum must have the same amount of partial sum " |
| "values as this BoundedSum."); |
| } |
| for (int i = 0; i < pos_sum_.size(); ++i) { |
| SafeAdd(pos_sum_[i], GetValue<T>(bs_summary.pos_sum(i)), &pos_sum_[i]); |
| } |
| for (int i = 0; i < neg_sum_.size(); ++i) { |
| SafeAdd(neg_sum_[i], GetValue<T>(bs_summary.neg_sum(i)), &neg_sum_[i]); |
| } |
| if (approx_bounds_) { |
| Summary approx_bounds_summary; |
| approx_bounds_summary.mutable_data()->PackFrom( |
| bs_summary.bounds_summary()); |
| RETURN_IF_ERROR(approx_bounds_->Merge(approx_bounds_summary)); |
| } |
| return absl::OkStatus(); |
| } |
| |
| double GetEpsilon() const override { |
| if (approx_bounds_) { |
| return approx_bounds_->GetEpsilon() + Algorithm<T>::GetEpsilon(); |
| } |
| return Algorithm<T>::GetEpsilon(); |
| } |
| |
| // Returns the epsilon used to calculate approximate bounds. If approximate |
| // bounds are not used, returns 0. |
| double GetBoundingEpsilon() const { |
| if (approx_bounds_) { |
| return approx_bounds_->GetEpsilon(); |
| } |
| return 0; |
| } |
| |
| // Returns the epsilon used to calculate the noisy mean. If bounds are |
| // specified explicitly, this will be the total epsilon used by the algorithm. |
| double GetAggregationEpsilon() const { return Algorithm<T>::GetEpsilon(); } |
| |
| int64_t MemoryUsed() override { |
| int64_t memory = sizeof(BoundedSum<T>) + |
| sizeof(T) * (pos_sum_.capacity() + neg_sum_.capacity()); |
| if (approx_bounds_) { |
| memory += approx_bounds_->MemoryUsed(); |
| } |
| if (mechanism_) { |
| memory += mechanism_->MemoryUsed(); |
| } |
| if (mechanism_builder_) { |
| memory += sizeof(*mechanism_builder_); |
| } |
| return memory; |
| } |
| |
| protected: |
| // Protected constructor to allow for testing. |
| BoundedSum(double epsilon, T lower, T upper, const double l0_sensitivity, |
| const double max_contributions_per_partition, |
| std::unique_ptr<NumericalMechanismBuilder> mechanism_builder, |
| std::unique_ptr<NumericalMechanism> mechanism, |
| std::unique_ptr<ApproxBounds<T>> approx_bounds = nullptr) |
| : Algorithm<T>(epsilon), |
| lower_(lower), |
| upper_(upper), |
| mechanism_builder_(std::move(mechanism_builder)), |
| l0_sensitivity_(l0_sensitivity), |
| max_contributions_per_partition_(max_contributions_per_partition), |
| mechanism_(std::move(mechanism)), |
| approx_bounds_(std::move(approx_bounds)) { |
| // If automatically determining bounds, we need partial values for each bin |
| // of the ApproxBounds logarithmic histogram. Otherwise, we only need to |
| // store one already-clamped value. |
| if (approx_bounds_) { |
| pos_sum_.resize(approx_bounds_->NumPositiveBins(), 0); |
| neg_sum_.resize(approx_bounds_->NumPositiveBins(), 0); |
| } else { |
| pos_sum_.push_back(0); |
| } |
| } |
| |
| base::StatusOr<Output> GenerateResult(double privacy_budget, |
| double noise_interval_level) override { |
| DCHECK_GT(privacy_budget, 0.0) |
| << "Privacy budget should be greater than zero."; |
| if (privacy_budget == 0.0) return Output(); |
| |
| Output output; |
| double sum = 0; |
| double remaining_budget = privacy_budget; |
| |
| if (approx_bounds_) { |
| // Use a fraction of the privacy budget to find the approximate bounds. |
| double bounds_budget = privacy_budget / 2; |
| remaining_budget -= bounds_budget; |
| ASSIGN_OR_RETURN(Output bounds, approx_bounds_->PartialResult( |
| bounds_budget, noise_interval_level)); |
| T lower = GetValue<T>(bounds.elements(0).value()); |
| T upper = GetValue<T>(bounds.elements(1).value()); |
| RETURN_IF_ERROR(Builder::CheckLowerBound(lower)); |
| |
| // Since sensitivity is determined only by the larger-magnitude bound, |
| // set the smaller-magnitude bound to be the negative of the larger. This |
| // minimizes clamping and so maximizes accuracy. |
| lower_ = std::min(lower, -1 * upper); |
| upper_ = std::max(upper, -1 * lower); |
| |
| // To find the sum, pass the identity function as the transform. We pass |
| // count = 0 because the count should never be used. |
| sum = approx_bounds_->template ComputeFromPartials<T>( |
| pos_sum_, neg_sum_, [](T x) { return x; }, lower_, upper_, 0); |
| |
| // Populate the bounding report with ApproxBounds information. |
| *(output.mutable_error_report()->mutable_bounding_report()) = |
| approx_bounds_->GetBoundingReport(lower_, upper_); |
| |
| // Clear the mechanism. The sensitivity might have changed. |
| mechanism_.reset(); |
| } else { |
| // Manual bounds were set and clamping was done upon adding entries. |
| sum = pos_sum_[0]; |
| } |
| |
| // Construct mechanism if needed. Mechanism is already constructed if |
| // NoiseConfidenceInterval() was called with manual bounds. |
| if (!mechanism_) { |
| ASSIGN_OR_RETURN( |
| mechanism_, |
| BuildMechanism(mechanism_builder_->Clone(), |
| Algorithm<T>::GetEpsilon(), l0_sensitivity_, |
| max_contributions_per_partition_, lower_, upper_)); |
| } |
| |
| // Add noise confidence interval to the error report. |
| base::StatusOr<ConfidenceInterval> interval = |
| NoiseConfidenceIntervalImpl(noise_interval_level, remaining_budget); |
| if (interval.ok()) { |
| *(output.mutable_error_report()->mutable_noise_confidence_interval()) = |
| interval.value(); |
| } |
| |
| // Add noise to sum. Use the remaining privacy budget. |
| double noisy_sum = mechanism_->AddNoise(sum, remaining_budget); |
| if (std::is_integral<T>::value) { |
| T value; |
| SafeCastFromDouble<T>(std::round(noisy_sum), value); |
| AddToOutput<T>(&output, value); |
| } else { |
| AddToOutput<T>(&output, noisy_sum); |
| } |
| return output; |
| } |
| |
| void ResetState() override { |
| std::fill(pos_sum_.begin(), pos_sum_.end(), 0); |
| std::fill(neg_sum_.begin(), neg_sum_.end(), 0); |
| if (approx_bounds_) { |
| approx_bounds_->Reset(); |
| mechanism_ = nullptr; |
| } |
| } |
| |
| private: |
| base::StatusOr<ConfidenceInterval> NoiseConfidenceIntervalImpl( |
| double confidence_level, double privacy_budget = 1) { |
| if (!mechanism_) { |
| return absl::InvalidArgumentError( |
| "Mechanism not yet constructed. Try getting noise confidence " |
| "interval after generating result."); |
| } |
| return mechanism_->NoiseConfidenceInterval(confidence_level, |
| privacy_budget); |
| } |
| |
| static base::StatusOr<std::unique_ptr<NumericalMechanism>> BuildMechanism( |
| std::unique_ptr<NumericalMechanismBuilder> mechanism_builder, |
| const double epsilon, const double l0_sensitivity, |
| const double max_contributions_per_partition, const T lower, |
| const T upper) { |
| return mechanism_builder->SetEpsilon(epsilon) |
| .SetL0Sensitivity(l0_sensitivity) |
| .SetLInfSensitivity(max_contributions_per_partition * |
| std::max(std::abs(lower), std::abs(upper))) |
| .Build(); |
| } |
| |
| // Vectors of partial values stored for automatic clamping. |
| std::vector<T> pos_sum_, neg_sum_; |
| |
| // If manually set, these values are determined upon construction. Otherwise, |
| // they are found in GenerateResult(). |
| T lower_, upper_; |
| |
| // Used to construct mechanism once bounds are obtained for auto-bounding. |
| std::unique_ptr<NumericalMechanismBuilder> mechanism_builder_; |
| const double l0_sensitivity_; |
| const int max_contributions_per_partition_; |
| |
| // Will be available upon BoundedSum for manual bounding, and constructed upon |
| // GenerateResult for auto-bounding. |
| std::unique_ptr<NumericalMechanism> mechanism_; |
| |
| // If this is not nullptr, we are automatically determining bounds. Otherwise, |
| // lower and upper contain the manually set bounds. |
| std::unique_ptr<ApproxBounds<T>> approx_bounds_; |
| }; |
| |
| } // namespace differential_privacy |
| |
| #endif // DIFFERENTIAL_PRIVACY_ALGORITHMS_BOUNDED_SUM_H_ |