| // |
| // Copyright 2019 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| |
| #include "algorithms/bounded-variance.h" |
| |
| #include <cmath> |
| #include <functional> |
| #include <limits> |
| #include <memory> |
| #include <numeric> |
| #include <random> |
| #include <string> |
| #include <type_traits> |
| #include <utility> |
| #include <vector> |
| |
| #include <cstdint> |
| #include "base/logging.h" |
| #include "base/testing/proto_matchers.h" |
| #include "base/testing/status_matchers.h" |
| #include "gmock/gmock.h" |
| #include "gtest/gtest.h" |
| #include "absl/memory/memory.h" |
| #include "absl/random/distributions.h" |
| #include "absl/status/status.h" |
| #include "absl/status/statusor.h" |
| #include "algorithms/approx-bounds.h" |
| #include "algorithms/numerical-mechanisms-testing.h" |
| #include "algorithms/numerical-mechanisms.h" |
| #include "proto/util.h" |
| #include "proto/data.pb.h" |
| #include "proto/summary.pb.h" |
| |
| namespace differential_privacy { |
| |
| // Provides limited-scope static methods for interacting with a BoundedVariance |
| // object for testing purposes. |
| class BoundedVarianceTestPeer { |
| public: |
| template <typename T, |
| std::enable_if_t<std::is_arithmetic<T>::value>* = nullptr> |
| static void AddMultipleEntries(const T& t, int64_t num_of_entries, |
| BoundedVariance<T>* bv) { |
| bv->AddMultipleEntries(t, num_of_entries); |
| } |
| }; |
| |
| namespace { |
| |
| using test_utils::ZeroNoiseMechanism; |
| using ::testing::_; |
| using ::testing::DoubleEq; |
| using ::testing::DoubleNear; |
| using ::differential_privacy::base::testing::EqualsProto; |
| using ::testing::HasSubstr; |
| using ::testing::NotNull; |
| using ::differential_privacy::base::testing::StatusIs; |
| |
| constexpr double kSmallEpsilon = 0.00000001; |
| constexpr int64_t kNumSamples = 10000; |
| constexpr double kDefaultEpsilon = 1.1; |
| // Max upper bound (and negative lower bound) BoundedVariance will accept |
| // Used in overflow-related tests |
| const int64_t kSqrtInt64Max = std::sqrt(std::numeric_limits<int64_t>::max()); |
| |
| template <typename T> |
| class BoundedVarianceTest : public testing::Test { |
| protected: |
| void SetUp() override {} |
| |
| void TearDown() override {} |
| |
| private: |
| double Variance(std::vector<T> values) { |
| if (values.empty()) { |
| return 0; |
| } |
| int num_of_values = values.size(); |
| double mean = accumulate(values.begin(), values.end(), 0.0) / num_of_values; |
| double variance = 0; |
| for (int i = 0; i < num_of_values; ++i) { |
| variance += (values[i] - mean) * (values[i] - mean); |
| } |
| variance /= num_of_values; |
| return variance; |
| } |
| }; |
| |
| // Typed test to iterate all test cases through all supported versions of |
| // BoundedVarianceTest, currently (int64_t, double). |
| typedef ::testing::Types<int64_t, double> NumericTypes; |
| TYPED_TEST_SUITE(BoundedVarianceTest, NumericTypes); |
| |
| TYPED_TEST(BoundedVarianceTest, BasicTest) { |
| std::vector<TypeParam> a = {1, 2, 3, 4, 5}; |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv = |
| typename BoundedVariance<TypeParam>::Builder() |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetEpsilon(1.0) |
| .SetLower(0) |
| .SetUpper(6) |
| .Build(); |
| ASSERT_OK(bv); |
| absl::StatusOr<Output> result = (*bv)->Result(a.begin(), a.end()); |
| ASSERT_OK(result); |
| EXPECT_DOUBLE_EQ(GetValue<double>(*result), 2.0); |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, BasicMultipleEntriesTest) { |
| std::vector<TypeParam> a = {1, 2, 3, 4, 5}; |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv = |
| typename BoundedVariance<TypeParam>::Builder() |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetEpsilon(1.0) |
| .SetLower(0) |
| .SetUpper(6) |
| .Build(); |
| ASSERT_OK(bv); |
| for (const auto& input : a) { |
| BoundedVarianceTestPeer::AddMultipleEntries<TypeParam>(input, input, |
| (*bv).get()); |
| } |
| absl::StatusOr<Output> result = (*bv)->PartialResult(); |
| ASSERT_OK(result); |
| EXPECT_NEAR(GetValue<double>(*result), 14.0 / 9.0, 0.0000001); |
| } |
| |
| TEST(BoundedVarianceTest, AddMultipleEntriesInvalidInputTest) { |
| absl::StatusOr<std::unique_ptr<BoundedVariance<float>>> bv = |
| typename BoundedVariance<float>::Builder() |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetEpsilon(1.0) |
| .SetLower(0) |
| .SetUpper(6) |
| .Build(); |
| ASSERT_OK(bv); |
| |
| // Add a few basic entries so we can expect a predictable variance. |
| std::vector<float> a = {1, 2, 3, 4, 5}; |
| for (const auto& input : a) { |
| BoundedVarianceTestPeer::AddMultipleEntries<float>(input, input, |
| (*bv).get()); |
| } |
| |
| BoundedVarianceTestPeer::AddMultipleEntries<float>( |
| std::numeric_limits<float>::quiet_NaN(), 1, (*bv).get()); |
| absl::StatusOr<Output> result = (*bv)->PartialResult(); |
| ASSERT_OK(result); |
| EXPECT_NEAR(GetValue<double>(*result), 14.0 / 9.0, 0.0000001); |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, AddMultipleEntriesInvalidNumberOfEntriesTest) { |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv = |
| typename BoundedVariance<TypeParam>::Builder() |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetEpsilon(1.0) |
| .SetLower(0) |
| .SetUpper(6) |
| .Build(); |
| ASSERT_OK(bv); |
| |
| // Add a few basic entries so we can expect a predictable variance. |
| std::vector<TypeParam> a = {1, 2, 3, 4, 5}; |
| for (const auto& input : a) { |
| BoundedVarianceTestPeer::AddMultipleEntries<TypeParam>(input, input, |
| (*bv).get()); |
| } |
| |
| std::vector<int64_t> invalid_entries{0, -1, |
| std::numeric_limits<int64_t>::lowest()}; |
| for (int64_t n_entries : invalid_entries) { |
| BoundedVarianceTestPeer::AddMultipleEntries<TypeParam>(1, n_entries, |
| (*bv).get()); |
| } |
| |
| absl::StatusOr<Output> result = (*bv)->PartialResult(); |
| ASSERT_OK(result); |
| EXPECT_NEAR(GetValue<double>(*result), 14.0 / 9.0, 0.0000001); |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, RepeatedResultTest) { |
| std::vector<TypeParam> a = {1, 2, 3, 4, 5}; |
| typename BoundedVariance<TypeParam>::Builder builder; |
| builder.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetEpsilon(1.0) |
| .SetLower(0) |
| .SetUpper(6); |
| |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv1 = |
| builder.Build(); |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv2 = |
| builder.Build(); |
| ASSERT_OK(bv1); |
| ASSERT_OK(bv2); |
| (*bv1)->AddEntries(a.begin(), a.end()); |
| (*bv2)->AddEntries(a.begin(), a.end()); |
| absl::StatusOr<Output> result1 = (*bv1)->PartialResult(0.5); |
| ASSERT_OK(result1); |
| absl::StatusOr<Output> result2 = (*bv2)->PartialResult(0.5); |
| ASSERT_OK(result2); |
| EXPECT_DOUBLE_EQ(GetValue<double>(*result1), GetValue<double>(*result2)); |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, InsufficientPrivacyBudgetTest) { |
| std::vector<TypeParam> a = {1, 2, 3, 4, 5}; |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv = |
| typename BoundedVariance<TypeParam>::Builder() |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetEpsilon(1.0) |
| .SetLower(0) |
| .SetUpper(6) |
| .Build(); |
| ASSERT_OK(bv); |
| (*bv)->AddEntries(a.begin(), a.end()); |
| ASSERT_OK((*bv)->PartialResult()); |
| EXPECT_THAT((*bv)->PartialResult(), |
| StatusIs(absl::StatusCode::kInvalidArgument, |
| HasSubstr("can only produce results once"))); |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, ClampInputTest) { |
| std::vector<TypeParam> a = {0, 0, 1, 1}; |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv = |
| typename BoundedVariance<TypeParam>::Builder() |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetEpsilon(1.0) |
| .SetLower(1) |
| .SetUpper(3) |
| .Build(); |
| ASSERT_OK(bv); |
| absl::StatusOr<Output> result = (*bv)->Result(a.begin(), a.end()); |
| ASSERT_OK(result); |
| // The input will be clamped to {1, 1, 1, 1} returning variance 0. |
| EXPECT_DOUBLE_EQ(GetValue<double>(*result), 0.0); |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, ClampOutputLowerTest) { |
| std::vector<TypeParam> a = {1, 1, 1, 1}; |
| |
| for (int i = 0; i < kNumSamples; ++i) { |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv = |
| typename BoundedVariance<TypeParam>::Builder() |
| .SetEpsilon(kSmallEpsilon) |
| .SetLower(1) |
| .SetUpper(2) |
| .Build(); |
| ASSERT_OK(bv); |
| absl::StatusOr<Output> result = (*bv)->Result(a.begin(), a.end()); |
| EXPECT_GE(GetValue<double>(*result), 0.0); |
| } |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, ClampOutputUpperTest) { |
| std::vector<TypeParam> a = {0, 10}; |
| |
| for (int i = 0; i < kNumSamples; ++i) { |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv = |
| typename BoundedVariance<TypeParam>::Builder() |
| .SetEpsilon(kSmallEpsilon) |
| .SetLower(0) |
| .SetUpper(10) |
| .Build(); |
| ASSERT_OK(bv); |
| absl::StatusOr<Output> result = (*bv)->Result(a.begin(), a.end()); |
| ASSERT_OK(result); |
| EXPECT_LE(GetValue<double>(*result), 25.0); |
| } |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, EmptyInputsBoundsTest) { |
| std::vector<TypeParam> a; |
| double lower = 0; |
| double upper = 10; |
| // See header comment in BoundedVariance that states "The output will also be |
| // clamped between 0 and (upper - lower)^2." |
| double result_lower = 0; |
| double result_upper = std::pow(upper - lower, 2); |
| int num_of_trials = 100; // Chosen abitrarily |
| for (int i = 0; i < num_of_trials; ++i) { |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv = |
| typename BoundedVariance<TypeParam>::Builder() |
| .SetLaplaceMechanism( |
| absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetEpsilon(1.0) |
| .SetLower(lower) |
| .SetUpper(upper) |
| .Build(); |
| ASSERT_OK(bv); |
| absl::StatusOr<Output> result = (*bv)->Result(a.begin(), a.end()); |
| ASSERT_OK(result); |
| EXPECT_GE(GetValue<double>(*result), result_lower); |
| EXPECT_LE(GetValue<double>(*result), result_upper); |
| } |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, GaussianInputTest) { |
| // Test samples points from a Gaussian and checks that the differentially |
| // private variance is the variance used to generate the distribution. |
| absl::StatusOr<std::unique_ptr<BoundedVariance<double>>> bv = |
| BoundedVariance<double>::Builder() |
| .SetLower(-20) |
| .SetUpper(20) |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .Build(); |
| ASSERT_OK(bv); |
| // Generate the large sample of points from a seeded Laplace distribution. |
| constexpr int samples = 100000; |
| std::mt19937 gen; |
| double stdev = 2; |
| for (int i = 0; i < samples; i++) { |
| (*bv)->AddEntry(std::round(absl::Gaussian<double>(gen, 0, stdev))); |
| } |
| absl::StatusOr<Output> result = (*bv)->PartialResult(); |
| ASSERT_OK(result); |
| EXPECT_NEAR(GetValue<double>(*result), stdev * stdev, 0.1); |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, MaxContributionsVarianceTest) { |
| const std::vector<TypeParam> input = {-1, -1, 1, 1}; |
| |
| // Calculate variance of input. |
| const double real_mean = |
| std::accumulate(input.begin(), input.end(), 0.0) / input.size(); |
| double sum = 0; |
| for (const auto& i : input) { |
| sum += std::pow(i - real_mean, 2); |
| } |
| const double real_variance = sum / (input.size() - 1); |
| |
| std::function<double(int)> sample_variance_for_max_contribution = |
| [&input, real_variance](int max_contribution) { |
| double sum = 0; |
| for (int i = 0; i < kNumSamples; ++i) { |
| auto variance = typename BoundedVariance<TypeParam>::Builder() |
| .SetMaxContributionsPerPartition(max_contribution) |
| .SetEpsilon(1) |
| .SetLower(-1) |
| .SetUpper(1) |
| .Build(); |
| CHECK_EQ(variance.status(), absl::OkStatus()); |
| auto out = (*variance)->Result(input.begin(), input.end()); |
| CHECK_EQ(out.status(), absl::OkStatus()); |
| sum += std::pow(GetValue<double>(*out) - real_variance, 2); |
| } |
| return sum / (kNumSamples - 1); |
| }; |
| |
| // We expect the sample variance with max contribution 2 to be (significantly) |
| // bigger than with max contribution 1. |
| EXPECT_GT(sample_variance_for_max_contribution(2), |
| 1.1 * sample_variance_for_max_contribution(1)); |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, MergeDifferentBoundingStrategy) { |
| // Manual bounding. |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv1 = |
| typename BoundedVariance<TypeParam>::Builder() |
| .SetLower(0) |
| .SetUpper(3) |
| .Build(); |
| ASSERT_OK(bv1); |
| Summary summary = (*bv1)->Serialize(); |
| |
| // Auto bounding. |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv2 = |
| typename BoundedVariance<TypeParam>::Builder().Build(); |
| ASSERT_OK(bv2); |
| |
| // Error due to different strategies. |
| ASSERT_THAT( |
| (*bv2)->Merge(summary), |
| StatusIs( |
| absl::StatusCode::kInternal, |
| HasSubstr( |
| "Merged BoundedVariance must have the same bounding strategy."))); |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, SerializeMergeTest) { |
| // Get summary of first BoundedVariance between entries. |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv1 = |
| typename BoundedVariance<TypeParam>::Builder() |
| .SetEpsilon(.5) |
| .SetLower(0) |
| .SetUpper(3) |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .Build(); |
| ASSERT_OK(bv1); |
| (*bv1)->AddEntry(2); |
| Summary summary = (*bv1)->Serialize(); |
| (*bv1)->AddEntry(6); |
| |
| // Merge summary into second BoundedVariance. |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv2 = |
| typename BoundedVariance<TypeParam>::Builder() |
| .SetEpsilon(.5) |
| .SetLower(0) |
| .SetUpper(3) |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .Build(); |
| ASSERT_OK(bv2); |
| (*bv2)->AddEntry(6); |
| EXPECT_OK((*bv2)->Merge(summary)); |
| |
| // Check equality. |
| absl::StatusOr<Output> result1 = (*bv1)->PartialResult(); |
| ASSERT_OK(result1); |
| absl::StatusOr<Output> result2 = (*bv2)->PartialResult(); |
| ASSERT_OK(result2); |
| EXPECT_DOUBLE_EQ(GetValue<double>(*result1), GetValue<double>(*result2)); |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, SerializeMergePartialValuesTest) { |
| typename ApproxBounds<TypeParam>::Builder bounds_builder; |
| typename BoundedVariance<TypeParam>::Builder builder; |
| |
| // Automatic bounding, so entries will be split and stored as partials. |
| absl::StatusOr<std::unique_ptr<ApproxBounds<TypeParam>>> bounds1 = |
| typename ApproxBounds<TypeParam>::Builder() |
| .SetThresholdForTest(0.5) |
| .SetNumBins(50) |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetEpsilon(kDefaultEpsilon / 2) |
| .Build(); |
| ASSERT_OK(bounds1); |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv1 = |
| typename BoundedVariance<TypeParam>::Builder() |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetEpsilon(kDefaultEpsilon) |
| .SetApproxBounds(std::move(bounds1).value()) |
| .Build(); |
| ASSERT_OK(bv1); |
| (*bv1)->AddEntry(-10); |
| (*bv1)->AddEntry(4); |
| Summary summary = (*bv1)->Serialize(); |
| (*bv1)->AddEntry(6); |
| |
| // Merge summary into second BoundedVariance. |
| absl::StatusOr<std::unique_ptr<ApproxBounds<TypeParam>>> bounds2 = |
| typename ApproxBounds<TypeParam>::Builder() |
| .SetThresholdForTest(0.5) |
| .SetNumBins(50) |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetEpsilon(kDefaultEpsilon / 2) |
| .Build(); |
| ASSERT_OK(bounds2); |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv2 = |
| typename BoundedVariance<TypeParam>::Builder() |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetEpsilon(kDefaultEpsilon) |
| .SetApproxBounds(std::move(bounds2).value()) |
| .Build(); |
| ASSERT_OK(bv2); |
| (*bv2)->AddEntry(6); |
| EXPECT_OK((*bv2)->Merge(summary)); |
| |
| // Check equality. Bounds are set to [-16, 8]. |
| absl::StatusOr<Output> result1 = (*bv1)->PartialResult(); |
| ASSERT_OK(result1); |
| absl::StatusOr<Output> result2 = (*bv2)->PartialResult(); |
| ASSERT_OK(result2); |
| EXPECT_DOUBLE_EQ(GetValue<double>(*result1), GetValue<double>(*result2)); |
| } |
| |
| TEST(BoundedVarianceTest, OverflowRawCountTest) { |
| typename BoundedVariance<double>::Builder builder; |
| |
| std::unique_ptr<BoundedVariance<double>> bv = |
| builder |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetLower(-1) |
| .SetUpper(1) |
| .Build() |
| .value(); |
| BoundedVarianceTestPeer::AddMultipleEntries<double>( |
| -0.5, std::numeric_limits<int64_t>::max(), bv.get()); |
| BoundedVarianceTestPeer::AddMultipleEntries<double>(-0.5, 1, bv.get()); |
| BoundedVarianceTestPeer::AddMultipleEntries<double>( |
| 0.5, std::numeric_limits<int64_t>::max(), bv.get()); |
| BoundedVarianceTestPeer::AddMultipleEntries<double>(0.5, 1, bv.get()); |
| BoundedVarianceTestPeer::AddMultipleEntries<double>(1, 5, bv.get()); |
| |
| auto result = bv->PartialResult(); |
| EXPECT_OK(result.status()); |
| // A partial_count_ overflow should result in a variance of 1.0. |
| EXPECT_DOUBLE_EQ(GetValue<double>(result.value()), 1.0); |
| } |
| |
| TEST(BoundedVarianceTest, OverflowAddEntryManualBounds) { |
| typename BoundedVariance<int32_t>::Builder builder; |
| |
| std::unique_ptr<BoundedVariance<int32_t>> bv = |
| builder |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetLower(-1) |
| .SetUpper(1) |
| .Build() |
| .value(); |
| |
| BoundedVarianceTestPeer::AddMultipleEntries<int32_t>( |
| 1, std::numeric_limits<int32_t>::max(), bv.get()); |
| BoundedVarianceTestPeer::AddMultipleEntries<int32_t>( |
| 1, std::numeric_limits<int32_t>::max(), bv.get()); |
| |
| auto result = bv->PartialResult(); |
| ASSERT_OK(result.status()); |
| // Overflow should result in a variance of 1.0. |
| EXPECT_DOUBLE_EQ(GetValue<double>(result.value()), 1.0); |
| } |
| |
| TEST(BoundedVarianceTest, UnderflowAddEntryManualBounds) { |
| typename BoundedVariance<int32_t>::Builder builder; |
| |
| std::unique_ptr<BoundedVariance<int32_t>> bv = |
| builder |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetLower(-1) |
| .SetUpper(1) |
| .Build() |
| .value(); |
| BoundedVarianceTestPeer::AddMultipleEntries<int32_t>( |
| -1, std::numeric_limits<int32_t>::max(), bv.get()); |
| BoundedVarianceTestPeer::AddMultipleEntries<int32_t>( |
| -1, std::numeric_limits<int32_t>::max(), bv.get()); |
| |
| auto result = bv->PartialResult(); |
| EXPECT_OK(result.status()); |
| // Overflow should result in a variance of 1.0 |
| EXPECT_DOUBLE_EQ(GetValue<double>(result.value()), 1.0); |
| } |
| |
| TEST(BoundedVarianceTest, OverflowRawCountMergeManualBoundsTest) { |
| typename BoundedVariance<double>::Builder builder; |
| |
| std::unique_ptr<BoundedVariance<double>> bv = |
| builder |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetLower(0) |
| .SetUpper(10) |
| .Build() |
| .value(); |
| BoundedVarianceTestPeer::AddMultipleEntries<double>( |
| 10, std::numeric_limits<int64_t>::max(), bv.get()); |
| |
| Summary summary = bv->Serialize(); |
| |
| std::unique_ptr<BoundedVariance<double>> bv2 = builder.Build().value(); |
| BoundedVarianceTestPeer::AddMultipleEntries<double>( |
| 10, std::numeric_limits<int64_t>::max(), bv.get()); |
| |
| EXPECT_OK(bv2->Merge(summary)); |
| |
| bv2->AddEntry(10); |
| bv2->AddEntry(10); |
| |
| auto result = bv2->PartialResult(); |
| EXPECT_OK(result.status()); |
| // An overflow should cause the count of entries wrap around to 1, which |
| // should result in the variance so large that it becomes clamped to |
| // IntervalLengthSquared(lower, upper) / 4, instead of based upon the actual |
| // data entries (which would be 0 if there was no count overflow, since all |
| // entries are the same and do not vary). |
| EXPECT_DOUBLE_EQ(GetValue<double>(result.value()), 25.0); |
| } |
| |
| TEST(BoundedVarianceTest, OverflowMergeManualBoundsTest) { |
| typename BoundedVariance<int32_t>::Builder builder; |
| |
| std::unique_ptr<BoundedVariance<int32_t>> bv = |
| builder |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetLower(-1) |
| .SetUpper(1) |
| .Build() |
| .value(); |
| BoundedVarianceTestPeer::AddMultipleEntries<int32_t>( |
| 1, std::numeric_limits<int32_t>::max(), bv.get()); |
| Summary summary = bv->Serialize(); |
| |
| std::unique_ptr<BoundedVariance<int32_t>> bv2 = builder.Build().value(); |
| BoundedVarianceTestPeer::AddMultipleEntries<int32_t>( |
| 1, std::numeric_limits<int32_t>::max(), bv2.get()); |
| |
| ASSERT_OK(bv2->Merge(summary)); |
| |
| auto result = bv2->PartialResult(); |
| ASSERT_OK(result.status()); |
| // Overflow should result in a variance of 1.0 |
| EXPECT_DOUBLE_EQ(GetValue<double>(result.value()), 1.0); |
| } |
| |
| TEST(BoundedVarianceTest, UnderflowMergeManualBoundsTest) { |
| typename BoundedVariance<int32_t>::Builder builder; |
| |
| std::unique_ptr<BoundedVariance<int32_t>> bv = |
| builder |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetLower(-1) |
| .SetUpper(1) |
| .Build() |
| .value(); |
| BoundedVarianceTestPeer::AddMultipleEntries<int32_t>( |
| -1, std::numeric_limits<int32_t>::max(), bv.get()); |
| Summary summary = bv->Serialize(); |
| |
| std::unique_ptr<BoundedVariance<int32_t>> bv2 = builder.Build().value(); |
| BoundedVarianceTestPeer::AddMultipleEntries<int32_t>( |
| -1, std::numeric_limits<int32_t>::max(), bv2.get()); |
| |
| EXPECT_OK(bv2->Merge(summary)); |
| |
| auto result = bv2->PartialResult(); |
| EXPECT_OK(result.status()); |
| // Underflow should result in a variance of 1.0 |
| EXPECT_DOUBLE_EQ(GetValue<double>(result.value()), 1.0); |
| } |
| |
| TEST(BoundedVarianceTest, SensitivityOverflow) { |
| absl::StatusOr<std::unique_ptr<BoundedVariance<int64_t>>> failed_bv = |
| BoundedVariance<int64_t>::Builder() |
| .SetEpsilon(1.0) |
| .SetLower(std::numeric_limits<int64_t>::lowest()) |
| .SetUpper(std::numeric_limits<int64_t>::max()) |
| .Build(); |
| EXPECT_THAT( |
| failed_bv, |
| StatusIs(absl::StatusCode::kInvalidArgument, |
| HasSubstr("Sensitivity calculation caused integer overflow."))); |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, SensitivityTooHigh) { |
| // Make bounds so taking the interval squared won't overflow. |
| absl::StatusOr<std::unique_ptr<BoundedVariance<double>>> failed_bv = |
| BoundedVariance<double>::Builder() |
| .SetLower(0) |
| .SetUpper(std::pow(std::numeric_limits<double>::max() / 2, .5)) |
| .Build(); |
| EXPECT_THAT(failed_bv, StatusIs(absl::StatusCode::kInvalidArgument, |
| HasSubstr("Sensitivity is too high."))); |
| } |
| |
| TEST(BoundedVarianceTest, DropNanEntries) { |
| std::vector<double> a = {1, 2, 3, 4, NAN, NAN, 5}; |
| absl::StatusOr<std::unique_ptr<BoundedVariance<double>>> bv = |
| BoundedVariance<double>::Builder() |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetEpsilon(1) |
| .SetLower(0) |
| .SetUpper(6) |
| .Build(); |
| ASSERT_OK(bv); |
| absl::StatusOr<Output> result = (*bv)->Result(a.begin(), a.end()); |
| ASSERT_OK(result); |
| EXPECT_DOUBLE_EQ(GetValue<double>(*result), 2.0); |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, PropagateApproxBoundsError) { |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv = |
| typename BoundedVariance<TypeParam>::Builder().SetEpsilon(1).Build(); |
| ASSERT_OK(bv); |
| |
| // Automatic bounds are needed but there is no input, so the count-threshhold |
| // should exceed any bin count. |
| EXPECT_THAT((*bv)->PartialResult(), |
| StatusIs(absl::StatusCode::kFailedPrecondition, |
| HasSubstr("run over a larger dataset"))); |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, AutomaticBoundsContainZero) { |
| std::vector<TypeParam> a = {0, 8, -8, |
| std::numeric_limits<TypeParam>::lowest(), |
| std::numeric_limits<TypeParam>::max()}; |
| absl::StatusOr<std::unique_ptr<ApproxBounds<TypeParam>>> bounds = |
| typename ApproxBounds<TypeParam>::Builder() |
| .SetEpsilon(0.5) |
| .SetNumBins(4) |
| .SetBase(2) |
| .SetScale(1) |
| .SetThresholdForTest(1) |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .Build(); |
| ASSERT_OK(bounds); |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv = |
| typename BoundedVariance<TypeParam>::Builder() |
| .SetEpsilon(1) |
| .SetApproxBounds(std::move(*bounds)) |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .Build(); |
| ASSERT_OK(bv); |
| (*bv)->AddEntries(a.begin(), a.end()); |
| |
| Output expected_output; |
| AddToOutput<double>(&expected_output, 51.2); |
| BoundingReport* report = |
| expected_output.mutable_error_report()->mutable_bounding_report(); |
| SetValue<TypeParam>(report->mutable_lower_bound(), -8); |
| SetValue<TypeParam>(report->mutable_upper_bound(), 8); |
| report->set_num_inputs(a.size()); |
| report->set_num_outside(0); |
| |
| absl::StatusOr<Output> actual_output = (*bv)->PartialResult(); |
| ASSERT_OK(actual_output); |
| EXPECT_THAT(*actual_output, EqualsProto(expected_output)); |
| } |
| |
| TEST(BoundedVarianceTest, AutomaticBoundsNegative) { |
| std::vector<double> a = {5, -2, -2, -4, -6, -6}; |
| absl::StatusOr<std::unique_ptr<ApproxBounds<double>>> bounds = |
| ApproxBounds<double>::Builder() |
| .SetEpsilon(0.5) |
| .SetNumBins(5) |
| .SetBase(2) |
| .SetScale(1) |
| .SetThresholdForTest(1.5) |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .Build(); |
| ASSERT_OK(bounds); |
| absl::StatusOr<std::unique_ptr<BoundedVariance<double>>> bv = |
| BoundedVariance<double>::Builder() |
| .SetEpsilon(1) |
| .SetApproxBounds(std::move(bounds).value()) |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .Build(); |
| ASSERT_OK(bv); |
| (*bv)->AddEntries(a.begin(), a.end()); |
| |
| absl::StatusOr<Output> result = (*bv)->PartialResult(); |
| ASSERT_OK(result); |
| |
| // 5 gets clamped to -1 |
| double expected_sum = -21; |
| double expected_sos = 97; |
| double expected_variance = |
| (expected_sos - expected_sum * expected_sum / a.size()) / a.size(); |
| |
| EXPECT_THAT(GetValue<double>(*result), |
| DoubleNear(expected_variance, expected_variance / 10000)); |
| |
| BoundingReport expected_report; |
| SetValue<double>(expected_report.mutable_lower_bound(), -8); |
| SetValue<double>(expected_report.mutable_upper_bound(), -1); |
| expected_report.set_num_inputs(a.size()); |
| expected_report.set_num_outside(1); |
| |
| EXPECT_THAT(result->error_report().bounding_report(), |
| EqualsProto(expected_report)); |
| } |
| |
| TEST(BoundedVarianceTest, AutomaticBoundsPositive) { |
| std::vector<double> a = {-5, 2, 2, 4, 6, 6}; |
| absl::StatusOr<std::unique_ptr<ApproxBounds<double>>> bounds = |
| ApproxBounds<double>::Builder() |
| .SetEpsilon(0.5) |
| .SetNumBins(5) |
| .SetBase(2) |
| .SetScale(1) |
| .SetThresholdForTest(1.5) |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .Build(); |
| ASSERT_OK(bounds); |
| absl::StatusOr<std::unique_ptr<BoundedVariance<double>>> bv = |
| BoundedVariance<double>::Builder() |
| .SetEpsilon(1) |
| .SetApproxBounds(std::move(bounds).value()) |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .Build(); |
| ASSERT_OK(bv); |
| (*bv)->AddEntries(a.begin(), a.end()); |
| |
| absl::StatusOr<Output> result = (*bv)->PartialResult(); |
| ASSERT_OK(result); |
| |
| // -5 gets clamped to 1 |
| double expected_sum = 21; |
| double expected_sos = 97; |
| double expected_variance = |
| (expected_sos - expected_sum * expected_sum / a.size()) / a.size(); |
| |
| EXPECT_THAT(GetValue<double>(*result), |
| DoubleNear(expected_variance, expected_variance / 10000)); |
| |
| BoundingReport expected_report; |
| SetValue<double>(expected_report.mutable_lower_bound(), 1); |
| SetValue<double>(expected_report.mutable_upper_bound(), 8); |
| expected_report.set_num_inputs(a.size()); |
| expected_report.set_num_outside(1); |
| |
| EXPECT_THAT(result->error_report().bounding_report(), |
| EqualsProto(expected_report)); |
| } |
| |
| // Test not providing ApproxBounds and instead using the default. |
| TYPED_TEST(BoundedVarianceTest, AutomaticBoundsDefault) { |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv = |
| typename BoundedVariance<TypeParam>::Builder() |
| .SetEpsilon(1) |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .Build(); |
| ASSERT_OK(bv); |
| std::vector<TypeParam> big(570, 10); |
| std::vector<TypeParam> small(570, -10); |
| (*bv)->AddEntries(big.begin(), big.end()); |
| (*bv)->AddEntries(small.begin(), small.end()); |
| |
| Output expected_output; |
| AddToOutput<double>(&expected_output, 100); |
| BoundingReport* report = |
| expected_output.mutable_error_report()->mutable_bounding_report(); |
| SetValue<TypeParam>(report->mutable_lower_bound(), -16); |
| SetValue<TypeParam>(report->mutable_upper_bound(), 16); |
| report->set_num_inputs(big.size() + small.size()); |
| report->set_num_outside(0); |
| |
| absl::StatusOr<Output> actual_output = (*bv)->PartialResult(); |
| ASSERT_OK(actual_output); |
| EXPECT_THAT(*actual_output, EqualsProto(expected_output)); |
| } |
| |
| // Test when a bound is 0. |
| TYPED_TEST(BoundedVarianceTest, AutomaticBoundsZero) { |
| std::vector<TypeParam> a = {0, 0, 4, 4, -2, 7}; |
| absl::StatusOr<std::unique_ptr<ApproxBounds<TypeParam>>> bounds = |
| typename ApproxBounds<TypeParam>::Builder() |
| .SetEpsilon(0.5) |
| .SetNumBins(4) |
| .SetBase(2) |
| .SetScale(1) |
| .SetThresholdForTest(1.5) |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .Build(); |
| ASSERT_OK(bounds); |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv = |
| typename BoundedVariance<TypeParam>::Builder() |
| .SetEpsilon(1) |
| .SetApproxBounds(std::move(bounds).value()) |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .Build(); |
| ASSERT_OK(bv); |
| (*bv)->AddEntries(a.begin(), a.end()); |
| |
| // Bounds are [0, 4]. -2 gets clamped to 0. 7 gets clamped to 4. |
| absl::StatusOr<Output> result = (*bv)->PartialResult(); |
| ASSERT_OK(result); |
| EXPECT_DOUBLE_EQ(GetValue<double>(result->elements(0).value()), 4); |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, Reset) { |
| // Construct approximate variance. |
| absl::StatusOr<std::unique_ptr<ApproxBounds<TypeParam>>> bounds = |
| typename ApproxBounds<TypeParam>::Builder() |
| .SetNumBins(3) |
| .SetBase(10) |
| .SetScale(1) |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .SetEpsilon(kDefaultEpsilon / 2) |
| .SetThresholdForTest(0.5) |
| .Build(); |
| ASSERT_OK(bounds); |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv = |
| typename BoundedVariance<TypeParam>::Builder() |
| .SetEpsilon(1) |
| .SetApproxBounds(std::move(bounds).value()) |
| .SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>()) |
| .Build(); |
| ASSERT_OK(bv); |
| |
| // Reset between adding vectors. |
| std::vector<TypeParam> a = {-10, -10, -10, 1000, 1000, 1000}; |
| std::vector<TypeParam> b = {-100, -100, -100, 100, 100, 100}; |
| (*bv)->AddEntries(a.begin(), a.end()); |
| (*bv)->Reset(); |
| (*bv)->AddEntries(b.begin(), b.end()); |
| |
| // Check result is only affected by vector b. Bounds are [-100, 100]. |
| absl::StatusOr<Output> result = (*bv)->PartialResult(); |
| ASSERT_OK(result); |
| EXPECT_DOUBLE_EQ(GetValue<double>(result->elements(0).value()), 10000); |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, MemoryUsed) { |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv = |
| typename BoundedVariance<TypeParam>::Builder().Build(); |
| ASSERT_OK(bv); |
| EXPECT_GT((*bv)->MemoryUsed(), 0); |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, SplitsEpsilonWithAutomaticBounds) { |
| double epsilon = 1.0; |
| |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv = |
| typename BoundedVariance<TypeParam>::Builder() |
| .SetEpsilon(epsilon) |
| .Build(); |
| ASSERT_OK(bv); |
| auto* bvi = |
| dynamic_cast<BoundedVarianceWithApproxBounds<TypeParam>*>(bv->get()); |
| |
| EXPECT_NEAR((*bv)->GetEpsilon(), epsilon, 1e-10); |
| EXPECT_NEAR((*bv)->GetEpsilon(), |
| bvi->GetBoundingEpsilon() + bvi->GetAggregationEpsilon(), 1e-10); |
| EXPECT_GT(bvi->GetBoundingEpsilon(), 0); |
| EXPECT_LT(bvi->GetBoundingEpsilon(), epsilon); |
| EXPECT_GT(bvi->GetAggregationEpsilon(), 0); |
| EXPECT_LT(bvi->GetAggregationEpsilon(), epsilon); |
| } |
| |
| TYPED_TEST(BoundedVarianceTest, |
| BuilderWithApproxBoundsMoreBudgetThanTotalBudgetFails) { |
| absl::StatusOr<std::unique_ptr<ApproxBounds<TypeParam>>> bounds = |
| typename ApproxBounds<TypeParam>::Builder().SetEpsilon(1.1).Build(); |
| ASSERT_OK(bounds); |
| absl::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv = |
| typename BoundedVariance<TypeParam>::Builder() |
| .SetEpsilon(1.09) |
| .SetApproxBounds(std::move(bounds).value()) |
| .Build(); |
| ASSERT_THAT(bv.status(), |
| StatusIs(absl::StatusCode::kInvalidArgument, |
| HasSubstr("Approx Bounds consumes more epsilon"))); |
| } |
| |
| TEST(BoundedVarianceWithFixedBoundsTest, |
| ConsumesAllBudgetOfNumericalMechanisms) { |
| std::unique_ptr<test_utils::MockLaplaceMechanism> mock_count_mechanism = |
| std::make_unique<test_utils::MockLaplaceMechanism>(); |
| std::unique_ptr<test_utils::MockLaplaceMechanism> mock_sum_mechanism = |
| std::make_unique<test_utils::MockLaplaceMechanism>(); |
| std::unique_ptr<test_utils::MockLaplaceMechanism> |
| mock_sum_of_squares_mechanism = |
| std::make_unique<test_utils::MockLaplaceMechanism>(); |
| |
| test_utils::MockLaplaceMechanism* mock_count_ptr = mock_count_mechanism.get(); |
| test_utils::MockLaplaceMechanism* mock_sum_ptr = mock_sum_mechanism.get(); |
| test_utils::MockLaplaceMechanism* mock_sum_of_squares_ptr = |
| mock_sum_of_squares_mechanism.get(); |
| |
| // For a double bounded variance, we add int noise to the count and double |
| // noise to the sum and the sum of squares. |
| EXPECT_CALL(*mock_count_ptr, AddInt64Noise(_)).Times(1); |
| EXPECT_CALL(*mock_sum_ptr, AddDoubleNoise(_)).Times(1); |
| EXPECT_CALL(*mock_sum_of_squares_ptr, AddDoubleNoise(_)).Times(1); |
| |
| BoundedVarianceWithFixedBounds<double> bv( |
| /*epsilon=*/1.0, |
| /*lower=*/-1, |
| /*upper=*/1, std::move(mock_count_mechanism), |
| std::move(mock_sum_mechanism), std::move(mock_sum_of_squares_mechanism)); |
| |
| for (int i = 0; i < 10; ++i) { |
| bv.AddEntry(1.0); |
| } |
| |
| EXPECT_OK(bv.PartialResult()); |
| } |
| |
| TEST(BoundedVarianceTest, ApproxBoundsMechanismHasExpectedVariance) { |
| const int max_partitions_contributed = 2; |
| const int max_contributions_per_partition = 3; |
| |
| absl::StatusOr<std::unique_ptr<BoundedVariance<double>>> bv = |
| BoundedVariance<double>::Builder() |
| .SetEpsilon(kDefaultEpsilon) |
| .SetMaxPartitionsContributed(max_partitions_contributed) |
| .SetMaxContributionsPerPartition(max_contributions_per_partition) |
| .Build(); |
| ASSERT_OK(bv); |
| |
| auto* bv_with_approx_bounds = |
| static_cast<BoundedVarianceWithApproxBounds<double>*>(bv.value().get()); |
| ASSERT_THAT(bv_with_approx_bounds, NotNull()); |
| |
| const double expected_variance = |
| LaplaceMechanism::Builder() |
| .SetEpsilon(kDefaultEpsilon / 2) |
| .SetL0Sensitivity(max_partitions_contributed) |
| .SetLInfSensitivity(max_contributions_per_partition) |
| .Build() |
| .value() |
| ->GetVariance(); |
| |
| ASSERT_THAT(bv_with_approx_bounds->GetApproxBoundsForTesting() |
| ->GetMechanismForTesting() |
| ->GetVariance(), |
| DoubleEq(expected_variance)); |
| } |
| |
| } // namespace |
| } // namespace differential_privacy |