blob: 31c1fedf9b41b88936958833db9a410e6567f1e6 [file] [log] [blame]
//
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "algorithms/bounded-variance.h"
#include <limits>
#include "base/testing/proto_matchers.h"
#include "base/testing/status_matchers.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/random/distributions.h"
#include "absl/status/status.h"
#include "algorithms/approx-bounds.h"
#include "algorithms/numerical-mechanisms-testing.h"
namespace differential_privacy {
// Provides limited-scope static methods for interacting with a BoundedVariance
// object for testing purposes.
class BoundedVarianceTestPeer {
public:
template <typename T,
std::enable_if_t<std::is_arithmetic<T>::value>* = nullptr>
static void AddMultipleEntries(const T& t, uint64_t num_of_entries,
BoundedVariance<T>* bv) {
bv->AddMultipleEntries(t, num_of_entries);
}
};
namespace {
using test_utils::ZeroNoiseMechanism;
using ::testing::DoubleNear;
using ::differential_privacy::base::testing::EqualsProto;
using ::testing::HasSubstr;
using ::differential_privacy::base::testing::StatusIs;
constexpr double kSmallEpsilon = 0.00000001;
constexpr int64_t kNumSamples = 10000;
// Max upper bound (and negative lower bound) BoundedVariance will accept
// Used in overflow-related tests
const int64_t kSqrtInt64Max = sqrt(std::numeric_limits<int64_t>::max());
template <typename T>
class BoundedVarianceTest : public testing::Test {
protected:
void SetUp() override {}
void TearDown() override {}
private:
double Variance(std::vector<T> values) {
if (values.empty()) {
return 0;
}
int num_of_values = values.size();
double mean = accumulate(values.begin(), values.end(), 0.0) / num_of_values;
double variance = 0;
for (int i = 0; i < num_of_values; ++i) {
variance += (values[i] - mean) * (values[i] - mean);
}
variance /= num_of_values;
return variance;
}
};
// Typed test to iterate all test cases through all supported versions of
// BoundedVarianceTest, currently (int64_t, double).
typedef ::testing::Types<int64_t, double> NumericTypes;
TYPED_TEST_SUITE(BoundedVarianceTest, NumericTypes);
TYPED_TEST(BoundedVarianceTest, BasicTest) {
std::vector<TypeParam> a = {1, 2, 3, 4, 5};
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv =
typename BoundedVariance<TypeParam>::Builder()
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.SetEpsilon(1.0)
.SetLower(0)
.SetUpper(6)
.Build();
ASSERT_OK(bv);
base::StatusOr<Output> result = (*bv)->Result(a.begin(), a.end());
ASSERT_OK(result);
EXPECT_DOUBLE_EQ(GetValue<double>(*result), 2.0);
}
TYPED_TEST(BoundedVarianceTest, BasicMultipleEntriesTest) {
std::vector<TypeParam> a = {1, 2, 3, 4, 5};
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv =
typename BoundedVariance<TypeParam>::Builder()
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.SetEpsilon(1.0)
.SetLower(0)
.SetUpper(6)
.Build();
ASSERT_OK(bv);
for (const auto& input : a) {
BoundedVarianceTestPeer::AddMultipleEntries<TypeParam>(input, input,
(*bv).get());
}
base::StatusOr<Output> result = (*bv)->PartialResult();
ASSERT_OK(result);
EXPECT_NEAR(GetValue<double>(*result), 14.0 / 9.0, 0.0000001);
}
TYPED_TEST(BoundedVarianceTest, RepeatedResultTest) {
std::vector<TypeParam> a = {1, 2, 3, 4, 5};
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv =
typename BoundedVariance<TypeParam>::Builder()
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.SetEpsilon(1.0)
.SetLower(0)
.SetUpper(6)
.Build();
ASSERT_OK(bv);
(*bv)->AddEntries(a.begin(), a.end());
base::StatusOr<Output> result1 = (*bv)->PartialResult(0.5);
ASSERT_OK(result1);
base::StatusOr<Output> result2 = (*bv)->PartialResult(0.5);
ASSERT_OK(result2);
EXPECT_DOUBLE_EQ(GetValue<double>(*result1), GetValue<double>(*result2));
}
TYPED_TEST(BoundedVarianceTest, ClampInputTest) {
std::vector<TypeParam> a = {0, 0, 10, 10};
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv =
typename BoundedVariance<TypeParam>::Builder()
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.SetEpsilon(1.0)
.SetLower(1)
.SetUpper(3)
.Build();
ASSERT_OK(bv);
base::StatusOr<Output> result = (*bv)->Result(a.begin(), a.end());
ASSERT_OK(result);
EXPECT_DOUBLE_EQ(GetValue<double>(*result), 1.0);
}
TYPED_TEST(BoundedVarianceTest, ClampOutputLowerTest) {
std::vector<TypeParam> a = {1, 1, 1, 1};
for (int i = 0; i < kNumSamples; ++i) {
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv =
typename BoundedVariance<TypeParam>::Builder()
.SetEpsilon(kSmallEpsilon)
.SetLower(1)
.SetUpper(2)
.Build();
ASSERT_OK(bv);
base::StatusOr<Output> result = (*bv)->Result(a.begin(), a.end());
EXPECT_GE(GetValue<double>(*result), 0.0);
}
}
TYPED_TEST(BoundedVarianceTest, ClampOutputUpperTest) {
std::vector<TypeParam> a = {0, 10};
for (int i = 0; i < kNumSamples; ++i) {
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv =
typename BoundedVariance<TypeParam>::Builder()
.SetEpsilon(kSmallEpsilon)
.SetLower(0)
.SetUpper(10)
.Build();
ASSERT_OK(bv);
base::StatusOr<Output> result = (*bv)->Result(a.begin(), a.end());
ASSERT_OK(result);
EXPECT_LE(GetValue<double>(*result), 25.0);
}
}
TYPED_TEST(BoundedVarianceTest, EmptyInputsBoundsTest) {
std::vector<TypeParam> a;
double lower = 0;
double upper = 10;
// See header comment in BoundedVariance that states "The output will also be
// clamped between 0 and (upper - lower)^2."
double result_lower = 0;
double result_upper = pow(upper - lower, 2);
int num_of_trials = 100; // Chosen abitrarily
for (int i = 0; i < num_of_trials; ++i) {
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv =
typename BoundedVariance<TypeParam>::Builder()
.SetLaplaceMechanism(
absl::make_unique<ZeroNoiseMechanism::Builder>())
.SetEpsilon(1.0)
.SetLower(lower)
.SetUpper(upper)
.Build();
ASSERT_OK(bv);
base::StatusOr<Output> result = (*bv)->Result(a.begin(), a.end());
ASSERT_OK(result);
EXPECT_GE(GetValue<double>(*result), result_lower);
EXPECT_LE(GetValue<double>(*result), result_upper);
}
}
TYPED_TEST(BoundedVarianceTest, GaussianInputTest) {
// Test samples points from a Gaussian and checks that the differentially
// private variance is the variance used to generate the distribution.
base::StatusOr<std::unique_ptr<BoundedVariance<double>>> bv =
BoundedVariance<double>::Builder()
.SetLower(-20)
.SetUpper(20)
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.Build();
ASSERT_OK(bv);
// Generate the large sample of points from a seeded Laplace distribution.
constexpr int samples = 100000;
std::mt19937 gen;
double stdev = 2;
for (int i = 0; i < samples; i++) {
(*bv)->AddEntry(std::round(absl::Gaussian<double>(gen, 0, stdev)));
}
base::StatusOr<Output> result = (*bv)->PartialResult();
ASSERT_OK(result);
EXPECT_NEAR(GetValue<double>(*result), stdev * stdev, 0.1);
}
TYPED_TEST(BoundedVarianceTest, MaxContributionsVarianceTest) {
const std::vector<TypeParam> input = {-1, -1, 1, 1};
// Calculate variance of input.
const double real_mean =
std::accumulate(input.begin(), input.end(), 0.0) / input.size();
double sum = 0;
for (const auto& i : input) {
sum += std::pow(i - real_mean, 2);
}
const double real_variance = sum / (input.size() - 1);
std::function<double(int)> sample_variance_for_max_contribution =
[&input, real_variance](int max_contribution) {
double sum = 0;
for (int i = 0; i < kNumSamples; ++i) {
auto variance = typename BoundedVariance<TypeParam>::Builder()
.SetMaxContributionsPerPartition(max_contribution)
.SetEpsilon(1)
.SetLower(-1)
.SetUpper(1)
.Build();
CHECK_EQ(variance.status(), absl::OkStatus());
auto out = (*variance)->Result(input.begin(), input.end());
CHECK_EQ(out.status(), absl::OkStatus());
sum += std::pow(GetValue<double>(*out) - real_variance, 2);
}
return sum / (kNumSamples - 1);
};
// We expect the sample variance with max contribution 2 to be (significantly)
// bigger than with max contribution 1.
EXPECT_GT(sample_variance_for_max_contribution(2),
1.1 * sample_variance_for_max_contribution(1));
}
TYPED_TEST(BoundedVarianceTest, MergeDifferentBoundingStrategy) {
typename BoundedVariance<TypeParam>::Builder bv_builder;
// Manual bounding.
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv1 =
bv_builder.SetLower(0).SetUpper(3).Build();
ASSERT_OK(bv1);
Summary summary = (*bv1)->Serialize();
// Auto bounding.
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv2 =
bv_builder.ClearBounds().Build();
ASSERT_OK(bv2);
// Error due to different strategies.
ASSERT_THAT(
(*bv2)->Merge(summary),
StatusIs(
absl::StatusCode::kInternal,
HasSubstr(
"Merged BoundedVariance must have the same bounding strategy.")));
}
TYPED_TEST(BoundedVarianceTest, SerializeMergeTest) {
typename BoundedVariance<TypeParam>::Builder bv_builder;
// Get summary of first BoundedVariance between entries.
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv1 =
bv_builder.SetEpsilon(.5)
.SetLower(0)
.SetUpper(3)
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.Build();
ASSERT_OK(bv1);
(*bv1)->AddEntry(2);
Summary summary = (*bv1)->Serialize();
(*bv1)->AddEntry(6);
// Merge summary into second BoundedVariance.
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv2 =
bv_builder.Build();
ASSERT_OK(bv2);
(*bv2)->AddEntry(6);
EXPECT_OK((*bv2)->Merge(summary));
// Check equality.
base::StatusOr<Output> result1 = (*bv1)->PartialResult();
ASSERT_OK(result1);
base::StatusOr<Output> result2 = (*bv2)->PartialResult();
ASSERT_OK(result2);
EXPECT_DOUBLE_EQ(GetValue<double>(*result1), GetValue<double>(*result2));
}
TYPED_TEST(BoundedVarianceTest, SerializeMergePartialValuesTest) {
typename ApproxBounds<TypeParam>::Builder bounds_builder;
typename BoundedVariance<TypeParam>::Builder builder;
// Automatic bounding, so entries will be split and stored as partials.
base::StatusOr<std::unique_ptr<ApproxBounds<TypeParam>>> bounds1 =
bounds_builder.SetThreshold(1)
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.Build();
ASSERT_OK(bounds1);
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv1 =
builder
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.SetApproxBounds(std::move(*bounds1))
.Build();
ASSERT_OK(bv1);
(*bv1)->AddEntry(-10);
(*bv1)->AddEntry(4);
Summary summary = (*bv1)->Serialize();
(*bv1)->AddEntry(6);
// Merge summary into second BoundedVariance.
base::StatusOr<std::unique_ptr<ApproxBounds<TypeParam>>> bounds2 =
bounds_builder.Build();
ASSERT_OK(bounds2);
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv2 =
builder.SetApproxBounds(std::move(*bounds2)).Build();
ASSERT_OK(bv2);
(*bv2)->AddEntry(6);
EXPECT_OK((*bv2)->Merge(summary));
// Check equality. Bounds are set to [-16, 8].
base::StatusOr<Output> result1 = (*bv1)->PartialResult();
ASSERT_OK(result1);
base::StatusOr<Output> result2 = (*bv2)->PartialResult();
ASSERT_OK(result2);
EXPECT_DOUBLE_EQ(GetValue<double>(*result1), GetValue<double>(*result2));
}
TEST(BoundedVarianceTest, OverflowRawCountTest) {
typename BoundedVariance<double>::Builder builder;
std::unique_ptr<BoundedVariance<double>> bv =
builder
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.SetLower(-1)
.SetUpper(1)
.Build()
.ValueOrDie();
BoundedVarianceTestPeer::AddMultipleEntries<double>(
-0.5, std::numeric_limits<uint64_t>::max() / 2, bv.get());
BoundedVarianceTestPeer::AddMultipleEntries<double>(
0.5, std::numeric_limits<uint64_t>::max() / 2, bv.get());
BoundedVarianceTestPeer::AddMultipleEntries<double>(1, 5, bv.get());
auto result = bv->PartialResult();
EXPECT_OK(result.status());
// A raw_count_ overflow would result in a larger variance of 1.
EXPECT_DOUBLE_EQ(GetValue<double>(result.value()), 0.25);
}
TEST(BoundedVarianceTest, OverflowAddEntryManualBounds) {
typename BoundedVariance<int64_t>::Builder builder;
std::unique_ptr<BoundedVariance<int64_t>> bv =
builder
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.SetLower(-1)
.SetUpper(1)
.Build()
.ValueOrDie();
// Try to overflow so far that it would result in an running sum of almost 0.
BoundedVarianceTestPeer::AddMultipleEntries<int64_t>(
1, std::numeric_limits<int64_t>::max(), bv.get());
BoundedVarianceTestPeer::AddMultipleEntries<int64_t>(
1, std::numeric_limits<int64_t>::max(), bv.get());
auto result = bv->PartialResult();
EXPECT_OK(result.status());
// Overflow would result in a variance of 1.0
EXPECT_DOUBLE_EQ(GetValue<double>(result.value()), 0.75);
}
TEST(BoundedVarianceTest, UnderflowAddEntryManualBounds) {
typename BoundedVariance<int64_t>::Builder builder;
std::unique_ptr<BoundedVariance<int64_t>> bv =
builder
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.SetLower(-1)
.SetUpper(1)
.Build()
.ValueOrDie();
BoundedVarianceTestPeer::AddMultipleEntries<int64_t>(
-1, std::numeric_limits<int64_t>::max(), bv.get());
BoundedVarianceTestPeer::AddMultipleEntries<int64_t>(
-1, std::numeric_limits<int64_t>::max(), bv.get());
auto result = bv->PartialResult();
EXPECT_OK(result.status());
// Overflow would result in a variance of 1.0
EXPECT_DOUBLE_EQ(GetValue<double>(result.value()), 0.75);
}
TEST(BoundedVarianceTest, OverflowRawCountMergeManualBoundsTest) {
typename BoundedVariance<double>::Builder builder;
std::unique_ptr<BoundedVariance<double>> bv =
builder
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.SetLower(0)
.SetUpper(10)
.Build()
.ValueOrDie();
BoundedVarianceTestPeer::AddMultipleEntries<double>(
10, std::numeric_limits<uint64_t>::max(), bv.get());
Summary summary = bv->Serialize();
std::unique_ptr<BoundedVariance<double>> bv2 = builder.Build().ValueOrDie();
bv2->AddEntry(10);
bv2->AddEntry(10);
EXPECT_OK(bv2->Merge(summary));
auto result = bv2->PartialResult();
EXPECT_OK(result.status());
// An overflow would cause the count of entries to be 1, which would result in
// the variance being based entirely on the midpoint between the upper and
// lower bounds, instead of based upon the actual data entries.
EXPECT_DOUBLE_EQ(GetValue<double>(result.value()), 0);
}
TEST(BoundedVarianceTest, OverflowMergeManualBoundsTest) {
typename BoundedVariance<int64_t>::Builder builder;
std::unique_ptr<BoundedVariance<int64_t>> bv =
builder
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.SetLower(-1)
.SetUpper(1)
.Build()
.ValueOrDie();
BoundedVarianceTestPeer::AddMultipleEntries<int64_t>(
1, std::numeric_limits<int64_t>::max(), bv.get());
Summary summary = bv->Serialize();
std::unique_ptr<BoundedVariance<int64_t>> bv2 = builder.Build().ValueOrDie();
BoundedVarianceTestPeer::AddMultipleEntries<int64_t>(
1, std::numeric_limits<int64_t>::max(), bv2.get());
EXPECT_OK(bv2->Merge(summary));
auto result = bv2->PartialResult();
EXPECT_OK(result.status());
// Overflow would result in a variance of 1.0
EXPECT_DOUBLE_EQ(GetValue<double>(result.value()), 0.75);
}
TEST(BoundedVarianceTest, UnderflowMergeManualBoundsTest) {
typename BoundedVariance<int64_t>::Builder builder;
std::unique_ptr<BoundedVariance<int64_t>> bv =
builder
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.SetLower(-1)
.SetUpper(1)
.Build()
.ValueOrDie();
BoundedVarianceTestPeer::AddMultipleEntries<int64_t>(
-1, std::numeric_limits<int64_t>::max(), bv.get());
Summary summary = bv->Serialize();
std::unique_ptr<BoundedVariance<int64_t>> bv2 = builder.Build().ValueOrDie();
BoundedVarianceTestPeer::AddMultipleEntries<int64_t>(
-1, std::numeric_limits<int64_t>::max(), bv2.get());
EXPECT_OK(bv2->Merge(summary));
auto result = bv2->PartialResult();
EXPECT_OK(result.status());
// Overflow would result in a variance of 1.0
EXPECT_DOUBLE_EQ(GetValue<double>(result.value()), 0.75);
}
TEST(BoundedVarianceTest, SensitivityOverflow) {
base::StatusOr<std::unique_ptr<BoundedVariance<int64_t>>> failed_bv =
BoundedVariance<int64_t>::Builder()
.SetEpsilon(1.0)
.SetLower(std::numeric_limits<int64_t>::lowest())
.SetUpper(std::numeric_limits<int64_t>::max())
.Build();
EXPECT_THAT(
failed_bv,
StatusIs(absl::StatusCode::kInvalidArgument,
HasSubstr("Sensitivity calculation caused integer overflow.")));
}
TYPED_TEST(BoundedVarianceTest, SensitivityTooHigh) {
// Make bounds so taking the interval squared won't overflow.
base::StatusOr<std::unique_ptr<BoundedVariance<double>>> failed_bv =
BoundedVariance<double>::Builder()
.SetLower(0)
.SetUpper(std::pow(std::numeric_limits<double>::max() / 2, .5))
.Build();
EXPECT_THAT(failed_bv, StatusIs(absl::StatusCode::kInvalidArgument,
HasSubstr("Sensitivity is too high.")));
}
TEST(BoundedVarianceTest, DropNanEntries) {
std::vector<double> a = {1, 2, 3, 4, NAN, NAN, 5};
base::StatusOr<std::unique_ptr<BoundedVariance<double>>> bv =
BoundedVariance<double>::Builder()
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.SetEpsilon(1)
.SetLower(0)
.SetUpper(6)
.Build();
ASSERT_OK(bv);
base::StatusOr<Output> result = (*bv)->Result(a.begin(), a.end());
ASSERT_OK(result);
EXPECT_DOUBLE_EQ(GetValue<double>(*result), 2.0);
}
TYPED_TEST(BoundedVarianceTest, PropagateApproxBoundsError) {
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv =
typename BoundedVariance<TypeParam>::Builder().SetEpsilon(1).Build();
ASSERT_OK(bv);
// Automatic bounds are needed but there is no input, so the count-threshhold
// should exceed any bin count.
EXPECT_THAT((*bv)->PartialResult(),
StatusIs(absl::StatusCode::kFailedPrecondition,
HasSubstr("run over a larger dataset")));
}
TYPED_TEST(BoundedVarianceTest, AutomaticBoundsContainZero) {
std::vector<TypeParam> a = {0, 8, -8,
std::numeric_limits<TypeParam>::lowest(),
std::numeric_limits<TypeParam>::max()};
base::StatusOr<std::unique_ptr<ApproxBounds<TypeParam>>> bounds =
typename ApproxBounds<TypeParam>::Builder()
.SetEpsilon(1)
.SetNumBins(4)
.SetBase(2)
.SetScale(1)
.SetThreshold(1)
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.Build();
ASSERT_OK(bounds);
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv =
typename BoundedVariance<TypeParam>::Builder()
.SetEpsilon(1)
.SetApproxBounds(std::move(*bounds))
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.Build();
ASSERT_OK(bv);
(*bv)->AddEntries(a.begin(), a.end());
Output expected_output;
AddToOutput<double>(&expected_output, 51.2);
BoundingReport* report =
expected_output.mutable_error_report()->mutable_bounding_report();
SetValue<TypeParam>(report->mutable_lower_bound(), -8);
SetValue<TypeParam>(report->mutable_upper_bound(), 8);
report->set_num_inputs(a.size());
report->set_num_outside(0);
base::StatusOr<Output> actual_output = (*bv)->PartialResult();
ASSERT_OK(actual_output);
EXPECT_THAT(*actual_output, EqualsProto(expected_output));
}
TEST(BoundedVarianceTest, AutomaticBoundsNegative) {
std::vector<double> a = {5, -2, -2, -4, -6, -6};
base::StatusOr<std::unique_ptr<ApproxBounds<double>>> bounds =
ApproxBounds<double>::Builder()
.SetEpsilon(1)
.SetNumBins(5)
.SetBase(2)
.SetScale(1)
.SetThreshold(2)
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.Build();
ASSERT_OK(bounds);
base::StatusOr<std::unique_ptr<BoundedVariance<double>>> bv =
BoundedVariance<double>::Builder()
.SetEpsilon(1)
.SetApproxBounds(std::move(*bounds))
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.Build();
ASSERT_OK(bv);
(*bv)->AddEntries(a.begin(), a.end());
base::StatusOr<Output> result = (*bv)->PartialResult();
ASSERT_OK(result);
double expected_sum = -21;
double expected_sos = 97;
double expected_variance =
(expected_sos - expected_sum * expected_sum / a.size()) / a.size();
EXPECT_THAT(GetValue<double>(*result),
DoubleNear(expected_variance, expected_variance / 10000));
BoundingReport expected_report;
SetValue<double>(expected_report.mutable_lower_bound(), -8);
SetValue<double>(expected_report.mutable_upper_bound(), -1);
expected_report.set_num_inputs(a.size());
expected_report.set_num_outside(1);
EXPECT_THAT(result->error_report().bounding_report(),
EqualsProto(expected_report));
}
TEST(BoundedVarianceTest, AutomaticBoundsPositive) {
std::vector<double> a = {-5, 2, 2, 4, 6, 6};
base::StatusOr<std::unique_ptr<ApproxBounds<double>>> bounds =
ApproxBounds<double>::Builder()
.SetEpsilon(1)
.SetNumBins(5)
.SetBase(2)
.SetScale(1)
.SetThreshold(2)
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.Build();
ASSERT_OK(bounds);
base::StatusOr<std::unique_ptr<BoundedVariance<double>>> bv =
BoundedVariance<double>::Builder()
.SetEpsilon(1)
.SetApproxBounds(std::move(*bounds))
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.Build();
ASSERT_OK(bv);
(*bv)->AddEntries(a.begin(), a.end());
base::StatusOr<Output> result = (*bv)->PartialResult();
ASSERT_OK(result);
double expected_sum = 21;
double expected_sos = 97;
double expected_variance =
(expected_sos - expected_sum * expected_sum / a.size()) / a.size();
EXPECT_THAT(GetValue<double>(*result),
DoubleNear(expected_variance, expected_variance / 10000));
BoundingReport expected_report;
SetValue<double>(expected_report.mutable_lower_bound(), 1);
SetValue<double>(expected_report.mutable_upper_bound(), 8);
expected_report.set_num_inputs(a.size());
expected_report.set_num_outside(1);
EXPECT_THAT(result->error_report().bounding_report(),
EqualsProto(expected_report));
}
// Test not providing ApproxBounds and instead using the default.
TYPED_TEST(BoundedVarianceTest, AutomaticBoundsDefault) {
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv =
typename BoundedVariance<TypeParam>::Builder()
.SetEpsilon(1)
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.Build();
ASSERT_OK(bv);
std::vector<TypeParam> big(570, 10);
std::vector<TypeParam> small(570, -10);
(*bv)->AddEntries(big.begin(), big.end());
(*bv)->AddEntries(small.begin(), small.end());
Output expected_output;
AddToOutput<double>(&expected_output, 100);
BoundingReport* report =
expected_output.mutable_error_report()->mutable_bounding_report();
SetValue<TypeParam>(report->mutable_lower_bound(), -16);
SetValue<TypeParam>(report->mutable_upper_bound(), 16);
report->set_num_inputs(big.size() + small.size());
report->set_num_outside(0);
base::StatusOr<Output> actual_output = (*bv)->PartialResult();
ASSERT_OK(actual_output);
EXPECT_THAT(*actual_output, EqualsProto(expected_output));
}
// Test when a bound is 0.
TYPED_TEST(BoundedVarianceTest, AutomaticBoundsZero) {
std::vector<TypeParam> a = {0, 0, 4, 4, -2, 7};
base::StatusOr<std::unique_ptr<ApproxBounds<TypeParam>>> bounds =
typename ApproxBounds<TypeParam>::Builder()
.SetEpsilon(1)
.SetNumBins(4)
.SetBase(2)
.SetScale(1)
.SetThreshold(2)
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.Build();
ASSERT_OK(bounds);
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv =
typename BoundedVariance<TypeParam>::Builder()
.SetEpsilon(1)
.SetApproxBounds(std::move(*bounds))
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.Build();
ASSERT_OK(bv);
(*bv)->AddEntries(a.begin(), a.end());
// Bounds are [0, 4]. -2 gets clamped to 0. 7 gets clamped to 4.
base::StatusOr<Output> result = (*bv)->PartialResult();
ASSERT_OK(result);
EXPECT_DOUBLE_EQ(GetValue<double>(result->elements(0).value()), 4);
}
TYPED_TEST(BoundedVarianceTest, Reset) {
// Construct approximate variance.
base::StatusOr<std::unique_ptr<ApproxBounds<TypeParam>>> bounds =
typename ApproxBounds<TypeParam>::Builder()
.SetNumBins(3)
.SetBase(10)
.SetScale(1)
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.SetThreshold(3)
.Build();
ASSERT_OK(bounds);
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv =
typename BoundedVariance<TypeParam>::Builder()
.SetEpsilon(1)
.SetApproxBounds(std::move(*bounds))
.SetLaplaceMechanism(absl::make_unique<ZeroNoiseMechanism::Builder>())
.Build();
ASSERT_OK(bv);
// Reset between adding vectors.
std::vector<TypeParam> a = {-10, -10, -10, 1000, 1000, 1000};
std::vector<TypeParam> b = {-100, -100, -100, 100, 100, 100};
(*bv)->AddEntries(a.begin(), a.end());
(*bv)->Reset();
(*bv)->AddEntries(b.begin(), b.end());
// Check result is only affected by vector b. Bounds are [-100, 100].
base::StatusOr<Output> result = (*bv)->PartialResult();
ASSERT_OK(result);
EXPECT_DOUBLE_EQ(GetValue<double>(result->elements(0).value()), 10000);
}
TYPED_TEST(BoundedVarianceTest, MemoryUsed) {
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv =
typename BoundedVariance<TypeParam>::Builder().Build();
ASSERT_OK(bv);
EXPECT_GT((*bv)->MemoryUsed(), 0);
}
TYPED_TEST(BoundedVarianceTest, SplitsEpsilonWithAutomaticBounds) {
double epsilon = 1.0;
base::StatusOr<std::unique_ptr<BoundedVariance<TypeParam>>> bv =
typename BoundedVariance<TypeParam>::Builder()
.SetEpsilon(epsilon)
.Build();
ASSERT_OK(bv);
EXPECT_NEAR((*bv)->GetEpsilon(), epsilon, 1e-10);
EXPECT_NEAR((*bv)->GetEpsilon(),
(*bv)->GetBoundingEpsilon() + (*bv)->GetAggregationEpsilon(),
1e-10);
EXPECT_GT((*bv)->GetBoundingEpsilon(), 0);
EXPECT_LT((*bv)->GetBoundingEpsilon(), epsilon);
EXPECT_GT((*bv)->GetAggregationEpsilon(), 0);
EXPECT_LT((*bv)->GetAggregationEpsilon(), epsilon);
}
} // namespace
} // namespace differential_privacy