Fix for thread contention, refactoring, and test cleanup.
C++ DP
- Refactor partition selection tests
GO DP
- Replace `interface{}` with `any`
- Language edits
Python DP PLD Accounting
- Clean up and compress tests for privacy_loss_distribution using a common method
- Use a single default value for num_queries in get_smallest_*_noise
- Fix error message in assert_dictionary_contained
Change-Id: Ifc0fb78a0277da81b465e7aae8fa857fa5580c9f
GitOrigin-RevId: b5f1f49af26c99f352db4c3d424a54b04b68c82b
diff --git a/cc/algorithms/partition-selection_test.cc b/cc/algorithms/partition-selection_test.cc
index 000e485..d65caf1 100644
--- a/cc/algorithms/partition-selection_test.cc
+++ b/cc/algorithms/partition-selection_test.cc
@@ -31,12 +31,12 @@
using ::testing::DoubleEq;
using ::testing::DoubleNear;
-using ::testing::Eq;
using ::testing::HasSubstr;
using ::differential_privacy::base::testing::StatusIs;
constexpr int kNumSamples = 10000000;
constexpr int kSmallNumSamples = 1000000;
+constexpr int kTinyNumSamples = 10000;
constexpr int64_t kInt64Min = std::numeric_limits<int64_t>::min();
constexpr int64_t kInt64Max = std::numeric_limits<int64_t>::max();
@@ -177,61 +177,56 @@
// will be approximately delta
TEST(PartitionSelectionTest, NearTruncatedGeometricPartitionSelectionOneUser) {
NearTruncatedGeometricPartitionSelection::Builder test_builder;
- std::unique_ptr<PartitionSelectionStrategy> build =
- test_builder.SetEpsilon(0.5)
- .SetDelta(0.02)
- .SetMaxPartitionsContributed(1)
- .Build()
- .value();
- double num_kept = 0.0;
+ test_builder.SetEpsilon(0.5).SetDelta(0.02).SetMaxPartitionsContributed(1);
+ absl::StatusOr<std::unique_ptr<PartitionSelectionStrategy>> strategy =
+ test_builder.Build();
+ ASSERT_OK(strategy);
+ int num_kept = 0;
for (int i = 0; i < kSmallNumSamples; i++) {
- if (build->ShouldKeep(1)) num_kept++;
+ if (strategy.value()->ShouldKeep(1)) num_kept++;
}
- EXPECT_NEAR(build->ProbabilityOfKeep(1), build->GetDelta(), 1e-12);
- EXPECT_THAT(num_kept / kSmallNumSamples,
- DoubleNear(build->GetDelta(), 0.001));
+ EXPECT_NEAR(strategy.value()->ProbabilityOfKeep(1),
+ strategy.value()->GetDelta(), 1e-12);
+ EXPECT_THAT(static_cast<double>(num_kept) / kSmallNumSamples,
+ DoubleNear(strategy.value()->GetDelta(), 0.001));
}
// We expect the probability of keeping a partition with no users will be zero
TEST(PartitionSelectionTest, NearTruncatedGeometricPartitionSelectionNoUsers) {
NearTruncatedGeometricPartitionSelection::Builder test_builder;
- std::unique_ptr<PartitionSelectionStrategy> build =
- test_builder.SetEpsilon(0.5)
- .SetDelta(0.02)
- .SetMaxPartitionsContributed(1)
- .Build()
- .value();
- EXPECT_EQ(build->ProbabilityOfKeep(0), 0.0);
+ test_builder.SetEpsilon(0.5).SetDelta(0.02).SetMaxPartitionsContributed(1);
+ absl::StatusOr<std::unique_ptr<PartitionSelectionStrategy>> strategy =
+ test_builder.Build();
+ ASSERT_OK(strategy);
+ EXPECT_EQ(strategy.value()->ProbabilityOfKeep(0), 0.0);
for (int i = 0; i < 1000; i++) {
- EXPECT_FALSE(build->ShouldKeep(0));
+ EXPECT_FALSE(strategy.value()->ShouldKeep(0));
}
}
TEST(PartitionSelectionTest,
NearTruncatedGeometricPartitionSelectionFirstCrossover) {
NearTruncatedGeometricPartitionSelection::Builder test_builder;
- std::unique_ptr<PartitionSelectionStrategy> build =
- test_builder.SetEpsilon(0.5)
- .SetDelta(0.02)
- .SetMaxPartitionsContributed(1)
- .Build()
- .value();
+ test_builder.SetEpsilon(0.5).SetDelta(0.02).SetMaxPartitionsContributed(1);
+ absl::StatusOr<std::unique_ptr<PartitionSelectionStrategy>> strategy =
+ test_builder.Build();
+ ASSERT_OK(strategy);
NearTruncatedGeometricPartitionSelection* magic =
- dynamic_cast<NearTruncatedGeometricPartitionSelection*>(build.get());
+ dynamic_cast<NearTruncatedGeometricPartitionSelection*>(
+ strategy.value().get());
EXPECT_THAT(magic->GetFirstCrossover(), DoubleEq(6));
}
TEST(PartitionSelectionTest,
NearTruncatedGeometricPartitionSelectionSecondCrossover) {
NearTruncatedGeometricPartitionSelection::Builder test_builder;
- std::unique_ptr<PartitionSelectionStrategy> build =
- test_builder.SetEpsilon(0.5)
- .SetDelta(0.02)
- .SetMaxPartitionsContributed(1)
- .Build()
- .value();
+ test_builder.SetEpsilon(0.5).SetDelta(0.02).SetMaxPartitionsContributed(1);
+ absl::StatusOr<std::unique_ptr<PartitionSelectionStrategy>> strategy =
+ test_builder.Build();
+ ASSERT_OK(strategy);
NearTruncatedGeometricPartitionSelection* magic =
- dynamic_cast<NearTruncatedGeometricPartitionSelection*>(build.get());
+ dynamic_cast<NearTruncatedGeometricPartitionSelection*>(
+ strategy.value().get());
EXPECT_THAT(magic->GetSecondCrossover(), DoubleEq(11));
}
@@ -239,36 +234,34 @@
TEST(PartitionSelectionTest,
NearTruncatedGeometricPartitionSelectionNumUsersEqFirstCrossover) {
NearTruncatedGeometricPartitionSelection::Builder test_builder;
- std::unique_ptr<PartitionSelectionStrategy> build =
- test_builder.SetEpsilon(0.5)
- .SetDelta(0.02)
- .SetMaxPartitionsContributed(1)
- .Build()
- .value();
- double num_kept = 0.0;
- EXPECT_NEAR(build->ProbabilityOfKeep(6), 0.58840484458, 1e-10);
+ test_builder.SetEpsilon(0.5).SetDelta(0.02).SetMaxPartitionsContributed(1);
+ absl::StatusOr<std::unique_ptr<PartitionSelectionStrategy>> strategy =
+ test_builder.Build();
+ ASSERT_OK(strategy);
+ EXPECT_NEAR(strategy.value()->ProbabilityOfKeep(6), 0.58840484458, 1e-10);
+ int num_kept = 0;
for (int i = 0; i < kNumSamples; i++) {
- if (build->ShouldKeep(6)) num_kept++;
+ if (strategy.value()->ShouldKeep(6)) num_kept++;
}
- EXPECT_THAT(num_kept / kNumSamples, DoubleNear(0.58840484458, 0.001));
+ EXPECT_THAT(static_cast<double>(num_kept) / kNumSamples,
+ DoubleNear(0.58840484458, 0.001));
}
// Values calculated with formula
TEST(PartitionSelectionTest,
NearTruncatedGeometricPartitionSelectionNumUsersBtwnCrossovers) {
NearTruncatedGeometricPartitionSelection::Builder test_builder;
- std::unique_ptr<PartitionSelectionStrategy> build =
- test_builder.SetEpsilon(0.5)
- .SetDelta(0.02)
- .SetMaxPartitionsContributed(1)
- .Build()
- .value();
- double num_kept = 0.0;
+ test_builder.SetEpsilon(0.5).SetDelta(0.02).SetMaxPartitionsContributed(1);
+ absl::StatusOr<std::unique_ptr<PartitionSelectionStrategy>> strategy =
+ test_builder.Build();
+ ASSERT_OK(strategy);
+ int num_kept = 0;
for (int i = 0; i < kNumSamples; i++) {
- if (build->ShouldKeep(8)) num_kept++;
+ if (strategy.value()->ShouldKeep(8)) num_kept++;
}
- EXPECT_NEAR(build->ProbabilityOfKeep(8), 0.86807080625, 1e-10);
- EXPECT_THAT(num_kept / kNumSamples, DoubleNear(0.86807080625, 0.001));
+ EXPECT_NEAR(strategy.value()->ProbabilityOfKeep(8), 0.86807080625, 1e-10);
+ EXPECT_THAT(static_cast<double>(num_kept) / kNumSamples,
+ DoubleNear(0.86807080625, 0.001));
}
// Values calculated with formula - 15 should be so large that this partition is
@@ -276,15 +269,13 @@
TEST(PartitionSelectionTest,
NearTruncatedGeometricPartitionSelectionNumUsersGreaterThanCrossovers) {
NearTruncatedGeometricPartitionSelection::Builder test_builder;
- std::unique_ptr<PartitionSelectionStrategy> build =
- test_builder.SetEpsilon(0.5)
- .SetDelta(0.02)
- .SetMaxPartitionsContributed(1)
- .Build()
- .value();
- EXPECT_EQ(build->ProbabilityOfKeep(15), 1);
+ test_builder.SetEpsilon(0.5).SetDelta(0.02).SetMaxPartitionsContributed(1);
+ absl::StatusOr<std::unique_ptr<PartitionSelectionStrategy>> strategy =
+ test_builder.Build();
+ ASSERT_OK(strategy);
+ EXPECT_EQ(strategy.value()->ProbabilityOfKeep(15), 1);
for (int i = 0; i < 1000; i++) {
- EXPECT_TRUE(build->ShouldKeep(15));
+ EXPECT_TRUE(strategy.value()->ShouldKeep(15));
}
}
@@ -292,53 +283,50 @@
TEST(PartitionSelectionTest,
NearTruncatedGeometricPartitionSelectionTinyEpsilon) {
NearTruncatedGeometricPartitionSelection::Builder test_builder;
- std::unique_ptr<PartitionSelectionStrategy> build =
- test_builder.SetEpsilon(1e-20)
- .SetDelta(0.02)
- .SetMaxPartitionsContributed(1)
- .Build()
- .value();
- double num_kept = 0.0;
+ test_builder.SetEpsilon(1e-20).SetDelta(0.02).SetMaxPartitionsContributed(1);
+ absl::StatusOr<std::unique_ptr<PartitionSelectionStrategy>> strategy =
+ test_builder.Build();
+ ASSERT_OK(strategy);
+ int num_kept = 0;
for (int i = 0; i < kNumSamples; i++) {
- if (build->ShouldKeep(6)) num_kept++;
+ if (strategy.value()->ShouldKeep(6)) num_kept++;
}
- EXPECT_NEAR(build->ProbabilityOfKeep(6), 0.12, 1e-10);
- EXPECT_THAT(num_kept / kNumSamples, DoubleNear(0.12, 0.001));
+ EXPECT_NEAR(strategy.value()->ProbabilityOfKeep(6), 0.12, 1e-10);
+ EXPECT_THAT(static_cast<double>(num_kept) / kNumSamples,
+ DoubleNear(0.12, 0.001));
}
TEST(PartitionSelectionTest,
NearTruncatedGeometricPartitionSelectionTinyEpsilonLargeDelta) {
NearTruncatedGeometricPartitionSelection::Builder test_builder;
- std::unique_ptr<PartitionSelectionStrategy> build =
- test_builder.SetEpsilon(1e-20)
- .SetDelta(0.15)
- .SetMaxPartitionsContributed(1)
- .Build()
- .value();
- double num_kept = 0.0;
+ test_builder.SetEpsilon(1e-20).SetDelta(0.15).SetMaxPartitionsContributed(1);
+ absl::StatusOr<std::unique_ptr<PartitionSelectionStrategy>> strategy =
+ test_builder.Build();
+ ASSERT_OK(strategy);
+ int num_kept = 0;
for (int i = 0; i < kNumSamples; i++) {
- if (build->ShouldKeep(3)) num_kept++;
+ if (strategy.value()->ShouldKeep(3)) num_kept++;
}
- EXPECT_NEAR(build->ProbabilityOfKeep(3), 0.45, 1e-10);
- EXPECT_THAT(num_kept / kNumSamples, DoubleNear(0.45, 0.001));
+ EXPECT_NEAR(strategy.value()->ProbabilityOfKeep(3), 0.45, 1e-10);
+ EXPECT_THAT(static_cast<double>(num_kept) / kNumSamples,
+ DoubleNear(0.45, 0.001));
}
// For tiny epsilon probability of keeping is basically n * delta.
TEST(PartitionSelectionTest,
NearTruncatedGeometricPartitionSelectionTinyEpsilonBtwnCrossovers) {
NearTruncatedGeometricPartitionSelection::Builder test_builder;
- std::unique_ptr<PartitionSelectionStrategy> build =
- test_builder.SetEpsilon(1e-20)
- .SetDelta(0.02)
- .SetMaxPartitionsContributed(1)
- .Build()
- .value();
- double num_kept = 0.0;
+ test_builder.SetEpsilon(1e-20).SetDelta(0.02).SetMaxPartitionsContributed(1);
+ absl::StatusOr<std::unique_ptr<PartitionSelectionStrategy>> strategy =
+ test_builder.Build();
+ ASSERT_OK(strategy);
+ int num_kept = 0;
for (int i = 0; i < kNumSamples; i++) {
- if (build->ShouldKeep(40)) num_kept++;
+ if (strategy.value()->ShouldKeep(40)) num_kept++;
}
- EXPECT_NEAR(build->ProbabilityOfKeep(40), 0.8, 1e-10);
- EXPECT_THAT(num_kept / kNumSamples, DoubleNear(0.8, 0.001));
+ EXPECT_NEAR(strategy.value()->ProbabilityOfKeep(40), 0.8, 1e-10);
+ EXPECT_THAT(static_cast<double>(num_kept) / kNumSamples,
+ DoubleNear(0.8, 0.001));
}
// LaplacePartitionSelection Tests
// Due to the inheritance, SetLaplaceMechanism must be
@@ -471,84 +459,84 @@
// will be approximately delta
TEST(PartitionSelectionTest, LaplacePartitionSelectionOneUser) {
LaplacePartitionSelection::Builder test_builder;
- std::unique_ptr<PartitionSelectionStrategy> build =
- test_builder
- .SetLaplaceMechanism(absl::make_unique<LaplaceMechanism::Builder>())
- .SetEpsilon(0.5)
- .SetDelta(0.02)
- .SetMaxPartitionsContributed(1)
- .Build()
- .value();
- double num_kept = 0.0;
+ test_builder
+ .SetLaplaceMechanism(absl::make_unique<LaplaceMechanism::Builder>())
+ .SetEpsilon(0.5)
+ .SetDelta(0.02)
+ .SetMaxPartitionsContributed(1);
+ absl::StatusOr<std::unique_ptr<PartitionSelectionStrategy>> strategy =
+ test_builder.Build();
+ ASSERT_OK(strategy);
+ int num_kept = 0;
for (int i = 0; i < kSmallNumSamples; i++) {
- if (build->ShouldKeep(1)) num_kept++;
+ if (strategy.value()->ShouldKeep(1)) num_kept++;
}
- EXPECT_NEAR(build->ProbabilityOfKeep(1), build->GetDelta(), 1e-12);
- EXPECT_THAT(num_kept / kSmallNumSamples,
- DoubleNear(build->GetDelta(), 0.0006));
+ EXPECT_NEAR(strategy.value()->ProbabilityOfKeep(1),
+ strategy.value()->GetDelta(), 1e-12);
+ EXPECT_THAT(static_cast<double>(num_kept) / kSmallNumSamples,
+ DoubleNear(strategy.value()->GetDelta(), 0.0006));
}
// When the number of users is at the threshold, we expect drop/keep is 50/50.
// These numbers should make the threshold approximately 5.
TEST(PartitionSelectionTest, LaplacePartitionSelectionAtThreshold) {
LaplacePartitionSelection::Builder test_builder;
- std::unique_ptr<PartitionSelectionStrategy> build =
- test_builder
- .SetLaplaceMechanism(absl::make_unique<LaplaceMechanism::Builder>())
- .SetEpsilon(0.5)
- .SetDelta(0.06766764161)
- .SetMaxPartitionsContributed(1)
- .Build()
- .value();
- double num_kept = 0.0;
+ test_builder
+ .SetLaplaceMechanism(absl::make_unique<LaplaceMechanism::Builder>())
+ .SetEpsilon(0.5)
+ .SetDelta(0.06766764161)
+ .SetMaxPartitionsContributed(1);
+ absl::StatusOr<std::unique_ptr<PartitionSelectionStrategy>> strategy =
+ test_builder.Build();
+ ASSERT_OK(strategy);
+ int num_kept = 0;
for (int i = 0; i < kSmallNumSamples; i++) {
- if (build->ShouldKeep(5)) num_kept++;
+ if (strategy.value()->ShouldKeep(5)) num_kept++;
}
- EXPECT_NEAR(build->ProbabilityOfKeep(5), 0.5, 1e-10);
- EXPECT_THAT(num_kept / kSmallNumSamples, DoubleNear(0.5, 0.0025));
+ EXPECT_NEAR(strategy.value()->ProbabilityOfKeep(5), 0.5, 1e-10);
+ EXPECT_THAT(static_cast<double>(num_kept) / kSmallNumSamples,
+ DoubleNear(0.5, 0.0025));
}
TEST(PartitionSelectionTest, LaplacePartitionSelectionThreshold) {
LaplacePartitionSelection::Builder test_builder;
- std::unique_ptr<PartitionSelectionStrategy> build =
- test_builder
- .SetLaplaceMechanism(absl::make_unique<LaplaceMechanism::Builder>())
- .SetEpsilon(0.5)
- .SetDelta(0.02)
- .SetMaxPartitionsContributed(1)
- .Build()
- .value();
+ test_builder
+ .SetLaplaceMechanism(absl::make_unique<LaplaceMechanism::Builder>())
+ .SetEpsilon(0.5)
+ .SetDelta(0.02)
+ .SetMaxPartitionsContributed(1);
+ absl::StatusOr<std::unique_ptr<PartitionSelectionStrategy>> strategy =
+ test_builder.Build();
+ ASSERT_OK(strategy);
LaplacePartitionSelection* laplace =
- dynamic_cast<LaplacePartitionSelection*>(build.get());
+ dynamic_cast<LaplacePartitionSelection*>(strategy.value().get());
EXPECT_THAT(laplace->GetThreshold(), DoubleNear(7.43775164974, 0.001));
}
TEST(PartitionSelectionTest, LaplacePartitionSelectionUnsetBuilderThreshold) {
LaplacePartitionSelection::Builder test_builder;
- std::unique_ptr<PartitionSelectionStrategy> build =
- test_builder.SetEpsilon(0.5)
- .SetDelta(0.02)
- .SetMaxPartitionsContributed(1)
- .Build()
- .value();
+ test_builder.SetEpsilon(0.5).SetDelta(0.02).SetMaxPartitionsContributed(1);
+ absl::StatusOr<std::unique_ptr<PartitionSelectionStrategy>> strategy =
+ test_builder.Build();
+ ASSERT_OK(strategy);
LaplacePartitionSelection* laplace =
- dynamic_cast<LaplacePartitionSelection*>(build.get());
+ dynamic_cast<LaplacePartitionSelection*>(strategy.value().get());
EXPECT_THAT(laplace->GetThreshold(), DoubleNear(7.43775164974, 0.001));
}
TEST(PartitionSelectionTest, LaplacePartitionSelectionNoiseValueIfShouldKeep) {
LaplacePartitionSelection::Builder test_builder;
- std::unique_ptr<PartitionSelectionStrategy> build =
- test_builder
- .SetLaplaceMechanism(absl::make_unique<LaplaceMechanism::Builder>())
- .SetEpsilon(0.5)
- .SetDelta(0.06766764161)
- .SetMaxPartitionsContributed(1)
- .Build()
- .value();
- double num_kept = 0.0;
- auto* laplace_ps = dynamic_cast<LaplacePartitionSelection*>(build.get());
- const int kTinyNumSamples = 10000;
+ test_builder
+ .SetLaplaceMechanism(absl::make_unique<LaplaceMechanism::Builder>())
+ .SetEpsilon(0.5)
+ .SetDelta(0.06766764161)
+ .SetMaxPartitionsContributed(1);
+ absl::StatusOr<std::unique_ptr<PartitionSelectionStrategy>> strategy =
+ test_builder.Build();
+ ASSERT_OK(strategy);
+ int num_kept = 0;
+ auto* laplace_ps =
+ dynamic_cast<LaplacePartitionSelection*>(strategy.value().get());
for (int i = 0; i < kTinyNumSamples; ++i) {
auto noised_value = laplace_ps->NoiseValueIfShouldKeep(5);
if (noised_value.has_value()) {
@@ -557,7 +545,8 @@
}
}
- EXPECT_THAT(num_kept / kTinyNumSamples, DoubleNear(0.5, 0.02));
+ EXPECT_THAT(static_cast<double>(num_kept) / kTinyNumSamples,
+ DoubleNear(0.5, 0.02));
}
// CalculateDelta and CalculateThreshold structs and tests
@@ -1648,17 +1637,17 @@
TEST(PartitionSelectionTest, GaussianPartitionSelectionNoiseValueIfShouldKeep) {
GaussianPartitionSelection::Builder test_builder;
- std::unique_ptr<PartitionSelectionStrategy> build =
- test_builder
- .SetGaussianMechanism(absl::make_unique<GaussianMechanism::Builder>())
- .SetEpsilon(0.5)
- .SetDelta(0.01)
- .SetMaxPartitionsContributed(1)
- .Build()
- .value();
- double num_kept = 0.0;
- auto* gaussian_ps = dynamic_cast<GaussianPartitionSelection*>(build.get());
- const int kTinyNumSamples = 10000;
+ test_builder
+ .SetGaussianMechanism(absl::make_unique<GaussianMechanism::Builder>())
+ .SetEpsilon(0.5)
+ .SetDelta(0.01)
+ .SetMaxPartitionsContributed(1);
+ absl::StatusOr<std::unique_ptr<PartitionSelectionStrategy>> strategy =
+ test_builder.Build();
+ ASSERT_OK(strategy);
+ int num_kept = 0;
+ auto* gaussian_ps =
+ dynamic_cast<GaussianPartitionSelection*>(strategy.value().get());
for (int i = 0; i < kTinyNumSamples; ++i) {
auto noised_value = gaussian_ps->NoiseValueIfShouldKeep(5);
if (noised_value.has_value()) {
@@ -1667,7 +1656,55 @@
}
}
- EXPECT_THAT(num_kept / kTinyNumSamples, DoubleNear(0.07, 0.02));
+ EXPECT_THAT(static_cast<double>(num_kept) / kTinyNumSamples,
+ DoubleNear(0.07, 0.02));
+}
+
+TEST(PartitionSelectionTest, GaussianPartitionSelectionShouldKeepNoUsers) {
+ GaussianPartitionSelection::Builder test_builder;
+ test_builder
+ .SetGaussianMechanism(absl::make_unique<GaussianMechanism::Builder>())
+ .SetEpsilon(0.5)
+ .SetDelta(0.01)
+ .SetMaxPartitionsContributed(1);
+ absl::StatusOr<std::unique_ptr<PartitionSelectionStrategy>> strategy =
+ test_builder.Build();
+ ASSERT_OK(strategy);
+ auto* gaussian_ps =
+ dynamic_cast<GaussianPartitionSelection*>(strategy.value().get());
+ int num_kept = 0;
+ for (int i = 0; i < kTinyNumSamples; i++) {
+ if (gaussian_ps->ShouldKeep(0)) {
+ ++num_kept;
+ }
+ }
+ EXPECT_THAT(gaussian_ps->ProbabilityOfKeep(0), DoubleNear(0, 0.02));
+ // With small probability, ShouldKeep evaluates to true.
+ EXPECT_THAT(static_cast<double>(num_kept) / kTinyNumSamples,
+ DoubleNear(0, 0.02));
+}
+
+TEST(PartitionSelectionTest, GaussianPartitionSelectionShouldKeep) {
+ GaussianPartitionSelection::Builder test_builder;
+ test_builder
+ .SetGaussianMechanism(absl::make_unique<GaussianMechanism::Builder>())
+ .SetEpsilon(0.5)
+ .SetDelta(0.01)
+ .SetMaxPartitionsContributed(1);
+ absl::StatusOr<std::unique_ptr<PartitionSelectionStrategy>> strategy =
+ test_builder.Build();
+ ASSERT_OK(strategy);
+ auto* gaussian_ps =
+ dynamic_cast<GaussianPartitionSelection*>(strategy.value().get());
+ int num_kept = 0;
+ for (int i = 0; i < kTinyNumSamples; ++i) {
+ if (gaussian_ps->ShouldKeep(5)) {
+ ++num_kept;
+ }
+ }
+ EXPECT_THAT(gaussian_ps->ProbabilityOfKeep(5), DoubleNear(0.07, 0.02));
+ EXPECT_THAT(static_cast<double>(num_kept) / kTinyNumSamples,
+ DoubleNear(0.07, 0.02));
}
} // namespace
diff --git a/privacy-on-beam/internal/kv/kv.go b/privacy-on-beam/internal/kv/kv.go
index 1d51e22..da31b08 100644
--- a/privacy-on-beam/internal/kv/kv.go
+++ b/privacy-on-beam/internal/kv/kv.go
@@ -82,7 +82,7 @@
}
// Encode transforms a <K,V> pair into a Pair.
-func (codec *Codec) Encode(k, v interface{}) (Pair, error) {
+func (codec *Codec) Encode(k, v any) (Pair, error) {
var bufK, bufV bytes.Buffer
if err := codec.kEnc.Encode(k, &bufK); err != nil {
return Pair{}, fmt.Errorf("kv.Codec.Encode: couldn't Encode key %v: %v", k, err)
@@ -97,7 +97,7 @@
}
// Decode transforms a Pair into a <K,V> pair.
-func (codec *Codec) Decode(p Pair) (k, v interface{}, err error) {
+func (codec *Codec) Decode(p Pair) (k, v any, err error) {
k, err = codec.kDec.Decode(bytes.NewBuffer(p.K))
if err != nil {
return k, v, fmt.Errorf("kv.Codec.Decode: couldn't Decode key %v: %v", k, err)
diff --git a/privacy-on-beam/pbeam/aggregations.go b/privacy-on-beam/pbeam/aggregations.go
index cd11e7c..81dd694 100644
--- a/privacy-on-beam/pbeam/aggregations.go
+++ b/privacy-on-beam/pbeam/aggregations.go
@@ -94,15 +94,15 @@
// since the privacy guarantee doesn't depend on the privacy unit contributions being selected randomly.
//
// In order to do the cross-partition contribution bounding we need:
-// 1. the key to be the privacy ID.
+// 1. the key to be the privacy ID.
// 2. the value to be the partition ID or the pair = {partition ID, aggregated statistic},
-// where aggregated statistic is either array of values which are associated with the given id
-// and partition, or sum/count/etc of these values.
+// where aggregated statistic is either array of values which are associated with the given id
+// and partition, or sum/count/etc of these values.
//
// In order to do the per-partition contribution bounding we need:
-// 1. the key to be the pair = {privacy ID, partition ID}.
-// 2. the value to be just the value which is associated with that {privacy ID, partition ID} pair
-// (there could be multiple entries with the same key).
+// 1. the key to be the pair = {privacy ID, partition ID}.
+// 2. the value to be just the value which is associated with that {privacy ID, partition ID} pair
+// (there could be multiple entries with the same key).
func boundContributions(s beam.Scope, kvCol beam.PCollection, contributionLimit int64) beam.PCollection {
s = s.Scope("boundContributions")
// Transform the PCollection<K,V> into a PCollection<K,[]V>, where
@@ -127,7 +127,7 @@
return k, int64(v)
}
-func findRekeyFn(kind reflect.Kind) (interface{}, error) {
+func findRekeyFn(kind reflect.Kind) (any, error) {
switch kind {
case reflect.Int64:
return rekeyInt64Fn, nil
@@ -162,7 +162,7 @@
return kv.K, pairFloat64{kv.V, m}
}
-func newDecodePairFn(t reflect.Type, kind reflect.Kind) (interface{}, error) {
+func newDecodePairFn(t reflect.Type, kind reflect.Kind) (any, error) {
switch kind {
case reflect.Int64:
return newDecodePairInt64Fn(t), nil
@@ -219,9 +219,9 @@
return x, pair.M, nil
}
-func newBoundedSumFn(epsilon, delta float64, maxPartitionsContributed int64, lower, upper float64, noiseKind noise.Kind, vKind reflect.Kind, publicPartitions bool, testMode testMode) (interface{}, error) {
+func newBoundedSumFn(epsilon, delta float64, maxPartitionsContributed int64, lower, upper float64, noiseKind noise.Kind, vKind reflect.Kind, publicPartitions bool, testMode testMode) (any, error) {
var err, checkErr error
- var bsFn interface{}
+ var bsFn any
switch vKind {
case reflect.Int64:
@@ -517,7 +517,7 @@
}
// findDereferenceValueFn dereferences a *int64 to int64 or *float64 to float64.
-func findDereferenceValueFn(kind reflect.Kind) (interface{}, error) {
+func findDereferenceValueFn(kind reflect.Kind) (any, error) {
switch kind {
case reflect.Int64:
return dereferenceValueToInt64Fn, nil
@@ -540,7 +540,7 @@
return fmt.Sprintf("%#v", fn)
}
-func findDropThresholdedPartitionsFn(kind reflect.Kind) (interface{}, error) {
+func findDropThresholdedPartitionsFn(kind reflect.Kind) (any, error) {
switch kind {
case reflect.Int64:
return dropThresholdedPartitionsInt64Fn, nil
@@ -575,7 +575,7 @@
}
}
-func findClampNegativePartitionsFn(kind reflect.Kind) (interface{}, error) {
+func findClampNegativePartitionsFn(kind reflect.Kind) (any, error) {
switch kind {
case reflect.Int64:
return clampNegativePartitionsInt64Fn, nil
@@ -611,7 +611,7 @@
}
// newAddZeroValuesToPublicPartitionsFn turns a PCollection<V> into PCollection<V,0>.
-func newAddZeroValuesToPublicPartitionsFn(vKind reflect.Kind) (interface{}, error) {
+func newAddZeroValuesToPublicPartitionsFn(vKind reflect.Kind) (any, error) {
switch vKind {
case reflect.Int64:
return addZeroValuesToPublicPartitionsInt64Fn, nil
@@ -636,7 +636,7 @@
// dropNonPublicPartitions returns the PCollection with the non-public partitions dropped if public partitions are
// specified. Returns the input PCollection otherwise.
-func dropNonPublicPartitions(s beam.Scope, pcol PrivatePCollection, publicPartitions interface{}, partitionType reflect.Type) (beam.PCollection, error) {
+func dropNonPublicPartitions(s beam.Scope, pcol PrivatePCollection, publicPartitions any, partitionType reflect.Type) (beam.PCollection, error) {
// If PublicPartitions is not specified, return the input collection.
if publicPartitions == nil {
return pcol.col, nil
@@ -699,11 +699,11 @@
// e.g. count and distinctid.
//
// We drop values that are not in the publicPartitions PCollection as follows:
-// 1. Transform publicPartitions from <V> to <V, int64(0)> (0 is a dummy value)
-// 2. Swap pcol.col from <PrivacyKey, V> to <V, PrivacyKey>
-// 3. Do a CoGroupByKey on the output of 1 and 2.
-// 4. From the output of 3, only output <PrivacyKey, V> if there is an input
-// from 1 using the mergePublicValuesFn.
+// 1. Transform publicPartitions from <V> to <V, int64(0)> (0 is a placeholder value)
+// 2. Swap pcol.col from <PrivacyKey, V> to <V, PrivacyKey>
+// 3. Do a CoGroupByKey on the output of 1 and 2.
+// 4. From the output of 3, only output <PrivacyKey, V> if there is an input
+// from 1 using the mergePublicValuesFn.
//
// Returns a PCollection<PrivacyKey, Value> only for values present in
// publicPartitions.
@@ -968,7 +968,7 @@
}
// findConvertFn gets the correct conversion to float64 function.
-func findConvertToFloat64Fn(t typex.FullType) (interface{}, error) {
+func findConvertToFloat64Fn(t typex.FullType) (any, error) {
switch t.Type().String() {
case "int":
return convertIntToFloat64Fn, nil
@@ -1122,7 +1122,7 @@
// checkPublicPartitions returns an error if publicPartitions parameter of an aggregation
// is not valid.
-func checkPublicPartitions(publicPartitions interface{}, partitionType reflect.Type) error {
+func checkPublicPartitions(publicPartitions any, partitionType reflect.Type) error {
if publicPartitions != nil {
if reflect.TypeOf(publicPartitions) != reflect.TypeOf(beam.PCollection{}) &&
reflect.ValueOf(publicPartitions).Kind() != reflect.Slice &&
@@ -1142,9 +1142,10 @@
// checkDelta returns an error if delta parameter of an aggregation is not valid. Delta
// is valid in the following cases:
-// delta == 0; when laplace noise with public partitions are used
-// 0 < delta < 1; otherwise
-func checkDelta(delta float64, noiseKind noise.Kind, publicPartitions interface{}) error {
+//
+// delta == 0; when laplace noise with public partitions are used
+// 0 < delta < 1; otherwise
+func checkDelta(delta float64, noiseKind noise.Kind, publicPartitions any) error {
if publicPartitions != nil && noiseKind == noise.LaplaceNoise {
if delta != 0 {
return fmt.Errorf("Delta is %e, using Laplace Noise with Public Partitions requires setting delta to 0", delta)
diff --git a/privacy-on-beam/pbeam/aggregations_test.go b/privacy-on-beam/pbeam/aggregations_test.go
index c941e7f..6db4158 100644
--- a/privacy-on-beam/pbeam/aggregations_test.go
+++ b/privacy-on-beam/pbeam/aggregations_test.go
@@ -38,7 +38,7 @@
desc string
noiseKind noise.Kind
vKind reflect.Kind
- want interface{}
+ want any
}{
{"Laplace Float64", noise.LaplaceNoise, reflect.Float64,
&boundedSumFloat64Fn{
@@ -103,7 +103,7 @@
for _, tc := range []struct {
desc string
noiseKind noise.Kind
- wantNoise interface{}
+ wantNoise any
}{
{"Laplace noise kind", noise.LaplaceNoise, noise.Laplace()},
{"Gaussian noise kind", noise.GaussianNoise, noise.Gaussian()}} {
@@ -122,7 +122,7 @@
for _, tc := range []struct {
desc string
noiseKind noise.Kind
- wantNoise interface{}
+ wantNoise any
}{
{"Laplace noise kind", noise.LaplaceNoise, noise.Laplace()},
{"Gaussian noise kind", noise.GaussianNoise, noise.Gaussian()}} {
@@ -515,7 +515,7 @@
for _, tc := range []struct {
desc string
fullType typex.FullType
- wantConvertFn interface{}
+ wantConvertFn any
wantErr bool
}{
{"int", typex.New(reflect.TypeOf(int(0))), convertIntToFloat64Fn, false},
diff --git a/privacy-on-beam/pbeam/coders.go b/privacy-on-beam/pbeam/coders.go
index cd9b86b..2752ee2 100644
--- a/privacy-on-beam/pbeam/coders.go
+++ b/privacy-on-beam/pbeam/coders.go
@@ -117,13 +117,13 @@
return ret, err
}
-func encode(v interface{}) ([]byte, error) {
+func encode(v any) ([]byte, error) {
var buf bytes.Buffer
enc := gob.NewEncoder(&buf)
err := enc.Encode(v)
return buf.Bytes(), err
}
-func decode(v interface{}, data []byte) error {
+func decode(v any, data []byte) error {
return gob.NewDecoder(bytes.NewReader(data)).Decode(v)
}
diff --git a/privacy-on-beam/pbeam/count.go b/privacy-on-beam/pbeam/count.go
index d149c3a..8560697 100644
--- a/privacy-on-beam/pbeam/count.go
+++ b/privacy-on-beam/pbeam/count.go
@@ -81,7 +81,7 @@
// otherwise.
//
// Optional.
- PublicPartitions interface{}
+ PublicPartitions any
}
// Count counts the number of times a value appears in a PrivatePCollection,
diff --git a/privacy-on-beam/pbeam/count_test.go b/privacy-on-beam/pbeam/count_test.go
index 21cd1ef..dcfdf11 100644
--- a/privacy-on-beam/pbeam/count_test.go
+++ b/privacy-on-beam/pbeam/count_test.go
@@ -96,7 +96,7 @@
p, s, col, want := ptest.CreateList2(pairs, result)
col = beam.ParDo(s, testutils.PairToKV, col)
publicPartitionsSlice := []int{9, 10}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -336,7 +336,7 @@
p, s, col := ptest.CreateList(pairs)
publicPartitionsSlice := []int{0}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -411,7 +411,7 @@
col = beam.ParDo(s, testutils.PairToKV, col)
publicPartitionsSlice := []int{0, 1, 2, 3, 4}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -478,7 +478,7 @@
publicPartitionsSlice = append(publicPartitionsSlice, i)
}
p, s, col := ptest.CreateList(pairs)
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
diff --git a/privacy-on-beam/pbeam/distinct_id.go b/privacy-on-beam/pbeam/distinct_id.go
index 18e9929..adc56c3 100644
--- a/privacy-on-beam/pbeam/distinct_id.go
+++ b/privacy-on-beam/pbeam/distinct_id.go
@@ -78,7 +78,7 @@
// otherwise.
//
// Optional.
- PublicPartitions interface{}
+ PublicPartitions any
}
// DistinctPrivacyID counts the number of distinct privacy identifiers
diff --git a/privacy-on-beam/pbeam/distinct_id_test.go b/privacy-on-beam/pbeam/distinct_id_test.go
index a08ac64..b633b1a 100644
--- a/privacy-on-beam/pbeam/distinct_id_test.go
+++ b/privacy-on-beam/pbeam/distinct_id_test.go
@@ -125,7 +125,7 @@
col = beam.ParDo(s, testutils.PairToKV, col)
publicPartitionsSlice := []int{0, 1, 3, 4}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -382,7 +382,7 @@
pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta))
publicPartitionsSlice := []int{0}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -455,7 +455,7 @@
col = beam.ParDo(s, testutils.PairToKV, col)
publicPartitionsSlice := []int{0, 1, 2, 3, 4}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -525,7 +525,7 @@
for i := 0; i < 200; i++ {
publicPartitionsSlice = append(publicPartitionsSlice, i)
}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -623,7 +623,7 @@
for _, tc := range []struct {
desc string
noiseKind noise.Kind
- wantNoise interface{}
+ wantNoise any
}{
{"Laplace noise kind", noise.LaplaceNoise, noise.Laplace()},
{"Gaussian noise kind", noise.GaussianNoise, noise.Gaussian()}} {
diff --git a/privacy-on-beam/pbeam/distinct_per_key.go b/privacy-on-beam/pbeam/distinct_per_key.go
index cd8f939..9be3830 100644
--- a/privacy-on-beam/pbeam/distinct_per_key.go
+++ b/privacy-on-beam/pbeam/distinct_per_key.go
@@ -79,7 +79,7 @@
// otherwise.
//
// Optional.
- PublicPartitions interface{}
+ PublicPartitions any
}
// DistinctPerKey estimates the number of distinct values associated to
@@ -183,7 +183,7 @@
// We do partition selection after cross-partition contribution bounding because
// we want to keep the same contributions across partitions for partition selection
// and Count.
- var partitions interface{}
+ var partitions any
var noiseEpsilon, partitionSelectionEpsilon, noiseDelta, partitionSelectionDelta float64
if params.PublicPartitions != nil {
partitions = params.PublicPartitions
diff --git a/privacy-on-beam/pbeam/distinct_per_key_test.go b/privacy-on-beam/pbeam/distinct_per_key_test.go
index 9851289..dd27198 100644
--- a/privacy-on-beam/pbeam/distinct_per_key_test.go
+++ b/privacy-on-beam/pbeam/distinct_per_key_test.go
@@ -218,7 +218,7 @@
epsilon := 50.0
delta := 0.0
publicPartitionsSlice := []int{0, 1, 2, 3, 4}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
diff --git a/privacy-on-beam/pbeam/mean_test.go b/privacy-on-beam/pbeam/mean_test.go
index 5a717a1..898cc28 100644
--- a/privacy-on-beam/pbeam/mean_test.go
+++ b/privacy-on-beam/pbeam/mean_test.go
@@ -38,7 +38,7 @@
for _, tc := range []struct {
desc string
noiseKind noise.Kind
- want interface{}
+ want any
}{
{"Laplace noise kind", noise.LaplaceNoise,
&boundedMeanFn{
@@ -89,7 +89,7 @@
for _, tc := range []struct {
desc string
noiseKind noise.Kind
- wantNoise interface{}
+ wantNoise any
}{
{"Laplace noise kind", noise.LaplaceNoise, noise.Laplace()},
{"Gaussian noise kind", noise.GaussianNoise, noise.Gaussian()}} {
@@ -441,7 +441,7 @@
col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col)
publicPartitionsSlice := []int{0}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -623,7 +623,7 @@
p, s, col, want := ptest.CreateList2(triples, result)
col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col)
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -733,7 +733,7 @@
p, s, col, want := ptest.CreateList2(triples, result)
col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col)
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -1144,7 +1144,7 @@
for p := 0; p < 200; p++ {
publicPartitionsSlice = append(publicPartitionsSlice, p)
}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -1244,7 +1244,7 @@
p, s, col, want := ptest.CreateList2(triples, result)
col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col)
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -1359,7 +1359,7 @@
p, s, col, want := ptest.CreateList2(triples, result)
col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col)
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
diff --git a/privacy-on-beam/pbeam/pardo.go b/privacy-on-beam/pbeam/pardo.go
index 6ffcd77..24cbb00 100644
--- a/privacy-on-beam/pbeam/pardo.go
+++ b/privacy-on-beam/pbeam/pardo.go
@@ -32,17 +32,17 @@
// identifiers. For now, it only works if doFn is a function that has one of
// the following types.
//
-// Transforms a PrivatePCollection<X> into a PrivatePCollection<Y>:
+// Transforms a PrivatePCollection<X> into a PrivatePCollection<Y>:
// - func(X) Y
// - func(context.Context, X) Y
-// - func(X) (Y, error)
-// - func(context.Context, X) (Y, error)
+// - func(X) (Y, error)
+// - func(context.Context, X) (Y, error)
// - func(X, emit), where emit has type func(Y)
// - func(context.Context, X, emit), where emit has type func(Y)
// - func(X, emit) error, where emit has type func(Y)
// - func(context.Context, X, emit) error, where emit has type func(Y)
//
-// Transforms a PrivatePCollection<X> into a PrivatePCollection<Y,Z>:
+// Transforms a PrivatePCollection<X> into a PrivatePCollection<Y,Z>:
// - func(X) (Y, Z)
// - func(context.Context, X) (Y, Z)
// - func(X) (Y, Z, error)
@@ -52,7 +52,7 @@
// - func(X, emit) error, where emit has type func(Y, Z)
// - func(context.Context, X, emit) error, where emit has type func(Y, Z)
//
-// Transforms a PrivatePCollection<W,X> into a PrivatePCollection<Y>:
+// Transforms a PrivatePCollection<W,X> into a PrivatePCollection<Y>:
// - func(W, X) Y
// - func(context.Context, W, X) Y
// - func(W, X) (Y, error)
@@ -74,7 +74,7 @@
//
// Note that Beam universal types (e.g., beam.V, beam.T, etc.) are not supported:
// each of the X, Y, Z, W above needs to be a concrete type.
-func ParDo(s beam.Scope, doFn interface{}, pcol PrivatePCollection) PrivatePCollection {
+func ParDo(s beam.Scope, doFn any, pcol PrivatePCollection) PrivatePCollection {
s = s.Scope("pbeam.ParDo")
// Convert the doFn into a anonDoFn.
anonDoFn, err := buildDoFn(doFn)
@@ -107,13 +107,13 @@
// anonDoFn contains the transformed doFn that is passed to Beam, as well as metadata.
type anonDoFn struct {
- fn interface{} // the transformed doFn passed to Beam
+ fn any // the transformed doFn passed to Beam
typeDef beam.TypeDefinition // the type definition necessary for Beam to process fn
codec *kv.Codec // if fn outputs a KV pair, the codec that can decode this pair
}
// buildDoFn validates the provided doFn and transforms it into an *anonDoFn.
-func buildDoFn(doFn interface{}) (*anonDoFn, error) {
+func buildDoFn(doFn any) (*anonDoFn, error) {
if reflect.ValueOf(doFn).Type().Kind() != reflect.Func {
return nil, fmt.Errorf("pbeam.ParDo doesn't support structural DoFns for now: doFn must be a function")
}
diff --git a/privacy-on-beam/pbeam/pardo_test.go b/privacy-on-beam/pbeam/pardo_test.go
index 1ed30fc..d6fb064 100644
--- a/privacy-on-beam/pbeam/pardo_test.go
+++ b/privacy-on-beam/pbeam/pardo_test.go
@@ -851,7 +851,7 @@
func TestParDo1x2Emit(t *testing.T) {
for _, tc := range []struct {
desc string
- doFn interface{}
+ doFn any
want []testutils.PairICodedKV
}{
{"doFn that emits only non-zero inputs",
@@ -1037,7 +1037,7 @@
func TestParDo2x1Emit(t *testing.T) {
for _, tc := range []struct {
desc string
- doFn interface{}
+ doFn any
want []testutils.PairII
}{
{"doFn that emits only non-zero input k",
@@ -1208,7 +1208,7 @@
func TestParDo2x2Emit(t *testing.T) {
for _, tc := range []struct {
desc string
- doFn interface{}
+ doFn any
want []testutils.PairICodedKV
}{
{"doFn that emits only non-zero input k",
@@ -1396,7 +1396,7 @@
func TestBuildDoFn(t *testing.T) {
for _, tc := range []struct {
desc string
- doFn interface{}
+ doFn any
wantType reflect.Type
wantTypeDef beam.TypeDefinition
wantCodec *kv.Codec
@@ -1649,7 +1649,7 @@
func TestInvalidDoFn(t *testing.T) {
for _, tc := range []struct {
desc string
- doFn interface{}
+ doFn any
}{
{"structural doFn", &testStructuralDoFn{1}},
// bad inputs
diff --git a/privacy-on-beam/pbeam/pbeam.go b/privacy-on-beam/pbeam/pbeam.go
index d052e24..04e809f 100644
--- a/privacy-on-beam/pbeam/pbeam.go
+++ b/privacy-on-beam/pbeam/pbeam.go
@@ -37,31 +37,31 @@
// To understand the main API contract provided by PrivatePCollection, consider
// the following example pipeline.
//
-// p := beam.NewPipeline()
-// s := p.Root()
-// // The input is a series of files in which each line contains the data of a privacy unit (e.g. an individual).
-// input := textio.Read(s, "/path/to/files/*.txt") // input is a PCollection<string>
-// // Extracts the privacy ID and the data associated with each line: extractID is a func(string) (userID,data).
-// icol := beam.ParDo(s, input, extractID) // icol is a PCollection<privacyUnitID,data>
-// // Transforms the input PCollection into a PrivatePCollection with parameters ε=1 and δ=10⁻¹⁰.
-// // The privacy ID is "hidden" by the operation: pcol behaves as if it were a PCollection<data>.
-// pcol := MakePrivate(s, icol, NewPrivacySpec(1, 1e-10)) // pcol is a PrivatePCollection<data>
-// // Arbitrary transformations can be applied to the data…
-// pcol = ParDo(s, pcol, someDoFn)
-// pcol = ParDo(s, pcol, otherDoFn)
-// // …and to retrieve PCollection outputs, differentially private aggregations must be used.
-// // For example, assuming pcol is now a PrivatePCollection<field,float64>:
-// sumParams := SumParams{MaxPartitionsContributed: 10, MaxValue: 5}
-// ocol := SumPerKey(s, pcol2, sumParams) // ocol is a PCollection<field,float64>
-// // And it is now possible to output this data.
-// textio.Write(s, "/path/to/output/file", ocol)
+// p := beam.NewPipeline()
+// s := p.Root()
+// // The input is a series of files in which each line contains the data of a privacy unit (e.g. an individual).
+// input := textio.Read(s, "/path/to/files/*.txt") // input is a PCollection<string>
+// // Extracts the privacy ID and the data associated with each line: extractID is a func(string) (userID,data).
+// icol := beam.ParDo(s, input, extractID) // icol is a PCollection<privacyUnitID,data>
+// // Transforms the input PCollection into a PrivatePCollection with parameters ε=1 and δ=10⁻¹⁰.
+// // The privacy ID is "hidden" by the operation: pcol behaves as if it were a PCollection<data>.
+// pcol := MakePrivate(s, icol, NewPrivacySpec(1, 1e-10)) // pcol is a PrivatePCollection<data>
+// // Arbitrary transformations can be applied to the data…
+// pcol = ParDo(s, pcol, someDoFn)
+// pcol = ParDo(s, pcol, otherDoFn)
+// // …and to retrieve PCollection outputs, differentially private aggregations must be used.
+// // For example, assuming pcol is now a PrivatePCollection<field,float64>:
+// sumParams := SumParams{MaxPartitionsContributed: 10, MaxValue: 5}
+// ocol := SumPerKey(s, pcol2, sumParams) // ocol is a PCollection<field,float64>
+// // And it is now possible to output this data.
+// textio.Write(s, "/path/to/output/file", ocol)
//
// The behavior of PrivatePCollection is similar to the behavior of PCollection.
// In particular, it implements arbitrary per-record transformations via ParDo.
// However, the contents of a PrivatePCollection cannot be written to disk.
// For example, there is no equivalent of:
//
-// textio.Write(s, "/path/to/output/file", pcol)
+// textio.Write(s, "/path/to/output/file", pcol)
//
// In order to retrieve data encapsulated in a PrivatePCollection, it is
// necessary to use one of the differentially private aggregations provided with
@@ -78,7 +78,7 @@
// PCollection obtained by removing all records associated with a given value of
// K in icol. Then, for any set S of possible outputs:
//
-// P[f(icol) ∈ S] ≤ exp(ε) * P[f(icol') ∈ S] + δ.
+// P[f(icol) ∈ S] ≤ exp(ε) * P[f(icol') ∈ S] + δ.
//
// The K, in the example above, is userID, representing a user identifier. This
// means that the full list of contributions of any given user is protected. However, this does not need
@@ -221,7 +221,7 @@
// PrivacySpecOption is used for customizing PrivacySpecs. In the typical use
// case, PrivacySpecOptions are passed into the NewPrivacySpec constructor to
// create a further customized PrivacySpec.
-type PrivacySpecOption interface{}
+type PrivacySpecOption any
func evaluatePrivacySpecOption(opt PrivacySpecOption, spec *PrivacySpec) {
switch opt {
@@ -322,19 +322,19 @@
// use as a privacy key.
// For example:
//
-// type exampleStruct1 struct {
-// IntField int
-// StructField exampleStruct2
-// }
+// type exampleStruct1 struct {
+// IntField int
+// StructField exampleStruct2
+// }
//
-// type exampleStruct2 struct {
-// StringField string
-// }
+// type exampleStruct2 struct {
+// StringField string
+// }
//
// If col is a PCollection of exampleStruct1, you could use "IntField" or
// "StructField.StringField" as idFieldPath.
//
-// Caution
+// # Caution
//
// The privacy key field must be a simple type (e.g. int, string, etc.), or
// a pointer to a simple type and all its parents must be structs or
@@ -377,7 +377,7 @@
// getIDField retrieves the ID field (specified by the IDFieldPath) from
// struct or pointer to a struct s.
-func (ext *extractStructFieldFn) getIDField(s interface{}) (interface{}, error) {
+func (ext *extractStructFieldFn) getIDField(s any) (any, error) {
subFieldNames := strings.Split(ext.IDFieldPath, ".")
subField := reflect.ValueOf(s)
var subFieldPath bytes.Buffer
@@ -472,7 +472,7 @@
// its fully qualified name, and deletes this field from the original message.
// It fails if the field is a submessage, if it is repeated, or if any of its
// parents are repeated.
-func (ext *extractProtoFieldFn) extractField(pb protoreflect.Message) (interface{}, error) {
+func (ext *extractProtoFieldFn) extractField(pb protoreflect.Message) (any, error) {
parts := strings.Split(ext.IDFieldPath, ".")
curPb := pb
curDesc := ext.desc
diff --git a/privacy-on-beam/pbeam/pbeam_test.go b/privacy-on-beam/pbeam/pbeam_test.go
index 53fff33..59d7d94 100644
--- a/privacy-on-beam/pbeam/pbeam_test.go
+++ b/privacy-on-beam/pbeam/pbeam_test.go
@@ -157,7 +157,7 @@
}
for _, tc := range []struct {
idFieldPath string
- want interface{}
+ want any
wantErr bool
}{
{"String", "0", false},
diff --git a/privacy-on-beam/pbeam/quantiles.go b/privacy-on-beam/pbeam/quantiles.go
index 55d51b3..6523252 100644
--- a/privacy-on-beam/pbeam/quantiles.go
+++ b/privacy-on-beam/pbeam/quantiles.go
@@ -123,12 +123,12 @@
// Note that due to the implementation details of the internal Quantiles algorithm, using pbeamtest
// with QuantilesPerKey has two caveats:
//
-// 1. Even without DP noise, the output will be slightly noisy. You can use
-// pbeamtest.QuantilesTolerance() to account for that noise.
+// 1. Even without DP noise, the output will be slightly noisy. You can use
+// pbeamtest.QuantilesTolerance() to account for that noise.
// 2. It is not possible to not clamp input values when using
-// pbeamtest.NewPrivacySpecNoNoiseWithoutContributionBounding(), so clamping to Min/MaxValue will
-// still be applied. However, MaxContributionsPerPartition and MaxPartitionsContributed contribution
-// bounding will be disabled.
+// pbeamtest.NewPrivacySpecNoNoiseWithoutContributionBounding(), so clamping to Min/MaxValue will
+// still be applied. However, MaxContributionsPerPartition and MaxPartitionsContributed contribution
+// bounding will be disabled.
func QuantilesPerKey(s beam.Scope, pcol PrivatePCollection, params QuantilesParams) beam.PCollection {
s = s.Scope("pbeam.QuantilesPerKey")
// Obtain & validate type information from the underlying PCollection<K,V>.
diff --git a/privacy-on-beam/pbeam/quantiles_test.go b/privacy-on-beam/pbeam/quantiles_test.go
index 91bfeca..0a0c3be 100644
--- a/privacy-on-beam/pbeam/quantiles_test.go
+++ b/privacy-on-beam/pbeam/quantiles_test.go
@@ -37,7 +37,7 @@
for _, tc := range []struct {
desc string
noiseKind noise.Kind
- want interface{}
+ want any
}{
{"Laplace noise kind", noise.LaplaceNoise,
&boundedQuantilesFn{
@@ -91,7 +91,7 @@
for _, tc := range []struct {
desc string
noiseKind noise.Kind
- wantNoise interface{}
+ wantNoise any
}{
{"Laplace noise kind", noise.LaplaceNoise, noise.Laplace()},
{"Gaussian noise kind", noise.GaussianNoise, noise.Gaussian()}} {
@@ -437,7 +437,7 @@
}
publicPartitionsSlice := []int{0}
p, s, col := ptest.CreateList(triples)
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -537,7 +537,7 @@
upper := 5.0
ranks := []float64{0.00, 0.25, 0.75, 1.00}
publicPartitionsSlice := []int{0}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -590,7 +590,7 @@
upper := 5.0
ranks := []float64{0.00, 0.25, 0.75, 1.00}
publicPartitionsSlice := []int{2, 3, 4, 5}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -804,7 +804,7 @@
delta := 0.0
ranks := []float64{0.60}
publicPartitionsSlice := []int{0, 1}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -910,7 +910,7 @@
upper := 5.0
ranks := []float64{0.49, 0.51}
publicPartitionsSlice := []int{0}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -1005,7 +1005,7 @@
upper := 5.0
ranks := []float64{0.00, 0.25, 0.75, 1.00}
publicPartitionsSlice := []int{0}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
diff --git a/privacy-on-beam/pbeam/sum.go b/privacy-on-beam/pbeam/sum.go
index ee88339..6a66a05 100644
--- a/privacy-on-beam/pbeam/sum.go
+++ b/privacy-on-beam/pbeam/sum.go
@@ -93,7 +93,7 @@
// otherwise.
//
// Optional.
- PublicPartitions interface{}
+ PublicPartitions any
}
// SumPerKey sums the values associated with each key in a
@@ -312,7 +312,7 @@
}
// findConvertFn gets the correct conversion to int64 or float64 function.
-func findConvertFn(t typex.FullType) (interface{}, error) {
+func findConvertFn(t typex.FullType) (any, error) {
switch t.Type().String() {
case "int":
return convertIntToInt64Fn, nil
@@ -375,7 +375,7 @@
}
// getKind gets the return kind of the convertFn function.
-func getKind(fn interface{}) (reflect.Kind, error) {
+func getKind(fn any) (reflect.Kind, error) {
if fn == nil {
return reflect.Invalid, fmt.Errorf("convertFn is nil")
}
@@ -388,9 +388,9 @@
return reflect.TypeOf(fn).Out(1).Kind(), nil
}
-func newAddNoiseToEmptyPublicPartitionsFn(epsilon, delta float64, maxPartitionsContributed int64, lower, upper float64, noiseKind noise.Kind, vKind reflect.Kind, testMode testMode) (interface{}, error) {
+func newAddNoiseToEmptyPublicPartitionsFn(epsilon, delta float64, maxPartitionsContributed int64, lower, upper float64, noiseKind noise.Kind, vKind reflect.Kind, testMode testMode) (any, error) {
var err error
- var bsFn interface{}
+ var bsFn any
switch vKind {
case reflect.Int64:
diff --git a/privacy-on-beam/pbeam/sum_test.go b/privacy-on-beam/pbeam/sum_test.go
index ab3e091..ec0484f 100644
--- a/privacy-on-beam/pbeam/sum_test.go
+++ b/privacy-on-beam/pbeam/sum_test.go
@@ -141,7 +141,7 @@
p, s, col, want := ptest.CreateList2(triples, result)
col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col)
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -214,7 +214,7 @@
col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col)
publicPartitionsSlice := []int{1, 2}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -334,7 +334,7 @@
p, s, col, want := ptest.CreateList2(triples, result)
col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col)
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -409,7 +409,7 @@
col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col)
publicPartitionsSlice := []int{1, 2}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -575,7 +575,7 @@
col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col)
publicPartitionsSlice := []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -722,7 +722,7 @@
col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col)
publicPartitionsSlice := []int{0, 1, 2, 3, 4}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -810,7 +810,7 @@
col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col)
publicPartitionsSlice := []int{0, 1, 2, 3, 4}
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -1037,7 +1037,7 @@
for _, tc := range []struct {
desc string
fullType typex.FullType
- wantConvertFn interface{}
+ wantConvertFn any
wantErr bool
}{
{"int", typex.New(reflect.TypeOf(int(0))), convertIntToInt64Fn, false},
@@ -1067,7 +1067,7 @@
func TestGetKind(t *testing.T) {
for _, tc := range []struct {
desc string
- convertFn interface{}
+ convertFn any
wantKind reflect.Kind
wantErr bool
}{
@@ -1138,7 +1138,7 @@
p, s, col := ptest.CreateList(triples)
col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col)
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
@@ -1201,7 +1201,7 @@
p, s, col := ptest.CreateList(triples)
col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col)
- var publicPartitions interface{}
+ var publicPartitions any
if tc.inMemory {
publicPartitions = publicPartitionsSlice
} else {
diff --git a/python/dp_accounting/pld/BUILD.bazel b/python/dp_accounting/pld/BUILD.bazel
index dd1d76c..763feb6 100644
--- a/python/dp_accounting/pld/BUILD.bazel
+++ b/python/dp_accounting/pld/BUILD.bazel
@@ -124,6 +124,7 @@
python_version = "PY3",
deps = [
":common",
+ ":pld_pmf",
":privacy_loss_distribution",
":test_util",
requirement("absl-py"),
diff --git a/python/dp_accounting/pld/accountant.py b/python/dp_accounting/pld/accountant.py
index c197eff..af29387 100644
--- a/python/dp_accounting/pld/accountant.py
+++ b/python/dp_accounting/pld/accountant.py
@@ -62,7 +62,7 @@
def get_smallest_laplace_noise(
privacy_parameters: common.DifferentialPrivacyParameters,
- num_queries: int,
+ num_queries: int = 1,
sensitivity: float = 1) -> float:
"""Finds smallest Laplace noise for which the mechanism satisfies desired privacy.
@@ -103,7 +103,7 @@
def get_smallest_discrete_laplace_noise(
privacy_parameters: common.DifferentialPrivacyParameters,
- num_queries: int,
+ num_queries: int = 1,
sensitivity: int = 1) -> float:
"""Finds smallest discrete Laplace noise for which the mechanism satisfies desired privacy.
@@ -152,7 +152,7 @@
def get_smallest_gaussian_noise(
privacy_parameters: common.DifferentialPrivacyParameters,
- num_queries: int,
+ num_queries: int = 1,
sensitivity: float = 1) -> float:
"""Finds smallest Gaussian noise for which the mechanism satisfies desired privacy.
diff --git a/python/dp_accounting/pld/privacy_loss_distribution_test.py b/python/dp_accounting/pld/privacy_loss_distribution_test.py
index 8910662..cd0766c 100644
--- a/python/dp_accounting/pld/privacy_loss_distribution_test.py
+++ b/python/dp_accounting/pld/privacy_loss_distribution_test.py
@@ -13,15 +13,48 @@
# limitations under the License.
"""Tests for privacy_loss_distribution.py."""
import math
-from typing import Any, Mapping
+from typing import Any, Mapping, Optional
import unittest
from absl.testing import parameterized
from scipy import stats
+from dp_accounting.pld import common
+from dp_accounting.pld import pld_pmf
from dp_accounting.pld import privacy_loss_distribution
from dp_accounting.pld import test_util
+def _assert_pld_pmf_equal(
+ testcase: unittest.TestCase,
+ pld: privacy_loss_distribution.PrivacyLossDistribution,
+ expected_rounded_pmf_add: Mapping[int, float],
+ expected_infinity_mass_add: float,
+ expected_rounded_pmf_remove: Optional[Mapping[int, float]] = None,
+ expected_infinity_mass_remove: Optional[float] = None):
+ """Asserts equality of PLD with expected values."""
+ def sparse_loss_probs(pmf: pld_pmf.PLDPmf) -> Mapping[int, float]:
+ if isinstance(pmf, pld_pmf.SparsePLDPmf):
+ return pmf._loss_probs
+ elif isinstance(pmf, pld_pmf.DensePLDPmf):
+ return common.list_to_dictionary(pmf._probs, pmf._lower_loss)
+ return {}
+
+ test_util.assert_dictionary_almost_equal(
+ testcase, expected_rounded_pmf_add, sparse_loss_probs(pld._pmf_add))
+ testcase.assertAlmostEqual(expected_infinity_mass_add,
+ pld._pmf_add._infinity_mass)
+ if expected_rounded_pmf_remove is None:
+ testcase.assertTrue(pld._symmetric)
+ else:
+ test_util.assert_dictionary_almost_equal(
+ testcase,
+ expected_rounded_pmf_remove,
+ sparse_loss_probs(pld._pmf_remove))
+ testcase.assertAlmostEqual(expected_infinity_mass_remove,
+ pld._pmf_remove._infinity_mass)
+ testcase.assertFalse(pld._symmetric)
+
+
class AddRemovePrivacyLossDistributionTest(parameterized.TestCase):
def _create_pld(
@@ -378,17 +411,9 @@
parameter, sensitivity=sensitivity, value_discretization_interval=1,
sampling_prob=sampling_prob)
- if expected_rounded_pmf_remove is None:
- test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
- self.assertTrue(pld._symmetric)
- else:
- test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add,
- pld._pmf_add._loss_probs) # pytype: disable=attribute-error
- test_util.assert_dictionary_almost_equal(self,
- expected_rounded_pmf_remove,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
- self.assertFalse(pld._symmetric)
+ _assert_pld_pmf_equal(self, pld,
+ expected_rounded_pmf_add, 0.0,
+ expected_rounded_pmf_remove, 0.0)
@parameterized.parameters((0.5, {
2: 0.61059961,
@@ -407,13 +432,12 @@
-3: 0.19337051
}))
def test_laplace_discretization(self, value_discretization_interval,
- expected_rounded_probability_mass_function):
+ expected_rounded_pmf):
"""Verifies correctness of pessimistic PLD for varying discretization."""
pld = privacy_loss_distribution.from_laplace_mechanism(
1, value_discretization_interval=value_discretization_interval)
- test_util.assert_dictionary_almost_equal(
- self, expected_rounded_probability_mass_function,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
+
+ _assert_pld_pmf_equal(self, pld, expected_rounded_pmf, 0.0)
@parameterized.parameters(
# Tests with sampling_prob = 1
@@ -473,17 +497,9 @@
value_discretization_interval=1,
sampling_prob=sampling_prob)
- if expected_rounded_pmf_remove is None:
- test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
- self.assertTrue(pld._symmetric)
- else:
- test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add,
- pld._pmf_add._loss_probs) # pytype: disable=attribute-error
- test_util.assert_dictionary_almost_equal(self,
- expected_rounded_pmf_remove,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
- self.assertFalse(pld._symmetric)
+ _assert_pld_pmf_equal(self, pld,
+ expected_rounded_pmf_add, 0.0,
+ expected_rounded_pmf_remove, 0.0)
class GaussianPrivacyLossDistributionTest(parameterized.TestCase):
@@ -586,17 +602,13 @@
value_discretization_interval=1,
sampling_prob=sampling_prob)
+ test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add,
+ pld._pmf_add._loss_probs) # pytype: disable=attribute-error
+ test_util.assert_almost_greater_equal(self, stats.norm.cdf(-0.9),
+ pld._pmf_add._infinity_mass)
if expected_rounded_pmf_remove is None:
- test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
- test_util.assert_almost_greater_equal(self, stats.norm.cdf(-0.9),
- pld._pmf_remove._infinity_mass)
self.assertTrue(pld._symmetric)
else:
- test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add,
- pld._pmf_add._loss_probs) # pytype: disable=attribute-error
- test_util.assert_almost_greater_equal(self, stats.norm.cdf(-0.9),
- pld._pmf_add._infinity_mass)
test_util.assert_dictionary_almost_equal(self,
expected_rounded_pmf_remove,
pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
@@ -624,7 +636,7 @@
-4: 0.04456546
}))
def test_gaussian_discretization(self, value_discretization_interval,
- expected_rounded_probability_mass_function):
+ expected_rounded_pmf):
"""Verifies correctness of pessimistic PLD for varying discretization."""
pld = privacy_loss_distribution.from_gaussian_mechanism(
1,
@@ -633,7 +645,7 @@
test_util.assert_almost_greater_equal(self, stats.norm.cdf(-0.9),
pld._pmf_remove._infinity_mass)
test_util.assert_dictionary_almost_equal(
- self, expected_rounded_probability_mass_function,
+ self, expected_rounded_pmf,
pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
@parameterized.parameters(
@@ -723,17 +735,13 @@
value_discretization_interval=1,
sampling_prob=sampling_prob)
+ test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add,
+ pld._pmf_add._loss_probs) # pytype: disable=attribute-error
+ test_util.assert_almost_greater_equal(self, stats.norm.cdf(-0.9),
+ pld._pmf_add._infinity_mass)
if expected_rounded_pmf_remove is None:
- test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
- test_util.assert_almost_greater_equal(self, stats.norm.cdf(-0.9),
- pld._pmf_remove._infinity_mass)
self.assertTrue(pld._symmetric)
else:
- test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add,
- pld._pmf_add._loss_probs) # pytype: disable=attribute-error
- test_util.assert_almost_greater_equal(self, stats.norm.cdf(-0.9),
- pld._pmf_add._infinity_mass)
test_util.assert_dictionary_almost_equal(self,
expected_rounded_pmf_remove,
pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
@@ -823,17 +831,10 @@
pld = privacy_loss_distribution.from_discrete_laplace_mechanism(
parameter, sensitivity=sensitivity, value_discretization_interval=1,
sampling_prob=sampling_prob)
- if expected_rounded_pmf_remove is None:
- test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
- self.assertTrue(pld._symmetric)
- else:
- test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add,
- pld._pmf_add._loss_probs) # pytype: disable=attribute-error
- test_util.assert_dictionary_almost_equal(self,
- expected_rounded_pmf_remove,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
- self.assertFalse(pld._symmetric)
+
+ _assert_pld_pmf_equal(self, pld,
+ expected_rounded_pmf_add, 0.0,
+ expected_rounded_pmf_remove, 0.0)
@parameterized.parameters((0.1, {
10: 0.73105858,
@@ -844,13 +845,12 @@
}))
def test_discrete_laplace_discretization(
self, value_discretization_interval,
- expected_rounded_probability_mass_function):
+ expected_rounded_pmf):
"""Verifies correctness of pessimistic PLD for varying discretization."""
pld = privacy_loss_distribution.from_discrete_laplace_mechanism(
1, value_discretization_interval=value_discretization_interval)
- test_util.assert_dictionary_almost_equal(
- self, expected_rounded_probability_mass_function,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
+
+ _assert_pld_pmf_equal(self, pld, expected_rounded_pmf, 0.0)
@parameterized.parameters(
# Tests with sampling_prob = 1
@@ -914,17 +914,10 @@
parameter, sensitivity=sensitivity, value_discretization_interval=1,
pessimistic_estimate=False,
sampling_prob=sampling_prob)
- if expected_rounded_pmf_remove is None:
- test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
- self.assertTrue(pld._symmetric)
- else:
- test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add,
- pld._pmf_add._loss_probs) # pytype: disable=attribute-error
- test_util.assert_dictionary_almost_equal(self,
- expected_rounded_pmf_remove,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
- self.assertFalse(pld._symmetric)
+
+ _assert_pld_pmf_equal(self, pld,
+ expected_rounded_pmf_add, 0.0,
+ expected_rounded_pmf_remove, 0.0)
class DiscreteGaussianPrivacyLossDistributionTest(parameterized.TestCase):
@@ -986,23 +979,11 @@
pld = privacy_loss_distribution.from_discrete_gaussian_mechanism(
sigma, sensitivity=sensitivity, truncation_bound=1,
sampling_prob=sampling_prob)
- if expected_rounded_pmf_remove is None:
- self.assertAlmostEqual(pld._pmf_remove._infinity_mass,
- expected_infinity_mass_add)
- test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
- self.assertTrue(pld._symmetric)
- else:
- self.assertAlmostEqual(pld._pmf_add._infinity_mass,
- expected_infinity_mass_add)
- test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add,
- pld._pmf_add._loss_probs) # pytype: disable=attribute-error
- self.assertAlmostEqual(pld._pmf_remove._infinity_mass,
- expected_infinity_mass_remove)
- test_util.assert_dictionary_almost_equal(self,
- expected_rounded_pmf_remove,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
- self.assertFalse(pld._symmetric)
+
+ _assert_pld_pmf_equal(
+ self, pld,
+ expected_rounded_pmf_add, expected_infinity_mass_add,
+ expected_rounded_pmf_remove, expected_infinity_mass_remove)
@parameterized.parameters((2, {
15000: 0.24420134,
@@ -1018,16 +999,13 @@
-25000: 0.00443305
}, 0.00443305))
def test_discrete_gaussian_truncation(
- self, truncation_bound, expected_rounded_probability_mass_function,
- expected_infinity_mass):
+ self, truncation_bound, expected_rounded_pmf, expected_infinity_mass):
"""Verifies correctness of pessimistic PLD for varying truncation bound."""
pld = privacy_loss_distribution.from_discrete_gaussian_mechanism(
1, truncation_bound=truncation_bound)
- self.assertAlmostEqual(pld._pmf_remove._infinity_mass,
- expected_infinity_mass)
- test_util.assert_dictionary_almost_equal(
- self, expected_rounded_probability_mass_function,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
+
+ _assert_pld_pmf_equal(
+ self, pld, expected_rounded_pmf, expected_infinity_mass)
@parameterized.parameters(
# Tests with sampling_prob = 1
@@ -1077,23 +1055,11 @@
sigma, sensitivity=sensitivity, truncation_bound=1,
pessimistic_estimate=False,
sampling_prob=sampling_prob)
- if expected_rounded_pmf_remove is None:
- self.assertAlmostEqual(pld._pmf_remove._infinity_mass,
- expected_infinity_mass_add)
- test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
- self.assertTrue(pld._symmetric)
- else:
- self.assertAlmostEqual(pld._pmf_add._infinity_mass,
- expected_infinity_mass_add)
- test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add,
- pld._pmf_add._loss_probs) # pytype: disable=attribute-error
- self.assertAlmostEqual(pld._pmf_remove._infinity_mass,
- expected_infinity_mass_remove)
- test_util.assert_dictionary_almost_equal(self,
- expected_rounded_pmf_remove,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
- self.assertFalse(pld._symmetric)
+
+ _assert_pld_pmf_equal(
+ self, pld,
+ expected_rounded_pmf_add, expected_infinity_mass_add,
+ expected_rounded_pmf_remove, expected_infinity_mass_remove)
class RandomizedResponsePrivacyLossDistributionTest(parameterized.TestCase):
@@ -1108,13 +1074,11 @@
}))
def test_randomized_response_basic(
self, noise_parameter, num_buckets,
- expected_rounded_probability_mass_function):
+ expected_rounded_pmf):
# Set value_discretization_interval = 1 here.
pld = privacy_loss_distribution.from_randomized_response(
noise_parameter, num_buckets, value_discretization_interval=1)
- test_util.assert_dictionary_almost_equal(
- self, expected_rounded_probability_mass_function,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
+ _assert_pld_pmf_equal(self, pld, expected_rounded_pmf, 0.0)
@parameterized.parameters((0.7, {
5: 0.85,
@@ -1126,16 +1090,13 @@
0: 0.1
}))
def test_randomized_response_discretization(
- self, value_discretization_interval,
- expected_rounded_probability_mass_function):
+ self, value_discretization_interval, expected_rounded_pmf):
# Set noise_parameter = 0.2, num_buckets = 4 here.
# The true (non-discretized) PLD is
# {2.83321334: 0.85, -2.83321334: 0.05, 0: 0.1}.
pld = privacy_loss_distribution.from_randomized_response(
0.2, 4, value_discretization_interval=value_discretization_interval)
- test_util.assert_dictionary_almost_equal(
- self, expected_rounded_probability_mass_function,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
+ _assert_pld_pmf_equal(self, pld, expected_rounded_pmf, 0.0)
@parameterized.parameters((0.5, 2, {
1: 0.75,
@@ -1146,17 +1107,14 @@
0: 0.1
}))
def test_randomized_response_optimistic(
- self, noise_parameter, num_buckets,
- expected_rounded_probability_mass_function):
+ self, noise_parameter, num_buckets, expected_rounded_pmf):
# Set value_discretization_interval = 1 here.
pld = privacy_loss_distribution.from_randomized_response(
noise_parameter,
num_buckets,
pessimistic_estimate=False,
value_discretization_interval=1)
- test_util.assert_dictionary_almost_equal(
- self, expected_rounded_probability_mass_function,
- pld._pmf_remove._loss_probs) # pytype: disable=attribute-error
+ _assert_pld_pmf_equal(self, pld, expected_rounded_pmf, 0.0)
@parameterized.parameters((0.0, 10), (1.1, 4), (0.5, 1))
def test_randomized_response_value_errors(self, noise_parameter, num_buckets):
@@ -1169,9 +1127,7 @@
def test_identity(self):
pld = privacy_loss_distribution.identity()
- test_util.assert_dictionary_almost_equal(self, pld._pmf_remove._loss_probs,
- {0: 1}) # pytype: disable=attribute-error
- self.assertAlmostEqual(pld._pmf_remove._infinity_mass, 0)
+ _assert_pld_pmf_equal(self, pld, {0: 1}, 0.0)
pld = pld.compose(
privacy_loss_distribution.PrivacyLossDistribution
@@ -1179,14 +1135,7 @@
1: 0.5,
-1: 0.5
}, 0, 1e-4))
- test_util.assert_dictionary_almost_equal(
- self,
- pld._pmf_remove._loss_probs, # pytype: disable=attribute-error
- {
- 1: 0.5,
- -1: 0.5
- })
- self.assertAlmostEqual(pld._pmf_remove._infinity_mass, 0)
+ _assert_pld_pmf_equal(self, pld, {1: 0.5, -1: 0.5}, 0.0)
if __name__ == '__main__':
diff --git a/python/dp_accounting/pld/test_util.py b/python/dp_accounting/pld/test_util.py
index 25268b1..f812903 100644
--- a/python/dp_accounting/pld/test_util.py
+++ b/python/dp_accounting/pld/test_util.py
@@ -38,12 +38,19 @@
"""
for i in dict1.keys():
if not np.isclose(dict1[i], 0):
- found = False
+ key_found = False
+ value_found = False
for j in dict2.keys():
- if np.isclose(i, j) and np.isclose(dict1[i], dict2[j]):
- found = True
- break
- testcase.assertTrue(found, msg=f'Key {i} in {dict1} not found in {dict2}')
+ if np.isclose(i, j):
+ key_found = True
+ if np.isclose(dict1[i], dict2[j]):
+ value_found = True
+ break
+ testcase.assertTrue(key_found,
+ msg=f'Key {i} in {dict1} not found in {dict2}')
+ testcase.assertTrue(
+ value_found,
+ msg=f'Value for key {i} in {dict1} not matching that in {dict2}')
def assert_dictionary_almost_equal(testcase: 'unittest.TestCase',
diff --git a/python/dp_accounting/pld/test_util_test.py b/python/dp_accounting/pld/test_util_test.py
index c6f5572..750d944 100644
--- a/python/dp_accounting/pld/test_util_test.py
+++ b/python/dp_accounting/pld/test_util_test.py
@@ -63,6 +63,32 @@
test_util.assert_dictionary_contained(self, dict1, dict2)
@parameterized.parameters(
+ # Key missing
+ ({
+ 1: 0.1,
+ 2: 0.3
+ }, {
+ 1: 0.1,
+ 3: 0.3,
+ }, True),
+ # Value not matching
+ ({
+ 1: 0.1,
+ 2: 0.3
+ }, {
+ 1: 0.1,
+ 2: 0.2
+ }, False))
+ def test_assert_dictionary_contained_error_messages(
+ self, dict1, dict2, key_missing):
+ with self.assertRaises(AssertionError) as cm:
+ test_util.assert_dictionary_contained(self, dict1, dict2)
+ if key_missing:
+ self.assertStartsWith(str(cm.exception), 'False is not true : Key')
+ else:
+ self.assertStartsWith(str(cm.exception), 'False is not true : Value')
+
+ @parameterized.parameters(
# Dictionary almost equal
({
1: 0.1,