//
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package pbeam
import (
"testing"
"github.com/google/differential-privacy/go/v2/dpagg"
"github.com/google/differential-privacy/privacy-on-beam/v2/pbeam/testutils"
"github.com/apache/beam/sdks/v2/go/pkg/beam"
"github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest"
)
// TestSelectPartitionsIsNonDeterministicV checks that SelectPartitions performs
// a random partition selection for PrivatePCollection<V> inputs.
//
// Each case builds a pipeline in which every privacy unit contributes to its
// own partition, runs SelectPartitions over it, and passes the result to
// testutils.CheckSomePartitionsAreDropped together with the number of input
// partitions.
func TestSelectPartitionsIsNonDeterministicV(t *testing.T) {
	for _, tc := range []struct {
		name          string
		epsilon       float64
		delta         float64
		numPartitions int
	}{
		{
			// A non-empty name keeps the subtest identifiable in test output
			// and selectable with -run (an empty name is reported as "#00").
			name:    "OnePrivacyUnitPerPartition",
			epsilon: 1,
			delta:   0.3, // yields a 30% chance of emitting any particular partition.
			// 143 distinct partitions implies that some (but not all) partitions are
			// emitted with high probability (at least 1 - 1e-20).
			numPartitions: 143,
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			// Build up {ID, Value} pairs such that 1 privacy unit contributes to
			// each of the tc.numPartitions partitions:
			//    {0,0}, {1,1}, ..., {numPartitions-1,numPartitions-1}
			// The final length is known, so allocate the backing array once.
			pairs := make([]testutils.PairII, 0, tc.numPartitions)
			for i := 0; i < tc.numPartitions; i++ {
				pairs = append(pairs, testutils.PairII{i, i})
			}
			p, s, col := ptest.CreateList(pairs)
			col = beam.ParDo(s, testutils.PairToKV, col)

			// Run SelectPartitions on pairs.
			pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta))
			got := SelectPartitions(s, pcol, SelectPartitionsParams{MaxPartitionsContributed: 1})

			// Validate that partitions are selected randomly (i.e., some emitted and some dropped).
			testutils.CheckSomePartitionsAreDropped(s, got, tc.numPartitions)
			if err := ptest.Run(p); err != nil {
				t.Errorf("%v", err)
			}
		})
	}
}
// TestSelectPartitionsIsNonDeterministicKV checks that SelectPartitions
// performs a random partition selection for PrivatePCollection<K,V> inputs.
//
// Each case builds a pipeline in which every privacy unit contributes to its
// own partition, converts the private collection to key/value form, runs
// SelectPartitions over it, and passes the result to
// testutils.CheckSomePartitionsAreDropped together with the number of input
// partitions.
func TestSelectPartitionsIsNonDeterministicKV(t *testing.T) {
	for _, tc := range []struct {
		name          string
		epsilon       float64
		delta         float64
		numPartitions int
	}{
		// The table previously held this case twice, byte-for-byte; a single
		// named entry exercises the same configuration.
		{
			// A non-empty name keeps the subtest identifiable in test output
			// and selectable with -run (an empty name is reported as "#00").
			name:    "OnePrivacyUnitPerPartition",
			epsilon: 1,
			delta:   0.3, // yields a 30% chance of emitting any particular partition.
			// 143 distinct partitions implies that some (but not all) partitions are
			// emitted with high probability (at least 1 - 1e-20).
			numPartitions: 143,
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			// Build up {ID, Partition, Value} triples such that 1 privacy unit contributes to
			// each of the tc.numPartitions partitions:
			//    {0,0,0}, {1,1,0}, ..., {numPartitions-1,numPartitions-1,0}
			// The final length is known, so allocate the backing array once.
			triples := make([]testutils.TripleWithIntValue, 0, tc.numPartitions)
			for i := 0; i < tc.numPartitions; i++ {
				triples = append(triples, testutils.TripleWithIntValue{i, i, 0})
			}
			p, s, col := ptest.CreateList(triples)
			col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col)

			// Run SelectPartitions on triples.
			pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta))
			pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol)
			got := SelectPartitions(s, pcol, SelectPartitionsParams{MaxPartitionsContributed: 1})

			// Validate that partitions are selected randomly (i.e., some emitted and some dropped).
			testutils.CheckSomePartitionsAreDropped(s, got, tc.numPartitions)
			if err := ptest.Run(p); err != nil {
				t.Errorf("%v", err)
			}
		})
	}
}
// TestSelectPartitionsBoundsCrossPartitionContributionsV verifies that
// SelectPartitions enforces the cross-partition contribution bound for
// PrivatePCollection<V> inputs.
func TestSelectPartitionsBoundsCrossPartitionContributionsV(t *testing.T) {
	// Input: 10 partitions, with a single privacy ID contributing to each.
	const partitionCount = 10
	var input []testutils.PairII
	for partition := 0; partition < partitionCount; partition++ {
		input = append(input, testutils.MakePairsWithFixedV(1, partition)...)
	}
	p, s, col := ptest.CreateList(input)
	col = beam.ParDo(s, testutils.PairToKV, col)

	// ε=50, δ=~1 and l0Sensitivity=1 gives a threshold of 2.
	const (
		epsilon       = 50.0
		l0Sensitivity = 1
	)
	spec := NewPrivacySpec(epsilon, dpagg.LargestRepresentableDelta)
	params := SelectPartitionsParams{MaxPartitionsContributed: int64(l0Sensitivity)}
	got := SelectPartitions(s, MakePrivate(s, col, spec), params)

	// A contribution bound of 1 means exactly 1 partition is expected in the output.
	testutils.CheckNumPartitions(s, got, 1)

	err := ptest.Run(p)
	if err != nil {
		t.Errorf("Did not bound cross partition contributions correctly for PrivatePCollection<V> inputs: %v", err)
	}
}
// TestSelectPartitionsBoundsCrossPartitionContributionsKV verifies that
// SelectPartitions enforces the cross-partition contribution bound for
// PrivatePCollection<K,V> inputs.
func TestSelectPartitionsBoundsCrossPartitionContributionsKV(t *testing.T) {
	// Input: 10 partitions, with a single privacy ID contributing to each.
	const partitionCount = 10
	var input []testutils.TripleWithIntValue
	for partition := 0; partition < partitionCount; partition++ {
		input = append(input, testutils.MakeTripleWithIntValue(1, partition, 0)...)
	}
	p, s, col := ptest.CreateList(input)
	col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col)

	// ε=50, δ=~1 and l0Sensitivity=1 gives a threshold of 2.
	const (
		epsilon       = 50.0
		l0Sensitivity = 1
	)
	spec := NewPrivacySpec(epsilon, dpagg.LargestRepresentableDelta)
	private := MakePrivate(s, col, spec)
	// Switch the private collection to key/value form before selecting partitions.
	private = ParDo(s, testutils.TripleWithIntValueToKV, private)
	params := SelectPartitionsParams{MaxPartitionsContributed: int64(l0Sensitivity)}
	got := SelectPartitions(s, private, params)

	// A contribution bound of 1 means exactly 1 partition is expected in the output.
	testutils.CheckNumPartitions(s, got, 1)

	err := ptest.Run(p)
	if err != nil {
		t.Errorf("Did not bound cross partition contributions correctly for PrivatePCollection<K,V> inputs: %v", err)
	}
}