blob: 53fff33935f896ead77e6dd659f336027d4e7f79 [file] [log] [blame]
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package pbeam
import (
"reflect"
"testing"
"github.com/google/differential-privacy/privacy-on-beam/v2/pbeam/testutils"
testpb "github.com/google/differential-privacy/privacy-on-beam/v2/testdata"
"github.com/apache/beam/sdks/v2/go/pkg/beam"
"github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert"
"github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest"
"github.com/google/go-cmp/cmp"
"google.golang.org/protobuf/proto"
)
func init() {
beam.RegisterType(reflect.TypeOf((*testpb.TestAnon)(nil)))
beam.RegisterType(reflect.TypeOf(protoPair{}))
}
// protoPair bundles a string key with a TestAnon proto message. It is the
// value returned by kvToProtoPair and is registered with beam in init.
type protoPair struct {
// Key is the string half of the pair.
Key string
// Pb is the proto message half of the pair.
Pb *testpb.TestAnon
}
// kvToProtoPair packs a key and a TestAnon message into a single protoPair.
func kvToProtoPair(key string, pb *testpb.TestAnon) protoPair {
	return protoPair{Key: key, Pb: pb}
}
// TestMakePrivate checks that the collection wrapped by MakePrivate still
// holds exactly the key/value pairs of the input collection.
func TestMakePrivate(t *testing.T) {
	pairs := []testutils.PairII{
		{17, 42},
		{99, 0},
	}
	p, s, col := ptest.CreateList(pairs)
	kvs := beam.ParDo(s, testutils.PairToKV, col)

	// The private collection should contain 17→42 and 99→0.
	pcol := MakePrivate(s, kvs, NewPrivacySpec(1, 1e-10))

	// Convert back to pairs so the result can be compared with the input.
	got := beam.ParDo(s, testutils.KVToPair, pcol.col)
	passert.Equals(s, got, col)

	err := ptest.Run(p)
	if err == nil {
		return
	}
	t.Errorf("MakePrivate(%v) = %v, expected %v: %v", col, got, col, err)
}
// SimpleStruct is a flat test struct with one string and one int field. It is
// nested inside ComplexStruct, both by pointer and as a slice element.
type SimpleStruct struct {
String string
Int int
}
// ComplexStruct is a test struct mixing plain, pointer, slice and nested
// struct fields. The tests in this file use it to exercise ID field lookup
// by field path.
type ComplexStruct struct {
String string
Int int
// StringPointer is a pointer-to-string field.
StringPointer *string
// StringSlice is a slice-of-strings field.
StringSlice []string
// SubStruct is a nested struct reached through a pointer.
SubStruct *SimpleStruct
// SubStructSlice is a slice of nested structs.
SubStructSlice []SimpleStruct
}
// structPair bundles a string key with a ComplexStruct value. It is the value
// returned by kvToStructPair.
type structPair struct {
// Key is the string half of the pair.
Key string
// Value is the struct half of the pair.
Value ComplexStruct
}
// kvToStructPair packs a key and a ComplexStruct into a single structPair.
func kvToStructPair(key string, value ComplexStruct) structPair {
	var pair structPair
	pair.Key = key
	pair.Value = value
	return pair
}
// RecursiveStruct is a test struct that embeds a ComplexStruct by value and
// refers to its own type through a pointer field.
type RecursiveStruct struct {
String string
Int int
// SubStruct is a nested struct held by value.
SubStruct ComplexStruct
// RecursiveStruct points to another value of the same type; it may be nil.
RecursiveStruct *RecursiveStruct
}
// TestMakePrivateFromStruct checks that MakePrivateFromStruct pairs every
// input struct with the value found at idFieldPath. In each case the expected
// key is the ID string wrapped in double quotes.
func TestMakePrivateFromStruct(t *testing.T) {
	fortyTwo, seventeen := "42", "17"

	type testCase struct {
		desc        string
		idFieldPath string
		values      []ComplexStruct
		want        []structPair
	}
	cases := []testCase{
		{
			desc:        "top level string id field",
			idFieldPath: "String",
			values: []ComplexStruct{
				{String: "42", Int: 42},
				{String: "17", Int: 17},
			},
			want: []structPair{
				{Key: `"42"`, Value: ComplexStruct{String: "42", Int: 42}},
				{Key: `"17"`, Value: ComplexStruct{String: "17", Int: 17}},
			},
		},
		{
			desc:        "top level string pointer id field",
			idFieldPath: "StringPointer",
			values: []ComplexStruct{
				{StringPointer: &fortyTwo, Int: 42},
				{StringPointer: &seventeen, Int: 17},
			},
			want: []structPair{
				{Key: `"42"`, Value: ComplexStruct{StringPointer: &fortyTwo, Int: 42}},
				{Key: `"17"`, Value: ComplexStruct{StringPointer: &seventeen, Int: 17}},
			},
		},
		{
			desc:        "bottom level string id field",
			idFieldPath: "SubStruct.String",
			values: []ComplexStruct{
				{SubStruct: &SimpleStruct{String: "42"}, Int: 42},
				{SubStruct: &SimpleStruct{String: "17"}, Int: 17},
			},
			want: []structPair{
				{Key: `"42"`, Value: ComplexStruct{SubStruct: &SimpleStruct{String: "42"}, Int: 42}},
				{Key: `"17"`, Value: ComplexStruct{SubStruct: &SimpleStruct{String: "17"}, Int: 17}},
			},
		},
	}

	for _, tc := range cases {
		// Each case builds and runs its own pipeline.
		p, s, col, want := ptest.CreateList2(tc.values, tc.want)
		spec := NewPrivacySpec(1, 1e-10)
		pcol := MakePrivateFromStruct(s, col, spec, tc.idFieldPath)
		got := beam.ParDo(s, kvToStructPair, pcol.col)
		passert.Equals(s, got, want)

		err := ptest.Run(p)
		if err == nil {
			continue
		}
		t.Errorf("MakePrivateFromStruct output does not match input values with %s. got %v, expected %v: %v", tc.desc, got, want, err)
	}
}
// Tests the GetIDField method in extractStructFieldFn.
func TestGetIDField(t *testing.T) {
eight := "8"
val := RecursiveStruct{
String: "0",
Int: 0,
SubStruct: ComplexStruct{
String: "1",
Int: 1,
StringSlice: []string{"2", "3", "4"},
SubStruct: &SimpleStruct{String: "5", Int: 5},
SubStructSlice: []SimpleStruct{
SimpleStruct{String: "6", Int: 6},
SimpleStruct{String: "7", Int: 7},
},
StringPointer: &eight},
RecursiveStruct: &RecursiveStruct{String: "9", Int: 9},
}
for _, tc := range []struct {
idFieldPath string
want interface{}
wantErr bool
}{
{"String", "0", false},
{"Int", 0, false},
{"SubStruct", nil, true},
{"SubStruct.String", "1", false},
{"SubStruct.Int", 1, false},
{"SubStruct.StringSlice", nil, true},
{"SubStruct.SubStruct", nil, true},
{"SubStruct.SubStruct.String", "5", false},
{"SubStruct.SubStruct.Int", 5, false},
{"SubStruct.SubStructSlice", nil, true},
{"SubStruct.SubStructSlice.String", nil, true},
{"SubStruct.SubStructSlice.Int", nil, true},
{"SubStruct.StringPointer", "8", false},
{"RecursiveStruct", nil, true},
{"RecursiveStruct.String", "9", false},
{"RecursiveStruct.Int", 9, false},
{"RecursiveStruct.RecursiveStruct", nil, true},
{"RecursiveStruct.RecursiveStruct.String", "", false},
{"RecursiveStruct.RecursiveStruct.Int", 0, false},
{"nonexistent", nil, true},
} {
ext := extractStructFieldFn{IDFieldPath: tc.idFieldPath}
got, err := ext.getIDField(val)
if (err != nil) != tc.wantErr {
t.Errorf("GetIDField with idFieldPath=%s: got error %v, wantErr=%t.", tc.idFieldPath, err, tc.wantErr)
}
if !cmp.Equal(got, tc.want) {
t.Errorf("GetIDField with idFieldPath=%s: retrieved field %v, wanted=%v.", tc.idFieldPath, got, tc.want)
}
}
}
// TestMakePrivateFromProto checks that MakePrivateFromProto, given the field
// path "foo", pairs every input message with its Foo value rendered as a
// string. The message that leaves Foo unset is expected under the key "0".
func TestMakePrivateFromProto(t *testing.T) {
	inputs := []*testpb.TestAnon{
		{Foo: proto.Int64(42), Bar: proto.String("fourty-two")},
		{Foo: proto.Int64(17), Bar: proto.String("seventeen")},
		{Bar: proto.String("zero")},
	}
	expected := []protoPair{
		{Key: "42", Pb: &testpb.TestAnon{Foo: proto.Int64(42), Bar: proto.String("fourty-two")}},
		{Key: "17", Pb: &testpb.TestAnon{Foo: proto.Int64(17), Bar: proto.String("seventeen")}},
		{Key: "0", Pb: &testpb.TestAnon{Bar: proto.String("zero")}},
	}

	p, s, col, want := ptest.CreateList2(inputs, expected)
	spec := NewPrivacySpec(1, 1e-10)
	pcol := MakePrivateFromProto(s, col, spec, "foo")
	got := beam.ParDo(s, kvToProtoPair, pcol.col)
	passert.Equals(s, got, want)

	err := ptest.Run(p)
	if err == nil {
		return
	}
	t.Errorf("MakePrivateFromProto(%v) = %v, expected %v: %v", col, got, want, err)
}
// Shared TestComplex fixtures. The repeat and subrepeat slices are reused
// (not copied) by every message below.
var (
	// repeat is the repeated string payload.
	repeat = []string{"bar", "baz"}

	// subrepeat is the repeated submessage payload.
	subrepeat = []*testpb.TestComplex_Submessage{
		{Simple: proto.String("oob"), Repeat: repeat},
		{Simple: proto.String("obo"), Repeat: repeat},
	}

	// complexMsg has both the top-level and the submessage Simple field set.
	complexMsg = &testpb.TestComplex{
		Simple:    proto.String("foo"),
		Repeat:    repeat,
		Sub:       &testpb.TestComplex_Submessage{Simple: proto.String("boo"), Repeat: repeat},
		Subrepeat: subrepeat,
	}

	// withoutSimple is complexMsg with the top-level Simple field left unset.
	withoutSimple = &testpb.TestComplex{
		Repeat:    repeat,
		Sub:       &testpb.TestComplex_Submessage{Simple: proto.String("boo"), Repeat: repeat},
		Subrepeat: subrepeat,
	}

	// withoutSubSimple is complexMsg with Sub.Simple left unset.
	withoutSubSimple = &testpb.TestComplex{
		Simple:    proto.String("foo"),
		Repeat:    repeat,
		Sub:       &testpb.TestComplex_Submessage{Repeat: repeat},
		Subrepeat: subrepeat,
	}
)
// Tests the extraction logic in extractProtoFieldFn.
//
// Every case runs extractField on a fresh copy of complexMsg. Cases with
// ok=true must succeed, return wantField as a string, and leave the message
// equal to wantMsg. Cases with ok=false must return an error.
func TestExtractProtoField(t *testing.T) {
	for _, tc := range []struct {
		idFieldPath string
		wantField   string
		wantMsg     *testpb.TestComplex
		ok          bool
	}{
		{"simple", "foo", complexMsg, true},
		// NOTE(review): "empty" is presumably a field that complexMsg leaves unset — confirm against the proto.
		{"empty", "", complexMsg, true},
		{"sub.simple", "boo", complexMsg, true},
		{"repeat", "", nil, false},
		{"sub.repeat", "", nil, false},
		{"subrepeat.simple", "", nil, false},
		{"subrepeat.repeat", "", nil, false},
		{"nonexistent", "", nil, false},
	} {
		ext := &extractProtoFieldFn{
			IDFieldPath: tc.idFieldPath,
			desc:        (&testpb.TestComplex{}).ProtoReflect().Descriptor(),
		}
		// Work on a copy so that the shared fixture is never handed to extractField.
		clone := &testpb.TestComplex{}
		proto.Merge(clone, complexMsg)

		gotField, err := ext.extractField(clone.ProtoReflect())
		if (err == nil) != tc.ok {
			t.Errorf("extractField with IDFieldPath=%s: got error %v, want ok=%t.", tc.idFieldPath, err, tc.ok)
		}
		if err != nil {
			continue
		}

		// Use a checked assertion: a non-string result should fail this case
		// with a readable message instead of panicking and aborting the test.
		gotString, isString := gotField.(string)
		if !isString {
			t.Errorf("extractField with IDFieldPath=%s: got field %v of type %T, want a string", tc.idFieldPath, gotField, gotField)
		} else if gotString != tc.wantField {
			t.Errorf("extractField with IDFieldPath=%s: got field %v, want %s", tc.idFieldPath, gotString, tc.wantField)
		}
		if !proto.Equal(clone, tc.wantMsg) {
			t.Errorf("extractField with IDFieldPath=%s: got msg %v, want %v", tc.idFieldPath, clone, tc.wantMsg)
		}
	}
}
// Tests that we can get the whole budget and consume it partially afterwards.
func TestGetFullBudget(t *testing.T) {
	spec := NewPrivacySpec(2, 2e-10)

	// Asking for (0, 0) is expected to return the whole (2, 2e-10) budget.
	gotEps, gotDel, err := spec.getBudget(0, 0)
	if err != nil {
		t.Errorf("expected no error but got error: %v", err)
	}
	if !(gotEps == 2.0 && gotDel == 2e-10) {
		t.Errorf("Trying to get the whole budget: Got (epsilon,delta)=(%f,%e), expected=(%f,%e)", gotEps, gotDel, 2.0, 2e-10)
	}

	// Split the budget and consume it in two calls.
	for call := 0; call < 2; call++ {
		gotEps, gotDel, err = spec.consumeBudget(1, 1e-10)
		if err != nil {
			t.Errorf("expected no error but got error: %v", err)
		}
		if !(gotEps == 1.0 && gotDel == 1e-10) {
			t.Errorf("Trying to consume the budget after getBudget call: Got (epsilon,delta)=(%f,%e), expected=(%f,%e)", gotEps, gotDel, 1.0, 1e-10)
		}
	}
}
// Tests that we can get and consume the budget partially.
//
// A (2, 2e-10) budget is handled in two halves: each half is first requested
// with getBudget and then spent with consumeBudget, and every call must
// return (1, 1e-10) without an error.
func TestGetPartialBudget(t *testing.T) {
	spec := NewPrivacySpec(2, 2e-10)

	// check reports a failure when a step returned an error or anything other
	// than half of the budget. action names the step in the failure message.
	check := func(action string, eps, del float64, err error) {
		t.Helper()
		if err != nil {
			t.Errorf("expected no error but got error: %v", err)
		}
		if eps != 1.0 || del != 1e-10 {
			t.Errorf("Trying to %s: Got (epsilon,delta)=(%f,%e), expected=(%f,%e)", action, eps, del, 1.0, 1e-10)
		}
	}

	eps, del, err := spec.getBudget(1, 1e-10)
	check("get first half of the budget", eps, del, err)

	eps, del, err = spec.consumeBudget(1, 1e-10)
	check("consume first half of the budget after getBudget call", eps, del, err)

	eps, del, err = spec.getBudget(1, 1e-10)
	check("get second half of the budget", eps, del, err)

	eps, del, err = spec.consumeBudget(1, 1e-10)
	check("consume second half of the budget after getBudget call", eps, del, err)
}
// Tests that we can consume all the budget at once: after a single Count
// aggregation with no explicit budget, consuming even 1% more must fail.
func TestBudgetFullyConsumed(t *testing.T) {
	pairs := []testutils.PairII{
		{1, 1},
		{2, 2},
	}
	p, s, col := ptest.CreateList(pairs)
	kvs := beam.ParDo(s, testutils.PairToKV, col)

	spec := NewPrivacySpec(1, 1e-30)
	pcol := MakePrivate(s, kvs, spec)
	params := CountParams{MaxValue: 1, MaxPartitionsContributed: 1, NoiseKind: LaplaceNoise{}}
	counts := Count(s, pcol, params)
	passert.Empty(s, counts)

	if runErr := ptest.Run(p); runErr != nil {
		t.Errorf("expected no error but got error: %v", runErr)
	}

	// Try consuming 1% of the initial budget.
	eps, del, err := spec.consumeBudget(0.01, 1e-32)
	if err == nil {
		t.Errorf("expected spec to be out of budget, but could consume (%f,%e) without any error", eps, del)
	}
}
// Tests that two distinct budgets can be independently consumed: two specs
// wrap the same input, each one runs its own Count, and afterwards neither
// spec may have budget left.
func TestTwoDistinctBudgets(t *testing.T) {
	pairs := []testutils.PairII{
		{1, 1},
		{2, 2},
	}
	p, s, col := ptest.CreateList(pairs)
	kvs := beam.ParDo(s, testutils.PairToKV, col)

	spec1 := NewPrivacySpec(1, 1e-30)
	spec2 := NewPrivacySpec(1, 1e-30)
	pcol1 := MakePrivate(s, kvs, spec1)
	pcol2 := MakePrivate(s, kvs, spec2)

	params := CountParams{MaxValue: 1, MaxPartitionsContributed: 1, NoiseKind: LaplaceNoise{}}
	counts1 := Count(s, pcol1, params)
	counts2 := Count(s, pcol2, params)
	passert.Empty(s, counts1)
	passert.Empty(s, counts2)

	if runErr := ptest.Run(p); runErr != nil {
		t.Errorf("expected no error but got error: %v", runErr)
	}

	// Try consuming 1% of the initial budget independently for ε and δ.
	eps, del, err := spec1.consumeBudget(0, 1e-32)
	if err == nil {
		t.Errorf("expected spec1 to be out of budget, but could consume (%f,%e) without any error", eps, del)
	}
	eps, del, err = spec2.consumeBudget(0.01, 0)
	if err == nil {
		t.Errorf("expected spec2 to be out of budget, but could consume (%f,%e) without any error", eps, del)
	}
}
// Test for rounding errors during budget allocation. The budget is split
// evenly across 1 to 10 aggregations; some of those divisions (e.g. by 3) are
// not exact in floating point. Every pipeline should run without errors and
// leave the budget empty.
func TestBudgetRounding(t *testing.T) {
	const maxAggregations = 10
	for numAggregations := 1; numAggregations <= maxAggregations; numAggregations++ {
		pairs := []testutils.PairII{
			{1, 1},
			{2, 2},
		}
		p, s, col := ptest.CreateList(pairs)
		kvs := beam.ParDo(s, testutils.PairToKV, col)
		spec := NewPrivacySpec(1, 1e-30)
		pcol := MakePrivate(s, kvs, spec)

		// Every aggregation gets an equal share of ε and δ.
		params := DistinctPrivacyIDParams{
			Epsilon:                  1. / float64(numAggregations),
			Delta:                    1e-30 / float64(numAggregations),
			MaxPartitionsContributed: 1,
			NoiseKind:                LaplaceNoise{},
		}
		for done := 0; done < numAggregations; done++ {
			DistinctPrivacyID(s, pcol, params)
		}

		if runErr := ptest.Run(p); runErr != nil {
			t.Errorf("with %d aggregations, expected no error but got error: %v", numAggregations, runErr)
		}

		// Now, the budget should be really empty.
		eps, del, err := spec.consumeBudget(1e-15, 1e-40)
		if err == nil {
			t.Errorf("with %d aggregations, expected spec to be out of budget, but could consume (%f,%e) without any error", numAggregations, eps, del)
		}
	}
}