blob: 519d452bfe57b3135665e97dcc6fa4422e98cc23 [file] [log] [blame]
//
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Package codelab contains example pipelines for computing various aggregations using Privacy on Beam.
package codelab
import (
"math"
"github.com/google/differential-privacy/privacy-on-beam/v2/pbeam"
"github.com/apache/beam/sdks/v2/go/pkg/beam"
"github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/stats"
)
// Privacy parameters used throughout the codelab.

// delta is the δ value passed to pbeam.NewPrivacySpec.
const delta = 1e-5

// epsilon is the ε value passed to pbeam.NewPrivacySpec. It is declared as a
// var because math.Log(3) is a function call, not a constant expression.
var epsilon = math.Log(3)
// init registers extractVisitHourFn, the function handed to ParDo by the
// pipelines in this file, with Beam via beam.RegisterFunction.
func init() {
	beam.RegisterFunction(extractVisitHourFn)
}
// CountVisitsPerHour computes how many restaurant visits fall into each hour.
//
// Every element of col is mapped to its visit hour with extractVisitHourFn and
// the hours are then tallied with stats.Count. The result is an exact,
// non-anonymized count; use PrivateCountVisitsPerHour when the output must be
// differentially private.
func CountVisitsPerHour(s beam.Scope, col beam.PCollection) beam.PCollection {
	s = s.Scope("CountVisitsPerHour")
	hours := beam.ParDo(s, extractVisitHourFn, col)
	return stats.Count(s, hours)
}
// extractVisitHourFn maps a Visit to the hour reported by the Hour method of
// its TimeEntered field. It is the per-element function used by both the
// plain and the private hourly counts.
func extractVisitHourFn(v Visit) int {
	hour := v.TimeEntered.Hour()
	return hour
}
// PrivateCountVisitsPerHour computes how many restaurant visits fall into each
// hour in a differentially private way.
//
// col is wrapped into a PrivatePCollection keyed on the "VisitorID" field,
// using a privacy spec built from the package-level epsilon and delta. Each
// element is then mapped to its visit hour and the hours are counted with
// pbeam.Count under the contribution bounds below.
func PrivateCountVisitsPerHour(s beam.Scope, col beam.PCollection) beam.PCollection {
	s = s.Scope("PrivateCountVisitsPerHour")

	// Build the privacy spec and turn col into a PrivatePCollection.
	privacySpec := pbeam.NewPrivacySpec(epsilon, delta)
	privateVisits := pbeam.MakePrivateFromStruct(s, col, privacySpec, "VisitorID")

	hours := pbeam.ParDo(s, extractVisitHourFn, privateVisits)

	params := pbeam.CountParams{
		// Visitors can visit the restaurant once (one hour) a day.
		MaxPartitionsContributed: 1,
		// Visitors can visit the restaurant once within an hour.
		MaxValue: 1,
	}
	return pbeam.Count(s, hours, params)
}