blob: 5e31dc0a64bc2492271d28e7fb507a9163081f20 [file] [log] [blame]
//
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package dpagg
import (
"fmt"
"math"
log "github.com/golang/glog"
"github.com/google/differential-privacy/go/noise"
)
// Count calculates a differentially private count of a collection of values
// using the Laplace or Gaussian mechanism
//
// It supports privacy units that contribute to multiple partitions (via the
// MaxPartitionsContributed parameter) by scaling the added noise appropriately.
// However, it does not support multiple contributions to a single partition from
// the same privacy unit. For that use case, BoundedSumInt64 should be used instead.
//
// The provided differentially private count is an unbiased estimate of the raw
// count meaning that its expected value is equal to the raw count.
//
// For general details and key definitions, see
// https://github.com/google/differential-privacy/blob/main/differential_privacy.md#key-definitions.
//
// Note: Do not use when your results may cause overflows for int64 values.
// This aggregation is not hardened for such applications yet.
//
// Not thread-safe.
type Count struct {
// Parameters
epsilon float64
delta float64
l0Sensitivity int64
lInfSensitivity int64
Noise noise.Noise
noiseKind noise.Kind // necessary for serializing noise.Noise information
// State variables
count int64
state aggregationState
noisedCount int64
}
func countEquallyInitialized(c1, c2 *Count) bool {
return c1.epsilon == c2.epsilon &&
c1.delta == c2.delta &&
c1.l0Sensitivity == c2.l0Sensitivity &&
c1.lInfSensitivity == c2.lInfSensitivity &&
c1.noiseKind == c2.noiseKind &&
c1.state == c2.state
}
// CountOptions contains the options necessary to initialize a Count.
type CountOptions struct {
Epsilon float64 // Privacy parameter ε. Required.
Delta float64 // Privacy parameter δ. Required with Gaussian noise, must be 0 with Laplace noise.
MaxPartitionsContributed int64 // How many distinct partitions may a single privacy unit contribute to? Defaults to 1.
Noise noise.Noise // Type of noise used. Defaults to Laplace noise.
// How many times may a single privacy unit contribute to a single partition?
// Defaults to 1. This is only needed for other aggregation functions using Count;
// which is why the option is not exported.
maxContributionsPerPartition int64
}
// NewCount returns a new Count, initialized at 0.
func NewCount(opt *CountOptions) *Count {
if opt == nil {
opt = &CountOptions{}
}
// Set defaults.
l0 := opt.MaxPartitionsContributed
if l0 == 0 {
l0 = 1
}
lInf := opt.maxContributionsPerPartition
if lInf == 0 {
lInf = 1
}
n := opt.Noise
if n == nil {
n = noise.Laplace()
}
// Check that the parameters are compatible with the noise chosen by calling
// the noise on some dummy value.
eps, del := opt.Epsilon, opt.Delta
n.AddNoiseInt64(0, l0, lInf, eps, del)
return &Count{
epsilon: eps,
delta: del,
l0Sensitivity: l0,
lInfSensitivity: lInf,
Noise: n,
noiseKind: noise.ToKind(n),
count: 0,
state: defaultState,
}
}
// Increment increments the count by one.
func (c *Count) Increment() {
c.IncrementBy(1)
}
// IncrementBy increments the count by the given value.
// Note that this shouldn't be used to count multiple contributions to a
// single partition from the same privacy unit.
func (c *Count) IncrementBy(count int64) {
if c.state != defaultState {
log.Fatalf("Count cannot be amended. Reason: %v", c.state.errorMessage())
}
c.count += count
}
// Merge merges c2 into c (i.e., adds to c all entries that were added to c2).
// c2 is consumed by this operation: it may not be used after it is merged
// into c.
func (c *Count) Merge(c2 *Count) {
if e := checkMergeCount(c, c2); e != nil {
log.Exit(e)
}
c.count += c2.count
c2.state = merged
}
func checkMergeCount(c1, c2 *Count) error {
if c1.state != defaultState {
return fmt.Errorf("checkMergeCount: c1 cannot be merged with another Count instance. Reason: %v", c1.state.errorMessage())
}
if c2.state != defaultState {
return fmt.Errorf("checkMergeCount: c2 cannot be merged with another Count instance. Reason: %v", c2.state.errorMessage())
}
if !countEquallyInitialized(c1, c2) {
return fmt.Errorf("checkMergeCount: c1 and c2 are not compatible")
}
return nil
}
// Result returns a differentially private estimate of the current count. The
// method can be called only once.
//
// The returned value is an unbiased estimate of the raw count.
//
// The returned value may sometimes be negative. This can be corrected by setting
// negative results to 0. Note that such post processing introduces bias to the
// result.
func (c *Count) Result() int64 {
if c.state != defaultState {
log.Fatalf("Count's noised result cannot be computed. Reason: " + c.state.errorMessage())
}
c.state = resultReturned
c.noisedCount = c.Noise.AddNoiseInt64(c.count, c.l0Sensitivity, c.lInfSensitivity, c.epsilon, c.delta)
return c.noisedCount
}
// ThresholdedResult is similar to Result() but applies thresholding to the result.
// So, if the result is less than the threshold specified by the parameters of Count
// as well as thresholdDelta, it returns nil. Otherwise, it returns the result.
//
// Note that the nil results should not be published when the existence of a
// partition in the output depends on private data.
func (c *Count) ThresholdedResult(thresholdDelta float64) *int64 {
threshold := c.Noise.Threshold(c.l0Sensitivity, float64(c.lInfSensitivity), c.epsilon, c.delta, thresholdDelta)
result := c.Result()
// Rounding up the threshold when converting it to int64 to ensure that no DP guarantees
// are violated due to a result being returned that is less than the fractional threshold.
if result < int64(math.Ceil(threshold)) {
return nil
}
return &result
}
// ComputeConfidenceInterval computes a confidence interval with integer bounds that
// contains the true count with a probability greater than or equal to 1 - alpha using
// the noised count computed by Result(). The computation is based exclusively on the
// noised count returned by Result(). Thus no privacy budget is consumed by this operation.
//
// Result() needs to be called before ComputeConfidenceInterval, otherwise this will return
// an error.
//
// See https://github.com/google/differential-privacy/tree/main/common_docs/confidence_intervals.md.
func (c *Count) ComputeConfidenceInterval(alpha float64) (noise.ConfidenceInterval, error) {
if c.state != resultReturned {
return noise.ConfidenceInterval{}, fmt.Errorf("Result() must be called before calling ComputeConfidenceInterval()")
}
confInt, err := c.Noise.ComputeConfidenceIntervalInt64(c.noisedCount, c.l0Sensitivity, c.lInfSensitivity, c.epsilon, c.delta, alpha)
if err != nil {
return noise.ConfidenceInterval{}, err
}
// True count cannot be negative.
confInt.LowerBound, confInt.UpperBound = math.Max(0, confInt.LowerBound), math.Max(0, confInt.UpperBound)
return confInt, nil
}
// encodableCount can be encoded by the gob package.
type encodableCount struct {
Epsilon float64
Delta float64
L0Sensitivity int64
LInfSensitivity int64
NoiseKind noise.Kind
Count int64
}
// GobEncode encodes Count.
func (c *Count) GobEncode() ([]byte, error) {
if c.state != defaultState {
return nil, fmt.Errorf("Count object cannot be serialized. Reason: " + c.state.errorMessage())
}
enc := encodableCount{
Epsilon: c.epsilon,
Delta: c.delta,
L0Sensitivity: c.l0Sensitivity,
LInfSensitivity: c.lInfSensitivity,
NoiseKind: noise.ToKind(c.Noise),
Count: c.count,
}
c.state = serialized
return encode(enc)
}
// GobDecode decodes Count.
func (c *Count) GobDecode(data []byte) error {
var enc encodableCount
err := decode(&enc, data)
if err != nil {
log.Fatalf("GobDecode: couldn't decode Count from bytes")
return err
}
*c = Count{
epsilon: enc.Epsilon,
delta: enc.Delta,
l0Sensitivity: enc.L0Sensitivity,
lInfSensitivity: enc.LInfSensitivity,
noiseKind: enc.NoiseKind,
Noise: noise.ToNoise(enc.NoiseKind),
count: enc.Count,
state: defaultState,
}
return nil
}