| // |
| // Copyright 2020 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| |
| package dpagg |
| |
| import ( |
| "fmt" |
| "math" |
| |
| log "github.com/golang/glog" |
| "github.com/google/differential-privacy/go/noise" |
| ) |
| |
| // Count calculates a differentially private count of a collection of values |
| // using the Laplace or Gaussian mechanism |
| // |
| // It supports privacy units that contribute to multiple partitions (via the |
| // MaxPartitionsContributed parameter) by scaling the added noise appropriately. |
| // However, it does not support multiple contributions to a single partition from |
| // the same privacy unit. For that use case, BoundedSumInt64 should be used instead. |
| // |
| // The provided differentially private count is an unbiased estimate of the raw |
| // count meaning that its expected value is equal to the raw count. |
| // |
| // For general details and key definitions, see |
| // https://github.com/google/differential-privacy/blob/main/differential_privacy.md#key-definitions. |
| // |
| // Note: Do not use when your results may cause overflows for int64 values. |
| // This aggregation is not hardened for such applications yet. |
| // |
| // Not thread-safe. |
| type Count struct { |
| // Parameters |
| epsilon float64 |
| delta float64 |
| l0Sensitivity int64 |
| lInfSensitivity int64 |
| Noise noise.Noise |
| noiseKind noise.Kind // necessary for serializing noise.Noise information |
| |
| // State variables |
| count int64 |
| state aggregationState |
| noisedCount int64 |
| } |
| |
| func countEquallyInitialized(c1, c2 *Count) bool { |
| return c1.epsilon == c2.epsilon && |
| c1.delta == c2.delta && |
| c1.l0Sensitivity == c2.l0Sensitivity && |
| c1.lInfSensitivity == c2.lInfSensitivity && |
| c1.noiseKind == c2.noiseKind && |
| c1.state == c2.state |
| } |
| |
| // CountOptions contains the options necessary to initialize a Count. |
| type CountOptions struct { |
| Epsilon float64 // Privacy parameter ε. Required. |
| Delta float64 // Privacy parameter δ. Required with Gaussian noise, must be 0 with Laplace noise. |
| MaxPartitionsContributed int64 // How many distinct partitions may a single privacy unit contribute to? Defaults to 1. |
| Noise noise.Noise // Type of noise used. Defaults to Laplace noise. |
| // How many times may a single privacy unit contribute to a single partition? |
| // Defaults to 1. This is only needed for other aggregation functions using Count; |
| // which is why the option is not exported. |
| maxContributionsPerPartition int64 |
| } |
| |
| // NewCount returns a new Count, initialized at 0. |
| func NewCount(opt *CountOptions) *Count { |
| if opt == nil { |
| opt = &CountOptions{} |
| } |
| // Set defaults. |
| l0 := opt.MaxPartitionsContributed |
| if l0 == 0 { |
| l0 = 1 |
| } |
| |
| lInf := opt.maxContributionsPerPartition |
| if lInf == 0 { |
| lInf = 1 |
| } |
| |
| n := opt.Noise |
| if n == nil { |
| n = noise.Laplace() |
| } |
| // Check that the parameters are compatible with the noise chosen by calling |
| // the noise on some dummy value. |
| eps, del := opt.Epsilon, opt.Delta |
| n.AddNoiseInt64(0, l0, lInf, eps, del) |
| |
| return &Count{ |
| epsilon: eps, |
| delta: del, |
| l0Sensitivity: l0, |
| lInfSensitivity: lInf, |
| Noise: n, |
| noiseKind: noise.ToKind(n), |
| count: 0, |
| state: defaultState, |
| } |
| } |
| |
| // Increment increments the count by one. |
| func (c *Count) Increment() { |
| c.IncrementBy(1) |
| } |
| |
| // IncrementBy increments the count by the given value. |
| // Note that this shouldn't be used to count multiple contributions to a |
| // single partition from the same privacy unit. |
| func (c *Count) IncrementBy(count int64) { |
| if c.state != defaultState { |
| log.Fatalf("Count cannot be amended. Reason: %v", c.state.errorMessage()) |
| } |
| c.count += count |
| } |
| |
| // Merge merges c2 into c (i.e., adds to c all entries that were added to c2). |
| // c2 is consumed by this operation: it may not be used after it is merged |
| // into c. |
| func (c *Count) Merge(c2 *Count) { |
| if e := checkMergeCount(c, c2); e != nil { |
| log.Exit(e) |
| } |
| c.count += c2.count |
| c2.state = merged |
| } |
| |
| func checkMergeCount(c1, c2 *Count) error { |
| if c1.state != defaultState { |
| return fmt.Errorf("checkMergeCount: c1 cannot be merged with another Count instance. Reason: %v", c1.state.errorMessage()) |
| } |
| if c2.state != defaultState { |
| return fmt.Errorf("checkMergeCount: c2 cannot be merged with another Count instance. Reason: %v", c2.state.errorMessage()) |
| } |
| |
| if !countEquallyInitialized(c1, c2) { |
| return fmt.Errorf("checkMergeCount: c1 and c2 are not compatible") |
| } |
| |
| return nil |
| } |
| |
| // Result returns a differentially private estimate of the current count. The |
| // method can be called only once. |
| // |
| // The returned value is an unbiased estimate of the raw count. |
| // |
| // The returned value may sometimes be negative. This can be corrected by setting |
| // negative results to 0. Note that such post processing introduces bias to the |
| // result. |
| func (c *Count) Result() int64 { |
| if c.state != defaultState { |
| log.Fatalf("Count's noised result cannot be computed. Reason: " + c.state.errorMessage()) |
| } |
| c.state = resultReturned |
| c.noisedCount = c.Noise.AddNoiseInt64(c.count, c.l0Sensitivity, c.lInfSensitivity, c.epsilon, c.delta) |
| return c.noisedCount |
| } |
| |
| // ThresholdedResult is similar to Result() but applies thresholding to the result. |
| // So, if the result is less than the threshold specified by the parameters of Count |
| // as well as thresholdDelta, it returns nil. Otherwise, it returns the result. |
| // |
| // Note that the nil results should not be published when the existence of a |
| // partition in the output depends on private data. |
| func (c *Count) ThresholdedResult(thresholdDelta float64) *int64 { |
| threshold := c.Noise.Threshold(c.l0Sensitivity, float64(c.lInfSensitivity), c.epsilon, c.delta, thresholdDelta) |
| result := c.Result() |
| // Rounding up the threshold when converting it to int64 to ensure that no DP guarantees |
| // are violated due to a result being returned that is less than the fractional threshold. |
| if result < int64(math.Ceil(threshold)) { |
| return nil |
| } |
| return &result |
| } |
| |
| // ComputeConfidenceInterval computes a confidence interval with integer bounds that |
| // contains the true count with a probability greater than or equal to 1 - alpha using |
| // the noised count computed by Result(). The computation is based exclusively on the |
| // noised count returned by Result(). Thus no privacy budget is consumed by this operation. |
| // |
| // Result() needs to be called before ComputeConfidenceInterval, otherwise this will return |
| // an error. |
| // |
| // See https://github.com/google/differential-privacy/tree/main/common_docs/confidence_intervals.md. |
| func (c *Count) ComputeConfidenceInterval(alpha float64) (noise.ConfidenceInterval, error) { |
| if c.state != resultReturned { |
| return noise.ConfidenceInterval{}, fmt.Errorf("Result() must be called before calling ComputeConfidenceInterval()") |
| } |
| confInt, err := c.Noise.ComputeConfidenceIntervalInt64(c.noisedCount, c.l0Sensitivity, c.lInfSensitivity, c.epsilon, c.delta, alpha) |
| if err != nil { |
| return noise.ConfidenceInterval{}, err |
| } |
| // True count cannot be negative. |
| confInt.LowerBound, confInt.UpperBound = math.Max(0, confInt.LowerBound), math.Max(0, confInt.UpperBound) |
| return confInt, nil |
| } |
| |
| // encodableCount can be encoded by the gob package. |
| type encodableCount struct { |
| Epsilon float64 |
| Delta float64 |
| L0Sensitivity int64 |
| LInfSensitivity int64 |
| NoiseKind noise.Kind |
| Count int64 |
| } |
| |
| // GobEncode encodes Count. |
| func (c *Count) GobEncode() ([]byte, error) { |
| if c.state != defaultState { |
| return nil, fmt.Errorf("Count object cannot be serialized. Reason: " + c.state.errorMessage()) |
| } |
| enc := encodableCount{ |
| Epsilon: c.epsilon, |
| Delta: c.delta, |
| L0Sensitivity: c.l0Sensitivity, |
| LInfSensitivity: c.lInfSensitivity, |
| NoiseKind: noise.ToKind(c.Noise), |
| Count: c.count, |
| } |
| c.state = serialized |
| return encode(enc) |
| } |
| |
| // GobDecode decodes Count. |
| func (c *Count) GobDecode(data []byte) error { |
| var enc encodableCount |
| err := decode(&enc, data) |
| if err != nil { |
| log.Fatalf("GobDecode: couldn't decode Count from bytes") |
| return err |
| } |
| *c = Count{ |
| epsilon: enc.Epsilon, |
| delta: enc.Delta, |
| l0Sensitivity: enc.L0Sensitivity, |
| lInfSensitivity: enc.LInfSensitivity, |
| noiseKind: enc.NoiseKind, |
| Noise: noise.ToNoise(enc.NoiseKind), |
| count: enc.Count, |
| state: defaultState, |
| } |
| return nil |
| } |