blob: a5e2a21e651d659f8aeeb81298e6a3c143768881 [file] [log] [blame]
//
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package noise
import (
"math"
"github.com/google/differential-privacy/go/v2/checks"
"github.com/google/differential-privacy/go/v2/rand"
"gonum.org/v1/gonum/stat/distuv"
)
var (
// The square root of the maximum number n of Bernoulli trials from which a binomial
// sample is drawn. Larger values result in more fine-grained noise, but increase the
// chance of sampling inaccuracies due to overflows. The probability of such an event
// will be roughly 2⁻⁴⁵ or less, if the square root is set to 2⁵⁷.
binomialBound float64 = math.Exp2(57.0)
// The absolute bound of the two-sided geometric samples k that are used for creating
// a binomial sample is m + n / 2. For performance reasons, m is not composed of n
// Bernoulli trials. Instead, m is obtained via a rejection sampling technique, which sets
// m = (k + l) * (sqrt(2 * n) + 1),
// where l is a uniform random sample between 0 and 1. Bounding k is therefore necessary
// to prevent m from overflowing.
//
// The probability of a single sample k being bounded is 2⁻⁴⁵.
geometricBound int64 = (math.MaxInt64 / int64(math.Round(math.Sqrt2*binomialBound+1.0))) - 1
// gaussianSigmaAccuracy approximates the accuracy up to which the smallest sigma that
// satisfies the given DP parameters.
gaussianSigmaAccuracy = 1e-3
)
type gaussian struct{}
// Gaussian returns a Noise instance that adds Gaussian noise to its input.
//
// The Gaussian noise is based on a binomial sampling mechanism that is robust against
// unintentional privacy leaks due to artifacts of floating-point arithmetic. See
// https://github.com/google/differential-privacy/blob/main/common_docs/Secure_Noise_Generation.pdf
// for more information.
func Gaussian() Noise {
return gaussian{}
}
// AddNoiseFloat64 adds Gaussian noise to the specified float64, so that its
// output is (ε,δ)-differentially private.
func (gaussian) AddNoiseFloat64(x float64, l0Sensitivity int64, lInfSensitivity, epsilon, delta float64) (float64, error) {
if err := checkArgsGaussian(l0Sensitivity, lInfSensitivity, epsilon, delta); err != nil {
return 0, err
}
sigma := SigmaForGaussian(l0Sensitivity, lInfSensitivity, epsilon, delta)
return addGaussianFloat64(x, sigma), nil
}
// AddNoiseInt64 adds Gaussian noise to the specified int64, so that the
// output is (ε,δ)-differentially private.
func (gaussian) AddNoiseInt64(x, l0Sensitivity, lInfSensitivity int64, epsilon, delta float64) (int64, error) {
if err := checkArgsGaussian(l0Sensitivity, float64(lInfSensitivity), epsilon, delta); err != nil {
return 0, err
}
sigma := SigmaForGaussian(l0Sensitivity, float64(lInfSensitivity), epsilon, delta)
return addGaussianInt64(x, sigma), nil
}
// Threshold returns the smallest threshold k to use in a differentially private
// histogram with added Gaussian noise.
//
// See https://github.com/google/differential-privacy/blob/main/common_docs/Delta_For_Thresholding.pdf for details on the math underlying this.
func (gaussian) Threshold(l0Sensitivity int64, lInfSensitivity, epsilon, noiseDelta, thresholdDelta float64) (float64, error) {
if err := checkArgsGaussian(l0Sensitivity, lInfSensitivity, epsilon, noiseDelta); err != nil {
return 0, err
}
if err := checks.CheckThresholdDelta(thresholdDelta, noiseDelta); err != nil {
return 0, err
}
sigma := SigmaForGaussian(l0Sensitivity, lInfSensitivity, epsilon, noiseDelta)
noiseDist := distuv.Normal{Mu: 0, Sigma: sigma}
return lInfSensitivity + noiseDist.Quantile(math.Pow(1-thresholdDelta, 1.0/float64(l0Sensitivity))), nil
}
// DeltaForThreshold is the inverse operation of Threshold. Specifically, given
// the parameters and a threshold, it returns the delta induced by thresholding.
//
// See https://github.com/google/differential-privacy/blob/main/common_docs/Delta_For_Thresholding.pdf for details on the math underlying this.
func (gaussian) DeltaForThreshold(l0Sensitivity int64, lInfSensitivity, epsilon, delta, threshold float64) (float64, error) {
if err := checkArgsGaussian(l0Sensitivity, lInfSensitivity, epsilon, delta); err != nil {
return 0, err
}
sigma := SigmaForGaussian(l0Sensitivity, lInfSensitivity, epsilon, delta)
noiseDist := distuv.Normal{Mu: 0, Sigma: sigma}
return 1 - math.Pow(noiseDist.CDF(threshold-lInfSensitivity), float64(l0Sensitivity)), nil
}
// ComputeConfidenceIntervalInt64 computes a confidence interval that contains the raw integer value x from which int64 noisedX
// is computed with a probability greater or equal to 1 - alpha based on the specified gaussian noise parameters.
//
// See https://github.com/google/differential-privacy/tree/main/common_docs/confidence_intervals.md.
func (gaussian) ComputeConfidenceIntervalInt64(noisedX, l0Sensitivity, lInfSensitivity int64, epsilon, delta, alpha float64) (ConfidenceInterval, error) {
err := checkArgsConfidenceIntervalGaussian(l0Sensitivity, float64(lInfSensitivity), epsilon, delta, alpha)
if err != nil {
return ConfidenceInterval{}, err
}
sigma := SigmaForGaussian(l0Sensitivity, float64(lInfSensitivity), epsilon, delta)
// Computing the confidence interval around zero rather than nosiedX helps represent the
// interval bounds more accurately. The reason is that the resolution of float64 values is most
// fine grained around zero.
confIntAroundZero := computeConfidenceIntervalGaussian(0, sigma, alpha).roundToInt64()
// Adding noisedX after converting the interval bounds to int64 ensures that no precision is lost
// due to the coarse resolution of float64 values for large instances of noisedX.
lowerBound := nextSmallerFloat64(int64(confIntAroundZero.LowerBound) + noisedX)
upperBound := nextLargerFloat64(int64(confIntAroundZero.UpperBound) + noisedX)
return ConfidenceInterval{LowerBound: lowerBound, UpperBound: upperBound}, nil
}
// ComputeConfidenceIntervalFloat64 computes a confidence interval that contains the raw value x from which float64
// noisedX is computed with a probability equal to 1 - alpha based on the specified gaussian noise parameters.
//
// See https://github.com/google/differential-privacy/tree/main/common_docs/confidence_intervals.md.
func (gaussian) ComputeConfidenceIntervalFloat64(noisedX float64, l0Sensitivity int64, lInfSensitivity, epsilon, delta, alpha float64) (ConfidenceInterval, error) {
err := checkArgsConfidenceIntervalGaussian(l0Sensitivity, lInfSensitivity, epsilon, delta, alpha)
if err != nil {
return ConfidenceInterval{}, err
}
sigma := SigmaForGaussian(l0Sensitivity, lInfSensitivity, epsilon, delta)
return computeConfidenceIntervalGaussian(noisedX, sigma, alpha), nil
}
func checkArgsGaussian(l0Sensitivity int64, lInfSensitivity, epsilon, delta float64) error {
if err := checks.CheckL0Sensitivity(l0Sensitivity); err != nil {
return err
}
if err := checks.CheckLInfSensitivity(lInfSensitivity); err != nil {
return err
}
if err := checks.CheckEpsilon(epsilon); err != nil {
return err
}
return checks.CheckDeltaStrict(delta)
}
func (gaussian) String() string {
return "Gaussian Noise"
}
// checkArgsConfidenceIntervalGaussian checks the parameters for gaussian confidence interval, as well as the provided confidence level.
func checkArgsConfidenceIntervalGaussian(l0Sensitivity int64, lInfSensitivity, epsilon, delta, alpha float64) error {
if err := checks.CheckAlpha(alpha); err != nil {
return err
}
return checkArgsGaussian(l0Sensitivity, lInfSensitivity, epsilon, delta)
}
// addGaussianFloat64 adds Gaussian noise of scale σ to the specified float64.
func addGaussianFloat64(x, sigma float64) float64 {
granularity := ceilPowerOfTwo(2.0 * sigma / binomialBound)
// sqrtN is chosen in a way that places it in the interval between binomialBound
// and binomialBound / 2. This ensures that the respective binomial distribution
// consists of enough Bernoulli samples to closely approximate a Gaussian distribution.
sqrtN := 2.0 * sigma / granularity
sample := symmetricBinomial(sqrtN)
return roundToMultipleOfPowerOfTwo(x, granularity) + float64(sample)*granularity
}
// addGaussianInt64 adds Gaussian noise of scale σ to the specified int64.
func addGaussianInt64(x int64, sigma float64) int64 {
granularity := ceilPowerOfTwo(2.0 * sigma / binomialBound)
// sqrtN is chosen in a way that places it in the interval between binomialBound
// and binomialBound / 2. This ensures that the respective binomial distribution
// consists of enough Bernoulli samples to closely approximate a Gaussian distribution.
sqrtN := 2.0 * sigma / granularity
sample := symmetricBinomial(sqrtN)
if granularity < 1 {
return x + int64(math.Round(float64(sample)*granularity))
}
return roundToMultiple(x, int64(granularity)) + sample*int64(granularity)
}
// computeConfidenceIntervalGaussian computes a confidence interval that contains the raw value x from which
// float64 noisedX is computed with a probability equal to 1 - alpha with the given sigma.
func computeConfidenceIntervalGaussian(noisedX, sigma, alpha float64) ConfidenceInterval {
z := inverseCDFGaussian(sigma, alpha/2)
// Because of the symmetry of the Gaussian distribution,
// -z corresponds to the (1 - alpha/2)-quantile of the distribution,
// meaning that the interval [z, -z] contains 1-alpha of the probability mass.
// Deriving the (1 - alpha/2)-quantile from the (alpha/2)-quantile and not vice versa is a
// deliberate choice. The reason is that alpha tends to be very small.
// Consequently, alpha/2 is more accurately representable as a float64 than 1 - alpha/2,
// facilitating numerical computations.
return ConfidenceInterval{LowerBound: noisedX + z, UpperBound: noisedX - z}
}
// inverseCDFGaussian computes the quantile z satisfying Pr[Y <= z] = p for a random variable Y that is Gaussian
// distributed with the specified sigma and mean 0.
func inverseCDFGaussian(sigma, p float64) float64 {
return -sigma * math.Sqrt(2) * math.Erfcinv(2*p)
}
// symmetricBinomial returns a random sample m where the term m + n / 2 is drawn from
// a binomial distribution of n Bernoulli trials that have a success probability of
// 0.5 each. The sampling technique is based on Bringmann et al.'s rejection sampling
// approach proposed in "Internal DLA: Efficient Simulation of a Physical Growth Model"
// (https://people.mpi-inf.mpg.de/~kbringma/paper/2014ICALP.pdf).
func symmetricBinomial(sqrtN float64) int64 {
stepSize := int64(math.Round(math.Sqrt2*sqrtN + 1.0))
var result int64
i := 0
for true {
// 1 is subtracted from the geometric sample to count the number of Bernoulli fails
// rather than the number of trials until the first success.
boundedGeometricSample := int64(math.Min(rand.Geometric()-1.0, float64(geometricBound)))
twoSidedGeometricSample := boundedGeometricSample
if rand.Boolean() {
twoSidedGeometricSample = -twoSidedGeometricSample - 1
}
result = stepSize*twoSidedGeometricSample + rand.I63n(stepSize)
resultProbability := binomialProbability(sqrtN, result)
rejectProbability := rand.Uniform()
if resultProbability > 0.0 &&
rejectProbability < resultProbability*float64(stepSize)*math.Pow(2.0, float64(boundedGeometricSample))/4.0 {
break
}
i++
}
return result
}
// Approximates the probability of a random sample m + n / 2 drawn from a binomial
// distribution of n Bernoulli trials that have a success probability of 1 / 2 each.
// The approximation is based on Lemma 7 of
// https://github.com/google/differential-privacy/blob/main/common_docs/Secure_Noise_Generation.pdf
func binomialProbability(sqrtN float64, m int64) float64 {
if math.Abs(float64(m)) > sqrtN*math.Sqrt(math.Log(sqrtN)/2.0) {
return 0.0
}
return (math.Sqrt(2.0/math.Pi) / sqrtN) *
math.Exp((-2.0*float64(m)*float64(m))/(sqrtN*sqrtN)) *
(1 - 0.4*math.Pow(2.0, 1.5)*math.Pow(math.Log(sqrtN), 1.5)/sqrtN)
}
// deltaForGaussian computes the smallest δ such that the Gaussian mechanism
// with fixed standard deviation σ is (ε,δ)-differentially private. The
// calculation is based on Theorem 8 of Balle and Wang's "Improving the Gaussian
// Mechanism for Differential Privacy: Analytical Calibration and Optimal
// Denoising" (https://arxiv.org/abs/1805.06530v2).
func deltaForGaussian(sigma float64, l0Sensitivity int64, lInfSensitivity, epsilon float64) float64 {
l2Sensitivity := lInfSensitivity * math.Sqrt(float64(l0Sensitivity))
// Defining
// Φ – Standard Gaussian distribution (mean: 0, variance: 1) CDF function
// s – L2 sensitivity
// δ(σ,s,ε) – The level of (ε,δ)-approximate differential privacy achieved
// by the Gaussian mechanism applied with standard deviation σ
// to data with L2 sensitivity s with fixed ε.
// The tight choice of δ (see https://arxiv.org/abs/1805.06530v2, Theorem 8) is:
// δ(σ,s,ε) := Φ(s/(2σ) - εσ/s) - exp(ε)Φ(-s/(2σ) - εσ/s)
// To simplify the calculation of this formula and to simplify reasoning about
// overflow and underflow, we pull out terms a := s/(2σ), b := εσ/s, c := exp(ε)
// so that δ(σ,s,ε) = Φ(a - b) - cΦ(-a - b)
a := l2Sensitivity / (2 * sigma)
b := epsilon * sigma / l2Sensitivity
c := math.Exp(epsilon)
if math.IsInf(c, +1) {
// δ(σ,s,ε) –> 0 as ε –> ∞, so return 0.
return 0
}
if math.IsInf(b, +1) {
// δ(σ,s,ε) –> 0 as the L2 sensitivity –> 0, so return 0.
return 0
}
return distuv.UnitNormal.CDF(a-b) - c*distuv.UnitNormal.CDF(-a-b)
}
// SigmaForGaussian calculates the standard deviation σ of Gaussian noise
// needed to achieve (ε,δ)-approximate differential privacy.
//
// SigmaForGaussian uses binary search. The result will deviate from the exact value
// σ_tight by at most gaussianSigmaAccuracy*σ_tight.
//
// Runtime: O(log(max(σ_tight/l2Sensitivity, l2Sensitivity/σ_tight)) +
// log(gaussianSigmaAccuracy)).
// where l2Sensitivity := lInfSensitivity * math.Sqrt(l0Sensitivity)
//
// TODO: Memorize the results of this function to avoid recomputing it
func SigmaForGaussian(l0Sensitivity int64, lInfSensitivity, epsilon, delta float64) float64 {
if delta >= 1 {
return 0
}
// We use l2Sensitivity as a starting guess for the upper bound since the
// required noise grows linearly with sensitivity.
l2Sensitivity := lInfSensitivity * math.Sqrt(float64(l0Sensitivity))
upperBound := l2Sensitivity
var lowerBound float64
// Increase upperBound until it is actually an upper bound of σ_tight.
//
// DeltaForGaussian(sigma, l2Sensitivity, epsilon) is a decreasing function with
// respect to sigma. This loop terminates in
// O(log(σ_tight/l2Sensitivity)) if σ_tight > l2Sensitivity
// O(1) otherwise.
// In the case where σ_tight > l2Sensitivity, when the loop exits, the
// following things are true:
// (1) upperBound - lowerBound <= σ_tight
// (2) lowerBound >= 0.5*σ_tight.
for deltaForGaussian(upperBound, l0Sensitivity, lInfSensitivity, epsilon) > delta {
lowerBound = upperBound
upperBound = upperBound * 2
}
// Loop runtime:
// O(log(1/σ_tight) + log(1/gaussianSigmaAccuracy)) if σ_tight < l2Sensitivity
// O(log(1/gaussianSigmaAccuracy)) otherwise.
//
// Proof. First, suppose σ_tight > l2Sensitivity. The prior for-loop guarantees that
// (1) upperBound - lowerBound <= σ_tight
// (2) lowerBound >= 0.5*σ_tight
// at the start of this loop. Using (1), binary search takes
// O(log(1/gaussianSigmaAccuracy)) iterations to bound the solution within an interval
// of width 0.5*σ_tight*gaussianSigmaAccuracy. Since (2) holds over all iterations of
// this loop, that is sufficient iterations to meet the loop's exit criterion.
//
// Now suppose σ_tight <= l2Sensitivity. It takes
// O(log(l2Sensitivity/σ_tight)) iterations to calculate a middle that is less
// than σ_tight. At that iteration, lowerBound is updated to be at least
// 0.5*σ_tight. After this first update to lowerBound, we use the argument of
// the preceding paragraph (noting that (1) and (2) now both hold) to see that
// it takes an additional O(log(l2Sensitivity/gaussianSigmaAccuracy)) iterations to
// find a sufficiently accurate estimate of σ_tight to exit the loop.
for upperBound-lowerBound > gaussianSigmaAccuracy*lowerBound {
middle := lowerBound*0.5 + upperBound*0.5
if deltaForGaussian(middle, l0Sensitivity, lInfSensitivity, epsilon) > delta {
lowerBound = middle
} else {
upperBound = middle
}
}
return upperBound
}