| // Copyright 2021 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // Package privacy contains methods for computing privacy encoding parameters and error of the privacy encoding. |
| package privacy |
| |
| import ( |
| "fmt" |
| "math" |
| "math/cmplx" |
| |
| "gonum.org/v1/gonum/dsp/fourier" |
| ) |
| |
| // ProbabilityMassFunction represents a mass function of a discrete probability distribution |
| // TODO(https://fxbug.dev/296499681): Consider using only the shiftedDistribution type representation of ProbabilityMassFunction. |
| type ProbabilityMassFunction map[int]float64 |
| |
| // PrivacyLossDistribution represents a privacy loss distribution defined by probability mass function that is |
| // discretized to intervals defined by discretization parameter. |
| type PrivacyLossDistribution struct { |
| discretization float64 |
| infiniteMass float64 // probability of getting a value Inf in PMF of this PLD. |
| pmf ProbabilityMassFunction |
| // Estimation can be optimistic: then we may underestimate probabilities of some values in PLD, |
| // or pessimistic: then we always overestimate mass of the values in PLD. |
| // For more details see Section 2.1 in https://github.com/google/differential-privacy/blob/main/common_docs/Privacy_Loss_Distributions.pdf. |
| pessimisticEstimation bool |
| } |
| |
| type shiftedDistribution struct { |
| array []float64 |
| shift int |
| } |
| |
| func (pld *PrivacyLossDistribution) getPLDDiscretization() float64 { |
| return pld.discretization |
| } |
| |
| func (pld *PrivacyLossDistribution) getPLDInfiniteMass() float64 { |
| return pld.infiniteMass |
| } |
| |
| func (pld *PrivacyLossDistribution) getPLDPessimisticEstimate() bool { |
| return pld.pessimisticEstimation |
| } |
| |
| func (pld *PrivacyLossDistribution) getPLDPmf() ProbabilityMassFunction { |
| return pld.pmf |
| } |
| |
| // computeTruncationValueBounds for an input array and truncation mass computes indices of the |
| // arrays such that all values outside of these boundaries (with indices strictly smaller than |
| // minBound and strictly greater than maxBound) correspond to tail-values in a distribution with a |
| // mass that sum up to at most truncatedMass/2 value for each distribution tails. |
| // This truncation computation is used in a computation of convolution of a pair of distributions. |
| // Note, that this function can return minBound > maxBound (in that case, we need to truncate all |
| // elements). |
| func computeTruncationValueBounds(distrArray []float64, truncatedMass float64) (int, int) { |
| sum := 0.0 |
| var minBound int |
| for minBound = 0; minBound < len(distrArray); minBound++ { |
| if (sum + distrArray[minBound]) <= truncatedMass/2 { |
| sum += distrArray[minBound] |
| } else { |
| break |
| } |
| } |
| |
| sum = 0.0 |
| var maxBound int |
| for maxBound = len(distrArray) - 1; maxBound >= 0; maxBound-- { |
| if (sum + distrArray[maxBound]) <= truncatedMass/2 { |
| sum += distrArray[maxBound] |
| } else { |
| break |
| } |
| } |
| return minBound, maxBound |
| } |
| |
| // minMaxKeyValues for an input PMF finds the minimum and maximum values respectively in the support |
| // of the distribution. The function returns (0, 0) if the distribution is empty. |
| func minMaxKeyValues(pmf ProbabilityMassFunction) (int, int) { |
| if len(pmf) == 0 { |
| return 0, 0 |
| } |
| max := math.MinInt |
| min := math.MaxInt |
| for key := range pmf { |
| if key > max { |
| max = key |
| } |
| if key < min { |
| min = key |
| } |
| } |
| return min, max |
| } |
| |
| // rescaleFFTArray for an input array divides all values in the array by the length of the array. |
| // Required for post-processing of FFT computations, as forward FFT followed by reverse FFT returns |
| // values multiplied by the size of the distribution (# elements in it) |
| func rescaleFFTArray(array []float64) []float64 { |
| scale := float64(len(array)) |
| if scale == 0 || scale == 1 { |
| return array |
| } |
| result := make([]float64, len(array)) |
| for i, value := range array { |
| result[i] = value / scale |
| } |
| return result |
| } |
| |
| // convertMapToShiftedArray for an input PMF converts discretized key-value pairs to integer keys and shifts them so the minimal value has 0 index in the array. |
| // Array representation of the distribution is necessary for applying FFT. |
| // If we have a distribution with only positive values, then this shifting makes the array smaller, hence makes the computation faster |
| func convertMapToShiftedArray(pmf ProbabilityMassFunction, size int) (shiftedDistribution, error) { |
| if size < 0 || size > math.MaxInt { |
| return shiftedDistribution{nil, 0.0}, fmt.Errorf("Size parameter for array is either too large or too small, golang cannot create a slice of this size: %d, for distribution: %v", size, pmf) |
| } |
| array := make([]float64, size) |
| shift, _ := minMaxKeyValues(pmf) |
| result := shiftedDistribution{array, shift} |
| for key := range pmf { |
| index := key - shift |
| if index >= size || index < 0 { |
| return shiftedDistribution{nil, 0.0}, fmt.Errorf("Incorrect initial size estimation for converting distribution map to array, expected size at least %d, got index %d", size, index) |
| } |
| result.array[index] = pmf[key] |
| } |
| return result, nil |
| } |
| |
| // convolveHeterogeneousDistributions uses FFT to compute a convolution of two discrete distributions ((mu1 * mu2)(g) = \sum_{h in G} mu1(h)mu2(g-h)). |
| // Convolution computation is required for PLD computations: first we construct two PLDs, then, by the properties that PLD of a product is a convolution of PLDs |
| // compute a final PLD as a convolution of two PLDs |
| func convolveHeterogeneousDistributions(pmf1, pmf2 ProbabilityMassFunction, truncatedMass float64) (ProbabilityMassFunction, error) { |
| // Compute final array length, so it will contain enough space for all coefficients for target discretization |
| minKey1, maxKey1 := minMaxKeyValues(pmf1) |
| minKey2, maxKey2 := minMaxKeyValues(pmf2) |
| minArrayLength := maxKey1 + maxKey2 + 1 - minKey1 - minKey2 |
| // The running time of fft operations depends upon the length of the array and |
| // the size of the largest prime divisor of the length of the array. We use |
| // the next factor of 2 to improve performance. |
| // |
| // TODO(https://fxbug.dev/296499492): Find something better similar to scipy.fft.next_fast_len |
| finalArrayLength := int(math.Pow(2, math.Ceil(math.Log2(float64(minArrayLength))))) |
| |
| // For applying FFT we convert each map to an array, shifting all values to the smallest value. |
| shiftedDistr1, err := convertMapToShiftedArray(pmf1, finalArrayLength) |
| if err != nil { |
| return nil, err |
| } |
| shiftedDistr2, err := convertMapToShiftedArray(pmf2, finalArrayLength) |
| if err != nil { |
| return nil, err |
| } |
| |
| fft := fourier.NewFFT(int(finalArrayLength)) |
| coeff1 := fft.Coefficients(nil, shiftedDistr1.array) |
| coeff2 := fft.Coefficients(nil, shiftedDistr2.array) |
| |
| // Compute convolution using Convolution Theorem |
| for i := range coeff1 { |
| coeff1[i] = coeff1[i] * coeff2[i] |
| } |
| |
| inverseFft := fft.Sequence(nil, coeff1) |
| rescaledResultArray := rescaleFFTArray(inverseFft) |
| |
| // Compute distribution tails and then truncate corresponding values from the final distribution. |
| minNotTruncatedValue, maxNotTruncatedValue := computeTruncationValueBounds(rescaledResultArray, truncatedMass) |
| |
| // Convert shifted discretized array to the original map |
| resultPmf := make(ProbabilityMassFunction) |
| for i := minNotTruncatedValue; i <= maxNotTruncatedValue; i++ { |
| if rescaledResultArray[i] > 0 { |
| resultPmf[i+shiftedDistr1.shift+shiftedDistr2.shift] = rescaledResultArray[i] |
| } |
| } |
| return resultPmf, nil |
| } |
| |
| // computeTruncationValueBoundsForHomogeneousConvolution for an input array and truncation mass |
| // computes indices of the array that correspond to tail-values in a distribution we get after |
| // convolving distrArray numTimes times. |
| // Tail mass we truncate for any of two tails is at most truncatedMass value divided by 2. |
| // We use the bound (5) from the supplementary materials to compute these truncation bounds: |
| // https://github.com/google/differential-privacy/blob/main/common_docs/Privacy_Loss_Distributions.pdf |
| func computeTruncationValueBoundsForHomogeneousConvolution(distrArray []float64, numTimes uint64, truncatedMass float64) (int, int) { |
| arraySize := int(len(distrArray)) |
| upperBound := (arraySize - 1) * int(numTimes) |
| lowerBound := 0 |
| if arraySize == 0 { |
| return 0, 0 |
| } |
| if truncatedMass == 0 { |
| return lowerBound, upperBound |
| } |
| |
| // TODO(https://fxbug.dev/195348085): Explain the choice of orders. |
| for order := -20; order <= 20; order++ { |
| if order == 0 { |
| continue |
| } |
| rescaledOrder := float64(order) / float64(arraySize) |
| // to compute moment-generating function we compute corresponding moment (expectation of e^{o*t}) |
| momentValue := 0.0 |
| // TODO(https://fxbug.dev/296499687): Is it possible to use LogSumExp (like scipy.special.logsumexp in python)? |
| for value, prob := range distrArray { |
| momentValue += math.Exp(float64(value)*rescaledOrder) * prob |
| } |
| |
| // compute bound from (5) in the supplementary material |
| bound := (float64(numTimes)*math.Log(momentValue) + math.Log(2.0/truncatedMass)) / rescaledOrder |
| if order > 0 { |
| upperBound = int(math.Min(float64(upperBound), math.Ceil(bound))) |
| } else if order < 0 { |
| lowerBound = int(math.Max(float64(lowerBound), math.Floor(bound))) |
| } |
| } |
| return lowerBound, upperBound |
| } |
| |
| // convolveHomogeneousDistributions for PMF and degree uses FFT to compute a convolution of this PMF with itself degree-times. |
| // Used to compute a PLD of mu^k, as it is a k-degree convolution of PLD of mu. |
| func convolveHomogeneousDistributions(pmf ProbabilityMassFunction, numTimes uint64, truncatedMass float64) (ProbabilityMassFunction, error) { |
| if numTimes <= 0 { |
| return nil, fmt.Errorf("Incorrect numTimes for homogeneous convolution: should be greater or equal 1. Found %d", numTimes) |
| } |
| if numTimes == 1 { |
| return pmf, nil |
| } |
| |
| // For applying FFT we convert each map to an array, shifting all values to the smallest value. |
| // Temporary shiftedDistribution is used only to compute truncation bounds. |
| minKey, maxKey := minMaxKeyValues(pmf) |
| tempDistr, err := convertMapToShiftedArray(pmf, maxKey-minKey+1) |
| if err != nil { |
| return nil, err |
| } |
| |
| // K-times convolution can give us very large array, so, to avoid truncation in a postprocessing, |
| // we precompute truncation bounds and compute values of FFT for part of the array |
| minBound, maxBound := computeTruncationValueBoundsForHomogeneousConvolution(tempDistr.array, numTimes, truncatedMass) |
| if minBound > maxBound { |
| return nil, fmt.Errorf("Incorrect truncation bound for homogeneous convolution: minBound (%d) is greater than maxBound (%d)", minBound, maxBound) |
| } |
| |
| // Final array length defines how many Fourier coefficients we will compute. |
| // It should be large enough to store distrArray (the initial array). |
| minArrayLength := maxBound - minBound + 1 |
| // The running time of fft operations depends upon the length of the array and |
| // the size of the largest prime divisor of the length of the array. We use |
| // the next factor of 2 to improve performance. |
| // |
| // TODO(https://fxbug.dev/296499492): Find something better similar to scipy.fft.next_fast_len |
| finalArrayLength := int(math.Pow(2, math.Ceil(math.Log2(float64(minArrayLength))))) |
| |
| // For applying FFT we convert each map to an array, shifting all values to the smallest value. |
| shiftedDistr, err := convertMapToShiftedArray(pmf, finalArrayLength) |
| if err != nil { |
| return nil, err |
| } |
| |
| fft := fourier.NewFFT(len(shiftedDistr.array)) |
| coeff := fft.Coefficients(nil, shiftedDistr.array) |
| |
| for i := range coeff { |
| coeff[i] = cmplx.Pow(coeff[i], complex(float64(numTimes), 0)) |
| } |
| |
| inverseFft := fft.Sequence(nil, coeff) |
| // To normalize values after forward and inverse FFT we rescale all values by the length of the array |
| rescaledResultArray := rescaleFFTArray(inverseFft) |
| |
| // For computing FFT we transformed map to shifted discretized array, now, to return a map, we reverse the transformation. |
| resultPmf := make(ProbabilityMassFunction) |
| for i := minBound; i <= maxBound; i++ { |
| modI := i % finalArrayLength |
| if rescaledResultArray[modI] > 0 { |
| resultPmf[i+int(numTimes)*shiftedDistr.shift] = rescaledResultArray[modI] |
| } |
| } |
| return resultPmf, nil |
| } |
| |
| // generatePLD for two PMFs generates a new privacy loss distribution (PLD) of two distributions |
| // Computed using a formula PLD_{D1/D2}(y) = \sum_{o: y = ln{D1(o)/D2(o)}} D1(o). |
| // For more details see Section 2 in https://github.com/google/differential-privacy/blob/main/common_docs/Privacy_Loss_Distributions.pdf. |
| // We refer to upperPmf to be distribution in the numerator, and lowerPmf is distribution in the denominator. |
| func generatePLD(upperPmf, lowerPmf ProbabilityMassFunction, logTruncatedMass, discretization float64, pessimistic bool) *PrivacyLossDistribution { |
| var infMass float64 = 0.0 |
| // For all outputs such that probability under upperPmf is non-zero, but it is zero for the lower distribution, |
| // then log(upper_dist(O)/lower_distr(O))=inf. |
| // Hence we need to add mass of upper_dist(O) to the infinite mass. |
| for value, prob := range upperPmf { |
| if prob > 0 && lowerPmf[value] == 0 { |
| infMass += upperPmf[value] |
| } |
| } |
| |
| roundedPmf := make(ProbabilityMassFunction) |
| // Compute probability of non-infinite values in PLD. |
| for value := range lowerPmf { |
| if lowerPmf[value] != 0 && upperPmf[value] != 0 { |
| // Values in the support of PLD are of the form ln{upperPmf(o)/lowerPmf(o)} for output o in support of upperPmf. |
| logLowerMass := math.Log(lowerPmf[value]) |
| logUpperMass := math.Log(upperPmf[value]) |
| // Check that this value is large enough, so we should care about it in current estimation. |
| // If it is small, we add it to infinite mass under pessimistic estimation. |
| if logUpperMass > logTruncatedMass { |
| scaledPrivacyLoss := (logUpperMass - logLowerMass) / float64(discretization) |
| // Discretizing floating point privacy loss values to integers. |
| if pessimistic { |
| roundedPmf[int(math.Ceil(scaledPrivacyLoss))] += upperPmf[value] |
| } else { |
| roundedPmf[int(math.Floor(scaledPrivacyLoss))] += upperPmf[value] |
| } |
| } else if pessimistic { |
| // For pessimistic estimate we want to account for any truncated mass. Here we account this mass as an infinite mass. |
| // by increasing an infinity mass by probability of the truncated outcome. |
| infMass += upperPmf[value] |
| } |
| } |
| } |
| |
| pld := &PrivacyLossDistribution{ |
| discretization: discretization, |
| infiniteMass: infMass, |
| pmf: roundedPmf, |
| pessimisticEstimation: pessimistic} |
| |
| return pld |
| } |
| |
| // composeHomogeneousPLD computes PLD of a cross product of the same distributions based on PLD of the distributions |
| func composeHomogeneousPLD(pld *PrivacyLossDistribution, numTimes uint64, truncatedMass float64) (*PrivacyLossDistribution, error) { |
| var pmf ProbabilityMassFunction |
| var err error |
| |
| // Below is a numerically stable version of 1 - math.Pow(1-pld.infiniteMass, float64(numTimes)) |
| newInfiniteMass := -math.Expm1(float64(numTimes) * math.Log1p(-pld.infiniteMass)) |
| |
| // Currently truncation supported only for pessimistic bound computation |
| if pld.pessimisticEstimation { |
| pmf, err = convolveHomogeneousDistributions(pld.pmf, numTimes, truncatedMass) |
| newInfiniteMass += truncatedMass |
| } else { |
| if truncatedMass != 0 { |
| return nil, fmt.Errorf("Currently truncation for optimistic estimation is not supported") |
| } |
| pmf, err = convolveHomogeneousDistributions(pld.pmf, numTimes, 0.0) |
| } |
| |
| if err != nil { |
| return nil, err |
| } |
| |
| resultPld := &PrivacyLossDistribution{ |
| discretization: pld.discretization, |
| infiniteMass: newInfiniteMass, |
| pmf: pmf, |
| pessimisticEstimation: pld.pessimisticEstimation} |
| |
| return resultPld, nil |
| } |
| |
| // composeHeterogeneousPLD compute PLD of a cross product of two distinct distributions based on PLDs of each of the distributions |
| func composeHeterogeneousPLD(pld1, pld2 *PrivacyLossDistribution, truncatedMass float64) (*PrivacyLossDistribution, error) { |
| var pmf ProbabilityMassFunction |
| var err error |
| |
| if pld1.discretization != pld2.discretization { |
| return nil, fmt.Errorf("Composing PLDs with distinct discretization is not supported") |
| } |
| |
| if pld1.pessimisticEstimation != pld2.pessimisticEstimation { |
| return nil, fmt.Errorf("Composing PLDs with mixed estimation types (one is optimistic, other is pessimistic) is not supported") |
| } |
| |
| // to compute a new infinite mass use a formula of a probability of two non-independent events |
| newInfiniteMass := pld1.infiniteMass + pld2.infiniteMass - pld1.infiniteMass*pld2.infiniteMass |
| if pld1.pessimisticEstimation { |
| // Truncated mass gets added to infiniteMass. |
| // TODO(https://fxbug.dev/296500421): Only add the mass that is actually truncated instead of `truncatedMass`. |
| newInfiniteMass += truncatedMass |
| } |
| |
| pmf, err = convolveHeterogeneousDistributions(pld1.pmf, pld2.pmf, truncatedMass) |
| |
| if err != nil { |
| return nil, err |
| } |
| |
| resultPld := &PrivacyLossDistribution{ |
| discretization: pld1.discretization, |
| infiniteMass: newInfiniteMass, |
| pmf: pmf, |
| pessimisticEstimation: pld1.pessimisticEstimation} |
| return resultPld, nil |
| } |
| |
| // getDeltaForPrivacyLossDistribution computes a lower bound, such that all deltas greater than |
| // this bound guarantee the corresponding algorithm satisfies (epsilon,delta)-DP. |
| // These computations are based on Observation 2 in supplementary material |
| // https://github.com/google/differential-privacy/blob/main/common_docs/Privacy_Loss_Distributions.pdf |
| func (pld *PrivacyLossDistribution) getDeltaForPrivacyLossDistribution(epsilon float64) float64 { |
| divergence := pld.infiniteMass |
| for discPrivacyLoss, prob := range pld.pmf { |
| privacyLoss := float64(discPrivacyLoss) * pld.discretization |
| // By Observation 2 it suffices to consider only positive values of (1-e^{epsilon-privacyLoss}) |
| if privacyLoss > epsilon && prob > 0 { |
| // Below is a numerically stable version of (1 - math.Exp(epsilon-privacyLoss)) * prob |
| divergence += -math.Expm1(epsilon-privacyLoss) * prob |
| } |
| } |
| return divergence |
| } |