# Copyright 2020 Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementing Privacy Loss of Mechanisms.
This file implements the privacy loss of several additive noise mechanisms,
including the Gaussian, Laplace, discrete Laplace and discrete Gaussian
mechanisms.
Please refer to the supplementary material below for more details:
../docs/Privacy_Loss_Distributions.pdf
"""
import abc
import math
import typing
from typing import Iterable, Optional, Union
import dataclasses
import numpy as np
from scipy import stats
from dp_accounting import common
@dataclasses.dataclass
class TailPrivacyLossDistribution(object):
"""Representation of the tail of privacy loss distribution.
Attributes:
lower_x_truncation: the minimum value of x that should be considered after
the tail is discarded.
upper_x_truncation: the maximum value of x that should be considered after
the tail is discarded.
tail_probability_mass_function: the probability mass of the privacy loss
distribution that has to be added due to the discarded tail; each key is a
      privacy loss value and the corresponding value is the probability mass
      with which that privacy loss occurs.
"""
lower_x_truncation: float
upper_x_truncation: float
tail_probability_mass_function: typing.Mapping[float, float]
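# NOTE: The following is an illustrative sketch, not part of the library API.
# It shows how the fields of TailPrivacyLossDistribution are meant to be read,
# using Laplace noise with parameter = 1 and sensitivity = 1 as a concrete
# case; the values follow the formulas documented in
# LaplacePrivacyLoss.privacy_loss_tail below.
def _example_tail_privacy_loss_distribution() -> TailPrivacyLossDistribution:
  """Returns an example tail for Laplace noise (parameter 1, sensitivity 1)."""
  return TailPrivacyLossDistribution(
      # Privacy losses are only computed pointwise for x in [0, 1]; the rest of
      # the noise distribution is folded into the tail mass below.
      lower_x_truncation=0.0,
      upper_x_truncation=1.0,
      tail_probability_mass_function={
          # x <= 0 has privacy loss sensitivity / parameter = 1, with mass 0.5.
          1.0: 0.5,
          # x >= 1 has privacy loss -1, with mass CDF(-1) = 0.5 * exp(-1).
          -1.0: 0.5 * math.exp(-1.0),
      })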
class AdditiveNoisePrivacyLoss(metaclass=abc.ABCMeta):
"""Superclass for privacy loss of additive noise mechanisms.
An additive noise mechanism for computing a scalar-valued function f is a
mechanism that outputs the sum of the true value of the function and a noise
drawn from a certain distribution mu. This class allows one to compute several
quantities related to the privacy loss of additive noise mechanisms.
We assume that the noise mu is such that the algorithm is more private as the
sensitivity of f decreases. (Recall that the sensitivity of f is the maximum
absolute change in f when an input to a single user changes.) Under this
assumption, the privacy loss distribution of the mechanism is exactly
generated as follows: pick x from mu and let the privacy loss be
ln(P(x) / P(x - sensitivity)). Note that when mu is discrete, P(x) and
P(x - sensitivity) are the probability masses of mu at x and x - sensitivity
respectively. When mu is continuous, P(x) and P(x - sensitivity) are the
probability densities of mu at x and x - sensitivity respectively.
This class also assumes the privacy loss is non-increasing as x increases.
Attributes:
sensitivity: the sensitivity of function f. (i.e. the maximum absolute
change in f when an input to a single user changes.)
discrete_noise: a value indicating whether the noise is discrete. If this
is True, then it is assumed that the noise can only take integer values.
If False, then it is assumed that the noise is continuous, i.e., the
probability mass at any given point is zero.
"""
  def __init__(self, sensitivity: float, discrete_noise: bool) -> None:
if sensitivity <= 0:
raise ValueError(
f'Sensitivity is not a positive real number: {sensitivity}')
self.sensitivity = sensitivity
self.discrete_noise = discrete_noise
  def get_delta_for_epsilon(self, epsilon: float) -> float:
"""Computes the epsilon-hockey stick divergence of the mechanism.
The epsilon-hockey stick divergence of the mechanism is the value of delta
for which the mechanism is (epsilon, delta)-differentially private. (See
Observation 1 in the supplementary material.)
This function assumes the privacy loss is non-increasing as x increases.
Under this assumption, the hockey stick divergence is simply
CDF(inverse_privacy_loss(epsilon)) - exp(epsilon) *
CDF(inverse_privacy_loss(epsilon) - sensitivity), because the privacy loss
at a point x is at least epsilon iff x <= inverse_privacy_loss(epsilon).
Args:
epsilon: the epsilon in epsilon-hockey stick divergence.
Returns:
A non-negative real number which is the epsilon-hockey stick divergence
of the mechanism.
"""
x_cutoff = self.inverse_privacy_loss(epsilon)
return self.noise_cdf(x_cutoff) - math.exp(epsilon) * self.noise_cdf(
x_cutoff - self.sensitivity)
@abc.abstractmethod
def privacy_loss_tail(self) -> TailPrivacyLossDistribution:
"""Computes the privacy loss at the tail of the distribution.
Returns:
A TailPrivacyLossDistribution instance representing the tail of the
privacy loss distribution.
Raises:
NotImplementedError: If not implemented by the subclass.
"""
raise NotImplementedError
@abc.abstractmethod
def privacy_loss(self, x: float) -> float:
"""Computes the privacy loss at a given point.
Args:
x: the point at which the privacy loss is computed.
Returns:
The privacy loss at point x, which is equal to
ln(P(x) / P(x - sensitivity)).
Raises:
NotImplementedError: If not implemented by the subclass.
"""
raise NotImplementedError
@abc.abstractmethod
def inverse_privacy_loss(self, privacy_loss: float) -> float:
"""Computes the inverse of a given privacy loss.
Args:
privacy_loss: the privacy loss value.
Returns:
The largest float x such that the privacy loss at x is at least
privacy_loss.
Raises:
NotImplementedError: If not implemented by the subclass.
"""
raise NotImplementedError
@abc.abstractmethod
def noise_cdf(self, x: Union[float,
Iterable[float]]) -> Union[float, np.ndarray]:
"""Computes the cumulative density function of the noise distribution.
Args:
x: the point or points at which the cumulative density function is to be
calculated.
Returns:
The cumulative density function of that noise at x, i.e., the probability
that mu is less than or equal to x.
Raises:
NotImplementedError: If not implemented by the subclass.
"""
raise NotImplementedError
@classmethod
@abc.abstractmethod
def from_privacy_guarantee(
cls,
privacy_parameters: common.DifferentialPrivacyParameters,
sensitivity: float = 1,
pessimistic_estimate: bool = True
) -> 'AdditiveNoisePrivacyLoss':
"""Creates the privacy loss for the mechanism with a given privacy.
Args:
privacy_parameters: the desired privacy guarantee of the mechanism.
sensitivity: the sensitivity of function f. (i.e. the maximum absolute
change in f when an input to a single user changes.)
pessimistic_estimate: a value indicating whether the rounding is done in
such a way that the resulting epsilon-hockey stick divergence
computation gives an upper estimate to the real value.
Returns:
The privacy loss of the mechanism with the given privacy guarantee.
Raises:
NotImplementedError: If not implemented by the subclass.
"""
raise NotImplementedError
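# NOTE: Illustrative sketch, not part of the library API. It spells out the
# hockey stick divergence formula used in
# AdditiveNoisePrivacyLoss.get_delta_for_epsilon for Laplace noise, computed
# directly with scipy. For the Laplace mechanism with scale b and sensitivity
# s, the cutoff is x_cutoff = 0.5 * (s - epsilon * b), and the resulting delta
# also has the closed form 1 - exp(0.5 * (epsilon - s / b)) whenever
# 0 <= epsilon <= s / b.
def _example_hockey_stick_divergence_laplace(
    epsilon: float = 0.5,
    parameter: float = 1.0,
    sensitivity: float = 1.0) -> float:
  """Returns CDF(x_cutoff) - exp(epsilon) * CDF(x_cutoff - sensitivity)."""
  noise = stats.laplace(scale=parameter)
  # Largest x whose privacy loss (|x - sensitivity| - |x|) / parameter is at
  # least epsilon; valid when epsilon <= sensitivity / parameter.
  x_cutoff = 0.5 * (sensitivity - epsilon * parameter)
  delta = noise.cdf(x_cutoff) - math.exp(epsilon) * noise.cdf(
      x_cutoff - sensitivity)
  # Sanity check against the closed form mentioned above.
  closed_form = 1 - math.exp(0.5 * (epsilon - sensitivity / parameter))
  assert abs(delta - closed_form) < 1e-12
  return delta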
class LaplacePrivacyLoss(AdditiveNoisePrivacyLoss):
"""Privacy loss of the Laplace mechanism.
The Laplace mechanism for computing a scalar-valued function f simply outputs
the sum of the true value of the function and a noise drawn from the Laplace
distribution. Recall that the Laplace distribution with parameter b has
probability density function 0.5/b * exp(-|x|/b) at x for any real number x.
The privacy loss distribution of the Laplace mechanism is equivalent to the
privacy loss distribution between the Laplace distribution and the same
distribution but shifted by the sensitivity of f. Specifically, the privacy
loss distribution of the Laplace mechanism is generated as follows: first pick
x according to the Laplace noise. Then, let the privacy loss be
ln(PDF(x) / PDF(x - sensitivity)) which is equal to
(|x - sensitivity| - |x|) / parameter.
"""
def __init__(self,
parameter: float,
sensitivity: float = 1) -> None:
"""Initializes the privacy loss of the Laplace mechanism.
Args:
parameter: the parameter of the Laplace distribution.
sensitivity: the sensitivity of function f. (i.e. the maximum absolute
change in f when an input to a single user changes.)
"""
if parameter <= 0:
raise ValueError(f'Parameter is not a positive real number: {parameter}')
self._parameter = parameter
self._laplace_random_variable = stats.laplace(scale=parameter)
super(LaplacePrivacyLoss, self).__init__(sensitivity, False)
def privacy_loss_tail(self) -> TailPrivacyLossDistribution:
"""Computes the privacy loss at the tail of the Laplace distribution.
When x <= 0, the privacy loss is simply sensitivity / parameter; this
happens with probability 0.5. When x >= sensitivity, the privacy loss is
simply - sensitivity / parameter; this happens with probability
1 - CDF(sensitivity) = CDF(-sensitivity).
Returns:
A TailPrivacyLossDistribution instance representing the tail of the
privacy loss distribution.
"""
return TailPrivacyLossDistribution(
0.0, self.sensitivity, {
self.sensitivity / self._parameter:
0.5,
-self.sensitivity / self._parameter:
self._laplace_random_variable.cdf(-self.sensitivity)
})
def privacy_loss(self, x: float) -> float:
"""Computes the privacy loss of the Laplace mechanism at a given point.
Args:
x: the point at which the privacy loss is computed.
Returns:
The privacy loss of the Laplace mechanism at point x, which is equal to
(|x - sensitivity| - |x|) / parameter.
"""
return (abs(x - self.sensitivity) - abs(x)) / self._parameter
def inverse_privacy_loss(self, privacy_loss: float) -> float:
"""Computes the inverse of a given privacy loss for the Laplace mechanism.
Args:
privacy_loss: the privacy loss value.
Returns:
The largest float x such that the privacy loss at x is at least
privacy_loss. When privacy_loss is at most - sensitivity / parameter, x is
equal to infinity. When - sensitivity / parameter < privacy_loss <=
sensitivity / parameter, x is equal to
0.5 * (sensitivity - privacy_loss * parameter). When privacy_loss >
sensitivity / parameter, no such x exists and the function returns
-infinity.
"""
if privacy_loss > self.sensitivity / self._parameter:
return -math.inf
if privacy_loss <= -self.sensitivity / self._parameter:
return math.inf
return 0.5 * (self.sensitivity - privacy_loss * self._parameter)
def noise_cdf(self, x: Union[float,
Iterable[float]]) -> Union[float, np.ndarray]:
"""Computes the cumulative density function of the Laplace distribution.
Args:
x: the point or points at which the cumulative density function is to be
calculated.
Returns:
The cumulative density function of the Laplace noise at x, i.e., the
probability that the Laplace noise is less than or equal to x.
"""
return self._laplace_random_variable.cdf(x)
@classmethod
def from_privacy_guarantee(
cls,
privacy_parameters: common.DifferentialPrivacyParameters,
sensitivity: float = 1,
pessimistic_estimate: bool = True
) -> 'LaplacePrivacyLoss':
"""Creates the privacy loss for Laplace mechanism with given privacy.
The parameter of the Laplace mechanism is simply sensitivity / epsilon.
Args:
privacy_parameters: the desired privacy guarantee of the mechanism.
sensitivity: the sensitivity of function f. (i.e. the maximum absolute
change in f when an input to a single user changes.)
pessimistic_estimate: a value indicating whether the rounding is done in
such a way that the resulting epsilon-hockey stick divergence
computation gives an upper estimate to the real value.
Returns:
The privacy loss of the Laplace mechanism with the given privacy
guarantee.
"""
parameter = sensitivity / privacy_parameters.epsilon
return LaplacePrivacyLoss(parameter, sensitivity=sensitivity)
@property
def parameter(self) -> float:
"""The parameter of the corresponding Laplace noise."""
return self._parameter
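# NOTE: Illustrative usage sketch, not part of the library API. It assumes
# that common.DifferentialPrivacyParameters can be constructed with keyword
# arguments epsilon and delta (delta is unused by the Laplace mechanism here).
def _example_laplace_privacy_loss_usage() -> None:
  """Shows the relation parameter = sensitivity / epsilon for Laplace noise."""
  privacy_parameters = common.DifferentialPrivacyParameters(
      epsilon=1.0, delta=0.0)
  privacy_loss = LaplacePrivacyLoss.from_privacy_guarantee(
      privacy_parameters, sensitivity=2.0)
  # from_privacy_guarantee sets parameter = sensitivity / epsilon = 2.0.
  assert abs(privacy_loss.parameter - 2.0) < 1e-12
  # The pointwise privacy loss at x = 0 is sensitivity / parameter = 1.0 ...
  assert abs(privacy_loss.privacy_loss(0.0) - 1.0) < 1e-12
  # ... and inverse_privacy_loss maps it back into the interval [0, sensitivity].
  assert abs(privacy_loss.inverse_privacy_loss(1.0) - 0.0) < 1e-12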
class GaussianPrivacyLoss(AdditiveNoisePrivacyLoss):
"""Privacy loss of the Gaussian mechanism.
The Gaussian mechanism for computing a scalar-valued function f simply
outputs the sum of the true value of the function and a noise drawn from the
Gaussian distribution. Recall that the (centered) Gaussian distribution with
standard deviation sigma has probability density function
1/(sigma * sqrt(2 * pi)) * exp(-0.5 x^2/sigma^2) at x for any real number x.
The privacy loss distribution of the Gaussian mechanism is equivalent to the
privacy loss distribution between the Gaussian distribution and the same
distribution but shifted by the sensitivity of f. Specifically, the privacy
loss distribution of the Gaussian mechanism is generated as follows: first
pick x according to the Gaussian noise. Then, let the privacy loss be
ln(PDF(x) / PDF(x - sensitivity)) which is equal to
0.5 * sensitivity * (sensitivity - 2 * x) / sigma^2.
"""
def __init__(self,
standard_deviation: float,
sensitivity: float = 1,
pessimistic_estimate: bool = True,
log_mass_truncation_bound: float = -50) -> None:
"""Initializes the privacy loss of the Gaussian mechanism.
Args:
standard_deviation: the standard_deviation of the Gaussian distribution.
sensitivity: the sensitivity of function f. (i.e. the maximum absolute
change in f when an input to a single user changes.)
pessimistic_estimate: a value indicating whether the rounding is done in
such a way that the resulting epsilon-hockey stick divergence
computation gives an upper estimate to the real value.
log_mass_truncation_bound: the ln of the probability mass that might be
discarded from the noise distribution. The larger this number, the more
error it may introduce in divergence calculations.
"""
if standard_deviation <= 0:
raise ValueError(f'Standard deviation is not a positive real number: '
f'{standard_deviation}')
if log_mass_truncation_bound > 0:
raise ValueError(f'Log mass truncation bound is not a non-positive real '
f'number: {log_mass_truncation_bound}')
self._standard_deviation = standard_deviation
self._gaussian_random_variable = stats.norm(scale=standard_deviation)
self._pessimistic_estimate = pessimistic_estimate
self._log_mass_truncation_bound = log_mass_truncation_bound
super(GaussianPrivacyLoss, self).__init__(sensitivity, False)
def privacy_loss_tail(self) -> TailPrivacyLossDistribution:
"""Computes the privacy loss at the tail of the Gaussian distribution.
We set lower_x_truncation so that CDF(lower_x_truncation) =
0.5 * exp(log_mass_truncation_bound), and then set upper_x_truncation to be
-lower_x_truncation.
If pessimistic_estimate is True, the privacy losses for
x < lower_x_truncation and x > upper_x_truncation are rounded up and added
to tail_probability_mass_function. In the case x < lower_x_truncation,
the privacy loss is rounded up to infinity. In the case
x > upper_x_truncation, it is rounded up to the privacy loss at
upper_x_truncation.
On the other hand, if pessimistic_estimate is False, the privacy losses for
x < lower_x_truncation and x > upper_x_truncation are rounded down and added
to tail_probability_mass_function. In the case x < lower_x_truncation, the
privacy loss is rounded down to the privacy loss at lower_x_truncation.
In the case x > upper_x_truncation, it is rounded down to -infinity and
    hence not included in tail_probability_mass_function.
Returns:
A TailPrivacyLossDistribution instance representing the tail of the
privacy loss distribution.
"""
lower_x_truncation = self._gaussian_random_variable.ppf(
0.5 * math.exp(self._log_mass_truncation_bound))
upper_x_truncation = -lower_x_truncation
if self._pessimistic_estimate:
tail_probability_mass_function = {
math.inf:
0.5 * math.exp(self._log_mass_truncation_bound),
self.privacy_loss(upper_x_truncation):
0.5 * math.exp(self._log_mass_truncation_bound)
}
else:
tail_probability_mass_function = {
self.privacy_loss(lower_x_truncation):
0.5 * math.exp(self._log_mass_truncation_bound),
}
return TailPrivacyLossDistribution(lower_x_truncation, upper_x_truncation,
tail_probability_mass_function)
def privacy_loss(self, x: float) -> float:
"""Computes the privacy loss of the Gaussian mechanism at a given point.
Args:
x: the point at which the privacy loss is computed.
Returns:
The privacy loss of the Gaussian mechanism at point x, which is equal to
0.5 * sensitivity * (sensitivity - 2 * x) / standard_deviation^2.
"""
return (0.5 * self.sensitivity * (self.sensitivity - 2 * x) /
(self._standard_deviation**2))
def inverse_privacy_loss(self, privacy_loss: float) -> float:
"""Computes the inverse of a given privacy loss for the Gaussian mechanism.
Args:
privacy_loss: the privacy loss value.
Returns:
The largest float x such that the privacy loss at x is at least
privacy_loss. This is equal to
0.5 * sensitivity - privacy_loss * standard_deviation^2 / sensitivity.
"""
return (0.5 * self.sensitivity - privacy_loss *
(self._standard_deviation**2) / self.sensitivity)
def noise_cdf(self, x: Union[float,
Iterable[float]]) -> Union[float, np.ndarray]:
"""Computes the cumulative density function of the Gaussian distribution.
Args:
x: the point or points at which the cumulative density function is to be
calculated.
Returns:
The cumulative density function of the Gaussian noise at x, i.e., the
probability that the Gaussian noise is less than or equal to x.
"""
return self._gaussian_random_variable.cdf(x)
@classmethod
def from_privacy_guarantee(
cls,
privacy_parameters: common.DifferentialPrivacyParameters,
sensitivity: float = 1,
pessimistic_estimate: bool = True,
) -> 'GaussianPrivacyLoss':
"""Creates the privacy loss for Gaussian mechanism with desired privacy.
Uses binary search to find the smallest possible standard deviation of the
Gaussian noise for which the protocol is (epsilon, delta)-differentially
private.
Args:
privacy_parameters: the desired privacy guarantee of the mechanism.
sensitivity: the sensitivity of function f. (i.e. the maximum absolute
change in f when an input to a single user changes.)
pessimistic_estimate: a value indicating whether the rounding is done in
such a way that the resulting epsilon-hockey stick divergence
computation gives an upper estimate to the real value.
Returns:
The privacy loss of the Gaussian mechanism with the given privacy
guarantee.
"""
if privacy_parameters.delta == 0:
raise ValueError('delta=0 is not allowed for the Gaussian mechanism')
# The initial standard deviation is set to
# sqrt(2 * ln(1.5/delta)) * sensitivity / epsilon. It is known that, when
# epsilon is no more than one, the Gaussian mechanism with this standard
# deviation is (epsilon, delta)-DP. See e.g. Appendix A in Dwork and Roth
# book, "The Algorithmic Foundations of Differential Privacy".
search_parameters = common.BinarySearchParameters(
0,
math.inf,
initial_guess=math.sqrt(2 * math.log(1.5 / privacy_parameters.delta)) *
sensitivity / privacy_parameters.epsilon)
def _get_delta_for_standard_deviation(current_standard_deviation):
return GaussianPrivacyLoss(
current_standard_deviation,
sensitivity=sensitivity).get_delta_for_epsilon(
privacy_parameters.epsilon)
standard_deviation = common.inverse_monotone_function(
_get_delta_for_standard_deviation, privacy_parameters.delta,
search_parameters)
return GaussianPrivacyLoss(
standard_deviation,
sensitivity=sensitivity,
pessimistic_estimate=pessimistic_estimate)
@property
def standard_deviation(self) -> float:
"""The standard deviation of the corresponding Gaussian noise."""
return self._standard_deviation
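# NOTE: Illustrative sketch, not part of the library API. It spells out the
# closed-form privacy loss of the Gaussian mechanism,
# 0.5 * sensitivity * (sensitivity - 2 * x) / standard_deviation^2, and its
# inverse.
def _example_gaussian_privacy_loss_formulas(
    standard_deviation: float = 2.0, sensitivity: float = 1.0) -> None:
  """Checks privacy_loss and inverse_privacy_loss against each other."""
  privacy_loss = GaussianPrivacyLoss(
      standard_deviation, sensitivity=sensitivity)
  x = 0.75
  loss = privacy_loss.privacy_loss(x)
  # For the (continuous) Gaussian, the privacy loss is strictly decreasing in
  # x, so inverse_privacy_loss(privacy_loss(x)) recovers x exactly.
  assert abs(privacy_loss.inverse_privacy_loss(loss) - x) < 1e-12
  # The hockey stick divergence at epsilon = loss is computed from the noise
  # CDF, as in AdditiveNoisePrivacyLoss.get_delta_for_epsilon.
  delta = privacy_loss.get_delta_for_epsilon(loss)
  assert 0.0 <= delta <= 1.0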
class DiscreteLaplacePrivacyLoss(AdditiveNoisePrivacyLoss):
"""Privacy loss of the discrete Laplace mechanism.
The discrete Laplace mechanism for computing an integer-valued function f
simply outputs the sum of the true value of the function and a noise drawn
from the discrete Laplace distribution. Recall that the discrete Laplace
distribution with parameter a > 0 has probability mass function
Z * exp(-a * |x|) at x for any integer x, where Z = (e^a - 1) / (e^a + 1).
This class represents the privacy loss for the aforementioned
discrete Laplace mechanism with a given parameter, and a given sensitivity of
the function f. It is assumed that the function f only outputs an integer.
The privacy loss distribution of the discrete Laplace mechanism is equivalent
to that between the discrete Laplace distribution and the same distribution
but shifted by the sensitivity. More specifically, the privacy loss
distribution of the discrete Laplace mechanism is generated as follows: first
pick x according to the discrete Laplace noise. Then, let the privacy loss be
ln(PMF(x) / PMF(x - sensitivity)) which is equal to
parameter * (|x - sensitivity| - |x|).
"""
def __init__(self,
parameter: float,
sensitivity: int = 1) -> None:
"""Initializes the privacy loss of the discrete Laplace mechanism.
Args:
parameter: the parameter of the discrete Laplace distribution.
sensitivity: the sensitivity of function f. (i.e. the maximum absolute
change in f when an input to a single user changes.)
"""
if parameter <= 0:
raise ValueError(f'Parameter is not a positive real number: {parameter}')
if not isinstance(sensitivity, int):
      raise ValueError(f'Sensitivity is not an integer: {sensitivity}')
self.sensitivity = sensitivity
self._parameter = parameter
self._discrete_laplace_random_variable = stats.dlaplace(parameter)
super(DiscreteLaplacePrivacyLoss, self).__init__(sensitivity, True)
def privacy_loss_tail(self) -> TailPrivacyLossDistribution:
"""Computes privacy loss at the tail of the discrete Laplace distribution.
When x <= 0, the privacy loss is simply sensitivity * parameter; this
happens with probability CDF(0). When x >= sensitivity, the privacy loss is
simply - sensitivity * parameter; this happens with probability
1 - CDF(sensitivity - 1) = CDF(-sensitivity).
Returns:
A TailPrivacyLossDistribution instance representing the tail of the
privacy loss distribution.
"""
return TailPrivacyLossDistribution(
1, self.sensitivity - 1, {
self.sensitivity * self._parameter:
self._discrete_laplace_random_variable.cdf(0),
-self.sensitivity * self._parameter:
self._discrete_laplace_random_variable.cdf(-self.sensitivity)
})
def privacy_loss(self, x: float) -> float:
"""Computes privacy loss of the discrete Laplace mechanism at a given point.
Args:
x: the point at which the privacy loss is computed.
Returns:
The privacy loss of the discrete Laplace mechanism at point x, which is
equal to (|x - sensitivity| - |x|) * parameter for any integer x.
"""
if not isinstance(x, int):
raise ValueError(f'Privacy loss at x is undefined for x = {x}')
return (abs(x - self.sensitivity) - abs(x)) * self._parameter
def inverse_privacy_loss(self, privacy_loss: float) -> float:
"""Computes the inverse of a given privacy loss for the discrete Laplace mechanism.
Args:
privacy_loss: the privacy loss value.
Returns:
The largest float x such that the privacy loss at x is at least
privacy_loss. When privacy_loss is at most - sensitivity * parameter, x is
equal to infinity. When - sensitivity * parameter < privacy_loss <=
sensitivity * parameter, x is equal to
floor(0.5 * (sensitivity - privacy_loss / parameter)). When privacy_loss >
sensitivity * parameter, no such x exists and the function returns
-infinity.
"""
if privacy_loss > self.sensitivity * self._parameter:
return -math.inf
if privacy_loss <= -self.sensitivity * self._parameter:
return math.inf
return math.floor(0.5 *
(self.sensitivity - privacy_loss / self._parameter))
def noise_cdf(self, x: Union[float,
Iterable[float]]) -> Union[float, np.ndarray]:
"""Computes cumulative density function of the discrete Laplace distribution.
Args:
x: the point or points at which the cumulative density function is to be
calculated.
Returns:
The cumulative density function of the discrete Laplace noise at x, i.e.,
the probability that the discrete Laplace noise is less than or equal to
x.
"""
return self._discrete_laplace_random_variable.cdf(x)
@classmethod
def from_privacy_guarantee(
cls,
privacy_parameters: common.DifferentialPrivacyParameters,
sensitivity: int = 1
) -> 'DiscreteLaplacePrivacyLoss':
"""Creates privacy loss for discrete Laplace mechanism with desired privacy.
The parameter of the discrete Laplace mechanism is simply
epsilon / sensitivity.
Args:
privacy_parameters: the desired privacy guarantee of the mechanism.
sensitivity: the sensitivity of function f. (i.e. the maximum absolute
change in f when an input to a single user changes.)
Returns:
The privacy loss of the discrete Laplace mechanism with the given privacy
guarantee.
"""
if not isinstance(sensitivity, int):
      raise ValueError(f'Sensitivity is not an integer: {sensitivity}')
if sensitivity <= 0:
raise ValueError(
f'Sensitivity is not a positive real number: {sensitivity}')
return DiscreteLaplacePrivacyLoss(
privacy_parameters.epsilon / sensitivity,
sensitivity=sensitivity)
@property
def parameter(self) -> float:
"""The parameter of the corresponding Discrete Laplace noise."""
return self._parameter
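# NOTE: Illustrative sketch, not part of the library API. The discrete Laplace
# privacy loss is only defined at integer points, where it takes one of the
# finitely many values parameter * (|x - sensitivity| - |x|).
def _example_discrete_laplace_privacy_loss() -> None:
  """Evaluates the discrete Laplace privacy loss at a few integer points."""
  privacy_loss = DiscreteLaplacePrivacyLoss(parameter=0.5, sensitivity=2)
  # For x <= 0 the loss saturates at sensitivity * parameter = 1.0; for
  # x >= sensitivity it saturates at -1.0; in between it interpolates.
  assert privacy_loss.privacy_loss(0) == 1.0
  assert privacy_loss.privacy_loss(1) == 0.0
  assert privacy_loss.privacy_loss(2) == -1.0
  # The tail object covers x in {1, ..., sensitivity - 1}; both saturated
  # regions are reported through tail_probability_mass_function.
  tail = privacy_loss.privacy_loss_tail()
  assert (tail.lower_x_truncation, tail.upper_x_truncation) == (1, 1)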
class DiscreteGaussianPrivacyLoss(AdditiveNoisePrivacyLoss):
"""Privacy loss of the discrete Gaussian mechanism.
The discrete Gaussian mechanism for computing a scalar-valued function f
simply outputs the sum of the true value of the function and a noise drawn
from the discrete Gaussian distribution. Recall that the (centered) discrete
Gaussian distribution with parameter sigma has probability mass function
proportional to exp(-0.5 x^2/sigma^2) at x for any integer x. Since its
normalization factor and cumulative density function do not have a closed
form, we will instead consider the truncated version where the noise x is
restricted to only be in [-truncated_bound, truncated_bound].
The privacy loss distribution of the discrete Gaussian mechanism is equivalent
to the privacy loss distribution between the discrete Gaussian distribution
and the same distribution but shifted by the sensitivity of f. Specifically,
the privacy loss distribution of the discrete Gaussian mechanism is generated
as follows: first pick x according to the discrete Gaussian noise. Then, let
the privacy loss be ln(PMF(x) / PMF(x - sensitivity)) which is equal to
0.5 * sensitivity * (sensitivity - 2 * x) / sigma^2. Note that since we
consider the truncated version of the noise, we set the privacy loss to
infinity when x < -truncation_bound + sensitivity.
Reference:
Canonne, Kamath, Steinke. "The Discrete Gaussian for Differential Privacy".
In NeurIPS 2020.
"""
def __init__(self,
sigma: float,
sensitivity: int = 1,
truncation_bound: Optional[int] = None) -> None:
"""Initializes the privacy loss of the discrete Gaussian mechanism.
Args:
sigma: the parameter of the discrete Gaussian distribution. Note that
unlike the (continuous) Gaussian distribution this is not equal to the
standard deviation of the noise.
sensitivity: the sensitivity of function f. (i.e. the maximum absolute
change in f when an input to a single user changes.)
truncation_bound: bound for truncating the noise, i.e. the noise will only
have a support in [-truncation_bound, truncation_bound]. When not
specified, truncation_bound will be chosen in such a way that the mass
of the noise outside of this range is at most 1e-30.
"""
if sigma <= 0:
raise ValueError(f'Sigma is not a positive real number: {sigma}')
if not isinstance(sensitivity, int):
      raise ValueError(f'Sensitivity is not an integer: {sensitivity}')
self._sigma = sigma
if truncation_bound is None:
# Tail bound from Canonne et al. ensures that the mass that gets truncated
# is at most 1e-30. (See Proposition 1 in the supplementary material.)
self._truncation_bound = math.ceil(11.6 * sigma)
else:
self._truncation_bound = truncation_bound
if 2 * self._truncation_bound < sensitivity:
      raise ValueError(
          f'Truncation bound ({self._truncation_bound}) is smaller than '
          f'0.5 * sensitivity (0.5 * {sensitivity})')
# Create the PMF and CDF.
self._offset = -1 * self._truncation_bound - 1
self._pmf_array = np.array(
list(range(-1 * self._truncation_bound, self._truncation_bound + 1)))
self._pmf_array = np.exp(-0.5 * (self._pmf_array)**2 / (sigma**2))
self._pmf_array = np.insert(self._pmf_array, 0, 0)
self._cdf_array = np.add.accumulate(self._pmf_array)
self._pmf_array /= self._cdf_array[-1]
self._cdf_array /= self._cdf_array[-1]
super(DiscreteGaussianPrivacyLoss, self).__init__(sensitivity, True)
def privacy_loss_tail(self) -> TailPrivacyLossDistribution:
"""Computes the privacy loss at the tail of the discrete Gaussian distribution.
When x < -truncation_bound + sensitivity, the privacy loss is infinity.
Due to truncation, x > truncation_bound never occurs.
Returns:
A TailPrivacyLossDistribution instance representing the tail of the
privacy loss distribution.
"""
return TailPrivacyLossDistribution(
self.sensitivity - self._truncation_bound, self._truncation_bound, {
math.inf:
self.noise_cdf(self.sensitivity - self._truncation_bound - 1)
})
def privacy_loss(self, x: float) -> float:
"""Computes the privacy loss of the discrete Gaussian mechanism at a given point.
Args:
x: the point at which the privacy loss is computed.
Returns:
The privacy loss of the discrete Gaussian mechanism at point x. If x is an
integer in the range [-truncation_bound + sensitivity, truncation_bound],
it is equal to 0.5 * sensitivity * (sensitivity - 2 * x) / sigma^2. If x
      is an integer in the range [-truncation_bound,
      -truncation_bound + sensitivity), then it is equal to infinity.
      Otherwise, the privacy loss is undefined.
"""
if (not isinstance(x, int)
or x > self._truncation_bound or x < -1 * self._truncation_bound):
raise ValueError(f'Privacy loss at x is undefined for x = {x}')
if x >= self.sensitivity - self._truncation_bound:
return (0.5 * self.sensitivity * (self.sensitivity - 2 * x) /
(self._sigma**2))
return math.inf
def inverse_privacy_loss(self, privacy_loss: float) -> float:
"""Computes the inverse of a given privacy loss for the discrete Gaussian mechanism.
Args:
privacy_loss: the privacy loss value.
Returns:
The largest int x such that the privacy loss at x is at least
privacy_loss. This is equal to
floor(0.5 * sensitivity - privacy_loss * sigma^2 / sensitivity).
"""
return math.floor(0.5 * self.sensitivity - privacy_loss *
(self._sigma**2) / self.sensitivity)
def noise_cdf(self, x: Union[float,
Iterable[float]]) -> Union[float, np.ndarray]:
"""Computes the cumulative density function of the discrete Gaussian distribution.
Args:
x: the point or points at which the cumulative density function is to be
calculated.
Returns:
The cumulative density function of the discrete Gaussian noise at x, i.e.,
the probability that the discrete Gaussian noise is less than or equal to
x.
"""
clipped_x = np.clip(x, -1 * self._truncation_bound - 1,
self._truncation_bound)
indices = np.floor(clipped_x).astype('int') - self._offset
return self._cdf_array[indices]
@classmethod
def from_privacy_guarantee(
cls,
privacy_parameters: common.DifferentialPrivacyParameters,
sensitivity: int = 1,
) -> 'DiscreteGaussianPrivacyLoss':
"""Creates the privacy loss for discrete Gaussian mechanism with desired privacy.
Uses binary search to find the smallest possible standard deviation of the
discrete Gaussian noise for which the protocol is (epsilon, delta)-DP.
Args:
privacy_parameters: the desired privacy guarantee of the mechanism.
sensitivity: the sensitivity of function f. (i.e. the maximum absolute
change in f when an input to a single user changes.)
Returns:
The privacy loss of the discrete Gaussian mechanism with the given privacy
guarantee.
"""
if not isinstance(sensitivity, int):
      raise ValueError(f'Sensitivity is not an integer: {sensitivity}')
if privacy_parameters.delta == 0:
raise ValueError('delta=0 is not allowed for discrete Gaussian mechanism')
# The initial standard deviation is set to
# sqrt(2 * ln(1.5/delta)) * sensitivity / epsilon. It is known that, when
# epsilon is no more than one, the (continuous) Gaussian mechanism with this
# standard deviation is (epsilon, delta)-DP. See e.g. Appendix A in Dwork
# and Roth book, "The Algorithmic Foundations of Differential Privacy".
search_parameters = common.BinarySearchParameters(
0,
math.inf,
initial_guess=math.sqrt(2 * math.log(1.5 / privacy_parameters.delta)) *
sensitivity / privacy_parameters.epsilon)
def _get_delta_for_sigma(current_sigma):
return DiscreteGaussianPrivacyLoss(
current_sigma,
sensitivity=sensitivity).get_delta_for_epsilon(
privacy_parameters.epsilon)
sigma = common.inverse_monotone_function(
_get_delta_for_sigma, privacy_parameters.delta, search_parameters)
return DiscreteGaussianPrivacyLoss(sigma, sensitivity=sensitivity)
def standard_deviation(self) -> float:
"""The standard deviation of the corresponding discrete Gaussian noise."""
return math.sqrt(
sum(((i + self._offset)**2) * probability_mass
for i, probability_mass in enumerate(self._pmf_array)))
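# NOTE: Illustrative sketch, not part of the library API. It exercises the
# truncated discrete Gaussian: by default the noise support is
# [-ceil(11.6 * sigma), ceil(11.6 * sigma)], and privacy losses below
# -truncation_bound + sensitivity are reported as infinity.
def _example_discrete_gaussian_privacy_loss(sigma: float = 1.0) -> None:
  """Checks the support and the infinite-loss region of the truncated noise."""
  privacy_loss = DiscreteGaussianPrivacyLoss(sigma, sensitivity=1)
  truncation_bound = math.ceil(11.6 * sigma)
  # The CDF is 0 just below the support and 1 at its upper end.
  assert privacy_loss.noise_cdf(-truncation_bound - 1) == 0.0
  assert abs(privacy_loss.noise_cdf(truncation_bound) - 1.0) < 1e-12
  # At the lowest support point the shifted distribution has no mass, so the
  # privacy loss ln(PMF(x) / PMF(x - sensitivity)) is infinite.
  assert privacy_loss.privacy_loss(-truncation_bound) == math.inf
  # Elsewhere it follows 0.5 * sensitivity * (sensitivity - 2 * x) / sigma^2.
  assert abs(privacy_loss.privacy_loss(0) - 0.5 / sigma**2) < 1e-12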