blob: f1825fbbfac4c6aa65b0c2693e730b8a16071c21 [file] [log] [blame]
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Privacy accountant that uses Renyi differential privacy."""
import math
from typing import Callable, Optional, Sequence, Tuple, Union
import numpy as np
from scipy import special
from dp_accounting import dp_event
from dp_accounting import privacy_accountant
NeighborRel = privacy_accountant.NeighboringRelation
CompositionErrorDetails = privacy_accountant.PrivacyAccountant.CompositionErrorDetails
def _log_add(logx: float, logy: float) -> float:
"""Adds two numbers in the log space."""
a, b = min(logx, logy), max(logx, logy)
if a == -np.inf: # adding 0
return b
# Use exp(a) + exp(b) = (exp(a - b) + 1) * exp(b)
return math.log1p(math.exp(a - b)) + b # log1p(x) = log(x + 1)
def _log_sub(logx: float, logy: float) -> float:
"""Subtracts two numbers in the log space. Answer must be non-negative."""
if logx < logy:
raise ValueError('The result of subtraction must be non-negative.')
if logy == -np.inf: # subtracting 0
return logx
if logx == logy:
return -np.inf # 0 is represented as -np.inf in the log space.
try:
# Use exp(x) - exp(y) = (exp(x - y) - 1) * exp(y).
return math.log(math.expm1(logx - logy)) + logy # expm1(x) = exp(x) - 1
except OverflowError:
return logx
def _log_sub_sign(logx: float, logy: float) -> Tuple[bool, float]:
"""Returns log(exp(logx)-exp(logy)) and its sign."""
if logx > logy:
s = True
mag = logx + np.log(1 - np.exp(logy - logx))
elif logx < logy:
s = False
mag = logy + np.log(1 - np.exp(logx - logy))
else:
s = True
mag = -np.inf
return s, mag
def _log_comb(n: int, k: int) -> float:
"""Computes log of binomial coefficient."""
return (special.gammaln(n + 1) - special.gammaln(k + 1) -
special.gammaln(n - k + 1))
def _compute_log_a_int(q: float, sigma: float, alpha: int) -> float:
  """Computes log(A_alpha) for integer alpha and sampling rate 0 < q < 1."""
  log_q = math.log(q)
  log_1mq = math.log(1 - q)
  two_sigma_sq = 2 * (sigma**2)

  total = -np.inf  # Accumulator, starting at log(0).
  for i in range(alpha + 1):
    # i-th term of the binomial expansion, in log space.
    log_binom_term = _log_comb(alpha, i) + i * log_q + (alpha - i) * log_1mq
    total = _log_add(total, log_binom_term + (i * i - i) / two_sigma_sq)
  return float(total)
def _compute_log_a_frac(q: float, sigma: float, alpha: float) -> float:
  """Computes log(A_alpha) for fractional alpha, 0 < q < 1.

  Expands A_alpha as two binomial series (one per side of the crossover
  point z0) and sums terms until they become negligible.
  """
  # The two parts of A_alpha, integrals over (-inf,z0] and [z0, +inf), are
  # initialized to 0 in the log space:
  log_a0, log_a1 = -np.inf, -np.inf
  i = 0

  z0 = sigma**2 * math.log(1 / q - 1) + .5

  while True:  # do ... until loop
    coef = special.binom(alpha, i)
    log_coef = math.log(abs(coef))
    j = alpha - i

    log_t0 = log_coef + i * math.log(q) + j * math.log(1 - q)
    log_t1 = log_coef + j * math.log(q) + i * math.log(1 - q)

    # Gaussian tail masses of each term, expressed through log(erfc).
    log_e0 = math.log(.5) + _log_erfc((i - z0) / (math.sqrt(2) * sigma))
    log_e1 = math.log(.5) + _log_erfc((z0 - j) / (math.sqrt(2) * sigma))

    log_s0 = log_t0 + (i * i - i) / (2 * (sigma**2)) + log_e0
    log_s1 = log_t1 + (j * j - j) / (2 * (sigma**2)) + log_e1

    # For fractional alpha the generalized binomial coefficients alternate in
    # sign once i > alpha, so terms are added or subtracted accordingly.
    if coef > 0:
      log_a0 = _log_add(log_a0, log_s0)
      log_a1 = _log_add(log_a1, log_s1)
    else:
      log_a0 = _log_sub(log_a0, log_s0)
      log_a1 = _log_sub(log_a1, log_s1)

    i += 1
    # Stop once both terms of this round are negligible (below exp(-30)).
    if max(log_s0, log_s1) < -30:
      break

  return _log_add(log_a0, log_a1)
def _log_erfc(x: float) -> float:
"""Computes log(erfc(x)) with high accuracy for large x."""
try:
return math.log(2) + special.log_ndtr(-x * 2**.5)
except NameError:
# If log_ndtr is not available, approximate as follows:
r = special.erfc(x)
if r == 0.0:
# Using the Laurent series at infinity for the tail of the erfc function:
# erfc(x) ~ exp(-x^2-.5/x^2+.625/x^4)/(x*pi^.5)
# To verify in Mathematica:
# Series[Log[Erfc[x]] + Log[x] + Log[Pi]/2 + x^2, {x, Infinity, 6}]
return (-math.log(math.pi) / 2 - math.log(x) - x**2 - .5 * x**-2 +
.625 * x**-4 - 37. / 24. * x**-6 + 353. / 64. * x**-8)
else:
return math.log(r)
def compute_delta(orders: Sequence[float], rdp: Sequence[float],
                  epsilon: float) -> Tuple[float, int]:
  """Computes delta given a list of RDP values and target epsilon.

  Uses the improved RDP-to-DP conversion of
  https://arxiv.org/abs/2004.00010 (Proposition 12 in v4), combined with a
  KL-divergence-based bound (delta <= sqrt(1-exp(-KL))) that is tighter for
  orders close to 1. The basic bound
  delta = min(exp((rdp - epsilon) * (orders - 1))) from
  https://arxiv.org/abs/1702.07476 (Proposition 3 in v3) is dominated by
  these.

  Args:
    orders: An array of orders.
    rdp: An array of RDP guarantees.
    epsilon: The target epsilon.

  Returns:
    2-tuple containing optimal delta and the optimal order.

  Raises:
    ValueError: If input is malformed.
  """
  if epsilon < 0:
    raise ValueError(f'Epsilon cannot be negative. Found {epsilon}.')
  if len(orders) != len(rdp):
    raise ValueError('Input lists must have the same length.')

  log_deltas = []  # Candidates are kept in log space to avoid overflow.
  for order, divergence in zip(orders, rdp):
    if order < 1:
      raise ValueError(
          f'Renyi divergence order must be at least 1. Found {order}.')
    if divergence < 0:
      raise ValueError(
          f'Renyi divergence cannot be negative. Found {divergence}.')

    # KL-based bound, better for small orders: delta <= sqrt(1-exp(-KL)).
    if divergence == 0:
      candidate = -np.inf
    else:
      candidate = 0.5 * math.log1p(-math.exp(-divergence))

    if order > 1.01:
      # The improved bound below is numerically unstable (and not useful)
      # as order -> 1, hence the cutoff.
      bound = (order - 1) * (divergence - epsilon +
                             math.log1p(-1 / order)) - math.log(order)
      candidate = min(candidate, bound)

    log_deltas.append(candidate)

  best = np.argmin(log_deltas)
  return min(math.exp(log_deltas[best]), 1.), orders[best]
def compute_epsilon(orders: Sequence[float], rdp: Sequence[float],
                    delta: float) -> Tuple[float, int]:
  """Computes epsilon given a list of RDP values and target delta.

  Uses the improved RDP-to-DP conversion of
  https://arxiv.org/abs/2004.00010 (Proposition 12 in v4; also
  https://arxiv.org/abs/2001.05990 Equation 20 in v1), which dominates the
  basic bound epsilon = min(rdp - log(delta) / (orders - 1)) of
  https://arxiv.org/abs/1702.07476 (Proposition 3 in v3).

  Args:
    orders: An array of orders.
    rdp: An array of RDP guarantees.
    delta: The target delta. Must be >= 0.

  Returns:
    2-tuple containing optimal epsilon and the optimal order.

  Raises:
    ValueError: If input is malformed.
  """
  if delta < 0:
    raise ValueError(f'Delta cannot be negative. Found {delta}.')

  if delta == 0:
    # delta = 0 is only achievable when there is no privacy loss at all.
    if all(r == 0 for r in rdp):
      return 0, 0
    else:
      return np.inf, 0

  if len(orders) != len(rdp):
    raise ValueError('Input lists must have the same length.')

  eps = []
  for order, divergence in zip(orders, rdp):
    if order < 1:
      raise ValueError(
          f'Renyi divergence order must be at least 1. Found {order}.')
    if divergence < 0:
      raise ValueError(
          f'Renyi divergence cannot be negative. Found {divergence}.')

    if delta**2 + math.expm1(-divergence) > 0:
      # The KL bound delta <= sqrt(1-exp(-KL)) is already met, so epsilon = 0
      # and no further computation is needed for this order.
      candidate = 0
    elif order > 1.01:
      # The improved bound is numerically unstable (and not useful) as
      # order -> 1, hence the cutoff.
      candidate = (divergence + math.log1p(-1 / order) -
                   math.log(delta * order) / (order - 1))
    else:
      # Nothing applies here, e.g. when effectively asking for delta = 0.
      candidate = np.inf
    eps.append(candidate)

  best = np.argmin(eps)
  return max(0, eps[best]), orders[best]
def _stable_inplace_diff_in_log(vec: np.ndarray,
                                signs: np.ndarray,
                                n: Optional[int] = None):
  """Replaces the first n-1 dims of vec with the log of abs difference operator.

  Args:
    vec: numpy array of floats with size larger than 'n'
    signs: Optional numpy array of bools with the same size as vec in case one
      needs to compute partial differences vec and signs jointly describe a
      vector of real numbers' sign and abs in log scale.
    n: Optional upper bound on number of differences to compute. If None, all
      differences are computed.

  Returns:
    The first n-1 dimension of vec and signs will store the log-abs and sign of
    the difference.

  Raises:
    ValueError: If input is malformed.
  """
  if vec.shape != signs.shape:
    raise ValueError('Shape of vec and signs do not match.')
  if signs.dtype != bool:
    raise ValueError('signs must be of type bool')
  if n is None:
    n = np.max(vec.shape) - 1
  else:
    assert np.max(vec.shape) >= n + 1

  # Each entry j is overwritten with the (signed, log-magnitude) difference
  # of entries j+1 and j, i.e. one round of the forward difference operator.
  for j in range(0, n, 1):
    if signs[j] == signs[j + 1]:  # When the signs are the same
      # if the signs are both positive, then we can just use the standard one
      signs[j], vec[j] = _log_sub_sign(vec[j + 1], vec[j])
      # otherwise, we do that but toggle the sign
      if not signs[j + 1]:
        signs[j] = ~signs[j]
    else:  # When the signs are different.
      # Opposite signs mean the magnitudes add; the result takes the sign of
      # the entry being subtracted from (entry j+1).
      vec[j] = _log_add(vec[j], vec[j + 1])
      signs[j] = signs[j + 1]
def _get_forward_diffs(fun: Callable[[float], float],
                       n: int) -> Tuple[np.ndarray, np.ndarray]:
  """Computes up to nth order forward differences of `fun` evaluated at 0.

  See Theorem 27 of https://arxiv.org/pdf/1808.00087.pdf

  Args:
    fun: Function to compute forward differences of.
    n: Number of differences to compute.

  Returns:
    Pair (deltas, signs_deltas) of the log deltas and their signs.
  """
  size = n + 3
  func_vec = np.zeros(size)
  signs_func_vec = np.ones(size, dtype=bool)

  # Entry i of deltas stores log(abs(i-th order discrete derivative)).
  deltas = np.zeros(n + 2)
  signs_deltas = np.zeros(n + 2, dtype=bool)

  # Sample the function at 0, 1, ..., n+1 (shifted by one slot).
  for i in range(1, size):
    func_vec[i] = fun(float(i - 1))

  for i in range(n + 2):
    # One more round of pairwise differences, performed in log space.
    _stable_inplace_diff_in_log(func_vec, signs_func_vec, n=n + 2 - i)
    deltas[i] = func_vec[0]
    signs_deltas[i] = signs_func_vec[0]
  return deltas, signs_deltas
def _compute_log_a(q: float, noise_multiplier: float,
                   alpha: Union[int, float]) -> float:
  """Dispatches the log(A_alpha) computation on whether alpha is integral."""
  if float(alpha).is_integer():
    return _compute_log_a_int(q, noise_multiplier, int(alpha))
  return _compute_log_a_frac(q, noise_multiplier, alpha)
def _compute_rdp_poisson_subsampled_gaussian(
q: float, noise_multiplier: float,
orders: Sequence[float]) -> Union[float, np.ndarray]:
"""Computes RDP of the Poisson sampled Gaussian mechanism.
Args:
q: The sampling rate.
noise_multiplier: The ratio of the standard deviation of the Gaussian noise
to the l2-sensitivity of the function to which it is added.
orders: An array of RDP orders.
Returns:
The RDPs at all orders. Can be `np.inf`.
"""
def compute_one_order(q, alpha):
if np.isinf(alpha) or noise_multiplier == 0:
return np.inf
if q == 0:
return 0
if q == 1.:
return alpha / (2 * noise_multiplier**2)
return _compute_log_a(q, noise_multiplier, alpha) / (alpha - 1)
return np.array([compute_one_order(q, order) for order in orders])
def _compute_rdp_sample_wor_gaussian(
    q: float, noise_multiplier: float,
    orders: Sequence[float]) -> Union[float, np.ndarray]:
  """Computes RDP of Gaussian mechanism using sampling without replacement.

  This function applies to the following schemes:
  1. Sampling w/o replacement: Sample a uniformly random subset of size m = q*n.
  2. ``Replace one data point'' version of differential privacy, i.e., n is
     considered public information.

  Reference: Theorem 27 of https://arxiv.org/pdf/1808.00087.pdf (A strengthened
  version applies subsampled-Gaussian mechanism.)
  - Wang, Balle, Kasiviswanathan. "Subsampled Renyi Differential Privacy and
    Analytical Moments Accountant." AISTATS'2019.

  Args:
    q: The sampling proportion = m / n. Assume m is an integer <= n.
    noise_multiplier: The ratio of the standard deviation of the Gaussian noise
      to the l2-sensitivity of the function to which it is added.
    orders: An array of RDP orders.

  Returns:
    The RDPs at all orders, can be np.inf.
  """
  per_order = [
      _compute_rdp_sample_wor_gaussian_scalar(q, noise_multiplier, alpha)
      for alpha in orders
  ]
  return np.array(per_order)
def _compute_rdp_sample_wor_gaussian_scalar(q: float, sigma: float,
alpha: Union[float, int]) -> float:
"""Computes RDP of the Sampled Gaussian mechanism at order alpha.
Args:
q: The sampling proportion = m / n. Assume m is an integer <= n.
sigma: The std of the additive Gaussian noise.
alpha: The order at which RDP is computed.
Returns:
RDP at alpha, can be np.inf.
"""
assert (q <= 1) and (q >= 0) and (alpha >= 1)
if q == 0:
return 0
if q == 1.:
return alpha / (2 * sigma**2)
if np.isinf(alpha):
return np.inf
if float(alpha).is_integer():
return _compute_rdp_sample_wor_gaussian_int(q, sigma, int(alpha)) / (
alpha - 1)
else:
# When alpha not an integer, we apply Corollary 10 of [WBK19] to interpolate
# the CGF and obtain an upper bound
alpha_f = math.floor(alpha)
alpha_c = math.ceil(alpha)
x = _compute_rdp_sample_wor_gaussian_int(q, sigma, alpha_f)
y = _compute_rdp_sample_wor_gaussian_int(q, sigma, alpha_c)
t = alpha - alpha_f
return ((1 - t) * x + t * y) / (alpha - 1)
def _compute_rdp_sample_wor_gaussian_int(q: float, sigma: float,
                                         alpha: int) -> float:
  """Computes log(A_alpha) for integer alpha, subsampling without replacement.

  When alpha is smaller than max_alpha, compute the bound Theorem 27 exactly,
  otherwise compute the bound with Stirling approximation.

  Args:
    q: The sampling proportion = m / n. Assume m is an integer <= n.
    sigma: The std of the additive Gaussian noise.
    alpha: The order at which RDP is computed.

  Returns:
    RDP at alpha, can be np.inf.
  """
  # Above this order the exact bound requires forward differences that are too
  # expensive/unstable, so a Stirling-style approximation is used instead.
  max_alpha = 256

  if np.isinf(alpha):
    return np.inf
  elif alpha == 1:
    return 0

  def cgf(x):
    # Return rdp(x+1)*x, the rdp of Gaussian mechanism is alpha/(2*sigma**2)
    return x * 1.0 * (x + 1) / (2.0 * sigma**2)

  def func(x):
    # Return the rdp of Gaussian mechanism
    return 1.0 * x / (2.0 * sigma**2)

  # Initialize with 1 in the log space.
  log_a = 0
  # Calculates the log term when alpha = 2
  log_f2m1 = func(2.0) + np.log(1 - np.exp(-func(2.0)))
  if alpha <= max_alpha:
    # We need forward differences of exp(cgf)
    # The following line is the numerically stable way of implementing it.
    # The output is in polar form with logarithmic magnitude
    deltas, _ = _get_forward_diffs(cgf, alpha)
    # Compute the bound exactly requires book keeping of O(alpha**2)
    for i in range(2, alpha + 1):
      if i == 2:
        s = 2 * np.log(q) + _log_comb(alpha, 2) + np.minimum(
            np.log(4) + log_f2m1,
            func(2.0) + np.log(2))
      elif i > 2:
        # Bound the i-th term via the geometric mean of the two nearest even
        # forward differences, capped by the direct CGF bound.
        delta_lo = deltas[int(2 * np.floor(i / 2.0)) - 1]
        delta_hi = deltas[int(2 * np.ceil(i / 2.0)) - 1]
        s = np.log(4) + 0.5 * (delta_lo + delta_hi)
        s = np.minimum(s, np.log(2) + cgf(i - 1))
        s += i * np.log(q) + _log_comb(alpha, i)
      log_a = _log_add(log_a, s)
    return float(log_a)
  else:
    # Compute the bound with stirling approximation. Everything is O(x) now.
    for i in range(2, alpha + 1):
      if i == 2:
        s = 2 * np.log(q) + _log_comb(alpha, 2) + np.minimum(
            np.log(4) + log_f2m1,
            func(2.0) + np.log(2))
      else:
        s = np.log(2) + cgf(i - 1) + i * np.log(q) + _log_comb(alpha, i)
      log_a = _log_add(log_a, s)
    return log_a
def _effective_gaussian_noise_multiplier(
    event: dp_event.DpEvent) -> Union[float, dp_event.DpEvent]:
  """Determines the effective noise multiplier of nested structure of Gaussians.

  A series of Gaussian queries on the same data can be reexpressed as a single
  query with pre- and post- processing. For details, see section 3 of
  https://arxiv.org/pdf/1812.06210.pdf.

  Args:
    event: A `dp_event.DpEvent`. In order for conversion to be successful it
      must consist of a single `dp_event.GaussianDpEvent`, or a nested structure
      of `dp_event.ComposedDpEvent` and/or `dp_event.SelfComposedDpEvent`
      bottoming out in `dp_event.GaussianDpEvent`s.

  Returns:
    The noise multiplier of the equivalent `dp_event.GaussianDpEvent`. If the
    input event was not a `dp_event.GaussianDpEvent` or a nested structure of
    `dp_event.ComposedDpEvent` and/or `dp_event.SelfComposedDpEvent` bottoming
    out in `dp_event.GaussianDpEvent`s, returns offending subevent.
  """
  if isinstance(event, dp_event.GaussianDpEvent):
    return event.noise_multiplier

  if isinstance(event, dp_event.ComposedDpEvent):
    # Composed Gaussians combine via the sum of inverse squared sigmas.
    inv_sq_sum = 0
    for sub_event in event.events:
      sub_sigma = _effective_gaussian_noise_multiplier(sub_event)
      if not isinstance(sub_sigma, float):
        # NOTE(review): a non-float noise multiplier (e.g. an int) would be
        # treated as an offending subevent here — confirm callers pass floats.
        return sub_sigma
      inv_sq_sum += sub_sigma**-2
    return inv_sq_sum**-0.5

  if isinstance(event, dp_event.SelfComposedDpEvent):
    sub_sigma = _effective_gaussian_noise_multiplier(event.event)
    if not isinstance(sub_sigma, float):
      return sub_sigma
    return (event.count * sub_sigma**-2)**-0.5

  # Not a (nested) Gaussian event: report the offending subevent itself.
  return event
def _compute_rdp_single_epoch_tree_aggregation(
noise_multiplier: float, step_counts: Union[int, Sequence[int]],
orders: Sequence[float]) -> Union[float, np.ndarray]:
"""Computes RDP of the Tree Aggregation Protocol for Gaussian Mechanism.
This function implements the accounting when the tree is periodically
restarted and no record occurs twice across all trees. See appendix D of
"Practical and Private (Deep) Learning without Sampling or Shuffling"
https://arxiv.org/abs/2103.00039.
Args:
noise_multiplier: A non-negative float representing the ratio of the
standard deviation of the Gaussian noise to the l2-sensitivity of the
function to which it is added.
step_counts: A scalar or a list of non-negative integers representing the
number of steps per epoch (between two restarts).
orders: An array of RDP orders.
Returns:
The RDPs at all orders. Can be `np.inf`.
"""
if noise_multiplier < 0:
raise ValueError(
f'noise_multiplier must be non-negative. Got {noise_multiplier}.')
if noise_multiplier == 0:
return np.inf
if not step_counts:
raise ValueError(
'steps_list must be a non-empty list, or a non-zero scalar. Got '
f'{step_counts}.')
if np.isscalar(step_counts):
step_counts = [step_counts]
for steps in step_counts:
if steps < 0:
raise ValueError(f'Steps must be non-negative. Got {step_counts}')
max_depth = math.ceil(math.log2(max(step_counts) + 1))
return np.array([a * max_depth / (2 * noise_multiplier**2) for a in orders])
def _expm1_over_x(x: float) -> float:
"""Computes (exp(x)-1)/x in a numerically stable manner.
Args:
x: float
Returns:
(exp(x)-1)/x
"""
if x < -0.1 or x > 0.1:
return math.expm1(x) / x
# exp(x) = sum_{k>=0} x^k / k!
# (exp(x)-1)/x = sum_{k>=1} x^{k-1} / k!
terms = []
y = 1 # = x^{k-1}/k!
for k in range(1, 100):
y = y / k
terms.append(y)
y = y * x
return math.fsum(terms)
# Dropped terms: sum_{k>=100} x^{k-1} / k!
# Since |x|<= 0.1, this sum is < 10^-100.
# Note that 0.9 < (exp(x)-1)/x < 1.1, so this is also a small relative error.
def _logx_over_xm1(x: float) -> float:
"""Computes log(x)/(x-1) in a numerically stable manner.
Here log is the natural logarithm.
Args:
x: float
Returns:
log(x)/(x-1)
"""
if x < 0.9 or x > 1.1:
return math.log(x) / (x - 1)
# Denote y = 1-x. Then we have a Taylor series for the natural logarithm:
# log(x) = log(1-y) = - sum_{k>=1} y^k / k
# Thus log(x)/(x-1) = -log(1-y)/y = sum_{k>=1} y^{k-1}/k
return math.fsum((1 - x)**(k - 1) / k for k in range(1, 100))
# Dropped terms: sum_{k>=100} y^{k-1}/k.
# Since |y|<=0.1, this sum is < 10^-100.
# Since 0.9 <= log(x)/(x-1) <= 1.1, this is also a small relative error.
def _truncated_negative_binomial_mean(gamma: float, shape: float) -> float:
"""Computes the mean of the truncated negative binomial Distribution.
See Definition 1 in https://arxiv.org/pdf/2110.03620v2.pdf#page=5
Args:
gamma: Halting probability parameter of the distribution. Must be >0 and
<=1.
shape: Shape parameter of the Distribution. Must be >=0.
Returns:
The mean of the distribution.
"""
if shape < 0:
raise ValueError(f'shape must be non-negative. Got {shape}')
if gamma <= 0 or gamma > 1:
raise ValueError(f'gamma must be in (0,1]. Got {gamma}')
if shape == 1: # Geometric Distribution
return 1 / gamma
elif shape == 0: # Logarithmic distribution
# answer = (1 - 1 / gamma) / log(gamma)
# = 1/(gamma*log(gamma)/(gamma-1))
return 1 / (gamma * _logx_over_xm1(gamma))
else: # Truncated Negative Binomial
# answer = shape * (1 / gamma - 1) / (1 - gamma**shape)
# = 1 / ( (exp(shape*log(gamma))-1)/(shape*log(gamma)) *
# log(gamma)/(gamma-1) * gamma)
a = _expm1_over_x(shape * math.log(gamma))
b = _logx_over_xm1(gamma)
return 1 / (gamma * a * b)
def _gamma_truncated_negative_binomial(shape: float,
mean: float,
tolerance: float = 1e-9) -> float:
"""Computes gamma parameter of truncated negative binomial from mean.
Args:
shape: shape parameter of truncated negative binomial distribution. Must be
>= 0.
mean: the expectation of the Distribution
tolerance: relative (i.e. multiplicative) accuracy bound for gamma. Default
tolerance is 10^-9.
Returns:
The gamma parameter = success probability of the distribution.
"""
if shape < 0:
raise ValueError(f'shape must be non-negative. Got {shape}')
if mean < 1:
raise ValueError(f'mean must be at least 1. Got {mean}')
if shape == 1:
return 1 / mean # Geometric distribution
# Otherwise we invert the _truncated_negative_binomial_mean function.
gamma_min = 0 # gamma=0 corresponds to mean=infinity.
gamma_max = min(1, 2 * (shape + 1) / mean) # gamma=1 corresponds to mean=1.
# Also max{shape,1/ln(1/gamma)}*(1/gamma-1) <= mean <= 2*(shape+1)/gamma,
# which implies gamma <= 2*(shape+1)/mean
while gamma_max > gamma_min * (1 + tolerance):
gamma = (gamma_min + gamma_max) / 2
gamma_mean = _truncated_negative_binomial_mean(gamma, shape)
if gamma_mean < mean:
gamma_max = gamma
else:
gamma_min = gamma
return gamma_min # The conservative estimate is returned.
def _compute_rdp_repeat_and_select(orders: Sequence[float],
                                   rdp: Sequence[float], mean: float,
                                   shape: float) -> Sequence[float]:
  # pyformat: disable
  """Computes RDP of repeating and selecting best run.

  Inputs orders & rdp represent RDP of a single run.
  Output represents RDP of running multiple times and returning the best run;
  outputs of other runs are not returned.

  The total number of runs is randomized and drawn from a distribution
  with the given parameters. Poisson (shape=infinity), Geometric (shape=1),
  Logarithmic (shape=0), or Truncated Negative binomial (0<shape<infinity).

  See https://arxiv.org/abs/2110.03620 for details.

  Args:
    orders: List of Renyi orders considered. Each order should be >= 1.
    rdp: List of RDPs for a single run of the mechanism.
    mean: The mean of the distribution of the random number of repetitions.
    shape: Shape/type of the distribution. Should be >= 0.
      * shape == 0 is the logarithmic distribution.
      * shape == 1 is the geometric distribution.
      * shape == infinity is the Poisson Distribution
      * shape in (0, infinity) is the truncated negative binomial.

  Returns:
    The RDPs at all orders.

  Raises:
    ValueError: If input is malformed.
  """
  # pyformat: enable
  if math.isnan(shape) or shape < 0:
    raise ValueError(f'Distribution of repetitions must be >=0. Got {shape}.')
  if math.isnan(mean) or mean < 1:
    raise ValueError(f'Mean of number of repetitions must be >=1. Got {mean}.')
  if len(orders) != len(rdp):
    raise ValueError(
        f'orders and rdp must be same length, got {len(orders)} & {len(rdp)}.')

  orders = np.asarray(orders)
  rdp_out = np.zeros_like(orders, dtype=np.float64)  # This will be the output.
  rdp_out += np.inf  # Initialize to infinity.

  if shape == np.inf:  # Poisson Distribution
    for i in range(len(orders)):
      # orders[i]=lambda and rdp[i]=epsilon in the language of
      # Theorem 6 of https://arxiv.org/pdf/2110.03620v2.pdf#page=7
      if orders[i] <= 1:
        continue  # Our formula is not applicable in this case.
      epshat = math.log1p(1 / (orders[i] - 1))
      deltahat, _ = compute_delta(orders, rdp, epshat)
      rdp_out[i] = rdp[i] + mean * deltahat + math.log(mean) / (orders[i] - 1)
  else:  # Truncated Negative Binomial (includes Logarithmic & Geometric)
    # First we map mean parameter to gamma parameter of TNB.
    gamma = _gamma_truncated_negative_binomial(shape, mean)

    # Next we apply the formula.
    # Theorem 2 of https://arxiv.org/pdf/2110.03620v2.pdf#page=5
    # orders[i] = lambda, rdp[i] = epsilon,
    # orders[j] = lambdahat, rdp[j] = epsilonhat

    # First compute constant term
    c = (1 + shape) * np.min((1 - 1 / orders) * rdp - math.log(gamma) / orders)
    for i in range(len(orders)):
      if orders[i] > 1:  # Otherwise our formula is invalid.
        rdp_out[i] = rdp[i] + math.log(mean) / (orders[i] - 1) + c

  # Finally we apply monotonicity of Renyi DP
  # i.e. if orders[i] < orders[j], then rdp[i] < rdp[j].
  # We can use this to bound rdp for low orders.
  for i in range(len(orders)):
    rdp_out[i] = min(
        rdp_out[j] for j in range(len(orders)) if orders[i] <= orders[j])
  return rdp_out
# Default orders chosen to give good coverage for Gaussian mechanism in
# the privacy regime of interest: a dense grid of fractional orders between
# 1.1 and 10.9, the integers 11..63, then a few large powers of two.
DEFAULT_RDP_ORDERS = ([1 + x / 10. for x in range(1, 100)] +
                      list(range(11, 64)) + [128, 256, 512, 1024])
class RdpAccountant(privacy_accountant.PrivacyAccountant):
  """Privacy accountant that uses Renyi differential privacy."""

  def __init__(
      self,
      orders: Optional[Sequence[float]] = None,
      neighboring_relation: NeighborRel = NeighborRel.ADD_OR_REMOVE_ONE,
  ):
    """Initializes the accountant.

    Args:
      orders: The RDP orders at which to track privacy loss. Defaults to
        `DEFAULT_RDP_ORDERS`.
      neighboring_relation: The neighboring relation under which events are
        accounted.
    """
    super().__init__(neighboring_relation)
    if orders is None:
      orders = DEFAULT_RDP_ORDERS
    self._orders = np.array(orders)
    # Accumulated RDP at each order; composing events adds into this array.
    self._rdp = np.zeros_like(orders, dtype=np.float64)

  def _maybe_compose(self, event: dp_event.DpEvent, count: int,
                     do_compose: bool) -> Optional[CompositionErrorDetails]:
    """Traverses `event` and performs composition if `do_compose` is True.

    Args:
      event: The event (possibly nested) to traverse.
      count: Number of times `event` is composed.
      do_compose: If False, only validates that `event` is supported without
        mutating `self._rdp`.

    Returns:
      None if `event` is supported, otherwise `CompositionErrorDetails`
      describing the offending (sub)event.
    """
    if isinstance(event, dp_event.NoOpDpEvent):
      return None
    elif isinstance(event, dp_event.NonPrivateDpEvent):
      if do_compose:
        # A non-private event destroys all guarantees at every order.
        self._rdp += np.inf
      return None
    elif isinstance(event, dp_event.SelfComposedDpEvent):
      # Fold the self-composition into the multiplicity.
      return self._maybe_compose(event.event, event.count * count, do_compose)
    elif isinstance(event, dp_event.ComposedDpEvent):
      for e in event.events:
        result = self._maybe_compose(e, count, do_compose)
        if result is not None:
          return result
      return None
    elif isinstance(event, dp_event.GaussianDpEvent):
      if do_compose:
        # A plain Gaussian event is the q=1 case of the Poisson-sampled one.
        self._rdp += count * _compute_rdp_poisson_subsampled_gaussian(
            q=1.0, noise_multiplier=event.noise_multiplier, orders=self._orders)
      return None
    elif isinstance(event, dp_event.PoissonSampledDpEvent):
      if self._neighboring_relation is not NeighborRel.ADD_OR_REMOVE_ONE:
        error_msg = (
            'neighboring_relation must be `ADD_OR_REMOVE_ONE` for '
            f'`PoissonSampledDpEvent`. Found {self._neighboring_relation}.')
        return CompositionErrorDetails(
            invalid_event=event, error_message=error_msg)
      sigma_or_bad_event = _effective_gaussian_noise_multiplier(event.event)
      if isinstance(sigma_or_bad_event, dp_event.DpEvent):
        return CompositionErrorDetails(
            invalid_event=event,
            error_message='Subevent of `PoissonSampledDpEvent` must be a '
            '`GaussianDpEvent` or a nested structure of `ComposedDpEvent` '
            'and/or `SelfComposedDpEvent` bottoming out in `GaussianDpEvent`s.'
            f' Found subevent {sigma_or_bad_event}.')
      if do_compose:
        self._rdp += count * _compute_rdp_poisson_subsampled_gaussian(
            q=event.sampling_probability,
            noise_multiplier=sigma_or_bad_event,
            orders=self._orders)
      return None
    elif isinstance(event, dp_event.SampledWithoutReplacementDpEvent):
      if self._neighboring_relation is not NeighborRel.REPLACE_ONE:
        error_msg = ('neighboring_relation must be `REPLACE_ONE` for '
                     '`SampledWithoutReplacementDpEvent`. Found '
                     f'{self._neighboring_relation}.')
        return CompositionErrorDetails(
            invalid_event=event, error_message=error_msg)
      sigma_or_bad_event = _effective_gaussian_noise_multiplier(event.event)
      if isinstance(sigma_or_bad_event, dp_event.DpEvent):
        return CompositionErrorDetails(
            invalid_event=event,
            error_message='Subevent of `SampledWithoutReplacementDpEvent` must '
            'be a `GaussianDpEvent` or a nested structure of `ComposedDpEvent` '
            'and/or `SelfComposedDpEvent` bottoming out in `GaussianDpEvent`s. '
            f'Found subevent {sigma_or_bad_event}.')
      if do_compose:
        self._rdp += count * _compute_rdp_sample_wor_gaussian(
            q=event.sample_size / event.source_dataset_size,
            noise_multiplier=sigma_or_bad_event,
            orders=self._orders)
      return None
    elif isinstance(event, dp_event.SingleEpochTreeAggregationDpEvent):
      if self._neighboring_relation is not NeighborRel.REPLACE_SPECIAL:
        error_msg = ('neighboring_relation must be `REPLACE_SPECIAL` for '
                     '`SingleEpochTreeAggregationDpEvent`. Found '
                     f'{self._neighboring_relation}.')
        return CompositionErrorDetails(
            invalid_event=event, error_message=error_msg)
      if do_compose:
        self._rdp += count * _compute_rdp_single_epoch_tree_aggregation(
            event.noise_multiplier, event.step_counts, self._orders)
      return None
    elif isinstance(event, dp_event.LaplaceDpEvent):
      if do_compose:
        # Laplace satisfies eps-DP with eps = 1 / event.noise_multiplier
        # eps-DP implies (alpha, min(eps,alpha*eps^2/2))-RDP for all alpha.
        eps = 1 / event.noise_multiplier
        rho = 0.5 * eps * eps
        self._rdp += count * np.array(
            [min(eps, rho * order) for order in self._orders])
      return None
    elif isinstance(event, dp_event.RepeatAndSelectDpEvent):
      if do_compose:
        # Save the RDP values from already composed DPEvents. These will
        # be added back after we process this RepeatAndSelectDpEvent.
        # Zero out self._rdp before computing the RDP of the underlying
        # DP event.
        save_rdp = self._rdp
        self._rdp = np.zeros_like(self._orders, dtype=np.float64)
      composition_error = self._maybe_compose(event.event, 1, do_compose)
      if composition_error is None and do_compose:
        self._rdp = count * _compute_rdp_repeat_and_select(
            self._orders, self._rdp, event.mean, event.shape) + save_rdp
      return composition_error
    else:
      # Unsupported event (including `UnsupportedDpEvent`).
      return CompositionErrorDetails(
          invalid_event=event, error_message='Unsupported event.')

  def get_epsilon_and_optimal_order(self,
                                    target_delta: float) -> Tuple[float, int]:
    """Returns (epsilon, optimal order) for the accumulated RDP."""
    return compute_epsilon(self._orders, self._rdp, target_delta)

  def get_epsilon(self, target_delta: float) -> float:
    """Returns the epsilon achieved at `target_delta`."""
    return compute_epsilon(self._orders, self._rdp, target_delta)[0]

  def get_delta_and_optimal_order(self,
                                  target_epsilon: float) -> Tuple[float, int]:
    """Returns (delta, optimal order) for the accumulated RDP."""
    return compute_delta(self._orders, self._rdp, target_epsilon)

  def get_delta(self, target_epsilon: float) -> float:
    """Returns the delta achieved at `target_epsilon`."""
    return compute_delta(self._orders, self._rdp, target_epsilon)[0]