blob: f1825fbbfac4c6aa65b0c2693e730b8a16071c21 [file] [log] [blame]
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Privacy accountant that uses Renyi differential privacy."""
import math
from typing import Callable, Optional, Sequence, Tuple, Union
import numpy as np
from scipy import special
from dp_accounting import dp_event
from dp_accounting import privacy_accountant
NeighborRel = privacy_accountant.NeighboringRelation
CompositionErrorDetails = privacy_accountant.PrivacyAccountant.CompositionErrorDetails
def _log_add(logx: float, logy: float) -> float:
"""Adds two numbers in the log space."""
a, b = min(logx, logy), max(logx, logy)
if a == -np.inf: # adding 0
return b
# Use exp(a) + exp(b) = (exp(a - b) + 1) * exp(b)
return math.log1p(math.exp(a - b)) + b # log1p(x) = log(x + 1)
def _log_sub(logx: float, logy: float) -> float:
"""Subtracts two numbers in the log space. Answer must be non-negative."""
if logx < logy:
raise ValueError('The result of subtraction must be non-negative.')
if logy == -np.inf: # subtracting 0
return logx
if logx == logy:
return -np.inf # 0 is represented as -np.inf in the log space.
try:
# Use exp(x) - exp(y) = (exp(x - y) - 1) * exp(y).
return math.log(math.expm1(logx - logy)) + logy # expm1(x) = exp(x) - 1
except OverflowError:
return logx
def _log_sub_sign(logx: float, logy: float) -> Tuple[bool, float]:
"""Returns log(exp(logx)-exp(logy)) and its sign."""
if logx > logy:
s = True
mag = logx + np.log(1 - np.exp(logy - logx))
elif logx < logy:
s = False
mag = logy + np.log(1 - np.exp(logx - logy))
else:
s = True
mag = -np.inf
return s, mag
def _log_comb(n: int, k: int) -> float:
"""Computes log of binomial coefficient."""
return (special.gammaln(n + 1) - special.gammaln(k + 1) -
special.gammaln(n - k + 1))
def _compute_log_a_int(q: float, sigma: float, alpha: int) -> float:
  """Computes log(A_alpha) for integer alpha and sampling rate 0 < q < 1."""
  log_q = math.log(q)
  log_1mq = math.log(1 - q)
  two_sigma_sq = 2 * (sigma**2)

  total = -np.inf  # Accumulator, starting at log(0).
  for i in range(alpha + 1):
    # i-th term of the binomial expansion, in log space.
    log_binom_term = _log_comb(alpha, i) + i * log_q + (alpha - i) * log_1mq
    total = _log_add(total, log_binom_term + (i * i - i) / two_sigma_sq)
  return float(total)
def _compute_log_a_frac(q: float, sigma: float, alpha: float) -> float:
  """Computes log(A_alpha) for fractional alpha, 0 < q < 1.

  Expands A_alpha as two binomial series (one per side of the crossover
  point z0) and sums terms until they become negligible.
  """
  # The two parts of A_alpha, integrals over (-inf,z0] and [z0, +inf), are
  # initialized to 0 in the log space:
  log_a0, log_a1 = -np.inf, -np.inf
  i = 0

  z0 = sigma**2 * math.log(1 / q - 1) + .5

  while True:  # do ... until loop
    coef = special.binom(alpha, i)
    log_coef = math.log(abs(coef))
    j = alpha - i

    log_t0 = log_coef + i * math.log(q) + j * math.log(1 - q)
    log_t1 = log_coef + j * math.log(q) + i * math.log(1 - q)

    # Gaussian tail masses of each term, expressed through log(erfc).
    log_e0 = math.log(.5) + _log_erfc((i - z0) / (math.sqrt(2) * sigma))
    log_e1 = math.log(.5) + _log_erfc((z0 - j) / (math.sqrt(2) * sigma))

    log_s0 = log_t0 + (i * i - i) / (2 * (sigma**2)) + log_e0
    log_s1 = log_t1 + (j * j - j) / (2 * (sigma**2)) + log_e1

    # For fractional alpha the generalized binomial coefficients alternate in
    # sign once i > alpha, so terms are added or subtracted accordingly.
    if coef > 0:
      log_a0 = _log_add(log_a0, log_s0)
      log_a1 = _log_add(log_a1, log_s1)
    else:
      log_a0 = _log_sub(log_a0, log_s0)
      log_a1 = _log_sub(log_a1, log_s1)

    i += 1
    # Stop once both terms of this round are negligible (below exp(-30)).
    if max(log_s0, log_s1) < -30:
      break

  return _log_add(log_a0, log_a1)
def _log_erfc(x: float) -> float:
"""Computes log(erfc(x)) with high accuracy for large x."""
try:
return math.log(2) + special.log_ndtr(-x * 2**.5)
except NameError:
# If log_ndtr is not available, approximate as follows:
r = special.erfc(x)
if r == 0.0:
# Using the Laurent series at infinity for the tail of the erfc function:
# erfc(x) ~ exp(-x^2-.5/x^2+.625/x^4)/(x*pi^.5)
# To verify in Mathematica:
# Series[Log[Erfc[x]] + Log[x] + Log[Pi]/2 + x^2, {x, Infinity, 6}]
return (-math.log(math.pi) / 2 - math.log(x) - x**2 - .5 * x**-2 +
.625 * x**-4 - 37. / 24. * x**-6 + 353. / 64. * x**-8)
else:
return math.log(r)
def compute_delta(orders: Sequence[float], rdp: Sequence[float],
                  epsilon: float) -> Tuple[float, int]:
  """Computes delta given a list of RDP values and target epsilon.

  Uses the improved RDP-to-DP conversion of
  https://arxiv.org/abs/2004.00010 (Proposition 12 in v4), combined with a
  KL-divergence-based bound (delta <= sqrt(1-exp(-KL))) that is tighter for
  orders close to 1. The basic bound
  delta = min(exp((rdp - epsilon) * (orders - 1))) from
  https://arxiv.org/abs/1702.07476 (Proposition 3 in v3) is dominated by
  these.

  Args:
    orders: An array of orders.
    rdp: An array of RDP guarantees.
    epsilon: The target epsilon.

  Returns:
    2-tuple containing optimal delta and the optimal order.

  Raises:
    ValueError: If input is malformed.
  """
  if epsilon < 0:
    raise ValueError(f'Epsilon cannot be negative. Found {epsilon}.')
  if len(orders) != len(rdp):
    raise ValueError('Input lists must have the same length.')

  log_deltas = []  # Candidates are kept in log space to avoid overflow.
  for order, divergence in zip(orders, rdp):
    if order < 1:
      raise ValueError(
          f'Renyi divergence order must be at least 1. Found {order}.')
    if divergence < 0:
      raise ValueError(
          f'Renyi divergence cannot be negative. Found {divergence}.')

    # KL-based bound, better for small orders: delta <= sqrt(1-exp(-KL)).
    if divergence == 0:
      candidate = -np.inf
    else:
      candidate = 0.5 * math.log1p(-math.exp(-divergence))

    if order > 1.01:
      # The improved bound below is numerically unstable (and not useful)
      # as order -> 1, hence the cutoff.
      bound = (order - 1) * (divergence - epsilon +
                             math.log1p(-1 / order)) - math.log(order)
      candidate = min(candidate, bound)

    log_deltas.append(candidate)

  best = np.argmin(log_deltas)
  return min(math.exp(log_deltas[best]), 1.), orders[best]
def compute_epsilon(orders: Sequence[float], rdp: Sequence[float],
                    delta: float) -> Tuple[float, int]:
  """Computes epsilon given a list of RDP values and target delta.

  Uses the improved RDP-to-DP conversion of
  https://arxiv.org/abs/2004.00010 (Proposition 12 in v4; also
  https://arxiv.org/abs/2001.05990 Equation 20 in v1), which dominates the
  basic bound epsilon = min(rdp - log(delta) / (orders - 1)) of
  https://arxiv.org/abs/1702.07476 (Proposition 3 in v3).

  Args:
    orders: An array of orders.
    rdp: An array of RDP guarantees.
    delta: The target delta. Must be >= 0.

  Returns:
    2-tuple containing optimal epsilon and the optimal order.

  Raises:
    ValueError: If input is malformed.
  """
  if delta < 0:
    raise ValueError(f'Delta cannot be negative. Found {delta}.')

  if delta == 0:
    # delta = 0 is only achievable when there is no privacy loss at all.
    if all(r == 0 for r in rdp):
      return 0, 0
    else:
      return np.inf, 0

  if len(orders) != len(rdp):
    raise ValueError('Input lists must have the same length.')

  eps = []
  for order, divergence in zip(orders, rdp):
    if order < 1:
      raise ValueError(
          f'Renyi divergence order must be at least 1. Found {order}.')
    if divergence < 0:
      raise ValueError(
          f'Renyi divergence cannot be negative. Found {divergence}.')

    if delta**2 + math.expm1(-divergence) > 0:
      # The KL bound delta <= sqrt(1-exp(-KL)) is already met, so epsilon = 0
      # and no further computation is needed for this order.
      candidate = 0
    elif order > 1.01:
      # The improved bound is numerically unstable (and not useful) as
      # order -> 1, hence the cutoff.
      candidate = (divergence + math.log1p(-1 / order) -
                   math.log(delta * order) / (order - 1))
    else:
      # Nothing applies here, e.g. when effectively asking for delta = 0.
      candidate = np.inf
    eps.append(candidate)

  best = np.argmin(eps)
  return max(0, eps[best]), orders[best]
def _stable_inplace_diff_in_log(vec: np.ndarray,
                                signs: np.ndarray,
                                n: Optional[int] = None):
  """Replaces the first n-1 dims of vec with the log of abs difference operator.

  Args:
    vec: numpy array of floats with size larger than 'n'
    signs: Optional numpy array of bools with the same size as vec in case one
      needs to compute partial differences vec and signs jointly describe a
      vector of real numbers' sign and abs in log scale.
    n: Optional upper bound on number of differences to compute. If None, all
      differences are computed.

  Returns:
    The first n-1 dimension of vec and signs will store the log-abs and sign of
    the difference.

  Raises:
    ValueError: If input is malformed.
  """
  if vec.shape != signs.shape:
    raise ValueError('Shape of vec and signs do not match.')
  if signs.dtype != bool:
    raise ValueError('signs must be of type bool')
  if n is None:
    n = np.max(vec.shape) - 1
  else:
    assert np.max(vec.shape) >= n + 1

  # Each entry j is overwritten with the (signed, log-magnitude) difference
  # of entries j+1 and j, i.e. one round of the forward difference operator.
  for j in range(0, n, 1):
    if signs[j] == signs[j + 1]:  # When the signs are the same
      # if the signs are both positive, then we can just use the standard one
      signs[j], vec[j] = _log_sub_sign(vec[j + 1], vec[j])
      # otherwise, we do that but toggle the sign
      if not signs[j + 1]:
        signs[j] = ~signs[j]
    else:  # When the signs are different.
      # Opposite signs mean the magnitudes add; the result takes the sign of
      # the entry being subtracted from (entry j+1).
      vec[j] = _log_add(vec[j], vec[j + 1])
      signs[j] = signs[j + 1]
def _get_forward_diffs(fun: Callable[[float], float],
                       n: int) -> Tuple[np.ndarray, np.ndarray]:
  """Computes up to nth order forward differences of `fun` evaluated at 0.

  See Theorem 27 of https://arxiv.org/pdf/1808.00087.pdf

  Args:
    fun: Function to compute forward differences of.
    n: Number of differences to compute.

  Returns:
    Pair (deltas, signs_deltas) of the log deltas and their signs.
  """
  size = n + 3
  func_vec = np.zeros(size)
  signs_func_vec = np.ones(size, dtype=bool)

  # Entry i of deltas stores log(abs(i-th order discrete derivative)).
  deltas = np.zeros(n + 2)
  signs_deltas = np.zeros(n + 2, dtype=bool)

  # Sample the function at 0, 1, ..., n+1 (shifted by one slot).
  for i in range(1, size):
    func_vec[i] = fun(float(i - 1))

  for i in range(n + 2):
    # One more round of pairwise differences, performed in log space.
    _stable_inplace_diff_in_log(func_vec, signs_func_vec, n=n + 2 - i)
    deltas[i] = func_vec[0]
    signs_deltas[i] = signs_func_vec[0]
  return deltas, signs_deltas
def _compute_log_a(q: float, noise_multiplier: float,
                   alpha: Union[int, float]) -> float:
  """Dispatches the log(A_alpha) computation on whether alpha is integral."""
  if float(alpha).is_integer():
    return _compute_log_a_int(q, noise_multiplier, int(alpha))
  return _compute_log_a_frac(q, noise_multiplier, alpha)
def _compute_rdp_poisson_subsampled_gaussian(
q: float, noise_multiplier: float,
orders: Sequence[float]) -> Union[float, np.ndarray]:
"""Computes RDP of the Poisson sampled Gaussian mechanism.
Args:
q: The sampling rate.
noise_multiplier: The ratio of the standard deviation of the Gaussian noise
to the l2-sensitivity of the function to which it is added.
orders: An array of RDP orders.
Returns:
The RDPs at all orders. Can be `np.inf`.
"""
def compute_one_order(q, alpha):
if np.isinf(alpha) or noise_multiplier == 0:
return np.inf
if q == 0:
return 0
if q == 1.:
return alpha / (2 * noise_multiplier**2)
return _compute_log_a(q, noise_multiplier, alpha) / (alpha - 1)
return np.array([compute_one_order(q, order) for order in orders])
def _compute_rdp_sample_wor_gaussian(
    q: float, noise_multiplier: float,
    orders: Sequence[float]) -> Union[float, np.ndarray]:
  """Computes RDP of Gaussian mechanism using sampling without replacement.

  This function applies to the following schemes:
  1. Sampling w/o replacement: Sample a uniformly random subset of size m = q*n.
  2. ``Replace one data point'' version of differential privacy, i.e., n is
     considered public information.

  Reference: Theorem 27 of https://arxiv.org/pdf/1808.00087.pdf (A strengthened
  version applies subsampled-Gaussian mechanism.)
  - Wang, Balle, Kasiviswanathan. "Subsampled Renyi Differential Privacy and
    Analytical Moments Accountant." AISTATS'2019.

  Args:
    q: The sampling proportion = m / n. Assume m is an integer <= n.
    noise_multiplier: The ratio of the standard deviation of the Gaussian noise
      to the l2-sensitivity of the function to which it is added.
    orders: An array of RDP orders.

  Returns:
    The RDPs at all orders, can be np.inf.
  """
  per_order = [
      _compute_rdp_sample_wor_gaussian_scalar(q, noise_multiplier, alpha)
      for alpha in orders
  ]
  return np.array(per_order)
def _compute_rdp_sample_wor_gaussian_scalar(q: float, sigma: float,
alpha: Union[float, int]) -> float:
"""Computes RDP of the Sampled Gaussian mechanism at order alpha.
Args:
q: The sampling proportion = m / n. Assume m is an integer <= n.
sigma: The std of the additive Gaussian noise.
alpha: The order at which RDP is computed.
Returns:
RDP at alpha, can be np.inf.
"""
assert (q <= 1) and (q >= 0) and (alpha >= 1)
if q == 0:
return 0
if q == 1.:
return alpha / (2 * sigma**2)
if np.isinf(alpha):
return np.inf
if float(alpha).is_integer():
return _compute_rdp_sample_wor_gaussian_int(q, sigma, int(alpha)) / (
alpha - 1)
else:
# When alpha not an integer, we apply Corollary 10 of [WBK19] to interpolate
# the CGF and obtain an upper bound
alpha_f = math.floor(alpha)
alpha_c = math.ceil(alpha)
x = _compute_rdp_sample_wor_gaussian_int(q, sigma, alpha_f)
y = _compute_rdp_sample_wor_gaussian_int(q, sigma, alpha_c)
t = alpha - alpha_f
return ((1 - t) * x + t * y) / (alpha - 1)
def _compute_rdp_sample_wor_gaussian_int(q: float, sigma: float,
                                         alpha: int) -> float:
  """Computes log(A_alpha) for integer alpha, subsampling without replacement.

  When alpha is smaller than max_alpha, compute the bound Theorem 27 exactly,
  otherwise compute the bound with Stirling approximation.

  Args:
    q: The sampling proportion = m / n. Assume m is an integer <= n.
    sigma: The std of the additive Gaussian noise.
    alpha: The order at which RDP is computed.

  Returns:
    RDP at alpha, can be np.inf.
  """
  # Above this order the exact bound requires forward differences that are too
  # expensive/unstable, so a Stirling-style approximation is used instead.
  max_alpha = 256

  if np.isinf(alpha):
    return np.inf
  elif alpha == 1:
    return 0

  def cgf(x):
    # Return rdp(x+1)*x, the rdp of Gaussian mechanism is alpha/(2*sigma**2)
    return x * 1.0 * (x + 1) / (2.0 * sigma**2)

  def func(x):
    # Return the rdp of Gaussian mechanism
    return 1.0 * x / (2.0 * sigma**2)

  # Initialize with 1 in the log space.
  log_a = 0
  # Calculates the log term when alpha = 2
  log_f2m1 = func(2.0) + np.log(1 - np.exp(-func(2.0)))
  if alpha <= max_alpha:
    # We need forward differences of exp(cgf)
    # The following line is the numerically stable way of implementing it.
    # The output is in polar form with logarithmic magnitude
    deltas, _ = _get_forward_diffs(cgf, alpha)
    # Compute the bound exactly requires book keeping of O(alpha**2)
    for i in range(2, alpha + 1):
      if i == 2:
        s = 2 * np.log(q) + _log_comb(alpha, 2) + np.minimum(
            np.log(4) + log_f2m1,
            func(2.0) + np.log(2))
      elif i > 2:
        # Bound the i-th term via the geometric mean of the two nearest even
        # forward differences, capped by the direct CGF bound.
        delta_lo = deltas[int(2 * np.floor(i / 2.0)) - 1]
        delta_hi = deltas[int(2 * np.ceil(i / 2.0)) - 1]
        s = np.log(4) + 0.5 * (delta_lo + delta_hi)
        s = np.minimum(s, np.log(2) + cgf(i - 1))
        s += i * np.log(q) + _log_comb(alpha, i)
      log_a = _log_add(log_a, s)
    return float(log_a)
  else:
    # Compute the bound with stirling approximation. Everything is O(x) now.
    for i in range(2, alpha + 1):
      if i == 2:
        s = 2 * np.log(q) + _log_comb(alpha, 2) + np.minimum(
            np.log(4) + log_f2m1,
            func(2.0) + np.log(2))
      else:
        s = np.log(2) + cgf(i - 1) + i * np.log(q) + _log_comb(alpha, i)
      log_a = _log_add(log_a, s)
    return log_a
def _effective_gaussian_noise_multiplier(
    event: dp_event.DpEvent) -> Union[float, dp_event.DpEvent]:
  """Determines the effective noise multiplier of nested structure of Gaussians.

  A series of Gaussian queries on the same data can be reexpressed as a single
  query with pre- and post- processing. For details, see section 3 of
  https://arxiv.org/pdf/1812.06210.pdf.

  Args:
    event: A `dp_event.DpEvent`. In order for conversion to be successful it
      must consist of a single `dp_event.GaussianDpEvent`, or a nested structure
      of `dp_event.ComposedDpEvent` and/or `dp_event.SelfComposedDpEvent`
      bottoming out in `dp_event.GaussianDpEvent`s.

  Returns:
    The noise multiplier of the equivalent `dp_event.GaussianDpEvent`. If the
    input event was not a `dp_event.GaussianDpEvent` or a nested structure of
    `dp_event.ComposedDpEvent` and/or `dp_event.SelfComposedDpEvent` bottoming
    out in `dp_event.GaussianDpEvent`s, returns offending subevent.
  """
  if isinstance(event, dp_event.GaussianDpEvent):
    return event.noise_multiplier

  if isinstance(event, dp_event.ComposedDpEvent):
    # Composed Gaussians combine via the sum of inverse squared sigmas.
    inv_sq_sum = 0
    for sub_event in event.events:
      sub_sigma = _effective_gaussian_noise_multiplier(sub_event)
      if not isinstance(sub_sigma, float):
        # NOTE(review): a non-float noise multiplier (e.g. an int) would be
        # treated as an offending subevent here — confirm callers pass floats.
        return sub_sigma
      inv_sq_sum += sub_sigma**-2
    return inv_sq_sum**-0.5

  if isinstance(event, dp_event.SelfComposedDpEvent):
    sub_sigma = _effective_gaussian_noise_multiplier(event.event)
    if not isinstance(sub_sigma, float):
      return sub_sigma
    return (event.count * sub_sigma**-2)**-0.5

  # Not a (nested) Gaussian event: report the offending subevent itself.
  return event
def _compute_rdp_single_epoch_tree_aggregation(
noise_multiplier: float, step_counts: Union[int, Sequence[int]],
orders: Sequence[float]) -> Union[float, np.ndarray]:
"""Computes RDP of the Tree Aggregation Protocol for Gaussian Mechanism.
This function implements the accounting when the tree is periodically
restarted and no record occurs twice across all trees. See appendix D of
"Practical and Private (Deep) Learning without Sampling or Shuffling"
https://arxiv.org/abs/2103.00039.
Args:
noise_multiplier: A non-negative float representing the ratio of the
standard deviation of the Gaussian noise to the l2-sensitivity of the
function to which it is added.
step_counts: A scalar or a list of non-negative integers representing the
number of steps per epoch (between two restarts).
orders: An array of RDP orders.
Returns:
The RDPs at all orders. Can be `np.inf`.
"""
if noise_multiplier < 0:
raise ValueError(
f'noise_multiplier must be non-negative. Got {noise_multiplier}.')
if noise_multiplier == 0:
return np.inf
if not step_counts:
raise ValueError(
'steps_list must be a non-empty list, or a non-zero scalar. Got '
f'{step_counts}.')
if np.isscalar(step_counts):
step_counts = [step_counts]
for steps in step_counts:
if steps < 0:
raise ValueError(f'Steps must be non-negative. Got {step_counts}')
max_depth = math.ceil(math.log2(max(step_counts) + 1))
return np.array([a * max_depth / (2 * noise_multiplier**2) for a in orders])
def _expm1_over_x(x: float) -> float:
"""Computes (exp(x)-1)/x in a numerically stable manner.
Args:
x: float
Returns:
(exp(x)-1)/x
"""
if x < -0.1 or x > 0.1:
return math.expm1(x) / x
# exp(x) = sum_{k>=0} x^k / k!
# (exp(x)-1)/x = sum_{k>=1} x^{k-1} / k!
terms = []
y = 1 # = x^{k-1}/k!
for k in range(1, 100):
y = y / k
terms.append(y)
y = y * x
return math.fsum(terms)
# Dropped terms: sum_{k>=100} x^{k-1} / k!
# Since |x|<= 0.1, this sum is < 10^-100.
# Note that 0.9 < (exp(x)-1)/x < 1.1, so this is also a small relative error.
def _logx_over_xm1(x: float) -> float:
"""Computes log(x)/(x-1) in a numerically stable manner.
Here log is the natural logarithm.
Args:
x: float
Returns:
log(x)/(x-1)
"""
if x < 0.9 or x > 1.1:
return math.log(x) / (x - 1)
# Denote y = 1-x. Then we have a Taylor series for the natural logarithm:
# log(x) = log(1-y) = - sum_{k>=1} y^k / k
# Thus log(x)/(x-1) = -log(1-y)/y = sum_{k>=1} y^{k-1}/k
return math.fsum((1 - x)**(k - 1) / k for k in range(1, 100))
# Dropped terms: sum_{k>=100} y^{k-1}/k.
# Since |y|<=0.1, this sum is < 10^-100.
# Since 0.9 <= log(x)/(x-1) <= 1.1, this is also a small relative error.
def _truncated_negative_binomial_mean(gamma: float, shape: float) -> float:
"""Computes the mean of the truncated negative binomial Distribution.
See Definition 1 in https://arxiv.org/pdf/2110.03620v2.pdf#page=5
Args:
gamma: Halting probability parameter of the distribution. Must be >0 and
<=1.
shape: Shape parameter of the Distribution. Must be >=0.
Returns:
The mean of the distribution.
"""
if shape < 0:
raise ValueError(f'shape must be non-negative. Got {shape}')
if gamma <= 0 or gamma > 1:
raise ValueError(f'gamma must be in (0,1]. Got {gamma}')
if shape == 1: # Geometric Distribution
return 1 / gamma
elif shape == 0: # Logarithmic distribution
# answer = (1 - 1 / gamma) / log(gamma)
# = 1/(gamma*log(gamma)/(gamma-1))
return 1 / (gamma * _logx_over_xm1(gamma))
else: # Truncated Negative Binomial
# answer = shape * (1 / gamma - 1) / (1 - gamma**shape)
# = 1 / ( (exp(shape*log(gamma))-1)/(shape*log(gamma)) *
# log(gamma)/(gamma-1) * gamma)
a = _expm1_over_x(shape * math.log(gamma))
b = _logx_over_xm1(gamma)
return 1 / (gamma * a * b)
def _gamma_truncated_negative_binomial(shape: float,
mean: float,
tolerance: float = 1e-9) -> float:
"""Computes gamma parameter of truncated negative binomial from mean.
Args:
shape: shape parameter of truncated negative binomial distribution. Must be
>= 0.
mean: the expectation of the Distribution
tolerance: relative (i.e. multiplicative) accuracy bound for gamma. Default
tolerance is 10^-9.
Returns:
The gamma parameter = success probability of the distribution.
"""
if shape < 0:
raise ValueError(f'shape must be non-negative. Got {shape}')
if mean < 1:
raise ValueError(f'mean must be at least 1. Got {mean}')
if shape == 1:
return 1 / mean # Geometric distribution
# Otherwise we invert the _truncated_negative_binomial_mean function.
gamma_min = 0 # gamma=0 corresponds to mean=infinity.
gamma_max = min(1, 2 * (shape + 1) / mean) # gamma=1 corresponds to mean=1.
# Also max{shape,1/ln(1/gamma)}*(1/gamma-1) <= mean <= 2*(shape+1)/gamma,
# which implies gamma <= 2*(shape+1)/mean
while gamma_max > gamma_min * (1 + tolerance):
gamma = (gamma_min + gamma_max) / 2
gamma_mean = _truncated_negative_binomial_mean(gamma, shape)
if gamma_mean < mean:
gamma_max = gamma
else:
gamma_min = gamma
return gamma_min # The conservative estimate is returned.
def _compute_rdp_repeat_and_select(orders: Sequence[float],
                                   rdp: Sequence[float], mean: float,
                                   shape: float) -> Sequence[float]:
  # pyformat: disable
  """Computes RDP of repeating and selecting best run.

  Inputs orders & rdp represent RDP of a single run.
  Output represents RDP of running multiple times and returning the best run;
  outputs of other runs are not returned.

  The total number of runs is randomized and drawn from a distribution
  with the given parameters. Poisson (shape=infinity), Geometric (shape=1),
  Logarithmic (shape=0), or Truncated Negative binomial (0<shape<infinity).

  See https://arxiv.org/abs/2110.03620 for details.

  Args:
    orders: List of Renyi orders considered. Each order should be >= 1.
    rdp: List of RDPs for a single run of the mechanism.
    mean: The mean of the distribution of the random number of repetitions.
    shape: Shape/type of the distribution. Should be >= 0.
      * shape == 0 is the logarithmic distribution.
      * shape == 1 is the geometric distribution.
      * shape == infinity is the Poisson Distribution
      * shape in (0, infinity) is the truncated negative binomial.

  Returns:
    The RDPs at all orders.

  Raises:
    ValueError: If input is malformed.
  """
  # pyformat: enable
  if math.isnan(shape) or shape < 0:
    raise ValueError(f'Distribution of repetitions must be >=0. Got {shape}.')
  if math.isnan(mean) or mean < 1:
    raise ValueError(f'Mean of number of repetitions must be >=1. Got {mean}.')
  if len(orders) != len(rdp):
    raise ValueError(
        f'orders and rdp must be same length, got {len(orders)} & {len(rdp)}.')

  orders = np.asarray(orders)
  rdp_out = np.zeros_like(orders, dtype=np.float64)  # This will be the output.
  rdp_out += np.inf  # Initialize to infinity.

  if shape == np.inf:  # Poisson Distribution
    for i in range(len(orders)):
      # orders[i]=lambda and rdp[i]=epsilon in the language of
      # Theorem 6 of https://arxiv.org/pdf/2110.03620v2.pdf#page=7
      if orders[i] <= 1:
        continue  # Our formula is not applicable in this case.
      epshat = math.log1p(1 / (orders[i] - 1))
      deltahat, _ = compute_delta(orders, rdp, epshat)
      rdp_out[i] = rdp[i] + mean * deltahat + math.log(mean) / (orders[i] - 1)
  else:  # Truncated Negative Binomial (includes Logarithmic & Geometric)
    # First we map mean parameter to gamma parameter of TNB.
    gamma = _gamma_truncated_negative_binomial(shape, mean)

    # Next we apply the formula.
    # Theorem 2 of https://arxiv.org/pdf/2110.03620v2.pdf#page=5
    # orders[i] = lambda, rdp[i] = epsilon,
    # orders[j] = lambdahat, rdp[j] = epsilonhat

    # First compute constant term
    c = (1 + shape) * np.min((1 - 1 / orders) * rdp - math.log(gamma) / orders)
    for i in range(len(orders)):
      if orders[i] > 1:  # Otherwise our formula is invalid.
        rdp_out[i] = rdp[i] + math.log(mean) / (orders[i] - 1) + c

  # Finally we apply monotonicity of Renyi DP
  # i.e. if orders[i] < orders[j], then rdp[i] < rdp[j].
  # We can use this to bound rdp for low orders.
  for i in range(len(orders)):
    rdp_out[i] = min(
        rdp_out[j] for j in range(len(orders)) if orders[i] <= orders[j])
  return rdp_out
# Default orders chosen to give good coverage for Gaussian mechanism in
# the privacy regime of interest: a dense grid of fractional orders between
# 1.1 and 10.9, the integers 11..63, then a few large powers of two.
DEFAULT_RDP_ORDERS = ([1 + x / 10. for x in range(1, 100)] +
                      list(range(11, 64)) + [128, 256, 512, 1024])
class RdpAccountant(privacy_accountant.PrivacyAccountant):
  """Privacy accountant that uses Renyi differential privacy."""

  def __init__(
      self,
      orders: Optional[Sequence[float]] = None,
      neighboring_relation: NeighborRel = NeighborRel.ADD_OR_REMOVE_ONE,
  ):
    """Initializes the accountant.

    Args:
      orders: The RDP orders at which to track privacy loss. Defaults to
        `DEFAULT_RDP_ORDERS`.
      neighboring_relation: The neighboring relation under which events are
        accounted.
    """
    super().__init__(neighboring_relation)
    if orders is None:
      orders = DEFAULT_RDP_ORDERS
    self._orders = np.array(orders)
    # Accumulated RDP at each order; composing events adds into this array.
    self._rdp = np.zeros_like(orders, dtype=np.float64)

  def _maybe_compose(self, event: dp_event.DpEvent, count: int,
                     do_compose: bool) -> Optional[CompositionErrorDetails]:
    """Traverses `event` and performs composition if `do_compose` is True.

    Args:
      event: The event (possibly nested) to traverse.
      count: Number of times `event` is composed.
      do_compose: If False, only validates that `event` is supported without
        mutating `self._rdp`.

    Returns:
      None if `event` is supported, otherwise `CompositionErrorDetails`
      describing the offending (sub)event.
    """
    if isinstance(event, dp_event.NoOpDpEvent):
      return None
    elif isinstance(event, dp_event.NonPrivateDpEvent):
      if do_compose:
        # A non-private event destroys all guarantees at every order.
        self._rdp += np.inf
      return None
    elif isinstance(event, dp_event.SelfComposedDpEvent):
      # Fold the self-composition into the multiplicity.
      return self._maybe_compose(event.event, event.count * count, do_compose)
    elif isinstance(event, dp_event.ComposedDpEvent):
      for e in event.events:
        result = self._maybe_compose(e, count, do_compose)
        if result is not None:
          return result
      return None
    elif isinstance(event, dp_event.GaussianDpEvent):
      if do_compose:
        # A plain Gaussian event is the q=1 case of the Poisson-sampled one.
        self._rdp += count * _compute_rdp_poisson_subsampled_gaussian(
            q=1.0, noise_multiplier=event.noise_multiplier, orders=self._orders)
      return None
    elif isinstance(event, dp_event.PoissonSampledDpEvent):
      if self._neighboring_relation is not NeighborRel.ADD_OR_REMOVE_ONE:
        error_msg = (
            'neighboring_relation must be `ADD_OR_REMOVE_ONE` for '
            f'`PoissonSampledDpEvent`. Found {self._neighboring_relation}.')
        return CompositionErrorDetails(
            invalid_event=event, error_message=error_msg)
      sigma_or_bad_event = _effective_gaussian_noise_multiplier(event.event)
      if isinstance(sigma_or_bad_event, dp_event.DpEvent):
        return CompositionErrorDetails(
            invalid_event=event,
            error_message='Subevent of `PoissonSampledDpEvent` must be a '
            '`GaussianDpEvent` or a nested structure of `ComposedDpEvent` '
            'and/or `SelfComposedDpEvent` bottoming out in `GaussianDpEvent`s.'
            f' Found subevent {sigma_or_bad_event}.')
      if do_compose:
        self._rdp += count * _compute_rdp_poisson_subsampled_gaussian(
            q=event.sampling_probability,
            noise_multiplier=sigma_or_bad_event,
            orders=self._orders)
      return None
    elif isinstance(event, dp_event.SampledWithoutReplacementDpEvent):
      if self._neighboring_relation is not NeighborRel.REPLACE_ONE:
        error_msg = ('neighboring_relation must be `REPLACE_ONE` for '
                     '`SampledWithoutReplacementDpEvent`. Found '
                     f'{self._neighboring_relation}.')
        return CompositionErrorDetails(
            invalid_event=event, error_message=error_msg)
      sigma_or_bad_event = _effective_gaussian_noise_multiplier(event.event)
      if isinstance(sigma_or_bad_event, dp_event.DpEvent):
        return CompositionErrorDetails(
            invalid_event=event,
            error_message='Subevent of `SampledWithoutReplacementDpEvent` must '
            'be a `GaussianDpEvent` or a nested structure of `ComposedDpEvent` '
            'and/or `SelfComposedDpEvent` bottoming out in `GaussianDpEvent`s. '
            f'Found subevent {sigma_or_bad_event}.')
      if do_compose:
        self._rdp += count * _compute_rdp_sample_wor_gaussian(
            q=event.sample_size / event.source_dataset_size,
            noise_multiplier=sigma_or_bad_event,
            orders=self._orders)
      return None
    elif isinstance(event, dp_event.SingleEpochTreeAggregationDpEvent):
      if self._neighboring_relation is not NeighborRel.REPLACE_SPECIAL:
        error_msg = ('neighboring_relation must be `REPLACE_SPECIAL` for '
                     '`SingleEpochTreeAggregationDpEvent`. Found '
                     f'{self._neighboring_relation}.')
        return CompositionErrorDetails(
            invalid_event=event, error_message=error_msg)
      if do_compose:
        self._rdp += count * _compute_rdp_single_epoch_tree_aggregation(
            event.noise_multiplier, event.step_counts, self._orders)
      return None
    elif isinstance(event, dp_event.LaplaceDpEvent):
      if do_compose:
        # Laplace satisfies eps-DP with eps = 1 / event.noise_multiplier
        # eps-DP implies (alpha, min(eps,alpha*eps^2/2))-RDP for all alpha.
        eps = 1 / event.noise_multiplier
        rho = 0.5 * eps * eps
        self._rdp += count * np.array(
            [min(eps, rho * order) for order in self._orders])
      return None
    elif isinstance(event, dp_event.RepeatAndSelectDpEvent):
      if do_compose:
        # Save the RDP values from already composed DPEvents. These will
        # be added back after we process this RepeatAndSelectDpEvent.
        # Zero out self._rdp before computing the RDP of the underlying
        # DP event.
        save_rdp = self._rdp
        self._rdp = np.zeros_like(self._orders, dtype=np.float64)
      composition_error = self._maybe_compose(event.event, 1, do_compose)
      if composition_error is None and do_compose:
        self._rdp = count * _compute_rdp_repeat_and_select(
            self._orders, self._rdp, event.mean, event.shape) + save_rdp
      return composition_error
    else:
      # Unsupported event (including `UnsupportedDpEvent`).
      return CompositionErrorDetails(
          invalid_event=event, error_message='Unsupported event.')

  def get_epsilon_and_optimal_order(self,
                                    target_delta: float) -> Tuple[float, int]:
    """Returns (epsilon, optimal order) for the accumulated RDP."""
    return compute_epsilon(self._orders, self._rdp, target_delta)

  def get_epsilon(self, target_delta: float) -> float:
    """Returns the epsilon achieved at `target_delta`."""
    return compute_epsilon(self._orders, self._rdp, target_delta)[0]

  def get_delta_and_optimal_order(self,
                                  target_epsilon: float) -> Tuple[float, int]:
    """Returns (delta, optimal order) for the accumulated RDP."""
    return compute_delta(self._orders, self._rdp, target_epsilon)

  def get_delta(self, target_epsilon: float) -> float:
    """Returns the delta achieved at `target_epsilon`."""
    return compute_delta(self._orders, self._rdp, target_epsilon)[0]