Incorporating the "Connect the Dots" algorithm in PLD accounting library. Reference: https://arxiv.org/abs/2207.04380 Main changes are as follows: - Allow vectorized calls to get_delta_for_epsilon, - Construct pessimistic PLDPmf using Connect-the-Dots algorithm, - Support Connect-the-Dots algorithm for privacy loss distributions of additive noise mechanisms. Some other minor changes included as follows: - Add checks to make sure that truncation bounds in self convolution does not result in truncation of the input list, - Use more numerically stable scipy.special.logsumexp for moment generating function computation, - Use more efficient np.roll for shifting the output result for self convolution. PiperOrigin-RevId: 494208119 Change-Id: I1c50896b3252c19c8802b1e1ebfd6f0ba94c3be2 GitOrigin-RevId: fca6f9ba22ab0a7b3d020d3c5bc95d30405261df
diff --git a/common_docs/Privacy_Loss_Distributions.pdf b/common_docs/Privacy_Loss_Distributions.pdf index 0ac2f56..049c65e 100644 --- a/common_docs/Privacy_Loss_Distributions.pdf +++ b/common_docs/Privacy_Loss_Distributions.pdf Binary files differ
diff --git a/python/dp_accounting/pld/common.py b/python/dp_accounting/pld/common.py index 249327e..e468c9a 100644 --- a/python/dp_accounting/pld/common.py +++ b/python/dp_accounting/pld/common.py
@@ -20,6 +20,7 @@ import numpy as np from scipy import fft from scipy import signal +from scipy import special ArrayLike = Union[np.ndarray, List[float]] @@ -241,10 +242,10 @@ np.concatenate((np.arange(-20, 0), np.arange(1, 21))) / len(input_list)) # Compute log of the moment generating function at the specified orders. - log_mgfs = np.log([ - np.dot(np.exp(np.arange(len(input_list)) * order), input_list) + log_mgfs = [ + special.logsumexp(np.arange(len(input_list)) * order, b=input_list) for order in orders - ]) + ] for order, log_mgf_value in zip(orders, log_mgfs): # Use Chernoff bound to update the upper/lower bound. See equation (5) in @@ -280,17 +281,16 @@ input_list, num_times, tail_mass_truncation) # Use FFT to compute the convolution - fast_len = fft.next_fast_len(truncation_upper_bound - truncation_lower_bound + - 1) + output_len = truncation_upper_bound - truncation_lower_bound + 1 + fast_len = fft.next_fast_len(max(output_len, len(input_list))) truncated_convolution_output = np.real( fft.ifft(fft.fft(input_list, fast_len)**num_times)) # Discrete Fourier Transform wraps around modulo fast_len. Extract the output # values in the range of interest. - output_list = [ - truncated_convolution_output[i % fast_len] - for i in range(truncation_lower_bound, truncation_upper_bound + 1) - ] + output_list = np.roll( + truncated_convolution_output, -truncation_lower_bound + )[:output_len] return truncation_lower_bound, output_list
diff --git a/python/dp_accounting/pld/common_test.py b/python/dp_accounting/pld/common_test.py index ce5666e..cdfac8a 100644 --- a/python/dp_accounting/pld/common_test.py +++ b/python/dp_accounting/pld/common_test.py
@@ -15,6 +15,7 @@ import math import unittest +from unittest import mock from absl.testing import parameterized from dp_accounting.pld import common @@ -230,6 +231,16 @@ self.assertEqual(min_val, expected_min_val) self.assertSequenceAlmostEqual(expected_result_list, result_list) + @mock.patch.object( + common, 'compute_self_convolve_bounds', return_value=(6, 6) + ) + def test_compute_self_convolve_with_too_small_truncation(self, _): + # When the truncation bounds returned from compute_self_convolve_bounds are + # too small, the input should not be truncated. + min_val, result_list = common.self_convolve([0, 0, 1], 3) + self.assertEqual(min_val, 6) + self.assertSequenceAlmostEqual([1], result_list) + @parameterized.parameters( (5, 7, 3, 8.60998489), (0.5, 3, 0.1, 2.31676098),
diff --git a/python/dp_accounting/pld/pld_pmf.py b/python/dp_accounting/pld/pld_pmf.py index 51af04a..6063d8c 100644 --- a/python/dp_accounting/pld/pld_pmf.py +++ b/python/dp_accounting/pld/pld_pmf.py
@@ -25,6 +25,7 @@ import numbers from typing import Iterable, List, Mapping, Sequence, Tuple, Union import numpy as np +import numpy.typing from scipy import signal from dp_accounting.pld import common @@ -552,6 +553,96 @@ pessimistic_estimate) +def create_pmf_pessimistic_connect_dots( + discretization: float, + rounded_epsilons: numpy.typing.ArrayLike, # dtype int, shape (n,) + deltas: numpy.typing.ArrayLike, # dtype float, shape (n,) + ) -> PLDPmf: + """Returns PLD probability mass function using pessimistic Connect-the-Dots. + + This method uses Algorithm 1 (PLD Discretization) from the following paper: + Title: Connect the Dots: Tighter Discrete Approximations of Privacy Loss + Distributions + Authors: V. Doroshenko, B. Ghazi, P. Kamath, R. Kumar, P. Manurangsi + Link: https://arxiv.org/abs/2207.04380 + + Given a set of (finite) epsilon values that the PLD should be supported on + (in addition to +infinity), the probability mass is computed as follows: + Suppose desired epsilon values are [eps_1, ... , eps_n]. + Let eps_0 = -infinity for convenience. + Let delta_i = eps_i-hockey stick divergence for the mechanism. + (Note: delta_0 = 1) + + The probability mass on eps_i (1 <= i <= n-1) is given as: + ((delta_i - delta_{i-1}) / (exp(eps_{i-1} - eps_i) - 1) + + (delta_{i+1} - delta_i) / (exp(eps_{i+1} - eps_i) - 1)) + The probability mass on eps_n is given as: + (delta_n - delta_{n-1}) / (exp(eps_{n-1} - eps_n) - 1) + The probability mass on +infinity is simply delta_n. + + Args: + discretization: The length of the discretization interval, such that all + epsilon values in the support of the privacy loss distribution are integer + multiples of it. + rounded_epsilons: The desired support of the privacy loss distribution + specified as a strictly increasing sequence of integer values. The support + will be given by these values times discretization. + deltas: The delta values corresponding to the epsilon values. These values + must be in non-increasing order, due to the nature of hockey stick + divergence. + + Returns: + The pessimistic Connect-the-Dots privacy loss distribution supported on + specified epsilons. + + Raises: + ValueError: If any of the following hold: + - rounded_epsilons and deltas do not have the same length, or if one of + them is empty, or + - if rounded_epsilons are not in strictly increasing order, or + - if deltas are not in non-increasing order. + """ + rounded_epsilons = np.asarray(rounded_epsilons, dtype=int) + deltas = np.asarray(deltas, dtype=float) + + if (rounded_epsilons.size != deltas.size or rounded_epsilons.size == 0): + raise ValueError('Length of rounded_epsilons and deltas are either unequal ' + f'or zero: {rounded_epsilons=}, {deltas=}.') + + # Notation: epsilons = [eps_1, ... , eps_n] + # epsilon_diffs = [eps_2 - eps_1, ... , eps_n - eps_{n-1}] + epsilon_diffs = np.diff(rounded_epsilons) * discretization + if np.any(epsilon_diffs <= 0): + raise ValueError('rounded_epsilons are not in strictly increasing order: ' + f'{rounded_epsilons=}.') + + # Notation: deltas = [delta_1, ... , delta_n] + # delta_diffs = [delta_2 - delta_1, ... , delta_n - delta_{n-1}] + delta_diffs = np.diff(deltas) + if np.any(delta_diffs > 0): + raise ValueError(f'deltas are not in non-increasing order: {deltas=}.') + + # delta_diffs_scaled_v1 = [y_0, y_1, ..., y_{n-1}] + # where y_i = (delta_{i+1} - delta_i) / (exp(eps_i - eps_{i+1}) - 1) + # and y_0 = (1 - delta_1) + delta_diffs_scaled_v1 = np.append(1 - deltas[0], + delta_diffs / np.expm1(- epsilon_diffs)) + + # delta_diffs_scaled_v2 = [z_1, z_2, ..., z_{n-1}, z_n] + # where z_i = (delta_{i+1} - delta_i) / (exp(eps_{i+1} - eps_i) - 1) + # and z_n = 0 + delta_diffs_scaled_v2 = np.append(delta_diffs / np.expm1(epsilon_diffs), 0.0) + + # PLD contains eps_i with probability mass y_{i-1} + z_i, and + # infinity with probability mass delta_n + return create_pmf( + loss_probs=dict(zip(rounded_epsilons, + delta_diffs_scaled_v1 + delta_diffs_scaled_v2)), + discretization=discretization, + infinity_mass=deltas[-1], + pessimistic_estimate=True) + + def compose_pmfs(pmf1: PLDPmf, pmf2: PLDPmf, tail_mass_truncation: float = 0) -> PLDPmf:
diff --git a/python/dp_accounting/pld/pld_pmf_test.py b/python/dp_accounting/pld/pld_pmf_test.py index f597930..4e22c73 100644 --- a/python/dp_accounting/pld/pld_pmf_test.py +++ b/python/dp_accounting/pld/pld_pmf_test.py
@@ -54,6 +54,16 @@ test_util.assert_dictionary_almost_equal(self, expected_loss_probs, sparse_pmf._loss_probs) + def _check_sparse_pld_pmf_equal(self, sparse_pmf: pld_pmf.SparsePLDPmf, + discretization: float, infinity_mass: float, + loss_probs: dict[int, float], + pessimistic_estimate: bool): + self.assertEqual(discretization, sparse_pmf._discretization) + self.assertAlmostEqual(infinity_mass, sparse_pmf._infinity_mass) + self.assertEqual(pessimistic_estimate, sparse_pmf._pessimistic_estimate) + test_util.assert_dictionary_almost_equal(self, loss_probs, + sparse_pmf._loss_probs) + @parameterized.parameters(False, True) def test_delta_for_epsilon(self, dense: bool): discretization = 0.1 @@ -409,6 +419,90 @@ self.assertEqual(num_points, pmf.size) + @parameterized.named_parameters( + dict(testcase_name='empty', + discretization=0.1, + rounded_epsilons=np.array([]), + deltas=np.array([]), + error_message='unequal or zero'), + dict(testcase_name='unequal_length', + discretization=0.1, + rounded_epsilons=np.array([-1, 1]), + deltas=np.array([0.5]), + error_message='unequal or zero'), + dict(testcase_name='non_sorted_epsilons', + discretization=0.1, + rounded_epsilons=np.array([1, -1]), + deltas=np.array([0, 0.5]), + error_message='not in strictly increasing order'), + dict(testcase_name='non_monotone_deltas', + discretization=0.1, + rounded_epsilons=np.array([-1, 0, 1]), + deltas=np.array([0.5, 0, 0.1]), + error_message='not in non-increasing order'), + ) + def test_connect_dots_pmf_value_errors( + self, discretization, rounded_epsilons, deltas, error_message): + with self.assertRaisesRegex(ValueError, error_message): + pld_pmf.create_pmf_pessimistic_connect_dots(discretization, + rounded_epsilons, + deltas) + + @parameterized.named_parameters( + dict(testcase_name='trivial', + # mu_upper = [1] + # mu_lower = [1] + discretization=0.1, + rounded_epsilons=np.array([0]), + deltas=np.array([0.0]), + expected_loss_probs={0: 1.0}, + expected_infinity_mass=0.0), + dict(testcase_name='rr_eps=0.1', + # mu_upper = [1/(1+e^{-0.1}), 1/(1+e^{0.1})] + # mu_lower = [1/(1+e^{0.1}), 1/(1+e^{-0.1})] + discretization=0.1, + rounded_epsilons=np.array([-1, 1]), + deltas=np.array([1 - np.exp(-0.1), 0.0]), + expected_loss_probs={-1: 1/(1 + np.exp(0.1)), + 1: 1/(1 + np.exp(-0.1))}, + expected_infinity_mass=0.0), + dict(testcase_name='rr_eps=0.1_abort_w_p_0.5', + # mu_upper = [0.5/(1+e^{-0.1}), 0.5, 0.5/(1+e^{0.1})] + # mu_lower = [0.5/(1+e^{0.1}), 0.5, 0.5/(1+e^{-0.1})] + discretization=0.1, + rounded_epsilons=np.array([-1, 0, 1]), + deltas=np.array([1 - np.exp(-0.1), + 0.5*(np.exp(0.1)-1)/(np.exp(0.1)+1), + 0.0]), + expected_loss_probs={-1: 0.5/(1 + np.exp(0.1)), + 0: 0.5, + 1: 0.5/(1 + np.exp(-0.1))}, + expected_infinity_mass=0.0), + dict(testcase_name='rr_eps=0.1_delta=0.5', + # mu_upper = [0.5, 0.5/(1+e^{-0.1}), 0.5/(1+e^{0.1}), 0.0] + # mu_lower = [0.0, 0.5/(1+e^{0.1}), 0.5/(1+e^{-0.1}), 0.5] + discretization=0.1, + rounded_epsilons=np.array([-1, 0, 1]), + deltas=np.array([1 - 0.5*np.exp(-0.1), + 0.5 + 0.5*(np.exp(0.1)-1)/(np.exp(0.1)+1), + 0.5]), + expected_loss_probs={-1: 0.5/(1 + np.exp(0.1)), + 0: 0.0, + 1: 0.5/(1 + np.exp(-0.1))}, + expected_infinity_mass=0.5), + ) + def test_connect_dots_pmf_creation( + self, discretization, rounded_epsilons, deltas, + expected_loss_probs, expected_infinity_mass): + pmf = pld_pmf.create_pmf_pessimistic_connect_dots(discretization, + rounded_epsilons, + deltas) + self._check_sparse_pld_pmf_equal(pmf, + discretization, + expected_infinity_mass, + expected_loss_probs, + pessimistic_estimate=True) + @parameterized.parameters((1, 100, True), (10, 100, True), (1, 1001, False), (10, 101, False), (1001, 1, False), (1001, 1001, False))
diff --git a/python/dp_accounting/pld/privacy_loss_distribution.py b/python/dp_accounting/pld/privacy_loss_distribution.py index 1629707..9a478ea 100644 --- a/python/dp_accounting/pld/privacy_loss_distribution.py +++ b/python/dp_accounting/pld/privacy_loss_distribution.py
@@ -787,7 +787,8 @@ additive_noise_privacy_loss: 'privacy_loss_mechanism.AdditiveNoisePrivacyLoss', pessimistic_estimate: bool = True, - value_discretization_interval: float = 1e-4) -> pld_pmf.PLDPmf: + value_discretization_interval: float = 1e-4, + use_connect_dots: bool = False) -> pld_pmf.PLDPmf: """Constructs the privacy loss distribution of an additive noise mechanism. An additive noise mechanism for computing a scalar-valued function f is a @@ -795,6 +796,11 @@ drawn from a certain distribution mu. This function calculates the privacy loss distribution for such an additive noise mechanism. + This method supports two algorithms for constructing the privacy loss + distribution. One given by the "Privacy Buckets" algorithm and other given by + "Connect the Dots" algorithm. See Sections 2.1 and 2.2 of supplementary + material for more details. + Args: additive_noise_privacy_loss: the privacy loss representation of the mechanism. @@ -806,13 +812,38 @@ integer multiples of this number. Smaller value results in more accurate estimates of the privacy loss, at the cost of increased run-time / memory usage. + use_connect_dots: when False (default), the privacy buckets algorithm will + be used to construct the privacy loss distribution. When True, the + connect-the-dots algorithm would be used. Returns: The privacy loss distribution constructed as specified. + + Raises: + ValueError if use_connect_dots=True and pessimistic_estimate=False. """ + if use_connect_dots: + if not pessimistic_estimate: + raise ValueError('Current implementation does not support pessimistic' + '_estimate=False when use_connect_dots=True.') + connect_dots_bounds = additive_noise_privacy_loss.connect_dots_bounds() + rounded_epsilon_upper = math.ceil(connect_dots_bounds.epsilon_upper / + value_discretization_interval) + rounded_epsilon_lower = math.floor(connect_dots_bounds.epsilon_lower / + value_discretization_interval) + rounded_epsilon_values = np.arange(rounded_epsilon_lower, + rounded_epsilon_upper + 1, dtype=int) + + delta_values = additive_noise_privacy_loss.get_delta_for_epsilon( + rounded_epsilon_values * value_discretization_interval) + + return pld_pmf.create_pmf_pessimistic_connect_dots( + value_discretization_interval, rounded_epsilon_values, delta_values) + round_fn = math.ceil if pessimistic_estimate else math.floor tail_pld = additive_noise_privacy_loss.privacy_loss_tail() + lower_x, upper_x = tail_pld.lower_x_truncation, tail_pld.upper_x_truncation rounded_probability_mass_function = collections.defaultdict(lambda: 0) infinity_mass = tail_pld.tail_probability_mass_function.get(math.inf, 0) @@ -823,10 +854,7 @@ )] += tail_pld.tail_probability_mass_function[privacy_loss] if additive_noise_privacy_loss.discrete_noise: - xs = list( - range( - math.ceil(tail_pld.lower_x_truncation) - 1, - math.floor(tail_pld.upper_x_truncation) + 1)) + xs = list(range(math.ceil(lower_x) - 1, math.floor(upper_x) + 1)) # Compute PMF for the x's. Note that a vectorized call to mu_upper_cdf can # be much faster than many scalar calls. @@ -838,20 +866,18 @@ additive_noise_privacy_loss.privacy_loss(x) / value_discretization_interval)] += prob else: - lower_x = tail_pld.lower_x_truncation rounded_down_value = math.floor( additive_noise_privacy_loss.privacy_loss(lower_x) / value_discretization_interval) - upper_x_privacy_loss = additive_noise_privacy_loss.privacy_loss( - tail_pld.upper_x_truncation) + upper_x_privacy_loss = additive_noise_privacy_loss.privacy_loss(upper_x) # Compute discretization intervals for PLD approximation. xs, rounded_values = [lower_x], [] x = lower_x - while x < tail_pld.upper_x_truncation: + while x < upper_x: if (value_discretization_interval * rounded_down_value <= upper_x_privacy_loss): - x = tail_pld.upper_x_truncation + x = upper_x else: x = additive_noise_privacy_loss.inverse_privacy_loss( value_discretization_interval * rounded_down_value) @@ -1053,9 +1079,15 @@ sensitivity: float = 1, pessimistic_estimate: bool = True, value_discretization_interval: float = 1e-4, - sampling_prob: float = 1.0) -> PrivacyLossDistribution: + sampling_prob: float = 1.0, + use_connect_dots: bool = False) -> PrivacyLossDistribution: """Computes the privacy loss distribution of the Laplace mechanism. + This method supports two algorithms for constructing the privacy loss + distribution. One given by the "Privacy Buckets" algorithm and other given by + "Connect the Dots" algorithm. See Sections 2.1 and 2.2 of supplementary + material for more details. + Args: parameter: the parameter of the Laplace distribution. sensitivity: the sensitivity of function f. (i.e. the maximum absolute @@ -1069,10 +1101,16 @@ estimates of the privacy loss, at the cost of increased run-time / memory usage. sampling_prob: sub-sampling probability, a value in (0,1]. + use_connect_dots: when False (default), the privacy buckets algorithm will + be used to construct the privacy loss distribution. When True, the + connect-the-dots algorithm would be used. Returns: The privacy loss distribution corresponding to the Laplace mechanism with given parameters. + + Raises: + ValueError if use_connect_dots=True and pessimistic_estimate=False. """ def single_laplace_pld( @@ -1084,7 +1122,8 @@ sampling_prob=sampling_prob, adjacency_type=adjacency_type), pessimistic_estimate=pessimistic_estimate, - value_discretization_interval=value_discretization_interval) + value_discretization_interval=value_discretization_interval, + use_connect_dots=use_connect_dots) return _pld_for_subsampled_mechanism(single_laplace_pld, sampling_prob) @@ -1095,9 +1134,15 @@ pessimistic_estimate: bool = True, value_discretization_interval: float = 1e-4, log_mass_truncation_bound: float = -50, - sampling_prob: float = 1.0) -> PrivacyLossDistribution: + sampling_prob: float = 1.0, + use_connect_dots: bool = False) -> PrivacyLossDistribution: """Creates the privacy loss distribution of the Gaussian mechanism. + This method supports two algorithms for constructing the privacy loss + distribution. One given by the "Privacy Buckets" algorithm and other given by + "Connect the Dots" algorithm. See Sections 2.1 and 2.2 of supplementary + material for more details. + Args: standard_deviation: the standard_deviation of the Gaussian distribution. sensitivity: the sensitivity of function f. (i.e. the maximum absolute @@ -1114,10 +1159,16 @@ discarded from the noise distribution. The larger this number, the more error it may introduce in divergence calculations. sampling_prob: sub-sampling probability, a value in (0,1]. + use_connect_dots: when False (default), the privacy buckets algorithm will + be used to construct the privacy loss distribution. When True, the + connect-the-dots algorithm would be used. Returns: The privacy loss distribution corresponding to the Gaussian mechanism with given parameters. + + Raises: + ValueError if use_connect_dots=True and pessimistic_estimate=False. """ def single_gaussian_pld( @@ -1131,7 +1182,8 @@ sampling_prob=sampling_prob, adjacency_type=adjacency_type), pessimistic_estimate=pessimistic_estimate, - value_discretization_interval=value_discretization_interval) + value_discretization_interval=value_discretization_interval, + use_connect_dots=use_connect_dots) return _pld_for_subsampled_mechanism(single_gaussian_pld, sampling_prob) @@ -1141,9 +1193,15 @@ sensitivity: int = 1, pessimistic_estimate: bool = True, value_discretization_interval: float = 1e-4, - sampling_prob: float = 1.0) -> PrivacyLossDistribution: + sampling_prob: float = 1.0, + use_connect_dots: bool = False) -> PrivacyLossDistribution: """Computes the privacy loss distribution of the Discrete Laplace mechanism. + This method supports two algorithms for constructing the privacy loss + distribution. One given by the "Privacy Buckets" algorithm and other given by + "Connect the Dots" algorithm. See Sections 2.1 and 2.2 of supplementary + material for more details. + Args: parameter: the parameter of the discrete Laplace distribution. sensitivity: the sensitivity of function f. (i.e. the maximum absolute @@ -1157,10 +1215,16 @@ estimates of the privacy loss, at the cost of increased run-time / memory usage. sampling_prob: sub-sampling probability, a value in (0,1]. + use_connect_dots: when False (default), the privacy buckets algorithm will + be used to construct the privacy loss distribution. When True, the + connect-the-dots algorithm would be used. Returns: The privacy loss distribution corresponding to the Discrete Laplace mechanism with given parameters. + + Raises: + ValueError if use_connect_dots=True and pessimistic_estimate=False. """ def single_discrete_laplace_pld( @@ -1172,7 +1236,8 @@ sampling_prob=sampling_prob, adjacency_type=adjacency_type), pessimistic_estimate=pessimistic_estimate, - value_discretization_interval=value_discretization_interval) + value_discretization_interval=value_discretization_interval, + use_connect_dots=use_connect_dots) return _pld_for_subsampled_mechanism(single_discrete_laplace_pld, sampling_prob) @@ -1184,9 +1249,15 @@ truncation_bound: Optional[int] = None, pessimistic_estimate: bool = True, value_discretization_interval: float = 1e-4, - sampling_prob: float = 1.0) -> PrivacyLossDistribution: + sampling_prob: float = 1.0, + use_connect_dots: bool = False) -> PrivacyLossDistribution: """Creates the privacy loss distribution of the discrete Gaussian mechanism. + This method supports two algorithms for constructing the privacy loss + distribution. One given by the "Privacy Buckets" algorithm and other given by + "Connect the Dots" algorithm. See Sections 2.1 and 2.2 of supplementary + material for more details. + Args: sigma: the parameter of the discrete Gaussian distribution. Note that unlike the (continuous) Gaussian distribution this is not equal to the @@ -1206,10 +1277,16 @@ estimates of the privacy loss, at the cost of increased run-time / memory usage. sampling_prob: sub-sampling probability, a value in (0,1]. + use_connect_dots: when False (default), the privacy buckets algorithm will + be used to construct the privacy loss distribution. When True, the + connect-the-dots algorithm would be used. Returns: The privacy loss distribution corresponding to the discrete Gaussian mechanism with given parameters. + + Raises: + ValueError if use_connect_dots=True and pessimistic_estimate=False. """ def single_discrete_gaussian_pld( @@ -1222,7 +1299,8 @@ sampling_prob=sampling_prob, adjacency_type=adjacency_type), pessimistic_estimate=pessimistic_estimate, - value_discretization_interval=value_discretization_interval) + value_discretization_interval=value_discretization_interval, + use_connect_dots=use_connect_dots) return _pld_for_subsampled_mechanism(single_discrete_gaussian_pld, sampling_prob)
diff --git a/python/dp_accounting/pld/privacy_loss_distribution_test.py b/python/dp_accounting/pld/privacy_loss_distribution_test.py index cd0766c..092ffb4 100644 --- a/python/dp_accounting/pld/privacy_loss_distribution_test.py +++ b/python/dp_accounting/pld/privacy_loss_distribution_test.py
@@ -344,6 +344,14 @@ parameter, sensitivity=sensitivity, value_discretization_interval=1, sampling_prob=sampling_prob) + def test_laplace_optimistic_connect_dots_value_error(self): + with self.assertRaisesRegex( + ValueError, + 'Current implementation does not support pessimistic_estimate=False ' + 'when use_connect_dots=True.'): + privacy_loss_distribution.from_laplace_mechanism( + 1, pessimistic_estimate=False, use_connect_dots=True) + @parameterized.parameters( # Tests with sampling_prob = 1 (1.0, 1.0, 1.0, { @@ -409,7 +417,86 @@ """Verifies correctness of pessimistic PLD for various parameter values.""" pld = privacy_loss_distribution.from_laplace_mechanism( parameter, sensitivity=sensitivity, value_discretization_interval=1, - sampling_prob=sampling_prob) + sampling_prob=sampling_prob, use_connect_dots=False) + + _assert_pld_pmf_equal(self, pld, + expected_rounded_pmf_add, 0.0, + expected_rounded_pmf_remove, 0.0) + + @parameterized.parameters( + # Tests with sampling_prob = 1 + (1.0, 1.0, 1.0, { + 1: 0.62245933, + 0: 0.14855068, + -1: 0.22898999 + }), + (3.0, 3.0, 1.0, { + 1: 0.62245933, + 0: 0.14855068, + -1: 0.22898999 + }), + (1.0, 2.0, 1.0, { + 2: 0.62245933, + 1: 0.14855068, + 0: 0.09010054, + -1: 0.05464874, + -2: 0.08424071 + }), + (2.0, 4.0, 1.0, { + 2: 0.62245933, + 1: 0.14855068, + 0: 0.09010054, + -1: 0.05464874, + -2: 0.08424071 + }), + # Tests with sampling_prob < 1 + (1.0, 1.0, 0.8, { + 1: 0.49796746, + 0: 0.31884054, + -1: 0.18319199 + }, { + 1: 0.49796746, + 0: 0.31884054, + -1: 0.18319199 + }), + (3.0, 3.0, 0.5, { + 1: 0.31122967, + 0: 0.57427534, + -1: 0.11449500, + }, { + 1: 0.31122967, + 0: 0.57427534, + -1: 0.11449500, + }), + (1.0, 2.0, 0.7, { + 1: 0.70000000, + 0: 0.17131139, + -1: 0.08129580, + -2: 0.04739281, + }, { + 2: 0.35018810, + 1: 0.22098490, + 0: 0.17131139, + -1: 0.25751561, + }), + (2.0, 4.0, 0.3, { + 1: 0.30000000, + 0: 0.59763391, + -1: 0.09942388, + -2: 0.00294221, + }, { + 2: 0.02174013, + 1: 0.27026212, + 0: 0.59763391, + -1: 0.11036383, + })) + def test_laplace_varying_parameter_and_sensitivity_connect_dots( + self, parameter, sensitivity, sampling_prob, + expected_rounded_pmf_add, expected_rounded_pmf_remove=None): + """Verifies correctness of connect_dots PLD for various parameter values.""" + pld = privacy_loss_distribution.from_laplace_mechanism( + parameter, sensitivity=sensitivity, value_discretization_interval=1, + sampling_prob=sampling_prob, use_connect_dots=True) _assert_pld_pmf_equal(self, pld, expected_rounded_pmf_add, 0.0, @@ -435,10 +522,36 @@ expected_rounded_pmf): """Verifies correctness of pessimistic PLD for varying discretization.""" pld = privacy_loss_distribution.from_laplace_mechanism( - 1, value_discretization_interval=value_discretization_interval) + 1, value_discretization_interval=value_discretization_interval, + use_connect_dots=False) _assert_pld_pmf_equal(self, pld, expected_rounded_pmf, 0.0) + @parameterized.parameters((0.5, { + 2: 0.56217650, + 1: 0.09684622, + 0: 0.07542391, + -1: 0.05874020, + -2: 0.20681318, + }), (0.3, { + 4: 0.18817131, + 3: 0.37181835, + 2: 0.06128993, + 1: 0.05275273, + 0: 0.04540470, + -1: 0.03908019, + -2: 0.03363663, + -3: 0.15117006, + -4: 0.05667611, + })) + def test_laplace_discretization_connect_dots( + self, value_discretization_interval, expected_rounded_pmf): + """Verifies correctness of connect_dots PLD for varying discretization.""" + pld = privacy_loss_distribution.from_laplace_mechanism( + 1, value_discretization_interval=value_discretization_interval, + use_connect_dots=True) + _assert_pld_pmf_equal(self, pld, expected_rounded_pmf, 0.0) + @parameterized.parameters( # Tests with sampling_prob = 1 (1.0, 1.0, 1.0, { @@ -491,11 +604,9 @@ expected_rounded_pmf_add, expected_rounded_pmf_remove=None): """Verifies correctness of optimistic PLD for various parameter values.""" pld = privacy_loss_distribution.from_laplace_mechanism( - parameter=parameter, - sensitivity=sensitivity, - pessimistic_estimate=False, - value_discretization_interval=1, - sampling_prob=sampling_prob) + parameter=parameter, sensitivity=sensitivity, + pessimistic_estimate=False, value_discretization_interval=1, + sampling_prob=sampling_prob, use_connect_dots=False) _assert_pld_pmf_equal(self, pld, expected_rounded_pmf_add, 0.0, @@ -516,6 +627,14 @@ value_discretization_interval=1, sampling_prob=sampling_prob) + def test_gaussian_optimistic_connect_dots_value_error(self): + with self.assertRaisesRegex( + ValueError, + 'Current implementation does not support pessimistic_estimate=False ' + 'when use_connect_dots=True.'): + privacy_loss_distribution.from_gaussian_mechanism( + 1, pessimistic_estimate=False, use_connect_dots=True) + @parameterized.parameters( # Tests with sampling_prob = 1 (1.0, 1.0, 1.0, { @@ -600,7 +719,8 @@ sensitivity=sensitivity, log_mass_truncation_bound=math.log(2) + stats.norm.logcdf(-0.9), value_discretization_interval=1, - sampling_prob=sampling_prob) + sampling_prob=sampling_prob, + use_connect_dots=False) test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add, pld._pmf_add._loss_probs) # pytype: disable=attribute-error @@ -616,6 +736,111 @@ pld._pmf_remove._infinity_mass) self.assertFalse(pld._symmetric) + @parameterized.parameters( + # Tests with sampling_prob = 1 + (1.0, 1.0, 1.0, { + 2: 0.167710257, + 1: 0.343270190, + 0: 0.319116756, + -1: 0.126282046, + -2: 0.022697115, + }, 0.020923636), + (5.0, 5.0, 1.0, { + 2: 0.167710257, + 1: 0.343270190, + 0: 0.319116756, + -1: 0.126282046, + -2: 0.022697115, + }, 0.020923636), + (1.0, 2.0, 1.0, { + 4: 0.156393834, + 3: 0.176732821, + 2: 0.195352391, + 1: 0.169838899, + 0: 0.116146963, + -1: 0.062480239, + -2: 0.026438071, + -3: 0.008799009, + -4: 0.002864453, + }, 0.084953319), + (3.0, 6.0, 1.0, { + 4: 0.156393834, + 3: 0.176732821, + 2: 0.195352391, + 1: 0.169838899, + 0: 0.116146963, + -1: 0.062480239, + -2: 0.026438071, + -3: 0.008799009, + -4: 0.002864453, + }, 0.084953319), + # Tests with sampling_prob < 1 + (1.0, 1.0, 0.8, { + 1: 0.448021700, + 0: 0.398104072, + -1: 0.115544309, + -2: 0.015193708, + }, 0.023136210, { + 2: 0.112267164, + 1: 0.314081995, + 0: 0.398104072, + -1: 0.164817973, + }, 0.010728796), + (5.0, 5.0, 0.6, { + 1: 0.363466985, + 0: 0.528456643, + -1: 0.099555157, + -2: 0.008521215, + }, 0.000000000, { + 2: 0.062963737, + 1: 0.270618975, + 0: 0.528456643, + -1: 0.133712031, + }, 0.004248614), + (1.0, 2.0, 0.4, { + 1: 0.431999550, + 0: 0.499732772, + -1: 0.052519150, + -2: 0.012224135, + -3: 0.003524394, + }, 0.000000000, { + 3: 0.070789342, + 2: 0.090324817, + 1: 0.142761851, + 0: 0.499732772, + -1: 0.158923753, + }, 0.037467466), + (3.0, 6.0, 0.2, { + 1: 0.215999775, + 0: 0.738200118, + -1: 0.038917231, + -2: 0.005650510, + -3: 0.001232367, + }, 0.000000000, { + 3: 0.024752755, + 2: 0.041751932, + 1: 0.105788001, + 0: 0.738200118, + -1: 0.079461876, + }, 0.010045317)) + def test_gaussian_varying_standard_deviation_and_sensitivity_connect_dots( + self, standard_deviation, sensitivity, sampling_prob, + expected_rounded_pmf_add, expected_infinity_mass_add, + expected_rounded_pmf_remove=None, expected_infinity_mass_remove=None): + """Verifies correctness of connect_dots PLD for various parameter values.""" + pld = privacy_loss_distribution.from_gaussian_mechanism( + standard_deviation, + sensitivity=sensitivity, + log_mass_truncation_bound=math.log(2) + stats.norm.logcdf(-0.9), + value_discretization_interval=1, + sampling_prob=sampling_prob, + use_connect_dots=True) + + _assert_pld_pmf_equal( + self, pld, + expected_rounded_pmf_add, expected_infinity_mass_add, + expected_rounded_pmf_remove, expected_infinity_mass_remove) + @parameterized.parameters((0.5, { 3: 0.12447741, 2: 0.19146246, @@ -641,13 +866,47 @@ pld = privacy_loss_distribution.from_gaussian_mechanism( 1, log_mass_truncation_bound=math.log(2) + stats.norm.logcdf(-0.9), - value_discretization_interval=value_discretization_interval) + value_discretization_interval=value_discretization_interval, + use_connect_dots=False) test_util.assert_almost_greater_equal(self, stats.norm.cdf(-0.9), pld._pmf_remove._infinity_mass) test_util.assert_dictionary_almost_equal( self, expected_rounded_pmf, pld._pmf_remove._loss_probs) # pytype: disable=attribute-error + @parameterized.parameters((0.5, 0.056696236, { + 3: 0.178515818, + 2: 0.175063076, + 1: 0.195400642, + 0: 0.171573378, + -1: 0.118516480, + -2: 0.064402107, + -3: 0.039832263, + }), (0.3, 0.056696236, { + 5: 0.143933223, + 4: 0.093801719, + 3: 0.110114456, + 2: 0.118295665, + 1: 0.116301523, + 0: 0.104639278, + -1: 0.086158287, + -2: 0.064922037, + -3: 0.044769197, + -4: 0.028252535, + -5: 0.032115843, + })) + def test_gaussian_discretization_connect_dots( + self, value_discretization_interval, expected_infinity_mass, + expected_rounded_pmf): + """Verifies correctness of pessimistic PLD for varying discretization.""" + pld = privacy_loss_distribution.from_gaussian_mechanism( + 1, + log_mass_truncation_bound=math.log(2) + stats.norm.logcdf(-0.9), + value_discretization_interval=value_discretization_interval, + use_connect_dots=True) + _assert_pld_pmf_equal( + self, pld, expected_rounded_pmf, expected_infinity_mass) + @parameterized.parameters( # Tests with sampling_prob = 1 (1.0, 1.0, 1.0, { @@ -733,7 +992,8 @@ pessimistic_estimate=False, log_mass_truncation_bound=math.log(2) + stats.norm.logcdf(-0.9), value_discretization_interval=1, - sampling_prob=sampling_prob) + sampling_prob=sampling_prob, + use_connect_dots=False) test_util.assert_dictionary_almost_equal(self, expected_rounded_pmf_add, pld._pmf_add._loss_probs) # pytype: disable=attribute-error @@ -754,7 +1014,8 @@ privacy_loss_distribution.from_gaussian_mechanism( 0.02, value_discretization_interval=1, - sampling_prob=0.1) + sampling_prob=0.1, + use_connect_dots=False) class DiscreteLaplacePrivacyLossDistributionTest(parameterized.TestCase): @@ -769,6 +1030,14 @@ parameter, sensitivity=sensitivity, value_discretization_interval=1, sampling_prob=sampling_prob) + def test_discrete_laplace_optimistic_connect_dots_value_error(self): + with self.assertRaisesRegex( + ValueError, + 'Current implementation does not support pessimistic_estimate=False ' + 'when use_connect_dots=True.'): + privacy_loss_distribution.from_discrete_laplace_mechanism( + 1, pessimistic_estimate=False, use_connect_dots=True) + @parameterized.parameters( # Tests with sampling_prob = 1 (1.0, 1, 1, { @@ -830,7 +1099,87 @@ """Verifies correctness of pessimistic PLD for various parameter values.""" pld = privacy_loss_distribution.from_discrete_laplace_mechanism( parameter, sensitivity=sensitivity, value_discretization_interval=1, - sampling_prob=sampling_prob) + sampling_prob=sampling_prob, use_connect_dots=False) + + _assert_pld_pmf_equal(self, pld, + expected_rounded_pmf_add, 0.0, + expected_rounded_pmf_remove, 0.0) + + @parameterized.parameters( + # Tests with sampling_prob = 1 + (1.0, 1, 1, { + 1: 0.731058579, + -1: 0.268941421, + }), + (1.0, 2, 1, { + 2: 0.731058579, + 0: 0.170003402, + -2: 0.098938020 + }), + (0.8, 2, 1, { + 2: 0.492482728, + 1: 0.197491753, + 0: 0.170722074, + -1: 0.072653156, + -2: 0.066650289, + }), + (0.8, 3, 1, { + 3: 0.359853436, + 2: 0.330121045, + 1: 0.148724321, + 0: 0.043995505, + -1: 0.054712620, + -2: 0.044677025, + -3: 0.017916048, + }), + # # Tests with sampling_prob < 1 + (1.0, 1, 0.8, { + 1: 0.584846863, + 0: 0.200000000, + -1: 0.215153137, + }, { + 1: 0.584846863, + 0: 0.200000000, + -1: 0.215153137, + }), + (1.0, 2, 0.5, { + 1: 0.500000000, + 0: 0.401061980, + -1: 0.067667642, + -2: 0.031270378, + }, { + 2: 0.231058579, + 1: 0.183939721, + 0: 0.401061980, + -1: 0.183939721, + }), + (0.8, 2, 0.3, { + 1: 0.261344626, + 0: 0.642512060, + -1: 0.096143315, + }, { + 1: 0.261344626, + 0: 0.642512060, + -1: 0.096143315, + }), + (0.8, 3, 0.2, { + 1: 0.228245419, + 0: 0.698218984, + -1: 0.069698173, + -2: 0.003837424, + }, { + 2: 0.028354943, + 1: 0.189459276, + 0: 0.698218984, + -1: 0.083966797, + })) + def test_discrete_laplace_varying_parameter_and_sensitivity_connect_dots( + self, parameter, sensitivity, sampling_prob, + expected_rounded_pmf_add, expected_rounded_pmf_remove=None): + """Verifies correctness of pessimistic PLD for various parameter values.""" + pld = privacy_loss_distribution.from_discrete_laplace_mechanism( + parameter, sensitivity=sensitivity, value_discretization_interval=1, + sampling_prob=sampling_prob, use_connect_dots=True) _assert_pld_pmf_equal(self, pld, expected_rounded_pmf_add, 0.0, @@ -848,10 +1197,29 @@ expected_rounded_pmf): """Verifies correctness of pessimistic PLD for varying discretization.""" pld = privacy_loss_distribution.from_discrete_laplace_mechanism( - 1, value_discretization_interval=value_discretization_interval) + 1, value_discretization_interval=value_discretization_interval, + use_connect_dots=False) _assert_pld_pmf_equal(self, pld, expected_rounded_pmf, 0.0) + @parameterized.parameters((0.1, { + 10: 0.731058579, + -10: 0.268941421 + }), (0.03, { + 34: 0.246127076, + 33: 0.484931503, + -33: 0.180189243, + -34: 0.088752178, + })) + def test_discrete_laplace_discretization_connect_dots( + self, value_discretization_interval, + expected_rounded_pmf): + """Verifies correctness of pessimistic PLD for varying discretization.""" + pld = privacy_loss_distribution.from_discrete_laplace_mechanism( + 1, value_discretization_interval=value_discretization_interval, + use_connect_dots=True) + _assert_pld_pmf_equal(self, pld, expected_rounded_pmf, 0.0) + @parameterized.parameters( # Tests with sampling_prob = 1 (1.0, 1, 1, { @@ -913,7 +1281,7 @@ pld = privacy_loss_distribution.from_discrete_laplace_mechanism( parameter, sensitivity=sensitivity, value_discretization_interval=1, pessimistic_estimate=False, - sampling_prob=sampling_prob) + sampling_prob=sampling_prob, use_connect_dots=False) _assert_pld_pmf_equal(self, pld, expected_rounded_pmf_add, 0.0, @@ -932,6 +1300,14 @@ sigma, sensitivity=sensitivity, truncation_bound=1, sampling_prob=sampling_prob) + def test_discrete_gaussian_optimistic_connect_dots_value_error(self): + with self.assertRaisesRegex( + ValueError, + 'Current implementation does not support pessimistic_estimate=False ' + 'when use_connect_dots=True.'): + privacy_loss_distribution.from_discrete_gaussian_mechanism( + 1, pessimistic_estimate=False, use_connect_dots=True) + @parameterized.parameters( # Tests with sampling_prob = 1 (1.0, 1, 1.0, { @@ -978,7 +1354,76 @@ """Verifies correctness of pessimistic PLD for various parameter values.""" pld = privacy_loss_distribution.from_discrete_gaussian_mechanism( sigma, sensitivity=sensitivity, truncation_bound=1, - sampling_prob=sampling_prob) + sampling_prob=sampling_prob, use_connect_dots=False) + + _assert_pld_pmf_equal( + self, pld, + expected_rounded_pmf_add, expected_infinity_mass_add, + expected_rounded_pmf_remove, expected_infinity_mass_remove) + + @parameterized.parameters( + # Tests with sampling_prob = 1 + (1.0, 1, 1.0, { + -5000: 0.274068619, + 5000: 0.451862762 + }, 0.274068619), + (1.0, 2, 1.0, { + 0: 0.27406862 + }, 0.72593138), + (3.0, 1, 1.0, { + -556: 0.181720640, + -555: 0.145383781, + 555: 0.153680690, + 556: 0.192110468, + }, 0.327104421), + # Tests with sampling_prob < 1 + (1.0, 1, 0.6, { + -3288: 0.141485241, + -3287: 0.132583378, + 2692: 0.025722139, + 2693: 0.426140623, + 9162: 0.025399872, + 9163: 0.248668747, + }, 0.0, { + 3288: 0.196565440, + 3287: 0.184179664, + -2692: 0.019651469, + -2693: 0.325534808, + -9162: 0.010160871, + -9163: 0.099466577, + }, 0.164441171), + (1.0, 2, 0.3, { + 0: 0.274068619, + 3566: 0.181882996, + 3567: 0.544048385, + }, 0.0, { + -3567: 0.380824327, + -3566: 0.127327639, + 0: 0.274068619, + }, 0.217779414), + (3.0, 1, 0.1, { + -57: 0.315715606, + -56: 0.011388815, + 54: 0.281098525, + 55: 0.064692633, + 1053: 0.129151121, + 1054: 0.197953300, + }, 0.0, { + -1054: 0.178150936, + -1053: 0.116243043, + -55: 0.064337801, + -54: 0.279584684, + 56: 0.011452771, + 57: 0.317520323, + }, 0.032710442)) + def test_discrete_gaussian_varying_sigma_and_sensitivity_connect_dots( + self, sigma, sensitivity, sampling_prob, + expected_rounded_pmf_add, expected_infinity_mass_add, + expected_rounded_pmf_remove=None, expected_infinity_mass_remove=None): + """Verifies correctness of pessimistic PLD for various parameter values.""" + pld = privacy_loss_distribution.from_discrete_gaussian_mechanism( + sigma, sensitivity=sensitivity, truncation_bound=1, + sampling_prob=sampling_prob, use_connect_dots=True) _assert_pld_pmf_equal( self, pld, @@ -1002,7 +1447,7 @@ self, truncation_bound, expected_rounded_pmf, expected_infinity_mass): """Verifies correctness of pessimistic PLD for varying truncation bound.""" pld = privacy_loss_distribution.from_discrete_gaussian_mechanism( - 1, truncation_bound=truncation_bound) + 1, truncation_bound=truncation_bound, use_connect_dots=False) _assert_pld_pmf_equal( self, pld, expected_rounded_pmf, expected_infinity_mass) @@ -1053,8 +1498,8 @@ """Verifies correctness of optimistic PLD for various parameter values.""" pld = privacy_loss_distribution.from_discrete_gaussian_mechanism( sigma, sensitivity=sensitivity, truncation_bound=1, - pessimistic_estimate=False, - sampling_prob=sampling_prob) + pessimistic_estimate=False, sampling_prob=sampling_prob, + use_connect_dots=False) _assert_pld_pmf_equal( self, pld,
diff --git a/python/dp_accounting/pld/privacy_loss_mechanism.py b/python/dp_accounting/pld/privacy_loss_mechanism.py index c4fd0fb..e5f4504 100644 --- a/python/dp_accounting/pld/privacy_loss_mechanism.py +++ b/python/dp_accounting/pld/privacy_loss_mechanism.py
@@ -24,7 +24,8 @@ import dataclasses import enum import math -from typing import Iterable, Mapping, Optional, Union +import numbers +from typing import Iterable, List, Mapping, Optional, Union import numpy as np from scipy import stats @@ -49,8 +50,8 @@ REMOVE = 'REMOVE' -@dataclasses.dataclass -class TailPrivacyLossDistribution(object): +@dataclasses.dataclass(frozen=True) +class TailPrivacyLossDistribution: """Representation of the tail of privacy loss distribution. Attributes: @@ -68,6 +69,18 @@ tail_probability_mass_function: Mapping[float, float] +@dataclasses.dataclass(frozen=True) +class ConnectDotsEpsilonBounds: + """Upper and lower bounds on epsilon to use for Connect-the-Dots algorithm. + + Attributes: + epsilon_upper: largest epsilon value to use in connect-the-dots algorithm. + epsilon_lower: smallest epsilon value to use in connect-the-dots algorithm. + """ + epsilon_upper: float + epsilon_lower: float + + class AdditiveNoisePrivacyLoss(metaclass=abc.ABCMeta): """Superclass for privacy loss of additive noise mechanisms. @@ -193,7 +206,8 @@ else: # Case: self.adjacency_type == AdjacencyType.REMOVE return self.noise_cdf(x) - def get_delta_for_epsilon(self, epsilon): + def get_delta_for_epsilon( + self, epsilon: Union[float, List[float]]) -> Union[float, List[float]]: """Computes the epsilon-hockey stick divergence of the mechanism. The epsilon-hockey stick divergence of the mechanism is the value of delta @@ -215,22 +229,32 @@ since mu_lower_cdf*exp(epsilon) is pointwise lower than mu_upper_cdf. Args: - epsilon: the epsilon in epsilon-hockey stick divergence. + epsilon: the epsilon, or list-like object of epsilon values, in + epsilon-hockey stick divergence. Returns: - A non-negative real number which is the epsilon-hockey stick divergence - of the mechanism. + A non-negative real number which is the epsilon-hockey stick divergence of + the mechanism, or a numpy array if epsilon is list-like. """ - if self.sampling_prob != 1.0: - if (self.adjacency_type == AdjacencyType.ADD and - epsilon >= -math.log(1 - self.sampling_prob)): - return 0.0 - if (self.adjacency_type == AdjacencyType.REMOVE and - epsilon <= math.log(1 - self.sampling_prob)): - return 1.0 - math.exp(epsilon) - x_cutoff = self.inverse_privacy_loss(epsilon) - return (self.mu_upper_cdf(x_cutoff) - - math.exp(epsilon) * self.mu_lower_cdf(x_cutoff)) + is_scalar = isinstance(epsilon, numbers.Number) + epsilon_values = np.array([epsilon]) if is_scalar else np.asarray(epsilon) + delta_values = np.zeros_like(epsilon_values, dtype=float) + if self.sampling_prob == 1.0: + inverse_indices = np.full_like(epsilon_values, True, dtype=bool) + elif self.adjacency_type == AdjacencyType.ADD: + inverse_indices = epsilon_values < -math.log(1 - self.sampling_prob) + else: # Case: self.adjacency_type == AdjacencyType.REMOVE + inverse_indices = epsilon_values > math.log(1 - self.sampling_prob) + other_indices = np.logical_not(inverse_indices) + delta_values[other_indices] = 1 - np.exp(epsilon_values[other_indices]) + x_cutoffs = np.array([ + self.inverse_privacy_loss(eps) + for eps in epsilon_values[inverse_indices] + ]) + delta_values[inverse_indices] = ( + self.mu_upper_cdf(x_cutoffs) - + np.exp(epsilon_values[inverse_indices]) * self.mu_lower_cdf(x_cutoffs)) + return float(delta_values) if is_scalar else delta_values @abc.abstractmethod def privacy_loss_tail(self) -> TailPrivacyLossDistribution: @@ -245,6 +269,15 @@ """ raise NotImplementedError + @abc.abstractmethod + def connect_dots_bounds(self) -> ConnectDotsEpsilonBounds: + """Computes the bounds on epsilon values to use in connect-the-dots algorithm. + + Returns: + A ConnectDotsEpsilonBounds instance containing upper and lower values of + epsilon to use in connect-the-dots algorithm. + """ + def privacy_loss(self, x: float) -> float: """Computes the privacy loss at a given point. @@ -540,6 +573,39 @@ 1 - self.mu_upper_cdf(upper_x_truncation) }) + def connect_dots_bounds(self) -> ConnectDotsEpsilonBounds: + """Computes the bounds on epsilon values to use in connect-the-dots algorithm. + + With sub-sampling probability of q, + For ADD adjacency type: + epsilon_upper = - log(1 - q + q * e^{-sensitivity / parameter}) + epsilon_lower = - log(1 - q + q * e^{sensitivity / parameter}) + + For REMOVE adjacency type: + epsilon_upper = log(1 - q + q * e^{sensitivity / parameter}) + epsilon_lower = log(1 - q + q * e^{-sensitivity / parameter}) + + Returns: + A ConnectDotsEpsilonBounds instance containing upper and lower values of + epsilon to use in connect-the-dots algorithm. + """ + max_epsilon = self.sensitivity / self._parameter + if self.sampling_prob == 1.0: + # For efficiency this case is handled separately. + return ConnectDotsEpsilonBounds(max_epsilon, -max_epsilon) + elif self.adjacency_type == AdjacencyType.ADD: + return ConnectDotsEpsilonBounds( + - math.log(1 - self.sampling_prob + + self.sampling_prob * math.exp(-max_epsilon)), + - math.log(1 - self.sampling_prob + + self.sampling_prob * math.exp(max_epsilon))) + else: # Case: self.adjacency_type == AdjacencyType.REMOVE + return ConnectDotsEpsilonBounds( + math.log(1 - self.sampling_prob + + self.sampling_prob * math.exp(max_epsilon)), + math.log(1 - self.sampling_prob + + self.sampling_prob * math.exp(-max_epsilon))) + def privacy_loss_without_subsampling(self, x: float) -> float: """Computes the privacy loss of the Laplace mechanism without sub-sampling at a given point. @@ -793,6 +859,25 @@ return TailPrivacyLossDistribution(lower_x_truncation, upper_x_truncation, tail_probability_mass_function) + def connect_dots_bounds(self) -> ConnectDotsEpsilonBounds: + """Computes the bounds on epsilon values to use in connect-the-dots algorithm. + + epsilon_upper = privacy_loss(lower_x_truncation) + epsilon_lower = privacy_loss(upper_x_truncation) + + where lower_x_truncation and upper_x_truncation are the lower and upper + values of trunction as given by privacy_loss_tail(). + + Returns: + A ConnectDotsEpsilonBounds instance containing upper and lower values of + epsilon to use in connect-the-dots algorithm. + """ + tail_pld = self.privacy_loss_tail() + + return ConnectDotsEpsilonBounds( + self.privacy_loss(tail_pld.lower_x_truncation), + self.privacy_loss(tail_pld.upper_x_truncation)) + def privacy_loss_without_subsampling(self, x: float) -> float: """Computes the privacy loss of the Gaussian mechanism without sub-sampling at a given point. @@ -1024,6 +1109,39 @@ 1 - self.mu_upper_cdf(upper_x_truncation) }) + def connect_dots_bounds(self) -> ConnectDotsEpsilonBounds: + """Computes the bounds on epsilon values to use in connect-the-dots algorithm. + + With sub-sampling probability of q, + For ADD adjacency type: + epsilon_upper = - log(1 - q + q * e^{-sensitivity * parameter}) + epsilon_lower = - log(1 - q + q * e^{sensitivity * parameter}) + + For REMOVE adjacency type: + epsilon_upper = log(1 - q + q * e^{sensitivity * parameter}) + epsilon_lower = log(1 - q + q * e^{-sensitivity * parameter}) + + Returns: + A ConnectDotsEpsilonBounds instance containing upper and lower values of + epsilon to use in connect-the-dots algorithm. + """ + max_epsilon = self.sensitivity * self._parameter + if self.sampling_prob == 1.0: + # For efficiency this case is handled separately. + return ConnectDotsEpsilonBounds(max_epsilon, -max_epsilon) + elif self.adjacency_type == AdjacencyType.ADD: + return ConnectDotsEpsilonBounds( + - math.log(1 - self.sampling_prob + + self.sampling_prob * math.exp(-max_epsilon)), + - math.log(1 - self.sampling_prob + + self.sampling_prob * math.exp(max_epsilon))) + else: # Case: self.adjacency_type == AdjacencyType.REMOVE + return ConnectDotsEpsilonBounds( + math.log(1 - self.sampling_prob + + self.sampling_prob * math.exp(max_epsilon)), + math.log(1 - self.sampling_prob + + self.sampling_prob * math.exp(-max_epsilon))) + def privacy_loss_without_subsampling(self, x: float) -> float: """Computes privacy loss of the discrete Laplace mechanism without sub-sampling at a given point. @@ -1287,6 +1405,25 @@ lower_x_truncation, upper_x_truncation, {math.inf: self.mu_upper_cdf(lower_x_truncation - 1)}) + def connect_dots_bounds(self) -> ConnectDotsEpsilonBounds: + """Computes the bounds on epsilon values to use in connect-the-dots algorithm. + + epsilon_upper = privacy_loss(lower_x_truncation) + epsilon_lower = privacy_loss(upper_x_truncation) + + where lower_x_truncation and upper_x_truncation are the lower and upper + values of trunction as given by privacy_loss_tail(). + + Returns: + A ConnectDotsEpsilonBounds instance containing upper and lower values of + epsilon to use in connect-the-dots algorithm. + """ + tail_pld = self.privacy_loss_tail() + + return ConnectDotsEpsilonBounds( + self.privacy_loss(tail_pld.lower_x_truncation), + self.privacy_loss(tail_pld.upper_x_truncation)) + def privacy_loss_without_subsampling(self, x: float) -> float: """Computes the privacy loss of the discrete Gaussian mechanism without sub-sampling at a given point.
diff --git a/python/dp_accounting/pld/privacy_loss_mechanism_test.py b/python/dp_accounting/pld/privacy_loss_mechanism_test.py index 11e30d0..8174aeb 100644 --- a/python/dp_accounting/pld/privacy_loss_mechanism_test.py +++ b/python/dp_accounting/pld/privacy_loss_mechanism_test.py
@@ -146,6 +146,36 @@ self, expected_tail_probability_mass_function, tail_pld.tail_probability_mass_function) + @parameterized.parameters( + # Tests with sampling_prob = 1 for adjacency_type=ADD + (1.0, 1.0, 1.0, ADD, 1.0, -1.0), + (1.0, 2.0, 1.0, ADD, 2.0, -2.0), + # Tests with sampling_prob < 1 for adjacency_type=ADD + (1.0, 1.0, 0.8, ADD, 0.704605471, -0.864839725), + (3.0, 3.0, 0.6, ADD, 0.476862836, -0.708513067), + (1.0, 2.0, 0.7, ADD, 0.929541390, -1.699706179), + # Tests with sampling_prob = 1 for adjacency_type=REMOVE + (1.0, 1.0, 1.0, REM, 1.0, -1.0), + (1.0, 2.0, 1.0, REM, 2.0, -2.0), + # Tests with sampling_prob < 1 for adjacency_type=REMOVE + (1.0, 1.0, 0.8, REM, 0.864839725, -0.704605471), + (3.0, 3.0, 0.6, REM, 0.708513067, -0.476862836), + (1.0, 2.0, 0.7, REM, 1.699706179, -0.929541390)) + def test_laplace_connect_dots_bounds(self, parameter, sensitivity, + sampling_prob, adjacency_type, + expected_epsilon_upper, + expected_epsilon_lower): + pl = privacy_loss_mechanism.LaplacePrivacyLoss( + parameter, + sensitivity=sensitivity, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + connect_dots_bounds = pl.connect_dots_bounds() + self.assertAlmostEqual(expected_epsilon_upper, + connect_dots_bounds.epsilon_upper) + self.assertAlmostEqual(expected_epsilon_lower, + connect_dots_bounds.epsilon_lower) + @parameterized.parameters((-3.0, 1.0, 1.0, ADD), (0.0, 1.0, 1.0, ADD), (1.0, 0.0, 1.0, REM), (2.0, -1.0, 1.0, REM), (2.0, 1.0, 0.0, ADD), (1.0, 1.0, 1.2, REM), @@ -228,6 +258,38 @@ adjacency_type=adjacency_type) self.assertAlmostEqual(expected_delta, pl.get_delta_for_epsilon(epsilon)) + @parameterized.parameters( + # Tests with sampling_prob = 1 for adjacency_type=ADD + (1.0, 1.0, 1.0, ADD, [-2.0, -1.0, 0.0, 1.0], + [0.86466472, 0.63212056, 0.39346934, 0.0]), + (2.0, 4.0, 1.0, ADD, [-0.5, 0.0, 0.5, 1.0, 2.0], + [0.7134952031, 0.6321205588, 0.5276334473, 0.3934693403, 0.0]), + # Tests with sampling_prob < 1 for adjacency_type=ADD + (1.0, 1.0, 0.2, ADD, [-0.3, -0.2, -0.1, 0.0, 0.1, 0.2, 0.3], + [0.2591817793, 0.2008511573, 0.1405456165, 0.07869386806, + 0.01879989609, 0.0, 0.0]), + # Tests with sampling_prob = 1 for adjacency_type=REMOVE + (1.0, 1.0, 1.0, REM, [-2.0, -1.0, 0.0, 1.0], + [0.86466472, 0.63212056, 0.39346934, 0.0]), + (2.0, 4.0, 1.0, REM, [-0.5, 0.0, 0.5, 1.0, 2.0], + [0.7134952031, 0.6321205588, 0.5276334473, 0.3934693403, 0.0]), + # Tests with sampling_prob < 1 for adjacency_type=REMOVE) + (1.0, 1.0, 0.2, REM, [-0.3, -0.2, -0.1, 0.0, 0.1, 0.2, 0.3], + [0.2591817793, 0.1812692469, 0.1121734314, 0.07869386806, + 0.05015600993, 0.02391739939, 0.0]), + ) + def test_laplace_get_delta_for_epsilon_vectorized( + self, parameter, sensitivity, sampling_prob, adjacency_type, + epsilon_values, expected_delta_values): + pl = privacy_loss_mechanism.LaplacePrivacyLoss( + parameter, + sensitivity=sensitivity, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + for expected_delta, delta in zip(expected_delta_values, + pl.get_delta_for_epsilon(epsilon_values)): + self.assertAlmostEqual(expected_delta, delta) + class GaussianPrivacyLossTest(parameterized.TestCase): @@ -438,6 +500,42 @@ self, expected_tail_probability_mass_function, tail_pld.tail_probability_mass_function) + @parameterized.parameters( + # Tests with sampling_prob = 1 for adjacency_type=ADD + (1.0, 1.0, 1.0, ADD, 1.5, -1.5), + (3.0, 3.0, 1.0, ADD, 1.5, -1.5), + (1.0, 2.0, 1.0, ADD, 4.0, -4.0), + # Tests with sampling_prob < 1 for adjacency_type=ADD + (1.0, 1.0, 0.8, ADD, 0.971528300, -1.331138685), + (3.0, 3.0, 0.8, ADD, 0.971528300, -1.331138685), + (1.0, 2.0, 0.5, ADD, 0.674997253, -3.325002747), + (4.0, 8.0, 0.6, ADD, 0.889187896, -3.501310856), + # Tests with sampling_prob = 1 for adjacency_type=REMOVE + (1.0, 1.0, 1.0, REM, 1.5, -1.5), + (3.0, 3.0, 1.0, REM, 1.5, -1.5), + (1.0, 2.0, 1.0, REM, 4.0, -4.0), + # Tests with sampling_prob < 1 for adjacency_type=REMOVE + (1.0, 1.0, 0.8, REM, 1.331138685, -0.971528300), + (3.0, 3.0, 0.8, REM, 1.331138685, -0.971528300), + (1.0, 2.0, 0.5, REM, 3.325002747, -0.674997253), + (4.0, 8.0, 0.6, REM, 3.501310856, -0.889187896)) + def test_gaussian_connect_dots_bounds(self, standard_deviation, sensitivity, + sampling_prob, adjacency_type, + expected_epsilon_upper, + expected_epsilon_lower): + pl = privacy_loss_mechanism.GaussianPrivacyLoss( + standard_deviation, + sensitivity=sensitivity, + pessimistic_estimate=True, + log_mass_truncation_bound=math.log(2) + stats.norm.logcdf(-1), + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + connect_dots_bounds = pl.connect_dots_bounds() + self.assertAlmostEqual(expected_epsilon_upper, + connect_dots_bounds.epsilon_upper) + self.assertAlmostEqual(expected_epsilon_lower, + connect_dots_bounds.epsilon_lower) + @parameterized.parameters((0.0, 1.0), (-10.0, 2.0), (4.0, 0.0), (2.0, -1.0), (1.0, 1.0, 1.0, ADD, 1), (2.0, 1.0, 0.0, REM), (1.0, 1.0, 1.2, ADD), (2.0, 1.0, -0.1, REM)) @@ -530,6 +628,34 @@ adjacency_type=adjacency_type) self.assertAlmostEqual(expected_delta, pl.get_delta_for_epsilon(epsilon)) + @parameterized.parameters( + # Tests with sampling_prob = 1 for adjacency_type=ADD + (1.0, 1.0, 1.0, ADD, [-2.0, -1.0, 0.0, 1.0, 2.0], + [0.86749642, 0.67881797, 0.38292492, 0.12693674, 0.020923636]), + # Tests with sampling_prob < 1 for adjacency_type=ADD + (1.0, 1.0, 0.2, ADD, [-0.3, -0.2, -0.1, 0.0, 0.1, 0.2, 0.3], + [0.27768558, 0.21052945, 0.14204776, 0.076584985, 0.02337934, + 0.00020196, 0.0]), + # Tests with sampling_prob = 1 for adjacency_type=REMOVE + (1.0, 1.0, 1.0, REM, [-2.0, -1.0, 0.0, 1.0, 2.0], + [0.86749642, 0.67881797, 0.38292492, 0.12693674, 0.020923636]), + # Tests with sampling_prob < 1 for adjacency_type=REMOVE + (1.0, 1.0, 0.2, REM, [-0.3, -0.2, -0.1, 0.0, 0.1, 0.2, 0.5, 1.0], + [0.25918178, 0.1814346, 0.11631708, 0.076584985, 0.051816131, + 0.035738492, 0.012494629, 0.00229682]), + ) + def test_gaussian_get_delta_for_epsilon_vectorized( + self, standard_deviation, sensitivity, sampling_prob, adjacency_type, + epsilon_values, expected_delta_values): + pl = privacy_loss_mechanism.GaussianPrivacyLoss( + standard_deviation, + sensitivity=sensitivity, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + for expected_delta, delta in zip(expected_delta_values, + pl.get_delta_for_epsilon(epsilon_values)): + self.assertAlmostEqual(expected_delta, delta) + class DiscreteLaplacePrivacyLossDistributionTest(parameterized.TestCase): @@ -674,6 +800,34 @@ self, expected_tail_probability_mass_function, tail_pld.tail_probability_mass_function) + @parameterized.parameters( + # Tests with sampling_prob = 1 for adjacency_type=ADD + (1.0, 1, 1.0, ADD, 1.0, -1.0), + (0.3, 2, 1.0, ADD, 0.6, -0.6), + # Tests with sampling_prob < 1 for adjacency_type=ADD + (1.0, 1, 0.8, ADD, 0.704605471, -0.864839725), + (0.3, 2, 0.6, ADD, 0.315687960, -0.400969203), + # Tests with sampling_prob = 1 for adjacency_type=REMOVE + (1.0, 1, 1.0, REM, 1.0, -1.0), + (0.3, 2, 1.0, REM, 0.6, -0.6), + # Tests with sampling_prob < 1 for adjacency_type=REMOVE + (1.0, 1, 0.8, REM, 0.864839725, -0.704605471), + (0.3, 2, 0.6, REM, 0.400969203, -0.315687960)) + def test_discrete_laplace_connect_dots_bounds(self, parameter, sensitivity, + sampling_prob, adjacency_type, + expected_epsilon_upper, + expected_epsilon_lower): + pl = privacy_loss_mechanism.DiscreteLaplacePrivacyLoss( + parameter, + sensitivity=sensitivity, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + connect_dots_bounds = pl.connect_dots_bounds() + self.assertAlmostEqual(expected_epsilon_upper, + connect_dots_bounds.epsilon_upper) + self.assertAlmostEqual(expected_epsilon_lower, + connect_dots_bounds.epsilon_lower) + @parameterized.parameters((-3.0, 1), (0.0, 1), (2.0, 0.5), (2.0, -1), (1.0, 0), (2.0, 1, 0.0, ADD), (1.0, 1, 1.2, REM), @@ -753,6 +907,35 @@ adjacency_type=adjacency_type) self.assertAlmostEqual(expected_delta, pl.get_delta_for_epsilon(epsilon)) + @parameterized.parameters( + # Tests with sampling_prob = 1 for adjacency_type=ADD + (0.5, 4, 1.0, ADD, [-2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0], + [0.86466472, 0.77686984, 0.7222211, 0.63212056, 0.54202002, + 0.39346934, 0.0]), + # Tests with sampling_prob < 1 for adjacency_type=ADD + (0.5, 4, 0.2, ADD, [-0.3, -0.2, -0.1, 0.0, 0.1, 0.2, 0.3], + [0.31709255, 0.25960017, 0.19633877, 0.12642411, 0.058632421, 0.0, 0.0]), + # Tests with sampling_prob = 1 for adjacency_type=REMOVE + (0.5, 4, 1.0, REM, [-2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0], + [0.86466472, 0.77686984, 0.7222211, 0.63212056, 0.54202002, + 0.39346934, 0.0]), + # Tests with sampling_prob < 1 for adjacency_type=REMOVE + (0.5, 4, 0.2, REM, [-0.3, -0.2, -0.1, 0.0, 0.1, 0.2, 0.3], + [0.25918178, 0.18126925, 0.14821539, 0.12642411, 0.11181698, + 0.095673604, 0.07817137]), + ) + def test_discrete_laplace_get_delta_for_epsilon_vectorized( + self, parameter, sensitivity, sampling_prob, adjacency_type, + epsilon_values, expected_delta_values): + pl = privacy_loss_mechanism.DiscreteLaplacePrivacyLoss( + parameter, + sensitivity=sensitivity, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + for expected_delta, delta in zip(expected_delta_values, + pl.get_delta_for_epsilon(epsilon_values)): + self.assertAlmostEqual(expected_delta, delta) + class DiscreteGaussianPrivacyLossTest(parameterized.TestCase): @@ -901,6 +1084,34 @@ self, expected_tail_probability_mass_function, tail_pld.tail_probability_mass_function) + @parameterized.parameters( + # Tests with sampling_prob = 1 for adjacency_type=ADD + (1.0, 1, 2, 1.0, ADD, 1.5, -1.5), + (1.0, 2, 2, 1.0, ADD, 2.0, -2.0), + # Tests with sampling_prob < 1 for adjacency_type=ADD + (1.0, 1, 2, 0.8, ADD, 1.609437912, -1.331138685), + (1.0, 2, 2, 0.7, ADD, 1.203972804, -1.699706179), + # Tests with sampling_prob = 1 for adjacency_type=REMOVE + (1.0, 1, 2, 1.0, REM, 1.5, -1.5), + (1.0, 2, 2, 1.0, REM, 2.0, -2.0), + # Tests with sampling_prob < 1 for adjacency_type=REMOVE + (1.0, 1, 2, 0.8, REM, 1.331138685, -1.609437912), + (1.0, 2, 2, 0.7, REM, 1.699706179, -1.203972804)) + def test_discrete_gaussian_connect_dots_bounds( + self, sigma, sensitivity, truncation_bound, sampling_prob, adjacency_type, + expected_epsilon_upper, expected_epsilon_lower): + pl = privacy_loss_mechanism.DiscreteGaussianPrivacyLoss( + sigma, + sensitivity=sensitivity, + truncation_bound=truncation_bound, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + connect_dots_bounds = pl.connect_dots_bounds() + self.assertAlmostEqual(expected_epsilon_upper, + connect_dots_bounds.epsilon_upper) + self.assertAlmostEqual(expected_epsilon_lower, + connect_dots_bounds.epsilon_lower) + @parameterized.parameters((-3.0, 1), (0.0, 1), (2.0, 0.5), (1.0, 0), (2.0, -1), (2.0, 4, 1, ADD, 1), (2.0, 1, 0), (1.0, 1, 1.2), (2.0, 1, -0.1)) @@ -987,5 +1198,64 @@ self.assertAlmostEqual(expected_sigma, pl._sigma, 3) self.assertEqual(adjacency_type, pl.adjacency_type) + @parameterized.parameters( + # Tests with sampling_prob = 1 for adjacency_type=ADD + (1.0, 1, 2, 1.0, ADD, 1.0, 0.150574425), + (10.0, 3, 5, 1.0, ADD, 1.0, 0.263672394), + # Tests with sampling_prob < 1 for adjacency_type=ADD + (1.0, 1, 2, 0.8, ADD, 1.0, 0.024865564), + (5.0, 3, 5, 0.4, ADD, 0.1, 0.085584980), + # Tests with sampling_prob = 1 for adjacency_type=REMOVE + (1.0, 1, 2, 1.0, REM, 1.0, 0.150574425), + (10.0, 3, 5, 1.0, REM, 1.0, 0.263672394), + # Tests with sampling_prob < 1 for adjacency_type=REMOVE + (1.0, 1, 2, 0.8, REM, 1.0, 0.101734157), + (5.0, 3, 5, 0.4, REM, 0.1, 0.104486937)) + def test_discrete_gaussian_get_delta_for_epsilon( + self, sigma, sensitivity, truncation_bound, sampling_prob, adjacency_type, + epsilon, expected_delta): + pl = privacy_loss_mechanism.DiscreteGaussianPrivacyLoss( + sigma, + sensitivity=sensitivity, + truncation_bound=truncation_bound, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + self.assertAlmostEqual(expected_delta, pl.get_delta_for_epsilon(epsilon)) + + @parameterized.parameters( + # Tests with sampling_prob = 1 for adjacency_type=ADD + (1.0, 1, 2, 1.0, ADD, [-2.0, -1.0, 0.0, 1.0, 2.0], + [0.872038958, 0.687513794, 0.402619947, 0.150574425, 0.054488685]), + (10.0, 3, 5, 1.0, ADD, [-2.0, -1.0, 0.0, 1.0, 2.0], + [0.900348895, 0.729120212, 0.285480872, 0.263672394, 0.263672394]), + # Tests with sampling_prob < 1 for adjacency_type=ADD + (1.0, 1, 2, 0.8, ADD, [-2.0, -1.0, 0.0, 1.0, 2.0], + [0.870564110, 0.669546464, 0.322095958, 0.024865564, 0.000000000]), + (5.0, 3, 5, 0.4, ADD, [-0.2, -0.1, 0.0, 0.1, 0.2], + [0.258926845, 0.189706273, 0.129522020, 0.085584980, 0.063350727]), + # Tests with sampling_prob = 1 for adjacency_type=REMOVE + (1.0, 1, 2, 1.0, REM, [-2.0, -1.0, 0.0, 1.0, 2.0], + [0.872038958, 0.687513794, 0.402619947, 0.150574425, 0.054488685]), + (10.0, 3, 5, 1.0, REM, [-2.0, -1.0, 0.0, 1.0, 2.0], + [0.900348895, 0.729120212, 0.285480872, 0.263672394, 0.263672394]), + # Tests with sampling_prob < 1 for adjacency_type=REMOVE + (1.0, 1, 2, 0.8, REM, [-2.0, -1.0, 0.0, 1.0, 2.0], + [0.864664717, 0.641268089, 0.322095958, 0.101734157, 0.043590948]), + (5.0, 3, 5, 0.4, REM, [-0.2, -0.1, 0.0, 0.1, 0.2], + [0.233136435, 0.172603074, 0.129522020, 0.104486937, 0.094851204])) + def test_discrete_gaussian_get_delta_for_epsilon_vectorized( + self, sigma, sensitivity, truncation_bound, sampling_prob, adjacency_type, + epsilon_values, expected_delta_values): + pl = privacy_loss_mechanism.DiscreteGaussianPrivacyLoss( + sigma, + sensitivity=sensitivity, + truncation_bound=truncation_bound, + sampling_prob=sampling_prob, + adjacency_type=adjacency_type) + for expected_delta, delta in zip(expected_delta_values, + pl.get_delta_for_epsilon(epsilon_values)): + self.assertAlmostEqual(expected_delta, delta) + + if __name__ == '__main__': unittest.main()