| # Copyright 2020 Google LLC. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| """Implementing Privacy Loss Distribution. |
| |
| This file implements the privacy loss distribution (PLD) and its basic |
| functionalities. The main feature of PLD is that it allows for accurate |
| computation of privacy parameters under composition. Please refer to the |
| supplementary material below for more details: |
| ../../common_docs/Privacy_Loss_Distributions.pdf |
| """ |
| |
| import collections |
| import logging |
| import math |
| from typing import Mapping, Optional, Callable, Any |
| |
| import numpy as np |
| |
| from dp_accounting import common |
| from dp_accounting import privacy_loss_mechanism |
| |
| |
| class BasicPrivacyLossDistribution(object): |
| """Class for single privacy loss distributions and computation involving them. |
| |
| The privacy loss distribution (PLD) of two discrete distributions, the upper |
| distribution mu_upper and the lower distribution mu_lower, is defined as a |
| distribution on real numbers generated by first sampling an outcome o |
| according to mu_upper and then outputting the privacy loss |
| ln(mu_upper(o) / mu_lower(o)) where mu_lower(o) and mu_upper(o) are the |
| probability masses of o in mu_lower and mu_upper respectively. This class |
| allows one to create and manipulate privacy loss distributions. |
| |
| PLD allows one to (approximately) compute the epsilon-hockey stick divergence |
| between mu_upper and mu_lower, which is defined as |
| sum_{o} [mu_upper(o) - e^{epsilon} * mu_lower(o)]_+. This quantity in turn |
| governs the parameter delta of (eps, delta)-differential privacy of the |
| corresponding protocol. (See Observation 1 in the supplementary material.) |
| |
| The above definitions extend to continuous distributions. The PLD of two |
| continuous distributions mu_upper and mu_lower is defined as a distribution on |
| real numbers generated by first sampling an outcome o according to mu_upper |
| and then outputting the privacy loss ln(f_{mu_upper}(o) / f_{mu_lower}(o)) |
| where f_{mu_lower}(o) and f_{mu_upper}(o) are the probability density |
| functions at o in mu_lower and mu_upper respectively. Moreover, for continuous |
| distributions the epsilon-hockey stick divergence is defined as |
| integral [f_{mu_upper}(o) - e^{epsilon} * f_{mu_lower}(o)]_+ do. |
| |
| Attributes: |
| rounded_probability_mass_function: the probability mass function for the |
| privacy loss distribution where each value is rounded to be an integer |
| multiple of value_discretization_interval. To avoid floating point errors |
| in the values, the keys here are the integer multipliers. For example, |
| suppose that the probability mass function assigns mass of 0.1 to the |
| value 2 * value_discretization_interval, then the dictionary will have |
| (key: value) pair (2: 0.1). |
| value_discretization_interval: the interval length for which the values of |
| the privacy loss distribution are discretized. In particular, the values |
| are always integer multiples of value_discretization_interval. Smaller |
| value results in more accurate estimates of the privacy loss, at the cost |
| of increased run-time / memory usage. |
| infinity_mass: The probability mass of mu_upper over all the outcomes that |
| can occur only in mu_upper but not in mu_lower. (These outcomes result in |
| privacy loss ln(mu_upper(o) / mu_lower(o)) of infinity.) |
| pessimistic_estimate: whether the rounding is done in such a way that the |
| resulting epsilon-hockey stick divergence computation gives an upper |
| estimate to the real value. |
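| |
| For example (illustrative numbers only), a PLD that takes privacy loss |
| 0.02 = 2 * 0.01 with probability 0.75 and privacy loss -0.02 with |
| probability 0.25, and never takes an infinite privacy loss, can be |
| constructed as: |
| |
| pld = BasicPrivacyLossDistribution( |
| rounded_probability_mass_function={2: 0.75, -2: 0.25}, |
| value_discretization_interval=0.01, |
| infinity_mass=0) |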
| """ |
| |
| def __init__(self, |
| rounded_probability_mass_function: Mapping[int, float], |
| value_discretization_interval: float, |
| infinity_mass: float, |
| pessimistic_estimate: bool = True): |
| self.rounded_probability_mass_function = rounded_probability_mass_function |
| self.value_discretization_interval = value_discretization_interval |
| self.infinity_mass = infinity_mass |
| self.pessimistic_estimate = pessimistic_estimate |
| |
| def get_delta_for_epsilon(self, epsilon: float) -> float: |
| """Computes the epsilon-hockey stick divergence between mu_upper, mu_lower. |
| |
| When this privacy loss distribution corresponds to a mechanism, the |
| epsilon-hockey stick divergence gives the value of delta for which the |
| mechanism is (epsilon, delta)-differentially private. (See Observation 1 in |
| the supplementary material.) |
| |
| Args: |
| epsilon: the epsilon in epsilon-hockey stick divergence. |
| |
| Returns: |
| A non-negative real number which is the epsilon-hockey stick divergence |
| between the upper (mu_upper) and the lower (mu_lower) distributions |
| corresponding to this privacy loss distribution. |
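| |
| For example (a sketch with illustrative numbers), for the PLD taking |
| value 1.0 with probability 0.6 and -1.0 with probability 0.4: |
| |
| pld = BasicPrivacyLossDistribution({1: 0.6, -1: 0.4}, 1.0, 0) |
| pld.get_delta_for_epsilon(0.5) # 0.6 * (1 - e^{0.5 - 1.0}) ~= 0.236 |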
| """ |
| |
| # The epsilon-hockey stick divergence of mu_upper with respect to mu_lower |
| # is equal to (the sum over all the values in the privacy loss distribution |
| # of the probability mass at value times max(0, 1 - e^{epsilon - value}) ) |
| # plus the infinity_mass. |
| shifted_privacy_losses = [] |
| probability_masses = [] |
| for i in self.rounded_probability_mass_function: |
| val = i * self.value_discretization_interval |
| if val > epsilon and self.rounded_probability_mass_function[i] > 0: |
| shifted_privacy_losses.append(epsilon - val) |
| probability_masses.append(self.rounded_probability_mass_function[i]) |
| |
| return self.infinity_mass + np.dot( |
| (1 - np.exp(shifted_privacy_losses)), probability_masses) |
| |
| def get_epsilon_for_delta(self, delta: float) -> float: |
| """Computes epsilon for which hockey stick divergence is at most delta. |
| |
| This function computes the smallest non-negative epsilon for which the |
| epsilon-hockey stick divergence between mu_upper and mu_lower is at most delta. |
| |
| When this privacy loss distribution corresponds to a mechanism and the |
| rounding is pessimistic, the returned value corresponds to an epsilon for |
| which the mechanism is (epsilon, delta)-differentially private. (See |
| Observation 1 in the supplementary material.) |
| |
| Args: |
| delta: the target epsilon-hockey stick divergence. |
| |
| Returns: |
| A non-negative real number which is the smallest epsilon such that the |
| epsilon-hockey stick divergence between the upper (mu_upper) and the |
| lower (mu_lower) distributions is at most delta. When no such finite |
| epsilon exists, return math.inf. |
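| |
| For example (a sketch with illustrative numbers), for the PLD taking |
| value 1.0 with probability 0.6 and -1.0 with probability 0.4: |
| |
| pld = BasicPrivacyLossDistribution({1: 0.6, -1: 0.4}, 1.0, 0) |
| pld.get_epsilon_for_delta(0) # 1.0: the divergence is 0 iff epsilon >= 1 |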
| """ |
| |
| if self.infinity_mass > delta: |
| return math.inf |
| |
| mass_upper = self.infinity_mass |
| mass_lower = 0 |
| for i in sorted( |
| self.rounded_probability_mass_function.keys(), reverse=True): |
| val = i * self.value_discretization_interval |
| |
| if (mass_upper > delta and mass_lower > 0 and math.log( |
| (mass_upper - delta) / mass_lower) >= val): |
| # Epsilon is greater than or equal to val. |
| break |
| |
| mass_upper += self.rounded_probability_mass_function[i] |
| mass_lower += (math.exp(-val) * self.rounded_probability_mass_function[i]) |
| |
| if mass_upper >= delta and mass_lower == 0: |
| # This only occurs when val is very large, which results in exp(-val) |
| # being treated as zero. |
| return max(0, val) |
| |
| if mass_upper <= mass_lower + delta: |
| return 0 |
| else: |
| return math.log((mass_upper - delta) / mass_lower) |
| |
| def validate_composable( |
| self, basic_pld: 'BasicPrivacyLossDistribution'): |
| """Verifies that a given Basic PLD can be composed with this Basic PLD. |
| |
| The two privacy loss distributions must have the same discretization |
| interval and estimate type for the composition to be allowed. |
| |
| Args: |
| basic_pld: the privacy loss distribution to be composed |
| with the current privacy loss distribution. |
| |
| Raises: |
| ValueError: if the value_discretization_interval or pessimistic_estimate |
| of the two PLDs differ. |
| """ |
| if (self.value_discretization_interval != |
| basic_pld.value_discretization_interval): |
| raise ValueError( |
| f'Discretization intervals are different: ' |
| f'{self.value_discretization_interval} vs ' |
| f'{basic_pld.value_discretization_interval}') |
| |
| if (self.pessimistic_estimate != |
| basic_pld.pessimistic_estimate): |
| raise ValueError(f'Estimation types are different: ' |
| f'{self.pessimistic_estimate} vs ' |
| f'{basic_pld.pessimistic_estimate}') |
| |
| def compose( |
| self, |
| basic_pld: 'BasicPrivacyLossDistribution', |
| tail_mass_truncation: float = 1e-15) -> 'BasicPrivacyLossDistribution': |
| """Computes a privacy loss distribution resulting from composing two PLDs. |
| |
| Args: |
| basic_pld: the privacy loss distribution to be composed |
| with the current privacy loss distribution. The two must have the same |
| value_discretization_interval. |
| tail_mass_truncation: an upper bound on the tails of the probability mass |
| of the PLD that might be truncated. |
| |
| Returns: |
| A privacy loss distribution which is the result of composing the two. |
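| |
| For example (a sketch), composing a PLD supported on {-1.0, 1.0} with |
| itself yields a PLD supported on {-2.0, 0.0, 2.0}: |
| |
| pld = BasicPrivacyLossDistribution({1: 0.6, -1: 0.4}, 1.0, 0) |
| composed = pld.compose(pld) # masses 0.36, 0.48, 0.16 at 2, 0, -2 |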
| """ |
| self.validate_composable(basic_pld) |
| |
| # The probability mass function of the resulting distribution is simply the |
| # convolution of the two input probability mass functions. |
| new_rounded_probability_mass_function = common.convolve_dictionary( |
| self.rounded_probability_mass_function, |
| basic_pld.rounded_probability_mass_function, |
| tail_mass_truncation=tail_mass_truncation) |
| |
| new_infinity_mass = ( |
| self.infinity_mass + basic_pld.infinity_mass - |
| (self.infinity_mass * basic_pld.infinity_mass)) |
| |
| if self.pessimistic_estimate: |
| # In the pessimistic case, the truncated probability mass needs to be |
| # treated as if it were infinity. |
| new_infinity_mass += tail_mass_truncation |
| |
| return BasicPrivacyLossDistribution( |
| new_rounded_probability_mass_function, |
| self.value_discretization_interval, |
| new_infinity_mass, |
| pessimistic_estimate=self.pessimistic_estimate) |
| |
| def get_delta_for_epsilon_for_composed_pld( |
| self, basic_pld: 'BasicPrivacyLossDistribution', |
| epsilon: float) -> float: |
| """Computes delta for given epsilon for the result of composing this PLD and a given PLD. |
| |
| The output of this function should be the same as first composing this PLD |
| with basic_pld and then calling get_delta_for_epsilon on the resulting |
| PLD. The main advantage is that this function is faster. |
| |
| Args: |
| basic_pld: the privacy loss distribution to be composed |
| with the current privacy loss distribution. The two must have the same |
| value_discretization_interval. |
| epsilon: the epsilon in epsilon-hockey stick divergence. |
| |
| Returns: |
| A non-negative real number which is the epsilon-hockey stick divergence |
| of the privacy loss distribution which is the result of composing this PLD |
| with basic_pld. |
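| |
| For example (a sketch), the following two expressions agree, up to the |
| truncation mass that compose() adds to infinity_mass: |
| |
| pld.get_delta_for_epsilon_for_composed_pld(other_pld, 1.0) |
| pld.compose(other_pld).get_delta_for_epsilon(1.0) |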
| """ |
| self.validate_composable(basic_pld) |
| |
| this_offset, this_probability_mass_function = common.dictionary_to_list( |
| self.rounded_probability_mass_function) |
| this_exponentiated_privacy_loss_values = np.exp( |
| self.value_discretization_interval * np.arange( |
| this_offset, this_offset + len(this_probability_mass_function))) |
| |
| other_offset, other_probability_mass_function = common.dictionary_to_list( |
| basic_pld.rounded_probability_mass_function) |
| other_exponentiated_privacy_loss_values = np.exp( |
| basic_pld.value_discretization_interval * np.arange( |
| other_offset, other_offset + len(other_probability_mass_function))) |
| |
| exp_epsilon = math.exp(epsilon) |
| |
| # Compute the hockey stick divergence using equation (2) in the |
| # supplementary material. other_cumulative_upper_mass below represents the |
| # summation in equation (3) and other_cumulative_lower_mass represents the |
| # summation in equation (4). |
| |
| other_cumulative_upper_mass = 0 |
| other_cumulative_lower_mass = 0 |
| current_index = len(other_probability_mass_function) - 1 |
| divergence = 0 |
| for this_exponentiated_privacy_loss, this_probability_mass in zip( |
| this_exponentiated_privacy_loss_values, this_probability_mass_function): |
| cutoff = exp_epsilon / this_exponentiated_privacy_loss |
| while current_index >= 0 and other_exponentiated_privacy_loss_values[ |
| current_index] > cutoff: |
| other_cumulative_upper_mass += other_probability_mass_function[ |
| current_index] |
| other_cumulative_lower_mass += ( |
| other_probability_mass_function[current_index] / |
| other_exponentiated_privacy_loss_values[current_index]) |
| current_index -= 1 |
| divergence += this_probability_mass * ( |
| other_cumulative_upper_mass - cutoff * other_cumulative_lower_mass) |
| |
| # The probability that the composed privacy loss is infinite |
| composed_infinity_mass = 1 - (1 - self.infinity_mass) * ( |
| 1 - basic_pld.infinity_mass) |
| |
| return divergence + composed_infinity_mass |
| |
| def self_compose( |
| self, |
| num_times: int, |
| tail_mass_truncation: float = 1e-15) -> 'BasicPrivacyLossDistribution': |
| """Computes PLD resulting from repeated composing the PLD with itself. |
| |
| Args: |
| num_times: the number of times to compose this PLD with itself. |
| tail_mass_truncation: an upper bound on the tails of the probability mass |
| of the PLD that might be truncated. Truncation is currently supported |
| only for pessimistic estimates. |
| |
| Returns: |
| A privacy loss distribution which is the result of the composition. |
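| |
| For example (a sketch), composing a PLD with itself three times: |
| |
| composed = pld.self_compose(3) # ~ pld.compose(pld).compose(pld) |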
| """ |
| if not self.pessimistic_estimate: |
| # Currently support truncation only for pessimistic estimates. |
| tail_mass_truncation = 0 |
| |
| new_rounded_probability_mass_function = common.self_convolve_dictionary( |
| self.rounded_probability_mass_function, |
| num_times, |
| tail_mass_truncation=tail_mass_truncation) |
| |
| new_infinity_mass = (1 - ((1 - self.infinity_mass)**num_times)) |
| new_infinity_mass += tail_mass_truncation |
| |
| return BasicPrivacyLossDistribution( |
| new_rounded_probability_mass_function, |
| self.value_discretization_interval, |
| new_infinity_mass, |
| pessimistic_estimate=self.pessimistic_estimate) |
| |
| |
| def _deprecation_warning(method_name: str): |
| logging.warning('PrivacyLossDistribution.%s() will be deprecated shortly. ' |
| 'Use factory method %s() instead.', method_name, method_name) |
| |
| |
| class PrivacyLossDistribution(object): |
| """Class for privacy loss distributions and computation involving them. |
| |
| The privacy loss distribution (PLD) of two discrete distributions, the upper |
| distribution mu_upper and the lower distribution mu_lower, is defined as a |
| distribution on real numbers generated by first sampling an outcome o |
| according to mu_upper and then outputting the privacy loss |
| ln(mu_upper(o) / mu_lower(o)) where mu_lower(o) and mu_upper(o) are the |
| probability masses of o in mu_lower and mu_upper respectively. This class |
| allows one to create and manipulate privacy loss distributions. |
| |
| PLD allows one to (approximately) compute the epsilon-hockey stick divergence |
| between mu_upper and mu_lower, which is defined as |
| sum_{o} [mu_upper(o) - e^{epsilon} * mu_lower(o)]_+. This quantity in turn |
| governs the parameter delta of (eps, delta)-differential privacy of the |
| corresponding protocol. (See Observation 1 in the supplementary material.) |
| |
| The above definitions extend to continuous distributions. The PLD of two |
| continuous distributions mu_upper and mu_lower is defined as a distribution on |
| real numbers generated by first sampling an outcome o according to mu_upper |
| and then outputting the privacy loss ln(f_{mu_upper}(o) / f_{mu_lower}(o)) |
| where f_{mu_lower}(o) and f_{mu_upper}(o) are the probability density |
| functions at o in mu_lower and mu_upper respectively. Moreover, for continuous |
| distributions the epsilon-hockey stick divergence is defined as |
| integral [f_{mu_upper}(o) - e^{epsilon} * f_{mu_lower}(o)]_+ do. |
| |
| A single privacy loss distribution is represented as an object of the class |
| BasicPrivacyLossDistribution. This class, on the other hand, holds the higher |
| level logic. |
| |
| Namely, this class maintains up to two BasicPrivacyLossDistribution objects: |
| one for the 'add' adjacency type, which specifies the privacy loss |
| distribution for a mechanism M with mu_upper = M(D) and mu_lower = M(D'), |
| where D' contains one more datapoint than D, and one for the 'remove' |
| adjacency type, which specifies the privacy loss distribution for a |
| mechanism M with mu_upper = M(D) and mu_lower = M(D'), where D' contains |
| one fewer datapoint than D. When both privacy loss distributions are the |
| same, only one copy is maintained. |
| |
| While this class offers additional support with respect to the ADD/REMOVE |
| adjacency, this is not an inherent limitation; this class can also be used |
| in the case of other adjacencies such as Substitution. |
| |
| Factory methods in this module provide a convenient way to generate objects of |
| this class associated to various mechanisms. |
| |
| Attributes: |
| _basic_pld_remove: basic privacy loss distribution with respect to REMOVE |
| adjacency. |
| _basic_pld_add: basic privacy loss distribution with respect to ADD |
| adjacency. |
| _symmetric: When True, _basic_pld_add is assumed to be the same as |
| _basic_pld_remove. |
| _basic_pld: an alias for _basic_pld_remove. Useful when _symmetric is True. |
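| |
| Example (a sketch; from_gaussian_mechanism is one of the factory methods |
| defined later in this module): |
| |
| pld = from_gaussian_mechanism(standard_deviation=1.0) |
| pld = pld.self_compose(10) |
| epsilon = pld.get_epsilon_for_delta(1e-6) |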
| """ |
| |
| def __init__( |
| self, |
| rounded_probability_mass_function: Mapping[int, float], |
| value_discretization_interval: float, |
| infinity_mass: float, |
| pessimistic_estimate: bool = True, |
| rounded_probability_mass_function_add: Optional[Mapping[int, |
| float]] = None, |
| infinity_mass_add: Optional[float] = None, |
| symmetric: bool = True): |
| """Initialization method for PrivacyLossDistribution. |
| |
| Args: |
| rounded_probability_mass_function: rounded probability mass function of |
| the basic privacy loss distribution, with respect to REMOVE adjacency. |
| value_discretization_interval: the interval length for which the values of |
| the privacy loss distribution are discretized. In particular, the values |
| are always integer multiples of value_discretization_interval. Smaller |
| value results in more accurate estimates of the privacy loss, at the |
| cost of increased run-time / memory usage. |
| infinity_mass: infinity_mass for basic privacy loss distribution with |
| respect to the REMOVE adjacency. |
| pessimistic_estimate: whether the rounding is done in such a way that the |
| resulting epsilon-hockey stick divergence computation gives an upper |
| estimate to the real value. |
| rounded_probability_mass_function_add: rounded probability mass function |
| of the basic privacy loss distribution, with respect to ADD adjacency. |
| infinity_mass_add: infinity_mass for basic privacy loss distribution with |
| respect to the ADD adjacency. |
| symmetric: When True, the basic privacy loss distribution with respect to |
| ADD adjacency is assumed to be the same as that for REMOVE adjacency. |
| Arguments rounded_probability_mass_function_add, infinity_mass_add are |
| ignored in this case. |
| """ |
| self._basic_pld = BasicPrivacyLossDistribution( |
| rounded_probability_mass_function, value_discretization_interval, |
| infinity_mass, pessimistic_estimate) |
| self._basic_pld_remove = self._basic_pld |
| self._symmetric = symmetric |
| if symmetric: |
| self._basic_pld_add = self._basic_pld_remove |
| if (rounded_probability_mass_function_add is not None or |
| infinity_mass_add is not None): |
| raise ValueError('Details about privacy loss distribution with respect ' |
| 'to ADD adjacency cannot be specified when symmetric') |
| else: |
| if (rounded_probability_mass_function_add is None or |
| infinity_mass_add is None): |
| raise ValueError('Details about privacy loss distribution with respect ' |
| 'to ADD adjacency should be specified when not ' |
| 'symmetric') |
| self._basic_pld_add = BasicPrivacyLossDistribution( |
| rounded_probability_mass_function_add, value_discretization_interval, |
| infinity_mass_add, pessimistic_estimate) |
| |
| @classmethod |
| def from_basic_privacy_loss_distribution( |
| cls, |
| basic_pld: 'BasicPrivacyLossDistribution') -> 'PrivacyLossDistribution': |
| """Constructs from a single BasicPrivacyLossDistribution object.""" |
| return cls(basic_pld.rounded_probability_mass_function, |
| basic_pld.value_discretization_interval, |
| basic_pld.infinity_mass, |
| basic_pld.pessimistic_estimate) |
| |
| @classmethod |
| def from_add_remove_basic_plds( |
| cls, basic_pld_remove: BasicPrivacyLossDistribution, |
| basic_pld_add: BasicPrivacyLossDistribution |
| ) -> 'PrivacyLossDistribution': |
| """Constructs from Basic PLDs for REMOVE and ADD adjacencies. |
| |
| It is assumed that both basic PLDs have the same pessimistic estimate and |
| value_discretization_interval. |
| |
| Args: |
| basic_pld_remove: basic privacy loss distribution with respect to REMOVE |
| adjacency. |
| basic_pld_add: basic privacy loss distribution with respect to ADD |
| adjacency. |
| |
| Returns: |
| Asymmetric PrivacyLossDistribution object with respect to |
| ADD/REMOVE adjacency. |
| """ |
| basic_pld_remove.validate_composable(basic_pld_add) |
| return cls(basic_pld_remove.rounded_probability_mass_function, |
| basic_pld_remove.value_discretization_interval, |
| basic_pld_remove.infinity_mass, |
| basic_pld_remove.pessimistic_estimate, |
| basic_pld_add.rounded_probability_mass_function, |
| basic_pld_add.infinity_mass, |
| symmetric=False) |
| |
| @classmethod |
| def identity( |
| cls, |
| value_discretization_interval: float = 1e-4) -> 'PrivacyLossDistribution': |
| """Constructs an identity privacy loss distribution. |
| |
| This class method will be deprecated shortly. Use factory method identity() |
| instead. |
| |
| Args: |
| value_discretization_interval: the discretization interval for the privacy |
| loss distribution. The values will be rounded up/down to be integer |
| multiples of this number. Smaller value results in more accurate |
| estimates of the privacy loss, at the cost of increased run-time / |
| memory usage. |
| |
| Returns: |
| The privacy loss distribution corresponding to an algorithm with no |
| privacy leak (i.e. output is independent of input). |
| """ |
| _deprecation_warning('identity') |
| return identity(value_discretization_interval) |
| |
| @classmethod |
| def from_two_probability_mass_functions( |
| cls, |
| log_probability_mass_function_lower: Mapping[Any, float], |
| log_probability_mass_function_upper: Mapping[Any, float], |
| pessimistic_estimate: bool = True, |
| value_discretization_interval: float = 1e-4, |
| log_mass_truncation_bound: float = -math.inf |
| ) -> 'PrivacyLossDistribution': |
| """Constructs a privacy loss distribution from mu_lower and mu_upper. |
| |
| This class method will be deprecated shortly. Use factory method |
| from_two_probability_mass_functions() instead. |
| |
| Args: |
| log_probability_mass_function_lower: the probability mass function of |
| mu_lower represented as a dictionary where each key is an outcome o of |
| mu_lower and the corresponding value is the natural log of the |
| probability mass of mu_lower at o. |
| log_probability_mass_function_upper: the probability mass function of |
| mu_upper represented as a dictionary where each key is an outcome o of |
| mu_upper and the corresponding value is the natural log of the |
| probability mass of mu_upper at o. |
| pessimistic_estimate: whether the rounding is done in such a way that the |
| resulting epsilon-hockey stick divergence computation gives an upper |
| estimate to the real value. |
| value_discretization_interval: the discretization interval for the privacy |
| loss distribution. The values will be rounded up/down to be integer |
| multiples of this number. Smaller value results in more accurate |
| estimates of the privacy loss, at the cost of increased run-time / |
| memory usage. |
| log_mass_truncation_bound: when the log of the probability mass of the |
| upper distribution is below this bound, it is either (i) included in |
| infinity_mass in the case of pessimistic estimate or (ii) discarded |
| completely in the case of optimistic estimate. The larger |
| log_mass_truncation_bound is, the more error it may introduce in |
| divergence calculations. |
| |
| Returns: |
| The privacy loss distribution constructed as specified. |
| """ |
| _deprecation_warning('from_two_probability_mass_functions') |
| return from_two_probability_mass_functions( |
| log_probability_mass_function_lower, |
| log_probability_mass_function_upper, |
| pessimistic_estimate, |
| value_discretization_interval, |
| log_mass_truncation_bound) |
| |
| @classmethod |
| def create_from_additive_noise( |
| cls, |
| additive_noise_privacy_loss: |
| 'privacy_loss_mechanism.AdditiveNoisePrivacyLoss', |
| pessimistic_estimate: bool = True, |
| value_discretization_interval: float = 1e-4) -> 'PrivacyLossDistribution': |
| """Constructs the privacy loss distribution of an additive noise mechanism. |
| |
| An additive noise mechanism for computing a scalar-valued function f is a |
| mechanism that outputs the sum of the true value of the function and a noise |
| drawn from a certain distribution mu. This function calculates the privacy |
| loss distribution for such an additive noise mechanism. |
| |
| This class method will be deprecated shortly. Use factory method |
| create_from_additive_noise() instead. |
| |
| Args: |
| additive_noise_privacy_loss: the privacy loss representation of the |
| mechanism. |
| pessimistic_estimate: a value indicating whether the rounding is done in |
| such a way that the resulting epsilon-hockey stick divergence |
| computation gives an upper estimate to the real value. |
| value_discretization_interval: the length of the discretization interval |
| for the privacy loss distribution. The values will be rounded up/down to |
| be integer multiples of this number. Smaller value results in more |
| accurate estimates of the privacy loss, at the cost of increased |
| run-time / memory usage. |
| |
| Returns: |
| The privacy loss distribution constructed as specified. |
| """ |
| _deprecation_warning('create_from_additive_noise') |
| return create_from_additive_noise(additive_noise_privacy_loss, |
| pessimistic_estimate, |
| value_discretization_interval) |
| |
| @classmethod |
| def create_from_cdf( |
| cls, |
| cdf: Callable[[float], float], |
| pessimistic_estimate: bool = True, |
| value_discretization_interval: float = 1e-4, |
| tail_mass_truncation: float = 1e-15) -> 'PrivacyLossDistribution': |
| """Constructs the privacy loss distribution from its cumulative density function. |
| |
| This class method will be deprecated shortly. Use factory method |
| create_from_cdf() instead. |
| |
| Args: |
| cdf: the cumulative distribution function of the privacy loss distribution. |
| pessimistic_estimate: a value indicating whether the rounding is done in |
| such a way that the resulting epsilon-hockey stick divergence |
| computation gives an upper estimate to the real value. |
| value_discretization_interval: the length of the discretization interval |
| for the privacy loss distribution. The values will be rounded up/down to |
| be integer multiples of this number. Smaller value results in more |
| accurate estimates of the privacy loss, at the cost of increased |
| run-time / memory usage. |
| tail_mass_truncation: an upper bound on the tails of the probability mass |
| of the PLD that might be truncated. |
| |
| Returns: |
| The privacy loss distribution constructed as specified. |
| """ |
| _deprecation_warning('create_from_cdf') |
| return create_from_cdf(cdf, pessimistic_estimate, |
| value_discretization_interval, |
| tail_mass_truncation) |
| |
| @classmethod |
| def from_randomized_response( |
| cls, |
| noise_parameter: float, |
| num_buckets: int, |
| pessimistic_estimate: bool = True, |
| value_discretization_interval: float = 1e-4) -> 'PrivacyLossDistribution': |
| """Constructs the privacy loss distribution of Randomized Response. |
| |
| The Randomized Response over k buckets with noise parameter p takes in an |
| input which is one of the k buckets. With probability 1 - p, it simply |
| outputs the input bucket. Otherwise, with probability p, it outputs a bucket |
| drawn uniformly at random from the k buckets. |
| |
| This function calculates the privacy loss distribution for the |
| aforementioned Randomized Response with a given number of buckets, and a |
| given noise parameter. |
| |
| Specifically, suppose that the original input is x and it is changed to x'. |
| Recall that the privacy loss distribution of the Randomized Response |
| mechanism is generated as follows: first pick o according to R(x), where |
| R(x) denote the output distribution of the Randomized Response mechanism |
| on input x. Then, the privacy loss is ln(Pr[R(x) = o] / Pr[R(x') = o]). |
| There are three cases here: |
| - When o = x, ln(Pr[R(x) = o] / Pr[R(x') = o]) = |
| ln(Pr[R(x) = x] / Pr[R(x') = x]). Here Pr[R(x) = x] = 1 - p + p / k |
| and Pr[R(x') = x] = p / k. |
| - When o = x', ln(Pr[R(x) = o] / Pr[R(x') = o]) = |
| ln(Pr[R(x') = x'] / Pr[R(x) = x']), which is just the negation of the |
| previous privacy loss. |
| - When o != x, x', the privacy loss is zero. |
| |
| This class method will be deprecated shortly. Use factory method |
| from_randomized_response() instead. |
| |
| Args: |
| noise_parameter: the probability that the Randomized Response outputs a |
| completely random bucket. |
| num_buckets: the total number of possible input values (which is equal to |
| the total number of possible output values). |
| pessimistic_estimate: a value indicating whether the rounding is done in |
| such a way that the resulting epsilon-hockey stick divergence |
| computation gives an upper estimate to the real value. |
| value_discretization_interval: the length of the discretization interval |
| for the privacy loss distribution. The values will be rounded up/down to |
| be integer multiples of this number. Smaller value results in more |
| accurate estimates of the privacy loss, at the cost of increased |
| run-time / memory usage. |
| |
| Returns: |
| The privacy loss distribution constructed as specified. |
| """ |
| _deprecation_warning('from_randomized_response') |
| return from_randomized_response(noise_parameter, num_buckets, |
| pessimistic_estimate, |
| value_discretization_interval) |
| |
| @classmethod |
| def from_laplace_mechanism( |
| cls, |
| parameter: float, |
| sensitivity: float = 1, |
| pessimistic_estimate: bool = True, |
| value_discretization_interval: float = 1e-4, |
| sampling_prob: float = 1.0) -> 'PrivacyLossDistribution': |
| """Computes the privacy loss distribution of the Laplace mechanism. |
| |
| This class method will be deprecated shortly. Use factory method |
| from_laplace_mechanism() instead. |
| |
| Args: |
| parameter: the parameter of the Laplace distribution. |
| sensitivity: the sensitivity of function f. (i.e. the maximum absolute |
| change in f when an input to a single user changes.) |
| pessimistic_estimate: a value indicating whether the rounding is done in |
| such a way that the resulting epsilon-hockey stick divergence |
| computation gives an upper estimate to the real value. |
| value_discretization_interval: the length of the discretization interval |
| for the privacy loss distribution. The values will be rounded up/down to |
| be integer multiples of this number. Smaller value results in more |
| accurate estimates of the privacy loss, at the cost of increased |
| run-time / memory usage. |
| sampling_prob: sub-sampling probability, a value in (0,1]. |
| |
| Returns: |
| The privacy loss distribution corresponding to the Laplace mechanism with |
| given parameters. |
| """ |
| _deprecation_warning('from_laplace_mechanism') |
| return from_laplace_mechanism(parameter, sensitivity, pessimistic_estimate, |
| value_discretization_interval, sampling_prob) |
| |
| @classmethod |
| def from_gaussian_mechanism( |
| cls, |
| standard_deviation: float, |
| sensitivity: float = 1, |
| pessimistic_estimate: bool = True, |
| value_discretization_interval: float = 1e-4, |
| log_mass_truncation_bound: float = -50, |
| sampling_prob: float = 1.0) -> 'PrivacyLossDistribution': |
| """Creates the privacy loss distribution of the Gaussian mechanism. |
| |
| This class method will be deprecated shortly. Use factory method |
| from_gaussian_mechanism() instead. |
| |
| Args: |
| standard_deviation: the standard_deviation of the Gaussian distribution. |
| sensitivity: the sensitivity of function f. (i.e. the maximum absolute |
| change in f when an input to a single user changes.) |
| pessimistic_estimate: a value indicating whether the rounding is done in |
| such a way that the resulting epsilon-hockey stick divergence |
| computation gives an upper estimate to the real value. |
| value_discretization_interval: the length of the discretization interval |
| for the privacy loss distribution. The values will be rounded up/down to |
| be integer multiples of this number. Smaller value results in more |
| accurate estimates of the privacy loss, at the cost of increased |
| run-time / memory usage. |
| log_mass_truncation_bound: the ln of the probability mass that might be |
| discarded from the noise distribution. The larger this number, the more |
| error it may introduce in divergence calculations. |
| sampling_prob: sub-sampling probability, a value in (0,1]. |
| |
| Returns: |
| The privacy loss distribution corresponding to the Gaussian mechanism with |
| given parameters. |
| """ |
| _deprecation_warning('from_gaussian_mechanism') |
| return from_gaussian_mechanism(standard_deviation, sensitivity, |
| pessimistic_estimate, |
| value_discretization_interval, |
| log_mass_truncation_bound, |
| sampling_prob) |
| |
| @classmethod |
| def from_discrete_laplace_mechanism( |
| cls, |
| parameter: float, |
| sensitivity: int = 1, |
| pessimistic_estimate: bool = True, |
| value_discretization_interval: float = 1e-4, |
| sampling_prob: float = 1.0) -> 'PrivacyLossDistribution': |
| """Computes the privacy loss distribution of the Discrete Laplace mechanism. |
| |
| This class method will be deprecated shortly. Use factory method |
| from_discrete_laplace_mechanism() instead. |
| |
| Args: |
| parameter: the parameter of the discrete Laplace distribution. |
| sensitivity: the sensitivity of function f. (i.e. the maximum absolute |
| change in f when an input to a single user changes.) |
| pessimistic_estimate: a value indicating whether the rounding is done in |
| such a way that the resulting epsilon-hockey stick divergence |
| computation gives an upper estimate to the real value. |
| value_discretization_interval: the length of the discretization interval |
| for the privacy loss distribution. The values will be rounded up/down to |
| be integer multiples of this number. Smaller value results in more |
| accurate estimates of the privacy loss, at the cost of increased |
| run-time / memory usage. |
| sampling_prob: sub-sampling probability, a value in (0,1]. |
| |
| Returns: |
| The privacy loss distribution corresponding to the Discrete Laplace |
| mechanism with given parameters. |
| """ |
| _deprecation_warning('from_discrete_laplace_mechanism') |
| return from_discrete_laplace_mechanism(parameter, sensitivity, |
| pessimistic_estimate, |
| value_discretization_interval, |
| sampling_prob) |
| |
| @classmethod |
| def from_discrete_gaussian_mechanism( |
| cls, |
| sigma: float, |
| sensitivity: int = 1, |
| truncation_bound: Optional[int] = None, |
| pessimistic_estimate: bool = True, |
| value_discretization_interval: float = 1e-4, |
| sampling_prob: float = 1.0) -> 'PrivacyLossDistribution': |
| """Creates the privacy loss distribution of the discrete Gaussian mechanism. |
| |
| This class method will be deprecated shortly. Use factory method |
| from_discrete_gaussian_mechanism() instead. |
| |
| Args: |
| sigma: the parameter of the discrete Gaussian distribution. Note that |
| unlike the (continuous) Gaussian distribution this is not equal to the |
| standard deviation of the noise. |
| sensitivity: the sensitivity of function f. (i.e. the maximum absolute |
| change in f when an input to a single user changes.) |
| truncation_bound: bound for truncating the noise, i.e. the noise will only |
| have a support in [-truncation_bound, truncation_bound]. When not |
| specified, truncation_bound will be chosen in such a way that the mass |
| of the noise outside of this range is at most 1e-30. |
| pessimistic_estimate: a value indicating whether the rounding is done in |
| such a way that the resulting epsilon-hockey stick divergence |
| computation gives an upper estimate to the real value. |
| value_discretization_interval: the length of the discretization interval |
| for the privacy loss distribution. The values will be rounded up/down to |
| be integer multiples of this number. Smaller value results in more |
| accurate estimates of the privacy loss, at the cost of increased |
| run-time / memory usage. |
| sampling_prob: sub-sampling probability, a value in (0,1]. |
| |
| Returns: |
| The privacy loss distribution corresponding to the discrete Gaussian |
| mechanism with given parameters. |
| """ |
| _deprecation_warning('from_discrete_gaussian_mechanism') |
| return from_discrete_gaussian_mechanism(sigma, sensitivity, |
| truncation_bound, |
| pessimistic_estimate, |
| value_discretization_interval, |
| sampling_prob) |
| |
| @classmethod |
| def from_privacy_parameters( |
| cls, |
| privacy_parameters: common.DifferentialPrivacyParameters, |
| value_discretization_interval: float = 1e-4) -> 'PrivacyLossDistribution': |
| """Constructs pessimistic PLD from epsilon and delta parameters. |
| |
| When the mechanism is (epsilon, delta)-differentially private, the following |
| is a pessimistic estimate of its privacy loss distribution (see Section 3.5 |
| of the supplementary material for more explanation): |
| - infinity with probability delta. |
| - epsilon with probability (1 - delta) / (1 + exp(-eps)) |
| - -epsilon with probability (1 - delta) / (1 + exp(eps)) |
| |
| This class method will be deprecated shortly. Use factory method |
| from_privacy_parameters() instead. |
| |
| Args: |
| privacy_parameters: the privacy guarantee of the mechanism. |
| value_discretization_interval: the length of the discretization interval |
| for the privacy loss distribution. The values will be rounded up/down to |
| be integer multiples of this number. Smaller value results in more |
| accurate estimates of the privacy loss, at the cost of increased |
| run-time / memory usage. |
| |
| Returns: |
| The privacy loss distribution constructed as specified. |
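| |
| Example (a sketch; assuming common.DifferentialPrivacyParameters is |
| constructed from epsilon and delta, in that order): |
| |
| params = common.DifferentialPrivacyParameters(1.0, 1e-6) |
| pld = from_privacy_parameters(params) |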
| """ |
| _deprecation_warning('from_privacy_parameters') |
| return from_privacy_parameters(privacy_parameters, |
| value_discretization_interval) |
| |
| def get_delta_for_epsilon(self, epsilon: float) -> float: |
| """Computes the epsilon-hockey stick divergence between mu_upper, mu_lower. |
| |
| When this privacy loss distribution corresponds to a mechanism, the |
| epsilon-hockey stick divergence gives the value of delta for which the |
| mechanism is (epsilon, delta)-differentially private. (See Observation 1 in |
| the supplementary material.) |
| |
| Args: |
| epsilon: the epsilon in epsilon-hockey stick divergence. |
| |
| Returns: |
| A non-negative real number which is the epsilon-hockey stick divergence |
| between the upper (mu_upper) and the lower (mu_lower) distributions |
| corresponding to this privacy loss distribution. |
| """ |
| delta_remove = self._basic_pld_remove.get_delta_for_epsilon(epsilon) |
| if self._symmetric: |
| return delta_remove |
| delta_add = self._basic_pld_add.get_delta_for_epsilon(epsilon) |
| return max(delta_remove, delta_add) |
| |
| def get_epsilon_for_delta(self, delta: float) -> float: |
| """Computes epsilon for which hockey stick divergence is at most delta. |
| |
| This function computes the smallest non-negative epsilon for which the |
| epsilon-hockey stick divergence between mu_upper and mu_lower is at most delta. |
| |
| When this privacy loss distribution corresponds to a mechanism and the |
| rounding is pessimistic, the returned value corresponds to an epsilon for |
| which the mechanism is (epsilon, delta)-differentially private. (See |
| Observation 1 in the supplementary material.) |
| |
| Args: |
| delta: the target epsilon-hockey stick divergence. |
| |
| Returns: |
| A non-negative real number which is the smallest epsilon such that the |
| epsilon-hockey stick divergence between the upper (mu_upper) and the |
| lower (mu_lower) distributions is at most delta. When no such finite |
| epsilon exists, return math.inf. |
| """ |
| epsilon_remove = self._basic_pld_remove.get_epsilon_for_delta(delta) |
| if self._symmetric: |
| return epsilon_remove |
| epsilon_add = self._basic_pld_add.get_epsilon_for_delta(delta) |
| return max(epsilon_remove, epsilon_add) |
| |
| def validate_composable(self, |
| privacy_loss_distribution: 'PrivacyLossDistribution'): |
| """Verifies that a given PLD can be composed with this PLD. |
| |
| The two privacy loss distributions must have the same discretization |
| interval and estimate type for the composition to be allowed. |
| |
| Args: |
| privacy_loss_distribution: the privacy loss distribution to be composed |
| with the current privacy loss distribution. |
| |
| Raises: |
| ValueError: if the value_discretization_interval or pessimistic_estimate |
| of the two PLDs differ. |
| """ |
| self._basic_pld_remove.validate_composable( |
| privacy_loss_distribution._basic_pld_remove) # pylint:disable=protected-access |
| |
| def compose( |
| self, |
| privacy_loss_distribution: 'PrivacyLossDistribution', |
| tail_mass_truncation: float = 1e-15, |
| ) -> 'PrivacyLossDistribution': |
| """Computes a privacy loss distribution resulting from composing two PLDs. |
| |
| Args: |
| privacy_loss_distribution: the privacy loss distribution to be composed |
| with the current privacy loss distribution. The two must have the same |
| value_discretization_interval. |
| tail_mass_truncation: an upper bound on the tails of the probability mass |
| of the PLD that might be truncated. |
| |
| Returns: |
| A privacy loss distribution which is the result of composing the two. |
| """ |
| basic_pld_remove = self._basic_pld_remove.compose( |
| privacy_loss_distribution._basic_pld_remove, tail_mass_truncation) # pylint:disable=protected-access |
| if self._symmetric and privacy_loss_distribution._symmetric: # pylint:disable=protected-access |
| return PrivacyLossDistribution.from_basic_privacy_loss_distribution( |
| basic_pld_remove) |
| basic_pld_add = self._basic_pld_add.compose( |
| privacy_loss_distribution._basic_pld_add, tail_mass_truncation) # pylint:disable=protected-access |
| return PrivacyLossDistribution.from_add_remove_basic_plds( |
| basic_pld_remove, basic_pld_add) |
| |
| def get_delta_for_epsilon_for_composed_pld( |
| self, privacy_loss_distribution: 'PrivacyLossDistribution', |
| epsilon: float) -> float: |
| """Computes delta for given epsilon for the result of composing this PLD and a given PLD. |
| |
| The output of this function should be the same as first composing this PLD |
| with privacy_loss_distribution and then calling get_delta_for_epsilon on |
| the resulting PLD. The main advantage is that this function is faster. |
| |
| Args: |
| privacy_loss_distribution: the privacy loss distribution to be composed |
| with the current privacy loss distribution. The two must have the same |
| value_discretization_interval. |
| epsilon: the epsilon in epsilon-hockey stick divergence. |
| |
| Returns: |
| A non-negative real number which is the epsilon-hockey stick divergence |
| of the privacy loss distribution which is the result of composing this PLD |
| with privacy_loss_distribution. |
| """ |
| delta_remove = self._basic_pld_remove.get_delta_for_epsilon_for_composed_pld( |
| privacy_loss_distribution._basic_pld_remove, epsilon) # pylint:disable=protected-access |
| if self._symmetric and privacy_loss_distribution._symmetric: # pylint:disable=protected-access |
| return delta_remove |
| delta_add = self._basic_pld_add.get_delta_for_epsilon_for_composed_pld( |
| privacy_loss_distribution._basic_pld_add, epsilon) # pylint:disable=protected-access |
| return max(delta_remove, delta_add) |
| |
| def self_compose( |
| self, |
| num_times: int, |
| tail_mass_truncation: float = 1e-15) -> 'PrivacyLossDistribution': |
| """Computes PLD resulting from repeated composing the PLD with itself. |
| |
| Args: |
| num_times: the number of times to compose this PLD with itself. |
| tail_mass_truncation: an upper bound on the tails of the probability mass |
| of the PLD that might be truncated. Truncation is currently supported |
| only for pessimistic estimates. |
| |
| Returns: |
| A privacy loss distribution which is the result of the composition. |
| """ |
| basic_pld_remove = self._basic_pld_remove.self_compose( |
| num_times, tail_mass_truncation) |
| if self._symmetric: |
| return PrivacyLossDistribution.from_basic_privacy_loss_distribution( |
| basic_pld_remove) |
| basic_pld_add = self._basic_pld_add.self_compose(num_times, |
| tail_mass_truncation) |
| return PrivacyLossDistribution.from_add_remove_basic_plds( |
| basic_pld_remove, basic_pld_add) |
| |
| |
| def identity( |
| value_discretization_interval: float = 1e-4) -> PrivacyLossDistribution: |
| """Constructs an identity privacy loss distribution. |
| |
| Args: |
| value_discretization_interval: the discretization interval for the privacy |
| loss distribution. The values will be rounded up/down to be integer |
| multiples of this number. Smaller value results in more accurate estimates |
| of the privacy loss, at the cost of increased run-time / memory usage. |
| |
| Returns: |
| The privacy loss distribution corresponding to an algorithm with no |
| privacy leak (i.e. output is independent of input). |
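| |
| For example (a sketch), the identity PLD carries no privacy loss and |
| composes neutrally with other PLDs: |
| |
| pld = identity() |
| pld.get_epsilon_for_delta(0) # 0: all probability mass is at loss 0 |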
| """ |
| return PrivacyLossDistribution({0: 1}, value_discretization_interval, 0) |
| |
| |
| def from_two_probability_mass_functions( |
| log_probability_mass_function_lower: Mapping[Any, float], |
| log_probability_mass_function_upper: Mapping[Any, float], |
| pessimistic_estimate: bool = True, |
| value_discretization_interval: float = 1e-4, |
| log_mass_truncation_bound: float = -math.inf) -> PrivacyLossDistribution: |
| """Constructs a privacy loss distribution from mu_lower and mu_upper. |
| |
| Args: |
| log_probability_mass_function_lower: the probability mass function of |
| mu_lower represented as a dictionary where each key is an outcome o of |
| mu_lower and the corresponding value is the natural log of the |
| probability mass of mu_lower at o. |
| log_probability_mass_function_upper: the probability mass function of |
| mu_upper represented as a dictionary where each key is an outcome o of |
| mu_upper and the corresponding value is the natural log of the |
| probability mass of mu_upper at o. |
| pessimistic_estimate: whether the rounding is done in such a way that the |
| resulting epsilon-hockey stick divergence computation gives an upper |
| estimate to the real value. |
| value_discretization_interval: the discretization interval for the privacy |
| loss distribution. The values will be rounded up/down to be integer |
| multiples of this number. Smaller value results in more accurate estimates |
| of the privacy loss, at the cost of increased run-time / memory usage. |
| log_mass_truncation_bound: when the log of the probability mass of the upper |
| distribution is below this bound, it is either (i) included in |
| infinity_mass in the case of pessimistic estimate or (ii) discarded |
| completely in the case of optimistic estimate. The larger |
| log_mass_truncation_bound is, the more error it may introduce in |
| divergence calculations. |
| |
| Returns: |
| The privacy loss distribution constructed as specified. |
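| |
| Example (a sketch with two outcomes 'a' and 'b'): |
| |
| pld = from_two_probability_mass_functions( |
| log_probability_mass_function_lower={'a': math.log(0.25), |
| 'b': math.log(0.75)}, |
| log_probability_mass_function_upper={'a': math.log(0.5), |
| 'b': math.log(0.5)}) |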
| """ |
| |
| infinity_mass = 0 |
| for outcome in log_probability_mass_function_upper: |
| if log_probability_mass_function_lower.get(outcome, -math.inf) == -math.inf: |
| # When an outcome only appears in the upper distribution but not in the |
| # lower distribution, then it must be counted in infinity_mass as such |
| # an outcome contributes to the hockey stick divergence. |
| infinity_mass += math.exp(log_probability_mass_function_upper[outcome]) |
| |
| # Compute the (non-discretized) probability mass function for the privacy |
| # loss distribution. |
| probability_mass_function = {} |
| for outcome in log_probability_mass_function_lower: |
| if log_probability_mass_function_lower[outcome] == -math.inf: |
| # This outcome never occurs in mu_lower. This case was already included |
| # as infinity_mass above. |
| continue |
| elif (log_probability_mass_function_upper.get(outcome, -math.inf) > |
| log_mass_truncation_bound): |
| # When the probability mass of mu_upper at the outcome is greater than |
| # the threshold, add it to the distribution. |
| privacy_loss_value = ( |
| log_probability_mass_function_upper[outcome] - |
| log_probability_mass_function_lower[outcome]) |
| probability_mass_function[privacy_loss_value] = ( |
| probability_mass_function.get(privacy_loss_value, 0) + |
| math.exp(log_probability_mass_function_upper[outcome])) |
| else: |
| if pessimistic_estimate: |
| # When the probability mass of mu_upper at the outcome is no more than |
| # the threshold and we would like to get a pessimistic estimate, |
| # account for this in infinity_mass. |
| infinity_mass += math.exp( |
| log_probability_mass_function_upper.get(outcome, -math.inf)) |
| |
| # Discretize the probability mass so that the values are integer multiples |
| # of value_discretization_interval |
| rounded_probability_mass_function = collections.defaultdict(lambda: 0) |
| round_fn = math.ceil if pessimistic_estimate else math.floor |
| for val in probability_mass_function: |
| rounded_probability_mass_function[round_fn( |
| val / value_discretization_interval)] += probability_mass_function[val] |
| |
| return PrivacyLossDistribution(rounded_probability_mass_function, |
| value_discretization_interval, |
| infinity_mass, |
| pessimistic_estimate=pessimistic_estimate) |
| |
| |
| def create_from_additive_noise( |
| additive_noise_privacy_loss: |
| 'privacy_loss_mechanism.AdditiveNoisePrivacyLoss', |
| pessimistic_estimate: bool = True, |
| value_discretization_interval: float = 1e-4) -> PrivacyLossDistribution: |
| """Constructs the privacy loss distribution of an additive noise mechanism. |
| |
| An additive noise mechanism for computing a scalar-valued function f is a |
| mechanism that outputs the sum of the true value of the function and a noise |
| drawn from a certain distribution mu. This function calculates the privacy |
| loss distribution for such an additive noise mechanism. |
| |
| Args: |
| additive_noise_privacy_loss: the privacy loss representation of the |
| mechanism. |
| pessimistic_estimate: a value indicating whether the rounding is done in |
| such a way that the resulting epsilon-hockey stick divergence computation |
| gives an upper estimate to the real value. |
| value_discretization_interval: the length of the discretization interval for |
| the privacy loss distribution. The values will be rounded up/down to be |
| integer multiples of this number. Smaller value results in more accurate |
| estimates of the privacy loss, at the cost of increased run-time / memory |
| usage. |
| |
| Returns: |
| The privacy loss distribution constructed as specified. |
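| |
| Example (a sketch; LaplacePrivacyLoss from the privacy_loss_mechanism |
| module is assumed as the privacy loss representation): |
| |
| laplace_loss = privacy_loss_mechanism.LaplacePrivacyLoss(parameter=1.0) |
| pld = create_from_additive_noise(laplace_loss) |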
| """ |
| round_fn = math.ceil if pessimistic_estimate else math.floor |
| |
| tail_pld = additive_noise_privacy_loss.privacy_loss_tail() |
| |
| rounded_probability_mass_function = collections.defaultdict(lambda: 0) |
| infinity_mass = tail_pld.tail_probability_mass_function.get(math.inf, 0) |
| for privacy_loss in tail_pld.tail_probability_mass_function: |
| if privacy_loss != math.inf: |
| rounded_probability_mass_function[round_fn( |
| privacy_loss / value_discretization_interval |
| )] += tail_pld.tail_probability_mass_function[privacy_loss] |
| |
| if additive_noise_privacy_loss.discrete_noise: |
| xs = list( |
| range( |
| math.ceil(tail_pld.lower_x_truncation) - 1, |
| math.floor(tail_pld.upper_x_truncation) + 1)) |
| |
| # Compute PMF for the x's. Note that a vectorized call to mu_upper_cdf can |
| # be much faster than many scalar calls. |
| cdf_values = additive_noise_privacy_loss.mu_upper_cdf(xs) |
| probability_mass = cdf_values[1:] - cdf_values[:-1] |
| |
| for x, prob in zip(xs[1:], probability_mass): |
| rounded_probability_mass_function[round_fn( |
| additive_noise_privacy_loss.privacy_loss(x) / |
| value_discretization_interval)] += prob |
| else: |
| lower_x = tail_pld.lower_x_truncation |
| rounded_down_value = math.floor( |
| additive_noise_privacy_loss.privacy_loss(lower_x) / |
| value_discretization_interval) |
| upper_x_privacy_loss = additive_noise_privacy_loss.privacy_loss( |
| tail_pld.upper_x_truncation) |
| |
| # Compute discretization intervals for PLD approximation. |
| xs, rounded_values = [lower_x], [] |
| x = lower_x |
| while x < tail_pld.upper_x_truncation: |
| if (value_discretization_interval * rounded_down_value <= |
| upper_x_privacy_loss): |
| x = tail_pld.upper_x_truncation |
| else: |
| x = additive_noise_privacy_loss.inverse_privacy_loss( |
| value_discretization_interval * rounded_down_value) |
| |
| xs.append(x) |
| rounded_values.append(round_fn(rounded_down_value + 0.5)) |
| rounded_down_value -= 1 |
| |
| # Compute PLD for discretization intervals. Note that a vectorized call to |
| # mu_upper_cdf is much faster than many scalar calls. |
| cdf_values = additive_noise_privacy_loss.mu_upper_cdf(xs) |
| probability_mass = cdf_values[1:] - cdf_values[:-1] |
| |
| # Each x in [lower_x, upper_x] results in privacy loss that lies in |
| # [value_discretization_interval * rounded_down_value, |
| # value_discretization_interval * (rounded_down_value + 1)] |
| for rounded_value, prob in zip(rounded_values, probability_mass): |
| rounded_probability_mass_function[rounded_value] += prob |
| |
| return PrivacyLossDistribution( |
| dict(rounded_probability_mass_function), |
| value_discretization_interval, |
| infinity_mass, |
| pessimistic_estimate=pessimistic_estimate) |
| |
| |
| def create_from_cdf( |
| cdf: Callable[[float], float], |
| pessimistic_estimate: bool = True, |
| value_discretization_interval: float = 1e-4, |
| tail_mass_truncation: float = 1e-15) -> PrivacyLossDistribution: |
| """Constructs the privacy loss distribution from its cumulative density function. |
| |
| Args: |
| cdf: the cumulative distribution function of the privacy loss distribution. |
| pessimistic_estimate: a value indicating whether the rounding is done in |
| such a way that the resulting epsilon-hockey stick divergence computation |
| gives an upper estimate to the real value. |
| value_discretization_interval: the length of the discretization interval for |
| the privacy loss distribution. The values will be rounded up/down to be |
| integer multiples of this number. Smaller value results in more accurate |
| estimates of the privacy loss, at the cost of increased run-time / memory |
| usage. |
| tail_mass_truncation: an upper bound on the tails of the probability mass of |
| the PLD that might be truncated. |
| |
| Returns: |
| The privacy loss distribution constructed as specified. |
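| |
| Example (a sketch; a privacy loss distributed as a standard Gaussian, |
| using math.erf for its CDF): |
| |
| cdf = lambda x: 0.5 * (1 + math.erf(x / math.sqrt(2))) |
| pld = create_from_cdf(cdf) |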
| """ |
| rounded_probability_mass_function = {} |
| |
| # Construct the distribution for value greater than or equal to zero. |
| rounded_value = 1 if pessimistic_estimate else 0 |
| value = 0 |
| while cdf(value) < 1 - tail_mass_truncation / 2: |
| rounded_probability_mass_function[rounded_value] = ( |
| cdf(value + value_discretization_interval) - cdf(value)) |
| rounded_value += 1 |
| value += value_discretization_interval |
| |
| # Construct the distribution for value less than zero. |
| rounded_value = 0 if pessimistic_estimate else -1 |
| value = 0 |
| while cdf(value) > tail_mass_truncation / 2: |
| rounded_probability_mass_function[rounded_value] = ( |
| cdf(value) - cdf(value - value_discretization_interval)) |
| rounded_value -= 1 |
| value -= value_discretization_interval |
| |
| return PrivacyLossDistribution( |
| rounded_probability_mass_function, |
| value_discretization_interval, |
| tail_mass_truncation if pessimistic_estimate else 0, |
| pessimistic_estimate=pessimistic_estimate) |
| |
| |
| def from_randomized_response( |
| noise_parameter: float, |
| num_buckets: int, |
| pessimistic_estimate: bool = True, |
| value_discretization_interval: float = 1e-4 |
| ) -> PrivacyLossDistribution: |
| """Constructs the privacy loss distribution of Randomized Response. |
| |
  The Randomized Response mechanism over k buckets with noise parameter p
  takes as input one of the k buckets. With probability 1 - p, it simply
  outputs the input bucket. Otherwise, with probability p, it outputs a
  bucket drawn uniformly at random from the k buckets.
| |
| This function calculates the privacy loss distribution for the |
| aforementioned Randomized Response with a given number of buckets, and a |
| given noise parameter. |
| |
| Specifically, suppose that the original input is x and it is changed to x'. |
| Recall that the privacy loss distribution of the Randomized Response |
| mechanism is generated as follows: first pick o according to R(x), where |
  R(x) denotes the output distribution of the Randomized Response mechanism
| on input x. Then, the privacy loss is ln(Pr[R(x) = o] / Pr[R(x') = o]). |
| There are three cases here: |
| - When o = x, ln(Pr[R(x) = o] / Pr[R(x') = o]) = |
| ln(Pr[R(x) = x] / Pr[R(x') = x]). Here Pr[R(x) = x] = 1 - p + p / k |
| and Pr[R(x') = x] = p / k. |
| - When o = x', ln(Pr[R(x) = o] / Pr[R(x') = o]) = |
| ln(Pr[R(x') = x'] / Pr[R(x) = x']), which is just the negation of the |
| previous privacy loss. |
| - When o != x, x', the privacy loss is zero. |
| |
| Args: |
| noise_parameter: the probability that the Randomized Response outputs a |
| completely random bucket. |
| num_buckets: the total number of possible input values (which is equal to |
| the total number of possible output values). |
    pessimistic_estimate: a boolean indicating whether the rounding is done
      in such a way that the resulting epsilon-hockey stick divergence
      computation gives an upper estimate of the true value.
    value_discretization_interval: the length of the discretization interval
      for the privacy loss distribution. The values will be rounded up/down
      to be integer multiples of this number. A smaller value results in more
      accurate estimates of the privacy loss, at the cost of increased
      run-time / memory usage.
| |
| Returns: |
| The privacy loss distribution constructed as specified. |
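
  Example:
    With noise_parameter p = 0.5 and num_buckets k = 4, we have
    Pr[R(x) = x] = 0.5 + 0.5 / 4 = 0.625 and Pr[R(x') = x] = 0.5 / 4 = 0.125.
    Hence the privacy loss takes value ln(0.625 / 0.125) = ln(5) with
    probability 0.625, value -ln(5) with probability 0.125, and value 0 with
    the remaining probability 0.25.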
| """ |
| |
| if noise_parameter <= 0 or noise_parameter >= 1: |
| raise ValueError(f'Noise parameter must be strictly between 0 and 1: ' |
| f'{noise_parameter}') |
| |
| if num_buckets <= 1: |
| raise ValueError( |
| f'Number of buckets must be strictly greater than 1: {num_buckets}') |
| |
| round_fn = math.ceil if pessimistic_estimate else math.floor |
| |
| rounded_probability_mass_function = collections.defaultdict(lambda: 0) |
| |
| # Probability that the output is equal to the input, i.e., Pr[R(x) = x] |
| probability_output_equal_input = ((1 - noise_parameter) + |
| noise_parameter / num_buckets) |
| # Probability that the output is equal to a specific bucket that is not the |
| # input, i.e., Pr[R(x') = x] for x' != x. |
| probability_output_not_input = noise_parameter / num_buckets |
| |
| # Add privacy loss for the case o = x |
| rounded_value = round_fn( |
| math.log(probability_output_equal_input / probability_output_not_input) |
| / value_discretization_interval) |
| rounded_probability_mass_function[ |
| rounded_value] += probability_output_equal_input |
| |
| # Add privacy loss for the case o = x' |
| rounded_value = round_fn( |
| math.log(probability_output_not_input / probability_output_equal_input) |
| / value_discretization_interval) |
| rounded_probability_mass_function[ |
| rounded_value] += probability_output_not_input |
| |
| # Add privacy loss for the case o != x, x' |
| rounded_probability_mass_function[0] += ( |
| probability_output_not_input * (num_buckets - 2)) |
| |
| return PrivacyLossDistribution( |
| rounded_probability_mass_function, |
| value_discretization_interval, |
| 0, |
| pessimistic_estimate=pessimistic_estimate) |
| |
| |
| def _pld_for_subsampled_mechanism( |
| single_pld: Callable[[privacy_loss_mechanism.AdjacencyType], |
| PrivacyLossDistribution], |
| sampling_prob: float = 1.0) -> PrivacyLossDistribution: |
| """Computes the privacy loss distribution for subsampled mechanisms. |
| |
| It is assumed that when sub-sampling probability is 1, the privacy loss |
| distributions corresponding to ADD and REMOVE adjacencies are identical. |
| |
| Args: |
| single_pld: method for computing the privacy loss distributions with respect |
| to ADD and REMOVE adjacency types. |
| sampling_prob: sub-sampling probability, a value in (0,1]. |
| |
| Returns: |
    A symmetric privacy loss distribution when sampling_prob = 1; an
    asymmetric privacy loss distribution corresponding to the ADD and REMOVE
    adjacency types when sampling_prob < 1.
| """ |
| if sampling_prob == 1.0: |
| return single_pld(privacy_loss_mechanism.AdjacencyType.REMOVE) |
| |
| return PrivacyLossDistribution.from_add_remove_basic_plds( |
| single_pld(privacy_loss_mechanism.AdjacencyType.REMOVE)._basic_pld, # pylint:disable=protected-access |
| single_pld(privacy_loss_mechanism.AdjacencyType.ADD)._basic_pld) # pylint:disable=protected-access |
| |
| |
| def from_laplace_mechanism( |
| parameter: float, |
| sensitivity: float = 1, |
| pessimistic_estimate: bool = True, |
| value_discretization_interval: float = 1e-4, |
| sampling_prob: float = 1.0) -> PrivacyLossDistribution: |
| """Computes the privacy loss distribution of the Laplace mechanism. |
| |
| Args: |
| parameter: the parameter of the Laplace distribution. |
    sensitivity: the sensitivity of function f, i.e., the maximum absolute
      change in f when the input of a single user changes.
    pessimistic_estimate: a boolean indicating whether the rounding is done
      in such a way that the resulting epsilon-hockey stick divergence
      computation gives an upper estimate of the true value.
    value_discretization_interval: the length of the discretization interval
      for the privacy loss distribution. The values will be rounded up/down
      to be integer multiples of this number. A smaller value results in more
      accurate estimates of the privacy loss, at the cost of increased
      run-time / memory usage.
| sampling_prob: sub-sampling probability, a value in (0,1]. |
| |
| Returns: |
| The privacy loss distribution corresponding to the Laplace mechanism with |
| given parameters. |
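
  Example:
    A minimal usage sketch; get_delta_for_epsilon refers to the divergence
    computation exposed by PrivacyLossDistribution in this file:

      pld = from_laplace_mechanism(parameter=1.0, sensitivity=1.0)
      # The Laplace mechanism with these parameters is (1, 0)-differentially
      # private, so this delta is ~0 up to discretization error.
      delta = pld.get_delta_for_epsilon(1.0)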
| """ |
| |
| def single_laplace_pld( |
| adjacency_type: privacy_loss_mechanism.AdjacencyType |
| ) -> PrivacyLossDistribution: |
| return create_from_additive_noise( |
| privacy_loss_mechanism.LaplacePrivacyLoss( |
| parameter, |
| sensitivity=sensitivity, |
| sampling_prob=sampling_prob, |
| adjacency_type=adjacency_type), |
| pessimistic_estimate=pessimistic_estimate, |
| value_discretization_interval=value_discretization_interval) |
| |
| return _pld_for_subsampled_mechanism(single_laplace_pld, sampling_prob) |
| |
| |
| def from_gaussian_mechanism( |
| standard_deviation: float, |
| sensitivity: float = 1, |
| pessimistic_estimate: bool = True, |
| value_discretization_interval: float = 1e-4, |
| log_mass_truncation_bound: float = -50, |
| sampling_prob: float = 1.0) -> PrivacyLossDistribution: |
| """Creates the privacy loss distribution of the Gaussian mechanism. |
| |
| Args: |
| standard_deviation: the standard_deviation of the Gaussian distribution. |
    sensitivity: the sensitivity of function f, i.e., the maximum absolute
      change in f when the input of a single user changes.
    pessimistic_estimate: a boolean indicating whether the rounding is done
      in such a way that the resulting epsilon-hockey stick divergence
      computation gives an upper estimate of the true value.
    value_discretization_interval: the length of the discretization interval
      for the privacy loss distribution. The values will be rounded up/down
      to be integer multiples of this number. A smaller value results in more
      accurate estimates of the privacy loss, at the cost of increased
      run-time / memory usage.
| log_mass_truncation_bound: the ln of the probability mass that might be |
| discarded from the noise distribution. The larger this number, the more |
| error it may introduce in divergence calculations. |
| sampling_prob: sub-sampling probability, a value in (0,1]. |
| |
| Returns: |
| The privacy loss distribution corresponding to the Gaussian mechanism with |
| given parameters. |
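
  Example:
    A minimal sketch of composition, the main use case for PLDs;
    self_compose refers to the composition method exposed by
    PrivacyLossDistribution in this file:

      pld = from_gaussian_mechanism(standard_deviation=10.0)
      # PLD of composing the same Gaussian mechanism 20 times.
      composed_pld = pld.self_compose(20)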
| """ |
| |
| def single_gaussian_pld( |
| adjacency_type: privacy_loss_mechanism.AdjacencyType |
| ) -> PrivacyLossDistribution: |
| return create_from_additive_noise( |
| privacy_loss_mechanism.GaussianPrivacyLoss( |
| standard_deviation, |
| sensitivity=sensitivity, |
| pessimistic_estimate=pessimistic_estimate, |
| log_mass_truncation_bound=log_mass_truncation_bound, |
| sampling_prob=sampling_prob, |
| adjacency_type=adjacency_type), |
| pessimistic_estimate=pessimistic_estimate, |
| value_discretization_interval=value_discretization_interval) |
| |
| return _pld_for_subsampled_mechanism(single_gaussian_pld, sampling_prob) |
| |
| |
| def from_discrete_laplace_mechanism( |
| parameter: float, |
| sensitivity: int = 1, |
| pessimistic_estimate: bool = True, |
| value_discretization_interval: float = 1e-4, |
| sampling_prob: float = 1.0) -> PrivacyLossDistribution: |
| """Computes the privacy loss distribution of the Discrete Laplace mechanism. |
| |
| Args: |
| parameter: the parameter of the discrete Laplace distribution. |
    sensitivity: the sensitivity of function f, i.e., the maximum absolute
      change in f when the input of a single user changes.
    pessimistic_estimate: a boolean indicating whether the rounding is done
      in such a way that the resulting epsilon-hockey stick divergence
      computation gives an upper estimate of the true value.
    value_discretization_interval: the length of the discretization interval
      for the privacy loss distribution. The values will be rounded up/down
      to be integer multiples of this number. A smaller value results in more
      accurate estimates of the privacy loss, at the cost of increased
      run-time / memory usage.
| sampling_prob: sub-sampling probability, a value in (0,1]. |
| |
| Returns: |
| The privacy loss distribution corresponding to the Discrete Laplace |
| mechanism with given parameters. |
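
  Example:
    A minimal sketch; the discrete Laplace mechanism with parameter a and
    sensitivity s is (a * s)-differentially private, so this delta is ~0 up
    to discretization error:

      pld = from_discrete_laplace_mechanism(parameter=0.5, sensitivity=2)
      delta = pld.get_delta_for_epsilon(1.0)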
| """ |
| |
| def single_discrete_laplace_pld( |
| adjacency_type: privacy_loss_mechanism.AdjacencyType |
| ) -> PrivacyLossDistribution: |
| return create_from_additive_noise( |
| privacy_loss_mechanism.DiscreteLaplacePrivacyLoss( |
| parameter, |
| sensitivity=sensitivity, |
| sampling_prob=sampling_prob, |
| adjacency_type=adjacency_type), |
| pessimistic_estimate=pessimistic_estimate, |
| value_discretization_interval=value_discretization_interval) |
| |
| return _pld_for_subsampled_mechanism(single_discrete_laplace_pld, |
| sampling_prob) |
| |
| |
| def from_discrete_gaussian_mechanism( |
| sigma: float, |
| sensitivity: int = 1, |
| truncation_bound: Optional[int] = None, |
| pessimistic_estimate: bool = True, |
| value_discretization_interval: float = 1e-4, |
| sampling_prob: float = 1.0) -> PrivacyLossDistribution: |
| """Creates the privacy loss distribution of the discrete Gaussian mechanism. |
| |
| Args: |
    sigma: the parameter of the discrete Gaussian distribution. Note that,
      unlike for the (continuous) Gaussian distribution, this is not equal to
      the standard deviation of the noise.
    sensitivity: the sensitivity of function f, i.e., the maximum absolute
      change in f when the input of a single user changes.
    truncation_bound: bound for truncating the noise, i.e. the noise will
      only have support in [-truncation_bound, truncation_bound]. When not
      specified, truncation_bound will be chosen in such a way that the mass
      of the noise outside of this range is at most 1e-30.
    pessimistic_estimate: a boolean indicating whether the rounding is done
      in such a way that the resulting epsilon-hockey stick divergence
      computation gives an upper estimate of the true value.
    value_discretization_interval: the length of the discretization interval
      for the privacy loss distribution. The values will be rounded up/down
      to be integer multiples of this number. A smaller value results in more
      accurate estimates of the privacy loss, at the cost of increased
      run-time / memory usage.
| sampling_prob: sub-sampling probability, a value in (0,1]. |
| |
| Returns: |
| The privacy loss distribution corresponding to the discrete Gaussian |
| mechanism with given parameters. |
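
  Example:
    A minimal sketch with an explicit truncation bound on the noise support:

      pld = from_discrete_gaussian_mechanism(sigma=2.0, truncation_bound=50)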
| """ |
| |
| def single_discrete_gaussian_pld( |
| adjacency_type: privacy_loss_mechanism.AdjacencyType |
| ) -> PrivacyLossDistribution: |
| return create_from_additive_noise( |
| privacy_loss_mechanism.DiscreteGaussianPrivacyLoss( |
| sigma, |
| sensitivity=sensitivity, |
| truncation_bound=truncation_bound, |
| sampling_prob=sampling_prob, |
| adjacency_type=adjacency_type), |
| pessimistic_estimate=pessimistic_estimate, |
| value_discretization_interval=value_discretization_interval) |
| |
| return _pld_for_subsampled_mechanism(single_discrete_gaussian_pld, |
| sampling_prob) |
| |
| |
| def from_privacy_parameters( |
| privacy_parameters: common.DifferentialPrivacyParameters, |
| value_discretization_interval: float = 1e-4) -> PrivacyLossDistribution: |
| """Constructs pessimistic PLD from epsilon and delta parameters. |
| |
| When the mechanism is (epsilon, delta)-differentially private, the following |
| is a pessimistic estimate of its privacy loss distribution (see Section 3.5 |
| of the supplementary material for more explanation): |
  - infinity with probability delta.
  - epsilon with probability (1 - delta) / (1 + exp(-epsilon)).
  - -epsilon with probability (1 - delta) / (1 + exp(epsilon)).
| |
| Args: |
| privacy_parameters: the privacy guarantee of the mechanism. |
    value_discretization_interval: the length of the discretization interval
      for the privacy loss distribution. The values will be rounded up/down
      to be integer multiples of this number. A smaller value results in more
      accurate estimates of the privacy loss, at the cost of increased
      run-time / memory usage.
| |
| Returns: |
| The privacy loss distribution constructed as specified. |
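
  Example:
    For epsilon = ln(3) and delta = 0, the resulting PLD takes value ln(3)
    with probability (1 - 0) / (1 + exp(-ln(3))) = 3 / 4 and value -ln(3)
    with probability 1 / 4, with no probability mass at infinity.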
| """ |
| delta = privacy_parameters.delta |
| epsilon = privacy_parameters.epsilon |
| |
| rounded_probability_mass_function = collections.defaultdict(lambda: 0) |
| |
| rounded_probability_mass_function[math.ceil( |
| epsilon / |
| value_discretization_interval)] = (1 - delta) / (1 + math.exp(-epsilon)) |
| rounded_probability_mass_function[math.ceil( |
| -epsilon / |
| value_discretization_interval)] += (1 - delta) / (1 + math.exp(epsilon)) |
| |
| return PrivacyLossDistribution(rounded_probability_mass_function, |
| value_discretization_interval, |
| privacy_parameters.delta) |