| # Copyright 2021 Google LLC. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| """Utilities for adding noise to satisfy central privacy.""" |
| |
| import dataclasses |
| from typing import Type |
| |
| import numpy as np |
| from scipy import stats |
| |
| from clustering import clustering_params |
| from dp_accounting.pld import accountant |
| from dp_accounting.pld import common |
| |
| |
@dataclasses.dataclass
class AveragePrivacyParam:
  """Noise configuration consumed by get_private_average().

  Attributes:
    gaussian_standard_deviation: standard deviation of the Gaussian noise.
      Must be nonnegative; 0 indicates that no noise is added.
    sensitivity: sensitivity used for averaging. Must be strictly positive.
  """
  gaussian_standard_deviation: float
  sensitivity: float

  def __post_init__(self):
    """Validates the field values.

    Raises:
      ValueError: if the standard deviation is negative or the sensitivity is
        not positive.
    """
    # Zero is a legal standard deviation: it means "add no noise".
    if self.gaussian_standard_deviation < 0:
      raise ValueError(
          f'Gaussian standard deviation was {self.gaussian_standard_deviation}, '
          'but it must be nonnegative.')

    if self.sensitivity <= 0:
      raise ValueError(
          f'Sensitivity for averaging was {self.sensitivity}, but it must be '
          'positive.')

  @classmethod
  def from_budget_split(
      cls: Type['AveragePrivacyParam'],
      privacy_param: clustering_params.DifferentialPrivacyParam,
      privacy_budget_split: clustering_params.PrivacyBudgetSplit,
      radius: float) -> 'AveragePrivacyParam':
    """Builds the parameters from the share of the budget given to the sum.

    Args:
      privacy_param: overall privacy parameters; its epsilon and delta are
        read.
      privacy_budget_split: budget split; frac_sum is the fraction of epsilon
        used here.
      radius: used both as the sensitivity passed to the noise calibration and
        as the sensitivity of the returned parameters.

    Returns:
      An AveragePrivacyParam whose standard deviation is 0 when the epsilon
      available to the sum is infinite, and otherwise the value returned by
      accountant.get_smallest_gaussian_noise() for a single query.
    """
    epsilon_for_sum = privacy_budget_split.frac_sum * privacy_param.epsilon

    # Infinite epsilon: no noise is needed, so skip the calibration entirely.
    if epsilon_for_sum == np.inf:
      return cls(0, radius)

    dp_parameters = common.DifferentialPrivacyParameters(
        epsilon_for_sum, privacy_param.delta)
    noise_std = accountant.get_smallest_gaussian_noise(
        dp_parameters, num_queries=1, sensitivity=radius)
    return cls(noise_std, radius)
| |
| |
def get_private_average(nonprivate_points: np.ndarray, private_count: int,
                        average_privacy_param: 'AveragePrivacyParam',
                        dim: int) -> np.ndarray:
  """Returns a differentially private average of the given data points.

  The points are summed along axis 0, Gaussian noise with standard deviation
  average_privacy_param.gaussian_standard_deviation is added to each of the
  dim coordinates of the sum, and the noisy sum is divided by private_count.

  Args:
    nonprivate_points: data points to be averaged, may be empty. Integer and
      boolean inputs are accepted; their sum is promoted to float64 before the
      noise is added.
    private_count: differentially private count of the number of data points.
      This is provided to save privacy budget since, in our applications, it is
      often already computed elsewhere. Required to be >= 1.
    average_privacy_param: privacy parameters for the private average.
    dim: dimension of the data points.

  Returns:
    A differentially private average of the given data points.

  Raises:
    ValueError: if private_count < 1.
  """
  if private_count < 1:
    raise ValueError(
        f'get_private_average() called with private_count={private_count}')

  sum_points = np.sum(nonprivate_points, axis=0)

  # The noise is float64. Adding it in place to an integer (or boolean) sum
  # would raise a casting error, so promote such sums to float first. Sums that
  # are already floating point keep their dtype.
  if not np.issubdtype(sum_points.dtype, np.inexact):
    sum_points = sum_points.astype(np.float64)

  # Add noise.
  sum_points += np.random.normal(
      scale=average_privacy_param.gaussian_standard_deviation, size=dim)
  return sum_points / private_count
| |
| |
@dataclasses.dataclass
class CountPrivacyParam:
  """Noise configuration consumed by get_private_count().

  Attributes:
    laplace_param: parameter of the Laplace noise. Must be strictly positive;
      np.inf (rather than 0) represents "no noise".
  """
  laplace_param: float

  def __post_init__(self):
    """Validates the field value.

    Raises:
      ValueError: if laplace_param is not positive.
    """
    # The "no noise" setting is laplace_param == inf, not 0, because the
    # laplace param is inverted for accounting. Hence 0 is rejected here.
    if self.laplace_param <= 0:
      raise ValueError(f'Laplace param was {self.laplace_param}, '
                       'but it must be positive.')

  @classmethod
  def from_budget_split(
      cls: Type['CountPrivacyParam'],
      clustering_privacy_param: clustering_params.DifferentialPrivacyParam,
      budget_split: clustering_params.PrivacyBudgetSplit,
      depth: int) -> 'CountPrivacyParam':
    """Derives the laplace param from the budget assigned to group counts.

    Args:
      clustering_privacy_param: overall privacy parameters; its epsilon is
        read.
      budget_split: budget split; frac_group_count is the fraction of epsilon
        used for counts.
      depth: depth of the tree, measured in edges along a path.

    Returns:
      A CountPrivacyParam whose laplace_param is the count epsilon divided
      evenly across the levels of the tree.
    """
    # Depth counts edges, so a tree of the given depth has depth + 1 levels
    # (level 0 included). Each level receives an equal share of epsilon.
    num_levels = depth + 1.0
    epsilon_for_counts = (
        budget_split.frac_group_count * clustering_privacy_param.epsilon)
    return cls(laplace_param=epsilon_for_counts / num_levels)
| |
| |
def get_private_count(nonprivate_count: int,
                      count_privacy_param: CountPrivacyParam) -> int:
  """Adds Discrete Laplace noise to a count.

  Args:
    nonprivate_count: the (unnoised) count of the number of data points in a
      group.
    count_privacy_param: privacy parameters for calculating the private count.

  Returns:
    The non-private count plus one sample of Discrete Laplace noise drawn with
    count_privacy_param.laplace_param. When that parameter is infinite, no
    noise is drawn and the non-private count is returned unchanged.
  """
  laplace_param = count_privacy_param.laplace_param
  if laplace_param != np.inf:
    noise = stats.dlaplace.rvs(laplace_param)
    return nonprivate_count + noise
  # Infinite parameter means "no noise".
  return nonprivate_count