# blob: 02c07c36e2a16ce20a24fc03c63cd8e0c5f04547 [file] [log] [blame] [edit]
# Copyright 2021 Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for adding noise to satisfy central privacy."""
import dataclasses
import numpy as np
from scipy import stats
@dataclasses.dataclass
class AveragePrivacyParam():
  """Privacy parameters for calling get_private_average().

  Attributes:
    gaussian_standard_deviation: standard deviation of the Gaussian noise.
      Must be nonnegative; 0 indicates no noise.
    sensitivity: sensitivity for averaging. Must be positive.

  Raises:
    ValueError: on construction, if gaussian_standard_deviation is negative or
      NaN, or if sensitivity is not positive or is NaN.
  """
  gaussian_standard_deviation: float
  sensitivity: float

  def __post_init__(self) -> None:
    # Both checks are written as "not (valid)" instead of "(invalid)": every
    # ordering comparison against NaN is False, so `x < 0` would silently let
    # a NaN parameter through, whereas `not x >= 0` rejects it.
    # Standard deviation can be 0 to indicate no noise.
    if not self.gaussian_standard_deviation >= 0:
      raise ValueError(
          f'Gaussian standard deviation was {self.gaussian_standard_deviation},'
          ' but it must be nonnegative.'
      )
    if not self.sensitivity > 0:
      raise ValueError(
          f'Sensitivity for averaging was {self.sensitivity}, but it must be '
          'positive.')
def get_private_average(nonprivate_points: np.ndarray, private_count: int,
                        average_privacy_param: AveragePrivacyParam,
                        dim: int) -> np.ndarray:
  """Returns a differentially private average of the given data points.

  The points are summed along axis 0, Gaussian noise with standard deviation
  average_privacy_param.gaussian_standard_deviation is added to each of the dim
  coordinates of the sum, and the noisy sum is divided by private_count.

  Args:
    nonprivate_points: data points to be averaged, may be empty. Integer-valued
      points are accepted; their sum is promoted to float64 before noising.
    private_count: differentially private count of the number of data points.
      This is provided to save privacy budget since, in our applications, it is
      often already computed elsewhere. Required to be >= 1.
    average_privacy_param: privacy parameters for the private average.
    dim: dimension of the data points.

  Returns:
    A differentially private average of the given data points.

  Raises:
    ValueError: if private_count < 1.
  """
  if private_count < 1:
    raise ValueError(
        f'get_private_average() called with private_count={private_count}')

  sum_points = np.sum(nonprivate_points, axis=0)
  if np.issubdtype(sum_points.dtype, np.integer):
    # An integer sum cannot absorb float64 noise in place: NumPy refuses the
    # float -> int cast and raises. Promote it so integer-valued points work.
    sum_points = sum_points.astype(np.float64)

  # Add noise.
  sum_points += np.random.normal(
      scale=average_privacy_param.gaussian_standard_deviation, size=dim)
  return sum_points / private_count
@dataclasses.dataclass
class CountPrivacyParam():
  """Privacy parameters for calling get_private_count().

  Attributes:
    dlaplace_param: parameter of the Discrete Laplace noise added to the count.
      Must be positive; infinity (not 0) means no noise.

  Raises:
    ValueError: on construction, if dlaplace_param is not positive or is NaN.
  """
  dlaplace_param: float

  def __post_init__(self) -> None:
    # Note: No noise means dlaplace_param == inf, not 0.
    # Written as "not > 0" rather than "<= 0" so that NaN, for which every
    # ordering comparison is False, is rejected here instead of slipping
    # through validation.
    if not self.dlaplace_param > 0:
      raise ValueError(f'Discrete Laplace param was {self.dlaplace_param}, '
                       'but it must be positive.')
def get_private_count(nonprivate_count: int,
                      count_privacy_param: CountPrivacyParam) -> int:
  """Adds Discrete Laplace noise to a count to make it differentially private.

  Args:
    nonprivate_count: the exact (unnoised) number of data points in a group.
    count_privacy_param: privacy parameters carrying the Discrete Laplace
      parameter used to noise the count.

  Returns:
    nonprivate_count itself when the Discrete Laplace parameter equals
    infinity; otherwise nonprivate_count plus one sample drawn from
    scipy.stats.dlaplace with that parameter.
  """
  noise_param = count_privacy_param.dlaplace_param

  # An infinite parameter is the "no noise" setting, so no sample is drawn.
  is_noiseless = noise_param == np.inf
  if is_noiseless:
    return nonprivate_count

  noise = stats.dlaplace.rvs(noise_param)
  return nonprivate_count + noise