blob: 8570e0dac20126923caed518a0981f451a995065 [file] [log] [blame]
//
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "algorithms/distributions.h"
#include <cmath>
#include <cstdlib>
#include "absl/memory/memory.h"
#include "absl/random/random.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/string_view.h"
#include "algorithms/rand.h"
#include "algorithms/util.h"
#include "base/status_macros.h"
namespace differential_privacy {
namespace internal {
namespace {
static constexpr double kPi = 3.14159265358979323846;
// The square root of the maximum number n of Bernoulli trials from which a
// binomial sample is drawn. Larger values result in more fine grained noise,
// but increase the chance of sampling inaccuracies due to overflows. The
// probability of such an event will be roughly 2^-45 or less, if the square
// root is set to 2^57.
static constexpr double kBinomialBound = (double)(1LL << 57);
// Approximates the probability of a random sample m + n / 2 drawn from a
// binomial distribution of n Bernoulli trials that have a success probability
// of 1 / 2 each. The approximation is taken from Lemma 7 of the noise
// generation documentation available in
// https://github.com/google/differential-privacy/blob/main/common_docs/Secure_Noise_Generation.pdf
double ApproximateBinomialProbability(double sqrt_n, int64_t m) {
const double n = sqrt_n * sqrt_n;
if (std::abs(m) > sqrt_n * std::sqrt(std::log(n)) / 2) {
return 0;
}
return std::sqrt(2 / kPi) / sqrt_n * std::exp(-2.0 * m * m / n) *
(1 - (0.4 * std::pow(std::log(n), 1.5) / sqrt_n));
}
} // namespace
GaussianDistribution::Builder& GaussianDistribution::Builder::SetStddev(
double stddev) {
stddev_ = stddev;
return *this;
}
absl::StatusOr<std::unique_ptr<GaussianDistribution>>
GaussianDistribution::Builder::Build() {
RETURN_IF_ERROR(
ValidateIsFiniteAndNonNegative(stddev_, "Standard deviation"));
return absl::WrapUnique<GaussianDistribution>(
new GaussianDistribution(stddev_));
}
GaussianDistribution::GaussianDistribution(double stddev) : stddev_(stddev) {
DCHECK_GE(stddev, 0.0);
}
double GaussianDistribution::Sample(double scale) {
DCHECK_GT(scale, 0);
// TODO: make graceful behaviour when sigma is too big.
double sigma = scale * stddev_;
// Use at least the lowest positive floating point number as granularity when
// sigma is very small.
double granularity =
std::max(GetGranularity(scale), std::numeric_limits<double>::min());
// The square root of n is chosen in a way that ensures that the respective
// binomial distribution approximates a Gaussian distribution close enough.
// The sqrt(n) is taken instead of n, to ensure that all results of arithmetic
// operations fit in 64 bit integer range.
double sqrt_n = 2.0 * sigma / granularity;
return SampleBinomial(sqrt_n) * granularity;
}
double GaussianDistribution::Sample() { return Sample(1.0); }
double GaussianDistribution::Stddev() const { return stddev_; }
double GaussianDistribution::GetGranularity(double scale) const {
double sigma = scale * stddev_;
return GetNextPowerOfTwo(2 * sigma / kBinomialBound);
}
double GaussianDistribution::cdf(double stddev, double x) {
DCHECK_GT(stddev, 0);
return (std::erfc(-x / (stddev * std::sqrt(2)))) / 2;
}
double GaussianDistribution::Quantile(double stddev, double x) {
DCHECK_GT(stddev, 0);
return stddev * std::sqrt(2) * InverseErrorFunction(2 * x - 1);
}
GeometricDistribution::Builder& GeometricDistribution::Builder::SetLambda(
double lambda) {
lambda_ = lambda;
return *this;
}
absl::StatusOr<std::unique_ptr<GeometricDistribution>>
GeometricDistribution::Builder::Build() {
RETURN_IF_ERROR(ValidateIsFiniteAndNonNegative(lambda_, "Lambda"));
return absl::WrapUnique<GeometricDistribution>(
new GeometricDistribution(lambda_));
}
GeometricDistribution::GeometricDistribution(double lambda) : lambda_(lambda) {
DCHECK_GE(lambda, 0);
}
double GaussianDistribution::SampleGeometric() {
int geom_sample = 0;
while (absl::Bernoulli(SecureURBG::GetSingleton(), 0.5)) ++geom_sample;
return geom_sample;
}
// Returns a random sample m where {@code m + n / 2} is drawn from a binomial
// distribution of n Bernoulli trials that have a success probability of 1 / 2
// each. The sampling technique is based on Bringmann et al.'s rejection
// sampling approach proposed in "Internal DLA: Efficient Simulation of a
// Physical Growth Model", available
// https://people.mpi-inf.mpg.de/~kbringma/paper/2014ICALP.pdf. The square root
// of n must be at least 10^6. This is to ensure an accurate approximation of a
// Gaussian distribution.
double GaussianDistribution::SampleBinomial(double sqrt_n) {
long long step_size =
static_cast<long long>(std::round(std::sqrt(2.0) * sqrt_n + 1));
SecureURBG& random = SecureURBG::GetSingleton();
while (true) {
int geom_sample = SampleGeometric();
int two_sided_geom =
absl::Bernoulli(random, 0.5) ? geom_sample : (-geom_sample - 1);
int64_t uniform_sample = absl::Uniform(random, 0u, step_size);
int64_t result = step_size * two_sided_geom + uniform_sample;
double result_prob = ApproximateBinomialProbability(sqrt_n, result);
double reject_prob = UniformDouble();
if (result_prob > 0 && reject_prob > 0 &&
reject_prob <
result_prob * step_size * std::pow(2.0, geom_sample - 2)) {
return result;
}
}
}
double GeometricDistribution::GetUniformDouble() { return UniformDouble(); }
int64_t GeometricDistribution::Sample() { return Sample(1.0); }
int64_t GeometricDistribution::Sample(double scale) {
if (lambda_ == std::numeric_limits<double>::infinity()) {
return 0;
}
double lambda = lambda_ / scale;
if (GetUniformDouble() >
-1.0 * std::expm1(-1.0 * lambda * std::numeric_limits<int64_t>::max())) {
return std::numeric_limits<int64_t>::max();
}
// Performs a binary search for the sample over the range of possible output
// values. At each step we split the remaining range in two and pick the left
// or right side proportional to the probability that the output falls within
// that range, ending when we have only a single possible sample remaining.
int64_t lo = 0;
int64_t hi = std::numeric_limits<int64_t>::max();
while (hi - lo > 1) {
int64_t mid =
lo - static_cast<int64_t>(std::floor(
(std::log(0.5) + std::log1p(std::exp(lambda * (lo - hi)))) /
lambda));
mid = std::min(std::max(mid, lo + 1), hi - 1);
double q = std::expm1(lambda * (lo - mid)) / std::expm1(lambda * (lo - hi));
if (GetUniformDouble() <= q) {
hi = mid;
} else {
lo = mid;
}
}
return hi - 1;
}
double GeometricDistribution::Lambda() { return lambda_; }
// This is 2^K, with K hardcoded as 40. To generate laplace noise, we sample an
// integer from a geometric distribution, randomly flip the sign, then multiply
// it by a small power of two. That small power of 2 is the smallest power of 2
// larger than ((sensitivity / epsilon) / 2^K). As a consequence, large values
// of K will result in more fine grained noise, but increase the chance of an
// overflow during noise sampling. The probability of such an event will be well
// below 2^-1000, if the granularity parameter is set to a value of 2^40 or less
// and the epsilon passed to addNoise is at least 2^-50.
constexpr double kLaplaceGranularityParam = static_cast<double>(int64_t{1} << 40);
absl::Status LaplaceDistribution::ValidateEpsilon(double epsilon) {
RETURN_IF_ERROR(ValidateIsFiniteAndPositive(epsilon, "Epsilon"));
if (epsilon < kMinEpsilon) {
return absl::InvalidArgumentError(
absl::StrCat("Epsilon must be at least 2^-50, but is ", epsilon, "."));
}
return absl::OkStatus();
}
// Calculates 'r' from the secure noise paper (see
// ../../common_docs/Secure_Noise_Generation.pdf)
double CalculateGranularityForLaplace(double diversity) {
return GetNextPowerOfTwo(diversity / kLaplaceGranularityParam);
}
absl::StatusOr<double> LaplaceDistribution::CalculateGranularity(
double epsilon, double sensitivity) {
RETURN_IF_ERROR(ValidateEpsilon(epsilon));
RETURN_IF_ERROR(ValidateIsFiniteAndPositive(sensitivity, "Sensitivity"));
return CalculateGranularityForLaplace(sensitivity / epsilon);
}
LaplaceDistribution::Builder& LaplaceDistribution::Builder::SetEpsilon(
double epsilon) {
epsilon_ = epsilon;
return *this;
}
LaplaceDistribution::Builder& LaplaceDistribution::Builder::SetSensitivity(
double sensitivity) {
sensitivity_ = sensitivity;
return *this;
}
absl::StatusOr<std::unique_ptr<LaplaceDistribution>>
LaplaceDistribution::Builder::Build() {
RETURN_IF_ERROR(ValidateEpsilon(epsilon_));
RETURN_IF_ERROR(ValidateIsFiniteAndPositive(sensitivity_, "Sensitivity"));
const double diversity = sensitivity_ / epsilon_;
const double granularity = CalculateGranularityForLaplace(diversity);
ASSIGN_OR_RETURN(
std::unique_ptr<GeometricDistribution> geometric_distro,
GeometricDistribution::Builder()
.SetLambda(granularity * epsilon_ / (sensitivity_ + granularity))
.Build());
return absl::WrapUnique<LaplaceDistribution>(new LaplaceDistribution(
epsilon_, sensitivity_, granularity, std::move(geometric_distro)));
}
LaplaceDistribution::LaplaceDistribution(
double epsilon, double sensitivity, double granularity,
std::unique_ptr<GeometricDistribution> geometric_distro)
: epsilon_(epsilon),
sensitivity_(sensitivity),
granularity_(granularity),
geometric_distro_(std::move(geometric_distro)) {}
LaplaceDistribution::LaplaceDistribution(double epsilon, double sensitivity)
: epsilon_(epsilon), sensitivity_(sensitivity) {
absl::StatusOr<double> granularity =
CalculateGranularity(epsilon_, sensitivity_);
CHECK(granularity.ok()) << granularity.status();
granularity_ = granularity.value();
double lambda;
if (sensitivity_ == 0) {
// Builder validation should prevent this case from ever happening, but if
// not, prevent a possible divide-by-zero.
lambda = std::numeric_limits<double>::infinity();
} else {
lambda = granularity_ * epsilon_ / (sensitivity_ + granularity_);
}
GeometricDistribution::Builder builder;
geometric_distro_ = builder.SetLambda(lambda).Build().value();
}
double LaplaceDistribution::GetUniformDouble() { return UniformDouble(); }
bool LaplaceDistribution::GetBoolean() {
return absl::Bernoulli(SecureURBG::GetSingleton(), 0.5);
}
double LaplaceDistribution::Sample() {
int64_t sample;
bool sign;
do {
sample = geometric_distro_->Sample();
sign = GetBoolean();
// Keep a sample of 0 only if the sign is positive. Otherwise, the
// probability of 0 would be twice as high as it should be.
} while (sample == 0 && !sign);
const int64_t signed_sample = sign ? sample : -sample;
return signed_sample * granularity_;
}
double LaplaceDistribution::GetGranularity() { return granularity_; }
double LaplaceDistribution::GetVariance() const {
return 2.0 * std::pow(GetDiversity(), 2);
}
double LaplaceDistribution::GetDiversity() const {
return sensitivity_ / epsilon_;
}
double LaplaceDistribution::GetMinEpsilon() { return kMinEpsilon; }
double LaplaceDistribution::cdf(double b, double x) {
if (x > 0) {
return 1 - .5 * std::exp(-x / b);
}
return .5 * std::exp(x / b);
}
double LaplaceDistribution::Quantile(double b, double p) {
if (p > 0.5) {
return -b * std::log(2 - 2 * p);
}
return b * std::log(2 * p);
}
int64_t LaplaceDistribution::MemoryUsed() {
int64_t memory = sizeof(LaplaceDistribution);
if (geometric_distro_ != nullptr) {
memory += sizeof(*geometric_distro_);
}
return memory;
}
} // namespace internal
} // namespace differential_privacy