blob: 924c8fda190f6b1e2f4ab480a54e5372f03e9bcf [file] [log] [blame]
//
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef DIFFERENTIAL_PRIVACY_ALGORITHMS_UTIL_H_
#define DIFFERENTIAL_PRIVACY_ALGORITHMS_UTIL_H_
#include <algorithm>
#include <cmath>
#include <limits>
#include <numeric>
#include <string>
#include <type_traits>
#include <vector>
#include "base/logging.h"
#include "absl/base/attributes.h"
#include "absl/status/status.h"
#include "base/statusor.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "base/status_macros.h"
namespace differential_privacy {
// XORs two strings together character by character. If one std::string is shorter
// than the other it will be repeated until it is the same length as the longer
// std::string before being XORed. If either std::string is empty, the other will be
// returned.
std::string XorStrings(const std::string& longer, const std::string& shorter);
// Arbitrary default value for epsilon. The algorithm interface falls back on
// this value whenever one is not provided. This value should only be used for
// testing convenience. For any production use case, please set your own epsilon
// based on privacy needs.
ABSL_DEPRECATED("Use your own epsilon based on privacy considerations.")
double DefaultEpsilon();
// Returns the smallest power of 2 greater than or equal to n. n must be > 0.
// Includes negative powers.
double GetNextPowerOfTwo(double n);
// Rounds n to the nearest multiple of base. Ties are broken towards +inf.
// If base is 0, returns n.
double RoundToNearestMultiple(double n, double base);
// Return 1.0 if n > 0, -1.0 if n < 0, and 0 if n == 0.
double sign(double n);
// Approximate the inverse of the error function.
// Implementation based on Table 5 in Giles' paper
// on approximating the inverse of the error function
// (https://people.maths.ox.ac.uk/gilesm/files/gems_erfinv.pdf).
double InverseErrorFunction(double x);
// Estimation of the inverse cdf of the normal distribution centered at mu with
// standard deviation sigma, at probability p. Based on Abramowitz and Stegun
// formula 26.2.23. The error of the estimation is bounded by 4.5 e-4. This
// function will fail if higher accuracy is required.
base::StatusOr<double> Qnorm(double p, double mu = 0.0, double sigma = 1.0);
template <typename T>
inline const T& Clamp(const T& low, const T& high, const T& value) {
// Prevents errors in ordering the arguments.
DCHECK(!(high < low));
if (high < value) return high;
if (value < low) return low;
return value;
}
// When T is an integral type, return true and assign the addition result if the
// addition will not overflow. Otherwise, assign the numeric limit to result and
// return false.
template <typename T, std::enable_if_t<std::is_integral<T>::value>* = nullptr>
inline bool SafeAdd(T lhs, T rhs, T* result) {
if (lhs > 0) {
// For negative rhs, we will never overflow.
if (rhs > 0) {
T safe_distance = std::numeric_limits<T>::max() - lhs;
if (safe_distance < rhs) {
*result = std::numeric_limits<T>::max();
return false;
}
}
} else if (lhs < 0) {
// For positive rhs, we will never overflow.
if (rhs < 0) {
T safe_distance = std::numeric_limits<T>::lowest() - lhs;
if (safe_distance > rhs) {
*result = std::numeric_limits<T>::lowest();
return false;
}
}
}
*result = lhs + rhs;
return true;
}
// When T is a floating-point type, perform a simple addition, since
// floating-point types don't have the same overflow issues as integral types.
template <typename T,
std::enable_if_t<std::is_floating_point<T>::value>* = nullptr>
inline bool SafeAdd(T lhs, T rhs, T* result) {
*result = lhs + rhs;
return true;
}
// When T is an integral type, return true and assign the subtraction result if
// the subtraction will not overflow. Otherwise, assign the numeric limit to
// result and return false.
template <typename T, std::enable_if_t<std::is_integral<T>::value>* = nullptr>
inline bool SafeSubtract(T lhs, T rhs, T* result) {
// For integral values the min numeric limit is larger in magnitude than the
// max numeric limit, so we cannot negate it. For unsigned types, the lowest
// numeric limit is 0. For signed types, it is negative.
if (rhs == std::numeric_limits<T>::lowest() && rhs != 0) {
if (lhs > 0) {
*result = std::numeric_limits<T>::lowest();
return false;
} else {
*result = lhs - rhs;
return true;
}
}
// For all other values of rhs, add the negation.
return SafeAdd(lhs, -rhs, result);
}
// When T is a floating-point type, perform a simple subtraction, since
// floating-point types don't have the same overflow issues as integral types.
template <typename T,
std::enable_if_t<std::is_floating_point<T>::value>* = nullptr>
inline bool SafeSubtract(T lhs, T rhs, T* result) {
*result = lhs - rhs;
return true;
}
// When T is an integral type, return true and assign the multiplication result
// if the multiplication will not overflow. Otherwise, assign the numeric limit
// to result and return false.
template <typename T, std::enable_if_t<std::is_integral<T>::value>* = nullptr>
inline bool SafeMultiply(T lhs, T rhs, T* result) {
if (lhs > 0) {
if (rhs > 0) {
T safe_distance = std::numeric_limits<T>::max() / lhs;
if (safe_distance < rhs) {
*result = std::numeric_limits<T>::max();
return false;
}
} else if (rhs < 0) {
T safe_distance = std::numeric_limits<T>::lowest() / lhs;
if (safe_distance > rhs) {
*result = std::numeric_limits<T>::lowest();
return false;
}
}
} else if (lhs < 0) {
if (rhs < 0) {
T safe_distance = std::numeric_limits<T>::max() / lhs;
if (safe_distance > rhs) {
*result = std::numeric_limits<T>::max();
return false;
}
} else if (rhs > 0) {
T safe_distance = std::numeric_limits<T>::lowest() / rhs;
if (safe_distance > lhs) {
*result = std::numeric_limits<T>::lowest();
return false;
}
}
}
*result = lhs * rhs;
return true;
}
// When T is a floating-point type, perform a simple multiplication, since
// floating-point types don't have the same overflow issues as integral types.
template <typename T,
std::enable_if_t<std::is_floating_point<T>::value>* = nullptr>
inline bool SafeMultiply(T lhs, T rhs, T* result) {
*result = lhs * rhs;
return true;
}
// Return true and assign the square result if squaring will not overflow.
template <typename T, std::enable_if_t<std::is_integral<T>::value>* = nullptr>
inline bool SafeSquare(T num, T* result) {
double max_root = std::pow(std::numeric_limits<T>::max(), 0.5);
if (num > 0 && num > static_cast<T>(max_root)) return false;
if (num < 0 && num < -1 * static_cast<T>(max_root)) return false;
*result = num * num;
return true;
}
// Tries to convert a double value to an integral values while avoiding
// overflows. Returns whether the cast was successful and has been written to
// `out`.
template <typename T, std::enable_if_t<std::is_integral<T>::value>* = nullptr>
inline bool SafeCastFromDouble(const double in, T& out) {
if (std::isnan(in)) {
// Integral types do not support NaN values.
return false;
} else if (in >= std::numeric_limits<T>::max()) {
out = std::numeric_limits<T>::max();
return true;
} else if (in <= std::numeric_limits<T>::lowest()) {
out = std::numeric_limits<T>::lowest();
return true;
}
out = static_cast<T>(in);
return true;
}
// Convert double to other floating points. This should be mostly a no-op since
// we are typically only using doubles.
template <typename T,
std::enable_if_t<std::is_floating_point<T>::value>* = nullptr>
inline bool SafeCastFromDouble(const double in, T& out) {
out = static_cast<T>(in);
return true;
}
template <typename T>
inline double Mean(const std::vector<T>& v) {
return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
}
template <typename T>
inline double Variance(const std::vector<T>& v) {
double mean = Mean(v);
double var = 0;
for (const T& num : v) {
var += std::pow(num - mean, 2);
}
return var / v.size();
}
template <typename T>
inline double StandardDev(const std::vector<T>& v) {
return std::pow(Variance(v), .5);
}
// Percentile should be between 0 and 1. Does linear interpolation between
// nearest indices.
template <typename T>
inline T OrderStatistic(double percentile, const std::vector<T>& v) {
std::vector<T> values = std::vector<T>(v);
std::sort(values.begin(), values.end());
const int n = values.size();
if (n == 0) return 0.0;
const double pos = n * percentile - 0.5;
if (pos <= 0.0) return values[0];
if (pos >= n - 1) return values[n - 1];
const int index = static_cast<const int>(pos);
const double fraction = pos - index;
return (1.0 - fraction) * v[index] + fraction * v[index + 1];
}
// Given two numeric vectors of equal length, returns their linear correlation
// coefficient, or NaN if a variance is zero. Return NaN for unequal length
// vectors as well.
template <typename T>
double Correlation(const std::vector<T>& x, const std::vector<T>& y) {
int n = x.size();
if (n < 2 || n != y.size()) {
return NAN;
}
// First get the means.
T sum_x = 0.0;
T sum_y = 0.0;
for (int i = 0; i < n; ++i) {
sum_x += x[i];
sum_y += y[i];
}
const double mean_x = sum_x / n;
const double mean_y = sum_y / n;
// Then the variances and covariance.
double sum_xx = 0.0;
double sum_yy = 0.0;
double sum_xy = 0.0;
for (int i = 0; i < n; ++i) {
const double delta_x = x[i] - mean_x;
const double delta_y = y[i] - mean_y;
sum_xx += delta_x * delta_x;
sum_xy += delta_x * delta_y;
sum_yy += delta_y * delta_y;
}
// Return the correlation coefficient, or NaN if variance in x or y is almost
// 0.0.
const double error = std::pow(10, -10);
if (sum_xx > error && sum_yy > error) {
return sum_xy / std::sqrt(sum_xx * sum_yy);
} else {
return NAN;
}
}
// Filter a vector v using a selection vector. The selection vector has true
// at an index i if that element is selected. Return a vector of only the
// selected elements in v, preserving order.
template <typename T>
std::vector<T> VectorFilter(const std::vector<T>& v,
const std::vector<bool>& selection) {
std::vector<T> result;
DCHECK(v.size() == selection.size());
for (int i = 0; i < std::min(v.size(), selection.size()); ++i) {
if (selection[i]) {
result.push_back(v[i]);
}
}
return result;
}
// Transform vector into a pretty std::string.
template <typename T>
std::string VectorToString(const std::vector<T>& v) {
return absl::StrCat("[", absl::StrJoin(v, ", "), "]");
}
// Returns the value of optional `opt` if it is set and finite. Will return
// an InvalidArgumentError otherwise that includes `name` in the error
// message.
base::StatusOr<double> GetValueIfSetAndFinite(absl::optional<double> opt,
absl::string_view name);
// Returns the value of optional `opt` if it is set, finite, and positive.
// Will return an InvalidArgumentError otherwise that includes `name` in the
// error message.
base::StatusOr<double> GetValueIfSetAndPositive(absl::optional<double> opt,
absl::string_view name);
} // namespace differential_privacy
#endif // DIFFERENTIAL_PRIVACY_ALGORITHMS_UTIL_H_