blob: f2631adecbe05bcede711c2cd83308414b40e984 [file] [log] [blame] [edit]
//
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef DIFFERENTIAL_PRIVACY_ALGORITHMS_ALGORITHM_H_
#define DIFFERENTIAL_PRIVACY_ALGORITHMS_ALGORITHM_H_
#include <cstdint>
#include <limits>
#include <memory>
#include <optional>
#include <utility>
#include "absl/log/check.h"
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "algorithms/numerical-mechanisms.h"
#include "algorithms/util.h"
#include "proto/util.h"
#include "proto/confidence-interval.pb.h"
#include "proto/data.pb.h"
#include "proto/summary.pb.h"
#include "base/status_macros.h"
namespace differential_privacy {
constexpr double kDefaultDelta = 0.0;
constexpr double kDefaultConfidenceLevel = .95;
// Abstract superclass for differentially private algorithms.
//
// Algorithm instances are typically *not* thread safe. Entries must be added
// from a single thread only. In case you want to use multiple threads, you can
// use per-thread instances of the Algorithm child class, serialize them, and
// then merge them together in a single thread.
template <typename T>
class Algorithm {
public:
//
// Epsilon, delta are standard parameters of differentially private
// algorithms. See "The Algorithmic Foundations of Differential Privacy" p17.
explicit Algorithm(double epsilon, double delta)
: epsilon_(epsilon), delta_(delta) {
DCHECK_NE(epsilon, std::numeric_limits<double>::infinity());
DCHECK_GT(epsilon, 0.0);
}
explicit Algorithm(double epsilon) : Algorithm(epsilon, 0) {}
virtual ~Algorithm() = default;
Algorithm(const Algorithm&) = delete;
Algorithm& operator=(const Algorithm&) = delete;
// Adds one input to the algorithm.
virtual void AddEntry(const T& t) = 0;
// Adds multiple inputs to the algorithm.
template <typename Iterator>
void AddEntries(Iterator begin, Iterator end) {
for (auto it = begin; it != end; ++it) {
AddEntry(*it);
}
}
// Runs the algorithm on (only) the input specified by the iterator,
// using the epsilon parameter provided in the constructor. Returns the result
// or an error, if any occurred.
//
// Will ignore any data that has already been accumulated. Don't mix this with
// calls to AddEntry/Entries, PartialResult, Serialize or Merge.
template <typename Iterator>
absl::StatusOr<Output> Result(Iterator begin, Iterator end) {
Reset();
AddEntries(begin, end);
return PartialResult();
}
// Get the algorithm result on the accumulated data.
absl::StatusOr<Output> PartialResult() {
return PartialResult(kDefaultConfidenceLevel);
}
// In most cases, override GenerateResult instead to maintain budget checks.
// Override this PartialResult with caution.
// Same as above, but allows the user to specify the confidence level that
// may be returned as part of the Output. Not all Algorithms support
// confidence levels, for unsupported algorithms the confidence level will not
// be included. See NoiseConfidenceInterval for more details.
virtual absl::StatusOr<Output> PartialResult(double noise_interval_level) {
if (result_returned_) {
return absl::InvalidArgumentError(
"The algorithm can only produce results once for a given epsilon, "
"delta budget.");
}
result_returned_ = true;
return GenerateResult(noise_interval_level);
}
// Resets the algorithm to a state in which it has received no input. After
// Reset is called, the algorithm should only consider input added after the
// last Reset call when providing output.
void Reset() {
result_returned_ = false;
ResetState();
}
// Serializes summary data of current entries into Summary proto. This allows
// results from distributed aggregation to be recorded and later merged.
// Returns empty summary for algorithms for which serialize is unimplemented.
virtual Summary Serialize() const = 0;
// Merges serialized summary data into this algorithm. The summary proto must
// represent data from the same algorithm type with identical parameters. The
// data field must contain the algorithm summary type of the corresponding
// algorithm used. The summary proto cannot be empty.
virtual absl::Status Merge(const Summary& summary) = 0;
// Returns an estimate for the current memory consumption of the algorithm in
// bytes. Intended to be used for distribution frameworks to prevent
// out-of-memory errors.
virtual int64_t MemoryUsed() = 0;
// Returns the confidence_level confidence interval of noise added within the
// algorithm, using epsilon and other relevant, algorithm-specific parameters
// (e.g. bounds) provided by the constructor. This metric may be used to gauge
// the error rate introduced by the noise.
//
// If the returned value is <x,y>, then the noise added has a confidence_level
// chance of being in the domain [x,y].
//
// By default, NoiseConfidenceInterval() returns an error. Algorithms for
// which a confidence interval can feasibly be calculated override this and
// output the relevant value.
// Conservatively, we do not release the error rate for algorithms whose
// confidence intervals rely on input size.
virtual absl::StatusOr<ConfidenceInterval> NoiseConfidenceInterval(
double confidence_level) {
return absl::UnimplementedError(
"NoiseConfidenceInterval() unsupported for this algorithm");
}
virtual double GetEpsilon() const { return epsilon_; }
virtual double GetDelta() const { return delta_; }
protected:
// Returns the result of the algorithm when run on all the input that has been
// provided via AddEntr[y|ies] since the last call to Reset.
// Apportioning of privacy budget is handled by calls from PartialResult
// above.
virtual absl::StatusOr<Output> GenerateResult(
double noise_interval_level) = 0;
// Allows child classes to reset their state as part of a global reset.
virtual void ResetState() = 0;
private:
bool result_returned_ = false;
const double epsilon_;
const double delta_;
};
template <typename T, class Algorithm, class Builder>
class AlgorithmBuilder {
public:
virtual ~AlgorithmBuilder() = default;
absl::StatusOr<std::unique_ptr<Algorithm>> Build() {
RETURN_IF_ERROR(ValidateIsFiniteAndPositive(epsilon_, "Epsilon"));
if (delta_.has_value()) {
RETURN_IF_ERROR(
ValidateIsInInclusiveInterval(delta_.value(), 0, 1, "Delta"));
} // TODO: Default delta_ to kDefaultDelta?
if (l0_sensitivity_.has_value()) {
RETURN_IF_ERROR(
ValidateIsPositive(l0_sensitivity_.value(),
"Maximum number of partitions that can be "
"contributed to (i.e., L0 sensitivity)"));
} // TODO: Default is set in UpdateAndBuildMechanism() below.
if (max_contributions_per_partition_.has_value()) {
RETURN_IF_ERROR(
ValidateIsPositive(max_contributions_per_partition_.value(),
"Maximum number of contributions per partition"));
} // TODO: Default is set in UpdateAndBuildMechanism() below.
return BuildAlgorithm();
}
Builder& SetEpsilon(double epsilon) {
epsilon_ = epsilon;
return *static_cast<Builder*>(this);
}
Builder& SetDelta(double delta) {
delta_ = delta;
return *static_cast<Builder*>(this);
}
Builder& SetMaxPartitionsContributed(int max_partitions) {
l0_sensitivity_ = max_partitions;
return *static_cast<Builder*>(this);
}
// Note for BoundedAlgorithm, this does not specify the contribution that will
// be clamped, but the number of contributions to any partition.
Builder& SetMaxContributionsPerPartition(int max_contributions) {
max_contributions_per_partition_ = max_contributions;
return *static_cast<Builder*>(this);
}
Builder& SetLaplaceMechanism(
std::unique_ptr<NumericalMechanismBuilder> mechanism_builder) {
mechanism_builder_ = std::move(mechanism_builder);
return *static_cast<Builder*>(this);
}
private:
std::optional<double> epsilon_;
std::optional<double> delta_;
std::optional<int> l0_sensitivity_;
std::optional<int> max_contributions_per_partition_;
// The mechanism builder is used to interject custom mechanisms for testing.
std::unique_ptr<NumericalMechanismBuilder> mechanism_builder_ =
absl::make_unique<LaplaceMechanism::Builder>();
protected:
std::optional<double> GetEpsilon() const { return epsilon_; }
std::optional<double> GetDelta() const { return delta_; }
std::optional<int> GetMaxPartitionsContributed() const {
return l0_sensitivity_;
}
std::optional<int> GetMaxContributionsPerPartition() const {
return max_contributions_per_partition_;
}
std::unique_ptr<NumericalMechanismBuilder> GetMechanismBuilderClone() const {
return mechanism_builder_->Clone();
}
virtual absl::StatusOr<std::unique_ptr<Algorithm>> BuildAlgorithm() = 0;
absl::StatusOr<std::unique_ptr<NumericalMechanism>>
UpdateAndBuildMechanism() {
auto clone = mechanism_builder_->Clone();
if (epsilon_.has_value()) {
clone->SetEpsilon(epsilon_.value());
}
if (delta_.has_value()) {
clone->SetDelta(delta_.value());
}
// If not set, we are using 1 as default value for both, L0 and Linf, as
// fallback for existing clients.
// TODO: Refactor, consolidate, or remove defaults.
return clone->SetL0Sensitivity(l0_sensitivity_.value_or(1))
.SetLInfSensitivity(max_contributions_per_partition_.value_or(1))
.Build();
}
};
} // namespace differential_privacy
#endif // DIFFERENTIAL_PRIVACY_ALGORITHMS_ALGORITHM_H_