blob: 90985c327a83ac1aaa3ae5a997ff03e2413d2cee [file] [log] [blame]
//
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package com.google.privacy.differentialprivacy;
import static java.lang.Math.abs;
import static java.lang.Math.max;
import static java.lang.Math.min;
import com.google.auto.value.AutoValue;
import com.google.common.base.Preconditions;
import com.google.differentialprivacy.Data.ValueType;
import com.google.differentialprivacy.SummaryOuterClass.BoundedSumSummary;
import com.google.protobuf.InvalidProtocolBufferException;
import java.util.Collection;
import javax.annotation.Nullable;
/**
* Calculates a differentially private sum for a collection of values using the Laplace or Gaussian
* mechanism.
*
* <p>This class allows a single privacy unit (e.g., an individual) to contribute data to multiple
* different partitions. The class does not check whether the number of partitions is within the
* specified bounds. This is the responsibility of the caller.
*
* <p>This class assumes that each privacy unit may contribute to a single partition only once
* (i.e., only one data contribution per privacy unit per partition). Multiple contributions from a
* single privacy unit should be pre-aggregated before they are passed to this class.
*
* <p>The user can provide a {@link Noise} instance which will be used to generate the noise. If no
* instance is specified, {@link LaplaceNoise} is applied.
*
* <p>This class provides an unbiased estimator for the raw bounded sum meaning that the expected
* value of the differentially private bounded sum is equal to the raw bounded sum.
*
* <p>Note: this class is not thread-safe.
*
* <p>For more implementation details, see {@link #computeResult()}.
*
* <p>For general details and key definitions, see <a href=
* "https://github.com/google/differential-privacy/blob/main/differential_privacy.md#key-definitions">
* this</a> introduction to Differential Privacy.
*/
public class BoundedSum {
private final Params params;
private double sum;
private double noisedSum;
private AggregationState state = AggregationState.DEFAULT;
private BoundedSum(Params params) {
sum = 0.0;
this.params = params;
}
public static Params.Builder builder() {
return Params.Builder.newBuilder();
}
/** Clamps the input value and adds it to the sum. */
public void addEntry(double e) {
if (state != AggregationState.DEFAULT) {
throw new IllegalStateException("Entry cannot be added. Reason: " + state.getErrorMessage());
}
// NaN is ignored because introducing even a single NaN entry will result in a NaN sum
// regardless of other entries, which would break the indistinguishability property required
// for differential privacy.
if (Double.isNaN(e)) {
return;
}
sum += clamp(e);
}
/** Clamps the input values and adds them to the sum. */
public void addEntries(Collection<Double> e) {
e.forEach(this::addEntry);
}
private double clamp(double value) {
return max(min(value, params.upper()), params.lower());
}
/**
* Computes and returns a differentially private sum of the elements added via {@link #addEntry}
* and {@link #addEntries}. The method can be called only once for a given collection of elements.
* All subsequent calls will throw an exception.
*
* <p>The returned value is an unbiased estimate of the raw bounded sum.
*
* <p>The returned value may sometimes be outside the set of possible raw bounded sums, e.g., the
* differentially private bounded sum may be positive although neither the lower nor the upper
* bound are positive. This can be corrected by the caller of this method, e.g., by snapping the
* result to the closest value representing a bounded sum that is possible. Note that such post
* processing introduces bias to the result.
*/
public double computeResult() {
if (state != AggregationState.DEFAULT) {
throw new IllegalStateException(
"DP sum cannot be computed. Reason: " + state.getErrorMessage());
}
state = AggregationState.RESULT_RETURNED;
noisedSum = params.noise().addNoise(
sum,
getL0Sensitivity(),
getLInfSensitivity(),
params.epsilon(),
params.delta()
);
return noisedSum;
}
/**
* Computes a confidence interval that contains the true {@link
* BoundedSum} with a probability greater or equal to 1 - alpha using the noised {@link
* BoundedSum} computed by {@code computeResult()}.
*
* <p>Refer to <a
* href="https://github.com/google/differential-privacy/tree/main/common_docs/confidence_intervals.md">this</a> doc for
* more information.
*/
public ConfidenceInterval computeConfidenceInterval(double alpha) {
if (state != AggregationState.RESULT_RETURNED) {
throw new IllegalStateException(
"computeResult must be called before calling computeConfidenceInterval.");
}
ConfidenceInterval confInt =
params
.noise()
.computeConfidenceInterval(
noisedSum,
getL0Sensitivity(),
getLInfSensitivity(),
params.epsilon(),
params.delta(),
alpha);
if (params.lower() >= 0.0) {
confInt =
ConfidenceInterval.create(max(0.0, confInt.lowerBound()), max(0.0, confInt.upperBound()));
} else if (params.upper() <= 0.0) {
confInt =
ConfidenceInterval.create(min(0.0, confInt.lowerBound()), min(0.0, confInt.upperBound()));
}
return confInt;
}
/**
* Returns a serializable version of the current state of {@link BoundedSum} and the parameters
* used to calculate it. After calling this method, this instance of BoundedSum will be unusable,
* since the result can only be output once.
*/
public byte[] getSerializableSummary() {
if (state != AggregationState.DEFAULT) {
throw new IllegalStateException(
"Sum object cannot be serialized. Reason: " + state.getErrorMessage());
}
ValueType sumValue = ValueType.newBuilder().setFloatValue(sum).build();
BoundedSumSummary.Builder builder =
BoundedSumSummary.newBuilder()
.setPartialSum(sumValue)
.setEpsilon(params.epsilon())
.setLower(params.lower())
.setUpper(params.upper())
.setMaxPartitionsContributed(params.maxPartitionsContributed())
.setMaxContributionsPerPartition(params.maxContributionsPerPartition())
.setMechanismType(params.noise().getMechanismType());
if (params.delta() != null) {
builder.setDelta(params.delta());
}
// Record that this object is no longer suitable for producing a differentially private sum,
// since serialization exposes the object's raw state.
state = AggregationState.SERIALIZED;
return builder.build().toByteArray();
}
/**
* Merges this instance with the output of {@link #getSerializableSummary()} from a different
* {@link BoundedSum} and stores the merged result in this instance. This is required in the
* distributed calculations context for merging partial results.
*
* @throws IllegalArgumentException if not all config parameters (e.g., epsilon, contribution
* bounds) are equal or if the passed serialized sum is invalid.
* @throws IllegalStateException if this sum has already been calculated or serialized.
*/
public void mergeWith(byte[] otherBoundedSumSummary) {
if (state != AggregationState.DEFAULT) {
throw new IllegalStateException(
"Sum object cannot be merged. Reason: " + state.getErrorMessage());
}
BoundedSumSummary otherSummaryParsed;
try {
otherSummaryParsed = BoundedSumSummary.parseFrom(otherBoundedSumSummary);
} catch (InvalidProtocolBufferException pbe) {
throw new IllegalArgumentException(pbe);
}
checkMergeParametersAreEqual(otherSummaryParsed);
this.sum += otherSummaryParsed.getPartialSum().getFloatValue();
}
private void checkMergeParametersAreEqual(BoundedSumSummary otherSum) {
DpPreconditions.checkMergeMechanismTypesAreEqual(
params.noise().getMechanismType(), otherSum.getMechanismType());
DpPreconditions.checkMergeEpsilonAreEqual(params.epsilon(), otherSum.getEpsilon());
DpPreconditions.checkMergeDeltaAreEqual(params.delta(), otherSum.getDelta());
DpPreconditions.checkMergeMaxPartitionsContributedAreEqual(
params.maxPartitionsContributed(), otherSum.getMaxPartitionsContributed());
DpPreconditions.checkMergeMaxContributionsPerPartitionAreEqual(
params.maxContributionsPerPartition(), otherSum.getMaxContributionsPerPartition());
DpPreconditions.checkMergeBoundsAreEqual(
params.lower(), otherSum.getLower(), params.upper(), otherSum.getUpper());
}
private int getL0Sensitivity() {
// maxPartitionsContributed is the user-facing parameter, which is technically the same as
// L_0 sensitivity used by the noise internally.
return params.maxPartitionsContributed();
}
private double getLInfSensitivity() {
return getLInfSensitivity(
params.lower(), params.upper(), params.maxContributionsPerPartition());
}
private static double getLInfSensitivity(
double lower, double upper, int maxContributionsPerPartition) {
return max(abs(lower), abs(upper)) * maxContributionsPerPartition;
}
@AutoValue
public abstract static class Params {
abstract Noise noise();
abstract double epsilon();
@Nullable
abstract Double delta();
abstract int maxPartitionsContributed();
abstract int maxContributionsPerPartition();
abstract double lower();
abstract double upper();
@AutoValue.Builder
public abstract static class Builder {
private static void checkLInfSensitivityOverflow(
double lower, double upper, int maxContributionsPerPartition) {
double lInfSensitivity = getLInfSensitivity(lower, upper, maxContributionsPerPartition);
Preconditions.checkArgument(
Double.compare(lInfSensitivity, Double.MAX_VALUE) <= 0,
"bounds and maxContributionsPerPartition are too high - the LInfSensitivity "
+ " overflows. Provided values: lower bound = %s, upper bound = %s,"
+ " maxContributionsPerPartition = %s",
lower,
upper,
maxContributionsPerPartition);
}
private static void checkL1SensitivityOverflow(
double lower,
double upper,
int maxContributionsPerPartition,
int maxPartitionsContributed) {
double lInfSensitivity = getLInfSensitivity(lower, upper, maxContributionsPerPartition);
double l1Sensitivity = Noise.getL1Sensitivity(maxPartitionsContributed, lInfSensitivity);
Preconditions.checkArgument(
Double.compare(l1Sensitivity, Double.MAX_VALUE) <= 0,
"bounds and maxContributionsPerPartition are too high - the L1Sensitivity "
+ " overflows. Provided values: lower bound = %s, upper bound = %s,"
+ " maxContributionsPerPartition = %s",
lower,
upper,
maxContributionsPerPartition);
}
private static void checkL2SensitivityOverflow(
double lower,
double upper,
int maxContributionsPerPartition,
int maxPartitionsContributed) {
double lInfSensitivity = getLInfSensitivity(lower, upper, maxContributionsPerPartition);
double l2Sensitivity = Noise.getL2Sensitivity(maxPartitionsContributed, lInfSensitivity);
Preconditions.checkArgument(
Double.compare(l2Sensitivity, Double.MAX_VALUE) <= 0,
"bounds and maxContributionsPerPartition are too high - the L2Sensitivity "
+ " overflows. Provided values: lower bound = %s, upper bound = %s,"
+ " maxContributionsPerPartition = %s",
lower,
upper,
maxContributionsPerPartition);
}
private static Builder newBuilder() {
Params.Builder builder = new AutoValue_BoundedSum_Params.Builder();
// Provide LaplaceNoise as a default noise generator.
builder.noise(new LaplaceNoise());
// By default, assume that each user contributes to a given partition no more than once.
builder.maxContributionsPerPartition(1);
return builder;
}
/** Epsilon DP parameter. */
public abstract Builder epsilon(double value);
/**
* Delta DP parameter.
*
* <p>Note that Laplace noise does not use delta. Hence, delta should not be set when Laplace
* noise is used.
*/
public abstract Builder delta(@Nullable Double value);
/**
* Maximum number of partitions to which a single privacy unit (i.e., an individual) is
* allowed to contribute.
*/
public abstract Builder maxPartitionsContributed(int value);
/** Distribution from which the noise will be generated and added to the sum. */
public abstract Builder noise(Noise value);
/**
* Lower bound for the entries added to the sum. Any entires smaller than this value will be
* set to this value.
*/
public abstract Builder lower(double value);
/**
* Upper bound for the entries added to the sum. Any entires greater than this value will be
* set to this value.
*/
public abstract Builder upper(double value);
/**
* Maximum number of contributions associated with a single privacy unit (e.g., an individual)
* to a single partition. This is used to calculate the sensitivity of the sum operation. This
* is not public because it should only be used by other aggregation functions inside the
* library. See {@link BoundedSum} for more details.
*/
abstract Builder maxContributionsPerPartition(int value);
abstract Params autoBuild();
public BoundedSum build() {
Params params = autoBuild();
// No need to check if noise is null: Laplace noise is used by default.
DpPreconditions.checkEpsilon(params.epsilon());
DpPreconditions.checkNoiseDelta(params.delta(), params.noise());
DpPreconditions.checkMaxPartitionsContributed(params.maxPartitionsContributed());
DpPreconditions.checkMaxContributionsPerPartition(params.maxContributionsPerPartition());
DpPreconditions.checkBounds(params.lower(), params.upper());
switch (params.noise().getMechanismType()) {
case LAPLACE:
checkL1SensitivityOverflow(
params.lower(),
params.upper(),
params.maxContributionsPerPartition(),
params.maxPartitionsContributed());
break;
case GAUSSIAN:
checkL2SensitivityOverflow(
params.lower(),
params.upper(),
params.maxContributionsPerPartition(),
params.maxPartitionsContributed());
break;
default:
break;
}
checkLInfSensitivityOverflow(
params.lower(), params.upper(), params.maxContributionsPerPartition());
return new BoundedSum(params);
}
}
}
}