java/main/com/google/privacy/differentialprivacy/ApproximateBounds.java - third_party/github.com/google/differential-privacy - Git at Google

 //
 // Copyright 2020 Google LLC
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //

 package com.google.privacy.differentialprivacy;

 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Preconditions.checkState;
 import static java.lang.Math.abs;
 import static java.lang.Math.ceil;
 import static java.lang.Math.expm1;
 import static java.lang.Math.log;
 import static java.lang.Math.log1p;
 import static java.lang.Math.max;
 import static java.lang.Math.min;

 import com.google.auto.value.AutoValue;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.primitives.Longs;
 import com.google.privacy.differentialprivacy.proto.SummaryOuterClass.ApproxBoundsSummary;
 import com.google.protobuf.ExtensionRegistry;
 import com.google.protobuf.InvalidProtocolBufferException;
 import java.security.SecureRandom;
 import java.util.Collection;
 import java.util.Optional;
 import java.util.Random;

 /**
  * Finds the approximate bounds of a set of numbers.
  *
  * <p>The algorithm uses two logarithmic histograms, one for positive values and one for negative
  * values. Histogram bin counts are thresholded to remove bins with too few counts. The threshold is
  * derived from the "failure probability", which is an upper bound on the probability of returning
  * bounds that are too loose.
  *
  * <p>We choose the rightmost bin that exceeds the threshold count and return its greater boundary
  * as the approximate upper bound. Similarly the leftmost bin that exceeds the threshold count is
  * chosen and its smaller boundary is returned as the approximate lower bound. If the failure
  * probability is too low, then it is possible that no bin exceeds the threshold, in which case we
  * increase the failure probability and try again. When the failure probability exceeds a fixed
  * value, we throw an exception rather than risk returning bounds that are primarily determined by
  * the noise. If this happens, try providing a larger sample to ApproximateBounds.
  *
  * <p>For general details and key definitions, see <a href=
  * "https://github.com/google/differential-privacy/blob/main/differential_privacy.md#key-definitions">
  * the introduction to Differential Privacy</a>.
  */
 public class ApproximateBounds {
   @VisibleForTesting final Params params;

   /**
    * The failure probability is an upper bound on the probability of returning bounds that are too
    * loose.
    *
    * <p>More precisely, it is the probability that we (incorrectly) return bounds for an empty
    * dataset due to the noise added. It is used to determine the threshold for a bin to be
    * considered non-empty.
    *
    * <p>Setting the failure probability too low will result in higher thresholds and, consequently,
    * in dropping bins with too few entries or failing to find bounds at all. If this happens, we
    * repeatedly retry with a larger failure probability until the failure probability reaches {@link
    * ApproximateBounds.MAX_FAILURE_PROBABILITY}.
    */
   private static final double INITIAL_FAILURE_PROBABILITY = 1e-9;

   /** The factor by which the failure probability is increased when bounding fails. */
   private static final double FAILURE_PROBABILITY_INCREMENT_FACTOR = 2;

   /**
    * The maximum acceptable failure probability.
    *
    * <p>When increasing the failure probability in order to try to find bounds for a small sample,
    * if the failure probability increases above this value then we return an error: the bounds we
    * would get would too often be influenced by noise rather than the sample.
    */
   private static final double MAX_FAILURE_PROBABILITY = 1e-6;

   private final long[] positiveBins;
   private final long[] negativeBins;
   private final double[] rightBinBoundaries;

   private final Random random = new SecureRandom();

   private AggregationState aggregationState = AggregationState.DEFAULT;

   private ApproximateBounds(Params params) {
     this.params = params;

     positiveBins = new long[params.inputType().numPositiveBins];
     negativeBins = new long[params.inputType().numNegativeBins];

     // Cache bin boundaries for performance reasons.
     rightBinBoundaries = params.inputType().generateRightBinBoundaries();
   }

   public static Params.Builder builder() {
     return Params.Builder.newBuilder();
   }

   /** Adds a value to the approximate bounds calculation. */
   public void addEntry(double value) {
     checkState(
         aggregationState.equals(AggregationState.DEFAULT), aggregationState.getErrorMessage());

     // NaN is ignored because we cannot run logic on NaNs.
     if (Double.isNaN(value)) {
       return;
     }

     // If our histogram has no negative bins, clamp negative values to the 0th bin
     if (value < 0 && !params.inputType().hasNegativeBins) {
       positiveBins[0]++;
       return;
     }

     // If our histogram has negative bins, then they are symmetric with the positive ones.
     int binIndex = params.inputType().getPositiveBinNumber(abs(value));
     if (value >= 0) {
       positiveBins[binIndex]++;
     } else {
       negativeBins[binIndex]++;
     }
   }

   /** Adds a set of value to the approximate bounds calculation. */
   public void addEntries(Collection<Double> e) {
     e.forEach(this::addEntry);
   }

   public Result computeResult() {
     checkState(
         aggregationState.equals(AggregationState.DEFAULT), aggregationState.getErrorMessage());
     aggregationState = AggregationState.RESULT_RETURNED;

     double[] noisyPositiveBins = addNoise(positiveBins);
     double[] noisyNegativeBins = addNoise(negativeBins);

     double currentFailureProbability = INITIAL_FAILURE_PROBABILITY;
     while (currentFailureProbability <= MAX_FAILURE_PROBABILITY) {
       double perBinFailureProbability =
           computePerBinFailureProbability(
               currentFailureProbability, params.inputType().totalBins());

       double threshold =
           LaplaceNoise.computeQuantile(
               1 - perBinFailureProbability,
               /* x= */ 0,
               /* l1Sensitivity= */ params.maxContributions(),
               params.epsilon());

       Optional<Double> minimum = findMinimum(threshold, noisyNegativeBins, noisyPositiveBins);
       Optional<Double> maximum = findMaximum(threshold, noisyNegativeBins, noisyPositiveBins);

       if (minimum.isPresent() && maximum.isPresent()) {
         return Result.create(minimum.get(), maximum.get());
       }

       currentFailureProbability = FAILURE_PROBABILITY_INCREMENT_FACTOR * currentFailureProbability;
     }

     throw new IllegalArgumentException(
         "Bin count threshold was too large to find approximate bounds. Either run over a larger "
             + "dataset or decrease success_probability and try again.");
   }

   /**
    * Converts an overall failure probability into the corresponding per-bin failure probability.
    *
    * <p>If we were dealing with a "success probability" (i.e. 1 - failure probability) and had N
    * bins, then the per-bin success probability is the N^th root of the overall success probability.
    * Translated into failure probabilities: perBinFP = 1 - pow(1 - FP, 1/N)
    *
    * <p>Since in general the failure probability is very small, we use expm1 and log1p to calculate
    * this:
    *
    * <pre>{@code
    * perBinFP = 1 - exp(log(pow(1 - FP, 1/N)))
    *          = 1 - exp(log(1 - FP) / N)
    *          = -expm1(log1p(-FP) / N)
    * }</pre>
    */
   @VisibleForTesting
   static double computePerBinFailureProbability(double overallFailureProbability, int numBins) {
     return -expm1(log1p(-overallFailureProbability) / numBins);
   }

   /**
    * Finds the minimum by looking for bins with sufficient amount of entries in the noisy
    * histograms.
    */
   private Optional<Double> findMinimum(
       double threshold, double[] noisyNegativeBins, double[] noisyPositiveBins) {
     for (int i = noisyNegativeBins.length - 1; i >= 0; --i) {
       if (noisyNegativeBins[i] >= threshold) {
         return Optional.of(getNegativeLeftBinBoundary(i));
       }
     }

     for (int i = 0; i < noisyPositiveBins.length; i++) {
       if (noisyPositiveBins[i] >= threshold) {
         return Optional.of(getPositiveLeftBinBoundary(i));
       }
     }

     return Optional.empty();
   }

   /**
    * Finds the maximum by looking for bins with sufficient amount of entries in the noisy
    * histograms.
    */
   private Optional<Double> findMaximum(
       double threshold, double[] noisyNegativeBins, double[] noisyPositiveBins) {
     for (int i = noisyPositiveBins.length - 1; i >= 0; --i) {
       if (noisyPositiveBins[i] >= threshold) {
         return Optional.of(getPositiveRightBinBoundary(i));
       }
     }

     for (int i = 0; i < noisyNegativeBins.length; i++) {
       if (noisyNegativeBins[i] >= threshold) {
         return Optional.of(getNegativeRightBinBoundary(i));
       }
     }

     return Optional.empty();
   }

   private double getNegativeLeftBinBoundary(int bin) {
     return -1.0 * getPositiveRightBinBoundary(bin);
   }

   private double getNegativeRightBinBoundary(int bin) {
     return -getPositiveLeftBinBoundary(bin);
   }

   private double getPositiveLeftBinBoundary(int bin) {
     if (bin == 0) {
       return 0;
     }
     return getPositiveRightBinBoundary(bin - 1);
   }

   private double getPositiveRightBinBoundary(int bin) {
     return rightBinBoundaries[bin];
   }

   private double[] addNoise(long[] bins) {
     double[] noisyBins = new double[bins.length];
     for (int i = 0; i < bins.length; i++) {
       // Since we never release the raw noised values from this class, we don't need to defend
       // against attacks against the least significant bits of the noise (see "On Significance of
       // the Least Significant Bits For Differential Privacy" by Ilya Mironov,
       // https://www.microsoft.com/en-us/research/wp-content/uploads/2012/10/lsbs.pdf). Therefore we
       // can use a faster randomness generation method.
       noisyBins[i] =
           LaplaceNoise.computeQuantile(
               /* rank= */ random.nextDouble(),
               /* x= */ (double) bins[i],
               /* l1Sensitivity= */ params.maxContributions(),
               params.epsilon());
     }

     return noisyBins;
   }

   /**
    * Returns a serializable summary of the current state of this {@link ApproximateBounds} instance.
    * The summary can be used to merge this instance with another instance of {@link
    * ApproximateBounds}.
    *
    * <p>This method cannot be invoked if the result has already been computed using {@link
    * computeResult()}. Moreover, after this instance of {@link ApproximateBounds} has been
    * serialized once, further modification and queries are not possible anymore.
    *
    * @throws IllegalStateException if this instance of {@link ApproximateBounds} has already been
    *     queried.
    */
   public byte[] getSerializableSummary() {
     checkState(
         aggregationState != AggregationState.RESULT_RETURNED, aggregationState.getErrorMessage());
     aggregationState = AggregationState.SERIALIZED;

     return ApproxBoundsSummary.newBuilder()
         .addAllPosBinCount(Longs.asList(positiveBins))
         .addAllNegBinCount(Longs.asList(negativeBins))
         .build()
         .toByteArray();
   }

   /**
    * Merges the output of {@link #getSerializableSummary()} from a different instance of {@link
    * ApproximateBounds} with this instance. Intended to be used in the context of distributed
    * computation.
    *
    * @throws IllegalArgumentException if the passed serialized summary is invalid or has a different
    *     number of bins to this instance.
    * @throws IllegalStateException if this instance of {@link ApproximateBounds} has already been
    *     queried or serialized.
    */
   public void mergeWith(byte[] otherSummary) {
     checkState(
         aggregationState.equals(AggregationState.DEFAULT), aggregationState.getErrorMessage());
     ApproxBoundsSummary otherSummaryParsed;
     try {
       otherSummaryParsed =
           ApproxBoundsSummary.parseFrom(otherSummary, ExtensionRegistry.newInstance());
     } catch (InvalidProtocolBufferException e) {
       throw new IllegalArgumentException(e);
     }

     checkArgument(
         otherSummaryParsed.getPosBinCountCount() == positiveBins.length,
         "Both histograms must have the same number of positive bins (merge target has %s, "
             + "merge source has %s).",
         positiveBins.length,
         otherSummaryParsed.getPosBinCountCount());
     checkArgument(
         otherSummaryParsed.getNegBinCountCount() == negativeBins.length,
         "Both histograms must have the same number of negative bins (merge target has %s, "
             + "merge source has %s).",
         negativeBins.length,
         otherSummaryParsed.getNegBinCountCount());
     for (int i = 0; i < otherSummaryParsed.getPosBinCountCount(); i++) {
       positiveBins[i] += otherSummaryParsed.getPosBinCount(i);
     }
     for (int i = 0; i < otherSummaryParsed.getNegBinCountCount(); i++) {
       negativeBins[i] += otherSummaryParsed.getNegBinCount(i);
     }
   }

   /**
    * Parameters for calculation of differentially-private approximate bounds.
    *
    * <p>See the {@link Builder} for the explanation of each parameter.
    */
   @AutoValue
   public abstract static class Params {
     abstract double epsilon();

     abstract InputType inputType();

     abstract Integer maxContributions();

     public abstract Builder toBuilder();

     @AutoValue.Builder
     public abstract static class Builder {
       // Approximate bounds supports only Laplace noise due to the way sensitivities
       // are calculated in the internal logarithmic histogram. Therefore, the builder doesn't
       // accept noise or delta parameters.

       /** Epsilon to be used for the approximate bounds calculation. */
       public abstract Builder epsilon(double value);

       /**
        * Type of the inputs to {@link ApproximateBounds}.
        *
        * <p>This parameter is used to optimize the bounding algorithm based on expected properties
        * of the input values, e.g., the values have the range and granularity of a certain numerical
        * type such as double, long, etc. For example, when the input type is {@code
        * InputType.DOUBLE}, we include both positive and negative bins in the histogram, as well as
        * bins between 0 and 1. Note that all values passed to {@link ApproximateBounds} are doubles;
        * this parameter only specifies how the algorithm processes the values.
        */
       public abstract Builder inputType(InputType inputType);

       /** The maximum number of contributions each privacy unit can make to the dataset. */
       public abstract Builder maxContributions(Integer value);

       abstract Params autoBuild();

       private static Builder newBuilder() {
         Params.Builder builder = new AutoValue_ApproximateBounds_Params.Builder();
         builder.inputType(InputType.DOUBLE);
         return builder;
       }

       public ApproximateBounds build() {
         Params params = autoBuild();
         DpPreconditions.checkEpsilon(params.epsilon());
         return new ApproximateBounds(params);
       }
     }

     /**
      * The type of input that this algorithm will be provided with.
      *
      * <p>This determines the size and number of bins, as well as whether we should include negative
      * bins and bins between 0 and 1.
      *
      * <p>In general, bin i contains the number of inputs that lie in the range (scale * base^(i-1),
      * scale * base^i], where scale and base are determined by the InputType. The exception is
      * positive bin 0, which has boundaries [0, scale * base^0]. Negative bin 0 does not contain 0.
      * Values outside of the range covered by bins are clipped to the closest bin boundary.
      *
      * <p>For example, if scale = 1, base = 2, and num_bins = 4 then the positive histogram bins are
      * [0, 1], (1, 2], (2, 4], and (4, 8].
      */
     public enum InputType {
       // For doubles, we include both positive and negative bins, with successive bins doubling in
       // size. These parameters result in a rightmost bin boundary equal to 2^1023. Since
       // Double.MAX_VALUE is slightly less than 2^1024, values close to ±Double.MAX_VALUE will get
       // clamped. Sample bin boundaries:
       // ..., [-2, -1), [-1, -0.5), ..., (0.5, 1], (1, 2], ...
       DOUBLE(2046, /*hasNegativeBins=*/ true, Double.MIN_NORMAL),
       // For integers, the bins are the same as for doubles except we don't subdivide the bins
       // [-1, 0) and [0, 1]. These parameters result in a rightmost bin boundary equal to 2^31.
       // Note that this is greater than Integer.MAX_VALUE, so Integer.MAX_VALUE lies in the
       // interior of the rightmost bin. Sample bin boundaries:
       // ..., [-2, -1), [-1, 0), [0, 1], (1, 2], ...
       INTEGER(32, /*hasNegativeBins=*/ true, 1),
       // For positive integers, the bins are the same as for integers except we don't include
       // negative bins. Sample bin boundaries:
       // [0, 1], (1, 2], ...
       POSITIVE_INTEGER(32, /*hasNegativeBins=*/ false, 1),
       // In tests, we use a reduced number of bins for performance reasons and to keep assertions
       // simple. We only use 10 bins, with successive bins doubling in size. Sample bin boundaries:
       // [-16, -8), [-8, -4) ..., [0, 1], (1, 2], ..., (8, 16]
       TEST(5, /*hasNegativeBins=*/ true, 1);

       @VisibleForTesting public final int numPositiveBins;
       @VisibleForTesting public final int numNegativeBins;
       @VisibleForTesting public final boolean hasNegativeBins;
       @VisibleForTesting public final double scale;
       @VisibleForTesting public final double base = 2.0;

       private InputType(int numPositiveBins, boolean hasNegativeBins, double scale) {
         this.numPositiveBins = numPositiveBins;
         this.numNegativeBins = hasNegativeBins ? numPositiveBins : 0;
         this.hasNegativeBins = hasNegativeBins;
         this.scale = scale;
       }

       int totalBins() {
         return this.numPositiveBins + this.numNegativeBins;
       }

       double[] generateRightBinBoundaries() {
         double[] rightBinBoundaries = new double[numPositiveBins];
         double current = scale;
         for (int i = 0; i < numPositiveBins; i++) {
           rightBinBoundaries[i] = current;
           // The above values for numPositiveBins are chosen so that they do not cause an overflow
           // here.
           current *= base;
         }

         return rightBinBoundaries;
       }

       /** Returns the bin number where the provided positive value should be placed. */
       int getPositiveBinNumber(double value) {
         if (value == 0) {
           // Zero is special since log(0) is undefined
           return 0;
         }
         if (value < 0) {
           throw new IllegalArgumentException("Expected a positive input but got " + value);
         }

         // Calculate the most significant bit and clamp to a valid bin index.
         // x satisfies scale * base^x = abs(value)
         double x = (log(abs(value)) - log(scale)) / log(base);
         int msb = (int) ceil(x);
         return max(0, min(msb, numPositiveBins - 1));
       }
     }
   }

   /** A result of approximate bounds calculation. */
   @AutoValue
   public abstract static class Result {

     /** Lower approximate bound. */
     public abstract double lowerBound();

     /** Upper approximate bound. */
     public abstract double upperBound();

     public static Result create(double lower, double upper) {
       return new AutoValue_ApproximateBounds_Result(lower, upper);
     }
   }
 }
	//
	// Copyright 2020 Google LLC
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.
	//

	package com.google.privacy.differentialprivacy;

	import static com.google.common.base.Preconditions.checkArgument;
	import static com.google.common.base.Preconditions.checkState;
	import static java.lang.Math.abs;
	import static java.lang.Math.ceil;
	import static java.lang.Math.expm1;
	import static java.lang.Math.log;
	import static java.lang.Math.log1p;
	import static java.lang.Math.max;
	import static java.lang.Math.min;

	import com.google.auto.value.AutoValue;
	import com.google.common.annotations.VisibleForTesting;
	import com.google.common.primitives.Longs;
	import com.google.privacy.differentialprivacy.proto.SummaryOuterClass.ApproxBoundsSummary;
	import com.google.protobuf.ExtensionRegistry;
	import com.google.protobuf.InvalidProtocolBufferException;
	import java.security.SecureRandom;
	import java.util.Collection;
	import java.util.Optional;
	import java.util.Random;

	/**
	* Finds the approximate bounds of a set of numbers.
	*
	* <p>The algorithm uses two logarithmic histograms, one for positive values and one for negative
	* values. Histogram bin counts are thresholded to remove bins with too few counts. The threshold is
	* derived from the "failure probability", which is an upper bound on the probability of returning
	* bounds that are too loose.
	*
	* <p>We choose the rightmost bin that exceeds the threshold count and return its greater boundary
	* as the approximate upper bound. Similarly the leftmost bin that exceeds the threshold count is
	* chosen and its smaller boundary is returned as the approximate lower bound. If the failure
	* probability is too low, then it is possible that no bin exceeds the threshold, in which case we
	* increase the failure probability and try again. When the failure probability exceeds a fixed
	* value, we throw an exception rather than risk returning bounds that are primarily determined by
	* the noise. If this happens, try providing a larger sample to ApproximateBounds.
	*
	* <p>For general details and key definitions, see <a href=
	* "https://github.com/google/differential-privacy/blob/main/differential_privacy.md#key-definitions">
	* the introduction to Differential Privacy</a>.
	*/
	public class ApproximateBounds {
	@VisibleForTesting final Params params;

	/**
	* The failure probability is an upper bound on the probability of returning bounds that are too
	* loose.
	*
	* <p>More precisely, it is the probability that we (incorrectly) return bounds for an empty
	* dataset due to the noise added. It is used to determine the threshold for a bin to be
	* considered non-empty.
	*
	* <p>Setting the failure probability too low will result in higher thresholds and, consequently,
	* in dropping bins with too few entries or failing to find bounds at all. If this happens, we
	* repeatedly retry with a larger failure probability until the failure probability reaches {@link
	* ApproximateBounds.MAX_FAILURE_PROBABILITY}.
	*/
	private static final double INITIAL_FAILURE_PROBABILITY = 1e-9;

	/** The factor by which the failure probability is increased when bounding fails. */
	private static final double FAILURE_PROBABILITY_INCREMENT_FACTOR = 2;

	/**
	* The maximum acceptable failure probability.
	*
	* <p>When increasing the failure probability in order to try to find bounds for a small sample,
	* if the failure probability increases above this value then we return an error: the bounds we
	* would get would too often be influenced by noise rather than the sample.
	*/
	private static final double MAX_FAILURE_PROBABILITY = 1e-6;

	private final long[] positiveBins;
	private final long[] negativeBins;
	private final double[] rightBinBoundaries;

	private final Random random = new SecureRandom();

	private AggregationState aggregationState = AggregationState.DEFAULT;

	private ApproximateBounds(Params params) {
	this.params = params;

	positiveBins = new long[params.inputType().numPositiveBins];
	negativeBins = new long[params.inputType().numNegativeBins];

	// Cache bin boundaries for performance reasons.
	rightBinBoundaries = params.inputType().generateRightBinBoundaries();
	}

	public static Params.Builder builder() {
	return Params.Builder.newBuilder();
	}

	/** Adds a value to the approximate bounds calculation. */
	public void addEntry(double value) {
	checkState(
	aggregationState.equals(AggregationState.DEFAULT), aggregationState.getErrorMessage());

	// NaN is ignored because we cannot run logic on NaNs.
	if (Double.isNaN(value)) {
	return;
	}

	// If our histogram has no negative bins, clamp negative values to the 0th bin
	if (value < 0 && !params.inputType().hasNegativeBins) {
	positiveBins[0]++;
	return;
	}

	// If our histogram has negative bins, then they are symmetric with the positive ones.
	int binIndex = params.inputType().getPositiveBinNumber(abs(value));
	if (value >= 0) {
	positiveBins[binIndex]++;
	} else {
	negativeBins[binIndex]++;
	}
	}

	/** Adds a set of value to the approximate bounds calculation. */
	public void addEntries(Collection<Double> e) {
	e.forEach(this::addEntry);
	}

	public Result computeResult() {
	checkState(
	aggregationState.equals(AggregationState.DEFAULT), aggregationState.getErrorMessage());
	aggregationState = AggregationState.RESULT_RETURNED;

	double[] noisyPositiveBins = addNoise(positiveBins);
	double[] noisyNegativeBins = addNoise(negativeBins);

	double currentFailureProbability = INITIAL_FAILURE_PROBABILITY;
	while (currentFailureProbability <= MAX_FAILURE_PROBABILITY) {
	double perBinFailureProbability =
	computePerBinFailureProbability(
	currentFailureProbability, params.inputType().totalBins());

	double threshold =
	LaplaceNoise.computeQuantile(
	1 - perBinFailureProbability,
	/* x= */ 0,
	/* l1Sensitivity= */ params.maxContributions(),
	params.epsilon());

	Optional<Double> minimum = findMinimum(threshold, noisyNegativeBins, noisyPositiveBins);
	Optional<Double> maximum = findMaximum(threshold, noisyNegativeBins, noisyPositiveBins);

	if (minimum.isPresent() && maximum.isPresent()) {
	return Result.create(minimum.get(), maximum.get());
	}

	currentFailureProbability = FAILURE_PROBABILITY_INCREMENT_FACTOR * currentFailureProbability;
	}

	throw new IllegalArgumentException(
	"Bin count threshold was too large to find approximate bounds. Either run over a larger "
	+ "dataset or decrease success_probability and try again.");
	}

	/**
	* Converts an overall failure probability into the corresponding per-bin failure probability.
	*
	* <p>If we were dealing with a "success probability" (i.e. 1 - failure probability) and had N
	* bins, then the per-bin success probability is the N^th root of the overall success probability.
	* Translated into failure probabilities: perBinFP = 1 - pow(1 - FP, 1/N)
	*
	* <p>Since in general the failure probability is very small, we use expm1 and log1p to calculate
	* this:
	*
	* <pre>{@code
	* perBinFP = 1 - exp(log(pow(1 - FP, 1/N)))
	* = 1 - exp(log(1 - FP) / N)
	* = -expm1(log1p(-FP) / N)
	* }</pre>
	*/
	@VisibleForTesting
	static double computePerBinFailureProbability(double overallFailureProbability, int numBins) {
	return -expm1(log1p(-overallFailureProbability) / numBins);
	}

	/**
	* Finds the minimum by looking for bins with sufficient amount of entries in the noisy
	* histograms.
	*/
	private Optional<Double> findMinimum(
	double threshold, double[] noisyNegativeBins, double[] noisyPositiveBins) {
	for (int i = noisyNegativeBins.length - 1; i >= 0; --i) {
	if (noisyNegativeBins[i] >= threshold) {
	return Optional.of(getNegativeLeftBinBoundary(i));
	}
	}

	for (int i = 0; i < noisyPositiveBins.length; i++) {
	if (noisyPositiveBins[i] >= threshold) {
	return Optional.of(getPositiveLeftBinBoundary(i));
	}
	}

	return Optional.empty();
	}

	/**
	* Finds the maximum by looking for bins with sufficient amount of entries in the noisy
	* histograms.
	*/
	private Optional<Double> findMaximum(
	double threshold, double[] noisyNegativeBins, double[] noisyPositiveBins) {
	for (int i = noisyPositiveBins.length - 1; i >= 0; --i) {
	if (noisyPositiveBins[i] >= threshold) {
	return Optional.of(getPositiveRightBinBoundary(i));
	}
	}

	for (int i = 0; i < noisyNegativeBins.length; i++) {
	if (noisyNegativeBins[i] >= threshold) {
	return Optional.of(getNegativeRightBinBoundary(i));
	}
	}

	return Optional.empty();
	}

	private double getNegativeLeftBinBoundary(int bin) {
	return -1.0 * getPositiveRightBinBoundary(bin);
	}

	private double getNegativeRightBinBoundary(int bin) {
	return -getPositiveLeftBinBoundary(bin);
	}

	private double getPositiveLeftBinBoundary(int bin) {
	if (bin == 0) {
	return 0;
	}
	return getPositiveRightBinBoundary(bin - 1);
	}

	private double getPositiveRightBinBoundary(int bin) {
	return rightBinBoundaries[bin];
	}

	private double[] addNoise(long[] bins) {
	double[] noisyBins = new double[bins.length];
	for (int i = 0; i < bins.length; i++) {
	// Since we never release the raw noised values from this class, we don't need to defend
	// against attacks against the least significant bits of the noise (see "On Significance of
	// the Least Significant Bits For Differential Privacy" by Ilya Mironov,
	// https://www.microsoft.com/en-us/research/wp-content/uploads/2012/10/lsbs.pdf). Therefore we
	// can use a faster randomness generation method.
	noisyBins[i] =
	LaplaceNoise.computeQuantile(
	/* rank= */ random.nextDouble(),
	/* x= */ (double) bins[i],
	/* l1Sensitivity= */ params.maxContributions(),
	params.epsilon());
	}

	return noisyBins;
	}

	/**
	* Returns a serializable summary of the current state of this {@link ApproximateBounds} instance.
	* The summary can be used to merge this instance with another instance of {@link
	* ApproximateBounds}.
	*
	* <p>This method cannot be invoked if the result has already been computed using {@link
	* computeResult()}. Moreover, after this instance of {@link ApproximateBounds} has been
	* serialized once, further modification and queries are not possible anymore.
	*
	* @throws IllegalStateException if this instance of {@link ApproximateBounds} has already been
	* queried.
	*/
	public byte[] getSerializableSummary() {
	checkState(
	aggregationState != AggregationState.RESULT_RETURNED, aggregationState.getErrorMessage());
	aggregationState = AggregationState.SERIALIZED;

	return ApproxBoundsSummary.newBuilder()
	.addAllPosBinCount(Longs.asList(positiveBins))
	.addAllNegBinCount(Longs.asList(negativeBins))
	.build()
	.toByteArray();
	}

	/**
	* Merges the output of {@link #getSerializableSummary()} from a different instance of {@link
	* ApproximateBounds} with this instance. Intended to be used in the context of distributed
	* computation.
	*
	* @throws IllegalArgumentException if the passed serialized summary is invalid or has a different
	* number of bins to this instance.
	* @throws IllegalStateException if this instance of {@link ApproximateBounds} has already been
	* queried or serialized.
	*/
	public void mergeWith(byte[] otherSummary) {
	checkState(
	aggregationState.equals(AggregationState.DEFAULT), aggregationState.getErrorMessage());
	ApproxBoundsSummary otherSummaryParsed;
	try {
	otherSummaryParsed =
	ApproxBoundsSummary.parseFrom(otherSummary, ExtensionRegistry.newInstance());
	} catch (InvalidProtocolBufferException e) {
	throw new IllegalArgumentException(e);
	}

	checkArgument(
	otherSummaryParsed.getPosBinCountCount() == positiveBins.length,
	"Both histograms must have the same number of positive bins (merge target has %s, "
	+ "merge source has %s).",
	positiveBins.length,
	otherSummaryParsed.getPosBinCountCount());
	checkArgument(
	otherSummaryParsed.getNegBinCountCount() == negativeBins.length,
	"Both histograms must have the same number of negative bins (merge target has %s, "
	+ "merge source has %s).",
	negativeBins.length,
	otherSummaryParsed.getNegBinCountCount());
	for (int i = 0; i < otherSummaryParsed.getPosBinCountCount(); i++) {
	positiveBins[i] += otherSummaryParsed.getPosBinCount(i);
	}
	for (int i = 0; i < otherSummaryParsed.getNegBinCountCount(); i++) {
	negativeBins[i] += otherSummaryParsed.getNegBinCount(i);
	}
	}

	/**
	* Parameters for calculation of differentially-private approximate bounds.
	*
	* <p>See the {@link Builder} for the explanation of each parameter.
	*/
	@AutoValue
	public abstract static class Params {
	abstract double epsilon();

	abstract InputType inputType();

	abstract Integer maxContributions();

	public abstract Builder toBuilder();

	@AutoValue.Builder
	public abstract static class Builder {
	// Approximate bounds supports only Laplace noise due to the way sensitivities
	// are calculated in the internal logarithmic histogram. Therefore, the builder doesn't
	// accept noise or delta parameters.

	/** Epsilon to be used for the approximate bounds calculation. */
	public abstract Builder epsilon(double value);

	/**
	* Type of the inputs to {@link ApproximateBounds}.
	*
	* <p>This parameter is used to optimize the bounding algorithm based on expected properties
	* of the input values, e.g., the values have the range and granularity of a certain numerical
	* type such as double, long, etc. For example, when the input type is {@code
	* InputType.DOUBLE}, we include both positive and negative bins in the histogram, as well as
	* bins between 0 and 1. Note that all values passed to {@link ApproximateBounds} are doubles;
	* this parameter only specifies how the algorithm processes the values.
	*/
	public abstract Builder inputType(InputType inputType);

	/** The maximum number of contributions each privacy unit can make to the dataset. */
	public abstract Builder maxContributions(Integer value);

	abstract Params autoBuild();

	private static Builder newBuilder() {
	Params.Builder builder = new AutoValue_ApproximateBounds_Params.Builder();
	builder.inputType(InputType.DOUBLE);
	return builder;
	}

	public ApproximateBounds build() {
	Params params = autoBuild();
	DpPreconditions.checkEpsilon(params.epsilon());
	return new ApproximateBounds(params);
	}
	}

	/**
	* The type of input that this algorithm will be provided with.
	*
	* <p>This determines the size and number of bins, as well as whether we should include negative
	* bins and bins between 0 and 1.
	*
	* <p>In general, bin i contains the number of inputs that lie in the range (scale * base^(i-1),
	* scale * base^i], where scale and base are determined by the InputType. The exception is
	* positive bin 0, which has boundaries [0, scale * base^0]. Negative bin 0 does not contain 0.
	* Values outside of the range covered by bins are clipped to the closest bin boundary.
	*
	* <p>For example, if scale = 1, base = 2, and num_bins = 4 then the positive histogram bins are
	* [0, 1], (1, 2], (2, 4], and (4, 8].
	*/
	public enum InputType {
	// For doubles, we include both positive and negative bins, with successive bins doubling in
	// size. These parameters result in a rightmost bin boundary equal to 2^1023. Since
	// Double.MAX_VALUE is slightly less than 2^1024, values close to ±Double.MAX_VALUE will get
	// clamped. Sample bin boundaries:
	// ..., [-2, -1), [-1, -0.5), ..., (0.5, 1], (1, 2], ...
	DOUBLE(2046, /hasNegativeBins=/ true, Double.MIN_NORMAL),
	// For integers, the bins are the same as for doubles except we don't subdivide the bins
	// [-1, 0) and [0, 1]. These parameters result in a rightmost bin boundary equal to 2^31.
	// Note that this is greater than Integer.MAX_VALUE, so Integer.MAX_VALUE lies in the
	// interior of the rightmost bin. Sample bin boundaries:
	// ..., [-2, -1), [-1, 0), [0, 1], (1, 2], ...
	INTEGER(32, /hasNegativeBins=/ true, 1),
	// For positive integers, the bins are the same as for integers except we don't include
	// negative bins. Sample bin boundaries:
	// [0, 1], (1, 2], ...
	POSITIVE_INTEGER(32, /hasNegativeBins=/ false, 1),
	// In tests, we use a reduced number of bins for performance reasons and to keep assertions
	// simple. We only use 10 bins, with successive bins doubling in size. Sample bin boundaries:
	// [-16, -8), [-8, -4) ..., [0, 1], (1, 2], ..., (8, 16]
	TEST(5, /hasNegativeBins=/ true, 1);

	@VisibleForTesting public final int numPositiveBins;
	@VisibleForTesting public final int numNegativeBins;
	@VisibleForTesting public final boolean hasNegativeBins;
	@VisibleForTesting public final double scale;
	@VisibleForTesting public final double base = 2.0;

	private InputType(int numPositiveBins, boolean hasNegativeBins, double scale) {
	this.numPositiveBins = numPositiveBins;
	this.numNegativeBins = hasNegativeBins ? numPositiveBins : 0;
	this.hasNegativeBins = hasNegativeBins;
	this.scale = scale;
	}

	int totalBins() {
	return this.numPositiveBins + this.numNegativeBins;
	}

	double[] generateRightBinBoundaries() {
	double[] rightBinBoundaries = new double[numPositiveBins];
	double current = scale;
	for (int i = 0; i < numPositiveBins; i++) {
	rightBinBoundaries[i] = current;
	// The above values for numPositiveBins are chosen so that they do not cause an overflow
	// here.
	current *= base;
	}

	return rightBinBoundaries;
	}

	/** Returns the bin number where the provided positive value should be placed. */
	int getPositiveBinNumber(double value) {
	if (value == 0) {
	// Zero is special since log(0) is undefined
	return 0;
	}
	if (value < 0) {
	throw new IllegalArgumentException("Expected a positive input but got " + value);
	}

	// Calculate the most significant bit and clamp to a valid bin index.
	// x satisfies scale * base^x = abs(value)
	double x = (log(abs(value)) - log(scale)) / log(base);
	int msb = (int) ceil(x);
	return max(0, min(msb, numPositiveBins - 1));
	}
	}
	}

	/** A result of approximate bounds calculation. */
	@AutoValue
	public abstract static class Result {

	/** Lower approximate bound. */
	public abstract double lowerBound();

	/** Upper approximate bound. */
	public abstract double upperBound();

	public static Result create(double lower, double upper) {
	return new AutoValue_ApproximateBounds_Result(lower, upper);
	}
	}
	}