blob: 8e2546fad3c525ed2d9e0a41115ddf243bead4a7 [file] [log] [blame]
// Copyright 2016 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COBALT_SRC_ALGORITHMS_RAPPOR_BASIC_RAPPOR_ANALYZER_H_
#define COBALT_SRC_ALGORITHMS_RAPPOR_BASIC_RAPPOR_ANALYZER_H_
#include <memory>
#include <vector>
#include "src/algorithms/rappor/rappor_config_validator.h"
namespace cobalt {
namespace rappor {
class BasicRapporAnalyzer {
public:
// Constructs a BasicRapporAnalyzer for the given config. All of the
// observations added via AddObservation() must have been encoded using this
// config. If the config is not valid then all calls to AddObservation()
// will return false.
// TODO(rudominer) Enhance this API to also accept DP release parameters.
explicit BasicRapporAnalyzer(const BasicRapporConfig& config);
// Adds an additional observation to be analyzed. The observation must have
// been encoded using the BasicRapporConfig passed to the constructor.
//
// Returns true to indicate the observation was added without error and
// so num_observations() was incremented or false to indicate there was
// an error and so observation_errors() was incremented.
bool AddObservation(const BasicRapporObservation& obs);
// The number of times that AddObservation() was invoked minus the value
// of observation_errors().
[[nodiscard]] size_t num_observations() const { return num_observations_; }
// The number of times that AddObservation() was invoked and the observation
// was discarded due to an error. If this number is not zero it indicates
// that the Analyzer received data that was not created by a legitimate
// Cobalt client. See the error logs for details of the errors.
[[nodiscard]] size_t observation_errors() const { return observation_errors_; }
// The number of categories being analyzed.
[[nodiscard]] size_t num_categories() const { return category_counts_.size(); }
struct CategoryResult {
ValuePart category;
// An unbiased estimate of the true count for this category. Note that
// in order to maintain unbiasedness we allow count_estimate to be
// greater than num_observations() or less than zero. One may wish to
// clip to [0, num_observations()] before displaying to a user.
double count_estimate;
// Multiply this value by z_{alpha/2} to obtain the radius of an approximate
// 100(1 - alpha)% confidence interval. For example an approximate 95%
// confidence interval for the count is given by
// (count_estimate - 1.96*std_error, count_estimate + 1.96 * std_error)
// because 1.96 ~= z_{.025} meaning that P(Z > 1.96) ~= 0.025 where
// Z ~ Normal(0, 1).
double std_error;
};
// Performs Basic RAPPOR analysis on the observations added via
// AddObservation() and returns the results. The returned vector of
// CategoryResults will have length equal to the number of categories
// and will be in the category order specified in the config.
std::vector<CategoryResult> Analyze();
private:
friend class BasicRapporAnalyzerTest;
// Gives access to the raw counts for each category based on the observations
// added via AddObservation(). This is mostly useful for tests.
const std::vector<size_t>& raw_category_counts() { return category_counts_; }
std::unique_ptr<RapporConfigValidator> config_;
size_t num_observations_ = 0;
size_t observation_errors_ = 0;
// The raw counts for each category based on the observations added
// via AddObservation().
std::vector<size_t> category_counts_;
// The number of bytes used to encode observations. This is a function
// of the |config_|.
size_t num_encoding_bytes_;
};
} // namespace rappor
} // namespace cobalt
#endif // COBALT_SRC_ALGORITHMS_RAPPOR_BASIC_RAPPOR_ANALYZER_H_