blob: 2e3b2f2d502636c53a114cbd5cd8b8500ff18e50 [file] [log] [blame]
// Copyright 2017 The Fuchsia Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef COBALT_ALGORITHMS_RAPPOR_RAPPOR_ANALYZER_H_
#define COBALT_ALGORITHMS_RAPPOR_RAPPOR_ANALYZER_H_
#include <memory>
#include <string>
#include <vector>
#include "./observation.pb.h"
#include "algorithms/rappor/bloom_bit_counter.h"
#include "algorithms/rappor/rappor_config_validator.h"
#include "config/encodings.pb.h"
#include "config/report_configs.pb.h"
#include "grpc++/grpc++.h"
namespace cobalt {
namespace rappor {
// A string RAPPOR analysis result for a single candidate. The method
// RapporAnalyzer::Analyze() populates a vector of CandidateResults, one for
// each candidate.
struct CandidateResult {
double count_estimate;
double std_error;
};
// A RapporAnalyzer is constructed for the purpose of performing a single
// string RAPPOR analysis.
//
// (1) Construct a RapporAnalyzer passing in a RapporConfig and a
// RapporCandidateList.
//
// (2) Repeatedly invoke AddObservation() to add the set of observations to
// be analyzed. The observations must all be for the same metric part and
// must have been encoded using the same encoding configuration. More
// precisely this means they must be associated with the same customer_id,
// project_id, metric_id, encoding_config_id and metric_part_name.
//
// (3) Invoke Analyze() to perform the string RAPPOR analysis and obtain the
// results.
//
// (4) Optionally examine the underlying BloomBitCounter via the bit_counter()
// accessor.
class RapporAnalyzer {
public:
// Constructs a RapporAnalyzer for the given config and candidates. All of the
// observations added via AddObservation() must have been encoded using this
// config. If the config is not valid then all calls to AddObservation()
// will return false.
// TODO(rudominer) Enhance this API to also accept DP release parameters.
explicit RapporAnalyzer(const RapporConfig& config,
const RapporCandidateList* candidates);
// Adds an additional observation to be analyzed. The observation must have
// been encoded using the RapporConfig passed to the constructor.
//
// Returns true to indicate the observation was added without error.
bool AddObservation(const RapporObservation& obs);
// Performs the string RAPPOR analysis and writes the results to
// |results_out|. Return OK for success or an error status.
//
// |results_out| will initially be cleared and, upon success of the alogrithm,
// will contain a vector of size candidates->size() where |candidates| is the
// argument to the constructor. |results_out| will be in the same order
// as |candidates|. More precisely, the CandidateResult in
// (*results_out)[i] will be the result for the candidate in (*candidates)[i].
grpc::Status Analyze(std::vector<CandidateResult>* results_out);
// Gives access to the underlying BloomBitCounter.
const BloomBitCounter& bit_counter() { return bit_counter_; }
private:
friend class RapporAnalyzerTest;
// Builds the RAPPOR CandidateMap based on the data passed to the constructor.
grpc::Status BuildCandidateMap();
// An instance of Hashes is implicitly associated with a given
// (candidate, cohort) pair and gives the list of hash values for that pair
// under each of several hash functions. Each of the hash values is a
// bit index in a Bloom filter.
struct Hashes {
// This vector has size h = num_hashes from the RapporConfig passed
// to the RapporAnalyzer constructor. bit_indices[i] contains the value of
// the ith hash function applied to the implicitly associated
// (candidate, cohort) pair. bit_indices[i] is a bit index in the range
// [0, k) where k = num_bloom_bits from the RapporConfig passed to the
// RapporAnalyzer constructor.
//
// IMPORTANT: We index bits "from the right." This means that bit number
// zero is the least significant bit of the last byte of the Bloom filter.
std::vector<uint16_t> bit_indices;
};
// An instance of CohortMap is implicitly associated with a given
// candidate string S and gives the Hashes for the pairs (S, cohort)
// for each cohort in the range [0, num_cohorts).
struct CohortMap {
// This vector has size m = num_cohorts from the RapporConfig passed to
// the RapporAnalyzer constructor. cohort_hashes[i] contains the Hashes
// for cohort i.
std::vector<Hashes> cohort_hashes;
};
// CandidateMap stores the list of all candidates and a parallel list of
// CohortMaps for each candidate.
struct CandidateMap {
// Contains the list of all candidates. (pointer not owned)
const RapporCandidateList* candidate_list;
// This vector has size equal to the number of candidates in
// |candidate_list|. candidate_cohort_maps[i] contains the CohortMap for
// the ith candidate.
std::vector<CohortMap> candidate_cohort_maps;
};
BloomBitCounter bit_counter_;
std::shared_ptr<RapporConfigValidator> config_;
CandidateMap candidate_map_;
};
} // namespace rappor
} // namespace cobalt
#endif // COBALT_ALGORITHMS_RAPPOR_RAPPOR_ANALYZER_H_