Small refactor of RAPPOR classes: Expose some private functions.
We expose some private implementation functions in BloomBitCounter
and RapporEncoder so that they can be shared with the
RapporAnalyzer (in a later CL).
Change-Id: I808c9bf3f5b3283e9d8e96b0354e483c6b7aeaee
diff --git a/algorithms/rappor/bloom_bit_counter.h b/algorithms/rappor/bloom_bit_counter.h
index 6c07873..cfc5af6 100644
--- a/algorithms/rappor/bloom_bit_counter.h
+++ b/algorithms/rappor/bloom_bit_counter.h
@@ -69,10 +69,14 @@
// cohort number from 0 to num_cohorts - 1.
const std::vector<CohortCounts>& EstimateCounts();
+ std::shared_ptr<RapporConfigValidator> config() {
+ return config_;
+ }
+
private:
friend class BloomBitCounterTest;
- std::unique_ptr<RapporConfigValidator> config_;
+ std::shared_ptr<RapporConfigValidator> config_;
size_t num_observations_ = 0;
size_t observation_errors_ = 0;
@@ -107,7 +111,7 @@
// of |bit_sums|. In the latter case the values are listed
// in bit order "from right to left". That is, count_estimates[0] and
// std_error[0] contain values for the right-most bit, i.e. the least
- // significant bit.
+ // significant bit of the last byte of the Bloom filter.
// The adjusted counts giving our estimate of the true pre-encoded count
// for each bit.
diff --git a/algorithms/rappor/rappor_encoder.cc b/algorithms/rappor/rappor_encoder.cc
index 66359ea..de14ff6 100644
--- a/algorithms/rappor/rappor_encoder.cc
+++ b/algorithms/rappor/rappor_encoder.cc
@@ -73,6 +73,34 @@
RapporEncoder::~RapporEncoder() {}
+bool RapporEncoder::HashValueAndCohort(
+ const std::string serialized_value, uint32_t cohort_num,
+ uint32_t num_hashes, byte hashed_value[crypto::hash::DIGEST_SIZE]) {
+ // Hash input: the value bytes followed by the raw bytes of |cohort_num|.
+ std::vector<byte> hash_input(serialized_value.size() + sizeof(cohort_num));
+ std::memcpy(hash_input.data(), &serialized_value[0], serialized_value.size());
+ std::memcpy(hash_input.data() + serialized_value.size(), &cohort_num,
+ sizeof(cohort_num));
+
+ // Now we hash |hash_input| into |hashed_value|. Sizes above come from the
+ // |cohort_num| parameter (this is a static function; no member is read).
+ // ExtractBitIndex() uses two bytes of |hashed_value| per hash, so we need
+ // DIGEST_SIZE >= num_hashes*2 (should already be checked by config validation).
+ CHECK(crypto::hash::DIGEST_SIZE >= num_hashes * 2);
+ return crypto::hash::Hash(hash_input.data(), hash_input.size(), hashed_value);
+}
+
+uint32_t RapporEncoder::ExtractBitIndex(
+ byte hashed_value[crypto::hash::DIGEST_SIZE], size_t hash_index,
+ uint32_t num_bits) {
+ // Each hash index consumes two bytes of |hashed_value|, read as a uint16_t
+ // in host byte order. Note that num_bits is required to be a power of 2
+ // (this is checked in the constructor of RapporConfigValidator) so that the
+ // mod operation below preserves the uniform distribution of |hashed_value|.
+ return (*reinterpret_cast<uint16_t*>(&hashed_value[hash_index * 2])) %
+ num_bits;
+}
+
std::string RapporEncoder::MakeBloomBits(const ValuePart& value) {
uint32_t num_bits = config_->num_bits();
uint32_t num_bytes = (num_bits + 7) / 8;
@@ -81,19 +109,9 @@
std::string serialized_value;
value.SerializeToString(&serialized_value);
- // We append the cohort to the value before hashing.
- std::vector<byte> hash_input(serialized_value.size() + sizeof(cohort_num_));
- std::memcpy(hash_input.data(), &serialized_value[0], serialized_value.size());
- std::memcpy(hash_input.data() + serialized_value.size(), &cohort_num_,
- sizeof(cohort_num_));
-
- // Now we hash |hash_input| into |hashed_value|.
- // We are going to use two bytes of |hashed_value| for each hash in the Bloom
- // filter so we need DIGEST_SIZE to be at least num_hashes*2. This should have
- // already been checked at config validation time.
- CHECK(crypto::hash::DIGEST_SIZE >= num_hashes * 2);
byte hashed_value[crypto::hash::DIGEST_SIZE];
- if (!crypto::hash::Hash(hash_input.data(), hash_input.size(), hashed_value)) {
+ if (!HashValueAndCohort(serialized_value, cohort_num_, num_hashes,
+ hashed_value)) {
VLOG(1) << "Hash() failed";
return "";
}
@@ -102,13 +120,7 @@
// (The C++ Protocol Buffer API uses string to represent an array of bytes.)
std::string data(num_bytes, static_cast<char>(0));
for (size_t hash_index = 0; hash_index < num_hashes; hash_index++) {
- // Each bloom filter consumes two bytes of |hashed_value|. Note that
- // num_bits is required to be a power of 2 (this is checked in the
- // constructor of RapporConfigValidator) so that the mod operation below
- // preserves the uniform distribution of |hashed_value|.
- uint32_t bit_index =
- (*reinterpret_cast<uint16_t*>(&hashed_value[hash_index * 2])) %
- num_bits;
+ uint32_t bit_index = ExtractBitIndex(hashed_value, hash_index, num_bits);
// Indexed from the right, i.e. the least-significant bit.
uint32_t byte_index = bit_index / 8;
diff --git a/algorithms/rappor/rappor_encoder.h b/algorithms/rappor/rappor_encoder.h
index b0f8bf0..4ddc5de 100644
--- a/algorithms/rappor/rappor_encoder.h
+++ b/algorithms/rappor/rappor_encoder.h
@@ -23,6 +23,7 @@
#include "algorithms/rappor/rappor_config_validator.h"
#include "config/encodings.pb.h"
#include "encoder/client_secret.h"
+#include "util/crypto_util/hash.h"
#include "util/crypto_util/random.h"
namespace cobalt {
@@ -51,12 +52,39 @@
private:
friend class StringRapporEncoderTest;
+ friend class RapporAnalyzer;
// Allows Friend classess to set a special RNG for use in tests.
void SetRandomForTesting(std::unique_ptr<crypto::Random> random) {
random_ = std::move(random);
}
+ // Computes a hash of the given |serialized_value| and |cohort_num| and writes
+ // the result to |hashed_value|. This plus ExtractBitIndex() are used by
+ // MakeBloomBits() to form the Bloom filter. These two functions have been
+ // extracted from MakeBloomBits() so that they can be shared by RapporAnalyzer.
+ //
+ // |num_hashes| indicates the upper bound for the values of |hash_index|
+ // that will be passed to ExtractBitIndex() after this method returns.
+ //
+ // Returns true for success or false if the hash operation fails for any
+ // reason.
+ static bool HashValueAndCohort(
+ const std::string serialized_value, uint32_t cohort_num,
+ uint32_t num_hashes,
+ crypto::byte hashed_value[crypto::hash::DIGEST_SIZE]);
+
+ // Extracts a bit index from the given |hashed_value| for the given
+ // |hash_index|. This plus HashValueAndCohort() are used by MakeBloomBits()
+ // to form the Bloom filter. These two functions have been extracted from
+ // MakeBloomBits() so that they can be shared by RapporAnalyzer.
+ //
+ // IMPORTANT: We index bits "from the right." This means that bit number zero
+ // is the least significant bit of the last byte of the Bloom filter.
+ static uint32_t ExtractBitIndex(
+ crypto::byte hashed_value[crypto::hash::DIGEST_SIZE], size_t hash_index,
+ uint32_t num_bits);
+
// Generates the array of bloom bits derived from |value|. Returns the
// empty string on error.
std::string MakeBloomBits(const ValuePart& value);