Small refactor of RAPPOR classes: Expose some private functions.

We expose some private implementation functions in BloomBitCounter
and RapporEncoder so that they can be shared with the
RapporAnalyzer (in a later CL).

Change-Id: I808c9bf3f5b3283e9d8e96b0354e483c6b7aeaee
diff --git a/algorithms/rappor/bloom_bit_counter.h b/algorithms/rappor/bloom_bit_counter.h
index 6c07873..cfc5af6 100644
--- a/algorithms/rappor/bloom_bit_counter.h
+++ b/algorithms/rappor/bloom_bit_counter.h
@@ -69,10 +69,14 @@
   // cohort number from 0 to num_cohorts - 1.
   const std::vector<CohortCounts>& EstimateCounts();
 
+  // Returns the RapporConfigValidator held by this counter. Ownership is
+  // shared with the caller; the accessor itself does not modify the counter.
+  std::shared_ptr<RapporConfigValidator> config() const { return config_; }
+
  private:
   friend class BloomBitCounterTest;
 
-  std::unique_ptr<RapporConfigValidator> config_;
+  std::shared_ptr<RapporConfigValidator> config_;
 
   size_t num_observations_ = 0;
   size_t observation_errors_ = 0;
@@ -107,7 +111,7 @@
   // of |bit_sums|. In the latter case the values are listed
   // in bit order "from right to left". That is, count_estimates[0] and
   // std_error[0] contain values for the right-most bit, i.e. the least
-  // significant bit.
+  // significant bit of the last byte of the Bloom filter.
 
   // The adjusted counts giving our estimate of the true pre-encoded count
   // for each bit.
diff --git a/algorithms/rappor/rappor_encoder.cc b/algorithms/rappor/rappor_encoder.cc
index 66359ea..de14ff6 100644
--- a/algorithms/rappor/rappor_encoder.cc
+++ b/algorithms/rappor/rappor_encoder.cc
@@ -73,6 +73,34 @@
 
 RapporEncoder::~RapporEncoder() {}
 
+bool RapporEncoder::HashValueAndCohort(
+    const std::string serialized_value, uint32_t cohort_num,
+    uint32_t num_hashes, byte hashed_value[crypto::hash::DIGEST_SIZE]) {
+  // Hash input is |serialized_value| followed by the raw bytes of |cohort_num|.
+  std::vector<byte> hash_input(serialized_value.size() + sizeof(cohort_num));
+  std::memcpy(hash_input.data(), &serialized_value[0], serialized_value.size());
+  std::memcpy(hash_input.data() + serialized_value.size(), &cohort_num,
+              sizeof(cohort_num));
+
+  // Now we hash |hash_input| into |hashed_value|.
+  // We are going to use two bytes of |hashed_value| for each hash in the Bloom
+  // filter so we need DIGEST_SIZE to be at least num_hashes*2. This should have
+  // already been checked at config validation time.
+  CHECK(crypto::hash::DIGEST_SIZE >= num_hashes * 2);
+  return crypto::hash::Hash(hash_input.data(), hash_input.size(), hashed_value);
+}
+
+uint32_t RapporEncoder::ExtractBitIndex(
+    byte hashed_value[crypto::hash::DIGEST_SIZE], size_t hash_index,
+    uint32_t num_bits) {
+  // Each hash consumes two bytes of |hashed_value|, copied out with memcpy to
+  // avoid unaligned/aliasing reads. |num_bits| must be a power of 2 (checked
+  // by RapporConfigValidator) so the mod preserves the uniform distribution.
+  uint16_t two_bytes;
+  std::memcpy(&two_bytes, &hashed_value[hash_index * 2], sizeof(two_bytes));
+  return two_bytes % num_bits;
+}
+
 std::string RapporEncoder::MakeBloomBits(const ValuePart& value) {
   uint32_t num_bits = config_->num_bits();
   uint32_t num_bytes = (num_bits + 7) / 8;
@@ -81,19 +109,9 @@
   std::string serialized_value;
   value.SerializeToString(&serialized_value);
 
-  // We append the cohort to the value before hashing.
-  std::vector<byte> hash_input(serialized_value.size() + sizeof(cohort_num_));
-  std::memcpy(hash_input.data(), &serialized_value[0], serialized_value.size());
-  std::memcpy(hash_input.data() + serialized_value.size(), &cohort_num_,
-              sizeof(cohort_num_));
-
-  // Now we hash |hash_input| into |hashed_value|.
-  // We are going to use two bytes of |hashed_value| for each hash in the Bloom
-  // filter so we need DIGEST_SIZE to be at least num_hashes*2. This should have
-  // already been checked at config validation time.
-  CHECK(crypto::hash::DIGEST_SIZE >= num_hashes * 2);
   byte hashed_value[crypto::hash::DIGEST_SIZE];
-  if (!crypto::hash::Hash(hash_input.data(), hash_input.size(), hashed_value)) {
+  if (!HashValueAndCohort(serialized_value, cohort_num_, num_hashes,
+                          hashed_value)) {
     VLOG(1) << "Hash() failed";
     return "";
   }
@@ -102,13 +120,7 @@
   // (The C++ Protocol Buffer API uses string to represent an array of bytes.)
   std::string data(num_bytes, static_cast<char>(0));
   for (size_t hash_index = 0; hash_index < num_hashes; hash_index++) {
-    // Each bloom filter consumes two bytes of |hashed_value|. Note that
-    // num_bits is required to be a power of 2 (this is checked in the
-    // constructor of RapporConfigValidator) so that the mod operation below
-    // preserves the uniform distribution of |hashed_value|.
-    uint32_t bit_index =
-        (*reinterpret_cast<uint16_t*>(&hashed_value[hash_index * 2])) %
-        num_bits;
+    uint32_t bit_index = ExtractBitIndex(hashed_value, hash_index, num_bits);
 
     // Indexed from the right, i.e. the least-significant bit.
     uint32_t byte_index = bit_index / 8;
diff --git a/algorithms/rappor/rappor_encoder.h b/algorithms/rappor/rappor_encoder.h
index b0f8bf0..4ddc5de 100644
--- a/algorithms/rappor/rappor_encoder.h
+++ b/algorithms/rappor/rappor_encoder.h
@@ -23,6 +23,7 @@
 #include "algorithms/rappor/rappor_config_validator.h"
 #include "config/encodings.pb.h"
 #include "encoder/client_secret.h"
+#include "util/crypto_util/hash.h"
 #include "util/crypto_util/random.h"
 
 namespace cobalt {
@@ -51,12 +52,39 @@
 
  private:
   friend class StringRapporEncoderTest;
+  friend class RapporAnalyzer;
 
   // Allows Friend classess to set a special RNG for use in tests.
   void SetRandomForTesting(std::unique_ptr<crypto::Random> random) {
     random_ = std::move(random);
   }
 
+  // Computes a hash of the given |serialized_value| and |cohort_num| and writes
+  // the result to |hashed_value|. This plus ExtractBitIndex() are used by
+  // MakeBloomBits() to form the Bloom filter. These two functions have been
+  // extracted from MakeBloomBits() so that RapporAnalyzer can share them.
+  //
+  // |num_hashes| indicates the upper bound for the values of |hash_index|
+  // that will be passed to ExtractBitIndex() after this method returns.
+  //
+  // Returns true for success or false if the hash operation fails for any
+  // reason.
+  static bool HashValueAndCohort(
+      const std::string serialized_value, uint32_t cohort_num,
+      uint32_t num_hashes,
+      crypto::byte hashed_value[crypto::hash::DIGEST_SIZE]);
+
+  // Extracts a bit index from the given |hashed_value| for the given
+  // |hash_index|. This plus HashValueAndCohort are used by MakeBloomBits()
+  // to form the Bloom filter. These two functions have been extracted from
+  // MakeBloomBits() so that they can be shared by RapporAnalyzer.
+  //
+  // IMPORTANT: We index bits "from the right." This means that bit number zero
+  // is the least significant bit of the last byte of the Bloom filter.
+  static uint32_t ExtractBitIndex(
+      crypto::byte hashed_value[crypto::hash::DIGEST_SIZE], size_t hash_index,
+      uint32_t num_bits);
+
   // Generates the array of bloom bits derived from |value|. Returns the
   // empty string on error.
   std::string MakeBloomBits(const ValuePart& value);