Adds functionality to build a candidate map.

This continues the work of implementing the parts of String RAPPOR
that occur prior to the use of LASSO. In terms of the python and R
Cobalt prototype this does the work of the Python function HashCandidates.

In more detail this builds a data structure that captures, for each
candidate string and each cohort, the indices of the set bits in
the Bloom filter.

- We add the new class RapporAnalyzer. This is the class that will contain
the String RAPPOR analysis code.
- We add the function AddObservation() which just delegates to
BloomBitCounter::AddObservation().
- We ad the private function BuildCandidateMap() which contains the
logic for building the candidate map.
- We add a stub for the function Analyze(). Currently this function
does nothing but invokes BuildCandidateMap(). We also added a comment:
TODO(azani, mironov) Put LASSO analysis here.

Change-Id: I06af4f808ed8aad105fab0af2fb071bf38275086
diff --git a/algorithms/rappor/CMakeLists.txt b/algorithms/rappor/CMakeLists.txt
index b413f3f..7937f59 100644
--- a/algorithms/rappor/CMakeLists.txt
+++ b/algorithms/rappor/CMakeLists.txt
@@ -28,7 +28,7 @@
                       client_secret cobalt_crypto glog ${PROTOBUF_LIBRARY} rappor_config_validator)
 
 add_library(rappor_analyzer
-            basic_rappor_analyzer.cc bloom_bit_counter.cc
+            basic_rappor_analyzer.cc bloom_bit_counter.cc rappor_analyzer.cc
             ${COBALT_PROTO_SRCS} ${CONFIG_PROTO_SRCS})
 target_link_libraries(rappor_analyzer
                       cobalt_crypto glog ${PROTOBUF_LIBRARY} rappor_config_validator)
@@ -37,7 +37,7 @@
 include_directories(BEFORE PRIVATE "${CMAKE_SOURCE_DIR}/third_party/boringssl/src/include")
 add_executable(rappor_tests
                basic_rappor_analyzer_test.cc
-               bloom_bit_counter_test rappor_encoder_test.cc
+               bloom_bit_counter_test rappor_encoder_test.cc rappor_analyzer_test.cc
                rappor_test_utils.cc rappor_test_utils_test.cc)
 target_link_libraries(rappor_tests rappor_encoder rappor_analyzer rappor_analyzer gtest gtest_main)
 set_target_properties(rappor_tests PROPERTIES
diff --git a/algorithms/rappor/rappor_analyzer.cc b/algorithms/rappor/rappor_analyzer.cc
new file mode 100644
index 0000000..75175e5
--- /dev/null
+++ b/algorithms/rappor/rappor_analyzer.cc
@@ -0,0 +1,113 @@
+// Copyright 2017 The Fuchsia Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "algorithms/rappor/rappor_analyzer.h"
+
+#include <glog/logging.h>
+
+#include "algorithms/rappor/rappor_encoder.h"
+#include "util/crypto_util/hash.h"
+
+namespace cobalt {
+namespace rappor {
+
+using crypto::byte;
+
+RapporAnalyzer::RapporAnalyzer(const RapporConfig& config,
+                               const RapporCandidateList* candidates)
+    : bit_counter_(config), config_(bit_counter_.config()) {
+  candidate_map_.candidate_list = candidates;
+  // candidate_map_.candidate_cohort_maps remains empty for now. It
+  // will be populated by BuildCandidateMap.
+}
+
+bool RapporAnalyzer::AddObservation(const RapporObservation& obs) {
+  return bit_counter_.AddObservation(obs);
+}
+
+grpc::Status RapporAnalyzer::Analyze(
+    std::vector<CandidateResult>* results_out) {
+  CHECK(results_out);
+  auto status = BuildCandidateMap();
+  if (!status.ok()) {
+    return status;
+  }
+
+  //////////////////////////////////////////////////////
+  // TODO(azani, mironov) Put LASSO analysis here.
+  /////////////////////////////////////////////////////
+
+  return grpc::Status::OK;
+}
+
+grpc::Status RapporAnalyzer::BuildCandidateMap() {
+  if (!config_->valid()) {
+    return grpc::Status(grpc::FAILED_PRECONDITION,
+                        "Invalid RapporConfig passed to constructor.");
+  }
+
+  // TODO(rudominer) Consider caching the CandidateMaps. The same RAPPOR
+  // analysis will likely be run every day with the same RapporConfig and
+  // RapporCandidateList but different sets of Observatons. Since the
+  // CandidateMap does not depend on the Observations, we can cache them
+  // (for example in Bigtable.) Alternatively, instead of caching the
+  // CandidateMap itself we could also cache the sparse binary matrix that will
+  // be generated based on the CandidateMap and used as one of the inputs
+  // to the LASSO algorithm.
+
+  uint32_t num_bits = config_->num_bits();
+  uint32_t num_cohorts = config_->num_cohorts();
+  uint32_t num_hashes = config_->num_hashes();
+
+  for (const std::string& candidate :
+       candidate_map_.candidate_list->candidates()) {
+    // In rappor_encoder.cc it is not std::strings that are encoded but rather
+    // |ValuePart|s. So here we want to take the candidate as a string and
+    // convert it into a serialized |ValuePart|.
+    ValuePart candidate_as_value_part;
+    candidate_as_value_part.set_string_value(candidate);
+    std::string serialized_candidate;
+    candidate_as_value_part.SerializeToString(&serialized_candidate);
+
+    // Append a CohortMap for this candidate.
+    candidate_map_.candidate_cohort_maps.emplace_back();
+    CohortMap& cohort_map = candidate_map_.candidate_cohort_maps.back();
+
+    // Iterate through the cohorts.
+    for (auto cohort = 0; cohort < num_cohorts; cohort++) {
+      // Append an instance of |Hashes| for this cohort.
+      cohort_map.cohort_hashes.emplace_back();
+      Hashes& hashes = cohort_map.cohort_hashes.back();
+
+      // Form one big hashed value of the serialized_candidate. This will be
+      // used to obtain multiple bit indices.
+      byte hashed_value[crypto::hash::DIGEST_SIZE];
+      if (!RapporEncoder::HashValueAndCohort(serialized_candidate, cohort,
+                                             num_hashes, hashed_value)) {
+        return grpc::Status(grpc::INTERNAL,
+                            "Hash operation failed unexpectedly.");
+      }
+
+      // Extract one bit index for each of the hashes in the Bloom filter.
+      for (size_t hash_index = 0; hash_index < num_hashes; hash_index++) {
+        hashes.bit_indices.push_back(
+            RapporEncoder::ExtractBitIndex(hashed_value, hash_index, num_bits));
+      }
+    }
+  }
+  return grpc::Status::OK;
+}
+
+}  // namespace rappor
+}  // namespace cobalt
diff --git a/algorithms/rappor/rappor_analyzer.h b/algorithms/rappor/rappor_analyzer.h
new file mode 100644
index 0000000..2e3b2f2
--- /dev/null
+++ b/algorithms/rappor/rappor_analyzer.h
@@ -0,0 +1,142 @@
+// Copyright 2017 The Fuchsia Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef COBALT_ALGORITHMS_RAPPOR_RAPPOR_ANALYZER_H_
+#define COBALT_ALGORITHMS_RAPPOR_RAPPOR_ANALYZER_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "./observation.pb.h"
+#include "algorithms/rappor/bloom_bit_counter.h"
+#include "algorithms/rappor/rappor_config_validator.h"
+#include "config/encodings.pb.h"
+#include "config/report_configs.pb.h"
+#include "grpc++/grpc++.h"
+
+namespace cobalt {
+namespace rappor {
+
+// A string RAPPOR analysis result for a single candidate. The method
+// RapporAnalyzer::Analyze() populates a vector of CandidateResults, one for
+// each candidate.
+struct CandidateResult {
+  double count_estimate;
+
+  double std_error;
+};
+
+// A RapporAnalyzer is constructed for the purpose of performing a single
+// string RAPPOR analysis.
+//
+// (1) Construct a RapporAnalyzer passing in a RapporConfig and a
+// RapporCandidateList.
+//
+// (2) Repeatedly invoke AddObservation() to add the set of observations to
+//     be analyzed. The observations must all be for the same metric part and
+//     must have been encoded using the same encoding configuration. More
+//     precisely this means they must be associated with the same customer_id,
+//     project_id, metric_id, encoding_config_id and metric_part_name.
+//
+// (3) Invoke Analyze() to perform the string RAPPOR analysis and obtain the
+//     results.
+//
+// (4) Optionally examine the underlying BloomBitCounter via the bit_counter()
+//     accessor.
+class RapporAnalyzer {
+ public:
+  // Constructs a RapporAnalyzer for the given config and candidates. All of the
+  // observations added via AddObservation() must have been encoded using this
+  // config. If the config is not valid then all calls to AddObservation()
+  // will return false.
+  // TODO(rudominer) Enhance this API to also accept DP release parameters.
+  explicit RapporAnalyzer(const RapporConfig& config,
+                          const RapporCandidateList* candidates);
+
+  // Adds an additional observation to be analyzed. The observation must have
+  // been encoded using the RapporConfig passed to the constructor.
+  //
+  // Returns true to indicate the observation was added without error.
+  bool AddObservation(const RapporObservation& obs);
+
+  // Performs the string RAPPOR analysis and writes the results to
+  // |results_out|. Return OK for success or an error status.
+  //
+  // |results_out| will initially be cleared and, upon success of the alogrithm,
+  // will contain a vector of size candidates->size() where |candidates| is the
+  // argument to the constructor. |results_out| will be in the same order
+  // as |candidates|. More precisely, the CandidateResult in
+  // (*results_out)[i] will be the result for the candidate in (*candidates)[i].
+  grpc::Status Analyze(std::vector<CandidateResult>* results_out);
+
+  // Gives access to the underlying BloomBitCounter.
+  const BloomBitCounter& bit_counter() { return bit_counter_; }
+
+ private:
+  friend class RapporAnalyzerTest;
+
+  // Builds the RAPPOR CandidateMap based on the data passed to the constructor.
+  grpc::Status BuildCandidateMap();
+
+  // An instance of Hashes is implicitly associated with a given
+  // (candidate, cohort) pair and gives the list of hash values for that pair
+  // under each of several hash functions. Each of the hash values is a
+  // bit index in a Bloom filter.
+  struct Hashes {
+    // This vector has size h = num_hashes from the RapporConfig passed
+    // to the RapporAnalyzer constructor. bit_indices[i] contains the value of
+    // the ith hash function applied to the implicitly associated
+    // (candidate, cohort) pair. bit_indices[i] is a bit index in the range
+    // [0, k) where k = num_bloom_bits from the RapporConfig passed to the
+    // RapporAnalyzer constructor.
+    //
+    // IMPORTANT: We index bits "from the right." This means that bit number
+    // zero is the least significant bit of the last byte of the Bloom filter.
+    std::vector<uint16_t> bit_indices;
+  };
+
+  // An instance of CohortMap is implicitly associated with a given
+  // candidate string S and gives the Hashes for the pairs (S, cohort)
+  // for each cohort in the range [0, num_cohorts).
+  struct CohortMap {
+    // This vector has size m = num_cohorts from the RapporConfig passed to
+    // the RapporAnalyzer constructor. cohort_hashes[i] contains the Hashes
+    // for cohort i.
+    std::vector<Hashes> cohort_hashes;
+  };
+
+  // CandidateMap stores the list of all candidates and a parallel list of
+  // CohortMaps for each candidate.
+  struct CandidateMap {
+    // Contains the list of all candidates. (pointer not owned)
+    const RapporCandidateList* candidate_list;
+
+    // This vector has size equal to the number of candidates in
+    // |candidate_list|. candidate_cohort_maps[i] contains the CohortMap for
+    // the ith candidate.
+    std::vector<CohortMap> candidate_cohort_maps;
+  };
+
+  BloomBitCounter bit_counter_;
+
+  std::shared_ptr<RapporConfigValidator> config_;
+
+  CandidateMap candidate_map_;
+};
+
+}  // namespace rappor
+}  // namespace cobalt
+
+#endif  // COBALT_ALGORITHMS_RAPPOR_RAPPOR_ANALYZER_H_
diff --git a/algorithms/rappor/rappor_analyzer_test.cc b/algorithms/rappor/rappor_analyzer_test.cc
new file mode 100644
index 0000000..fd932be
--- /dev/null
+++ b/algorithms/rappor/rappor_analyzer_test.cc
@@ -0,0 +1,242 @@
+// Copyright 2016 The Fuchsia Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "algorithms/rappor/rappor_analyzer.h"
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+
+#include <algorithm>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "algorithms/rappor/rappor_encoder.h"
+#include "algorithms/rappor/rappor_test_utils.h"
+#include "encoder/client_secret.h"
+#include "third_party/googletest/googletest/include/gtest/gtest.h"
+
+namespace cobalt {
+namespace rappor {
+
+using encoder::ClientSecret;
+
+namespace {
+
+std::string CandidateString(int i) {
+  return std::string("candidate string") + std::to_string(i);
+}
+
+// Populates |candidate_list| with |num_candidates| candidates;
+void PopulateRapporCandidateList(uint32_t num_candidates,
+                                 RapporCandidateList* candidate_list) {
+  candidate_list->Clear();
+  for (int i = 0; i < num_candidates; i++) {
+    candidate_list->add_candidates(CandidateString(i));
+  }
+}
+
+// Makes a RapporConfig with the given data (and num_hashes=2).
+RapporConfig Config(uint32_t num_bloom_bits, uint32_t num_cohorts,
+                    uint32_t num_hashes, double p, double q) {
+  RapporConfig config;
+  config.set_num_bloom_bits(num_bloom_bits);
+  config.set_num_hashes(num_hashes);
+  config.set_num_cohorts(num_cohorts);
+  config.set_prob_0_becomes_1(p);
+  config.set_prob_1_stays_1(q);
+  return config;
+}
+
+}  // namespace
+
+class RapporAnalyzerTest : public ::testing::Test {
+ protected:
+  // Sets the member variable analyzer_ to a new RapporAnalyzer configured
+  // with the given arguments and the current values of prob_0_becomes_1_,
+  // prob_1_stays_1_.
+  void SetAnalyzer(uint32_t num_candidates, uint32_t num_bloom_bits,
+                   uint32_t num_cohorts, uint32_t num_hashes) {
+    PopulateRapporCandidateList(num_candidates, &candidate_list_);
+    config_ = Config(num_bloom_bits, num_cohorts, num_hashes, prob_0_becomes_1_,
+                     prob_1_stays_1_);
+    analyzer_.reset(new RapporAnalyzer(config_, &candidate_list_));
+  }
+
+  void BuildCandidateMap() {
+    EXPECT_EQ(grpc::OK, analyzer_->BuildCandidateMap().error_code());
+
+    uint32_t num_candidates =
+        analyzer_->candidate_map_.candidate_list->candidates_size();
+    uint32_t num_cohorts = analyzer_->config_->num_cohorts();
+    uint32_t num_hashes = analyzer_->config_->num_hashes();
+    uint32_t num_bits = analyzer_->config_->num_bits();
+
+    // Expect the number of candidates to be correct,
+    EXPECT_EQ(num_candidates,
+              analyzer_->candidate_map_.candidate_cohort_maps.size());
+
+    // and for each candidate...
+    for (auto candidate = 0; candidate < num_candidates; candidate++) {
+      // expect the number of cohorts to be correct,
+      EXPECT_EQ(num_cohorts,
+                analyzer_->candidate_map_.candidate_cohort_maps[candidate]
+                    .cohort_hashes.size());
+
+      // and for each cohort...
+      for (auto cohort = 0; cohort < num_cohorts; cohort++) {
+        // expect the number of hashes to be correct,
+        EXPECT_EQ(num_hashes,
+                  analyzer_->candidate_map_.candidate_cohort_maps[candidate]
+                      .cohort_hashes[cohort]
+                      .bit_indices.size());
+
+        // and for each hash...
+        for (int hash = 0; hash < num_hashes; hash++) {
+          // Expect the bit index to be in the range [0, num_bits).
+          auto bit_index = GetCandidateMapValue(candidate, cohort, hash);
+          EXPECT_GE(bit_index, 0);
+          EXPECT_LT(bit_index, num_bits);
+        }
+      }
+    }
+  }
+
+  // This should be invoked after BuildCandidateMap. It returns the bit index
+  // within the CandidateMap for the given |candidate_index|, |cohort_index|,
+  // and |hash_index|.
+  uint16_t GetCandidateMapValue(uint16_t candidate_index, uint16_t cohort_index,
+                                uint16_t hash_index) {
+    EXPECT_GT(analyzer_->candidate_map_.candidate_cohort_maps.size(),
+              candidate_index);
+    EXPECT_GT(analyzer_->candidate_map_.candidate_cohort_maps[candidate_index]
+                  .cohort_hashes.size(),
+              cohort_index);
+    EXPECT_GT(analyzer_->candidate_map_.candidate_cohort_maps[candidate_index]
+                  .cohort_hashes[cohort_index]
+                  .bit_indices.size(),
+              hash_index);
+    return analyzer_->candidate_map_.candidate_cohort_maps[candidate_index]
+        .cohort_hashes[cohort_index]
+        .bit_indices[hash_index];
+  }
+
+  // Builds and returns a bit string (i.e. a string of ASCII '0's and '1's)
+  // representing the Bloom filter implicitly stored within the CandidateMap
+  // for the given |candidate_index| and |cohort_index|.
+  std::string BuildBitString(uint16_t candidate_index, uint16_t cohort_index) {
+    return BuildBinaryString(
+        analyzer_->config_->num_bits(),
+        analyzer_->candidate_map_.candidate_cohort_maps[candidate_index]
+            .cohort_hashes[cohort_index]
+            .bit_indices);
+  }
+
+  RapporConfig config_;
+  std::unique_ptr<RapporAnalyzer> analyzer_;
+
+  RapporCandidateList candidate_list_;
+
+  // By default this test uses p=0, q=1. Individual tests may override this.
+  double prob_0_becomes_1_ = 0.0;
+  double prob_1_stays_1_ = 1.0;
+};
+
+// Tests the function BuildCandidateMap. We build one small CandidateMap and
+// then we explicitly check every value against a known value. We have not
+// independently verified the SHA-256 hash values and so rather than a test
+// of correctness this is firstly a sanity test: we can eyeball the values
+// and confirm they look sane, and secondly a regression test.
+TEST_F(RapporAnalyzerTest, BuildCandidateMapSmallTest) {
+  static const uint32_t kNumCandidates = 5;
+  static const uint32_t kNumCohorts = 3;
+  static const uint32_t kNumHashes = 2;
+  static const uint32_t kNumBloomBits = 8;
+
+  SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);
+  BuildCandidateMap();
+
+  // clang-format off
+  int expected_bit_indices[kNumCandidates][kNumCohorts*kNumHashes] = {
+  // cihj means cohort = i and hash-index = j.
+  // c0h0 c0h1 c1h0 c1h1 c2h0 c2h2
+      {3,   5,   2,   6,   3,   6},  // candidate 0
+      {1,   5,   4,   7,   2,   0},  // candidate 1
+      {3,   0,   2,   0,   1,   4},  // candidate 2
+      {5,   1,   2,   4,   2,   4},  // candidate 3
+      {1,   4,   3,   1,   2,   6},  // candidate 4
+  };
+  // clang-format on
+
+  for (auto candidate = 0; candidate < kNumCandidates; candidate++) {
+    for (auto cohort = 0; cohort < kNumCohorts; cohort++) {
+      for (int hash = 0; hash < kNumHashes; hash++) {
+        EXPECT_EQ(expected_bit_indices[candidate][cohort * kNumHashes + hash],
+                  GetCandidateMapValue(candidate, cohort, hash))
+            << "(" << candidate << "," << cohort * kNumHashes + hash << ")";
+      }
+    }
+  }
+}
+
+// Tests the function BuildCandidateMap. We build many different CandidateMaps
+// with many different parameters. We are testing firstly that the procedure
+// completes without error, secondly that the shape of the produced
+// data structure is correct and thirdly that the bit indexes are in the range
+// [0, num_bloom_bits). The latter two checks occur inside of BuildCandidateMap.
+TEST_F(RapporAnalyzerTest, BuildCandidateMapSmokeTest) {
+  for (auto num_candidates : {11, 51, 99}) {
+    for (auto num_cohorts : {23, 45}) {
+      for (auto num_hashes : {2, 6, 7}) {
+        for (auto num_bloom_bits : {16, 128}) {
+          SetAnalyzer(num_candidates, num_bloom_bits, num_cohorts, num_hashes);
+          BuildCandidateMap();
+        }
+      }
+    }
+  }
+}
+
+// Tests the function BuildCandidateMap. We test that the map that is built
+// is consistent with the Bloom filters that are built by an encoder.
+TEST_F(RapporAnalyzerTest, BuildCandidateMapCompareWithEncoder) {
+  static const uint32_t kNumCandidates = 10;
+  static const uint32_t kNumCohorts = 20;
+  static const uint32_t kNumHashes = 5;
+  static const uint32_t kNumBloomBits = 64;
+
+  SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);
+  BuildCandidateMap();
+
+  for (int candidate = 0; candidate < kNumCandidates; candidate++) {
+    // Construct a new encoder with a new ClientSecret so that a random
+    // cohort is selected.
+    RapporEncoder encoder(config_, ClientSecret::GenerateNewSecret());
+
+    // Encode the current candidate string using |encoder|.
+    ValuePart value_part;
+    value_part.set_string_value(CandidateString(candidate));
+    RapporObservation observation;
+    encoder.Encode(value_part, &observation);
+
+    // Since p=0 and q=1 the RapporObservation contains the raw Bloom filter
+    // with no noise added. Confirm that the BloomFilter is the same as
+    // the one implied by the CandidateMap at the appropriate candidate
+    // and cohort.
+    EXPECT_EQ(BuildBitString(candidate, encoder.cohort()),
+              DataToBinaryString(observation.data()));
+  }
+}
+
+}  // namespace rappor
+}  // namespace cobalt
diff --git a/algorithms/rappor/rappor_test_utils.cc b/algorithms/rappor/rappor_test_utils.cc
index 2304602..e01184a 100644
--- a/algorithms/rappor/rappor_test_utils.cc
+++ b/algorithms/rappor/rappor_test_utils.cc
@@ -39,6 +39,16 @@
   return output;
 }
 
+std::string BuildBinaryString(size_t num_bits,
+                              const std::vector<uint16_t>& index_of_1s) {
+  // Initialize output to a string of all zeroes.
+  std::string output(num_bits, '0');
+  for (auto bit_index : index_of_1s) {
+    output[num_bits - bit_index - 1] = '1';
+  }
+  return output;
+}
+
 std::string BinaryStringToData(const std::string& binary_string) {
   size_t num_bits = binary_string.size();
   EXPECT_EQ(0, num_bits % 8);
@@ -65,11 +75,10 @@
   return std::string(buffer, 12);
 }
 
-
 std::string BuildBitPatternString(int num_bits, int index, char index_char,
-    char other_char ) {
+                                  char other_char) {
   return std::string(num_bits - 1 - index, other_char) + index_char +
-      std::string(index, other_char);
+         std::string(index, other_char);
 }
 
 }  // namespace rappor
diff --git a/algorithms/rappor/rappor_test_utils.h b/algorithms/rappor/rappor_test_utils.h
index 6dc756a..72e6e2e 100644
--- a/algorithms/rappor/rappor_test_utils.h
+++ b/algorithms/rappor/rappor_test_utils.h
@@ -16,6 +16,7 @@
 #define COBALT_ALGORITHMS_RAPPOR_RAPPOR_TEST_UTILS_H_
 
 #include <string>
+#include <vector>
 
 namespace cobalt {
 namespace rappor {
@@ -33,6 +34,13 @@
 // bytes in |data|.
 std::string DataToBinaryString(const std::string& data);
 
+// Builds a binary string of length |num_bits| in which there are '1''s in
+// exactly the indices specified by |index_of_1s|. Note that we index bits
+// "from the right" so that an index of zero refers to the least significant
+// bit, i.e. the last character of the returned string.
+std::string BuildBinaryString(size_t num_bits,
+                              const std::vector<uint16_t>& index_of_1s);
+
 // Builds the string "category<index>" using 4 digits from index.
 std::string CategoryName(uint32_t index);