Adds functionality to build a candidate map.
This continues the work of implementing the parts of String RAPPOR
that occur prior to the use of LASSO. In terms of the python and R
Cobalt prototype this does the work of the Python function HashCandidates.
In more detail this builds a data structure that captures, for each
candidate string and each cohort, the indices of the set bits in
the Bloom filter.
- We add the new class RapporAnalyzer. This is the class that will contain
the String RAPPOR analysis code.
- We add the function AddObservation() which just delegates to
BloomBitCounter::AddObservation().
- We ad the private function BuildCandidateMap() which contains the
logic for building the candidate map.
- We add a stub for the function Analyze(). Currently this function
does nothing but invokes BuildCandidateMap(). We also added a comment:
TODO(azani, mironov) Put LASSO analysis here.
Change-Id: I06af4f808ed8aad105fab0af2fb071bf38275086
diff --git a/algorithms/rappor/CMakeLists.txt b/algorithms/rappor/CMakeLists.txt
index b413f3f..7937f59 100644
--- a/algorithms/rappor/CMakeLists.txt
+++ b/algorithms/rappor/CMakeLists.txt
@@ -28,7 +28,7 @@
client_secret cobalt_crypto glog ${PROTOBUF_LIBRARY} rappor_config_validator)
add_library(rappor_analyzer
- basic_rappor_analyzer.cc bloom_bit_counter.cc
+ basic_rappor_analyzer.cc bloom_bit_counter.cc rappor_analyzer.cc
${COBALT_PROTO_SRCS} ${CONFIG_PROTO_SRCS})
target_link_libraries(rappor_analyzer
cobalt_crypto glog ${PROTOBUF_LIBRARY} rappor_config_validator)
@@ -37,7 +37,7 @@
include_directories(BEFORE PRIVATE "${CMAKE_SOURCE_DIR}/third_party/boringssl/src/include")
add_executable(rappor_tests
basic_rappor_analyzer_test.cc
- bloom_bit_counter_test rappor_encoder_test.cc
+ bloom_bit_counter_test rappor_encoder_test.cc rappor_analyzer_test.cc
rappor_test_utils.cc rappor_test_utils_test.cc)
target_link_libraries(rappor_tests rappor_encoder rappor_analyzer rappor_analyzer gtest gtest_main)
set_target_properties(rappor_tests PROPERTIES
diff --git a/algorithms/rappor/rappor_analyzer.cc b/algorithms/rappor/rappor_analyzer.cc
new file mode 100644
index 0000000..75175e5
--- /dev/null
+++ b/algorithms/rappor/rappor_analyzer.cc
@@ -0,0 +1,113 @@
+// Copyright 2017 The Fuchsia Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "algorithms/rappor/rappor_analyzer.h"
+
+#include <glog/logging.h>
+
+#include "algorithms/rappor/rappor_encoder.h"
+#include "util/crypto_util/hash.h"
+
+namespace cobalt {
+namespace rappor {
+
+using crypto::byte;
+
+RapporAnalyzer::RapporAnalyzer(const RapporConfig& config,
+ const RapporCandidateList* candidates)
+ : bit_counter_(config), config_(bit_counter_.config()) {
+ candidate_map_.candidate_list = candidates;
+ // candidate_map_.candidate_cohort_maps remains empty for now. It
+ // will be populated by BuildCandidateMap.
+}
+
+bool RapporAnalyzer::AddObservation(const RapporObservation& obs) {
+ return bit_counter_.AddObservation(obs);
+}
+
+grpc::Status RapporAnalyzer::Analyze(
+ std::vector<CandidateResult>* results_out) {
+ CHECK(results_out);
+ auto status = BuildCandidateMap();
+ if (!status.ok()) {
+ return status;
+ }
+
+ //////////////////////////////////////////////////////
+ // TODO(azani, mironov) Put LASSO analysis here.
+ /////////////////////////////////////////////////////
+
+ return grpc::Status::OK;
+}
+
+grpc::Status RapporAnalyzer::BuildCandidateMap() {
+ if (!config_->valid()) {
+ return grpc::Status(grpc::FAILED_PRECONDITION,
+ "Invalid RapporConfig passed to constructor.");
+ }
+
+ // TODO(rudominer) Consider caching the CandidateMaps. The same RAPPOR
+ // analysis will likely be run every day with the same RapporConfig and
+ // RapporCandidateList but different sets of Observatons. Since the
+ // CandidateMap does not depend on the Observations, we can cache them
+ // (for example in Bigtable.) Alternatively, instead of caching the
+ // CandidateMap itself we could also cache the sparse binary matrix that will
+ // be generated based on the CandidateMap and used as one of the inputs
+ // to the LASSO algorithm.
+
+ uint32_t num_bits = config_->num_bits();
+ uint32_t num_cohorts = config_->num_cohorts();
+ uint32_t num_hashes = config_->num_hashes();
+
+ for (const std::string& candidate :
+ candidate_map_.candidate_list->candidates()) {
+ // In rappor_encoder.cc it is not std::strings that are encoded but rather
+ // |ValuePart|s. So here we want to take the candidate as a string and
+ // convert it into a serialized |ValuePart|.
+ ValuePart candidate_as_value_part;
+ candidate_as_value_part.set_string_value(candidate);
+ std::string serialized_candidate;
+ candidate_as_value_part.SerializeToString(&serialized_candidate);
+
+ // Append a CohortMap for this candidate.
+ candidate_map_.candidate_cohort_maps.emplace_back();
+ CohortMap& cohort_map = candidate_map_.candidate_cohort_maps.back();
+
+ // Iterate through the cohorts.
+ for (auto cohort = 0; cohort < num_cohorts; cohort++) {
+ // Append an instance of |Hashes| for this cohort.
+ cohort_map.cohort_hashes.emplace_back();
+ Hashes& hashes = cohort_map.cohort_hashes.back();
+
+ // Form one big hashed value of the serialized_candidate. This will be
+ // used to obtain multiple bit indices.
+ byte hashed_value[crypto::hash::DIGEST_SIZE];
+ if (!RapporEncoder::HashValueAndCohort(serialized_candidate, cohort,
+ num_hashes, hashed_value)) {
+ return grpc::Status(grpc::INTERNAL,
+ "Hash operation failed unexpectedly.");
+ }
+
+ // Extract one bit index for each of the hashes in the Bloom filter.
+ for (size_t hash_index = 0; hash_index < num_hashes; hash_index++) {
+ hashes.bit_indices.push_back(
+ RapporEncoder::ExtractBitIndex(hashed_value, hash_index, num_bits));
+ }
+ }
+ }
+ return grpc::Status::OK;
+}
+
+} // namespace rappor
+} // namespace cobalt
diff --git a/algorithms/rappor/rappor_analyzer.h b/algorithms/rappor/rappor_analyzer.h
new file mode 100644
index 0000000..2e3b2f2
--- /dev/null
+++ b/algorithms/rappor/rappor_analyzer.h
@@ -0,0 +1,142 @@
+// Copyright 2017 The Fuchsia Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef COBALT_ALGORITHMS_RAPPOR_RAPPOR_ANALYZER_H_
+#define COBALT_ALGORITHMS_RAPPOR_RAPPOR_ANALYZER_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "./observation.pb.h"
+#include "algorithms/rappor/bloom_bit_counter.h"
+#include "algorithms/rappor/rappor_config_validator.h"
+#include "config/encodings.pb.h"
+#include "config/report_configs.pb.h"
+#include "grpc++/grpc++.h"
+
+namespace cobalt {
+namespace rappor {
+
+// A string RAPPOR analysis result for a single candidate. The method
+// RapporAnalyzer::Analyze() populates a vector of CandidateResults, one for
+// each candidate.
+struct CandidateResult {
+ double count_estimate;
+
+ double std_error;
+};
+
+// A RapporAnalyzer is constructed for the purpose of performing a single
+// string RAPPOR analysis.
+//
+// (1) Construct a RapporAnalyzer passing in a RapporConfig and a
+// RapporCandidateList.
+//
+// (2) Repeatedly invoke AddObservation() to add the set of observations to
+// be analyzed. The observations must all be for the same metric part and
+// must have been encoded using the same encoding configuration. More
+// precisely this means they must be associated with the same customer_id,
+// project_id, metric_id, encoding_config_id and metric_part_name.
+//
+// (3) Invoke Analyze() to perform the string RAPPOR analysis and obtain the
+// results.
+//
+// (4) Optionally examine the underlying BloomBitCounter via the bit_counter()
+// accessor.
+class RapporAnalyzer {
+ public:
+ // Constructs a RapporAnalyzer for the given config and candidates. All of the
+ // observations added via AddObservation() must have been encoded using this
+ // config. If the config is not valid then all calls to AddObservation()
+ // will return false.
+ // TODO(rudominer) Enhance this API to also accept DP release parameters.
+ explicit RapporAnalyzer(const RapporConfig& config,
+ const RapporCandidateList* candidates);
+
+ // Adds an additional observation to be analyzed. The observation must have
+ // been encoded using the RapporConfig passed to the constructor.
+ //
+ // Returns true to indicate the observation was added without error.
+ bool AddObservation(const RapporObservation& obs);
+
+ // Performs the string RAPPOR analysis and writes the results to
+ // |results_out|. Return OK for success or an error status.
+ //
+ // |results_out| will initially be cleared and, upon success of the alogrithm,
+ // will contain a vector of size candidates->size() where |candidates| is the
+ // argument to the constructor. |results_out| will be in the same order
+ // as |candidates|. More precisely, the CandidateResult in
+ // (*results_out)[i] will be the result for the candidate in (*candidates)[i].
+ grpc::Status Analyze(std::vector<CandidateResult>* results_out);
+
+ // Gives access to the underlying BloomBitCounter.
+ const BloomBitCounter& bit_counter() { return bit_counter_; }
+
+ private:
+ friend class RapporAnalyzerTest;
+
+ // Builds the RAPPOR CandidateMap based on the data passed to the constructor.
+ grpc::Status BuildCandidateMap();
+
+ // An instance of Hashes is implicitly associated with a given
+ // (candidate, cohort) pair and gives the list of hash values for that pair
+ // under each of several hash functions. Each of the hash values is a
+ // bit index in a Bloom filter.
+ struct Hashes {
+ // This vector has size h = num_hashes from the RapporConfig passed
+ // to the RapporAnalyzer constructor. bit_indices[i] contains the value of
+ // the ith hash function applied to the implicitly associated
+ // (candidate, cohort) pair. bit_indices[i] is a bit index in the range
+ // [0, k) where k = num_bloom_bits from the RapporConfig passed to the
+ // RapporAnalyzer constructor.
+ //
+ // IMPORTANT: We index bits "from the right." This means that bit number
+ // zero is the least significant bit of the last byte of the Bloom filter.
+ std::vector<uint16_t> bit_indices;
+ };
+
+ // An instance of CohortMap is implicitly associated with a given
+ // candidate string S and gives the Hashes for the pairs (S, cohort)
+ // for each cohort in the range [0, num_cohorts).
+ struct CohortMap {
+ // This vector has size m = num_cohorts from the RapporConfig passed to
+ // the RapporAnalyzer constructor. cohort_hashes[i] contains the Hashes
+ // for cohort i.
+ std::vector<Hashes> cohort_hashes;
+ };
+
+ // CandidateMap stores the list of all candidates and a parallel list of
+ // CohortMaps for each candidate.
+ struct CandidateMap {
+ // Contains the list of all candidates. (pointer not owned)
+ const RapporCandidateList* candidate_list;
+
+ // This vector has size equal to the number of candidates in
+ // |candidate_list|. candidate_cohort_maps[i] contains the CohortMap for
+ // the ith candidate.
+ std::vector<CohortMap> candidate_cohort_maps;
+ };
+
+ BloomBitCounter bit_counter_;
+
+ std::shared_ptr<RapporConfigValidator> config_;
+
+ CandidateMap candidate_map_;
+};
+
+} // namespace rappor
+} // namespace cobalt
+
+#endif // COBALT_ALGORITHMS_RAPPOR_RAPPOR_ANALYZER_H_
diff --git a/algorithms/rappor/rappor_analyzer_test.cc b/algorithms/rappor/rappor_analyzer_test.cc
new file mode 100644
index 0000000..fd932be
--- /dev/null
+++ b/algorithms/rappor/rappor_analyzer_test.cc
@@ -0,0 +1,242 @@
+// Copyright 2016 The Fuchsia Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "algorithms/rappor/rappor_analyzer.h"
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+
+#include <algorithm>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "algorithms/rappor/rappor_encoder.h"
+#include "algorithms/rappor/rappor_test_utils.h"
+#include "encoder/client_secret.h"
+#include "third_party/googletest/googletest/include/gtest/gtest.h"
+
+namespace cobalt {
+namespace rappor {
+
+using encoder::ClientSecret;
+
+namespace {
+
+std::string CandidateString(int i) {
+ return std::string("candidate string") + std::to_string(i);
+}
+
+// Populates |candidate_list| with |num_candidates| candidates;
+void PopulateRapporCandidateList(uint32_t num_candidates,
+ RapporCandidateList* candidate_list) {
+ candidate_list->Clear();
+ for (int i = 0; i < num_candidates; i++) {
+ candidate_list->add_candidates(CandidateString(i));
+ }
+}
+
+// Makes a RapporConfig with the given data (and num_hashes=2).
+RapporConfig Config(uint32_t num_bloom_bits, uint32_t num_cohorts,
+ uint32_t num_hashes, double p, double q) {
+ RapporConfig config;
+ config.set_num_bloom_bits(num_bloom_bits);
+ config.set_num_hashes(num_hashes);
+ config.set_num_cohorts(num_cohorts);
+ config.set_prob_0_becomes_1(p);
+ config.set_prob_1_stays_1(q);
+ return config;
+}
+
+} // namespace
+
+class RapporAnalyzerTest : public ::testing::Test {
+ protected:
+ // Sets the member variable analyzer_ to a new RapporAnalyzer configured
+ // with the given arguments and the current values of prob_0_becomes_1_,
+ // prob_1_stays_1_.
+ void SetAnalyzer(uint32_t num_candidates, uint32_t num_bloom_bits,
+ uint32_t num_cohorts, uint32_t num_hashes) {
+ PopulateRapporCandidateList(num_candidates, &candidate_list_);
+ config_ = Config(num_bloom_bits, num_cohorts, num_hashes, prob_0_becomes_1_,
+ prob_1_stays_1_);
+ analyzer_.reset(new RapporAnalyzer(config_, &candidate_list_));
+ }
+
+ void BuildCandidateMap() {
+ EXPECT_EQ(grpc::OK, analyzer_->BuildCandidateMap().error_code());
+
+ uint32_t num_candidates =
+ analyzer_->candidate_map_.candidate_list->candidates_size();
+ uint32_t num_cohorts = analyzer_->config_->num_cohorts();
+ uint32_t num_hashes = analyzer_->config_->num_hashes();
+ uint32_t num_bits = analyzer_->config_->num_bits();
+
+ // Expect the number of candidates to be correct,
+ EXPECT_EQ(num_candidates,
+ analyzer_->candidate_map_.candidate_cohort_maps.size());
+
+ // and for each candidate...
+ for (auto candidate = 0; candidate < num_candidates; candidate++) {
+ // expect the number of cohorts to be correct,
+ EXPECT_EQ(num_cohorts,
+ analyzer_->candidate_map_.candidate_cohort_maps[candidate]
+ .cohort_hashes.size());
+
+ // and for each cohort...
+ for (auto cohort = 0; cohort < num_cohorts; cohort++) {
+ // expect the number of hashes to be correct,
+ EXPECT_EQ(num_hashes,
+ analyzer_->candidate_map_.candidate_cohort_maps[candidate]
+ .cohort_hashes[cohort]
+ .bit_indices.size());
+
+ // and for each hash...
+ for (int hash = 0; hash < num_hashes; hash++) {
+ // Expect the bit index to be in the range [0, num_bits).
+ auto bit_index = GetCandidateMapValue(candidate, cohort, hash);
+ EXPECT_GE(bit_index, 0);
+ EXPECT_LT(bit_index, num_bits);
+ }
+ }
+ }
+ }
+
+ // This should be invoked after BuildCandidateMap. It returns the bit index
+ // within the CandidateMap for the given |candidate_index|, |cohort_index|,
+ // and |hash_index|.
+ uint16_t GetCandidateMapValue(uint16_t candidate_index, uint16_t cohort_index,
+ uint16_t hash_index) {
+ EXPECT_GT(analyzer_->candidate_map_.candidate_cohort_maps.size(),
+ candidate_index);
+ EXPECT_GT(analyzer_->candidate_map_.candidate_cohort_maps[candidate_index]
+ .cohort_hashes.size(),
+ cohort_index);
+ EXPECT_GT(analyzer_->candidate_map_.candidate_cohort_maps[candidate_index]
+ .cohort_hashes[cohort_index]
+ .bit_indices.size(),
+ hash_index);
+ return analyzer_->candidate_map_.candidate_cohort_maps[candidate_index]
+ .cohort_hashes[cohort_index]
+ .bit_indices[hash_index];
+ }
+
+ // Builds and returns a bit string (i.e. a string of ASCII '0's and '1's)
+ // representing the Bloom filter implicitly stored within the CandidateMap
+ // for the given |candidate_index| and |cohort_index|.
+ std::string BuildBitString(uint16_t candidate_index, uint16_t cohort_index) {
+ return BuildBinaryString(
+ analyzer_->config_->num_bits(),
+ analyzer_->candidate_map_.candidate_cohort_maps[candidate_index]
+ .cohort_hashes[cohort_index]
+ .bit_indices);
+ }
+
+ RapporConfig config_;
+ std::unique_ptr<RapporAnalyzer> analyzer_;
+
+ RapporCandidateList candidate_list_;
+
+ // By default this test uses p=0, q=1. Individual tests may override this.
+ double prob_0_becomes_1_ = 0.0;
+ double prob_1_stays_1_ = 1.0;
+};
+
+// Tests the function BuildCandidateMap. We build one small CandidateMap and
+// then we explicitly check every value against a known value. We have not
+// independently verified the SHA-256 hash values and so rather than a test
+// of correctness this is firstly a sanity test: we can eyeball the values
+// and confirm they look sane, and secondly a regression test.
+TEST_F(RapporAnalyzerTest, BuildCandidateMapSmallTest) {
+ static const uint32_t kNumCandidates = 5;
+ static const uint32_t kNumCohorts = 3;
+ static const uint32_t kNumHashes = 2;
+ static const uint32_t kNumBloomBits = 8;
+
+ SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);
+ BuildCandidateMap();
+
+ // clang-format off
+ int expected_bit_indices[kNumCandidates][kNumCohorts*kNumHashes] = {
+ // cihj means cohort = i and hash-index = j.
+ // c0h0 c0h1 c1h0 c1h1 c2h0 c2h2
+ {3, 5, 2, 6, 3, 6}, // candidate 0
+ {1, 5, 4, 7, 2, 0}, // candidate 1
+ {3, 0, 2, 0, 1, 4}, // candidate 2
+ {5, 1, 2, 4, 2, 4}, // candidate 3
+ {1, 4, 3, 1, 2, 6}, // candidate 4
+ };
+ // clang-format on
+
+ for (auto candidate = 0; candidate < kNumCandidates; candidate++) {
+ for (auto cohort = 0; cohort < kNumCohorts; cohort++) {
+ for (int hash = 0; hash < kNumHashes; hash++) {
+ EXPECT_EQ(expected_bit_indices[candidate][cohort * kNumHashes + hash],
+ GetCandidateMapValue(candidate, cohort, hash))
+ << "(" << candidate << "," << cohort * kNumHashes + hash << ")";
+ }
+ }
+ }
+}
+
+// Tests the function BuildCandidateMap. We build many different CandidateMaps
+// with many different parameters. We are testing firstly that the procedure
+// completes without error, secondly that the shape of the produced
+// data structure is correct and thirdly that the bit indexes are in the range
+// [0, num_bloom_bits). The latter two checks occur inside of BuildCandidateMap.
+TEST_F(RapporAnalyzerTest, BuildCandidateMapSmokeTest) {
+ for (auto num_candidates : {11, 51, 99}) {
+ for (auto num_cohorts : {23, 45}) {
+ for (auto num_hashes : {2, 6, 7}) {
+ for (auto num_bloom_bits : {16, 128}) {
+ SetAnalyzer(num_candidates, num_bloom_bits, num_cohorts, num_hashes);
+ BuildCandidateMap();
+ }
+ }
+ }
+ }
+}
+
+// Tests the function BuildCandidateMap. We test that the map that is built
+// is consistent with the Bloom filters that are built by an encoder.
+TEST_F(RapporAnalyzerTest, BuildCandidateMapCompareWithEncoder) {
+ static const uint32_t kNumCandidates = 10;
+ static const uint32_t kNumCohorts = 20;
+ static const uint32_t kNumHashes = 5;
+ static const uint32_t kNumBloomBits = 64;
+
+ SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);
+ BuildCandidateMap();
+
+ for (int candidate = 0; candidate < kNumCandidates; candidate++) {
+ // Construct a new encoder with a new ClientSecret so that a random
+ // cohort is selected.
+ RapporEncoder encoder(config_, ClientSecret::GenerateNewSecret());
+
+ // Encode the current candidate string using |encoder|.
+ ValuePart value_part;
+ value_part.set_string_value(CandidateString(candidate));
+ RapporObservation observation;
+ encoder.Encode(value_part, &observation);
+
+ // Since p=0 and q=1 the RapporObservation contains the raw Bloom filter
+ // with no noise added. Confirm that the BloomFilter is the same as
+ // the one implied by the CandidateMap at the appropriate candidate
+ // and cohort.
+ EXPECT_EQ(BuildBitString(candidate, encoder.cohort()),
+ DataToBinaryString(observation.data()));
+ }
+}
+
+} // namespace rappor
+} // namespace cobalt
diff --git a/algorithms/rappor/rappor_test_utils.cc b/algorithms/rappor/rappor_test_utils.cc
index 2304602..e01184a 100644
--- a/algorithms/rappor/rappor_test_utils.cc
+++ b/algorithms/rappor/rappor_test_utils.cc
@@ -39,6 +39,16 @@
return output;
}
+std::string BuildBinaryString(size_t num_bits,
+ const std::vector<uint16_t>& index_of_1s) {
+ // Initialize output to a string of all zeroes.
+ std::string output(num_bits, '0');
+ for (auto bit_index : index_of_1s) {
+ output[num_bits - bit_index - 1] = '1';
+ }
+ return output;
+}
+
std::string BinaryStringToData(const std::string& binary_string) {
size_t num_bits = binary_string.size();
EXPECT_EQ(0, num_bits % 8);
@@ -65,11 +75,10 @@
return std::string(buffer, 12);
}
-
std::string BuildBitPatternString(int num_bits, int index, char index_char,
- char other_char ) {
+ char other_char) {
return std::string(num_bits - 1 - index, other_char) + index_char +
- std::string(index, other_char);
+ std::string(index, other_char);
}
} // namespace rappor
diff --git a/algorithms/rappor/rappor_test_utils.h b/algorithms/rappor/rappor_test_utils.h
index 6dc756a..72e6e2e 100644
--- a/algorithms/rappor/rappor_test_utils.h
+++ b/algorithms/rappor/rappor_test_utils.h
@@ -16,6 +16,7 @@
#define COBALT_ALGORITHMS_RAPPOR_RAPPOR_TEST_UTILS_H_
#include <string>
+#include <vector>
namespace cobalt {
namespace rappor {
@@ -33,6 +34,13 @@
// bytes in |data|.
std::string DataToBinaryString(const std::string& data);
+// Builds a binary string of length |num_bits| in which there are '1''s in
+// exactly the indices specified by |index_of_1s|. Note that we index bits
+// "from the right" so that an index of zero refers to the least significant
+// bit, i.e. the last character of the returned string.
+std::string BuildBinaryString(size_t num_bits,
+ const std::vector<uint16_t>& index_of_1s);
+
// Builds the string "category<index>" using 4 digits from index.
std::string CategoryName(uint32_t index);