algorithms/rappor/rappor_analyzer_unit_tests.cc - cobalt - Git at Google

 // Copyright 2018 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "algorithms/rappor/rappor_analyzer_test.h"

 namespace cobalt {
 namespace rappor {

 using encoder::ClientSecret;

 // Tests the function BuildCandidateMap. We build one small CandidateMap and
 // then we explicitly check every value against a known value. We have not
 // independently verified the SHA-256 hash values and so rather than a test
 // of correctness this is firstly a sanity test: we can eyeball the values
 // and confirm they look sane, and secondly a regression test.
 TEST_F(RapporAnalyzerTest, BuildCandidateMapSmallTest) {
   static const uint32_t kNumCandidates = 5;
   static const uint32_t kNumCohorts = 3;
   static const uint32_t kNumHashes = 2;
   static const uint32_t kNumBloomBits = 8;

   SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);
   BuildCandidateMap();

   // clang-format off
   int expected_bit_indices[kNumCandidates][kNumCohorts*kNumHashes] = {
   // cihj means cohort = i and hash-index = j.
   // c0h0 c0h1 c1h0 c1h1 c2h0 c2h2
       {3,   5,   2,   6,   3,   6},  // candidate 0
       {1,   5,   4,   7,   2,   0},  // candidate 1
       {3,   0,   2,   0,   1,   4},  // candidate 2
       {5,   1,   2,   4,   2,   4},  // candidate 3
       {1,   4,   3,   1,   2,   6},  // candidate 4
   };
   // clang-format on

   for (size_t candidate = 0; candidate < kNumCandidates; candidate++) {
     for (size_t cohort = 0; cohort < kNumCohorts; cohort++) {
       for (size_t hash = 0; hash < kNumHashes; hash++) {
         EXPECT_EQ(expected_bit_indices[candidate][cohort * kNumHashes + hash],
                   GetCandidateMapValue(candidate, cohort, hash))
             << "(" << candidate << "," << cohort * kNumHashes + hash << ")";
       }
     }
   }

   // Check the associated sparse matrix.
   std::ostringstream stream;
   stream << candidate_matrix().block(0, 0, kNumCohorts * kNumBloomBits,
                                      kNumCandidates);
   const char* kExpectedMatrixString =
       "0 0 0 0 0 \n"
       "0 0 0 0 0 \n"
       "1 1 0 1 0 \n"
       "0 0 0 0 1 \n"
       "1 0 1 0 0 \n"
       "0 0 0 0 0 \n"
       "0 1 0 1 1 \n"
       "0 0 1 0 0 \n"
       "0 1 0 0 0 \n"
       "1 0 0 0 0 \n"
       "0 0 0 0 0 \n"
       "0 1 0 1 0 \n"
       "0 0 0 0 1 \n"
       "1 0 1 1 0 \n"
       "0 0 0 0 1 \n"
       "0 0 1 0 0 \n"
       "0 0 0 0 0 \n"
       "1 0 0 0 1 \n"
       "0 0 0 0 0 \n"
       "0 0 1 1 0 \n"
       "1 0 0 0 0 \n"
       "0 1 0 1 1 \n"
       "0 0 1 0 0 \n"
       "0 1 0 0 0 \n";
   EXPECT_EQ(kExpectedMatrixString, stream.str());
 }

 // This test is identical to the previous test except that kNumBloomBits = 4
 // instead of 8. The purpose of this test is to force the situation in which
 // the two hash functions for a given cohort and a given candidate give the
 // same value. For example below we see that for candidate 0, cohort 1, both
 // hash functions yielded a 2. We want to test that the associated sparse
 // matrix has a "1" in the corresponding position (in this case that is
 // row 5, column 0) and does not have a "2" in that position. In other words
 // we want to test that we correctly added only one entry to the list of
 // triples that defined the sparse matrix and not two entries.
 TEST_F(RapporAnalyzerTest, BuildCandidateMapSmallTestWithDuplicates) {
   static const uint32_t kNumCandidates = 5;
   static const uint32_t kNumCohorts = 3;
   static const uint32_t kNumHashes = 2;
   static const uint32_t kNumBloomBits = 4;

   SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);
   BuildCandidateMap();

   // clang-format off
   int expected_bit_indices[kNumCandidates][kNumCohorts*kNumHashes] = {
   // cihj means cohort = i and hash-index = j.
   // c0h0 c0h1 c1h0 c1h1 c2h0 c2h2
       {3,   1,   2,   2,   3,   2},  // candidate 0
       {1,   1,   0,   3,   2,   0},  // candidate 1
       {3,   0,   2,   0,   1,   0},  // candidate 2
       {1,   1,   2,   0,   2,   0},  // candidate 3
       {1,   0,   3,   1,   2,   2},  // candidate 4
   };
   // clang-format on

   for (size_t candidate = 0; candidate < kNumCandidates; candidate++) {
     for (size_t cohort = 0; cohort < kNumCohorts; cohort++) {
       for (size_t hash = 0; hash < kNumHashes; hash++) {
         EXPECT_EQ(expected_bit_indices[candidate][cohort * kNumHashes + hash],
                   GetCandidateMapValue(candidate, cohort, hash))
             << "(" << candidate << "," << cohort * kNumHashes + hash << ")";
       }
     }
   }

   // Check the associated sparse matrix.
   std::ostringstream stream;
   stream << candidate_matrix().block(0, 0, kNumCohorts * kNumBloomBits,
                                      kNumCandidates);
   const char* kExpectedMatrixString =
       "1 0 1 0 0 \n"
       "0 0 0 0 0 \n"
       "1 1 0 1 1 \n"
       "0 0 1 0 1 \n"
       "0 1 0 0 1 \n"
       "1 0 1 1 0 \n"
       "0 0 0 0 1 \n"
       "0 1 1 1 0 \n"
       "1 0 0 0 0 \n"
       "1 1 0 1 1 \n"
       "0 0 1 0 0 \n"
       "0 1 1 1 0 \n";
   EXPECT_EQ(kExpectedMatrixString, stream.str());
 }

 // Tests the function BuildCandidateMap. We build many different CandidateMaps
 // with many different parameters. We are testing firstly that the procedure
 // completes without error, secondly that the shape of the produced
 // data structure is correct and thirdly that the bit indexes are in the range
 // [0, num_bloom_bits). The latter two checks occur inside of
 // BuildCandidateMap.
 TEST_F(RapporAnalyzerTest, BuildCandidateMapSmokeTest) {
   for (auto num_candidates : {11, 51, 99}) {
     for (auto num_cohorts : {23, 45}) {
       for (auto num_hashes : {2, 6, 7}) {
         for (auto num_bloom_bits : {16, 128}) {
           SetAnalyzer(num_candidates, num_bloom_bits, num_cohorts, num_hashes);
           BuildCandidateMap();
         }
       }
     }
   }
 }

 // Tests the function BuildCandidateMap. We test that the map that is built
 // is consistent with the Bloom filters that are built by an encoder.
 TEST_F(RapporAnalyzerTest, BuildCandidateMapCompareWithEncoder) {
   static const uint32_t kNumCandidates = 10;
   static const uint32_t kNumCohorts = 20;
   static const uint32_t kNumHashes = 5;
   static const uint32_t kNumBloomBits = 64;

   SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);
   BuildCandidateMap();

   for (size_t candidate = 0; candidate < kNumCandidates; candidate++) {
     // Construct a new encoder with a new ClientSecret so that a random
     // cohort is selected.
     RapporEncoder encoder(config_, ClientSecret::GenerateNewSecret());

     // Encode the current candidate string using |encoder|.
     ValuePart value_part;
     value_part.set_string_value(CandidateString(candidate));
     RapporObservation observation;
     encoder.Encode(value_part, &observation);

     // Since p=0 and q=1 the RapporObservation contains the raw Bloom filter
     // with no noise added. Confirm that the BloomFilter is the same as
     // the one implied by the CandidateMap at the appropriate candidate
     // and cohort.
     EXPECT_EQ(BuildBitString(candidate, encoder.cohort()),
               DataToBinaryString(observation.data()));
   }
 }

 // Tests the function ExtractEstimatedBitCountRatios(). We build one small
 // estimated bit count ratio vector and explicitly check its values. We
 // use no-randomness: p = 0, q = 1 so that the estimated bit counts are
 // identical to the true bit counts.
 TEST_F(RapporAnalyzerTest, ExtractEstimatedBitCountRatiosSmallNonRandomTest) {
   static const uint32_t kNumCandidates = 10;
   static const uint32_t kNumCohorts = 3;
   static const uint32_t kNumHashes = 2;
   static const uint32_t kNumBloomBits = 8;
   SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);
   AddObservation(0, "00001010");
   AddObservation(0, "00010010");
   AddObservation(1, "00001010");
   AddObservation(1, "00010010");
   AddObservation(1, "00100010");
   AddObservation(2, "00001010");
   AddObservation(2, "00010010");
   AddObservation(2, "00010010");
   AddObservation(2, "00100010");

   Eigen::VectorXd est_bit_count_ratios;
   ExtractEstimatedBitCountRatios(&est_bit_count_ratios);

   std::ostringstream stream;
   stream << est_bit_count_ratios.block(0, 0, kNumCohorts * kNumBloomBits, 1);

   const char* kExpectedVectorString =
       "       0\n"
       "       0\n"
       "       0\n"
       "     0.5\n"
       "     0.5\n"
       "       0\n"
       "       1\n"
       "       0\n"
       "       0\n"
       "       0\n"
       "0.333333\n"
       "0.333333\n"
       "0.333333\n"
       "       0\n"
       "       1\n"
       "       0\n"
       "       0\n"
       "       0\n"
       "    0.25\n"
       "     0.5\n"
       "    0.25\n"
       "       0\n"
       "       1\n"
       "       0";
   EXPECT_EQ(kExpectedVectorString, stream.str());
 }

 // This test invokes Analyze() in a few very simple cases, checks that the the
 // algorithm converges and that the result vector has the correct size.
 TEST_F(RapporAnalyzerTest, SimpleAnalyzeTest) {
   static const uint32_t kNumCandidates = 10;
   static const uint32_t kNumCohorts = 3;
   static const uint32_t kNumHashes = 2;
   static const uint32_t kNumBloomBits = 8;
   static const bool print_estimates = false;

   std::vector<int> candidate_indices(100, 5);
   std::vector<int> true_candidate_counts = {0, 0, 0, 0, 0, 100, 0, 0, 0, 0};
   ShortExperimentWithAnalyze(
       "p=0, q=1, only candidate 5", kNumCandidates, kNumBloomBits, kNumCohorts,
       kNumHashes, candidate_indices, true_candidate_counts, print_estimates);

   candidate_indices = std::vector<int>(20, 1);
   candidate_indices.insert(candidate_indices.end(), 20, 4);
   candidate_indices.insert(candidate_indices.end(), 60, 9);
   true_candidate_counts = {0, 20, 0, 0, 20, 0, 0, 0, 0, 60};
   ShortExperimentWithAnalyze("p=0, q=1, several candidates", kNumCandidates,
                              kNumBloomBits, kNumCohorts, kNumHashes,
                              candidate_indices, true_candidate_counts,
                              print_estimates);

   prob_0_becomes_1_ = 0.1;
   prob_1_stays_1_ = 0.9;

   candidate_indices = std::vector<int>(100, 5);
   true_candidate_counts = {0, 0, 0, 0, 0, 100, 0, 0, 0, 0};
   ShortExperimentWithAnalyze("p=0.1, q=0.9, only candidate 5", kNumCandidates,
                              kNumBloomBits, kNumCohorts, kNumHashes,
                              candidate_indices, true_candidate_counts,
                              print_estimates);

   candidate_indices = std::vector<int>(20, 1);
   candidate_indices.insert(candidate_indices.end(), 20, 4);
   candidate_indices.insert(candidate_indices.end(), 60, 9);
   true_candidate_counts = {0, 20, 0, 0, 20, 0, 0, 0, 0, 60};
   ShortExperimentWithAnalyze("p=0.1, q=0.9, several candidates", kNumCandidates,
                              kNumBloomBits, kNumCohorts, kNumHashes,
                              candidate_indices, true_candidate_counts,
                              print_estimates);
 }

 // Runs Analyze() on the corner-case of one candidate and one cohort.
 // The result should be exact.
 TEST_F(RapporAnalyzerTest, SingleCandidateTest) {
   static const uint32_t kNumCandidates = 1;
   static const uint32_t kNumCohorts = 1;
   static const uint32_t kNumHashes = 2;
   static const uint32_t kNumBloomBits = 8;

   // No noise introduced
   prob_0_becomes_1_ = 0.0;
   prob_1_stays_1_ = 1.0;

   SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);

   std::vector<int> candidate_indices(100, 0);
   AddObservationsForCandidates(candidate_indices);

   std::vector<CandidateResult> results;
   auto status = analyzer_->Analyze(&results);

   EXPECT_EQ(results.size(), kNumCandidates);
   EXPECT_GE(results[0].count_estimate, 99);
   EXPECT_LE(results[0].count_estimate, 101);
 }

 // Runs Analyze with one cohort and the number of bits much larger than the
 // number of candidates. No noise is introduced. The heavy hitter should be
 // reproduced nearly exactly (assuming second step of RAPPOR is "on" and
 // working). Note(bazyli): this test is based on the assumption that the number
 // of bits in the cohort is so large that no or almost no of the candidates will
 // share a non-zero bit with the heavy hitter.
 TEST_F(RapporAnalyzerTest, ExactSolutionTest) {
   // The probability that the heaviest hitter will not share a nonzero bit with
   // any other candidate when kNumCandidates = 10, kNumCohorts = 1, kNumHashes =
   // 2, kNumBloomBits = 128, is (125 * 126 / (127 * 128))^ 9 = 0.75. In that
   // case, the heaviest hitter must be solved exactly. Even if it does share a
   // bit or two with some other candidate(s), this is unlikely to alter the
   // result significantly.
   static const uint32_t kNumCandidates = 10;
   static const uint32_t kNumCohorts = 1;
   static const uint32_t kNumHashes = 2;
   static const uint32_t kNumBloomBits = 128;

   // No noise introduced.
   prob_0_becomes_1_ = 0.0;
   prob_1_stays_1_ = 1.0;

   SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);

   std::vector<int> candidate_indices(10, 0);
   candidate_indices.insert(candidate_indices.end(), 10, 1);
   candidate_indices.insert(candidate_indices.end(), 10, 2);
   candidate_indices.insert(candidate_indices.end(), 10, 3);
   candidate_indices.insert(candidate_indices.end(), 10, 4);
   candidate_indices.insert(candidate_indices.end(), 100, 9);
   AddObservationsForCandidates(candidate_indices);

   std::vector<CandidateResult> results;
   auto status = analyzer_->Analyze(&results);

   // Check that the heavy hitter is reproduced almost exactly
   EXPECT_GE(results[9].count_estimate, 95);
   EXPECT_LE(results[9].count_estimate, 105);

   // Candidates that are not observed should have zero counts
   EXPECT_EQ(results[5].count_estimate, 0);
   EXPECT_EQ(results[6].count_estimate, 0);
   EXPECT_EQ(results[7].count_estimate, 0);
   EXPECT_EQ(results[8].count_estimate, 0);

   // All standard errors should be numerically 0
   for (uint32_t i = 0; i < kNumCandidates; i++) {
     EXPECT_LE(results[i].std_error, 1e-12);
   }
 }

 // Runs Analyzer with two cohorts and large number of bits. Very small noise is
 // introduced. The heaviest hitter should be reproduced very well (assuming
 // second step of RAPPOR is "on" and working). This test is based on similar
 // assumptions that ExactSolutionTest but the check is relaxed because of noise
 // and multiple cohorts.
 TEST_F(RapporAnalyzerTest, HighAccuracySolutionTest) {
   static const uint32_t kNumCandidates = 10;
   static const uint32_t kNumCohorts = 2;
   static const uint32_t kNumHashes = 2;
   static const uint32_t kNumBloomBits = 64;

   // Small noise introduced.
   prob_0_becomes_1_ = 0.1;
   prob_1_stays_1_ = 0.9;

   SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);

   std::vector<int> candidate_indices(10, 0);
   candidate_indices.insert(candidate_indices.end(), 10, 1);
   candidate_indices.insert(candidate_indices.end(), 10, 2);
   candidate_indices.insert(candidate_indices.end(), 10, 3);
   candidate_indices.insert(candidate_indices.end(), 10, 4);
   candidate_indices.insert(candidate_indices.end(), 100, 9);
   AddObservationsForCandidates(candidate_indices);

   std::vector<CandidateResult> results;
   auto status = analyzer_->Analyze(&results);

   EXPECT_GE(results[9].count_estimate, 70);
   EXPECT_LE(results[9].count_estimate, 120);
 }

 // Runs Analyzer on a moderately under-determined system (number of rows <
 // number of columns) with more than one cohort. Small noise is introduced.
 // There are a number of (equal) heavy hitters, which should be identified.
 // TODO(bazyli): this test is heuristic.
 TEST_F(RapporAnalyzerTest, KHeavyHittersTest) {
   static const uint32_t kNumCandidates = 100;
   static const uint32_t kNumCohorts = 4;
   static const uint32_t kNumHashes = 2;
   static const uint32_t kNumBloomBits = 16;

   // Small noise introduced
   prob_0_becomes_1_ = 0.1;
   prob_1_stays_1_ = 0.9;

   SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);

   std::vector<int> candidate_indices;
   candidate_indices.insert(candidate_indices.end(), 100, 1);
   candidate_indices.insert(candidate_indices.end(), 100, 2);
   candidate_indices.insert(candidate_indices.end(), 100, 4);
   candidate_indices.insert(candidate_indices.end(), 100, 8);
   candidate_indices.insert(candidate_indices.end(), 100, 16);
   candidate_indices.insert(candidate_indices.end(), 100, 32);
   candidate_indices.insert(candidate_indices.end(), 100, 64);
   AddObservationsForCandidates(candidate_indices);

   std::vector<CandidateResult> results;
   auto status = analyzer_->Analyze(&results);

   // Check if the heavy hitters are identified
   EXPECT_GE(results[1].count_estimate, 50);
   EXPECT_LE(results[1].count_estimate, 150);
   EXPECT_GE(results[2].count_estimate, 50);
   EXPECT_LE(results[2].count_estimate, 150);
   EXPECT_GE(results[4].count_estimate, 50);
   EXPECT_LE(results[4].count_estimate, 150);
   EXPECT_GE(results[8].count_estimate, 50);
   EXPECT_LE(results[8].count_estimate, 150);
   EXPECT_GE(results[16].count_estimate, 50);
   EXPECT_LE(results[16].count_estimate, 150);
   EXPECT_GE(results[32].count_estimate, 50);
   EXPECT_LE(results[32].count_estimate, 150);
 }

 // Runs a small instance of RAPPOR with large noise.
 // The standard errors of identified heavy hitters should be nonzero.
 // Note(bazyli): this test is heuristic test.
 TEST_F(RapporAnalyzerTest, StandardErrorTest) {
   static const uint32_t kNumCandidates = 50;
   static const uint32_t kNumCohorts = 2;
   static const uint32_t kNumHashes = 2;
   static const uint32_t kNumBloomBits = 32;

   // Large noise introduced.
   prob_0_becomes_1_ = 0.4;
   prob_1_stays_1_ = 0.6;

   SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);

   std::vector<int> candidate_indices(1000, 0);
   candidate_indices.insert(candidate_indices.end(), 1000, 10);
   candidate_indices.insert(candidate_indices.end(), 1000, 20);
   candidate_indices.insert(candidate_indices.end(), 1000, 30);
   candidate_indices.insert(candidate_indices.end(), 1000, 40);
   AddObservationsForCandidates(candidate_indices);

   std::vector<CandidateResult> results;
   auto status = analyzer_->Analyze(&results);

   EXPECT_GT(results[0].std_error, 0);
   EXPECT_GT(results[10].std_error, 0);
   EXPECT_GT(results[20].std_error, 0);
   EXPECT_GT(results[30].std_error, 0);
   EXPECT_GT(results[40].std_error, 0);
 }

 }  // namespace rappor
 }  // namespace cobalt
	// Copyright 2018 The Fuchsia Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "algorithms/rappor/rappor_analyzer_test.h"

	namespace cobalt {
	namespace rappor {

	using encoder::ClientSecret;

	// Tests the function BuildCandidateMap. We build one small CandidateMap and
	// then we explicitly check every value against a known value. We have not
	// independently verified the SHA-256 hash values and so rather than a test
	// of correctness this is firstly a sanity test: we can eyeball the values
	// and confirm they look sane, and secondly a regression test.
	TEST_F(RapporAnalyzerTest, BuildCandidateMapSmallTest) {
	static const uint32_t kNumCandidates = 5;
	static const uint32_t kNumCohorts = 3;
	static const uint32_t kNumHashes = 2;
	static const uint32_t kNumBloomBits = 8;

	SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);
	BuildCandidateMap();

	// clang-format off
	int expected_bit_indices[kNumCandidates][kNumCohorts*kNumHashes] = {
	// cihj means cohort = i and hash-index = j.
	// c0h0 c0h1 c1h0 c1h1 c2h0 c2h2
	{3, 5, 2, 6, 3, 6}, // candidate 0
	{1, 5, 4, 7, 2, 0}, // candidate 1
	{3, 0, 2, 0, 1, 4}, // candidate 2
	{5, 1, 2, 4, 2, 4}, // candidate 3
	{1, 4, 3, 1, 2, 6}, // candidate 4
	};
	// clang-format on

	for (size_t candidate = 0; candidate < kNumCandidates; candidate++) {
	for (size_t cohort = 0; cohort < kNumCohorts; cohort++) {
	for (size_t hash = 0; hash < kNumHashes; hash++) {
	EXPECT_EQ(expected_bit_indices[candidate][cohort * kNumHashes + hash],
	GetCandidateMapValue(candidate, cohort, hash))
	<< "(" << candidate << "," << cohort * kNumHashes + hash << ")";
	}
	}
	}

	// Check the associated sparse matrix.
	std::ostringstream stream;
	stream << candidate_matrix().block(0, 0, kNumCohorts * kNumBloomBits,
	kNumCandidates);
	const char* kExpectedMatrixString =
	"0 0 0 0 0 \n"
	"0 0 0 0 0 \n"
	"1 1 0 1 0 \n"
	"0 0 0 0 1 \n"
	"1 0 1 0 0 \n"
	"0 0 0 0 0 \n"
	"0 1 0 1 1 \n"
	"0 0 1 0 0 \n"
	"0 1 0 0 0 \n"
	"1 0 0 0 0 \n"
	"0 0 0 0 0 \n"
	"0 1 0 1 0 \n"
	"0 0 0 0 1 \n"
	"1 0 1 1 0 \n"
	"0 0 0 0 1 \n"
	"0 0 1 0 0 \n"
	"0 0 0 0 0 \n"
	"1 0 0 0 1 \n"
	"0 0 0 0 0 \n"
	"0 0 1 1 0 \n"
	"1 0 0 0 0 \n"
	"0 1 0 1 1 \n"
	"0 0 1 0 0 \n"
	"0 1 0 0 0 \n";
	EXPECT_EQ(kExpectedMatrixString, stream.str());
	}

	// This test is identical to the previous test except that kNumBloomBits = 4
	// instead of 8. The purpose of this test is to force the situation in which
	// the two hash functions for a given cohort and a given candidate give the
	// same value. For example below we see that for candidate 0, cohort 1, both
	// hash functions yielded a 2. We want to test that the associated sparse
	// matrix has a "1" in the corresponding position (in this case that is
	// row 5, column 0) and does not have a "2" in that position. In other words
	// we want to test that we correctly added only one entry to the list of
	// triples that defined the sparse matrix and not two entries.
	TEST_F(RapporAnalyzerTest, BuildCandidateMapSmallTestWithDuplicates) {
	static const uint32_t kNumCandidates = 5;
	static const uint32_t kNumCohorts = 3;
	static const uint32_t kNumHashes = 2;
	static const uint32_t kNumBloomBits = 4;

	SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);
	BuildCandidateMap();

	// clang-format off
	int expected_bit_indices[kNumCandidates][kNumCohorts*kNumHashes] = {
	// cihj means cohort = i and hash-index = j.
	// c0h0 c0h1 c1h0 c1h1 c2h0 c2h2
	{3, 1, 2, 2, 3, 2}, // candidate 0
	{1, 1, 0, 3, 2, 0}, // candidate 1
	{3, 0, 2, 0, 1, 0}, // candidate 2
	{1, 1, 2, 0, 2, 0}, // candidate 3
	{1, 0, 3, 1, 2, 2}, // candidate 4
	};
	// clang-format on

	for (size_t candidate = 0; candidate < kNumCandidates; candidate++) {
	for (size_t cohort = 0; cohort < kNumCohorts; cohort++) {
	for (size_t hash = 0; hash < kNumHashes; hash++) {
	EXPECT_EQ(expected_bit_indices[candidate][cohort * kNumHashes + hash],
	GetCandidateMapValue(candidate, cohort, hash))
	<< "(" << candidate << "," << cohort * kNumHashes + hash << ")";
	}
	}
	}

	// Check the associated sparse matrix.
	std::ostringstream stream;
	stream << candidate_matrix().block(0, 0, kNumCohorts * kNumBloomBits,
	kNumCandidates);
	const char* kExpectedMatrixString =
	"1 0 1 0 0 \n"
	"0 0 0 0 0 \n"
	"1 1 0 1 1 \n"
	"0 0 1 0 1 \n"
	"0 1 0 0 1 \n"
	"1 0 1 1 0 \n"
	"0 0 0 0 1 \n"
	"0 1 1 1 0 \n"
	"1 0 0 0 0 \n"
	"1 1 0 1 1 \n"
	"0 0 1 0 0 \n"
	"0 1 1 1 0 \n";
	EXPECT_EQ(kExpectedMatrixString, stream.str());
	}

	// Tests the function BuildCandidateMap. We build many different CandidateMaps
	// with many different parameters. We are testing firstly that the procedure
	// completes without error, secondly that the shape of the produced
	// data structure is correct and thirdly that the bit indexes are in the range
	// [0, num_bloom_bits). The latter two checks occur inside of
	// BuildCandidateMap.
	TEST_F(RapporAnalyzerTest, BuildCandidateMapSmokeTest) {
	for (auto num_candidates : {11, 51, 99}) {
	for (auto num_cohorts : {23, 45}) {
	for (auto num_hashes : {2, 6, 7}) {
	for (auto num_bloom_bits : {16, 128}) {
	SetAnalyzer(num_candidates, num_bloom_bits, num_cohorts, num_hashes);
	BuildCandidateMap();
	}
	}
	}
	}
	}

	// Tests the function BuildCandidateMap. We test that the map that is built
	// is consistent with the Bloom filters that are built by an encoder.
	TEST_F(RapporAnalyzerTest, BuildCandidateMapCompareWithEncoder) {
	static const uint32_t kNumCandidates = 10;
	static const uint32_t kNumCohorts = 20;
	static const uint32_t kNumHashes = 5;
	static const uint32_t kNumBloomBits = 64;

	SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);
	BuildCandidateMap();

	for (size_t candidate = 0; candidate < kNumCandidates; candidate++) {
	// Construct a new encoder with a new ClientSecret so that a random
	// cohort is selected.
	RapporEncoder encoder(config_, ClientSecret::GenerateNewSecret());

	// Encode the current candidate string using \|encoder\|.
	ValuePart value_part;
	value_part.set_string_value(CandidateString(candidate));
	RapporObservation observation;
	encoder.Encode(value_part, &observation);

	// Since p=0 and q=1 the RapporObservation contains the raw Bloom filter
	// with no noise added. Confirm that the BloomFilter is the same as
	// the one implied by the CandidateMap at the appropriate candidate
	// and cohort.
	EXPECT_EQ(BuildBitString(candidate, encoder.cohort()),
	DataToBinaryString(observation.data()));
	}
	}

	// Tests the function ExtractEstimatedBitCountRatios(). We build one small
	// estimated bit count ratio vector and explicitly check its values. We
	// use no-randomness: p = 0, q = 1 so that the estimated bit counts are
	// identical to the true bit counts.
	TEST_F(RapporAnalyzerTest, ExtractEstimatedBitCountRatiosSmallNonRandomTest) {
	static const uint32_t kNumCandidates = 10;
	static const uint32_t kNumCohorts = 3;
	static const uint32_t kNumHashes = 2;
	static const uint32_t kNumBloomBits = 8;
	SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);
	AddObservation(0, "00001010");
	AddObservation(0, "00010010");
	AddObservation(1, "00001010");
	AddObservation(1, "00010010");
	AddObservation(1, "00100010");
	AddObservation(2, "00001010");
	AddObservation(2, "00010010");
	AddObservation(2, "00010010");
	AddObservation(2, "00100010");

	Eigen::VectorXd est_bit_count_ratios;
	ExtractEstimatedBitCountRatios(&est_bit_count_ratios);

	std::ostringstream stream;
	stream << est_bit_count_ratios.block(0, 0, kNumCohorts * kNumBloomBits, 1);

	const char* kExpectedVectorString =
	" 0\n"
	" 0\n"
	" 0\n"
	" 0.5\n"
	" 0.5\n"
	" 0\n"
	" 1\n"
	" 0\n"
	" 0\n"
	" 0\n"
	"0.333333\n"
	"0.333333\n"
	"0.333333\n"
	" 0\n"
	" 1\n"
	" 0\n"
	" 0\n"
	" 0\n"
	" 0.25\n"
	" 0.5\n"
	" 0.25\n"
	" 0\n"
	" 1\n"
	" 0";
	EXPECT_EQ(kExpectedVectorString, stream.str());
	}

	// This test invokes Analyze() in a few very simple cases, checks that the the
	// algorithm converges and that the result vector has the correct size.
	TEST_F(RapporAnalyzerTest, SimpleAnalyzeTest) {
	static const uint32_t kNumCandidates = 10;
	static const uint32_t kNumCohorts = 3;
	static const uint32_t kNumHashes = 2;
	static const uint32_t kNumBloomBits = 8;
	static const bool print_estimates = false;

	std::vector<int> candidate_indices(100, 5);
	std::vector<int> true_candidate_counts = {0, 0, 0, 0, 0, 100, 0, 0, 0, 0};
	ShortExperimentWithAnalyze(
	"p=0, q=1, only candidate 5", kNumCandidates, kNumBloomBits, kNumCohorts,
	kNumHashes, candidate_indices, true_candidate_counts, print_estimates);

	candidate_indices = std::vector<int>(20, 1);
	candidate_indices.insert(candidate_indices.end(), 20, 4);
	candidate_indices.insert(candidate_indices.end(), 60, 9);
	true_candidate_counts = {0, 20, 0, 0, 20, 0, 0, 0, 0, 60};
	ShortExperimentWithAnalyze("p=0, q=1, several candidates", kNumCandidates,
	kNumBloomBits, kNumCohorts, kNumHashes,
	candidate_indices, true_candidate_counts,
	print_estimates);

	prob_0_becomes_1_ = 0.1;
	prob_1_stays_1_ = 0.9;

	candidate_indices = std::vector<int>(100, 5);
	true_candidate_counts = {0, 0, 0, 0, 0, 100, 0, 0, 0, 0};
	ShortExperimentWithAnalyze("p=0.1, q=0.9, only candidate 5", kNumCandidates,
	kNumBloomBits, kNumCohorts, kNumHashes,
	candidate_indices, true_candidate_counts,
	print_estimates);

	candidate_indices = std::vector<int>(20, 1);
	candidate_indices.insert(candidate_indices.end(), 20, 4);
	candidate_indices.insert(candidate_indices.end(), 60, 9);
	true_candidate_counts = {0, 20, 0, 0, 20, 0, 0, 0, 0, 60};
	ShortExperimentWithAnalyze("p=0.1, q=0.9, several candidates", kNumCandidates,
	kNumBloomBits, kNumCohorts, kNumHashes,
	candidate_indices, true_candidate_counts,
	print_estimates);
	}

	// Runs Analyze() on the corner-case of one candidate and one cohort.
	// The result should be exact.
	TEST_F(RapporAnalyzerTest, SingleCandidateTest) {
	static const uint32_t kNumCandidates = 1;
	static const uint32_t kNumCohorts = 1;
	static const uint32_t kNumHashes = 2;
	static const uint32_t kNumBloomBits = 8;

	// No noise introduced
	prob_0_becomes_1_ = 0.0;
	prob_1_stays_1_ = 1.0;

	SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);

	std::vector<int> candidate_indices(100, 0);
	AddObservationsForCandidates(candidate_indices);

	std::vector<CandidateResult> results;
	auto status = analyzer_->Analyze(&results);

	EXPECT_EQ(results.size(), kNumCandidates);
	EXPECT_GE(results[0].count_estimate, 99);
	EXPECT_LE(results[0].count_estimate, 101);
	}

	// Runs Analyze with one cohort and the number of bits much larger than the
	// number of candidates. No noise is introduced. The heavy hitter should be
	// reproduced nearly exactly (assuming second step of RAPPOR is "on" and
	// working). Note(bazyli): this test is based on the assumption that the number
	// of bits in the cohort is so large that no or almost no of the candidates will
	// share a non-zero bit with the heavy hitter.
	TEST_F(RapporAnalyzerTest, ExactSolutionTest) {
	// The probability that the heaviest hitter will not share a nonzero bit with
	// any other candidate when kNumCandidates = 10, kNumCohorts = 1, kNumHashes =
	// 2, kNumBloomBits = 128, is (125 * 126 / (127 * 128))^ 9 = 0.75. In that
	// case, the heaviest hitter must be solved exactly. Even if it does share a
	// bit or two with some other candidate(s), this is unlikely to alter the
	// result significantly.
	static const uint32_t kNumCandidates = 10;
	static const uint32_t kNumCohorts = 1;
	static const uint32_t kNumHashes = 2;
	static const uint32_t kNumBloomBits = 128;

	// No noise introduced.
	prob_0_becomes_1_ = 0.0;
	prob_1_stays_1_ = 1.0;

	SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);

	std::vector<int> candidate_indices(10, 0);
	candidate_indices.insert(candidate_indices.end(), 10, 1);
	candidate_indices.insert(candidate_indices.end(), 10, 2);
	candidate_indices.insert(candidate_indices.end(), 10, 3);
	candidate_indices.insert(candidate_indices.end(), 10, 4);
	candidate_indices.insert(candidate_indices.end(), 100, 9);
	AddObservationsForCandidates(candidate_indices);

	std::vector<CandidateResult> results;
	auto status = analyzer_->Analyze(&results);

	// Check that the heavy hitter is reproduced almost exactly
	EXPECT_GE(results[9].count_estimate, 95);
	EXPECT_LE(results[9].count_estimate, 105);

	// Candidates that are not observed should have zero counts
	EXPECT_EQ(results[5].count_estimate, 0);
	EXPECT_EQ(results[6].count_estimate, 0);
	EXPECT_EQ(results[7].count_estimate, 0);
	EXPECT_EQ(results[8].count_estimate, 0);

	// All standard errors should be numerically 0
	for (uint32_t i = 0; i < kNumCandidates; i++) {
	EXPECT_LE(results[i].std_error, 1e-12);
	}
	}

	// Runs Analyzer with two cohorts and large number of bits. Very small noise is
	// introduced. The heaviest hitter should be reproduced very well (assuming
	// second step of RAPPOR is "on" and working). This test is based on similar
	// assumptions that ExactSolutionTest but the check is relaxed because of noise
	// and multiple cohorts.
	TEST_F(RapporAnalyzerTest, HighAccuracySolutionTest) {
	static const uint32_t kNumCandidates = 10;
	static const uint32_t kNumCohorts = 2;
	static const uint32_t kNumHashes = 2;
	static const uint32_t kNumBloomBits = 64;

	// Small noise introduced.
	prob_0_becomes_1_ = 0.1;
	prob_1_stays_1_ = 0.9;

	SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);

	std::vector<int> candidate_indices(10, 0);
	candidate_indices.insert(candidate_indices.end(), 10, 1);
	candidate_indices.insert(candidate_indices.end(), 10, 2);
	candidate_indices.insert(candidate_indices.end(), 10, 3);
	candidate_indices.insert(candidate_indices.end(), 10, 4);
	candidate_indices.insert(candidate_indices.end(), 100, 9);
	AddObservationsForCandidates(candidate_indices);

	std::vector<CandidateResult> results;
	auto status = analyzer_->Analyze(&results);

	EXPECT_GE(results[9].count_estimate, 70);
	EXPECT_LE(results[9].count_estimate, 120);
	}

	// Runs Analyzer on a moderately under-determined system (number of rows <
	// number of columns) with more than one cohort. Small noise is introduced.
	// There are a number of (equal) heavy hitters, which should be identified.
	// TODO(bazyli): this test is heuristic.
	TEST_F(RapporAnalyzerTest, KHeavyHittersTest) {
	static const uint32_t kNumCandidates = 100;
	static const uint32_t kNumCohorts = 4;
	static const uint32_t kNumHashes = 2;
	static const uint32_t kNumBloomBits = 16;

	// Small noise introduced
	prob_0_becomes_1_ = 0.1;
	prob_1_stays_1_ = 0.9;

	SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);

	std::vector<int> candidate_indices;
	candidate_indices.insert(candidate_indices.end(), 100, 1);
	candidate_indices.insert(candidate_indices.end(), 100, 2);
	candidate_indices.insert(candidate_indices.end(), 100, 4);
	candidate_indices.insert(candidate_indices.end(), 100, 8);
	candidate_indices.insert(candidate_indices.end(), 100, 16);
	candidate_indices.insert(candidate_indices.end(), 100, 32);
	candidate_indices.insert(candidate_indices.end(), 100, 64);
	AddObservationsForCandidates(candidate_indices);

	std::vector<CandidateResult> results;
	auto status = analyzer_->Analyze(&results);

	// Check if the heavy hitters are identified
	EXPECT_GE(results[1].count_estimate, 50);
	EXPECT_LE(results[1].count_estimate, 150);
	EXPECT_GE(results[2].count_estimate, 50);
	EXPECT_LE(results[2].count_estimate, 150);
	EXPECT_GE(results[4].count_estimate, 50);
	EXPECT_LE(results[4].count_estimate, 150);
	EXPECT_GE(results[8].count_estimate, 50);
	EXPECT_LE(results[8].count_estimate, 150);
	EXPECT_GE(results[16].count_estimate, 50);
	EXPECT_LE(results[16].count_estimate, 150);
	EXPECT_GE(results[32].count_estimate, 50);
	EXPECT_LE(results[32].count_estimate, 150);
	}

	// Runs a small instance of RAPPOR with large noise.
	// The standard errors of identified heavy hitters should be nonzero.
	// Note(bazyli): this test is heuristic test.
	TEST_F(RapporAnalyzerTest, StandardErrorTest) {
	static const uint32_t kNumCandidates = 50;
	static const uint32_t kNumCohorts = 2;
	static const uint32_t kNumHashes = 2;
	static const uint32_t kNumBloomBits = 32;

	// Large noise introduced.
	prob_0_becomes_1_ = 0.4;
	prob_1_stays_1_ = 0.6;

	SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);

	std::vector<int> candidate_indices(1000, 0);
	candidate_indices.insert(candidate_indices.end(), 1000, 10);
	candidate_indices.insert(candidate_indices.end(), 1000, 20);
	candidate_indices.insert(candidate_indices.end(), 1000, 30);
	candidate_indices.insert(candidate_indices.end(), 1000, 40);
	AddObservationsForCandidates(candidate_indices);

	std::vector<CandidateResult> results;
	auto status = analyzer_->Analyze(&results);

	EXPECT_GT(results[0].std_error, 0);
	EXPECT_GT(results[10].std_error, 0);
	EXPECT_GT(results[20].std_error, 0);
	EXPECT_GT(results[30].std_error, 0);
	EXPECT_GT(results[40].std_error, 0);
	}

	} // namespace rappor
	} // namespace cobalt