blob: fd932bef51eb61f2441cd8e35855a009521cb74a [file] [log] [blame]
// Copyright 2016 The Fuchsia Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "algorithms/rappor/rappor_analyzer.h"
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <algorithm>
#include <string>
#include <utility>
#include <vector>
#include "algorithms/rappor/rappor_encoder.h"
#include "algorithms/rappor/rappor_test_utils.h"
#include "encoder/client_secret.h"
#include "third_party/googletest/googletest/include/gtest/gtest.h"
namespace cobalt {
namespace rappor {
using encoder::ClientSecret;
namespace {
std::string CandidateString(int i) {
return std::string("candidate string") + std::to_string(i);
}
// Populates |candidate_list| with |num_candidates| candidates;
void PopulateRapporCandidateList(uint32_t num_candidates,
RapporCandidateList* candidate_list) {
candidate_list->Clear();
for (int i = 0; i < num_candidates; i++) {
candidate_list->add_candidates(CandidateString(i));
}
}
// Makes a RapporConfig with the given data (and num_hashes=2).
RapporConfig Config(uint32_t num_bloom_bits, uint32_t num_cohorts,
uint32_t num_hashes, double p, double q) {
RapporConfig config;
config.set_num_bloom_bits(num_bloom_bits);
config.set_num_hashes(num_hashes);
config.set_num_cohorts(num_cohorts);
config.set_prob_0_becomes_1(p);
config.set_prob_1_stays_1(q);
return config;
}
} // namespace
class RapporAnalyzerTest : public ::testing::Test {
protected:
// Sets the member variable analyzer_ to a new RapporAnalyzer configured
// with the given arguments and the current values of prob_0_becomes_1_,
// prob_1_stays_1_.
void SetAnalyzer(uint32_t num_candidates, uint32_t num_bloom_bits,
uint32_t num_cohorts, uint32_t num_hashes) {
PopulateRapporCandidateList(num_candidates, &candidate_list_);
config_ = Config(num_bloom_bits, num_cohorts, num_hashes, prob_0_becomes_1_,
prob_1_stays_1_);
analyzer_.reset(new RapporAnalyzer(config_, &candidate_list_));
}
void BuildCandidateMap() {
EXPECT_EQ(grpc::OK, analyzer_->BuildCandidateMap().error_code());
uint32_t num_candidates =
analyzer_->candidate_map_.candidate_list->candidates_size();
uint32_t num_cohorts = analyzer_->config_->num_cohorts();
uint32_t num_hashes = analyzer_->config_->num_hashes();
uint32_t num_bits = analyzer_->config_->num_bits();
// Expect the number of candidates to be correct,
EXPECT_EQ(num_candidates,
analyzer_->candidate_map_.candidate_cohort_maps.size());
// and for each candidate...
for (auto candidate = 0; candidate < num_candidates; candidate++) {
// expect the number of cohorts to be correct,
EXPECT_EQ(num_cohorts,
analyzer_->candidate_map_.candidate_cohort_maps[candidate]
.cohort_hashes.size());
// and for each cohort...
for (auto cohort = 0; cohort < num_cohorts; cohort++) {
// expect the number of hashes to be correct,
EXPECT_EQ(num_hashes,
analyzer_->candidate_map_.candidate_cohort_maps[candidate]
.cohort_hashes[cohort]
.bit_indices.size());
// and for each hash...
for (int hash = 0; hash < num_hashes; hash++) {
// Expect the bit index to be in the range [0, num_bits).
auto bit_index = GetCandidateMapValue(candidate, cohort, hash);
EXPECT_GE(bit_index, 0);
EXPECT_LT(bit_index, num_bits);
}
}
}
}
// This should be invoked after BuildCandidateMap. It returns the bit index
// within the CandidateMap for the given |candidate_index|, |cohort_index|,
// and |hash_index|.
uint16_t GetCandidateMapValue(uint16_t candidate_index, uint16_t cohort_index,
uint16_t hash_index) {
EXPECT_GT(analyzer_->candidate_map_.candidate_cohort_maps.size(),
candidate_index);
EXPECT_GT(analyzer_->candidate_map_.candidate_cohort_maps[candidate_index]
.cohort_hashes.size(),
cohort_index);
EXPECT_GT(analyzer_->candidate_map_.candidate_cohort_maps[candidate_index]
.cohort_hashes[cohort_index]
.bit_indices.size(),
hash_index);
return analyzer_->candidate_map_.candidate_cohort_maps[candidate_index]
.cohort_hashes[cohort_index]
.bit_indices[hash_index];
}
// Builds and returns a bit string (i.e. a string of ASCII '0's and '1's)
// representing the Bloom filter implicitly stored within the CandidateMap
// for the given |candidate_index| and |cohort_index|.
std::string BuildBitString(uint16_t candidate_index, uint16_t cohort_index) {
return BuildBinaryString(
analyzer_->config_->num_bits(),
analyzer_->candidate_map_.candidate_cohort_maps[candidate_index]
.cohort_hashes[cohort_index]
.bit_indices);
}
RapporConfig config_;
std::unique_ptr<RapporAnalyzer> analyzer_;
RapporCandidateList candidate_list_;
// By default this test uses p=0, q=1. Individual tests may override this.
double prob_0_becomes_1_ = 0.0;
double prob_1_stays_1_ = 1.0;
};
// Tests the function BuildCandidateMap. We build one small CandidateMap and
// then we explicitly check every value against a known value. We have not
// independently verified the SHA-256 hash values and so rather than a test
// of correctness this is firstly a sanity test: we can eyeball the values
// and confirm they look sane, and secondly a regression test.
TEST_F(RapporAnalyzerTest, BuildCandidateMapSmallTest) {
static const uint32_t kNumCandidates = 5;
static const uint32_t kNumCohorts = 3;
static const uint32_t kNumHashes = 2;
static const uint32_t kNumBloomBits = 8;
SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);
BuildCandidateMap();
// clang-format off
int expected_bit_indices[kNumCandidates][kNumCohorts*kNumHashes] = {
// cihj means cohort = i and hash-index = j.
// c0h0 c0h1 c1h0 c1h1 c2h0 c2h2
{3, 5, 2, 6, 3, 6}, // candidate 0
{1, 5, 4, 7, 2, 0}, // candidate 1
{3, 0, 2, 0, 1, 4}, // candidate 2
{5, 1, 2, 4, 2, 4}, // candidate 3
{1, 4, 3, 1, 2, 6}, // candidate 4
};
// clang-format on
for (auto candidate = 0; candidate < kNumCandidates; candidate++) {
for (auto cohort = 0; cohort < kNumCohorts; cohort++) {
for (int hash = 0; hash < kNumHashes; hash++) {
EXPECT_EQ(expected_bit_indices[candidate][cohort * kNumHashes + hash],
GetCandidateMapValue(candidate, cohort, hash))
<< "(" << candidate << "," << cohort * kNumHashes + hash << ")";
}
}
}
}
// Tests the function BuildCandidateMap. We build many different CandidateMaps
// with many different parameters. We are testing firstly that the procedure
// completes without error, secondly that the shape of the produced
// data structure is correct and thirdly that the bit indexes are in the range
// [0, num_bloom_bits). The latter two checks occur inside of BuildCandidateMap.
TEST_F(RapporAnalyzerTest, BuildCandidateMapSmokeTest) {
for (auto num_candidates : {11, 51, 99}) {
for (auto num_cohorts : {23, 45}) {
for (auto num_hashes : {2, 6, 7}) {
for (auto num_bloom_bits : {16, 128}) {
SetAnalyzer(num_candidates, num_bloom_bits, num_cohorts, num_hashes);
BuildCandidateMap();
}
}
}
}
}
// Tests the function BuildCandidateMap. We test that the map that is built
// is consistent with the Bloom filters that are built by an encoder.
TEST_F(RapporAnalyzerTest, BuildCandidateMapCompareWithEncoder) {
static const uint32_t kNumCandidates = 10;
static const uint32_t kNumCohorts = 20;
static const uint32_t kNumHashes = 5;
static const uint32_t kNumBloomBits = 64;
SetAnalyzer(kNumCandidates, kNumBloomBits, kNumCohorts, kNumHashes);
BuildCandidateMap();
for (int candidate = 0; candidate < kNumCandidates; candidate++) {
// Construct a new encoder with a new ClientSecret so that a random
// cohort is selected.
RapporEncoder encoder(config_, ClientSecret::GenerateNewSecret());
// Encode the current candidate string using |encoder|.
ValuePart value_part;
value_part.set_string_value(CandidateString(candidate));
RapporObservation observation;
encoder.Encode(value_part, &observation);
// Since p=0 and q=1 the RapporObservation contains the raw Bloom filter
// with no noise added. Confirm that the BloomFilter is the same as
// the one implied by the CandidateMap at the appropriate candidate
// and cohort.
EXPECT_EQ(BuildBitString(candidate, encoder.cohort()),
DataToBinaryString(observation.data()));
}
}
} // namespace rappor
} // namespace cobalt