blob: ec615a74f7b5996b8dd70ededab296d6edd7c427 [file] [log] [blame]
// Copyright 2016 The Fuchsia Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "algorithms/rappor/rappor_encoder.h"
#include <algorithm>
#include <map>
#include <vector>
#include "algorithms/rappor/rappor_test_utils.h"
#include "third_party/googletest/googletest/include/gtest/gtest.h"
#include "util/crypto_util/random_test_utils.h"
namespace cobalt {
namespace rappor {
using encoder::ClientSecret;
TEST(RapporConfigValidatorTest, TestMinPower2Above) {
EXPECT_EQ(1u, RapporConfigValidator::MinPower2Above(0));
EXPECT_EQ(1u, RapporConfigValidator::MinPower2Above(1));
EXPECT_EQ(2u, RapporConfigValidator::MinPower2Above(2));
EXPECT_EQ(4u, RapporConfigValidator::MinPower2Above(3));
EXPECT_EQ(4u, RapporConfigValidator::MinPower2Above(4));
EXPECT_EQ(8u, RapporConfigValidator::MinPower2Above(5));
EXPECT_EQ(8u, RapporConfigValidator::MinPower2Above(6));
EXPECT_EQ(8u, RapporConfigValidator::MinPower2Above(7));
EXPECT_EQ(8u, RapporConfigValidator::MinPower2Above(8));
EXPECT_EQ(16u, RapporConfigValidator::MinPower2Above(9));
EXPECT_EQ(16u, RapporConfigValidator::MinPower2Above(10));
EXPECT_EQ(16u, RapporConfigValidator::MinPower2Above(11));
EXPECT_EQ(16u, RapporConfigValidator::MinPower2Above(12));
EXPECT_EQ(16u, RapporConfigValidator::MinPower2Above(13));
EXPECT_EQ(16u, RapporConfigValidator::MinPower2Above(14));
EXPECT_EQ(16u, RapporConfigValidator::MinPower2Above(15));
EXPECT_EQ(16u, RapporConfigValidator::MinPower2Above(16));
EXPECT_EQ(32u, RapporConfigValidator::MinPower2Above(17));
}
TEST(RapporConfigValidatorTest, TestConstructor) {
RapporConfig config;
config.set_prob_0_becomes_1(0.3);
config.set_prob_1_stays_1(0.7);
config.set_num_bloom_bits(64);
config.set_num_hashes(5);
config.set_num_cohorts(100);
auto validator = RapporConfigValidator(config);
EXPECT_EQ(128u, validator.num_cohorts_2_power());
config.set_num_cohorts(200);
validator = RapporConfigValidator(config);
EXPECT_EQ(256u, validator.num_cohorts_2_power());
config.set_num_cohorts(300);
validator = RapporConfigValidator(config);
EXPECT_EQ(512u, validator.num_cohorts_2_power());
config.set_num_cohorts(400);
validator = RapporConfigValidator(config);
EXPECT_EQ(512u, validator.num_cohorts_2_power());
config.set_num_cohorts(500);
validator = RapporConfigValidator(config);
EXPECT_EQ(512u, validator.num_cohorts_2_power());
config.set_num_cohorts(600);
validator = RapporConfigValidator(config);
EXPECT_EQ(1024u, validator.num_cohorts_2_power());
config.set_num_cohorts(1023);
validator = RapporConfigValidator(config);
EXPECT_EQ(1024u, validator.num_cohorts_2_power());
config.set_num_cohorts(1024);
validator = RapporConfigValidator(config);
EXPECT_EQ(1024u, validator.num_cohorts_2_power());
}
// Constructs a RapporEncoder with the given |config|, invokes
// Encode() with a dummy string and EncodeNullObservation(), and checks that the
// returned statuses are either kOK or kInvalidConfig, whichever is expected.
void TestRapporConfig(const RapporConfig& config, Status expected_status,
int caller_line_number) {
// Make a ClientSecret once and statically store the token.
static const std::string kClientSecretToken =
ClientSecret::GenerateNewSecret().GetToken();
// Each time this function is invoked reconstitute the secret from the token.
RapporEncoder encoder(config, ClientSecret::FromToken(kClientSecretToken));
RapporObservation obs;
ValuePart value;
value.set_string_value("dummy");
EXPECT_EQ(expected_status, encoder.Encode(value, &obs))
<< "Invoked from line number: " << caller_line_number;
}
// A macro to invoke testRapporConfig and pass it the current line number.
#define TEST_RAPPOR_CONFIG(config, expected_status) \
(TestRapporConfig(config, expected_status, __LINE__))
// Tests the validation of config for String RAPPOR.
TEST(RapporEncoderTest, StringRapporConfigValidation) {
// Empty config: Invalid
RapporConfig config;
TEST_RAPPOR_CONFIG(config, kInvalidConfig);
// Add two probabilities, still Invalid
config.set_prob_0_becomes_1(0.3);
config.set_prob_1_stays_1(0.7);
TEST_RAPPOR_CONFIG(config, kInvalidConfig);
// Set num_bloom_bits, still Invalid
config.set_num_bloom_bits(8);
TEST_RAPPOR_CONFIG(config, kInvalidConfig);
// Set num_hashes, still Invalid
config.set_num_hashes(2);
TEST_RAPPOR_CONFIG(config, kInvalidConfig);
// set num_cohorts: Valid
config.set_num_cohorts(20);
TEST_RAPPOR_CONFIG(config, kOK);
// Explicitly set PRR to 0: Valid.
config.set_prob_rr(0.0);
TEST_RAPPOR_CONFIG(config, kOK);
// Explicitly set PRR to non-zero: Invalid.
config.set_prob_rr(0.1);
TEST_RAPPOR_CONFIG(config, kInvalidConfig);
// Explicitly set PRR back to zero: Valid.
config.set_prob_rr(0.0);
TEST_RAPPOR_CONFIG(config, kOK);
// Set one of the probabilities to negative: Invalid
config.set_prob_0_becomes_1(-0.3);
TEST_RAPPOR_CONFIG(config, kInvalidConfig);
// Set one of the probabilities to greater than 1: Invalid
config.set_prob_0_becomes_1(1.3);
TEST_RAPPOR_CONFIG(config, kInvalidConfig);
// Fix the probability: Valid
config.set_prob_0_becomes_1(0.3);
TEST_RAPPOR_CONFIG(config, kOK);
// Set the other probability to negative: Invalid
config.set_prob_1_stays_1(-0.7);
TEST_RAPPOR_CONFIG(config, kInvalidConfig);
// Set the other probability to greater than 1: Invalid
config.set_prob_1_stays_1(1.7);
TEST_RAPPOR_CONFIG(config, kInvalidConfig);
// Fix the probability: Valid
config.set_prob_1_stays_1(0.7);
TEST_RAPPOR_CONFIG(config, kOK);
// Set num_bloom_bits to negative: Invalid
config.set_num_bloom_bits(-8);
TEST_RAPPOR_CONFIG(config, kInvalidConfig);
// Set num_bloom_bits to 0: Invalid
config.set_num_bloom_bits(0);
TEST_RAPPOR_CONFIG(config, kInvalidConfig);
// Set num_bloom_bits back to positive: Valid
config.set_num_bloom_bits(8);
TEST_RAPPOR_CONFIG(config, kOK);
// Set num_hashes to negative: Invalid
config.set_num_hashes(-2);
TEST_RAPPOR_CONFIG(config, kInvalidConfig);
// Set num_hashes to 0: Invalid
config.set_num_hashes(0);
TEST_RAPPOR_CONFIG(config, kInvalidConfig);
// Set num_hashes to 8: Invalid
config.set_num_hashes(8);
TEST_RAPPOR_CONFIG(config, kInvalidConfig);
// Set num_hashes back to positive: Valid
config.set_num_hashes(2);
TEST_RAPPOR_CONFIG(config, kOK);
// Set num_cohorts to negative: Invalid
config.set_num_cohorts(-20);
TEST_RAPPOR_CONFIG(config, kInvalidConfig);
// Set num_cohorts to 0: Invalid
config.set_num_cohorts(0);
TEST_RAPPOR_CONFIG(config, kInvalidConfig);
// Set num_cohorts to 1025: Invalid
config.set_num_cohorts(1025);
TEST_RAPPOR_CONFIG(config, kInvalidConfig);
// Set num_cohorts to 1024: Valid
config.set_num_cohorts(1024);
TEST_RAPPOR_CONFIG(config, kOK);
// Set num_cohorts back to positive: Valid
config.set_num_cohorts(20);
TEST_RAPPOR_CONFIG(config, kOK);
// Set num_bloom_bits to equal num_hashes: Invalid
config.set_num_bloom_bits(2);
TEST_RAPPOR_CONFIG(config, kInvalidConfig);
// Set num_bloom_bits to greater than num_hashes and a power of 2: Valid
config.set_num_bloom_bits(4);
TEST_RAPPOR_CONFIG(config, kOK);
// Set num_bloom_bits to greater than num_hashes but not a power of 2: Invalid
config.set_num_bloom_bits(3);
TEST_RAPPOR_CONFIG(config, kInvalidConfig);
// Test with an invalid ClientSecret
RapporEncoder encoder(config, ClientSecret::FromToken("Invalid Token"));
RapporObservation obs;
ValuePart value;
value.set_string_value("dummy");
EXPECT_EQ(kInvalidConfig, encoder.Encode(value, &obs));
}
// Constructs a BasicRapporEncoder with the given |config|, invokes
// Encode() with a dummy string and EncodeNullObservation(), and checks that the
// returned statuses are either kOK or kInvalidConfig, whichever is expected.
void TestBasicRapporConfig(const BasicRapporConfig& config,
Status expected_status, int caller_line_number) {
// Make a ClientSecret once and statically store the token.
static const std::string kClientSecretToken =
ClientSecret::GenerateNewSecret().GetToken();
// Each time this function is invoked reconstitute the secret from the token.
BasicRapporEncoder encoder(config,
ClientSecret::FromToken(kClientSecretToken));
BasicRapporObservation obs;
ValuePart value;
value.set_string_value("cat");
EXPECT_EQ(expected_status, encoder.Encode(value, &obs))
<< "Invoked from line number: " << caller_line_number;
BasicRapporObservation null_obs;
EXPECT_EQ(expected_status, encoder.EncodeNullObservation(&null_obs))
<< "Invoked from line number: " << caller_line_number;
}
// A macro to invoke TestBasicRapporConfig and pass it the current line number.
#define TEST_BASIC_RAPPOR_CONFIG(config, expected_status) \
(TestBasicRapporConfig(config, expected_status, __LINE__))
// Tests the validation of config for Basic RAPPOR.
TEST(RapporEncoderTest, BasicRapporConfigValidation) {
// Empty config: Invalid
BasicRapporConfig config;
TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig);
// Add two probabilities but no categories: Invalid
config.set_prob_0_becomes_1(0.3);
config.set_prob_1_stays_1(0.7);
TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig);
// Add one category: Invalid.
config.mutable_string_categories()->add_category("cat");
TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig);
// Add two more categories: Valid.
config.mutable_string_categories()->add_category("dog");
config.mutable_string_categories()->add_category("fish");
TEST_BASIC_RAPPOR_CONFIG(config, kOK);
// Explicitly set PRR to 0: Valid.
config.set_prob_rr(0.0);
TEST_BASIC_RAPPOR_CONFIG(config, kOK);
// Explicitly set PRR to non-zero: Invalid.
config.set_prob_rr(0.1);
TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig);
// Explicitly set PRR back to zero: Valid.
config.set_prob_rr(0.0);
TEST_BASIC_RAPPOR_CONFIG(config, kOK);
// Set one of the probabilities to negative: Invalid
config.set_prob_0_becomes_1(-0.3);
TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig);
// Set one of the probabilities to greater than 1: Invalid
config.set_prob_0_becomes_1(1.3);
TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig);
// Fix the probability: Valid
config.set_prob_0_becomes_1(0.3);
TEST_BASIC_RAPPOR_CONFIG(config, kOK);
// Set the other probability to negative: Invalid
config.set_prob_1_stays_1(-0.7);
TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig);
// Set the other the probability to greater than 1: Invalid
config.set_prob_1_stays_1(1.7);
TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig);
// Fix the probability: Valid
config.set_prob_1_stays_1(0.7);
TEST_BASIC_RAPPOR_CONFIG(config, kOK);
// Add an empty category: Invalid
config.mutable_string_categories()->add_category("");
TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig);
// Test with an invalid ClientSecret
BasicRapporEncoder encoder(config, ClientSecret::FromToken("Invalid Token"));
BasicRapporObservation obs;
ValuePart value;
value.set_string_value("dummy");
EXPECT_EQ(kInvalidConfig, encoder.Encode(value, &obs));
BasicRapporObservation null_obs;
EXPECT_EQ(kInvalidConfig, encoder.EncodeNullObservation(&null_obs));
}
// Test Config Validation with integer categories
TEST(RapporEncoderTest, BasicRapporWithIntsConfigValidation) {
// Create a config with three integer categories.
BasicRapporConfig config;
config.set_prob_0_becomes_1(0.3);
config.set_prob_1_stays_1(0.7);
config.mutable_int_range_categories()->set_first(-1);
config.mutable_int_range_categories()->set_last(1);
// Construct the encoder
BasicRapporEncoder encoder(config, ClientSecret::GenerateNewSecret());
// Perform an encode with a value equal to one of the listed categories
BasicRapporObservation obs;
ValuePart value;
value.set_int_value(-1);
EXPECT_EQ(kOK, encoder.Encode(value, &obs));
// Perform an encode with a value not equal to one of the listed categories
value.set_int_value(2);
EXPECT_EQ(kInvalidInput, encoder.Encode(value, &obs));
// Perform an encode of a null observation
BasicRapporObservation null_obs;
EXPECT_EQ(kOK, encoder.EncodeNullObservation(&null_obs));
}
// Performs a test of BasicRapporEncoder::Encode() and
// BasicRapporEncoder::EncodeNullObservation() in the two special cases that
// that there is no randomness involved in the encoded string, namely
// (a) p = 0, q = 1
// (b) p = 1, q = 0
//
// |num_categories| must be a positive integer. Basic RAPPOR will be configured
// to have this many categories. The encoding will be performed for each of
// the categores.
//
// |q_is_one| Do the test in case (a) where p = 0, q = 1
void DoBasicRapporNoRandomnessTest(int num_categories, bool q_is_one) {
// Select the parameters based on the mode. index_char and other_char
// determine the expected bit pattern in the encoding. index_char is the
// character we expect to see in the position of the given category and
// other_char is the character we expect to see in the other positions.
float p, q;
char index_char, other_char;
if (q_is_one) {
// We expect a 1 in the index position and 0's everywhere else.
p = 0.0;
q = 1.0;
index_char = '1';
other_char = '0';
} else {
// We expect a 0 in the index position and 1's everywhere else.
p = 1.0;
q = 0.0;
index_char = '0';
other_char = '1';
}
// Configure basic RAPPOR with the selected parameters.
BasicRapporConfig config;
config.set_prob_0_becomes_1(p);
config.set_prob_1_stays_1(q);
for (int i = 0; i < num_categories; i++) {
config.mutable_string_categories()->add_category(CategoryName(i));
}
// Construct a BasicRapporEncoder.
static const std::string kClientSecretToken =
ClientSecret::GenerateNewSecret().GetToken();
BasicRapporEncoder encoder(config,
ClientSecret::FromToken(kClientSecretToken));
// The expected number of bits in the encoding is the least multiple of 8
// greater than or equal to num_categories.
int expected_num_bits = 8 * (((num_categories - 1) / 8) + 1);
// For each category, obtain the observation and check that the bit pattern
// is as expected.
BasicRapporObservation obs;
for (int i = 0; i < num_categories; i++) {
auto category_name = CategoryName(i);
ValuePart value;
value.set_string_value(category_name);
ASSERT_EQ(kOK, encoder.Encode(value, &obs)) << category_name;
auto expected_pattern =
BuildBitPatternString(expected_num_bits, i, index_char, other_char);
EXPECT_EQ(DataToBinaryString(obs.data()), expected_pattern);
}
// Encode a null observation and check that the bit pattern is as expected.
BasicRapporObservation null_obs;
ASSERT_EQ(kOK, encoder.EncodeNullObservation(&null_obs))
<< "Null observation";
std::vector<uint16_t> index_of_1s(0);
if (!q_is_one) {
for (uint16_t k = 0; k < expected_num_bits; k++) {
index_of_1s.push_back(k);
}
}
auto null_expected_pattern =
BuildBinaryString(expected_num_bits, index_of_1s);
EXPECT_EQ(DataToBinaryString(null_obs.data()), null_expected_pattern);
}
// Performs a test of BasicRapporEncoder::Encode() in the special case that
// the values of p and q are either 0 or 1 so that there is no randomness
// involved in the encoded string.
TEST(BasicRapporEncoderTest, NoRandomness) {
// We test with between 2 and 50 categories.
for (int num_categories = 2; num_categories <= 50; num_categories++) {
// See comments at DoBasicRapporNoRandomnessTest.
DoBasicRapporNoRandomnessTest(num_categories, true);
DoBasicRapporNoRandomnessTest(num_categories, false);
}
}
// Base class for tests of Basic RAPPOR that use a deterministic RNG.
class BasicRapporDeterministicTest : public ::testing::Test {
protected:
std::unique_ptr<BasicRapporEncoder> BuildEncoder(float prob_0_becomes_1,
float prob_1_stays_1,
int num_categories) {
// Configure BasicRappor.
BasicRapporConfig config;
config.set_prob_0_becomes_1(prob_0_becomes_1);
config.set_prob_1_stays_1(prob_1_stays_1);
for (int i = 0; i < num_categories; i++) {
config.mutable_string_categories()->add_category(CategoryName(i));
}
// Construct a BasicRapporEncoder.
static const std::string kClientSecretToken =
ClientSecret::GenerateNewSecret().GetToken();
std::unique_ptr<BasicRapporEncoder> encoder(new BasicRapporEncoder(
config, ClientSecret::FromToken(kClientSecretToken)));
// Give the encoder a deterministic RNG.
encoder->SetRandomForTesting(
std::unique_ptr<crypto::Random>(new crypto::DeterministicRandom()));
return encoder;
}
// Generates a Basic RAPPOR observation 1000 times and then performs Pearson's
// chi-squared test on each bit separately to check for goodness of fit to
// a binomial distribution with the appropriate parameter. Fails if
// chi-squared >= |chi_squared_threshold|.
//
// Uses DeterministicRandom in order to ensure reproducibility.
//
// REQUIRES: 0 <= selected_category < num_categories.
// All 1000 of the observations will be for the selected category. Thus
// the expected number of 1's in the bit position corresponding to the
// selected category is prob_1_stays_1 and the expected number of 1'1 in
// all other bit positions is prob_0_becomes_1.
void DoChiSquaredTest(float prob_0_becomes_1, float prob_1_stays_1,
int num_categories, int selected_category,
double chi_squared_threshold) {
// Build the encoder
auto encoder =
BuildEncoder(prob_0_becomes_1, prob_1_stays_1, num_categories);
// Sample 1000 observations of the selected category and collect the bit
// counts
static const int kNumTrials = 1000;
auto category_name = CategoryName(selected_category);
BasicRapporObservation obs;
std::vector<int> counts(num_categories, 0);
for (size_t i = 0; i < kNumTrials; i++) {
obs.Clear();
ValuePart value;
value.set_string_value(category_name);
EXPECT_EQ(kOK, encoder->Encode(value, &obs));
for (int bit_index = 0; bit_index < num_categories; bit_index++) {
if (IsSet(obs.data(), bit_index)) {
counts[bit_index]++;
}
}
}
// In the special case where prob_1_stays_1 is 1 make sure that we got
// 1000 1's in the selected category.
if (prob_1_stays_1 == 1.0) {
EXPECT_EQ(kNumTrials, counts[selected_category]);
}
// This is the expected number of ones and zeroes for the bit position in
// the selected category.
const double expected_1_selected =
static_cast<double>(kNumTrials) * prob_1_stays_1;
const double expected_0_selected =
static_cast<double>(kNumTrials) - expected_1_selected;
// This is the expected number of ones and zeroes for all bit positions
// other than the selected category.
const double expected_1 =
static_cast<double>(kNumTrials) * prob_0_becomes_1;
const double expected_0 = static_cast<double>(kNumTrials) - expected_1;
// For each of the bit positions, perform the chi-squared test.
for (int bit_index = 0; bit_index < num_categories; bit_index++) {
double exp_0 =
(bit_index == selected_category ? expected_0_selected : expected_0);
double exp_1 =
(bit_index == selected_category ? expected_1_selected : expected_1);
if (exp_0 != 0.0 && exp_1 != 0.0) {
// Difference between actual 1 count and expected 1 count.
double delta_1 = static_cast<double>(counts[bit_index]) - exp_1;
// Difference between actual 0 count and expected 0 count.
double delta_0 =
static_cast<double>(kNumTrials - counts[bit_index]) - exp_0;
// Compute and check the Chi-Squared value.
double chi_squared =
delta_1 * delta_1 / exp_1 + delta_0 * delta_0 / exp_0;
EXPECT_TRUE(chi_squared < chi_squared_threshold)
<< "chi_squared=" << chi_squared
<< " chi_squared_threshold=" << chi_squared_threshold
<< " bit_index=" << bit_index << " delta_0=" << delta_0
<< " delta_1=" << delta_1 << " num_categories=" << num_categories
<< " selected_category=" << selected_category
<< " prob_0_becomes_1=" << prob_0_becomes_1
<< " prob_1_stays_1=" << prob_1_stays_1;
}
}
}
// Generates a Basic RAPPOR encoding of a null observation 1000 times and then
// performs Pearson's chi-squared test on each bit separately to check for
// goodness of fit to a binomial distribution with the appropriate parameter.
// Fails if chi-squared >= |chi_squared_threshold|.
//
// Uses DeterministicRandom in order to ensure reproducibility.
//
// The true value of each bit (before encoding) is 0, so the expected number
// of 1's in each bit position is prob_0_becomes_1.
void DoChiSquaredTestForNullObs(float prob_0_becomes_1, float prob_1_stays_1,
int num_categories,
double chi_squared_threshold) {
// Build the encoder.
// Since no bits of the true bit vector are 1, the probability that 1 stays
// 1 is irrelevant.
auto encoder =
BuildEncoder(prob_0_becomes_1, prob_1_stays_1, num_categories);
// Sample 1000 observations of the selected category and collect the bit
// counts
static const int kNumTrials = 1000;
BasicRapporObservation null_obs;
std::vector<int> counts(num_categories, 0);
for (size_t i = 0; i < kNumTrials; i++) {
null_obs.Clear();
EXPECT_EQ(kOK, encoder->EncodeNullObservation(&null_obs));
for (int bit_index = 0; bit_index < num_categories; bit_index++) {
if (IsSet(null_obs.data(), bit_index)) {
counts[bit_index]++;
}
}
}
// In the special case where prob_0_becomes_1 is 0 make sure that we got
// 1000 0's.
for (int k = 0; k < num_categories; k++) {
if (prob_0_becomes_1 == 0.0) {
EXPECT_EQ(0, counts[k]);
}
}
// This is the expected number of ones and zeroes for each bit position.
const double expected_1 =
static_cast<double>(kNumTrials) * prob_0_becomes_1;
const double expected_0 = static_cast<double>(kNumTrials) - expected_1;
// For each of the bit positions, perform the chi-squared test.
for (int bit_index = 0; bit_index < num_categories; bit_index++) {
if (expected_0 != 0.0 && expected_1 != 0.0) {
// Difference between actual 1 count and expected 1 count.
double delta_1 = static_cast<double>(counts[bit_index]) - expected_1;
// Difference between actual 0 count and expected 0 count.
double delta_0 =
static_cast<double>(kNumTrials - counts[bit_index]) - expected_0;
// Compute and check the Chi-Squared value.
double chi_squared =
delta_1 * delta_1 / expected_1 + delta_0 * delta_0 / expected_0;
EXPECT_TRUE(chi_squared < chi_squared_threshold)
<< "chi_squared=" << chi_squared
<< " chi_squared_threshold=" << chi_squared_threshold
<< " bit_index=" << bit_index << " delta_0=" << delta_0
<< " delta_1=" << delta_1 << " num_categories=" << num_categories
<< " prob_0_becomes_1=" << prob_0_becomes_1;
}
}
}
};
TEST_F(BasicRapporDeterministicTest, ChiSquaredTest) {
// Perform the chi-squared test for various numbers of categories and
// various selected categories, as well as for null observations without
// selected categories. This gets combinatorially explosive so to keep the
// testing time reasonable we don't test every combination but rather step
// through the num_categories by 7 and use at most 3 selected categories for
// each num_categories.
//
// The last parameter to DoChiSquaredTest*() is the chi-squared value to use.
// Notice that these values were chosen by experimentation to be as small as
// possible so that the test passes. They do not necessarily correspond to
// natural confidence intervals for the chi-squared test.
for (int num_categories = 2; num_categories < 40; num_categories += 7) {
for (int selected_category = 0; selected_category < num_categories;
selected_category += (num_categories / 3 + 1)) {
// The first parameter is prob_0_becomes_1, and the second parameter is
// prob_1_stays_1.
DoChiSquaredTest(0.01, 0.99, num_categories, selected_category, 8.2);
DoChiSquaredTest(0.1, 0.9, num_categories, selected_category, 9.4);
DoChiSquaredTest(0.2, 0.8, num_categories, selected_category, 11.1);
DoChiSquaredTest(0.25, 0.75, num_categories, selected_category, 11.8);
DoChiSquaredTest(0.3, 0.7, num_categories, selected_category, 11.8);
}
// The first parameter is prob_0_becomes_1, and the second parameter is
// prob_1_stays_1.
DoChiSquaredTestForNullObs(0.0, 1.0, num_categories, 0.0);
DoChiSquaredTestForNullObs(0.01, 0.99, num_categories, 8.2);
DoChiSquaredTestForNullObs(0.1, 0.9, num_categories, 9.4);
DoChiSquaredTestForNullObs(0.2, 0.8, num_categories, 11.1);
DoChiSquaredTestForNullObs(0.25, 0.75, num_categories, 11.8);
DoChiSquaredTestForNullObs(0.3, 0.7, num_categories, 11.8);
}
}
// Test that BasicRapporEncoder::Encode() returns kInvalidArgument if a category
// name is used that is not one of the registered categories.
TEST(BasicRapporEncoderTest, BadCategory) {
// Configure Basic RAPPOR with two categories, "dog" and "cat".
BasicRapporConfig config;
config.set_prob_0_becomes_1(0.3);
config.set_prob_1_stays_1(0.7);
config.mutable_string_categories()->add_category("dog");
config.mutable_string_categories()->add_category("cat");
// Construct a BasicRapporEncoder.
static const std::string kClientSecretToken =
ClientSecret::GenerateNewSecret().GetToken();
BasicRapporEncoder encoder(config,
ClientSecret::FromToken(kClientSecretToken));
// Attempt to encode a string that is not one of the categories. Expect
// to receive kInvalidInput.
BasicRapporObservation obs;
ValuePart value;
value.set_string_value("fish");
EXPECT_EQ(kInvalidInput, encoder.Encode(value, &obs));
}
// Tests that BasicRapporEncoder::Encode() correctly handles values of type
// INDEX
TEST(BasicRapporEncoderTest, EncodeIndex) {
// Configure Basic RAPPOR with 5 indexed categories.
// We use no randomness so we can check that the correct bit is being set.
BasicRapporConfig config;
config.set_prob_0_becomes_1(0.0);
config.set_prob_1_stays_1(1.0);
config.mutable_indexed_categories()->set_num_categories(5);
// Construct a BasicRapporEncoder.
static const std::string kClientSecretToken =
ClientSecret::GenerateNewSecret().GetToken();
BasicRapporEncoder encoder(config,
ClientSecret::FromToken(kClientSecretToken));
BasicRapporObservation obs;
// Validate that category indices 0, 1 and 4 yield the correct data.
ValuePart value;
value.set_index_value(0);
EXPECT_EQ(kOK, encoder.Encode(value, &obs));
EXPECT_EQ("00000001", DataToBinaryString(obs.data()));
obs.Clear();
value.set_index_value(1);
EXPECT_EQ(kOK, encoder.Encode(value, &obs));
EXPECT_EQ("00000010", DataToBinaryString(obs.data()));
obs.Clear();
value.set_index_value(4);
EXPECT_EQ(kOK, encoder.Encode(value, &obs));
EXPECT_EQ("00010000", DataToBinaryString(obs.data()));
obs.Clear();
// Validate that category index 5 yields kInvalidInput.
value.set_index_value(5);
EXPECT_EQ(kInvalidInput, encoder.Encode(value, &obs));
}
class StringRapporEncoderTest : public ::testing::Test {
protected:
uint32_t AttemptDeriveCohortFromSecret(size_t attempt_number) {
return encoder_->AttemptDeriveCohortFromSecret(attempt_number);
}
uint32_t DeriveCohortFromSecret() {
return encoder_->DeriveCohortFromSecret();
}
void SetNewEncoder(const RapporConfig& config, encoder::ClientSecret secret) {
encoder_.reset(new RapporEncoder(config, secret));
}
std::string MakeBloomBits(const ValuePart& value) {
return encoder_->MakeBloomBits(value);
}
// Using the given parameters, and using the fixed input string
// "www.google.com" and a fixed cohort (i.e. a fixed client secret),
// this test generates a String RAPPOR observation 1000 times, counts
// the number of resulting 1's and 0's in two bit positions, and performs
// Pearson's Chi-squared test to check for goodness of fit to a binomial
// distribution with the appropriate parameter. Fails if
// chi-squared >= |chi_squared_threshold|.
//
// First we examine the Bloom filter with no bits flipped and we find one
// index of a set bit and one index of an unset bit. We perform the
// Chi-squared test twice: once for each of these two indices.
//
// Uses DeterministicRandom in order to ensure reproducibility.
void DoChiSquaredTest(float prob_0_becomes_1, float prob_1_stays_1,
int num_bits, int num_hashes,
double chi_squared_threshold) {
// Build the encoder.
RapporConfig config;
config.set_prob_0_becomes_1(prob_0_becomes_1);
config.set_prob_1_stays_1(prob_1_stays_1);
config.set_num_bloom_bits(num_bits);
config.set_num_hashes(num_hashes);
// This value will not be used but it needs to be something valid.
config.set_num_cohorts(100);
// We use a fixed client secret so this test is deterministic.
static const char kClientSecret[] = "4b4BxKq253TTCWIXFhLDTg==";
SetNewEncoder(config, ClientSecret::FromToken(kClientSecret));
// Give the encoder a deterministic RNG.
encoder_->SetRandomForTesting(
std::unique_ptr<crypto::Random>(new crypto::DeterministicRandom()));
// Build the input value. We use a fixed input string so this test is
// deterministic.
ValuePart value;
value.set_string_value("www.google.com");
// Capture the indices of one bit that is set and one bit that is unset in
// the bloom filter for the input value. It doesn't matter which two bits
// we capture. We will do two chi-squared tests, one on each of the two
// bits.
int index_of_set_bit = -1;
int index_of_unset_bit = -1;
auto bloom_bits = MakeBloomBits(value);
int num_bits_set = 0;
for (int bit_index = 0; bit_index < num_bits; bit_index++) {
if (IsSet(bloom_bits, bit_index)) {
num_bits_set++;
index_of_set_bit = bit_index;
} else {
index_of_unset_bit = bit_index;
}
}
ASSERT_GT(num_bits_set, 0);
ASSERT_LE(num_bits_set, num_hashes);
// This heuristic for a minimum reasonable number of bits that should be
// set was found by experimentation to allow the test to pass.
int expected_min_num_bits_set = std::min(num_hashes - 2, num_bits / 4);
ASSERT_GE(num_bits_set, expected_min_num_bits_set)
<< " num_bits=" << num_bits << " num_hashes=" << num_hashes;
ASSERT_GE(index_of_set_bit, 0);
ASSERT_LT(index_of_set_bit, num_bits);
ASSERT_GE(index_of_unset_bit, 0);
ASSERT_LT(index_of_unset_bit, num_bits);
ASSERT_NE(index_of_set_bit, index_of_unset_bit);
ASSERT_TRUE(IsSet(bloom_bits, index_of_set_bit));
ASSERT_FALSE(IsSet(bloom_bits, index_of_unset_bit));
// Encode the input value 1000 times, tallying the counts for the two bits.
static const int kNumTrials = 1000;
int set_bit_count = 0;
int unset_bit_count = 0;
for (size_t i = 0; i < kNumTrials; i++) {
RapporObservation obs;
EXPECT_EQ(kOK, encoder_->Encode(value, &obs));
if (IsSet(obs.data(), index_of_set_bit)) {
set_bit_count++;
}
if (IsSet(obs.data(), index_of_unset_bit)) {
unset_bit_count++;
}
}
// This is the expected number of ones and zeroes for a bit that is set
// in the Bloom filter.
const double expected_1_set =
static_cast<double>(kNumTrials) * prob_1_stays_1;
const double expected_0_set =
static_cast<double>(kNumTrials) - expected_1_set;
// This is the expected number of ones and zeroes for a bit that is
// unset in the Bloom filter.
const double expected_1_unset =
static_cast<double>(kNumTrials) * prob_0_becomes_1;
const double expected_0_unset =
static_cast<double>(kNumTrials) - expected_1_unset;
// Perform Chi-squared test twice, once for the set bit, once for the
// unset bit.
for (int i = 0; i < 2; i++) {
double exp_0 = (i == 0 ? expected_0_set : expected_0_unset);
double exp_1 = (i == 0 ? expected_1_set : expected_1_unset);
int count = (i == 0 ? set_bit_count : unset_bit_count);
// Difference between actual 1 count and expected 1 count.
double delta_1 = static_cast<double>(count) - exp_1;
// Difference between actual 0 count and expected 0 count.
double delta_0 = static_cast<double>(kNumTrials - count) - exp_0;
// Compute and check the Chi-Squared value.
double chi_squared =
delta_1 * delta_1 / exp_1 + delta_0 * delta_0 / exp_0;
EXPECT_LT(chi_squared, chi_squared_threshold)
<< " delta_0=" << delta_0 << " delta_1=" << delta_1
<< " num_bits=" << num_bits << " num_hashes=" << num_hashes
<< " i=" << i << " prob_0_becomes_1=" << prob_0_becomes_1
<< " prob_1_stays_1=" << prob_1_stays_1;
}
}
std::unique_ptr<RapporEncoder> encoder_;
};
// We invoke AttemptDeriveCohortFromSecret() 1000 times using a fixed
// ClientSecret and increasing values for |attempt_number|. We use 16 buckets
// (i.e. num_cohorts_2_power = 16). The outputs should be approximately
// uniformly distributed over the integers in [0, 15].
TEST_F(StringRapporEncoderTest, AttemptDeriveCohortFromSecret) {
RapporConfig config;
// These config values are not relevant but need to be something valid.
config.set_prob_0_becomes_1(0.3);
config.set_prob_1_stays_1(0.7);
config.set_num_bloom_bits(64);
config.set_num_hashes(5);
// We set num_cohorts to 10 so num_cohorts_2_power will be 16.
config.set_num_cohorts(10);
// We use a fixed client secret so this test is deterministic.
static const char kClientSecret[] = "4b4BxKq253TTCWIXFhLDTg==";
SetNewEncoder(config, ClientSecret::FromToken(kClientSecret));
// Initialize counts to all zeroes.
int counts[16] = {0};
// Invoke AttemptDeriveCohortFromSecret() 1000 times with successive
// attempt indices. Accumulate the results.
for (int i = 0; i < 1000; i++) {
counts[AttemptDeriveCohortFromSecret(i)]++;
}
// These are the counts we found we get. 1000/16 = 62.5 is the expected value
// for each count.
//
// Each of the results for buckets 10, 11 12, 13, 14 and 15 would have been
// discarded and another attempt would have been made. That happened with
// probability (75 + 71 + 56 + 58 + 44 + 53) / 1000 = 0.357.
int expected_counts[] = {58, 71, 69, 62, 70, 64, 76, 57,
48, 68, 75, 71, 56, 58, 44, 53};
for (int i = 0; i < 16; i++) {
EXPECT_EQ(expected_counts[i], counts[i]) << "i=" << i;
}
}
// We invoke DeriveCohortFromSecret() 1000 times using a varying ClientSecret.
// (We use a deterministic PRNG so the test is deterministic.)
// We use 10 buckets (i.e. num_cohorts = 10). The outputs should be
// approximately uniformly distributed over the integers in [0, 9].
TEST_F(StringRapporEncoderTest, DeriveCohortFromSecret) {
RapporConfig config;
// These config values are not relevant but need to be valid.
config.set_prob_0_becomes_1(0.3);
config.set_prob_1_stays_1(0.7);
config.set_num_bloom_bits(64);
config.set_num_hashes(5);
// We set num_cohorts to 10.
config.set_num_cohorts(10);
// Initialize counts to all zeroes.
int counts[10] = {0};
crypto::DeterministicRandom deterministic_random;
// Invoke DeriveCohortFromSecret() 1000 times. Accumulate the results.
for (int i = 0; i < 1000; i++) {
SetNewEncoder(config,
ClientSecret::GenerateNewSecret(&deterministic_random));
// The constructor should have already invoked DeriveCohortFromSecret
// and set cohort to that value.
EXPECT_EQ(encoder_->cohort(), DeriveCohortFromSecret());
counts[encoder_->cohort()]++;
}
// These are the counts we found we get. 1000/10 = 100 is the expected value
// for each count.
int expected_counts[] = {85, 98, 104, 93, 113, 93, 89, 99, 103, 123};
for (int i = 0; i < 10; i++) {
EXPECT_EQ(expected_counts[i], counts[i]) << "i=" << i;
}
}
// We invoke MakeBloomBits 1000 times with a fixed cohort
// (i.e. a fixed ClientSecret) and varying input strings. We use 10 different
// initial segments of 100 different randomly generated strings. (We use a
// deterministic PRNG so this test is deterministic.) We use the values
// num_hashes = 2, num_bloom_bits = 16. We accumulate the counts of the number
// of times each bit is set. The counts should be approximately uniformly
// distributed over the integers in [0, 15].
TEST_F(StringRapporEncoderTest, MakeBloomBits) {
RapporConfig config;
// These config values are not relevant but need to be valid.
config.set_prob_0_becomes_1(0.3);
config.set_prob_1_stays_1(0.7);
config.set_num_cohorts(10);
// Set the number of bloom bits to 16
static const int kNumBloomBits = 16;
config.set_num_bloom_bits(kNumBloomBits);
// Set the number of hashes to 2.
config.set_num_hashes(2);
// We use a fixed client secret so this test is deterministic.
static const char kClientSecret[] = "4b4BxKq253TTCWIXFhLDTg==";
SetNewEncoder(config, ClientSecret::FromToken(kClientSecret));
// Initialize counts to all zeroes.
int counts[kNumBloomBits] = {0};
crypto::DeterministicRandom prng;
// We invoke MakeBloomBits() 1000 times and accumulate the results.
// Generate 100 random strings of length 100.
for (int i = 0; i < 100; i++) {
crypto::byte random_bits[100];
prng.RandomBytes(random_bits, sizeof(random_bits));
// Use 10 progressively longer initial segments of |random_bits|.
for (int size = 10; size <= 100; size += 10) {
ValuePart value;
auto blob_value = std::string(reinterpret_cast<char*>(random_bits), size);
value.set_blob_value(blob_value);
auto bloom_bits = MakeBloomBits(value);
// Capture which bits were set.
int num_set = 0;
for (int bit_index = 0; bit_index < kNumBloomBits; bit_index++) {
if (IsSet(bloom_bits, bit_index)) {
num_set++;
counts[bit_index]++;
}
}
// Since we are using 2 hashes the number of bits set should be 1 or 2.
EXPECT_TRUE(num_set == 1 || num_set == 2);
}
}
// These are the counts we found we get. 2000/16 = 125 is the
// expected value for each count.
int expected_counts[] = {114, 139, 100, 113, 117, 118, 119, 122,
137, 129, 114, 134, 116, 109, 137, 123};
for (int i = 0; i < 16; i++) {
EXPECT_EQ(expected_counts[i], counts[i]) << "i=" << i;
}
}
// For various numbers of bits, and hashes, and for various values of p and q
// we invoke DoChiSquaredTest().
TEST_F(StringRapporEncoderTest, ChiSquaredTest) {
// Use num_bits = 4, 16, 64, 256, 1024
for (int num_bits_exp = 2; num_bits_exp <= 10; num_bits_exp += 2) {
int num_bits = 1 << num_bits_exp;
// Use num_hashes = 2, 5 and 8.
int max_num_hashes = std::min(8, num_bits - 1);
for (int num_hashes = 2; num_hashes <= max_num_hashes; num_hashes += 3) {
// The first two parameters are p and q.
//
// The last parameter is the chi-squared value to use. Notice that these
// values were chosen by experimentation to be as small as possible so
// that the test passes. They do not necessarily correspond to natural
// confidence intervals for the chi-squared test.
DoChiSquaredTest(0.01, 0.99, num_bits, num_hashes, 4.95);
DoChiSquaredTest(0.1, 0.9, num_bits, num_hashes, 5.38);
DoChiSquaredTest(0.2, 0.8, num_bits, num_hashes, 2.26);
DoChiSquaredTest(0.25, 0.75, num_bits, num_hashes, 2.83);
DoChiSquaredTest(0.3, 0.7, num_bits, num_hashes, 2.31);
}
}
}
} // namespace rappor
} // namespace cobalt