| // Copyright 2016 The Fuchsia Authors |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| #include "algorithms/rappor/rappor_encoder.h" |
| |
| #include <algorithm> |
| #include <map> |
| #include <vector> |
| |
| #include "algorithms/rappor/rappor_test_utils.h" |
| #include "third_party/googletest/googletest/include/gtest/gtest.h" |
| #include "util/crypto_util/random_test_utils.h" |
| |
| namespace cobalt { |
| namespace rappor { |
| |
| using encoder::ClientSecret; |
| |
| TEST(RapporConfigValidatorTest, TestMinPower2Above) { |
| EXPECT_EQ(1u, RapporConfigValidator::MinPower2Above(0)); |
| EXPECT_EQ(1u, RapporConfigValidator::MinPower2Above(1)); |
| EXPECT_EQ(2u, RapporConfigValidator::MinPower2Above(2)); |
| EXPECT_EQ(4u, RapporConfigValidator::MinPower2Above(3)); |
| EXPECT_EQ(4u, RapporConfigValidator::MinPower2Above(4)); |
| EXPECT_EQ(8u, RapporConfigValidator::MinPower2Above(5)); |
| EXPECT_EQ(8u, RapporConfigValidator::MinPower2Above(6)); |
| EXPECT_EQ(8u, RapporConfigValidator::MinPower2Above(7)); |
| EXPECT_EQ(8u, RapporConfigValidator::MinPower2Above(8)); |
| EXPECT_EQ(16u, RapporConfigValidator::MinPower2Above(9)); |
| EXPECT_EQ(16u, RapporConfigValidator::MinPower2Above(10)); |
| EXPECT_EQ(16u, RapporConfigValidator::MinPower2Above(11)); |
| EXPECT_EQ(16u, RapporConfigValidator::MinPower2Above(12)); |
| EXPECT_EQ(16u, RapporConfigValidator::MinPower2Above(13)); |
| EXPECT_EQ(16u, RapporConfigValidator::MinPower2Above(14)); |
| EXPECT_EQ(16u, RapporConfigValidator::MinPower2Above(15)); |
| EXPECT_EQ(16u, RapporConfigValidator::MinPower2Above(16)); |
| EXPECT_EQ(32u, RapporConfigValidator::MinPower2Above(17)); |
| } |
| |
| TEST(RapporConfigValidatorTest, TestConstructor) { |
| RapporConfig config; |
| config.set_prob_0_becomes_1(0.3); |
| config.set_prob_1_stays_1(0.7); |
| config.set_num_bloom_bits(64); |
| config.set_num_hashes(5); |
| |
| config.set_num_cohorts(100); |
| auto validator = RapporConfigValidator(config); |
| EXPECT_EQ(128u, validator.num_cohorts_2_power()); |
| |
| config.set_num_cohorts(200); |
| validator = RapporConfigValidator(config); |
| EXPECT_EQ(256u, validator.num_cohorts_2_power()); |
| |
| config.set_num_cohorts(300); |
| validator = RapporConfigValidator(config); |
| EXPECT_EQ(512u, validator.num_cohorts_2_power()); |
| |
| config.set_num_cohorts(400); |
| validator = RapporConfigValidator(config); |
| EXPECT_EQ(512u, validator.num_cohorts_2_power()); |
| |
| config.set_num_cohorts(500); |
| validator = RapporConfigValidator(config); |
| EXPECT_EQ(512u, validator.num_cohorts_2_power()); |
| |
| config.set_num_cohorts(600); |
| validator = RapporConfigValidator(config); |
| EXPECT_EQ(1024u, validator.num_cohorts_2_power()); |
| |
| config.set_num_cohorts(1023); |
| validator = RapporConfigValidator(config); |
| EXPECT_EQ(1024u, validator.num_cohorts_2_power()); |
| |
| config.set_num_cohorts(1024); |
| validator = RapporConfigValidator(config); |
| EXPECT_EQ(1024u, validator.num_cohorts_2_power()); |
| } |
| |
| // Constructs a RapporEncoder with the given |config|, invokes |
| // Encode() with a dummy string and EncodeNullObservation(), and checks that the |
| // returned statuses are either kOK or kInvalidConfig, whichever is expected. |
| void TestRapporConfig(const RapporConfig& config, Status expected_status, |
| int caller_line_number) { |
| // Make a ClientSecret once and statically store the token. |
| static const std::string kClientSecretToken = |
| ClientSecret::GenerateNewSecret().GetToken(); |
| // Each time this function is invoked reconstitute the secret from the token. |
| RapporEncoder encoder(config, ClientSecret::FromToken(kClientSecretToken)); |
| RapporObservation obs; |
| ValuePart value; |
| value.set_string_value("dummy"); |
| EXPECT_EQ(expected_status, encoder.Encode(value, &obs)) |
| << "Invoked from line number: " << caller_line_number; |
| } |
| |
| // A macro to invoke testRapporConfig and pass it the current line number. |
| #define TEST_RAPPOR_CONFIG(config, expected_status) \ |
| (TestRapporConfig(config, expected_status, __LINE__)) |
| |
| // Tests the validation of config for String RAPPOR. |
| TEST(RapporEncoderTest, StringRapporConfigValidation) { |
| // Empty config: Invalid |
| RapporConfig config; |
| TEST_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Add two probabilities, still Invalid |
| config.set_prob_0_becomes_1(0.3); |
| config.set_prob_1_stays_1(0.7); |
| TEST_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Set num_bloom_bits, still Invalid |
| config.set_num_bloom_bits(8); |
| TEST_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Set num_hashes, still Invalid |
| config.set_num_hashes(2); |
| TEST_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // set num_cohorts: Valid |
| config.set_num_cohorts(20); |
| TEST_RAPPOR_CONFIG(config, kOK); |
| |
| // Explicitly set PRR to 0: Valid. |
| config.set_prob_rr(0.0); |
| TEST_RAPPOR_CONFIG(config, kOK); |
| |
| // Explicitly set PRR to non-zero: Invalid. |
| config.set_prob_rr(0.1); |
| TEST_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Explicitly set PRR back to zero: Valid. |
| config.set_prob_rr(0.0); |
| TEST_RAPPOR_CONFIG(config, kOK); |
| |
| // Set one of the probabilities to negative: Invalid |
| config.set_prob_0_becomes_1(-0.3); |
| TEST_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Set one of the probabilities to greater than 1: Invalid |
| config.set_prob_0_becomes_1(1.3); |
| TEST_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Fix the probability: Valid |
| config.set_prob_0_becomes_1(0.3); |
| TEST_RAPPOR_CONFIG(config, kOK); |
| |
| // Set the other probability to negative: Invalid |
| config.set_prob_1_stays_1(-0.7); |
| TEST_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Set the other probability to greater than 1: Invalid |
| config.set_prob_1_stays_1(1.7); |
| TEST_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Fix the probability: Valid |
| config.set_prob_1_stays_1(0.7); |
| TEST_RAPPOR_CONFIG(config, kOK); |
| |
| // Set num_bloom_bits to negative: Invalid |
| config.set_num_bloom_bits(-8); |
| TEST_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Set num_bloom_bits to 0: Invalid |
| config.set_num_bloom_bits(0); |
| TEST_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Set num_bloom_bits back to positive: Valid |
| config.set_num_bloom_bits(8); |
| TEST_RAPPOR_CONFIG(config, kOK); |
| |
| // Set num_hashes to negative: Invalid |
| config.set_num_hashes(-2); |
| TEST_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Set num_hashes to 0: Invalid |
| config.set_num_hashes(0); |
| TEST_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Set num_hashes to 8: Invalid |
| config.set_num_hashes(8); |
| TEST_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Set num_hashes back to positive: Valid |
| config.set_num_hashes(2); |
| TEST_RAPPOR_CONFIG(config, kOK); |
| |
| // Set num_cohorts to negative: Invalid |
| config.set_num_cohorts(-20); |
| TEST_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Set num_cohorts to 0: Invalid |
| config.set_num_cohorts(0); |
| TEST_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Set num_cohorts to 1025: Invalid |
| config.set_num_cohorts(1025); |
| TEST_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Set num_cohorts to 1024: Valid |
| config.set_num_cohorts(1024); |
| TEST_RAPPOR_CONFIG(config, kOK); |
| |
| // Set num_cohorts back to positive: Valid |
| config.set_num_cohorts(20); |
| TEST_RAPPOR_CONFIG(config, kOK); |
| |
| // Set num_bloom_bits to equal num_hashes: Invalid |
| config.set_num_bloom_bits(2); |
| TEST_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Set num_bloom_bits to greater than num_hashes and a power of 2: Valid |
| config.set_num_bloom_bits(4); |
| TEST_RAPPOR_CONFIG(config, kOK); |
| |
| // Set num_bloom_bits to greater than num_hashes but not a power of 2: Invalid |
| config.set_num_bloom_bits(3); |
| TEST_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Test with an invalid ClientSecret |
| RapporEncoder encoder(config, ClientSecret::FromToken("Invalid Token")); |
| RapporObservation obs; |
| ValuePart value; |
| value.set_string_value("dummy"); |
| EXPECT_EQ(kInvalidConfig, encoder.Encode(value, &obs)); |
| } |
| |
| // Constructs a BasicRapporEncoder with the given |config|, invokes |
| // Encode() with a dummy string and EncodeNullObservation(), and checks that the |
| // returned statuses are either kOK or kInvalidConfig, whichever is expected. |
| void TestBasicRapporConfig(const BasicRapporConfig& config, |
| Status expected_status, int caller_line_number) { |
| // Make a ClientSecret once and statically store the token. |
| static const std::string kClientSecretToken = |
| ClientSecret::GenerateNewSecret().GetToken(); |
| // Each time this function is invoked reconstitute the secret from the token. |
| BasicRapporEncoder encoder(config, |
| ClientSecret::FromToken(kClientSecretToken)); |
| BasicRapporObservation obs; |
| ValuePart value; |
| value.set_string_value("cat"); |
| EXPECT_EQ(expected_status, encoder.Encode(value, &obs)) |
| << "Invoked from line number: " << caller_line_number; |
| BasicRapporObservation null_obs; |
| EXPECT_EQ(expected_status, encoder.EncodeNullObservation(&null_obs)) |
| << "Invoked from line number: " << caller_line_number; |
| } |
| |
| // A macro to invoke TestBasicRapporConfig and pass it the current line number. |
| #define TEST_BASIC_RAPPOR_CONFIG(config, expected_status) \ |
| (TestBasicRapporConfig(config, expected_status, __LINE__)) |
| |
| // Tests the validation of config for Basic RAPPOR. |
| TEST(RapporEncoderTest, BasicRapporConfigValidation) { |
| // Empty config: Invalid |
| BasicRapporConfig config; |
| TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Add two probabilities but no categories: Invalid |
| config.set_prob_0_becomes_1(0.3); |
| config.set_prob_1_stays_1(0.7); |
| TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Add one category: Invalid. |
| config.mutable_string_categories()->add_category("cat"); |
| TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Add two more categories: Valid. |
| config.mutable_string_categories()->add_category("dog"); |
| config.mutable_string_categories()->add_category("fish"); |
| TEST_BASIC_RAPPOR_CONFIG(config, kOK); |
| |
| // Explicitly set PRR to 0: Valid. |
| config.set_prob_rr(0.0); |
| TEST_BASIC_RAPPOR_CONFIG(config, kOK); |
| |
| // Explicitly set PRR to non-zero: Invalid. |
| config.set_prob_rr(0.1); |
| TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Explicitly set PRR back to zero: Valid. |
| config.set_prob_rr(0.0); |
| TEST_BASIC_RAPPOR_CONFIG(config, kOK); |
| |
| // Set one of the probabilities to negative: Invalid |
| config.set_prob_0_becomes_1(-0.3); |
| TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Set one of the probabilities to greater than 1: Invalid |
| config.set_prob_0_becomes_1(1.3); |
| TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Fix the probability: Valid |
| config.set_prob_0_becomes_1(0.3); |
| TEST_BASIC_RAPPOR_CONFIG(config, kOK); |
| |
| // Set the other probability to negative: Invalid |
| config.set_prob_1_stays_1(-0.7); |
| TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Set the other the probability to greater than 1: Invalid |
| config.set_prob_1_stays_1(1.7); |
| TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Fix the probability: Valid |
| config.set_prob_1_stays_1(0.7); |
| TEST_BASIC_RAPPOR_CONFIG(config, kOK); |
| |
| // Add an empty category: Invalid |
| config.mutable_string_categories()->add_category(""); |
| TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Test with an invalid ClientSecret |
| BasicRapporEncoder encoder(config, ClientSecret::FromToken("Invalid Token")); |
| BasicRapporObservation obs; |
| ValuePart value; |
| value.set_string_value("dummy"); |
| EXPECT_EQ(kInvalidConfig, encoder.Encode(value, &obs)); |
| BasicRapporObservation null_obs; |
| EXPECT_EQ(kInvalidConfig, encoder.EncodeNullObservation(&null_obs)); |
| } |
| |
| // Test Config Validation with integer categories |
| TEST(RapporEncoderTest, BasicRapporWithIntsConfigValidation) { |
| // Create a config with three integer categories. |
| BasicRapporConfig config; |
| config.set_prob_0_becomes_1(0.3); |
| config.set_prob_1_stays_1(0.7); |
| config.mutable_int_range_categories()->set_first(-1); |
| config.mutable_int_range_categories()->set_last(1); |
| |
| // Construct the encoder |
| BasicRapporEncoder encoder(config, ClientSecret::GenerateNewSecret()); |
| |
| // Perform an encode with a value equal to one of the listed categories |
| BasicRapporObservation obs; |
| ValuePart value; |
| value.set_int_value(-1); |
| EXPECT_EQ(kOK, encoder.Encode(value, &obs)); |
| |
| // Perform an encode with a value not equal to one of the listed categories |
| value.set_int_value(2); |
| EXPECT_EQ(kInvalidInput, encoder.Encode(value, &obs)); |
| |
| // Perform an encode of a null observation |
| BasicRapporObservation null_obs; |
| EXPECT_EQ(kOK, encoder.EncodeNullObservation(&null_obs)); |
| } |
| |
| // Performs a test of BasicRapporEncoder::Encode() and |
| // BasicRapporEncoder::EncodeNullObservation() in the two special cases that |
| // that there is no randomness involved in the encoded string, namely |
| // (a) p = 0, q = 1 |
| // (b) p = 1, q = 0 |
| // |
| // |num_categories| must be a positive integer. Basic RAPPOR will be configured |
| // to have this many categories. The encoding will be performed for each of |
| // the categores. |
| // |
| // |q_is_one| Do the test in case (a) where p = 0, q = 1 |
| void DoBasicRapporNoRandomnessTest(int num_categories, bool q_is_one) { |
| // Select the parameters based on the mode. index_char and other_char |
| // determine the expected bit pattern in the encoding. index_char is the |
| // character we expect to see in the position of the given category and |
| // other_char is the character we expect to see in the other positions. |
| float p, q; |
| char index_char, other_char; |
| if (q_is_one) { |
| // We expect a 1 in the index position and 0's everywhere else. |
| p = 0.0; |
| q = 1.0; |
| index_char = '1'; |
| other_char = '0'; |
| } else { |
| // We expect a 0 in the index position and 1's everywhere else. |
| p = 1.0; |
| q = 0.0; |
| index_char = '0'; |
| other_char = '1'; |
| } |
| |
| // Configure basic RAPPOR with the selected parameters. |
| BasicRapporConfig config; |
| config.set_prob_0_becomes_1(p); |
| config.set_prob_1_stays_1(q); |
| for (int i = 0; i < num_categories; i++) { |
| config.mutable_string_categories()->add_category(CategoryName(i)); |
| } |
| |
| // Construct a BasicRapporEncoder. |
| static const std::string kClientSecretToken = |
| ClientSecret::GenerateNewSecret().GetToken(); |
| BasicRapporEncoder encoder(config, |
| ClientSecret::FromToken(kClientSecretToken)); |
| |
| // The expected number of bits in the encoding is the least multiple of 8 |
| // greater than or equal to num_categories. |
| int expected_num_bits = 8 * (((num_categories - 1) / 8) + 1); |
| |
| // For each category, obtain the observation and check that the bit pattern |
| // is as expected. |
| BasicRapporObservation obs; |
| for (int i = 0; i < num_categories; i++) { |
| auto category_name = CategoryName(i); |
| ValuePart value; |
| value.set_string_value(category_name); |
| ASSERT_EQ(kOK, encoder.Encode(value, &obs)) << category_name; |
| auto expected_pattern = |
| BuildBitPatternString(expected_num_bits, i, index_char, other_char); |
| EXPECT_EQ(DataToBinaryString(obs.data()), expected_pattern); |
| } |
| |
| // Encode a null observation and check that the bit pattern is as expected. |
| BasicRapporObservation null_obs; |
| ASSERT_EQ(kOK, encoder.EncodeNullObservation(&null_obs)) |
| << "Null observation"; |
| std::vector<uint16_t> index_of_1s(0); |
| if (!q_is_one) { |
| for (uint16_t k = 0; k < expected_num_bits; k++) { |
| index_of_1s.push_back(k); |
| } |
| } |
| auto null_expected_pattern = |
| BuildBinaryString(expected_num_bits, index_of_1s); |
| EXPECT_EQ(DataToBinaryString(null_obs.data()), null_expected_pattern); |
| } |
| |
| // Performs a test of BasicRapporEncoder::Encode() in the special case that |
| // the values of p and q are either 0 or 1 so that there is no randomness |
| // involved in the encoded string. |
| TEST(BasicRapporEncoderTest, NoRandomness) { |
| // We test with between 2 and 50 categories. |
| for (int num_categories = 2; num_categories <= 50; num_categories++) { |
| // See comments at DoBasicRapporNoRandomnessTest. |
| DoBasicRapporNoRandomnessTest(num_categories, true); |
| DoBasicRapporNoRandomnessTest(num_categories, false); |
| } |
| } |
| |
| // Base class for tests of Basic RAPPOR that use a deterministic RNG. |
| class BasicRapporDeterministicTest : public ::testing::Test { |
| protected: |
| std::unique_ptr<BasicRapporEncoder> BuildEncoder(float prob_0_becomes_1, |
| float prob_1_stays_1, |
| int num_categories) { |
| // Configure BasicRappor. |
| BasicRapporConfig config; |
| config.set_prob_0_becomes_1(prob_0_becomes_1); |
| config.set_prob_1_stays_1(prob_1_stays_1); |
| for (int i = 0; i < num_categories; i++) { |
| config.mutable_string_categories()->add_category(CategoryName(i)); |
| } |
| |
| // Construct a BasicRapporEncoder. |
| static const std::string kClientSecretToken = |
| ClientSecret::GenerateNewSecret().GetToken(); |
| std::unique_ptr<BasicRapporEncoder> encoder(new BasicRapporEncoder( |
| config, ClientSecret::FromToken(kClientSecretToken))); |
| |
| // Give the encoder a deterministic RNG. |
| encoder->SetRandomForTesting( |
| std::unique_ptr<crypto::Random>(new crypto::DeterministicRandom())); |
| |
| return encoder; |
| } |
| |
| // Generates a Basic RAPPOR observation 1000 times and then performs Pearson's |
| // chi-squared test on each bit separately to check for goodness of fit to |
| // a binomial distribution with the appropriate parameter. Fails if |
| // chi-squared >= |chi_squared_threshold|. |
| // |
| // Uses DeterministicRandom in order to ensure reproducibility. |
| // |
| // REQUIRES: 0 <= selected_category < num_categories. |
| // All 1000 of the observations will be for the selected category. Thus |
| // the expected number of 1's in the bit position corresponding to the |
| // selected category is prob_1_stays_1 and the expected number of 1'1 in |
| // all other bit positions is prob_0_becomes_1. |
| void DoChiSquaredTest(float prob_0_becomes_1, float prob_1_stays_1, |
| int num_categories, int selected_category, |
| double chi_squared_threshold) { |
| // Build the encoder |
| auto encoder = |
| BuildEncoder(prob_0_becomes_1, prob_1_stays_1, num_categories); |
| |
| // Sample 1000 observations of the selected category and collect the bit |
| // counts |
| static const int kNumTrials = 1000; |
| auto category_name = CategoryName(selected_category); |
| BasicRapporObservation obs; |
| std::vector<int> counts(num_categories, 0); |
| for (size_t i = 0; i < kNumTrials; i++) { |
| obs.Clear(); |
| ValuePart value; |
| value.set_string_value(category_name); |
| EXPECT_EQ(kOK, encoder->Encode(value, &obs)); |
| for (int bit_index = 0; bit_index < num_categories; bit_index++) { |
| if (IsSet(obs.data(), bit_index)) { |
| counts[bit_index]++; |
| } |
| } |
| } |
| |
| // In the special case where prob_1_stays_1 is 1 make sure that we got |
| // 1000 1's in the selected category. |
| if (prob_1_stays_1 == 1.0) { |
| EXPECT_EQ(kNumTrials, counts[selected_category]); |
| } |
| |
| // This is the expected number of ones and zeroes for the bit position in |
| // the selected category. |
| const double expected_1_selected = |
| static_cast<double>(kNumTrials) * prob_1_stays_1; |
| const double expected_0_selected = |
| static_cast<double>(kNumTrials) - expected_1_selected; |
| |
| // This is the expected number of ones and zeroes for all bit positions |
| // other than the selected category. |
| const double expected_1 = |
| static_cast<double>(kNumTrials) * prob_0_becomes_1; |
| const double expected_0 = static_cast<double>(kNumTrials) - expected_1; |
| |
| // For each of the bit positions, perform the chi-squared test. |
| for (int bit_index = 0; bit_index < num_categories; bit_index++) { |
| double exp_0 = |
| (bit_index == selected_category ? expected_0_selected : expected_0); |
| double exp_1 = |
| (bit_index == selected_category ? expected_1_selected : expected_1); |
| |
| if (exp_0 != 0.0 && exp_1 != 0.0) { |
| // Difference between actual 1 count and expected 1 count. |
| double delta_1 = static_cast<double>(counts[bit_index]) - exp_1; |
| |
| // Difference between actual 0 count and expected 0 count. |
| double delta_0 = |
| static_cast<double>(kNumTrials - counts[bit_index]) - exp_0; |
| |
| // Compute and check the Chi-Squared value. |
| double chi_squared = |
| delta_1 * delta_1 / exp_1 + delta_0 * delta_0 / exp_0; |
| |
| EXPECT_TRUE(chi_squared < chi_squared_threshold) |
| << "chi_squared=" << chi_squared |
| << " chi_squared_threshold=" << chi_squared_threshold |
| << " bit_index=" << bit_index << " delta_0=" << delta_0 |
| << " delta_1=" << delta_1 << " num_categories=" << num_categories |
| << " selected_category=" << selected_category |
| << " prob_0_becomes_1=" << prob_0_becomes_1 |
| << " prob_1_stays_1=" << prob_1_stays_1; |
| } |
| } |
| } |
| |
| // Generates a Basic RAPPOR encoding of a null observation 1000 times and then |
| // performs Pearson's chi-squared test on each bit separately to check for |
| // goodness of fit to a binomial distribution with the appropriate parameter. |
| // Fails if chi-squared >= |chi_squared_threshold|. |
| // |
| // Uses DeterministicRandom in order to ensure reproducibility. |
| // |
| // The true value of each bit (before encoding) is 0, so the expected number |
| // of 1's in each bit position is prob_0_becomes_1. |
| void DoChiSquaredTestForNullObs(float prob_0_becomes_1, float prob_1_stays_1, |
| int num_categories, |
| double chi_squared_threshold) { |
| // Build the encoder. |
| // Since no bits of the true bit vector are 1, the probability that 1 stays |
| // 1 is irrelevant. |
| auto encoder = |
| BuildEncoder(prob_0_becomes_1, prob_1_stays_1, num_categories); |
| |
| // Sample 1000 observations of the selected category and collect the bit |
| // counts |
| static const int kNumTrials = 1000; |
| BasicRapporObservation null_obs; |
| std::vector<int> counts(num_categories, 0); |
| for (size_t i = 0; i < kNumTrials; i++) { |
| null_obs.Clear(); |
| EXPECT_EQ(kOK, encoder->EncodeNullObservation(&null_obs)); |
| for (int bit_index = 0; bit_index < num_categories; bit_index++) { |
| if (IsSet(null_obs.data(), bit_index)) { |
| counts[bit_index]++; |
| } |
| } |
| } |
| |
| // In the special case where prob_0_becomes_1 is 0 make sure that we got |
| // 1000 0's. |
| for (int k = 0; k < num_categories; k++) { |
| if (prob_0_becomes_1 == 0.0) { |
| EXPECT_EQ(0, counts[k]); |
| } |
| } |
| |
| // This is the expected number of ones and zeroes for each bit position. |
| const double expected_1 = |
| static_cast<double>(kNumTrials) * prob_0_becomes_1; |
| const double expected_0 = static_cast<double>(kNumTrials) - expected_1; |
| |
| // For each of the bit positions, perform the chi-squared test. |
| for (int bit_index = 0; bit_index < num_categories; bit_index++) { |
| if (expected_0 != 0.0 && expected_1 != 0.0) { |
| // Difference between actual 1 count and expected 1 count. |
| double delta_1 = static_cast<double>(counts[bit_index]) - expected_1; |
| |
| // Difference between actual 0 count and expected 0 count. |
| double delta_0 = |
| static_cast<double>(kNumTrials - counts[bit_index]) - expected_0; |
| |
| // Compute and check the Chi-Squared value. |
| double chi_squared = |
| delta_1 * delta_1 / expected_1 + delta_0 * delta_0 / expected_0; |
| |
| EXPECT_TRUE(chi_squared < chi_squared_threshold) |
| << "chi_squared=" << chi_squared |
| << " chi_squared_threshold=" << chi_squared_threshold |
| << " bit_index=" << bit_index << " delta_0=" << delta_0 |
| << " delta_1=" << delta_1 << " num_categories=" << num_categories |
| << " prob_0_becomes_1=" << prob_0_becomes_1; |
| } |
| } |
| } |
| }; |
| |
| TEST_F(BasicRapporDeterministicTest, ChiSquaredTest) { |
| // Perform the chi-squared test for various numbers of categories and |
| // various selected categories, as well as for null observations without |
| // selected categories. This gets combinatorially explosive so to keep the |
| // testing time reasonable we don't test every combination but rather step |
| // through the num_categories by 7 and use at most 3 selected categories for |
| // each num_categories. |
| // |
| // The last parameter to DoChiSquaredTest*() is the chi-squared value to use. |
| // Notice that these values were chosen by experimentation to be as small as |
| // possible so that the test passes. They do not necessarily correspond to |
| // natural confidence intervals for the chi-squared test. |
| for (int num_categories = 2; num_categories < 40; num_categories += 7) { |
| for (int selected_category = 0; selected_category < num_categories; |
| selected_category += (num_categories / 3 + 1)) { |
| // The first parameter is prob_0_becomes_1, and the second parameter is |
| // prob_1_stays_1. |
| DoChiSquaredTest(0.01, 0.99, num_categories, selected_category, 8.2); |
| DoChiSquaredTest(0.1, 0.9, num_categories, selected_category, 9.4); |
| DoChiSquaredTest(0.2, 0.8, num_categories, selected_category, 11.1); |
| DoChiSquaredTest(0.25, 0.75, num_categories, selected_category, 11.8); |
| DoChiSquaredTest(0.3, 0.7, num_categories, selected_category, 11.8); |
| } |
| // The first parameter is prob_0_becomes_1, and the second parameter is |
| // prob_1_stays_1. |
| DoChiSquaredTestForNullObs(0.0, 1.0, num_categories, 0.0); |
| DoChiSquaredTestForNullObs(0.01, 0.99, num_categories, 8.2); |
| DoChiSquaredTestForNullObs(0.1, 0.9, num_categories, 9.4); |
| DoChiSquaredTestForNullObs(0.2, 0.8, num_categories, 11.1); |
| DoChiSquaredTestForNullObs(0.25, 0.75, num_categories, 11.8); |
| DoChiSquaredTestForNullObs(0.3, 0.7, num_categories, 11.8); |
| } |
| } |
| |
| // Test that BasicRapporEncoder::Encode() returns kInvalidArgument if a category |
| // name is used that is not one of the registered categories. |
| TEST(BasicRapporEncoderTest, BadCategory) { |
| // Configure Basic RAPPOR with two categories, "dog" and "cat". |
| BasicRapporConfig config; |
| config.set_prob_0_becomes_1(0.3); |
| config.set_prob_1_stays_1(0.7); |
| config.mutable_string_categories()->add_category("dog"); |
| config.mutable_string_categories()->add_category("cat"); |
| |
| // Construct a BasicRapporEncoder. |
| static const std::string kClientSecretToken = |
| ClientSecret::GenerateNewSecret().GetToken(); |
| BasicRapporEncoder encoder(config, |
| ClientSecret::FromToken(kClientSecretToken)); |
| |
| // Attempt to encode a string that is not one of the categories. Expect |
| // to receive kInvalidInput. |
| BasicRapporObservation obs; |
| ValuePart value; |
| value.set_string_value("fish"); |
| EXPECT_EQ(kInvalidInput, encoder.Encode(value, &obs)); |
| } |
| |
| // Tests that BasicRapporEncoder::Encode() correctly handles values of type |
| // INDEX |
| TEST(BasicRapporEncoderTest, EncodeIndex) { |
| // Configure Basic RAPPOR with 5 indexed categories. |
| // We use no randomness so we can check that the correct bit is being set. |
| BasicRapporConfig config; |
| config.set_prob_0_becomes_1(0.0); |
| config.set_prob_1_stays_1(1.0); |
| config.mutable_indexed_categories()->set_num_categories(5); |
| |
| // Construct a BasicRapporEncoder. |
| static const std::string kClientSecretToken = |
| ClientSecret::GenerateNewSecret().GetToken(); |
| BasicRapporEncoder encoder(config, |
| ClientSecret::FromToken(kClientSecretToken)); |
| |
| BasicRapporObservation obs; |
| // Validate that category indices 0, 1 and 4 yield the correct data. |
| ValuePart value; |
| value.set_index_value(0); |
| EXPECT_EQ(kOK, encoder.Encode(value, &obs)); |
| EXPECT_EQ("00000001", DataToBinaryString(obs.data())); |
| obs.Clear(); |
| value.set_index_value(1); |
| EXPECT_EQ(kOK, encoder.Encode(value, &obs)); |
| EXPECT_EQ("00000010", DataToBinaryString(obs.data())); |
| obs.Clear(); |
| value.set_index_value(4); |
| EXPECT_EQ(kOK, encoder.Encode(value, &obs)); |
| EXPECT_EQ("00010000", DataToBinaryString(obs.data())); |
| obs.Clear(); |
| |
| // Validate that category index 5 yields kInvalidInput. |
| value.set_index_value(5); |
| EXPECT_EQ(kInvalidInput, encoder.Encode(value, &obs)); |
| } |
| |
| class StringRapporEncoderTest : public ::testing::Test { |
| protected: |
| uint32_t AttemptDeriveCohortFromSecret(size_t attempt_number) { |
| return encoder_->AttemptDeriveCohortFromSecret(attempt_number); |
| } |
| |
| uint32_t DeriveCohortFromSecret() { |
| return encoder_->DeriveCohortFromSecret(); |
| } |
| |
| void SetNewEncoder(const RapporConfig& config, encoder::ClientSecret secret) { |
| encoder_.reset(new RapporEncoder(config, secret)); |
| } |
| |
| std::string MakeBloomBits(const ValuePart& value) { |
| return encoder_->MakeBloomBits(value); |
| } |
| |
| // Using the given parameters, and using the fixed input string |
| // "www.google.com" and a fixed cohort (i.e. a fixed client secret), |
| // this test generates a String RAPPOR observation 1000 times, counts |
| // the number of resulting 1's and 0's in two bit positions, and performs |
| // Pearson's Chi-squared test to check for goodness of fit to a binomial |
| // distribution with the appropriate parameter. Fails if |
| // chi-squared >= |chi_squared_threshold|. |
| // |
| // First we examine the Bloom filter with no bits flipped and we find one |
| // index of a set bit and one index of an unset bit. We perform the |
| // Chi-squared test twice: once for each of these two indices. |
| // |
| // Uses DeterministicRandom in order to ensure reproducibility. |
| void DoChiSquaredTest(float prob_0_becomes_1, float prob_1_stays_1, |
| int num_bits, int num_hashes, |
| double chi_squared_threshold) { |
| // Build the encoder. |
| RapporConfig config; |
| config.set_prob_0_becomes_1(prob_0_becomes_1); |
| config.set_prob_1_stays_1(prob_1_stays_1); |
| config.set_num_bloom_bits(num_bits); |
| config.set_num_hashes(num_hashes); |
| // This value will not be used but it needs to be something valid. |
| config.set_num_cohorts(100); |
| // We use a fixed client secret so this test is deterministic. |
| static const char kClientSecret[] = "4b4BxKq253TTCWIXFhLDTg=="; |
| SetNewEncoder(config, ClientSecret::FromToken(kClientSecret)); |
| // Give the encoder a deterministic RNG. |
| encoder_->SetRandomForTesting( |
| std::unique_ptr<crypto::Random>(new crypto::DeterministicRandom())); |
| |
| // Build the input value. We use a fixed input string so this test is |
| // deterministic. |
| ValuePart value; |
| value.set_string_value("www.google.com"); |
| |
| // Capture the indices of one bit that is set and one bit that is unset in |
| // the bloom filter for the input value. It doesn't matter which two bits |
| // we capture. We will do two chi-squared tests, one on each of the two |
| // bits. |
| int index_of_set_bit = -1; |
| int index_of_unset_bit = -1; |
| auto bloom_bits = MakeBloomBits(value); |
| int num_bits_set = 0; |
| for (int bit_index = 0; bit_index < num_bits; bit_index++) { |
| if (IsSet(bloom_bits, bit_index)) { |
| num_bits_set++; |
| index_of_set_bit = bit_index; |
| } else { |
| index_of_unset_bit = bit_index; |
| } |
| } |
| ASSERT_GT(num_bits_set, 0); |
| ASSERT_LE(num_bits_set, num_hashes); |
| // This heuristic for a minimum reasonable number of bits that should be |
| // set was found by experimentation to allow the test to pass. |
| int expected_min_num_bits_set = std::min(num_hashes - 2, num_bits / 4); |
| ASSERT_GE(num_bits_set, expected_min_num_bits_set) |
| << " num_bits=" << num_bits << " num_hashes=" << num_hashes; |
| ASSERT_GE(index_of_set_bit, 0); |
| ASSERT_LT(index_of_set_bit, num_bits); |
| ASSERT_GE(index_of_unset_bit, 0); |
| ASSERT_LT(index_of_unset_bit, num_bits); |
| ASSERT_NE(index_of_set_bit, index_of_unset_bit); |
| ASSERT_TRUE(IsSet(bloom_bits, index_of_set_bit)); |
| ASSERT_FALSE(IsSet(bloom_bits, index_of_unset_bit)); |
| |
| // Encode the input value 1000 times, tallying the counts for the two bits. |
| static const int kNumTrials = 1000; |
| int set_bit_count = 0; |
| int unset_bit_count = 0; |
| for (size_t i = 0; i < kNumTrials; i++) { |
| RapporObservation obs; |
| EXPECT_EQ(kOK, encoder_->Encode(value, &obs)); |
| if (IsSet(obs.data(), index_of_set_bit)) { |
| set_bit_count++; |
| } |
| if (IsSet(obs.data(), index_of_unset_bit)) { |
| unset_bit_count++; |
| } |
| } |
| |
| // This is the expected number of ones and zeroes for a bit that is set |
| // in the Bloom filter. |
| const double expected_1_set = |
| static_cast<double>(kNumTrials) * prob_1_stays_1; |
| const double expected_0_set = |
| static_cast<double>(kNumTrials) - expected_1_set; |
| |
| // This is the expected number of ones and zeroes for a bit that is |
| // unset in the Bloom filter. |
| const double expected_1_unset = |
| static_cast<double>(kNumTrials) * prob_0_becomes_1; |
| const double expected_0_unset = |
| static_cast<double>(kNumTrials) - expected_1_unset; |
| |
| // Perform Chi-squared test twice, once for the set bit, once for the |
| // unset bit. |
| for (int i = 0; i < 2; i++) { |
| double exp_0 = (i == 0 ? expected_0_set : expected_0_unset); |
| double exp_1 = (i == 0 ? expected_1_set : expected_1_unset); |
| int count = (i == 0 ? set_bit_count : unset_bit_count); |
| |
| // Difference between actual 1 count and expected 1 count. |
| double delta_1 = static_cast<double>(count) - exp_1; |
| |
| // Difference between actual 0 count and expected 0 count. |
| double delta_0 = static_cast<double>(kNumTrials - count) - exp_0; |
| |
| // Compute and check the Chi-Squared value. |
| double chi_squared = |
| delta_1 * delta_1 / exp_1 + delta_0 * delta_0 / exp_0; |
| |
| EXPECT_LT(chi_squared, chi_squared_threshold) |
| << " delta_0=" << delta_0 << " delta_1=" << delta_1 |
| << " num_bits=" << num_bits << " num_hashes=" << num_hashes |
| << " i=" << i << " prob_0_becomes_1=" << prob_0_becomes_1 |
| << " prob_1_stays_1=" << prob_1_stays_1; |
| } |
| } |
| |
| std::unique_ptr<RapporEncoder> encoder_; |
| }; |
| |
| // We invoke AttemptDeriveCohortFromSecret() 1000 times using a fixed |
| // ClientSecret and increasing values for |attempt_number|. We use 16 buckets |
| // (i.e. num_cohorts_2_power = 16). The outputs should be approximately |
| // uniformly distributed over the integers in [0, 15]. |
| TEST_F(StringRapporEncoderTest, AttemptDeriveCohortFromSecret) { |
| RapporConfig config; |
| // These config values are not relevant but need to be something valid. |
| config.set_prob_0_becomes_1(0.3); |
| config.set_prob_1_stays_1(0.7); |
| config.set_num_bloom_bits(64); |
| config.set_num_hashes(5); |
| |
| // We set num_cohorts to 10 so num_cohorts_2_power will be 16. |
| config.set_num_cohorts(10); |
| |
| // We use a fixed client secret so this test is deterministic. |
| static const char kClientSecret[] = "4b4BxKq253TTCWIXFhLDTg=="; |
| SetNewEncoder(config, ClientSecret::FromToken(kClientSecret)); |
| |
| // Initialize counts to all zeroes. |
| int counts[16] = {0}; |
| |
| // Invoke AttemptDeriveCohortFromSecret() 1000 times with successive |
| // attempt indices. Accumulate the results. |
| for (int i = 0; i < 1000; i++) { |
| counts[AttemptDeriveCohortFromSecret(i)]++; |
| } |
| |
| // These are the counts we found we get. 1000/16 = 62.5 is the expected value |
| // for each count. |
| // |
| // Each of the results for buckets 10, 11 12, 13, 14 and 15 would have been |
| // discarded and another attempt would have been made. That happened with |
| // probability (75 + 71 + 56 + 58 + 44 + 53) / 1000 = 0.357. |
| int expected_counts[] = {58, 71, 69, 62, 70, 64, 76, 57, |
| 48, 68, 75, 71, 56, 58, 44, 53}; |
| |
| for (int i = 0; i < 16; i++) { |
| EXPECT_EQ(expected_counts[i], counts[i]) << "i=" << i; |
| } |
| } |
| |
| // We invoke DeriveCohortFromSecret() 1000 times using a varying ClientSecret. |
| // (We use a deterministic PRNG so the test is deterministic.) |
| // We use 10 buckets (i.e. num_cohorts = 10). The outputs should be |
| // approximately uniformly distributed over the integers in [0, 9]. |
| TEST_F(StringRapporEncoderTest, DeriveCohortFromSecret) { |
| RapporConfig config; |
| // These config values are not relevant but need to be valid. |
| config.set_prob_0_becomes_1(0.3); |
| config.set_prob_1_stays_1(0.7); |
| config.set_num_bloom_bits(64); |
| config.set_num_hashes(5); |
| |
| // We set num_cohorts to 10. |
| config.set_num_cohorts(10); |
| |
| // Initialize counts to all zeroes. |
| int counts[10] = {0}; |
| |
| crypto::DeterministicRandom deterministic_random; |
| |
| // Invoke DeriveCohortFromSecret() 1000 times. Accumulate the results. |
| for (int i = 0; i < 1000; i++) { |
| SetNewEncoder(config, |
| ClientSecret::GenerateNewSecret(&deterministic_random)); |
| // The constructor should have already invoked DeriveCohortFromSecret |
| // and set cohort to that value. |
| EXPECT_EQ(encoder_->cohort(), DeriveCohortFromSecret()); |
| counts[encoder_->cohort()]++; |
| } |
| |
| // These are the counts we found we get. 1000/10 = 100 is the expected value |
| // for each count. |
| int expected_counts[] = {85, 98, 104, 93, 113, 93, 89, 99, 103, 123}; |
| |
| for (int i = 0; i < 10; i++) { |
| EXPECT_EQ(expected_counts[i], counts[i]) << "i=" << i; |
| } |
| } |
| |
| // We invoke MakeBloomBits 1000 times with a fixed cohort |
| // (i.e. a fixed ClientSecret) and varying input strings. We use 10 different |
| // initial segments of 100 different randomly generated strings. (We use a |
| // deterministic PRNG so this test is deterministic.) We use the values |
| // num_hashes = 2, num_bloom_bits = 16. We accumulate the counts of the number |
| // of times each bit is set. The counts should be approximately uniformly |
| // distributed over the integers in [0, 15]. |
| TEST_F(StringRapporEncoderTest, MakeBloomBits) { |
| RapporConfig config; |
| // These config values are not relevant but need to be valid. |
| config.set_prob_0_becomes_1(0.3); |
| config.set_prob_1_stays_1(0.7); |
| config.set_num_cohorts(10); |
| |
| // Set the number of bloom bits to 16 |
| static const int kNumBloomBits = 16; |
| config.set_num_bloom_bits(kNumBloomBits); |
| |
| // Set the number of hashes to 2. |
| config.set_num_hashes(2); |
| |
| // We use a fixed client secret so this test is deterministic. |
| static const char kClientSecret[] = "4b4BxKq253TTCWIXFhLDTg=="; |
| SetNewEncoder(config, ClientSecret::FromToken(kClientSecret)); |
| |
| // Initialize counts to all zeroes. |
| int counts[kNumBloomBits] = {0}; |
| |
| crypto::DeterministicRandom prng; |
| |
| // We invoke MakeBloomBits() 1000 times and accumulate the results. |
| |
| // Generate 100 random strings of length 100. |
| for (int i = 0; i < 100; i++) { |
| crypto::byte random_bits[100]; |
| prng.RandomBytes(random_bits, sizeof(random_bits)); |
| // Use 10 progressively longer initial segments of |random_bits|. |
| for (int size = 10; size <= 100; size += 10) { |
| ValuePart value; |
| auto blob_value = std::string(reinterpret_cast<char*>(random_bits), size); |
| value.set_blob_value(blob_value); |
| auto bloom_bits = MakeBloomBits(value); |
| // Capture which bits were set. |
| int num_set = 0; |
| for (int bit_index = 0; bit_index < kNumBloomBits; bit_index++) { |
| if (IsSet(bloom_bits, bit_index)) { |
| num_set++; |
| counts[bit_index]++; |
| } |
| } |
| // Since we are using 2 hashes the number of bits set should be 1 or 2. |
| EXPECT_TRUE(num_set == 1 || num_set == 2); |
| } |
| } |
| |
| // These are the counts we found we get. 2000/16 = 125 is the |
| // expected value for each count. |
| int expected_counts[] = {114, 139, 100, 113, 117, 118, 119, 122, |
| 137, 129, 114, 134, 116, 109, 137, 123}; |
| |
| for (int i = 0; i < 16; i++) { |
| EXPECT_EQ(expected_counts[i], counts[i]) << "i=" << i; |
| } |
| } |
| |
| // For various numbers of bits, and hashes, and for various values of p and q |
| // we invoke DoChiSquaredTest(). |
| TEST_F(StringRapporEncoderTest, ChiSquaredTest) { |
| // Use num_bits = 4, 16, 64, 256, 1024 |
| for (int num_bits_exp = 2; num_bits_exp <= 10; num_bits_exp += 2) { |
| int num_bits = 1 << num_bits_exp; |
| // Use num_hashes = 2, 5 and 8. |
| int max_num_hashes = std::min(8, num_bits - 1); |
| for (int num_hashes = 2; num_hashes <= max_num_hashes; num_hashes += 3) { |
| // The first two parameters are p and q. |
| // |
| // The last parameter is the chi-squared value to use. Notice that these |
| // values were chosen by experimentation to be as small as possible so |
| // that the test passes. They do not necessarily correspond to natural |
| // confidence intervals for the chi-squared test. |
| DoChiSquaredTest(0.01, 0.99, num_bits, num_hashes, 4.95); |
| DoChiSquaredTest(0.1, 0.9, num_bits, num_hashes, 5.38); |
| DoChiSquaredTest(0.2, 0.8, num_bits, num_hashes, 2.26); |
| DoChiSquaredTest(0.25, 0.75, num_bits, num_hashes, 2.83); |
| DoChiSquaredTest(0.3, 0.7, num_bits, num_hashes, 2.31); |
| } |
| } |
| } |
| |
| } // namespace rappor |
| } // namespace cobalt |