| // Copyright 2016 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "src/algorithms/rappor/rappor_encoder.h" |
| |
| #include <algorithm> |
| #include <map> |
| #include <vector> |
| |
| #include <gtest/gtest.h> |
| |
| #include "src/algorithms/rappor/rappor_test_utils.h" |
| #include "src/lib/crypto_util/random_test_utils.h" |
| |
| namespace cobalt::rappor { |
| |
| using system_data::ClientSecret; |
| |
| // Constructs a BasicRapporEncoder with the given |config|, invokes |
| // Encode() with a dummy string and EncodeNullObservation(), and checks that the |
| // returned statuses are either kOK or kInvalidConfig, whichever is expected. |
| void TestBasicRapporConfig(const BasicRapporConfig& config, Status expected_status, |
| int caller_line_number) { |
| // Make a ClientSecret once and statically store the token. |
| static const std::string kClientSecretToken = ClientSecret::GenerateNewSecret().GetToken(); |
| // Each time this function is invoked reconstitute the secret from the token. |
| BasicRapporEncoder encoder(config, ClientSecret::FromToken(kClientSecretToken)); |
| BasicRapporObservation obs; |
| ValuePart value; |
| value.set_string_value("cat"); |
| EXPECT_EQ(expected_status, encoder.Encode(value, &obs)) |
| << "Invoked from line number: " << caller_line_number; |
| BasicRapporObservation null_obs; |
| EXPECT_EQ(expected_status, encoder.EncodeNullObservation(&null_obs)) |
| << "Invoked from line number: " << caller_line_number; |
| } |
| |
| // A macro to invoke TestBasicRapporConfig and pass it the current line number. |
| #define TEST_BASIC_RAPPOR_CONFIG(config, expected_status) \ |
| (TestBasicRapporConfig(config, expected_status, __LINE__)) |
| |
| // Tests the validation of config for Basic RAPPOR. |
| TEST(RapporEncoderTest, BasicRapporConfigValidation) { |
| // Empty config: Invalid |
| BasicRapporConfig config; |
| TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Add two probabilities but no categories: Invalid |
| config.prob_0_becomes_1 = 0.3; |
| config.prob_1_stays_1 = 0.7; |
| TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Add one category: Invalid. |
| config.categories.add_string("cat"); |
| TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Add two more categories: Valid. |
| config.categories.add_string("dog"); |
| config.categories.add_string("fish"); |
| TEST_BASIC_RAPPOR_CONFIG(config, kOK); |
| |
| // Explicitly set PRR to 0: Valid. |
| config.prob_rr = 0.0; |
| TEST_BASIC_RAPPOR_CONFIG(config, kOK); |
| |
| // Explicitly set PRR to non-zero: Invalid. |
| config.prob_rr = 0.1; |
| TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Explicitly set PRR back to zero: Valid. |
| config.prob_rr = 0.0; |
| TEST_BASIC_RAPPOR_CONFIG(config, kOK); |
| |
| // Set one of the probabilities to negative: Invalid |
| config.prob_0_becomes_1 = -0.3; |
| TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Set one of the probabilities to greater than 1: Invalid |
| config.prob_0_becomes_1 = 1.3; |
| TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Fix the probability: Valid |
| config.prob_0_becomes_1 = 0.3; |
| TEST_BASIC_RAPPOR_CONFIG(config, kOK); |
| |
| // Set the other probability to negative: Invalid |
| config.prob_1_stays_1 = -0.7; |
| TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Set the other the probability to greater than 1: Invalid |
| config.prob_1_stays_1 = 1.7; |
| TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Fix the probability: Valid |
| config.prob_1_stays_1 = 0.7; |
| TEST_BASIC_RAPPOR_CONFIG(config, kOK); |
| |
| // Add an empty category: Invalid |
| config.categories.add_string(""); |
| TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig); |
| |
| // Test with an invalid ClientSecret |
| BasicRapporEncoder encoder(config, ClientSecret::FromToken("Invalid Token")); |
| BasicRapporObservation obs; |
| ValuePart value; |
| value.set_string_value("dummy"); |
| EXPECT_EQ(kInvalidConfig, encoder.Encode(value, &obs)); |
| BasicRapporObservation null_obs; |
| EXPECT_EQ(kInvalidConfig, encoder.EncodeNullObservation(&null_obs)); |
| } |
| |
| // Test Config Validation with integer categories |
| TEST(RapporEncoderTest, BasicRapporWithIntsConfigValidation) { |
| // Create a config with three integer categories. |
| BasicRapporConfig config; |
| config.prob_0_becomes_1 = 0.3; |
| config.prob_1_stays_1 = 0.7; |
| config.categories.set_int_range(-1, 1); |
| |
| // Construct the encoder |
| BasicRapporEncoder encoder(config, ClientSecret::GenerateNewSecret()); |
| |
| // Perform an encode with a value equal to one of the listed categories |
| BasicRapporObservation obs; |
| ValuePart value; |
| value.set_int_value(-1); |
| EXPECT_EQ(kOK, encoder.Encode(value, &obs)); |
| |
| // Perform an encode with a value not equal to one of the listed categories |
| value.set_int_value(2); |
| EXPECT_EQ(kInvalidInput, encoder.Encode(value, &obs)); |
| |
| // Perform an encode of a null observation |
| BasicRapporObservation null_obs; |
| EXPECT_EQ(kOK, encoder.EncodeNullObservation(&null_obs)); |
| } |
| |
| // Performs a test of BasicRapporEncoder::Encode() and |
| // BasicRapporEncoder::EncodeNullObservation() in the two special cases that |
| // that there is no randomness involved in the encoded string, namely |
| // (a) p = 0, q = 1 |
| // (b) p = 1, q = 0 |
| // |
| // |num_categories| must be a positive integer. Basic RAPPOR will be configured |
| // to have this many categories. The encoding will be performed for each of |
| // the categores. |
| // |
| // |q_is_one| Do the test in case (a) where p = 0, q = 1 |
| void DoBasicRapporNoRandomnessTest(int num_categories, bool q_is_one) { |
| // Select the parameters based on the mode. index_char and other_char |
| // determine the expected bit pattern in the encoding. index_char is the |
| // character we expect to see in the position of the given category and |
| // other_char is the character we expect to see in the other positions. |
| float p, q; |
| char index_char, other_char; |
| if (q_is_one) { |
| // We expect a 1 in the index position and 0's everywhere else. |
| p = 0.0; |
| q = 1.0; |
| index_char = '1'; |
| other_char = '0'; |
| } else { |
| // We expect a 0 in the index position and 1's everywhere else. |
| p = 1.0; |
| q = 0.0; |
| index_char = '0'; |
| other_char = '1'; |
| } |
| |
| // Configure basic RAPPOR with the selected parameters. |
| BasicRapporConfig config; |
| config.prob_0_becomes_1 = p; |
| config.prob_1_stays_1 = q; |
| for (int i = 0; i < num_categories; i++) { |
| config.categories.add_string(CategoryName(i)); |
| } |
| |
| // Construct a BasicRapporEncoder. |
| static const std::string kClientSecretToken = ClientSecret::GenerateNewSecret().GetToken(); |
| BasicRapporEncoder encoder(config, ClientSecret::FromToken(kClientSecretToken)); |
| |
| // The expected number of bits in the encoding is the least multiple of 8 |
| // greater than or equal to num_categories. |
| uint16_t expected_num_bits = 8 * (((num_categories - 1) / 8) + 1); |
| |
| // For each category, obtain the observation and check that the bit pattern |
| // is as expected. |
| BasicRapporObservation obs; |
| for (int i = 0; i < num_categories; i++) { |
| auto category_name = CategoryName(i); |
| ValuePart value; |
| value.set_string_value(category_name); |
| ASSERT_EQ(kOK, encoder.Encode(value, &obs)) << category_name; |
| auto expected_pattern = BuildBitPatternString(expected_num_bits, i, index_char, other_char); |
| EXPECT_EQ(DataToBinaryString(obs.data()), expected_pattern); |
| } |
| |
| // Encode a null observation and check that the bit pattern is as expected. |
| BasicRapporObservation null_obs; |
| ASSERT_EQ(kOK, encoder.EncodeNullObservation(&null_obs)) << "Null observation"; |
| std::vector<uint16_t> index_of_1s(0); |
| if (!q_is_one) { |
| for (uint16_t k = 0; k < expected_num_bits; k++) { |
| index_of_1s.push_back(k); |
| } |
| } |
| auto null_expected_pattern = BuildBinaryString(expected_num_bits, index_of_1s); |
| EXPECT_EQ(DataToBinaryString(null_obs.data()), null_expected_pattern); |
| } |
| |
| // Performs a test of BasicRapporEncoder::Encode() in the special case that |
| // the values of p and q are either 0 or 1 so that there is no randomness |
| // involved in the encoded string. |
| TEST(BasicRapporEncoderTest, NoRandomness) { |
| // We test with between 2 and 50 categories. |
| for (int num_categories = 2; num_categories <= 50; num_categories++) { |
| // See comments at DoBasicRapporNoRandomnessTest. |
| DoBasicRapporNoRandomnessTest(num_categories, true); |
| DoBasicRapporNoRandomnessTest(num_categories, false); |
| } |
| } |
| |
| // Base class for tests of Basic RAPPOR that use a deterministic RNG. |
| class BasicRapporDeterministicTest : public ::testing::Test { |
| protected: |
| static std::unique_ptr<BasicRapporEncoder> BuildEncoder(float prob_0_becomes_1, |
| float prob_1_stays_1, |
| int num_categories) { |
| // Configure BasicRappor. |
| BasicRapporConfig config; |
| config.prob_0_becomes_1 = prob_0_becomes_1; |
| config.prob_1_stays_1 = prob_1_stays_1; |
| for (int i = 0; i < num_categories; i++) { |
| config.categories.add_string(CategoryName(i)); |
| } |
| |
| // Construct a BasicRapporEncoder. |
| static const std::string kClientSecretToken = ClientSecret::GenerateNewSecret().GetToken(); |
| std::unique_ptr<BasicRapporEncoder> encoder( |
| new BasicRapporEncoder(config, ClientSecret::FromToken(kClientSecretToken))); |
| |
| // Give the encoder a deterministic RNG. |
| encoder->SetRandomForTesting( |
| std::unique_ptr<crypto::Random>(new crypto::DeterministicRandom())); |
| |
| return encoder; |
| } |
| |
| // Generates a Basic RAPPOR observation 1000 times and then performs Pearson's |
| // chi-squared test on each bit separately to check for goodness of fit to |
| // a binomial distribution with the appropriate parameter. Fails if |
| // chi-squared >= |chi_squared_threshold|. |
| // |
| // Uses DeterministicRandom in order to ensure reproducibility. |
| // |
| // REQUIRES: 0 <= selected_category < num_categories. |
| // All 1000 of the observations will be for the selected category. Thus |
| // the expected number of 1's in the bit position corresponding to the |
| // selected category is prob_1_stays_1 and the expected number of 1'1 in |
| // all other bit positions is prob_0_becomes_1. |
| static void DoChiSquaredTest(float prob_0_becomes_1, float prob_1_stays_1, int num_categories, |
| int selected_category, double chi_squared_threshold) { |
| // Build the encoder |
| auto encoder = BuildEncoder(prob_0_becomes_1, prob_1_stays_1, num_categories); |
| |
| // Sample 1000 observations of the selected category and collect the bit |
| // counts |
| static const int kNumTrials = 1000; |
| auto category_name = CategoryName(selected_category); |
| BasicRapporObservation obs; |
| std::vector<int> counts(num_categories, 0); |
| for (size_t i = 0; i < kNumTrials; i++) { |
| obs.Clear(); |
| ValuePart value; |
| value.set_string_value(category_name); |
| EXPECT_EQ(kOK, encoder->Encode(value, &obs)); |
| for (int bit_index = 0; bit_index < num_categories; bit_index++) { |
| if (IsSet(obs.data(), bit_index)) { |
| counts[bit_index]++; |
| } |
| } |
| } |
| |
| // In the special case where prob_1_stays_1 is 1 make sure that we got |
| // 1000 1's in the selected category. |
| if (prob_1_stays_1 == 1.0) { |
| EXPECT_EQ(kNumTrials, counts[selected_category]); |
| } |
| |
| // This is the expected number of ones and zeroes for the bit position in |
| // the selected category. |
| const double expected_1_selected = static_cast<double>(kNumTrials) * prob_1_stays_1; |
| const double expected_0_selected = static_cast<double>(kNumTrials) - expected_1_selected; |
| |
| // This is the expected number of ones and zeroes for all bit positions |
| // other than the selected category. |
| const double expected_1 = static_cast<double>(kNumTrials) * prob_0_becomes_1; |
| const double expected_0 = static_cast<double>(kNumTrials) - expected_1; |
| |
| // For each of the bit positions, perform the chi-squared test. |
| for (int bit_index = 0; bit_index < num_categories; bit_index++) { |
| double exp_0 = (bit_index == selected_category ? expected_0_selected : expected_0); |
| double exp_1 = (bit_index == selected_category ? expected_1_selected : expected_1); |
| |
| if (exp_0 != 0.0 && exp_1 != 0.0) { |
| // Difference between actual 1 count and expected 1 count. |
| double delta_1 = static_cast<double>(counts[bit_index]) - exp_1; |
| |
| // Difference between actual 0 count and expected 0 count. |
| double delta_0 = static_cast<double>(kNumTrials - counts[bit_index]) - exp_0; |
| |
| // Compute and check the Chi-Squared value. |
| double chi_squared = delta_1 * delta_1 / exp_1 + delta_0 * delta_0 / exp_0; |
| |
| EXPECT_TRUE(chi_squared < chi_squared_threshold) |
| << "chi_squared=" << chi_squared << " chi_squared_threshold=" << chi_squared_threshold |
| << " bit_index=" << bit_index << " delta_0=" << delta_0 << " delta_1=" << delta_1 |
| << " num_categories=" << num_categories << " selected_category=" << selected_category |
| << " prob_0_becomes_1=" << prob_0_becomes_1 << " prob_1_stays_1=" << prob_1_stays_1; |
| } |
| } |
| } |
| |
| // Generates a Basic RAPPOR encoding of a null observation 1000 times and then |
| // performs Pearson's chi-squared test on each bit separately to check for |
| // goodness of fit to a binomial distribution with the appropriate parameter. |
| // Fails if chi-squared >= |chi_squared_threshold|. |
| // |
| // Uses DeterministicRandom in order to ensure reproducibility. |
| // |
| // The true value of each bit (before encoding) is 0, so the expected number |
| // of 1's in each bit position is prob_0_becomes_1. |
| static void DoChiSquaredTestForNullObs(float prob_0_becomes_1, float prob_1_stays_1, |
| int num_categories, double chi_squared_threshold) { |
| // Build the encoder. |
| // Since no bits of the true bit vector are 1, the probability that 1 stays |
| // 1 is irrelevant. |
| auto encoder = BuildEncoder(prob_0_becomes_1, prob_1_stays_1, num_categories); |
| |
| // Sample 1000 observations of the selected category and collect the bit |
| // counts |
| static const int kNumTrials = 1000; |
| BasicRapporObservation null_obs; |
| std::vector<int> counts(num_categories, 0); |
| for (size_t i = 0; i < kNumTrials; i++) { |
| null_obs.Clear(); |
| EXPECT_EQ(kOK, encoder->EncodeNullObservation(&null_obs)); |
| for (int bit_index = 0; bit_index < num_categories; bit_index++) { |
| if (IsSet(null_obs.data(), bit_index)) { |
| counts[bit_index]++; |
| } |
| } |
| } |
| |
| // In the special case where prob_0_becomes_1 is 0 make sure that we got |
| // 1000 0's. |
| for (int k = 0; k < num_categories; k++) { |
| if (prob_0_becomes_1 == 0.0) { |
| EXPECT_EQ(0, counts[k]); |
| } |
| } |
| |
| // This is the expected number of ones and zeroes for each bit position. |
| const double expected_1 = static_cast<double>(kNumTrials) * prob_0_becomes_1; |
| const double expected_0 = static_cast<double>(kNumTrials) - expected_1; |
| |
| // For each of the bit positions, perform the chi-squared test. |
| for (int bit_index = 0; bit_index < num_categories; bit_index++) { |
| if (expected_0 != 0.0 && expected_1 != 0.0) { |
| // Difference between actual 1 count and expected 1 count. |
| double delta_1 = static_cast<double>(counts[bit_index]) - expected_1; |
| |
| // Difference between actual 0 count and expected 0 count. |
| double delta_0 = static_cast<double>(kNumTrials - counts[bit_index]) - expected_0; |
| |
| // Compute and check the Chi-Squared value. |
| double chi_squared = delta_1 * delta_1 / expected_1 + delta_0 * delta_0 / expected_0; |
| |
| EXPECT_TRUE(chi_squared < chi_squared_threshold) |
| << "chi_squared=" << chi_squared << " chi_squared_threshold=" << chi_squared_threshold |
| << " bit_index=" << bit_index << " delta_0=" << delta_0 << " delta_1=" << delta_1 |
| << " num_categories=" << num_categories << " prob_0_becomes_1=" << prob_0_becomes_1; |
| } |
| } |
| } |
| }; |
| |
| TEST_F(BasicRapporDeterministicTest, ChiSquaredTest) { |
| // Perform the chi-squared test for various numbers of categories and |
| // various selected categories, as well as for null observations without |
| // selected categories. This gets combinatorially explosive so to keep the |
| // testing time reasonable we don't test every combination but rather step |
| // through the num_categories by 7 and use at most 3 selected categories for |
| // each num_categories. |
| // |
| // The last parameter to DoChiSquaredTest*() is the chi-squared value to use. |
| // Notice that these values were chosen by experimentation to be as small as |
| // possible so that the test passes. They do not necessarily correspond to |
| // natural confidence intervals for the chi-squared test. |
| for (int num_categories = 2; num_categories < 40; num_categories += 7) { |
| for (int selected_category = 0; selected_category < num_categories; |
| selected_category += (num_categories / 3 + 1)) { |
| // The first parameter is prob_0_becomes_1, and the second parameter is |
| // prob_1_stays_1. |
| DoChiSquaredTest(0.01, 0.99, num_categories, selected_category, 8.2); |
| DoChiSquaredTest(0.1, 0.9, num_categories, selected_category, 9.4); |
| DoChiSquaredTest(0.2, 0.8, num_categories, selected_category, 11.1); |
| DoChiSquaredTest(0.25, 0.75, num_categories, selected_category, 11.8); |
| DoChiSquaredTest(0.3, 0.7, num_categories, selected_category, 11.8); |
| } |
| // The first parameter is prob_0_becomes_1, and the second parameter is |
| // prob_1_stays_1. |
| DoChiSquaredTestForNullObs(0.0, 1.0, num_categories, 0.0); |
| DoChiSquaredTestForNullObs(0.01, 0.99, num_categories, 8.2); |
| DoChiSquaredTestForNullObs(0.1, 0.9, num_categories, 9.4); |
| DoChiSquaredTestForNullObs(0.2, 0.8, num_categories, 11.1); |
| DoChiSquaredTestForNullObs(0.25, 0.75, num_categories, 11.8); |
| DoChiSquaredTestForNullObs(0.3, 0.7, num_categories, 11.8); |
| } |
| } |
| |
| // Test that BasicRapporEncoder::Encode() returns kInvalidArgument if a category |
| // name is used that is not one of the registered categories. |
| TEST(BasicRapporEncoderTest, BadCategory) { |
| // Configure Basic RAPPOR with two categories, "dog" and "cat". |
| BasicRapporConfig config; |
| config.prob_0_becomes_1 = 0.3; |
| config.prob_1_stays_1 = 0.7; |
| config.categories.set_strings({"dog", "cat"}); |
| |
| // Construct a BasicRapporEncoder. |
| static const std::string kClientSecretToken = ClientSecret::GenerateNewSecret().GetToken(); |
| BasicRapporEncoder encoder(config, ClientSecret::FromToken(kClientSecretToken)); |
| |
| // Attempt to encode a string that is not one of the categories. Expect |
| // to receive kInvalidInput. |
| BasicRapporObservation obs; |
| ValuePart value; |
| value.set_string_value("fish"); |
| EXPECT_EQ(kInvalidInput, encoder.Encode(value, &obs)); |
| } |
| |
| // Tests that BasicRapporEncoder::Encode() correctly handles values of type |
| // INDEX |
| TEST(BasicRapporEncoderTest, EncodeIndex) { |
| // Configure Basic RAPPOR with 5 indexed categories. |
| // We use no randomness so we can check that the correct bit is being set. |
| BasicRapporConfig config; |
| config.prob_0_becomes_1 = 0.0; |
| config.prob_1_stays_1 = 1.0; |
| config.categories.set_indexed(5); |
| |
| // Construct a BasicRapporEncoder. |
| static const std::string kClientSecretToken = ClientSecret::GenerateNewSecret().GetToken(); |
| BasicRapporEncoder encoder(config, ClientSecret::FromToken(kClientSecretToken)); |
| |
| BasicRapporObservation obs; |
| // Validate that category indices 0, 1 and 4 yield the correct data. |
| ValuePart value; |
| value.set_index_value(0); |
| EXPECT_EQ(kOK, encoder.Encode(value, &obs)); |
| EXPECT_EQ("00000001", DataToBinaryString(obs.data())); |
| obs.Clear(); |
| value.set_index_value(1); |
| EXPECT_EQ(kOK, encoder.Encode(value, &obs)); |
| EXPECT_EQ("00000010", DataToBinaryString(obs.data())); |
| obs.Clear(); |
| value.set_index_value(4); |
| EXPECT_EQ(kOK, encoder.Encode(value, &obs)); |
| EXPECT_EQ("00010000", DataToBinaryString(obs.data())); |
| obs.Clear(); |
| |
| // Validate that category index 5 yields kInvalidInput. |
| value.set_index_value(5); |
| EXPECT_EQ(kInvalidInput, encoder.Encode(value, &obs)); |
| } |
| |
| } // namespace cobalt::rappor |