| // Copyright 2016 The Fuchsia Authors |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| #include "algorithms/rappor/bloom_bit_counter.h" |
| |
| #include <gflags/gflags.h> |
| #include <glog/logging.h> |
| |
| #include <algorithm> |
| #include <cmath> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "algorithms/rappor/rappor_encoder.h" |
| #include "algorithms/rappor/rappor_test_utils.h" |
| #include "third_party/googletest/googletest/include/gtest/gtest.h" |
| #include "util/crypto_util/random_test_utils.h" |
| |
| namespace cobalt { |
| namespace rappor { |
| |
| using encoder::ClientSecret; |
| |
| namespace { |
| // Makes a RapporConfig with the given data (and num_hashes=2). |
| RapporConfig Config(uint32_t num_bloom_bits, uint32_t num_cohorts, double p, |
| double q) { |
| RapporConfig config; |
| config.set_num_bloom_bits(num_bloom_bits); |
| config.set_num_hashes(2); |
| config.set_num_cohorts(num_cohorts); |
| config.set_prob_0_becomes_1(p); |
| config.set_prob_1_stays_1(q); |
| return config; |
| } |
| |
| } // namespace |
| |
| class BloomBitCounterTest : public ::testing::Test { |
| protected: |
| // Sets the member variable bit_counter_ to a new BloomBitCounter configured |
| // with the given arguments and the current values of prob_0_becomes_1_, |
| // prob_1_stays_1_. |
| void SetBitCounter(uint32_t num_bloom_bits, uint32_t num_cohorts) { |
| bit_counter_.reset(new BloomBitCounter(Config( |
| num_bloom_bits, num_cohorts, prob_0_becomes_1_, prob_1_stays_1_))); |
| add_good_observation_call_count_ = 0; |
| add_bad_observation_call_count_ = 0; |
| } |
| |
| // Adds an observation to bit_counter_ described by |binary_string| for the |
| // given cohort. Expects the operation to result in an error. |
| void AddObservationExpectFalse(uint32_t cohort, std::string binary_string) { |
| EXPECT_FALSE(bit_counter_->AddObservation( |
| RapporObservationFromString(cohort, binary_string))); |
| CheckState(add_good_observation_call_count_, |
| ++add_bad_observation_call_count_); |
| } |
| |
| // Adds an observation to bit_counter_ described by |binary_string| for the |
| // given cohort. Expects the operation to succeed. |
| void AddObservation(uint32_t cohort, std::string binary_string) { |
| EXPECT_TRUE(bit_counter_->AddObservation( |
| RapporObservationFromString(cohort, binary_string))); |
| CheckState(++add_good_observation_call_count_, |
| add_bad_observation_call_count_); |
| } |
| |
| // Invokes AddObservation() many times. |
| void AddObservations(uint32_t cohort, std::string binary_string, |
| int num_times) { |
| for (int count = 0; count < num_times; count++) { |
| SCOPED_TRACE(std::string("count=") + std::to_string(count)); |
| AddObservation(cohort, binary_string); |
| } |
| } |
| |
| // Checks that bit_counter_ has the expected number observations and errors. |
| void CheckState(size_t expected_num_observations, |
| size_t expected_observation_errors) { |
| EXPECT_EQ(expected_num_observations, bit_counter_->num_observations()); |
| EXPECT_EQ(expected_observation_errors, bit_counter_->observation_errors()); |
| } |
| |
| // Checks that bit_counter_ has the expected raw count for the given cohort |
| // and bit index. |
| void ExpectRawCount(uint32_t cohort, size_t index, size_t expected_count) { |
| EXPECT_EQ(expected_count, |
| bit_counter_->estimated_bloom_counts_[cohort].bit_sums[index]); |
| } |
| |
| // Checks that bit_counter_ has the expected raw counts for the given cohort. |
| void ExpectRawCounts(uint32_t cohort, std::vector<size_t> expected_counts) { |
| EXPECT_EQ(expected_counts, |
| bit_counter_->estimated_bloom_counts_[cohort].bit_sums); |
| } |
| |
| // Invokes bit_countr_->EstimateCounts() and checks the count and std_error in |
| // the given bit index for the given cohort. |
| void EstimateCountsAndCheck(size_t cohort, size_t index, |
| double expected_estimate, |
| double expected_std_err) { |
| auto estimated_counts = bit_counter_->EstimateCounts(); |
| ASSERT_GT(estimated_counts.size(), cohort) |
| << "size=" << estimated_counts.size() << ", cohort=" << cohort; |
| ASSERT_GT(estimated_counts[cohort].count_estimates.size(), index) |
| << "size=" << estimated_counts[cohort].count_estimates.size() |
| << ", index=" << index; |
| ASSERT_GT(estimated_counts[cohort].std_errors.size(), index) |
| << "size=" << estimated_counts[cohort].std_errors.size() |
| << ", index=" << index; |
| EXPECT_FLOAT_EQ(expected_estimate, |
| estimated_counts[cohort].count_estimates[index]); |
| EXPECT_FLOAT_EQ(expected_std_err, |
| estimated_counts[cohort].std_errors[index]); |
| } |
| |
| // Tests BloomBitCounter focusing on only a single bit at a time. |
| // |
| // We use 32 bits and 5 cohorts. Multiple times we single out one bit and one |
| // cohort. Only that bit receives any non-zero observation data and only that |
| // bit is validated. |
| // |
| // Uses the currently set values for prob_0_becomes_1_ and prob_1_stays_1_. |
| // There will be |n| total observations with |y| 1's and |n-y| 0's. |
| void OneBitTest(int n, int y, double expected_estimate, |
| double expected_std_err) { |
| // We pick five different bits out of the 32 bits to test. |
| for (int bit_index : {0, 1, 8, 19, 31}) { |
| // We pick five different cohorts of the 100 cohorts to test. |
| for (int cohort : {0, 1, 47, 61, 99}) { |
| SCOPED_TRACE(std::to_string(bit_index) + ", " + std::to_string(cohort) + |
| ", " + std::to_string(n) + ", " + std::to_string(y)); |
| // Construct a BloomBitCounter with 32 bits and 100 cohorts. |
| SetBitCounter(32, 100); |
| // Add y observations with a 1 in position |bit_index|. |
| AddObservations(cohort, BuildBitPatternString(32, bit_index, '1', '0'), |
| y); |
| // Add n-y observations with a 0 in position |bit_index|. |
| AddObservations(cohort, BuildBitPatternString(32, bit_index, '0', '0'), |
| n - y); |
| |
| // Analyze and check position |bit_index| |
| EstimateCountsAndCheck(cohort, bit_index, expected_estimate, |
| expected_std_err); |
| } |
| } |
| } |
| |
| // By default this test uses p=0, q=1. Individual tests may override this. |
| double prob_0_becomes_1_ = 0.0; |
| double prob_1_stays_1_ = 1.0; |
| std::unique_ptr<BloomBitCounter> bit_counter_; |
| int add_bad_observation_call_count_ = 0; |
| int add_good_observation_call_count_ = 0; |
| }; |
| |
| // Tests the raw counts when there are four bits and two cohorts |
| TEST_F(BloomBitCounterTest, RawCounts4x2) { |
| // Construct a bloom bit counter with four bits and 2 cohorts. |
| SetBitCounter(4, 2); |
| |
| AddObservation(0, "00000000"); |
| ExpectRawCounts(0, {0, 0, 0, 0}); |
| ExpectRawCounts(1, {0, 0, 0, 0}); |
| |
| AddObservation(1, "00000000"); |
| ExpectRawCounts(0, {0, 0, 0, 0}); |
| ExpectRawCounts(1, {0, 0, 0, 0}); |
| |
| AddObservation(0, "00000001"); |
| ExpectRawCounts(0, {1, 0, 0, 0}); |
| ExpectRawCounts(1, {0, 0, 0, 0}); |
| |
| AddObservation(0, "00000001"); |
| ExpectRawCounts(0, {2, 0, 0, 0}); |
| ExpectRawCounts(1, {0, 0, 0, 0}); |
| |
| AddObservation(0, "00000010"); |
| ExpectRawCounts(0, {2, 1, 0, 0}); |
| ExpectRawCounts(1, {0, 0, 0, 0}); |
| |
| AddObservation(0, "00000010"); |
| ExpectRawCounts(0, {2, 2, 0, 0}); |
| ExpectRawCounts(1, {0, 0, 0, 0}); |
| |
| AddObservation(1, "00000010"); |
| ExpectRawCounts(0, {2, 2, 0, 0}); |
| ExpectRawCounts(1, {0, 1, 0, 0}); |
| |
| AddObservation(1, "00000010"); |
| ExpectRawCounts(0, {2, 2, 0, 0}); |
| ExpectRawCounts(1, {0, 2, 0, 0}); |
| |
| AddObservation(0, "00000011"); |
| ExpectRawCounts(0, {3, 3, 0, 0}); |
| ExpectRawCounts(1, {0, 2, 0, 0}); |
| |
| AddObservation(0, "00000100"); |
| ExpectRawCounts(0, {3, 3, 1, 0}); |
| ExpectRawCounts(1, {0, 2, 0, 0}); |
| |
| AddObservation(0, "00000101"); |
| ExpectRawCounts(0, {4, 3, 2, 0}); |
| ExpectRawCounts(1, {0, 2, 0, 0}); |
| |
| AddObservation(0, "00000011"); |
| ExpectRawCounts(0, {5, 4, 2, 0}); |
| ExpectRawCounts(1, {0, 2, 0, 0}); |
| |
| AddObservation(1, "00001010"); |
| ExpectRawCounts(0, {5, 4, 2, 0}); |
| ExpectRawCounts(1, {0, 3, 0, 1}); |
| |
| AddObservation(1, "00001010"); |
| ExpectRawCounts(0, {5, 4, 2, 0}); |
| ExpectRawCounts(1, {0, 4, 0, 2}); |
| |
| for (int i = 0; i < 1000; i++) { |
| AddObservation(0, "00001100"); |
| AddObservation(1, "00000110"); |
| } |
| ExpectRawCounts(0, {5, 4, 1002, 1000}); |
| ExpectRawCounts(1, {0, 1004, 1000, 2}); |
| |
| // The extra high-order-bits should be ignored |
| AddObservation(0, "11110000"); |
| AddObservation(1, "11110000"); |
| ExpectRawCounts(0, {5, 4, 1002, 1000}); |
| ExpectRawCounts(1, {0, 1004, 1000, 2}); |
| } |
| |
| // Tests the raw counts when there are 1024 bits and 100 cohorts |
| TEST_F(BloomBitCounterTest, RawCounts1024x100) { |
| // Construct a bloom bit counter with 1024 bits and 100 cohorts. |
| SetBitCounter(1024, 100); |
| // Iterate 100 times |
| for (int iteration = 0; iteration < 100; iteration++) { |
| // For i = 0, 10, 20, 30, ..... |
| for (int bit_index = 0; bit_index < 1024; bit_index += 10) { |
| // Add observations with bit i alone set for cohorts 0, 51, 97. |
| AddObservation(0, BuildBitPatternString(1024, bit_index, '1', '0')); |
| AddObservation(51, BuildBitPatternString(1024, bit_index, '1', '0')); |
| AddObservation(97, BuildBitPatternString(1024, bit_index, '1', '0')); |
| } |
| } |
| |
| // Check the counts. |
| for (int bit_index = 0; bit_index < 1024; bit_index++) { |
| size_t expected_count = (bit_index % 10 == 0 ? 100 : 0); |
| ExpectRawCount(0, bit_index, expected_count); |
| ExpectRawCount(1, bit_index, 0); |
| ExpectRawCount(51, bit_index, expected_count); |
| ExpectRawCount(52, bit_index, 0); |
| ExpectRawCount(97, bit_index, expected_count); |
| ExpectRawCount(98, bit_index, 0); |
| } |
| } |
| |
| // Tests that AddObservation() returns false when an invalid config is |
| // provided to the constructor. |
| TEST_F(BloomBitCounterTest, InvalidConfig) { |
| // Set prob_0_becomes_1 to an invalid value. |
| prob_0_becomes_1_ = 1.1; |
| |
| // Construct a bloom bit counter with 8 bits and 2 cohorts. |
| SetBitCounter(8, 2); |
| |
| AddObservationExpectFalse(0, "00000000"); |
| AddObservationExpectFalse(1, "00000000"); |
| } |
| |
| // Tests that AddObservation() returns false when an invalid observation |
| // is added. |
| TEST_F(BloomBitCounterTest, InvalidObservations) { |
| // Construct a bloom bit counter with 8 bits and 2 cohorts. |
| SetBitCounter(8, 2); |
| |
| // Attempt to add observations with 2 bytes instead of one. |
| AddObservationExpectFalse(0, "0000000000000000"); |
| AddObservationExpectFalse(1, "0000000000000000"); |
| AddObservationExpectFalse(0, "0000000100000000"); |
| AddObservationExpectFalse(1, "0000000100000000"); |
| |
| // Successfully add observations with one bytes. |
| AddObservation(0, "00000001"); |
| AddObservation(1, "00000001"); |
| |
| // Attempt to add an observation for cohort 3. |
| AddObservationExpectFalse(3, "00000001"); |
| } |
| |
| // Invokes OneBitTest on various y using n=100, p=0, q=1 |
| TEST_F(BloomBitCounterTest, OneBitTestN100P0Q1) { |
| int n = 100; |
| double expected_std_err = 0; |
| |
| // Test with various values of y. expected_estimate = y. |
| for (int y : {0, 1, 34, 49, 50, 51, 71, 99, 100}) { |
| SCOPED_TRACE(std::to_string(y)); |
| OneBitTest(n, y, y, expected_std_err); |
| } |
| } |
| |
| // Invokes OneBitTest on various y using n=100, p=0.2, q=0.8 |
| TEST_F(BloomBitCounterTest, OneBitTestN100P02Q08) { |
| prob_0_becomes_1_ = 0.2; |
| prob_1_stays_1_ = 0.8; |
| int n = 100; |
| |
| // This is the formula for computing expected_estimate when n=100, p=0.2, |
| // q=0.8. |
| auto estimator = [](double y) { return (y - 20.0) * 5.0 / 3.0; }; |
| // This is the expected standard error for n=100, p=0.2, q=0.8, independent |
| // of y. |
| double expected_std_err = 20.0 / 3.0; |
| |
| // Test with various values of y. |
| for (int y : {0, 1, 34, 49, 50, 51, 71, 99, 100}) { |
| SCOPED_TRACE(std::to_string(y)); |
| OneBitTest(n, y, estimator(y), expected_std_err); |
| } |
| } |
| |
| // Invokes OneBitTest on various y using n=1000, p=0.15, q=0.85 |
| TEST_F(BloomBitCounterTest, OneBitTestN1000P015Q085) { |
| prob_0_becomes_1_ = 0.15; |
| prob_1_stays_1_ = 0.85; |
| int n = 1000; |
| |
| // This is the formula for computing expected_estimate when n=1000, p=0.15, |
| // q=0.85. |
| auto estimator = [](double y) { return (y - 150.0) * 10.0 / 7.0; }; |
| // This is the expected standard error for n=1000, p=0.15, q=0.85, |
| // independent |
| // of y. |
| double expected_std_err = sqrt(127.5) * 10.0 / 7.0; |
| |
| // Test with various values of y. |
| for (int y : {0, 1, 71, 333, 444, 555, 666, 777, 888, 999, 1000}) { |
| SCOPED_TRACE(std::to_string(y)); |
| OneBitTest(n, y, estimator(y), expected_std_err); |
| } |
| } |
| |
| // Invokes OneBitTest on various y using n=5000, p=0.5, q=0.9. Notice that |
| // p + q > 1 |
| TEST_F(BloomBitCounterTest, OneBitTestN5000P05Q09) { |
| prob_0_becomes_1_ = 0.5; |
| prob_1_stays_1_ = 0.9; |
| int n = 5000; |
| |
| // This is the formula for computing expected_estimate when n=5000, p=0.5, |
| // q=0.9. |
| auto estimator = [](double y) { return (y - 2500.0) * 5.0 / 2.0; }; |
| |
| // This is the formula for computing expected_std_err when n=5000, p=0.5, |
| // q=0.9. |
| auto std_err = [](double y) { |
| return sqrt(y * -0.4 + 2250.0) * 5.0 / 2.0; |
| }; |
| |
| // Test with various values of y. |
| for (int y : {0, 1, 49, 222, 1333, 2444, 3555, 4999, 5000}) { |
| SCOPED_TRACE(std::to_string(y)); |
| OneBitTest(n, y, estimator(y), std_err(y)); |
| } |
| } |
| |
| // Invokes OneBitTest on various y using n=5000, p=0.05, q=0.5. Notice that |
| // p + q < 1 |
| TEST_F(BloomBitCounterTest, OneBitTestN5000P005Q05) { |
| prob_0_becomes_1_ = 0.05; |
| prob_1_stays_1_ = 0.5; |
| int n = 5000; |
| |
| // This is the formula for computing expected_estimate when n=5000, p=0.05, |
| // q=0.5. |
| auto estimator = [](double y) { return (y - 250.0) / 0.45; }; |
| |
| // This is the formula for computing expected_std_err when n=5000, p=0.05, |
| // q=0.5. |
| auto std_err = [](double y) { return sqrt(y * 0.45 + 125.0) / 0.45; }; |
| |
| // Test with various values of y. |
| for (int y : {0, 1, 49, 222, 1333, 2444, 3555, 4999, 5000}) { |
| SCOPED_TRACE(std::to_string(y)); |
| OneBitTest(n, y, estimator(y), std_err(y)); |
| } |
| } |
| |
| } // namespace rappor |
| } // namespace cobalt |