blob: 7baac42ed1fc707389756b7100894ed7133de18c [file] [log] [blame]
// Copyright 2016 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/algorithms/rappor/rappor_encoder.h"
#include <algorithm>
#include <map>
#include <vector>
#include <gtest/gtest.h>
#include "src/algorithms/rappor/rappor_test_utils.h"
#include "src/lib/crypto_util/random_test_utils.h"
namespace cobalt::rappor {
using system_data::ClientSecret;
// Constructs a BasicRapporEncoder with the given |config|, invokes
// Encode() with a dummy string and EncodeNullObservation(), and checks that the
// returned statuses are either kOK or kInvalidConfig, whichever is expected.
void TestBasicRapporConfig(const BasicRapporConfig& config, Status expected_status,
int caller_line_number) {
// Make a ClientSecret once and statically store the token.
static const std::string kClientSecretToken = ClientSecret::GenerateNewSecret().GetToken();
// Each time this function is invoked reconstitute the secret from the token.
BasicRapporEncoder encoder(config, ClientSecret::FromToken(kClientSecretToken));
BasicRapporObservation obs;
ValuePart value;
value.set_string_value("cat");
EXPECT_EQ(expected_status, encoder.Encode(value, &obs))
<< "Invoked from line number: " << caller_line_number;
BasicRapporObservation null_obs;
EXPECT_EQ(expected_status, encoder.EncodeNullObservation(&null_obs))
<< "Invoked from line number: " << caller_line_number;
}
// A macro to invoke TestBasicRapporConfig and pass it the current line number.
#define TEST_BASIC_RAPPOR_CONFIG(config, expected_status) \
(TestBasicRapporConfig(config, expected_status, __LINE__))
// Tests the validation of config for Basic RAPPOR.
TEST(RapporEncoderTest, BasicRapporConfigValidation) {
// Empty config: Invalid
BasicRapporConfig config;
TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig);
// Add two probabilities but no categories: Invalid
config.prob_0_becomes_1 = 0.3;
config.prob_1_stays_1 = 0.7;
TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig);
// Add one category: Invalid.
config.categories.add_string("cat");
TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig);
// Add two more categories: Valid.
config.categories.add_string("dog");
config.categories.add_string("fish");
TEST_BASIC_RAPPOR_CONFIG(config, kOK);
// Explicitly set PRR to 0: Valid.
config.prob_rr = 0.0;
TEST_BASIC_RAPPOR_CONFIG(config, kOK);
// Explicitly set PRR to non-zero: Invalid.
config.prob_rr = 0.1;
TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig);
// Explicitly set PRR back to zero: Valid.
config.prob_rr = 0.0;
TEST_BASIC_RAPPOR_CONFIG(config, kOK);
// Set one of the probabilities to negative: Invalid
config.prob_0_becomes_1 = -0.3;
TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig);
// Set one of the probabilities to greater than 1: Invalid
config.prob_0_becomes_1 = 1.3;
TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig);
// Fix the probability: Valid
config.prob_0_becomes_1 = 0.3;
TEST_BASIC_RAPPOR_CONFIG(config, kOK);
// Set the other probability to negative: Invalid
config.prob_1_stays_1 = -0.7;
TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig);
// Set the other the probability to greater than 1: Invalid
config.prob_1_stays_1 = 1.7;
TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig);
// Fix the probability: Valid
config.prob_1_stays_1 = 0.7;
TEST_BASIC_RAPPOR_CONFIG(config, kOK);
// Add an empty category: Invalid
config.categories.add_string("");
TEST_BASIC_RAPPOR_CONFIG(config, kInvalidConfig);
// Test with an invalid ClientSecret
BasicRapporEncoder encoder(config, ClientSecret::FromToken("Invalid Token"));
BasicRapporObservation obs;
ValuePart value;
value.set_string_value("dummy");
EXPECT_EQ(kInvalidConfig, encoder.Encode(value, &obs));
BasicRapporObservation null_obs;
EXPECT_EQ(kInvalidConfig, encoder.EncodeNullObservation(&null_obs));
}
// Test Config Validation with integer categories
TEST(RapporEncoderTest, BasicRapporWithIntsConfigValidation) {
// Create a config with three integer categories.
BasicRapporConfig config;
config.prob_0_becomes_1 = 0.3;
config.prob_1_stays_1 = 0.7;
config.categories.set_int_range(-1, 1);
// Construct the encoder
BasicRapporEncoder encoder(config, ClientSecret::GenerateNewSecret());
// Perform an encode with a value equal to one of the listed categories
BasicRapporObservation obs;
ValuePart value;
value.set_int_value(-1);
EXPECT_EQ(kOK, encoder.Encode(value, &obs));
// Perform an encode with a value not equal to one of the listed categories
value.set_int_value(2);
EXPECT_EQ(kInvalidInput, encoder.Encode(value, &obs));
// Perform an encode of a null observation
BasicRapporObservation null_obs;
EXPECT_EQ(kOK, encoder.EncodeNullObservation(&null_obs));
}
// Performs a test of BasicRapporEncoder::Encode() and
// BasicRapporEncoder::EncodeNullObservation() in the two special cases that
// that there is no randomness involved in the encoded string, namely
// (a) p = 0, q = 1
// (b) p = 1, q = 0
//
// |num_categories| must be a positive integer. Basic RAPPOR will be configured
// to have this many categories. The encoding will be performed for each of
// the categores.
//
// |q_is_one| Do the test in case (a) where p = 0, q = 1
void DoBasicRapporNoRandomnessTest(int num_categories, bool q_is_one) {
// Select the parameters based on the mode. index_char and other_char
// determine the expected bit pattern in the encoding. index_char is the
// character we expect to see in the position of the given category and
// other_char is the character we expect to see in the other positions.
float p, q;
char index_char, other_char;
if (q_is_one) {
// We expect a 1 in the index position and 0's everywhere else.
p = 0.0;
q = 1.0;
index_char = '1';
other_char = '0';
} else {
// We expect a 0 in the index position and 1's everywhere else.
p = 1.0;
q = 0.0;
index_char = '0';
other_char = '1';
}
// Configure basic RAPPOR with the selected parameters.
BasicRapporConfig config;
config.prob_0_becomes_1 = p;
config.prob_1_stays_1 = q;
for (int i = 0; i < num_categories; i++) {
config.categories.add_string(CategoryName(i));
}
// Construct a BasicRapporEncoder.
static const std::string kClientSecretToken = ClientSecret::GenerateNewSecret().GetToken();
BasicRapporEncoder encoder(config, ClientSecret::FromToken(kClientSecretToken));
// The expected number of bits in the encoding is the least multiple of 8
// greater than or equal to num_categories.
uint16_t expected_num_bits = 8 * (((num_categories - 1) / 8) + 1);
// For each category, obtain the observation and check that the bit pattern
// is as expected.
BasicRapporObservation obs;
for (int i = 0; i < num_categories; i++) {
auto category_name = CategoryName(i);
ValuePart value;
value.set_string_value(category_name);
ASSERT_EQ(kOK, encoder.Encode(value, &obs)) << category_name;
auto expected_pattern = BuildBitPatternString(expected_num_bits, i, index_char, other_char);
EXPECT_EQ(DataToBinaryString(obs.data()), expected_pattern);
}
// Encode a null observation and check that the bit pattern is as expected.
BasicRapporObservation null_obs;
ASSERT_EQ(kOK, encoder.EncodeNullObservation(&null_obs)) << "Null observation";
std::vector<uint16_t> index_of_1s(0);
if (!q_is_one) {
for (uint16_t k = 0; k < expected_num_bits; k++) {
index_of_1s.push_back(k);
}
}
auto null_expected_pattern = BuildBinaryString(expected_num_bits, index_of_1s);
EXPECT_EQ(DataToBinaryString(null_obs.data()), null_expected_pattern);
}
// Performs a test of BasicRapporEncoder::Encode() in the special case that
// the values of p and q are either 0 or 1 so that there is no randomness
// involved in the encoded string.
TEST(BasicRapporEncoderTest, NoRandomness) {
// We test with between 2 and 50 categories.
for (int num_categories = 2; num_categories <= 50; num_categories++) {
// See comments at DoBasicRapporNoRandomnessTest.
DoBasicRapporNoRandomnessTest(num_categories, true);
DoBasicRapporNoRandomnessTest(num_categories, false);
}
}
// Base class for tests of Basic RAPPOR that use a deterministic RNG.
class BasicRapporDeterministicTest : public ::testing::Test {
protected:
static std::unique_ptr<BasicRapporEncoder> BuildEncoder(float prob_0_becomes_1,
float prob_1_stays_1,
int num_categories) {
// Configure BasicRappor.
BasicRapporConfig config;
config.prob_0_becomes_1 = prob_0_becomes_1;
config.prob_1_stays_1 = prob_1_stays_1;
for (int i = 0; i < num_categories; i++) {
config.categories.add_string(CategoryName(i));
}
// Construct a BasicRapporEncoder.
static const std::string kClientSecretToken = ClientSecret::GenerateNewSecret().GetToken();
std::unique_ptr<BasicRapporEncoder> encoder(
new BasicRapporEncoder(config, ClientSecret::FromToken(kClientSecretToken)));
// Give the encoder a deterministic RNG.
encoder->SetRandomForTesting(
std::unique_ptr<crypto::Random>(new crypto::DeterministicRandom()));
return encoder;
}
// Generates a Basic RAPPOR observation 1000 times and then performs Pearson's
// chi-squared test on each bit separately to check for goodness of fit to
// a binomial distribution with the appropriate parameter. Fails if
// chi-squared >= |chi_squared_threshold|.
//
// Uses DeterministicRandom in order to ensure reproducibility.
//
// REQUIRES: 0 <= selected_category < num_categories.
// All 1000 of the observations will be for the selected category. Thus
// the expected number of 1's in the bit position corresponding to the
// selected category is prob_1_stays_1 and the expected number of 1'1 in
// all other bit positions is prob_0_becomes_1.
static void DoChiSquaredTest(float prob_0_becomes_1, float prob_1_stays_1, int num_categories,
int selected_category, double chi_squared_threshold) {
// Build the encoder
auto encoder = BuildEncoder(prob_0_becomes_1, prob_1_stays_1, num_categories);
// Sample 1000 observations of the selected category and collect the bit
// counts
static const int kNumTrials = 1000;
auto category_name = CategoryName(selected_category);
BasicRapporObservation obs;
std::vector<int> counts(num_categories, 0);
for (size_t i = 0; i < kNumTrials; i++) {
obs.Clear();
ValuePart value;
value.set_string_value(category_name);
EXPECT_EQ(kOK, encoder->Encode(value, &obs));
for (int bit_index = 0; bit_index < num_categories; bit_index++) {
if (IsSet(obs.data(), bit_index)) {
counts[bit_index]++;
}
}
}
// In the special case where prob_1_stays_1 is 1 make sure that we got
// 1000 1's in the selected category.
if (prob_1_stays_1 == 1.0) {
EXPECT_EQ(kNumTrials, counts[selected_category]);
}
// This is the expected number of ones and zeroes for the bit position in
// the selected category.
const double expected_1_selected = static_cast<double>(kNumTrials) * prob_1_stays_1;
const double expected_0_selected = static_cast<double>(kNumTrials) - expected_1_selected;
// This is the expected number of ones and zeroes for all bit positions
// other than the selected category.
const double expected_1 = static_cast<double>(kNumTrials) * prob_0_becomes_1;
const double expected_0 = static_cast<double>(kNumTrials) - expected_1;
// For each of the bit positions, perform the chi-squared test.
for (int bit_index = 0; bit_index < num_categories; bit_index++) {
double exp_0 = (bit_index == selected_category ? expected_0_selected : expected_0);
double exp_1 = (bit_index == selected_category ? expected_1_selected : expected_1);
if (exp_0 != 0.0 && exp_1 != 0.0) {
// Difference between actual 1 count and expected 1 count.
double delta_1 = static_cast<double>(counts[bit_index]) - exp_1;
// Difference between actual 0 count and expected 0 count.
double delta_0 = static_cast<double>(kNumTrials - counts[bit_index]) - exp_0;
// Compute and check the Chi-Squared value.
double chi_squared = delta_1 * delta_1 / exp_1 + delta_0 * delta_0 / exp_0;
EXPECT_TRUE(chi_squared < chi_squared_threshold)
<< "chi_squared=" << chi_squared << " chi_squared_threshold=" << chi_squared_threshold
<< " bit_index=" << bit_index << " delta_0=" << delta_0 << " delta_1=" << delta_1
<< " num_categories=" << num_categories << " selected_category=" << selected_category
<< " prob_0_becomes_1=" << prob_0_becomes_1 << " prob_1_stays_1=" << prob_1_stays_1;
}
}
}
// Generates a Basic RAPPOR encoding of a null observation 1000 times and then
// performs Pearson's chi-squared test on each bit separately to check for
// goodness of fit to a binomial distribution with the appropriate parameter.
// Fails if chi-squared >= |chi_squared_threshold|.
//
// Uses DeterministicRandom in order to ensure reproducibility.
//
// The true value of each bit (before encoding) is 0, so the expected number
// of 1's in each bit position is prob_0_becomes_1.
static void DoChiSquaredTestForNullObs(float prob_0_becomes_1, float prob_1_stays_1,
int num_categories, double chi_squared_threshold) {
// Build the encoder.
// Since no bits of the true bit vector are 1, the probability that 1 stays
// 1 is irrelevant.
auto encoder = BuildEncoder(prob_0_becomes_1, prob_1_stays_1, num_categories);
// Sample 1000 observations of the selected category and collect the bit
// counts
static const int kNumTrials = 1000;
BasicRapporObservation null_obs;
std::vector<int> counts(num_categories, 0);
for (size_t i = 0; i < kNumTrials; i++) {
null_obs.Clear();
EXPECT_EQ(kOK, encoder->EncodeNullObservation(&null_obs));
for (int bit_index = 0; bit_index < num_categories; bit_index++) {
if (IsSet(null_obs.data(), bit_index)) {
counts[bit_index]++;
}
}
}
// In the special case where prob_0_becomes_1 is 0 make sure that we got
// 1000 0's.
for (int k = 0; k < num_categories; k++) {
if (prob_0_becomes_1 == 0.0) {
EXPECT_EQ(0, counts[k]);
}
}
// This is the expected number of ones and zeroes for each bit position.
const double expected_1 = static_cast<double>(kNumTrials) * prob_0_becomes_1;
const double expected_0 = static_cast<double>(kNumTrials) - expected_1;
// For each of the bit positions, perform the chi-squared test.
for (int bit_index = 0; bit_index < num_categories; bit_index++) {
if (expected_0 != 0.0 && expected_1 != 0.0) {
// Difference between actual 1 count and expected 1 count.
double delta_1 = static_cast<double>(counts[bit_index]) - expected_1;
// Difference between actual 0 count and expected 0 count.
double delta_0 = static_cast<double>(kNumTrials - counts[bit_index]) - expected_0;
// Compute and check the Chi-Squared value.
double chi_squared = delta_1 * delta_1 / expected_1 + delta_0 * delta_0 / expected_0;
EXPECT_TRUE(chi_squared < chi_squared_threshold)
<< "chi_squared=" << chi_squared << " chi_squared_threshold=" << chi_squared_threshold
<< " bit_index=" << bit_index << " delta_0=" << delta_0 << " delta_1=" << delta_1
<< " num_categories=" << num_categories << " prob_0_becomes_1=" << prob_0_becomes_1;
}
}
}
};
TEST_F(BasicRapporDeterministicTest, ChiSquaredTest) {
// Perform the chi-squared test for various numbers of categories and
// various selected categories, as well as for null observations without
// selected categories. This gets combinatorially explosive so to keep the
// testing time reasonable we don't test every combination but rather step
// through the num_categories by 7 and use at most 3 selected categories for
// each num_categories.
//
// The last parameter to DoChiSquaredTest*() is the chi-squared value to use.
// Notice that these values were chosen by experimentation to be as small as
// possible so that the test passes. They do not necessarily correspond to
// natural confidence intervals for the chi-squared test.
for (int num_categories = 2; num_categories < 40; num_categories += 7) {
for (int selected_category = 0; selected_category < num_categories;
selected_category += (num_categories / 3 + 1)) {
// The first parameter is prob_0_becomes_1, and the second parameter is
// prob_1_stays_1.
DoChiSquaredTest(0.01, 0.99, num_categories, selected_category, 8.2);
DoChiSquaredTest(0.1, 0.9, num_categories, selected_category, 9.4);
DoChiSquaredTest(0.2, 0.8, num_categories, selected_category, 11.1);
DoChiSquaredTest(0.25, 0.75, num_categories, selected_category, 11.8);
DoChiSquaredTest(0.3, 0.7, num_categories, selected_category, 11.8);
}
// The first parameter is prob_0_becomes_1, and the second parameter is
// prob_1_stays_1.
DoChiSquaredTestForNullObs(0.0, 1.0, num_categories, 0.0);
DoChiSquaredTestForNullObs(0.01, 0.99, num_categories, 8.2);
DoChiSquaredTestForNullObs(0.1, 0.9, num_categories, 9.4);
DoChiSquaredTestForNullObs(0.2, 0.8, num_categories, 11.1);
DoChiSquaredTestForNullObs(0.25, 0.75, num_categories, 11.8);
DoChiSquaredTestForNullObs(0.3, 0.7, num_categories, 11.8);
}
}
// Test that BasicRapporEncoder::Encode() returns kInvalidArgument if a category
// name is used that is not one of the registered categories.
TEST(BasicRapporEncoderTest, BadCategory) {
// Configure Basic RAPPOR with two categories, "dog" and "cat".
BasicRapporConfig config;
config.prob_0_becomes_1 = 0.3;
config.prob_1_stays_1 = 0.7;
config.categories.set_strings({"dog", "cat"});
// Construct a BasicRapporEncoder.
static const std::string kClientSecretToken = ClientSecret::GenerateNewSecret().GetToken();
BasicRapporEncoder encoder(config, ClientSecret::FromToken(kClientSecretToken));
// Attempt to encode a string that is not one of the categories. Expect
// to receive kInvalidInput.
BasicRapporObservation obs;
ValuePart value;
value.set_string_value("fish");
EXPECT_EQ(kInvalidInput, encoder.Encode(value, &obs));
}
// Tests that BasicRapporEncoder::Encode() correctly handles values of type
// INDEX
TEST(BasicRapporEncoderTest, EncodeIndex) {
// Configure Basic RAPPOR with 5 indexed categories.
// We use no randomness so we can check that the correct bit is being set.
BasicRapporConfig config;
config.prob_0_becomes_1 = 0.0;
config.prob_1_stays_1 = 1.0;
config.categories.set_indexed(5);
// Construct a BasicRapporEncoder.
static const std::string kClientSecretToken = ClientSecret::GenerateNewSecret().GetToken();
BasicRapporEncoder encoder(config, ClientSecret::FromToken(kClientSecretToken));
BasicRapporObservation obs;
// Validate that category indices 0, 1 and 4 yield the correct data.
ValuePart value;
value.set_index_value(0);
EXPECT_EQ(kOK, encoder.Encode(value, &obs));
EXPECT_EQ("00000001", DataToBinaryString(obs.data()));
obs.Clear();
value.set_index_value(1);
EXPECT_EQ(kOK, encoder.Encode(value, &obs));
EXPECT_EQ("00000010", DataToBinaryString(obs.data()));
obs.Clear();
value.set_index_value(4);
EXPECT_EQ(kOK, encoder.Encode(value, &obs));
EXPECT_EQ("00010000", DataToBinaryString(obs.data()));
obs.Clear();
// Validate that category index 5 yields kInvalidInput.
value.set_index_value(5);
EXPECT_EQ(kInvalidInput, encoder.Encode(value, &obs));
}
} // namespace cobalt::rappor