// blob: 47b71edec65bbc57529b0d5533d42c9874396ae4 [file] [log] [blame]
// Copyright 2016 The Fuchsia Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "encoder/envelope_maker.h"
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <utility>
#include "encoder/client_secret.h"
#include "encoder/encoder.h"
#include "encoder/project_context.h"
#include "third_party/googletest/googletest/include/gtest/gtest.h"
#include "util/encrypted_message_util.h"
namespace cobalt {
namespace encoder {
using config::EncodingRegistry;
using config::MetricRegistry;
namespace {
// Customer and project ids shared by every metric and encoding config in
// this test.
constexpr uint32_t kCustomerId = 1;
constexpr uint32_t kProjectId = 1;

// Placeholder public keys handed to EnvelopeMaker. The tests in this file use
// the EncryptedMessage::NONE scheme together with these values.
constexpr const char* kAnalyzerPublicKey = "analyzer-public-key";
constexpr const char* kShufflerPublicKey = "shuffler-public-key";

// This unix timestamp corresponds to Friday Dec 2, 2016 in UTC
// and Thursday Dec 1, 2016 in Pacific time.
constexpr time_t kSomeTimestamp = 1480647356;

// This is the day index for Friday Dec 2, 2016, i.e. the number of whole
// days between the unix epoch and kSomeTimestamp.
constexpr uint32_t kUtcDayIndex = 17137;

// Keep the two constants above in sync with each other.
static_assert(static_cast<uint32_t>(kSomeTimestamp / (24 * 60 * 60)) ==
                  kUtcDayIndex,
              "kUtcDayIndex must be the UTC day index of kSomeTimestamp");
// Metric configuration text that GetTestProject() parses with
// MetricRegistry::FromString(). It declares metrics 1, 2 and 3 for
// customer 1 / project 1. Each metric uses the UTC time zone policy and has a
// single part keyed "Part1" whose value message is left empty.
const char* kMetricConfigText = R"(
# Metric 1 has one string part.
element {
customer_id: 1
project_id: 1
id: 1
time_zone_policy: UTC
parts {
key: "Part1"
value {
}
}
}
# Metric 2 has one string part.
element {
customer_id: 1
project_id: 1
id: 2
time_zone_policy: UTC
parts {
key: "Part1"
value {
}
}
}
# Metric 3 has one string part.
element {
customer_id: 1
project_id: 1
id: 3
time_zone_policy: UTC
parts {
key: "Part1"
value {
}
}
}
)";
// Encoding configuration text that GetTestProject() parses with
// EncodingRegistry::FromString(). It declares three encoding configs for
// customer 1 / project 1:
//   id 1: Forculus with threshold 20.
//   id 2: Basic RAPPOR with prob_0_becomes_1 = 0.25, prob_1_stays_1 = 0.75
//         and the string categories "Apple", "Banana" and "Cantaloupe".
//   id 3: the NoOp encoding.
const char* kEncodingConfigText = R"(
# EncodingConfig 1 is Forculus.
element {
customer_id: 1
project_id: 1
id: 1
forculus {
threshold: 20
}
}
# EncodingConfig 2 is Basic RAPPOR with string categories.
element {
customer_id: 1
project_id: 1
id: 2
basic_rappor {
prob_0_becomes_1: 0.25
prob_1_stays_1: 0.75
string_categories: {
category: "Apple"
category: "Banana"
category: "Cantaloupe"
}
}
}
# EncodingConfig 3 is NoOp.
element {
customer_id: 1
project_id: 1
id: 3
no_op_encoding {
}
}
)";
// Returns a ProjectContext obtained by parsing the above configuration
// text strings.
std::shared_ptr<ProjectContext> GetTestProject() {
// Parse the metric config string
auto metric_parse_result =
MetricRegistry::FromString(kMetricConfigText, nullptr);
EXPECT_EQ(config::kOK, metric_parse_result.second);
std::shared_ptr<MetricRegistry> metric_registry(
metric_parse_result.first.release());
// Parse the encoding config string
auto encoding_parse_result =
EncodingRegistry::FromString(kEncodingConfigText, nullptr);
EXPECT_EQ(config::kOK, encoding_parse_result.second);
std::shared_ptr<EncodingRegistry> encoding_registry(
encoding_parse_result.first.release());
return std::shared_ptr<ProjectContext>(new ProjectContext(
kCustomerId, kProjectId, metric_registry, encoding_registry));
}
} // namespace
class EnvelopeMakerTest : public ::testing::Test {
public:
EnvelopeMakerTest()
: envelope_maker_(
new EnvelopeMaker(kAnalyzerPublicKey, EncryptedMessage::NONE,
kShufflerPublicKey, EncryptedMessage::NONE)),
project_(GetTestProject()),
encoder_(project_, ClientSecret::GenerateNewSecret()) {
// Set a static current time so we can test the day_index computation.
encoder_.set_current_time(kSomeTimestamp);
}
// Returns the current value of envelope_maker_ and resets envelope_maker_
// to a new EnvelopeMaker constructed using the given optional arguments.
std::unique_ptr<EnvelopeMaker> ResetEnvelopeMaker(
size_t max_bytes_each_observation = SIZE_MAX,
size_t max_num_bytes = SIZE_MAX) {
std::unique_ptr<EnvelopeMaker> return_val = std::move(envelope_maker_);
envelope_maker_.reset(new EnvelopeMaker(
kAnalyzerPublicKey, EncryptedMessage::NONE, kShufflerPublicKey,
EncryptedMessage::NONE, max_bytes_each_observation, max_num_bytes));
return return_val;
}
// The metric is expected to have a single string part named "Part1" and
// to use the UTC timezone.
// expected_size_change: What is the expected change in the size of the
// envelope in bytes due to the AddObservation()?
void AddStringObservation(std::string value, uint32_t metric_id,
uint32_t encoding_config_id,
int expected_num_batches,
size_t expected_this_batch_index,
int expected_this_batch_size,
size_t expected_size_change,
EnvelopeMaker::AddStatus expected_status) {
// Encode an Observation
Encoder::Result result =
encoder_.EncodeString(metric_id, encoding_config_id, value);
ASSERT_EQ(Encoder::kOK, result.status);
ASSERT_NE(nullptr, result.observation);
ASSERT_NE(nullptr, result.metadata);
// Add the Observation to the EnvelopeMaker
size_t size_before_add = envelope_maker_->size();
ASSERT_EQ(expected_status,
envelope_maker_->AddObservation(*result.observation,
std::move(result.metadata)));
size_t size_after_add = envelope_maker_->size();
EXPECT_EQ(expected_size_change, size_after_add - size_before_add) << value;
// Check the number of batches currently in the envelope.
ASSERT_EQ(expected_num_batches, envelope_maker_->envelope().batch_size());
if (expected_status != EnvelopeMaker::kOk) {
return;
}
// Check the ObservationMetadata of the expected batch.
const auto& batch =
envelope_maker_->envelope().batch(expected_this_batch_index);
const auto& metadata = batch.meta_data();
EXPECT_EQ(kCustomerId, metadata.customer_id());
EXPECT_EQ(kProjectId, metadata.project_id());
EXPECT_EQ(metric_id, metadata.metric_id());
EXPECT_EQ(kUtcDayIndex, metadata.day_index());
// Check the size of the expected batch.
ASSERT_EQ(expected_this_batch_size, batch.encrypted_observation_size())
<< "batch_index=" << expected_this_batch_index
<< "; metric_id=" << metric_id;
// Deserialize the most recently added observation from the
// expected batch.
EXPECT_EQ(
EncryptedMessage::NONE,
batch.encrypted_observation(expected_this_batch_size - 1).scheme());
std::string serialized_observation =
batch.encrypted_observation(expected_this_batch_size - 1).ciphertext();
Observation recovered_observation;
ASSERT_TRUE(recovered_observation.ParseFromString(serialized_observation));
// Check that it looks right.
ASSERT_EQ(1u, recovered_observation.parts().size());
auto iter = recovered_observation.parts().find("Part1");
ASSERT_TRUE(iter != recovered_observation.parts().cend());
const auto& part = iter->second;
ASSERT_EQ(encoding_config_id, part.encoding_config_id());
}
// Adds multiple string observations to the EnvelopeMaker for the given
// metric_id and for encoding_config_id=3, the NoOp encoding. The string
// values will be "value<i>" for i in [first, limit).
// expected_num_batches: How many batches do we expecte the EnvelopeMaker to
// contain after the first add.
// expected_this_batch_index: Which batch index do we expect this add to
// have gone into.
// expected_this_batch_size: What is the expected size of the current batch
// *before* the first add.
void AddManyStringsNoOp(int first, int limit, uint32_t metric_id,
int expected_num_batches,
size_t expected_this_batch_index,
int expected_this_batch_size) {
static const uint32_t kEncodingConfigId = 3;
for (int i = first; i < limit; i++) {
std::ostringstream stream;
stream << "value " << i;
// NOTE(rudominer) The values of expected_observation_num_bytes for
// the NoOp encodings in this test are obtained from experimentation
// rather than calculation. Notice that there seems to be an overhead
// of 20 bytes in addition to the bytes required to store the string
// value. Each Observation also needs to store the name of the
// MetricPart ("Part 1" in our case) as well as the fact that the NoOp
// encoding is being used and the fact that the datatype is string.
size_t expected_observation_num_bytes = (i >= 10 ? 28 : 27);
expected_this_batch_size++;
AddStringObservation(stream.str(), metric_id, kEncodingConfigId,
expected_num_batches, expected_this_batch_index,
expected_this_batch_size,
expected_observation_num_bytes, EnvelopeMaker::kOk);
}
}
// Adds multiple encoded Observations to two different metrics. Test that
// the EnvelopeMaker behaves correctly.
void DoTest() {
// Add two observations for metric 1
size_t expected_num_batches = 1;
size_t expected_this_batch_index = 0;
size_t expected_this_batch_size = 1;
// NOTE(rudominer) The values of expected_observation_num_bytes for
// the Forculus and Basic RAPPOR encodings in this test are obtained from
// experimentation rather than calculation. We are therefore not testing
// that the values are correct but rather testing that there is no
// regression in the size() functionality. Also just eybealling the numbers
// serves as a sanity test. Notice that the Forculus Observations are
// rather large compared to the Basic RAPPOR observations with 3 categoreis.
size_t expected_observation_num_bytes = 111;
AddStringObservation("a value", 1, 1, expected_num_batches,
expected_this_batch_index, expected_this_batch_size,
expected_observation_num_bytes, EnvelopeMaker::kOk);
expected_this_batch_size = 2;
expected_observation_num_bytes = 19;
AddStringObservation("Apple", 1, 2, expected_num_batches,
expected_this_batch_index, expected_this_batch_size,
expected_observation_num_bytes, EnvelopeMaker::kOk);
// Add two observations for metric 2
expected_num_batches = 2;
expected_this_batch_index = 1;
expected_this_batch_size = 1;
expected_observation_num_bytes = 112;
AddStringObservation("a value2", 2, 1, expected_num_batches,
expected_this_batch_index, expected_this_batch_size,
expected_observation_num_bytes, EnvelopeMaker::kOk);
expected_this_batch_size = 2;
expected_observation_num_bytes = 19;
AddStringObservation("Banana", 2, 2, expected_num_batches,
expected_this_batch_index, expected_this_batch_size,
expected_observation_num_bytes, EnvelopeMaker::kOk);
// Add two more observations for metric 1
expected_this_batch_index = 0;
expected_this_batch_size = 3;
expected_observation_num_bytes = 112;
AddStringObservation("a value3", 1, 1, expected_num_batches,
expected_this_batch_index, expected_this_batch_size,
expected_observation_num_bytes, EnvelopeMaker::kOk);
expected_this_batch_size = 4;
expected_observation_num_bytes = 19;
AddStringObservation("Banana", 1, 2, expected_num_batches,
expected_this_batch_index, expected_this_batch_size,
expected_observation_num_bytes, EnvelopeMaker::kOk);
// Add two more observations for metric 2
expected_this_batch_index = 1;
expected_this_batch_size = 3;
expected_observation_num_bytes = 113;
AddStringObservation("a value40", 2, 1, expected_num_batches,
expected_this_batch_index, expected_this_batch_size,
expected_observation_num_bytes, EnvelopeMaker::kOk);
expected_this_batch_size = 4;
expected_observation_num_bytes = 19;
AddStringObservation("Cantaloupe", 2, 2, expected_num_batches,
expected_this_batch_index, expected_this_batch_size,
expected_observation_num_bytes, EnvelopeMaker::kOk);
// Make the encrypted Envelope.
EncryptedMessage encrypted_message;
EXPECT_TRUE(envelope_maker_->MakeEncryptedEnvelope(&encrypted_message));
// Decrypt encrypted_message. (No actual decryption is involved since
// we used the NONE encryption scheme.)
util::MessageDecrypter decrypter("");
Envelope recovered_envelope;
EXPECT_TRUE(
decrypter.DecryptMessage(encrypted_message, &recovered_envelope));
// Check that it looks right.
EXPECT_EQ(2, recovered_envelope.batch_size());
for (size_t i = 0; i < 2; i++) {
EXPECT_EQ(i + 1, recovered_envelope.batch(i).meta_data().metric_id());
EXPECT_EQ(4, recovered_envelope.batch(i).encrypted_observation_size());
}
}
protected:
std::unique_ptr<EnvelopeMaker> envelope_maker_;
std::shared_ptr<ProjectContext> project_;
Encoder encoder_;
};
// Runs DoTest() three times in a row, calling Clear() on the EnvelopeMaker
// after every round. Because DoTest() expects to start from an empty
// envelope, the second and third rounds also test that Clear() works
// correctly.
TEST_F(EnvelopeMakerTest, TestAll) {
  constexpr int kNumRounds = 3;
  int rounds_done = 0;
  while (rounds_done < kNumRounds) {
    DoTest();
    envelope_maker_->Clear();
    ++rounds_done;
  }
}
// Tests the MergeOutOf() method.
//
// Two EnvelopeMakers are populated with NoOp-encoded string observations:
//   EnvelopeMaker 1: metric 1 (values 0..9) and metric 2 (values 0..9).
//   EnvelopeMaker 2: metric 2 (values 10..19) and metric 3 (values 0..9).
// EnvelopeMaker 2 is then merged into EnvelopeMaker 1. The test checks the
// merged contents and that EnvelopeMaker 1 remains usable afterwards.
TEST_F(EnvelopeMakerTest, MergeOutOf) {
  // AddManyStringsNoOp() takes the size of the batch *before* its first add.
  // Every batch populated before the merge starts out empty.
  int expected_this_batch_size = 0;

  // Add metric 1 batch to EnvelopeMaker 1 with strings 0..9
  uint32_t metric_id = 1;
  int expected_num_batches = 1;
  size_t expected_this_batch_index = 0;
  AddManyStringsNoOp(0, 10, metric_id, expected_num_batches,
                     expected_this_batch_index, expected_this_batch_size);

  // Add metric 2 batch to EnvelopeMaker 1 with strings 0..9
  metric_id = 2;
  expected_num_batches = 2;
  expected_this_batch_index = 1;
  AddManyStringsNoOp(0, 10, metric_id, expected_num_batches,
                     expected_this_batch_index, expected_this_batch_size);

  // Take EnvelopeMaker 1 and create EnvelopeMaker 2.
  auto envelope_maker1 = ResetEnvelopeMaker();

  // Add metric 2 batch to EnvelopeMaker 2 with strings 10..19
  metric_id = 2;
  expected_num_batches = 1;
  expected_this_batch_index = 0;
  AddManyStringsNoOp(10, 20, metric_id, expected_num_batches,
                     expected_this_batch_index, expected_this_batch_size);

  // Add metric 3 to EnvelopeMaker 2 with strings 0..9
  metric_id = 3;
  expected_num_batches = 2;
  expected_this_batch_index = 1;
  AddManyStringsNoOp(0, 10, metric_id, expected_num_batches,
                     expected_this_batch_index, expected_this_batch_size);

  // Take EnvelopeMaker 2.
  auto envelope_maker2 = ResetEnvelopeMaker();

  // Now invoke MergeOutOf to merge EnvelopeMaker 2 into EnvelopeMaker 1.
  envelope_maker1->MergeOutOf(envelope_maker2.get());

  // EnvelopeMaker 2 should be empty.
  EXPECT_TRUE(envelope_maker2->Empty());

  // EnvelopeMaker 1 should have three batches for Metrics 1, 2, 3
  EXPECT_FALSE(envelope_maker1->Empty());
  ASSERT_EQ(3, envelope_maker1->envelope().batch_size());

  // Iterate through each of the batches and check it.
  for (uint32_t index = 0; index < 3; index++) {
    // Batch 0 and 2 should have 10 encrypted observations and batch
    // 1 should have 20 because batch 0 from EnvelopeMaker 2 (metric 2) was
    // merged into batch 1 of EnvelopeMaker 1.
    const auto& batch =
        envelope_maker1->envelope().batch(static_cast<int>(index));
    EXPECT_EQ(index + 1, batch.meta_data().metric_id());
    const int expected_num_observations = (index == 1 ? 20 : 10);
    ASSERT_EQ(expected_num_observations, batch.encrypted_observation_size());

    // Check each one of the observations.
    for (int i = 0; i < expected_num_observations; i++) {
      // Extract the serialized observation.
      const auto& encrypted_message = batch.encrypted_observation(i);
      EXPECT_EQ(EncryptedMessage::NONE, encrypted_message.scheme());
      std::string serialized_observation = encrypted_message.ciphertext();
      Observation recovered_observation;
      ASSERT_TRUE(
          recovered_observation.ParseFromString(serialized_observation));

      // Check that it looks right.
      ASSERT_EQ(1u, recovered_observation.parts().size());
      auto iter = recovered_observation.parts().find("Part1");
      ASSERT_TRUE(iter != recovered_observation.parts().cend());
      const auto& part = iter->second;
      ASSERT_EQ(3u, part.encoding_config_id());
      ASSERT_TRUE(part.has_unencoded());

      // Check the string values. Batches 0 and 2 are straightforward. The
      // values should be {"value 0", "value 1", .. "value 9"}. But
      // batch 1 is more complicated. Because of the way merge is implemented
      // we expect to see:
      // {"value 0", "value 1", .. "value 9", "value 19",
      //  "value 18", ... "value 10"}
      // This is because when we merged the metric 2 batch of Envelope 2 into
      // batch 1 of Envelope 1 we reversed the order of the observations in
      // Envelope 2.
      std::ostringstream stream;
      int expected_value_index = i;
      if (index == 1 && i >= 10) {
        // Maps positions 10..19 onto the values 19..10.
        expected_value_index = 29 - i;
      }
      stream << "value " << expected_value_index;
      auto expected_string_value = stream.str();
      EXPECT_EQ(expected_string_value,
                part.unencoded().unencoded_value().string_value());
    }
  }

  // Now we want to test that after the MergeOutOf() operation the EnvelopeMaker
  // is still usable. Put EnvelopeMaker 1 back as the test EnvelopeMaker.
  envelope_maker_ = std::move(envelope_maker1);

  // Add string observations 10..19 to each of metrics 1, 2 and 3, i.e. to
  // batches 0, 1 and 2. After the merge those batches already hold 10, 20 and
  // 10 observations respectively.
  for (metric_id = 1; metric_id <= 3; metric_id++) {
    expected_num_batches = 3;
    expected_this_batch_index = metric_id - 1;
    expected_this_batch_size = (metric_id == 2 ? 20 : 10);
    AddManyStringsNoOp(10, 20, metric_id, expected_num_batches,
                       expected_this_batch_index, expected_this_batch_size);
  }
}
// Tests that EnvelopeMaker returns kObservationTooBig when it is supposed to:
// with max_bytes_each_observation = 100, a 95-byte Observation is accepted, a
// larger-than-100-byte Observation is rejected without changing the envelope,
// and a subsequent 95-byte Observation is accepted again.
TEST_F(EnvelopeMakerTest, ObservationTooBig) {
  static const uint32_t kMetricId = 1;
  static const uint32_t kEncodingConfigId = 3;  // NoOp encoding.

  // Set max_bytes_each_observation = 100.
  ResetEnvelopeMaker(100);

  // Build an input string of length 75 bytes. Note the (count, char)
  // constructor: std::string("x", 75) would instead copy 75 bytes starting at
  // the two-byte literal "x", reading past its end.
  std::string value(75, 'x');

  // From experimentation we know that the overhead of the NoOp encoding
  // is 20 bytes so we expect the Observation size to be 95.
  size_t expected_observation_num_bytes = 95;

  // Invoke AddStringObservation() and expect kOk
  int expected_num_batches = 1;
  size_t expected_this_batch_index = 0;
  int expected_this_batch_size = 1;
  AddStringObservation(value, kMetricId, kEncodingConfigId,
                       expected_num_batches, expected_this_batch_index,
                       expected_this_batch_size, expected_observation_num_bytes,
                       EnvelopeMaker::kOk);

  // Build an input string of length 101 bytes.
  value = std::string(101, 'x');

  // We expect the Observation to not be added to the Envelope and so for
  // the Envelope size to not change.
  expected_observation_num_bytes = 0;

  // Invoke AddStringObservation() and expect kObservationTooBig
  AddStringObservation(value, kMetricId, kEncodingConfigId,
                       expected_num_batches, expected_this_batch_index,
                       expected_this_batch_size, expected_observation_num_bytes,
                       EnvelopeMaker::kObservationTooBig);

  // Build an input string of length 75 bytes again.
  value = std::string(75, 'x');
  expected_observation_num_bytes = 95;
  expected_this_batch_size = 2;

  // Invoke AddStringObservation() and expect kOk.
  AddStringObservation(value, kMetricId, kEncodingConfigId,
                       expected_num_batches, expected_this_batch_index,
                       expected_this_batch_size, expected_observation_num_bytes,
                       EnvelopeMaker::kOk);
}
// Tests that EnvelopeMaker returns kEnvelopeFull when it is supposed to:
// with max_num_bytes = 1000, the envelope is filled to 950 bytes with 19
// Observations of 50 bytes each, after which a 95-byte Observation no longer
// fits.
TEST_F(EnvelopeMakerTest, EnvelopeFull) {
  static const uint32_t kMetricId = 1;
  static const uint32_t kEncodingConfigId = 3;  // NoOp encoding.

  // Set max_bytes_each_observation = 100, max_num_bytes=1000.
  ResetEnvelopeMaker(100, 1000);

  int expected_this_batch_size = 1;
  int expected_num_batches = 1;
  size_t expected_this_batch_index = 0;

  // Build an input string of length 30 bytes. Note the (count, char)
  // constructor: std::string("x", 30) would instead copy 30 bytes starting at
  // the two-byte literal "x", reading past its end.
  const std::string small_value(30, 'x');
  for (int i = 0; i < 19; i++) {
    // From experimentation we know that the overhead of the NoOp encoding
    // is 20 bytes so we expect the Observation size to be 50.
    size_t expected_observation_num_bytes = 50;
    // Invoke AddStringObservation() and expect kOk
    AddStringObservation(small_value, kMetricId, kEncodingConfigId,
                         expected_num_batches, expected_this_batch_index,
                         expected_this_batch_size++,
                         expected_observation_num_bytes, EnvelopeMaker::kOk);
  }
  EXPECT_EQ(950u, envelope_maker_->size());

  // If we try to add an observation of more than 100 bytes we should
  // get kObservationTooBig.
  std::string value(101, 'x');

  // We expect the Observation to not be added to the Envelope and so for
  // the Envelope size to not change.
  size_t expected_observation_num_bytes = 0;

  // AddStringObservation() does not look at the batch size when the expected
  // status is not kOk, so expected_this_batch_size is passed along unchanged
  // for the two failing adds below.
  AddStringObservation(
      value, kMetricId, kEncodingConfigId, expected_num_batches,
      expected_this_batch_index, expected_this_batch_size,
      expected_observation_num_bytes, EnvelopeMaker::kObservationTooBig);

  // If we try to add a string of 75 bytes (an Observation of 95 bytes, which
  // would bring the envelope to 1045 bytes) we should get kEnvelopeFull.
  value = std::string(75, 'x');
  AddStringObservation(
      value, kMetricId, kEncodingConfigId, expected_num_batches,
      expected_this_batch_index, expected_this_batch_size,
      expected_observation_num_bytes, EnvelopeMaker::kEnvelopeFull);
}
} // namespace encoder
} // namespace cobalt