| // Copyright 2016 The Fuchsia Authors |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "encoder/envelope_maker.h" |
| |
| #include <gflags/gflags.h> |
| #include <glog/logging.h> |
| #include <utility> |
| |
| #include "encoder/client_secret.h" |
| #include "encoder/encoder.h" |
| #include "encoder/project_context.h" |
| #include "third_party/googletest/googletest/include/gtest/gtest.h" |
| #include "util/encrypted_message_util.h" |
| |
| namespace cobalt { |
| namespace encoder { |
| |
| using config::EncodingRegistry; |
| using config::MetricRegistry; |
| |
| namespace { |
| |
| static const uint32_t kCustomerId = 1; |
| static const uint32_t kProjectId = 1; |
| static const char* kAnalyzerPublicKey = "analyzer-public-key"; |
| static const char* kShufflerPublicKey = "shuffler-public-key"; |
| |
| // This unix timestamp corresponds to Friday Dec 2, 2016 in UTC |
| // and Thursday Dec 1, 2016 in Pacific time. |
| const time_t kSomeTimestamp = 1480647356; |
| // This is the day index for Friday Dec 2, 2016 |
| const uint32_t kUtcDayIndex = 17137; |
| |
| const char* kMetricConfigText = R"( |
| # Metric 1 has one string part. |
| element { |
| customer_id: 1 |
| project_id: 1 |
| id: 1 |
| time_zone_policy: UTC |
| parts { |
| key: "Part1" |
| value { |
| } |
| } |
| } |
| |
| # Metric 2 has one string part. |
| element { |
| customer_id: 1 |
| project_id: 1 |
| id: 2 |
| time_zone_policy: UTC |
| parts { |
| key: "Part1" |
| value { |
| } |
| } |
| } |
| |
| # Metric 3 has one string part. |
| element { |
| customer_id: 1 |
| project_id: 1 |
| id: 3 |
| time_zone_policy: UTC |
| parts { |
| key: "Part1" |
| value { |
| } |
| } |
| } |
| )"; |
| |
| const char* kEncodingConfigText = R"( |
| # EncodingConfig 1 is Forculus. |
| element { |
| customer_id: 1 |
| project_id: 1 |
| id: 1 |
| forculus { |
| threshold: 20 |
| } |
| } |
| |
| # EncodingConfig 2 is Basic RAPPOR with string categories. |
| element { |
| customer_id: 1 |
| project_id: 1 |
| id: 2 |
| basic_rappor { |
| prob_0_becomes_1: 0.25 |
| prob_1_stays_1: 0.75 |
| string_categories: { |
| category: "Apple" |
| category: "Banana" |
| category: "Cantaloupe" |
| } |
| } |
| } |
| |
| # EncodingConfig 3 is NoOp. |
| element { |
| customer_id: 1 |
| project_id: 1 |
| id: 3 |
| no_op_encoding { |
| } |
| } |
| |
| )"; |
| |
| // Returns a ProjectContext obtained by parsing the above configuration |
| // text strings. |
| std::shared_ptr<ProjectContext> GetTestProject() { |
| // Parse the metric config string |
| auto metric_parse_result = |
| MetricRegistry::FromString(kMetricConfigText, nullptr); |
| EXPECT_EQ(config::kOK, metric_parse_result.second); |
| std::shared_ptr<MetricRegistry> metric_registry( |
| metric_parse_result.first.release()); |
| |
| // Parse the encoding config string |
| auto encoding_parse_result = |
| EncodingRegistry::FromString(kEncodingConfigText, nullptr); |
| EXPECT_EQ(config::kOK, encoding_parse_result.second); |
| std::shared_ptr<EncodingRegistry> encoding_registry( |
| encoding_parse_result.first.release()); |
| |
| return std::shared_ptr<ProjectContext>(new ProjectContext( |
| kCustomerId, kProjectId, metric_registry, encoding_registry)); |
| } |
| |
| } // namespace |
| |
| class EnvelopeMakerTest : public ::testing::Test { |
| public: |
| EnvelopeMakerTest() |
| : envelope_maker_( |
| new EnvelopeMaker(kAnalyzerPublicKey, EncryptedMessage::NONE, |
| kShufflerPublicKey, EncryptedMessage::NONE)), |
| project_(GetTestProject()), |
| encoder_(project_, ClientSecret::GenerateNewSecret()) { |
| // Set a static current time so we can test the day_index computation. |
| encoder_.set_current_time(kSomeTimestamp); |
| } |
| |
| // Returns the current value of envelope_maker_ and resets envelope_maker_. |
| std::unique_ptr<EnvelopeMaker> ResetEnvelopeMaker() { |
| std::unique_ptr<EnvelopeMaker> return_val = std::move(envelope_maker_); |
| envelope_maker_.reset( |
| new EnvelopeMaker(kAnalyzerPublicKey, EncryptedMessage::NONE, |
| kShufflerPublicKey, EncryptedMessage::NONE)); |
| return return_val; |
| } |
| |
| // The metric is expected to have a single string part named "Part1" and |
| // to use the UTC timezone. |
| void AddStringObservation(std::string value, uint32_t metric_id, |
| uint32_t encoding_config_id, |
| int expected_num_batches, |
| size_t expected_this_batch_index, |
| int expected_this_batch_size) { |
| // Encode an Observation |
| Encoder::Result result = |
| encoder_.EncodeString(metric_id, encoding_config_id, value); |
| ASSERT_EQ(Encoder::kOK, result.status); |
| ASSERT_NE(nullptr, result.observation); |
| ASSERT_NE(nullptr, result.metadata); |
| |
| // Add the Observation to the EnvelopeMaker |
| envelope_maker_->AddObservation(*result.observation, |
| std::move(result.metadata)); |
| |
| // Check the number of batches currently in the envelope. |
| ASSERT_EQ(expected_num_batches, envelope_maker_->envelope().batch_size()); |
| |
| // Check the ObservationMetadata of the expected batch. |
| const auto& batch = |
| envelope_maker_->envelope().batch(expected_this_batch_index); |
| const auto& metadata = batch.meta_data(); |
| EXPECT_EQ(kCustomerId, metadata.customer_id()); |
| EXPECT_EQ(kProjectId, metadata.project_id()); |
| EXPECT_EQ(metric_id, metadata.metric_id()); |
| EXPECT_EQ(kUtcDayIndex, metadata.day_index()); |
| |
| // Check the size of the expected batch. |
| ASSERT_EQ(expected_this_batch_size, batch.encrypted_observation_size()) |
| << "batch_index=" << expected_this_batch_index |
| << "; metric_id=" << metric_id; |
| |
| // Deserialize the most recently added observation from the |
| // expected batch. |
| EXPECT_EQ( |
| EncryptedMessage::NONE, |
| batch.encrypted_observation(expected_this_batch_size - 1).scheme()); |
| std::string serialized_observation = |
| batch.encrypted_observation(expected_this_batch_size - 1).ciphertext(); |
| Observation recovered_observation; |
| ASSERT_TRUE(recovered_observation.ParseFromString(serialized_observation)); |
| // Check that it looks right. |
| ASSERT_EQ(1u, recovered_observation.parts().size()); |
| auto iter = recovered_observation.parts().find("Part1"); |
| ASSERT_TRUE(iter != recovered_observation.parts().cend()); |
| const auto& part = iter->second; |
| ASSERT_EQ(encoding_config_id, part.encoding_config_id()); |
| } |
| |
| // Adds multiple string observations to the EnvelopeMaker for the given |
| // metric_id and for encoding_config_id=3, the NoOp encoding. The string |
| // values will be "value<i>" for i in [first, limit). |
| // expected_num_batches: How many batches do we expecte the EnvelopeMaker to |
| // contain after the first add. |
| // expected_this_batch_index: Which batch index do we expect this add to |
| // have gone into. |
| // expected_this_batch_size: What is the expected size of the current batch |
| // *before* the first add. |
| void AddManyStringsNoOp(int first, int limit, uint32_t metric_id, |
| int expected_num_batches, |
| size_t expected_this_batch_index, |
| int expected_this_batch_size) { |
| static const uint32_t kEncodingConfigId = 3; |
| for (int i = first; i < limit; i++) { |
| std::ostringstream stream; |
| stream << "value " << i; |
| expected_this_batch_size++; |
| AddStringObservation(stream.str(), metric_id, kEncodingConfigId, |
| expected_num_batches, expected_this_batch_index, |
| expected_this_batch_size); |
| } |
| } |
| |
| // Adds multiple encoded Observations to two different metrics. Test that |
| // the EnvelopeMaker behaves correctly. |
| void DoTest() { |
| // Add two observations for metric 1 |
| size_t expected_num_batches = 1; |
| size_t expected_this_batch_index = 0; |
| size_t expected_this_batch_size = 1; |
| AddStringObservation("a value", 1, 1, expected_num_batches, |
| expected_this_batch_index, expected_this_batch_size); |
| expected_this_batch_size = 2; |
| AddStringObservation("Apple", 1, 2, expected_num_batches, |
| expected_this_batch_index, expected_this_batch_size); |
| |
| // Add two observations for metric 2 |
| expected_num_batches = 2; |
| expected_this_batch_index = 1; |
| expected_this_batch_size = 1; |
| AddStringObservation("a value", 2, 1, expected_num_batches, |
| expected_this_batch_index, expected_this_batch_size); |
| expected_this_batch_size = 2; |
| AddStringObservation("Banana", 2, 2, expected_num_batches, |
| expected_this_batch_index, expected_this_batch_size); |
| |
| // Add two more observations for metric 1 |
| expected_this_batch_index = 0; |
| expected_this_batch_size = 3; |
| AddStringObservation("a value", 1, 1, expected_num_batches, |
| expected_this_batch_index, expected_this_batch_size); |
| expected_this_batch_size = 4; |
| AddStringObservation("Banana", 1, 2, expected_num_batches, |
| expected_this_batch_index, expected_this_batch_size); |
| |
| // Add two more observations for metric 2 |
| expected_this_batch_index = 1; |
| expected_this_batch_size = 3; |
| AddStringObservation("a value", 2, 1, expected_num_batches, |
| expected_this_batch_index, expected_this_batch_size); |
| expected_this_batch_size = 4; |
| AddStringObservation("Cantaloupe", 2, 2, expected_num_batches, |
| expected_this_batch_index, expected_this_batch_size); |
| |
| // Make the encrypted Envelope. |
| EncryptedMessage encrypted_message; |
| EXPECT_TRUE(envelope_maker_->MakeEncryptedEnvelope(&encrypted_message)); |
| |
| // Decrypt encrypted_message. (No actual decryption is involved since |
| // we used the NONE encryption scheme.) |
| util::MessageDecrypter decrypter(""); |
| Envelope recovered_envelope; |
| EXPECT_TRUE( |
| decrypter.DecryptMessage(encrypted_message, &recovered_envelope)); |
| |
| // Check that it looks right. |
| EXPECT_EQ(2, recovered_envelope.batch_size()); |
| for (size_t i = 0; i < 2; i++) { |
| EXPECT_EQ(i + 1, recovered_envelope.batch(i).meta_data().metric_id()); |
| EXPECT_EQ(4, recovered_envelope.batch(i).encrypted_observation_size()); |
| } |
| } |
| |
| protected: |
| std::unique_ptr<EnvelopeMaker> envelope_maker_; |
| std::shared_ptr<ProjectContext> project_; |
| Encoder encoder_; |
| }; |
| |
| // We perform DoTest() three times with a Clear() between each turn. |
| // This last tests that Clear() works correctly. |
| TEST_F(EnvelopeMakerTest, TestAll) { |
| for (int i = 0; i < 3; i++) { |
| DoTest(); |
| envelope_maker_->Clear(); |
| } |
| } |
| |
| // Tests the MergeOutOf() method. |
| TEST_F(EnvelopeMakerTest, MergeOutOf) { |
| // Add metric 1 batch to EnvelopeMaker 1 with strings 0..9 |
| uint32_t metric_id = 1; |
| int expected_num_batches = 1; |
| size_t expected_this_batch_index = 0; |
| int expected_this_batch_size = 0; |
| AddManyStringsNoOp(0, 10, metric_id, expected_num_batches, |
| expected_this_batch_index, expected_this_batch_size); |
| |
| // Add metric 2 batch to EnvelopeMaker 1 with strings 0..9 |
| metric_id = 2; |
| expected_num_batches = 2; |
| expected_this_batch_index = 1; |
| AddManyStringsNoOp(0, 10, metric_id, expected_num_batches, |
| expected_this_batch_index, expected_this_batch_size); |
| |
| // Take EnvelopeMaker 1 and create EnvelopeMaker 2. |
| auto envelope_maker1 = ResetEnvelopeMaker(); |
| |
| // Add metric 2 batch to EnvelopeMaker 2 with strings 10..19 |
| metric_id = 2; |
| expected_num_batches = 1; |
| expected_this_batch_index = 0; |
| AddManyStringsNoOp(10, 20, metric_id, expected_num_batches, |
| expected_this_batch_index, expected_this_batch_size); |
| |
| // Add metric 3 to EnvelopeMaker 2 with strings 0..9 |
| metric_id = 3; |
| expected_num_batches = 2; |
| expected_this_batch_index = 1; |
| AddManyStringsNoOp(0, 10, metric_id, expected_num_batches, |
| expected_this_batch_index, expected_this_batch_size); |
| |
| // Take EnvelopeMaker 2, |
| auto envelope_maker2 = ResetEnvelopeMaker(); |
| |
| // Now invoke MergeOutOf to merge EnvelopeMaker 2 into EnvelopeMaker 1. |
| envelope_maker1->MergeOutOf(envelope_maker2.get()); |
| |
| // EnvelopeMaker 2 should be empty. |
| EXPECT_TRUE(envelope_maker2->Empty()); |
| |
| // EnvelopeMaker 1 should have three batches for Metrics 1, 2, 3 |
| EXPECT_FALSE(envelope_maker1->Empty()); |
| ASSERT_EQ(3, envelope_maker1->envelope().batch_size()); |
| |
| // Iterate through each of the batches and check it. |
| for (uint index = 0; index < 3; index++) { |
| // Batch 0 and 2 should have 10 encrypted observations and batch |
| // 1 should have 20 because batch 1 from EnvelopeMaker 2 was merged |
| // into batch 1 of EnvelopeMaker 1. |
| auto& batch = envelope_maker1->envelope().batch(index); |
| EXPECT_EQ(index + 1, batch.meta_data().metric_id()); |
| auto expected_num_observations = (index == 1 ? 20 : 10); |
| ASSERT_EQ(expected_num_observations, batch.encrypted_observation_size()); |
| |
| // Check each one of the observations. |
| for (int i = 0; i < expected_num_observations; i++) { |
| // Extract the serialized observation. |
| auto& encrypted_message = batch.encrypted_observation(i); |
| EXPECT_EQ(EncryptedMessage::NONE, encrypted_message.scheme()); |
| std::string serialized_observation = encrypted_message.ciphertext(); |
| Observation recovered_observation; |
| ASSERT_TRUE( |
| recovered_observation.ParseFromString(serialized_observation)); |
| |
| // Check that it looks right. |
| ASSERT_EQ(1u, recovered_observation.parts().size()); |
| auto iter = recovered_observation.parts().find("Part1"); |
| ASSERT_TRUE(iter != recovered_observation.parts().cend()); |
| const auto& part = iter->second; |
| ASSERT_EQ(3u, part.encoding_config_id()); |
| ASSERT_TRUE(part.has_unencoded()); |
| |
| // Check the string values. Batches 0 and 2 are straightforward. The |
| // values should be {"value 0", "value 1", .. "value 9"}. But |
| // batch 1 is more complicated. Because of the way merge is implemented |
| // we expect to see: |
| // {"value 0", "value 1", .. "value 9", "value 19", |
| // "value 18", ... "value 10"} |
| // This is because when we merged batch 1 of Envelope 2 into batch |
| // 1 of Envelope 1 we reversed the order of the observations in |
| // Ennvelope 2. |
| std::ostringstream stream; |
| int expected_value_index = i; |
| if (index == 1 && i >= 10) { |
| expected_value_index = 29 - i; |
| } |
| stream << "value " << expected_value_index; |
| auto expected_string_value = stream.str(); |
| EXPECT_EQ(expected_string_value, |
| part.unencoded().unencoded_value().string_value()); |
| } |
| } |
| |
| // Now we want to test that after the MergeOutOf() operation the EnvelopeMaker |
| // is still usable. Put EnvelopeMaker 1 back as the test EnvelopeMaker. |
| envelope_maker_ = std::move(envelope_maker1); |
| |
| // Add string observations 10..19 to metric ID 1 batches 1, 2 and 3. |
| for (int metric_id = 1; metric_id <= 3; metric_id++) { |
| expected_num_batches = 3; |
| expected_this_batch_index = metric_id - 1; |
| expected_this_batch_size = (metric_id == 2 ? 20 : 10); |
| AddManyStringsNoOp(10, 20, metric_id, expected_num_batches, |
| expected_this_batch_index, expected_this_batch_size); |
| } |
| } |
| |
| } // namespace encoder |
| } // namespace cobalt |