| // Copyright 2018 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef COBALT_SRC_LOGGER_ENCODER_H_ |
| #define COBALT_SRC_LOGGER_ENCODER_H_ |
| |
| #include <cstdint> |
| #include <memory> |
| #include <string> |
| #include <tuple> |
| #include <vector> |
| |
| #include <google/protobuf/repeated_field.h> |
| |
| #include "src/algorithms/rappor/rappor_encoder.h" |
| #include "src/logger/project_context.h" |
| #include "src/logger/types.h" |
| #include "src/pb/event.pb.h" |
| #include "src/pb/metadata_builder.h" |
| #include "src/pb/observation.pb.h" |
| #include "src/public/lib/status.h" |
| #include "src/registry/aggregation_window.pb.h" |
| #include "src/registry/metric_definition.pb.h" |
| #include "src/registry/report_definition.pb.h" |
| #include "src/system_data/client_secret.h" |
| #include "src/system_data/system_data.h" |
| |
| namespace cobalt::logger { |
| |
| // A HistogramPtr provides a movable, uniquely-owned way of passing the buckets of a Histogram. |
| using HistogramPtr = std::unique_ptr<google::protobuf::RepeatedPtrField<HistogramBucket>>; |
| |
| // An EventValuesPtr provides a movable, uniquely-owned way of passing the dimensions of a |
| // custom event (a map from string keys to CustomDimensionValue). |
| using EventValuesPtr = std::unique_ptr<google::protobuf::Map<std::string, CustomDimensionValue>>; |
| |
| // A vector of event codes that the data corresponds to. |
| using EventCodes = std::vector<uint32_t>; |
| |
| // A histogram represented as a vector of (index, count) pairs. |
| using Histogram = std::vector<std::tuple<uint32_t, int64_t>>; |
| |
| // An Encoder is used for creating Observations. An Observation is the |
| // unit of encoded data that is sent from a client device to the Shuffler |
| // and ultimately to the Analyzer. |
| // |
| // Observations are derived from Events. Events are the raw data directly |
| // logged by a Cobalt user on the client system. |
| // |
| // There are two broad categories of Observations: immediate Observations and |
| // locally-aggregated observations. An immediate Observation is generated |
| // directly from a single Event at the time the Event is logged. A |
| // locally-aggregated Observation is computed based on the data of many |
| // logged Events over a period of time. |
| // |
| // An Observation is associated with a Metric and this means that the |
| // Observation is derived from one or more Events belonging to that Metric. |
| // |
| // An Observation is always generated for a particular Report. The Report |
| // definition indicates whether the Observation should be an immediate or |
| // locally aggregated Observation and how the Observation should be encoded. |
| // |
| // There will usually be a singleton instance of Encoder on a client device. |
| // The Encoder interface is not exposed directly to Cobalt users. Instead it |
| // is used by the Logger implementation in order to encode immediate |
| // Observations and it is used by the Local Aggregator to encode |
| // locally-aggregated Observations. |
| class Encoder { |
| public: |
| // Constructor |
| // |
| // client_secret: A random secret that is generated once on the client and then persisted by the |
| // client and used repeatedly. It is used as an input by some of the encodings. |
| // |
| // metadata_builder: Used to construct ObservationMetadata. The Encoder does not take ownership of |
| // metadata_builder. The Encoder keeps a reference member of this type, so the builder is expected |
| // to outlive the Encoder. |
| Encoder(system_data::ClientSecret client_secret, MetadataBuilder& metadata_builder); |
| |
| // An Encoder is not copyable. Note that it is not movable either: because the copy operations |
| // are user-declared (as deleted), the compiler does not implicitly declare any move operations. |
| Encoder(Encoder const&) = delete; |
| Encoder& operator=(Encoder const&) = delete; |
| |
| ////////////////////////////////////////////////////////////////////////////////////////////////// |
| // |
| // Cobalt 1.0 Encoding methods. |
| // |
| // An Observation is always tagged with a day_index indicating the day on |
| // which the Observation was encoded. For immediate Observations this will |
| // be the same as the day the corresponding Event was logged. For |
| // locally-aggregated Observations this will be the day the aggregation was |
| // completed. |
| // |
| // An Observation is always associated with an instance of ObservationMetadata |
| // that contains the metric_id, report_id and day_index, among other data. |
| // |
| // All of the Cobalt 1.0 Encode*() methods take the same first three parameters: |
| // (1) |metric| A MetricRef that provides the names and IDs of the customer, |
| // project and metric associated with the Observation being encoded. |
| // Note that the methods of this class do not see the MetricDefinition |
| // itself and have no knowledge of the different Metric types or their |
| // meanings. In particular no validation against the MetricDefinition or type |
| // is performed by this class. If any such validation is needed it must be |
| // performed by the caller prior to invoking the Encode*() methods of this |
| // class. |
| // (2) |report| A pointer to the definition of the Report associated with the |
| // Observation being encoded. The ReportDefinition may carry fields |
| // particular to the encoding to be performed. The following |
| // ReportDefinition fields are always required to be populated: |name|, |
| // |id|, |system_profile_field|. Additionally, each Encode*() method may |
| // require other fields of ReportDefinition to be populated. This will be |
| // specified in the comments for each Encode*() method. |
| // (3) |day_index| The day associated with the Observation being encoded. |
| // |
| ////////////////////////////////////////////////////////////////////////////////////////////////// |
| |
| // The output of the Cobalt 1.0 Encode*() methods is a triple consisting of a |
| // status and, if the status is kOK, a new observation and its metadata. The |
| // observation will have been assigned a new quasi-unique |random_id|. |
| struct Result { |
| Status status; |
| std::unique_ptr<Observation> observation; |
| std::unique_ptr<ObservationMetadata> metadata; |
| }; |
| |
| // Encodes an Observation of type BasicRapporObservation. |
| // |
| // metric: Provides access to the names and IDs of the customer, project and |
| // metric associated with the Observation being encoded. |
| // |
| // report: The definition of the Report associated with the Observation being |
| // encoded. In addition to the common fields always required, this method also |
| // requires that the |local_privacy_noise_level| field be set. This is used to |
| // determine the p and q values for Basic RAPPOR. |
| // |
| // day_index: The day index associated with the Observation being encoded. |
| // |
| // value_index: The index to encode using Basic RAPPOR. It must be in |
| // the range [0, num_categories - 1]. |
| // |
| // num_categories: The number of categories to use in the Basic RAPPOR |
| // encoding. |
| Result EncodeBasicRapporObservation(MetricRef metric, const ReportDefinition* report, |
| uint32_t day_index, uint32_t value_index, |
| uint32_t num_categories) const; |
| |
| // Encodes an Observation of type IntegerEventObservation. |
| // |
| // metric: Provides access to the names and IDs of the customer, project and |
| // metric associated with the Observation being encoded. |
| // |
| // report: The definition of the Report associated with the Observation being |
| // encoded. |
| // |
| // day_index: The day index associated with the Observation being encoded. |
| // |
| // event_codes: This will be used to populate the bits of the Observation's |
| // |event_code|. |
| // |
| // component: The hash of this value will populate the Observation's |
| // |component_name_hash| field. |
| // |
| // value: This will populate the Observation's |value| field. |
| Result EncodeIntegerEventObservation(MetricRef metric, const ReportDefinition* report, |
| uint32_t day_index, |
| const google::protobuf::RepeatedField<uint32_t>& event_codes, |
| const std::string& component, int64_t value) const; |
| |
| // Encodes an Observation of type HistogramObservation. |
| // |
| // metric: Provides access to the names and IDs of the customer, project and |
| // metric associated with the Observation being encoded. |
| // |
| // report: The definition of the Report associated with the Observation being |
| // encoded. |
| // |
| // day_index: The day index associated with the Observation being encoded. |
| // |
| // event_codes: This will be used to populate the bits of the Observation's |
| // |event_code|. |
| // |
| // component: The hash of this value will populate the Observation's |
| // |component_name_hash| field. |
| // |
| // histogram: This will be used to populate the Observation's |buckets| field. |
| // This method does not validate |histogram| against the Metric definition. |
| // That is the caller's responsibility. |
| Result EncodeHistogramObservation(MetricRef metric, const ReportDefinition* report, |
| uint32_t day_index, |
| const google::protobuf::RepeatedField<uint32_t>& event_codes, |
| const std::string& component, HistogramPtr histogram) const; |
| |
| // Encodes an Observation of type CustomObservation from a map of dimension values. |
| // |
| // metric: Provides access to the names and IDs of the customer, project and |
| // metric associated with the Observation being encoded. |
| // |
| // report: The definition of the Report associated with the Observation being |
| // encoded. |
| // |
| // day_index: The day index associated with the Observation being encoded. |
| // |
| // event_values: This will be used to populate the Observation's |values| |
| // field. This method does not validate |event_values| against the Metric's |
| // proto definition. That is the caller's responsibility. |
| Result EncodeCustomObservation(MetricRef metric, const ReportDefinition* report, |
| uint32_t day_index, EventValuesPtr event_values) const; |
| |
| // Encodes an Observation of type CustomObservation carrying a caller-supplied |
| // |serialized_proto|. |
| // |
| // metric: Provides access to the names and IDs of the customer, project and |
| // metric associated with the Observation being encoded. |
| // |
| // report: The definition of the Report associated with the Observation being |
| // encoded. |
| // |
| // day_index: The day index associated with the Observation being encoded. |
| // |
| // serialized_proto: This will be used to populate the Observation's |serialized_proto| field. |
| // This method does not validate |serialized_proto|. That is the caller's responsibility. |
| Result EncodeSerializedCustomObservation(MetricRef metric, const ReportDefinition* report, |
| uint32_t day_index, |
| std::unique_ptr<std::string> serialized_proto) const; |
| |
| // Encodes an Observation of type UniqueActivesObservation. |
| // |
| // metric: Provides access to the names and IDs of the customer, project and |
| // metric associated with the Observation being encoded. |
| // |
| // report: The definition of the Report associated with the Observation being |
| // encoded. In addition to the common fields always required, this method also |
| // requires that the |local_privacy_noise_level| field be set. This is used to |
| // determine the p and q values for Basic RAPPOR. |
| // |
| // day_index: The day index associated with the Observation being encoded. |
| // This is the last day (inclusive) of the rolling window associated with this |
| // Observation. |
| // |
| // event_code: The event code of the Event associated with this Observation. |
| // This value should be a nonnegative integer less than or equal to the |
| // max_event_code of the MetricDefinition wrapped by |metric|, but it is the |
| // caller's responsibility to ensure this. |
| // |
| // was_active: Set to true if an event with code |event_code| |
| // occurred during the window |aggregation_window| ending on |day_index|, |
| // false otherwise. If |was_active| is true, the BasicRapporObservation field |
| // of the UniqueActivesObservation is a Basic RAPPOR encoding of a 1 bit. |
| // If |was_active| is false, the BasicRapporObservation field is a Basic |
| // RAPPOR encoding of a 0 bit. |
| // |
| // aggregation_window: The aggregation window associated with the Observation. |
| // This should be one of the OnDeviceAggregationWindows specified in |report| |
| // (or equivalent, if |report| has a WindowSize) but it is the caller's |
| // responsibility to ensure this. |
| Result EncodeUniqueActivesObservation(MetricRef metric, const ReportDefinition* report, |
| uint32_t day_index, uint32_t event_code, bool was_active, |
| const OnDeviceAggregationWindow& aggregation_window) const; |
| |
| // Encodes an Observation of type PerDeviceNumericObservation. |
| // |
| // metric: Provides access to the names and IDs of the customer, project and |
| // metric associated with the Observation being encoded. |
| // |
| // report: The definition of the Report associated with the Observation being |
| // encoded. |
| // |
| // day_index: The day index associated with the Observation being encoded. |
| // This is the last day (inclusive) of the rolling window associated with this |
| // Observation. |
| // |
| // component: The component associated with this Observation. The hash of this |
| // value will populate the Observation's |component_name_hash| field. |
| // |
| // event_codes: This will be used to populate the bits of the Observation's |
| // |event_code|. |
| // |
| // value: This will populate the |value| field of the |
| // IntegerEventObservation wrapped by the PerDeviceNumericObservation. |
| // |
| // aggregation_window: The aggregation window associated with the Observation. |
| // This should be one of the OnDeviceAggregationWindows specified in |report| |
| // (or equivalent, if |report| has a WindowSize) but it is the caller's |
| // responsibility to ensure this. |
| Result EncodePerDeviceNumericObservation( |
| MetricRef metric, const ReportDefinition* report, uint32_t day_index, |
| const std::string& component, const google::protobuf::RepeatedField<uint32_t>& event_codes, |
| int64_t value, const OnDeviceAggregationWindow& aggregation_window) const; |
| |
| // Encodes an Observation of type PerDeviceNumericHistogramObservation. |
| // |
| // NOTE(review): the type names used in this comment (PerDeviceNumericHistogramObservation, |
| // PerDeviceNumericObservation) differ from the method name; confirm them against |
| // observation.proto. |
| // |
| // metric: Provides access to the names and IDs of the customer, project and |
| // metric associated with the Observation being encoded. |
| // |
| // report: The definition of the Report associated with the Observation being |
| // encoded. |
| // |
| // day_index: The day index associated with the Observation being encoded. |
| // This is the last day (inclusive) of the rolling window associated with this |
| // Observation. |
| // |
| // component: The component associated with this Observation. The hash of this |
| // value will populate the Observation's |component_name_hash| field. |
| // |
| // event_codes: This will be used to populate the bits of the Observation's |
| // |event_code|. |
| // |
| // value: This is the raw value of the aggregation. This will be used to populate the |
| // HistogramBuckets field wrapped by the HistogramObservation in PerDeviceNumericObservation. |
| // |
| // aggregation_window: The aggregation window associated with the Observation. |
| // This should be one of the OnDeviceAggregationWindows specified in |report| |
| // (or equivalent, if |report| has a WindowSize) but it is the caller's |
| // responsibility to ensure this. |
| Result EncodePerDeviceHistogramObservation( |
| MetricRef metric, const ReportDefinition* report, uint32_t day_index, |
| const std::string& component, const google::protobuf::RepeatedField<uint32_t>& event_codes, |
| int64_t value, const OnDeviceAggregationWindow& aggregation_window) const; |
| |
| // Encodes an Observation of type ReportParticipationObservation. |
| // |
| // metric: Provides access to the names and IDs of the customer, project, and |
| // metric associated with the Observation being encoded. |
| // |
| // report: The definition of the Report associated with the Observation being |
| // encoded. |
| // |
| // day_index: The day index associated with the Observation being encoded. |
| Result EncodeReportParticipationObservation(MetricRef metric, const ReportDefinition* report, |
| uint32_t day_index) const; |
| |
| ////////////////////////////////////////////////////////////////////////////////////////////////// |
| // |
| // Cobalt 1.1 Encoding methods. |
| // |
| // All Cobalt 1.1 Observations are locally aggregated. |
| // |
| // These methods are static and return a StatusOr holding the new Observation; unlike the |
| // Cobalt 1.0 methods they take no metric, report or day_index and produce no metadata. |
| // |
| ////////////////////////////////////////////////////////////////////////////////////////////////// |
| |
| // Encodes an Observation of type IntegerObservation. |
| // |
| // data: A vector of (event_codes, integer_value) pairs, that will be used to encode the |
| // Observation. |
| [[nodiscard]] static lib::statusor::StatusOr<std::unique_ptr<Observation>> |
| EncodeIntegerObservation(const std::vector<std::tuple<EventCodes, int64_t>>& data); |
| |
| // Encodes an Observation of type SumAndCountObservation. |
| // |
| // data: A vector of (event_codes, sum, count) triples, that will be used to encode the |
| // Observation. |
| [[nodiscard]] static lib::statusor::StatusOr<std::unique_ptr<Observation>> |
| EncodeSumAndCountObservation( |
| const std::vector<std::tuple<EventCodes, int64_t, uint32_t>>& data); |
| |
| // Encodes an Observation of type IndexHistogramObservation. |
| // |
| // data: A vector of (event_codes, histogram vector of (index, count)) pairs that will be used to |
| // encode the Observation. |
| [[nodiscard]] static lib::statusor::StatusOr<std::unique_ptr<Observation>> |
| EncodeIndexHistogramObservation(const std::vector<std::tuple<EventCodes, Histogram>>& data); |
| |
| // Encodes an Observation of type StringHistogramObservation. |
| // |
| // hashes: Vector of hashes of strings (hashed using Farmhash Fingerprint128). |
| // data: A vector of (event_codes, histogram vector of (index, count)) pairs that will be used to |
| // encode the Observation. The bucket with index i in the histogram contains the count for the |
| // string whose hash is in position i in the hashes vector. |
| [[nodiscard]] static lib::statusor::StatusOr<std::unique_ptr<Observation>> |
| EncodeStringHistogramObservation(const std::vector<std::string>& hashes, |
| const std::vector<std::tuple<EventCodes, Histogram>>& data); |
| |
| private: |
| // Encodes a BasicRapporObservation for a given |metric|, |report|, and |
| // |day_index| in which the data field is a Basic RAPPOR encoding of a vector |
| // of |num_categories| zero bits. |
| Result EncodeNullBasicRapporObservation(MetricRef metric, const ReportDefinition* report, |
| uint32_t day_index, uint32_t num_categories) const; |
| |
| // Returns a result containing a populated ObservationMetadata and an empty Observation. |
| Result NewObservationWithMetadata(MetricRef metric, const ReportDefinition* report, |
| uint32_t day_index) const; |
| |
| // The client secret described in the constructor comment. |
| const system_data::ClientSecret client_secret_; |
| // Not owned; see the constructor comment. |
| MetadataBuilder& metadata_builder_; |
| }; |
| |
| } // namespace cobalt::logger |
| |
| #endif // COBALT_SRC_LOGGER_ENCODER_H_ |