// blob: db9d6d4cf1c4b7388cf642fd1121a51dd978ff2c [file] [log] [blame]
// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
syntax = "proto3";
package cobalt;
import "config/metrics.proto";
import "config/report_configs.proto";
import "config/window_size.proto";
import "config/annotations.proto";
option go_package = "config";
// A Report analyzes Events that were logged to Cobalt and emits an aggregated
// output that may then be queried or visualized by an analyst user of Cobalt.
//
// A Report is associated with a Metric and this means that the Report analyzes
// the Events that were logged to that Metric. The first step occurs on a
// Fuchsia device where Cobalt analyzes the logged Events in order to form
// Observations.
//
// An Observation is built for a particular Report. The type of observation,
// including which of several privacy-oriented Encodings is used or not, depends
// on the Report type.
//
// The Observations are sent to the Cobalt Shuffler which shuffles them in order
// to break linkability between Observations and linkability with the
// originating device. Next the shuffled Observations are sent to the Analyzer
// which aggregates Observations from all Fuchsia devices in order to generate
// a report.
//
// There are multiple types of Metrics and multiple types of Reports. Each
// Report type is compatible with only some of the Metric types.
//
// A ReportDefinition defines a Cobalt Report to be generated.
// An instance of ReportDefinition is always associated with an instance of
// MetricDefinition called the owning MetricDefinition.
message ReportDefinition {
// Next id: 14
// (Fields used by all Report types are numbered separately, from 100.)

// Unique name for this Report within its owning MetricDefinition.
// The name must obey the syntax of a C variable name and must have length
// at most 64. The name must not be changed once data collection has
// begun. Changing the name constitutes declaring a different
// ReportDefinition.
string report_name = 1;
// The unique numerical identifier for this Report used internally by Cobalt.
// Normally an instance of ReportDefinition is created by the Cobalt config
// YAML parser and this value will be automatically set to the hash of the
// |report_name| field. If an instance of ReportDefinition is created by some
// means other than the YAML parser then this should be set to a value that
// is unique within this ReportDefinition's owning MetricDefinition.
uint32 id = 2;
// A Report has one of the following types.
enum ReportType {
// Zero value, i.e. what |report_type| holds when it has not been set.
REPORT_TYPE_UNSET = 0;
// Gives the total, daily, fleet-wide count of the number of occurrences of
// each of a set of event types. Supports the use of local differential
// privacy.
//
// Supported Input Types:
//   - Metric type: EVENT_OCCURRED
//     Encoding: Basic RAPPOR
//     Observation type: BasicRapporObservation
//
// ReportDefinition fields particular to this type:
//   - local_privacy_noise_level  # Gives Basic RAPPOR parameters
//
// Report row type: SimpleOccurrenceCountReportRow. (See report_row.proto)
SIMPLE_OCCURRENCE_COUNT = 1;
// Gives the total, daily, fleet-wide count of the number of occurrences of
// each of a set of event types, for each of a set of components.
//
// Supported Input Types:
//   - Metric type: EVENT_COUNT
//     Encoding: component name is hashed
//     Observation type: IntegerEventObservation
//     Observation value field: count
//
// ReportDefinition fields particular to this type:
//   - candidate_list  # All known component names
//   - candidate_file  # File with all known component names
//
// Report row type: EventComponentOccurrenceCountReportRow.
// (See report_row.proto)
EVENT_COMPONENT_OCCURRENCE_COUNT = 2;
// Gives daily, fleet-wide aggregate statistics of a numerical metric of one
// or more event types, associated with one or more components.
//
// Supported Input Types:
//   - Metric types: ELAPSED_TIME, FRAME_RATE, MEMORY_USAGE
//     Encoding: component name is hashed
//     Observation type: IntegerEventObservation
//     Observation value field: Respectively, duration_microseconds,
//                              round(fps * 100), bytes
//
// ReportDefinition fields particular to this type:
//   - candidate_list  # All known component names
//   - candidate_file  # File with all known component names
//
// Report fields:
//   - date
//   - system_profile     # One or more fields from the SystemProfile message
//                        # type (cobalt/observation.proto) describing the
//                        # system on which the events occurred.
//   - event
//   - component
//   - count
//   - mean, median, etc  # Aggregate numerical statistics for that event
//                        # type for that component, on that date, over
//                        # devices with that system profile.
//
// Example report:
//
// Date, SystemProfile, Component, Event, Count, Mean, Median, Min, Max, ...
// -------------------------------------------------------------------------
// 2018-7-25,<sys-prof 1>,"Component 1","Event 1",12345,123.4,123.4,123.4,..
// 2018-7-25,<sys-prof 2>,"Component 1","Event 2",34567,123.4,123.4,123.4,..
// 2018-7-25,<sys-prof 1>,"Component 2","Event 1",12345,123.4,123.4,123.4,..
// 2018-7-25,<sys-prof 1>,"Component 2","Event 2",23456,123.4,123.4,123.4,..
NUMERIC_AGGREGATION = 3;
// Gives daily, fleet-wide histograms of an integer-valued metric of one or
// more event types, associated with one or more components.
//
// Supported Input Types:
//   - Metric types: ELAPSED_TIME, FRAME_RATE, MEMORY_USAGE
//     Encoding, Observation type, Observation value field: Same as
//     NUMERIC_AGGREGATION above.
//   - Metric type: INT_HISTOGRAM
//     Encoding: Component name is hashed, values are bucketed
//     Observation type: HistogramObservation
//
// ReportDefinition fields particular to this type:
//   - candidate_list  # All known component names
//   - candidate_file  # File with all known component names
//   - int_buckets     # Definition of integer-range buckets for histogram.
//                     # Not used for Metric type INT_HISTOGRAM because the
//                     # Metric already contains this field.
//
// Report row type: IntRangeHistogramReportRow
// (See report_row.proto)
INT_RANGE_HISTOGRAM = 4;
// Gives the approximate distribution of the most commonly used strings in a
// specific context specified by comments in the owning MetricDefinition.
// Supports the use of local differential privacy.
//
// Supported Input Types:
//   - Metric types: STRING_USED
//     Encoding: String RAPPOR
//     Observation type: StringRapporObservation
//
// ReportDefinition fields particular to this type:
//   - local_privacy_noise_level  # Gives RAPPOR parameters p,q
//   - expected_population_size   # Hint for RAPPOR num cohorts
//   - expected_string_set_size   # Hint for RAPPOR num bits
//   - candidate_list             # All known string values
//   - candidate_file             # File with all known string values
//
// Report fields:
//   - date
//   - system_profile  # One or more fields from the SystemProfile message
//                     # type (cobalt/observation.proto) describing the
//                     # system on which the events occurred.
//   - string          # The string that was used
//   - count           # An estimate of the total number of times the string
//                     # was used for the given date, over devices with that
//                     # system profile.
//   - std_err         # Estimate of the std err in the value of the count
//                     # estimate.
//
// Example report:
//
// Date, SystemProfile, String, Count, std_err
// ------------------------------------------
// 2018-7-25,<sys-prof 1>,"www.alpha.com", 12345, 1.3
// 2018-7-25,<sys-prof 1>,"www.beta.com", 23456, 1.3
// 2018-7-25,<sys-prof 2>,"www.gamma.com", 34567, 1.3
// 2018-7-25,<sys-prof 2>,"www.delta.com", 78901, 1.3
//
// Note that the report will only include the most frequently used strings,
// the so-called "heavy-hitters."
HIGH_FREQUENCY_STRING_COUNTS = 5;
// Gives the counts of sufficiently frequently used strings in a specific
// context specified by comments in the owning MetricDefinition.
//
// This report type uses threshold encryption to protect sensitive strings
// that are logged by too few client devices. The strings are transmitted
// as ciphertext. The ciphertext may only be decrypted on the Cobalt server
// if the number of distinct devices that have logged that string reaches
// a specified threshold. This Report type is appropriate to use in
// contexts in which the set of strings being logged exhibits high entropy.
//
// Supported Input Types:
//   - Metric types: STRING_USED
//     Encoding: Forculus threshold encryption
//     Observation type: ForculusObservation
//
// ReportDefinition fields particular to this type:
//   - threshold  # The number of distinct devices that must log a string
//                # before that string may be decrypted on the server.
//
// Report fields:
//   - date
//   - system_profile  # One or more fields from the SystemProfile message
//                     # type (cobalt/observation.proto) describing the
//                     # system on which the events occurred.
//   - string          # The string that was used
//   - count           # The total number of times the string was used
//                     # for the given date, over devices with that system
//                     # profile.
//
// Example report:
//
// Date, SystemProfile, String, Count
// ------------------------------------------
// 2018-7-25,<sys-prof 1>,"xhyeijly" ,12345
// 2018-7-25,<sys-prof 1>,"oywmhdyoj" ,23456
// 2018-7-25,<sys-prof 2>,"25dj#^&" ,34567
// 2018-7-25,<sys-prof 2>,"dhaukkdi" ,8901
STRING_COUNTS_WITH_THRESHOLD = 6;
// Gives a raw dump of the Observations for a day of one of the numeric
// performance metrics.
//
// Supported Input Types:
//   - Metric types: ELAPSED_TIME, FRAME_RATE, MEMORY_USAGE
//     Encoding: component name is hashed
//     Observation type: IntegerEventObservation
//     Observation value field: Respectively, duration_microseconds,
//                              round(fps * 100), bytes
//
// ReportDefinition fields particular to this type:
//   - candidate_list  # All known component names
//   - candidate_file  # File with all known component names
//
// Report fields:
//   - date
//   - system_profile  # One or more fields from the SystemProfile message
//                     # type (cobalt/observation.proto) describing the
//                     # system on which the events occurred.
//   - event
//   - component
//   - value           # The integer value from the Observation
//
// Example report:
//
// Date, SystemProfile, Component, Event, Value
// -------------------------------------------------------------------------
// 2018-7-25,<sys-prof 1>,"Component 1","Event 1",12345
// 2018-7-25,<sys-prof 2>,"Component 1","Event 2",34567
// 2018-7-25,<sys-prof 1>,"Component 2","Event 1",12345
// 2018-7-25,<sys-prof 2>,"Component 2","Event 3",78901
NUMERIC_PERF_RAW_DUMP = 7;
////////////////////// Locally aggregated ReportTypes //////////////////////
//
// Report types for which logged Events are aggregated on each device over a
// specified number of days. Each day, each device sends an Observation of
// the aggregate of the Events over the rolling window ending on that day.
//
////////////////////////////////////////////////////////////////////////////
// For each of a set of events, gives an estimate of the number of unique
// devices on which that event occurred at least once during a rolling
// window. Supports the use of local differential privacy with Basic RAPPOR.
//
// Each device sends an observation each day for each window size registered
// in a ReportDefinition of this type, whether or not any of the associated
// events occurred in the window ending on that day.
//
// Supported Input Types:
//   - Metric type: EVENT_OCCURRED
//   - Encoding: Basic RAPPOR. In the decoding step one should not assume
//               that the true bit vector has exactly 1 bit set, but rather
//               can have any number of bits set. This encoding/decoding
//               scheme is the same as the Instantaneous Randomized Response
//               in the RAPPOR paper (https://arxiv.org/pdf/1407.6981.pdf).
//   - Observation type: UniqueActivesObservation
//
// ReportDefinition fields particular to this type:
//   - local_privacy_noise_level  # Gives Basic RAPPOR parameters.
//   - window_size                # A list of window sizes in days.
//
// Report row type: UniqueActivesReportRow.
// (See report_row.proto)
UNIQUE_N_DAY_ACTIVES = 8;
// For each of a set of events, for each of a set of component labels, for
// a rolling window of a particular number of days, gives summary statistics
// of the number of occurrences on individual devices of that event, with
// that component, during the window.
//
// Supported Input Types:
//   - Metric type: EVENT_COUNT
//   - Encoding: component name is hashed
//   - Observation type: PerDeviceCountObservation
//
// ReportDefinition fields particular to this type:
//   - candidate_list  # All known component names.
//   - candidate_file  # File with all known component names.
//   - window_size     # A list of window sizes in days.
//
// Report row type: PerDeviceCountStatsReportRow.
// (See report_row.proto)
PER_DEVICE_COUNT_STATS = 9;
/////////////////////// Custom Raw Dump Report /////////////////////////////
// Gives a raw dump of the Observations for a CustomMetric for a day.
//
// Supported Input Types:
//   - Metric types: CUSTOM
//     Encoding: none
//     Observation type: CustomObservation
//
// ReportDefinition fields particular to this type: none
//
// Report fields:
//   - date
//   - system_profile       # One or more fields from the SystemProfile
//                          # message type (cobalt/observation.proto)
//                          # describing the system on which the events
//                          # occurred.
//   - event
//   - component
//   - part-1, part-2, ...  # The values of the custom parts
//
// Example report:
//
// NOTE(review): the example rows below have fewer columns than the header
// and do not obviously line up with it -- confirm the intended layout.
//
// Date, SystemProfile, Component, Event, part-1, part-2, part-3, ...
// -------------------------------------------------------------------------
// 2018-7-25,<sys-prof 1>,"Part 1","value1"
// 2018-7-25,<sys-prof 2>,"Part 1","value1"
// 2018-7-25,<sys-prof 1>,"Part 2","value2"
//
// A report of this type must be associated with a MetricDefinition of type
// CustomMetric. Observations for the Report will be of type
// CustomObservation.
CUSTOM_RAW_DUMP = 9999;
}
// The type of this Report: one of the ReportType values defined above.
ReportType report_type = 3;
//////////////// Fields specific to some Report types /////////////////
// A level of random noise to use when encoding observations for RAPPOR.
enum LocalPrivacyNoiseLevel {
// local_privacy_noise_level must be explicitly set when using
// Basic RAPPOR or String RAPPOR.
NOISE_LEVEL_UNSET = 0;
// p = 0.0, q = 1.0
NONE = 1;
// p = 0.01, q = 0.99
SMALL = 2;
// p = 0.1, q = 0.9
MEDIUM = 3;
// p = 0.25, q = 0.75
LARGE = 4;
}
// This field is used only with Report types SIMPLE_OCCURRENCE_COUNT,
// HIGH_FREQUENCY_STRING_COUNTS and UNIQUE_N_DAY_ACTIVES. Its value must not
// be changed after data collection has begun or the data will become
// corrupted.
LocalPrivacyNoiseLevel local_privacy_noise_level = 4;
// This field is used only with Report type
// HIGH_FREQUENCY_STRING_COUNTS.
// An estimate of the number of different devices that will be participating
// in collecting data for the report. This helps to tune the RAPPOR
// algorithm. This must not be changed after data collection has
// begun or the data may become corrupted.
uint32 expected_population_size = 5;
// This field is used only with Report type
// HIGH_FREQUENCY_STRING_COUNTS.
// An estimate of the size of the set of all strings that are likely to
// be logged. This helps to tune the RAPPOR algorithm. This
// must not be changed after data collection has begun or the data may
// become corrupted.
uint32 expected_string_set_size = 6;
// This field is used only with Report type STRING_COUNTS_WITH_THRESHOLD.
// The threshold. Only strings that have been used by at least this many
// distinct clients will appear in the report.
// Must satisfy 2 <= threshold <= 1,000,000
// This must not be changed after data collection has begun or the data may
// become corrupted.
uint32 threshold = 7;
// Explicit list of known string values. Either this or |candidate_file|
// should be used, not both. Used for the hashed |component_name|
// field for several Report types and the encoded |string| field for the
// HIGH_FREQUENCY_STRING_COUNTS Report type.
// NOTE(review): the hide_on_client option presumably comes from
// config/annotations.proto -- confirm its exact effect there.
repeated string candidate_list = 8 [(cobalt_options).hide_on_client = true];
// Simple name or full path to file containing known string values.
// Either this or |candidate_list| should be used, not both. Used for the
// hashed |component_name| field for several Report types and the encoded
// |string| field for the HIGH_FREQUENCY_STRING_COUNTS Report type.
string candidate_file = 9;
// This field is used only with the INT_RANGE_HISTOGRAM Report type, but
// not used when the Metric type is INT_HISTOGRAM because in that case the
// MetricDefinition already contains an instance of IntegerBuckets.
IntegerBuckets int_buckets = 10;
// This field is used only with the UNIQUE_N_DAY_ACTIVES and
// PER_DEVICE_COUNT_STATS Report types. Used to specify the length in days
// of the rolling window for those Report types. Cobalt will send a separate
// Observation for each WindowSize specified.
repeated WindowSize window_size = 11;
// This field is used only with CUSTOM_RAW_DUMP Report type. Used to
// specify the path to the location the metrics should be written to.
string output_location = 12;
// This field can be used with all Report types. When set, the generated
// report will exclude an Observation if there are not at least
// |reporting_threshold| number of distinct devices reporting Observations
// with the same ObservationMetadata.
uint32 reporting_threshold = 13;
/////////////////// Fields used by all Report types ///////////////////
// The list of SystemProfileFields to include in each row of the report.
// Optional. If not specified a reasonable default will be used.
repeated SystemProfileField system_profile_field = 100;
// Configuration of differentially private release for this report.
DifferentialPrivacyConfig dp_release_config = 101;
// Overrides the location to where Cobalt exports this Report.
// This field is implementation-specific. If omitted Cobalt will use its
// default settings to determine the location of the exported report.
string export_location_override = 102;
}