blob: 68aa07409e76dacaf32373c7ace951fa189de71b [file] [log] [blame]
// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
syntax = "proto3";
package cobalt;
import "src/registry/aggregation_window.proto";
import "src/registry/annotations.proto";
import "src/registry/window_size.proto";
option go_package = "config";
// A Report analyzes Events that were logged to Cobalt and emits an aggregated
// output that may then be queried or visualized by an analyst user of Cobalt.
//
// A Report is associated with a Metric and this means that the Report analyzes
// the Events that were logged to that Metric. The first step occurs on a
// Fuchsia device where Cobalt analyzes the logged Events in order to form
// Observations.
//
// An Observation is built for a particular Report. The type of observation,
// including which of several privacy-oriented Encodings is used or not, depends
// on the Report type.
//
// The Observations are sent to the Cobalt Shuffler which shuffles them in order
// to break linkability between Observations and linkability with the
// originating device. Next the shuffled Observations are sent to the Analyzer
// which aggregates Observations from all Fuchsia devices in order to generate
// a report.
//
// There are multiple types of Metrics and multiple types of Reports. Each
// Report type is compatible with only some of the Metric types.
//
// A ReportDefinition defines a Cobalt Report to be generated.
// An instance of ReportDefinition is always associated with an instance of
// MetricDefinition called the owning MetricDefinition.
message ReportDefinition {
  // Field number 7 and the field name "threshold" are reserved and must not
  // be used by any field of this message.
  reserved 7;
  reserved "threshold";

  // Next id: 17

  // Unique name for this Report within its owning MetricDefinition.
  // The name must obey the syntax of a C variable name and must have length
  // at most 64. The name must not be changed once data collection has
  // begun. Changing the name constitutes declaring a different
  // ReportDefinition.
  string report_name = 1;

  // The unique numerical identifier for this Report used internally by
  // Cobalt. Normally an instance of ReportDefinition is created by the
  // Cobalt config YAML parser and this value will be automatically set to
  // the hash of the |report_name| field. If an instance of ReportDefinition
  // is created by some means other than the YAML parser then this should be
  // set to a value that is unique within this ReportDefinition's owning
  // MetricDefinition.
  uint32 id = 2;

  // A Report has one of the following types.
  enum ReportType {
    // Values 5 and 6 and the two names below are reserved and must not be
    // used by any value of this enum.
    reserved 5, 6;
    reserved "HIGH_FREQUENCY_STRING_COUNTS", "STRING_COUNTS_WITH_THRESHOLD";

    REPORT_TYPE_UNSET = 0;

    // Gives the total, daily, fleet-wide count of the number of occurrences
    // of each of a set of event types. Supports the use of local
    // differential privacy.
    //
    // Supported Input Types:
    //   - Metric type: EVENT_OCCURRED
    //     Encoding: Basic RAPPOR
    //     Observation type: BasicRapporObservation
    //
    // ReportDefinition fields particular to this type:
    //   - local_privacy_noise_level # Gives Basic RAPPOR parameters
    //
    // Report row type: SimpleOccurrenceCountReportRow.
    // (See report_row.proto)
    SIMPLE_OCCURRENCE_COUNT = 1;

    // Gives the total, daily, fleet-wide count of the number of occurrences
    // of each of a set of event types, for each of a set of components.
    //
    // Supported Input Types:
    //   - Metric type: EVENT_COUNT
    //     Encoding: component name is hashed
    //     Observation type: IntegerEventObservation
    //     Observation value field: count
    //
    // ReportDefinition fields particular to this type:
    //   - candidate_list # All known component names
    //   - candidate_file # File with all known component names
    //
    // Report row type: EventComponentOccurrenceCountReportRow.
    // (See report_row.proto)
    EVENT_COMPONENT_OCCURRENCE_COUNT = 2;

    // Gives daily, fleet-wide aggregate statistics of a numerical metric of
    // one or more event types, associated with one or more components.
    //
    // Supported Input Types:
    //   - Metric types: ELAPSED_TIME, EVENT_COUNT, FRAME_RATE, MEMORY_USAGE
    //     Encoding: component name is hashed
    //     Observation type: IntegerEventObservation
    //     Observation value field: Respectively, duration_microseconds,
    //     occurrences, round(fps * 100), bytes
    //
    // ReportDefinition fields particular to this type:
    //   - candidate_list # All known component names
    //   - candidate_file # File with all known component names
    //   - percentiles    # A list of percentiles between 0-100 to include in
    //                    # the report
    //
    // Report row type: NumericAggregationReportRow.
    // (See report_row.proto)
    NUMERIC_AGGREGATION = 3;

    // Gives daily, fleet-wide histograms of an integer-valued metric of one
    // or more event types, associated with one or more components.
    //
    // Supported Input Types:
    //   - Metric types: ELAPSED_TIME, EVENT_COUNT, FRAME_RATE, MEMORY_USAGE
    //     Encoding, Observation type, Observation value field: Same as
    //     NUMERIC_AGGREGATION above.
    //   - Metric type: INT_HISTOGRAM
    //     Encoding: Component name is hashed, values are bucketed
    //     Observation type: HistogramObservation
    //
    // ReportDefinition fields particular to this type:
    //   - candidate_list # All known component names
    //   - candidate_file # File with all known component names
    //   - int_buckets    # Definition of integer-range buckets for histogram
    //                    # Not used for Metric type INT_HISTOGRAM because
    //                    # the Metric already contains this field.
    //
    // Report row type: IntRangeHistogramReportRow
    // (See report_row.proto)
    INT_RANGE_HISTOGRAM = 4;

    // Gives a raw dump of the Observations for a day of one of the numeric
    // performance metrics.
    //
    // Supported Input Types:
    //   - Metric types: ELAPSED_TIME, FRAME_RATE, MEMORY_USAGE
    //     Encoding: component name is hashed
    //     Observation type: IntegerEventObservation
    //     Observation value field: Respectively, duration_microseconds,
    //     round(fps * 100), bytes
    //
    // ReportDefinition fields particular to this type:
    //   - candidate_list # All known component names
    //   - candidate_file # File with all known component names
    //
    // Report fields:
    //   - date
    //   - system_profile # One or more fields from the SystemProfile message
    //                      type (cobalt/observation.proto) describing the
    //                      system on which the events occurred.
    //   - event
    //   - component
    //   - value # The integer value from the Observation
    //
    // Example report:
    //
    // Date, SystemProfile, Component, Event, Value
    // -----------------------------------------------------------------------
    // 2018-7-25,<sys-prof 1>,"Component 1","Event 1",12345
    // 2018-7-25,<sys-prof 2>,"Component 1","Event 2",34567
    // 2018-7-25,<sys-prof 1>,"Component 2","Event 1",12345
    // 2018-7-25,<sys-prof 2>,"Component 2","Event 3",78901
    NUMERIC_PERF_RAW_DUMP = 7;

    ///////////////////// Locally aggregated ReportTypes /////////////////////
    //
    // Report types for which logged Events are aggregated on each device
    // over a specified number of days. Each day, each device sends an
    // Observation of the aggregate of the Events over the rolling window
    // ending on that day.
    //
    //////////////////////////////////////////////////////////////////////////

    // For each of a set of events, gives an estimate of the number of unique
    // devices on which that event occurred at least once during a rolling
    // window. Supports the use of local differential privacy with Basic
    // RAPPOR.
    //
    // Each device sends an observation each day for each window size
    // registered in a ReportDefinition of this type, whether or not any of
    // the associated events occurred in the window ending on that day.
    //
    // Supported Input Types:
    //   - Metric type: EVENT_OCCURRED
    //   - Encoding: Basic RAPPOR. In the decoding step one should not assume
    //               that the true bit vector has exactly 1 bit set, but
    //               rather can have any number of bits set. This
    //               encoding/decoding scheme is the same as the
    //               Instantaneous Randomized Response in the RAPPOR paper
    //               (https://arxiv.org/pdf/1407.6981.pdf).
    //   - Observation type: UniqueActivesObservation
    //
    // ReportDefinition fields particular to this type:
    //   - local_privacy_noise_level # Gives Basic RAPPOR parameters.
    //   - window_size               # A list of window sizes in days.
    //
    // Report row type: UniqueActivesReportRow.
    // (See report_row.proto)
    UNIQUE_N_DAY_ACTIVES = 8;

    // For each of a set of events, for each of a set of component labels,
    // for a rolling window of a particular number of days, gives summary
    // statistics of the number of occurrences on individual devices of that
    // event, with that component, during the window.
    //
    // Supported Input Types:
    //   - Metric type: EVENT_COUNT, ELAPSED_TIME, FRAME_RATE, MEMORY_USAGE
    //   - Encoding: component name is hashed
    //   - Observation type: PerDeviceNumericObservation
    //
    // ReportDefinition fields particular to this type:
    //   - candidate_list   # All known component names.
    //   - candidate_file   # File with all known component names.
    //   - window_size      # A list of window sizes in days
    //   - aggregation_type # How numeric values should be aggregated
    //                      # on-device
    //
    // Report row type: PerDeviceNumericStatsReportRow.
    // (See report_row.proto)
    PER_DEVICE_NUMERIC_STATS = 9;

    // For each of a set of events, for each of a set of component labels,
    // for a rolling window of a particular number of days, gives summary
    // statistics of the number of occurrences on individual devices of that
    // event, with that component, during the window.
    //
    // NOTE(review): the description above and the report row type below are
    // word-for-word the same as for PER_DEVICE_NUMERIC_STATS and look like
    // they were copied from it; confirm what this histogram report actually
    // emits and which row type it uses.
    //
    // Supported Input Types:
    //   - Metric type: EVENT_COUNT, ELAPSED_TIME, FRAME_RATE, MEMORY_USAGE
    //   - Encoding: component name is hashed
    //   - Observation type: PerDeviceHistogramObservation
    //
    // ReportDefinition fields particular to this type:
    //   - candidate_list     # All known component names.
    //   - candidate_file     # File with all known component names.
    //   - aggregation_window # A list of desired aggregation windows in days
    //                        # or hours.
    //   - aggregation_type   # How numeric values should be aggregated
    //                        # on-device
    //   - int_buckets        # Definition of integer-range buckets for
    //                        # histogram
    //
    // Report row type: PerDeviceNumericStatsReportRow.
    // (See report_row.proto)
    PER_DEVICE_HISTOGRAM = 10;

    ////////////////////// Custom Raw Dump Report ////////////////////////////

    // Gives a raw dump of the Observations for a CustomMetric for a day.
    //
    // Supported Input Types:
    //   - Metric types: CUSTOM
    //     Encoding: none
    //     Observation type: CustomObservation
    //
    // ReportDefinition fields particular to this type:
    //   - output_location # Path to the location the output is written to
    //
    // Report fields:
    //   - date
    //   - system_profile # One or more fields from the SystemProfile message
    //                      type (cobalt/observation.proto) describing the
    //                      system on which the events occurred.
    //   - event
    //   - component
    //   - part-1, part-2, ... # The values of the custom parts
    //
    // Example report:
    //
    // Date, SystemProfile, Component, Event, part-1, part-2, part-3, ...
    // -----------------------------------------------------------------------
    // 2018-7-25,<sys-prof 1>,"Part 1","value1"
    // 2018-7-25,<sys-prof 2>,"Part 1","value1"
    // 2018-7-25,<sys-prof 1>,"Part 2","value2"
    //
    // A report of this type must be associated with a MetricDefinition of
    // type CustomMetric. Observations for the Report will be of type
    // CustomObservation
    CUSTOM_RAW_DUMP = 9999;
  }
  ReportType report_type = 3;

  //////////////// Fields specific to some report types /////////////////

  // A level of random noise to use when encoding observations for RAPPOR.
  enum LocalPrivacyNoiseLevel {
    // local_privacy_noise_level must be explicitly set when using
    // Basic RAPPOR or String RAPPOR.
    NOISE_LEVEL_UNSET = 0;

    // p = 0.0, q = 1.0
    NONE = 1;

    // p = 0.01, q = 0.99
    SMALL = 2;

    // p = 0.1, q = 0.9
    MEDIUM = 3;

    // p = 0.25, q = 0.75
    LARGE = 4;
  }

  // This field is used with the Report types that encode with Basic RAPPOR:
  // SIMPLE_OCCURRENCE_COUNT and UNIQUE_N_DAY_ACTIVES. (It was also used by
  // HIGH_FREQUENCY_STRING_COUNTS, which is now a reserved name.) Its value
  // must not be changed after data collection has begun or the data will
  // become corrupted.
  LocalPrivacyNoiseLevel local_privacy_noise_level = 4;

  // This field is not in use.
  uint32 expected_population_size = 5 [deprecated = true];

  // This field is not in use.
  uint32 expected_string_set_size = 6 [deprecated = true];

  // Explicit list of known string values. Either this or |candidate_file|
  // should be used, not both. Used for the hashed |component_name|
  // field for several Report types.
  repeated string candidate_list = 8 [(cobalt_options).hide_on_client = true];

  // Simple name or full path to file containing known string values.
  // Either this or |candidate_list| should be used, not both. Used for the
  // hashed |component_name| field for several report types.
  string candidate_file = 9;

  // This field is used with the INT_RANGE_HISTOGRAM and PER_DEVICE_HISTOGRAM
  // Report types. For INT_RANGE_HISTOGRAM it is not used when the Metric
  // type is INT_HISTOGRAM because in that case the MetricDefinition already
  // contains an instance of IntegerBuckets.
  IntegerBuckets int_buckets = 10;

  // This field is required for the UNIQUE_N_DAY_ACTIVES and
  // PER_DEVICE_NUMERIC_STATS Report types. It is not used with any other
  // report types.
  //
  // Used to specify the length in days of the rolling window for that
  // report type. Cobalt will send a separate Observation for each
  // WindowSize specified.
  //
  // TODO(pesk): make this a reserved name and ID once support for
  // |aggregation_window| is done and existing reports are updated.
  repeated WindowSize window_size = 11;

  // A time window in units of days or hours over which to aggregate events
  // on-device. Cobalt will send a separate Observation for each
  // OnDeviceAggregationWindow specified.
  //
  // TODO(pesk): update comment when transition is complete.
  // This field is not yet supported. It will replace the |window_size|
  // field and expand its functionality to include hourly windows.
  repeated OnDeviceAggregationWindow aggregation_window = 16;

  // This field is used only with CUSTOM_RAW_DUMP Report type. Used to
  // specify the path to the location the metrics should be written to.
  string output_location = 12;

  // This field can be used with all Report types. When set, the generated
  // report will exclude an Observation if there are not at least
  // |reporting_threshold| number of distinct devices reporting Observations
  // with the same ObservationMetadata.
  uint32 reporting_threshold = 13;

  // How to aggregate numeric values on a device.
  enum OnDeviceAggregationType {
    // Sum the values. As the zero value, this is what an unset
    // |aggregation_type| reads as.
    SUM = 0;

    // Take the minimum value.
    MIN = 1;

    // Take the maximum value.
    MAX = 2;
  }

  // This field is required for the PER_DEVICE_NUMERIC_STATS report type and
  // is also listed among the fields of PER_DEVICE_HISTOGRAM. It is not used
  // with any other report type.
  //
  // Used to specify how numeric values should be aggregated on-device.
  // If SUM is chosen, each device will generate an Observation containing
  // the sum of logged numeric values over a specified aggregation window.
  // If MIN (resp., MAX) is chosen, an Observation for this report will
  // contain the minimum (resp., maximum) numeric value logged during the
  // aggregation window.
  OnDeviceAggregationType aggregation_type = 14;

  // This field is optional for the NUMERIC_AGGREGATION report type. It is
  // not used with any other report type.
  //
  // Used to specify which additional percentiles to include in the report
  // output. Each value should be between 0 and 100.
  repeated uint32 percentiles = 15;

  /////////////////// Fields used by all report types ///////////////////

  // Next id: 103

  // The list of SystemProfileFields to include in each row of the report.
  // Optional. If not specified a reasonable default will be used.
  repeated SystemProfileField system_profile_field = 100;

  // Configuration of differentially private release for this report.
  DifferentialPrivacyConfig dp_release_config = 101;

  // Overrides the location to where Cobalt exports this Report.
  // This field is implementation-specific. If omitted Cobalt will use its
  // default settings to determine the location of the exported report.
  string export_location_override = 102;
}
// Parameters for the differentially private release of a report. Used as
// the type of ReportDefinition.dp_release_config.
message DifferentialPrivacyConfig {
  // NOTE(review): presumably the epsilon of an (epsilon, delta)
  // differential-privacy guarantee; valid range is not stated -- confirm.
  float epsilon = 1;

  // NOTE(review): presumably the delta of the same guarantee -- confirm.
  float delta = 2;
}
// A specification of a field from SystemProfile. These are used in a ReportDefinition to specify
// which fields should be included in the generated Observations and reports.
enum SystemProfileField {
  // Note: OS is the zero value; this enum has no separate UNSET value.
  OS = 0;
  ARCH = 1;
  BOARD_NAME = 2;
  PRODUCT_NAME = 3;
  SYSTEM_VERSION = 4;
  CHANNEL = 5;
}
// ExponentialIntegerBuckets is used to define a partition of the integers into
// a finite number of exponentially increasing buckets.
//
// Let n = num_buckets. Then there are n+2 buckets indexed 0,...,n+1.
//
// The bucket boundaries are:
// a[0] = floor
// a[1] = floor + initial_step
// a[2] = floor + initial_step * step_multiplier
// a[3] = floor + initial_step * step_multiplier ^ 2
// a[4] = floor + initial_step * step_multiplier ^ 3
// and in general, for i = 1, 2, 3 ... n
// a[i] = floor + initial_step * step_multiplier ^ (i-1)
//
// Then, the buckets are defined as follows:
// Bucket 0 is the underflow bucket: (-infinity, floor)
// Bucket i for 0 < i < n+1: [a[i-1], a[i])
// Bucket n+1 is the overflow bucket: [a[n], +infinity)
//
// Examples:
// floor = 0
// num_buckets = 3
// initial_step = 10
// step_multiplier = 10
// Then, the buckets are:
// (-infinity, 0), [0, 10), [10, 100), [100, 1000), [1000, +infinity)
//
// floor = 0
// num_buckets = 3
// initial_step = 2
// step_multiplier = 2
// Then, the buckets are:
// (-infinity, 0), [0, 2), [2, 4), [4, 8), [8, +infinity)
//
// floor = 10
// num_buckets = 3
// initial_step = 2
// step_multiplier = 2
// Then, the buckets are:
// (-infinity, 10), [10, 12), [12, 14), [14, 18), [18, +infinity)
//
// floor = 0
// num_buckets = 3
// initial_step = 100
// step_multiplier = 10
// Then, the buckets are:
// (-infinity, 0), [0, 100), [100, 1000), [1000, 10000), [10000, +infinity)
//
message ExponentialIntegerBuckets {
  // Lower boundary a[0] of the first regular bucket. Values below it fall
  // into the underflow bucket.
  int64 floor = 1;

  // The number n of regular buckets, not counting the underflow and
  // overflow buckets. num_buckets must be at least 1.
  uint32 num_buckets = 2;

  // Width of the first regular bucket: a[1] = floor + initial_step.
  // Must be at least one.
  uint32 initial_step = 3;

  // Growth factor of the boundaries:
  // a[i] = floor + initial_step * step_multiplier ^ (i-1).
  // Must be at least one.
  uint32 step_multiplier = 4;
}
// LinearIntegerBuckets is used to define a partition of the integers into a
// finite number of buckets of equal size.
//
// Let n = num_buckets. Then there are n+2 buckets indexed 0,...,n+1.
// Bucket 0 is the underflow bucket: (-infinity, floor)
// Bucket n+1 is the overflow bucket: [floor + step_size * n, +infinity)
//
// For i = 1 to n, the bucket i is defined as
// [floor + step_size * (i-1), floor + step_size * i)
//
// Example: floor = 0, num_buckets = 3, step_size = 10.
// (-infinity, 0), [0, 10), [10, 20), [20, 30), [30, +infinity)
message LinearIntegerBuckets {
  // Lower boundary of the first regular bucket. Values below it fall into
  // the underflow bucket.
  int64 floor = 1;

  // The number n of regular, equal-size buckets, not counting the underflow
  // and overflow buckets. Must be at least one.
  uint32 num_buckets = 2;

  // Size of each regular bucket: bucket i covers
  // [floor + step_size * (i-1), floor + step_size * i).
  // Must be at least one.
  uint32 step_size = 3;
}
// A definition of integer-range buckets, given by one of two schemes.
message IntegerBuckets {
  // At most one of the following bucketing schemes is set.
  oneof buckets {
    // Buckets whose boundaries grow exponentially.
    ExponentialIntegerBuckets exponential = 1;

    // Buckets of equal size.
    LinearIntegerBuckets linear = 2;
  }
}
// A list of candidates to use during a string RAPPOR analysis.
message RapporCandidateList {
  // The candidate strings, one entry per candidate.
  repeated string candidates = 1;
}