// blob: e994edc4de49a77380878835340b11b4ceff91eb [file] [edit]
// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
syntax = "proto3";
package cobalt;
import "src/registry/window_size.proto";
option go_package = "src/registry;config";
// A Report analyzes Events that were logged to Cobalt and emits an aggregated
// output that may then be queried or visualized by an analyst user of Cobalt.
//
// A Report is associated with a Metric and this means that the Report analyzes
// the Events that were logged to that Metric. The first step occurs on a
// device where Cobalt analyzes the logged Events in order to form Observations.
//
// An Observation is built for a particular Report. The type of observation,
// including which of several privacy-oriented Encodings is used or not, depends
// on the Report type.
//
// The Observations are sent to the Cobalt Shuffler which shuffles them in order
// to break linkability between Observations and linkability with the
// originating device. Next the shuffled Observations are sent to the Analyzer
// which aggregates Observations from all devices in order to generate a report.
//
// There are multiple types of Metrics and multiple types of Reports. Each
// Report type is compatible with only some of the Metric types.
//
// A ReportDefinition defines a Cobalt Report to be generated.
// An instance of ReportDefinition is always associated with an instance of
// MetricDefinition called the owning MetricDefinition.
// Next ID: 122
message ReportDefinition {
// Field numbers and field names that are reserved: no field of this message
// may use them (typically because a field that used them was removed).
// NOTE(review): "candidate_lis" looks like a truncation of "candidate_list";
// confirm against the registry history before changing it.
reserved 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 16, 20, 21, 30, 31, 101, 102;
reserved "aggregation_type", "aggregation_window", "candidate_lis", "dp_release_config",
"expected_population_size", "expected_string_set_size", "export_location_override",
"local_privacy_noise_level", "output_location", "percentiles", "threshold", "window_size",
"use_poisson_mechanism_for_privacy", "prob_bit_flip", "candidate_file", "privacy_level",
"poisson_mean";
// Unique name for this Report within its owning MetricDefinition.
// The name must obey the syntax of a C variable name and must have length
// at most 64. The integer |id| field is the stable identifier for a report,
// so this name may be changed. However, doing this may affect the
// names and locations of some artifacts produced by Cobalt's report
// generation pipeline.
string report_name = 1;
// The unique integer ID for this report within its owning metric.
// The user must manually set this |id| field. This is the stable identifier
// for a report and should not be changed once data collection begins.
uint32 id = 2;
// A Report has one of the following types.
// Next standard report type ID: 23
enum ReportType {
// Values and names that are reserved: no value of this enum may use them.
reserved 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 19, 9999;
reserved "CUSTOM_RAW_DUMP", "EVENT_COMPONENT_OCCURRENCE_COUNT", "HIGH_FREQUENCY_STRING_COUNTS",
"INT_RANGE_HISTOGRAM", "NUMERIC_AGGREGATION", "NUMERIC_PERF_RAW_DUMP",
"PER_DEVICE_HISTOGRAM", "PER_DEVICE_NUMERIC_STATS", "SIMPLE_OCCURRENCE_COUNT",
"STRING_COUNTS_WITH_THRESHOLD", "STRING_HISTOGRAMS", "UNIQUE_N_DAY_ACTIVES";
// Zero value: no report type has been set.
REPORT_TYPE_UNSET = 0;
// For each event, produces the total count of occurrences of this event across
// the fleet on the report day, grouped by system profile.
// For example, a report of this type might give the total number of times
// a medium, red widget was used across the fleet yesterday.
//
// Input metric types: OCCURRENCE
//
// Local aggregation: DO NOT SET. Defaults to summing the counts for each event
// over the local aggregation period per system profile
// Local aggregation period:
// - Fuchsia reports: DO NOT SET. Defaults to 1 hour
// - Android reports: DO NOT SET. Defaults to 1 day
// Global aggregation: Sums the counts for each event on the report day per
// system profile
// System Profile Selection Policy: DO NOT SET. Defaults to REPORT_ALL
//
// Device sent observation type: IntegerObservation
// (See observation.proto)
// Output report row type: OccurrenceCountReportRow
// (See report_row.proto)
//
// ReportDefinition fields particular to this type:
// - reporting_interval (optional; only supported by some client platforms)
FLEETWIDE_OCCURRENCE_COUNTS = 11;
// For each event that is accepted during the aggregation period ending on the
// report day, produces the number of unique devices with the same system profile.
//
// "Is accepted" depends on the local aggregation procedure used:
//
// AT_LEAST_ONCE: An event is accepted if it was logged at least once during
// the aggregation period. For example, a report of this type might give the
// total number of devices with the same system profile on which a medium,
// red widget was used at least once over the seven-day period ending yesterday.
//
// SELECT_FIRST: An event is accepted only if it was the first logged event
// during the aggregation period. For example, a report of this type might
// give the total number of devices with the same system profile on which the
// first widget used during the seven-day period ending yesterday was medium-red.
//
// SELECT_MOST_COMMON: An event is accepted only if it was the most frequently
// logged event during the aggregation period. For example, a report of this
// type might give the total number of devices with the same system profile
// on which most of the widgets used during the seven-day period ending yesterday
// were medium-red.
//
// NOTE: Using a local aggregation procedure of AT_LEAST_ONCE or
// SELECT_FIRST, in combination with setting expedited_sending, results in
// the count being sent by the device when the event occurs (instead of at
// the end of the day). This can be desirable for having data for the
// current day appear faster in the reports output by Cobalt.
//
// Input metric types: OCCURRENCE
//
// Local aggregation: AT_LEAST_ONCE, SELECT_FIRST, or SELECT_MOST_COMMON
// Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
// Global aggregation: Sums the counts for each event on the report day per
// system profile
// System Profile Selection Policy: SELECT_FIRST and SELECT_LAST will maintain
// uniqueness, REPORT_ALL may be useful in
// some cases
//
// Device sent observation type: IntegerObservation with value of 1 if the
// associated event "is accepted"
// Output report row type: OccurrenceCountReportRow
// (See report_row.proto)
//
// ReportDefinition fields particular to this type:
// - local_aggregation_procedure
// - local_aggregation_period
// - expedited_sending
// - system_profile_selection
UNIQUE_DEVICE_COUNTS = 12;
// For each event, produces an int-range histogram where each bucket
// counts the number of unique devices with the same system profile, where:
//
// OCCURRENCE: an occurrence count of this event over the local aggregation
// period ending on the report day falls within the bucket range
//
// INTEGER: a locally aggregated statistic associated with this event over
// the local aggregation period ending on the report day falls
// within the bucket range
//
// For example, for the integer bucket [10, 100), a report for an OCCURRENCE
// metric might give the number of devices with the same system profile on
// which a medium, red widget was used between 10 and 100 times over the
// seven-day period ending yesterday.
//
// For the integer bucket [10, 100), a report for an INTEGER metric that
// specifies the MIN_PROCEDURE local aggregation procedure might give the
// number of devices with the same system profile on which the minimum
// temperature of a medium, red widget over the seven-day period ending
// yesterday was between 10 and 100 degrees.
//
// Input metric types: OCCURRENCE or INTEGER
//
// Local aggregation:
// - INTEGER metrics: SUM_PROCEDURE, MIN_PROCEDURE, MAX_PROCEDURE, MEAN,
// MEDIAN or PERCENTILE_N
// - OCCURRENCE metrics: DO NOT SET. Defaults to summing the counts for each
// event over the local aggregation period per
// system profile
// Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
// Global aggregation: Sums the counts in each int-range bucket for each
// event on the report day per system profile
// System Profile Selection Policy: SELECT_FIRST and SELECT_LAST will maintain
// uniqueness, REPORT_ALL may be useful in
// some cases
//
// Device sent observation type: IntegerObservation
// (See observation.proto)
// Output report row type: IntegerHistogramReportRow
// (See report_row.proto)
//
// ReportDefinition fields particular to this type:
// - local_aggregation_procedure (only when the metric type is INTEGER)
// - local_aggregation_period
// - int_buckets (this is used only on the server for reports without
// added privacy, but is used on the client for reports with added
// privacy)
// - system_profile_selection
UNIQUE_DEVICE_HISTOGRAMS = 13;
// For each event, produces an int-range histogram where each bucket
// counts the number of values, grouped by system profile, where:
//
// OCCURRENCE: an hourly occurrence count of this event falls within the
// bucket range on the report day
//
// INTEGER: an hourly statistic associated with this event falls within
// the bucket range on the report day
//
// Each unique device contributes a value every hour, 24 such values per
// device per day.
//
// Computationally this report type is identical to UNIQUE_DEVICE_HISTOGRAMS
// except that the local aggregation period used is one hour and so the
// counts in each bucket are NOT interpreted as a number of unique devices,
// but a number of hourly values.
//
// For example, for the integer bucket [10, 100), a report for an OCCURRENCE
// metric might give the number of times that the hourly count of a medium,
// red widget usages was between 10 and 100 across the fleet yesterday.
//
// For the integer bucket [10, 100), a report for an INTEGER metric that
// specifies the MIN_PROCEDURE local aggregation procedure might give the
// number of times that the minimum temperature over an hour of all medium,
// red widget usages was between 10 and 100 degrees across the fleet yesterday.
//
// Input metric types: OCCURRENCE or INTEGER
//
// Local aggregation:
// - INTEGER metrics: SUM_PROCEDURE, MIN_PROCEDURE, MAX_PROCEDURE, MEAN,
// MEDIAN or PERCENTILE_N
// - OCCURRENCE metrics: DO NOT SET. Defaults to summing the counts for each event
// over the local aggregation period per system profile
// Local aggregation period: DO NOT SET. Defaults to 1 hour
// Global aggregation: Sums the counts in each int-range bucket for each event
// on the report day per system profile
// System Profile Selection Policy: SELECT_FIRST and SELECT_LAST will maintain
// uniqueness, REPORT_ALL may be useful in
// some cases
//
// Device sent observation type: IntegerObservation
// (See observation.proto)
// Output report row type: IntegerHistogramReportRow
// (See report_row.proto)
//
// ReportDefinition fields particular to this type:
// - local_aggregation_procedure (only when the metric type is INTEGER)
// - int_buckets (this is used only on the server for reports without
// added privacy, but is used on the client for reports with added
// privacy)
// - system_profile_selection
HOURLY_VALUE_HISTOGRAMS = 14;
// For each event, produces an int-range histogram where each bucket counts
// the number of integer measurements associated with the event that fall within
// the bucket range across the fleet on the report day, grouped by system
// profile.
//
// Each logged event is counted as a separate value, so a device that experiences
// many events will contribute more data than one with few events. It's impossible
// to later determine if the dataset has been skewed by one or more devices
// contributing more values because the shuffler breaks links between values
// uploaded by the same device.
//
// For example, for the integer bucket [10, 100), a report of this type might
// give the number of times that a medium, red widget's temperature was measured
// as being between 10 and 100 degrees across the fleet yesterday, regardless
// of how many temperature measurements were taken on each device individually.
//
// Input metric types: INTEGER or INTEGER_HISTOGRAM
//
// Local aggregation: DO NOT SET. Defaults to summing the counts in each int-range
// bucket for each event over the local aggregation period per
// system profile
// Local aggregation period:
// - Fuchsia reports: DO NOT SET. Defaults to 1 hour
// - Android reports: DO NOT SET. Defaults to 1 day
// Global aggregation: Sums the counts in each int-range bucket for each event on
// the report day per system profile
// System Profile Selection Policy: DO NOT SET. Defaults to REPORT_ALL
//
// Device sent observation type: IndexHistogramObservation
// (See observation.proto)
// Output report row type: IntegerHistogramReportRow
// (See report_row.proto)
//
// ReportDefinition fields particular to this type:
// - int_buckets (Only with metric_type = INTEGER)
FLEETWIDE_HISTOGRAMS = 15;
// For each event, produces the sum and count of many integer measurements
// associated with this event across the fleet on the report day, grouped by
// system profile.
//
// Each logged event is counted as a separate value, so a device that experiences
// many events will contribute more data than one with few events. This allows
// us to produce a fleetwide mean. Note, it's impossible to later determine if the
// dataset has been skewed by one or more devices contributing more values because
// the shuffler breaks links between values uploaded by the same device.
//
// For example, a report of this type might give the mean of all temperature
// measurements of a medium, red widget across the fleet, yesterday, regardless
// of how many temperature measurements were taken on each device individually.
//
// Input metric types: INTEGER
//
// Local aggregation: DO NOT SET. Defaults to accumulating the sum and count for
// each event over the local aggregation period per system
// profile
// Local aggregation period: DO NOT SET. Defaults to 1 hour
// Global aggregation: Accumulates the sum and count for each event on the report
// day per system profile
// System Profile Selection Policy: DO NOT SET. Defaults to REPORT_ALL
//
// Device sent observation type: SumAndCountObservation
// (See observation.proto)
// Output report row type: SumAndCountReportRow
// (See report_row.proto)
//
// ReportDefinition fields particular to this type:
// none
FLEETWIDE_MEANS = 16;
// For each event, produces several per-device numeric statistics (e.g. 95%-ile)
// over a set of values collected across the fleet grouped by system profile,
// where each value is:
//
// OCCURRENCE: an occurrence count of this event over the local aggregation
// period ending on the report day
//
// INTEGER: a locally aggregated statistic associated with this event over
// the local aggregation period ending on the report day
//
// Each unique device contributes a single value and so the distribution
// of the values may be thought of as a distribution of unique devices.
//
// For example, a report for an OCCURRENCE metric might give the 95%-ile of
// the 7-day per-device counts of a medium, red widget usage across the fleet
// yesterday.
//
// A report for an INTEGER metric that specifies the MIN_PROCEDURE local
// aggregation procedure might give the 95%-ile of the 7-day per-device
// minimum temperature of the medium, red widget across the fleet yesterday.
//
// Input metric types: OCCURRENCE or INTEGER
//
// Local aggregation:
// - INTEGER metrics: SUM_PROCEDURE, MIN_PROCEDURE, MAX_PROCEDURE, MEAN,
// MEDIAN or PERCENTILE_N
// - OCCURRENCE metrics: DO NOT SET. Defaults to summing the counts for each
// event over the local aggregation period per system
// profile
// Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
// Global aggregation: Produces several numeric statistics from uploaded values
// for each event on the report day per system profile
// System Profile Selection Policy: DO NOT SET. Defaults to REPORT_ALL
//
// Device sent observation type: IntegerObservation
// (See observation.proto)
// Output report row type: NumericStatsReportRow
// (See report_row.proto)
//
// ReportDefinition fields particular to this type:
// - local_aggregation_procedure (only when the metric type is INTEGER)
// - local_aggregation_period
UNIQUE_DEVICE_NUMERIC_STATS = 17;
// For each event, produces several numeric statistics (e.g. 95%-ile) over a
// set of hourly values collected across the fleet grouped by system profile,
// where each value is:
//
// OCCURRENCE: an hourly occurrence count of this event on the report day
//
// INTEGER: an hourly statistic associated with this event on the report day
//
// Each unique device contributes a value every hour, 24 such values per
// device per day.
//
// Computationally this report type is identical to
// UNIQUE_DEVICE_NUMERIC_STATS except that the local aggregation period
// used is one hour and so the distribution of the values is NOT
// interpreted as a distribution of unique devices, but a distribution of
// hourly values.
//
// For example, a report for an OCCURRENCE metric might give the 95%-ile of
// the hourly count of medium, red widget usages across the fleet yesterday.
//
// A report for an INTEGER metric that specifies the MIN_PROCEDURE local
// aggregation procedure might give the 95%-ile of the hourly minimum
// temperature of medium, red widget usages across the fleet yesterday.
//
// Input metric types: OCCURRENCE or INTEGER
//
// Local aggregation:
// - INTEGER metrics: SUM_PROCEDURE, MIN_PROCEDURE, MAX_PROCEDURE, MEAN,
// MEDIAN or PERCENTILE_N
// - OCCURRENCE metrics: DO NOT SET. Defaults to summing the counts for each
// event over the local aggregation period per system
// profile
// Local aggregation period: DO NOT SET. Defaults to 1 hour
// Global aggregation: Produces several numeric statistics from uploaded values
// for each event on the report day per system profile
// System Profile Selection Policy: DO NOT SET. Defaults to REPORT_ALL
//
// Device sent observation type: IntegerObservation
// (See observation.proto)
// Output report row type: NumericStatsReportRow
// (See report_row.proto)
//
// ReportDefinition fields particular to this type:
// - local_aggregation_procedure (only when the metric type is INTEGER)
HOURLY_VALUE_NUMERIC_STATS = 18;
// For each event, produces the total count of occurrences of a string value
// associated with this event across the fleet on the report day, grouped by
// system profile.
//
// Input metric types: STRING
//
// Local aggregation: DO NOT SET. Defaults to summing the counts in each string
// bucket (where the key is the hash of the logged string)
// for each event over the local aggregation period per
// system profile
// Local aggregation period:
// - Fuchsia reports: DO NOT SET. Defaults to 1 hour
// - Android reports: DO NOT SET. Defaults to 1 day
// Global aggregation: Sums the counts for each candidate string for each event
// on the report day per system profile
// System Profile Selection Policy: DO NOT SET. Defaults to REPORT_ALL
//
// Device sent observation type: StringHistogramObservation
// (See observation.proto)
// Output report row type: StringCountReportRow
// (See report_row.proto)
//
// ReportDefinition fields particular to this type:
// - string_buffer_max
STRING_COUNTS = 20;
// For each event and candidate string, produces the number of unique devices
// with the same system profile on which this string was logged, associated
// with this event during the aggregation period ending on the report day,
// which must be DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
//
// This is similar to the AT_LEAST_ONCE local aggregation procedure for
// UNIQUE_DEVICE_COUNTS. For example, a report of this type might
// give the total number of devices with the same system profile on which
// a medium, red widget was used in conjunction with the component name
// "widget-consumer" at least once in the seven-day period ending
// yesterday.
//
// Input metric types: STRING
//
// Local aggregation procedure: DO NOT SET. For each event, increments the count
// by 1 for the string bucket if this string is
// logged at least once over the local aggregation
// period per system profile
// Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
// Global aggregation: Sums the counts for each candidate string for each event
// on the report day per system profile
// System Profile Selection Policy: SELECT_FIRST and SELECT_LAST will maintain
// uniqueness, REPORT_ALL may be useful in
// some cases
//
// Device sent observation type: StringHistogramObservation
// (See observation.proto)
// Output report row type: StringCountReportRow
// (See report_row.proto)
//
// ReportDefinition fields particular to this type:
// - local_aggregation_period
// - string_buffer_max
// - system_profile_selection
UNIQUE_DEVICE_STRING_COUNTS = 21;
// Experimental. DO NOT USE!
// Input metric types: STRUCT
//
// Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30
//
// ReportDefinition fields particular to this type:
// - aggregates: (1 or more)
STRUCT = 22;
}
// The type of this report. The comments on the ReportType values list which
// other ReportDefinition fields are particular to each type.
ReportType report_type = 3;
//////////////// Fields for reports with privacy enabled /////////////////
// When reporting numerical values with privacy, the values are mapped to
// indices from 0 to num_index_points-1 with a randomized rounding method.
//
// In the future, the value of this field will be computed by the registry
// parser as a function of other privacy-related fields and an estimate of the
// user population size. For now, it should be set manually in the Cobalt
// registry in consultation with the Cobalt team.
//
// TODO(https://fxbug.dev/278932979): update this comment once the field is populated by
// the registry parser.
uint32 num_index_points = 22;
// When reporting strings with privacy, the strings are counted using a linear
// sketch. This field holds the parameters of that sketch.
//
// In the future, the value of this field will be computed by the registry
// parser as a function of other privacy-related fields and an estimate of the
// user population size. For now, it should be set manually in the Cobalt
// registry in consultation with the Cobalt team.
//
// TODO(https://fxbug.dev/278932979): update this comment once the field is populated by
// the registry parser.
StringSketchParameters string_sketch_params = 27;
// |min_value| and |max_value| specify the range of values that can be reported
// by a device in the specified local_aggregation_period. If the true value to
// be reported falls outside the specified range, the value is clipped.
//
// For FLEETWIDE_OCCURRENCE_COUNTS, UNIQUE_DEVICE_NUMERIC_STATS and
// HOURLY_VALUE_NUMERIC_STATS, the range applies to the total numerical value
// computed for the device over the aggregation period specified in the
// report.
//
// For FLEETWIDE_MEANS, the range applies to the per-device sum of the value
// to be averaged over one hour. (For FLEETWIDE_MEANS, the `max_count` field
// is also required in order to bound the `count` value.)
//
// If a privacy_mechanism other than DE_IDENTIFICATION is specified, these
// fields are required for reports of type:
// * FLEETWIDE_OCCURRENCE_COUNTS
// * UNIQUE_DEVICE_NUMERIC_STATS
// * HOURLY_VALUE_NUMERIC_STATS
// * FLEETWIDE_MEANS
int64 min_value = 23;
// The other end of the range described on |min_value|; see that comment for
// the clipping behavior and for the report types that require it.
int64 max_value = 24;
// This field specifies the maximum count to be reported by a device in the
// specified local_aggregation_period. If the true count is greater than
// max_count, then the count will be reported as max_count.
//
// For FLEETWIDE_HISTOGRAMS, the bound applies to the count for each
// individual histogram bucket over the aggregation period of one hour. For
// STRING_COUNTS, it applies to the count for each string over one hour.
//
// For FLEETWIDE_MEANS, the bound applies to the per-device count of the
// values to be averaged over one hour.
//
// If a privacy_mechanism other than DE_IDENTIFICATION is specified, this field is
// required for reports of type:
// * FLEETWIDE_HISTOGRAMS
// * FLEETWIDE_MEANS
// * STRING_COUNTS
uint64 max_count = 25;
//////////////// Fields specific to some report types /////////////////
// A specification of integer-range buckets for a histogram.
//
// This field is for reports of type UNIQUE_DEVICE_HISTOGRAMS,
// HOURLY_VALUE_HISTOGRAMS, and FLEETWIDE_HISTOGRAMS -- but for
// FLEETWIDE_HISTOGRAMS only with metrics of type INTEGER, not with metrics of
// type INTEGER_HISTOGRAM, because in that case the MetricDefinition already
// contains an instance of IntegerBuckets.
//
// For UNIQUE_DEVICE_HISTOGRAMS and HOURLY_VALUE_HISTOGRAMS, the buckets are
// used only on the server for reports without added privacy, but are used on
// the client for reports with added privacy (see the ReportType comments).
IntegerBuckets int_buckets = 10;
// The interval with which clients will generate and upload observations.
enum ReportingInterval {
// Zero value: no reporting interval has been set. See the comment on the
// |reporting_interval| field for the default that applies in that case.
REPORTING_INTERVAL_UNSET = 0;
// An interval of one hour.
HOURS_1 = 1;
// An interval of one day.
DAYS_1 = 2;
}
// This field is optional for FLEETWIDE_OCCURRENCE_COUNTS reports, and is only
// supported by some client platforms. If not set, the reporting interval
// defaults to 1 hour for FLEETWIDE_OCCURRENCE_COUNTS reports.
// NOTE(review): the ReportType comment for FLEETWIDE_OCCURRENCE_COUNTS lists a
// default local aggregation period of 1 day for Android reports; confirm
// whether the 1-hour default stated here applies to all platforms.
ReportingInterval reporting_interval = 32;
// This field can be used with all Report types. When set, the generated
// report will exclude an Observation unless at least |reporting_threshold|
// distinct devices reported Observations with the same ObservationMetadata.
uint32 reporting_threshold = 13;
// The on-device function used to aggregate logged data over the local aggregation
// period.
enum LocalAggregationProcedure {
// Zero value: no local aggregation procedure has been set.
LOCAL_AGGREGATION_PROCEDURE_UNSET = 0;
// Numerical statistic aggregation procedures to be used with reports
// of type UNIQUE_DEVICE_HISTOGRAMS, HOURLY_VALUE_HISTOGRAMS,
// UNIQUE_DEVICE_NUMERIC_STATS and HOURLY_VALUE_NUMERIC_STATS for INTEGER metrics.
// TODO(https://fxbug.dev/42168241): Rename these to remove the '_PROCEDURE' suffix.
SUM_PROCEDURE = 1;
MIN_PROCEDURE = 2;
MAX_PROCEDURE = 3;
MEAN = 4;
MEDIAN = 5;
// The value of N is set in the field
// |local_aggregation_procedure_percentile_n|.
PERCENTILE_N = 6;
// Logical aggregation procedures to be used with reports of type
// UNIQUE_DEVICE_COUNTS. Each one defines when an event "is accepted" during
// the aggregation period (see the comment on UNIQUE_DEVICE_COUNTS).
//
// An event is accepted if it was logged at least once.
AT_LEAST_ONCE = 7;
// An event is accepted only if it was the first logged event.
SELECT_FIRST = 8;
// An event is accepted only if it was the most frequently logged event.
SELECT_MOST_COMMON = 9;
}
// This field is required for reports of type UNIQUE_DEVICE_COUNTS, and for reports
// of type UNIQUE_DEVICE_HISTOGRAMS, HOURLY_VALUE_HISTOGRAMS,
// UNIQUE_DEVICE_NUMERIC_STATS and HOURLY_VALUE_NUMERIC_STATS when the metric
// type is INTEGER.
// Different report types support different values of this field. See the comments
// on the enum values in LocalAggregationProcedure.
LocalAggregationProcedure local_aggregation_procedure = 17;
// This field is required when
// local_aggregation_procedure = PERCENTILE_N.
// In this case it gives the value of N to use. Otherwise this field is
// ignored.
uint32 local_aggregation_procedure_percentile_n = 18;
// Time window over which the logged data is aggregated. The local aggregation
// period is specified for UNIQUE_DEVICE_* report types (and for the
// experimental STRUCT type); per the ReportType comments the accepted values
// are DAYS_1, DAYS_7, DAYS_28 or DAYS_30. For the other report types this
// field must not be set and a default period is used.
WindowSize local_aggregation_period = 19;
// The maximum number of distinct event vectors for which an instance of the Cobalt
// client should produce an observation, for a given local aggregation period. Event
// vectors are prioritized in order of first arrival during the aggregation period.
//
// For example, if a report has an event_vector_buffer_max of 10, and 12 distinct event
// vectors are logged for this metric over an aggregation period, then Cobalt will send
// observations of the first 10 event vectors for that aggregation period and drop the
// last 2.
//
// If this field is unset, the registry parser assigns to it the total number of event
// vectors for the report's parent metric (i.e., the product over all metric dimensions
// of the number of event codes per dimension).
//
// The report's project will be charged against a resource budget for this value,
// so project owners are encouraged to set this as small as possible. For example,
// the report's parent metric may include a dimension with thousands of event codes,
// but it is expected that any one device will log only a few distinct event vectors
// per day. In that case we may set event_vector_buffer_max to a relatively small number,
// say 20. For reports which use differential privacy, setting event_vector_buffer_max
// to a smaller number will improve the signal for event vectors which are included in
// observations.
uint64 event_vector_buffer_max = 26;
// The maximum number of distinct strings that Cobalt must keep in its in-memory buffer
// on any single device. During local aggregation for reports of type STRING_COUNTS and
// UNIQUE_DEVICE_STRING_COUNTS, Cobalt will keep track of this many distinct strings per
// aggregation period. The report's project will be charged against a resource budget for this
// value, so project owners are encouraged to set this as small as possible. A STRING metric
// includes a file of candidate strings that may contain many thousands of strings, but it is
// expected that any one device will log only a few of these strings per day. In that case we
// may set string_buffer_max to a relatively small number, say 20.
//
// This is a required field for reports of type STRING_COUNTS and UNIQUE_DEVICE_STRING_COUNTS.
uint32 string_buffer_max = 28;
// For reports of type UNIQUE_DEVICE_COUNTS, send observations as soon as the
// event occurs, instead of waiting for the end of the day.
//
// This can only be enabled when using a local aggregation procedure of
// AT_LEAST_ONCE or SELECT_FIRST, and when the privacy level is
// NO_ADDED_PRIVACY. When used with a system_profile_selection of REPORT_ALL
// or SELECT_FIRST, enabling this is recommended as Cobalt will send the count
// for the current day when the event occurs instead of at the end of the day.
// For a system_profile_selection of SELECT_LAST, this may also be desirable,
// though it may result in a slight change in the current day's system profile
// that is used, as Cobalt won't wait until the end of the day to determine
// the final system profile, but will instead send the count immediately with
// the system profile that is currently active on the device.
//
// NOTE(review): "privacy_level" is a reserved (removed) field name of this
// message, and other comments in this file refer to a privacy_mechanism of
// DE_IDENTIFICATION instead; confirm which setting is meant by
// "privacy level is NO_ADDED_PRIVACY" and update the wording.
bool expedited_sending = 29;
// Experimental. DO NOT USE!
// A specification of an aggregate for a STRUCT report.
message AggregateDefinition {
// NOTE(review): undocumented in the original; presumably the stable integer
// identifier of this aggregate within its report -- confirm.
uint32 id = 1;
// The name of this aggregate.
// If this field is left unspecified, the value of |for| will be used.
// If |for| is not specified, this field is required.
string name = 2;
// This field must match the name of one of the corresponding metric's struct fields.
string for = 3;
// This field is filled in by the registry parser based on the value of |for|.
uint32 for_id = 4;
// A function used to aggregate data for a STRUCT report, either on the device
// (see |local|) or on the server (see |global|). The comments on each value
// describe its "local usage" and/or "global usage".
enum AggregationFunction {
// Zero value: no aggregation function has been specified.
UNSPECIFIED_AGGREGATION_FUNCTION = 0;
// local usage:
// - Can be used with BOOL, ENUM or STRING fields.
//
// global usage:
// - Requires the local aggregation function to be GROUP_BY.
GROUP_BY = 1;
// local usage:
// - COUNT counts the number of occurrences of groups.
// - |for| must be left unspecified.
//
// global usage:
// - Counts the number of devices in each group. The local aggregation function must be left
// unspecified.
COUNT = 2;
// global usage:
// - Requires the local aggregation function to return a numeric value.
SUM = 3;
// global usage:
// - Requires the local aggregation function to return a numeric value.
MEAN = 4;
}
// The on-device function computed on the metric during the aggregation window.
AggregationFunction local = 5;
// The server-side function computed on data received from devices.
AggregationFunction global = 6;
// Maximum number of distinct values that Cobalt must keep on any single device for this
// GROUP_BY aggregate. The first distinct_values_max_per_device values seen by the device
// are kept and subsequent ones are discarded.
//
// This is a required field for aggregates of string fields.
uint64 distinct_values_max_per_device = 7;
// The path to a list of candidate strings for a GROUP_BY aggregate referring to a STRING field.
// The path should be relative to the root of the Cobalt registry, for
// instance "$CUSTOMER/$PROJECT/candidate_strings.txt". String candidate
// files should ideally be placed in the same registry and directory as the
// project that uses them.
//
// The empty string, "", is added to the candidate list implicitly by Cobalt
// and does not need to be added to the list.
//
// If string_candidate_file is not set for the corresponding metric field, this field is
// required.
string string_candidate_file = 8;
}
// Experimental. DO NOT USE!
// A list of aggregates for a STRUCT report.
repeated AggregateDefinition aggregates = 33;
/////////////////// Fields used by all report types ///////////////////
// Next id: 122
// The list of SystemProfileFields to include in each row of the report.
// Optional.
repeated SystemProfileField system_profile_field = 100;
// The list of Experiments to include in each row of the report.
//
// Each report row lists the intersection of the experiment ids active on the device and
// the experiment ids specified in this field.
//
// The specified experiment ids must be found in one of the project's experiments_namespaces.
repeated int64 experiment_id = 104;
// If set to true, guarantees that each row in the output report contains a single
// experiment id in its system profile. This allows a single report to be used
// for multiple experiments without compromising privacy.
//
// This field is not supported on Fuchsia.
bool single_experiment_id_per_row = 121;
// The number of cookie buckets to be used by the report.
//
// Must be 0 (disabled) or 20. A value of 20 requires the report to be for an
// experiment.
int32 num_cookie_buckets = 109;
// String defining the scope of cookie buckets.
//
// Must be set if cookie buckets are enabled for the report (see
// num_cookie_buckets). Must be the same for all reports that require a device
// to use the same cookie buckets, e.g. multiple reports for the same
// experiment, and should be different for reports without this requirement.
string cookie_bucket_scope = 120;
// This field is required for reports of type UNIQUE_DEVICE_COUNTS,
// UNIQUE_DEVICE_HISTOGRAMS, UNIQUE_DEVICE_STRING_COUNTS, and
// HOURLY_VALUE_HISTOGRAMS. The value for these reports must be SELECT_LAST,
// SELECT_FIRST, or occasionally REPORT_ALL. For report types other than those
// listed above, this field must not be set and is implicitly REPORT_ALL.
//
// If the system profile value changed during the aggregation window specified
// for this report, system_profile_selection specifies which system profile to
// report for each device. See SystemProfileSelectionPolicy for the meaning of
// each value.
SystemProfileSelectionPolicy system_profile_selection = 103;
// Maximum ReleaseStage for which this Report is allowed to be collected.
// See the ReleaseStage enum for the available stages.
ReleaseStage max_release_stage = 105;
// If true, this Report can be collected even if the user/device has not
// consented.
// This field can only be set to true on reports that use privacy mechanisms
// that include differential privacy (i.e. not DE_IDENTIFICATION). The use of
// this field is for collecting anonymized data that is allowed even when
// consent is not given. These use cases need to be specially approved
// by privacy reviewers.
bool exempt_from_consent = 108;
// New Privacy API
// This enum identifies what privacy protection is applied to the report.
enum PrivacyMechanism {
// No privacy mechanism was specified.
PRIVACY_MECHANISM_UNSPECIFIED = 0;
// If you specify this value the data will be de-identified without
// additional privacy protections.
DE_IDENTIFICATION = 1;
// If you specify this value the data will be protected with Shuffled
// Differential Privacy guarantees (e.g., the noise will be added on the
// devices).
SHUFFLED_DIFFERENTIAL_PRIVACY = 2;
}
// This field identifies what privacy protection is applied to the report.
PrivacyMechanism privacy_mechanism = 106;
// The object for grouping all parameters needed for SHUFFLED DP mode, i.e.
// when privacy_mechanism is SHUFFLED_DIFFERENTIAL_PRIVACY.
message ShuffledDifferentialPrivacyConfig {
// This field represents an upper bound on the amount of information which
// can be learned about a device from a report including that device.
// Lower values correspond to higher privacy.
// Epsilon must be > 0.
double epsilon = 1;
// This field represents the risk of the epsilon guarantee not holding. This
// is usually set as 1 over the expected number of participating devices.
// Delta must be > 0 and < 1.
double delta = 2;
// The generated report will exclude an Observation if there are not at
// least |reporting_threshold| distinct devices reporting Observations with
// the same ObservationMetadata.
uint32 reporting_threshold = 3;
// The mean number of observations added per index point when performing the
// Poisson mechanism encoding for Cobalt reports. Required.
//
// In the future, the value of this field will be computed by the registry
// parser as a function of other fields in this
// ShuffledDifferentialPrivacyConfig. For now, it should be set manually in
// the Cobalt registry in consultation with the Cobalt team.
//
// TODO(https://fxbug.dev/295053509): update this comment once the field is auto populated by
// the registry parser.
double poisson_mean = 4;
// If true, skip validating the |poisson_mean| value. This is meant to be used only
// in end-to-end tests where the traffic volume would not allow a reasonable |poisson_mean|.
bool skip_poisson_mean_validation_test_only = 5;
// The report fields that affect how a device participates in a private
// report.
//
// Note, this field is populated by the registry parser and must not be set
// manually.
DevicePrivacyDependencySet device_privacy_dependency_set = 6;
// Captures the report fields a device _must_ use in order to properly
// make contributions, real and fabricated, to a report.
//
// Any report field that changes how observations are encoded or noise is
// fabricated must result in a new value.
//
// (This enum is the type of |device_privacy_dependency_set| above; protobuf
// permits a type to be declared after its first use.)
enum DevicePrivacyDependencySet {
// No dependency set has been assigned.
DEVICE_PRIVACY_DEPENDENCY_SET_UNSET = 0;
// Captures:
// - Fields that affect index points include:
// * metric dimensions
// * num_index_points
// * string_sketch_params
// * min_value
// * max_value
// * max_count
// * int_buckets
//
// - Fields that affect sparsity are:
// * event_vector_buffer_max
// * string_buffer_max
//
// - poisson_mean
V1 = 1;
}
}
// If privacy_mechanism is SHUFFLED_DIFFERENTIAL_PRIVACY then privacy_config
// must contain a valid ShuffledDifferentialPrivacyConfig; otherwise it must
// be left empty.
oneof privacy_config {
ShuffledDifferentialPrivacyConfig shuffled_dp = 107;
}
}
// A specification for SystemProfile selection policy: which SystemProfile(s)
// are reported for a device when more than one was seen during a report's
// aggregation window.
enum SystemProfileSelectionPolicy {
// Use the default value. For reports of type FLEETWIDE_OCCURRENCE_COUNTS,
// FLEETWIDE_HISTOGRAMS, FLEETWIDE_MEANS, UNIQUE_DEVICE_NUMERIC_STATS,
// HOURLY_VALUE_NUMERIC_STATS, and STRING_COUNTS this will resolve to
// 'REPORT_ALL' and should not be changed. For all other report types a
// value other than this one must be set.
SYSTEM_PROFILE_SELECTION_POLICY_UNSET = 0;
// Always report the last SystemProfile seen in the aggregation window. This
// will be the last SystemProfile seen *at the time of an event* in the
// aggregation window.
SELECT_LAST = 1;
// Always report the first SystemProfile seen in the aggregation window. This
// will be the first SystemProfile seen *at the time of an event* in the
// aggregation window.
SELECT_FIRST = 2;
// Report all system profiles in the aggregation window. For most report
// types, this is the most sensible value to use. For reports that depend on
// some concept of uniqueness (such as UNIQUE_DEVICE_COUNTS,
// UNIQUE_DEVICE_HISTOGRAMS, UNIQUE_DEVICE_STRING_COUNTS, and
// HOURLY_VALUE_HISTOGRAMS) this may not be the best choice, since it will no
// longer be the case that a single device will only upload one observation
// per time period (it will upload one observation per time period *per unique
// system_profile*).
REPORT_ALL = 3;
}
// A specification of a field from SystemProfile. These are used in a
// ReportDefinition to specify which fields should be included in the generated
// Observations and reports.
//
// For a description of the meaning of each field, see the fields in the
// SystemProfile in: src/pb/common.proto
//
// Note that the values below are not listed in numeric order; the numbers are
// part of the wire format and must not be changed.
enum SystemProfileField {
OS = 0;
ARCH = 1;
BOARD_NAME = 2;
PRODUCT_NAME = 3;
SYSTEM_VERSION = 4;
APP_VERSION = 10;
CHANNEL = 5;
BUILD_TYPE = 7;
EXPERIMENT_IDS = 9;
COOKIE_BUCKET = 12;
// These numbers and names are reserved so that they cannot be used by any
// future value of this enum.
reserved 6, 8, 11;
reserved "REALM", "EXPERIMENT_TOKENS", "COOKIE_BUCKET_ID";
}
// Stages in the release cycle of a component. Each Cobalt customer determines
// its current ReleaseStage when initializing the CobaltService. Each Metric
// and Report can declare the maximum ReleaseStage for which it is allowed to
// be collected. For example a DEBUG Metric will not be collected from a device
// running a FISHFOOD release.
enum ReleaseStage {
// No release stage was set.
// NOTE(review): this is the proto3 default for an unset field of this type.
// The comment on GA below says GA "is the default if no value is set", so
// presumably consumers treat this value as GA -- confirm.
RELEASE_STAGE_NOT_SET = 0;
// A test build. Also called "eng". Only use this value when the device is
// running test builds as all metrics/reports will be collected.
DEBUG = 10;
// Small, internal prototype. Used for testing a new feature internally,
// usually within the team or a small group.
FISHFOOD = 20;
// An internal release for testing with internal users.
DOGFOOD = 40;
// An open beta, for testing with internal and external users.
OPEN_BETA = 60;
// Generally-available. The final stage of a release. Also called
// "production". If unsure of which release stage the device is running, it
// is safest to fall back to this value (which is the default if no value is
// set), to avoid inadvertently collecting metric/report data.
GA = 99;
}
// ExponentialIntegerBuckets is used to define a partition of the integers into
// a finite number of exponentially increasing buckets.
//
// Let n = num_buckets. Then there are n+2 buckets indexed 0,...,n+1.
//
// The bucket boundaries are:
// a[0] = floor
// a[1] = floor + initial_step
// a[2] = floor + initial_step * step_multiplier_float
// a[3] = floor + initial_step * step_multiplier_float ^ 2
// a[4] = floor + initial_step * step_multiplier_float ^ 3
// and in general, for i = 1, 2, 3 ... n
// a[i] = floor + initial_step * step_multiplier_float ^ (i-1)
//
// Then, the buckets are defined as follows:
// Bucket 0 is the underflow bucket: (-infinity, floor)
// Bucket i for 0 < i < n+1: [ceiling(a[i-1]), ceiling(a[i]))
// Bucket n+1 is the overflow bucket: [ceiling(a[n]), +infinity)
//
// Examples:
// floor = 0
// num_buckets = 3
// initial_step = 10
// step_multiplier_float = 10
// Then, the buckets are:
// (-infinity, 0), [0, 10), [10, 100), [100, 1000), [1000, +infinity)
//
// floor = 0
// num_buckets = 3
// initial_step = 2
// step_multiplier_float = 2
// Then, the buckets are:
// (-infinity, 0), [0, 2), [2, 4), [4, 8), [8, +infinity)
//
// floor = 10
// num_buckets = 3
// initial_step = 2
// step_multiplier_float = 2
// Then, the buckets are:
// (-infinity, 10), [10, 12), [12, 14), [14, 18), [18, +infinity)
//
// floor = 0
// num_buckets = 3
// initial_step = 100
// step_multiplier_float = 10
// Then, the buckets are:
// (-infinity, 0), [0, 100), [100, 1000), [1000, 10000), [10000, +infinity)
//
// When the step_multiplier_float is between 1 and 2, some buckets may have a width of zero
// after the bucket bounds are rounded up (this happens whenever two consecutive boundaries
// round up to the same integer). These zero-width buckets are considered "impossible"
// because they shouldn't have any data. Any data that falls into these impossible buckets, whether
// it's due to reporting errors or generated noise, is discarded.
//
// Examples:
// floor = 0
// num_buckets = 3
// initial_step = 1
// step_multiplier_float = 1.1
// Then, the pre-ceiling buckets are:
// (-infinity, 0), [0, 1), [1, 1.10), [1.10, 1.21), [1.21, +infinity)
// the buckets after ceiling are (the zero-width bucket is marked with ^...^):
// (-infinity, 0), [0, 1), [1, 2), ^[2, 2)^, [2, +infinity)
//
// floor = 0
// num_buckets = 3
// initial_step = 10
// step_multiplier_float = 1.1
// Then, the pre-ceiling buckets are:
// (-infinity, 0), [0, 10), [10, 11.00), [11.00, 12.10), [12.10, +infinity)
// the buckets after ceiling are (no zero-width bucket in this case):
// (-infinity, 0), [0, 10), [10, 11), [11, 13), [13, +infinity)
message ExponentialIntegerBuckets {
// This number and name are reserved so that they cannot be used by any
// future field of this message.
reserved 4;
reserved "step_multiplier";
// The boundary a[0] above: the upper (exclusive) bound of the underflow
// bucket and the lower (inclusive) bound of bucket 1.
int64 floor = 1;
// num_buckets must be at least 1.
uint32 num_buckets = 2;
// Must be at least one.
uint32 initial_step = 3;
// Must be greater than one.
float step_multiplier_float = 5;
}
// LinearIntegerBuckets is used to define a partition of the integers into a
// finite number of buckets of equal size.
//
// Let n = num_buckets. Then there are n+2 buckets indexed 0,...,n+1.
// Bucket 0 is the underflow bucket: (-infinity, floor)
// Bucket n+1 is the overflow bucket: [floor + step_size * n, +infinity)
//
// For i = 1 to n, the bucket i is defined as
// [floor + step_size * (i-1), floor + step_size * i)
//
// Example: floor = 0, num_buckets = 3, step_size = 10.
// (-infinity, 0), [0, 10), [10, 20), [20, 30), [30, +infinity)
message LinearIntegerBuckets {
// The upper (exclusive) bound of the underflow bucket and the lower
// (inclusive) bound of bucket 1.
int64 floor = 1;
// Must be at least one.
uint32 num_buckets = 2;
// The width of each of buckets 1 to n. Must be at least one.
uint32 step_size = 3;
}
// A partition of the integers into buckets, defined either exponentially or
// linearly.
message IntegerBuckets {
// Exactly one kind of bucket definition may be set.
oneof buckets {
ExponentialIntegerBuckets exponential = 1;
LinearIntegerBuckets linear = 2;
}
// By default, empty buckets are added to the report data so that every
// histogram contains a row for every bucket. If set to true, these empty
// buckets will not be added. Buckets with a zero count may still occur if
// data is logged that contains a zero count. This field cannot be set on
// reports with added privacy.
bool sparse_output = 3;
}
// Parameters describing the dimensions of a Count-Min Sketch.
message StringSketchParameters {
// Number of hashes in Count-Min Sketch.
int32 num_hashes = 1;
// Number of cells per hash in Count-Min Sketch.
int32 num_cells_per_hash = 2;
}