| // Copyright 2018 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| syntax = "proto3"; |
| |
| package cobalt; |
| |
| import "src/registry/window_size.proto"; |
| |
| option go_package = "src/registry;config"; |
| |
| // A Report analyzes Events that were logged to Cobalt and emits an aggregated |
| // output that may then be queried or visualized by an analyst user of Cobalt. |
| // |
| // A Report is associated with a Metric and this means that the Report analyzes |
| // the Events that were logged to that Metric. The first step occurs on a |
| // device where Cobalt analyzes the logged Events in order to form Observations. |
| // |
| // An Observation is built for a particular Report. The type of observation, |
| // including which of several privacy-oriented Encodings is used or not, depends |
| // on the Report type. |
| // |
| // The Observations are sent to the Cobalt Shuffler which shuffles them in order |
| // to break linkability between Observations and linkability with the |
| // originating device. Next the shuffled Observations are sent to the Analyzer |
| // which aggregates Observations from all devices in order to generate a report. |
| // |
| // There are multiple types of Metrics and multiple types of Reports. Each |
| // Report type is compatible with only some of the Metric types. |
| // |
| // A ReportDefinition defines a Cobalt Report to be generated. |
| // An instance of ReportDefinition is always associated with an instance of |
| // MetricDefinition called the owning MetricDefinition. |
| // Next ID: 122 |
| message ReportDefinition { |
| // Field numbers and field names that must not be (re)used in this message. |
| reserved 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 16, 20, 21, 30, 31, 101, 102; |
| // NOTE: "candidate_lis" looks like a truncated spelling of "candidate_list". |
| // Both spellings are reserved so that the full name is protected as well. |
| reserved "aggregation_type", "aggregation_window", "candidate_lis", "candidate_list", |
| "dp_release_config", "expected_population_size", "expected_string_set_size", |
| "export_location_override", "local_privacy_noise_level", "output_location", "percentiles", |
| "threshold", "window_size", "use_poisson_mechanism_for_privacy", "prob_bit_flip", |
| "candidate_file", "privacy_level", "poisson_mean"; |
| |
| // Unique name for this Report within its owning MetricDefinition. |
| // The name must obey the syntax of a C variable name and must have length |
| // at most 64. The integer |id| field is the stable identifier for a report |
| // so this name may be changed. However doing this may affect the |
| // names and locations of some artifacts produced by Cobalt's report |
| // generation pipeline. |
| string report_name = 1; |
| |
| // The unique integer ID for this report within its owning metric. |
| // The user must manually set this |id| field. This is the stable identifier |
| // for a report and should not be changed once data collection begins. |
| uint32 id = 2; |
| |
| // A Report has exactly one of the following types. The type fixes how Events are aggregated. |
| // Next standard report type ID: 23 |
| enum ReportType { |
| reserved 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 19, 9999; |
| reserved "CUSTOM_RAW_DUMP", "EVENT_COMPONENT_OCCURRENCE_COUNT", "HIGH_FREQUENCY_STRING_COUNTS", |
| "INT_RANGE_HISTOGRAM", "NUMERIC_AGGREGATION", "NUMERIC_PERF_RAW_DUMP", |
| "PER_DEVICE_HISTOGRAM", "PER_DEVICE_NUMERIC_STATS", "SIMPLE_OCCURRENCE_COUNT", |
| "STRING_COUNTS_WITH_THRESHOLD", "STRING_HISTOGRAMS", "UNIQUE_N_DAY_ACTIVES"; |
| |
| REPORT_TYPE_UNSET = 0; |
| |
| // For each event, produces the total count of occurrences of this event across |
| // the fleet on the report day, grouped by system profile. |
| // For example, a report of this type might give the total number of times |
| // a medium, red widget was used across the fleet yesterday. |
| // |
| // Input metric types: OCCURRENCE |
| // |
| // Local aggregation: DO NOT SET. Defaults to summing the counts for each event |
| // over the local aggregation period per system profile |
| // Local aggregation period: |
| // - Fuchsia reports: DO NOT SET. Defaults to 1 hour |
| // - Android reports: DO NOT SET. Defaults to 1 day |
| // Global aggregation: Sums the counts for each event on the report day per |
| // system profile |
| // System Profile Selection Policy: DO NOT SET. Defaults to REPORT_ALL |
| // |
| // Device sent observation type: IntegerObservation |
| // (See observation.proto) |
| // Output report row type: OccurrenceCountReportRow |
| // (See report_row.proto) |
| // |
| // ReportDefinition fields particular to this type: |
| // - reporting_interval (optional; only supported by some client platforms) |
| FLEETWIDE_OCCURRENCE_COUNTS = 11; |
| |
| // For each event that is accepted during the aggregation period ending on the |
| // report day, produces the number of unique devices with the same system profile. |
| // |
| // "Is accepted" depends on the local aggregation procedure used: |
| // |
| // AT_LEAST_ONCE: An event is accepted if it was logged at least once during |
| // the aggregation period. For example, a report of this type might give the |
| // total number of devices with the same system profile on which a medium, |
| // red widget was used at least once over the seven-day period ending yesterday. |
| // |
| // SELECT_FIRST: An event is accepted only if it was the first logged event |
| // during the aggregation period. For example, a report of this type might |
| // give the total number of devices with the same system profile on which the |
| // first widget used during the seven-day period ending yesterday was medium-red. |
| // |
| // SELECT_MOST_COMMON: An event is accepted only if it was the most frequently |
| // logged event during the aggregation period. For example, a report of this |
| // type might give the total number of devices with the same system profile |
| // on which most of the widgets used during the seven-day period ending yesterday |
| // were medium-red. |
| // |
| // NOTE: Using a local aggregation procedure of AT_LEAST_ONCE or |
| // SELECT_FIRST, in combination with setting expedited_sending, results in |
| // the count being sent by the device when the event occurs (instead of at |
| // the end of the day). This can be desirable for having data for the |
| // current day appear faster in the reports output by Cobalt. |
| // |
| // Input metric types: OCCURRENCE |
| // |
| // Local aggregation: AT_LEAST_ONCE, SELECT_FIRST, or SELECT_MOST_COMMON |
| // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30. |
| // Global aggregation: Sums the counts for each event on the report day per |
| // system profile |
| // System Profile Selection Policy: SELECT_FIRST and SELECT_LAST will maintain |
| // uniqueness, REPORT_ALL may be useful in |
| // some cases |
| // |
| // Device sent observation type: IntegerObservation with value of 1 if the |
| // associated event "is accepted" (See observation.proto) |
| // Output report row type: OccurrenceCountReportRow |
| // (See report_row.proto) |
| // |
| // ReportDefinition fields particular to this type: |
| // - local_aggregation_procedure |
| // - local_aggregation_period |
| // - expedited_sending |
| // - system_profile_selection |
| UNIQUE_DEVICE_COUNTS = 12; |
| |
| // For each event, produces an int-range histogram where each bucket |
| // counts the number of unique devices with the same system profile, where: |
| // |
| // OCCURRENCE: an occurrence count of this event over the local aggregation |
| // period ending on the report day falls within the bucket range |
| // |
| // INTEGER: a locally aggregated statistic associated with this event over |
| // the local aggregation period ending on the report day falls |
| // within the bucket range |
| // |
| // For example, for the integer bucket [10, 100), a report for an OCCURRENCE |
| // metric might give the number of devices with the same system profile on |
| // which a medium, red widget was used between 10 and 100 times over the |
| // seven-day period ending yesterday. |
| // |
| // For the integer bucket [10, 100), a report for an INTEGER metric that |
| // specifies the MIN_PROCEDURE local aggregation procedure might give the |
| // number of devices with the same system profile on which the minimum |
| // temperature of a medium, red widget over the seven-day period ending |
| // yesterday was between 10 and 100 degrees. |
| // |
| // Input metric types: OCCURRENCE or INTEGER |
| // |
| // Local aggregation: |
| // - INTEGER metrics: SUM_PROCEDURE, MIN_PROCEDURE, MAX_PROCEDURE, MEAN, |
| // MEDIAN or PERCENTILE_N |
| // - OCCURRENCE metrics: DO NOT SET. Defaults to summing the counts for each |
| // event over the local aggregation period per |
| // system profile |
| // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30. |
| // Global aggregation: Sums the counts in each int-range bucket for each |
| // event on the report day per system profile |
| // System Profile Selection Policy: SELECT_FIRST and SELECT_LAST will maintain |
| // uniqueness, REPORT_ALL may be useful in |
| // some cases |
| // |
| // Device sent observation type: IntegerObservation |
| // (See observation.proto) |
| // Output report row type: IntegerHistogramReportRow |
| // (See report_row.proto) |
| // |
| // ReportDefinition fields particular to this type: |
| // - local_aggregation_procedure (only when the metric type is INTEGER) |
| // - local_aggregation_period |
| // - int_buckets (this is used only on the server for reports without |
| // added privacy, but is used on the client for reports with added |
| // privacy) |
| // - system_profile_selection |
| UNIQUE_DEVICE_HISTOGRAMS = 13; |
| |
| // For each event, produces an int-range histogram where each bucket |
| // counts the number of values, grouped by system profile, where: |
| // |
| // OCCURRENCE: an hourly occurrence count of this event falls within the |
| // bucket range on the report day |
| // |
| // INTEGER: an hourly statistic associated with this event falls within |
| // the bucket range on the report day |
| // |
| // Each unique device contributes a value every hour, 24 such values per |
| // device per day. |
| // |
| // Computationally this report type is identical to UNIQUE_DEVICE_HISTOGRAMS |
| // except that the local aggregation period used is one hour and so the |
| // counts in each bucket are NOT interpreted as a number of unique devices, |
| // but as a number of hourly values. |
| // |
| // For example, for the integer bucket [10, 100), a report for an OCCURRENCE |
| // metric might give the number of times that the hourly count of medium, |
| // red widget usages was between 10 and 100 across the fleet yesterday. |
| // |
| // For the integer bucket [10, 100), a report for an INTEGER metric that |
| // specifies the MIN_PROCEDURE local aggregation procedure might give the |
| // number of times that the minimum temperature over an hour of all medium, |
| // red widget usages was between 10 and 100 degrees across the fleet yesterday. |
| // |
| // Input metric types: OCCURRENCE or INTEGER |
| // |
| // Local aggregation: |
| // - INTEGER metrics: SUM_PROCEDURE, MIN_PROCEDURE, MAX_PROCEDURE, MEAN, |
| // MEDIAN or PERCENTILE_N |
| // - OCCURRENCE metrics: DO NOT SET. Defaults to summing the counts for each event |
| // over the local aggregation period per system profile |
| // Local aggregation period: DO NOT SET. Defaults to 1 hour |
| // Global aggregation: Sums the counts in each int-range bucket for each event |
| // on the report day per system profile |
| // System Profile Selection Policy: SELECT_FIRST and SELECT_LAST will maintain |
| // uniqueness, REPORT_ALL may be useful in |
| // some cases |
| // |
| // Device sent observation type: IntegerObservation |
| // (See observation.proto) |
| // Output report row type: IntegerHistogramReportRow |
| // (See report_row.proto) |
| // |
| // ReportDefinition fields particular to this type: |
| // - local_aggregation_procedure (only when the metric type is INTEGER) |
| // - int_buckets (this is used only on the server for reports without |
| // added privacy, but is used on the client for reports with added |
| // privacy) |
| // - system_profile_selection |
| HOURLY_VALUE_HISTOGRAMS = 14; |
| |
| // For each event, produces an int-range histogram where each bucket counts |
| // the number of integer measurements associated with the event that fall within |
| // the bucket range across the fleet on the report day, grouped by system |
| // profile. |
| // |
| // Each logged event is counted as a separate value, so a device that experiences |
| // many events will contribute more data than one with few events. It's impossible |
| // to later determine if the dataset has been skewed by one or more devices |
| // contributing more values because the shuffler breaks links between values |
| // uploaded by the same device. |
| // |
| // For example, for the integer bucket [10, 100), a report of this type might |
| // give the number of times that a medium, red widget's temperature was measured |
| // as being between 10 and 100 degrees across the fleet yesterday, regardless |
| // of how many temperature measurements were taken on each device individually. |
| // |
| // Input metric types: INTEGER or INTEGER_HISTOGRAM |
| // |
| // Local aggregation: DO NOT SET. Defaults to summing the counts in each int-range |
| // bucket for each event over the local aggregation period per |
| // system profile |
| // Local aggregation period: |
| // - Fuchsia reports: DO NOT SET. Defaults to 1 hour |
| // - Android reports: DO NOT SET. Defaults to 1 day |
| // Global aggregation: Sums the counts in each int-range bucket for each event on |
| // the report day per system profile |
| // System Profile Selection Policy: DO NOT SET. Defaults to REPORT_ALL |
| // |
| // Device sent observation type: IndexHistogramObservation |
| // (See observation.proto) |
| // Output report row type: IntegerHistogramReportRow |
| // (See report_row.proto) |
| // |
| // ReportDefinition fields particular to this type: |
| // - int_buckets (Only with metric_type = INTEGER) |
| FLEETWIDE_HISTOGRAMS = 15; |
| |
| // For each event, produces the sum and count of many integer measurements |
| // associated with this event across the fleet on the report day, grouped by |
| // system profile. |
| // |
| // Each logged event is counted as a separate value, so a device that experiences |
| // many events will contribute more data than one with few events. This allows |
| // us to produce a fleetwide mean. Note, it's impossible to later determine if the |
| // dataset has been skewed by one or more devices contributing more values because |
| // the shuffler breaks links between values uploaded by the same device. |
| // |
| // For example, a report of this type might give the mean of all temperature |
| // measurements of a medium, red widget across the fleet, yesterday, regardless |
| // of how many temperature measurements were taken on each device individually. |
| // |
| // Input metric types: INTEGER |
| // |
| // Local aggregation: DO NOT SET. Defaults to accumulating the sum and count for |
| // each event over the local aggregation period per system |
| // profile |
| // Local aggregation period: DO NOT SET. Defaults to 1 hour |
| // Global aggregation: Accumulates the sum and count for each event on the report |
| // day per system profile |
| // System Profile Selection Policy: DO NOT SET. Defaults to REPORT_ALL |
| // |
| // Device sent observation type: SumAndCountObservation |
| // (See observation.proto) |
| // Output report row type: SumAndCountReportRow |
| // (See report_row.proto) |
| // |
| // ReportDefinition fields particular to this type: |
| // none |
| FLEETWIDE_MEANS = 16; |
| |
| // For each event, produces several per-device numeric statistics (e.g. 95%-ile) |
| // over a set of values collected across the fleet grouped by system profile, |
| // where each value is: |
| // |
| // OCCURRENCE: an occurrence count of this event over the local aggregation |
| // period ending on the report day |
| // |
| // INTEGER: a locally aggregated statistic associated with this event over |
| // the local aggregation period ending on the report day |
| // |
| // Each unique device contributes a single value and so the distribution |
| // of the values may be thought of as a distribution of unique devices. |
| // |
| // For example, a report for an OCCURRENCE metric might give the 95%-ile of |
| // the 7-day per-device counts of medium, red widget usage across the fleet |
| // yesterday. |
| // |
| // A report for an INTEGER metric that specifies the MIN_PROCEDURE local |
| // aggregation procedure might give the 95%-ile of the 7-day per-device |
| // minimum temperature of the medium, red widget across the fleet yesterday. |
| // |
| // Input metric types: OCCURRENCE or INTEGER |
| // |
| // Local aggregation: |
| // - INTEGER metrics: SUM_PROCEDURE, MIN_PROCEDURE, MAX_PROCEDURE, MEAN, |
| // MEDIAN or PERCENTILE_N |
| // - OCCURRENCE metrics: DO NOT SET. Defaults to summing the counts for each |
| // event over the local aggregation period per system |
| // profile |
| // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30. |
| // Global aggregation: Produces several numeric statistics from uploaded values |
| // for each event on the report day per system profile |
| // System Profile Selection Policy: DO NOT SET. Defaults to REPORT_ALL |
| // |
| // Device sent observation type: IntegerObservation |
| // (See observation.proto) |
| // Output report row type: NumericStatsReportRow |
| // (See report_row.proto) |
| // |
| // ReportDefinition fields particular to this type: |
| // - local_aggregation_procedure (only when the metric type is INTEGER) |
| // - local_aggregation_period |
| UNIQUE_DEVICE_NUMERIC_STATS = 17; |
| |
| // For each event, produces several numeric statistics (e.g. 95%-ile) over a |
| // set of hourly values collected across the fleet grouped by system profile, |
| // where each value is: |
| // |
| // OCCURRENCE: an hourly occurrence count of this event on the report day |
| // |
| // INTEGER: an hourly statistic associated with this event on the report day |
| // |
| // Each unique device contributes a value every hour, 24 such values per |
| // device per day. |
| // |
| // Computationally this report type is identical to |
| // UNIQUE_DEVICE_NUMERIC_STATS except that the local aggregation period |
| // used is one hour and so the distribution of the values is NOT |
| // interpreted as a distribution of unique devices, but as a distribution of |
| // hourly values. |
| // |
| // For example, a report for an OCCURRENCE metric might give the 95%-ile of the |
| // hourly count of medium, red widget usages across the fleet yesterday. |
| // |
| // A report for an INTEGER metric that specifies the MIN_PROCEDURE local |
| // aggregation procedure might give the 95%-ile of the hourly minimum |
| // temperature of medium, red widget usages across the fleet yesterday. |
| // |
| // Input metric types: OCCURRENCE or INTEGER |
| // |
| // Local aggregation: |
| // - INTEGER metrics: SUM_PROCEDURE, MIN_PROCEDURE, MAX_PROCEDURE, MEAN, |
| // MEDIAN or PERCENTILE_N |
| // - OCCURRENCE metrics: DO NOT SET. Defaults to summing the counts for each |
| // event over the local aggregation period per system |
| // profile |
| // Local aggregation period: DO NOT SET. Defaults to 1 hour |
| // Global aggregation: Produces several numeric statistics from uploaded values |
| // for each event on the report day per system profile |
| // System Profile Selection Policy: DO NOT SET. Defaults to REPORT_ALL |
| // |
| // Device sent observation type: IntegerObservation |
| // (See observation.proto) |
| // Output report row type: NumericStatsReportRow |
| // (See report_row.proto) |
| // |
| // ReportDefinition fields particular to this type: |
| // - local_aggregation_procedure (only when the metric type is INTEGER) |
| HOURLY_VALUE_NUMERIC_STATS = 18; |
| |
| // For each event, produces the total count of occurrences of a string value |
| // associated with this event across the fleet on the report day, grouped by |
| // system profile. |
| // |
| // Input metric types: STRING |
| // |
| // Local aggregation: DO NOT SET. Defaults to summing the counts in each string |
| // bucket (where the key is the hash of the logged string) |
| // for each event over the local aggregation period per |
| // system profile |
| // Local aggregation period: |
| // - Fuchsia reports: DO NOT SET. Defaults to 1 hour |
| // - Android reports: DO NOT SET. Defaults to 1 day |
| // Global aggregation: Sums the counts for each candidate string for each event |
| // on the report day per system profile |
| // System Profile Selection Policy: DO NOT SET. Defaults to REPORT_ALL |
| // |
| // Device sent observation type: StringHistogramObservation |
| // (See observation.proto) |
| // Output report row type: StringCountReportRow |
| // (See report_row.proto) |
| // |
| // ReportDefinition fields particular to this type: |
| // - string_buffer_max |
| STRING_COUNTS = 20; |
| |
| // For each event and candidate string, produces the number of unique devices |
| // with the same system profile on which this string was logged, associated |
| // with this event during the aggregation period ending on the report day, |
| // which must be DAYS_1, DAYS_7, DAYS_28 or DAYS_30. |
| // |
| // This is similar to the AT_LEAST_ONCE local aggregation procedure for |
| // UNIQUE_DEVICE_COUNTS. For example, a report of this type might |
| // give the total number of devices with the same system profile on which |
| // a medium, red widget was used in conjunction with the component name |
| // "widget-consumer" at least once in the seven-day period ending |
| // yesterday. |
| // |
| // Input metric types: STRING |
| // |
| // Local aggregation procedure: DO NOT SET. For each event, increments the count |
| // by 1 for the string bucket if this string is |
| // logged at least once over the local aggregation |
| // period per system profile |
| // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30. |
| // Global aggregation: Sums the counts for each candidate string for each event |
| // on the report day per system profile |
| // System Profile Selection Policy: SELECT_FIRST and SELECT_LAST will maintain |
| // uniqueness, REPORT_ALL may be useful in |
| // some cases |
| // |
| // Device sent observation type: StringHistogramObservation |
| // (See observation.proto) |
| // Output report row type: StringCountReportRow |
| // (See report_row.proto) |
| // |
| // ReportDefinition fields particular to this type: |
| // - local_aggregation_period |
| // - string_buffer_max |
| // - system_profile_selection |
| UNIQUE_DEVICE_STRING_COUNTS = 21; |
| |
| // Experimental. DO NOT USE! |
| // Input metric types: STRUCT |
| // |
| // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30 |
| // |
| // ReportDefinition fields particular to this type: |
| // - aggregates: (1 or more) |
| STRUCT = 22; |
| } |
| ReportType report_type = 3; |
| |
| //////////////// Fields for reports with privacy enabled ///////////////// |
| |
| // When reporting numerical values with privacy, the values are mapped to |
| // indices from 0 to num_index_points-1 with a randomized rounding method. |
| // |
| // In the future, the value of this field will be computed by the registry |
| // parser as a function of other privacy-related fields and an estimate of the |
| // user population size. For now, it should be set manually in the Cobalt |
| // registry in consultation with the Cobalt team. |
| // |
| // TODO(https://fxbug.dev/278932979): update this comment once the field is populated by |
| // the registry parser. |
| uint32 num_index_points = 22; |
| |
| // When reporting strings with privacy, the strings are counted using a linear |
| // sketch. |
| // |
| // In the future, the value of this field will be computed by the registry |
| // parser as a function of other privacy-related fields and an estimate of the |
| // user population size. For now, it should be set manually in the Cobalt |
| // registry in consultation with the Cobalt team. |
| // |
| // TODO(https://fxbug.dev/278932979): update this comment once the field is populated by |
| // the registry parser. |
| StringSketchParameters string_sketch_params = 27; |
| |
| // These fields specify the range of values that can be reported by a device |
| // in the specified local_aggregation_period. If the true value to be reported |
| // falls outside the specified range, the value is clipped to that range. |
| // |
| // For FLEETWIDE_OCCURRENCE_COUNTS, UNIQUE_DEVICE_NUMERIC_STATS and |
| // HOURLY_VALUE_NUMERIC_STATS, the range applies to the total numerical value |
| // computed for the device over the aggregation period specified in the |
| // report. |
| // |
| // For FLEETWIDE_MEANS, the range applies to the per-device sum of the value |
| // to be averaged over one hour. (For FLEETWIDE_MEANS, the `max_count` field |
| // is also required in order to bound the `count` value.) |
| // |
| // If a privacy_mechanism other than DE_IDENTIFICATION is specified, this field is |
| // required for reports of type: |
| // * FLEETWIDE_OCCURRENCE_COUNTS |
| // * UNIQUE_DEVICE_NUMERIC_STATS |
| // * HOURLY_VALUE_NUMERIC_STATS |
| // * FLEETWIDE_MEANS |
| int64 min_value = 23; |
| int64 max_value = 24; |
| |
| // This field specifies the maximum count to be reported by a device in the |
| // specified local_aggregation_period. If the true count is greater than |
| // max_count, then the count will be reported as max_count. |
| // |
| // For FLEETWIDE_HISTOGRAMS, the bound applies to the count for each |
| // individual histogram bucket over the aggregation period of one hour. For |
| // STRING_COUNTS, it applies to the count for each string over one hour. |
| // |
| // For FLEETWIDE_MEANS, the bound applies to the per-device count of the |
| // values to be averaged over one hour. |
| // |
| // If a privacy_mechanism other than DE_IDENTIFICATION is specified, this field is |
| // required for reports of type: |
| // * FLEETWIDE_HISTOGRAMS |
| // * FLEETWIDE_MEANS |
| // * STRING_COUNTS |
| uint64 max_count = 25; |
| |
| //////////////// Fields specific to some report types ///////////////// |
| |
| // A specification of integer-range buckets for a histogram. |
| // |
| // This field is for reports of type UNIQUE_DEVICE_HISTOGRAMS, |
| // HOURLY_VALUE_HISTOGRAMS, and FLEETWIDE_HISTOGRAMS -- but for |
| // FLEETWIDE_HISTOGRAMS only with metrics of type INTEGER, not with metrics of |
| // type INTEGER_HISTOGRAM, because in that case the MetricDefinition already |
| // contains an instance of IntegerBuckets. |
| IntegerBuckets int_buckets = 10; |
| |
| // The interval at which clients generate and upload observations: 1 hour (HOURS_1) or 1 day (DAYS_1). |
| enum ReportingInterval { |
| REPORTING_INTERVAL_UNSET = 0; |
| HOURS_1 = 1; |
| DAYS_1 = 2; |
| } |
| |
| // This field is optional for FLEETWIDE_OCCURRENCE_COUNTS reports, and is only |
| // supported by some client platforms. If not set, the reporting interval |
| // defaults to 1 hour for FLEETWIDE_OCCURRENCE_COUNTS reports. |
| ReportingInterval reporting_interval = 32; |
| |
| // This field can be used with all Report types. When set, the generated |
| // report will exclude an Observation if there are not at least |
| // |reporting_threshold| number of distinct devices reporting Observations |
| // with the same ObservationMetadata. |
| uint32 reporting_threshold = 13; |
| |
| // The on-device function used to aggregate logged data over the local aggregation |
| // period. Which values are allowed depends on the report type; see ReportType. |
| enum LocalAggregationProcedure { |
| LOCAL_AGGREGATION_PROCEDURE_UNSET = 0; |
| |
| // Numerical statistic aggregation procedures to be used with reports |
| // of type UNIQUE_DEVICE_HISTOGRAMS, HOURLY_VALUE_HISTOGRAMS, |
| // UNIQUE_DEVICE_NUMERIC_STATS and HOURLY_VALUE_NUMERIC_STATS when the metric type is INTEGER. |
| // TODO(https://fxbug.dev/42168241): Rename these to remove the '_PROCEDURE' suffix. |
| SUM_PROCEDURE = 1; |
| MIN_PROCEDURE = 2; |
| MAX_PROCEDURE = 3; |
| MEAN = 4; |
| MEDIAN = 5; |
| // The N-th percentile. The value of N is set in the field |
| // |local_aggregation_procedure_percentile_n|, which is required with this procedure. |
| PERCENTILE_N = 6; |
| |
| // Logical aggregation procedures to be used with reports of type |
| // UNIQUE_DEVICE_COUNTS. That report type documents when each one accepts an event. |
| AT_LEAST_ONCE = 7; |
| SELECT_FIRST = 8; |
| SELECT_MOST_COMMON = 9; |
| } |
| |
| // This field is required for reports of type UNIQUE_DEVICE_COUNTS, and for reports |
| // of type UNIQUE_DEVICE_HISTOGRAMS, HOURLY_VALUE_HISTOGRAMS, |
| // UNIQUE_DEVICE_NUMERIC_STATS, HOURLY_VALUE_NUMERIC_STATS for INTEGER metric. |
| // Different report types support different values of this field. See the comments |
| // on the enum values in LocalAggregationProcedure. |
| LocalAggregationProcedure local_aggregation_procedure = 17; |
| |
| // This field is required when |
| // local_aggregation_procedure = PERCENTILE_N. |
| // In this case it gives the value of N to use. Otherwise this field is |
| // ignored. |
| uint32 local_aggregation_procedure_percentile_n = 18; |
| |
| // Time window over which the logged data is aggregated. The local aggregation |
| // period is specified for UNIQUE_DEVICE_* report types. |
| WindowSize local_aggregation_period = 19; |
| |
| // The maximum number of distinct event vectors for which an instance of the Cobalt |
| // client should produce an observation, for a given local aggregation period. Event |
| // vectors are prioritized in order of first arrival during the aggregation period. |
| // |
| // For example, if a report has an event_vector_buffer_max of 10, and 12 distinct event |
| // vectors are logged for this metric over an aggregation period, then Cobalt will send |
| // observations of the first 10 event vectors for that aggregation period and drop the |
| // last 2. |
| // |
| // If this field is unset, the registry parser assigns to it the total number of event |
| // vectors for the report's parent metric (i.e., the product over all metric dimensions |
| // of the number of event codes per dimension). |
| // |
| // The report's project will be charged against a resource budget for this value |
| // so project owners are encouraged to set this as small as possible. For example, |
| // the report's parent metric may include a dimension with thousands of event codes, |
| // but it is expected that any one device will log only a few distinct event vectors |
| // per day. In that case we may set event_vector_buffer_max to a relatively small number, |
| // say 20. For reports which use differential privacy, setting event_vector_buffer_max |
| // to a smaller number will improve the signal for event vectors which are included in |
| // observations. |
| uint64 event_vector_buffer_max = 26; |
| |
| // The maximum number of distinct strings that Cobalt must keep in its in-memory buffer |
| // on any single device. During local aggregation for reports of type STRING_COUNTS and |
| // UNIQUE_DEVICE_STRING_COUNTS, Cobalt will keep track of this many distinct strings per |
| // aggregation period. The report's project will be charged against a resource budget for this |
| // value so project owners are encouraged to set this as small as possible. A STRING metric |
| // includes a file of candidate strings that may contain many thousands of strings. But it is |
| // expected that any one device will log only a few of these strings per day. We may set |
| // string_buffer_max to a relatively small number, say 20. |
| // |
| // This is a required field for reports of type STRING_COUNTS and UNIQUE_DEVICE_STRING_COUNTS. |
| uint32 string_buffer_max = 28; |
| |
| // For reports of type UNIQUE_DEVICE_COUNTS, send observations as soon as the |
| // event occurs, instead of waiting for the end of the day. |
| // |
| // This can only be enabled when using a local aggregation procedure of |
| // AT_LEAST_ONCE or SELECT_FIRST, and when the privacy level is |
| // NO_ADDED_PRIVACY. (The `privacy_level` field name is now reserved in this |
| // message; this presumably corresponds to a privacy_mechanism of |
| // DE_IDENTIFICATION -- TODO confirm.) When used with a system_profile_selection |
| // of REPORT_ALL or SELECT_FIRST, enabling this is recommended as Cobalt will |
| // send the count for the current day when the event occurs instead of at the |
| // end of the day. |
| // For a system_profile_selection of SELECT_LAST, this may also be desirable, |
| // though it may result in a slight change in the current day's system profile |
| // that is used, as Cobalt won't wait until the end of the day to determine |
| // the final system profile, but will instead send the count immediately with |
| // the system profile that is currently active on the device. |
| bool expedited_sending = 29; |
| |
| // Experimental. DO NOT USE! |
| // A specification of an aggregate for a STRUCT report. |
| // |
| // An aggregate may refer to one of the corresponding metric's struct fields (|for|) and |
| // specifies the function applied on the device (|local|) and on the server (|global|). |
| message AggregateDefinition { |
| // NOTE(review): presumably a numeric identifier for this aggregate within its report; |
| // uniqueness requirements are not stated here -- confirm. |
| uint32 id = 1; |
| |
| // The name of this aggregate. |
| // If this field is left unspecified, the value of |for| will be used. |
| // If |for| is not specified, this field is required. |
| string name = 2; |
| |
| // This field must match the name of one of the corresponding metric's struct fields. |
| // It must be left unspecified when the local aggregation function is COUNT. |
| string for = 3; |
| |
| // This field is filled in by the registry parser based on the value of |for|. |
| uint32 for_id = 4; |
| |
| // The functions that may be used for an aggregate, either on the device (see |local|) |
| // or on the server (see |global|). Each value documents where it may be used. |
| enum AggregationFunction { |
| // No function specified. Some usages below require a function to be left unspecified. |
| UNSPECIFIED_AGGREGATION_FUNCTION = 0; |
| |
| // local usage: |
| // - Can be used with BOOL, ENUM or STRING fields. |
| // |
| // global usage: |
| // - Requires the local aggregation function to be GROUP_BY. |
| GROUP_BY = 1; |
| |
| // local usage: |
| // - COUNT counts the number of occurrences of groups. |
| // - |for| must be left unspecified. |
| // |
| // global usage: |
| // - Counts the number of devices in each group. The local aggregation function must be left |
| // unspecified. |
| COUNT = 2; |
| |
| // global usage: |
| // - Requires the local aggregation function to return a numeric value. |
| SUM = 3; |
| |
| // global usage: |
| // - Requires the local aggregation function to return a numeric value. |
| MEAN = 4; |
| } |
| |
| // The on-device function computed on the metric during the aggregation window. |
| AggregationFunction local = 5; |
| |
| // The server-side function computed on data received from devices. |
| AggregationFunction global = 6; |
| |
| // Maximum number of distinct values that Cobalt must keep on any single device for this |
| // GROUP_BY aggregate. The first distinct_values_max_per_device values seen by the device |
| // are kept and subsequent ones are discarded. |
| // |
| // This is a required field for aggregates of string fields. |
| uint64 distinct_values_max_per_device = 7; |
| |
| // The path to a list of candidate strings for a GROUP_BY aggregate referring to a STRING field. |
| // The path should be relative to the root of the Cobalt registry, for |
| // instance "$CUSTOMER/$PROJECT/candidate_strings.txt". String candidate |
| // files should ideally be placed in the same registry and directory as the |
| // project that uses them. |
| // |
| // The empty string, "", is added to the candidate list implicitly by Cobalt |
| // and does not need to be added to the list. |
| // |
| // If string_candidate_file is not set for the corresponding metric field, this field is |
| // required. |
| string string_candidate_file = 8; |
| } |
| |
| // Experimental. DO NOT USE! |
| // A list of aggregates for a STRUCT report. Each entry is an AggregateDefinition. |
| repeated AggregateDefinition aggregates = 33; |
| |
| /////////////////// Fields used by all report types /////////////////// |
| // Next id: 122 |
| |
| // The list of SystemProfileFields to include in each row of the report. |
| // Optional. |
| repeated SystemProfileField system_profile_field = 100; |
| |
| // The list of Experiments to include in each row of the report. |
| // |
| // Each report row lists the intersection of the experiment ids active on the device and |
| // the experiment ids specified in this field. |
| // |
| // The specified experiment ids must be found in one of the project's experiments_namespaces. |
| repeated int64 experiment_id = 104; |
| |
| // If set to true, guarantees that each row in the output report contains a single |
| // experiment id in its system profile. This allows a single report to be used |
| // for multiple experiments without compromising privacy. |
| // |
| // This field is not supported on Fuchsia. |
| bool single_experiment_id_per_row = 121; |
| |
| // The number of cookie buckets to be used by the report. |
| // |
| // Must be 0 (disabled) or 20. A value of 20 requires the report to be for an experiment. |
| int32 num_cookie_buckets = 109; |
| |
| // String defining the scope of cookie buckets. |
| // |
| // Must be set if cookie buckets are enabled for the report (see num_cookie_buckets). Must |
| // be the same for all reports that require a device to use the same cookie buckets, e.g. |
| // multiple reports for the same experiment, and should be different for reports without |
| // this requirement. |
| string cookie_bucket_scope = 120; |
| |
| // Specifies which system profile to report for each device if the system |
| // profile value changed during the aggregation window specified for this |
| // report. |
| // |
| // This field is required for reports of type UNIQUE_DEVICE_COUNTS, |
| // UNIQUE_DEVICE_HISTOGRAMS, UNIQUE_DEVICE_STRING_COUNTS, and |
| // HOURLY_VALUE_HISTOGRAMS. The value for these reports must be SELECT_LAST, |
| // SELECT_FIRST, or occasionally REPORT_ALL. For report types other than those |
| // listed above, this field must not be set and is implicitly REPORT_ALL. |
| SystemProfileSelectionPolicy system_profile_selection = 103; |
| |
| // Maximum ReleaseStage for which this Report is allowed to be collected. |
| // See the ReleaseStage enum for the available stages. |
| ReleaseStage max_release_stage = 105; |
| |
| // If true, this report can be collected even if the user/device has not |
| // consented. |
| // |
| // This field can only be set to true on reports that use privacy mechanisms |
| // that include differential privacy (i.e. not DE_IDENTIFICATION). It is |
| // intended for collecting anonymized data that is allowed even when consent |
| // is not given. These use cases need to be specially approved by privacy |
| // reviewers. |
| bool exempt_from_consent = 108; |
| |
| // New Privacy API |
| |
| // This enum identifies what privacy protection is applied to the report. |
| enum PrivacyMechanism { |
| // No privacy mechanism has been specified. |
| PRIVACY_MECHANISM_UNSPECIFIED = 0; |
| // If you specify this value the data will be de-identified without |
| // additional privacy protections. |
| DE_IDENTIFICATION = 1; |
| // If you specify this value the data will be protected with Shuffled |
| // Differential Privacy guarantees (e.g., the noise will be added on the |
| // devices). |
| SHUFFLED_DIFFERENTIAL_PRIVACY = 2; |
| } |
| |
| // This field identifies what privacy protection is applied to the report. |
| PrivacyMechanism privacy_mechanism = 106; |
| |
| // Groups all parameters needed when privacy_mechanism is |
| // SHUFFLED_DIFFERENTIAL_PRIVACY. |
| message ShuffledDifferentialPrivacyConfig { |
| // This field represents an upper bound on the amount of information which |
| // can be learned about a device from a report including that device. |
| // Lower values correspond to higher privacy. |
| // Epsilon must be > 0. |
| double epsilon = 1; |
| |
| // This field represents the risk of the epsilon guarantee not holding. This |
| // is usually set as 1 over the expected number of participating devices. |
| // Delta must be > 0 and < 1. |
| double delta = 2; |
| |
| // The generated report will exclude an Observation if there are not at |
| // least |reporting_threshold| distinct devices reporting Observations with |
| // the same ObservationMetadata. |
| uint32 reporting_threshold = 3; |
| |
| // The mean number of observations added per index point when performing the |
| // Poisson mechanism encoding for Cobalt reports. Required. |
| // |
| // In the future, the value of this field will be computed by the registry |
| // parser as a function of other fields in this |
| // ShuffledDifferentialPrivacyConfig. For now, it should be set manually in |
| // the Cobalt registry in consultation with the Cobalt team. |
| // |
| // TODO(https://fxbug.dev/295053509): update this comment once the field is auto populated by |
| // the registry parser. |
| double poisson_mean = 4; |
| |
| // If true, skip validating the |poisson_mean| value. This is meant to be used only |
| // in end-to-end tests where the traffic volume would not allow a reasonable |poisson_mean|. |
| bool skip_poisson_mean_validation_test_only = 5; |
| |
| // The report fields that affect how a device participates in a private |
| // report. |
| // |
| // Note, this field is populated by the registry parser and must not be set |
| // manually. |
| DevicePrivacyDependencySet device_privacy_dependency_set = 6; |
| |
| // Captures the report fields a device _must_ use in order to properly |
| // make contributions, real and fabricated, to a report. |
| // |
| // Any report field that changes how observations are encoded or how noise is |
| // fabricated must result in a new value being added to this enum. |
| enum DevicePrivacyDependencySet { |
| // No dependency set has been recorded. |
| DEVICE_PRIVACY_DEPENDENCY_SET_UNSET = 0; |
| |
| // Captures: |
| // - Fields that affect index points: |
| // * metric dimensions |
| // * num_index_points |
| // * string_sketch_params |
| // * min_value |
| // * max_value |
| // * max_count |
| // * int_buckets |
| // |
| // - Fields that affect sparsity: |
| // * event_vector_buffer_max |
| // * string_buffer_max |
| // |
| // - poisson_mean |
| V1 = 1; |
| } |
| } |
| |
| // If privacy_mechanism is SHUFFLED_DIFFERENTIAL_PRIVACY then privacy_config |
| // must contain a valid ShuffledDifferentialPrivacyConfig; otherwise it must |
| // be left empty. |
| oneof privacy_config { |
| ShuffledDifferentialPrivacyConfig shuffled_dp = 107; |
| } |
| } |
| |
| // A specification for SystemProfile selection policy: which SystemProfile(s) |
| // to report for a device when more than one was seen in the aggregation window. |
| enum SystemProfileSelectionPolicy { |
| // Use the default value. For reports of type FLEETWIDE_OCCURRENCE_COUNTS, |
| // FLEETWIDE_HISTOGRAMS, FLEETWIDE_MEANS, UNIQUE_DEVICE_NUMERIC_STATS, |
| // HOURLY_VALUE_NUMERIC_STATS, and STRING_COUNTS this will resolve to |
| // 'REPORT_ALL' and should not be changed. For all other report types an |
| // explicit policy must be set instead of this value. |
| SYSTEM_PROFILE_SELECTION_POLICY_UNSET = 0; |
| |
| // Always report the last SystemProfile seen in the aggregation window. This |
| // will be the last SystemProfile seen *at the time of an event* in the |
| // aggregation window. |
| SELECT_LAST = 1; |
| |
| // Always report the first SystemProfile seen in the aggregation window. This |
| // will be the first SystemProfile seen *at the time of an event* in the |
| // aggregation window. |
| SELECT_FIRST = 2; |
| |
| // Report all system profiles in the aggregation window. For most report |
| // types, this is the most sensible value to use. For reports that depend on |
| // some concept of uniqueness (such as UNIQUE_DEVICE_COUNTS, |
| // UNIQUE_DEVICE_HISTOGRAMS, UNIQUE_DEVICE_STRING_COUNTS, and |
| // HOURLY_VALUE_HISTOGRAMS) this may not be the best choice, since it will no |
| // longer be the case that a single device will only upload one observation |
| // per time period. Instead, it will upload one observation per time period |
| // *per unique system_profile*. |
| REPORT_ALL = 3; |
| } |
| |
| // A specification of a field from SystemProfile. These are used in a |
| // ReportDefinition to specify which fields should be included in the generated |
| // Observations and reports. |
| // |
| // For a description of the meaning of each field, see the fields in the |
| // SystemProfile in: src/pb/common.proto |
| // |
| // Note: values are not declared in numeric order, and OS = 0 is the zero value |
| // of this enum (there is no separate "unset" value). |
| enum SystemProfileField { |
| OS = 0; |
| ARCH = 1; |
| BOARD_NAME = 2; |
| PRODUCT_NAME = 3; |
| SYSTEM_VERSION = 4; |
| APP_VERSION = 10; |
| CHANNEL = 5; |
| BUILD_TYPE = 7; |
| EXPERIMENT_IDS = 9; |
| COOKIE_BUCKET = 12; |
| // Reserved numbers and names must not be reused for new values. |
| reserved 6, 8, 11; |
| reserved "REALM", "EXPERIMENT_TOKENS", "COOKIE_BUCKET_ID"; |
| } |
| |
| // Stages in the release cycle of a component. Each Cobalt customer determines |
| // its current ReleaseStage when initializing the CobaltService. Each Metric |
| // and Report can declare the maximum ReleaseStage for which it is allowed to |
| // be collected. For example a DEBUG Metric will not be collected from a device |
| // running a FISHFOOD release. |
| // |
| // The numeric values increase from the earliest stage (DEBUG = 10) to the |
| // final stage (GA = 99). |
| enum ReleaseStage { |
| // No release stage has been set. See the note on GA below regarding the |
| // default that applies when no value is set. |
| RELEASE_STAGE_NOT_SET = 0; |
| |
| // A test build. Also called "eng". Only use this value when the device is |
| // running test builds as all metrics/reports will be collected. |
| DEBUG = 10; |
| // Small, internal prototype. Used for testing a new feature internally, |
| // usually within the team or a small group. |
| FISHFOOD = 20; |
| // An internal release for testing with internal users. |
| DOGFOOD = 40; |
| // An open beta, for testing with internal and external users. |
| OPEN_BETA = 60; |
| |
| // Generally-available. The final stage of a release. Also called |
| // "production". If unsure of which release stage the device is running, it |
| // is safest to fall back to this value (which is the default if no value is |
| // set), to avoid inadvertently collecting metric/report data. |
| GA = 99; |
| } |
| |
| // ExponentialIntegerBuckets is used to define a partition of the integers into |
| // a finite number of exponentially increasing buckets. |
| // |
| // Let n = num_buckets. Then there are n+2 buckets indexed 0,...,n+1. |
| // |
| // The bucket boundaries are: |
| // a[0] = floor |
| // a[1] = floor + initial_step |
| // a[2] = floor + initial_step * step_multiplier_float |
| // a[3] = floor + initial_step * step_multiplier_float ^ 2 |
| // a[4] = floor + initial_step * step_multiplier_float ^ 3 |
| // and in general, for i = 1, 2, 3 ... n |
| // a[i] = floor + initial_step * step_multiplier_float ^ (i-1) |
| // |
| // Then, the buckets are defined as follows: |
| // Bucket 0 is the underflow bucket: (-infinity, floor) |
| // Bucket i for 0 < i < n+1: [ceiling(a[i-1]), ceiling(a[i])) |
| // Bucket n+1 is the overflow bucket: [ceiling(a[n]), +infinity) |
| // |
| // Examples: |
| // floor = 0 |
| // num_buckets = 3 |
| // initial_step = 10 |
| // step_multiplier_float = 10 |
| // Then, the buckets are: |
| // (-infinity, 0), [0, 10), [10, 100), [100, 1000), [1000, +infinity) |
| // |
| // floor = 0 |
| // num_buckets = 3 |
| // initial_step = 2 |
| // step_multiplier_float = 2 |
| // Then, the buckets are: |
| // (-infinity, 0), [0, 2), [2, 4), [4, 8), [8, +infinity) |
| // |
| // floor = 10 |
| // num_buckets = 3 |
| // initial_step = 2 |
| // step_multiplier_float = 2 |
| // Then, the buckets are: |
| // (-infinity, 10), [10, 12), [12, 14), [14, 18), [18, +infinity) |
| // |
| // floor = 0 |
| // num_buckets = 3 |
| // initial_step = 100 |
| // step_multiplier_float = 10 |
| // Then, the buckets are: |
| // (-infinity, 0), [0, 100), [100, 1000), [1000, 10000), [10000, +infinity) |
| // |
| // When the step_multiplier_float is between 1 and 2, some buckets may have a width of zero |
| // after the bucket bounds are rounded up. These zero-width buckets are considered "impossible" |
| // because they shouldn't have any data. Any data that falls into these impossible buckets, whether |
| // it's due to reporting errors or generated noise, is discarded. |
| // |
| // Examples: |
| // floor = 0 |
| // num_buckets = 3 |
| // initial_step = 1 |
| // step_multiplier_float = 1.1 |
| // Then, the pre-ceiling buckets are: |
| // (-infinity, 0), [0, 1), [1, 1.10), [1.10, 1.21), [1.21, +infinity) |
| // the buckets after ceiling are (the zero-width bucket is marked with ^...^): |
| // (-infinity, 0), [0, 1), [1, 2), ^[2, 2)^, [2, +infinity) |
| // |
| // floor = 0 |
| // num_buckets = 3 |
| // initial_step = 10 |
| // step_multiplier_float = 1.1 |
| // Then, the pre-ceiling buckets are: |
| // (-infinity, 0), [0, 10), [10, 11.00), [11.00, 12.10), [12.10, +infinity) |
| // the buckets after ceiling are (no zero-width bucket occurs in this case): |
| // (-infinity, 0), [0, 10), [10, 11), [11, 13), [13, +infinity) |
| message ExponentialIntegerBuckets { |
| // Reserved field numbers and names must not be reused for new fields. |
| reserved 4; |
| reserved "step_multiplier"; |
| |
| // The upper bound (exclusive) of the underflow bucket and the lower bound |
| // (inclusive) of bucket 1; a[0] in the description above. |
| int64 floor = 1; |
| |
| // num_buckets must be at least 1. |
| uint32 num_buckets = 2; |
| |
| // Must be at least one. |
| uint32 initial_step = 3; |
| |
| // Must be greater than one. |
| float step_multiplier_float = 5; |
| } |
| |
| // LinearIntegerBuckets is used to define a partition of the integers into a |
| // finite number of buckets of equal size. |
| // |
| // Let n = num_buckets. Then there are n+2 buckets indexed 0,...,n+1. |
| // Bucket 0 is the underflow bucket: (-infinity, floor) |
| // Bucket n+1 is the overflow bucket: [floor + step_size * n, +infinity) |
| // |
| // For i = 1 to n, the bucket i is defined as |
| // [floor + step_size * (i-1), floor + step_size * i) |
| // |
| // Example: floor = 0, num_buckets = 3, step_size = 10. |
| // (-infinity, 0), [0, 10), [10, 20), [20, 30), [30, +infinity) |
| message LinearIntegerBuckets { |
| // The upper bound (exclusive) of the underflow bucket and the lower bound |
| // (inclusive) of bucket 1. |
| int64 floor = 1; |
| |
| // Must be at least one. |
| uint32 num_buckets = 2; |
| |
| // The width of each of the buckets 1 through num_buckets. |
| // Must be at least one. |
| uint32 step_size = 3; |
| } |
| |
| // IntegerBuckets defines a partition of the integers into buckets, using |
| // either an exponential or a linear bucketing scheme. |
| message IntegerBuckets { |
| // The bucketing scheme; exactly one of the alternatives may be set. |
| oneof buckets { |
| ExponentialIntegerBuckets exponential = 1; |
| LinearIntegerBuckets linear = 2; |
| } |
| |
| // If set to true, empty buckets will not be added to the report data. |
| // (Adding empty buckets is what ensures that all histograms contain a row |
| // for every bucket.) Buckets with a zero count may still occur if data is |
| // logged that contains a zero count. This field cannot be set on reports |
| // with added privacy. |
| bool sparse_output = 3; |
| } |
| |
| // StringSketchParameters holds the dimensions of a Count-Min Sketch: the |
| // number of hashes and the number of cells per hash. |
| message StringSketchParameters { |
| // Number of hashes in Count-Min Sketch. |
| int32 num_hashes = 1; |
| |
| // Number of cells per hash in Count-Min Sketch. |
| int32 num_cells_per_hash = 2; |
| } |