| // Copyright 2016 The Fuchsia Authors |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| syntax = "proto3"; |
| |
| // Every message declared in this file lives in the |cobalt| package. |
| package cobalt; |
| |
| // Go package for the code generated from this file. |
| // NOTE(review): this is a bare package name rather than a full import path; |
| // confirm that the Go protobuf toolchain in use still accepts this form. |
| option go_package = "cobalt"; |
| |
| /////////////////////////////////////////////////////////////// |
| // Messages created in the Encoder and consumed in the Analyzer |
| /////////////////////////////////////////////////////////////// |
| |
| // A distribution over an indexed set of value buckets. |
| // The definition of the buckets is specified elsewhere. |
| message BucketDistribution { |
|   // Per-bucket tallies: each key is a bucket index and each value is the |
|   // count recorded for that bucket. |
|   map<uint32, uint64> counts = 1; |
| } |
| |
| // A client-given value of a |MetricPart| to be encoded and collected by Cobalt. |
| // An Encoder encodes a |ValuePart| and produces an |ObservationPart|. An |
| // Analyzer decodes an |ObservationPart| to recover the |ValuePart|. Cobalt |
| // supports different types of values but the type must match the type declared |
| // in the |MetricPart| definition. |
| // |
| // A ValuePart of type BucketDistribution is an exception to the description |
| // above. It represents not a single client-given value but rather a |
| // client-given distribution over many values. Currently Cobalt only supports |
| // integer buckets and so a BucketDistribution may only be used when the |
| // corresponding MetricPart is of type INT and includes an IntegerBuckets field |
| // defining the buckets. |
| message ValuePart { |
|   // The value itself. At most one alternative is set, and its type must |
|   // match the type declared in the corresponding |MetricPart| definition. |
|   oneof data { |
|     // Human-readable text, encoded as UTF-8. |
|     string string_value = 1; |
| |
|     // An integer value. |
|     int64 int_value = 2; |
| |
|     // An opaque sequence of bytes that is not interpreted. |
|     bytes blob_value = 3; |
| |
|     // Zero-based index into an enumerated set that is defined outside of |
|     // Cobalt's configuration. The comments on the INDEX DataType in |
|     // metrics.proto say more about this. |
|     uint32 index_value = 4; |
| |
|     // A double-precision floating-point number. |
|     double double_value = 5; |
| |
|     // A distribution over an indexed set of buckets rather than a single |
|     // value. At most one part of an Observation may hold a |
|     // BucketDistribution, and when Cobalt generates a report such an |
|     // Observation is treated mathematically as though it were many |
|     // Observations. |
|     // |
|     // Only valid when the corresponding metric part has |int_buckets| set; |
|     // the IntegerBuckets given there define the bucket indices that this |
|     // BucketDistribution refers to. |
|     // |
|     // NOTE: the field number is part of the wire format; do not renumber. |
|     BucketDistribution int_bucket_distribution = 999999; |
|   } |
| } |
| |
| // Observations of this type contain an encryption of a string. The string |
| // is encrypted using Forculus threshold encryption so that it may only be |
| // decrypted if sufficiently many distinct clients transmit the same string. |
| // |
| // This type of Observation is used in the following case: |
| // |
| // A MetricDefinition of type STRING_USED and Report of type |
| // STRING_COUNTS_WITH_THRESHOLD. In this case the Observation is |
| // *immediate* meaning it is generated directly from a single |
| // STRING_USED Event as soon as the Event is logged. The Event contains |
| // the string that was used. The fields of a ForculusObservation contain |
| // the data necessary to represent the threshold-encrypted ciphertext of |
| // the string. |
| message ForculusObservation { |
|   // Together, the three fields below hold the data needed to represent the |
|   // threshold-encrypted ciphertext of the string. |
|   bytes ciphertext = 1; |
| |
|   // NOTE(review): point_x and point_y look like the two coordinates of the |
|   // point this client contributes to the threshold scheme - confirm against |
|   // the Forculus encrypter before relying on this description. |
|   bytes point_x = 2; |
|   bytes point_y = 3; |
| } |
| |
| // Observations of this type contain an encoding of a string. The encoding uses |
| // String RAPPOR to provide local differential privacy. |
| // |
| // This type of Observation is used in the following case: |
| // |
| // A MetricDefinition of type STRING_USED and Report of type |
| // HIGH_FREQUENCY_STRING_COUNTS. In this case the Observation is |
| // *immediate* meaning it is generated directly from a single |
| // STRING_USED Event as soon as the Event is logged. The Event contains |
| // the string that was used. The |data| field in this Observation contains |
| // an encoding of that string. It contains a bit vector obtained by representing |
| // the string as a bit vector using several hash functions and then adding |
| // random noise by flipping bits. |
| message RapporObservation { |
|   // Cohort of the reporting client; every client is assigned one at random. |
|   // Representing a string as a bit vector is lossy, so the cohorts are |
|   // required in order to recover the original string. |
|   uint32 cohort = 1; |
| |
|   // The bit vector that encodes the string. |
|   bytes data = 2; |
| } |
| |
| |
| // Observations of this type contain an encoding of an element of an |
| // indexed set. The encoding uses Basic RAPPOR to provide local differential |
| // privacy. |
| // |
| // This type of Observation is used in the following case: |
| // |
| // A MetricDefinition of type EVENT_OCCURRED and Report of type |
| // SIMPLE_OCCURRENCE_COUNT. In this case the Observation is *immediate* meaning |
| // it is generated directly from a single EVENT_OCCURRED Event as soon as |
| // the Event is logged. The Event contains a single index, the index of the |
| // event-type that occurred. The |data| field in this Observation contains |
| // an encoding of that index. It contains a bit vector obtained by representing |
| // the index as a one-hot bit vector and then adding random noise by |
| // flipping bits. |
| message BasicRapporObservation { |
|   // The bit vector that encodes the index. |
|   bytes data = 1; |
| } |
| |
| // Used with the No-op encoding. Note that this offers no client-side privacy |
| // protection. Cobalt may still offer some privacy protection via the Shuffler |
| // and via differentially-private release. |
| message UnencodedObservation { |
|   // The input value exactly as supplied: raw, with no encoding applied. |
|   ValuePart unencoded_value = 1; |
| } |
| |
| // The encoding of a ValuePart. This is produced by an Encoder. |
| message ObservationPart { |
|   // The encoded value, in the form produced by whichever encoding was used. |
|   oneof value { |
|     // No-op encoding: the raw value, with no client-side privacy protection. |
|     UnencodedObservation unencoded = 1; |
| |
|     // A string encrypted with Forculus threshold encryption. |
|     ForculusObservation forculus = 2; |
| |
|     // A string encoded with String RAPPOR. |
|     RapporObservation rappor = 3; |
| |
|     // An index into an indexed set, encoded with Basic RAPPOR. |
|     BasicRapporObservation basic_rappor = 4; |
|   } |
| |
|   // Identifies how this ObservationPart is encoded. Combined with the |
|   // customer_id and project_id of the containing Observation, it forms the |
|   // primary key into the "EncodingConfigs" table of the Cobalt configuration |
|   // database; the value column of that table is an "EncodingConfig" proto |
|   // message describing the encoding. |
|   // |
|   // e.g. 114=(Forculus with threshold=50) |
|   uint32 encoding_config_id = 5; |
| } |
| |
| // An Observation consists of one or more ObservationParts. |
| message Observation { |
|   // The ObservationParts, keyed by the name of the metric part with which |
|   // each one is associated. |
|   map<string, ObservationPart> parts = 1; |
| |
|   // Quasi-unique identifier of this observation, generated at random on the |
|   // client. The server uses it as one component of a fully-unique |
|   // identifier, which makes the add-observation operation idempotent: an |
|   // observation that is transmitted to the server twice is stored only once. |
|   bytes random_id = 2; |
| } |