// Source blob: 2762d8558457b714405cbbde38e13435d6b96dd5
// Copyright 2016 The Fuchsia Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This file is written in the proto3 dialect of the Protocol Buffers language.
syntax = "proto3";
// Every message defined below lives in the |cobalt| package namespace.
package cobalt;
// Name of the Go package for code generated from this file.
option go_package = "cobalt";
///////////////////////////////////////////////////////////////
// Messages created in the Encoder and consumed in the Analyzer
///////////////////////////////////////////////////////////////
// A distribution over an indexed set of value buckets.
// The definition of the buckets is specified elsewhere.
message BucketDistribution {
  // Per-bucket tallies. Each key is a bucket index and each value is the
  // count recorded for that bucket. The meaning of a bucket index is given
  // by a bucket definition that lives outside of this message.
  map<uint32, uint64> counts = 1;
}
// A client-given value of a |MetricPart| to be encoded and collected by Cobalt.
// An Encoder encodes a |ValuePart| and produces an |ObservationPart|. An
// analyzer decodes an |ObservationPart| to recover the |ValuePart|. Cobalt
// supports different types of values but the type must match the type declared
// in the |MetricPart| definition.
//
// A ValuePart of type BucketDistribution is an exception to the description
// above. It represents not a single client-given value but rather a
// client-given distribution over many values. Currently Cobalt only supports
// integer buckets and so a BucketDistribution may only be used when the
// corresponding MetricPart is of type INT and includes an IntegerBuckets field
// defining the buckets.
message ValuePart {
  // The value itself. At most one of the alternatives below is set at a time
  // (oneof semantics).
  oneof data {
    // UTF8 text intended to be readable by a human.
    string string_value = 1;

    // An integer value.
    int64 int_value = 2;

    // Raw bytes to which Cobalt assigns no interpretation.
    bytes blob_value = 3;

    // Zero-based position within an enumerated set. The set is defined
    // outside of Cobalt's configuration; the comments on the INDEX DataType
    // in metrics.proto describe this in more detail.
    uint32 index_value = 4;

    // A double-precision floating point number.
    double double_value = 5;

    // A distribution over an indexed set of buckets. At most one part of an
    // Observation may carry a BucketDistribution as its value. When a report
    // is generated, Cobalt treats such an Observation mathematically as
    // though it were many Observations.
    //
    // Only valid when the corresponding metric part has |int_buckets| set.
    // The IntegerBuckets given there defines the bucket indices that the
    // BucketDistribution here refers to.
    //
    // NOTE: the field number is part of the wire format; do not renumber it.
    BucketDistribution int_bucket_distribution = 999999;
  }
}
// Observations of this type contain an encryption of a string. The string
// is encrypted using Forculus threshold encryption so that it may only be
// decrypted if sufficiently many distinct clients transmit the same string.
//
// This type of Observation is used in the following case:
//
// A MetricDefinition of type STRING_USED and Report of type
// STRING_COUNTS_WITH_THRESHOLD. In this case the Observation is
// *immediate* meaning it is generated directly from a single
// STRING_USED Event as soon as the Event is logged. The Event contains
// the string that was used. The fields of a ForculusObservation contain
// the data necessary to represent the threshold-encrypted ciphertext of
// the string.
message ForculusObservation {
  // NOTE(review): presumably the encrypted bytes of the string -- confirm
  // against the Forculus encrypter.
  bytes ciphertext = 1;

  // NOTE(review): presumably the x-coordinate of the point this client
  // contributes toward threshold decryption -- confirm.
  bytes point_x = 2;

  // NOTE(review): presumably the y-coordinate matching |point_x| -- confirm.
  bytes point_y = 3;
}
// Observations of this type contain an encoding of a string. The encoding uses
// String RAPPOR to provide local differential privacy.
//
// This type of Observation is used in the following case:
//
// A MetricDefinition of type STRING_USED and Report of type
// HIGH_FREQUENCY_STRING_COUNTS. In this case the Observation is
// *immediate* meaning it is generated directly from a single
// STRING_USED Event as soon as the Event is logged. The Event contains
// the string that was used. The |data| field in this Observation contains
// an encoding of that string. It contains a bit vector obtained by representing
// the string as a bit vector using several hash functions and then adding
// random noise by flipping bits.
message RapporObservation {
  // The cohort of the reporting client. Every client is assigned a cohort at
  // random. Because turning a string into a bit vector loses information,
  // the cohorts are required in order to recover the original string.
  uint32 cohort = 1;

  // The encoded string, as a bit vector.
  bytes data = 2;
}
// Observations of this type contain an encoding of an element of an
// indexed set. The encoding uses Basic RAPPOR to provide local differential
// privacy.
//
// This type of Observation is used in the following case:
//
// A MetricDefinition of type EVENT_OCCURRED and Report of type
// SIMPLE_OCCURRENCE_COUNT. In this case the Observation is *immediate* meaning
// it is generated directly from a single EVENT_OCCURRED Event as soon as
// the Event is logged. The Event contains a single index, the index of the
// event-type that occurred. The |data| field in this Observation contains
// an encoding of that index. It contains a bit vector obtained by representing
// the index as a one-hot bit vector and then adding random noise by
// flipping bits.
message BasicRapporObservation {
  // The encoded index, as a bit vector.
  bytes data = 1;
}
// Used with the No-op encoding. Note that this offers no client-side privacy
// protection. Cobalt may still offer some privacy protection via the Shuffler
// and via differentially-private release.
message UnencodedObservation {
  // The input value in raw form, with no encoding applied to it.
  ValuePart unencoded_value = 1;
}
// The encoding of a ValuePart. This is produced by an Encoder.
message ObservationPart {
  // The encoded value. At most one of the alternatives below is set at a
  // time (oneof semantics); each corresponds to one kind of observation.
  oneof value {
    UnencodedObservation unencoded = 1;
    ForculusObservation forculus = 2;
    RapporObservation rappor = 3;
    BasicRapporObservation basic_rappor = 4;
  }

  // Identifies how this ObservationPart was encoded. Together with the
  // customer_id and project_id (both specified in the containing
  // Observation) it forms the primary key into the "EncodingConfigs" table
  // of the Cobalt configuration database. The value column of that table is
  // an "EncodingConfig" proto message describing the encoding.
  //
  // e.g. 114=(Forculus with threshold=50)
  uint32 encoding_config_id = 5;
}
// An Observation consists of one or more ObservationParts.
message Observation {
  // The ObservationParts of this Observation. Each key is the name of the
  // metric part with which the corresponding ObservationPart is associated.
  map<string, ObservationPart> parts = 1;

  // A quasi-unique identifier for this observation. It is generated randomly
  // on the client, and the server uses it as one component of a fully-unique
  // identifier. It makes the add-observation operation idempotent: when the
  // same observation is transmitted to the server twice, the server stores
  // it only once.
  bytes random_id = 2;
}