// blob: c14551ad7f9fd54eb4e2f770a183fd35d82345bb [file] [log] [blame]
// Copyright 2016 The Fuchsia Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This schema is written in the proto3 dialect.
syntax = "proto3";

// Every definition in this file belongs to the |cobalt| package.
package cobalt;

// EncryptedMessage (used by ObservationBatch) is not defined in this file;
// it is expected to come from this import.
import "encrypted_message.proto";

// Package name used for the generated Go code.
option go_package = "cobalt";
///////////////////////////////////////////////////////////////
// Messages created in the Encoder and consumed in the Analyzer
///////////////////////////////////////////////////////////////
// A distribution over an indexed set of value buckets.
// The definition of the buckets is specified elsewhere.
message BucketDistribution {
  // Per-bucket counts keyed by bucket index: counts[i] is the count recorded
  // for bucket i. The buckets themselves are defined elsewhere. As with any
  // proto map, the order of the entries is unspecified.
  map<uint32, uint64> counts = 1;
}
// A client-given value of a |MetricPart| to be encoded and collected by Cobalt.
// An Encoder encodes a |ValuePart| and produces an |ObservationPart|. An
// analyzer decodes an |ObservationPart| to recover the |ValuePart|. Cobalt
// supports different types of values but the type must match the type declared
// in the |MetricPart| definition.
//
// A ValuePart of type BucketDistribution is an exception to the description
// above. It represents not a single client-given value but rather a
// client-given distribution over many values. Currently Cobalt only supports
// integer buckets and so a BucketDistribution may only be used when the
// corresponding MetricPart is of type INT and includes an IntegerBuckets field
// defining the buckets.
message ValuePart {
  // At most one of the variants below is set. The variant used must agree
  // with the type declared in the corresponding |MetricPart| definition.
  oneof data {
    // Human-readable text, encoded as UTF8.
    string string_value = 1;

    // Integer value.
    int64 int_value = 2;

    // Sequence of bytes that is not interpreted.
    bytes blob_value = 3;

    // Zero-based index into an enumerated set that is specified outside of
    // Cobalt's configuration. The comments on the INDEX DataType in
    // metrics.proto describe this in more detail.
    uint32 index_value = 4;

    // Double-precision floating point value.
    double double_value = 5;

    // Distribution over an indexed set of buckets. At most one part of an
    // Observation may have a BucketDistribution as its value. When Cobalt
    // generates a report, such an Observation is treated mathematically as
    // though it were many Observations.
    //
    // Only usable when the corresponding metric part has |int_buckets| set;
    // the IntegerBuckets set there define the bucket indices that this
    // BucketDistribution refers to.
    BucketDistribution int_bucket_distribution = 999999;
  }
}
// Observations of this type contain an encryption of a string. The string
// is encrypted using Forculus threshold encryption so that it may only be
// decrypted if sufficiently many distinct clients transmit the same string.
//
// This type of Observation is used in the following case:
//
// A MetricDefinition of type STRING_USED and Report of type
// STRING_COUNTS_WITH_THRESHOLD. In this case the Observation is
// *immediate* meaning it is generated directly from a single
// STRING_USED Event as soon as the Event is logged. The Event contains
// the string that was used. The fields of a ForculusObservation contain
// the data necessary to represent the threshold-encrypted ciphertext of
// the string.
message ForculusObservation {
  // Threshold-encrypted ciphertext of the string.
  bytes ciphertext = 1;

  // NOTE(review): |point_x| and |point_y| are not documented in this file.
  // Presumably they are the two coordinates of the point this client
  // contributes to the Forculus threshold scheme -- confirm against the
  // Forculus encoder before relying on this.
  bytes point_x = 2;
  bytes point_y = 3;
}
// Observations of this type contain an encoding of a string. The encoding uses
// String RAPPOR to provide local differential privacy.
//
// This type of Observation is used in the following case:
//
// A MetricDefinition of type STRING_USED and Report of type
// HIGH_FREQUENCY_STRING_COUNTS. In this case the Observation is
// *immediate* meaning it is generated directly from a single
// STRING_USED Event as soon as the Event is logged. The Event contains
// the string that was used. The |data| field in this Observation contains
// an encoding of that string. It contains a bit vector obtained by representing
// the string as a bit vector using several hash functions and then adding
// random noise by flipping bits.
message RapporObservation {
  // Cohort of the client. Every client is assigned a cohort at random.
  // Representing a string as a bit vector is lossy, so the cohort is needed
  // in order to recover the original string.
  uint32 cohort = 1;

  // Bit vector holding the encoding of the string.
  bytes data = 2;
}
// Observations of this type contain an encoding of an element of
// an indexed set. The encoding uses Basic RAPPOR to provide local differential
// privacy.
//
// This type of Observation is used in the following case:
//
// A MetricDefinition of type EVENT_OCCURRED and Report of type
// SIMPLE_OCCURRENCE_COUNT. In this case the Observation is *immediate* meaning
// it is generated directly from a single EVENT_OCCURRED Event as soon as
// the Event is logged. The Event contains a single index, the index of the
// event-type that occurred. The |data| field in this Observation contains
// an encoding of that index. It contains a bit vector obtained by representing
// the index as a one-hot bit vector and then adding random noise by
// flipping bits.
message BasicRapporObservation {
  // Bit vector holding the encoding of the index: the index represented as a
  // one-hot bit vector, with random noise added by flipping bits.
  bytes data = 1;
}
// Used with the No-op encoding. Note that this offers no client-side privacy-
// protection. Cobalt may still offer some privacy protection via the Shuffler
// and via differentially-private release.
message UnencodedObservation {
  // The input value in raw form, with no encoding applied to it.
  ValuePart unencoded_value = 1;
}
// The encoding of a ValuePart. This is produced by an Encoder.
message ObservationPart {
  // The encoded payload. There is one variant per kind of encoding defined
  // in this file.
  oneof value {
    UnencodedObservation unencoded = 1;
    ForculusObservation forculus = 2;
    RapporObservation rappor = 3;
    BasicRapporObservation basic_rappor = 4;
  }

  // Together with the customer_id and project_id (specified in the
  // containing Observation), the encoding_config_id forms the primary key
  // into the "EncodingConfigs" table of the Cobalt configuration database.
  // The value column of that table is an "EncodingConfig" proto message that
  // describes how this ObservationPart is encoded.
  //
  // Example: 114=(Forculus with threshold=50).
  uint32 encoding_config_id = 5;
}
// A SystemProfile describes the client system on which an Observation
// is collected.
message SystemProfile {
  // Field number 3 is reserved and cannot be assigned to a field.
  reserved 3;

  // Operating system of the client system.
  enum OS {
    UNKNOWN_OS = 0;
    FUCHSIA = 1;
    LINUX = 2;
  }

  // Architecture of the client system.
  enum ARCH {
    UNKNOWN_ARCH = 0;
    X86_64 = 1;
    ARM_64 = 2;
  }

  // Build state of the system artifact.
  enum BuildLevel {
    UNKNOWN = 0;

    // The values below should all match BuildLevel in config/metrics.proto.
    DEBUG = 10;
    FISHFOOD = 20;
    DOGFOOD = 30;
    PROD = 99;
  }

  OS os = 1;
  ARCH arch = 2;

  // Board name of the device, as a string. When no board name can be
  // determined, this field is set to 'unknown:<cpu signature>'.
  string board_name = 4;

  // Source of the observation, as a string. For now this refers to layers of
  // the Fuchsia cake such as "garnet", "zircon", "topaz", etc... In the
  // future it will be something related to the sort of device we are running
  // on, such as "Acme Lightbulb X" or "Machine Corp. Laptop III".
  string product_name = 5;

  // Build state of the system artifact. Currently assumed to be the build
  // version of the package calling Cobalt.
  BuildLevel build_level = 6;
}
// An Observation consists of one or more ObservationParts.
message Observation {
  // The ObservationParts, keyed by the name of the metric part with which
  // each one is associated.
  map<string, ObservationPart> parts = 1;

  // Quasi-unique identifier for this observation. It is generated randomly
  // on the client, and the server uses it as part of a fully-unique
  // identifier. It makes the add-observation operation idempotent: when the
  // same observation is transmitted to the server twice, the server stores
  // it only once.
  bytes random_id = 2;
}
// ObservationMetadata describes the parts of an observation other than the
// secret payload.
message ObservationMetadata {
  // next id: 8

  // Every Observation is for one particular Metric, which is specified by
  // the three values (customer_id, project_id, metric_id) together.
  uint32 customer_id = 1;
  uint32 project_id = 2;
  uint32 metric_id = 3;

  // Starting in Cobalt 1.0, an Observation is for one particular Report,
  // which is specified by the triple (customer_id, project_id, report_id).
  uint32 report_id = 7;

  // Day on which the observation occurred, given as a zero-based index
  // relative to January 1, 1970:
  //   0 = January 1, 1970
  //   1 = January 2, 1970
  //   etc.
  //
  // The meaning is intentionally left vague; each Encoder Client defines how
  // to make it precise. Which day it is depends on the time zone, and the
  // Encoder client may use the local time zone or a different one. The
  // Encoder client may also add random noise to the time at which an event
  // occurred, and this might change the day.
  uint32 day_index = 4;

  // Profile of the client system on which the Observation was collected.
  // It is populated in ObservationMetadata sent from the Shuffler to the
  // Analyzer, but not in ObservationMetadata sent from a client to the
  // Shuffler. Instead the SystemProfile is sent just once per Envelope and
  // the Shuffler copies it into the individual ObservationMetadata.
  // NOTE(review): Envelope.system_profile is marked deprecated in this file;
  // confirm whether the per-Envelope description above is still current.
  SystemProfile system_profile = 5;

  // Identifies which backend to use.
  enum ShufflerBackend {
    // Legacy GRPC GKE backend.
    LEGACY_BACKEND = 0;

    // First version of the clearcut backend.
    V1_BACKEND = 1;
  }

  // Destination to which observations should be sent.
  ShufflerBackend backend = 6;
}
// A batch of encrypted Observations with common metadata. This is the unit
// sent from a Shuffler to an Analyzer. The Observations are encrypted to
// the public key of the Analyzer so the Shuffler cannot read them.
message ObservationBatch {
  // Metadata shared by all of the encrypted observations in this batch.
  //
  // It is authenticated because it is included in the "associated data"
  // part of the AEAD encryption of each |encrypted_observation|.
  ObservationMetadata meta_data = 1;

  // One EncryptedMessage per Observation. Each holds the ciphertext of an
  // Observation that was encrypted to the public key of the Analyzer.
  repeated EncryptedMessage encrypted_observation = 2;
}
// An envelope contains multiple ObservationBatches. An encrypted Envelope
// is the unit sent from an Encoder client to a Shuffler.
message Envelope {
  // Field number 2 is not assigned to any field of this message. It is
  // reserved here, the same way SystemProfile reserves its own unused
  // number, so that a new field cannot silently take it over.
  // NOTE(review): presumably 2 belonged to a field that was removed --
  // confirm against the file history.
  reserved 2;

  // The ObservationBatches carried by this Envelope.
  repeated ObservationBatch batch = 1;

  // Deprecated: filtered SystemProfiles are now added to each Batch, so this
  // field is no longer needed.
  //
  // Profile of the client system from which this Envelope is sent. To
  // minimize the data sent from a client it was included only once per
  // Envelope, and the Shuffler was responsible for copying it into the
  // individual ObservationMetadata contained in this Envelope.
  SystemProfile system_profile = 3 [deprecated = true];
}