// Copyright 2016 The Fuchsia Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package cobalt;

// Supplies the EncryptedMessage type referenced by ObservationBatch (it is
// not defined in this file).
import "encrypted_message.proto";

// Go package name used for the code generated from this file.
option go_package = "cobalt";

///////////////////////////////////////////////////////////////
// Messages created in the Encoder and consumed in the Analyzer
///////////////////////////////////////////////////////////////

// A distribution over an indexed set of value buckets.
// The definition of the buckets is specified elsewhere.
message BucketDistribution {
  // |counts| maps each bucket index (the key) to the count recorded for that
  // bucket (the value). As with any proto3 map, an empty map cannot be
  // distinguished from an unset one and the iteration order is unspecified.
  map<uint32, uint64> counts = 1;
}

// A client-given value of a |MetricPart| to be encoded and collected by Cobalt.
// An Encoder encodes a |ValuePart| and produces an |ObservationPart|. An
// Analyzer decodes an |ObservationPart| to recover the |ValuePart|. Cobalt
// supports different types of values but the type must match the type declared
// in the |MetricPart| definition.
//
// A ValuePart of type BucketDistribution is an exception to the description
// above. It represents not a single client-given value but rather a
// client-given distribution over many values. Currently Cobalt only supports
// integer buckets and so a BucketDistribution may only be used when the
// corresponding MetricPart is of type INT and includes an IntegerBuckets field
// defining the buckets.
message ValuePart {
  // The value itself. The members of |data| are mutually exclusive: at most
  // one is set, and setting one clears any other that was set before.
  oneof data {
    // A human-readable, UTF8 string.
    string string_value = 1;

    // A signed 64-bit integer.
    int64 int_value = 2;

    // An uninterpreted sequence of bytes.
    bytes blob_value = 3;

    // A zero-based index into some enumerated set that is specified outside
    // of Cobalt's configuration. See comments on the INDEX DataType in
    // metrics.proto for more about this.
    uint32 index_value = 4;

    // A double-precision floating point value.
    double double_value = 5;

    // A distribution over an indexed set of buckets. An Observation may include
    // at most one part whose value is a BucketDistribution. When Cobalt
    // generates a report such an Observation will be treated mathematically as
    // if it were many Observations.
    //
    // This can only be used if the corresponding metric part has |int_buckets|
    // set and in that case the IntegerBuckets set there defines the bucket
    // indices referenced by the BucketDistribution here.
    //
    // NOTE(review): the field number 999999 is far outside the 1-5 range used
    // by the other members and this file does not say why. It must not be
    // changed: the number, not the name, identifies the field on the wire.
    BucketDistribution int_bucket_distribution = 999999;
  }
}

// Observations of this type contain an encryption of a string. The string
// is encrypted using Forculus threshold encryption so that it may only be
// decrypted if sufficiently many distinct clients transmit the same string.
//
// This type of Observation is used in the following case:
//
// A MetricDefinition of type STRING_USED and Report of type
// STRING_COUNTS_WITH_THRESHOLD. In this case the Observation is
// *immediate*, meaning it is generated directly from a single
// STRING_USED Event as soon as the Event is logged. The Event contains
// the string that was used. The fields of a ForculusObservation contain
// the data necessary to represent the threshold-encrypted ciphertext of
// the string.
message ForculusObservation {
  // The encrypted form of the string.
  bytes ciphertext = 1;
  // |point_x| and |point_y|: NOTE(review): presumably the two coordinates of
  // the point this client contributes toward threshold decryption. This file
  // does not spell that out -- confirm against the Forculus encrypter.
  bytes point_x = 2;
  bytes point_y = 3;
}

// Observations of this type contain an encoding of a string. The encoding uses
// String RAPPOR to provide local differential privacy.
//
// This type of Observation is used in the following case:
//
// A MetricDefinition of type STRING_USED and Report of type
// HIGH_FREQUENCY_STRING_COUNTS. In this case the Observation is
// *immediate*, meaning it is generated directly from a single
// STRING_USED Event as soon as the Event is logged. The Event contains
// the string that was used. The |data| field in this Observation contains
// an encoding of that string: a bit vector obtained by representing
// the string as a bit vector using several hash functions and then adding
// random noise by flipping bits.
message RapporObservation {
  // The client's cohort. Each client is assigned a random cohort. The cohorts
  // are needed to recover the original string since the process of representing
  // a string as a bit vector is lossy.
  uint32 cohort = 1;

  // A bit vector containing the noised encoding of the string.
  bytes data = 2;
}


// Observations of this type contain an encoding of an element of an
// indexed set. The encoding uses Basic RAPPOR to provide local differential
// privacy.
//
// This type of Observation is used in the following case:
//
// A MetricDefinition of type EVENT_OCCURRED and Report of type
// SIMPLE_OCCURRENCE_COUNT. In this case the Observation is *immediate*,
// meaning it is generated directly from a single EVENT_OCCURRED Event as soon
// as the Event is logged. The Event contains a single index, the index of the
// event-type that occurred. The |data| field in this Observation contains
// an encoding of that index: a bit vector obtained by representing
// the index as a one-hot bit vector and then adding random noise by
// flipping bits.
message BasicRapporObservation {
  // A bit vector containing the noised encoding of the index.
  bytes data = 1;
}

// Used with the No-op encoding. Note that this offers no client-side privacy
// protection. Cobalt may still offer some privacy protection via the Shuffler
// and via differentially-private release.
message UnencodedObservation {
  // The raw, unencoded input value, carried exactly as a ValuePart.
  ValuePart unencoded_value = 1;
}

// The encoding of a ValuePart. This is produced by an Encoder.
message ObservationPart {
  // The encoded payload. The members are mutually exclusive: at most one is
  // set, one member per kind of observation message defined in this file.
  oneof value {
    UnencodedObservation unencoded = 1;
    ForculusObservation forculus = 2;
    RapporObservation rappor = 3;
    BasicRapporObservation basic_rappor = 4;
  }

  // The encoding_config_id, along with the customer_id and project_id
  // (specified in the containing Observation), forms the primary key into the
  // "EncodingConfigs" table in the Cobalt configuration database. The value
  // column is an "EncodingConfig" proto message describing how this
  // ObservationPart is encoded.
  //
  // This field is declared outside the oneof, so it accompanies whichever
  // |value| member is set.
  uint32 encoding_config_id = 5;  // e.g. 114=(Forculus with threshold=50)
}

// A SystemProfile describes the client system on which an Observation
// is collected.
message SystemProfile {
  // Field number 3 is reserved: it must never be assigned to a new field.
  reserved 3;

  // The operating system of the client. UNKNOWN_OS is the proto3 default.
  enum OS {
    UNKNOWN_OS = 0;
    FUCHSIA = 1;
    LINUX = 2;
  }

  // The CPU architecture of the client. UNKNOWN_ARCH is the proto3 default.
  enum ARCH {
    UNKNOWN_ARCH = 0;
    X86_64 = 1;
    ARM_64 = 2;
  }

  // The build level of the system artifact. UNKNOWN is the proto3 default.
  enum BuildLevel {
    UNKNOWN = 0;

    // These should all be the same as BuildLevel in config/metrics.proto.
    DEBUG = 10;
    FISHFOOD = 20;
    DOGFOOD = 30;
    PROD = 99;
  }

  OS os = 1;
  ARCH arch = 2;

  // This is a string representing the board name of the device. If a board name
  // cannot be determined, then this field will be 'unknown:<cpu signature>'.
  string board_name = 4;

  // This is a string representing the source of the observation.
  // For now, this is going to refer to layers of the Fuchsia cake such as
  // "garnet", "zircon", "topaz", etc... In the future, we will use something
  // related to what sort of device we are running on, such as
  // "Acme Lightbulb X" or "Machine Corp. Laptop III".
  string product_name = 5;

  // This represents the build state of the system artifact.
  // Currently assumed to be the build version of the package calling Cobalt.
  //
  // DEPRECATED. The field carries the `deprecated` option so that generated
  // code flags remaining uses; the wire format is unaffected.
  BuildLevel build_level = 6 [deprecated = true];
}

// An Observation consists of one or more ObservationParts.
message Observation {
  // The ObservationParts, keyed by the name of the metric part with which
  // each one is associated.
  map<string, ObservationPart> parts = 1;

  // A quasi-unique identifier for this observation. This is randomly generated
  // on the client and used on the server as part of a fully-unique identifier.
  // This field allows the add-observation operation to be idempotent: if the
  // same observation is transmitted to the server twice then the server will
  // store the observation only once.
  bytes random_id = 2;
}

// ObservationMetadata describes the parts of an observation other than the
// secret payload.
message ObservationMetadata {
  // next id: 8

  // A ShufflerBackend specifies which backend to use. LEGACY_BACKEND has the
  // value 0 and is therefore the proto3 default.
  enum ShufflerBackend {
    // The legacy GRPC GKE backend
    LEGACY_BACKEND = 0;
    // The first version of the clearcut backend.
    V1_BACKEND = 1;
  }

  // Every Observation belongs to one particular Metric, which is identified
  // by the triple (customer_id, project_id, metric_id).
  uint32 customer_id = 1;
  uint32 project_id = 2;
  uint32 metric_id = 3;

  // From Cobalt 1.0 onward an Observation also belongs to one particular
  // Report, identified by the triple (customer_id, project_id, report_id).
  uint32 report_id = 7;

  // The day on which the observation occurred, as a zero-based count of days
  // since January 1, 1970:
  //      0 = January 1, 1970
  //      1 = January 2, 1970
  //      etc.
  //
  // The exact meaning is deliberately left vague so that each Encoder client
  // can make it precise. The day depends on the time zone, and the Encoder
  // client may use the local time zone or a different one. The Encoder client
  // may also add random noise to the time at which an event occurred, which
  // might change the day.
  uint32 day_index = 4;

  // The profile of the client system on which the Observation was collected.
  // This field is populated in ObservationMetadata sent from the Shuffler to
  // the Analyzer, but not in ObservationMetadata sent from a client to the
  // Shuffler. Instead the SystemProfile is sent just once per Envelope and
  // the Shuffler copies it into the individual ObservationMetadata.
  SystemProfile system_profile = 5;

  // Specifies a destination where observations should be sent.
  ShufflerBackend backend = 6;
}

// A batch of encrypted Observations with common metadata. This is the unit
// sent from a Shuffler to an Analyzer. The Observations are encrypted to
// the public key of the Analyzer so the Shuffler cannot read them.
message ObservationBatch {
  // The common metadata for all of the encrypted observations in this batch.
  //
  // This field is authenticated by virtue of being included in the
  // "associated data" part of the AEAD encryption of each
  // |encrypted_observation|.
  ObservationMetadata meta_data = 1;

  // Each EncryptedMessage contains the ciphertext of an Observation that has
  // been encrypted to the public key of the Analyzer.
  repeated EncryptedMessage encrypted_observation = 2;
}

// An Envelope contains multiple ObservationBatches. An encrypted Envelope
// is the unit sent from an Encoder client to a Shuffler.
//
// NOTE(review): field number 2 is not used by any field declared here. If it
// once belonged to a removed field it should be declared `reserved` --
// confirm against the file's history.
message Envelope {
  // The batches carried by this Envelope.
  repeated ObservationBatch batch = 1;

  // The profile of the client system from which this Envelope is being sent.
  // To minimize the data sent from a client, this is included only once
  // per Envelope. The Shuffler is responsible for copying this into the
  // individual ObservationMetadata contained in this Envelope.
  //
  // Deprecated: filtered SystemProfiles are now added to each batch, so this
  // field is no longer needed.
  SystemProfile system_profile = 3 [deprecated = true];
}
