// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

library fuchsia.media;

using fuchsia.sysmem;

// Upper bounds, in bytes, on the `KeyId` and `InitVector` byte vectors declared
// in this file.
const uint32 MAX_KEY_ID_SIZE = 16;
const uint32 MAX_INIT_VECTOR_SIZE = 16;

/// Value
///
/// Generic "value" for use within the generic `Parameter` struct.
union Value {
    1: bool bool_value;
    2: uint64 uint64_value;
    3: int64 int64_value;
    4: string string_value;
    // Prefer using `FormatDetails.oob_bytes` instead of this member.
    5: bytes bytes_value;
};

/// Parameter
///
/// Generic parameter.
///
/// We want to minimize use of this generic "Parameter" structure by natively
/// defining as many stream-specific parameter semantics as we can.
///
// TODO: When possible, describe the very limited scenarios in which it would
// still be reasonable to use a generic Parameter.
struct Parameter {
    // Some indication of the scope of applicability of this Parameter.
    string scope;
    // Specific name of this parameter, without the scope prefix.
    string name;
    // The particular value of this parameter; see `Value` for the available
    // variants.
    Value value;
};

/// StreamError
///
/// This error code encapsulates various errors that might emanate from a
/// StreamProcessor server. It can be sent either as an OnStreamFailed event or
/// as an epitaph for the channel.
//
// The values below are grouped by their high-order byte: 0x00 for common
// errors, 0x01 for decoder errors, 0x02 for encoder errors, and 0x03 for
// decryptor errors.
enum StreamError : uint32 {
    // Common errors (high-order byte 0x00)
    /// An internal error with an unspecified reason.
    UNKNOWN = 0x00000001;
    /// The client provided invalid input format details.
    INVALID_INPUT_FORMAT_DETAILS = 0x00000002;
    /// The server received buffers that are not suitable for the operation to
    /// be performed. An example of this would be if a Decoder received output
    /// buffers that are too small to decode a frame into.
    INCOMPATIBLE_BUFFERS_PROVIDED = 0x00000003;

    // Decoder errors (high-order byte 0x01)
    /// An internal decoder error with an unspecified reason.
    DECODER_UNKNOWN = 0x01000001;

    // Encoder errors (high-order byte 0x02)
    /// An internal encoder error with an unspecified reason.
    ENCODER_UNKNOWN = 0x02000001;

    // Decryptor errors (high-order byte 0x03)
    /// An internal decryptor error with an unspecified reason.
    DECRYPTOR_UNKNOWN = 0x03000001;
    /// The requested KeyId is not available for use by the Decryptor. The
    /// client may try again later if that key becomes available.
    DECRYPTOR_NO_KEY = 0x03000002;
};

/// AudioCompressedFormat
///
/// Compressed audio format details. Holds either AAC-specific or SBC-specific
/// details.
xunion AudioCompressedFormat {
    AudioCompressedFormatAac aac;
    AudioCompressedFormatSbc sbc;
};

/// AudioBitrateMode
///
/// Bitrate mode of compressed audio.
enum AudioBitrateMode {
    // Used mainly when a client is configuring an encoder's output format.  May
    // also be present in an OnOutputConstraints() message from an encoder, but
    // should not be relied upon to be present by any consumer downstream of an
    // encoder.
    UNSPECIFIED = 0;
    // Presumably "constant bit rate" - TODO confirm.
    CBR = 1;
    // Presumably "variable bit rate" - TODO confirm.
    VBR = 2;
};

/// AudioCompressedFormatAac
///
/// AAC-specific compressed audio format details, used as the `aac` member of
/// `AudioCompressedFormat`. This struct has no fields.
struct AudioCompressedFormatAac {
};

/// AudioCompressedFormatSbc
///
/// SBC-specific compressed audio format details, used as the `sbc` member of
/// `AudioCompressedFormat`. This struct has no fields.
struct AudioCompressedFormatSbc {
};

/// AudioPcmMode
///
/// Sample encoding of PCM audio; see `PcmFormat.pcm_mode`.
//
// TODO(dustingreen): Keep or discard any non-linear formats for purposes of the
// Codec interface?
enum AudioPcmMode {
    // 16 bit signed int linear or 32 bit float linear, for now
    // 1-N channels ok, with "A.B" channels designated as A+B channel_count -
    // the channel map is separately specified.  So 5.1 becomes channel_count 6.
    LINEAR = 0;
    // G.711 8 bit format-defined waveform semantics
    // 1 channel
    ALAW = 1;
    // G.711 8 bit format-defined waveform semantics
    // 1 channel
    MULAW = 2;
};

/// AudioChannelId
///
/// Used in specifying which audio channel is for which speaker location / type.
//
// TODO(dustingreen): Do we need more channel IDs than this?
//
// TODO(dustingreen): Check with mpuryear@ re. naming consistency for "S" vs.
// "R" as we move these to a common definition.  Also the ordering of LS/RS vs.
// LR/RR - probably LR/RR being first would make more sense re. how channels
// get added incrementally, but changing the order would no longer match
// Android's ordering.
enum AudioChannelId {
    SKIP = 0; // unused channel
    LF = 1; // left front
    RF = 2; // right front
    CF = 3; // center front
    LS = 4; // left surround
    RS = 5; // right surround
    LFE = 6; // low frequency effects
    CS = 7; // back surround
    LR = 8; // left rear
    RR = 9; // right rear
    // This is the last explicitly-defined value + 1.  This name will be
    // re-defined in future if we add more defined channel IDs above.
    END_DEFINED = 10;
    // This is where format-specific (or ad-hoc) channel ID values should go, to
    // avoid colliding with any additional values allocated above.  The values
    // here are not guaranteed to avoid collision across different formats.
    EXTENDED_CHANNEL_ID_BASE = 0x6f000000;
    // Extended channel IDs should be <= MAX.
    MAX = 0x7fffffff;
};

/// PcmFormat
///
/// PCM audio format details.
///
// TODO(dustingreen): Discuss with mpuryear@ re. where definitions for these
// details go and make sure the common details can specify at least this much.
struct PcmFormat {
    // Implicit details:
    //   * For bits_per_sample > 8, host-endian is implied.
    //   * At least for now, for channel_count >= 2, interleaved layout is
    //     implied.

    AudioPcmMode pcm_mode;

    // bits_per_sample
    //
    // A "sample" is for a single channel.
    //
    // For example, CD quality is 16.  See AudioPcmMode comments, as the mode
    // constrains this value.
    uint32 bits_per_sample;

    // frames_per_second
    //
    // A "frame" is one datapoint (one "sample") for each channel.  Each channel
    // is sampled this many times per second.  For example, CD quality is 44100.
    uint32 frames_per_second;

    // channel_map
    //
    // channel_map.size() is the channel count (at most 16).  See AudioPcmMode
    // comments, as some modes constrain the channel count to 1.
    //
    // Values from AudioChannelId should be used if they are suitable.
    //
    // If a channel has no suitable AudioChannelId, an ad-hoc value can be used
    // in a range starting from AudioChannelId.EXTENDED_CHANNEL_ID_BASE.
    vector<AudioChannelId>:16 channel_map;

    // TODO(dustingreen): Add unsigned 8 bit, float 32 bit, maybe others. FWIW,
    // AOSP appears to support signed 16 bit, unsigned 8 bit, and float 32 bit
    // under "Pcm", AFAICT based on OMX_NUMERICALDATATYPE and ACodec.cpp code.
};

/// AudioUncompressedFormat
///
/// Uncompressed audio format details.
union AudioUncompressedFormat {
    1: PcmFormat pcm;
};

/// AudioFormat
///
/// Audio format details (compressed or uncompressed).
union AudioFormat {
    1: AudioCompressedFormat compressed;
    2: AudioUncompressedFormat uncompressed;
};

/// VideoCompressedFormat
///
/// Compressed video format details.
///
// If a compressed video format has no fields here, it's because it's a good
// format and is already self-describing given the mime_type + format-defined
// oob_bytes as appropriate + in-band data.
union VideoCompressedFormat {
    // TODO(dustingreen): Any compressed video formats that aren't sufficiently
    // self-describing to select and create a Codec instance to decode it?

    // TODO(dustingreen): temp field to make the compiler happy until we have at
    // least one real field.  Carries no format information.
    1: uint32 temp_field_todo_remove;
};

/// VideoColorSpace
///
/// Video color space. Only the INVALID value is defined so far.
enum VideoColorSpace {
    // TODO(dustingreen): add to this list
    INVALID = 0;
};

/// VideoUncompressedFormat
///
/// Uncompressed video format details.
///
// TODO(dustingreen): Integrate with a system-wide structure for this purpose.
// In progress - see image_format field below which will take the place of this
// struct/table.
struct VideoUncompressedFormat {
    // TODO(dustingreen): This will replace VideoUncompressedFormat (after
    // struct to table change merges).
    fuchsia.sysmem.ImageFormat_2 image_format;

    // fourcc
    //
    // A human-readable fourcc like RGBA should be 0x41424752 in the fourcc
    // field (regardless of host endian-ness). Note that the R (first character)
    // of the fourcc is in the low-order byte of this fourcc field.
    //
    // There are some fourcc codes that don't format nicely as a string.  While
    // I don't foresee any use of any of the purely numeric fourcc codes (not
    // corresponding to packed ascii character values), those would be stored
    // such that their numeric value has its low-order byte in the low-order
    // byte of this fourcc value.  So a fourcc with "hex value" 0x00000001 would
    // have the numeric value 1 in this field.
    //
    // The endian-ness of fourcc values stored in files or in network packets is
    // outside the scope of these comments, other than to state that regardless
    // of the source of the fourcc code and the order that storage /
    // transmission format stores these bytes, a human-readable fourcc should
    // have its human-read first ascii character value in the low order byte of
    // this field.
    uint32 fourcc;

    // For formats with different planes having different resolution, this is
    // the resolution of the highest-resolution plane(s).  Else it's the
    // resolution of all the planes.
    uint32 primary_width_pixels;
    uint32 primary_height_pixels;

    // For formats where the secondary planes are the same resolution, these
    // fields will be the same as primary_width_pixels and
    // primary_height_pixels.  For formats with smaller secondary resolutions,
    // these indicate that resolution.
    uint32 secondary_width_pixels;
    uint32 secondary_height_pixels;

    // Planar means the various planes are separately stored in their own chunks
    // of memory.
    bool planar;

    // If a format is swizzled, the swizzling parameters are not directly here.
    bool swizzled;

    uint32 primary_line_stride_bytes;
    // Formats with the same stride for all planes will have this field equal to
    // primary_line_stride_bytes.
    uint32 secondary_line_stride_bytes;

    // R or Y
    uint32 primary_start_offset;
    // G or U
    uint32 secondary_start_offset;
    // B or V
    uint32 tertiary_start_offset;

    uint32 primary_pixel_stride;
    // For formats with the same pixel stride for all planes, this field will be
    // equal to primary_pixel_stride.
    uint32 secondary_pixel_stride;

    // These override the primary_width_pixels and primary_height_pixels for
    // purposes of display (but not for purposes of determining the pixel layout
    // in memory).  These can crop on the right and bottom.  These must be <=
    // the corresponding coded dimension.
    //
    // This value must be <= primary_width_pixels.
    uint32 primary_display_width_pixels;
    // This value must be <= primary_height_pixels.
    uint32 primary_display_height_pixels;

    // The pixel_aspect_ratio_width : pixel_aspect_ratio_height is the pixel
    // aspect ratio (AKA sample aspect ratio AKA SAR) for the luma (AKA Y)
    // samples. A pixel_aspect_ratio of 1:1 means square pixels. A
    // pixel_aspect_ratio of 2:1 would mean pixels that are displayed twice as
    // wide as they are tall. Codec implementation should ensure these two
    // values are relatively prime by reducing the fraction (dividing both by
    // GCF) if necessary.
    //
    // When has_pixel_aspect_ratio == false, pixel_aspect_ratio_width and
    // pixel_aspect_ratio_height will both be 1, but in that case the
    // pixel_aspect_ratio_width : pixel_aspect_ratio_height of 1:1 is just a
    // very weak suggestion re. reasonable-ish handling, not in any way
    // authoritative. In this case (or in any case really) the receiver of this
    // message may have other OOB means to determine the actual
    // pixel_aspect_ratio.
    bool has_pixel_aspect_ratio = false;
    uint32 pixel_aspect_ratio_width = 1;
    uint32 pixel_aspect_ratio_height = 1;
};

/// VideoFormat
///
/// Video (compressed or uncompressed) format details.  In this context,
/// "uncompressed" can include block-based image compression formats that still
/// permit fairly fast random access to image data.
union VideoFormat {
    1: VideoCompressedFormat compressed;
    2: VideoUncompressedFormat uncompressed;
};

// Encryption schemes as defined by ISO 23001-7: Common encryption in ISO base
// media file format files. These are defined as strings rather than enums so as
// to not limit the encryption schemes that an implementation supports to the
// constants that are defined here.
//
// These are the values intended for `EncryptedFormat.scheme`.
const string ENCRYPTION_SCHEME_UNENCRYPTED = "unencrypted";
const string ENCRYPTION_SCHEME_CENC = "cenc";
const string ENCRYPTION_SCHEME_CBC1 = "cbc1";
const string ENCRYPTION_SCHEME_CENS = "cens";
const string ENCRYPTION_SCHEME_CBCS = "cbcs";

// Byte-vector aliases used by `EncryptedFormat.key_id` and
// `EncryptedFormat.init_vector`, bounded to at most MAX_KEY_ID_SIZE and
// MAX_INIT_VECTOR_SIZE bytes respectively.
using KeyId = bytes:MAX_KEY_ID_SIZE;
using InitVector = bytes:MAX_INIT_VECTOR_SIZE;

/// SubsampleEntry
///
/// A subsample is a byte range within a sample consisting of a clear byte range
/// followed by an encrypted byte range. This structure specifies the size of
/// each range in the subsample.
struct SubsampleEntry {
    // Size of the leading clear byte range of the subsample.
    uint32 clear_bytes;
    // Size of the encrypted byte range that follows the clear range.
    uint32 encrypted_bytes;
};

/// EncryptionPattern
///
/// Pattern encryption utilizes a pattern of encrypted and clear 16 byte blocks
/// over the protected range of a subsample (the encrypted_bytes of a
/// `SubsampleEntry`). This structure specifies the number of encrypted data
/// blocks followed by the number of clear data blocks.
//
// NOTE: the fields below are declared in the opposite order from the pattern
// described above (`clear_blocks` first, then `encrypted_blocks`).  The
// declaration order is part of the struct's wire layout, so refer to the
// fields by name rather than by position.
struct EncryptionPattern {
    // Number of clear 16 byte blocks in the pattern.
    uint32 clear_blocks;
    // Number of encrypted 16 byte blocks in the pattern.
    uint32 encrypted_blocks;
};

/// EncryptedFormat
///
/// The stream format details payload of a decrypting stream processor. This is
/// a sparsely populated table to specify parameters necessary for decryption
/// other than the data stream. It is only necessary to update fields if they
/// changed, but not an error if the same value is repeated.
table EncryptedFormat {
    // Ordinals 1, 2, and 7 are reserved and carry no field.
    1: reserved;
    2: reserved;
    7: reserved;

    /// `scheme` specifies which encryption scheme to use, such as
    /// `fuchsia.media.ENCRYPTION_SCHEME_CENC`.
    /// Usage:
    ///  - It is required to be set prior to delivery of input packets.
    ///  - Changing the scheme mid-stream is only permitted in some scenarios.
    ///    Once an encrypted scheme is selected for a stream, the scheme may
    ///    only be set to `fuchsia.media.ENCRYPTION_SCHEME_UNENCRYPTED` or that
    ///    same initial encrypted scheme. The scheme may be set to
    ///    `fuchsia.media.ENCRYPTION_SCHEME_UNENCRYPTED` at any point.
    6: string scheme;

    /// `key_id` identifies the key that should be used for decrypting
    /// subsequent data.
    /// Usage:
    ///  - It is required to be set prior to delivery of input packets to a
    ///    decryptor.
    ///  - This may be changed multiple times during a data stream.
    8: KeyId key_id;

    /// `init_vector` is used in combination with a key and a block of content
    /// to create the first cipher block in a chain and derive subsequent cipher
    /// blocks in a cipher block chain.
    /// Usage:
    ///  - It is required to be set prior to the delivery of input packets to a
    ///    decryptor.
    ///  - This may be changed multiple times during a data stream.
    3: InitVector init_vector;

    /// `subsamples` is used to identify the clear and encrypted portions of a
    /// subsample.
    /// Usage:
    ///  - For whole sample encryption, this parameter should not be sent.
    ///  - This may be changed multiple times during a data stream.
    4: vector<SubsampleEntry> subsamples;

    /// `pattern` is used to identify the clear and encrypted blocks for pattern
    /// based encryption.
    /// Usage:
    ///  - This is not allowed for CENC and CBC1 and required for CENS and CBCS.
    ///  - If required, it must be set prior to the delivery of input packets to
    ///    a decryptor.
    ///  - This may be changed multiple times during a data stream.
    5: EncryptionPattern pattern;
};

/// DecryptedFormat
///
/// This describes the format of the decrypted content. It is required to be
/// sent by the StreamProcessor server prior to the delivery of output packets.
/// Currently, there are no additional format details for decrypted output.
table DecryptedFormat {
    // TODO(FIDL-709): Empty tables cause dart analysis error. Remove this
    // unused field once the issue is resolved.
    1: bool ignore_this_field;
};

/// CryptoFormat
///
/// Crypto (encrypted or decrypted) format details.
xunion CryptoFormat {
    // Decryption parameters; see `EncryptedFormat`.
    EncryptedFormat encrypted;
    // Format of decrypted content; see `DecryptedFormat`.
    DecryptedFormat decrypted;
};

/// DomainFormat
///
/// Domain-specific format details (audio, video, or crypto).  Audio and video
/// details can each be compressed or uncompressed; crypto details can be
/// encrypted or decrypted.
union DomainFormat {
    1: AudioFormat audio;
    2: VideoFormat video;
    3: CryptoFormat crypto;
};

// Byte-count threshold for `FormatDetails.oob_bytes`: per the comments on that
// field, a server can close the channel if it receives more bytes than this.
// The `oob_bytes` field itself is declared as unbounded `bytes`, so this limit
// is not expressed in the field's type.
const uint64 kMaxOobBytesSize = 8192;

/// Number of SBC sub-bands; see `SbcEncoderSettings.sub_bands`. Each member's
/// numeric value equals the sub-band count it names.
enum SbcSubBands {
    SUB_BANDS_4 = 4;
    SUB_BANDS_8 = 8;
};

/// SBC block count; see `SbcEncoderSettings.block_count`. Each member's numeric
/// value equals the block count it names.
enum SbcBlockCount {
    BLOCK_COUNT_4 = 4;
    BLOCK_COUNT_8 = 8;
    BLOCK_COUNT_12 = 12;
    BLOCK_COUNT_16 = 16;
};

/// SBC allocation setting; see `SbcEncoderSettings.allocation`.
enum SbcAllocation {
    // Presumably the SBC "loudness" bit allocation method - TODO confirm.
    ALLOC_LOUDNESS = 0;
    // Presumably the SBC "SNR" bit allocation method - TODO confirm.
    ALLOC_SNR = 1;
};

/// SBC channel mode; see `SbcEncoderSettings.channel_mode`.
enum SbcChannelMode {
    MONO = 0;
    DUAL = 1;
    STEREO = 2;
    JOINT_STEREO = 3;
};

/// Settings for an SBC Encoder.
///
/// SBC Encoders take signed little endian 16 bit linear PCM samples and
/// return encoded SBC frames. SBC encoders encode PCM data in batches of
/// `sub_bands * block_count` PCM frames. This encoder will accept PCM data on
/// arbitrary frame boundaries, but the output flushed when EOS is queued may be
/// zero-padded to make a full batch for encoding.
struct SbcEncoderSettings {
    SbcSubBands sub_bands = SbcSubBands.SUB_BANDS_8;
    SbcAllocation allocation = SbcAllocation.ALLOC_LOUDNESS;
    SbcBlockCount block_count = SbcBlockCount.BLOCK_COUNT_4;
    // No default is declared for the channel mode.
    SbcChannelMode channel_mode;
    /// SBC bit pool value.
    uint64 bit_pool;
};

/// Raw AAC access units. This struct has no fields; it is used as the `raw`
/// member of `AacTransport`.
struct AacTransportRaw {
};

/// AAC transport selection; see `AacEncoderSettings.transport`. `raw` is the
/// only transport defined here.
xunion AacTransport {
    AacTransportRaw raw;
};

/// AAC channel mode; see `AacEncoderSettings.channel_mode`.
//
// Note that the value 1 is not assigned: MONO is 0 and STEREO is 2.
enum AacChannelMode {
    MONO = 0;
    STEREO = 2;
};

/// Constant bit rate setting, used as the `constant` member of `AacBitRate`.
struct AacConstantBitRate {
    /// Bits per second
    uint32 bit_rate;
};

/// Variable bit rate modes. The actual resulting bitrate
/// varies based on input signal and other encoding settings.
///
/// Used as the `variable` member of `AacBitRate`.
///
/// See https://wiki.hydrogenaud.io/index.php?title=Fraunhofer_FDK_AAC#Bitrate_Modes
enum AacVariableBitRate {
    V1 = 1;
    V2 = 2;
    V3 = 3;
    V4 = 4;
    V5 = 5;
};

/// AAC bit rate setting: either a constant bit rate or a variable bit rate
/// mode. See `AacEncoderSettings.bit_rate`.
union AacBitRate {
    1: AacConstantBitRate constant;
    2: AacVariableBitRate variable;
};

/// AAC audio object type; see `AacEncoderSettings.aot`.
enum AacAudioObjectType {
    /// MPEG-2 Low Complexity
    MPEG2_AAC_LC = 0;
};

/// Settings for an AAC encoder, used as the `aac` member of `EncoderSettings`.
struct AacEncoderSettings {
    AacTransport transport;
    AacChannelMode channel_mode;
    AacBitRate bit_rate;
    // Audio object type.
    AacAudioObjectType aot;
};

/// Settings for encoders that tell them how to encode raw
/// formats. See `FormatDetails.encoder_settings`.
xunion EncoderSettings {
    SbcEncoderSettings sbc;
    AacEncoderSettings aac;
};

/// FormatDetails
///
/// This describes/details the format on input or output of a StreamProcessor
/// (separate instances for input vs. output).
//
// The purpose of FormatDetails is to fill in additional details not
// conveyed via other means.
//
// For decoder input, the format details tend to be fairly sparse, since most
// compressed formats tend to be mostly self-describing.
//
// For decoder output and encoder input, the format details need to include all
// the out-of-band information regarding the uncompressed data, which tends not
// to be self-describing.
//
// Settings that are completely redundant with the data in the format itself
// should not be in a required field here. An encoder may set oob_bytes on its
// output.
//
// This stuff should be limited to things we need to know to properly process the
// data which we can't already determine from the data itself, and which isn't
// already covered by a format's defined OOB binary config blob, which is
// conveyed in oob_bytes.
//
// Most decoders can have FormatDetails.domain null.
table FormatDetails {
    // Particular instances of FormatDetails will set this field to make it
    // easier for a receiver to determine if any part of the format has changed
    // vs. the last FormatDetails received for the same context.
    1: uint64 format_details_version_ordinal;

    // "mime_type" strings used by particular decoders / encoders so far:
    //
    // SW AAC decoder:
    //   * input:
    //     * "audio/aac-adts" - ADTS AAC; self-contained format, but
    //       implementation for now requires oob_bytes to contain
    //       AudioSpecificConfig() reconstructed from ADTS header data - see
    //       also make_AudioSpecificConfig_from_ADTS_header() for now.
    //   * output:
    //     * "audio/raw" - stereo linear 16 bit integer PCM
    //
    // TODO(dustingreen): avoid requiring oob_bytes when using SoftAAC2.cpp
    // for AAC ADTS.
    //
    // TODO(dustingreen): Add non-ADTS AAC support (which naturally needs
    // oob_bytes).
    //
    // TODO(dustingreen): Consider "pseudo_mime_type", or an enum, + "domain"
    // details as needed instead, since calling this "mime_type" could lead to
    // confusion.
    2: string mime_type;

    // Some streams have their own binary configuration structure.  For those
    // streams we allow that binary structure to be directly conveyed to the
    // stream processor here.
    //
    // audio/aac - this is an AudioSpecificConfig().
    // audio/aac-adts - this is not set.
    // TODO(dustingreen): make the audio/aac-adts statement true soon.  At the
    // moment we set this with make_AudioSpecificConfig_from_ADTS_header(), but
    // that should not be the client's job for ADTS.
    //
    // For some formats whose "ES" data format is self-contained, or for which
    // there is no format-defined binary OOB config, this is null.
    //
    // A server can close the channel if the count of bytes is >
    // kMaxOobBytesSize or is larger than makes any sense for the stream
    // processor.  If any stream actually needs more than kMaxOobBytesSize
    // bytes here, we could potentially increase this restriction some, but
    // this interface isn't designed to support OOB config blobs that approach
    // ZX_CHANNEL_MAX_MSG_BYTES.
    3: bytes oob_bytes;

    // Decoder input format:
    //
    // If a format is not self-describing given the mime_type and a
    // format-spec-defined oob_bytes, this domain field can be set to
    // provide the additional compressed-format-specific details.  This is
    // expected to be fairly rare, so most compressed input formats will have
    // only the mime_type and possibly oob_bytes set, with domain typically
    // null.  If an encoder is upstream however, domain may be set to convey the
    // encoder settings that were used, but a decoder consumer doesn't need to
    // look at those.
    //
    // Encoder output format:
    //
    // The encoder's compressed data output typically needs some configuration
    // (provided in this field) that's convenient to provide in a form that's
    // not oob_bytes, and the codec can convert that config to oob_bytes on
    // encoder output via OnOutputConstraints().  We retain these encoder settings
    // in the output FormatDetails to allow for cases where a downstream
    // consumer knowing the encoder settings could be useful.
    //
    // TODO(dustingreen): Decide if we want to retain this, or if we'd prefer to
    // split out config settings and maybe only represent a few encoder settings
    // as best-effort optional aux data, like bitrate.
    //
    // Encoder input format / decoder output format:
    //
    // This field contains fairly detailed information re. uncompressed data
    // format details, which tends to _not_ be self-describing in-band.
    4: DomainFormat domain;

    // See comments above on Parameter.  At the time we lock relevant FIDL
    // interfaces, there should be zero use of this field outside tests, but
    // this is here in case we need to allow a stream processor client to
    // convey additional config parameters to/from a stream processor which we
    // didn't anticipate before locking.
    //
    // If there are any known "official" exceptions to the previous paragraph,
    // we'll list them here by corresponding mime_type (none so far):
    //   * "<mime_type>" - <usage_description>
    //
    // For streams that define their own stream-specific config/OOB data, put
    // that in oob_bytes above instead of this field.
    5: vector<Parameter> pass_through_parameters;

    /// Instructs an encoder on how to encode raw data.
    ///
    /// Decoders may ignore this field but are entitled to reject requests with
    /// this field set because it doesn't make sense.
    6: EncoderSettings encoder_settings;

    /// The number of ticks of the timebase of input packet timestamp_ish values
    /// per second.
    ///
    /// The timebase is only used for optional extrapolation of timestamp_ish
    /// values when an input timestamp which applies to byte 0 of the valid portion
    /// of the input packet does not correspond directly to byte 0 of the valid
    /// portion of any output packet.
    ///
    /// Leave unset if timestamp extrapolation is not needed, either due to lack of
    /// timestamps on input, or due to input being provided in increments of the
    /// encoder's input chunk size (based on the encoder settings and calculated
    /// independently by the client).  Set if timestamp extrapolation is known to be
    /// needed or known to be acceptable to the client.
    7: uint64 timebase;
};
