|  | // Copyright 2018 The Fuchsia Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | library fuchsia.media; | 
|  |  | 
|  | using fuchsia.sysmem; | 
|  |  | 
|  | // Size limits used to bound the KeyId, InitVector, and EncryptionScheme | 
|  | // aliases declared further down in this file. | 
|  | const uint32 MAX_KEY_ID_SIZE = 16; | 
|  | const uint32 MAX_INIT_VECTOR_SIZE = 16; | 
|  | const uint32 MAX_ENCRYPTION_SCHEME_SIZE = 100; | 
|  |  | 
|  | /// Value | 
|  | /// | 
|  | /// Generic "value" for use within generic "Parameter" struct.  Holds one of a | 
|  | /// bool, uint64, int64, string, or bytes. | 
|  | union Value { | 
|  | 1: bool bool_value; | 
|  | 2: uint64 uint64_value; | 
|  | 3: int64 int64_value; | 
|  | 4: string string_value; | 
|  | // Prefer using oob_bytes (see FormatDetails.oob_bytes) instead of this | 
|  | // member. | 
|  | 5: bytes bytes_value; | 
|  | }; | 
|  |  | 
|  | /// Parameter | 
|  | /// | 
|  | /// Generic parameter: a (scope, name, value) triple. | 
|  | /// | 
|  | /// We want to minimize use of this generic "Parameter" structure by natively | 
|  | /// defining as many stream-specific parameter semantics as we can. | 
|  | /// | 
|  | // TODO: When possible, describe the very limited scenarios in which it would | 
|  | // still be reasonable to use a generic Parameter. | 
|  | struct Parameter { | 
|  | // Some indication of the scope of applicability of this Parameter. | 
|  | string scope; | 
|  | // Specific name of this parameter, without the scope prefix. | 
|  | string name; | 
|  | // The particular value of this parameter (see the Value union). | 
|  | Value value; | 
|  | }; | 
|  |  | 
|  | /// StreamError | 
|  | /// | 
|  | /// This error code encapsulates various errors that might emanate from a | 
|  | /// StreamProcessor server. It can be sent either as an OnStreamFailed event or | 
|  | /// as an epitaph for the channel. | 
|  | /// | 
|  | /// Values are grouped by their high-order byte: 0x00 common, 0x01 decoder, | 
|  | /// 0x02 encoder, 0x03 decryptor. | 
|  | enum StreamError : uint32 { | 
|  | // Common errors (high-order byte 0x00). | 
|  | /// An internal error with an unspecified reason. | 
|  | UNKNOWN = 0x00000001; | 
|  | /// The client provided invalid input format details. | 
|  | INVALID_INPUT_FORMAT_DETAILS = 0x00000002; | 
|  | /// The server received buffers that are not suitable for the operation to | 
|  | /// be performed. An example of this would be if a Decoder received output | 
|  | /// buffers that are too small to decode a frame into. | 
|  | INCOMPATIBLE_BUFFERS_PROVIDED = 0x00000003; | 
|  | /// Processing of input EOS (end of stream) failed, so the stream failed. | 
|  | /// Currently this can occur if a core codec watchdog fires while processing | 
|  | /// EOS. | 
|  | EOS_PROCESSING = 0x00000004; | 
|  |  | 
|  | // Decoder errors (high-order byte 0x01). | 
|  | /// An internal decoder error with an unspecified reason. | 
|  | DECODER_UNKNOWN = 0x01000001; | 
|  | /// Input data that can't be parsed.  Only some parsing problems/errors are | 
|  | /// reported this way.  Corrupt input data may be reported as another | 
|  | /// StreamError, or may not cause a StreamError. | 
|  | DECODER_DATA_PARSING = 0x01000002; | 
|  |  | 
|  | // Encoder errors (high-order byte 0x02). | 
|  | /// An internal encoder error with an unspecified reason. | 
|  | ENCODER_UNKNOWN = 0x02000001; | 
|  |  | 
|  | // Decryptor errors (high-order byte 0x03). | 
|  | /// An internal decryptor error with an unspecified reason. | 
|  | DECRYPTOR_UNKNOWN = 0x03000001; | 
|  | /// The requested KeyId is not available for use by the Decryptor. The | 
|  | /// client may try again later if that key becomes available. | 
|  | DECRYPTOR_NO_KEY = 0x03000002; | 
|  | }; | 
|  |  | 
|  | /// AudioCompressedFormat | 
|  | /// | 
|  | /// Compressed audio format details, with one member per compressed audio | 
|  | /// format declared so far (AAC, SBC). | 
|  | flexible union AudioCompressedFormat { | 
|  | 1: AudioCompressedFormatAac aac; | 
|  | 2: AudioCompressedFormatSbc sbc; | 
|  | }; | 
|  |  | 
|  | /// AudioBitrateMode | 
|  | /// | 
|  | /// Bitrate mode of a compressed audio stream. | 
|  | enum AudioBitrateMode { | 
|  | // Used mainly when a client is configuring an encoder's output format.  May | 
|  | // also be present in an OnOutputConstraints() message from an encoder, but | 
|  | // should not be relied upon to be present by any consumer downstream of an | 
|  | // encoder. | 
|  | UNSPECIFIED = 0; | 
|  | // Constant bit rate. | 
|  | CBR = 1; | 
|  | // Variable bit rate. | 
|  | VBR = 2; | 
|  | }; | 
|  |  | 
|  | /// AAC member type of the AudioCompressedFormat union.  Currently declares no | 
|  | /// fields. | 
|  | struct AudioCompressedFormatAac { | 
|  | }; | 
|  |  | 
|  | /// SBC member type of the AudioCompressedFormat union.  Currently declares no | 
|  | /// fields. | 
|  | struct AudioCompressedFormatSbc { | 
|  | }; | 
|  |  | 
|  | /// AudioPcmMode | 
|  | /// | 
|  | /// Sample encoding used by a PcmFormat (see PcmFormat.pcm_mode). | 
|  | /// | 
|  | // TODO(dustingreen): Keep or discard any non-linear formats for purposes of the | 
|  | // Codec interface? | 
|  | enum AudioPcmMode { | 
|  | // 16 bit signed int linear or 32 bit float linear, for now. | 
|  | // 1-N channels ok, with "A.B" channels designated as A+B channel_count - | 
|  | // the channel map is separately specified.  So 5.1 becomes channel_count 6. | 
|  | LINEAR = 0; | 
|  | // G.711 A-law: 8 bit, format-defined waveform semantics. | 
|  | // 1 channel. | 
|  | ALAW = 1; | 
|  | // G.711 mu-law: 8 bit, format-defined waveform semantics. | 
|  | // 1 channel. | 
|  | MULAW = 2; | 
|  | }; | 
|  |  | 
|  | /// AudioChannelId | 
|  | /// | 
|  | /// Used in specifying which audio channel is for which speaker location / type. | 
|  | /// | 
|  | /// TODO(dustingreen): Do we need more channel IDs than this? | 
|  | /// | 
|  | // TODO(dustingreen): Check with mpuryear@ re. naming consistency for "S" vs. | 
|  | // "R" as we move these to a common definition.  Also the ordering of LS/RS vs. | 
|  | // LR/RR - probably LR/RR being first would make more sense re. how channels | 
|  | // get added incrementally, but changing the order would no longer match | 
|  | // Android's ordering. | 
|  | enum AudioChannelId { | 
|  | SKIP = 0; // unused channel | 
|  | LF = 1; // left front | 
|  | RF = 2; // right front | 
|  | CF = 3; // center front | 
|  | LS = 4; // left surround | 
|  | RS = 5; // right surround | 
|  | LFE = 6; // low frequency effects | 
|  | CS = 7; // back surround | 
|  | LR = 8; // left rear | 
|  | RR = 9; // right rear | 
|  | // This is the last explicitly-defined value + 1.  This name will be | 
|  | // re-defined in future if we add more defined channel IDs above. | 
|  | END_DEFINED = 10; | 
|  | // This is where format-specific (or ad-hoc) channel ID values should go, to | 
|  | // avoid colliding with any additional values allocated above.  The values | 
|  | // here are not guaranteed to avoid collision across different formats. | 
|  | EXTENDED_CHANNEL_ID_BASE = 0x6f000000; | 
|  | // Extended channel IDs should be <= MAX. | 
|  | MAX = 0x7fffffff; | 
|  | }; | 
|  |  | 
|  | /// PcmFormat | 
|  | /// | 
|  | /// PCM audio format details. | 
|  | /// | 
|  | // TODO(dustingreen): Discuss with mpuryear@ re. where definitions for these | 
|  | // details go and make sure the common details can specify at least this much. | 
|  | struct PcmFormat { | 
|  | // Implicit details: | 
|  | //   * For bits_per_sample > 8, host-endian is implied. | 
|  | //   * At least for now, for channel_count >= 2, interleaved layout is | 
|  | //     implied. | 
|  |  | 
|  | // pcm_mode | 
|  | // | 
|  | // Sample encoding; see AudioPcmMode. | 
|  | AudioPcmMode pcm_mode; | 
|  |  | 
|  | // bits_per_sample | 
|  | // | 
|  | // A "sample" is for a single channel. | 
|  | // | 
|  | // For example, CD quality is 16.  See AudioPcmMode comments, as the mode | 
|  | // constrains this value. | 
|  | uint32 bits_per_sample; | 
|  |  | 
|  | // frames_per_second | 
|  | // | 
|  | // A "frame" is one datapoint (one "sample") for each channel.  Each channel | 
|  | // is sampled this many times per second.  For example, CD quality is 44100. | 
|  | uint32 frames_per_second; | 
|  |  | 
|  | // channel_map | 
|  | // | 
|  | // channel_map.size() is the channel count (at most 16, per the vector | 
|  | // bound).  See AudioPcmMode comments, as some modes constrain the channel | 
|  | // count to 1. | 
|  | // | 
|  | // Values from AudioChannelId should be used if they are suitable. | 
|  | // | 
|  | // If a channel has no suitable AudioChannelId, an ad-hoc value can be used | 
|  | // in a range starting from AudioChannelId.EXTENDED_CHANNEL_ID_BASE. | 
|  | vector<AudioChannelId>:16 channel_map; | 
|  |  | 
|  | // TODO(dustingreen): Add unsigned 8 bit, float 32 bit, maybe others. FWIW, | 
|  | // AOSP appears to support signed 16 bit, unsigned 8 bit, and float 32 bit | 
|  | // under "Pcm", AFAICT based on OMX_NUMERICALDATATYPE and ACodec.cpp code. | 
|  | }; | 
|  |  | 
|  | /// AudioUncompressedFormat | 
|  | /// | 
|  | // Uncompressed audio format details.  PCM is the only member declared so far. | 
|  | union AudioUncompressedFormat { | 
|  | 1: PcmFormat pcm; | 
|  | }; | 
|  |  | 
|  | /// AudioFormat | 
|  | /// | 
|  | // Audio format details: either compressed or uncompressed. | 
|  | union AudioFormat { | 
|  | 1: AudioCompressedFormat compressed; | 
|  | 2: AudioUncompressedFormat uncompressed; | 
|  | }; | 
|  |  | 
|  | /// VideoCompressedFormat | 
|  | /// | 
|  | /// Compressed video format details. | 
|  | /// | 
|  | // If a compressed video format has no fields here, it's because it's a good | 
|  | // format and is already self-describing given the mime_type + format-defined | 
|  | // oob_bytes as appropriate + in-band data. | 
|  | union VideoCompressedFormat { | 
|  | // TODO(dustingreen): Any compressed video formats that aren't sufficiently | 
|  | // self-describing to select and create a Codec instance to decode it? | 
|  |  | 
|  | // TODO(dustingreen): temp field to make the compiler happy until we have at | 
|  | // least one real field.  Placeholder only; it carries no format | 
|  | // information. | 
|  | 1: uint32 temp_field_todo_remove; | 
|  | }; | 
|  |  | 
|  | /// VideoColorSpace | 
|  | /// | 
|  | /// Color space identifiers.  Only the INVALID placeholder is declared so far. | 
|  | enum VideoColorSpace { | 
|  | // TODO(dustingreen): add to this list | 
|  | INVALID = 0; | 
|  | }; | 
|  |  | 
|  | /// VideoUncompressedFormat | 
|  | /// | 
|  | /// Uncompressed video format details. | 
|  | /// | 
|  | // TODO(dustingreen): Integrate with a system-wide structure for this purpose. | 
|  | // In progress - see image_format field below which will take the place of this | 
|  | // struct/table. | 
|  | struct VideoUncompressedFormat { | 
|  | // TODO(dustingreen): This will replace VideoUncompressedFormat (after | 
|  | // struct to table change merges). | 
|  | fuchsia.sysmem.ImageFormat_2 image_format; | 
|  |  | 
|  | // fourcc | 
|  | // | 
|  | // A human-readable fourcc like RGBA should be 0x41424752 in the fourcc | 
|  | // field (regardless of host endian-ness). Note that the R (first character) | 
|  | // of the fourcc is in the low-order byte of this fourcc field. | 
|  | // | 
|  | // There are some fourcc codes that don't format nicely as a string.  While | 
|  | // I don't foresee any use of any of the purely numeric fourcc codes (not | 
|  | // corresponding to packed ascii character values), those would be stored | 
|  | // such that their numeric value has its low-order byte in the low-order | 
|  | // byte of this fourcc value.  So a fourcc with "hex value" 0x00000001 would | 
|  | // have the numeric value 1 in this field. | 
|  | // | 
|  | // The endian-ness of fourcc values stored in files or in network packets is | 
|  | // outside the scope of these comments, other than to state that regardless | 
|  | // of the source of the fourcc code and the order that storage / | 
|  | // transmission format stores these bytes, a human-readable fourcc should | 
|  | // have its human-read first ascii character value in the low order byte of | 
|  | // this field. | 
|  | uint32 fourcc; | 
|  |  | 
|  | // For formats with different planes having different resolution, this is | 
|  | // the resolution of the highest-resolution plane(s).  Else it's the | 
|  | // resolution of all the planes. | 
|  | uint32 primary_width_pixels; | 
|  | uint32 primary_height_pixels; | 
|  |  | 
|  | // For formats where the secondary planes are the same resolution, these | 
|  | // fields will be the same as primary_width_pixels and | 
|  | // primary_height_pixels.  For formats with smaller secondary resolutions, | 
|  | // these indicate that resolution. | 
|  | uint32 secondary_width_pixels; | 
|  | uint32 secondary_height_pixels; | 
|  |  | 
|  | // Planar means the various planes are separately stored in their own chunks | 
|  | // of memory. | 
|  | bool planar; | 
|  |  | 
|  | // If a format is swizzled, the swizzling parameters are not directly here. | 
|  | bool swizzled; | 
|  |  | 
|  | // Line stride, in bytes, of the primary plane. | 
|  | uint32 primary_line_stride_bytes; | 
|  | // Formats with the same stride for all planes will have this field equal to | 
|  | // primary_line_stride_bytes. | 
|  | uint32 secondary_line_stride_bytes; | 
|  |  | 
|  | // R or Y | 
|  | uint32 primary_start_offset; | 
|  | // G or U | 
|  | uint32 secondary_start_offset; | 
|  | // B or V | 
|  | uint32 tertiary_start_offset; | 
|  |  | 
|  | // Pixel stride of the primary plane. | 
|  | uint32 primary_pixel_stride; | 
|  | // For formats with the same pixel stride for all planes, this field will be | 
|  | // equal to primary_pixel_stride. | 
|  | uint32 secondary_pixel_stride; | 
|  |  | 
|  | // These override the primary_width_pixels and primary_height_pixels for | 
|  | // purposes of display (but not for purposes of determining the pixel layout | 
|  | // in memory).  These can crop on the right and bottom.  These must be <= | 
|  | // the corresponding coded dimension. | 
|  | // | 
|  | // This value must be <= primary_width_pixels. | 
|  | uint32 primary_display_width_pixels; | 
|  | // This value must be <= primary_height_pixels. | 
|  | uint32 primary_display_height_pixels; | 
|  |  | 
|  | // The pixel_aspect_ratio_width : pixel_aspect_ratio_height is the pixel | 
|  | // aspect ratio (AKA sample aspect ratio, AKA SAR) for the luma (AKA Y) | 
|  | // samples. A pixel_aspect_ratio of 1:1 means square pixels. A | 
|  | // pixel_aspect_ratio of 2:1 would mean pixels that are displayed twice as | 
|  | // wide as they are tall. Codec implementations should ensure these two | 
|  | // values are relatively prime by reducing the fraction (dividing both by | 
|  | // GCF) if necessary. | 
|  | // | 
|  | // When has_pixel_aspect_ratio == false, pixel_aspect_ratio_width and | 
|  | // pixel_aspect_ratio_height will both be 1, but in that case the | 
|  | // pixel_aspect_ratio_width : pixel_aspect_ratio_height of 1:1 is just a | 
|  | // very weak suggestion re. reasonable-ish handling, not in any way | 
|  | // authoritative. In this case (or in any case really) the receiver of this | 
|  | // message may have other OOB means to determine the actual | 
|  | // pixel_aspect_ratio. | 
|  | bool has_pixel_aspect_ratio = false; | 
|  | uint32 pixel_aspect_ratio_width = 1; | 
|  | uint32 pixel_aspect_ratio_height = 1; | 
|  | }; | 
|  |  | 
|  | /// VideoFormat | 
|  | /// | 
|  | /// Video (compressed or uncompressed) format details.  In this context, | 
|  | /// "uncompressed" can include block-based image compression formats that still | 
|  | /// permit fairly fast random access to image data. | 
|  | union VideoFormat { | 
|  | 1: VideoCompressedFormat compressed; | 
|  | 2: VideoUncompressedFormat uncompressed; | 
|  | }; | 
|  |  | 
|  | // Encryption schemes as defined by ISO 23001-7: Common encryption in ISO base | 
|  | // media file format files. These are defined as strings rather than enums so as | 
|  | // to not limit the encryption schemes that an implementation supports to the | 
|  | // constants that are defined here. | 
|  | // | 
|  | // EncryptionScheme is a string bounded by MAX_ENCRYPTION_SCHEME_SIZE; the | 
|  | // ENCRYPTION_SCHEME_* constants below are the scheme names defined so far. | 
|  | alias EncryptionScheme = string:MAX_ENCRYPTION_SCHEME_SIZE; | 
|  | const string ENCRYPTION_SCHEME_UNENCRYPTED = "unencrypted"; | 
|  | const string ENCRYPTION_SCHEME_CENC = "cenc"; | 
|  | const string ENCRYPTION_SCHEME_CBC1 = "cbc1"; | 
|  | const string ENCRYPTION_SCHEME_CENS = "cens"; | 
|  | const string ENCRYPTION_SCHEME_CBCS = "cbcs"; | 
|  |  | 
|  | // Key identifier and initialization vector: byte vectors bounded by | 
|  | // MAX_KEY_ID_SIZE and MAX_INIT_VECTOR_SIZE respectively. | 
|  | alias KeyId = bytes:MAX_KEY_ID_SIZE; | 
|  | alias InitVector = bytes:MAX_INIT_VECTOR_SIZE; | 
|  |  | 
|  | /// SubsampleEntry | 
|  | /// | 
|  | /// A subsample is a byte range within a sample consisting of a clear byte range | 
|  | /// followed by an encrypted byte range. This structure specifies the size of | 
|  | /// each range in the subsample. | 
|  | struct SubsampleEntry { | 
|  | // Size, in bytes, of the leading clear range. | 
|  | uint32 clear_bytes; | 
|  | // Size, in bytes, of the encrypted range that follows the clear range. | 
|  | uint32 encrypted_bytes; | 
|  | }; | 
|  |  | 
|  | /// EncryptionPattern | 
|  | /// | 
|  | /// Pattern encryption utilizes a pattern of encrypted and clear 16 byte blocks | 
|  | /// over the protected range of a subsample (the encrypted_bytes of a | 
|  | /// `SubsampleEntry`). This structure specifies the number of encrypted data | 
|  | /// blocks followed by the number of clear data blocks. | 
|  | // | 
|  | // NOTE(review): the fields below are declared clear-first, whereas the prose | 
|  | // above describes the pattern as encrypted blocks followed by clear blocks. | 
|  | // The declaration order is part of the wire format and must not be changed; | 
|  | // confirm the intended pattern order against the decryptor implementation. | 
|  | struct EncryptionPattern { | 
|  | // Number of clear (unencrypted) 16 byte blocks in the pattern. | 
|  | uint32 clear_blocks; | 
|  | // Number of encrypted 16 byte blocks in the pattern. | 
|  | uint32 encrypted_blocks; | 
|  | }; | 
|  |  | 
|  | /// EncryptedFormat | 
|  | /// | 
|  | /// The stream format details payload of a decrypting stream processor. This is | 
|  | /// a sparsely populated table to specify parameters necessary for decryption | 
|  | /// other than the data stream. It is only necessary to update fields if they | 
|  | /// changed, but it is not an error if the same value is repeated. | 
|  | table EncryptedFormat { | 
|  | // Ordinals 1, 2, and 7 are reserved.  The remaining members are listed | 
|  | // below in an order that does not follow their ordinals. | 
|  | 1: reserved; | 
|  | 2: reserved; | 
|  | 7: reserved; | 
|  |  | 
|  | /// `scheme` specifies which encryption scheme to use, such as | 
|  | /// `fuchsia.media.ENCRYPTION_SCHEME_CENC`. | 
|  | /// Usage: | 
|  | ///  - It is required to be set prior to delivery of input packets. | 
|  | ///  - Changing the scheme mid-stream is only permitted in some scenarios. | 
|  | ///    Once an encrypted scheme is selected for a stream, the scheme may | 
|  | ///    only be set to `fuchsia.media.ENCRYPTION_SCHEME_UNENCRYPTED` or that | 
|  | ///    same initial encrypted scheme. The scheme may be set to | 
|  | ///    `fuchsia.media.ENCRYPTION_SCHEME_UNENCRYPTED` at any point. | 
|  | // | 
|  | // NOTE(review): this is declared as an unbounded `string`, not the | 
|  | // `EncryptionScheme` alias (string:MAX_ENCRYPTION_SCHEME_SIZE) declared in | 
|  | // this file; confirm whether the bound was meant to apply here. | 
|  | 6: string scheme; | 
|  |  | 
|  | /// `key_id` identifies the key that should be used for decrypting | 
|  | /// subsequent data. | 
|  | /// Usage: | 
|  | ///  - It is required to be set prior to delivery of input packets to a | 
|  | ///    decryptor. | 
|  | ///  - This may be changed multiple times during a data stream. | 
|  | 8: KeyId key_id; | 
|  |  | 
|  | /// `init_vector` is used in combination with a key and a block of content | 
|  | /// to create the first cipher block in a chain and derive subsequent cipher | 
|  | /// blocks in a cipher block chain. | 
|  | /// Usage: | 
|  | ///  - It is required to be set prior to the delivery of input packets to a | 
|  | ///    decryptor. | 
|  | ///  - This may be changed multiple times during a data stream. | 
|  | 3: InitVector init_vector; | 
|  |  | 
|  | /// `subsamples` is used to identify the clear and encrypted portions of a | 
|  | /// subsample. | 
|  | /// Usage: | 
|  | ///  - For whole sample encryption, this parameter should not be sent. | 
|  | ///  - This may be changed multiple times during a data stream. | 
|  | 4: vector<SubsampleEntry> subsamples; | 
|  |  | 
|  | /// `pattern` is used to identify the clear and encrypted blocks for pattern | 
|  | /// based encryption. | 
|  | /// Usage: | 
|  | /// - This is not allowed for CENC and CBC1 and required for CENS and CBCS. | 
|  | /// - If required, it must be set prior to the delivery of input packets to | 
|  | ///   a decryptor. | 
|  | /// - This may be changed multiple times during a data stream. | 
|  | 5: EncryptionPattern pattern; | 
|  | }; | 
|  |  | 
|  | /// DecryptedFormat | 
|  | /// | 
|  | /// This describes the format of the decrypted content. It is required to be | 
|  | /// sent by the StreamProcessor server prior to the delivery of output packets. | 
|  | /// Currently, there are no additional format details for decrypted output. | 
|  | table DecryptedFormat { | 
|  | // TODO(fxbug.dev/8031): Empty tables cause dart analysis error. Remove this | 
|  | // unused field once the issue is resolved. | 
|  | 1: bool ignore_this_field; | 
|  | }; | 
|  |  | 
|  | /// CryptoFormat | 
|  | /// | 
|  | /// Crypto (encrypted or decrypted) format details: either an EncryptedFormat | 
|  | /// or a DecryptedFormat. | 
|  | flexible union CryptoFormat { | 
|  | 1: EncryptedFormat encrypted; | 
|  | 2: DecryptedFormat decrypted; | 
|  | }; | 
|  |  | 
|  | /// DomainFormat | 
|  | /// | 
|  | // Domain-specific format details: audio or video (each compressed or | 
|  | // uncompressed), or crypto (encrypted or decrypted). | 
|  | union DomainFormat { | 
|  | 1: AudioFormat audio; | 
|  | 2: VideoFormat video; | 
|  | 3: CryptoFormat crypto; | 
|  | }; | 
|  |  | 
|  | // Byte-count limit referenced by the FormatDetails.oob_bytes comments.  The | 
|  | // oob_bytes field itself is declared as unbounded `bytes`, so this limit is | 
|  | // not expressed in that field's type. | 
|  | const uint64 kMaxOobBytesSize = 8192; | 
|  |  | 
|  | /// Sub-band counts selectable via SbcEncoderSettings.sub_bands.  Each member's | 
|  | /// numeric value equals the count it names. | 
|  | enum SbcSubBands { | 
|  | SUB_BANDS_4 = 4; | 
|  | SUB_BANDS_8 = 8; | 
|  | }; | 
|  |  | 
|  | /// Block counts selectable via SbcEncoderSettings.block_count.  Each member's | 
|  | /// numeric value equals the count it names. | 
|  | enum SbcBlockCount { | 
|  | BLOCK_COUNT_4 = 4; | 
|  | BLOCK_COUNT_8 = 8; | 
|  | BLOCK_COUNT_12 = 12; | 
|  | BLOCK_COUNT_16 = 16; | 
|  | }; | 
|  |  | 
|  | /// Allocation methods selectable via SbcEncoderSettings.allocation. | 
|  | enum SbcAllocation { | 
|  | ALLOC_LOUDNESS = 0; | 
|  | ALLOC_SNR = 1; | 
|  | }; | 
|  |  | 
|  | /// Channel modes selectable via SbcEncoderSettings.channel_mode. | 
|  | enum SbcChannelMode { | 
|  | MONO = 0; | 
|  | DUAL = 1; | 
|  | STEREO = 2; | 
|  | JOINT_STEREO = 3; | 
|  | }; | 
|  |  | 
|  | /// Settings for an SBC Encoder. | 
|  | /// | 
|  | /// SBC Encoders take signed little endian 16 bit linear PCM samples and | 
|  | /// return encoded SBC frames. SBC encoders consume PCM data in batches of | 
|  | /// `sub_bands * block_count` PCM frames. This encoder will accept PCM data on | 
|  | /// arbitrary frame boundaries, but the output flushed when EOS is queued may be | 
|  | /// zero-padded to make a full batch for encoding. | 
|  | struct SbcEncoderSettings { | 
|  | // sub_bands, allocation, and block_count carry the defaults shown; | 
|  | // channel_mode and bit_pool have no declared default. | 
|  | SbcSubBands sub_bands = SbcSubBands.SUB_BANDS_8; | 
|  | SbcAllocation allocation = SbcAllocation.ALLOC_LOUDNESS; | 
|  | SbcBlockCount block_count = SbcBlockCount.BLOCK_COUNT_4; | 
|  | SbcChannelMode channel_mode; | 
|  | /// SBC bit pool value. | 
|  | uint64 bit_pool; | 
|  | }; | 
|  |  | 
|  | /// Raw AAC access units.  Currently declares no fields. | 
|  | struct AacTransportRaw { | 
|  | }; | 
|  |  | 
|  | /// AAC inside LATM. | 
|  | struct AacTransportLatm { | 
|  | /// Whether the MuxConfiguration stream element is present. | 
|  | bool mux_config_present; | 
|  | }; | 
|  |  | 
|  | /// AAC inside ADTS.  Currently declares no fields. | 
|  | struct AacTransportAdts { | 
|  | }; | 
|  |  | 
|  | /// Transport used to carry AAC data: raw access units, LATM, or ADTS.  Used | 
|  | /// by AacEncoderSettings.transport. | 
|  | flexible union AacTransport { | 
|  | 1: AacTransportRaw raw; | 
|  | 2: AacTransportLatm latm; | 
|  | 3: AacTransportAdts adts; | 
|  | }; | 
|  |  | 
|  | /// Channel modes selectable via AacEncoderSettings.channel_mode. | 
|  | // | 
|  | // Note that the values are 0 and 2; no member is assigned the value 1. | 
|  | enum AacChannelMode { | 
|  | MONO = 0; | 
|  | STEREO = 2; | 
|  | }; | 
|  |  | 
|  | /// Constant bit rate setting; the `constant` member of AacBitRate. | 
|  | struct AacConstantBitRate { | 
|  | /// Bits per second | 
|  | uint32 bit_rate; | 
|  | }; | 
|  |  | 
|  | /// Variable bit rate modes. The actual resulting bitrate | 
|  | /// varies based on input signal and other encoding settings. | 
|  | /// | 
|  | /// See https://wiki.hydrogenaud.io/index.php?title=Fraunhofer_FDK_AAC#Bitrate_Modes | 
|  | // | 
|  | // Each member's numeric value equals the mode number in its name. | 
|  | enum AacVariableBitRate { | 
|  | V1 = 1; | 
|  | V2 = 2; | 
|  | V3 = 3; | 
|  | V4 = 4; | 
|  | V5 = 5; | 
|  | }; | 
|  |  | 
|  | /// AAC bit rate selection: either a constant bit rate (in bits per second) or | 
|  | /// one of the variable bit rate modes. | 
|  | union AacBitRate { | 
|  | 1: AacConstantBitRate constant; | 
|  | 2: AacVariableBitRate variable; | 
|  | }; | 
|  |  | 
|  | /// AAC audio object types selectable via AacEncoderSettings.aot. | 
|  | enum AacAudioObjectType { | 
|  | /// MPEG-2 Low Complexity | 
|  | MPEG2_AAC_LC = 0; | 
|  | /// MPEG-4 Low Complexity | 
|  | MPEG4_AAC_LC = 1; | 
|  | }; | 
|  |  | 
|  | /// Settings for an AAC encoder; the `aac` member of EncoderSettings. | 
|  | struct AacEncoderSettings { | 
|  | // Transport (raw, LATM, or ADTS). | 
|  | AacTransport transport; | 
|  | // Channel mode (mono or stereo). | 
|  | AacChannelMode channel_mode; | 
|  | // Constant or variable bit rate selection. | 
|  | AacBitRate bit_rate; | 
|  | // Audio object type. | 
|  | AacAudioObjectType aot; | 
|  | }; | 
|  |  | 
|  | /// Customization of h264 encoder parameters for macroblock quantization. The | 
|  | /// values can range from 0 to 51, with lower numbers indicating higher | 
|  | /// quality/bitrate. While encoders should support these fields if feasible, | 
|  | /// some encoders may ignore these fields. It's ok to not set this table, or | 
|  | /// not set some of the fields in this table, as encoders can determine their | 
|  | /// own defaults. If the targeted bitrate can't be achieved with the specified | 
|  | /// values, then the user should expect the resulting encoded stream bitrate to | 
|  | /// differ from the requested bitrate. | 
|  | table H264QuantizationParameters { | 
|  | /// Starting value for quantization of key frames. | 
|  | 1: uint32 i_base; | 
|  | /// Smallest allowed value for quantization of key frames. | 
|  | 2: uint32 i_min; | 
|  | /// Largest allowed value for quantization of key frames. | 
|  | 3: uint32 i_max; | 
|  | /// Starting value for quantization of predicted frames. | 
|  | 4: uint32 p_base; | 
|  | /// Smallest allowed value for quantization of predicted frames. | 
|  | 5: uint32 p_min; | 
|  | /// Largest allowed value for quantization of predicted frames. | 
|  | 6: uint32 p_max; | 
|  | }; | 
|  |  | 
|  | /// Settings for H264 Encoders. | 
|  | table H264EncoderSettings { | 
|  | /// Target bits per second for encoded stream. Defaults to 200,000 if | 
|  | /// omitted. | 
|  | 1: uint32 bit_rate; | 
|  | /// Target frames per second for encoded stream. Defaults to 30 if omitted. | 
|  | 2: uint32 frame_rate; | 
|  | /// Number of pictures per keyframe. Defaults to 8 if omitted. Setting to 0 | 
|  | /// will disable key frame encoding, except when force_key_frame is set to | 
|  | /// true. | 
|  | 3: uint32 gop_size; | 
|  | /// Whether to enable frame rate adjustments in order to meet target bitrate. | 
|  | /// False if omitted. | 
|  | 4: bool variable_frame_rate; | 
|  | /// Lowest frame rate allowed if `variable_frame_rate` is enabled. Defaults | 
|  | /// to 10 if omitted. | 
|  | 5: uint32 min_frame_rate; | 
|  | /// If true, next frame encoded will be a key frame. If omitted, defaults to | 
|  | /// false. | 
|  | 6: bool force_key_frame; | 
|  | /// Allow customization of quantization parameters for encoding. Each frame | 
|  | /// submitted after setting this will use the new values. If omitted, no | 
|  | /// change from encoder defaults is made. | 
|  | 7: H264QuantizationParameters quantization_params; | 
|  | }; | 
|  |  | 
|  | /// Settings for HEVC/H265 Encoders. | 
|  | table HevcEncoderSettings { | 
|  | /// Target bits per second for encoded stream. Defaults to 200,000 if | 
|  | /// omitted. | 
|  | 1: uint32 bit_rate; | 
|  | /// Target frames per second for encoded stream. Defaults to 30 if omitted. | 
|  | 2: uint32 frame_rate; | 
|  | /// Number of pictures per keyframe. Defaults to 8 if omitted. | 
|  | 3: uint32 gop_size; | 
|  | }; | 
|  |  | 
|  | /// Settings for encoders that tell them how to encode raw | 
|  | /// formats.  One member per encoder kind declared so far: SBC, AAC, H264, | 
|  | /// and HEVC. | 
|  | flexible union EncoderSettings { | 
|  | 1: SbcEncoderSettings sbc; | 
|  | 2: AacEncoderSettings aac; | 
|  | 3: H264EncoderSettings h264; | 
|  | 4: HevcEncoderSettings hevc; | 
|  | }; | 
|  |  | 
|  | /// FormatDetails | 
|  | /// | 
|  | /// This describes/details the format on input or output of a StreamProcessor | 
|  | /// (separate instances for input vs. output). | 
|  | // | 
|  | // The purpose of FormatDetails is to fill in additional details not | 
|  | // conveyed via other means. | 
|  | // | 
|  | // For decoder input, the format details tend to be fairly sparse, since most | 
|  | // compressed formats tend to be mostly self-describing. | 
|  | // | 
|  | // For decoder output and encoder input, the format details need to include all | 
|  | // the out-of-band information regarding the uncompressed data, which tends not | 
|  | // to be self-describing. | 
|  | // | 
|  | // Settings that are completely redundant with the data in the format itself | 
|  | // should not be in a required field here. An encoder may set oob_bytes on its | 
|  | // output. | 
|  | // | 
|  | // This stuff should be limited to things we need to know to properly process the | 
|  | // data which we can't already determine from the data itself, and which isn't | 
|  | // already covered by a format's defined OOB binary config blob, which is | 
|  | // conveyed in oob_bytes. | 
|  | // | 
|  | // Most decoders can have FormatDetails.domain null. | 
|  | table FormatDetails { | 
|  | // Particular instances of FormatDetails will set this field to make it | 
|  | // easier for a receiver to determine if any part of the format has changed | 
|  | // vs. the last FormatDetails received for the same context. | 
|  | 1: uint64 format_details_version_ordinal; | 
|  |  | 
|  | // "mime_type" strings used by particular decoders / encoders so far: | 
|  | // | 
|  | // SW AAC decoder: | 
|  | //   * input: | 
|  | //     * "audio/aac-adts" - ADTS AAC; self-contained format, but | 
|  | //       implementation for now requires oob_bytes to contain | 
|  | //       AudioSpecificConfig() reconstructed from ADTS header data - see | 
|  | //       also make_AudioSpecificConfig_from_ADTS_header() for now. | 
|  | //   * output: | 
|  | //     * "audio/raw" - stereo linear 16 bit integer PCM | 
|  | // | 
|  | // TODO(dustingreen): avoid requiring oob_bytes when using SoftAAC2.cpp | 
|  | // for AAC ADTS. | 
|  | // | 
|  | // TODO(dustingreen): Add non-ADTS AAC support (which naturally needs | 
|  | // oob_bytes). | 
|  | // | 
|  | // TODO(dustingreen): Consider "pseudo_mime_type", or an enum, + "domain" | 
|  | // details as needed instead, since calling this "mime_type" could lead to | 
|  | // confusion. | 
|  | 2: string mime_type; | 
|  |  | 
|  | // Some streams have their own binary configuration structure.  For those | 
|  | // streams we allow that binary structure to be directly conveyed to the | 
|  | // stream processor here. | 
|  | // | 
|  | // audio/aac - this is an AudioSpecificConfig(). | 
|  | // audio/aac-adts - this is not set. | 
|  | // TODO(dustingreen): make the audio/aac-adts statement true soon.  At the | 
|  | // moment we set this with make_AudioSpecificConfig_from_ADTS_header(), but | 
|  | // that should not be the client's job for ADTS. | 
|  | // | 
|  | // For some formats whose "ES" data format is self-contained, or for which | 
|  | // there is no format-defined binary OOB config, this is null. | 
|  | // | 
|  | // A server can close the channel if the count of bytes is > | 
|  | // kMaxOobBytesSize or is larger than makes any sense for the stream | 
|  | // processor.  If any stream actually needs more than kMaxOobBytesSize | 
|  | // bytes here, we could potentially increase this restriction some, but | 
|  | // this interface isn't designed to support OOB config blobs that approach | 
|  | // ZX_CHANNEL_MAX_MSG_BYTES. | 
|  | 3: bytes oob_bytes; | 
|  |  | 
|  | // Decoder input format: | 
|  | // | 
|  | // If a format is not self-describing given the mime_type and a | 
|  | // format-spec-defined oob_bytes, this domain field can be set to | 
|  | // provide the additional compressed-format-specific details.  This is | 
|  | // expected to be fairly rare, so most compressed input formats will have | 
|  | // only the mime_type and possibly oob_bytes set, with domain typically | 
|  | // null.  If an encoder is upstream however, domain may be set to convey the | 
|  | // encoder settings that were used, but a decoder consumer doesn't need to | 
|  | // look at those. | 
|  | // | 
|  | // Encoder output format: | 
|  | // | 
|  | // The encoder's compressed data output typically needs some configuration | 
|  | // (provided in this field) that's convenient to provide in a form that's | 
|  | // not oob_bytes, and the codec can convert that config to oob_bytes on | 
|  | // encoder output via OnOutputConstraints().  We retain these encoder settings | 
|  | // in the output FormatDetails to allow for cases where a downstream | 
|  | // consumer knowing the encoder settings could be useful. | 
|  | // | 
|  | // TODO(dustingreen): Decide if we want to retain this, or if we'd prefer to | 
|  | // split out config settings and maybe only represent a few encoder settings | 
|  | // as best-effort optional aux data, like bitrate. | 
|  | // | 
|  | // Encoder input format / decoder output format: | 
|  | // | 
|  | // This field contains fairly detailed information re. uncompressed data | 
|  | // format details, which tends to _not_ be self-describing in-band. | 
|  | 4: DomainFormat domain; | 
|  |  | 
|  | // See comments above on Parameter.  At the time we lock relevant FIDL | 
|  | // interfaces, there should be zero use of this field outside tests, but | 
|  | // this is here in case we need to allow a stream processor client to | 
|  | // convey additional config parameters to/from a stream processor which we | 
|  | // didn't anticipate before locking. | 
|  | // | 
|  | // If there are any known "official" exceptions to the previous paragraph, | 
|  | // we'll list them here by corresponding mime_type (none so far): | 
|  | //   * "<mime_type>" - <usage_description> | 
|  | // | 
|  | // For streams that define their own stream-specific config/OOB data, put | 
|  | // that in oob_bytes above instead of this field. | 
|  | 5: vector<Parameter> pass_through_parameters; | 
|  |  | 
|  | /// Instructs an encoder on how to encode raw data. | 
|  | /// | 
|  | /// Decoders may ignore this field but are entitled to reject requests with | 
|  | /// this field set because it doesn't make sense. | 
|  | 6: EncoderSettings encoder_settings; | 
|  |  | 
|  | /// The number of ticks of the timebase of input packet timestamp_ish values | 
|  | /// per second. | 
|  | /// | 
|  | /// The timebase is only used for optional extrapolation of timestamp_ish | 
|  | /// values when an input timestamp which applies to byte 0 of the valid portion | 
|  | /// of the input packet does not correspond directly to byte 0 of the valid | 
|  | /// portion of any output packet. | 
|  | /// | 
|  | /// Leave unset if timestamp extrapolation is not needed, either due to lack of | 
|  | /// timestamps on input, or due to input being provided in increments of the | 
|  | /// encoder's input chunk size (based on the encoder settings and calculated | 
|  | /// independently by the client).  Set if timestamp extrapolation is known to be | 
|  | /// needed or known to be acceptable to the client. | 
|  | 7: uint64 timebase; | 
|  | }; | 