// (source-viewer residue, not part of the FIDL source) blob: c29a3f908603719eab61b3d1ba79644903321ad3 [file] [log] [blame]
// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
using fuchsia.sysmem;
/// Value
/// Generic "value" for use within the generic "Parameter" struct.
/// As a union, a Value carries exactly one of the members below.
union Value {
bool bool_value;
uint64 uint64_value;
int64 int64_value;
string string_value;
// Prefer FormatDetails.oob_bytes over this member when conveying a stream's
// own format-defined binary config.
bytes bytes_value;
/// Parameter
/// Generic (scope, name, value) parameter, carried in
/// FormatDetails.pass_through_parameters.
/// We want to minimize use of this generic "Parameter" structure by natively
/// defining as many stream-specific parameter semantics as we can.
// TODO: When possible, describe the very limited scenarios in which it would
// still be reasonable to use a generic Parameter.
struct Parameter {
// Some indication of the scope of applicability of this Parameter.
string scope;
// Specific name of this parameter, without the scope prefix.
string name;
// The particular value of this parameter; see Value for the permitted types.
Value value;
// FormatDetails
// The purpose of FormatDetails is to fill in additional details not
// conveyed via other means.
// TODO(dustingreen): Where at all possible, definitions for the concepts
// within FormatDetails should go in media_types.fidl or another location
// that's widely shared. Maybe some encoder settings could remain
// codec-specific.
// For decoder input, the format details tend to be fairly sparse, since most
// compressed formats tend to be mostly self-describing.
// For decoder output and encoder input, the format details need to include all
// the out-of-band information regarding the uncompressed data, which tends not
// to be self-describing.
// Settings that are completely redundant with the data in the format itself
// should not be in a required field here. An encoder may set oob_bytes on its
// output.
// The fields here should be limited to what we need to know to properly
// process the data, which we can't already determine from the data itself, and
// which isn't already covered by a format's defined OOB binary config blob
// (conveyed in oob_bytes).
// Most decoders can have FormatDetails.domain null.
/// AudioCompressedFormat
/// Format details relevant to compressed audio, mostly for encoder settings,
/// (but see TODO below).
/// Unless otherwise specified in a comment on a field in this structure,
/// FormatDetails.domain is null for a decoder. A sub-structure whose name
/// ends in "Encoder" is for encoder output settings.
// TODO(dustingreen): Complete process of splitting encoder settings out.
xunion AudioCompressedFormat {
// For an aac encoder, this field has settings the encoder needs which are
// specific to AAC encoding.
AudioCompressedFormatAacEncoder aac;
// SBC variant. AudioCompressedFormatSbc shows no fields in this file, so
// selecting this member conveys only that the compressed format is SBC.
AudioCompressedFormatSbc sbc;
// AudioBitrateMode
// Bitrate mode of a compressed audio stream; used by
// AudioCompressedFormatAacEncoder.bitrate_mode.
// NOTE(review): the comments on bitrate_mode refer to an UNSPECIFIED value
// that is not visible among the members below - confirm the member list.
enum AudioBitrateMode {
// Used mainly when a client is configuring an encoder's output format. May
// also be present in an OnOutputConstraints() message from an encoder, but
// should not be relied upon to be present by any consumer downstream of an
// encoder.
CBR = 1;
VBR = 2;
/// AudioCompressedFormatAacEncoder
/// Encoder settings for an AAC encoder. Each field has one meaning when the
/// client configures the encoder (SetOutputConfig()) and another when the
/// encoder reports its output (OnOutputConstraints()).
struct AudioCompressedFormatAacEncoder {
// In SetOutputConfig():
// If zero, an encoder should generate 256 kbps, and a consumer should not
// assume any particular bitrate.
// If not zero, the encoder should not exceed this bitrate. In CBR the
// encoder should use the highest available bitrate that doesn't exceed this
// value, or if there is no such bitrate, the lowest available bitrate. In
// VBR, the encoder should stay at or below this bitrate.
// In VBR it's left up to the encoder to choose a reasonable ratio between
// max bits per second and min bits per second, with the aim in VBR being
// constant perceived quality.
// In OnOutputConstraints():
// In CBR, the nominal bits per second. In VBR, the nominal max bits per
// second.
uint32 bits_per_second;
// In SetOutputConfig():
// If UNSPECIFIED, the mode is up to the encoder. If CBR or VBR, a hint to
// the encoder to use that mode.
// In OnOutputConstraints():
// The actual mode being used. UNSPECIFIED means the source is not
// specifying which mode.
AudioBitrateMode bitrate_mode;
// TODO(dustingreen): AAC profile settings.
// AudioCompressedFormatSbc
// SBC member type of the AudioCompressedFormat xunion. No fields are visible
// for this struct in this file; SBC encoder configuration is declared
// separately in SbcEncoderSettings.
struct AudioCompressedFormatSbc {
/// AudioPcmMode
/// Sample encoding used by PcmFormat.pcm_mode.
// TODO(dustingreen): Keep or discard any non-linear formats for purposes of the
// Codec interface?
enum AudioPcmMode {
// NOTE(review): the next comment describes a linear PCM mode, but no
// corresponding member is visible here - confirm the member list.
// 16 bit signed int linear or 32 bit float linear, for now
// 1-N channels ok, with "A.B" channels designated as A+B channel_count -
// the channel map is separately specified. So 5.1 becomes channel_count 6.
// G.711 8 bit format-defined waveform semantics
// 1 channel
ALAW = 1;
// G.711 8 bit format-defined waveform semantics
// 1 channel
MULAW = 2;
/// AudioChannelId
/// Used in specifying which audio channel is for which speaker location / type.
/// Values of this enum are the elements of PcmFormat.channel_map.
/// TODO(dustingreen): Do we need more channel IDs than this?
// TODO(dustingreen): Check with mpuryear@ re. naming consistency for "S" vs.
// "R" as we move these to a common definition. Also the ordering of LS/RS vs.
// LR/RR - probably LR/RR being first would make more sense re. how channels
// get added incrementally, but changing the order would no longer match
// Android's ordering.
enum AudioChannelId {
SKIP = 0; // unused channel
LF = 1; // left front
RF = 2; // right front
CF = 3; // center front
LS = 4; // left surround
RS = 5; // right surround
LFE = 6; // low frequency effects
CS = 7; // back surround
LR = 8; // left rear
RR = 9; // right rear
// NOTE(review): the two comments below describe members ("last defined
// value + 1" and the base for extended channel IDs) that are not visible
// here - confirm the member list.
// This is the last explicitly-defined value + 1. This name will be
// re-defined in future if we add more defined channel IDs above.
// This is where format-specific (or ad-hoc) channel ID values should go, to
// avoid colliding with any additional values allocated above. The values
// here are not guaranteed to avoid collision across different formats.
// Extended channel IDs should be <= MAX.
MAX = 0x7fffffff;
/// PcmFormat
/// PCM audio format details.
// TODO(dustingreen): Discuss with mpuryear@ re. where definitions for these
// details go and make sure the common details can specify at least this much.
struct PcmFormat {
// Implicit details:
// * For bits_per_sample > 8, host-endian is implied.
// * At least for now, for channel_count >= 2, interleaved layout is
// implied.
AudioPcmMode pcm_mode;
// bits_per_sample
// A "sample" is for a single channel.
// For example, CD quality is 16. See AudioPcmMode comments, as the mode
// constrains this value.
uint32 bits_per_sample;
// frames_per_second
// A "frame" is one datapoint (one "sample") for each channel. Each channel
// is sampled this many times per second. For example, CD quality is 44100.
uint32 frames_per_second;
// channel_map
// channel_map.size() is the channel count (at most 16 entries). See
// AudioPcmMode comments, as some modes constrain the channel count to 1.
// Values from AudioChannelId should be used if they are suitable.
// If a channel has no suitable AudioChannelId, an ad-hoc value can be used
// in a range starting from AudioChannel_ExtendedChannelIdBase.
// NOTE(review): no symbol with that name is visible in this file; it
// presumably refers to the extended-ID base described in AudioChannelId -
// confirm.
vector<AudioChannelId>:16 channel_map;
// TODO(dustingreen): Add unsigned 8 bit, float 32 bit, maybe others. FWIW,
// AOSP appears to support signed 16 bit, unsigned 8 bit, and float 32 bit
// under "Pcm", AFAICT based on OMX_NUMERICALDATATYPE and ACodec.cpp code.
/// AudioUncompressedFormat
/// Uncompressed audio format details. PCM is the only variant declared here.
union AudioUncompressedFormat {
PcmFormat pcm;
/// AudioFormat
/// Audio format details: either the compressed or the uncompressed variant.
union AudioFormat {
AudioCompressedFormat compressed;
AudioUncompressedFormat uncompressed;
/// VideoCompressedFormat
/// Compressed video format details.
// If a compressed video format has no fields here, it's because it's a good
// format and is already self-describing given the mime_type + format-defined
// oob_bytes as appropriate + in-band data.
union VideoCompressedFormat {
// TODO(dustingreen): Any compressed video formats that aren't sufficiently
// self-describing to select and create a Codec instance to decode it?
// TODO(dustingreen): temp field to make the compiler happy until we have at
// least one real field.
// Placeholder only; carries no format information.
uint32 temp_field_todo_remove;
// VideoColorSpace
// Color space of video data. No members are visible in this file yet (see
// the TODO below), and no declaration visible here references this enum.
enum VideoColorSpace {
// TODO(dustingreen): add to this list
/// VideoUncompressedFormat
/// Uncompressed video format details.
// TODO(dustingreen): Integrate with a system-wide structure for this purpose.
// In progress - see image_format field below which will take the place of this
// struct/table.
struct VideoUncompressedFormat {
// TODO(dustingreen): This will replace VideoUncompressedFormat (after
// struct to table change merges).
fuchsia.sysmem.ImageFormat_2 image_format;
// fourcc
// A human-readable fourcc like RGBA should be 0x41424752 in the fourcc
// field (regardless of host endian-ness). Note that the R (first character)
// of the fourcc is in the low-order byte of this fourcc field.
// There are some fourcc codes that don't format nicely as a string. While
// I don't foresee any use of any of the purely numeric fourcc codes (not
// corresponding to packed ascii character values), those would be stored
// such that their numeric value has its low-order byte in the low-order
// byte of this fourcc value. So a fourcc with "hex value" 0x00000001 would
// have the numeric value 1 in this field.
// The endian-ness of fourcc values stored in files or in network packets is
// outside the scope of these comments, other than to state that regardless
// of the source of the fourcc code and the order that storage /
// transmission format stores these bytes, a human-readable fourcc should
// have its human-read first ascii character value in the low order byte of
// this field.
uint32 fourcc;
// For formats with different planes having different resolution, this is
// the resolution of the highest-resolution plane(s). Else it's the
// resolution of all the planes.
uint32 primary_width_pixels;
uint32 primary_height_pixels;
// For formats where the secondary planes are the same resolution, these
// fields will be the same as primary_width_pixels and
// primary_height_pixels. For formats with smaller secondary resolutions,
// these indicate that resolution.
uint32 secondary_width_pixels;
uint32 secondary_height_pixels;
// Planar means the various planes are separately stored in their own chunks
// of memory.
bool planar;
// If a format is swizzled, the swizzling parameters are not directly here.
bool swizzled;
// Line stride of the primary plane, in bytes (per the field name).
uint32 primary_line_stride_bytes;
// Formats with the same stride for all planes will have this field equal to
// primary_line_stride_bytes.
uint32 secondary_line_stride_bytes;
// Start offset of the R or Y data.
uint32 primary_start_offset;
// Start offset of the G or U data.
uint32 secondary_start_offset;
// Start offset of the B or V data.
uint32 tertiary_start_offset;
// Pixel stride of the primary plane (units are not stated here).
uint32 primary_pixel_stride;
// For formats with the same pixel stride for all planes, this field will be
// equal to primary_pixel_stride.
uint32 secondary_pixel_stride;
// These override the primary_width_pixels and primary_height_pixels for
// purposes of display (but not for purposes of determining the pixel layout
// in memory). These can crop on the right and bottom. These must be <=
// the corresponding coded dimension.
// This value must be <= primary_width_pixels.
uint32 primary_display_width_pixels;
// This value must be <= primary_height_pixels.
uint32 primary_display_height_pixels;
// The pixel_aspect_ratio_width : pixel_aspect_ratio_height is the pixel
// aspect ratio (AKA sample aspect ratio aka SAR) for the luma (AKA Y)
// samples. A pixel_aspect_ratio of 1:1 means square pixels. A
// pixel_aspect_ratio of 2:1 would mean pixels that are displayed twice as
// wide as they are tall. Codec implementation should ensure these two
// values are relatively prime by reducing the fraction (dividing both by
// GCF) if necessary.
// When has_pixel_aspect_ratio == false, pixel_aspect_ratio_width and
// pixel_aspect_ratio_height will both be 1, but in that case the
// pixel_aspect_ratio_width : pixel_aspect_ratio_height of 1:1 is just a
// very weak suggestion re. reasonable-ish handling, not in any way
// authoritative. In this case (or in any case really) the receiver of this
// message may have other OOB means to determine the actual
// pixel_aspect_ratio.
bool has_pixel_aspect_ratio = false;
uint32 pixel_aspect_ratio_width = 1;
uint32 pixel_aspect_ratio_height = 1;
/// VideoFormat
/// Video (compressed or uncompressed) format details. In this context,
/// "uncompressed" can include block-based image compression formats that still
/// permit fairly fast random access to image data.
union VideoFormat {
VideoCompressedFormat compressed;
VideoUncompressedFormat uncompressed;
/// DomainFormat
/// Domain-specific format details (audio or video, compressed or
/// uncompressed). This is the type of FormatDetails.domain.
union DomainFormat {
AudioFormat audio;
VideoFormat video;
// Byte-count threshold for FormatDetails.oob_bytes: a server can close the
// channel if a client sends more than this many bytes there.
const uint64 kMaxOobBytesSize = 8192;
// Sub-band count for SBC encoding; used by SbcEncoderSettings.sub_bands.
// Each member's numeric value equals the count it names.
enum SbcSubBands {
SUB_BANDS_4 = 4;
SUB_BANDS_8 = 8;
// Block count for SBC encoding; used by SbcEncoderSettings.block_count.
// Each member's numeric value equals the count it names.
// NOTE(review): SbcEncoderSettings defaults block_count to BLOCK_COUNT_4,
// which is not visible among the members below - confirm the member list.
enum SbcBlockCount {
BLOCK_COUNT_12 = 12;
BLOCK_COUNT_16 = 16;
// Allocation setting for SBC encoding; used by SbcEncoderSettings.allocation.
// NOTE(review): no members are visible here, yet SbcEncoderSettings defaults
// allocation to ALLOC_LOUDNESS - confirm the member list.
enum SbcAllocation {
// Channel mode for SBC encoding; used by SbcEncoderSettings.channel_mode.
enum SbcChannelMode {
MONO = 0;
DUAL = 1;
/// Settings for an SBC Encoder.
/// SBC Encoders take signed little endian 16 bit linear PCM samples and
/// return encoded SBC frames. SBC encoders process PCM data in batches of
/// `sub_bands * block_count` PCM frames. This encoder will accept PCM data on
/// arbitrary frame boundaries, but the output flushed when EOS is queued may be
/// zero-padded to make a full batch for encoding.
struct SbcEncoderSettings {
// sub_bands, allocation and block_count have defaults; channel_mode and
// bit_pool have none.
SbcSubBands sub_bands = SUB_BANDS_8;
SbcAllocation allocation = ALLOC_LOUDNESS;
SbcBlockCount block_count = BLOCK_COUNT_4;
SbcChannelMode channel_mode;
/// SBC bit pool value.
uint64 bit_pool;
/// Settings for encoders that tell them how to encode raw
/// formats. Carried in FormatDetails.encoder_settings; SBC is the only
/// variant declared here.
xunion EncoderSettings {
SbcEncoderSettings sbc;
/// FormatDetails
/// This describes/details the format on input or output of a StreamProcessor
/// (separate instances for input vs. output).
table FormatDetails {
// Particular instances of FormatDetails will set this field to make it
// easier for a receiver to determine if any part of the format has changed
// vs. the last FormatDetails received for the same context.
1: uint64 format_details_version_ordinal;
// "mime_type" strings used by particular decoders / encoders so far:
// SW AAC decoder:
// * input:
// * "audio/aac-adts" - ADTS AAC; self-contained format, but
// implementation for now requires oob_bytes to contain
// AudioSpecificConfig() reconstructed from ADTS header data - see
// also make_AudioSpecificConfig_from_ADTS_header() for now.
// * output:
// * "audio/raw" - stereo linear 16 bit integer PCM
// TODO(dustingreen): avoid requiring oob_bytes when using SoftAAC2.cpp
// for AAC ADTS.
// TODO(dustingreen): Add non-ADTS AAC support (which naturally needs
// oob_bytes).
// TODO(dustingreen): Consider "pseudo_mime_type", or an enum, + "domain"
// details as needed instead, since calling this "mime_type" could lead to
// confusion.
2: string mime_type;
// Some streams have their own binary configuration structure. For those
// streams we allow that binary structure to be directly conveyed to the
// stream processor here.
// audio/aac - this is an AudioSpecificConfig().
// audio/aac-adts - this is not set.
// TODO(dustingreen): make the audio/aac-adts statement true soon. At the
// moment we set this with make_AudioSpecificConfig_from_ADTS_header(), but
// that should not be the client's job for ADTS.
// For some formats whose "ES" data format is self-contained, or for which
// there is no format-defined binary OOB config, this is null.
// A server can close the channel if the count of bytes is >
// kMaxOobBytesSize or is larger than makes any sense for the stream
// processor. If any stream actually needs more than kMaxOobBytesSize
// bytes here, we could potentially increase this restriction some, but
// this interface isn't designed to support OOB config blobs that approach
// the channel message size limit.
// NOTE(review): the original sentence is truncated after "approach"; the
// intended limit is presumably the maximum channel message size - confirm.
3: bytes oob_bytes;
// Decoder input format:
// If a format is not self-describing given the mime_type and a
// format-spec-defined oob_bytes, this domain field can be set to
// provide the additional compressed-format-specific details. This is
// expected to be fairly rare, so most compressed input formats will have
// only the mime_type and possibly oob_bytes set, with domain typically
// null. If an encoder is upstream however, domain may be set to convey the
// encoder settings that were used, but a decoder consumer doesn't need to
// look at those.
// Encoder output format:
// The encoder's compressed data output typically needs some configuration
// (provided in this field) that's convenient to provide in a form that's
// not oob_bytes, and the codec can convert that config to oob_bytes on
// encoder output via OnOutputConstraints(). We retain these encoder settings
// in the output FormatDetails to allow for cases where a downstream
// consumer knowing the encoder settings could be useful.
// TODO(dustingreen): Decide if we want to retain this, or if we'd prefer to
// split out config settings and maybe only represent a few encoder settings
// as best-effort optional aux data, like bitrate.
// Encoder input format / decoder output format:
// This field contains fairly detailed information re. uncompressed data
// format details, which tends to _not_ be self-describing in-band.
4: DomainFormat domain;
// See comments above on Parameter. At the time we lock relevant FIDL
// interfaces, there should be zero use of this field outside tests, but
// this is here in case we need to allow a stream processor client to
// convey additional config parameters to/from a stream processor which we
// didn't anticipate before locking.
// If there are any known "official" exceptions to the previous paragraph,
// we'll list them here by corresponding mime_type (none so far):
// * "<mime_type>" - <usage_description>
// For streams that define their own stream-specific config/OOB data, put
// that in oob_bytes above instead of this field.
5: vector<Parameter> pass_through_parameters;
/// Instructs an encoder on how to encode raw data.
/// Decoders may ignore this field but are entitled to reject requests with
/// this field set because it doesn't make sense.
6: EncoderSettings encoder_settings;
/// The number of ticks of the timebase of input packet timestamp_ish values
/// per second.
/// The timebase is only used for optional extrapolation of timestamp_ish
/// values when an input timestamp which applies to byte 0 of the valid portion
/// of the input packet does not correspond directly to byte 0 of the valid
/// portion of any output packet.
/// Leave unset if timestamp extrapolation is not needed, either due to lack of
/// timestamps on input, or due to input being provided in increments of the
/// encoder's input chunk size (based on the encoder settings and calculated
/// independently by the client). Set if timestamp extrapolation is known to be
/// needed or known to be acceptable to the client.
7: uint64 timebase;