// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

module media;

import "lib/media/fidl/media_transport.fidl";
import "lib/media/fidl/media_types.fidl";

interface AudioCapturerClient {
// Called when an AudioCapturer operating in AsyncCapture mode has a packet's
// worth of data to deliver to the client.
OnPacketCaptured@0(MediaPacket packet);
};

// AudioCapturer
//
// An AudioCapturer is an interface returned from an AudioService's
// CreateAudioCapturer method which may be used by clients to capture audio from
// either the current default audio input device, or the current default audio
// output device depending on the flags passed during creation.
//
// TODO(johngro): Routing policy needs to become more capable than this.
// Clients will need to be able to request sets of inputs/outputs/renderers,
// make changes to these sets, have their requests vetted by policy (do they
// have the permission to capture this private stream, do they have the
// permission to capture at this frame rate, etc...). Eventually, this
// functionality will need to be expressed at the AudioPolicy level, not here.
//
// ** Format support **
//
// See (Get|Set)MediaType below. By default, the captured media type will
// initially be determined by the currently configured media type of the source
// that the capturer was bound to at creation time. Users may either fetch this
// type using GetMediaType, or they may choose to have the media
// resampled/converted to a type of their choosing by calling SetMediaType.
// Note: the media type may only be set while the system is not running, meaning
// that there are no pending capture regions (specified using CaptureAt) and
// that the system is not currently running in 'async' capture mode.
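//
// For illustration, a minimal C++-style sketch of fetching the type (the
// `capturer` proxy and lambda-callback plumbing are assumptions about the
// generated bindings, not part of this interface):
//
//   // Fetch the media type the capturer is currently configured to produce.
//   capturer->GetMediaType([](MediaType media_type) {
//     // Inspect media_type here; alternately, call SetMediaType to request
//     // a conversion before establishing the payload buffer.
//   });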
//
// ** Buffers and memory management **
//
// Audio data is captured into a shared memory buffer (a VMO) supplied by the
// user to the capturer during the SetPayloadBuffer call. Please note the
// following requirements related to the management of the payload buffer.
//
// ++ The payload buffer must be supplied before any capture operation may
// start. Any attempt to start capture (via either CaptureAt or
// StartAsyncCapture) before a payload buffer has been established is an
// error.
// ++ The payload buffer may not be changed while there are any capture
// operations pending.
// ++ The media type may not be changed after the payload buffer has been set.
// ++ The size of the payload buffer must be an integral multiple of the audio
// frame size (in bytes).
// ++ When running in 'async' mode (see below), the payload buffer must be at
// least twice as large as the frames_per_packet size specified during
// StartAsyncCapture.
// ++ The handle to the payload buffer supplied by the user must be readable,
// writable, and mappable.
// ++ Users should always treat the payload buffer as read-only.
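//
// As a concrete illustration, a hedged sketch of establishing a payload
// buffer from C++ (the zx::vmo usage and `capturer` proxy name are
// assumptions for illustration, not mandated by this interface):
//
//   // Assume 48kHz, 2-channel, 16-bit audio => 4 bytes per frame.
//   constexpr uint32_t kBytesPerFrame = 4;
//   constexpr uint32_t kBufferFrames = 48000;  // one second of audio
//   zx::vmo payload_buffer;
//   // Size the VMO to an integral number of frames, per the rules above.
//   zx::vmo::create(kBufferFrames * kBytesPerFrame, 0, &payload_buffer);
//   // The handle must be readable, writable, and mappable.
//   capturer->SetPayloadBuffer(std::move(payload_buffer));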
//
// ** Synchronous vs. Asynchronous capture mode **
//
// The AudioCapturer interface can be used in one of two mutually exclusive
// modes: Synchronous and Asynchronous. A description of each mode and their
// tradeoffs is given below.
//
// (TODO(johngro) : can we come up with better names than these? Both are
// really async modes under the hood).
//
// ** Synchronous mode **
//
// By default, AudioCapturer instances are running in 'sync' mode. They will
// only capture data when a user supplies at least one region to capture into
// using the CaptureAt method. Regions supplied in this way will be filled in
// the order that they are received and returned to the client as MediaPackets
// via the return value of the CaptureAt method. If an AudioCapturer instance
// has data to capture, but no place to put it (because there are no more
// pending regions to fill), it will signal the overflow by setting the
// Discontinuity flag on the next produced
// MediaPacket. Synchronous mode may not be used in conjunction with
// Asynchronous mode. It is an error to attempt to call StartAsyncCapture while
// the system still has regions supplied by CaptureAt waiting to be filled.
//
// If a user has supplied regions to be filled by the AudioCapturer instance in
// the past, but wishes to reclaim those regions, they may do so using the Flush
// method. Calling the Flush method will cause all pending regions to be
// returned, but with kNoTimestamp as their MediaPacket's PTS. See "Timing and
// Overflows", below, for a discussion of timestamps and discontinuity flags.
// The final buffer returned after a flush operation will have the 'kFlagEos'
// flag set on it. While an AudioCapturer will never overwrite any region of
// the payload buffer after a completed region is returned, it may overwrite
// the unfilled portions of a partially filled buffer which has been returned as
// a result of a flush operation.
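//
// For illustration, a minimal sketch of the synchronous flow (C++-style
// pseudocode; the `capturer` proxy and lambda-callback plumbing are
// assumptions about the generated bindings):
//
//   // Ask the capturer to fill frames [0, 480) of the payload buffer.
//   capturer->CaptureAt(0, 480, [](MediaPacket packet) {
//     // Delivered once the region has been filled. A set discontinuity
//     // flag here indicates that an overflow occurred beforehand.
//   });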
//
// ** Asynchronous mode **
//
// While running in 'async' mode, clients do not need to explicitly supply
// regions of the shared buffer to be filled by the AudioCapturer instance.
// Instead, a client enters into 'async' mode by calling StartAsyncCapture and
// supplying a callback interface, and a number of frames to capture
// per-callback. Once running in async mode, the AudioCapturer instance will
// choose regions of the payload buffer to capture into, capture the specified
// number of frames, then deliver those frames as MediaPackets using the
// OnPacketCaptured method of the AudioCapturerClient interface supplied by the
// user. Users may stop capturing and return the AudioCapturer instance to
// 'sync' mode using the StopAsyncCapture method. Likewise, closing the channel
// underneath the supplied AudioCapturerClient will cause the AudioCapturer
// instance to stop capturing and return to 'sync' mode (however, this
// operation is not synchronized with other operations and should not be used
// if the client intends to start capturing again at a future time).
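//
// A hedged sketch of entering and leaving 'async' mode from C++ (the
// CaptureSink class, binding setup, and proxy names are assumptions for
// illustration):
//
//   class CaptureSink : public AudioCapturerClient {
//    public:
//     void OnPacketCaptured(MediaPacket packet) override {
//       // Consume the captured frames from the shared payload buffer at
//       // the offset/length described by the packet.
//     }
//   };
//   // (channel/binding setup elided)
//   // Capture 480 frames (10ms at 48kHz) per delivered packet.
//   capturer->StartAsyncCapture(std::move(client_handle), 480);
//   // ... later, return to 'sync' mode.
//   capturer->StopAsyncCapture();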
//
// It is considered an error to attempt any of the following operations:
//
// ++ To attempt to enter 'async' capture mode when no payload buffer has been
// established.
// ++ To specify a number of frames to capture per payload which does not permit
// at least two contiguous capture payloads to exist in the established
// shared payload buffer simultaneously.
// ++ To send a region to capture into using the CaptureAt method while the
// AudioCapturer instance is running in 'async' mode.
// ++ To attempt to call Flush while the AudioCapturer instance is running in
// 'async' mode.
// ++ To attempt to re-start 'async' mode capturing without having first
// stopped.
// ++ To attempt any operation except for SetGain while in the process of
// stopping.
//
// ** Synchronizing with a StopAsyncCapture operation **
//
// Stopping asynchronous capture mode and returning to synchronous capture mode
// is an operation which takes time. Aside from SetGain, users may not call any
// other methods on the AudioCapturer interface after calling StopAsyncCapture
// (including calling StopAsyncCapture again) until after the stop operation has
// completed. Because of this, it is important for users to be able to
// synchronize with the stop operation. Two mechanisms are provided for doing
// so.
//
// The first is to use the StopAsyncCaptureWithCallback method. When the user's
// callback has been called, they can be certain that the stop operation is
// complete and that the AudioCapturer instance has returned to synchronous
// operating mode.
//
// The second way to determine that a stop operation has completed is to use the
// flags on the packets which get delivered via the user-supplied
// AudioCapturerClient interface after calling StopAsyncCapture. When
// asked to stop, any partially filled packet will be returned to the user, and
// the final packet returned will always have the end-of-stream flag (kFlagEos)
// set on it to indicate that it is the last packet in the sequence. If
// there is no partially filled packet to return, the AudioCapturer will
// synthesize an empty packet with no timestamp, and offset/length set to zero,
// in order to deliver a packet with the end-of-stream flag set on it. Once
// users have seen the end-of-stream flag after calling stop, the AudioCapturer
// has finished the stop operation and returned to synchronous operating mode.
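//
// For example, synchronizing via the callback variant might look like this
// sketch (binding details assumed):
//
//   capturer->StopAsyncCaptureWithCallback([]() {
//     // The stop operation is now complete: the capturer is back in
//     // synchronous mode, and CaptureAt/Flush/StartAsyncCapture are legal
//     // again.
//   });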
//
// ** Timing and Overflows **
//
// All media packets produced by an AudioCapturer instance will have their PTS
// field filled out with the capture time of the audio expressed as a timestamp
// given by the CLOCK_MONOTONIC timeline. Note: this timestamp is actually a
// capture timestamp, not a presentation timestamp (it is more of a CTS than a
// PTS) and is meant to represent the underlying system's best estimate of the
// capture time of the first frame of audio, including all outboard and hardware
// introduced buffering delay. As a result, all timestamps produced by an
// AudioCapturer should be expected to be in the past relative to 'now' on the
// CLOCK_MONOTONIC timeline.
//
// TODO(johngro) : Specify the way in which timestamps relative to a different
// clock (such as an audio domain clock) may be delivered to a client.
//
// The one exception to the "everything has an explicit timestamp" rule is when
// Flushing submitted regions while operating in synchronous mode. Flushed
// packets have no data in them, but FIDL demands that all pending
// method-return-value callbacks be executed. Because of this, the regions will
// be returned to the user, but their timestamps will be set to
// MediaPacket::kNoTimestamp, and their payload sizes will be set to zero. Any
// partially filled payload will have a valid timestamp, but a payload size
// smaller than originally requested. The final flushed payload (if there were
// any to flush) will have the EOS flag set on it.
//
// Two MediaPackets delivered by an AudioCapturer instance are 'continuous' if
// the first frame of audio contained in the second packet was captured exactly
// one nominal frame time after the final frame of audio in the first packet.
// If this relationship does not hold, the second MediaPacket will have the
// 'kFlagDiscontinuous' flag set in its flags field.
//
// Even though explicit timestamps are provided on every MediaPacket produced,
// users who have very precise timing requirements are encouraged to always
// reason about time by counting frames delivered since the last discontinuity
// instead of simply using the raw capture timestamps. This is because the
// explicit timestamps written on continuous packets may have a small amount of
// rounding error based on whether or not the units of the capture timeline
// (CLOCK_MONOTONIC) are divisible by the chosen audio frame rate.
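//
// A minimal sketch of the frame-counting approach (names are illustrative
// only, and the PTS is assumed to be expressed in nanoseconds on the
// CLOCK_MONOTONIC timeline):
//
//   // anchor_pts: PTS of the first packet after the last discontinuity.
//   // frames_since_anchor: frames delivered since that anchor packet.
//   int64_t pts_ns =
//       anchor_pts + (frames_since_anchor * 1000000000ll) / frame_rate;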
//
// Users should always expect the first MediaPacket produced by an AudioCapturer
// to have the discontinuous flag set on it (as there is no previous packet to
// be continuous with). Similarly, the first MediaPacket after a Flush or a
// Stop/Start cycle will always be discontinuous. After that, there are only
// two reasons that a MediaPacket will ever be discontinuous.
//
// 1) The user is operating in synchronous mode and does not supply regions to
// be filled quickly enough. If the next continuous frame of data has not
// been captured by the time it needs to be purged from the source buffers,
// an overflow has occurred and the AudioCapturer will flag the next captured
// region as discontinuous.
// 2) The user is operating in asynchronous mode and some internal error
// prevents the AudioCapturer instance from capturing the next frame of audio
// in a continuous fashion. This might be caused by high system load or a
// hardware error, but it is something which should never happen under normal
// circumstances.
// In practice, however, if it does, the next produced packet will be flagged
// as being discontinuous.
//
// ** Synchronous vs. Asynchronous Trade-offs **
//
// The choice of operating in synchronous vs. asynchronous mode is up to the
// user, and depending on the user's requirements, there are some advantages and
// disadvantages to each choice.
//
// Synchronous mode requires only a single Zircon channel under the hood and can
// achieve some small savings because of this. In addition, the user has
// complete control over the buffer management. Users specify exactly where
// audio will be captured to and in what order. Because of this, if users do
// not need to always be capturing, it is simple to stop and restart the capture
// later (just by ceasing to supply packets, then resuming later on). Payloads
// do not need to be uniform in size either; clients may specify payloads of
// whatever granularity is appropriate.
//
// The primary downside of operating in synchronous mode is that two messages
// will need to be sent for every packet to be captured: one to inform the
// AudioCapturer of the region to capture into, and one to inform the user
// that the packet has been captured. This may end up increasing overhead and
// potentially complicating client designs.
//
// Asynchronous mode costs two Zircon channels (one for the AudioCapturer
// interface and one for the AudioCapturerClient interface), but has the
// advantage of requiring only half the messages, which adds up to significant
// savings overall. On the other hand, when operating in 'async' mode,
// AudioCapturer instances have no way of knowing whether a user is processing
// the MediaPackets being sent in a timely fashion, and no way of
// automatically detecting an overflow
// condition. Users of 'async' mode should be careful to use a buffer large
// enough to ensure that they will be able to process their data before an
// AudioCapturer will be forced to overwrite it.
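//
// For example, at 48kHz stereo 16-bit audio (4 bytes per frame) with 10ms
// packets (480 frames, or 1920 bytes, each), a one-second payload buffer
// holds 100 packets, giving the client roughly one second to drain a packet
// before the capturer wraps around and overwrites it.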
//
// ** Future Directions (aka TODOs) **
//
// ++ Consider adding a 'zero message' capture mode where the AudioCapturer
// simply supplies a linear transformation and some buffer parameters (max
// audio hold time) each time that it is started in 'async' mode, or each
// time an internal overflow occurs in 'async' mode. Based on this
// information, clients should know where the capture write pointer is at all
// times as a function of the transformation, removing the need to send any
// buffer position messages. This would reduce the operational overhead to
// just about as low as it could go, and could allow for the lowest possible
// latency for capture clients. OTOH - it might be better to achieve this
// simply by allowing clients to be granted direct, exclusive access to the
// driver level of capture if no resampling, reformatting, or sharing is
// needed.
// ++ Consider providing some mechanism by which users may specify the exact
// time at which they want to capture data.
// ++ Allow for more complex routing/mixing/AEC scenarios and place this under
// the control of the policy manager.
// ++ Define and enforce access permissions and downsampling requirements for
// sensitive content. Enforce using the policy manager.
// ++ Consider allowing the mixer to produce compressed audio.
//
interface AudioCapturer {
// Gets the currently configured media type. Note: for a capturer which was
// just created and has not had its media type explicitly set yet, this will
// give the currently configured media type of the input or the output that
// the capturer was bound to at creation time.
//
// TODO(johngro) : Get rid of this. Eventually, capturers will be bindable to
// a set of inputs/outputs/renderers, so the concept of a "native" media type
// will go away. Mechanisms will need to be put in place to allow users to
// enumerate the configuration of these bind-able endpoints (and perhaps to
// exercise control over them), but it will be the user of the capturer's job
// to specify the format they want.
GetMediaType@0() => (MediaType media_type);

// Sets the media type of the stream to be delivered. Causes the source
// material to be reformatted/resampled if needed in order to produce the
// requested media type. Note that the media type may not be changed after
// the payload buffer has been established.
SetMediaType@1(MediaType media_type);

// Set the gain (in dB) applied to the input. Gain may be adjusted at any
// time, but must be in the range
// [AudioRenderer::kMutedGain, AudioRenderer::kMaxGain].
SetGain@2(float gain);

// Set the shared buffer used to transport captured data.
//
// TODO(johngro) : consider extending this so that multiple payload buffers
// may be assigned if needed.
SetPayloadBuffer@3(handle<vmo> payload_buffer);

// Explicitly specify a region of the shared payload buffer for the audio
// capturer to capture into. See the discussion of 'synchronous' vs.
// 'asynchronous' capture mode (above) for details.
CaptureAt@4(uint32 offset_frames, uint32 num_frames)
    => (MediaPacket captured_packet);

// Flush all regions specified using CaptureAt from the internal queue of
// regions to be captured into and (optionally) deliver a callback which may
// be used by the client if explicit synchronization is needed. It is illegal
// to call Flush while operating in asynchronous capture mode.
Flush@5();
FlushWithCallback@6() => ();

// Place the capturer into 'async' capture mode and begin to capture packets
// of exactly 'frames_per_packet' number of frames each. The client-supplied
// AudioCapturerClient will be used to inform the client of captured packets.
StartAsyncCapture@7(AudioCapturerClient callback_target,
    uint32 frames_per_packet);

// Stop capturing in 'async' capture mode and (optionally) deliver a callback
// which may be used by the client if explicit synchronization is needed.
StopAsyncCapture@8();
StopAsyncCaptureWithCallback@9() => ();
};