|  | // Copyright 2017 The Fuchsia Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | library fuchsia.media; | 
|  |  | 
|  | using fuchsia.media.audio; | 
|  | using zx; | 
|  |  | 
/// Configuration for a capturer which will receive a loopback stream
/// from a system output.
|  | table LoopbackAudioCapturerConfiguration { | 
|  | }; | 
|  |  | 
|  | /// Configuration for a capturer which will receive a stream from an | 
|  | /// input device. | 
table InputAudioCapturerConfiguration {
    1: AudioCaptureUsage usage;
};
|  |  | 
|  | /// Configuration for an audio Capturer. | 
union AudioCapturerConfiguration {
    1: LoopbackAudioCapturerConfiguration loopback;
    2: InputAudioCapturerConfiguration input;
};
|  |  | 
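// A construction sketch (assumptions: C++ HLCPP bindings; `WithInput` and
// `set_usage` follow the usual generated-binding patterns and are not defined
// in this file):
//
// ```cpp
// fuchsia::media::InputAudioCapturerConfiguration input;
// input.set_usage(fuchsia::media::AudioCaptureUsage::FOREGROUND);
// auto config =
//     fuchsia::media::AudioCapturerConfiguration::WithInput(std::move(input));
// ```
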
// TODO(mpuryear): Routing policy needs to become more capable than this.
// Clients will need to be able to request sets of inputs/outputs/renderers,
// make changes to these sets, and have their requests vetted by policy (do
// they have permission to capture this private stream, do they have permission
// to capture at this frame rate, etc.).  Eventually, this functionality will
// need to be expressed at the AudioPolicy level, not here.
// TODO(mpuryear): Can we come up with better names than these? (Both are
// really async modes under the hood.)
|  | // TODO(mpuryear): Add a new method to replace `DiscardAllPackets`, which will | 
|  | // be removed from `StreamSource`. | 
|  | // TODO(mpuryear): Fix obsolete docs. | 
|  | // TODO(mpuryear): Specify the way in which timestamps relative to a different | 
|  | // clock (such as an audio domain clock) may be delivered to a client. | 
|  | /// AudioCapturer | 
|  | /// | 
/// An AudioCapturer is an interface returned from fuchsia.media.Audio's
/// CreateAudioCapturer method, which may be used by clients to capture audio
/// from either the current default audio input device or the current default
/// audio output device, depending on the flags passed during creation.
|  | /// | 
|  | /// ** Format support ** | 
|  | /// | 
/// See (Get|Set)StreamType below. Initially, the captured stream type is
/// determined by the configured stream type of the source to which the
/// AudioCapturer was bound at creation time. Users may either fetch this type
/// using GetStreamType, or they may choose to have the media resampled or
/// converted to a type of their choosing by calling SetStreamType. Note: the
/// stream type may only be set while the capturer is not running, meaning that
/// there are no pending capture regions (specified using CaptureAt) and that
/// the capturer is not currently running in 'async' capture mode.
|  | /// | 
|  | /// ** Buffers and memory management ** | 
|  | /// | 
/// Audio data is captured into a shared memory buffer (a VMO) supplied by the
/// user to the AudioCapturer during the AddPayloadBuffer call. Please note the
/// following requirements related to the management of the payload buffer
/// (a usage sketch follows this list).
|  | /// | 
|  | /// ++ The payload buffer must be supplied before any capture operation may | 
|  | ///    start.  Any attempt to start capture (via either CaptureAt or | 
|  | ///    StartAsyncCapture) before a payload buffer has been established is an | 
|  | ///    error. | 
|  | /// ++ The payload buffer may not be changed while there are any capture | 
|  | ///    operations pending. | 
|  | /// ++ The stream type may not be changed after the payload buffer has been set. | 
/// ++ The payload buffer must be an integral number of audio frame sizes (in
///    bytes).
/// ++ When running in 'async' mode (see below), the payload buffer must be
///    large enough to hold at least two packets of the frames_per_packet size
///    specified during StartAsyncCapture.
|  | /// ++ The handle to the payload buffer supplied by the user must be readable, | 
|  | ///    writable, mappable and transferable. | 
|  | /// ++ Users should always treat the payload buffer as read-only. | 
|  | /// | 
|  | /// ** Synchronous vs. Asynchronous capture mode ** | 
|  | /// | 
|  | /// The AudioCapturer interface can be used in one of two mutually exclusive | 
|  | /// modes: Synchronous and Asynchronous.  A description of each mode and their | 
|  | /// tradeoffs is given below. | 
|  | /// | 
|  | /// ** Synchronous mode ** | 
|  | /// | 
/// By default, AudioCapturer instances are running in 'sync' mode. They will
/// only capture data when a user supplies at least one region to capture into
/// using the CaptureAt method. Regions supplied in this way will be filled in
/// the order that they are received and returned to the client as
/// StreamPackets via the return value of the CaptureAt method. If an
/// AudioCapturer instance has data to capture, but no place to put it (because
/// there are no more pending regions to fill), the next produced StreamPacket
/// will indicate that there has been an overflow by having its Discontinuity
/// flag set. Synchronous mode may not be used in conjunction with Asynchronous
/// mode. It is an error to attempt to call StartAsyncCapture while the system
/// still has regions supplied by CaptureAt waiting to be filled.
|  | /// | 
/// If a user has supplied regions to be filled by the AudioCapturer instance
/// in the past, but wishes to reclaim those regions, they may do so using the
/// DiscardAllPackets method. Calling DiscardAllPackets causes all pending
/// regions to be returned, but with `NO_TIMESTAMP` as their StreamPacket's
/// PTS. See "Timing and Overflows", below, for a discussion of timestamps and
/// discontinuity flags. After a DiscardAllPackets operation, an OnEndOfStream
/// event will be produced. While an AudioCapturer will never overwrite any
/// region of the payload buffer after a completed region is returned, it may
/// overwrite the unfilled portions of a partially filled buffer which has been
/// returned as a result of a DiscardAllPackets operation.
|  | /// | 
|  | /// ** Asynchronous mode ** | 
|  | /// | 
/// While running in 'async' mode, clients do not need to explicitly supply
/// shared buffer regions to be filled by the AudioCapturer instance. Instead,
/// a client enters 'async' mode by calling StartAsyncCapture, supplying the
/// number of frames to capture per packet. Once running in async mode, the
/// AudioCapturer instance will identify which payload buffer regions to
/// capture into, capture the specified number of frames, then deliver those
/// frames as StreamPackets using the OnPacketProduced FIDL event. Users may
/// stop capturing and return the AudioCapturer instance to 'sync' mode using
/// the StopAsyncCapture method.
|  | /// | 
/// The following operations are considered errors:
///
/// ++ Entering 'async' capture mode when no payload buffer has been
///    established.
/// ++ Specifying a number of frames to capture per payload which does not
///    permit at least two contiguous capture payloads to exist in the
///    established shared payload buffer simultaneously.
/// ++ Sending a region to capture into using the CaptureAt method while the
///    AudioCapturer instance is running in 'async' mode.
/// ++ Calling DiscardAllPackets while the AudioCapturer instance is running in
///    'async' mode.
/// ++ Re-starting 'async' mode capture without having first stopped.
/// ++ Attempting any operation except for SetGain while in the process of
///    stopping.
|  | /// | 
|  | /// ** Synchronizing with a StopAsyncCapture operation ** | 
|  | /// | 
|  | /// Stopping asynchronous capture mode and returning to synchronous capture mode | 
|  | /// is an operation which takes time.  Aside from SetGain, users may not call any | 
|  | /// other methods on the AudioCapturer interface after calling StopAsyncCapture | 
|  | /// (including calling StopAsyncCapture again) until after the stop operation has | 
|  | /// completed.  Because of this, it is important for users to be able to | 
|  | /// synchronize with the stop operation.  Two mechanisms are provided for doing | 
|  | /// so. | 
|  | /// | 
/// The first is to use StopAsyncCapture (not the NoReply variant). When the
/// user's callback has been called, they can be certain that the stop
/// operation is complete and that the AudioCapturer instance has returned to
/// synchronous operation mode.
|  | /// | 
/// The second way to determine that a stop operation has completed is to use
/// the flags on the packets delivered (via OnPacketProduced) after calling
/// StopAsyncCapture. When asked to stop, any partially filled packet will be
/// returned to the user, and the final packet returned will always have the
/// end-of-stream flag (kFlagsEos) set on it to indicate that it is the final
/// packet in the sequence. If there is no partially filled packet to return,
/// the AudioCapturer will synthesize an empty packet with no timestamp, and
/// offset/length set to zero, in order to deliver a packet with the
/// end-of-stream flag set on it. Once users have seen the end-of-stream flag
/// after calling stop, the AudioCapturer has finished the stop operation and
/// returned to synchronous operating mode.
|  | /// | 
|  | /// ** Timing and Overflows ** | 
|  | /// | 
|  | /// All media packets produced by an AudioCapturer instance will have their PTS | 
|  | /// field filled out with the capture time of the audio expressed as a timestamp | 
|  | /// given by the reference clock timeline. Note: this timestamp is actually a | 
|  | /// capture timestamp, not a presentation timestamp (it is more of a CTS than a | 
|  | /// PTS) and is meant to represent the underlying system's best estimate of the | 
|  | /// capture time of the first frame of audio, including all outboard and hardware | 
|  | /// introduced buffering delay.  As a result, all timestamps produced by an | 
|  | /// AudioCapturer should be expected to be in the past relative to 'now' on the | 
|  | /// stream's reference clock timeline. | 
|  | /// | 
|  | /// The one exception to the "everything has an explicit timestamp" rule is when | 
|  | /// discarding submitted regions while operating in synchronous mode. Discarded | 
|  | /// packets have no data in them, but FIDL demands that all pending | 
|  | /// method-return-value callbacks be executed.  Because of this, the regions will | 
|  | /// be returned to the user, but their timestamps will be set to | 
|  | /// `NO_TIMESTAMP`, and their payload sizes will be set to zero.  Any | 
|  | /// partially filled payload will have a valid timestamp, but a payload size | 
|  | /// smaller than originally requested.  The final discarded payload (if there | 
|  | /// were any to discard) will be followed by an OnEndOfStream event. | 
|  | /// | 
/// Two StreamPackets delivered by an AudioCapturer instance are 'continuous'
/// if the first frame of audio contained in the second packet was captured
/// exactly one nominal frame time after the final frame of audio in the first
/// packet. If this relationship does not hold, the second StreamPacket will
/// have the 'kFlagDiscontinuous' flag set in its flags field.
|  | /// | 
|  | /// Even though explicit timestamps are provided on every StreamPacket produced, | 
|  | /// users who have very precise timing requirements are encouraged to always | 
|  | /// reason about time by counting frames delivered since the last discontinuity, | 
|  | /// rather than simply using the raw capture timestamps. This is because the | 
|  | /// explicit timestamps written on continuous packets may have a small amount of | 
|  | /// rounding error based on whether or not the units of the capture timeline | 
|  | /// reference clock are divisible by the chosen audio frame rate. | 
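///
/// A frame-counting sketch (assumption: PTS values are expressed in
/// nanoseconds on the reference clock timeline, as elsewhere in
/// fuchsia.media):
///
/// ```cpp
/// // Derive a frame-accurate timestamp rather than trusting per-packet PTS.
/// int64_t PtsFor(int64_t first_pts_after_discontinuity,
///                uint64_t frames_since_discontinuity,
///                uint32_t frames_per_second) {
///   return first_pts_after_discontinuity +
///          static_cast<int64_t>(frames_since_discontinuity * 1000000000u /
///                               frames_per_second);
/// }
/// ```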
|  | /// | 
|  | /// Users should always expect the first StreamPacket produced by an | 
|  | /// AudioCapturer to have the discontinuous flag set on it (as there is no | 
|  | /// previous packet to be continuous with). Similarly, the first StreamPacket | 
|  | /// after a DiscardAllPackets or a Stop/Start cycle will always be | 
|  | /// discontinuous. After that, there are only two reasons that a StreamPacket | 
|  | /// will ever be discontinuous: | 
|  | /// | 
/// 1) The user is operating in synchronous mode and does not supply regions
///    to be filled quickly enough. If the next continuous frame of data has
///    not been captured by the time it needs to be purged from the source
///    buffers, an overflow has occurred and the AudioCapturer will flag the
///    next captured region as discontinuous.
/// 2) The user is operating in asynchronous mode and some internal error
///    prevents the AudioCapturer instance from capturing the next frame of
///    audio in a continuous fashion. This might be caused by high system load
///    or a hardware error, but in general it should never happen. If it does,
///    however, the next produced packet will be flagged as discontinuous.
|  | /// | 
|  | /// ** Synchronous vs. Asynchronous Trade-offs ** | 
|  | /// | 
|  | /// The choice of operating in synchronous vs. asynchronous mode is up to the | 
|  | /// user, and depending on the user's requirements, there are some advantages and | 
|  | /// disadvantages to each choice. | 
|  | /// | 
|  | /// Synchronous mode requires only a single Zircon channel under the hood and can | 
|  | /// achieve some small savings because of this.  In addition, the user has | 
|  | /// complete control over the buffer management.  Users specify exactly where | 
|  | /// audio will be captured to and in what order.  Because of this, if users do | 
|  | /// not need to always be capturing, it is simple to stop and restart the capture | 
/// later (just by ceasing to supply packets, then resuming later on). Payloads
/// do not need to be uniform in size either; clients may specify payloads of
/// whatever granularity is appropriate.
|  | /// | 
/// The primary downside of operating in synchronous mode is that two messages
/// will need to be sent for every packet to be captured: one to inform the
/// AudioCapturer of the region to capture into, and one to inform the user
/// that the packet has been captured. This may end up increasing overhead and
/// potentially complicating client designs.
|  | /// | 
/// Asynchronous mode has the advantage of requiring only half the messages;
/// however, when operating in 'async' mode, AudioCapturer instances have no
/// way of knowing whether a user is processing the StreamPackets being sent in
/// a timely fashion, and no way of automatically detecting an overflow
/// condition. Users of 'async' mode should be careful to use a buffer large
/// enough to ensure that they will be able to process their data before an
/// AudioCapturer will be forced to overwrite it.
|  | /// | 
|  | // ** Future Directions (aka TODOs) ** | 
|  | // | 
// ++ Consider adding a 'zero message' capture mode where the AudioCapturer
//    simply supplies a linear transformation and some buffer parameters (max
//    audio hold time) each time that it is started in 'async' mode, or each
//    time an internal overflow occurs in 'async' mode.  Based on this
//    information, clients should know where the capture write pointer is at
//    all times as a function of the transformation, removing the need to send
//    any buffer position messages.  This would reduce operational overhead to
//    just about as low as it can go, and could allow for the lowest possible
//    latency for capture clients.  OTOH - it might be better to achieve this
//    simply by allowing clients to be granted direct, exclusive access to the
//    driver level of capture if no resampling, reformatting, or sharing is
//    needed.
|  | // ++ Consider providing some mechanism by which users may specify the exact | 
|  | //    time at which they want to capture data. | 
|  | // ++ Allow for more complex routing/mixing/AEC scenarios and place this under | 
|  | //    the control of the policy manager. | 
|  | // ++ Define and enforce access permissions and downsampling requirements for | 
|  | //    sensitive content.  Enforce using the policy manager. | 
|  | // ++ Consider allowing the mixer to produce compressed audio. | 
|  | // | 
|  |  | 
|  | protocol AudioCapturer { | 
    compose StreamBufferSet;
    compose StreamSource;
|  |  | 
    /// Sets the stream type of the stream to be delivered.  Causes the source
    /// material to be reformatted/resampled if needed in order to produce the
    /// requested stream type. Note that the stream type may not be changed
    /// after the payload buffer has been established.
    SetPcmStreamType(AudioStreamType stream_type);
|  |  | 
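    // A format-setting sketch (assumptions: C++ HLCPP bindings; the 48kHz
    // mono 16-bit values are illustrative only):
    //
    // ```cpp
    // fuchsia::media::AudioStreamType t;
    // t.sample_format = fuchsia::media::AudioSampleFormat::SIGNED_16;
    // t.channels = 1;
    // t.frames_per_second = 48000;
    // capturer->SetPcmStreamType(std::move(t));
    // ```
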
    /// Explicitly specifies a region of the shared payload buffer for the
    /// audio input to capture into.
    CaptureAt(uint32 payload_buffer_id, uint32 payload_offset,
              uint32 frames) -> (StreamPacket captured_packet);
|  |  | 
    /// Places the AudioCapturer into 'async' capture mode and begins to
    /// produce packets of exactly 'frames_per_packet' number of frames each.
    /// The OnPacketProduced event (of StreamSource) will be used to inform the
    /// client of produced packets.
    StartAsyncCapture(uint32 frames_per_packet);
|  |  | 
    /// Stops capturing in 'async' capture mode and (optionally) delivers a
    /// callback that may be used by the client if explicit synchronization is
    /// needed.
    StopAsyncCapture() -> ();
    StopAsyncCaptureNoReply();
|  |  | 
    /// Binds to the gain control for this AudioCapturer.
    BindGainControl(request<fuchsia.media.audio.GainControl> gain_control_request);
|  |  | 
    /// Retrieves the stream's reference clock. The returned handle will have
    /// READ, DUPLICATE and TRANSFER rights, and will refer to a zx::clock that
    /// is MONOTONIC and CONTINUOUS.
    ///
    GetReferenceClock() -> (zx.handle:CLOCK reference_clock);
|  |  | 
    /// Sets the reference clock that controls this capturer's capture rate.
    /// If the input parameter is a valid zx::clock, it must have READ,
    /// DUPLICATE, TRANSFER rights and refer to a clock that is both MONOTONIC
    /// and CONTINUOUS. If instead an invalid clock is passed (such as the
    /// uninitialized `zx::clock()`), this indicates that the stream will use a
    /// 'flexible' clock generated by AudioCore that tracks the audio device.
    ///
    /// `SetReferenceClock` cannot be called after the capturer payload buffer
    /// has been added. It also cannot be called a second time (even before
    /// capture). If the client wants a reference clock that is initially
    /// `CLOCK_MONOTONIC` but may diverge at some later time, they should
    /// create a clone of the monotonic clock, set this as the stream's
    /// reference clock, then rate-adjust it subsequently as needed.
    ///
    SetReferenceClock(zx.handle:CLOCK? reference_clock);
|  |  | 
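    // A clock-clone sketch (assumptions: C++ with lib/zx; a kernel supporting
    // ZX_CLOCK_OPT_AUTO_START, which starts the clock as a clone of the
    // monotonic clock; error handling omitted):
    //
    // ```cpp
    // zx::clock ref;
    // zx::clock::create(ZX_CLOCK_OPT_MONOTONIC | ZX_CLOCK_OPT_CONTINUOUS |
    //                       ZX_CLOCK_OPT_AUTO_START,
    //                   nullptr, &ref);
    // zx::clock dup;
    // ref.duplicate(ZX_RIGHT_READ | ZX_RIGHT_DUPLICATE | ZX_RIGHT_TRANSFER,
    //               &dup);
    // capturer->SetReferenceClock(std::move(dup));
    // // Keep `ref` (which retains WRITE) to rate-adjust the clock later.
    // ```
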
    /// Sets the usage of the capture stream.  This may be changed on the fly,
    /// but packets in flight may be affected by the new usage.  By default the
    /// Capturer is created with the FOREGROUND usage.
    SetUsage(AudioCaptureUsage usage);
|  |  | 
|  | // //////////////////////////////////////////////////////////////////////// | 
|  | // StreamBufferSet methods | 
|  | // See stream.fidl. | 
|  |  | 
|  | // //////////////////////////////////////////////////////////////////////// | 
|  | // StreamSource methods | 
|  | // See stream.fidl. | 
|  |  | 
|  | // //////////////////////////////////////////////////////////////////////// | 
|  | // Methods to be deprecated | 
|  | // These methods will go away. | 
|  |  | 
    /// Gets the currently configured stream type. Note: for an AudioCapturer
    /// which was just created and has not yet had its stream type explicitly
    /// set, this will retrieve the stream type -- at the time the
    /// AudioCapturer was created -- of the source (input or looped-back
    /// output) to which the AudioCapturer is bound.
    ///
    // TODO(mpuryear): Get rid of this. Eventually, AudioCapturers will be
    // bindable to a set of inputs/outputs/renderers, so the concept of a
    // "native" stream type will go away. Mechanisms will need to be put in
    // place to allow users to enumerate the configuration of these bind-able
    // endpoints (and perhaps to exercise control over them), but it will be
    // the user of the AudioCapturer's job to specify the format they want.
    GetStreamType() -> (StreamType stream_type);
|  | }; |