// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
library fuchsia.media;

using fuchsia.media.audio;
using zx;

/// The maximum number of frames that may be contained within a single StreamPacket.
const MAX_FRAMES_PER_RENDERER_PACKET int64 = 0x3ffff;

///
/// AudioRenderers can be in one of two states at any time: _configurable_ or _operational_. A
/// renderer is considered _operational_ whenever it has packets queued to be rendered; otherwise it
/// is _configurable_. Once an AudioRenderer enters the operational state, calls to "configuring"
/// methods are disallowed and will cause the audio service to disconnect the client's connection.
/// The following are considered configuring methods: `AddPayloadBuffer`, `SetPcmStreamType`,
/// `SetStreamType`, `SetPtsUnits`, `SetPtsContinuityThreshold`.
///
/// If an AudioRenderer must be reconfigured, the client must ensure that no packets are still
/// enqueued when these "configuring" methods are called. Thus it is best practice to call
/// `DiscardAllPackets` on the AudioRenderer (and ideally `Pause` before `DiscardAllPackets`), prior
/// to reconfiguring the renderer.
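///
/// As an illustrative sketch (assuming an HLCPP proxy named `audio_renderer` and omitting
/// error handling), a reconfiguration following this best practice might look like:
///
///     // Ensure no packets remain queued before reconfiguring.
///     audio_renderer->PauseNoReply();
///     audio_renderer->DiscardAllPackets([&audio_renderer, &new_format] {
///       // Now in the configurable state: safe to call configuring methods.
///       audio_renderer->SetPcmStreamType(std::move(new_format));
///     });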
///
protocol AudioRenderer {
    compose StreamBufferSet;
    compose StreamSink;

    /// Binds to the gain control for this AudioRenderer.
    BindGainControl(resource struct {
        gain_control_request server_end:fuchsia.media.audio.GainControl;
    });

    /// Sets the units used by the presentation (media) timeline. By default, PTS units are
    /// nanoseconds (as if this were called with a numerator of 1e9 and a denominator of 1).
    /// This ratio must lie between 1/60 (1 tick per minute) and 1e9/1 (1ns per tick).
    SetPtsUnits(struct {
        tick_per_second_numerator uint32;
        tick_per_second_denominator uint32;
    });
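
    // For example, a client whose packet timestamps are expressed in milliseconds (as in the
    // container scenario described under `SetPtsContinuityThreshold` below) might configure the
    // renderer as in this sketch, assuming an HLCPP proxy named `audio_renderer`:
    //
    //     // 1000 ticks per second: one PTS tick == 1 millisecond.
    //     audio_renderer->SetPtsUnits(1000, 1);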

    /// Sets the maximum threshold (in seconds) between an explicit user-provided PTS
    /// and the expected PTS (determined using interpolation). Beyond this threshold,
    /// a stream is no longer considered 'continuous' by the renderer.
    ///
    /// Defaults to an interval of half a PTS 'tick', using the currently-defined PTS units.
    /// Most users should not need to change this value from its default.
    ///
    /// Example:
    /// A user is playing back 48KHz audio from a container, which also contains
    /// video and needs to be synchronized with the audio. The timestamps are
    /// provided explicitly per packet by the container, and expressed in mSec
    /// units. This means that a single tick of the media timeline (1 mSec)
    /// represents exactly 48 frames of audio. The application in this scenario
    /// delivers packets of audio to the AudioRenderer, each with exactly 470
    /// frames of audio, and each with an explicit timestamp set to the best
    /// possible representation of the presentation time (given this media
    /// clock's resolution). So, starting from zero, the timestamps would be...
    ///
    /// [ 0, 10, 20, 29, 39, 49, 59, 69, 78, 88, ... ]
    ///
    /// In this example, attempting to use the presentation time to compute the
    /// starting frame number of the audio in the packet would be wrong the
    /// majority of the time. The first timestamp is correct (by definition), but
    /// it will be 24 packets before the timestamps and frame numbers come back
    /// into alignment (the 24th packet would start with the 11280th audio frame
    /// and have a PTS of exactly 235).
    ///
    /// One way to fix this situation is to set the PTS continuity threshold
    /// (henceforth, CT) for the stream to be equal to 1/2 of the time taken by
    /// the number of frames contained within a single tick of the media clock,
    /// rounded up. In this scenario, that would be 24.0 frames of audio, or 500
    /// uSec. Any packet whose expected PTS is within +/-CT of its explicitly
    /// provided PTS would be considered a continuation of the previous frame of
    /// audio. For this example, calling `SetPtsContinuityThreshold(0.0005)`
    /// would work well.
    ///
    /// Other possible uses:
    /// Users who are scheduling audio explicitly, relative to a clock which has
    /// not been configured as the reference clock, can use this value to control
    /// the maximum acceptable synchronization error before a discontinuity is
    /// introduced. E.g., if a user is scheduling audio based on a recovered
    /// common media clock, and has not published that clock as the reference
    /// clock, and they set the CT to 20mSec, then up to 20mSec of drift error
    /// can accumulate before the AudioRenderer deliberately inserts a
    /// presentation discontinuity to account for the error.
    ///
    /// Users who need to deal with a container whose timestamps may be
    /// even less correct than +/- 1/2 of a PTS tick may set this value to
    /// something larger. This should be the maximum level of inaccuracy present
    /// in the container timestamps, if known. Failing that, it could be set to
    /// the maximum tolerable level of drift error before absolute timestamps are
    /// explicitly obeyed. Finally, a user could set this number to a very large
    /// value (86400.0 seconds, for example) to effectively cause *all*
    /// timestamps to be ignored after the first, thus treating all audio as
    /// continuous with previously delivered packets. Conversely, users who wish
    /// to *always* explicitly schedule their audio packets exactly may specify
    /// a CT of 0.
    ///
    /// Note: explicitly specifying high-frequency PTS units reduces the default
    /// continuity threshold accordingly. Internally, this threshold is stored as an
    /// integer count of 1/8192 subframes. The default threshold is computed as follows:
    ///     RoundUp((AudioFPS/PTSTicksPerSec) * 4096) / (AudioFPS * 8192)
    /// For this reason, specifying PTS units with a frequency greater than 8192x
    /// the frame rate (or NOT calling SetPtsUnits, which leaves the default PTS
    /// unit of 1 nanosec in place) will result in a default continuity threshold of zero.
    ///
    SetPtsContinuityThreshold(struct {
        threshold_seconds float32;
    });
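
    // A minimal sketch of the computation from the example above, assuming an HLCPP proxy named
    // `audio_renderer` and the 48KHz / 1-mSec-tick scenario (all names here are illustrative):
    //
    //     const double frames_per_second = 48000.0;
    //     const double ticks_per_second = 1000.0;
    //     // Half the duration of the frames spanned by one PTS tick: 24 frames -> 500 uSec.
    //     const double ct_seconds =
    //         0.5 * (frames_per_second / ticks_per_second) / frames_per_second;
    //     audio_renderer->SetPtsContinuityThreshold(static_cast<float>(ct_seconds));  // 0.0005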

    /// Retrieves the stream's reference clock. The returned handle will have READ, DUPLICATE
    /// and TRANSFER rights, and will refer to a zx::clock that is MONOTONIC and CONTINUOUS.
    ///
    GetReferenceClock() -> (resource struct {
        reference_clock zx.handle:CLOCK;
    });
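
    // Sketch of retrieving the clock and reading it via the Zircon C++ wrappers (HLCPP proxy
    // `audio_renderer` assumed; error handling omitted):
    //
    //     audio_renderer->GetReferenceClock([](zx::clock ref_clock) {
    //       zx_time_t now;
    //       ref_clock.read(&now);  // 'now' according to this renderer's reference timeline.
    //     });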

    /// Sets the reference clock that controls this renderer's playback rate. If the input
    /// parameter is a valid zx::clock, it must have READ, DUPLICATE, TRANSFER rights and
    /// refer to a clock that is both MONOTONIC and CONTINUOUS. If instead an invalid clock
    /// is passed (such as the uninitialized `zx::clock()`), this indicates that the stream
    /// will use a 'flexible' clock generated by AudioCore that tracks the audio device.
    ///
    /// `SetReferenceClock` cannot be called once `SetPcmStreamType` is called. It also
    /// cannot be called a second time (even if the renderer format has not yet been set).
    /// If a client wants a reference clock that is initially `CLOCK_MONOTONIC` but may
    /// diverge at some later time, they should create a clone of the monotonic clock, set
    /// this as the stream's reference clock, then rate-adjust it subsequently as needed.
    ///
    SetReferenceClock(resource struct {
        reference_clock zx.handle:<CLOCK, optional>;
    });
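
    // A sketch of the clone-of-monotonic approach described above, using the Zircon clock
    // syscall wrappers (HLCPP proxy `audio_renderer` assumed; error handling omitted):
    //
    //     zx::clock ref_clock;
    //     // AUTO_START begins the clock as a clone of CLOCK_MONOTONIC.
    //     zx::clock::create(ZX_CLOCK_OPT_MONOTONIC | ZX_CLOCK_OPT_CONTINUOUS |
    //                           ZX_CLOCK_OPT_AUTO_START, nullptr, &ref_clock);
    //     zx::clock dup;
    //     ref_clock.duplicate(ZX_RIGHT_READ | ZX_RIGHT_DUPLICATE | ZX_RIGHT_TRANSFER, &dup);
    //     audio_renderer->SetReferenceClock(std::move(dup));
    //     // Retain 'ref_clock' (which keeps WRITE rights) to rate-adjust later as needed.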

    /// Sets the usage of the render stream. This method may not be called after
    /// `SetPcmStreamType` is called. The default usage is `MEDIA`.
    SetUsage(struct {
        usage AudioRenderUsage;
    });

    /// Sets the type of the stream to be delivered by the client. Using this method implies
    /// that the stream encoding is `AUDIO_ENCODING_LPCM`.
    ///
    /// This must be called before `Play` or `PlayNoReply`. After a call to `SetPcmStreamType`,
    /// the client must then send an `AddPayloadBuffer` request, then the various `StreamSink`
    /// methods such as `SendPacket`/`SendPacketNoReply`.
    SetPcmStreamType(struct {
        type AudioStreamType;
    });
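
    // For example, configuring a 48KHz stereo float stream might look like this sketch (HLCPP
    // proxy `audio_renderer` assumed):
    //
    //     fuchsia::media::AudioStreamType stream_type;
    //     stream_type.sample_format = fuchsia::media::AudioSampleFormat::FLOAT;
    //     stream_type.channels = 2;
    //     stream_type.frames_per_second = 48000;
    //     audio_renderer->SetPcmStreamType(stream_type);
    //     // Next: AddPayloadBuffer(...), then SendPacket(...)/SendPacketNoReply(...).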

    /// Enables or disables notifications about changes to the minimum clock lead
    /// time (in nanoseconds) for this AudioRenderer. Calling this method with
    /// 'enabled' set to true will trigger an immediate `OnMinLeadTimeChanged`
    /// event with the current minimum lead time for the AudioRenderer. If the
    /// value changes, an `OnMinLeadTimeChanged` event will be raised with the
    /// new value. This behavior will continue until the user calls
    /// `EnableMinLeadTimeEvents(false)`.
    ///
    /// The minimum clock lead time is the amount of time ahead of the reference
    /// clock's understanding of "now" that packets need to arrive (relative to
    /// the playback clock transformation) in order for the mixer to be able to
    /// mix the packet. For example...
    ///
    /// ++ Let the PTS of packet X be P(X).
    /// ++ Let the function which transforms PTS -> RefClock be R(p) (this
    ///    function is determined by the call to `Play(...)`).
    /// ++ Let the minimum lead time be MLT.
    ///
    /// If R(P(X)) < RefClock.Now() + MLT
    /// then the packet is late, and some (or all) of the packet's payload will
    /// need to be skipped in order to present the packet at the scheduled time.
    ///
    // TODO(mpuryear): What should the units be here? Options include...
    //
    // 1) Normalized to nanoseconds (this is the current API)
    // 2) Reference clock units (what happens if the reference clock changes?)
    // 3) PTS units (what happens when the user changes the PTS units?)
    //
    // TODO(mpuryear): Should `EnableMinLeadTimeEvents` have an optional -> ()
    // return value for synchronization purposes? Probably not; users should be
    // able to send a disable request and clear their event handler if they no
    // longer want notifications. Their in-process dispatcher framework can
    // handle draining and dropping any lead time changed events that were
    // already in flight when the disable message was sent.
    //
    EnableMinLeadTimeEvents(struct {
        enabled bool;
    });
    -> OnMinLeadTimeChanged(struct {
        min_lead_time_nsec int64;
    });
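
    // Sketch of subscribing to lead-time events with the HLCPP bindings (proxy
    // `audio_renderer` assumed):
    //
    //     audio_renderer.events().OnMinLeadTimeChanged = [](int64_t min_lead_time_nsec) {
    //       // Schedule packets at least this far ahead of the reference clock's "now".
    //     };
    //     audio_renderer->EnableMinLeadTimeEvents(true);  // Triggers an immediate event.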

    // TODO(mpuryear): Eliminate this method when possible. Right now, it is
    // used by code requiring synchronous FIDL interfaces to talk to
    // AudioRenderers.
    ///
    /// While it is possible to call `GetMinLeadTime` before `SetPcmStreamType`,
    /// there's little reason to do so. This is because lead time is a function
    /// of format/rate, so lead time will be recalculated after `SetPcmStreamType`.
    /// If min lead time events are enabled before `SetPcmStreamType` (with
    /// `EnableMinLeadTimeEvents(true)`), then an event will be generated in
    /// response to `SetPcmStreamType`.
    GetMinLeadTime() -> (struct {
        min_lead_time_nsec int64;
    });
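
    // With the synchronous HLCPP bindings mentioned in the TODO above, the call might look like
    // this sketch (a `fuchsia::media::AudioRendererSyncPtr` named `audio_renderer_sync` assumed):
    //
    //     int64_t min_lead_time_nsec = 0;
    //     audio_renderer_sync->GetMinLeadTime(&min_lead_time_nsec);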

    /// Immediately puts the AudioRenderer into a playing state. Starts the advance
    /// of the media timeline, using specific values provided by the caller (or
    /// default values if not specified). In an optional callback, returns the
    /// timestamp values ultimately used -- these set the ongoing relationship
    /// between the media and reference timelines (i.e., how to translate between
    /// the domain of presentation timestamps and the realm of local system
    /// time).
    ///
    /// Local system time is specified in units of nanoseconds; media_time is
    /// specified in the units defined by the user in the `SetPtsUnits` function,
    /// or nanoseconds if `SetPtsUnits` is not called.
    ///
    /// The act of placing an AudioRenderer into the playback state establishes a
    /// relationship between 1) the user-defined media (or presentation) timeline
    /// for this particular AudioRenderer, and 2) the real-world system reference
    /// timeline. To communicate how to translate between timelines, the Play()
    /// callback provides an equivalent timestamp in each time domain. The first
    /// value ('reference_time') is given in terms of this renderer's reference
    /// clock; the second value ('media_time') is the media instant that exactly
    /// corresponds to that local time. Restated, the frame at 'media_time' in
    /// the audio stream should be presented at 'reference_time' according to
    /// the reference clock.
    ///
    /// Note: on calling this API, media_time immediately starts advancing. It is
    /// possible (if uncommon) for a caller to specify a system time that is
    /// far in the past, or far into the future. This, along with the specified
    /// media time, is simply used to determine what media time corresponds to
    /// 'now', and THAT media time is then intersected with presentation
    /// timestamps of packets already submitted, to determine which media frames
    /// should be presented next.
    ///
    /// With the corresponding reference_time and media_time values, a user can
    /// translate arbitrary time values from one timeline into the other. After
    /// calling `SetPtsUnits(pts_per_sec_numerator, pts_per_sec_denominator)` and
    /// given the 'ref_start' and 'media_start' values from `Play`, then for
    /// any 'ref_time':
    ///
    /// media_time = ( (ref_time - ref_start) / 1e9
    ///                * (pts_per_sec_numerator / pts_per_sec_denominator) )
    ///              + media_start
    ///
    /// Conversely, for any presentation timestamp 'media_time':
    ///
    /// ref_time = ( (media_time - media_start)
    ///              * (pts_per_sec_denominator / pts_per_sec_numerator)
    ///              * 1e9 )
    ///            + ref_start
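    ///
    /// As an illustrative sketch (not part of the API), these two translations could be
    /// written in C++ as follows, using the names above; overflow handling is omitted:
    ///
    ///     // Translate a reference-clock time (ns) to a media time (PTS ticks).
    ///     int64_t RefToMedia(int64_t ref_time, int64_t ref_start, int64_t media_start,
    ///                        int64_t pts_num, int64_t pts_denom) {
    ///       return (ref_time - ref_start) * pts_num / (pts_denom * 1'000'000'000) + media_start;
    ///     }
    ///     // Translate a media time (PTS ticks) to a reference-clock time (ns).
    ///     int64_t MediaToRef(int64_t media_time, int64_t ref_start, int64_t media_start,
    ///                        int64_t pts_num, int64_t pts_denom) {
    ///       return (media_time - media_start) * pts_denom * 1'000'000'000 / pts_num + ref_start;
    ///     }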
    ///
    /// Users, depending on their use case, may optionally choose not to specify
    /// one or both of these timestamps. A timestamp may be omitted by supplying
    /// the special value `NO_TIMESTAMP`. The AudioRenderer automatically deduces
    /// any omitted timestamp value using the following rules:
    ///
    /// Reference Time
    /// If 'reference_time' is omitted, the AudioRenderer will select a "safe"
    /// reference time to begin presentation, based on the minimum lead times for
    /// the output devices that are currently bound to this AudioRenderer. For
    /// example, if an AudioRenderer is bound to an internal audio output
    /// requiring at least 3 mSec of lead time, and an HDMI output requiring at
    /// least 75 mSec of lead time, the AudioRenderer might (if 'reference_time'
    /// is omitted) select a reference time 80 mSec from now.
    ///
    /// Media Time
    /// If media_time is omitted, the AudioRenderer will select one of two
    /// values.
    /// - If the AudioRenderer is resuming from the paused state, and packets
    /// have not been discarded since being paused, then the AudioRenderer will
    /// use a media_time corresponding to the instant at which the presentation
    /// became paused.
    /// - If the AudioRenderer is being placed into a playing state for the first
    /// time following startup or a 'discard packets' operation, the initial
    /// media_time will be set to the PTS of the first payload in the pending
    /// packet queue. If the pending queue is empty, initial media_time will be
    /// set to zero.
    ///
    /// Return Value
    /// When requested, the AudioRenderer returns the 'reference_time' and
    /// 'media_time' which were selected and used (whether they were explicitly
    /// specified or not) in its response to the play call.
    ///
    /// Examples
    /// 1. A user has queued some audio using `SendPacket` and simply wishes the
    /// packets to start playing as soon as possible. The user may call Play without
    /// providing explicit timestamps -- `Play(NO_TIMESTAMP, NO_TIMESTAMP)`.
    ///
    /// 2. A user has queued some audio using `SendPacket`, and wishes to start
    /// playback at a specified 'reference_time', in sync with some other media
    /// stream, either initially or after discarding packets. The user would call
    /// `Play(reference_time, NO_TIMESTAMP)`.
    ///
    /// 3. A user has queued some audio using `SendPacket`. The first of these
    /// packets has a PTS of zero, and the user wishes playback to begin as soon
    /// as possible, but wishes to skip all of the audio content between PTS 0
    /// and PTS 'media_time'. The user would call
    /// `Play(NO_TIMESTAMP, media_time)`.
    ///
    /// 4. A user has queued some audio using `SendPacket` and wants to present
    /// this media in sync with another player on a different device. The
    /// coordinator of the group of distributed players sends an explicit
    /// message to each player telling them to begin presentation of audio at
    /// PTS 'media_time', at the time (based on the group's shared reference
    /// clock) 'reference_time'. Here the user would call
    /// `Play(reference_time, media_time)`.
    ///
    // TODO(mpuryear): Define behavior in the case that a user calls `Play()`
    // while the system is already playing. We should probably do nothing but
    // return a valid correspondence pair in response -- unless both reference
    // and media times are provided (and do not equate to the current timeline
    // relationship), in which case we should introduce a discontinuity.
    //
    // TODO(mpuryear): Collapse these if we ever have optional retvals in FIDL.
    Play(struct {
        reference_time int64;
        media_time int64;
    }) -> (struct {
        reference_time int64;
        media_time int64;
    });
    PlayNoReply(struct {
        reference_time int64;
        media_time int64;
    });
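
    // Sketch of example 1 above with the HLCPP bindings (proxy `audio_renderer` assumed): start
    // playback as soon as it is safe to do so, and observe the correspondence pair selected.
    //
    //     audio_renderer->Play(fuchsia::media::NO_TIMESTAMP, fuchsia::media::NO_TIMESTAMP,
    //                          [](int64_t reference_time, int64_t media_time) {
    //                            // 'media_time' will be presented at 'reference_time'.
    //                          });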

    /// Immediately puts the AudioRenderer into the paused state and then reports
    /// the relationship between the media and reference timelines which was
    /// established (if requested).
    ///
    /// If the AudioRenderer is already in the paused state when this is called,
    /// the previously-established timeline values are returned (if requested).
    //
    // TODO(mpuryear): Remove NoReply variants if FIDL gets optional retvals.
    Pause() -> (struct {
        reference_time int64;
        media_time int64;
    });
    PauseNoReply();
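
    // Sketch of a pause/resume round trip (HLCPP proxy `audio_renderer` assumed). Because a
    // resume with `NO_TIMESTAMP` reuses the paused media_time when no packets have been
    // discarded (see `Play` above), no client-side bookkeeping is required:
    //
    //     audio_renderer->Pause([](int64_t reference_time, int64_t media_time) {
    //       // Presentation is now paused at 'media_time'.
    //     });
    //     // ... later ...
    //     // Resume: picks up at the paused media_time, at a "safe" reference time.
    //     audio_renderer->PlayNoReply(fuchsia::media::NO_TIMESTAMP, fuchsia::media::NO_TIMESTAMP);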

    // TODO(mpuryear): Spec methods/events which can be used for unintentional
    // discontinuity/underflow detection.
    //
    // TODO(mpuryear): Spec methods/events which can be used to report routing
    // changes. (Presuming that they belong at this level at all; they may
    // belong on some sort of policy object).
    //
    // TODO(mpuryear): Spec methods/events which can be used to report policy-
    // induced gain/ducking changes. (Presuming that they belong at this level
    // at all; they may belong on some sort of policy object).
};