// blob: 837b63254e77aa9d4b9a13c8637e1f4ff81d886e
// Copyright 2016 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
#include "src/media/audio/audio_core/audio_output.h"
#include <lib/fit/defer.h>
#include <lib/zx/clock.h>
#include <limits>
#include <trace/event.h>
#include "src/media/audio/audio_core/audio_renderer_impl.h"
#include "src/media/audio/audio_core/mixer/mixer.h"
#include "src/media/audio/audio_core/mixer/no_op.h"
#include "src/media/audio/lib/logging/logging.h"
namespace media::audio {
// Upper bound on how far in the future Process() will schedule its next wakeup. Even if the
// implementation asks for a later service time, Process() clamps to now + kMaxTrimPeriod so that
// source queues are still trimmed periodically.
static constexpr zx::duration kMaxTrimPeriod = zx::msec(10);
// Constructs an output device on top of AudioDevice.
//
// The initial service time is "now" on the mix domain's dispatcher and is flagged as known, which
// is the precondition Process() asserts on entry.
AudioOutput::AudioOutput(ThreadingModel* threading_model, DeviceRegistry* registry)
    : AudioDevice(Type::Output, threading_model, registry) {
  // Pre-size the scratch list of link references that ForEachSource refills on every pass.
  source_link_refs_.reserve(16u);

  // Schedule the first service cycle for right now.
  next_sched_time_known_ = true;
  next_sched_time_ = async::Now(mix_domain().dispatcher());
}
// Runs one service cycle for this output.
//
// If the scheduled service time has arrived, asks the implementation for a mix job
// (StartMixJob), zeroes the intermediate buffer, mixes every source into it (unless the output is
// software-muted) and hands the result back via FinishMixJob. If no mix was performed, the source
// queues are trimmed instead. Finally the mix timer is re-armed, no later than kMaxTrimPeriod from
// now.
//
// Shuts the output down if the implementation did not provide a next service time during a mix
// cycle, or if the timer could not be posted.
void AudioOutput::Process() {
  TRACE_DURATION("audio", "AudioOutput::Process");
  auto now = async::Now(mix_domain().dispatcher());
  bool needs_trim = true;

  // At this point, we should always know when our implementation would like to be called to do
  // some mixing work next. If we do not know, then we should have already shut down.
  //
  // If the next sched time has not arrived yet, don't attempt to mix anything. Just trim the
  // queues and move on.
  FX_DCHECK(next_sched_time_known_);
  if (now >= next_sched_time_) {
    // Clear the flag. If the implementation does not set it during the cycle by calling
    // SetNextSchedTime, we consider it an error and shut down.
    next_sched_time_known_ = false;

    // Start from a zeroed job and let the implementation describe the work (if any) for this
    // cycle.
    std::memset(&cur_mix_job_, 0, sizeof(cur_mix_job_));
    auto mix_frames = StartMixJob(&cur_mix_job_, now);
    if (mix_frames) {
      // If we have a mix job, then we must have an intermediate buffer allocated.
      FX_DCHECK(mix_buf_);
      FX_DCHECK(mix_format_);

      cur_mix_job_.buf = mix_buf_.get();
      cur_mix_job_.buf_frames = mix_frames->length;
      cur_mix_job_.start_pts_of = mix_frames->start;

      // The intermediate buffer must be large enough for the job we were given. This check has to
      // come after buf_frames is populated from the returned span; before that point the field
      // still holds the zero written by the memset above and the check could never fire.
      FX_DCHECK(cur_mix_job_.buf_frames <= mix_buf_frames_);

      // Fill the intermediate buffer with silence.
      size_t bytes_to_zero =
          sizeof(cur_mix_job_.buf[0]) * cur_mix_job_.buf_frames * mix_format_->channels();
      std::memset(cur_mix_job_.buf, 0, bytes_to_zero);

      // If we are not muted, actually do the mix.
      if (!cur_mix_job_.sw_output_muted) {
        ForEachSource(TaskType::Mix, now);
        // If we mix we don't need to trim, since any packets will be released by the mix loop.
        needs_trim = false;
      }

      FinishMixJob(cur_mix_job_);
    }
  }

  if (needs_trim) {
    ForEachSource(TaskType::Trim, now);
  }

  if (!next_sched_time_known_) {
    FX_LOGS(ERROR) << "Output failed to schedule next service time. Shutting down!";
    ShutdownSelf();
    return;
  }

  // Figure out when we should wake up to do more work again. No matter how long our
  // implementation wants to wait, we need to make sure to wake up and periodically trim our input
  // queues.
  auto max_sched_time = now + kMaxTrimPeriod;
  if (next_sched_time_ > max_sched_time) {
    next_sched_time_ = max_sched_time;
  }

  zx_status_t status = mix_timer_.PostForTime(mix_domain().dispatcher(), next_sched_time_);
  if (status != ZX_OK) {
    FX_PLOGS(ERROR, status) << "Failed to schedule mix";
    ShutdownSelf();
  }
}
// Prepares a newly-created source link for use by this output: selects a mixer for it, seeds the
// mixer's destination gain from the current device settings (when available), and installs the
// mixer on the link.
//
// Returns ZX_ERR_NOT_SUPPORTED if no mixer can convert between the source and mix formats,
// otherwise ZX_OK.
zx_status_t AudioOutput::InitializeSourceLink(const fbl::RefPtr<AudioLink>& link) {
  TRACE_DURATION("audio", "AudioOutput::InitializeSourceLink");

  // Without both a mix format and a source stream there is nothing to convert, so a NoOp mixer is
  // sufficient (for the time being). Otherwise choose a mixer from the two formats.
  std::unique_ptr<Mixer> link_mixer;
  auto source_stream = link->stream();
  if (!mix_format_ || !source_stream) {
    link_mixer = std::make_unique<audio::mixer::NoOp>();
  } else {
    link_mixer = Mixer::Select(source_stream->format().stream_type(), mix_format_->stream_type());
  }

  if (!link_mixer) {
    FX_LOGS(ERROR)
        << "*** Audio system mixer cannot convert between formats *** (could not select mixer "
           "while linking to output). Usually, this indicates a 'num_channels' mismatch.";
    return ZX_ERR_NOT_SUPPORTED;
  }

  // A Gain object has several stages. For rendering, the stream gain is the "source" stage and the
  // device (or system) gain is the "dest" stage. The renderer sets the source stage after this call
  // returns; here we only seed the dest stage from the device settings.
  const auto& settings = device_settings();
  if (settings != nullptr) {
    AudioDeviceSettings::GainState gain_snapshot;
    settings->SnapshotGainState(&gain_snapshot);

    auto& gain = link_mixer->bookkeeping().gain;
    if (gain_snapshot.muted) {
      gain.SetDestGain(fuchsia::media::audio::MUTED_GAIN_DB);
    } else {
      gain.SetDestGain(fbl::clamp(gain_snapshot.gain_db, Gain::kMinGainDb, Gain::kMaxGainDb));
    }
  }

  link->set_mixer(std::move(link_mixer));
  return ZX_OK;
}
// Allocates the intermediate accumulation buffer used by mix jobs.
//
// The buffer holds |max_mix_frames| frames of float samples at the mix format's channel count.
// The mix format must already be set, and the total sample count must fit in 32 bits.
void AudioOutput::SetupMixBuffer(uint32_t max_mix_frames) {
  TRACE_DURATION("audio", "AudioOutput::SetupMixBuffer");

  // Preconditions: a usable format, a non-empty buffer, and no 32-bit overflow of the sample count.
  FX_DCHECK(mix_format_ && mix_format_->channels() > 0u);
  FX_DCHECK(max_mix_frames > 0u);
  FX_DCHECK(static_cast<uint64_t>(max_mix_frames) * mix_format_->channels() <=
            std::numeric_limits<uint32_t>::max());

  mix_buf_frames_ = max_mix_frames;
  const auto total_samples = mix_buf_frames_ * mix_format_->channels();
  mix_buf_ = std::make_unique<float[]>(total_samples);
}
// Runs |task_type| (Mix or Trim) over every currently-linked source.
//
// For each valid link that has a stream, packets are locked from the front of the stream one at a
// time and handed to ProcessMix / ProcessTrim until the stream is empty, a packet cannot be fully
// consumed, or (for Mix) the current job has produced all of its frames. |ref_time| is only used
// to set up trimming.
void AudioOutput::ForEachSource(TaskType task_type, zx::time ref_time) {
  TRACE_DURATION("audio", "AudioOutput::ForEachSource");

  // Snapshot the active links so that links_lock_ is not held for the whole operation.
  {
    std::lock_guard<std::mutex> guard(links_lock_);
    ZX_DEBUG_ASSERT(source_link_refs_.empty());
    for (auto& active_link : source_links_) {
      source_link_refs_.emplace_back(fbl::RefPtr(&active_link));
    }
  }

  // However we leave this method, drop the temporary references taken above.
  auto drop_refs =
      fit::defer([this]() FXL_NO_THREAD_SAFETY_ANALYSIS { source_link_refs_.clear(); });

  const bool is_mix_task = (task_type == TaskType::Mix);

  for (const auto& source_link : source_link_refs_) {
    // Stop immediately if a shutdown has begun.
    if (is_shutting_down()) {
      return;
    }

    // Skip links that are no longer valid.
    if (!source_link->valid()) {
      continue;
    }

    // Skip links that have no stream to pull packets from.
    auto source_stream = source_link->stream();
    if (!source_stream) {
      continue;
    }

    auto mixer = source_link->mixer();
    FX_DCHECK(mixer != nullptr);
    auto& bookkeeping = mixer->bookkeeping();

    // Refresh the mapping from source frames to local time.
    UpdateSourceTrans(*source_stream, &bookkeeping);

    bool is_set_up = false;
    std::optional<Stream::Buffer> locked_buffer;
    bool packet_done;

    for (;;) {
      packet_done = false;

      // Lock the packet at the front of the queue; an empty queue ends this source.
      locked_buffer = source_stream->LockBuffer();
      if (!locked_buffer) {
        break;
      }

      // A discontinuity invalidates the mixer's internal filter state.
      if (!locked_buffer->is_continuous()) {
        mixer->Reset();
      }

      // One-time per-source setup, performed lazily once the first packet is available.
      if (!is_set_up) {
        if (is_mix_task) {
          SetupMix(mixer);
        } else {
          SetupTrim(mixer, ref_time);
        }
        is_set_up = true;
      }

      // Process the front packet. A true result means the packet was entirely consumed and may be
      // released.
      if (is_mix_task) {
        packet_done = ProcessMix(source_link->GetSource(), mixer, *locked_buffer);
      } else {
        packet_done = ProcessTrim(*locked_buffer);
      }

      // Stop once the mix job is full (whatever the fate of the packet), or when the packet could
      // not be completed (paused source, or packet in the future).
      const bool job_is_full =
          is_mix_task && (cur_mix_job_.frames_produced == cur_mix_job_.buf_frames);
      if (job_is_full || !packet_done) {
        break;
      }

      // The packet was consumed and more output is still needed: release it and fetch the next.
      locked_buffer = std::nullopt;
      source_stream->UnlockBuffer(packet_done);
    }

    // Unlock the queue (completing the packet if required) before moving to the next source.
    locked_buffer = std::nullopt;
    source_stream->UnlockBuffer(packet_done);

    // Setting this is pointless for Trim tasks, but harmless, and simpler than adding a separate
    // per-source hook just to set the flag for Mix tasks.
    cur_mix_job_.accumulate = true;
  }
}
// Per-source setup performed before the first packet of a source is mixed into the current job:
// resets the job's produced-frame count and refreshes the mixer's dest-frame to
// fractional-source-frame transformation if it is out of date.
void AudioOutput::SetupMix(Mixer* mixer) {
  TRACE_DURATION("audio", "AudioOutput::SetupMix");
  FX_DCHECK(mixer);

  // Every source starts writing at the beginning of the job's buffer.
  cur_mix_job_.frames_produced = 0;

  // Recompose the destination-to-source transformation if needed.
  auto& bookkeeping = mixer->bookkeeping();
  UpdateDestTrans(cur_mix_job_, &bookkeeping);
}
// Mixes as much of |source_buffer| as the current mix job can use into the intermediate buffer.
//
// |source| is notified (UnderflowOccurred / PartialUnderflowOccurred) when the packet arrives too
// late for some or all of the frames this job needs. |mixer| supplies the bookkeeping (timeline
// transformations and gain) and performs the actual mix.
//
// Returns true when the caller may release the packet: it is empty, it lies entirely in the past,
// or it was consumed (as reported by Mixer::Mix, or because realignment pushed the sampling point
// past its end). Returns false when the packet must be held: the source is paused, the job already
// has all the frames it needs, the packet lies entirely in the future, or Mixer::Mix did not
// report the packet as consumed.
bool AudioOutput::ProcessMix(const fbl::RefPtr<AudioObject>& source, Mixer* mixer,
const Stream::Buffer& source_buffer) {
TRACE_DURATION("audio", "AudioOutput::ProcessMix");
// Bookkeeping should contain: the rechannel matrix (eventually).
// Sanity check our parameters.
FX_DCHECK(mixer);
// We had better have a valid job, or why are we here?
FX_DCHECK(cur_mix_job_.buf_frames);
FX_DCHECK(cur_mix_job_.frames_produced <= cur_mix_job_.buf_frames);
auto& info = mixer->bookkeeping();
// If the renderer is currently paused, subject_delta (not just step_size) is zero. This packet
// may be relevant eventually, but currently it contributes nothing. Tell ForEachSource we are
// done, but hold the packet for now.
if (!info.dest_frames_to_frac_source_frames.subject_delta()) {
return false;
}
// Have we produced enough? If so, hold this packet and move to next renderer.
if (cur_mix_job_.frames_produced >= cur_mix_job_.buf_frames) {
return false;
}
// At this point we know we need to consume some source data, but we don't yet know how much.
// Here is how many destination frames we still need to produce, for this mix job.
uint32_t dest_frames_left = cur_mix_job_.buf_frames - cur_mix_job_.frames_produced;
// |buf| points at the first not-yet-produced frame of the intermediate buffer.
float* buf = mix_buf_.get() + (cur_mix_job_.frames_produced * mix_format_->channels());
// Calculate this job's first and last sampling points, in source sub-frames. Use timestamps for
// the first and last dest frames we need, translated into the source (frac_frame) timeline.
FractionalFrames<int64_t> frac_source_for_first_mix_job_frame =
FractionalFrames<int64_t>::FromRaw(info.dest_frames_to_frac_source_frames(
cur_mix_job_.start_pts_of + cur_mix_job_.frames_produced));
// This represents (in the frac_frame source timeline) the time of the LAST dest frame we need.
// Without the "-1", this would be the first destination frame of the NEXT job.
FractionalFrames<int64_t> frac_source_for_final_mix_job_frame =
frac_source_for_first_mix_job_frame +
FractionalFrames<int64_t>::FromRaw(
info.dest_frames_to_frac_source_frames.rate().Scale(dest_frames_left - 1));
// If packet has no frames, there's no need to mix it; it may be skipped.
if (source_buffer.end() == source_buffer.start()) {
AUD_VLOG(TRACE) << " skipping an empty packet!";
return true;
}
FX_DCHECK(source_buffer.end() >= source_buffer.start() + 1);
// The above two calculated values characterize our demand. Now reason about our supply. Calculate
// the actual first and final frame times in the source packet.
FractionalFrames<int64_t> frac_source_for_first_packet_frame = source_buffer.start();
FractionalFrames<int64_t> frac_source_for_final_packet_frame = source_buffer.end() - 1;
// If this source packet's final audio frame occurs before our filter's negative edge, centered at
// our first sampling point, then this packet is entirely in the past and may be skipped.
// Returning true means we're done with the packet (it can be completed) and we would like another.
if (frac_source_for_final_packet_frame <
(frac_source_for_first_mix_job_frame - mixer->neg_filter_width())) {
// Report how late the packet's first frame was, both in fractional source frames and converted
// to a duration through the inverse of the clock-to-source rate.
FractionalFrames<int64_t> source_frac_frames_late = frac_source_for_first_mix_job_frame -
mixer->neg_filter_width() -
frac_source_for_first_packet_frame;
auto clock_mono_late = zx::nsec(info.clock_mono_to_frac_source_frames.rate().Inverse().Scale(
source_frac_frames_late.raw_value()));
source->UnderflowOccurred(frac_source_for_first_packet_frame,
frac_source_for_first_mix_job_frame, clock_mono_late);
return true;
}
// If this source packet's first audio frame occurs after our filter's positive edge, centered at
// our final sampling point, then this packet is entirely in the future and should be held.
// Returning false (based on requirement that packets must be presented in timestamp-chronological
// order) means that we have consumed all of the available packet "supply" as we can at this time.
if (frac_source_for_first_packet_frame >
(frac_source_for_final_mix_job_frame + mixer->pos_filter_width())) {
return false;
}
// If neither of the above, then evidently this source packet intersects our mixer's filter.
// Compute the offset into the dest buffer where our first generated sample should land, and the
// offset into the source packet where we should start sampling.
int64_t dest_offset_64 = 0;
FractionalFrames<int64_t> frac_source_offset_64 =
frac_source_for_first_mix_job_frame - frac_source_for_first_packet_frame;
FractionalFrames<int64_t> frac_source_pos_edge_first_mix_frame =
frac_source_for_first_mix_job_frame + mixer->pos_filter_width();
// If the packet's first frame comes after the filter window's positive edge,
// then we should skip some frames in the destination buffer before starting to produce data.
if (frac_source_for_first_packet_frame > frac_source_pos_edge_first_mix_frame) {
const TimelineRate& dest_to_src = info.dest_frames_to_frac_source_frames.rate();
// The dest_buffer offset is based on the distance from mix job start to packet start (measured
// in frac_frames), converted into frames in the destination timeline. As we scale the
// frac_frame delta into dest frames, we want to "round up" any subframes that are present; any
// src subframes should push our dest frame up to the next integer. To do this, we subtract a
// single subframe (guaranteeing that the zero-fraction src case will truncate down), then scale
// the src delta to dest frames (which effectively truncates any resultant fraction in the
// computed dest frame), then add an additional 'round-up' frame (to account for initial
// subtract). Because we entered this IF in the first place, we have at least some fractional
// src delta, thus dest_offset_64 is guaranteed to become greater than zero.
FractionalFrames<int64_t> first_source_mix_point =
frac_source_for_first_packet_frame - frac_source_pos_edge_first_mix_frame;
dest_offset_64 = dest_to_src.Inverse().Scale(first_source_mix_point.raw_value() - 1) + 1;
FX_DCHECK(dest_offset_64 > 0);
// Advance the source offset by the source-timeline equivalent of the skipped dest frames.
frac_source_offset_64 += FractionalFrames<int64_t>::FromRaw(dest_to_src.Scale(dest_offset_64));
source->PartialUnderflowOccurred(frac_source_offset_64, dest_offset_64);
}
// Narrow both offsets to the 32-bit forms used from here on; the DCHECKs guard the narrowing.
FX_DCHECK(dest_offset_64 >= 0);
FX_DCHECK(dest_offset_64 < static_cast<int64_t>(dest_frames_left));
auto dest_offset = static_cast<uint32_t>(dest_offset_64);
FX_DCHECK(frac_source_offset_64 <= std::numeric_limits<int32_t>::max());
FX_DCHECK(frac_source_offset_64 >= std::numeric_limits<int32_t>::min());
auto frac_source_offset = FractionalFrames<int32_t>(frac_source_offset_64);
// Looks like we are ready to go. Mix.
FX_DCHECK(source_buffer.length() <= FractionalFrames<uint32_t>(FractionalFrames<int32_t>::Max()));
FX_DCHECK(frac_source_offset + mixer->pos_filter_width() >= FractionalFrames<uint32_t>(0));
bool consumed_source = false;
if (frac_source_offset + mixer->pos_filter_width() < source_buffer.length()) {
// When calling Mix(), we communicate the resampling rate with three parameters. We augment
// step_size with rate_modulo and denominator arguments that capture the remaining rate
// component that cannot be expressed by a 19.13 fixed-point step_size. Note: step_size and
// frac_source_offset use the same format -- they have the same limitations in what they can and
// cannot communicate.
//
// For perfect position accuracy, just as we track incoming/outgoing fractional source offset,
// we also need to track the ongoing subframe_position_modulo. This is now added to Mix() and
// maintained across calls, but not initially set to any value other than zero. For now, we are
// deferring that work, tracking it with MTWN-128.
//
// Q: Why did we solve this issue for Rate but not for initial Position?
// A: We solved this issue for *rate* because its effect accumulates over time, causing clearly
// measurable distortion that becomes crippling with larger jobs. For *position*, there is no
// accumulated magnification over time -- in analyzing the distortion that this should cause,
// mix job size affects the distortion's frequency but not its amplitude. We expect the effects
// to be below audible thresholds. Until the effects are measurable and attributable to this
// jitter, we will defer this work.
// Remember the starting offsets: the dest delta drives the gain-ramp advance below, and the
// source offset is only used for logging.
auto prev_dest_offset = dest_offset;
auto prev_frac_source_offset = frac_source_offset;
// Check whether we are still ramping
bool ramping = info.gain.IsRamping();
if (ramping) {
// Fill the scale array for the frames about to be mixed, capped at the array's length.
info.gain.GetScaleArray(
info.scale_arr.get(),
std::min(dest_frames_left - dest_offset, Mixer::Bookkeeping::kScaleArrLen),
cur_mix_job_.reference_clock_to_destination_frame->rate());
}
{
// Mix() takes the source offset as a raw int32 in/out parameter; round-trip it through a local.
int32_t raw_source_offset = frac_source_offset.raw_value();
consumed_source = mixer->Mix(buf, dest_frames_left, &dest_offset, source_buffer.payload(),
source_buffer.length().raw_value(), &raw_source_offset,
cur_mix_job_.accumulate);
frac_source_offset = FractionalFrames<int32_t>::FromRaw(raw_source_offset);
}
FX_DCHECK(dest_offset <= dest_frames_left);
AUD_VLOG_OBJ(SPEW, this) << " consumed from " << std::hex << std::setw(8)
<< prev_frac_source_offset.raw_value() << " to " << std::setw(8)
<< frac_source_offset.raw_value() << ", of " << std::setw(8)
<< source_buffer.length().raw_value();
// If src is ramping, advance by delta of dest_offset
if (ramping) {
info.gain.Advance(dest_offset - prev_dest_offset,
cur_mix_job_.reference_clock_to_destination_frame->rate());
}
} else {
// This packet was initially within our mix window. After realigning our sampling point to the
// nearest dest frame, it is now entirely in the past. This can only occur when down-sampling
// and is made more likely if the rate conversion ratio is very high. We've already reported
// a partial underflow when realigning, so just complete the packet and move on to the next.
consumed_source = true;
}
if (consumed_source) {
FX_DCHECK(frac_source_offset + mixer->pos_filter_width() >= source_buffer.length());
}
// Account for the dest frames covered during this call: any frames skipped above plus
// (presumably) whatever Mix() advanced dest_offset by.
cur_mix_job_.frames_produced += dest_offset;
FX_DCHECK(cur_mix_job_.frames_produced <= cur_mix_job_.buf_frames);
return consumed_source;
}
// Per-source setup performed before trimming: converts |now| into the source's fractional-frame
// timeline and stores it as the trim threshold consulted by ProcessTrim.
//
// ForEachSource has already refreshed the clock-to-source transformation, so it is used as-is.
void AudioOutput::SetupTrim(Mixer* mixer, zx::time now) {
  TRACE_DURATION("audio", "AudioOutput::SetupTrim");
  FX_DCHECK(mixer);

  auto& bookkeeping = mixer->bookkeeping();
  const int64_t now_nsecs = (now - zx::time(0)).to_nsecs();

  // RateControlBase guarantees that the transformation into the media timeline is never singular.
  // A failing forward transformation would therefore have to be an overflow, which should be
  // impossible unless the user defined a playback rate whose media-ticks-to-local-ticks ratio
  // exceeds one.
  const auto threshold_raw = bookkeeping.clock_mono_to_frac_source_frames(now_nsecs);
  trim_threshold_ = FractionalFrames<int64_t>::FromRaw(threshold_raw);
}
// Decides whether |buffer| can be trimmed (released).
//
// Returns true if the packet's presentation end is at or before the trim threshold computed by
// SetupTrim; returns false (stop trimming) if the packet still ends in the future.
bool AudioOutput::ProcessTrim(const Stream::Buffer& buffer) {
  TRACE_DURATION("audio", "AudioOutput::ProcessTrim");
  const bool ends_in_future = buffer.end() > trim_threshold_;
  return !ends_in_future;
}
// Refreshes |bk|'s clock-to-fractional-source-frame transformation from |stream|.
//
// The transformation itself is always copied. The generation ids are only touched when the
// stream's generation differs from the cached one, in which case the dest-to-source generation is
// invalidated as well.
void AudioOutput::UpdateSourceTrans(const Stream& stream, Mixer::Bookkeeping* bk) {
  TRACE_DURATION("audio", "AudioOutput::UpdateSourceTrans");

  auto snapshot = stream.ReferenceClockToFractionalFrames();
  bk->clock_mono_to_frac_source_frames = snapshot.first;

  // A changed generation means the local->media mapping changed since the last call: adopt the new
  // generation and invalidate the derived dest-to-source transformation.
  if (bk->source_trans_gen_id != snapshot.second) {
    bk->source_trans_gen_id = snapshot.second;
    bk->dest_trans_gen_id = kInvalidGenerationId;
  }
}
void AudioOutput::UpdateDestTrans(const MixJob& job, Mixer::Bookkeeping* bk) {
TRACE_DURATION("audio", "AudioOutput::UpdateDestTrans");
// We should only be here if we have a valid mix job. This means a job which supplies a valid
// transformation from local time to output frames.
FX_DCHECK(job.reference_clock_to_destination_frame);
FX_DCHECK(job.reference_clock_to_destination_frame_gen != kInvalidGenerationId);
// If generations match, don't re-compute -- just use what we have already.
if (bk->dest_trans_gen_id == job.reference_clock_to_destination_frame_gen) {
return;
}
// Assert we can map from local time to fractional renderer frames.
FX_DCHECK(bk->source_trans_gen_id != kInvalidGenerationId);
// Combine the job-supplied local-to-output transformation, with the renderer-supplied mapping of
// local-to-input-subframe, to produce a transformation which maps from output frames to
// fractional input frames.
TimelineFunction& dest = bk->dest_frames_to_frac_source_frames;
dest = bk->clock_mono_to_frac_source_frames * job.reference_clock_to_destination_frame->Inverse();
// Finally, compute the step size in subframes. IOW, every time we move forward one output frame,
// how many input subframes should we consume. Don't bother doing the multiplications if already
// we know the numerator is zero.
FX_DCHECK(dest.rate().reference_delta());
if (!dest.rate().subject_delta()) {
bk->step_size = 0;
bk->denominator = 0; // shouldn't also need to clear rate_mod and pos_mod
} else {
int64_t tmp_step_size = dest.rate().Scale(1);
FX_DCHECK(tmp_step_size >= 0);
FX_DCHECK(tmp_step_size <= std::numeric_limits<uint32_t>::max());
bk->step_size = static_cast<uint32_t>(tmp_step_size);
bk->denominator = bk->SnapshotDenominatorFromDestTrans();
bk->rate_modulo = dest.rate().subject_delta() - (bk->denominator * bk->step_size);
}
// Done, update our dest_trans generation.
bk->dest_trans_gen_id = job.reference_clock_to_destination_frame_gen;
}
// Tears down this output: runs the base-class AudioDevice cleanup, then cancels the mix timer that
// Process() re-arms on each cycle.
void AudioOutput::Cleanup() {
AudioDevice::Cleanup();
mix_timer_.Cancel();
}
} // namespace media::audio