// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <fuchsia/media/cpp/fidl.h>
#include <lib/async-loop/cpp/loop.h>
#include <lib/async-loop/default.h>
#include <lib/async/cpp/task.h>
#include <lib/fit/defer.h>
#include <lib/fzl/vmo-mapper.h>
#include <lib/sys/cpp/component_context.h>
#include <lib/syslog/cpp/macros.h>
#include <lib/zx/clock.h>
#include <lib/zx/vmo.h>
#include "src/lib/fxl/command_line.h"
#include "src/lib/fxl/strings/string_printf.h"
#include "src/media/audio/lib/analysis/analysis.h"
#include "src/media/audio/lib/analysis/generators.h"
#include "src/media/audio/lib/clock/clone_mono.h"
#include "src/media/audio/lib/clock/utils.h"
#include "src/media/audio/lib/format/audio_buffer.h"
#include "src/media/audio/lib/format/format.h"
#include "src/media/audio/lib/logging/cli.h"
#include "src/media/audio/lib/wav/wav_writer.h"
using ASF = fuchsia::media::AudioSampleFormat;
namespace {
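// Test signal parameters. At 96kHz, each 2ms capture packet holds 192 frames and
// the 10ms ring-in spans 960 frames; the impulse itself is 10 frames (~104us) at
// 75% of full scale.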
constexpr auto kChannelCount = 1;
constexpr auto kFrameRate = 96000;
constexpr auto kFramesPerCapturePacket = kFrameRate * 2 / 1000; // 2ms
constexpr auto kImpulseRingInFrames = kFrameRate * 10 / 1000; // 10ms
constexpr auto kImpulseRingInDuration = zx::msec(10);
constexpr auto kImpulseFrames = 10;
constexpr auto kImpulseMagnitude = 0.75;
const auto kImpulseFormat =
media::audio::Format::Create<ASF::FLOAT>(kChannelCount, kFrameRate).value();
const auto kCaptureFormat =
media::audio::Format::Create<ASF::FLOAT>(kChannelCount, kFrameRate).value();
// Given perfect math and full-volume output, the impulse is a step function with
// magnitude kImpulseMagnitude. Due to quantization and internal scaling, we may
// see different values. Also, on some devices, the microphone picks up sounds at
// a much lower volume than the output. Empirically, the following threshold works
// well on an Astro device at full volume.
constexpr float kNoiseFloor = 0.01f;
bool verbose = false;
zx::time global_start_time_mono;
async::Loop* loop;
void Shutdown() {
async::PostTask(loop->dispatcher(), []() { loop->Quit(); });
}
zx::clock DupClock() {
// Use the same clock for all renderers and capturers so everything is sync'd up.
// Currently we're using the system monotonic clock.
return media::audio::clock::CloneOfMonotonic();
}
double DurationToFrames(zx::duration d) {
return static_cast<double>(d.to_nsecs()) * kFrameRate / 1e9;
}
std::string SprintDuration(zx::duration d) {
return fxl::StringPrintf("%ld ns (%f frames)", d.to_nsecs(), DurationToFrames(d));
}
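// A single-threaded countdown latch: once Wait() has been called `size` times,
// every queued callback runs. Not thread-safe; all calls must come from the
// async loop thread.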
class Barrier {
public:
explicit Barrier(size_t size) : size_(size) {}
void Wait(std::function<void()> ready_cb) {
callbacks_.push_back(std::move(ready_cb));
size_--;
if (size_ == 0) {
for (auto& cb : callbacks_) {
cb();
}
}
}
private:
size_t size_;
std::vector<std::function<void()>> callbacks_;
};
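// Captures a single audio stream (microphone or loopback) to both a WAV file and
// an in-memory buffer, then locates impulse onsets in the captured audio.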
class Capture {
public:
Capture(fuchsia::media::AudioCorePtr& audio, bool is_loopback, const std::string& filename,
Barrier& barrier)
: filename_(filename), format_(kCaptureFormat), barrier_(barrier), buffer_(format_, 0) {
// Create the WAV file writer.
CLI_CHECK(wav_writer_.Initialize(filename_.c_str(), format_.sample_format(),
static_cast<uint16_t>(format_.channels()),
format_.frames_per_second(),
static_cast<uint16_t>(format_.bytes_per_sample() * 8)),
"Could not create " << filename);
// Create the capturer.
audio->CreateAudioCapturer(is_loopback, capturer_.NewRequest());
capturer_.set_error_handler([this](zx_status_t status) {
printf("Capturer for %s failed with status %d.\n", filename_.c_str(), status);
Shutdown();
});
capturer_->SetReferenceClock(DupClock());
capturer_->SetPcmStreamType(format_.stream_type());
SetupPayloadBuffer();
capturer_->GetReferenceClock([this](zx::clock c) {
clock_ = std::move(c);
barrier_.Wait([this]() { Start(); });
});
}
~Capture() {
printf("Closing %s (%lu frames, %lu bytes)\n", filename_.c_str(), buffer_.NumFrames(),
buffer_.NumBytes());
CLI_CHECK(wav_writer_.Close(), "Could not close " << filename_);
}
void Stop() {
capturer_.events().OnPacketProduced = nullptr;
capturer_->StopAsyncCaptureNoReply();
}
// Given a list of times where we expect to see signals, return a list of times where
// signals are actually detected, using -1 when a signal cannot be detected.
std::vector<zx::time> FindSounds(std::vector<zx::time> expected_times_mono) {
std::vector<zx::time> out;
for (auto expected_time_mono : expected_times_mono) {
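// frames_to_mono_time_ maps buffer frame indices to monotonic time, so its
// inverse maps the expected monotonic time back to a frame index.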
int64_t expected_frame = frames_to_mono_time_.Inverse().Apply(expected_time_mono.get());
// If everything goes perfectly, we should find the signal at exactly expected_time_mono
// for the loopback capture and slightly later for the microphone capture. Signals are
// separated by 1s. To account for signals that might be way off, search +/- 250ms around
// the expected time.
auto search_time_start = expected_time_mono - zx::msec(250);
int64_t search_frame_start =
std::max(0l, frames_to_mono_time_.Inverse().Apply(search_time_start.get()));
int64_t search_frame_end =
std::min(search_frame_start + format_.frames_per_ns().Scale(ZX_MSEC(500)),
static_cast<int64_t>(buffer_.NumFrames()));
auto slice = media::audio::AudioBufferSlice(&buffer_, search_frame_start, search_frame_end);
auto max_frame = FindImpulseLeadingEdge(slice, kNoiseFloor);
if (verbose) {
printf("[verbose] searched through frames %lu to %lu\n", search_frame_start,
search_frame_end);
for (auto f = search_frame_start; f < search_frame_end; f++) {
// Print if this value or one of the next four values exceeds the noise floor.
bool print = false;
for (auto k = f; k < f + 5 && k < search_frame_end; k++) {
print = print || (buffer_.SampleAt(k, 0) > kNoiseFloor);
}
if (print) {
auto val = buffer_.SampleAt(f, 0);
int64_t slice_index = f - search_frame_start;
printf("[verbose] frame %lu, sample %f%s%s\n", f, val,
(max_frame && slice_index == *max_frame) ? " (left edge)" : "",
(slice_index == (expected_frame - search_frame_start)) ? " (expected)" : "");
}
}
}
if (!max_frame) {
out.push_back(zx::time(-1));
continue;
}
auto left_edge = *max_frame + search_frame_start;
out.push_back(zx::time(frames_to_mono_time_.Apply(left_edge)));
if (verbose) {
printf("[verbose] *** signal estimated at frame %lu, expected signal at frame %lu\n",
left_edge, expected_frame);
}
}
return out;
}
private:
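// Allocate a payload VMO holding one second of audio and zero it. The capturer
// fills this buffer in kFramesPerCapturePacket-sized packets, recycling each
// region once we release the corresponding packet.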
void SetupPayloadBuffer() {
const auto frames_per_payload = format_.frames_per_second(); // 1s
const auto bytes_per_payload = frames_per_payload * format_.bytes_per_frame();
zx::vmo vmo;
auto status = vmo_mapper_.CreateAndMap(
bytes_per_payload, ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, nullptr, &vmo,
ZX_RIGHT_READ | ZX_RIGHT_WRITE | ZX_RIGHT_MAP | ZX_RIGHT_TRANSFER);
CLI_CHECK_OK(status, "Failed to create " << bytes_per_payload << "-byte payload buffer");
memset(PayloadStart(), 0, bytes_per_payload);
capturer_->AddPayloadBuffer(0, std::move(vmo));
}
void Start() {
printf("Starting capture to %s\n", filename_.c_str());
capturer_.events().OnPacketProduced = [this](fuchsia::media::StreamPacket pkt) {
OnPacket(pkt);
};
capturer_->StartAsyncCapture(kFramesPerCapturePacket);
}
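// Appends each produced packet to both the WAV file and buffer_. The first
// packet also establishes the frame-to-monotonic-time mapping and backfills
// silence from global_start_time_mono up to that packet's PTS.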
void OnPacket(fuchsia::media::StreamPacket pkt) {
auto cleanup = fit::defer([this, pkt]() { capturer_->ReleasePacket(pkt); });
if (!wrote_first_packet_) {
// The first output frame should occur at global_start_time_mono.
// Write enough silence to cover the time between then and this packet's PTS.
auto packet_time_mono =
media::audio::clock::MonotonicTimeFromReferenceTime(clock_, zx::time(pkt.pts)).value();
auto duration = packet_time_mono - global_start_time_mono;
FX_CHECK(duration.get() > 0) << duration.get();
auto num_silent_frames = format_.frames_per_ns().Scale(duration.get());
if (verbose) {
printf("[verbose] Writing %ld silent frames to the start of %s\n", num_silent_frames,
filename_.c_str());
}
std::vector<char> buffer(num_silent_frames * format_.bytes_per_frame());
if (!wav_writer_.Write(reinterpret_cast<void*>(&buffer[0]),
static_cast<uint32_t>(buffer.size()))) {
printf("First write failed.\n");
CLI_CHECK(wav_writer_.Close(), "File close failed as well.");
Shutdown();
}
wrote_first_packet_ = true;
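// Map buffer frame indices to monotonic time: frame 0 of buffer_ corresponds
// to this packet's PTS (translated to monotonic time), and subsequent frames
// advance at the stream's nominal rate.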
frames_to_mono_time_ =
media::TimelineFunction(packet_time_mono.get(), 0, format_.frames_per_ns().Inverse());
} else {
if (pkt.flags & fuchsia::media::STREAM_PACKET_FLAG_DISCONTINUITY) {
printf("WARNING: found discontinuity within recording of %s\n", filename_.c_str());
}
}
if (!pkt.payload_size) {
return;
}
// Append this packet to the WAV file.
auto first_byte = PayloadStart() + pkt.payload_offset;
if (!wav_writer_.Write(reinterpret_cast<void*>(first_byte),
static_cast<uint32_t>(pkt.payload_size))) {
printf("File write failed. Trying to save any already-written data.\n");
CLI_CHECK(wav_writer_.Close(), "File close failed as well.");
Shutdown();
}
// Also save the full audio as an in-memory buffer.
float* first_sample = reinterpret_cast<float*>(first_byte);
buffer_.samples().insert(buffer_.samples().end(), first_sample,
first_sample + pkt.payload_size / format_.bytes_per_sample());
}
uint8_t* PayloadStart() const { return reinterpret_cast<uint8_t*>(vmo_mapper_.start()); }
const std::string filename_;
const media::audio::TypedFormat<ASF::FLOAT> format_;
Barrier& barrier_;
fuchsia::media::AudioCapturerPtr capturer_;
media::audio::WavWriter<> wav_writer_;
fzl::VmoMapper vmo_mapper_;
zx::clock clock_;
bool wrote_first_packet_ = false;
media::TimelineFunction frames_to_mono_time_;
media::audio::AudioBuffer<ASF::FLOAT> buffer_;
};
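// Plays `sound` at `reference_time` on a one-shot AudioRenderer. The renderer is
// held in a shared_ptr captured by the SendPacket callback so it stays alive
// until the sound has been fully rendered, then unbinds itself.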
void PlaySound(fuchsia::media::AudioCorePtr& audio, zx::clock reference_clock,
zx::time reference_time, media::audio::AudioBuffer<ASF::FLOAT> sound) {
// Create a renderer.
// We wrap this in a shared_ptr so it can live until the sound is fully rendered.
auto holder = std::make_shared<fuchsia::media::AudioRendererPtr>();
auto& r = *holder;
audio->CreateAudioRenderer(r.NewRequest());
r.set_error_handler([](zx_status_t status) {
printf("PlaySound renderer failed with status %d.\n", status);
Shutdown();
});
r->SetReferenceClock(std::move(reference_clock));
r->SetUsage2(fuchsia::media::AudioRenderUsage2::MEDIA);
r->SetPcmStreamType(kImpulseFormat.stream_type());
// Set up the payload buffer.
fzl::VmoMapper vmo_mapper;
zx::vmo vmo;
auto status =
vmo_mapper.CreateAndMap(sound.NumBytes(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, nullptr, &vmo,
ZX_RIGHT_READ | ZX_RIGHT_MAP | ZX_RIGHT_TRANSFER);
CLI_CHECK_OK(status, "Failed to create " << sound.NumBytes() << "-byte payload buffer");
memmove(reinterpret_cast<uint8_t*>(vmo_mapper.start()),
reinterpret_cast<uint8_t*>(&sound.samples()[0]), sound.NumBytes());
r->AddPayloadBuffer(0, std::move(vmo));
auto pkt = fuchsia::media::StreamPacket{
.pts = 0,
.payload_buffer_id = 0,
.payload_offset = 0,
.payload_size = static_cast<size_t>(sound.NumBytes()),
};
// Play this sound and tear down the renderer once the sound has been played.
r->SendPacket(pkt, [holder]() mutable {
printf("Played sound\n");
holder->Unbind();
});
r->Play(reference_time.get(), 0,
[reference_time](int64_t play_ref_time, int64_t play_media_time) {
if (play_ref_time != reference_time.get()) {
printf("WARNING: Play() changed the reference time by %ld ns\n",
play_ref_time - reference_time.get());
}
if (play_media_time != 0) {
printf("WARNING: Play() changed the media time from 0 to %ld\n", play_media_time);
}
});
}
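// Compares, for each impulse, when it was rendered against when it was detected
// in the loopback and microphone captures, then prints per-impulse pass/fail
// results and a summary.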
void CheckAlignment(std::vector<zx::time> play_times, std::vector<zx::time> microphone_times,
std::vector<zx::time> loopback_times) {
printf("============================================\n");
printf("Alignment\n");
printf("\n");
printf("Ideally, the loopback should be perfectly aligned with the renderer and the\n");
printf("microphone should occur slightly later due to propagation delay between the\n");
printf("speaker and microphone (assuming 6\" separation, the delay should be 437us).\n");
printf("\n");
int tests_pass = 0;
int tests_unknown = 0;
for (size_t k = 0; k < play_times.size(); k++) {
auto rt = play_times[k] - global_start_time_mono;
auto mt = microphone_times[k] - global_start_time_mono;
auto lt = loopback_times[k] - global_start_time_mono;
printf("Sound %lu\n", k);
printf(" render @ %ld ns\n", rt.to_nsecs());
if (mt.get() > 0) {
printf(" microphone @ %ld ns, render - microphone = %s\n", mt.to_nsecs(),
SprintDuration(rt - mt).c_str());
} else {
printf(" not found in microphone\n");
}
if (lt.get() > 0) {
printf(" loopback @ %ld ns, render - loopback = %s", lt.to_nsecs(),
SprintDuration(rt - lt).c_str());
if (mt.get() > 0) {
printf(", microphone - loopback = %s", SprintDuration(mt - lt).c_str());
}
printf("\n");
} else {
printf(" not found in loopback\n");
}
if (mt.get() > 0 && lt.get() > 0) {
bool pass = true;
// Loopback timestamp must match the render timestamp.
if (auto delta_frames = std::abs(DurationToFrames(rt - lt)); delta_frames > 1) {
pass = false;
printf(" failed: loopback not aligned with renderer\n");
}
// Microphone timestamp must be beyond the loopback timestamp by at most 100ms.
if (mt.get() < lt.get()) {
pass = false;
printf(" failed: microphone timestamp before loopback timestamp\n");
} else if (mt - lt > zx::msec(100)) {
pass = false;
printf(" failed: microphone timestamp more than 100ms after loopback timestamp\n");
}
if (pass) {
printf(" passed\n");
tests_pass++;
}
} else {
tests_unknown++;
}
printf("\n");
}
printf("Results\n");
printf(" %d passed\n", tests_pass);
printf(" %lu failed\n", play_times.size() - (tests_pass + tests_unknown));
printf(" %d could not locate timestamps\n", tests_unknown);
printf("\n");
}
} // namespace
int main(int argc, const char** argv) {
loop = new async::Loop(&kAsyncLoopConfigAttachToCurrentThread);
auto ctx = sys::ComponentContext::CreateAndServeOutgoingDirectory();
auto command_line = fxl::CommandLineFromArgcArgv(argc, argv);
if (command_line.HasOption("help")) {
printf("Usage: audio-capture-timestamp-validator [--duration-seconds=10] [--verbose]\n");
printf("\n");
printf("This tool helps to debug capture timestamp issues. It does three things\n");
printf("concurrently:\n");
printf("\n");
printf(" 1. Plays a short impulse once per second\n");
printf(" 2. Captures the loopback interface\n");
printf(" 3. Captures the microphone interface\n");
printf("\n");
printf("The tool then compares the timestamps at which the impulses are captured by\n");
printf("the loopback and microphone interfaces. Microphone timestamps should occur\n");
printf("strictly after loopback timestamps. Direct open-air acoustic propagation is\n");
printf("approximately 1 ft/ms; many full-duplex algorithms accommodate environmental\n");
printf("delays of up to 100 ms.\n");
printf("\n");
printf("The captured audio is saved to WAV files for futher debugging.\n");
return 0;
}
verbose = command_line.HasOption("verbose");
printf("WARNING: Volume will be increased to 100%% temporarily. If the tool does not\n");
printf(" shut down cleanly, the volume may not be restored. For most accurate\n");
printf(" results, run in a quiet environment.\n");
std::string duration_str;
int64_t duration_seconds = 10;
if (command_line.GetOptionValue("duration-seconds", &duration_str)) {
CLI_CHECK(sscanf(duration_str.c_str(), "%li", &duration_seconds) == 1,
"--duration-seconds must be an integer");
CLI_CHECK(duration_seconds > 0, "--duration-seconds must be positive");
}
// Set the volume to 100%.
fuchsia::media::AudioCorePtr audio_core = ctx->svc()->Connect<fuchsia::media::AudioCore>();
fuchsia::media::audio::VolumeControlPtr volume_control;
audio_core->BindUsageVolumeControl2(fuchsia::media::Usage2::WithRenderUsage(
fidl::Clone(fuchsia::media::AudioRenderUsage2::MEDIA)),
volume_control.NewRequest());
std::optional<float> old_volume;
volume_control.events().OnVolumeMuteChanged = [&old_volume](float v, bool muted) {
if (!old_volume) {
printf("Saving old volume: %f\n", v);
old_volume = v;
}
};
while (!old_volume) {
loop->RunUntilIdle();
}
volume_control->SetVolume(1.0);
// Restore volume on exit.
auto restore_volume =
fit::defer([old_volume, &volume_control]() { volume_control->SetVolume(*old_volume); });
// Create an impulse signal prefixed by a silent ring in.
auto packet = GenerateSilentAudio(kImpulseFormat, kImpulseRingInFrames);
auto impulse = GenerateConstantAudio(kImpulseFormat, kImpulseFrames, kImpulseMagnitude);
packet.Append(&impulse);
// Play an impulse every second. Play the first sound at least (1s - ring-in) in the
// future so it's well beyond the renderer's MinLeadTime and so we have plenty of time
// to set up the capturers before the first sound is played.
global_start_time_mono = zx::clock::get_monotonic();
std::vector<zx::time> play_times;
for (auto k = 1; k < duration_seconds; k++) {
auto t = global_start_time_mono + zx::sec(k);
PlaySound(audio_core, DupClock(), t, packet);
play_times.push_back(t + kImpulseRingInDuration);
}
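// Each play_times entry marks the expected onset of the impulse proper: the
// packet begins with kImpulseRingInDuration of silence, so the audible edge
// lands that much after the packet's start time.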
// Start the capturers.
// We use a barrier to align the start times of the output WAV files.
auto barrier = std::make_unique<Barrier>(2);
auto microphone = std::make_unique<Capture>(audio_core, false, "/tmp/microphone.wav", *barrier);
auto loopback = std::make_unique<Capture>(audio_core, true, "/tmp/loopback.wav", *barrier);
loop->Run(zx::clock::get_monotonic() + zx::sec(duration_seconds));
microphone->Stop();
loopback->Stop();
loop->RunUntilIdle();
// Check alignment.
if (verbose) {
printf("[verbose] Looking for sounds in the microphone capture\n");
}
auto microphone_times = microphone->FindSounds(play_times);
if (verbose) {
printf("[verbose] Looking for sounds in the loopback capture\n");
}
auto loopback_times = loopback->FindSounds(play_times);
CheckAlignment(play_times, microphone_times, loopback_times);
return 0;
}