// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <fuchsia/media/cpp/fidl.h>
#include <lib/async-loop/cpp/loop.h>
#include <lib/async-loop/default.h>
#include <lib/async/cpp/task.h>
#include <lib/fit/defer.h>
#include <lib/fzl/vmo-mapper.h>
#include <lib/sys/cpp/component_context.h>
#include <lib/syslog/cpp/macros.h>
#include <lib/zx/clock.h>
#include <lib/zx/vmo.h>
#include "src/lib/fxl/command_line.h"
#include "src/lib/fxl/strings/string_printf.h"
#include "src/media/audio/lib/analysis/analysis.h"
#include "src/media/audio/lib/analysis/generators.h"
#include "src/media/audio/lib/clock/clone_mono.h"
#include "src/media/audio/lib/clock/utils.h"
#include "src/media/audio/lib/format/audio_buffer.h"
#include "src/media/audio/lib/format/format.h"
#include "src/media/audio/lib/logging/cli.h"
#include "src/media/audio/lib/wav/wav_writer.h"
using ASF = fuchsia::media::AudioSampleFormat;
namespace {
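// Test signal parameters. At 96kHz, each 2ms capture packet holds 192 frames and
// the 10ms ring-in spans 960 frames; the impulse itself is 10 frames (~104us) at
// 75% of full scale.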
constexpr auto kChannelCount = 1;
constexpr auto kFrameRate = 96000;
constexpr auto kFramesPerCapturePacket = kFrameRate * 2 / 1000; // 2ms
constexpr auto kImpulseRingInFrames = kFrameRate * 10 / 1000; // 10ms
constexpr auto kImpulseRingInDuration = zx::msec(10);
constexpr auto kImpulseFrames = 10;
constexpr auto kImpulseMagnitude = 0.75;
const auto kImpulseFormat =
media::audio::Format::Create<ASF::FLOAT>(kChannelCount, kFrameRate).value();
const auto kCaptureFormat =
media::audio::Format::Create<ASF::FLOAT>(kChannelCount, kFrameRate).value();
// Given perfect math and full-volume output, the impulse is a step function with
// magnitude kImpulseMagnitude. Due to quantization and internal scaling, we may
// see different values. Also, on some devices, the microphone picks up sounds at
// a much lower volume than the output. Empirically, the following threshold works
// well on an Astro device at full volume.
constexpr float kNoiseFloor = 0.01f;
bool verbose = false;
zx::time global_start_time_mono;
async::Loop* loop;
void Shutdown() {
async::PostTask(loop->dispatcher(), []() { loop->Quit(); });
}
zx::clock DupClock() {
// Use the same clock for all renderers and capturers so everything is sync'd up.
// Currently we're using the system monotonic clock.
return media::audio::clock::CloneOfMonotonic();
}
double DurationToFrames(zx::duration d) {
return static_cast<double>(d.to_nsecs()) * kFrameRate / 1e9;
}
std::string SprintDuration(zx::duration d) {
return fxl::StringPrintf("%ld ns (%f frames)", d.to_nsecs(), DurationToFrames(d));
}
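// A single-threaded countdown latch: once Wait() has been called `size` times,
// every queued callback runs. Not thread-safe; all calls must come from the
// async loop thread.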
class Barrier {
public:
explicit Barrier(size_t size) : size_(size) {}
void Wait(std::function<void()> ready_cb) {
callbacks_.push_back(std::move(ready_cb));
size_--;
if (size_ == 0) {
for (auto& cb : callbacks_) {
cb();
}
}
}
private:
size_t size_;
std::vector<std::function<void()>> callbacks_;
};
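// Captures a single audio stream (microphone or loopback) to both a WAV file and
// an in-memory buffer, then locates impulse onsets in the captured audio.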
class Capture {
public:
Capture(fuchsia::media::AudioCorePtr& audio, bool is_loopback, const std::string& filename,
Barrier& barrier)
: filename_(filename), format_(kCaptureFormat), barrier_(barrier), buffer_(format_, 0) {
// Create the WAV file writer.
CLI_CHECK(wav_writer_.Initialize(filename_.c_str(), format_.sample_format(),
static_cast<uint16_t>(format_.channels()),
format_.frames_per_second(),
static_cast<uint16_t>(format_.bytes_per_sample() * 8)),
"Could not create " << filename);
// Create the capturer.
audio->CreateAudioCapturer(is_loopback, capturer_.NewRequest());
capturer_.set_error_handler([this](zx_status_t status) {
printf("Capturer for %s failed with status %d.\n", filename_.c_str(), status);
Shutdown();
});
capturer_->SetReferenceClock(DupClock());
capturer_->SetPcmStreamType(format_.stream_type());
SetupPayloadBuffer();
capturer_->GetReferenceClock([this](zx::clock c) {
clock_ = std::move(c);
barrier_.Wait([this]() { Start(); });
});
}
~Capture() {
printf("Closing %s (%lu frames, %lu bytes)\n", filename_.c_str(), buffer_.NumFrames(),
buffer_.NumBytes());
CLI_CHECK(wav_writer_.Close(), "Could not close " << filename_);
}
void Stop() {
capturer_.events().OnPacketProduced = nullptr;
capturer_->StopAsyncCaptureNoReply();
}
// Given a list of times where we expect to see signals, return a list of times where
// signals are actually detected, using -1 when a signal cannot be detected.
std::vector<zx::time> FindSounds(std::vector<zx::time> expected_times_mono) {
std::vector<zx::time> out;
for (auto expected_time_mono : expected_times_mono) {
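// frames_to_mono_time_ maps buffer frame indices to monotonic time, so its
// inverse maps the expected monotonic time back to a frame index.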
int64_t expected_frame = frames_to_mono_time_.Inverse().Apply(expected_time_mono.get());
// If everything goes perfectly, we should find the signal at exactly expected_time_mono
// for the loopback capture and slightly later for the microphone capture. Signals are
// separated by 1s. To account for signals that might be way off, search +/- 250ms around
// the expected time.
auto search_time_start = expected_time_mono - zx::msec(250);
int64_t search_frame_start =
std::max(0l, frames_to_mono_time_.Inverse().Apply(search_time_start.get()));
int64_t search_frame_end =
std::min(search_frame_start + format_.frames_per_ns().Scale(ZX_MSEC(500)),
static_cast<int64_t>(buffer_.NumFrames()));
auto slice = media::audio::AudioBufferSlice(&buffer_, search_frame_start, search_frame_end);
auto max_frame = FindImpulseLeadingEdge(slice, kNoiseFloor);
if (verbose) {
printf("[verbose] searched through frames %lu to %lu\n", search_frame_start,
search_frame_end);
for (auto f = search_frame_start; f < search_frame_end; f++) {
// Print if this value or one of the next four values exceeds the noise floor.
bool print = false;
for (auto k = f; k < f + 5 && k < search_frame_end; k++) {
print = print || (buffer_.SampleAt(k, 0) > kNoiseFloor);
}
if (print) {
auto val = buffer_.SampleAt(f, 0);
int64_t slice_index = f - search_frame_start;
printf("[verbose] frame %lu, sample %f%s%s\n", f, val,
(max_frame && slice_index == *max_frame) ? " (left edge)" : "",
(slice_index == (expected_frame - search_frame_start)) ? " (expected)" : "");
}
}
}
if (!max_frame) {
out.push_back(zx::time(-1));
continue;
}
auto left_edge = *max_frame + search_frame_start;
out.push_back(zx::time(frames_to_mono_time_.Apply(left_edge)));
if (verbose) {
printf("[verbose] *** signal estimated at frame %lu, expected signal at frame %lu\n",
left_edge, expected_frame);
}
}
return out;
}
private:
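// Allocate a payload VMO holding one second of audio and zero it. The capturer
// fills this buffer in kFramesPerCapturePacket-sized packets, recycling each
// region once we release the corresponding packet.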
void SetupPayloadBuffer() {
const auto frames_per_payload = format_.frames_per_second(); // 1s
const auto bytes_per_payload = frames_per_payload * format_.bytes_per_frame();
zx::vmo vmo;
auto status = vmo_mapper_.CreateAndMap(
bytes_per_payload, ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, nullptr, &vmo,
ZX_RIGHT_READ | ZX_RIGHT_WRITE | ZX_RIGHT_MAP | ZX_RIGHT_TRANSFER);
CLI_CHECK_OK(status, "Failed to create " << bytes_per_payload << "-byte payload buffer");
memset(PayloadStart(), 0, bytes_per_payload);
capturer_->AddPayloadBuffer(0, std::move(vmo));
}
void Start() {
printf("Starting capture to %s\n", filename_.c_str());
capturer_.events().OnPacketProduced = [this](fuchsia::media::StreamPacket pkt) {
OnPacket(pkt);
};
capturer_->StartAsyncCapture(kFramesPerCapturePacket);
}
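// Appends each produced packet to both the WAV file and buffer_. The first
// packet also establishes the frame-to-monotonic-time mapping and backfills
// silence from global_start_time_mono up to that packet's PTS.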
void OnPacket(fuchsia::media::StreamPacket pkt) {
auto cleanup = fit::defer([this, pkt]() { capturer_->ReleasePacket(pkt); });
if (!wrote_first_packet_) {
// The first output frame should occur at global_start_time_mono.
// Write enough silence to cover the time between then and this packet's PTS.
auto packet_time_mono =
media::audio::clock::MonotonicTimeFromReferenceTime(clock_, zx::time(pkt.pts)).value();
auto duration = packet_time_mono - global_start_time_mono;
FX_CHECK(duration.get() > 0) << duration.get();
auto num_silent_frames = format_.frames_per_ns().Scale(duration.get());
if (verbose) {
printf("[verbose] Writing %ld silent frames to the start of %s\n", num_silent_frames,
filename_.c_str());
}
std::vector<char> buffer(num_silent_frames * format_.bytes_per_frame());
if (!wav_writer_.Write(reinterpret_cast<void*>(&buffer[0]),
static_cast<uint32_t>(buffer.size()))) {
printf("First write failed.\n");
CLI_CHECK(wav_writer_.Close(), "File close failed as well.");
Shutdown();
}
wrote_first_packet_ = true;
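// Map buffer frame indices to monotonic time: frame 0 of buffer_ corresponds
// to this packet's PTS (translated to monotonic time), and subsequent frames
// advance at the stream's nominal rate.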
frames_to_mono_time_ =
media::TimelineFunction(packet_time_mono.get(), 0, format_.frames_per_ns().Inverse());
} else {
if (pkt.flags & fuchsia::media::STREAM_PACKET_FLAG_DISCONTINUITY) {
printf("WARNING: found discontinuity within recording of %s\n", filename_.c_str());
}
}
if (!pkt.payload_size) {
return;
}
// Append this packet to the WAV file.
auto first_byte = PayloadStart() + pkt.payload_offset;
if (!wav_writer_.Write(reinterpret_cast<void*>(first_byte),
static_cast<uint32_t>(pkt.payload_size))) {
printf("File write failed. Trying to save any already-written data.\n");
CLI_CHECK(wav_writer_.Close(), "File close failed as well.");
Shutdown();
}
// Also save the full audio as an in-memory buffer.
float* first_sample = reinterpret_cast<float*>(first_byte);
buffer_.samples().insert(buffer_.samples().end(), first_sample,
first_sample + pkt.payload_size / format_.bytes_per_sample());
}
uint8_t* PayloadStart() const { return reinterpret_cast<uint8_t*>(vmo_mapper_.start()); }
const std::string filename_;
const media::audio::TypedFormat<ASF::FLOAT> format_;
Barrier& barrier_;
fuchsia::media::AudioCapturerPtr capturer_;
media::audio::WavWriter<> wav_writer_;
fzl::VmoMapper vmo_mapper_;
zx::clock clock_;
bool wrote_first_packet_ = false;
media::TimelineFunction frames_to_mono_time_;
media::audio::AudioBuffer<ASF::FLOAT> buffer_;
};
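// Plays `sound` at `reference_time` on a one-shot AudioRenderer. The renderer is
// held in a shared_ptr captured by the SendPacket callback so it stays alive
// until the sound has been fully rendered, then unbinds itself.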
void PlaySound(fuchsia::media::AudioCorePtr& audio, zx::clock reference_clock,
zx::time reference_time, media::audio::AudioBuffer<ASF::FLOAT> sound) {
// Create a renderer.
// We wrap this in a shared_ptr so it can live until the sound is fully rendered.
auto holder = std::make_shared<fuchsia::media::AudioRendererPtr>();
auto& r = *holder;
audio->CreateAudioRenderer(r.NewRequest());
r.set_error_handler([](zx_status_t status) {
printf("PlaySound renderer failed with status %d.\n", status);
Shutdown();
});
r->SetReferenceClock(std::move(reference_clock));
r->SetUsage2(fuchsia::media::AudioRenderUsage2::MEDIA);
r->SetPcmStreamType(kImpulseFormat.stream_type());
// Set up the payload buffer.
fzl::VmoMapper vmo_mapper;
zx::vmo vmo;
auto status =
vmo_mapper.CreateAndMap(sound.NumBytes(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, nullptr, &vmo,
ZX_RIGHT_READ | ZX_RIGHT_MAP | ZX_RIGHT_TRANSFER);
CLI_CHECK_OK(status, "Failed to create " << sound.NumBytes() << "-byte payload buffer");
memmove(reinterpret_cast<uint8_t*>(vmo_mapper.start()),
reinterpret_cast<uint8_t*>(&sound.samples()[0]), sound.NumBytes());
r->AddPayloadBuffer(0, std::move(vmo));
auto pkt = fuchsia::media::StreamPacket{
.pts = 0,
.payload_buffer_id = 0,
.payload_offset = 0,
.payload_size = static_cast<size_t>(sound.NumBytes()),
};
// Play this sound and tear down the renderer once the sound has been played.
r->SendPacket(pkt, [holder]() mutable {
printf("Played sound\n");
holder->Unbind();
});
r->Play(reference_time.get(), 0,
[reference_time](int64_t play_ref_time, int64_t play_media_time) {
if (play_ref_time != reference_time.get()) {
printf("WARNING: Play() changed the reference time by %ld ns\n",
play_ref_time - reference_time.get());
}
if (play_media_time != 0) {
printf("WARNING: Play() changed the media time from 0 to %ld\n", play_media_time);
}
});
}
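// Compares, for each impulse, when it was rendered against when it was detected
// in the loopback and microphone captures, then prints per-impulse pass/fail
// results and a summary.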
void CheckAlignment(std::vector<zx::time> play_times, std::vector<zx::time> microphone_times,
std::vector<zx::time> loopback_times) {
printf("============================================\n");
printf("Alignment\n");
printf("\n");
printf("Ideally, the loopback should be perfectly aligned with the renderer and the\n");
printf("microphone should occur slightly later due to propagation delay between the\n");
printf("speaker and microphone (assuming 6\" separation, the delay should be 437us).\n");
printf("\n");
int tests_pass = 0;
int tests_unknown = 0;
for (size_t k = 0; k < play_times.size(); k++) {
auto rt = play_times[k] - global_start_time_mono;
auto mt = microphone_times[k] - global_start_time_mono;
auto lt = loopback_times[k] - global_start_time_mono;
printf("Sound %lu\n", k);
printf(" render @ %ld ns\n", rt.to_nsecs());
if (mt.get() > 0) {
printf(" microphone @ %ld ns, render - microphone = %s\n", mt.to_nsecs(),
SprintDuration(rt - mt).c_str());
} else {
printf(" not found in microphone\n");
}
if (lt.get() > 0) {
printf(" loopback @ %ld ns, render - loopback = %s", lt.to_nsecs(),
SprintDuration(rt - lt).c_str());
if (mt.get() > 0) {
printf(", microphone - loopback = %s", SprintDuration(mt - lt).c_str());
}
printf("\n");
} else {
printf(" not found in loopback\n");
}
if (mt.get() > 0 && lt.get() > 0) {
bool pass = true;
// Loopback timestamp must match the render timestamp.
if (auto delta_frames = std::abs(DurationToFrames(rt - lt)); delta_frames > 1) {
pass = false;
printf(" failed: loopback not aligned with renderer\n");
}
// Microphone timestamp must be beyond the loopback timestamp by at most 100ms.
if (mt.get() < lt.get()) {
pass = false;
printf(" failed: microphone timestamp before loopback timestamp\n");
} else if (mt - lt > zx::msec(100)) {
pass = false;
printf(" failed: microphone timestamp more than 100ms after loopback timestamp\n");
}
if (pass) {
printf(" passed\n");
tests_pass++;
}
} else {
tests_unknown++;
}
printf("\n");
}
printf("Results\n");
printf(" %d passed\n", tests_pass);
printf(" %lu failed\n", play_times.size() - (tests_pass + tests_unknown));
printf(" %d could not locate timestamps\n", tests_unknown);
printf("\n");
}
} // namespace
int main(int argc, const char** argv) {
loop = new async::Loop(&kAsyncLoopConfigAttachToCurrentThread);
auto ctx = sys::ComponentContext::CreateAndServeOutgoingDirectory();
auto command_line = fxl::CommandLineFromArgcArgv(argc, argv);
if (command_line.HasOption("help")) {
printf("Usage: audio-capture-timestamp-validator [--duration-seconds=10] [--verbose]\n");
printf("\n");
printf("This tool helps to debug capture timestamp issues. It does three things\n");
printf("concurrently:\n");
printf("\n");
printf(" 1. Plays a short impulse once per second\n");
printf(" 2. Captures the loopback interface\n");
printf(" 3. Captures the microphone interface\n");
printf("\n");
printf("The tool then compares the timestamps at which the impulses are captured by\n");
printf("the loopback and microphone interfaces. Microphone timestamps should occur\n");
printf("strictly after loopback timestamps. Direct open-air acoustic propagation is\n");
printf("approximately 1 ft/ms; many full-duplex algorithms accommodate environmental\n");
printf("delays of up to 100 ms.\n");
printf("\n");
printf("The captured audio is saved to WAV files for futher debugging.\n");
return 0;
}
verbose = command_line.HasOption("verbose");
printf("WARNING: Volume will be increased to 100%% temporarily. If the tool does not\n");
printf(" shut down cleanly, the volume may not be restored. For most accurate\n");
printf(" results, run in a quiet environment.\n");
std::string duration_str;
int64_t duration_seconds = 10;
if (command_line.GetOptionValue("duration-seconds", &duration_str)) {
CLI_CHECK(sscanf(duration_str.c_str(), "%li", &duration_seconds) == 1,
"--duration-seconds must be an integer");
CLI_CHECK(duration_seconds > 0, "--duration-seconds must be positive");
}
// Set the volume to 100%.
fuchsia::media::AudioCorePtr audio_core = ctx->svc()->Connect<fuchsia::media::AudioCore>();
fuchsia::media::audio::VolumeControlPtr volume_control;
audio_core->BindUsageVolumeControl2(fuchsia::media::Usage2::WithRenderUsage(
fidl::Clone(fuchsia::media::AudioRenderUsage2::MEDIA)),
volume_control.NewRequest());
std::optional<float> old_volume;
volume_control.events().OnVolumeMuteChanged = [&old_volume](float v, bool muted) {
if (!old_volume) {
printf("Saving old volume: %f\n", v);
old_volume = v;
}
};
while (!old_volume) {
loop->RunUntilIdle();
}
volume_control->SetVolume(1.0);
// Restore volume on exit.
auto restore_volume =
fit::defer([old_volume, &volume_control]() { volume_control->SetVolume(*old_volume); });
// Create an impulse signal prefixed by a silent ring in.
auto packet = GenerateSilentAudio(kImpulseFormat, kImpulseRingInFrames);
auto impulse = GenerateConstantAudio(kImpulseFormat, kImpulseFrames, kImpulseMagnitude);
packet.Append(&impulse);
// Play an impulse every second. Play the first sound at least (1s - ring-in) in the
// future so it's well beyond the renderer's MinLeadTime and so we have plenty of time
// to set up the capturers before the first sound is played.
global_start_time_mono = zx::clock::get_monotonic();
std::vector<zx::time> play_times;
for (auto k = 1; k < duration_seconds; k++) {
auto t = global_start_time_mono + zx::sec(k);
PlaySound(audio_core, DupClock(), t, packet);
play_times.push_back(t + kImpulseRingInDuration);
}
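// Each play_times entry marks the expected onset of the impulse proper: the
// packet begins with kImpulseRingInDuration of silence, so the audible edge
// lands that much after the packet's start time.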
// Start the capturers.
// We use a barrier to align the start times of the output WAV files.
auto barrier = std::make_unique<Barrier>(2);
auto microphone = std::make_unique<Capture>(audio_core, false, "/tmp/microphone.wav", *barrier);
auto loopback = std::make_unique<Capture>(audio_core, true, "/tmp/loopback.wav", *barrier);
loop->Run(zx::clock::get_monotonic() + zx::sec(duration_seconds));
microphone->Stop();
loopback->Stop();
loop->RunUntilIdle();
// Check alignment.
if (verbose) {
printf("[verbose] Looking for sounds in the microphone capture\n");
}
auto microphone_times = microphone->FindSounds(play_times);
if (verbose) {
printf("[verbose] Looking for sounds in the loopback capture\n");
}
auto loopback_times = loopback->FindSounds(play_times);
CheckAlignment(play_times, microphone_times, loopback_times);
return 0;
}