// blob: 2c2e91f06bc41030cc5cf0b619ff6e3cdc777dfe [file] [log] [blame]
// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <lib/trace/event.h>
#include <zircon/types.h>

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iomanip>
#include <limits>
#include <optional>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "src/media/audio/lib/format/audio_buffer.h"
namespace media::audio {
namespace internal {
// Perform a Fast Fourier Transform on the provided data arrays. The same two
// arrays carry both the input and the output.
//
// On input, real[] and imag[] contain 'buf_size' number of double-float values
// in the time domain (such as audio samples); buf_size must be a power-of-two.
//
// On output, real[] and imag[] contain 'buf_size' number of double-float values
// in the frequency domain, but generally used only through buf_size/2 (per Nyquist).
void FFT(double* real, double* imag, uint32_t buf_size);
// Calculate phase in radians for the complex pair. Correctly handles negative
// or zero values: range of return value is [-PI,PI], not just [-PI/2,PI/2].
double GetPhase(double real, double imag);
// Convert provided real-imag (cartesian) data into magn-phase (polar) format.
// This is done with 2 in-buffers and 2 out-buffers -- NOT 2 in-out-buffers.
// |phase| defaults to nullptr.
// NOTE(review): presumably phase output is skipped when |phase| is null -- confirm in the .cc.
// TODO(mpuryear): will clients (tests) want this transformed in-place?
void RectangularToPolar(const double* real, const double* imag, uint32_t buf_size, double* magn,
double* phase = nullptr);
// NOTE(review): undocumented in this header. Judging by the signature, this presumably performs a
// Discrete Fourier Transform of the |len| real-valued inputs in reals[], writing the real and
// imaginary frequency-domain components to r_freq[] and i_freq[]. The required output array
// lengths are not visible here -- confirm against the implementation.
void RealDFT(const double* reals, uint32_t len, double* r_freq, double* i_freq);
// NOTE(review): undocumented in this header. Presumably the inverse DFT of the frequency-domain
// data in real[]/imag[], producing time-domain values in real_out[] -- confirm against the
// implementation (including whether real[]/imag[] are modified, since they are non-const).
void InverseDFT(double* real, double* imag, uint32_t buf_size, double* real_out);
// NOTE(review): undocumented in this header. Presumably the inverse of FFT() above, with the
// result left in real[]/imag[] -- confirm, including any power-of-two requirement on buf_size.
void InverseFFT(double* real, double* imag, uint32_t buf_size);
} // namespace internal
// Result of a frequency analysis (see MeasureAudioFreqs): per-frequency magnitude and phase for
// the requested frequencies, plus aggregate magnitudes for requested vs. all other frequencies.
struct AudioFreqResult {
  // Display the contents of this result. |tag| and |magn_display_threshold| both have defaults;
  // their exact effect on the output is defined by the out-of-line implementation.
  void Display(std::string tag = "", double magn_display_threshold = 0.0);

  // Raw list of square magnitudes for all bins up to size/2.
  std::vector<double> all_square_magnitudes;

  // Mapping from frequency -> magnitude, for each requested frequency.
  std::unordered_map<int32_t, double> magnitudes;

  // Phase in radians, for each requested frequency.
  std::unordered_map<int32_t, double> phases;

  // Total magnitude over all requested frequencies.
  // Magnitude is the root-sum-of-squares of the magnitude at all requested frequencies.
  // Zero-initialized so a default-constructed result never exposes an indeterminate value.
  double total_magn_signal = 0.0;

  // Total magnitude over all other frequencies.
  // Magnitude is the root-sum-of-squares of the magnitude at all other frequencies.
  // Zero-initialized so a default-constructed result never exposes an indeterminate value.
  double total_magn_other = 0.0;
};
// For the given audio buffer, analyze contents and return the magnitude (and phase) at each of
// the given frequencies. Also return magnitude of all other content. Useful for frequency response
// and signal-to-noise. Internally uses an FFT, so slice.NumFrames() must be a power-of-two. The
// format must have channels() == 1.
//
// Each entry of |freqs| is the number of **complete sinusoidal periods** that should perfectly fit
// into the buffer.
template <fuchsia::media::AudioSampleFormat SampleFormat>
AudioFreqResult MeasureAudioFreqs(AudioBufferSlice<SampleFormat> slice,
std::unordered_set<int32_t> freqs);
// Shorthand that analyzes a single frequency.
//
// Forwards to MeasureAudioFreqs with a one-element frequency set and returns its result unchanged.
// In debug builds, also checks that the aggregate signal magnitude equals the magnitude recorded
// for |freq|.
template <fuchsia::media::AudioSampleFormat SampleFormat>
AudioFreqResult MeasureAudioFreq(AudioBufferSlice<SampleFormat> slice, int32_t freq) {
  auto result = MeasureAudioFreqs(slice, {freq});
  FX_DCHECK(result.total_magn_signal == result.magnitudes[freq]);
  return result;
}
// Compute the root-mean-square (RMS) energy of a slice. This is a measure of loudness.
//
// Every sample of every channel is converted with SampleFormatTraits<SampleFormat>::ToFloat,
// squared and accumulated; the result is the square root of that sum divided by
// slice.NumSamples(). The slice must contain at least one frame.
template <fuchsia::media::AudioSampleFormat SampleFormat>
double MeasureAudioRMS(AudioBufferSlice<SampleFormat> slice) {
  FX_CHECK(slice.NumFrames() > 0);

  double sum = 0;
  for (int64_t frame = 0; frame < slice.NumFrames(); frame++) {
    for (int32_t chan = 0; chan < slice.format().channels(); chan++) {
      const double s = SampleFormatTraits<SampleFormat>::ToFloat(slice.SampleAt(frame, chan));
      sum += s * s;
    }
  }
  return std::sqrt(sum / static_cast<double>(slice.NumSamples()));
}
// Locate the left edge of the first impulse in the given slice, ignoring samples quieter
// than the given noise floor. Returns the frame index if found, and std::nullopt otherwise.
// The returned index is relative to the start of the slice.
//
// The given slice must have a single channel. We assume the impulse has a positive signal.
template <fuchsia::media::AudioSampleFormat SampleFormat>
std::optional<int64_t> FindImpulseLeadingEdge(
    AudioBufferSlice<SampleFormat> slice,
    typename SampleFormatTraits<SampleFormat>::SampleT noise_floor) {
  FX_CHECK(slice.format().channels() == 1);

  // Convert a raw sample to float. UNSIGNED_8 samples are shifted down by 128; all other formats
  // are used as-is.
  auto normalize = [](typename SampleFormatTraits<SampleFormat>::SampleT val) {
    float d = static_cast<float>(val);
    if constexpr (SampleFormat == fuchsia::media::AudioSampleFormat::UNSIGNED_8) {
      d -= 128;
    }
    return d;
  };

  // If our impulse was a single frame, we could simply find the maximum value.
  // To support wider impulses, we need to find the left edge of the impulse. We
  // do this by finding the first value such that there does not exist a value
  // more than 50% larger.
  float max_value = 0;
  for (int64_t f = 0; f < slice.NumFrames(); f++) {
    max_value = std::max(max_value, normalize(slice.SampleAt(f, 0)));
  }

  for (int64_t f = 0; f < slice.NumFrames(); f++) {
    float val = normalize(slice.SampleAt(f, 0));
    // Samples at or below the noise floor can never be the leading edge.
    if (val <= static_cast<float>(noise_floor)) {
      continue;
    }
    if (1.5 * val > max_value) {
      return f;
    }
  }
  return std::nullopt;
}
// Frame indices describing a single impulse, as produced by FindImpulse.
// All members default to 0 so that a default-constructed Impulse holds no indeterminate values.
struct Impulse {
  // Index of the first frame considered part of the impulse.
  int64_t leading_edge = 0;
  // Index of the frame with the largest absolute value.
  int64_t max = 0;
  // Index midway between leading_edge and trailing_edge.
  int64_t center = 0;
  // Index of the last frame considered part of the impulse.
  int64_t trailing_edge = 0;
};
// Locate the impulse in the given slice, ignoring samples quieter than the given noise floor.
// Returns an Impulse holding the leading edge, maximum, center and trailing edge (as frame indices
// relative to the start of the slice) if found, and std::nullopt otherwise.
//
// This function requires a one-channel slice, and it assumes there is exactly one impulse.
// Unlike FindImpulseLeadingEdge, this func ignores sign: center/edges can be positive or negative.
template <fuchsia::media::AudioSampleFormat SampleFormat>
std::optional<Impulse> FindImpulse(AudioBufferSlice<SampleFormat> slice,
                                   typename SampleFormatTraits<SampleFormat>::SampleT noise_floor) {
  // Compile-time switch: set to true to log the edge/center indices and values.
  constexpr bool kDisplayEdgesAndCenter = false;

  FX_CHECK(slice.format().channels() == 1);

  // Convert a raw sample to float. UNSIGNED_8 samples are shifted down by 0x80; all other formats
  // are used as-is.
  auto normalize = [](typename SampleFormatTraits<SampleFormat>::SampleT val) {
    float norm = static_cast<float>(val);
    if constexpr (SampleFormat == fuchsia::media::AudioSampleFormat::UNSIGNED_8) {
      norm -= 0x80;
    }
    return norm;
  };

  // If our impulse was a single frame, we could simply find the maximum absolute value.
  // To support wider impulses, we need to find the left and right edges of the impulse.
  // We do this by finding the first and last values such that there does not exist a
  // value more than 50% larger.
  float max_value = 0.0f;
  int64_t max_idx = 0;
  for (int64_t idx = 0; idx < slice.NumFrames(); idx++) {
    float val = std::abs(normalize(slice.SampleAt(idx, 0)));
    // Samples at or below the noise floor are not candidates for the maximum.
    if (val <= static_cast<float>(noise_floor)) {
      continue;
    }
    if (val > max_value) {
      // Store the index and value (ignoring sign) of the largest value in this slice.
      max_value = val;
      max_idx = idx;
    }
  }
  // Nothing rose above the noise floor: there is no impulse to report.
  if (max_value == 0.0f) {
    return std::nullopt;
  }

  // Scan forward for the first frame within 50% of the maximum.
  int64_t leading_idx = 0;
  float leading_val = 0.0f;
  for (int64_t idx = 0; idx < slice.NumFrames(); ++idx) {
    float val = std::abs(normalize(slice.SampleAt(idx, 0)));
    if (1.5 * val > max_value) {
      leading_idx = idx;
      leading_val = val;
      break;
    }
  }

  // Scan backward for the last frame within 50% of the maximum.
  int64_t trailing_idx = slice.NumFrames() - 1;
  float trailing_val = 0.0f;
  for (int64_t idx = slice.NumFrames() - 1; idx >= 0; --idx) {
    float val = std::abs(normalize(slice.SampleAt(idx, 0)));
    if (1.5 * val > max_value) {
      trailing_idx = idx;
      trailing_val = val;
      break;
    }
  }

  int64_t sum_idx = leading_idx + trailing_idx;
  int64_t center_idx = sum_idx / 2;
  // Round center_idx toward the greater of (leading_val, trailing_val), ignoring signs.
  // Integer division already rounds toward the leading edge, so only an odd sum with a larger
  // trailing value needs adjusting.
  if ((sum_idx & 0x01) && trailing_val > leading_val) {
    ++center_idx;
  }

  if constexpr (kDisplayEdgesAndCenter) {
    std::stringstream edge_values;
    edge_values << " [" << std::setw(5) << slice.start_frame() + leading_idx << "]"
                << std::setw(10) << leading_val << " | [" << std::setw(5)
                << slice.start_frame() + center_idx << "]" << std::setw(10)
                << normalize(slice.SampleAt(center_idx, 0));
    if ((sum_idx & 0x01) && leading_val < trailing_val) {
      edge_values << " | [" << std::setw(5) << slice.start_frame() + center_idx << "]"
                  << std::setw(10) << normalize(slice.SampleAt(center_idx, 0));
    }
    FX_LOGS(INFO) << edge_values.str() << " | [" << std::setw(5)
                  << slice.start_frame() + trailing_idx << "]" << std::setw(10) << trailing_val;
  }

  return {{
      .leading_edge = leading_idx,
      .max = max_idx,
      .center = center_idx,
      .trailing_edge = trailing_idx,
  }};
}
// Multiply the input buffer by a Tukey window, producing a new output buffer. A Tukey window
// contains a ramp up from zero, followed by a flat top of 1.0, followed by a ramp down to zero.
// The total width of the up and down ramps is described by the alpha parameter, which must be <= 1.
//
// The result is returned as a new AudioBuffer; |slice| is taken by value.
// NOTE(review): the lower bound on |alpha| and how an out-of-range value is handled are not
// visible from this declaration -- confirm against the implementation.
template <fuchsia::media::AudioSampleFormat SampleFormat>
AudioBuffer<SampleFormat> MultiplyByTukeyWindow(AudioBufferSlice<SampleFormat> slice, double alpha);
} // namespace media::audio