blob: 950678bfd1b142f170f05976c591230dfbc477ca [file] [log] [blame]
// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "garnet/bin/media/audio_core/mixer/test/audio_analysis.h"
#include <fbl/algorithm.h>
#include <iomanip>
#include <vector>
#include "lib/fxl/logging.h"
namespace media::audio::test {
//
// This library contains standalone functions that enable tests to analyze
// audio- or gain-related outputs.
//
// The GenerateCosine function populates audio buffers with sinusoidal values of
// the given frequency, magnitude and phase. The FFT function performs Fast
// Fourier Transforms on the provided real and imaginary arrays. The
// MeasureAudioFreq function analyzes the given audio buffer at the specified
// frequency, returning the magnitude of signal that resides at that frequency,
// as well as the combined magnitude of all other frequencies (useful for
// computing signal-to-noise and other metrics).
//
// These are the multiplicative factors by which float-based buffer comparisons
// can differ, and still qualify as "equal" (if 'float_tolerance' is specified).
// These are just enough to cover "off-by-one" imprecision when converting to
// 32-bit IEEE-754 float (which essentially has 23 bits of data precision).
constexpr double kFloatPrecisionUpperBound = 1.00000015;
constexpr double kFloatPrecisionLowerBound = 1.0 / kFloatPrecisionUpperBound;
//
// Numerically compare two buffers of integers. Emit values if mismatch found.
// For testability, fourth param represents whether we expect comp to fail.
//
// TODO(mpuryear): Consider using the googletest Array Matchers for this
template <typename T>
bool CompareBuffers(const T* actual, const T* expect, uint32_t buf_size,
bool expect_to_pass, bool float_tolerance) {
// uint8_t is interpreted as char. Cast into larger int for correct display.
constexpr bool is_uint8 = std::is_same<T, uint8_t>::value;
float_tolerance = float_tolerance && (std::is_same<T, float>::value);
for (uint32_t idx = 0; idx < buf_size; ++idx) {
if (actual[idx] != expect[idx]) {
if (expect_to_pass) {
if (float_tolerance) {
// If expect[idx] is negative, then upper bound is closer to zero
// (hence multiplied by the .99999... kFloatPrecisionLowerBound).
T low_val =
expect[idx] * (expect[idx] >= 0 ? kFloatPrecisionLowerBound
: kFloatPrecisionUpperBound);
T high_val =
expect[idx] * (expect[idx] >= 0 ? kFloatPrecisionUpperBound
: kFloatPrecisionLowerBound);
if (actual[idx] >= low_val && actual[idx] <= high_val) {
continue;
}
}
FXL_LOG(ERROR) << "[" << idx << "] was " << std::setprecision(10)
<< (is_uint8 ? static_cast<int32_t>(actual[idx])
: actual[idx])
<< ", should be "
<< (is_uint8 ? static_cast<int32_t>(expect[idx])
: expect[idx]);
}
return false;
}
}
if (!expect_to_pass) {
FXL_LOG(ERROR) << "We expected two buffers (length " << buf_size
<< ") to differ, but they did not!";
}
return true;
}
// Numerically compares a buffer of integers to a specific value. For
// testability, 'expect_to_pass' represents whether we expect the comp to fail.
template <typename T>
bool CompareBufferToVal(const T* buf, T val, uint32_t buf_size,
bool expect_to_pass, bool float_tolerance) {
// uint8_t is interpreted as char. Cast into larger int for correct display.
constexpr bool is_uint8 = std::is_same<T, uint8_t>::value;
float_tolerance = float_tolerance && (std::is_same<T, float>::value);
for (uint32_t idx = 0; idx < buf_size; ++idx) {
if (buf[idx] != val) {
if (expect_to_pass) {
if (float_tolerance) {
T low_val = val * kFloatPrecisionLowerBound;
T high_val = val * kFloatPrecisionUpperBound;
if (buf[idx] >= low_val && buf[idx] <= high_val) {
continue;
}
}
FXL_LOG(ERROR) << "[" << idx << "] was " << std::setprecision(10)
<< (is_uint8 ? static_cast<int32_t>(buf[idx]) : buf[idx])
<< ", should be "
<< (is_uint8 ? static_cast<int32_t>(val) : val);
}
return false;
}
}
if (!expect_to_pass) {
FXL_LOG(ERROR) << "We expected buffer (length " << buf_size
<< ") to differ from value "
<< (is_uint8 ? static_cast<int32_t>(val) : val)
<< ", but it was equal!";
}
return true;
}
// Display array of double values
void DisplayVals(const double* buf, uint32_t buf_size) {
printf("\n ********************************************************");
printf("\n **************************************************************");
printf("\n *** Displaying raw array data for length %5d ***",
buf_size);
printf("\n **************************************************************");
for (uint32_t idx = 0; idx < buf_size; ++idx) {
if (idx % 8 == 0) {
printf("\n [%d] ", idx);
}
printf("%.15lf ", buf[idx]);
}
printf("\n **************************************************************");
printf("\n ********************************************************");
printf("\n");
}
// Given a val with fractional content, prep it to be put in an int container.
//
// Used specifically when generating high-precision audio content for source
// buffers, these functions round double-precision floating point values into
// the appropriate container sizes (assumed to be integer, although float
// destination types are specialized).
// In the general case, values are rounded -- and unsigned 8-bit integers
// further biased by 0x80 -- so that the output data is exactly as it would be
// when arriving from an audio source (such as .wav file with int16 values, or
// audio input device operating in uint8 mode). Float and double specializations
// need not do anything, as double-to-float cast poses no real risk of
// distortion from truncation.
// Used only within this file by GenerateCosine, these functions do not check
// for overflow/clamp, leaving that responsibility on users of GenerateCosine.
template <typename T>
inline T Finalize(double value) {
return round(value);
}
template <>
inline uint8_t Finalize(double value) {
return round(value) + 0x80;
}
template <>
inline float Finalize(double value) {
return value;
}
template <>
inline double Finalize(double value) {
return value;
}
// Populate this buffer with cosine values. Frequency is set so that wave
// repeats itself 'freq' times within buffer length; 'magn' specifies peak
// value. Accumulates these values with preexisting array vals, if bool is set.
template <typename T>
void GenerateCosine(T* buffer, uint32_t buf_size, double freq, bool accumulate,
double magn, double phase) {
// If frequency is 0 (constant val), phase offset causes reduced amplitude
FXL_DCHECK(freq > 0.0 || (freq == 0.0 && phase == 0.0));
// Freqs above buf_size/2 (Nyquist limit) will alias into lower frequencies.
FXL_DCHECK(freq * 2.0 <= buf_size)
<< "Buffer too short--requested frequency will be aliased";
// freq is defined as: cosine recurs exactly 'freq' times within buf_size.
const double mult = 2.0 * M_PI / buf_size * freq;
if (accumulate) {
for (uint32_t idx = 0; idx < buf_size; ++idx) {
buffer[idx] += Finalize<T>(magn * std::cos(mult * idx + phase));
}
} else {
for (uint32_t idx = 0; idx < buf_size; ++idx) {
buffer[idx] = Finalize<T>(magn * std::cos(mult * idx + phase));
}
}
}
// Perform a Fast Fourier Transform on the provided data arrays.
//
// On input, real[] and imag[] contain 'buf_size' number of double-float values
// in the time domain (such as audio samples); buf_size must be a power-of-two.
//
// On output, real[] and imag[] contain 'buf_size' number of double-float values
// in frequency domain, but generally used only through buf_size/2 (per Nyquist)
//
// The classic FFT derivation (based on Cooley-Tukey), and what I implement
// here, achieves NlogN performance (instead of N^2) with divide-and-conquer,
// while additional optimizing by working in-place. To do this, it first breaks
// the data stream into single elements (so-called interlaced decomposition)
// that are in the appropriate order, and then combines these to form series of
// 2-element matrices, then combines these to form 4-element matrices, and so
// on, until combining the final matrices (each of which is half the size of the
// original). Two interesting details deserve further explanation:
//
// 1. Interlaced decomposition into the "appropriate order" mentioned above is
// achieved by sorting values by index, but in ascending order if viewing the
// index in bit-reversed manner! (This is exactly what is needed in order to
// combine the pairs of values in the appropriate cross-matrix sequence.) So for
// a stream of 16 values (4 bits of index), this re-sorted order is as follows -
// 0, 8, 4, 12, 2, 10, 6, ..., 7, 15 ... or, in binary:
// 0000, 1000, 0100, 1100, 0010, 11010, 0110, ..., 0111, 1111.
//
// 2. Combining each matrix (called synthesis) is accomplished in the following
// fashion, regardless of size: combining [ac] and [bd] to make [abcd] is done
// by spacing [ac] into [a0c0] and spacing [bd] into [0b0d] and then overlaying
// them. The frequency-domain equivalent of making [a0c0] from [ac] is simply
// to turn [AC] into [ACAC]. The equivalent of creating [0b0d] from [bd] is to
// multiply [BD] by a sinusoid (to delay it by one sample) while also
// duplicating [BD] into [BDBD]. This results in a 'butterfly' flow (based on
// the shape of two inputs, two outputs, and the four arrows between them).
// Specifically, in each pair of values that are combined:
// even_output = even_input + (sinusoid_factor x odd_input), and
// odd_output = even input - (sinusoid_factor x odd_input).
// (specifically, this sinusoid is the spectrum of a shifted delta function)
// This butterfly operation transforms two complex points into two other complex
// points, combining two 1-element signals into one 2-element signal (etc).
//
// Classic DSP texts by Oppenheim, Schaffer, Rabiner, or the Cooley-Tukey paper
// itself, are serviceable references for these concepts.
//
// TODO(mpuryear): Consider std::complex<double> instead of real/imag arrays.
void FFT(double* reals, double* imags, uint32_t buf_size) {
FXL_DCHECK(fbl::is_pow2(buf_size));
const uint32_t buf_sz_2 = buf_size >> 1;
uint32_t N = 0;
while (buf_size > (1 << N)) {
++N;
}
// First, perform a bit-reversal sort of indices. Again, this is done so
// that all subsequent matrix-merging work can be done on adjacent values.
// This sort implementation performs the minimal number of swaps/moves
// (considering buf_size could be 128K, 256K or more), but is admittedly more
// difficult to follow than some.
// When debugging, remember 1) each swap moves both vals to final locations,
// 2) each val is touched once or not at all, and 3) the final index ordering
// is **ascending if looking at indices in bit-reversed fashion**.
uint32_t swap_idx = buf_sz_2;
for (uint32_t idx = 1; idx < buf_size - 1; ++idx) {
if (idx < swap_idx) {
std::swap(reals[idx], reals[swap_idx]);
std::swap(imags[idx], imags[swap_idx]);
}
uint32_t alt_idx = buf_sz_2;
while (alt_idx <= swap_idx) {
swap_idx -= alt_idx;
alt_idx /= 2;
}
swap_idx += alt_idx;
}
// Loop through log2(buf_size) stages: one for each power of two, starting
// with 2, then 4, then 8, .... During each stage, combine pairs of shorter
// signals (of length 'sub_dft_sz_2') into single, longer signals (of length
// 'sub_dft_sz'). From previous sorting, signals to be combined are adjacent.
for (uint32_t fft_level = 1; fft_level <= N; ++fft_level) {
const uint32_t sub_dft_sz = 1 << fft_level; // length of combined signal
const uint32_t sub_dft_sz_2 = sub_dft_sz >> 1; // length of shorter signals
// 'Odd' values are multiplied by complex (real & imaginary) factors before
// being combined with 'even' values. These coefficients help the real and
// imaginary factors advance correctly, within each sub_dft.
const double real_coef = std::cos(M_PI / static_cast<double>(sub_dft_sz_2));
const double imag_coef =
-std::sin(M_PI / static_cast<double>(sub_dft_sz_2));
// For each point in this signal (for each complex pair in this 'sub_dft'),
double real_factor = 1.0, imag_factor = 0.0;
for (uint32_t btrfly_num = 1; btrfly_num <= sub_dft_sz_2; ++btrfly_num) {
double temp_real, temp_imag;
// ... perform the so-called butterfly operation on a pair of points.
for (uint32_t idx = btrfly_num - 1; idx < buf_size; idx += sub_dft_sz) {
const uint32_t idx2 = idx + sub_dft_sz_2;
temp_real = reals[idx2] * real_factor - imags[idx2] * imag_factor;
temp_imag = reals[idx2] * imag_factor + imags[idx2] * real_factor;
reals[idx2] = reals[idx] - temp_real;
imags[idx2] = imags[idx] - temp_imag;
reals[idx] += temp_real;
imags[idx] += temp_imag;
}
// Update the sinusoid coefficients, for the next points in this signal.
temp_real = real_factor;
real_factor = temp_real * real_coef - imag_factor * imag_coef;
imag_factor = temp_real * imag_coef + imag_factor * real_coef;
}
}
}
// For specified audio buffer & length, analyze the contents and return the
// magnitude (and phase) of signal at given frequency (i.e. frequency at which
// 'freq' periods fit perfectly within buffer length). Also return the magnitude
// of all other content. Useful for frequency response and signal-to-noise.
// Internally uses an FFT, so buf_size must be a power-of-two.
template <typename T>
void MeasureAudioFreq(T* audio, uint32_t buf_size, uint32_t freq,
double* magn_signal, double* magn_other) {
FXL_DCHECK(fbl::is_pow2(buf_size));
uint32_t buf_sz_2 = buf_size >> 1;
bool freq_out_of_range = (freq > buf_sz_2);
// Copy input to double buffer, before doing a high-res FFT (freq-analysis)
// Note that we set imags[] to zero: MeasureAudioFreq retrieves a REAL (not
// Complex) FFT for the data, the return real and imaginary frequency-domain
// data only spans 0...N/2 (inclusive).
std::vector<double> reals(buf_size);
std::vector<double> imags(buf_size);
for (uint32_t idx = 0; idx < buf_size; ++idx) {
reals[idx] = audio[idx];
imags[idx] = 0.0;
// In case of uint8 input data, bias from a zero of 0x80 to 0.0
if (std::is_same<T, uint8_t>::value) {
reals[idx] -= 128.0;
}
}
FFT(reals.data(), imags.data(), buf_size);
// Convert real FFT results from frequency domain into sinusoid amplitudes
//
// We only feed REAL (not complex) data to the FFT, so return values in
// reals[] and imags[] only have meaning through buf_sz_2. Thus, for the
// frequency bins [1 thru buf_sz_2 - 1], we could either add in the identical
// "negative" (beyond buf_size/2) frequency vals, or multiply by two (with
// upcoming div-by-buf_size, this becomes div-by-buf_sz_2 for those elements).
for (uint32_t bin = 1; bin < buf_sz_2; ++bin) {
reals[bin] /= buf_sz_2;
imags[bin] /= buf_sz_2;
}
// Frequencies 0 & buf_sz_2 are 'half-width' bins, so these bins get reduced
reals[0] /= buf_size; // by half during the normalization process.
imags[0] /= buf_size; // Specifically compared to the other indices,
reals[buf_sz_2] /= buf_size; // we divide the real and imag values by
imags[buf_sz_2] /= buf_size; // buf_size instead of buf_sz_2.
// Calculate magnitude of primary signal (even if out-of-range aliased back!)
if (freq_out_of_range) {
freq = buf_size - freq;
}
*magn_signal =
std::sqrt(reals[freq] * reals[freq] + imags[freq] * imags[freq]);
// Calculate magnitude of all other frequencies
if (magn_other) {
double sum_sq_magn_other = 0.0;
for (uint32_t bin = 0; bin <= buf_sz_2; ++bin) {
if (bin != freq || freq_out_of_range) {
sum_sq_magn_other +=
(reals[bin] * reals[bin] + imags[bin] * imags[bin]);
}
}
*magn_other = std::sqrt(sum_sq_magn_other);
}
}
template bool CompareBuffers<uint8_t>(const uint8_t*, const uint8_t*, uint32_t,
bool, bool);
template bool CompareBuffers<int16_t>(const int16_t*, const int16_t*, uint32_t,
bool, bool);
template bool CompareBuffers<int32_t>(const int32_t*, const int32_t*, uint32_t,
bool, bool);
template bool CompareBuffers<float>(const float*, const float*, uint32_t, bool,
bool);
template bool CompareBuffers<double>(const double*, const double*, uint32_t,
bool, bool);
template bool CompareBufferToVal<uint8_t>(const uint8_t*, uint8_t, uint32_t,
bool, bool);
template bool CompareBufferToVal<int16_t>(const int16_t*, int16_t, uint32_t,
bool, bool);
template bool CompareBufferToVal<int32_t>(const int32_t*, int32_t, uint32_t,
bool, bool);
template bool CompareBufferToVal<float>(const float*, float, uint32_t, bool,
bool);
template void GenerateCosine<uint8_t>(uint8_t*, uint32_t, double, bool, double,
double);
template void GenerateCosine<int16_t>(int16_t*, uint32_t, double, bool, double,
double);
template void GenerateCosine<int32_t>(int32_t*, uint32_t, double, bool, double,
double);
template void GenerateCosine<float>(float*, uint32_t, double, bool, double,
double);
template void GenerateCosine<double>(double*, uint32_t, double, bool, double,
double);
template void MeasureAudioFreq<uint8_t>(uint8_t* audio, uint32_t buf_size,
uint32_t freq, double* magn_signal,
double* magn_other = nullptr);
template void MeasureAudioFreq<int16_t>(int16_t* audio, uint32_t buf_size,
uint32_t freq, double* magn_signal,
double* magn_other = nullptr);
template void MeasureAudioFreq<int32_t>(int32_t* audio, uint32_t buf_size,
uint32_t freq, double* magn_signal,
double* magn_other = nullptr);
template void MeasureAudioFreq<float>(float* audio, uint32_t buf_size,
uint32_t freq, double* magn_signal,
double* magn_other = nullptr);
} // namespace media::audio::test