| // Copyright 2018 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. |
| |
| #include "src/media/audio/lib/analysis/analysis.h" |
| |
| #include <lib/stdcompat/bit.h> |
| #include <lib/syslog/cpp/macros.h> |
| |
| #include <iomanip> |
| #include <unordered_set> |
| #include <vector> |
| |
| #include "src/media/audio/lib/format/traits.h" |
| |
| namespace media::audio { |
| |
| namespace internal { |
| |
| template <typename T> |
| double SampleToDouble(T value) { |
| return static_cast<double>(value); |
| } |
| |
| template <> |
| double SampleToDouble<uint8_t>(uint8_t value) { |
| // In case of uint8 input data, bias from a zero of 0x80 to 0.0 |
| return static_cast<double>(value) - 128.0; |
| } |
| |
| // Perform a Fast Fourier Transform on the provided data arrays. |
| // |
| // On input, real[] and imag[] contain 'buf_size' number of double-float values in the time domain |
| // (such as audio samples); buf_size must be a power-of-two. |
| // |
| // On output, real[] and imag[] contain 'buf_size' number of double-float values in frequency |
| // domain, but generally used only through buf_size/2 (per Nyquist). |
| // |
| // The classic FFT derivation (based on Cooley-Tukey), and what I implement here, achieves NlogN |
| // performance (instead of N^2) with divide-and-conquer, while additional optimizing by working |
| // in-place. To do this, it first breaks the data stream into single elements (so-called interlaced |
| // decomposition) that are in the appropriate order, and then combines these to form series of |
| // 2-element matrices, then combines these to form 4-element matrices, and so on, until combining |
| // the final matrices (each of which is half the size of the original). Two interesting details |
| // deserve further explanation: |
| // |
| // 1. Interlaced decomposition into the "appropriate order" mentioned above is achieved by sorting |
| // values by index, but in ascending order if viewing the index in bit-reversed manner! (This is |
| // exactly what is needed in order to combine the pairs of values in the appropriate cross-matrix |
| // sequence.) So for a stream of 16 values (4 bits of index), this re-sorted order is as follows - |
| // 0, 8, 4, 12, 2, 10, 6, ..., 7, 15 ... or, in binary: |
| // 0000, 1000, 0100, 1100, 0010, 11010, 0110, ..., 0111, 1111. |
| // |
| // 2. Combining each matrix (called synthesis) is accomplished in the following fashion, regardless |
| // of size: combining [ac] and [bd] to make [abcd] is done by spacing [ac] into [a0c0] and spacing |
| // [bd] into [0b0d] and then overlaying them. The frequency-domain equivalent of making [a0c0] from |
| // [ac] is simply to turn [AC] into [ACAC]. The equivalent of creating [0b0d] from [bd] is to |
| // multiply [BD] by a sinusoid (to delay it by one sample) while also duplicating [BD] into [BDBD]. |
| // This results in a 'butterfly' flow (based on the shape of two inputs, two outputs, and the four |
| // arrows between them). |
| // Specifically, in each pair of values that are combined: |
| // even_output = even_input + (sinusoid_factor x odd_input), and |
| // odd_output = even input - (sinusoid_factor x odd_input). |
| // (specifically, this sinusoid is the spectrum of a shifted delta function) |
| // This butterfly operation transforms two complex points into two other complex points, combining |
| // two 1-element signals into one 2-element signal (etc). |
| // |
| // Classic DSP texts by Oppenheim, Schaffer, Rabiner, or the Cooley-Tukey paper itself, are |
| // serviceable references for these concepts. |
| // |
| // TODO(mpuryear): Consider std::complex<double> instead of real/imag arrays. |
| void FFT(double* reals, double* imags, uint32_t buf_size) { |
| FX_DCHECK(cpp20::has_single_bit(buf_size)); |
| const uint32_t buf_sz_2 = buf_size >> 1; |
| |
| uint32_t N = 0; |
| while (buf_size > (1 << N)) { |
| ++N; |
| } |
| |
| // First, perform a bit-reversal sort of indices. Again, this is done so that all subsequent |
| // matrix-merging work can be done on adjacent values. This sort implementation performs the |
| // minimal number of swaps/moves (considering buf_size could be 128K, 256K or more), but is |
| // admittedly more difficult to follow than some. |
| // When debugging, remember 1) each swap moves both vals to final locations, 2) each val is |
| // touched once or not at all, and 3) the final index ordering is **ascending if looking at |
| // indices in bit-reversed fashion**. |
| uint32_t swap_idx = buf_sz_2; |
| for (uint32_t idx = 1; idx < buf_size - 1; ++idx) { |
| if (idx < swap_idx) { |
| std::swap(reals[idx], reals[swap_idx]); |
| std::swap(imags[idx], imags[swap_idx]); |
| } |
| uint32_t alt_idx = buf_sz_2; |
| while (alt_idx <= swap_idx) { |
| swap_idx -= alt_idx; |
| alt_idx /= 2; |
| } |
| swap_idx += alt_idx; |
| } |
| |
| // Loop through log2(buf_size) stages: one for each power of two, starting with 2, then 4, then 8, |
| // .... During each stage, combine pairs of shorter signals (of length 'sub_dft_sz_2') into |
| // single, longer signals (of length 'sub_dft_sz'). From previous sorting, signals to be combined |
| // are adjacent. |
| for (uint32_t fft_level = 1; fft_level <= N; ++fft_level) { |
| const uint32_t sub_dft_sz = 1 << fft_level; // length of combined signal |
| const uint32_t sub_dft_sz_2 = sub_dft_sz >> 1; // length of shorter signals |
| // 'Odd' values are multiplied by complex (real & imaginary) factors before being combined with |
| // 'even' values. These coefficients help the real and imaginary factors advance correctly, |
| // within each sub_dft. |
| const double real_coef = std::cos(M_PI / static_cast<double>(sub_dft_sz_2)); |
| const double imag_coef = -std::sin(M_PI / static_cast<double>(sub_dft_sz_2)); |
| |
| // For each point in this signal (for each complex pair in this 'sub_dft'), |
| double real_factor = 1.0, imag_factor = 0.0; |
| for (uint32_t btrfly_num = 1; btrfly_num <= sub_dft_sz_2; ++btrfly_num) { |
| double temp_real, temp_imag; |
| |
| // ... perform the so-called butterfly operation on a pair of points. |
| for (uint32_t idx = btrfly_num - 1; idx < buf_size; idx += sub_dft_sz) { |
| const uint32_t idx2 = idx + sub_dft_sz_2; |
| |
| temp_real = reals[idx2] * real_factor - imags[idx2] * imag_factor; |
| temp_imag = reals[idx2] * imag_factor + imags[idx2] * real_factor; |
| reals[idx2] = reals[idx] - temp_real; |
| imags[idx2] = imags[idx] - temp_imag; |
| reals[idx] += temp_real; |
| imags[idx] += temp_imag; |
| } |
| // Update the sinusoid coefficients, for the next points in this signal. |
| temp_real = real_factor; |
| real_factor = temp_real * real_coef - imag_factor * imag_coef; |
| imag_factor = temp_real * imag_coef + imag_factor * real_coef; |
| } |
| } |
| } |
| |
| // Calculate phase for a given complex number, spanning [-PI, PI]. |
| double GetPhase(double real, double imag) { |
| if (real == 0.0f) { |
| real = 1e-20; |
| } |
| if (imag < 1e-19 && imag > -1e-19) { |
| imag = 0.0; |
| } |
| double phase = std::atan(imag / real); |
| |
| if (real < 0.0) { |
| if (imag < 0.0) { |
| phase -= M_PI; |
| } else { |
| phase += M_PI; |
| } |
| } |
| return phase; |
| } |
| |
| // Convert 2 incoming arrs (reals & imags == x & y) into magnitude and phase arrs. Magnitude is |
| // absolute value, phase is in radians with range (-PI, PI]. |
| void RectangularToPolar(const double* reals, const double* imags, uint32_t buf_size, double* magn, |
| double* phase) { |
| for (uint32_t freq = 0; freq < buf_size; ++freq) { |
| double sum_sq = (reals[freq] * reals[freq]) + (imags[freq] * imags[freq]); |
| magn[freq] = std::sqrt(sum_sq); |
| } |
| if (phase) { |
| for (uint32_t freq = 0; freq < buf_size; ++freq) { |
| phase[freq] = GetPhase(reals[freq], imags[freq]); |
| } |
| } |
| } |
| |
| // Perform the Discrete Fourier Transform, converting time-domain reals[] (len buf_size) into |
| // freq-domain real_freq[] & imag_freq[], both of length (buf_size/2 + 1). |
| // This is a naive, unoptimized (N^2)/2 implementation. |
| void RealDFT(const double* reals, uint32_t buf_size, double* real_freq, double* imag_freq) { |
| FX_DCHECK((buf_size & 1u) == 0) << "DFT buffer size must be even"; |
| |
| const double multiplier = M_PI * 2.0 / static_cast<double>(buf_size); |
| const uint32_t buf_sz_2 = buf_size >> 1; |
| |
| for (uint32_t freq = 0; freq <= buf_sz_2; ++freq) { |
| const double freq_mult = multiplier * static_cast<double>(freq); |
| double real = 0.0; |
| double imag = 0.0; |
| for (uint32_t idx = 0; idx < buf_size; ++idx) { |
| const double idx_mult = freq_mult * static_cast<double>(idx); |
| |
| real += (std::cos(idx_mult) * reals[idx]); |
| imag -= (std::sin(idx_mult) * reals[idx]); |
| } |
| real_freq[freq] = real; |
| imag_freq[freq] = imag; |
| } |
| } |
| |
| // Converts frequency-domain arrays real_freq & imag_freq (len buf_size/2 + 1) into time-domain |
| // array reals (len buf_size). This is a naive, unoptimized (N^2)/2 implementation. |
| void InverseDFT(double* real_freq, double* imag_freq, uint32_t buf_size, double* reals) { |
| uint32_t buf_sz_2 = buf_size >> 1; |
| |
| for (uint32_t idx = 0; idx <= buf_sz_2; ++idx) { |
| real_freq[idx] = real_freq[idx] / static_cast<double>(buf_sz_2); |
| imag_freq[idx] = -imag_freq[idx] / static_cast<double>(buf_sz_2); |
| } |
| real_freq[0] /= 2.0; |
| real_freq[buf_sz_2] /= 2.0; |
| |
| double mult = M_PI * 2.0 / static_cast<double>(buf_size); |
| for (uint32_t idx = 0; idx < buf_size; ++idx) { |
| double idx_mult = mult * idx; |
| double val = 0.0; |
| for (uint32_t freq = 0; freq <= buf_sz_2; ++freq) { |
| double freq_mult = idx_mult * freq; |
| val += (real_freq[freq] * std::cos(freq_mult)); |
| val += (imag_freq[freq] * std::sin(freq_mult)); |
| } |
| reals[idx] = val; |
| } |
| } |
| |
| // Converts frequency-domain arrays reals & imags (len buf_size) in-place into time-domain arrays |
| // (also len buf_size) |
| void InverseFFT(double* reals, double* imags, uint32_t buf_size) { |
| FX_DCHECK(cpp20::has_single_bit(buf_size)); |
| |
| for (uint32_t idx = 0; idx < buf_size; ++idx) { |
| imags[idx] = -imags[idx]; |
| } |
| |
| FFT(reals, imags, buf_size); |
| |
| for (uint32_t idx = 0; idx < buf_size; ++idx) { |
| reals[idx] = reals[idx] / static_cast<double>(buf_size); |
| imags[idx] = -imags[idx] / static_cast<double>(buf_size); |
| } |
| } |
| |
| } // namespace internal |
| |
| void AudioFreqResult::Display(std::string tag, double magn_display_threshold) { |
| printf("\n%s", tag.c_str()); |
| |
| printf("\n total_magn_signal: %.9f", total_magn_signal); |
| printf("\n total_magn_other: %.9f", total_magn_other); |
| |
| printf("\n magnitudes: "); |
| for (const auto& [key, value] : magnitudes) { |
| printf(" [%5d] %.9f,", key, value); |
| } |
| printf("\n phases: "); |
| for (const auto& [key, value] : phases) { |
| printf(" [%5d] %.9f,", key, value); |
| } |
| |
| printf("\n all_magnitudes >= %.9f (0 - %lu):", magn_display_threshold, |
| all_square_magnitudes.size() - 1); |
| for (auto idx = 0u, displayed = 0u; idx < all_square_magnitudes.size(); ++idx) { |
| auto magn = std::sqrt(all_square_magnitudes[idx]); |
| if (magn < magn_display_threshold) { |
| continue; |
| } |
| if (displayed % 8 == 0) { |
| printf("\n "); |
| } |
| printf(" [%5d] %.9f,", idx, magn); |
| ++displayed; |
| } |
| printf("\n"); |
| } |
| |
| // For specified audio buffer & length, analyze the contents and return the magnitude (and phase) of |
| // signal at given frequency (i.e. frequency at which 'freq' periods fit perfectly within buffer |
| // length). Also return the magnitude of all other content. Useful for frequency response and |
| // signal-to-noise. Internally uses an FFT, so slice.NumFrames() must be a power-of-two. |
| template <fuchsia::media::AudioSampleFormat SampleFormat> |
| AudioFreqResult MeasureAudioFreqs(AudioBufferSlice<SampleFormat> slice, |
| std::unordered_set<int32_t> freqs) { |
| FX_CHECK(cpp20::has_single_bit(static_cast<uint64_t>(slice.NumFrames()))); |
| FX_CHECK(slice.format().channels() == 1); |
| |
| const int64_t buf_size = slice.NumFrames(); |
| const int64_t buf_sz_2 = buf_size >> 1; |
| |
| // Copy input to double buffer, before doing a high-res FFT (freq-analysis). Note that we set |
| // imags[] to zero: MeasureAudioFreq retrieves a REAL (not Complex) FFT for the data, the return |
| // real and imaginary frequency-domain data only spans 0...N/2 (inclusive). |
| std::vector<double> reals(buf_size); |
| std::vector<double> imags(buf_size); |
| for (int64_t frame = 0; frame < buf_size; ++frame) { |
| reals[frame] = internal::SampleToDouble(slice.SampleAt(frame, 0)); |
| imags[frame] = 0.0; |
| } |
| internal::FFT(reals.data(), imags.data(), static_cast<uint32_t>(buf_size)); |
| |
| // Convert real FFT results from frequency domain into sinusoid amplitudes |
| // |
| // We only feed REAL (not complex) data to the FFT, so return values in reals[] and imags[] only |
| // have meaning through buf_sz_2. Thus, for the frequency bins [1 thru buf_sz_2 - 1], we could |
| // either add in the identical "negative" (beyond buf_size/2) frequency vals, or multiply by two |
| // (with upcoming div-by-buf_size, this becomes div-by-buf_sz_2 for those elements). |
| for (int64_t bin = 1; bin < buf_sz_2; ++bin) { |
| reals[bin] /= static_cast<double>(buf_sz_2); |
| imags[bin] /= static_cast<double>(buf_sz_2); |
| } |
| // Frequencies 0 & buf_sz_2 are 'half-width' bins, so these bins get reduced by half during |
| reals[0] /= static_cast<double>(buf_size); // the normalization step. Specifically, |
| imags[0] /= static_cast<double>(buf_size); // compared to the other indices, we |
| reals[buf_sz_2] /= static_cast<double>(buf_size); // divide the real and imag values |
| imags[buf_sz_2] /= static_cast<double>(buf_size); // by buf_size instead of buf_sz_2. |
| |
| AudioFreqResult out; |
| |
| for (int64_t bin = 0; bin <= buf_sz_2; ++bin) { |
| out.all_square_magnitudes.push_back(reals[bin] * reals[bin] + imags[bin] * imags[bin]); |
| } |
| |
| // Calculate magnitude and phase of primary signal. |
| double sum_sq_magn_signal = 0.0; |
| for (auto freq : freqs) { |
| FX_CHECK(freq <= buf_sz_2) << "Frequency " << freq << " cannot be measured in a buffer of size " |
| << buf_sz_2; |
| auto mag2 = out.all_square_magnitudes[freq]; |
| sum_sq_magn_signal += mag2; |
| out.magnitudes[freq] = std::sqrt(mag2); |
| out.phases[freq] = internal::GetPhase(reals[freq], imags[freq]); |
| } |
| out.total_magn_signal = std::sqrt(sum_sq_magn_signal); |
| |
| // Calculate magnitude of all other frequencies |
| double sum_sq_magn_other = 0.0; |
| for (int32_t bin = 0; bin <= buf_sz_2; ++bin) { |
| if (freqs.count(bin) == 0) { |
| sum_sq_magn_other += out.all_square_magnitudes[bin]; |
| } |
| } |
| out.total_magn_other = std::sqrt(sum_sq_magn_other); |
| |
| return out; |
| } |
| |
| template <fuchsia::media::AudioSampleFormat SampleFormat> |
| AudioBuffer<SampleFormat> MultiplyByTukeyWindow(AudioBufferSlice<SampleFormat> slice, |
| double alpha) { |
| FX_CHECK(alpha <= 1); |
| |
| auto out = slice.Clone(); |
| auto ramp_length_frames = |
| static_cast<int64_t>(alpha * static_cast<double>(slice.NumFrames()) / 2); |
| |
| for (int64_t frame = 0; frame < ramp_length_frames; ++frame) { |
| double x = static_cast<double>(frame) / static_cast<double>(ramp_length_frames); |
| double w = 0.5 * (1.0 - std::cos(M_PI * x)); |
| for (int32_t chan = 0; chan < out.format().channels(); ++chan) { |
| int64_t a = out.SampleIndex(frame, chan); |
| int64_t b = out.NumSamples() - 1 - a; |
| |
| double a_val = w * internal::SampleToDouble(out.samples()[a]); |
| double b_val = w * internal::SampleToDouble(out.samples()[b]); |
| |
| out.samples()[a] = static_cast<typename AudioBuffer<SampleFormat>::SampleT>(a_val); |
| out.samples()[b] = static_cast<typename AudioBuffer<SampleFormat>::SampleT>(b_val); |
| } |
| } |
| |
| return out; |
| } |
| |
| // Explicitly instantiate all possible implementations. |
| #define INSTANTIATE(T) \ |
| template AudioFreqResult MeasureAudioFreqs<T>(AudioBufferSlice<T>, std::unordered_set<int32_t>); \ |
| template AudioBuffer<T> MultiplyByTukeyWindow<T>(AudioBufferSlice<T>, double); |
| |
| INSTANTIATE_FOR_ALL_FORMATS(INSTANTIATE) |
| |
| } // namespace media::audio |