src/media/audio/lib/analysis/analysis.h - fuchsia - Git at Google

 // Copyright 2020 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef SRC_MEDIA_AUDIO_LIB_ANALYSIS_ANALYSIS_H_
 #define SRC_MEDIA_AUDIO_LIB_ANALYSIS_ANALYSIS_H_

 #include <lib/trace/event.h>
 #include <zircon/types.h>

 #include <limits>
 #include <unordered_map>
 #include <unordered_set>

 #include "src/media/audio/lib/format/audio_buffer.h"

 namespace media::audio {

 namespace internal {

 // Perform a Fast Fourier Transform on the provided data arrays.
 //
 // On input, real[] and imag[] contain 'buf_size' number of double-float values
 // in the time domain (such as audio samples); buf_size must be a power-of-two.
 //
 // On output, real[] and imag[] contain 'buf_size' number of double-float values
 // in frequency domain, but generally used only through buf_size/2 (per Nyquist)
 void FFT(double* real, double* imag, uint32_t buf_size);

 // Calculate phase in radians for the complex pair. Correctly handles negative
 // or zero values: range of return value is [-PI,PI], not just [-PI/2,PI/2].
 double GetPhase(double real, double imag);

 // Convert provided real-imag (cartesian) data into magn-phase (polar) format.
 // This is done with 2 in-buffers 2 two out-buffers -- NOT 2 in-out-buffers.
 // TODO(mpuryear): will clients (tests) want this transformed in-place?
 void RectangularToPolar(const double* real, const double* imag, uint32_t buf_size, double* magn,
                         double* phase = nullptr);

 void RealDFT(const double* reals, uint32_t len, double* r_freq, double* i_freq);

 void InverseDFT(double* real, double* imag, uint32_t buf_size, double* real_out);

 void InverseFFT(double* real, double* imag, uint32_t buf_size);

 }  // namespace internal

 struct AudioFreqResult {
   void Display(std::string tag = "", double magn_display_threshold = 0.0);

   // Raw list of square magnitudes for all bins up to size/2.
   std::vector<double> all_square_magnitudes;

   // Mapping from frequency -> magnitude, for each requested frequency.
   std::unordered_map<int32_t, double> magnitudes;
   // Phase in radians, for each requested frequency.
   std::unordered_map<int32_t, double> phases;
   // Total magnitude over all requested frequencies.
   // Magnitude is the root-sum-of-squares of the magnitude at all requested frequencies.
   double total_magn_signal;
   // Total magnitude over all other frequencies.
   // Magnitude is the root-sum-of-squares of the magnitude at all other frequencies.
   double total_magn_other;
 };

 // For the given audio buffer, analyze contents and return the magnitude (and phase) at the given
 // frequency. Also return magnitude of all other content. Useful for frequency response and
 // signal-to-noise. Internally uses an FFT, so slice.NumFrames() must be a power-of-two. The format
 // must have channels() == 1.
 //
 // |freq| is the number of **complete sinusoidal periods** that should perfectly fit into the
 // buffer.
 template <fuchsia::media::AudioSampleFormat SampleFormat>
 AudioFreqResult MeasureAudioFreqs(AudioBufferSlice<SampleFormat> slice,
                                   std::unordered_set<int32_t> freqs);

 // Shorthand that analyzes a single frequency.
 template <fuchsia::media::AudioSampleFormat SampleFormat>
 AudioFreqResult MeasureAudioFreq(AudioBufferSlice<SampleFormat> slice, int32_t freq) {
   auto result = MeasureAudioFreqs(slice, {freq});
   FX_DCHECK(result.total_magn_signal == result.magnitudes[freq]);
   return result;
 }

 // Compute the root-mean-square (RMS) energy of a slice. This is a measure of loudness.
 template <fuchsia::media::AudioSampleFormat SampleFormat>
 double MeasureAudioRMS(AudioBufferSlice<SampleFormat> slice) {
   FX_CHECK(slice.NumFrames() > 0);
   double sum = 0;
   for (int64_t frame = 0; frame < slice.NumFrames(); frame++) {
     for (int32_t chan = 0; chan < slice.format().channels(); chan++) {
       double s = SampleFormatTraits<SampleFormat>::ToFloat(slice.SampleAt(frame, chan));
       sum += s * s;
     }
   }
   return sqrt(sum / static_cast<double>(slice.NumSamples()));
 }

 // Locate the left edge of the first impulse in the given slice, ignoring samples quieter
 // than the given noise floor. Returns the frame index if found, and std::nullopt otherwise.
 // The given slice must have a single channel. We assume the impulse has a positive signal.
 template <fuchsia::media::AudioSampleFormat SampleFormat>
 std::optional<int64_t> FindImpulseLeadingEdge(
     AudioBufferSlice<SampleFormat> slice,
     typename SampleFormatTraits<SampleFormat>::SampleT noise_floor) {
   FX_CHECK(slice.format().channels() == 1);

   auto normalize = [](typename SampleFormatTraits<SampleFormat>::SampleT val) {
     float d = static_cast<float>(val);
     if constexpr (SampleFormat == fuchsia::media::AudioSampleFormat::UNSIGNED_8) {
       d -= 128;
     }
     return d;
   };

   // If our impulse was a single frame, we could simply find the maximum value.
   // To support wider impulses, we need to find the left edge of the impulse. We
   // do this by finding the first value such that there does not exist a value
   // more than 50% larger.
   float max_value = 0;
   for (int64_t f = 0; f < slice.NumFrames(); f++) {
     max_value = std::max(max_value, normalize(slice.SampleAt(f, 0)));
   }
   for (int64_t f = 0; f < slice.NumFrames(); f++) {
     float val = normalize(slice.SampleAt(f, 0));
     if (val <= static_cast<float>(noise_floor)) {
       continue;
     }
     if (1.5 * val > max_value) {
       return f;
     }
   }
   return std::nullopt;
 }

 struct Impulse {
   int64_t leading_edge;
   int64_t max;
   int64_t center;
   int64_t trailing_edge;
 };

 // Locate the center of the impulse in the given slice, ignoring samples quieter than the given
 // noise floor. Returns the frame index if found, and std::nullopt otherwise.
 // This function requires a one-channel slice, and it assumes there is exactly one impulse.
 // Unlike FindImpulseLeadingEdge, this func ignores sign: center/edges can be positive or negative.
 template <fuchsia::media::AudioSampleFormat SampleFormat>
 std::optional<Impulse> FindImpulse(AudioBufferSlice<SampleFormat> slice,
                                    typename SampleFormatTraits<SampleFormat>::SampleT noise_floor) {
   constexpr bool kDisplayEdgesAndCenter = false;

   FX_CHECK(slice.format().channels() == 1);

   auto normalize = [](typename SampleFormatTraits<SampleFormat>::SampleT val) {
     float norm = static_cast<float>(val);
     if constexpr (SampleFormat == fuchsia::media::AudioSampleFormat::UNSIGNED_8) {
       norm -= 0x80;
     }
     return norm;
   };

   // If our impulse was a single frame, we could simply find the maximum absolute value.
   // To support wider impulses, we need to find the left and right edges of the impulse.
   // We do this by finding the first and last values such that there does not exist a
   // value more than 50% larger.
   float max_value = 0.0f;
   int64_t max_idx;
   for (int64_t idx = 0; idx < slice.NumFrames(); idx++) {
     float val = std::abs(normalize(slice.SampleAt(idx, 0)));
     if (val <= static_cast<float>(noise_floor)) {
       continue;
     }
     if (val > max_value) {
       // Store the index and value (ignoring sign) of the largest value in this slice.
       max_value = val;
       max_idx = idx;
     }
   }
   if (max_value == 0.0f) {
     return std::nullopt;
   }

   int64_t leading_idx = 0;
   float leading_val = 0.0f;
   for (int64_t idx = 0; idx < slice.NumFrames(); ++idx) {
     float val = std::abs(normalize(slice.SampleAt(idx, 0)));
     if (1.5 * val > max_value) {
       leading_idx = idx;
       leading_val = val;
       break;
     }
   }

   int64_t trailing_idx = slice.NumFrames() - 1;
   float trailing_val = 0.0f;
   for (int64_t idx = slice.NumFrames() - 1; idx >= 0; --idx) {
     float val = std::abs(normalize(slice.SampleAt(idx, 0)));
     if (1.5 * val > max_value) {
       trailing_idx = idx;
       trailing_val = val;
       break;
     }
   }
   int64_t sum_idx = leading_idx + trailing_idx;
   int64_t center_idx = sum_idx / 2;
   // Round center_idx toward the greater of (leading_val, trailing_val), ignoring signs.
   if ((sum_idx & 0x01) && trailing_val > leading_val) {
     center_idx++;
   }

   if constexpr (kDisplayEdgesAndCenter) {
     std::stringstream edge_values;
     edge_values << "   [" << std::setw(5) << slice.start_frame() + leading_idx << "]"
                 << std::setw(10) << leading_val << " | [" << std::setw(5)
                 << slice.start_frame() + center_idx << "]" << std::setw(10)
                 << normalize(slice.SampleAt(center_idx, 0));
     if ((sum_idx & 0x01) && leading_val < trailing_val) {
       edge_values << " | [" << std::setw(5) << slice.start_frame() + center_idx << "]"
                   << std::setw(10) << normalize(slice.SampleAt(center_idx, 0));
     }
     FX_LOGS(INFO) << edge_values.str() << " | [" << std::setw(5)
                   << slice.start_frame() + trailing_idx << "]" << std::setw(10) << trailing_val;
   }

   return {{
       .leading_edge = leading_idx,
       .max = max_idx,
       .center = center_idx,
       .trailing_edge = trailing_idx,
   }};
 }

 // Multiply the input buffer by a Tukey window, producing a new output buffer. A Tukey window
 // contains a ramp up from zero, followed by a flat top of 1.0, followed by a ramp down to zero.
 // The total width of the up and down ramps is described by the alpha parameter, which must be <= 1.
 template <fuchsia::media::AudioSampleFormat SampleFormat>
 AudioBuffer<SampleFormat> MultiplyByTukeyWindow(AudioBufferSlice<SampleFormat> slice, double alpha);

 }  // namespace media::audio

 #endif  // SRC_MEDIA_AUDIO_LIB_ANALYSIS_ANALYSIS_H_
	// Copyright 2020 The Fuchsia Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#ifndef SRC_MEDIA_AUDIO_LIB_ANALYSIS_ANALYSIS_H_
	#define SRC_MEDIA_AUDIO_LIB_ANALYSIS_ANALYSIS_H_

	#include <lib/trace/event.h>
	#include <zircon/types.h>

	#include <limits>
	#include <unordered_map>
	#include <unordered_set>

	#include "src/media/audio/lib/format/audio_buffer.h"

	namespace media::audio {

	namespace internal {

	// Perform a Fast Fourier Transform on the provided data arrays.
	//
	// On input, real[] and imag[] contain 'buf_size' number of double-float values
	// in the time domain (such as audio samples); buf_size must be a power-of-two.
	//
	// On output, real[] and imag[] contain 'buf_size' number of double-float values
	// in frequency domain, but generally used only through buf_size/2 (per Nyquist)
	void FFT(double* real, double* imag, uint32_t buf_size);

	// Calculate phase in radians for the complex pair. Correctly handles negative
	// or zero values: range of return value is [-PI,PI], not just [-PI/2,PI/2].
	double GetPhase(double real, double imag);

	// Convert provided real-imag (cartesian) data into magn-phase (polar) format.
	// This is done with 2 in-buffers 2 two out-buffers -- NOT 2 in-out-buffers.
	// TODO(mpuryear): will clients (tests) want this transformed in-place?
	void RectangularToPolar(const double* real, const double* imag, uint32_t buf_size, double* magn,
	double* phase = nullptr);

	void RealDFT(const double* reals, uint32_t len, double* r_freq, double* i_freq);

	void InverseDFT(double* real, double* imag, uint32_t buf_size, double* real_out);

	void InverseFFT(double* real, double* imag, uint32_t buf_size);

	} // namespace internal

	struct AudioFreqResult {
	void Display(std::string tag = "", double magn_display_threshold = 0.0);

	// Raw list of square magnitudes for all bins up to size/2.
	std::vector<double> all_square_magnitudes;

	// Mapping from frequency -> magnitude, for each requested frequency.
	std::unordered_map<int32_t, double> magnitudes;
	// Phase in radians, for each requested frequency.
	std::unordered_map<int32_t, double> phases;
	// Total magnitude over all requested frequencies.
	// Magnitude is the root-sum-of-squares of the magnitude at all requested frequencies.
	double total_magn_signal;
	// Total magnitude over all other frequencies.
	// Magnitude is the root-sum-of-squares of the magnitude at all other frequencies.
	double total_magn_other;
	};

	// For the given audio buffer, analyze contents and return the magnitude (and phase) at the given
	// frequency. Also return magnitude of all other content. Useful for frequency response and
	// signal-to-noise. Internally uses an FFT, so slice.NumFrames() must be a power-of-two. The format
	// must have channels() == 1.
	//
	// \|freq\| is the number of complete sinusoidal periods that should perfectly fit into the
	// buffer.
	template <fuchsia::media::AudioSampleFormat SampleFormat>
	AudioFreqResult MeasureAudioFreqs(AudioBufferSlice<SampleFormat> slice,
	std::unordered_set<int32_t> freqs);

	// Shorthand that analyzes a single frequency.
	template <fuchsia::media::AudioSampleFormat SampleFormat>
	AudioFreqResult MeasureAudioFreq(AudioBufferSlice<SampleFormat> slice, int32_t freq) {
	auto result = MeasureAudioFreqs(slice, {freq});
	FX_DCHECK(result.total_magn_signal == result.magnitudes[freq]);
	return result;
	}

	// Compute the root-mean-square (RMS) energy of a slice. This is a measure of loudness.
	template <fuchsia::media::AudioSampleFormat SampleFormat>
	double MeasureAudioRMS(AudioBufferSlice<SampleFormat> slice) {
	FX_CHECK(slice.NumFrames() > 0);
	double sum = 0;
	for (int64_t frame = 0; frame < slice.NumFrames(); frame++) {
	for (int32_t chan = 0; chan < slice.format().channels(); chan++) {
	double s = SampleFormatTraits<SampleFormat>::ToFloat(slice.SampleAt(frame, chan));
	sum += s * s;
	}
	}
	return sqrt(sum / static_cast<double>(slice.NumSamples()));
	}

	// Locate the left edge of the first impulse in the given slice, ignoring samples quieter
	// than the given noise floor. Returns the frame index if found, and std::nullopt otherwise.
	// The given slice must have a single channel. We assume the impulse has a positive signal.
	template <fuchsia::media::AudioSampleFormat SampleFormat>
	std::optional<int64_t> FindImpulseLeadingEdge(
	AudioBufferSlice<SampleFormat> slice,
	typename SampleFormatTraits<SampleFormat>::SampleT noise_floor) {
	FX_CHECK(slice.format().channels() == 1);

	auto normalize = [](typename SampleFormatTraits<SampleFormat>::SampleT val) {
	float d = static_cast<float>(val);
	if constexpr (SampleFormat == fuchsia::media::AudioSampleFormat::UNSIGNED_8) {
	d -= 128;
	}
	return d;
	};

	// If our impulse was a single frame, we could simply find the maximum value.
	// To support wider impulses, we need to find the left edge of the impulse. We
	// do this by finding the first value such that there does not exist a value
	// more than 50% larger.
	float max_value = 0;
	for (int64_t f = 0; f < slice.NumFrames(); f++) {
	max_value = std::max(max_value, normalize(slice.SampleAt(f, 0)));
	}
	for (int64_t f = 0; f < slice.NumFrames(); f++) {
	float val = normalize(slice.SampleAt(f, 0));
	if (val <= static_cast<float>(noise_floor)) {
	continue;
	}
	if (1.5 * val > max_value) {
	return f;
	}
	}
	return std::nullopt;
	}

	struct Impulse {
	int64_t leading_edge;
	int64_t max;
	int64_t center;
	int64_t trailing_edge;
	};

	// Locate the center of the impulse in the given slice, ignoring samples quieter than the given
	// noise floor. Returns the frame index if found, and std::nullopt otherwise.
	// This function requires a one-channel slice, and it assumes there is exactly one impulse.
	// Unlike FindImpulseLeadingEdge, this func ignores sign: center/edges can be positive or negative.
	template <fuchsia::media::AudioSampleFormat SampleFormat>
	std::optional<Impulse> FindImpulse(AudioBufferSlice<SampleFormat> slice,
	typename SampleFormatTraits<SampleFormat>::SampleT noise_floor) {
	constexpr bool kDisplayEdgesAndCenter = false;

	FX_CHECK(slice.format().channels() == 1);

	auto normalize = [](typename SampleFormatTraits<SampleFormat>::SampleT val) {
	float norm = static_cast<float>(val);
	if constexpr (SampleFormat == fuchsia::media::AudioSampleFormat::UNSIGNED_8) {
	norm -= 0x80;
	}
	return norm;
	};

	// If our impulse was a single frame, we could simply find the maximum absolute value.
	// To support wider impulses, we need to find the left and right edges of the impulse.
	// We do this by finding the first and last values such that there does not exist a
	// value more than 50% larger.
	float max_value = 0.0f;
	int64_t max_idx;
	for (int64_t idx = 0; idx < slice.NumFrames(); idx++) {
	float val = std::abs(normalize(slice.SampleAt(idx, 0)));
	if (val <= static_cast<float>(noise_floor)) {
	continue;
	}
	if (val > max_value) {
	// Store the index and value (ignoring sign) of the largest value in this slice.
	max_value = val;
	max_idx = idx;
	}
	}
	if (max_value == 0.0f) {
	return std::nullopt;
	}

	int64_t leading_idx = 0;
	float leading_val = 0.0f;
	for (int64_t idx = 0; idx < slice.NumFrames(); ++idx) {
	float val = std::abs(normalize(slice.SampleAt(idx, 0)));
	if (1.5 * val > max_value) {
	leading_idx = idx;
	leading_val = val;
	break;
	}
	}

	int64_t trailing_idx = slice.NumFrames() - 1;
	float trailing_val = 0.0f;
	for (int64_t idx = slice.NumFrames() - 1; idx >= 0; --idx) {
	float val = std::abs(normalize(slice.SampleAt(idx, 0)));
	if (1.5 * val > max_value) {
	trailing_idx = idx;
	trailing_val = val;
	break;
	}
	}
	int64_t sum_idx = leading_idx + trailing_idx;
	int64_t center_idx = sum_idx / 2;
	// Round center_idx toward the greater of (leading_val, trailing_val), ignoring signs.
	if ((sum_idx & 0x01) && trailing_val > leading_val) {
	center_idx++;
	}

	if constexpr (kDisplayEdgesAndCenter) {
	std::stringstream edge_values;
	edge_values << " [" << std::setw(5) << slice.start_frame() + leading_idx << "]"
	<< std::setw(10) << leading_val << " \| [" << std::setw(5)
	<< slice.start_frame() + center_idx << "]" << std::setw(10)
	<< normalize(slice.SampleAt(center_idx, 0));
	if ((sum_idx & 0x01) && leading_val < trailing_val) {
	edge_values << " \| [" << std::setw(5) << slice.start_frame() + center_idx << "]"
	<< std::setw(10) << normalize(slice.SampleAt(center_idx, 0));
	}
	FX_LOGS(INFO) << edge_values.str() << " \| [" << std::setw(5)
	<< slice.start_frame() + trailing_idx << "]" << std::setw(10) << trailing_val;
	}

	return {{
	.leading_edge = leading_idx,
	.max = max_idx,
	.center = center_idx,
	.trailing_edge = trailing_idx,
	}};
	}

	// Multiply the input buffer by a Tukey window, producing a new output buffer. A Tukey window
	// contains a ramp up from zero, followed by a flat top of 1.0, followed by a ramp down to zero.
	// The total width of the up and down ramps is described by the alpha parameter, which must be <= 1.
	template <fuchsia::media::AudioSampleFormat SampleFormat>
	AudioBuffer<SampleFormat> MultiplyByTukeyWindow(AudioBufferSlice<SampleFormat> slice, double alpha);

	} // namespace media::audio

	#endif // SRC_MEDIA_AUDIO_LIB_ANALYSIS_ANALYSIS_H_