| /* |
| * Copyright (c) 2014 - 2021 Jason Jang |
| * Copyright (c) 2021 Paul B Mahol |
| * |
| * This file is part of FFmpeg. |
| * |
| * FFmpeg is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public License |
| * as published by the Free Software Foundation; either |
| * version 2.1 of the License, or (at your option) any later version. |
| * |
| * FFmpeg is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public License |
| * along with FFmpeg; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| */ |
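
/*
 * Psychoacoustic audio clipper.
 *
 * The signal is processed in overlapping windowed blocks: each block is
 * hard-clipped in the time domain, and the resulting distortion is shaped
 * in the frequency domain so that it stays below a masking threshold
 * estimated from the input spectrum. The clip/limit step is iterated to
 * converge on the peak target while keeping the added distortion as
 * inaudible as possible.
 */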
| |
| #include "libavutil/opt.h" |
| #include "libavutil/tx.h" |
| #include "audio.h" |
| #include "avfilter.h" |
| #include "filters.h" |
| #include "internal.h" |
| |
| typedef struct AudioPsyClipContext { |
| const AVClass *class; |
| |
| double level_in; |
| double level_out; |
| double clip_level; |
| double adaptive; |
| int auto_level; |
| int diff_only; |
| int iterations; |
| char *protections_str; |
| double *protections; |
| |
| int num_psy_bins; |
| int fft_size; |
| int overlap; |
| int channels; |
| |
| int spread_table_rows; |
| int *spread_table_index; |
| int (*spread_table_range)[2]; |
| float *window, *inv_window, *spread_table, *margin_curve; |
| |
| AVFrame *in; |
| AVFrame *in_buffer; |
| AVFrame *in_frame; |
| AVFrame *out_dist_frame; |
| AVFrame *windowed_frame; |
| AVFrame *clipping_delta; |
| AVFrame *spectrum_buf; |
| AVFrame *mask_curve; |
| |
| AVTXContext **tx_ctx; |
| av_tx_fn tx_fn; |
| AVTXContext **itx_ctx; |
| av_tx_fn itx_fn; |
| } AudioPsyClipContext; |
| |
| #define OFFSET(x) offsetof(AudioPsyClipContext, x) |
| #define FLAGS AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_RUNTIME_PARAM |
| |
| static const AVOption apsyclip_options[] = { |
| { "level_in", "set input level", OFFSET(level_in), AV_OPT_TYPE_DOUBLE, {.dbl=1},.015625, 64, FLAGS }, |
| { "level_out", "set output level", OFFSET(level_out), AV_OPT_TYPE_DOUBLE, {.dbl=1},.015625, 64, FLAGS }, |
| { "clip", "set clip level", OFFSET(clip_level), AV_OPT_TYPE_DOUBLE, {.dbl=1},.015625, 1, FLAGS }, |
| { "diff", "enable difference", OFFSET(diff_only), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, |
| { "adaptive", "set adaptive distortion", OFFSET(adaptive), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 1, FLAGS }, |
| { "iterations", "set iterations", OFFSET(iterations), AV_OPT_TYPE_INT, {.i64=10}, 1, 20, FLAGS }, |
| { "level", "set auto level", OFFSET(auto_level), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, |
| {NULL} |
| }; |
| |
| AVFILTER_DEFINE_CLASS(apsyclip); |
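
/*
 * Example usage (illustrative values, not a recommendation):
 *   ffmpeg -i in.wav -af "apsyclip=clip=0.5:iterations=12:level=1" out.wav
 * clips at half of full scale (about -6 dB), runs 12 clipping/filtering
 * iterations and, with level=1, rescales the result back to full scale.
 */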
| |
| static void generate_hann_window(float *window, float *inv_window, int size) |
| { |
| for (int i = 0; i < size; i++) { |
| float value = 0.5f * (1.f - cosf(2.f * M_PI * i / size)); |
| |
| window[i] = value; |
| // 1/window to calculate unwindowed peak. |
| inv_window[i] = value > 0.1f ? 1.f / value : 0.f; |
| } |
| } |
| |
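/*
 * Build the margin curve: the per-bin margin (given in dB and converted to
 * linear amplitude at the end) by which clipping distortion must stay below
 * the psychoacoustic masking estimate. The (frequency in Hz, margin in dB)
 * control points are linearly interpolated across the FFT bins.
 */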
| static void set_margin_curve(AudioPsyClipContext *s, |
| const int (*points)[2], int num_points, int sample_rate) |
| { |
| int j = 0; |
| |
| s->margin_curve[0] = points[0][1]; |
| |
| for (int i = 0; i < num_points - 1; i++) { |
| while (j < s->fft_size / 2 + 1 && j * sample_rate / s->fft_size < points[i + 1][0]) { |
| // linearly interpolate between points |
            int bin_hz = j * sample_rate / s->fft_size;
            s->margin_curve[j] = points[i][1] + (bin_hz - points[i][0]) * (points[i + 1][1] - points[i][1]) / (points[i + 1][0] - points[i][0]);
| j++; |
| } |
| } |
| // handle bins after the last point |
| while (j < s->fft_size / 2 + 1) { |
| s->margin_curve[j] = points[num_points - 1][1]; |
| j++; |
| } |
| |
| // convert margin curve to linear amplitude scale |
| for (j = 0; j < s->fft_size / 2 + 1; j++) |
| s->margin_curve[j] = powf(10.f, s->margin_curve[j] / 20.f); |
| } |
| |
| static void generate_spread_table(AudioPsyClipContext *s) |
| { |
| // Calculate tent-shape function in log-log scale. |
| |
| // As an optimization, only consider bins close to "bin" |
| // This reduces the number of multiplications needed in calculate_mask_curve |
    // The masking contribution at faraway bins is negligible
| |
| // Another optimization to save memory and speed up the calculation of the |
| // spread table is to calculate and store only 2 spread functions per |
| // octave, and reuse the same spread function for multiple bins. |
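    // For example, with num_psy_bins = 128 (the value for all supported
    // sample rates, see config_input) this yields av_log2(128) * 2 = 14
    // table rows instead of one row per bin.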
| int table_index = 0; |
| int bin = 0; |
| int increment = 1; |
| |
| while (bin < s->num_psy_bins) { |
| float sum = 0; |
| int base_idx = table_index * s->num_psy_bins; |
| int start_bin = bin * 3 / 4; |
| int end_bin = FFMIN(s->num_psy_bins, ((bin + 1) * 4 + 2) / 3); |
| int next_bin; |
| |
| for (int j = start_bin; j < end_bin; j++) { |
            // add 0.5 so bin 0 doesn't produce log(0)
| float rel_idx_log = FFABS(logf((j + 0.5f) / (bin + 0.5f))); |
| float value; |
| if (j >= bin) { |
| // mask up |
| value = expf(-rel_idx_log * 40.f); |
| } else { |
| // mask down |
| value = expf(-rel_idx_log * 80.f); |
| } |
            // store the value so the spread function is centred at column num_psy_bins/2 of the table row
| sum += value; |
| s->spread_table[base_idx + s->num_psy_bins / 2 + j - bin] = value; |
| } |
| // now normalize it |
| for (int j = start_bin; j < end_bin; j++) { |
| s->spread_table[base_idx + s->num_psy_bins / 2 + j - bin] /= sum; |
| } |
| |
| s->spread_table_range[table_index][0] = start_bin - bin; |
| s->spread_table_range[table_index][1] = end_bin - bin; |
| |
| if (bin <= 1) { |
| next_bin = bin + 1; |
| } else { |
| if ((bin & (bin - 1)) == 0) { |
| // power of 2 |
| increment = bin / 2; |
| } |
| |
| next_bin = bin + increment; |
| } |
| |
| // set bins between "bin" and "next_bin" to use this table_index |
| for (int i = bin; i < next_bin; i++) |
| s->spread_table_index[i] = table_index; |
| |
| bin = next_bin; |
| table_index++; |
| } |
| } |
| |
| static int config_input(AVFilterLink *inlink) |
| { |
| AVFilterContext *ctx = inlink->dst; |
| AudioPsyClipContext *s = ctx->priv; |
| static const int points[][2] = { {0,14}, {125,14}, {250,16}, {500,18}, {1000,20}, {2000,20}, {4000,20}, {8000,17}, {16000,14}, {20000,-10} }; |
    static const int num_points = FF_ARRAY_ELEMS(points);
| float scale = 1.f; |
| int ret; |
| |
| s->fft_size = inlink->sample_rate > 100000 ? 1024 : inlink->sample_rate > 50000 ? 512 : 256; |
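    // s->overlap is the hop size: consecutive analysis windows overlap by 75%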
| s->overlap = s->fft_size / 4; |
| |
    // The psychoacoustic masking calculation is O(n^2),
    // so skip it for frequencies not covered by the base sample rates (i.e., 44.1/48 kHz)
| if (inlink->sample_rate <= 50000) { |
| s->num_psy_bins = s->fft_size / 2; |
| } else if (inlink->sample_rate <= 100000) { |
| s->num_psy_bins = s->fft_size / 4; |
| } else { |
| s->num_psy_bins = s->fft_size / 8; |
| } |
| |
| s->window = av_calloc(s->fft_size, sizeof(*s->window)); |
| s->inv_window = av_calloc(s->fft_size, sizeof(*s->inv_window)); |
| if (!s->window || !s->inv_window) |
| return AVERROR(ENOMEM); |
| |
| s->in_buffer = ff_get_audio_buffer(inlink, s->fft_size * 2); |
| s->in_frame = ff_get_audio_buffer(inlink, s->fft_size * 2); |
| s->out_dist_frame = ff_get_audio_buffer(inlink, s->fft_size * 2); |
| s->windowed_frame = ff_get_audio_buffer(inlink, s->fft_size * 2); |
| s->clipping_delta = ff_get_audio_buffer(inlink, s->fft_size * 2); |
| s->spectrum_buf = ff_get_audio_buffer(inlink, s->fft_size * 2); |
| s->mask_curve = ff_get_audio_buffer(inlink, s->fft_size / 2 + 1); |
| if (!s->in_buffer || !s->in_frame || |
| !s->out_dist_frame || !s->windowed_frame || |
| !s->clipping_delta || !s->spectrum_buf || !s->mask_curve) |
| return AVERROR(ENOMEM); |
| |
| generate_hann_window(s->window, s->inv_window, s->fft_size); |
| |
| s->margin_curve = av_calloc(s->fft_size / 2 + 1, sizeof(*s->margin_curve)); |
| if (!s->margin_curve) |
| return AVERROR(ENOMEM); |
| |
| s->spread_table_rows = av_log2(s->num_psy_bins) * 2; |
| s->spread_table = av_calloc(s->spread_table_rows * s->num_psy_bins, sizeof(*s->spread_table)); |
| if (!s->spread_table) |
| return AVERROR(ENOMEM); |
| |
| s->spread_table_range = av_calloc(s->spread_table_rows * 2, sizeof(*s->spread_table_range)); |
| if (!s->spread_table_range) |
| return AVERROR(ENOMEM); |
| |
| s->spread_table_index = av_calloc(s->num_psy_bins, sizeof(*s->spread_table_index)); |
| if (!s->spread_table_index) |
| return AVERROR(ENOMEM); |
| |
| set_margin_curve(s, points, num_points, inlink->sample_rate); |
| |
| generate_spread_table(s); |
| |
| s->channels = inlink->ch_layout.nb_channels; |
| |
| s->tx_ctx = av_calloc(s->channels, sizeof(*s->tx_ctx)); |
| s->itx_ctx = av_calloc(s->channels, sizeof(*s->itx_ctx)); |
| if (!s->tx_ctx || !s->itx_ctx) |
| return AVERROR(ENOMEM); |
| |
| for (int ch = 0; ch < s->channels; ch++) { |
| ret = av_tx_init(&s->tx_ctx[ch], &s->tx_fn, AV_TX_FLOAT_FFT, 0, s->fft_size, &scale, 0); |
| if (ret < 0) |
| return ret; |
| |
| ret = av_tx_init(&s->itx_ctx[ch], &s->itx_fn, AV_TX_FLOAT_FFT, 1, s->fft_size, &scale, 0); |
| if (ret < 0) |
| return ret; |
| } |
| |
| return 0; |
| } |
| |
| static void apply_window(AudioPsyClipContext *s, |
| const float *in_frame, float *out_frame, const int add_to_out_frame) |
| { |
| const float *window = s->window; |
| |
| for (int i = 0; i < s->fft_size; i++) { |
| if (add_to_out_frame) { |
| out_frame[i] += in_frame[i] * window[i]; |
| } else { |
| out_frame[i] = in_frame[i] * window[i]; |
| } |
| } |
| } |
| |
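/*
 * Estimate the masking threshold for the current block: spread each bin's
 * magnitude with the precomputed tent functions, then divide by the margin
 * curve to obtain the per-bin ceiling for clipping distortion.
 */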
| static void calculate_mask_curve(AudioPsyClipContext *s, |
| const float *spectrum, float *mask_curve) |
| { |
| for (int i = 0; i < s->fft_size / 2 + 1; i++) |
| mask_curve[i] = 0; |
| |
| for (int i = 0; i < s->num_psy_bins; i++) { |
| int base_idx, start_bin, end_bin, table_idx; |
| float magnitude; |
| int range[2]; |
| |
| if (i == 0) { |
| magnitude = FFABS(spectrum[0]); |
| } else if (i == s->fft_size / 2) { |
| magnitude = FFABS(spectrum[s->fft_size]); |
| } else { |
| // Because the input signal is real, the + and - frequencies are redundant. |
| // Multiply the magnitude by 2 to simulate adding up the + and - frequencies. |
| magnitude = hypotf(spectrum[2 * i], spectrum[2 * i + 1]) * 2; |
| } |
| |
| table_idx = s->spread_table_index[i]; |
| range[0] = s->spread_table_range[table_idx][0]; |
| range[1] = s->spread_table_range[table_idx][1]; |
| base_idx = table_idx * s->num_psy_bins; |
| start_bin = FFMAX(0, i + range[0]); |
| end_bin = FFMIN(s->num_psy_bins, i + range[1]); |
| |
| for (int j = start_bin; j < end_bin; j++) |
| mask_curve[j] += s->spread_table[base_idx + s->num_psy_bins / 2 + j - i] * magnitude; |
| } |
| |
| // for ultrasonic frequencies, skip the O(n^2) spread calculation and just copy the magnitude |
| for (int i = s->num_psy_bins; i < s->fft_size / 2 + 1; i++) { |
| float magnitude; |
| if (i == s->fft_size / 2) { |
| magnitude = FFABS(spectrum[s->fft_size]); |
| } else { |
| // Because the input signal is real, the + and - frequencies are redundant. |
| // Multiply the magnitude by 2 to simulate adding up the + and - frequencies. |
| magnitude = hypotf(spectrum[2 * i], spectrum[2 * i + 1]) * 2; |
| } |
| |
| mask_curve[i] = magnitude; |
| } |
| |
| for (int i = 0; i < s->fft_size / 2 + 1; i++) |
| mask_curve[i] = mask_curve[i] / s->margin_curve[i]; |
| } |
| |
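/*
 * Clip the windowed signal against a windowed threshold: scaling the clip
 * level by the analysis window makes this equivalent to clipping the
 * unwindowed samples at clip_level. delta_boost > 1 over-applies the
 * correction to speed up convergence in the final iterations.
 */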
| static void clip_to_window(AudioPsyClipContext *s, |
| const float *windowed_frame, float *clipping_delta, float delta_boost) |
| { |
| const float *window = s->window; |
| |
| for (int i = 0; i < s->fft_size; i++) { |
| const float limit = s->clip_level * window[i]; |
| const float effective_value = windowed_frame[i] + clipping_delta[i]; |
| |
| if (effective_value > limit) { |
| clipping_delta[i] += (limit - effective_value) * delta_boost; |
| } else if (effective_value < -limit) { |
| clipping_delta[i] += (-limit - effective_value) * delta_boost; |
| } |
| } |
| } |
| |
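/*
 * Scale down any distortion component that exceeds the mask curve, so the
 * clipping noise stays at or below the estimated masking threshold.
 */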
| static void limit_clip_spectrum(AudioPsyClipContext *s, |
| float *clip_spectrum, const float *mask_curve) |
| { |
| // bin 0 |
| float relative_distortion_level = FFABS(clip_spectrum[0]) / mask_curve[0]; |
| |
| if (relative_distortion_level > 1.f) |
| clip_spectrum[0] /= relative_distortion_level; |
| |
| // bin 1..N/2-1 |
| for (int i = 1; i < s->fft_size / 2; i++) { |
| float real = clip_spectrum[i * 2]; |
| float imag = clip_spectrum[i * 2 + 1]; |
| // Because the input signal is real, the + and - frequencies are redundant. |
| // Multiply the magnitude by 2 to simulate adding up the + and - frequencies. |
| relative_distortion_level = hypotf(real, imag) * 2 / mask_curve[i]; |
        if (relative_distortion_level > 1.f) {
| clip_spectrum[i * 2] /= relative_distortion_level; |
| clip_spectrum[i * 2 + 1] /= relative_distortion_level; |
| clip_spectrum[s->fft_size * 2 - i * 2] /= relative_distortion_level; |
| clip_spectrum[s->fft_size * 2 - i * 2 + 1] /= relative_distortion_level; |
| } |
| } |
| // bin N/2 |
| relative_distortion_level = FFABS(clip_spectrum[s->fft_size]) / mask_curve[s->fft_size / 2]; |
| if (relative_distortion_level > 1.f) |
| clip_spectrum[s->fft_size] /= relative_distortion_level; |
| } |
| |
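/*
 * av_tx is used in full complex mode (AV_TX_FLOAT_FFT), so the real signal
 * is expanded to interleaved re/im pairs before the forward transform (r2c)
 * and collapsed back to real samples after the inverse transform (c2r).
 */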
| static void r2c(float *buffer, int size) |
| { |
| for (int i = size - 1; i >= 0; i--) |
| buffer[2 * i] = buffer[i]; |
| |
| for (int i = size - 1; i >= 0; i--) |
| buffer[2 * i + 1] = 0.f; |
| } |
| |
| static void c2r(float *buffer, int size) |
| { |
| for (int i = 0; i < size; i++) |
| buffer[i] = buffer[2 * i]; |
| |
| for (int i = 0; i < size; i++) |
| buffer[i + size] = 0.f; |
| } |
| |
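/*
 * Process one hop of `overlap` samples for a single channel:
 *   1. shift the input/output FIFOs and append the new samples
 *   2. window the block, take its FFT and derive the mask curve
 *   3. iteratively clip the block and limit the resulting distortion
 *      spectrum to the mask curve, relaxing the curve as needed to
 *      reach the peak target
 *   4. overlap-add the windowed distortion and emit `overlap` samples
 */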
| static void feed(AVFilterContext *ctx, int ch, |
| const float *in_samples, float *out_samples, int diff_only, |
| float *in_frame, float *out_dist_frame, |
| float *windowed_frame, float *clipping_delta, |
| float *spectrum_buf, float *mask_curve) |
| { |
| AudioPsyClipContext *s = ctx->priv; |
| const float clip_level_inv = 1.f / s->clip_level; |
| const float level_out = s->level_out; |
| float orig_peak = 0; |
| float peak; |
| |
| // shift in/out buffers |
| for (int i = 0; i < s->fft_size - s->overlap; i++) { |
| in_frame[i] = in_frame[i + s->overlap]; |
| out_dist_frame[i] = out_dist_frame[i + s->overlap]; |
| } |
| |
| for (int i = 0; i < s->overlap; i++) { |
| in_frame[i + s->fft_size - s->overlap] = in_samples[i]; |
| out_dist_frame[i + s->fft_size - s->overlap] = 0.f; |
| } |
| |
| apply_window(s, in_frame, windowed_frame, 0); |
| r2c(windowed_frame, s->fft_size); |
| s->tx_fn(s->tx_ctx[ch], spectrum_buf, windowed_frame, sizeof(AVComplexFloat)); |
| c2r(windowed_frame, s->fft_size); |
| calculate_mask_curve(s, spectrum_buf, mask_curve); |
| |
| // It would be easier to calculate the peak from the unwindowed input. |
    // This is just for consistency with the clipped peak calculation
| // because the inv_window zeros out samples on the edge of the window. |
| for (int i = 0; i < s->fft_size; i++) |
| orig_peak = FFMAX(orig_peak, FFABS(windowed_frame[i] * s->inv_window[i])); |
| orig_peak *= clip_level_inv; |
| peak = orig_peak; |
| |
| // clear clipping_delta |
| for (int i = 0; i < s->fft_size * 2; i++) |
| clipping_delta[i] = 0.f; |
| |
| // repeat clipping-filtering process a few times to control both the peaks and the spectrum |
| for (int i = 0; i < s->iterations; i++) { |
        float mask_curve_shift = 1.122f; // 1.122 ≈ +1 dB (10^(1/20))
| // The last 1/3 of rounds have boosted delta to help reach the peak target faster |
| float delta_boost = 1.f; |
| if (i >= s->iterations - s->iterations / 3) { |
            // boosting the delta when large peaks are still present is dangerous
| if (peak < 2.f) |
| delta_boost = 2.f; |
| } |
| |
| clip_to_window(s, windowed_frame, clipping_delta, delta_boost); |
| |
| r2c(clipping_delta, s->fft_size); |
| s->tx_fn(s->tx_ctx[ch], spectrum_buf, clipping_delta, sizeof(AVComplexFloat)); |
| |
| limit_clip_spectrum(s, spectrum_buf, mask_curve); |
| |
| s->itx_fn(s->itx_ctx[ch], clipping_delta, spectrum_buf, sizeof(AVComplexFloat)); |
| c2r(clipping_delta, s->fft_size); |
| |
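        // compensate for av_tx's unnormalized inverse transform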
        for (int j = 0; j < s->fft_size; j++)
            clipping_delta[j] /= s->fft_size;
| |
        peak = 0;
        for (int j = 0; j < s->fft_size; j++)
            peak = FFMAX(peak, FFABS((windowed_frame[j] + clipping_delta[j]) * s->inv_window[j]));
| peak *= clip_level_inv; |
| |
| // Automatically adjust mask_curve as necessary to reach peak target |
| if (orig_peak > 1.f && peak > 1.f) { |
| float diff_achieved = orig_peak - peak; |
| if (i + 1 < s->iterations - s->iterations / 3 && diff_achieved > 0) { |
| float diff_needed = orig_peak - 1.f; |
| float diff_ratio = diff_needed / diff_achieved; |
| // If a good amount of peak reduction was already achieved, |
| // don't shift the mask_curve by the full peak value |
| // On the other hand, if only a little peak reduction was achieved, |
| // don't shift the mask_curve by the enormous diff_ratio. |
| diff_ratio = FFMIN(diff_ratio, peak); |
| mask_curve_shift = FFMAX(mask_curve_shift, diff_ratio); |
| } else { |
| // If the peak got higher than the input or we are in the last 1/3 rounds, |
| // go back to the heavy-handed peak heuristic. |
| mask_curve_shift = FFMAX(mask_curve_shift, peak); |
| } |
| } |
| |
| mask_curve_shift = 1.f + (mask_curve_shift - 1.f) * s->adaptive; |
| |
| // Be less strict in the next iteration. |
| // This helps with peak control. |
        for (int j = 0; j < s->fft_size / 2 + 1; j++)
            mask_curve[j] *= mask_curve_shift;
| } |
| |
| // do overlap & add |
| apply_window(s, clipping_delta, out_dist_frame, 1); |
| |
| for (int i = 0; i < s->overlap; i++) { |
        // 4x overlap-add of a squared Hann window results in a 1.5x increase in amplitude
| if (!ctx->is_disabled) { |
| out_samples[i] = out_dist_frame[i] / 1.5f; |
| if (!diff_only) |
| out_samples[i] += in_frame[i]; |
| if (s->auto_level) |
| out_samples[i] *= clip_level_inv; |
| out_samples[i] *= level_out; |
| } else { |
| out_samples[i] = in_frame[i]; |
| } |
| } |
| } |
| |
| static int psy_channel(AVFilterContext *ctx, AVFrame *in, AVFrame *out, int ch) |
| { |
| AudioPsyClipContext *s = ctx->priv; |
| const float *src = (const float *)in->extended_data[ch]; |
| float *in_buffer = (float *)s->in_buffer->extended_data[ch]; |
| float *dst = (float *)out->extended_data[ch]; |
| |
| for (int n = 0; n < s->overlap; n++) |
| in_buffer[n] = src[n] * s->level_in; |
| |
| feed(ctx, ch, in_buffer, dst, s->diff_only, |
| (float *)(s->in_frame->extended_data[ch]), |
| (float *)(s->out_dist_frame->extended_data[ch]), |
| (float *)(s->windowed_frame->extended_data[ch]), |
| (float *)(s->clipping_delta->extended_data[ch]), |
| (float *)(s->spectrum_buf->extended_data[ch]), |
| (float *)(s->mask_curve->extended_data[ch])); |
| |
| return 0; |
| } |
| |
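/* slice-threading worker: each job processes a contiguous range of channels */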
| static int psy_channels(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) |
| { |
| AudioPsyClipContext *s = ctx->priv; |
| AVFrame *out = arg; |
| const int start = (out->ch_layout.nb_channels * jobnr) / nb_jobs; |
| const int end = (out->ch_layout.nb_channels * (jobnr+1)) / nb_jobs; |
| |
| for (int ch = start; ch < end; ch++) |
| psy_channel(ctx, s->in, out, ch); |
| |
| return 0; |
| } |
| |
| static int filter_frame(AVFilterLink *inlink, AVFrame *in) |
| { |
| AVFilterContext *ctx = inlink->dst; |
| AVFilterLink *outlink = ctx->outputs[0]; |
| AudioPsyClipContext *s = ctx->priv; |
| AVFrame *out; |
| int ret; |
| |
| out = ff_get_audio_buffer(outlink, s->overlap); |
| if (!out) { |
| ret = AVERROR(ENOMEM); |
| goto fail; |
| } |
| |
| s->in = in; |
| av_frame_copy_props(out, in); |
| ff_filter_execute(ctx, psy_channels, out, NULL, |
| FFMIN(outlink->ch_layout.nb_channels, ff_filter_get_nb_threads(ctx))); |
| |
| out->pts = in->pts; |
| out->nb_samples = in->nb_samples; |
| ret = ff_filter_frame(outlink, out); |
| fail: |
| av_frame_free(&in); |
| s->in = NULL; |
| return ret < 0 ? ret : 0; |
| } |
| |
| static int activate(AVFilterContext *ctx) |
| { |
| AVFilterLink *inlink = ctx->inputs[0]; |
| AVFilterLink *outlink = ctx->outputs[0]; |
| AudioPsyClipContext *s = ctx->priv; |
| AVFrame *in = NULL; |
| int ret = 0, status; |
| int64_t pts; |
| |
| FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); |
| |
| ret = ff_inlink_consume_samples(inlink, s->overlap, s->overlap, &in); |
| if (ret < 0) |
| return ret; |
| |
| if (ret > 0) { |
| return filter_frame(inlink, in); |
| } else if (ff_inlink_acknowledge_status(inlink, &status, &pts)) { |
| ff_outlink_set_status(outlink, status, pts); |
| return 0; |
| } else { |
| if (ff_inlink_queued_samples(inlink) >= s->overlap) { |
| ff_filter_set_ready(ctx, 10); |
| } else if (ff_outlink_frame_wanted(outlink)) { |
| ff_inlink_request_frame(inlink); |
| } |
| return 0; |
| } |
| } |
| |
| static av_cold void uninit(AVFilterContext *ctx) |
| { |
| AudioPsyClipContext *s = ctx->priv; |
| |
| av_freep(&s->window); |
| av_freep(&s->inv_window); |
| av_freep(&s->spread_table); |
| av_freep(&s->spread_table_range); |
| av_freep(&s->spread_table_index); |
| av_freep(&s->margin_curve); |
| |
| av_frame_free(&s->in_buffer); |
| av_frame_free(&s->in_frame); |
| av_frame_free(&s->out_dist_frame); |
| av_frame_free(&s->windowed_frame); |
| av_frame_free(&s->clipping_delta); |
| av_frame_free(&s->spectrum_buf); |
| av_frame_free(&s->mask_curve); |
| |
| for (int ch = 0; ch < s->channels; ch++) { |
| if (s->tx_ctx) |
| av_tx_uninit(&s->tx_ctx[ch]); |
| if (s->itx_ctx) |
| av_tx_uninit(&s->itx_ctx[ch]); |
| } |
| |
| av_freep(&s->tx_ctx); |
| av_freep(&s->itx_ctx); |
| } |
| |
| static const AVFilterPad inputs[] = { |
| { |
| .name = "default", |
| .type = AVMEDIA_TYPE_AUDIO, |
| .config_props = config_input, |
| }, |
| }; |
| |
| static const AVFilterPad outputs[] = { |
| { |
| .name = "default", |
| .type = AVMEDIA_TYPE_AUDIO, |
| }, |
| }; |
| |
| const AVFilter ff_af_apsyclip = { |
| .name = "apsyclip", |
| .description = NULL_IF_CONFIG_SMALL("Audio Psychoacoustic Clipper."), |
| .priv_size = sizeof(AudioPsyClipContext), |
| .priv_class = &apsyclip_class, |
| .uninit = uninit, |
| FILTER_INPUTS(inputs), |
| FILTER_OUTPUTS(outputs), |
| FILTER_SINGLE_SAMPLEFMT(AV_SAMPLE_FMT_FLTP), |
| .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | |
| AVFILTER_FLAG_SLICE_THREADS, |
| .activate = activate, |
| .process_command = ff_filter_process_command, |
| }; |