/* libavcodec/apv_dsp.c - third_party/ffmpeg - Git at Google */

 /*
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */

 #include <stdint.h>

 #include "config.h"
 #include "libavutil/attributes.h"
 #include "libavutil/common.h"

 #include "apv.h"
 #include "apv_dsp.h"


 /*
  * 8x8 table of signed 8-bit transform coefficients used by both passes
  * of the inverse transform in apv_decode_transquant_c().
  *
  * The transform loops read it as apv_trans_matrix[j][i], where j is the
  * index being summed over (input coefficient) and i is the index of the
  * value being produced.
  *
  * NOTE(review): the values look like an integer DCT basis scaled by 64,
  * presumably taken from the APV specification - confirm against the spec.
  */
 static const int8_t apv_trans_matrix[8][8] = {
     {  64,  64,  64,  64,  64,  64,  64,  64 },
     {  89,  75,  50,  18, -18, -50, -75, -89 },
     {  84,  35, -35, -84, -84, -35,  35,  84 },
     {  75, -18, -89, -50,  50,  89,  18, -75 },
     {  64, -64, -64,  64,  64, -64, -64,  64 },
     {  50, -89,  18,  75, -75, -18,  89, -50 },
     {  35, -84,  84, -35, -35,  84, -84,  35 },
     {  18, -50,  75, -89,  89, -75,  50, -18 },
 };

 /**
  * Plain-C dequantisation and inverse transform of one 8x8 block.
  *
  * @param output       destination samples; written as uint8_t when
  *                     bit_depth is 8 and as uint16_t otherwise
  * @param pitch        distance between output rows, in bytes
  * @param input_flat   64 quantised coefficients, read as 8 rows of 8
  * @param qmatrix_flat 64 quantisation factors in the same layout
  *                     (level_scale has already been folded in by the caller)
  * @param bit_depth    sample bit depth; selects the shifts, the final clip
  *                     range and the output sample type
  * @param qp_shift     left shift applied to each coefficient/factor product
  */
 static void apv_decode_transquant_c(void *output,
                                     ptrdiff_t pitch,
                                     const int16_t *input_flat,
                                     const int16_t *qmatrix_flat,
                                     int bit_depth,
                                     int qp_shift)
 {
     int16_t dequant[8][8];
     int32_t columns[8][8];
     int32_t transformed[8][8];

     // Stage 1: dequantise, round and clip every coefficient.
     // level_scale is already part of qmatrix at this point.
     const int dq_shift = bit_depth + 3 - 5;
     const int dq_round = 1 << (dq_shift - 1);

     for (int k = 0; k < 64; k++) {
         // Multiplying by 1U makes the scaling and rounding happen in
         // unsigned arithmetic; the result is reinterpreted as int
         // before the arithmetic right shift.
         int level = ((int)(input_flat[k] * qmatrix_flat[k] * (1U << qp_shift) +
                            dq_round)) >> dq_shift;

         dequant[k >> 3][k & 7] = av_clip(level, APV_MIN_TRANS_COEFF,
                                                 APV_MAX_TRANS_COEFF);
     }

     // Stage 2: vertical pass over columns; the intermediate result is
     // renormalised (rounded shift by 7) as it is stored.
     for (int col = 0; col < 8; col++) {
         for (int out = 0; out < 8; out++) {
             int acc = 0;
             for (int k = 0; k < 8; k++)
                 acc += apv_trans_matrix[k][out] * dequant[k][col];
             columns[out][col] = (acc + 64) >> 7;
         }
     }

     // Stage 3: horizontal pass over rows.
     for (int row = 0; row < 8; row++) {
         for (int out = 0; out < 8; out++) {
             int acc = 0;
             for (int k = 0; k < 8; k++)
                 acc += apv_trans_matrix[k][out] * columns[row][k];
             transformed[row][out] = acc;
         }
     }

     // Stage 4: final rounding shift, add half of the sample range,
     // clip to bit_depth bits and store.
     const int out_shift  = 20 - bit_depth;
     const int out_round  = 1 << (out_shift - 1);
     const int half_range = 1 << (bit_depth - 1);

     if (bit_depth == 8) {
         uint8_t *row_ptr = output;

         for (int row = 0; row < 8; row++, row_ptr += pitch) {
             for (int col = 0; col < 8; col++) {
                 int sample = ((transformed[row][col] + out_round) >> out_shift) +
                              half_range;
                 row_ptr[col] = av_clip_uintp2(sample, bit_depth);
             }
         }
     } else {
         uint16_t *row_ptr = output;
         // Pitch is given in bytes; samples here are two bytes wide.
         const ptrdiff_t stride = pitch / 2;

         for (int row = 0; row < 8; row++, row_ptr += stride) {
             for (int col = 0; col < 8; col++) {
                 int sample = ((transformed[row][col] + out_round) >> out_shift) +
                              half_range;
                 row_ptr[col] = av_clip_uintp2(sample, bit_depth);
             }
         }
     }
 }

 /**
  * Fill in an APVDSPContext with the DSP function pointers.
  *
  * The C implementation of decode_transquant is installed first. When
  * building for x86-64 with x86 assembly enabled, the x86-64 initialiser
  * is called afterwards on the same context.
  *
  * @param dsp context whose function pointers are set
  */
 av_cold void ff_apv_dsp_init(APVDSPContext *dsp)
 {
     dsp->decode_transquant = apv_decode_transquant_c;

     // NOTE(review): presumably this replaces the C pointer with an
     // optimised version when the CPU supports it - confirm in the
     // x86 init code.
 #if ARCH_X86_64 && HAVE_X86ASM
     ff_apv_dsp_init_x86_64(dsp);
 #endif
 }
	/*
	* This file is part of FFmpeg.
	*
	* FFmpeg is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Lesser General Public
	* License as published by the Free Software Foundation; either
	* version 2.1 of the License, or (at your option) any later version.
	*
	* FFmpeg is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Lesser General Public License for more details.
	*
	* You should have received a copy of the GNU Lesser General Public
	* License along with FFmpeg; if not, write to the Free Software
	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	*/

	#include <stdint.h>

	#include "config.h"
	#include "libavutil/attributes.h"
	#include "libavutil/common.h"

	#include "apv.h"
	#include "apv_dsp.h"


	/*
	 * 8x8 table of signed 8-bit transform coefficients used by both passes
	 * of the inverse transform in apv_decode_transquant_c().
	 *
	 * The transform loops read it as apv_trans_matrix[j][i], where j is the
	 * index being summed over and i is the index of the value being produced.
	 */
	static const int8_t apv_trans_matrix[8][8] = {
	{ 64, 64, 64, 64, 64, 64, 64, 64 },
	{ 89, 75, 50, 18, -18, -50, -75, -89 },
	{ 84, 35, -35, -84, -84, -35, 35, 84 },
	{ 75, -18, -89, -50, 50, 89, 18, -75 },
	{ 64, -64, -64, 64, 64, -64, -64, 64 },
	{ 50, -89, 18, 75, -75, -18, 89, -50 },
	{ 35, -84, 84, -35, -35, 84, -84, 35 },
	{ 18, -50, 75, -89, 89, -75, 50, -18 },
	};

	/**
	 * C implementation of dequantisation followed by the 8x8 inverse
	 * transform, writing the resulting samples to the output buffer.
	 *
	 * @param output       destination; accessed as uint8_t when bit_depth
	 *                     is 8, as uint16_t otherwise
	 * @param pitch        output row pitch in bytes
	 * @param input_flat   64 coefficients, viewed as an 8x8 array
	 * @param qmatrix_flat 64 quantisation factors, viewed as an 8x8 array
	 * @param bit_depth    sample bit depth
	 * @param qp_shift     left shift applied to each dequantised product
	 */
	static void apv_decode_transquant_c(void *output,
	                                    ptrdiff_t pitch,
	                                    const int16_t *input_flat,
	                                    const int16_t *qmatrix_flat,
	                                    int bit_depth,
	                                    int qp_shift)
	{
	    // View the flat 64-entry inputs as 8x8 arrays. These must be
	    // pointers to rows of 8 elements: without the '*' the declarations
	    // are arrays initialised from an invalid cast and do not compile.
	    const int16_t (*input)[8]   = (const int16_t(*)[8])input_flat;
	    const int16_t (*qmatrix)[8] = (const int16_t(*)[8])qmatrix_flat;

	    int16_t scaled_coeff[8][8];
	    int32_t recon_sample[8][8];

	    // Dequant.
	    {
	        // Note that level_scale was already combined into qmatrix
	        // before we got here.
	        int bd_shift = bit_depth + 3 - 5;

	        for (int y = 0; y < 8; y++) {
	            for (int x = 0; x < 8; x++) {
	                // The 1U factor moves the scaling and rounding into
	                // unsigned arithmetic before converting back to int.
	                int coeff = ((int)(input[y][x] * qmatrix[y][x] * (1U << qp_shift) +
	                             (1 << (bd_shift - 1)))) >> bd_shift;

	                scaled_coeff[y][x] =
	                    av_clip(coeff, APV_MIN_TRANS_COEFF,
	                                   APV_MAX_TRANS_COEFF);
	            }
	        }
	    }

	    // Transform.
	    {
	        int32_t tmp[8][8];

	        // Vertical transform of columns.
	        for (int x = 0; x < 8; x++) {
	            for (int i = 0; i < 8; i++) {
	                int sum = 0;
	                for (int j = 0; j < 8; j++)
	                    sum += apv_trans_matrix[j][i] * scaled_coeff[j][x];
	                tmp[i][x] = sum;
	            }
	        }

	        // Renormalise.
	        for (int x = 0; x < 8; x++) {
	            for (int y = 0; y < 8; y++)
	                tmp[y][x] = (tmp[y][x] + 64) >> 7;
	        }

	        // Horizontal transform of rows.
	        for (int y = 0; y < 8; y++) {
	            for (int i = 0; i < 8; i++) {
	                int sum = 0;
	                for (int j = 0; j < 8; j++)
	                    sum += apv_trans_matrix[j][i] * tmp[y][j];
	                recon_sample[y][i] = sum;
	            }
	        }
	    }

	    // Output.
	    if (bit_depth == 8) {
	        uint8_t *ptr = output;
	        int bd_shift = 20 - bit_depth;

	        for (int y = 0; y < 8; y++) {
	            for (int x = 0; x < 8; x++) {
	                int sample = ((recon_sample[y][x] +
	                               (1 << (bd_shift - 1))) >> bd_shift) +
	                    (1 << (bit_depth - 1));
	                ptr[x] = av_clip_uintp2(sample, bit_depth);
	            }
	            ptr += pitch;
	        }
	    } else {
	        uint16_t *ptr = output;
	        int bd_shift = 20 - bit_depth;
	        pitch /= 2; // Pitch was in bytes, 2 bytes per sample.

	        for (int y = 0; y < 8; y++) {
	            for (int x = 0; x < 8; x++) {
	                int sample = ((recon_sample[y][x] +
	                               (1 << (bd_shift - 1))) >> bd_shift) +
	                    (1 << (bit_depth - 1));
	                ptr[x] = av_clip_uintp2(sample, bit_depth);
	            }
	            ptr += pitch;
	        }
	    }
	}

	/**
	 * Fill in an APVDSPContext with the DSP function pointers.
	 *
	 * The C implementation of decode_transquant is installed first. When
	 * building for x86-64 with x86 assembly enabled, the x86-64 initialiser
	 * is called afterwards on the same context.
	 *
	 * @param dsp context whose function pointers are set
	 */
	av_cold void ff_apv_dsp_init(APVDSPContext *dsp)
	{
	dsp->decode_transquant = apv_decode_transquant_c;

	#if ARCH_X86_64 && HAVE_X86ASM
	ff_apv_dsp_init_x86_64(dsp);
	#endif
	}