/* tests/checkasm/hevc_deblock.c - third_party/ffmpeg - Git at Google */

 /*
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
  *
  * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License along
  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */

 #include <string.h>

 #include "libavutil/intreadwrite.h"
 #include "libavutil/macros.h"
 #include "libavutil/mem_internal.h"

 #include "libavcodec/hevc/dsp.h"

 #include "checkasm.h"

 /* AND masks applied to each random 32-bit word so the generated samples stay
  * within range; the entry is picked with (bit_depth - 8) >> 1. */
 static const uint32_t pixel_mask[3] = {
     0xffffffff, /* index 0,  8-bit: all four bytes are kept as-is        */
     0x03ff03ff, /* index 1, 10-bit: low 10 bits of each 16-bit half-word */
     0x0fff0fff, /* index 2, 12-bit: low 12 bits of each 16-bit half-word */
 };

 // Bytes per sample for whatever `bit_depth` variable is in scope where the
 // macro is expanded: 1 for 8-bit, 2 for 10- and 12-bit.
 #define SIZEOF_PIXEL ((bit_depth + 7) / 8)
 // Stride in bytes handed to the chroma filters (16 * 2 = 32).
 #define BUF_STRIDE (16 * 2)
 // Row count; only used to size the buffers below.
 #define BUF_LINES (16)
 // large buffer sizes based on high bit depth
 // Byte offset (1024) of the pixel pointer passed to the filters, leaving
 // that much room in front of it inside the buffer.
 #define BUF_OFFSET (2 * BUF_STRIDE * BUF_LINES)
 // Total buffer size in bytes: 1024 + 2 * 1024 = 3072. The whole buffer is
 // compared after each call, so stray writes around the edge are detected.
 #define BUF_SIZE (2 * BUF_STRIDE * BUF_LINES + BUF_OFFSET * 2)

 /*
  * Fill the first `size` bytes of buf0 and buf1 with the same random data.
  *
  * Data is written one 32-bit word at a time; each word is ANDed with the
  * pixel_mask entry for the `bit_depth` variable that must be in scope at the
  * expansion site, so both buffers start out identical and in range.
  *
  * `size` is stepped in units of 4, so it should be a multiple of 4; the
  * buffers must satisfy whatever alignment AV_WN32A requires (presumably
  * 4 bytes - confirm against libavutil/intreadwrite.h).
  *
  * Every argument is parenthesized so that expression arguments expand with
  * the intended precedence.
  */
 #define randomize_buffers(buf0, buf1, size)                 \
     do {                                                    \
         uint32_t mask = pixel_mask[(bit_depth - 8) >> 1];   \
         int k;                                              \
         for (k = 0; k < (size); k += 4) {                   \
             uint32_t r = rnd() & mask;                      \
             AV_WN32A((buf0) + k, r);                        \
             AV_WN32A((buf1) + k, r);                        \
         }                                                   \
     } while (0)

 /*
  * Check the horizontal and vertical chroma deblocking filters for one bit
  * depth against their reference implementations.
  *
  * h         - DSP context whose function pointers are tested
  * bit_depth - sample bit depth; also drives SIZEOF_PIXEL and pixel_mask
  * c         - 0 tests the plain filters with no_p/no_q fixed at 0,
  *             non-zero tests the *_c ("_full") variants and ANDs the random
  *             no_p/no_q flags with c
  *
  * Both buffers get identical random content, each implementation filters
  * its own copy, and the complete buffers are then compared.
  */
 static void check_deblock_chroma(HEVCDSPContext *h, int bit_depth, int c)
 {
     // see tctable[] in hevc_filter.c, we check full range
     int32_t tc[2] = { rnd() % 25, rnd() % 25 };
     // no_p, no_q can only be { 0,0 } for the simpler assembly (non *_c
     // variant) functions, see deblocking_filter_CTB() in hevc_filter.c
     uint8_t no_p[2] = { rnd() & c, rnd() & c };
     uint8_t no_q[2] = { rnd() & c, rnd() & c };
     const char *suffix = c ? "_full" : "";
     LOCAL_ALIGNED_32(uint8_t, ref_buf, [BUF_SIZE]);
     LOCAL_ALIGNED_32(uint8_t, new_buf, [BUF_SIZE]);
     uint8_t *ref_pix = ref_buf + BUF_OFFSET;
     uint8_t *new_pix = new_buf + BUF_OFFSET;

     declare_func(void, uint8_t *pix, ptrdiff_t stride,
                  const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q);

     // horizontal edge
     if (check_func(c ? h->hevc_h_loop_filter_chroma_c : h->hevc_h_loop_filter_chroma,
                    "hevc_h_loop_filter_chroma%d%s", bit_depth, suffix)) {
         randomize_buffers(ref_buf, new_buf, BUF_SIZE);

         call_ref(ref_pix, BUF_STRIDE, tc, no_p, no_q);
         call_new(new_pix, BUF_STRIDE, tc, no_p, no_q);
         if (memcmp(ref_buf, new_buf, BUF_SIZE)) {
             fail();
         }
         bench_new(new_pix, BUF_STRIDE, tc, no_p, no_q);
     }

     // vertical edge
     if (check_func(c ? h->hevc_v_loop_filter_chroma_c : h->hevc_v_loop_filter_chroma,
                    "hevc_v_loop_filter_chroma%d%s", bit_depth, suffix)) {
         randomize_buffers(ref_buf, new_buf, BUF_SIZE);

         call_ref(ref_pix, BUF_STRIDE, tc, no_p, no_q);
         call_new(new_pix, BUF_STRIDE, tc, no_p, no_q);
         if (memcmp(ref_buf, new_buf, BUF_SIZE)) {
             fail();
         }
         bench_new(new_pix, BUF_STRIDE, tc, no_p, no_q);
     }
 }

 // Samples on either side of the edge, addressed relative to `buf` (which
 // points at Q0) in steps of `xstride` bytes. P* lie before the edge, Q* after.
 // These name the first byte of the sample; use GET()/SET() to access the
 // whole sample.
 #define P3 buf[-4 * xstride]
 #define P2 buf[-3 * xstride]
 #define P1 buf[-2 * xstride]
 #define P0 buf[-1 * xstride]
 #define Q0 buf[0 * xstride]
 #define Q1 buf[1 * xstride]
 #define Q2 buf[2 * xstride]
 #define Q3 buf[3 * xstride]

 // 2.5 * tc[x], rounded up, on the unscaled tc value.
 #define TC25(x) ((tc[x] * 5 + 1) >> 1)
 // Keep only the low `bit_depth` bits of x. The argument is parenthesized so
 // that the mask applies to the whole expression, whatever operators it uses.
 #define MASK(x) ((uint16_t)((x) & ((1 << (bit_depth)) - 1)))
 // Read the sample starting at byte lvalue x as 8 or 16 bits, per SIZEOF_PIXEL.
 #define GET(x) ((SIZEOF_PIXEL == 1) ? *(uint8_t*)(&(x)) : *(uint16_t*)(&(x)))
 // Store MASK(y) into the sample starting at byte lvalue x.
 #define SET(x, y) do { \
     uint16_t z = MASK(y); \
     if (SIZEOF_PIXEL == 1) \
         *(uint8_t*)(&(x)) = z; \
     else \
         *(uint16_t*)(&(x)) = z; \
 } while (0)
 // Random value starting at GET(x) - diff (clipped to the valid sample range)
 // plus a random offset in [0, 2 * diff). The sum may exceed the sample range;
 // callers pass it through SET(), which masks it. The expansion is wrapped in
 // parentheses so it can be combined with other operators safely.
 #define RANDCLIP(x, diff) (av_clip(GET(x) - (diff), 0, \
     (1 << (bit_depth)) - 1) + rnd() % FFMAX(2 * (diff), 1))

 // Fill P3..P1 and Q1..Q3 of one line around already-written P0/Q0 samples.
 // `b3` is the total deviation budget; every tap is drawn near P0 (for P taps)
 // or Q0 (for Q taps) and consumes part of the remaining budget, so the line
 // stays smooth on both sides of the edge.
 static void randomize_luma_outer_taps(uint8_t *buf, ptrdiff_t xstride,
                                       int b3, int bit_depth)
 {
     int b3diff;

     // p3 - p0 up to the full budget
     b3diff = rnd() % FFMAX(b3, 1);
     SET(P3, RANDCLIP(P0, b3diff));
     // q3 - q0, reduced budget
     b3diff = rnd() % FFMAX(b3 - b3diff, 1);
     SET(Q3, RANDCLIP(Q0, b3diff));

     // same concept, budget across 4 pixels
     b3 -= b3diff = rnd() % FFMAX(b3, 1);
     SET(P2, RANDCLIP(P0, b3diff));
     b3 -= b3diff = rnd() % FFMAX(b3, 1);
     SET(Q2, RANDCLIP(Q0, b3diff));

     // extra reduced budget for weighted pixels
     b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);
     SET(P1, RANDCLIP(P0, b3diff));
     b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);
     SET(Q1, RANDCLIP(Q0, b3diff));
 }

 // NOTE: this function doesn't work 'correctly' in that it won't always choose
 // strong/strong or weak/weak, in most cases it tends to but will sometimes mix
 // weak/strong or even skip sometimes. This is more useful to test correctness
 // for these functions, though it does make benching them difficult. The easiest
 // way to bench these functions is to check an overall decode since there are too
 // many paths and ways to trigger the deblock: we would have to bench all
 // permutations of weak/strong/skip/nd_q/nd_p/no_q/no_p and it quickly becomes
 // too much.
 //
 // Writes 8 lines of 8 samples (P3..Q3) around `buf` and picks matching filter
 // parameters.
 //   type      0 = aim for the strong filter, 1 = weak filter, 2 = no filtering
 //   beta      out: 8..64, or 0 for type 2
 //   tc        out: tc[0] applies to lines 0-3, tc[1] to lines 4-7
 //   buf       address of the Q0 sample of the first line
 //   xstride   byte distance between neighbouring taps (across the edge)
 //   ystride   byte distance between consecutive lines (along the edge)
 //   bit_depth sample bit depth
 static void randomize_luma_buffers(int type, int *beta, int32_t tc[2],
    uint8_t *buf, ptrdiff_t xstride, ptrdiff_t ystride, int bit_depth)
 {
     int i, j, b3, tc25, tc25diff, half, dir;
     // both tc & beta are unscaled inputs
     // minimum useful value is 1, full range 0-24
     // NOTE(review): this draws 1..25, one above the quoted range; confirm
     // whether 25 is intended.
     tc[0] = (rnd() % 25) + 1;
     tc[1] = (rnd() % 25) + 1;
     // minimum useful value for 8bit is 8
     *beta = (rnd() % 57) + 8;

     switch (type) {
     case 0: // strong
         for (j = 0; j < 2; j++) {
             tc25 = TC25(j) << (bit_depth - 8);
             tc25diff = FFMAX(tc25 - 1, 0);
             // 4 lines per tc
             for (i = 0; i < 4; i++) {
                 b3 = (*beta << (bit_depth - 8)) >> 3;

                 SET(P0, rnd() % (1 << bit_depth));
                 SET(Q0, RANDCLIP(P0, tc25diff));

                 randomize_luma_outer_taps(buf, xstride, b3, bit_depth);

                 buf += ystride;
             }
         }
         break;
     case 1: // weak
         for (j = 0; j < 2; j++) {
             tc25 = TC25(j) << (bit_depth - 8);
             tc25diff = FFMAX(tc25 - 1, 0);
             half = tc25diff >> 1;
             // 4 lines per tc
             for (i = 0; i < 4; i++) {
                 // Weak filtering is significantly simpler to activate as
                 // we only need to satisfy d0 + d3 < beta, which
                 // can be simplified to d0 + d0 < beta. Using the above
                 // derivations but substituting b3 for b1 and ensuring
                 // that P0/Q0 are at least 1/2 tc25diff apart (tending
                 // towards 1/2 range).
                 b3 = (*beta << (bit_depth - 8)) >> 1;

                 SET(P0, rnd() % (1 << bit_depth));
                 // Push Q0 away from P0 towards the middle of the sample
                 // range. The direction is computed separately: written
                 // inline, the conditional operator would swallow the whole
                 // sum as its condition. GET() is needed to compare the full
                 // sample rather than its first byte.
                 dir = GET(P0) < (1 << (bit_depth - 1)) ? 1 : -1;
                 SET(Q0, (RANDCLIP(P0, half) + half * dir));

                 randomize_luma_outer_taps(buf, xstride, b3, bit_depth);

                 buf += ystride;
             }
         }
         break;
     case 2: // none
         *beta = 0; // ensure skip
         for (i = 0; i < 8; i++) {
             // we can just fill with completely random data, nothing should be touched.
             SET(P3, rnd()); SET(P2, rnd()); SET(P1, rnd()); SET(P0, rnd());
             SET(Q0, rnd()); SET(Q1, rnd()); SET(Q2, rnd()); SET(Q3, rnd());
             buf += ystride;
         }
         break;
     default: // unknown type: leave the pixel data untouched
         break;
     }
 }

 /*
  * Check the horizontal and vertical luma deblocking filters for one bit depth
  * against their reference implementations, once per generated input kind
  * ("strong", "weak", "skip").
  *
  * h         - DSP context whose function pointers are tested
  * bit_depth - sample bit depth; also drives SIZEOF_PIXEL
  * c         - 0 tests the plain filters with no_p/no_q fixed at 0,
  *             non-zero tests the *_c ("_full") variants and ANDs the random
  *             no_p/no_q flags with c
  *
  * The reference buffer is regenerated around the edge, copied to the second
  * buffer, both are filtered, and the complete buffers are compared.
  */
 static void check_deblock_luma(HEVCDSPContext *h, int bit_depth, int c)
 {
     const char *kind_names[3] = { "strong", "weak", "skip" };
     const char *suffix = c ? "_full" : "";
     int beta;
     int32_t tc[2] = {0};
     uint8_t no_p[2] = { rnd() & c, rnd() & c };
     uint8_t no_q[2] = { rnd() & c, rnd() & c };
     LOCAL_ALIGNED_32(uint8_t, ref_buf, [BUF_SIZE]);
     LOCAL_ALIGNED_32(uint8_t, new_buf, [BUF_SIZE]);
     uint8_t *ref_edge = ref_buf + BUF_OFFSET;
     uint8_t *new_edge = new_buf + BUF_OFFSET;
     // one sample, and one 16-sample row, in bytes
     const ptrdiff_t sample_step = SIZEOF_PIXEL;
     const ptrdiff_t row_step    = 16 * SIZEOF_PIXEL;

     declare_func(void, uint8_t *pix, ptrdiff_t stride, int beta,
                  const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q);
     memset(ref_buf, 0, BUF_SIZE);

     for (int kind = 0; kind < 3; kind++) {
         const char *kind_name = kind_names[kind];

         // horizontal edge: taps are one row apart, lines one sample apart
         if (check_func(c ? h->hevc_h_loop_filter_luma_c : h->hevc_h_loop_filter_luma,
                        "hevc_h_loop_filter_luma%d_%s%s", bit_depth, kind_name, suffix)) {
             randomize_luma_buffers(kind, &beta, tc, ref_edge, row_step, sample_step, bit_depth);
             memcpy(new_buf, ref_buf, BUF_SIZE);

             call_ref(ref_edge, row_step, beta, tc, no_p, no_q);
             call_new(new_edge, row_step, beta, tc, no_p, no_q);
             if (memcmp(ref_buf, new_buf, BUF_SIZE)) {
                 fail();
             }
             bench_new(new_edge, row_step, beta, tc, no_p, no_q);
         }

         // vertical edge: taps are one sample apart, lines one row apart
         if (check_func(c ? h->hevc_v_loop_filter_luma_c : h->hevc_v_loop_filter_luma,
                        "hevc_v_loop_filter_luma%d_%s%s", bit_depth, kind_name, suffix)) {
             randomize_luma_buffers(kind, &beta, tc, ref_edge, sample_step, row_step, bit_depth);
             memcpy(new_buf, ref_buf, BUF_SIZE);

             call_ref(ref_edge, row_step, beta, tc, no_p, no_q);
             call_new(new_edge, row_step, beta, tc, no_p, no_q);
             if (memcmp(ref_buf, new_buf, BUF_SIZE)) {
                 fail();
             }
             bench_new(new_edge, row_step, beta, tc, no_p, no_q);
         }
     }
 }

 /*
  * Test entry point: runs the chroma checks and then the luma checks, each
  * first in plain mode and then in "full" mode, at 8, 10 and 12 bits, and
  * emits one report per group.
  */
 void checkasm_check_hevc_deblock(void)
 {
     HEVCDSPContext dsp;

     for (int full = 0; full <= 1; full++) {
         for (int depth = 8; depth <= 12; depth += 2) {
             ff_hevc_dsp_init(&dsp, depth);
             check_deblock_chroma(&dsp, depth, full);
         }
         if (full) {
             report("chroma_full");
         } else {
             report("chroma");
         }
     }

     for (int full = 0; full <= 1; full++) {
         for (int depth = 8; depth <= 12; depth += 2) {
             ff_hevc_dsp_init(&dsp, depth);
             check_deblock_luma(&dsp, depth, full);
         }
         if (full) {
             report("luma_full");
         } else {
             report("luma");
         }
     }
 }
	/*
	* This file is part of FFmpeg.
	*
	* FFmpeg is free software; you can redistribute it and/or modify
	* it under the terms of the GNU General Public License as published by
	* the Free Software Foundation; either version 2 of the License, or
	* (at your option) any later version.
	*
	* FFmpeg is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	* GNU General Public License for more details.
	*
	* You should have received a copy of the GNU General Public License along
	* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
	* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
	*/

	#include <string.h>

	#include "libavutil/intreadwrite.h"
	#include "libavutil/macros.h"
	#include "libavutil/mem_internal.h"

	#include "libavcodec/hevc/dsp.h"

	#include "checkasm.h"

	/* AND masks applied to each random 32-bit word so the generated samples stay
	 * within range; the entry is picked with (bit_depth - 8) >> 1. */
	static const uint32_t pixel_mask[3] = {
	    0xffffffff, /* index 0,  8-bit: all four bytes are kept as-is        */
	    0x03ff03ff, /* index 1, 10-bit: low 10 bits of each 16-bit half-word */
	    0x0fff0fff, /* index 2, 12-bit: low 12 bits of each 16-bit half-word */
	};

	// Bytes per sample for whatever `bit_depth` variable is in scope where the
	// macro is expanded: 1 for 8-bit, 2 for 10- and 12-bit.
	#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
	// Stride in bytes handed to the chroma filters (16 * 2 = 32).
	#define BUF_STRIDE (16 * 2)
	// Row count; only used to size the buffers below.
	#define BUF_LINES (16)
	// large buffer sizes based on high bit depth
	// Byte offset (1024) of the pixel pointer passed to the filters, leaving
	// that much room in front of it inside the buffer.
	#define BUF_OFFSET (2 * BUF_STRIDE * BUF_LINES)
	// Total buffer size in bytes: 1024 + 2 * 1024 = 3072. The whole buffer is
	// compared after each call, so stray writes around the edge are detected.
	#define BUF_SIZE (2 * BUF_STRIDE * BUF_LINES + BUF_OFFSET * 2)

	/*
	 * Fill the first `size` bytes of buf0 and buf1 with the same random data.
	 *
	 * Data is written one 32-bit word at a time; each word is ANDed with the
	 * pixel_mask entry for the `bit_depth` variable that must be in scope at the
	 * expansion site, so both buffers start out identical and in range.
	 *
	 * `size` is stepped in units of 4, so it should be a multiple of 4; the
	 * buffers must satisfy whatever alignment AV_WN32A requires (presumably
	 * 4 bytes - confirm against libavutil/intreadwrite.h).
	 *
	 * Every argument is parenthesized so that expression arguments expand with
	 * the intended precedence.
	 */
	#define randomize_buffers(buf0, buf1, size)                 \
	    do {                                                    \
	        uint32_t mask = pixel_mask[(bit_depth - 8) >> 1];   \
	        int k;                                              \
	        for (k = 0; k < (size); k += 4) {                   \
	            uint32_t r = rnd() & mask;                      \
	            AV_WN32A((buf0) + k, r);                        \
	            AV_WN32A((buf1) + k, r);                        \
	        }                                                   \
	    } while (0)

	/*
	 * Check the horizontal and vertical chroma deblocking filters for one bit
	 * depth against their reference implementations.
	 *
	 * h         - DSP context whose function pointers are tested
	 * bit_depth - sample bit depth; also drives SIZEOF_PIXEL and pixel_mask
	 * c         - 0 tests the plain filters with no_p/no_q fixed at 0,
	 *             non-zero tests the *_c ("_full") variants and ANDs the random
	 *             no_p/no_q flags with c
	 *
	 * Both buffers get identical random content, each implementation filters
	 * its own copy, and the complete buffers are then compared.
	 */
	static void check_deblock_chroma(HEVCDSPContext *h, int bit_depth, int c)
	{
	    // see tctable[] in hevc_filter.c, we check full range
	    int32_t tc[2] = { rnd() % 25, rnd() % 25 };
	    // no_p, no_q can only be { 0,0 } for the simpler assembly (non *_c
	    // variant) functions, see deblocking_filter_CTB() in hevc_filter.c
	    uint8_t no_p[2] = { rnd() & c, rnd() & c };
	    uint8_t no_q[2] = { rnd() & c, rnd() & c };
	    const char *suffix = c ? "_full" : "";
	    LOCAL_ALIGNED_32(uint8_t, ref_buf, [BUF_SIZE]);
	    LOCAL_ALIGNED_32(uint8_t, new_buf, [BUF_SIZE]);
	    uint8_t *ref_pix = ref_buf + BUF_OFFSET;
	    uint8_t *new_pix = new_buf + BUF_OFFSET;

	    // tc, no_p and no_q are arrays at the call sites, so the tested
	    // signature must take all three by pointer.
	    declare_func(void, uint8_t *pix, ptrdiff_t stride,
	                 const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q);

	    // horizontal edge
	    if (check_func(c ? h->hevc_h_loop_filter_chroma_c : h->hevc_h_loop_filter_chroma,
	                   "hevc_h_loop_filter_chroma%d%s", bit_depth, suffix)) {
	        randomize_buffers(ref_buf, new_buf, BUF_SIZE);

	        call_ref(ref_pix, BUF_STRIDE, tc, no_p, no_q);
	        call_new(new_pix, BUF_STRIDE, tc, no_p, no_q);
	        if (memcmp(ref_buf, new_buf, BUF_SIZE)) {
	            fail();
	        }
	        bench_new(new_pix, BUF_STRIDE, tc, no_p, no_q);
	    }

	    // vertical edge
	    if (check_func(c ? h->hevc_v_loop_filter_chroma_c : h->hevc_v_loop_filter_chroma,
	                   "hevc_v_loop_filter_chroma%d%s", bit_depth, suffix)) {
	        randomize_buffers(ref_buf, new_buf, BUF_SIZE);

	        call_ref(ref_pix, BUF_STRIDE, tc, no_p, no_q);
	        call_new(new_pix, BUF_STRIDE, tc, no_p, no_q);
	        if (memcmp(ref_buf, new_buf, BUF_SIZE)) {
	            fail();
	        }
	        bench_new(new_pix, BUF_STRIDE, tc, no_p, no_q);
	    }
	}

	// Samples on either side of the edge, addressed relative to `buf` (which
	// points at Q0) in steps of `xstride` bytes. P* lie before the edge, Q* after.
	// These name the first byte of the sample; use GET()/SET() to access the
	// whole sample.
	#define P3 buf[-4 * xstride]
	#define P2 buf[-3 * xstride]
	#define P1 buf[-2 * xstride]
	#define P0 buf[-1 * xstride]
	#define Q0 buf[0 * xstride]
	#define Q1 buf[1 * xstride]
	#define Q2 buf[2 * xstride]
	#define Q3 buf[3 * xstride]

	// 2.5 * tc[x], rounded up, on the unscaled tc value.
	#define TC25(x) ((tc[x] * 5 + 1) >> 1)
	// Keep only the low `bit_depth` bits of x. The argument is parenthesized so
	// that the mask applies to the whole expression, whatever operators it uses.
	#define MASK(x) ((uint16_t)((x) & ((1 << (bit_depth)) - 1)))
	// Read the sample starting at byte lvalue x as 8 or 16 bits, per
	// SIZEOF_PIXEL. The address is cast to a pointer and dereferenced; casting
	// it to an integer type would yield address bits, not the sample.
	#define GET(x) ((SIZEOF_PIXEL == 1) ? *(uint8_t*)(&(x)) : *(uint16_t*)(&(x)))
	// Store MASK(y) into the sample starting at byte lvalue x.
	#define SET(x, y) do { \
	    uint16_t z = MASK(y); \
	    if (SIZEOF_PIXEL == 1) \
	        *(uint8_t*)(&(x)) = z; \
	    else \
	        *(uint16_t*)(&(x)) = z; \
	} while (0)
	// Random value starting at GET(x) - diff (clipped to the valid sample range)
	// plus a random offset in [0, 2 * diff). The sum may exceed the sample range;
	// callers pass it through SET(), which masks it. The expansion is wrapped in
	// parentheses so it can be combined with other operators safely.
	#define RANDCLIP(x, diff) (av_clip(GET(x) - (diff), 0, \
	    (1 << (bit_depth)) - 1) + rnd() % FFMAX(2 * (diff), 1))

	// Fill P3..P1 and Q1..Q3 of one line around already-written P0/Q0 samples.
	// `b3` is the total deviation budget; every tap is drawn near P0 (for P taps)
	// or Q0 (for Q taps) and consumes part of the remaining budget, so the line
	// stays smooth on both sides of the edge.
	static void randomize_luma_outer_taps(uint8_t *buf, ptrdiff_t xstride,
	                                      int b3, int bit_depth)
	{
	    int b3diff;

	    // p3 - p0 up to the full budget
	    b3diff = rnd() % FFMAX(b3, 1);
	    SET(P3, RANDCLIP(P0, b3diff));
	    // q3 - q0, reduced budget
	    b3diff = rnd() % FFMAX(b3 - b3diff, 1);
	    SET(Q3, RANDCLIP(Q0, b3diff));

	    // same concept, budget across 4 pixels
	    b3 -= b3diff = rnd() % FFMAX(b3, 1);
	    SET(P2, RANDCLIP(P0, b3diff));
	    b3 -= b3diff = rnd() % FFMAX(b3, 1);
	    SET(Q2, RANDCLIP(Q0, b3diff));

	    // extra reduced budget for weighted pixels
	    b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);
	    SET(P1, RANDCLIP(P0, b3diff));
	    b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);
	    SET(Q1, RANDCLIP(Q0, b3diff));
	}

	// NOTE: this function doesn't work 'correctly' in that it won't always choose
	// strong/strong or weak/weak, in most cases it tends to but will sometimes mix
	// weak/strong or even skip sometimes. This is more useful to test correctness
	// for these functions, though it does make benching them difficult. The easiest
	// way to bench these functions is to check an overall decode since there are too
	// many paths and ways to trigger the deblock: we would have to bench all
	// permutations of weak/strong/skip/nd_q/nd_p/no_q/no_p and it quickly becomes
	// too much.
	//
	// Writes 8 lines of 8 samples (P3..Q3) around `buf` and picks matching filter
	// parameters.
	//   type      0 = aim for the strong filter, 1 = weak filter, 2 = no filtering
	//   beta      out: 8..64, or 0 for type 2
	//   tc        out: tc[0] applies to lines 0-3, tc[1] to lines 4-7
	//   buf       address of the Q0 sample of the first line
	//   xstride   byte distance between neighbouring taps (across the edge)
	//   ystride   byte distance between consecutive lines (along the edge)
	//   bit_depth sample bit depth
	static void randomize_luma_buffers(int type, int *beta, int32_t tc[2],
	    uint8_t *buf, ptrdiff_t xstride, ptrdiff_t ystride, int bit_depth)
	{
	    int i, j, b3, tc25, tc25diff, half, dir;
	    // both tc & beta are unscaled inputs
	    // minimum useful value is 1, full range 0-24
	    // NOTE(review): this draws 1..25, one above the quoted range; confirm
	    // whether 25 is intended.
	    tc[0] = (rnd() % 25) + 1;
	    tc[1] = (rnd() % 25) + 1;
	    // minimum useful value for 8bit is 8
	    *beta = (rnd() % 57) + 8;

	    switch (type) {
	    case 0: // strong
	        for (j = 0; j < 2; j++) {
	            tc25 = TC25(j) << (bit_depth - 8);
	            tc25diff = FFMAX(tc25 - 1, 0);
	            // 4 lines per tc
	            for (i = 0; i < 4; i++) {
	                b3 = (*beta << (bit_depth - 8)) >> 3;

	                SET(P0, rnd() % (1 << bit_depth));
	                SET(Q0, RANDCLIP(P0, tc25diff));

	                randomize_luma_outer_taps(buf, xstride, b3, bit_depth);

	                buf += ystride;
	            }
	        }
	        break;
	    case 1: // weak
	        for (j = 0; j < 2; j++) {
	            tc25 = TC25(j) << (bit_depth - 8);
	            tc25diff = FFMAX(tc25 - 1, 0);
	            half = tc25diff >> 1;
	            // 4 lines per tc
	            for (i = 0; i < 4; i++) {
	                // Weak filtering is significantly simpler to activate as
	                // we only need to satisfy d0 + d3 < beta, which
	                // can be simplified to d0 + d0 < beta. Using the above
	                // derivations but substituting b3 for b1 and ensuring
	                // that P0/Q0 are at least 1/2 tc25diff apart (tending
	                // towards 1/2 range).
	                b3 = (*beta << (bit_depth - 8)) >> 1;

	                SET(P0, rnd() % (1 << bit_depth));
	                // Push Q0 away from P0 towards the middle of the sample
	                // range. The direction is computed separately: written
	                // inline, the conditional operator would swallow the whole
	                // sum as its condition. GET() is needed to compare the full
	                // sample rather than its first byte.
	                dir = GET(P0) < (1 << (bit_depth - 1)) ? 1 : -1;
	                SET(Q0, (RANDCLIP(P0, half) + half * dir));

	                randomize_luma_outer_taps(buf, xstride, b3, bit_depth);

	                buf += ystride;
	            }
	        }
	        break;
	    case 2: // none
	        *beta = 0; // ensure skip
	        for (i = 0; i < 8; i++) {
	            // we can just fill with completely random data, nothing should be touched.
	            SET(P3, rnd()); SET(P2, rnd()); SET(P1, rnd()); SET(P0, rnd());
	            SET(Q0, rnd()); SET(Q1, rnd()); SET(Q2, rnd()); SET(Q3, rnd());
	            buf += ystride;
	        }
	        break;
	    default: // unknown type: leave the pixel data untouched
	        break;
	    }
	}

	/*
	 * Check the horizontal and vertical luma deblocking filters for one bit depth
	 * against their reference implementations, once per generated input kind
	 * ("strong", "weak", "skip").
	 *
	 * h         - DSP context whose function pointers are tested
	 * bit_depth - sample bit depth; also drives SIZEOF_PIXEL
	 * c         - 0 tests the plain filters with no_p/no_q fixed at 0,
	 *             non-zero tests the *_c ("_full") variants and ANDs the random
	 *             no_p/no_q flags with c
	 *
	 * The reference buffer is regenerated around the edge, copied to the second
	 * buffer, both are filtered, and the complete buffers are compared.
	 */
	static void check_deblock_luma(HEVCDSPContext *h, int bit_depth, int c)
	{
	    const char *kind_names[3] = { "strong", "weak", "skip" };
	    const char *suffix = c ? "_full" : "";
	    int beta;
	    int32_t tc[2] = {0};
	    uint8_t no_p[2] = { rnd() & c, rnd() & c };
	    uint8_t no_q[2] = { rnd() & c, rnd() & c };
	    LOCAL_ALIGNED_32(uint8_t, ref_buf, [BUF_SIZE]);
	    LOCAL_ALIGNED_32(uint8_t, new_buf, [BUF_SIZE]);
	    uint8_t *ref_edge = ref_buf + BUF_OFFSET;
	    uint8_t *new_edge = new_buf + BUF_OFFSET;
	    // one sample, and one 16-sample row, in bytes
	    const ptrdiff_t sample_step = SIZEOF_PIXEL;
	    const ptrdiff_t row_step    = 16 * SIZEOF_PIXEL;

	    // tc, no_p and no_q are arrays at the call sites, so the tested
	    // signature must take all three by pointer.
	    declare_func(void, uint8_t *pix, ptrdiff_t stride, int beta,
	                 const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q);
	    memset(ref_buf, 0, BUF_SIZE);

	    for (int kind = 0; kind < 3; kind++) {
	        const char *kind_name = kind_names[kind];

	        // horizontal edge: taps are one row apart, lines one sample apart
	        if (check_func(c ? h->hevc_h_loop_filter_luma_c : h->hevc_h_loop_filter_luma,
	                       "hevc_h_loop_filter_luma%d_%s%s", bit_depth, kind_name, suffix)) {
	            randomize_luma_buffers(kind, &beta, tc, ref_edge, row_step, sample_step, bit_depth);
	            memcpy(new_buf, ref_buf, BUF_SIZE);

	            call_ref(ref_edge, row_step, beta, tc, no_p, no_q);
	            call_new(new_edge, row_step, beta, tc, no_p, no_q);
	            if (memcmp(ref_buf, new_buf, BUF_SIZE)) {
	                fail();
	            }
	            bench_new(new_edge, row_step, beta, tc, no_p, no_q);
	        }

	        // vertical edge: taps are one sample apart, lines one row apart
	        if (check_func(c ? h->hevc_v_loop_filter_luma_c : h->hevc_v_loop_filter_luma,
	                       "hevc_v_loop_filter_luma%d_%s%s", bit_depth, kind_name, suffix)) {
	            randomize_luma_buffers(kind, &beta, tc, ref_edge, sample_step, row_step, bit_depth);
	            memcpy(new_buf, ref_buf, BUF_SIZE);

	            call_ref(ref_edge, row_step, beta, tc, no_p, no_q);
	            call_new(new_edge, row_step, beta, tc, no_p, no_q);
	            if (memcmp(ref_buf, new_buf, BUF_SIZE)) {
	                fail();
	            }
	            bench_new(new_edge, row_step, beta, tc, no_p, no_q);
	        }
	    }
	}

	/*
	 * Test entry point: runs the chroma checks and then the luma checks, each
	 * first in plain mode and then in "full" mode, at 8, 10 and 12 bits, and
	 * emits one report per group.
	 */
	void checkasm_check_hevc_deblock(void)
	{
	    HEVCDSPContext dsp;

	    for (int full = 0; full <= 1; full++) {
	        for (int depth = 8; depth <= 12; depth += 2) {
	            ff_hevc_dsp_init(&dsp, depth);
	            check_deblock_chroma(&dsp, depth, full);
	        }
	        if (full) {
	            report("chroma_full");
	        } else {
	            report("chroma");
	        }
	    }

	    for (int full = 0; full <= 1; full++) {
	        for (int depth = 8; depth <= 12; depth += 2) {
	            ff_hevc_dsp_init(&dsp, depth);
	            check_deblock_luma(&dsp, depth, full);
	        }
	        if (full) {
	            report("luma_full");
	        } else {
	            report("luma");
	        }
	    }
	}