SingleSource/UnitTests/Vector/AVX512F/alignr.c - third_party/github.com/llvm/llvm-test-suite - Git at Google

 /*
  * Test 512-bit intrinsics related to valignd and valignq.
  * Here we check for _mm512_[mask|maskz]_alignr_epi*
  * intrinsics.
  */

 #include "m512_test_util.h"
 #include <memory.h>

 /*
  * Initialized to zero and never written in this file.  Being volatile, each
  * use is a real run-time read rather than a foldable constant.
  */
 volatile int vol0 = 0;

 /*
  * Stores element vol0 of (v512).xmmi back onto itself, so the value is
  * unchanged.  Presumably this keeps the optimizer from assuming it knows the
  * operand's contents -- NOTE(review): intent inferred from usage, confirm.
  */
 #define soft_update(v512) (v512).xmmi[vol0] = (v512).xmmi[vol0]

 /*
  * TEST_MASK: check the blend-masked form _mm512_mask_alignr_epi<elembits>.
  *
  * Part 1 seeds res from <dtype>res_orig, runs the intrinsic with res as the
  * pass-through source, and compares against compute_<instr>(mask, 0, ...).
  * Part 2 repeats the check with the second source produced by a masked load
  * of <dtype>op2 over <dtype>op1; the expected value is computed from a
  * separately loaded copy (src2_copy).
  *
  * The "mix with load" label is assembled as ", " #imm on a single line so
  * the failure message does not absorb the indentation of a continuation
  * line inside the string literal.
  */
 #define TEST_MASK(instr, elembits, mask, imm, num_elems, dtype)                \
   { /* Blend masking */                                                        \
     memcpy(&res, &dtype##res_orig, sizeof(res));                               \
     soft_update(dtype##op2);                                                   \
     res.zmmi = _mm512_mask_alignr_epi##elembits(                               \
         res.zmmi, mask, dtype##op1.zmmi, dtype##op2.zmmi, imm);                \
                                                                                \
     /* Compute the expected result. */                                         \
     expect.zmmi =                                                              \
         compute_##instr(mask, 0, &dtype##op1, &dtype##op2, imm, num_elems);    \
                                                                                \
     /* Compare the obtained and expected results. */                           \
     check_equal_n##dtype(&res, &expect, num_elems,                             \
                          "_mm512_mask_alignr_epi" #elembits ", " #imm,         \
                          __LINE__);                                            \
     /* Verify combination with masked load. */                                 \
     {                                                                          \
       __m512i src2_copy, src2 = dtype##op1.zmmi;                               \
       memcpy(&res, &dtype##res_orig, sizeof(res));                             \
       soft_update(dtype##op2);                                                 \
       src2 = _mm512_mask_load_epi##elembits(src2, mask, &dtype##op2.zmmi);     \
       res.zmmi = _mm512_mask_alignr_epi##elembits(res.zmmi, mask,              \
                                                   dtype##op1.zmmi, src2, imm); \
       soft_update(dtype##op2);                                                 \
       src2_copy = _mm512_mask_load_epi##elembits(dtype##op1.zmmi, mask,        \
                                                  &dtype##op2.zmmi);            \
       expect.zmmi =                                                            \
           compute_##instr(mask, 0, &dtype##op1, &src2_copy, imm, num_elems);   \
       check_equal_n##dtype(                                                    \
           &res, &expect, num_elems,                                            \
           "mix with load _mm512_mask_alignr_epi" #elembits ", " #imm,          \
           __LINE__);                                                           \
     }                                                                          \
   }

 /*
  * TEST_MASKZ: check the zero-masked form _mm512_maskz_alignr_epi<elembits>.
  *
  * res is pre-filled with 0xFF bytes before the intrinsic overwrites it.  The
  * expected value comes from compute_<instr> called with zero_mask = 1, and
  * the two are compared by check_equal_n<dtype> over num_elems elements.
  */
 #define TEST_MASKZ(instr, elembits, mask, imm, num_elems, dtype)               \
   {                                                                            \
     /* Zero masking */                                                         \
     memset(&res, 0xFF, sizeof(res));                                           \
     soft_update(dtype##op2);                                                   \
     res.zmmi = _mm512_maskz_alignr_epi##elembits(mask, dtype##op1.zmmi,        \
                                                  dtype##op2.zmmi, imm);        \
                                                                                \
     /* Compute the expected result. */                                         \
     expect.zmmi =                                                              \
         compute_##instr(mask, 1, &dtype##op1, &dtype##op2, imm, num_elems);    \
                                                                                \
     /* Compare the obtained and expected results. */                           \
     check_equal_n##dtype(&res, &expect, num_elems,                             \
                          "_mm512_maskz_alignr_epi" #elembits ", " #imm,        \
                          __LINE__);                                            \
   }

 /*
  * TEST: check the unmasked form _mm512_alignr_epi<elembits>.
  *
  * res is pre-filled with 0xFF bytes before the intrinsic overwrites it.  The
  * reference model is called with (1 << num_elems) - 1 as the mask, i.e. one
  * set bit per element, so every element is taken from the operands.
  */
 #define TEST(instr, elembits, imm, num_elems, dtype)                           \
   {                                                                            \
     /* No masking */                                                           \
     memset(&res, 0xFF, sizeof(res));                                           \
     soft_update(dtype##op2);                                                   \
     res.zmmi =                                                                 \
         _mm512_alignr_epi##elembits(dtype##op1.zmmi, dtype##op2.zmmi, imm);    \
                                                                                \
     /* Compute the expected result. */                                         \
     expect.zmmi = compute_##instr((1 << (num_elems)) - 1, 0, &dtype##op1,      \
                                   &dtype##op2, imm, num_elems);                \
                                                                                \
     /* Compare the obtained and expected results. */                           \
     check_equal_n##dtype(&res, &expect, num_elems,                             \
                          "_mm512_alignr_epi" #elembits ", " #imm, __LINE__);   \
   }

 #define TEST_ALIGN(instr, elembits, mask, imm, num_elems, dtype)               \
   TEST_MASK(instr, elembits, mask, imm, num_elems, dtype)                      \
   TEST_MASKZ(instr, elembits, mask, imm, num_elems, dtype)                     \
   TEST(instr, elembits, imm, num_elems, dtype)

 #define TEST_ALIGND(mask, imm) TEST_ALIGN(zalignd, 32, mask, imm, 16, d)

 #define TEST_ALIGNQ(mask, imm) TEST_ALIGN(zalignq, 64, mask, imm, 8, q)

 /* Operands and pass-through source for the 32-bit (d) and 64-bit (q) tests. */
 V512 dop1, dop2, dres_orig;
 V512 qop1, qop2, qres_orig;
 /* Scratch vectors: the intrinsic's output and the reference model's output. */
 V512 res, expect;

 /* Initial contents of dres_orig; init() copies these 64 bytes into it. */
 volatile unsigned int dres_orig_arr[16] = {
     0x12345678, 0x11111111, 0x22222222, 0x33333333, 0x44444444, 0x55555555,
     0x66666666, 0x77777777, 0x88888888, 0x99999999, 0xaaaaaaaa, 0xbbbbbbbb,
     0xcccccccc, 0xdddddddd, 0xeeeeeeee, 0xffffffff};

 /*
  * Initial contents of qres_orig; init() copies these 64 bytes into it.
  * NOTE(review): the last two entries (0x7777..., 0x6666...) break the
  * otherwise ascending pattern -- presumably harmless, confirm if intended.
  */
 volatile U64 qres_orig_arr[8] = {0x123456789abcdef0, 0x1111111111111111,
                                  0x2222222222222222, 0x3333333333333333,
                                  0x4444444444444444, 0x5555555555555555,
                                  0x7777777777777777, 0x6666666666666666};

 /*
  * Populate the global test operands and pass-through vectors.
  *
  * Each 32-bit lane of dop1/dop2 holds 0x11000000/0x22000000 plus its lane
  * index; each 64-bit lane of qop1/qop2 holds 0x1111000000000000 /
  * 0x2222000000000000 plus its lane index.  dres_orig and qres_orig are
  * loaded from their volatile source arrays.
  */
 static void NOINLINE init() {
   enum { DWORD_LANES = 16, QWORD_LANES = 8, VECTOR_BYTES = 64 };
   int lane;

   for (lane = 0; lane < DWORD_LANES; lane++) {
     dop1.u32[lane] = 0x11000000 + lane;
     dop2.u32[lane] = 0x22000000 + lane;
   }

   for (lane = 0; lane < QWORD_LANES; lane++) {
     qop1.u64[lane] = 0x1111000000000000 + lane;
     qop2.u64[lane] = 0x2222000000000000 + lane;
   }

   /* The casts drop the volatile qualifier so memcpy accepts the arrays. */
   memcpy((void *)&dres_orig, (void *)dres_orig_arr, VECTOR_BYTES);
   memcpy((void *)&qres_orig, (void *)qres_orig_arr, VECTOR_BYTES);
 }

 /*
  * Reference model for the 32-bit alignr intrinsics.
  *
  * Result lanes are filled in order: first op2[imm .. num_elems-1], then
  * op1[0 .. imm-1].  A lane whose bit in mask is clear receives 0 when
  * zero_mask is nonzero, otherwise the same lane of the global dres_orig.
  *
  * op1 and op2 are read as arrays of unsigned int.
  */
 __m512i NOINLINE compute_zalignd(__mmask16 mask, int zero_mask, const void *op1,
                                  const void *op2, int imm, int num_elems) {
   V512 out;
   int dst = 0;
   int pass;

   for (pass = 0; pass < 2; pass++) {
     /* Pass 0 takes the upper part of op2, pass 1 the lower part of op1. */
     const int low_pass = (pass == 0);
     const unsigned int *src = (const unsigned int *)(low_pass ? op2 : op1);
     const int first = low_pass ? imm : 0;
     const int limit = low_pass ? num_elems : imm;
     int s;

     for (s = first; s < limit; s++, dst++) {
       if (mask & (1 << dst)) {
         out.u32[dst] = src[s];
       } else if (zero_mask) {
         out.u32[dst] = 0;
       } else {
         out.u32[dst] = dres_orig.u32[dst];
       }
     }
   }

   return out.zmmi;
 }

 /*
  * Run the 32-bit alignr checks.  Each TEST_ALIGND(mask, imm) expands to the
  * blend-masked, zero-masked and unmasked checks for that write mask and
  * element shift.  The shifts used range from 0 to 15; the masks include
  * all-clear (0x0000), all-set (0xffff) and mixed patterns.
  */
 void NOINLINE do_zalignd() {
   /* Shift 0. */
   TEST_ALIGND(0x0000, 0);
   TEST_ALIGND(0xabcd, 0);
   TEST_ALIGND(0xffff, 0);

   /* Shift 1. */
   TEST_ALIGND(0x0000, 1);
   TEST_ALIGND(0xabcd, 1);
   TEST_ALIGND(0xfef7, 1);
   TEST_ALIGND(0xffff, 1);

   /* Shifts 3 and 5, one mixed mask each. */
   TEST_ALIGND(0xabcd, 3);
   TEST_ALIGND(0xfefe, 5);

   /* Shift 7. */
   TEST_ALIGND(0x0000, 7);
   TEST_ALIGND(0xabcd, 7);
   TEST_ALIGND(0xffff, 7);

   /* Shift 8. */
   TEST_ALIGND(0x0000, 8);
   TEST_ALIGND(0xabcd, 8);
   TEST_ALIGND(0xffff, 8);

   /* Shift 9. */
   TEST_ALIGND(0x0000, 9);
   TEST_ALIGND(0xabcd, 9);
   TEST_ALIGND(0xffff, 9);

   /* Shift 14. */
   TEST_ALIGND(0x0000, 14);
   TEST_ALIGND(0xabcd, 14);
   TEST_ALIGND(0xfef7, 14);
   TEST_ALIGND(0xffff, 14);

   /* Shift 15. */
   TEST_ALIGND(0x0000, 15);
   TEST_ALIGND(0xabcd, 15);
   TEST_ALIGND(0xffff, 15);
 }

 /*
  * Reference model for the 64-bit alignr intrinsics.
  *
  * Result lanes are filled in order: first op2[imm .. num_elems-1], then
  * op1[0 .. imm-1].  A lane whose bit in mask is clear receives 0 when
  * zero_mask is nonzero, otherwise the same lane of the global qres_orig.
  *
  * op1 and op2 are read as arrays of U64.
  */
 __m512i NOINLINE compute_zalignq(int mask, int zero_mask, const void *op1,
                                  const void *op2, int imm, int num_elems) {
   V512 out;
   int dst = 0;
   int pass;

   for (pass = 0; pass < 2; pass++) {
     /* Pass 0 takes the upper part of op2, pass 1 the lower part of op1. */
     const int low_pass = (pass == 0);
     const U64 *src = (const U64 *)(low_pass ? op2 : op1);
     const int first = low_pass ? imm : 0;
     const int limit = low_pass ? num_elems : imm;
     int s;

     for (s = first; s < limit; s++, dst++) {
       if (mask & (1 << dst)) {
         out.u64[dst] = src[s];
       } else if (zero_mask) {
         out.u64[dst] = 0;
       } else {
         out.u64[dst] = qres_orig.u64[dst];
       }
     }
   }

   return out.zmmi;
 }

 /*
  * Run the 64-bit alignr checks.  Each TEST_ALIGNQ(mask, imm) expands to the
  * blend-masked, zero-masked and unmasked checks for that write mask and
  * element shift.  The shifts used range from 0 to 7; the masks include
  * all-clear (0x00), all-set (0xff) and mixed patterns.
  */
 void NOINLINE do_zalignq() {
   /* Shift 0. */
   TEST_ALIGNQ(0x00, 0);
   TEST_ALIGNQ(0xbe, 0);
   TEST_ALIGNQ(0xff, 0);

   /* Shift 1. */
   TEST_ALIGNQ(0x00, 1);
   TEST_ALIGNQ(0xbe, 1);
   TEST_ALIGNQ(0xff, 1);

   /* Shift 3. */
   TEST_ALIGNQ(0x00, 3);
   TEST_ALIGNQ(0xbe, 3);
   TEST_ALIGNQ(0xff, 3);

   /* Shift 4. */
   TEST_ALIGNQ(0x00, 4);
   TEST_ALIGNQ(0xbe, 4);
   TEST_ALIGNQ(0xff, 4);

   /* Shift 5. */
   TEST_ALIGNQ(0x00, 5);
   TEST_ALIGNQ(0xbe, 5);
   TEST_ALIGNQ(0xff, 5);

   /* Shift 6. */
   TEST_ALIGNQ(0x00, 6);
   TEST_ALIGNQ(0xbe, 6);
   TEST_ALIGNQ(0xff, 6);

   /* Shift 7. */
   TEST_ALIGNQ(0x00, 7);
   TEST_ALIGNQ(0xbe, 7);
   TEST_ALIGNQ(0xe7, 7);
   TEST_ALIGNQ(0xff, 7);
 }

 /*
  * Entry point: initialise the operands, run both test groups, then report.
  * Prints "PASSED" and returns 0 when n_errs is zero; otherwise prints
  * "FAILED" and returns 1.
  */
 int main() {
   init();
   do_zalignd();
   do_zalignq();

   if (n_errs == 0) {
     printf("PASSED\n");
     return 0;
   }

   printf("FAILED\n");
   return 1;
 }
	/*
	* Test 512-bit intrinsics related to valignd and valignq.
	* Here we check for _mm512_[mask|maskz]_alignr_epi*
	* intrinsics.
	*/

	#include "m512_test_util.h"
	#include <memory.h>

	/*
	* Initialized to zero and never written in this file.  Being volatile, each
	* use is a real run-time read rather than a foldable constant.
	*/
	volatile int vol0 = 0;

	/*
	* Stores element vol0 of (v512).xmmi back onto itself, so the value is
	* unchanged.  Presumably this keeps the optimizer from assuming it knows the
	* operand's contents -- NOTE(review): intent inferred from usage, confirm.
	*/
	#define soft_update(v512) (v512).xmmi[vol0] = (v512).xmmi[vol0]

	/*
	* TEST_MASK: check the blend-masked form _mm512_mask_alignr_epi<elembits>.
	*
	* Part 1 seeds res from <dtype>res_orig, runs the intrinsic with res as the
	* pass-through source, and compares against compute_<instr>(mask, 0, ...).
	* Part 2 repeats the check with the second source produced by a masked load
	* of <dtype>op2 over <dtype>op1; the expected value is computed from a
	* separately loaded copy (src2_copy).
	*
	* The "mix with load" label is assembled as ", " #imm on a single line so
	* the failure message does not absorb the whitespace of a continuation
	* line inside the string literal.
	*/
	#define TEST_MASK(instr, elembits, mask, imm, num_elems, dtype) \
	  { /* Blend masking */ \
	    memcpy(&res, &dtype##res_orig, sizeof(res)); \
	    soft_update(dtype##op2); \
	    res.zmmi = _mm512_mask_alignr_epi##elembits( \
	        res.zmmi, mask, dtype##op1.zmmi, dtype##op2.zmmi, imm); \
	    \
	    /* Compute the expected result. */ \
	    expect.zmmi = \
	        compute_##instr(mask, 0, &dtype##op1, &dtype##op2, imm, num_elems); \
	    \
	    /* Compare the obtained and expected results. */ \
	    check_equal_n##dtype(&res, &expect, num_elems, \
	                         "_mm512_mask_alignr_epi" #elembits ", " #imm, \
	                         __LINE__); \
	    /* Verify combination with masked load. */ \
	    { \
	      __m512i src2_copy, src2 = dtype##op1.zmmi; \
	      memcpy(&res, &dtype##res_orig, sizeof(res)); \
	      soft_update(dtype##op2); \
	      src2 = _mm512_mask_load_epi##elembits(src2, mask, &dtype##op2.zmmi); \
	      res.zmmi = _mm512_mask_alignr_epi##elembits(res.zmmi, mask, \
	                                                  dtype##op1.zmmi, src2, imm); \
	      soft_update(dtype##op2); \
	      src2_copy = _mm512_mask_load_epi##elembits(dtype##op1.zmmi, mask, \
	                                                 &dtype##op2.zmmi); \
	      expect.zmmi = \
	          compute_##instr(mask, 0, &dtype##op1, &src2_copy, imm, num_elems); \
	      check_equal_n##dtype( \
	          &res, &expect, num_elems, \
	          "mix with load _mm512_mask_alignr_epi" #elembits ", " #imm, \
	          __LINE__); \
	    } \
	  }

	/*
	* TEST_MASKZ: check the zero-masked form _mm512_maskz_alignr_epi<elembits>.
	*
	* res is pre-filled with 0xFF bytes before the intrinsic overwrites it.  The
	* expected value comes from compute_<instr> called with zero_mask = 1, and
	* the two are compared by check_equal_n<dtype> over num_elems elements.
	*/
	#define TEST_MASKZ(instr, elembits, mask, imm, num_elems, dtype) \
	{ \
	/* Zero masking */ \
	memset(&res, 0xFF, sizeof(res)); \
	soft_update(dtype##op2); \
	res.zmmi = _mm512_maskz_alignr_epi##elembits(mask, dtype##op1.zmmi, \
	dtype##op2.zmmi, imm); \
	\
	/* Compute the expected result. */ \
	expect.zmmi = \
	compute_##instr(mask, 1, &dtype##op1, &dtype##op2, imm, num_elems); \
	\
	/* Compare the obtained and expected results. */ \
	check_equal_n##dtype(&res, &expect, num_elems, \
	"_mm512_maskz_alignr_epi" #elembits ", " #imm, \
	__LINE__); \
	}

	/*
	* TEST: check the unmasked form _mm512_alignr_epi<elembits>.
	*
	* res is pre-filled with 0xFF bytes before the intrinsic overwrites it.  The
	* reference model is called with (1 << num_elems) - 1 as the mask, i.e. one
	* set bit per element, so every element is taken from the operands.
	*/
	#define TEST(instr, elembits, imm, num_elems, dtype) \
	{ \
	/* No masking */ \
	memset(&res, 0xFF, sizeof(res)); \
	soft_update(dtype##op2); \
	res.zmmi = \
	_mm512_alignr_epi##elembits(dtype##op1.zmmi, dtype##op2.zmmi, imm); \
	\
	/* Compute the expected result. */ \
	expect.zmmi = compute_##instr((1 << (num_elems)) - 1, 0, &dtype##op1, \
	&dtype##op2, imm, num_elems); \
	\
	/* Compare the obtained and expected results. */ \
	check_equal_n##dtype(&res, &expect, num_elems, \
	"_mm512_alignr_epi" #elembits ", " #imm, __LINE__); \
	}

	#define TEST_ALIGN(instr, elembits, mask, imm, num_elems, dtype) \
	TEST_MASK(instr, elembits, mask, imm, num_elems, dtype) \
	TEST_MASKZ(instr, elembits, mask, imm, num_elems, dtype) \
	TEST(instr, elembits, imm, num_elems, dtype)

	#define TEST_ALIGND(mask, imm) TEST_ALIGN(zalignd, 32, mask, imm, 16, d)

	#define TEST_ALIGNQ(mask, imm) TEST_ALIGN(zalignq, 64, mask, imm, 8, q)

	/* Operands and pass-through source for the 32-bit (d) and 64-bit (q) tests. */
	V512 dop1, dop2, dres_orig;
	V512 qop1, qop2, qres_orig;
	/* Scratch vectors: the intrinsic's output and the reference model's output. */
	V512 res, expect;

	/* Source data that init() hands to memcpy (64 bytes) for dres_orig. */
	volatile unsigned int dres_orig_arr[16] = {
	0x12345678, 0x11111111, 0x22222222, 0x33333333, 0x44444444, 0x55555555,
	0x66666666, 0x77777777, 0x88888888, 0x99999999, 0xaaaaaaaa, 0xbbbbbbbb,
	0xcccccccc, 0xdddddddd, 0xeeeeeeee, 0xffffffff};

	/*
	* Source data that init() hands to memcpy (64 bytes) for qres_orig.
	* NOTE(review): the last two entries (0x7777..., 0x6666...) break the
	* otherwise ascending pattern -- presumably harmless, confirm if intended.
	*/
	volatile U64 qres_orig_arr[8] = {0x123456789abcdef0, 0x1111111111111111,
	0x2222222222222222, 0x3333333333333333,
	0x4444444444444444, 0x5555555555555555,
	0x7777777777777777, 0x6666666666666666};

	/*
	* Populate the global test operands and pass-through vectors.
	*
	* Each 32-bit lane of dop1/dop2 holds 0x11000000/0x22000000 plus its lane
	* index; each 64-bit lane of qop1/qop2 holds 0x1111000000000000 /
	* 0x2222000000000000 plus its lane index.  dres_orig and qres_orig are
	* loaded from their volatile source arrays.
	*/
	static void NOINLINE init() {
	  int i;

	  for (i = 0; i < 16; i++) {
	    dop1.u32[i] = 0x11000000 + i;
	  }
	  for (i = 0; i < 16; i++) {
	    dop2.u32[i] = 0x22000000 + i;
	  }

	  for (i = 0; i < 8; i++) {
	    qop1.u64[i] = 0x1111000000000000 + i;
	  }
	  for (i = 0; i < 8; i++) {
	    qop2.u64[i] = 0x2222000000000000 + i;
	  }

	  /*
	   * memcpy needs pointer arguments: cast to void *, not to void.  The
	   * casts also drop the volatile qualifier from the source arrays.
	   */
	  memcpy((void *)&dres_orig, (void *)dres_orig_arr, 64);
	  memcpy((void *)&qres_orig, (void *)qres_orig_arr, 64);
	}

	/*
	* Reference model for the 32-bit alignr intrinsics.
	*
	* Result lanes are filled in order: first op2[imm .. num_elems-1], then
	* op1[0 .. imm-1].  A lane whose bit in mask is clear receives 0 when
	* zero_mask is nonzero, otherwise the same lane of the global dres_orig.
	*
	* op1 and op2 are read as arrays of unsigned int.
	*/
	__m512i NOINLINE compute_zalignd(__mmask16 mask, int zero_mask, const void *op1,
	                                 const void *op2, int imm, int num_elems) {
	  V512 out;
	  int dst = 0;
	  int pass;

	  for (pass = 0; pass < 2; pass++) {
	    /* Pass 0 takes the upper part of op2, pass 1 the lower part of op1. */
	    const int low_pass = (pass == 0);
	    const unsigned int *src = (const unsigned int *)(low_pass ? op2 : op1);
	    const int first = low_pass ? imm : 0;
	    const int limit = low_pass ? num_elems : imm;
	    int s;

	    for (s = first; s < limit; s++, dst++) {
	      if (mask & (1 << dst)) {
	        out.u32[dst] = src[s];
	      } else if (zero_mask) {
	        out.u32[dst] = 0;
	      } else {
	        out.u32[dst] = dres_orig.u32[dst];
	      }
	    }
	  }

	  return out.zmmi;
	}

	/*
	* Run the 32-bit alignr checks.  Each TEST_ALIGND(mask, imm) expands to the
	* blend-masked, zero-masked and unmasked checks for that write mask and
	* element shift.  The shifts used range from 0 to 15; the masks include
	* all-clear (0x0000), all-set (0xffff) and mixed patterns.
	*/
	void NOINLINE do_zalignd() {
	/* Shift 0. */
	TEST_ALIGND(0x0000, 0);
	TEST_ALIGND(0xabcd, 0);
	TEST_ALIGND(0xffff, 0);

	/* Shift 1. */
	TEST_ALIGND(0x0000, 1);
	TEST_ALIGND(0xabcd, 1);
	TEST_ALIGND(0xfef7, 1);
	TEST_ALIGND(0xffff, 1);

	/* Shifts 3 and 5, one mixed mask each. */
	TEST_ALIGND(0xabcd, 3);
	TEST_ALIGND(0xfefe, 5);

	/* Shift 7. */
	TEST_ALIGND(0x0000, 7);
	TEST_ALIGND(0xabcd, 7);
	TEST_ALIGND(0xffff, 7);

	/* Shift 8. */
	TEST_ALIGND(0x0000, 8);
	TEST_ALIGND(0xabcd, 8);
	TEST_ALIGND(0xffff, 8);

	/* Shift 9. */
	TEST_ALIGND(0x0000, 9);
	TEST_ALIGND(0xabcd, 9);
	TEST_ALIGND(0xffff, 9);

	/* Shift 14. */
	TEST_ALIGND(0x0000, 14);
	TEST_ALIGND(0xabcd, 14);
	TEST_ALIGND(0xfef7, 14);
	TEST_ALIGND(0xffff, 14);

	/* Shift 15. */
	TEST_ALIGND(0x0000, 15);
	TEST_ALIGND(0xabcd, 15);
	TEST_ALIGND(0xffff, 15);
	}

	/*
	* Reference model for the 64-bit alignr intrinsics.
	*
	* Result lanes are filled in order: first op2[imm .. num_elems-1], then
	* op1[0 .. imm-1].  A lane whose bit in mask is clear receives 0 when
	* zero_mask is nonzero, otherwise the same lane of the global qres_orig.
	*
	* op1 and op2 are read as arrays of U64.
	*/
	__m512i NOINLINE compute_zalignq(int mask, int zero_mask, const void *op1,
	                                 const void *op2, int imm, int num_elems) {
	  V512 out;
	  int dst = 0;
	  int pass;

	  for (pass = 0; pass < 2; pass++) {
	    /* Pass 0 takes the upper part of op2, pass 1 the lower part of op1. */
	    const int low_pass = (pass == 0);
	    const U64 *src = (const U64 *)(low_pass ? op2 : op1);
	    const int first = low_pass ? imm : 0;
	    const int limit = low_pass ? num_elems : imm;
	    int s;

	    for (s = first; s < limit; s++, dst++) {
	      if (mask & (1 << dst)) {
	        out.u64[dst] = src[s];
	      } else if (zero_mask) {
	        out.u64[dst] = 0;
	      } else {
	        out.u64[dst] = qres_orig.u64[dst];
	      }
	    }
	  }

	  return out.zmmi;
	}

	/*
	* Run the 64-bit alignr checks.  Each TEST_ALIGNQ(mask, imm) expands to the
	* blend-masked, zero-masked and unmasked checks for that write mask and
	* element shift.  The shifts used range from 0 to 7; the masks include
	* all-clear (0x00), all-set (0xff) and mixed patterns.
	*/
	void NOINLINE do_zalignq() {
	/* Shift 0. */
	TEST_ALIGNQ(0x00, 0);
	TEST_ALIGNQ(0xbe, 0);
	TEST_ALIGNQ(0xff, 0);

	/* Shift 1. */
	TEST_ALIGNQ(0x00, 1);
	TEST_ALIGNQ(0xbe, 1);
	TEST_ALIGNQ(0xff, 1);

	/* Shift 3. */
	TEST_ALIGNQ(0x00, 3);
	TEST_ALIGNQ(0xbe, 3);
	TEST_ALIGNQ(0xff, 3);

	/* Shift 4. */
	TEST_ALIGNQ(0x00, 4);
	TEST_ALIGNQ(0xbe, 4);
	TEST_ALIGNQ(0xff, 4);

	/* Shift 5. */
	TEST_ALIGNQ(0x00, 5);
	TEST_ALIGNQ(0xbe, 5);
	TEST_ALIGNQ(0xff, 5);

	/* Shift 6. */
	TEST_ALIGNQ(0x00, 6);
	TEST_ALIGNQ(0xbe, 6);
	TEST_ALIGNQ(0xff, 6);

	/* Shift 7. */
	TEST_ALIGNQ(0x00, 7);
	TEST_ALIGNQ(0xbe, 7);
	TEST_ALIGNQ(0xe7, 7);
	TEST_ALIGNQ(0xff, 7);
	}

	/*
	* Entry point: initialise the operands, run both test groups, then report.
	* Prints "PASSED" and returns 0 when n_errs is zero; otherwise prints
	* "FAILED" and returns 1.
	*/
	int main() {
	  init();
	  do_zalignd();
	  do_zalignq();

	  if (n_errs == 0) {
	    printf("PASSED\n");
	    return 0;
	  }

	  printf("FAILED\n");
	  return 1;
	}