/* blob: 479996f5e17075d15d22cdfc4c972dd1ddd400ac [file] [log] [blame] */
/*
* Test 512-bit intrinsics related to valignd and valignq.
* Here we check for _mm512_[mask|maskz]_alignr_epi*
* intrinsics.
*/
#include "m512_test_util.h"
#include <memory.h>
/*
 * Index used by soft_update(). It is initialized to 0 and never assigned in
 * this file; being volatile, every use is an actual read of the variable.
 */
volatile int vol0 = 0;
/*
 * Store element vol0 of the xmmi member of a V512 back onto itself.
 * NOTE(review): presumably this is here so the compiler cannot treat the
 * operand as a known constant when the intrinsic under test is compiled --
 * confirm against m512_test_util.h / the test-suite conventions.
 */
#define soft_update(v512) (v512).xmmi[vol0] = (v512).xmmi[vol0]
/*
 * TEST_MASK: check the blend-masked form _mm512_mask_alignr_epi<elembits>.
 *
 *   instr     - suffix of the reference routine, called as compute_<instr>
 *   elembits  - element width pasted into the intrinsic names (32 or 64 here)
 *   mask      - write mask given to the intrinsic and to the reference routine
 *   imm       - element shift count given to the intrinsic and the reference;
 *               also stringized into the failure label
 *   num_elems - number of elements computed and compared
 *   dtype     - prefix selecting the <dtype>op1 / <dtype>op2 / <dtype>res_orig
 *               globals and the check_equal_n<dtype> comparator
 *
 * Two checks are made, both starting from a fresh copy of <dtype>res_orig in
 * the global `res`:
 *   1. the intrinsic applied directly to op1/op2;
 *   2. the intrinsic applied to op1 and a second operand produced by
 *      _mm512_mask_load_epi<elembits> (op1 as pass-through, op2 as the memory
 *      source), compared against the reference fed with an identically
 *      built operand.
 *
 * The macro expands to a bare brace block (not do/while), because TEST_ALIGN
 * places several of these blocks back to back without separators.
 */
#define TEST_MASK(instr, elembits, mask, imm, num_elems, dtype) \
  { \
    /* Blend masking: start from the original destination contents. */ \
    memcpy(&res, &dtype##res_orig, sizeof(res)); \
    soft_update(dtype##op2); \
    res.zmmi = _mm512_mask_alignr_epi##elembits( \
        res.zmmi, mask, dtype##op1.zmmi, dtype##op2.zmmi, imm); \
    \
    /* Compute the expected result. */ \
    expect.zmmi = \
        compute_##instr(mask, 0, &dtype##op1, &dtype##op2, imm, num_elems); \
    \
    /* Compare the obtained and expected results. */ \
    check_equal_n##dtype(&res, &expect, num_elems, \
                         "_mm512_mask_alignr_epi" #elembits ", " #imm, \
                         __LINE__); \
    \
    /* Verify combination with masked load. */ \
    { \
      __m512i src2_copy, src2 = dtype##op1.zmmi; \
      memcpy(&res, &dtype##res_orig, sizeof(res)); \
      soft_update(dtype##op2); \
      src2 = _mm512_mask_load_epi##elembits(src2, mask, &dtype##op2.zmmi); \
      res.zmmi = _mm512_mask_alignr_epi##elembits(res.zmmi, mask, \
                                                  dtype##op1.zmmi, src2, imm); \
      soft_update(dtype##op2); \
      src2_copy = _mm512_mask_load_epi##elembits(dtype##op1.zmmi, mask, \
                                                 &dtype##op2.zmmi); \
      expect.zmmi = \
          compute_##instr(mask, 0, &dtype##op1, &src2_copy, imm, num_elems); \
      /* Every string literal stays on one source line: a literal that is */ \
      /* continued across a backslash-newline absorbs the indentation of  */ \
      /* the following line into the message.                             */ \
      check_equal_n##dtype( \
          &res, &expect, num_elems, \
          "mix with load _mm512_mask_alignr_epi" #elembits ", " #imm, \
          __LINE__); \
    } \
  }
/*
 * TEST_MASKZ: check the zero-masked form _mm512_maskz_alignr_epi<elembits>.
 *
 * Parameters have the same meaning as in TEST_MASK: instr selects
 * compute_<instr>, elembits is pasted into the intrinsic name, mask and imm
 * are handed to both the intrinsic and the reference routine, num_elems is
 * the number of elements compared, and dtype selects the <dtype>op1 /
 * <dtype>op2 globals and the check_equal_n<dtype> comparator.
 *
 * The global `res` is pre-filled with 0xFF bytes before the intrinsic result
 * is stored into it. The reference routine is called with zero_mask == 1.
 * Expands to a bare brace block so that TEST_ALIGN can chain it with the
 * other variants.
 */
#define TEST_MASKZ(instr, elembits, mask, imm, num_elems, dtype) \
{ \
/* Zero masking */ \
memset(&res, 0xFF, sizeof(res)); \
soft_update(dtype##op2); \
res.zmmi = _mm512_maskz_alignr_epi##elembits(mask, dtype##op1.zmmi, \
dtype##op2.zmmi, imm); \
\
/* Compute the expected result. */ \
expect.zmmi = \
compute_##instr(mask, 1, &dtype##op1, &dtype##op2, imm, num_elems); \
\
/* Compare the obtained and expected results. */ \
check_equal_n##dtype(&res, &expect, num_elems, \
"_mm512_maskz_alignr_epi" #elembits ", " #imm, \
__LINE__); \
}
/*
 * TEST: check the unmasked form _mm512_alignr_epi<elembits>.
 *
 * instr selects compute_<instr>, elembits is pasted into the intrinsic name,
 * imm is handed to both the intrinsic and the reference routine, num_elems
 * is the number of elements compared, and dtype selects the <dtype>op1 /
 * <dtype>op2 globals and the check_equal_n<dtype> comparator.
 *
 * There is no mask argument: the reference routine is given
 * (1 << num_elems) - 1, i.e. a mask with the low num_elems bits set, and
 * zero_mask == 0. The global `res` is pre-filled with 0xFF bytes before the
 * intrinsic result is stored into it.
 */
#define TEST(instr, elembits, imm, num_elems, dtype) \
{ \
/* No masking */ \
memset(&res, 0xFF, sizeof(res)); \
soft_update(dtype##op2); \
res.zmmi = \
_mm512_alignr_epi##elembits(dtype##op1.zmmi, dtype##op2.zmmi, imm); \
\
/* Compute the expected result. */ \
expect.zmmi = compute_##instr((1 << (num_elems)) - 1, 0, &dtype##op1, \
&dtype##op2, imm, num_elems); \
\
/* Compare the obtained and expected results. */ \
check_equal_n##dtype(&res, &expect, num_elems, \
"_mm512_alignr_epi" #elembits ", " #imm, __LINE__); \
}
/*
 * TEST_ALIGN runs the blend-masked, zero-masked and unmasked checks for one
 * mask/imm pair. The three sub-macros each expand to a brace block, so they
 * are simply written one after another with no separators.
 *
 * TEST_ALIGND instantiates it for 32-bit elements (16 elements, compute_zalignd,
 * the dop1/dop2/dres_orig globals); TEST_ALIGNQ does the same for 64-bit
 * elements (8 elements, compute_zalignq, the qop1/qop2/qres_orig globals).
 */
#define TEST_ALIGN(instr, elembits, mask, imm, num_elems, dtype) \
TEST_MASK(instr, elembits, mask, imm, num_elems, dtype) \
TEST_MASKZ(instr, elembits, mask, imm, num_elems, dtype) \
TEST(instr, elembits, imm, num_elems, dtype)
#define TEST_ALIGND(mask, imm) TEST_ALIGN(zalignd, 32, mask, imm, 16, d)
#define TEST_ALIGNQ(mask, imm) TEST_ALIGN(zalignq, 64, mask, imm, 8, q)
/*
 * Operands and original-destination vectors for the 32-bit ("d") and 64-bit
 * ("q") element tests; all are filled in by init(). V512 is not defined in
 * this file (presumably it comes from m512_test_util.h).
 */
V512 dop1, dop2, dres_orig;
V512 qop1, qop2, qres_orig;
/* Scratch vectors written by the TEST* macros: obtained and expected result. */
V512 res, expect;
/*
 * Bit patterns that init() copies into dres_orig. The table is volatile, so
 * its contents are read from memory rather than assumed by the compiler.
 */
volatile unsigned int dres_orig_arr[16] = {
0x12345678, 0x11111111, 0x22222222, 0x33333333, 0x44444444, 0x55555555,
0x66666666, 0x77777777, 0x88888888, 0x99999999, 0xaaaaaaaa, 0xbbbbbbbb,
0xcccccccc, 0xdddddddd, 0xeeeeeeee, 0xffffffff};
/*
 * Bit patterns that init() copies into qres_orig.
 * NOTE(review): the last two entries (0x77.., 0x66..) are not in ascending
 * order like the rest; the values only serve as fill patterns, so this looks
 * harmless, but confirm it is intentional.
 */
volatile U64 qres_orig_arr[8] = {0x123456789abcdef0, 0x1111111111111111,
0x2222222222222222, 0x3333333333333333,
0x4444444444444444, 0x5555555555555555,
0x7777777777777777, 0x6666666666666666};
/*
 * Initialize every global vector used by the tests.
 *
 * Each 32-bit element i of dop1 / dop2 is set to 0x11000000 + i /
 * 0x22000000 + i, and each 64-bit element i of qop1 / qop2 to
 * 0x1111000000000000 + i / 0x2222000000000000 + i, so every value encodes
 * the operand it belongs to and its element index.
 *
 * dres_orig and qres_orig receive the contents of the volatile pattern
 * tables. The tables are read element by element through their own
 * volatile-qualified type: copying them with memcpy() after casting the
 * qualifier away would access volatile objects through non-volatile lvalues,
 * which the C standard leaves undefined.
 */
static void NOINLINE init(void) {
  int i;

  /* 32-bit element operands and original destination (16 elements). */
  for (i = 0; i < 16; i++) {
    dop1.u32[i] = 0x11000000 + i;
    dop2.u32[i] = 0x22000000 + i;
    dres_orig.u32[i] = dres_orig_arr[i];
  }

  /* 64-bit element operands and original destination (8 elements). */
  for (i = 0; i < 8; i++) {
    qop1.u64[i] = 0x1111000000000000 + i;
    qop2.u64[i] = 0x2222000000000000 + i;
    qres_orig.u64[i] = qres_orig_arr[i];
  }
}
/*
 * Reference model for the 32-bit-element alignr intrinsics.
 *
 * op2 supplies the low num_elems elements and op1 the high num_elems
 * elements of a double-length sequence; result element k is the element at
 * position k + shift of that sequence.
 *
 *   mask      - bit k set: result element k takes the shifted value
 *   zero_mask - for elements whose mask bit is clear: nonzero stores 0,
 *               zero stores element k of the global dres_orig
 *   op1, op2  - pointers to the operands, read as arrays of unsigned int
 *   imm       - element shift count
 *   num_elems - number of elements to produce (16 for a full vector)
 *
 * The shift count is reduced modulo num_elems. For num_elems == 16 that is
 * the low four bits of imm, which is what the 512-bit dword align
 * instruction uses, so imm >= num_elems yields the same expectation the
 * hardware produces instead of indexing past the operands and the result.
 * For 0 <= imm < num_elems the result is unchanged.
 *
 * Returns the expected vector; elements at index >= num_elems are zero.
 */
__m512i NOINLINE compute_zalignd(__mmask16 mask, int zero_mask, const void *op1,
                                 const void *op2, int imm, int num_elems) {
  const unsigned int *lo_src = (const unsigned int *)op2;
  const unsigned int *hi_src = (const unsigned int *)op1;
  V512 res;
  int shift, res_idx;

  /* Give every byte a defined value, even if num_elems is short. */
  memset(&res, 0, sizeof(res));

  /* Unsigned arithmetic keeps the reduction well defined for any imm. */
  shift = 0;
  if (num_elems > 0) {
    shift = (int)((unsigned int)imm % (unsigned int)num_elems);
  }

  for (res_idx = 0; res_idx < num_elems; res_idx++) {
    int src_idx = res_idx + shift;

    if (mask & (1 << res_idx)) {
      /* Positions past the end of op2 continue at the start of op1. */
      res.u32[res_idx] = (src_idx < num_elems) ? lo_src[src_idx]
                                               : hi_src[src_idx - num_elems];
    } else if (zero_mask) {
      res.u32[res_idx] = 0;
    } else {
      res.u32[res_idx] = dres_orig.u32[res_idx];
    }
  }
  return res.zmmi;
}
/*
 * Run the 32-bit element checks. Each TEST_ALIGND(mask, imm) line performs
 * the blend-masked, zero-masked and unmasked checks for that pair. The shift
 * counts covered are 0, 1, 3, 5, 7, 8, 9, 14 and 15, with 16-bit masks that
 * include all-clear (0x0000), all-set (0xffff) and mixed patterns.
 */
void NOINLINE do_zalignd() {
TEST_ALIGND(0x0000, 0);
TEST_ALIGND(0xabcd, 0);
TEST_ALIGND(0xffff, 0);
TEST_ALIGND(0x0000, 1);
TEST_ALIGND(0xabcd, 1);
TEST_ALIGND(0xfef7, 1);
TEST_ALIGND(0xffff, 1);
TEST_ALIGND(0xabcd, 3);
TEST_ALIGND(0xfefe, 5);
TEST_ALIGND(0x0000, 7);
TEST_ALIGND(0xabcd, 7);
TEST_ALIGND(0xffff, 7);
TEST_ALIGND(0x0000, 8);
TEST_ALIGND(0xabcd, 8);
TEST_ALIGND(0xffff, 8);
TEST_ALIGND(0x0000, 9);
TEST_ALIGND(0xabcd, 9);
TEST_ALIGND(0xffff, 9);
TEST_ALIGND(0x0000, 14);
TEST_ALIGND(0xabcd, 14);
TEST_ALIGND(0xfef7, 14);
TEST_ALIGND(0xffff, 14);
TEST_ALIGND(0x0000, 15);
TEST_ALIGND(0xabcd, 15);
TEST_ALIGND(0xffff, 15);
}
/*
 * Reference model for the 64-bit-element alignr intrinsics.
 *
 * op2 supplies the low num_elems elements and op1 the high num_elems
 * elements of a double-length sequence; result element k is the element at
 * position k + shift of that sequence.
 *
 *   mask      - bit k set: result element k takes the shifted value
 *   zero_mask - for elements whose mask bit is clear: nonzero stores 0,
 *               zero stores element k of the global qres_orig
 *   op1, op2  - pointers to the operands, read as arrays of U64
 *   imm       - element shift count
 *   num_elems - number of elements to produce (8 for a full vector)
 *
 * The shift count is reduced modulo num_elems. For num_elems == 8 that is
 * the low three bits of imm, which is what the 512-bit qword align
 * instruction uses, so imm >= num_elems yields the same expectation the
 * hardware produces instead of indexing past the operands and the result.
 * For 0 <= imm < num_elems the result is unchanged.
 *
 * Returns the expected vector; elements at index >= num_elems are zero.
 */
__m512i NOINLINE compute_zalignq(int mask, int zero_mask, const void *op1,
                                 const void *op2, int imm, int num_elems) {
  const U64 *lo_src = (const U64 *)op2;
  const U64 *hi_src = (const U64 *)op1;
  V512 res;
  int shift, res_idx;

  /* Give every byte a defined value, even if num_elems is short. */
  memset(&res, 0, sizeof(res));

  /* Unsigned arithmetic keeps the reduction well defined for any imm. */
  shift = 0;
  if (num_elems > 0) {
    shift = (int)((unsigned int)imm % (unsigned int)num_elems);
  }

  for (res_idx = 0; res_idx < num_elems; res_idx++) {
    int src_idx = res_idx + shift;

    if (mask & (1 << res_idx)) {
      /* Positions past the end of op2 continue at the start of op1. */
      res.u64[res_idx] = (src_idx < num_elems) ? lo_src[src_idx]
                                               : hi_src[src_idx - num_elems];
    } else if (zero_mask) {
      res.u64[res_idx] = 0;
    } else {
      res.u64[res_idx] = qres_orig.u64[res_idx];
    }
  }
  return res.zmmi;
}
/*
 * Run the 64-bit element checks. Each TEST_ALIGNQ(mask, imm) line performs
 * the blend-masked, zero-masked and unmasked checks for that pair. The shift
 * counts covered are 0, 1, 3, 4, 5, 6 and 7, each with the masks 0x00, 0xbe
 * and 0xff (plus 0xe7 for shift 7).
 */
void NOINLINE do_zalignq() {
TEST_ALIGNQ(0x00, 0);
TEST_ALIGNQ(0xbe, 0);
TEST_ALIGNQ(0xff, 0);
TEST_ALIGNQ(0x00, 1);
TEST_ALIGNQ(0xbe, 1);
TEST_ALIGNQ(0xff, 1);
TEST_ALIGNQ(0x00, 3);
TEST_ALIGNQ(0xbe, 3);
TEST_ALIGNQ(0xff, 3);
TEST_ALIGNQ(0x00, 4);
TEST_ALIGNQ(0xbe, 4);
TEST_ALIGNQ(0xff, 4);
TEST_ALIGNQ(0x00, 5);
TEST_ALIGNQ(0xbe, 5);
TEST_ALIGNQ(0xff, 5);
TEST_ALIGNQ(0x00, 6);
TEST_ALIGNQ(0xbe, 6);
TEST_ALIGNQ(0xff, 6);
TEST_ALIGNQ(0x00, 7);
TEST_ALIGNQ(0xbe, 7);
TEST_ALIGNQ(0xe7, 7);
TEST_ALIGNQ(0xff, 7);
}
/*
 * Program entry point: prepare the operands, run the 32-bit and then the
 * 64-bit element checks, and report the verdict.
 *
 * Prints "PASSED" and returns 0 when n_errs (defined outside this file) is
 * zero after all checks; otherwise prints "FAILED" and returns 1.
 */
int main() {
  init();
  do_zalignd();
  do_zalignq();

  /* Success path first; anything else is a failure. */
  if (n_errs == 0) {
    printf("PASSED\n");
    return 0;
  }

  printf("FAILED\n");
  return 1;
}