/*
 * Test the masked load, register-copy and store intrinsics that map to
 * the AVX512BW/AVX512VL byte and word integer move instructions
 * (vmovdqu8 / vmovdqu16).
 */
#include "m512_test_util.h"

V512 i8_src1;
V512 i8_src2;
V512 i16_src1;
V512 i16_src2;

void NOINLINE init() {
volatile int i;
for (i = 0; i < 64; i++) {
i8_src1.s8[i] = i;
i8_src2.s8[i] = (i & 1) ? i : -i;
}
for (i = 0; i < 32; i++) {
i16_src1.s16[i] = i;
i16_src2.s16[i] = (i & 1) ? i : -i;
}
}

/*
 * Use a "soft update" between tests to make the compiler believe src has
 * been modified.  This prevents PRE (partial redundancy elimination) from
 * hoisting the load of src, which would defeat ciscization (folding the
 * load into the consuming instruction's memory operand).  It also keeps
 * PRE from merging repeated intrinsic operations, ensuring we execute
 * each intended instruction.
 */
volatile int vol0 = 0;
#define soft_v512_update(var) (var).xmmi[vol0] = (var).xmmi[vol0]
#define BLANK
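
/*
 * Each GEN_MASK_* macro below expands to a test function do_<oper>() that
 * exercises the 512-, 256- and 128-bit forms of one intrinsic.  Since the
 * narrower masks are truncations of the wide mask, the narrower results
 * must equal the low lanes of the 512-bit result, which is what the
 * check_equal_nd() calls verify.
 *
 * Per-element semantics being tested (reference sketch only, not used by
 * the checks below):
 *
 *   masked:      res[i] = (k >> i) & 1 ? src[i] : old[i];
 *   zero-masked: res[i] = (k >> i) & 1 ? src[i] : 0;
 */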
#define GEN_MASK_I8_LOAD(oper) GEN_MASK_I8(oper, &)
#define GEN_MASK_I8_COPY(oper) GEN_MASK_I8(oper, BLANK)
#define GEN_MASK_I8(oper, addr_of) \
void NOINLINE do_##oper() { \
V512 xmm_res, ymm_res, zmm_res; \
__mmask64 k64 = 0xabcdeffe97febdcaULL; \
__mmask32 k32 = (__mmask32)k64; \
__mmask16 k16 = (__mmask16)k64; \
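/* k32 and k16 are the low bits of k64, so the ymm and xmm results \
   below must match the low lanes of the zmm result. */ \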
\
/* Masked. */ \
\
zmm_res.zmmi = _mm512_setzero_epi32(); \
ymm_res = zmm_res; \
xmm_res = zmm_res; \
\
soft_v512_update(i8_src2); \
zmm_res.zmmi = \
_mm512_mask_##oper(i8_src1.zmmi, k64, addr_of i8_src2.zmmi); \
soft_v512_update(i8_src2); \
ymm_res.ymmi[0] = \
_mm256_mask_##oper(i8_src1.ymmi[0], k32, addr_of i8_src2.ymmi[0]); \
soft_v512_update(i8_src2); \
xmm_res.xmmi[0] = \
_mm_mask_##oper(i8_src1.xmmi[0], k16, addr_of i8_src2.xmmi[0]); \
\
check_equal_nd(&ymm_res, &zmm_res, 8, "_mm256_mask_" #oper, __LINE__); \
check_equal_nd(&xmm_res, &zmm_res, 4, "_mm_mask_" #oper, __LINE__); \
\
/* Zero-masked. */ \
\
zmm_res.zmmi = _mm512_set1_epi32(1); \
ymm_res = zmm_res; \
xmm_res = zmm_res; \
\
soft_v512_update(i8_src1); \
zmm_res.zmmi = _mm512_maskz_##oper(k64, addr_of i8_src1.zmmi); \
soft_v512_update(i8_src1); \
ymm_res.ymmi[0] = _mm256_maskz_##oper(k32, addr_of i8_src1.ymmi[0]); \
soft_v512_update(i8_src1); \
xmm_res.xmmi[0] = _mm_maskz_##oper(k16, addr_of i8_src1.xmmi[0]); \
\
check_equal_nd(&ymm_res, &zmm_res, 8, "_mm256_maskz_" #oper, __LINE__); \
check_equal_nd(&xmm_res, &zmm_res, 4, "_mm_maskz_" #oper, __LINE__); \
}
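
/*
 * Store form: src is written to memory under mask; bytes whose mask bit
 * is zero are left untouched in the destination.  The ymm/xmm
 * destinations get a soft_v512_update() too, so the compiler cannot rely
 * on stale knowledge of their contents across the masked store.
 */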
#define GEN_MASK_I8_STORE(oper) \
void NOINLINE do_##oper() { \
V512 xmm_res, ymm_res, zmm_res; \
__mmask64 k64 = 0xabcdeffe97febdcaULL; \
__mmask32 k32 = (__mmask32)k64; \
__mmask16 k16 = (__mmask16)k64; \
\
/* Masked. */ \
\
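/* All three destinations start with identical, nonzero contents \
   (i16_src1 is arbitrary here), so masked-off lanes compare equal. */ \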
zmm_res = i16_src1; \
ymm_res = zmm_res; \
xmm_res = zmm_res; \
\
soft_v512_update(i8_src2); \
_mm512_mask_##oper(&zmm_res.zmmi, k64, i8_src2.zmmi); \
soft_v512_update(i8_src2); \
soft_v512_update(ymm_res); \
_mm256_mask_##oper(&ymm_res.ymmi[0], k32, i8_src2.ymmi[0]); \
soft_v512_update(i8_src2); \
soft_v512_update(xmm_res); \
_mm_mask_##oper(&xmm_res.xmmi[0], k16, i8_src2.xmmi[0]); \
\
check_equal_nd(&ymm_res, &zmm_res, 8, "_mm256_mask_" #oper, __LINE__); \
check_equal_nd(&xmm_res, &zmm_res, 4, "_mm_mask_" #oper, __LINE__); \
}

#define GEN_MASK_I16_LOAD(oper) GEN_MASK_I16(oper, &)
#define GEN_MASK_I16_COPY(oper) GEN_MASK_I16(oper, BLANK)
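
/* Word-element analogue of GEN_MASK_I8 above, with 32/16/8-bit masks. */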
#define GEN_MASK_I16(oper, addr_of) \
void NOINLINE do_##oper() { \
V512 xmm_res, ymm_res, zmm_res; \
__mmask32 k32 = 0xcfe97dba; \
__mmask16 k16 = (__mmask16)k32; \
__mmask8 k8 = (__mmask8)k32; \
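/* As above, k16 and k8 are the low bits of k32. */ \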
\
/* Masked. */ \
\
zmm_res.zmmi = _mm512_setzero_epi32(); \
ymm_res = zmm_res; \
xmm_res = zmm_res; \
\
soft_v512_update(i16_src2); \
zmm_res.zmmi = \
_mm512_mask_##oper(i16_src1.zmmi, k32, addr_of i16_src2.zmmi); \
soft_v512_update(i16_src2); \
ymm_res.ymmi[0] = \
_mm256_mask_##oper(i16_src1.ymmi[0], k16, addr_of i16_src2.ymmi[0]); \
soft_v512_update(i16_src2); \
xmm_res.xmmi[0] = \
_mm_mask_##oper(i16_src1.xmmi[0], k8, addr_of i16_src2.xmmi[0]); \
\
check_equal_nd(&ymm_res, &zmm_res, 8, "_mm256_mask_" #oper, __LINE__); \
check_equal_nd(&xmm_res, &zmm_res, 4, "_mm_mask_" #oper, __LINE__); \
\
/* Zero-masked. */ \
\
zmm_res.zmmi = _mm512_set1_epi32(1); \
ymm_res = zmm_res; \
xmm_res = zmm_res; \
\
soft_v512_update(i16_src1); \
zmm_res.zmmi = _mm512_maskz_##oper(k32, addr_of i16_src1.zmmi); \
soft_v512_update(i16_src1); \
ymm_res.ymmi[0] = _mm256_maskz_##oper(k16, addr_of i16_src1.ymmi[0]); \
soft_v512_update(i16_src1); \
xmm_res.xmmi[0] = _mm_maskz_##oper(k8, addr_of i16_src1.xmmi[0]); \
\
check_equal_nd(&ymm_res, &zmm_res, 8, "_mm256_maskz_" #oper, __LINE__); \
check_equal_nd(&xmm_res, &zmm_res, 4, "_mm_maskz_" #oper, __LINE__); \
}
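
/* Word-element analogue of GEN_MASK_I8_STORE above. */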
#define GEN_MASK_I16_STORE(oper) \
void NOINLINE do_##oper() { \
V512 xmm_res, ymm_res, zmm_res; \
__mmask32 k32 = 0xcfe97dba; \
__mmask16 k16 = (__mmask16)k32; \
__mmask8 k8 = (__mmask8)k32; \
\
/* Masked. */ \
\
zmm_res.zmmi = _mm512_setzero_epi32(); \
ymm_res = zmm_res; \
xmm_res = zmm_res; \
\
soft_v512_update(i16_src2); \
_mm512_mask_##oper(&zmm_res.zmmi, k32, i16_src2.zmmi); \
soft_v512_update(i16_src2); \
soft_v512_update(ymm_res); \
_mm256_mask_##oper(&ymm_res.ymmi[0], k16, i16_src2.ymmi[0]); \
soft_v512_update(i16_src2); \
soft_v512_update(xmm_res); \
_mm_mask_##oper(&xmm_res.xmmi[0], k8, i16_src2.xmmi[0]); \
\
check_equal_nd(&ymm_res, &zmm_res, 8, "_mm256_mask_" #oper, __LINE__); \
check_equal_nd(&xmm_res, &zmm_res, 4, "_mm_mask_" #oper, __LINE__); \
}
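
/*
 * Instantiate the tests.  The loadu forms read from memory, the mov forms
 * copy register to register, and the storeu forms write to memory; all
 * should compile to masked vmovdqu8 (byte) or vmovdqu16 (word)
 * instructions when AVX512BW/AVX512VL are enabled.
 */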
GEN_MASK_I8_LOAD(loadu_epi8)
GEN_MASK_I8_COPY(mov_epi8)
GEN_MASK_I8_STORE(storeu_epi8)
GEN_MASK_I16_LOAD(loadu_epi16)
GEN_MASK_I16_COPY(mov_epi16)
GEN_MASK_I16_STORE(storeu_epi16)

int main() {
init();
do_loadu_epi8();
do_mov_epi8();
do_storeu_epi8();
do_loadu_epi16();
do_mov_epi16();
do_storeu_epi16();
if (n_errs != 0) {
printf("FAILED\n");
return 1;
}
printf("PASSED\n");
return 0;
}