blob: 3a675ec1af9c6823b9c23a4f64fb1e55ea849ae9 [file] [log] [blame]
/*
* Exercise intrinsics for a instructions which set mask register
* by values in vector registers and set vector register value by
* values in mask register.
*/
#include "m512_test_util.h"
__int64 calc_expected_mask_val(const char *valp, int el_size, int length) {
__int64 rval = 0;
int i;
for (i = 0; i < length; i++) {
if ((valp[el_size * i + (el_size - 1)] & 0x80) != 0) {
rval |= (1LL << i);
}
}
return rval;
}
char *calc_expected_vec_val(__mmask64 mask_val, int mask_size, int el_size,
char *buf) {
int i, j;
for (i = 0; i < mask_size * el_size; buf[i++] = 0)
;
for (i = 0; i < mask_size; i++) {
if ((mask_val & (1LL << i)) != 0) {
for (j = 0; j < el_size; j++) {
buf[i * el_size + j] = 0xff;
}
}
}
return buf;
}
NOINLINE void check_mask16(__mmask16 res_mask, __mmask16 exp_mask,
const char *fname, const char *input) {
int i;
if (res_mask != exp_mask) {
printf("%s: 0x%x != 0x%x, input = ", fname, res_mask, exp_mask);
for (i = 0; i < 16; i++) {
printf("%02x ", input[i] & 0xff);
}
printf("\n");
n_errs++;
}
}
NOINLINE void check_xmm_arr(const __m128i xvar, char *buf, const char *fname,
__mmask64 input) {
int i;
char *p = (char *)&xvar;
if (memcmp((void *)p, (void *)buf, 16) != 0) {
printf("%s: 0x", fname);
for (i = 0; i < 16; i++) {
printf(" %02x", p[i] & 0xff);
}
printf(" != 0x");
for (i = 0; i < 16; i++) {
printf(" %02x", buf[i] & 0xff);
}
printf(", input = 0x%04x\n", (int)(input)&0xffff);
n_errs++;
}
}
NOINLINE void test_xmm(int shift, int mulp) {
ALIGNTO(16) char buf[16];
int i;
__m128i xvar;
for (i = 0; i < 16; i++) {
buf[i] = (i << shift) * mulp;
}
memcpy(&xvar, buf, 16);
check_mask16(_mm_movepi32_mask(xvar), calc_expected_mask_val(buf, 4, 4),
"_mm_movepi32_mask", buf);
check_mask16(_mm_movepi64_mask(xvar), calc_expected_mask_val(buf, 8, 2),
"_mm_movepi64_mask", buf);
check_xmm_arr(_mm_movm_epi32((__mmask16)shift * mulp),
calc_expected_vec_val(shift * mulp, 4, 4, buf),
"_mm_movm_epi32", (__mmask16)shift * mulp);
check_xmm_arr(_mm_movm_epi64((__mmask16)shift * mulp),
calc_expected_vec_val(shift * mulp, 2, 8, buf),
"_mm_movm_epi64", (__mmask16)shift * mulp);
}
NOINLINE void check_mask32(__mmask32 res_mask, __mmask32 exp_mask,
const char *fname, const char *input) {
int i;
if (res_mask != exp_mask) {
printf("%s: 0x%x != 0x%x, input = ", fname, res_mask, exp_mask);
for (i = 0; i < 32; i++) {
printf("%02x ", input[i] & 0xff);
}
printf("\n");
n_errs++;
exit(1);
}
}
NOINLINE void check_ymm_arr(const __m256i yvar, char *buf, const char *fname,
__mmask64 input) {
int i;
char *p = (char *)&yvar;
if (memcmp((void *)p, (void *)buf, 32) != 0) {
printf("%s: 0x", fname);
for (i = 0; i < 32; i++) {
printf(" %02x", p[i] & 0xff);
}
printf(" != 0x");
for (i = 0; i < 32; i++) {
printf(" %02x", buf[i] & 0xff);
}
printf(", input = 0x%04x\n", (int)(input));
n_errs++;
}
}
NOINLINE void test_ymm(int shift, int mulp) {
ALIGNTO(32) char buf[32];
int i;
__m256i yvar;
for (i = 0; i < 32; i++) {
buf[i] = (i << shift) * mulp;
}
memcpy(&yvar, buf, 32);
check_mask32(_mm256_movepi32_mask(yvar), calc_expected_mask_val(buf, 4, 8),
"_mm256_movepi32_mask", buf);
check_mask32(_mm256_movepi64_mask(yvar), calc_expected_mask_val(buf, 8, 4),
"_mm256_movepi64_mask", buf);
check_ymm_arr(_mm256_movm_epi32((__mmask32)shift * mulp),
calc_expected_vec_val(shift * mulp, 8, 4, buf),
"_mm256_movm_epi32", (__mmask32)shift * mulp);
check_ymm_arr(_mm256_movm_epi64((__mmask32)shift * mulp),
calc_expected_vec_val(shift * mulp, 4, 8, buf),
"_mm256_movm_epi64", (__mmask32)shift * mulp);
}
NOINLINE void check_mask64(__mmask64 res_mask, __mmask64 exp_mask,
const char *fname, const char *input) {
int i;
if (res_mask != exp_mask) {
printf("%s: 0x%llx != 0x%llx, input = ", fname, res_mask, exp_mask);
for (i = 0; i < 64; i++) {
printf("%02x ", input[i] & 0xff);
}
printf("\n");
n_errs++;
}
}
NOINLINE void check_zmm_arr(const __m512i zvar, char *buf, const char *fname,
__mmask64 input) {
int i;
char *p = (char *)&zvar;
if (memcmp((void *)p, (void *)buf, 64) != 0) {
printf("%s: 0x", fname);
for (i = 0; i < 64; i++) {
printf(" %02x", p[i] & 0xff);
}
printf(" != 0x");
for (i = 0; i < 64; i++) {
printf(" %02x", buf[i] & 0xff);
}
printf(", input = 0x%08llx\n", input);
n_errs++;
}
}
NOINLINE void test_zmm(int shift, int mulp) {
ALIGNTO(64) char buf[64];
int i;
__m512i zvar;
for (i = 0; i < 64; i++) {
buf[i] = (i << shift) * mulp;
}
memcpy(&zvar, buf, 64);
check_mask64(_mm512_movepi32_mask(zvar), calc_expected_mask_val(buf, 4, 16),
"_mm512_movepi32_mask", buf);
check_mask64(_mm512_movepi64_mask(zvar), calc_expected_mask_val(buf, 8, 8),
"_mm512_movepi64_mask", buf);
check_zmm_arr(_mm512_movm_epi32((__mmask64)shift * mulp),
calc_expected_vec_val(shift * mulp, 16, 4, buf),
"_mm512_movm_epi32", (__mmask64)shift * mulp);
check_zmm_arr(_mm512_movm_epi64((__mmask64)shift * mulp),
calc_expected_vec_val(shift * mulp, 8, 8, buf),
"_mm512_movm_epi64", (__mmask64)shift * mulp);
}
NOINLINE void test_all() {
int shift, mulp;
for (mulp = -1000; mulp < 1000; mulp += 10) {
for (shift = 0; shift < 64; shift++) {
test_xmm(shift, mulp);
test_ymm(shift, mulp);
test_zmm(shift, mulp);
}
}
}
int main(void) {
test_all();
if (n_errs != 0) {
printf("FAILED, n_errs = %d\n", n_errs);
return 1;
}
printf("PASSED\n");
return 0;
}