/* blob: 97e4c58ab51678519393902c45034194f7521b9c [file] [log] [blame] */
#include "m512_test_util.h"
#include <stdio.h>
#include <string.h>
/*
 * This test checks the _mm512_maskz_{add,sub}_ps and
 * _mm512_maskz_{add,sub}_round_ps intrinsics.
 */
/*
 * Volatile index, initialized to 0 and not written anywhere in the code
 * visible here. The test bodies use it for self-assignments such as
 * resz.xmm[vol0] = resz.xmm[vol0], which their comments describe as no-ops
 * that make a previously stored value "stick".
 */
volatile int vol0 = 0;
/*
 * Global test vectors, filled in by init(). V512 is declared outside this
 * file (presumably in m512_test_util.h -- confirm there).
 * Only i32, f32_halves and f32_squares are referenced by the tests visible
 * in this file.
 */
/* 16 s32 lanes: lane i holds i. */
V512 i32;
/* 16 s32 lanes: lane i holds i * i. */
V512 i32_squares;
/* 8 s64 lanes: lane i holds i. */
V512 i64;
/* 8 s64 lanes: lane i holds i * i. */
V512 i64_squares;
/* 16 f32 lanes: lane i holds i. */
V512 f32;
/* 16 f32 lanes: lane i holds i * i. */
V512 f32_squares;
/* 16 f32 lanes: lane i holds i + 0.5f. */
V512 f32_halves;
/* 8 f64 lanes: lane i holds i. */
V512 f64;
/* 8 f64 lanes: lane i holds i * i. */
V512 f64_squares;
/* 8 f64 lanes: lane i holds i + 0.5. */
V512 f64_halves;
/*
 * Populate the global test vectors with lane-dependent values.
 *
 * The 32-bit vectors get 16 lanes and the 64-bit vectors get 8 lanes; lane n
 * receives n, n * n, or n + 0.5 depending on the vector. The lane counter is
 * volatile, presumably so the compiler cannot fold the initialization into
 * compile-time constants (NOTE(review): confirm intent).
 */
void NOINLINE init() {
  volatile int lane;

  /* 32-bit element views: 16 lanes each. */
  lane = 0;
  while (lane < 16) {
    f32.f32[lane] = lane;
    f32_halves.f32[lane] = lane + 0.5f;
    f32_squares.f32[lane] = lane * lane;
    i32.s32[lane] = lane;
    i32_squares.s32[lane] = lane * lane;
    lane++;
  }

  /* 64-bit element views: 8 lanes each. */
  lane = 0;
  while (lane < 8) {
    f64.f64[lane] = lane;
    f64_halves.f64[lane] = lane + 0.5;
    f64_squares.f64[lane] = lane * lane;
    i64.s64[lane] = lane;
    i64_squares.s64[lane] = lane * lane;
    lane++;
  }
}
/*
* Generate function do_"oper"_ps, which tests
* _mm512_maskz_oper_ps(__mmask16, __m512, __m512) and
* _mm512_maskz_oper_round_ps(__mmask16, __m512, __m512, int rounding)
*/
#define GEN_PS2_OROUND(oper) \
void NOINLINE do_##oper##_ps() { \
V512 resm, resz; \
__mmask16 k; \
\
k = 0xbcdf; \
resm.zmm = _mm512_setzero_ps(); \
resm.zmm = \
_mm512_mask_##oper##_ps(resm.zmm, k, f32_halves.zmm, f32_squares.zmm); \
\
/* Set resz to all 1's, use vol0 to make it stick. */ \
resz.zmmi = _mm512_ternarylogic_epi32(i32.zmmi, i32.zmmi, i32.zmmi, 0xff); \
resz.xmm[vol0] = resz.xmm[vol0]; /* No-op. */ \
resz.zmm = _mm512_maskz_##oper##_ps(k, f32_halves.zmm, f32_squares.zmm); \
check_equal_nd(&resz, &resm, 16, "_mm512_maskz_" #oper "_ps", __LINE__); \
\
/* Now with a rounding override. */ \
\
f32_squares.xmm[vol0] = f32_squares.xmm[vol0]; /* No-op. */ \
resm.zmm = _mm512_setzero_ps(); \
resm.zmm = _mm512_mask_##oper##_round_ps( \
resm.zmm, k, f32_halves.zmm, f32_squares.zmm, \
_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); \
f32_squares.xmm[vol0] = f32_squares.xmm[vol0]; /* No-op. */ \
\
/* Set resz to all 1's, use vol0 to make it stick. */ \
resz.zmmi = _mm512_ternarylogic_epi32(i32.zmmi, i32.zmmi, i32.zmmi, 0xff); \
resz.xmm[vol0] = resz.xmm[vol0]; /* No-op. */ \
\
resz.zmm = _mm512_maskz_##oper##_round_ps( \
k, f32_halves.zmm, f32_squares.zmm, \
_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); \
check_equal_nd(&resz, &resm, 16, "_mm512_maskz_" #oper "_round_ps", \
__LINE__); \
}
/* Instantiate the test functions do_sub_ps() and do_add_ps(). */
GEN_PS2_OROUND(sub)
GEN_PS2_OROUND(add)
/*
 * Test driver: initializes the global vectors, runs the add and sub tests,
 * then prints "PASSED" and returns 0 when n_errs is zero, or prints "FAILED"
 * and returns 1 otherwise. Command-line arguments are not used.
 */
int main(int argc, char *argv[]) {
  (void)argc;
  (void)argv;

  init();
  do_add_ps();
  do_sub_ps();

  if (n_errs == 0) {
    printf("PASSED\n");
    return 0;
  }

  printf("FAILED\n");
  return 1;
}