blob: 97e4c58ab51678519393902c45034194f7521b9c [file] [log] [blame]
#include "m512_test_util.h"
#include <stdio.h>
#include <string.h>
/*
 * Here we check the _mm512_maskz_[add|sub]_[round]_ps intrinsics.
 */
volatile int vol0 = 0;
V512 i32;
V512 i32_squares;
V512 i64;
V512 i64_squares;
V512 f32;
V512 f32_squares;
V512 f32_halves;
V512 f64;
V512 f64_squares;
V512 f64_halves;
void NOINLINE init() {
volatile int i;
for (i = 0; i < 16; i++) {
i32.s32[i] = i;
i32_squares.s32[i] = i * i;
f32.f32[i] = i;
f32_squares.f32[i] = i * i;
f32_halves.f32[i] = i + 0.5f;
for (i = 0; i < 8; i++) {
i64.s64[i] = i;
i64_squares.s64[i] = i * i;
f64.f64[i] = i;
f64_squares.f64[i] = i * i;
f64_halves.f64[i] = i + 0.5;
* Generate function do_"oper"_ps, which tests
* _mm512_maskz_oper_ps(__mmask16, __m512, __m512) and
* _mm512_maskz_oper_round_ps(__mmask16, __m512, __m512, int rounding)
#define GEN_PS2_OROUND(oper) \
void NOINLINE do_##oper##_ps() { \
V512 resm, resz; \
__mmask16 k; \
k = 0xbcdf; \
resm.zmm = _mm512_setzero_ps(); \
resm.zmm = \
_mm512_mask_##oper##_ps(resm.zmm, k, f32_halves.zmm, f32_squares.zmm); \
/* Set resz to all 1's, use vol0 to make it stick. */ \
resz.zmmi = _mm512_ternarylogic_epi32(i32.zmmi, i32.zmmi, i32.zmmi, 0xff); \
resz.xmm[vol0] = resz.xmm[vol0]; /* No-op. */ \
resz.zmm = _mm512_maskz_##oper##_ps(k, f32_halves.zmm, f32_squares.zmm); \
check_equal_nd(&resz, &resm, 16, "_mm512_maskz_" #oper "_ps", __LINE__); \
/* Now with a rounding override. */ \
f32_squares.xmm[vol0] = f32_squares.xmm[vol0]; /* No-op. */ \
resm.zmm = _mm512_setzero_ps(); \
resm.zmm = _mm512_mask_##oper##_round_ps( \
resm.zmm, k, f32_halves.zmm, f32_squares.zmm, \
f32_squares.xmm[vol0] = f32_squares.xmm[vol0]; /* No-op. */ \
/* Set resz to all 1's, use vol0 to make it stick. */ \
resz.zmmi = _mm512_ternarylogic_epi32(i32.zmmi, i32.zmmi, i32.zmmi, 0xff); \
resz.xmm[vol0] = resz.xmm[vol0]; /* No-op. */ \
resz.zmm = _mm512_maskz_##oper##_round_ps( \
k, f32_halves.zmm, f32_squares.zmm, \
check_equal_nd(&resz, &resm, 16, "_mm512_maskz_" #oper "_round_ps", \
__LINE__); \
/*
 * Test entry point: returns 1 when n_errs is non-zero, 0 otherwise.
 *
 * n_errs is not declared in this file; presumably it comes from
 * m512_test_util.h -- confirm.
 *
 * NOTE(review): no call to init() and no call to a GEN_PS2_OROUND-generated
 * do_*_ps() function is visible here, so as written no test runs before the
 * error count is inspected -- confirm against the upstream test.
 */
int main(int argc, char *argv[]) {
  if (n_errs != 0) {
    return 1;
  }

  return 0;
}