blob: 2affc18a954c2297ab52521cbc871240c50f5311 [file] [log] [blame]
/*
 * Test math instructions: sqrt, reciprocal, floor, ceil, exponent,
 * scale, fixup, roundscale, and ternary logic.
 * Here we check for _mm512_[mask|maskz]_[ceil|floor|scalef|sqrt|ternarylogic]
 * intrinsics.
 */
#include "m512_test_util.h"
#include <math.h>
#include <stdio.h>
#include <string.h>
/* Read-write volatile used to defeat constant folding in the tests below. */
volatile int vol0 = 0;
/* Global input vectors; filled with known patterns by init(). */
V512 i32;         /* 32-bit ints: i for i = 0..15 */
V512 i32_squares; /* i * i */
V512 i32_neg;     /* -i */
V512 i64;         /* 64-bit ints: i for i = 0..7 */
V512 i64_squares; /* i * i */
V512 i64_neg;     /* -i */
V512 f32;         /* floats: i for i = 0..15 */
V512 f32_squares; /* (float)(i * i) */
V512 f32_halves;  /* i + 0.5f */
V512 f64;         /* doubles: i for i = 0..7 */
V512 f64_squares; /* (double)(i * i) */
V512 f64_halves;  /* i + 0.5 */
/*
 * Fill the global test vectors with known patterns (i, i*i, -i, i+0.5).
 * The loop counter is volatile so the compiler cannot precompute the
 * vector contents and fold away the checks that follow.
 */
void NOINLINE init() {
  volatile int i;
  for (i = 0; i < 16; i++) {
    i32.s32[i] = i;
    i32_squares.s32[i] = i * i;
    i32_neg.s32[i] = -i;
    f32.f32[i] = i;
    f32_squares.f32[i] = i * i;
    f32_halves.f32[i] = i + 0.5f;
  }
  for (i = 0; i < 8; i++) {
    i64.s64[i] = i;
    i64_squares.s64[i] = i * i;
    i64_neg.s64[i] = -i;
    f64.f64[i] = i;
    f64_squares.f64[i] = i * i;
    f64_halves.f64[i] = i + 0.5;
  }
}
/*
 * Smoke test for _mm512_rcp14_pd and its merge-masked form: the
 * intrinsics are executed (res is volatile so they are not elided) but
 * the results are not value-checked.  Removed the unused 'expected'
 * local that was never filled or compared.
 */
void NOINLINE do_rcp14pd() {
  volatile V512 res;
  __mmask8 k = 0xc3;
  res.zmmd = _mm512_rcp14_pd(f64.zmmd);
  res.zmmi = _mm512_setzero_epi32();
  res.zmmd = _mm512_mask_rcp14_pd(res.zmmd, k, f64.zmmd);
}
/*
 * Smoke test for _mm512_rcp14_ps and its merge-masked form: executed
 * but not value-checked.  Removed the unused 'expected' local.
 */
void NOINLINE do_rcp14ps() {
  volatile V512 res;
  __mmask16 k = 0x7e95;
  res.zmm = _mm512_rcp14_ps(f32.zmm);
  res.zmmi = _mm512_setzero_epi32();
  res.zmm = _mm512_mask_rcp14_ps(res.zmm, k, f32.zmm);
}
/* Verify _mm512_sqrt_ps / _mm512_mask_sqrt_ps: sqrt(i*i) == i per lane. */
void NOINLINE do_sqrtps() {
  V512 res;
  V512 expected;
  volatile int elem;
  const __mmask16 mask = 0xbcdf;

  /* Unmasked form. */
  res.zmm = _mm512_sqrt_ps(f32_squares.zmm);
  for (elem = 0; elem < 16; elem++) {
    expected.f32[elem] = elem;
  }
  check_equal_nd(&res, &expected, 16, "_mm512_sqrt_ps", __LINE__);

  f32_squares.xmm[vol0] = f32_squares.xmm[vol0]; /* No-op. */

  /* Masked form over a zeroed destination: unselected lanes stay 0. */
  res.zmm = _mm512_setzero_ps();
  res.zmm = _mm512_mask_sqrt_ps(res.zmm, mask, f32_squares.zmm);
  for (elem = 0; elem < 16; elem++) {
    expected.f32[elem] = (mask & (1 << elem)) ? (float)elem : 0.0f;
  }
  check_equal_nd(&res, &expected, 16, "_mm512_mask_sqrt_ps", __LINE__);
}
/* Verify _mm512_sqrt_pd / _mm512_mask_sqrt_pd: sqrt(i*i) == i per lane. */
void NOINLINE do_sqrtpd() {
  V512 res;
  V512 expected;
  volatile int elem;
  const __mmask8 mask = 0xe9;

  /* Unmasked form. */
  res.zmmd = _mm512_sqrt_pd(f64_squares.zmmd);
  for (elem = 0; elem < 8; elem++) {
    expected.f64[elem] = elem;
  }
  check_equal_nd(&res, &expected, 16, "_mm512_sqrt_pd", __LINE__);

  f64_squares.xmmd[vol0] = f64_squares.xmmd[vol0]; /* No-op. */

  /* Masked form over a zeroed destination: unselected lanes stay 0. */
  res.zmmd = _mm512_setzero_pd();
  res.zmmd = _mm512_mask_sqrt_pd(res.zmmd, mask, f64_squares.zmmd);
  for (elem = 0; elem < 8; elem++) {
    expected.f64[elem] = (mask & (1 << elem)) ? (double)elem : 0.0;
  }
  check_equal_nd(&res, &expected, 16, "_mm512_mask_sqrt_pd", __LINE__);
}
/* Verify _mm512_floor_ps / _mm512_mask_floor_ps: floor(i + 0.5) == i. */
void NOINLINE do_floorps() {
  V512 res;
  V512 expected;
  volatile int elem;
  const __mmask16 mask = 0xbcdf;

  /* Unmasked form. */
  res.zmm = _mm512_floor_ps(f32_halves.zmm);
  for (elem = 0; elem < 16; elem++) {
    expected.f32[elem] = elem;
  }
  check_equal_nd(&res, &expected, 16, "_mm512_floor_ps", __LINE__);

  f32_halves.xmm[vol0] = f32_halves.xmm[vol0]; /* No-op. */

  /* Masked form over a zeroed destination: unselected lanes stay 0. */
  res.zmm = _mm512_setzero_ps();
  res.zmm = _mm512_mask_floor_ps(res.zmm, mask, f32_halves.zmm);
  for (elem = 0; elem < 16; elem++) {
    expected.f32[elem] = (mask & (1 << elem)) ? (float)elem : 0.0f;
  }
  check_equal_nd(&res, &expected, 16, "_mm512_mask_floor_ps", __LINE__);
}
/* Verify _mm512_floor_pd / _mm512_mask_floor_pd: floor(i + 0.5) == i. */
void NOINLINE do_floorpd() {
  V512 res;
  V512 expected;
  volatile int elem;
  const __mmask8 mask = 0x7b;

  /* Unmasked form. */
  res.zmmd = _mm512_floor_pd(f64_halves.zmmd);
  for (elem = 0; elem < 8; elem++) {
    expected.f64[elem] = elem;
  }
  check_equal_nd(&res, &expected, 16, "_mm512_floor_pd", __LINE__);

  f64_halves.xmmd[vol0] = f64_halves.xmmd[vol0]; /* No-op. */

  /* Masked form over a zeroed destination: unselected lanes stay 0. */
  res.zmmd = _mm512_setzero_pd();
  res.zmmd = _mm512_mask_floor_pd(res.zmmd, mask, f64_halves.zmmd);
  for (elem = 0; elem < 8; elem++) {
    expected.f64[elem] = (mask & (1 << elem)) ? (double)elem : 0.0;
  }
  check_equal_nd(&res, &expected, 16, "_mm512_mask_floor_pd", __LINE__);
}
/* Verify _mm512_ceil_ps / _mm512_mask_ceil_ps: ceil(i + 0.5) == i + 1. */
void NOINLINE do_ceilps() {
  V512 res;
  V512 expected;
  volatile int elem;
  const __mmask16 mask = 0xbcdf;

  /* Unmasked form. */
  res.zmm = _mm512_ceil_ps(f32_halves.zmm);
  for (elem = 0; elem < 16; elem++) {
    expected.f32[elem] = elem + 1;
  }
  check_equal_nd(&res, &expected, 16, "_mm512_ceil_ps", __LINE__);

  f32_halves.xmm[vol0] = f32_halves.xmm[vol0]; /* No-op. */

  /* Masked form over a zeroed destination: unselected lanes stay 0. */
  res.zmm = _mm512_setzero_ps();
  res.zmm = _mm512_mask_ceil_ps(res.zmm, mask, f32_halves.zmm);
  for (elem = 0; elem < 16; elem++) {
    expected.f32[elem] = (mask & (1 << elem)) ? (float)(elem + 1) : 0.0f;
  }
  check_equal_nd(&res, &expected, 16, "_mm512_mask_ceil_ps", __LINE__);
}
/* Verify _mm512_ceil_pd / _mm512_mask_ceil_pd: ceil(i + 0.5) == i + 1. */
void NOINLINE do_ceilpd() {
  V512 res;
  V512 expected;
  volatile int elem;
  const __mmask8 mask = 0x7b;

  /* Unmasked form. */
  res.zmmd = _mm512_ceil_pd(f64_halves.zmmd);
  for (elem = 0; elem < 8; elem++) {
    expected.f64[elem] = elem + 1;
  }
  check_equal_nd(&res, &expected, 16, "_mm512_ceil_pd", __LINE__);

  f64_halves.xmmd[vol0] = f64_halves.xmmd[vol0]; /* No-op. */

  /* Masked form over a zeroed destination: unselected lanes stay 0. */
  res.zmmd = _mm512_setzero_pd();
  res.zmmd = _mm512_mask_ceil_pd(res.zmmd, mask, f64_halves.zmmd);
  for (elem = 0; elem < 8; elem++) {
    expected.f64[elem] = (mask & (1 << elem)) ? (double)(elem + 1) : 0.0;
  }
  check_equal_nd(&res, &expected, 16, "_mm512_mask_ceil_pd", __LINE__);
}
/*
 * Check _mm_getexp_sd: the low lane receives the exponent of v2's low
 * element (64.0 -> 6.0) while the upper lane passes through from v1
 * (8.0).  k8 = 0x2 leaves bit 0 clear, so the merge-masked form keeps
 * the source low lane (32.0) and the zero-masked form produces 0.0.
 */
void NOINLINE do_getexpsd() {
  __mmask8 k8 = 0x2;
  volatile __m128d res;
  volatile __m128d v1 = _mm_set_pd(8.0, 32.0);
  volatile __m128d v2 = _mm_set_pd(16.0, 64.0);
  __m128d res_exp_nomask = _mm_set_pd(8.0, 6.0);
  __m128d res_exp_mask = _mm_set_pd(8.0, 32.0);
  __m128d res_exp_maskz = _mm_set_pd(8.0, 0.0);
  res = _mm_setzero_pd();
  res = _mm_getexp_sd(v1, v2);
  check_equal_ndf((void *)&res, (void *)&res_exp_nomask, 2, "_mm_getexp_sd",
                  __LINE__);
  res = _mm_setzero_pd();
  res = _mm_mask_getexp_sd(v1, k8, v1, v2);
  check_equal_ndf((void *)&res, (void *)&res_exp_mask, 2, "_mm_mask_getexp_sd",
                  __LINE__);
  res = _mm_setzero_pd();
  res = _mm_maskz_getexp_sd(k8, v1, v2);
  check_equal_ndf((void *)&res, (void *)&res_exp_maskz, 2,
                  "_mm_maskz_getexp_sd", __LINE__);
}
/*
 * Check _mm_getexp_ss: the low lane receives the exponent of v2's low
 * element (1024.0f -> 10.0f) while the upper three lanes pass through
 * from v1.  k8 = 0xe leaves bit 0 clear, so the merge-masked form
 * keeps the source low lane and the zero-masked form produces 0.0f.
 * Fix: the merge-mask check compared only 2 of the 4 lanes; compare
 * all 4 like the other two checks (res_exp_mask covers all lanes).
 */
void NOINLINE do_getexpss() {
  __mmask8 k8 = 0xe;
  volatile __m128 res;
  volatile __m128 v1 = _mm_set_ps(16.0f, 32.0f, 64.0f, 128.0f);
  volatile __m128 v2 = _mm_set_ps(128.0f, 256.0f, 512.0f, 1024.0f);
  volatile __m128 res_exp_nomask = _mm_set_ps(16.0f, 32.0f, 64.0f, 10.0f);
  volatile __m128 res_exp_mask = _mm_set_ps(16.0f, 32.0f, 64.0f, 128.0f);
  volatile __m128 res_exp_maskz = _mm_set_ps(16.0f, 32.0f, 64.0f, 0.0f);
  res = _mm_setzero_ps();
  res = _mm_getexp_ss(v1, v2);
  check_equal_nsf((void *)&res, (void *)&res_exp_nomask, 4, "_mm_getexp_ss",
                  __LINE__);
  res = _mm_setzero_ps();
  res = _mm_mask_getexp_ss(v1, k8, v1, v2);
  check_equal_nsf((void *)&res, (void *)&res_exp_mask, 4, "_mm_mask_getexp_ss",
                  __LINE__);
  res = _mm_setzero_ps();
  res = _mm_maskz_getexp_ss(k8, v1, v2);
  check_equal_nsf((void *)&res, (void *)&res_exp_maskz, 4,
                  "_mm_maskz_getexp_ss", __LINE__);
}
/*
 * Smoke test for _mm512_getmant_pd and its merge-masked form: executed
 * but not value-checked.  Removed the unused 'expected' local.
 */
void NOINLINE do_getmantpd() {
  volatile V512 res;
  __mmask8 k = 0x75;
  res.zmmd =
      _mm512_getmant_pd(f64.zmmd, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_zero);
  res.zmmi = _mm512_setzero_epi32();
  res.zmmd = _mm512_mask_getmant_pd(res.zmmd, k, f64.zmmd, _MM_MANT_NORM_p5_2,
                                    _MM_MANT_SIGN_zero);
}
/*
 * Smoke test for _mm512_getmant_ps and its merge-masked form: executed
 * but not value-checked.  Removed the unused 'expected' local.
 */
void NOINLINE do_getmantps() {
  volatile V512 res;
  __mmask16 k = 0x7e95;
  res.zmm = _mm512_getmant_ps(f32.zmm, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_zero);
  res.zmmi = _mm512_setzero_epi32();
  res.zmm = _mm512_mask_getmant_ps(res.zmm, k, f32.zmm, _MM_MANT_NORM_p5_2,
                                   _MM_MANT_SIGN_zero);
}
/*
 * Build the expected result of a (possibly masked) scalef on f64 and
 * f64_squares into 'expected' and compare it with 'res'; both are taken
 * from the invoking scope.  scalef computes a * 2^floor(b).  'zeroing'
 * selects zero-masking semantics; otherwise unselected lanes take the
 * corresponding lane of 'dest'.  NOTE: arguments are expanded more than
 * once, so pass only simple expressions.
 */
#define CHECK_SCALEFPD(n_elems, dest, mask, zeroing, name)                     \
  {                                                                            \
    volatile int i;                                                            \
    for (i = 0; i < n_elems; i++) {                                            \
      expected.f64[i] = f64.f64[i] * (pow(2.0, floor(f64_squares.f64[i])));    \
      if ((mask & (1 << i)) == 0) {                                            \
        if (zeroing) {                                                         \
          expected.f64[i] = 0.0;                                               \
        } else {                                                               \
          expected.f64[i] = dest.f64[i];                                       \
        }                                                                      \
      }                                                                        \
    }                                                                          \
    check_equal_nd(&res, &expected, n_elems * 2, name, __LINE__);              \
    f64.xmmd[vol0] = f64.xmmd[vol0];                                           \
  }
/*
 * Test _mm512_scalef_pd and its round / mask / maskz variants against
 * the scalar model a * 2^floor(b) computed by CHECK_SCALEFPD.
 */
void NOINLINE do_scalefpd() {
  V512 res;
  V512 expected;
  __mmask8 k = 0xFF; /* full mask: the dest argument below is irrelevant */
  res.zmmd = _mm512_scalef_round_pd(f64.zmmd, f64_squares.zmmd,
                                    _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
  CHECK_SCALEFPD(8, f64_halves, k, 0, "_mm512_scalef_round_pd");
  res.zmmd = _mm512_scalef_pd(f64.zmmd, f64_squares.zmmd);
  CHECK_SCALEFPD(8, f64_halves, k, 0, "_mm512_scalef_pd");
  k = 0x75; /* merge-masking: unselected lanes must equal f64_halves */
  res.zmmd = _mm512_mask_scalef_round_pd(
      f64_halves.zmmd, k, f64.zmmd, f64_squares.zmmd,
      _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
  CHECK_SCALEFPD(8, f64_halves, k, 0, "_mm512_mask_scalef_round_pd");
  res.zmmd =
      _mm512_mask_scalef_pd(f64_halves.zmmd, k, f64.zmmd, f64_squares.zmmd);
  CHECK_SCALEFPD(8, f64_halves, k, 0, "_mm512_mask_scalef_pd");
  k = 0x57; /* zero-masking: unselected lanes must be 0.0 */
  res.zmmd = _mm512_maskz_scalef_round_pd(
      k, f64.zmmd, f64_squares.zmmd, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
  CHECK_SCALEFPD(8, f64_halves, k, 1, "_mm512_maskz_scalef_round_pd");
  res.zmmd = _mm512_maskz_scalef_pd(k, f64.zmmd, f64_squares.zmmd);
  CHECK_SCALEFPD(8, f64_halves, k, 1, "_mm512_maskz_scalef_pd");
}
/*
 * Single-precision counterpart of CHECK_SCALEFPD: build the expected
 * (possibly masked) scalef result from f32 and f32_squares into
 * 'expected' and compare with 'res' from the invoking scope.  NOTE:
 * arguments are expanded more than once, so pass only simple
 * expressions.
 */
#define CHECK_SCALEFPS(n_elems, dest, mask, zeroing, name)                     \
  {                                                                            \
    volatile int i;                                                            \
    for (i = 0; i < n_elems; i++) {                                            \
      expected.f32[i] = f32.f32[i] * (powf(2.0F, floorf(f32_squares.f32[i]))); \
      if ((mask & (1 << i)) == 0) {                                            \
        if (zeroing) {                                                         \
          expected.f32[i] = 0.0F;                                              \
        } else {                                                               \
          expected.f32[i] = dest.f32[i];                                       \
        }                                                                      \
      }                                                                        \
    }                                                                          \
    check_equal_nd(&res, &expected, n_elems, name, __LINE__);                  \
    f32.xmm[vol0] = f32.xmm[vol0];                                             \
  }
/*
 * Test _mm512_scalef_ps and its round / mask / maskz variants against
 * the scalar model a * 2^floor(b) computed by CHECK_SCALEFPS.
 */
void NOINLINE do_scalefps() {
  V512 res;
  V512 expected;
  __mmask16 k = 0xFFFF; /* full mask: the dest argument below is irrelevant */
  res.zmm = _mm512_scalef_round_ps(f32.zmm, f32_squares.zmm,
                                   _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
  CHECK_SCALEFPS(16, f32_halves, k, 0, "_mm512_scalef_round_ps");
  res.zmm = _mm512_scalef_ps(f32.zmm, f32_squares.zmm);
  CHECK_SCALEFPS(16, f32_halves, k, 0, "_mm512_scalef_ps");
  k = 0x0bcd; /* merge-masking: unselected lanes must equal f32_halves */
  res.zmm =
      _mm512_mask_scalef_round_ps(f32_halves.zmm, k, f32.zmm, f32_squares.zmm,
                                  _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
  CHECK_SCALEFPS(16, f32_halves, k, 0, "_mm512_mask_scalef_round_ps");
  res.zmm = _mm512_mask_scalef_ps(f32_halves.zmm, k, f32.zmm, f32_squares.zmm);
  CHECK_SCALEFPS(16, f32_halves, k, 0, "_mm512_mask_scalef_ps");
  k = 0x0dcb; /* zero-masking: unselected lanes must be 0.0F */
  res.zmm = _mm512_maskz_scalef_round_ps(
      k, f32.zmm, f32_squares.zmm, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
  CHECK_SCALEFPS(16, f32_halves, k, 1, "_mm512_maskz_scalef_round_ps");
  res.zmm = _mm512_maskz_scalef_ps(k, f32.zmm, f32_squares.zmm);
  CHECK_SCALEFPS(16, f32_halves, k, 1, "_mm512_maskz_scalef_ps");
}
/* Rounding control passed to the *_round_* fixupimm variants below. */
#define SOME_ROUND (_MM_FROUND_CUR_DIRECTION)
/*
 * Smoke test the packed-double fixupimm forms (plain, mask, maskz and
 * their explicit-rounding variants): executed but not value-checked.
 * Removed the unused 'expected' local.
 */
void NOINLINE do_fixupimmpd() {
  volatile V512 res;
  __mmask8 k = 0x75;
  res.zmmd = _mm512_fixupimm_pd(f64.zmmd, f64_squares.zmmd, i32.zmmi, 0x97);
  res.zmmi = _mm512_setzero_epi32();
  res.zmmd = _mm512_mask_fixupimm_pd(res.zmmd, k, f64.zmmd, i32.zmmi, 0xfe);
  res.zmmi = _mm512_setzero_epi32();
  res.zmmd = _mm512_maskz_fixupimm_pd(k, res.zmmd, f64.zmmd, i32.zmmi, 0xfe);
  res.zmmd = _mm512_fixupimm_round_pd(f64.zmmd, f64_squares.zmmd, i32.zmmi,
                                      0x97, SOME_ROUND);
  res.zmmi = _mm512_setzero_epi32();
  res.zmmd = _mm512_mask_fixupimm_round_pd(res.zmmd, k, f64.zmmd, i32.zmmi,
                                           0xfe, SOME_ROUND);
  res.zmmi = _mm512_setzero_epi32();
  res.zmmd = _mm512_maskz_fixupimm_round_pd(k, res.zmmd, f64.zmmd, i32.zmmi,
                                            0xfe, SOME_ROUND);
}
/*
 * Smoke test the packed-float fixupimm forms (plain, mask, maskz and
 * their explicit-rounding variants): executed but not value-checked.
 * Removed the unused 'expected' local.
 */
void NOINLINE do_fixupimmps() {
  volatile V512 res;
  __mmask16 k = 0x75;
  res.zmm = _mm512_fixupimm_ps(f32.zmm, f32_squares.zmm, i32.zmmi, 0x97);
  res.zmmi = _mm512_setzero_epi32();
  res.zmm = _mm512_mask_fixupimm_ps(res.zmm, k, f32.zmm, i32.zmmi, 0xfe);
  res.zmmi = _mm512_setzero_epi32();
  res.zmm = _mm512_maskz_fixupimm_ps(k, res.zmm, f32.zmm, i32.zmmi, 0xfe);
  res.zmm = _mm512_fixupimm_round_ps(f32.zmm, f32_squares.zmm, i32.zmmi, 0x97,
                                     SOME_ROUND);
  res.zmmi = _mm512_setzero_epi32();
  res.zmm = _mm512_mask_fixupimm_round_ps(res.zmm, k, f32.zmm, i32.zmmi, 0xfe,
                                          SOME_ROUND);
  res.zmmi = _mm512_setzero_epi32();
  res.zmm = _mm512_maskz_fixupimm_round_ps(k, res.zmm, f32.zmm, i32.zmmi, 0xfe,
                                           SOME_ROUND);
}
/*
 * Smoke test the scalar-double fixupimm forms (plain, mask, maskz and
 * their explicit-rounding variants): executed but not value-checked.
 * Removed the unused 'expected' local.
 */
void NOINLINE do_fixupimmsd() {
  volatile V512 res;
  __mmask8 k = 0x75;
  res.xmmd[0] =
      _mm_fixupimm_sd(f64.xmmd[0], f64_squares.xmmd[0], i32.xmmi[0], 0x97);
  res.xmmi[0] = _mm_setzero_si128();
  res.xmmd[0] =
      _mm_mask_fixupimm_sd(res.xmmd[0], k, f64.xmmd[0], i32.xmmi[0], 0xfe);
  res.xmmi[0] = _mm_setzero_si128();
  res.xmmd[0] =
      _mm_maskz_fixupimm_sd(k, res.xmmd[0], f64.xmmd[0], i32.xmmi[0], 0xfe);
  res.xmmd[0] = _mm_fixupimm_round_sd(f64.xmmd[0], f64_squares.xmmd[0],
                                      i32.xmmi[0], 0x97, SOME_ROUND);
  res.xmmi[0] = _mm_setzero_si128();
  res.xmmd[0] = _mm_mask_fixupimm_round_sd(res.xmmd[0], k, f64.xmmd[0],
                                           i32.xmmi[0], 0xfe, SOME_ROUND);
  res.xmmi[0] = _mm_setzero_si128();
  res.xmmd[0] = _mm_maskz_fixupimm_round_sd(k, res.xmmd[0], f64.xmmd[0],
                                            i32.xmmi[0], 0xfe, SOME_ROUND);
}
/*
 * Smoke test the scalar-float fixupimm forms (plain, mask, maskz and
 * their explicit-rounding variants): executed but not value-checked.
 * Fixes: the round variants operated on f64/f64_squares (a copy-paste
 * from the _sd test, reinterpreting double bits as floats) — they now
 * use f32/f32_squares like the non-round calls.  Also removed the
 * unused 'expected' local.
 */
void NOINLINE do_fixupimmss() {
  volatile V512 res;
  __mmask8 k = 0x75;
  res.xmm[0] =
      _mm_fixupimm_ss(f32.xmm[0], f32_squares.xmm[0], i32.xmmi[0], 0x97);
  res.xmmi[0] = _mm_setzero_si128();
  res.xmm[0] =
      _mm_mask_fixupimm_ss(res.xmm[0], k, f32.xmm[0], i32.xmmi[0], 0xfe);
  res.xmmi[0] = _mm_setzero_si128();
  res.xmm[0] =
      _mm_maskz_fixupimm_ss(k, res.xmm[0], f32.xmm[0], i32.xmmi[0], 0xfe);
  res.xmm[0] = _mm_fixupimm_round_ss(f32.xmm[0], f32_squares.xmm[0],
                                     i32.xmmi[0], 0x97, SOME_ROUND);
  res.xmmi[0] = _mm_setzero_si128();
  res.xmm[0] = _mm_mask_fixupimm_round_ss(res.xmm[0], k, f32.xmm[0],
                                          i32.xmmi[0], 0xfe, SOME_ROUND);
  res.xmmi[0] = _mm_setzero_si128();
  res.xmm[0] = _mm_maskz_fixupimm_round_ss(k, res.xmm[0], f32.xmm[0],
                                           i32.xmmi[0], 0xfe, SOME_ROUND);
}
/*
 * Smoke test for _mm512_roundscale_pd and its merge-masked form:
 * executed but not value-checked.  Fixes: the mask is __mmask8 (the pd
 * form has 8 lanes; the intrinsic takes __mmask8), matching the other
 * *_pd tests; removed the unused 'expected' local.
 */
void NOINLINE do_roundscalepd() {
  volatile V512 res;
  __mmask8 k = 0x3d;
  res.zmmd = _mm512_roundscale_pd(f64.zmmd, 0xff);
  res.zmmi = _mm512_setzero_epi32();
  res.zmmd = _mm512_mask_roundscale_pd(res.zmmd, k, f64.zmmd, 0x36);
}
/*
 * Smoke test for _mm512_roundscale_ps and its merge-masked form:
 * executed but not value-checked.  Removed the unused 'expected' local.
 */
void NOINLINE do_roundscaleps() {
  volatile V512 res;
  __mmask16 k = 0x74cb;
  res.zmm = _mm512_roundscale_ps(f32.zmm, 0xf7);
  res.zmmi = _mm512_setzero_epi32();
  res.zmm = _mm512_mask_roundscale_ps(res.zmm, k, f32.zmm, 0x36);
}
/*
 * Scalar emulation of VPTERNLOG on 32-bit values: for every bit
 * position, the 3-bit index formed from the corresponding bits of
 * (a, b, c) selects one bit of the truth table 'imm'.
 * Reworked to use unsigned arithmetic internally: callers pass
 * negative operands (i32_neg / i64_neg), and right-shifting a negative
 * int is implementation-defined; '1u << i' also avoids shifting into
 * the sign bit at i == 31.  Results are unchanged where the original
 * behaved as intended.
 */
static int NOINLINE emulate_ternarylogicd(int a, int b, int c, int imm) {
  unsigned ua = (unsigned)a;
  unsigned ub = (unsigned)b;
  unsigned uc = (unsigned)c;
  unsigned res = 0;
  int i, index;
  for (i = 0; i < 32; i++) {
    index = (int)(((ua & 1) << 2) | ((ub & 1) << 1) | (uc & 1));
    res |= ((imm & (1 << index)) ? 1u : 0u) << i;
    ua >>= 1;
    ub >>= 1;
    uc >>= 1;
  }
  return (int)res;
}
/*
 * Test _mm512_[mask_|maskz_]ternarylogic_epi64 against the scalar
 * emulation.  VPTERNLOG is purely bitwise, so each 64-bit lane is
 * checked as two independent 32-bit halves; mask bit i/2 governs dword
 * pair (i, i+1).  Fixes: the maskz expectation used the low dword
 * i64.s32[i] for the high half's third operand (copy-paste; it was
 * masked because operand c is don't-care under imm 0x3b when a == b);
 * removed the unused 'resx'/'resy' locals.
 */
void NOINLINE do_pternlogq() {
  volatile int i;
  V512 res;
  V512 expected;
  __mmask8 k8 = 0x75;
  res.zmmi =
      _mm512_ternarylogic_epi64(i64.zmmi, i64_squares.zmmi, i64.zmmi, 0x79);
  for (i = 0; i < 16; i++) {
    expected.s32[i] =
        emulate_ternarylogicd(i64.s32[i], i64_squares.s32[i], i64.s32[i], 0x79);
  }
  check_equal_nd(&res, &expected, 16, "_mm512_ternarylogic_epi64", __LINE__);
  i64.xmm[vol0] = i64.xmm[vol0]; /* No-op. */
  /* Merge-masking: the src is the zeroed res, so operand a is 0. */
  res.zmmi = _mm512_setzero_epi32();
  res.zmmi = _mm512_mask_ternarylogic_epi64(res.zmmi, k8, i64_neg.zmmi,
                                            i64.zmmi, 0xca);
  for (i = 0; i < 16; i += 2) {
    if (k8 & (1 << (i / 2))) {
      expected.s32[i] =
          emulate_ternarylogicd(0, i64_neg.s32[i], i64.s32[i], 0xca);
      expected.s32[i + 1] =
          emulate_ternarylogicd(0, i64_neg.s32[i + 1], i64.s32[i + 1], 0xca);
    } else {
      expected.s32[i] = 0;
      expected.s32[i + 1] = 0;
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_mask_ternarylogic_epi64",
                 __LINE__);
  i64.xmm[vol0] = i64.xmm[vol0]; /* No-op. */
  res.zmmi = _mm512_maskz_ternarylogic_epi64(k8, i64_squares.zmmi,
                                             i64_squares.zmmi, i64.zmmi, 0x3b);
  for (i = 0; i < 16; i += 2) {
    if (k8 & (1 << (i / 2))) {
      expected.s32[i] = emulate_ternarylogicd(
          i64_squares.s32[i], i64_squares.s32[i], i64.s32[i], 0x3b);
      expected.s32[i + 1] = emulate_ternarylogicd(
          i64_squares.s32[i + 1], i64_squares.s32[i + 1], i64.s32[i + 1], 0x3b);
    } else {
      expected.s32[i] = 0;
      expected.s32[i + 1] = 0;
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_maskz_ternarylogic_epi64",
                 __LINE__);
}
/*
 * Test _mm512_[mask_|maskz_]ternarylogic_epi32 against the scalar
 * emulation, one 32-bit lane per mask bit.  Removed the unused
 * 'resx'/'resy' locals and the dead 'k8' assignments.
 */
void NOINLINE do_pternlogd() {
  volatile int i;
  V512 res;
  V512 expected;
  __mmask16 k = 0x23bc;
  res.zmmi =
      _mm512_ternarylogic_epi32(i32.zmmi, i32_squares.zmmi, i32.zmmi, 0x97);
  for (i = 0; i < 16; i++) {
    expected.s32[i] =
        emulate_ternarylogicd(i32.s32[i], i32_squares.s32[i], i32.s32[i], 0x97);
  }
  check_equal_nd(&res, &expected, 16, "_mm512_ternarylogic_epi32", __LINE__);
  i32.xmm[vol0] = i32.xmm[vol0]; /* No-op. */
  /* Merge-masking: the src is the zeroed res, so operand a is 0. */
  res.zmmi = _mm512_setzero_epi32();
  res.zmmi = _mm512_mask_ternarylogic_epi32(res.zmmi, k, i32_squares.zmmi,
                                            i32.zmmi, 0xfe);
  for (i = 0; i < 16; i++) {
    if (k & (1 << i)) {
      expected.s32[i] =
          emulate_ternarylogicd(0, i32_squares.s32[i], i32.s32[i], 0xfe);
    } else {
      expected.s32[i] = 0;
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_mask_ternarylogic_epi32",
                 __LINE__);
  i32.xmm[vol0] = i32.xmm[vol0]; /* No-op. */
  k = 0xabcd;
  res.zmmi = _mm512_maskz_ternarylogic_epi32(k, i32_squares.zmmi, i32_neg.zmmi,
                                             i32.zmmi, 0x3b);
  for (i = 0; i < 16; i++) {
    if (k & (1 << i)) {
      expected.s32[i] = emulate_ternarylogicd(i32_squares.s32[i],
                                              i32_neg.s32[i], i32.s32[i], 0x3b);
    } else {
      expected.s32[i] = 0;
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_maskz_ternarylogic_epi32",
                 __LINE__);
}
int main(int argc, char *argv[]) {
init();
do_rcp14pd();
do_rcp14ps();
do_sqrtps();
do_sqrtpd();
do_floorps();
do_floorpd();
do_ceilps();
do_ceilpd();
do_getexpsd();
do_getexpss();
do_getmantpd();
do_getmantps();
do_scalefpd();
do_scalefps();
do_fixupimmpd();
do_fixupimmps();
do_fixupimmsd();
do_fixupimmss();
do_roundscalepd();
do_roundscaleps();
do_pternlogq();
do_pternlogd();
if (n_errs != 0) {
printf("FAILED\n");
return 1;
}
printf("PASSED\n");
return 0;
}