blob: 037b1662d973124bcc8bc24d16a680bfba14807a [file] [log] [blame]
/*
* This test checks that the following intrinsics are supported
* and produce correct results:
* _mm_getexp_round_*()
* _mm_mask_getexp_round_*()
* _mm_maskz_getexp_round_*()
* _mm512_getexp_*()
* _mm512_getexp_round_*()
* _mm512_mask_getexp_*()
* _mm512_mask_getexp_round_*()
* _mm512_maskz_getexp_*()
* _mm512_maskz_getexp_round_*()
*/
#include "m512_test_util.h"
#include <math.h>
#include <memory.h>
#include <stdio.h>
#include <stdlib.h>
/* When non-zero, the CHECK_RESULT_* macros print the operand, expected and
 * result vectors after every check, not only after a failing one. */
int show_op = 0;
/*
 * Compare a packed single-precision result with the expected lane values.
 *
 * val1     - lanes produced by the operation under test
 * good     - expected lane values
 * mask     - bit i set means lane i must equal good[i]
 * num_elem - number of lanes to examine
 * zmask    - when 1, lanes whose mask bit is clear must be zero;
 *            otherwise those lanes are not examined
 *
 * Prints one FAIL line per mismatching lane.
 * Returns 1 when every examined lane matches, 0 otherwise.
 */
static int NOINLINE check_ps(float val1[], float good[], int mask, int num_elem,
                             int zmask) {
  int ok = 1;
  int lane;

  for (lane = 0; lane < num_elem; ++lane) {
    const int selected = (mask & (1 << lane)) != 0;

    if (selected) {
      if (val1[lane] == good[lane]) {
        continue;
      }
      ok = 0;
      printf("FAIL(%d): %f != %f\n", lane, val1[lane], good[lane]);
    } else if (zmask == 1 && val1[lane] != 0) {
      ok = 0;
      printf("FAIL(%d): %f != 0\n", lane, val1[lane]);
    }
  }
  return ok;
}
/*
 * Compare a scalar single-precision result held in a vector.
 *
 * val1     - lanes produced by the operation under test
 * good     - expected values; only good[0] is consulted
 * op2      - vector whose lanes 1..num_elem-1 the result must reproduce
 * mask     - bit 0 set means lane 0 must equal good[0]
 * num_elem - number of lanes to examine
 * zmask    - when 1 and bit 0 of mask is clear, lane 0 must be zero
 *
 * Prints one FAIL line per mismatching lane.
 * Returns 1 when every examined lane matches, 0 otherwise.
 */
static int NOINLINE check_ss(float val1[], float good[], float op2[], int mask,
                             int num_elem, int zmask) {
  int ok = 1;
  int lane;

  /* Lane 0 is the only lane governed by the mask. */
  if (mask & 0x1) {
    if (val1[0] != good[0]) {
      ok = 0;
      printf("FAIL(%d): %f != %f\n", 0, val1[0], good[0]);
    }
  } else if (zmask == 1 && val1[0] != 0) {
    ok = 0;
    printf("FAIL(%d): %f != 0\n", 0, val1[0]);
  }

  /* The remaining lanes must be copied through from op2 unchanged. */
  for (lane = 1; lane < num_elem; ++lane) {
    if (val1[lane] == op2[lane]) {
      continue;
    }
    ok = 0;
    printf("FAIL(%d): %f != %f\n", lane, val1[lane], op2[lane]);
  }
  return ok;
}
/*
 * Print a vector of floats on one line, highest lane first.
 *
 * pfx        - optional label printed as "<pfx>: " before the values
 * ivec       - lanes to print; 16 are read in long form, 4 in short form
 * short_form - non-zero prints lanes 3..0 only, zero prints lanes 15..0
 */
static void NOINLINE print_f32_vec(char *pfx, float ivec[], int short_form) {
  int lane = short_form ? 3 : 15;

  if (pfx != NULL) {
    printf("%s: ", pfx);
  }
  /* Values are separated by a single space; the last one ends the line. */
  for (; lane >= 0; --lane) {
    printf("%10.4f%c", ivec[lane], lane > 0 ? ' ' : '\n');
  }
}
/*
 * Prepare the operand and expected-result vectors for the float tests.
 *
 * ivalout - receives a copy of the 16 values in source
 * ivalexp - receives the expected getexp result for each lane
 * source  - the 16 input operands
 *
 * The expected values come from a fixed table, so they are only meaningful
 * for the operand set this table was computed for (the init array in
 * getexp_float()).
 */
static void NOINLINE init_exp_f32(float ivalout[16], float ivalexp[16],
                                  float source[16]) {
  int i;
  /* Pre-calculated floorf(log2(fabs(x))) for each operand. */
  const float expected[] = {10.0, 7.0,  24.0, 5.0,  9.0, 2.0,  22.0, 6.0,
                            0.0,  21.0, 1.0,  18.0, 2.0, 22.0, 2.0,  16.0};
#pragma clang loop vectorize(disable)
  for (i = 0; i < 16; i += 1) {
    ivalout[i] = source[i];
    // MS does not support log2(), so the expected exponent is taken from the
    // table instead of being computed here (same approach as init_exp_f64).
    ivalexp[i] = expected[i];
  }
}
/*
 * Compare a packed double-precision result with the expected lane values.
 *
 * val1     - lanes produced by the operation under test
 * good     - expected lane values
 * mask     - bit i set means lane i must equal good[i]
 * num_elem - number of lanes to examine
 * zmask    - when 1, lanes whose mask bit is clear must be zero;
 *            otherwise those lanes are not examined
 *
 * Prints one FAIL line per mismatching lane.
 * Returns 1 when every examined lane matches, 0 otherwise.
 */
static int NOINLINE check_pd(double val1[], double good[], int mask,
                             int num_elem, int zmask) {
  int ok = 1;
  int lane;

  for (lane = 0; lane < num_elem; ++lane) {
    const int selected = (mask & (1 << lane)) != 0;

    if (selected) {
      if (val1[lane] == good[lane]) {
        continue;
      }
      ok = 0;
      printf("FAIL(%d): %f != %f\n", lane, val1[lane], good[lane]);
    } else if (zmask == 1 && val1[lane] != 0) {
      ok = 0;
      printf("FAIL(%d): %f != 0\n", lane, val1[lane]);
    }
  }
  return ok;
}
/*
 * Compare a scalar double-precision result held in a vector.
 *
 * val1     - lanes produced by the operation under test
 * good     - expected values; only good[0] is consulted
 * op2      - vector whose lanes 1..num_elem-1 the result must reproduce
 * mask     - bit 0 set means lane 0 must equal good[0]
 * num_elem - number of lanes to examine
 * zmask    - when 1 and bit 0 of mask is clear, lane 0 must be zero
 *
 * Prints one FAIL line per mismatching lane.
 * Returns 1 when every examined lane matches, 0 otherwise.
 */
static int NOINLINE check_sd(double val1[], double good[], double op2[],
                             int mask, int num_elem, int zmask) {
  int ok = 1;
  int lane;

  /* Lane 0 is the only lane governed by the mask. */
  if (mask & 0x1) {
    if (val1[0] != good[0]) {
      ok = 0;
      printf("FAIL(%d): %f != %f\n", 0, val1[0], good[0]);
    }
  } else if (zmask == 1 && val1[0] != 0) {
    ok = 0;
    printf("FAIL(%d): %f != 0\n", 0, val1[0]);
  }

  /* The remaining lanes must be copied through from op2 unchanged. */
  for (lane = 1; lane < num_elem; ++lane) {
    if (val1[lane] == op2[lane]) {
      continue;
    }
    ok = 0;
    printf("FAIL(%d): %f != %f\n", lane, val1[lane], op2[lane]);
  }
  return ok;
}
/*
 * Print a vector of doubles on one line, highest lane first.
 *
 * pfx        - optional label printed as "<pfx>: " before the values
 * ivec       - lanes to print; 8 are read in long form, 4 in short form
 * short_form - non-zero prints lanes 3..0 only, zero prints lanes 7..0
 */
static void NOINLINE print_f64_vec(char *pfx, double ivec[], int short_form) {
  int lane = short_form ? 3 : 7;

  if (pfx != NULL) {
    printf("%s: ", pfx);
  }
  /* Values are separated by a single space; the last one ends the line. */
  for (; lane >= 0; --lane) {
    printf("%10.4f%c", ivec[lane], lane > 0 ? ' ' : '\n');
  }
}
/*
 * Prepare the operand and expected-result vectors for the double tests.
 *
 * ivalout - receives a copy of the 8 values in source
 * ivalexp - receives the expected getexp result for each lane
 * source  - the 8 input operands
 *
 * The expected values come from a fixed table rather than from source.
 */
static void NOINLINE init_exp_f64(double ivalout[8], double ivalexp[8],
                                  double source[8]) {
  /*
   * MS does not support log2(), so floor(log2(fabs(source[lane]))) is not
   * evaluated here; the pre-calculated results are stored in this table.
   */
  double expected[] = {10.0, 7.0, 24.0, 5.0, 9.0, 2.0, 22.0, 19.0};
  int lane;
#pragma clang loop vectorize(disable)
  for (lane = 0; lane < 8; ++lane) {
    ivalout[lane] = source[lane];
    ivalexp[lane] = expected[lane];
  }
}
/*
 * Check a packed single-precision result and report the outcome.
 *
 * FUNC  - name of the intrinsic, used only in the failure message
 * RES   - vector holding the produced result (its .f32 lanes are compared)
 * GOOD  - vector holding the expected result
 * OP    - input operand, printed for diagnostics
 * MMASK - lane mask handed to check_ps()
 * NUMB  - number of lanes to examine
 * ZEROM - 1 when lanes outside MMASK must be zero
 *
 * Increments n_errs on failure (n_errs is not defined in this file;
 * presumably it comes from m512_test_util.h). The vectors are printed on
 * failure or whenever show_op is non-zero.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement
 * and is safe in unbraced if/else bodies.
 */
#define CHECK_RESULT_PS(FUNC, RES, GOOD, OP, MMASK, NUMB, ZEROM)               \
  do {                                                                         \
    int passed =                                                               \
        check_ps((RES).f32, (GOOD).f32, (MMASK), (NUMB), (ZEROM));             \
    if (!passed) {                                                             \
      printf("FAIL " #FUNC "\n");                                              \
      n_errs++;                                                                \
    }                                                                          \
    if (!passed || show_op) {                                                  \
      print_f32_vec("Opand1", (OP).f32, 0);                                    \
      print_f32_vec("Scalar", (GOOD).f32, 0);                                  \
      print_f32_vec("Vector", (RES).f32, 0);                                   \
    }                                                                          \
  } while (0)
/*
 * Check a scalar single-precision result and report the outcome.
 *
 * FUNC  - name of the intrinsic, used only in the failure message
 * RES   - vector holding the produced result (its .f32 lanes are compared)
 * GOOD  - vector whose lane 0 is the expected scalar result
 * OP1   - operand printed as "Opand1" for diagnostics
 * OP2   - operand printed as "Opand2"; check_ss() requires the upper result
 *         lanes to equal this vector's lanes
 * MMASK - mask handed to check_ss() (only bit 0 is used there)
 * NUMB  - number of lanes to examine
 * ZEROM - 1 when lane 0 must be zero if bit 0 of MMASK is clear
 *
 * Increments n_errs on failure (n_errs is not defined in this file;
 * presumably it comes from m512_test_util.h). The low four lanes of each
 * vector are printed on failure or whenever show_op is non-zero.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement
 * and is safe in unbraced if/else bodies.
 */
#define CHECK_RESULT_SS(FUNC, RES, GOOD, OP1, OP2, MMASK, NUMB, ZEROM)         \
  do {                                                                         \
    int passed = check_ss((RES).f32, (GOOD).f32, (OP2).f32, (MMASK), (NUMB),   \
                          (ZEROM));                                            \
    if (!passed) {                                                             \
      printf("FAIL " #FUNC "\n");                                              \
      n_errs++;                                                                \
    }                                                                          \
    if (!passed || show_op) {                                                  \
      print_f32_vec("Opand1", (OP1).f32, 1);                                   \
      print_f32_vec("Opand2", (OP2).f32, 1);                                   \
      print_f32_vec("Scalar", (GOOD).f32, 1);                                  \
      print_f32_vec("Vector", (RES).f32, 1);                                   \
    }                                                                          \
  } while (0)
/*
 * Check a packed double-precision result and report the outcome.
 *
 * FUNC  - name of the intrinsic, used only in the failure message
 * RES   - vector holding the produced result (its .f64 lanes are compared)
 * GOOD  - vector holding the expected result
 * OP    - input operand, printed for diagnostics
 * MMASK - lane mask handed to check_pd()
 * NUMB  - number of lanes to examine
 * ZEROM - 1 when lanes outside MMASK must be zero
 *
 * Increments n_errs on failure (n_errs is not defined in this file;
 * presumably it comes from m512_test_util.h). The vectors are printed on
 * failure or whenever show_op is non-zero.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement
 * and is safe in unbraced if/else bodies.
 */
#define CHECK_RESULT_PD(FUNC, RES, GOOD, OP, MMASK, NUMB, ZEROM)               \
  do {                                                                         \
    int passed =                                                               \
        check_pd((RES).f64, (GOOD).f64, (MMASK), (NUMB), (ZEROM));             \
    if (!passed) {                                                             \
      printf("FAIL " #FUNC "\n");                                              \
      n_errs++;                                                                \
    }                                                                          \
    if (!passed || show_op) {                                                  \
      print_f64_vec("Opand1", (OP).f64, 0);                                    \
      print_f64_vec("Scalar", (GOOD).f64, 0);                                  \
      print_f64_vec("Vector", (RES).f64, 0);                                   \
    }                                                                          \
  } while (0)
/*
 * Check a scalar double-precision result and report the outcome.
 *
 * FUNC  - name of the intrinsic, used only in the failure message
 * RES   - vector holding the produced result (its .f64 lanes are compared)
 * GOOD  - vector whose lane 0 is the expected scalar result
 * OP1   - operand printed as "Opand1" for diagnostics
 * OP2   - operand printed as "Opand2"; check_sd() requires the upper result
 *         lanes to equal this vector's lanes
 * MMASK - mask handed to check_sd() (only bit 0 is used there)
 * NUMB  - number of lanes to examine
 * ZEROM - 1 when lane 0 must be zero if bit 0 of MMASK is clear
 *
 * Increments n_errs on failure (n_errs is not defined in this file;
 * presumably it comes from m512_test_util.h). The low four lanes of each
 * vector are printed on failure or whenever show_op is non-zero.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement
 * and is safe in unbraced if/else bodies.
 */
#define CHECK_RESULT_SD(FUNC, RES, GOOD, OP1, OP2, MMASK, NUMB, ZEROM)         \
  do {                                                                         \
    int passed = check_sd((RES).f64, (GOOD).f64, (OP2).f64, (MMASK), (NUMB),   \
                          (ZEROM));                                            \
    if (!passed) {                                                             \
      printf("FAIL " #FUNC "\n");                                              \
      n_errs++;                                                                \
    }                                                                          \
    if (!passed || show_op) {                                                  \
      print_f64_vec("Opand1", (OP1).f64, 1);                                   \
      print_f64_vec("Opand2", (OP2).f64, 1);                                   \
      print_f64_vec("Scalar", (GOOD).f64, 1);                                  \
      print_f64_vec("Vector", (RES).f64, 1);                                   \
    }                                                                          \
  } while (0)
/*
 * Exercise the single-precision getexp intrinsics: the packed 512-bit forms
 * (plain, mask, maskz, each with and without an explicit rounding argument)
 * followed by the scalar _ss forms. Every result is verified against the
 * pre-calculated exponents through the CHECK_RESULT_* macros.
 */
static void NOINLINE getexp_float() {
  V512 zeros;    /* all-zero vector, used as the pass-through source */
  V512 input;    /* operands fed to getexp */
  V512 expected; /* expected exponent for every lane of input */
  V512 result;   /* output of the intrinsic under test */
  V512 ones;     /* all 1.0, first operand of the scalar forms */
  float init[] = {1111.11, -222.22, 33333333.33, -44.44,
                  555.55,  -6.66,   7777777.77,  -86.88,
                  -1.11,   2222222.22, -3.33,    444444.44,
                  -5.55,   6666666.66, -7.77,    88888.88};

  init_exp_f32(input.f32, expected.f32, init);
  ones.zmm = _mm512_set1_ps(1.0);
  zeros.zmm = _mm512_setzero_ps();

  /* Packed forms without a rounding argument. */
  result.zmm = _mm512_getexp_ps(input.zmm);
  CHECK_RESULT_PS(_mm512_getexp_ps, result, expected, input, 0xffff, 16, 0);

  result.zmm = _mm512_mask_getexp_ps(zeros.zmm, 0x1ff8, input.zmm);
  CHECK_RESULT_PS(_mm512_mask_getexp_ps, result, expected, input, 0x1ff8, 16,
                  0);

  result.zmm = _mm512_maskz_getexp_ps(0xf18f, input.zmm);
  CHECK_RESULT_PS(_mm512_maskz_getexp_ps, result, expected, input, 0xf18f, 16,
                  1);

  /* Packed forms with an explicit rounding argument. */
  result.zmm = _mm512_getexp_round_ps(
      input.zmm, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  CHECK_RESULT_PS(_mm512_getexp_round_ps, result, expected, input, 0xffff, 16,
                  0);

  result.zmm = _mm512_mask_getexp_round_ps(
      zeros.zmm, 0x1ff8, input.zmm,
      _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  CHECK_RESULT_PS(_mm512_mask_getexp_round_ps, result, expected, input, 0x1ff8,
                  16, 0);

  result.zmm = _mm512_maskz_getexp_round_ps(
      0xf18f, input.zmm, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  CHECK_RESULT_PS(_mm512_maskz_getexp_round_ps, result, expected, input,
                  0xf18f, 16, 1);

  /*
   * Scalar forms: only the low 128 bits of result are written. The check
   * expects lane 0 to hold the exponent of input lane 0 and lanes 1..3 to
   * equal the corresponding lanes of ones.
   */
  result.xmm[0] = _mm_getexp_round_ss(
      ones.xmm[0], input.xmm[0],
      _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  CHECK_RESULT_SS(_mm_getexp_round_ss, result, expected, input, ones, 0x1, 4,
                  0);

  result.xmm[0] = _mm_mask_getexp_round_ss(
      zeros.xmm[0], 0x1, ones.xmm[0], input.xmm[0],
      _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  CHECK_RESULT_SS(_mm_mask_getexp_round_ss, result, expected, input, ones, 0x1,
                  4, 0);

  /* Zero mask: lane 0 of the result is expected to be zeroed. */
  result.xmm[0] = _mm_maskz_getexp_round_ss(
      0x0, ones.xmm[0], input.xmm[0],
      _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  CHECK_RESULT_SS(_mm_maskz_getexp_round_ss, result, expected, input, ones,
                  0x0, 4, 1);
}
/*
 * Exercise the double-precision getexp intrinsics: the packed 512-bit forms
 * (plain, mask, maskz, each with and without an explicit rounding argument)
 * followed by the scalar _sd forms. Every result is verified against the
 * pre-calculated exponents through the CHECK_RESULT_* macros.
 */
static void NOINLINE getexp_double() {
  V512 v0, v1, v2, v3, v4;
  double init[] = {1111.11, -222.22, 33333333.33, -44.44,
                   555.55,  -6.66,   7777777.77,  -888888.88};

  /* v1 = operands, v2 = expected exponents, v4 = all 1.0, v0 = all zero. */
  init_exp_f64(v1.f64, v2.f64, init);
  v4.zmmd = _mm512_set1_pd(1.0);
  v0.zmmd = _mm512_setzero_pd();

  /*
   * Packed forms without a rounding argument. A 512-bit vector holds eight
   * doubles, so the "all lanes" mask for the unmasked checks is 0xff.
   */
  v3.zmmd = _mm512_getexp_pd(v1.zmmd);
  CHECK_RESULT_PD(_mm512_getexp_pd, v3, v2, v1, 0xff, 8, 0);

  v3.zmmd = _mm512_mask_getexp_pd(v0.zmmd, 0xf8, v1.zmmd);
  CHECK_RESULT_PD(_mm512_mask_getexp_pd, v3, v2, v1, 0xf8, 8, 0);

  v3.zmmd = _mm512_maskz_getexp_pd(0x8f, v1.zmmd);
  CHECK_RESULT_PD(_mm512_maskz_getexp_pd, v3, v2, v1, 0x8f, 8, 1);

  /* Packed forms with an explicit rounding argument. */
  v3.zmmd = _mm512_getexp_round_pd(v1.zmmd, _MM_FROUND_TO_NEAREST_INT |
                                                _MM_FROUND_NO_EXC);
  CHECK_RESULT_PD(_mm512_getexp_round_pd, v3, v2, v1, 0xff, 8, 0);

  v3.zmmd = _mm512_mask_getexp_round_pd(
      v0.zmmd, 0x1f, v1.zmmd, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  CHECK_RESULT_PD(_mm512_mask_getexp_round_pd, v3, v2, v1, 0x1f, 8, 0);

  v3.zmmd = _mm512_maskz_getexp_round_pd(
      0xf1, v1.zmmd, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  CHECK_RESULT_PD(_mm512_maskz_getexp_round_pd, v3, v2, v1, 0xf1, 8, 1);

  /*
   * Scalar forms: only the low 128 bits of v3 are written. The check expects
   * lane 0 to hold the exponent of v1 lane 0 and lane 1 to equal lane 1 of v4.
   */
  v3.xmmd[0] = _mm_getexp_round_sd(
      v4.xmmd[0], v1.xmmd[0], _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  CHECK_RESULT_SD(_mm_getexp_round_sd, v3, v2, v1, v4, 0x1, 2, 0);

  v3.xmmd[0] =
      _mm_mask_getexp_round_sd(v0.xmmd[0], 0x1, v4.xmmd[0], v1.xmmd[0],
                               _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  CHECK_RESULT_SD(_mm_mask_getexp_round_sd, v3, v2, v1, v4, 0x1, 2, 0);

  /* Zero mask: lane 0 of the result is expected to be zeroed. */
  v3.xmmd[0] =
      _mm_maskz_getexp_round_sd(0x0, v4.xmmd[0], v1.xmmd[0],
                                _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  CHECK_RESULT_SD(_mm_maskz_getexp_round_sd, v3, v2, v1, v4, 0x0, 2, 1);
}
/*
 * Run the float and double getexp tests, then print the overall verdict.
 * Returns 0 and prints "PASSED" when n_errs is zero, otherwise prints
 * "FAILED" and returns 1.
 */
int main() {
  getexp_float();
  getexp_double();

  if (n_errs == 0) {
    printf("PASSED\n");
    return 0;
  }
  printf("FAILED\n");
  return 1;
}