blob: 1e017a01a7efbabec3f081fa9945f1486ddb8af8 [file] [log] [blame]
#include "m512_test_util.h"
#include <memory.h>
#include <stdio.h>
#include <stdlib.h>
/*
* Here we check for _mm_[mask|maskz]_[add|div|max|min|mul|sub]_[round]_sd
* intrinsics.
*/
/*
 * Verbosity switch for the test macros: when the build defines SHOW_OP the
 * operands and results of every operation are printed, otherwise only those
 * of failing operations.
 */
#ifdef SHOW_OP
int show_op = 1;
#else
int show_op = 0;
#endif
/* Operations the scalar reference model intop() knows how to evaluate. */
typedef enum {
  ASSIGN, /* copy the first operand */
  ADD,    /* op1 + op2 */
  DIV,    /* op1 / op2 */
  MAX,    /* larger of op1 and op2 */
  MIN,    /* smaller of op1 and op2 */
  MUL,    /* op1 * op2 */
  SUB     /* op1 - op2 */
} OPER;
/*
 * Scalar reference model for the intrinsics under test.
 *
 * Applies `op` independently to each of the two elements of ivalop1 and
 * ivalop2 and stores the results in ivalout.
 *
 *   op       operation to perform (see OPER)
 *   ivalout  receives the two results; cleared to zero first
 *   ivalop1  first operand (two elements)
 *   ivalop2  second operand (two elements); not read when op is ASSIGN
 *
 * For an unrecognised op a diagnostic is printed, n_errs is incremented and
 * ivalout is left zeroed.
 */
static void NOINLINE intop(OPER op, double ivalout[2], double ivalop1[2],
                           double ivalop2[2]) {
  int i;
  int handled = 0;

  /* ivalout is a pointer here (array parameters decay), so sizeof(ivalout)
   * would yield the pointer size rather than the size of two doubles. */
  memset(ivalout, 0, 2 * sizeof(ivalout[0]));

  for (i = 0; i < 2; i += 1) {
    switch (op) {
    case ASSIGN:
      handled = 1;
      ivalout[i] = ivalop1[i];
      break;
    case ADD:
      handled = 1;
      ivalout[i] = ivalop1[i] + ivalop2[i];
      break;
    case DIV:
      handled = 1;
      ivalout[i] = ivalop1[i] / ivalop2[i];
      break;
    case MAX:
      /* The second operand is selected whenever the comparison is false. */
      handled = 1;
      ivalout[i] = (ivalop1[i] > ivalop2[i]) ? ivalop1[i] : ivalop2[i];
      break;
    case MIN:
      /* The second operand is selected whenever the comparison is false. */
      handled = 1;
      ivalout[i] = (ivalop1[i] < ivalop2[i]) ? ivalop1[i] : ivalop2[i];
      break;
    case MUL:
      handled = 1;
      ivalout[i] = ivalop2[i] * ivalop1[i];
      break;
    case SUB:
      handled = 1;
      ivalout[i] = ivalop1[i] - ivalop2[i];
      break;
    default:
      printf("FAIL: bad op\n");
      break;
    }
  }

  if (!handled) {
    printf("FAIL: unsupported op\n");
    n_errs++;
  }
}
/*
 * Compares the first two elements of val1 against good.
 *
 * Prints one "FAIL" line per mismatching element. Returns 1 when both
 * elements are equal and 0 otherwise.
 */
static int NOINLINE check(double val1[], double good[]) {
  int all_equal = 1;
  int idx = 0;

  while (idx < 2) {
    if (val1[idx] != good[idx]) {
      printf("FAIL: %f != %f\n", val1[idx], good[idx]);
      all_equal = 0;
    }
    idx++;
  }
  return all_equal;
}
/*
 * Verifies the two-element result of a masked scalar operation.
 *
 *   dest     values expected in elements that are taken from the destination
 *   val1     result vector under test
 *   good     reference result; only good[0] is consulted
 *   mask     write mask; only bit 0 is examined
 *   zeroing  1 for zero-masking, anything else for merge-masking
 *
 * Element 0 must equal good[0] when mask bit 0 is set, 0 when the bit is
 * clear and zeroing is 1, and dest[0] otherwise. Element 1 must always equal
 * dest[1]. A "FAIL" line showing the actual and the expected value is printed
 * for each mismatch.
 *
 * Returns 1 when every checked element matches and 0 otherwise.
 */
static int NOINLINE check_mask(double dest[], double val1[], double good[],
                               int mask, int zeroing) {
  int i;
  int res = 1;
  /* Index of the first element that is compared against dest. */
  int first_dest = 1;

  if (mask & 1) {
    /* Element 0 was computed: compare with the reference value. */
    if (val1[0] != good[0]) {
      res = 0;
      printf("FAIL: %f != %f\n", val1[0], good[0]);
    }
  } else if (zeroing == 1) {
    /* Element 0 was masked off with zeroing: it must be zero. */
    if (val1[0] != 0) {
      res = 0;
      printf("FAIL: %f != %f\n", val1[0], 0.0);
    }
  } else {
    /* Element 0 was masked off with merging: it must come from dest. */
    first_dest = 0;
  }

  /* Remaining elements are expected to match dest. */
  for (i = first_dest; i < 2; i += 1) {
    if (val1[i] != dest[i]) {
      res = 0;
      printf("FAIL: %f != %f\n", val1[i], dest[i]);
    }
  }
  return (res);
}
/*
 * Prints a two-element vector on one line, element 1 first and element 0
 * second. When pfx is non-null it is printed first, followed by ": ".
 */
static void NOINLINE print_vec(char *pfx, double ivec[]) {
  const double upper = ivec[1];
  const double lower = ivec[0];

  if (pfx != NULL) {
    printf("%s: ", pfx);
  }
  printf("%10.4f %10.4f\n", upper, lower);
}
/*
 * DOONE(OP, FUNC): exercises an unmasked two-operand intrinsic.
 *
 * Computes the reference result for OP into good with intop(), stores
 * FUNC(v1.xmmd[0], v2.xmmd[0]) in vvv.xmmd[0] and validates it through
 * check_mask() with mask 0x1 and merge-masking, i.e. only element 0 is
 * compared with the reference. The former follow-up call to check()
 * overwrote that verdict and compared element 1 against good[1], which the
 * scalar intrinsics are not expected to produce; it has been dropped so this
 * macro agrees with DOONE_ROUND.
 *
 * On failure "FAIL <FUNC>" is printed and n_errs is incremented. Operands
 * and results are printed on failure or when show_op is set.
 *
 * Expects v1, v2, good, vvv, n_errs and show_op to be visible at the point
 * of expansion. vvv.f64 is passed to check_mask() as both dest and val1.
 */
#define DOONE(OP, FUNC) \
  { \
    int passed = 0; \
    intop(OP, good.f64, v1.f64, v2.f64); \
    vvv.xmmd[0] = FUNC(v1.xmmd[0], v2.xmmd[0]); \
    passed = check_mask(vvv.f64, vvv.f64, good.f64, 0x1, 0); \
    if (!passed) { \
      printf("FAIL " #FUNC "\n"); \
      n_errs++; \
    } \
    if (!passed || show_op) { \
      print_vec("Opand1", v1.f64); \
      print_vec("Opand2", v2.f64); \
      print_vec("Scalar", good.f64); \
      print_vec("Vector", vvv.f64); \
    } \
  }
/*
 * DOONE_WITH_MASK(OP, FUNC, MMASK): exercises a merge-masking intrinsic.
 *
 * Computes the reference result for OP into good with intop(), then stores
 * FUNC(vvv.xmmd[0], MMASK, v1.xmmd[0], v2.xmmd[0]) back into vvv.xmmd[0] and
 * validates it with check_mask() in merge-masking mode (zeroing == 0).
 *
 * On failure "FAIL <FUNC>" is printed and n_errs is incremented. Operands
 * and results are printed on failure or when show_op is set.
 *
 * Expects v1, v2, good, vvv, n_errs and show_op to be visible at the point
 * of expansion. vvv.f64 is passed to check_mask() as both dest and val1.
 */
#define DOONE_WITH_MASK(OP, FUNC, MMASK) \
  do { \
    int ok; \
    intop(OP, good.f64, v1.f64, v2.f64); \
    vvv.xmmd[0] = FUNC(vvv.xmmd[0], MMASK, v1.xmmd[0], v2.xmmd[0]); \
    ok = check_mask(vvv.f64, vvv.f64, good.f64, MMASK, 0); \
    if (!ok) { \
      printf("FAIL " #FUNC "\n"); \
      n_errs++; \
    } \
    if (show_op || !ok) { \
      print_vec("Opand1", v1.f64); \
      print_vec("Opand2", v2.f64); \
      print_vec("Scalar", good.f64); \
      print_vec("Vector", vvv.f64); \
    } \
  } while (0)
/*
 * DOONE_WITH_ZMASK(OP, FUNC, MMASK): exercises a zero-masking intrinsic.
 *
 * Computes the reference result for OP into good with intop(), then stores
 * FUNC(MMASK, v1.xmmd[0], v2.xmmd[0]) in vvv.xmmd[0] and validates it with
 * check_mask() in zero-masking mode (zeroing == 1).
 *
 * On failure "FAIL <FUNC>" is printed and n_errs is incremented. Operands
 * and results are printed on failure or when show_op is set.
 *
 * Expects v1, v2, good, vvv, n_errs and show_op to be visible at the point
 * of expansion. vvv.f64 is passed to check_mask() as both dest and val1.
 */
#define DOONE_WITH_ZMASK(OP, FUNC, MMASK) \
  do { \
    int ok; \
    intop(OP, good.f64, v1.f64, v2.f64); \
    vvv.xmmd[0] = FUNC(MMASK, v1.xmmd[0], v2.xmmd[0]); \
    ok = check_mask(vvv.f64, vvv.f64, good.f64, MMASK, 1); \
    if (!ok) { \
      printf("FAIL " #FUNC "\n"); \
      n_errs++; \
    } \
    if (show_op || !ok) { \
      print_vec("Opand1", v1.f64); \
      print_vec("Opand2", v2.f64); \
      print_vec("Scalar", good.f64); \
      print_vec("Vector", vvv.f64); \
    } \
  } while (0)
/*
 * DOONE_ROUND(OP, FUNC, ROUND): exercises an unmasked intrinsic that takes
 * an explicit rounding argument.
 *
 * Computes the reference result for OP into good with intop(), then stores
 * FUNC(v1.xmmd[0], v2.xmmd[0], ROUND) in vvv.xmmd[0] and validates it with
 * check_mask() using mask 0x1 and merge-masking, i.e. element 0 is compared
 * with the reference.
 *
 * On failure "FAIL <FUNC>" is printed and n_errs is incremented. Operands
 * and results are printed on failure or when show_op is set.
 *
 * Expects v1, v2, good, vvv, n_errs and show_op to be visible at the point
 * of expansion. vvv.f64 is passed to check_mask() as both dest and val1.
 */
#define DOONE_ROUND(OP, FUNC, ROUND) \
  do { \
    int ok; \
    intop(OP, good.f64, v1.f64, v2.f64); \
    vvv.xmmd[0] = FUNC(v1.xmmd[0], v2.xmmd[0], ROUND); \
    ok = check_mask(vvv.f64, vvv.f64, good.f64, 0x1, 0); \
    if (!ok) { \
      printf("FAIL " #FUNC "\n"); \
      n_errs++; \
    } \
    if (show_op || !ok) { \
      print_vec("Opand1", v1.f64); \
      print_vec("Opand2", v2.f64); \
      print_vec("Scalar", good.f64); \
      print_vec("Vector", vvv.f64); \
    } \
  } while (0)
/*
 * DOONE_WITH_MASK_ROUND(OP, FUNC, MMASK, ROUND): exercises a merge-masking
 * intrinsic that takes an explicit rounding argument.
 *
 * Computes the reference result for OP into good with intop(), then stores
 * FUNC(vvv.xmmd[0], MMASK, v1.xmmd[0], v2.xmmd[0], ROUND) back into
 * vvv.xmmd[0] and validates it with check_mask() in merge-masking mode
 * (zeroing == 0).
 *
 * On failure "FAIL <FUNC>" is printed and n_errs is incremented. Operands
 * and results are printed on failure or when show_op is set.
 *
 * Expects v1, v2, good, vvv, n_errs and show_op to be visible at the point
 * of expansion. vvv.f64 is passed to check_mask() as both dest and val1.
 */
#define DOONE_WITH_MASK_ROUND(OP, FUNC, MMASK, ROUND) \
  do { \
    int ok; \
    intop(OP, good.f64, v1.f64, v2.f64); \
    vvv.xmmd[0] = FUNC(vvv.xmmd[0], MMASK, v1.xmmd[0], v2.xmmd[0], ROUND); \
    ok = check_mask(vvv.f64, vvv.f64, good.f64, MMASK, 0); \
    if (!ok) { \
      printf("FAIL " #FUNC "\n"); \
      n_errs++; \
    } \
    if (show_op || !ok) { \
      print_vec("Opand1", v1.f64); \
      print_vec("Opand2", v2.f64); \
      print_vec("Scalar", good.f64); \
      print_vec("Vector", vvv.f64); \
    } \
  } while (0)
/*
 * DOONE_WITH_ZMASK_ROUND(OP, FUNC, MMASK, ROUND): exercises a zero-masking
 * intrinsic that takes an explicit rounding argument.
 *
 * Computes the reference result for OP into good with intop(), then stores
 * FUNC(MMASK, v1.xmmd[0], v2.xmmd[0], ROUND) in vvv.xmmd[0] and validates it
 * with check_mask() in zero-masking mode (zeroing == 1).
 *
 * On failure "FAIL <FUNC>" is printed and n_errs is incremented. Operands
 * and results are printed on failure or when show_op is set.
 *
 * Expects v1, v2, good, vvv, n_errs and show_op to be visible at the point
 * of expansion. vvv.f64 is passed to check_mask() as both dest and val1.
 */
#define DOONE_WITH_ZMASK_ROUND(OP, FUNC, MMASK, ROUND) \
  do { \
    int ok; \
    intop(OP, good.f64, v1.f64, v2.f64); \
    vvv.xmmd[0] = FUNC(MMASK, v1.xmmd[0], v2.xmmd[0], ROUND); \
    ok = check_mask(vvv.f64, vvv.f64, good.f64, MMASK, 1); \
    if (!ok) { \
      printf("FAIL " #FUNC "\n"); \
      n_errs++; \
    } \
    if (show_op || !ok) { \
      print_vec("Opand1", v1.f64); \
      print_vec("Opand2", v2.f64); \
      print_vec("Scalar", good.f64); \
      print_vec("Vector", vvv.f64); \
    } \
  } while (0)
int main() {
double init1[] = {1, -2, 3, -4, 5, 6, 7, 8, 9, 10, -11, 12, 13, 14, 15, 16};
double init2[] = {11, 12, 23, -24, 35, 36, 17, 38,
42, -1, 33, 7, 8, 10, 11, 12};
V512 v1;
V512 v2;
V512 good;
V512 vvv;
intop(ASSIGN, v1.f64, init1, 0);
intop(ASSIGN, v2.f64, init2, 0);
vvv.xmmd[0] = _mm_setzero_pd();
// simple mask intrinsics
DOONE_WITH_MASK(ADD, _mm_mask_add_sd, 0x1);
DOONE_WITH_MASK(DIV, _mm_mask_div_sd, 0x1);
DOONE_WITH_MASK(MAX, _mm_mask_max_sd, 0x1);
DOONE_WITH_MASK(MIN, _mm_mask_min_sd, 0x1);
DOONE_WITH_MASK(MUL, _mm_mask_mul_sd, 0x1);
DOONE_WITH_MASK(SUB, _mm_mask_sub_sd, 0x1);
// intrinsics with rounding mode
DOONE_ROUND(ADD, _mm_add_round_sd,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
DOONE_ROUND(DIV, _mm_div_round_sd,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
DOONE_ROUND(MAX, _mm_max_round_sd,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
DOONE_ROUND(MIN, _mm_min_round_sd,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
DOONE_ROUND(MUL, _mm_mul_round_sd,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
DOONE_ROUND(SUB, _mm_sub_round_sd,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
// intrinsics with mask and rounding mode
DOONE_WITH_MASK_ROUND(ADD, _mm_mask_add_round_sd, 0x1,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
DOONE_WITH_MASK_ROUND(DIV, _mm_mask_div_round_sd, 0x0,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
DOONE_WITH_MASK_ROUND(MAX, _mm_mask_max_round_sd, 0x1,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
DOONE_WITH_MASK_ROUND(MIN, _mm_mask_min_round_sd, 0x1,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
DOONE_WITH_MASK_ROUND(MUL, _mm_mask_mul_round_sd, 0x0,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
DOONE_WITH_MASK_ROUND(SUB, _mm_mask_sub_round_sd, 0x1,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
// intrinsics with zero masking
DOONE_WITH_ZMASK(ADD, _mm_maskz_add_sd, 0x0);
DOONE_WITH_ZMASK(DIV, _mm_maskz_div_sd, 0x1);
DOONE_WITH_ZMASK(MAX, _mm_maskz_max_sd, 0x1);
DOONE_WITH_ZMASK(MIN, _mm_maskz_min_sd, 0x1);
DOONE_WITH_ZMASK(MUL, _mm_maskz_mul_sd, 0x1);
DOONE_WITH_ZMASK(SUB, _mm_maskz_sub_sd, 0x0);
// intrinsics with zero masking and rounding mode
DOONE_WITH_ZMASK_ROUND(ADD, _mm_maskz_add_round_sd, 0x0,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
DOONE_WITH_ZMASK_ROUND(DIV, _mm_maskz_div_round_sd, 0x1,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
DOONE_WITH_ZMASK_ROUND(MAX, _mm_maskz_max_round_sd, 0x0,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
DOONE_WITH_ZMASK_ROUND(MIN, _mm_maskz_min_round_sd, 0x1,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
DOONE_WITH_ZMASK_ROUND(MUL, _mm_maskz_mul_round_sd, 0x1,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
DOONE_WITH_ZMASK_ROUND(SUB, _mm_maskz_sub_round_sd, 0x1,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
if (n_errs != 0) {
printf("FAILED\n");
return 1;
}
printf("PASSED\n");
return 0;
}