/*
 * Tests for the compress/expand intrinsics family.
 * This test checks the correctness of support for the
 * following intrinsics:
 * _mm512_mask_compress*()
 * _mm512_mask_compressstoreu*()
 * _mm512_mask_expand*()
 * _mm512_mask_expandloadu*()
 * _mm512_maskz_compress*()
 * _mm512_maskz_expand*()
 * _mm512_maskz_expandloadu*()
 */
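/*
 * Semantics recap (per the Intel Intrinsics Guide): expand places
 * consecutive source elements into the destination lanes selected by
 * the mask; compress packs the mask-selected source lanes contiguously
 * into the low destination lanes. The mask/maskz variants fill the
 * remaining lanes from a pass-through operand or with zeros,
 * respectively.
 */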
#include "m512_test_util.h"
#include <stdio.h>
#include <string.h>
volatile __int64 vol0; /* always zero; volatile defeats constant propagation */
V512 isrc1;
V512 isrc2;
V512 fsrc1;
V512 fsrc2;
V512 dsrc1;
V512 dsrc2;
V512 res;
V512 mres;
__mmask8 k8;
__mmask16 k16;
void NOINLINE init() {
volatile int i; /* volatile so the compiler cannot fold the init loops away */
for (i = 0; i < 16; i++) {
isrc1.s32[i] = i;
isrc2.s32[i] = i + 1;
fsrc1.f32[i] = i * 1.0f;
fsrc2.f32[i] = i * 2.0f;
}
for (i = 0; i < 8; i++) {
dsrc1.f64[i] = i * 4.0;
dsrc2.f64[i] = i * 5.0;
}
k8 = 0x5a;    /* = 0b01011010 */
k16 = 0x25d6; /* = 0b0010010111010110 */
}
/*
 * Use one of these between tests to make the compiler think src was
 * updated; this prevents PRE (partial redundancy elimination) of loads
 * of src. vol0 is always zero, so each assignment is a no-op, but
 * because vol0 is volatile the compiler cannot prove that and must
 * reload the vector afterwards.
 */
#define soft_isrc1_update() isrc1.xmmi[vol0] = isrc1.xmmi[vol0]
#define soft_fsrc1_update() fsrc1.xmmi[vol0] = fsrc1.xmmi[vol0]
#define soft_dsrc1_update() dsrc1.xmmi[vol0] = dsrc1.xmmi[vol0]
/*
* Model expand intrinsic behavior.
*/
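/*
 * Worked example: with mask = 0x5, input1 = {0, 1, 2, 3} and
 * input2 = {10, 11, 12, 13}, the masked expand model produces
 * {10, 1, 11, 3}: consecutive input2 elements fill the active lanes
 * (0 and 2) while input1 passes through the inactive lanes; the maskz
 * models write zeros to the inactive lanes instead.
 */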
void NOINLINE model_mask_expand_i32(void *input1, __int64 mask, void *input2,
void *output, int n_elems) {
int i, j = 0;
int *v1i = (int *)input1;
int *v2i = (int *)input2;
int *v3o = (int *)output;
for (i = 0; i < n_elems; i++) {
if ((mask & (1LL << i)) != 0) {
v3o[i] = v2i[j];
j++;
} else {
v3o[i] = v1i[i];
}
}
}
void NOINLINE model_maskz_expand_i32(__int64 mask, void *input2, void *output,
int n_elems) {
int i, j = 0;
int *v2i = (int *)input2;
int *v3o = (int *)output;
for (i = 0; i < n_elems; i++) {
if ((mask & (1LL << i)) != 0) {
v3o[i] = v2i[j];
j++;
} else {
v3o[i] = 0;
}
}
}
void NOINLINE model_mask_expand_i64(void *input1, __int64 mask, void *input2,
void *output, int n_elems) {
int i, j = 0;
__int64 *v1i = (__int64 *)input1;
__int64 *v2i = (__int64 *)input2;
__int64 *v3o = (__int64 *)output;
for (i = 0; i < n_elems; i++) {
if ((mask & (1LL << i)) != 0) {
v3o[i] = v2i[j];
j++;
} else {
v3o[i] = v1i[i];
}
}
}
void NOINLINE model_maskz_expand_i64(__int64 mask, void *input2, void *output,
int n_elems) {
int i, j = 0;
__int64 *v2i = (__int64 *)input2;
__int64 *v3o = (__int64 *)output;
for (i = 0; i < n_elems; i++) {
if ((mask & (1LL << i)) != 0) {
v3o[i] = v2i[j];
j++;
} else {
v3o[i] = 0;
}
}
}
void NOINLINE model_mask_expand_f32(void *input1, __int64 mask, void *input2,
void *output, int n_elems) {
int i, j = 0;
float *v1i = (float *)input1;
float *v2i = (float *)input2;
float *v3o = (float *)output;
for (i = 0; i < n_elems; i++) {
if ((mask & (1LL << i)) != 0) {
v3o[i] = v2i[j];
j++;
} else {
v3o[i] = v1i[i];
}
}
}
void NOINLINE model_maskz_expand_f32(__int64 mask, void *input2, void *output,
int n_elems) {
int i, j = 0;
float *v2i = (float *)input2;
float *v3o = (float *)output;
for (i = 0; i < n_elems; i++) {
if ((mask & (1LL << i)) != 0) {
v3o[i] = v2i[j];
j++;
} else {
v3o[i] = 0.f;
}
}
}
void NOINLINE model_mask_expand_f64(void *input1, __int64 mask, void *input2,
void *output, int n_elems) {
int i, j = 0;
double *v1i = (double *)input1;
double *v2i = (double *)input2;
double *v3o = (double *)output;
for (i = 0; i < n_elems; i++) {
if ((mask & (1LL << i)) != 0) {
v3o[i] = v2i[j];
j++;
} else {
v3o[i] = v1i[i];
}
}
}
void NOINLINE model_maskz_expand_f64(__int64 mask, void *input2, void *output,
int n_elems) {
int i, j = 0;
double *v2i = (double *)input2;
double *v3o = (double *)output;
for (i = 0; i < n_elems; i++) {
if ((mask & (1LL << i)) != 0) {
v3o[i] = v2i[j];
j++;
} else {
v3o[i] = 0.;
}
}
}
#define GEN_MASK_CHECK_CASE(intrin, prefix, suffix, mask, n_elem, modeller, \
checker) \
res.suffix = intrin(prefix##src2.suffix, mask, prefix##src1.suffix); \
modeller((void *)&prefix##src2.suffix, mask, (void *)&prefix##src1.suffix, \
(void *)&mres.suffix, n_elem); \
checker((void *)&res.suffix, (void *)&mres.suffix, n_elem, #intrin, __LINE__)
#define GEN_MASK_LOAD_CHECK_CASE(intrin, prefix, suffix, mask, n_elem, \
modeller, checker) \
res.suffix = intrin(prefix##src2.suffix, mask, &prefix##src1.suffix); \
modeller((void *)&prefix##src2.suffix, mask, (void *)&prefix##src1.suffix, \
(void *)&mres.suffix, n_elem); \
checker((void *)&res.suffix, (void *)&mres.suffix, n_elem, #intrin, __LINE__)
#define GEN_MASKZ_CHECK_CASE(intrin, prefix, suffix, mask, n_elem, modeller, \
checker) \
res.suffix = intrin(mask, prefix##src1.suffix); \
modeller(mask, (void *)&prefix##src1.suffix, (void *)&mres.suffix, n_elem); \
checker((void *)&res.suffix, (void *)&mres.suffix, n_elem, #intrin, __LINE__)
#define GEN_MASKZ_LOAD_CHECK_CASE(intrin, prefix, suffix, mask, n_elem, \
modeller, checker) \
res.suffix = intrin(mask, &prefix##src1.suffix); \
modeller(mask, (void *)&prefix##src1.suffix, (void *)&mres.suffix, n_elem); \
checker((void *)&res.suffix, (void *)&mres.suffix, n_elem, #intrin, __LINE__)
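/*
 * For reference, GEN_MASK_CHECK_CASE(_mm512_mask_expand_epi32, i, zmmi,
 * k16, 16, model_mask_expand_i32, check_equal_nd) expands to:
 *
 *   res.zmmi = _mm512_mask_expand_epi32(isrc2.zmmi, k16, isrc1.zmmi);
 *   model_mask_expand_i32((void *)&isrc2.zmmi, k16, (void *)&isrc1.zmmi,
 *                         (void *)&mres.zmmi, 16);
 *   check_equal_nd((void *)&res.zmmi, (void *)&mres.zmmi, 16,
 *                  "_mm512_mask_expand_epi32", __LINE__);
 *
 * Each case runs the intrinsic, runs the scalar model on the same data,
 * and compares the results element by element. The *_LOAD_* variants
 * differ only in passing a pointer to the intrinsic; the same models
 * apply since they already read their inputs through pointers.
 */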
void NOINLINE do_m512_expand() {
volatile V512 res; /* shadows the global res; volatile keeps each intrinsic result live */
soft_isrc1_update();
GEN_MASK_CHECK_CASE(_mm512_mask_expand_epi32, i, zmmi, k16, 16,
model_mask_expand_i32, check_equal_nd);
soft_isrc1_update();
GEN_MASK_LOAD_CHECK_CASE(_mm512_mask_expandloadu_epi32, i, zmmi, k16, 16,
model_mask_expand_i32, check_equal_nd);
soft_isrc1_update();
GEN_MASKZ_CHECK_CASE(_mm512_maskz_expand_epi32, i, zmmi, k16, 16,
model_maskz_expand_i32, check_equal_nd);
soft_isrc1_update();
GEN_MASKZ_LOAD_CHECK_CASE(_mm512_maskz_expandloadu_epi32, i, zmmi, k16, 16,
model_maskz_expand_i32, check_equal_nd);
soft_isrc1_update();
GEN_MASK_CHECK_CASE(_mm512_mask_expand_epi64, i, zmmi, k8, 8,
model_mask_expand_i64, check_equal_nq);
soft_isrc1_update();
GEN_MASK_LOAD_CHECK_CASE(_mm512_mask_expandloadu_epi64, i, zmmi, k8, 8,
model_mask_expand_i64, check_equal_nq);
soft_isrc1_update();
GEN_MASKZ_CHECK_CASE(_mm512_maskz_expand_epi64, i, zmmi, k8, 8,
model_maskz_expand_i64, check_equal_nq);
soft_isrc1_update();
GEN_MASKZ_LOAD_CHECK_CASE(_mm512_maskz_expandloadu_epi64, i, zmmi, k8, 8,
model_maskz_expand_i64, check_equal_nq);
soft_fsrc1_update();
GEN_MASK_CHECK_CASE(_mm512_mask_expand_ps, f, zmm, k16, 16,
model_mask_expand_f32, check_equal_nsf);
soft_fsrc1_update();
GEN_MASK_LOAD_CHECK_CASE(_mm512_mask_expandloadu_ps, f, zmm, k16, 16,
model_mask_expand_f32, check_equal_nsf);
soft_fsrc1_update();
GEN_MASKZ_CHECK_CASE(_mm512_maskz_expand_ps, f, zmm, k16, 16,
model_maskz_expand_f32, check_equal_nsf);
soft_fsrc1_update();
GEN_MASKZ_LOAD_CHECK_CASE(_mm512_maskz_expandloadu_ps, f, zmm, k16, 16,
model_maskz_expand_f32, check_equal_nsf);
soft_dsrc1_update();
GEN_MASK_CHECK_CASE(_mm512_mask_expand_pd, d, zmmd, k8, 8,
model_mask_expand_f64, check_equal_ndf);
soft_dsrc1_update();
GEN_MASK_LOAD_CHECK_CASE(_mm512_mask_expandloadu_pd, d, zmmd, k8, 8,
model_mask_expand_f64, check_equal_ndf);
soft_dsrc1_update();
GEN_MASKZ_CHECK_CASE(_mm512_maskz_expand_pd, d, zmmd, k8, 8,
model_maskz_expand_f64, check_equal_ndf);
soft_dsrc1_update();
GEN_MASKZ_LOAD_CHECK_CASE(_mm512_maskz_expandloadu_pd, d, zmmd, k8, 8,
model_maskz_expand_f64, check_equal_ndf);
}
/*
* Model compress intrinsic behavior.
*/
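/*
 * Worked example: with mask = 0x5 and input2 = {10, 11, 12, 13}, the
 * masked compress model packs the active lanes (0 and 2) into the low
 * lanes, producing {10, 12, input1[2], input1[3]}; the maskz models
 * zero the tail instead.
 */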
void NOINLINE model_mask_compress_i32(void *input1, __int64 mask, void *input2,
void *output, int n_elems) {
int i, j = 0;
int *v1i = (int *)input1;
int *v2i = (int *)input2;
int *v3o = (int *)output;
for (i = 0; i < n_elems; i++) {
if ((mask & (1LL << i)) != 0) {
v3o[j] = v2i[i];
j++;
}
}
for (i = j; i < n_elems; i++) {
v3o[i] = v1i[i];
}
}
void NOINLINE model_maskz_compress_i32(__int64 mask, void *input2, void *output,
int n_elems) {
int i, j = 0;
int *v2i = (int *)input2;
int *v3o = (int *)output;
for (i = 0; i < n_elems; i++) {
if ((mask & (1LL << i)) != 0) {
v3o[j] = v2i[i];
j++;
}
}
for (i = j; i < n_elems; i++) {
v3o[i] = 0;
}
}
void NOINLINE model_mask_compress_i64(void *input1, __int64 mask, void *input2,
void *output, int n_elems) {
int i, j = 0;
__int64 *v1i = (__int64 *)input1;
__int64 *v2i = (__int64 *)input2;
__int64 *v3o = (__int64 *)output;
for (i = 0; i < n_elems; i++) {
if ((mask & (1LL << i)) != 0) {
v3o[j] = v2i[i];
j++;
}
}
for (i = j; i < n_elems; i++) {
v3o[i] = v1i[i];
}
}
void NOINLINE model_maskz_compress_i64(__int64 mask, void *input2, void *output,
int n_elems) {
int i, j = 0;
__int64 *v2i = (__int64 *)input2;
__int64 *v3o = (__int64 *)output;
for (i = 0; i < n_elems; i++) {
if ((mask & (1LL << i)) != 0) {
v3o[j] = v2i[i];
j++;
}
}
for (i = j; i < n_elems; i++) {
v3o[i] = 0;
}
}
void NOINLINE model_mask_compress_f32(void *input1, __int64 mask, void *input2,
void *output, int n_elems) {
int i, j = 0;
float *v1i = (float *)input1;
float *v2i = (float *)input2;
float *v3o = (float *)output;
for (i = 0; i < n_elems; i++) {
if ((mask & (1LL << i)) != 0) {
v3o[j] = v2i[i];
j++;
}
}
for (i = j; i < n_elems; i++) {
v3o[i] = v1i[i];
}
}
void NOINLINE model_maskz_compress_f32(__int64 mask, void *input2, void *output,
int n_elems) {
int i, j = 0;
float *v2i = (float *)input2;
float *v3o = (float *)output;
for (i = 0; i < n_elems; i++) {
if ((mask & (1LL << i)) != 0) {
v3o[j] = v2i[i];
j++;
}
}
for (i = j; i < n_elems; i++) {
v3o[i] = 0.f;
}
}
void NOINLINE model_mask_compress_f64(void *input1, __int64 mask, void *input2,
void *output, int n_elems) {
int i, j = 0;
double *v1i = (double *)input1;
double *v2i = (double *)input2;
double *v3o = (double *)output;
for (i = 0; i < n_elems; i++) {
if ((mask & (1LL << i)) != 0) {
v3o[j] = v2i[i];
j++;
}
}
for (i = j; i < n_elems; i++) {
v3o[i] = v1i[i];
}
}
void NOINLINE model_maskz_compress_f64(__int64 mask, void *input2, void *output,
int n_elems) {
int i, j = 0;
double *v2i = (double *)input2;
double *v3o = (double *)output;
for (i = 0; i < n_elems; i++) {
if ((mask & (1LL << i)) != 0) {
v3o[j] = v2i[i];
j++;
}
}
for (i = j; i < n_elems; i++) {
v3o[i] = 0.;
}
}
#define GEN_MASK_STORE_CHECK_CASE(intrin, prefix, suffix, mask, n_elem, \
modeller, checker) \
intrin((void *)&res.suffix, mask, prefix##src1.suffix); \
modeller((void *)&prefix##src2.suffix, mask, (void *)&prefix##src1.suffix, \
(void *)&mres.suffix, n_elem); \
checker((void *)&res.suffix, (void *)&mres.suffix, n_elem, #intrin, __LINE__)
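/*
 * Note: compressstoreu writes only popcount(mask) elements, so the tail
 * of res keeps whatever the preceding GEN_MASK_CHECK_CASE left there
 * (the pass-through lanes taken from src2). That is exactly what the
 * model reproduces by copying input1 (bound to src2 here) into the
 * tail, so each store case relies on running immediately after the
 * matching mask_compress case.
 */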
void NOINLINE do_m512_compress() {
volatile V512 res; /* shadows the global res; volatile keeps each intrinsic result live */
soft_isrc1_update();
GEN_MASK_CHECK_CASE(_mm512_mask_compress_epi32, i, zmmi, k16, 16,
model_mask_compress_i32, check_equal_nd);
soft_isrc1_update();
GEN_MASK_STORE_CHECK_CASE(_mm512_mask_compressstoreu_epi32, i, zmmi, k16, 16,
model_mask_compress_i32, check_equal_nd);
soft_isrc1_update();
GEN_MASKZ_CHECK_CASE(_mm512_maskz_compress_epi32, i, zmmi, k16, 16,
model_maskz_compress_i32, check_equal_nd);
soft_isrc1_update();
GEN_MASK_CHECK_CASE(_mm512_mask_compress_epi64, i, zmmi, k8, 8,
model_mask_compress_i64, check_equal_nq);
soft_isrc1_update();
GEN_MASK_STORE_CHECK_CASE(_mm512_mask_compressstoreu_epi64, i, zmmi, k8, 8,
model_mask_compress_i64, check_equal_nq);
soft_isrc1_update();
GEN_MASKZ_CHECK_CASE(_mm512_maskz_compress_epi64, i, zmmi, k8, 8,
model_maskz_compress_i64, check_equal_nq);
soft_fsrc1_update();
GEN_MASK_CHECK_CASE(_mm512_mask_compress_ps, f, zmm, k16, 16,
model_mask_compress_f32, check_equal_nsf);
soft_fsrc1_update();
GEN_MASK_STORE_CHECK_CASE(_mm512_mask_compressstoreu_ps, f, zmm, k16, 16,
model_mask_compress_f32, check_equal_nsf);
soft_fsrc1_update();
GEN_MASKZ_CHECK_CASE(_mm512_maskz_compress_ps, f, zmm, k16, 16,
model_maskz_compress_f32, check_equal_nsf);
soft_dsrc1_update();
GEN_MASK_CHECK_CASE(_mm512_mask_compress_pd, d, zmmd, k8, 8,
model_mask_compress_f64, check_equal_ndf);
soft_dsrc1_update();
GEN_MASK_STORE_CHECK_CASE(_mm512_mask_compressstoreu_pd, d, zmmd, k8, 8,
model_mask_compress_f64, check_equal_ndf);
soft_dsrc1_update();
GEN_MASKZ_CHECK_CASE(_mm512_maskz_compress_pd, d, zmmd, k8, 8,
model_maskz_compress_f64, check_equal_ndf);
}
int main() {
init();
do_m512_expand();
do_m512_compress();
if (n_errs != 0) {
printf("FAILED\n");
return 1;
}
printf("PASSED\n");
return 0;
}