/*
 * Test the "set1" intrinsics.
 *
 * This test was created to check the correctness
 * of the following intrinsics:
 *      _mm_mask_set1_epi*
 *      _mm_maskz_set1_epi*
 *      _mm256_mask_set1_epi*
 *      _mm256_maskz_set1_epi*
 *      _mm512_mask_set1_epi*
 *      _mm512_maskz_set1_epi*
 *      _mm512_set1_epi*
 */
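
/*
 * The checks below rely on the standard AVX-512 masking conventions:
 * with merge-masking (_mm*_mask_set1_epi*), elements whose mask bit is 0
 * keep the value of the source operand; with zero-masking
 * (_mm*_maskz_set1_epi*), those elements are set to 0.
 */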

#include "m512_test_util.h"

volatile int vol0 = 0;

/* Some scalars that can be ciscized. */
unsigned char char6 = 6;
unsigned short short7 = 7;
int int11 = 11;
__int64 int64_13 = 13;

void NOINLINE invalidate_scalars() {
  /* Make compiler think these variables could have an arbitrary value. */
  char6 += vol0;
  short7 += vol0;
  int11 += vol0;
  int64_13 += vol0;
}

void NOINLINE do_set1_epi8() {
  V512 res, xres, yres;
  V512 expected;
  __mmask64 k = 0xffeebb97abcdffe9;
  __mmask32 k32 = (__mmask32)k;
  __mmask16 k16 = (__mmask16)k32;
  int i;

  res.zmmi = _mm512_set1_epi8(9);
  expected.zmmi = _mm512_set1_epi32(0x09090909);
  check_equal_nd(&res, &expected, 16, "_mm512_set1_epi8", __LINE__);
  invalidate_scalars();

  res.zmmi = _mm512_set1_epi8(char6);
  expected.zmmi = _mm512_set1_epi32(0x06060606);
  check_equal_nd(&res, &expected, 16, "_mm512_set1_epi8 unknown", __LINE__);
  invalidate_scalars();

  /* Masked */
  res.zmmi = _mm512_set1_epi32(-1);
  xres.xmmi[0] = _mm_mask_set1_epi8(res.xmmi[0], k16, 14);
  yres.ymmi[0] = _mm256_mask_set1_epi8(res.ymmi[0], k32, 14);
  res.zmmi = _mm512_mask_set1_epi8(res.zmmi, k, 14);
  expected.zmmi = _mm512_set1_epi32(0x0e0e0e0e);
  for (i = 0; i < 64; i++) {
    if ((k & ((__mmask64)1 << i)) == 0) {
      expected.s8[i] = -1;
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_mask_set1_epi8", __LINE__);
  check_equal_nd(&yres, &expected, 8, "_mm256_mask_set1_epi8", __LINE__);
  check_equal_nd(&xres, &expected, 4, "_mm_mask_set1_epi8", __LINE__);
  invalidate_scalars();

  /* Zero masked */
  res.zmmi = _mm512_maskz_set1_epi8(k, 19);
  xres.xmmi[0] = _mm_maskz_set1_epi8(k16, 19);
  yres.ymmi[0] = _mm256_maskz_set1_epi8(k32, 19);
  expected.zmmi = _mm512_set1_epi32(0x13131313);
  for (i = 0; i < 64; i++) {
    if ((k & ((__mmask64)1 << i)) == 0) {
      expected.s8[i] = 0;
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_maskz_set1_epi8", __LINE__);
  check_equal_nd(&yres, &expected, 8, "_mm256_maskz_set1_epi8", __LINE__);
  check_equal_nd(&xres, &expected, 4, "_mm_maskz_set1_epi8", __LINE__);
  invalidate_scalars();

  res.zmmi = _mm512_maskz_set1_epi8(k, char6);
  expected.zmmi = _mm512_set1_epi32(0x06060606);
  for (i = 0; i < 64; i++) {
    if ((k & ((__mmask64)1 << i)) == 0) {
      expected.s8[i] = 0;
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_maskz_set1_epi8 unknown",
                 __LINE__);
}

void NOINLINE do_set1_epi16() {
  V512 res, xres, yres;
  V512 expected;
  __mmask32 k = 0xabcdffe9;
  __mmask16 k16 = (__mmask16)k;
  __mmask8 k8 = (__mmask8)k16;
  int i;

  res.zmmi = _mm512_set1_epi16(9);
  expected.zmmi = _mm512_set1_epi32((9 << 16) | 9);
  check_equal_nd(&res, &expected, 16, "_mm512_set1_epi16", __LINE__);
  invalidate_scalars();

  res.zmmi = _mm512_set1_epi16(short7);
  expected.zmmi = _mm512_set1_epi32((7 << 16) | 7);
  check_equal_nd(&res, &expected, 16, "_mm512_set1_epi16 unknown", __LINE__);
  invalidate_scalars();

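  /* Masked */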
  res.zmmi = _mm512_set1_epi32(-1);
  xres.xmmi[0] = _mm_mask_set1_epi16(res.xmmi[0], k8, 14);
  yres.ymmi[0] = _mm256_mask_set1_epi16(res.ymmi[0], k16, 14);
  res.zmmi = _mm512_mask_set1_epi16(res.zmmi, k, 14);
  expected.zmmi = _mm512_set1_epi32((14 << 16) | 14);
  for (i = 0; i < 32; i++) {
    if ((k & ((__mmask32)1 << i)) == 0) {
      expected.s16[i] = -1;
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_mask_set1_epi16", __LINE__);
  check_equal_nd(&yres, &expected, 8, "_mm256_mask_set1_epi16", __LINE__);
  check_equal_nd(&xres, &expected, 4, "_mm_mask_set1_epi16", __LINE__);
  invalidate_scalars();

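  /* Zero masked */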
  res.zmmi = _mm512_maskz_set1_epi16(k, 19);
  xres.xmmi[0] = _mm_maskz_set1_epi16(k8, 19);
  yres.ymmi[0] = _mm256_maskz_set1_epi16(k16, 19);
  expected.zmmi = _mm512_set1_epi32((19 << 16) | 19);
  for (i = 0; i < 32; i++) {
    if ((k & ((__mmask32)1 << i)) == 0) {
      expected.s16[i] = 0;
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_maskz_set1_epi16", __LINE__);
  check_equal_nd(&yres, &expected, 8, "_mm256_maskz_set1_epi16", __LINE__);
  check_equal_nd(&xres, &expected, 4, "_mm_maskz_set1_epi16", __LINE__);
  invalidate_scalars();

  res.zmmi = _mm512_maskz_set1_epi16(k, short7);
  expected.zmmi = _mm512_set1_epi32((7 << 16) | 7);
  for (i = 0; i < 32; i++) {
    if ((k & ((__mmask32)1 << i)) == 0) {
      expected.s16[i] = 0;
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_maskz_set1_epi16 unknown",
                 __LINE__);
}

void NOINLINE do_set1_epi32() {
  V512 res, xres, yres;
  V512 expected;
  __mmask16 k = 0xf7e6;
  __mmask8 k8 = (__mmask8)k;
  volatile int i;

  res.zmmi = _mm512_set1_epi32(9);
  for (i = 0; i < 16; i++) {
    expected.s32[i] = 9;
  }
  check_equal_nd(&res, &expected, 16, "_mm512_set1_epi32", __LINE__);
  invalidate_scalars();

  res.zmmi = _mm512_set1_epi32(int11);
  for (i = 0; i < 16; i++) {
    expected.s32[i] = 11;
  }
  check_equal_nd(&res, &expected, 16, "_mm512_set1_epi32 unknown", __LINE__);
  invalidate_scalars();

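  /* Masked */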
  res.zmmi = _mm512_set1_epi32(-1);
  xres.xmmi[0] = _mm_mask_set1_epi32(res.xmmi[0], k8, 14);
  yres.ymmi[0] = _mm256_mask_set1_epi32(res.ymmi[0], k8, 14);
  res.zmmi = _mm512_mask_set1_epi32(res.zmmi, k, 14);
  for (i = 0; i < 16; i++) {
    if ((k & (1 << i)) == 0) {
      expected.s32[i] = -1;
    } else {
      expected.s32[i] = 14;
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_mask_set1_epi32", __LINE__);
  check_equal_nd(&yres, &expected, 8, "_mm256_mask_set1_epi32", __LINE__);
  check_equal_nd(&xres, &expected, 4, "_mm_mask_set1_epi32", __LINE__);
  invalidate_scalars();

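  /* Zero masked */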
  res.zmmi = _mm512_maskz_set1_epi32(k, 19);
  xres.xmmi[0] = _mm_maskz_set1_epi32(k8, 19);
  yres.ymmi[0] = _mm256_maskz_set1_epi32(k8, 19);
  for (i = 0; i < 16; i++) {
    if ((k & (1 << i)) == 0) {
      expected.s32[i] = 0;
    } else {
      expected.s32[i] = 19;
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_maskz_set1_epi32", __LINE__);
  check_equal_nd(&yres, &expected, 8, "_mm256_maskz_set1_epi32", __LINE__);
  check_equal_nd(&xres, &expected, 4, "_mm_maskz_set1_epi32", __LINE__);
  invalidate_scalars();

  res.zmmi = _mm512_maskz_set1_epi32(k, int11);
  for (i = 0; i < 16; i++) {
    if ((k & (1 << i)) == 0) {
      expected.s32[i] = 0;
    } else {
      expected.s32[i] = 11;
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_maskz_set1_epi32 unknown",
                 __LINE__);
}

void NOINLINE do_set1_epi64() {
  V512 res, xres, yres;
  V512 expected;
  __mmask8 k = 0xe7;
  volatile int i;

  res.zmmi = _mm512_set1_epi64(9);
  for (i = 0; i < 8; i++) {
    expected.s64[i] = 9;
  }
  check_equal_nd(&res, &expected, 16, "_mm512_set1_epi64", __LINE__);
  invalidate_scalars();

  res.zmmi = _mm512_set1_epi64(int64_13);
  for (i = 0; i < 8; i++) {
    expected.s64[i] = 13;
  }
  check_equal_nd(&res, &expected, 16, "_mm512_set1_epi64 unknown", __LINE__);
  invalidate_scalars();

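  /* Masked */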
  res.zmmi = _mm512_set1_epi64(-1);
  xres.xmmi[0] = _mm_mask_set1_epi64(res.xmmi[0], k, 14);
  yres.ymmi[0] = _mm256_mask_set1_epi64(res.ymmi[0], k, 14);
  res.zmmi = _mm512_mask_set1_epi64(res.zmmi, k, 14);
  for (i = 0; i < 8; i++) {
    if ((k & (1 << i)) == 0) {
      expected.s64[i] = -1;
    } else {
      expected.s64[i] = 14;
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_mask_set1_epi64", __LINE__);
  check_equal_nd(&yres, &expected, 8, "_mm256_mask_set1_epi64", __LINE__);
  check_equal_nd(&xres, &expected, 4, "_mm_mask_set1_epi64", __LINE__);
  invalidate_scalars();

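  /* Zero masked */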
  res.zmmi = _mm512_maskz_set1_epi64(k, 19);
  xres.xmmi[0] = _mm_maskz_set1_epi64(k, 19);
  yres.ymmi[0] = _mm256_maskz_set1_epi64(k, 19);
  for (i = 0; i < 8; i++) {
    if ((k & (1 << i)) == 0) {
      expected.s64[i] = 0;
    } else {
      expected.s64[i] = 19;
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_maskz_set1_epi64", __LINE__);
  check_equal_nd(&yres, &expected, 8, "_mm256_maskz_set1_epi64", __LINE__);
  check_equal_nd(&xres, &expected, 4, "_mm_maskz_set1_epi64", __LINE__);
  invalidate_scalars();

  res.zmmi = _mm512_maskz_set1_epi64(k, int64_13);
  for (i = 0; i < 8; i++) {
    if ((k & (1 << i)) == 0) {
      expected.s64[i] = 0;
    } else {
      expected.s64[i] = 13;
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_maskz_set1_epi64 unknown",
                 __LINE__);
}

int main(int argc, char *argv[]) {
  do_set1_epi8();
  do_set1_epi16();
  do_set1_epi32();
  do_set1_epi64();

  if (n_errs) {
    printf("FAILED\n");
    return 1;
  }

  printf("PASSED\n");
  return 0;
}