blob: 4639ae179524b2dd7584a8833cb4238705c5c9b6 [file] [log] [blame]
/*
* Test intrinsics for vptestm[bdqw] and vptestnm[bdqw].
* Here we check the 128-, 256- and 512-bit [mask_]test_epi[8|16|32|64]_mask
* and [mask_]testn_epi[8|16|32|64]_mask intrinsics.
*/
#include "m512_test_util.h"
#include <stdio.h>
/*
 * Upper bound of the masked-intrinsic loop inside the TEST macro, i.e. how
 * many random write masks are tried per TEST invocation.
 * NOTE(review): presumably volatile so the compiler cannot treat the trip
 * count as a compile-time constant -- confirm.
 */
volatile int ten = 10;
/*
 * TEST(base_intrin, mask_intrin, r1, r2, correct_res)
 *
 * 1. Calls base_intrin(r1, r2) and compares the returned mask with
 *    correct_res.
 * 2. Then, "ten" times, builds a 64-bit write mask from two rand() calls,
 *    calls mask_intrin(mask, r1, r2) and compares the result with
 *    (correct_res & mask).
 *
 * Every mismatch prints a diagnostic (the intrinsic name and the operand
 * expressions are stringified into the message) and increments n_errs.
 *
 * r1, r2 and correct_res are expanded more than once, so they must be free of
 * side effects.  The body is wrapped in do { ... } while (0) so that
 * "TEST(...);" is exactly one statement and is safe under an unbraced
 * if/else.  The 64-bit values are printed as two 32-bit halves; the halves
 * are extracted with unsigned arithmetic so the shift is well defined even
 * when the top bit of the mask is set.
 */
#define TEST(base_intrin, mask_intrin, r1, r2, correct_res) \
  do { \
    __int64 _i, _mask; \
    __int64 _cres = (correct_res); \
    __int64 _res = base_intrin((r1), (r2)); \
    if (_res != _cres) { \
      printf(#base_intrin "(" #r1 ", " #r2 ") failed\n"); \
      printf("Expected 0x%08x%08x; got 0x%08x%08x\n", \
             (unsigned)((unsigned long long)_cres >> 32), (unsigned)_cres, \
             (unsigned)((unsigned long long)_res >> 32), (unsigned)_res); \
      n_errs++; \
    } \
    for (_i = 0; _i < ten; _i++) { \
      _mask = ((__int64)rand() << 32) | rand(); \
      _res = mask_intrin(_mask, (r1), (r2)); \
      _cres = (correct_res)&_mask; \
      if (_res != _cres) { \
        printf(#mask_intrin "(0x%08x%08x, " #r1 ", " #r2 ") " \
                            "failed\n", \
               (unsigned)((unsigned long long)_mask >> 32), (unsigned)_mask); \
        printf("Expected 0x%08x%08x; got 0x%08x%08x\n", \
               (unsigned)((unsigned long long)_cres >> 32), (unsigned)_cres, \
               (unsigned)((unsigned long long)_res >> 32), (unsigned)_res); \
        n_errs++; \
      } \
    } \
  } while (0)
/*
 * Operand vectors shared by all do_ptest* functions; filled in by init().
 * The iN vectors have every N-bit element set to -1.  The mixN vectors have
 * even-indexed N-bit elements set to -1 and odd-indexed ones set to 0.
 */
/* 8-bit elements. */
V512 i8;
V512 mix8;
/* 16-bit elements. */
V512 i16;
V512 mix16;
/* 32-bit elements. */
V512 i32;
V512 mix32;
/* 64-bit elements. */
V512 i64;
V512 mix64;
/*
 * Initialized to 0 and never written in this file.  Used as the index of the
 * self-assignments placed between TEST invocations.
 * NOTE(review): presumably volatile so the compiler cannot prove those
 * self-assignments are no-ops -- confirm.
 */
volatile int vol0 = 0;
/*
 * Populate the global operand vectors.
 *
 * Every element of i8/i16/i32/i64 is set to -1.  In mix8/mix16/mix32/mix64
 * the even-indexed elements are set to -1 and the odd-indexed ones to 0.
 * NOTE(review): the counter is volatile, presumably so the compiler cannot
 * fold these stores into compile-time constants -- confirm.
 */
void NOINLINE init() {
  volatile int lane;

  /* s8 view: indices 0..63. */
  for (lane = 0; lane < 64; lane++) {
    i8.s8[lane] = -1;
    mix8.s8[lane] = ((lane & 1) == 0) ? -1 : 0;
  }

  /* s16 view: indices 0..31. */
  for (lane = 0; lane < 32; lane++) {
    i16.s16[lane] = -1;
    mix16.s16[lane] = ((lane & 1) == 0) ? -1 : 0;
  }

  /* s32 view: indices 0..15. */
  for (lane = 0; lane < 16; lane++) {
    i32.s32[lane] = -1;
    mix32.s32[lane] = ((lane & 1) == 0) ? -1 : 0;
  }

  /* s64 view: indices 0..7. */
  for (lane = 0; lane < 8; lane++) {
    i64.s64[lane] = -1;
    mix64.s64[lane] = ((lane & 1) == 0) ? -1 : 0;
  }
}
/*
 * Check the test_epi8_mask intrinsics and their mask_ variants on the
 * 128-, 256- and 512-bit views of mix8 and i8.
 *
 * Each expected mask (0x5555, 0x55555555, 0x5555555555555555) has one bit per
 * byte element, set for the even-indexed elements -- the ones init() sets to
 * -1 in mix8 -- and clear for the odd-indexed ones.
 */
void NOINLINE do_ptestmb() {
/* 128-bit form: 16 byte elements. */
TEST(_mm_test_epi8_mask, _mm_mask_test_epi8_mask, mix8.xmmi[0], i8.xmmi[0],
0x5555);
/*
 * Self-assignment through the volatile index vol0.
 * NOTE(review): presumably makes the compiler assume i8 may have changed
 * before the next TEST -- confirm.
 */
i8.xmmi[vol0] = i8.xmmi[vol0]; /* No-op. */
/* 256-bit form: 32 byte elements. */
TEST(_mm256_test_epi8_mask, _mm256_mask_test_epi8_mask, mix8.ymmi[0],
i8.ymmi[0], 0x55555555);
i8.xmmi[vol0] = i8.xmmi[vol0]; /* No-op. */
/* 512-bit form: 64 byte elements. */
TEST(_mm512_test_epi8_mask, _mm512_mask_test_epi8_mask, mix8.zmmi, i8.zmmi,
0x5555555555555555);
}
/*
 * Check the test_epi16_mask intrinsics and their mask_ variants on the
 * 128-, 256- and 512-bit views of mix16 and i16.
 *
 * Each expected mask (0x55, 0x5555, 0x55555555) has one bit per 16-bit
 * element, set for the even-indexed elements -- the ones init() sets to -1 in
 * mix16 -- and clear for the odd-indexed ones.
 */
void NOINLINE do_ptestmw() {
/* 128-bit form: 8 word elements. */
TEST(_mm_test_epi16_mask, _mm_mask_test_epi16_mask, mix16.xmmi[0],
i16.xmmi[0], 0x55);
/*
 * Self-assignment through the volatile index vol0.
 * NOTE(review): presumably makes the compiler assume i16 may have changed
 * before the next TEST -- confirm.
 */
i16.xmmi[vol0] = i16.xmmi[vol0]; /* No-op. */
/* 256-bit form: 16 word elements. */
TEST(_mm256_test_epi16_mask, _mm256_mask_test_epi16_mask, mix16.ymmi[0],
i16.ymmi[0], 0x5555);
i16.xmmi[vol0] = i16.xmmi[vol0]; /* No-op. */
/* 512-bit form: 32 word elements. */
TEST(_mm512_test_epi16_mask, _mm512_mask_test_epi16_mask, mix16.zmmi,
i16.zmmi, 0x55555555);
}
/*
 * Check the test_epi32_mask intrinsics and their mask_ variants on the
 * 128-, 256- and 512-bit views of mix32 and i32.
 *
 * Each expected mask (0x5, 0x55, 0x5555) has one bit per 32-bit element, set
 * for the even-indexed elements -- the ones init() sets to -1 in mix32 -- and
 * clear for the odd-indexed ones.
 */
void NOINLINE do_ptestmd() {
/* 128-bit form: 4 dword elements. */
TEST(_mm_test_epi32_mask, _mm_mask_test_epi32_mask, mix32.xmmi[0],
i32.xmmi[0], 0x5);
/*
 * Self-assignment through the volatile index vol0.
 * NOTE(review): presumably makes the compiler assume i32 may have changed
 * before the next TEST -- confirm.
 */
i32.xmmi[vol0] = i32.xmmi[vol0]; /* No-op. */
/* 256-bit form: 8 dword elements. */
TEST(_mm256_test_epi32_mask, _mm256_mask_test_epi32_mask, mix32.ymmi[0],
i32.ymmi[0], 0x55);
i32.xmmi[vol0] = i32.xmmi[vol0]; /* No-op. */
/* 512-bit form: 16 dword elements. */
TEST(_mm512_test_epi32_mask, _mm512_mask_test_epi32_mask, mix32.zmmi,
i32.zmmi, 0x5555);
}
/*
 * Check the test_epi64_mask intrinsics and their mask_ variants on the
 * 128-, 256- and 512-bit views of mix64 and i64.
 *
 * Each expected mask (0x1, 0x5, 0x55) has one bit per 64-bit element, set for
 * the even-indexed elements -- the ones init() sets to -1 in mix64 -- and
 * clear for the odd-indexed ones.
 */
void NOINLINE do_ptestmq() {
/* 128-bit form: 2 qword elements. */
TEST(_mm_test_epi64_mask, _mm_mask_test_epi64_mask, mix64.xmmi[0],
i64.xmmi[0], 0x1);
/*
 * Self-assignment through the volatile index vol0.
 * NOTE(review): presumably makes the compiler assume i64 may have changed
 * before the next TEST -- confirm.
 */
i64.xmmi[vol0] = i64.xmmi[vol0]; /* No-op. */
/* 256-bit form: 4 qword elements. */
TEST(_mm256_test_epi64_mask, _mm256_mask_test_epi64_mask, mix64.ymmi[0],
i64.ymmi[0], 0x5);
i64.xmmi[vol0] = i64.xmmi[vol0]; /* No-op. */
/* 512-bit form: 8 qword elements. */
TEST(_mm512_test_epi64_mask, _mm512_mask_test_epi64_mask, mix64.zmmi,
i64.zmmi, 0x55);
}
/*
 * Check the testn_epi8_mask intrinsics and their mask_ variants on the
 * 128-, 256- and 512-bit views of mix8 and i8.
 *
 * Each expected mask (0xaaaa, 0xaaaaaaaa, 0xaaaaaaaaaaaaaaaa) has one bit per
 * byte element, set for the odd-indexed elements -- the ones init() sets to 0
 * in mix8 -- and clear for the even-indexed ones.
 */
void NOINLINE do_ptestnmb() {
/* 128-bit form: 16 byte elements. */
TEST(_mm_testn_epi8_mask, _mm_mask_testn_epi8_mask, mix8.xmmi[0], i8.xmmi[0],
0xaaaa);
/*
 * Self-assignment through the volatile index vol0.
 * NOTE(review): presumably makes the compiler assume i8 may have changed
 * before the next TEST -- confirm.
 */
i8.xmmi[vol0] = i8.xmmi[vol0]; /* No-op. */
/* 256-bit form: 32 byte elements. */
TEST(_mm256_testn_epi8_mask, _mm256_mask_testn_epi8_mask, mix8.ymmi[0],
i8.ymmi[0], 0xaaaaaaaa);
i8.xmmi[vol0] = i8.xmmi[vol0]; /* No-op. */
/* 512-bit form: 64 byte elements. */
TEST(_mm512_testn_epi8_mask, _mm512_mask_testn_epi8_mask, mix8.zmmi, i8.zmmi,
0xaaaaaaaaaaaaaaaa);
}
/*
 * Check the testn_epi16_mask intrinsics and their mask_ variants on the
 * 128-, 256- and 512-bit views of mix16 and i16.
 *
 * Each expected mask (0xaa, 0xaaaa, 0xaaaaaaaa) has one bit per 16-bit
 * element, set for the odd-indexed elements -- the ones init() sets to 0 in
 * mix16 -- and clear for the even-indexed ones.
 */
void NOINLINE do_ptestnmw() {
/* 128-bit form: 8 word elements. */
TEST(_mm_testn_epi16_mask, _mm_mask_testn_epi16_mask, mix16.xmmi[0],
i16.xmmi[0], 0xaa);
/*
 * Self-assignment through the volatile index vol0.
 * NOTE(review): presumably makes the compiler assume i16 may have changed
 * before the next TEST -- confirm.
 */
i16.xmmi[vol0] = i16.xmmi[vol0]; /* No-op. */
/* 256-bit form: 16 word elements. */
TEST(_mm256_testn_epi16_mask, _mm256_mask_testn_epi16_mask, mix16.ymmi[0],
i16.ymmi[0], 0xaaaa);
i16.xmmi[vol0] = i16.xmmi[vol0]; /* No-op. */
/* 512-bit form: 32 word elements. */
TEST(_mm512_testn_epi16_mask, _mm512_mask_testn_epi16_mask, mix16.zmmi,
i16.zmmi, 0xaaaaaaaa);
}
/*
 * Check the testn_epi32_mask intrinsics and their mask_ variants on the
 * 128-, 256- and 512-bit views of mix32 and i32.
 *
 * Each expected mask (0xa, 0xaa, 0xaaaa) has one bit per 32-bit element, set
 * for the odd-indexed elements -- the ones init() sets to 0 in mix32 -- and
 * clear for the even-indexed ones.
 */
void NOINLINE do_ptestnmd() {
/* 128-bit form: 4 dword elements. */
TEST(_mm_testn_epi32_mask, _mm_mask_testn_epi32_mask, mix32.xmmi[0],
i32.xmmi[0], 0xa);
/*
 * Self-assignment through the volatile index vol0.
 * NOTE(review): presumably makes the compiler assume i32 may have changed
 * before the next TEST -- confirm.
 */
i32.xmmi[vol0] = i32.xmmi[vol0]; /* No-op. */
/* 256-bit form: 8 dword elements. */
TEST(_mm256_testn_epi32_mask, _mm256_mask_testn_epi32_mask, mix32.ymmi[0],
i32.ymmi[0], 0xaa);
i32.xmmi[vol0] = i32.xmmi[vol0]; /* No-op. */
/* 512-bit form: 16 dword elements. */
TEST(_mm512_testn_epi32_mask, _mm512_mask_testn_epi32_mask, mix32.zmmi,
i32.zmmi, 0xaaaa);
}
/*
 * Check the testn_epi64_mask intrinsics and their mask_ variants on the
 * 128-, 256- and 512-bit views of mix64 and i64.
 *
 * Each expected mask (0x2, 0xa, 0xaa) has one bit per 64-bit element, set for
 * the odd-indexed elements -- the ones init() sets to 0 in mix64 -- and clear
 * for the even-indexed ones.
 */
void NOINLINE do_ptestnmq() {
/* 128-bit form: 2 qword elements. */
TEST(_mm_testn_epi64_mask, _mm_mask_testn_epi64_mask, mix64.xmmi[0],
i64.xmmi[0], 0x2);
/*
 * Self-assignment through the volatile index vol0.
 * NOTE(review): presumably makes the compiler assume i64 may have changed
 * before the next TEST -- confirm.
 */
i64.xmmi[vol0] = i64.xmmi[vol0]; /* No-op. */
/* 256-bit form: 4 qword elements. */
TEST(_mm256_testn_epi64_mask, _mm256_mask_testn_epi64_mask, mix64.ymmi[0],
i64.ymmi[0], 0xa);
i64.xmmi[vol0] = i64.xmmi[vol0]; /* No-op. */
/* 512-bit form: 8 qword elements. */
TEST(_mm512_testn_epi64_mask, _mm512_mask_testn_epi64_mask, mix64.zmmi,
i64.zmmi, 0xaa);
}
/*
 * Test driver: fills the operand vectors, runs every vptestm and vptestnm
 * check in turn, then reports the overall verdict.
 *
 * Prints "PASSED" and returns 0 when n_errs is zero; otherwise prints
 * "FAILED" and returns 1.  argc and argv are not used.
 */
int main(int argc, char *argv[]) {
  init();

  /* vptestm checks: byte, word, dword, qword. */
  do_ptestmb();
  do_ptestmw();
  do_ptestmd();
  do_ptestmq();

  /* vptestnm checks: byte, word, dword, qword. */
  do_ptestnmb();
  do_ptestnmw();
  do_ptestnmd();
  do_ptestnmq();

  /* Clean run: nothing recorded a mismatch. */
  if (!n_errs) {
    printf("PASSED\n");
    return 0;
  }

  printf("FAILED\n");
  return 1;
}