/*
 * Exercise 512-bit aligned and unaligned moves, with and without
 * write-masking.
 * This test was created to check correct support
 * for the following instructions:
 * vmovdqa32 / vmovdqu32
 * vmovdqa64 / vmovdqu64
 */
#include "m512_test_util.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h> /* memset */
#if defined(__x86_64) || defined(_M_X64)
/*
 * Exercise encoding of zmm registers above zmm15, including one that
 * needs the EVEX.R extension bit (zmm28) and one that does not (zmm20);
 * both need the EVEX.R' bit.
 */
#define bigzmm zmm28
#define mediumzmm zmm20
#else
#define bigzmm zmm7
#define mediumzmm zmm3
#endif
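/*
 * For reference, the unmasked chain in do_mov_32() corresponds roughly to
 * these AVX-512F intrinsics (a sketch only; inline asm is used instead so
 * the test can pin the specific register numbers above, which intrinsics
 * cannot do):
 *   __m512i v = _mm512_load_epi32(psrc);  // vmovdqa32 zmm, [mem]
 *   _mm512_storeu_si512(pdst, v);         // vmovdqu32 [mem], zmm
 */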
volatile int vi[16] = {0x12345678, 0x87654321, 0x05040302, 0x8594a3b2,
0x92745638, 0xa0b0c0d0, 0xd0b0a040, 0x14322341,
0xf24bee68, 0x3ed29ff6, 0xa2d46e46, 0x02119d99,
0x1289a683, 0x0c4563de, 0x3edfd4a4, 0x49d52d48};
V512 src, dst1, dst2, t0, t1;
static void NOINLINE init() {
int i;
for (i = 0; i < 16; i++) {
src.s32[i] = vi[i];
}
memset(&dst1, 0, sizeof(dst1));
memset(&dst2, 0, sizeof(dst2));
}
void NOINLINE do_mov_32() {
int i;
void *psrc = &src;
void *pdst = &dst1;
init();
__asm {
mov FULL_IREG(cx), [psrc]
mov FULL_IREG(dx), [pdst]
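/* aligned load, register-to-register move, then unaligned store */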
vmovdqa32 bigzmm, [FULL_IREG(cx)]
vmovdqa32 mediumzmm, bigzmm
vmovdqu32 [FULL_IREG(dx)], mediumzmm
}
for (i = 0; i < 16; i++) {
dst2.s32[i] = src.s32[i];
}
check_equal_nd(&dst1, &dst2, 16, "vmovdqa32", __LINE__);
}
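/*
 * For reference, the masked chains below correspond roughly to these
 * AVX-512F intrinsics (a sketch only; unselected lanes of dst1 keep the
 * zeros written by init() because the final store is masked):
 *   __mmask16 m = (__mmask16)0xaaaa;
 *   __m512i v = _mm512_mask_load_epi32(_mm512_setzero_si512(), m, psrc);
 *   _mm512_mask_storeu_epi32(pdst, m, v);
 */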
void NOINLINE do_mov_32_masked() {
int i, k;
void *psrc = &src;
void *pdst = &dst1;
k = 0xaaaa; /* every other bit is set */
init();
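/*
 * vzeroall gives zmm0-zmm15 a known zero background; correctness of the
 * final comparison relies on the masked store leaving unselected lanes
 * of dst1 untouched (they stay zero from init()).
 */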
__asm {
mov eax, k
kmovw k6, eax
mov FULL_IREG(cx), [psrc]
mov FULL_IREG(dx), [pdst]
vzeroall
vmovdqa32 bigzmm{k6}, [FULL_IREG(cx)]
vmovdqa32 mediumzmm{k6}, bigzmm
vmovdqu32 [FULL_IREG(dx)]{k6}, mediumzmm
}
/*
 * Emulate the net effect of the masked chain above: lanes not selected
 * by k6 stay zero in both dst1 (zeroed in init() and untouched by the
 * masked store) and dst2, so only the selected lanes are copied.
 */
for (i = 0; i < 16; i++) {
if (k & (1 << i)) {
dst2.u32[i] = src.u32[i];
}
}
check_equal_nd(&dst1, &dst2, 16, "vmovdqa32 masked", __LINE__);
}
void NOINLINE do_mov_64() {
int i;
void *psrc = &src;
void *pdst = &dst1;
init();
__asm {
mov FULL_IREG(cx), [psrc]
mov FULL_IREG(dx), [pdst]
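/* the same load/move/store chain at 64-bit element granularity */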
vmovdqa64 bigzmm, [FULL_IREG(cx)]
vmovdqa64 mediumzmm, bigzmm
vmovdqu64 [FULL_IREG(dx)], mediumzmm
}
for (i = 0; i < 8; i++) {
dst2.u64[i] = src.u64[i];
}
check_equal_nq(&dst1, &dst2, 8, "vmovdqa64", __LINE__);
}
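/*
 * Unlike do_mov_32_masked(), which only checks the net effect of the
 * chain, this test emulates each masked instruction separately.
 */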
void NOINLINE do_mov_64_masked() {
int i, k;
void *psrc = &src;
void *pdst = &dst1;
k = 0xaa; /* every other bit is set */
init();
__asm {
mov eax, k
kmovw k5, eax
mov FULL_IREG(cx), [psrc]
mov FULL_IREG(dx), [pdst]
vzeroall
vmovdqa64 bigzmm{k5}, [FULL_IREG(cx)]
vmovdqa64 mediumzmm{k5}, bigzmm
vmovdqu64 [FULL_IREG(dx)]{k5}, mediumzmm
}
/*
 * Emulate the chain step by step: t0 models bigzmm and t1 models
 * mediumzmm. Only the lanes selected by k5 matter, since every
 * instruction in the chain is masked with k5.
 */
memset(&t0, 0, sizeof(t0));
memset(&t1, 0, sizeof(t1));
/* emulate vmovdqa64 bigzmm{k5}, [src] */
for (i = 0; i < 8; i++) {
if (k & (1 << i)) {
t0.u64[i] = src.u64[i];
}
}
/* emulate vmovdqa64 mediumzmm{k5}, bigzmm */
for (i = 0; i < 8; i++) {
if (k & (1 << i)) {
t1.u64[i] = t0.u64[i];
}
}
/* emulate vmovdqu64 [dst1]{k5}, mediumzmm */
for (i = 0; i < 8; i++) {
if (k & (1 << i)) {
dst2.u64[i] = t1.u64[i];
}
}
check_equal_nq(&dst1, &dst2, 8, "vmovdqa64 masked", __LINE__);
}
int main() {
do_mov_32();
do_mov_32_masked();
do_mov_64();
do_mov_64_masked();
if (n_errs != 0) {
printf("FAILED\n");
return 1;
}
printf("PASSED\n");
return 0;
}