blob: 2d62f9e9463c29f2b4e3823cdc3d807e1b13a957 [file] [log] [blame]
/*
* Test 32-bit and 16-bit element shuffles.
*
* This test was created to check the correctness
* of the following intrinsics support:
* _mm_shuffle_epi32()
* _mm_shufflehi_epi16()
* _mm_shufflelo_epi16()
* _mm256_shuffle_epi32()
* _mm256_shufflehi_epi16()
* _mm256_shufflelo_epi16()
* _mm512_shuffle_epi32()
* _mm512_shufflehi_epi16()
* _mm512_shufflelo_epi16()
*/
#include "m512_test_util.h"
#include <stdio.h>
#include <string.h>
/* Operand vectors shared by the tests; every one of them is filled by init(). */
V512 src;      /* data fed to the shuffle intrinsics */
V512 passthru; /* merge source for the mask_ variants */
V512 zeros;    /* all-zero vector, the reference for the maskz_ variants */
V512 counts16;
V512 counts32;
V512 counts64;

/* Write masks handed to the masked intrinsics and to the emulation helpers. */
__mmask8 k8 = 0xf9;
__mmask16 k16 = 0x9ffe;

/* Volatile index used by the soft_*_update() macros. */
volatile int vol0;
/*
 * Optimization barriers. Each macro stores one 128-bit element of the named
 * global back onto itself, using the volatile index vol0, so the compiler has
 * to assume that the vector may have changed and cannot reuse a value it
 * loaded from it earlier (e.g. via PRE).
 *
 * Place the macro for the relevant operand between two tests.
 */
#define soft_src_update() (src.xmmi[vol0] = src.xmmi[vol0])
#define soft_counts16_update() (counts16.xmmi[vol0] = counts16.xmmi[vol0])
#define soft_counts32_update() (counts32.xmmi[vol0] = counts32.xmmi[vol0])
#define soft_counts64_update() (counts64.xmmi[vol0] = counts64.xmmi[vol0])
/*
 * Populates the global test vectors with fixed, reproducible contents:
 *   counts16 / counts32 / counts64 - every element set to 4 / 3 / 9
 *   zeros                          - all sixteen 32-bit elements cleared
 *   src                            - quadratic in the element index, with the
 *                                    sign flipped on odd indices
 *   passthru                       - a different quadratic in the element index
 */
void NOINLINE init() {
  volatile int idx;

  for (idx = 0; idx < 32; idx++) {
    counts16.s16[idx] = 4;
  }

  for (idx = 0; idx < 8; idx++) {
    counts64.s64[idx] = 9;
  }

  for (idx = 0; idx < 16; idx++) {
    /* Odd elements of src carry the negated value. */
    const int sign = (idx & 0x1) ? -1 : 1;

    zeros.u32[idx] = 0;
    counts32.s32[idx] = 3;
    passthru.s32[idx] = 48 * idx * idx + 100 * idx - 9;
    src.s32[idx] = sign * (-27 * idx * idx + 300 * idx - 82);
  }
}
/*
 * Scalar reference model for the (masked) 32-bit element shuffles.
 *
 *   presult  - V512 that receives the first `size` 32-bit elements
 *   p        - V512 holding the data to shuffle
 *   mask_src - V512 supplying the element wherever the mask bit is clear
 *   size     - number of 32-bit elements to produce
 *   control  - four 2-bit selectors; selector (i % 4) picks the source element
 *              inside the group of four that contains element i
 *   mask     - bit i set: element i is shuffled; bit i clear: element i is
 *              copied from mask_src
 *
 * Elements at index `size` and above are left untouched.
 */
void NOINLINE emulate_shuffle(void *presult, const void *p,
                              const void *mask_src, int size, int control,
                              int mask) {
  V512 *result = (V512 *)presult;
  const V512 *v = (const V512 *)p;
  /* Named "fallback" so it does not shadow the global `src`. */
  const V512 *fallback = (const V512 *)mask_src;
  /* Test mask bits on an unsigned value so no signed shift is involved. */
  const unsigned int lane_mask = (unsigned int)mask;
  int i;

  for (i = 0; i < size; i++) {
    if (((lane_mask >> i) & 1u) == 0) {
      result->u32[i] = fallback->u32[i];
    } else {
      const int sel = (control >> (2 * (i % 4))) & 3;

      result->u32[i] = v->u32[4 * (i / 4) + sel];
    }
  }
}
/*
 * Scalar reference model for the (masked) 16-bit shufflehi / shufflelo
 * operations.
 *
 *   presult  - V512 that receives the first `size` 16-bit elements
 *   p        - V512 holding the data to shuffle
 *   mask_src - V512 supplying the element wherever the mask bit is clear
 *   size     - number of 16-bit elements to produce (at most 32)
 *   control  - four 2-bit selectors; selector (i % 4) picks the source element
 *              inside the group of four that contains element i
 *   mask     - bit i set: element i is computed; bit i clear: element i is
 *              copied from mask_src
 *   order    - parity of the groups of four that are passed through unchanged:
 *              0 keeps the even groups (callers use this for shufflehi),
 *              1 keeps the odd groups (callers use this for shufflelo)
 *
 * Elements at index `size` and above are left untouched.
 */
void NOINLINE emulate_shuffle16(void *presult, const void *p,
                                const void *mask_src, int size, int control,
                                int mask, int order) {
  V512 *result = (V512 *)presult;
  const V512 *v = (const V512 *)p;
  /* Named "fallback" so it does not shadow the global `src`. */
  const V512 *fallback = (const V512 *)mask_src;
  /*
   * The 512-bit tests run with size == 32, so bit 31 is examined. Shifting an
   * unsigned copy of the mask right avoids the undefined `1 << 31` on int.
   */
  const unsigned int lane_mask = (unsigned int)mask;
  int i;

  for (i = 0; i < size; i++) {
    if (((lane_mask >> i) & 1u) == 0) {
      result->u16[i] = fallback->u16[i];
    } else if ((i / 4) % 2 == order) {
      /* This group of four is not shuffled by the operation being modelled. */
      result->u16[i] = v->u16[i];
    } else {
      const int sel = (control >> (2 * (i % 4))) & 3;

      result->u16[i] = v->u16[4 * (i / 4) + sel];
    }
  }
}
/*
 * Runs the plain, merge-masked and zero-masked forms of shuffle_epi32 at
 * 512, 256 and 128 bits with control value 3, and compares each result with
 * emulate_shuffle() through check_equal_nd().
 *
 * soft_src_update() precedes every test so that each intrinsic works on a
 * fresh load of `src`, the vector all of these tests actually read.
 */
void NOINLINE do_shuffle_epi32() {
  volatile V512 res;
  V512 expected;

  // checking mm512 shuffle
  soft_src_update();
  res.zmmi = _mm512_shuffle_epi32(src.zmmi, 3);
  emulate_shuffle(&expected, &src, &zeros, 16, 3, 0xffff);
  check_equal_nd(&res, &expected, 16, "_mm512_shuffle_epi32", __LINE__);

  soft_src_update();
  res.zmmi = _mm512_mask_shuffle_epi32(passthru.zmmi, k16, src.zmmi, 3);
  emulate_shuffle(&expected, &src, &passthru, 16, 3, k16);
  check_equal_nd(&res, &expected, 16, "_mm512_mask_shuffle_epi32", __LINE__);

  soft_src_update();
  res.zmmi = _mm512_maskz_shuffle_epi32(k16, src.zmmi, 3);
  emulate_shuffle(&expected, &src, &zeros, 16, 3, k16);
  check_equal_nd(&res, &expected, 16, "_mm512_maskz_shuffle_epi32", __LINE__);

  // checking mm256 shuffle
  soft_src_update();
  res.ymmi[0] = _mm256_shuffle_epi32(src.ymmi[0], 3);
  emulate_shuffle(&expected, &src, &zeros, 8, 3, 0xff);
  check_equal_nd(&res, &expected, 8, "_mm256_shuffle_epi32", __LINE__);

  soft_src_update();
  res.ymmi[0] = _mm256_mask_shuffle_epi32(passthru.ymmi[0], k8, src.ymmi[0], 3);
  emulate_shuffle(&expected, &src, &passthru, 8, 3, k8);
  check_equal_nd(&res, &expected, 8, "_mm256_mask_shuffle_epi32", __LINE__);

  soft_src_update();
  res.ymmi[0] = _mm256_maskz_shuffle_epi32(k8, src.ymmi[0], 3);
  emulate_shuffle(&expected, &src, &zeros, 8, 3, k8);
  check_equal_nd(&res, &expected, 8, "_mm256_maskz_shuffle_epi32", __LINE__);

  // checking mm shuffle
  soft_src_update();
  res.xmmi[0] = _mm_shuffle_epi32(src.xmmi[0], 3);
  emulate_shuffle(&expected, &src, &zeros, 4, 3, 0xf);
  check_equal_nd(&res, &expected, 4, "_mm_shuffle_epi32", __LINE__);

  soft_src_update();
  res.xmmi[0] = _mm_mask_shuffle_epi32(passthru.xmmi[0], k8, src.xmmi[0], 3);
  emulate_shuffle(&expected, &src, &passthru, 4, 3, k8);
  check_equal_nd(&res, &expected, 4, "_mm_mask_shuffle_epi32", __LINE__);

  soft_src_update();
  res.xmmi[0] = _mm_maskz_shuffle_epi32(k8, src.xmmi[0], 3);
  emulate_shuffle(&expected, &src, &zeros, 4, 3, k8);
  check_equal_nd(&res, &expected, 4, "_mm_maskz_shuffle_epi32", __LINE__);
}
/*
 * Runs the plain, merge-masked and zero-masked forms of shufflehi_epi16 at
 * 512, 256 and 128 bits with control value 3, and compares each result with
 * emulate_shuffle16(..., order = 0) through check_equal_nd(). The length
 * passed to check_equal_nd() is half the 16-bit element count given to the
 * emulation.
 *
 * soft_src_update() precedes every test so that each intrinsic works on a
 * fresh load of `src`, the vector all of these tests actually read.
 *
 * NOTE(review): the 512-bit masked tests pass the 16-bit k16, so in the
 * emulation elements 16..31 always take the passthru/zero path; the intrinsic
 * is expected to see the same zero-extended mask - confirm.
 */
void NOINLINE do_shufflehi_epi16() {
  volatile V512 res;
  V512 expected;

  // checking mm512 shufflehi
  soft_src_update();
  res.zmmi = _mm512_shufflehi_epi16(src.zmmi, 3);
  emulate_shuffle16(&expected, &src, &src, 32, 3, 0xffffffff, 0);
  check_equal_nd(&res, &expected, 16, "_mm512_shufflehi_epi16", __LINE__);

  soft_src_update();
  res.zmmi = _mm512_mask_shufflehi_epi16(passthru.zmmi, k16, src.zmmi, 3);
  emulate_shuffle16(&expected, &src, &passthru, 32, 3, k16, 0);
  check_equal_nd(&res, &expected, 16, "_mm512_mask_shufflehi_epi16", __LINE__);

  soft_src_update();
  res.zmmi = _mm512_maskz_shufflehi_epi16(k16, src.zmmi, 3);
  emulate_shuffle16(&expected, &src, &zeros, 32, 3, k16, 0);
  check_equal_nd(&res, &expected, 16, "_mm512_maskz_shufflehi_epi16", __LINE__);

  // checking mm256 shufflehi
  soft_src_update();
  res.ymmi[0] = _mm256_shufflehi_epi16(src.ymmi[0], 3);
  emulate_shuffle16(&expected, &src, &zeros, 16, 3, 0xffff, 0);
  check_equal_nd(&res, &expected, 8, "_mm256_shufflehi_epi16", __LINE__);

  soft_src_update();
  res.ymmi[0] = _mm256_mask_shufflehi_epi16(passthru.ymmi[0], k16, src.ymmi[0], 3);
  emulate_shuffle16(&expected, &src, &passthru, 16, 3, k16, 0);
  check_equal_nd(&res, &expected, 8, "_mm256_mask_shufflehi_epi16", __LINE__);

  soft_src_update();
  res.ymmi[0] = _mm256_maskz_shufflehi_epi16(k16, src.ymmi[0], 3);
  emulate_shuffle16(&expected, &src, &zeros, 16, 3, k16, 0);
  check_equal_nd(&res, &expected, 8, "_mm256_maskz_shufflehi_epi16", __LINE__);

  // checking mm shufflehi
  soft_src_update();
  res.xmmi[0] = _mm_shufflehi_epi16(src.xmmi[0], 3);
  emulate_shuffle16(&expected, &src, &zeros, 8, 3, 0xff, 0);
  check_equal_nd(&res, &expected, 4, "_mm_shufflehi_epi16", __LINE__);

  soft_src_update();
  res.xmmi[0] = _mm_mask_shufflehi_epi16(passthru.xmmi[0], k8, src.xmmi[0], 3);
  emulate_shuffle16(&expected, &src, &passthru, 8, 3, k8, 0);
  check_equal_nd(&res, &expected, 4, "_mm_mask_shufflehi_epi16", __LINE__);

  soft_src_update();
  res.xmmi[0] = _mm_maskz_shufflehi_epi16(k8, src.xmmi[0], 3);
  emulate_shuffle16(&expected, &src, &zeros, 8, 3, k8, 0);
  check_equal_nd(&res, &expected, 4, "_mm_maskz_shufflehi_epi16", __LINE__);
}
/*
 * Runs the plain, merge-masked and zero-masked forms of shufflelo_epi16 at
 * 512, 256 and 128 bits with control value 3, and compares each result with
 * emulate_shuffle16(..., order = 1) through check_equal_nd(). The length
 * passed to check_equal_nd() is half the 16-bit element count given to the
 * emulation.
 *
 * soft_src_update() precedes every test so that each intrinsic works on a
 * fresh load of `src`, the vector all of these tests actually read.
 *
 * NOTE(review): the 512-bit masked tests pass the 16-bit k16, so in the
 * emulation elements 16..31 always take the passthru/zero path; the intrinsic
 * is expected to see the same zero-extended mask - confirm.
 */
void NOINLINE do_shufflelo_epi16() {
  volatile V512 res;
  V512 expected;

  // checking mm512 shufflelo
  soft_src_update();
  res.zmmi = _mm512_shufflelo_epi16(src.zmmi, 3);
  emulate_shuffle16(&expected, &src, &src, 32, 3, 0xffffffff, 1);
  check_equal_nd(&res, &expected, 16, "_mm512_shufflelo_epi16", __LINE__);

  soft_src_update();
  res.zmmi = _mm512_mask_shufflelo_epi16(passthru.zmmi, k16, src.zmmi, 3);
  emulate_shuffle16(&expected, &src, &passthru, 32, 3, k16, 1);
  check_equal_nd(&res, &expected, 16, "_mm512_mask_shufflelo_epi16", __LINE__);

  soft_src_update();
  res.zmmi = _mm512_maskz_shufflelo_epi16(k16, src.zmmi, 3);
  emulate_shuffle16(&expected, &src, &zeros, 32, 3, k16, 1);
  check_equal_nd(&res, &expected, 16, "_mm512_maskz_shufflelo_epi16", __LINE__);

  // checking mm256 shufflelo
  soft_src_update();
  res.ymmi[0] = _mm256_shufflelo_epi16(src.ymmi[0], 3);
  emulate_shuffle16(&expected, &src, &zeros, 16, 3, 0xffff, 1);
  check_equal_nd(&res, &expected, 8, "_mm256_shufflelo_epi16", __LINE__);

  soft_src_update();
  res.ymmi[0] = _mm256_mask_shufflelo_epi16(passthru.ymmi[0], k16, src.ymmi[0], 3);
  emulate_shuffle16(&expected, &src, &passthru, 16, 3, k16, 1);
  check_equal_nd(&res, &expected, 8, "_mm256_mask_shufflelo_epi16", __LINE__);

  soft_src_update();
  res.ymmi[0] = _mm256_maskz_shufflelo_epi16(k16, src.ymmi[0], 3);
  emulate_shuffle16(&expected, &src, &zeros, 16, 3, k16, 1);
  check_equal_nd(&res, &expected, 8, "_mm256_maskz_shufflelo_epi16", __LINE__);

  // checking mm shufflelo
  soft_src_update();
  res.xmmi[0] = _mm_shufflelo_epi16(src.xmmi[0], 3);
  emulate_shuffle16(&expected, &src, &zeros, 8, 3, 0xff, 1);
  check_equal_nd(&res, &expected, 4, "_mm_shufflelo_epi16", __LINE__);

  soft_src_update();
  res.xmmi[0] = _mm_mask_shufflelo_epi16(passthru.xmmi[0], k8, src.xmmi[0], 3);
  emulate_shuffle16(&expected, &src, &passthru, 8, 3, k8, 1);
  check_equal_nd(&res, &expected, 4, "_mm_mask_shufflelo_epi16", __LINE__);

  soft_src_update();
  res.xmmi[0] = _mm_maskz_shufflelo_epi16(k8, src.xmmi[0], 3);
  emulate_shuffle16(&expected, &src, &zeros, 8, 3, k8, 1);
  check_equal_nd(&res, &expected, 4, "_mm_maskz_shufflelo_epi16", __LINE__);
}
/*
 * Test driver: initializes the operand vectors, runs the three shuffle test
 * groups, then prints PASSED and returns 0 when n_errs is zero, or prints
 * FAILED and returns 1 otherwise.
 */
int main(int argc, char *argv[]) {
  init();

  do_shuffle_epi32();
  do_shufflelo_epi16();
  do_shufflehi_epi16();

  if (n_errs == 0) {
    printf("PASSED\n");
    return 0;
  }

  printf("FAILED\n");
  return 1;
}