/* blob: 58edb6e7100f62920e560651a7aef27289cdcb68 [file] [log] [blame] */
/*
* This test checks the correctness of support for the
* following intrinsics:
* _mm512_i32gather_epi64()
* _mm512_mask_i32gather_epi64()
* _mm512_i32gather_pd()
* _mm512_mask_i32gather_pd()
*/
#include <stdio.h>
#include <x86intrin.h>
/* Number of elements in every data array below, and the trip count of the
 * initialization and verification loops. */
#define NUM (256 * 256)
/* Value passed as the scale argument of every gather call in this file.
 * NOTE(review): presumably the size in bytes of one gathered element
 * (double / __int64) -- confirm against the intrinsic documentation. */
#define SCALE 8
/* Output of the double-precision gathers under test. */
double dst512_f[NUM];
/* Pass-through source for the masked double gather; also used as the
 * expected value for lanes whose mask bit is clear. */
double dst_f[NUM];
/* Output of the 64-bit integer gathers under test. */
__int64 dst512_i[NUM];
/* Pass-through source for the masked integer gather; also used as the
 * expected value for lanes whose mask bit is clear. */
__int64 dst_i[NUM];
/* Memory the double gathers read from. */
double src_f[NUM];
/* Memory the integer gathers read from. */
__int64 src_i[NUM];
/* One write mask per group of 8 consecutive elements. */
__int64 mask512[NUM / 8];
/* Gather indices; element i of a result is read from src[g_index[i]]. */
int g_index[NUM];
/* Smaller of x and y. The selected argument is evaluated twice, so do not
 * pass expressions with side effects. */
#define MIN(x, y) ((x) <= (y) ? (x) : (y))
/*
 * Fills the global test arrays with deterministic contents.
 *
 * For each position the gather index is derived from the position itself,
 * the source slot selected by that index receives the position number (in
 * both the integer and the double source array), and all four destination
 * arrays receive the negated position number. One mask value is produced
 * for every group of 8 positions.
 */
void init_data() {
  int pos;
  for (pos = 0; pos < NUM; ++pos) {
    /* Index pattern, clamped so it can never leave the source arrays. */
    int slot = pos * 17 & 0xce;
    if (slot > NUM - 1) {
      slot = NUM - 1;
    }
    g_index[pos] = slot;

    /* Only the slots that some index refers to are written; when several
     * positions share a slot the last writer wins in both arrays. */
    src_i[slot] = pos;
    src_f[slot] = src_i[slot];

    /* Destinations start out with values that differ from any source. */
    dst_f[pos] = -pos;
    dst_i[pos] = dst_f[pos];
    dst512_i[pos] = -pos;
    dst512_f[pos] = -pos;

    /* First position of every group of 8 defines that group's mask. */
    if (pos % 8 == 0) {
      mask512[pos / 8] = (pos * 31) & 0xff;
    }
  }
}
/*
 * Runs _mm512_mask_i32gather_epi64() over the whole data set, 8 elements
 * per step: indices come from g_index, the pass-through vector from dst_i,
 * the mask from mask512, the gathered data from src_i. Results are stored
 * to dst512_i.
 */
void do_mm512_mmask_i32gather_epi64() {
  int base = 0;
  while (base < NUM) {
    const __m256i offsets =
        _mm256_loadu_si256((const __m256i *)&g_index[base]);
    const __m512i passthru =
        _mm512_loadu_si512((const __m512i *)&dst_i[base]);
    const __m512i gathered = _mm512_mask_i32gather_epi64(
        passthru, mask512[base / 8], offsets, src_i, SCALE);
    _mm512_storeu_si512((__m512i *)&dst512_i[base], gathered);
    base += 8;
  }
}
/*
 * Runs _mm512_mask_i32gather_pd() over the whole data set, 8 elements per
 * step: indices come from g_index, the pass-through vector from dst_f, the
 * mask from mask512, the gathered data from src_f. Results are stored to
 * dst512_f.
 */
void do_mm512_mmask_i32gather_pd() {
  int base = 0;
  while (base < NUM) {
    const __m256i offsets =
        _mm256_loadu_si256((const __m256i *)&g_index[base]);
    const __m512d passthru = _mm512_loadu_pd(&dst_f[base]);
    const __m512d gathered = _mm512_mask_i32gather_pd(
        passthru, mask512[base / 8], offsets, src_f, SCALE);
    _mm512_storeu_pd(&dst512_f[base], gathered);
    base += 8;
  }
}
/*
 * Runs the unmasked _mm512_i32gather_epi64() over the whole data set,
 * 8 elements per step: indices come from g_index, data from src_i, and the
 * results are stored to dst512_i.
 */
void do_mm512_i32gather_epi64() {
  int base = 0;
  while (base < NUM) {
    const __m256i offsets =
        _mm256_loadu_si256((const __m256i *)&g_index[base]);
    const __m512i gathered = _mm512_i32gather_epi64(offsets, src_i, SCALE);
    _mm512_storeu_si512((__m512i *)&dst512_i[base], gathered);
    base += 8;
  }
}
/*
 * Runs the unmasked _mm512_i32gather_pd() over the whole data set,
 * 8 elements per step: indices come from g_index, data from src_f, and the
 * results are stored to dst512_f.
 */
void do_mm512_i32gather_pd() {
  int base = 0;
  while (base < NUM) {
    const __m256i offsets =
        _mm256_loadu_si256((const __m256i *)&g_index[base]);
    const __m512d gathered = _mm512_i32gather_pd(offsets, src_f, SCALE);
    _mm512_storeu_pd(&dst512_f[base], gathered);
    base += 8;
  }
}
/*
 * Verifies the result of a masked gather against a scalar model.
 *
 * For element i the mask word is mask[i / elems_in_vector] and the lane bit
 * is bit (i % elems_in_vector) of that word. When the bit is set the
 * expected value is src[g_index[i]]; otherwise it is pass_thru_vals[i].
 *
 * id               test case number printed on failure
 * res_dst          NUM values produced by the code under test
 * pass_thru_vals   NUM values expected in lanes whose mask bit is clear
 * mask             one mask word per group of elems_in_vector elements
 * src              array the gather read from
 * elems_in_vector  elements per vector; must be in the range 1..64
 *
 * Returns 0 when all NUM elements match. On the first mismatch a
 * diagnostic is printed and -1 is returned.
 */
int checkm(int id, __int64 *res_dst, __int64 *pass_thru_vals, __int64 *mask,
           __int64 *src, int elems_in_vector) {
  int i;
  for (i = 0; i < NUM; i++) {
    unsigned long long kmask =
        (unsigned long long)mask[i / elems_in_vector];
    /* Test the lane bit with an unsigned 64-bit shift: building the bit as
     * `1 << lane` in int overflows for lanes >= 31. */
    int lane_selected = (int)((kmask >> (i % elems_in_vector)) & 1u);
    __int64 v = lane_selected ? src[g_index[i]] : pass_thru_vals[i];
    if (v != res_dst[i]) {
      printf("The testcase #%d FAILed at %d iteration\n", id, i);
      /* %lld with an explicit cast is portable; the MSVC-only %I64d is not
       * a 64-bit conversion on other C libraries. */
      printf("Expected value %lld, actual %lld\n", (long long)v,
             (long long)res_dst[i]);
      return -1;
    }
  }
  return 0;
}
/*
 * Verifies the result of an unmasked gather: element i of res_dst must
 * equal src[g_index[i]] for all NUM elements.
 *
 * id               test case number printed on failure
 * res_dst          NUM values produced by the code under test
 * src              array the gather read from
 * elems_in_vector  not used by this check; kept so the signature parallels
 *                  checkm()
 *
 * Returns 0 when all NUM elements match. On the first mismatch a
 * diagnostic is printed and -1 is returned.
 */
int check(int id, __int64 *res_dst, __int64 *src, int elems_in_vector) {
  int i;
  (void)elems_in_vector; /* intentionally unused */
  for (i = 0; i < NUM; i++) {
    __int64 v = src[g_index[i]];
    if (v != res_dst[i]) {
      printf("The testcase #%d FAILed at %d iteration\n", id, i);
      /* %lld with an explicit cast is portable; the MSVC-only %I64d is not
       * a 64-bit conversion on other C libraries. */
      printf("Expected value %lld, actual %lld\n", (long long)v,
             (long long)res_dst[i]);
      return -1;
    }
  }
  return 0;
}
/*
 * Test driver. Runs the two masked gathers and then, after re-initializing
 * the data, the two unmasked gathers, verifying each result. Every test
 * case is executed even if an earlier one failed.
 *
 * Prints "PASSED" and returns 0 when all four checks succeed; otherwise
 * prints "FAILED" and returns 1.
 */
int main() {
  int failed = 0;

  /* Cases 1 and 2: masked gathers. The double arrays are handed to the
   * checker through __int64 pointer casts, so values are compared as
   * 64-bit integers. */
  init_data();
  do_mm512_mmask_i32gather_epi64();
  if (checkm(1, dst512_i, dst_i, mask512, src_i, 8) != 0) {
    failed = 1;
  }
  do_mm512_mmask_i32gather_pd();
  if (checkm(2, (__int64 *)dst512_f, (__int64 *)dst_f, mask512,
             (__int64 *)src_f, 8) != 0) {
    failed = 1;
  }

  /* Cases 3 and 4: unmasked gathers on freshly initialized data. */
  init_data();
  do_mm512_i32gather_epi64();
  if (check(3, dst512_i, src_i, 8) != 0) {
    failed = 1;
  }
  do_mm512_i32gather_pd();
  if (check(4, (__int64 *)dst512_f, (__int64 *)src_f, 8) != 0) {
    failed = 1;
  }

  if (!failed) {
    printf("PASSED\n");
    return 0;
  }
  printf("FAILED\n");
  return 1;
}