/*
 * Exercise some convert instructions.
 * This test was created to check the correctness
 * of support for the following intrinsics:
 *      _mm512_cvt_roundph_ps()
 *      _mm512_mask_cvt_roundph_ps()
 *      _mm512_maskz_cvt_roundph_ps()
 *      _mm512_cvtph_ps()
 *      _mm512_mask_cvtph_ps()
 *      _mm512_maskz_cvtph_ps()
 *      _mm512_cvt_roundps_ph()
 *      _mm512_mask_cvt_roundps_ph()
 *      _mm512_maskz_cvt_roundps_ph()
 *      _mm512_cvtps_ph()
 *      _mm512_mask_cvtps_ph()
 *      _mm512_maskz_cvtps_ph()
 * The scalar conversion intrinsics (_mm_cvt*, _mm_cvtt*) between
 * integer, float and double are exercised as well.
 */
| |
| #include <stdio.h> |
| #include <string.h> |
| #include <x86intrin.h> |
| |
| typedef union V256 { |
| __m128i m128i; |
| __m128 m128; |
| __m128d m128d; |
| __m256 m256; |
| __m256d m256d; |
| __m256i m256i; |
| short w[16]; |
| int d[8]; |
| long long q[4]; |
| float ps[8]; |
| double pd[4]; |
| |
| int i32; |
| unsigned int u32; |
| __int64 i64; |
| unsigned __int64 u64; |
| } V256; |
| |
/* Number of failed comparisons; incremented by check(), tested by main(). */
int n_errors = 0;
| |
| void print(const char *str, int num_elts, int elt_size, V256 *p, int is_float) { |
| int i; |
| |
| if (elt_size == 2 && is_float) { |
| if (num_elts == 4) { |
| p->m128 = _mm_cvtph_ps(p->m128i); |
| } else { |
| p->m256 = _mm256_cvtph_ps(p->m128i); |
| } |
| } |
| |
| printf("%s = {", str); |
| for (i = 0; i < num_elts; i++) { |
| if (!is_float) { |
| int val; |
| switch (elt_size) { |
| case 2: |
| val = p->w[i]; |
| break; |
| case 4: |
| val = p->d[i]; |
| break; |
| case 8: |
| val = p->q[i]; |
| break; |
| } |
| printf("%s %3d", i == 0 ? "" : ",", val); |
| } else { |
| float val; |
| switch (elt_size) { |
| case 2: |
| val = p->ps[i]; |
| break; |
| case 4: |
| val = p->ps[i]; |
| break; |
| case 8: |
| val = p->pd[i]; |
| break; |
| } |
| printf("%s %.3f", i == 0 ? "" : ",", val); |
| } |
| } |
| printf("}\n"); |
| } |
| |
| __declspec(noinline) void check(int is_float, int elt_size, int num_elts, |
| void *v1, void *v2, const char *str) { |
| if (memcmp(v1, v2, elt_size * num_elts) != 0) { |
| ++n_errors; |
| printf("FAILED: %dx%d (%s)\n", elt_size, num_elts, str); |
| |
| print("exp", num_elts, elt_size, v1, is_float); |
| print("got", num_elts, elt_size, v2, is_float); |
| } |
| } |
| |
/* Rounding-control argument handed to the float16 conversion intrinsics. */
#define r _MM_FROUND_NO_EXC
/* Write mask for the masked float16 tests: bits 1, 3, 5, 7, 9 and 11 set. */
int mask = 0xAAA; // b101010101010
| |
| void float16_converts() { |
| |
| #define M512 _mm512_set_ps |
| |
| #define M512_RES M512(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) |
| #define M512_PASS \ |
| M512(11, 22, 33, 44, 55, 66, 77, 88, 99, 1010, 1111, 1212, 1313, 1414, 1515, \ |
| 1616) |
| #define M512_RES_MASK \ |
| M512(11, 22, 33, 44, 5, 66, 7, 88, 9, 1010, 11, 1212, 13, 1414, 15, 1616) |
| #define M512_RES_MASKZ M512(0, 0, 0, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0) |
| |
| #define M256H_SRC _mm512_cvt_roundps_ph(M512_RES, r) |
| |
| { |
| __m512 got = _mm512_cvt_roundph_ps(M256H_SRC, r); |
| __m512 exp = M512_RES; |
| check(1, 4, 16, &exp, &got, "_mm512_cvt_roundph_ps"); |
| } |
| { |
| __m512 got = _mm512_mask_cvt_roundph_ps(M512_PASS, mask, M256H_SRC, r); |
| __m512 exp = M512_RES_MASK; |
| check(1, 4, 16, &exp, &got, "_mm512_mask_cvt_roundph_ps"); |
| } |
| { |
| __m512 got = _mm512_maskz_cvt_roundph_ps(mask, M256H_SRC, r); |
| __m512 exp = M512_RES_MASKZ; |
| check(1, 4, 16, &exp, &got, "_mm512_maskz_cvt_roundph_ps"); |
| } |
| |
| { |
| __m512 got = _mm512_cvtph_ps(M256H_SRC); |
| __m512 exp = M512_RES; |
| check(1, 4, 16, &exp, &got, "_mm512_cvtph_ps"); |
| } |
| { |
| __m512 got = _mm512_mask_cvtph_ps(M512_PASS, mask, M256H_SRC); |
| __m512 exp = M512_RES_MASK; |
| check(1, 4, 16, &exp, &got, "_mm512_mask_cvtph_ps"); |
| } |
| { |
| __m512 got = _mm512_maskz_cvtph_ps(mask, M256H_SRC); |
| __m512 exp = M512_RES_MASKZ; |
| check(1, 4, 16, &exp, &got, "_mm512_maskz_cvtph_ps"); |
| } |
| |
| #define M512_SRC M512(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) |
| |
| #define M256H(m512) _mm512_cvt_roundps_ph(m512, r) |
| |
| #define M256H_PASS \ |
| M256H(M512(11, 22, 33, 44, 55, 66, 77, 88, 99, 1010, 1111, 1212, 1313, 1414, \ |
| 1515, 1616)) |
| #define M256H_RES M256H(M512_SRC) |
| #define M256H_RES_MASK \ |
| M256H(M512(11, 22, 33, 44, 5, 66, 7, 88, 9, 1010, 11, 1212, 13, 1414, 15, \ |
| 1616)) |
| #define M256H_RES_MASKZ \ |
| M256H(M512(0, 0, 0, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0)) |
| |
| { |
| V256 got, exp; |
| got.m256i = _mm512_cvt_roundps_ph(M512_SRC, r); |
| exp.m256i = M256H_RES; |
| check(1, 2, 16, &exp, &got, "_mm512_cvt_roundps_ph"); |
| } |
| { |
| V256 got, exp; |
| got.m256i = _mm512_mask_cvt_roundps_ph(M256H_PASS, mask, M512_SRC, r); |
| exp.m256i = M256H_RES_MASK; |
| check(1, 2, 16, &exp, &got, "_mm512_mask_cvt_roundps_ph"); |
| } |
| { |
| V256 got, exp; |
| got.m256i = _mm512_maskz_cvt_roundps_ph(mask, M512_SRC, r); |
| exp.m256i = M256H_RES_MASKZ; |
| check(1, 2, 16, &exp, &got, "_mm512_maskz_cvt_roundps_ph"); |
| } |
| |
| { |
| V256 got, exp; |
| got.m256i = _mm512_cvtps_ph(M512_SRC, r); |
| exp.m256i = M256H_RES; |
| check(1, 2, 16, &exp, &got, "_mm512_cvtps_ph"); |
| } |
| { |
| V256 got, exp; |
| got.m256i = _mm512_mask_cvtps_ph(M256H_PASS, mask, M512_SRC, r); |
| exp.m256i = M256H_RES_MASK; |
| check(1, 2, 16, &exp, &got, "_mm512_mask_cvtps_ph"); |
| } |
| { |
| V256 got, exp; |
| got.m256i = _mm512_maskz_cvtps_ph(mask, M512_SRC, r); |
| exp.m256i = M256H_RES_MASKZ; |
| check(1, 2, 16, &exp, &got, "_mm512_maskz_cvtps_ph"); |
| } |
| } |
| |
| __declspec(noinline) void scalar_converts() { |
| |
| #define M128SD(scalar) _mm_set_pd(123, scalar) |
| |
| #define CHECK__(core, src, res_type, res) \ |
| { \ |
| V256 got, exp; \ |
| ((got).res_type) = _mm_##core src; \ |
| ((exp).res_type) = res; \ |
| check(0, sizeof((exp).res_type), 1, &exp, &got, "_mm_" #core); \ |
| } |
| |
| #if defined(__x86_64) || defined(_M_X64) |
| #define CHECK64 CHECK__ |
| #else |
| #define CHECK64(core, src, res_type, res) |
| #endif |
| |
| #undef R |
| #define R _MM_FROUND_FLOOR | _MM_FROUND_NO_EXC |
| |
| CHECK__(cvt_roundsd_i32, (M128SD(100.7), R), i32, 100) |
| CHECK__(cvtsd_i32, (M128SD(100.7)), i32, 101) |
| CHECK__(cvtsd_si32, (M128SD(100.7)), i32, 101) |
| CHECK__(cvt_roundsd_u32, (M128SD(100.7), R), u32, 100) |
| CHECK__(cvtsd_u32, (M128SD(100.7)), u32, 101) |
| |
| CHECK64(cvt_roundsd_i64, (M128SD(100.7), R), i64, 100) |
| CHECK64(cvtsd_i64, (M128SD(100.7)), i64, 101) |
| CHECK64(cvtsd_si64, (M128SD(100.7)), i64, 101) |
| CHECK64(cvt_roundsd_u64, (M128SD(100.7), R), u64, 100) |
| CHECK64(cvtsd_u64, (M128SD(100.7)), u64, 101) |
| |
| #undef R |
| #define R _MM_FROUND_NO_EXC |
| |
| CHECK__(cvtt_roundsd_i32, (M128SD(100.7), R), i32, 100) |
| CHECK__(cvttsd_i32, (M128SD(100.7)), i32, 100) |
| CHECK__(cvttsd_si32, (M128SD(100.7)), i32, 100) |
| CHECK__(cvtt_roundsd_u32, (M128SD(100.7), R), u32, 100) |
| CHECK__(cvttsd_u32, (M128SD(100.7)), u32, 100) |
| |
| CHECK64(cvtt_roundsd_i64, (M128SD(100.7), R), i64, 100) |
| CHECK64(cvttsd_i64, (M128SD(100.7)), i64, 100) |
| CHECK64(cvttsd_si64, (M128SD(100.7)), i64, 100) |
| CHECK64(cvtt_roundsd_u64, (M128SD(100.7), R), u64, 100) |
| CHECK64(cvttsd_u64, (M128SD(100.7)), u64, 100) |
| |
| CHECK64(cvt_roundi64_sd, (M128SD(100.7), 35, R), m128d, M128SD(35)) |
| CHECK64(cvt_roundsi64_sd, (M128SD(100.7), 35, R), m128d, M128SD(35)) |
| CHECK64(cvt_roundu64_sd, (M128SD(100.7), 35, R), m128d, M128SD(35)) |
| CHECK64(cvti64_sd, (M128SD(100.7), 35), m128d, M128SD(35)) |
| CHECK64(cvtsi64_sd, (M128SD(100.7), 35), m128d, M128SD(35)) |
| CHECK64(cvtu64_sd, (M128SD(100.7), 35), m128d, M128SD(35)) |
| |
| // Rounding not supported for [ui]32->sd |
| CHECK__(cvti32_sd, (M128SD(100.7), 35), m128d, M128SD(35)) |
| CHECK__(cvtsi32_sd, (M128SD(100.7), 35), m128d, M128SD(35)) |
| CHECK__(cvtu32_sd, (M128SD(100.7), 35), m128d, M128SD(35)) |
| |
| #define M128SS(scalar) _mm_set_ps(1, 2, 3, scalar) |
| #undef R |
| #define R _MM_FROUND_FLOOR | _MM_FROUND_NO_EXC |
| |
| CHECK__(cvt_roundss_i32, (M128SS(100.7), R), i32, 100) |
| CHECK__(cvt_roundss_u32, (M128SS(100.7), R), u32, 100) |
| CHECK__(cvtss_i32, (M128SS(100.7)), i32, 101) |
| CHECK__(cvtss_si32, (M128SS(100.7)), i32, 101) |
| CHECK__(cvtss_u32, (M128SS(100.7)), u32, 101) |
| |
| CHECK64(cvt_roundss_i64, (M128SS(100.7), R), i64, 100) |
| CHECK64(cvt_roundss_u64, (M128SS(100.7), R), u64, 100) |
| CHECK64(cvtss_i64, (M128SS(100.7)), i64, 101) |
| CHECK64(cvtss_si64, (M128SS(100.7)), i64, 101) |
| CHECK64(cvtss_u64, (M128SS(100.7)), u64, 101) |
| |
| #undef R |
| #define R _MM_FROUND_NO_EXC |
| |
| CHECK__(cvtt_roundss_i32, (M128SS(100.7), R), i32, 100) |
| CHECK__(cvtt_roundss_u32, (M128SS(100.7), R), u32, 100) |
| CHECK__(cvttss_i32, (M128SS(100.7)), i32, 100) |
| CHECK__(cvttss_si32, (M128SS(100.7)), i32, 100) |
| CHECK__(cvttss_u32, (M128SS(100.7)), u32, 100) |
| |
| CHECK64(cvtt_roundss_i64, (M128SS(100.7), R), i64, 100) |
| CHECK64(cvtt_roundss_u64, (M128SS(100.7), R), u64, 100) |
| CHECK64(cvttss_i64, (M128SS(100.7)), i64, 100) |
| CHECK64(cvttss_si64, (M128SS(100.7)), i64, 100) |
| CHECK64(cvttss_u64, (M128SS(100.7)), u64, 100) |
| |
| CHECK__(cvt_roundi32_ss, (M128SS(100.7), 47, R), m128, M128SS(47)) |
| CHECK__(cvt_roundsi32_ss, (M128SS(100.7), 47, R), m128, M128SS(47)) |
| CHECK__(cvt_roundu32_ss, (M128SS(100.7), 47, R), m128, M128SS(47)) |
| CHECK__(cvti32_ss, (M128SS(100.7), 47), m128, M128SS(47)) |
| CHECK__(cvtsi32_ss, (M128SS(100.7), 47), m128, M128SS(47)) |
| CHECK__(cvtu32_ss, (M128SS(100.7), 47), m128, M128SS(47)) |
| |
| CHECK64(cvt_roundi64_ss, (M128SS(100.7), 47, R), m128, M128SS(47)) |
| CHECK64(cvt_roundsi64_ss, (M128SS(100.7), 47, R), m128, M128SS(47)) |
| CHECK64(cvt_roundu64_ss, (M128SS(100.7), 47, R), m128, M128SS(47)) |
| CHECK64(cvti64_ss, (M128SS(100.7), 47), m128, M128SS(47)) |
| CHECK64(cvtsi64_ss, (M128SS(100.7), 47), m128, M128SS(47)) |
| CHECK64(cvtu64_ss, (M128SS(100.7), 47), m128, M128SS(47)) |
| |
| #undef R |
| #define R _MM_FROUND_NO_EXC |
| |
| #define CHECK_M128D(core, src, res_type, res) \ |
| { \ |
| V256 got, exp; \ |
| ((got).res_type) = _mm_##core src; \ |
| ((exp).res_type) = res; \ |
| check(1, 8, 2, &exp, &got, "_mm_" #core); \ |
| } |
| |
| #define M128D(a, b) _mm_set_pd(a, b) |
| |
| CHECK_M128D(cvt_roundss_sd, (M128D(1, 11) /*src1*/, M128SS(51) /*src2*/, R), |
| m128d, M128D(1, 51)) |
| CHECK_M128D(cvtss_sd, (M128D(1, 11), M128SS(51)), m128d, M128D(1, 51)) |
| |
| // For masked operations we check both 0 and 1 masks |
| // |
| CHECK_M128D( |
| mask_cvt_roundss_sd, |
| (M128D(1, 11) /*dest*/, 1, M128D(2, 22) /*src1*/, M128SS(51) /*src2*/, R), |
| m128d, M128D(2, 51)) |
| CHECK_M128D(mask_cvt_roundss_sd, |
| (M128D(1, 11), 0, M128D(2, 22), M128SS(51), R), m128d, |
| M128D(2, 11)) |
| |
| CHECK_M128D( |
| mask_cvtss_sd, |
| (M128D(1, 11) /*dest*/, 1, M128D(2, 22) /*src1*/, M128SS(51) /*src2*/), |
| m128d, M128D(2, 51)) |
| CHECK_M128D(mask_cvtss_sd, (M128D(1, 11), 0, M128D(2, 22), M128SS(51)), m128d, |
| M128D(2, 11)) |
| |
| CHECK_M128D(maskz_cvt_roundss_sd, |
| (1, M128D(2, 22) /*src1*/, M128SS(51) /*src2*/, R), m128d, |
| M128D(2, 51)) |
| CHECK_M128D(maskz_cvt_roundss_sd, (0, M128D(2, 22), M128SS(51), R), m128d, |
| M128D(2, 0)) |
| |
| CHECK_M128D(maskz_cvtss_sd, (1, M128D(2, 22) /*src1*/, M128SS(51) /*src2*/), |
| m128d, M128D(2, 51)) |
| CHECK_M128D(maskz_cvtss_sd, (0, M128D(2, 22), M128SS(51)), m128d, M128D(2, 0)) |
| |
| #define M128(a, b, c, d) _mm_set_ps(a, b, c, d) |
| #define CHECK_M128(core, src, res_type, res) \ |
| { \ |
| V256 got, exp; \ |
| ((got).res_type) = _mm_##core src; \ |
| ((exp).res_type) = res; \ |
| check(1, 4, 4, &exp, &got, "_mm_" #core); \ |
| } |
| |
| CHECK_M128(cvt_roundsd_ss, |
| (M128(1, 11, 111, 1111) /*src1*/, M128D(2, 22) /*src2*/, R), m128, |
| M128(1, 11, 111, 22)) |
| CHECK_M128(cvtsd_ss, (M128(1, 11, 111, 1111), M128D(2, 22)), m128, |
| M128(1, 11, 111, 22)) |
| |
| // For masked operations we check both 0 and 1 masks |
| // |
| CHECK_M128(mask_cvt_roundsd_ss, |
| (M128(1, 11, 111, 1111) /*dest*/, 1, |
| M128(2, 22, 222, 2222) /*src1*/, M128D(3, 33) /*src2*/, R), |
| m128, M128(2, 22, 222, 33)) |
| CHECK_M128(mask_cvt_roundsd_ss, |
| (M128(1, 11, 111, 1111) /*dest*/, 0, |
| M128(2, 22, 222, 2222) /*src1*/, M128D(3, 33) /*src2*/, R), |
| m128, M128(2, 22, 222, 1111)) |
| |
| CHECK_M128(mask_cvtsd_ss, |
| (M128(1, 11, 111, 1111) /*dest*/, 1, |
| M128(2, 22, 222, 2222) /*src1*/, M128D(3, 33) /*src2*/), |
| m128, M128(2, 22, 222, 33)) |
| CHECK_M128(mask_cvtsd_ss, |
| (M128(1, 11, 111, 1111) /*dest*/, 0, |
| M128(2, 22, 222, 2222) /*src1*/, M128D(3, 33) /*src2*/), |
| m128, M128(2, 22, 222, 1111)) |
| |
| CHECK_M128(maskz_cvt_roundsd_ss, |
| (1, M128(2, 22, 222, 2222) /*src1*/, M128D(3, 33) /*src2*/, R), |
| m128, M128(2, 22, 222, 33)) |
| CHECK_M128(maskz_cvt_roundsd_ss, |
| (0, M128(2, 22, 222, 2222) /*src1*/, M128D(3, 33) /*src2*/, R), |
| m128, M128(2, 22, 222, 0)) |
| |
| CHECK_M128(maskz_cvtsd_ss, |
| (1, M128(2, 22, 222, 2222) /*src1*/, M128D(3, 33) /*src2*/), m128, |
| M128(2, 22, 222, 33)) |
| CHECK_M128(maskz_cvtsd_ss, |
| (0, M128(2, 22, 222, 2222) /*src1*/, M128D(3, 33) /*src2*/), m128, |
| M128(2, 22, 222, 0)) |
| } |
| |
| int main(void) { |
| float16_converts(); |
| scalar_converts(); |
| |
| if (n_errors) { |
| printf("FAILED\n"); |
| return 1; |
| } |
| |
| printf("PASSED\n"); |
| return 0; |
| } |