| // Ignore everything except x86 and x86_64 |
| // Any new targets that are added to CI should be ignored here. |
| // (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.) |
| //@ignore-target-aarch64 |
| //@ignore-target-arm |
| //@ignore-target-avr |
| //@ignore-target-s390x |
| //@ignore-target-thumbv7em |
| //@ignore-target-wasm32 |
| //@compile-flags: -C target-feature=+sse4.1 |
| |
| #[cfg(target_arch = "x86")] |
| use std::arch::x86::*; |
| #[cfg(target_arch = "x86_64")] |
| use std::arch::x86_64::*; |
| use std::mem::transmute; |
| |
| fn main() { |
| assert!(is_x86_feature_detected!("sse4.1")); |
| |
| unsafe { |
| test_sse41(); |
| } |
| } |
| |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test_sse41() { |
| // Mostly copied from library/stdarch/crates/core_arch/src/x86/sse41.rs |
| |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test_mm_insert_ps() { |
| let a = _mm_set1_ps(1.0); |
| let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
| let r = _mm_insert_ps::<0b11_00_1100>(a, b); |
| let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0); |
| assert_eq_m128(r, e); |
| |
| // Zeroing takes precedence over copied value |
| let a = _mm_set1_ps(1.0); |
| let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
| let r = _mm_insert_ps::<0b11_00_0001>(a, b); |
| let e = _mm_setr_ps(0.0, 1.0, 1.0, 1.0); |
| assert_eq_m128(r, e); |
| } |
| test_mm_insert_ps(); |
| |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test_mm_packus_epi32() { |
| let a = _mm_setr_epi32(1, 2, 3, 4); |
| let b = _mm_setr_epi32(-1, -2, -3, -4); |
| let r = _mm_packus_epi32(a, b); |
| let e = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0); |
| assert_eq_m128i(r, e); |
| } |
| test_mm_packus_epi32(); |
| |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test_mm_dp_pd() { |
| let a = _mm_setr_pd(2.0, 3.0); |
| let b = _mm_setr_pd(1.0, 4.0); |
| let e = _mm_setr_pd(14.0, 0.0); |
| assert_eq_m128d(_mm_dp_pd::<0b00110001>(a, b), e); |
| } |
| test_mm_dp_pd(); |
| |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test_mm_dp_ps() { |
| let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0); |
| let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0); |
| let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0); |
| assert_eq_m128(_mm_dp_ps::<0b01110101>(a, b), e); |
| } |
| test_mm_dp_ps(); |
| |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test_round_nearest_f32() { |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test(x: f32, res: f32) { |
| let a = _mm_setr_ps(3.5, 2.5, 1.5, 4.5); |
| let b = _mm_setr_ps(x, -1.5, -3.5, -2.5); |
| let e = _mm_setr_ps(res, 2.5, 1.5, 4.5); |
| let r = _mm_round_ss::<_MM_FROUND_TO_NEAREST_INT>(a, b); |
| assert_eq_m128(r, e); |
| // Assume round-to-nearest by default |
| let r = _mm_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b); |
| assert_eq_m128(r, e); |
| |
| let a = _mm_set1_ps(x); |
| let e = _mm_set1_ps(res); |
| let r = _mm_round_ps::<_MM_FROUND_TO_NEAREST_INT>(a); |
| assert_eq_m128(r, e); |
| // Assume round-to-nearest by default |
| let r = _mm_round_ps::<_MM_FROUND_CUR_DIRECTION>(a); |
| assert_eq_m128(r, e); |
| } |
| |
| // Test rounding direction |
| test(-2.5, -2.0); |
| test(-1.75, -2.0); |
| test(-1.5, -2.0); |
| test(-1.25, -1.0); |
| test(-1.0, -1.0); |
| test(0.0, 0.0); |
| test(1.0, 1.0); |
| test(1.25, 1.0); |
| test(1.5, 2.0); |
| test(1.75, 2.0); |
| test(2.5, 2.0); |
| |
| // Test that each element is rounded |
| let a = _mm_setr_ps(1.5, 3.5, 5.5, 7.5); |
| let e = _mm_setr_ps(2.0, 4.0, 6.0, 8.0); |
| let r = _mm_round_ps::<_MM_FROUND_TO_NEAREST_INT>(a); |
| assert_eq_m128(r, e); |
| // Assume round-to-nearest by default |
| let r = _mm_round_ps::<_MM_FROUND_CUR_DIRECTION>(a); |
| assert_eq_m128(r, e); |
| } |
| test_round_nearest_f32(); |
| |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test_round_floor_f32() { |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test(x: f32, res: f32) { |
| let a = _mm_setr_ps(3.5, 2.5, 1.5, 4.5); |
| let b = _mm_setr_ps(x, -1.5, -3.5, -2.5); |
| let e = _mm_setr_ps(res, 2.5, 1.5, 4.5); |
| let r = _mm_floor_ss(a, b); |
| assert_eq_m128(r, e); |
| let r = _mm_round_ss::<_MM_FROUND_TO_NEG_INF>(a, b); |
| assert_eq_m128(r, e); |
| |
| let a = _mm_set1_ps(x); |
| let e = _mm_set1_ps(res); |
| let r = _mm_floor_ps(a); |
| assert_eq_m128(r, e); |
| let r = _mm_round_ps::<_MM_FROUND_TO_NEG_INF>(a); |
| assert_eq_m128(r, e); |
| } |
| |
| // Test rounding direction |
| test(-2.5, -3.0); |
| test(-1.75, -2.0); |
| test(-1.5, -2.0); |
| test(-1.25, -2.0); |
| test(-1.0, -1.0); |
| test(0.0, 0.0); |
| test(1.0, 1.0); |
| test(1.25, 1.0); |
| test(1.5, 1.0); |
| test(1.75, 1.0); |
| test(2.5, 2.0); |
| |
| // Test that each element is rounded |
| let a = _mm_setr_ps(1.5, 3.5, 5.5, 7.5); |
| let e = _mm_setr_ps(1.0, 3.0, 5.0, 7.0); |
| let r = _mm_floor_ps(a); |
| assert_eq_m128(r, e); |
| let r = _mm_round_ps::<_MM_FROUND_TO_NEG_INF>(a); |
| assert_eq_m128(r, e); |
| } |
| test_round_floor_f32(); |
| |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test_round_ceil_f32() { |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test(x: f32, res: f32) { |
| let a = _mm_setr_ps(3.5, 2.5, 1.5, 4.5); |
| let b = _mm_setr_ps(x, -1.5, -3.5, -2.5); |
| let e = _mm_setr_ps(res, 2.5, 1.5, 4.5); |
| let r = _mm_ceil_ss(a, b); |
| assert_eq_m128(r, e); |
| let r = _mm_round_ss::<_MM_FROUND_TO_POS_INF>(a, b); |
| assert_eq_m128(r, e); |
| |
| let a = _mm_set1_ps(x); |
| let e = _mm_set1_ps(res); |
| let r = _mm_ceil_ps(a); |
| assert_eq_m128(r, e); |
| let r = _mm_round_ps::<_MM_FROUND_TO_POS_INF>(a); |
| assert_eq_m128(r, e); |
| } |
| |
| // Test rounding direction |
| test(-2.5, -2.0); |
| test(-1.75, -1.0); |
| test(-1.5, -1.0); |
| test(-1.25, -1.0); |
| test(-1.0, -1.0); |
| test(0.0, 0.0); |
| test(1.0, 1.0); |
| test(1.25, 2.0); |
| test(1.5, 2.0); |
| test(1.75, 2.0); |
| test(2.5, 3.0); |
| |
| // Test that each element is rounded |
| let a = _mm_setr_ps(1.5, 3.5, 5.5, 7.5); |
| let e = _mm_setr_ps(2.0, 4.0, 6.0, 8.0); |
| let r = _mm_ceil_ps(a); |
| assert_eq_m128(r, e); |
| let r = _mm_round_ps::<_MM_FROUND_TO_POS_INF>(a); |
| assert_eq_m128(r, e); |
| } |
| test_round_ceil_f32(); |
| |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test_round_trunc_f32() { |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test(x: f32, res: f32) { |
| let a = _mm_setr_ps(3.5, 2.5, 1.5, 4.5); |
| let b = _mm_setr_ps(x, -1.5, -3.5, -2.5); |
| let e = _mm_setr_ps(res, 2.5, 1.5, 4.5); |
| let r = _mm_round_ss::<_MM_FROUND_TO_ZERO>(a, b); |
| assert_eq_m128(r, e); |
| |
| let a = _mm_set1_ps(x); |
| let e = _mm_set1_ps(res); |
| let r = _mm_round_ps::<_MM_FROUND_TO_ZERO>(a); |
| assert_eq_m128(r, e); |
| } |
| |
| // Test rounding direction |
| test(-2.5, -2.0); |
| test(-1.75, -1.0); |
| test(-1.5, -1.0); |
| test(-1.25, -1.0); |
| test(-1.0, -1.0); |
| test(0.0, 0.0); |
| test(1.0, 1.0); |
| test(1.25, 1.0); |
| test(1.5, 1.0); |
| test(1.75, 1.0); |
| test(2.5, 2.0); |
| |
| // Test that each element is rounded |
| let a = _mm_setr_ps(1.5, 3.5, 5.5, 7.5); |
| let e = _mm_setr_ps(1.0, 3.0, 5.0, 7.0); |
| let r = _mm_round_ps::<_MM_FROUND_TO_ZERO>(a); |
| assert_eq_m128(r, e); |
| } |
| test_round_trunc_f32(); |
| |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test_round_nearest_f64() { |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test(x: f64, res: f64) { |
| let a = _mm_setr_pd(3.5, 2.5); |
| let b = _mm_setr_pd(x, -1.5); |
| let e = _mm_setr_pd(res, 2.5); |
| let r = _mm_round_sd::<_MM_FROUND_TO_NEAREST_INT>(a, b); |
| assert_eq_m128d(r, e); |
| // Assume round-to-nearest by default |
| let r = _mm_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b); |
| assert_eq_m128d(r, e); |
| |
| let a = _mm_set1_pd(x); |
| let e = _mm_set1_pd(res); |
| let r = _mm_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a); |
| assert_eq_m128d(r, e); |
| // Assume round-to-nearest by default |
| let r = _mm_round_pd::<_MM_FROUND_CUR_DIRECTION>(a); |
| assert_eq_m128d(r, e); |
| } |
| |
| // Test rounding direction |
| test(-2.5, -2.0); |
| test(-1.75, -2.0); |
| test(-1.5, -2.0); |
| test(-1.25, -1.0); |
| test(-1.0, -1.0); |
| test(0.0, 0.0); |
| test(1.0, 1.0); |
| test(1.25, 1.0); |
| test(1.5, 2.0); |
| test(1.75, 2.0); |
| test(2.5, 2.0); |
| |
| // Test that each element is rounded |
| let a = _mm_setr_pd(1.5, 3.5); |
| let e = _mm_setr_pd(2.0, 4.0); |
| let r = _mm_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a); |
| assert_eq_m128d(r, e); |
| // Assume round-to-nearest by default |
| let r = _mm_round_pd::<_MM_FROUND_CUR_DIRECTION>(a); |
| assert_eq_m128d(r, e); |
| } |
| test_round_nearest_f64(); |
| |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test_round_floor_f64() { |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test(x: f64, res: f64) { |
| let a = _mm_setr_pd(3.5, 2.5); |
| let b = _mm_setr_pd(x, -1.5); |
| let e = _mm_setr_pd(res, 2.5); |
| let r = _mm_floor_sd(a, b); |
| assert_eq_m128d(r, e); |
| let r = _mm_round_sd::<_MM_FROUND_TO_NEG_INF>(a, b); |
| assert_eq_m128d(r, e); |
| |
| let a = _mm_set1_pd(x); |
| let e = _mm_set1_pd(res); |
| let r = _mm_floor_pd(a); |
| assert_eq_m128d(r, e); |
| let r = _mm_round_pd::<_MM_FROUND_TO_NEG_INF>(a); |
| assert_eq_m128d(r, e); |
| } |
| |
| // Test rounding direction |
| test(-2.5, -3.0); |
| test(-1.75, -2.0); |
| test(-1.5, -2.0); |
| test(-1.25, -2.0); |
| test(-1.0, -1.0); |
| test(0.0, 0.0); |
| test(1.0, 1.0); |
| test(1.25, 1.0); |
| test(1.5, 1.0); |
| test(1.75, 1.0); |
| test(2.5, 2.0); |
| |
| // Test that each element is rounded |
| let a = _mm_setr_pd(1.5, 3.5); |
| let e = _mm_setr_pd(1.0, 3.0); |
| let r = _mm_floor_pd(a); |
| assert_eq_m128d(r, e); |
| let r = _mm_round_pd::<_MM_FROUND_TO_NEG_INF>(a); |
| assert_eq_m128d(r, e); |
| } |
| test_round_floor_f64(); |
| |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test_round_ceil_f64() { |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test(x: f64, res: f64) { |
| let a = _mm_setr_pd(3.5, 2.5); |
| let b = _mm_setr_pd(x, -1.5); |
| let e = _mm_setr_pd(res, 2.5); |
| let r = _mm_ceil_sd(a, b); |
| assert_eq_m128d(r, e); |
| let r = _mm_round_sd::<_MM_FROUND_TO_POS_INF>(a, b); |
| assert_eq_m128d(r, e); |
| |
| let a = _mm_set1_pd(x); |
| let e = _mm_set1_pd(res); |
| let r = _mm_ceil_pd(a); |
| assert_eq_m128d(r, e); |
| let r = _mm_round_pd::<_MM_FROUND_TO_POS_INF>(a); |
| assert_eq_m128d(r, e); |
| } |
| |
| // Test rounding direction |
| test(-2.5, -2.0); |
| test(-1.75, -1.0); |
| test(-1.5, -1.0); |
| test(-1.25, -1.0); |
| test(-1.0, -1.0); |
| test(0.0, 0.0); |
| test(1.0, 1.0); |
| test(1.25, 2.0); |
| test(1.5, 2.0); |
| test(1.75, 2.0); |
| test(2.5, 3.0); |
| |
| // Test that each element is rounded |
| let a = _mm_setr_pd(1.5, 3.5); |
| let e = _mm_setr_pd(2.0, 4.0); |
| let r = _mm_ceil_pd(a); |
| assert_eq_m128d(r, e); |
| let r = _mm_round_pd::<_MM_FROUND_TO_POS_INF>(a); |
| assert_eq_m128d(r, e); |
| } |
| test_round_ceil_f64(); |
| |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test_round_trunc_f64() { |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test(x: f64, res: f64) { |
| let a = _mm_setr_pd(3.5, 2.5); |
| let b = _mm_setr_pd(x, -1.5); |
| let e = _mm_setr_pd(res, 2.5); |
| let r = _mm_round_sd::<_MM_FROUND_TO_ZERO>(a, b); |
| assert_eq_m128d(r, e); |
| |
| let a = _mm_set1_pd(x); |
| let e = _mm_set1_pd(res); |
| let r = _mm_round_pd::<_MM_FROUND_TO_ZERO>(a); |
| assert_eq_m128d(r, e); |
| } |
| |
| // Test rounding direction |
| test(-2.5, -2.0); |
| test(-1.75, -1.0); |
| test(-1.5, -1.0); |
| test(-1.25, -1.0); |
| test(-1.0, -1.0); |
| test(0.0, 0.0); |
| test(1.0, 1.0); |
| test(1.25, 1.0); |
| test(1.5, 1.0); |
| test(1.75, 1.0); |
| test(2.5, 2.0); |
| |
| // Test that each element is rounded |
| let a = _mm_setr_pd(1.5, 3.5); |
| let e = _mm_setr_pd(1.0, 3.0); |
| let r = _mm_round_pd::<_MM_FROUND_TO_ZERO>(a); |
| assert_eq_m128d(r, e); |
| } |
| test_round_trunc_f64(); |
| |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test_mm_minpos_epu16() { |
| let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 66); |
| let r = _mm_minpos_epu16(a); |
| let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0); |
| assert_eq_m128i(r, e); |
| |
| let a = _mm_setr_epi16(0, 18, 44, 97, 50, 13, 67, 66); |
| let r = _mm_minpos_epu16(a); |
| let e = _mm_setr_epi16(0, 0, 0, 0, 0, 0, 0, 0); |
| assert_eq_m128i(r, e); |
| |
| // Case where the minimum value is repeated |
| let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 13); |
| let r = _mm_minpos_epu16(a); |
| let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0); |
| assert_eq_m128i(r, e); |
| } |
| test_mm_minpos_epu16(); |
| |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test_mm_mpsadbw_epu8() { |
| let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
| |
| let r = _mm_mpsadbw_epu8::<0b000>(a, a); |
| let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28); |
| assert_eq_m128i(r, e); |
| |
| let r = _mm_mpsadbw_epu8::<0b001>(a, a); |
| let e = _mm_setr_epi16(16, 12, 8, 4, 0, 4, 8, 12); |
| assert_eq_m128i(r, e); |
| |
| let r = _mm_mpsadbw_epu8::<0b100>(a, a); |
| let e = _mm_setr_epi16(16, 20, 24, 28, 32, 36, 40, 44); |
| assert_eq_m128i(r, e); |
| |
| let r = _mm_mpsadbw_epu8::<0b101>(a, a); |
| let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28); |
| assert_eq_m128i(r, e); |
| |
| let r = _mm_mpsadbw_epu8::<0b111>(a, a); |
| let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4); |
| assert_eq_m128i(r, e); |
| } |
| test_mm_mpsadbw_epu8(); |
| |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test_mm_testz_si128() { |
| let a = _mm_set1_epi8(1); |
| let mask = _mm_set1_epi8(0); |
| let r = _mm_testz_si128(a, mask); |
| assert_eq!(r, 1); |
| |
| let a = _mm_set1_epi8(0b101); |
| let mask = _mm_set1_epi8(0b110); |
| let r = _mm_testz_si128(a, mask); |
| assert_eq!(r, 0); |
| |
| let a = _mm_set1_epi8(0b011); |
| let mask = _mm_set1_epi8(0b100); |
| let r = _mm_testz_si128(a, mask); |
| assert_eq!(r, 1); |
| } |
| test_mm_testz_si128(); |
| |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test_mm_testc_si128() { |
| let a = _mm_set1_epi8(-1); |
| let mask = _mm_set1_epi8(0); |
| let r = _mm_testc_si128(a, mask); |
| assert_eq!(r, 1); |
| |
| let a = _mm_set1_epi8(0b101); |
| let mask = _mm_set1_epi8(0b110); |
| let r = _mm_testc_si128(a, mask); |
| assert_eq!(r, 0); |
| |
| let a = _mm_set1_epi8(0b101); |
| let mask = _mm_set1_epi8(0b100); |
| let r = _mm_testc_si128(a, mask); |
| assert_eq!(r, 1); |
| } |
| test_mm_testc_si128(); |
| |
| #[target_feature(enable = "sse4.1")] |
| unsafe fn test_mm_testnzc_si128() { |
| let a = _mm_set1_epi8(0); |
| let mask = _mm_set1_epi8(1); |
| let r = _mm_testnzc_si128(a, mask); |
| assert_eq!(r, 0); |
| |
| let a = _mm_set1_epi8(-1); |
| let mask = _mm_set1_epi8(0); |
| let r = _mm_testnzc_si128(a, mask); |
| assert_eq!(r, 0); |
| |
| let a = _mm_set1_epi8(0b101); |
| let mask = _mm_set1_epi8(0b110); |
| let r = _mm_testnzc_si128(a, mask); |
| assert_eq!(r, 1); |
| |
| let a = _mm_set1_epi8(0b101); |
| let mask = _mm_set1_epi8(0b101); |
| let r = _mm_testnzc_si128(a, mask); |
| assert_eq!(r, 0); |
| |
| let a = _mm_setr_epi32(0b100, 0, 0, 0b010); |
| let mask = _mm_setr_epi32(0b100, 0, 0, 0b110); |
| let r = _mm_testnzc_si128(a, mask); |
| assert_eq!(r, 1); |
| } |
| test_mm_testnzc_si128(); |
| } |
| |
| #[track_caller] |
| #[target_feature(enable = "sse")] |
| unsafe fn assert_eq_m128(a: __m128, b: __m128) { |
| let r = _mm_cmpeq_ps(a, b); |
| if _mm_movemask_ps(r) != 0b1111 { |
| panic!("{:?} != {:?}", a, b); |
| } |
| } |
| |
| #[track_caller] |
| #[target_feature(enable = "sse2")] |
| pub unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) { |
| if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 { |
| panic!("{:?} != {:?}", a, b); |
| } |
| } |
| |
| #[track_caller] |
| #[target_feature(enable = "sse2")] |
| pub unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) { |
| assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b)) |
| } |