// blob: 06607f3fd59e162425195124a696890f9c7b6cc6 [file] [log] [blame]
// Ignore everything except x86 and x86_64
// Any new targets that are added to CI should be ignored here.
// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
//@compile-flags: -C target-feature=+sse4.1
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::mem::transmute;
// Program entry point. It opens an `unsafe` block; the statements that block
// held (and every closing brace) are not present in this copy of the file.
// NOTE(review): presumably the block just calls `test_sse41()` — confirm
// against the upstream file.
fn main() {
unsafe {
// Driver for the SSE4.1 checks, itself compiled with the `sse4.1` target
// feature enabled. The individual per-intrinsic test functions follow it.
// NOTE(review): each test is presumably invoked right after its definition;
// those call statements are not visible in this copy — confirm.
#[target_feature(enable = "sse4.1")]
unsafe fn test_sse41() {
// Mostly copied from library/stdarch/crates/core_arch/src/x86/
// NOTE(review): the path above stops at the directory; the file name looks
// truncated (likely sse41.rs) — confirm.
// Checks `_mm_insert_ps` with two immediates.
// Case 1 (0b11_00_1100): the result must be (4.0, 1.0, 0.0, 0.0) — lane 0
// receives the last element of `b`, lane 1 keeps the value from `a`, and
// lanes 2-3 are cleared.
// Case 2 (0b11_00_0001): lane 0 is both the insertion target and marked for
// zeroing; the expected 0.0 in lane 0 pins that zeroing wins.
#[target_feature(enable = "sse4.1")]
unsafe fn test_mm_insert_ps() {
let a = _mm_set1_ps(1.0);
let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
// Immediate fields as written: 11 | 00 | 1100.
let r = _mm_insert_ps::<0b11_00_1100>(a, b);
let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0);
assert_eq_m128(r, e);
// Zeroing takes precedence over copied value
let a = _mm_set1_ps(1.0);
let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let r = _mm_insert_ps::<0b11_00_0001>(a, b);
let e = _mm_setr_ps(0.0, 1.0, 1.0, 1.0);
assert_eq_m128(r, e);
// Checks `_mm_packus_epi32`: the two 4x32-bit inputs are packed into one
// 8x16-bit vector. The positive values 1..4 from `a` are expected unchanged in
// the low four lanes; the negative values -1..-4 from `b` are expected to come
// out as 0 in the high four lanes (unsigned saturation).
#[target_feature(enable = "sse4.1")]
unsafe fn test_mm_packus_epi32() {
let a = _mm_setr_epi32(1, 2, 3, 4);
let b = _mm_setr_epi32(-1, -2, -3, -4);
let r = _mm_packus_epi32(a, b);
let e = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0);
assert_eq_m128i(r, e);
// Checks `_mm_dp_pd` with immediate 0b00110001.
// Expected result is (14.0, 0.0): the dot product 2*1 + 3*4 = 14 lands in
// lane 0 and lane 1 is zero.
#[target_feature(enable = "sse4.1")]
unsafe fn test_mm_dp_pd() {
let a = _mm_setr_pd(2.0, 3.0);
let b = _mm_setr_pd(1.0, 4.0);
let e = _mm_setr_pd(14.0, 0.0);
assert_eq_m128d(_mm_dp_pd::<0b00110001>(a, b), e);
// Checks `_mm_dp_ps` with immediate 0b01110101.
// Expected result is (14.5, 0.0, 14.5, 0.0): 14.5 = 2*1 + 3*4 + 1*0.5, so the
// fourth product (10*10) is left out of the sum, and the sum is written to
// lanes 0 and 2 while lanes 1 and 3 are zero.
#[target_feature(enable = "sse4.1")]
unsafe fn test_mm_dp_ps() {
let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0);
let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0);
let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0);
assert_eq_m128(_mm_dp_ps::<0b01110101>(a, b), e);
// Checks round-to-nearest on f32 through `_mm_round_ss` (scalar) and
// `_mm_round_ps` (packed). Every case is run twice: with the explicit
// `_MM_FROUND_TO_NEAREST_INT` mode and with `_MM_FROUND_CUR_DIRECTION`, which
// the test assumes resolves to round-to-nearest.
// The table includes halfway inputs (-2.5, -1.5, 1.5, 2.5) whose expected
// results (-2, -2, 2, 2) are the even neighbours.
#[target_feature(enable = "sse4.1")]
unsafe fn test_round_nearest_f32() {
// Helper: rounds `x` and compares against `res`, first as the low lane of the
// scalar form, then in every lane of the packed form.
#[target_feature(enable = "sse4.1")]
unsafe fn test(x: f32, res: f32) {
let a = _mm_setr_ps(3.5, 2.5, 1.5, 4.5);
let b = _mm_setr_ps(x, -1.5, -3.5, -2.5);
// Scalar form: only lane 0 (taken from `b`) is rounded; lanes 1-3 are expected
// to be the untouched values of `a`.
let e = _mm_setr_ps(res, 2.5, 1.5, 4.5);
let r = _mm_round_ss::<_MM_FROUND_TO_NEAREST_INT>(a, b);
assert_eq_m128(r, e);
// Assume round-to-nearest by default
let r = _mm_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
assert_eq_m128(r, e);
// Packed form: `x` is broadcast to all four lanes.
let a = _mm_set1_ps(x);
let e = _mm_set1_ps(res);
let r = _mm_round_ps::<_MM_FROUND_TO_NEAREST_INT>(a);
assert_eq_m128(r, e);
// Assume round-to-nearest by default
let r = _mm_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
assert_eq_m128(r, e);
// Test rounding direction
test(-2.5, -2.0);
test(-1.75, -2.0);
test(-1.5, -2.0);
test(-1.25, -1.0);
test(-1.0, -1.0);
test(0.0, 0.0);
test(1.0, 1.0);
test(1.25, 1.0);
test(1.5, 2.0);
test(1.75, 2.0);
test(2.5, 2.0);
// Test that each element is rounded
let a = _mm_setr_ps(1.5, 3.5, 5.5, 7.5);
let e = _mm_setr_ps(2.0, 4.0, 6.0, 8.0);
let r = _mm_round_ps::<_MM_FROUND_TO_NEAREST_INT>(a);
assert_eq_m128(r, e);
// Assume round-to-nearest by default
let r = _mm_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
assert_eq_m128(r, e);
// Checks rounding toward negative infinity on f32. The dedicated
// `_mm_floor_ss` / `_mm_floor_ps` intrinsics and the generic
// `_mm_round_ss` / `_mm_round_ps` with `_MM_FROUND_TO_NEG_INF` are all
// compared against the same expected values.
#[target_feature(enable = "sse4.1")]
unsafe fn test_round_floor_f32() {
// Helper: floors `x` and compares against `res`, first as the low lane of the
// scalar form, then in every lane of the packed form.
#[target_feature(enable = "sse4.1")]
unsafe fn test(x: f32, res: f32) {
let a = _mm_setr_ps(3.5, 2.5, 1.5, 4.5);
let b = _mm_setr_ps(x, -1.5, -3.5, -2.5);
// Scalar form: lane 0 is the rounded `x`; lanes 1-3 are expected to come from `a`.
let e = _mm_setr_ps(res, 2.5, 1.5, 4.5);
let r = _mm_floor_ss(a, b);
assert_eq_m128(r, e);
let r = _mm_round_ss::<_MM_FROUND_TO_NEG_INF>(a, b);
assert_eq_m128(r, e);
// Packed form: `x` is broadcast to all four lanes.
let a = _mm_set1_ps(x);
let e = _mm_set1_ps(res);
let r = _mm_floor_ps(a);
assert_eq_m128(r, e);
let r = _mm_round_ps::<_MM_FROUND_TO_NEG_INF>(a);
assert_eq_m128(r, e);
// Test rounding direction
test(-2.5, -3.0);
test(-1.75, -2.0);
test(-1.5, -2.0);
test(-1.25, -2.0);
test(-1.0, -1.0);
test(0.0, 0.0);
test(1.0, 1.0);
test(1.25, 1.0);
test(1.5, 1.0);
test(1.75, 1.0);
test(2.5, 2.0);
// Test that each element is rounded
let a = _mm_setr_ps(1.5, 3.5, 5.5, 7.5);
let e = _mm_setr_ps(1.0, 3.0, 5.0, 7.0);
let r = _mm_floor_ps(a);
assert_eq_m128(r, e);
let r = _mm_round_ps::<_MM_FROUND_TO_NEG_INF>(a);
assert_eq_m128(r, e);
// Checks rounding toward positive infinity on f32. The dedicated
// `_mm_ceil_ss` / `_mm_ceil_ps` intrinsics and the generic
// `_mm_round_ss` / `_mm_round_ps` with `_MM_FROUND_TO_POS_INF` are all
// compared against the same expected values.
#[target_feature(enable = "sse4.1")]
unsafe fn test_round_ceil_f32() {
// Helper: takes the ceiling of `x` and compares against `res`, first as the
// low lane of the scalar form, then in every lane of the packed form.
#[target_feature(enable = "sse4.1")]
unsafe fn test(x: f32, res: f32) {
let a = _mm_setr_ps(3.5, 2.5, 1.5, 4.5);
let b = _mm_setr_ps(x, -1.5, -3.5, -2.5);
// Scalar form: lane 0 is the rounded `x`; lanes 1-3 are expected to come from `a`.
let e = _mm_setr_ps(res, 2.5, 1.5, 4.5);
let r = _mm_ceil_ss(a, b);
assert_eq_m128(r, e);
let r = _mm_round_ss::<_MM_FROUND_TO_POS_INF>(a, b);
assert_eq_m128(r, e);
// Packed form: `x` is broadcast to all four lanes.
let a = _mm_set1_ps(x);
let e = _mm_set1_ps(res);
let r = _mm_ceil_ps(a);
assert_eq_m128(r, e);
let r = _mm_round_ps::<_MM_FROUND_TO_POS_INF>(a);
assert_eq_m128(r, e);
// Test rounding direction
test(-2.5, -2.0);
test(-1.75, -1.0);
test(-1.5, -1.0);
test(-1.25, -1.0);
test(-1.0, -1.0);
test(0.0, 0.0);
test(1.0, 1.0);
test(1.25, 2.0);
test(1.5, 2.0);
test(1.75, 2.0);
test(2.5, 3.0);
// Test that each element is rounded
let a = _mm_setr_ps(1.5, 3.5, 5.5, 7.5);
let e = _mm_setr_ps(2.0, 4.0, 6.0, 8.0);
let r = _mm_ceil_ps(a);
assert_eq_m128(r, e);
let r = _mm_round_ps::<_MM_FROUND_TO_POS_INF>(a);
assert_eq_m128(r, e);
// Checks rounding toward zero (truncation) on f32 using `_mm_round_ss` and
// `_mm_round_ps` with `_MM_FROUND_TO_ZERO`. Negative inputs are expected to
// round up toward zero and positive inputs down toward zero.
#[target_feature(enable = "sse4.1")]
unsafe fn test_round_trunc_f32() {
// Helper: truncates `x` and compares against `res`, first as the low lane of
// the scalar form, then in every lane of the packed form.
#[target_feature(enable = "sse4.1")]
unsafe fn test(x: f32, res: f32) {
let a = _mm_setr_ps(3.5, 2.5, 1.5, 4.5);
let b = _mm_setr_ps(x, -1.5, -3.5, -2.5);
// Scalar form: lane 0 is the rounded `x`; lanes 1-3 are expected to come from `a`.
let e = _mm_setr_ps(res, 2.5, 1.5, 4.5);
let r = _mm_round_ss::<_MM_FROUND_TO_ZERO>(a, b);
assert_eq_m128(r, e);
// Packed form: `x` is broadcast to all four lanes.
let a = _mm_set1_ps(x);
let e = _mm_set1_ps(res);
let r = _mm_round_ps::<_MM_FROUND_TO_ZERO>(a);
assert_eq_m128(r, e);
// Test rounding direction
test(-2.5, -2.0);
test(-1.75, -1.0);
test(-1.5, -1.0);
test(-1.25, -1.0);
test(-1.0, -1.0);
test(0.0, 0.0);
test(1.0, 1.0);
test(1.25, 1.0);
test(1.5, 1.0);
test(1.75, 1.0);
test(2.5, 2.0);
// Test that each element is rounded
let a = _mm_setr_ps(1.5, 3.5, 5.5, 7.5);
let e = _mm_setr_ps(1.0, 3.0, 5.0, 7.0);
let r = _mm_round_ps::<_MM_FROUND_TO_ZERO>(a);
assert_eq_m128(r, e);
// Checks round-to-nearest on f64 through `_mm_round_sd` (scalar) and
// `_mm_round_pd` (packed). Every case is run twice: with the explicit
// `_MM_FROUND_TO_NEAREST_INT` mode and with `_MM_FROUND_CUR_DIRECTION`, which
// the test assumes resolves to round-to-nearest.
// The table includes halfway inputs (-2.5, -1.5, 1.5, 2.5) whose expected
// results (-2, -2, 2, 2) are the even neighbours.
#[target_feature(enable = "sse4.1")]
unsafe fn test_round_nearest_f64() {
// Helper: rounds `x` and compares against `res`, first as the low lane of the
// scalar form, then in both lanes of the packed form.
#[target_feature(enable = "sse4.1")]
unsafe fn test(x: f64, res: f64) {
let a = _mm_setr_pd(3.5, 2.5);
let b = _mm_setr_pd(x, -1.5);
// Scalar form: lane 0 is the rounded `x`; lane 1 is expected to come from `a`.
let e = _mm_setr_pd(res, 2.5);
let r = _mm_round_sd::<_MM_FROUND_TO_NEAREST_INT>(a, b);
assert_eq_m128d(r, e);
// Assume round-to-nearest by default
let r = _mm_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
assert_eq_m128d(r, e);
// Packed form: `x` is broadcast to both lanes.
let a = _mm_set1_pd(x);
let e = _mm_set1_pd(res);
let r = _mm_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a);
assert_eq_m128d(r, e);
// Assume round-to-nearest by default
let r = _mm_round_pd::<_MM_FROUND_CUR_DIRECTION>(a);
assert_eq_m128d(r, e);
// Test rounding direction
test(-2.5, -2.0);
test(-1.75, -2.0);
test(-1.5, -2.0);
test(-1.25, -1.0);
test(-1.0, -1.0);
test(0.0, 0.0);
test(1.0, 1.0);
test(1.25, 1.0);
test(1.5, 2.0);
test(1.75, 2.0);
test(2.5, 2.0);
// Test that each element is rounded
let a = _mm_setr_pd(1.5, 3.5);
let e = _mm_setr_pd(2.0, 4.0);
let r = _mm_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a);
assert_eq_m128d(r, e);
// Assume round-to-nearest by default
let r = _mm_round_pd::<_MM_FROUND_CUR_DIRECTION>(a);
assert_eq_m128d(r, e);
// Checks rounding toward negative infinity on f64. The dedicated
// `_mm_floor_sd` / `_mm_floor_pd` intrinsics and the generic
// `_mm_round_sd` / `_mm_round_pd` with `_MM_FROUND_TO_NEG_INF` are all
// compared against the same expected values.
#[target_feature(enable = "sse4.1")]
unsafe fn test_round_floor_f64() {
// Helper: floors `x` and compares against `res`, first as the low lane of the
// scalar form, then in both lanes of the packed form.
#[target_feature(enable = "sse4.1")]
unsafe fn test(x: f64, res: f64) {
let a = _mm_setr_pd(3.5, 2.5);
let b = _mm_setr_pd(x, -1.5);
// Scalar form: lane 0 is the rounded `x`; lane 1 is expected to come from `a`.
let e = _mm_setr_pd(res, 2.5);
let r = _mm_floor_sd(a, b);
assert_eq_m128d(r, e);
let r = _mm_round_sd::<_MM_FROUND_TO_NEG_INF>(a, b);
assert_eq_m128d(r, e);
// Packed form: `x` is broadcast to both lanes.
let a = _mm_set1_pd(x);
let e = _mm_set1_pd(res);
let r = _mm_floor_pd(a);
assert_eq_m128d(r, e);
let r = _mm_round_pd::<_MM_FROUND_TO_NEG_INF>(a);
assert_eq_m128d(r, e);
// Test rounding direction
test(-2.5, -3.0);
test(-1.75, -2.0);
test(-1.5, -2.0);
test(-1.25, -2.0);
test(-1.0, -1.0);
test(0.0, 0.0);
test(1.0, 1.0);
test(1.25, 1.0);
test(1.5, 1.0);
test(1.75, 1.0);
test(2.5, 2.0);
// Test that each element is rounded
let a = _mm_setr_pd(1.5, 3.5);
let e = _mm_setr_pd(1.0, 3.0);
let r = _mm_floor_pd(a);
assert_eq_m128d(r, e);
let r = _mm_round_pd::<_MM_FROUND_TO_NEG_INF>(a);
assert_eq_m128d(r, e);
// Checks rounding toward positive infinity on f64. The dedicated
// `_mm_ceil_sd` / `_mm_ceil_pd` intrinsics and the generic
// `_mm_round_sd` / `_mm_round_pd` with `_MM_FROUND_TO_POS_INF` are all
// compared against the same expected values.
#[target_feature(enable = "sse4.1")]
unsafe fn test_round_ceil_f64() {
// Helper: takes the ceiling of `x` and compares against `res`, first as the
// low lane of the scalar form, then in both lanes of the packed form.
#[target_feature(enable = "sse4.1")]
unsafe fn test(x: f64, res: f64) {
let a = _mm_setr_pd(3.5, 2.5);
let b = _mm_setr_pd(x, -1.5);
// Scalar form: lane 0 is the rounded `x`; lane 1 is expected to come from `a`.
let e = _mm_setr_pd(res, 2.5);
let r = _mm_ceil_sd(a, b);
assert_eq_m128d(r, e);
let r = _mm_round_sd::<_MM_FROUND_TO_POS_INF>(a, b);
assert_eq_m128d(r, e);
// Packed form: `x` is broadcast to both lanes.
let a = _mm_set1_pd(x);
let e = _mm_set1_pd(res);
let r = _mm_ceil_pd(a);
assert_eq_m128d(r, e);
let r = _mm_round_pd::<_MM_FROUND_TO_POS_INF>(a);
assert_eq_m128d(r, e);
// Test rounding direction
test(-2.5, -2.0);
test(-1.75, -1.0);
test(-1.5, -1.0);
test(-1.25, -1.0);
test(-1.0, -1.0);
test(0.0, 0.0);
test(1.0, 1.0);
test(1.25, 2.0);
test(1.5, 2.0);
test(1.75, 2.0);
test(2.5, 3.0);
// Test that each element is rounded
let a = _mm_setr_pd(1.5, 3.5);
let e = _mm_setr_pd(2.0, 4.0);
let r = _mm_ceil_pd(a);
assert_eq_m128d(r, e);
let r = _mm_round_pd::<_MM_FROUND_TO_POS_INF>(a);
assert_eq_m128d(r, e);
// Checks rounding toward zero (truncation) on f64 using `_mm_round_sd` and
// `_mm_round_pd` with `_MM_FROUND_TO_ZERO`. Negative inputs are expected to
// round up toward zero and positive inputs down toward zero.
#[target_feature(enable = "sse4.1")]
unsafe fn test_round_trunc_f64() {
// Helper: truncates `x` and compares against `res`, first as the low lane of
// the scalar form, then in both lanes of the packed form.
#[target_feature(enable = "sse4.1")]
unsafe fn test(x: f64, res: f64) {
let a = _mm_setr_pd(3.5, 2.5);
let b = _mm_setr_pd(x, -1.5);
// Scalar form: lane 0 is the rounded `x`; lane 1 is expected to come from `a`.
let e = _mm_setr_pd(res, 2.5);
let r = _mm_round_sd::<_MM_FROUND_TO_ZERO>(a, b);
assert_eq_m128d(r, e);
// Packed form: `x` is broadcast to both lanes.
let a = _mm_set1_pd(x);
let e = _mm_set1_pd(res);
let r = _mm_round_pd::<_MM_FROUND_TO_ZERO>(a);
assert_eq_m128d(r, e);
// Test rounding direction
test(-2.5, -2.0);
test(-1.75, -1.0);
test(-1.5, -1.0);
test(-1.25, -1.0);
test(-1.0, -1.0);
test(0.0, 0.0);
test(1.0, 1.0);
test(1.25, 1.0);
test(1.5, 1.0);
test(1.75, 1.0);
test(2.5, 2.0);
// Test that each element is rounded
let a = _mm_setr_pd(1.5, 3.5);
let e = _mm_setr_pd(1.0, 3.0);
let r = _mm_round_pd::<_MM_FROUND_TO_ZERO>(a);
assert_eq_m128d(r, e);
// Checks `_mm_minpos_epu16`: the expected vector carries the minimum 16-bit
// value in lane 0, the index of that value in lane 1, and zeros elsewhere.
// Three inputs are covered: minimum in the middle, minimum of 0 at index 0,
// and a repeated minimum.
#[target_feature(enable = "sse4.1")]
unsafe fn test_mm_minpos_epu16() {
// Minimum 13 sits at index 5.
let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 66);
let r = _mm_minpos_epu16(a);
let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
assert_eq_m128i(r, e);
// Minimum 0 sits at index 0, so the whole expected vector is zero.
let a = _mm_setr_epi16(0, 18, 44, 97, 50, 13, 67, 66);
let r = _mm_minpos_epu16(a);
let e = _mm_setr_epi16(0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m128i(r, e);
// Case where the minimum value is repeated
// 13 appears at indices 5 and 7; the lower index (5) is expected.
let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 13);
let r = _mm_minpos_epu16(a);
let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
assert_eq_m128i(r, e);
// Checks `_mm_mpsadbw_epu8` with both operands set to the bytes 0..=15, for
// the immediates 0b000, 0b001, 0b100, 0b101 and 0b111. Each expected vector
// holds eight 16-bit sums of absolute differences.
// With this ramp input every expected lane is 4 * |constant + lane index|,
// which is why all the expected vectors are arithmetic runs with step 4.
#[target_feature(enable = "sse4.1")]
unsafe fn test_mm_mpsadbw_epu8() {
let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm_mpsadbw_epu8::<0b000>(a, a);
let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
assert_eq_m128i(r, e);
let r = _mm_mpsadbw_epu8::<0b001>(a, a);
let e = _mm_setr_epi16(16, 12, 8, 4, 0, 4, 8, 12);
assert_eq_m128i(r, e);
let r = _mm_mpsadbw_epu8::<0b100>(a, a);
let e = _mm_setr_epi16(16, 20, 24, 28, 32, 36, 40, 44);
assert_eq_m128i(r, e);
let r = _mm_mpsadbw_epu8::<0b101>(a, a);
let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
assert_eq_m128i(r, e);
let r = _mm_mpsadbw_epu8::<0b111>(a, a);
let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4);
assert_eq_m128i(r, e);
// Checks `_mm_testz_si128`: the expected return is 1 when `a` and `mask` have
// no set bit in common, 0 otherwise.
#[target_feature(enable = "sse4.1")]
unsafe fn test_mm_testz_si128() {
// Empty mask: nothing in common, so 1.
let a = _mm_set1_epi8(1);
let mask = _mm_set1_epi8(0);
let r = _mm_testz_si128(a, mask);
assert_eq!(r, 1);
// 0b101 & 0b110 = 0b100, a shared bit, so 0.
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b110);
let r = _mm_testz_si128(a, mask);
assert_eq!(r, 0);
// 0b011 & 0b100 = 0, so 1.
let a = _mm_set1_epi8(0b011);
let mask = _mm_set1_epi8(0b100);
let r = _mm_testz_si128(a, mask);
assert_eq!(r, 1);
// Checks `_mm_testc_si128`: the expected return is 1 when every bit set in
// `mask` is also set in `a`, 0 otherwise.
#[target_feature(enable = "sse4.1")]
unsafe fn test_mm_testc_si128() {
// `a` is all ones (-1 in every byte), so any mask is covered.
let a = _mm_set1_epi8(-1);
let mask = _mm_set1_epi8(0);
let r = _mm_testc_si128(a, mask);
assert_eq!(r, 1);
// Mask bit 0b010 is missing from `a` (0b101), so 0.
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b110);
let r = _mm_testc_si128(a, mask);
assert_eq!(r, 0);
// Mask 0b100 is fully contained in `a` (0b101), so 1.
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b100);
let r = _mm_testc_si128(a, mask);
assert_eq!(r, 1);
// Checks `_mm_testnzc_si128`: the expected return is 1 only when `a` and
// `mask` share at least one set bit AND `mask` also has at least one set bit
// that `a` lacks; in every other case 0 is expected.
#[target_feature(enable = "sse4.1")]
unsafe fn test_mm_testnzc_si128() {
// No shared bits, so 0.
let a = _mm_set1_epi8(0);
let mask = _mm_set1_epi8(1);
let r = _mm_testnzc_si128(a, mask);
assert_eq!(r, 0);
// Empty mask: nothing shared, so 0.
let a = _mm_set1_epi8(-1);
let mask = _mm_set1_epi8(0);
let r = _mm_testnzc_si128(a, mask);
assert_eq!(r, 0);
// Shared bit 0b100 and uncovered mask bit 0b010, so 1.
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b110);
let r = _mm_testnzc_si128(a, mask);
assert_eq!(r, 1);
// Mask equals `a`: every mask bit is covered, so 0.
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b101);
let r = _mm_testnzc_si128(a, mask);
assert_eq!(r, 0);
// The two conditions are met in different 32-bit lanes: lane 0 shares 0b100,
// lane 3 has mask bit 0b100 that `a` (0b010) lacks. Expected 1, i.e. the test
// is evaluated over the whole 128-bit value.
let a = _mm_setr_epi32(0b100, 0, 0, 0b010);
let mask = _mm_setr_epi32(0b100, 0, 0, 0b110);
let r = _mm_testnzc_si128(a, mask);
assert_eq!(r, 1);
// Assertion helper for `__m128` (4 x f32) values.
// Compares the two vectors lane by lane with `_mm_cmpeq_ps` and panics,
// printing both vectors with `{:?}`, unless all four lanes compared equal.
#[target_feature(enable = "sse")]
unsafe fn assert_eq_m128(a: __m128, b: __m128) {
let r = _mm_cmpeq_ps(a, b);
// The movemask is compared against 0b1111, one bit per lane.
if _mm_movemask_ps(r) != 0b1111 {
panic!("{:?} != {:?}", a, b);
// Assertion helper for `__m128d` (2 x f64) values.
// Compares the two vectors lane by lane with `_mm_cmpeq_pd` and panics,
// printing both vectors with `{:?}`, unless the movemask of the comparison is
// 0b11 (both lanes equal).
#[target_feature(enable = "sse2")]
pub unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) {
if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 {
panic!("{:?} != {:?}", a, b);
// Assertion helper for `__m128i` values.
// Reinterprets each vector as `[u64; 2]` via `transmute` and compares the two
// arrays with `assert_eq!`, so the check is on the raw 128 bits and a failure
// prints both halves of each operand.
#[target_feature(enable = "sse2")]
pub unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b))