| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 |
| ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX1 |
| ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 |
| |
| ; PR90847 - failure to peek through FREEZE(SETCC()) results in VPMOVMSKB(TRUNC()) instead of VMOVMSKPS |
| |
; Reduces the eight lanes of %x with three shuffle + x86 AVX min steps, compares
; the reduced vector against %x lane by lane, packs the compare result into an
; 8-bit mask and returns the number of trailing zeros of that mask (the index
; of the lowest set bit).
; The autogenerated AVX1/AVX2 assertions below expect the mask to be read with
; a single vmovmskps directly from the vcmpeqps result.
| define i32 @PR90847(<8 x float> %x) nounwind { |
| ; AVX1-LABEL: PR90847: |
| ; AVX1: # %bb.0: # %entry |
| ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm0[1,0,3,2,5,4,7,6] |
| ; AVX1-NEXT: vminps %ymm1, %ymm0, %ymm1 |
| ; AVX1-NEXT: vshufpd {{.*#+}} ymm2 = ymm1[1,0,3,2] |
| ; AVX1-NEXT: vminps %ymm2, %ymm1, %ymm1 |
| ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1] |
| ; AVX1-NEXT: vminps %ymm2, %ymm1, %ymm1 |
| ; AVX1-NEXT: vcmpeqps %ymm0, %ymm1, %ymm0 |
| ; AVX1-NEXT: vmovmskps %ymm0, %ecx |
| ; AVX1-NEXT: movl $32, %eax |
| ; AVX1-NEXT: rep bsfl %ecx, %eax |
| ; AVX1-NEXT: vzeroupper |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: PR90847: |
| ; AVX2: # %bb.0: # %entry |
| ; AVX2-NEXT: vshufps {{.*#+}} ymm1 = ymm0[1,0,3,2,5,4,7,6] |
| ; AVX2-NEXT: vminps %ymm1, %ymm0, %ymm1 |
| ; AVX2-NEXT: vshufpd {{.*#+}} ymm2 = ymm1[1,0,3,2] |
| ; AVX2-NEXT: vminps %ymm2, %ymm1, %ymm1 |
| ; AVX2-NEXT: vpermpd {{.*#+}} ymm2 = ymm1[2,3,0,1] |
| ; AVX2-NEXT: vminps %ymm2, %ymm1, %ymm1 |
| ; AVX2-NEXT: vcmpeqps %ymm0, %ymm1, %ymm0 |
| ; AVX2-NEXT: vmovmskps %ymm0, %ecx |
| ; AVX2-NEXT: movl $32, %eax |
| ; AVX2-NEXT: rep bsfl %ecx, %eax |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| entry: |
; Step 1: swap neighbouring lanes (0<->1, 2<->3, 4<->5, 6<->7) and min with %x.
| %shuf1 = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> |
| %min1 = tail call noundef <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %x, <8 x float> %shuf1) |
; Step 2: swap neighbouring lane pairs ({0,1}<->{2,3}, {4,5}<->{6,7}) and min again.
| %shuf2 = shufflevector <8 x float> %min1, <8 x float> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5> |
| %min2 = tail call noundef <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %min1, <8 x float> %shuf2) |
; Step 3: swap the low and high four lanes and min again; after this every lane
; of %min3 has been combined with all eight input lanes.
| %shuf3 = shufflevector <8 x float> %min2, <8 x float> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> |
| %min3 = tail call noundef <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %min2, <8 x float> %shuf3) |
; One i1 per lane: set where the reduced value compares ordered-equal to the input lane.
| %fcmp = fcmp oeq <8 x float> %min3, %x |
; Pack the eight lane bits into an i8 mask, then widen it for the 32-bit cttz.
| %mask = bitcast <8 x i1> %fcmp to i8 |
| %zext = zext i8 %mask to i32 |
| %cmp = icmp eq i8 %mask, 0 |
; The second cttz operand is false, so a zero input is defined rather than poison;
; the range attribute bounds the result to [0, 33).
| %tz = tail call range(i32 0, 33) i32 @llvm.cttz.i32(i32 %zext, i1 false) |
; An all-zero mask selects undef instead of the cttz result.
; NOTE(review): presumably this undef select is what produces the FREEZE(SETCC())
; named in the file header comment - confirm against PR90847 before changing it.
| %conv = select i1 %cmp, i32 undef, i32 %tz |
| ret i32 %conv |
| } |