| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX12,AVX1 |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX12,AVX2 |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512VL |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=xop | FileCheck %s --check-prefixes=CHECK,XOP |
| |
| ; The condition vector for BLENDV* only cares about the sign bit of each element, |
| ; so whenever these tests lower to BLENDV*, the compare against zero is redundant |
| ; and should be removed. |
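| |
| ; A scalar sketch of the blend semantics these tests rely on (pseudocode for |
| ; illustration only; element type and count vary per test): |
| ; |
| ;   for (i = 0; i < NumElts; ++i) |
| ;     result[i] = (mask[i] < 0) ? x[i] : y[i] |
| ; |
| ; i.e. only the sign bit of each mask element matters. |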
| |
| ; Test 128-bit vectors for all legal element types. |
| |
| define <16 x i8> @signbit_sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) { |
| ; CHECK-LABEL: signbit_sel_v16i8: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 |
| ; CHECK-NEXT: retq |
| %tr = icmp slt <16 x i8> %mask, zeroinitializer |
| %z = select <16 x i1> %tr, <16 x i8> %x, <16 x i8> %y |
| ret <16 x i8> %z |
| } |
| |
| ; Sorry 16-bit, you're not important enough to support? (There is no vpblendvw, |
| ; so a v8i16 select still needs an explicit compare to materialize the mask.) |
| |
| define <8 x i16> @signbit_sel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) { |
| ; AVX12-LABEL: signbit_sel_v8i16: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX12-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 |
| ; AVX12-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512F-LABEL: signbit_sel_v8i16: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512F-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 |
| ; AVX512F-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: signbit_sel_v8i16: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512VL-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 |
| ; AVX512VL-NEXT: vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm2 & (xmm0 ^ xmm1)) |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: signbit_sel_v8i16: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; XOP-NEXT: vpcomltw %xmm3, %xmm2, %xmm2 |
| ; XOP-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 |
| ; XOP-NEXT: retq |
| %tr = icmp slt <8 x i16> %mask, zeroinitializer |
| %z = select <8 x i1> %tr, <8 x i16> %x, <8 x i16> %y |
| ret <8 x i16> %z |
| } |
| |
| define <4 x i32> @signbit_sel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { |
| ; AVX12-LABEL: signbit_sel_v4i32: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512F-LABEL: signbit_sel_v4i32: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 |
| ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1 |
| ; AVX512F-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: signbit_sel_v4i32: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1 |
| ; AVX512VL-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: signbit_sel_v4i32: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 |
| ; XOP-NEXT: retq |
| %tr = icmp slt <4 x i32> %mask, zeroinitializer |
| %z = select <4 x i1> %tr, <4 x i32> %x, <4 x i32> %y |
| ret <4 x i32> %z |
| } |
| |
| define <2 x i64> @signbit_sel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) { |
| ; AVX12-LABEL: signbit_sel_v2i64: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512F-LABEL: signbit_sel_v2i64: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 |
| ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1 |
| ; AVX512F-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: signbit_sel_v2i64: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512VL-NEXT: vpcmpgtq %xmm2, %xmm3, %k1 |
| ; AVX512VL-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: signbit_sel_v2i64: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 |
| ; XOP-NEXT: retq |
| %tr = icmp slt <2 x i64> %mask, zeroinitializer |
| %z = select <2 x i1> %tr, <2 x i64> %x, <2 x i64> %y |
| ret <2 x i64> %z |
| } |
| |
| define <4 x float> @signbit_sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i32> %mask) { |
| ; AVX12-LABEL: signbit_sel_v4f32: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512F-LABEL: signbit_sel_v4f32: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 |
| ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1 |
| ; AVX512F-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: signbit_sel_v4f32: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1 |
| ; AVX512VL-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: signbit_sel_v4f32: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 |
| ; XOP-NEXT: retq |
| %tr = icmp slt <4 x i32> %mask, zeroinitializer |
| %z = select <4 x i1> %tr, <4 x float> %x, <4 x float> %y |
| ret <4 x float> %z |
| } |
| |
| define <2 x double> @signbit_sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i64> %mask) { |
| ; AVX12-LABEL: signbit_sel_v2f64: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512F-LABEL: signbit_sel_v2f64: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 |
| ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1 |
| ; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: signbit_sel_v2f64: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512VL-NEXT: vpcmpgtq %xmm2, %xmm3, %k1 |
| ; AVX512VL-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: signbit_sel_v2f64: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 |
| ; XOP-NEXT: retq |
| %tr = icmp slt <2 x i64> %mask, zeroinitializer |
| %z = select <2 x i1> %tr, <2 x double> %x, <2 x double> %y |
| ret <2 x double> %z |
| } |
| |
| ; Test 256-bit vectors to see differences between AVX1 and AVX2. AVX1 lacks |
| ; 256-bit integer compares, so the mask must be assembled from 128-bit halves. |
| |
| define <32 x i8> @signbit_sel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %mask) { |
| ; AVX1-LABEL: signbit_sel_v32i8: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 |
| ; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 |
| ; AVX1-NEXT: vpcmpgtb %xmm3, %xmm4, %xmm3 |
| ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm4, %xmm2 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 |
| ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1 |
| ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 |
| ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: signbit_sel_v32i8: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512-LABEL: signbit_sel_v32i8: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 |
| ; AVX512-NEXT: retq |
| ; |
| ; XOP-LABEL: signbit_sel_v32i8: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vextractf128 $1, %ymm2, %xmm3 |
| ; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4 |
| ; XOP-NEXT: vpcomltb %xmm4, %xmm3, %xmm3 |
| ; XOP-NEXT: vpcomltb %xmm4, %xmm2, %xmm2 |
| ; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 |
| ; XOP-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0 |
| ; XOP-NEXT: retq |
| %tr = icmp slt <32 x i8> %mask, zeroinitializer |
| %z = select <32 x i1> %tr, <32 x i8> %x, <32 x i8> %y |
| ret <32 x i8> %z |
| } |
| |
| ; Sorry 16-bit, you'll never be important enough to support? (Still no vpblendvw, |
| ; so an explicit compare is required; AVX1 must also assemble the 256-bit mask |
| ; from 128-bit halves.) |
| |
| define <16 x i16> @signbit_sel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %mask) { |
| ; AVX1-LABEL: signbit_sel_v16i16: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 |
| ; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 |
| ; AVX1-NEXT: vpcmpgtw %xmm3, %xmm4, %xmm3 |
| ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm4, %xmm2 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 |
| ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1 |
| ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 |
| ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: signbit_sel_v16i16: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX2-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2 |
| ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: signbit_sel_v16i16: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2 |
| ; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: signbit_sel_v16i16: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512VL-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2 |
| ; AVX512VL-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (ymm2 & (ymm0 ^ ymm1)) |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: signbit_sel_v16i16: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vextractf128 $1, %ymm2, %xmm3 |
| ; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4 |
| ; XOP-NEXT: vpcomltw %xmm4, %xmm3, %xmm3 |
| ; XOP-NEXT: vpcomltw %xmm4, %xmm2, %xmm2 |
| ; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 |
| ; XOP-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0 |
| ; XOP-NEXT: retq |
| %tr = icmp slt <16 x i16> %mask, zeroinitializer |
| %z = select <16 x i1> %tr, <16 x i16> %x, <16 x i16> %y |
| ret <16 x i16> %z |
| } |
| |
| define <8 x i32> @signbit_sel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %mask) { |
| ; AVX12-LABEL: signbit_sel_v8i32: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512F-LABEL: signbit_sel_v8i32: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 |
| ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1 |
| ; AVX512F-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} |
| ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: signbit_sel_v8i32: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512VL-NEXT: vpcmpgtd %ymm2, %ymm3, %k1 |
| ; AVX512VL-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: signbit_sel_v8i32: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 |
| ; XOP-NEXT: retq |
| %tr = icmp slt <8 x i32> %mask, zeroinitializer |
| %z = select <8 x i1> %tr, <8 x i32> %x, <8 x i32> %y |
| ret <8 x i32> %z |
| } |
| |
| define <4 x i64> @signbit_sel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask) { |
| ; AVX12-LABEL: signbit_sel_v4i64: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512F-LABEL: signbit_sel_v4i64: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 |
| ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1 |
| ; AVX512F-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} |
| ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: signbit_sel_v4i64: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512VL-NEXT: vpcmpgtq %ymm2, %ymm3, %k1 |
| ; AVX512VL-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: signbit_sel_v4i64: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 |
| ; XOP-NEXT: retq |
| %tr = icmp slt <4 x i64> %mask, zeroinitializer |
| %z = select <4 x i1> %tr, <4 x i64> %x, <4 x i64> %y |
| ret <4 x i64> %z |
| } |
| |
| define <4 x double> @signbit_sel_v4f64(<4 x double> %x, <4 x double> %y, <4 x i64> %mask) { |
| ; AVX12-LABEL: signbit_sel_v4f64: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512F-LABEL: signbit_sel_v4f64: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 |
| ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1 |
| ; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} |
| ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: signbit_sel_v4f64: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512VL-NEXT: vpcmpgtq %ymm2, %ymm3, %k1 |
| ; AVX512VL-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: signbit_sel_v4f64: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 |
| ; XOP-NEXT: retq |
| %tr = icmp slt <4 x i64> %mask, zeroinitializer |
| %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y |
| ret <4 x double> %z |
| } |
| |
| ; Try a condition with a narrower element type than the select operands. |
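| |
| ; Note (illustrative): sign extension preserves the sign bit, e.g. |
| ;   sext i32 0x80000000 --> i64 0xFFFFFFFF80000000  (bit 63 still set) |
| ; so BLENDV targets may widen the mask with vpmovsxdq, while AVX-512 targets |
| ; can compare the narrow mask directly into a k-register. |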
| |
| define <4 x double> @signbit_sel_v4f64_small_mask(<4 x double> %x, <4 x double> %y, <4 x i32> %mask) { |
| ; AVX1-LABEL: signbit_sel_v4f64_small_mask: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vpmovsxdq %xmm2, %xmm3 |
| ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] |
| ; AVX1-NEXT: vpmovsxdq %xmm2, %xmm2 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 |
| ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: signbit_sel_v4f64_small_mask: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpmovsxdq %xmm2, %ymm2 |
| ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: signbit_sel_v4f64_small_mask: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 |
| ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1 |
| ; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} |
| ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: signbit_sel_v4f64_small_mask: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1 |
| ; AVX512VL-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: signbit_sel_v4f64_small_mask: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vpmovsxdq %xmm2, %xmm3 |
| ; XOP-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] |
| ; XOP-NEXT: vpmovsxdq %xmm2, %xmm2 |
| ; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 |
| ; XOP-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 |
| ; XOP-NEXT: retq |
| %tr = icmp slt <4 x i32> %mask, zeroinitializer |
| %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y |
| ret <4 x double> %z |
| } |
| |
| ; Try a 512-bit vector to make sure AVX-512 is handled as expected. |
| |
| define <8 x double> @signbit_sel_v8f64(<8 x double> %x, <8 x double> %y, <8 x i64> %mask) { |
| ; AVX12-LABEL: signbit_sel_v8f64: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0 |
| ; AVX12-NEXT: vblendvpd %ymm5, %ymm1, %ymm3, %ymm1 |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512-LABEL: signbit_sel_v8f64: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512-NEXT: vpcmpgtq %zmm2, %zmm3, %k1 |
| ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} |
| ; AVX512-NEXT: retq |
| ; |
| ; XOP-LABEL: signbit_sel_v8f64: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0 |
| ; XOP-NEXT: vblendvpd %ymm5, %ymm1, %ymm3, %ymm1 |
| ; XOP-NEXT: retq |
| %tr = icmp slt <8 x i64> %mask, zeroinitializer |
| %z = select <8 x i1> %tr, <8 x double> %x, <8 x double> %y |
| ret <8 x double> %z |
| } |
| |
| ; If we have a floating-point compare: |
| ; (1) Don't die. |
| ; (2) FIXME: If we don't care about signed-zero (and NaN?), the compare should still get folded. |
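| |
| ; A worked example of why the fold is unsafe by default (sketch): with %x = -0.0, |
| ;   fcmp olt -0.0, 0.0 --> false, so the select returns %y, |
| ;   but signbit(-0.0) = 1, so a sign-bit blend would return %x. |
| ; A NaN %x with its sign bit set mis-selects the same way, so the fold |
| ; presumably needs 'nsz' (and 'nnan') on the compare. |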
| |
| define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x float> %mask) #0 { |
| ; AVX12-LABEL: signbit_sel_v4f32_fcmp: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vxorps %xmm2, %xmm2, %xmm2 |
| ; AVX12-NEXT: vcmpltps %xmm2, %xmm0, %xmm2 |
| ; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512F-LABEL: signbit_sel_v4f32_fcmp: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vxorps %xmm2, %xmm2, %xmm2 |
| ; AVX512F-NEXT: vcmpltps %zmm2, %zmm0, %k1 |
| ; AVX512F-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: signbit_sel_v4f32_fcmp: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vxorps %xmm2, %xmm2, %xmm2 |
| ; AVX512VL-NEXT: vcmpltps %xmm2, %xmm0, %k1 |
| ; AVX512VL-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: signbit_sel_v4f32_fcmp: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vxorps %xmm2, %xmm2, %xmm2 |
| ; XOP-NEXT: vcmpltps %xmm2, %xmm0, %xmm2 |
| ; XOP-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 |
| ; XOP-NEXT: retq |
| %cmp = fcmp olt <4 x float> %x, zeroinitializer |
| %sel = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y |
| ret <4 x float> %sel |
| } |
| |
| define <4 x i64> @blend_splat1_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) { |
| ; AVX1-LABEL: blend_splat1_mask_cond_v4i64: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm3 |
| ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 |
| ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 |
| ; AVX1-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: blend_splat1_mask_cond_v4i64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0 |
| ; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_splat1_mask_cond_v4i64: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 |
| ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; AVX512F-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1 |
| ; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1} |
| ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_splat1_mask_cond_v4i64: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 |
| ; AVX512VL-NEXT: vpblendmq %ymm1, %ymm2, %ymm0 {%k1} |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_splat1_mask_cond_v4i64: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3 |
| ; XOP-NEXT: vpsllq $63, %xmm3, %xmm3 |
| ; XOP-NEXT: vpmovsxbq {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553] |
| ; XOP-NEXT: vpshaq %xmm4, %xmm3, %xmm3 |
| ; XOP-NEXT: vpsllq $63, %xmm0, %xmm0 |
| ; XOP-NEXT: vpshaq %xmm4, %xmm0, %xmm0 |
| ; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 |
| ; XOP-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 |
| ; XOP-NEXT: retq |
| %a = and <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1> |
| %c = icmp eq <4 x i64> %a, zeroinitializer |
| %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z |
| ret <4 x i64> %r |
| } |
| |
| define <4 x i32> @blend_splat1_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { |
| ; AVX12-LABEL: blend_splat1_mask_cond_v4i32: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vpslld $31, %xmm0, %xmm0 |
| ; AVX12-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_splat1_mask_cond_v4i32: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 |
| ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1 |
| ; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1} |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_splat1_mask_cond_v4i32: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 |
| ; AVX512VL-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1} |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_splat1_mask_cond_v4i32: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; XOP-NEXT: vpcomneqd %xmm3, %xmm0, %xmm0 |
| ; XOP-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 |
| ; XOP-NEXT: retq |
| %a = and <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> |
| %c = icmp eq <4 x i32> %a, zeroinitializer |
| %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z |
| ret <4 x i32> %r |
| } |
| |
| define <16 x i16> @blend_splat1_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) { |
| ; AVX1-LABEL: blend_splat1_mask_cond_v16i16: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm3 |
| ; AVX1-NEXT: vpsraw $15, %xmm3, %xmm3 |
| ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 |
| ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0 |
| ; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 |
| ; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm1 |
| ; AVX1-NEXT: vandps %ymm0, %ymm2, %ymm0 |
| ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: blend_splat1_mask_cond_v16i16: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_splat1_mask_cond_v16i16: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 |
| ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 |
| ; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_splat1_mask_cond_v16i16: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 |
| ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 |
| ; AVX512VL-NEXT: vpternlogq {{.*#+}} ymm0 = ymm2 ^ (ymm0 & (ymm1 ^ ymm2)) |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_splat1_mask_cond_v16i16: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vpsllw $15, %xmm0, %xmm3 |
| ; XOP-NEXT: vpsraw $15, %xmm3, %xmm3 |
| ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0 |
| ; XOP-NEXT: vpsllw $15, %xmm0, %xmm0 |
| ; XOP-NEXT: vpsraw $15, %xmm0, %xmm0 |
| ; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 |
| ; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0 |
| ; XOP-NEXT: retq |
| %a = and <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> |
| %c = icmp eq <16 x i16> %a, zeroinitializer |
| %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z |
| ret <16 x i16> %r |
| } |
| |
| define <16 x i8> @blend_splat1_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) { |
| ; AVX12-LABEL: blend_splat1_mask_cond_v16i8: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vpsllw $7, %xmm0, %xmm0 |
| ; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_splat1_mask_cond_v16i8: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vpbroadcastb {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] |
| ; AVX512F-NEXT: vpand %xmm3, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_splat1_mask_cond_v16i8: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] |
| ; AVX512VL-NEXT: vpand %xmm3, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm0 & (xmm2 ^ xmm1)) |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_splat1_mask_cond_v16i8: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; XOP-NEXT: vpcomneqb %xmm3, %xmm0, %xmm0 |
| ; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 |
| ; XOP-NEXT: retq |
| %a = and <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> |
| %c = icmp eq <16 x i8> %a, zeroinitializer |
| %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z |
| ret <16 x i8> %r |
| } |
| |
| define <2 x i64> @blend_splatmax_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { |
| ; AVX12-LABEL: blend_splatmax_mask_cond_v2i64: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_splatmax_mask_cond_v2i64: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 |
| ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] |
| ; AVX512F-NEXT: vptestnmq %zmm3, %zmm0, %k1 |
| ; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1} |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_splatmax_mask_cond_v2i64: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 |
| ; AVX512VL-NEXT: vpblendmq %xmm1, %xmm2, %xmm0 {%k1} |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_splatmax_mask_cond_v2i64: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; XOP-NEXT: vpcomneqq %xmm3, %xmm0, %xmm0 |
| ; XOP-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 |
| ; XOP-NEXT: retq |
| %a = and <2 x i64> %x, <i64 9223372036854775808, i64 9223372036854775808> |
| %c = icmp eq <2 x i64> %a, zeroinitializer |
| %r = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %z |
| ret <2 x i64> %r |
| } |
| |
| define <8 x i32> @blend_splatmax_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) { |
| ; AVX12-LABEL: blend_splatmax_mask_cond_v8i32: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0 |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_splatmax_mask_cond_v8i32: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 |
| ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1 |
| ; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1} |
| ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_splatmax_mask_cond_v8i32: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 |
| ; AVX512VL-NEXT: vpblendmd %ymm1, %ymm2, %ymm0 {%k1} |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_splatmax_mask_cond_v8i32: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0 |
| ; XOP-NEXT: retq |
| %a = and <8 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648> |
| %c = icmp eq <8 x i32> %a, zeroinitializer |
| %r = select <8 x i1> %c, <8 x i32> %y, <8 x i32> %z |
| ret <8 x i32> %r |
| } |
| |
| define <8 x i16> @blend_splatmax_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) { |
| ; AVX12-LABEL: blend_splatmax_mask_cond_v8i16: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vpsraw $15, %xmm0, %xmm0 |
| ; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_splatmax_mask_cond_v8i16: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512F-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_splatmax_mask_cond_v8i16: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512VL-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vpternlogq {{.*#+}} xmm0 = xmm2 ^ (xmm0 & (xmm1 ^ xmm2)) |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_splatmax_mask_cond_v8i16: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; XOP-NEXT: vpcomneqw %xmm3, %xmm0, %xmm0 |
| ; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 |
| ; XOP-NEXT: retq |
| %a = and <8 x i16> %x, <i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768> |
| %c = icmp eq <8 x i16> %a, zeroinitializer |
| %r = select <8 x i1> %c, <8 x i16> %y, <8 x i16> %z |
| ret <8 x i16> %r |
| } |
| |
| define <32 x i8> @blend_splatmax_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %z) { |
| ; AVX1-LABEL: blend_splatmax_mask_cond_v32i8: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 |
| ; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 |
| ; AVX1-NEXT: vpcmpgtb %xmm3, %xmm4, %xmm3 |
| ; AVX1-NEXT: vpcmpgtb %xmm0, %xmm4, %xmm0 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 |
| ; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm1 |
| ; AVX1-NEXT: vandps %ymm0, %ymm2, %ymm0 |
| ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: blend_splatmax_mask_cond_v32i8: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_splatmax_mask_cond_v32i8: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] |
| ; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0 |
| ; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 |
| ; AVX512F-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_splatmax_mask_cond_v32i8: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vpbroadcastd {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] |
| ; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0 |
| ; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 |
| ; AVX512VL-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (ymm0 & (ymm2 ^ ymm1)) |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_splatmax_mask_cond_v32i8: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3 |
| ; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4 |
| ; XOP-NEXT: vpcmpgtb %xmm3, %xmm4, %xmm3 |
| ; XOP-NEXT: vpcmpgtb %xmm0, %xmm4, %xmm0 |
| ; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 |
| ; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0 |
| ; XOP-NEXT: retq |
| %a = and <32 x i8> %x, <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128> |
| %c = icmp eq <32 x i8> %a, zeroinitializer |
| %r = select <32 x i1> %c, <32 x i8> %y, <32 x i8> %z |
| ret <32 x i8> %r |
| } |
| |
| define <4 x i64> @blend_splat_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) { |
| ; AVX1-LABEL: blend_splat_mask_cond_v4i64: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vpsllq $62, %xmm0, %xmm3 |
| ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 |
| ; AVX1-NEXT: vpsllq $62, %xmm0, %xmm0 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 |
| ; AVX1-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: blend_splat_mask_cond_v4i64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpsllq $62, %ymm0, %ymm0 |
| ; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_splat_mask_cond_v4i64: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 |
| ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; AVX512F-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1 |
| ; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1} |
| ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_splat_mask_cond_v4i64: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 |
| ; AVX512VL-NEXT: vpblendmq %ymm1, %ymm2, %ymm0 {%k1} |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_splat_mask_cond_v4i64: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3 |
| ; XOP-NEXT: vpsllq $62, %xmm3, %xmm3 |
| ; XOP-NEXT: vpmovsxbq {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553] |
| ; XOP-NEXT: vpshaq %xmm4, %xmm3, %xmm3 |
| ; XOP-NEXT: vpsllq $62, %xmm0, %xmm0 |
| ; XOP-NEXT: vpshaq %xmm4, %xmm0, %xmm0 |
| ; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 |
| ; XOP-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 |
| ; XOP-NEXT: retq |
| %a = and <4 x i64> %x, <i64 2, i64 2, i64 2, i64 2> |
| %c = icmp eq <4 x i64> %a, zeroinitializer |
| %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z |
| ret <4 x i64> %r |
| } |
| |
| define <4 x i32> @blend_splat_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { |
| ; AVX12-LABEL: blend_splat_mask_cond_v4i32: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vpslld $15, %xmm0, %xmm0 |
| ; AVX12-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_splat_mask_cond_v4i32: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 |
| ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1 |
| ; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1} |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_splat_mask_cond_v4i32: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 |
| ; AVX512VL-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1} |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_splat_mask_cond_v4i32: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; XOP-NEXT: vpcomneqd %xmm3, %xmm0, %xmm0 |
| ; XOP-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 |
| ; XOP-NEXT: retq |
| %a = and <4 x i32> %x, <i32 65536, i32 65536, i32 65536, i32 65536> |
| %c = icmp eq <4 x i32> %a, zeroinitializer |
| %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z |
| ret <4 x i32> %r |
| } |
| |
| define <16 x i16> @blend_splat_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) { |
| ; AVX1-LABEL: blend_splat_mask_cond_v16i16: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm3 |
| ; AVX1-NEXT: vpsraw $15, %xmm3, %xmm3 |
| ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 |
| ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0 |
| ; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 |
| ; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm1 |
| ; AVX1-NEXT: vandps %ymm0, %ymm2, %ymm0 |
| ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: blend_splat_mask_cond_v16i16: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_splat_mask_cond_v16i16: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 |
| ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 |
| ; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_splat_mask_cond_v16i16: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 |
| ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 |
| ; AVX512VL-NEXT: vpternlogq {{.*#+}} ymm0 = ymm2 ^ (ymm0 & (ymm1 ^ ymm2)) |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_splat_mask_cond_v16i16: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vpsllw $5, %xmm0, %xmm3 |
| ; XOP-NEXT: vpsraw $15, %xmm3, %xmm3 |
| ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0 |
| ; XOP-NEXT: vpsllw $5, %xmm0, %xmm0 |
| ; XOP-NEXT: vpsraw $15, %xmm0, %xmm0 |
| ; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 |
| ; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0 |
| ; XOP-NEXT: retq |
| %a = and <16 x i16> %x, <i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024> |
| %c = icmp eq <16 x i16> %a, zeroinitializer |
| %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z |
| ret <16 x i16> %r |
| } |
| |
| define <16 x i8> @blend_splat_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) { |
| ; AVX12-LABEL: blend_splat_mask_cond_v16i8: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vpsllw $5, %xmm0, %xmm0 |
| ; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_splat_mask_cond_v16i8: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vpbroadcastb {{.*#+}} xmm3 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] |
| ; AVX512F-NEXT: vpand %xmm3, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_splat_mask_cond_v16i8: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm3 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] |
| ; AVX512VL-NEXT: vpand %xmm3, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm0 & (xmm2 ^ xmm1)) |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_splat_mask_cond_v16i8: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; XOP-NEXT: vpcomneqb %xmm3, %xmm0, %xmm0 |
| ; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 |
| ; XOP-NEXT: retq |
| %a = and <16 x i8> %x, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4> |
| %c = icmp eq <16 x i8> %a, zeroinitializer |
| %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z |
| ret <16 x i8> %r |
| } |
| |
| define <2 x i64> @blend_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { |
| ; AVX1-LABEL: blend_mask_cond_v2i64: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm3 = [1,4] |
| ; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 |
| ; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0 |
| ; AVX1-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: blend_mask_cond_v2i64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX2-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_mask_cond_v2i64: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 |
| ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [1,4] |
| ; AVX512F-NEXT: vptestnmq %zmm3, %zmm0, %k1 |
| ; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1} |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_mask_cond_v2i64: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 |
| ; AVX512VL-NEXT: vpblendmq %xmm1, %xmm2, %xmm0 {%k1} |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_mask_cond_v2i64: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; XOP-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 |
| ; XOP-NEXT: retq |
| %a = and <2 x i64> %x, <i64 1, i64 4> |
| %c = icmp eq <2 x i64> %a, zeroinitializer |
| %r = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %z |
| ret <2 x i64> %r |
| } |
| |
| define <4 x i32> @blend_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { |
| ; AVX1-LABEL: blend_mask_cond_v4i32: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: blend_mask_cond_v4i32: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_mask_cond_v4i32: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 |
| ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [65536,512,2,1] |
| ; AVX512F-NEXT: vptestnmd %zmm3, %zmm0, %k1 |
| ; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1} |
| ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_mask_cond_v4i32: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 |
| ; AVX512VL-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1} |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_mask_cond_v4i32: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; XOP-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 |
| ; XOP-NEXT: retq |
| %a = and <4 x i32> %x, <i32 65536, i32 512, i32 2, i32 1> |
| %c = icmp eq <4 x i32> %a, zeroinitializer |
| %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z |
| ret <4 x i32> %r |
| } |
| |
| define <8 x i16> @blend_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) { |
| ; AVX12-LABEL: blend_mask_cond_v8i16: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX12-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX12-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 |
| ; AVX12-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_mask_cond_v8i16: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512F-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_mask_cond_v8i16: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512VL-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vpternlogq {{.*#+}} xmm0 = xmm2 ^ (xmm0 & (xmm1 ^ xmm2)) |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_mask_cond_v8i16: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; XOP-NEXT: vpcomltw %xmm3, %xmm0, %xmm0 |
| ; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 |
| ; XOP-NEXT: retq |
| %a = and <8 x i16> %x, <i16 1, i16 2, i16 8, i16 4, i16 8, i16 1024, i16 2, i16 4096> |
| %c = icmp eq <8 x i16> %a, zeroinitializer |
| %r = select <8 x i1> %c, <8 x i16> %y, <8 x i16> %z |
| ret <8 x i16> %r |
| } |
| |
| define <16 x i8> @blend_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) { |
| ; AVX12-LABEL: blend_mask_cond_v16i8: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vmovdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2] |
| ; AVX12-NEXT: vpand %xmm3, %xmm0, %xmm0 |
| ; AVX12-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 |
| ; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_mask_cond_v16i8: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2] |
| ; AVX512F-NEXT: vpand %xmm3, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_mask_cond_v16i8: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2] |
| ; AVX512VL-NEXT: vpand %xmm3, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm0 & (xmm2 ^ xmm1)) |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_mask_cond_v16i8: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 |
| ; XOP-NEXT: retq |
| %a = and <16 x i8> %x, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 2, i8 2, i8 2, i8 2> |
| %c = icmp eq <16 x i8> %a, zeroinitializer |
| %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z |
| ret <16 x i8> %r |
| } |
| |
| define <4 x i64> @blend_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) { |
| ; AVX1-LABEL: blend_mask_cond_v4i64: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 |
| ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 |
| ; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 |
| ; AVX1-NEXT: vpcmpeqq %xmm4, %xmm3, %xmm3 |
| ; AVX1-NEXT: vpcmpeqq %xmm4, %xmm0, %xmm0 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 |
| ; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: blend_mask_cond_v4i64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 |
| ; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_mask_cond_v4i64: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 |
| ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; AVX512F-NEXT: vpmovzxwq {{.*#+}} ymm3 = [2,4,32768,1] |
| ; AVX512F-NEXT: vptestnmq %zmm3, %zmm0, %k1 |
| ; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1} |
| ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_mask_cond_v4i64: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1 |
| ; AVX512VL-NEXT: vpblendmq %ymm1, %ymm2, %ymm0 {%k1} |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_mask_cond_v4i64: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 |
| ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0 |
| ; XOP-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 |
| ; XOP-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 |
| ; XOP-NEXT: retq |
| %a = and <4 x i64> %x, <i64 2, i64 4, i64 32768, i64 1> |
| %c = icmp eq <4 x i64> %a, zeroinitializer |
| %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z |
| ret <4 x i64> %r |
| } |
| |
| define <8 x i32> @blend_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) { |
| ; AVX1-LABEL: blend_mask_cond_v8i32: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 |
| ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 |
| ; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 |
| ; AVX1-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: blend_mask_cond_v8i32: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 |
| ; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_mask_cond_v8i32: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 |
| ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; AVX512F-NEXT: vpmovsxwd {{.*#+}} ymm3 = [1,2,8,4,8,1024,2,4096] |
| ; AVX512F-NEXT: vptestnmd %zmm3, %zmm0, %k1 |
| ; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1} |
| ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_mask_cond_v8i32: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1 |
| ; AVX512VL-NEXT: vpblendmd %ymm1, %ymm2, %ymm0 {%k1} |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_mask_cond_v8i32: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 |
| ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0 |
| ; XOP-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 |
| ; XOP-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0 |
| ; XOP-NEXT: retq |
| %a = and <8 x i32> %x, <i32 1, i32 2, i32 8, i32 4, i32 8, i32 1024, i32 2, i32 4096> |
| %c = icmp eq <8 x i32> %a, zeroinitializer |
| %r = select <8 x i1> %c, <8 x i32> %y, <8 x i32> %z |
| ret <8 x i32> %r |
| } |
| |
| define <16 x i16> @blend_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) { |
| ; AVX1-LABEL: blend_mask_cond_v16i16: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 |
| ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 |
| ; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 |
| ; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3 |
| ; AVX1-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 |
| ; AVX1-NEXT: vandnps %ymm2, %ymm0, %ymm2 |
| ; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0 |
| ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: blend_mask_cond_v16i16: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 |
| ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX2-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_mask_cond_v16i16: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 |
| ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 |
| ; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_mask_cond_v16i16: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 |
| ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 |
| ; AVX512VL-NEXT: vpternlogq {{.*#+}} ymm0 = ymm2 ^ (ymm0 & (ymm1 ^ ymm2)) |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_mask_cond_v16i16: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3 |
| ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 |
| ; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4 |
| ; XOP-NEXT: vpcomltw %xmm4, %xmm3, %xmm3 |
| ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; XOP-NEXT: vpcomltw %xmm4, %xmm0, %xmm0 |
| ; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 |
| ; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0 |
| ; XOP-NEXT: retq |
| %a = and <16 x i16> %x, <i16 1, i16 2, i16 8, i16 4, i16 8, i16 2, i16 2, i16 2, i16 2, i16 8, i16 8, i16 64, i16 64, i16 1024, i16 4096, i16 1024> |
| %c = icmp eq <16 x i16> %a, zeroinitializer |
| %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z |
| ret <16 x i16> %r |
| } |
| |
| define <32 x i8> @blend_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %z) { |
| ; AVX1-LABEL: blend_mask_cond_v32i8: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 |
| ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 |
| ; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 |
| ; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3 |
| ; AVX1-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 |
| ; AVX1-NEXT: vandnps %ymm2, %ymm0, %ymm2 |
| ; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0 |
| ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: blend_mask_cond_v32i8: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2,1,2,4,8,16,32,64,128,4,4,4,4,128,4,2,16] |
| ; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: blend_mask_cond_v32i8: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2,1,2,4,8,16,32,64,128,4,4,4,4,128,4,2,16] |
| ; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0 |
| ; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 |
| ; AVX512F-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: blend_mask_cond_v32i8: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2,1,2,4,8,16,32,64,128,4,4,4,4,128,4,2,16] |
| ; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0 |
| ; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 |
| ; AVX512VL-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (ymm0 & (ymm2 ^ ymm1)) |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: blend_mask_cond_v32i8: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3 |
| ; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 |
| ; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4 |
| ; XOP-NEXT: vpcomltb %xmm4, %xmm3, %xmm3 |
| ; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; XOP-NEXT: vpcomltb %xmm4, %xmm0, %xmm0 |
| ; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 |
| ; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0 |
| ; XOP-NEXT: retq |
| %a = and <32 x i8> %x, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 2, i8 2, i8 2, i8 2, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 128, i8 4, i8 2, i8 16> |
| %c = icmp eq <32 x i8> %a, zeroinitializer |
| %r = select <32 x i1> %c, <32 x i8> %y, <32 x i8> %z |
| ret <32 x i8> %r |
| } |
| |
| define void @store_blend_load_v4i64(ptr %a0, ptr %a1, ptr %a2) { |
| ; AVX1-LABEL: store_blend_load_v4i64: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vmovapd (%rsi), %ymm0 |
| ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] |
| ; AVX1-NEXT: # xmm1 = mem[0,0] |
| ; AVX1-NEXT: vpxor 16(%rdi), %xmm1, %xmm2 |
| ; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775815,9223372036854775815] |
| ; AVX1-NEXT: # xmm3 = mem[0,0] |
| ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 |
| ; AVX1-NEXT: vpxor (%rdi), %xmm1, %xmm1 |
| ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
| ; AVX1-NEXT: vblendvpd %ymm1, (%rdi), %ymm0, %ymm0 |
| ; AVX1-NEXT: vmovapd %ymm0, (%rdx) |
| ; AVX1-NEXT: vzeroupper |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: store_blend_load_v4i64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vmovdqa (%rdi), %ymm0 |
| ; AVX2-NEXT: vmovapd (%rsi), %ymm1 |
| ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] |
| ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2 |
| ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775815,9223372036854775815,9223372036854775815,9223372036854775815] |
| ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 |
| ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 |
| ; AVX2-NEXT: vmovapd %ymm0, (%rdx) |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: store_blend_load_v4i64: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 |
| ; AVX512F-NEXT: vmovdqa (%rsi), %ymm1 |
| ; AVX512F-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1 |
| ; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} |
| ; AVX512F-NEXT: vmovdqa %ymm1, (%rdx) |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: store_blend_load_v4i64: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 |
| ; AVX512VL-NEXT: vmovdqa (%rsi), %ymm1 |
| ; AVX512VL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 |
| ; AVX512VL-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} |
| ; AVX512VL-NEXT: vmovdqa %ymm1, (%rdx) |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: store_blend_load_v4i64: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vmovapd (%rsi), %ymm0 |
| ; XOP-NEXT: vpmovsxbq {{.*#+}} xmm1 = [7,7] |
| ; XOP-NEXT: vpcomltuq 16(%rdi), %xmm1, %xmm2 |
| ; XOP-NEXT: vpcomltuq (%rdi), %xmm1, %xmm1 |
| ; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
| ; XOP-NEXT: vblendvpd %ymm1, (%rdi), %ymm0, %ymm0 |
| ; XOP-NEXT: vmovapd %ymm0, (%rdx) |
| ; XOP-NEXT: vzeroupper |
| ; XOP-NEXT: retq |
| %v0 = load <4 x i64>, ptr %a0 |
| %v1 = load <4 x i64>, ptr %a1 |
| %cmp = icmp ugt <4 x i64> %v0, <i64 7, i64 7, i64 7, i64 7> |
| %res = select <4 x i1> %cmp, <4 x i64> %v0, <4 x i64> %v1 |
| store <4 x i64> %res, ptr %a2 |
| ret void |
| } |
| |
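| ; Same pattern with i32 elements: pre-AVX512, the unsigned compare is emulated as pmaxud + pcmpeqd. |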
| define void @store_blend_load_v8i32(ptr %a0, ptr %a1, ptr %a2) { |
| ; AVX1-LABEL: store_blend_load_v8i32: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vmovaps (%rsi), %ymm0 |
| ; AVX1-NEXT: vmovdqa (%rdi), %xmm1 |
| ; AVX1-NEXT: vmovdqa 16(%rdi), %xmm2 |
| ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [8,8,8,8] |
| ; AVX1-NEXT: vpmaxud %xmm3, %xmm2, %xmm4 |
| ; AVX1-NEXT: vpcmpeqd %xmm4, %xmm2, %xmm2 |
| ; AVX1-NEXT: vpmaxud %xmm3, %xmm1, %xmm3 |
| ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
| ; AVX1-NEXT: vblendvps %ymm1, (%rdi), %ymm0, %ymm0 |
| ; AVX1-NEXT: vmovaps %ymm0, (%rdx) |
| ; AVX1-NEXT: vzeroupper |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: store_blend_load_v8i32: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vmovdqa (%rdi), %ymm0 |
| ; AVX2-NEXT: vmovaps (%rsi), %ymm1 |
| ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8,8,8,8,8,8,8,8] |
| ; AVX2-NEXT: vpmaxud %ymm2, %ymm0, %ymm2 |
| ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm2 |
| ; AVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 |
| ; AVX2-NEXT: vmovaps %ymm0, (%rdx) |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: store_blend_load_v8i32: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 |
| ; AVX512F-NEXT: vmovdqa (%rsi), %ymm1 |
| ; AVX512F-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1 |
| ; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} |
| ; AVX512F-NEXT: vmovdqa %ymm1, (%rdx) |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: store_blend_load_v8i32: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 |
| ; AVX512VL-NEXT: vmovdqa (%rsi), %ymm1 |
| ; AVX512VL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 |
| ; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} |
| ; AVX512VL-NEXT: vmovdqa %ymm1, (%rdx) |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: store_blend_load_v8i32: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vmovaps (%rsi), %ymm0 |
| ; XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7] |
| ; XOP-NEXT: vpcomltud 16(%rdi), %xmm1, %xmm2 |
| ; XOP-NEXT: vpcomltud (%rdi), %xmm1, %xmm1 |
| ; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
| ; XOP-NEXT: vblendvps %ymm1, (%rdi), %ymm0, %ymm0 |
| ; XOP-NEXT: vmovaps %ymm0, (%rdx) |
| ; XOP-NEXT: vzeroupper |
| ; XOP-NEXT: retq |
| %v0 = load <8 x i32>, ptr %a0 |
| %v1 = load <8 x i32>, ptr %a1 |
| %cmp = icmp ugt <8 x i32> %v0, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> |
| %res = select <8 x i1> %cmp, <8 x i32> %v0, <8 x i32> %v1 |
| store <8 x i32> %res, ptr %a2 |
| ret void |
| } |
| |
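| ; Same pattern with i16 elements (pmaxuw + pcmpeqw). AVX1 has no 256-bit integer blend, |
| ; so it splits into two 128-bit vpblendvb ops. |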
| define void @store_blend_load_v16i16(ptr %a0, ptr %a1, ptr %a2) { |
| ; AVX1-LABEL: store_blend_load_v16i16: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vmovdqa (%rdi), %xmm0 |
| ; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 |
| ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8] |
| ; AVX1-NEXT: vpmaxuw %xmm2, %xmm0, %xmm3 |
| ; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm3 |
| ; AVX1-NEXT: vpmaxuw %xmm2, %xmm1, %xmm2 |
| ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm2 |
| ; AVX1-NEXT: vmovdqa (%rsi), %xmm4 |
| ; AVX1-NEXT: vmovdqa 16(%rsi), %xmm5 |
| ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm5, %xmm1 |
| ; AVX1-NEXT: vpblendvb %xmm3, %xmm0, %xmm4, %xmm0 |
| ; AVX1-NEXT: vmovdqa %xmm0, (%rdx) |
| ; AVX1-NEXT: vmovdqa %xmm1, 16(%rdx) |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: store_blend_load_v16i16: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vmovdqa (%rdi), %ymm0 |
| ; AVX2-NEXT: vmovdqa (%rsi), %ymm1 |
| ; AVX2-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 |
| ; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2 |
| ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 |
| ; AVX2-NEXT: vmovdqa %ymm0, (%rdx) |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: store_blend_load_v16i16: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 |
| ; AVX512F-NEXT: vmovdqa (%rsi), %ymm1 |
| ; AVX512F-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 |
| ; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2 |
| ; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 |
| ; AVX512F-NEXT: vmovdqa %ymm0, (%rdx) |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: store_blend_load_v16i16: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 |
| ; AVX512VL-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 |
| ; AVX512VL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm1 |
| ; AVX512VL-NEXT: vpternlogq {{.*#+}} ymm1 = mem ^ (ymm1 & (ymm0 ^ mem)) |
| ; AVX512VL-NEXT: vmovdqa %ymm1, (%rdx) |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: store_blend_load_v16i16: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vmovdqa (%rdi), %ymm0 |
| ; XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] |
| ; XOP-NEXT: vpcomltuw 16(%rdi), %xmm1, %xmm2 |
| ; XOP-NEXT: vpcomltuw (%rdi), %xmm1, %xmm1 |
| ; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
| ; XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 |
| ; XOP-NEXT: vmovdqa %ymm0, (%rdx) |
| ; XOP-NEXT: vzeroupper |
| ; XOP-NEXT: retq |
| %v0 = load <16 x i16>, ptr %a0 |
| %v1 = load <16 x i16>, ptr %a1 |
| %cmp = icmp ugt <16 x i16> %v0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> |
| %res = select <16 x i1> %cmp, <16 x i16> %v0, <16 x i16> %v1 |
| store <16 x i16> %res, ptr %a2 |
| ret void |
| } |
| |
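| ; Same pattern with i8 elements (pmaxub + pcmpeqb). |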
| define void @store_blend_load_v32i8(ptr %a0, ptr %a1, ptr %a2) { |
| ; AVX1-LABEL: store_blend_load_v32i8: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vmovdqa (%rdi), %xmm0 |
| ; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 |
| ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] |
| ; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm3 |
| ; AVX1-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm3 |
| ; AVX1-NEXT: vpmaxub %xmm2, %xmm1, %xmm2 |
| ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2 |
| ; AVX1-NEXT: vmovdqa (%rsi), %xmm4 |
| ; AVX1-NEXT: vmovdqa 16(%rsi), %xmm5 |
| ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm5, %xmm1 |
| ; AVX1-NEXT: vpblendvb %xmm3, %xmm0, %xmm4, %xmm0 |
| ; AVX1-NEXT: vmovdqa %xmm0, (%rdx) |
| ; AVX1-NEXT: vmovdqa %xmm1, 16(%rdx) |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: store_blend_load_v32i8: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vmovdqa (%rdi), %ymm0 |
| ; AVX2-NEXT: vmovdqa (%rsi), %ymm1 |
| ; AVX2-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 |
| ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 |
| ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 |
| ; AVX2-NEXT: vmovdqa %ymm0, (%rdx) |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: store_blend_load_v32i8: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 |
| ; AVX512F-NEXT: vmovdqa (%rsi), %ymm1 |
| ; AVX512F-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 |
| ; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 |
| ; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 |
| ; AVX512F-NEXT: vmovdqa %ymm0, (%rdx) |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: store_blend_load_v32i8: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 |
| ; AVX512VL-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 |
| ; AVX512VL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm1 |
| ; AVX512VL-NEXT: vpternlogq {{.*#+}} ymm1 = mem ^ (ymm1 & (ymm0 ^ mem)) |
| ; AVX512VL-NEXT: vmovdqa %ymm1, (%rdx) |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: store_blend_load_v32i8: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vmovdqa (%rdi), %ymm0 |
| ; XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] |
| ; XOP-NEXT: vpcomltub 16(%rdi), %xmm1, %xmm2 |
| ; XOP-NEXT: vpcomltub (%rdi), %xmm1, %xmm1 |
| ; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 |
| ; XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 |
| ; XOP-NEXT: vmovdqa %ymm0, (%rdx) |
| ; XOP-NEXT: vzeroupper |
| ; XOP-NEXT: retq |
| %v0 = load <32 x i8>, ptr %a0 |
| %v1 = load <32 x i8>, ptr %a1 |
| %cmp = icmp ugt <32 x i8> %v0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> |
| %res = select <32 x i1> %cmp, <32 x i8> %v0, <32 x i8> %v1 |
| store <32 x i8> %res, ptr %a2 |
| ret void |
| } |
| |
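| ; PR46531: the select condition is a bit-test (and with 1, eq 0) choosing between 'or' |
| ; and 'xor' of the same operands, so AVX can shift the tested bit into the sign bit for blendvps. |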
| define void @PR46531(ptr %x, ptr %y, ptr %z) { |
| ; AVX12-LABEL: PR46531: |
| ; AVX12: # %bb.0: |
| ; AVX12-NEXT: vmovdqu (%rsi), %xmm0 |
| ; AVX12-NEXT: vmovdqu (%rdx), %xmm1 |
| ; AVX12-NEXT: vpor %xmm0, %xmm1, %xmm2 |
| ; AVX12-NEXT: vpxor %xmm0, %xmm1, %xmm0 |
| ; AVX12-NEXT: vpslld $31, %xmm1, %xmm1 |
| ; AVX12-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0 |
| ; AVX12-NEXT: vmovups %xmm0, (%rdi) |
| ; AVX12-NEXT: retq |
| ; |
| ; AVX512F-LABEL: PR46531: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vmovdqu (%rsi), %xmm0 |
| ; AVX512F-NEXT: vmovdqu (%rdx), %xmm1 |
| ; AVX512F-NEXT: vpor %xmm0, %xmm1, %xmm2 |
| ; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %k1 |
| ; AVX512F-NEXT: vpxor %xmm0, %xmm1, %xmm0 |
| ; AVX512F-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} |
| ; AVX512F-NEXT: vmovdqu %xmm0, (%rdi) |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: PR46531: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vmovdqu (%rsi), %xmm0 |
| ; AVX512VL-NEXT: vmovdqu (%rdx), %xmm1 |
| ; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %k1 |
| ; AVX512VL-NEXT: vpxor %xmm0, %xmm1, %xmm2 |
| ; AVX512VL-NEXT: vpord %xmm0, %xmm1, %xmm2 {%k1} |
| ; AVX512VL-NEXT: vmovdqu %xmm2, (%rdi) |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: PR46531: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vmovdqu (%rsi), %xmm0 |
| ; XOP-NEXT: vmovdqu (%rdx), %xmm1 |
| ; XOP-NEXT: vpor %xmm0, %xmm1, %xmm2 |
| ; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3 |
| ; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4 |
| ; XOP-NEXT: vpcomneqd %xmm4, %xmm3, %xmm3 |
| ; XOP-NEXT: vpxor %xmm0, %xmm1, %xmm0 |
| ; XOP-NEXT: vblendvps %xmm3, %xmm0, %xmm2, %xmm0 |
| ; XOP-NEXT: vmovups %xmm0, (%rdi) |
| ; XOP-NEXT: retq |
| %a = load <4 x i32>, ptr %y, align 4 |
| %b = load <4 x i32>, ptr %z, align 4 |
| %or = or <4 x i32> %b, %a |
| %and = and <4 x i32> %b, <i32 1, i32 1, i32 1, i32 1> |
| %cmp = icmp eq <4 x i32> %and, zeroinitializer |
| %xor = xor <4 x i32> %b, %a |
| %sel = select <4 x i1> %cmp, <4 x i32> %or, <4 x i32> %xor |
| store <4 x i32> %sel, ptr %x, align 4 |
| ret void |
| } |
| |
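| ; PR110875: 512-bit select whose condition byte-wise bit-tests a broadcasted i64, blending |
| ; the concatenation of two 256-bit inputs against a constant splat. |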
| define <64 x i8> @PR110875(<32 x i8> %a0, <32 x i8> %a1, i64 %a2) { |
| ; AVX1-LABEL: PR110875: |
| ; AVX1: # %bb.0: |
| ; AVX1-NEXT: vmovq %rdi, %xmm2 |
| ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] |
| ; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm2[0,1,2,3,4,4,5,5] |
| ; AVX1-NEXT: vpshufhw {{.*#+}} xmm4 = xmm2[0,1,2,3,6,6,7,7] |
| ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 |
| ; AVX1-NEXT: vshufps {{.*#+}} ymm3 = ymm3[2,2,3,3,6,6,7,7] |
| ; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm2[0,0,1,1,4,5,6,7] |
| ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[2,2,3,3,4,5,6,7] |
| ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm4, %ymm2 |
| ; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm2[0,0,1,1,4,4,5,5] |
| ; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] |
| ; AVX1-NEXT: vandps %ymm4, %ymm2, %ymm2 |
| ; AVX1-NEXT: vandps %ymm4, %ymm3, %ymm3 |
| ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 |
| ; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5 |
| ; AVX1-NEXT: vpcmpeqb %xmm5, %xmm4, %xmm4 |
| ; AVX1-NEXT: vpcmpeqb %xmm5, %xmm3, %xmm3 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 |
| ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 |
| ; AVX1-NEXT: vpcmpeqb %xmm5, %xmm4, %xmm4 |
| ; AVX1-NEXT: vpcmpeqb %xmm5, %xmm2, %xmm2 |
| ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 |
| ; AVX1-NEXT: vbroadcastss {{.*#+}} ymm4 = [20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20] |
| ; AVX1-NEXT: vandnps %ymm4, %ymm2, %ymm5 |
| ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 |
| ; AVX1-NEXT: vorps %ymm5, %ymm0, %ymm0 |
| ; AVX1-NEXT: vandnps %ymm4, %ymm3, %ymm2 |
| ; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1 |
| ; AVX1-NEXT: vorps %ymm2, %ymm1, %ymm1 |
| ; AVX1-NEXT: retq |
| ; |
| ; AVX2-LABEL: PR110875: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vmovq %rdi, %xmm2 |
| ; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2 |
| ; AVX2-NEXT: vpshufb {{.*#+}} ymm3 = ymm2[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19] |
| ; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23] |
| ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] |
| ; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2 |
| ; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3 |
| ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm3 |
| ; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm5 = [20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20] |
| ; AVX2-NEXT: vpblendvb %ymm3, %ymm5, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm2, %ymm2 |
| ; AVX2-NEXT: vpblendvb %ymm2, %ymm5, %ymm1, %ymm1 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: PR110875: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1 |
| ; AVX512F-NEXT: vmovq %rdi, %xmm0 |
| ; AVX512F-NEXT: vpbroadcastq %xmm0, %ymm0 |
| ; AVX512F-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19] |
| ; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23] |
| ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 |
| ; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 |
| ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 |
| ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2 |
| ; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 |
| ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 |
| ; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = mem ^ (zmm0 & (zmm1 ^ mem)) |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: PR110875: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1 |
| ; AVX512VL-NEXT: vpbroadcastq %rdi, %ymm0 |
| ; AVX512VL-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19] |
| ; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23] |
| ; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 |
| ; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 |
| ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 |
| ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2 |
| ; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 |
| ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 |
| ; AVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = mem ^ (zmm0 & (zmm1 ^ mem)) |
| ; AVX512VL-NEXT: retq |
| ; |
| ; XOP-LABEL: PR110875: |
| ; XOP: # %bb.0: |
| ; XOP-NEXT: vmovq %rdi, %xmm2 |
| ; XOP-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] |
| ; XOP-NEXT: vpshufhw {{.*#+}} xmm3 = xmm2[0,1,2,3,4,4,5,5] |
| ; XOP-NEXT: vpshufhw {{.*#+}} xmm4 = xmm2[0,1,2,3,6,6,7,7] |
| ; XOP-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 |
| ; XOP-NEXT: vshufps {{.*#+}} ymm3 = ymm3[2,2,3,3,6,6,7,7] |
| ; XOP-NEXT: vpshuflw {{.*#+}} xmm4 = xmm2[0,0,1,1,4,5,6,7] |
| ; XOP-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[2,2,3,3,4,5,6,7] |
| ; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm4, %ymm2 |
| ; XOP-NEXT: vshufps {{.*#+}} ymm2 = ymm2[0,0,1,1,4,4,5,5] |
| ; XOP-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] |
| ; XOP-NEXT: vandps %ymm4, %ymm2, %ymm2 |
| ; XOP-NEXT: vandps %ymm4, %ymm3, %ymm3 |
| ; XOP-NEXT: vextractf128 $1, %ymm3, %xmm4 |
| ; XOP-NEXT: vpxor %xmm5, %xmm5, %xmm5 |
| ; XOP-NEXT: vpcomeqb %xmm5, %xmm4, %xmm4 |
| ; XOP-NEXT: vpcomeqb %xmm5, %xmm3, %xmm3 |
| ; XOP-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 |
| ; XOP-NEXT: vextractf128 $1, %ymm2, %xmm4 |
| ; XOP-NEXT: vpcomeqb %xmm5, %xmm4, %xmm4 |
| ; XOP-NEXT: vpcomeqb %xmm5, %xmm2, %xmm2 |
| ; XOP-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 |
| ; XOP-NEXT: vbroadcastss {{.*#+}} ymm4 = [20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20] |
| ; XOP-NEXT: vpcmov %ymm2, %ymm4, %ymm0, %ymm0 |
| ; XOP-NEXT: vpcmov %ymm3, %ymm4, %ymm1, %ymm1 |
| ; XOP-NEXT: retq |
| %concat = shufflevector <32 x i8> %a0, <32 x i8> %a1, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> |
| %scl = insertelement <1 x i64> poison, i64 %a2, i64 0 |
| %splat = shufflevector <1 x i64> %scl, <1 x i64> poison, <8 x i32> <i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison> |
| %ref = bitcast <8 x i64> %splat to <64 x i8> |
| %shuf = shufflevector <64 x i8> %ref, <64 x i8> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55> |
| %mask = and <64 x i8> %shuf, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128> |
| %cmp = icmp eq <64 x i8> %mask, zeroinitializer |
| %res = select <64 x i1> %cmp, <64 x i8> %concat, <64 x i8> <i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20> |
| ret <64 x i8> %res |
| } |
| |
| attributes #0 = { "no-nans-fp-math"="true" } |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; AVX: {{.*}} |