| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 |
| ; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD |
| ; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI |
| |
| ; CHECK-GI: warning: Instruction selection used fallback path for shufflevector_v2i1 |
| ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2i1_zeroes |
| |
| ; ===== Legal Vector Types ===== |
| |
| ; Two-input <8 x i8> shuffle with an irregular mask: lanes 1, 3, 5, 7 of %a |
| ; followed by lanes 0, 2, 4, 7 of %b (mask indices 8, 10, 12, 15). |
| ; Both selectors are expected to merge the two 64-bit inputs into one q |
| ; register and use a single-register tbl with indices loaded from .LCPI0_0; |
| ; the SelectionDAG form uses the 8b arrangement, the GlobalISel form 16b. |
| define <8 x i8> @shufflevector_v8i8(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-SD-LABEL: shufflevector_v8i8: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-SD-NEXT: adrp x8, .LCPI0_0 |
| ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] |
| ; CHECK-SD-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] |
| ; CHECK-SD-NEXT: tbl v0.8b, { v0.16b }, v1.8b |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: shufflevector_v8i8: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-GI-NEXT: adrp x8, .LCPI0_0 |
| ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] |
| ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] |
| ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b |
| ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| ; CHECK-GI-NEXT: ret |
| %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15> |
| ret <8 x i8> %c |
| } |
| |
| ; Two-input <16 x i8> shuffle with an irregular mask: twelve lanes taken from |
| ; %a (indices below 16) followed by lanes 9, 14, 15, 15 of %b (indices 25, 30, |
| ; 31, 31). Expected lowering is a two-register tbl with indices loaded from |
| ; .LCPI1_0; the two selectors differ only in the order of the kill comments. |
| define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) { |
| ; CHECK-SD-LABEL: shufflevector_v16i8: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: adrp x8, .LCPI1_0 |
| ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 |
| ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] |
| ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 |
| ; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: shufflevector_v16i8: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: adrp x8, .LCPI1_0 |
| ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 |
| ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] |
| ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 |
| ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b |
| ; CHECK-GI-NEXT: ret |
| %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15, i32 2, i32 4, i32 6, i32 8, i32 25, i32 30, i32 31, i32 31> |
| ret <16 x i8> %c |
| } |
| |
| ; Mask <1, 3, 5, 7> picks the odd-numbered lanes of the %a:%b concatenation. |
| ; Both selectors are expected to emit a single uzp2 on the 4h arrangement. |
| define <4 x i16> @shufflevector_v4i16(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: shufflevector_v4i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uzp2 v0.4h, v0.4h, v1.4h |
| ; CHECK-NEXT: ret |
| %c = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| ret <4 x i16> %c |
| } |
| |
| ; Two-input <8 x i16> shuffle with an irregular mask: lanes 1, 3, 5, 7 of %a |
| ; followed by lanes 0, 2, 4, 7 of %b (mask indices 8, 10, 12, 15). |
| ; Expected lowering is a byte-wise two-register tbl with indices loaded from |
| ; .LCPI3_0; the two selectors differ only in the order of the kill comments. |
| define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) { |
| ; CHECK-SD-LABEL: shufflevector_v8i16: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: adrp x8, .LCPI3_0 |
| ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 |
| ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] |
| ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 |
| ; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: shufflevector_v8i16: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: adrp x8, .LCPI3_0 |
| ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 |
| ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] |
| ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 |
| ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b |
| ; CHECK-GI-NEXT: ret |
| %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15> |
| ret <8 x i16> %c |
| } |
| |
| ; Mask <1, 3> takes lane 1 of %a and lane 1 of %b. |
| ; Both selectors are expected to emit a single zip2 on the 2s arrangement. |
| define <2 x i32> @shufflevector_v2i32(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: shufflevector_v2i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: zip2 v0.2s, v0.2s, v1.2s |
| ; CHECK-NEXT: ret |
| %c = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> |
| ret <2 x i32> %c |
| } |
| |
| ; Mask <1, 3, 5, 7> picks the odd-numbered lanes of the %a:%b concatenation. |
| ; Both selectors are expected to emit a single uzp2 on the 4s arrangement. |
| define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) { |
| ; CHECK-LABEL: shufflevector_v4i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s |
| ; CHECK-NEXT: ret |
| %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| ret <4 x i32> %c |
| } |
| |
| ; Mask <1, 3> takes lane 1 of %a and lane 1 of %b. |
| ; Both selectors are expected to emit a single zip2 on the 2d arrangement. |
| define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) { |
| ; CHECK-LABEL: shufflevector_v2i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: zip2 v0.2d, v0.2d, v1.2d |
| ; CHECK-NEXT: ret |
| %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3> |
| ret <2 x i64> %c |
| } |
| |
| ; ===== Legal Vector Types with Zero Masks ===== |
| |
| ; All-zero mask: every result lane is lane 0 of %a, and %b is never selected. |
| ; Both selectors are expected to emit a single lane-indexed dup (8b). |
| define <8 x i8> @shufflevector_v8i8_zeroes(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: shufflevector_v8i8_zeroes: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: dup v0.8b, v0.b[0] |
| ; CHECK-NEXT: ret |
| %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> |
| ret <8 x i8> %c |
| } |
| |
| ; All-zero mask: every result lane is lane 0 of %a, and %b is never selected. |
| ; Both selectors are expected to emit a single lane-indexed dup (16b). |
| define <16 x i8> @shufflevector_v16i8_zeroes(<16 x i8> %a, <16 x i8> %b) { |
| ; CHECK-LABEL: shufflevector_v16i8_zeroes: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: dup v0.16b, v0.b[0] |
| ; CHECK-NEXT: ret |
| %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> |
| ret <16 x i8> %c |
| } |
| |
| ; All-zero mask: every result lane is lane 0 of %a, and %b is never selected. |
| ; Both selectors are expected to emit a single lane-indexed dup (4h). |
| define <4 x i16> @shufflevector_v4i16_zeroes(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: shufflevector_v4i16_zeroes: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: dup v0.4h, v0.h[0] |
| ; CHECK-NEXT: ret |
| %c = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0> |
| ret <4 x i16> %c |
| } |
| |
| ; All-zero mask: every result lane is lane 0 of %a, and %b is never selected. |
| ; Both selectors are expected to emit a single lane-indexed dup (8h). |
| define <8 x i16> @shufflevector_v8i16_zeroes(<8 x i16> %a, <8 x i16> %b) { |
| ; CHECK-LABEL: shufflevector_v8i16_zeroes: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: dup v0.8h, v0.h[0] |
| ; CHECK-NEXT: ret |
| %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> |
| ret <8 x i16> %c |
| } |
| |
| ; All-zero mask: every result lane is lane 0 of %a, and %b is never selected. |
| ; Both selectors are expected to emit a single lane-indexed dup (2s). |
| define <2 x i32> @shufflevector_v2i32_zeroes(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: shufflevector_v2i32_zeroes: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: dup v0.2s, v0.s[0] |
| ; CHECK-NEXT: ret |
| %c = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 0> |
| ret <2 x i32> %c |
| } |
| |
| ; All-zero mask: every result lane is lane 0 of %a, and %b is never selected. |
| ; Both selectors are expected to emit a single lane-indexed dup (4s). |
| define <4 x i32> @shufflevector_v4i32_zeroes(<4 x i32> %a, <4 x i32> %b) { |
| ; CHECK-LABEL: shufflevector_v4i32_zeroes: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: dup v0.4s, v0.s[0] |
| ; CHECK-NEXT: ret |
| %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0> |
| ret <4 x i32> %c |
| } |
| |
| ; All-zero mask: every result lane is lane 0 of %a, and %b is never selected. |
| ; Both selectors are expected to emit a single lane-indexed dup (2d). |
| define <2 x i64> @shufflevector_v2i64_zeroes(<2 x i64> %a, <2 x i64> %b) { |
| ; CHECK-LABEL: shufflevector_v2i64_zeroes: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: dup v0.2d, v0.d[0] |
| ; CHECK-NEXT: ret |
| %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0> |
| ret <2 x i64> %c |
| } |
| |
| ; ===== Smaller/Larger Width Vectors with Legal Element Sizes ===== |
| |
| ; Mask <0, 3> keeps lane 0 of %a and takes lane 1 of %b. |
| ; The expected code handles the <2 x i1> values as 32-bit lanes and inserts |
| ; lane 1 of v1 into lane 1 of v0. The GlobalISel run is expected to report a |
| ; fallback for this function (see the warning expectations at the top of the |
| ; file), and one shared set of expectations covers both run configurations. |
| define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){ |
| ; CHECK-LABEL: shufflevector_v2i1: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: mov v0.s[1], v1.s[1] |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| ; CHECK-NEXT: ret |
| %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> <i32 0, i32 3> |
| ret <2 x i1> %c |
| } |
| |
| ; Mask <1, 2, 4, 7> selects lanes 1 and 2 of %a, then lanes 0 and 3 of %b. |
| ; The <4 x i8> result is bitcast to i32, so the expected code ends by moving |
| ; the packed bytes into w0. |
| ; SelectionDAG expectation: ext/zip1/ext on halfword lanes, then uzp1 to |
| ; narrow to bytes. GlobalISel expectation: each halfword lane is moved into a |
| ; byte lane, the two inputs are joined, and a tbl with indices from .LCPI15_0 |
| ; performs the shuffle. |
| ; NOTE(review): the SelectionDAG output adjusts sp by 16 bytes but shows no |
| ; load or store through sp - looks like a leftover stack slot; confirm. |
| define i32 @shufflevector_v4i8(<4 x i8> %a, <4 x i8> %b){ |
| ; CHECK-SD-LABEL: shufflevector_v4i8: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: ext v0.8b, v1.8b, v0.8b, #6 |
| ; CHECK-SD-NEXT: zip1 v1.4h, v1.4h, v0.4h |
| ; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4 |
| ; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b |
| ; CHECK-SD-NEXT: fmov w0, s0 |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: shufflevector_v4i8: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-GI-NEXT: mov h2, v0.h[1] |
| ; CHECK-GI-NEXT: mov h3, v1.h[1] |
| ; CHECK-GI-NEXT: adrp x8, .LCPI15_0 |
| ; CHECK-GI-NEXT: mov h4, v0.h[2] |
| ; CHECK-GI-NEXT: mov h5, v0.h[3] |
| ; CHECK-GI-NEXT: mov h6, v1.h[3] |
| ; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] |
| ; CHECK-GI-NEXT: mov h2, v1.h[2] |
| ; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] |
| ; CHECK-GI-NEXT: mov v0.b[2], v4.b[0] |
| ; CHECK-GI-NEXT: mov v1.b[2], v2.b[0] |
| ; CHECK-GI-NEXT: mov v0.b[3], v5.b[0] |
| ; CHECK-GI-NEXT: mov v1.b[3], v6.b[0] |
| ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] |
| ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI15_0] |
| ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b |
| ; CHECK-GI-NEXT: fmov w0, s0 |
| ; CHECK-GI-NEXT: ret |
| %c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> <i32 1, i32 2, i32 4, i32 7> |
| %d = bitcast <4 x i8> %c to i32 |
| ret i32 %d |
| } |
| |
| ; <32 x i8> shuffle whose mask places lanes 0..7 of %a at every fourth |
| ; position and fills the other positions with lane 0 of %b (index 32). |
| ; Both selectors are expected to emit two two-register tbl instructions with |
| ; index vectors from .LCPI16_0 and .LCPI16_1; in both expected sequences the |
| ; table is built from the incoming q0 and q2 only. |
| define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b){ |
| ; CHECK-SD-LABEL: shufflevector_v32i8: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2 |
| ; CHECK-SD-NEXT: adrp x8, .LCPI16_0 |
| ; CHECK-SD-NEXT: adrp x9, .LCPI16_1 |
| ; CHECK-SD-NEXT: mov v1.16b, v0.16b |
| ; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI16_0] |
| ; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI16_1] |
| ; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b |
| ; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: shufflevector_v32i8: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: mov v3.16b, v0.16b |
| ; CHECK-GI-NEXT: adrp x8, .LCPI16_1 |
| ; CHECK-GI-NEXT: adrp x9, .LCPI16_0 |
| ; CHECK-GI-NEXT: mov v4.16b, v2.16b |
| ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI16_1] |
| ; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI16_0] |
| ; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b |
| ; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b |
| ; CHECK-GI-NEXT: ret |
| %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32> |
| ret <32 x i8> %c |
| } |
| |
| ; Mask <1, 2> selects lane 1 of %a and lane 0 of %b; the <2 x i16> result is |
| ; bitcast to i32 and returned in w0. |
| ; SelectionDAG expectation: an ext on 32-bit lanes, then the two values are |
| ; stored as halfwords at sp+12 and sp+14 and reloaded as one word. |
| ; GlobalISel expectation: the 32-bit lanes are packed into halfword lanes, |
| ; the inputs are joined, and a tbl with indices from .LCPI17_0 is used. |
| define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){ |
| ; CHECK-SD-LABEL: shufflevector_v2i16: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4 |
| ; CHECK-SD-NEXT: mov w8, v0.s[1] |
| ; CHECK-SD-NEXT: fmov w9, s0 |
| ; CHECK-SD-NEXT: strh w9, [sp, #12] |
| ; CHECK-SD-NEXT: strh w8, [sp, #14] |
| ; CHECK-SD-NEXT: ldr w0, [sp, #12] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: shufflevector_v2i16: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-GI-NEXT: mov s2, v0.s[1] |
| ; CHECK-GI-NEXT: mov s3, v1.s[1] |
| ; CHECK-GI-NEXT: adrp x8, .LCPI17_0 |
| ; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] |
| ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] |
| ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] |
| ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI17_0] |
| ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b |
| ; CHECK-GI-NEXT: fmov w0, s0 |
| ; CHECK-GI-NEXT: ret |
| %c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> <i32 1, i32 2> |
| %d = bitcast <2 x i16> %c to i32 |
| ret i32 %d |
| } |
| |
| ; <16 x i16> shuffle whose mask places lanes 0, 1, 1, 3 of %a at every fourth |
| ; position and fills the other positions with lane 0 of %b (index 16). |
| ; Both selectors are expected to emit two two-register tbl instructions with |
| ; index vectors from .LCPI18_0 and .LCPI18_1, using the incoming q0 and q2. |
| ; NOTE(review): lane 1 of %a appears twice and lane 2 never, whereas the |
| ; v32i8 test in this file uses consecutive lanes 0..7 - possibly a typo for |
| ; 0, 1, 2, 3. Confirm intent, and regenerate the assertions if it is changed. |
| define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b){ |
| ; CHECK-SD-LABEL: shufflevector_v16i16: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2 |
| ; CHECK-SD-NEXT: adrp x8, .LCPI18_0 |
| ; CHECK-SD-NEXT: adrp x9, .LCPI18_1 |
| ; CHECK-SD-NEXT: mov v1.16b, v0.16b |
| ; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI18_0] |
| ; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI18_1] |
| ; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b |
| ; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: shufflevector_v16i16: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: mov v3.16b, v0.16b |
| ; CHECK-GI-NEXT: adrp x8, .LCPI18_1 |
| ; CHECK-GI-NEXT: adrp x9, .LCPI18_0 |
| ; CHECK-GI-NEXT: mov v4.16b, v2.16b |
| ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI18_1] |
| ; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI18_0] |
| ; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b |
| ; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b |
| ; CHECK-GI-NEXT: ret |
| %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16> |
| ret <16 x i16> %c |
| } |
| |
| ; Single-element shuffle: mask <1> selects the only lane of %b, so the |
| ; expected code is just a register copy of d1 into d0. |
| define <1 x i32> @shufflevector_v1i32(<1 x i32> %a, <1 x i32> %b) { |
| ; CHECK-LABEL: shufflevector_v1i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: fmov d0, d1 |
| ; CHECK-NEXT: ret |
| %c = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> <i32 1> |
| ret <1 x i32> %c |
| } |
| |
| ; <8 x i32> shuffle: lanes 1, 3, 5, 7 of %a followed by lanes 0, 2, 4, 7 of |
| ; %b (mask indices 8, 10, 12, 15). |
| ; Both selectors are expected to form the low result half with a uzp2 of |
| ; v0 and v1. For the high half, the SelectionDAG expectation is a uzp1 of v2 |
| ; and v3 with lane 3 patched by a lane insert, while the GlobalISel |
| ; expectation is a two-register tbl with indices from .LCPI20_0. |
| define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) { |
| ; CHECK-SD-LABEL: shufflevector_v8i32: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s |
| ; CHECK-SD-NEXT: uzp2 v0.4s, v0.4s, v1.4s |
| ; CHECK-SD-NEXT: mov v2.s[3], v3.s[3] |
| ; CHECK-SD-NEXT: mov v1.16b, v2.16b |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: shufflevector_v8i32: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: adrp x8, .LCPI20_0 |
| ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 |
| ; CHECK-GI-NEXT: uzp2 v0.4s, v0.4s, v1.4s |
| ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI20_0] |
| ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 |
| ; CHECK-GI-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v4.16b |
| ; CHECK-GI-NEXT: ret |
| %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15> |
| ret <8 x i32> %c |
| } |
| |
| ; <4 x i64> shuffle: mask <1, 3, 5, 7> picks the odd-numbered lanes of the |
| ; %a:%b concatenation. Both selectors are expected to use one zip2 per result |
| ; half; the SelectionDAG expectation computes the high half in v2 and copies |
| ; it to v1, while the GlobalISel expectation writes v1 directly. |
| define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) { |
| ; CHECK-SD-LABEL: shufflevector_v4i64: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: zip2 v2.2d, v2.2d, v3.2d |
| ; CHECK-SD-NEXT: zip2 v0.2d, v0.2d, v1.2d |
| ; CHECK-SD-NEXT: mov v1.16b, v2.16b |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: shufflevector_v4i64: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: zip2 v0.2d, v0.2d, v1.2d |
| ; CHECK-GI-NEXT: zip2 v1.2d, v2.2d, v3.2d |
| ; CHECK-GI-NEXT: ret |
| %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| ret <4 x i64> %c |
| } |
| |
| ; ===== Smaller/Larger Width Vectors with Zero Masks ===== |
| |
| ; All-zero mask on <2 x i1>: both result lanes are lane 0 of %a. |
| ; The expected code handles the value as 32-bit lanes and emits a dup (2s). |
| ; The GlobalISel run is expected to report a fallback for this function (see |
| ; the warning expectations at the top of the file), and one shared set of |
| ; expectations covers both run configurations. |
| define <2 x i1> @shufflevector_v2i1_zeroes(<2 x i1> %a, <2 x i1> %b){ |
| ; CHECK-LABEL: shufflevector_v2i1_zeroes: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: dup v0.2s, v0.s[0] |
| ; CHECK-NEXT: ret |
| %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> <i32 0, i32 0> |
| ret <2 x i1> %c |
| } |
| |
| ; All-zero mask on <4 x i8>: every result lane is lane 0 of %a; the result is |
| ; bitcast to i32 and returned in w0. |
| ; SelectionDAG expectation: dup on halfword lanes, then uzp1 to narrow to |
| ; bytes. GlobalISel expectation: lane 0 is moved to w8 and broadcast with a |
| ; byte dup from the general-purpose register. |
| ; NOTE(review): the SelectionDAG output adjusts sp by 16 bytes but shows no |
| ; load or store through sp - looks like a leftover stack slot; confirm. |
| define i32 @shufflevector_v4i8_zeroes(<4 x i8> %a, <4 x i8> %b){ |
| ; CHECK-SD-LABEL: shufflevector_v4i8_zeroes: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-SD-NEXT: dup v0.4h, v0.h[0] |
| ; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b |
| ; CHECK-SD-NEXT: fmov w0, s0 |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: shufflevector_v4i8_zeroes: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: fmov w8, s0 |
| ; CHECK-GI-NEXT: dup v0.8b, w8 |
| ; CHECK-GI-NEXT: fmov w0, s0 |
| ; CHECK-GI-NEXT: ret |
| %c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0> |
| %d = bitcast <4 x i8> %c to i32 |
| ret i32 %d |
| } |
| |
| ; All-zero mask on <32 x i8>: every result lane is lane 0 of %a. |
| ; Both selectors are expected to dup into v0 and copy v0 to v1 so that both |
| ; 128-bit halves of the result hold the broadcast. |
| define <32 x i8> @shufflevector_v32i8_zeroes(<32 x i8> %a, <32 x i8> %b){ |
| ; CHECK-LABEL: shufflevector_v32i8_zeroes: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: dup v0.16b, v0.b[0] |
| ; CHECK-NEXT: mov v1.16b, v0.16b |
| ; CHECK-NEXT: ret |
| %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> |
| ret <32 x i8> %c |
| } |
| |
| ; All-zero mask on <2 x i16>: both result lanes are lane 0 of %a; the result |
| ; is bitcast to i32 and returned in w0. |
| ; SelectionDAG expectation: a dup on 32-bit lanes, then the two values are |
| ; stored as halfwords at sp+12 and sp+14 and reloaded as one word. |
| ; GlobalISel expectation: lane 0 is moved to w8 and broadcast with a halfword |
| ; dup from the general-purpose register. |
| define i32 @shufflevector_v2i16_zeroes(<2 x i16> %a, <2 x i16> %b){ |
| ; CHECK-SD-LABEL: shufflevector_v2i16_zeroes: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-SD-NEXT: dup v1.2s, v0.s[0] |
| ; CHECK-SD-NEXT: fmov w9, s0 |
| ; CHECK-SD-NEXT: strh w9, [sp, #12] |
| ; CHECK-SD-NEXT: mov w8, v1.s[1] |
| ; CHECK-SD-NEXT: strh w8, [sp, #14] |
| ; CHECK-SD-NEXT: ldr w0, [sp, #12] |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: shufflevector_v2i16_zeroes: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: fmov w8, s0 |
| ; CHECK-GI-NEXT: dup v0.4h, w8 |
| ; CHECK-GI-NEXT: fmov w0, s0 |
| ; CHECK-GI-NEXT: ret |
| %c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> <i32 0, i32 0> |
| %d = bitcast <2 x i16> %c to i32 |
| ret i32 %d |
| } |
| |
| ; All-zero mask on <16 x i16>: every result lane is lane 0 of %a. |
| ; Both selectors are expected to dup into v0 and copy v0 to v1 so that both |
| ; 128-bit halves of the result hold the broadcast. |
| define <16 x i16> @shufflevector_v16i16_zeroes(<16 x i16> %a, <16 x i16> %b){ |
| ; CHECK-LABEL: shufflevector_v16i16_zeroes: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: dup v0.8h, v0.h[0] |
| ; CHECK-NEXT: mov v1.16b, v0.16b |
| ; CHECK-NEXT: ret |
| %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> |
| ret <16 x i16> %c |
| } |
| |
| ; Single-element shuffle with mask <0>: the result is %a unchanged, so the |
| ; expected code is a bare ret with no data movement. |
| define <1 x i32> @shufflevector_v1i32_zeroes(<1 x i32> %a, <1 x i32> %b) { |
| ; CHECK-LABEL: shufflevector_v1i32_zeroes: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ret |
| %c = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> <i32 0> |
| ret <1 x i32> %c |
| } |
| |
| ; All-zero mask on <8 x i32>: every result lane is lane 0 of %a. |
| ; Both selectors are expected to dup into v0 and copy v0 to v1 so that both |
| ; 128-bit halves of the result hold the broadcast. |
| define <8 x i32> @shufflevector_v8i32_zeroes(<8 x i32> %a, <8 x i32> %b) { |
| ; CHECK-LABEL: shufflevector_v8i32_zeroes: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: dup v0.4s, v0.s[0] |
| ; CHECK-NEXT: mov v1.16b, v0.16b |
| ; CHECK-NEXT: ret |
| %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> |
| ret <8 x i32> %c |
| } |
| |
| ; All-zero mask on <4 x i64>: every result lane is lane 0 of %a. |
| ; Both selectors are expected to dup into v0 and copy v0 to v1 so that both |
| ; 128-bit halves of the result hold the broadcast. |
| define <4 x i64> @shufflevector_v4i64_zeroes(<4 x i64> %a, <4 x i64> %b) { |
| ; CHECK-LABEL: shufflevector_v4i64_zeroes: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: dup v0.2d, v0.d[0] |
| ; CHECK-NEXT: mov v1.16b, v0.16b |
| ; CHECK-NEXT: ret |
| %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0> |
| ret <4 x i64> %c |
| } |
| |
| ; ===== Vectors with Non-Pow 2 Widths ===== |
| |
| ; Mask <1, 2, 4> on <3 x i8> selects lanes 1 and 2 of %a and lane 1 of %b. |
| ; In the expected output the result elements are produced in w0, w1 and w2; |
| ; presumably %a arrives in w0-w2 and %b in w3-w5 (inferred from the moves |
| ; below - confirm against the calling convention). |
| ; SelectionDAG expectation: three general-purpose register moves. |
| ; GlobalISel expectation: both inputs are assembled into vector registers, |
| ; shuffled with a tbl using indices from .LCPI30_0, and the three result |
| ; bytes are extracted back into w0-w2. |
| define <3 x i8> @shufflevector_v3i8(<3 x i8> %a, <3 x i8> %b) { |
| ; CHECK-SD-LABEL: shufflevector_v3i8: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: mov w0, w1 |
| ; CHECK-SD-NEXT: mov w1, w2 |
| ; CHECK-SD-NEXT: mov w2, w4 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: shufflevector_v3i8: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: fmov s0, w0 |
| ; CHECK-GI-NEXT: fmov s1, w1 |
| ; CHECK-GI-NEXT: adrp x8, .LCPI30_0 |
| ; CHECK-GI-NEXT: fmov s2, w3 |
| ; CHECK-GI-NEXT: fmov s3, w4 |
| ; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] |
| ; CHECK-GI-NEXT: fmov s1, w2 |
| ; CHECK-GI-NEXT: mov v2.b[1], v3.b[0] |
| ; CHECK-GI-NEXT: fmov s3, w5 |
| ; CHECK-GI-NEXT: mov v0.b[2], v1.b[0] |
| ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI30_0] |
| ; CHECK-GI-NEXT: mov v2.b[2], v3.b[0] |
| ; CHECK-GI-NEXT: mov v0.d[1], v2.d[0] |
| ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b |
| ; CHECK-GI-NEXT: mov b1, v0.b[1] |
| ; CHECK-GI-NEXT: mov b2, v0.b[2] |
| ; CHECK-GI-NEXT: fmov w0, s0 |
| ; CHECK-GI-NEXT: fmov w1, s1 |
| ; CHECK-GI-NEXT: fmov w2, s2 |
| ; CHECK-GI-NEXT: ret |
| %c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> <i32 1, i32 2, i32 4> |
| ret <3 x i8> %c |
| } |
| |
| ; Mask <1, 3, 5, 7, 8, 10, 12> on <7 x i8>: with seven lanes per input, |
| ; indices 0-6 address %a and 7-13 address %b, so the result is lanes 1, 3, 5 |
| ; of %a followed by lanes 0, 1, 3, 5 of %b. |
| ; Both selectors are expected to merge the inputs into one q register and use |
| ; a single-register tbl with indices from .LCPI31_0; the SelectionDAG form |
| ; uses the 8b arrangement, the GlobalISel form 16b. |
| define <7 x i8> @shufflevector_v7i8(<7 x i8> %a, <7 x i8> %b) { |
| ; CHECK-SD-LABEL: shufflevector_v7i8: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-SD-NEXT: adrp x8, .LCPI31_0 |
| ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] |
| ; CHECK-SD-NEXT: ldr d1, [x8, :lo12:.LCPI31_0] |
| ; CHECK-SD-NEXT: tbl v0.8b, { v0.16b }, v1.8b |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: shufflevector_v7i8: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-GI-NEXT: adrp x8, .LCPI31_0 |
| ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] |
| ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI31_0] |
| ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b |
| ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| ; CHECK-GI-NEXT: ret |
| %c = shufflevector <7 x i8> %a, <7 x i8> %b, <7 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12> |
| ret <7 x i8> %c |
| } |
| |
| ; Mask <1, 2, 4> on <3 x i16> selects lanes 1 and 2 of %a and lane 1 of %b |
| ; (indices 3-5 address %b). |
| ; SelectionDAG expectation: a zip1 followed by a zip2 on the 4h arrangement. |
| ; GlobalISel expectation: the inputs are merged into one q register and |
| ; shuffled with a single-register tbl using indices from .LCPI32_0. |
| define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) { |
| ; CHECK-SD-LABEL: shufflevector_v3i16: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: zip1 v1.4h, v0.4h, v1.4h |
| ; CHECK-SD-NEXT: zip2 v0.4h, v1.4h, v0.4h |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: shufflevector_v3i16: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-GI-NEXT: adrp x8, .LCPI32_0 |
| ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] |
| ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI32_0] |
| ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b |
| ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| ; CHECK-GI-NEXT: ret |
| %c = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 1, i32 2, i32 4> |
| ret <3 x i16> %c |
| } |
| |
| ; Mask <1, 3, 5, 7, 8, 10, 12> on <7 x i16>: with seven lanes per input, |
| ; indices 0-6 address %a and 7-13 address %b, so the result is lanes 1, 3, 5 |
| ; of %a followed by lanes 0, 1, 3, 5 of %b. |
| ; Expected lowering is a byte-wise two-register tbl with indices loaded from |
| ; .LCPI33_0; the two selectors differ only in the order of the kill comments. |
| define <7 x i16> @shufflevector_v7i16(<7 x i16> %a, <7 x i16> %b) { |
| ; CHECK-SD-LABEL: shufflevector_v7i16: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: adrp x8, .LCPI33_0 |
| ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 |
| ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] |
| ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 |
| ; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: shufflevector_v7i16: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: adrp x8, .LCPI33_0 |
| ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 |
| ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] |
| ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 |
| ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b |
| ; CHECK-GI-NEXT: ret |
| %c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12> |
| ret <7 x i16> %c |
| } |
| |
| ; Mask <1, 2, 4> on <3 x i32> selects lanes 1 and 2 of %a and lane 1 of %b |
| ; (indices 3-5 address %b). |
| ; SelectionDAG expectation: a zip1 followed by a zip2 on the 4s arrangement. |
| ; GlobalISel expectation: a byte-wise two-register tbl with indices loaded |
| ; from .LCPI34_0. |
| define <3 x i32> @shufflevector_v3i32(<3 x i32> %a, <3 x i32> %b) { |
| ; CHECK-SD-LABEL: shufflevector_v3i32: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: zip1 v1.4s, v0.4s, v1.4s |
| ; CHECK-SD-NEXT: zip2 v0.4s, v1.4s, v0.4s |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: shufflevector_v3i32: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: adrp x8, .LCPI34_0 |
| ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 |
| ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI34_0] |
| ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 |
| ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b |
| ; CHECK-GI-NEXT: ret |
| %c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> <i32 1, i32 2, i32 4> |
| ret <3 x i32> %c |
| } |
| |
| ; ===== Vectors with Non-Pow 2 Widths with Zero Masks ===== |
| |
| ; All-zero mask on <3 x i8>: every result lane is lane 0 of %a. |
| ; In the expected output the result elements are produced in w0, w1 and w2. |
| ; SelectionDAG expectation: w0 is copied into w1 and w2. |
| ; GlobalISel expectation: w0 is broadcast into a vector with dup, then lanes |
| ; 0-2 are extracted back into w0-w2. |
| define <3 x i8> @shufflevector_v3i8_zeroes(<3 x i8> %a, <3 x i8> %b) { |
| ; CHECK-SD-LABEL: shufflevector_v3i8_zeroes: |
| ; CHECK-SD: // %bb.0: |
| ; CHECK-SD-NEXT: mov w1, w0 |
| ; CHECK-SD-NEXT: mov w2, w0 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: shufflevector_v3i8_zeroes: |
| ; CHECK-GI: // %bb.0: |
| ; CHECK-GI-NEXT: dup v0.8b, w0 |
| ; CHECK-GI-NEXT: mov b1, v0.b[1] |
| ; CHECK-GI-NEXT: mov b2, v0.b[2] |
| ; CHECK-GI-NEXT: fmov w0, s0 |
| ; CHECK-GI-NEXT: fmov w1, s1 |
| ; CHECK-GI-NEXT: fmov w2, s2 |
| ; CHECK-GI-NEXT: ret |
| %c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> <i32 0, i32 0, i32 0> |
| ret <3 x i8> %c |
| } |
| |
| ; All-zero mask on <7 x i8>: every result lane is lane 0 of %a. |
| ; Both selectors are expected to emit the same lane-indexed dup (8b) as the |
| ; <8 x i8> test in this file. |
| define <7 x i8> @shufflevector_v7i8_zeroes(<7 x i8> %a, <7 x i8> %b) { |
| ; CHECK-LABEL: shufflevector_v7i8_zeroes: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: dup v0.8b, v0.b[0] |
| ; CHECK-NEXT: ret |
| %c = shufflevector <7 x i8> %a, <7 x i8> %b, <7 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> |
| ret <7 x i8> %c |
| } |
| |
| ; All-zero mask on <3 x i16>: every result lane is lane 0 of %a. |
| ; Both selectors are expected to emit the same lane-indexed dup (4h) as the |
| ; <4 x i16> test in this file. |
| define <3 x i16> @shufflevector_v3i16_zeroes(<3 x i16> %a, <3 x i16> %b) { |
| ; CHECK-LABEL: shufflevector_v3i16_zeroes: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: dup v0.4h, v0.h[0] |
| ; CHECK-NEXT: ret |
| %c = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 0, i32 0, i32 0> |
| ret <3 x i16> %c |
| } |
| |
| ; All-zero mask on <7 x i16>: every result lane is lane 0 of %a. |
| ; Both selectors are expected to emit the same lane-indexed dup (8h) as the |
| ; <8 x i16> test in this file. |
| define <7 x i16> @shufflevector_v7i16_zeroes(<7 x i16> %a, <7 x i16> %b) { |
| ; CHECK-LABEL: shufflevector_v7i16_zeroes: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: dup v0.8h, v0.h[0] |
| ; CHECK-NEXT: ret |
| %c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> |
| ret <7 x i16> %c |
| } |
| |
| ; All-zero mask on <3 x i32>: every result lane is lane 0 of %a. |
| ; Both selectors are expected to emit the same lane-indexed dup (4s) as the |
| ; <4 x i32> test in this file. |
| define <3 x i32> @shufflevector_v3i32_zeroes(<3 x i32> %a, <3 x i32> %b) { |
| ; CHECK-LABEL: shufflevector_v3i32_zeroes: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: dup v0.4s, v0.s[0] |
| ; CHECK-NEXT: ret |
| %c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> <i32 0, i32 0, i32 0> |
| ret <3 x i32> %c |
| } |