| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s |
| |
| ; Zero-extends two <8 x i8> inputs to <8 x i16>, adds them lane-wise and |
| ; reduces the sum to one i16. The expected assembly below places %b in the |
| ; upper half of %a's 128-bit register and reduces all 16 bytes with a single |
| ; widening uaddlv. |
| define i16 @test_add_zext_v8i16(<8 x i8> %a, <8 x i8> %b) local_unnamed_addr #0 { |
| ; CHECK-LABEL: test_add_zext_v8i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: mov v0.d[1], v1.d[0] |
| ; CHECK-NEXT: uaddlv h0, v0.16b |
| ; CHECK-NEXT: umov w0, v0.h[0] |
| ; CHECK-NEXT: ret |
| %a.wide = zext <8 x i8> %a to <8 x i16> |
| %b.wide = zext <8 x i8> %b to <8 x i16> |
| %sum = add <8 x i16> %a.wide, %b.wide |
| %total = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %sum) |
| ret i16 %total |
| } |
| |
| ; Zero-extends two <4 x i16> inputs to <4 x i32>, adds them lane-wise and |
| ; reduces the sum to one i32. The expected assembly below places %b in the |
| ; upper half of %a's 128-bit register and reduces all eight halfwords with a |
| ; single widening uaddlv. |
| define i32 @test_add_zext_v4i32(<4 x i16> %a, <4 x i16> %b) local_unnamed_addr #0 { |
| ; CHECK-LABEL: test_add_zext_v4i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: mov v0.d[1], v1.d[0] |
| ; CHECK-NEXT: uaddlv s0, v0.8h |
| ; CHECK-NEXT: fmov w0, s0 |
| ; CHECK-NEXT: ret |
| %a.wide = zext <4 x i16> %a to <4 x i32> |
| %b.wide = zext <4 x i16> %b to <4 x i32> |
| %sum = add <4 x i32> %a.wide, %b.wide |
| %total = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %sum) |
| ret i32 %total |
| } |
| |
| ; Zero-extends two <2 x i32> inputs to <2 x i64>, adds them lane-wise and |
| ; reduces the sum to one i64. The expected assembly below places %b in the |
| ; upper half of %a's 128-bit register and reduces all four words with a |
| ; single widening uaddlv. |
| define i64 @test_add_zext_v2i64(<2 x i32> %a, <2 x i32> %b) local_unnamed_addr #0 { |
| ; CHECK-LABEL: test_add_zext_v2i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: mov v0.d[1], v1.d[0] |
| ; CHECK-NEXT: uaddlv d0, v0.4s |
| ; CHECK-NEXT: fmov x0, d0 |
| ; CHECK-NEXT: ret |
| %a.wide = zext <2 x i32> %a to <2 x i64> |
| %b.wide = zext <2 x i32> %b to <2 x i64> |
| %sum = add <2 x i64> %a.wide, %b.wide |
| %total = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %sum) |
| ret i64 %total |
| } |
| |
| ; Integer add-reduction intrinsics called by the test functions in this file, |
| ; one per vector type being reduced: <8 x i16>, <4 x i32> and <2 x i64>. |
| declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) |
| declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) |
| declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) |