; Checks NVPTX lowering of a <2 x float> fadd with and without packed f32x2 support.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mcpu=sm_100 | FileCheck %s --check-prefix=F32X2
; RUN: llc < %s -mcpu=sm_90 | FileCheck %s --check-prefix=NOF32X2
; RUN: llc < %s -mcpu=sm_100 -nvptx-no-f32x2 | FileCheck %s --check-prefix=NOF32X2
target triple = "nvptx64-nvidia-cuda"
; Element-wise add of the two <2 x float> arguments, returned as <2 x float>.
; Per the RUN lines above, the sm_100 output is checked for one packed
; add.rn.f32x2 on 64-bit registers, while sm_90 and sm_100 with
; -nvptx-no-f32x2 are checked for two scalar add.rn.f32 instructions.
define <2 x float> @test(<2 x float> %a, <2 x float> %b) {
; F32X2-LABEL: test(
; F32X2: {
; F32X2-NEXT: .reg .b64 %rd<4>;
; F32X2-EMPTY:
; F32X2-NEXT: // %bb.0:
; F32X2-NEXT: ld.param.b64 %rd1, [test_param_0];
; F32X2-NEXT: ld.param.b64 %rd2, [test_param_1];
; F32X2-NEXT: add.rn.f32x2 %rd3, %rd1, %rd2;
; F32X2-NEXT: st.param.b64 [func_retval0], %rd3;
; F32X2-NEXT: ret;
;
; NOF32X2-LABEL: test(
; NOF32X2: {
; NOF32X2-NEXT: .reg .b32 %r<7>;
; NOF32X2-EMPTY:
; NOF32X2-NEXT: // %bb.0:
; NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_param_0];
; NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_param_1];
; NOF32X2-NEXT: add.rn.f32 %r5, %r2, %r4;
; NOF32X2-NEXT: add.rn.f32 %r6, %r1, %r3;
; NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5};
; NOF32X2-NEXT: ret;
  %sum = fadd <2 x float> %a, %b
  ret <2 x float> %sum
}