| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; ## sm_80 is checked without packed FP32x2 instructions; on sm_100 full FP32x2 support is enabled by default. |
| ; RUN: llc < %s -mcpu=sm_80 -O0 -disable-post-ra -frame-pointer=all \ |
| ; RUN: -verify-machineinstrs | FileCheck --check-prefixes=CHECK,CHECK-NOF32X2 %s |
| ; RUN: %if ptxas-12.7 %{ \ |
| ; RUN: llc < %s -mcpu=sm_80 -O0 -disable-post-ra -frame-pointer=all \ |
| ; RUN: -verify-machineinstrs | %ptxas-verify -arch=sm_80 \ |
| ; RUN: %} |
| ; RUN: llc < %s -mcpu=sm_100 -O0 -disable-post-ra -frame-pointer=all \ |
| ; RUN: -verify-machineinstrs | FileCheck --check-prefixes=CHECK,CHECK-F32X2 %s |
| ; RUN: %if ptxas-12.7 %{ \ |
| ; RUN: llc < %s -mcpu=sm_100 -O0 -disable-post-ra -frame-pointer=all \ |
| ; RUN: -verify-machineinstrs | %ptxas-verify -arch=sm_100 \ |
| ; RUN: %} |
| |
| ; Module-wide target settings: the data layout string and the 64-bit NVPTX/CUDA triple. |
| target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" |
| target triple = "nvptx64-nvidia-cuda" |
| |
| ; Returns the constant vector <1.0, 2.0>. Both targets share one expectation: the two |
| ; immediates are stored straight to the return slot, with no register declarations. |
| define <2 x float> @test_ret_const() #0 { |
| ; CHECK-LABEL: test_ret_const( |
| ; CHECK: { |
| ; CHECK-EMPTY: |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0f3F800000, 0f40000000}; |
| ; CHECK-NEXT: ret; |
| ret <2 x float> <float 1.0, float 2.0> |
| } |
| |
| ; Extracts element 0 of a <2 x float> argument. The sm_80 expectations load the vector as |
| ; two b32 registers; the sm_100 expectations load one b64 and unpack the low half with mov.b64. |
| define float @test_extract_0(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_extract_0( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_extract_0_param_0]; |
| ; CHECK-NOF32X2-NEXT: st.param.b32 [func_retval0], %r1; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_extract_0( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<2>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_extract_0_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, _}, %rd1; |
| ; CHECK-F32X2-NEXT: st.param.b32 [func_retval0], %r1; |
| ; CHECK-F32X2-NEXT: ret; |
| %e = extractelement <2 x float> %a, i32 0 |
| ret float %e |
| } |
| |
| ; Extracts element 1 of a <2 x float> argument. The sm_80 expectations return the second |
| ; loaded b32 register; the sm_100 expectations unpack the high half of the b64 with mov.b64. |
| define float @test_extract_1(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_extract_1( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_extract_1_param_0]; |
| ; CHECK-NOF32X2-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_extract_1( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<2>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_extract_1_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {_, %r1}, %rd1; |
| ; CHECK-F32X2-NEXT: st.param.b32 [func_retval0], %r1; |
| ; CHECK-F32X2-NEXT: ret; |
| %e = extractelement <2 x float> %a, i32 1 |
| ret float %e |
| } |
| |
| ; Extracts an element at a runtime i64 index. The sm_80 expectations store the vector to a |
| ; local stack slot and load from the index masked to 1 and scaled by 4; the sm_100 |
| ; expectations unpack both halves and pick one with setp (index == 0) and selp. |
| define float @test_extract_i(<2 x float> %a, i64 %idx) #0 { |
| ; CHECK-NOF32X2-LABEL: test_extract_i( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .local .align 8 .b8 __local_depot3[8]; |
| ; CHECK-NOF32X2-NEXT: .reg .b64 %SP; |
| ; CHECK-NOF32X2-NEXT: .reg .b64 %SPL; |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<4>; |
| ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: mov.b64 %SPL, __local_depot3; |
| ; CHECK-NOF32X2-NEXT: cvta.local.u64 %SP, %SPL; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_extract_i_param_0]; |
| ; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_extract_i_param_1]; |
| ; CHECK-NOF32X2-NEXT: st.v2.b32 [%SP], {%r1, %r2}; |
| ; CHECK-NOF32X2-NEXT: and.b64 %rd2, %rd1, 1; |
| ; CHECK-NOF32X2-NEXT: shl.b64 %rd3, %rd2, 2; |
| ; CHECK-NOF32X2-NEXT: add.u64 %rd4, %SP, 0; |
| ; CHECK-NOF32X2-NEXT: or.b64 %rd5, %rd4, %rd3; |
| ; CHECK-NOF32X2-NEXT: ld.b32 %r3, [%rd5]; |
| ; CHECK-NOF32X2-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_extract_i( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .pred %p<2>; |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<4>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_extract_i_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_extract_i_param_0]; |
| ; CHECK-F32X2-NEXT: setp.eq.b64 %p1, %rd2, 0; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: selp.f32 %r3, %r1, %r2, %p1; |
| ; CHECK-F32X2-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-F32X2-NEXT: ret; |
| %e = extractelement <2 x float> %a, i64 %idx |
| ret float %e |
| } |
| |
| ; Element-wise fadd of two <2 x float> arguments. The sm_80 expectations use two scalar |
| ; add.rn.f32; the sm_100 expectations use a single add.rn.f32x2 on packed b64 registers. |
| define <2 x float> @test_fadd(<2 x float> %a, <2 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fadd( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fadd_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fadd_param_0]; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r5, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r6, %r1, %r3; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fadd( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fadd_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fadd_param_0]; |
| ; CHECK-F32X2-NEXT: add.rn.f32x2 %rd3, %rd1, %rd2; |
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fadd <2 x float> %a, %b |
| ret <2 x float> %r |
| } |
| |
| ; fadd with the constant vector <1.0, 2.0> as the first operand. The sm_80 expectations fold |
| ; the constants into the scalar adds as immediates; the sm_100 expectations materialize both |
| ; constants in b32 registers and pack them into a b64 before the add.rn.f32x2. |
| define <2 x float> @test_fadd_imm_0(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fadd_imm_0( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fadd_imm_0_param_0]; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r3, %r2, 0f40000000; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r4, %r1, 0f3F800000; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fadd_imm_0( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<3>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fadd_imm_0_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b32 %r1, 0f40000000; |
| ; CHECK-F32X2-NEXT: mov.b32 %r2, 0f3F800000; |
| ; CHECK-F32X2-NEXT: mov.b64 %rd2, {%r2, %r1}; |
| ; CHECK-F32X2-NEXT: add.rn.f32x2 %rd3, %rd1, %rd2; |
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fadd <2 x float> <float 1.0, float 2.0>, %a |
| ret <2 x float> %r |
| } |
| |
| ; fadd with the constant vector <1.0, 2.0> as the second operand. The expected PTX is the |
| ; same instruction sequence as for test_fadd_imm_0, where the constant is the first operand. |
| define <2 x float> @test_fadd_imm_1(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fadd_imm_1( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fadd_imm_1_param_0]; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r3, %r2, 0f40000000; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r4, %r1, 0f3F800000; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fadd_imm_1( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<3>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fadd_imm_1_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b32 %r1, 0f40000000; |
| ; CHECK-F32X2-NEXT: mov.b32 %r2, 0f3F800000; |
| ; CHECK-F32X2-NEXT: mov.b64 %rd2, {%r2, %r1}; |
| ; CHECK-F32X2-NEXT: add.rn.f32x2 %rd3, %rd1, %rd2; |
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fadd <2 x float> %a, <float 1.0, float 2.0> |
| ret <2 x float> %r |
| } |
| |
| ; Element-wise fadd of two <4 x float> arguments. The sm_80 expectations use four scalar |
| ; add.rn.f32; the sm_100 expectations use two add.rn.f32x2, one per packed b64 half. |
| define <4 x float> @test_fadd_v4(<4 x float> %a, <4 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fadd_v4( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<13>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [test_fadd_v4_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [test_fadd_v4_param_0]; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r9, %r4, %r8; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r10, %r3, %r7; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r11, %r2, %r6; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r12, %r1, %r5; |
| ; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r12, %r11, %r10, %r9}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fadd_v4( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<7>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [test_fadd_v4_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fadd_v4_param_0]; |
| ; CHECK-F32X2-NEXT: add.rn.f32x2 %rd5, %rd2, %rd4; |
| ; CHECK-F32X2-NEXT: add.rn.f32x2 %rd6, %rd1, %rd3; |
| ; CHECK-F32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd6, %rd5}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fadd <4 x float> %a, %b |
| ret <4 x float> %r |
| } |
| |
| ; <4 x float> fadd with the constant <1.0, 2.0, 3.0, 4.0> as the first operand. The sm_80 |
| ; expectations use four scalar adds with immediates; the sm_100 expectations build two packed |
| ; constant pairs with mov.b32/mov.b64 and use two add.rn.f32x2. |
| define <4 x float> @test_fadd_imm_0_v4(<4 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fadd_imm_0_v4( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [test_fadd_imm_0_v4_param_0]; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r5, %r4, 0f40800000; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r6, %r3, 0f40400000; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r7, %r2, 0f40000000; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r8, %r1, 0f3F800000; |
| ; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fadd_imm_0_v4( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<7>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fadd_imm_0_v4_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b32 %r1, 0f40800000; |
| ; CHECK-F32X2-NEXT: mov.b32 %r2, 0f40400000; |
| ; CHECK-F32X2-NEXT: mov.b64 %rd3, {%r2, %r1}; |
| ; CHECK-F32X2-NEXT: add.rn.f32x2 %rd4, %rd2, %rd3; |
| ; CHECK-F32X2-NEXT: mov.b32 %r3, 0f40000000; |
| ; CHECK-F32X2-NEXT: mov.b32 %r4, 0f3F800000; |
| ; CHECK-F32X2-NEXT: mov.b64 %rd5, {%r4, %r3}; |
| ; CHECK-F32X2-NEXT: add.rn.f32x2 %rd6, %rd1, %rd5; |
| ; CHECK-F32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd6, %rd4}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fadd <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %a |
| ret <4 x float> %r |
| } |
| |
| ; <4 x float> fadd with the constant <1.0, 2.0, 3.0, 4.0> as the second operand. The expected |
| ; PTX is the same instruction sequence as for test_fadd_imm_0_v4 (constant as first operand). |
| define <4 x float> @test_fadd_imm_1_v4(<4 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fadd_imm_1_v4( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [test_fadd_imm_1_v4_param_0]; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r5, %r4, 0f40800000; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r6, %r3, 0f40400000; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r7, %r2, 0f40000000; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r8, %r1, 0f3F800000; |
| ; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fadd_imm_1_v4( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<7>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fadd_imm_1_v4_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b32 %r1, 0f40800000; |
| ; CHECK-F32X2-NEXT: mov.b32 %r2, 0f40400000; |
| ; CHECK-F32X2-NEXT: mov.b64 %rd3, {%r2, %r1}; |
| ; CHECK-F32X2-NEXT: add.rn.f32x2 %rd4, %rd2, %rd3; |
| ; CHECK-F32X2-NEXT: mov.b32 %r3, 0f40000000; |
| ; CHECK-F32X2-NEXT: mov.b32 %r4, 0f3F800000; |
| ; CHECK-F32X2-NEXT: mov.b64 %rd5, {%r4, %r3}; |
| ; CHECK-F32X2-NEXT: add.rn.f32x2 %rd6, %rd1, %rd5; |
| ; CHECK-F32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd6, %rd4}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fadd <4 x float> %a, <float 1.0, float 2.0, float 3.0, float 4.0> |
| ret <4 x float> %r |
| } |
| |
| ; Element-wise fsub of two <2 x float> arguments. The sm_80 expectations use two scalar |
| ; sub.rn.f32; the sm_100 expectations use a single sub.rn.f32x2 on packed b64 registers. |
| define <2 x float> @test_fsub(<2 x float> %a, <2 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fsub( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fsub_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fsub_param_0]; |
| ; CHECK-NOF32X2-NEXT: sub.rn.f32 %r5, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: sub.rn.f32 %r6, %r1, %r3; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fsub( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fsub_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fsub_param_0]; |
| ; CHECK-F32X2-NEXT: sub.rn.f32x2 %rd3, %rd1, %rd2; |
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fsub <2 x float> %a, %b |
| ret <2 x float> %r |
| } |
| |
| ; Element-wise fneg of a <2 x float> argument. Both targets are expected to negate per |
| ; element with scalar neg.f32; the sm_100 expectations first unpack the b64 with mov.b64. |
| define <2 x float> @test_fneg(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fneg( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fneg_param_0]; |
| ; CHECK-NOF32X2-NEXT: neg.f32 %r3, %r2; |
| ; CHECK-NOF32X2-NEXT: neg.f32 %r4, %r1; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fneg( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fneg_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: neg.f32 %r3, %r2; |
| ; CHECK-F32X2-NEXT: neg.f32 %r4, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fneg <2 x float> %a |
| ret <2 x float> %r |
| } |
| |
| ; Element-wise fmul of two <2 x float> arguments. The sm_80 expectations use two scalar |
| ; mul.rn.f32; the sm_100 expectations use a single mul.rn.f32x2 on packed b64 registers. |
| define <2 x float> @test_fmul(<2 x float> %a, <2 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fmul( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fmul_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fmul_param_0]; |
| ; CHECK-NOF32X2-NEXT: mul.rn.f32 %r5, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: mul.rn.f32 %r6, %r1, %r3; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fmul( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fmul_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fmul_param_0]; |
| ; CHECK-F32X2-NEXT: mul.rn.f32x2 %rd3, %rd1, %rd2; |
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fmul <2 x float> %a, %b |
| ret <2 x float> %r |
| } |
| |
| ; Element-wise fdiv of two <2 x float> arguments. Both targets are expected to divide per |
| ; element with scalar div.rn.f32; the sm_100 expectations unpack both b64 operands first. |
| define <2 x float> @test_fdiv(<2 x float> %a, <2 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fdiv( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fdiv_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fdiv_param_0]; |
| ; CHECK-NOF32X2-NEXT: div.rn.f32 %r5, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: div.rn.f32 %r6, %r1, %r3; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fdiv( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<7>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fdiv_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fdiv_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; |
| ; CHECK-F32X2-NEXT: div.rn.f32 %r5, %r4, %r2; |
| ; CHECK-F32X2-NEXT: div.rn.f32 %r6, %r3, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fdiv <2 x float> %a, %b |
| ret <2 x float> %r |
| } |
| |
| ; Element-wise frem of two <2 x float> arguments. Both targets expect a scalar expansion per |
| ; element: div.rn, cvt.rzi, neg and fma.rn compute a - trunc(a / b) * b, and testp.infinite |
| ; plus selp return the dividend unchanged when the divisor is infinite. The sm_100 |
| ; expectations unpack both b64 operands with mov.b64 first. |
| define <2 x float> @test_frem(<2 x float> %a, <2 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_frem( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<15>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_frem_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_frem_param_0]; |
| ; CHECK-NOF32X2-NEXT: div.rn.f32 %r5, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: cvt.rzi.f32.f32 %r6, %r5; |
| ; CHECK-NOF32X2-NEXT: neg.f32 %r7, %r6; |
| ; CHECK-NOF32X2-NEXT: fma.rn.f32 %r8, %r7, %r4, %r2; |
| ; CHECK-NOF32X2-NEXT: testp.infinite.f32 %p1, %r4; |
| ; CHECK-NOF32X2-NEXT: selp.f32 %r9, %r2, %r8, %p1; |
| ; CHECK-NOF32X2-NEXT: div.rn.f32 %r10, %r1, %r3; |
| ; CHECK-NOF32X2-NEXT: cvt.rzi.f32.f32 %r11, %r10; |
| ; CHECK-NOF32X2-NEXT: neg.f32 %r12, %r11; |
| ; CHECK-NOF32X2-NEXT: fma.rn.f32 %r13, %r12, %r3, %r1; |
| ; CHECK-NOF32X2-NEXT: testp.infinite.f32 %p2, %r3; |
| ; CHECK-NOF32X2-NEXT: selp.f32 %r14, %r1, %r13, %p2; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r14, %r9}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_frem( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<15>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_frem_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_frem_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; |
| ; CHECK-F32X2-NEXT: div.rn.f32 %r5, %r4, %r2; |
| ; CHECK-F32X2-NEXT: cvt.rzi.f32.f32 %r6, %r5; |
| ; CHECK-F32X2-NEXT: neg.f32 %r7, %r6; |
| ; CHECK-F32X2-NEXT: fma.rn.f32 %r8, %r7, %r2, %r4; |
| ; CHECK-F32X2-NEXT: testp.infinite.f32 %p1, %r2; |
| ; CHECK-F32X2-NEXT: selp.f32 %r9, %r4, %r8, %p1; |
| ; CHECK-F32X2-NEXT: div.rn.f32 %r10, %r3, %r1; |
| ; CHECK-F32X2-NEXT: cvt.rzi.f32.f32 %r11, %r10; |
| ; CHECK-F32X2-NEXT: neg.f32 %r12, %r11; |
| ; CHECK-F32X2-NEXT: fma.rn.f32 %r13, %r12, %r1, %r3; |
| ; CHECK-F32X2-NEXT: testp.infinite.f32 %p2, %r1; |
| ; CHECK-F32X2-NEXT: selp.f32 %r14, %r3, %r13, %p2; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r14, %r9}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = frem <2 x float> %a, %b |
| ret <2 x float> %r |
| } |
| |
| ; Same IR as test_fadd but with attribute group #2 (defined outside this view; presumably it |
| ; enables flush-to-zero - confirm). The expected adds carry the .ftz modifier on both targets. |
| define <2 x float> @test_fadd_ftz(<2 x float> %a, <2 x float> %b) #2 { |
| ; CHECK-NOF32X2-LABEL: test_fadd_ftz( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fadd_ftz_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fadd_ftz_param_0]; |
| ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r5, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r6, %r1, %r3; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fadd_ftz( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fadd_ftz_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fadd_ftz_param_0]; |
| ; CHECK-F32X2-NEXT: add.rn.ftz.f32x2 %rd3, %rd1, %rd2; |
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fadd <2 x float> %a, %b |
| ret <2 x float> %r |
| } |
| |
| ; Same IR as test_fadd_imm_0 (constant <1.0, 2.0> as first operand) but with attribute |
| ; group #2, which is defined outside this view. The expected adds carry the .ftz modifier. |
| define <2 x float> @test_fadd_imm_0_ftz(<2 x float> %a) #2 { |
| ; CHECK-NOF32X2-LABEL: test_fadd_imm_0_ftz( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fadd_imm_0_ftz_param_0]; |
| ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r3, %r2, 0f40000000; |
| ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r4, %r1, 0f3F800000; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fadd_imm_0_ftz( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<3>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fadd_imm_0_ftz_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b32 %r1, 0f40000000; |
| ; CHECK-F32X2-NEXT: mov.b32 %r2, 0f3F800000; |
| ; CHECK-F32X2-NEXT: mov.b64 %rd2, {%r2, %r1}; |
| ; CHECK-F32X2-NEXT: add.rn.ftz.f32x2 %rd3, %rd1, %rd2; |
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fadd <2 x float> <float 1.0, float 2.0>, %a |
| ret <2 x float> %r |
| } |
| |
| ; Same IR as test_fadd_imm_1 (constant <1.0, 2.0> as second operand) but with attribute |
| ; group #2, which is defined outside this view. The expected adds carry the .ftz modifier. |
| define <2 x float> @test_fadd_imm_1_ftz(<2 x float> %a) #2 { |
| ; CHECK-NOF32X2-LABEL: test_fadd_imm_1_ftz( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fadd_imm_1_ftz_param_0]; |
| ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r3, %r2, 0f40000000; |
| ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r4, %r1, 0f3F800000; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fadd_imm_1_ftz( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<3>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fadd_imm_1_ftz_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b32 %r1, 0f40000000; |
| ; CHECK-F32X2-NEXT: mov.b32 %r2, 0f3F800000; |
| ; CHECK-F32X2-NEXT: mov.b64 %rd2, {%r2, %r1}; |
| ; CHECK-F32X2-NEXT: add.rn.ftz.f32x2 %rd3, %rd1, %rd2; |
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fadd <2 x float> %a, <float 1.0, float 2.0> |
| ret <2 x float> %r |
| } |
| |
| ; Same IR as test_fadd_v4 but with attribute group #2, which is defined outside this view. |
| ; The expected adds carry the .ftz modifier: four scalar on sm_80, two packed on sm_100. |
| define <4 x float> @test_fadd_v4_ftz(<4 x float> %a, <4 x float> %b) #2 { |
| ; CHECK-NOF32X2-LABEL: test_fadd_v4_ftz( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<13>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [test_fadd_v4_ftz_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [test_fadd_v4_ftz_param_0]; |
| ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r9, %r4, %r8; |
| ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r10, %r3, %r7; |
| ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r11, %r2, %r6; |
| ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r12, %r1, %r5; |
| ; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r12, %r11, %r10, %r9}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fadd_v4_ftz( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<7>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [test_fadd_v4_ftz_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fadd_v4_ftz_param_0]; |
| ; CHECK-F32X2-NEXT: add.rn.ftz.f32x2 %rd5, %rd2, %rd4; |
| ; CHECK-F32X2-NEXT: add.rn.ftz.f32x2 %rd6, %rd1, %rd3; |
| ; CHECK-F32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd6, %rd5}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fadd <4 x float> %a, %b |
| ret <4 x float> %r |
| } |
| |
| ; Same IR as test_fadd_imm_0_v4 (constant <1.0, 2.0, 3.0, 4.0> as first operand) but with |
| ; attribute group #2, defined outside this view. The expected adds carry the .ftz modifier. |
| define <4 x float> @test_fadd_imm_0_v4_ftz(<4 x float> %a) #2 { |
| ; CHECK-NOF32X2-LABEL: test_fadd_imm_0_v4_ftz( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [test_fadd_imm_0_v4_ftz_param_0]; |
| ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r5, %r4, 0f40800000; |
| ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r6, %r3, 0f40400000; |
| ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r7, %r2, 0f40000000; |
| ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r8, %r1, 0f3F800000; |
| ; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fadd_imm_0_v4_ftz( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<7>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fadd_imm_0_v4_ftz_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b32 %r1, 0f40800000; |
| ; CHECK-F32X2-NEXT: mov.b32 %r2, 0f40400000; |
| ; CHECK-F32X2-NEXT: mov.b64 %rd3, {%r2, %r1}; |
| ; CHECK-F32X2-NEXT: add.rn.ftz.f32x2 %rd4, %rd2, %rd3; |
| ; CHECK-F32X2-NEXT: mov.b32 %r3, 0f40000000; |
| ; CHECK-F32X2-NEXT: mov.b32 %r4, 0f3F800000; |
| ; CHECK-F32X2-NEXT: mov.b64 %rd5, {%r4, %r3}; |
| ; CHECK-F32X2-NEXT: add.rn.ftz.f32x2 %rd6, %rd1, %rd5; |
| ; CHECK-F32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd6, %rd4}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fadd <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %a |
| ret <4 x float> %r |
| } |
| |
| ; Same IR as test_fadd_imm_1_v4 (constant <1.0, 2.0, 3.0, 4.0> as second operand) but with |
| ; attribute group #2, defined outside this view. The expected adds carry the .ftz modifier. |
| define <4 x float> @test_fadd_imm_1_v4_ftz(<4 x float> %a) #2 { |
| ; CHECK-NOF32X2-LABEL: test_fadd_imm_1_v4_ftz( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [test_fadd_imm_1_v4_ftz_param_0]; |
| ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r5, %r4, 0f40800000; |
| ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r6, %r3, 0f40400000; |
| ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r7, %r2, 0f40000000; |
| ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r8, %r1, 0f3F800000; |
| ; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fadd_imm_1_v4_ftz( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<7>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fadd_imm_1_v4_ftz_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b32 %r1, 0f40800000; |
| ; CHECK-F32X2-NEXT: mov.b32 %r2, 0f40400000; |
| ; CHECK-F32X2-NEXT: mov.b64 %rd3, {%r2, %r1}; |
| ; CHECK-F32X2-NEXT: add.rn.ftz.f32x2 %rd4, %rd2, %rd3; |
| ; CHECK-F32X2-NEXT: mov.b32 %r3, 0f40000000; |
| ; CHECK-F32X2-NEXT: mov.b32 %r4, 0f3F800000; |
| ; CHECK-F32X2-NEXT: mov.b64 %rd5, {%r4, %r3}; |
| ; CHECK-F32X2-NEXT: add.rn.ftz.f32x2 %rd6, %rd1, %rd5; |
| ; CHECK-F32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd6, %rd4}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fadd <4 x float> %a, <float 1.0, float 2.0, float 3.0, float 4.0> |
| ret <4 x float> %r |
| } |
| |
| ; Same IR as test_fsub but with attribute group #2, which is defined outside this view. |
| ; The expected subtracts carry the .ftz modifier: two scalar on sm_80, one packed on sm_100. |
| define <2 x float> @test_fsub_ftz(<2 x float> %a, <2 x float> %b) #2 { |
| ; CHECK-NOF32X2-LABEL: test_fsub_ftz( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fsub_ftz_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fsub_ftz_param_0]; |
| ; CHECK-NOF32X2-NEXT: sub.rn.ftz.f32 %r5, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: sub.rn.ftz.f32 %r6, %r1, %r3; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fsub_ftz( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fsub_ftz_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fsub_ftz_param_0]; |
| ; CHECK-F32X2-NEXT: sub.rn.ftz.f32x2 %rd3, %rd1, %rd2; |
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fsub <2 x float> %a, %b |
| ret <2 x float> %r |
| } |
| |
| ; Same IR as test_fneg but with attribute group #2, which is defined outside this view. |
| ; Both targets expect per-element neg.ftz.f32; sm_100 first unpacks the b64 with mov.b64. |
| define <2 x float> @test_fneg_ftz(<2 x float> %a) #2 { |
| ; CHECK-NOF32X2-LABEL: test_fneg_ftz( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fneg_ftz_param_0]; |
| ; CHECK-NOF32X2-NEXT: neg.ftz.f32 %r3, %r2; |
| ; CHECK-NOF32X2-NEXT: neg.ftz.f32 %r4, %r1; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fneg_ftz( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fneg_ftz_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: neg.ftz.f32 %r3, %r2; |
| ; CHECK-F32X2-NEXT: neg.ftz.f32 %r4, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fneg <2 x float> %a |
| ret <2 x float> %r |
| } |
| |
| ; Same IR as test_fmul but with attribute group #2, which is defined outside this view. |
| ; The expected multiplies carry the .ftz modifier: two scalar on sm_80, one packed on sm_100. |
| define <2 x float> @test_fmul_ftz(<2 x float> %a, <2 x float> %b) #2 { |
| ; CHECK-NOF32X2-LABEL: test_fmul_ftz( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fmul_ftz_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fmul_ftz_param_0]; |
| ; CHECK-NOF32X2-NEXT: mul.rn.ftz.f32 %r5, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: mul.rn.ftz.f32 %r6, %r1, %r3; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fmul_ftz( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fmul_ftz_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fmul_ftz_param_0]; |
| ; CHECK-F32X2-NEXT: mul.rn.ftz.f32x2 %rd3, %rd1, %rd2; |
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fmul <2 x float> %a, %b |
| ret <2 x float> %r |
| } |
| |
| ; Calls the llvm.fma intrinsic on three <2 x float> arguments, with attribute group #2 |
| ; (defined outside this view). The sm_80 expectations use two scalar fma.rn.ftz.f32; the |
| ; sm_100 expectations use a single fma.rn.ftz.f32x2 on packed b64 registers. |
| define <2 x float> @test_fma_ftz(<2 x float> %a, <2 x float> %b, <2 x float> %c) #2 { |
| ; CHECK-NOF32X2-LABEL: test_fma_ftz( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_fma_ftz_param_2]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fma_ftz_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fma_ftz_param_0]; |
| ; CHECK-NOF32X2-NEXT: fma.rn.ftz.f32 %r7, %r2, %r4, %r6; |
| ; CHECK-NOF32X2-NEXT: fma.rn.ftz.f32 %r8, %r1, %r3, %r5; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fma_ftz( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<5>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd3, [test_fma_ftz_param_2]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fma_ftz_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fma_ftz_param_0]; |
| ; CHECK-F32X2-NEXT: fma.rn.ftz.f32x2 %rd4, %rd1, %rd2, %rd3; |
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd4; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = call <2 x float> @llvm.fma(<2 x float> %a, <2 x float> %b, <2 x float> %c) |
| ret <2 x float> %r |
| } |
| |
| define <2 x float> @test_fdiv_ftz(<2 x float> %a, <2 x float> %b) #2 { ; <2 x float> fdiv under attribute group #2 (defined outside this excerpt); expected PTX carries .ftz
| ; CHECK-NOF32X2-LABEL: test_fdiv_ftz(
| ; CHECK-NOF32X2: {
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>;
| ; CHECK-NOF32X2-EMPTY:
| ; CHECK-NOF32X2-NEXT: // %bb.0:
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fdiv_ftz_param_1];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fdiv_ftz_param_0];
| ; CHECK-NOF32X2-NEXT: div.rn.ftz.f32 %r5, %r2, %r4;
| ; CHECK-NOF32X2-NEXT: div.rn.ftz.f32 %r6, %r1, %r3;
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5};
| ; CHECK-NOF32X2-NEXT: ret;
| ;
| ; CHECK-F32X2-LABEL: test_fdiv_ftz(
| ; CHECK-F32X2: {
| ; CHECK-F32X2-NEXT: .reg .b32 %r<7>;
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>;
| ; CHECK-F32X2-EMPTY:
| ; CHECK-F32X2-NEXT: // %bb.0:
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fdiv_ftz_param_1];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fdiv_ftz_param_0];
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2;
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1;
| ; CHECK-F32X2-NEXT: div.rn.ftz.f32 %r5, %r4, %r2;
| ; CHECK-F32X2-NEXT: div.rn.ftz.f32 %r6, %r3, %r1;
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5};
| ; CHECK-F32X2-NEXT: ret;
| %r = fdiv <2 x float> %a, %b ; both runs expect two scalar div.rn.ftz.f32; the sm_100 run first unpacks each b64 parameter with mov.b64
| ret <2 x float> %r
| }
| |
| define <2 x float> @test_frem_ftz(<2 x float> %a, <2 x float> %b) #2 { ; <2 x float> frem under attribute group #2 (defined outside this excerpt); expected PTX carries .ftz on the arithmetic steps
| ; CHECK-NOF32X2-LABEL: test_frem_ftz(
| ; CHECK-NOF32X2: {
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>;
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<15>;
| ; CHECK-NOF32X2-EMPTY:
| ; CHECK-NOF32X2-NEXT: // %bb.0:
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_frem_ftz_param_1];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_frem_ftz_param_0];
| ; CHECK-NOF32X2-NEXT: div.rn.ftz.f32 %r5, %r2, %r4;
| ; CHECK-NOF32X2-NEXT: cvt.rzi.ftz.f32.f32 %r6, %r5;
| ; CHECK-NOF32X2-NEXT: neg.ftz.f32 %r7, %r6;
| ; CHECK-NOF32X2-NEXT: fma.rn.ftz.f32 %r8, %r7, %r4, %r2;
| ; CHECK-NOF32X2-NEXT: testp.infinite.f32 %p1, %r4;
| ; CHECK-NOF32X2-NEXT: selp.f32 %r9, %r2, %r8, %p1;
| ; CHECK-NOF32X2-NEXT: div.rn.ftz.f32 %r10, %r1, %r3;
| ; CHECK-NOF32X2-NEXT: cvt.rzi.ftz.f32.f32 %r11, %r10;
| ; CHECK-NOF32X2-NEXT: neg.ftz.f32 %r12, %r11;
| ; CHECK-NOF32X2-NEXT: fma.rn.ftz.f32 %r13, %r12, %r3, %r1;
| ; CHECK-NOF32X2-NEXT: testp.infinite.f32 %p2, %r3;
| ; CHECK-NOF32X2-NEXT: selp.f32 %r14, %r1, %r13, %p2;
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r14, %r9};
| ; CHECK-NOF32X2-NEXT: ret;
| ;
| ; CHECK-F32X2-LABEL: test_frem_ftz(
| ; CHECK-F32X2: {
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>;
| ; CHECK-F32X2-NEXT: .reg .b32 %r<15>;
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>;
| ; CHECK-F32X2-EMPTY:
| ; CHECK-F32X2-NEXT: // %bb.0:
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_frem_ftz_param_1];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_frem_ftz_param_0];
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2;
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1;
| ; CHECK-F32X2-NEXT: div.rn.ftz.f32 %r5, %r4, %r2;
| ; CHECK-F32X2-NEXT: cvt.rzi.ftz.f32.f32 %r6, %r5;
| ; CHECK-F32X2-NEXT: neg.ftz.f32 %r7, %r6;
| ; CHECK-F32X2-NEXT: fma.rn.ftz.f32 %r8, %r7, %r2, %r4;
| ; CHECK-F32X2-NEXT: testp.infinite.f32 %p1, %r2;
| ; CHECK-F32X2-NEXT: selp.f32 %r9, %r4, %r8, %p1;
| ; CHECK-F32X2-NEXT: div.rn.ftz.f32 %r10, %r3, %r1;
| ; CHECK-F32X2-NEXT: cvt.rzi.ftz.f32.f32 %r11, %r10;
| ; CHECK-F32X2-NEXT: neg.ftz.f32 %r12, %r11;
| ; CHECK-F32X2-NEXT: fma.rn.ftz.f32 %r13, %r12, %r1, %r3;
| ; CHECK-F32X2-NEXT: testp.infinite.f32 %p2, %r1;
| ; CHECK-F32X2-NEXT: selp.f32 %r14, %r3, %r13, %p2;
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r14, %r9};
| ; CHECK-F32X2-NEXT: ret;
| %r = frem <2 x float> %a, %b ; expected per lane in both runs - div, cvt.rzi, neg, fma, then testp.infinite on the %b lane and a selp between the %a lane and the fma result
| ret <2 x float> %r
| }
| |
| define void @test_ldst_v2f32(ptr %a, ptr %b) #0 { ; copies one <2 x float> from %a to %b; sm_80 run expects ld.v2.b32/st.v2.b32, sm_100 run expects ld.b64/st.b64
| ; CHECK-NOF32X2-LABEL: test_ldst_v2f32(
| ; CHECK-NOF32X2: {
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<3>;
| ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>;
| ; CHECK-NOF32X2-EMPTY:
| ; CHECK-NOF32X2-NEXT: // %bb.0:
| ; CHECK-NOF32X2-NEXT: ld.param.b64 %rd2, [test_ldst_v2f32_param_1];
| ; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_ldst_v2f32_param_0];
| ; CHECK-NOF32X2-NEXT: ld.v2.b32 {%r1, %r2}, [%rd1];
| ; CHECK-NOF32X2-NEXT: st.v2.b32 [%rd2], {%r1, %r2};
| ; CHECK-NOF32X2-NEXT: ret;
| ;
| ; CHECK-F32X2-LABEL: test_ldst_v2f32(
| ; CHECK-F32X2: {
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>;
| ; CHECK-F32X2-EMPTY:
| ; CHECK-F32X2-NEXT: // %bb.0:
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_ldst_v2f32_param_1];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_ldst_v2f32_param_0];
| ; CHECK-F32X2-NEXT: ld.b64 %rd3, [%rd1];
| ; CHECK-F32X2-NEXT: st.b64 [%rd2], %rd3;
| ; CHECK-F32X2-NEXT: ret;
| %t1 = load <2 x float>, ptr %a ; no explicit alignment on the load
| store <2 x float> %t1, ptr %b, align 32 ; the store alone states align 32
| ret void
| }
| |
| define void @test_ldst_v3f32(ptr %a, ptr %b) #0 { ; copies one <3 x float> from %a to %b; both runs share one expectation - a b64 access at offset 0 plus a b32 access at offset 8
| ; CHECK-LABEL: test_ldst_v3f32(
| ; CHECK: {
| ; CHECK-NEXT: .reg .b32 %r<2>;
| ; CHECK-NEXT: .reg .b64 %rd<4>;
| ; CHECK-EMPTY:
| ; CHECK-NEXT: // %bb.0:
| ; CHECK-NEXT: ld.param.b64 %rd2, [test_ldst_v3f32_param_1];
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_ldst_v3f32_param_0];
| ; CHECK-NEXT: ld.b64 %rd3, [%rd1];
| ; CHECK-NEXT: ld.b32 %r1, [%rd1+8];
| ; CHECK-NEXT: st.b32 [%rd2+8], %r1;
| ; CHECK-NEXT: st.b64 [%rd2], %rd3;
| ; CHECK-NEXT: ret;
| %t1 = load <3 x float>, ptr %a ; no explicit alignment on the load
| store <3 x float> %t1, ptr %b, align 32 ; the store alone states align 32
| ret void
| }
| |
| define void @test_ldst_v4f32(ptr %a, ptr %b) #0 { ; copies one <4 x float> from %a to %b; sm_80 run expects ld.v4.b32/st.v4.b32, sm_100 run expects ld.v2.b64/st.v2.b64
| ; CHECK-NOF32X2-LABEL: test_ldst_v4f32(
| ; CHECK-NOF32X2: {
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>;
| ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>;
| ; CHECK-NOF32X2-EMPTY:
| ; CHECK-NOF32X2-NEXT: // %bb.0:
| ; CHECK-NOF32X2-NEXT: ld.param.b64 %rd2, [test_ldst_v4f32_param_1];
| ; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_ldst_v4f32_param_0];
| ; CHECK-NOF32X2-NEXT: ld.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
| ; CHECK-NOF32X2-NEXT: st.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
| ; CHECK-NOF32X2-NEXT: ret;
| ;
| ; CHECK-F32X2-LABEL: test_ldst_v4f32(
| ; CHECK-F32X2: {
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<5>;
| ; CHECK-F32X2-EMPTY:
| ; CHECK-F32X2-NEXT: // %bb.0:
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_ldst_v4f32_param_1];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_ldst_v4f32_param_0];
| ; CHECK-F32X2-NEXT: ld.v2.b64 {%rd3, %rd4}, [%rd1];
| ; CHECK-F32X2-NEXT: st.v2.b64 [%rd2], {%rd3, %rd4};
| ; CHECK-F32X2-NEXT: ret;
| %t1 = load <4 x float>, ptr %a ; no explicit alignment on the load
| store <4 x float> %t1, ptr %b, align 32 ; the store alone states align 32
| ret void
| }
| |
| define void @test_ldst_v8f32(ptr %a, ptr %b) #0 { ; copies one <8 x float> from %a to %b; both runs expect two 16-byte accesses at offsets 0 and 16 (v4.b32 on sm_80, v2.b64 on sm_100)
| ; CHECK-NOF32X2-LABEL: test_ldst_v8f32(
| ; CHECK-NOF32X2: {
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<9>;
| ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>;
| ; CHECK-NOF32X2-EMPTY:
| ; CHECK-NOF32X2-NEXT: // %bb.0:
| ; CHECK-NOF32X2-NEXT: ld.param.b64 %rd2, [test_ldst_v8f32_param_1];
| ; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_ldst_v8f32_param_0];
| ; CHECK-NOF32X2-NEXT: ld.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
| ; CHECK-NOF32X2-NEXT: ld.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
| ; CHECK-NOF32X2-NEXT: st.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
| ; CHECK-NOF32X2-NEXT: st.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
| ; CHECK-NOF32X2-NEXT: ret;
| ;
| ; CHECK-F32X2-LABEL: test_ldst_v8f32(
| ; CHECK-F32X2: {
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<7>;
| ; CHECK-F32X2-EMPTY:
| ; CHECK-F32X2-NEXT: // %bb.0:
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_ldst_v8f32_param_1];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_ldst_v8f32_param_0];
| ; CHECK-F32X2-NEXT: ld.v2.b64 {%rd3, %rd4}, [%rd1];
| ; CHECK-F32X2-NEXT: ld.v2.b64 {%rd5, %rd6}, [%rd1+16];
| ; CHECK-F32X2-NEXT: st.v2.b64 [%rd2+16], {%rd5, %rd6};
| ; CHECK-F32X2-NEXT: st.v2.b64 [%rd2], {%rd3, %rd4};
| ; CHECK-F32X2-NEXT: ret;
| %t1 = load <8 x float>, ptr %a ; no explicit alignment on the load
| store <8 x float> %t1, ptr %b, align 32 ; the store alone states align 32
| ret void
| }
| |
| declare <2 x float> @test_callee(<2 x float> %a, <2 x float> %b) #0 ; external callee with no body here; target of test_call, test_call_flipped and test_tailcall_flipped
| |
| define <2 x float> @test_call(<2 x float> %a, <2 x float> %b) #0 { ; passes %a and %b to test_callee in the same order and returns its result
| ; CHECK-NOF32X2-LABEL: test_call(
| ; CHECK-NOF32X2: {
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>;
| ; CHECK-NOF32X2-EMPTY:
| ; CHECK-NOF32X2-NEXT: // %bb.0:
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_call_param_1];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_call_param_0];
| ; CHECK-NOF32X2-NEXT: { // callseq 0, 0
| ; CHECK-NOF32X2-NEXT: .param .align 8 .b8 param0[8];
| ; CHECK-NOF32X2-NEXT: .param .align 8 .b8 param1[8];
| ; CHECK-NOF32X2-NEXT: .param .align 8 .b8 retval0[8];
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [param1], {%r3, %r4};
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
| ; CHECK-NOF32X2-NEXT: call.uni (retval0), test_callee, (param0, param1);
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [retval0];
| ; CHECK-NOF32X2-NEXT: } // callseq 0
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r5, %r6};
| ; CHECK-NOF32X2-NEXT: ret;
| ;
| ; CHECK-F32X2-LABEL: test_call(
| ; CHECK-F32X2: {
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>;
| ; CHECK-F32X2-EMPTY:
| ; CHECK-F32X2-NEXT: // %bb.0:
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_call_param_1];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_call_param_0];
| ; CHECK-F32X2-NEXT: { // callseq 0, 0
| ; CHECK-F32X2-NEXT: .param .align 8 .b8 param0[8];
| ; CHECK-F32X2-NEXT: .param .align 8 .b8 param1[8];
| ; CHECK-F32X2-NEXT: .param .align 8 .b8 retval0[8];
| ; CHECK-F32X2-NEXT: st.param.b64 [param1], %rd2;
| ; CHECK-F32X2-NEXT: st.param.b64 [param0], %rd1;
| ; CHECK-F32X2-NEXT: call.uni (retval0), test_callee, (param0, param1);
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd3, [retval0];
| ; CHECK-F32X2-NEXT: } // callseq 0
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3;
| ; CHECK-F32X2-NEXT: ret;
| %r = call <2 x float> @test_callee(<2 x float> %a, <2 x float> %b) ; expected call sequence declares 8-byte param0/param1/retval0; arguments move as v2.b32 pairs on sm_80 and as single b64 values on sm_100
| ret <2 x float> %r
| }
| |
| define <2 x float> @test_call_flipped(<2 x float> %a, <2 x float> %b) #0 { ; same as test_call but with the two arguments swapped at the call site
| ; CHECK-NOF32X2-LABEL: test_call_flipped(
| ; CHECK-NOF32X2: {
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>;
| ; CHECK-NOF32X2-EMPTY:
| ; CHECK-NOF32X2-NEXT: // %bb.0:
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_call_flipped_param_1];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_call_flipped_param_0];
| ; CHECK-NOF32X2-NEXT: { // callseq 1, 0
| ; CHECK-NOF32X2-NEXT: .param .align 8 .b8 param0[8];
| ; CHECK-NOF32X2-NEXT: .param .align 8 .b8 param1[8];
| ; CHECK-NOF32X2-NEXT: .param .align 8 .b8 retval0[8];
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [param1], {%r1, %r2};
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [param0], {%r3, %r4};
| ; CHECK-NOF32X2-NEXT: call.uni (retval0), test_callee, (param0, param1);
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [retval0];
| ; CHECK-NOF32X2-NEXT: } // callseq 1
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r5, %r6};
| ; CHECK-NOF32X2-NEXT: ret;
| ;
| ; CHECK-F32X2-LABEL: test_call_flipped(
| ; CHECK-F32X2: {
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>;
| ; CHECK-F32X2-EMPTY:
| ; CHECK-F32X2-NEXT: // %bb.0:
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_call_flipped_param_1];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_call_flipped_param_0];
| ; CHECK-F32X2-NEXT: { // callseq 1, 0
| ; CHECK-F32X2-NEXT: .param .align 8 .b8 param0[8];
| ; CHECK-F32X2-NEXT: .param .align 8 .b8 param1[8];
| ; CHECK-F32X2-NEXT: .param .align 8 .b8 retval0[8];
| ; CHECK-F32X2-NEXT: st.param.b64 [param1], %rd1;
| ; CHECK-F32X2-NEXT: st.param.b64 [param0], %rd2;
| ; CHECK-F32X2-NEXT: call.uni (retval0), test_callee, (param0, param1);
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd3, [retval0];
| ; CHECK-F32X2-NEXT: } // callseq 1
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3;
| ; CHECK-F32X2-NEXT: ret;
| %r = call <2 x float> @test_callee(<2 x float> %b, <2 x float> %a) ; %b goes to param0 and %a to param1, so the expected stores cross the incoming parameter registers
| ret <2 x float> %r
| }
| |
| define <2 x float> @test_tailcall_flipped(<2 x float> %a, <2 x float> %b) #0 { ; like test_call_flipped, but the IR call is marked `tail`
| ; CHECK-NOF32X2-LABEL: test_tailcall_flipped(
| ; CHECK-NOF32X2: {
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>;
| ; CHECK-NOF32X2-EMPTY:
| ; CHECK-NOF32X2-NEXT: // %bb.0:
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_tailcall_flipped_param_1];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_tailcall_flipped_param_0];
| ; CHECK-NOF32X2-NEXT: { // callseq 2, 0
| ; CHECK-NOF32X2-NEXT: .param .align 8 .b8 param0[8];
| ; CHECK-NOF32X2-NEXT: .param .align 8 .b8 param1[8];
| ; CHECK-NOF32X2-NEXT: .param .align 8 .b8 retval0[8];
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [param1], {%r1, %r2};
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [param0], {%r3, %r4};
| ; CHECK-NOF32X2-NEXT: call.uni (retval0), test_callee, (param0, param1);
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [retval0];
| ; CHECK-NOF32X2-NEXT: } // callseq 2
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r5, %r6};
| ; CHECK-NOF32X2-NEXT: ret;
| ;
| ; CHECK-F32X2-LABEL: test_tailcall_flipped(
| ; CHECK-F32X2: {
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>;
| ; CHECK-F32X2-EMPTY:
| ; CHECK-F32X2-NEXT: // %bb.0:
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_tailcall_flipped_param_1];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_tailcall_flipped_param_0];
| ; CHECK-F32X2-NEXT: { // callseq 2, 0
| ; CHECK-F32X2-NEXT: .param .align 8 .b8 param0[8];
| ; CHECK-F32X2-NEXT: .param .align 8 .b8 param1[8];
| ; CHECK-F32X2-NEXT: .param .align 8 .b8 retval0[8];
| ; CHECK-F32X2-NEXT: st.param.b64 [param1], %rd1;
| ; CHECK-F32X2-NEXT: st.param.b64 [param0], %rd2;
| ; CHECK-F32X2-NEXT: call.uni (retval0), test_callee, (param0, param1);
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd3, [retval0];
| ; CHECK-F32X2-NEXT: } // callseq 2
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3;
| ; CHECK-F32X2-NEXT: ret;
| %r = tail call <2 x float> @test_callee(<2 x float> %b, <2 x float> %a) ; despite `tail`, the expected PTX is the same call.uni sequence as the non-tail flipped test, followed by a normal return
| ret <2 x float> %r
| }
| |
| define <2 x float> @test_select(<2 x float> %a, <2 x float> %b, i1 zeroext %c) #0 { ; whole-vector select driven by a single scalar i1
| ; CHECK-NOF32X2-LABEL: test_select(
| ; CHECK-NOF32X2: {
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<2>;
| ; CHECK-NOF32X2-NEXT: .reg .b16 %rs<3>;
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>;
| ; CHECK-NOF32X2-EMPTY:
| ; CHECK-NOF32X2-NEXT: // %bb.0:
| ; CHECK-NOF32X2-NEXT: ld.param.b8 %rs1, [test_select_param_2];
| ; CHECK-NOF32X2-NEXT: and.b16 %rs2, %rs1, 1;
| ; CHECK-NOF32X2-NEXT: setp.ne.b16 %p1, %rs2, 0;
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_param_1];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_select_param_0];
| ; CHECK-NOF32X2-NEXT: selp.f32 %r5, %r2, %r4, %p1;
| ; CHECK-NOF32X2-NEXT: selp.f32 %r6, %r1, %r3, %p1;
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5};
| ; CHECK-NOF32X2-NEXT: ret;
| ;
| ; CHECK-F32X2-LABEL: test_select(
| ; CHECK-F32X2: {
| ; CHECK-F32X2-NEXT: .reg .pred %p<2>;
| ; CHECK-F32X2-NEXT: .reg .b16 %rs<3>;
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>;
| ; CHECK-F32X2-EMPTY:
| ; CHECK-F32X2-NEXT: // %bb.0:
| ; CHECK-F32X2-NEXT: ld.param.b8 %rs1, [test_select_param_2];
| ; CHECK-F32X2-NEXT: and.b16 %rs2, %rs1, 1;
| ; CHECK-F32X2-NEXT: setp.ne.b16 %p1, %rs2, 0;
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_select_param_1];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_select_param_0];
| ; CHECK-F32X2-NEXT: selp.b64 %rd3, %rd1, %rd2, %p1;
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3;
| ; CHECK-F32X2-NEXT: ret;
| %r = select i1 %c, <2 x float> %a, <2 x float> %b ; the i1 is loaded as a byte, masked with 1 and turned into one predicate; sm_100 run expects a single selp.b64, sm_80 run two selp.f32 on that predicate
| ret <2 x float> %r
| }
| |
| define <2 x float> @test_select_cc(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d) #0 { ; per-lane select between %a and %b, conditioned on a per-lane compare of %c and %d
| ; CHECK-NOF32X2-LABEL: test_select_cc(
| ; CHECK-NOF32X2: {
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>;
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<11>;
| ; CHECK-NOF32X2-EMPTY:
| ; CHECK-NOF32X2-NEXT: // %bb.0:
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r7, %r8}, [test_select_cc_param_3];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_select_cc_param_2];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_param_1];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_select_cc_param_0];
| ; CHECK-NOF32X2-NEXT: setp.neu.f32 %p1, %r5, %r7;
| ; CHECK-NOF32X2-NEXT: setp.neu.f32 %p2, %r6, %r8;
| ; CHECK-NOF32X2-NEXT: selp.f32 %r9, %r2, %r4, %p2;
| ; CHECK-NOF32X2-NEXT: selp.f32 %r10, %r1, %r3, %p1;
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r10, %r9};
| ; CHECK-NOF32X2-NEXT: ret;
| ;
| ; CHECK-F32X2-LABEL: test_select_cc(
| ; CHECK-F32X2: {
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>;
| ; CHECK-F32X2-NEXT: .reg .b32 %r<11>;
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<5>;
| ; CHECK-F32X2-EMPTY:
| ; CHECK-F32X2-NEXT: // %bb.0:
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd4, [test_select_cc_param_3];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd3, [test_select_cc_param_2];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_select_cc_param_1];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_select_cc_param_0];
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd4;
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd3;
| ; CHECK-F32X2-NEXT: setp.neu.f32 %p1, %r3, %r1;
| ; CHECK-F32X2-NEXT: setp.neu.f32 %p2, %r4, %r2;
| ; CHECK-F32X2-NEXT: mov.b64 {%r5, %r6}, %rd2;
| ; CHECK-F32X2-NEXT: mov.b64 {%r7, %r8}, %rd1;
| ; CHECK-F32X2-NEXT: selp.f32 %r9, %r8, %r6, %p2;
| ; CHECK-F32X2-NEXT: selp.f32 %r10, %r7, %r5, %p1;
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r10, %r9};
| ; CHECK-F32X2-NEXT: ret;
| %cc = fcmp une <2 x float> %c, %d ; both runs expect one scalar setp.neu.f32 per lane
| %r = select <2 x i1> %cc, <2 x float> %a, <2 x float> %b ; both runs expect one scalar selp.f32 per lane; the sm_100 run unpacks its b64 operands with mov.b64 first
| ret <2 x float> %r
| }
| |
| define <2 x double> @test_select_cc_f64_f32(<2 x double> %a, <2 x double> %b, <2 x float> %c, <2 x float> %d) #0 { ; mixed-width case - a <2 x float> compare selects between <2 x double> lanes
| ; CHECK-NOF32X2-LABEL: test_select_cc_f64_f32(
| ; CHECK-NOF32X2: {
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>;
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>;
| ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<7>;
| ; CHECK-NOF32X2-EMPTY:
| ; CHECK-NOF32X2-NEXT: // %bb.0:
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_f64_f32_param_3];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_select_cc_f64_f32_param_2];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [test_select_cc_f64_f32_param_1];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_select_cc_f64_f32_param_0];
| ; CHECK-NOF32X2-NEXT: setp.neu.f32 %p1, %r1, %r3;
| ; CHECK-NOF32X2-NEXT: setp.neu.f32 %p2, %r2, %r4;
| ; CHECK-NOF32X2-NEXT: selp.f64 %rd5, %rd2, %rd4, %p2;
| ; CHECK-NOF32X2-NEXT: selp.f64 %rd6, %rd1, %rd3, %p1;
| ; CHECK-NOF32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd6, %rd5};
| ; CHECK-NOF32X2-NEXT: ret;
| ;
| ; CHECK-F32X2-LABEL: test_select_cc_f64_f32(
| ; CHECK-F32X2: {
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>;
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>;
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<9>;
| ; CHECK-F32X2-EMPTY:
| ; CHECK-F32X2-NEXT: // %bb.0:
| ; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [test_select_cc_f64_f32_param_1];
| ; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_select_cc_f64_f32_param_0];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd6, [test_select_cc_f64_f32_param_3];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd5, [test_select_cc_f64_f32_param_2];
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd6;
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd5;
| ; CHECK-F32X2-NEXT: setp.neu.f32 %p1, %r3, %r1;
| ; CHECK-F32X2-NEXT: setp.neu.f32 %p2, %r4, %r2;
| ; CHECK-F32X2-NEXT: selp.f64 %rd7, %rd2, %rd4, %p2;
| ; CHECK-F32X2-NEXT: selp.f64 %rd8, %rd1, %rd3, %p1;
| ; CHECK-F32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd8, %rd7};
| ; CHECK-F32X2-NEXT: ret;
| %cc = fcmp une <2 x float> %c, %d ; expected as one setp.neu.f32 per lane; on sm_100 the float operands arrive packed in b64 and are unpacked with mov.b64
| %r = select <2 x i1> %cc, <2 x double> %a, <2 x double> %b ; expected as one selp.f64 per lane in both runs
| ret <2 x double> %r
| }
| |
| define <2 x float> @test_select_cc_f32_f64(<2 x float> %a, <2 x float> %b, <2 x double> %c, <2 x double> %d) #0 { ; mixed-width case - a <2 x double> compare selects between <2 x float> lanes
| ; CHECK-NOF32X2-LABEL: test_select_cc_f32_f64(
| ; CHECK-NOF32X2: {
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>;
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>;
| ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<5>;
| ; CHECK-NOF32X2-EMPTY:
| ; CHECK-NOF32X2-NEXT: // %bb.0:
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [test_select_cc_f32_f64_param_3];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_select_cc_f32_f64_param_2];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_f32_f64_param_1];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_select_cc_f32_f64_param_0];
| ; CHECK-NOF32X2-NEXT: setp.neu.f64 %p1, %rd1, %rd3;
| ; CHECK-NOF32X2-NEXT: setp.neu.f64 %p2, %rd2, %rd4;
| ; CHECK-NOF32X2-NEXT: selp.f32 %r5, %r2, %r4, %p2;
| ; CHECK-NOF32X2-NEXT: selp.f32 %r6, %r1, %r3, %p1;
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5};
| ; CHECK-NOF32X2-NEXT: ret;
| ;
| ; CHECK-F32X2-LABEL: test_select_cc_f32_f64(
| ; CHECK-F32X2: {
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>;
| ; CHECK-F32X2-NEXT: .reg .b32 %r<7>;
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<7>;
| ; CHECK-F32X2-EMPTY:
| ; CHECK-F32X2-NEXT: // %bb.0:
| ; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [test_select_cc_f32_f64_param_3];
| ; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [test_select_cc_f32_f64_param_2];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_select_cc_f32_f64_param_1];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_select_cc_f32_f64_param_0];
| ; CHECK-F32X2-NEXT: setp.neu.f64 %p1, %rd3, %rd5;
| ; CHECK-F32X2-NEXT: setp.neu.f64 %p2, %rd4, %rd6;
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2;
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1;
| ; CHECK-F32X2-NEXT: selp.f32 %r5, %r4, %r2, %p2;
| ; CHECK-F32X2-NEXT: selp.f32 %r6, %r3, %r1, %p1;
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5};
| ; CHECK-F32X2-NEXT: ret;
| %cc = fcmp une <2 x double> %c, %d ; expected as one setp.neu.f64 per lane in both runs
| %r = select <2 x i1> %cc, <2 x float> %a, <2 x float> %b ; expected as one selp.f32 per lane; on sm_100 the packed float operands are unpacked with mov.b64 first
| ret <2 x float> %r
| }
| |
| define <2 x i1> @test_fcmp_une(<2 x float> %a, <2 x float> %b) #0 { ; per-lane `une` compare; the <2 x i1> result is returned as two bytes at func_retval0 and func_retval0+1
| ; CHECK-NOF32X2-LABEL: test_fcmp_une(
| ; CHECK-NOF32X2: {
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>;
| ; CHECK-NOF32X2-NEXT: .reg .b16 %rs<3>;
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>;
| ; CHECK-NOF32X2-EMPTY:
| ; CHECK-NOF32X2-NEXT: // %bb.0:
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_une_param_1];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_une_param_0];
| ; CHECK-NOF32X2-NEXT: setp.neu.f32 %p1, %r2, %r4;
| ; CHECK-NOF32X2-NEXT: setp.neu.f32 %p2, %r1, %r3;
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs1, -1, 0, %p2;
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0], %rs1;
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs2, -1, 0, %p1;
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0+1], %rs2;
| ; CHECK-NOF32X2-NEXT: ret;
| ;
| ; CHECK-F32X2-LABEL: test_fcmp_une(
| ; CHECK-F32X2: {
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>;
| ; CHECK-F32X2-NEXT: .reg .b16 %rs<3>;
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>;
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>;
| ; CHECK-F32X2-EMPTY:
| ; CHECK-F32X2-NEXT: // %bb.0:
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fcmp_une_param_1];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fcmp_une_param_0];
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2;
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1;
| ; CHECK-F32X2-NEXT: setp.neu.f32 %p1, %r4, %r2;
| ; CHECK-F32X2-NEXT: setp.neu.f32 %p2, %r3, %r1;
| ; CHECK-F32X2-NEXT: selp.b16 %rs1, -1, 0, %p2;
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0], %rs1;
| ; CHECK-F32X2-NEXT: selp.b16 %rs2, -1, 0, %p1;
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0+1], %rs2;
| ; CHECK-F32X2-NEXT: ret;
| %r = fcmp une <2 x float> %a, %b ; expected as scalar setp.neu.f32 per lane in both runs; each predicate becomes -1 or 0 via selp.b16
| ret <2 x i1> %r
| }
| |
| define <2 x i1> @test_fcmp_ueq(<2 x float> %a, <2 x float> %b) #0 { ; per-lane `ueq` compare; the <2 x i1> result is returned as two bytes at func_retval0 and func_retval0+1
| ; CHECK-NOF32X2-LABEL: test_fcmp_ueq(
| ; CHECK-NOF32X2: {
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>;
| ; CHECK-NOF32X2-NEXT: .reg .b16 %rs<3>;
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>;
| ; CHECK-NOF32X2-EMPTY:
| ; CHECK-NOF32X2-NEXT: // %bb.0:
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ueq_param_1];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ueq_param_0];
| ; CHECK-NOF32X2-NEXT: setp.equ.f32 %p1, %r2, %r4;
| ; CHECK-NOF32X2-NEXT: setp.equ.f32 %p2, %r1, %r3;
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs1, -1, 0, %p2;
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0], %rs1;
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs2, -1, 0, %p1;
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0+1], %rs2;
| ; CHECK-NOF32X2-NEXT: ret;
| ;
| ; CHECK-F32X2-LABEL: test_fcmp_ueq(
| ; CHECK-F32X2: {
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>;
| ; CHECK-F32X2-NEXT: .reg .b16 %rs<3>;
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>;
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>;
| ; CHECK-F32X2-EMPTY:
| ; CHECK-F32X2-NEXT: // %bb.0:
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fcmp_ueq_param_1];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fcmp_ueq_param_0];
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2;
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1;
| ; CHECK-F32X2-NEXT: setp.equ.f32 %p1, %r4, %r2;
| ; CHECK-F32X2-NEXT: setp.equ.f32 %p2, %r3, %r1;
| ; CHECK-F32X2-NEXT: selp.b16 %rs1, -1, 0, %p2;
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0], %rs1;
| ; CHECK-F32X2-NEXT: selp.b16 %rs2, -1, 0, %p1;
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0+1], %rs2;
| ; CHECK-F32X2-NEXT: ret;
| %r = fcmp ueq <2 x float> %a, %b ; expected as scalar setp.equ.f32 per lane in both runs; each predicate becomes -1 or 0 via selp.b16
| ret <2 x i1> %r
| }
| |
| define <2 x i1> @test_fcmp_ugt(<2 x float> %a, <2 x float> %b) #0 { ; per-lane `ugt` compare; the <2 x i1> result is returned as two bytes at func_retval0 and func_retval0+1
| ; CHECK-NOF32X2-LABEL: test_fcmp_ugt(
| ; CHECK-NOF32X2: {
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>;
| ; CHECK-NOF32X2-NEXT: .reg .b16 %rs<3>;
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>;
| ; CHECK-NOF32X2-EMPTY:
| ; CHECK-NOF32X2-NEXT: // %bb.0:
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ugt_param_1];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ugt_param_0];
| ; CHECK-NOF32X2-NEXT: setp.gtu.f32 %p1, %r2, %r4;
| ; CHECK-NOF32X2-NEXT: setp.gtu.f32 %p2, %r1, %r3;
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs1, -1, 0, %p2;
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0], %rs1;
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs2, -1, 0, %p1;
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0+1], %rs2;
| ; CHECK-NOF32X2-NEXT: ret;
| ;
| ; CHECK-F32X2-LABEL: test_fcmp_ugt(
| ; CHECK-F32X2: {
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>;
| ; CHECK-F32X2-NEXT: .reg .b16 %rs<3>;
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>;
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>;
| ; CHECK-F32X2-EMPTY:
| ; CHECK-F32X2-NEXT: // %bb.0:
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fcmp_ugt_param_1];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fcmp_ugt_param_0];
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2;
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1;
| ; CHECK-F32X2-NEXT: setp.gtu.f32 %p1, %r4, %r2;
| ; CHECK-F32X2-NEXT: setp.gtu.f32 %p2, %r3, %r1;
| ; CHECK-F32X2-NEXT: selp.b16 %rs1, -1, 0, %p2;
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0], %rs1;
| ; CHECK-F32X2-NEXT: selp.b16 %rs2, -1, 0, %p1;
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0+1], %rs2;
| ; CHECK-F32X2-NEXT: ret;
| %r = fcmp ugt <2 x float> %a, %b ; expected as scalar setp.gtu.f32 per lane in both runs; each predicate becomes -1 or 0 via selp.b16
| ret <2 x i1> %r
| }
| |
| define <2 x i1> @test_fcmp_uge(<2 x float> %a, <2 x float> %b) #0 { ; per-lane `uge` compare; the <2 x i1> result is returned as two bytes at func_retval0 and func_retval0+1
| ; CHECK-NOF32X2-LABEL: test_fcmp_uge(
| ; CHECK-NOF32X2: {
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>;
| ; CHECK-NOF32X2-NEXT: .reg .b16 %rs<3>;
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>;
| ; CHECK-NOF32X2-EMPTY:
| ; CHECK-NOF32X2-NEXT: // %bb.0:
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_uge_param_1];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_uge_param_0];
| ; CHECK-NOF32X2-NEXT: setp.geu.f32 %p1, %r2, %r4;
| ; CHECK-NOF32X2-NEXT: setp.geu.f32 %p2, %r1, %r3;
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs1, -1, 0, %p2;
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0], %rs1;
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs2, -1, 0, %p1;
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0+1], %rs2;
| ; CHECK-NOF32X2-NEXT: ret;
| ;
| ; CHECK-F32X2-LABEL: test_fcmp_uge(
| ; CHECK-F32X2: {
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>;
| ; CHECK-F32X2-NEXT: .reg .b16 %rs<3>;
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>;
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>;
| ; CHECK-F32X2-EMPTY:
| ; CHECK-F32X2-NEXT: // %bb.0:
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fcmp_uge_param_1];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fcmp_uge_param_0];
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2;
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1;
| ; CHECK-F32X2-NEXT: setp.geu.f32 %p1, %r4, %r2;
| ; CHECK-F32X2-NEXT: setp.geu.f32 %p2, %r3, %r1;
| ; CHECK-F32X2-NEXT: selp.b16 %rs1, -1, 0, %p2;
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0], %rs1;
| ; CHECK-F32X2-NEXT: selp.b16 %rs2, -1, 0, %p1;
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0+1], %rs2;
| ; CHECK-F32X2-NEXT: ret;
| %r = fcmp uge <2 x float> %a, %b ; expected as scalar setp.geu.f32 per lane in both runs; each predicate becomes -1 or 0 via selp.b16
| ret <2 x i1> %r
| }
| |
| define <2 x i1> @test_fcmp_ult(<2 x float> %a, <2 x float> %b) #0 { ; per-lane `ult` compare; the <2 x i1> result is returned as two bytes at func_retval0 and func_retval0+1
| ; CHECK-NOF32X2-LABEL: test_fcmp_ult(
| ; CHECK-NOF32X2: {
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>;
| ; CHECK-NOF32X2-NEXT: .reg .b16 %rs<3>;
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>;
| ; CHECK-NOF32X2-EMPTY:
| ; CHECK-NOF32X2-NEXT: // %bb.0:
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ult_param_1];
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ult_param_0];
| ; CHECK-NOF32X2-NEXT: setp.ltu.f32 %p1, %r2, %r4;
| ; CHECK-NOF32X2-NEXT: setp.ltu.f32 %p2, %r1, %r3;
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs1, -1, 0, %p2;
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0], %rs1;
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs2, -1, 0, %p1;
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0+1], %rs2;
| ; CHECK-NOF32X2-NEXT: ret;
| ;
| ; CHECK-F32X2-LABEL: test_fcmp_ult(
| ; CHECK-F32X2: {
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>;
| ; CHECK-F32X2-NEXT: .reg .b16 %rs<3>;
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>;
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>;
| ; CHECK-F32X2-EMPTY:
| ; CHECK-F32X2-NEXT: // %bb.0:
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fcmp_ult_param_1];
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fcmp_ult_param_0];
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2;
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1;
| ; CHECK-F32X2-NEXT: setp.ltu.f32 %p1, %r4, %r2;
| ; CHECK-F32X2-NEXT: setp.ltu.f32 %p2, %r3, %r1;
| ; CHECK-F32X2-NEXT: selp.b16 %rs1, -1, 0, %p2;
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0], %rs1;
| ; CHECK-F32X2-NEXT: selp.b16 %rs2, -1, 0, %p1;
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0+1], %rs2;
| ; CHECK-F32X2-NEXT: ret;
| %r = fcmp ult <2 x float> %a, %b ; expected as scalar setp.ltu.f32 per lane in both runs; each predicate becomes -1 or 0 via selp.b16
| ret <2 x i1> %r
| }
| |
| ; Lowering of `fcmp ule` on <2 x float>, returning <2 x i1>. |
| ; Both configurations expect one scalar setp.leu.f32 per lane; each predicate |
| ; is turned into a b16 value with selp and stored as a byte at func_retval0 |
| ; and func_retval0+1. The sm_100 configuration loads each operand as a single |
| ; b64 and unpacks it with mov.b64; sm_80 loads the lanes with ld.param.v2.b32. |
| define <2 x i1> @test_fcmp_ule(<2 x float> %a, <2 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fcmp_ule( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ule_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ule_param_0]; |
| ; CHECK-NOF32X2-NEXT: setp.leu.f32 %p1, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: setp.leu.f32 %p2, %r1, %r3; |
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs1, -1, 0, %p2; |
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0], %rs1; |
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs2, -1, 0, %p1; |
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0+1], %rs2; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fcmp_ule( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-F32X2-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fcmp_ule_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fcmp_ule_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; |
| ; CHECK-F32X2-NEXT: setp.leu.f32 %p1, %r4, %r2; |
| ; CHECK-F32X2-NEXT: setp.leu.f32 %p2, %r3, %r1; |
| ; CHECK-F32X2-NEXT: selp.b16 %rs1, -1, 0, %p2; |
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0], %rs1; |
| ; CHECK-F32X2-NEXT: selp.b16 %rs2, -1, 0, %p1; |
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0+1], %rs2; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fcmp ule <2 x float> %a, %b |
| ret <2 x i1> %r |
| } |
| |
| ; Lowering of `fcmp uno` on <2 x float>, returning <2 x i1>. |
| ; Both configurations expect one scalar setp.nan.f32 per lane; each predicate |
| ; is turned into a b16 value with selp and stored as a byte at func_retval0 |
| ; and func_retval0+1. The sm_100 configuration loads each operand as a single |
| ; b64 and unpacks it with mov.b64; sm_80 loads the lanes with ld.param.v2.b32. |
| define <2 x i1> @test_fcmp_uno(<2 x float> %a, <2 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fcmp_uno( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_uno_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_uno_param_0]; |
| ; CHECK-NOF32X2-NEXT: setp.nan.f32 %p1, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: setp.nan.f32 %p2, %r1, %r3; |
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs1, -1, 0, %p2; |
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0], %rs1; |
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs2, -1, 0, %p1; |
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0+1], %rs2; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fcmp_uno( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-F32X2-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fcmp_uno_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fcmp_uno_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; |
| ; CHECK-F32X2-NEXT: setp.nan.f32 %p1, %r4, %r2; |
| ; CHECK-F32X2-NEXT: setp.nan.f32 %p2, %r3, %r1; |
| ; CHECK-F32X2-NEXT: selp.b16 %rs1, -1, 0, %p2; |
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0], %rs1; |
| ; CHECK-F32X2-NEXT: selp.b16 %rs2, -1, 0, %p1; |
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0+1], %rs2; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fcmp uno <2 x float> %a, %b |
| ret <2 x i1> %r |
| } |
| |
| ; Lowering of `fcmp one` on <2 x float>, returning <2 x i1>. |
| ; Both configurations expect one scalar setp.ne.f32 per lane; each predicate |
| ; is turned into a b16 value with selp and stored as a byte at func_retval0 |
| ; and func_retval0+1. The sm_100 configuration loads each operand as a single |
| ; b64 and unpacks it with mov.b64; sm_80 loads the lanes with ld.param.v2.b32. |
| define <2 x i1> @test_fcmp_one(<2 x float> %a, <2 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fcmp_one( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_one_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_one_param_0]; |
| ; CHECK-NOF32X2-NEXT: setp.ne.f32 %p1, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: setp.ne.f32 %p2, %r1, %r3; |
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs1, -1, 0, %p2; |
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0], %rs1; |
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs2, -1, 0, %p1; |
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0+1], %rs2; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fcmp_one( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-F32X2-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fcmp_one_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fcmp_one_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; |
| ; CHECK-F32X2-NEXT: setp.ne.f32 %p1, %r4, %r2; |
| ; CHECK-F32X2-NEXT: setp.ne.f32 %p2, %r3, %r1; |
| ; CHECK-F32X2-NEXT: selp.b16 %rs1, -1, 0, %p2; |
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0], %rs1; |
| ; CHECK-F32X2-NEXT: selp.b16 %rs2, -1, 0, %p1; |
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0+1], %rs2; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fcmp one <2 x float> %a, %b |
| ret <2 x i1> %r |
| } |
| |
| ; Lowering of `fcmp oeq` on <2 x float>, returning <2 x i1>. |
| ; Both configurations expect one scalar setp.eq.f32 per lane; each predicate |
| ; is turned into a b16 value with selp and stored as a byte at func_retval0 |
| ; and func_retval0+1. The sm_100 configuration loads each operand as a single |
| ; b64 and unpacks it with mov.b64; sm_80 loads the lanes with ld.param.v2.b32. |
| define <2 x i1> @test_fcmp_oeq(<2 x float> %a, <2 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fcmp_oeq( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_oeq_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_oeq_param_0]; |
| ; CHECK-NOF32X2-NEXT: setp.eq.f32 %p1, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: setp.eq.f32 %p2, %r1, %r3; |
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs1, -1, 0, %p2; |
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0], %rs1; |
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs2, -1, 0, %p1; |
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0+1], %rs2; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fcmp_oeq( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-F32X2-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fcmp_oeq_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fcmp_oeq_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; |
| ; CHECK-F32X2-NEXT: setp.eq.f32 %p1, %r4, %r2; |
| ; CHECK-F32X2-NEXT: setp.eq.f32 %p2, %r3, %r1; |
| ; CHECK-F32X2-NEXT: selp.b16 %rs1, -1, 0, %p2; |
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0], %rs1; |
| ; CHECK-F32X2-NEXT: selp.b16 %rs2, -1, 0, %p1; |
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0+1], %rs2; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fcmp oeq <2 x float> %a, %b |
| ret <2 x i1> %r |
| } |
| |
| ; Lowering of `fcmp ogt` on <2 x float>, returning <2 x i1>. |
| ; Both configurations expect one scalar setp.gt.f32 per lane; each predicate |
| ; is turned into a b16 value with selp and stored as a byte at func_retval0 |
| ; and func_retval0+1. The sm_100 configuration loads each operand as a single |
| ; b64 and unpacks it with mov.b64; sm_80 loads the lanes with ld.param.v2.b32. |
| define <2 x i1> @test_fcmp_ogt(<2 x float> %a, <2 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fcmp_ogt( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ogt_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ogt_param_0]; |
| ; CHECK-NOF32X2-NEXT: setp.gt.f32 %p1, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: setp.gt.f32 %p2, %r1, %r3; |
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs1, -1, 0, %p2; |
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0], %rs1; |
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs2, -1, 0, %p1; |
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0+1], %rs2; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fcmp_ogt( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-F32X2-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fcmp_ogt_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fcmp_ogt_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; |
| ; CHECK-F32X2-NEXT: setp.gt.f32 %p1, %r4, %r2; |
| ; CHECK-F32X2-NEXT: setp.gt.f32 %p2, %r3, %r1; |
| ; CHECK-F32X2-NEXT: selp.b16 %rs1, -1, 0, %p2; |
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0], %rs1; |
| ; CHECK-F32X2-NEXT: selp.b16 %rs2, -1, 0, %p1; |
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0+1], %rs2; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fcmp ogt <2 x float> %a, %b |
| ret <2 x i1> %r |
| } |
| |
| ; Lowering of `fcmp oge` on <2 x float>, returning <2 x i1>. |
| ; Both configurations expect one scalar setp.ge.f32 per lane; each predicate |
| ; is turned into a b16 value with selp and stored as a byte at func_retval0 |
| ; and func_retval0+1. The sm_100 configuration loads each operand as a single |
| ; b64 and unpacks it with mov.b64; sm_80 loads the lanes with ld.param.v2.b32. |
| define <2 x i1> @test_fcmp_oge(<2 x float> %a, <2 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fcmp_oge( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_oge_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_oge_param_0]; |
| ; CHECK-NOF32X2-NEXT: setp.ge.f32 %p1, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: setp.ge.f32 %p2, %r1, %r3; |
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs1, -1, 0, %p2; |
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0], %rs1; |
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs2, -1, 0, %p1; |
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0+1], %rs2; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fcmp_oge( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-F32X2-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fcmp_oge_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fcmp_oge_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; |
| ; CHECK-F32X2-NEXT: setp.ge.f32 %p1, %r4, %r2; |
| ; CHECK-F32X2-NEXT: setp.ge.f32 %p2, %r3, %r1; |
| ; CHECK-F32X2-NEXT: selp.b16 %rs1, -1, 0, %p2; |
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0], %rs1; |
| ; CHECK-F32X2-NEXT: selp.b16 %rs2, -1, 0, %p1; |
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0+1], %rs2; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fcmp oge <2 x float> %a, %b |
| ret <2 x i1> %r |
| } |
| |
| ; Lowering of `fcmp olt` on <2 x float>, returning <2 x i1>. |
| ; Both configurations expect one scalar setp.lt.f32 per lane; each predicate |
| ; is turned into a b16 value with selp and stored as a byte at func_retval0 |
| ; and func_retval0+1. The sm_100 configuration loads each operand as a single |
| ; b64 and unpacks it with mov.b64; sm_80 loads the lanes with ld.param.v2.b32. |
| define <2 x i1> @test_fcmp_olt(<2 x float> %a, <2 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fcmp_olt( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_olt_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_olt_param_0]; |
| ; CHECK-NOF32X2-NEXT: setp.lt.f32 %p1, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: setp.lt.f32 %p2, %r1, %r3; |
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs1, -1, 0, %p2; |
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0], %rs1; |
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs2, -1, 0, %p1; |
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0+1], %rs2; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fcmp_olt( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-F32X2-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fcmp_olt_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fcmp_olt_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; |
| ; CHECK-F32X2-NEXT: setp.lt.f32 %p1, %r4, %r2; |
| ; CHECK-F32X2-NEXT: setp.lt.f32 %p2, %r3, %r1; |
| ; CHECK-F32X2-NEXT: selp.b16 %rs1, -1, 0, %p2; |
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0], %rs1; |
| ; CHECK-F32X2-NEXT: selp.b16 %rs2, -1, 0, %p1; |
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0+1], %rs2; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fcmp olt <2 x float> %a, %b |
| ret <2 x i1> %r |
| } |
| |
| ; Lowering of `fcmp ole` on <2 x float>, returning <2 x i1>. |
| ; Both configurations expect one scalar setp.le.f32 per lane; each predicate |
| ; is turned into a b16 value with selp and stored as a byte at func_retval0 |
| ; and func_retval0+1. The sm_100 configuration loads each operand as a single |
| ; b64 and unpacks it with mov.b64; sm_80 loads the lanes with ld.param.v2.b32. |
| define <2 x i1> @test_fcmp_ole(<2 x float> %a, <2 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fcmp_ole( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ole_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ole_param_0]; |
| ; CHECK-NOF32X2-NEXT: setp.le.f32 %p1, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: setp.le.f32 %p2, %r1, %r3; |
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs1, -1, 0, %p2; |
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0], %rs1; |
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs2, -1, 0, %p1; |
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0+1], %rs2; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fcmp_ole( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-F32X2-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fcmp_ole_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fcmp_ole_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; |
| ; CHECK-F32X2-NEXT: setp.le.f32 %p1, %r4, %r2; |
| ; CHECK-F32X2-NEXT: setp.le.f32 %p2, %r3, %r1; |
| ; CHECK-F32X2-NEXT: selp.b16 %rs1, -1, 0, %p2; |
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0], %rs1; |
| ; CHECK-F32X2-NEXT: selp.b16 %rs2, -1, 0, %p1; |
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0+1], %rs2; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fcmp ole <2 x float> %a, %b |
| ret <2 x i1> %r |
| } |
| |
| ; Lowering of `fcmp ord` on <2 x float>, returning <2 x i1>. |
| ; Both configurations expect one scalar setp.num.f32 per lane; each predicate |
| ; is turned into a b16 value with selp and stored as a byte at func_retval0 |
| ; and func_retval0+1. The sm_100 configuration loads each operand as a single |
| ; b64 and unpacks it with mov.b64; sm_80 loads the lanes with ld.param.v2.b32. |
| define <2 x i1> @test_fcmp_ord(<2 x float> %a, <2 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fcmp_ord( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ord_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ord_param_0]; |
| ; CHECK-NOF32X2-NEXT: setp.num.f32 %p1, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: setp.num.f32 %p2, %r1, %r3; |
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs1, -1, 0, %p2; |
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0], %rs1; |
| ; CHECK-NOF32X2-NEXT: selp.b16 %rs2, -1, 0, %p1; |
| ; CHECK-NOF32X2-NEXT: st.param.b8 [func_retval0+1], %rs2; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fcmp_ord( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-F32X2-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fcmp_ord_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fcmp_ord_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; |
| ; CHECK-F32X2-NEXT: setp.num.f32 %p1, %r4, %r2; |
| ; CHECK-F32X2-NEXT: setp.num.f32 %p2, %r3, %r1; |
| ; CHECK-F32X2-NEXT: selp.b16 %rs1, -1, 0, %p2; |
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0], %rs1; |
| ; CHECK-F32X2-NEXT: selp.b16 %rs2, -1, 0, %p1; |
| ; CHECK-F32X2-NEXT: st.param.b8 [func_retval0+1], %rs2; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fcmp ord <2 x float> %a, %b |
| ret <2 x i1> %r |
| } |
| |
| ; Lowering of `fptosi <2 x float> to <2 x i32>`. |
| ; Both configurations expect one cvt.rzi.s32.f32 per lane and a single |
| ; st.param.v2.b32 of the results. The sm_100 configuration loads the argument |
| ; as one b64 and unpacks it with mov.b64; sm_80 uses ld.param.v2.b32. |
| define <2 x i32> @test_fptosi_i32(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fptosi_i32( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptosi_i32_param_0]; |
| ; CHECK-NOF32X2-NEXT: cvt.rzi.s32.f32 %r3, %r2; |
| ; CHECK-NOF32X2-NEXT: cvt.rzi.s32.f32 %r4, %r1; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fptosi_i32( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fptosi_i32_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: cvt.rzi.s32.f32 %r3, %r2; |
| ; CHECK-F32X2-NEXT: cvt.rzi.s32.f32 %r4, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fptosi <2 x float> %a to <2 x i32> |
| ret <2 x i32> %r |
| } |
| |
| ; Lowering of `fptosi <2 x float> to <2 x i64>`. |
| ; Both configurations expect one cvt.rzi.s64.f32 per lane and a single |
| ; st.param.v2.b64 of the results. The sm_100 configuration loads the argument |
| ; as one b64 and unpacks it with mov.b64; sm_80 uses ld.param.v2.b32. |
| define <2 x i64> @test_fptosi_i64(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fptosi_i64( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptosi_i64_param_0]; |
| ; CHECK-NOF32X2-NEXT: cvt.rzi.s64.f32 %rd1, %r2; |
| ; CHECK-NOF32X2-NEXT: cvt.rzi.s64.f32 %rd2, %r1; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd2, %rd1}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fptosi_i64( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<3>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fptosi_i64_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: cvt.rzi.s64.f32 %rd2, %r2; |
| ; CHECK-F32X2-NEXT: cvt.rzi.s64.f32 %rd3, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd2}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fptosi <2 x float> %a to <2 x i64> |
| ret <2 x i64> %r |
| } |
| |
| ; Lowering of `fptoui <2 x float> to <2 x i32>`. |
| ; Both configurations expect one cvt.rzi.u32.f32 per lane and a single |
| ; st.param.v2.b32 of the results. The sm_100 configuration loads the argument |
| ; as one b64 and unpacks it with mov.b64; sm_80 uses ld.param.v2.b32. |
| define <2 x i32> @test_fptoui_2xi32(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fptoui_2xi32( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptoui_2xi32_param_0]; |
| ; CHECK-NOF32X2-NEXT: cvt.rzi.u32.f32 %r3, %r2; |
| ; CHECK-NOF32X2-NEXT: cvt.rzi.u32.f32 %r4, %r1; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fptoui_2xi32( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fptoui_2xi32_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: cvt.rzi.u32.f32 %r3, %r2; |
| ; CHECK-F32X2-NEXT: cvt.rzi.u32.f32 %r4, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fptoui <2 x float> %a to <2 x i32> |
| ret <2 x i32> %r |
| } |
| |
| ; Lowering of `fptoui <2 x float> to <2 x i64>`. |
| ; Both configurations expect one cvt.rzi.u64.f32 per lane and a single |
| ; st.param.v2.b64 of the results. The sm_100 configuration loads the argument |
| ; as one b64 and unpacks it with mov.b64; sm_80 uses ld.param.v2.b32. |
| define <2 x i64> @test_fptoui_2xi64(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fptoui_2xi64( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptoui_2xi64_param_0]; |
| ; CHECK-NOF32X2-NEXT: cvt.rzi.u64.f32 %rd1, %r2; |
| ; CHECK-NOF32X2-NEXT: cvt.rzi.u64.f32 %rd2, %r1; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd2, %rd1}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fptoui_2xi64( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<3>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fptoui_2xi64_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: cvt.rzi.u64.f32 %rd2, %r2; |
| ; CHECK-F32X2-NEXT: cvt.rzi.u64.f32 %rd3, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd2}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fptoui <2 x float> %a to <2 x i64> |
| ret <2 x i64> %r |
| } |
| |
| ; Lowering of `uitofp <2 x i32> to <2 x float>`. |
| ; The assertions are shared by both configurations: a ld.param.v2.b32 of the |
| ; argument, one cvt.rn.f32.u32 per lane, and a st.param.v2.b32 of the results. |
| define <2 x float> @test_uitofp_2xi32(<2 x i32> %a) #0 { |
| ; CHECK-LABEL: test_uitofp_2xi32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_uitofp_2xi32_param_0]; |
| ; CHECK-NEXT: cvt.rn.f32.u32 %r3, %r2; |
| ; CHECK-NEXT: cvt.rn.f32.u32 %r4, %r1; |
| ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NEXT: ret; |
| %r = uitofp <2 x i32> %a to <2 x float> |
| ret <2 x float> %r |
| } |
| |
| ; Lowering of `uitofp <2 x i64> to <2 x float>`. |
| ; The assertions are shared by both configurations: a ld.param.v2.b64 of the |
| ; argument, one cvt.rn.f32.u64 per lane, and a st.param.v2.b32 of the results. |
| define <2 x float> @test_uitofp_2xi64(<2 x i64> %a) #0 { |
| ; CHECK-LABEL: test_uitofp_2xi64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_uitofp_2xi64_param_0]; |
| ; CHECK-NEXT: cvt.rn.f32.u64 %r1, %rd2; |
| ; CHECK-NEXT: cvt.rn.f32.u64 %r2, %rd1; |
| ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; |
| ; CHECK-NEXT: ret; |
| %r = uitofp <2 x i64> %a to <2 x float> |
| ret <2 x float> %r |
| } |
| |
| ; Lowering of `sitofp <2 x i32> to <2 x float>`. |
| ; The assertions are shared by both configurations: a ld.param.v2.b32 of the |
| ; argument, one cvt.rn.f32.s32 per lane, and a st.param.v2.b32 of the results. |
| define <2 x float> @test_sitofp_2xi32(<2 x i32> %a) #0 { |
| ; CHECK-LABEL: test_sitofp_2xi32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_sitofp_2xi32_param_0]; |
| ; CHECK-NEXT: cvt.rn.f32.s32 %r3, %r2; |
| ; CHECK-NEXT: cvt.rn.f32.s32 %r4, %r1; |
| ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NEXT: ret; |
| %r = sitofp <2 x i32> %a to <2 x float> |
| ret <2 x float> %r |
| } |
| |
| ; Lowering of `sitofp <2 x i64> to <2 x float>`. |
| ; The assertions are shared by both configurations: a ld.param.v2.b64 of the |
| ; argument, one cvt.rn.f32.s64 per lane, and a st.param.v2.b32 of the results. |
| define <2 x float> @test_sitofp_2xi64(<2 x i64> %a) #0 { |
| ; CHECK-LABEL: test_sitofp_2xi64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_sitofp_2xi64_param_0]; |
| ; CHECK-NEXT: cvt.rn.f32.s64 %r1, %rd2; |
| ; CHECK-NEXT: cvt.rn.f32.s64 %r2, %rd1; |
| ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; |
| ; CHECK-NEXT: ret; |
| %r = sitofp <2 x i64> %a to <2 x float> |
| ret <2 x float> %r |
| } |
| |
| ; `uitofp <2 x i32>` whose result feeds an `fadd` with a <2 x float> argument. |
| ; sm_80 configuration: two cvt.rn.f32.u32 followed by two scalar add.rn.f32, |
| ; stored with st.param.v2.b32. |
| ; sm_100 configuration: the two converted lanes are packed into a b64 with |
| ; mov.b64 and added to the b64-loaded %b with a single add.rn.f32x2, then |
| ; stored with st.param.b64. |
| define <2 x float> @test_uitofp_2xi32_fadd(<2 x i32> %a, <2 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_uitofp_2xi32_fadd( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_uitofp_2xi32_fadd_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_uitofp_2xi32_fadd_param_0]; |
| ; CHECK-NOF32X2-NEXT: cvt.rn.f32.u32 %r5, %r1; |
| ; CHECK-NOF32X2-NEXT: cvt.rn.f32.u32 %r6, %r2; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r7, %r4, %r6; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r8, %r3, %r5; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_uitofp_2xi32_fadd( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_uitofp_2xi32_fadd_param_0]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_uitofp_2xi32_fadd_param_1]; |
| ; CHECK-F32X2-NEXT: cvt.rn.f32.u32 %r3, %r2; |
| ; CHECK-F32X2-NEXT: cvt.rn.f32.u32 %r4, %r1; |
| ; CHECK-F32X2-NEXT: mov.b64 %rd2, {%r4, %r3}; |
| ; CHECK-F32X2-NEXT: add.rn.f32x2 %rd3, %rd1, %rd2; |
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-F32X2-NEXT: ret; |
| %c = uitofp <2 x i32> %a to <2 x float> |
| %r = fadd <2 x float> %b, %c |
| ret <2 x float> %r |
| } |
| |
| ; Lowering of `fptrunc <2 x double> to <2 x float>`. |
| ; Both configurations expect a ld.param.v2.b64 and one cvt.rn.f32.f64 per |
| ; lane. sm_80 stores the two results with st.param.v2.b32; the sm_100 |
| ; configuration packs them into a b64 with mov.b64 and uses st.param.b64. |
| define <2 x float> @test_fptrunc_2xdouble(<2 x double> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fptrunc_2xdouble( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fptrunc_2xdouble_param_0]; |
| ; CHECK-NOF32X2-NEXT: cvt.rn.f32.f64 %r1, %rd2; |
| ; CHECK-NOF32X2-NEXT: cvt.rn.f32.f64 %r2, %rd1; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fptrunc_2xdouble( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<3>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fptrunc_2xdouble_param_0]; |
| ; CHECK-F32X2-NEXT: cvt.rn.f32.f64 %r1, %rd2; |
| ; CHECK-F32X2-NEXT: cvt.rn.f32.f64 %r2, %rd1; |
| ; CHECK-F32X2-NEXT: mov.b64 %rd3, {%r2, %r1}; |
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fptrunc <2 x double> %a to <2 x float> |
| ret <2 x float> %r |
| } |
| |
| ; Lowering of `fpext <2 x float> to <2 x double>`. |
| ; Both configurations expect one cvt.f64.f32 per lane and a single |
| ; st.param.v2.b64 of the results. The sm_100 configuration loads the argument |
| ; as one b64 and unpacks it with mov.b64; sm_80 uses ld.param.v2.b32. |
| define <2 x double> @test_fpext_2xdouble(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fpext_2xdouble( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fpext_2xdouble_param_0]; |
| ; CHECK-NOF32X2-NEXT: cvt.f64.f32 %rd1, %r2; |
| ; CHECK-NOF32X2-NEXT: cvt.f64.f32 %rd2, %r1; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd2, %rd1}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fpext_2xdouble( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<3>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fpext_2xdouble_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: cvt.f64.f32 %rd2, %r2; |
| ; CHECK-F32X2-NEXT: cvt.f64.f32 %rd3, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd2}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = fpext <2 x float> %a to <2 x double> |
| ret <2 x double> %r |
| } |
| |
| ; `bitcast <2 x float> to <2 x i32>`: expected to lower to a plain load and |
| ; store of the argument with no conversion instruction in between. |
| ; sm_80 moves it as v2.b32; the sm_100 configuration moves it as one b64. |
| define <2 x i32> @test_bitcast_2xfloat_to_2xi32(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_bitcast_2xfloat_to_2xi32( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_bitcast_2xfloat_to_2xi32_param_0]; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r2}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_bitcast_2xfloat_to_2xi32( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_bitcast_2xfloat_to_2xi32_param_0]; |
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd1; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = bitcast <2 x float> %a to <2 x i32> |
| ret <2 x i32> %r |
| } |
| |
| ; `bitcast <2 x i32> to <2 x float>`: the assertions are shared by both |
| ; configurations and expect only a ld.param.v2.b32 of the argument followed |
| ; by a st.param.v2.b32 of the same registers. |
| define <2 x float> @test_bitcast_2xi32_to_2xfloat(<2 x i32> %a) #0 { |
| ; CHECK-LABEL: test_bitcast_2xi32_to_2xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_bitcast_2xi32_to_2xfloat_param_0]; |
| ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r2}; |
| ; CHECK-NEXT: ret; |
| %r = bitcast <2 x i32> %a to <2 x float> |
| ret <2 x float> %r |
| } |
| |
| ; `bitcast double to <2 x float>`: the assertions are shared by both |
| ; configurations and expect only a ld.param.b64 of the argument followed by a |
| ; st.param.b64 of the same register. |
| define <2 x float> @test_bitcast_double_to_2xfloat(double %a) #0 { |
| ; CHECK-LABEL: test_bitcast_double_to_2xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_bitcast_double_to_2xfloat_param_0]; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; |
| ; CHECK-NEXT: ret; |
| %r = bitcast double %a to <2 x float> |
| ret <2 x float> %r |
| } |
| |
| ; `bitcast <2 x float> to double`: expected to lower to a plain load and store |
| ; of the argument with no conversion instruction in between. |
| ; sm_80 moves it as v2.b32 (including the store to func_retval0); the sm_100 |
| ; configuration moves it as one b64. |
| define double @test_bitcast_2xfloat_to_double(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_bitcast_2xfloat_to_double( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_bitcast_2xfloat_to_double_param_0]; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r2}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_bitcast_2xfloat_to_double( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_bitcast_2xfloat_to_double_param_0]; |
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd1; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = bitcast <2 x float> %a to double |
| ret double %r |
| } |
| |
| ; Lowering of the llvm.sqrt intrinsic on <2 x float>. |
| ; Both configurations expect one scalar sqrt.rn.f32 per lane and a single |
| ; st.param.v2.b32 of the results. The sm_100 configuration loads the argument |
| ; as one b64 and unpacks it with mov.b64; sm_80 uses ld.param.v2.b32. |
| define <2 x float> @test_sqrt(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_sqrt( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_sqrt_param_0]; |
| ; CHECK-NOF32X2-NEXT: sqrt.rn.f32 %r3, %r2; |
| ; CHECK-NOF32X2-NEXT: sqrt.rn.f32 %r4, %r1; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_sqrt( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_sqrt_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: sqrt.rn.f32 %r3, %r2; |
| ; CHECK-F32X2-NEXT: sqrt.rn.f32 %r4, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = call <2 x float> @llvm.sqrt(<2 x float> %a) |
| ret <2 x float> %r |
| } |
| |
| ;;; Can't do this yet: requires libcall. |
| ; XCHECK-LABEL: test_powi( |
| ;define <2 x float> @test_powi(<2 x float> %a, i32 %b) #0 { |
| ; %r = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> %a, i32 %b) |
| ; ret <2 x float> %r |
| ;} |
| |
| ; Lowering of the llvm.sin intrinsic on <2 x float>; the call carries the |
| ; `afn` fast-math flag. |
| ; Both configurations expect one scalar sin.approx.f32 per lane and a single |
| ; st.param.v2.b32 of the results. The sm_100 configuration loads the argument |
| ; as one b64 and unpacks it with mov.b64; sm_80 uses ld.param.v2.b32. |
| ; NOTE(review): presumably `afn` is what permits the .approx form - confirm |
| ; against the NVPTX lowering before dropping the flag. |
| define <2 x float> @test_sin(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_sin( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_sin_param_0]; |
| ; CHECK-NOF32X2-NEXT: sin.approx.f32 %r3, %r2; |
| ; CHECK-NOF32X2-NEXT: sin.approx.f32 %r4, %r1; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_sin( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_sin_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: sin.approx.f32 %r3, %r2; |
| ; CHECK-F32X2-NEXT: sin.approx.f32 %r4, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = call afn <2 x float> @llvm.sin(<2 x float> %a) |
| ret <2 x float> %r |
| } |
| |
| ; llvm.cos on <2 x float>, called with the afn fast-math flag. |
| ; Both run configurations expect two scalar cos.approx.f32 instructions, |
| ; one per element; the sm_100 (CHECK-F32X2) output additionally unpacks the |
| ; 64-bit parameter with mov.b64 before the scalar operations. |
| define <2 x float> @test_cos(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_cos( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_cos_param_0]; |
| ; CHECK-NOF32X2-NEXT: cos.approx.f32 %r3, %r2; |
| ; CHECK-NOF32X2-NEXT: cos.approx.f32 %r4, %r1; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_cos( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_cos_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: cos.approx.f32 %r3, %r2; |
| ; CHECK-F32X2-NEXT: cos.approx.f32 %r4, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = call afn <2 x float> @llvm.cos(<2 x float> %a) |
| ret <2 x float> %r |
| } |
| |
| ;;; Can't do this yet: requires libcall. |
| ; XCHECK-LABEL: test_pow( |
| ;define <2 x float> @test_pow(<2 x float> %a, <2 x float> %b) #0 { |
| ; %r = call <2 x float> @llvm.pow(<2 x float> %a, <2 x float> %b) |
| ; ret <2 x float> %r |
| ;} |
| |
| ;;; Can't do this yet: requires libcall. |
| ; XCHECK-LABEL: test_exp( |
| ;define <2 x float> @test_exp(<2 x float> %a) #0 { |
| ; %r = call <2 x float> @llvm.exp(<2 x float> %a) |
| ; ret <2 x float> %r |
| ;} |
| |
| ;;; Can't do this yet: requires libcall. |
| ; XCHECK-LABEL: test_exp2( |
| ;define <2 x float> @test_exp2(<2 x float> %a) #0 { |
| ; %r = call <2 x float> @llvm.exp2(<2 x float> %a) |
| ; ret <2 x float> %r |
| ;} |
| |
| ;;; Can't do this yet: requires libcall. |
| ; XCHECK-LABEL: test_log( |
| ;define <2 x float> @test_log(<2 x float> %a) #0 { |
| ; %r = call <2 x float> @llvm.log(<2 x float> %a) |
| ; ret <2 x float> %r |
| ;} |
| |
| ;;; Can't do this yet: requires libcall. |
| ; XCHECK-LABEL: test_log10( |
| ;define <2 x float> @test_log10(<2 x float> %a) #0 { |
| ; %r = call <2 x float> @llvm.log10(<2 x float> %a) |
| ; ret <2 x float> %r |
| ;} |
| |
| ;;; Can't do this yet: requires libcall. |
| ; XCHECK-LABEL: test_log2( |
| ;define <2 x float> @test_log2(<2 x float> %a) #0 { |
| ; %r = call <2 x float> @llvm.log2(<2 x float> %a) |
| ; ret <2 x float> %r |
| ;} |
| |
| |
| ; llvm.fma on three <2 x float> operands. |
| ; sm_80 (CHECK-NOF32X2) expects two scalar fma.rn.f32 instructions. |
| ; sm_100 (CHECK-F32X2) expects a single packed fma.rn.f32x2 that works |
| ; directly on the 64-bit registers, with no unpacking and a b64 store. |
| define <2 x float> @test_fma(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fma( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_fma_param_2]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fma_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fma_param_0]; |
| ; CHECK-NOF32X2-NEXT: fma.rn.f32 %r7, %r2, %r4, %r6; |
| ; CHECK-NOF32X2-NEXT: fma.rn.f32 %r8, %r1, %r3, %r5; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fma( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<5>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd3, [test_fma_param_2]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fma_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fma_param_0]; |
| ; CHECK-F32X2-NEXT: fma.rn.f32x2 %rd4, %rd1, %rd2, %rd3; |
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd4; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = call <2 x float> @llvm.fma(<2 x float> %a, <2 x float> %b, <2 x float> %c) |
| ret <2 x float> %r |
| } |
| |
| ; llvm.fabs on <2 x float>. |
| ; Both run configurations expect two scalar abs.f32 instructions; the |
| ; sm_100 (CHECK-F32X2) output loads the argument as one b64 value and |
| ; unpacks it with mov.b64 first. |
| define <2 x float> @test_fabs(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fabs( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fabs_param_0]; |
| ; CHECK-NOF32X2-NEXT: abs.f32 %r3, %r2; |
| ; CHECK-NOF32X2-NEXT: abs.f32 %r4, %r1; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fabs( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fabs_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: abs.f32 %r3, %r2; |
| ; CHECK-F32X2-NEXT: abs.f32 %r4, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = call <2 x float> @llvm.fabs(<2 x float> %a) |
| ret <2 x float> %r |
| } |
| |
| ; llvm.minnum on two <2 x float> operands. |
| ; Both run configurations expect two scalar min.f32 instructions, each |
| ; taking the element of %a (param_0) followed by the matching element of %b |
| ; (param_1). The sm_100 (CHECK-F32X2) output unpacks both b64 parameters. |
| define <2 x float> @test_minnum(<2 x float> %a, <2 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_minnum( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_minnum_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_minnum_param_0]; |
| ; CHECK-NOF32X2-NEXT: min.f32 %r5, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: min.f32 %r6, %r1, %r3; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_minnum( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<7>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_minnum_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_minnum_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; |
| ; CHECK-F32X2-NEXT: min.f32 %r5, %r4, %r2; |
| ; CHECK-F32X2-NEXT: min.f32 %r6, %r3, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = call <2 x float> @llvm.minnum(<2 x float> %a, <2 x float> %b) |
| ret <2 x float> %r |
| } |
| |
| ; llvm.maxnum on two <2 x float> operands. |
| ; Both run configurations expect two scalar max.f32 instructions, each |
| ; taking the element of %a (param_0) followed by the matching element of %b |
| ; (param_1). The sm_100 (CHECK-F32X2) output unpacks both b64 parameters. |
| define <2 x float> @test_maxnum(<2 x float> %a, <2 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_maxnum( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_maxnum_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_maxnum_param_0]; |
| ; CHECK-NOF32X2-NEXT: max.f32 %r5, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: max.f32 %r6, %r1, %r3; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_maxnum( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<7>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_maxnum_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_maxnum_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; |
| ; CHECK-F32X2-NEXT: max.f32 %r5, %r4, %r2; |
| ; CHECK-F32X2-NEXT: max.f32 %r6, %r3, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = call <2 x float> @llvm.maxnum(<2 x float> %a, <2 x float> %b) |
| ret <2 x float> %r |
| } |
| |
| ; llvm.copysign with both operands of type <2 x float>. |
| ; Both run configurations expect two scalar copysign.f32 instructions. In |
| ; the expected output the element of %b (param_1, the sign operand of the |
| ; intrinsic) is listed before the element of %a (param_0). |
| define <2 x float> @test_copysign(<2 x float> %a, <2 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_copysign( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_copysign_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_copysign_param_0]; |
| ; CHECK-NOF32X2-NEXT: copysign.f32 %r5, %r4, %r2; |
| ; CHECK-NOF32X2-NEXT: copysign.f32 %r6, %r3, %r1; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_copysign( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<7>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_copysign_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_copysign_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd2; |
| ; CHECK-F32X2-NEXT: copysign.f32 %r5, %r4, %r2; |
| ; CHECK-F32X2-NEXT: copysign.f32 %r6, %r3, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = call <2 x float> @llvm.copysign(<2 x float> %a, <2 x float> %b) |
| ret <2 x float> %r |
| } |
| |
| ; llvm.copysign where the sign operand is a <2 x double> truncated to |
| ; <2 x float> with fptrunc. |
| ; The expected output contains neither a double-to-float conversion nor a |
| ; copysign instruction. For each element, bit 63 of the 64-bit value is |
| ; isolated (shr.u64 by 63, and.b64 with 1, setp.ne.b64 against 0) and the |
| ; resulting predicate selects between neg(abs(a)) and abs(a) via selp.f32. |
| define <2 x float> @test_copysign_f64(<2 x float> %a, <2 x double> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_copysign_f64( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<7>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_copysign_f64_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_copysign_f64_param_0]; |
| ; CHECK-NOF32X2-NEXT: abs.f32 %r3, %r2; |
| ; CHECK-NOF32X2-NEXT: neg.f32 %r4, %r3; |
| ; CHECK-NOF32X2-NEXT: shr.u64 %rd3, %rd2, 63; |
| ; CHECK-NOF32X2-NEXT: and.b64 %rd4, %rd3, 1; |
| ; CHECK-NOF32X2-NEXT: setp.ne.b64 %p1, %rd4, 0; |
| ; CHECK-NOF32X2-NEXT: selp.f32 %r5, %r4, %r3, %p1; |
| ; CHECK-NOF32X2-NEXT: abs.f32 %r6, %r1; |
| ; CHECK-NOF32X2-NEXT: neg.f32 %r7, %r6; |
| ; CHECK-NOF32X2-NEXT: shr.u64 %rd5, %rd1, 63; |
| ; CHECK-NOF32X2-NEXT: and.b64 %rd6, %rd5, 1; |
| ; CHECK-NOF32X2-NEXT: setp.ne.b64 %p2, %rd6, 0; |
| ; CHECK-NOF32X2-NEXT: selp.f32 %r8, %r7, %r6, %p2; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r5}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_copysign_f64( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .pred %p<3>; |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<9>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<8>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_copysign_f64_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_copysign_f64_param_0]; |
| ; CHECK-F32X2-NEXT: shr.u64 %rd4, %rd3, 63; |
| ; CHECK-F32X2-NEXT: and.b64 %rd5, %rd4, 1; |
| ; CHECK-F32X2-NEXT: setp.ne.b64 %p1, %rd5, 0; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: abs.f32 %r3, %r2; |
| ; CHECK-F32X2-NEXT: neg.f32 %r4, %r3; |
| ; CHECK-F32X2-NEXT: selp.f32 %r5, %r4, %r3, %p1; |
| ; CHECK-F32X2-NEXT: shr.u64 %rd6, %rd2, 63; |
| ; CHECK-F32X2-NEXT: and.b64 %rd7, %rd6, 1; |
| ; CHECK-F32X2-NEXT: setp.ne.b64 %p2, %rd7, 0; |
| ; CHECK-F32X2-NEXT: abs.f32 %r6, %r1; |
| ; CHECK-F32X2-NEXT: neg.f32 %r7, %r6; |
| ; CHECK-F32X2-NEXT: selp.f32 %r8, %r7, %r6, %p2; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r5}; |
| ; CHECK-F32X2-NEXT: ret; |
| %tb = fptrunc <2 x double> %b to <2 x float> |
| %r = call <2 x float> @llvm.copysign(<2 x float> %a, <2 x float> %tb) |
| ret <2 x float> %r |
| } |
| |
| ; llvm.copysign on <2 x float> whose result is widened to <2 x double> |
| ; with fpext. |
| ; Both run configurations expect the copysign to stay in f32 (two |
| ; copysign.f32 instructions), followed by one cvt.f64.f32 per element and |
| ; a v2.b64 store of the widened results. |
| define <2 x double> @test_copysign_extended(<2 x float> %a, <2 x float> %b) #0 { |
| ; CHECK-NOF32X2-LABEL: test_copysign_extended( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<7>; |
| ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_copysign_extended_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_copysign_extended_param_0]; |
| ; CHECK-NOF32X2-NEXT: copysign.f32 %r5, %r3, %r1; |
| ; CHECK-NOF32X2-NEXT: copysign.f32 %r6, %r4, %r2; |
| ; CHECK-NOF32X2-NEXT: cvt.f64.f32 %rd1, %r6; |
| ; CHECK-NOF32X2-NEXT: cvt.f64.f32 %rd2, %r5; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd2, %rd1}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_copysign_extended( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<7>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<5>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_copysign_extended_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_copysign_extended_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r3, %r4}, %rd2; |
| ; CHECK-F32X2-NEXT: copysign.f32 %r5, %r3, %r1; |
| ; CHECK-F32X2-NEXT: copysign.f32 %r6, %r4, %r2; |
| ; CHECK-F32X2-NEXT: cvt.f64.f32 %rd3, %r6; |
| ; CHECK-F32X2-NEXT: cvt.f64.f32 %rd4, %r5; |
| ; CHECK-F32X2-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd3}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = call <2 x float> @llvm.copysign(<2 x float> %a, <2 x float> %b) |
| %xr = fpext <2 x float> %r to <2 x double> |
| ret <2 x double> %xr |
| } |
| |
| ; llvm.floor on <2 x float>. |
| ; Both run configurations expect one cvt.rmi.f32.f32 per element; the |
| ; sm_100 (CHECK-F32X2) output unpacks the b64 parameter with mov.b64 first. |
| define <2 x float> @test_floor(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_floor( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_floor_param_0]; |
| ; CHECK-NOF32X2-NEXT: cvt.rmi.f32.f32 %r3, %r2; |
| ; CHECK-NOF32X2-NEXT: cvt.rmi.f32.f32 %r4, %r1; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_floor( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_floor_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: cvt.rmi.f32.f32 %r3, %r2; |
| ; CHECK-F32X2-NEXT: cvt.rmi.f32.f32 %r4, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = call <2 x float> @llvm.floor(<2 x float> %a) |
| ret <2 x float> %r |
| } |
| |
| ; llvm.ceil on <2 x float>. |
| ; Both run configurations expect one cvt.rpi.f32.f32 per element; the |
| ; sm_100 (CHECK-F32X2) output unpacks the b64 parameter with mov.b64 first. |
| define <2 x float> @test_ceil(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_ceil( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_ceil_param_0]; |
| ; CHECK-NOF32X2-NEXT: cvt.rpi.f32.f32 %r3, %r2; |
| ; CHECK-NOF32X2-NEXT: cvt.rpi.f32.f32 %r4, %r1; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_ceil( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_ceil_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: cvt.rpi.f32.f32 %r3, %r2; |
| ; CHECK-F32X2-NEXT: cvt.rpi.f32.f32 %r4, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = call <2 x float> @llvm.ceil(<2 x float> %a) |
| ret <2 x float> %r |
| } |
| |
| ; llvm.trunc on <2 x float>. |
| ; Both run configurations expect one cvt.rzi.f32.f32 per element; the |
| ; sm_100 (CHECK-F32X2) output unpacks the b64 parameter with mov.b64 first. |
| define <2 x float> @test_trunc(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_trunc( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_param_0]; |
| ; CHECK-NOF32X2-NEXT: cvt.rzi.f32.f32 %r3, %r2; |
| ; CHECK-NOF32X2-NEXT: cvt.rzi.f32.f32 %r4, %r1; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_trunc( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_trunc_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: cvt.rzi.f32.f32 %r3, %r2; |
| ; CHECK-F32X2-NEXT: cvt.rzi.f32.f32 %r4, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = call <2 x float> @llvm.trunc(<2 x float> %a) |
| ret <2 x float> %r |
| } |
| |
| ; llvm.rint on <2 x float>. |
| ; Both run configurations expect one cvt.rni.f32.f32 per element, the same |
| ; instruction the nearbyint and roundeven tests in this file expect. |
| define <2 x float> @test_rint(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_rint( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_rint_param_0]; |
| ; CHECK-NOF32X2-NEXT: cvt.rni.f32.f32 %r3, %r2; |
| ; CHECK-NOF32X2-NEXT: cvt.rni.f32.f32 %r4, %r1; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_rint( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_rint_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: cvt.rni.f32.f32 %r3, %r2; |
| ; CHECK-F32X2-NEXT: cvt.rni.f32.f32 %r4, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = call <2 x float> @llvm.rint(<2 x float> %a) |
| ret <2 x float> %r |
| } |
| |
| ; llvm.nearbyint on <2 x float>. |
| ; Both run configurations expect one cvt.rni.f32.f32 per element, the same |
| ; instruction the rint and roundeven tests in this file expect. |
| define <2 x float> @test_nearbyint(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_nearbyint( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_nearbyint_param_0]; |
| ; CHECK-NOF32X2-NEXT: cvt.rni.f32.f32 %r3, %r2; |
| ; CHECK-NOF32X2-NEXT: cvt.rni.f32.f32 %r4, %r1; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_nearbyint( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_nearbyint_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: cvt.rni.f32.f32 %r3, %r2; |
| ; CHECK-F32X2-NEXT: cvt.rni.f32.f32 %r4, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = call <2 x float> @llvm.nearbyint(<2 x float> %a) |
| ret <2 x float> %r |
| } |
| |
| ; llvm.roundeven on <2 x float>. |
| ; Both run configurations expect one cvt.rni.f32.f32 per element, the same |
| ; instruction the rint and nearbyint tests in this file expect. |
| define <2 x float> @test_roundeven(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_roundeven( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_roundeven_param_0]; |
| ; CHECK-NOF32X2-NEXT: cvt.rni.f32.f32 %r3, %r2; |
| ; CHECK-NOF32X2-NEXT: cvt.rni.f32.f32 %r4, %r1; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_roundeven( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<5>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_roundeven_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: cvt.rni.f32.f32 %r3, %r2; |
| ; CHECK-F32X2-NEXT: cvt.rni.f32.f32 %r4, %r1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = call <2 x float> @llvm.roundeven(<2 x float> %a) |
| ret <2 x float> %r |
| } |
| |
| ; Check the use of the sign mask and 0.5 to implement round. |
| ; Expected per-element sequence in both run configurations: |
| ; - and.b32 with -2147483648 keeps only the sign bit of the input; |
| ; - or.b32 with 1056964608 (0x3F000000, the bit pattern of 0.5f) builds a |
| ; 0.5 carrying the input's sign; |
| ; - add.rn.f32 followed by cvt.rzi.f32.f32 adds that value and truncates; |
| ; - if abs(x) > 0f4B000000 (2^23) the original input is selected instead; |
| ; - if abs(x) < 0f3F000000 (0.5) the plain cvt.rzi of the input is selected. |
| define <2 x float> @test_round(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_round( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .pred %p<5>; |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<19>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_round_param_0]; |
| ; CHECK-NOF32X2-NEXT: and.b32 %r3, %r2, -2147483648; |
| ; CHECK-NOF32X2-NEXT: or.b32 %r4, %r3, 1056964608; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r5, %r2, %r4; |
| ; CHECK-NOF32X2-NEXT: cvt.rzi.f32.f32 %r6, %r5; |
| ; CHECK-NOF32X2-NEXT: abs.f32 %r7, %r2; |
| ; CHECK-NOF32X2-NEXT: setp.gt.f32 %p1, %r7, 0f4B000000; |
| ; CHECK-NOF32X2-NEXT: selp.f32 %r8, %r2, %r6, %p1; |
| ; CHECK-NOF32X2-NEXT: cvt.rzi.f32.f32 %r9, %r2; |
| ; CHECK-NOF32X2-NEXT: setp.lt.f32 %p2, %r7, 0f3F000000; |
| ; CHECK-NOF32X2-NEXT: selp.f32 %r10, %r9, %r8, %p2; |
| ; CHECK-NOF32X2-NEXT: and.b32 %r11, %r1, -2147483648; |
| ; CHECK-NOF32X2-NEXT: or.b32 %r12, %r11, 1056964608; |
| ; CHECK-NOF32X2-NEXT: add.rn.f32 %r13, %r1, %r12; |
| ; CHECK-NOF32X2-NEXT: cvt.rzi.f32.f32 %r14, %r13; |
| ; CHECK-NOF32X2-NEXT: abs.f32 %r15, %r1; |
| ; CHECK-NOF32X2-NEXT: setp.gt.f32 %p3, %r15, 0f4B000000; |
| ; CHECK-NOF32X2-NEXT: selp.f32 %r16, %r1, %r14, %p3; |
| ; CHECK-NOF32X2-NEXT: cvt.rzi.f32.f32 %r17, %r1; |
| ; CHECK-NOF32X2-NEXT: setp.lt.f32 %p4, %r15, 0f3F000000; |
| ; CHECK-NOF32X2-NEXT: selp.f32 %r18, %r17, %r16, %p4; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r18, %r10}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_round( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .pred %p<5>; |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<19>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_round_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: and.b32 %r3, %r2, -2147483648; |
| ; CHECK-F32X2-NEXT: or.b32 %r4, %r3, 1056964608; |
| ; CHECK-F32X2-NEXT: add.rn.f32 %r5, %r2, %r4; |
| ; CHECK-F32X2-NEXT: cvt.rzi.f32.f32 %r6, %r5; |
| ; CHECK-F32X2-NEXT: abs.f32 %r7, %r2; |
| ; CHECK-F32X2-NEXT: setp.gt.f32 %p1, %r7, 0f4B000000; |
| ; CHECK-F32X2-NEXT: selp.f32 %r8, %r2, %r6, %p1; |
| ; CHECK-F32X2-NEXT: cvt.rzi.f32.f32 %r9, %r2; |
| ; CHECK-F32X2-NEXT: setp.lt.f32 %p2, %r7, 0f3F000000; |
| ; CHECK-F32X2-NEXT: selp.f32 %r10, %r9, %r8, %p2; |
| ; CHECK-F32X2-NEXT: and.b32 %r11, %r1, -2147483648; |
| ; CHECK-F32X2-NEXT: or.b32 %r12, %r11, 1056964608; |
| ; CHECK-F32X2-NEXT: add.rn.f32 %r13, %r1, %r12; |
| ; CHECK-F32X2-NEXT: cvt.rzi.f32.f32 %r14, %r13; |
| ; CHECK-F32X2-NEXT: abs.f32 %r15, %r1; |
| ; CHECK-F32X2-NEXT: setp.gt.f32 %p3, %r15, 0f4B000000; |
| ; CHECK-F32X2-NEXT: selp.f32 %r16, %r1, %r14, %p3; |
| ; CHECK-F32X2-NEXT: cvt.rzi.f32.f32 %r17, %r1; |
| ; CHECK-F32X2-NEXT: setp.lt.f32 %p4, %r15, 0f3F000000; |
| ; CHECK-F32X2-NEXT: selp.f32 %r18, %r17, %r16, %p4; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r18, %r10}; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = call <2 x float> @llvm.round(<2 x float> %a) |
| ret <2 x float> %r |
| } |
| |
| ; llvm.fmuladd on three <2 x float> operands. |
| ; The expected output is a fused multiply-add in both configurations: |
| ; two scalar fma.rn.f32 on sm_80 (CHECK-NOF32X2) and a single packed |
| ; fma.rn.f32x2 on sm_100 (CHECK-F32X2), matching what test_fma expects. |
| define <2 x float> @test_fmuladd(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 { |
| ; CHECK-NOF32X2-LABEL: test_fmuladd( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_fmuladd_param_2]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fmuladd_param_1]; |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fmuladd_param_0]; |
| ; CHECK-NOF32X2-NEXT: fma.rn.f32 %r7, %r2, %r4, %r6; |
| ; CHECK-NOF32X2-NEXT: fma.rn.f32 %r8, %r1, %r3, %r5; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_fmuladd( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<5>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd3, [test_fmuladd_param_2]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_fmuladd_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_fmuladd_param_0]; |
| ; CHECK-F32X2-NEXT: fma.rn.f32x2 %rd4, %rd1, %rd2, %rd3; |
| ; CHECK-F32X2-NEXT: st.param.b64 [func_retval0], %rd4; |
| ; CHECK-F32X2-NEXT: ret; |
| %r = call <2 x float> @llvm.fmuladd(<2 x float> %a, <2 x float> %b, <2 x float> %c) |
| ret <2 x float> %r |
| } |
| |
| ; shufflevector with mask <1, 0>, i.e. the two elements of %a swapped. |
| ; No arithmetic is expected: the two 32-bit halves are simply stored in |
| ; reverse order ({%r2, %r1}). The sm_100 (CHECK-F32X2) output unpacks the |
| ; b64 parameter with mov.b64 to get at the halves. |
| define <2 x float> @test_shufflevector(<2 x float> %a) #0 { |
| ; CHECK-NOF32X2-LABEL: test_shufflevector( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_shufflevector_param_0]; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_shufflevector( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<3>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_shufflevector_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; |
| ; CHECK-F32X2-NEXT: ret; |
| %s = shufflevector <2 x float> %a, <2 x float> poison, <2 x i32> <i32 1, i32 0> |
| ret <2 x float> %s |
| } |
| |
| ; insertelement of scalar %x into index 1 of %a. |
| ; The expected result keeps element 0 of %a and stores %x (param_1) as the |
| ; second element. The sm_100 (CHECK-F32X2) output extracts only the low |
| ; half of the b64 parameter, discarding the high half via the `_` sink in |
| ; mov.b64 {%r2, _}. |
| define <2 x float> @test_insertelement(<2 x float> %a, float %x) #0 { |
| ; CHECK-NOF32X2-LABEL: test_insertelement( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<4>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_insertelement_param_0]; |
| ; CHECK-NOF32X2-NEXT: ld.param.b32 %r3, [test_insertelement_param_1]; |
| ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r3}; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_insertelement( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<3>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b32 %r1, [test_insertelement_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_insertelement_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r2, _}, %rd1; |
| ; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; |
| ; CHECK-F32X2-NEXT: ret; |
| %i = insertelement <2 x float> %a, float %x, i64 1 |
| ret <2 x float> %i |
| } |
| |
| ; sitofp from <2 x i32> to <2 x float>. |
| ; Uses the shared CHECK prefix, so the sm_80 and sm_100 runs are expected |
| ; to produce identical output: one cvt.rn.f32.s32 per element. |
| define <2 x float> @test_sitofp_2xi32_to_2xfloat(<2 x i32> %a) #0 { |
| ; CHECK-LABEL: test_sitofp_2xi32_to_2xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_sitofp_2xi32_to_2xfloat_param_0]; |
| ; CHECK-NEXT: cvt.rn.f32.s32 %r3, %r2; |
| ; CHECK-NEXT: cvt.rn.f32.s32 %r4, %r1; |
| ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NEXT: ret; |
| %r = sitofp <2 x i32> %a to <2 x float> |
| ret <2 x float> %r |
| } |
| |
| ; uitofp from <2 x i32> to <2 x float>. |
| ; Uses the shared CHECK prefix, so the sm_80 and sm_100 runs are expected |
| ; to produce identical output: one cvt.rn.f32.u32 per element. |
| define <2 x float> @test_uitofp_2xi32_to_2xfloat(<2 x i32> %a) #0 { |
| ; CHECK-LABEL: test_uitofp_2xi32_to_2xfloat( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_uitofp_2xi32_to_2xfloat_param_0]; |
| ; CHECK-NEXT: cvt.rn.f32.u32 %r3, %r2; |
| ; CHECK-NEXT: cvt.rn.f32.u32 %r4, %r1; |
| ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; |
| ; CHECK-NEXT: ret; |
| %r = uitofp <2 x i32> %a to <2 x float> |
| ret <2 x float> %r |
| } |
| |
| ; fptrunc from <2 x float> to <2 x bfloat>, with the result stored to %p. |
| ; Both run configurations expect a single cvt.rn.bf16x2.f32 that takes |
| ; both f32 elements (element 1 listed first, then element 0) and produces |
| ; one 32-bit register, which is written with st.b32. |
| ; Note: unlike most tests in this file, this function has no #0 attribute. |
| define void @test_trunc_to_v2bf16(<2 x float> %a, ptr %p) { |
| ; CHECK-NOF32X2-LABEL: test_trunc_to_v2bf16( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<4>; |
| ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_to_v2bf16_param_0]; |
| ; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_trunc_to_v2bf16_param_1]; |
| ; CHECK-NOF32X2-NEXT: cvt.rn.bf16x2.f32 %r3, %r2, %r1; |
| ; CHECK-NOF32X2-NEXT: st.b32 [%rd1], %r3; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_trunc_to_v2bf16( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<4>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_trunc_to_v2bf16_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_trunc_to_v2bf16_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: cvt.rn.bf16x2.f32 %r3, %r2, %r1; |
| ; CHECK-F32X2-NEXT: st.b32 [%rd2], %r3; |
| ; CHECK-F32X2-NEXT: ret; |
| %trunc = fptrunc <2 x float> %a to <2 x bfloat> |
| store <2 x bfloat> %trunc, ptr %p |
| ret void |
| } |
| |
| ; fptrunc from <2 x float> to <2 x half>, with the result stored to %p. |
| ; Both run configurations expect a single cvt.rn.f16x2.f32 that takes |
| ; both f32 elements (element 1 listed first, then element 0) and produces |
| ; one 32-bit register, which is written with st.b32. |
| ; Note: unlike most tests in this file, this function has no #0 attribute. |
| define void @test_trunc_to_v2f16(<2 x float> %a, ptr %p) { |
| ; CHECK-NOF32X2-LABEL: test_trunc_to_v2f16( |
| ; CHECK-NOF32X2: { |
| ; CHECK-NOF32X2-NEXT: .reg .b32 %r<4>; |
| ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-NOF32X2-EMPTY: |
| ; CHECK-NOF32X2-NEXT: // %bb.0: |
| ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_to_v2f16_param_0]; |
| ; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_trunc_to_v2f16_param_1]; |
| ; CHECK-NOF32X2-NEXT: cvt.rn.f16x2.f32 %r3, %r2, %r1; |
| ; CHECK-NOF32X2-NEXT: st.b32 [%rd1], %r3; |
| ; CHECK-NOF32X2-NEXT: ret; |
| ; |
| ; CHECK-F32X2-LABEL: test_trunc_to_v2f16( |
| ; CHECK-F32X2: { |
| ; CHECK-F32X2-NEXT: .reg .b32 %r<4>; |
| ; CHECK-F32X2-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-F32X2-EMPTY: |
| ; CHECK-F32X2-NEXT: // %bb.0: |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd2, [test_trunc_to_v2f16_param_1]; |
| ; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_trunc_to_v2f16_param_0]; |
| ; CHECK-F32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; |
| ; CHECK-F32X2-NEXT: cvt.rn.f16x2.f32 %r3, %r2, %r1; |
| ; CHECK-F32X2-NEXT: st.b32 [%rd2], %r3; |
| ; CHECK-F32X2-NEXT: ret; |
| %trunc = fptrunc <2 x float> %a to <2 x half> |
| store <2 x half> %trunc, ptr %p |
| ret void |
| } |
| |
| |
| ; Attribute groups referenced by the test functions. |
| ; #0 marks a function nounwind; most tests in this file carry it. |
| ; #2 selects the preserve-sign denormal mode. NOTE(review): no function in |
| ; this part of the file references #2 - confirm it is used earlier on. |
| attributes #0 = { nounwind } |
| attributes #2 = { "denormal-fp-math"="preserve-sign" } |