; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=CHECK,SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=CHECK,SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=CHECK,GISEL %s
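
; These tests exercise @llvm.fptrunc.round, which truncates to a narrower FP
; type under an explicit rounding mode. !"round.tonearest" matches the AMDGPU
; hardware default, so no mode change is needed; the directed modes are
; implemented by toggling the FP_ROUND bits of the MODE register with
; s_setreg_imm32_b32 around the conversion.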

define amdgpu_gs half @v_fptrunc_round_f32_to_f16_tonearest(float %a) {
; CHECK-LABEL: v_fptrunc_round_f32_to_f16_tonearest:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
; CHECK-NEXT:    ; return to shader part epilog
  %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.tonearest")
  ret half %res
}

define amdgpu_gs half @v_fptrunc_round_f32_to_f16_upward(float %a) {
; CHECK-LABEL: v_fptrunc_round_f32_to_f16_upward:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
; CHECK-NEXT:    ; return to shader part epilog
  %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
  ret half %res
}

define amdgpu_gs half @v_fptrunc_round_f32_to_f16_downward(float %a) {
; CHECK-LABEL: v_fptrunc_round_f32_to_f16_downward:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
; CHECK-NEXT:    ; return to shader part epilog
  %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.downward")
  ret half %res
}

define amdgpu_gs half @v_fptrunc_round_f32_to_f16_towardzero(float %a) {
; CHECK-LABEL: v_fptrunc_round_f32_to_f16_towardzero:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
; CHECK-NEXT:    ; return to shader part epilog
  %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.towardzero")
  ret half %res
}

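; With multiple calls under different rounding modes, conversions that share a
; mode are scheduled together so one s_setreg_imm32_b32 covers the group, and a
; final write restores the default round-to-nearest state before the adds.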
define amdgpu_gs void @v_fptrunc_round_f32_to_f16_upward_multiple_calls(float %a, float %b, ptr addrspace(1) %out) {
; CHECK-LABEL: v_fptrunc_round_f32_to_f16_upward_multiple_calls:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
; CHECK-NEXT:    v_cvt_f16_f32_e32 v4, v1
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
; CHECK-NEXT:    v_cvt_f16_f32_e32 v1, v1
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
; CHECK-NEXT:    v_add_f16_e32 v0, v0, v4
; CHECK-NEXT:    v_add_f16_e32 v0, v1, v0
; CHECK-NEXT:    global_store_short v[2:3], v0, off
; CHECK-NEXT:    s_endpgm
  %res1 = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
  %res2 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.upward")
  %res3 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.downward")
  %res4 = fadd half %res1, %res2
  %res5 = fadd half %res3, %res4
  store half %res5, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_gs void @v_fptrunc_round_f32_to_f16_downward_multiple_calls(float %a, float %b, ptr addrspace(1) %out) {
; CHECK-LABEL: v_fptrunc_round_f32_to_f16_downward_multiple_calls:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; CHECK-NEXT:    v_cvt_f16_f32_e32 v4, v0
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
; CHECK-NEXT:    v_cvt_f16_f32_e32 v1, v1
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
; CHECK-NEXT:    v_add_f16_e32 v0, v4, v0
; CHECK-NEXT:    v_add_f16_e32 v0, v1, v0
; CHECK-NEXT:    global_store_short v[2:3], v0, off
; CHECK-NEXT:    s_endpgm
  %res1 = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
  %res2 = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.downward")
  %res3 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.downward")
  %res4 = fadd half %res1, %res2
  %res5 = fadd half %res3, %res4
  store half %res5, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_gs void @v_fptrunc_round_f32_to_f16_towardzero_multiple_calls(float %a, float %b, ptr addrspace(1) %out) {
; CHECK-LABEL: v_fptrunc_round_f32_to_f16_towardzero_multiple_calls:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
; CHECK-NEXT:    v_cvt_f16_f32_e32 v4, v1
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 1
; CHECK-NEXT:    v_cvt_f16_f32_e32 v1, v1
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 0
; CHECK-NEXT:    v_add_f16_e32 v0, v0, v4
; CHECK-NEXT:    v_add_f16_e32 v0, v1, v0
; CHECK-NEXT:    global_store_short v[2:3], v0, off
; CHECK-NEXT:    s_endpgm
  %res1 = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.towardzero")
  %res2 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.towardzero")
  %res3 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.upward")
  %res4 = fadd half %res1, %res2
  %res5 = fadd half %res3, %res4
  store half %res5, ptr addrspace(1) %out, align 4
  ret void
}

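; Scalar (inreg) inputs still go through the VALU: the SGPR value is copied to
; a VGPR first, and the uniform result is moved back with v_readfirstlane_b32.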
define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_upward(float inreg %a, ptr addrspace(1) %out) {
; CHECK-LABEL: s_fptrunc_round_f32_to_f16_upward:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    v_mov_b32_e32 v0, s0
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
; CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
; CHECK-NEXT:    ; return to shader part epilog
  %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
  %bitcast = bitcast half %res to i16
  %ret = zext i16 %bitcast to i32
  ret i32 %ret
}

define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_downward(float inreg %a, ptr addrspace(1) %out) {
; CHECK-LABEL: s_fptrunc_round_f32_to_f16_downward:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    v_mov_b32_e32 v0, s0
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
; CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
; CHECK-NEXT:    ; return to shader part epilog
  %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.downward")
  %bitcast = bitcast half %res to i16
  %ret = zext i16 %bitcast to i32
  ret i32 %ret
}

define amdgpu_gs void @s_fptrunc_round_f32_to_f16_upward_multiple_calls(float inreg %a, float inreg %b, ptr addrspace(1) %out) {
; CHECK-LABEL: s_fptrunc_round_f32_to_f16_upward_multiple_calls:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    v_mov_b32_e32 v2, s0
; CHECK-NEXT:    v_mov_b32_e32 v3, s1
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; CHECK-NEXT:    v_cvt_f16_f32_e32 v2, v2
; CHECK-NEXT:    v_cvt_f16_f32_e32 v4, v3
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
; CHECK-NEXT:    v_cvt_f16_f32_e32 v3, v3
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
; CHECK-NEXT:    v_add_f16_e32 v2, v2, v4
; CHECK-NEXT:    v_add_f16_e32 v2, v3, v2
; CHECK-NEXT:    global_store_short v[0:1], v2, off
; CHECK-NEXT:    s_endpgm
  %res1 = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
  %res2 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.upward")
  %res3 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.downward")
  %res4 = fadd half %res1, %res2
  %res5 = fadd half %res3, %res4
  store half %res5, ptr addrspace(1) %out, align 4
  ret void
}

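; The vector variants diverge between the selectors: the DAG path packs the
; two halves with v_perm_b32 while GlobalISel uses v_pack_b32_f16, hence the
; separate SDAG and GISEL check blocks below.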
define amdgpu_gs <2 x half> @v_fptrunc_round_v2f32_to_v2f16_upward(<2 x float> %a) {
; SDAG-LABEL: v_fptrunc_round_v2f32_to_v2f16_upward:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SDAG-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; SDAG-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: v_fptrunc_round_v2f32_to_v2f16_upward:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
; GISEL-NEXT:    ; return to shader part epilog
  %res = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %a, metadata !"round.upward")
  ret <2 x half> %res
}

define amdgpu_gs <2 x half> @v_fptrunc_round_v2f32_to_v2f16_downward(<2 x float> %a) {
; SDAG-LABEL: v_fptrunc_round_v2f32_to_v2f16_downward:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
; SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SDAG-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; SDAG-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: v_fptrunc_round_v2f32_to_v2f16_downward:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
; GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
; GISEL-NEXT:    ; return to shader part epilog
  %res = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %a, metadata !"round.downward")
  ret <2 x half> %res
}

define amdgpu_gs void @v_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls(<2 x float> %a, <2 x float> %b, ptr addrspace(1) %out) {
; SDAG-LABEL: v_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SDAG-NEXT:    v_cvt_f16_f32_e32 v6, v2
; SDAG-NEXT:    v_cvt_f16_f32_e32 v7, v3
; SDAG-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
; SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v2
; SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v3
; SDAG-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
; SDAG-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
; SDAG-NEXT:    v_pk_add_f16 v0, v0, v3
; SDAG-NEXT:    v_pk_add_f16 v0, v1, v0
; SDAG-NEXT:    global_store_dword v[4:5], v0, off
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: v_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GISEL-NEXT:    v_cvt_f16_f32_e32 v6, v2
; GISEL-NEXT:    v_cvt_f16_f32_e32 v7, v3
; GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GISEL-NEXT:    v_cvt_f16_f32_e32 v2, v3
; GISEL-NEXT:    v_pack_b32_f16 v3, v6, v7
; GISEL-NEXT:    v_pack_b32_f16 v1, v1, v2
; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
; GISEL-NEXT:    v_pk_add_f16 v0, v0, v3
; GISEL-NEXT:    v_pk_add_f16 v0, v1, v0
; GISEL-NEXT:    global_store_dword v[4:5], v0, off
; GISEL-NEXT:    s_endpgm
  %res1 = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %a, metadata !"round.upward")
  %res2 = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %b, metadata !"round.upward")
  %res3 = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %b, metadata !"round.downward")
  %res4 = fadd <2 x half> %res1, %res2
  %res5 = fadd <2 x half> %res3, %res4
  store <2 x half> %res5, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_gs <2 x i32> @s_fptrunc_round_v2f32_to_v2f16_upward(<2 x float> inreg %a, ptr addrspace(1) %out) {
; CHECK-LABEL: s_fptrunc_round_v2f32_to_v2f16_upward:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    v_mov_b32_e32 v0, s0
; CHECK-NEXT:    v_mov_b32_e32 v1, s1
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
; CHECK-NEXT:    v_cvt_f16_f32_e32 v1, v1
; CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; CHECK-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
; CHECK-NEXT:    v_readfirstlane_b32 s1, v1
; CHECK-NEXT:    ; return to shader part epilog
  %res = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %a, metadata !"round.upward")
  %bitcast = bitcast <2 x half> %res to <2 x i16>
  %ret = zext <2 x i16> %bitcast to <2 x i32>
  ret <2 x i32> %ret
}

define amdgpu_gs <2 x i32> @s_fptrunc_round_v2f32_to_v2f16_downward(<2 x float> inreg %a, ptr addrspace(1) %out) {
; CHECK-LABEL: s_fptrunc_round_v2f32_to_v2f16_downward:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    v_mov_b32_e32 v0, s0
; CHECK-NEXT:    v_mov_b32_e32 v1, s1
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
; CHECK-NEXT:    v_cvt_f16_f32_e32 v1, v1
; CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; CHECK-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
; CHECK-NEXT:    v_readfirstlane_b32 s1, v1
; CHECK-NEXT:    ; return to shader part epilog
  %res = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %a, metadata !"round.downward")
  %bitcast = bitcast <2 x half> %res to <2 x i16>
  %ret = zext <2 x i16> %bitcast to <2 x i32>
  ret <2 x i32> %ret
}

define amdgpu_gs void @s_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls(<2 x float> inreg %a, <2 x float> inreg %b, ptr addrspace(1) %out) {
; SDAG-LABEL: s_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    v_mov_b32_e32 v2, s0
; SDAG-NEXT:    v_mov_b32_e32 v3, s2
; SDAG-NEXT:    v_mov_b32_e32 v4, s1
; SDAG-NEXT:    v_mov_b32_e32 v5, s3
; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SDAG-NEXT:    v_cvt_f16_f32_e32 v6, v3
; SDAG-NEXT:    v_cvt_f16_f32_e32 v4, v4
; SDAG-NEXT:    v_cvt_f16_f32_e32 v7, v5
; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
; SDAG-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; SDAG-NEXT:    v_and_b32_e32 v6, 0xffff, v6
; SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; SDAG-NEXT:    v_lshl_or_b32 v2, v4, 16, v2
; SDAG-NEXT:    v_cvt_f16_f32_e32 v4, v5
; SDAG-NEXT:    v_lshl_or_b32 v5, v7, 16, v6
; SDAG-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
; SDAG-NEXT:    v_pk_add_f16 v2, v2, v5
; SDAG-NEXT:    v_pk_add_f16 v2, v3, v2
; SDAG-NEXT:    global_store_dword v[0:1], v2, off
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: s_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    v_mov_b32_e32 v2, s0
; GISEL-NEXT:    v_mov_b32_e32 v3, s1
; GISEL-NEXT:    v_mov_b32_e32 v4, s2
; GISEL-NEXT:    v_mov_b32_e32 v5, s3
; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; GISEL-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GISEL-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GISEL-NEXT:    v_cvt_f16_f32_e32 v6, v4
; GISEL-NEXT:    v_cvt_f16_f32_e32 v7, v5
; GISEL-NEXT:    v_pack_b32_f16 v2, v2, v3
; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
; GISEL-NEXT:    v_cvt_f16_f32_e32 v3, v4
; GISEL-NEXT:    v_cvt_f16_f32_e32 v4, v5
; GISEL-NEXT:    v_pack_b32_f16 v5, v6, v7
; GISEL-NEXT:    v_pack_b32_f16 v3, v3, v4
; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
; GISEL-NEXT:    v_pk_add_f16 v2, v2, v5
; GISEL-NEXT:    v_pk_add_f16 v2, v3, v2
; GISEL-NEXT:    global_store_dword v[0:1], v2, off
; GISEL-NEXT:    s_endpgm
  %res1 = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %a, metadata !"round.upward")
  %res2 = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %b, metadata !"round.upward")
  %res3 = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %b, metadata !"round.downward")
  %res4 = fadd <2 x half> %res1, %res2
  %res5 = fadd <2 x half> %res3, %res4
  store <2 x half> %res5, ptr addrspace(1) %out, align 4
  ret void
}

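; For the odd-width <3 x half> result, both selectors pack the first two
; elements into v0 and return the third in the low half of v1.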
define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_upward(<3 x float> %a) {
; SDAG-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SDAG-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v2
; SDAG-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GISEL-NEXT:    ; return to shader part epilog
  %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.upward")
  ret <3 x half> %res
}

define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_downward(<3 x float> %a) {
; SDAG-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
; SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SDAG-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v2
; SDAG-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
; GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GISEL-NEXT:    ; return to shader part epilog
  %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.downward")
  ret <3 x half> %res
}

define amdgpu_gs <4 x half> @v_fptrunc_round_v4f32_to_v4f16_upward(<4 x float> %a) {
; SDAG-LABEL: v_fptrunc_round_v4f32_to_v4f16_upward:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SDAG-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SDAG-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; SDAG-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
; SDAG-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: v_fptrunc_round_v4f32_to_v4f16_upward:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GISEL-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GISEL-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
; GISEL-NEXT:    v_pack_b32_f16 v1, v2, v3
; GISEL-NEXT:    ; return to shader part epilog
  %res = call <4 x half> @llvm.fptrunc.round.v4f16.v4f32(<4 x float> %a, metadata !"round.upward")
  ret <4 x half> %res
}

define amdgpu_gs <4 x half> @v_fptrunc_round_v4f32_to_v4f16_downward(<4 x float> %a) {
; SDAG-LABEL: v_fptrunc_round_v4f32_to_v4f16_downward:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
; SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SDAG-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SDAG-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; SDAG-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
; SDAG-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: v_fptrunc_round_v4f32_to_v4f16_downward:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
; GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GISEL-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GISEL-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
; GISEL-NEXT:    v_pack_b32_f16 v1, v2, v3
; GISEL-NEXT:    ; return to shader part epilog
  %res = call <4 x half> @llvm.fptrunc.round.v4f16.v4f32(<4 x float> %a, metadata !"round.downward")
  ret <4 x half> %res
}

define amdgpu_gs <8 x half> @v_fptrunc_round_v8f32_to_v8f16_upward(<8 x float> %a) {
; SDAG-LABEL: v_fptrunc_round_v8f32_to_v8f16_upward:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; SDAG-NEXT:    v_cvt_f16_f32_e32 v6, v6
; SDAG-NEXT:    v_cvt_f16_f32_e32 v4, v4
; SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SDAG-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SDAG-NEXT:    v_cvt_f16_f32_e32 v5, v5
; SDAG-NEXT:    v_cvt_f16_f32_e32 v7, v7
; SDAG-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; SDAG-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
; SDAG-NEXT:    v_perm_b32 v2, v5, v4, 0x5040100
; SDAG-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
; SDAG-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: v_fptrunc_round_v8f32_to_v8f16_upward:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GISEL-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GISEL-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GISEL-NEXT:    v_cvt_f16_f32_e32 v4, v4
; GISEL-NEXT:    v_cvt_f16_f32_e32 v5, v5
; GISEL-NEXT:    v_cvt_f16_f32_e32 v6, v6
; GISEL-NEXT:    v_cvt_f16_f32_e32 v7, v7
; GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
; GISEL-NEXT:    v_pack_b32_f16 v1, v2, v3
; GISEL-NEXT:    v_pack_b32_f16 v2, v4, v5
; GISEL-NEXT:    v_pack_b32_f16 v3, v6, v7
; GISEL-NEXT:    ; return to shader part epilog
  %res = call <8 x half> @llvm.fptrunc.round.v8f16.v8f32(<8 x float> %a, metadata !"round.upward")
  ret <8 x half> %res
}

define amdgpu_gs <8 x half> @v_fptrunc_round_v8f32_to_v8f16_downward(<8 x float> %a) {
; SDAG-LABEL: v_fptrunc_round_v8f32_to_v8f16_downward:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
; SDAG-NEXT:    v_cvt_f16_f32_e32 v6, v6
; SDAG-NEXT:    v_cvt_f16_f32_e32 v4, v4
; SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SDAG-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SDAG-NEXT:    v_cvt_f16_f32_e32 v5, v5
; SDAG-NEXT:    v_cvt_f16_f32_e32 v7, v7
; SDAG-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; SDAG-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
; SDAG-NEXT:    v_perm_b32 v2, v5, v4, 0x5040100
; SDAG-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
; SDAG-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: v_fptrunc_round_v8f32_to_v8f16_downward:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
; GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GISEL-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GISEL-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GISEL-NEXT:    v_cvt_f16_f32_e32 v4, v4
; GISEL-NEXT:    v_cvt_f16_f32_e32 v5, v5
; GISEL-NEXT:    v_cvt_f16_f32_e32 v6, v6
; GISEL-NEXT:    v_cvt_f16_f32_e32 v7, v7
; GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
; GISEL-NEXT:    v_pack_b32_f16 v1, v2, v3
; GISEL-NEXT:    v_pack_b32_f16 v2, v4, v5
; GISEL-NEXT:    v_pack_b32_f16 v3, v6, v7
; GISEL-NEXT:    ; return to shader part epilog
  %res = call <8 x half> @llvm.fptrunc.round.v8f16.v8f32(<8 x float> %a, metadata !"round.downward")
  ret <8 x half> %res
}

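; f64 -> f32 truncation uses v_cvt_f32_f64 with the same MODE-register scheme,
; but since the result is single precision, the rounding is controlled by the
; single-precision FP_ROUND bits (offset 0) rather than the double/half field.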
define amdgpu_gs float @v_fptrunc_round_f64_to_f32_tonearest(double %a) {
; CHECK-LABEL: v_fptrunc_round_f64_to_f32_tonearest:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
; CHECK-NEXT:    ; return to shader part epilog
  %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.tonearest")
  ret float %res
}

define amdgpu_gs float @v_fptrunc_round_f64_to_f32_upward(double %a) {
; CHECK-LABEL: v_fptrunc_round_f64_to_f32_upward:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 1), 1
; CHECK-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
; CHECK-NEXT:    ; return to shader part epilog
  %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.upward")
  ret float %res
}

define amdgpu_gs float @v_fptrunc_round_f64_to_f32_downward(double %a) {
; CHECK-LABEL: v_fptrunc_round_f64_to_f32_downward:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 1, 1), 1
; CHECK-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
; CHECK-NEXT:    ; return to shader part epilog
  %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.downward")
  ret float %res
}

define amdgpu_gs float @v_fptrunc_round_f64_to_f32_towardzero(double %a) {
; CHECK-LABEL: v_fptrunc_round_f64_to_f32_towardzero:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; CHECK-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
; CHECK-NEXT:    ; return to shader part epilog
  %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.towardzero")
  ret float %res
}
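
; Declarations are optional for intrinsics in current LLVM; they are spelled
; out here for reference (and for older tools that still require them).
declare half @llvm.fptrunc.round.f16.f32(float, metadata)
declare <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float>, metadata)
declare <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float>, metadata)
declare <4 x half> @llvm.fptrunc.round.v4f16.v4f32(<4 x float>, metadata)
declare <8 x half> @llvm.fptrunc.round.v8f16.v8f32(<8 x float>, metadata)
declare float @llvm.fptrunc.round.f32.f64(double, metadata)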