|  | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | 
|  | ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s | 
|  | ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s | 
|  | ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s | 
|  | ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s | 
|  |  | 
; Basic f32 interpolation test: loads two parameters with
; llvm.amdgcn.lds.param.load, runs a p10 and a p2 step for each, and exports
; the four results to mrt0. The (i32 1, i32 0) load is expected to print as
; attr0.y and the (i32 0, i32 1) load as attr1.x in the checks below.
; %i and %j arrive in SGPRs (inreg) and are expected to be copied to VGPRs
; before the interp instructions; %m0 is expected to be moved into m0.
|  | define amdgpu_ps void @v_interp_f32(float inreg %i, float inreg %j, i32 inreg %m0) #0 { | 
|  | ; GFX11-LABEL: v_interp_f32: | 
|  | ; GFX11:       ; %bb.0: ; %main_body | 
|  | ; GFX11-NEXT:    s_mov_b32 s3, exec_lo | 
|  | ; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo | 
|  | ; GFX11-NEXT:    s_mov_b32 m0, s2 | 
|  | ; GFX11-NEXT:    lds_param_load v0, attr0.y wait_vdst:15 | 
|  | ; GFX11-NEXT:    lds_param_load v1, attr1.x wait_vdst:15 | 
|  | ; GFX11-NEXT:    s_mov_b32 exec_lo, s3 | 
|  | ; GFX11-NEXT:    v_mov_b32_e32 v2, s0 | 
|  | ; GFX11-NEXT:    v_mov_b32_e32 v4, s1 | 
|  | ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) | 
|  | ; GFX11-NEXT:    v_interp_p10_f32 v3, v0, v2, v0 wait_exp:1 | 
|  | ; GFX11-NEXT:    v_interp_p10_f32 v2, v1, v2, v1 wait_exp:0 | 
|  | ; GFX11-NEXT:    v_interp_p2_f32 v5, v0, v4, v3 wait_exp:7 | 
|  | ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | 
|  | ; GFX11-NEXT:    v_interp_p2_f32 v4, v1, v4, v5 wait_exp:7 | 
|  | ; GFX11-NEXT:    exp mrt0 v3, v2, v5, v4 done | 
|  | ; GFX11-NEXT:    s_endpgm | 
|  | ; | 
|  | ; GFX12-LABEL: v_interp_f32: | 
|  | ; GFX12:       ; %bb.0: ; %main_body | 
|  | ; GFX12-NEXT:    s_mov_b32 s3, exec_lo | 
|  | ; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo | 
|  | ; GFX12-NEXT:    s_mov_b32 m0, s2 | 
|  | ; GFX12-NEXT:    ds_param_load v0, attr0.y wait_va_vdst:15 wait_vm_vsrc:1 | 
|  | ; GFX12-NEXT:    ds_param_load v1, attr1.x wait_va_vdst:15 wait_vm_vsrc:1 | 
|  | ; GFX12-NEXT:    s_mov_b32 exec_lo, s3 | 
|  | ; GFX12-NEXT:    v_mov_b32_e32 v2, s0 | 
|  | ; GFX12-NEXT:    v_mov_b32_e32 v4, s1 | 
|  | ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) | 
|  | ; GFX12-NEXT:    v_interp_p10_f32 v3, v0, v2, v0 wait_exp:1 | 
|  | ; GFX12-NEXT:    v_interp_p10_f32 v2, v1, v2, v1 wait_exp:0 | 
|  | ; GFX12-NEXT:    v_interp_p2_f32 v5, v0, v4, v3 wait_exp:7 | 
|  | ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | 
|  | ; GFX12-NEXT:    v_interp_p2_f32 v4, v1, v4, v5 wait_exp:7 | 
|  | ; GFX12-NEXT:    export mrt0 v3, v2, v5, v4 done | 
|  | ; GFX12-NEXT:    s_endpgm | 
|  | main_body: | 
|  | %p0 = call float @llvm.amdgcn.lds.param.load(i32 1, i32 0, i32 %m0) | 
|  | %p1 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %m0) | 
|  | %p0_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p0, float %i, float %p0) | 
|  | %p1_0 = call float @llvm.amdgcn.interp.inreg.p2(float %p0, float %j, float %p0_0) | 
|  | %p0_1 = call float @llvm.amdgcn.interp.inreg.p10(float %p1, float %i, float %p1) | 
; NOTE(review): this p2 accumulates %p1_0 (the first p2 result), not %p0_1.
; The expected output matches that (v5 is the last operand of the final
; v_interp_p2_f32) - confirm it is intentional before changing either side.
|  | %p1_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p1, float %j, float %p1_0) | 
|  | call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_0, float %p0_1, float %p1_0, float %p1_1, i1 true, i1 true) #0 | 
|  | ret void | 
|  | } | 
|  |  | 
; Four independent f32 interpolations: loads attr0.x through attr3.x, applies
; a p10 followed by a p2 to each, and exports the four p2 results to mrt0.
; The checks expect the four p10 instructions to be grouped together with
; wait_exp values 3, 2, 1, 0 in that order, and every p2 to carry wait_exp:7.
|  | define amdgpu_ps void @v_interp_f32_many(float inreg %i, float inreg %j, i32 inreg %m0) #0 { | 
|  | ; GFX11-LABEL: v_interp_f32_many: | 
|  | ; GFX11:       ; %bb.0: ; %main_body | 
|  | ; GFX11-NEXT:    s_mov_b32 s3, exec_lo | 
|  | ; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo | 
|  | ; GFX11-NEXT:    s_mov_b32 m0, s2 | 
|  | ; GFX11-NEXT:    lds_param_load v0, attr0.x wait_vdst:15 | 
|  | ; GFX11-NEXT:    lds_param_load v1, attr1.x wait_vdst:15 | 
|  | ; GFX11-NEXT:    lds_param_load v2, attr2.x wait_vdst:15 | 
|  | ; GFX11-NEXT:    lds_param_load v3, attr3.x wait_vdst:15 | 
|  | ; GFX11-NEXT:    s_mov_b32 exec_lo, s3 | 
|  | ; GFX11-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 | 
|  | ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4) | 
|  | ; GFX11-NEXT:    v_interp_p10_f32 v6, v0, v4, v0 wait_exp:3 | 
|  | ; GFX11-NEXT:    v_interp_p10_f32 v7, v1, v4, v1 wait_exp:2 | 
|  | ; GFX11-NEXT:    v_interp_p10_f32 v8, v2, v4, v2 wait_exp:1 | 
|  | ; GFX11-NEXT:    v_interp_p10_f32 v4, v3, v4, v3 wait_exp:0 | 
|  | ; GFX11-NEXT:    v_interp_p2_f32 v6, v0, v5, v6 wait_exp:7 | 
|  | ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) | 
|  | ; GFX11-NEXT:    v_interp_p2_f32 v7, v1, v5, v7 wait_exp:7 | 
|  | ; GFX11-NEXT:    v_interp_p2_f32 v8, v2, v5, v8 wait_exp:7 | 
|  | ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | 
|  | ; GFX11-NEXT:    v_interp_p2_f32 v4, v3, v5, v4 wait_exp:7 | 
|  | ; GFX11-NEXT:    exp mrt0 v6, v7, v8, v4 done | 
|  | ; GFX11-NEXT:    s_endpgm | 
|  | ; | 
|  | ; GFX12-LABEL: v_interp_f32_many: | 
|  | ; GFX12:       ; %bb.0: ; %main_body | 
|  | ; GFX12-NEXT:    s_mov_b32 s3, exec_lo | 
|  | ; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo | 
|  | ; GFX12-NEXT:    s_mov_b32 m0, s2 | 
|  | ; GFX12-NEXT:    ds_param_load v0, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 | 
|  | ; GFX12-NEXT:    ds_param_load v1, attr1.x wait_va_vdst:15 wait_vm_vsrc:1 | 
|  | ; GFX12-NEXT:    ds_param_load v2, attr2.x wait_va_vdst:15 wait_vm_vsrc:1 | 
|  | ; GFX12-NEXT:    ds_param_load v3, attr3.x wait_va_vdst:15 wait_vm_vsrc:1 | 
|  | ; GFX12-NEXT:    s_mov_b32 exec_lo, s3 | 
|  | ; GFX12-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 | 
|  | ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4) | 
|  | ; GFX12-NEXT:    v_interp_p10_f32 v6, v0, v4, v0 wait_exp:3 | 
|  | ; GFX12-NEXT:    v_interp_p10_f32 v7, v1, v4, v1 wait_exp:2 | 
|  | ; GFX12-NEXT:    v_interp_p10_f32 v8, v2, v4, v2 wait_exp:1 | 
|  | ; GFX12-NEXT:    v_interp_p10_f32 v4, v3, v4, v3 wait_exp:0 | 
|  | ; GFX12-NEXT:    v_interp_p2_f32 v6, v0, v5, v6 wait_exp:7 | 
|  | ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) | 
|  | ; GFX12-NEXT:    v_interp_p2_f32 v7, v1, v5, v7 wait_exp:7 | 
|  | ; GFX12-NEXT:    v_interp_p2_f32 v8, v2, v5, v8 wait_exp:7 | 
|  | ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | 
|  | ; GFX12-NEXT:    v_interp_p2_f32 v4, v3, v5, v4 wait_exp:7 | 
|  | ; GFX12-NEXT:    export mrt0 v6, v7, v8, v4 done | 
|  | ; GFX12-NEXT:    s_endpgm | 
|  | main_body: | 
|  | %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0) | 
|  | %p1 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %m0) | 
|  | %p2 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 2, i32 %m0) | 
|  | %p3 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 3, i32 %m0) | 
|  | %p0_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p0, float %i, float %p0) | 
|  | %p0_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p0, float %j, float %p0_0) | 
|  | %p1_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p1, float %i, float %p1) | 
|  | %p1_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p1, float %j, float %p1_0) | 
|  | %p2_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p2, float %i, float %p2) | 
|  | %p2_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p2, float %j, float %p2_0) | 
|  | %p3_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p3, float %i, float %p3) | 
|  | %p3_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p3, float %j, float %p3_0) | 
|  | call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_1, float %p1_1, float %p2_1, float %p3_1, i1 true, i1 true) #0 | 
|  | ret void | 
|  | } | 
|  |  | 
; Same four-attribute interpolation as v_interp_f32_many, except that %i and
; %j are loaded from global memory (float elements 1 and 2 of %ptr) instead of
; being passed as arguments. The checks expect the two loads to be merged into
; a single global_load_b64 at offset 4, and a wait on the load counter
; (s_waitcnt vmcnt(0) on gfx11, s_wait_loadcnt 0x0 on gfx12) to be placed
; before the first interp instruction that consumes the loaded values.
|  | define amdgpu_ps void @v_interp_f32_many_vm(ptr addrspace(1) %ptr, i32 inreg %m0) #0 { | 
|  | ; GFX11-LABEL: v_interp_f32_many_vm: | 
|  | ; GFX11:       ; %bb.0: ; %main_body | 
|  | ; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off offset:4 | 
|  | ; GFX11-NEXT:    s_mov_b32 m0, s0 | 
|  | ; GFX11-NEXT:    s_mov_b32 s0, exec_lo | 
|  | ; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo | 
|  | ; GFX11-NEXT:    lds_param_load v2, attr0.x wait_vdst:15 | 
|  | ; GFX11-NEXT:    lds_param_load v3, attr1.x wait_vdst:15 | 
|  | ; GFX11-NEXT:    lds_param_load v4, attr2.x wait_vdst:15 | 
|  | ; GFX11-NEXT:    lds_param_load v5, attr3.x wait_vdst:15 | 
|  | ; GFX11-NEXT:    s_mov_b32 exec_lo, s0 | 
|  | ; GFX11-NEXT:    s_waitcnt vmcnt(0) | 
|  | ; GFX11-NEXT:    v_interp_p10_f32 v6, v2, v0, v2 wait_exp:3 | 
|  | ; GFX11-NEXT:    v_interp_p10_f32 v7, v3, v0, v3 wait_exp:2 | 
|  | ; GFX11-NEXT:    v_interp_p10_f32 v8, v4, v0, v4 wait_exp:1 | 
|  | ; GFX11-NEXT:    v_interp_p10_f32 v0, v5, v0, v5 wait_exp:0 | 
|  | ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) | 
|  | ; GFX11-NEXT:    v_interp_p2_f32 v6, v2, v1, v6 wait_exp:7 | 
|  | ; GFX11-NEXT:    v_interp_p2_f32 v7, v3, v1, v7 wait_exp:7 | 
|  | ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) | 
|  | ; GFX11-NEXT:    v_interp_p2_f32 v8, v4, v1, v8 wait_exp:7 | 
|  | ; GFX11-NEXT:    v_interp_p2_f32 v0, v5, v1, v0 wait_exp:7 | 
|  | ; GFX11-NEXT:    exp mrt0 v6, v7, v8, v0 done | 
|  | ; GFX11-NEXT:    s_endpgm | 
|  | ; | 
|  | ; GFX12-LABEL: v_interp_f32_many_vm: | 
|  | ; GFX12:       ; %bb.0: ; %main_body | 
|  | ; GFX12-NEXT:    global_load_b64 v[0:1], v[0:1], off offset:4 | 
|  | ; GFX12-NEXT:    s_mov_b32 m0, s0 | 
|  | ; GFX12-NEXT:    s_mov_b32 s0, exec_lo | 
|  | ; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo | 
|  | ; GFX12-NEXT:    ds_param_load v2, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 | 
|  | ; GFX12-NEXT:    ds_param_load v3, attr1.x wait_va_vdst:15 wait_vm_vsrc:1 | 
|  | ; GFX12-NEXT:    ds_param_load v4, attr2.x wait_va_vdst:15 wait_vm_vsrc:1 | 
|  | ; GFX12-NEXT:    ds_param_load v5, attr3.x wait_va_vdst:15 wait_vm_vsrc:1 | 
|  | ; GFX12-NEXT:    s_mov_b32 exec_lo, s0 | 
|  | ; GFX12-NEXT:    s_wait_loadcnt 0x0 | 
|  | ; GFX12-NEXT:    v_interp_p10_f32 v6, v2, v0, v2 wait_exp:3 | 
|  | ; GFX12-NEXT:    v_interp_p10_f32 v7, v3, v0, v3 wait_exp:2 | 
|  | ; GFX12-NEXT:    v_interp_p10_f32 v8, v4, v0, v4 wait_exp:1 | 
|  | ; GFX12-NEXT:    v_interp_p10_f32 v0, v5, v0, v5 wait_exp:0 | 
|  | ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) | 
|  | ; GFX12-NEXT:    v_interp_p2_f32 v6, v2, v1, v6 wait_exp:7 | 
|  | ; GFX12-NEXT:    v_interp_p2_f32 v7, v3, v1, v7 wait_exp:7 | 
|  | ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) | 
|  | ; GFX12-NEXT:    v_interp_p2_f32 v8, v4, v1, v8 wait_exp:7 | 
|  | ; GFX12-NEXT:    v_interp_p2_f32 v0, v5, v1, v0 wait_exp:7 | 
|  | ; GFX12-NEXT:    export mrt0 v6, v7, v8, v0 done | 
|  | ; GFX12-NEXT:    s_endpgm | 
|  | main_body: | 
|  | %i.ptr = getelementptr float, ptr addrspace(1) %ptr, i32 1 | 
|  | %i = load float, ptr addrspace(1) %i.ptr, align 4 | 
|  | %j.ptr = getelementptr float, ptr addrspace(1) %ptr, i32 2 | 
|  | %j = load float, ptr addrspace(1) %j.ptr, align 4 | 
|  | %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0) | 
|  | %p1 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %m0) | 
|  | %p2 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 2, i32 %m0) | 
|  | %p3 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 3, i32 %m0) | 
|  | %p0_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p0, float %i, float %p0) | 
|  | %p0_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p0, float %j, float %p0_0) | 
|  | %p1_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p1, float %i, float %p1) | 
|  | %p1_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p1, float %j, float %p1_0) | 
|  | %p2_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p2, float %i, float %p2) | 
|  | %p2_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p2, float %j, float %p2_0) | 
|  | %p3_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p3, float %i, float %p3) | 
|  | %p3_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p3, float %j, float %p3_0) | 
|  | call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_1, float %p1_1, float %p2_1, float %p3_1, i1 true, i1 true) #0 | 
|  | ret void | 
|  | } | 
|  |  | 
; f16 interpolation test: loads one parameter, runs the p10.f16 / p2.f16 pair
; twice - once with the trailing i1 flag 0 and once with it 1 - then returns
; the fadd of the two half results.
; The +real-true16 runs are expected to address the loaded value as v1.l for
; flag 0 and v1.h for flag 1; the -real-true16 runs are expected to use full
; VGPRs and encode flag 1 through op_sel instead.
|  | define amdgpu_ps half @v_interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) #0 { | 
|  | ; GFX11-TRUE16-LABEL: v_interp_f16: | 
|  | ; GFX11-TRUE16:       ; %bb.0: ; %main_body | 
|  | ; GFX11-TRUE16-NEXT:    s_mov_b32 s3, exec_lo | 
|  | ; GFX11-TRUE16-NEXT:    s_wqm_b32 exec_lo, exec_lo | 
|  | ; GFX11-TRUE16-NEXT:    s_mov_b32 m0, s2 | 
|  | ; GFX11-TRUE16-NEXT:    lds_param_load v1, attr0.x wait_vdst:15 | 
|  | ; GFX11-TRUE16-NEXT:    s_mov_b32 exec_lo, s3 | 
|  | ; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v0, s0 | 
|  | ; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v2, s1 | 
|  | ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) | 
|  | ; GFX11-TRUE16-NEXT:    v_interp_p10_f16_f32 v3, v1.l, v0, v1.l wait_exp:0 | 
|  | ; GFX11-TRUE16-NEXT:    v_interp_p10_f16_f32 v4, v1.h, v0, v1.h wait_exp:7 | 
|  | ; GFX11-TRUE16-NEXT:    v_interp_p2_f16_f32 v0.l, v1.l, v2, v3 wait_exp:7 | 
|  | ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) | 
|  | ; GFX11-TRUE16-NEXT:    v_interp_p2_f16_f32 v0.h, v1.h, v2, v4 wait_exp:7 | 
|  | ; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v0.h | 
|  | ; GFX11-TRUE16-NEXT:    ; return to shader part epilog | 
|  | ; | 
|  | ; GFX11-FAKE16-LABEL: v_interp_f16: | 
|  | ; GFX11-FAKE16:       ; %bb.0: ; %main_body | 
|  | ; GFX11-FAKE16-NEXT:    s_mov_b32 s3, exec_lo | 
|  | ; GFX11-FAKE16-NEXT:    s_wqm_b32 exec_lo, exec_lo | 
|  | ; GFX11-FAKE16-NEXT:    s_mov_b32 m0, s2 | 
|  | ; GFX11-FAKE16-NEXT:    lds_param_load v1, attr0.x wait_vdst:15 | 
|  | ; GFX11-FAKE16-NEXT:    s_mov_b32 exec_lo, s3 | 
|  | ; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, s0 | 
|  | ; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v2, s1 | 
|  | ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) | 
|  | ; GFX11-FAKE16-NEXT:    v_interp_p10_f16_f32 v3, v1, v0, v1 wait_exp:0 | 
|  | ; GFX11-FAKE16-NEXT:    v_interp_p10_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7 | 
|  | ; GFX11-FAKE16-NEXT:    v_interp_p2_f16_f32 v3, v1, v2, v3 wait_exp:7 | 
|  | ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) | 
|  | ; GFX11-FAKE16-NEXT:    v_interp_p2_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7 | 
|  | ; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, v3, v0 | 
|  | ; GFX11-FAKE16-NEXT:    ; return to shader part epilog | 
|  | ; | 
|  | ; GFX12-TRUE16-LABEL: v_interp_f16: | 
|  | ; GFX12-TRUE16:       ; %bb.0: ; %main_body | 
|  | ; GFX12-TRUE16-NEXT:    s_mov_b32 s3, exec_lo | 
|  | ; GFX12-TRUE16-NEXT:    s_wqm_b32 exec_lo, exec_lo | 
|  | ; GFX12-TRUE16-NEXT:    s_mov_b32 m0, s2 | 
|  | ; GFX12-TRUE16-NEXT:    ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 | 
|  | ; GFX12-TRUE16-NEXT:    s_mov_b32 exec_lo, s3 | 
|  | ; GFX12-TRUE16-NEXT:    v_mov_b32_e32 v0, s0 | 
|  | ; GFX12-TRUE16-NEXT:    v_mov_b32_e32 v2, s1 | 
|  | ; GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) | 
|  | ; GFX12-TRUE16-NEXT:    v_interp_p10_f16_f32 v3, v1.l, v0, v1.l wait_exp:0 | 
|  | ; GFX12-TRUE16-NEXT:    v_interp_p10_f16_f32 v4, v1.h, v0, v1.h wait_exp:7 | 
|  | ; GFX12-TRUE16-NEXT:    v_interp_p2_f16_f32 v0.l, v1.l, v2, v3 wait_exp:7 | 
|  | ; GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) | 
|  | ; GFX12-TRUE16-NEXT:    v_interp_p2_f16_f32 v0.h, v1.h, v2, v4 wait_exp:7 | 
|  | ; GFX12-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v0.h | 
|  | ; GFX12-TRUE16-NEXT:    ; return to shader part epilog | 
|  | ; | 
|  | ; GFX12-FAKE16-LABEL: v_interp_f16: | 
|  | ; GFX12-FAKE16:       ; %bb.0: ; %main_body | 
|  | ; GFX12-FAKE16-NEXT:    s_mov_b32 s3, exec_lo | 
|  | ; GFX12-FAKE16-NEXT:    s_wqm_b32 exec_lo, exec_lo | 
|  | ; GFX12-FAKE16-NEXT:    s_mov_b32 m0, s2 | 
|  | ; GFX12-FAKE16-NEXT:    ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 | 
|  | ; GFX12-FAKE16-NEXT:    s_mov_b32 exec_lo, s3 | 
|  | ; GFX12-FAKE16-NEXT:    v_mov_b32_e32 v0, s0 | 
|  | ; GFX12-FAKE16-NEXT:    v_mov_b32_e32 v2, s1 | 
|  | ; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) | 
|  | ; GFX12-FAKE16-NEXT:    v_interp_p10_f16_f32 v3, v1, v0, v1 wait_exp:0 | 
|  | ; GFX12-FAKE16-NEXT:    v_interp_p10_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7 | 
|  | ; GFX12-FAKE16-NEXT:    v_interp_p2_f16_f32 v3, v1, v2, v3 wait_exp:7 | 
|  | ; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) | 
|  | ; GFX12-FAKE16-NEXT:    v_interp_p2_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7 | 
|  | ; GFX12-FAKE16-NEXT:    v_add_f16_e32 v0, v3, v0 | 
|  | ; GFX12-FAKE16-NEXT:    ; return to shader part epilog | 
|  | main_body: | 
|  | %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0) | 
|  | %l_p0 = call float @llvm.amdgcn.interp.inreg.p10.f16(float %p0, float %i, float %p0, i1 0) | 
|  | %l_p1 = call half @llvm.amdgcn.interp.inreg.p2.f16(float %p0, float %j, float %l_p0, i1 0) | 
|  | %h_p0 = call float @llvm.amdgcn.interp.inreg.p10.f16(float %p0, float %i, float %p0, i1 1) | 
|  | %h_p1 = call half @llvm.amdgcn.interp.inreg.p2.f16(float %p0, float %j, float %h_p0, i1 1) | 
|  | %res = fadd half %l_p1, %h_p1 | 
|  | ret half %res | 
|  | } | 
|  |  | 
; Same shape as v_interp_f16, but calls the llvm.amdgcn.interp.p10.rtz.f16 and
; llvm.amdgcn.interp.p2.rtz.f16 intrinsics. The checks expect these to select
; v_interp_p10_rtz_f16_f32 / v_interp_p2_rtz_f16_f32, with the same .l/.h
; (real-true16) versus op_sel (fake16) operand forms as the non-rtz test.
|  | define amdgpu_ps half @v_interp_rtz_f16(float inreg %i, float inreg %j, i32 inreg %m0) #0 { | 
|  | ; GFX11-TRUE16-LABEL: v_interp_rtz_f16: | 
|  | ; GFX11-TRUE16:       ; %bb.0: ; %main_body | 
|  | ; GFX11-TRUE16-NEXT:    s_mov_b32 s3, exec_lo | 
|  | ; GFX11-TRUE16-NEXT:    s_wqm_b32 exec_lo, exec_lo | 
|  | ; GFX11-TRUE16-NEXT:    s_mov_b32 m0, s2 | 
|  | ; GFX11-TRUE16-NEXT:    lds_param_load v1, attr0.x wait_vdst:15 | 
|  | ; GFX11-TRUE16-NEXT:    s_mov_b32 exec_lo, s3 | 
|  | ; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v0, s0 | 
|  | ; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v2, s1 | 
|  | ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) | 
|  | ; GFX11-TRUE16-NEXT:    v_interp_p10_rtz_f16_f32 v3, v1.l, v0, v1.l wait_exp:0 | 
|  | ; GFX11-TRUE16-NEXT:    v_interp_p10_rtz_f16_f32 v4, v1.h, v0, v1.h wait_exp:7 | 
|  | ; GFX11-TRUE16-NEXT:    v_interp_p2_rtz_f16_f32 v0.l, v1.l, v2, v3 wait_exp:7 | 
|  | ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) | 
|  | ; GFX11-TRUE16-NEXT:    v_interp_p2_rtz_f16_f32 v0.h, v1.h, v2, v4 wait_exp:7 | 
|  | ; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v0.h | 
|  | ; GFX11-TRUE16-NEXT:    ; return to shader part epilog | 
|  | ; | 
|  | ; GFX11-FAKE16-LABEL: v_interp_rtz_f16: | 
|  | ; GFX11-FAKE16:       ; %bb.0: ; %main_body | 
|  | ; GFX11-FAKE16-NEXT:    s_mov_b32 s3, exec_lo | 
|  | ; GFX11-FAKE16-NEXT:    s_wqm_b32 exec_lo, exec_lo | 
|  | ; GFX11-FAKE16-NEXT:    s_mov_b32 m0, s2 | 
|  | ; GFX11-FAKE16-NEXT:    lds_param_load v1, attr0.x wait_vdst:15 | 
|  | ; GFX11-FAKE16-NEXT:    s_mov_b32 exec_lo, s3 | 
|  | ; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, s0 | 
|  | ; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v2, s1 | 
|  | ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) | 
|  | ; GFX11-FAKE16-NEXT:    v_interp_p10_rtz_f16_f32 v3, v1, v0, v1 wait_exp:0 | 
|  | ; GFX11-FAKE16-NEXT:    v_interp_p10_rtz_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7 | 
|  | ; GFX11-FAKE16-NEXT:    v_interp_p2_rtz_f16_f32 v3, v1, v2, v3 wait_exp:7 | 
|  | ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) | 
|  | ; GFX11-FAKE16-NEXT:    v_interp_p2_rtz_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7 | 
|  | ; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, v3, v0 | 
|  | ; GFX11-FAKE16-NEXT:    ; return to shader part epilog | 
|  | ; | 
|  | ; GFX12-TRUE16-LABEL: v_interp_rtz_f16: | 
|  | ; GFX12-TRUE16:       ; %bb.0: ; %main_body | 
|  | ; GFX12-TRUE16-NEXT:    s_mov_b32 s3, exec_lo | 
|  | ; GFX12-TRUE16-NEXT:    s_wqm_b32 exec_lo, exec_lo | 
|  | ; GFX12-TRUE16-NEXT:    s_mov_b32 m0, s2 | 
|  | ; GFX12-TRUE16-NEXT:    ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 | 
|  | ; GFX12-TRUE16-NEXT:    s_mov_b32 exec_lo, s3 | 
|  | ; GFX12-TRUE16-NEXT:    v_mov_b32_e32 v0, s0 | 
|  | ; GFX12-TRUE16-NEXT:    v_mov_b32_e32 v2, s1 | 
|  | ; GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) | 
|  | ; GFX12-TRUE16-NEXT:    v_interp_p10_rtz_f16_f32 v3, v1.l, v0, v1.l wait_exp:0 | 
|  | ; GFX12-TRUE16-NEXT:    v_interp_p10_rtz_f16_f32 v4, v1.h, v0, v1.h wait_exp:7 | 
|  | ; GFX12-TRUE16-NEXT:    v_interp_p2_rtz_f16_f32 v0.l, v1.l, v2, v3 wait_exp:7 | 
|  | ; GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) | 
|  | ; GFX12-TRUE16-NEXT:    v_interp_p2_rtz_f16_f32 v0.h, v1.h, v2, v4 wait_exp:7 | 
|  | ; GFX12-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v0.h | 
|  | ; GFX12-TRUE16-NEXT:    ; return to shader part epilog | 
|  | ; | 
|  | ; GFX12-FAKE16-LABEL: v_interp_rtz_f16: | 
|  | ; GFX12-FAKE16:       ; %bb.0: ; %main_body | 
|  | ; GFX12-FAKE16-NEXT:    s_mov_b32 s3, exec_lo | 
|  | ; GFX12-FAKE16-NEXT:    s_wqm_b32 exec_lo, exec_lo | 
|  | ; GFX12-FAKE16-NEXT:    s_mov_b32 m0, s2 | 
|  | ; GFX12-FAKE16-NEXT:    ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1 | 
|  | ; GFX12-FAKE16-NEXT:    s_mov_b32 exec_lo, s3 | 
|  | ; GFX12-FAKE16-NEXT:    v_mov_b32_e32 v0, s0 | 
|  | ; GFX12-FAKE16-NEXT:    v_mov_b32_e32 v2, s1 | 
|  | ; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) | 
|  | ; GFX12-FAKE16-NEXT:    v_interp_p10_rtz_f16_f32 v3, v1, v0, v1 wait_exp:0 | 
|  | ; GFX12-FAKE16-NEXT:    v_interp_p10_rtz_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7 | 
|  | ; GFX12-FAKE16-NEXT:    v_interp_p2_rtz_f16_f32 v3, v1, v2, v3 wait_exp:7 | 
|  | ; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) | 
|  | ; GFX12-FAKE16-NEXT:    v_interp_p2_rtz_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7 | 
|  | ; GFX12-FAKE16-NEXT:    v_add_f16_e32 v0, v3, v0 | 
|  | ; GFX12-FAKE16-NEXT:    ; return to shader part epilog | 
|  | main_body: | 
|  | %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0) | 
|  | %l_p0 = call float @llvm.amdgcn.interp.p10.rtz.f16(float %p0, float %i, float %p0, i1 0) | 
|  | %l_p1 = call half @llvm.amdgcn.interp.p2.rtz.f16(float %p0, float %j, float %l_p0, i1 0) | 
|  | %h_p0 = call float @llvm.amdgcn.interp.p10.rtz.f16(float %p0, float %i, float %p0, i1 1) | 
|  | %h_p1 = call half @llvm.amdgcn.interp.p2.rtz.f16(float %p0, float %j, float %h_p0, i1 1) | 
|  | %res = fadd half %l_p1, %h_p1 | 
|  | ret half %res | 
|  | } | 
|  |  | 
; f16 interpolation with constant operands: the parameter and accumulator
; inputs of the p10.f16 and p2.f16 calls are the immediate 0.0 rather than the
; result of an lds.param.load, so this function has no %m0 argument and the
; checks contain no parameter load or exec/WQM manipulation.
; The checks expect the zero constants to be materialized into VGPRs with
; v_mov instructions before the interp instructions use them. The p10 result
; is truncated to half and added to the p2 result to form the return value.
|  | define amdgpu_ps half @v_interp_f16_imm_params(float inreg %i, float inreg %j) #0 { | 
|  | ; GFX11-TRUE16-LABEL: v_interp_f16_imm_params: | 
|  | ; GFX11-TRUE16:       ; %bb.0: ; %main_body | 
|  | ; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 0 | 
|  | ; GFX11-TRUE16-NEXT:    v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, 0 | 
|  | ; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v3, s1 | 
|  | ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) | 
|  | ; GFX11-TRUE16-NEXT:    v_interp_p10_f16_f32 v1, v0.l, v1, v0.l wait_exp:7 | 
|  | ; GFX11-TRUE16-NEXT:    v_interp_p2_f16_f32 v0.l, v0.l, v3, v2 wait_exp:7 | 
|  | ; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) | 
|  | ; GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.h, v1 | 
|  | ; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.h, v0.l | 
|  | ; GFX11-TRUE16-NEXT:    ; return to shader part epilog | 
|  | ; | 
|  | ; GFX11-FAKE16-LABEL: v_interp_f16_imm_params: | 
|  | ; GFX11-FAKE16:       ; %bb.0: ; %main_body | 
|  | ; GFX11-FAKE16-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0 | 
|  | ; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v2, s1 | 
|  | ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) | 
|  | ; GFX11-FAKE16-NEXT:    v_interp_p10_f16_f32 v1, v0, v1, v0 wait_exp:7 | 
|  | ; GFX11-FAKE16-NEXT:    v_interp_p2_f16_f32 v0, v0, v2, v0 wait_exp:7 | 
|  | ; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) | 
|  | ; GFX11-FAKE16-NEXT:    v_cvt_f16_f32_e32 v1, v1 | 
|  | ; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, v1, v0 | 
|  | ; GFX11-FAKE16-NEXT:    ; return to shader part epilog | 
|  | ; | 
|  | ; GFX12-TRUE16-LABEL: v_interp_f16_imm_params: | 
|  | ; GFX12-TRUE16:       ; %bb.0: ; %main_body | 
|  | ; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 0 | 
|  | ; GFX12-TRUE16-NEXT:    v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, 0 | 
|  | ; GFX12-TRUE16-NEXT:    v_mov_b32_e32 v3, s1 | 
|  | ; GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) | 
|  | ; GFX12-TRUE16-NEXT:    v_interp_p10_f16_f32 v1, v0.l, v1, v0.l wait_exp:7 | 
|  | ; GFX12-TRUE16-NEXT:    v_interp_p2_f16_f32 v0.l, v0.l, v3, v2 wait_exp:7 | 
|  | ; GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) | 
|  | ; GFX12-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.h, v1 | 
|  | ; GFX12-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.h, v0.l | 
|  | ; GFX12-TRUE16-NEXT:    ; return to shader part epilog | 
|  | ; | 
|  | ; GFX12-FAKE16-LABEL: v_interp_f16_imm_params: | 
|  | ; GFX12-FAKE16:       ; %bb.0: ; %main_body | 
|  | ; GFX12-FAKE16-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0 | 
|  | ; GFX12-FAKE16-NEXT:    v_mov_b32_e32 v2, s1 | 
|  | ; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) | 
|  | ; GFX12-FAKE16-NEXT:    v_interp_p10_f16_f32 v1, v0, v1, v0 wait_exp:7 | 
|  | ; GFX12-FAKE16-NEXT:    v_interp_p2_f16_f32 v0, v0, v2, v0 wait_exp:7 | 
|  | ; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) | 
|  | ; GFX12-FAKE16-NEXT:    v_cvt_f16_f32_e32 v1, v1 | 
|  | ; GFX12-FAKE16-NEXT:    v_add_f16_e32 v0, v1, v0 | 
|  | ; GFX12-FAKE16-NEXT:    ; return to shader part epilog | 
|  | main_body: | 
|  | %l_p0 = call float @llvm.amdgcn.interp.inreg.p10.f16(float 0.0, float %i, float 0.0, i1 0) | 
|  | %l_p1 = call half @llvm.amdgcn.interp.inreg.p2.f16(float 0.0, float %j, float 0.0, i1 0) | 
|  | %h = fptrunc float %l_p0 to half | 
|  | %res = fadd half %h, %l_p1 | 
|  | ret half %res | 
|  | } | 
|  |  | 
; Declarations of the AMDGPU intrinsics called by the test functions in this
; file, followed by the attribute groups they reference.
; NOTE(review): llvm.amdgcn.exp.f16 is declared here but is not called by any
; function visible in this file - confirm whether it is still needed.
|  | declare float @llvm.amdgcn.lds.param.load(i32, i32, i32) #1 | 
|  | declare float @llvm.amdgcn.interp.inreg.p10(float, float, float) #0 | 
|  | declare float @llvm.amdgcn.interp.inreg.p2(float, float, float) #0 | 
|  | declare float @llvm.amdgcn.interp.inreg.p10.f16(float, float, float, i1) #0 | 
|  | declare half @llvm.amdgcn.interp.inreg.p2.f16(float, float, float, i1) #0 | 
|  | declare float @llvm.amdgcn.interp.p10.rtz.f16(float, float, float, i1) #0 | 
|  | declare half @llvm.amdgcn.interp.p2.rtz.f16(float, float, float, i1) #0 | 
|  | declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 | 
|  | declare void @llvm.amdgcn.exp.f16(i32, i32, float, float, float, float, i1, i1) #0 | 
|  |  | 
; #0 is applied to every test function and to all intrinsics except the
; parameter load; #1 (which adds readnone) is applied only to
; llvm.amdgcn.lds.param.load.
|  | attributes #0 = { nounwind } | 
|  | attributes #1 = { nounwind readnone } | 