| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -global-isel=1 -O2 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck --check-prefix=GISEL12 %s |
| ; RUN: llc -global-isel=0 -O2 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck --check-prefix=DAGISEL12 %s |
| ; RUN: llc -global-isel=1 -O2 -mtriple=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck --check-prefix=GISEL10 %s |
| ; RUN: llc -global-isel=0 -O2 -mtriple=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck --check-prefix=DAGISEL10 %s |
| |
| ; This shouldn't be too different from wave32, so we'll only test one case. |
| |
| ; @basic: an amdgpu_cs_chain function compiled with +wavefrontsize64 (see the run lines at the top). |
| ; It branches on the result of llvm.amdgcn.init.whole.wave, rewrites fields 2 and 3 of the |
| ; %vgpr struct in the %shader block, and then hands off through llvm.amdgcn.cs.chain. |
| ; The %y argument is not referenced anywhere in the body. |
| ; The assertions below are autogenerated; regenerate them with utils/update_llc_test_checks.py |
| ; rather than editing them by hand. |
| define amdgpu_cs_chain void @basic(<3 x i32> inreg %sgpr, ptr inreg %callee, i64 inreg %exec, { i32, ptr addrspace(5), i32, i64 } %vgpr, i32 %x, i32 %y) { |
| ; GISEL12-LABEL: basic: |
| ; GISEL12: ; %bb.0: ; %entry |
| ; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GISEL12-NEXT: s_wait_expcnt 0x0 |
| ; GISEL12-NEXT: s_wait_samplecnt 0x0 |
| ; GISEL12-NEXT: s_wait_bvhcnt 0x0 |
| ; GISEL12-NEXT: s_wait_kmcnt 0x0 |
| ; GISEL12-NEXT: s_or_saveexec_b64 s[10:11], -1 |
| ; GISEL12-NEXT: s_mov_b32 s8, s3 |
| ; GISEL12-NEXT: s_mov_b32 s9, s4 |
| ; GISEL12-NEXT: s_mov_b32 s4, s5 |
| ; GISEL12-NEXT: s_mov_b32 s5, s6 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_and_saveexec_b64 s[6:7], s[10:11] |
| ; GISEL12-NEXT: s_cbranch_execz .LBB0_2 |
| ; GISEL12-NEXT: ; %bb.1: ; %shader |
| ; GISEL12-NEXT: s_or_saveexec_b64 s[10:11], -1 |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v13, s[10:11] |
| ; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_2) |
| ; GISEL12-NEXT: v_cmp_ne_u32_e64 s[12:13], 0, v0 |
| ; GISEL12-NEXT: s_wait_alu 0xf1ff |
| ; GISEL12-NEXT: v_mov_b32_e32 v0, s12 |
| ; GISEL12-NEXT: v_mov_b32_e32 v1, s13 |
| ; GISEL12-NEXT: s_mov_b64 exec, s[10:11] |
| ; GISEL12-NEXT: v_mov_b32_e32 v11, v0 |
| ; GISEL12-NEXT: v_add_nc_u32_e32 v10, 42, v13 |
| ; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_3) |
| ; GISEL12-NEXT: v_mov_b32_e32 v12, v1 |
| ; GISEL12-NEXT: .LBB0_2: ; %tail |
| ; GISEL12-NEXT: s_wait_alu 0xfffe |
| ; GISEL12-NEXT: s_or_b64 exec, exec, s[6:7] |
| ; GISEL12-NEXT: s_mov_b64 exec, s[4:5] |
| ; GISEL12-NEXT: s_setpc_b64 s[8:9] |
| ; |
| ; DAGISEL12-LABEL: basic: |
| ; DAGISEL12: ; %bb.0: ; %entry |
| ; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_expcnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_samplecnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_bvhcnt 0x0 |
| ; DAGISEL12-NEXT: s_wait_kmcnt 0x0 |
| ; DAGISEL12-NEXT: s_or_saveexec_b64 s[10:11], -1 |
| ; DAGISEL12-NEXT: s_mov_b32 s7, s6 |
| ; DAGISEL12-NEXT: s_mov_b32 s6, s5 |
| ; DAGISEL12-NEXT: s_mov_b32 s5, s4 |
| ; DAGISEL12-NEXT: s_mov_b32 s4, s3 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: s_and_saveexec_b64 s[8:9], s[10:11] |
| ; DAGISEL12-NEXT: ; %bb.1: ; %shader |
| ; DAGISEL12-NEXT: s_or_saveexec_b64 s[10:11], -1 |
| ; DAGISEL12-NEXT: s_wait_alu 0xfffe |
| ; DAGISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v13, s[10:11] |
| ; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; DAGISEL12-NEXT: v_cmp_ne_u32_e64 s[12:13], 0, v0 |
| ; DAGISEL12-NEXT: s_mov_b64 exec, s[10:11] |
| ; DAGISEL12-NEXT: v_mov_b32_e32 v11, s12 |
| ; DAGISEL12-NEXT: v_add_nc_u32_e32 v10, 42, v13 |
| ; DAGISEL12-NEXT: v_mov_b32_e32 v12, s13 |
| ; DAGISEL12-NEXT: ; %bb.2: ; %tail |
| ; DAGISEL12-NEXT: s_or_b64 exec, exec, s[8:9] |
| ; DAGISEL12-NEXT: s_mov_b64 exec, s[6:7] |
| ; DAGISEL12-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; GISEL10-LABEL: basic: |
| ; GISEL10: ; %bb.0: ; %entry |
| ; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL10-NEXT: s_or_saveexec_b64 s[10:11], -1 |
| ; GISEL10-NEXT: s_mov_b32 s8, s3 |
| ; GISEL10-NEXT: s_mov_b32 s9, s4 |
| ; GISEL10-NEXT: s_mov_b32 s4, s5 |
| ; GISEL10-NEXT: s_mov_b32 s5, s6 |
| ; GISEL10-NEXT: s_and_saveexec_b64 s[6:7], s[10:11] |
| ; GISEL10-NEXT: s_cbranch_execz .LBB0_2 |
| ; GISEL10-NEXT: ; %bb.1: ; %shader |
| ; GISEL10-NEXT: s_or_saveexec_b64 s[10:11], -1 |
| ; GISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v13, s[10:11] |
| ; GISEL10-NEXT: v_cmp_ne_u32_e64 s[12:13], 0, v0 |
| ; GISEL10-NEXT: v_mov_b32_e32 v0, s12 |
| ; GISEL10-NEXT: v_mov_b32_e32 v1, s13 |
| ; GISEL10-NEXT: s_mov_b64 exec, s[10:11] |
| ; GISEL10-NEXT: v_mov_b32_e32 v11, v0 |
| ; GISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v13 |
| ; GISEL10-NEXT: v_mov_b32_e32 v12, v1 |
| ; GISEL10-NEXT: .LBB0_2: ; %tail |
| ; GISEL10-NEXT: s_or_b64 exec, exec, s[6:7] |
| ; GISEL10-NEXT: s_mov_b64 exec, s[4:5] |
| ; GISEL10-NEXT: s_setpc_b64 s[8:9] |
| ; |
| ; DAGISEL10-LABEL: basic: |
| ; DAGISEL10: ; %bb.0: ; %entry |
| ; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL10-NEXT: s_or_saveexec_b64 s[10:11], -1 |
| ; DAGISEL10-NEXT: s_mov_b32 s7, s6 |
| ; DAGISEL10-NEXT: s_mov_b32 s6, s5 |
| ; DAGISEL10-NEXT: s_mov_b32 s5, s4 |
| ; DAGISEL10-NEXT: s_mov_b32 s4, s3 |
| ; DAGISEL10-NEXT: s_and_saveexec_b64 s[8:9], s[10:11] |
| ; DAGISEL10-NEXT: ; %bb.1: ; %shader |
| ; DAGISEL10-NEXT: s_or_saveexec_b64 s[10:11], -1 |
| ; DAGISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v13, s[10:11] |
| ; DAGISEL10-NEXT: v_cmp_ne_u32_e64 s[12:13], 0, v0 |
| ; DAGISEL10-NEXT: s_mov_b64 exec, s[10:11] |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v11, s12 |
| ; DAGISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v13 |
| ; DAGISEL10-NEXT: v_mov_b32_e32 v12, s13 |
| ; DAGISEL10-NEXT: ; %bb.2: ; %tail |
| ; DAGISEL10-NEXT: s_or_b64 exec, exec, s[8:9] |
| ; DAGISEL10-NEXT: s_mov_b64 exec, s[6:7] |
| ; DAGISEL10-NEXT: s_setpc_b64 s[4:5] |
| entry: |
| ; The i1 returned by init.whole.wave selects between the %shader and %tail blocks. |
| %entry_exec = call i1 @llvm.amdgcn.init.whole.wave() |
| br i1 %entry_exec, label %shader, label %tail |
| |
| shader: |
| ; Ordinary (non-WWM) work: %x + 42 is stored into field 2 of the %vgpr struct. |
| %nonwwm = add i32 %x, 42 |
| %vgpr.1 = insertvalue { i32, ptr addrspace(5), i32, i64} %vgpr, i32 %nonwwm, 2 |
| |
| ; set.inactive receives %x and the constant 71 (0x47 in the expected v_cndmask). |
| ; Its result is compared against 0, turned into an i64 ballot, wrapped in |
| ; strict.wwm, and stored into field 3 (the i64 member) of the struct. |
| %full.vgpr = call i32 @llvm.amdgcn.set.inactive.i32(i32 %x, i32 71) |
| %non.zero = icmp ne i32 %full.vgpr, 0 |
| %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %non.zero) |
| %wwm = call i64 @llvm.amdgcn.strict.wwm.i64(i64 %ballot) |
| %vgpr.2 = insertvalue { i32, ptr addrspace(5), i32, i64} %vgpr.1, i64 %wwm, 3 |
| |
| br label %tail |
| |
| tail: |
| ; Merge the unmodified %vgpr (arriving from %entry) with the updated struct (from %shader). |
| %vgpr.args = phi { i32, ptr addrspace(5), i32, i64} [%vgpr, %entry], [%vgpr.2, %shader] |
| ; Chain to %callee, forwarding %exec, %sgpr (inreg) and the merged struct. |
| ; NOTE(review): the trailing i32 0 is presumably the intrinsic's flags operand; confirm against the AMDGPU docs. |
| call void(ptr, i64, <3 x i32>, { i32, ptr addrspace(5), i32, i64 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i64 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i64 } %vgpr.args, i32 0) |
| unreachable |
| } |