|  | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | 
|  | ; RUN: llc -global-isel=1 -O2 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GISEL12 %s | 
|  | ; RUN: llc -global-isel=0 -O2 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=DAGISEL12 %s | 
|  | ; RUN: llc -global-isel=1 -O2 -mtriple=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GISEL10 %s | 
|  | ; RUN: llc -global-isel=0 -O2 -mtriple=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=DAGISEL10 %s | 
|  |  | 
|  | ; This shouldn't be too different from wave32, so we'll only test one case. | 
|  |  | 
|  | ; Test function: an amdgpu_cs_chain function that calls llvm.amdgcn.init.whole.wave in | 
|  | ; its entry block and branches on the i1 result. | 
|  | ;   %shader - taken when the result is true; stores x + 42 into field 2 of %vgpr and a | 
|  | ;             strict.wwm-wrapped 64-bit ballot into field 3. | 
|  | ;   %tail   - merges the original and the updated struct with a phi and passes it to | 
|  | ;             llvm.amdgcn.cs.chain together with %callee, %exec and %sgpr. | 
|  | ; %y is not referenced anywhere in the body. | 
|  | ; The assertion lines inside the function are produced by utils/update_llc_test_checks.py; | 
|  | ; regenerate them with the script rather than editing them by hand. | 
|  | define amdgpu_cs_chain void @basic(<3 x i32> inreg %sgpr, ptr inreg %callee, i64 inreg %exec, { i32, ptr addrspace(5), i32, i64 } %vgpr, i32 %x, i32 %y) { | 
|  | ; GISEL12-LABEL: basic: | 
|  | ; GISEL12:       ; %bb.0: ; %entry | 
|  | ; GISEL12-NEXT:    s_wait_loadcnt_dscnt 0x0 | 
|  | ; GISEL12-NEXT:    s_wait_expcnt 0x0 | 
|  | ; GISEL12-NEXT:    s_wait_samplecnt 0x0 | 
|  | ; GISEL12-NEXT:    s_wait_bvhcnt 0x0 | 
|  | ; GISEL12-NEXT:    s_wait_kmcnt 0x0 | 
|  | ; GISEL12-NEXT:    s_or_saveexec_b64 s[10:11], -1 | 
|  | ; GISEL12-NEXT:    s_mov_b32 s8, s3 | 
|  | ; GISEL12-NEXT:    s_mov_b32 s9, s4 | 
|  | ; GISEL12-NEXT:    s_mov_b32 s4, s5 | 
|  | ; GISEL12-NEXT:    s_mov_b32 s5, s6 | 
|  | ; GISEL12-NEXT:    s_wait_alu 0xfffe | 
|  | ; GISEL12-NEXT:    s_and_saveexec_b64 s[6:7], s[10:11] | 
|  | ; GISEL12-NEXT:    s_cbranch_execz .LBB0_2 | 
|  | ; GISEL12-NEXT:  ; %bb.1: ; %shader | 
|  | ; GISEL12-NEXT:    s_or_saveexec_b64 s[10:11], -1 | 
|  | ; GISEL12-NEXT:    s_wait_alu 0xfffe | 
|  | ; GISEL12-NEXT:    v_cndmask_b32_e64 v0, 0x47, v13, s[10:11] | 
|  | ; GISEL12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) | 
|  | ; GISEL12-NEXT:    v_cmp_ne_u32_e64 s[12:13], 0, v0 | 
|  | ; GISEL12-NEXT:    s_wait_alu 0xf1ff | 
|  | ; GISEL12-NEXT:    v_mov_b32_e32 v0, s12 | 
|  | ; GISEL12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) | 
|  | ; GISEL12-NEXT:    v_mov_b32_e32 v1, s13 | 
|  | ; GISEL12-NEXT:    s_mov_b64 exec, s[10:11] | 
|  | ; GISEL12-NEXT:    v_mov_b32_e32 v11, v0 | 
|  | ; GISEL12-NEXT:    v_add_nc_u32_e32 v10, 42, v13 | 
|  | ; GISEL12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | 
|  | ; GISEL12-NEXT:    v_mov_b32_e32 v12, v1 | 
|  | ; GISEL12-NEXT:  .LBB0_2: ; %tail | 
|  | ; GISEL12-NEXT:    s_wait_alu 0xfffe | 
|  | ; GISEL12-NEXT:    s_or_b64 exec, exec, s[6:7] | 
|  | ; GISEL12-NEXT:    s_mov_b64 exec, s[4:5] | 
|  | ; GISEL12-NEXT:    s_setpc_b64 s[8:9] | 
|  | ; | 
|  | ; DAGISEL12-LABEL: basic: | 
|  | ; DAGISEL12:       ; %bb.0: ; %entry | 
|  | ; DAGISEL12-NEXT:    s_wait_loadcnt_dscnt 0x0 | 
|  | ; DAGISEL12-NEXT:    s_wait_expcnt 0x0 | 
|  | ; DAGISEL12-NEXT:    s_wait_samplecnt 0x0 | 
|  | ; DAGISEL12-NEXT:    s_wait_bvhcnt 0x0 | 
|  | ; DAGISEL12-NEXT:    s_wait_kmcnt 0x0 | 
|  | ; DAGISEL12-NEXT:    s_or_saveexec_b64 s[10:11], -1 | 
|  | ; DAGISEL12-NEXT:    s_mov_b32 s7, s6 | 
|  | ; DAGISEL12-NEXT:    s_mov_b32 s6, s5 | 
|  | ; DAGISEL12-NEXT:    s_mov_b32 s5, s4 | 
|  | ; DAGISEL12-NEXT:    s_mov_b32 s4, s3 | 
|  | ; DAGISEL12-NEXT:    s_wait_alu 0xfffe | 
|  | ; DAGISEL12-NEXT:    s_and_saveexec_b64 s[8:9], s[10:11] | 
|  | ; DAGISEL12-NEXT:  ; %bb.1: ; %shader | 
|  | ; DAGISEL12-NEXT:    s_or_saveexec_b64 s[10:11], -1 | 
|  | ; DAGISEL12-NEXT:    s_wait_alu 0xfffe | 
|  | ; DAGISEL12-NEXT:    v_cndmask_b32_e64 v0, 0x47, v13, s[10:11] | 
|  | ; DAGISEL12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | 
|  | ; DAGISEL12-NEXT:    v_cmp_ne_u32_e64 s[12:13], 0, v0 | 
|  | ; DAGISEL12-NEXT:    s_mov_b64 exec, s[10:11] | 
|  | ; DAGISEL12-NEXT:    v_mov_b32_e32 v11, s12 | 
|  | ; DAGISEL12-NEXT:    v_add_nc_u32_e32 v10, 42, v13 | 
|  | ; DAGISEL12-NEXT:    v_mov_b32_e32 v12, s13 | 
|  | ; DAGISEL12-NEXT:  ; %bb.2: ; %tail | 
|  | ; DAGISEL12-NEXT:    s_or_b64 exec, exec, s[8:9] | 
|  | ; DAGISEL12-NEXT:    s_mov_b64 exec, s[6:7] | 
|  | ; DAGISEL12-NEXT:    s_setpc_b64 s[4:5] | 
|  | ; | 
|  | ; GISEL10-LABEL: basic: | 
|  | ; GISEL10:       ; %bb.0: ; %entry | 
|  | ; GISEL10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | 
|  | ; GISEL10-NEXT:    s_or_saveexec_b64 s[10:11], -1 | 
|  | ; GISEL10-NEXT:    s_mov_b32 s8, s3 | 
|  | ; GISEL10-NEXT:    s_mov_b32 s9, s4 | 
|  | ; GISEL10-NEXT:    s_mov_b32 s4, s5 | 
|  | ; GISEL10-NEXT:    s_mov_b32 s5, s6 | 
|  | ; GISEL10-NEXT:    s_and_saveexec_b64 s[6:7], s[10:11] | 
|  | ; GISEL10-NEXT:    s_cbranch_execz .LBB0_2 | 
|  | ; GISEL10-NEXT:  ; %bb.1: ; %shader | 
|  | ; GISEL10-NEXT:    s_or_saveexec_b64 s[10:11], -1 | 
|  | ; GISEL10-NEXT:    v_cndmask_b32_e64 v0, 0x47, v13, s[10:11] | 
|  | ; GISEL10-NEXT:    v_cmp_ne_u32_e64 s[12:13], 0, v0 | 
|  | ; GISEL10-NEXT:    v_mov_b32_e32 v0, s12 | 
|  | ; GISEL10-NEXT:    v_mov_b32_e32 v1, s13 | 
|  | ; GISEL10-NEXT:    s_mov_b64 exec, s[10:11] | 
|  | ; GISEL10-NEXT:    v_mov_b32_e32 v11, v0 | 
|  | ; GISEL10-NEXT:    v_add_nc_u32_e32 v10, 42, v13 | 
|  | ; GISEL10-NEXT:    v_mov_b32_e32 v12, v1 | 
|  | ; GISEL10-NEXT:  .LBB0_2: ; %tail | 
|  | ; GISEL10-NEXT:    s_or_b64 exec, exec, s[6:7] | 
|  | ; GISEL10-NEXT:    s_mov_b64 exec, s[4:5] | 
|  | ; GISEL10-NEXT:    s_setpc_b64 s[8:9] | 
|  | ; | 
|  | ; DAGISEL10-LABEL: basic: | 
|  | ; DAGISEL10:       ; %bb.0: ; %entry | 
|  | ; DAGISEL10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | 
|  | ; DAGISEL10-NEXT:    s_or_saveexec_b64 s[10:11], -1 | 
|  | ; DAGISEL10-NEXT:    s_mov_b32 s7, s6 | 
|  | ; DAGISEL10-NEXT:    s_mov_b32 s6, s5 | 
|  | ; DAGISEL10-NEXT:    s_mov_b32 s5, s4 | 
|  | ; DAGISEL10-NEXT:    s_mov_b32 s4, s3 | 
|  | ; DAGISEL10-NEXT:    s_and_saveexec_b64 s[8:9], s[10:11] | 
|  | ; DAGISEL10-NEXT:  ; %bb.1: ; %shader | 
|  | ; DAGISEL10-NEXT:    s_or_saveexec_b64 s[10:11], -1 | 
|  | ; DAGISEL10-NEXT:    v_cndmask_b32_e64 v0, 0x47, v13, s[10:11] | 
|  | ; DAGISEL10-NEXT:    v_cmp_ne_u32_e64 s[12:13], 0, v0 | 
|  | ; DAGISEL10-NEXT:    s_mov_b64 exec, s[10:11] | 
|  | ; DAGISEL10-NEXT:    v_mov_b32_e32 v11, s12 | 
|  | ; DAGISEL10-NEXT:    v_add_nc_u32_e32 v10, 42, v13 | 
|  | ; DAGISEL10-NEXT:    v_mov_b32_e32 v12, s13 | 
|  | ; DAGISEL10-NEXT:  ; %bb.2: ; %tail | 
|  | ; DAGISEL10-NEXT:    s_or_b64 exec, exec, s[8:9] | 
|  | ; DAGISEL10-NEXT:    s_mov_b64 exec, s[6:7] | 
|  | ; DAGISEL10-NEXT:    s_setpc_b64 s[4:5] | 
|  | entry: | 
|  | ; The i1 returned by init.whole.wave selects between the %shader and %tail paths. | 
|  | %entry_exec = call i1 @llvm.amdgcn.init.whole.wave() | 
|  | br i1 %entry_exec, label %shader, label %tail | 
|  |  | 
|  | shader: | 
|  | ; Update that is not wrapped in strict.wwm: x + 42 goes into struct field 2. | 
|  | %nonwwm = add i32 %x, 42 | 
|  | %vgpr.1 = insertvalue { i32, ptr addrspace(5), i32, i64} %vgpr, i32 %nonwwm, 2 | 
|  |  | 
|  | ; set.inactive is given %x and the constant 71 (printed as 0x47 in the generated | 
|  | ; assertions). Its result is compared against zero, turned into a 64-bit ballot, | 
|  | ; passed through strict.wwm and stored into struct field 3. | 
|  | %full.vgpr = call i32 @llvm.amdgcn.set.inactive.i32(i32 %x, i32 71) | 
|  | %non.zero = icmp ne i32 %full.vgpr, 0 | 
|  | %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %non.zero) | 
|  | %wwm = call i64 @llvm.amdgcn.strict.wwm.i64(i64 %ballot) | 
|  | %vgpr.2 = insertvalue { i32, ptr addrspace(5), i32, i64} %vgpr.1, i64 %wwm, 3 | 
|  |  | 
|  | br label %tail | 
|  |  | 
|  | tail: | 
|  | ; Incoming value is the untouched %vgpr from %entry or the updated %vgpr.2 from %shader. | 
|  | %vgpr.args = phi { i32, ptr addrspace(5), i32, i64} [%vgpr, %entry], [%vgpr.2, %shader] | 
|  | ; Chain to %callee with the merged struct. The trailing i32 0 is presumably the flags | 
|  | ; operand of llvm.amdgcn.cs.chain - confirm against the intrinsic documentation. | 
|  | call void(ptr, i64, <3 x i32>, { i32, ptr addrspace(5), i32, i64 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i64 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i64 } %vgpr.args, i32 0) | 
|  | unreachable | 
|  | } | 