| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s |
| ; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GFX8 %s |
| |
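| ; Regression test for a register coalescer bug: the coalescer introduced a |
| ; verifier error which later resulted in a crash during scheduling. The |
| ; assertions below pin the expected output for tahiti (GFX6) and tonga (GFX8). |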
| |
| declare i32 @llvm.amdgcn.workitem.id.x() #0 |
| |
| define amdgpu_kernel void @reg_coalescer_breaks_dead(ptr addrspace(1) nocapture readonly %arg, i32 %arg1, i32 %arg2, i32 %arg3, i1 %c0) #1 { |
| ; GFX6-LABEL: reg_coalescer_breaks_dead: |
| ; GFX6: ; %bb.0: ; %bb |
| ; GFX6-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| ; GFX6-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX6-NEXT: s_and_saveexec_b64 s[0:1], vcc |
| ; GFX6-NEXT: s_cbranch_execz .LBB0_2 |
| ; GFX6-NEXT: ; %bb.1: ; %bb3 |
| ; GFX6-NEXT: s_load_dword s2, s[4:5], 0xb |
| ; GFX6-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x9 |
| ; GFX6-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX6-NEXT: s_ashr_i32 s3, s2, 31 |
| ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 3 |
| ; GFX6-NEXT: s_add_u32 s2, s6, s2 |
| ; GFX6-NEXT: s_addc_u32 s3, s7, s3 |
| ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 |
| ; GFX6-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX6-NEXT: v_mov_b32_e32 v1, s2 |
| ; GFX6-NEXT: v_mov_b32_e32 v2, s3 |
| ; GFX6-NEXT: .LBB0_2: ; %bb4 |
| ; GFX6-NEXT: s_or_b64 exec, exec, s[0:1] |
| ; GFX6-NEXT: s_load_dword s0, s[4:5], 0xe |
| ; GFX6-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX6-NEXT: s_bitcmp0_b32 s0, 0 |
| ; GFX6-NEXT: s_cbranch_scc1 .LBB0_4 |
| ; GFX6-NEXT: ; %bb.3: ; %bb15 |
| ; GFX6-NEXT: s_mov_b32 m0, -1 |
| ; GFX6-NEXT: ds_write_b64 v0, v[1:2] |
| ; GFX6-NEXT: .LBB0_4: ; %bb16 |
| ; |
| ; GFX8-LABEL: reg_coalescer_breaks_dead: |
| ; GFX8: ; %bb.0: ; %bb |
| ; GFX8-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| ; GFX8-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX8-NEXT: s_and_saveexec_b64 s[0:1], vcc |
| ; GFX8-NEXT: s_cbranch_execz .LBB0_2 |
| ; GFX8-NEXT: ; %bb.1: ; %bb3 |
| ; GFX8-NEXT: s_load_dword s2, s[4:5], 0x2c |
| ; GFX8-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: s_ashr_i32 s3, s2, 31 |
| ; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 3 |
| ; GFX8-NEXT: s_add_u32 s2, s6, s2 |
| ; GFX8-NEXT: s_addc_u32 s3, s7, s3 |
| ; GFX8-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: v_mov_b32_e32 v1, s2 |
| ; GFX8-NEXT: v_mov_b32_e32 v2, s3 |
| ; GFX8-NEXT: .LBB0_2: ; %bb4 |
| ; GFX8-NEXT: s_or_b64 exec, exec, s[0:1] |
| ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x38 |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: s_bitcmp0_b32 s0, 0 |
| ; GFX8-NEXT: s_cbranch_scc1 .LBB0_4 |
| ; GFX8-NEXT: ; %bb.3: ; %bb15 |
| ; GFX8-NEXT: s_mov_b32 m0, -1 |
| ; GFX8-NEXT: ds_write_b64 v0, v[1:2] |
| ; GFX8-NEXT: .LBB0_4: ; %bb16 |
| bb: |
| %id.x = call i32 @llvm.amdgcn.workitem.id.x() |
| %cmp0 = icmp eq i32 %id.x, 0 |
| br i1 %cmp0, label %bb3, label %bb4 |
| |
| bb3: ; preds = %bb -- entered only when workitem id x is 0; %tmp below has no uses in this function |
| %tmp = ashr exact i32 poison, 8 |
| br label %bb6 |
| |
| bb4: ; preds = %bb6, %bb -- join point: %tmp5 is zero when coming from %bb, or the loop's accumulated sum from %bb6 |
| %tmp5 = phi <2 x i32> [ zeroinitializer, %bb ], [ %tmp13, %bb6 ] |
| br i1 %c0, label %bb15, label %bb16 |
| |
| bb6: ; preds = %bb6, %bb3 -- self-loop: adds the <2 x i32> loaded from %arg at index %arg1 into the accumulator %tmp7; the exit test compares poison against %arg2 |
| %tmp7 = phi <2 x i32> [ zeroinitializer, %bb3 ], [ %tmp13, %bb6 ] |
| %tmp8 = add nsw i32 0, %arg1 |
| %tmp9 = add nsw i32 %tmp8, 0 |
| %tmp10 = sext i32 %tmp9 to i64 |
| %tmp11 = getelementptr inbounds <2 x i32>, ptr addrspace(1) %arg, i64 %tmp10 |
| %tmp12 = load <2 x i32>, ptr addrspace(1) %tmp11, align 8 |
| %tmp13 = add <2 x i32> %tmp12, %tmp7 |
| %tmp14 = icmp slt i32 poison, %arg2 |
| br i1 %tmp14, label %bb6, label %bb4 |
| |
| bb15: ; preds = %bb4 -- entered when %c0 is true; stores the merged value %tmp5 through a poison addrspace(3) pointer |
| store <2 x i32> %tmp5, ptr addrspace(3) poison, align 8 |
| br label %bb16 |
| |
| bb16: ; preds = %bb15, %bb4 -- the kernel has no return; both paths end in unreachable |
| unreachable |
| } |
| |
| attributes #0 = { nounwind readnone } |
| attributes #1 = { nounwind } |