| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 |
| ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -amdgpu-global-isel-risky-select -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s |
| |
| ; Simplest case: if-then, which requires lane mask merging. |
| ; The %phi lane mask will hold %val_A at %A. Lanes that are active in %B |
| ; will overwrite their own lane bit in the lane mask with %val_B. |
| ; NOTE(review): in the checked output below, s0 is written by a compare in %A |
| ; and again by a compare in %B, with no separate instruction combining the two |
| ; before v_cndmask reads s0 - confirm this is the intended expectation. |
define amdgpu_ps void @divergent_i1_phi_if_then(ptr addrspace(1) %out, i32 %tid, i32 %cond) { |
| ; GFX10-LABEL: divergent_i1_phi_if_then: |
| ; GFX10: ; %bb.0: ; %A |
| ; GFX10-NEXT: v_cmp_le_u32_e64 s0, 6, v2 |
| ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 |
| ; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo |
| ; GFX10-NEXT: ; %bb.1: ; %B |
| ; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 1, v2 |
| ; GFX10-NEXT: ; %bb.2: ; %exit |
| ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1 |
| ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, s0 |
| ; GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v2 |
| ; GFX10-NEXT: global_store_dword v[0:1], v2, off |
| ; GFX10-NEXT: s_endpgm |
| A: |
| ; %val_A is the value %phi takes on the edge coming from %A. |
| %val_A = icmp uge i32 %tid, 6 |
| %cmp = icmp eq i32 %cond, 0 |
| ; %cond == 0 goes through %B; everything else jumps straight to %exit. |
| br i1 %cmp, label %B, label %exit |
| |
| B: |
| ; %val_B is the value %phi takes on the edge coming from %B. |
| %val_B = icmp ult i32 %tid, 1 |
| br label %exit |
| |
| exit: |
| %phi = phi i1 [ %val_A, %A ], [ %val_B, %B ] |
| ; Stores 1 to %out where %phi is true and 2 otherwise. |
| %sel = select i1 %phi, i32 1, i32 2 |
| store i32 %sel, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; if - else: %entry branches to %A when %cond == 0 and to %B otherwise. Each |
| ; side computes its own i1 value and the two are joined by %phi in %exit. |
| ; NOTE(review): the first checked instruction in %entry (s_and_b32 s0, 1, s0) |
| ; reads s0 before any instruction in the checked output writes it - confirm |
| ; this is the intended expectation. |
| define amdgpu_ps void @divergent_i1_phi_if_else(ptr addrspace(1) %out, i32 %tid, i32 %cond) { |
| ; GFX10-LABEL: divergent_i1_phi_if_else: |
| ; GFX10: ; %bb.0: ; %entry |
| ; GFX10-NEXT: s_and_b32 s0, 1, s0 |
| ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3 |
| ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 |
| ; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo |
| ; GFX10-NEXT: s_xor_b32 s1, exec_lo, s1 |
| ; GFX10-NEXT: ; %bb.1: ; %B |
| ; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 2, v2 |
| ; GFX10-NEXT: ; implicit-def: $vgpr2 |
| ; GFX10-NEXT: ; %bb.2: ; %Flow |
| ; GFX10-NEXT: s_andn2_saveexec_b32 s1, s1 |
| ; GFX10-NEXT: ; %bb.3: ; %A |
| ; GFX10-NEXT: v_cmp_le_u32_e64 s0, 1, v2 |
| ; GFX10-NEXT: ; %bb.4: ; %exit |
| ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1 |
| ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, s0 |
| ; GFX10-NEXT: v_add_nc_u32_e32 v2, 2, v2 |
| ; GFX10-NEXT: global_store_dword v[0:1], v2, off |
| ; GFX10-NEXT: s_endpgm |
| entry: |
| %cmp = icmp eq i32 %cond, 0 |
| ; %cond == 0 takes the %A side, anything else takes the %B side. |
| br i1 %cmp, label %A, label %B |
| |
| A: |
| ; %val_A is the value %phi takes on the edge coming from %A. |
| %val_A = icmp uge i32 %tid, 1 |
| br label %exit |
| |
| B: |
| ; %val_B is the value %phi takes on the edge coming from %B. |
| %val_B = icmp ult i32 %tid, 2 |
| br label %exit |
| |
| exit: |
| %phi = phi i1 [ %val_A, %A ], [ %val_B, %B ] |
| ; Stores 1 to %out where %phi is true and 2 otherwise. |
| %sel = select i1 %phi, i32 1, i32 2 |
| store i32 %sel, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; if - break; |
| |
| ; counter = 0; |
| ; do { |
| ; if (a[counter] == 0) |
| ; break; |
| ; if (b[counter] == 0) |
| ; break; |
| ; if (c[counter] == 0) |
| ; break; |
| ; x[counter++]+=1; |
| ; } while (counter<100); |
| |
| ; Tests with multiple break conditions. Divergent phis will be used to track |
| ; whether any of the break conditions was reached. We only need to do simple lane |
| ; mask merging (for the current loop iteration only). There is an intrinsic, |
| ; if_break, that will merge lane masks across all iterations of the loop. |
| |
| ; Loop with a single break: %A leaves the loop when a[counter] == 0; otherwise |
| ; %loop.body increments x[counter], bumps the counter and evaluates the |
| ; back-edge condition. |
| define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a) { |
| ; GFX10-LABEL: loop_with_1break: |
| ; GFX10: ; %bb.0: ; %entry |
| ; GFX10-NEXT: s_mov_b32 s0, 0 |
| ; GFX10-NEXT: v_mov_b32_e32 v4, s0 |
| ; GFX10-NEXT: s_branch .LBB2_2 |
| ; GFX10-NEXT: .LBB2_1: ; %Flow |
| ; GFX10-NEXT: ; in Loop: Header=BB2_2 Depth=1 |
| ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1 |
| ; GFX10-NEXT: s_and_b32 s1, exec_lo, s2 |
| ; GFX10-NEXT: s_or_b32 s0, s1, s0 |
| ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 |
| ; GFX10-NEXT: s_cbranch_execz .LBB2_4 |
| ; GFX10-NEXT: .LBB2_2: ; %A |
| ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4 |
| ; GFX10-NEXT: s_mov_b32 s2, -1 |
| ; GFX10-NEXT: v_lshlrev_b64 v[5:6], 2, v[4:5] |
| ; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v2, v5 |
| ; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v3, v6, vcc_lo |
| ; GFX10-NEXT: global_load_dword v7, v[7:8], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v7 |
| ; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo |
| ; GFX10-NEXT: s_cbranch_execz .LBB2_1 |
| ; GFX10-NEXT: ; %bb.3: ; %loop.body |
| ; GFX10-NEXT: ; in Loop: Header=BB2_2 Depth=1 |
| ; GFX10-NEXT: v_add_co_u32 v5, vcc_lo, v0, v5 |
| ; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, v1, v6, vcc_lo |
| ; GFX10-NEXT: v_add_nc_u32_e32 v8, 1, v4 |
| ; GFX10-NEXT: v_cmp_gt_u32_e64 s2, 0x64, v4 |
| ; GFX10-NEXT: global_load_dword v7, v[5:6], off |
| ; GFX10-NEXT: v_mov_b32_e32 v4, v8 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: v_add_nc_u32_e32 v7, 1, v7 |
| ; GFX10-NEXT: global_store_dword v[5:6], v7, off |
| ; GFX10-NEXT: s_branch .LBB2_1 |
| ; GFX10-NEXT: .LBB2_4: ; %exit |
| ; GFX10-NEXT: s_endpgm |
| entry: |
| br label %A |
| |
| A: |
| ; %counter starts at 0 and is replaced by %counter.plus.1 on the back edge. |
| %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ] |
| %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter |
| %a.val = load i32, ptr addrspace(1) %a.plus.counter |
| ; Break: leave the loop when a[counter] == 0. |
| %a.cond = icmp eq i32 %a.val, 0 |
| br i1 %a.cond, label %exit, label %loop.body |
| |
| loop.body: |
| ; x[counter] += 1 |
| %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter |
| %x.val = load i32, ptr addrspace(1) %x.plus.counter |
| %x.val.plus.1 = add i32 %x.val, 1 |
| store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter |
| %counter.plus.1 = add i32 %counter, 1 |
| ; The comparison uses the pre-increment %counter, and a true result goes to |
| ; %exit rather than back to %A. |
| ; NOTE(review): this is the opposite polarity of `while (counter<100)` in the |
| ; pseudo-code preceding these loop tests - confirm it is intended. |
| %x.cond = icmp ult i32 %counter, 100 |
| br i1 %x.cond, label %exit, label %A |
| |
| exit: |
| ret void |
| } |
| |
| ; Loop with two breaks: %A leaves the loop when a[counter] == 0 and %B leaves |
| ; it when b[counter] == 0; otherwise %loop.body increments x[counter], bumps |
| ; the counter and evaluates the back-edge condition. |
| define amdgpu_cs void @loop_with_2breaks(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %b) { |
| ; GFX10-LABEL: loop_with_2breaks: |
| ; GFX10: ; %bb.0: ; %entry |
| ; GFX10-NEXT: s_mov_b32 s0, 0 |
| ; GFX10-NEXT: v_mov_b32_e32 v6, s0 |
| ; GFX10-NEXT: s_branch .LBB3_3 |
| ; GFX10-NEXT: .LBB3_1: ; %Flow3 |
| ; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1 |
| ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; GFX10-NEXT: .LBB3_2: ; %Flow |
| ; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1 |
| ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1 |
| ; GFX10-NEXT: s_and_b32 s1, exec_lo, s2 |
| ; GFX10-NEXT: s_or_b32 s0, s1, s0 |
| ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 |
| ; GFX10-NEXT: s_cbranch_execz .LBB3_6 |
| ; GFX10-NEXT: .LBB3_3: ; %A |
| ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX10-NEXT: v_ashrrev_i32_e32 v7, 31, v6 |
| ; GFX10-NEXT: s_mov_b32 s2, -1 |
| ; GFX10-NEXT: v_lshlrev_b64 v[7:8], 2, v[6:7] |
| ; GFX10-NEXT: v_add_co_u32 v9, vcc_lo, v2, v7 |
| ; GFX10-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo |
| ; GFX10-NEXT: global_load_dword v9, v[9:10], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 |
| ; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo |
| ; GFX10-NEXT: s_cbranch_execz .LBB3_2 |
| ; GFX10-NEXT: ; %bb.4: ; %B |
| ; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1 |
| ; GFX10-NEXT: v_add_co_u32 v9, vcc_lo, v4, v7 |
| ; GFX10-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v5, v8, vcc_lo |
| ; GFX10-NEXT: global_load_dword v9, v[9:10], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 |
| ; GFX10-NEXT: s_and_saveexec_b32 s3, vcc_lo |
| ; GFX10-NEXT: s_cbranch_execz .LBB3_1 |
| ; GFX10-NEXT: ; %bb.5: ; %loop.body |
| ; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1 |
| ; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v0, v7 |
| ; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo |
| ; GFX10-NEXT: v_add_nc_u32_e32 v10, 1, v6 |
| ; GFX10-NEXT: v_cmp_gt_u32_e64 s2, 0x64, v6 |
| ; GFX10-NEXT: global_load_dword v9, v[7:8], off |
| ; GFX10-NEXT: v_mov_b32_e32 v6, v10 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: v_add_nc_u32_e32 v9, 1, v9 |
| ; GFX10-NEXT: global_store_dword v[7:8], v9, off |
| ; GFX10-NEXT: s_branch .LBB3_1 |
| ; GFX10-NEXT: .LBB3_6: ; %exit |
| ; GFX10-NEXT: s_endpgm |
| entry: |
| br label %A |
| |
| A: |
| ; %counter starts at 0 and is replaced by %counter.plus.1 on the back edge. |
| %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ] |
| %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter |
| %a.val = load i32, ptr addrspace(1) %a.plus.counter |
| ; First break: leave the loop when a[counter] == 0. |
| %a.cond = icmp eq i32 %a.val, 0 |
| br i1 %a.cond, label %exit, label %B |
| |
| B: |
| %b.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %b, i32 %counter |
| %b.val = load i32, ptr addrspace(1) %b.plus.counter |
| ; Second break: leave the loop when b[counter] == 0. |
| %b.cond = icmp eq i32 %b.val, 0 |
| br i1 %b.cond, label %exit, label %loop.body |
| |
| loop.body: |
| ; x[counter] += 1 |
| %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter |
| %x.val = load i32, ptr addrspace(1) %x.plus.counter |
| %x.val.plus.1 = add i32 %x.val, 1 |
| store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter |
| %counter.plus.1 = add i32 %counter, 1 |
| ; The comparison uses the pre-increment %counter, and a true result goes to |
| ; %exit rather than back to %A. |
| ; NOTE(review): this is the opposite polarity of `while (counter<100)` in the |
| ; pseudo-code preceding these loop tests - confirm it is intended. |
| %x.cond = icmp ult i32 %counter, 100 |
| br i1 %x.cond, label %exit, label %A |
| |
| exit: |
| ret void |
| } |
| |
| ; Loop with three breaks: %A, %B and %C leave the loop when a[counter], |
| ; b[counter] or c[counter] respectively is 0; otherwise %loop.body increments |
| ; x[counter], bumps the counter and evaluates the back-edge condition. |
| define amdgpu_cs void @loop_with_3breaks(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c) { |
| ; GFX10-LABEL: loop_with_3breaks: |
| ; GFX10: ; %bb.0: ; %entry |
| ; GFX10-NEXT: s_mov_b32 s0, 0 |
| ; GFX10-NEXT: v_mov_b32_e32 v8, s0 |
| ; GFX10-NEXT: s_branch .LBB4_4 |
| ; GFX10-NEXT: .LBB4_1: ; %Flow5 |
| ; GFX10-NEXT: ; in Loop: Header=BB4_4 Depth=1 |
| ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 |
| ; GFX10-NEXT: .LBB4_2: ; %Flow4 |
| ; GFX10-NEXT: ; in Loop: Header=BB4_4 Depth=1 |
| ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s3 |
| ; GFX10-NEXT: .LBB4_3: ; %Flow |
| ; GFX10-NEXT: ; in Loop: Header=BB4_4 Depth=1 |
| ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1 |
| ; GFX10-NEXT: s_and_b32 s1, exec_lo, s2 |
| ; GFX10-NEXT: s_or_b32 s0, s1, s0 |
| ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 |
| ; GFX10-NEXT: s_cbranch_execz .LBB4_8 |
| ; GFX10-NEXT: .LBB4_4: ; %A |
| ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX10-NEXT: v_ashrrev_i32_e32 v9, 31, v8 |
| ; GFX10-NEXT: s_mov_b32 s2, -1 |
| ; GFX10-NEXT: v_lshlrev_b64 v[9:10], 2, v[8:9] |
| ; GFX10-NEXT: v_add_co_u32 v11, vcc_lo, v2, v9 |
| ; GFX10-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, v3, v10, vcc_lo |
| ; GFX10-NEXT: global_load_dword v11, v[11:12], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v11 |
| ; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo |
| ; GFX10-NEXT: s_cbranch_execz .LBB4_3 |
| ; GFX10-NEXT: ; %bb.5: ; %B |
| ; GFX10-NEXT: ; in Loop: Header=BB4_4 Depth=1 |
| ; GFX10-NEXT: v_add_co_u32 v11, vcc_lo, v4, v9 |
| ; GFX10-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, v5, v10, vcc_lo |
| ; GFX10-NEXT: global_load_dword v11, v[11:12], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v11 |
| ; GFX10-NEXT: s_and_saveexec_b32 s3, vcc_lo |
| ; GFX10-NEXT: s_cbranch_execz .LBB4_2 |
| ; GFX10-NEXT: ; %bb.6: ; %C |
| ; GFX10-NEXT: ; in Loop: Header=BB4_4 Depth=1 |
| ; GFX10-NEXT: v_add_co_u32 v11, vcc_lo, v6, v9 |
| ; GFX10-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, v7, v10, vcc_lo |
| ; GFX10-NEXT: global_load_dword v11, v[11:12], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v11 |
| ; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo |
| ; GFX10-NEXT: s_cbranch_execz .LBB4_1 |
| ; GFX10-NEXT: ; %bb.7: ; %loop.body |
| ; GFX10-NEXT: ; in Loop: Header=BB4_4 Depth=1 |
| ; GFX10-NEXT: v_add_co_u32 v9, vcc_lo, v0, v9 |
| ; GFX10-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v1, v10, vcc_lo |
| ; GFX10-NEXT: v_add_nc_u32_e32 v12, 1, v8 |
| ; GFX10-NEXT: v_cmp_gt_u32_e64 s2, 0x64, v8 |
| ; GFX10-NEXT: global_load_dword v11, v[9:10], off |
| ; GFX10-NEXT: v_mov_b32_e32 v8, v12 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: v_add_nc_u32_e32 v11, 1, v11 |
| ; GFX10-NEXT: global_store_dword v[9:10], v11, off |
| ; GFX10-NEXT: s_branch .LBB4_1 |
| ; GFX10-NEXT: .LBB4_8: ; %exit |
| ; GFX10-NEXT: s_endpgm |
| entry: |
| br label %A |
| |
| A: |
| ; %counter starts at 0 and is replaced by %counter.plus.1 on the back edge. |
| %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ] |
| %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter |
| %a.val = load i32, ptr addrspace(1) %a.plus.counter |
| ; First break: leave the loop when a[counter] == 0. |
| %a.cond = icmp eq i32 %a.val, 0 |
| br i1 %a.cond, label %exit, label %B |
| |
| B: |
| %b.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %b, i32 %counter |
| %b.val = load i32, ptr addrspace(1) %b.plus.counter |
| ; Second break: leave the loop when b[counter] == 0. |
| %b.cond = icmp eq i32 %b.val, 0 |
| br i1 %b.cond, label %exit, label %C |
| |
| C: |
| %c.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %c, i32 %counter |
| %c.val = load i32, ptr addrspace(1) %c.plus.counter |
| ; Third break: leave the loop when c[counter] == 0. |
| %c.cond = icmp eq i32 %c.val, 0 |
| br i1 %c.cond, label %exit, label %loop.body |
| |
| loop.body: |
| ; x[counter] += 1 |
| %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter |
| %x.val = load i32, ptr addrspace(1) %x.plus.counter |
| %x.val.plus.1 = add i32 %x.val, 1 |
| store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter |
| %counter.plus.1 = add i32 %counter, 1 |
| ; The comparison uses the pre-increment %counter, and a true result goes to |
| ; %exit rather than back to %A. |
| ; NOTE(review): this is the opposite polarity of `while (counter<100)` in the |
| ; pseudo-code preceding these loop tests - confirm it is intended. |
| %x.cond = icmp ult i32 %counter, 100 |
| br i1 %x.cond, label %exit, label %A |
| |
| exit: |
| ret void |
| } |
| |
| ; Divergent 'if' condition with a body that ends with a break. This is a loop |
| ; with two exits, but the structurizer will create a phi that tracks the exit |
| ; taken through the break and will move break.body after the loop. The loop |
| ; will then have one exit, and the phi is used outside of the loop as the |
| ; condition for entering break.body. |
| define amdgpu_cs void @loop_with_div_break_with_body(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %a.break) { |
| ; GFX10-LABEL: loop_with_div_break_with_body: |
| ; GFX10: ; %bb.0: ; %entry |
| ; GFX10-NEXT: s_mov_b32 s0, 0 |
| ; GFX10-NEXT: v_mov_b32_e32 v6, s0 |
| ; GFX10-NEXT: s_branch .LBB5_2 |
| ; GFX10-NEXT: .LBB5_1: ; %Flow |
| ; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1 |
| ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1 |
| ; GFX10-NEXT: s_and_b32 s1, exec_lo, s2 |
| ; GFX10-NEXT: s_or_b32 s0, s1, s0 |
| ; GFX10-NEXT: s_and_b32 s1, 1, s3 |
| ; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 0, s1 |
| ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 |
| ; GFX10-NEXT: s_cbranch_execz .LBB5_4 |
| ; GFX10-NEXT: .LBB5_2: ; %A |
| ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; GFX10-NEXT: v_ashrrev_i32_e32 v7, 31, v6 |
| ; GFX10-NEXT: s_mov_b32 s2, -1 |
| ; GFX10-NEXT: s_mov_b32 s3, 1 |
| ; GFX10-NEXT: v_lshlrev_b64 v[7:8], 2, v[6:7] |
| ; GFX10-NEXT: v_add_co_u32 v9, vcc_lo, v2, v7 |
| ; GFX10-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo |
| ; GFX10-NEXT: global_load_dword v9, v[9:10], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 |
| ; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo |
| ; GFX10-NEXT: s_cbranch_execz .LBB5_1 |
| ; GFX10-NEXT: ; %bb.3: ; %loop.body |
| ; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1 |
| ; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v0, v7 |
| ; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo |
| ; GFX10-NEXT: v_add_nc_u32_e32 v10, 1, v6 |
| ; GFX10-NEXT: v_cmp_gt_u32_e64 s2, 0x64, v6 |
| ; GFX10-NEXT: s_mov_b32 s3, 0 |
| ; GFX10-NEXT: global_load_dword v9, v[7:8], off |
| ; GFX10-NEXT: v_mov_b32_e32 v6, v10 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: v_add_nc_u32_e32 v9, 1, v9 |
| ; GFX10-NEXT: global_store_dword v[7:8], v9, off |
| ; GFX10-NEXT: s_branch .LBB5_1 |
| ; GFX10-NEXT: .LBB5_4: ; %loop.exit.guard |
| ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0 |
| ; GFX10-NEXT: s_and_saveexec_b32 s0, s1 |
| ; GFX10-NEXT: s_xor_b32 s0, exec_lo, s0 |
| ; GFX10-NEXT: s_cbranch_execz .LBB5_6 |
| ; GFX10-NEXT: ; %bb.5: ; %break.body |
| ; GFX10-NEXT: v_mov_b32_e32 v0, 10 |
| ; GFX10-NEXT: global_store_dword v[4:5], v0, off |
| ; GFX10-NEXT: .LBB5_6: ; %exit |
| ; GFX10-NEXT: s_endpgm |
| entry: |
| br label %A |
| |
| A: |
| ; %counter starts at 0 and is replaced by %counter.plus.1 on the back edge. |
| %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ] |
| %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter |
| %a.val = load i32, ptr addrspace(1) %a.plus.counter |
| ; Break with a body: when a[counter] == 0 the loop is left through %break.body. |
| %a.cond = icmp eq i32 %a.val, 0 |
| br i1 %a.cond, label %break.body, label %loop.body |
| |
| break.body: |
| ; Only reached through the break in %A; stores 10 to %a.break before exiting. |
| store i32 10, ptr addrspace(1) %a.break |
| br label %exit |
| |
| |
| loop.body: |
| ; x[counter] += 1 |
| %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter |
| %x.val = load i32, ptr addrspace(1) %x.plus.counter |
| %x.val.plus.1 = add i32 %x.val, 1 |
| store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter |
| %counter.plus.1 = add i32 %counter, 1 |
| ; The comparison uses the pre-increment %counter, and a true result goes to |
| ; %exit (bypassing %break.body) rather than back to %A. |
| ; NOTE(review): this is the opposite polarity of `while (counter<100)` in the |
| ; pseudo-code preceding the loop tests - confirm it is intended. |
| %x.cond = icmp ult i32 %counter, 100 |
| br i1 %x.cond, label %exit, label %A |
| |
| exit: |
| ret void |
| } |
| |
| ; Snippet from a test generated by the GraphicsFuzz tool; the frontend generates IR |
| ; with an irreducible control flow graph. FixIrreducible converts it into a natural |
| ; loop and in the process creates an i1 phi with three incoming values. |
| |
| ; int loop(int x, int y, int a0, int a1, int a2, int a3, int a4) { |
| ; do { |
| ; if (y < a2) { |
| ; do { |
| ; } while (x < a2); |
| ; } |
| ; if (x < a3) { |
| ; return a1; |
| ; } |
| ; } while (y < a2); |
| ; return a0; |
| ; } |
| |
| ; This test is also interesting because it has phi with three incomings |
| ;define amdgpu_ps i32 @irreducible_cfg(i32 %x, i32 %y, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { |
| ;.entry: |
| ; %.y_lt_a2 = icmp sgt i32 %a2, %y |
| ; %.x_lt_a2 = icmp sgt i32 %a2, %x |
| ; %.x_lt_a3 = icmp sgt i32 %a3, %x |
| ; br i1 %.y_lt_a2, label %.preheader, label %.loopexit ; first iteration, jump to inner loop if 'y < a2' or start with 'if (x < a3)' |
| ; |
| ;.preheader: ; if (y < a2), |
| ; br label %.inner_loop |
| ; |
| ;.inner_loop: ; do while x < a2 |
| ; br i1 %.x_lt_a2, label %.inner_loop, label %.loopexit |
| ; |
| ;.loopexit: ; if x < a3 |
| ; %not.inner_loop = xor i1 %.y_lt_a2, true |
| ; %brmerge = select i1 %.x_lt_a3, i1 true, i1 %not.inner_loop ; exit loop if 'x < a3' or 'loop ends since !(y < a2)' |
| ; %.ret = select i1 %.x_lt_a3, i32 %a1, i32 %a0 ; select return value: a1 if 'x < a3', or a0 if 'loop ends' |
| ; br i1 %brmerge, label %.exit, label %.preheader |
| ; |
| ;.exit: |
| ; ret i32 %.ret |
| ;} |
| |