| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GCN %s |
| |
| # The scratch load result ($vgpr1) is read by the next VALU instruction. The checks |
| # expect a single S_WAIT_LOADCNT 0 between them and no S_WAIT_XCNT. |
| |
| --- |
| name: vmem_scratch_load |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr0 |
| ; GCN-LABEL: name: vmem_scratch_load |
| ; GCN: liveins: $vgpr0 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec |
| ; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: $vgpr1 = nofpexcept V_ADD_F32_e32 killed $vgpr2, killed $vgpr2, implicit $mode, implicit $exec |
| $vgpr1 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr |
| $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec |
| $vgpr3 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr1 = nofpexcept V_ADD_F32_e32 killed $vgpr2, killed $vgpr2, implicit $mode, implicit $exec |
| ... |
| |
| # Same pattern as the scratch load test, using BUFFER_LOAD_DWORD_OFFSET. The checks |
| # expect a single S_WAIT_LOADCNT 0 before the loaded $vgpr1 is read and no S_WAIT_XCNT. |
| |
| --- |
| name: vmem_buffer_load_dword_offset |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GCN-LABEL: name: vmem_buffer_load_dword_offset |
| ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec |
| ; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: $vgpr1 = nofpexcept V_ADD_F32_e32 killed $vgpr2, killed $vgpr2, implicit $mode, implicit $exec |
| $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec |
| $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec |
| $vgpr3 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr1 = nofpexcept V_ADD_F32_e32 killed $vgpr2, killed $vgpr2, implicit $mode, implicit $exec |
| ... |
| |
| # BUFFER_LOAD_DWORD_ADDR64 takes its address from $vgpr0_vgpr1; $vgpr1 is overwritten by |
| # the final V_ADD_F32. The checks expect only S_WAIT_LOADCNT 0 before the loaded $vgpr2 |
| # is read, and no S_WAIT_XCNT before the overwrite. |
| |
| --- |
| name: vmem_buffer_load_addr |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GCN-LABEL: name: vmem_buffer_load_addr |
| ; GCN: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_ADDR64 $vgpr0_vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: $vgpr3 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec |
| ; GCN-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: $vgpr1 = nofpexcept V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec |
| $vgpr2 = BUFFER_LOAD_DWORD_ADDR64 $vgpr0_vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec |
| $vgpr3 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec |
| $vgpr4 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr1 = nofpexcept V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec |
| ... |
| |
| # FLAT_LOAD_DWORD takes its address from $vgpr0_vgpr1; $vgpr0 is overwritten by the final |
| # V_ADD_F32. The checks expect only S_WAIT_LOADCNT 0 before the loaded $vgpr2 is read, |
| # and no S_WAIT_XCNT before the overwrite. |
| |
| --- |
| name: vmem_flat_load |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| ; GCN-LABEL: name: vmem_flat_load |
| ; GCN: liveins: $vgpr0, $vgpr1 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: $vgpr3 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec |
| ; GCN-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: $vgpr0 = nofpexcept V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec |
| $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) |
| $vgpr3 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec |
| $vgpr4 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr0 = nofpexcept V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec |
| ... |
| |
| # GLOBAL_LOAD_DWORD takes its address from $vgpr0_vgpr1; $vgpr0 is overwritten by the final |
| # V_ADD_F32. The checks expect only S_WAIT_LOADCNT 0 before the loaded $vgpr2 is read, |
| # and no S_WAIT_XCNT before the overwrite. |
| # Note: the input memory operand uses the legacy `float addrspace(1)*` spelling, while the |
| # check line expects it printed as `ptr addrspace(1) undef`. |
| |
| --- |
| name: vmem_global_load |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| ; GCN-LABEL: name: vmem_global_load |
| ; GCN: liveins: $vgpr0, $vgpr1 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1) |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: $vgpr3 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec |
| ; GCN-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: $vgpr0 = nofpexcept V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec |
| $vgpr2 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec:: (load (s32) from `float addrspace(1)* undef`, align 4, addrspace 1) |
| $vgpr3 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec |
| $vgpr4 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr0 = nofpexcept V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec |
| ... |
| |
| # $vgpr2 is the data operand of a global store and is overwritten by the following VALU |
| # instruction. The checks expect S_WAIT_XCNT 0 before the overwrite. |
| |
| --- |
| name: vmem_global_store |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 |
| ; GCN-LABEL: name: vmem_global_store |
| ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec |
| ; GCN-NEXT: S_WAIT_XCNT 0 |
| ; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec |
| GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec |
| $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec |
| ... |
| |
| # $vgpr0 is the data operand of a buffer store and is overwritten by the following VALU |
| # instruction. The checks expect S_WAIT_XCNT 0 before the overwrite. |
| # Unlike the tests above, this function does not set isEntryFunction, and the checks also |
| # expect S_WAIT_LOADCNT_DSCNT 0 and S_WAIT_KMCNT 0 at the top of the block. |
| |
| --- |
| name: vmem_buffer_store |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 |
| stackPtrOffsetReg: $sgpr32 |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| ; GCN-LABEL: name: vmem_buffer_store |
| ; GCN: liveins: $vgpr0, $vgpr1 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: S_WAIT_LOADCNT_DSCNT 0 |
| ; GCN-NEXT: S_WAIT_KMCNT 0 |
| ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec |
| ; GCN-NEXT: S_WAIT_XCNT 0 |
| ; GCN-NEXT: $vgpr0 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec |
| BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec |
| $vgpr0 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec |
| ... |
| |
| # $vgpr1 is part of the $vgpr0_vgpr1 data operand of a scratch store and is overwritten by |
| # the following VALU instruction. The checks expect S_WAIT_XCNT 0 before the overwrite. |
| # This function does not set isEntryFunction, and the checks also expect |
| # S_WAIT_LOADCNT_DSCNT 0 and S_WAIT_KMCNT 0 at the top of the block. |
| |
| --- |
| name: vmem_scratch_store |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| stackPtrOffsetReg: '$sgpr32' |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr2 |
| ; GCN-LABEL: name: vmem_scratch_store |
| ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: S_WAIT_LOADCNT_DSCNT 0 |
| ; GCN-NEXT: S_WAIT_KMCNT 0 |
| ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr |
| ; GCN-NEXT: S_WAIT_XCNT 0 |
| ; GCN-NEXT: $vgpr1 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec |
| SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr |
| $vgpr1 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec |
| ... |
| |
| # $sgpr2 is part of the $sgpr2_sgpr3 address operand of a scalar load and is overwritten |
| # by the following S_MOV_B32. The checks expect S_WAIT_XCNT 0 before the overwrite. |
| |
| --- |
| name: smem_load |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $sgpr2_sgpr3 |
| ; GCN-LABEL: name: smem_load |
| ; GCN: liveins: $sgpr2_sgpr3 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr2_sgpr3, 0, 0 :: (load (s64), addrspace 4) |
| ; GCN-NEXT: S_WAIT_XCNT 0 |
| ; GCN-NEXT: $sgpr2 = S_MOV_B32 0 |
| $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr2_sgpr3, 0, 0 :: (load (s64), addrspace 4) |
| $sgpr2 = S_MOV_B32 0 |
| ... |
| |
| # $sgpr3 is part of the $sgpr2_sgpr3 address operand of a scalar store and is overwritten |
| # by the following S_MOV_B32. The checks expect S_WAIT_XCNT 0 before the overwrite. |
| |
| --- |
| name: smem_store |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $sgpr0, $sgpr2, $sgpr3 |
| ; GCN-LABEL: name: smem_store |
| ; GCN: liveins: $sgpr0, $sgpr2, $sgpr3 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: S_STORE_DWORD_IMM $sgpr0, $sgpr2_sgpr3, 0, 0 |
| ; GCN-NEXT: S_WAIT_XCNT 0 |
| ; GCN-NEXT: $sgpr3 = S_MOV_B32 0 |
| S_STORE_DWORD_IMM $sgpr0, $sgpr2_sgpr3, 0, 0 |
| $sgpr3 = S_MOV_B32 0 |
| ... |
| |
| # 4 global_load instructions together form a load-group. |
| # The checks expect S_WAIT_LOADCNT 0 before the first global load (its address pair |
| # includes $vgpr0, the scratch load result), S_WAIT_LOADCNT 2 before $vgpr4 is read, |
| # and no S_WAIT_XCNT. |
| |
| --- |
| name: vmem_load_group |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr10 |
| ; GCN-LABEL: name: vmem_load_group |
| ; GCN: liveins: $vgpr0, $vgpr1, $vgpr10 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec |
| ; GCN-NEXT: $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec |
| ; GCN-NEXT: $vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec |
| ; GCN-NEXT: $vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 2 |
| ; GCN-NEXT: $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec |
| $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec |
| $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec |
| $vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec |
| $vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec |
| $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec |
| ... |
| |
| # The contiguous stores form a single group. |
| # The checks expect only S_WAIT_LOADCNT 0 before the first store (its address pair |
| # includes $vgpr0, the scratch load result) and no S_WAIT_XCNT. |
| |
| --- |
| name: vmem_store_group |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 |
| ; GCN-LABEL: name: vmem_store_group |
| ; GCN: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec |
| ; GCN-NEXT: $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr6, implicit $exec |
| $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec |
| $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr6, implicit $exec |
| ... |
| |
| # Four back-to-back scalar loads from $sgpr0_sgpr1, followed by an S_MOV_B32 that |
| # overwrites $sgpr2 (part of the first load's destination). The checks expect a single |
| # S_WAIT_KMCNT 0 before the overwrite and no S_WAIT_XCNT. |
| |
| --- |
| name: smem_load_group |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1 |
| ; GCN-LABEL: name: smem_load_group |
| ; GCN: liveins: $sgpr0_sgpr1 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4) |
| ; GCN-NEXT: $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4) |
| ; GCN-NEXT: $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4) |
| ; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4) |
| ; GCN-NEXT: S_WAIT_KMCNT 0 |
| ; GCN-NEXT: $sgpr2 = S_MOV_B32 0 |
| $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4) |
| $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4) |
| $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4) |
| $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4) |
| $sgpr2 = S_MOV_B32 0 |
| ... |
| |
| # Four back-to-back scalar stores, followed by S_MOV_B32 instructions that overwrite |
| # $sgpr2 and $sgpr3 (data operands of the first two stores). The checks expect a single |
| # S_WAIT_XCNT 0 before the first overwrite and none before the second. |
| |
| --- |
| name: smem_store_group |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5 |
| ; GCN-LABEL: name: smem_store_group |
| ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: S_STORE_DWORD_IMM $sgpr2, $sgpr0_sgpr1, 0, 0 |
| ; GCN-NEXT: S_STORE_DWORD_IMM $sgpr3, $sgpr0_sgpr1, 0, 0 |
| ; GCN-NEXT: S_STORE_DWORD_IMM $sgpr4, $sgpr0_sgpr1, 0, 0 |
| ; GCN-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr0_sgpr1, 0, 0 |
| ; GCN-NEXT: S_WAIT_XCNT 0 |
| ; GCN-NEXT: $sgpr2 = S_MOV_B32 0 |
| ; GCN-NEXT: $sgpr3 = S_MOV_B32 0 |
| S_STORE_DWORD_IMM $sgpr2, $sgpr0_sgpr1, 0, 0 |
| S_STORE_DWORD_IMM $sgpr3, $sgpr0_sgpr1, 0, 0 |
| S_STORE_DWORD_IMM $sgpr4, $sgpr0_sgpr1, 0, 0 |
| S_STORE_DWORD_IMM $sgpr5, $sgpr0_sgpr1, 0, 0 |
| $sgpr2 = S_MOV_B32 0 |
| $sgpr3 = S_MOV_B32 0 |
| ... |
| |
| # The four global_load instructions form two separate groups due to the intervening s_nop. |
| # The checks expect S_WAIT_LOADCNT 0 before the first global load, S_WAIT_LOADCNT 2 before |
| # $vgpr4 is read, and no S_WAIT_XCNT. |
| |
| --- |
| name: vmem_loads_with_an_intervening_nop |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr10 |
| ; GCN-LABEL: name: vmem_loads_with_an_intervening_nop |
| ; GCN: liveins: $vgpr0, $vgpr1, $vgpr10 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec |
| ; GCN-NEXT: $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec |
| ; GCN-NEXT: S_NOP 0 |
| ; GCN-NEXT: $vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec |
| ; GCN-NEXT: $vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 2 |
| ; GCN-NEXT: $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec |
| $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec |
| $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec |
| S_NOP 0 |
| $vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec |
| $vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec |
| $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec |
| ... |
| |
| # A global store sits between two pairs of global loads. The checks expect |
| # S_WAIT_LOADCNT 1 before the store (which reads $vgpr2_vgpr3 from the first load), |
| # S_WAIT_LOADCNT 2 before $vgpr4 is read, and no S_WAIT_XCNT. |
| |
| --- |
| name: vmem_contiguous_loads_with_an_intervening_store |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr10 |
| ; GCN-LABEL: name: vmem_contiguous_loads_with_an_intervening_store |
| ; GCN: liveins: $vgpr0, $vgpr1, $vgpr10 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec |
| ; GCN-NEXT: $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 1 |
| ; GCN-NEXT: GLOBAL_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 32, 0, implicit $exec |
| ; GCN-NEXT: $vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec |
| ; GCN-NEXT: $vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 2 |
| ; GCN-NEXT: $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec |
| $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec |
| $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec |
| GLOBAL_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 32, 0, implicit $exec |
| $vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec |
| $vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec |
| $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec |
| ... |
| |
| # Eight global stores split into two runs of four by an S_NOP. The checks expect only |
| # S_WAIT_LOADCNT 0 before the first store and no S_WAIT_XCNT. |
| |
| --- |
| name: vmem_stores_with_intervening_nop |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 |
| ; GCN-LABEL: name: vmem_stores_with_intervening_nop |
| ; GCN: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec |
| ; GCN-NEXT: S_NOP 0 |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec |
| ; GCN-NEXT: $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr6, implicit $exec |
| $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec |
| S_NOP 0 |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec |
| $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr6, implicit $exec |
| ... |
| |
| # The intervening load breaks the store group and forms two distinct store groups. |
| # The checks expect only S_WAIT_LOADCNT 0 before the first store and no S_WAIT_XCNT. |
| |
| --- |
| name: vmem_contiguous_stores_with_an_intervening_load |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 |
| ; GCN-LABEL: name: vmem_contiguous_stores_with_an_intervening_load |
| ; GCN: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec |
| ; GCN-NEXT: $vgpr11 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec |
| ; GCN-NEXT: $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr6, implicit $exec |
| $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec |
| $vgpr11 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec |
| $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr6, implicit $exec |
| ... |
| |
| # Atomic operations should not form a group. But they are memory instructions and should increment |
| # the xcnt counter value as they might cause a register dependency. This test ensures S_WAIT_XCNT |
| # insertion for such cases. |
| # The checks expect S_WAIT_XCNT 2 before $vgpr1 (address operand of the buffer atomic) is |
| # overwritten and S_WAIT_XCNT 1 before $vgpr3 (data operand of the global atomic) is overwritten. |
| |
| --- |
| name: atomic_op |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GCN-LABEL: name: atomic_op |
| ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr2 = BUFFER_ATOMIC_ADD_ADDR64_RTN $vgpr2, $vgpr0_vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 1, implicit $exec :: (load store (s32), addrspace 1) |
| ; GCN-NEXT: GLOBAL_ATOMIC_ADD_F32 $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, implicit $exec :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 1) |
| ; GCN-NEXT: $vgpr6 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr |
| ; GCN-NEXT: S_WAIT_XCNT 2 |
| ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec |
| ; GCN-NEXT: S_WAIT_XCNT 1 |
| ; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr2 = BUFFER_ATOMIC_ADD_ADDR64_RTN $vgpr2, $vgpr0_vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 1, implicit $exec :: (load store (s32), addrspace 1) |
| GLOBAL_ATOMIC_ADD_F32 $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, implicit $exec :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 1) |
| $vgpr6 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr |
| $vgpr1 = V_MOV_B32_e32 0, implicit $exec |
| $vgpr3 = V_MOV_B32_e32 1, implicit $exec |
| ... |
| |
| # Force insert S_WAIT_XCNT 0 for dependency in SMEM instruction even though |
| # there is a pending VMEM dependency. |
| # NOTE(review): the autogenerated checks below expect S_WAIT_KMCNT 0 before the S_ADD_I32 |
| # that overwrites $sgpr2 and contain no S_WAIT_XCNT at all, including before the final |
| # write to $vgpr0 (an address operand of the preceding global load and store). Confirm |
| # that the description above still matches the intended behavior. |
| |
| --- |
| name: smem_xcnt_insertion_with_pending_vmem_event |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1 |
| ; GCN-LABEL: name: smem_xcnt_insertion_with_pending_vmem_event |
| ; GCN: liveins: $sgpr0_sgpr1 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4) |
| ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 2, implicit $exec |
| ; GCN-NEXT: $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec |
| ; GCN-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: $vgpr5 = V_MOV_B32_e32 4, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr4_vgpr5, 16, 0, implicit $exec |
| ; GCN-NEXT: S_WAIT_KMCNT 0 |
| ; GCN-NEXT: $sgpr2 = S_ADD_I32 $sgpr0, 100, implicit-def $scc |
| ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 20, implicit $exec |
| $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4) |
| $vgpr0 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 2, implicit $exec |
| $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec |
| $vgpr4 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr5 = V_MOV_B32_e32 4, implicit $exec |
| GLOBAL_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr4_vgpr5, 16, 0, implicit $exec |
| $sgpr2 = S_ADD_I32 $sgpr0, 100, implicit-def $scc |
| $vgpr0 = V_MOV_B32_e32 20, implicit $exec |
| ... |
| |
| # The second instruction in the flat_load group has a WAR dependency with a prior |
| # memory operation (scratch_load instruction). |
| # The checks expect only S_WAIT_LOADCNT 0 (before the global store, whose address pair |
| # includes the scratch load result $vgpr0) and no S_WAIT_XCNT. |
| |
| --- |
| name: vmem_group_reg_dependency_with_prior_instruction |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr4, $vgpr5 |
| ; GCN-LABEL: name: vmem_group_reg_dependency_with_prior_instruction |
| ; GCN: liveins: $vgpr4, $vgpr5 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr4, 0, 0, implicit $exec, implicit $flat_scr |
| ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec |
| ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) |
| ; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) |
| ; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr0 = SCRATCH_LOAD_DWORD $vgpr4, 0, 0, implicit $exec, implicit $flat_scr |
| $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec |
| $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) |
| $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) |
| $vgpr3 = V_MOV_B32_e32 1, implicit $exec |
| ... |
| |
| # Two instructions inside the load group have dependencies with prior instructions. |
| # NOTE(review): despite the test name, the autogenerated checks below contain no |
| # S_WAIT_XCNT; the only wait expected is S_WAIT_LOADCNT 0 before the global store. |
| # Confirm this is the intended behavior. |
| |
| --- |
| name: multiple_xcnt_insertion_in_group |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr3, $vgpr4, $vgpr5 |
| ; GCN-LABEL: name: multiple_xcnt_insertion_in_group |
| ; GCN: liveins: $vgpr3, $vgpr4, $vgpr5 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr4, 0, 0, implicit $exec, implicit $flat_scr |
| ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec |
| ; GCN-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr3, 0, 0, implicit $exec, implicit $flat_scr |
| ; GCN-NEXT: $vgpr6 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) |
| ; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) |
| ; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) |
| ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr0 = SCRATCH_LOAD_DWORD $vgpr4, 0, 0, implicit $exec, implicit $flat_scr |
| $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec |
| $vgpr2 = SCRATCH_LOAD_DWORD $vgpr3, 0, 0, implicit $exec, implicit $flat_scr |
| $vgpr6 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) |
| $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) |
| $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) |
| $vgpr8 = V_MOV_B32_e32 1, implicit $exec |
| ... |
| |
| # A group of four global loads is followed by a global store, a scratch load and VALU |
| # writes. The checks expect S_WAIT_LOADCNT 3 before the store reads $vgpr2, |
| # S_WAIT_LOADCNT 2 before $vgpr6 (a load destination) is overwritten, and S_WAIT_XCNT 1 |
| # before $vgpr2 (the store's data operand) is overwritten. |
| |
| --- |
| name: xcnt_event_post_load_group |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr10 |
| ; GCN-LABEL: name: xcnt_event_post_load_group |
| ; GCN: liveins: $vgpr0, $vgpr1, $vgpr10 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec |
| ; GCN-NEXT: $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec |
| ; GCN-NEXT: $vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec |
| ; GCN-NEXT: $vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 3 |
| ; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec |
| ; GCN-NEXT: $vgpr11 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| ; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 2 |
| ; GCN-NEXT: $vgpr6 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: S_WAIT_XCNT 1 |
| ; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec |
| $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec |
| $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec |
| $vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec |
| $vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec |
| GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec |
| $vgpr11 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| $vgpr3 = V_MOV_B32_e32 0, implicit $exec |
| $vgpr6 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec |
| ... |
| |
| # The three V_MOV_B32 instructions outside the group need appropriate wait_xcnt |
| # insertion as their dst registers have dependencies with instructions inside the group. |
| # The checks expect S_WAIT_XCNT 8, 6 and 4 before the writes to $vgpr3, $vgpr5 and $vgpr7 |
| # respectively, and S_WAIT_LOADCNT 0 before $vgpr11 (a scratch load destination) is rewritten. |
| |
| --- |
| name: xcnt_event_post_store_group |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 |
| ; GCN-LABEL: name: xcnt_event_post_store_group |
| ; GCN: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec |
| ; GCN-NEXT: $vgpr11 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| ; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec |
| ; GCN-NEXT: S_WAIT_XCNT 8 |
| ; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec |
| ; GCN-NEXT: S_WAIT_XCNT 6 |
| ; GCN-NEXT: $vgpr5 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: S_WAIT_XCNT 4 |
| ; GCN-NEXT: $vgpr7 = V_MOV_B32_e32 2, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: $vgpr11 = V_LSHLREV_B32_e64 16, $vgpr10, implicit $exec |
| $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec |
| $vgpr11 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr |
| GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec |
| $vgpr3 = V_MOV_B32_e32 0, implicit $exec |
| $vgpr5 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr7 = V_MOV_B32_e32 2, implicit $exec |
| $vgpr11 = V_LSHLREV_B32_e64 16, $vgpr10, implicit $exec |
| ... |
| |
| # This test captures the case where interleaved load and store operations form |
| # separate groups. The destination registers of the V_MOV_B32 instructions all |
| # have dependencies on these independent groups, so each V_MOV_B32 should be |
| # preceded by a wait with the appropriate value: S_WAIT_LOADCNT for the load |
| # results ($vgpr7, $vgpr8) and S_WAIT_XCNT for the store data ($vgpr4, $vgpr5). |
| |
| --- |
| name: load_store_switching |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 |
| ; GCN-LABEL: name: load_store_switching |
| ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr5, 0, 0, implicit $exec, implicit $flat_scr |
| ; GCN-NEXT: $vgpr1 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: $vgpr7 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, $vgpr4, 0, 0, implicit $exec |
| ; GCN-NEXT: $vgpr8 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr2_vgpr3, $vgpr5, 0, 0, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 1 |
| ; GCN-NEXT: $vgpr7 = V_MOV_B32_e32 0, implicit $exec |
| ; GCN-NEXT: S_WAIT_XCNT 2 |
| ; GCN-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 2, implicit $exec |
| ; GCN-NEXT: S_WAIT_XCNT 0 |
| ; GCN-NEXT: $vgpr5 = V_MOV_B32_e32 3, implicit $exec |
| $vgpr0 = SCRATCH_LOAD_DWORD $vgpr5, 0, 0, implicit $exec, implicit $flat_scr |
| $vgpr1 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec |
| $vgpr7 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr2_vgpr3, $vgpr4, 0, 0, implicit $exec |
| $vgpr8 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec |
| GLOBAL_STORE_DWORD killed $vgpr2_vgpr3, $vgpr5, 0, 0, implicit $exec |
| $vgpr7 = V_MOV_B32_e32 0, implicit $exec |
| $vgpr4 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr8 = V_MOV_B32_e32 2, implicit $exec |
| $vgpr5 = V_MOV_B32_e32 3, implicit $exec |
| ... |
| |
| # V_DUAL_MOV is a single instruction and should get the required xcnt wait |
| # if either of its destination registers has a memory-op dependency. |
| # Here $vgpr1 is the address operand and $vgpr2 the result of the preceding |
| # SCRATCH_LOAD; the expected output contains a single S_WAIT_LOADCNT 0 and no |
| # S_WAIT_XCNT. |
| # NOTE(review): presumably the loadcnt wait makes a separate xcnt wait |
| # redundant here - confirm against SIInsertWaitcnts. |
| |
| --- |
| name: dual_mov |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $sgpr0, $sgpr1, $vgpr1 |
| ; GCN-LABEL: name: dual_mov |
| ; GCN: liveins: $sgpr0, $sgpr1, $vgpr1 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: $vgpr1, $vgpr2 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr0, killed $sgpr1, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec |
| ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr2 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr |
| $vgpr1, $vgpr2 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr0, killed $sgpr1, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec |
| $vgpr0 = V_MOV_B32_e32 1, implicit $exec |
| ... |
| |
| # No xcnt wait insertion for DS load/store operations: the only wait in the |
| # expected output is the S_WAIT_DSCNT 0 before the DS_WRITE that consumes the |
| # DS_READ result in $vgpr0. |
| |
| --- |
| name: ds_load_store |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: ds_load_store |
| ; GCN: $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| ; GCN-NEXT: $vgpr0 = DS_READ_B32_gfx9 killed $vgpr1, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(3) undef`, addrspace 3) |
| ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 2, implicit $exec |
| ; GCN-NEXT: S_WAIT_DSCNT 0 |
| ; GCN-NEXT: DS_WRITE_B32_gfx9 killed $vgpr0, killed $vgpr1, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(3) undef`, addrspace 3) |
| ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 20, implicit $exec |
| $vgpr1 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr0 = DS_READ_B32_gfx9 killed $vgpr1, 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(3)* undef`) |
| $vgpr1 = V_MOV_B32_e32 2, implicit $exec |
| DS_WRITE_B32_gfx9 killed $vgpr0, killed $vgpr1, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(3)* undef`) |
| $vgpr0 = V_MOV_B32_e32 20, implicit $exec |
| ... |
| |
| # 65 GLOBAL_STORE_DWORDs are issued back to back, then $vgpr2 (the data operand |
| # of the first store only) is overwritten. The expected wait before the |
| # overwrite is S_WAIT_XCNT 62. |
| # NOTE(review): presumably this exercises the upper limit of the xcnt counter |
| # - confirm against the gfx1250 counter width. |
| --- |
| name: xcnt_max |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 |
| ; GCN-LABEL: name: xcnt_max |
| ; GCN: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| ; GCN-NEXT: S_WAIT_XCNT 62 |
| ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec |
| $vgpr2 = V_MOV_B32_e32 1, implicit $exec |
| ... |
| |
| # An S_LOAD is followed by a GLOBAL_LOAD addressed through $vgpr0_vgpr1. |
| # S_WAIT_KMCNT 0 is expected before the S_MOV_B32 that reads the S_LOAD result |
| # $sgpr2, but no wait appears before the V_MOV_B32 that overwrites $vgpr0, |
| # part of the GLOBAL_LOAD address. |
| # FIXME: Missing S_WAIT_XCNT before overwriting vgpr0. |
| --- |
| name: wait_kmcnt_with_outstanding_vmem |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $sgpr0_sgpr1 |
| ; GCN-LABEL: name: wait_kmcnt_with_outstanding_vmem |
| ; GCN: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 |
| ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec |
| ; GCN-NEXT: S_WAIT_KMCNT 0 |
| ; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2 |
| ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec |
| $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 |
| $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec |
| $sgpr2 = S_MOV_B32 $sgpr2 |
| $vgpr0 = V_MOV_B32_e32 0, implicit $exec |
| ... |
| |
| # A GLOBAL_LOAD is followed by an S_LOAD addressed through $sgpr0_sgpr1. |
| # S_WAIT_LOADCNT 0 is expected before the V_MOV_B32 that reads the GLOBAL_LOAD |
| # result $vgpr2, but no wait appears before the S_MOV_B32 that overwrites |
| # $sgpr0, part of the S_LOAD address. |
| # FIXME: Missing S_WAIT_XCNT before overwriting sgpr0. |
| --- |
| name: wait_loadcnt_with_outstanding_smem |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $sgpr0_sgpr1 |
| ; GCN-LABEL: name: wait_loadcnt_with_outstanding_smem |
| ; GCN: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec |
| ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 |
| ; GCN-NEXT: S_WAIT_LOADCNT 0 |
| ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr2, implicit $exec |
| ; GCN-NEXT: $sgpr0 = S_MOV_B32 0 |
| $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec |
| $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 |
| $vgpr2 = V_MOV_B32_e32 $vgpr2, implicit $exec |
| $sgpr0 = S_MOV_B32 0 |
| ... |
| |
| # $vgpr0 is part of the GLOBAL_LOAD address and is overwritten after an |
| # intervening S_LOAD; the expected output currently has S_WAIT_XCNT 0 before |
| # the overwrite. |
| # TODO: Unnecessary wait before overwriting vgpr0. |
| --- |
| name: overwrite_vgpr_after_smem |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $sgpr0_sgpr1 |
| ; GCN-LABEL: name: overwrite_vgpr_after_smem |
| ; GCN: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec |
| ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 |
| ; GCN-NEXT: S_WAIT_XCNT 0 |
| ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec |
| $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec |
| $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 |
| $vgpr0 = V_MOV_B32_e32 0, implicit $exec |
| ... |
| |
| # $sgpr0 is part of the S_LOAD address and is overwritten after an |
| # intervening GLOBAL_LOAD; the expected output currently has S_WAIT_XCNT 0 |
| # before the overwrite. |
| # TODO: Unnecessary wait before overwriting sgpr0. |
| --- |
| name: overwrite_sgpr_after_vmem |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| isEntryFunction: true |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1, $sgpr0_sgpr1 |
| ; GCN-LABEL: name: overwrite_sgpr_after_vmem |
| ; GCN: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 |
| ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec |
| ; GCN-NEXT: S_WAIT_XCNT 0 |
| ; GCN-NEXT: $sgpr0 = S_MOV_B32 0 |
| $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 |
| $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec |
| $sgpr0 = S_MOV_B32 0 |
| ... |