blob: af8b9e7c8cd82d769f144a96709443deb5cf4c78 [file] [log] [blame] [edit]
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GCN %s
# The SCRATCH_LOAD_DWORD result ($vgpr1) is read by the next instruction and overwritten later.
# The checks expect a single S_WAIT_LOADCNT 0 before the first use and no S_WAIT_XCNT.
---
name: vmem_scratch_load
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0
; GCN-LABEL: name: vmem_scratch_load
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec
; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: $vgpr1 = nofpexcept V_ADD_F32_e32 killed $vgpr2, killed $vgpr2, implicit $mode, implicit $exec
$vgpr1 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec
$vgpr3 = V_MOV_B32_e32 1, implicit $exec
$vgpr1 = nofpexcept V_ADD_F32_e32 killed $vgpr2, killed $vgpr2, implicit $mode, implicit $exec
...
# BUFFER_LOAD_DWORD_OFFSET into $vgpr1 followed by a read and a later overwrite of $vgpr1.
# The checks expect only S_WAIT_LOADCNT 0 before the first use of the loaded value.
---
name: vmem_buffer_load_dword_offset
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-LABEL: name: vmem_buffer_load_dword_offset
; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec
; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: $vgpr1 = nofpexcept V_ADD_F32_e32 killed $vgpr2, killed $vgpr2, implicit $mode, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
$vgpr2 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec
$vgpr3 = V_MOV_B32_e32 1, implicit $exec
$vgpr1 = nofpexcept V_ADD_F32_e32 killed $vgpr2, killed $vgpr2, implicit $mode, implicit $exec
...
# BUFFER_LOAD_DWORD_ADDR64 uses $vgpr0_vgpr1 as its address; $vgpr1 is overwritten after the
# loaded value ($vgpr2) has been used. The checks expect S_WAIT_LOADCNT 0 and no S_WAIT_XCNT.
---
name: vmem_buffer_load_addr
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-LABEL: name: vmem_buffer_load_addr
; GCN: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_ADDR64 $vgpr0_vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $vgpr3 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec
; GCN-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: $vgpr1 = nofpexcept V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec
$vgpr2 = BUFFER_LOAD_DWORD_ADDR64 $vgpr0_vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
$vgpr3 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec
$vgpr4 = V_MOV_B32_e32 1, implicit $exec
$vgpr1 = nofpexcept V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec
...
# FLAT_LOAD_DWORD uses $vgpr0_vgpr1 as its address; $vgpr0 is overwritten after the loaded
# value ($vgpr2) has been used. The checks expect S_WAIT_LOADCNT 0 and no S_WAIT_XCNT.
---
name: vmem_flat_load
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GCN-LABEL: name: vmem_flat_load
; GCN: liveins: $vgpr0, $vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $vgpr3 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec
; GCN-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: $vgpr0 = nofpexcept V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec
$vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
$vgpr3 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec
$vgpr4 = V_MOV_B32_e32 1, implicit $exec
$vgpr0 = nofpexcept V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec
...
# GLOBAL_LOAD_DWORD uses $vgpr0_vgpr1 as its address; $vgpr0 is overwritten after the loaded
# value ($vgpr2) has been used. The checks expect S_WAIT_LOADCNT 0 and no S_WAIT_XCNT.
# The input memory operand uses the opaque-pointer spelling (`ptr addrspace(1) undef`), which
# is the form shown in the check line for the same instruction.
---
name: vmem_global_load
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GCN-LABEL: name: vmem_global_load
; GCN: liveins: $vgpr0, $vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $vgpr3 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec
; GCN-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: $vgpr0 = nofpexcept V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec
$vgpr2 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
$vgpr3 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec
$vgpr4 = V_MOV_B32_e32 1, implicit $exec
$vgpr0 = nofpexcept V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec
...
# GLOBAL_STORE_DWORD reads $vgpr2 as its data operand and the next instruction overwrites
# $vgpr2. The checks expect S_WAIT_XCNT 0 between the store and the overwrite.
---
name: vmem_global_store
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GCN-LABEL: name: vmem_global_store
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GCN-NEXT: {{ $}}
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
; GCN-NEXT: S_WAIT_XCNT 0
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
$vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
...
# BUFFER_STORE_DWORD_OFFSET reads $vgpr0 as its data operand and the next instruction
# overwrites $vgpr0; the checks expect S_WAIT_XCNT 0 between them. This function does not set
# isEntryFunction, and the checks also expect S_WAIT_LOADCNT_DSCNT 0 and S_WAIT_KMCNT 0 at the
# start of the block.
---
name: vmem_buffer_store
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GCN-LABEL: name: vmem_buffer_store
; GCN: liveins: $vgpr0, $vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: S_WAIT_LOADCNT_DSCNT 0
; GCN-NEXT: S_WAIT_KMCNT 0
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
; GCN-NEXT: S_WAIT_XCNT 0
; GCN-NEXT: $vgpr0 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
$vgpr0 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec
...
# SCRATCH_STORE_DWORDX2_SADDR reads $vgpr0_vgpr1 as its data operand and the next instruction
# overwrites $vgpr1; the checks expect S_WAIT_XCNT 0 between them. This function does not set
# isEntryFunction, and the checks also expect S_WAIT_LOADCNT_DSCNT 0 and S_WAIT_KMCNT 0 at the
# start of the block.
---
name: vmem_scratch_store
tracksRegLiveness: true
machineFunctionInfo:
stackPtrOffsetReg: '$sgpr32'
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; GCN-LABEL: name: vmem_scratch_store
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
; GCN-NEXT: {{ $}}
; GCN-NEXT: S_WAIT_LOADCNT_DSCNT 0
; GCN-NEXT: S_WAIT_KMCNT 0
; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_WAIT_XCNT 0
; GCN-NEXT: $vgpr1 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec
SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec
...
# S_LOAD_DWORDX2_IMM uses $sgpr2_sgpr3 as its base address and the next instruction
# overwrites $sgpr2. The checks expect S_WAIT_XCNT 0 between the load and the overwrite.
---
name: smem_load
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $sgpr2_sgpr3
; GCN-LABEL: name: smem_load
; GCN: liveins: $sgpr2_sgpr3
; GCN-NEXT: {{ $}}
; GCN-NEXT: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr2_sgpr3, 0, 0 :: (load (s64), addrspace 4)
; GCN-NEXT: S_WAIT_XCNT 0
; GCN-NEXT: $sgpr2 = S_MOV_B32 0
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr2_sgpr3, 0, 0 :: (load (s64), addrspace 4)
$sgpr2 = S_MOV_B32 0
...
# S_STORE_DWORD_IMM uses $sgpr2_sgpr3 as its base address and the next instruction
# overwrites $sgpr3. The checks expect S_WAIT_XCNT 0 between the store and the overwrite.
---
name: smem_store
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $sgpr0, $sgpr2, $sgpr3
; GCN-LABEL: name: smem_store
; GCN: liveins: $sgpr0, $sgpr2, $sgpr3
; GCN-NEXT: {{ $}}
; GCN-NEXT: S_STORE_DWORD_IMM $sgpr0, $sgpr2_sgpr3, 0, 0
; GCN-NEXT: S_WAIT_XCNT 0
; GCN-NEXT: $sgpr3 = S_MOV_B32 0
S_STORE_DWORD_IMM $sgpr0, $sgpr2_sgpr3, 0, 0
$sgpr3 = S_MOV_B32 0
...
# The four global_load instructions together form a load group. The final instruction reads
# $vgpr4 (result of the second load) and overwrites $vgpr10; the checks expect
# S_WAIT_LOADCNT 2 before it and no S_WAIT_XCNT.
---
name: vmem_load_group
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr10
; GCN-LABEL: name: vmem_load_group
; GCN: liveins: $vgpr0, $vgpr1, $vgpr10
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN-NEXT: $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec
; GCN-NEXT: $vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec
; GCN-NEXT: $vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 2
; GCN-NEXT: $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
$vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
$vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec
$vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec
$vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec
$vgpr10 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec
...
# The contiguous stores form a single group. The final instruction only overwrites $vgpr10,
# which none of the stores use; the checks expect no S_WAIT_XCNT.
---
name: vmem_store_group
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
; GCN-LABEL: name: vmem_store_group
; GCN: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec
; GCN-NEXT: $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr6, implicit $exec
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec
$vgpr10 = V_LSHLREV_B32_e64 16, $vgpr6, implicit $exec
...
# Four back-to-back S_LOAD_DWORDX2_IMM instructions followed by an overwrite of $sgpr2, the
# destination of the first load. The checks expect S_WAIT_KMCNT 0 before the overwrite and
# no S_WAIT_XCNT.
---
name: smem_load_group
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $sgpr0_sgpr1
; GCN-LABEL: name: smem_load_group
; GCN: liveins: $sgpr0_sgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
; GCN-NEXT: $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
; GCN-NEXT: $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
; GCN-NEXT: S_WAIT_KMCNT 0
; GCN-NEXT: $sgpr2 = S_MOV_B32 0
$sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
$sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
$sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
$sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
$sgpr2 = S_MOV_B32 0
...
# Four back-to-back S_STORE_DWORD_IMM instructions followed by overwrites of $sgpr2 and
# $sgpr3, the data operands of the first two stores. The checks expect a single
# S_WAIT_XCNT 0 before the first overwrite and none before the second.
---
name: smem_store_group
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5
; GCN-LABEL: name: smem_store_group
; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5
; GCN-NEXT: {{ $}}
; GCN-NEXT: S_STORE_DWORD_IMM $sgpr2, $sgpr0_sgpr1, 0, 0
; GCN-NEXT: S_STORE_DWORD_IMM $sgpr3, $sgpr0_sgpr1, 0, 0
; GCN-NEXT: S_STORE_DWORD_IMM $sgpr4, $sgpr0_sgpr1, 0, 0
; GCN-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr0_sgpr1, 0, 0
; GCN-NEXT: S_WAIT_XCNT 0
; GCN-NEXT: $sgpr2 = S_MOV_B32 0
; GCN-NEXT: $sgpr3 = S_MOV_B32 0
S_STORE_DWORD_IMM $sgpr2, $sgpr0_sgpr1, 0, 0
S_STORE_DWORD_IMM $sgpr3, $sgpr0_sgpr1, 0, 0
S_STORE_DWORD_IMM $sgpr4, $sgpr0_sgpr1, 0, 0
S_STORE_DWORD_IMM $sgpr5, $sgpr0_sgpr1, 0, 0
$sgpr2 = S_MOV_B32 0
$sgpr3 = S_MOV_B32 0
...
# The four global_load instructions form two separate groups due to the intervening s_nop.
---
name: vmem_loads_with_an_intervening_nop
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr10
; GCN-LABEL: name: vmem_loads_with_an_intervening_nop
; GCN: liveins: $vgpr0, $vgpr1, $vgpr10
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN-NEXT: $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec
; GCN-NEXT: S_NOP 0
; GCN-NEXT: $vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec
; GCN-NEXT: $vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 2
; GCN-NEXT: $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
$vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
$vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec
S_NOP 0
$vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec
$vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec
$vgpr10 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec
...
# A GLOBAL_STORE_DWORDX2 sits between two pairs of GLOBAL_LOAD_DWORDX2. The store reads
# $vgpr2_vgpr3 (result of the first load), so the checks expect S_WAIT_LOADCNT 1 before it,
# then S_WAIT_LOADCNT 2 before the final read of $vgpr4. No S_WAIT_XCNT is expected.
---
name: vmem_contiguous_loads_with_an_intervening_store
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr10
; GCN-LABEL: name: vmem_contiguous_loads_with_an_intervening_store
; GCN: liveins: $vgpr0, $vgpr1, $vgpr10
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN-NEXT: $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 1
; GCN-NEXT: GLOBAL_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 32, 0, implicit $exec
; GCN-NEXT: $vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec
; GCN-NEXT: $vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 2
; GCN-NEXT: $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
$vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
$vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec
GLOBAL_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 32, 0, implicit $exec
$vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec
$vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec
$vgpr10 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec
...
# Eight GLOBAL_STORE_DWORD instructions split into two runs of four by an S_NOP 0. The final
# instruction only overwrites $vgpr10, which none of the stores use; the checks expect no
# S_WAIT_XCNT.
---
name: vmem_stores_with_intervening_nop
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
; GCN-LABEL: name: vmem_stores_with_intervening_nop
; GCN: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
; GCN-NEXT: S_NOP 0
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec
; GCN-NEXT: $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr6, implicit $exec
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
S_NOP 0
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec
$vgpr10 = V_LSHLREV_B32_e64 16, $vgpr6, implicit $exec
...
# The intervening load breaks the store group and forms two distinct store groups.
---
name: vmem_contiguous_stores_with_an_intervening_load
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
; GCN-LABEL: name: vmem_contiguous_stores_with_an_intervening_load
; GCN: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
; GCN-NEXT: $vgpr11 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec
; GCN-NEXT: $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr6, implicit $exec
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
$vgpr11 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec
$vgpr10 = V_LSHLREV_B32_e64 16, $vgpr6, implicit $exec
...
# Atomic operations should not form a group. But they are memory instructions and should increment
# the xcnt counter value as they might cause a register dependency. This test ensures S_WAIT_XCNT
# insertion for such cases.
---
name: atomic_op
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-LABEL: name: atomic_op
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr2 = BUFFER_ATOMIC_ADD_ADDR64_RTN $vgpr2, $vgpr0_vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 1, implicit $exec :: (load store (s32), addrspace 1)
; GCN-NEXT: GLOBAL_ATOMIC_ADD_F32 $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, implicit $exec :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 1)
; GCN-NEXT: $vgpr6 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_WAIT_XCNT 2
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: S_WAIT_XCNT 1
; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec
$vgpr2 = BUFFER_ATOMIC_ADD_ADDR64_RTN $vgpr2, $vgpr0_vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 1, implicit $exec :: (load store (s32), addrspace 1)
GLOBAL_ATOMIC_ADD_F32 $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, implicit $exec :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 1)
$vgpr6 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 1, implicit $exec
...
# Force insert S_WAIT_XCNT 0 for dependency in SMEM instruction even though
# there is a pending VMEM dependency.
# NOTE(review): the autogenerated checks below contain S_WAIT_KMCNT 0 before the $sgpr2 write
# and no S_WAIT_XCNT; confirm that the comment above still describes the expected output.
---
name: smem_xcnt_insertion_with_pending_vmem_event
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $sgpr0_sgpr1
; GCN-LABEL: name: smem_xcnt_insertion_with_pending_vmem_event
; GCN: liveins: $sgpr0_sgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 2, implicit $exec
; GCN-NEXT: $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: $vgpr5 = V_MOV_B32_e32 4, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr4_vgpr5, 16, 0, implicit $exec
; GCN-NEXT: S_WAIT_KMCNT 0
; GCN-NEXT: $sgpr2 = S_ADD_I32 $sgpr0, 100, implicit-def $scc
; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 20, implicit $exec
$sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
$vgpr1 = V_MOV_B32_e32 2, implicit $exec
$vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
$vgpr4 = V_MOV_B32_e32 1, implicit $exec
$vgpr5 = V_MOV_B32_e32 4, implicit $exec
GLOBAL_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr4_vgpr5, 16, 0, implicit $exec
$sgpr2 = S_ADD_I32 $sgpr0, 100, implicit-def $scc
$vgpr0 = V_MOV_B32_e32 20, implicit $exec
...
# The second instruction in the flat_load group has a WAR dependency with a prior
# memory operation (scratch_load instruction).
---
name: vmem_group_reg_dependency_with_prior_instruction
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr4, $vgpr5
; GCN-LABEL: name: vmem_group_reg_dependency_with_prior_instruction
; GCN: liveins: $vgpr4, $vgpr5
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr4, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr4, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
$vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
$vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
$vgpr3 = V_MOV_B32_e32 1, implicit $exec
...
# Two instructions inside the load group have dependencies with prior instructions:
# the FLAT_LOAD_DWORDs writing $vgpr4 (address of the first scratch load) and $vgpr5 (data
# operand of the global store).
---
name: multiple_xcnt_insertion_in_group
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr3, $vgpr4, $vgpr5
; GCN-LABEL: name: multiple_xcnt_insertion_in_group
; GCN: liveins: $vgpr3, $vgpr4, $vgpr5
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr4, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
; GCN-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr6 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 1, implicit $exec
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr4, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
$vgpr2 = SCRATCH_LOAD_DWORD $vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr6 = V_MOV_B32_e32 1, implicit $exec
$vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
$vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
$vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
$vgpr8 = V_MOV_B32_e32 1, implicit $exec
...
# Four GLOBAL_LOAD_DWORDX2 instructions are followed by a store of a loaded value ($vgpr2),
# another scratch load, and VALU writes to registers used by the earlier memory operations.
# The checks expect S_WAIT_LOADCNT 3 before the store, S_WAIT_LOADCNT 2 before the write to
# $vgpr6, and S_WAIT_XCNT 1 before $vgpr2 (the store's data operand) is overwritten.
---
name: xcnt_event_post_load_group
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr10
; GCN-LABEL: name: xcnt_event_post_load_group
; GCN: liveins: $vgpr0, $vgpr1, $vgpr10
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN-NEXT: $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec
; GCN-NEXT: $vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec
; GCN-NEXT: $vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 3
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
; GCN-NEXT: $vgpr11 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 2
; GCN-NEXT: $vgpr6 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: S_WAIT_XCNT 1
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
$vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
$vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec
$vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec
$vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
$vgpr11 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
$vgpr6 = V_MOV_B32_e32 1, implicit $exec
$vgpr2 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec
...
# The three V_MOV_B32 instructions outside the group need appropriate wait_xcnt
# insertion as their dst registers have dependencies with instructions inside the group.
---
name: xcnt_event_post_store_group
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
; GCN-LABEL: name: xcnt_event_post_store_group
; GCN: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec
; GCN-NEXT: $vgpr11 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
; GCN-NEXT: S_WAIT_XCNT 8
; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: S_WAIT_XCNT 6
; GCN-NEXT: $vgpr5 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: S_WAIT_XCNT 4
; GCN-NEXT: $vgpr7 = V_MOV_B32_e32 2, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $vgpr11 = V_LSHLREV_B32_e64 16, $vgpr10, implicit $exec
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec
$vgpr11 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
$vgpr5 = V_MOV_B32_e32 1, implicit $exec
$vgpr7 = V_MOV_B32_e32 2, implicit $exec
$vgpr11 = V_LSHLREV_B32_e64 16, $vgpr10, implicit $exec
...
# This test captures the case where interleaved load and store operations form separate groups.
# The destination registers of the V_MOV_B32 instructions all have dependencies on these
# independent groups and should get wait_xcnt insertion with appropriate wait values.
---
name: load_store_switching
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GCN-LABEL: name: load_store_switching
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr1 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $vgpr7 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, $vgpr4, 0, 0, implicit $exec
; GCN-NEXT: $vgpr8 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr2_vgpr3, $vgpr5, 0, 0, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 1
; GCN-NEXT: $vgpr7 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: S_WAIT_XCNT 2
; GCN-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 2, implicit $exec
; GCN-NEXT: S_WAIT_XCNT 0
; GCN-NEXT: $vgpr5 = V_MOV_B32_e32 3, implicit $exec
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec
$vgpr7 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr2_vgpr3, $vgpr4, 0, 0, implicit $exec
$vgpr8 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec
GLOBAL_STORE_DWORD killed $vgpr2_vgpr3, $vgpr5, 0, 0, implicit $exec
$vgpr7 = V_MOV_B32_e32 0, implicit $exec
$vgpr4 = V_MOV_B32_e32 1, implicit $exec
$vgpr8 = V_MOV_B32_e32 2, implicit $exec
$vgpr5 = V_MOV_B32_e32 3, implicit $exec
...
# V_DUAL_MOV is a single instruction with two destination registers, and the
# required wait should be emitted if any of its destinations has an outstanding
# memory-op dependency. Here $vgpr1 is the address operand and $vgpr2 the
# destination of the preceding scratch load; the expected output contains a
# single S_WAIT_LOADCNT 0 in front of the dual move and no separate S_WAIT_XCNT
# (presumably the loadcnt wait already covers the XCNT dependency -- confirm
# against the pass).
---
name: dual_mov
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr1
; GCN-LABEL: name: dual_mov
; GCN: liveins: $sgpr0, $sgpr1, $vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $vgpr1, $vgpr2 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr0, killed $sgpr1, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec
; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
$vgpr2 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1, $vgpr2 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr0, killed $sgpr1, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
...
# No xcnt wait insertion for DS load/store operations. The only wait expected
# is the S_WAIT_DSCNT 0 in front of the DS_WRITE, which consumes the $vgpr0
# value produced by the DS_READ; the later overwrites of $vgpr1 and $vgpr0 get
# no S_WAIT_XCNT.
---
name: ds_load_store
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
; GCN-LABEL: name: ds_load_store
; GCN: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: $vgpr0 = DS_READ_B32_gfx9 killed $vgpr1, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(3) undef`, addrspace 3)
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 2, implicit $exec
; GCN-NEXT: S_WAIT_DSCNT 0
; GCN-NEXT: DS_WRITE_B32_gfx9 killed $vgpr0, killed $vgpr1, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(3) undef`, addrspace 3)
; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 20, implicit $exec
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
$vgpr0 = DS_READ_B32_gfx9 killed $vgpr1, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(3) undef`)
$vgpr1 = V_MOV_B32_e32 2, implicit $exec
DS_WRITE_B32_gfx9 killed $vgpr0, killed $vgpr1, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(3) undef`)
$vgpr0 = V_MOV_B32_e32 20, implicit $exec
...
# Stress case for the S_WAIT_XCNT operand: the first store reads $vgpr2 and is
# followed by 64 more stores before $vgpr2 is overwritten. The expected output
# uses S_WAIT_XCNT 62 in front of the overwrite rather than a larger count
# (presumably the largest value the pass emits for this counter -- confirm
# against the pass).
---
name: xcnt_max
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
; GCN-LABEL: name: xcnt_max
; GCN: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
; GCN-NEXT: {{ $}}
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
; GCN-NEXT: S_WAIT_XCNT 62
; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
$vgpr2 = V_MOV_B32_e32 1, implicit $exec
...
# FIXME: Missing S_WAIT_XCNT before overwriting vgpr0.
# An SMEM load and a VMEM load are both outstanding. The S_MOV_B32 that reads
# $sgpr2 gets an S_WAIT_KMCNT 0, but the following overwrite of $vgpr0 (half of
# the VMEM load's address pair $vgpr0_vgpr1) has no S_WAIT_XCNT in the expected
# output.
---
name: wait_kmcnt_with_outstanding_vmem
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
; GCN-LABEL: name: wait_kmcnt_with_outstanding_vmem
; GCN: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN-NEXT: S_WAIT_KMCNT 0
; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2
; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
$sgpr2 = S_MOV_B32 $sgpr2
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
...
# FIXME: Missing S_WAIT_XCNT before overwriting sgpr0.
# A VMEM load and an SMEM load are both outstanding. The V_MOV_B32 that reads
# $vgpr2 gets an S_WAIT_LOADCNT 0, but the following overwrite of $sgpr0 (half
# of the SMEM load's base pair $sgpr0_sgpr1) has no S_WAIT_XCNT in the expected
# output.
---
name: wait_loadcnt_with_outstanding_smem
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
; GCN-LABEL: name: wait_loadcnt_with_outstanding_smem
; GCN: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr2, implicit $exec
; GCN-NEXT: $sgpr0 = S_MOV_B32 0
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
$vgpr2 = V_MOV_B32_e32 $vgpr2, implicit $exec
$sgpr0 = S_MOV_B32 0
...
# TODO: Unnecessary wait before overwriting vgpr0.
# A VMEM load addressed by $vgpr0_vgpr1 is followed by an SMEM load, and then
# $vgpr0 is overwritten. The expected output currently has an S_WAIT_XCNT 0 in
# front of the overwrite.
---
name: overwrite_vgpr_after_smem
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
; GCN-LABEL: name: overwrite_vgpr_after_smem
; GCN: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; GCN-NEXT: S_WAIT_XCNT 0
; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
...
# TODO: Unnecessary wait before overwriting sgpr0.
# An SMEM load based on $sgpr0_sgpr1 is followed by a VMEM load, and then
# $sgpr0 is overwritten. The expected output currently has an S_WAIT_XCNT 0 in
# front of the overwrite.
---
name: overwrite_sgpr_after_vmem
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
; GCN-LABEL: name: overwrite_sgpr_after_vmem
; GCN: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN-NEXT: S_WAIT_XCNT 0
; GCN-NEXT: $sgpr0 = S_MOV_B32 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
$sgpr0 = S_MOV_B32 0
...