blob: 6d0aa1e784530cadf3a5343ec47825c28cbaccd5 [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s
define protected amdgpu_kernel void @no_folding_imm_to_inst_with_fi(<4 x i64> %val4, <16 x i64> %val16) {
; CHECK-LABEL: no_folding_imm_to_inst_with_fi:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_clause 0x2
; CHECK-NEXT: s_load_b256 s[36:43], s[4:5], 0x24
; CHECK-NEXT: s_load_b512 s[16:31], s[4:5], 0xe4
; CHECK-NEXT: s_load_b512 s[0:15], s[4:5], 0xa4
; CHECK-NEXT: s_mov_b64 s[34:35], src_private_base
; CHECK-NEXT: s_movk_i32 s33, 0x70
; CHECK-NEXT: s_movk_i32 s34, 0x60
; CHECK-NEXT: s_or_b32 s44, 0x80, s33
; CHECK-NEXT: s_mov_b32 s45, s35
; CHECK-NEXT: s_or_b32 s46, 0x80, s34
; CHECK-NEXT: s_mov_b32 s47, s35
; CHECK-NEXT: v_dual_mov_b32 v20, s44 :: v_dual_mov_b32 v21, s45
; CHECK-NEXT: v_dual_mov_b32 v22, s46 :: v_dual_mov_b32 v23, s47
; CHECK-NEXT: s_movk_i32 s34, 0x80
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT: v_dual_mov_b32 v34, s34 :: v_dual_mov_b32 v35, s35
; CHECK-NEXT: s_wait_kmcnt 0x0
; CHECK-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v1, s41
; CHECK-NEXT: v_dual_mov_b32 v2, s42 :: v_dual_mov_b32 v3, s43
; CHECK-NEXT: v_dual_mov_b32 v4, s36 :: v_dual_mov_b32 v5, s37
; CHECK-NEXT: v_dual_mov_b32 v6, s38 :: v_dual_mov_b32 v7, s39
; CHECK-NEXT: scratch_store_b128 off, v[0:3], off offset:16 scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: v_dual_mov_b32 v0, s20 :: v_dual_mov_b32 v1, s21
; CHECK-NEXT: s_movk_i32 s20, 0x50
; CHECK-NEXT: v_dual_mov_b32 v8, s28 :: v_dual_mov_b32 v9, s29
; CHECK-NEXT: v_dual_mov_b32 v10, s30 :: v_dual_mov_b32 v11, s31
; CHECK-NEXT: s_wait_alu 0xfffe
; CHECK-NEXT: s_or_b32 s20, 0x80, s20
; CHECK-NEXT: s_mov_b32 s21, s35
; CHECK-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25
; CHECK-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27
; CHECK-NEXT: v_dual_mov_b32 v2, s22 :: v_dual_mov_b32 v3, s23
; CHECK-NEXT: s_wait_alu 0xfffe
; CHECK-NEXT: v_dual_mov_b32 v25, s21 :: v_dual_mov_b32 v24, s20
; CHECK-NEXT: scratch_store_b128 off, v[4:7], off scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: flat_store_b128 v[20:21], v[8:11] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: flat_store_b128 v[22:23], v[12:15] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: flat_store_b128 v[24:25], v[0:3] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; CHECK-NEXT: s_or_b32 s16, 0x80, 64
; CHECK-NEXT: s_mov_b32 s17, s35
; CHECK-NEXT: v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v5, s13
; CHECK-NEXT: s_or_b32 s12, 0x80, 48
; CHECK-NEXT: s_mov_b32 s13, s35
; CHECK-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
; CHECK-NEXT: s_or_b32 s8, 0x80, 32
; CHECK-NEXT: s_mov_b32 s9, s35
; CHECK-NEXT: v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v13, s5
; CHECK-NEXT: s_or_b32 s4, 0x80, 16
; CHECK-NEXT: s_mov_b32 s5, s35
; CHECK-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; CHECK-NEXT: s_wait_alu 0xfffe
; CHECK-NEXT: v_dual_mov_b32 v27, s17 :: v_dual_mov_b32 v26, s16
; CHECK-NEXT: v_dual_mov_b32 v6, s14 :: v_dual_mov_b32 v7, s15
; CHECK-NEXT: v_dual_mov_b32 v29, s13 :: v_dual_mov_b32 v28, s12
; CHECK-NEXT: v_dual_mov_b32 v31, s9 :: v_dual_mov_b32 v30, s8
; CHECK-NEXT: v_dual_mov_b32 v33, s5 :: v_dual_mov_b32 v32, s4
; CHECK-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
; CHECK-NEXT: v_dual_mov_b32 v14, s6 :: v_dual_mov_b32 v15, s7
; CHECK-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1
; CHECK-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3
; CHECK-NEXT: flat_store_b128 v[26:27], v[0:3] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: flat_store_b128 v[28:29], v[4:7] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: flat_store_b128 v[30:31], v[8:11] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: flat_store_b128 v[32:33], v[12:15] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: flat_store_b128 v[34:35], v[16:19] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_storecnt 0x0
; CHECK-NEXT: flat_load_b128 v[0:3], v[22:23] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: flat_load_b128 v[0:3], v[20:21] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: flat_load_b128 v[0:3], v[26:27] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: flat_load_b128 v[0:3], v[24:25] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: flat_load_b128 v[0:3], v[30:31] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: flat_load_b128 v[0:3], v[28:29] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: flat_load_b128 v[0:3], v[34:35] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: flat_load_b128 v[0:3], v[32:33] scope:SCOPE_SYS
; CHECK-NEXT: s_wait_loadcnt 0x0
; CHECK-NEXT: s_endpgm
bb:
%alloca = alloca <4 x i64>, align 32, addrspace(5)
%alloca1 = alloca <16 x i64>, align 128, addrspace(5)
store volatile <4 x i64> %val4, ptr addrspace(5) %alloca
%ascast = addrspacecast ptr addrspace(5) %alloca1 to ptr
store volatile <16 x i64> %val16, ptr %ascast
%load = load volatile <16 x i64>, ptr %ascast
ret void
}