| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,GCN-TRUE16 |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,GCN-FAKE16 |
| |
| ; Spill test for an i16 value produced by an ALU op. %add is computed before the |
| ; inline asm and stored after it, so it is live across the v0-v7 clobber. |
| ; Expected with +real-true16: a 2-byte spill/reload (scratch_store_b16 / |
| ; scratch_load_d16_b16). Expected with -real-true16: a 4-byte spill/reload |
| ; (scratch_store_b32 / scratch_load_b32). |
| define void @spill_i16_alu() { |
| ; GCN-TRUE16-LABEL: spill_i16_alu: |
| ; GCN-TRUE16: ; %bb.0: ; %entry |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l |
| ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill |
| ; GCN-TRUE16-NEXT: ;;#ASMSTART |
| ; GCN-TRUE16-NEXT: ;;#ASMEND |
| ; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 ; 2-byte Folded Reload |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GCN-FAKE16-LABEL: spill_i16_alu: |
| ; GCN-FAKE16: ; %bb.0: ; %entry |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-FAKE16-NEXT: v_add_nc_u16 v0, 0x7b, v0 |
| ; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill |
| ; GCN-FAKE16-NEXT: ;;#ASMSTART |
| ; GCN-FAKE16-NEXT: ;;#ASMEND |
| ; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 ; 4-byte Folded Reload |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %alloca = alloca i16, i32 1, align 4, addrspace(5) |
| |
| %aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 |
| %a = load volatile i16, ptr addrspace(5) %aptr |
| %add = add i16 %a, 123 |
| |
| ; Force %add to spill: it is defined above and used below, and the asm in |
| ; between clobbers v0-v7. |
| call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" () |
| |
| %outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 |
| store volatile i16 %add, ptr addrspace(5) %outptr |
| |
| ret void |
| } |
| |
| ; Like spill_i16_alu, but a second i16 (%b / %badd) is loaded and incremented |
| ; after the inline asm, so the reloaded %add and %badd are live together. |
| ; Expected with +real-true16: %add is reloaded with scratch_load_d16_hi_b16 into |
| ; v0 while the second add operates on v0.l, so both values sit in halves of v0. |
| ; Expected with -real-true16: %add is reloaded into v1 while %badd uses v0. |
| define void @spill_i16_alu_two_vals() { |
| ; GCN-TRUE16-LABEL: spill_i16_alu_two_vals: |
| ; GCN-TRUE16: ; %bb.0: ; %entry |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l |
| ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill |
| ; GCN-TRUE16-NEXT: ;;#ASMSTART |
| ; GCN-TRUE16-NEXT: ;;#ASMEND |
| ; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:4 glc dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_load_d16_hi_b16 v0, off, s32 offset:6 ; 2-byte Folded Reload |
| ; GCN-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_store_d16_hi_b16 off, v0, s32 dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:4 dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GCN-FAKE16-LABEL: spill_i16_alu_two_vals: |
| ; GCN-FAKE16: ; %bb.0: ; %entry |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-FAKE16-NEXT: v_add_nc_u16 v0, 0x7b, v0 |
| ; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill |
| ; GCN-FAKE16-NEXT: ;;#ASMSTART |
| ; GCN-FAKE16-NEXT: ;;#ASMEND |
| ; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:4 glc dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_load_b32 v1, off, s32 offset:8 ; 4-byte Folded Reload |
| ; GCN-FAKE16-NEXT: v_add_nc_u16 v0, 0x7b, v0 |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_store_b16 off, v1, s32 dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 offset:4 dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %alloca = alloca i16, i32 1, align 4, addrspace(5) |
| %alloca2 = alloca i16, i32 1, align 4, addrspace(5) |
| |
| %aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 |
| %a = load volatile i16, ptr addrspace(5) %aptr |
| %add = add i16 %a, 123 |
| |
| ; Force %add to spill: it is defined above and used below, and the asm in |
| ; between clobbers v0-v7. |
| call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" () |
| |
| ; The second value is created only after the asm, so it is never live across |
| ; the clobber; it overlaps with the reloaded %add until both are stored. |
| %bptr = getelementptr i16, ptr addrspace(5) %alloca2, i32 0 |
| %b = load volatile i16, ptr addrspace(5) %bptr |
| %badd = add i16 %b, 123 |
| %outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 |
| store volatile i16 %add, ptr addrspace(5) %outptr |
| %outptr2 = getelementptr i16, ptr addrspace(5) %alloca2, i32 0 |
| store volatile i16 %badd, ptr addrspace(5) %outptr2 |
| |
| ret void |
| } |
| |
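| ; Tests after this point do not actually test 16-bit spills because there is no use of VGPR_16. They could demonstrate 16-bit spills if the instructions were updated to use VGPR_16 instead of VGPR_32. |
| ; NOTE(review): the +real-true16 expectations below do show "2-byte Folded Spill" lines, so the statement above may be stale - confirm. |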
| |
| ; Spill test for an i16 that is only loaded and stored (no ALU op in between). |
| ; %a is loaded before the inline asm and stored after it, so it is live across |
| ; the v0-v7 clobber. |
| ; Expected with +real-true16: a 2-byte spill/reload. Expected with |
| ; -real-true16: a 4-byte spill/reload. |
| define void @spill_i16() { |
| ; GCN-TRUE16-LABEL: spill_i16: |
| ; GCN-TRUE16: ; %bb.0: ; %entry |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill |
| ; GCN-TRUE16-NEXT: ;;#ASMSTART |
| ; GCN-TRUE16-NEXT: ;;#ASMEND |
| ; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 ; 2-byte Folded Reload |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GCN-FAKE16-LABEL: spill_i16: |
| ; GCN-FAKE16: ; %bb.0: ; %entry |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill |
| ; GCN-FAKE16-NEXT: ;;#ASMSTART |
| ; GCN-FAKE16-NEXT: ;;#ASMEND |
| ; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 ; 4-byte Folded Reload |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %alloca = alloca i16, i32 1, align 4, addrspace(5) |
| |
| %aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 |
| %a = load volatile i16, ptr addrspace(5) %aptr |
| |
| ; Force %a to spill: the asm clobbers v0-v7 while %a is still needed below. |
| call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" () |
| |
| %outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 |
| store volatile i16 %a, ptr addrspace(5) %outptr |
| |
| ret void |
| } |
| |
| ; Same shape as spill_i16, but the value is a half instead of an i16. |
| ; %a is loaded before the inline asm and stored after it, so it is live across |
| ; the v0-v7 clobber. |
| ; Expected with +real-true16: a 2-byte spill/reload. Expected with |
| ; -real-true16: a 4-byte spill/reload. |
| define void @spill_half() { |
| ; GCN-TRUE16-LABEL: spill_half: |
| ; GCN-TRUE16: ; %bb.0: ; %entry |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill |
| ; GCN-TRUE16-NEXT: ;;#ASMSTART |
| ; GCN-TRUE16-NEXT: ;;#ASMEND |
| ; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 ; 2-byte Folded Reload |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GCN-FAKE16-LABEL: spill_half: |
| ; GCN-FAKE16: ; %bb.0: ; %entry |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill |
| ; GCN-FAKE16-NEXT: ;;#ASMSTART |
| ; GCN-FAKE16-NEXT: ;;#ASMEND |
| ; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 ; 4-byte Folded Reload |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %alloca = alloca half, i32 1, align 4, addrspace(5) |
| |
| %aptr = getelementptr half, ptr addrspace(5) %alloca, i32 0 |
| %a = load volatile half, ptr addrspace(5) %aptr |
| |
| ; Force %a to spill: the asm clobbers v0-v7 while %a is still needed below. |
| call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" () |
| |
| %outptr = getelementptr half, ptr addrspace(5) %alloca, i32 0 |
| store volatile half %a, ptr addrspace(5) %outptr |
| |
| ret void |
| } |
| |
| ; Spill test for a single i16 read out of a <2 x i16> allocation. The value is |
| ; the i16 at element index 1 (offset:2 in the expected output); it is loaded |
| ; before the inline asm and stored back to the same element after it. |
| ; Expected with +real-true16: a 2-byte spill/reload. Expected with |
| ; -real-true16: a 4-byte spill/reload. |
| define void @spill_i16_from_v2i16() { |
| ; GCN-TRUE16-LABEL: spill_i16_from_v2i16: |
| ; GCN-TRUE16: ; %bb.0: ; %entry |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 glc dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill |
| ; GCN-TRUE16-NEXT: ;;#ASMSTART |
| ; GCN-TRUE16-NEXT: ;;#ASMEND |
| ; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 ; 2-byte Folded Reload |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GCN-FAKE16-LABEL: spill_i16_from_v2i16: |
| ; GCN-FAKE16: ; %bb.0: ; %entry |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 glc dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill |
| ; GCN-FAKE16-NEXT: ;;#ASMSTART |
| ; GCN-FAKE16-NEXT: ;;#ASMEND |
| ; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5) |
| |
| ; Address the allocation as i16 elements and pick element 1. |
| %aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1 |
| %a = load volatile i16, ptr addrspace(5) %aptr |
| |
| ; Force %a to spill: the asm clobbers v0-v7 while %a is still needed below. |
| call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" () |
| |
| %outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1 |
| store volatile i16 %a, ptr addrspace(5) %outptr |
| |
| ret void |
| } |
| |
| ; Spill test for two i16 values read out of one <2 x i16> allocation. %a is the |
| ; i16 at element index 1 and %b the i16 at element index 0; both are loaded |
| ; before the inline asm and stored back after it, so both are live across the |
| ; v0-v7 clobber. |
| ; Expected with +real-true16: two 2-byte spill slots (offset:8 and offset:10). |
| ; Expected with -real-true16: two 4-byte spill slots (offset:8 and offset:12). |
| define void @spill_2xi16_from_v2i16() { |
| ; GCN-TRUE16-LABEL: spill_2xi16_from_v2i16: |
| ; GCN-TRUE16: ; %bb.0: ; %entry |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 glc dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill |
| ; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:10 ; 2-byte Folded Spill |
| ; GCN-TRUE16-NEXT: ;;#ASMSTART |
| ; GCN-TRUE16-NEXT: ;;#ASMEND |
| ; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 ; 2-byte Folded Reload |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:10 ; 2-byte Folded Reload |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GCN-FAKE16-LABEL: spill_2xi16_from_v2i16: |
| ; GCN-FAKE16: ; %bb.0: ; %entry |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 glc dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill |
| ; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:12 ; 4-byte Folded Spill |
| ; GCN-FAKE16-NEXT: ;;#ASMSTART |
| ; GCN-FAKE16-NEXT: ;;#ASMEND |
| ; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:12 ; 4-byte Folded Reload |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5) |
| |
| %aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1 |
| %a = load volatile i16, ptr addrspace(5) %aptr |
| %bptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 |
| %b = load volatile i16, ptr addrspace(5) %bptr |
| |
| ; Force %a and %b to spill: the asm clobbers v0-v7 while both are still needed |
| ; below. |
| call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" () |
| |
| %outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1 |
| store volatile i16 %a, ptr addrspace(5) %outptr |
| %boutptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 |
| store volatile i16 %b, ptr addrspace(5) %boutptr |
| |
| ret void |
| } |
| |
| ; Variant of spill_2xi16_from_v2i16 in which the inline asm clobbers only v0-v6, |
| ; so v7 is not named in the clobber list. |
| ; Expected with -real-true16: %a is kept in v7 across the asm and only %b goes |
| ; through a 4-byte spill slot. |
| ; Expected with +real-true16: both values are still spilled and reloaded through |
| ; 2-byte slots (offset:8 and offset:10). |
| define void @spill_2xi16_from_v2i16_one_free_reg() { |
| ; GCN-TRUE16-LABEL: spill_2xi16_from_v2i16_one_free_reg: |
| ; GCN-TRUE16: ; %bb.0: ; %entry |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 glc dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill |
| ; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:10 ; 2-byte Folded Spill |
| ; GCN-TRUE16-NEXT: ;;#ASMSTART |
| ; GCN-TRUE16-NEXT: ;;#ASMEND |
| ; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 ; 2-byte Folded Reload |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:10 ; 2-byte Folded Reload |
| ; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc |
| ; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GCN-FAKE16-LABEL: spill_2xi16_from_v2i16_one_free_reg: |
| ; GCN-FAKE16: ; %bb.0: ; %entry |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_load_u16 v7, off, s32 offset:2 glc dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill |
| ; GCN-FAKE16-NEXT: ;;#ASMSTART |
| ; GCN-FAKE16-NEXT: ;;#ASMEND |
| ; GCN-FAKE16-NEXT: scratch_store_b16 off, v7, s32 offset:2 dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload |
| ; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc |
| ; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5) |
| |
| %aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1 |
| %a = load volatile i16, ptr addrspace(5) %aptr |
| %bptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 |
| %b = load volatile i16, ptr addrspace(5) %bptr |
| |
| ; Force a spill while leaving one register free: the asm names only v0-v6, and |
| ; both %a and %b are still needed below. |
| ; Would not need to spill if the short scratch instructions used vgpr_16 |
| call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6}" () |
| |
| %outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1 |
| store volatile i16 %a, ptr addrspace(5) %outptr |
| %boutptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 |
| store volatile i16 %b, ptr addrspace(5) %boutptr |
| |
| ret void |
| } |
| |
| ; Spill test for a whole <2 x i16> value. %a is the <2 x i16> at element index 1 |
| ; of the allocation (offset:4 in the expected output); it is loaded before the |
| ; inline asm and stored back after it. |
| ; Both RUN configurations share one set of expectations here: a 4-byte |
| ; spill/reload using scratch_store_b32 / scratch_load_b32. |
| define void @spill_v2i16() { |
| ; GCN-LABEL: spill_v2i16: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: scratch_load_b32 v0, off, s32 offset:4 glc dlc |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc |
| ; GCN-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5) |
| |
| ; Index in units of <2 x i16>, so element 1 is the second vector. |
| %aptr = getelementptr <2 x i16>, ptr addrspace(5) %alloca, i32 1 |
| %a = load volatile <2 x i16>, ptr addrspace(5) %aptr |
| |
| ; Force %a to spill: the asm clobbers v0-v7 while %a is still needed below. |
| call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" () |
| |
| %outptr = getelementptr <2 x i16>, ptr addrspace(5) %alloca, i32 1 |
| store volatile <2 x i16> %a, ptr addrspace(5) %outptr |
| |
| ret void |
| } |