; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: opt -mtriple=amdgcn-- -amdgpu-codegenprepare -S < %s | FileCheck -check-prefix=OPT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,DAGISEL-ASM
; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,GISEL-ASM

; Tests that we can avoid null pointer checks for addrspacecasts from/to the
; private and local address spaces.
;
; Whenever a testcase succeeds, the addrspacecast should have been replaced
; with the @llvm.amdgcn.addrspacecast.nonnull intrinsic, and the resulting
; code should have no select/v_cndmask null check for the pointer.

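; For reference (not checked anywhere in this file): when the source pointer
; may be null, lowering such a cast must guard against the address space's
; null value (-1 for private/local on AMDGPU, 0 for flat), which on gfx900
; looks roughly like:
;   v_cmp_ne_u32_e32 vcc, -1, v0
;   v_cndmask_b32_e32 v1, 0, v1, vcc
;   v_cndmask_b32_e32 v0, 0, v0, vcc
; None of the functions below should emit such a compare/select sequence.

; The nonnull attribute on the local pointer argument is enough to use the
; intrinsic: the flat address is formed directly from src_shared_base, with
; no compare/select.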
define void @local_to_flat_nonnull_arg(ptr addrspace(3) nonnull %ptr) {
; OPT-LABEL: define void @local_to_flat_nonnull_arg(
; OPT-SAME: ptr addrspace(3) nonnull [[PTR:%.*]]) {
; OPT-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]])
; OPT-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
; OPT-NEXT: ret void
;
; ASM-LABEL: local_to_flat_nonnull_arg:
; ASM: ; %bb.0:
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT: s_mov_b64 s[4:5], src_shared_base
; ASM-NEXT: v_mov_b32_e32 v1, s5
; ASM-NEXT: v_mov_b32_e32 v2, 7
; ASM-NEXT: flat_store_dword v[0:1], v2
; ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; ASM-NEXT: s_setpc_b64 s[30:31]
  %x = addrspacecast ptr addrspace(3) %ptr to ptr
  store volatile i32 7, ptr %x
  ret void
}

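; Same, but for a private pointer: the high half of the flat address comes
; straight from src_private_base.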
define void @private_to_flat_nonnull_arg(ptr addrspace(5) nonnull %ptr) {
; OPT-LABEL: define void @private_to_flat_nonnull_arg(
; OPT-SAME: ptr addrspace(5) nonnull [[PTR:%.*]]) {
; OPT-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) [[PTR]])
; OPT-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
; OPT-NEXT: ret void
;
; ASM-LABEL: private_to_flat_nonnull_arg:
; ASM: ; %bb.0:
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT: s_mov_b64 s[4:5], src_private_base
; ASM-NEXT: v_mov_b32_e32 v1, s5
; ASM-NEXT: v_mov_b32_e32 v2, 7
; ASM-NEXT: flat_store_dword v[0:1], v2
; ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; ASM-NEXT: s_setpc_b64 s[30:31]
  %x = addrspacecast ptr addrspace(5) %ptr to ptr
  store volatile i32 7, ptr %x
  ret void
}

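; Flat-to-local with a nonnull argument: the low 32 bits are reused as the DS
; address and the store lowers to a plain ds_write_b32.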
define void @flat_to_local_nonnull_arg(ptr nonnull %ptr) {
; OPT-LABEL: define void @flat_to_local_nonnull_arg(
; OPT-SAME: ptr nonnull [[PTR:%.*]]) {
; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(3) @llvm.amdgcn.addrspacecast.nonnull.p3.p0(ptr [[PTR]])
; OPT-NEXT: store volatile i32 7, ptr addrspace(3) [[TMP1]], align 4
; OPT-NEXT: ret void
;
; ASM-LABEL: flat_to_local_nonnull_arg:
; ASM: ; %bb.0:
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT: v_mov_b32_e32 v1, 7
; ASM-NEXT: ds_write_b32 v0, v1
; ASM-NEXT: s_waitcnt lgkmcnt(0)
; ASM-NEXT: s_setpc_b64 s[30:31]
  %x = addrspacecast ptr %ptr to ptr addrspace(3)
  store volatile i32 7, ptr addrspace(3) %x
  ret void
}

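; Flat-to-private with a nonnull argument: the address feeds
; buffer_store_dword directly, again without a null guard.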
define void @flat_to_private_nonnull_arg(ptr nonnull %ptr) {
; OPT-LABEL: define void @flat_to_private_nonnull_arg(
; OPT-SAME: ptr nonnull [[PTR:%.*]]) {
; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(5) @llvm.amdgcn.addrspacecast.nonnull.p5.p0(ptr [[PTR]])
; OPT-NEXT: store volatile i32 7, ptr addrspace(5) [[TMP1]], align 4
; OPT-NEXT: ret void
;
; ASM-LABEL: flat_to_private_nonnull_arg:
; ASM: ; %bb.0:
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT: v_mov_b32_e32 v1, 7
; ASM-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; ASM-NEXT: s_waitcnt vmcnt(0)
; ASM-NEXT: s_setpc_b64 s[30:31]
  %x = addrspacecast ptr %ptr to ptr addrspace(5)
  store volatile i32 7, ptr addrspace(5) %x
  ret void
}

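; No nonnull attribute here, but an alloca can never be null, so the cast is
; still rewritten to the intrinsic.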
define void @private_alloca_to_flat(ptr %ptr) {
; OPT-LABEL: define void @private_alloca_to_flat(
; OPT-SAME: ptr [[PTR:%.*]]) {
; OPT-NEXT: [[ALLOCA:%.*]] = alloca i8, align 1, addrspace(5)
; OPT-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) [[ALLOCA]])
; OPT-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
; OPT-NEXT: ret void
;
; ASM-LABEL: private_alloca_to_flat:
; ASM: ; %bb.0:
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT: s_mov_b64 s[4:5], src_private_base
; ASM-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; ASM-NEXT: v_mov_b32_e32 v1, s5
; ASM-NEXT: v_mov_b32_e32 v2, 7
; ASM-NEXT: flat_store_dword v[0:1], v2
; ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; ASM-NEXT: s_setpc_b64 s[30:31]
  %alloca = alloca i8, addrspace(5)
  %x = addrspacecast ptr addrspace(5) %alloca to ptr
  store volatile i32 7, ptr %x
  ret void
}

@lds = internal unnamed_addr addrspace(3) global i8 poison, align 4

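; Known-bits case: or-ing in low bits makes the flat pointer provably nonzero,
; and flat null is 0, so the cast to private needs no guard.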
define void @knownbits_on_flat_to_priv(ptr %ptr) {
; OPT-LABEL: define void @knownbits_on_flat_to_priv(
; OPT-SAME: ptr [[PTR:%.*]]) {
; OPT-NEXT: [[PTR_INT:%.*]] = ptrtoint ptr [[PTR]] to i64
; OPT-NEXT: [[PTR_OR:%.*]] = or i64 [[PTR_INT]], 15
; OPT-NEXT: [[KB_PTR:%.*]] = inttoptr i64 [[PTR_OR]] to ptr
; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(5) @llvm.amdgcn.addrspacecast.nonnull.p5.p0(ptr [[KB_PTR]])
; OPT-NEXT: store volatile i32 7, ptr addrspace(5) [[TMP1]], align 4
; OPT-NEXT: ret void
;
; ASM-LABEL: knownbits_on_flat_to_priv:
; ASM: ; %bb.0:
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT: v_or_b32_e32 v0, 15, v0
; ASM-NEXT: v_mov_b32_e32 v1, 7
; ASM-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; ASM-NEXT: s_waitcnt vmcnt(0)
; ASM-NEXT: s_setpc_b64 s[30:31]
  %ptr.int = ptrtoint ptr %ptr to i64
  %ptr.or = or i64 %ptr.int, 15 ; set some low bits
  %kb.ptr = inttoptr i64 %ptr.or to ptr
  %x = addrspacecast ptr %kb.ptr to ptr addrspace(5)
  store volatile i32 7, ptr addrspace(5) %x
  ret void
}

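; Known-bits case in the other direction: masking to the low 16 bits means the
; private pointer cannot be -1 (the private/local null value).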
define void @knownbits_on_priv_to_flat(ptr addrspace(5) %ptr) {
; OPT-LABEL: define void @knownbits_on_priv_to_flat(
; OPT-SAME: ptr addrspace(5) [[PTR:%.*]]) {
; OPT-NEXT: [[PTR_INT:%.*]] = ptrtoint ptr addrspace(5) [[PTR]] to i32
; OPT-NEXT: [[PTR_OR:%.*]] = and i32 [[PTR_INT]], 65535
; OPT-NEXT: [[KB_PTR:%.*]] = inttoptr i32 [[PTR_OR]] to ptr addrspace(5)
; OPT-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) [[KB_PTR]])
; OPT-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
; OPT-NEXT: ret void
;
; ASM-LABEL: knownbits_on_priv_to_flat:
; ASM: ; %bb.0:
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT: s_mov_b64 s[4:5], src_private_base
; ASM-NEXT: v_and_b32_e32 v0, 0xffff, v0
; ASM-NEXT: v_mov_b32_e32 v1, s5
; ASM-NEXT: v_mov_b32_e32 v2, 7
; ASM-NEXT: flat_store_dword v[0:1], v2
; ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; ASM-NEXT: s_setpc_b64 s[30:31]
  %ptr.int = ptrtoint ptr addrspace(5) %ptr to i32
  %ptr.or = and i32 %ptr.int, 65535 ; ensure only lower 16 bits can be set.
  %kb.ptr = inttoptr i32 %ptr.or to ptr addrspace(5)
  %x = addrspacecast ptr addrspace(5) %kb.ptr to ptr
  store volatile i32 7, ptr %x
  ret void
}

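; The non-null fact must be propagated through a recursive phi: every incoming
; value (the alloca and the masked pointer) is known non-null, so the cast of
; %phi.ptr uses the intrinsic as well.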
define void @recursive_phis(i1 %cond, ptr addrspace(5) %ptr) {
; OPT-LABEL: define void @recursive_phis(
; OPT-SAME: i1 [[COND:%.*]], ptr addrspace(5) [[PTR:%.*]]) {
; OPT-NEXT: entry:
; OPT-NEXT: [[ALLOCA:%.*]] = alloca i8, align 1, addrspace(5)
; OPT-NEXT: br i1 [[COND]], label [[THEN:%.*]], label [[ELSE:%.*]]
; OPT: then:
; OPT-NEXT: [[PTR_INT:%.*]] = ptrtoint ptr addrspace(5) [[PTR]] to i32
; OPT-NEXT: [[PTR_OR:%.*]] = and i32 [[PTR_INT]], 65535
; OPT-NEXT: [[KB_PTR:%.*]] = inttoptr i32 [[PTR_OR]] to ptr addrspace(5)
; OPT-NEXT: br label [[FINALLY:%.*]]
; OPT: else:
; OPT-NEXT: [[OTHER_PHI:%.*]] = phi ptr addrspace(5) [ [[ALLOCA]], [[ENTRY:%.*]] ], [ [[PHI_PTR:%.*]], [[FINALLY]] ]
; OPT-NEXT: br label [[FINALLY]]
; OPT: finally:
; OPT-NEXT: [[PHI_PTR]] = phi ptr addrspace(5) [ [[KB_PTR]], [[THEN]] ], [ [[OTHER_PHI]], [[ELSE]] ]
; OPT-NEXT: [[TMP0:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) [[PHI_PTR]])
; OPT-NEXT: store volatile i32 7, ptr [[TMP0]], align 4
; OPT-NEXT: br i1 [[COND]], label [[ELSE]], label [[END:%.*]]
; OPT: end:
; OPT-NEXT: ret void
;
; DAGISEL-ASM-LABEL: recursive_phis:
; DAGISEL-ASM: ; %bb.0: ; %entry
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-ASM-NEXT: v_and_b32_e32 v0, 1, v0
; DAGISEL-ASM-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; DAGISEL-ASM-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; DAGISEL-ASM-NEXT: s_and_saveexec_b64 s[4:5], vcc
; DAGISEL-ASM-NEXT: ; %bb.1: ; %then
; DAGISEL-ASM-NEXT: v_and_b32_e32 v0, 0xffff, v1
; DAGISEL-ASM-NEXT: ; %bb.2: ; %finallyendcf.split
; DAGISEL-ASM-NEXT: s_or_b64 exec, exec, s[4:5]
; DAGISEL-ASM-NEXT: s_xor_b64 s[6:7], vcc, -1
; DAGISEL-ASM-NEXT: s_mov_b64 s[4:5], 0
; DAGISEL-ASM-NEXT: s_mov_b64 s[8:9], src_private_base
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
; DAGISEL-ASM-NEXT: .LBB7_3: ; %finally
; DAGISEL-ASM-NEXT: ; =>This Inner Loop Header: Depth=1
; DAGISEL-ASM-NEXT: s_and_b64 s[10:11], exec, s[6:7]
; DAGISEL-ASM-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5]
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, s9
; DAGISEL-ASM-NEXT: flat_store_dword v[0:1], v2
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0)
; DAGISEL-ASM-NEXT: s_andn2_b64 exec, exec, s[4:5]
; DAGISEL-ASM-NEXT: s_cbranch_execnz .LBB7_3
; DAGISEL-ASM-NEXT: ; %bb.4: ; %end
; DAGISEL-ASM-NEXT: s_or_b64 exec, exec, s[4:5]
; DAGISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-ASM-LABEL: recursive_phis:
; GISEL-ASM: ; %bb.0: ; %entry
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-ASM-NEXT: v_and_b32_e32 v0, 1, v0
; GISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GISEL-ASM-NEXT: s_xor_b64 s[4:5], vcc, -1
; GISEL-ASM-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GISEL-ASM-NEXT: s_and_saveexec_b64 s[6:7], vcc
; GISEL-ASM-NEXT: ; %bb.1: ; %then
; GISEL-ASM-NEXT: v_and_b32_e32 v0, 0xffff, v1
; GISEL-ASM-NEXT: ; %bb.2: ; %finallyendcf.split
; GISEL-ASM-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-ASM-NEXT: s_mov_b64 s[8:9], src_private_base
; GISEL-ASM-NEXT: s_mov_b64 s[6:7], 0
; GISEL-ASM-NEXT: v_mov_b32_e32 v1, s9
; GISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
; GISEL-ASM-NEXT: .LBB7_3: ; %finally
; GISEL-ASM-NEXT: ; =>This Inner Loop Header: Depth=1
; GISEL-ASM-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GISEL-ASM-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7]
; GISEL-ASM-NEXT: flat_store_dword v[0:1], v2
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0)
; GISEL-ASM-NEXT: s_andn2_b64 exec, exec, s[6:7]
; GISEL-ASM-NEXT: s_cbranch_execnz .LBB7_3
; GISEL-ASM-NEXT: ; %bb.4: ; %end
; GISEL-ASM-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-ASM-NEXT: s_setpc_b64 s[30:31]
entry:
  %alloca = alloca i8, addrspace(5)
  br i1 %cond, label %then, label %else

then:
  %ptr.int = ptrtoint ptr addrspace(5) %ptr to i32
  %ptr.or = and i32 %ptr.int, 65535 ; ensure only the lower 16 bits can be set
  %kb.ptr = inttoptr i32 %ptr.or to ptr addrspace(5)
  br label %finally

else:
  %other.phi = phi ptr addrspace(5) [%alloca, %entry], [%phi.ptr, %finally]
  br label %finally

finally:
  %phi.ptr = phi ptr addrspace(5) [%kb.ptr, %then], [%other.phi, %else]
  %x = addrspacecast ptr addrspace(5) %phi.ptr to ptr
  store volatile i32 7, ptr %x
  br i1 %cond, label %else, label %end

end:
  ret void
}