|  | //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "AMDGPUMachineFunction.h" | 
|  | #include "AMDGPU.h" | 
|  | #include "AMDGPUMemoryUtils.h" | 
|  | #include "AMDGPUSubtarget.h" | 
|  | #include "Utils/AMDGPUBaseInfo.h" | 
|  | #include "llvm/CodeGen/MachineModuleInfo.h" | 
|  | #include "llvm/IR/ConstantRange.h" | 
|  | #include "llvm/IR/Constants.h" | 
|  | #include "llvm/IR/Metadata.h" | 
|  | #include "llvm/Target/TargetMachine.h" | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | static const GlobalVariable * | 
|  | getKernelDynLDSGlobalFromFunction(const Function &F) { | 
|  | const Module *M = F.getParent(); | 
|  | SmallString<64> KernelDynLDSName("llvm.amdgcn."); | 
|  | KernelDynLDSName += F.getName(); | 
|  | KernelDynLDSName += ".dynlds"; | 
|  | return M->getNamedGlobal(KernelDynLDSName); | 
|  | } | 
|  |  | 
|  | static bool hasLDSKernelArgument(const Function &F) { | 
|  | for (const Argument &Arg : F.args()) { | 
|  | Type *ArgTy = Arg.getType(); | 
|  | if (auto *PtrTy = dyn_cast<PointerType>(ArgTy)) { | 
|  | if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) | 
|  | return true; | 
|  | } | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F, | 
|  | const AMDGPUSubtarget &ST) | 
|  | : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())), | 
|  | IsModuleEntryFunction( | 
|  | AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())), | 
|  | IsChainFunction(AMDGPU::isChainCC(F.getCallingConv())) { | 
|  |  | 
|  | // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, | 
|  | // except reserved size is not correctly aligned. | 
|  |  | 
|  | Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound"); | 
|  | MemoryBound = MemBoundAttr.getValueAsBool(); | 
|  |  | 
|  | Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter"); | 
|  | WaveLimiter = WaveLimitAttr.getValueAsBool(); | 
|  |  | 
|  | // FIXME: How is this attribute supposed to interact with statically known | 
|  | // global sizes? | 
|  | StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString(); | 
|  | if (!S.empty()) | 
|  | S.consumeInteger(0, GDSSize); | 
|  |  | 
|  | // Assume the attribute allocates before any known GDS globals. | 
|  | StaticGDSSize = GDSSize; | 
|  |  | 
|  | // Second value, if present, is the maximum value that can be assigned. | 
|  | // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics | 
|  | // during codegen. | 
|  | std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute( | 
|  | F, "amdgpu-lds-size", {0, UINT32_MAX}, true); | 
|  |  | 
|  | // The two separate variables are only profitable when the LDS module lowering | 
|  | // pass is disabled. If graphics does not use dynamic LDS, this is never | 
|  | // profitable. Leaving cleanup for a later change. | 
|  | LDSSize = LDSSizeRange.first; | 
|  | StaticLDSSize = LDSSize; | 
|  |  | 
|  | CallingConv::ID CC = F.getCallingConv(); | 
|  | if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) | 
|  | ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign); | 
|  |  | 
|  | // FIXME: Shouldn't be target specific | 
|  | Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math"); | 
|  | NoSignedZerosFPMath = | 
|  | NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true"; | 
|  |  | 
|  | const GlobalVariable *DynLdsGlobal = getKernelDynLDSGlobalFromFunction(F); | 
|  | if (DynLdsGlobal || hasLDSKernelArgument(F)) | 
|  | UsesDynamicLDS = true; | 
|  | } | 
|  |  | 
|  | unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, | 
|  | const GlobalVariable &GV, | 
|  | Align Trailing) { | 
|  | auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0)); | 
|  | if (!Entry.second) | 
|  | return Entry.first->second; | 
|  |  | 
|  | Align Alignment = | 
|  | DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); | 
|  |  | 
|  | unsigned Offset; | 
|  | if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { | 
|  | if (AMDGPU::isNamedBarrier(GV)) { | 
|  | std::optional<unsigned> BarAddr = getLDSAbsoluteAddress(GV); | 
|  | if (!BarAddr) | 
|  | llvm_unreachable("named barrier should have an assigned address"); | 
|  | Entry.first->second = BarAddr.value(); | 
|  | unsigned BarCnt = DL.getTypeAllocSize(GV.getValueType()) / 16; | 
|  | recordNumNamedBarriers(BarAddr.value(), BarCnt); | 
|  | return BarAddr.value(); | 
|  | } | 
|  |  | 
|  | std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV); | 
|  | if (MaybeAbs) { | 
|  | // Absolute address LDS variables that exist prior to the LDS lowering | 
|  | // pass raise a fatal error in that pass. These failure modes are only | 
|  | // reachable if that lowering pass is disabled or broken. If/when adding | 
|  | // support for absolute addresses on user specified variables, the | 
|  | // alignment check moves to the lowering pass and the frame calculation | 
|  | // needs to take the user variables into consideration. | 
|  |  | 
|  | uint32_t ObjectStart = *MaybeAbs; | 
|  |  | 
|  | if (ObjectStart != alignTo(ObjectStart, Alignment)) { | 
|  | report_fatal_error("Absolute address LDS variable inconsistent with " | 
|  | "variable alignment"); | 
|  | } | 
|  |  | 
|  | if (isModuleEntryFunction()) { | 
|  | // If this is a module entry function, we can also sanity check against | 
|  | // the static frame. Strictly it would be better to check against the | 
|  | // attribute, i.e. that the variable is within the always-allocated | 
|  | // section, and not within some other non-absolute-address object | 
|  | // allocated here, but the extra error detection is minimal and we would | 
|  | // have to pass the Function around or cache the attribute value. | 
|  | uint32_t ObjectEnd = | 
|  | ObjectStart + DL.getTypeAllocSize(GV.getValueType()); | 
|  | if (ObjectEnd > StaticLDSSize) { | 
|  | report_fatal_error( | 
|  | "Absolute address LDS variable outside of static frame"); | 
|  | } | 
|  | } | 
|  |  | 
|  | Entry.first->second = ObjectStart; | 
|  | return ObjectStart; | 
|  | } | 
|  |  | 
|  | /// TODO: We should sort these to minimize wasted space due to alignment | 
|  | /// padding. Currently the padding is decided by the first encountered use | 
|  | /// during lowering. | 
|  | Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment); | 
|  |  | 
|  | StaticLDSSize += DL.getTypeAllocSize(GV.getValueType()); | 
|  |  | 
|  | // Align LDS size to trailing, e.g. for aligning dynamic shared memory | 
|  | LDSSize = alignTo(StaticLDSSize, Trailing); | 
|  | } else { | 
|  | assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS && | 
|  | "expected region address space"); | 
|  |  | 
|  | Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment); | 
|  | StaticGDSSize += DL.getTypeAllocSize(GV.getValueType()); | 
|  |  | 
|  | // FIXME: Apply alignment of dynamic GDS | 
|  | GDSSize = StaticGDSSize; | 
|  | } | 
|  |  | 
|  | Entry.first->second = Offset; | 
|  | return Offset; | 
|  | } | 
|  |  | 
|  | std::optional<uint32_t> | 
|  | AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { | 
|  | // TODO: Would be more consistent with the abs symbols to use a range | 
|  | MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id"); | 
|  | if (MD && MD->getNumOperands() == 1) { | 
|  | if (ConstantInt *KnownSize = | 
|  | mdconst::extract<ConstantInt>(MD->getOperand(0))) { | 
|  | uint64_t ZExt = KnownSize->getZExtValue(); | 
|  | if (ZExt <= UINT32_MAX) { | 
|  | return ZExt; | 
|  | } | 
|  | } | 
|  | } | 
|  | return {}; | 
|  | } | 
|  |  | 
|  | std::optional<uint32_t> | 
|  | AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) { | 
|  | if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) | 
|  | return {}; | 
|  |  | 
|  | std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange(); | 
|  | if (!AbsSymRange) | 
|  | return {}; | 
|  |  | 
|  | if (const APInt *V = AbsSymRange->getSingleElement()) { | 
|  | std::optional<uint64_t> ZExt = V->tryZExtValue(); | 
|  | if (ZExt && (*ZExt <= UINT32_MAX)) { | 
|  | return *ZExt; | 
|  | } | 
|  | } | 
|  |  | 
|  | return {}; | 
|  | } | 
|  |  | 
|  | void AMDGPUMachineFunction::setDynLDSAlign(const Function &F, | 
|  | const GlobalVariable &GV) { | 
|  | const Module *M = F.getParent(); | 
|  | const DataLayout &DL = M->getDataLayout(); | 
|  | assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); | 
|  |  | 
|  | Align Alignment = | 
|  | DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); | 
|  | if (Alignment <= DynLDSAlign) | 
|  | return; | 
|  |  | 
|  | LDSSize = alignTo(StaticLDSSize, Alignment); | 
|  | DynLDSAlign = Alignment; | 
|  |  | 
|  | // If there is a dynamic LDS variable associated with this function F, every | 
|  | // further dynamic LDS instance (allocated by calling setDynLDSAlign) must | 
|  | // map to the same address. This holds because no LDS is allocated after the | 
|  | // lowering pass if there are dynamic LDS variables present. | 
|  | const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F); | 
|  | if (Dyn) { | 
|  | unsigned Offset = LDSSize; // return this? | 
|  | std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn); | 
|  | if (!Expect || (Offset != *Expect)) { | 
|  | report_fatal_error("Inconsistent metadata on dynamic LDS variable"); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void AMDGPUMachineFunction::setUsesDynamicLDS(bool DynLDS) { | 
|  | UsesDynamicLDS = DynLDS; | 
|  | } | 
|  |  | 
|  | bool AMDGPUMachineFunction::isDynamicLDSUsed() const { return UsesDynamicLDS; } |