Use IndexedMap for PreloadKernArgs.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index 366be8a..6bab8e6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -182,16 +182,12 @@
SmallVector<const KernArgPreloadDescriptor *, 4>
AMDGPUFunctionArgInfo::getPreloadDescriptorsForArgIdx(unsigned ArgIdx) const {
SmallVector<const KernArgPreloadDescriptor *, 4> Results;
- for (const auto &KV : PreloadKernArgs) {
- if (KV.second.OrigArgIdx == ArgIdx)
- Results.push_back(&KV.second);
+ for (unsigned PartIdx = 0; PartIdx < PreloadKernArgs.size(); ++PartIdx) {
+ const auto &Desc = PreloadKernArgs[PartIdx]; // Visited in PartIdx order.
+ if (Desc.IsValid && Desc.OrigArgIdx == ArgIdx) // Skip never-allocated slots.
+ Results.push_back(&Desc);
}
- stable_sort(Results, [](const KernArgPreloadDescriptor *A,
- const KernArgPreloadDescriptor *B) {
- return A->PartIdx < B->PartIdx;
- });
-
return Results;
}
@@ -203,11 +199,9 @@
if (HiddenArgIt == PreloadHiddenArgsIndexMap.end())
return nullptr;
- auto KernArgIt = PreloadKernArgs.find(HiddenArgIt->second);
- if (KernArgIt == PreloadKernArgs.end())
- return nullptr;
-
- return &KernArgIt->second;
+ const KernArgPreloadDescriptor &Desc = PreloadKernArgs[HiddenArgIt->second];
+ assert(Desc.IsValid && "Hidden argument preload descriptor not valid.");
+ return &Desc;
}
const AMDGPUFunctionArgInfo &
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index 9512976..f672c6e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -11,6 +11,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/IR/LLVMContext.h"
@@ -167,6 +168,8 @@
// The registers that the argument is preloaded into. The argument may be
// split across multiple registers.
SmallVector<MCRegister, 2> Regs;
+
+ bool IsValid = false;
};
} // namespace KernArgPreload
@@ -231,9 +234,15 @@
ArgDescriptor WorkItemIDY;
ArgDescriptor WorkItemIDZ;
+ struct PreloadArgIndexFunctor {
+ using argument_type = unsigned;
+ unsigned operator()(unsigned Idx) const { return Idx; }
+ };
+
// Map the index of preloaded kernel arguments to its descriptor.
- SmallDenseMap<int, KernArgPreload::KernArgPreloadDescriptor>
- PreloadKernArgs{};
+ IndexedMap<KernArgPreload::KernArgPreloadDescriptor, PreloadArgIndexFunctor>
+ PreloadKernArgs;
+
// Map hidden argument to the index of it's descriptor.
SmallDenseMap<KernArgPreload::HiddenArg, int> PreloadHiddenArgsIndexMap{};
// The first user SGPR allocated for kernarg preloading.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 31a2f70..97c15f1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -793,7 +793,7 @@
ArgInfo->getHiddenArgPreloadDescriptor(HiddenArg);
if (PreloadDesc) {
const SmallVectorImpl<MCRegister> &Regs = PreloadDesc->Regs;
- for (const auto &Reg : Regs) {
+ for (const auto Reg : Regs) {
if (!PreloadStr.empty())
PreloadStr.push_back(' ');
PreloadStr += AMDGPUInstPrinter::getRegisterName(Reg);
@@ -817,7 +817,7 @@
if (!PreloadRegisters.empty())
PreloadRegisters.push_back(' ');
- for (const auto &Reg : Desc->Regs) {
+ for (const auto Reg : Desc->Regs) {
if (!PreloadRegisters.empty())
PreloadRegisters.push_back(' ');
PreloadRegisters += AMDGPUInstPrinter::getRegisterName(Reg);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 081d10a..6ba5756 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2745,17 +2745,20 @@
// Arg is preloaded into the previous SGPR.
if (ArgLoc.getLocVT().getStoreSize() < 4 && Alignment < 4) {
assert(InIdx >= 1 && "No previous SGPR");
- auto [It, Inserted] =
- Info.getArgInfo().PreloadKernArgs.try_emplace(InIdx);
- assert(Inserted && "Preload kernel argument allocated twice.");
- KernArgPreloadDescriptor &PreloadDesc = It->second;
+ auto &PreloadKernArgs = Info.getArgInfo().PreloadKernArgs;
+ PreloadKernArgs.grow(InIdx);
+ KernArgPreloadDescriptor &PreloadDesc = PreloadKernArgs[InIdx];
+ assert(!PreloadDesc.IsValid &&
+ "Preload kernel argument allocated twice.");
- const KernArgPreloadDescriptor &PrevDesc =
- Info.getArgInfo().PreloadKernArgs[InIdx - 1];
+ const KernArgPreloadDescriptor &PrevDesc = PreloadKernArgs[InIdx - 1];
+ assert(PrevDesc.IsValid &&
+ "Previous preload kernel argument not allocated.");
PreloadDesc.Regs.push_back(PrevDesc.Regs[0]);
PreloadDesc.OrigArgIdx = Arg.getArgNo();
PreloadDesc.PartIdx = InIdx;
+ PreloadDesc.IsValid = true;
if (Arg.hasAttribute("amdgpu-hidden-argument"))
mapHiddenArgToPreloadIndex(Info.getArgInfo(), ArgOffset,
ImplicitArgOffset, InIdx);
@@ -3183,7 +3186,9 @@
}
SDValue NewArg;
- if (Arg.isOrigArg() && Info->getArgInfo().PreloadKernArgs.count(i)) {
+ auto &PreloadKernArgs = Info->getArgInfo().PreloadKernArgs;
+ if (Arg.isOrigArg() && PreloadKernArgs.inBounds(i) &&
+ PreloadKernArgs[i].IsValid) {
if (MemVT.getStoreSize() < 4 && Alignment < 4) {
// In this case the argument is packed into the previous preload SGPR.
int64_t AlignDownOffset = alignDown(Offset, 4);
@@ -3193,8 +3198,7 @@
const SIMachineFunctionInfo *Info =
MF.getInfo<SIMachineFunctionInfo>();
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- Register Reg =
- Info->getArgInfo().PreloadKernArgs.find(i)->getSecond().Regs[0];
+ Register Reg = Info->getArgInfo().PreloadKernArgs[i].Regs[0];
assert(Reg);
Register VReg = MRI.getLiveInVirtReg(Reg);
@@ -3214,7 +3218,7 @@
MF.getInfo<SIMachineFunctionInfo>();
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
const SmallVectorImpl<MCRegister> &PreloadRegs =
- Info->getArgInfo().PreloadKernArgs.find(i)->getSecond().Regs;
+ Info->getArgInfo().PreloadKernArgs[i].Regs;
SDValue Copy;
if (PreloadRegs.size() == 1) {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 6b99af5..7e1a6e5 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -278,11 +278,13 @@
const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
unsigned AllocSizeDWord, unsigned PartIdx, unsigned ArgIdx,
unsigned PaddingSGPRs) {
- auto [It, Inserted] = ArgInfo.PreloadKernArgs.try_emplace(PartIdx);
- assert(Inserted && "Preload kernel argument allocated twice.");
- KernArgPreload::KernArgPreloadDescriptor &PreloadDesc = It->second;
+ ArgInfo.PreloadKernArgs.grow(PartIdx);
+ KernArgPreload::KernArgPreloadDescriptor &PreloadDesc =
+ ArgInfo.PreloadKernArgs[PartIdx];
+ assert(!PreloadDesc.IsValid && "Preload kernel argument allocated twice.");
PreloadDesc.PartIdx = PartIdx;
PreloadDesc.OrigArgIdx = ArgIdx;
+ PreloadDesc.IsValid = true;
NumUserSGPRs += PaddingSGPRs;
// If the available register tuples are aligned with the kernarg to be