Use IndexedMap for PreloadKernArgs.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index 366be8a..6bab8e6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -182,16 +182,12 @@
 SmallVector<const KernArgPreloadDescriptor *, 4>
 AMDGPUFunctionArgInfo::getPreloadDescriptorsForArgIdx(unsigned ArgIdx) const {
   SmallVector<const KernArgPreloadDescriptor *, 4> Results;
-  for (const auto &KV : PreloadKernArgs) {
-    if (KV.second.OrigArgIdx == ArgIdx)
-      Results.push_back(&KV.second);
+  for (unsigned PartIdx = 0; PartIdx < PreloadKernArgs.size(); ++PartIdx) {
+    const auto &Desc = PreloadKernArgs[PartIdx];
+    if (Desc.IsValid && Desc.OrigArgIdx == ArgIdx) // Skip unallocated slots.
+      Results.push_back(&Desc);
   }
 
-  stable_sort(Results, [](const KernArgPreloadDescriptor *A,
-                          const KernArgPreloadDescriptor *B) {
-    return A->PartIdx < B->PartIdx;
-  });
-
   return Results;
 }
 
@@ -203,11 +199,9 @@
   if (HiddenArgIt == PreloadHiddenArgsIndexMap.end())
     return nullptr;
 
-  auto KernArgIt = PreloadKernArgs.find(HiddenArgIt->second);
-  if (KernArgIt == PreloadKernArgs.end())
-    return nullptr;
-
-  return &KernArgIt->second;
+  const KernArgPreloadDescriptor &Desc = PreloadKernArgs[HiddenArgIt->second];
+  assert(Desc.IsValid && "Hidden argument preload descriptor not valid.");
+  return &Desc;
 }
 
 const AMDGPUFunctionArgInfo &
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index 9512976..f672c6e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -11,6 +11,7 @@
 
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/Register.h"
 #include "llvm/IR/LLVMContext.h"
@@ -167,6 +168,8 @@
   // The registers that the argument is preloaded into. The argument may be
   // split across multiple registers.
   SmallVector<MCRegister, 2> Regs;
+
+  bool IsValid = false;
 };
 
 } // namespace KernArgPreload
@@ -231,9 +234,15 @@
   ArgDescriptor WorkItemIDY;
   ArgDescriptor WorkItemIDZ;
 
+  struct PreloadArgIndexFunctor {
+    using argument_type = unsigned;
+    unsigned operator()(unsigned Idx) const { return Idx; }
+  };
+
   // Map the index of preloaded kernel arguments to its descriptor.
-  SmallDenseMap<int, KernArgPreload::KernArgPreloadDescriptor>
-      PreloadKernArgs{};
+  IndexedMap<KernArgPreload::KernArgPreloadDescriptor, PreloadArgIndexFunctor>
+      PreloadKernArgs;
+
   // Map hidden argument to the index of it's descriptor.
   SmallDenseMap<KernArgPreload::HiddenArg, int> PreloadHiddenArgsIndexMap{};
   // The first user SGPR allocated for kernarg preloading.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 31a2f70..97c15f1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -793,7 +793,7 @@
       ArgInfo->getHiddenArgPreloadDescriptor(HiddenArg);
   if (PreloadDesc) {
     const SmallVectorImpl<MCRegister> &Regs = PreloadDesc->Regs;
-    for (const auto &Reg : Regs) {
+    for (const auto Reg : Regs) {
       if (!PreloadStr.empty())
         PreloadStr.push_back(' ');
       PreloadStr += AMDGPUInstPrinter::getRegisterName(Reg);
@@ -817,7 +817,7 @@
       if (!PreloadRegisters.empty())
         PreloadRegisters.push_back(' ');
 
-      for (const auto &Reg : Desc->Regs) {
+      for (const auto Reg : Desc->Regs) {
         if (!PreloadRegisters.empty())
           PreloadRegisters.push_back(' ');
         PreloadRegisters += AMDGPUInstPrinter::getRegisterName(Reg);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 081d10a..6ba5756 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2745,17 +2745,20 @@
       // Arg is preloaded into the previous SGPR.
       if (ArgLoc.getLocVT().getStoreSize() < 4 && Alignment < 4) {
         assert(InIdx >= 1 && "No previous SGPR");
-        auto [It, Inserted] =
-            Info.getArgInfo().PreloadKernArgs.try_emplace(InIdx);
-        assert(Inserted && "Preload kernel argument allocated twice.");
-        KernArgPreloadDescriptor &PreloadDesc = It->second;
+        auto &PreloadKernArgs = Info.getArgInfo().PreloadKernArgs;
+        PreloadKernArgs.grow(InIdx);
+        KernArgPreloadDescriptor &PreloadDesc = PreloadKernArgs[InIdx];
+        assert(!PreloadDesc.IsValid &&
+               "Preload kernel argument allocated twice.");
 
-        const KernArgPreloadDescriptor &PrevDesc =
-            Info.getArgInfo().PreloadKernArgs[InIdx - 1];
+        const KernArgPreloadDescriptor &PrevDesc = PreloadKernArgs[InIdx - 1];
+        assert(PrevDesc.IsValid &&
+               "Previous preload kernel argument not allocated.");
         PreloadDesc.Regs.push_back(PrevDesc.Regs[0]);
 
         PreloadDesc.OrigArgIdx = Arg.getArgNo();
         PreloadDesc.PartIdx = InIdx;
+        PreloadDesc.IsValid = true;
         if (Arg.hasAttribute("amdgpu-hidden-argument"))
           mapHiddenArgToPreloadIndex(Info.getArgInfo(), ArgOffset,
                                      ImplicitArgOffset, InIdx);
@@ -3183,7 +3186,9 @@
       }
 
       SDValue NewArg;
-      if (Arg.isOrigArg() && Info->getArgInfo().PreloadKernArgs.count(i)) {
+      auto &PreloadKernArgs = Info->getArgInfo().PreloadKernArgs;
+      if (Arg.isOrigArg() && PreloadKernArgs.inBounds(i) &&
+          PreloadKernArgs[i].IsValid) {
         if (MemVT.getStoreSize() < 4 && Alignment < 4) {
           // In this case the argument is packed into the previous preload SGPR.
           int64_t AlignDownOffset = alignDown(Offset, 4);
@@ -3193,8 +3198,7 @@
           const SIMachineFunctionInfo *Info =
               MF.getInfo<SIMachineFunctionInfo>();
           MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
-          Register Reg =
-              Info->getArgInfo().PreloadKernArgs.find(i)->getSecond().Regs[0];
+          Register Reg = Info->getArgInfo().PreloadKernArgs[i].Regs[0];
 
           assert(Reg);
           Register VReg = MRI.getLiveInVirtReg(Reg);
@@ -3214,7 +3218,7 @@
               MF.getInfo<SIMachineFunctionInfo>();
           MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
           const SmallVectorImpl<MCRegister> &PreloadRegs =
-              Info->getArgInfo().PreloadKernArgs.find(i)->getSecond().Regs;
+              Info->getArgInfo().PreloadKernArgs[i].Regs;
 
           SDValue Copy;
           if (PreloadRegs.size() == 1) {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 6b99af5..7e1a6e5 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -278,11 +278,13 @@
     const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
     unsigned AllocSizeDWord, unsigned PartIdx, unsigned ArgIdx,
     unsigned PaddingSGPRs) {
-  auto [It, Inserted] = ArgInfo.PreloadKernArgs.try_emplace(PartIdx);
-  assert(Inserted && "Preload kernel argument allocated twice.");
-  KernArgPreload::KernArgPreloadDescriptor &PreloadDesc = It->second;
+  ArgInfo.PreloadKernArgs.grow(PartIdx);
+  KernArgPreload::KernArgPreloadDescriptor &PreloadDesc =
+      ArgInfo.PreloadKernArgs[PartIdx];
+  assert(!PreloadDesc.IsValid && "Preload kernel argument allocated twice.");
   PreloadDesc.PartIdx = PartIdx;
   PreloadDesc.OrigArgIdx = ArgIdx;
+  PreloadDesc.IsValid = true;
 
   NumUserSGPRs += PaddingSGPRs;
   // If the available register tuples are aligned with the kernarg to be