//===-- AMDGPURewriteAGPRCopyMFMA.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file \brief Try to replace MFMA instructions using VGPRs with MFMA
/// instructions using AGPRs. We expect MFMAs to be selected using VGPRs, and
/// only use AGPRs if it helps avoid spilling. In this case, the MFMA will have
/// copies between AGPRs and VGPRs and the AGPR variant of an MFMA pseudo. This
/// pass will attempt to delete the cross register bank copy and replace the
/// MFMA opcode.
///
/// TODO:
/// - Handle rewrites of phis. This must be more careful than normal about the
/// reassignment. We do not want to introduce an AGPR-to-AGPR copy inside of a
/// loop, so it depends on the exact assignment of the copy.
///
/// - Update LiveIntervals incrementally instead of recomputing from scratch
///
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
#define DEBUG_TYPE "amdgpu-rewrite-agpr-copy-mfma"
namespace {
STATISTIC(NumMFMAsRewrittenToAGPR,
"Number of MFMA instructions rewritten to use AGPR form");
/// Map from spill slot frame index to list of instructions which reference it.
using SpillReferenceMap = DenseMap<int, SmallVector<MachineInstr *, 4>>;
class AMDGPURewriteAGPRCopyMFMAImpl {
MachineFunction &MF;
const GCNSubtarget &ST;
const SIInstrInfo &TII;
const SIRegisterInfo &TRI;
MachineRegisterInfo &MRI;
VirtRegMap &VRM;
LiveRegMatrix &LRM;
LiveIntervals &LIS;
LiveStacks &LSS;
const RegisterClassInfo &RegClassInfo;
bool attemptReassignmentsToAGPR(SmallSetVector<Register, 4> &InterferingRegs,
MCPhysReg PrefPhysReg) const;
public:
AMDGPURewriteAGPRCopyMFMAImpl(MachineFunction &MF, VirtRegMap &VRM,
LiveRegMatrix &LRM, LiveIntervals &LIS,
LiveStacks &LSS,
const RegisterClassInfo &RegClassInfo)
: MF(MF), ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
TRI(*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
LIS(LIS), LSS(LSS), RegClassInfo(RegClassInfo) {}
bool isRewriteCandidate(const MachineInstr &MI) const {
return TII.isMAI(MI) && AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()) != -1;
}
/// Find AV_* registers assigned to AGPRs (or virtual registers which were
/// already required to be AGPR).
///
/// \return the physical register \p VReg is assigned to if it is an AGPR,
/// otherwise MCRegister().
MCRegister getAssignedAGPR(Register VReg) const {
MCRegister PhysReg = VRM.getPhys(VReg);
if (!PhysReg)
return MCRegister();
// If this is an AV register, we have to check if the actual assignment is
// to an AGPR
const TargetRegisterClass *AssignedRC = TRI.getPhysRegBaseClass(PhysReg);
return TRI.isAGPRClass(AssignedRC) ? PhysReg : MCRegister();
}
bool tryReassigningMFMAChain(MachineInstr &MFMA, Register MFMAHintReg,
MCPhysReg PhysRegHint) const;
/// Compute the register class constraints based on the uses of \p Reg,
/// excluding uses in MFMAs which can be rewritten to change the register
/// class constraint. This should be nearly identical to
/// MachineRegisterInfo::recomputeRegClass.
/// \p RewriteCandidates will collect the set of MFMA instructions that need
/// to have the opcode mutated to perform the replacement.
///
/// \p RewriteRegs will accumulate the set of registers used by those MFMAs
/// that need to have the register classes adjusted.
bool recomputeRegClassExceptRewritable(
Register Reg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
SmallSetVector<Register, 4> &RewriteRegs) const;
bool tryFoldCopiesToAGPR(Register VReg, MCRegister AssignedAGPR) const;
bool tryFoldCopiesFromAGPR(Register VReg, MCRegister AssignedAGPR) const;
/// Replace spill instruction \p SpillMI, which loads/stores from/to \p SpillFI,
/// with a COPY to or from the replacement virtual register \p VReg.
void replaceSpillWithCopyToVReg(MachineInstr &SpillMI, int SpillFI,
Register VReg) const;
/// Create a map from spill frame indexes to the instructions that use them. A
/// frame index is included in the map only if all of its uses are spill
/// instructions.
void collectSpillIndexUses(ArrayRef<LiveInterval *> StackIntervals,
SpillReferenceMap &Map) const;
/// Attempt to unspill VGPRs by finding a free register and replacing the
/// spill instructions with copies.
void eliminateSpillsOfReassignedVGPRs() const;
bool run(MachineFunction &MF) const;
};
bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
Register StartReg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
SmallSetVector<Register, 4> &RewriteRegs) const {
SmallVector<Register, 8> Worklist = {StartReg};
// Recursively visit all transitive MFMA users
while (!Worklist.empty()) {
Register Reg = Worklist.pop_back_val();
const TargetRegisterClass *OldRC = MRI.getRegClass(Reg);
// Inflate to the equivalent AV_* class.
const TargetRegisterClass *NewRC = TRI.getLargestLegalSuperClass(OldRC, MF);
if (OldRC == NewRC)
return false;
// Accumulate constraints from all uses.
for (MachineOperand &MO : MRI.reg_nodbg_operands(Reg)) {
// Apply the effect of the given operand to NewRC.
MachineInstr *MI = MO.getParent();
// We can swap the classes of dst + src2 as a pair to AGPR, so ignore the
// effects of rewrite candidates. It just so happens that we can use
// either AGPR or VGPR in src0/src1, so don't bother checking the
// constraint effects of the individual operands.
if (isRewriteCandidate(*MI)) {
const MachineOperand *VDst =
TII.getNamedOperand(*MI, AMDGPU::OpName::vdst);
const MachineOperand *Src2 =
TII.getNamedOperand(*MI, AMDGPU::OpName::src2);
for (const MachineOperand *Op : {VDst, Src2}) {
if (!Op->isReg())
continue;
Register OtherReg = Op->getReg();
if (OtherReg.isPhysical())
return false;
if (OtherReg != Reg && RewriteRegs.insert(OtherReg))
Worklist.push_back(OtherReg);
}
if (!is_contained(RewriteCandidates, MI)) {
LLVM_DEBUG({
Register VDstPhysReg = VRM.getPhys(VDst->getReg());
dbgs() << "Attempting to replace VGPR MFMA with AGPR version:"
<< " Dst=[" << printReg(VDst->getReg()) << " => "
<< printReg(VDstPhysReg, &TRI);
if (Src2->isReg()) {
Register Src2PhysReg = VRM.getPhys(Src2->getReg());
dbgs() << "], Src2=[" << printReg(Src2->getReg(), &TRI) << " => "
<< printReg(Src2PhysReg, &TRI);
}
dbgs() << "]: " << MI;
});
RewriteCandidates.push_back(MI);
}
continue;
}
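// Not a rewritable MFMA use: apply this operand's register class constraint
// to NewRC, as MachineRegisterInfo::recomputeRegClass would.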
unsigned OpNo = &MO - &MI->getOperand(0);
NewRC = MI->getRegClassConstraintEffect(OpNo, NewRC, &TII, &TRI);
if (!NewRC || NewRC == OldRC) {
LLVM_DEBUG(dbgs() << "User of " << printReg(Reg, &TRI)
<< " cannot be reassigned to "
<< TRI.getRegClassName(NewRC) << ": " << *MI);
return false;
}
}
}
return true;
}
bool AMDGPURewriteAGPRCopyMFMAImpl::tryReassigningMFMAChain(
MachineInstr &MFMA, Register MFMAHintReg, MCPhysReg PhysRegHint) const {
// src2 and dst have the same physical class constraint; try to preserve
// the original src2 subclass if one were to exist.
SmallVector<MachineInstr *, 4> RewriteCandidates = {&MFMA};
SmallSetVector<Register, 4> RewriteRegs;
// Make sure we reassign the MFMA we found the copy from first. We want
// to ensure dst ends up in the physreg we were originally copying to.
RewriteRegs.insert(MFMAHintReg);
// We've found av = COPY (MFMA) (or MFMA (v = COPY av)) and need to verify
// that we can trivially rewrite src2 to use the new AGPR. If we can't
// trivially replace it, we're going to induce as many copies as we would have
// emitted in the first place, as well as need to assign another register, and
// need to figure out where to put them. The live range splitting is smarter
// than anything we're doing here, so trust it did something reasonable.
//
// Note recomputeRegClassExceptRewritable will consider the constraints of
// this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
if (!recomputeRegClassExceptRewritable(MFMAHintReg, RewriteCandidates,
RewriteRegs)) {
LLVM_DEBUG(dbgs() << "Could not recompute the regclass of dst reg "
<< printReg(MFMAHintReg, &TRI) << '\n');
return false;
}
// If src2 and dst are different registers, we need to also reassign the
// input to an available AGPR if it is compatible with all other uses.
//
// If we can't reassign it, we'd need to introduce a different copy
// which is likely worse than the copy we'd be saving.
//
// It's likely that the MFMA is used in sequence with other MFMAs; if we
// cannot migrate the full use/def chain of MFMAs, we would need to
// introduce intermediate copies somewhere. So we only make the
// transform if all the interfering MFMAs can also be migrated. Collect
// the set of rewritable MFMAs and check if we can assign an AGPR at
// that point.
//
// If any of the MFMAs aren't reassignable, we give up and rollback to
// the original register assignments.
using RecoloringStack =
SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>;
RecoloringStack TentativeReassignments;
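// Tentatively unassign every register involved, remembering the old
// assignments so they can be rolled back if the AGPR reassignment fails.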
for (Register RewriteReg : RewriteRegs) {
LiveInterval &LI = LIS.getInterval(RewriteReg);
TentativeReassignments.push_back({&LI, VRM.getPhys(RewriteReg)});
LRM.unassign(LI);
}
if (!attemptReassignmentsToAGPR(RewriteRegs, PhysRegHint)) {
// Roll back the register assignments to the original state.
for (auto [LI, OldAssign] : TentativeReassignments) {
if (VRM.hasPhys(LI->reg()))
LRM.unassign(*LI);
LRM.assign(*LI, OldAssign);
}
return false;
}
// Fixup the register classes of the virtual registers now that we've
// committed to the reassignments.
for (Register InterferingReg : RewriteRegs) {
const TargetRegisterClass *EquivalentAGPRRegClass =
TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
MRI.setRegClass(InterferingReg, EquivalentAGPRRegClass);
}
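// Finally, mutate each rewritten MFMA to the equivalent opcode that reads
// src2 from and writes vdst to AGPRs.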
for (MachineInstr *RewriteCandidate : RewriteCandidates) {
int NewMFMAOp =
AMDGPU::getMFMASrcCVDstAGPROp(RewriteCandidate->getOpcode());
RewriteCandidate->setDesc(TII.get(NewMFMAOp));
++NumMFMAsRewrittenToAGPR;
}
return true;
}
/// Attempt to reassign the registers in \p InterferingRegs to be AGPRs, with a
/// preference to use \p PhysReg first. Returns false if the reassignments
/// cannot be trivially performed.
bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR(
SmallSetVector<Register, 4> &InterferingRegs, MCPhysReg PrefPhysReg) const {
// FIXME: The ordering may matter here, but we're just taking the use-list
// order, with the special case of ensuring the starting instruction is
// processed first. We probably should extract the priority advisor out of
// greedy and use that ordering.
for (Register InterferingReg : InterferingRegs) {
LiveInterval &ReassignLI = LIS.getInterval(InterferingReg);
const TargetRegisterClass *EquivalentAGPRRegClass =
TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
MCPhysReg Assignable = AMDGPU::NoRegister;
if (EquivalentAGPRRegClass->contains(PrefPhysReg) &&
LRM.checkInterference(ReassignLI, PrefPhysReg) ==
LiveRegMatrix::IK_Free) {
// First try to assign to the AGPR we were already copying to. This
// should be the first assignment we attempt. We have to guard
// against the use being a subregister (which doesn't have an exact
// class match).
// TODO: If this does happen to be a subregister use, we should
// still try to assign to a subregister of the original copy result.
Assignable = PrefPhysReg;
} else {
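// Otherwise, scan the AGPR allocation order for any register that is free
// over this interval.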
ArrayRef<MCPhysReg> AllocOrder =
RegClassInfo.getOrder(EquivalentAGPRRegClass);
for (MCPhysReg Reg : AllocOrder) {
if (LRM.checkInterference(ReassignLI, Reg) == LiveRegMatrix::IK_Free) {
Assignable = Reg;
break;
}
}
}
if (!Assignable) {
LLVM_DEBUG(dbgs() << "Unable to reassign VGPR "
<< printReg(InterferingReg, &TRI)
<< " to a free AGPR\n");
return false;
}
LLVM_DEBUG(dbgs() << "Reassigning VGPR " << printReg(InterferingReg, &TRI)
<< " to " << printReg(Assignable, &TRI) << '\n');
LRM.assign(ReassignLI, Assignable);
}
return true;
}
/// Identify copies that look like:
/// %vdst:vgpr = V_MFMA_.. %src0:av, %src1:av, %src2:vgpr
/// %agpr = COPY %vdst
///
/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
/// versions of the MFMA. This should cover the common case.
bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesToAGPR(
Register VReg, MCRegister AssignedAGPR) const {
bool MadeChange = false;
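// VReg is the AGPR-assigned destination of a copy; look through its defining
// copies for VGPR MFMA results that can be rewritten in place.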
for (MachineInstr &UseMI : MRI.def_instructions(VReg)) {
if (!UseMI.isCopy())
continue;
Register CopySrcReg = UseMI.getOperand(1).getReg();
if (!CopySrcReg.isVirtual())
continue;
// TODO: Handle loop phis copied to AGPR. e.g.
//
// loop:
// %phi:vgpr = COPY %mfma:vgpr
// %mfma:vgpr = V_MFMA_xxx_vgprcd_e64 %a, %b, %phi
// s_cbranch_vccnz loop
//
// endloop:
// %agpr = mfma
//
// We need to be sure that %phi is assigned to the same physical register as
// %mfma, or else we will just be moving copies into the loop.
for (MachineInstr &CopySrcDefMI : MRI.def_instructions(CopySrcReg)) {
if (isRewriteCandidate(CopySrcDefMI) &&
tryReassigningMFMAChain(
CopySrcDefMI, CopySrcDefMI.getOperand(0).getReg(), AssignedAGPR))
MadeChange = true;
}
}
return MadeChange;
}
/// Identify copies that look like:
/// %src2:vgpr = COPY %src2:agpr
/// %vdst:vgpr = V_MFMA_... %src0:av, %src1:av, %src2:vgpr
///
/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
/// versions of the MFMA. This should cover rarer cases, and will generally be
/// redundant with tryFoldCopiesToAGPR.
bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(
Register VReg, MCRegister AssignedAGPR) const {
bool MadeChange = false;
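// VReg is AGPR-assigned and copied into a VGPR; look through the uses of
// that copy's result for MFMAs that can be rewritten to read it directly.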
for (MachineInstr &UseMI : MRI.use_instructions(VReg)) {
if (!UseMI.isCopy())
continue;
Register CopyDstReg = UseMI.getOperand(0).getReg();
if (!CopyDstReg.isVirtual())
continue;
for (MachineOperand &CopyUseMO : MRI.reg_nodbg_operands(CopyDstReg)) {
if (!CopyUseMO.readsReg())
continue;
MachineInstr &CopyUseMI = *CopyUseMO.getParent();
if (isRewriteCandidate(CopyUseMI)) {
if (tryReassigningMFMAChain(CopyUseMI, CopyDstReg,
VRM.getPhys(CopyDstReg)))
MadeChange = true;
}
}
}
return MadeChange;
}
void AMDGPURewriteAGPRCopyMFMAImpl::replaceSpillWithCopyToVReg(
MachineInstr &SpillMI, int SpillFI, Register VReg) const {
const DebugLoc &DL = SpillMI.getDebugLoc();
MachineBasicBlock &MBB = *SpillMI.getParent();
MachineInstr *NewCopy;
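// A spill store becomes a copy of the spilled value into VReg; a reload
// becomes a copy from VReg into the original reload destination.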
if (SpillMI.mayStore()) {
NewCopy = BuildMI(MBB, SpillMI, DL, TII.get(TargetOpcode::COPY), VReg)
.add(SpillMI.getOperand(0));
} else {
NewCopy = BuildMI(MBB, SpillMI, DL, TII.get(TargetOpcode::COPY))
.add(SpillMI.getOperand(0))
.addReg(VReg);
}
LIS.ReplaceMachineInstrInMaps(SpillMI, *NewCopy);
SpillMI.eraseFromParent();
}
void AMDGPURewriteAGPRCopyMFMAImpl::collectSpillIndexUses(
ArrayRef<LiveInterval *> StackIntervals, SpillReferenceMap &Map) const {
SmallSet<int, 4> NeededFrameIndexes;
for (const LiveInterval *LI : StackIntervals)
NeededFrameIndexes.insert(LI->reg().stackSlotIndex());
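// Walk the function once, recording every spill instruction that references
// one of the interesting slots.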
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
for (MachineOperand &MO : MI.operands()) {
if (!MO.isFI() || !NeededFrameIndexes.count(MO.getIndex()))
continue;
if (TII.isVGPRSpill(MI)) {
SmallVector<MachineInstr *, 4> &References = Map[MO.getIndex()];
References.push_back(&MI);
break;
}
// This use is not really a spill instruction, so ignore all uses of this
// frame index.
// TODO: This should probably be verifier enforced.
NeededFrameIndexes.erase(MO.getIndex());
Map.erase(MO.getIndex());
}
}
}
}
void AMDGPURewriteAGPRCopyMFMAImpl::eliminateSpillsOfReassignedVGPRs() const {
unsigned NumSlots = LSS.getNumIntervals();
if (NumSlots == 0)
return;
MachineFrameInfo &MFI = MF.getFrameInfo();
SmallVector<LiveInterval *, 32> StackIntervals;
StackIntervals.reserve(NumSlots);
for (auto &[Slot, LI] : LSS) {
if (!MFI.isSpillSlotObjectIndex(Slot) || MFI.isDeadObjectIndex(Slot))
continue;
const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
if (TRI.hasVGPRs(RC))
StackIntervals.push_back(&LI);
}
sort(StackIntervals, [](const LiveInterval *A, const LiveInterval *B) {
// The ordering has to be strictly weak. Sort the heaviest intervals first to
// prioritize their unspilling.
if (A->weight() != B->weight())
return A->weight() > B->weight();
if (A->getSize() != B->getSize())
return A->getSize() > B->getSize();
// Tie breaker by number to avoid need for stable sort
return A->reg().stackSlotIndex() < B->reg().stackSlotIndex();
});
// FIXME: The APIs for dealing with the LiveInterval of a frame index are
// cumbersome. LiveStacks owns its LiveIntervals which refer to stack
// slots. We cannot use the usual LiveRegMatrix::assign and unassign on these,
// and must create a substitute virtual register to do so. This makes
// incremental updating here difficult: we have to actually perform the IR
// mutation to get the new vreg references in place, compute a LiveInterval
// for the replacement register, and then assign it so the new interference
// is tracked correctly; we cannot simply migrate the LiveInterval we already
// have.
//
// To avoid walking through the entire function for each index, pre-collect
// all of the instructions' spill slot references.
DenseMap<int, SmallVector<MachineInstr *, 4>> SpillSlotReferences;
collectSpillIndexUses(StackIntervals, SpillSlotReferences);
for (LiveInterval *LI : StackIntervals) {
int Slot = LI->reg().stackSlotIndex();
auto SpillReferences = SpillSlotReferences.find(Slot);
if (SpillReferences == SpillSlotReferences.end())
continue;
const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
LLVM_DEBUG(dbgs() << "Trying to eliminate " << printReg(Slot, &TRI)
<< " by reassigning\n");
ArrayRef<MCPhysReg> AllocOrder = RegClassInfo.getOrder(RC);
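// Find a physical register in the allocation order that is completely free
// over the stack slot's live range.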
for (MCPhysReg PhysReg : AllocOrder) {
if (LRM.checkInterference(*LI, PhysReg) != LiveRegMatrix::IK_Free)
continue;
LLVM_DEBUG(dbgs() << "Reassigning " << *LI << " to "
<< printReg(PhysReg, &TRI) << '\n');
Register NewVReg = MRI.createVirtualRegister(RC);
for (MachineInstr *SpillMI : SpillReferences->second)
replaceSpillWithCopyToVReg(*SpillMI, Slot, NewVReg);
// TODO: We should be able to transfer the information from the stack
// slot's LiveInterval without recomputing from scratch with the
// replacement vreg uses.
LiveInterval &NewLI = LIS.createAndComputeVirtRegInterval(NewVReg);
VRM.grow();
LRM.assign(NewLI, PhysReg);
MFI.RemoveStackObject(Slot);
break;
}
}
}
bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
// This only applies on subtargets that have a configurable AGPR vs. VGPR
// allocation.
if (!ST.hasGFX90AInsts())
return false;
// Early exit if no AGPRs were assigned.
if (!LRM.isPhysRegUsed(AMDGPU::AGPR0)) {
LLVM_DEBUG(dbgs() << "skipping function that did not allocate AGPRs\n");
return false;
}
bool MadeChange = false;
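// Scan every virtual register that ended up assigned to an AGPR and try to
// fold away the cross bank copies feeding into or out of its MFMAs.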
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
Register VReg = Register::index2VirtReg(I);
MCRegister AssignedAGPR = getAssignedAGPR(VReg);
if (!AssignedAGPR)
continue;
if (tryFoldCopiesToAGPR(VReg, AssignedAGPR))
MadeChange = true;
if (tryFoldCopiesFromAGPR(VReg, AssignedAGPR))
MadeChange = true;
}
// If we've successfully rewritten some MFMAs, we've alleviated some VGPR
// pressure. See if we can eliminate some spills now that those registers are
// more available.
if (MadeChange)
eliminateSpillsOfReassignedVGPRs();
return MadeChange;
}
class AMDGPURewriteAGPRCopyMFMALegacy : public MachineFunctionPass {
public:
static char ID;
RegisterClassInfo RegClassInfo;
AMDGPURewriteAGPRCopyMFMALegacy() : MachineFunctionPass(ID) {
initializeAMDGPURewriteAGPRCopyMFMALegacyPass(
*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
return "AMDGPU Rewrite AGPR-Copy-MFMA";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LiveIntervalsWrapperPass>();
AU.addRequired<VirtRegMapWrapperLegacy>();
AU.addRequired<LiveRegMatrixWrapperLegacy>();
AU.addRequired<LiveStacksWrapperLegacy>();
AU.addPreserved<LiveIntervalsWrapperPass>();
AU.addPreserved<VirtRegMapWrapperLegacy>();
AU.addPreserved<LiveRegMatrixWrapperLegacy>();
AU.addPreserved<LiveStacksWrapperLegacy>();
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
} // End anonymous namespace.
INITIALIZE_PASS_BEGIN(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
"AMDGPU Rewrite AGPR-Copy-MFMA", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy)
INITIALIZE_PASS_END(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
"AMDGPU Rewrite AGPR-Copy-MFMA", false, false)
char AMDGPURewriteAGPRCopyMFMALegacy::ID = 0;
char &llvm::AMDGPURewriteAGPRCopyMFMALegacyID =
AMDGPURewriteAGPRCopyMFMALegacy::ID;
bool AMDGPURewriteAGPRCopyMFMALegacy::runOnMachineFunction(
MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
RegClassInfo.runOnMachineFunction(MF);
auto &VRM = getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
auto &LRM = getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
auto &LSS = getAnalysis<LiveStacksWrapperLegacy>().getLS();
AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
return Impl.run(MF);
}
PreservedAnalyses
AMDGPURewriteAGPRCopyMFMAPass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM) {
VirtRegMap &VRM = MFAM.getResult<VirtRegMapAnalysis>(MF);
LiveRegMatrix &LRM = MFAM.getResult<LiveRegMatrixAnalysis>(MF);
LiveIntervals &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
LiveStacks &LSS = MFAM.getResult<LiveStacksAnalysis>(MF);
RegisterClassInfo RegClassInfo;
RegClassInfo.runOnMachineFunction(MF);
AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
if (!Impl.run(MF))
return PreservedAnalyses::all();
auto PA = getMachineFunctionPassPreservedAnalyses();
PA.preserveSet<CFGAnalyses>();
PA.preserve<LiveStacksAnalysis>();
return PA;
}