|  | //===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // This file contains a pass that lowers homogeneous prolog/epilog instructions. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "AArch64InstrInfo.h" | 
|  | #include "AArch64Subtarget.h" | 
|  | #include "MCTargetDesc/AArch64InstPrinter.h" | 
|  | #include "llvm/CodeGen/MachineBasicBlock.h" | 
|  | #include "llvm/CodeGen/MachineFunction.h" | 
|  | #include "llvm/CodeGen/MachineInstr.h" | 
|  | #include "llvm/CodeGen/MachineInstrBuilder.h" | 
|  | #include "llvm/CodeGen/MachineModuleInfo.h" | 
|  | #include "llvm/CodeGen/MachineOperand.h" | 
|  | #include "llvm/CodeGen/TargetSubtargetInfo.h" | 
|  | #include "llvm/IR/DebugLoc.h" | 
|  | #include "llvm/IR/IRBuilder.h" | 
|  | #include "llvm/IR/Module.h" | 
|  | #include "llvm/Pass.h" | 
|  | #include <optional> | 
|  | #include <sstream> | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | #define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME                           \ | 
|  | "AArch64 homogeneous prolog/epilog lowering pass" | 
|  |  | 
|  | static cl::opt<int> FrameHelperSizeThreshold( | 
|  | "frame-helper-size-threshold", cl::init(2), cl::Hidden, | 
|  | cl::desc("The minimum number of instructions that are outlined in a frame " | 
|  | "helper (default = 2)")); | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | class AArch64LowerHomogeneousPE { | 
|  | public: | 
|  | const AArch64InstrInfo *TII; | 
|  |  | 
|  | AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI) | 
|  | : M(M), MMI(MMI) {} | 
|  |  | 
|  | bool run(); | 
|  | bool runOnMachineFunction(MachineFunction &Fn); | 
|  |  | 
|  | private: | 
|  | Module *M; | 
|  | MachineModuleInfo *MMI; | 
|  |  | 
|  | bool runOnMBB(MachineBasicBlock &MBB); | 
|  | bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, | 
|  | MachineBasicBlock::iterator &NextMBBI); | 
|  |  | 
|  | /// Lower a HOM_Prolog pseudo instruction into a helper call | 
|  | /// or a sequence of homogeneous stores. | 
|  | /// When a fp setup follows, it can be optimized. | 
|  | bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, | 
|  | MachineBasicBlock::iterator &NextMBBI); | 
|  | /// Lower a HOM_Epilog pseudo instruction into a helper call | 
|  | /// or a sequence of homogeneous loads. | 
|  | /// When a return follow, it can be optimized. | 
|  | bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, | 
|  | MachineBasicBlock::iterator &NextMBBI); | 
|  | }; | 
|  |  | 
|  | class AArch64LowerHomogeneousPrologEpilog : public ModulePass { | 
|  | public: | 
|  | static char ID; | 
|  |  | 
|  | AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) {} | 
|  | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
|  | AU.addRequired<MachineModuleInfoWrapperPass>(); | 
|  | AU.addPreserved<MachineModuleInfoWrapperPass>(); | 
|  | AU.setPreservesAll(); | 
|  | ModulePass::getAnalysisUsage(AU); | 
|  | } | 
|  | bool runOnModule(Module &M) override; | 
|  |  | 
|  | StringRef getPassName() const override { | 
|  | return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME; | 
|  | } | 
|  | }; | 
|  |  | 
|  | } // end anonymous namespace | 
|  |  | 
|  | char AArch64LowerHomogeneousPrologEpilog::ID = 0; | 
|  |  | 
|  | INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog, | 
|  | "aarch64-lower-homogeneous-prolog-epilog", | 
|  | AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false) | 
|  |  | 
|  | bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) { | 
|  | if (skipModule(M)) | 
|  | return false; | 
|  |  | 
|  | MachineModuleInfo *MMI = | 
|  | &getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); | 
|  | return AArch64LowerHomogeneousPE(&M, MMI).run(); | 
|  | } | 
|  |  | 
|  | bool AArch64LowerHomogeneousPE::run() { | 
|  | bool Changed = false; | 
|  | for (auto &F : *M) { | 
|  | if (F.empty()) | 
|  | continue; | 
|  |  | 
|  | MachineFunction *MF = MMI->getMachineFunction(F); | 
|  | if (!MF) | 
|  | continue; | 
|  | Changed |= runOnMachineFunction(*MF); | 
|  | } | 
|  |  | 
|  | return Changed; | 
|  | } | 
|  | enum FrameHelperType { Prolog, PrologFrame, Epilog, EpilogTail }; | 
|  |  | 
|  | /// Return a frame helper name with the given CSRs and the helper type. | 
|  | /// For instance, a prolog helper that saves x19 and x20 is named as | 
|  | /// OUTLINED_FUNCTION_PROLOG_x19x20. | 
|  | static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs, | 
|  | FrameHelperType Type, unsigned FpOffset) { | 
|  | std::ostringstream RegStream; | 
|  | switch (Type) { | 
|  | case FrameHelperType::Prolog: | 
|  | RegStream << "OUTLINED_FUNCTION_PROLOG_"; | 
|  | break; | 
|  | case FrameHelperType::PrologFrame: | 
|  | RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_"; | 
|  | break; | 
|  | case FrameHelperType::Epilog: | 
|  | RegStream << "OUTLINED_FUNCTION_EPILOG_"; | 
|  | break; | 
|  | case FrameHelperType::EpilogTail: | 
|  | RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_"; | 
|  | break; | 
|  | } | 
|  |  | 
|  | for (auto Reg : Regs) { | 
|  | if (Reg == AArch64::NoRegister) | 
|  | continue; | 
|  | RegStream << AArch64InstPrinter::getRegisterName(Reg); | 
|  | } | 
|  |  | 
|  | return RegStream.str(); | 
|  | } | 
|  |  | 
|  | /// Create a Function for the unique frame helper with the given name. | 
|  | /// Return a newly created MachineFunction with an empty MachineBasicBlock. | 
|  | static MachineFunction &createFrameHelperMachineFunction(Module *M, | 
|  | MachineModuleInfo *MMI, | 
|  | StringRef Name) { | 
|  | LLVMContext &C = M->getContext(); | 
|  | Function *F = M->getFunction(Name); | 
|  | assert(F == nullptr && "Function has been created before"); | 
|  | F = Function::Create(FunctionType::get(Type::getVoidTy(C), false), | 
|  | Function::ExternalLinkage, Name, M); | 
|  | assert(F && "Function was null!"); | 
|  |  | 
|  | // Use ODR linkage to avoid duplication. | 
|  | F->setLinkage(GlobalValue::LinkOnceODRLinkage); | 
|  | F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); | 
|  |  | 
|  | // Set minsize, so we don't insert padding between outlined functions. | 
|  | F->addFnAttr(Attribute::NoInline); | 
|  | F->addFnAttr(Attribute::MinSize); | 
|  | F->addFnAttr(Attribute::Naked); | 
|  |  | 
|  | MachineFunction &MF = MMI->getOrCreateMachineFunction(*F); | 
|  | // Remove unnecessary register liveness and set NoVRegs. | 
|  | MF.getProperties().resetTracksLiveness().resetIsSSA().setNoVRegs(); | 
|  | MF.getRegInfo().freezeReservedRegs(); | 
|  |  | 
|  | // Create entry block. | 
|  | BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); | 
|  | IRBuilder<> Builder(EntryBB); | 
|  | Builder.CreateRetVoid(); | 
|  |  | 
|  | // Insert the new block into the function. | 
|  | MachineBasicBlock *MBB = MF.CreateMachineBasicBlock(); | 
|  | MF.insert(MF.begin(), MBB); | 
|  |  | 
|  | return MF; | 
|  | } | 
|  |  | 
|  | /// Emit a store-pair instruction for frame-setup. | 
|  | /// If Reg2 is AArch64::NoRegister, emit STR instead. | 
|  | static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB, | 
|  | MachineBasicBlock::iterator Pos, | 
|  | const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, | 
|  | int Offset, bool IsPreDec) { | 
|  | assert(Reg1 != AArch64::NoRegister); | 
|  | const bool IsPaired = Reg2 != AArch64::NoRegister; | 
|  | bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); | 
|  | assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); | 
|  | unsigned Opc; | 
|  | if (IsPreDec) { | 
|  | if (IsFloat) | 
|  | Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre; | 
|  | else | 
|  | Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre; | 
|  | } else { | 
|  | if (IsFloat) | 
|  | Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui; | 
|  | else | 
|  | Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui; | 
|  | } | 
|  | // The implicit scale for Offset is 8. | 
|  | TypeSize Scale(0U, false), Width(0U, false); | 
|  | int64_t MinOffset, MaxOffset; | 
|  | [[maybe_unused]] bool Success = | 
|  | AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset); | 
|  | assert(Success && "Invalid Opcode"); | 
|  | Offset *= (8 / (int)Scale); | 
|  |  | 
|  | MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc)); | 
|  | if (IsPreDec) | 
|  | MIB.addDef(AArch64::SP); | 
|  | if (IsPaired) | 
|  | MIB.addReg(Reg2); | 
|  | MIB.addReg(Reg1) | 
|  | .addReg(AArch64::SP) | 
|  | .addImm(Offset) | 
|  | .setMIFlag(MachineInstr::FrameSetup); | 
|  | } | 
|  |  | 
|  | /// Emit a load-pair instruction for frame-destroy. | 
|  | /// If Reg2 is AArch64::NoRegister, emit LDR instead. | 
|  | static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB, | 
|  | MachineBasicBlock::iterator Pos, | 
|  | const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, | 
|  | int Offset, bool IsPostDec) { | 
|  | assert(Reg1 != AArch64::NoRegister); | 
|  | const bool IsPaired = Reg2 != AArch64::NoRegister; | 
|  | bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); | 
|  | assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); | 
|  | unsigned Opc; | 
|  | if (IsPostDec) { | 
|  | if (IsFloat) | 
|  | Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost; | 
|  | else | 
|  | Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost; | 
|  | } else { | 
|  | if (IsFloat) | 
|  | Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui; | 
|  | else | 
|  | Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui; | 
|  | } | 
|  | // The implicit scale for Offset is 8. | 
|  | TypeSize Scale(0U, false), Width(0U, false); | 
|  | int64_t MinOffset, MaxOffset; | 
|  | [[maybe_unused]] bool Success = | 
|  | AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset); | 
|  | assert(Success && "Invalid Opcode"); | 
|  | Offset *= (8 / (int)Scale); | 
|  |  | 
|  | MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc)); | 
|  | if (IsPostDec) | 
|  | MIB.addDef(AArch64::SP); | 
|  | if (IsPaired) | 
|  | MIB.addReg(Reg2, getDefRegState(true)); | 
|  | MIB.addReg(Reg1, getDefRegState(true)) | 
|  | .addReg(AArch64::SP) | 
|  | .addImm(Offset) | 
|  | .setMIFlag(MachineInstr::FrameDestroy); | 
|  | } | 
|  |  | 
|  | /// Return a unique function if a helper can be formed with the given Regs | 
|  | /// and frame type. | 
|  | /// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22: | 
|  | ///    stp x22, x21, [sp, #-32]!    ; x29/x30 has been stored at the caller | 
|  | ///    stp x20, x19, [sp, #16] | 
|  | ///    ret | 
|  | /// | 
|  | /// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22: | 
|  | ///    stp x22, x21, [sp, #-32]!    ; x29/x30 has been stored at the caller | 
|  | ///    stp x20, x19, [sp, #16] | 
|  | ///    add fp, sp, #32 | 
|  | ///    ret | 
|  | /// | 
|  | /// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22: | 
|  | ///    mov x16, x30 | 
|  | ///    ldp x29, x30, [sp, #32] | 
|  | ///    ldp x20, x19, [sp, #16] | 
|  | ///    ldp x22, x21, [sp], #48 | 
|  | ///    ret x16 | 
|  | /// | 
|  | /// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22: | 
|  | ///    ldp x29, x30, [sp, #32] | 
|  | ///    ldp x20, x19, [sp, #16] | 
|  | ///    ldp x22, x21, [sp], #48 | 
|  | ///    ret | 
|  | /// @param M module | 
|  | /// @param MMI machine module info | 
|  | /// @param Regs callee save regs that the helper will handle | 
|  | /// @param Type frame helper type | 
|  | /// @return a helper function | 
|  | static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI, | 
|  | SmallVectorImpl<unsigned> &Regs, | 
|  | FrameHelperType Type, | 
|  | unsigned FpOffset = 0) { | 
|  | assert(Regs.size() >= 2); | 
|  | auto Name = getFrameHelperName(Regs, Type, FpOffset); | 
|  | auto *F = M->getFunction(Name); | 
|  | if (F) | 
|  | return F; | 
|  |  | 
|  | auto &MF = createFrameHelperMachineFunction(M, MMI, Name); | 
|  | MachineBasicBlock &MBB = *MF.begin(); | 
|  | const TargetSubtargetInfo &STI = MF.getSubtarget(); | 
|  | const TargetInstrInfo &TII = *STI.getInstrInfo(); | 
|  |  | 
|  | int Size = (int)Regs.size(); | 
|  | switch (Type) { | 
|  | case FrameHelperType::Prolog: | 
|  | case FrameHelperType::PrologFrame: { | 
|  | // Compute the remaining SP adjust beyond FP/LR. | 
|  | auto LRIdx = std::distance(Regs.begin(), llvm::find(Regs, AArch64::LR)); | 
|  |  | 
|  | // If the register stored to the lowest address is not LR, we must subtract | 
|  | // more from SP here. | 
|  | if (LRIdx != Size - 2) { | 
|  | assert(Regs[Size - 2] != AArch64::LR); | 
|  | emitStore(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], | 
|  | LRIdx - Size + 2, true); | 
|  | } | 
|  |  | 
|  | // Store CSRs in the reverse order. | 
|  | for (int I = Size - 3; I >= 0; I -= 2) { | 
|  | // FP/LR has been stored at call-site. | 
|  | if (Regs[I - 1] == AArch64::LR) | 
|  | continue; | 
|  | emitStore(MF, MBB, MBB.end(), TII, Regs[I - 1], Regs[I], Size - I - 1, | 
|  | false); | 
|  | } | 
|  | if (Type == FrameHelperType::PrologFrame) | 
|  | BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ADDXri)) | 
|  | .addDef(AArch64::FP) | 
|  | .addUse(AArch64::SP) | 
|  | .addImm(FpOffset) | 
|  | .addImm(0) | 
|  | .setMIFlag(MachineInstr::FrameSetup); | 
|  |  | 
|  | BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET)) | 
|  | .addReg(AArch64::LR); | 
|  | break; | 
|  | } | 
|  | case FrameHelperType::Epilog: | 
|  | case FrameHelperType::EpilogTail: | 
|  | if (Type == FrameHelperType::Epilog) | 
|  | // Stash LR to X16 | 
|  | BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ORRXrs)) | 
|  | .addDef(AArch64::X16) | 
|  | .addReg(AArch64::XZR) | 
|  | .addUse(AArch64::LR) | 
|  | .addImm(0); | 
|  |  | 
|  | for (int I = 0; I < Size - 2; I += 2) | 
|  | emitLoad(MF, MBB, MBB.end(), TII, Regs[I], Regs[I + 1], Size - I - 2, | 
|  | false); | 
|  | // Restore the last CSR with post-increment of SP. | 
|  | emitLoad(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], Size, | 
|  | true); | 
|  |  | 
|  | BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET)) | 
|  | .addReg(Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR); | 
|  | break; | 
|  | } | 
|  |  | 
|  | return M->getFunction(Name); | 
|  | } | 
|  |  | 
|  | /// This function checks if a frame helper should be used for | 
|  | /// HOM_Prolog/HOM_Epilog pseudo instruction expansion. | 
|  | /// @param MBB machine basic block | 
|  | /// @param NextMBBI  next instruction following HOM_Prolog/HOM_Epilog | 
|  | /// @param Regs callee save registers that are saved or restored. | 
|  | /// @param Type frame helper type | 
|  | /// @return True if a use of helper is qualified. | 
|  | static bool shouldUseFrameHelper(MachineBasicBlock &MBB, | 
|  | MachineBasicBlock::iterator &NextMBBI, | 
|  | SmallVectorImpl<unsigned> &Regs, | 
|  | FrameHelperType Type) { | 
|  | const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); | 
|  | auto RegCount = Regs.size(); | 
|  | assert(RegCount > 0 && (RegCount % 2 == 0)); | 
|  | // # of instructions that will be outlined. | 
|  | int InstCount = RegCount / 2; | 
|  |  | 
|  | // Do not use a helper call when not saving LR. | 
|  | if (!llvm::is_contained(Regs, AArch64::LR)) | 
|  | return false; | 
|  |  | 
|  | switch (Type) { | 
|  | case FrameHelperType::Prolog: | 
|  | // Prolog helper cannot save FP/LR. | 
|  | InstCount--; | 
|  | break; | 
|  | case FrameHelperType::PrologFrame: { | 
|  | // Effectively no change in InstCount since FpAdjustment is included. | 
|  | break; | 
|  | } | 
|  | case FrameHelperType::Epilog: | 
|  | // Bail-out if X16 is live across the epilog helper because it is used in | 
|  | // the helper to handle X30. | 
|  | for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) { | 
|  | if (NextMI->readsRegister(AArch64::W16, TRI)) | 
|  | return false; | 
|  | } | 
|  | // Epilog may not be in the last block. Check the liveness in successors. | 
|  | for (const MachineBasicBlock *SuccMBB : MBB.successors()) { | 
|  | if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16)) | 
|  | return false; | 
|  | } | 
|  | // No change in InstCount for the regular epilog case. | 
|  | break; | 
|  | case FrameHelperType::EpilogTail: { | 
|  | // EpilogTail helper includes the caller's return. | 
|  | if (NextMBBI == MBB.end()) | 
|  | return false; | 
|  | if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR) | 
|  | return false; | 
|  | InstCount++; | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | return InstCount >= FrameHelperSizeThreshold; | 
|  | } | 
|  |  | 
|  | /// Lower a HOM_Epilog pseudo instruction into a helper call while | 
|  | /// creating the helper on demand. Or emit a sequence of loads in place when not | 
|  | /// using a helper call. | 
|  | /// | 
|  | /// 1. With a helper including ret | 
|  | ///    HOM_Epilog x30, x29, x19, x20, x21, x22              ; MBBI | 
|  | ///    ret                                                  ; NextMBBI | 
|  | ///    => | 
|  | ///    b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22 | 
|  | ///    ...                                                  ; NextMBBI | 
|  | /// | 
|  | /// 2. With a helper | 
|  | ///    HOM_Epilog x30, x29, x19, x20, x21, x22 | 
|  | ///    => | 
|  | ///    bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22 | 
|  | /// | 
|  | /// 3. Without a helper | 
|  | ///    HOM_Epilog x30, x29, x19, x20, x21, x22 | 
|  | ///    => | 
|  | ///    ldp x29, x30, [sp, #32] | 
|  | ///    ldp x20, x19, [sp, #16] | 
|  | ///    ldp x22, x21, [sp], #48 | 
|  | bool AArch64LowerHomogeneousPE::lowerEpilog( | 
|  | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, | 
|  | MachineBasicBlock::iterator &NextMBBI) { | 
|  | auto &MF = *MBB.getParent(); | 
|  | MachineInstr &MI = *MBBI; | 
|  |  | 
|  | DebugLoc DL = MI.getDebugLoc(); | 
|  | SmallVector<unsigned, 8> Regs; | 
|  | bool HasUnpairedReg = false; | 
|  | for (auto &MO : MI.operands()) | 
|  | if (MO.isReg()) { | 
|  | if (!MO.getReg().isValid()) { | 
|  | // For now we are only expecting unpaired GP registers which should | 
|  | // occur exactly once. | 
|  | assert(!HasUnpairedReg); | 
|  | HasUnpairedReg = true; | 
|  | } | 
|  | Regs.push_back(MO.getReg()); | 
|  | } | 
|  | (void)HasUnpairedReg; | 
|  | int Size = (int)Regs.size(); | 
|  | if (Size == 0) | 
|  | return false; | 
|  | // Registers are in pair. | 
|  | assert(Size % 2 == 0); | 
|  | assert(MI.getOpcode() == AArch64::HOM_Epilog); | 
|  |  | 
|  | auto Return = NextMBBI; | 
|  | if (shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::EpilogTail)) { | 
|  | // When MBB ends with a return, emit a tail-call to the epilog helper | 
|  | auto *EpilogTailHelper = | 
|  | getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::EpilogTail); | 
|  | BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi)) | 
|  | .addGlobalAddress(EpilogTailHelper) | 
|  | .addImm(0) | 
|  | .setMIFlag(MachineInstr::FrameDestroy) | 
|  | .copyImplicitOps(MI) | 
|  | .copyImplicitOps(*Return); | 
|  | NextMBBI = std::next(Return); | 
|  | Return->removeFromParent(); | 
|  | } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs, | 
|  | FrameHelperType::Epilog)) { | 
|  | // The default epilog helper case. | 
|  | auto *EpilogHelper = | 
|  | getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Epilog); | 
|  | BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) | 
|  | .addGlobalAddress(EpilogHelper) | 
|  | .setMIFlag(MachineInstr::FrameDestroy) | 
|  | .copyImplicitOps(MI); | 
|  | } else { | 
|  | // Fall back to no-helper. | 
|  | for (int I = 0; I < Size - 2; I += 2) | 
|  | emitLoad(MF, MBB, MBBI, *TII, Regs[I], Regs[I + 1], Size - I - 2, false); | 
|  | // Restore the last CSR with post-increment of SP. | 
|  | emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true); | 
|  | } | 
|  |  | 
|  | MBBI->removeFromParent(); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | /// Lower a HOM_Prolog pseudo instruction into a helper call while | 
|  | /// creating the helper on demand. Or emit a sequence of stores in place when | 
|  | /// not using a helper call. | 
|  | /// | 
|  | /// 1. With a helper including frame-setup | 
|  | ///    HOM_Prolog x30, x29, x19, x20, x21, x22, 32 | 
|  | ///    => | 
|  | ///    stp x29, x30, [sp, #-16]! | 
|  | ///    bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22 | 
|  | /// | 
|  | /// 2. With a helper | 
|  | ///    HOM_Prolog x30, x29, x19, x20, x21, x22 | 
|  | ///    => | 
|  | ///    stp x29, x30, [sp, #-16]! | 
|  | ///    bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22 | 
|  | /// | 
|  | /// 3. Without a helper | 
|  | ///    HOM_Prolog x30, x29, x19, x20, x21, x22 | 
|  | ///    => | 
|  | ///    stp	x22, x21, [sp, #-48]! | 
|  | ///    stp	x20, x19, [sp, #16] | 
|  | ///    stp	x29, x30, [sp, #32] | 
|  | bool AArch64LowerHomogeneousPE::lowerProlog( | 
|  | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, | 
|  | MachineBasicBlock::iterator &NextMBBI) { | 
|  | auto &MF = *MBB.getParent(); | 
|  | MachineInstr &MI = *MBBI; | 
|  |  | 
|  | DebugLoc DL = MI.getDebugLoc(); | 
|  | SmallVector<unsigned, 8> Regs; | 
|  | bool HasUnpairedReg = false; | 
|  | int LRIdx = 0; | 
|  | std::optional<int> FpOffset; | 
|  | for (auto &MO : MI.operands()) { | 
|  | if (MO.isReg()) { | 
|  | if (MO.getReg().isValid()) { | 
|  | if (MO.getReg() == AArch64::LR) | 
|  | LRIdx = Regs.size(); | 
|  | } else { | 
|  | // For now we are only expecting unpaired GP registers which should | 
|  | // occur exactly once. | 
|  | assert(!HasUnpairedReg); | 
|  | HasUnpairedReg = true; | 
|  | } | 
|  | Regs.push_back(MO.getReg()); | 
|  | } else if (MO.isImm()) { | 
|  | FpOffset = MO.getImm(); | 
|  | } | 
|  | } | 
|  | (void)HasUnpairedReg; | 
|  | int Size = (int)Regs.size(); | 
|  | if (Size == 0) | 
|  | return false; | 
|  | // Allow compact unwind case only for oww. | 
|  | assert(Size % 2 == 0); | 
|  | assert(MI.getOpcode() == AArch64::HOM_Prolog); | 
|  |  | 
|  | if (FpOffset && | 
|  | shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::PrologFrame)) { | 
|  | // FP/LR is stored at the top of stack before the prolog helper call. | 
|  | emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true); | 
|  | auto *PrologFrameHelper = getOrCreateFrameHelper( | 
|  | M, MMI, Regs, FrameHelperType::PrologFrame, *FpOffset); | 
|  | BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) | 
|  | .addGlobalAddress(PrologFrameHelper) | 
|  | .setMIFlag(MachineInstr::FrameSetup) | 
|  | .copyImplicitOps(MI) | 
|  | .addReg(AArch64::FP, RegState::Implicit | RegState::Define) | 
|  | .addReg(AArch64::SP, RegState::Implicit); | 
|  | } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs, | 
|  | FrameHelperType::Prolog)) { | 
|  | // FP/LR is stored at the top of stack before the prolog helper call. | 
|  | emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true); | 
|  | auto *PrologHelper = | 
|  | getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Prolog); | 
|  | BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) | 
|  | .addGlobalAddress(PrologHelper) | 
|  | .setMIFlag(MachineInstr::FrameSetup) | 
|  | .copyImplicitOps(MI); | 
|  | } else { | 
|  | // Fall back to no-helper. | 
|  | emitStore(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], -Size, true); | 
|  | for (int I = Size - 3; I >= 0; I -= 2) | 
|  | emitStore(MF, MBB, MBBI, *TII, Regs[I - 1], Regs[I], Size - I - 1, false); | 
|  | if (FpOffset) { | 
|  | BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri)) | 
|  | .addDef(AArch64::FP) | 
|  | .addUse(AArch64::SP) | 
|  | .addImm(*FpOffset) | 
|  | .addImm(0) | 
|  | .setMIFlag(MachineInstr::FrameSetup); | 
|  | } | 
|  | } | 
|  |  | 
|  | MBBI->removeFromParent(); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | /// Process each machine instruction | 
|  | /// @param MBB machine basic block | 
|  | /// @param MBBI current instruction iterator | 
|  | /// @param NextMBBI next instruction iterator which can be updated | 
|  | /// @return True when IR is changed. | 
|  | bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB, | 
|  | MachineBasicBlock::iterator MBBI, | 
|  | MachineBasicBlock::iterator &NextMBBI) { | 
|  | MachineInstr &MI = *MBBI; | 
|  | unsigned Opcode = MI.getOpcode(); | 
|  | switch (Opcode) { | 
|  | default: | 
|  | break; | 
|  | case AArch64::HOM_Prolog: | 
|  | return lowerProlog(MBB, MBBI, NextMBBI); | 
|  | case AArch64::HOM_Epilog: | 
|  | return lowerEpilog(MBB, MBBI, NextMBBI); | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) { | 
|  | bool Modified = false; | 
|  |  | 
|  | MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); | 
|  | while (MBBI != E) { | 
|  | MachineBasicBlock::iterator NMBBI = std::next(MBBI); | 
|  | Modified |= runOnMI(MBB, MBBI, NMBBI); | 
|  | MBBI = NMBBI; | 
|  | } | 
|  |  | 
|  | return Modified; | 
|  | } | 
|  |  | 
|  | bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) { | 
|  | TII = MF.getSubtarget<AArch64Subtarget>().getInstrInfo(); | 
|  |  | 
|  | bool Modified = false; | 
|  | for (auto &MBB : MF) | 
|  | Modified |= runOnMBB(MBB); | 
|  | return Modified; | 
|  | } | 
|  |  | 
|  | ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() { | 
|  | return new AArch64LowerHomogeneousPrologEpilog(); | 
|  | } |