| //===- MachineSMEABIPass.cpp ----------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This pass implements the SME ABI requirements for ZA state. This includes |
| // implementing the lazy (and agnostic) ZA state save schemes around calls. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This pass works by collecting instructions that require ZA to be in a |
| // specific state (e.g., "ACTIVE" or "SAVED") and inserting the necessary state |
| // transitions to ensure ZA is in the required state before instructions. State |
| // transitions represent actions such as setting up or restoring a lazy save. |
| // Certain points within a function may also have predefined states independent |
| // of any instructions, for example, a "shared_za" function is always entered |
| // and exited in the "ACTIVE" state. |
| // |
| // To handle ZA state across control flow, we make use of edge bundling. This |
| // assigns each block an "incoming" and "outgoing" edge bundle (representing |
| // incoming and outgoing edges). Initially, these are unique to each block; |
| // then, in the process of forming bundles, the outgoing bundle of a block is |
| // joined with the incoming bundle of all successors. The result is that each |
| // bundle can be assigned a single ZA state, which ensures the state required by |
// all of a block's successors is the same, and that each basic block will always
| // be entered with the same ZA state. This eliminates the need for splitting |
| // edges to insert state transitions or "phi" nodes for ZA states. |
| // |
| // See below for a simple example of edge bundling. |
| // |
| // The following shows a conditionally executed basic block (BB1): |
| // |
| // if (cond) |
| // BB1 |
| // BB2 |
| // |
| // Initial Bundles Joined Bundles |
| // |
| // ┌──0──┐ ┌──0──┐ |
| // │ BB0 │ │ BB0 │ |
| // └──1──┘ └──1──┘ |
| // ├───────┐ ├───────┐ |
| // ▼ │ ▼ │ |
| // ┌──2──┐ │ ─────► ┌──1──┐ │ |
| // │ BB1 │ ▼ │ BB1 │ ▼ |
| // └──3──┘ ┌──4──┐ └──1──┘ ┌──1──┐ |
| // └───►4 BB2 │ └───►1 BB2 │ |
| // └──5──┘ └──2──┘ |
| // |
| // On the left are the initial per-block bundles, and on the right are the |
| // joined bundles (which are the result of the EdgeBundles analysis). |
| |
| #include "AArch64InstrInfo.h" |
| #include "AArch64MachineFunctionInfo.h" |
| #include "AArch64Subtarget.h" |
| #include "MCTargetDesc/AArch64AddressingModes.h" |
| #include "llvm/ADT/BitmaskEnum.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/CodeGen/EdgeBundles.h" |
| #include "llvm/CodeGen/LivePhysRegs.h" |
| #include "llvm/CodeGen/MachineBasicBlock.h" |
| #include "llvm/CodeGen/MachineFunctionPass.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/TargetRegisterInfo.h" |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "aarch64-machine-sme-abi" |
| |
| namespace { |
| |
/// The possible states of ZA at a given point in the function. Values are
/// sequential so they can be used as array indices (see NUM_ZA_STATE).
enum ZAState {
  // Any/unknown state (not a valid concrete state; used to mean "no
  // constraint" when collecting needed states)
  ANY = 0,

  // ZA is in use and active (i.e. within the accumulator)
  ACTIVE,

  // A ZA save has been set up or committed (i.e. ZA is dormant or off)
  LOCAL_SAVED,

  // ZA is off or a lazy save has been set up by the caller
  CALLER_DORMANT,

  // ZA is off
  OFF,

  // The number of ZA states (not a valid state)
  NUM_ZA_STATE
};
| |
/// A bitmask enum to record live physical registers that the "emit*" routines
/// may need to preserve. Note: This only tracks registers we may clobber.
enum LiveRegs : uint8_t {
  None = 0,
  NZCV = 1 << 0,   // Status flags.
  W0 = 1 << 1,     // Low 32 bits of X0.
  W0_HI = 1 << 2,  // High 32 bits of X0 (tracked separately from W0).
  X0 = W0 | W0_HI, // All of X0 (both halves live).
  LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ W0_HI)
};
| |
/// Holds the virtual registers live physical registers have been saved to.
struct PhysRegSave {
  // Which physical registers were live (and therefore saved).
  LiveRegs PhysLiveRegs;
  // Virtual register holding the saved NZCV flags (if NZCV was live).
  Register StatusFlags = AArch64::NoRegister;
  // Virtual register holding the saved W0/X0 value (if W0 was live).
  Register X0Save = AArch64::NoRegister;
};
| |
/// Contains the needed ZA state (and live registers) at an instruction. That is
/// the state ZA must be in _before_ "InsertPt".
struct InstInfo {
  ZAState NeededState{ZAState::ANY};      // Required ZA state before InsertPt.
  MachineBasicBlock::iterator InsertPt;   // Where state-change code may be inserted.
  LiveRegs PhysLiveRegs = LiveRegs::None; // Live physical registers at InsertPt.
};
| |
/// Contains the needed ZA state for each instruction in a block. Instructions
/// that do not require a ZA state are not recorded.
struct BlockInfo {
  // Instructions with ZA-state requirements, in program order
  // (collectNeededZAStates reverses after its backward walk).
  SmallVector<InstInfo> Insts;
  // Entry state imposed on this block (e.g. by the ABI for the entry block or
  // EH pads); ANY if the block has no fixed entry state.
  ZAState FixedEntryState{ZAState::ANY};
  // States that would avoid a transition on entry/exit; taken from the
  // first/last recorded instruction. ANY means "no preference".
  ZAState DesiredIncomingState{ZAState::ANY};
  ZAState DesiredOutgoingState{ZAState::ANY};
  LiveRegs PhysLiveRegsAtEntry = LiveRegs::None;
  LiveRegs PhysLiveRegsAtExit = LiveRegs::None;
};
| |
/// Contains the needed ZA state information for all blocks within a function.
struct FunctionInfo {
  // Per-block state info, indexed by MachineBasicBlock number.
  SmallVector<BlockInfo> Blocks;
  // Point just after an SMEStateAllocPseudo marker (if present) -- a safe
  // place to insert TPIDR2 block setup code.
  std::optional<MachineBasicBlock::iterator> AfterSMEProloguePt;
  LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None;
};
| |
| /// State/helpers that is only needed when emitting code to handle |
| /// saving/restoring ZA. |
| class EmitContext { |
| public: |
| EmitContext() = default; |
| |
| /// Get or create a TPIDR2 block in \p MF. |
| int getTPIDR2Block(MachineFunction &MF) { |
| if (TPIDR2BlockFI) |
| return *TPIDR2BlockFI; |
| MachineFrameInfo &MFI = MF.getFrameInfo(); |
| TPIDR2BlockFI = MFI.CreateStackObject(16, Align(16), false); |
| return *TPIDR2BlockFI; |
| } |
| |
| /// Get or create agnostic ZA buffer pointer in \p MF. |
| Register getAgnosticZABufferPtr(MachineFunction &MF) { |
| if (AgnosticZABufferPtr != AArch64::NoRegister) |
| return AgnosticZABufferPtr; |
| Register BufferPtr = |
| MF.getInfo<AArch64FunctionInfo>()->getEarlyAllocSMESaveBuffer(); |
| AgnosticZABufferPtr = |
| BufferPtr != AArch64::NoRegister |
| ? BufferPtr |
| : MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); |
| return AgnosticZABufferPtr; |
| } |
| |
| /// Returns true if the function must allocate a ZA save buffer on entry. This |
| /// will be the case if, at any point in the function, a ZA save was emitted. |
| bool needsSaveBuffer() const { |
| assert(!(TPIDR2BlockFI && AgnosticZABufferPtr) && |
| "Cannot have both a TPIDR2 block and agnostic ZA buffer"); |
| return TPIDR2BlockFI || AgnosticZABufferPtr != AArch64::NoRegister; |
| } |
| |
| private: |
| std::optional<int> TPIDR2BlockFI; |
| Register AgnosticZABufferPtr = AArch64::NoRegister; |
| }; |
| |
| /// Checks if \p State is a legal edge bundle state. For a state to be a legal |
| /// bundle state, it must be possible to transition from it to any other bundle |
| /// state without losing any ZA state. This is the case for ACTIVE/LOCAL_SAVED, |
| /// as you can transition between those states by saving/restoring ZA. The OFF |
| /// state would not be legal, as transitioning to it drops the content of ZA. |
| static bool isLegalEdgeBundleZAState(ZAState State) { |
| switch (State) { |
| case ZAState::ACTIVE: // ZA state within the accumulator/ZT0. |
| case ZAState::LOCAL_SAVED: // ZA state is saved on the stack. |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| StringRef getZAStateString(ZAState State) { |
| #define MAKE_CASE(V) \ |
| case V: \ |
| return #V; |
| switch (State) { |
| MAKE_CASE(ZAState::ANY) |
| MAKE_CASE(ZAState::ACTIVE) |
| MAKE_CASE(ZAState::LOCAL_SAVED) |
| MAKE_CASE(ZAState::CALLER_DORMANT) |
| MAKE_CASE(ZAState::OFF) |
| default: |
| llvm_unreachable("Unexpected ZAState"); |
| } |
| #undef MAKE_CASE |
| } |
| |
| static bool isZAorZTRegOp(const TargetRegisterInfo &TRI, |
| const MachineOperand &MO) { |
| if (!MO.isReg() || !MO.getReg().isPhysical()) |
| return false; |
| return any_of(TRI.subregs_inclusive(MO.getReg()), [](const MCPhysReg &SR) { |
| return AArch64::MPR128RegClass.contains(SR) || |
| AArch64::ZTRRegClass.contains(SR); |
| }); |
| } |
| |
| /// Returns the required ZA state needed before \p MI and an iterator pointing |
| /// to where any code required to change the ZA state should be inserted. |
| static std::pair<ZAState, MachineBasicBlock::iterator> |
| getZAStateBeforeInst(const TargetRegisterInfo &TRI, MachineInstr &MI, |
| bool ZAOffAtReturn) { |
| MachineBasicBlock::iterator InsertPt(MI); |
| |
| if (MI.getOpcode() == AArch64::InOutZAUsePseudo) |
| return {ZAState::ACTIVE, std::prev(InsertPt)}; |
| |
| if (MI.getOpcode() == AArch64::RequiresZASavePseudo) |
| return {ZAState::LOCAL_SAVED, std::prev(InsertPt)}; |
| |
| if (MI.isReturn()) |
| return {ZAOffAtReturn ? ZAState::OFF : ZAState::ACTIVE, InsertPt}; |
| |
| for (auto &MO : MI.operands()) { |
| if (isZAorZTRegOp(TRI, MO)) |
| return {ZAState::ACTIVE, InsertPt}; |
| } |
| |
| return {ZAState::ANY, InsertPt}; |
| } |
| |
/// Machine pass implementing the SME ABI for ZA state (lazy/full saves and
/// state transitions). See the file header for an overview of the approach.
struct MachineSMEABI : public MachineFunctionPass {
  inline static char ID = 0;

  MachineSMEABI(CodeGenOptLevel OptLevel = CodeGenOptLevel::Default)
      : MachineFunctionPass(ID), OptLevel(OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "Machine SME ABI pass"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // This pass only inserts instructions; the CFG is unchanged.
    AU.setPreservesCFG();
    AU.addRequired<EdgeBundlesWrapperLegacy>();
    AU.addPreservedID(MachineLoopInfoID);
    AU.addPreservedID(MachineDominatorsID);
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  /// Collects the needed ZA state (and live registers) before each instruction
  /// within the machine function.
  FunctionInfo collectNeededZAStates(SMEAttrs SMEFnAttrs);

  /// Assigns each edge bundle a ZA state based on the needed states of blocks
  /// that have incoming or outgoing edges in that bundle.
  SmallVector<ZAState> assignBundleZAStates(const EdgeBundles &Bundles,
                                            const FunctionInfo &FnInfo);

  /// Inserts code to handle changes between ZA states within the function.
  /// E.g., ACTIVE -> LOCAL_SAVED will insert code required to save ZA.
  void insertStateChanges(EmitContext &, const FunctionInfo &FnInfo,
                          const EdgeBundles &Bundles,
                          ArrayRef<ZAState> BundleStates);

  /// Propagates desired states forwards (from predecessors -> successors) if
  /// \p Forwards, otherwise, propagates backwards (from successors ->
  /// predecessors).
  void propagateDesiredStates(FunctionInfo &FnInfo, bool Forwards = true);

  // Emission routines for private and shared ZA functions (using lazy saves).
  void emitNewZAPrologue(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI);
  void emitRestoreLazySave(EmitContext &, MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           LiveRegs PhysLiveRegs);
  void emitSetupLazySave(EmitContext &, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI);
  void emitAllocateLazySaveBuffer(EmitContext &, MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI);
  void emitZAOff(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                 bool ClearTPIDR2);

  // Emission routines for agnostic ZA functions.
  void emitSetupFullZASave(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           LiveRegs PhysLiveRegs);
  // Emit a "full" ZA save or restore. It is "full" in the sense that this
  // function will emit a call to __arm_sme_save or __arm_sme_restore, which
  // handles saving and restoring both ZA and ZT0.
  void emitFullZASaveRestore(EmitContext &, MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             LiveRegs PhysLiveRegs, bool IsSave);
  void emitAllocateFullZASaveBuffer(EmitContext &, MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    LiveRegs PhysLiveRegs);

  /// Attempts to find an insertion point before \p Inst where the status flags
  /// are not live. If \p Inst is `Block.Insts.end()` a point before the end of
  /// the block is found.
  std::pair<MachineBasicBlock::iterator, LiveRegs>
  findStateChangeInsertionPoint(MachineBasicBlock &MBB, const BlockInfo &Block,
                                SmallVectorImpl<InstInfo>::const_iterator Inst);
  void emitStateChange(EmitContext &, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator MBBI, ZAState From,
                       ZAState To, LiveRegs PhysLiveRegs);

  // Helpers for switching between lazy/full ZA save/restore routines.
  // Agnostic-ZA functions use the "full" (__arm_sme_save/restore) scheme,
  // everything else uses the TPIDR2-based lazy-save scheme.
  void emitZASave(EmitContext &Context, MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs) {
    if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
      return emitFullZASaveRestore(Context, MBB, MBBI, PhysLiveRegs,
                                   /*IsSave=*/true);
    return emitSetupLazySave(Context, MBB, MBBI);
  }
  void emitZARestore(EmitContext &Context, MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs) {
    if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
      return emitFullZASaveRestore(Context, MBB, MBBI, PhysLiveRegs,
                                   /*IsSave=*/false);
    return emitRestoreLazySave(Context, MBB, MBBI, PhysLiveRegs);
  }
  void emitAllocateZASaveBuffer(EmitContext &Context, MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                LiveRegs PhysLiveRegs) {
    if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
      return emitAllocateFullZASaveBuffer(Context, MBB, MBBI, PhysLiveRegs);
    return emitAllocateLazySaveBuffer(Context, MBB, MBBI);
  }

  /// Save live physical registers to virtual registers.
  PhysRegSave createPhysRegSave(LiveRegs PhysLiveRegs, MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI, DebugLoc DL);
  /// Restore physical registers from a save of their previous values.
  void restorePhyRegSave(const PhysRegSave &RegSave, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, DebugLoc DL);

private:
  CodeGenOptLevel OptLevel = CodeGenOptLevel::Default;

  // Cached per-function state, set up in runOnMachineFunction.
  MachineFunction *MF = nullptr;
  const AArch64Subtarget *Subtarget = nullptr;
  const AArch64RegisterInfo *TRI = nullptr;
  const AArch64FunctionInfo *AFI = nullptr;
  const TargetInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  MachineLoopInfo *MLI = nullptr;
};
| |
| static LiveRegs getPhysLiveRegs(LiveRegUnits const &LiveUnits) { |
| LiveRegs PhysLiveRegs = LiveRegs::None; |
| if (!LiveUnits.available(AArch64::NZCV)) |
| PhysLiveRegs |= LiveRegs::NZCV; |
| // We have to track W0 and X0 separately as otherwise things can get |
| // confused if we attempt to preserve X0 but only W0 was defined. |
| if (!LiveUnits.available(AArch64::W0)) |
| PhysLiveRegs |= LiveRegs::W0; |
| if (!LiveUnits.available(AArch64::W0_HI)) |
| PhysLiveRegs |= LiveRegs::W0_HI; |
| return PhysLiveRegs; |
| } |
| |
| static void setPhysLiveRegs(LiveRegUnits &LiveUnits, LiveRegs PhysLiveRegs) { |
| if (PhysLiveRegs & LiveRegs::NZCV) |
| LiveUnits.addReg(AArch64::NZCV); |
| if (PhysLiveRegs & LiveRegs::W0) |
| LiveUnits.addReg(AArch64::W0); |
| if (PhysLiveRegs & LiveRegs::W0_HI) |
| LiveUnits.addReg(AArch64::W0_HI); |
| } |
| |
| [[maybe_unused]] bool isCallStartOpcode(unsigned Opc) { |
| switch (Opc) { |
| case AArch64::TLSDESC_CALLSEQ: |
| case AArch64::TLSDESC_AUTH_CALLSEQ: |
| case AArch64::ADJCALLSTACKDOWN: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
// Walks every block backwards (so physical-register liveness can be computed
// incrementally) and records, for each instruction with a ZA-state
// requirement, the needed state, insertion point, and live registers.
FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
  assert((SMEFnAttrs.hasAgnosticZAInterface() || SMEFnAttrs.hasZT0State() ||
          SMEFnAttrs.hasZAState()) &&
         "Expected function to have ZA/ZT0 state!");

  SmallVector<BlockInfo> Blocks(MF->getNumBlockIDs());
  LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None;
  std::optional<MachineBasicBlock::iterator> AfterSMEProloguePt;

  for (MachineBasicBlock &MBB : *MF) {
    BlockInfo &Block = Blocks[MBB.getNumber()];

    // Some blocks have an ABI-mandated entry state, independent of their
    // instructions.
    if (MBB.isEntryBlock()) {
      // Entry block:
      Block.FixedEntryState = SMEFnAttrs.hasPrivateZAInterface()
                                  ? ZAState::CALLER_DORMANT
                                  : ZAState::ACTIVE;
    } else if (MBB.isEHPad()) {
      // EH entry block:
      Block.FixedEntryState = ZAState::LOCAL_SAVED;
    }

    // Seed liveness from the block's live-outs, then step backwards.
    LiveRegUnits LiveUnits(*TRI);
    LiveUnits.addLiveOuts(MBB);

    Block.PhysLiveRegsAtExit = getPhysLiveRegs(LiveUnits);
    auto FirstTerminatorInsertPt = MBB.getFirstTerminator();
    auto FirstNonPhiInsertPt = MBB.getFirstNonPHI();
    for (MachineInstr &MI : reverse(MBB)) {
      MachineBasicBlock::iterator MBBI(MI);
      LiveUnits.stepBackward(MI);
      LiveRegs PhysLiveRegs = getPhysLiveRegs(LiveUnits);
      // The SMEStateAllocPseudo marker is added to a function if the save
      // buffer was allocated in SelectionDAG. It marks the end of the
      // allocation -- which is a safe point for this pass to insert any TPIDR2
      // block setup.
      if (MI.getOpcode() == AArch64::SMEStateAllocPseudo) {
        AfterSMEProloguePt = MBBI;
        PhysLiveRegsAfterSMEPrologue = PhysLiveRegs;
      }
      // Note: We treat Agnostic ZA as inout_za with an alternate save/restore.
      auto [NeededState, InsertPt] = getZAStateBeforeInst(
          *TRI, MI, /*ZAOffAtReturn=*/SMEFnAttrs.hasPrivateZAInterface());
      assert((InsertPt == MBBI || isCallStartOpcode(InsertPt->getOpcode())) &&
             "Unexpected state change insertion point!");
      // TODO: Do something to avoid state changes where NZCV is live.
      // Record liveness at the block's canonical entry/exit insertion points.
      if (MBBI == FirstTerminatorInsertPt)
        Block.PhysLiveRegsAtExit = PhysLiveRegs;
      if (MBBI == FirstNonPhiInsertPt)
        Block.PhysLiveRegsAtEntry = PhysLiveRegs;
      if (NeededState != ZAState::ANY)
        Block.Insts.push_back({NeededState, InsertPt, PhysLiveRegs});
    }

    // Reverse vector (as we had to iterate backwards for liveness).
    std::reverse(Block.Insts.begin(), Block.Insts.end());

    // Record the desired states on entry/exit of this block. These are the
    // states that would not incur a state transition.
    if (!Block.Insts.empty()) {
      Block.DesiredIncomingState = Block.Insts.front().NeededState;
      Block.DesiredOutgoingState = Block.Insts.back().NeededState;
    }
  }

  return FunctionInfo{std::move(Blocks), AfterSMEProloguePt,
                      PhysLiveRegsAfterSMEPrologue};
}
| |
// Worklist fixpoint: blocks without a legal desired state adopt the majority
// legal state of their neighbors, then push their own neighbors that may now
// be resolvable.
void MachineSMEABI::propagateDesiredStates(FunctionInfo &FnInfo,
                                           bool Forwards) {
  // If `Forwards`, this propagates desired states from predecessors to
  // successors, otherwise, this propagates states from successors to
  // predecessors.
  auto GetBlockState = [](BlockInfo &Block, bool Incoming) -> ZAState & {
    return Incoming ? Block.DesiredIncomingState : Block.DesiredOutgoingState;
  };

  // Seed the worklist with every block whose state (incoming if Forwards,
  // outgoing otherwise) is not yet a legal bundle state.
  SmallVector<MachineBasicBlock *> Worklist;
  for (auto [BlockID, BlockInfo] : enumerate(FnInfo.Blocks)) {
    if (!isLegalEdgeBundleZAState(GetBlockState(BlockInfo, Forwards)))
      Worklist.push_back(MF->getBlockNumbered(BlockID));
  }

  while (!Worklist.empty()) {
    MachineBasicBlock *MBB = Worklist.pop_back_val();
    BlockInfo &Block = FnInfo.Blocks[MBB->getNumber()];

    // Pick a legal edge bundle state that matches the majority of
    // predecessors/successors.
    int StateCounts[ZAState::NUM_ZA_STATE] = {0};
    for (MachineBasicBlock *PredOrSucc :
         Forwards ? predecessors(MBB) : successors(MBB)) {
      BlockInfo &PredOrSuccBlock = FnInfo.Blocks[PredOrSucc->getNumber()];
      ZAState ZAState = GetBlockState(PredOrSuccBlock, !Forwards);
      if (isLegalEdgeBundleZAState(ZAState))
        StateCounts[ZAState]++;
    }

    // max_element returns an iterator; the offset from the array start is the
    // winning state's enum value.
    ZAState PropagatedState = ZAState(max_element(StateCounts) - StateCounts);
    ZAState &CurrentState = GetBlockState(Block, Forwards);
    if (PropagatedState != CurrentState) {
      CurrentState = PropagatedState;
      ZAState &OtherState = GetBlockState(Block, !Forwards);
      // Propagate to the incoming/outgoing state if that is also "ANY".
      if (OtherState == ZAState::ANY)
        OtherState = PropagatedState;
      // Push any successors/predecessors that may need updating to the
      // worklist.
      for (MachineBasicBlock *SuccOrPred :
           Forwards ? successors(MBB) : predecessors(MBB)) {
        BlockInfo &SuccOrPredBlock = FnInfo.Blocks[SuccOrPred->getNumber()];
        if (!isLegalEdgeBundleZAState(GetBlockState(SuccOrPredBlock, Forwards)))
          Worklist.push_back(SuccOrPred);
      }
    }
  }
}
| |
| /// Assigns each edge bundle a ZA state based on the needed states of blocks |
| /// that have incoming or outgoing edges in that bundle. |
| SmallVector<ZAState> |
| MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles, |
| const FunctionInfo &FnInfo) { |
| SmallVector<ZAState> BundleStates(Bundles.getNumBundles()); |
| for (unsigned I = 0, E = Bundles.getNumBundles(); I != E; ++I) { |
| LLVM_DEBUG(dbgs() << "Assigning ZA state for edge bundle: " << I << '\n'); |
| |
| // Attempt to assign a ZA state for this bundle that minimizes state |
| // transitions. Edges within loops are given a higher weight as we assume |
| // they will be executed more than once. |
| int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0}; |
| for (unsigned BlockID : Bundles.getBlocks(I)) { |
| LLVM_DEBUG(dbgs() << "- bb." << BlockID); |
| |
| const BlockInfo &Block = FnInfo.Blocks[BlockID]; |
| bool InEdge = Bundles.getBundle(BlockID, /*Out=*/false) == I; |
| bool OutEdge = Bundles.getBundle(BlockID, /*Out=*/true) == I; |
| |
| bool LegalInEdge = |
| InEdge && isLegalEdgeBundleZAState(Block.DesiredIncomingState); |
| bool LegalOutEgde = |
| OutEdge && isLegalEdgeBundleZAState(Block.DesiredOutgoingState); |
| if (LegalInEdge) { |
| LLVM_DEBUG(dbgs() << " DesiredIncomingState: " |
| << getZAStateString(Block.DesiredIncomingState)); |
| EdgeStateCounts[Block.DesiredIncomingState]++; |
| } |
| if (LegalOutEgde) { |
| LLVM_DEBUG(dbgs() << " DesiredOutgoingState: " |
| << getZAStateString(Block.DesiredOutgoingState)); |
| EdgeStateCounts[Block.DesiredOutgoingState]++; |
| } |
| if (!LegalInEdge && !LegalOutEgde) |
| LLVM_DEBUG(dbgs() << " (no state preference)"); |
| LLVM_DEBUG(dbgs() << '\n'); |
| } |
| |
| ZAState BundleState = |
| ZAState(max_element(EdgeStateCounts) - EdgeStateCounts); |
| |
| if (BundleState == ZAState::ANY) |
| BundleState = ZAState::ACTIVE; |
| |
| LLVM_DEBUG({ |
| dbgs() << "Chosen ZA state: " << getZAStateString(BundleState) << '\n' |
| << "Edge counts:"; |
| for (auto [State, Count] : enumerate(EdgeStateCounts)) |
| dbgs() << " " << getZAStateString(ZAState(State)) << ": " << Count; |
| dbgs() << "\n\n"; |
| }); |
| |
| BundleStates[I] = BundleState; |
| } |
| |
| return BundleStates; |
| } |
| |
// Finds where to insert a state change before *Inst (or before the block's
// terminators if Inst == end()). If NZCV is live at the default insertion
// point, scans backwards -- but not past the previous state change or into a
// call sequence -- for a point where the flags are free.
std::pair<MachineBasicBlock::iterator, LiveRegs>
MachineSMEABI::findStateChangeInsertionPoint(
    MachineBasicBlock &MBB, const BlockInfo &Block,
    SmallVectorImpl<InstInfo>::const_iterator Inst) {
  LiveRegs PhysLiveRegs;
  MachineBasicBlock::iterator InsertPt;
  if (Inst != Block.Insts.end()) {
    InsertPt = Inst->InsertPt;
    PhysLiveRegs = Inst->PhysLiveRegs;
  } else {
    InsertPt = MBB.getFirstTerminator();
    PhysLiveRegs = Block.PhysLiveRegsAtExit;
  }

  if (!(PhysLiveRegs & LiveRegs::NZCV))
    return {InsertPt, PhysLiveRegs}; // Nothing to do (no live flags).

  // Find the previous state change. We can not move before this point.
  MachineBasicBlock::iterator PrevStateChangeI;
  if (Inst == Block.Insts.begin()) {
    PrevStateChangeI = MBB.begin();
  } else {
    // Note: `std::prev(Inst)` is the previous InstInfo. We only create an
    // InstInfo object for instructions that require a specific ZA state, so the
    // InstInfo is the site of the previous state change in the block (which can
    // be several MIs earlier).
    PrevStateChangeI = std::prev(Inst)->InsertPt;
  }

  // Note: LiveUnits will only accurately track X0 and NZCV.
  LiveRegUnits LiveUnits(*TRI);
  setPhysLiveRegs(LiveUnits, PhysLiveRegs);
  // Step backwards one instruction at a time, looking for a point where NZCV
  // is no longer live.
  for (MachineBasicBlock::iterator I = InsertPt; I != PrevStateChangeI; --I) {
    // Don't move before/into a call (which may have a state change before it).
    if (I->getOpcode() == TII->getCallFrameDestroyOpcode() || I->isCall())
      break;
    LiveUnits.stepBackward(*I);
    if (LiveUnits.available(AArch64::NZCV))
      return {I, getPhysLiveRegs(LiveUnits)};
  }
  // No flag-free point found; fall back to the default insertion point.
  return {InsertPt, PhysLiveRegs};
}
| |
// For each block, walks its recorded instructions tracking the current ZA
// state (starting from the bundle's incoming state or the block's fixed entry
// state), and emits transition code wherever the needed state differs.
void MachineSMEABI::insertStateChanges(EmitContext &Context,
                                       const FunctionInfo &FnInfo,
                                       const EdgeBundles &Bundles,
                                       ArrayRef<ZAState> BundleStates) {
  for (MachineBasicBlock &MBB : *MF) {
    const BlockInfo &Block = FnInfo.Blocks[MBB.getNumber()];
    ZAState InState = BundleStates[Bundles.getBundle(MBB.getNumber(),
                                                     /*Out=*/false)];

    // A fixed entry state (entry block/EH pad) overrides the bundle state.
    ZAState CurrentState = Block.FixedEntryState;
    if (CurrentState == ZAState::ANY)
      CurrentState = InState;

    for (auto &Inst : Block.Insts) {
      if (CurrentState != Inst.NeededState) {
        auto [InsertPt, PhysLiveRegs] =
            findStateChangeInsertionPoint(MBB, Block, &Inst);
        emitStateChange(Context, MBB, InsertPt, CurrentState, Inst.NeededState,
                        PhysLiveRegs);
        CurrentState = Inst.NeededState;
      }
    }

    // Blocks with no successors have no outgoing bundle state to match.
    if (MBB.succ_empty())
      continue;

    // Finally, transition into the outgoing bundle's state if needed.
    ZAState OutState =
        BundleStates[Bundles.getBundle(MBB.getNumber(), /*Out=*/true)];
    if (CurrentState != OutState) {
      auto [InsertPt, PhysLiveRegs] =
          findStateChangeInsertionPoint(MBB, Block, Block.Insts.end());
      emitStateChange(Context, MBB, InsertPt, CurrentState, OutState,
                      PhysLiveRegs);
    }
  }
}
| |
| static DebugLoc getDebugLoc(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator MBBI) { |
| if (MBBI != MBB.end()) |
| return MBBI->getDebugLoc(); |
| return DebugLoc(); |
| } |
| |
// Sets up a lazy save: materializes the address of this function's TPIDR2
// block and writes it to TPIDR2_EL0.
void MachineSMEABI::emitSetupLazySave(EmitContext &Context,
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI) {
  DebugLoc DL = getDebugLoc(MBB, MBBI);

  // Get pointer to TPIDR2 block.
  Register TPIDR2 = MRI->createVirtualRegister(&AArch64::GPR64spRegClass);
  Register TPIDR2Ptr = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), TPIDR2)
      .addFrameIndex(Context.getTPIDR2Block(*MF))
      .addImm(0)
      .addImm(0);
  // Copy to a plain GPR64 (ADDXri produces a GPR64sp) before the MSR.
  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), TPIDR2Ptr)
      .addReg(TPIDR2);
  // Set TPIDR2_EL0 to point to TPIDR2 block.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
      .addImm(AArch64SysReg::TPIDR2_EL0)
      .addReg(TPIDR2Ptr);
}
| |
// Saves the live physical registers recorded in PhysLiveRegs (NZCV and/or
// W0/X0) into fresh virtual registers, returning the mapping so
// restorePhyRegSave can undo it.
PhysRegSave MachineSMEABI::createPhysRegSave(LiveRegs PhysLiveRegs,
                                             MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             DebugLoc DL) {
  PhysRegSave RegSave{PhysLiveRegs};
  if (PhysLiveRegs & LiveRegs::NZCV) {
    RegSave.StatusFlags = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS), RegSave.StatusFlags)
        .addImm(AArch64SysReg::NZCV)
        .addReg(AArch64::NZCV, RegState::Implicit);
  }
  // Note: Preserving X0 is "free" as this is before register allocation, so
  // the register allocator is still able to optimize these copies.
  if (PhysLiveRegs & LiveRegs::W0) {
    // Save the full X0 only if its high half is also live.
    RegSave.X0Save = MRI->createVirtualRegister(PhysLiveRegs & LiveRegs::W0_HI
                                                    ? &AArch64::GPR64RegClass
                                                    : &AArch64::GPR32RegClass);
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), RegSave.X0Save)
        .addReg(PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0);
  }
  return RegSave;
}
| |
// Restores the physical registers previously saved by createPhysRegSave,
// copying the saved virtual registers back to NZCV and W0/X0.
void MachineSMEABI::restorePhyRegSave(const PhysRegSave &RegSave,
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      DebugLoc DL) {
  if (RegSave.StatusFlags != AArch64::NoRegister)
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
        .addImm(AArch64SysReg::NZCV)
        .addReg(RegSave.StatusFlags)
        .addReg(AArch64::NZCV, RegState::ImplicitDefine);

  if (RegSave.X0Save != AArch64::NoRegister)
    // Restore the same width (W0 or X0) that was saved.
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY),
            RegSave.PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0)
        .addReg(RegSave.X0Save);
}
| |
// Restores ZA from a lazy save: enables ZA, then conditionally calls the
// __arm_tpidr2_restore routine (via RestoreZAPseudo) and clears TPIDR2_EL0.
// Live NZCV/X0 are preserved around the emitted sequence.
void MachineSMEABI::emitRestoreLazySave(EmitContext &Context,
                                        MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        LiveRegs PhysLiveRegs) {
  auto *TLI = Subtarget->getTargetLowering();
  DebugLoc DL = getDebugLoc(MBB, MBBI);
  Register TPIDR2EL0 = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  // The restore routine takes the TPIDR2 block pointer in X0.
  Register TPIDR2 = AArch64::X0;

  // TODO: Emit these within the restore MBB to prevent unnecessary saves.
  PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL);

  // Enable ZA.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1))
      .addImm(AArch64SVCR::SVCRZA)
      .addImm(1);
  // Get current TPIDR2_EL0.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS), TPIDR2EL0)
      .addImm(AArch64SysReg::TPIDR2_EL0);
  // Get pointer to TPIDR2 block.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), TPIDR2)
      .addFrameIndex(Context.getTPIDR2Block(*MF))
      .addImm(0)
      .addImm(0);
  // (Conditionally) restore ZA state.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::RestoreZAPseudo))
      .addReg(TPIDR2EL0)
      .addReg(TPIDR2)
      .addExternalSymbol(TLI->getLibcallName(RTLIB::SMEABI_TPIDR2_RESTORE))
      .addRegMask(TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
  // Zero TPIDR2_EL0.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
      .addImm(AArch64SysReg::TPIDR2_EL0)
      .addReg(AArch64::XZR);

  restorePhyRegSave(RegSave, MBB, MBBI, DL);
}
| |
// Turns ZA off, optionally clearing TPIDR2_EL0 first (needed when a lazy
// save was set up that should no longer be committed).
void MachineSMEABI::emitZAOff(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI,
                              bool ClearTPIDR2) {
  DebugLoc DL = getDebugLoc(MBB, MBBI);

  if (ClearTPIDR2)
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
        .addImm(AArch64SysReg::TPIDR2_EL0)
        .addReg(AArch64::XZR);

  // Disable ZA.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1))
      .addImm(AArch64SVCR::SVCRZA)
      .addImm(0);
}
| |
// Allocates the lazy-save buffer (SVL x SVL bytes on the stack, unless one
// was already allocated in SelectionDAG) and initializes the TPIDR2 block
// with the buffer pointer and the number of save slices.
void MachineSMEABI::emitAllocateLazySaveBuffer(
    EmitContext &Context, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) {
  MachineFrameInfo &MFI = MF->getFrameInfo();
  DebugLoc DL = getDebugLoc(MBB, MBBI);
  Register SP = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  Register SVL = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  Register Buffer = AFI->getEarlyAllocSMESaveBuffer();

  // Calculate SVL.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::RDSVLI_XI), SVL).addImm(1);

  // 1. Allocate the lazy save buffer.
  if (Buffer == AArch64::NoRegister) {
    // TODO: On Windows, we allocate the lazy save buffer in SelectionDAG (so
    // Buffer != AArch64::NoRegister). This is done to reuse the existing
    // expansions (which can insert stack checks). This works, but it means we
    // will always allocate the lazy save buffer (even if the function contains
    // no lazy saves). If we want to handle Windows here, we'll need to
    // implement something similar to LowerWindowsDYNAMIC_STACKALLOC.
    assert(!Subtarget->isTargetWindows() &&
           "Lazy ZA save is not yet supported on Windows");
    Buffer = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
    // Get original stack pointer.
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), SP)
        .addReg(AArch64::SP);
    // Allocate a lazy-save buffer object of the size given, normally SVL * SVL
    // (MSUBXrrr computes SP - SVL * SVL).
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSUBXrrr), Buffer)
        .addReg(SVL)
        .addReg(SVL)
        .addReg(SP);
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::SP)
        .addReg(Buffer);
    // We have just allocated a variable sized object, tell this to PEI.
    MFI.CreateVariableSizedObject(Align(16), nullptr);
  }

  // 2. Setup the TPIDR2 block.
  {
    // Note: This case just needs to do `SVL << 48`. It is not implemented as we
    // generally don't support big-endian SVE/SME.
    if (!Subtarget->isLittleEndian())
      reportFatalInternalError(
          "TPIDR2 block initialization is not supported on big-endian targets");

    // Store buffer pointer and num_za_save_slices.
    // Bytes 10-15 are implicitly zeroed.
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::STPXi))
        .addReg(Buffer)
        .addReg(SVL)
        .addFrameIndex(Context.getTPIDR2Block(*MF))
        .addImm(0);
  }
}
| |
| void MachineSMEABI::emitNewZAPrologue(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator MBBI) { |
| auto *TLI = Subtarget->getTargetLowering(); |
| DebugLoc DL = getDebugLoc(MBB, MBBI); |
| |
| // Get current TPIDR2_EL0. |
| Register TPIDR2EL0 = MRI->createVirtualRegister(&AArch64::GPR64RegClass); |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS)) |
| .addReg(TPIDR2EL0, RegState::Define) |
| .addImm(AArch64SysReg::TPIDR2_EL0); |
| // If TPIDR2_EL0 is non-zero, commit the lazy save. |
| // NOTE: Functions that only use ZT0 don't need to zero ZA. |
| bool ZeroZA = AFI->getSMEFnAttrs().hasZAState(); |
| auto CommitZASave = |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::CommitZASavePseudo)) |
| .addReg(TPIDR2EL0) |
| .addImm(ZeroZA ? 1 : 0) |
| .addExternalSymbol(TLI->getLibcallName(RTLIB::SMEABI_TPIDR2_SAVE)) |
| .addRegMask(TRI->SMEABISupportRoutinesCallPreservedMaskFromX0()); |
| if (ZeroZA) |
| CommitZASave.addDef(AArch64::ZAB0, RegState::ImplicitDefine); |
| // Enable ZA (as ZA could have previously been in the OFF state). |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1)) |
| .addImm(AArch64SVCR::SVCRZA) |
| .addImm(1); |
| } |
| |
| void MachineSMEABI::emitFullZASaveRestore(EmitContext &Context, |
| MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator MBBI, |
| LiveRegs PhysLiveRegs, bool IsSave) { |
| auto *TLI = Subtarget->getTargetLowering(); |
| DebugLoc DL = getDebugLoc(MBB, MBBI); |
| Register BufferPtr = AArch64::X0; |
| |
| PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL); |
| |
| // Copy the buffer pointer into X0. |
| BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferPtr) |
| .addReg(Context.getAgnosticZABufferPtr(*MF)); |
| |
| // Call __arm_sme_save/__arm_sme_restore. |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) |
| .addReg(BufferPtr, RegState::Implicit) |
| .addExternalSymbol(TLI->getLibcallName( |
| IsSave ? RTLIB::SMEABI_SME_SAVE : RTLIB::SMEABI_SME_RESTORE)) |
| .addRegMask(TRI->getCallPreservedMask( |
| *MF, |
| CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1)); |
| |
| restorePhyRegSave(RegSave, MBB, MBBI, DL); |
| } |
| |
/// Allocates the buffer for a full ZA save (agnostic-ZA scheme), unless
/// SelectionDAG already allocated it early.
///
/// Calls __arm_sme_state_size to determine the required size, then bumps SP
/// down by that amount and records the result as the buffer pointer. Live
/// physical registers are preserved around the runtime call.
void MachineSMEABI::emitAllocateFullZASaveBuffer(
    EmitContext &Context, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs) {
  // Buffer already allocated in SelectionDAG.
  if (AFI->getEarlyAllocSMESaveBuffer())
    return;

  DebugLoc DL = getDebugLoc(MBB, MBBI);
  Register BufferPtr = Context.getAgnosticZABufferPtr(*MF);
  Register BufferSize = MRI->createVirtualRegister(&AArch64::GPR64RegClass);

  // Live call-clobbered registers must survive the __arm_sme_state_size call.
  PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL);

  // Calculate the SME state size (__arm_sme_state_size returns it in X0).
  {
    auto *TLI = Subtarget->getTargetLowering();
    // Local TRI shadows the identically-initialized member.
    const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
        .addExternalSymbol(TLI->getLibcallName(RTLIB::SMEABI_SME_STATE_SIZE))
        .addReg(AArch64::X0, RegState::ImplicitDefine)
        .addRegMask(TRI->getCallPreservedMask(
            *MF, CallingConv::
                     AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1));
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferSize)
        .addReg(AArch64::X0);
  }

  // Allocate a buffer object of the size given __arm_sme_state_size.
  {
    MachineFrameInfo &MFI = MF->getFrameInfo();
    // SP -= BufferSize (UXTX #0 extend, i.e. the plain 64-bit value).
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
        .addReg(AArch64::SP)
        .addReg(BufferSize)
        .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0));
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferPtr)
        .addReg(AArch64::SP);

    // We have just allocated a variable sized object, tell this to PEI.
    MFI.CreateVariableSizedObject(Align(16), nullptr);
  }

  restorePhyRegSave(RegSave, MBB, MBBI, DL);
}
| |
| void MachineSMEABI::emitStateChange(EmitContext &Context, |
| MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator InsertPt, |
| ZAState From, ZAState To, |
| LiveRegs PhysLiveRegs) { |
| // ZA not used. |
| if (From == ZAState::ANY || To == ZAState::ANY) |
| return; |
| |
| // If we're exiting from the CALLER_DORMANT state that means this new ZA |
| // function did not touch ZA (so ZA was never turned on). |
| if (From == ZAState::CALLER_DORMANT && To == ZAState::OFF) |
| return; |
| |
| // TODO: Avoid setting up the save buffer if there's no transition to |
| // LOCAL_SAVED. |
| if (From == ZAState::CALLER_DORMANT) { |
| assert(AFI->getSMEFnAttrs().hasPrivateZAInterface() && |
| "CALLER_DORMANT state requires private ZA interface"); |
| assert(&MBB == &MBB.getParent()->front() && |
| "CALLER_DORMANT state only valid in entry block"); |
| emitNewZAPrologue(MBB, MBB.getFirstNonPHI()); |
| if (To == ZAState::ACTIVE) |
| return; // Nothing more to do (ZA is active after the prologue). |
| |
| // Note: "emitNewZAPrologue" zeros ZA, so we may need to setup a lazy save |
| // if "To" is "ZAState::LOCAL_SAVED". It may be possible to improve this |
| // case by changing the placement of the zero instruction. |
| From = ZAState::ACTIVE; |
| } |
| |
| if (From == ZAState::ACTIVE && To == ZAState::LOCAL_SAVED) |
| emitZASave(Context, MBB, InsertPt, PhysLiveRegs); |
| else if (From == ZAState::LOCAL_SAVED && To == ZAState::ACTIVE) |
| emitZARestore(Context, MBB, InsertPt, PhysLiveRegs); |
| else if (To == ZAState::OFF) { |
| assert(From != ZAState::CALLER_DORMANT && |
| "CALLER_DORMANT to OFF should have already been handled"); |
| assert(!AFI->getSMEFnAttrs().hasAgnosticZAInterface() && |
| "Should not turn ZA off in agnostic ZA function"); |
| emitZAOff(MBB, InsertPt, /*ClearTPIDR2=*/From == ZAState::LOCAL_SAVED); |
| } else { |
| dbgs() << "Error: Transition from " << getZAStateString(From) << " to " |
| << getZAStateString(To) << '\n'; |
| llvm_unreachable("Unimplemented state transition"); |
| } |
| } |
| |
| } // end anonymous namespace |
| |
// Register the pass with the legacy pass manager under the command-line name
// "aarch64-machine-sme-abi".
INITIALIZE_PASS(MachineSMEABI, "aarch64-machine-sme-abi", "Machine SME ABI",
                false, false)
| |
/// Pass entry point. Collects the ZA state needed before each instruction,
/// assigns one ZA state per edge bundle, inserts the required state
/// transitions, and (if needed) allocates the ZA save buffer in the entry
/// block (or after the SME prologue).
bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
  if (!MF.getSubtarget<AArch64Subtarget>().hasSME())
    return false;

  // Only functions with ZA/ZT0 state or an agnostic-ZA interface need the
  // lowering implemented by this pass.
  AFI = MF.getInfo<AArch64FunctionInfo>();
  SMEAttrs SMEFnAttrs = AFI->getSMEFnAttrs();
  if (!SMEFnAttrs.hasZAState() && !SMEFnAttrs.hasZT0State() &&
      !SMEFnAttrs.hasAgnosticZAInterface())
    return false;

  assert(MF.getRegInfo().isSSA() && "Expected to be run on SSA form!");

  // Cache state shared by the emission helpers.
  this->MF = &MF;
  Subtarget = &MF.getSubtarget<AArch64Subtarget>();
  TII = Subtarget->getInstrInfo();
  TRI = Subtarget->getRegisterInfo();
  MRI = &MF.getRegInfo();

  const EdgeBundles &Bundles =
      getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();

  // Gather the ZA state each instruction requires, per basic block.
  FunctionInfo FnInfo = collectNeededZAStates(SMEFnAttrs);

  if (OptLevel != CodeGenOptLevel::None) {
    // Propagate desired states forward, then backwards. Most of the propagation
    // should be done in the forward step, and backwards propagation is then
    // used to fill in the gaps. Note: Doing both in one step can give poor
    // results. For example, consider this subgraph:
    //
    //     ┌─────┐
    //   ┌─┤ BB0 ◄───┐
    //   │ └─┬───┘   │
    //   │ ┌─▼───◄──┐│
    //   │ │ BB1   │││
    //   │ └─┬┬────┘││
    //   │   │└─────┘│
    //   │ ┌─▼───┐   │
    //   │ │ BB2 ├───┘
    //   │ └─┬───┘
    //   │ ┌─▼───┐
    //   └─► BB3 │
    //     └─────┘
    //
    // If:
    // - "BB0" and "BB2" (outer loop) has no state preference
    // - "BB1" (inner loop) desires the ACTIVE state on entry/exit
    // - "BB3" desires the LOCAL_SAVED state on entry
    //
    // If we propagate forwards first, ACTIVE is propagated from BB1 to BB2,
    // then from BB2 to BB0. Which results in the inner and outer loops having
    // the "ACTIVE" state. This avoids any state changes in the loops.
    //
    // If we propagate backwards first, we _could_ propagate LOCAL_SAVED from
    // BB3 to BB0, which would result in a transition from ACTIVE -> LOCAL_SAVED
    // in the outer loop.
    for (bool Forwards : {true, false})
      propagateDesiredStates(FnInfo, Forwards);
  }

  // Pick a single ZA state per edge bundle, then materialize the transitions
  // needed within each block.
  SmallVector<ZAState> BundleStates = assignBundleZAStates(Bundles, FnInfo);

  EmitContext Context;
  insertStateChanges(Context, FnInfo, Bundles, BundleStates);

  // Allocate the save buffer if any inserted transition needs one.
  if (Context.needsSaveBuffer()) {
    if (FnInfo.AfterSMEProloguePt) {
      // Note: With inline stack probes the AfterSMEProloguePt may not be in the
      // entry block (due to the probing loop).
      MachineBasicBlock::iterator MBBI = *FnInfo.AfterSMEProloguePt;
      emitAllocateZASaveBuffer(Context, *MBBI->getParent(), MBBI,
                               FnInfo.PhysLiveRegsAfterSMEPrologue);
    } else {
      MachineBasicBlock &EntryBlock = MF.front();
      emitAllocateZASaveBuffer(
          Context, EntryBlock, EntryBlock.getFirstNonPHI(),
          FnInfo.Blocks[EntryBlock.getNumber()].PhysLiveRegsAtEntry);
    }
  }

  return true;
}
| |
| FunctionPass *llvm::createMachineSMEABIPass(CodeGenOptLevel OptLevel) { |
| return new MachineSMEABI(OptLevel); |
| } |