| //===- MachineSMEABIPass.cpp ----------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This pass implements the SME ABI requirements for ZA state. This includes |
| // implementing the lazy ZA state save schemes around calls. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This pass works by collecting instructions that require ZA to be in a |
| // specific state (e.g., "ACTIVE" or "SAVED") and inserting the necessary state |
| // transitions to ensure ZA is in the required state before instructions. State |
| // transitions represent actions such as setting up or restoring a lazy save. |
| // Certain points within a function may also have predefined states independent |
| // of any instructions, for example, a "shared_za" function is always entered |
| // and exited in the "ACTIVE" state. |
| // |
| // To handle ZA state across control flow, we make use of edge bundling. This |
| // assigns each block an "incoming" and "outgoing" edge bundle (representing |
| // incoming and outgoing edges). Initially, these are unique to each block; |
| // then, in the process of forming bundles, the outgoing block of a block is |
| // joined with the incoming bundle of all successors. The result is that each |
| // bundle can be assigned a single ZA state, which ensures the state required by |
| // all a blocks' successors is the same, and that each basic block will always |
| // be entered with the same ZA state. This eliminates the need for splitting |
| // edges to insert state transitions or "phi" nodes for ZA states. |
| // |
| // See below for a simple example of edge bundling. |
| // |
| // The following shows a conditionally executed basic block (BB1): |
| // |
| // if (cond) |
| // BB1 |
| // BB2 |
| // |
| // Initial Bundles Joined Bundles |
| // |
| // ┌──0──┐ ┌──0──┐ |
| // │ BB0 │ │ BB0 │ |
| // └──1──┘ └──1──┘ |
| // ├───────┐ ├───────┐ |
| // ▼ │ ▼ │ |
| // ┌──2──┐ │ ─────► ┌──1──┐ │ |
| // │ BB1 │ ▼ │ BB1 │ ▼ |
| // └──3──┘ ┌──4──┐ └──1──┘ ┌──1──┐ |
| // └───►4 BB2 │ └───►1 BB2 │ |
| // └──5──┘ └──2──┘ |
| // |
| // On the left are the initial per-block bundles, and on the right are the |
| // joined bundles (which are the result of the EdgeBundles analysis). |
| |
| #include "AArch64InstrInfo.h" |
| #include "AArch64MachineFunctionInfo.h" |
| #include "AArch64Subtarget.h" |
| #include "MCTargetDesc/AArch64AddressingModes.h" |
| #include "llvm/ADT/BitmaskEnum.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/CodeGen/EdgeBundles.h" |
| #include "llvm/CodeGen/LivePhysRegs.h" |
| #include "llvm/CodeGen/MachineBasicBlock.h" |
| #include "llvm/CodeGen/MachineFunctionPass.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/TargetRegisterInfo.h" |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "aarch64-machine-sme-abi" |
| |
| namespace { |
| |
| enum ZAState { |
| // Any/unknown state (not valid) |
| ANY = 0, |
| |
| // ZA is in use and active (i.e. within the accumulator) |
| ACTIVE, |
| |
| // A ZA save has been set up or committed (i.e. ZA is dormant or off) |
| LOCAL_SAVED, |
| |
| // ZA is off or a lazy save has been set up by the caller |
| CALLER_DORMANT, |
| |
| // ZA is off |
| OFF, |
| |
| // The number of ZA states (not a valid state) |
| NUM_ZA_STATE |
| }; |
| |
| /// A bitmask enum to record live physical registers that the "emit*" routines |
| /// may need to preserve. Note: This only tracks registers we may clobber. |
| enum LiveRegs : uint8_t { |
| None = 0, |
| NZCV = 1 << 0, |
| W0 = 1 << 1, |
| W0_HI = 1 << 2, |
| X0 = W0 | W0_HI, |
| LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ W0_HI) |
| }; |
| |
| /// Holds the virtual registers live physical registers have been saved to. |
| struct PhysRegSave { |
| LiveRegs PhysLiveRegs; |
| Register StatusFlags = AArch64::NoRegister; |
| Register X0Save = AArch64::NoRegister; |
| }; |
| |
| static bool isLegalEdgeBundleZAState(ZAState State) { |
| switch (State) { |
| case ZAState::ACTIVE: |
| case ZAState::LOCAL_SAVED: |
| return true; |
| default: |
| return false; |
| } |
| } |
| struct TPIDR2State { |
| int FrameIndex = -1; |
| }; |
| |
| StringRef getZAStateString(ZAState State) { |
| #define MAKE_CASE(V) \ |
| case V: \ |
| return #V; |
| switch (State) { |
| MAKE_CASE(ZAState::ANY) |
| MAKE_CASE(ZAState::ACTIVE) |
| MAKE_CASE(ZAState::LOCAL_SAVED) |
| MAKE_CASE(ZAState::CALLER_DORMANT) |
| MAKE_CASE(ZAState::OFF) |
| default: |
| llvm_unreachable("Unexpected ZAState"); |
| } |
| #undef MAKE_CASE |
| } |
| |
| static bool isZAorZTRegOp(const TargetRegisterInfo &TRI, |
| const MachineOperand &MO) { |
| if (!MO.isReg() || !MO.getReg().isPhysical()) |
| return false; |
| return any_of(TRI.subregs_inclusive(MO.getReg()), [](const MCPhysReg &SR) { |
| return AArch64::MPR128RegClass.contains(SR) || |
| AArch64::ZTRRegClass.contains(SR); |
| }); |
| } |
| |
| /// Returns the required ZA state needed before \p MI and an iterator pointing |
| /// to where any code required to change the ZA state should be inserted. |
| static std::pair<ZAState, MachineBasicBlock::iterator> |
| getZAStateBeforeInst(const TargetRegisterInfo &TRI, MachineInstr &MI, |
| bool ZAOffAtReturn) { |
| MachineBasicBlock::iterator InsertPt(MI); |
| |
| if (MI.getOpcode() == AArch64::InOutZAUsePseudo) |
| return {ZAState::ACTIVE, std::prev(InsertPt)}; |
| |
| if (MI.getOpcode() == AArch64::RequiresZASavePseudo) |
| return {ZAState::LOCAL_SAVED, std::prev(InsertPt)}; |
| |
| if (MI.isReturn()) |
| return {ZAOffAtReturn ? ZAState::OFF : ZAState::ACTIVE, InsertPt}; |
| |
| for (auto &MO : MI.operands()) { |
| if (isZAorZTRegOp(TRI, MO)) |
| return {ZAState::ACTIVE, InsertPt}; |
| } |
| |
| return {ZAState::ANY, InsertPt}; |
| } |
| |
| struct MachineSMEABI : public MachineFunctionPass { |
| inline static char ID = 0; |
| |
| MachineSMEABI() : MachineFunctionPass(ID) {} |
| |
| bool runOnMachineFunction(MachineFunction &MF) override; |
| |
| StringRef getPassName() const override { return "Machine SME ABI pass"; } |
| |
| void getAnalysisUsage(AnalysisUsage &AU) const override { |
| AU.setPreservesCFG(); |
| AU.addRequired<EdgeBundlesWrapperLegacy>(); |
| AU.addPreservedID(MachineLoopInfoID); |
| AU.addPreservedID(MachineDominatorsID); |
| MachineFunctionPass::getAnalysisUsage(AU); |
| } |
| |
| /// Collects the needed ZA state (and live registers) before each instruction |
| /// within the machine function. |
| void collectNeededZAStates(SMEAttrs); |
| |
| /// Assigns each edge bundle a ZA state based on the needed states of blocks |
| /// that have incoming or outgoing edges in that bundle. |
| void assignBundleZAStates(); |
| |
| /// Inserts code to handle changes between ZA states within the function. |
| /// E.g., ACTIVE -> LOCAL_SAVED will insert code required to save ZA. |
| void insertStateChanges(); |
| |
| // Emission routines for private and shared ZA functions (using lazy saves). |
| void emitNewZAPrologue(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator MBBI); |
| void emitRestoreLazySave(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator MBBI, |
| LiveRegs PhysLiveRegs); |
| void emitSetupLazySave(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator MBBI); |
| void emitAllocateLazySaveBuffer(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator MBBI); |
| void emitZAOff(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
| bool ClearTPIDR2); |
| |
| void emitStateChange(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
| ZAState From, ZAState To, LiveRegs PhysLiveRegs); |
| |
| /// Save live physical registers to virtual registers. |
| PhysRegSave createPhysRegSave(LiveRegs PhysLiveRegs, MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator MBBI, DebugLoc DL); |
| /// Restore physical registers from a save of their previous values. |
| void restorePhyRegSave(PhysRegSave const &RegSave, MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator MBBI, DebugLoc DL); |
| |
| /// Get or create a TPIDR2 block in this function. |
| TPIDR2State getTPIDR2Block(); |
| |
| private: |
| /// Contains the needed ZA state (and live registers) at an instruction. |
| struct InstInfo { |
| ZAState NeededState{ZAState::ANY}; |
| MachineBasicBlock::iterator InsertPt; |
| LiveRegs PhysLiveRegs = LiveRegs::None; |
| }; |
| |
| /// Contains the needed ZA state for each instruction in a block. |
| /// Instructions that do not require a ZA state are not recorded. |
| struct BlockInfo { |
| ZAState FixedEntryState{ZAState::ANY}; |
| SmallVector<InstInfo> Insts; |
| LiveRegs PhysLiveRegsAtExit = LiveRegs::None; |
| }; |
| |
| // All pass state that must be cleared between functions. |
| struct PassState { |
| SmallVector<BlockInfo> Blocks; |
| SmallVector<ZAState> BundleStates; |
| std::optional<TPIDR2State> TPIDR2Block; |
| } State; |
| |
| MachineFunction *MF = nullptr; |
| EdgeBundles *Bundles = nullptr; |
| const AArch64Subtarget *Subtarget = nullptr; |
| const AArch64RegisterInfo *TRI = nullptr; |
| const TargetInstrInfo *TII = nullptr; |
| MachineRegisterInfo *MRI = nullptr; |
| }; |
| |
| void MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) { |
| assert((SMEFnAttrs.hasZT0State() || SMEFnAttrs.hasZAState()) && |
| "Expected function to have ZA/ZT0 state!"); |
| |
| State.Blocks.resize(MF->getNumBlockIDs()); |
| for (MachineBasicBlock &MBB : *MF) { |
| BlockInfo &Block = State.Blocks[MBB.getNumber()]; |
| if (MBB.isEntryBlock()) { |
| // Entry block: |
| Block.FixedEntryState = SMEFnAttrs.hasPrivateZAInterface() |
| ? ZAState::CALLER_DORMANT |
| : ZAState::ACTIVE; |
| } else if (MBB.isEHPad()) { |
| // EH entry block: |
| Block.FixedEntryState = ZAState::LOCAL_SAVED; |
| } |
| |
| LiveRegUnits LiveUnits(*TRI); |
| LiveUnits.addLiveOuts(MBB); |
| |
| auto GetPhysLiveRegs = [&] { |
| LiveRegs PhysLiveRegs = LiveRegs::None; |
| if (!LiveUnits.available(AArch64::NZCV)) |
| PhysLiveRegs |= LiveRegs::NZCV; |
| // We have to track W0 and X0 separately as otherwise things can get |
| // confused if we attempt to preserve X0 but only W0 was defined. |
| if (!LiveUnits.available(AArch64::W0)) |
| PhysLiveRegs |= LiveRegs::W0; |
| if (!LiveUnits.available(AArch64::W0_HI)) |
| PhysLiveRegs |= LiveRegs::W0_HI; |
| return PhysLiveRegs; |
| }; |
| |
| Block.PhysLiveRegsAtExit = GetPhysLiveRegs(); |
| auto FirstTerminatorInsertPt = MBB.getFirstTerminator(); |
| for (MachineInstr &MI : reverse(MBB)) { |
| MachineBasicBlock::iterator MBBI(MI); |
| LiveUnits.stepBackward(MI); |
| LiveRegs PhysLiveRegs = GetPhysLiveRegs(); |
| auto [NeededState, InsertPt] = getZAStateBeforeInst( |
| *TRI, MI, /*ZAOffAtReturn=*/SMEFnAttrs.hasPrivateZAInterface()); |
| assert((InsertPt == MBBI || |
| InsertPt->getOpcode() == AArch64::ADJCALLSTACKDOWN) && |
| "Unexpected state change insertion point!"); |
| // TODO: Do something to avoid state changes where NZCV is live. |
| if (MBBI == FirstTerminatorInsertPt) |
| Block.PhysLiveRegsAtExit = PhysLiveRegs; |
| if (NeededState != ZAState::ANY) |
| Block.Insts.push_back({NeededState, InsertPt, PhysLiveRegs}); |
| } |
| |
| // Reverse vector (as we had to iterate backwards for liveness). |
| std::reverse(Block.Insts.begin(), Block.Insts.end()); |
| } |
| } |
| |
| void MachineSMEABI::assignBundleZAStates() { |
| State.BundleStates.resize(Bundles->getNumBundles()); |
| for (unsigned I = 0, E = Bundles->getNumBundles(); I != E; ++I) { |
| LLVM_DEBUG(dbgs() << "Assigning ZA state for edge bundle: " << I << '\n'); |
| |
| // Attempt to assign a ZA state for this bundle that minimizes state |
| // transitions. Edges within loops are given a higher weight as we assume |
| // they will be executed more than once. |
| // TODO: We should propagate desired incoming/outgoing states through blocks |
| // that have the "ANY" state first to make better global decisions. |
| int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0}; |
| for (unsigned BlockID : Bundles->getBlocks(I)) { |
| LLVM_DEBUG(dbgs() << "- bb." << BlockID); |
| |
| const BlockInfo &Block = State.Blocks[BlockID]; |
| if (Block.Insts.empty()) { |
| LLVM_DEBUG(dbgs() << " (no state preference)\n"); |
| continue; |
| } |
| bool InEdge = Bundles->getBundle(BlockID, /*Out=*/false) == I; |
| bool OutEdge = Bundles->getBundle(BlockID, /*Out=*/true) == I; |
| |
| ZAState DesiredIncomingState = Block.Insts.front().NeededState; |
| if (InEdge && isLegalEdgeBundleZAState(DesiredIncomingState)) { |
| EdgeStateCounts[DesiredIncomingState]++; |
| LLVM_DEBUG(dbgs() << " DesiredIncomingState: " |
| << getZAStateString(DesiredIncomingState)); |
| } |
| ZAState DesiredOutgoingState = Block.Insts.back().NeededState; |
| if (OutEdge && isLegalEdgeBundleZAState(DesiredOutgoingState)) { |
| EdgeStateCounts[DesiredOutgoingState]++; |
| LLVM_DEBUG(dbgs() << " DesiredOutgoingState: " |
| << getZAStateString(DesiredOutgoingState)); |
| } |
| LLVM_DEBUG(dbgs() << '\n'); |
| } |
| |
| ZAState BundleState = |
| ZAState(max_element(EdgeStateCounts) - EdgeStateCounts); |
| |
| // Force ZA to be active in bundles that don't have a preferred state. |
| // TODO: Something better here (to avoid extra mode switches). |
| if (BundleState == ZAState::ANY) |
| BundleState = ZAState::ACTIVE; |
| |
| LLVM_DEBUG({ |
| dbgs() << "Chosen ZA state: " << getZAStateString(BundleState) << '\n' |
| << "Edge counts:"; |
| for (auto [State, Count] : enumerate(EdgeStateCounts)) |
| dbgs() << " " << getZAStateString(ZAState(State)) << ": " << Count; |
| dbgs() << "\n\n"; |
| }); |
| |
| State.BundleStates[I] = BundleState; |
| } |
| } |
| |
| void MachineSMEABI::insertStateChanges() { |
| for (MachineBasicBlock &MBB : *MF) { |
| const BlockInfo &Block = State.Blocks[MBB.getNumber()]; |
| ZAState InState = State.BundleStates[Bundles->getBundle(MBB.getNumber(), |
| /*Out=*/false)]; |
| |
| ZAState CurrentState = Block.FixedEntryState; |
| if (CurrentState == ZAState::ANY) |
| CurrentState = InState; |
| |
| for (auto &Inst : Block.Insts) { |
| if (CurrentState != Inst.NeededState) |
| emitStateChange(MBB, Inst.InsertPt, CurrentState, Inst.NeededState, |
| Inst.PhysLiveRegs); |
| CurrentState = Inst.NeededState; |
| } |
| |
| if (MBB.succ_empty()) |
| continue; |
| |
| ZAState OutState = |
| State.BundleStates[Bundles->getBundle(MBB.getNumber(), /*Out=*/true)]; |
| if (CurrentState != OutState) |
| emitStateChange(MBB, MBB.getFirstTerminator(), CurrentState, OutState, |
| Block.PhysLiveRegsAtExit); |
| } |
| } |
| |
| TPIDR2State MachineSMEABI::getTPIDR2Block() { |
| if (State.TPIDR2Block) |
| return *State.TPIDR2Block; |
| MachineFrameInfo &MFI = MF->getFrameInfo(); |
| State.TPIDR2Block = TPIDR2State{MFI.CreateStackObject(16, Align(16), false)}; |
| return *State.TPIDR2Block; |
| } |
| |
| static DebugLoc getDebugLoc(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator MBBI) { |
| if (MBBI != MBB.end()) |
| return MBBI->getDebugLoc(); |
| return DebugLoc(); |
| } |
| |
| void MachineSMEABI::emitSetupLazySave(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator MBBI) { |
| DebugLoc DL = getDebugLoc(MBB, MBBI); |
| |
| // Get pointer to TPIDR2 block. |
| Register TPIDR2 = MRI->createVirtualRegister(&AArch64::GPR64spRegClass); |
| Register TPIDR2Ptr = MRI->createVirtualRegister(&AArch64::GPR64RegClass); |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), TPIDR2) |
| .addFrameIndex(getTPIDR2Block().FrameIndex) |
| .addImm(0) |
| .addImm(0); |
| BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), TPIDR2Ptr) |
| .addReg(TPIDR2); |
| // Set TPIDR2_EL0 to point to TPIDR2 block. |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR)) |
| .addImm(AArch64SysReg::TPIDR2_EL0) |
| .addReg(TPIDR2Ptr); |
| } |
| |
| PhysRegSave MachineSMEABI::createPhysRegSave(LiveRegs PhysLiveRegs, |
| MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator MBBI, |
| DebugLoc DL) { |
| PhysRegSave RegSave{PhysLiveRegs}; |
| if (PhysLiveRegs & LiveRegs::NZCV) { |
| RegSave.StatusFlags = MRI->createVirtualRegister(&AArch64::GPR64RegClass); |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS), RegSave.StatusFlags) |
| .addImm(AArch64SysReg::NZCV) |
| .addReg(AArch64::NZCV, RegState::Implicit); |
| } |
| // Note: Preserving X0 is "free" as this is before register allocation, so |
| // the register allocator is still able to optimize these copies. |
| if (PhysLiveRegs & LiveRegs::W0) { |
| RegSave.X0Save = MRI->createVirtualRegister(PhysLiveRegs & LiveRegs::W0_HI |
| ? &AArch64::GPR64RegClass |
| : &AArch64::GPR32RegClass); |
| BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), RegSave.X0Save) |
| .addReg(PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0); |
| } |
| return RegSave; |
| } |
| |
| void MachineSMEABI::restorePhyRegSave(PhysRegSave const &RegSave, |
| MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator MBBI, |
| DebugLoc DL) { |
| if (RegSave.StatusFlags != AArch64::NoRegister) |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR)) |
| .addImm(AArch64SysReg::NZCV) |
| .addReg(RegSave.StatusFlags) |
| .addReg(AArch64::NZCV, RegState::ImplicitDefine); |
| |
| if (RegSave.X0Save != AArch64::NoRegister) |
| BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), |
| RegSave.PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0) |
| .addReg(RegSave.X0Save); |
| } |
| |
| void MachineSMEABI::emitRestoreLazySave(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator MBBI, |
| LiveRegs PhysLiveRegs) { |
| auto *TLI = Subtarget->getTargetLowering(); |
| DebugLoc DL = getDebugLoc(MBB, MBBI); |
| Register TPIDR2EL0 = MRI->createVirtualRegister(&AArch64::GPR64RegClass); |
| Register TPIDR2 = AArch64::X0; |
| |
| // TODO: Emit these within the restore MBB to prevent unnecessary saves. |
| PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL); |
| |
| // Enable ZA. |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1)) |
| .addImm(AArch64SVCR::SVCRZA) |
| .addImm(1); |
| // Get current TPIDR2_EL0. |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS), TPIDR2EL0) |
| .addImm(AArch64SysReg::TPIDR2_EL0); |
| // Get pointer to TPIDR2 block. |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), TPIDR2) |
| .addFrameIndex(getTPIDR2Block().FrameIndex) |
| .addImm(0) |
| .addImm(0); |
| // (Conditionally) restore ZA state. |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::RestoreZAPseudo)) |
| .addReg(TPIDR2EL0) |
| .addReg(TPIDR2) |
| .addExternalSymbol(TLI->getLibcallName(RTLIB::SMEABI_TPIDR2_RESTORE)) |
| .addRegMask(TRI->SMEABISupportRoutinesCallPreservedMaskFromX0()); |
| // Zero TPIDR2_EL0. |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR)) |
| .addImm(AArch64SysReg::TPIDR2_EL0) |
| .addReg(AArch64::XZR); |
| |
| restorePhyRegSave(RegSave, MBB, MBBI, DL); |
| } |
| |
| void MachineSMEABI::emitZAOff(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator MBBI, |
| bool ClearTPIDR2) { |
| DebugLoc DL = getDebugLoc(MBB, MBBI); |
| |
| if (ClearTPIDR2) |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR)) |
| .addImm(AArch64SysReg::TPIDR2_EL0) |
| .addReg(AArch64::XZR); |
| |
| // Disable ZA. |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1)) |
| .addImm(AArch64SVCR::SVCRZA) |
| .addImm(0); |
| } |
| |
| void MachineSMEABI::emitAllocateLazySaveBuffer( |
| MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { |
| MachineFrameInfo &MFI = MF->getFrameInfo(); |
| |
| DebugLoc DL = getDebugLoc(MBB, MBBI); |
| Register SP = MRI->createVirtualRegister(&AArch64::GPR64RegClass); |
| Register SVL = MRI->createVirtualRegister(&AArch64::GPR64RegClass); |
| Register Buffer = MRI->createVirtualRegister(&AArch64::GPR64RegClass); |
| |
| // Calculate SVL. |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::RDSVLI_XI), SVL).addImm(1); |
| |
| // 1. Allocate the lazy save buffer. |
| { |
| // TODO This function grows the stack with a subtraction, which doesn't work |
| // on Windows. Some refactoring to share the functionality in |
| // LowerWindowsDYNAMIC_STACKALLOC will be required once the Windows ABI |
| // supports SME |
| assert(!Subtarget->isTargetWindows() && |
| "Lazy ZA save is not yet supported on Windows"); |
| // Get original stack pointer. |
| BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), SP) |
| .addReg(AArch64::SP); |
| // Allocate a lazy-save buffer object of the size given, normally SVL * SVL |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSUBXrrr), Buffer) |
| .addReg(SVL) |
| .addReg(SVL) |
| .addReg(SP); |
| BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::SP) |
| .addReg(Buffer); |
| // We have just allocated a variable sized object, tell this to PEI. |
| MFI.CreateVariableSizedObject(Align(16), nullptr); |
| } |
| |
| // 2. Setup the TPIDR2 block. |
| { |
| // Note: This case just needs to do `SVL << 48`. It is not implemented as we |
| // generally don't support big-endian SVE/SME. |
| if (!Subtarget->isLittleEndian()) |
| reportFatalInternalError( |
| "TPIDR2 block initialization is not supported on big-endian targets"); |
| |
| // Store buffer pointer and num_za_save_slices. |
| // Bytes 10-15 are implicitly zeroed. |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::STPXi)) |
| .addReg(Buffer) |
| .addReg(SVL) |
| .addFrameIndex(getTPIDR2Block().FrameIndex) |
| .addImm(0); |
| } |
| } |
| |
| void MachineSMEABI::emitNewZAPrologue(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator MBBI) { |
| auto *TLI = Subtarget->getTargetLowering(); |
| DebugLoc DL = getDebugLoc(MBB, MBBI); |
| |
| // Get current TPIDR2_EL0. |
| Register TPIDR2EL0 = MRI->createVirtualRegister(&AArch64::GPR64RegClass); |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS)) |
| .addReg(TPIDR2EL0, RegState::Define) |
| .addImm(AArch64SysReg::TPIDR2_EL0); |
| // If TPIDR2_EL0 is non-zero, commit the lazy save. |
| // NOTE: Functions that only use ZT0 don't need to zero ZA. |
| bool ZeroZA = |
| MF->getInfo<AArch64FunctionInfo>()->getSMEFnAttrs().hasZAState(); |
| auto CommitZASave = |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::CommitZASavePseudo)) |
| .addReg(TPIDR2EL0) |
| .addImm(ZeroZA ? 1 : 0) |
| .addExternalSymbol(TLI->getLibcallName(RTLIB::SMEABI_TPIDR2_SAVE)) |
| .addRegMask(TRI->SMEABISupportRoutinesCallPreservedMaskFromX0()); |
| if (ZeroZA) |
| CommitZASave.addDef(AArch64::ZAB0, RegState::ImplicitDefine); |
| // Enable ZA (as ZA could have previously been in the OFF state). |
| BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1)) |
| .addImm(AArch64SVCR::SVCRZA) |
| .addImm(1); |
| } |
| |
| void MachineSMEABI::emitStateChange(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator InsertPt, |
| ZAState From, ZAState To, |
| LiveRegs PhysLiveRegs) { |
| |
| // ZA not used. |
| if (From == ZAState::ANY || To == ZAState::ANY) |
| return; |
| |
| // If we're exiting from the CALLER_DORMANT state that means this new ZA |
| // function did not touch ZA (so ZA was never turned on). |
| if (From == ZAState::CALLER_DORMANT && To == ZAState::OFF) |
| return; |
| |
| // TODO: Avoid setting up the save buffer if there's no transition to |
| // LOCAL_SAVED. |
| if (From == ZAState::CALLER_DORMANT) { |
| assert(MBB.getParent() |
| ->getInfo<AArch64FunctionInfo>() |
| ->getSMEFnAttrs() |
| .hasPrivateZAInterface() && |
| "CALLER_DORMANT state requires private ZA interface"); |
| assert(&MBB == &MBB.getParent()->front() && |
| "CALLER_DORMANT state only valid in entry block"); |
| emitNewZAPrologue(MBB, MBB.getFirstNonPHI()); |
| if (To == ZAState::ACTIVE) |
| return; // Nothing more to do (ZA is active after the prologue). |
| |
| // Note: "emitNewZAPrologue" zeros ZA, so we may need to setup a lazy save |
| // if "To" is "ZAState::LOCAL_SAVED". It may be possible to improve this |
| // case by changing the placement of the zero instruction. |
| From = ZAState::ACTIVE; |
| } |
| |
| if (From == ZAState::ACTIVE && To == ZAState::LOCAL_SAVED) |
| emitSetupLazySave(MBB, InsertPt); |
| else if (From == ZAState::LOCAL_SAVED && To == ZAState::ACTIVE) |
| emitRestoreLazySave(MBB, InsertPt, PhysLiveRegs); |
| else if (To == ZAState::OFF) { |
| assert(From != ZAState::CALLER_DORMANT && |
| "CALLER_DORMANT to OFF should have already been handled"); |
| emitZAOff(MBB, InsertPt, /*ClearTPIDR2=*/From == ZAState::LOCAL_SAVED); |
| } else { |
| dbgs() << "Error: Transition from " << getZAStateString(From) << " to " |
| << getZAStateString(To) << '\n'; |
| llvm_unreachable("Unimplemented state transition"); |
| } |
| } |
| |
| } // end anonymous namespace |
| |
| INITIALIZE_PASS(MachineSMEABI, "aarch64-machine-sme-abi", "Machine SME ABI", |
| false, false) |
| |
| bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) { |
| if (!MF.getSubtarget<AArch64Subtarget>().hasSME()) |
| return false; |
| |
| auto *AFI = MF.getInfo<AArch64FunctionInfo>(); |
| SMEAttrs SMEFnAttrs = AFI->getSMEFnAttrs(); |
| if (!SMEFnAttrs.hasZAState() && !SMEFnAttrs.hasZT0State()) |
| return false; |
| |
| assert(MF.getRegInfo().isSSA() && "Expected to be run on SSA form!"); |
| |
| // Reset pass state. |
| State = PassState{}; |
| this->MF = &MF; |
| Bundles = &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles(); |
| Subtarget = &MF.getSubtarget<AArch64Subtarget>(); |
| TII = Subtarget->getInstrInfo(); |
| TRI = Subtarget->getRegisterInfo(); |
| MRI = &MF.getRegInfo(); |
| |
| collectNeededZAStates(SMEFnAttrs); |
| assignBundleZAStates(); |
| insertStateChanges(); |
| |
| // Allocate save buffer (if needed). |
| if (State.TPIDR2Block) { |
| MachineBasicBlock &EntryBlock = MF.front(); |
| emitAllocateLazySaveBuffer(EntryBlock, EntryBlock.getFirstNonPHI()); |
| } |
| |
| return true; |
| } |
| |
| FunctionPass *llvm::createMachineSMEABIPass() { return new MachineSMEABI(); } |