//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "AArch64PrologueEpilogue.h"
#include "AArch64FrameLowering.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/CFIInstBuilder.h"
#include "llvm/MC/MCContext.h"
#define DEBUG_TYPE "frame-info"
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
namespace llvm {
static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
RTLIB::Libcall LC) {
return MO.isSymbol() &&
StringRef(TLI.getLibcallName(LC)) == MO.getSymbolName();
}
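// Returns true if obtaining the current value of VG requires a call to
// __arm_get_current_vg, i.e. the function has streaming-mode changes but SVE
// (and with it the CNTD instruction) is unavailable.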
bool AArch64PrologueEpilogueCommon::requiresGetVGCall() const {
return AFI->hasStreamingModeChanges() &&
!MF.getSubtarget<AArch64Subtarget>().hasSVE();
}
bool AArch64PrologueEpilogueCommon::isVGInstruction(
MachineBasicBlock::iterator MBBI, const TargetLowering &TLI) const {
unsigned Opc = MBBI->getOpcode();
if (Opc == AArch64::CNTD_XPiI)
return true;
if (!requiresGetVGCall())
return false;
if (Opc == AArch64::BL)
return matchLibcall(TLI, MBBI->getOperand(0), RTLIB::SMEABI_GET_CURRENT_VG);
return Opc == TargetOpcode::COPY;
}
// Convenience function to determine whether I is part of the ZPR callee saves.
static bool isPartOfZPRCalleeSaves(MachineBasicBlock::iterator I) {
switch (I->getOpcode()) {
default:
return false;
case AArch64::LD1B_2Z_IMM:
case AArch64::ST1B_2Z_IMM:
case AArch64::STR_ZXI:
case AArch64::LDR_ZXI:
case AArch64::PTRUE_C_B:
return I->getFlag(MachineInstr::FrameSetup) ||
I->getFlag(MachineInstr::FrameDestroy);
case AArch64::SEH_SaveZReg:
return true;
}
}
// Convenience function to determine whether I is part of the PPR callee saves.
static bool isPartOfPPRCalleeSaves(MachineBasicBlock::iterator I) {
switch (I->getOpcode()) {
default:
return false;
case AArch64::STR_PXI:
case AArch64::LDR_PXI:
return I->getFlag(MachineInstr::FrameSetup) ||
I->getFlag(MachineInstr::FrameDestroy);
case AArch64::SEH_SavePReg:
return true;
}
}
// Convenience function to determine whether I is part of the SVE callee saves.
static bool isPartOfSVECalleeSaves(MachineBasicBlock::iterator I) {
return isPartOfZPRCalleeSaves(I) || isPartOfPPRCalleeSaves(I);
}
AArch64PrologueEpilogueCommon::AArch64PrologueEpilogueCommon(
MachineFunction &MF, MachineBasicBlock &MBB,
const AArch64FrameLowering &AFL)
: MF(MF), MBB(MBB), MFI(MF.getFrameInfo()),
Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL),
RegInfo(*Subtarget.getRegisterInfo()) {
TII = Subtarget.getInstrInfo();
AFI = MF.getInfo<AArch64FunctionInfo>();
HasFP = AFL.hasFP(MF);
NeedsWinCFI = AFL.needsWinCFI(MF);
// Windows unwind can't represent the required stack adjustments if we have
// both SVE callee-saves and dynamic stack allocations, and the frame pointer
// is before the SVE spills. The allocation of the frame pointer must be the
// last instruction in the prologue so the unwinder can restore the stack
// pointer correctly. (And there isn't any unwind opcode for `addvl sp, x29,
// -17`.)
//
// Because of this, we do spills in the opposite order on Windows: first SVE,
// then GPRs. The main side-effect of this is that it makes accessing
// parameters passed on the stack more expensive.
//
// We could consider rearranging the spills for simpler cases.
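//
// An illustrative sketch (not emitted verbatim) of the resulting order:
//   sub   sp, sp, #<fixed-objects>
//   addvl sp, sp, #-<sve-callee-saves>
//   str   z8, [sp]                    // ... SVE callee-saves ...
//   stp   x29, x30, [sp, #-16]!       // GPR saves / frame record last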
if (Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize()) {
if (AFI->hasStackHazardSlotIndex())
reportFatalUsageError("SME hazard padding is not supported on Windows");
SVELayout = SVEStackLayout::CalleeSavesAboveFrameRecord;
} else if (AFI->hasSplitSVEObjects()) {
SVELayout = SVEStackLayout::Split;
}
}
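// Convert a callee-save spill/restore at [sp, #0] into a pre/post-indexed
// form that folds an SP adjustment of CSStackSizeInc bytes into the access.
// An illustrative example, for CSStackSizeInc == -16:
//   stp x29, x30, [sp]  -->  stp x29, x30, [sp, #-16]!
// If the offset cannot be folded (non-zero offset or out of range), a
// separate SP arithmetic instruction is emitted instead.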
MachineBasicBlock::iterator
AArch64PrologueEpilogueCommon::convertCalleeSaveRestoreToSPPrePostIncDec(
MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int CSStackSizeInc,
bool EmitCFI, MachineInstr::MIFlag FrameFlag, int CFAOffset) const {
unsigned NewOpc;
// If the function contains streaming mode changes, we expect instructions
// to calculate the value of VG before spilling. Move past these instructions
// if necessary.
if (AFL.requiresSaveVG(MF)) {
auto &TLI = *Subtarget.getTargetLowering();
while (isVGInstruction(MBBI, TLI))
++MBBI;
}
switch (MBBI->getOpcode()) {
default:
llvm_unreachable("Unexpected callee-save save/restore opcode!");
case AArch64::STPXi:
NewOpc = AArch64::STPXpre;
break;
case AArch64::STPDi:
NewOpc = AArch64::STPDpre;
break;
case AArch64::STPQi:
NewOpc = AArch64::STPQpre;
break;
case AArch64::STRXui:
NewOpc = AArch64::STRXpre;
break;
case AArch64::STRDui:
NewOpc = AArch64::STRDpre;
break;
case AArch64::STRQui:
NewOpc = AArch64::STRQpre;
break;
case AArch64::LDPXi:
NewOpc = AArch64::LDPXpost;
break;
case AArch64::LDPDi:
NewOpc = AArch64::LDPDpost;
break;
case AArch64::LDPQi:
NewOpc = AArch64::LDPQpost;
break;
case AArch64::LDRXui:
NewOpc = AArch64::LDRXpost;
break;
case AArch64::LDRDui:
NewOpc = AArch64::LDRDpost;
break;
case AArch64::LDRQui:
NewOpc = AArch64::LDRQpost;
break;
}
TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
int64_t MinOffset, MaxOffset;
bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
NewOpc, Scale, Width, MinOffset, MaxOffset);
(void)Success;
assert(Success && "unknown load/store opcode");
// If the first store isn't right where we want SP, then we can't fold the
// update in, so create a normal arithmetic instruction instead.
if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
// If we are destroying the frame, make sure we add the increment after the
// last frame operation.
if (FrameFlag == MachineInstr::FrameDestroy) {
++MBBI;
// Also skip the SEH instruction, if needed
if (NeedsWinCFI && AArch64InstrInfo::isSEHInstruction(*MBBI))
++MBBI;
}
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
false, NeedsWinCFI, &HasWinCFI, EmitCFI,
StackOffset::getFixed(CFAOffset));
return std::prev(MBBI);
}
// Get rid of the SEH code associated with the old instruction.
if (NeedsWinCFI) {
auto SEH = std::next(MBBI);
if (AArch64InstrInfo::isSEHInstruction(*SEH))
SEH->eraseFromParent();
}
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
MIB.addReg(AArch64::SP, RegState::Define);
// Copy all operands other than the immediate offset.
unsigned OpndIdx = 0;
for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
++OpndIdx)
MIB.add(MBBI->getOperand(OpndIdx));
assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
"Unexpected immediate offset in first/last callee-save save/restore "
"instruction!");
assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
"Unexpected base register in callee-save save/restore instruction!");
assert(CSStackSizeInc % Scale == 0);
MIB.addImm(CSStackSizeInc / (int)Scale);
MIB.setMIFlags(MBBI->getFlags());
MIB.setMemRefs(MBBI->memoperands());
// Generate a new SEH code that corresponds to the new instruction.
if (NeedsWinCFI) {
HasWinCFI = true;
AFL.insertSEH(*MIB, *TII, FrameFlag);
}
if (EmitCFI)
CFIInstBuilder(MBB, MBBI, FrameFlag)
.buildDefCFAOffset(CFAOffset - CSStackSizeInc);
return std::prev(MBB.erase(MBBI));
}
// Fix up the SEH opcode associated with the save/restore instruction.
static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
unsigned LocalStackSize) {
MachineOperand *ImmOpnd = nullptr;
unsigned ImmIdx = MBBI->getNumOperands() - 1;
switch (MBBI->getOpcode()) {
default:
llvm_unreachable("Fix the offset in the SEH instruction");
case AArch64::SEH_SaveFPLR:
case AArch64::SEH_SaveRegP:
case AArch64::SEH_SaveReg:
case AArch64::SEH_SaveFRegP:
case AArch64::SEH_SaveFReg:
case AArch64::SEH_SaveAnyRegQP:
case AArch64::SEH_SaveAnyRegQPX:
ImmOpnd = &MBBI->getOperand(ImmIdx);
break;
}
if (ImmOpnd)
ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
}
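// Example (illustrative): with LocalStackSize == 32 and a scale of 8, the
// fixup below turns
//   stp x29, x30, [sp, #16]  -->  stp x29, x30, [sp, #48]
// keeping callee-save offsets correct after the combined CSR+locals bump.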
void AArch64PrologueEpilogueCommon::fixupCalleeSaveRestoreStackOffset(
MachineInstr &MI, uint64_t LocalStackSize) const {
if (AArch64InstrInfo::isSEHInstruction(MI))
return;
unsigned Opc = MI.getOpcode();
unsigned Scale;
switch (Opc) {
case AArch64::STPXi:
case AArch64::STRXui:
case AArch64::STPDi:
case AArch64::STRDui:
case AArch64::LDPXi:
case AArch64::LDRXui:
case AArch64::LDPDi:
case AArch64::LDRDui:
Scale = 8;
break;
case AArch64::STPQi:
case AArch64::STRQui:
case AArch64::LDPQi:
case AArch64::LDRQui:
Scale = 16;
break;
default:
llvm_unreachable("Unexpected callee-save save/restore opcode!");
}
unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
"Unexpected base register in callee-save save/restore instruction!");
// Last operand is immediate offset that needs fixing.
MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
// All generated opcodes have scaled offsets.
assert(LocalStackSize % Scale == 0);
OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
if (NeedsWinCFI) {
HasWinCFI = true;
auto MBBI = std::next(MachineBasicBlock::iterator(MI));
assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
"Expecting a SEH instruction");
fixupSEHOpcode(MBBI, LocalStackSize);
}
}
bool AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
uint64_t StackBumpBytes) const {
if (AFL.homogeneousPrologEpilog(MF))
return false;
if (AFI->getLocalStackSize() == 0)
return false;
// For WinCFI, if optimizing for size, prefer to not combine the stack bump
// (to force a stp with predecrement) to match the packed unwind format,
// provided that there actually are any callee saved registers to merge the
// decrement with.
// This is potentially marginally slower, but allows using the packed
// unwind format for functions that both have a local area and callee saved
// registers. Using the packed unwind format notably reduces the size of
// the unwind info.
if (AFL.needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
MF.getFunction().hasOptSize())
return false;
// 512 is the maximum immediate for stp/ldp that will be used for
// callee-save save/restores
if (StackBumpBytes >= 512 ||
AFL.windowsRequiresStackProbe(MF, StackBumpBytes))
return false;
if (MFI.hasVarSizedObjects())
return false;
if (RegInfo.hasStackRealignment(MF))
return false;
// This isn't strictly necessary, but it simplifies things a bit since the
// current RedZone handling code assumes the SP is adjusted by the
// callee-save save/restore code.
if (AFL.canUseRedZone(MF))
return false;
// When there is an SVE area on the stack, always allocate the
// callee-saves and spills/locals separately.
if (AFI->hasSVEStackSize())
return false;
return true;
}
SVEFrameSizes AArch64PrologueEpilogueCommon::getSVEStackFrameSizes() const {
StackOffset PPRCalleeSavesSize =
StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
StackOffset ZPRCalleeSavesSize =
StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;
if (SVELayout == SVEStackLayout::Split)
return {{PPRCalleeSavesSize, PPRLocalsSize},
{ZPRCalleeSavesSize, ZPRLocalsSize}};
// For simplicity, attribute all locals to ZPRs when split SVE is disabled.
return {{PPRCalleeSavesSize, StackOffset{}},
{ZPRCalleeSavesSize, PPRLocalsSize + ZPRLocalsSize}};
}
struct SVEPartitions {
struct {
MachineBasicBlock::iterator Begin, End;
} PPR, ZPR;
};
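// Partition the SVE callee-save instructions beginning at MBBI into their
// PPR and ZPR ranges. In a prologue the PPR saves precede the ZPR saves; in
// an epilogue the scan walks backwards from MBBI, and ZPR restores precede
// PPR restores.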
static SVEPartitions partitionSVECS(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
StackOffset PPRCalleeSavesSize,
StackOffset ZPRCalleeSavesSize,
bool IsEpilogue) {
MachineBasicBlock::iterator PPRsI = MBBI;
MachineBasicBlock::iterator End =
IsEpilogue ? MBB.begin() : MBB.getFirstTerminator();
auto AdjustI = [&](auto MBBI) { return IsEpilogue ? std::prev(MBBI) : MBBI; };
// Process the SVE CS to find the starts/ends of the ZPR and PPR areas.
if (PPRCalleeSavesSize) {
PPRsI = AdjustI(PPRsI);
assert(isPartOfPPRCalleeSaves(*PPRsI) && "Unexpected instruction");
while (PPRsI != End && isPartOfPPRCalleeSaves(AdjustI(PPRsI)))
IsEpilogue ? (--PPRsI) : (++PPRsI);
}
MachineBasicBlock::iterator ZPRsI = PPRsI;
if (ZPRCalleeSavesSize) {
ZPRsI = AdjustI(ZPRsI);
assert(isPartOfZPRCalleeSaves(*ZPRsI) && "Unexpected instruction");
while (ZPRsI != End && isPartOfZPRCalleeSaves(AdjustI(ZPRsI)))
IsEpilogue ? (--ZPRsI) : (++ZPRsI);
}
if (IsEpilogue)
return {{PPRsI, MBBI}, {ZPRsI, PPRsI}};
return {{MBBI, PPRsI}, {PPRsI, ZPRsI}};
}
AArch64PrologueEmitter::AArch64PrologueEmitter(MachineFunction &MF,
MachineBasicBlock &MBB,
const AArch64FrameLowering &AFL)
: AArch64PrologueEpilogueCommon(MF, MBB, AFL), F(MF.getFunction()) {
EmitCFI = AFI->needsDwarfUnwindInfo(MF);
EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
IsFunclet = MBB.isEHFuncletEntry();
HomPrologEpilog = AFL.homogeneousPrologEpilog(MF);
#ifndef NDEBUG
collectBlockLiveins();
#endif
}
#ifndef NDEBUG
/// Collect live registers from the end of \p MI's parent up to (and
/// including) \p MI in \p LiveRegs.
static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
LivePhysRegs &LiveRegs) {
MachineBasicBlock &MBB = *MI.getParent();
LiveRegs.addLiveOuts(MBB);
for (const MachineInstr &MI :
reverse(make_range(MI.getIterator(), MBB.instr_end())))
LiveRegs.stepBackward(MI);
}
void AArch64PrologueEmitter::collectBlockLiveins() {
// Collect live registers from the end of MBB up to the start of the existing
// frame setup instructions.
PrologueEndI = MBB.begin();
while (PrologueEndI != MBB.end() &&
PrologueEndI->getFlag(MachineInstr::FrameSetup))
++PrologueEndI;
if (PrologueEndI != MBB.end()) {
getLivePhysRegsUpTo(*PrologueEndI, RegInfo, LiveRegs);
// Ignore registers used for stack management for now.
LiveRegs.removeReg(AArch64::SP);
LiveRegs.removeReg(AArch64::X19);
LiveRegs.removeReg(AArch64::FP);
LiveRegs.removeReg(AArch64::LR);
// X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
// This is necessary to spill VG if required where SVE is unavailable, but
// X0 is preserved around this call.
if (requiresGetVGCall())
LiveRegs.removeReg(AArch64::X0);
}
}
void AArch64PrologueEmitter::verifyPrologueClobbers() const {
if (PrologueEndI == MBB.end())
return;
// Check if any of the newly inserted instructions clobber any of the live
// registers.
for (MachineInstr &MI :
make_range(MBB.instr_begin(), PrologueEndI->getIterator())) {
for (auto &Op : MI.operands())
if (Op.isReg() && Op.isDef())
assert(!LiveRegs.contains(Op.getReg()) &&
"live register clobbered by inserted prologue instructions");
}
}
#endif
void AArch64PrologueEmitter::determineLocalsStackSize(
uint64_t StackSize, uint64_t PrologueSaveSize) {
AFI->setLocalStackSize(StackSize - PrologueSaveSize);
CombineSPBump = shouldCombineCSRLocalStackBump(StackSize);
}
// Return the maximum possible number of bytes for `Size` due to the
// architectural limit on the size of an SVE register.
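// (A scalable byte corresponds to vscale bytes at runtime; with a maximum
// SVE vector length of 2048 bits, vscale is at most 16, hence the factor of
// 16 below.)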
static int64_t upperBound(StackOffset Size) {
static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
}
void AArch64PrologueEmitter::allocateStackSpace(
MachineBasicBlock::iterator MBBI, int64_t RealignmentPadding,
StackOffset AllocSize, bool EmitCFI, StackOffset InitialOffset,
bool FollowupAllocs) {
if (!AllocSize)
return;
DebugLoc DL;
const int64_t MaxAlign = MFI.getMaxAlign().value();
const uint64_t AndMask = ~(MaxAlign - 1);
if (!Subtarget.getTargetLowering()->hasInlineStackProbe(MF)) {
Register TargetReg = RealignmentPadding
? AFL.findScratchNonCalleeSaveRegister(&MBB)
: AArch64::SP;
// SUB Xd/SP, SP, AllocSize
emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
EmitCFI, InitialOffset);
if (RealignmentPadding) {
// AND SP, X9, 0b11111...0000
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
.addReg(TargetReg, RegState::Kill)
.addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
.setMIFlags(MachineInstr::FrameSetup);
AFI->setStackRealigned(true);
// No need for SEH instructions here; if we're realigning the stack,
// we've set a frame pointer and already finished the SEH prologue.
assert(!NeedsWinCFI);
}
return;
}
//
// Stack probing allocation.
//
// Fixed length allocation. If we don't need to re-align the stack and don't
// have SVE objects, we can use a more efficient sequence for stack probing.
if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
Register ScratchReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
assert(ScratchReg != AArch64::NoRegister);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC))
.addDef(ScratchReg)
.addImm(AllocSize.getFixed())
.addImm(InitialOffset.getFixed())
.addImm(InitialOffset.getScalable());
// The fixed allocation may leave unprobed bytes at the top of the
// stack. If we have subsequent allocation (e.g. if we have variable-sized
// objects), we need to issue an extra probe, so these allocations start in
// a known state.
if (FollowupAllocs) {
// STR XZR, [SP]
BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
.addReg(AArch64::XZR)
.addReg(AArch64::SP)
.addImm(0)
.setMIFlags(MachineInstr::FrameSetup);
}
return;
}
// Variable length allocation.
// If the (unknown) allocation size cannot exceed the probe size, decrement
// the stack pointer right away.
int64_t ProbeSize = AFI->getStackProbeSize();
if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
Register ScratchReg = RealignmentPadding
? AFL.findScratchNonCalleeSaveRegister(&MBB)
: AArch64::SP;
assert(ScratchReg != AArch64::NoRegister);
// SUB Xd, SP, AllocSize
emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
EmitCFI, InitialOffset);
if (RealignmentPadding) {
// AND SP, Xn, 0b11111...0000
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
.addReg(ScratchReg, RegState::Kill)
.addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
.setMIFlags(MachineInstr::FrameSetup);
AFI->setStackRealigned(true);
}
if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
AArch64::StackProbeMaxUnprobedStack) {
// STR XZR, [SP]
BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
.addReg(AArch64::XZR)
.addReg(AArch64::SP)
.addImm(0)
.setMIFlags(MachineInstr::FrameSetup);
}
return;
}
// Emit a variable-length allocation probing loop.
// TODO: As an optimisation, the loop can be "unrolled" into a few parts,
// each of them guaranteed to adjust the stack by less than the probe size.
Register TargetReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
assert(TargetReg != AArch64::NoRegister);
// SUB Xd, SP, AllocSize
emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
EmitCFI, InitialOffset);
if (RealignmentPadding) {
// AND Xn, Xn, 0b11111...0000
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), TargetReg)
.addReg(TargetReg, RegState::Kill)
.addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
.setMIFlags(MachineInstr::FrameSetup);
}
BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC_VAR))
.addReg(TargetReg);
if (EmitCFI) {
// Set the CFA register back to SP.
CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
.buildDefCFARegister(AArch64::SP);
}
if (RealignmentPadding)
AFI->setStackRealigned(true);
}
void AArch64PrologueEmitter::emitPrologue() {
const MachineBasicBlock::iterator PrologueBeginI = MBB.begin();
const MachineBasicBlock::iterator EndI = MBB.end();
// At this point, we're going to decide whether or not the function uses a
// redzone. In most cases the function doesn't have one, so assume false and
// set it to true only if a redzone is used.
AFI->setHasRedZone(false);
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
DebugLoc DL;
// In some cases, particularly with CallingConv::SwiftTail, it is possible to
// have a tail-call where the caller only needs to adjust the stack pointer in
// the epilogue. In this case, we still need to emit a SEH prologue sequence.
// See `seh-minimal-prologue-epilogue.ll` test cases.
if (AFI->getArgumentStackToRestore())
HasWinCFI = true;
if (AFI->shouldSignReturnAddress(MF)) {
// If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions
// are inserted by emitPacRetPlusLeafHardening().
if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
.setMIFlag(MachineInstr::FrameSetup);
}
// AArch64PointerAuth pass will insert SEH_PACSignLR
HasWinCFI |= NeedsWinCFI;
}
if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
emitShadowCallStackPrologue(PrologueBeginI, DL);
HasWinCFI |= NeedsWinCFI;
}
if (EmitCFI && AFI->isMTETagged())
BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::EMITMTETAGGED))
.setMIFlag(MachineInstr::FrameSetup);
// We signal the presence of a Swift extended frame to external tools by
// storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
// ORR is sufficient; it is assumed a Swift kernel would initialize the TBI
// bits so that this still holds.
if (HasFP && AFI->hasSwiftAsyncContext())
emitSwiftAsyncContextFramePointer(PrologueBeginI, DL);
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
// Set tagged base pointer to the requested stack slot. Ideally it should
// match the SP value after the prologue.
if (std::optional<int> TBPI = AFI->getTaggedBasePointerIndex())
AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
else
AFI->setTaggedBasePointerOffset(MFI.getStackSize());
// getStackSize() includes all the locals in its size calculation. We don't
// include these locals when computing the stack size of a funclet, as they
// are allocated in the parent's stack frame and accessed via the frame
// pointer from the funclet. We only save the callee saved registers in the
// funclet, which are really the callee saved registers of the parent
// function, including the funclet.
int64_t NumBytes =
IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
if (!AFI->hasStackFrame() && !AFL.windowsRequiresStackProbe(MF, NumBytes))
return emitEmptyStackFramePrologue(NumBytes, PrologueBeginI, DL);
bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
// All of the remaining stack allocations are for locals.
determineLocalsStackSize(NumBytes, PrologueSaveSize);
MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
// If we're doing SVE saves first, we need to immediately allocate space
// for fixed objects, then space for the SVE callee saves.
//
// Windows unwind requires that the scalable size is a multiple of 16;
// that's handled when the callee-saved size is computed.
auto SaveSize =
StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) +
StackOffset::getFixed(FixedObject);
allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},
/*FollowupAllocs=*/true);
NumBytes -= FixedObject;
// Now allocate space for the GPR callee saves.
MachineBasicBlock::iterator MBBI = PrologueBeginI;
while (MBBI != EndI && isPartOfSVECalleeSaves(MBBI))
++MBBI;
FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
MBBI, DL, -AFI->getCalleeSavedStackSize(), EmitAsyncCFI);
NumBytes -= AFI->getCalleeSavedStackSize();
} else if (CombineSPBump) {
assert(!AFL.getSVEStackSize(MF) && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, PrologueBeginI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
EmitAsyncCFI);
NumBytes = 0;
} else if (HomPrologEpilog) {
// Stack has been already adjusted.
NumBytes -= PrologueSaveSize;
} else if (PrologueSaveSize != 0) {
FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
PrologueBeginI, DL, -PrologueSaveSize, EmitAsyncCFI);
NumBytes -= PrologueSaveSize;
}
assert(NumBytes >= 0 && "Negative stack allocation size!?");
// Move past the saves of the callee-saved registers, fixing up the offsets
// and pre-inc if we decided to combine the callee-save and local stack
// pointer bump above.
auto &TLI = *Subtarget.getTargetLowering();
MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
while (AfterGPRSavesI != EndI &&
AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
!isPartOfSVECalleeSaves(AfterGPRSavesI)) {
if (CombineSPBump &&
// Only fix-up frame-setup load/store instructions.
(!AFL.requiresSaveVG(MF) || !isVGInstruction(AfterGPRSavesI, TLI)))
fixupCalleeSaveRestoreStackOffset(*AfterGPRSavesI,
AFI->getLocalStackSize());
++AfterGPRSavesI;
}
// For funclets the FP belongs to the containing function. Only set up FP if
// we actually need to.
if (!IsFunclet && HasFP)
emitFramePointerSetup(AfterGPRSavesI, DL, FixedObject);
// Now emit the moves for whatever callee-saved regs we have (including FP,
// LR if those are saved). Frame instructions for SVE registers are emitted
// later, after the instructions which actually save the SVE regs.
if (EmitAsyncCFI)
emitCalleeSavedGPRLocations(AfterGPRSavesI);
// Alignment is required for the parent frame, not the funclet
const bool NeedsRealignment =
NumBytes && !IsFunclet && RegInfo.hasStackRealignment(MF);
const int64_t RealignmentPadding =
(NeedsRealignment && MFI.getMaxAlign() > Align(16))
? MFI.getMaxAlign().value() - 16
: 0;
if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
auto [PPR, ZPR] = getSVEStackFrameSizes();
StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
StackOffset NonSVELocalsSize = StackOffset::getFixed(NumBytes);
StackOffset CFAOffset =
StackOffset::getFixed(MFI.getStackSize()) - NonSVELocalsSize;
MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
// Allocate space for the callee saves and PPR locals (if any).
if (SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord) {
auto [PPRRange, ZPRRange] =
partitionSVECS(MBB, AfterGPRSavesI, PPR.CalleeSavesSize,
ZPR.CalleeSavesSize, /*IsEpilogue=*/false);
AfterSVESavesI = ZPRRange.End;
if (EmitAsyncCFI)
emitCalleeSavedSVELocations(AfterSVESavesI);
StackOffset AllocateBeforePPRs = SVECalleeSavesSize;
StackOffset AllocateAfterPPRs = PPR.LocalsSize;
if (SVELayout == SVEStackLayout::Split) {
AllocateBeforePPRs = PPR.CalleeSavesSize;
AllocateAfterPPRs = PPR.LocalsSize + ZPR.CalleeSavesSize;
}
allocateStackSpace(PPRRange.Begin, 0, AllocateBeforePPRs,
EmitAsyncCFI && !HasFP, CFAOffset,
MFI.hasVarSizedObjects() || AllocateAfterPPRs ||
ZPR.LocalsSize || NonSVELocalsSize);
CFAOffset += AllocateBeforePPRs;
assert(PPRRange.End == ZPRRange.Begin &&
"Expected ZPR callee saves after PPR locals");
allocateStackSpace(PPRRange.End, RealignmentPadding, AllocateAfterPPRs,
EmitAsyncCFI && !HasFP, CFAOffset,
MFI.hasVarSizedObjects() || ZPR.LocalsSize ||
NonSVELocalsSize);
CFAOffset += AllocateAfterPPRs;
} else {
assert(SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord);
// Note: With CalleeSavesAboveFrameRecord, the SVE CS have already been
// allocated (and separate PPR locals are not supported, all SVE locals,
// both PPR and ZPR, are within the ZPR locals area).
assert(!PPR.LocalsSize && "Unexpected PPR locals!");
CFAOffset += SVECalleeSavesSize;
}
// Allocate space for the rest of the frame including ZPR locals. Align the
// stack as necessary.
assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
"Cannot use redzone with stack realignment");
if (!AFL.canUseRedZone(MF)) {
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have the
// correct value here, as NumBytes also includes padding bytes, which
// shouldn't be counted here.
allocateStackSpace(
AfterSVESavesI, RealignmentPadding, ZPR.LocalsSize + NonSVELocalsSize,
EmitAsyncCFI && !HasFP, CFAOffset, MFI.hasVarSizedObjects());
}
// If we need a base pointer, set it up here. It's whatever the value of the
// stack pointer is at this point. Any variable size objects will be
// allocated after this, so we can still use the base pointer to reference
// locals.
//
// FIXME: Clarify FrameSetup flags here.
// Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
// needed.
// For funclets the BP belongs to the containing function.
if (!IsFunclet && RegInfo.hasBasePointer(MF)) {
TII->copyPhysReg(MBB, AfterSVESavesI, DL, RegInfo.getBaseRegister(),
AArch64::SP, false);
if (NeedsWinCFI) {
HasWinCFI = true;
BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
}
}
// The very last FrameSetup instruction indicates the end of the prologue.
// Emit a SEH opcode indicating the prologue end.
if (NeedsWinCFI && HasWinCFI) {
BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_PrologEnd))
.setMIFlag(MachineInstr::FrameSetup);
}
// SEH funclets are passed the frame pointer in X1. If the parent
// function uses the base register, then the base register is used
// directly, and is not retrieved from X1.
if (IsFunclet && F.hasPersonalityFn()) {
EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
if (isAsynchronousEHPersonality(Per)) {
BuildMI(MBB, AfterSVESavesI, DL, TII->get(TargetOpcode::COPY),
AArch64::FP)
.addReg(AArch64::X1)
.setMIFlag(MachineInstr::FrameSetup);
MBB.addLiveIn(AArch64::X1);
}
}
if (EmitCFI && !EmitAsyncCFI) {
if (HasFP) {
emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
} else {
StackOffset TotalSize =
AFL.getSVEStackSize(MF) +
StackOffset::getFixed((int64_t)MFI.getStackSize());
CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
CFIBuilder.insertCFIInst(
createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
TotalSize, /*LastAdjustmentWasScalable=*/false));
}
emitCalleeSavedGPRLocations(AfterSVESavesI);
emitCalleeSavedSVELocations(AfterSVESavesI);
}
}
void AArch64PrologueEmitter::emitShadowCallStackPrologue(
MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
// Shadow call stack prolog: str x30, [x18], #8
BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXpost))
.addReg(AArch64::X18, RegState::Define)
.addReg(AArch64::LR)
.addReg(AArch64::X18)
.addImm(8)
.setMIFlag(MachineInstr::FrameSetup);
// This instruction also makes x18 live-in to the entry block.
MBB.addLiveIn(AArch64::X18);
if (NeedsWinCFI)
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
if (EmitCFI) {
// Emit a CFI instruction that causes 8 to be subtracted from the value of
// x18 when unwinding past this frame.
static const char CFIInst[] = {
dwarf::DW_CFA_val_expression,
18, // register
2, // length
static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
static_cast<char>(-8) & 0x7f, // addend (sleb128)
};
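// Decoded, this is: DW_CFA_val_expression 18 { DW_OP_breg18, sleb128(-8) },
// i.e. the caller's x18 is the current x18 minus 8.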
CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
.buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
}
}
void AArch64PrologueEmitter::emitSwiftAsyncContextFramePointer(
MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
case SwiftAsyncFramePointerMode::DeploymentBased:
if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
// The special symbol below is absolute and has a *value* that can be
// combined with the frame pointer to signal an extended frame.
BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
.addExternalSymbol("swift_async_extendedFramePointerFlags",
AArch64II::MO_GOT);
if (NeedsWinCFI) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlags(MachineInstr::FrameSetup);
HasWinCFI = true;
}
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
.addUse(AArch64::FP)
.addUse(AArch64::X16)
.addImm(Subtarget.isTargetILP32() ? 32 : 0);
if (NeedsWinCFI) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlags(MachineInstr::FrameSetup);
HasWinCFI = true;
}
break;
}
[[fallthrough]];
case SwiftAsyncFramePointerMode::Always:
// ORR x29, x29, #0x1000_0000_0000_0000
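// (0x1100 below is the logical-immediate encoding of that constant: a
// single set bit at position 60.)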
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
.addUse(AArch64::FP)
.addImm(0x1100)
.setMIFlag(MachineInstr::FrameSetup);
if (NeedsWinCFI) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlags(MachineInstr::FrameSetup);
HasWinCFI = true;
}
break;
case SwiftAsyncFramePointerMode::Never:
break;
}
}
void AArch64PrologueEmitter::emitEmptyStackFramePrologue(
int64_t NumBytes, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL) const {
assert(!HasFP && "unexpected function without stack frame but with FP");
assert(!AFL.getSVEStackSize(MF) &&
"unexpected function without stack frame but with SVE objects");
// All of the stack allocation is for locals.
AFI->setLocalStackSize(NumBytes);
if (!NumBytes) {
if (NeedsWinCFI && HasWinCFI) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
.setMIFlag(MachineInstr::FrameSetup);
}
return;
}
// REDZONE: If the stack size is less than 128 bytes, we don't need
// to actually allocate.
if (AFL.canUseRedZone(MF)) {
AFI->setHasRedZone(true);
++NumRedZoneFunctions;
} else {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
if (EmitCFI) {
// Label used to tie together the PROLOG_LABEL and the MachineMoves.
MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
// Encode the stack size of the leaf function.
CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
.buildDefCFAOffset(NumBytes, FrameLabel);
}
}
if (NeedsWinCFI) {
HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
.setMIFlag(MachineInstr::FrameSetup);
}
}
void AArch64PrologueEmitter::emitFramePointerSetup(
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
unsigned FixedObject) {
int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
if (CombineSPBump)
FPOffset += AFI->getLocalStackSize();
if (AFI->hasSwiftAsyncContext()) {
// Before we update the live FP we have to ensure there's a valid (or
// null) asynchronous context in its slot just before FP in the frame
// record, so store it now.
const auto &Attrs = MF.getFunction().getAttributes();
bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
if (HaveInitialContext)
MBB.addLiveIn(AArch64::X22);
Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
.addUse(Reg)
.addUse(AArch64::SP)
.addImm(FPOffset - 8)
.setMIFlags(MachineInstr::FrameSetup);
if (NeedsWinCFI) {
// WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
// to multiple instructions, should be mutually exclusive.
assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlags(MachineInstr::FrameSetup);
HasWinCFI = true;
}
}
if (HomPrologEpilog) {
auto Prolog = MBBI;
--Prolog;
assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
} else {
// Issue "sub fp, sp, FPOffset", or "mov fp, sp" when FPOffset is zero.
// Note: All stores of callee-saved registers are marked as "FrameSetup".
// This code marks the instruction(s) that set the FP also.
emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
StackOffset::getFixed(FPOffset), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
if (NeedsWinCFI && HasWinCFI) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
.setMIFlag(MachineInstr::FrameSetup);
// After setting up the FP, the rest of the prolog doesn't need to be
// included in the SEH unwind info.
NeedsWinCFI = false;
}
}
if (EmitAsyncCFI)
emitDefineCFAWithFP(MBBI, FixedObject);
}
// Define the current CFA rule to use the provided FP.
void AArch64PrologueEmitter::emitDefineCFAWithFP(
MachineBasicBlock::iterator MBBI, unsigned FixedObject) const {
const int OffsetToFirstCalleeSaveFromFP =
AFI->getCalleeSaveBaseToFrameRecordOffset() -
AFI->getCalleeSavedStackSize();
Register FramePtr = RegInfo.getFrameRegister(MF);
CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
.buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP);
}
void AArch64PrologueEmitter::emitWindowsStackProbe(
MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t &NumBytes,
int64_t RealignmentPadding) const {
if (AFI->getSVECalleeSavedStackSize())
report_fatal_error("SVE callee saves not yet supported with stack probing");
// Find an available register to spill the value of X15 to, if X15 is
// already being used for nest.
unsigned X15Scratch = AArch64::NoRegister;
if (llvm::any_of(MBB.liveins(),
[this](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
return RegInfo.isSuperOrSubRegisterEq(AArch64::X15,
LiveIn.PhysReg);
})) {
X15Scratch = AFL.findScratchNonCalleeSaveRegister(&MBB, /*HasCall=*/true);
assert(X15Scratch != AArch64::NoRegister &&
(X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
#ifndef NDEBUG
LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
#endif
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
.addReg(AArch64::XZR)
.addReg(AArch64::X15, RegState::Undef)
.addReg(AArch64::X15, RegState::Implicit)
.setMIFlag(MachineInstr::FrameSetup);
}
uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
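// X15 carries the allocation size in 16-byte units; the final
// "sub sp, sp, x15, uxtx #4" below scales it back up to bytes.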
if (NeedsWinCFI) {
HasWinCFI = true;
// alloc_l can hold at most 256MB, so assume that NumBytes doesn't
// exceed this amount. We need to move at most 2^24 - 1 into x15.
// This is at most two instructions, MOVZ followed by MOVK.
// TODO: Fix to use multiple stack alloc unwind codes for stacks
// exceeding 256MB in size.
if (NumBytes >= (1 << 28))
report_fatal_error("Stack size cannot exceed 256MB for stack "
"unwinding purposes");
uint32_t LowNumWords = NumWords & 0xFFFF;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
.addImm(LowNumWords)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
if ((NumWords & 0xFFFF0000) != 0) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
.addReg(AArch64::X15)
.addImm((NumWords & 0xFFFF0000) >> 16) // High half
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
}
} else {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
.addImm(NumWords)
.setMIFlags(MachineInstr::FrameSetup);
}
const char *ChkStk = Subtarget.getChkStkName();
switch (MF.getTarget().getCodeModel()) {
case CodeModel::Tiny:
case CodeModel::Small:
case CodeModel::Medium:
case CodeModel::Kernel:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
.addExternalSymbol(ChkStk)
.addReg(AArch64::X15, RegState::Implicit)
.addReg(AArch64::X16,
RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(AArch64::X17,
RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(AArch64::NZCV,
RegState::Implicit | RegState::Define | RegState::Dead)
.setMIFlags(MachineInstr::FrameSetup);
if (NeedsWinCFI) {
HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
}
break;
case CodeModel::Large:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
.addReg(AArch64::X16, RegState::Define)
.addExternalSymbol(ChkStk)
.addExternalSymbol(ChkStk)
.setMIFlags(MachineInstr::FrameSetup);
if (NeedsWinCFI) {
HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
}
BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
.addReg(AArch64::X16, RegState::Kill)
.addReg(AArch64::X15, RegState::Implicit | RegState::Define)
.addReg(AArch64::X16,
RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(AArch64::X17,
RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(AArch64::NZCV,
RegState::Implicit | RegState::Define | RegState::Dead)
.setMIFlags(MachineInstr::FrameSetup);
if (NeedsWinCFI) {
HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
}
break;
}
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
.addReg(AArch64::SP, RegState::Kill)
.addReg(AArch64::X15, RegState::Kill)
.addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
.setMIFlags(MachineInstr::FrameSetup);
if (NeedsWinCFI) {
HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
.addImm(NumBytes)
.setMIFlag(MachineInstr::FrameSetup);
}
NumBytes = 0;
if (RealignmentPadding > 0) {
if (RealignmentPadding >= 4096) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
.addReg(AArch64::X16, RegState::Define)
.addImm(RealignmentPadding)
.setMIFlags(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
.addReg(AArch64::SP)
.addReg(AArch64::X16, RegState::Kill)
.addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
.setMIFlag(MachineInstr::FrameSetup);
} else {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
.addReg(AArch64::SP)
.addImm(RealignmentPadding)
.addImm(0)
.setMIFlag(MachineInstr::FrameSetup);
}
uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
.addReg(AArch64::X15, RegState::Kill)
.addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
AFI->setStackRealigned(true);
// No need for SEH instructions here; if we're realigning the stack,
// we've set a frame pointer and already finished the SEH prologue.
assert(!NeedsWinCFI);
}
if (X15Scratch != AArch64::NoRegister) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15)
.addReg(AArch64::XZR)
.addReg(X15Scratch, RegState::Undef)
.addReg(X15Scratch, RegState::Implicit)
.setMIFlag(MachineInstr::FrameSetup);
}
}
void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
MachineBasicBlock::iterator MBBI) const {
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (CSI.empty())
return;
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
for (const auto &Info : CSI) {
unsigned FrameIdx = Info.getFrameIdx();
if (MFI.hasScalableStackID(FrameIdx))
continue;
assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
int64_t Offset = MFI.getObjectOffset(FrameIdx) - AFL.getOffsetOfLocalArea();
CFIBuilder.buildOffset(Info.getReg(), Offset);
}
}
void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
MachineBasicBlock::iterator MBBI) const {
// Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (CSI.empty())
return;
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
std::optional<int64_t> IncomingVGOffsetFromDefCFA;
if (AFL.requiresSaveVG(MF)) {
auto IncomingVG = *find_if(
reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; });
IncomingVGOffsetFromDefCFA = MFI.getObjectOffset(IncomingVG.getFrameIdx()) -
AFL.getOffsetOfLocalArea();
}
StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
for (const auto &Info : CSI) {
int FI = Info.getFrameIdx();
if (!MFI.hasScalableStackID(FI))
continue;
// Not all unwinders may know about SVE registers, so assume the lowest
// common denominator.
assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
MCRegister Reg = Info.getReg();
if (!RegInfo.regNeedsCFI(Reg, Reg))
continue;
StackOffset Offset =
StackOffset::getScalable(MFI.getObjectOffset(FI)) -
StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));
// With split SVE objects, the scalable vectors are below (at a lower
// address than) the scalable predicates, so we must subtract the size of
// the predicates.
if (SVELayout == SVEStackLayout::Split &&
MFI.getStackID(FI) == TargetStackID::ScalableVector)
Offset -= PPRStackSize;
CFIBuilder.insertCFIInst(
createCFAOffset(RegInfo, Reg, Offset, IncomingVGOffsetFromDefCFA));
}
}
static bool isFuncletReturnInstr(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
case AArch64::CATCHRET:
case AArch64::CLEANUPRET:
return true;
}
}
AArch64EpilogueEmitter::AArch64EpilogueEmitter(MachineFunction &MF,
MachineBasicBlock &MBB,
const AArch64FrameLowering &AFL)
: AArch64PrologueEpilogueCommon(MF, MBB, AFL) {
EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
HomPrologEpilog = AFL.homogeneousPrologEpilog(MF, &MBB);
SEHEpilogueStartI = MBB.end();
}
void AArch64EpilogueEmitter::emitEpilogue() {
MachineBasicBlock::iterator EpilogueEndI = MBB.getLastNonDebugInstr();
if (MBB.end() != EpilogueEndI) {
DL = EpilogueEndI->getDebugLoc();
IsFunclet = isFuncletReturnInstr(*EpilogueEndI);
}
int64_t NumBytes =
IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
// How much of the stack used by incoming arguments this function is expected
// to restore in this particular epilogue.
int64_t ArgumentStackToRestore = AFL.getArgumentStackToRestore(MF, MBB);
bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
MF.getFunction().isVarArg());
unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
int64_t AfterCSRPopSize = ArgumentStackToRestore;
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
// We cannot rely on the local stack size set in emitPrologue if the function
// has funclets, as funclets have different local stack size requirements, and
// the current value set in emitPrologue may be that of the containing
// function.
if (MF.hasEHFunclets())
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
if (HomPrologEpilog) {
assert(!NeedsWinCFI);
auto FirstHomogenousEpilogI = MBB.getFirstTerminator();
if (FirstHomogenousEpilogI != MBB.begin()) {
auto HomogeneousEpilog = std::prev(FirstHomogenousEpilogI);
if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
FirstHomogenousEpilogI = HomogeneousEpilog;
}
// Adjust local stack
emitFrameOffset(MBB, FirstHomogenousEpilogI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(AFI->getLocalStackSize()), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
// SP has been already adjusted while restoring callee save regs.
// We've already bailed out of the case that adjusts SP for arguments.
assert(AfterCSRPopSize == 0);
return;
}
bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes);
unsigned ProloguePopSize = PrologueSaveSize;
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
// With CalleeSavesAboveFrameRecord, ProloguePopSize is the amount of stack
// that needs to be popped until we reach the start of the SVE save area.
// The "FixedObject" stack occurs after the SVE area and must be popped
// later.
ProloguePopSize -= FixedObject;
AfterCSRPopSize += FixedObject;
}
// Assume we can't combine the last pop with the sp restore.
if (!CombineSPBump && ProloguePopSize != 0) {
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
AArch64InstrInfo::isSEHInstruction(*Pop) ||
(SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord &&
isPartOfSVECalleeSaves(Pop)))
Pop = std::prev(Pop);
// Converting the last ldp to a post-index ldp is valid only if the last
// ldp's offset is 0.
const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
// If the offset is 0 and the AfterCSR pop is not actually trying to
// allocate more stack for arguments (in space that an untimely interrupt
// may clobber), convert it to a post-index ldp.
if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
convertCalleeSaveRestoreToSPPrePostIncDec(
Pop, DL, ProloguePopSize, EmitCFI, MachineInstr::FrameDestroy,
ProloguePopSize);
} else if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
MachineBasicBlock::iterator AfterLastPop = std::next(Pop);
if (AArch64InstrInfo::isSEHInstruction(*AfterLastPop))
++AfterLastPop;
// If not, and CalleeSavesAboveFrameRecord is enabled, deallocate
// callee-save non-SVE registers to move the stack pointer to the start of
// the SVE area.
emitFrameOffset(MBB, AfterLastPop, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(ProloguePopSize), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI,
&HasWinCFI);
} else {
// Otherwise, make sure to emit an add after the last ldp.
// We're doing this by transferring the size to be restored from the
// adjustment *before* the CSR pops to the adjustment *after* the CSR
// pops.
AfterCSRPopSize += ProloguePopSize;
}
}
// Move past the restores of the callee-saved registers.
// If we plan on combining the sp bump of the local stack size and the callee
// save stack size, we might need to adjust the CSR save and restore offsets.
MachineBasicBlock::iterator FirstGPRRestoreI = MBB.getFirstTerminator();
MachineBasicBlock::iterator Begin = MBB.begin();
while (FirstGPRRestoreI != Begin) {
--FirstGPRRestoreI;
if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
(SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord &&
isPartOfSVECalleeSaves(FirstGPRRestoreI))) {
++FirstGPRRestoreI;
break;
} else if (CombineSPBump)
fixupCalleeSaveRestoreStackOffset(*FirstGPRRestoreI,
AFI->getLocalStackSize());
}
if (NeedsWinCFI) {
// Note that there are cases where we insert SEH opcodes in the
// epilogue when we had no SEH opcodes in the prologue. For
// example, when there is no stack frame but there are stack
// arguments. Insert the SEH_EpilogStart and remove it later if we
// didn't emit any SEH opcodes, to avoid generating WinCFI for
// functions that don't need it.
BuildMI(MBB, FirstGPRRestoreI, DL, TII->get(AArch64::SEH_EpilogStart))
.setMIFlag(MachineInstr::FrameDestroy);
SEHEpilogueStartI = FirstGPRRestoreI;
--SEHEpilogueStartI;
}
// Determine the ranges of SVE callee-saves. This is done before emitting any
// code at the end of the epilogue (for Swift async), which can get in the way
// of finding SVE callee-saves with CalleeSavesAboveFrameRecord.
auto [PPR, ZPR] = getSVEStackFrameSizes();
auto [PPRRange, ZPRRange] = partitionSVECS(
MBB,
SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord
? MBB.getFirstTerminator()
: FirstGPRRestoreI,
PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true);
if (HasFP && AFI->hasSwiftAsyncContext())
emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
// If there is a single SP update, insert it before the ret and we're done.
if (CombineSPBump) {
assert(!AFI->hasSVEStackSize() && "Cannot combine SP bump with SVE");
// When we are about to restore the CSRs, the CFA register is SP again.
if (EmitCFI && HasFP)
CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy)
.buildDefCFA(AArch64::SP, NumBytes);
emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(NumBytes + AfterCSRPopSize), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI,
EmitCFI, StackOffset::getFixed(NumBytes));
return;
}
NumBytes -= PrologueSaveSize;
assert(NumBytes >= 0 && "Negative stack allocation size!?");
StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
StackOffset SVEStackSize =
SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize;
MachineBasicBlock::iterator RestoreBegin = ZPRRange.Begin;
MachineBasicBlock::iterator RestoreEnd = PPRRange.End;
// Deallocate the SVE area.
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
StackOffset SVELocalsSize = ZPR.LocalsSize + PPR.LocalsSize;
// If the callee-save area is before FP, restoring the FP implicitly
// deallocates non-callee-save SVE allocations. Otherwise, deallocate them
// explicitly.
if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
SVELocalsSize, TII, MachineInstr::FrameDestroy, false,
NeedsWinCFI, &HasWinCFI);
}
// Deallocate callee-save SVE registers.
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false,
NeedsWinCFI, &HasWinCFI);
} else if (AFI->hasSVEStackSize()) {
// If we have stack realignment or variable-sized objects, we must use the FP
// to restore SVE callee saves (as there is an unknown amount of
// data/padding between the SP and SVE CS area).
Register BaseForSVEDealloc =
(AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
: AArch64::SP;
if (SVECalleeSavesSize && BaseForSVEDealloc == AArch64::FP) {
// TODO: Support stack realignment and variable-sized objects.
assert(
SVELayout != SVEStackLayout::Split &&
"unexpected stack realignment or variable sized objects with split "
"SVE stack objects");
Register CalleeSaveBase = AArch64::FP;
if (int64_t CalleeSaveBaseOffset =
AFI->getCalleeSaveBaseToFrameRecordOffset()) {
// If we have a non-zero offset to the non-SVE CS base, we need to
// compute the base address by subtracting the offset in a temporary
// register first (to avoid briefly deallocating the SVE CS).
CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
&AArch64::GPR64RegClass);
emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
MachineInstr::FrameDestroy);
}
// The code below will deallocate the stack space by moving the SP
// to the start of the SVE callee-save area.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
-SVECalleeSavesSize, TII, MachineInstr::FrameDestroy);
} else if (BaseForSVEDealloc == AArch64::SP) {
auto CFAOffset =
SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize);
if (SVECalleeSavesSize) {
// Deallocate the non-SVE locals before we can deallocate (and
// restore callee saves) from the SVE area.
auto NonSVELocals = StackOffset::getFixed(NumBytes);
emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
NonSVELocals, TII, MachineInstr::FrameDestroy, false,
NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
CFAOffset -= NonSVELocals;
NumBytes = 0;
}
if (ZPR.LocalsSize) {
emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
ZPR.LocalsSize, TII, MachineInstr::FrameDestroy, false,
NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
CFAOffset -= ZPR.LocalsSize;
}
StackOffset SVECalleeSavesToDealloc = SVECalleeSavesSize;
if (SVELayout == SVEStackLayout::Split &&
(PPR.LocalsSize || ZPR.CalleeSavesSize)) {
assert(PPRRange.Begin == ZPRRange.End &&
"Expected PPR restores after ZPR");
emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP,
PPR.LocalsSize + ZPR.CalleeSavesSize, TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI,
&HasWinCFI, EmitCFI && !HasFP, CFAOffset);
CFAOffset -= PPR.LocalsSize + ZPR.CalleeSavesSize;
SVECalleeSavesToDealloc -= ZPR.CalleeSavesSize;
}
// With split SVE this deallocates the PPRs; otherwise, it deallocates both
// ZPRs and PPRs:
if (SVECalleeSavesToDealloc)
emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
SVECalleeSavesToDealloc, TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI,
&HasWinCFI, EmitCFI && !HasFP, CFAOffset);
}
if (EmitCFI)
emitCalleeSavedSVERestores(
SVELayout == SVEStackLayout::Split ? ZPRRange.End : PPRRange.End);
}
if (!HasFP) {
bool RedZone = AFL.canUseRedZone(MF);
// If this was a redzone leaf function, we don't need to restore the
// stack pointer (but we may need to pop stack args for fastcc).
if (RedZone && AfterCSRPopSize == 0)
return;
// Pop the local variables off the stack. If there are no callee-saved
// registers, it means we are actually positioned at the terminator and can
// combine the stack increment for the locals and the stack increment for
// callee-popped arguments into (possibly) a single instruction and be done.
bool NoCalleeSaveRestore = PrologueSaveSize == 0;
int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
if (NoCalleeSaveRestore)
StackRestoreBytes += AfterCSRPopSize;
emitFrameOffset(
MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(StackRestoreBytes), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI,
StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize));
// If we were able to combine the local stack pop with the argument pop,
// then we're done.
if (NoCalleeSaveRestore || AfterCSRPopSize == 0)
return;
NumBytes = 0;
}
// Restore the original stack pointer.
// FIXME: Rather than doing the math here, we should instead just use
// non-post-indexed loads for the restores if we aren't actually going to
// be able to save any instructions.
if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
emitFrameOffset(
MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::FP,
StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
} else if (NumBytes)
emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(NumBytes), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
// When we are about to restore the CSRs, the CFA register is SP again.
if (EmitCFI && HasFP)
CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy)
.buildDefCFA(AArch64::SP, PrologueSaveSize);
// This must be placed after the callee-save restore code because that code
// assumes the SP is at the same location as it was after the callee-save save
// code in the prologue.
if (AfterCSRPopSize) {
assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
"interrupt may have clobbered");
emitFrameOffset(
MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
false, NeedsWinCFI, &HasWinCFI, EmitCFI,
StackOffset::getFixed(AfterCSRPopSize - ArgumentStackToRestore));
}
}
bool AArch64EpilogueEmitter::shouldCombineCSRLocalStackBump(
uint64_t StackBumpBytes) const {
if (!AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
StackBumpBytes))
return false;
if (MBB.empty())
return true;
// Disable combined SP bump if the last instruction is an MTE tag store. It
// is almost always better to merge SP adjustment into those instructions.
MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
MachineBasicBlock::iterator Begin = MBB.begin();
while (LastI != Begin) {
--LastI;
if (LastI->isTransient())
continue;
if (!LastI->getFlag(MachineInstr::FrameDestroy))
break;
}
switch (LastI->getOpcode()) {
case AArch64::STGloop:
case AArch64::STZGloop:
case AArch64::STGi:
case AArch64::STZGi:
case AArch64::ST2Gi:
case AArch64::STZ2Gi:
return false;
default:
return true;
}
llvm_unreachable("unreachable");
}
void AArch64EpilogueEmitter::emitSwiftAsyncContextFramePointer(
MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
case SwiftAsyncFramePointerMode::DeploymentBased:
// Avoid the reload as it is GOT relative, and instead fall back to the
// hardcoded value below. This allows a mismatch between the OS and
// application without immediately terminating on the difference.
[[fallthrough]];
case SwiftAsyncFramePointerMode::Always:
// We need to reset FP to its untagged state on return. Bit 60 is
// currently used to show the presence of an extended frame.
// BIC x29, x29, #0x1000_0000_0000_0000
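// (0x10fe below is the logical-immediate encoding of ~(1ULL << 60),
// clearing the extended-frame bit.)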
BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
AArch64::FP)
.addUse(AArch64::FP)
.addImm(0x10fe)
.setMIFlag(MachineInstr::FrameDestroy);
if (NeedsWinCFI) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlags(MachineInstr::FrameDestroy);
HasWinCFI = true;
}
break;
case SwiftAsyncFramePointerMode::Never:
break;
}
}
void AArch64EpilogueEmitter::emitShadowCallStackEpilogue(
MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
// Shadow call stack epilog: ldr x30, [x18, #-8]!
BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXpre))
.addReg(AArch64::X18, RegState::Define)
.addReg(AArch64::LR, RegState::Define)
.addReg(AArch64::X18)
.addImm(-8)
.setMIFlag(MachineInstr::FrameDestroy);
if (NeedsWinCFI)
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameDestroy);
if (AFI->needsAsyncDwarfUnwindInfo(MF))
CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
.buildRestore(AArch64::X18);
}
void AArch64EpilogueEmitter::emitCalleeSavedRestores(
MachineBasicBlock::iterator MBBI, bool SVE) const {
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (CSI.empty())
return;
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy);
for (const auto &Info : CSI) {
if (SVE != MFI.hasScalableStackID(Info.getFrameIdx()))
continue;
MCRegister Reg = Info.getReg();
if (SVE && !RegInfo.regNeedsCFI(Reg, Reg))
continue;
CFIBuilder.buildRestore(Info.getReg());
}
}
void AArch64EpilogueEmitter::finalizeEpilogue() const {
if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
emitShadowCallStackEpilogue(MBB.getFirstTerminator(), DL);
HasWinCFI |= NeedsWinCFI;
}
if (EmitCFI)
emitCalleeSavedGPRRestores(MBB.getFirstTerminator());
if (AFI->shouldSignReturnAddress(MF)) {
// If pac-ret+leaf is in effect, PAUTH_EPILOGUE pseudo instructions
// are inserted by emitPacRetPlusLeafHardening().
if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
BuildMI(MBB, MBB.getFirstTerminator(), DL,
TII->get(AArch64::PAUTH_EPILOGUE))
.setMIFlag(MachineInstr::FrameDestroy);
}
// AArch64PointerAuth pass will insert SEH_PACSignLR
HasWinCFI |= NeedsWinCFI;
}
if (HasWinCFI) {
BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
.setMIFlag(MachineInstr::FrameDestroy);
if (!MF.hasWinCFI())
MF.setHasWinCFI(true);
}
if (NeedsWinCFI) {
assert(SEHEpilogueStartI != MBB.end());
if (!HasWinCFI)
MBB.erase(SEHEpilogueStartI);
}
}
} // namespace llvm