llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.cpp - third_party/github.com/llvm/llvm-project - Git at Google

 //===-- GCNPreRALongBranchReg.cpp ----------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // \file
 // \brief Pass to estimate pre RA branch size and reserve a pair of SGPRs if
 // there is a long branch. Branch size at this point is difficult to track since
 // we have no idea what spills will be inserted later on. We just assume 8 bytes
 // per instruction to compute approximations, without computing the actual
 // instruction size, to see if we're in the neighborhood of the maximum branch
 // distance threshold. Tuning of what is considered "long" is handled through
 // the amdgpu-long-branch-factor cl argument, which sets LongBranchFactor.
 //===----------------------------------------------------------------------===//
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIMachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/InitializePasses.h"

 using namespace llvm;

 #define DEBUG_TYPE "amdgpu-pre-ra-long-branch-reg"

 namespace {

 // Command-line knob "amdgpu-long-branch-factor". The value is multiplied into
 // the estimated offset of a branch destination before that offset is tested
 // against the branch range in runOnMachineFunction. It defaults to 1.0 and is
 // marked cl::Hidden.
 static cl::opt<double> LongBranchFactor(
     "amdgpu-long-branch-factor", cl::Hidden, cl::init(1.0),
     cl::desc(
         "Factor to apply to what qualifies as a long branch to reserve "
         "a pair of scalar registers. "
         "If this value is 0 the long branch registers are never reserved. "
         "As this value grows the greater chance the branch distance will "
         "fall within the threshold and the registers will be marked to be "
         "reserved. "
         "We lean towards always reserving a register for  long jumps"));

 /// Machine function pass that approximates basic block offsets before
 /// register allocation and, when an unconditional branch looks out of range,
 /// records a reserved SGPR pair in the function's SIMachineFunctionInfo.
 class GCNPreRALongBranchReg : public MachineFunctionPass {
 public:
   static char ID;

   GCNPreRALongBranchReg() : MachineFunctionPass(ID) {
     initializeGCNPreRALongBranchRegPass(*PassRegistry::getPassRegistry());
   }

   StringRef getPassName() const override {
     return "AMDGPU Pre-RA Long Branch Reg";
   }

   // The pass declares every analysis as preserved.
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
     MachineFunctionPass::getAnalysisUsage(AU);
   }

   bool runOnMachineFunction(MachineFunction &MF) override;

 private:
   // Approximate layout record for one basic block.
   struct BasicBlockInfo {
     // Distance, in bytes, from the start of the function to the start of
     // this basic block.
     uint64_t Offset = 0;
     // Size of the basic block in bytes.
     uint64_t Size = 0;
   };

   // Populates BlockInfo (indexed by block number) with the estimated size
   // and offset of every block in MF.
   void generateBlockInfo(MachineFunction &MF,
                          SmallVectorImpl<BasicBlockInfo> &BlockInfo);
 };
 } // End anonymous namespace.
 // Storage for the pass identifier declared in the class; a reference to it is
 // exported below as llvm::GCNPreRALongBranchRegID.
 char GCNPreRALongBranchReg::ID = 0;

 // Register the pass under the DEBUG_TYPE argument string with its
 // human-readable name.
 INITIALIZE_PASS(GCNPreRALongBranchReg, DEBUG_TYPE,
                 "AMDGPU Pre-RA Long Branch Reg", false, false)

 // Alias of the pass ID in the llvm namespace.
 char &llvm::GCNPreRALongBranchRegID = GCNPreRALongBranchReg::ID;
 /// Fill \p BlockInfo with an approximate byte size and start offset for every
 /// basic block of \p MF.
 ///
 /// \param MF        Function whose blocks are measured, in layout order.
 /// \param BlockInfo Output vector, resized to MF.getNumBlockIDs() and indexed
 ///                  by MachineBasicBlock::getNumber().
 ///
 /// Sizes are approximated as 8 bytes per non-meta instruction; the real
 /// encoded size is not computed.
 void GCNPreRALongBranchReg::generateBlockInfo(
     MachineFunction &MF, SmallVectorImpl<BasicBlockInfo> &BlockInfo) {
   BlockInfo.resize(MF.getNumBlockIDs());

   // Visit the blocks once in layout order. Each block begins where the
   // previous one ended, so a running total yields every offset. Doing it this
   // way never touches MF.begin() on a function without blocks, and the first
   // block gets an explicit offset of 0 regardless of what BlockInfo held on
   // entry.
   uint64_t NextOffset = 0;
   for (const MachineBasicBlock &MBB : MF) {
     // Count all non-debug, non-meta instructions in the block.
     uint64_t NumInstr = 0;
     for (const MachineInstr &MI : MBB) {
       // isMetaInstruction is a superset of isDebugInstr, so one test skips
       // both kinds.
       if (!MI.isMetaInstruction())
         ++NumInstr;
     }

     BasicBlockInfo &Info = BlockInfo[MBB.getNumber()];
     Info.Offset = NextOffset;
     // Approximate size as just 8 bytes per instruction.
     Info.Size = 8 * NumInstr;
     NextOffset += Info.Size;
   }
 }
 /// Record a reserved SGPR pair for \p MF when some unconditional branch
 /// appears to target a block outside the branch range.
 ///
 /// \returns true if a register pair was recorded in the function info; false
 /// if no SGPR pair was unused or no branch was judged long.
 bool GCNPreRALongBranchReg::runOnMachineFunction(MachineFunction &MF) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *InstrInfo = ST.getInstrInfo();
   const SIRegisterInfo *RegInfo = ST.getRegisterInfo();
   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
   MachineRegisterInfo &MRI = MF.getRegInfo();

   // Take the highest available SGPR pair for now; after RA it is shifted
   // down to a lower unused pair. When every register is in use,
   // findUnusedRegister yields AMDGPU::NoRegister and there is nothing to do.
   constexpr bool TakeHighest = true;
   Register ReservedPair = RegInfo->findUnusedRegister(
       MRI, &AMDGPU::SGPR_64RegClass, MF, TakeHighest);
   if (!ReservedPair)
     return false;

   // Estimated size and offset of each basic block.
   SmallVector<BasicBlockInfo, 16> Layout;
   generateBlockInfo(MF, Layout);

   for (const MachineBasicBlock &MBB : MF) {
     MachineBasicBlock::const_iterator Term = MBB.getLastNonDebugInstr();
     const bool EndsInUncondBranch =
         Term != MBB.end() && Term->isUnconditionalBranch();
     if (!EndsInUncondBranch)
       continue;

     const MachineBasicBlock *Target = InstrInfo->getBranchDestBlock(*Term);
     // The destination's estimated offset, scaled by the tuning factor, stands
     // in for the branch distance.
     const uint64_t ScaledOffset = static_cast<uint64_t>(
         LongBranchFactor * Layout[Target->getNumber()].Offset);
     if (InstrInfo->isBranchOffsetInRange(Term->getOpcode(), ScaledOffset))
       continue;

     // Out of range: treat it as a long branch and reserve the registers.
     FuncInfo->setLongBranchReservedReg(ReservedPair);
     return true;
   }
   return false;
 }
	//===-- GCNPreRALongBranchReg.cpp ----------------------------------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	// \file
	// \brief Pass to estimate pre RA branch size and reserve a pair of SGPRs if
	// there is a long branch. Branch size at this point is difficult to track since
	// we have no idea what spills will be inserted later on. We just assume 8 bytes
	// per instruction to compute approximations, without computing the actual
	// instruction size, to see if we're in the neighborhood of the maximum branch
	// distance threshold. Tuning of what is considered "long" is handled through
	// the amdgpu-long-branch-factor cl argument, which sets LongBranchFactor.
	//===----------------------------------------------------------------------===//
	#include "AMDGPU.h"
	#include "GCNSubtarget.h"
	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
	#include "SIMachineFunctionInfo.h"
	#include "llvm/CodeGen/MachineFunctionPass.h"
	#include "llvm/InitializePasses.h"

	using namespace llvm;

	#define DEBUG_TYPE "amdgpu-pre-ra-long-branch-reg"

	namespace {

	// Command-line knob "amdgpu-long-branch-factor". The value is multiplied into
	// the estimated offset of a branch destination before that offset is tested
	// against the branch range in runOnMachineFunction. It defaults to 1.0 and is
	// marked cl::Hidden.
	static cl::opt<double> LongBranchFactor(
	    "amdgpu-long-branch-factor", cl::Hidden, cl::init(1.0),
	    cl::desc(
	        "Factor to apply to what qualifies as a long branch to reserve "
	        "a pair of scalar registers. "
	        "If this value is 0 the long branch registers are never reserved. "
	        "As this value grows the greater chance the branch distance will "
	        "fall within the threshold and the registers will be marked to be "
	        "reserved. "
	        "We lean towards always reserving a register for long jumps"));

	/// Machine function pass that approximates basic block offsets before
	/// register allocation and, when an unconditional branch looks out of range,
	/// records a reserved SGPR pair in the function's SIMachineFunctionInfo.
	class GCNPreRALongBranchReg : public MachineFunctionPass {
	public:
	  static char ID;

	  GCNPreRALongBranchReg() : MachineFunctionPass(ID) {
	    initializeGCNPreRALongBranchRegPass(*PassRegistry::getPassRegistry());
	  }

	  StringRef getPassName() const override {
	    return "AMDGPU Pre-RA Long Branch Reg";
	  }

	  // The pass declares every analysis as preserved.
	  void getAnalysisUsage(AnalysisUsage &AU) const override {
	    AU.setPreservesAll();
	    MachineFunctionPass::getAnalysisUsage(AU);
	  }

	  bool runOnMachineFunction(MachineFunction &MF) override;

	private:
	  // Approximate layout record for one basic block.
	  struct BasicBlockInfo {
	    // Distance, in bytes, from the start of the function to the start of
	    // this basic block.
	    uint64_t Offset = 0;
	    // Size of the basic block in bytes.
	    uint64_t Size = 0;
	  };

	  // Populates BlockInfo (indexed by block number) with the estimated size
	  // and offset of every block in MF.
	  void generateBlockInfo(MachineFunction &MF,
	                         SmallVectorImpl<BasicBlockInfo> &BlockInfo);
	};
	} // End anonymous namespace.
	// Storage for the pass identifier declared in the class; a reference to it is
	// exported below as llvm::GCNPreRALongBranchRegID.
	char GCNPreRALongBranchReg::ID = 0;

	// Register the pass under the DEBUG_TYPE argument string with its
	// human-readable name.
	INITIALIZE_PASS(GCNPreRALongBranchReg, DEBUG_TYPE,
	"AMDGPU Pre-RA Long Branch Reg", false, false)

	// Alias of the pass ID in the llvm namespace.
	char &llvm::GCNPreRALongBranchRegID = GCNPreRALongBranchReg::ID;
	/// Fill \p BlockInfo with an approximate byte size and start offset for every
	/// basic block of \p MF.
	///
	/// \param MF        Function whose blocks are measured, in layout order.
	/// \param BlockInfo Output vector, resized to MF.getNumBlockIDs() and indexed
	///                  by MachineBasicBlock::getNumber().
	///
	/// Sizes are approximated as 8 bytes per non-meta instruction; the real
	/// encoded size is not computed.
	void GCNPreRALongBranchReg::generateBlockInfo(
	    MachineFunction &MF, SmallVectorImpl<BasicBlockInfo> &BlockInfo) {
	  BlockInfo.resize(MF.getNumBlockIDs());

	  // Visit the blocks once in layout order. Each block begins where the
	  // previous one ended, so a running total yields every offset. Doing it this
	  // way never touches MF.begin() on a function without blocks, and the first
	  // block gets an explicit offset of 0 regardless of what BlockInfo held on
	  // entry.
	  uint64_t NextOffset = 0;
	  for (const MachineBasicBlock &MBB : MF) {
	    // Count all non-debug, non-meta instructions in the block.
	    uint64_t NumInstr = 0;
	    for (const MachineInstr &MI : MBB) {
	      // isMetaInstruction is a superset of isDebugInstr, so one test skips
	      // both kinds.
	      if (!MI.isMetaInstruction())
	        ++NumInstr;
	    }

	    BasicBlockInfo &Info = BlockInfo[MBB.getNumber()];
	    Info.Offset = NextOffset;
	    // Approximate size as just 8 bytes per instruction.
	    Info.Size = 8 * NumInstr;
	    NextOffset += Info.Size;
	  }
	}
	/// Record a reserved SGPR pair for \p MF when some unconditional branch
	/// appears to target a block outside the branch range.
	///
	/// \returns true if a register pair was recorded in the function info; false
	/// if no SGPR pair was unused or no branch was judged long.
	bool GCNPreRALongBranchReg::runOnMachineFunction(MachineFunction &MF) {
	  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
	  const SIInstrInfo *TII = STM.getInstrInfo();
	  const SIRegisterInfo *TRI = STM.getRegisterInfo();
	  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
	  MachineRegisterInfo &MRI = MF.getRegInfo();

	  // For now, reserve the highest available SGPR pair. After RA, shift down
	  // to a lower unused pair of SGPRs. If all registers are used, then
	  // findUnusedRegister will return AMDGPU::NoRegister.
	  constexpr bool ReserveHighestRegister = true;
	  Register LongBranchReservedReg = TRI->findUnusedRegister(
	      MRI, &AMDGPU::SGPR_64RegClass, MF, ReserveHighestRegister);
	  if (!LongBranchReservedReg)
	    return false;

	  // Approximate code size and offsets of each basic block.
	  SmallVector<BasicBlockInfo, 16> BlockInfo;
	  generateBlockInfo(MF, BlockInfo);

	  for (const MachineBasicBlock &MBB : MF) {
	    MachineBasicBlock::const_iterator Last = MBB.getLastNonDebugInstr();
	    // Only blocks that end in an unconditional branch are of interest.
	    if (Last == MBB.end() || !Last->isUnconditionalBranch())
	      continue;

	    // getBranchDestBlock takes the instruction itself, so dereference the
	    // iterator; the result is a pointer to the destination block.
	    MachineBasicBlock *DestBB = TII->getBranchDestBlock(*Last);
	    // The destination's estimated offset, scaled by the tuning factor,
	    // stands in for the branch distance.
	    uint64_t BlockDistance = static_cast<uint64_t>(
	        LongBranchFactor * BlockInfo[DestBB->getNumber()].Offset);

	    // If the distance falls outside the threshold assume it is a long
	    // branch and we need to reserve the registers.
	    if (!TII->isBranchOffsetInRange(Last->getOpcode(), BlockDistance)) {
	      MFI->setLongBranchReservedReg(LongBranchReservedReg);
	      return true;
	    }
	  }
	  return false;
	}