blob: 5de0f9e0d6b8cd2f0f1f855ac5a8663e202482cf [file] [log] [blame]
//===- bolt/Core/DynoStats.cpp - Dynamic execution stats ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the DynoStats class.
//
//===----------------------------------------------------------------------===//
#include "bolt/Core/DynoStats.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <string>
#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt"
using namespace llvm;
using namespace bolt;
namespace opts {
extern cl::OptionCategory BoltCategory;
static cl::opt<uint32_t>
DynoStatsScale("dyno-stats-scale",
cl::desc("scale to be applied while reporting dyno stats"),
cl::Optional,
cl::init(1),
cl::Hidden,
cl::cat(BoltCategory));
static cl::opt<uint32_t>
PrintDynoOpcodeStat("print-dyno-opcode-stats",
cl::desc("print per instruction opcode dyno stats and the function"
"names:BB offsets of the nth highest execution counts"),
cl::init(0),
cl::Hidden,
cl::cat(BoltCategory));
} // namespace opts
namespace llvm {
namespace bolt {
constexpr const char *DynoStats::Desc[];
bool DynoStats::operator<(const DynoStats &Other) const {
return std::lexicographical_compare(
&Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
&Other.Stats[FIRST_DYNO_STAT], &Other.Stats[LAST_DYNO_STAT]);
}
bool DynoStats::operator==(const DynoStats &Other) const {
return std::equal(&Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
&Other.Stats[FIRST_DYNO_STAT]);
}
bool DynoStats::lessThan(const DynoStats &Other,
ArrayRef<Category> Keys) const {
return std::lexicographical_compare(
Keys.begin(), Keys.end(), Keys.begin(), Keys.end(),
[this, &Other](const Category A, const Category) {
return Stats[A] < Other.Stats[A];
});
}
void DynoStats::print(raw_ostream &OS, const DynoStats *Other,
MCInstPrinter *Printer) const {
auto printStatWithDelta = [&](const std::string &Name, uint64_t Stat,
uint64_t OtherStat) {
OS << format("%'20lld : ", Stat * opts::DynoStatsScale) << Name;
if (Other) {
if (Stat != OtherStat) {
OtherStat = std::max(OtherStat, uint64_t(1)); // to prevent divide by 0
OS << format(" (%+.1f%%)", ((float)Stat - (float)OtherStat) * 100.0 /
(float)(OtherStat));
} else {
OS << " (=)";
}
}
OS << '\n';
};
for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
if (!PrintAArch64Stats && Stat == DynoStats::VENEER_CALLS_AARCH64)
continue;
printStatWithDelta(Desc[Stat], Stats[Stat], Other ? (*Other)[Stat] : 0);
}
if (opts::PrintDynoOpcodeStat && Printer) {
OS << "\nProgram-wide opcode histogram:\n";
OS << " Opcode, Execution Count, Max Exec Count, "
"Function Name:Offset ...\n";
std::vector<std::pair<uint64_t, unsigned>> SortedHistogram;
for (const OpcodeStatTy &Stat : OpcodeHistogram)
SortedHistogram.emplace_back(Stat.second.first, Stat.first);
// Sort using lexicographic ordering
llvm::sort(SortedHistogram);
// Dump in ascending order: Start with Opcode with Highest execution
// count.
for (auto &Stat : llvm::reverse(SortedHistogram)) {
OS << format("%20s,%'18lld", Printer->getOpcodeName(Stat.second).data(),
Stat.first * opts::DynoStatsScale);
MaxOpcodeHistogramTy MaxMultiMap = OpcodeHistogram.at(Stat.second).second;
// Start with function name:BB offset with highest execution count.
for (auto &Max : llvm::reverse(MaxMultiMap)) {
OS << format(", %'18lld, ", Max.first * opts::DynoStatsScale)
<< Max.second.first.str() << ':' << Max.second.second;
}
OS << '\n';
}
}
}
void DynoStats::operator+=(const DynoStats &Other) {
for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
Stats[Stat] += Other[Stat];
}
for (const OpcodeStatTy &Stat : Other.OpcodeHistogram) {
auto I = OpcodeHistogram.find(Stat.first);
if (I == OpcodeHistogram.end()) {
OpcodeHistogram.emplace(Stat);
} else {
// Merge other histograms, log only the opts::PrintDynoOpcodeStat'th
// maximum counts.
I->second.first += Stat.second.first;
auto &MMap = I->second.second;
auto &OtherMMap = Stat.second.second;
auto Size = MMap.size();
assert(Size <= opts::PrintDynoOpcodeStat);
for (auto OtherMMapPair : llvm::reverse(OtherMMap)) {
if (Size++ >= opts::PrintDynoOpcodeStat) {
auto First = MMap.begin();
if (OtherMMapPair.first <= First->first)
break;
MMap.erase(First);
}
MMap.emplace(OtherMMapPair);
}
}
}
}
DynoStats getDynoStats(BinaryFunction &BF) {
auto &BC = BF.getBinaryContext();
DynoStats Stats(/*PrintAArch64Stats*/ BC.isAArch64());
// Return empty-stats about the function we don't completely understand.
if (!BF.isSimple() || !BF.hasValidProfile() || !BF.hasCanonicalCFG())
return Stats;
// Update enumeration of basic blocks for correct detection of branch'
// direction.
BF.getLayout().updateLayoutIndices();
for (BinaryBasicBlock *const BB : BF.getLayout().blocks()) {
// The basic block execution count equals to the sum of incoming branch
// frequencies. This may deviate from the sum of outgoing branches of the
// basic block especially since the block may contain a function that
// does not return or a function that throws an exception.
const uint64_t BBExecutionCount = BB->getKnownExecutionCount();
// Ignore empty blocks and blocks that were not executed.
if (BB->getNumNonPseudos() == 0 || BBExecutionCount == 0)
continue;
// Count AArch64 linker-inserted veneers
if (BF.isAArch64Veneer())
Stats[DynoStats::VENEER_CALLS_AARCH64] += BF.getKnownExecutionCount();
// Count various instruction types by iterating through all instructions.
// When -print-dyno-opcode-stats is on, count per each opcode and record
// maximum execution counts.
for (const MCInst &Instr : *BB) {
if (opts::PrintDynoOpcodeStat) {
unsigned Opcode = Instr.getOpcode();
auto I = Stats.OpcodeHistogram.find(Opcode);
if (I == Stats.OpcodeHistogram.end()) {
DynoStats::MaxOpcodeHistogramTy MMap;
MMap.emplace(BBExecutionCount,
std::make_pair(BF.getOneName(), BB->getOffset()));
Stats.OpcodeHistogram.emplace(Opcode,
std::make_pair(BBExecutionCount, MMap));
} else {
I->second.first += BBExecutionCount;
bool Insert = true;
if (I->second.second.size() == opts::PrintDynoOpcodeStat) {
auto First = I->second.second.begin();
if (First->first < BBExecutionCount)
I->second.second.erase(First);
else
Insert = false;
}
if (Insert) {
I->second.second.emplace(
BBExecutionCount,
std::make_pair(BF.getOneName(), BB->getOffset()));
}
}
}
if (BC.MIB->mayStore(Instr)) {
Stats[DynoStats::STORES] += BBExecutionCount;
}
if (BC.MIB->mayLoad(Instr)) {
Stats[DynoStats::LOADS] += BBExecutionCount;
}
if (!BC.MIB->isCall(Instr))
continue;
uint64_t CallFreq = BBExecutionCount;
if (BC.MIB->getConditionalTailCall(Instr)) {
CallFreq =
BC.MIB->getAnnotationWithDefault<uint64_t>(Instr, "CTCTakenCount");
}
Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
if (BC.MIB->isIndirectCall(Instr)) {
Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
} else if (const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr)) {
const BinaryFunction *BF = BC.getFunctionForSymbol(CallSymbol);
if (BF && BF->isPLTFunction()) {
Stats[DynoStats::PLT_CALLS] += CallFreq;
// We don't process PLT functions and hence have to adjust relevant
// dynostats here for:
//
// jmp *GOT_ENTRY(%rip)
//
// NOTE: this is arch-specific.
Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
Stats[DynoStats::LOADS] += CallFreq;
Stats[DynoStats::INSTRUCTIONS] += CallFreq;
}
}
}
Stats[DynoStats::INSTRUCTIONS] += BB->getNumNonPseudos() * BBExecutionCount;
// Jump tables.
const MCInst *LastInstr = BB->getLastNonPseudoInstr();
if (BC.MIB->getJumpTable(*LastInstr)) {
Stats[DynoStats::JUMP_TABLE_BRANCHES] += BBExecutionCount;
LLVM_DEBUG(
static uint64_t MostFrequentJT;
if (BBExecutionCount > MostFrequentJT) {
MostFrequentJT = BBExecutionCount;
dbgs() << "BOLT-INFO: most frequently executed jump table is in "
<< "function " << BF << " in basic block " << BB->getName()
<< " executed totally " << BBExecutionCount << " times.\n";
}
);
continue;
}
if (BC.MIB->isIndirectBranch(*LastInstr) && !BC.MIB->isCall(*LastInstr)) {
Stats[DynoStats::UNKNOWN_INDIRECT_BRANCHES] += BBExecutionCount;
continue;
}
// Update stats for branches.
const MCSymbol *TBB = nullptr;
const MCSymbol *FBB = nullptr;
MCInst *CondBranch = nullptr;
MCInst *UncondBranch = nullptr;
if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
continue;
if (!CondBranch && !UncondBranch)
continue;
// Simple unconditional branch.
if (!CondBranch) {
Stats[DynoStats::UNCOND_BRANCHES] += BBExecutionCount;
continue;
}
// CTCs: instruction annotations could be stripped, hence check the number
// of successors to identify conditional tail calls.
if (BB->succ_size() == 1) {
if (BB->branch_info_begin() != BB->branch_info_end())
Stats[DynoStats::UNCOND_BRANCHES] += BB->branch_info_begin()->Count;
continue;
}
// Conditional branch that could be followed by an unconditional branch.
uint64_t TakenCount = BB->getTakenBranchInfo().Count;
if (TakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
TakenCount = 0;
uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count;
if (NonTakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
NonTakenCount = 0;
if (BF.isForwardBranch(BB, BB->getConditionalSuccessor(true))) {
Stats[DynoStats::FORWARD_COND_BRANCHES] += BBExecutionCount;
Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount;
} else {
Stats[DynoStats::BACKWARD_COND_BRANCHES] += BBExecutionCount;
Stats[DynoStats::BACKWARD_COND_BRANCHES_TAKEN] += TakenCount;
}
if (UncondBranch) {
Stats[DynoStats::UNCOND_BRANCHES] += NonTakenCount;
}
}
return Stats;
}
} // namespace bolt
} // namespace llvm