[𝘀𝗽𝗿] changes to main this commit is based on
Created using spr 1.3.4
[skip ci]
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index a529985..e82b857 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -386,8 +386,8 @@
/// Profile match ratio.
float ProfileMatchRatio{0.0f};
- /// Raw branch count for this function in the profile.
- uint64_t RawBranchCount{0};
+ /// Raw sample/branch count for this function in the profile.
+ uint64_t RawSampleCount{0};
/// Dynamically executed function bytes, used for density computation.
uint64_t SampleCountInBytes{0};
@@ -1880,13 +1880,12 @@
/// Return COUNT_NO_PROFILE if there's no profile info.
uint64_t getExecutionCount() const { return ExecutionCount; }
- /// Return the raw profile information about the number of branch
- /// executions corresponding to this function.
- uint64_t getRawBranchCount() const { return RawBranchCount; }
+ /// Return the raw profile information about the number of samples (basic
+ /// profile) or branch executions (branch profile) recorded in this function.
+ uint64_t getRawSampleCount() const { return RawSampleCount; }
- /// Set the profile data about the number of branch executions corresponding
- /// to this function.
- void setRawBranchCount(uint64_t Count) { RawBranchCount = Count; }
+ /// Set raw count of samples or branches recorded in this function.
+ void setRawSampleCount(uint64_t Count) { RawSampleCount = Count; }
/// Return the number of dynamically executed bytes, from raw perf data.
uint64_t getSampleCountInBytes() const { return SampleCountInBytes; }
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index c4ee75e..d66d198 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -92,16 +92,6 @@
uint64_t Addr;
};
- /// Used for parsing specific pre-aggregated input files.
- struct AggregatedLBREntry {
- enum Type : char { BRANCH = 0, FT, FT_EXTERNAL_ORIGIN, TRACE };
- Location From;
- Location To;
- uint64_t Count;
- uint64_t Mispreds;
- Type EntryType;
- };
-
struct Trace {
uint64_t From;
uint64_t To;
@@ -131,7 +121,6 @@
/// and use them later for processing and assigning profile.
std::unordered_map<Trace, TakenBranchInfo, TraceHash> BranchLBRs;
std::unordered_map<Trace, FTInfo, TraceHash> FallthroughLBRs;
- std::vector<AggregatedLBREntry> AggregatedLBRs;
std::unordered_map<uint64_t, uint64_t> BasicSamples;
std::vector<PerfMemSample> MemSamples;
@@ -416,14 +405,7 @@
/// F 41be90 41be90 4
/// B 4b1942 39b57f0 3 0
/// B 4b196f 4b19e0 2 0
- void parsePreAggregated();
-
- /// Parse the full output of pre-aggregated LBR samples generated by
- /// an external tool.
- std::error_code parsePreAggregatedLBRSamples();
-
- /// Process parsed pre-aggregated data.
- void processPreAggregated();
+ std::error_code parsePreAggregated();
/// If \p Address falls into the binary address space based on memory
/// mapping info \p MMI, then adjust it for further processing by subtracting
diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h
index 314dcc9..a7a0933 100644
--- a/bolt/include/bolt/Profile/DataReader.h
+++ b/bolt/include/bolt/Profile/DataReader.h
@@ -252,6 +252,9 @@
/// Get the number of samples recorded in [Start, End)
uint64_t getSamples(uint64_t Start, uint64_t End) const;
+ /// Returns the total number of samples recorded in this function.
+ uint64_t getSamples() const;
+
/// Aggregation helper
DenseMap<uint64_t, size_t> Index;
diff --git a/bolt/include/bolt/Profile/Heatmap.h b/bolt/include/bolt/Profile/Heatmap.h
index 74d7eed..a63b221 100644
--- a/bolt/include/bolt/Profile/Heatmap.h
+++ b/bolt/include/bolt/Profile/Heatmap.h
@@ -9,6 +9,7 @@
#ifndef BOLT_PROFILE_HEATMAP_H
#define BOLT_PROFILE_HEATMAP_H
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include <cstdint>
#include <map>
@@ -57,9 +58,9 @@
}
- /// Register a single sample at \p Address.
+ /// Register \p Count samples at \p Address, unless the address is ignored.
- void registerAddress(uint64_t Address) {
+ void registerAddress(uint64_t Address, uint64_t Count) {
if (!ignoreAddress(Address))
- ++Map[Address / BucketSize];
+ Map[Address / BucketSize] += Count;
}
/// Register \p Count samples at [\p StartAddress, \p EndAddress ].
@@ -77,9 +78,22 @@
void printCDF(raw_ostream &OS) const;
- void printSectionHotness(StringRef Filename) const;
+ /// Struct describing individual section hotness.
+ struct SectionStats {
+ uint64_t Samples{0}; ///< Sample count attributed to the section.
+ uint64_t Buckets{0}; ///< Number of heatmap buckets assigned to the section.
+ };
- void printSectionHotness(raw_ostream &OS) const;
+ /// Mapping from section name to associated \p SectionStats. Special entries:
+ /// - [total] for total stats,
+ /// - [unmapped] for samples outside any section, if non-zero.
+ using SectionStatsMap = StringMap<SectionStats>;
+
+ SectionStatsMap computeSectionStats() const;
+
+ void printSectionHotness(const SectionStatsMap &, StringRef Filename) const;
+
+ void printSectionHotness(const SectionStatsMap &, raw_ostream &OS) const;
size_t size() const { return Map.size(); }
};
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 9773e21..fc521dc 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -473,7 +473,7 @@
OS << "\n Image : 0x" << Twine::utohexstr(getImageAddress());
if (ExecutionCount != COUNT_NO_PROFILE) {
OS << "\n Exec Count : " << ExecutionCount;
- OS << "\n Branch Count: " << RawBranchCount;
+ OS << "\n Branch Count: " << RawSampleCount;
OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
}
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index d8628c6..420ffc8 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -1445,7 +1445,7 @@
if (!Function.hasProfile())
continue;
- uint64_t SampleCount = Function.getRawBranchCount();
+ uint64_t SampleCount = Function.getRawSampleCount();
TotalSampleCount += SampleCount;
if (Function.hasValidProfile()) {
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 80f4ea0..11850fa 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -349,25 +349,29 @@
return false;
}
-void DataAggregator::parsePreAggregated() {
- std::string Error;
+std::error_code DataAggregator::parsePreAggregated() { // Returns the first open/parse error, if any.
+ outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
+ NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
+ TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
MemoryBuffer::getFileOrSTDIN(Filename);
- if (std::error_code EC = MB.getError()) {
- errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
- << EC.message() << "\n";
- exit(1);
- }
+ if (std::error_code EC = MB.getError())
+ return EC; // Propagate the open failure to the caller instead of exiting.
FileBuf = std::move(*MB);
ParsingBuf = FileBuf->getBuffer();
Col = 0;
Line = 1;
- if (parsePreAggregatedLBRSamples()) {
- errs() << "PERF2BOLT: failed to parse samples\n";
- exit(1);
+ size_t AggregatedLBRs = 0; // Entries parsed so far; reported once parsing ends.
+ while (hasData()) {
+ if (std::error_code EC = parseAggregatedLBREntry())
+ return EC; // Abort on the first entry that fails to parse.
+ ++AggregatedLBRs;
}
+
+ outs() << "PERF2BOLT: read " << AggregatedLBRs << " aggregated LBR entries\n";
+ return std::error_code();
}
void DataAggregator::filterBinaryMMapInfo() {
@@ -446,11 +450,6 @@
Error DataAggregator::preprocessProfile(BinaryContext &BC) {
this->BC = &BC;
- if (opts::ReadPreAggregated) {
- parsePreAggregated();
- return Error::success();
- }
-
if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) {
outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
processFileBuildID(*FileBuildID);
@@ -471,6 +470,12 @@
ErrorCallback(ReturnCode, ErrBuf);
};
+ if (opts::ReadPreAggregated) {
+ if (std::error_code EC = parsePreAggregated())
+ return errorCodeToError(EC);
+ goto heatmap;
+ }
+
if (BC.IsLinuxKernel) {
// Current MMap parsing logic does not work with linux kernel.
// MMap entries for linux kernel uses PERF_RECORD_MMAP
@@ -499,16 +504,7 @@
filterBinaryMMapInfo();
prepareToParse("events", MainEventsPPI, ErrorCallback);
- if (opts::HeatmapMode) {
- if (std::error_code EC = printLBRHeatMap()) {
- errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
- exit(1);
- }
- exit(0);
- }
-
- if ((!opts::BasicAggregation && parseBranchEvents()) ||
- (opts::BasicAggregation && parseBasicEvents()))
+ if (opts::BasicAggregation ? parseBasicEvents() : parseBranchEvents())
errs() << "PERF2BOLT: failed to parse samples\n";
// Special handling for memory events
@@ -521,6 +517,13 @@
deleteTempFiles();
+heatmap:
+ if (opts::HeatmapMode) {
+ if (std::error_code EC = printLBRHeatMap())
+ return errorCodeToError(EC);
+ exit(0);
+ }
+
return Error::success();
}
@@ -557,9 +560,7 @@
}
void DataAggregator::processProfile(BinaryContext &BC) {
- if (opts::ReadPreAggregated)
- processPreAggregated();
- else if (opts::BasicAggregation)
+ if (opts::BasicAggregation)
processBasicEvents();
else
processBranchEvents();
@@ -567,15 +568,14 @@
processMemEvents();
// Mark all functions with registered events as having a valid profile.
- const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
- : BinaryFunction::PF_LBR;
for (auto &BFI : BC.getBinaryFunctions()) {
BinaryFunction &BF = BFI.second;
- FuncBranchData *FBD = getBranchData(BF);
- if (FBD || getFuncSampleData(BF.getNames())) {
- BF.markProfiled(Flags);
- if (FBD)
- BF.RawBranchCount = FBD->getNumExecutedBranches();
+ if (FuncBranchData *FBD = getBranchData(BF)) {
+ BF.markProfiled(BinaryFunction::PF_LBR);
+ BF.RawSampleCount = FBD->getNumExecutedBranches();
+ } else if (FuncSampleData *FSD = getFuncSampleData(BF.getNames())) {
+ BF.markProfiled(BinaryFunction::PF_SAMPLE);
+ BF.RawSampleCount = FSD->getSamples();
}
}
@@ -588,7 +588,6 @@
// Release intermediate storage.
clear(BranchLBRs);
clear(FallthroughLBRs);
- clear(AggregatedLBRs);
clear(BasicSamples);
clear(MemSamples);
}
@@ -632,10 +631,18 @@
bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
uint64_t Count) {
+ // To record executed bytes, use basic block size as is regardless of BAT.
+ uint64_t BlockSize = 0;
+ if (BinaryBasicBlock *BB = OrigFunc.getBasicBlockContainingOffset(
+ Address - OrigFunc.getAddress()))
+ BlockSize = BB->getOriginalSize();
+
BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
- if (ParentFunc || (BAT && !BAT->isBATFunction(OrigFunc.getAddress())))
+ if (ParentFunc || (BAT && !BAT->isBATFunction(Func.getAddress())))
NumColdSamples += Count;
+ // Attach executed bytes to parent function in case of cold fragment.
+ Func.SampleCountInBytes += Count * BlockSize;
auto I = NamesToSamples.find(Func.getOneName());
if (I == NamesToSamples.end()) {
@@ -1209,15 +1216,14 @@
ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
if (std::error_code EC = TypeOrErr.getError())
return EC;
- auto Type = AggregatedLBREntry::TRACE;
- if (LLVM_LIKELY(TypeOrErr.get() == "T")) {
- } else if (TypeOrErr.get() == "B") {
- Type = AggregatedLBREntry::BRANCH;
- } else if (TypeOrErr.get() == "F") {
- Type = AggregatedLBREntry::FT;
- } else if (TypeOrErr.get() == "f") {
- Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
- } else {
+ enum TType { TRACE, BRANCH, FT, FT_EXTERNAL_ORIGIN, INVALID };
+ auto Type = StringSwitch<TType>(TypeOrErr.get())
+ .Case("T", TRACE)
+ .Case("B", BRANCH)
+ .Case("F", FT)
+ .Case("f", FT_EXTERNAL_ORIGIN)
+ .Default(INVALID);
+ if (Type == INVALID) {
reportError("expected T, B, F or f");
return make_error_code(llvm::errc::io_error);
}
@@ -1235,7 +1241,7 @@
return EC;
ErrorOr<Location> TraceFtEnd = std::error_code();
- if (Type == AggregatedLBREntry::TRACE) {
+ if (Type == TRACE) {
while (checkAndConsumeFS()) {
}
TraceFtEnd = parseLocationOrOffset();
@@ -1245,13 +1251,12 @@
while (checkAndConsumeFS()) {
}
- ErrorOr<int64_t> Frequency =
- parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
+ ErrorOr<int64_t> Frequency = parseNumberField(FieldSeparator, Type != BRANCH);
if (std::error_code EC = Frequency.getError())
return EC;
uint64_t Mispreds = 0;
- if (Type == AggregatedLBREntry::BRANCH) {
+ if (Type == BRANCH) {
while (checkAndConsumeFS()) {
}
ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
@@ -1273,13 +1278,28 @@
BF->setHasProfileAvailable();
uint64_t Count = static_cast<uint64_t>(Frequency.get());
- AggregatedLBREntry Entry{From.get(), To.get(), Count, Mispreds, Type};
- AggregatedLBRs.emplace_back(Entry);
- if (Type == AggregatedLBREntry::TRACE) {
- auto FtType = (FromFunc == ToFunc) ? AggregatedLBREntry::FT
- : AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
- AggregatedLBREntry TraceFt{To.get(), TraceFtEnd.get(), Count, 0, FtType};
- AggregatedLBRs.emplace_back(TraceFt);
+
+ Trace Trace(From->Offset, To->Offset);
+ // Taken trace
+ if (Type == TRACE || Type == BRANCH) {
+ TakenBranchInfo &Info = BranchLBRs[Trace];
+ Info.TakenCount += Count;
+ Info.MispredCount += Mispreds;
+
+ NumTotalSamples += Count;
+ }
+ // Construct fallthrough part of the trace
+ if (Type == TRACE) {
+ Trace.From = To->Offset;
+ Trace.To = TraceFtEnd->Offset;
+ Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN;
+ }
+ // Add fallthrough trace
+ if (Type != BRANCH) {
+ FTInfo &Info = FallthroughLBRs[Trace];
+ (Type == FT ? Info.InternCount : Info.ExternCount) += Count;
+
+ NumTraces += Count;
}
return std::error_code();
@@ -1301,53 +1321,6 @@
}
Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
opts::HeatmapMaxAddress, getTextSections(BC));
- uint64_t NumTotalSamples = 0;
-
- if (opts::BasicAggregation) {
- while (hasData()) {
- ErrorOr<PerfBasicSample> SampleRes = parseBasicSample();
- if (std::error_code EC = SampleRes.getError()) {
- if (EC == errc::no_such_process)
- continue;
- return EC;
- }
- PerfBasicSample &Sample = SampleRes.get();
- HM.registerAddress(Sample.PC);
- NumTotalSamples++;
- }
- outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n";
- } else {
- while (hasData()) {
- ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
- if (std::error_code EC = SampleRes.getError()) {
- if (EC == errc::no_such_process)
- continue;
- return EC;
- }
-
- PerfBranchSample &Sample = SampleRes.get();
-
- // LBRs are stored in reverse execution order. NextLBR refers to the next
- // executed branch record.
- const LBREntry *NextLBR = nullptr;
- for (const LBREntry &LBR : Sample.LBR) {
- if (NextLBR) {
- // Record fall-through trace.
- const uint64_t TraceFrom = LBR.To;
- const uint64_t TraceTo = NextLBR->From;
- ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
- }
- NextLBR = &LBR;
- }
- if (!Sample.LBR.empty()) {
- HM.registerAddress(Sample.LBR.front().To);
- HM.registerAddress(Sample.LBR.back().From);
- }
- NumTotalSamples += Sample.LBR.size();
- }
- outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
- outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
- }
if (!NumTotalSamples) {
if (opts::BasicAggregation) {
@@ -1363,6 +1336,8 @@
outs() << "HEATMAP: building heat map...\n";
+ for (const auto &[PC, Hits] : BasicSamples)
+ HM.registerAddress(PC, Hits);
for (const auto &LBR : FallthroughLBRs) {
const Trace &Trace = LBR.first;
const FTInfo &Info = LBR.second;
@@ -1382,10 +1357,12 @@
HM.printCDF(opts::OutputFilename);
else
HM.printCDF(opts::OutputFilename + ".csv");
+ Heatmap::SectionStatsMap Stats = HM.computeSectionStats();
if (opts::OutputFilename == "-")
- HM.printSectionHotness(opts::OutputFilename);
+ HM.printSectionHotness(Stats, opts::OutputFilename);
else
- HM.printSectionHotness(opts::OutputFilename + "-section-hotness.csv");
+ HM.printSectionHotness(Stats,
+ opts::OutputFilename + "-section-hotness.csv");
return std::error_code();
}
@@ -1412,7 +1389,10 @@
const uint64_t TraceTo = NextLBR->From;
const BinaryFunction *TraceBF =
getBinaryFunctionContainingAddress(TraceFrom);
- if (TraceBF && TraceBF->containsAddress(TraceTo)) {
+ if (opts::HeatmapMode) {
+ FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
+ ++Info.InternCount;
+ } else if (TraceBF && TraceBF->containsAddress(TraceTo)) {
FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
if (TraceBF->containsAddress(LBR.From))
++Info.InternCount;
@@ -1446,6 +1426,11 @@
}
NextLBR = &LBR;
+ if (opts::HeatmapMode) {
+ TakenBranchInfo &Info = BranchLBRs[Trace(LBR.From, LBR.To)];
+ ++Info.TakenCount;
+ continue;
+ }
uint64_t From = getBinaryFunctionContainingAddress(LBR.From) ? LBR.From : 0;
uint64_t To = getBinaryFunctionContainingAddress(LBR.To) ? LBR.To : 0;
if (!From && !To)
@@ -1454,6 +1439,10 @@
++Info.TakenCount;
Info.MispredCount += LBR.Mispred;
}
+ if (opts::HeatmapMode && !Sample.LBR.empty()) {
+ ++BasicSamples[Sample.LBR.front().To];
+ ++BasicSamples[Sample.LBR.back().From];
+ }
}
void DataAggregator::printColdSamplesDiagnostic() const {
@@ -1589,7 +1578,6 @@
printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
}
}
- printBranchSamplesDiagnostics();
return std::error_code();
}
@@ -1617,6 +1605,7 @@
const TakenBranchInfo &Info = AggrLBR.second;
doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
}
+ printBranchSamplesDiagnostics();
}
std::error_code DataAggregator::parseBasicEvents() {
@@ -1630,6 +1619,7 @@
if (!Sample->PC)
continue;
+ ++NumTotalSamples;
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
BF->setHasProfileAvailable();
@@ -1637,6 +1627,7 @@
++BasicSamples[Sample->PC];
EventNames.insert(Sample->EventName);
}
+ outs() << "PERF2BOLT: read " << NumTotalSamples << " basic samples\n";
return std::error_code();
}
@@ -1649,7 +1640,6 @@
for (auto &Sample : BasicSamples) {
const uint64_t PC = Sample.first;
const uint64_t HitCount = Sample.second;
- NumTotalSamples += HitCount;
BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
if (!Func) {
OutOfRangeSamples += HitCount;
@@ -1658,7 +1648,6 @@
doSample(*Func, PC, HitCount);
}
- outs() << "PERF2BOLT: read " << NumTotalSamples << " samples\n";
printBasicSamplesDiagnostics(OutOfRangeSamples);
}
@@ -1722,49 +1711,6 @@
}
}
-std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
- outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
- NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
- TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
- while (hasData())
- if (std::error_code EC = parseAggregatedLBREntry())
- return EC;
-
- return std::error_code();
-}
-
-void DataAggregator::processPreAggregated() {
- outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
- NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
- TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
-
- for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
- switch (AggrEntry.EntryType) {
- case AggregatedLBREntry::BRANCH:
- case AggregatedLBREntry::TRACE:
- doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
- AggrEntry.Mispreds);
- NumTotalSamples += AggrEntry.Count;
- break;
- case AggregatedLBREntry::FT:
- case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
- LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
- ? AggrEntry.From.Offset
- : 0,
- AggrEntry.From.Offset, false};
- LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
- doTrace(First, Second, AggrEntry.Count);
- NumTraces += AggrEntry.Count;
- break;
- }
- }
- }
-
- outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
- << " aggregated LBR entries\n";
- printBranchSamplesDiagnostics();
-}
-
std::optional<int32_t> DataAggregator::parseCommExecEvent() {
size_t LineEnd = ParsingBuf.find_first_of("\n");
if (LineEnd == StringRef::npos) {
diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp
index f2e999b..4a92c9e 100644
--- a/bolt/lib/Profile/DataReader.cpp
+++ b/bolt/lib/Profile/DataReader.cpp
@@ -128,6 +128,13 @@
return Result;
}
+uint64_t FuncSampleData::getSamples() const {
+ uint64_t TotalHits = 0; // Accumulated over every sample record in Data.
+ for (const auto &Sample : Data)
+ TotalHits += Sample.Hits;
+ return TotalHits;
+}
+
void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
auto Iter = Index.find(Offset);
if (Iter == Index.end()) {
@@ -407,12 +414,12 @@
FuncBranchData *FBD = getBranchData(BF);
if (FBD) {
BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
- BF.RawBranchCount = FBD->getNumExecutedBranches();
+ BF.RawSampleCount = FBD->getNumExecutedBranches();
if (BF.ProfileMatchRatio == 1.0f) {
if (fetchProfileForOtherEntryPoints(BF)) {
BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
BF.ExecutionCount = FBD->ExecutionCount;
- BF.RawBranchCount = FBD->getNumExecutedBranches();
+ BF.RawSampleCount = FBD->getNumExecutedBranches();
}
return;
}
diff --git a/bolt/lib/Profile/Heatmap.cpp b/bolt/lib/Profile/Heatmap.cpp
index 5fc3e06..067c96c 100644
--- a/bolt/lib/Profile/Heatmap.cpp
+++ b/bolt/lib/Profile/Heatmap.cpp
@@ -284,64 +284,92 @@
Counts.clear();
}
-void Heatmap::printSectionHotness(StringRef FileName) const {
+void Heatmap::printSectionHotness(const Heatmap::SectionStatsMap &Stats,
+ StringRef FileName) const { // Writes the section report to FileName.
std::error_code EC;
raw_fd_ostream OS(FileName, EC, sys::fs::OpenFlags::OF_None);
if (EC) {
errs() << "error opening output file: " << EC.message() << '\n';
exit(1);
}
- printSectionHotness(OS);
+ printSectionHotness(Stats, OS); // The stream overload does the formatting.
}
-void Heatmap::printSectionHotness(raw_ostream &OS) const {
+/// Attribute every bucket recorded in Map to the first text section it
+/// overlaps and return per-section sample and bucket totals. The result also
+/// carries a "[total]" entry and, when some samples could not be attributed
+/// to any section, an "[unmapped]" entry. Returns an empty map if there are
+/// no text sections.
+Heatmap::SectionStatsMap Heatmap::computeSectionStats() const {
uint64_t NumTotalCounts = 0;
- StringMap<uint64_t> SectionHotness;
+ SectionStatsMap Stat;
unsigned TextSectionIndex = 0;
if (TextSections.empty())
- return;
+ return Stat;
uint64_t UnmappedHotness = 0;
auto RecordUnmappedBucket = [&](uint64_t Address, uint64_t Frequency) {
- errs() << "Couldn't map the address bucket [0x" << Twine::utohexstr(Address)
- << ", 0x" << Twine::utohexstr(Address + BucketSize)
- << "] containing " << Frequency
- << " samples to a text section in the binary.";
+ // The diagnostic is printed only at verbosity >= 1; the samples are
+ // always accumulated.
+ if (opts::Verbosity >= 1)
+ errs() << "Couldn't map the address bucket ["
+ << formatv("{0:x}, {1:x}", Address, Address + BucketSize)
+ << "] containing " << Frequency
+ << " samples to a text section in the binary.\n";
UnmappedHotness += Frequency;
};
- for (const std::pair<const uint64_t, uint64_t> &KV : Map) {
- NumTotalCounts += KV.second;
+ for (const auto &[Bucket, Count] : Map) {
+ NumTotalCounts += Count;
// We map an address bucket to the first section (lowest address)
// overlapping with that bucket.
- auto Address = KV.first * BucketSize;
+ auto Address = Bucket * BucketSize;
while (TextSectionIndex < TextSections.size() &&
Address >= TextSections[TextSectionIndex].EndAddress)
TextSectionIndex++;
+ // The bucket covers [Address, Address + BucketSize); one that ends at or
+ // before BeginAddress does not overlap the section.
if (TextSectionIndex >= TextSections.size() ||
- Address + BucketSize < TextSections[TextSectionIndex].BeginAddress) {
- RecordUnmappedBucket(Address, KV.second);
+ Address + BucketSize <= TextSections[TextSectionIndex].BeginAddress) {
+ RecordUnmappedBucket(Address, Count);
continue;
}
- SectionHotness[TextSections[TextSectionIndex].Name] += KV.second;
+ SectionStats &SecStats = Stat[TextSections[TextSectionIndex].Name];
+ ++SecStats.Buckets;
+ SecStats.Samples += Count;
}
+ // "[total]" counts all samples and all buckets, mapped or not.
+ Stat["[total]"] = SectionStats{NumTotalCounts, Map.size()};
+ if (UnmappedHotness)
+ Stat["[unmapped]"] = SectionStats{UnmappedHotness, 0};
+ return Stat;
+}
+
+/// Write a CSV table to \p OS with one row per text section: name, address
+/// range, percentage of all samples, and percentage of the buckets spanned by
+/// the section that \p Stats attributes to it. An "[unmapped]" row follows
+/// when \p Stats has such an entry. \p Stats must contain a "[total]" entry.
+/// Nothing is printed if there are no text sections.
+void Heatmap::printSectionHotness(const StringMap<SectionStats> &Stats,
+ raw_ostream &OS) const {
+ if (TextSections.empty())
+ return;
+
+ auto TotalIt = Stats.find("[total]");
+ assert(TotalIt != Stats.end() && "Malformed SectionStatsMap");
+ const uint64_t NumTotalCounts = TotalIt->second.Samples;
assert(NumTotalCounts > 0 &&
"total number of heatmap buckets should be greater than 0");
- OS << "Section Name, Begin Address, End Address, Percentage Hotness\n";
- for (auto &TextSection : TextSections) {
- OS << TextSection.Name << ", 0x"
- << Twine::utohexstr(TextSection.BeginAddress) << ", 0x"
- << Twine::utohexstr(TextSection.EndAddress) << ", "
- << format("%.4f",
- 100.0 * SectionHotness[TextSection.Name] / NumTotalCounts)
- << "\n";
+ OS << "Section Name, Begin Address, End Address, Percentage Hotness, "
+ << "Utilization Pct\n";
+ for (const auto &[Name, Begin, End] : TextSections) {
+ // Sections without an entry in Stats are reported with zero samples.
+ uint64_t Samples = 0;
+ uint64_t Buckets = 0;
+ auto SectionIt = Stats.find(Name);
+ if (SectionIt != Stats.end()) {
+ Samples = SectionIt->second.Samples;
+ Buckets = SectionIt->second.Buckets;
+ }
+ const float RelHotness = 100. * Samples / NumTotalCounts;
+ // Buckets spanned by the section: the end is rounded up to a bucket
+ // boundary and the start rounded down. Kept 64-bit like the addresses.
+ const uint64_t NumBuckets =
+ End / BucketSize + !!(End % BucketSize) - Begin / BucketSize;
+ // A section spanning no buckets would otherwise divide zero by zero.
+ const float BucketUtilization =
+ NumBuckets ? 100. * Buckets / NumBuckets : 0.;
+ OS << formatv("{0}, {1:x}, {2:x}, {3:f4}, {4:f4}\n", Name, Begin, End,
+ RelHotness, BucketUtilization);
}
- if (UnmappedHotness > 0)
- OS << "[unmapped], 0x0, 0x0, "
- << format("%.4f", 100.0 * UnmappedHotness / NumTotalCounts) << "\n";
+ auto UnmappedIt = Stats.find("[unmapped]");
+ if (UnmappedIt == Stats.end())
+ return;
+ const float UnmappedPct = 100. * UnmappedIt->second.Samples / NumTotalCounts;
+ OS << formatv("[unmapped], 0x0, 0x0, {0:f4}, 0\n", UnmappedPct);
}
} // namespace bolt
} // namespace llvm
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index f5636bf..88b806c 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -177,11 +177,11 @@
BF.setExecutionCount(YamlBF.ExecCount);
- uint64_t FuncRawBranchCount = 0;
+ uint64_t FuncRawSampleCount = 0;
for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks)
for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors)
- FuncRawBranchCount += YamlSI.Count;
- BF.setRawBranchCount(FuncRawBranchCount);
+ FuncRawSampleCount += YamlSI.Count;
+ BF.setRawSampleCount(FuncRawSampleCount);
if (BF.empty())
return true;
diff --git a/bolt/test/X86/heatmap-preagg.test b/bolt/test/X86/heatmap-preagg.test
new file mode 100644
index 0000000..660d37f
--- /dev/null
+++ b/bolt/test/X86/heatmap-preagg.test
@@ -0,0 +1,35 @@
+## Test heatmap with pre-aggregated profile
+
+RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe
+## Non-BOLTed input binary
+RUN: llvm-bolt-heatmap %t.exe -o %t --pa -p %p/Inputs/blarge_new.preagg.txt \
+RUN: 2>&1 | FileCheck --check-prefix CHECK-HEATMAP %s
+RUN: FileCheck %s --check-prefix CHECK-SEC-HOT --input-file %t-section-hotness.csv
+
+## BOLTed input binary
+RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt \
+RUN: --reorder-blocks=ext-tsp --split-functions --split-strategy=cdsplit \
+RUN: --reorder-functions=cdsort --enable-bat --dyno-stats --skip-funcs=main
+RUN: llvm-bolt-heatmap %t.out -o %t2 --pa -p %p/Inputs/blarge_new_bat.preagg.txt \
+RUN: 2>&1 | FileCheck --check-prefix CHECK-HEATMAP-BAT %s
+RUN: FileCheck %s --check-prefix CHECK-SEC-HOT-BAT --input-file %t2-section-hotness.csv
+
+CHECK-HEATMAP: PERF2BOLT: read 81 aggregated LBR entries
+CHECK-HEATMAP: HEATMAP: invalid traces: 1
+
+CHECK-SEC-HOT: Section Name, Begin Address, End Address, Percentage Hotness, Utilization Pct
+CHECK-SEC-HOT-NEXT: .init, 0x401000, 0x40101b, 16.8545, 100.0000
+CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583, 66.6667
+CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872, 85.1064
+CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000, 0.0000
+
+CHECK-HEATMAP-BAT: PERF2BOLT: read 79 aggregated LBR entries
+CHECK-HEATMAP-BAT: HEATMAP: invalid traces: 2
+
+CHECK-SEC-HOT-BAT: Section Name, Begin Address, End Address, Percentage Hotness, Utilization Pct
+CHECK-SEC-HOT-BAT-NEXT: .init, 0x401000, 0x40101b, 17.2888, 100.0000
+CHECK-SEC-HOT-BAT-NEXT: .plt, 0x401020, 0x4010b0, 5.6132, 66.6667
+CHECK-SEC-HOT-BAT-NEXT: .bolt.org.text, 0x4010b0, 0x401c25, 38.3385
+CHECK-SEC-HOT-BAT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000, 0.0000
+CHECK-SEC-HOT-BAT-NEXT: .text, 0x800000, 0x8002cc, 38.7595, 91.6667
+CHECK-SEC-HOT-BAT-NEXT: .text.cold, 0x800300, 0x800415, 0.0000, 0.0000
diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test
index 7bec442..44111de 100644
--- a/bolt/test/perf2bolt/perf_test.test
+++ b/bolt/test/perf2bolt/perf_test.test
@@ -8,6 +8,7 @@
CHECK-NOT: PERF2BOLT-ERROR
CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
+CHECK: BOLT-INFO: Functions with density >= {{.*}} account for 99.00% total sample counts.
RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4
RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4