| //===- bolt/Profile/StaleProfileMatching.cpp - Profile data matching ----===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // BOLT often has to deal with profiles collected on binaries built from several |
| // revisions behind release. As a result, a certain percentage of functions is |
| // considered stale and not optimized. This file implements the ability to match |
| // a stale profile to functions that are not 100% binary identical, thus |
| // increasing the optimization coverage and boosting the performance of |
| // applications. |
| // |
| // The algorithm consists of two phases: matching and inference: |
| // - In the matching phase, we try to "guess" as many block and jump counts from |
| //   the stale profile as possible. To this end, the content of each basic block |
| //   is hashed and stored in the (yaml) profile. When BOLT optimizes a binary, |
| //   it computes block hashes and identifies the corresponding entries in the |
| //   stale profile. This yields a partial profile for every CFG in the binary. |
| // - In the inference phase, we employ a network flow-based algorithm (profi) to |
| //   reconstruct "realistic" block and jump counts from the partial profile |
| //   generated at the first stage. In practice, we don't always produce proper |
| //   profile data, but the majority (e.g., >90%) of CFGs get the correct counts. |
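| // |
| // Stale matching is enabled with the -infer-stale-profile flag; the inference |
| // parameters and the pseudo-probe-based matching mode are controlled by the |
| // options defined below. |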
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "bolt/Core/HashUtilities.h" |
| #include "bolt/Profile/YAMLProfileReader.h" |
| #include "llvm/ADT/Bitfields.h" |
| #include "llvm/ADT/Hashing.h" |
| #include "llvm/MC/MCPseudoProbe.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Timer.h" |
| #include "llvm/Support/xxhash.h" |
| #include "llvm/Transforms/Utils/SampleProfileInference.h" |
| |
| #include <queue> |
| |
| using namespace llvm; |
| |
| #undef DEBUG_TYPE |
| #define DEBUG_TYPE "bolt-prof" |
| |
| namespace opts { |
| |
| extern cl::opt<bool> TimeRewrite; |
| extern cl::OptionCategory BoltOptCategory; |
| |
| cl::opt<bool> |
| InferStaleProfile("infer-stale-profile", |
| cl::desc("Infer counts from stale profile data."), |
| cl::init(false), cl::Hidden, cl::cat(BoltOptCategory)); |
| |
| static cl::opt<unsigned> StaleMatchingMinMatchedBlock( |
| "stale-matching-min-matched-block", |
| cl::desc("Percentage threshold of matched basic blocks at which stale " |
| "profile inference is executed."), |
| cl::init(0), cl::Hidden, cl::cat(BoltOptCategory)); |
| |
| static cl::opt<unsigned> StaleMatchingMaxFuncSize( |
| "stale-matching-max-func-size", |
| cl::desc("The maximum size of a function to consider for inference."), |
| cl::init(10000), cl::Hidden, cl::cat(BoltOptCategory)); |
| |
| // Parameters of the profile inference algorithm. The default values are tuned |
| // on several benchmarks. |
| static cl::opt<bool> StaleMatchingEvenFlowDistribution( |
| "stale-matching-even-flow-distribution", |
| cl::desc("Try to evenly distribute flow when there are multiple equally " |
| "likely options."), |
| cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory)); |
| |
| static cl::opt<bool> StaleMatchingRebalanceUnknown( |
| "stale-matching-rebalance-unknown", |
| cl::desc("Evenly re-distribute flow among unknown subgraphs."), |
| cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory)); |
| |
| static cl::opt<bool> StaleMatchingJoinIslands( |
| "stale-matching-join-islands", |
| cl::desc("Join isolated components having positive flow."), cl::init(true), |
| cl::ReallyHidden, cl::cat(BoltOptCategory)); |
| |
| static cl::opt<unsigned> StaleMatchingCostBlockInc( |
| "stale-matching-cost-block-inc", |
| cl::desc("The cost of increasing a block count by one."), cl::init(150), |
| cl::ReallyHidden, cl::cat(BoltOptCategory)); |
| |
| static cl::opt<unsigned> StaleMatchingCostBlockDec( |
| "stale-matching-cost-block-dec", |
| cl::desc("The cost of decreasing a block count by one."), cl::init(150), |
| cl::ReallyHidden, cl::cat(BoltOptCategory)); |
| |
| static cl::opt<unsigned> StaleMatchingCostJumpInc( |
| "stale-matching-cost-jump-inc", |
| cl::desc("The cost of increasing a jump count by one."), cl::init(150), |
| cl::ReallyHidden, cl::cat(BoltOptCategory)); |
| |
| static cl::opt<unsigned> StaleMatchingCostJumpDec( |
| "stale-matching-cost-jump-dec", |
| cl::desc("The cost of decreasing a jump count by one."), cl::init(150), |
| cl::ReallyHidden, cl::cat(BoltOptCategory)); |
| |
| static cl::opt<unsigned> StaleMatchingCostBlockUnknownInc( |
| "stale-matching-cost-block-unknown-inc", |
| cl::desc("The cost of increasing an unknown block count by one."), |
| cl::init(1), cl::ReallyHidden, cl::cat(BoltOptCategory)); |
| |
| static cl::opt<unsigned> StaleMatchingCostJumpUnknownInc( |
| "stale-matching-cost-jump-unknown-inc", |
| cl::desc("The cost of increasing an unknown jump count by one."), |
| cl::init(140), cl::ReallyHidden, cl::cat(BoltOptCategory)); |
| |
| static cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc( |
| "stale-matching-cost-jump-unknown-ft-inc", |
| cl::desc( |
| "The cost of increasing an unknown fall-through jump count by one."), |
| cl::init(3), cl::ReallyHidden, cl::cat(BoltOptCategory)); |
| |
| cl::opt<bool> StaleMatchingWithPseudoProbes( |
| "stale-matching-with-pseudo-probes", |
| cl::desc("Turns on stale matching with block pseudo probes."), |
| cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory)); |
| |
| } // namespace opts |
| |
| namespace llvm { |
| namespace bolt { |
| |
| /// An object wrapping several components of a basic block hash. The combined |
| /// (blended) hash is represented and stored as one uint64_t, while individual |
| /// components are of smaller size (e.g., uint16_t or uint8_t). |
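| /// |
| /// Layout of the combined 64-bit hash, starting from the least significant |
| /// bits: Offset (16 bits), OpcodeHash (16 bits), InstrHash (16 bits), |
| /// PredHash (8 bits), SuccHash (8 bits). |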
| struct BlendedBlockHash { |
| private: |
| using ValueOffset = Bitfield::Element<uint16_t, 0, 16>; |
| using ValueOpcode = Bitfield::Element<uint16_t, 16, 16>; |
| using ValueInstr = Bitfield::Element<uint16_t, 32, 16>; |
| using ValuePred = Bitfield::Element<uint8_t, 48, 8>; |
| using ValueSucc = Bitfield::Element<uint8_t, 56, 8>; |
| |
| public: |
| explicit BlendedBlockHash() {} |
| |
| explicit BlendedBlockHash(uint64_t Hash) { |
| Offset = Bitfield::get<ValueOffset>(Hash); |
| OpcodeHash = Bitfield::get<ValueOpcode>(Hash); |
| InstrHash = Bitfield::get<ValueInstr>(Hash); |
| PredHash = Bitfield::get<ValuePred>(Hash); |
| SuccHash = Bitfield::get<ValueSucc>(Hash); |
| } |
| |
| /// Combine the blended hash into uint64_t. |
| uint64_t combine() const { |
| uint64_t Hash = 0; |
| Bitfield::set<ValueOffset>(Hash, Offset); |
| Bitfield::set<ValueOpcode>(Hash, OpcodeHash); |
| Bitfield::set<ValueInstr>(Hash, InstrHash); |
| Bitfield::set<ValuePred>(Hash, PredHash); |
| Bitfield::set<ValueSucc>(Hash, SuccHash); |
| return Hash; |
| } |
| |
| /// Compute a distance between two given blended hashes. The smaller the |
| /// distance, the more similar two blocks are. For identical basic blocks, |
| /// the distance is zero. |
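| /// Mismatches in the successor and predecessor hashes are weighted most |
| /// heavily, followed by InstrHash, with the offset difference used as a |
| /// tiebreaker. |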
| uint64_t distance(const BlendedBlockHash &BBH) const { |
| assert(OpcodeHash == BBH.OpcodeHash && |
| "incorrect blended hash distance computation"); |
| uint64_t Dist = 0; |
| // Account for the neighbor (successor and predecessor) hashes |
| Dist += SuccHash == BBH.SuccHash ? 0 : 1; |
| Dist += PredHash == BBH.PredHash ? 0 : 1; |
| Dist <<= 16; |
| // Account for InstrHash |
| Dist += InstrHash == BBH.InstrHash ? 0 : 1; |
| Dist <<= 16; |
| // Account for Offset |
| Dist += (Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset); |
| return Dist; |
| } |
| |
| /// The offset of the basic block from the function start. |
| uint16_t Offset{0}; |
| /// (Loose) Hash of the basic block instructions, excluding operands. |
| uint16_t OpcodeHash{0}; |
| /// (Strong) Hash of the basic block instructions, including opcodes and |
| /// operands. |
| uint16_t InstrHash{0}; |
| /// (Loose) Combined hash of the predecessors of the basic block. |
| uint8_t PredHash{0}; |
| /// (Loose) Combined hash of the successors of the basic block. |
| uint8_t SuccHash{0}; |
| }; |
| |
| /// The object is used to match basic blocks in a BinaryFunction against |
| /// profile blocks whose hashes were computed on a binary built from several |
| /// revisions behind release. |
| class StaleMatcher { |
| public: |
| /// Initialize stale matcher. |
| void init(const std::vector<FlowBlock *> &Blocks, |
| const std::vector<BlendedBlockHash> &Hashes, |
| const std::vector<uint64_t> &CallHashes) { |
| assert(Blocks.size() == Hashes.size() && |
| Hashes.size() == CallHashes.size() && |
| "incorrect matcher initialization"); |
| for (size_t I = 0; I < Blocks.size(); I++) { |
| FlowBlock *Block = Blocks[I]; |
| uint16_t OpHash = Hashes[I].OpcodeHash; |
| OpHashToBlocks[OpHash].push_back(std::make_pair(Hashes[I], Block)); |
| if (CallHashes[I]) |
| CallHashToBlocks[CallHashes[I]].push_back( |
| std::make_pair(Hashes[I], Block)); |
| } |
| } |
| |
| /// Creates a mapping from a pseudo probe to a flow block. |
| void mapProbeToBB(const MCDecodedPseudoProbe *Probe, FlowBlock *Block) { |
| BBPseudoProbeToBlock[Probe] = Block; |
| } |
| |
| enum MatchMethod : char { |
| MATCH_EXACT = 0, |
| MATCH_PROBE_EXACT, |
| MATCH_PROBE_LOOSE, |
| MATCH_OPCODE, |
| MATCH_CALL, |
| NO_MATCH |
| }; |
| |
| /// Find a flow block matching the given blended hash of a profile block with |
| /// high confidence (identical instruction hash); otherwise report no match. |
| std::pair<const FlowBlock *, MatchMethod> |
| matchBlockStrict(BlendedBlockHash BlendedHash) { |
| const auto &[Block, ExactHash] = matchWithOpcodes(BlendedHash); |
| if (Block && ExactHash) |
| return {Block, MATCH_EXACT}; |
| return {nullptr, NO_MATCH}; |
| } |
| |
| /// Find the most similar flow block for a profile block given pseudo probes. |
| std::pair<const FlowBlock *, MatchMethod> matchBlockProbe( |
| const ArrayRef<yaml::bolt::PseudoProbeInfo> PseudoProbes, |
| const YAMLProfileReader::InlineTreeNodeMapTy &InlineTreeNodeMap) { |
| const auto &[ProbeBlock, ExactProbe] = |
| matchWithPseudoProbes(PseudoProbes, InlineTreeNodeMap); |
| if (ProbeBlock) |
| return {ProbeBlock, ExactProbe ? MATCH_PROBE_EXACT : MATCH_PROBE_LOOSE}; |
| return {nullptr, NO_MATCH}; |
| } |
| |
| /// Find the most similar flow block for a profile block given its hashes. |
| std::pair<const FlowBlock *, MatchMethod> |
| matchBlockLoose(BlendedBlockHash BlendedHash, uint64_t CallHash) { |
| if (const FlowBlock *CallBlock = matchWithCalls(BlendedHash, CallHash)) |
| return {CallBlock, MATCH_CALL}; |
| if (const FlowBlock *OpcodeBlock = matchWithOpcodes(BlendedHash).first) |
| return {OpcodeBlock, MATCH_OPCODE}; |
| return {nullptr, NO_MATCH}; |
| } |
| |
| /// Returns true if the two basic blocks (in the binary and in the profile) |
| /// corresponding to the given hashes are matched to each other with a high |
| /// confidence. |
| static bool isHighConfidenceMatch(BlendedBlockHash Hash1, |
| BlendedBlockHash Hash2) { |
| return Hash1.InstrHash == Hash2.InstrHash; |
| } |
| |
| private: |
| using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>; |
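| /// Maps an OpcodeHash to the blocks sharing it, together with their full |
| /// blended hashes. |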
| std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks; |
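| /// Maps a call hash to the blocks sharing it, together with their full |
| /// blended hashes. |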
| std::unordered_map<uint64_t, std::vector<HashBlockPairType>> CallHashToBlocks; |
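| /// Maps a binary pseudo probe to the flow block containing it. |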
| DenseMap<const MCDecodedPseudoProbe *, FlowBlock *> BBPseudoProbeToBlock; |
| |
| // Uses OpcodeHash to find the most similar block for a given hash. |
| std::pair<const FlowBlock *, bool> |
| matchWithOpcodes(BlendedBlockHash BlendedHash) const { |
| auto BlockIt = OpHashToBlocks.find(BlendedHash.OpcodeHash); |
| if (BlockIt == OpHashToBlocks.end()) |
| return {nullptr, false}; |
| FlowBlock *BestBlock = nullptr; |
| uint64_t BestDist = std::numeric_limits<uint64_t>::max(); |
| BlendedBlockHash BestHash; |
| for (const auto &[Hash, Block] : BlockIt->second) { |
| uint64_t Dist = Hash.distance(BlendedHash); |
| if (BestBlock == nullptr || Dist < BestDist) { |
| BestDist = Dist; |
| BestBlock = Block; |
| BestHash = Hash; |
| } |
| } |
| return {BestBlock, isHighConfidenceMatch(BestHash, BlendedHash)}; |
| } |
| |
| // Uses CallHash to find the most similar block for a given hash. |
| const FlowBlock *matchWithCalls(BlendedBlockHash BlendedHash, |
| uint64_t CallHash) const { |
| if (!CallHash) |
| return nullptr; |
| auto BlockIt = CallHashToBlocks.find(CallHash); |
| if (BlockIt == CallHashToBlocks.end()) |
| return nullptr; |
| FlowBlock *BestBlock = nullptr; |
| uint64_t BestDist = std::numeric_limits<uint64_t>::max(); |
| for (const auto &[Hash, Block] : BlockIt->second) { |
| uint64_t Dist = Hash.OpcodeHash > BlendedHash.OpcodeHash |
| ? Hash.OpcodeHash - BlendedHash.OpcodeHash |
| : BlendedHash.OpcodeHash - Hash.OpcodeHash; |
| if (BestBlock == nullptr || Dist < BestDist) { |
| BestDist = Dist; |
| BestBlock = Block; |
| } |
| } |
| return BestBlock; |
| } |
| |
| /// Matches a profile block with a binary block based on pseudo probes. |
| /// Returns the best matching block (or nullptr) and whether the match is |
| /// unambiguous. |
| std::pair<const FlowBlock *, bool> matchWithPseudoProbes( |
| const ArrayRef<yaml::bolt::PseudoProbeInfo> BlockPseudoProbes, |
| const YAMLProfileReader::InlineTreeNodeMapTy &InlineTreeNodeMap) const { |
| |
| if (!opts::StaleMatchingWithPseudoProbes) |
| return {nullptr, false}; |
| |
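| // Count how many of the profile block's probes resolve to each candidate |
| // flow block; probes that cannot be resolved are accumulated under the |
| // nullptr key. |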
| DenseMap<const FlowBlock *, uint32_t> FlowBlockMatchCount; |
| |
| auto matchProfileProbeToBlock = [&](uint32_t NodeId, |
| uint64_t ProbeId) -> const FlowBlock * { |
| const MCDecodedPseudoProbeInlineTree *BinaryNode = |
| InlineTreeNodeMap.getInlineTreeNode(NodeId); |
| if (!BinaryNode) |
| return nullptr; |
| const MCDecodedPseudoProbe Dummy(0, ProbeId, PseudoProbeType::Block, 0, 0, |
| nullptr); |
| ArrayRef<MCDecodedPseudoProbe> BinaryProbes = BinaryNode->getProbes(); |
| auto BinaryProbeIt = llvm::lower_bound( |
| BinaryProbes, Dummy, [](const auto &LHS, const auto &RHS) { |
| return LHS.getIndex() < RHS.getIndex(); |
| }); |
| if (BinaryProbeIt == BinaryNode->getProbes().end() || |
| BinaryProbeIt->getIndex() != ProbeId) |
| return nullptr; |
| auto It = BBPseudoProbeToBlock.find(&*BinaryProbeIt); |
| if (It == BBPseudoProbeToBlock.end()) |
| return nullptr; |
| return It->second; |
| }; |
| |
| auto matchPseudoProbeInfo = [&](const yaml::bolt::PseudoProbeInfo |
| &ProfileProbe, |
| uint32_t NodeId) { |
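| // BlockMask is a bitmask encoding block probes with indices 1 to 64. |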
| for (uint64_t Index = 0; Index < 64; ++Index) |
| if (ProfileProbe.BlockMask & 1ull << Index) |
| ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, Index + 1)]; |
| for (const auto &ProfileProbes : |
| {ProfileProbe.BlockProbes, ProfileProbe.IndCallProbes, |
| ProfileProbe.CallProbes}) |
| for (uint64_t ProfileProbe : ProfileProbes) |
| ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, ProfileProbe)]; |
| }; |
| |
| for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes) { |
| if (!ProfileProbe.InlineTreeNodes.empty()) |
| for (uint32_t ProfileInlineTreeNode : ProfileProbe.InlineTreeNodes) |
| matchPseudoProbeInfo(ProfileProbe, ProfileInlineTreeNode); |
| else |
| matchPseudoProbeInfo(ProfileProbe, ProfileProbe.InlineTreeIndex); |
| } |
| uint32_t BestMatchCount = 0; |
| uint32_t TotalMatchCount = 0; |
| const FlowBlock *BestMatchBlock = nullptr; |
| for (const auto &[FlowBlock, Count] : FlowBlockMatchCount) { |
| TotalMatchCount += Count; |
| if (Count < BestMatchCount || (Count == BestMatchCount && BestMatchBlock)) |
| continue; |
| BestMatchBlock = FlowBlock; |
| BestMatchCount = Count; |
| } |
| return {BestMatchBlock, BestMatchCount == TotalMatchCount}; |
| } |
| }; |
| |
| void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const { |
| if (size() == 0) |
| return; |
| |
| assert(hasCFG() && "the function is expected to have CFG"); |
| |
| std::vector<BlendedBlockHash> BlendedHashes(BasicBlocks.size()); |
| std::vector<uint64_t> OpcodeHashes(BasicBlocks.size()); |
| // Initialize hash components. |
| for (size_t I = 0; I < BasicBlocks.size(); I++) { |
| const BinaryBasicBlock *BB = BasicBlocks[I]; |
| assert(BB->getIndex() == I && "incorrect block index"); |
| BlendedHashes[I].Offset = BB->getOffset(); |
| // Hashing complete instructions. |
| std::string InstrHashStr = hashBlock( |
| BC, *BB, [&](const MCOperand &Op) { return hashInstOperand(BC, Op); }); |
| if (HashFunction == HashFunction::StdHash) { |
| uint64_t InstrHash = std::hash<std::string>{}(InstrHashStr); |
| BlendedHashes[I].InstrHash = (uint16_t)hash_value(InstrHash); |
| } else if (HashFunction == HashFunction::XXH3) { |
| uint64_t InstrHash = llvm::xxh3_64bits(InstrHashStr); |
| BlendedHashes[I].InstrHash = (uint16_t)InstrHash; |
| } else { |
| llvm_unreachable("Unhandled HashFunction"); |
| } |
| // Hashing opcodes. |
| std::string OpcodeHashStr = hashBlockLoose(BC, *BB); |
| if (HashFunction == HashFunction::StdHash) { |
| OpcodeHashes[I] = std::hash<std::string>{}(OpcodeHashStr); |
| BlendedHashes[I].OpcodeHash = (uint16_t)hash_value(OpcodeHashes[I]); |
| } else if (HashFunction == HashFunction::XXH3) { |
| OpcodeHashes[I] = llvm::xxh3_64bits(OpcodeHashStr); |
| BlendedHashes[I].OpcodeHash = (uint16_t)OpcodeHashes[I]; |
| } else { |
| llvm_unreachable("Unhandled HashFunction"); |
| } |
| } |
| |
| // Initialize neighbor hash. |
| for (size_t I = 0; I < BasicBlocks.size(); I++) { |
| const BinaryBasicBlock *BB = BasicBlocks[I]; |
| // Append hashes of successors. |
| uint64_t Hash = 0; |
| for (BinaryBasicBlock *SuccBB : BB->successors()) { |
| uint64_t SuccHash = OpcodeHashes[SuccBB->getIndex()]; |
| Hash = hashing::detail::hash_16_bytes(Hash, SuccHash); |
| } |
| if (HashFunction == HashFunction::StdHash) { |
| // Compatibility with old behavior. |
| BlendedHashes[I].SuccHash = (uint8_t)hash_value(Hash); |
| } else { |
| BlendedHashes[I].SuccHash = (uint8_t)Hash; |
| } |
| |
| // Append hashes of predecessors. |
| Hash = 0; |
| for (BinaryBasicBlock *PredBB : BB->predecessors()) { |
| uint64_t PredHash = OpcodeHashes[PredBB->getIndex()]; |
| Hash = hashing::detail::hash_16_bytes(Hash, PredHash); |
| } |
| if (HashFunction == HashFunction::StdHash) { |
| // Compatibility with old behavior. |
| BlendedHashes[I].PredHash = (uint8_t)hash_value(Hash); |
| } else { |
| BlendedHashes[I].PredHash = (uint8_t)Hash; |
| } |
| } |
| |
| // Assign hashes. |
| for (size_t I = 0; I < BasicBlocks.size(); I++) { |
| const BinaryBasicBlock *BB = BasicBlocks[I]; |
| BB->setHash(BlendedHashes[I].combine()); |
| } |
| } |
| // TODO: mediate the difference between flow function construction here in BOLT |
| // and in the compiler by splitting blocks with exception throwing calls at the |
| // call and adding the landing pad as the successor. |
| /// Create a wrapper flow function to use with the profile inference algorithm, |
| /// and initialize its jumps and metadata. |
| FlowFunction |
| createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) { |
| FlowFunction Func; |
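| // The block at index I in BlockOrder corresponds to Func.Blocks[I + 1]: index |
| // 0 is reserved for the dummy source and the last index for the dummy sink. |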
| |
| // Add a special "dummy" source so that there is always a unique entry point. |
| FlowBlock EntryBlock; |
| EntryBlock.Index = 0; |
| Func.Blocks.push_back(EntryBlock); |
| |
| // Create FlowBlock for every basic block in the binary function. |
| for (const BinaryBasicBlock *BB : BlockOrder) { |
| Func.Blocks.emplace_back(); |
| FlowBlock &Block = Func.Blocks.back(); |
| Block.Index = Func.Blocks.size() - 1; |
| (void)BB; |
| assert(Block.Index == BB->getIndex() + 1 && |
| "incorrectly assigned basic block index"); |
| } |
| |
| // Add a special "dummy" sink block so there is always a unique sink. |
| FlowBlock SinkBlock; |
| SinkBlock.Index = Func.Blocks.size(); |
| Func.Blocks.push_back(SinkBlock); |
| |
| // Create FlowJump for each jump between basic blocks in the binary function. |
| std::vector<uint64_t> InDegree(Func.Blocks.size(), 0); |
| for (const BinaryBasicBlock *SrcBB : BlockOrder) { |
| std::unordered_set<const BinaryBasicBlock *> UniqueSuccs; |
| // Collect regular jumps |
| for (const BinaryBasicBlock *DstBB : SrcBB->successors()) { |
| // Ignoring parallel edges |
| if (UniqueSuccs.find(DstBB) != UniqueSuccs.end()) |
| continue; |
| |
| Func.Jumps.emplace_back(); |
| FlowJump &Jump = Func.Jumps.back(); |
| Jump.Source = SrcBB->getIndex() + 1; |
| Jump.Target = DstBB->getIndex() + 1; |
| InDegree[Jump.Target]++; |
| UniqueSuccs.insert(DstBB); |
| } |
| // TODO: set jump from exit block to landing pad to Unlikely. |
| // If the block is an exit, add a dummy edge from it to the sink block. |
| if (UniqueSuccs.empty()) { |
| Func.Jumps.emplace_back(); |
| FlowJump &Jump = Func.Jumps.back(); |
| Jump.Source = SrcBB->getIndex() + 1; |
| Jump.Target = Func.Blocks.size() - 1; |
| InDegree[Jump.Target]++; |
| } |
| |
| // Collect jumps to landing pads |
| for (const BinaryBasicBlock *DstBB : SrcBB->landing_pads()) { |
| // Ignoring parallel edges |
| if (UniqueSuccs.find(DstBB) != UniqueSuccs.end()) |
| continue; |
| |
| Func.Jumps.emplace_back(); |
| FlowJump &Jump = Func.Jumps.back(); |
| Jump.Source = SrcBB->getIndex() + 1; |
| Jump.Target = DstBB->getIndex() + 1; |
| InDegree[Jump.Target]++; |
| UniqueSuccs.insert(DstBB); |
| } |
| } |
| |
| // Add dummy edges from the source (block 0) to every entry block and to every |
| // block without incoming jumps; edges to non-entry blocks are marked unlikely. |
| // The sink block is skipped. |
| assert(InDegree[0] == 0 && "dummy entry blocks shouldn't have predecessors"); |
| for (uint64_t I = 1; I < Func.Blocks.size() - 1; I++) { |
| const BinaryBasicBlock *BB = BlockOrder[I - 1]; |
| if (BB->isEntryPoint() || InDegree[I] == 0) { |
| Func.Jumps.emplace_back(); |
| FlowJump &Jump = Func.Jumps.back(); |
| Jump.Source = 0; |
| Jump.Target = I; |
| if (!BB->isEntryPoint()) |
| Jump.IsUnlikely = true; |
| } |
| } |
| |
| // Create necessary metadata for the flow function |
| for (FlowJump &Jump : Func.Jumps) { |
| assert(Jump.Source < Func.Blocks.size()); |
| Func.Blocks[Jump.Source].SuccJumps.push_back(&Jump); |
| assert(Jump.Target < Func.Blocks.size()); |
| Func.Blocks[Jump.Target].PredJumps.push_back(&Jump); |
| } |
| return Func; |
| } |
| |
| /// Assign initial block/jump weights based on the stale profile data. The goal |
| /// is to extract as much information from the stale profile as possible. Here |
| /// we assume that each basic block is specified via a hash value computed from |
| /// its content, and that the hashes of unchanged basic blocks stay the same |
| /// across different revisions of the binary. Blocks may also have pseudo probe |
| /// information in the profile and the binary, which is used for matching. |
| /// Whenever there is a count in the profile with the hash corresponding to one |
| /// of the basic blocks in the binary, the count is "matched" to the block. |
| /// Similarly, if both the source and the target blocks of a branch count in |
| /// the profile are matched to blocks in the binary, the count is assigned to |
| /// the corresponding jump in the CFG. |
| size_t matchWeights( |
| BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder, |
| const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func, |
| HashFunction HashFunction, YAMLProfileReader::ProfileLookupMap &IdToYamlBF, |
| const BinaryFunction &BF, |
| const ArrayRef<YAMLProfileReader::ProbeMatchSpec> ProbeMatchSpecs) { |
| |
| assert(Func.Blocks.size() == BlockOrder.size() + 2); |
| |
| std::vector<uint64_t> CallHashes; |
| std::vector<FlowBlock *> Blocks; |
| std::vector<BlendedBlockHash> BlendedHashes; |
| for (uint64_t I = 0; I < BlockOrder.size(); I++) { |
| const BinaryBasicBlock *BB = BlockOrder[I]; |
| assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock"); |
| |
| std::string CallHashStr = hashBlockCalls(BC, *BB); |
| if (CallHashStr.empty()) { |
| CallHashes.push_back(0); |
| } else { |
| if (HashFunction == HashFunction::StdHash) |
| CallHashes.push_back(std::hash<std::string>{}(CallHashStr)); |
| else if (HashFunction == HashFunction::XXH3) |
| CallHashes.push_back(llvm::xxh3_64bits(CallHashStr)); |
| else |
| llvm_unreachable("Unhandled HashFunction"); |
| } |
| |
| Blocks.push_back(&Func.Blocks[I + 1]); |
| BlendedBlockHash BlendedHash(BB->getHash()); |
| BlendedHashes.push_back(BlendedHash); |
| LLVM_DEBUG(dbgs() << "BB with index " << I << " has hash = " |
| << Twine::utohexstr(BB->getHash()) << "\n"); |
| } |
| StaleMatcher Matcher; |
| // Collects function pseudo probes for use in the StaleMatcher. |
| if (opts::StaleMatchingWithPseudoProbes) { |
| const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder(); |
| assert(Decoder && |
| "If pseudo probes are in use, pseudo probe decoder should exist"); |
| const AddressProbesMap &ProbeMap = Decoder->getAddress2ProbesMap(); |
| const uint64_t FuncAddr = BF.getAddress(); |
| for (const MCDecodedPseudoProbe &Probe : |
| ProbeMap.find(FuncAddr, FuncAddr + BF.getSize())) |
| if (const BinaryBasicBlock *BB = |
| BF.getBasicBlockContainingOffset(Probe.getAddress() - FuncAddr)) |
| Matcher.mapProbeToBB(&Probe, Blocks[BB->getIndex()]); |
| } |
| Matcher.init(Blocks, BlendedHashes, CallHashes); |
| |
| using FlowBlockTy = |
| std::pair<const FlowBlock *, const yaml::bolt::BinaryBasicBlockProfile *>; |
| using ProfileBlockMatchMap = DenseMap<uint32_t, FlowBlockTy>; |
| // Per-function profile => block index => matched flow block + its block profile |
| DenseMap<const yaml::bolt::BinaryFunctionProfile *, ProfileBlockMatchMap> |
| MatchedBlocks; |
| |
| // Map of FlowBlock and matching method. |
| DenseMap<const FlowBlock *, StaleMatcher::MatchMethod> MatchedFlowBlocks; |
| |
| auto addMatchedBlock = |
| [&](std::pair<const FlowBlock *, StaleMatcher::MatchMethod> BlockMethod, |
| const yaml::bolt::BinaryFunctionProfile &YamlBP, |
| const yaml::bolt::BinaryBasicBlockProfile &YamlBB) { |
| const auto &[MatchedBlock, Method] = BlockMethod; |
| if (!MatchedBlock) |
| return; |
| // Don't override earlier matches |
| if (MatchedFlowBlocks.contains(MatchedBlock)) |
| return; |
| MatchedFlowBlocks.try_emplace(MatchedBlock, Method); |
| MatchedBlocks[&YamlBP][YamlBB.Index] = {MatchedBlock, &YamlBB}; |
| }; |
| |
| // Match blocks from the profile to the blocks in CFG by strict hash. |
| for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) { |
| // Update matching stats. |
| ++BC.Stats.NumStaleBlocks; |
| BC.Stats.StaleSampleCount += YamlBB.ExecCount; |
| |
| assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile"); |
| BlendedBlockHash YamlHash(YamlBB.Hash); |
| addMatchedBlock(Matcher.matchBlockStrict(YamlHash), YamlBF, YamlBB); |
| } |
| // Match blocks from the profile to the blocks in CFG by pseudo probes. |
| for (const auto &[InlineNodeMap, YamlBP] : ProbeMatchSpecs) { |
| for (const yaml::bolt::BinaryBasicBlockProfile &BB : YamlBP.get().Blocks) |
| if (!BB.PseudoProbes.empty()) |
| addMatchedBlock(Matcher.matchBlockProbe(BB.PseudoProbes, InlineNodeMap), |
| YamlBP, BB); |
| } |
| // Match blocks from the profile to the blocks in CFG with loose methods. |
| for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) { |
| assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile"); |
| BlendedBlockHash YamlHash(YamlBB.Hash); |
| |
| std::string CallHashStr = hashBlockCalls(IdToYamlBF, YamlBB); |
| uint64_t CallHash = 0; |
| if (!CallHashStr.empty()) { |
| if (HashFunction == HashFunction::StdHash) |
| CallHash = std::hash<std::string>{}(CallHashStr); |
| else if (HashFunction == HashFunction::XXH3) |
| CallHash = llvm::xxh3_64bits(CallHashStr); |
| else |
| llvm_unreachable("Unhandled HashFunction"); |
| } |
| auto [MatchedBlock, Method] = Matcher.matchBlockLoose(YamlHash, CallHash); |
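| // If the profile's entry block has no match, fall back to the binary's first |
| // block so that the function entry still gets a count. |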
| if (MatchedBlock == nullptr && YamlBB.Index == 0) { |
| MatchedBlock = Blocks[0]; |
| // Report as loose match |
| Method = StaleMatcher::MATCH_OPCODE; |
| } |
| if (!MatchedBlock) { |
| LLVM_DEBUG(dbgs() << "Couldn't match yaml block (bid = " << YamlBB.Index |
| << ")" << " with hash " << Twine::utohexstr(YamlBB.Hash) |
| << "\n"); |
| continue; |
| } |
| addMatchedBlock({MatchedBlock, Method}, YamlBF, YamlBB); |
| } |
| |
| // Match jumps from the profile to the jumps from CFG |
| std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0); |
| std::vector<uint64_t> InWeight(Func.Blocks.size(), 0); |
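| // For every matched block, accumulate the total outgoing and incoming jump |
| // counts from the profile; the block count is later set to the maximum of the |
| // two. |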
| |
| for (const auto &[YamlBF, MatchMap] : MatchedBlocks) { |
| for (const auto &[YamlBBIdx, FlowBlockProfile] : MatchMap) { |
| const auto &[MatchedBlock, YamlBB] = FlowBlockProfile; |
| StaleMatcher::MatchMethod Method = MatchedFlowBlocks.lookup(MatchedBlock); |
| BlendedBlockHash BinHash = BlendedHashes[MatchedBlock->Index - 1]; |
| LLVM_DEBUG(dbgs() << "Matched yaml block (bid = " << YamlBBIdx << ")" |
| << " with hash " << Twine::utohexstr(YamlBB->Hash) |
| << " to BB (index = " << MatchedBlock->Index - 1 << ")" |
| << " with hash " << Twine::utohexstr(BinHash.combine()) |
| << "\n"); |
| (void)BinHash; |
| uint64_t ExecCount = YamlBB->ExecCount; |
| // Update matching stats accounting for the matched block. |
| switch (Method) { |
| case StaleMatcher::MATCH_EXACT: |
| ++BC.Stats.NumExactMatchedBlocks; |
| BC.Stats.ExactMatchedSampleCount += ExecCount; |
| LLVM_DEBUG(dbgs() << " exact match\n"); |
| break; |
| case StaleMatcher::MATCH_PROBE_EXACT: |
| ++BC.Stats.NumPseudoProbeExactMatchedBlocks; |
| BC.Stats.PseudoProbeExactMatchedSampleCount += ExecCount; |
| LLVM_DEBUG(dbgs() << " exact pseudo probe match\n"); |
| break; |
| case StaleMatcher::MATCH_PROBE_LOOSE: |
| ++BC.Stats.NumPseudoProbeLooseMatchedBlocks; |
| BC.Stats.PseudoProbeLooseMatchedSampleCount += ExecCount; |
| LLVM_DEBUG(dbgs() << " loose pseudo probe match\n"); |
| break; |
| case StaleMatcher::MATCH_CALL: |
| ++BC.Stats.NumCallMatchedBlocks; |
| BC.Stats.CallMatchedSampleCount += ExecCount; |
| LLVM_DEBUG(dbgs() << " call match\n"); |
| break; |
| case StaleMatcher::MATCH_OPCODE: |
| ++BC.Stats.NumLooseMatchedBlocks; |
| BC.Stats.LooseMatchedSampleCount += ExecCount; |
| LLVM_DEBUG(dbgs() << " loose match\n"); |
| break; |
| case StaleMatcher::NO_MATCH: |
| LLVM_DEBUG(dbgs() << " no match\n"); |
| } |
| } |
| |
| for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF->Blocks) { |
| for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) { |
| if (YamlSI.Count == 0) |
| continue; |
| |
| // Try to find the jump for a given (src, dst) pair from the profile and |
| // assign the jump weight based on the profile count |
| const uint64_t SrcIndex = YamlBB.Index; |
| const uint64_t DstIndex = YamlSI.Index; |
| |
| const FlowBlock *MatchedSrcBlock = MatchMap.lookup(SrcIndex).first; |
| const FlowBlock *MatchedDstBlock = MatchMap.lookup(DstIndex).first; |
| |
| if (MatchedSrcBlock != nullptr && MatchedDstBlock != nullptr) { |
| // Find a jump between the two blocks |
| FlowJump *Jump = nullptr; |
| for (FlowJump *SuccJump : MatchedSrcBlock->SuccJumps) { |
| if (SuccJump->Target == MatchedDstBlock->Index) { |
| Jump = SuccJump; |
| break; |
| } |
| } |
| // Assign the weight, if the corresponding jump is found |
| if (Jump != nullptr) { |
| Jump->Weight = YamlSI.Count; |
| Jump->HasUnknownWeight = false; |
| } |
| } |
| // Assign the weight for the src block, if it is found |
| if (MatchedSrcBlock != nullptr) |
| OutWeight[MatchedSrcBlock->Index] += YamlSI.Count; |
| // Assign the weight for the dst block, if it is found |
| if (MatchedDstBlock != nullptr) |
| InWeight[MatchedDstBlock->Index] += YamlSI.Count; |
| } |
| } |
| } |
| |
| // Assign block counts based on in-/out- jumps |
| for (FlowBlock &Block : Func.Blocks) { |
| if (OutWeight[Block.Index] == 0 && InWeight[Block.Index] == 0) { |
| assert(Block.HasUnknownWeight && "unmatched block with a positive count"); |
| continue; |
| } |
| Block.HasUnknownWeight = false; |
| Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]); |
| } |
| |
| return MatchedBlocks[&YamlBF].size(); |
| } |
| |
| /// The function finds all blocks that are (i) unreachable from the entry block |
| /// or (ii) do not have a path to an exit, and marks all such blocks 'cold' |
| /// so that profi does not send any flow to such blocks. |
| void preprocessUnreachableBlocks(FlowFunction &Func) { |
| const uint64_t NumBlocks = Func.Blocks.size(); |
| |
| // Start bfs from the source |
| std::queue<uint64_t> Queue; |
| std::vector<bool> VisitedEntry(NumBlocks, false); |
| for (uint64_t I = 0; I < NumBlocks; I++) { |
| FlowBlock &Block = Func.Blocks[I]; |
| if (Block.isEntry()) { |
| Queue.push(I); |
| VisitedEntry[I] = true; |
| break; |
| } |
| } |
| while (!Queue.empty()) { |
| const uint64_t Src = Queue.front(); |
| Queue.pop(); |
| for (FlowJump *Jump : Func.Blocks[Src].SuccJumps) { |
| const uint64_t Dst = Jump->Target; |
| if (!VisitedEntry[Dst]) { |
| Queue.push(Dst); |
| VisitedEntry[Dst] = true; |
| } |
| } |
| } |
| |
| // Start bfs from all sinks |
| std::vector<bool> VisitedExit(NumBlocks, false); |
| for (uint64_t I = 0; I < NumBlocks; I++) { |
| FlowBlock &Block = Func.Blocks[I]; |
| if (Block.isExit() && VisitedEntry[I]) { |
| Queue.push(I); |
| VisitedExit[I] = true; |
| } |
| } |
| while (!Queue.empty()) { |
| const uint64_t Src = Queue.front(); |
| Queue.pop(); |
| for (FlowJump *Jump : Func.Blocks[Src].PredJumps) { |
| const uint64_t Dst = Jump->Source; |
| if (!VisitedExit[Dst]) { |
| Queue.push(Dst); |
| VisitedExit[Dst] = true; |
| } |
| } |
| } |
| |
| // Zero out the weight of blocks that cannot lie on an entry-to-exit path so |
| // that no flow is sent through them |
| for (uint64_t I = 0; I < NumBlocks; I++) { |
| FlowBlock &Block = Func.Blocks[I]; |
| if (Block.Weight == 0) |
| continue; |
| if (!VisitedEntry[I] || !VisitedExit[I]) { |
| Block.Weight = 0; |
| Block.HasUnknownWeight = true; |
| Block.IsUnlikely = true; |
| for (FlowJump *Jump : Block.SuccJumps) { |
| if (Jump->Source == Block.Index && Jump->Target == Block.Index) { |
| Jump->Weight = 0; |
| Jump->HasUnknownWeight = true; |
| Jump->IsUnlikely = true; |
| } |
| } |
| } |
| } |
| } |
| |
| /// Decide if stale profile matching can be applied for a given function. |
| /// Currently we skip inference for (very) large instances and for instances |
| /// having "unexpected" control flow (e.g., having no sink basic blocks). |
| bool canApplyInference(const FlowFunction &Func, |
| const yaml::bolt::BinaryFunctionProfile &YamlBF, |
| const uint64_t &MatchedBlocks) { |
| if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize) |
| return false; |
| |
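| // Require that at least opts::StaleMatchingMinMatchedBlock percent of the |
| // profile blocks are matched to blocks in the CFG. |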
| if (MatchedBlocks * 100 < |
| opts::StaleMatchingMinMatchedBlock * YamlBF.Blocks.size()) |
| return false; |
| |
| // Reject the function if the artificial sink block has no predecessors, |
| // which means there are no exit blocks. |
| if (Func.Blocks[Func.Blocks.size() - 1].isEntry()) |
| return false; |
| |
| return true; |
| } |
| |
| /// Apply the profile inference algorithm for a given flow function. |
| void applyInference(FlowFunction &Func) { |
| ProfiParams Params; |
| // Set the params from the command-line flags. |
| Params.EvenFlowDistribution = opts::StaleMatchingEvenFlowDistribution; |
| Params.RebalanceUnknown = opts::StaleMatchingRebalanceUnknown; |
| Params.JoinIslands = opts::StaleMatchingJoinIslands; |
| |
| Params.CostBlockInc = opts::StaleMatchingCostBlockInc; |
| Params.CostBlockEntryInc = opts::StaleMatchingCostBlockInc; |
| Params.CostBlockDec = opts::StaleMatchingCostBlockDec; |
| Params.CostBlockEntryDec = opts::StaleMatchingCostBlockDec; |
| Params.CostBlockUnknownInc = opts::StaleMatchingCostBlockUnknownInc; |
| |
| Params.CostJumpInc = opts::StaleMatchingCostJumpInc; |
| Params.CostJumpFTInc = opts::StaleMatchingCostJumpInc; |
| Params.CostJumpDec = opts::StaleMatchingCostJumpDec; |
| Params.CostJumpFTDec = opts::StaleMatchingCostJumpDec; |
| Params.CostJumpUnknownInc = opts::StaleMatchingCostJumpUnknownInc; |
| Params.CostJumpUnknownFTInc = opts::StaleMatchingCostJumpUnknownFTInc; |
| |
| applyFlowInference(Params, Func); |
| } |
| |
| /// Collect inferred counts from the flow function and update annotations in |
| /// the binary function. |
| void assignProfile(BinaryFunction &BF, |
| const BinaryFunction::BasicBlockOrderType &BlockOrder, |
| FlowFunction &Func) { |
| BinaryContext &BC = BF.getBinaryContext(); |
| |
| assert(Func.Blocks.size() == BlockOrder.size() + 2); |
| for (uint64_t I = 0; I < BlockOrder.size(); I++) { |
| FlowBlock &Block = Func.Blocks[I + 1]; |
| BinaryBasicBlock *BB = BlockOrder[I]; |
| |
| // Update block's count |
| BB->setExecutionCount(Block.Flow); |
| |
| // Update jump counts: (i) clean existing counts and then (ii) set new ones |
| auto BI = BB->branch_info_begin(); |
| for (const BinaryBasicBlock *DstBB : BB->successors()) { |
| (void)DstBB; |
| BI->Count = 0; |
| BI->MispredictedCount = 0; |
| ++BI; |
| } |
| for (FlowJump *Jump : Block.SuccJumps) { |
| if (Jump->IsUnlikely) |
| continue; |
| if (Jump->Flow == 0) |
| continue; |
| |
| // Skips the artificial sink block. |
| if (Jump->Target == Func.Blocks.size() - 1) |
| continue; |
| BinaryBasicBlock &SuccBB = *BlockOrder[Jump->Target - 1]; |
| // Check if the edge corresponds to a regular jump or a landing pad |
| if (BB->getSuccessor(SuccBB.getLabel())) { |
| BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(SuccBB); |
| BI.Count += Jump->Flow; |
| } else { |
| BinaryBasicBlock *LP = BB->getLandingPad(SuccBB.getLabel()); |
| if (LP && LP->getKnownExecutionCount() < Jump->Flow) |
| LP->setExecutionCount(Jump->Flow); |
| } |
| } |
| |
| // Update call-site annotations |
| auto setOrUpdateAnnotation = [&](MCInst &Instr, StringRef Name, |
| uint64_t Count) { |
| if (BC.MIB->hasAnnotation(Instr, Name)) |
| BC.MIB->removeAnnotation(Instr, Name); |
| // Do not add zero-count annotations |
| if (Count == 0) |
| return; |
| BC.MIB->addAnnotation(Instr, Name, Count); |
| }; |
| |
| for (MCInst &Instr : *BB) { |
| // Ignore pseudo instructions |
| if (BC.MIB->isPseudo(Instr)) |
| continue; |
| // Ignore jump tables |
| const MCInst *LastInstr = BB->getLastNonPseudoInstr(); |
| if (BC.MIB->getJumpTable(*LastInstr) && LastInstr == &Instr) |
| continue; |
| |
| if (BC.MIB->isIndirectCall(Instr) || BC.MIB->isIndirectBranch(Instr)) { |
| auto &ICSP = BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>( |
| Instr, "CallProfile"); |
| if (!ICSP.empty()) { |
| // Try to evenly distribute the counts among the call sites |
| const uint64_t TotalCount = Block.Flow; |
| const uint64_t NumSites = ICSP.size(); |
| for (uint64_t Idx = 0; Idx < ICSP.size(); Idx++) { |
| IndirectCallProfile &CSP = ICSP[Idx]; |
| uint64_t CountPerSite = TotalCount / NumSites; |
| // When counts cannot be exactly distributed, increase by 1 the |
| // counts of the first (TotalCount % NumSites) call sites |
| if (Idx < TotalCount % NumSites) |
| CountPerSite++; |
| CSP.Count = CountPerSite; |
| } |
| } else { |
| ICSP.emplace_back(nullptr, Block.Flow, 0); |
| } |
| } else if (BC.MIB->getConditionalTailCall(Instr)) { |
| // We don't know exactly how many times the conditional tail call is |
| // executed; conservatively, set its count to the count of the block. |
| setOrUpdateAnnotation(Instr, "CTCTakenCount", Block.Flow); |
| BC.MIB->removeAnnotation(Instr, "CTCMispredCount"); |
| } else if (BC.MIB->isCall(Instr)) { |
| setOrUpdateAnnotation(Instr, "Count", Block.Flow); |
| } |
| } |
| } |
| |
| // Update function's execution count and mark the function inferred. |
| BF.setExecutionCount(Func.Blocks[0].Flow); |
| BF.setHasInferredProfile(true); |
| } |
| |
| bool YAMLProfileReader::inferStaleProfile( |
| BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF, |
| const ArrayRef<ProbeMatchSpec> ProbeMatchSpecs) { |
| |
| NamedRegionTimer T("inferStaleProfile", "stale profile inference", "rewrite", |
| "Rewrite passes", opts::TimeRewrite); |
| |
| if (!BF.hasCFG()) |
| return false; |
| |
| LLVM_DEBUG(dbgs() << "BOLT-INFO: applying profile inference for " |
| << "\"" << BF.getPrintName() << "\"\n"); |
| |
| // Make sure that block hashes are up to date. |
| BF.computeBlockHashes(YamlBP.Header.HashFunction); |
| |
| const BinaryFunction::BasicBlockOrderType BlockOrder( |
| BF.getLayout().block_begin(), BF.getLayout().block_end()); |
| |
| // Create a wrapper flow function to use with the profile inference algorithm. |
| FlowFunction Func = createFlowFunction(BlockOrder); |
| |
| // Match as many block/jump counts from the stale profile as possible; |
| // MatchedBlocks tracks the number of profile blocks that were matched. |
| size_t MatchedBlocks = |
| matchWeights(BF.getBinaryContext(), BlockOrder, YamlBF, Func, |
| YamlBP.Header.HashFunction, IdToYamLBF, BF, ProbeMatchSpecs); |
| |
| // Adjust the flow function by marking unreachable blocks Unlikely so that |
| // they don't get any counts assigned. |
| preprocessUnreachableBlocks(Func); |
| |
| // Check if profile inference can be applied for the instance. |
| if (!canApplyInference(Func, YamlBF, MatchedBlocks)) |
| return false; |
| |
| // Apply the profile inference algorithm. |
| applyInference(Func); |
| |
| // Collect inferred counts and update function annotations. |
| assignProfile(BF, BlockOrder, Func); |
| |
| // As of now, we always mark the binary function as having a "correct" profile. |
| // In the future, we may discard the results for instances with poor inference |
| // metrics and keep such functions un-optimized. |
| return true; |
| } |
| |
| } // end namespace bolt |
| } // end namespace llvm |