blob: c4c67bd68198b6681b4dc95b7d3f0cb43caae5df [file] [log] [blame]
//===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Computation of hash values over BinaryFunction and BinaryBasicBlock.
//
//===----------------------------------------------------------------------===//
#include "bolt/Core/HashUtilities.h"
#include "bolt/Core/BinaryContext.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
namespace bolt {
std::string hashInteger(uint64_t Value) {
std::string HashString;
if (Value == 0)
HashString.push_back(0);
while (Value) {
uint8_t LSB = Value & 0xff;
HashString.push_back(LSB);
Value >>= 8;
}
return HashString;
}
std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
std::string HashString;
// Ignore function references.
if (BC.getFunctionForSymbol(&Symbol))
return HashString;
llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
if (!ErrorOrValue)
return HashString;
// Ignore jump table references.
if (BC.getJumpTableContainingAddress(*ErrorOrValue))
return HashString;
return HashString.append(hashInteger(*ErrorOrValue));
}
std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
switch (Expr.getKind()) {
case MCExpr::Constant:
return hashInteger(cast<MCConstantExpr>(Expr).getValue());
case MCExpr::SymbolRef:
return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol());
case MCExpr::Unary: {
const auto &UnaryExpr = cast<MCUnaryExpr>(Expr);
return hashInteger(UnaryExpr.getOpcode())
.append(hashExpr(BC, *UnaryExpr.getSubExpr()));
}
case MCExpr::Binary: {
const auto &BinaryExpr = cast<MCBinaryExpr>(Expr);
return hashExpr(BC, *BinaryExpr.getLHS())
.append(hashInteger(BinaryExpr.getOpcode()))
.append(hashExpr(BC, *BinaryExpr.getRHS()));
}
case MCExpr::Target:
return std::string();
}
llvm_unreachable("invalid expression kind");
}
std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
if (Operand.isImm())
return hashInteger(Operand.getImm());
if (Operand.isReg())
return hashInteger(Operand.getReg());
if (Operand.isExpr())
return hashExpr(BC, *Operand.getExpr());
return std::string();
}
std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB,
OperandHashFuncTy OperandHashFunc) {
const bool IsX86 = BC.isX86();
// The hash is computed by creating a string of all instruction opcodes and
// possibly their operands and then hashing that string with std::hash.
std::string HashString;
for (const MCInst &Inst : BB) {
if (BC.MIB->isPseudo(Inst))
continue;
unsigned Opcode = Inst.getOpcode();
// Ignore unconditional jumps since we check CFG consistency by processing
// basic blocks in order and do not rely on branches to be in-sync with
// CFG. Note that we still use condition code of conditional jumps.
if (BC.MIB->isUnconditionalBranch(Inst))
continue;
if (IsX86 && BC.MIB->isConditionalBranch(Inst))
Opcode = BC.MIB->getShortBranchOpcode(Opcode);
if (Opcode == 0) {
HashString.push_back(0);
} else {
StringRef OpcodeName = BC.InstPrinter->getOpcodeName(Opcode);
HashString.append(OpcodeName.str());
}
for (const MCOperand &Op : MCPlus::primeOperands(Inst))
HashString.append(OperandHashFunc(Op));
}
return HashString;
}
/// A "loose" hash of a basic block to use with the stale profile matching. The
/// computed value will be the same for blocks with minor changes (such as
/// reordering of instructions or using different operands) but may result in
/// collisions that need to be resolved by a stronger hashing.
std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) {
// The hash is computed by creating a string of all lexicographically ordered
// instruction opcodes, which is then hashed with std::hash.
std::set<std::string> Opcodes;
for (const MCInst &Inst : BB) {
// Skip pseudo instructions and nops.
if (BC.MIB->isPseudo(Inst) || BC.MIB->isNoop(Inst))
continue;
// Ignore unconditional jumps, as they can be added / removed as a result
// of basic block reordering.
if (BC.MIB->isUnconditionalBranch(Inst))
continue;
// Do not distinguish different types of conditional jumps.
if (BC.MIB->isConditionalBranch(Inst)) {
Opcodes.insert("JMP");
continue;
}
std::string Mnemonic = BC.InstPrinter->getMnemonic(&Inst).first;
llvm::erase_if(Mnemonic, [](unsigned char ch) { return std::isspace(ch); });
Opcodes.insert(Mnemonic);
}
std::string HashString;
for (const std::string &Opcode : Opcodes)
HashString.append(Opcode);
return HashString;
}
} // namespace bolt
} // namespace llvm