| //===- bolt/Target/X86/X86MCSymbolizer.cpp --------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "X86MCSymbolizer.h" |
| #include "MCTargetDesc/X86BaseInfo.h" |
| #include "bolt/Core/BinaryContext.h" |
| #include "bolt/Core/BinaryFunction.h" |
| #include "bolt/Core/MCPlusBuilder.h" |
| #include "bolt/Core/Relocation.h" |
| #include "llvm/MC/MCInst.h" |
| #include "llvm/MC/MCRegisterInfo.h" |
| |
| #define DEBUG_TYPE "bolt-symbolizer" |
| |
| namespace llvm { |
| namespace bolt { |
| |
| X86MCSymbolizer::~X86MCSymbolizer() {} |
| |
| bool X86MCSymbolizer::tryAddingSymbolicOperand( |
| MCInst &Inst, raw_ostream &CStream, int64_t Value, uint64_t InstAddress, |
| bool IsBranch, uint64_t ImmOffset, uint64_t ImmSize, uint64_t InstSize) { |
| if (IsBranch) |
| return false; |
| |
| // Ignore implicit operands. |
| if (ImmSize == 0) |
| return false; |
| |
| BinaryContext &BC = Function.getBinaryContext(); |
| MCContext *Ctx = BC.Ctx.get(); |
| |
| if (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst)) |
| return false; |
| |
| /// Add symbolic operand to the instruction with an optional addend. |
| auto addOperand = [&](const MCSymbol *Symbol, uint64_t Addend) { |
| const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, *Ctx); |
| if (Addend) |
| Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Addend, *Ctx), |
| *Ctx); |
| Inst.addOperand(MCOperand::createExpr(Expr)); |
| }; |
| |
| // Check if the operand being added is a displacement part of a compound |
| // memory operand that uses PC-relative addressing. If it is, try to symbolize |
| // it without relocations. Return true on success, false otherwise. |
| auto processPCRelOperandNoRel = [&]() { |
| const int MemOp = BC.MIB->getMemoryOperandNo(Inst); |
| if (MemOp == -1) |
| return false; |
| |
| const unsigned DispOp = MemOp + X86::AddrDisp; |
| if (Inst.getNumOperands() != DispOp) |
| return false; |
| |
| const MCOperand &Base = Inst.getOperand(MemOp + X86::AddrBaseReg); |
| if (Base.getReg() != BC.MRI->getProgramCounter()) |
| return false; |
| |
| const MCOperand &Scale = Inst.getOperand(MemOp + X86::AddrScaleAmt); |
| const MCOperand &Index = Inst.getOperand(MemOp + X86::AddrIndexReg); |
| if (Scale.getImm() != 0 && Index.getReg() != MCRegister::NoRegister) |
| return false; |
| |
| const MCSymbol *TargetSymbol; |
| uint64_t TargetOffset; |
| |
| if (!CreateNewSymbols) { |
| if (BinaryData *BD = BC.getBinaryDataContainingAddress(Value)) { |
| TargetSymbol = BD->getSymbol(); |
| TargetOffset = Value - BD->getAddress(); |
| } else { |
| return false; |
| } |
| } else { |
| std::tie(TargetSymbol, TargetOffset) = |
| BC.handleAddressRef(Value, Function, /*IsPCRel=*/true); |
| } |
| |
| addOperand(TargetSymbol, TargetOffset); |
| |
| return true; |
| }; |
| |
| // Check for GOTPCRELX relocations first. Because these relocations allow the |
| // linker to modify the instruction, we have to check the offset range |
| // corresponding to the instruction, not the offset of the operand. |
| // Note that if there is GOTPCRELX relocation against the instruction, there |
| // will be no other relocation in this range, since GOTPCRELX applies only to |
| // certain instruction types. |
| const uint64_t InstOffset = InstAddress - Function.getAddress(); |
| const Relocation *Relocation = |
| Function.getRelocationInRange(InstOffset, InstOffset + InstSize); |
| if (Relocation && Relocation::isX86GOTPCRELX(Relocation->Type)) { |
| // If the operand is PC-relative, convert it without using the relocation |
| // information. For GOTPCRELX, it is safe to use the absolute address |
| // instead of extracting the addend from the relocation, as non-standard |
| // forms will be rejected by linker conversion process and the operand |
| // will always reference GOT which we don't rewrite. |
| if (processPCRelOperandNoRel()) |
| return true; |
| |
| // The linker converted the PC-relative address to an absolute one. |
| // Symbolize this address. |
| if (CreateNewSymbols) |
| BC.handleAddressRef(Value, Function, /*IsPCRel=*/false); |
| |
| const BinaryData *Target = BC.getBinaryDataAtAddress(Value); |
| if (!Target) { |
| assert(!CreateNewSymbols && |
| "BinaryData should exist at converted GOTPCRELX destination"); |
| return false; |
| } |
| |
| addOperand(Target->getSymbol(), /*Addend=*/0); |
| |
| return true; |
| } |
| |
| // Check for relocations against the operand. |
| if (!Relocation || Relocation->Offset != InstOffset + ImmOffset) |
| Relocation = Function.getRelocationAt(InstOffset + ImmOffset); |
| |
| if (!Relocation) |
| return processPCRelOperandNoRel(); |
| |
| // GOTPC64 is special because the X86 Assembler doesn't know how to emit |
| // a PC-relative 8-byte fixup, which is what we need to cover this. The |
| // only way to do this is to use the symbol name _GLOBAL_OFFSET_TABLE_. |
| if (Relocation::isX86GOTPC64(Relocation->Type)) { |
| auto PairOrErr = handleGOTPC64(*Relocation, InstAddress); |
| if (auto E = PairOrErr.takeError()) { |
| Function.setSimple(false); |
| BC.logBOLTErrorsAndQuitOnFatal(std::move(E)); |
| return false; |
| } |
| auto [Sym, Addend] = *PairOrErr; |
| addOperand(Sym, Addend); |
| return true; |
| } |
| |
| uint64_t SymbolValue = Relocation->Value - Relocation->Addend; |
| if (Relocation->isPCRelative()) |
| SymbolValue += InstAddress + ImmOffset; |
| |
| // Process reference to the symbol. |
| if (CreateNewSymbols) |
| BC.handleAddressRef(SymbolValue, Function, Relocation->isPCRelative()); |
| |
| uint64_t Addend = Relocation->Addend; |
| // Real addend for pc-relative targets is adjusted with a delta from |
| // the relocation placement to the next instruction. |
| if (Relocation->isPCRelative()) |
| Addend += InstOffset + InstSize - Relocation->Offset; |
| |
| addOperand(Relocation->Symbol, Addend); |
| |
| return true; |
| } |
| |
| Expected<std::pair<MCSymbol *, uint64_t>> |
| X86MCSymbolizer::handleGOTPC64(const Relocation &R, uint64_t InstrAddr) { |
| BinaryContext &BC = Function.getBinaryContext(); |
| const BinaryData *GOTSymBD = BC.getGOTSymbol(); |
| if (!GOTSymBD || !GOTSymBD->getAddress()) { |
| // This error is pretty serious but we can't kill the disassembler |
| // because of it, so don't make it fatal. Log it and warn the user. |
| return createNonFatalBOLTError( |
| "R_X86_GOTPC64 relocation is present but we did not detect " |
| "a valid _GLOBAL_OFFSET_TABLE_ in symbol table\n"); |
| } |
| // R_X86_GOTPC64 are not relative to the Reloc nor end of instruction, |
| // but the start of the MOVABSQ instruction. So the Target Address is |
| // whatever is encoded in the original operand when we disassembled |
| // the binary (here, R.Value) plus MOVABSQ address (InstrAddr). |
| // Here we extract the intended Addend by subtracting the real |
| // GOT addr. |
| const int64_t Addend = R.Value + InstrAddr - GOTSymBD->getAddress(); |
| return std::make_pair(BC.Ctx->getOrCreateSymbol("_GLOBAL_OFFSET_TABLE_"), |
| Addend); |
| } |
| |
| void X86MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream, |
| int64_t Value, |
| uint64_t Address) {} |
| |
| } // namespace bolt |
| } // namespace llvm |