blob: cef2fe7aac28f8a6a4677b1dcdb3dd722cfe1ba8 [file] [log] [blame]
// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "garnet/bin/zxdb/client/disassembler.h"
#include <inttypes.h>
#include <limits>
#include "garnet/bin/zxdb/client/arch_info.h"
#include "garnet/bin/zxdb/client/memory_dump.h"
#include "garnet/lib/debug_ipc/records.h"
#include "garnet/public/lib/fxl/strings/string_printf.h"
#include "garnet/public/lib/fxl/strings/trim.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Support/TargetRegistry.h"
namespace zxdb {
namespace {
// Replaces, in place, every character of |str| that matches ANY of the
// characters in the NUL-terminated set |search_for| with |replace_with|.
//
// |str| must be non-null. Characters that do not match are left untouched and
// the length of the string never changes.
void ReplaceAllInstancesOf(const char* search_for, char replace_with,
                           std::string* str) {
  size_t found_pos = 0;
  while ((found_pos = str->find_first_of(search_for, found_pos)) !=
         std::string::npos) {
    (*str)[found_pos] = replace_with;
    // Resume the search after the character just written. Without this step
    // the loop never terminates when |replace_with| is itself one of the
    // |search_for| characters, because the same position keeps matching.
    ++found_pos;
  }
}
// Fills in the display strings for a run of bytes that could not be decoded
// as an instruction.
//
//   |data|, |len|   The undecodable bytes.
//   |instruction|   Set to the ".byte" pseudo-op.
//   |params|        Set to the bytes as space-separated two-digit hex values,
//                   e.g. "0x0f 0xa1".
//   |comment|       Set to a fixed explanatory message.
//
// All three output strings are overwritten; none may be null.
void GetInvalidInstructionStrs(const uint8_t* data, size_t len,
                               std::string* instruction, std::string* params,
                               std::string* comment) {
  *instruction = ".byte";

  // Start from an empty string so stale contents in the caller's |params| are
  // not carried into the result, matching how the other two outputs are
  // assigned rather than appended to.
  params->clear();
  for (size_t i = 0; i < len; i++) {
    if (i > 0)
      params->push_back(' ');
    params->append(fxl::StringPrintf("0x%2.2x", data[i]));
  }

  *comment = "Invalid instruction.";
}
// LLVM prints instructions in the form "\tmov\ta,b". Given such a string in
// |instruction|, this strips the leading tab(s), moves everything after the
// next tab ("a,b") into |params|, and leaves only the mnemonic ("mov") in
// |instruction|.
//
// |instruction| is left unchanged when it is empty or consists only of tabs.
// |params| is left unchanged when there is no tab following the mnemonic, so
// callers that want an empty result in that case must pass in an empty string.
void SplitInstruction(std::string* instruction, std::string* params) {
  size_t first_char = instruction->find_first_not_of("\t");
  if (first_char == std::string::npos)
    return;  // Empty or nothing but tabs: there is no mnemonic to extract.

  // Trim leading tabs.
  instruction->erase(0, first_char);

  // Find the tab that separates the mnemonic from its operands.
  size_t param_separator = instruction->find('\t');
  if (param_separator == std::string::npos)
    return;  // No operands; |params| is not touched.

  // Copy the operands out, then cut the separator and operands off the
  // mnemonic.
  *params = instruction->substr(param_separator + 1);
  instruction->erase(param_separator);
}
} // namespace
Disassembler::Row::Row() = default;

// Builds a fully populated row.
//
// The |bytes_len| bytes starting at |bytes| are copied into the row. The three
// strings are taken by value and moved into place so callers passing
// temporaries pay for no extra copies.
//
// NOTE(review): the initializer order below assumes the members are declared
// as address, bytes, op, params, comment — confirm against the header to avoid
// a -Wreorder warning.
Disassembler::Row::Row(uint64_t address, const uint8_t* bytes, size_t bytes_len,
                       std::string op, std::string params, std::string comment)
    : address(address),
      bytes(bytes, bytes + bytes_len),
      op(std::move(op)),
      params(std::move(params)),
      comment(std::move(comment)) {}

Disassembler::Row::~Row() = default;
// Two rows are equal when their address, raw bytes, op, params and comment
// all match.
bool Disassembler::Row::operator==(const Row& other) const {
  return address == other.address && bytes == other.bytes && op == other.op &&
         params == other.params && comment == other.comment;
}
// Construction and destruction are compiler-generated; the architecture
// pointer and the LLVM context are only populated by Init().
// NOTE(review): presumably these are defined out-of-line so the header can
// forward-declare the LLVM types held by the members — confirm in the header.
Disassembler::Disassembler() = default;
Disassembler::~Disassembler() = default;
// Prepares this disassembler for the architecture described by |arch|.
//
// Stores the |arch| pointer (the object is not copied) and creates the
// llvm::MCContext from its asm info and register info. Returns an error when
// disasm_ is null and a default-constructed Err otherwise.
//
// NOTE(review): this definition is incomplete as it stands — see the notes
// below. It will not compile until the missing lines are restored.
Err Disassembler::Init(const ArchInfo* arch) {
arch_ = arch;
context_ = std::make_unique<llvm::MCContext>(arch_->asm_info(),
arch_->register_info(), nullptr);
// NOTE(review): nothing visible here assigns disasm_ before it is tested; the
// statement that creates the llvm::MCDisassembler appears to be missing.
if (!disasm_)
return Err("Couldn't create LLVM disassembler.");
constexpr int kAssemblyFlavor = 1; // 1 means "Intel" (not AT&T).
// NOTE(review): the next two lines are a bare argument list whose call head
// is missing. The arguments (triple, syntax variant, asm info, instr info,
// register info) and the extra ')' suggest a wrapped
// llvm::Target::createMCInstPrinter() call that initializes printer_ —
// confirm and restore. The result is dereferenced below without a null check.
*arch_->triple(), kAssemblyFlavor, *arch_->asm_info(),
*arch_->instr_info(), *arch_->register_info()));
printer_->setPrintHexStyle(llvm::HexStyle::C); // ::C = 0xff-style.
return Err();
// NOTE(review): the closing brace of this function is not present.
// Disassembles one instruction from the front of |data| into |out|.
//
// |address| is recorded in the row and handed to the LLVM decoder as the
// instruction's address. On a successful decode the printed instruction is
// split into op and params by SplitInstruction(). On a failed decode the
// function returns 0 unless options.emit_undecodable is set, in which case
// min(data_len, arch_->instr_align()) bytes are described by
// GetInvalidInstructionStrs().
//
// A non-empty comment has surrounding CR/LF/space trimmed, embedded CR/LF
// replaced by spaces, and is prefixed with the architecture's comment string
// followed by a space.
//
// Returns the number of bytes consumed; those bytes are also copied into
// out->bytes.
//
// NOTE(review): several statements and all closing braces of this definition
// are missing — see the notes below. It will not compile as it stands.
size_t Disassembler::DisassembleOne(const uint8_t* data, size_t data_len,
uint64_t address, const Options& options,
Row* out) const {
out->address = address;
// Decode.
llvm::MCInst inst;
uint64_t consumed = 0;
auto status = disasm_->getInstruction(inst, consumed,
llvm::ArrayRef<uint8_t>(data, data_len),
address, llvm::nulls(), llvm::nulls());
if (status == llvm::MCDisassembler::Success) {
// Print the instruction. Note that LLVM appends to the strings so we need
// to make sure they're empty before using.
// NOTE(review): no visible statement empties out->op, out->params or
// out->comment, although the comment above says they must be empty.
llvm::raw_string_ostream inst_stream(out->op);
llvm::raw_string_ostream comment_stream(out->comment);
// NOTE(review): comment_stream is never passed to printer_ in the code
// visible here, so out->comment cannot be filled on this path; a line such as
// a setCommentStream() call may be missing — confirm.
printer_->printInst(&inst, inst_stream, llvm::StringRef(),
// NOTE(review): the printInst() call above is cut off; its remaining
// argument(s) and closing ");" are missing. Any flush of the two string
// streams before out->op is read is also not visible.
SplitInstruction(&out->op, &out->params);
} else {
// Failure decoding.
if (!options.emit_undecodable)
return 0;
consumed = std::min(data_len, arch_->instr_align());
GetInvalidInstructionStrs(data, consumed, &out->op, &out->params,
// NOTE(review): the call above is cut off; by the helper's signature the
// missing final argument is the comment output string. The closing brace of
// this else-branch is also missing.
// Comments.
if (!out->comment.empty()) {
// Canonicalize the comments, they'll end in a newline (which is added
// manually later) and may contain embedded newlines.
out->comment = fxl::TrimString(out->comment, "\r\n ").ToString();
ReplaceAllInstancesOf("\r\n", ' ', &out->comment);
out->comment =
arch_->asm_info()->getCommentString().str() + " " + out->comment;
// NOTE(review): the brace closing the if-block above is missing.
out->bytes = std::vector<uint8_t>(data, data + consumed);
return consumed;
// NOTE(review): the closing brace of this function is not present.
size_t Disassembler::DisassembleMany(const uint8_t* data, size_t data_len,
uint64_t start_address,
const Options& in_options,
size_t max_instructions,
std::vector<Row>* out) const {
if (max_instructions == 0)
max_instructions = std::numeric_limits<size_t>::max();
// Force emit_undecodable to true or we can never advance past undecodable
// instructions.
Options options = in_options;
options.emit_undecodable = true;
size_t byte_offset = 0;
while (byte_offset < data_len && out->size() < max_instructions) {
size_t bytes_read =
DisassembleOne(&data[byte_offset], data_len - byte_offset,
start_address + byte_offset, options, &out->back());
FXL_DCHECK(bytes_read > 0);
byte_offset += bytes_read;
return byte_offset;
// Disassembles the contents of a MemoryDump starting at |start_address|,
// writing rows to |out|.
//
// The dump's blocks are visited in order. Blocks that end at or before the
// current address are skipped. A block marked invalid produces a single "??"
// row whose comment names the invalid address (for the last block) or the
// inclusive invalid address range (for any other block). Valid blocks are
// handed to DisassembleMany().
//
// A |max_instructions| of 0 means no limit. Returns dump.size() when every
// block was processed; the early return taken when the instruction limit is
// reached is described by the comment next to it.
//
// NOTE(review): several statements and all closing braces of this definition
// are missing — see the notes below. It will not compile as it stands.
size_t Disassembler::DisassembleDump(const MemoryDump& dump,
uint64_t start_address,
const Options& options,
size_t max_instructions,
std::vector<Row>* out) const {
if (max_instructions == 0)
max_instructions = std::numeric_limits<size_t>::max();
uint64_t cur_address = start_address;
for (size_t block_i = 0; block_i < dump.blocks().size(); block_i++) {
const debug_ipc::MemoryBlock& block = dump.blocks()[block_i];
uint64_t block_end = block.address + block.size;
if (cur_address >= block_end)
continue; // Not in this block.
if (!block.valid) {
// Invalid region.
std::string comment =
arch_->asm_info()->getCommentString().str() + " Invalid memory @ ";
if (block_i == dump.blocks().size() - 1) {
// If the last block, just show the starting address because the size
// will normally be irrelevant (say disassembling at the current IP
// which might be invalid -- the user doesn't care how big the
// invalid memory region is, or how much was requested).
comment += fxl::StringPrintf("0x%" PRIx64, block.address);
} else {
// Invalid range.
comment +=
fxl::StringPrintf("0x%" PRIx64 " - 0x%" PRIx64, block.address,
block.address + block.size - 1);
// NOTE(review): the brace closing the else-branch above is missing.
// Append the row.
// NOTE(review): no statement visible here adds a row to |out| before back()
// is taken; on an empty vector that is undefined behaviour, and otherwise it
// would overwrite the previous row. An append appears to be missing.
Row& row = out->back();
row.address = block.address;
row.op = "??";
row.comment = std::move(comment);
cur_address = block_end;
// NOTE(review): the end of the invalid-block branch is not visible, so it is
// unclear whether the code below runs for invalid blocks too or sits in an
// else-branch — confirm against the original file.
uint64_t block_offset = cur_address - block.address;
// NOTE(review): the condition on the next line is truncated; its right-hand
// operand and closing ')' are missing.
if (block_offset < {
// Valid region, print instructions to the end of the block.
// NOTE(review): DisassembleMany() takes six arguments (data, data_len,
// start_address, options, max_instructions, out) but only the last four are
// visible below; the data pointer and length arguments are missing.
size_t block_bytes_consumed = DisassembleMany(
block.address + block_offset, options, max_instructions, out);
if (out->size() >= max_instructions) {
// Return the number of bytes from the beginning of the memory dump
// that were consumed.
// NOTE(review): the return expression below is truncated after the '-'; the
// subtrahend and closing ");" are missing. As written it also does not add
// block_offset — verify the intended arithmetic when restoring it.
return static_cast<size_t>(block.address + block_bytes_consumed -
cur_address = block_end;
// All bytes of the memory dump were consumed.
return static_cast<size_t>(dump.size());
// NOTE(review): the closing brace of this function is not present.
} // namespace zxdb