bin/zxdb/client/disassembler.cc - garnet - Git at Google

 // Copyright 2018 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "garnet/bin/zxdb/client/disassembler.h"

 #include <inttypes.h>
 #include <limits>

 #include "garnet/bin/zxdb/client/arch_info.h"
 #include "garnet/bin/zxdb/client/memory_dump.h"
 #include "garnet/lib/debug_ipc/records.h"
 #include "garnet/public/lib/fxl/strings/string_printf.h"
 #include "garnet/public/lib/fxl/strings/trim.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCInstPrinter.h"
 #include "llvm/Support/TargetRegistry.h"

 namespace zxdb {

 namespace {

 // In-place replaces instances of ANY of the characters in "search_for" with the
 // given replacement in the given string.
 void ReplaceAllInstancesOf(const char* search_for, char replace_with,
                            std::string* str) {
   size_t found_pos = 0;
   while ((found_pos = str->find_first_of(search_for, found_pos)) !=
          std::string::npos) {
     (*str)[found_pos] = replace_with;
   }
 }

 void GetInvalidInstructionStrs(const uint8_t* data, size_t len,
                                std::string* instruction, std::string* params,
                                std::string* comment) {
   *instruction = ".byte";
   params->clear();
   for (size_t i = 0; i < len; i++) {
     if (i > 0)
       params->push_back(' ');
     params->append(fxl::StringPrintf("0x%2.2x", data[i]));
   }
   *comment = "Invalid instruction.";
 }

 // LLVM generates a instructions like "\tmov\ta,b". Given a string like this
 // with two tabs in the instruction input, separates the parameters ("a,b")
 // off into the given params string, and strips tabs leaving only the
 // instruction ("mov") in the input string.
 void SplitInstruction(std::string* instruction, std::string* params) {
   params->clear();

   size_t first_char = instruction->find_first_not_of("\t");
   if (first_char == std::string::npos)
     return;  // Leave instruction unchanged if there are no tabs.

   // Trim leading tabs.
   instruction->erase(instruction->begin(), instruction->begin() + first_char);

   // Extract the params.
   size_t param_separator = instruction->find('\t');
   if (param_separator == std::string::npos)
     return;  // Leave params empty.

   // Trim off the params.
   *params = instruction->substr(param_separator + 1);
   instruction->erase(instruction->begin() + param_separator,
                      instruction->end());
 }

 }  // namespace

 Disassembler::Row::Row() = default;

 Disassembler::Row::Row(uint64_t address, const uint8_t* bytes, size_t bytes_len,
                        std::string op, std::string params, std::string comment)
     : address(address),
       bytes(bytes, bytes + bytes_len),
       op(op),
       params(params),
       comment(comment) {}
 Disassembler::Row::~Row() = default;

 bool Disassembler::Row::operator==(const Row& other) const {
   return address == other.address && bytes == other.bytes && op == other.op &&
          params == other.params && comment == other.comment;
 }

 Disassembler::Disassembler() = default;
 Disassembler::~Disassembler() = default;

 Err Disassembler::Init(const ArchInfo* arch) {
   arch_ = arch;

   context_ = std::make_unique<llvm::MCContext>(arch_->asm_info(),
                                                arch_->register_info(), nullptr);
   disasm_.reset(arch_->target()->createMCDisassembler(*arch_->subtarget_info(),
                                                       *context_));
   if (!disasm_)
     return Err("Couldn't create LLVM disassembler.");

   constexpr int kAssemblyFlavor = 1;  // 1 means "Intel" (not AT&T).
   printer_.reset(arch_->target()->createMCInstPrinter(
       *arch_->triple(), kAssemblyFlavor, *arch_->asm_info(),
       *arch_->instr_info(), *arch_->register_info()));
   printer_->setPrintHexStyle(llvm::HexStyle::C);  // ::C = 0xff-style.
   printer_->setPrintImmHex(true);
   printer_->setUseMarkup(true);

   return Err();
 }

 size_t Disassembler::DisassembleOne(const uint8_t* data, size_t data_len,
                                     uint64_t address, const Options& options,
                                     Row* out) const {
   out->address = address;

   // Decode.
   llvm::MCInst inst;
   uint64_t consumed = 0;
   auto status = disasm_->getInstruction(inst, consumed,
                                         llvm::ArrayRef<uint8_t>(data, data_len),
                                         address, llvm::nulls(), llvm::nulls());
   if (status == llvm::MCDisassembler::Success) {
     // Print the instruction. Note that LLVM appends to the strings so we need
     // to make sure they're empty before using.
     out->op.clear();
     out->comment.clear();
     llvm::raw_string_ostream inst_stream(out->op);
     llvm::raw_string_ostream comment_stream(out->comment);

     printer_->setCommentStream(comment_stream);
     printer_->printInst(&inst, inst_stream, llvm::StringRef(),
                         *arch_->subtarget_info());
     printer_->setCommentStream(llvm::nulls());

     inst_stream.flush();
     comment_stream.flush();

     SplitInstruction(&out->op, &out->params);
   } else {
     // Failure decoding.
     if (!options.emit_undecodable)
       return 0;
     consumed = std::min(data_len, arch_->instr_align());
     GetInvalidInstructionStrs(data, consumed, &out->op, &out->params,
                               &out->comment);
   }

   // Comments.
   if (!out->comment.empty()) {
     // Canonicalize the comments, they'll end in a newline (which is added
     // manually later) and may contain embedded newlines.
     out->comment = fxl::TrimString(out->comment, "\r\n ").ToString();
     ReplaceAllInstancesOf("\r\n", ' ', &out->comment);

     out->comment =
         arch_->asm_info()->getCommentString().str() + " " + out->comment;
   }

   out->bytes = std::vector<uint8_t>(data, data + consumed);
   return consumed;
 }

 size_t Disassembler::DisassembleMany(const uint8_t* data, size_t data_len,
                                      uint64_t start_address,
                                      const Options& in_options,
                                      size_t max_instructions,
                                      std::vector<Row>* out) const {
   if (max_instructions == 0)
     max_instructions = std::numeric_limits<size_t>::max();

   // Force emit_undecodable to true or we can never advance past undecodable
   // instructions.
   Options options = in_options;
   options.emit_undecodable = true;

   size_t byte_offset = 0;
   while (byte_offset < data_len && out->size() < max_instructions) {
     out->emplace_back();
     size_t bytes_read =
         DisassembleOne(&data[byte_offset], data_len - byte_offset,
                        start_address + byte_offset, options, &out->back());
     FXL_DCHECK(bytes_read > 0);
     byte_offset += bytes_read;
   }

   return byte_offset;
 }

 size_t Disassembler::DisassembleDump(const MemoryDump& dump,
                                      uint64_t start_address,
                                      const Options& options,
                                      size_t max_instructions,
                                      std::vector<Row>* out) const {
   if (max_instructions == 0)
     max_instructions = std::numeric_limits<size_t>::max();

   uint64_t cur_address = start_address;
   for (size_t block_i = 0; block_i < dump.blocks().size(); block_i++) {
     const debug_ipc::MemoryBlock& block = dump.blocks()[block_i];

     uint64_t block_end = block.address + block.size;
     if (cur_address >= block_end)
       continue;  // Not in this block.

     if (!block.valid) {
       // Invalid region.
       std::string comment =
           arch_->asm_info()->getCommentString().str() + " Invalid memory @ ";

       if (block_i == dump.blocks().size() - 1) {
         // If the last block, just show the starting address because the size
         // will normally be irrelevant (say disassembling at the current IP
         // which might be invalid -- the user doesn't care how big the
         // invalid memory region is, or how much was requested).
         comment += fxl::StringPrintf("0x%" PRIx64, block.address);
       } else {
         // Invalid range.
         comment +=
             fxl::StringPrintf("0x%" PRIx64 " - 0x%" PRIx64, block.address,
                               block.address + block.size - 1);
       }

       // Append the row.
       out->emplace_back();
       Row& row = out->back();
       row.address = block.address;
       row.op = "??";
       row.comment = std::move(comment);

       cur_address = block_end;
       continue;
     }

     uint64_t block_offset = cur_address - block.address;
     if (block_offset < block.data.size()) {
       // Valid region, print instructions to the end of the block.
       size_t block_bytes_consumed = DisassembleMany(
           &block.data[block_offset], block.data.size(),
           block.address + block_offset, options, max_instructions, out);
       if (out->size() >= max_instructions) {
         // Return the number of bytes from the beginning of the memory dump
         // that were consumed.
         return static_cast<size_t>(block.address + block_bytes_consumed -
                                    dump.blocks()[0].address);
       }
     }
     cur_address = block_end;
   }

   // All bytes of the memory dump were consumed.
   return static_cast<size_t>(dump.size());
 }

 }  // namespace zxdb
	// Copyright 2018 The Fuchsia Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "garnet/bin/zxdb/client/disassembler.h"

	#include <inttypes.h>
	#include <limits>

	#include "garnet/bin/zxdb/client/arch_info.h"
	#include "garnet/bin/zxdb/client/memory_dump.h"
	#include "garnet/lib/debug_ipc/records.h"
	#include "garnet/public/lib/fxl/strings/string_printf.h"
	#include "garnet/public/lib/fxl/strings/trim.h"
	#include "llvm/MC/MCAsmInfo.h"
	#include "llvm/MC/MCContext.h"
	#include "llvm/MC/MCDisassembler/MCDisassembler.h"
	#include "llvm/MC/MCInstPrinter.h"
	#include "llvm/Support/TargetRegistry.h"

	namespace zxdb {

	namespace {

	// In-place replaces instances of ANY of the characters in "search_for" with the
	// given replacement in the given string.
	void ReplaceAllInstancesOf(const char* search_for, char replace_with,
	std::string* str) {
	size_t found_pos = 0;
	while ((found_pos = str->find_first_of(search_for, found_pos)) !=
	std::string::npos) {
	(*str)[found_pos] = replace_with;
	}
	}

	void GetInvalidInstructionStrs(const uint8_t* data, size_t len,
	std::string* instruction, std::string* params,
	std::string* comment) {
	*instruction = ".byte";
	params->clear();
	for (size_t i = 0; i < len; i++) {
	if (i > 0)
	params->push_back(' ');
	params->append(fxl::StringPrintf("0x%2.2x", data[i]));
	}
	*comment = "Invalid instruction.";
	}

	// LLVM generates a instructions like "\tmov\ta,b". Given a string like this
	// with two tabs in the instruction input, separates the parameters ("a,b")
	// off into the given params string, and strips tabs leaving only the
	// instruction ("mov") in the input string.
	void SplitInstruction(std::string* instruction, std::string* params) {
	params->clear();

	size_t first_char = instruction->find_first_not_of("\t");
	if (first_char == std::string::npos)
	return; // Leave instruction unchanged if there are no tabs.

	// Trim leading tabs.
	instruction->erase(instruction->begin(), instruction->begin() + first_char);

	// Extract the params.
	size_t param_separator = instruction->find('\t');
	if (param_separator == std::string::npos)
	return; // Leave params empty.

	// Trim off the params.
	*params = instruction->substr(param_separator + 1);
	instruction->erase(instruction->begin() + param_separator,
	instruction->end());
	}

	} // namespace

	Disassembler::Row::Row() = default;

	Disassembler::Row::Row(uint64_t address, const uint8_t* bytes, size_t bytes_len,
	std::string op, std::string params, std::string comment)
	: address(address),
	bytes(bytes, bytes + bytes_len),
	op(op),
	params(params),
	comment(comment) {}
	Disassembler::Row::~Row() = default;

	bool Disassembler::Row::operator==(const Row& other) const {
	return address == other.address && bytes == other.bytes && op == other.op &&
	params == other.params && comment == other.comment;
	}

	Disassembler::Disassembler() = default;
	Disassembler::~Disassembler() = default;

	Err Disassembler::Init(const ArchInfo* arch) {
	arch_ = arch;

	context_ = std::make_unique<llvm::MCContext>(arch_->asm_info(),
	arch_->register_info(), nullptr);
	disasm_.reset(arch_->target()->createMCDisassembler(*arch_->subtarget_info(),
	*context_));
	if (!disasm_)
	return Err("Couldn't create LLVM disassembler.");

	constexpr int kAssemblyFlavor = 1; // 1 means "Intel" (not AT&T).
	printer_.reset(arch_->target()->createMCInstPrinter(
	arch_->triple(), kAssemblyFlavor, arch_->asm_info(),
	arch_->instr_info(), arch_->register_info()));
	printer_->setPrintHexStyle(llvm::HexStyle::C); // ::C = 0xff-style.
	printer_->setPrintImmHex(true);
	printer_->setUseMarkup(true);

	return Err();
	}

	size_t Disassembler::DisassembleOne(const uint8_t* data, size_t data_len,
	uint64_t address, const Options& options,
	Row* out) const {
	out->address = address;

	// Decode.
	llvm::MCInst inst;
	uint64_t consumed = 0;
	auto status = disasm_->getInstruction(inst, consumed,
	llvm::ArrayRef<uint8_t>(data, data_len),
	address, llvm::nulls(), llvm::nulls());
	if (status == llvm::MCDisassembler::Success) {
	// Print the instruction. Note that LLVM appends to the strings so we need
	// to make sure they're empty before using.
	out->op.clear();
	out->comment.clear();
	llvm::raw_string_ostream inst_stream(out->op);
	llvm::raw_string_ostream comment_stream(out->comment);

	printer_->setCommentStream(comment_stream);
	printer_->printInst(&inst, inst_stream, llvm::StringRef(),
	*arch_->subtarget_info());
	printer_->setCommentStream(llvm::nulls());

	inst_stream.flush();
	comment_stream.flush();

	SplitInstruction(&out->op, &out->params);
	} else {
	// Failure decoding.
	if (!options.emit_undecodable)
	return 0;
	consumed = std::min(data_len, arch_->instr_align());
	GetInvalidInstructionStrs(data, consumed, &out->op, &out->params,
	&out->comment);
	}

	// Comments.
	if (!out->comment.empty()) {
	// Canonicalize the comments, they'll end in a newline (which is added
	// manually later) and may contain embedded newlines.
	out->comment = fxl::TrimString(out->comment, "\r\n ").ToString();
	ReplaceAllInstancesOf("\r\n", ' ', &out->comment);

	out->comment =
	arch_->asm_info()->getCommentString().str() + " " + out->comment;
	}

	out->bytes = std::vector<uint8_t>(data, data + consumed);
	return consumed;
	}

	size_t Disassembler::DisassembleMany(const uint8_t* data, size_t data_len,
	uint64_t start_address,
	const Options& in_options,
	size_t max_instructions,
	std::vector<Row>* out) const {
	if (max_instructions == 0)
	max_instructions = std::numeric_limits<size_t>::max();

	// Force emit_undecodable to true or we can never advance past undecodable
	// instructions.
	Options options = in_options;
	options.emit_undecodable = true;

	size_t byte_offset = 0;
	while (byte_offset < data_len && out->size() < max_instructions) {
	out->emplace_back();
	size_t bytes_read =
	DisassembleOne(&data[byte_offset], data_len - byte_offset,
	start_address + byte_offset, options, &out->back());
	FXL_DCHECK(bytes_read > 0);
	byte_offset += bytes_read;
	}

	return byte_offset;
	}

	size_t Disassembler::DisassembleDump(const MemoryDump& dump,
	uint64_t start_address,
	const Options& options,
	size_t max_instructions,
	std::vector<Row>* out) const {
	if (max_instructions == 0)
	max_instructions = std::numeric_limits<size_t>::max();

	uint64_t cur_address = start_address;
	for (size_t block_i = 0; block_i < dump.blocks().size(); block_i++) {
	const debug_ipc::MemoryBlock& block = dump.blocks()[block_i];

	uint64_t block_end = block.address + block.size;
	if (cur_address >= block_end)
	continue; // Not in this block.

	if (!block.valid) {
	// Invalid region.
	std::string comment =
	arch_->asm_info()->getCommentString().str() + " Invalid memory @ ";

	if (block_i == dump.blocks().size() - 1) {
	// If the last block, just show the starting address because the size
	// will normally be irrelevant (say disassembling at the current IP
	// which might be invalid -- the user doesn't care how big the
	// invalid memory region is, or how much was requested).
	comment += fxl::StringPrintf("0x%" PRIx64, block.address);
	} else {
	// Invalid range.
	comment +=
	fxl::StringPrintf("0x%" PRIx64 " - 0x%" PRIx64, block.address,
	block.address + block.size - 1);
	}

	// Append the row.
	out->emplace_back();
	Row& row = out->back();
	row.address = block.address;
	row.op = "??";
	row.comment = std::move(comment);

	cur_address = block_end;
	continue;
	}

	uint64_t block_offset = cur_address - block.address;
	if (block_offset < block.data.size()) {
	// Valid region, print instructions to the end of the block.
	size_t block_bytes_consumed = DisassembleMany(
	&block.data[block_offset], block.data.size(),
	block.address + block_offset, options, max_instructions, out);
	if (out->size() >= max_instructions) {
	// Return the number of bytes from the beginning of the memory dump
	// that were consumed.
	return static_cast<size_t>(block.address + block_bytes_consumed -
	dump.blocks()[0].address);
	}
	}
	cur_address = block_end;
	}

	// All bytes of the memory dump were consumed.
	return static_cast<size_t>(dump.size());
	}

	} // namespace zxdb