blob: ce13fbc1a86d70377eefdf8a6a06c1fabc829646 [file] [log] [blame]
// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef SRC_DEVELOPER_DEBUG_ZXDB_CLIENT_DISASSEMBLER_H_
#define SRC_DEVELOPER_DEBUG_ZXDB_CLIENT_DISASSEMBLER_H_
#include <inttypes.h>
#include <iosfwd>
#include <memory>
#include <optional>
#include <string>
#include <vector>
#include "src/developer/debug/zxdb/common/err.h"
#include "src/lib/fxl/macros.h"
// Forward declarations of the LLVM types this header refers to (only through std::unique_ptr
// members and a const reference parameter), so the LLVM headers do not need to be included here.
namespace llvm {
class MCContext;
class MCDisassembler;
class MCInst;
class MCInstPrinter;
} // namespace llvm
namespace zxdb {
// Forward declarations. In this header these types are only used through a pointer (ArchInfo) or
// a const reference (MemoryDump), so their full definitions are not required.
class ArchInfo;
class MemoryDump;
// Disassembles a block of data into a sequence of Row records (one per machine instruction).
//
// The architecture is supplied through Init(); presumably Init() must succeed before any of the
// Disassemble*() functions are called (TODO confirm against the implementation). The class is not
// copyable.
class Disassembler {
public:
// Flags that adjust what the Disassemble*() functions emit.
struct Options {
// Controls the behavior for undecodable instructions. When false, DisassembleOne() will report
// no data consumed and the empty string will be returned. When true, it will emit a "data"
// mnemonic and advance to the next instruction boundary.
//
// DisassembleMany will always show undecodable instructions (otherwise it won't advance).
//
// NOTE(review): "the empty string will be returned" appears to predate the Row-based output of
// DisassembleOne(); presumably the output Row is left empty in that case. Confirm.
bool emit_undecodable = true;
};
// Special known instruction classes. These are just the types of instructions we have a need to
// identify, more classes can be added as needed.
enum class InstructionType {
kCallDirect, // Call to a hardcoded address.
kCallIndirect, // Call to a register value.
// The default classification for a Row (see Row::type and the Row constructor default).
kOther,
};
// One disassembled instruction.
struct Row {
// Default constructor; the in-class initializers below give address = 0 and type = kOther.
Row();
// Constructs a row from all of its parts. |bytes| / |bytes_len| describe the raw bytes of the
// instruction (presumably copied into the |bytes| member; the definition is not in this
// header). |type| and |call_dest| are optional and default to kOther / no destination.
Row(uint64_t address, const uint8_t* bytes, size_t bytes_len, std::string op,
std::string params, std::string comment, InstructionType type = InstructionType::kOther,
std::optional<uint64_t> call_dest = std::nullopt);
~Row();
// Address of the instruction.
uint64_t address = 0;
// Raw bytes that make up the instruction.
std::vector<uint8_t> bytes;
// Instruction mnemonic (for undecodable input a "data" mnemonic may be emitted, see Options).
std::string op;
// Instruction parameters, as text.
std::string params;
// Comment text associated with the instruction.
std::string comment;
// Classification of this instruction, see InstructionType.
InstructionType type = InstructionType::kOther;
// If this instruction is a direct call instruction, this will contain the address of the
// destination of the call.
//
// This is currently filled in for the most common cases but is not complete. It could be
// expanded to handle more call variants, and we could also record a destination address for
// jump instructions.
std::optional<uint64_t> call_dest;
// For unit testing.
bool operator==(const Row& other) const;
};
Disassembler();
// Declared here and defined out of line; the std::unique_ptr members below point to types that
// are only forward-declared in this header.
~Disassembler();
// Sets the architecture to disassemble for. Returns an Err describing the outcome.
//
// The ArchInfo pointer must outlive this class. Since typically this will come from the Session
// object which can destroy the LLVM context when the agent is disconnected, you will not want to
// store Disassembler objects.
Err Init(const ArchInfo* arch);
// Disassembles one machine instruction, setting the required information to the columns of the
// output. The output will have columns for instruction, parameters, and comments.
// If addresses and bytes are requested, those will be prepended.
//
// NOTE(review): the "columns" wording appears to predate the Row struct; the result is written
// to |out|, which has dedicated address/bytes/op/params/comment fields, and Options has no flag
// for requesting addresses or bytes. Confirm and update.
//
// |data| / |data_len| is the input buffer and |address| is the address corresponding to the
// start of |data|.
//
// The number of bytes consumed will be returned.
//
// Be sure the input buffer always has enough data for any instruction.
size_t DisassembleOne(const uint8_t* data, size_t data_len, uint64_t address,
const Options& options, Row* out) const;
// Disassembles the block, either until there is no more data, or |max_instructions| have been
// decoded. If max_instructions is 0 it will always decode the whole block.
//
// *Appends* the instructions to the output vector. The max_instructions applies to the total size
// of the output (so counts what may have already been there).
//
// The output will be one Row per instruction. See DisassembleOne for the row contents.
//
// NOTE(review): the meaning of the return value is not documented here; presumably it is the
// number of bytes consumed, as for DisassembleOne(). Confirm.
size_t DisassembleMany(const uint8_t* data, size_t data_len, uint64_t start_address,
const Options& options, size_t max_instructions,
std::vector<Row>* out) const;
// Like DisassembleMany() but uses a MemoryDump object. The dump will start at the beginning of
// the memory dump. This function understands the addresses of the memory dump, and also invalid
// ranges (which will be marked in the disassembly).
//
// An unmapped range will be counted as one instruction. The memory addresses for unmapped ranges
// will always be shown even if disabled in the options.
//
// NOTE(review): Options currently has no flag for disabling addresses, so the last sentence may
// be stale. The relationship between |start_address| and "the beginning of the memory dump" is
// also not clear from this header. Confirm both against the implementation.
size_t DisassembleDump(const MemoryDump& dump, uint64_t start_address, const Options& options,
size_t max_instructions, std::vector<Row>* out) const;
private:
// Computes the instruction type for the given instruction. If this is a direct call
// instruction it will compute the destination of the call.
//
// The address is the address of the beginning of the instruction. It is needed to decode relative
// addresses. The data and data_len indicates the array of bytes that makes up the instruction.
// The results are written to |row| (presumably its |type| and |call_dest| fields).
//
// NOTE(review): |data_len| is uint64_t here while the public functions use size_t for lengths.
void FillInstructionInfo(uint64_t address, const uint8_t* data, uint64_t data_len,
const llvm::MCInst& inst, Row* row) const;
// Non-owning; see Init() for the lifetime requirement. Null by default.
const ArchInfo* arch_ = nullptr;
// Owned LLVM objects (presumably created by Init(); the definition is not in this header).
std::unique_ptr<llvm::MCContext> context_;
std::unique_ptr<llvm::MCDisassembler> disasm_;
std::unique_ptr<llvm::MCInstPrinter> printer_;
// Makes the class non-copyable (macro from src/lib/fxl/macros.h).
FXL_DISALLOW_COPY_AND_ASSIGN(Disassembler);
};
// For unit testing. Writes |row| to |out| and returns |out| so calls can be chained; presumably
// this exists so test frameworks can print Row values in failure messages.
std::ostream& operator<<(std::ostream& out, const Disassembler::Row& row);
} // namespace zxdb
#endif // SRC_DEVELOPER_DEBUG_ZXDB_CLIENT_DISASSEMBLER_H_