// arch/X86/X86Disassembler.h - third_party/capstone - Git at Google

 //===-- X86Disassembler.h - Disassembler for x86 and x86_64 -----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
 // 64-bit X86 instruction sets.  The main decode sequence for an assembly
 // instruction in this disassembler is:
 //
 // 1. Read the prefix bytes and determine the attributes of the instruction.
 //    These attributes, recorded in enum attributeBits
 //    (X86DisassemblerDecoderCommon.h), form a bitmask.  The table CONTEXTS_SYM
 //    provides a mapping from bitmasks to contexts, which are represented by
 //    enum InstructionContext (ibid.).
 //
 // 2. Read the opcode, and determine what kind of opcode it is.  The
 //    disassembler distinguishes four kinds of opcodes, which are enumerated in
 //    OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
 //    (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
 //    (0x0f 0x3a 0xnn).  Mandatory prefixes are treated as part of the context.
 //
 // 3. Depending on the opcode type, look in one of four ClassDecision structures
 //    (X86DisassemblerDecoderCommon.h).  Use the opcode class to determine which
 //    OpcodeDecision (ibid.) to look the opcode up in.  Look up the opcode to
 //    get a ModRMDecision (ibid.).
 //
 // 4. Some instructions, such as escape opcodes or extended opcodes, or even
 //    instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
 //    ModR/M byte to complete decode.  The ModRMDecision's type is an entry from
 //    ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
 //    ModR/M byte is required and how to interpret it.
 //
 // 5. After resolving the ModRMDecision, the disassembler has a unique ID
 //    of type InstrUID (X86DisassemblerDecoderCommon.h).  Looking this ID up in
 //    INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
 //    meanings of its operands.
 //
 // 6. For each operand, its encoding is an entry from OperandEncoding
 //    (X86DisassemblerDecoderCommon.h) and its type is an entry from
 //    OperandType (ibid.).  The encoding indicates how to read it from the
 //    instruction; the type indicates how to interpret the value once it has
 //    been read.  For example, a register operand could be stored in the R/M
 //    field of the ModR/M byte, the REG field of the ModR/M byte, or added to
 //    the main opcode.  This is orthogonal to its meaning (a GPR or an XMM
 //    register, for instance).  Given this information, the operands can be
 //    extracted and interpreted.
 //
 // 7. As the last step, the disassembler translates the instruction information
 //    and operands into a format understandable by the client - in this case, an
 //    MCInst for use by the MC infrastructure.
 //
 // The disassembler is broken broadly into two parts: the table emitter that
 // emits the instruction decode tables discussed above during compilation, and
 // the disassembler itself.  The table emitter is documented in more detail in
 // utils/TableGen/X86DisassemblerEmitter.h.
 //
 // X86Disassembler.h contains the public interface for the disassembler,
 //   adhering to the MCDisassembler interface.
 // X86Disassembler.cpp contains the code responsible for step 7, and for
 //   invoking the decoder to execute steps 1-6.
 // X86DisassemblerDecoderCommon.h contains the definitions needed by both the
 //   table emitter and the disassembler.
 // X86DisassemblerDecoder.h contains the public interface of the decoder,
 //   factored out into C for possible use by other projects.
 // X86DisassemblerDecoder.c contains the source code of the decoder, which is
 //   responsible for steps 1-6.
 //
 //===----------------------------------------------------------------------===//

 /* Capstone Disassembly Engine */
 /* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2015 */

 #ifndef CS_X86_DISASSEMBLER_H
 #define CS_X86_DISASSEMBLER_H

 #include "capstone/capstone.h"

 #include "../../MCInst.h"

 #include "../../MCRegisterInfo.h"
 #include "X86DisassemblerDecoderCommon.h"

 /*
  * Disassembler entry point exposed by this header.
  *
  * NOTE(review): the definition is not visible in this file; the parameter
  * notes below are inferred from the names and types in the prototype and
  * from the decode overview at the top of this file - confirm against the
  * implementation.
  *
  * handle   - Capstone handle (csh).
  * code     - read-only byte buffer; presumably the machine code to decode.
  * code_len - presumably the number of bytes available in `code`.
  * instr    - MCInst that receives the translated instruction (step 7 of
  *            the overview above).
  * size     - out-pointer; presumably receives the decoded instruction's
  *            length in bytes.
  * address  - presumably the address associated with the first byte of
  *            `code`.
  * info     - opaque pointer; its meaning is not visible from this header.
  *
  * Returns a bool; presumably true when an instruction was decoded.
  */
 bool X86_getInstruction(csh handle, const uint8_t *code, size_t code_len,
 		MCInst *instr, uint16_t *size, uint64_t address, void *info);

 /*
  * Initialization hook that takes the MCRegisterInfo object to operate on.
  *
  * NOTE(review): presumably fills *MRI with the X86 register description;
  * the definition is not visible in this header - confirm there.
  */
 void X86_init(MCRegisterInfo *MRI);

 #endif
	//===-- X86Disassembler.h - Disassembler for x86 and x86_64 -----*- C++ -*-===//
	//
	// The LLVM Compiler Infrastructure
	//
	// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.
	//
	//===----------------------------------------------------------------------===//
	//
	// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
	// 64-bit X86 instruction sets. The main decode sequence for an assembly
	// instruction in this disassembler is:
	//
	// 1. Read the prefix bytes and determine the attributes of the instruction.
	// These attributes, recorded in enum attributeBits
	// (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
	// provides a mapping from bitmasks to contexts, which are represented by
	// enum InstructionContext (ibid.).
	//
	// 2. Read the opcode, and determine what kind of opcode it is. The
	// disassembler distinguishes four kinds of opcodes, which are enumerated in
	// OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
	// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
	// (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
	//
	// 3. Depending on the opcode type, look in one of four ClassDecision structures
	// (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
	// OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode to
	// get a ModRMDecision (ibid.).
	//
	// 4. Some instructions, such as escape opcodes or extended opcodes, or even
	// instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
	// ModR/M byte to complete decode. The ModRMDecision's type is an entry from
	// ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
	// ModR/M byte is required and how to interpret it.
	//
	// 5. After resolving the ModRMDecision, the disassembler has a unique ID
	// of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
	// INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
	// meanings of its operands.
	//
	// 6. For each operand, its encoding is an entry from OperandEncoding
	// (X86DisassemblerDecoderCommon.h) and its type is an entry from
	// OperandType (ibid.). The encoding indicates how to read it from the
	// instruction; the type indicates how to interpret the value once it has
	// been read. For example, a register operand could be stored in the R/M
	// field of the ModR/M byte, the REG field of the ModR/M byte, or added to
	// the main opcode. This is orthogonal to its meaning (a GPR or an XMM
	// register, for instance). Given this information, the operands can be
	// extracted and interpreted.
	//
	// 7. As the last step, the disassembler translates the instruction information
	// and operands into a format understandable by the client - in this case, an
	// MCInst for use by the MC infrastructure.
	//
	// The disassembler is broken broadly into two parts: the table emitter that
	// emits the instruction decode tables discussed above during compilation, and
	// the disassembler itself. The table emitter is documented in more detail in
	// utils/TableGen/X86DisassemblerEmitter.h.
	//
	// X86Disassembler.h contains the public interface for the disassembler,
	// adhering to the MCDisassembler interface.
	// X86Disassembler.cpp contains the code responsible for step 7, and for
	// invoking the decoder to execute steps 1-6.
	// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
	// table emitter and the disassembler.
	// X86DisassemblerDecoder.h contains the public interface of the decoder,
	// factored out into C for possible use by other projects.
	// X86DisassemblerDecoder.c contains the source code of the decoder, which is
	// responsible for steps 1-6.
	//
	//===----------------------------------------------------------------------===//

	/* Capstone Disassembly Engine */
	/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2015 */

	#ifndef CS_X86_DISASSEMBLER_H
	#define CS_X86_DISASSEMBLER_H

	#include "capstone/capstone.h"

	#include "../../MCInst.h"

	#include "../../MCRegisterInfo.h"
	#include "X86DisassemblerDecoderCommon.h"

	/*
	 * Disassembler entry point exposed by this header.
	 *
	 * `instr` and `size` are pointers: the pointer declarators had been
	 * dropped from this copy of the prototype, which made it conflict with
	 * the declaration of the same function earlier in this file and turned
	 * both out-parameters into by-value arguments.
	 *
	 * NOTE(review): the definition is not visible in this file; the notes
	 * below are inferred from the prototype and the decode overview in the
	 * file header - confirm against the implementation.
	 *
	 * handle   - Capstone handle (csh).
	 * code     - read-only byte buffer; presumably the machine code to decode.
	 * code_len - presumably the number of bytes available in `code`.
	 * instr    - MCInst that receives the translated instruction.
	 * size     - out-pointer; presumably receives the decoded length in bytes.
	 * address  - presumably the address associated with the first byte.
	 * info     - opaque pointer; its meaning is not visible from this header.
	 *
	 * Returns a bool; presumably true when an instruction was decoded.
	 */
	bool X86_getInstruction(csh handle, const uint8_t *code, size_t code_len,
			MCInst *instr, uint16_t *size, uint64_t address, void *info);

	/*
	 * Initialization hook that takes the MCRegisterInfo object to operate on.
	 *
	 * NOTE(review): presumably fills *MRI with the X86 register description;
	 * the definition is not visible in this header - confirm there.
	 */
	void X86_init(MCRegisterInfo *MRI);

	#endif