Merge pull request #61 from fischman/master
Add Intel TSX (XACQUIRE/XRELEASE, XBEGIN/XEND/XTEST/XABORT) and AMD TBM instruction support
diff --git a/modules/arch/x86/gen_x86_insn.py b/modules/arch/x86/gen_x86_insn.py
index 4de9d6c..6b7a333 100755
--- a/modules/arch/x86/gen_x86_insn.py
+++ b/modules/arch/x86/gen_x86_insn.py
@@ -39,7 +39,7 @@
"SSE3", "SVM", "PadLock", "SSSE3", "SSE41", "SSE42", "SSE4a", "SSE5",
"AVX", "FMA", "AES", "CLMUL", "MOVBE", "XOP", "FMA4", "F16C",
"FSGSBASE", "RDRAND", "XSAVEOPT", "EPTVPID", "SMX", "AVX2", "BMI1",
- "BMI2", "INVPCID", "LZCNT"]
+ "BMI2", "INVPCID", "LZCNT", "TBM", "TSX"]
unordered_cpu_features = ["Priv", "Prot", "Undoc", "Obs"]
# Predefined VEX prefix field values
@@ -2264,7 +2264,7 @@
add_group("call",
opersize=16,
- def_opersize_64=64,
+ not64=True, # there should be no 16-bit call in 64-bit mode
opcode=[0xE8],
operands=[Operand(type="Imm", size=16, tmod="Near", dest="JmpRel")])
add_group("call",
@@ -7116,6 +7116,45 @@
add_insn("vpgatherqd", "gather_32x_32y_128", modifiers=[0x91])
#####################################################################
+# Intel TSX instructions
+#####################################################################
+add_prefix("xacquire", "ACQREL", 0xF2)
+add_prefix("xrelease", "ACQREL", 0xF3)
+
+add_group("tsx_xabort",
+ cpu=["TSX"],
+ opcode=[0xC6, 0xF8],
+ operands=[Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_insn("xabort", "tsx_xabort")
+
+
+
+add_group("tsx_xbegin",
+ cpu=["TSX"],
+ opcode=[0xC7, 0xF8],
+ operands=[Operand(type="Imm", size=32, tmod="Near", dest="JmpRel")])
+
+add_group("tsx_xbegin",
+ cpu=["TSX"],
+ opersize=16,
+ not64=True, # there should be no 16-bit xbegin in 64-bit mode
+ opcode=[0xC7, 0xF8],
+ operands=[Operand(type="Imm", size=16, tmod="Near", dest="JmpRel")])
+add_insn("xbegin", "tsx_xbegin")
+
+add_group("tsx_0x0F_0x01",
+ cpu=["TSX"],
+ modifiers=["Op2Add"],
+ opcode=[0x0F, 0x01, 0x00],
+ operands=[])
+add_insn("xend", "tsx_0x0F_0x01", modifiers=[0xD5])
+add_insn("xtest", "tsx_0x0F_0x01", modifiers=[0xD6])
+
+
+
+
+
+#####################################################################
# Intel FMA instructions
#####################################################################
@@ -7961,8 +8000,6 @@
add_insn("bzhi", "vex_gpr_reg_rm_nds_0F", modifiers=[0x00, 0x38, 0xF5],
cpu=["BMI2"])
-add_insn("bextr","vex_gpr_reg_rm_nds_0F", modifiers=[0x00, 0x38, 0xF7],
- cpu=["BMI1"])
add_insn("shlx", "vex_gpr_reg_rm_nds_0F", modifiers=[0x66, 0x38, 0xF7],
cpu=["BMI2"])
add_insn("shrx", "vex_gpr_reg_rm_nds_0F", modifiers=[0xF2, 0x38, 0xF7],
@@ -7973,7 +8010,31 @@
add_insn("mulx", "vex_gpr_reg_nds_rm_0F", modifiers=[0xF2, 0x38, 0xF6],
cpu=["BMI2"])
+for sfx, sz in zip("lq", [32, 64]): # no 16-bit forms
+ add_group("bextr",
+ cpu=["BMI1"],
+ suffix=sfx,
+ opersize=sz,
+ prefix=0x00,
+ opcode=[0x0F, 0x38, 0xF7],
+ vex=0,
+ operands=[Operand(type="Reg", size=sz, dest="Spare"),
+ Operand(type="RM", size=sz, relaxed=True, dest="EA"),
+ Operand(type="Reg", size=sz, dest="VEX")])
+ add_group("bextr", # TBM alternate form of bextr
+ cpu=["TBM"],
+ suffix=sfx,
+ opersize=sz,
+ prefix=0x00,
+ opcode=[0x0A, 0x10],
+ xop=128,
+ xopw=(sz==64),
+ onlyavx=True,
+ operands=[Operand(type="Reg", size=sz, dest="Spare"),
+ Operand(type="RM", size=sz, relaxed=True, dest="EA"),
+ Operand(type="Imm", size=32, relaxed=True, dest="Imm")])
+add_insn("bextr", "bextr")
#####################################################################
# Intel INVPCID instruction
@@ -7996,6 +8057,33 @@
add_insn("invpcid", "invpcid")
#####################################################################
+# AMD trailing bit manipulation (TBM)
+#####################################################################
+
+for sfx, sz in zip("lq", [32, 64]): # no 16-bit forms
+ add_group("xop_gpr_reg_rm_09",
+ cpu=["TBM"],
+ suffix=sfx,
+ modifiers=["Op1Add","SpAdd"],
+ opersize=sz,
+ prefix=0x00,
+ opcode=[0x09, 0x00],
+ xop=128,
+ xopw=(sz==64),
+ operands=[Operand(type="Reg", size=sz, dest="VEX"),
+ Operand(type="RM", size=sz, relaxed=True, dest="EA")])
+
+add_insn("blcfill", "xop_gpr_reg_rm_09", modifiers=[0x01, 1])
+add_insn("blci", "xop_gpr_reg_rm_09", modifiers=[0x02, 6])
+add_insn("blcic", "xop_gpr_reg_rm_09", modifiers=[0x01, 5])
+add_insn("blcmsk", "xop_gpr_reg_rm_09", modifiers=[0x02, 1])
+add_insn("blcs", "xop_gpr_reg_rm_09", modifiers=[0x01, 3])
+add_insn("blsfill", "xop_gpr_reg_rm_09", modifiers=[0x01, 2])
+add_insn("blsic", "xop_gpr_reg_rm_09", modifiers=[0x01, 6])
+add_insn("t1mskc", "xop_gpr_reg_rm_09", modifiers=[0x01, 7])
+add_insn("tzmsk", "xop_gpr_reg_rm_09", modifiers=[0x01, 4])
+
+#####################################################################
# AMD 3DNow! instructions
#####################################################################
diff --git a/modules/arch/x86/x86arch.h b/modules/arch/x86/x86arch.h
index 0c387f7..5c3a6cf 100644
--- a/modules/arch/x86/x86arch.h
+++ b/modules/arch/x86/x86arch.h
@@ -83,6 +83,8 @@
#define CPU_BMI2 49 /* Intel BMI2 instructions */
#define CPU_INVPCID 50 /* Intel INVPCID instruction */
#define CPU_LZCNT 51 /* Intel LZCNT instruction */
+#define CPU_TBM 52 /* AMD TBM instructions */
+#define CPU_TSX 53 /* Intel TSX instructions */
enum x86_parser_type {
X86_PARSER_NASM = 0,
@@ -139,7 +141,8 @@
X86_ADDRSIZE = 2<<8,
X86_OPERSIZE = 3<<8,
X86_SEGREG = 4<<8,
- X86_REX = 5<<8
+ X86_REX = 5<<8,
+ X86_ACQREL = 6<<8 /*TSX hint prefixes*/
} x86_parse_insn_prefix;
typedef enum {
@@ -219,6 +222,8 @@
unsigned char addrsize; /* 0 or =mode_bits => no override */
unsigned char opersize; /* 0 or =mode_bits => no override */
unsigned char lockrep_pre; /* 0 indicates no prefix */
+ unsigned char acqrel_pre; /* 0 indicates no prefix. We need this because
xacquire/xrelease might require F0 prefix */
unsigned char mode_bits;
} x86_common;
diff --git a/modules/arch/x86/x86bc.c b/modules/arch/x86/x86bc.c
index 1670df7..a668155 100644
--- a/modules/arch/x86/x86bc.c
+++ b/modules/arch/x86/x86bc.c
@@ -279,6 +279,24 @@
for (i=0; i<num_prefixes; i++) {
switch ((x86_parse_insn_prefix)(prefixes[i] & 0xff00)) {
+ /* Strictly speaking, we should enforce that TSX hints appear only with a
+ predefined set of instructions, and in most cases only together with the
+ F0 (LOCK) prefix; otherwise they have completely different semantics.
+ However, the F0 prefix itself is only valid with a predefined set of
+ instructions (the CPU raises #UD otherwise), and its applicability is
+ not currently enforced here either. That appears to be a deliberate
+ decision: the user is expected to know what they are doing with the
+ LOCK prefix. Consistent with that, we do not enforce TSX hint
+ applicability either, and instead leave correct usage of the TSX
+ hints to the user.
+ That is what we do here. */
+ case X86_ACQREL:
+ if (common->acqrel_pre != 0)
+ yasm_warn_set(YASM_WARN_GENERAL,
+ N_("multiple XACQUIRE/XRELEASE prefixes, "
+ "using leftmost"));
+ common->acqrel_pre = (unsigned char)prefixes[i] & 0xff;
+ break;
case X86_LOCKREP:
if (common->lockrep_pre != 0)
yasm_warn_set(YASM_WARN_GENERAL,
@@ -395,11 +413,13 @@
static void
x86_common_print(const x86_common *common, FILE *f, int indent_level)
{
- fprintf(f, "%*sAddrSize=%u OperSize=%u LockRepPre=%02x BITS=%u\n",
+ fprintf(f, "%*sAddrSize=%u OperSize=%u LockRepPre=%02x "
+ "ACQREL_Pre=%02x BITS=%u\n",
indent_level, "",
(unsigned int)common->addrsize,
(unsigned int)common->opersize,
(unsigned int)common->lockrep_pre,
+ (unsigned int)common->acqrel_pre,
(unsigned int)common->mode_bits);
}
@@ -515,6 +535,9 @@
len++;
if (common->lockrep_pre != 0)
len++;
+ if (common->acqrel_pre != 0)
+ len++;
+
return len;
}
@@ -791,6 +814,9 @@
((common->mode_bits != 64 && common->opersize != common->mode_bits) ||
(common->mode_bits == 64 && common->opersize == 16)))
YASM_WRITE_8(*bufp, 0x66);
+ /* TSX hints must be emitted before the LOCK prefix */
+ if (common->acqrel_pre != 0)
+ YASM_WRITE_8(*bufp, common->acqrel_pre);
if (common->lockrep_pre != 0)
YASM_WRITE_8(*bufp, common->lockrep_pre);
}
diff --git a/modules/arch/x86/x86cpu.gperf b/modules/arch/x86/x86cpu.gperf
index 669a588..3d49574 100644
--- a/modules/arch/x86/x86cpu.gperf
+++ b/modules/arch/x86/x86cpu.gperf
@@ -390,6 +390,8 @@
nosmx, x86_cpu_clear, CPU_SMX
avx2, x86_cpu_set, CPU_AVX2
noavx2, x86_cpu_clear, CPU_AVX2
+tsx, x86_cpu_set, CPU_TSX
+notsx, x86_cpu_clear, CPU_TSX
bmi1, x86_cpu_set, CPU_BMI1
nobmi1, x86_cpu_clear, CPU_BMI1
bmi2, x86_cpu_set, CPU_BMI2
@@ -398,6 +400,8 @@
noinvpcid, x86_cpu_clear, CPU_INVPCID
lzcnt, x86_cpu_set, CPU_LZCNT
nolzcnt, x86_cpu_clear, CPU_LZCNT
+tbm, x86_cpu_set, CPU_TBM
+notbm, x86_cpu_clear, CPU_TBM
# Change NOP patterns
basicnop, x86_nop, X86_NOP_BASIC
intelnop, x86_nop, X86_NOP_INTEL
diff --git a/modules/arch/x86/x86id.c b/modules/arch/x86/x86id.c
index 3f645b8..b07c9fc 100644
--- a/modules/arch/x86/x86id.c
+++ b/modules/arch/x86/x86id.c
@@ -388,6 +388,7 @@
common->addrsize = 0;
common->opersize = info->opersize;
common->lockrep_pre = 0;
+ common->acqrel_pre = 0;
common->mode_bits = (unsigned char)mode_bits;
}
@@ -1603,8 +1604,9 @@
* Leave R=X=B=1 for now.
*/
if (insn->opcode.opcode[0] != 0x08 &&
- insn->opcode.opcode[0] != 0x09)
- yasm_internal_error(N_("first opcode byte of XOP must be 0x08 or 0x09"));
+ insn->opcode.opcode[0] != 0x09 &&
+ insn->opcode.opcode[0] != 0x0A)
+ yasm_internal_error(N_("first opcode byte of XOP must be 0x08, 0x09, or 0x0A"));
vex1 |= insn->opcode.opcode[0];
/* Move opcode byte back one byte to make room for XOP prefix. */
insn->opcode.opcode[2] = insn->opcode.opcode[1];