Added support for HSW TSX instructions:

XACQUIRE
XRELEASE
XABORT
XBEGIN
XEND
XTEST

Also fixed a bug for CALL instruction (opcode 0xE8) - it allowed 16 bit operand with 0x66 prefix in 64 bit mode,
while 16 bit operand is not allowed at all in 64 bit mode.

Added X86_ACQREL prefix group for XACQUIRE/XRELEASE prefixes, since they need to be orthogonal to LOCKREP
prefixes, because TSX prefixes must come together with F0 (LOCK) prefix.

However, this commit does not enforce that TSX hints are used only with the instructions they are allowed on.
The reason is that the LOCK prefix (F0) itself is not restricted to lockable instructions; this seems to be a decision made by the
Yasm developers: the user must take care of these situations themselves.

Right now TSX hints can come with the F0 prefix and with REPNE/REPE prefixes, but if they are used together in assembly, only the leftmost would be
encoded to the binary and a warning will be issued. This is the behavior of Yasm for duplicate LOCKREP prefixes.
diff --git a/modules/arch/x86/gen_x86_insn.py b/modules/arch/x86/gen_x86_insn.py
index f3ced2a..6b7a333 100755
--- a/modules/arch/x86/gen_x86_insn.py
+++ b/modules/arch/x86/gen_x86_insn.py
@@ -39,7 +39,7 @@
     "SSE3", "SVM", "PadLock", "SSSE3", "SSE41", "SSE42", "SSE4a", "SSE5",
     "AVX", "FMA", "AES", "CLMUL", "MOVBE", "XOP", "FMA4", "F16C",
     "FSGSBASE", "RDRAND", "XSAVEOPT", "EPTVPID", "SMX", "AVX2", "BMI1",
-    "BMI2", "INVPCID", "LZCNT", "TBM"]
+    "BMI2", "INVPCID", "LZCNT", "TBM", "TSX"]
 unordered_cpu_features = ["Priv", "Prot", "Undoc", "Obs"]
 
 # Predefined VEX prefix field values
@@ -2264,7 +2264,7 @@
 
 add_group("call",
     opersize=16,
-    def_opersize_64=64,
+    not64=True, #there should not be 16bit call in 64bit mode
     opcode=[0xE8],
     operands=[Operand(type="Imm", size=16, tmod="Near", dest="JmpRel")])
 add_group("call",
@@ -7116,6 +7116,45 @@
 add_insn("vpgatherqd", "gather_32x_32y_128", modifiers=[0x91])
 
 #####################################################################
+# Intel TSX instructions
+#####################################################################
+add_prefix("xacquire",     "ACQREL",  0xF2)
+add_prefix("xrelease",     "ACQREL",  0xF3)
+
+add_group("tsx_xabort",
+    cpu=["TSX"],
+    opcode=[0xC6, 0xF8],
+    operands=[Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_insn("xabort", "tsx_xabort")
+
+
+
+add_group("tsx_xbegin",
+    cpu=["TSX"],
+    opcode=[0xC7, 0xF8],
+    operands=[Operand(type="Imm", size=32,  tmod="Near", dest="JmpRel")])
+
+add_group("tsx_xbegin",
+    cpu=["TSX"],
+    opersize=16,
+    not64=True, #there should not be 16bit xbegin in 64bit mode
+    opcode=[0xC7, 0xF8],
+    operands=[Operand(type="Imm", size=16,  tmod="Near", dest="JmpRel")])
+add_insn("xbegin", "tsx_xbegin")
+
+add_group("tsx_0x0F_0x01",
+    cpu=["TSX"],
+    modifiers=["Op2Add"],
+    opcode=[0x0F, 0x01, 0x00],
+    operands=[])
+add_insn("xend", "tsx_0x0F_0x01", modifiers=[0xD5])
+add_insn("xtest", "tsx_0x0F_0x01", modifiers=[0xD6])
+
+
+
+
+
+#####################################################################
 # Intel FMA instructions
 #####################################################################
 
diff --git a/modules/arch/x86/x86arch.h b/modules/arch/x86/x86arch.h
index 13f6c0e..5c3a6cf 100644
--- a/modules/arch/x86/x86arch.h
+++ b/modules/arch/x86/x86arch.h
@@ -84,6 +84,7 @@
 #define CPU_INVPCID 50      /* Intel INVPCID instruction */
 #define CPU_LZCNT   51      /* Intel LZCNT instruction */
 #define CPU_TBM     52      /* AMD TBM instruction */
+#define CPU_TSX     53      /* Intel TSX instructions */
 
 enum x86_parser_type {
     X86_PARSER_NASM = 0,
@@ -140,7 +141,8 @@
     X86_ADDRSIZE = 2<<8,
     X86_OPERSIZE = 3<<8,
     X86_SEGREG = 4<<8,
-    X86_REX = 5<<8
+    X86_REX = 5<<8,
+    X86_ACQREL = 6<<8     /*TSX hint prefixes*/
 } x86_parse_insn_prefix;
 
 typedef enum {
@@ -220,6 +222,8 @@
     unsigned char addrsize;         /* 0 or =mode_bits => no override */
     unsigned char opersize;         /* 0 or =mode_bits => no override */
     unsigned char lockrep_pre;      /* 0 indicates no prefix */
+    unsigned char acqrel_pre;      /* 0 indicates no prefix. We need this because
+                                   xacquire/xrelease might require F0 prefix */
 
     unsigned char mode_bits;
 } x86_common;
diff --git a/modules/arch/x86/x86bc.c b/modules/arch/x86/x86bc.c
index 1670df7..a668155 100644
--- a/modules/arch/x86/x86bc.c
+++ b/modules/arch/x86/x86bc.c
@@ -279,6 +279,24 @@
 
     for (i=0; i<num_prefixes; i++) {
         switch ((x86_parse_insn_prefix)(prefixes[i] & 0xff00)) {
+            /*To be accurate, we should enforce that TSX hints come only with a
+            predefined set of instructions, and in most cases only with F0
+            prefix. Otherwise they will have completely different semantics.
+            But F0 prefix can come only with a predefined set of instructions
+            too. And if it comes with other instructions, CPU will #UD.
+            Hence, F0-applicability should be enforced too. But it's not
+            currently. Maybe it is the decision made, that user should know
+            himself what he is doing with LOCK prefix. In this case, we should
+            not enforce TSX hints applicability too. And let user take care of
+            correct usage of TSX hints.
+            That is what we are going to do.*/
+            case X86_ACQREL:
+                if (common->acqrel_pre != 0)
+                    yasm_warn_set(YASM_WARN_GENERAL,
+                        N_("multiple XACQUIRE/XRELEASE prefixes, "
+                        "using leftmost"));
+                common->acqrel_pre = (unsigned char)prefixes[i] & 0xff;
+                break;
             case X86_LOCKREP:
                 if (common->lockrep_pre != 0)
                     yasm_warn_set(YASM_WARN_GENERAL,
@@ -395,11 +413,13 @@
 static void
 x86_common_print(const x86_common *common, FILE *f, int indent_level)
 {
-    fprintf(f, "%*sAddrSize=%u OperSize=%u LockRepPre=%02x BITS=%u\n",
+    fprintf(f, "%*sAddrSize=%u OperSize=%u LockRepPre=%02x "
+        "ACQREL_Pre=%02x BITS=%u\n",
             indent_level, "",
             (unsigned int)common->addrsize,
             (unsigned int)common->opersize,
             (unsigned int)common->lockrep_pre,
+            (unsigned int)common->acqrel_pre,
             (unsigned int)common->mode_bits);
 }
 
@@ -515,6 +535,9 @@
         len++;
     if (common->lockrep_pre != 0)
         len++;
+    if (common->acqrel_pre != 0)
+        len++;
+
 
     return len;
 }
@@ -791,6 +814,9 @@
         ((common->mode_bits != 64 && common->opersize != common->mode_bits) ||
          (common->mode_bits == 64 && common->opersize == 16)))
         YASM_WRITE_8(*bufp, 0x66);
+    /*TSX hints come before lock prefix*/
+    if (common->acqrel_pre != 0)
+        YASM_WRITE_8(*bufp, common->acqrel_pre);
     if (common->lockrep_pre != 0)
         YASM_WRITE_8(*bufp, common->lockrep_pre);
 }
diff --git a/modules/arch/x86/x86cpu.gperf b/modules/arch/x86/x86cpu.gperf
index cbdcf58..3d49574 100644
--- a/modules/arch/x86/x86cpu.gperf
+++ b/modules/arch/x86/x86cpu.gperf
@@ -390,6 +390,8 @@
 nosmx,		x86_cpu_clear,	CPU_SMX
 avx2,		x86_cpu_set,	CPU_AVX2
 noavx2,		x86_cpu_clear,	CPU_AVX2
+tsx,		x86_cpu_set,	CPU_TSX
+notsx,		x86_cpu_clear,	CPU_TSX
 bmi1,		x86_cpu_set,	CPU_BMI1
 nobmi1,		x86_cpu_clear,	CPU_BMI1
 bmi2,		x86_cpu_set,	CPU_BMI2
diff --git a/modules/arch/x86/x86id.c b/modules/arch/x86/x86id.c
index dc70026..b07c9fc 100644
--- a/modules/arch/x86/x86id.c
+++ b/modules/arch/x86/x86id.c
@@ -388,6 +388,7 @@
     common->addrsize = 0;
     common->opersize = info->opersize;
     common->lockrep_pre = 0;
+    common->acqrel_pre = 0;
     common->mode_bits = (unsigned char)mode_bits;
 }