Add most Intel AVX2 instructions.

Reference: Intel AVX spec, rev. 11 (http://www.intel.com/software/avx)

This covers all AVX2 instructions except VGATHER*/VPGATHER*, which
require additional ModRM handling.
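
For example (sampled from the new avx2.asm test added below), forms such
as the following now assemble; the expected encodings are shown as comments:

    vpaddb ymm1, ymm2, ymm3             ; c5 ed fc cb
    vpermq ymm1, ymm2, 3                ; c4 e3 fd 00 ca 03
    vpbroadcastb ymm1, byte [rax]       ; c4 e2 7d 78 08
    vpmaskmovq ymm1, ymm2, yword [rax]  ; c4 e2 ed 8c 08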

Portions contributed by: Mark Charney <mark.charney@intel.com>

Part of [#227].
diff --git a/modules/arch/x86/gen_x86_insn.py b/modules/arch/x86/gen_x86_insn.py
index ded19ea..5dec9e8 100755
--- a/modules/arch/x86/gen_x86_insn.py
+++ b/modules/arch/x86/gen_x86_insn.py
@@ -3997,44 +3997,44 @@
 add_insn("pxor",      "mmxsse2", modifiers=[0xEF])
 
 # AVX versions don't support the MMX registers
-add_insn("vpackssdw",  "xmm_xmm128", modifiers=[0x66, 0x6B, VEXL0], avx=True)
-add_insn("vpacksswb",  "xmm_xmm128", modifiers=[0x66, 0x63, VEXL0], avx=True)
-add_insn("vpackuswb",  "xmm_xmm128", modifiers=[0x66, 0x67, VEXL0], avx=True)
-add_insn("vpaddb",     "xmm_xmm128", modifiers=[0x66, 0xFC, VEXL0], avx=True)
-add_insn("vpaddw",     "xmm_xmm128", modifiers=[0x66, 0xFD, VEXL0], avx=True)
-add_insn("vpaddd",     "xmm_xmm128", modifiers=[0x66, 0xFE, VEXL0], avx=True)
-add_insn("vpaddq",     "xmm_xmm128", modifiers=[0x66, 0xD4, VEXL0], avx=True)
-add_insn("vpaddsb",    "xmm_xmm128", modifiers=[0x66, 0xEC, VEXL0], avx=True)
-add_insn("vpaddsw",    "xmm_xmm128", modifiers=[0x66, 0xED, VEXL0], avx=True)
-add_insn("vpaddusb",   "xmm_xmm128", modifiers=[0x66, 0xDC, VEXL0], avx=True)
-add_insn("vpaddusw",   "xmm_xmm128", modifiers=[0x66, 0xDD, VEXL0], avx=True)
-add_insn("vpand",      "xmm_xmm128", modifiers=[0x66, 0xDB, VEXL0], avx=True)
-add_insn("vpandn",     "xmm_xmm128", modifiers=[0x66, 0xDF, VEXL0], avx=True)
-add_insn("vpcmpeqb",   "xmm_xmm128", modifiers=[0x66, 0x74, VEXL0], avx=True)
-add_insn("vpcmpeqw",   "xmm_xmm128", modifiers=[0x66, 0x75, VEXL0], avx=True)
-add_insn("vpcmpeqd",   "xmm_xmm128", modifiers=[0x66, 0x76, VEXL0], avx=True)
-add_insn("vpcmpgtb",   "xmm_xmm128", modifiers=[0x66, 0x64, VEXL0], avx=True)
-add_insn("vpcmpgtw",   "xmm_xmm128", modifiers=[0x66, 0x65, VEXL0], avx=True)
-add_insn("vpcmpgtd",   "xmm_xmm128", modifiers=[0x66, 0x66, VEXL0], avx=True)
-add_insn("vpmaddwd",   "xmm_xmm128", modifiers=[0x66, 0xF5, VEXL0], avx=True)
-add_insn("vpmulhw",    "xmm_xmm128", modifiers=[0x66, 0xE5, VEXL0], avx=True)
-add_insn("vpmullw",    "xmm_xmm128", modifiers=[0x66, 0xD5, VEXL0], avx=True)
-add_insn("vpor",       "xmm_xmm128", modifiers=[0x66, 0xEB, VEXL0], avx=True)
-add_insn("vpsubb",     "xmm_xmm128", modifiers=[0x66, 0xF8, VEXL0], avx=True)
-add_insn("vpsubw",     "xmm_xmm128", modifiers=[0x66, 0xF9, VEXL0], avx=True)
-add_insn("vpsubd",     "xmm_xmm128", modifiers=[0x66, 0xFA, VEXL0], avx=True)
-add_insn("vpsubq",     "xmm_xmm128", modifiers=[0x66, 0xFB, VEXL0], avx=True)
-add_insn("vpsubsb",    "xmm_xmm128", modifiers=[0x66, 0xE8, VEXL0], avx=True)
-add_insn("vpsubsw",    "xmm_xmm128", modifiers=[0x66, 0xE9, VEXL0], avx=True)
-add_insn("vpsubusb",   "xmm_xmm128", modifiers=[0x66, 0xD8, VEXL0], avx=True)
-add_insn("vpsubusw",   "xmm_xmm128", modifiers=[0x66, 0xD9, VEXL0], avx=True)
-add_insn("vpunpckhbw", "xmm_xmm128", modifiers=[0x66, 0x68, VEXL0], avx=True)
-add_insn("vpunpckhwd", "xmm_xmm128", modifiers=[0x66, 0x69, VEXL0], avx=True)
-add_insn("vpunpckhdq", "xmm_xmm128", modifiers=[0x66, 0x6A, VEXL0], avx=True)
-add_insn("vpunpcklbw", "xmm_xmm128", modifiers=[0x66, 0x60, VEXL0], avx=True)
-add_insn("vpunpcklwd", "xmm_xmm128", modifiers=[0x66, 0x61, VEXL0], avx=True)
-add_insn("vpunpckldq", "xmm_xmm128", modifiers=[0x66, 0x62, VEXL0], avx=True)
-add_insn("vpxor",      "xmm_xmm128", modifiers=[0x66, 0xEF, VEXL0], avx=True)
+add_insn("vpackssdw",  "xmm_xmm128_256avx2", modifiers=[0x66, 0x6B, VEXL0], avx=True)
+add_insn("vpacksswb",  "xmm_xmm128_256avx2", modifiers=[0x66, 0x63, VEXL0], avx=True)
+add_insn("vpackuswb",  "xmm_xmm128_256avx2", modifiers=[0x66, 0x67, VEXL0], avx=True)
+add_insn("vpaddb",     "xmm_xmm128_256avx2", modifiers=[0x66, 0xFC, VEXL0], avx=True)
+add_insn("vpaddw",     "xmm_xmm128_256avx2", modifiers=[0x66, 0xFD, VEXL0], avx=True)
+add_insn("vpaddd",     "xmm_xmm128_256avx2", modifiers=[0x66, 0xFE, VEXL0], avx=True)
+add_insn("vpaddq",     "xmm_xmm128_256avx2", modifiers=[0x66, 0xD4, VEXL0], avx=True)
+add_insn("vpaddsb",    "xmm_xmm128_256avx2", modifiers=[0x66, 0xEC, VEXL0], avx=True)
+add_insn("vpaddsw",    "xmm_xmm128_256avx2", modifiers=[0x66, 0xED, VEXL0], avx=True)
+add_insn("vpaddusb",   "xmm_xmm128_256avx2", modifiers=[0x66, 0xDC, VEXL0], avx=True)
+add_insn("vpaddusw",   "xmm_xmm128_256avx2", modifiers=[0x66, 0xDD, VEXL0], avx=True)
+add_insn("vpand",      "xmm_xmm128_256avx2", modifiers=[0x66, 0xDB, VEXL0], avx=True)
+add_insn("vpandn",     "xmm_xmm128_256avx2", modifiers=[0x66, 0xDF, VEXL0], avx=True)
+add_insn("vpcmpeqb",   "xmm_xmm128_256avx2", modifiers=[0x66, 0x74, VEXL0], avx=True)
+add_insn("vpcmpeqw",   "xmm_xmm128_256avx2", modifiers=[0x66, 0x75, VEXL0], avx=True)
+add_insn("vpcmpeqd",   "xmm_xmm128_256avx2", modifiers=[0x66, 0x76, VEXL0], avx=True)
+add_insn("vpcmpgtb",   "xmm_xmm128_256avx2", modifiers=[0x66, 0x64, VEXL0], avx=True)
+add_insn("vpcmpgtw",   "xmm_xmm128_256avx2", modifiers=[0x66, 0x65, VEXL0], avx=True)
+add_insn("vpcmpgtd",   "xmm_xmm128_256avx2", modifiers=[0x66, 0x66, VEXL0], avx=True)
+add_insn("vpmaddwd",   "xmm_xmm128_256avx2", modifiers=[0x66, 0xF5, VEXL0], avx=True)
+add_insn("vpmulhw",    "xmm_xmm128_256avx2", modifiers=[0x66, 0xE5, VEXL0], avx=True)
+add_insn("vpmullw",    "xmm_xmm128_256avx2", modifiers=[0x66, 0xD5, VEXL0], avx=True)
+add_insn("vpor",       "xmm_xmm128_256avx2", modifiers=[0x66, 0xEB, VEXL0], avx=True)
+add_insn("vpsubb",     "xmm_xmm128_256avx2", modifiers=[0x66, 0xF8, VEXL0], avx=True)
+add_insn("vpsubw",     "xmm_xmm128_256avx2", modifiers=[0x66, 0xF9, VEXL0], avx=True)
+add_insn("vpsubd",     "xmm_xmm128_256avx2", modifiers=[0x66, 0xFA, VEXL0], avx=True)
+add_insn("vpsubq",     "xmm_xmm128_256avx2", modifiers=[0x66, 0xFB, VEXL0], avx=True)
+add_insn("vpsubsb",    "xmm_xmm128_256avx2", modifiers=[0x66, 0xE8, VEXL0], avx=True)
+add_insn("vpsubsw",    "xmm_xmm128_256avx2", modifiers=[0x66, 0xE9, VEXL0], avx=True)
+add_insn("vpsubusb",   "xmm_xmm128_256avx2", modifiers=[0x66, 0xD8, VEXL0], avx=True)
+add_insn("vpsubusw",   "xmm_xmm128_256avx2", modifiers=[0x66, 0xD9, VEXL0], avx=True)
+add_insn("vpunpckhbw", "xmm_xmm128_256avx2", modifiers=[0x66, 0x68, VEXL0], avx=True)
+add_insn("vpunpckhwd", "xmm_xmm128_256avx2", modifiers=[0x66, 0x69, VEXL0], avx=True)
+add_insn("vpunpckhdq", "xmm_xmm128_256avx2", modifiers=[0x66, 0x6A, VEXL0], avx=True)
+add_insn("vpunpcklbw", "xmm_xmm128_256avx2", modifiers=[0x66, 0x60, VEXL0], avx=True)
+add_insn("vpunpcklwd", "xmm_xmm128_256avx2", modifiers=[0x66, 0x61, VEXL0], avx=True)
+add_insn("vpunpckldq", "xmm_xmm128_256avx2", modifiers=[0x66, 0x62, VEXL0], avx=True)
+add_insn("vpxor",      "xmm_xmm128_256avx2", modifiers=[0x66, 0xEF, VEXL0], avx=True)
 
 add_group("pshift",
     cpu=["MMX"],
@@ -4075,42 +4075,43 @@
 add_insn("psrlq", "pshift", modifiers=[0xD3, 0x73, 2])
 
 # Ran out of modifiers, so AVX has to be separate
-add_group("vpshift",
-    cpu=["AVX"],
-    modifiers=["Op1Add"],
-    vex=128,
-    prefix=0x66,
-    opcode=[0x0F, 0x00],
-    operands=[Operand(type="SIMDReg", size=128, dest="SpareVEX"),
-              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
-add_group("vpshift",
-    cpu=["AVX"],
-    modifiers=["Gap", "Op1Add", "SpAdd"],
-    vex=128,
-    prefix=0x66,
-    opcode=[0x0F, 0x00],
-    spare=0,
-    operands=[Operand(type="SIMDReg", size=128, dest="EAVEX"),
-              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
-add_group("vpshift",
-    cpu=["AVX"],
-    modifiers=["Op1Add"],
-    vex=128,
-    prefix=0x66,
-    opcode=[0x0F, 0x00],
-    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
-              Operand(type="SIMDReg", size=128, dest="VEX"),
-              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
-add_group("vpshift",
-    cpu=["AVX"],
-    modifiers=["Gap", "Op1Add", "SpAdd"],
-    vex=128,
-    prefix=0x66,
-    opcode=[0x0F, 0x00],
-    spare=0,
-    operands=[Operand(type="SIMDReg", size=128, dest="VEX"),
-              Operand(type="SIMDReg", size=128, dest="EA"),
-              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+for cpu, sz in zip(["AVX", "AVX2"], [128, 256]):
+    add_group("vpshift",
+        cpu=[cpu],
+        modifiers=["Op1Add"],
+        vex=sz,
+        prefix=0x66,
+        opcode=[0x0F, 0x00],
+        operands=[Operand(type="SIMDReg", size=sz, dest="SpareVEX"),
+                  Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+    add_group("vpshift",
+        cpu=[cpu],
+        modifiers=["Gap", "Op1Add", "SpAdd"],
+        vex=sz,
+        prefix=0x66,
+        opcode=[0x0F, 0x00],
+        spare=0,
+        operands=[Operand(type="SIMDReg", size=sz, dest="EAVEX"),
+                  Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+    add_group("vpshift",
+        cpu=[cpu],
+        modifiers=["Op1Add"],
+        vex=sz,
+        prefix=0x66,
+        opcode=[0x0F, 0x00],
+        operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+                  Operand(type="SIMDReg", size=sz, dest="VEX"),
+                  Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+    add_group("vpshift",
+        cpu=[cpu],
+        modifiers=["Gap", "Op1Add", "SpAdd"],
+        vex=sz,
+        prefix=0x66,
+        opcode=[0x0F, 0x00],
+        spare=0,
+        operands=[Operand(type="SIMDReg", size=sz, dest="VEX"),
+                  Operand(type="SIMDReg", size=sz, dest="EA"),
+                  Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
 
 add_insn("vpsllw", "vpshift", modifiers=[0xF1, 0x71, 6])
 add_insn("vpslld", "vpshift", modifiers=[0xF2, 0x72, 6])
@@ -4134,14 +4135,14 @@
 add_insn("psadbw",  "mmxsse2", modifiers=[0xF6], cpu=["P3", "MMX"])
 
 # AVX versions don't support MMX register
-add_insn("vpavgb",   "xmm_xmm128", modifiers=[0x66, 0xE0, VEXL0], avx=True)
-add_insn("vpavgw",   "xmm_xmm128", modifiers=[0x66, 0xE3, VEXL0], avx=True)
-add_insn("vpmaxsw",  "xmm_xmm128", modifiers=[0x66, 0xEE, VEXL0], avx=True)
-add_insn("vpmaxub",  "xmm_xmm128", modifiers=[0x66, 0xDE, VEXL0], avx=True)
-add_insn("vpminsw",  "xmm_xmm128", modifiers=[0x66, 0xEA, VEXL0], avx=True)
-add_insn("vpminub",  "xmm_xmm128", modifiers=[0x66, 0xDA, VEXL0], avx=True)
-add_insn("vpmulhuw", "xmm_xmm128", modifiers=[0x66, 0xE4, VEXL0], avx=True)
-add_insn("vpsadbw",  "xmm_xmm128", modifiers=[0x66, 0xF6, VEXL0], avx=True)
+add_insn("vpavgb",   "xmm_xmm128_256avx2", modifiers=[0x66, 0xE0, VEXL0], avx=True)
+add_insn("vpavgw",   "xmm_xmm128_256avx2", modifiers=[0x66, 0xE3, VEXL0], avx=True)
+add_insn("vpmaxsw",  "xmm_xmm128_256avx2", modifiers=[0x66, 0xEE, VEXL0], avx=True)
+add_insn("vpmaxub",  "xmm_xmm128_256avx2", modifiers=[0x66, 0xDE, VEXL0], avx=True)
+add_insn("vpminsw",  "xmm_xmm128_256avx2", modifiers=[0x66, 0xEA, VEXL0], avx=True)
+add_insn("vpminub",  "xmm_xmm128_256avx2", modifiers=[0x66, 0xDA, VEXL0], avx=True)
+add_insn("vpmulhuw", "xmm_xmm128_256avx2", modifiers=[0x66, 0xE4, VEXL0], avx=True)
+add_insn("vpsadbw",  "xmm_xmm128_256avx2", modifiers=[0x66, 0xF6, VEXL0], avx=True)
 
 add_insn("prefetchnta", "twobytemem", modifiers=[0, 0x0F, 0x18], cpu=["P3"])
 add_insn("prefetcht0", "twobytemem", modifiers=[1, 0x0F, 0x18], cpu=["P3"])
@@ -4172,6 +4173,49 @@
     vex=256,
     prefix=0x00,
     opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=256, dest="SpareVEX"),
+              Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")])
+add_group("xmm_xmm128_256",
+    cpu=["AVX"],
+    modifiers=["PreAdd", "Op1Add"],
+    vex=256,
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+              Operand(type="SIMDReg", size=256, dest="VEX"),
+              Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")])
+
+# Same as above, except 256-bit version only available in AVX2
+add_group("xmm_xmm128_256avx2",
+    cpu=["SSE"],
+    modifiers=["PreAdd", "Op1Add", "SetVEX"],
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="SpareVEX"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+add_group("xmm_xmm128_256avx2",
+    cpu=["AVX"],
+    modifiers=["PreAdd", "Op1Add"],
+    vex=128,
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="VEX"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+add_group("xmm_xmm128_256avx2",
+    cpu=["AVX2"],
+    modifiers=["PreAdd", "Op1Add"],
+    vex=256,
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=256, dest="SpareVEX"),
+              Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")])
+add_group("xmm_xmm128_256avx2",
+    cpu=["AVX2"],
+    modifiers=["PreAdd", "Op1Add"],
+    vex=256,
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
     operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
               Operand(type="SIMDReg", size=256, dest="VEX"),
               Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")])
@@ -4526,6 +4570,23 @@
 add_insn("cmpps", "xmm_xmm128_imm", modifiers=[0, 0xC2])
 add_insn("shufps", "xmm_xmm128_imm", modifiers=[0, 0xC6])
 
+# YMM register AVX2 version of above
+add_group("xmm_xmm128_imm_256avx2",
+    cpu=["SSE"],
+    modifiers=["PreAdd", "Op1Add", "SetVEX"],
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("xmm_xmm128_imm_256avx2",
+    cpu=["AVX2"],
+    modifiers=["PreAdd", "Op1Add"],
+    vex=256,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+              Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
 # YMM register and 4-operand version of above
 add_group("xmm_xmm128_imm_256",
     cpu=["SSE"],
@@ -4991,6 +5052,14 @@
     operands=[Operand(type="Reg", size=32, dest="Spare"),
               Operand(type="SIMDReg", size=128, dest="EA")])
 add_group("pmovmskb",
+    suffix="l",
+    cpu=["AVX2"],
+    vex=256,
+    prefix=0x66,
+    opcode=[0x0F, 0xD7],
+    operands=[Operand(type="Reg", size=32, dest="Spare"),
+              Operand(type="SIMDReg", size=256, dest="EA")])
+add_group("pmovmskb",
     suffix="q",
     cpu=["MMX", "P3"],
     notavx=True,
@@ -5009,6 +5078,16 @@
     opcode=[0x0F, 0xD7],
     operands=[Operand(type="Reg", size=64, dest="Spare"),
               Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("pmovmskb",
+    suffix="q",
+    cpu=["SSE2"],
+    vex=256,
+    opersize=64,
+    def_opersize_64=64,
+    prefix=0x66,
+    opcode=[0x0F, 0xD7],
+    operands=[Operand(type="Reg", size=64, dest="Spare"),
+              Operand(type="SIMDReg", size=256, dest="EA")])
 
 add_insn("pmovmskb", "pmovmskb")
 add_insn("vpmovmskb", "pmovmskb", modifiers=[VEXL0], avx=True)
@@ -5419,12 +5498,12 @@
 add_insn("vcvttsd2si", "cvt_rx_xmm64", modifiers=[0xF2, 0x2C, VEXL0], avx=True)
 # vcvttpd2dq takes xmm, ymm combination
 # vcvttps2dq is two-operand
-add_insn("vpmuludq", "xmm_xmm128", modifiers=[0x66, 0xF4, VEXL0], avx=True)
-add_insn("vpshufd", "xmm_xmm128_imm", modifiers=[0x66, 0x70, VEXL0], avx=True)
-add_insn("vpshufhw", "xmm_xmm128_imm", modifiers=[0xF3, 0x70, VEXL0], avx=True)
-add_insn("vpshuflw", "xmm_xmm128_imm", modifiers=[0xF2, 0x70, VEXL0], avx=True)
-add_insn("vpunpckhqdq", "xmm_xmm128", modifiers=[0x66, 0x6D, VEXL0], avx=True)
-add_insn("vpunpcklqdq", "xmm_xmm128", modifiers=[0x66, 0x6C, VEXL0], avx=True)
+add_insn("vpmuludq", "xmm_xmm128_256avx2", modifiers=[0x66, 0xF4, VEXL0], avx=True)
+add_insn("vpshufd", "xmm_xmm128_imm_256avx2", modifiers=[0x66, 0x70, VEXL0], avx=True)
+add_insn("vpshufhw", "xmm_xmm128_imm_256avx2", modifiers=[0xF3, 0x70, VEXL0], avx=True)
+add_insn("vpshuflw", "xmm_xmm128_imm_256avx2", modifiers=[0xF2, 0x70, VEXL0], avx=True)
+add_insn("vpunpckhqdq", "xmm_xmm128_256avx2", modifiers=[0x66, 0x6D, VEXL0], avx=True)
+add_insn("vpunpcklqdq", "xmm_xmm128_256avx2", modifiers=[0x66, 0x6C, VEXL0], avx=True)
 
 add_insn("cvtss2sd", "xmm_xmm32", modifiers=[0xF3, 0x5A], cpu=["SSE2"])
 add_insn("vcvtss2sd", "xmm_xmm32", modifiers=[0xF3, 0x5A, VEXL0], avx=True)
@@ -5480,6 +5559,25 @@
     operands=[Operand(type="SIMDReg", size=128, dest="VEX"),
               Operand(type="SIMDReg", size=128, dest="EA"),
               Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("pslrldq",
+    cpu=["AVX2"],
+    modifiers=["SpAdd"],
+    vex=256,
+    prefix=0x66,
+    opcode=[0x0F, 0x73],
+    spare=0,
+    operands=[Operand(type="SIMDReg", size=256, dest="EAVEX"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("pslrldq",
+    cpu=["AVX2"],
+    modifiers=["SpAdd"],
+    vex=256,
+    prefix=0x66,
+    opcode=[0x0F, 0x73],
+    spare=0,
+    operands=[Operand(type="SIMDReg", size=256, dest="VEX"),
+              Operand(type="SIMDReg", size=256, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
 
 add_insn("pslldq", "pslrldq", modifiers=[7])
 add_insn("psrldq", "pslrldq", modifiers=[3])
@@ -5556,6 +5654,23 @@
     operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
               Operand(type="SIMDReg", size=128, dest="VEX"),
               Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+add_group("ssse3",
+    cpu=["AVX2"],
+    modifiers=["Op2Add"],
+    vex=256,
+    prefix=0x66,
+    opcode=[0x0F, 0x38, 0x00],
+    operands=[Operand(type="SIMDReg", size=256, dest="SpareVEX"),
+              Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")])
+add_group("ssse3",
+    cpu=["AVX2"],
+    modifiers=["Op2Add"],
+    vex=256,
+    prefix=0x66,
+    opcode=[0x0F, 0x38, 0x00],
+    operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+              Operand(type="SIMDReg", size=256, dest="VEX"),
+              Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")])
 
 add_insn("pshufb",    "ssse3", modifiers=[0x00])
 add_insn("phaddw",    "ssse3", modifiers=[0x01])
@@ -5604,7 +5719,7 @@
               Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
 
 add_insn("palignr", "ssse3imm", modifiers=[0x0F])
-add_insn("vpalignr", "sse4imm", modifiers=[0x0F, VEXL0], avx=True)
+add_insn("vpalignr", "sse4imm_256avx2", modifiers=[0x0F, VEXL0], avx=True)
 
 #####################################################################
 # SSE4.1 / SSE4.2 instructions
@@ -5684,6 +5799,54 @@
     vex=256,
     prefix=0x66,
     opcode=[0x0F, 0x3A, 0x00],
+    operands=[Operand(type="SIMDReg", size=256, dest="SpareVEX"),
+              Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("sse4imm_256",
+    cpu=["AVX"],
+    modifiers=["Op2Add"],
+    vex=256,
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x00],
+    operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+              Operand(type="SIMDReg", size=256, dest="VEX"),
+              Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+# Same as above except AVX2 required for 256-bit.
+add_group("sse4imm_256avx2",
+    cpu=["SSE41"],
+    modifiers=["Op2Add", "SetVEX"],
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="SpareVEX"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("sse4imm_256avx2",
+    cpu=["AVX"],
+    modifiers=["Op2Add"],
+    vex=128,
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="VEX"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("sse4imm_256avx2",
+    cpu=["AVX2"],
+    modifiers=["Op2Add"],
+    vex=256,
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x00],
+    operands=[Operand(type="SIMDReg", size=256, dest="SpareVEX"),
+              Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("sse4imm_256avx2",
+    cpu=["AVX2"],
+    modifiers=["Op2Add"],
+    vex=256,
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x00],
     operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
               Operand(type="SIMDReg", size=256, dest="VEX"),
               Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"),
@@ -5758,13 +5921,14 @@
 add_insn("roundsd", "sse4m64imm", modifiers=[0x0B])
 add_insn("roundss", "sse4m32imm", modifiers=[0x0A])
 
-# vdppd, vmpsadbw, and vpblendw do not allow YMM registers
+# vdppd does not allow YMM registers
+# vmpsadbw and vpblendw do not allow YMM registers unless AVX2
 add_insn("vblendpd", "sse4imm_256", modifiers=[0x0D, VEXL0], avx=True)
 add_insn("vblendps", "sse4imm_256", modifiers=[0x0C, VEXL0], avx=True)
 add_insn("vdppd",    "sse4imm", modifiers=[0x41, VEXL0], avx=True)
 add_insn("vdpps",    "sse4imm_256", modifiers=[0x40, VEXL0], avx=True)
-add_insn("vmpsadbw", "sse4imm", modifiers=[0x42, VEXL0], avx=True)
-add_insn("vpblendw", "sse4imm", modifiers=[0x0E, VEXL0], avx=True)
+add_insn("vmpsadbw", "sse4imm_256avx2", modifiers=[0x42, VEXL0], avx=True)
+add_insn("vpblendw", "sse4imm_256avx2", modifiers=[0x0E, VEXL0], avx=True)
 # vroundpd and vroundps don't add another register operand
 add_insn("vroundsd", "sse4m64imm", modifiers=[0x0B, VEXL0], avx=True)
 add_insn("vroundss", "sse4m32imm", modifiers=[0x0A, VEXL0], avx=True)
@@ -5814,9 +5978,9 @@
 add_insn("vblendvpd", "avx_sse4xmm0", modifiers=[0x4B])
 add_insn("vblendvps", "avx_sse4xmm0", modifiers=[0x4A])
 
-# vpblendvb doesn't have a 256-bit form
-add_group("avx_sse4xmm0_128",
-    cpu=["AVX"],
+# vpblendvb didn't have a 256-bit form until AVX2
+add_group("avx2_sse4xmm0",
+    cpu=["AVX2"],
     modifiers=["Op2Add"],
     vex=128,
     prefix=0x66,
@@ -5825,8 +5989,18 @@
               Operand(type="SIMDReg", size=128, dest="VEX"),
               Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
               Operand(type="SIMDReg", size=128, dest="VEXImmSrc")])
+add_group("avx2_sse4xmm0",
+    cpu=["AVX2"],
+    modifiers=["Op2Add"],
+    vex=256,
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x00],
+    operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+              Operand(type="SIMDReg", size=256, dest="VEX"),
+              Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=256, dest="VEXImmSrc")])
 
-add_insn("vpblendvb", "avx_sse4xmm0_128", modifiers=[0x4C])
+add_insn("vpblendvb", "avx2_sse4xmm0", modifiers=[0x4C])
 
 for sfx, sz in zip("bwl", [8, 16, 32]):
     add_group("crc32",
@@ -5915,6 +6089,13 @@
     opcode=[0x0F, 0x38, 0x2A],
     operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
               Operand(type="Mem", size=128, relaxed=True, dest="EA")])
+add_group("movntdqa",
+    cpu=["AVX2"],
+    vex=256,
+    prefix=0x66,
+    opcode=[0x0F, 0x38, 0x2A],
+    operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+              Operand(type="Mem", size=256, relaxed=True, dest="EA")])
 
 add_insn("movntdqa", "movntdqa")
 add_insn("vmovntdqa", "movntdqa", modifiers=[VEXL0], avx=True)
@@ -6089,6 +6270,22 @@
         opcode=[0x0F, 0x38, 0x00],
         operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
                   Operand(type="SIMDReg", size=128, dest="EA")])
+    add_group("sse4m%d" % sz,
+        cpu=["AVX2"],
+        modifiers=["Op2Add"],
+        vex=256,
+        prefix=0x66,
+        opcode=[0x0F, 0x38, 0x00],
+        operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+                  Operand(type="Mem", size=sz*2, relaxed=True, dest="EA")])
+    add_group("sse4m%d" % sz,
+        cpu=["AVX2"],
+        modifiers=["Op2Add"],
+        vex=256,
+        prefix=0x66,
+        opcode=[0x0F, 0x38, 0x00],
+        operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+                  Operand(type="SIMDReg", size=128, dest="EA")])
 
 add_insn("pmovsxbw", "sse4m64", modifiers=[0x20])
 add_insn("pmovsxwd", "sse4m64", modifiers=[0x23])
@@ -6365,12 +6562,22 @@
     opcode=[0x0F, 0x38, 0x00],
     operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
               Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
-
-add_insn("vpabsb",     "avx_ssse3_2op", modifiers=[0x1C], avx=True)
-add_insn("vpabsw",     "avx_ssse3_2op", modifiers=[0x1D], avx=True)
-add_insn("vpabsd",     "avx_ssse3_2op", modifiers=[0x1E], avx=True)
 add_insn("vphminposuw", "avx_ssse3_2op", modifiers=[0x41], avx=True)
 
+# VPABS* are extended to 256-bit in AVX2
+for cpu, sz in zip(["AVX", "AVX2"], [128, 256]):
+    add_group("avx2_ssse3_2op",
+        cpu=[cpu],
+        modifiers=["Op2Add"],
+        vex=sz,
+        prefix=0x66,
+        opcode=[0x0F, 0x38, 0x00],
+        operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+                  Operand(type="SIMDRM", size=sz, relaxed=True, dest="EA")])
+add_insn("vpabsb",     "avx2_ssse3_2op", modifiers=[0x1C], avx=True)
+add_insn("vpabsw",     "avx2_ssse3_2op", modifiers=[0x1D], avx=True)
+add_insn("vpabsd",     "avx2_ssse3_2op", modifiers=[0x1E], avx=True)
+
 # Some conversion functions take xmm, ymm combination
 # Need separate x and y versions for gas mode
 add_group("avx_cvt_xmm128_x",
@@ -6437,6 +6644,20 @@
     opcode=[0x0F, 0x38, 0x18],
     operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
               Operand(type="Mem", size=32, relaxed=True, dest="EA")])
+add_group("vbroadcastss",
+    cpu=["AVX2"],
+    vex=128,
+    prefix=0x66,
+    opcode=[0x0F, 0x38, 0x18],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("vbroadcastss",
+    cpu=["AVX2"],
+    vex=256,
+    prefix=0x66,
+    opcode=[0x0F, 0x38, 0x18],
+    operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
 
 add_insn("vbroadcastss", "vbroadcastss")
 
@@ -6447,41 +6668,51 @@
     opcode=[0x0F, 0x38, 0x19],
     operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
               Operand(type="Mem", size=64, relaxed=True, dest="EA")])
+add_group("vbroadcastsd",
+    cpu=["AVX2"],
+    vex=256,
+    prefix=0x66,
+    opcode=[0x0F, 0x38, 0x19],
+    operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
 
 add_insn("vbroadcastsd", "vbroadcastsd")
 
-add_group("vbroadcastf128",
-    cpu=["AVX"],
+add_group("vbroadcastif128",
+    modifiers=["Op2Add"],
     vex=256,
     prefix=0x66,
-    opcode=[0x0F, 0x38, 0x1A],
+    opcode=[0x0F, 0x38, 0x00],
     operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
               Operand(type="Mem", size=128, relaxed=True, dest="EA")])
 
-add_insn("vbroadcastf128", "vbroadcastf128")
+add_insn("vbroadcastf128", "vbroadcastif128", modifiers=[0x1A], cpu=["AVX"])
+add_insn("vbroadcasti128", "vbroadcastif128", modifiers=[0x5A], cpu=["AVX2"])
 
-add_group("vextractf128",
-    cpu=["AVX"],
+add_group("vextractif128",
+    modifiers=["Op2Add"],
     vex=256,
     prefix=0x66,
-    opcode=[0x0F, 0x3A, 0x19],
+    opcode=[0x0F, 0x3A, 0x00],
     operands=[Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
               Operand(type="SIMDReg", size=256, dest="Spare"),
               Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
 
-add_insn("vextractf128", "vextractf128")
+add_insn("vextractf128", "vextractif128", modifiers=[0x19], cpu=["AVX"])
+add_insn("vextracti128", "vextractif128", modifiers=[0x39], cpu=["AVX2"])
 
-add_group("vinsertf128",
-    cpu=["AVX"],
+add_group("vinsertif128",
+    modifiers=["Op2Add"],
     vex=256,
     prefix=0x66,
-    opcode=[0x0F, 0x3A, 0x18],
+    opcode=[0x0F, 0x3A, 0x00],
     operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
               Operand(type="SIMDReg", size=256, dest="VEX"),
               Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
               Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
 
-add_insn("vinsertf128", "vinsertf128")
+add_insn("vinsertf128", "vinsertif128", modifiers=[0x18], cpu=["AVX"])
+add_insn("vinserti128", "vinsertif128", modifiers=[0x38], cpu=["AVX2"])
 
 add_group("vzero",
     cpu=["AVX"],
@@ -6493,7 +6724,6 @@
 add_insn("vzeroupper", "vzero", modifiers=[VEXL0])
 
 add_group("vmaskmov",
-    cpu=["AVX"],
     modifiers=["Op2Add"],
     vex=128,
     prefix=0x66,
@@ -6502,7 +6732,6 @@
               Operand(type="SIMDReg", size=128, dest="VEX"),
               Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
 add_group("vmaskmov",
-    cpu=["AVX"],
     modifiers=["Op2Add"],
     vex=256,
     prefix=0x66,
@@ -6511,7 +6740,6 @@
               Operand(type="SIMDReg", size=256, dest="VEX"),
               Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")])
 add_group("vmaskmov",
-    cpu=["AVX"],
     modifiers=["Op2Add"],
     vex=128,
     prefix=0x66,
@@ -6520,7 +6748,6 @@
               Operand(type="SIMDReg", size=128, dest="VEX"),
               Operand(type="SIMDReg", size=128, dest="Spare")])
 add_group("vmaskmov",
-    cpu=["AVX"],
     modifiers=["Op2Add"],
     vex=256,
     prefix=0x66,
@@ -6529,8 +6756,8 @@
               Operand(type="SIMDReg", size=256, dest="VEX"),
               Operand(type="SIMDReg", size=256, dest="Spare")])
 
-add_insn("vmaskmovps", "vmaskmov", modifiers=[0x2C])
-add_insn("vmaskmovpd", "vmaskmov", modifiers=[0x2D])
+add_insn("vmaskmovps", "vmaskmov", modifiers=[0x2C], cpu=["AVX"])
+add_insn("vmaskmovpd", "vmaskmov", modifiers=[0x2D], cpu=["AVX"])
 
 add_group("vpermil",
     cpu=["AVX"],
@@ -6585,6 +6812,222 @@
 add_insn("vperm2f128", "vperm2f128")
 
 #####################################################################
+# Intel AVX2 instructions
+#####################################################################
+
+# Most AVX2 instructions are mixed in with above SSEx/AVX groups.
+# Some make more sense to have separate groups.
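+#
+# Naming note for the groups below: vperm_var_avx2 takes a register/memory
+# control operand while vperm_imm_avx2 takes an imm8 control; the
+# "vexw0"/"vexw1" suffixes fix VEX.W to select dword vs. qword element forms.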
+
+# vex.vvvv encodes the second source operand (the control/index register)
+add_group("vperm_var_avx2",
+    cpu=["AVX2"],
+    modifiers=["Op2Add"],
+    vex=256,
+    vexw=0,
+    prefix=0x66,
+    opcode=[0x0F, 0x38, 0x00],
+    operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+              Operand(type="SIMDReg", size=256, dest="VEX"),
+              Operand(type="SIMDRM", size=256, relaxed=True, dest="EA")])
+
+add_insn("vpermd",     "vperm_var_avx2", modifiers=[0x36])
+add_insn("vpermps",    "vperm_var_avx2", modifiers=[0x16])
+
+# vex.vvvv=1111b
+add_group("vperm_imm_avx2",
+    cpu=["AVX2"],
+    modifiers=["Op2Add"],
+    vex=256,
+    vexw=1,
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x00],
+    operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+              Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("vpermq",     "vperm_imm_avx2", modifiers=[0x00])
+add_insn("vpermpd",    "vperm_imm_avx2", modifiers=[0x01])
+
+add_group("vperm2i128_avx2",
+    cpu=["AVX2"],
+    vex=256,
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x46],
+    operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
+              Operand(type="SIMDReg", size=256, dest="VEX"),
+              Operand(type="SIMDRM", size=256, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("vperm2i128", "vperm2i128_avx2")
+
+# vex.vvvv=1111b
+for sz in [128, 256]:
+    add_group("vpbroadcastb_avx2",
+        cpu=["AVX2"],
+        vex=sz,
+        vexw=0,
+        prefix=0x66,
+        opcode=[0x0F, 0x38, 0x78],
+        operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+                  Operand(type="SIMDReg", size=128, relaxed=True, dest="EA")])
+# vex.vvvv=1111b
+for sz in [128, 256]:
+    add_group("vpbroadcastb_avx2",
+        cpu=["AVX2"],
+        vex=sz,
+        vexw=0,
+        prefix=0x66,
+        opcode=[0x0F, 0x38, 0x78],
+        operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+                  Operand(type="RM", size=8, relaxed=True, dest="EA")])
+
+add_insn("vpbroadcastb", "vpbroadcastb_avx2")
+
+# vex.vvvv=1111b
+for sz in [128, 256]:
+    add_group("vpbroadcastw_avx2",
+        cpu=["AVX2"],
+        vex=sz,
+        vexw=0,
+        prefix=0x66,
+        opcode=[0x0F, 0x38, 0x79],
+        operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+                  Operand(type="SIMDReg", size=128, relaxed=True, dest="EA")])
+# vex.vvvv=1111b
+for sz in [128, 256]:
+    add_group("vpbroadcastw_avx2",
+        cpu=["AVX2"],
+        vex=sz,
+        vexw=0,
+        prefix=0x66,
+        opcode=[0x0F, 0x38, 0x79],
+        operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+                  Operand(type="RM", size=16, relaxed=True, dest="EA")])
+
+add_insn("vpbroadcastw", "vpbroadcastw_avx2")
+
+# vex.vvvv=1111b
+for sz in [128, 256]:
+    add_group("vpbroadcastd_avx2",
+        cpu=["AVX2"],
+        vex=sz,
+        vexw=0,
+        prefix=0x66,
+        opcode=[0x0F, 0x38, 0x58],
+        operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+                  Operand(type="SIMDReg", size=128, relaxed=True, dest="EA")])
+# vex.vvvv=1111b
+for sz in [128, 256]:
+    add_group("vpbroadcastd_avx2",
+        cpu=["AVX2"],
+        vex=sz,
+        vexw=0,
+        prefix=0x66,
+        opcode=[0x0F, 0x38, 0x58],
+        operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+                  Operand(type="RM", size=32, relaxed=True, dest="EA")])
+
+add_insn("vpbroadcastd", "vpbroadcastd_avx2")
+
+# vex.vvvv=1111b
+for sz in [128, 256]:
+    add_group("vpbroadcastq_avx2",
+        cpu=["AVX2"],
+        vex=sz,
+        vexw=0,
+        prefix=0x66,
+        opcode=[0x0F, 0x38, 0x59],
+        operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+                  Operand(type="SIMDReg", size=128, relaxed=True, dest="EA")])
+# vex.vvvv=1111b
+for sz in [128, 256]:
+    add_group("vpbroadcastq_avx2",
+        cpu=["AVX2"],
+        vex=sz,
+        vexw=0,
+        prefix=0x66,
+        opcode=[0x0F, 0x38, 0x59],
+        operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+                  Operand(type="RM", size=64, relaxed=True, dest="EA")])
+
+add_insn("vpbroadcastq", "vpbroadcastq_avx2")
+
+for sz in [128, 256]:
+    add_group("vpshiftv_vexw0_avx2",
+        cpu=["AVX2"],
+        modifiers=["Op2Add"],
+        vex=sz,
+        vexw=0,
+        prefix=0x66,
+        opcode=[0x0F, 0x38, 0x00],
+        operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+                  Operand(type="SIMDReg", size=sz, dest="VEX"),
+                  Operand(type="SIMDRM", size=sz, relaxed=True, dest="EA")])
+
+for sz in [128, 256]:
+    add_group("vpshiftv_vexw1_avx2",
+        cpu=["AVX2"],
+        modifiers=["Op2Add"],
+        vex=sz,
+        vexw=1,
+        prefix=0x66,
+        opcode=[0x0F, 0x38, 0x00],
+        operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+                  Operand(type="SIMDReg", size=sz, dest="VEX"),
+                  Operand(type="SIMDRM", size=sz, relaxed=True, dest="EA")])
+
+add_insn("vpsrlvd", "vpshiftv_vexw0_avx2", modifiers=[0x45])
+add_insn("vpsrlvq", "vpshiftv_vexw1_avx2", modifiers=[0x45])
+add_insn("vpsravd", "vpshiftv_vexw0_avx2", modifiers=[0x46])
+
+add_insn("vpsllvd", "vpshiftv_vexw0_avx2", modifiers=[0x47])
+add_insn("vpsllvq", "vpshiftv_vexw1_avx2", modifiers=[0x47])
+
+add_insn("vpmaskmovd", "vmaskmov", modifiers=[0x8C], cpu=["AVX2"])
+
+# vex.vvvv encodes the mask register
+for sz in [128, 256]:
+    add_group("vmaskmov_vexw1_avx2",
+        cpu=["AVX2"],
+        modifiers=["Op2Add"],
+        vex=sz,
+        vexw=1,
+        prefix=0x66,
+        opcode=[0x0F, 0x38, 0x00],
+        operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+                  Operand(type="SIMDReg", size=sz, dest="VEX"),
+                  Operand(type="SIMDRM", size=sz, relaxed=True, dest="EA")])
+
+for sz in [128, 256]:
+    add_group("vmaskmov_vexw1_avx2",
+        cpu=["AVX2"],
+        modifiers=["Op2Add"],
+        vex=sz,
+        vexw=1,
+        prefix=0x66,
+        opcode=[0x0F, 0x38, 0x02],
+        operands=[Operand(type="SIMDRM", size=sz, relaxed=True, dest="EA"),
+                  Operand(type="SIMDReg", size=sz, dest="VEX"),
+                  Operand(type="SIMDReg", size=sz, dest="Spare")])
+
+add_insn("vpmaskmovq", "vmaskmov_vexw1_avx2", modifiers=[0x8C])
+
+for sz in [128, 256]:
+    add_group("vex_66_0F3A_imm8_avx2",
+        cpu=["AVX2"],
+        modifiers=["Op2Add"],
+        vex=sz,
+        vexw=0,
+        prefix=0x66,
+        opcode=[0x0F, 0x3A, 0x00],
+        operands=[Operand(type="SIMDReg", size=sz, dest="Spare"),
+                  Operand(type="SIMDReg", size=sz, dest="VEX"),
+                  Operand(type="SIMDRM", size=sz, relaxed=True, dest="EA"),
+                  Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("vpblendd", "vex_66_0F3A_imm8_avx2", modifiers=[0x02]) 
+
+#####################################################################
 # Intel FMA instructions
 #####################################################################
 
diff --git a/modules/arch/x86/tests/Makefile.inc b/modules/arch/x86/tests/Makefile.inc
index fab543b..7aebd99 100644
--- a/modules/arch/x86/tests/Makefile.inc
+++ b/modules/arch/x86/tests/Makefile.inc
@@ -25,6 +25,8 @@
 EXTRA_DIST += modules/arch/x86/tests/avx.hex
 EXTRA_DIST += modules/arch/x86/tests/avx16.asm
 EXTRA_DIST += modules/arch/x86/tests/avx16.hex
+EXTRA_DIST += modules/arch/x86/tests/avx2.asm
+EXTRA_DIST += modules/arch/x86/tests/avx2.hex
 EXTRA_DIST += modules/arch/x86/tests/avxcc.asm
 EXTRA_DIST += modules/arch/x86/tests/avxcc.hex
 EXTRA_DIST += modules/arch/x86/tests/bittest.asm
diff --git a/modules/arch/x86/tests/avx2.asm b/modules/arch/x86/tests/avx2.asm
new file mode 100644
index 0000000..acf17ad
--- /dev/null
+++ b/modules/arch/x86/tests/avx2.asm
@@ -0,0 +1,612 @@
+; Exhaustive test of AVX2 instructions
+;
+;  Copyright (C) 2011  Peter Johnson
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; 1. Redistributions of source code must retain the above copyright
+;    notice, this list of conditions and the following disclaimer.
+; 2. Redistributions in binary form must reproduce the above copyright
+;    notice, this list of conditions and the following disclaimer in the
+;    documentation and/or other materials provided with the distribution.
+;
+; THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
+; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
+; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+; POSSIBILITY OF SUCH DAMAGE.
+;
+
+[bits 64]
+
+vmpsadbw ymm1, ymm3, 3			; c4 e3 75 42 cb 03
+vmpsadbw ymm1, yword [rax], 3		; c4 e3 75 42 08 03
+vmpsadbw ymm1, ymm2, ymm3, 3		; c4 e3 6d 42 cb 03
+vmpsadbw ymm1, ymm2, yword [rax], 3	; c4 e3 6d 42 08 03
+
+vpabsb ymm1, ymm2			; c4 e2 7d 1c ca
+vpabsb ymm1, yword [rax]		; c4 e2 7d 1c 08
+
+vpabsw ymm1, ymm2			; c4 e2 7d 1d ca
+vpabsw ymm1, yword [rax]		; c4 e2 7d 1d 08
+
+vpabsd ymm1, ymm2			; c4 e2 7d 1e ca
+vpabsd ymm1, yword [rax]		; c4 e2 7d 1e 08
+
+vpacksswb ymm1, ymm3			; c5 f5 63 cb
+vpacksswb ymm1, yword [rax]		; c5 f5 63 08
+vpacksswb ymm1, ymm2, ymm3		; c5 ed 63 cb
+vpacksswb ymm1, ymm2, yword [rax]	; c5 ed 63 08
+
+vpackssdw ymm1, ymm3			; c5 f5 6b cb
+vpackssdw ymm1, yword [rax]		; c5 f5 6b 08
+vpackssdw ymm1, ymm2, ymm3		; c5 ed 6b cb
+vpackssdw ymm1, ymm2, yword [rax]	; c5 ed 6b 08
+
+vpackusdw ymm1, ymm3			; c4 e2 75 2b cb
+vpackusdw ymm1, yword [rax]		; c4 e2 75 2b 08
+vpackusdw ymm1, ymm2, ymm3		; c4 e2 6d 2b cb
+vpackusdw ymm1, ymm2, yword [rax]	; c4 e2 6d 2b 08
+
+vpackuswb ymm1, ymm3			; c5 f5 67 cb
+vpackuswb ymm1, yword [rax]		; c5 f5 67 08
+vpackuswb ymm1, ymm2, ymm3		; c5 ed 67 cb
+vpackuswb ymm1, ymm2, yword [rax]	; c5 ed 67 08
+
+vpaddb ymm1, ymm3			; c5 f5 fc cb
+vpaddb ymm1, yword [rax]		; c5 f5 fc 08
+vpaddb ymm1, ymm2, ymm3			; c5 ed fc cb
+vpaddb ymm1, ymm2, yword [rax]		; c5 ed fc 08
+
+vpaddw ymm1, ymm3			; c5 f5 fd cb
+vpaddw ymm1, yword [rax]		; c5 f5 fd 08
+vpaddw ymm1, ymm2, ymm3			; c5 ed fd cb
+vpaddw ymm1, ymm2, yword [rax]		; c5 ed fd 08
+
+vpaddd ymm1, ymm3			; c5 f5 fe cb
+vpaddd ymm1, yword [rax]		; c5 f5 fe 08
+vpaddd ymm1, ymm2, ymm3			; c5 ed fe cb
+vpaddd ymm1, ymm2, yword [rax]		; c5 ed fe 08
+
+vpaddq ymm1, ymm3			; c5 f5 d4 cb
+vpaddq ymm1, yword [rax]		; c5 f5 d4 08
+vpaddq ymm1, ymm2, ymm3			; c5 ed d4 cb
+vpaddq ymm1, ymm2, yword [rax]		; c5 ed d4 08
+
+vpaddsb ymm1, ymm3			; c5 f5 ec cb
+vpaddsb ymm1, yword [rax]		; c5 f5 ec 08
+vpaddsb ymm1, ymm2, ymm3		; c5 ed ec cb
+vpaddsb ymm1, ymm2, yword [rax]		; c5 ed ec 08
+
+vpaddsw ymm1, ymm3			; c5 f5 ed cb
+vpaddsw ymm1, yword [rax]		; c5 f5 ed 08
+vpaddsw ymm1, ymm2, ymm3		; c5 ed ed cb
+vpaddsw ymm1, ymm2, yword [rax]		; c5 ed ed 08
+
+vpaddusb ymm1, ymm3			; c5 f5 dc cb
+vpaddusb ymm1, yword [rax]		; c5 f5 dc 08
+vpaddusb ymm1, ymm2, ymm3		; c5 ed dc cb
+vpaddusb ymm1, ymm2, yword [rax]	; c5 ed dc 08
+
+vpaddusw ymm1, ymm3			; c5 f5 dd cb
+vpaddusw ymm1, yword [rax]		; c5 f5 dd 08
+vpaddusw ymm1, ymm2, ymm3		; c5 ed dd cb
+vpaddusw ymm1, ymm2, yword [rax]	; c5 ed dd 08
+
+vpalignr ymm1, ymm2, ymm3, 3		; c4 e3 6d 0f cb 03
+vpalignr ymm1, ymm2, yword [rax], 3	; c4 e3 6d 0f 08 03
+
+vpand ymm1, ymm3			; c5 f5 db cb
+vpand ymm1, yword [rax]			; c5 f5 db 08
+vpand ymm1, ymm2, ymm3			; c5 ed db cb
+vpand ymm1, ymm2, yword [rax]		; c5 ed db 08
+
+vpandn ymm1, ymm3			; c5 f5 df cb
+vpandn ymm1, yword [rax]		; c5 f5 df 08
+vpandn ymm1, ymm2, ymm3			; c5 ed df cb
+vpandn ymm1, ymm2, yword [rax]		; c5 ed df 08
+
+vpavgb ymm1, ymm3			; c5 f5 e0 cb
+vpavgb ymm1, yword [rax]		; c5 f5 e0 08
+vpavgb ymm1, ymm2, ymm3			; c5 ed e0 cb
+vpavgb ymm1, ymm2, yword [rax]		; c5 ed e0 08
+
+vpavgw ymm1, ymm3			; c5 f5 e3 cb
+vpavgw ymm1, yword [rax]		; c5 f5 e3 08
+vpavgw ymm1, ymm2, ymm3			; c5 ed e3 cb
+vpavgw ymm1, ymm2, yword [rax]		; c5 ed e3 08
+
+vpblendvb ymm1, ymm2, ymm3, ymm4	; c4 e3 6d 4c cb 40
+vpblendvb ymm1, ymm2, yword [rax], ymm4	; c4 e3 6d 4c 08 40
+
+vpblendw ymm1, ymm3, 3			; c4 e3 75 0e cb 03
+vpblendw ymm1, yword [rax], 3		; c4 e3 75 0e 08 03
+vpblendw ymm1, ymm2, ymm3, 3		; c4 e3 6d 0e cb 03
+vpblendw ymm1, ymm2, yword [rax], 3	; c4 e3 6d 0e 08 03
+
+vpcmpeqb ymm1, ymm3			; c5 f5 74 cb
+vpcmpeqb ymm1, yword [rax]		; c5 f5 74 08
+vpcmpeqb ymm1, ymm2, ymm3		; c5 ed 74 cb
+vpcmpeqb ymm1, ymm2, yword [rax]	; c5 ed 74 08
+
+vpcmpeqw ymm1, ymm3			; c5 f5 75 cb
+vpcmpeqw ymm1, yword [rax]		; c5 f5 75 08
+vpcmpeqw ymm1, ymm2, ymm3		; c5 ed 75 cb
+vpcmpeqw ymm1, ymm2, yword [rax]	; c5 ed 75 08
+
+vpcmpeqd ymm1, ymm3			; c5 f5 76 cb
+vpcmpeqd ymm1, yword [rax]		; c5 f5 76 08
+vpcmpeqd ymm1, ymm2, ymm3		; c5 ed 76 cb
+vpcmpeqd ymm1, ymm2, yword [rax]	; c5 ed 76 08
+
+vpcmpeqq ymm1, ymm3			; c4 e2 75 29 cb
+vpcmpeqq ymm1, yword [rax]		; c4 e2 75 29 08
+vpcmpeqq ymm1, ymm2, ymm3		; c4 e2 6d 29 cb
+vpcmpeqq ymm1, ymm2, yword [rax]	; c4 e2 6d 29 08
+
+vpcmpgtb ymm1, ymm3			; c5 f5 64 cb
+vpcmpgtb ymm1, yword [rax]		; c5 f5 64 08
+vpcmpgtb ymm1, ymm2, ymm3		; c5 ed 64 cb
+vpcmpgtb ymm1, ymm2, yword [rax]	; c5 ed 64 08
+
+vpcmpgtw ymm1, ymm3			; c5 f5 65 cb
+vpcmpgtw ymm1, yword [rax]		; c5 f5 65 08
+vpcmpgtw ymm1, ymm2, ymm3		; c5 ed 65 cb
+vpcmpgtw ymm1, ymm2, yword [rax]	; c5 ed 65 08
+
+vpcmpgtd ymm1, ymm3			; c5 f5 66 cb
+vpcmpgtd ymm1, yword [rax]		; c5 f5 66 08
+vpcmpgtd ymm1, ymm2, ymm3		; c5 ed 66 cb
+vpcmpgtd ymm1, ymm2, yword [rax]	; c5 ed 66 08
+
+vpcmpgtq ymm1, ymm3			; c4 e2 75 37 cb
+vpcmpgtq ymm1, yword [rax]		; c4 e2 75 37 08
+vpcmpgtq ymm1, ymm2, ymm3		; c4 e2 6d 37 cb
+vpcmpgtq ymm1, ymm2, yword [rax]	; c4 e2 6d 37 08
+
+vphaddw ymm1, ymm3			; c4 e2 75 01 cb
+vphaddw ymm1, yword [rax]		; c4 e2 75 01 08
+vphaddw ymm1, ymm2, ymm3		; c4 e2 6d 01 cb
+vphaddw ymm1, ymm2, yword [rax]		; c4 e2 6d 01 08
+
+vphaddd ymm1, ymm3			; c4 e2 75 02 cb
+vphaddd ymm1, yword [rax]		; c4 e2 75 02 08
+vphaddd ymm1, ymm2, ymm3		; c4 e2 6d 02 cb
+vphaddd ymm1, ymm2, yword [rax]		; c4 e2 6d 02 08
+
+vphaddsw ymm1, ymm3			; c4 e2 75 03 cb
+vphaddsw ymm1, yword [rax]		; c4 e2 75 03 08
+vphaddsw ymm1, ymm2, ymm3		; c4 e2 6d 03 cb
+vphaddsw ymm1, ymm2, yword [rax]	; c4 e2 6d 03 08
+
+vphsubw ymm1, ymm3			; c4 e2 75 05 cb
+vphsubw ymm1, yword [rax]		; c4 e2 75 05 08
+vphsubw ymm1, ymm2, ymm3		; c4 e2 6d 05 cb
+vphsubw ymm1, ymm2, yword [rax]		; c4 e2 6d 05 08
+
+vphsubd ymm1, ymm3			; c4 e2 75 06 cb
+vphsubd ymm1, yword [rax]		; c4 e2 75 06 08
+vphsubd ymm1, ymm2, ymm3		; c4 e2 6d 06 cb
+vphsubd ymm1, ymm2, yword [rax]		; c4 e2 6d 06 08
+
+vphsubsw ymm1, ymm3			; c4 e2 75 07 cb
+vphsubsw ymm1, yword [rax]		; c4 e2 75 07 08
+vphsubsw ymm1, ymm2, ymm3		; c4 e2 6d 07 cb
+vphsubsw ymm1, ymm2, yword [rax]	; c4 e2 6d 07 08
+
+vpmaddubsw ymm1, ymm3			; c4 e2 75 04 cb
+vpmaddubsw ymm1, yword [rax]		; c4 e2 75 04 08
+vpmaddubsw ymm1, ymm2, ymm3		; c4 e2 6d 04 cb
+vpmaddubsw ymm1, ymm2, yword [rax]	; c4 e2 6d 04 08
+
+vpmaddwd ymm1, ymm3			; c5 f5 f5 cb
+vpmaddwd ymm1, yword [rax]		; c5 f5 f5 08
+vpmaddwd ymm1, ymm2, ymm3		; c5 ed f5 cb
+vpmaddwd ymm1, ymm2, yword [rax]	; c5 ed f5 08
+
+vpmaxsb ymm1, ymm3			; c4 e2 75 3c cb
+vpmaxsb ymm1, yword [rax]		; c4 e2 75 3c 08
+vpmaxsb ymm1, ymm2, ymm3		; c4 e2 6d 3c cb
+vpmaxsb ymm1, ymm2, yword [rax]		; c4 e2 6d 3c 08
+
+vpmaxsw ymm1, ymm3			; c5 f5 ee cb
+vpmaxsw ymm1, yword [rax]		; c5 f5 ee 08
+vpmaxsw ymm1, ymm2, ymm3		; c5 ed ee cb
+vpmaxsw ymm1, ymm2, yword [rax]		; c5 ed ee 08
+
+vpmaxsd ymm1, ymm3			; c4 e2 75 3d cb
+vpmaxsd ymm1, yword [rax]		; c4 e2 75 3d 08
+vpmaxsd ymm1, ymm2, ymm3		; c4 e2 6d 3d cb
+vpmaxsd ymm1, ymm2, yword [rax]		; c4 e2 6d 3d 08
+
+vpmaxub ymm1, ymm3			; c5 f5 de cb
+vpmaxub ymm1, yword [rax]		; c5 f5 de 08
+vpmaxub ymm1, ymm2, ymm3		; c5 ed de cb
+vpmaxub ymm1, ymm2, yword [rax]		; c5 ed de 08
+
+vpmaxuw ymm1, ymm3			; c4 e2 75 3e cb
+vpmaxuw ymm1, yword [rax]		; c4 e2 75 3e 08
+vpmaxuw ymm1, ymm2, ymm3		; c4 e2 6d 3e cb
+vpmaxuw ymm1, ymm2, yword [rax]		; c4 e2 6d 3e 08
+
+vpmaxud ymm1, ymm3			; c4 e2 75 3f cb
+vpmaxud ymm1, yword [rax]		; c4 e2 75 3f 08
+vpmaxud ymm1, ymm2, ymm3		; c4 e2 6d 3f cb
+vpmaxud ymm1, ymm2, yword [rax]		; c4 e2 6d 3f 08
+
+vpminsb ymm1, ymm3			; c4 e2 75 38 cb
+vpminsb ymm1, yword [rax]		; c4 e2 75 38 08
+vpminsb ymm1, ymm2, ymm3		; c4 e2 6d 38 cb
+vpminsb ymm1, ymm2, yword [rax]		; c4 e2 6d 38 08
+
+vpminsw ymm1, ymm3			; c5 f5 ea cb
+vpminsw ymm1, yword [rax]		; c5 f5 ea 08
+vpminsw ymm1, ymm2, ymm3		; c5 ed ea cb
+vpminsw ymm1, ymm2, yword [rax]		; c5 ed ea 08
+
+vpminsd ymm1, ymm3			; c4 e2 75 39 cb
+vpminsd ymm1, yword [rax]		; c4 e2 75 39 08
+vpminsd ymm1, ymm2, ymm3		; c4 e2 6d 39 cb
+vpminsd ymm1, ymm2, yword [rax]		; c4 e2 6d 39 08
+
+vpminub ymm1, ymm3			; c5 f5 da cb
+vpminub ymm1, yword [rax]		; c5 f5 da 08
+vpminub ymm1, ymm2, ymm3		; c5 ed da cb
+vpminub ymm1, ymm2, yword [rax]		; c5 ed da 08
+
+vpminuw ymm1, ymm3			; c4 e2 75 3a cb
+vpminuw ymm1, yword [rax]		; c4 e2 75 3a 08
+vpminuw ymm1, ymm2, ymm3		; c4 e2 6d 3a cb
+vpminuw ymm1, ymm2, yword [rax]		; c4 e2 6d 3a 08
+
+vpminud ymm1, ymm3			; c4 e2 75 3b cb
+vpminud ymm1, yword [rax]		; c4 e2 75 3b 08
+vpminud ymm1, ymm2, ymm3		; c4 e2 6d 3b cb
+vpminud ymm1, ymm2, yword [rax]		; c4 e2 6d 3b 08
+
+vpmovmskb eax, ymm1			; c5 fd d7 c1
+vpmovmskb rax, ymm1			; c5 fd d7 c1
+
+vpmovsxbw ymm1, xmm2			; c4 e2 7d 20 ca
+vpmovsxbw ymm1, [rax]			; c4 e2 7d 20 08
+vpmovsxbw ymm1, oword [rax]		; c4 e2 7d 20 08
+
+vpmovsxbd ymm1, xmm2			; c4 e2 7d 21 ca
+vpmovsxbd ymm1, [rax]			; c4 e2 7d 21 08
+vpmovsxbd ymm1, qword [rax]		; c4 e2 7d 21 08
+
+vpmovsxbq ymm1, xmm2			; c4 e2 7d 22 ca
+vpmovsxbq ymm1, [rax]			; c4 e2 7d 22 08
+vpmovsxbq ymm1, dword [rax]		; c4 e2 7d 22 08
+
+vpmovsxwd ymm1, xmm2			; c4 e2 7d 23 ca
+vpmovsxwd ymm1, [rax]			; c4 e2 7d 23 08
+vpmovsxwd ymm1, oword [rax]		; c4 e2 7d 23 08
+
+vpmovsxwq ymm1, xmm2			; c4 e2 7d 24 ca
+vpmovsxwq ymm1, [rax]			; c4 e2 7d 24 08
+vpmovsxwq ymm1, qword [rax]		; c4 e2 7d 24 08
+
+vpmovsxdq ymm1, xmm2			; c4 e2 7d 25 ca
+vpmovsxdq ymm1, [rax]			; c4 e2 7d 25 08
+vpmovsxdq ymm1, oword [rax]		; c4 e2 7d 25 08
+
+vpmovzxbw ymm1, xmm2			; c4 e2 7d 30 ca
+vpmovzxbw ymm1, [rax]			; c4 e2 7d 30 08
+vpmovzxbw ymm1, oword [rax]		; c4 e2 7d 30 08
+
+vpmovzxbd ymm1, xmm2			; c4 e2 7d 31 ca
+vpmovzxbd ymm1, [rax]			; c4 e2 7d 31 08
+vpmovzxbd ymm1, qword [rax]		; c4 e2 7d 31 08
+
+vpmovzxbq ymm1, xmm2			; c4 e2 7d 32 ca
+vpmovzxbq ymm1, [rax]			; c4 e2 7d 32 08
+vpmovzxbq ymm1, dword [rax]		; c4 e2 7d 32 08
+
+vpmovzxwd ymm1, xmm2			; c4 e2 7d 33 ca
+vpmovzxwd ymm1, [rax]			; c4 e2 7d 33 08
+vpmovzxwd ymm1, oword [rax]		; c4 e2 7d 33 08
+
+vpmovzxwq ymm1, xmm2			; c4 e2 7d 34 ca
+vpmovzxwq ymm1, [rax]			; c4 e2 7d 34 08
+vpmovzxwq ymm1, qword [rax]		; c4 e2 7d 34 08
+
+vpmovzxdq ymm1, xmm2			; c4 e2 7d 35 ca
+vpmovzxdq ymm1, [rax]			; c4 e2 7d 35 08
+vpmovzxdq ymm1, oword [rax]		; c4 e2 7d 35 08
+
+vpmuldq ymm1, ymm3			; c4 e2 75 28 cb
+vpmuldq ymm1, yword [rax]		; c4 e2 75 28 08
+vpmuldq ymm1, ymm2, ymm3		; c4 e2 6d 28 cb
+vpmuldq ymm1, ymm2, yword [rax]		; c4 e2 6d 28 08
+
+vpmulhrsw ymm1, ymm3			; c4 e2 75 0b cb
+vpmulhrsw ymm1, yword [rax]		; c4 e2 75 0b 08
+vpmulhrsw ymm1, ymm2, ymm3		; c4 e2 6d 0b cb
+vpmulhrsw ymm1, ymm2, yword [rax]	; c4 e2 6d 0b 08
+
+vpmulhuw ymm1, ymm3			; c5 f5 e4 cb
+vpmulhuw ymm1, yword [rax]		; c5 f5 e4 08
+vpmulhuw ymm1, ymm2, ymm3		; c5 ed e4 cb
+vpmulhuw ymm1, ymm2, yword [rax]	; c5 ed e4 08
+
+vpmulhw ymm1, ymm3			; c5 f5 e5 cb
+vpmulhw ymm1, yword [rax]		; c5 f5 e5 08
+vpmulhw ymm1, ymm2, ymm3		; c5 ed e5 cb
+vpmulhw ymm1, ymm2, yword [rax]		; c5 ed e5 08
+
+vpmullw ymm1, ymm3			; c5 f5 d5 cb
+vpmullw ymm1, yword [rax]		; c5 f5 d5 08
+vpmullw ymm1, ymm2, ymm3		; c5 ed d5 cb
+vpmullw ymm1, ymm2, yword [rax]		; c5 ed d5 08
+
+vpmulld ymm1, ymm3			; c4 e2 75 40 cb
+vpmulld ymm1, yword [rax]		; c4 e2 75 40 08
+vpmulld ymm1, ymm2, ymm3		; c4 e2 6d 40 cb
+vpmulld ymm1, ymm2, yword [rax]		; c4 e2 6d 40 08
+
+vpmuludq ymm1, ymm3			; c5 f5 f4 cb
+vpmuludq ymm1, yword [rax]		; c5 f5 f4 08
+vpmuludq ymm1, ymm2, ymm3		; c5 ed f4 cb
+vpmuludq ymm1, ymm2, yword [rax]	; c5 ed f4 08
+
+vpor ymm1, ymm3				; c5 f5 eb cb
+vpor ymm1, yword [rax]			; c5 f5 eb 08
+vpor ymm1, ymm2, ymm3			; c5 ed eb cb
+vpor ymm1, ymm2, yword [rax]		; c5 ed eb 08
+
+vpsadbw ymm1, ymm3			; c5 f5 f6 cb
+vpsadbw ymm1, yword [rax]		; c5 f5 f6 08
+vpsadbw ymm1, ymm2, ymm3		; c5 ed f6 cb
+vpsadbw ymm1, ymm2, yword [rax]		; c5 ed f6 08
+
+vpshufb ymm1, ymm3			; c4 e2 75 00 cb
+vpshufb ymm1, yword [rax]		; c4 e2 75 00 08
+vpshufb ymm1, ymm2, ymm3		; c4 e2 6d 00 cb
+vpshufb ymm1, ymm2, yword [rax]		; c4 e2 6d 00 08
+
+vpshufd ymm1, ymm3, 3			; c5 fd 70 cb 03
+vpshufd ymm1, yword [rax], 3		; c5 fd 70 08 03
+
+vpshufhw ymm1, ymm3, 3			; c5 fe 70 cb 03
+vpshufhw ymm1, yword [rax], 3		; c5 fe 70 08 03
+
+vpshuflw ymm1, ymm3, 3			; c5 ff 70 cb 03
+vpshuflw ymm1, yword [rax], 3		; c5 ff 70 08 03
+
+vpsignb ymm1, ymm3			; c4 e2 75 08 cb
+vpsignb ymm1, yword [rax]		; c4 e2 75 08 08
+vpsignb ymm1, ymm2, ymm3		; c4 e2 6d 08 cb
+vpsignb ymm1, ymm2, yword [rax]		; c4 e2 6d 08 08
+
+vpsignw ymm1, ymm3			; c4 e2 75 09 cb
+vpsignw ymm1, yword [rax]		; c4 e2 75 09 08
+vpsignw ymm1, ymm2, ymm3		; c4 e2 6d 09 cb
+vpsignw ymm1, ymm2, yword [rax]		; c4 e2 6d 09 08
+
+vpsignd ymm1, ymm3			; c4 e2 75 0a cb
+vpsignd ymm1, yword [rax]		; c4 e2 75 0a 08
+vpsignd ymm1, ymm2, ymm3		; c4 e2 6d 0a cb
+vpsignd ymm1, ymm2, yword [rax]		; c4 e2 6d 0a 08
+
+vpslldq ymm1, 3				; c5 f5 73 f9 03
+vpslldq ymm1, ymm2, 3			; c5 f5 73 fa 03
+
+vpsllw ymm1, xmm3			; c5 f5 f1 cb
+vpsllw ymm1, oword [rax]		; c5 f5 f1 08
+vpsllw ymm1, 3				; c5 f5 71 f1 03
+vpsllw ymm1, ymm2, xmm3			; c5 ed f1 cb
+vpsllw ymm1, ymm2, oword [rax]		; c5 ed f1 08
+vpsllw ymm1, ymm2, 3			; c5 f5 71 f2 03
+
+vpslld ymm1, xmm3			; c5 f5 f2 cb
+vpslld ymm1, oword [rax]		; c5 f5 f2 08
+vpslld ymm1, 3				; c5 f5 72 f1 03
+vpslld ymm1, ymm2, xmm3			; c5 ed f2 cb
+vpslld ymm1, ymm2, oword [rax]		; c5 ed f2 08
+vpslld ymm1, ymm2, 3			; c5 f5 72 f2 03
+
+vpsllq ymm1, xmm3			; c5 f5 f3 cb
+vpsllq ymm1, oword [rax]		; c5 f5 f3 08
+vpsllq ymm1, 3				; c5 f5 73 f1 03
+vpsllq ymm1, ymm2, xmm3			; c5 ed f3 cb
+vpsllq ymm1, ymm2, oword [rax]		; c5 ed f3 08
+vpsllq ymm1, ymm2, 3			; c5 f5 73 f2 03
+
+vpsraw ymm1, xmm3			; c5 f5 e1 cb
+vpsraw ymm1, oword [rax]		; c5 f5 e1 08
+vpsraw ymm1, 3				; c5 f5 71 e1 03
+vpsraw ymm1, ymm2, xmm3			; c5 ed e1 cb
+vpsraw ymm1, ymm2, oword [rax]		; c5 ed e1 08
+vpsraw ymm1, ymm2, 3			; c5 f5 71 e2 03
+
+vpsrad ymm1, xmm3			; c5 f5 e2 cb
+vpsrad ymm1, oword [rax]		; c5 f5 e2 08
+vpsrad ymm1, 3				; c5 f5 72 e1 03
+vpsrad ymm1, ymm2, xmm3			; c5 ed e2 cb
+vpsrad ymm1, ymm2, oword [rax]		; c5 ed e2 08
+vpsrad ymm1, ymm2, 3			; c5 f5 72 e2 03
+
+vpsrldq ymm1, 3				; c5 f5 73 d9 03
+vpsrldq ymm1, ymm2, 3			; c5 f5 73 da 03
+
+vpsrlw ymm1, xmm3			; c5 f5 d1 cb
+vpsrlw ymm1, oword [rax]		; c5 f5 d1 08
+vpsrlw ymm1, 3				; c5 f5 71 d1 03
+vpsrlw ymm1, ymm2, xmm3			; c5 ed d1 cb
+vpsrlw ymm1, ymm2, oword [rax]		; c5 ed d1 08
+vpsrlw ymm1, ymm2, 3			; c5 f5 71 d2 03
+
+vpsrld ymm1, xmm3			; c5 f5 d2 cb
+vpsrld ymm1, oword [rax]		; c5 f5 d2 08
+vpsrld ymm1, 3				; c5 f5 72 d1 03
+vpsrld ymm1, ymm2, xmm3			; c5 ed d2 cb
+vpsrld ymm1, ymm2, oword [rax]		; c5 ed d2 08
+vpsrld ymm1, ymm2, 3			; c5 f5 72 d2 03
+
+vpsrlq ymm1, xmm3			; c5 f5 d3 cb
+vpsrlq ymm1, oword [rax]		; c5 f5 d3 08
+vpsrlq ymm1, 3				; c5 f5 73 d1 03
+vpsrlq ymm1, ymm2, xmm3			; c5 ed d3 cb
+vpsrlq ymm1, ymm2, oword [rax]		; c5 ed d3 08
+vpsrlq ymm1, ymm2, 3			; c5 f5 73 d2 03
+
+vpsubsb ymm1, ymm3			; c5 f5 e8 cb
+vpsubsb ymm1, yword [rax]		; c5 f5 e8 08
+vpsubsb ymm1, ymm2, ymm3		; c5 ed e8 cb
+vpsubsb ymm1, ymm2, yword [rax]		; c5 ed e8 08
+
+vpsubsw ymm1, ymm3			; c5 f5 e9 cb
+vpsubsw ymm1, yword [rax]		; c5 f5 e9 08
+vpsubsw ymm1, ymm2, ymm3		; c5 ed e9 cb
+vpsubsw ymm1, ymm2, yword [rax]		; c5 ed e9 08
+
+vpsubusb ymm1, ymm3			; c5 f5 d8 cb
+vpsubusb ymm1, yword [rax]		; c5 f5 d8 08
+vpsubusb ymm1, ymm2, ymm3		; c5 ed d8 cb
+vpsubusb ymm1, ymm2, yword [rax]	; c5 ed d8 08
+
+vpsubusw ymm1, ymm3			; c5 f5 d9 cb
+vpsubusw ymm1, yword [rax]		; c5 f5 d9 08
+vpsubusw ymm1, ymm2, ymm3		; c5 ed d9 cb
+vpsubusw ymm1, ymm2, yword [rax]	; c5 ed d9 08
+
+vpunpckhbw ymm1, ymm3			; c5 f5 68 cb
+vpunpckhbw ymm1, yword [rax]		; c5 f5 68 08
+vpunpckhbw ymm1, ymm2, ymm3		; c5 ed 68 cb
+vpunpckhbw ymm1, ymm2, yword [rax]	; c5 ed 68 08
+
+vpunpckhwd ymm1, ymm3			; c5 f5 69 cb
+vpunpckhwd ymm1, yword [rax]		; c5 f5 69 08
+vpunpckhwd ymm1, ymm2, ymm3		; c5 ed 69 cb
+vpunpckhwd ymm1, ymm2, yword [rax]	; c5 ed 69 08
+
+vpunpckhdq ymm1, ymm3			; c5 f5 6a cb
+vpunpckhdq ymm1, yword [rax]		; c5 f5 6a 08
+vpunpckhdq ymm1, ymm2, ymm3		; c5 ed 6a cb
+vpunpckhdq ymm1, ymm2, yword [rax]	; c5 ed 6a 08
+
+vpunpckhqdq ymm1, ymm3			; c5 f5 6d cb
+vpunpckhqdq ymm1, yword [rax]		; c5 f5 6d 08
+vpunpckhqdq ymm1, ymm2, ymm3		; c5 ed 6d cb
+vpunpckhqdq ymm1, ymm2, yword [rax]	; c5 ed 6d 08
+
+vpunpcklbw ymm1, ymm3			; c5 f5 60 cb
+vpunpcklbw ymm1, yword [rax]		; c5 f5 60 08
+vpunpcklbw ymm1, ymm2, ymm3		; c5 ed 60 cb
+vpunpcklbw ymm1, ymm2, yword [rax]	; c5 ed 60 08
+
+vpunpcklwd ymm1, ymm3			; c5 f5 61 cb
+vpunpcklwd ymm1, yword [rax]		; c5 f5 61 08
+vpunpcklwd ymm1, ymm2, ymm3		; c5 ed 61 cb
+vpunpcklwd ymm1, ymm2, yword [rax]	; c5 ed 61 08
+
+vpunpckldq ymm1, ymm3			; c5 f5 62 cb
+vpunpckldq ymm1, yword [rax]		; c5 f5 62 08
+vpunpckldq ymm1, ymm2, ymm3		; c5 ed 62 cb
+vpunpckldq ymm1, ymm2, yword [rax]	; c5 ed 62 08
+
+vpunpcklqdq ymm1, ymm3			; c5 f5 6c cb
+vpunpcklqdq ymm1, yword [rax]		; c5 f5 6c 08
+vpunpcklqdq ymm1, ymm2, ymm3		; c5 ed 6c cb
+vpunpcklqdq ymm1, ymm2, yword [rax]	; c5 ed 6c 08
+
+vpxor ymm1, ymm3			; c5 f5 ef cb
+vpxor ymm1, yword [rax]			; c5 f5 ef 08
+vpxor ymm1, ymm2, ymm3			; c5 ed ef cb
+vpxor ymm1, ymm2, yword [rax]		; c5 ed ef 08
+
+vmovntdqa ymm1, yword [rax]		; c4 e2 7d 2a 08
+
+vbroadcastss xmm1, xmm2			; c4 e2 79 18 ca
+vbroadcastss ymm1, xmm2			; c4 e2 7d 18 ca
+
+vbroadcastsd ymm1, xmm2			; c4 e2 7d 19 ca
+
+vbroadcasti128 ymm1, oword [rax]	; c4 e2 7d 5a 08
+
+vpblendd ymm1, ymm2, ymm3, 3		; c4 e3 6d 02 cb 03
+vpblendd ymm1, ymm2, yword [rax], 3	; c4 e3 6d 02 08 03
+
+vpbroadcastb xmm1, xmm2			; c4 e2 79 78 ca
+vpbroadcastb xmm1, byte [rax]		; c4 e2 79 78 08
+vpbroadcastb ymm1, xmm2			; c4 e2 7d 78 ca
+vpbroadcastb ymm1, byte [rax]		; c4 e2 7d 78 08
+
+vpbroadcastw xmm1, xmm2			; c4 e2 79 79 ca
+vpbroadcastw xmm1, word [rax]		; c4 e2 79 79 08
+vpbroadcastw ymm1, xmm2			; c4 e2 7d 79 ca
+vpbroadcastw ymm1, word [rax]		; c4 e2 7d 79 08
+
+vpbroadcastd xmm1, xmm2			; c4 e2 79 58 ca
+vpbroadcastd xmm1, dword [rax]		; c4 e2 79 58 08
+vpbroadcastd ymm1, xmm2			; c4 e2 7d 58 ca
+vpbroadcastd ymm1, dword [rax]		; c4 e2 7d 58 08
+
+vpbroadcastq xmm1, xmm2			; c4 e2 79 59 ca
+vpbroadcastq xmm1, qword [rax]		; c4 e2 79 59 08
+vpbroadcastq ymm1, xmm2			; c4 e2 7d 59 ca
+vpbroadcastq ymm1, qword [rax]		; c4 e2 7d 59 08
+
+vpermd ymm1, ymm2, ymm3			; c4 e2 6d 36 cb
+vpermd ymm1, ymm2, yword [rax]		; c4 e2 6d 36 08
+
+vpermpd ymm1, ymm2, 3			; c4 e3 fd 01 ca 03
+vpermpd ymm1, yword [rax], 3		; c4 e3 fd 01 08 03
+
+vpermps ymm1, ymm2, ymm3		; c4 e2 6d 16 cb
+vpermps ymm1, ymm2, yword [rax]		; c4 e2 6d 16 08
+
+vpermq ymm1, ymm2, 3			; c4 e3 fd 00 ca 03
+vpermq ymm1, yword [rax], 3		; c4 e3 fd 00 08 03
+
+vperm2i128 ymm1, ymm2, ymm3, 3		; c4 e3 6d 46 cb 03
+vperm2i128 ymm1, ymm2, yword [rax], 3	; c4 e3 6d 46 08 03
+
+vextracti128 xmm1, ymm2, 3		; c4 e3 7d 39 d1 03
+vextracti128 oword [rax], ymm2, 3	; c4 e3 7d 39 10 03
+
+vinserti128 ymm1, ymm2, xmm3, 3		; c4 e3 6d 38 cb 03
+vinserti128 ymm1, ymm2, oword [rax], 3	; c4 e3 6d 38 08 03
+
+vpmaskmovd xmm1, xmm2, oword [rax]	; c4 e2 69 8c 08
+vpmaskmovd ymm1, ymm2, yword [rax]	; c4 e2 6d 8c 08
+vpmaskmovd oword [rax], xmm1, xmm2	; c4 e2 71 8e 10
+vpmaskmovd yword [rax], ymm1, ymm2	; c4 e2 75 8e 10
+
+vpmaskmovq xmm1, xmm2, oword [rax]	; c4 e2 e9 8c 08
+vpmaskmovq ymm1, ymm2, yword [rax]	; c4 e2 ed 8c 08
+vpmaskmovq oword [rax], xmm1, xmm2	; c4 e2 f1 8e 10
+vpmaskmovq yword [rax], ymm1, ymm2	; c4 e2 f5 8e 10
+
+vpsllvd xmm1, xmm2, xmm3		; c4 e2 69 47 cb
+vpsllvd xmm1, xmm2, oword [rax]		; c4 e2 69 47 08
+vpsllvd ymm1, ymm2, ymm3		; c4 e2 6d 47 cb
+vpsllvd ymm1, ymm2, yword [rax]		; c4 e2 6d 47 08
+
+vpsllvq xmm1, xmm2, xmm3		; c4 e2 e9 47 cb
+vpsllvq xmm1, xmm2, oword [rax]		; c4 e2 e9 47 08
+vpsllvq ymm1, ymm2, ymm3		; c4 e2 ed 47 cb
+vpsllvq ymm1, ymm2, yword [rax]		; c4 e2 ed 47 08
+
+vpsravd xmm1, xmm2, xmm3		; c4 e2 69 46 cb
+vpsravd xmm1, xmm2, oword [rax]		; c4 e2 69 46 08
+vpsravd ymm1, ymm2, ymm3		; c4 e2 6d 46 cb
+vpsravd ymm1, ymm2, yword [rax]		; c4 e2 6d 46 08
+
+vpsrlvd xmm1, xmm2, xmm3		; c4 e2 69 45 cb
+vpsrlvd xmm1, xmm2, oword [rax]		; c4 e2 69 45 08
+vpsrlvd ymm1, ymm2, ymm3		; c4 e2 6d 45 cb
+vpsrlvd ymm1, ymm2, yword [rax]		; c4 e2 6d 45 08
+
+vpsrlvq xmm1, xmm2, xmm3		; c4 e2 e9 45 cb
+vpsrlvq xmm1, xmm2, oword [rax]		; c4 e2 e9 45 08
+vpsrlvq ymm1, ymm2, ymm3		; c4 e2 ed 45 cb
+vpsrlvq ymm1, ymm2, yword [rax]		; c4 e2 ed 45 08
diff --git a/modules/arch/x86/tests/avx2.hex b/modules/arch/x86/tests/avx2.hex
new file mode 100644
index 0000000..3d9e9cd
--- /dev/null
+++ b/modules/arch/x86/tests/avx2.hex
@@ -0,0 +1,2105 @@
+c4 
+e3 
+75 
+42 
+cb 
+03 
+c4 
+e3 
+75 
+42 
+08 
+03 
+c4 
+e3 
+6d 
+42 
+cb 
+03 
+c4 
+e3 
+6d 
+42 
+08 
+03 
+c4 
+e2 
+7d 
+1c 
+ca 
+c4 
+e2 
+7d 
+1c 
+08 
+c4 
+e2 
+7d 
+1d 
+ca 
+c4 
+e2 
+7d 
+1d 
+08 
+c4 
+e2 
+7d 
+1e 
+ca 
+c4 
+e2 
+7d 
+1e 
+08 
+c5 
+f5 
+63 
+cb 
+c5 
+f5 
+63 
+08 
+c5 
+ed 
+63 
+cb 
+c5 
+ed 
+63 
+08 
+c5 
+f5 
+6b 
+cb 
+c5 
+f5 
+6b 
+08 
+c5 
+ed 
+6b 
+cb 
+c5 
+ed 
+6b 
+08 
+c4 
+e2 
+75 
+2b 
+cb 
+c4 
+e2 
+75 
+2b 
+08 
+c4 
+e2 
+6d 
+2b 
+cb 
+c4 
+e2 
+6d 
+2b 
+08 
+c5 
+f5 
+67 
+cb 
+c5 
+f5 
+67 
+08 
+c5 
+ed 
+67 
+cb 
+c5 
+ed 
+67 
+08 
+c5 
+f5 
+fc 
+cb 
+c5 
+f5 
+fc 
+08 
+c5 
+ed 
+fc 
+cb 
+c5 
+ed 
+fc 
+08 
+c5 
+f5 
+fd 
+cb 
+c5 
+f5 
+fd 
+08 
+c5 
+ed 
+fd 
+cb 
+c5 
+ed 
+fd 
+08 
+c5 
+f5 
+fe 
+cb 
+c5 
+f5 
+fe 
+08 
+c5 
+ed 
+fe 
+cb 
+c5 
+ed 
+fe 
+08 
+c5 
+f5 
+d4 
+cb 
+c5 
+f5 
+d4 
+08 
+c5 
+ed 
+d4 
+cb 
+c5 
+ed 
+d4 
+08 
+c5 
+f5 
+ec 
+cb 
+c5 
+f5 
+ec 
+08 
+c5 
+ed 
+ec 
+cb 
+c5 
+ed 
+ec 
+08 
+c5 
+f5 
+ed 
+cb 
+c5 
+f5 
+ed 
+08 
+c5 
+ed 
+ed 
+cb 
+c5 
+ed 
+ed 
+08 
+c5 
+f5 
+dc 
+cb 
+c5 
+f5 
+dc 
+08 
+c5 
+ed 
+dc 
+cb 
+c5 
+ed 
+dc 
+08 
+c5 
+f5 
+dd 
+cb 
+c5 
+f5 
+dd 
+08 
+c5 
+ed 
+dd 
+cb 
+c5 
+ed 
+dd 
+08 
+c4 
+e3 
+6d 
+0f 
+cb 
+03 
+c4 
+e3 
+6d 
+0f 
+08 
+03 
+c5 
+f5 
+db 
+cb 
+c5 
+f5 
+db 
+08 
+c5 
+ed 
+db 
+cb 
+c5 
+ed 
+db 
+08 
+c5 
+f5 
+df 
+cb 
+c5 
+f5 
+df 
+08 
+c5 
+ed 
+df 
+cb 
+c5 
+ed 
+df 
+08 
+c5 
+f5 
+e0 
+cb 
+c5 
+f5 
+e0 
+08 
+c5 
+ed 
+e0 
+cb 
+c5 
+ed 
+e0 
+08 
+c5 
+f5 
+e3 
+cb 
+c5 
+f5 
+e3 
+08 
+c5 
+ed 
+e3 
+cb 
+c5 
+ed 
+e3 
+08 
+c4 
+e3 
+6d 
+4c 
+cb 
+40 
+c4 
+e3 
+6d 
+4c 
+08 
+40 
+c4 
+e3 
+75 
+0e 
+cb 
+03 
+c4 
+e3 
+75 
+0e 
+08 
+03 
+c4 
+e3 
+6d 
+0e 
+cb 
+03 
+c4 
+e3 
+6d 
+0e 
+08 
+03 
+c5 
+f5 
+74 
+cb 
+c5 
+f5 
+74 
+08 
+c5 
+ed 
+74 
+cb 
+c5 
+ed 
+74 
+08 
+c5 
+f5 
+75 
+cb 
+c5 
+f5 
+75 
+08 
+c5 
+ed 
+75 
+cb 
+c5 
+ed 
+75 
+08 
+c5 
+f5 
+76 
+cb 
+c5 
+f5 
+76 
+08 
+c5 
+ed 
+76 
+cb 
+c5 
+ed 
+76 
+08 
+c4 
+e2 
+75 
+29 
+cb 
+c4 
+e2 
+75 
+29 
+08 
+c4 
+e2 
+6d 
+29 
+cb 
+c4 
+e2 
+6d 
+29 
+08 
+c5 
+f5 
+64 
+cb 
+c5 
+f5 
+64 
+08 
+c5 
+ed 
+64 
+cb 
+c5 
+ed 
+64 
+08 
+c5 
+f5 
+65 
+cb 
+c5 
+f5 
+65 
+08 
+c5 
+ed 
+65 
+cb 
+c5 
+ed 
+65 
+08 
+c5 
+f5 
+66 
+cb 
+c5 
+f5 
+66 
+08 
+c5 
+ed 
+66 
+cb 
+c5 
+ed 
+66 
+08 
+c4 
+e2 
+75 
+37 
+cb 
+c4 
+e2 
+75 
+37 
+08 
+c4 
+e2 
+6d 
+37 
+cb 
+c4 
+e2 
+6d 
+37 
+08 
+c4 
+e2 
+75 
+01 
+cb 
+c4 
+e2 
+75 
+01 
+08 
+c4 
+e2 
+6d 
+01 
+cb 
+c4 
+e2 
+6d 
+01 
+08 
+c4 
+e2 
+75 
+02 
+cb 
+c4 
+e2 
+75 
+02 
+08 
+c4 
+e2 
+6d 
+02 
+cb 
+c4 
+e2 
+6d 
+02 
+08 
+c4 
+e2 
+75 
+03 
+cb 
+c4 
+e2 
+75 
+03 
+08 
+c4 
+e2 
+6d 
+03 
+cb 
+c4 
+e2 
+6d 
+03 
+08 
+c4 
+e2 
+75 
+05 
+cb 
+c4 
+e2 
+75 
+05 
+08 
+c4 
+e2 
+6d 
+05 
+cb 
+c4 
+e2 
+6d 
+05 
+08 
+c4 
+e2 
+75 
+06 
+cb 
+c4 
+e2 
+75 
+06 
+08 
+c4 
+e2 
+6d 
+06 
+cb 
+c4 
+e2 
+6d 
+06 
+08 
+c4 
+e2 
+75 
+07 
+cb 
+c4 
+e2 
+75 
+07 
+08 
+c4 
+e2 
+6d 
+07 
+cb 
+c4 
+e2 
+6d 
+07 
+08 
+c4 
+e2 
+75 
+04 
+cb 
+c4 
+e2 
+75 
+04 
+08 
+c4 
+e2 
+6d 
+04 
+cb 
+c4 
+e2 
+6d 
+04 
+08 
+c5 
+f5 
+f5 
+cb 
+c5 
+f5 
+f5 
+08 
+c5 
+ed 
+f5 
+cb 
+c5 
+ed 
+f5 
+08 
+c4 
+e2 
+75 
+3c 
+cb 
+c4 
+e2 
+75 
+3c 
+08 
+c4 
+e2 
+6d 
+3c 
+cb 
+c4 
+e2 
+6d 
+3c 
+08 
+c5 
+f5 
+ee 
+cb 
+c5 
+f5 
+ee 
+08 
+c5 
+ed 
+ee 
+cb 
+c5 
+ed 
+ee 
+08 
+c4 
+e2 
+75 
+3d 
+cb 
+c4 
+e2 
+75 
+3d 
+08 
+c4 
+e2 
+6d 
+3d 
+cb 
+c4 
+e2 
+6d 
+3d 
+08 
+c5 
+f5 
+de 
+cb 
+c5 
+f5 
+de 
+08 
+c5 
+ed 
+de 
+cb 
+c5 
+ed 
+de 
+08 
+c4 
+e2 
+75 
+3e 
+cb 
+c4 
+e2 
+75 
+3e 
+08 
+c4 
+e2 
+6d 
+3e 
+cb 
+c4 
+e2 
+6d 
+3e 
+08 
+c4 
+e2 
+75 
+3f 
+cb 
+c4 
+e2 
+75 
+3f 
+08 
+c4 
+e2 
+6d 
+3f 
+cb 
+c4 
+e2 
+6d 
+3f 
+08 
+c4 
+e2 
+75 
+38 
+cb 
+c4 
+e2 
+75 
+38 
+08 
+c4 
+e2 
+6d 
+38 
+cb 
+c4 
+e2 
+6d 
+38 
+08 
+c5 
+f5 
+ea 
+cb 
+c5 
+f5 
+ea 
+08 
+c5 
+ed 
+ea 
+cb 
+c5 
+ed 
+ea 
+08 
+c4 
+e2 
+75 
+39 
+cb 
+c4 
+e2 
+75 
+39 
+08 
+c4 
+e2 
+6d 
+39 
+cb 
+c4 
+e2 
+6d 
+39 
+08 
+c5 
+f5 
+da 
+cb 
+c5 
+f5 
+da 
+08 
+c5 
+ed 
+da 
+cb 
+c5 
+ed 
+da 
+08 
+c4 
+e2 
+75 
+3a 
+cb 
+c4 
+e2 
+75 
+3a 
+08 
+c4 
+e2 
+6d 
+3a 
+cb 
+c4 
+e2 
+6d 
+3a 
+08 
+c4 
+e2 
+75 
+3b 
+cb 
+c4 
+e2 
+75 
+3b 
+08 
+c4 
+e2 
+6d 
+3b 
+cb 
+c4 
+e2 
+6d 
+3b 
+08 
+c5 
+fd 
+d7 
+c1 
+c5 
+fd 
+d7 
+c1 
+c4 
+e2 
+7d 
+20 
+ca 
+c4 
+e2 
+7d 
+20 
+08 
+c4 
+e2 
+7d 
+20 
+08 
+c4 
+e2 
+7d 
+21 
+ca 
+c4 
+e2 
+7d 
+21 
+08 
+c4 
+e2 
+7d 
+21 
+08 
+c4 
+e2 
+7d 
+22 
+ca 
+c4 
+e2 
+7d 
+22 
+08 
+c4 
+e2 
+7d 
+22 
+08 
+c4 
+e2 
+7d 
+23 
+ca 
+c4 
+e2 
+7d 
+23 
+08 
+c4 
+e2 
+7d 
+23 
+08 
+c4 
+e2 
+7d 
+24 
+ca 
+c4 
+e2 
+7d 
+24 
+08 
+c4 
+e2 
+7d 
+24 
+08 
+c4 
+e2 
+7d 
+25 
+ca 
+c4 
+e2 
+7d 
+25 
+08 
+c4 
+e2 
+7d 
+25 
+08 
+c4 
+e2 
+7d 
+30 
+ca 
+c4 
+e2 
+7d 
+30 
+08 
+c4 
+e2 
+7d 
+30 
+08 
+c4 
+e2 
+7d 
+31 
+ca 
+c4 
+e2 
+7d 
+31 
+08 
+c4 
+e2 
+7d 
+31 
+08 
+c4 
+e2 
+7d 
+32 
+ca 
+c4 
+e2 
+7d 
+32 
+08 
+c4 
+e2 
+7d 
+32 
+08 
+c4 
+e2 
+7d 
+33 
+ca 
+c4 
+e2 
+7d 
+33 
+08 
+c4 
+e2 
+7d 
+33 
+08 
+c4 
+e2 
+7d 
+34 
+ca 
+c4 
+e2 
+7d 
+34 
+08 
+c4 
+e2 
+7d 
+34 
+08 
+c4 
+e2 
+7d 
+35 
+ca 
+c4 
+e2 
+7d 
+35 
+08 
+c4 
+e2 
+7d 
+35 
+08 
+c4 
+e2 
+75 
+28 
+cb 
+c4 
+e2 
+75 
+28 
+08 
+c4 
+e2 
+6d 
+28 
+cb 
+c4 
+e2 
+6d 
+28 
+08 
+c4 
+e2 
+75 
+0b 
+cb 
+c4 
+e2 
+75 
+0b 
+08 
+c4 
+e2 
+6d 
+0b 
+cb 
+c4 
+e2 
+6d 
+0b 
+08 
+c5 
+f5 
+e4 
+cb 
+c5 
+f5 
+e4 
+08 
+c5 
+ed 
+e4 
+cb 
+c5 
+ed 
+e4 
+08 
+c5 
+f5 
+e5 
+cb 
+c5 
+f5 
+e5 
+08 
+c5 
+ed 
+e5 
+cb 
+c5 
+ed 
+e5 
+08 
+c5 
+f5 
+d5 
+cb 
+c5 
+f5 
+d5 
+08 
+c5 
+ed 
+d5 
+cb 
+c5 
+ed 
+d5 
+08 
+c4 
+e2 
+75 
+40 
+cb 
+c4 
+e2 
+75 
+40 
+08 
+c4 
+e2 
+6d 
+40 
+cb 
+c4 
+e2 
+6d 
+40 
+08 
+c5 
+f5 
+f4 
+cb 
+c5 
+f5 
+f4 
+08 
+c5 
+ed 
+f4 
+cb 
+c5 
+ed 
+f4 
+08 
+c5 
+f5 
+eb 
+cb 
+c5 
+f5 
+eb 
+08 
+c5 
+ed 
+eb 
+cb 
+c5 
+ed 
+eb 
+08 
+c5 
+f5 
+f6 
+cb 
+c5 
+f5 
+f6 
+08 
+c5 
+ed 
+f6 
+cb 
+c5 
+ed 
+f6 
+08 
+c4 
+e2 
+75 
+00 
+cb 
+c4 
+e2 
+75 
+00 
+08 
+c4 
+e2 
+6d 
+00 
+cb 
+c4 
+e2 
+6d 
+00 
+08 
+c5 
+fd 
+70 
+cb 
+03 
+c5 
+fd 
+70 
+08 
+03 
+c5 
+fe 
+70 
+cb 
+03 
+c5 
+fe 
+70 
+08 
+03 
+c5 
+ff 
+70 
+cb 
+03 
+c5 
+ff 
+70 
+08 
+03 
+c4 
+e2 
+75 
+08 
+cb 
+c4 
+e2 
+75 
+08 
+08 
+c4 
+e2 
+6d 
+08 
+cb 
+c4 
+e2 
+6d 
+08 
+08 
+c4 
+e2 
+75 
+09 
+cb 
+c4 
+e2 
+75 
+09 
+08 
+c4 
+e2 
+6d 
+09 
+cb 
+c4 
+e2 
+6d 
+09 
+08 
+c4 
+e2 
+75 
+0a 
+cb 
+c4 
+e2 
+75 
+0a 
+08 
+c4 
+e2 
+6d 
+0a 
+cb 
+c4 
+e2 
+6d 
+0a 
+08 
+c5 
+f5 
+73 
+f9 
+03 
+c5 
+f5 
+73 
+fa 
+03 
+c5 
+f5 
+f1 
+cb 
+c5 
+f5 
+f1 
+08 
+c5 
+f5 
+71 
+f1 
+03 
+c5 
+ed 
+f1 
+cb 
+c5 
+ed 
+f1 
+08 
+c5 
+f5 
+71 
+f2 
+03 
+c5 
+f5 
+f2 
+cb 
+c5 
+f5 
+f2 
+08 
+c5 
+f5 
+72 
+f1 
+03 
+c5 
+ed 
+f2 
+cb 
+c5 
+ed 
+f2 
+08 
+c5 
+f5 
+72 
+f2 
+03 
+c5 
+f5 
+f3 
+cb 
+c5 
+f5 
+f3 
+08 
+c5 
+f5 
+73 
+f1 
+03 
+c5 
+ed 
+f3 
+cb 
+c5 
+ed 
+f3 
+08 
+c5 
+f5 
+73 
+f2 
+03 
+c5 
+f5 
+e1 
+cb 
+c5 
+f5 
+e1 
+08 
+c5 
+f5 
+71 
+e1 
+03 
+c5 
+ed 
+e1 
+cb 
+c5 
+ed 
+e1 
+08 
+c5 
+f5 
+71 
+e2 
+03 
+c5 
+f5 
+e2 
+cb 
+c5 
+f5 
+e2 
+08 
+c5 
+f5 
+72 
+e1 
+03 
+c5 
+ed 
+e2 
+cb 
+c5 
+ed 
+e2 
+08 
+c5 
+f5 
+72 
+e2 
+03 
+c5 
+f5 
+73 
+d9 
+03 
+c5 
+f5 
+73 
+da 
+03 
+c5 
+f5 
+d1 
+cb 
+c5 
+f5 
+d1 
+08 
+c5 
+f5 
+71 
+d1 
+03 
+c5 
+ed 
+d1 
+cb 
+c5 
+ed 
+d1 
+08 
+c5 
+f5 
+71 
+d2 
+03 
+c5 
+f5 
+d2 
+cb 
+c5 
+f5 
+d2 
+08 
+c5 
+f5 
+72 
+d1 
+03 
+c5 
+ed 
+d2 
+cb 
+c5 
+ed 
+d2 
+08 
+c5 
+f5 
+72 
+d2 
+03 
+c5 
+f5 
+d3 
+cb 
+c5 
+f5 
+d3 
+08 
+c5 
+f5 
+73 
+d1 
+03 
+c5 
+ed 
+d3 
+cb 
+c5 
+ed 
+d3 
+08 
+c5 
+f5 
+73 
+d2 
+03 
+c5 
+f5 
+e8 
+cb 
+c5 
+f5 
+e8 
+08 
+c5 
+ed 
+e8 
+cb 
+c5 
+ed 
+e8 
+08 
+c5 
+f5 
+e9 
+cb 
+c5 
+f5 
+e9 
+08 
+c5 
+ed 
+e9 
+cb 
+c5 
+ed 
+e9 
+08 
+c5 
+f5 
+d8 
+cb 
+c5 
+f5 
+d8 
+08 
+c5 
+ed 
+d8 
+cb 
+c5 
+ed 
+d8 
+08 
+c5 
+f5 
+d9 
+cb 
+c5 
+f5 
+d9 
+08 
+c5 
+ed 
+d9 
+cb 
+c5 
+ed 
+d9 
+08 
+c5 
+f5 
+68 
+cb 
+c5 
+f5 
+68 
+08 
+c5 
+ed 
+68 
+cb 
+c5 
+ed 
+68 
+08 
+c5 
+f5 
+69 
+cb 
+c5 
+f5 
+69 
+08 
+c5 
+ed 
+69 
+cb 
+c5 
+ed 
+69 
+08 
+c5 
+f5 
+6a 
+cb 
+c5 
+f5 
+6a 
+08 
+c5 
+ed 
+6a 
+cb 
+c5 
+ed 
+6a 
+08 
+c5 
+f5 
+6d 
+cb 
+c5 
+f5 
+6d 
+08 
+c5 
+ed 
+6d 
+cb 
+c5 
+ed 
+6d 
+08 
+c5 
+f5 
+60 
+cb 
+c5 
+f5 
+60 
+08 
+c5 
+ed 
+60 
+cb 
+c5 
+ed 
+60 
+08 
+c5 
+f5 
+61 
+cb 
+c5 
+f5 
+61 
+08 
+c5 
+ed 
+61 
+cb 
+c5 
+ed 
+61 
+08 
+c5 
+f5 
+62 
+cb 
+c5 
+f5 
+62 
+08 
+c5 
+ed 
+62 
+cb 
+c5 
+ed 
+62 
+08 
+c5 
+f5 
+6c 
+cb 
+c5 
+f5 
+6c 
+08 
+c5 
+ed 
+6c 
+cb 
+c5 
+ed 
+6c 
+08 
+c5 
+f5 
+ef 
+cb 
+c5 
+f5 
+ef 
+08 
+c5 
+ed 
+ef 
+cb 
+c5 
+ed 
+ef 
+08 
+c4 
+e2 
+7d 
+2a 
+08 
+c4 
+e2 
+79 
+18 
+ca 
+c4 
+e2 
+7d 
+18 
+ca 
+c4 
+e2 
+7d 
+19 
+ca 
+c4 
+e2 
+7d 
+5a 
+08 
+c4 
+e3 
+6d 
+02 
+cb 
+03 
+c4 
+e3 
+6d 
+02 
+08 
+03 
+c4 
+e2 
+79 
+78 
+ca 
+c4 
+e2 
+79 
+78 
+08 
+c4 
+e2 
+7d 
+78 
+ca 
+c4 
+e2 
+7d 
+78 
+08 
+c4 
+e2 
+79 
+79 
+ca 
+c4 
+e2 
+79 
+79 
+08 
+c4 
+e2 
+7d 
+79 
+ca 
+c4 
+e2 
+7d 
+79 
+08 
+c4 
+e2 
+79 
+58 
+ca 
+c4 
+e2 
+79 
+58 
+08 
+c4 
+e2 
+7d 
+58 
+ca 
+c4 
+e2 
+7d 
+58 
+08 
+c4 
+e2 
+79 
+59 
+ca 
+c4 
+e2 
+79 
+59 
+08 
+c4 
+e2 
+7d 
+59 
+ca 
+c4 
+e2 
+7d 
+59 
+08 
+c4 
+e2 
+6d 
+36 
+cb 
+c4 
+e2 
+6d 
+36 
+08 
+c4 
+e3 
+fd 
+01 
+ca 
+03 
+c4 
+e3 
+fd 
+01 
+08 
+03 
+c4 
+e2 
+6d 
+16 
+cb 
+c4 
+e2 
+6d 
+16 
+08 
+c4 
+e3 
+fd 
+00 
+ca 
+03 
+c4 
+e3 
+fd 
+00 
+08 
+03 
+c4 
+e3 
+6d 
+46 
+cb 
+03 
+c4 
+e3 
+6d 
+46 
+08 
+03 
+c4 
+e3 
+7d 
+39 
+d1 
+03 
+c4 
+e3 
+7d 
+39 
+10 
+03 
+c4 
+e3 
+6d 
+38 
+cb 
+03 
+c4 
+e3 
+6d 
+38 
+08 
+03 
+c4 
+e2 
+69 
+8c 
+08 
+c4 
+e2 
+6d 
+8c 
+08 
+c4 
+e2 
+71 
+8e 
+10 
+c4 
+e2 
+75 
+8e 
+10 
+c4 
+e2 
+e9 
+8c 
+08 
+c4 
+e2 
+ed 
+8c 
+08 
+c4 
+e2 
+f1 
+8e 
+10 
+c4 
+e2 
+f5 
+8e 
+10 
+c4 
+e2 
+69 
+47 
+cb 
+c4 
+e2 
+69 
+47 
+08 
+c4 
+e2 
+6d 
+47 
+cb 
+c4 
+e2 
+6d 
+47 
+08 
+c4 
+e2 
+e9 
+47 
+cb 
+c4 
+e2 
+e9 
+47 
+08 
+c4 
+e2 
+ed 
+47 
+cb 
+c4 
+e2 
+ed 
+47 
+08 
+c4 
+e2 
+69 
+46 
+cb 
+c4 
+e2 
+69 
+46 
+08 
+c4 
+e2 
+6d 
+46 
+cb 
+c4 
+e2 
+6d 
+46 
+08 
+c4 
+e2 
+69 
+45 
+cb 
+c4 
+e2 
+69 
+45 
+08 
+c4 
+e2 
+6d 
+45 
+cb 
+c4 
+e2 
+6d 
+45 
+08 
+c4 
+e2 
+e9 
+45 
+cb 
+c4 
+e2 
+e9 
+45 
+08 
+c4 
+e2 
+ed 
+45 
+cb 
+c4 
+e2 
+ed 
+45 
+08