Fix VGATHER/VPGATHER memory sizes.

These now match the per-element memory fetch sizes given in the Intel
AVX2 instruction reference: VGATHERDPD/VPGATHERDQ fetch qword elements,
and VGATHERQPS/VPGATHERQD fetch dword elements.

Reported by: nasm64developer
diff --git a/modules/arch/x86/gen_x86_insn.py b/modules/arch/x86/gen_x86_insn.py
index 985b373..fdfe31d 100755
--- a/modules/arch/x86/gen_x86_insn.py
+++ b/modules/arch/x86/gen_x86_insn.py
@@ -7028,7 +7028,7 @@
 add_insn("vpblendd", "vex_66_0F3A_imm8_avx2", modifiers=[0x02]) 
 
 # Vector register in EA.
-add_group("gather_32x_32x",
+add_group("gather_64x_64x",
     cpu=["AVX2"],
     modifiers=["Op2Add"],
     vex=128,
@@ -7036,9 +7036,9 @@
     prefix=0x66,
     opcode=[0x0F, 0x38, 0x00],
     operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
-              Operand(type="MemXMMIndex", size=32, relaxed=True, dest="EA"),
+              Operand(type="MemXMMIndex", size=64, relaxed=True, dest="EA"),
               Operand(type="SIMDReg", size=128, dest="VEX")])
-add_group("gather_32x_32x",
+add_group("gather_64x_64x",
     cpu=["AVX2"],
     modifiers=["Op2Add"],
     vex=256,
@@ -7046,10 +7046,10 @@
     prefix=0x66,
     opcode=[0x0F, 0x38, 0x00],
     operands=[Operand(type="SIMDReg", size=256, dest="Spare"),
-              Operand(type="MemXMMIndex", size=32, relaxed=True, dest="EA"),
+              Operand(type="MemXMMIndex", size=64, relaxed=True, dest="EA"),
               Operand(type="SIMDReg", size=256, dest="VEX")])
-add_insn("vgatherdpd", "gather_32x_32x", modifiers=[0x92])
-add_insn("vpgatherdq", "gather_32x_32x", modifiers=[0x90])
+add_insn("vgatherdpd", "gather_64x_64x", modifiers=[0x92])
+add_insn("vpgatherdq", "gather_64x_64x", modifiers=[0x90])
 
 add_group("gather_64x_64y",
     cpu=["AVX2"],
@@ -7097,7 +7097,7 @@
 add_insn("vgatherdps", "gather_32x_32y", modifiers=[0x92])
 add_insn("vpgatherdd", "gather_32x_32y", modifiers=[0x90])
 
-add_group("gather_64x_64y_128",
+add_group("gather_32x_32y_128",
     cpu=["AVX2"],
     modifiers=["Op2Add"],
     vex=128,
@@ -7105,9 +7105,9 @@
     prefix=0x66,
     opcode=[0x0F, 0x38, 0x00],
     operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
-              Operand(type="MemXMMIndex", size=64, relaxed=True, dest="EA"),
+              Operand(type="MemXMMIndex", size=32, relaxed=True, dest="EA"),
               Operand(type="SIMDReg", size=128, dest="VEX")])
-add_group("gather_64x_64y_128",
+add_group("gather_32x_32y_128",
     cpu=["AVX2"],
     modifiers=["Op2Add"],
     vex=256,
@@ -7115,10 +7115,10 @@
     prefix=0x66,
     opcode=[0x0F, 0x38, 0x00],
     operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
-              Operand(type="MemYMMIndex", size=64, relaxed=True, dest="EA"),
+              Operand(type="MemYMMIndex", size=32, relaxed=True, dest="EA"),
               Operand(type="SIMDReg", size=128, dest="VEX")])
-add_insn("vgatherqps", "gather_64x_64y_128", modifiers=[0x93])
-add_insn("vpgatherqd", "gather_64x_64y_128", modifiers=[0x91])
+add_insn("vgatherqps", "gather_32x_32y_128", modifiers=[0x93])
+add_insn("vpgatherqd", "gather_32x_32y_128", modifiers=[0x91])
 
 #####################################################################
 # Intel FMA instructions
diff --git a/modules/arch/x86/tests/avx2.asm b/modules/arch/x86/tests/avx2.asm
index 2806d23..e76016c 100644
--- a/modules/arch/x86/tests/avx2.asm
+++ b/modules/arch/x86/tests/avx2.asm
@@ -612,9 +612,9 @@
 vpsrlvq ymm1, ymm2, yword [rax]		; c4 e2 ed 45 08
 
 vgatherdpd xmm1, [rax+xmm1], xmm2	; c4 e2 e9 92 0c 08
-vgatherdpd xmm1, dword [rax+xmm1], xmm2	; c4 e2 e9 92 0c 08
+vgatherdpd xmm1, qword [rax+xmm1], xmm2	; c4 e2 e9 92 0c 08
 vgatherdpd ymm1, [rax+xmm1], ymm2	; c4 e2 ed 92 0c 08
-vgatherdpd ymm1, dword [rax+xmm1], ymm2	; c4 e2 ed 92 0c 08
+vgatherdpd ymm1, qword [rax+xmm1], ymm2	; c4 e2 ed 92 0c 08
 
 vgatherqpd xmm1, [rax+xmm1], xmm2	; c4 e2 e9 93 0c 08
 vgatherqpd xmm1, qword [rax+xmm1], xmm2	; c4 e2 e9 93 0c 08
@@ -627,9 +627,9 @@
 vgatherdps ymm1, dword [rax+ymm1], ymm2	; c4 e2 6d 92 0c 08
 
 vgatherqps xmm1, [rax+xmm1], xmm2	; c4 e2 69 93 0c 08
-vgatherqps xmm1, qword [rax+xmm1], xmm2	; c4 e2 69 93 0c 08
+vgatherqps xmm1, dword [rax+xmm1], xmm2	; c4 e2 69 93 0c 08
 vgatherqps xmm1, [rax+ymm1], xmm2	; c4 e2 6d 93 0c 08
-vgatherqps xmm1, qword [rax+ymm1], xmm2	; c4 e2 6d 93 0c 08
+vgatherqps xmm1, dword [rax+ymm1], xmm2	; c4 e2 6d 93 0c 08
 
 vpgatherdd xmm1, [rax+xmm1], xmm2	; c4 e2 69 90 0c 08
 vpgatherdd xmm1, dword [rax+xmm1], xmm2	; c4 e2 69 90 0c 08
@@ -637,14 +637,14 @@
 vpgatherdd ymm1, dword [rax+ymm1], ymm2	; c4 e2 6d 90 0c 08
 
 vpgatherqd xmm1, [rax+xmm1], xmm2	; c4 e2 69 91 0c 08
-vpgatherqd xmm1, qword [rax+xmm1], xmm2	; c4 e2 69 91 0c 08
+vpgatherqd xmm1, dword [rax+xmm1], xmm2	; c4 e2 69 91 0c 08
 vpgatherqd xmm1, [rax+ymm1], xmm2	; c4 e2 6d 91 0c 08
-vpgatherqd xmm1, qword [rax+ymm1], xmm2	; c4 e2 6d 91 0c 08
+vpgatherqd xmm1, dword [rax+ymm1], xmm2	; c4 e2 6d 91 0c 08
 
 vpgatherdq xmm1, [rax+xmm1], xmm2	; c4 e2 e9 90 0c 08
-vpgatherdq xmm1, dword [rax+xmm1], xmm2	; c4 e2 e9 90 0c 08
+vpgatherdq xmm1, qword [rax+xmm1], xmm2	; c4 e2 e9 90 0c 08
 vpgatherdq ymm1, [rax+xmm1], ymm2	; c4 e2 ed 90 0c 08
-vpgatherdq ymm1, dword [rax+xmm1], ymm2	; c4 e2 ed 90 0c 08
+vpgatherdq ymm1, qword [rax+xmm1], ymm2	; c4 e2 ed 90 0c 08
 
 vpgatherqq xmm1, [rax+xmm1], xmm2	; c4 e2 e9 91 0c 08
 vpgatherqq xmm1, qword [rax+xmm1], xmm2	; c4 e2 e9 91 0c 08