diff --git a/Makefile.am b/Makefile.am
index 31af44d..3eae0cb 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -21,6 +21,9 @@
 noinst_HEADERS = util.h
 
 BUILT_SOURCES =
+MAINTAINERCLEANFILES =
+DISTCLEANFILES =
+SUFFIXES =
 
 # configure.lineno doesn't clean up after itself?
 CLEANFILES = configure.lineno
@@ -75,8 +78,8 @@
 EXTRA_DIST += Mkfiles/vc/modules/modules.vcproj
 EXTRA_DIST += Mkfiles/vc/re2c/re2c.vcproj
 EXTRA_DIST += Mkfiles/vc/re2c/run.bat
-EXTRA_DIST += Mkfiles/vc/gap/gap.vcproj
-EXTRA_DIST += Mkfiles/vc/gap/run.bat
+EXTRA_DIST += Mkfiles/vc/genperf/genperf.vcproj
+EXTRA_DIST += Mkfiles/vc/genperf/run.bat
 EXTRA_DIST += Mkfiles/vc8/crt_secure_no_deprecate.vsprops
 EXTRA_DIST += Mkfiles/vc8/yasm.sln
 EXTRA_DIST += Mkfiles/vc8/yasm.vcproj
@@ -96,11 +99,11 @@
 EXTRA_DIST += Mkfiles/vc8/modules/modules.vcproj
 EXTRA_DIST += Mkfiles/vc8/re2c/re2c.vcproj
 EXTRA_DIST += Mkfiles/vc8/re2c/run.bat
-EXTRA_DIST += Mkfiles/vc8/gap/gap.vcproj
-EXTRA_DIST += Mkfiles/vc8/gap/run.bat
+EXTRA_DIST += Mkfiles/vc8/genperf/genperf.vcproj
+EXTRA_DIST += Mkfiles/vc8/genperf/run.bat
 
 # Until this gets fixed in automake
-DISTCLEANFILES = libyasm/stamp-h libyasm/stamp-h[0-9]*
+DISTCLEANFILES += libyasm/stamp-h libyasm/stamp-h[0-9]*
 
 ACLOCAL_AMFLAGS = -I m4
 
@@ -115,7 +118,7 @@
 uninstall-hook: python-uninstall
 
 if BUILD_MAN
-MAINTAINERCLEANFILES = $(dist_man_MANS)
+MAINTAINERCLEANFILES += $(dist_man_MANS)
 endif
 
 # genstring build
diff --git a/Mkfiles/Makefile.dj b/Mkfiles/Makefile.dj
index aac7dc8..dff9b6d 100644
--- a/Mkfiles/Makefile.dj
+++ b/Mkfiles/Makefile.dj
@@ -50,18 +50,20 @@
  modules/arch/x86/x86arch.o \
  modules/arch/x86/x86bc.o \
  modules/arch/x86/x86expr.o \
- modules/arch/x86/x86id.o
+ modules/arch/x86/x86id.o \
+ x86cpu.o \
+ x86regtmod.o
 YASM_MODULES=arch_x86
 
-#MODULES_ARCH_LC3B_OBJS= \
-# modules/arch/lc3b/lc3barch.o \
-# modules/arch/lc3b/lc3bbc.o \
-# lc3bid.o
-#YASM_MODULES+=arch_lc3b
+MODULES_ARCH_LC3B_OBJS= \
+ modules/arch/lc3b/lc3barch.o \
+ modules/arch/lc3b/lc3bbc.o \
+ lc3bid.o
+YASM_MODULES+=arch_lc3b
 
 MODULES_ARCH_OBJS= \
  $(MODULES_ARCH_X86_OBJS) \
-# $(MODULES_ARCH_LC3B_OBJS)
+ $(MODULES_ARCH_LC3B_OBJS)
 
 MODULES_DBGFMTS_OBJS= \
  modules/dbgfmts/null/null-dbgfmt.o \
@@ -170,10 +172,23 @@
 module.c: libyasm/module.in genmodule
 	./genmodule libyasm/module.in Mkfiles/Makefile.dj
 
-x86parse.c: modules/arch/x86/x86parse.gap gap
-	./gap modules/arch/x86/x86parse.gap $@
+# Best-effort: the leading '-' ignores the error in case python is not installed
+x86insn_nasm.gperf x86insn_gas.gperf x86insns.c: modules/arch/x86/gen_x86_insn.py
+	-python modules/arch/x86/gen_x86_insn.py
 
-modules/arch/x86/x86id.c: x86parse.c
+x86insn_nasm.c: x86insn_nasm.gperf genperf
+	./genperf x86insn_nasm.gperf $@
+
+x86insn_gas.c: x86insn_gas.gperf genperf
+	./genperf x86insn_gas.gperf $@
+
+x86cpu.c: modules/arch/x86/x86cpu.gperf genperf
+	./genperf modules/arch/x86/x86cpu.gperf $@
+
+x86regtmod.c: modules/arch/x86/x86regtmod.gperf genperf
+	./genperf modules/arch/x86/x86regtmod.gperf $@
+
+modules/arch/x86/x86id.c: x86insn_nasm.c x86insn_gas.c x86insns.c
 
 lc3bid.c: modules/arch/lc3b/lc3bid.re re2c
 	./re2c -s -o $@ modules/arch/lc3b/lc3bid.re
@@ -198,15 +213,15 @@
 re2c: $(RE2C_SRCS)
 	$(BUILDCC) -I. -o re2c $(RE2C_SRCS)
 
-GAP_SRCS= \
-	tools/gap/gap.c \
-	tools/gap/perfect.c \
+GENPERF_SRCS= \
+	tools/genperf/genperf.c \
+	tools/genperf/perfect.c \
 	libyasm/phash.c \
 	libyasm/xmalloc.c \
 	libyasm/xstrdup.c
 
-gap: $(GAP_SRCS)
-	$(BUILDCC) -I. -o gap $(GAP_SRCS)
+genperf: $(GENPERF_SRCS)
+	$(BUILDCC) -I. -o genperf $(GENPERF_SRCS)
 
 yasm: $(YASM_OBJS)
 	$(CC) -o yasm $(YASM_OBJS)
diff --git a/Mkfiles/Makefile.flat b/Mkfiles/Makefile.flat
index 29e2a19..42ddd69 100644
--- a/Mkfiles/Makefile.flat
+++ b/Mkfiles/Makefile.flat
@@ -53,18 +53,20 @@
  modules/arch/x86/x86arch.o \
  modules/arch/x86/x86bc.o \
  modules/arch/x86/x86expr.o \
- modules/arch/x86/x86id.o
+ modules/arch/x86/x86id.o \
+ x86cpu.o \
+ x86regtmod.o
 YASM_MODULES=arch_x86
 
-#MODULES_ARCH_LC3B_OBJS= \
-# modules/arch/lc3b/lc3barch.o \
-# modules/arch/lc3b/lc3bbc.o \
-# lc3bid.o
-#YASM_MODULES+=arch_lc3b
+MODULES_ARCH_LC3B_OBJS= \
+ modules/arch/lc3b/lc3barch.o \
+ modules/arch/lc3b/lc3bbc.o \
+ lc3bid.o
+YASM_MODULES+=arch_lc3b
 
 MODULES_ARCH_OBJS= \
  $(MODULES_ARCH_X86_OBJS) \
-# $(MODULES_ARCH_LC3B_OBJS)
+ $(MODULES_ARCH_LC3B_OBJS)
 
 MODULES_DBGFMTS_OBJS= \
  modules/dbgfmts/null/null-dbgfmt.o \
@@ -162,7 +164,7 @@
 modules/preprocs/nasm/nasm-pp.c: nasm-macros.c
 
 genversion: modules/preprocs/nasm/genversion.c
-	$(BUILDCC) -IMkfiles -o $@ $<
+	$(BUILDCC) -IMkfiles -I. -o $@ $<
 
 version.mac: genversion
 	./genversion $@
@@ -173,10 +175,23 @@
 module.c: libyasm/module.in genmodule
 	./genmodule libyasm/module.in Mkfiles/Makefile.flat
 
-x86parse.c: modules/arch/x86/x86parse.gap gap
-	./gap modules/arch/x86/x86parse.gap $@
+# Best-effort: the leading '-' ignores the error in case python is not installed
+x86insn_nasm.gperf x86insn_gas.gperf x86insns.c: modules/arch/x86/gen_x86_insn.py
+	-python modules/arch/x86/gen_x86_insn.py
 
-modules/arch/x86/x86id.c: x86parse.c
+x86insn_nasm.c: x86insn_nasm.gperf genperf
+	./genperf x86insn_nasm.gperf $@
+
+x86insn_gas.c: x86insn_gas.gperf genperf
+	./genperf x86insn_gas.gperf $@
+
+x86cpu.c: modules/arch/x86/x86cpu.gperf genperf
+	./genperf modules/arch/x86/x86cpu.gperf $@
+
+x86regtmod.c: modules/arch/x86/x86regtmod.gperf genperf
+	./genperf modules/arch/x86/x86regtmod.gperf $@
+
+modules/arch/x86/x86id.c: x86insn_nasm.c x86insn_gas.c x86insns.c
 
 lc3bid.c: modules/arch/lc3b/lc3bid.re re2c
 	./re2c -s -o $@ modules/arch/lc3b/lc3bid.re
@@ -201,15 +216,15 @@
 re2c: $(RE2C_SRCS)
 	$(BUILDCC) -I. -o re2c $(RE2C_SRCS)
 
-GAP_SRCS= \
-	tools/gap/gap.c \
-	tools/gap/perfect.c \
+GENPERF_SRCS= \
+	tools/genperf/genperf.c \
+	tools/genperf/perfect.c \
 	libyasm/phash.c \
 	libyasm/xmalloc.c \
 	libyasm/xstrdup.c
 
-gap: $(GAP_SRCS)
-	$(BUILDCC) -I. -o gap $(GAP_SRCS)
+genperf: $(GENPERF_SRCS)
+	$(BUILDCC) -I. -o genperf $(GENPERF_SRCS)
 
 yasm: $(YASM_OBJS)
 	$(CC) -o yasm $(YASM_OBJS)
diff --git a/Mkfiles/vc/gap/run.bat b/Mkfiles/vc/gap/run.bat
deleted file mode 100644
index d506080..0000000
--- a/Mkfiles/vc/gap/run.bat
+++ /dev/null
@@ -1,2 +0,0 @@
-cd ..\..\..\
-%1 modules\arch\x86\x86parse.gap x86parse.c
diff --git a/Mkfiles/vc/gap/gap.vcproj b/Mkfiles/vc/genperf/genperf.vcproj
similarity index 84%
rename from Mkfiles/vc/gap/gap.vcproj
rename to Mkfiles/vc/genperf/genperf.vcproj
index ee7c877..a0d9f4d 100644
--- a/Mkfiles/vc/gap/gap.vcproj
+++ b/Mkfiles/vc/genperf/genperf.vcproj
@@ -2,9 +2,9 @@
 <VisualStudioProject
 	ProjectType="Visual C++"
 	Version="7.10"
-	Name="gap"
-	ProjectGUID="{5758BF4E-ABC4-11DA-B012-B622A1EF5492}"
-	RootNamespace="gap"
+	Name="genperf"
+	ProjectGUID="{C45A8B59-8B59-4D5D-A8E8-FB090F8DD619}"
+	RootNamespace="genperf"
 	Keyword="Win32Proj">
 	<Platforms>
 		<Platform
@@ -33,10 +33,10 @@
 				Name="VCCustomBuildTool"/>
 			<Tool
 				Name="VCLinkerTool"
-				OutputFile="$(OutDir)/gap.exe"
+				OutputFile="$(OutDir)/genperf.exe"
 				LinkIncremental="2"
 				GenerateDebugInformation="TRUE"
-				ProgramDatabaseFile="$(OutDir)/gap.pdb"
+				ProgramDatabaseFile="$(OutDir)/genperf.pdb"
 				SubSystem="1"
 				TargetMachine="1"/>
 			<Tool
@@ -80,7 +80,7 @@
 				Name="VCCustomBuildTool"/>
 			<Tool
 				Name="VCLinkerTool"
-				OutputFile="$(OutDir)/gap.exe"
+				OutputFile="$(OutDir)/genperf.exe"
 				LinkIncremental="1"
 				GenerateDebugInformation="TRUE"
 				SubSystem="1"
@@ -116,12 +116,12 @@
 		<Filter
 			Name="Source Files"
 			Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
-			UniqueIdentifier="{7D996CB2-ABC4-11DA-B012-B622A1EF5492}">
+			UniqueIdentifier="{1062695D-1C50-4068-8313-73A409885BC1}">
 			<File
-				RelativePath="..\..\..\tools\gap\gap.c">
+				RelativePath="..\..\..\tools\genperf\genperf.c">
 			</File>
 			<File
-				RelativePath="..\..\..\tools\gap\perfect.c">
+				RelativePath="..\..\..\tools\genperf\perfect.c">
 			</File>
 			<File
 				RelativePath="..\..\..\libyasm\phash.c">
@@ -136,18 +136,18 @@
 		<Filter
 			Name="Header Files"
 			Filter="h;hpp;hxx;hm;inl;inc;xsd"
-			UniqueIdentifier="{85DD7A94-ABC4-11DA-B012-B622A1EF5492}">
+			UniqueIdentifier="{3C1E9AA8-6338-4CED-99F1-BEBA80607BD5}">
 			<File
-				RelativePath="..\..\..\tools\gap\perfect.h">
+				RelativePath="..\..\..\tools\genperf\perfect.h">
 			</File>
 			<File
-				RelativePath="..\..\..\tools\gap\standard.h">
+				RelativePath="..\..\..\tools\genperf\standard.h">
 			</File>
 		</Filter>
 		<Filter
 			Name="Resource Files"
 			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
-			UniqueIdentifier="{8EB9B268-ABC4-11DA-B012-B622A1EF5492}">
+			UniqueIdentifier="{005ED203-AC60-4E97-A0A2-8239D00786FE}">
 		</Filter>
 		<File
 			RelativePath=".\run.bat">
diff --git a/Mkfiles/vc/genperf/run.bat b/Mkfiles/vc/genperf/run.bat
new file mode 100644
index 0000000..64ec1ae
--- /dev/null
+++ b/Mkfiles/vc/genperf/run.bat
@@ -0,0 +1,19 @@
+cd ..\..\..
+@echo off
+reg query HKCR\Python.File\shell\open\command >nul: 2>&1
+if errorlevel 1 goto answer1
+:answer0
+echo ... building with Python ...
+@echo on
+modules\arch\x86\gen_x86_insn.py
+@echo off
+goto end
+:answer1
+echo ... building without Python ...
+goto end
+:end
+@echo on
+%1 x86insn_nasm.gperf x86insn_nasm.c
+%1 x86insn_gas.gperf x86insn_gas.c
+%1 modules\arch\x86\x86cpu.gperf x86cpu.c
+%1 modules\arch\x86\x86regtmod.gperf x86regtmod.c
diff --git a/Mkfiles/vc/modules/modules.vcproj b/Mkfiles/vc/modules/modules.vcproj
index 3879cd1..6d043ed 100644
--- a/Mkfiles/vc/modules/modules.vcproj
+++ b/Mkfiles/vc/modules/modules.vcproj
@@ -125,6 +125,18 @@
 				Name="arch"
 				Filter="">
 				<File
+					RelativePath="..\..\..\modules\arch\lc3b\lc3barch.c">
+				</File>
+				<File
+					RelativePath="..\..\..\modules\arch\lc3b\lc3barch.h">
+				</File>
+				<File
+					RelativePath="..\..\..\modules\arch\lc3b\lc3bbc.c">
+				</File>
+				<File
+					RelativePath="..\..\..\lc3bid.c">
+				</File>
+				<File
 					RelativePath="..\..\..\modules\arch\x86\x86arch.c">
 				</File>
 				<File
@@ -139,6 +151,12 @@
 				<File
 					RelativePath="..\..\..\modules\arch\x86\x86id.c">
 				</File>
+				<File
+					RelativePath="..\..\..\x86cpu.c">
+				</File>
+				<File
+					RelativePath="..\..\..\x86regtmod.c">
+				</File>
 			</Filter>
 			<Filter
 				Name="dbgfmts"
diff --git a/Mkfiles/vc/yasm.sln b/Mkfiles/vc/yasm.sln
index 8921a59..b0fd177 100644
--- a/Mkfiles/vc/yasm.sln
+++ b/Mkfiles/vc/yasm.sln
@@ -6,16 +6,16 @@
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "modules", "modules\modules.vcproj", "{D715A3D4-EFAA-442E-AD8B-5B4FF64E1DD6}"
 	ProjectSection(ProjectDependencies) = postProject
+		{C45A8B59-8B59-4D5D-A8E8-FB090F8DD619} = {C45A8B59-8B59-4D5D-A8E8-FB090F8DD619}
+		{225700A5-07B8-434E-AD61-555278BF6733} = {225700A5-07B8-434E-AD61-555278BF6733}
+		{29FE7874-1256-4AD6-B889-68E399DC9608} = {29FE7874-1256-4AD6-B889-68E399DC9608}
 		{3C58BE13-50A3-4583-984D-D8902B3D7713} = {3C58BE13-50A3-4583-984D-D8902B3D7713}
-		{5758BF4E-ABC4-11DA-B012-B622A1EF5492} = {5758BF4E-ABC4-11DA-B012-B622A1EF5492}
-		{29FE7874-1256-4AD6-B889-68E399DC9608} = {29FE7874-1256-4AD6-B889-68E399DC9608}
-		{225700A5-07B8-434E-AD61-555278BF6733} = {225700A5-07B8-434E-AD61-555278BF6733}
 	EndProjectSection
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "yasm", "yasm.vcproj", "{34EB1BEB-C2D6-4A52-82B7-7ACD714A30D5}"
 	ProjectSection(ProjectDependencies) = postProject
-		{29FE7874-1256-4AD6-B889-68E399DC9608} = {29FE7874-1256-4AD6-B889-68E399DC9608}
 		{D715A3D4-EFAA-442E-AD8B-5B4FF64E1DD6} = {D715A3D4-EFAA-442E-AD8B-5B4FF64E1DD6}
+		{29FE7874-1256-4AD6-B889-68E399DC9608} = {29FE7874-1256-4AD6-B889-68E399DC9608}
 		{021CEB0A-F721-4F59-B349-9CEEAF244459} = {021CEB0A-F721-4F59-B349-9CEEAF244459}
 	EndProjectSection
 EndProject
@@ -32,7 +32,7 @@
 	ProjectSection(ProjectDependencies) = postProject
 	EndProjectSection
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gap", "gap\gap.vcproj", "{5758BF4E-ABC4-11DA-B012-B622A1EF5492}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genperf", "genperf\genperf.vcproj", "{C45A8B59-8B59-4D5D-A8E8-FB090F8DD619}"
 	ProjectSection(ProjectDependencies) = postProject
 	EndProjectSection
 EndProject
@@ -74,10 +74,10 @@
 		{3C58BE13-50A3-4583-984D-D8902B3D7713}.Debug.Build.0 = Debug|Win32
 		{3C58BE13-50A3-4583-984D-D8902B3D7713}.Release.ActiveCfg = Release|Win32
 		{3C58BE13-50A3-4583-984D-D8902B3D7713}.Release.Build.0 = Release|Win32
-		{5758BF4E-ABC4-11DA-B012-B622A1EF5492}.Debug.ActiveCfg = Debug|Win32
-		{5758BF4E-ABC4-11DA-B012-B622A1EF5492}.Debug.Build.0 = Debug|Win32
-		{5758BF4E-ABC4-11DA-B012-B622A1EF5492}.Release.ActiveCfg = Release|Win32
-		{5758BF4E-ABC4-11DA-B012-B622A1EF5492}.Release.Build.0 = Release|Win32
+		{C45A8B59-8B59-4D5D-A8E8-FB090F8DD619}.Debug.ActiveCfg = Debug|Win32
+		{C45A8B59-8B59-4D5D-A8E8-FB090F8DD619}.Debug.Build.0 = Debug|Win32
+		{C45A8B59-8B59-4D5D-A8E8-FB090F8DD619}.Release.ActiveCfg = Release|Win32
+		{C45A8B59-8B59-4D5D-A8E8-FB090F8DD619}.Release.Build.0 = Release|Win32
 		{F0E8B707-00C5-4FF2-B8EF-7C39817132A0}.Debug.ActiveCfg = Debug|Win32
 		{F0E8B707-00C5-4FF2-B8EF-7C39817132A0}.Debug.Build.0 = Debug|Win32
 		{F0E8B707-00C5-4FF2-B8EF-7C39817132A0}.Release.ActiveCfg = Release|Win32
diff --git a/Mkfiles/vc8/gap/run.bat b/Mkfiles/vc8/gap/run.bat
deleted file mode 100644
index d506080..0000000
--- a/Mkfiles/vc8/gap/run.bat
+++ /dev/null
@@ -1,2 +0,0 @@
-cd ..\..\..\
-%1 modules\arch\x86\x86parse.gap x86parse.c
diff --git a/Mkfiles/vc8/gap/gap.vcproj b/Mkfiles/vc8/genperf/genperf.vcproj
similarity index 89%
rename from Mkfiles/vc8/gap/gap.vcproj
rename to Mkfiles/vc8/genperf/genperf.vcproj
index 9ba9003..0bb6bd6 100644
--- a/Mkfiles/vc8/gap/gap.vcproj
+++ b/Mkfiles/vc8/genperf/genperf.vcproj
@@ -2,9 +2,9 @@
 <VisualStudioProject
 	ProjectType="Visual C++"
 	Version="8.00"
-	Name="gap"
-	ProjectGUID="{5758BF4E-ABC4-11DA-B012-B622A1EF5492}"
-	RootNamespace="gap"
+	Name="genperf"
+	ProjectGUID="{C45A8B59-8B59-4D5D-A8E8-FB090F8DD619}"
+	RootNamespace="genperf"
 	Keyword="Win32Proj"
 	>
 	<Platforms>
@@ -64,7 +64,7 @@
 			/>
 			<Tool
 				Name="VCLinkerTool"
-				OutputFile="$(OutDir)/gap.exe"
+				OutputFile="$(OutDir)/genperf.exe"
 				LinkIncremental="2"
 				GenerateDebugInformation="true"
 				ProgramDatabaseFile="$(OutDir)\$(TargetName).pdb"
@@ -144,7 +144,7 @@
 			/>
 			<Tool
 				Name="VCLinkerTool"
-				OutputFile="$(OutDir)/gap.exe"
+				OutputFile="$(OutDir)/genperf.exe"
 				LinkIncremental="1"
 				GenerateDebugInformation="false"
 				ProgramDatabaseFile="$(OutDir)\$(TargetName).pdb"
@@ -186,14 +186,14 @@
 		<Filter
 			Name="Source Files"
 			Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
-			UniqueIdentifier="{7D996CB2-ABC4-11DA-B012-B622A1EF5492}"
+			UniqueIdentifier="{1062695D-1C50-4068-8313-73A409885BC1}"
 			>
 			<File
-				RelativePath="..\..\..\tools\gap\gap.c"
+				RelativePath="..\..\..\tools\genperf\genperf.c"
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\tools\gap\perfect.c"
+				RelativePath="..\..\..\tools\genperf\perfect.c"
 				>
 			</File>
 			<File
@@ -212,14 +212,14 @@
 		<Filter
 			Name="Header Files"
 			Filter="h;hpp;hxx;hm;inl;inc;xsd"
-			UniqueIdentifier="{85DD7A94-ABC4-11DA-B012-B622A1EF5492}"
+			UniqueIdentifier="{3C1E9AA8-6338-4CED-99F1-BEBA80607BD5}"
 			>
 			<File
-				RelativePath="..\..\..\tools\gap\perfect.h"
+				RelativePath="..\..\..\tools\genperf\perfect.h"
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\tools\gap\standard.h"
+				RelativePath="..\..\..\tools\genperf\standard.h"
 				>
 			</File>
 		</Filter>
diff --git a/Mkfiles/vc8/genperf/run.bat b/Mkfiles/vc8/genperf/run.bat
new file mode 100644
index 0000000..64ec1ae
--- /dev/null
+++ b/Mkfiles/vc8/genperf/run.bat
@@ -0,0 +1,19 @@
+cd ..\..\..
+@echo off
+reg query HKCR\Python.File\shell\open\command >nul: 2>&1
+if errorlevel 1 goto answer1
+:answer0
+echo ... building with Python ...
+@echo on
+modules\arch\x86\gen_x86_insn.py
+@echo off
+goto end
+:answer1
+echo ... building without Python ...
+goto end
+:end
+@echo on
+%1 x86insn_nasm.gperf x86insn_nasm.c
+%1 x86insn_gas.gperf x86insn_gas.c
+%1 modules\arch\x86\x86cpu.gperf x86cpu.c
+%1 modules\arch\x86\x86regtmod.gperf x86regtmod.c
diff --git a/Mkfiles/vc8/modules/modules.vcproj b/Mkfiles/vc8/modules/modules.vcproj
index 03821e7..d5b3965 100644
--- a/Mkfiles/vc8/modules/modules.vcproj
+++ b/Mkfiles/vc8/modules/modules.vcproj
@@ -330,6 +330,22 @@
 				Name="arch"
 				>
 				<File
+					RelativePath="..\..\..\modules\arch\lc3b\lc3barch.c"
+					>
+				</File>
+				<File
+					RelativePath="..\..\..\modules\arch\lc3b\lc3barch.h"
+					>
+				</File>
+				<File
+					RelativePath="..\..\..\modules\arch\lc3b\lc3bbc.c"
+					>
+				</File>
+				<File
+					RelativePath="..\..\..\lc3bid.c"
+					>
+				</File>
+				<File
 					RelativePath="..\..\..\modules\arch\x86\x86arch.c"
 					>
 				</File>
@@ -349,6 +365,14 @@
 					RelativePath="..\..\..\modules\arch\x86\x86id.c"
 					>
 				</File>
+				<File
+					RelativePath="..\..\..\x86cpu.c"
+					>
+				</File>
+				<File
+					RelativePath="..\..\..\x86regtmod.c"
+					>
+				</File>
 			</Filter>
 			<Filter
 				Name="dbgfmts"
diff --git a/Mkfiles/vc8/yasm.sln b/Mkfiles/vc8/yasm.sln
index 6717fac..03c56f6 100644
--- a/Mkfiles/vc8/yasm.sln
+++ b/Mkfiles/vc8/yasm.sln
@@ -7,17 +7,17 @@
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "modules", "modules\modules.vcproj", "{D715A3D4-EFAA-442E-AD8B-5B4FF64E1DD6}"
 	ProjectSection(ProjectDependencies) = postProject
-		{3C58BE13-50A3-4583-984D-D8902B3D7713} = {3C58BE13-50A3-4583-984D-D8902B3D7713}
-		{5758BF4E-ABC4-11DA-B012-B622A1EF5492} = {5758BF4E-ABC4-11DA-B012-B622A1EF5492}
-		{29FE7874-1256-4AD6-B889-68E399DC9608} = {29FE7874-1256-4AD6-B889-68E399DC9608}
+		{C45A8B59-8B59-4D5D-A8E8-FB090F8DD619} = {C45A8B59-8B59-4D5D-A8E8-FB090F8DD619}
 		{225700A5-07B8-434E-AD61-555278BF6733} = {225700A5-07B8-434E-AD61-555278BF6733}
+		{29FE7874-1256-4AD6-B889-68E399DC9608} = {29FE7874-1256-4AD6-B889-68E399DC9608}
+		{3C58BE13-50A3-4583-984D-D8902B3D7713} = {3C58BE13-50A3-4583-984D-D8902B3D7713}
 	EndProjectSection
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "yasm", "yasm.vcproj", "{34EB1BEB-C2D6-4A52-82B7-7ACD714A30D5}"
 	ProjectSection(ProjectDependencies) = postProject
-		{021CEB0A-F721-4F59-B349-9CEEAF244459} = {021CEB0A-F721-4F59-B349-9CEEAF244459}
-		{29FE7874-1256-4AD6-B889-68E399DC9608} = {29FE7874-1256-4AD6-B889-68E399DC9608}
 		{D715A3D4-EFAA-442E-AD8B-5B4FF64E1DD6} = {D715A3D4-EFAA-442E-AD8B-5B4FF64E1DD6}
+		{29FE7874-1256-4AD6-B889-68E399DC9608} = {29FE7874-1256-4AD6-B889-68E399DC9608}
+		{021CEB0A-F721-4F59-B349-9CEEAF244459} = {021CEB0A-F721-4F59-B349-9CEEAF244459}
 	EndProjectSection
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genmacro", "genmacro\genmacro.vcproj", "{225700A5-07B8-434E-AD61-555278BF6733}"
@@ -29,7 +29,7 @@
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "re2c", "re2c\re2c.vcproj", "{3C58BE13-50A3-4583-984D-D8902B3D7713}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gap", "gap\gap.vcproj", "{5758BF4E-ABC4-11DA-B012-B622A1EF5492}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genperf", "genperf\genperf.vcproj", "{C45A8B59-8B59-4D5D-A8E8-FB090F8DD619}"
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genmodule", "genmodule\genmodule.vcproj", "{F0E8B707-00C5-4FF2-B8EF-7C39817132A0}"
 EndProject
@@ -97,14 +97,14 @@
 		{3C58BE13-50A3-4583-984D-D8902B3D7713}.Release|Win32.Build.0 = Release|Win32
 		{3C58BE13-50A3-4583-984D-D8902B3D7713}.Release|x64.ActiveCfg = Release|Win32
 		{3C58BE13-50A3-4583-984D-D8902B3D7713}.Release|x64.Build.0 = Release|Win32
-		{5758BF4E-ABC4-11DA-B012-B622A1EF5492}.Debug|Win32.ActiveCfg = Debug|Win32
-		{5758BF4E-ABC4-11DA-B012-B622A1EF5492}.Debug|Win32.Build.0 = Debug|Win32
-		{5758BF4E-ABC4-11DA-B012-B622A1EF5492}.Debug|x64.ActiveCfg = Debug|Win32
-		{5758BF4E-ABC4-11DA-B012-B622A1EF5492}.Debug|x64.Build.0 = Debug|Win32
-		{5758BF4E-ABC4-11DA-B012-B622A1EF5492}.Release|Win32.ActiveCfg = Release|Win32
-		{5758BF4E-ABC4-11DA-B012-B622A1EF5492}.Release|Win32.Build.0 = Release|Win32
-		{5758BF4E-ABC4-11DA-B012-B622A1EF5492}.Release|x64.ActiveCfg = Release|Win32
-		{5758BF4E-ABC4-11DA-B012-B622A1EF5492}.Release|x64.Build.0 = Release|Win32
+		{C45A8B59-8B59-4D5D-A8E8-FB090F8DD619}.Debug|Win32.ActiveCfg = Debug|Win32
+		{C45A8B59-8B59-4D5D-A8E8-FB090F8DD619}.Debug|Win32.Build.0 = Debug|Win32
+		{C45A8B59-8B59-4D5D-A8E8-FB090F8DD619}.Debug|x64.ActiveCfg = Debug|Win32
+		{C45A8B59-8B59-4D5D-A8E8-FB090F8DD619}.Debug|x64.Build.0 = Debug|Win32
+		{C45A8B59-8B59-4D5D-A8E8-FB090F8DD619}.Release|Win32.ActiveCfg = Release|Win32
+		{C45A8B59-8B59-4D5D-A8E8-FB090F8DD619}.Release|Win32.Build.0 = Release|Win32
+		{C45A8B59-8B59-4D5D-A8E8-FB090F8DD619}.Release|x64.ActiveCfg = Release|Win32
+		{C45A8B59-8B59-4D5D-A8E8-FB090F8DD619}.Release|x64.Build.0 = Release|Win32
 		{F0E8B707-00C5-4FF2-B8EF-7C39817132A0}.Debug|Win32.ActiveCfg = Debug|Win32
 		{F0E8B707-00C5-4FF2-B8EF-7C39817132A0}.Debug|Win32.Build.0 = Debug|Win32
 		{F0E8B707-00C5-4FF2-B8EF-7C39817132A0}.Debug|x64.ActiveCfg = Debug|Win32
diff --git a/configure.ac b/configure.ac
index 5aef29b..8fa4b4d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -54,13 +54,21 @@
 esac])
 
 AC_ARG_ENABLE(python,
-AC_HELP_STRING([--enable-python],[Build Python bindings]),
+AC_HELP_STRING([--enable-python],[Enable Python-requiring portions of build]),
 [case "${enableval}" in
   yes) enable_python="yes" ;;
   no)  enable_python="no" ;;
   *) AC_MSG_ERROR([bad value ${enableval} for --enable-python]) ;;
 esac], enable_python="auto")
 
+AC_ARG_ENABLE(python-bindings,
+AC_HELP_STRING([--enable-python-bindings],[Build Python bindings]),
+[case "${enableval}" in
+  yes) enable_python_bindings="yes" ;;
+  no)  enable_python_bindings="no" ;;
+  *) AC_MSG_ERROR([bad value ${enableval} for --enable-python-bindings]) ;;
+esac], enable_python_bindings="auto")
+
 #
 # Checks for programs.
 #
@@ -245,18 +253,37 @@
 HOST_CC="$CC_FOR_BUILD"
 AC_SUBST(HOST_CC)
 
-# Detect if we can build Python bindings
-# (needs Python, Python headers, and Pyrex)
+# Detect if we have Python
 if test x$enable_python = xno; then
     have_python=no
 else
-    AC_MSG_NOTICE([Checking to see if we can build Python bindings])
+    AC_MSG_NOTICE([Checking for Python])
     have_python=no
     AM_PATH_PYTHON(2.4,[],[AC_MSG_WARN([Python not found])])
 
     if test -z "$PYTHON" || test "$PYTHON" = : ; then
         have_python=no
     else
+        have_python=yes
+    fi
+
+    if test x$have_python = xno ; then
+        if test x$enable_python = xyes ; then
+            AC_MSG_ERROR([Python explicitly requested, but a suitable Python version was not found])
+        else
+            AC_MSG_WARN([Could not find a suitable version of Python])
+        fi
+    fi
+fi
+
+# Detect if we can build Python bindings
+# (needs Python, Python headers, and Pyrex)
+if test x$enable_python_bindings = xno; then
+    have_python_bindings=no
+else
+    AC_MSG_NOTICE([Checking to see if we can build Python bindings])
+    have_python_bindings=no
+    if test x$have_python = xyes; then
         AC_MSG_CHECKING([for Pyrex >= 0.9.5.1])
         PYREX_CHECK_VERSION(0.9.5.1, [AC_MSG_RESULT(yes)
                                       have_pyrex=yes],
@@ -265,21 +292,22 @@
 
         AM_CHECK_PYTHON_HEADERS(have_python_headers=yes,have_python_headers=no)
 
-	if test x$have_pyrex = xyes -a x$have_python_headers = xyes ; then
-	    have_python=yes
+        if test "x$have_pyrex" = xyes && test "x$have_python_headers" = xyes ; then
+            have_python_bindings=yes
         fi
     fi
 
-    if test x$have_python = xno ; then
-        if test x$enable_python = xyes ; then
-            AC_MSG_ERROR([Building Python explicitly requested, but can't build Python bindings because either Pyrex, Python headers or a suitable Python version was not found])
+    if test x$have_python_bindings = xno ; then
+        if test x$enable_python_bindings = xyes ; then
+            AC_MSG_ERROR([Building Python bindings explicitly requested, but can't build Python bindings because either Pyrex, Python headers or a suitable Python version was not found])
         else
             AC_MSG_WARN([Couldn't find either Pyrex, the Python headers or a suitable version of Python, not building Python bindings])
         fi
-    fi               
+    fi
 fi
 
 AM_CONDITIONAL(HAVE_PYTHON, test x$have_python = xyes)
+AM_CONDITIONAL(HAVE_PYTHON_BINDINGS, test x$have_python_bindings = xyes)
 
 AC_CONFIG_FILES([Makefile
 	po/Makefile.in
diff --git a/libyasm/bytecode.h b/libyasm/bytecode.h
index e53dc54..741aabb 100644
--- a/libyasm/bytecode.h
+++ b/libyasm/bytecode.h
@@ -263,7 +263,11 @@
      yasm_output_value_func output_value,
      /*@null@*/ yasm_output_reloc_func output_reloc);
 
-#define yasm_bc__next(x)                STAILQ_NEXT(x, link)
+/** Get the next bytecode in a linked list of bytecodes.
+ * \param bc    bytecode
+ * \return Next bytecode.
+ */
+#define yasm_bc__next(bc)               STAILQ_NEXT(bc, link)
 
 /** Set multiple field of a bytecode.
  * A bytecode can be repeated a number of times when output.  This function
diff --git a/libyasm/expr.h b/libyasm/expr.h
index 6197567..990452f 100644
--- a/libyasm/expr.h
+++ b/libyasm/expr.h
@@ -56,13 +56,13 @@
 
     /** Expression item data.  Correct value depends on type. */
     union {
-        yasm_bytecode *precbc;  /**< Direct bytecode ref (#YASM_EXPR_PRECBC) */
-        yasm_symrec *sym;       /**< Symbol (#YASM_EXPR_SYM) */
-        yasm_expr *expn;        /**< Subexpression (#YASM_EXPR_EXPR) */
-        yasm_intnum *intn;      /**< Integer value (#YASM_EXPR_INT) */
-        yasm_floatnum *flt;     /**< Floating point value (#YASM_EXPR_FLOAT) */
-        uintptr_t reg;          /**< Register (#YASM_EXPR_REG) */
-        unsigned int subst;     /**< Subst placeholder (#YASM_EXPR_SUBST) */
+        yasm_bytecode *precbc;  /**< Direct bytecode ref (YASM_EXPR_PRECBC) */
+        yasm_symrec *sym;       /**< Symbol (YASM_EXPR_SYM) */
+        yasm_expr *expn;        /**< Subexpression (YASM_EXPR_EXPR) */
+        yasm_intnum *intn;      /**< Integer value (YASM_EXPR_INT) */
+        yasm_floatnum *flt;     /**< Floating point value (YASM_EXPR_FLOAT) */
+        uintptr_t reg;          /**< Register (YASM_EXPR_REG) */
+        unsigned int subst;     /**< Subst placeholder (YASM_EXPR_SUBST) */
     } data;
 } yasm_expr__item;
 
diff --git a/libyasm/floatnum.c b/libyasm/floatnum.c
index 951e17c..a7cfcb7 100644
--- a/libyasm/floatnum.c
+++ b/libyasm/floatnum.c
@@ -631,7 +631,17 @@
     return retval;
 }
 
-/* IEEE-754 (Intel) "single precision" format:
+/* IEEE-754r "half precision" format:
+ * 16 bits:
+ * 15     9      Bit 0
+ * |      |          |
+ * seee eemm mmmm mmmm
+ *
+ * e = bias 15 exponent
+ * s = sign bit
+ * m = mantissa bits, bit 10 is an implied one bit.
+ *
+ * IEEE-754 (Intel) "single precision" format:
  * 32 bits:
  * Bit 31    Bit 22              Bit 0
  * |         |                       |
@@ -672,6 +682,9 @@
         yasm_internal_error(N_("unsupported floatnum functionality"));
     }
     switch (destsize) {
+        case 2:
+            retval = floatnum_get_common(flt, ptr, 2, 10, 1, 5);
+            break;
         case 4:
             retval = floatnum_get_common(flt, ptr, 4, 23, 1, 8);
             break;
@@ -702,6 +715,7 @@
 yasm_floatnum_check_size(/*@unused@*/ const yasm_floatnum *flt, size_t size)
 {
     switch (size) {
+        case 16:
         case 32:
         case 64:
         case 80:
diff --git a/libyasm/insn.h b/libyasm/insn.h
index 20977b2..ea3fe53 100644
--- a/libyasm/insn.h
+++ b/libyasm/insn.h
@@ -64,6 +64,12 @@
      * "expr(,1)" (which is definitely an effective address).
      */
     unsigned int strong:1;
+
+    /** 1 if effective address is forced PC-relative. */
+    unsigned int pc_rel:1;
+
+    /** 1 if effective address is forced non-PC-relative. */
+    unsigned int not_pc_rel:1;
 };
 
 /** An instruction operand (opaque type). */
diff --git a/modules/arch/lc3b/lc3barch.c b/modules/arch/lc3b/lc3barch.c
index e1835d0..84f188a 100644
--- a/modules/arch/lc3b/lc3barch.c
+++ b/modules/arch/lc3b/lc3barch.c
@@ -158,6 +158,8 @@
     ea->nosplit = 0;
     ea->strong = 0;
     ea->segreg = 0;
+    ea->pc_rel = 0;
+    ea->not_pc_rel = 0;
     return ea;
 }
 
diff --git a/modules/arch/lc3b/tests/Makefile.inc b/modules/arch/lc3b/tests/Makefile.inc
index 43ffb70..43b861e 100644
--- a/modules/arch/lc3b/tests/Makefile.inc
+++ b/modules/arch/lc3b/tests/Makefile.inc
@@ -7,10 +7,8 @@
 EXTRA_DIST += modules/arch/lc3b/tests/lc3b-basic.errwarn
 EXTRA_DIST += modules/arch/lc3b/tests/lc3b-basic.hex
 EXTRA_DIST += modules/arch/lc3b/tests/lc3b-br.asm
-EXTRA_DIST += modules/arch/lc3b/tests/lc3b-br.errwarn
 EXTRA_DIST += modules/arch/lc3b/tests/lc3b-br.hex
 EXTRA_DIST += modules/arch/lc3b/tests/lc3b-ea-err.asm
 EXTRA_DIST += modules/arch/lc3b/tests/lc3b-ea-err.errwarn
 EXTRA_DIST += modules/arch/lc3b/tests/lc3b-mp22NC.asm
-EXTRA_DIST += modules/arch/lc3b/tests/lc3b-mp22NC.errwarn
 EXTRA_DIST += modules/arch/lc3b/tests/lc3b-mp22NC.hex
diff --git a/modules/arch/x86/Makefile.inc b/modules/arch/x86/Makefile.inc
index 24e90fa..0f68238 100644
--- a/modules/arch/x86/Makefile.inc
+++ b/modules/arch/x86/Makefile.inc
@@ -5,18 +5,56 @@
 libyasm_a_SOURCES += modules/arch/x86/x86bc.c
 libyasm_a_SOURCES += modules/arch/x86/x86expr.c
 libyasm_a_SOURCES += modules/arch/x86/x86id.c
+libyasm_a_SOURCES += x86cpu.c
+libyasm_a_SOURCES += x86regtmod.c
 
 YASM_MODULES += arch_x86
 
-modules/arch/x86/x86id.c: x86parse.c
+modules/arch/x86/x86id.c: x86insn_nasm.c x86insn_gas.c x86insns.c
 
-EXTRA_DIST += modules/arch/x86/x86parse.gap
+EXTRA_DIST += modules/arch/x86/gen_x86_insn.py
 
-x86parse.c: $(srcdir)/modules/arch/x86/x86parse.gap gap$(EXEEXT)
-	$(top_builddir)/gap$(EXEEXT) $(srcdir)/modules/arch/x86/x86parse.gap $@
+if HAVE_PYTHON
+x86insn_nasm.gperf x86insn_gas.gperf x86insns.c: $(srcdir)/modules/arch/x86/gen_x86_insn.py
+	$(PYTHON) $(srcdir)/modules/arch/x86/gen_x86_insn.py
+else
+x86insn_nasm.gperf: $(srcdir)/x86insn_nasm.gperf
+	@echo Python must be installed to regenerate x86 instructions files
+	cp $(srcdir)/x86insn_nasm.gperf $@
+x86insn_gas.gperf: $(srcdir)/x86insn_gas.gperf
+	@echo Python must be installed to regenerate x86 instructions files
+	cp $(srcdir)/x86insn_gas.gperf $@
+endif
 
-BUILT_SOURCES += x86parse.c
-CLEANFILES += x86parse.c
+BUILT_SOURCES += x86insns.c
+BUILT_SOURCES += x86insn_nasm.gperf
+BUILT_SOURCES += x86insn_gas.gperf
+EXTRA_DIST += x86insns.c
+EXTRA_DIST += x86insn_nasm.gperf
+EXTRA_DIST += x86insn_gas.gperf
+MAINTAINERCLEANFILES += x86insns.c
+MAINTAINERCLEANFILES += x86insn_nasm.gperf
+MAINTAINERCLEANFILES += x86insn_gas.gperf
+
+EXTRA_DIST += modules/arch/x86/x86cpu.gperf
+EXTRA_DIST += modules/arch/x86/x86regtmod.gperf
+
+# x86insn_*.c have no recipe here and rely on a .gperf.c suffix rule; x86cpu.c/x86regtmod.c use explicit recipes
+x86insn_nasm.c: x86insn_nasm.gperf genperf$(EXEEXT)
+x86insn_gas.c: x86insn_gas.gperf genperf$(EXEEXT)
+x86cpu.c: $(srcdir)/modules/arch/x86/x86cpu.gperf genperf$(EXEEXT)
+	$(top_builddir)/genperf$(EXEEXT) $(srcdir)/modules/arch/x86/x86cpu.gperf $@
+x86regtmod.c: $(srcdir)/modules/arch/x86/x86regtmod.gperf genperf$(EXEEXT)
+	$(top_builddir)/genperf$(EXEEXT) $(srcdir)/modules/arch/x86/x86regtmod.gperf $@
+
+BUILT_SOURCES += x86insn_nasm.c
+BUILT_SOURCES += x86insn_gas.c
+BUILT_SOURCES += x86cpu.c
+BUILT_SOURCES += x86regtmod.c
+CLEANFILES += x86insn_nasm.c
+CLEANFILES += x86insn_gas.c
+CLEANFILES += x86cpu.c
+CLEANFILES += x86regtmod.c
 
 EXTRA_DIST += modules/arch/x86/tests/Makefile.inc
 
diff --git a/modules/arch/x86/gen_x86_insn.py b/modules/arch/x86/gen_x86_insn.py
new file mode 100755
index 0000000..e891634
--- /dev/null
+++ b/modules/arch/x86/gen_x86_insn.py
@@ -0,0 +1,5637 @@
+#! /usr/bin/env python
+# $Id$
+# x86 instructions and prefixes data and code generation
+#
+#  Copyright (C) 2002-2007  Peter Johnson
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# NOTE: operands are arranged in NASM / Intel order (e.g. dest, src)
+
+# CPU names in rank order; cpu_lcd() and GroupForm compare CPUs by their
+# position in this list.
+ordered_cpus = [
+    "086", "186", "286", "386", "486", "586", "686", "K6", "Athlon", "P3",
+    "P4", "IA64", "Hammer"]
+# Feature names that are likewise compared by list position.
+ordered_cpu_features = [
+    "FPU", "Cyrix", "AMD", "MMX", "3DNow", "SMM", "SSE", "SSE2",
+    "SSE3", "SVM", "PadLock", "SSSE3", "SSE41", "SSE42", "SSE4a", "SSE5"]
+# Feature names with no ordering; kept only when present on both sides.
+unordered_cpu_features = ["Priv", "Prot", "Undoc", "Obs"]
+
+def cpu_lcd(cpu1, cpu2):
+    """Find the lowest common denominator of two CPU sets.
+
+    cpu1 and cpu2 are iterables of CPU / feature name strings.  Returns a
+    new set containing:
+      - the lower-ranked of the two sides' minimum CPUs (a side naming no
+        CPU counts as "086"; "086" itself is never emitted),
+      - the lower-ranked ordered feature, only if both sides name one,
+      - every unordered feature present on both sides,
+      - "64" / "Not64" when present on both sides.
+    """
+    retval = set()
+
+    # CPU: lowest-ranked CPU named on each side, defaulting to "086"
+    cpu1cpus = set(ordered_cpus) & set(cpu1)
+    if not cpu1cpus:
+        cpu1cpus.add("086")
+    cpu1mincpu = min(ordered_cpus.index(x) for x in cpu1cpus)
+    cpu2cpus = set(ordered_cpus) & set(cpu2)
+    if not cpu2cpus:
+        cpu2cpus.add("086")
+    # Scan cpu2cpus here; scanning cpu1cpus would make cpu2 irrelevant.
+    cpu2mincpu = min(ordered_cpus.index(x) for x in cpu2cpus)
+    cpumin = ordered_cpus[min(cpu1mincpu, cpu2mincpu)]
+
+    # "086" is the baseline (any CPU) and is therefore not recorded
+    if cpumin != "086":
+        retval.add(cpumin)
+
+    # Feature: -1 marks a side that names no ordered feature
+    cpu1features = set(ordered_cpu_features) & set(cpu1)
+    if not cpu1features:
+        cpu1minfeature = -1
+    else:
+        cpu1minfeature = min(ordered_cpu_features.index(x)
+                             for x in cpu1features)
+
+    cpu2features = set(ordered_cpu_features) & set(cpu2)
+    if not cpu2features:
+        cpu2minfeature = -1
+    else:
+        cpu2minfeature = min(ordered_cpu_features.index(x)
+                             for x in cpu2features)
+
+    if cpu1minfeature != -1 and cpu2minfeature != -1:
+        featuremin = ordered_cpu_features[min(cpu1minfeature, cpu2minfeature)]
+        retval.add(featuremin)
+
+    # Unordered features
+    for feature in set(unordered_cpu_features) & set(cpu1) & set(cpu2):
+        retval.add(feature)
+
+    # 64-bitness
+    if "64" in cpu1 and "64" in cpu2:
+        retval.add("64")
+    if "Not64" in cpu1 and "Not64" in cpu2:
+        retval.add("Not64")
+
+    return retval
+
+class Operand(object):
+    """Description of a single operand of an instruction form.
+
+    Keyword arguments: type (required), size (default "Any"), relaxed
+    (default False), dest, tmod and opt (default None).  Any other keyword
+    produces a warning on stdout.  str() renders the operand as a
+    brace-enclosed list of OPT_/OPS_/OPTM_/OPA_/OPAP_ names.
+    """
+    # Attributes that take part in ==/!=, in comparison order.
+    _compared = ("type", "size", "relaxed", "dest", "tmod", "opt")
+
+    def __init__(self, **kwargs):
+        self.type = kwargs.pop("type")
+        for name, default in (("size", "Any"), ("relaxed", False),
+                              ("dest", None), ("tmod", None), ("opt", None)):
+            setattr(self, name, kwargs.pop(name, default))
+
+        # Whatever is left was not consumed above
+        for arg in kwargs:
+            print "Warning: unrecognized arg %s" % arg
+
+    def __str__(self):
+        # An "EA64" destination is emitted as action "EA" with the flag
+        # field set to 1; every other destination is emitted unchanged.
+        if self.dest == "EA64":
+            ea64_flag = "1"
+            action = "EA"
+        else:
+            ea64_flag = "0"
+            action = self.dest
+        fields = ["OPT_%s" % self.type,
+                  "OPS_%s" % self.size,
+                  "%d" % self.relaxed,
+                  ea64_flag,
+                  "OPTM_%s" % self.tmod,
+                  "OPA_%s" % action,
+                  "OPAP_%s" % self.opt]
+        return "{" + ", ".join(fields) + "}"
+
+    def __eq__(self, other):
+        # Equal only if every compared attribute matches
+        for name in self._compared:
+            if not (getattr(self, name) == getattr(other, name)):
+                return False
+        return True
+
+    def __ne__(self, other):
+        # Different as soon as one compared attribute differs
+        for name in self._compared:
+            if getattr(self, name) != getattr(other, name):
+                return True
+        return False
+
+class GroupForm(object):
+    """One encoding form (opcode plus operand pattern) of an instruction group.
+
+    Constructed from the keyword arguments given to add_group().  Either
+    "opcode" or both "opcode1" and "opcode2" must be supplied, as must
+    "operands" (a list of Operand); otherwise KeyError is raised.
+    Unrecognized keywords produce a warning on stdout.  str() renders the
+    form as a C struct initializer.
+    """
+    def __init__(self, **kwargs):
+        # Parsers
+        self.parsers = set(kwargs.pop("parsers", ["gas", "nasm"]))
+
+        # CPU feature flags initialization
+        self.cpu = set(kwargs.pop("cpu", []))
+        if kwargs.pop("only64", False):
+            self.cpu.add("64")
+        if kwargs.pop("not64", False):
+            self.cpu.add("Not64")
+
+        # Operation size (an opersize of 8 is stored as 0)
+        self.opersize = kwargs.pop("opersize", 0)
+        if self.opersize == 8:
+            self.opersize = 0
+
+        if self.opersize == 64:
+            self.cpu.add("64")
+        elif self.opersize == 32 and "64" not in self.cpu:
+            self.cpu.add("386")
+
+        # Default operation size in 64-bit mode
+        self.def_opersize_64 = kwargs.pop("def_opersize_64", 0)
+
+        # GAS suffix; a single "suffix" overrides "suffixes", and the
+        # result is stored as a set of upper-case strings (or None)
+        self.gen_suffix = kwargs.pop("gen_suffix", True)
+        self.suffixes = kwargs.pop("suffixes", None)
+        suffix = kwargs.pop("suffix", None)
+        if suffix is not None:
+            self.suffixes = [suffix]
+        if self.suffixes is not None:
+            self.suffixes = set(x.upper() for x in self.suffixes)
+
+        # Special instruction prefix
+        self.special_prefix = "0"
+        if "prefix" in kwargs:
+            self.special_prefix = "0x%02X" % kwargs.pop("prefix")
+
+        # Spare value
+        self.spare = kwargs.pop("spare", 0)
+
+        # Build opcodes string (C array initializer)
+        if "opcode" in kwargs:
+            # Usual case, just a single opcode
+            self.opcode = kwargs.pop("opcode")
+            self.opcode_len = len(self.opcode)
+        elif "opcode1" in kwargs and "opcode2" in kwargs:
+            # Two opcode case; the first opcode is the "optimized" opcode,
+            # the second is the "relaxed" opcode.  For this to work, an
+            # opt="foo" must be set for one of the operands.
+            self.opcode1 = kwargs.pop("opcode1")
+            self.opcode2 = kwargs.pop("opcode2")
+            self.opcode_len = len(self.opcode1)
+        else:
+            raise KeyError("missing opcode")
+
+        # DREX opcode0 field
+        self.drex_oc0 = kwargs.pop("drex_oc0", 0) and 0x08 or 0
+
+        # Build operands string (C array initializer)
+        self.operands = kwargs.pop("operands")
+        for op in self.operands:
+            # Register-like and immediate operands imply a minimum CPU
+            # from their size
+            if op.type in ["Reg", "RM", "Areg", "Creg", "Dreg",
+                           "Imm", "ImmNotSegOff"]:
+                if op.size == 64:
+                    self.cpu.add("64")
+                elif op.size == 32 and "64" not in self.cpu:
+                    self.cpu.add("386")
+            if op.type in ["FS", "GS"] and "64" not in self.cpu:
+                self.cpu.add("386")
+            if op.type in ["CR4"] and "64" not in self.cpu:
+                self.cpu.add("586")
+            if op.dest == "EA64":
+                self.cpu.add("64")
+            if op.dest == "DREX":
+                self.drex_oc0 |= 0x80
+
+        # Modifiers
+        self.modifiers = kwargs.pop("modifiers", [])
+
+        # GAS flags
+        self.gas_only = ("nasm" not in self.parsers)
+        self.gas_illegal = ("gas" not in self.parsers)
+        self.gas_no_rev = (kwargs.pop("gas_no_reverse", False) or
+                           kwargs.pop("gas_no_rev", False))
+
+        # CPU feature flags finalization
+        # Remove redundancies: unless "64" is set, drop every ordered CPU
+        # ranked below the highest one present
+        maxcpu = -1
+        if "64" in self.cpu:
+            pass #maxcpu = ordered_cpus.index("Hammer")
+        else:
+            maxcpu_set = self.cpu & set(ordered_cpus)
+            if maxcpu_set:
+                maxcpu = max(ordered_cpus.index(x) for x in maxcpu_set)
+        if maxcpu != -1:
+            for cpu in ordered_cpus[0:maxcpu]:
+                self.cpu.discard(cpu)
+
+        if kwargs:
+            for arg in kwargs:
+                print "Warning: unrecognized arg %s" % arg
+
+    def __str__(self):
+        """Return the C struct initializer for this form.
+
+        Reads self.all_operands_index, which __init__ does not set; it is
+        assigned by output_groups() before the forms are printed.  Raises
+        ValueError for more than three modifiers or more than three CPUs.
+        """
+        if hasattr(self, "opcode"):
+            opcodes_str = ["0x%02X" % x for x in self.opcode]
+        else:
+            # __init__ guarantees opcode1/opcode2 whenever opcode is absent
+            opcodes_str = ["0x%02X" % x for x in self.opcode1]
+            opcodes_str.extend("0x%02X" % x for x in self.opcode2)
+        # Ensure opcodes initializer string is 3 long
+        opcodes_str.extend(["0", "0", "0"])
+        opcodes_str = "{" + ', '.join(opcodes_str[0:3]) + "}"
+
+        if len(self.modifiers) > 3:
+            raise ValueError("too many modifiers: %s" % (self.modifiers,))
+
+        if len(self.cpu) > 3:
+            raise ValueError("too many CPUs: %s" % (self.cpu,))
+        cpus_str = ["CPU_%s" % x for x in sorted(self.cpu)]
+        # Ensure cpus initializer string is 3 long; 0=CPU_Any
+        cpus_str.extend(["0", "0", "0"])
+
+        mods = ["MOD_%s" % x for x in self.modifiers]
+        # Ensure mods initializer string is 3 long
+        mods.extend(["0", "0", "0"])
+        mod_str = "{" + ', '.join(mods[0:3]) + "}"
+
+        gas_flags = []
+        if self.gas_only:
+            gas_flags.append("GAS_ONLY")
+        if self.gas_illegal:
+            gas_flags.append("GAS_ILLEGAL")
+        if self.gas_no_rev:
+            gas_flags.append("GAS_NO_REV")
+        if self.suffixes:
+            gas_flags.extend("SUF_%s" % x for x in sorted(self.suffixes))
+        gas_flags = "|".join(gas_flags)
+
+        # Build instruction info structure initializer
+        return "{ "+ ", ".join([gas_flags or "0",
+                                cpus_str[0],
+                                cpus_str[1],
+                                cpus_str[2],
+                                mod_str,
+                                "%d" % (self.opersize or 0),
+                                "%d" % (self.def_opersize_64 or 0),
+                                self.special_prefix or "0",
+                                self.drex_oc0 and
+                                    ("0x%02X" % self.drex_oc0) or "0",
+                                "%d" % self.opcode_len,
+                                opcodes_str,
+                                "%d" % (self.spare or 0),
+                                "%d" % len(self.operands),
+                                "%d" % self.all_operands_index]) + " }"
+
+# Group name -> list of GroupForm, in the order the forms were added.
+groups = {}
+# Every name passed to add_group(), in call order (repeats included).
+groupnames_ordered = []
+def add_group(name, **kwargs):
+    """Append a new GroupForm built from kwargs to the group called name."""
+    if name not in groups:
+        groups[name] = []
+    groups[name].append(GroupForm(**kwargs))
+    groupnames_ordered.append(name)
+
+class Insn(object):
+    """An instruction keyword entry that refers to a group of forms.
+
+    groupname names the entry in the module-level groups table.  suffix, if
+    given, is stored upper-cased and restricts the entry to the "gas"
+    parser unless parser is given explicitly.  modifiers is a list of at
+    most three integers; cpu is an iterable of CPU / feature names, with
+    only64 / not64 adding "64" / "Not64".  A cpu of None means "not yet
+    determined" (see auto_cpu).
+    """
+    def __init__(self, groupname, suffix=None, parser=None, modifiers=None,
+                 cpu=None, only64=False, not64=False):
+        self.groupname = groupname
+        if suffix is None:
+            self.suffix = None
+        else:
+            self.suffix = suffix.upper()
+
+        # An explicit parser wins over the "gas" implied by a suffix
+        self.parsers = None
+        if suffix is not None:
+            self.parsers = set(["gas"])
+        if parser is not None:
+            self.parsers = set([parser])
+
+        if modifiers is None:
+            self.modifiers = []
+        else:
+            self.modifiers = modifiers
+        if cpu is None:
+            self.cpu = None
+        else:
+            self.cpu = set(cpu)
+
+        if only64:
+            if self.cpu is None:
+                self.cpu = set()
+            self.cpu.add("64")
+        if not64:
+            if self.cpu is None:
+                self.cpu = set()
+            self.cpu.add("Not64")
+
+    def auto_cpu(self, parser):
+        """Determine lowest common denominator CPU from group and suffix.
+        Does nothing if CPU is already set."""
+        if self.cpu is not None:
+            return
+        # Scan through group, matching parser and suffix
+        for form in groups[self.groupname]:
+            if parser not in form.parsers:
+                continue
+            if (self.suffix is not None and len(self.suffix) == 1 and
+                (form.suffixes is None or self.suffix not in form.suffixes)):
+                continue
+            if self.cpu is None:
+                # Copy so this entry never shares the form's own set
+                self.cpu = set(form.cpu)
+            else:
+                self.cpu = cpu_lcd(self.cpu, form.cpu)
+
+    def copy(self):
+        """Return a shallow copy."""
+        newinsn = Insn(self.groupname,
+                       suffix=self.suffix,
+                       modifiers=self.modifiers,
+                       cpu=self.cpu)
+        # The constructor can only re-derive parsers from the suffix, so
+        # carry over the original selection (e.g. an explicit parser=).
+        newinsn.parsers = self.parsers
+        return newinsn
+
+    def __str__(self):
+        """Return the tab-separated fields for this entry.
+
+        Raises ValueError for more than three CPUs or modifiers.
+        """
+        if self.suffix is None:
+            suffix_str = "NONE"
+        elif len(self.suffix) == 1:
+            suffix_str = "SUF_" + self.suffix
+        else:
+            suffix_str = self.suffix
+
+        cpus_str = []
+        if self.cpu is not None:
+            if len(self.cpu) > 3:
+                raise ValueError("too many CPUs: %s" % (self.cpu,))
+            cpus_str.extend("CPU_%s" % x for x in sorted(self.cpu))
+
+        # Ensure cpus initializer string is 3 long
+        cpus_str.extend(["0", "0", "0"])
+
+        if len(self.modifiers) > 3:
+            raise ValueError("too many modifiers")
+        mods_str = ["0x%02X" % x for x in self.modifiers]
+
+        # Ensure modifiers is at least 3 long
+        mods_str.extend(["0", "0", "0"])
+
+        return ",\t".join(["%s_insn" % self.groupname,
+                           "%d" % len(groups[self.groupname]),
+                           suffix_str,
+                           mods_str[0],
+                           mods_str[1],
+                           mods_str[2],
+                           cpus_str[0],
+                           cpus_str[1],
+                           cpus_str[2]])
+
+# Instruction name -> list of Insn entries added under that name.
+insns = {}
+def add_insn(name, groupname, **kwargs):
+    """Record an Insn for groupname (built from kwargs) under name."""
+    if name not in insns:
+        insns[name] = []
+    insns[name].append(Insn(groupname, **kwargs))
+
+class Prefix(object):
+    """A prefix keyword entry: a group name, a byte value and a 64-bit flag.
+
+    str() yields nine tab-separated fields, the same count that
+    Insn.__str__ produces.
+    """
+    def __init__(self, groupname, value, only64=False):
+        self.groupname, self.value, self.only64 = groupname, value, only64
+
+    def __str__(self):
+        # The only CPU column ever filled in is CPU_64, for only64 prefixes
+        if self.only64:
+            first_cpu = "CPU_64"
+        else:
+            first_cpu = "0"
+        columns = ["NULL",
+                   "X86_%s>>8" % self.groupname,
+                   "0x%02X" % self.value]
+        columns.extend(["0", "0", "0"])
+        columns.extend([first_cpu, "0", "0"])
+        return ",\t".join(columns)
+
+# Keyword -> Insn or Prefix, one table per parser.
+gas_insns = {}
+nasm_insns = {}
+
+def add_prefix(name, groupname, value, parser=None, **kwargs):
+    """Register a Prefix under name for one parser, or for both if parser
+    is None.  Both tables receive the same Prefix object."""
+    prefix = Prefix(groupname, value, **kwargs)
+    for table, table_parser in ((gas_insns, "gas"), (nasm_insns, "nasm")):
+        if parser is None or parser == table_parser:
+            table[name] = prefix
+
+def finalize_insns():
+    """Fill gas_insns and nasm_insns from the insns table.
+
+    Each Insn is copied into the table of every parser that both it and at
+    least one form of its group support, and its CPU set is resolved with
+    auto_cpu().  For GAS, an Insn without a suffix is additionally
+    registered once per suffix offered by the group's forms that have
+    gen_suffix set.  Raises ValueError on a duplicate keyword.
+    """
+    for name, opts in insns.iteritems():
+        for insn in opts:
+            group = groups[insn.groupname]
+
+            # Parsers offered by any form, narrowed by the insn's own list
+            parsers = set()
+            for form in group:
+                parsers.update(form.parsers)
+            if insn.parsers is not None:
+                parsers.intersection_update(insn.parsers)
+
+            if "gas" in parsers:
+                # (keyword, suffix override) pairs; the bare keyword first
+                pending = [(name, None)]
+                if insn.suffix is None:
+                    suffixes = set()
+                    for form in group:
+                        if form.gen_suffix and form.suffixes is not None:
+                            suffixes |= form.suffixes
+                    for suffix in suffixes:
+                        pending.append((name+suffix, suffix))
+
+                for keyword, suffix in pending:
+                    if keyword in gas_insns:
+                        raise ValueError("duplicate gas instruction %s" %
+                                         keyword)
+                    newinsn = insn.copy()
+                    if suffix is not None:
+                        newinsn.suffix = suffix
+                    newinsn.auto_cpu("gas")
+                    gas_insns[keyword] = newinsn
+
+            if "nasm" in parsers:
+                if name in nasm_insns:
+                    raise ValueError("duplicate nasm instruction %s" % name)
+                newinsn = insn.copy()
+                newinsn.auto_cpu("nasm")
+                nasm_insns[name] = newinsn
+
+def output_insns(f, parser, insns):
+    """Write a gperf input to f: a declarations header naming the hash and
+    lookup functions after parser, then one line per keyword of insns
+    (lower-cased keyword, a tab, and str() of its entry) in sorted order."""
+    header = """%%ignore-case
+%%language=ANSI-C
+%%compare-strncmp
+%%readonly-tables
+%%enum
+%%struct-type
+%%define hash-function-name insnprefix_%s_hash
+%%define lookup-function-name insnprefix_%s_find
+struct insnprefix_parse_data;
+%%%%""" % (parser, parser)
+    print >>f, header
+    keywords = list(insns)
+    keywords.sort()
+    for keyword in keywords:
+        print >>f, "%s,\t%s" % (keyword.lower(), insns[keyword])
+
+def output_gas_insns(f):
+    """Write the GAS keyword table (gas_insns) to f."""
+    output_insns(f, "gas", gas_insns)
+
+def output_nasm_insns(f):
+    """Write the NASM keyword table (nasm_insns) to f."""
+    output_insns(f, "nasm", nasm_insns)
+
+def output_groups(f):
+    """Write the shared operand array and one form array per group to f.
+
+    As a side effect every GroupForm gets all_operands_index, the offset of
+    its operand list inside the shared array; GroupForm.__str__ needs it.
+    """
+    # Merge all operand lists into single list
+    # Longest operand lists go first so shorter ones can reuse their slots
+    all_forms = []
+    for g in groups.itervalues():
+        all_forms.extend(g)
+    all_forms.sort(key=lambda x:len(x.operands), reverse=True)
+
+    all_operands = []
+    for form in all_forms:
+        num_operands = len(form.operands)
+        index = 0
+        while index < len(all_operands):
+            if all_operands[index:index+num_operands] == form.operands:
+                break
+            index += 1
+        else:
+            # No existing run matches: append (index is the old length)
+            all_operands.extend(form.operands)
+        form.all_operands_index = index
+
+    # Output operands list
+    print >>f, "static const x86_info_operand insn_operands[] = {"
+    print >>f, "   ",
+    print >>f, ",\n    ".join(str(x) for x in all_operands)
+    print >>f, "};\n"
+
+    # Output groups, each name once, in first-added order
+    emitted = set()
+    for name in groupnames_ordered:
+        if name not in emitted:
+            emitted.add(name)
+            print >>f, "static const x86_insn_info %s_insn[] = {" % name
+            print >>f, "   ",
+            print >>f, ",\n    ".join(str(x) for x in groups[name])
+            print >>f, "};\n"
+
+#####################################################################
+# General instruction groupings
+#####################################################################
+
+#
+# Empty instruction
+#
+add_group("empty", opcode=[], operands=[])
+
+#
+# Placeholder for instructions invalid in 64-bit mode
+#
+add_group("not64", opcode=[], operands=[], not64=True)
+
+#
+# One byte opcode instructions with no operands
+#
+add_group("onebyte",
+    modifiers=["Op0Add", "OpSizeR", "DOpS64R"],
+    opcode=[0x00],
+    operands=[])
+
+#
+# One byte opcode instructions with "special" prefix with no operands
+#
+add_group("onebyte_prefix",
+    modifiers=["PreAdd", "Op0Add"],
+    prefix=0x00,
+    opcode=[0x00],
+    operands=[])
+
+#
+# Two byte opcode instructions with no operands
+#
+add_group("twobyte",
+    gen_suffix=False,
+    suffixes=["l", "q"],
+    modifiers=["Op0Add", "Op1Add"],
+    opcode=[0x00, 0x00],
+    operands=[])
+
+#
+# Three byte opcode instructions with no operands
+#
+add_group("threebyte",
+    modifiers=["Op0Add", "Op1Add", "Op2Add"],
+    opcode=[0x00, 0x00, 0x00],
+    operands=[])
+
+#
+# One byte opcode instructions with general memory operand
+#
+add_group("onebytemem",
+    gen_suffix=False,
+    suffixes=["l", "q", "s"],
+    modifiers=["SpAdd", "Op0Add"],
+    opcode=[0x00],
+    spare=0,
+    operands=[Operand(type="Mem", dest="EA")])
+
+#
+# Two byte opcode instructions with general memory operand
+#
+add_group("twobytemem",
+    gen_suffix=False,
+    suffixes=["w", "l", "q", "s"],
+    modifiers=["SpAdd", "Op0Add", "Op1Add"],
+    opcode=[0x00, 0x00],
+    spare=0,
+    operands=[Operand(type="Mem", dest="EA")])
+
+#
+# mov
+#
+
+# Absolute forms for non-64-bit mode
+for sfx, sz in zip("bwl", [8, 16, 32]):
+    add_group("mov",
+        suffix=sfx,
+        not64=True,
+        opersize=sz,
+        opcode=[0xA0+(sz!=8)],
+        operands=[Operand(type="Areg", size=sz, dest=None),
+                  Operand(type="MemOffs", size=sz, relaxed=True, dest="EA")])
+
+for sfx, sz in zip("bwl", [8, 16, 32]):
+    add_group("mov",
+        suffix=sfx,
+        not64=True,
+        opersize=sz,
+        opcode=[0xA2+(sz!=8)],
+        operands=[Operand(type="MemOffs", size=sz, relaxed=True, dest="EA"),
+                  Operand(type="Areg", size=sz, dest=None)])
+
+# 64-bit absolute forms for 64-bit mode.  Disabled for GAS, see movabs
+for sz in (8, 16, 32, 64):
+    add_group("mov",
+        opersize=sz,
+        opcode=[0xA0+(sz!=8)],
+        only64=True,
+        operands=[Operand(type="Areg", size=sz, dest=None),
+                  Operand(type="MemOffs", size=sz, relaxed=True, dest="EA64")])
+
+for sz in (8, 16, 32, 64):
+    add_group("mov",
+        only64=True,
+        opersize=sz,
+        opcode=[0xA2+(sz!=8)],
+        operands=[Operand(type="MemOffs", size=sz, relaxed=True, dest="EA64"),
+                  Operand(type="Areg", size=sz, dest=None)])
+
+# General 32-bit forms using Areg / short absolute option
+for sfx, sz in zip("bwlq", [8, 16, 32, 64]):
+    add_group("mov",
+        suffix=sfx,
+        opersize=sz,
+        opcode1=[0x88+(sz!=8)],
+        opcode2=[0xA2+(sz!=8)],
+        operands=[
+            Operand(type="RM", size=sz, relaxed=True, dest="EA", opt="ShortMov"),
+            Operand(type="Areg", size=sz, dest="Spare")])
+
+# General 32-bit forms
+for sfx, sz in zip("bwlq", [8, 16, 32, 64]):
+    add_group("mov",
+        suffix=sfx,
+        opersize=sz,
+        opcode=[0x88+(sz!=8)],
+        operands=[Operand(type="RM", size=sz, relaxed=True, dest="EA"),
+                  Operand(type="Reg", size=sz, dest="Spare")])
+
+# General 32-bit forms using Areg / short absolute option
+for sfx, sz in zip("bwlq", [8, 16, 32, 64]):
+    add_group("mov",
+        suffix=sfx,
+        opersize=sz,
+        opcode1=[0x8A+(sz!=8)],
+        opcode2=[0xA0+(sz!=8)],
+        operands=[Operand(type="Areg", size=sz, dest="Spare"),
+                  Operand(type="RM", size=sz, relaxed=True, dest="EA",
+                          opt="ShortMov")])
+
+# General 32-bit forms
+for sfx, sz in zip("bwlq", [8, 16, 32, 64]):
+    add_group("mov",
+        suffix=sfx,
+        opersize=sz,
+        opcode=[0x8A+(sz!=8)],
+        operands=[Operand(type="Reg", size=sz, dest="Spare"),
+                  Operand(type="RM", size=sz, relaxed=True, dest="EA")])
+
+# Segment register forms
+add_group("mov",
+    suffix="w",
+    opcode=[0x8C],
+    operands=[Operand(type="Mem", size=16, relaxed=True, dest="EA"),
+              Operand(type="SegReg", size=16, relaxed=True, dest="Spare")])
+for sfx, sz in zip("wlq", [16, 32, 64]):
+    add_group("mov",
+        suffix=sfx,
+        opersize=sz,
+        opcode=[0x8C],
+        operands=[
+            Operand(type="Reg", size=sz, dest="EA"),
+            Operand(type="SegReg", size=16, relaxed=True, dest="Spare")])
+add_group("mov",
+    suffix="w",
+    opcode=[0x8E],
+    operands=[Operand(type="SegReg", size=16, relaxed=True, dest="Spare"),
+              Operand(type="RM", size=16, relaxed=True, dest="EA")])
+for sfx, sz in zip("lq", [32, 64]):
+    add_group("mov",
+        suffix=sfx,
+        opcode=[0x8E],
+        operands=[
+            Operand(type="SegReg", size=16, relaxed=True, dest="Spare"),
+            Operand(type="Reg", size=sz, dest="EA")])
+
+# Immediate forms
+add_group("mov",
+    suffix="b",
+    opcode=[0xB0],
+    operands=[Operand(type="Reg", size=8, dest="Op0Add"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+for sfx, sz in zip("wl", [16, 32]):
+    add_group("mov",
+        suffix=sfx,
+        opersize=sz,
+        opcode=[0xB8],
+        operands=[Operand(type="Reg", size=sz, dest="Op0Add"),
+                  Operand(type="Imm", size=sz, relaxed=True, dest="Imm")])
+# 64-bit forced size form
+add_group("mov",
+    parsers=["nasm"],
+    opersize=64,
+    opcode=[0xB8],
+    operands=[Operand(type="Reg", size=64, dest="Op0Add"),
+              Operand(type="Imm", size=64, dest="Imm")])
+add_group("mov",
+    suffix="q",
+    opersize=64,
+    opcode1=[0xB8],
+    opcode2=[0xC7],
+    operands=[Operand(type="Reg", size=64, dest="Op0Add"),
+              Operand(type="Imm", size=64, relaxed=True, dest="Imm",
+                      opt="SImm32Avail")])
+# Need two sets here: the first is strict on the right side (immediate) with
+# a relaxed r/m, the second is strict on the left side (r/m) with a relaxed
+# immediate.
+for sfx, sz, immsz in zip("bwlq", [8, 16, 32, 64], [8, 16, 32, 32]):
+    add_group("mov",
+        suffix=sfx,
+        opersize=sz,
+        opcode=[0xC6+(sz!=8)],
+        operands=[Operand(type="RM", size=sz, relaxed=True, dest="EA"),
+                  Operand(type="Imm", size=immsz, dest="Imm")])
+for sfx, sz, immsz in zip("bwlq", [8, 16, 32, 64], [8, 16, 32, 32]):
+    add_group("mov",
+            suffix=sfx,
+            opersize=sz,
+            opcode=[0xC6+(sz!=8)],
+            operands=[Operand(type="RM", size=sz, dest="EA"),
+                Operand(type="Imm", size=immsz, relaxed=True, dest="Imm")])
+
+# CR forms
+add_group("mov",
+    suffix="l",
+    not64=True,
+    cpu=["Priv"],
+    opcode=[0x0F, 0x22],
+    operands=[Operand(type="CR4", size=32, dest="Spare"),
+              Operand(type="Reg", size=32, dest="EA")])
+add_group("mov",
+    suffix="l",
+    not64=True,
+    cpu=["Priv"],
+    opcode=[0x0F, 0x22],
+    operands=[Operand(type="CRReg", size=32, dest="Spare"),
+              Operand(type="Reg", size=32, dest="EA")])
+add_group("mov",
+    suffix="q",
+    cpu=["Priv"],
+    opcode=[0x0F, 0x22],
+    operands=[Operand(type="CRReg", size=32, dest="Spare"),
+              Operand(type="Reg", size=64, dest="EA")])
+add_group("mov",
+    suffix="l",
+    not64=True,
+    cpu=["Priv"],
+    opcode=[0x0F, 0x20],
+    operands=[Operand(type="Reg", size=32, dest="EA"),
+              Operand(type="CR4", size=32, dest="Spare")])
+add_group("mov",
+    suffix="l",
+    cpu=["Priv"],
+    not64=True,
+    opcode=[0x0F, 0x20],
+    operands=[Operand(type="Reg", size=32, dest="EA"),
+              Operand(type="CRReg", size=32, dest="Spare")])
+add_group("mov",
+    suffix="q",
+    cpu=["Priv"],
+    opcode=[0x0F, 0x20],
+    operands=[Operand(type="Reg", size=64, dest="EA"),
+              Operand(type="CRReg", size=32, dest="Spare")])
+
+# DR forms
+add_group("mov",
+    suffix="l",
+    not64=True,
+    cpu=["Priv"],
+    opcode=[0x0F, 0x23],
+    operands=[Operand(type="DRReg", size=32, dest="Spare"),
+              Operand(type="Reg", size=32, dest="EA")])
+add_group("mov",
+    suffix="q",
+    cpu=["Priv"],
+    opcode=[0x0F, 0x23],
+    operands=[Operand(type="DRReg", size=32, dest="Spare"),
+              Operand(type="Reg", size=64, dest="EA")])
+add_group("mov",
+    suffix="l",
+    not64=True,
+    cpu=["Priv"],
+    opcode=[0x0F, 0x21],
+    operands=[Operand(type="Reg", size=32, dest="EA"),
+              Operand(type="DRReg", size=32, dest="Spare")])
+add_group("mov",
+    suffix="q",
+    cpu=["Priv"],
+    opcode=[0x0F, 0x21],
+    operands=[Operand(type="Reg", size=64, dest="EA"),
+              Operand(type="DRReg", size=32, dest="Spare")])
+
+# MMX forms for GAS parser (copied from movq)
+add_group("mov",
+    suffix="q",
+    cpu=["MMX"],
+    parsers=["gas"],
+    opcode=[0x0F, 0x6F],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="SIMDRM", size=64, relaxed=True, dest="EA")])
+add_group("mov",
+    suffix="q",
+    cpu=["MMX"],
+    parsers=["gas"],
+    opersize=64,
+    opcode=[0x0F, 0x6E],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="RM", size=64, relaxed=True, dest="EA")])
+add_group("mov",
+    suffix="q",
+    cpu=["MMX"],
+    parsers=["gas"],
+    opcode=[0x0F, 0x7F],
+    operands=[Operand(type="SIMDRM", size=64, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=64, dest="Spare")])
+add_group("mov",
+    suffix="q",
+    cpu=["MMX"],
+    parsers=["gas"],
+    opersize=64,
+    opcode=[0x0F, 0x7E],
+    operands=[Operand(type="RM", size=64, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=64, dest="Spare")])
+
+# SSE2 forms for GAS parser (copied from movq)
+add_group("mov",
+    suffix="q",
+    cpu=["SSE2"],
+    parsers=["gas"],
+    prefix=0xF3,
+    opcode=[0x0F, 0x7E],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("mov",
+    suffix="q",
+    cpu=["SSE2"],
+    parsers=["gas"],
+    prefix=0xF3,
+    opcode=[0x0F, 0x7E],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=64, relaxed=True, dest="EA")])
+add_group("mov",
+    suffix="q",
+    cpu=["SSE2"],
+    parsers=["gas"],
+    opersize=64,
+    prefix=0x66,
+    opcode=[0x0F, 0x6E],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="RM", size=64, relaxed=True, dest="EA")])
+add_group("mov",
+    suffix="q",
+    cpu=["SSE2"],
+    parsers=["gas"],
+    prefix=0x66,
+    opcode=[0x0F, 0xD6],
+    operands=[Operand(type="SIMDRM", size=64, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+add_group("mov",
+    suffix="q",
+    cpu=["SSE2"],
+    parsers=["gas"],
+    opersize=64,
+    prefix=0x66,
+    opcode=[0x0F, 0x7E],
+    operands=[Operand(type="RM", size=64, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+
+add_insn("mov", "mov")
+
+#
+# movabs: the GAS spelling of the 64-bit absolute move.
+# The normal mov above has these forms disabled for GAS.
+#
+add_group("movabs",
+    opcode=[0xA0],
+    suffix="b",
+    only64=True,
+    operands=[Operand(type="Areg", dest=None, size=8),
+              Operand(type="MemOffs", dest="EA64", size=8, relaxed=True)])
+for sfx, sz in (("w", 16), ("l", 32), ("q", 64)):
+    add_group("movabs",
+        opcode=[0xA1],
+        opersize=sz,
+        suffix=sfx,
+        only64=True,
+        operands=[Operand(type="Areg", dest=None, size=sz),
+                  Operand(type="MemOffs", dest="EA64", size=sz,
+                          relaxed=True)])
+
+add_group("movabs",
+    opcode=[0xA2],
+    suffix="b",
+    only64=True,
+    operands=[Operand(type="MemOffs", dest="EA64", size=8, relaxed=True),
+              Operand(type="Areg", dest=None, size=8)])
+for sfx, sz in (("w", 16), ("l", 32), ("q", 64)):
+    add_group("movabs",
+        opcode=[0xA3],
+        opersize=sz,
+        suffix=sfx,
+        only64=True,
+        operands=[Operand(type="MemOffs", dest="EA64", size=sz,
+                          relaxed=True),
+                  Operand(type="Areg", dest=None, size=sz)])
+
+# Form taking a full 64-bit immediate
+add_group("movabs",
+    opcode=[0xB8],
+    opersize=64,
+    suffix="q",
+    operands=[Operand(type="Reg", dest="Op0Add", size=64),
+              Operand(type="Imm", dest="Imm", size=64, relaxed=True)])
+
+add_insn("movabs", "movabs", parser="gas")
+
+#
+# Sign-/zero-extending moves
+#
+add_group("movszx",
+    opcode=[0x0F, 0x00],
+    modifiers=["Op1Add"],
+    opersize=16,
+    suffix="b",
+    cpu=["386"],
+    operands=[Operand(type="Reg", dest="Spare", size=16),
+              Operand(type="RM", dest="EA", size=8, relaxed=True)])
+add_group("movszx",
+    opcode=[0x0F, 0x00],
+    modifiers=["Op1Add"],
+    opersize=32,
+    suffix="b",
+    cpu=["386"],
+    operands=[Operand(type="Reg", dest="Spare", size=32),
+              Operand(type="RM", dest="EA", size=8)])
+add_group("movszx",
+    opcode=[0x0F, 0x00],
+    modifiers=["Op1Add"],
+    opersize=64,
+    suffix="b",
+    operands=[Operand(type="Reg", dest="Spare", size=64),
+              Operand(type="RM", dest="EA", size=8)])
+add_group("movszx",
+    opcode=[0x0F, 0x01],
+    modifiers=["Op1Add"],
+    opersize=32,
+    suffix="w",
+    cpu=["386"],
+    operands=[Operand(type="Reg", dest="Spare", size=32),
+              Operand(type="RM", dest="EA", size=16)])
+add_group("movszx",
+    opcode=[0x0F, 0x01],
+    modifiers=["Op1Add"],
+    opersize=64,
+    suffix="w",
+    operands=[Operand(type="Reg", dest="Spare", size=64),
+              Operand(type="RM", dest="EA", size=16)])
+
+add_insn("movsbw", "movszx", modifiers=[0xBE], suffix="b")
+add_insn("movsbl", "movszx", modifiers=[0xBE], suffix="b")
+add_insn("movswl", "movszx", modifiers=[0xBE], suffix="w")
+add_insn("movsbq", "movszx", modifiers=[0xBE], suffix="b", only64=True)
+add_insn("movswq", "movszx", modifiers=[0xBE], suffix="w", only64=True)
+add_insn("movsx",  "movszx", modifiers=[0xBE])
+add_insn("movzbw", "movszx", modifiers=[0xB6], suffix="b")
+add_insn("movzbl", "movszx", modifiers=[0xB6], suffix="b")
+add_insn("movzwl", "movszx", modifiers=[0xB6], suffix="w")
+add_insn("movzbq", "movszx", modifiers=[0xB6], suffix="b", only64=True)
+add_insn("movzwq", "movszx", modifiers=[0xB6], suffix="w", only64=True)
+add_insn("movzx",  "movszx", modifiers=[0xB6])
+
+#
+# Sign-extending doubleword move (only exists in 64-bit mode)
+#
+add_group("movsxd",
+    opcode=[0x63],
+    opersize=64,
+    suffix="l",
+    operands=[Operand(type="Reg", dest="Spare", size=64),
+              Operand(type="RM", dest="EA", size=32)])
+
+add_insn("movslq", "movsxd", suffix="l")
+add_insn("movsxd", "movsxd", parser="nasm")
+
+#
+# Stack push instructions
+#
+add_group("push",
+    opcode=[0x50],
+    opersize=16,
+    def_opersize_64=64,
+    suffix="w",
+    operands=[Operand(type="Reg", dest="Op0Add", size=16)])
+add_group("push",
+    opcode=[0x50],
+    opersize=32,
+    suffix="l",
+    not64=True,
+    operands=[Operand(type="Reg", dest="Op0Add", size=32)])
+add_group("push",
+    opcode=[0x50],
+    def_opersize_64=64,
+    suffix="q",
+    operands=[Operand(type="Reg", dest="Op0Add", size=64)])
+add_group("push",
+    opcode=[0xFF],
+    spare=6,
+    opersize=16,
+    def_opersize_64=64,
+    suffix="w",
+    operands=[Operand(type="RM", dest="EA", size=16)])
+add_group("push",
+    opcode=[0xFF],
+    spare=6,
+    opersize=32,
+    suffix="l",
+    not64=True,
+    operands=[Operand(type="RM", dest="EA", size=32)])
+add_group("push",
+    opcode=[0xFF],
+    spare=6,
+    def_opersize_64=64,
+    suffix="q",
+    operands=[Operand(type="RM", dest="EA", size=64)])
+add_group("push",
+    opcode=[0x6A],
+    def_opersize_64=64,
+    parsers=["nasm"],
+    cpu=["186"],
+    operands=[Operand(type="Imm", dest="SImm", size=8)])
+add_group("push",
+    opcode=[0x6A],
+    def_opersize_64=64,
+    parsers=["gas"],
+    cpu=["186"],
+    operands=[Operand(type="Imm", dest="SImm", size=8, relaxed=True)])
+add_group("push",
+    opcode1=[0x6A],
+    opcode2=[0x68],
+    opersize=16,
+    def_opersize_64=64,
+    suffix="w",
+    parsers=["gas"],
+    cpu=["186"],
+    operands=[Operand(type="Imm", dest="Imm", size=16, relaxed=True,
+                      opt="SImm8")])
+add_group("push",
+    opcode1=[0x6A],
+    opcode2=[0x68],
+    opersize=32,
+    suffix="l",
+    parsers=["gas"],
+    not64=True,
+    operands=[Operand(type="Imm", dest="Imm", size=32, relaxed=True,
+                      opt="SImm8")])
+add_group("push",
+    opcode1=[0x6A],
+    opcode2=[0x68],
+    opersize=64,
+    def_opersize_64=64,
+    suffix="q",
+    only64=True,
+    operands=[Operand(type="Imm", dest="SImm", size=32, relaxed=True,
+                      opt="SImm8")])
+add_group("push",
+    opcode1=[0x6A],
+    opcode2=[0x68],
+    parsers=["nasm"],
+    cpu=["186"],
+    not64=True,
+    operands=[Operand(type="Imm", dest="Imm", size="BITS", relaxed=True,
+                      opt="SImm8")])
+# Fallbacks for operands that do not match the BITS size.  They must stay
+# below the form above so that the optimizer can kick in by default.
+add_group("push",
+    opcode=[0x68],
+    opersize=16,
+    def_opersize_64=64,
+    parsers=["nasm"],
+    cpu=["186"],
+    operands=[Operand(type="Imm", dest="Imm", size=16)])
+add_group("push",
+    opcode=[0x68],
+    opersize=32,
+    parsers=["nasm"],
+    not64=True,
+    operands=[Operand(type="Imm", dest="Imm", size=32)])
+add_group("push",
+    opcode=[0x68],
+    opersize=64,
+    def_opersize_64=64,
+    parsers=["nasm"],
+    only64=True,
+    operands=[Operand(type="Imm", dest="SImm", size=32)])
+add_group("push",
+    opcode=[0x0E],
+    not64=True,
+    operands=[Operand(type="CS", dest=None)])
+add_group("push",
+    opcode=[0x0E],
+    opersize=16,
+    suffix="w",
+    not64=True,
+    operands=[Operand(type="CS", dest=None, size=16)])
+add_group("push",
+    opcode=[0x0E],
+    opersize=32,
+    suffix="l",
+    not64=True,
+    operands=[Operand(type="CS", dest=None, size=32)])
+add_group("push",
+    opcode=[0x16],
+    not64=True,
+    operands=[Operand(type="SS", dest=None)])
+add_group("push",
+    opcode=[0x16],
+    opersize=16,
+    suffix="w",
+    not64=True,
+    operands=[Operand(type="SS", dest=None, size=16)])
+add_group("push",
+    opcode=[0x16],
+    opersize=32,
+    suffix="l",
+    not64=True,
+    operands=[Operand(type="SS", dest=None, size=32)])
+add_group("push",
+    opcode=[0x1E],
+    not64=True,
+    operands=[Operand(type="DS", dest=None)])
+add_group("push",
+    opcode=[0x1E],
+    opersize=16,
+    suffix="w",
+    not64=True,
+    operands=[Operand(type="DS", dest=None, size=16)])
+add_group("push",
+    opcode=[0x1E],
+    opersize=32,
+    suffix="l",
+    not64=True,
+    operands=[Operand(type="DS", dest=None, size=32)])
+add_group("push",
+    opcode=[0x06],
+    not64=True,
+    operands=[Operand(type="ES", dest=None)])
+add_group("push",
+    opcode=[0x06],
+    opersize=16,
+    suffix="w",
+    not64=True,
+    operands=[Operand(type="ES", dest=None, size=16)])
+add_group("push",
+    opcode=[0x06],
+    opersize=32,
+    suffix="l",
+    not64=True,
+    operands=[Operand(type="ES", dest=None, size=32)])
+add_group(
+    "push", opcode=[0x0F, 0xA0],
+    operands=[Operand(type="FS", dest=None)])
+add_group("push",
+    opcode=[0x0F, 0xA0],
+    opersize=16,
+    suffix="w",
+    operands=[Operand(type="FS", dest=None, size=16)])
+add_group("push",
+    opcode=[0x0F, 0xA0],
+    opersize=32,
+    suffix="l",
+    operands=[Operand(type="FS", dest=None, size=32)])
+add_group(
+    "push", opcode=[0x0F, 0xA8],
+    operands=[Operand(type="GS", dest=None)])
+add_group("push",
+    opcode=[0x0F, 0xA8],
+    opersize=16,
+    suffix="w",
+    operands=[Operand(type="GS", dest=None, size=16)])
+add_group("push",
+    opcode=[0x0F, 0xA8],
+    opersize=32,
+    suffix="l",
+    operands=[Operand(type="GS", dest=None, size=32)])
+
+add_insn("push", "push")
+add_insn("pusha", "onebyte", not64=True, cpu=["186"], modifiers=[0x60, 0])
+add_insn("pushad", "onebyte", modifiers=[0x60, 32], parser="nasm",
+         not64=True, cpu=["386"])
+add_insn("pushal", "onebyte", modifiers=[0x60, 32], parser="gas",
+         not64=True, cpu=["386"])
+add_insn("pushaw", "onebyte", not64=True, cpu=["186"], modifiers=[0x60, 16])
+
+#
+# Stack pop instructions
+#
+add_group("pop",
+    opcode=[0x58],
+    opersize=16,
+    def_opersize_64=64,
+    suffix="w",
+    operands=[Operand(type="Reg", dest="Op0Add", size=16)])
+add_group("pop",
+    opcode=[0x58],
+    opersize=32,
+    suffix="l",
+    not64=True,
+    operands=[Operand(type="Reg", dest="Op0Add", size=32)])
+add_group("pop",
+    opcode=[0x58],
+    def_opersize_64=64,
+    suffix="q",
+    operands=[Operand(type="Reg", dest="Op0Add", size=64)])
+add_group("pop",
+    opcode=[0x8F],
+    opersize=16,
+    def_opersize_64=64,
+    suffix="w",
+    operands=[Operand(type="RM", dest="EA", size=16)])
+add_group("pop",
+    opcode=[0x8F],
+    opersize=32,
+    suffix="l",
+    not64=True,
+    operands=[Operand(type="RM", dest="EA", size=32)])
+add_group("pop",
+    opcode=[0x8F],
+    def_opersize_64=64,
+    suffix="q",
+    operands=[Operand(type="RM", dest="EA", size=64)])
+# POP CS is debatably valid on the 8086, though obsolete and undocumented.
+# It is left out because it is VERY unlikely to ever be used anywhere;
+# anyone who really wants it can emit it with db 0x0F.
+#add_group("pop",
+#    cpu=["Undoc", "Obs"],
+#    opcode=[0x0F],
+#    operands=[Operand(type="CS", dest=None)])
+add_group("pop",
+    opcode=[0x17],
+    not64=True,
+    operands=[Operand(type="SS", dest=None)])
+add_group("pop",
+    opcode=[0x17],
+    opersize=16,
+    not64=True,
+    operands=[Operand(type="SS", dest=None, size=16)])
+add_group("pop",
+    opcode=[0x17],
+    opersize=32,
+    not64=True,
+    operands=[Operand(type="SS", dest=None, size=32)])
+add_group("pop",
+    opcode=[0x1F],
+    not64=True,
+    operands=[Operand(type="DS", dest=None)])
+add_group("pop",
+    opcode=[0x1F],
+    opersize=16,
+    not64=True,
+    operands=[Operand(type="DS", dest=None, size=16)])
+add_group("pop",
+    opcode=[0x1F],
+    opersize=32,
+    not64=True,
+    operands=[Operand(type="DS", dest=None, size=32)])
+add_group("pop",
+    opcode=[0x07],
+    not64=True,
+    operands=[Operand(type="ES", dest=None)])
+add_group("pop",
+    opcode=[0x07],
+    opersize=16,
+    not64=True,
+    operands=[Operand(type="ES", dest=None, size=16)])
+add_group("pop",
+    opcode=[0x07],
+    opersize=32,
+    not64=True,
+    operands=[Operand(type="ES", dest=None, size=32)])
+add_group(
+    "pop", opcode=[0x0F, 0xA1],
+    operands=[Operand(type="FS", dest=None)])
+add_group("pop",
+    opcode=[0x0F, 0xA1],
+    opersize=16,
+    operands=[Operand(type="FS", dest=None, size=16)])
+add_group("pop",
+    opcode=[0x0F, 0xA1],
+    opersize=32,
+    operands=[Operand(type="FS", dest=None, size=32)])
+add_group(
+    "pop", opcode=[0x0F, 0xA9],
+    operands=[Operand(type="GS", dest=None)])
+add_group("pop",
+    opcode=[0x0F, 0xA9],
+    opersize=16,
+    operands=[Operand(type="GS", dest=None, size=16)])
+add_group("pop",
+    opcode=[0x0F, 0xA9],
+    opersize=32,
+    operands=[Operand(type="GS", dest=None, size=32)])
+
+add_insn("pop", "pop")
+add_insn("popa", "onebyte", not64=True, cpu=["186"], modifiers=[0x61, 0])
+add_insn("popad", "onebyte", modifiers=[0x61, 32], parser="nasm",
+         not64=True, cpu=["386"])
+add_insn("popal", "onebyte", modifiers=[0x61, 32], parser="gas",
+         not64=True, cpu=["386"])
+add_insn("popaw", "onebyte", not64=True, cpu=["186"], modifiers=[0x61, 16])
+
+#
+# Exchange (xchg)
+#
+add_group("xchg",
+    opcode=[0x86],
+    suffix="b",
+    operands=[Operand(type="RM", dest="EA", size=8, relaxed=True),
+              Operand(type="Reg", dest="Spare", size=8)])
+add_group("xchg",
+    opcode=[0x86],
+    suffix="b",
+    operands=[Operand(type="Reg", dest="Spare", size=8),
+              Operand(type="RM", dest="EA", size=8, relaxed=True)])
+# There is room to be extra-efficient in 64-bit mode here: XCHG AX, AX
+# is a NOP in that mode because it leaves the high 48 bits of RAX
+# alone, so the operand-size prefix is not actually needed.  That
+# feels too clever, though, and is probably not what the user expects
+# to see in the generated code, so it is deliberately not done.
+#add_group("xchg",
+#    suffix="w",
+#    only64=True,
+#    opcode=[0x90],
+#    operands=[Operand(type="Areg", size=16, dest=None),
+#              Operand(type="Areg", size=16, dest="Op0Add")])
+add_group("xchg",
+    opcode=[0x90],
+    opersize=16,
+    suffix="w",
+    operands=[Operand(type="Areg", dest=None, size=16),
+              Operand(type="Reg", dest="Op0Add", size=16)])
+add_group("xchg",
+    opcode=[0x90],
+    opersize=16,
+    suffix="w",
+    operands=[Operand(type="Reg", dest="Op0Add", size=16),
+              Operand(type="Areg", dest=None, size=16)])
+add_group("xchg",
+    opcode=[0x87],
+    opersize=16,
+    suffix="w",
+    operands=[Operand(type="RM", dest="EA", size=16, relaxed=True),
+              Operand(type="Reg", dest="Spare", size=16)])
+add_group("xchg",
+    opcode=[0x87],
+    opersize=16,
+    suffix="w",
+    operands=[Operand(type="Reg", dest="Spare", size=16),
+              Operand(type="RM", dest="EA", size=16, relaxed=True)])
+# XCHG EAX, EAX needs care in 64-bit mode: it must be encoded with the
+# long form instead of the NOP form, because the long form clears the
+# high 32 bits of RAX.  That keeps every 32-bit form behaving
+# consistently in 64-bit mode.
+#
+# FIXME: a hard-to-fix bug in the generation of gas suffix CPU rules makes
+# this turn xchgl into CPU_Any rather than CPU_386.  A hacky patch could
+# fix it, but it is doubtful anyone will ever notice, so it is left alone.
+add_group("xchg",
+    opcode=[0x87],
+    opersize=32,
+    suffix="l",
+    only64=True,
+    operands=[Operand(type="Areg", dest="EA", size=32),
+              Operand(type="Areg", dest="Spare", size=32)])
+add_group("xchg",
+    opcode=[0x90],
+    opersize=32,
+    suffix="l",
+    operands=[Operand(type="Areg", dest=None, size=32),
+              Operand(type="Reg", dest="Op0Add", size=32)])
+add_group("xchg",
+    opcode=[0x90],
+    opersize=32,
+    suffix="l",
+    operands=[Operand(type="Reg", dest="Op0Add", size=32),
+              Operand(type="Areg", dest=None, size=32)])
+add_group("xchg",
+    opcode=[0x87],
+    opersize=32,
+    suffix="l",
+    operands=[Operand(type="RM", dest="EA", size=32, relaxed=True),
+              Operand(type="Reg", dest="Spare", size=32)])
+add_group("xchg",
+    opcode=[0x87],
+    opersize=32,
+    suffix="l",
+    operands=[Operand(type="Reg", dest="Spare", size=32),
+              Operand(type="RM", dest="EA", size=32, relaxed=True)])
+# XCHG RAX, RAX can be encoded efficiently:
+# it is a NOP, so no REX prefix is required.
+add_group("xchg",
+    opcode=[0x90],
+    suffix="q",
+    only64=True,
+    operands=[Operand(type="Areg", dest=None, size=64),
+              Operand(type="Areg", dest="Op0Add", size=64)])
+add_group("xchg",
+    opcode=[0x90],
+    opersize=64,
+    suffix="q",
+    operands=[Operand(type="Areg", dest=None, size=64),
+              Operand(type="Reg", dest="Op0Add", size=64)])
+add_group("xchg",
+    opcode=[0x90],
+    opersize=64,
+    suffix="q",
+    operands=[Operand(type="Reg", dest="Op0Add", size=64),
+              Operand(type="Areg", dest=None, size=64)])
+add_group("xchg",
+    opcode=[0x87],
+    opersize=64,
+    suffix="q",
+    operands=[Operand(type="RM", dest="EA", size=64, relaxed=True),
+              Operand(type="Reg", dest="Spare", size=64)])
+add_group("xchg",
+    opcode=[0x87],
+    opersize=64,
+    suffix="q",
+    operands=[Operand(type="Reg", dest="Spare", size=64),
+              Operand(type="RM", dest="EA", size=64, relaxed=True)])
+
+add_insn("xchg", "xchg")
+
+#####################################################################
+# Port input/output (in, out)
+#####################################################################
+add_group("in",
+    opcode=[0xE4],
+    suffix="b",
+    operands=[Operand(type="Areg", dest=None, size=8),
+              Operand(type="Imm", dest="Imm", size=8, relaxed=True)])
+for sfx, sz in (("w", 16), ("l", 32)):
+    add_group("in",
+        opcode=[0xE5],
+        opersize=sz,
+        suffix=sfx,
+        operands=[Operand(type="Areg", dest=None, size=sz),
+                  Operand(type="Imm", dest="Imm", size=8, relaxed=True)])
+add_group("in",
+    opcode=[0xEC],
+    suffix="b",
+    operands=[Operand(type="Areg", dest=None, size=8),
+              Operand(type="Dreg", dest=None, size=16)])
+for sfx, sz in (("w", 16), ("l", 32)):
+    add_group("in",
+        opcode=[0xED],
+        opersize=sz,
+        suffix=sfx,
+        operands=[Operand(type="Areg", dest=None, size=sz),
+                  Operand(type="Dreg", dest=None, size=16)])
+# Variants for GAS only, where the accumulator register is implicit
+add_group("in",
+    opcode=[0xE4],
+    suffix="b",
+    parsers=["gas"],
+    operands=[Operand(type="Imm", dest="Imm", size=8, relaxed=True)])
+for sfx, sz in (("w", 16), ("l", 32)):
+    add_group("in",
+        opcode=[0xE5],
+        opersize=sz,
+        suffix=sfx,
+        parsers=["gas"],
+        operands=[Operand(type="Imm", dest="Imm", size=8, relaxed=True)])
+add_group("in",
+    opcode=[0xEC],
+    suffix="b",
+    parsers=["gas"],
+    operands=[Operand(type="Dreg", dest=None, size=16)])
+add_group("in",
+    opcode=[0xED],
+    opersize=16,
+    suffix="w",
+    parsers=["gas"],
+    operands=[Operand(type="Dreg", dest=None, size=16)])
+add_group("in",
+    opcode=[0xED],
+    opersize=32,
+    suffix="l",
+    parsers=["gas"],
+    cpu=["386"],
+    operands=[Operand(type="Dreg", dest=None, size=16)])
+
+add_insn("in", "in")
+
+add_group("out",
+    opcode=[0xE6],
+    suffix="b",
+    operands=[Operand(type="Imm", dest="Imm", size=8, relaxed=True),
+              Operand(type="Areg", dest=None, size=8)])
+for sfx, sz in (("w", 16), ("l", 32)):
+    add_group("out",
+        opcode=[0xE7],
+        opersize=sz,
+        suffix=sfx,
+        operands=[Operand(type="Imm", dest="Imm", size=8, relaxed=True),
+                  Operand(type="Areg", dest=None, size=sz)])
+add_group("out",
+    opcode=[0xEE],
+    suffix="b",
+    operands=[Operand(type="Dreg", dest=None, size=16),
+              Operand(type="Areg", dest=None, size=8)])
+for sfx, sz in (("w", 16), ("l", 32)):
+    add_group("out",
+        opcode=[0xEF],
+        opersize=sz,
+        suffix=sfx,
+        operands=[Operand(type="Dreg", dest=None, size=16),
+                  Operand(type="Areg", dest=None, size=sz)])
+# Variants for GAS only, where the accumulator register is implicit
+add_group("out",
+    opcode=[0xE6],
+    suffix="b",
+    parsers=["gas"],
+    operands=[Operand(type="Imm", dest="Imm", size=8, relaxed=True)])
+add_group("out",
+    opcode=[0xE7],
+    opersize=16,
+    suffix="w",
+    parsers=["gas"],
+    operands=[Operand(type="Imm", dest="Imm", size=8, relaxed=True)])
+add_group("out",
+    opcode=[0xE7],
+    opersize=32,
+    suffix="l",
+    parsers=["gas"],
+    cpu=["386"],
+    operands=[Operand(type="Imm", dest="Imm", size=8, relaxed=True)])
+add_group("out",
+    opcode=[0xEE],
+    suffix="b",
+    parsers=["gas"],
+    operands=[Operand(type="Dreg", dest=None, size=16)])
+add_group("out",
+    opcode=[0xEF],
+    opersize=16,
+    suffix="w",
+    parsers=["gas"],
+    operands=[Operand(type="Dreg", dest=None, size=16)])
+add_group("out",
+    opcode=[0xEF],
+    opersize=32,
+    suffix="l",
+    parsers=["gas"],
+    cpu=["386"],
+    operands=[Operand(type="Dreg", dest=None, size=16)])
+
+add_insn("out", "out")
+
+#
+# lea: load effective address
+#
+for sfx, sz in (("w", 16), ("l", 32), ("q", 64)):
+    add_group("lea",
+        opcode=[0x8D],
+        opersize=sz,
+        suffix=sfx,
+        operands=[Operand(type="Reg", dest="Spare", size=sz),
+                  Operand(type="Mem", dest="EA", size=sz, relaxed=True)])
+
+add_insn("lea", "lea")
+
+#
+# Segment register loads from memory (lds/les, lfs/lgs/lss)
+#
+for sfx, sz in (("w", 16), ("l", 32)):
+    add_group("ldes",
+        opcode=[0x00],
+        modifiers=["Op0Add"],
+        opersize=sz,
+        suffix=sfx,
+        not64=True,
+        operands=[Operand(type="Reg", dest="Spare", size=sz),
+                  Operand(type="Mem", dest="EA")])
+
+add_insn("lds", "ldes", modifiers=[0xC5])
+add_insn("les", "ldes", modifiers=[0xC4])
+
+for sfx, sz in (("w", 16), ("l", 32)):
+    add_group("lfgss",
+        opcode=[0x0F, 0x00],
+        modifiers=["Op1Add"],
+        opersize=sz,
+        suffix=sfx,
+        cpu=["386"],
+        operands=[Operand(type="Reg", dest="Spare", size=sz),
+                  Operand(type="Mem", dest="EA")])
+
+add_insn("lfs", "lfgss", modifiers=[0xB4])
+add_insn("lgs", "lfgss", modifiers=[0xB5])
+add_insn("lss", "lfgss", modifiers=[0xB2])
+
+#
+# Instructions operating on the flags register
+#
+add_insn("clc",    "onebyte", modifiers=[0xF8])
+add_insn("cld",    "onebyte", modifiers=[0xFC])
+add_insn("cli",    "onebyte", modifiers=[0xFA])
+add_insn("clts",   "twobyte", cpu=["286", "Priv"], modifiers=[0x0F, 0x06])
+add_insn("cmc",    "onebyte", modifiers=[0xF5])
+add_insn("lahf",   "onebyte", modifiers=[0x9F])
+add_insn("sahf",   "onebyte", modifiers=[0x9E])
+add_insn("pushf",  "onebyte", modifiers=[0x9C, 0, 64])
+add_insn("pushfd", "onebyte", modifiers=[0x9C, 32], parser="nasm",
+         not64=True, cpu=["386"])
+add_insn("pushfl", "onebyte", modifiers=[0x9C, 32], parser="gas",
+         not64=True, cpu=["386"])
+add_insn("pushfw", "onebyte", modifiers=[0x9C, 16, 64])
+add_insn("pushfq", "onebyte", only64=True, modifiers=[0x9C, 64, 64])
+add_insn("popf",   "onebyte", modifiers=[0x9D, 0, 64])
+add_insn("popfd",  "onebyte", modifiers=[0x9D, 32], parser="nasm",
+         not64=True, cpu=["386"])
+add_insn("popfl",  "onebyte", modifiers=[0x9D, 32], parser="gas",
+         not64=True, cpu=["386"])
+add_insn("popfw",  "onebyte", modifiers=[0x9D, 16, 64])
+add_insn("popfq",  "onebyte", only64=True, modifiers=[0x9D, 64, 64])
+add_insn("stc",    "onebyte", modifiers=[0xF9])
+add_insn("std",    "onebyte", modifiers=[0xFD])
+add_insn("sti",    "onebyte", modifiers=[0xFB])
+
+#
+# General arithmetic (add/or/adc/sbb/and/sub/xor/cmp)
+#
+add_group("arith",
+    opcode=[0x04],
+    modifiers=["Op0Add"],
+    suffix="b",
+    operands=[Operand(type="Areg", dest=None, size=8),
+              Operand(type="Imm", dest="Imm", size=8, relaxed=True)])
+for sfx, sz, immsz in (("w", 16, 16), ("l", 32, 32), ("q", 64, 32)):
+    add_group("arith",
+        opcode1=[0x83, 0xC0],
+        opcode2=[0x05],
+        modifiers=["Op2Add", "Op1AddSp"],
+        opersize=sz,
+        suffix=sfx,
+        operands=[Operand(type="Areg", dest=None, size=sz),
+                  Operand(type="Imm", dest="Imm", size=immsz, relaxed=True,
+                          opt="SImm8")])
+
+add_group("arith",
+    opcode=[0x80],
+    spare=0,
+    modifiers=["Gap", "SpAdd"],
+    suffix="b",
+    operands=[Operand(type="RM", dest="EA", size=8),
+              Operand(type="Imm", dest="Imm", size=8, relaxed=True)])
+add_group("arith",
+    opcode=[0x80],
+    spare=0,
+    modifiers=["Gap", "SpAdd"],
+    suffix="b",
+    operands=[Operand(type="RM", dest="EA", size=8, relaxed=True),
+              Operand(type="Imm", dest="Imm", size=8)])
+
+add_group("arith",
+    opcode=[0x83],
+    spare=0,
+    modifiers=["Gap", "SpAdd"],
+    opersize=16,
+    suffix="w",
+    operands=[Operand(type="RM", dest="EA", size=16),
+              Operand(type="Imm", dest="SImm", size=8)])
+add_group("arith",
+    opcode1=[0x83],
+    opcode2=[0x81],
+    spare=0,
+    modifiers=["Gap", "SpAdd"],
+    opersize=16,
+    parsers=["nasm"],
+    operands=[Operand(type="RM", dest="EA", size=16, relaxed=True),
+              Operand(type="Imm", dest="Imm", size=16, opt="SImm8")])
+add_group("arith",
+    opcode1=[0x83],
+    opcode2=[0x81],
+    spare=0,
+    modifiers=["Gap", "SpAdd"],
+    opersize=16,
+    suffix="w",
+    operands=[
+        Operand(type="RM", dest="EA", size=16),
+        Operand(type="Imm", dest="Imm", size=16, relaxed=True, opt="SImm8")])
+
+add_group("arith",
+    opcode=[0x83],
+    spare=0,
+    modifiers=["Gap", "SpAdd"],
+    opersize=32,
+    suffix="l",
+    operands=[Operand(type="RM", dest="EA", size=32),
+              Operand(type="Imm", dest="SImm", size=8)])
+# Not64: in 64-bit mode there is no way to tell whether add [], dword is
+# meant to have a qword destination or a dword destination.
+add_group("arith",
+    opcode1=[0x83],
+    opcode2=[0x81],
+    spare=0,
+    modifiers=["Gap", "SpAdd"],
+    opersize=32,
+    parsers=["nasm"],
+    not64=True,
+    operands=[Operand(type="RM", dest="EA", size=32, relaxed=True),
+              Operand(type="Imm", dest="Imm", size=32, opt="SImm8")])
+add_group("arith",
+    opcode1=[0x83],
+    opcode2=[0x81],
+    spare=0,
+    modifiers=["Gap", "SpAdd"],
+    opersize=32,
+    suffix="l",
+    operands=[
+        Operand(type="RM", dest="EA", size=32),
+        Operand(type="Imm", dest="Imm", size=32, relaxed=True, opt="SImm8")])
+
+# 64-bit destinations get no relaxed-RM form; see the Not64 comment above.
+add_group("arith",
+    opcode=[0x83],
+    spare=0,
+    modifiers=["Gap", "SpAdd"],
+    opersize=64,
+    suffix="q",
+    operands=[Operand(type="RM", dest="EA", size=64),
+              Operand(type="Imm", dest="SImm", size=8)])
+add_group("arith",
+    opcode1=[0x83],
+    opcode2=[0x81],
+    spare=0,
+    modifiers=["Gap", "SpAdd"],
+    opersize=64,
+    suffix="q",
+    operands=[
+        Operand(type="RM", dest="EA", size=64),
+        Operand(type="Imm", dest="Imm", size=32, relaxed=True, opt="SImm8")])
+
+for sfx, sz in (("b", 8), ("w", 16), ("l", 32), ("q", 64)):
+    add_group("arith",
+        opcode=[0x00 + int(sz != 8)],
+        modifiers=["Op0Add"],
+        opersize=sz,
+        suffix=sfx,
+        operands=[Operand(type="RM", dest="EA", size=sz, relaxed=True),
+                  Operand(type="Reg", dest="Spare", size=sz)])
+for sfx, sz in (("b", 8), ("w", 16), ("l", 32), ("q", 64)):
+    add_group("arith",
+        opcode=[0x02 + int(sz != 8)],
+        modifiers=["Op0Add"],
+        opersize=sz,
+        suffix=sfx,
+        operands=[Operand(type="Reg", dest="Spare", size=sz),
+                  Operand(type="RM", dest="EA", size=sz, relaxed=True)])
+
+add_insn("add", "arith", modifiers=[0x00, 0])
+add_insn("or",  "arith", modifiers=[0x08, 1])
+add_insn("adc", "arith", modifiers=[0x10, 2])
+add_insn("sbb", "arith", modifiers=[0x18, 3])
+add_insn("and", "arith", modifiers=[0x20, 4])
+add_insn("sub", "arith", modifiers=[0x28, 5])
+add_insn("xor", "arith", modifiers=[0x30, 6])
+add_insn("cmp", "arith", modifiers=[0x38, 7])
+
+#
+# Arithmetic: inc and dec
+#
+add_group("incdec",
+    opcode=[0xFE],
+    spare=0,
+    modifiers=["Gap", "SpAdd"],
+    suffix="b",
+    operands=[Operand(type="RM", dest="EA", size=8)])
+for sfx, sz in (("w", 16), ("l", 32)):
+    add_group("incdec",
+        opcode=[0x00],
+        modifiers=["Op0Add"],
+        opersize=sz,
+        suffix=sfx,
+        not64=True,
+        operands=[Operand(type="Reg", dest="Op0Add", size=sz)])
+    add_group("incdec",
+        opcode=[0xFF],
+        spare=0,
+        modifiers=["Gap", "SpAdd"],
+        opersize=sz,
+        suffix=sfx,
+        operands=[Operand(type="RM", dest="EA", size=sz)])
+add_group("incdec",
+    opcode=[0xFF],
+    spare=0,
+    modifiers=["Gap", "SpAdd"],
+    opersize=64,
+    suffix="q",
+    operands=[Operand(type="RM", dest="EA", size=64)])
+
+add_insn("inc", "incdec", modifiers=[0x40, 0])
+add_insn("dec", "incdec", modifiers=[0x48, 1])
+
+#
+# Arithmetic: not/neg/mul, sharing the F6 opcodes
+#
+for sfx, sz in (("b", 8), ("w", 16), ("l", 32), ("q", 64)):
+    add_group("f6",
+        opcode=[0xF6 + int(sz != 8)],
+        spare=0,
+        modifiers=["SpAdd"],
+        opersize=sz,
+        suffix=sfx,
+        operands=[Operand(type="RM", dest="EA", size=sz)])
+
+add_insn("not", "f6", modifiers=[2])
+add_insn("neg", "f6", modifiers=[3])
+add_insn("mul", "f6", modifiers=[4])
+
+#
+# Arithmetic: div/idiv, sharing the F6 opcodes.
+# In GAS mode the accumulator may be given explicitly.
+#
+for sfx, sz in (("b", 8), ("w", 16), ("l", 32), ("q", 64)):
+    add_group("div",
+        opcode=[0xF6 + int(sz != 8)],
+        spare=0,
+        modifiers=["SpAdd"],
+        opersize=sz,
+        suffix=sfx,
+        operands=[Operand(type="RM", dest="EA", size=sz)])
+# Forms that name the accumulator explicitly
+for sfx, sz in (("b", 8), ("w", 16), ("l", 32), ("q", 64)):
+    add_group("div",
+        opcode=[0xF6 + int(sz != 8)],
+        spare=0,
+        modifiers=["SpAdd"],
+        opersize=sz,
+        suffix=sfx,
+        operands=[Operand(type="Areg", dest=None, size=sz),
+                  Operand(type="RM", dest="EA", size=sz)])
+
+add_insn("div", "div", modifiers=[6])
+add_insn("idiv", "div", modifiers=[7])
+
+#
+# Arithmetic: the test instruction
+#
+for sfx, sz, immsz in zip("bwlq", (8, 16, 32, 64), (8, 16, 32, 32)):
+    add_group("test",
+        opcode=[0xA8 + int(sz != 8)],
+        opersize=sz,
+        suffix=sfx,
+        operands=[Operand(type="Areg", dest=None, size=sz),
+                  Operand(type="Imm", dest="Imm", size=immsz, relaxed=True)])
+
+for sfx, sz, immsz in zip("bwlq", (8, 16, 32, 64), (8, 16, 32, 32)):
+    add_group("test",
+        opcode=[0xF6 + int(sz != 8)],
+        opersize=sz,
+        suffix=sfx,
+        operands=[Operand(type="RM", dest="EA", size=sz),
+                  Operand(type="Imm", dest="Imm", size=immsz, relaxed=True)])
+    add_group("test",
+        opcode=[0xF6 + int(sz != 8)],
+        opersize=sz,
+        suffix=sfx,
+        operands=[Operand(type="RM", dest="EA", size=sz, relaxed=True),
+                  Operand(type="Imm", dest="Imm", size=immsz)])
+
+for sfx, sz in (("b", 8), ("w", 16), ("l", 32), ("q", 64)):
+    add_group("test",
+        opcode=[0x84 + int(sz != 8)],
+        opersize=sz,
+        suffix=sfx,
+        operands=[Operand(type="RM", dest="EA", size=sz, relaxed=True),
+                  Operand(type="Reg", dest="Spare", size=sz)])
+for sfx, sz in (("b", 8), ("w", 16), ("l", 32), ("q", 64)):
+    add_group("test",
+        opcode=[0x84 + int(sz != 8)],
+        opersize=sz,
+        suffix=sfx,
+        operands=[Operand(type="Reg", dest="Spare", size=sz),
+                  Operand(type="RM", dest="EA", size=sz, relaxed=True)])
+
+add_insn("test", "test")
+
+#
+# Arithmetic: aad and aam
+#
+add_group("aadm",
+    opcode=[0xD4, 0x0A],
+    modifiers=["Op0Add"],
+    operands=[])
+add_group("aadm",
+    opcode=[0xD4],
+    modifiers=["Op0Add"],
+    operands=[Operand(type="Imm", dest="Imm", size=8, relaxed=True)])
+
+add_insn("aaa", "onebyte", not64=True, modifiers=[0x37])
+add_insn("aas", "onebyte", not64=True, modifiers=[0x3F])
+add_insn("daa", "onebyte", not64=True, modifiers=[0x27])
+add_insn("das", "onebyte", not64=True, modifiers=[0x2F])
+add_insn("aad", "aadm", not64=True, modifiers=[0x01])
+add_insn("aam", "aadm", not64=True, modifiers=[0x00])
+
+#
+# Conversion instructions (common names)
+#
+add_insn("cbw",  "onebyte", modifiers=[0x98, 16])
+add_insn("cwde", "onebyte", cpu=["386"], modifiers=[0x98, 32])
+add_insn("cdqe", "onebyte", only64=True, modifiers=[0x98, 64])
+add_insn("cwd",  "onebyte", modifiers=[0x99, 16])
+add_insn("cdq",  "onebyte", cpu=["386"], modifiers=[0x99, 32])
+add_insn("cqo",  "onebyte", only64=True, modifiers=[0x99, 64])
+
+#
+# Conversion instructions under their GAS / AT&T names
+#
+add_insn("cbtw", "onebyte", modifiers=[0x98, 16], parser="gas")
+add_insn("cwtl", "onebyte", modifiers=[0x98, 32], parser="gas", cpu=["386"])
+add_insn("cltq", "onebyte", modifiers=[0x98, 64], parser="gas", only64=True)
+add_insn("cwtd", "onebyte", modifiers=[0x99, 16], parser="gas")
+add_insn("cltd", "onebyte", modifiers=[0x99, 32], parser="gas", cpu=["386"])
+add_insn("cqto", "onebyte", modifiers=[0x99, 64], parser="gas", only64=True)
+
+#
+# Arithmetic: imul
+#
+for sfx, sz in (("b", 8), ("w", 16), ("l", 32), ("q", 64)):
+    add_group("imul",
+        opcode=[0xF6 + int(sz != 8)],
+        spare=5,
+        opersize=sz,
+        suffix=sfx,
+        operands=[Operand(type="RM", dest="EA", size=sz)])
+for sfx, sz in (("w", 16), ("l", 32), ("q", 64)):
+    add_group("imul",
+        opcode=[0x0F, 0xAF],
+        opersize=sz,
+        suffix=sfx,
+        cpu=["386"],
+        operands=[Operand(type="Reg", dest="Spare", size=sz),
+                  Operand(type="RM", dest="EA", size=sz, relaxed=True)])
+for sfx, sz in (("w", 16), ("l", 32), ("q", 64)):
+    add_group("imul",
+        opcode=[0x6B],
+        opersize=sz,
+        suffix=sfx,
+        cpu=["186"],
+        operands=[Operand(type="Reg", dest="Spare", size=sz),
+                  Operand(type="RM", dest="EA", size=sz, relaxed=True),
+                  Operand(type="Imm", dest="SImm", size=8)])
+for sfx, sz in (("w", 16), ("l", 32), ("q", 64)):
+    add_group("imul",
+        opcode=[0x6B],
+        opersize=sz,
+        suffix=sfx,
+        cpu=["186"],
+        operands=[Operand(type="Reg", dest="SpareEA", size=sz),
+                  Operand(type="Imm", dest="SImm", size=8)])
+for sfx, sz, immsz in (("w", 16, 16), ("l", 32, 32), ("q", 64, 32)):
+    add_group("imul",
+        opcode1=[0x6B],
+        opcode2=[0x69],
+        opersize=sz,
+        suffix=sfx,
+        cpu=["186"],
+        operands=[Operand(type="Reg", dest="Spare", size=sz),
+                  Operand(type="RM", dest="EA", size=sz, relaxed=True),
+                  Operand(type="Imm", dest="SImm", size=immsz, relaxed=True,
+                          opt="SImm8")])
+for sfx, sz, immsz in (("w", 16, 16), ("l", 32, 32), ("q", 64, 32)):
+    add_group("imul",
+        opcode1=[0x6B],
+        opcode2=[0x69],
+        opersize=sz,
+        suffix=sfx,
+        cpu=["186"],
+        operands=[Operand(type="Reg", dest="SpareEA", size=sz),
+                  Operand(type="Imm", dest="SImm", size=immsz, relaxed=True,
+                          opt="SImm8")])
+
+add_insn("imul", "imul")
+
+# Shifts - standard
+# Each size gets three forms: second operand Creg (0xD2/0xD3), Imm1
+# (0xD0/0xD1), or an 8-bit immediate (0xC0/0xC1, cpu "186").
+for sfx, sz in zip("bwlq", [8, 16, 32, 64]):
+    add_group("shift",
+        suffix=sfx,
+        modifiers=["SpAdd"],
+        opersize=sz,
+        opcode=[0xD2+(sz!=8)],  # +1 for every size except 8-bit
+        spare=0,
+        operands=[Operand(type="RM", size=sz, dest="EA"),
+                  Operand(type="Creg", size=8, dest=None)])
+    add_group("shift",
+        suffix=sfx,
+        modifiers=["SpAdd"],
+        opersize=sz,
+        opcode=[0xD0+(sz!=8)],
+        spare=0,
+        operands=[Operand(type="RM", size=sz, dest="EA"),
+                  Operand(type="Imm1", size=8, relaxed=True, dest=None)])
+    add_group("shift",
+        suffix=sfx,
+        cpu=["186"],
+        modifiers=["SpAdd"],
+        opersize=sz,
+        opcode=[0xC0+(sz!=8)],
+        spare=0,
+        operands=[Operand(type="RM", size=sz, dest="EA"),
+                  Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+# In GAS mode, single operands are equivalent to shifting by 1 forms
+for sfx, sz in zip("bwlq", [8, 16, 32, 64]):
+    add_group("shift",
+        suffix=sfx,
+        parsers=["gas"],
+        modifiers=["SpAdd"],
+        opersize=sz,
+        opcode=[0xD0+(sz!=8)],  # same opcode as the Imm1 form above
+        spare=0,
+        operands=[Operand(type="RM", size=sz, dest="EA")])
+
+add_insn("rol", "shift", modifiers=[0])  # modifier feeds SpAdd (spare=0 base)
+add_insn("ror", "shift", modifiers=[1])
+add_insn("rcl", "shift", modifiers=[2])
+add_insn("rcr", "shift", modifiers=[3])
+add_insn("sal", "shift", modifiers=[4])  # same modifier as shl
+add_insn("shl", "shift", modifiers=[4])
+add_insn("shr", "shift", modifiers=[5])
+add_insn("sar", "shift", modifiers=[7])  # value 6 is not used by any name
+
+# Shifts - doubleword
+# Op1Add group: opcode byte 1 is 0x00 for the immediate form and 0x01 for
+# the Creg forms; shld/shrd supply 0xA4/0xAC as the modifier.
+for sfx, sz in zip("wlq", [16, 32, 64]):
+    add_group("shlrd",
+        suffix=sfx,
+        cpu=["386"],
+        modifiers=["Op1Add"],
+        opersize=sz,
+        opcode=[0x0F, 0x00],
+        operands=[Operand(type="RM", size=sz, relaxed=True, dest="EA"),
+                  Operand(type="Reg", size=sz, dest="Spare"),
+                  Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+    add_group("shlrd",
+        suffix=sfx,
+        cpu=["386"],
+        modifiers=["Op1Add"],
+        opersize=sz,
+        opcode=[0x0F, 0x01],
+        operands=[Operand(type="RM", size=sz, relaxed=True, dest="EA"),
+                  Operand(type="Reg", size=sz, dest="Spare"),
+                  Operand(type="Creg", size=8, dest=None)])
+# GAS parser supports two-operand form for shift with CL count
+for sfx, sz in zip("wlq", [16, 32, 64]):
+    add_group("shlrd",
+        suffix=sfx,
+        cpu=["386"],
+        parsers=["gas"],
+        modifiers=["Op1Add"],
+        opersize=sz,
+        opcode=[0x0F, 0x01],  # same bytes as the explicit Creg form
+        operands=[Operand(type="RM", size=sz, relaxed=True, dest="EA"),
+                  Operand(type="Reg", size=sz, dest="Spare")])
+
+add_insn("shld", "shlrd", modifiers=[0xA4])
+add_insn("shrd", "shlrd", modifiers=[0xAC])
+
+#####################################################################
+# Control transfer instructions (unconditional)
+#####################################################################
+# call
+# Form order matters: the ImmNotSegOff forms come first, and immediates
+# they do not catch fall through to the implicitly-FAR forms at the end.
+add_group("call",
+    opcode=[],  # no opcode bytes are listed on the four ImmNotSegOff forms
+    operands=[Operand(type="ImmNotSegOff", dest="JmpRel")])
+add_group("call",
+    opersize=16,
+    opcode=[],
+    operands=[Operand(type="ImmNotSegOff", size=16, dest="JmpRel")])
+add_group("call",
+    not64=True,
+    opersize=32,
+    opcode=[],
+    operands=[Operand(type="ImmNotSegOff", size=32, dest="JmpRel")])
+add_group("call",
+    only64=True,
+    opersize=64,  # 64-bit opersize is paired with a 32-bit immediate
+    opcode=[],
+    operands=[Operand(type="ImmNotSegOff", size=32, dest="JmpRel")])
+
+add_group("call",
+    opersize=16,
+    def_opersize_64=64,
+    opcode=[0xE8],  # explicit tmod="Near" relative forms use 0xE8
+    operands=[Operand(type="Imm", size=16, tmod="Near", dest="JmpRel")])
+add_group("call",
+    not64=True,
+    opersize=32,
+    opcode=[0xE8],
+    operands=[Operand(type="Imm", size=32, tmod="Near", dest="JmpRel")])
+add_group("call",
+    only64=True,
+    opersize=64,
+    def_opersize_64=64,
+    opcode=[0xE8],
+    operands=[Operand(type="Imm", size=32, tmod="Near", dest="JmpRel")])
+add_group("call",
+    def_opersize_64=64,
+    opcode=[0xE8],
+    operands=[Operand(type="Imm", tmod="Near", dest="JmpRel")])
+
+add_group("call",
+    opersize=16,
+    opcode=[0xFF],  # RM/Mem operand forms: 0xFF with spare=2
+    spare=2,
+    operands=[Operand(type="RM", size=16, dest="EA")])
+add_group("call",
+    not64=True,
+    opersize=32,
+    opcode=[0xFF],
+    spare=2,
+    operands=[Operand(type="RM", size=32, dest="EA")])
+add_group("call",
+    opersize=64,
+    def_opersize_64=64,
+    opcode=[0xFF],
+    spare=2,
+    operands=[Operand(type="RM", size=64, dest="EA")])
+add_group("call",
+    def_opersize_64=64,
+    opcode=[0xFF],
+    spare=2,
+    operands=[Operand(type="Mem", dest="EA")])
+add_group("call",
+    opersize=16,
+    def_opersize_64=64,
+    opcode=[0xFF],  # same four forms again, now with explicit tmod="Near"
+    spare=2,
+    operands=[Operand(type="RM", size=16, tmod="Near", dest="EA")])
+add_group("call",
+    not64=True,
+    opersize=32,
+    opcode=[0xFF],
+    spare=2,
+    operands=[Operand(type="RM", size=32, tmod="Near", dest="EA")])
+add_group("call",
+    opersize=64,
+    def_opersize_64=64,
+    opcode=[0xFF],
+    spare=2,
+    operands=[Operand(type="RM", size=64, tmod="Near", dest="EA")])
+add_group("call",
+    def_opersize_64=64,
+    opcode=[0xFF],
+    spare=2,
+    operands=[Operand(type="Mem", tmod="Near", dest="EA")])
+
+# Far indirect (through memory).  Needs explicit FAR override.
+for sz in [16, 32, 64]:
+    add_group("call",
+        opersize=sz,
+        opcode=[0xFF],
+        spare=3,  # far memory forms use spare=3 instead of 2
+        operands=[Operand(type="Mem", size=sz, tmod="Far", dest="EA")])
+add_group("call",
+    opcode=[0xFF],
+    spare=3,
+    operands=[Operand(type="Mem", tmod="Far", dest="EA")])
+
+# With explicit FAR override
+for sz in [16, 32]:  # immediate far forms are all not64
+    add_group("call",
+        not64=True,
+        opersize=sz,
+        opcode=[0x9A],
+        spare=3,
+        operands=[Operand(type="Imm", size=sz, tmod="Far", dest="JmpFar")])
+add_group("call",
+    not64=True,
+    opcode=[0x9A],
+    spare=3,
+    operands=[Operand(type="Imm", tmod="Far", dest="JmpFar")])
+
+# Since not caught by first ImmNotSegOff group, implicitly FAR.
+for sz in [16, 32]:
+    add_group("call",
+        not64=True,
+        opersize=sz,
+        opcode=[0x9A],
+        spare=3,
+        operands=[Operand(type="Imm", size=sz, dest="JmpFar")])
+add_group("call",
+    not64=True,
+    opcode=[0x9A],
+    spare=3,
+    operands=[Operand(type="Imm", dest="JmpFar")])
+
+add_insn("call", "call")
+add_insn("calll", "call", parser="gas", not64=True)  # GAS-only, not64
+add_insn("callq", "call", parser="gas", only64=True)  # GAS-only, only64
+
+# jmp
+# Laid out like the call forms, plus a tmod="Short" form (0xEB).  Later
+# forms rely on earlier ones not matching, so keep the order intact.
+add_group("jmp",
+    opcode=[],
+    operands=[Operand(type="ImmNotSegOff", dest="JmpRel")])
+add_group("jmp",
+    opersize=16,
+    opcode=[],
+    operands=[Operand(type="ImmNotSegOff", size=16, dest="JmpRel")])
+add_group("jmp",
+    not64=True,
+    opersize=32,
+    opcode=[0x00],  # NOTE(review): sibling forms use opcode=[] -- confirm
+    operands=[Operand(type="ImmNotSegOff", size=32, dest="JmpRel")])
+add_group("jmp",
+    only64=True,
+    opersize=64,
+    opcode=[0x00],  # NOTE(review): sibling forms use opcode=[] -- confirm
+    operands=[Operand(type="ImmNotSegOff", size=32, dest="JmpRel")])
+
+add_group("jmp",
+    def_opersize_64=64,
+    opcode=[0xEB],  # the only tmod="Short" form; no operand size given
+    operands=[Operand(type="Imm", tmod="Short", dest="JmpRel")])
+add_group("jmp",
+    opersize=16,
+    def_opersize_64=64,
+    opcode=[0xE9],  # explicit tmod="Near" relative forms use 0xE9
+    operands=[Operand(type="Imm", size=16, tmod="Near", dest="JmpRel")])
+add_group("jmp",
+    not64=True,
+    cpu=["386"],
+    opersize=32,
+    opcode=[0xE9],
+    operands=[Operand(type="Imm", size=32, tmod="Near", dest="JmpRel")])
+add_group("jmp",
+    only64=True,
+    opersize=64,
+    def_opersize_64=64,
+    opcode=[0xE9],
+    operands=[Operand(type="Imm", size=32, tmod="Near", dest="JmpRel")])
+add_group("jmp",
+    def_opersize_64=64,
+    opcode=[0xE9],
+    operands=[Operand(type="Imm", tmod="Near", dest="JmpRel")])
+
+add_group("jmp",
+    opersize=16,
+    def_opersize_64=64,
+    opcode=[0xFF],  # RM/Mem operand forms: 0xFF with spare=4
+    spare=4,
+    operands=[Operand(type="RM", size=16, dest="EA")])
+add_group("jmp",
+    not64=True,
+    opersize=32,
+    opcode=[0xFF],
+    spare=4,
+    operands=[Operand(type="RM", size=32, dest="EA")])
+add_group("jmp",
+    opersize=64,
+    def_opersize_64=64,
+    opcode=[0xFF],
+    spare=4,
+    operands=[Operand(type="RM", size=64, dest="EA")])
+add_group("jmp",
+    def_opersize_64=64,
+    opcode=[0xFF],
+    spare=4,
+    operands=[Operand(type="Mem", dest="EA")])
+add_group("jmp",
+    opersize=16,
+    def_opersize_64=64,
+    opcode=[0xFF],  # same four forms again, now with explicit tmod="Near"
+    spare=4,
+    operands=[Operand(type="RM", size=16, tmod="Near", dest="EA")])
+add_group("jmp",
+    not64=True,
+    cpu=["386"],
+    opersize=32,
+    opcode=[0xFF],
+    spare=4,
+    operands=[Operand(type="RM", size=32, tmod="Near", dest="EA")])
+add_group("jmp",
+    opersize=64,
+    def_opersize_64=64,
+    opcode=[0xFF],
+    spare=4,
+    operands=[Operand(type="RM", size=64, tmod="Near", dest="EA")])
+add_group("jmp",
+    def_opersize_64=64,
+    opcode=[0xFF],
+    spare=4,
+    operands=[Operand(type="Mem", tmod="Near", dest="EA")])
+
+# Far indirect (through memory).  Needs explicit FAR override.
+for sz in [16, 32, 64]:
+    add_group("jmp",
+        opersize=sz,
+        opcode=[0xFF],
+        spare=5,  # far memory forms use spare=5 instead of 4
+        operands=[Operand(type="Mem", size=sz, tmod="Far", dest="EA")])
+add_group("jmp",
+    opcode=[0xFF],
+    spare=5,
+    operands=[Operand(type="Mem", tmod="Far", dest="EA")])
+
+# With explicit FAR override
+for sz in [16, 32]:  # immediate far forms are all not64
+    add_group("jmp",
+        not64=True,
+        opersize=sz,
+        opcode=[0xEA],
+        spare=3,
+        operands=[Operand(type="Imm", size=sz, tmod="Far", dest="JmpFar")])
+add_group("jmp",
+    not64=True,
+    opcode=[0xEA],
+    spare=3,
+    operands=[Operand(type="Imm", tmod="Far", dest="JmpFar")])
+
+# Since not caught by first ImmNotSegOff group, implicitly FAR.
+for sz in [16, 32]:
+    add_group("jmp",
+        not64=True,
+        opersize=sz,
+        opcode=[0xEA],
+        spare=3,
+        operands=[Operand(type="Imm", size=sz, dest="JmpFar")])
+add_group("jmp",
+    not64=True,
+    opcode=[0xEA],
+    spare=3,
+    operands=[Operand(type="Imm", dest="JmpFar")])
+
+add_insn("jmp", "jmp")
+
+# ret
+# "retnf" forms use Op0Add on opcode 0x01 (no operand) or 0x00 (16-bit
+# immediate); add_insn supplies 0xC2 (ret*) or 0xCA (retf, lret*).
+add_group("retnf",
+    not64=True,
+    modifiers=["Op0Add"],
+    opcode=[0x01],
+    operands=[])
+add_group("retnf",
+    not64=True,
+    modifiers=["Op0Add"],
+    opcode=[0x00],
+    operands=[Operand(type="Imm", size=16, relaxed=True, dest="Imm")])
+add_group("retnf",
+    only64=True,
+    modifiers=["Op0Add", "OpSizeR"],  # only64 forms add an OpSizeR modifier
+    opcode=[0x01],
+    operands=[])
+add_group("retnf",
+    only64=True,
+    modifiers=["Op0Add", "OpSizeR"],
+    opcode=[0x00],
+    operands=[Operand(type="Imm", size=16, relaxed=True, dest="Imm")])
+# GAS suffix versions
+add_group("retnf",
+    gen_suffix=False,
+    suffixes=["w", "l", "q"],
+    modifiers=["Op0Add", "OpSizeR"],
+    opcode=[0x01],
+    operands=[])
+add_group("retnf",
+    gen_suffix=False,
+    suffixes=["w", "l", "q"],
+    modifiers=["Op0Add", "OpSizeR"],
+    opcode=[0x00],
+    operands=[Operand(type="Imm", size=16, relaxed=True, dest="Imm")])
+
+add_insn("ret", "retnf", modifiers=[0xC2])
+add_insn("retw", "retnf", parser="gas", modifiers=[0xC2, 16])
+add_insn("retl", "retnf", parser="gas", modifiers=[0xC2], not64=True)
+add_insn("retq", "retnf", parser="gas", modifiers=[0xC2], only64=True)
+add_insn("retn", "retnf", parser="nasm", modifiers=[0xC2])
+add_insn("retf", "retnf", parser="nasm", modifiers=[0xCA, 64])
+add_insn("lretw", "retnf", parser="gas", modifiers=[0xCA, 16], suffix="NONE")
+add_insn("lretl", "retnf", parser="gas", modifiers=[0xCA], suffix="NONE")
+add_insn("lretq", "retnf", parser="gas", modifiers=[0xCA, 64], only64=True,
+         suffix="NONE")
+
+# enter
+# All forms use opcode 0xC8 with a 16-bit and an 8-bit immediate and set
+# gas_no_reverse; they differ in suffix, mode restriction and opersize.
+add_group("enter",
+    suffix="l",
+    not64=True,
+    cpu=["186"],
+    gas_no_reverse=True,
+    opcode=[0xC8],  # the "l" form sets no opersize
+    operands=[
+        Operand(type="Imm", size=16, relaxed=True, dest="EA", opt="A16"),
+        Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("enter",
+    suffix="q",
+    only64=True,
+    cpu=["186"],
+    gas_no_reverse=True,
+    opersize=64,
+    def_opersize_64=64,
+    opcode=[0xC8],
+    operands=[
+        Operand(type="Imm", size=16, relaxed=True, dest="EA", opt="A16"),
+        Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+# GAS suffix version
+add_group("enter",
+    suffix="w",
+    cpu=["186"],
+    parsers=["gas"],  # the only enter form restricted to one parser
+    gas_no_reverse=True,
+    opersize=16,
+    opcode=[0xC8],
+    operands=[
+        Operand(type="Imm", size=16, relaxed=True, dest="EA", opt="A16"),
+        Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("enter", "enter")
+
+# leave
+# Every variant passes 0xC9 first; leavew passes [0xC9, 16, 0] while the
+# other three pass [0xC9, 0, 64].  leaveq alone is only64 with no cpu list.
+add_insn("leave", "onebyte", modifiers=[0xC9, 0, 64], cpu=["186"])
+add_insn("leavew", "onebyte", parser="gas", modifiers=[0xC9, 16, 0],
+         cpu=["186"])
+add_insn("leavel", "onebyte", parser="gas", modifiers=[0xC9, 0, 64],
+         cpu=["186"])
+add_insn("leaveq", "onebyte", parser="gas", modifiers=[0xC9, 0, 64],
+         only64=True)
+
+#####################################################################
+# Conditional jumps
+#####################################################################
+add_group("jcc",
+    opcode=[],  # the four forms without tmod list no opcode bytes
+    operands=[Operand(type="Imm", dest="JmpRel")])
+add_group("jcc",
+    opersize=16,
+    opcode=[],
+    operands=[Operand(type="Imm", size=16, dest="JmpRel")])
+add_group("jcc",
+    not64=True,
+    opersize=32,
+    opcode=[],
+    operands=[Operand(type="Imm", size=32, dest="JmpRel")])
+add_group("jcc",
+    only64=True,
+    opersize=64,
+    opcode=[],
+    operands=[Operand(type="Imm", size=32, dest="JmpRel")])
+
+add_group("jcc",
+    modifiers=["Op0Add"],
+    def_opersize_64=64,
+    opcode=[0x70],  # tmod="Short": single byte, modifier via Op0Add
+    operands=[Operand(type="Imm", tmod="Short", dest="JmpRel")])
+add_group("jcc",
+    cpu=["186"],
+    modifiers=["Op1Add"],
+    opersize=16,
+    def_opersize_64=64,
+    opcode=[0x0F, 0x80],  # tmod="Near": two bytes, modifier via Op1Add
+    operands=[Operand(type="Imm", size=16, tmod="Near", dest="JmpRel")])
+add_group("jcc",
+    not64=True,
+    cpu=["386"],
+    modifiers=["Op1Add"],
+    opersize=32,
+    opcode=[0x0F, 0x80],
+    operands=[Operand(type="Imm", size=32, tmod="Near", dest="JmpRel")])
+add_group("jcc",
+    only64=True,
+    modifiers=["Op1Add"],
+    opersize=64,
+    def_opersize_64=64,
+    opcode=[0x0F, 0x80],
+    operands=[Operand(type="Imm", size=32, tmod="Near", dest="JmpRel")])
+add_group("jcc",
+    cpu=["186"],
+    modifiers=["Op1Add"],
+    def_opersize_64=64,
+    opcode=[0x0F, 0x80],
+    operands=[Operand(type="Imm", tmod="Near", dest="JmpRel")])
+
+add_insn("jo", "jcc", modifiers=[0x00])  # modifiers run 0x00 through 0x0F
+add_insn("jno", "jcc", modifiers=[0x01])
+add_insn("jb", "jcc", modifiers=[0x02])  # names sharing a value are aliases
+add_insn("jc", "jcc", modifiers=[0x02])
+add_insn("jnae", "jcc", modifiers=[0x02])
+add_insn("jnb", "jcc", modifiers=[0x03])
+add_insn("jnc", "jcc", modifiers=[0x03])
+add_insn("jae", "jcc", modifiers=[0x03])
+add_insn("je", "jcc", modifiers=[0x04])
+add_insn("jz", "jcc", modifiers=[0x04])
+add_insn("jne", "jcc", modifiers=[0x05])
+add_insn("jnz", "jcc", modifiers=[0x05])
+add_insn("jbe", "jcc", modifiers=[0x06])
+add_insn("jna", "jcc", modifiers=[0x06])
+add_insn("jnbe", "jcc", modifiers=[0x07])
+add_insn("ja", "jcc", modifiers=[0x07])
+add_insn("js", "jcc", modifiers=[0x08])
+add_insn("jns", "jcc", modifiers=[0x09])
+add_insn("jp", "jcc", modifiers=[0x0A])
+add_insn("jpe", "jcc", modifiers=[0x0A])
+add_insn("jnp", "jcc", modifiers=[0x0B])
+add_insn("jpo", "jcc", modifiers=[0x0B])
+add_insn("jl", "jcc", modifiers=[0x0C])
+add_insn("jnge", "jcc", modifiers=[0x0C])
+add_insn("jnl", "jcc", modifiers=[0x0D])
+add_insn("jge", "jcc", modifiers=[0x0D])
+add_insn("jle", "jcc", modifiers=[0x0E])
+add_insn("jng", "jcc", modifiers=[0x0E])
+add_insn("jnle", "jcc", modifiers=[0x0F])
+add_insn("jg", "jcc", modifiers=[0x0F])
+
+# jcxz
+# AdSizeR group; jcxz/jecxz/jrcxz pass 16/32/64 as the modifier.  Besides
+# the opcode-less generic form only a tmod="Short" (0xE3) form is listed.
+add_group("jcxz",
+    modifiers=["AdSizeR"],
+    opcode=[],
+    operands=[Operand(type="Imm", dest="JmpRel")])
+add_group("jcxz",
+    modifiers=["AdSizeR"],
+    def_opersize_64=64,
+    opcode=[0xE3],
+    operands=[Operand(type="Imm", tmod="Short", dest="JmpRel")])
+
+add_insn("jcxz", "jcxz", modifiers=[16])
+add_insn("jecxz", "jcxz", modifiers=[32], cpu=["386"])
+add_insn("jrcxz", "jcxz", modifiers=[64], only64=True)
+
+#####################################################################
+# Loop instructions
+#####################################################################
+add_group("loop",
+    opcode=[],  # the four forms without tmod list no opcode bytes
+    operands=[Operand(type="Imm", dest="JmpRel")])
+add_group("loop",
+    not64=True,  # only the 16-bit Creg form is not64
+    opcode=[],
+    operands=[Operand(type="Imm", dest="JmpRel"),
+              Operand(type="Creg", size=16, dest="AdSizeR")])
+add_group("loop",
+    def_opersize_64=64,
+    opcode=[],
+    operands=[Operand(type="Imm", dest="JmpRel"),
+              Operand(type="Creg", size=32, dest="AdSizeR")])
+add_group("loop",
+    def_opersize_64=64,
+    opcode=[],
+    operands=[Operand(type="Imm", dest="JmpRel"),
+              Operand(type="Creg", size=64, dest="AdSizeR")])
+
+add_group("loop",
+    not64=True,
+    modifiers=["Op0Add"],
+    opcode=[0xE0],  # tmod="Short" forms: 0xE0 plus the Op0Add modifier
+    operands=[Operand(type="Imm", tmod="Short", dest="JmpRel")])
+for sz in [16, 32, 64]:  # Short forms with an explicit Creg operand
+    add_group("loop",
+        modifiers=["Op0Add"],
+        def_opersize_64=64,
+        opcode=[0xE0],
+        operands=[Operand(type="Imm", tmod="Short", dest="JmpRel"),
+                  Operand(type="Creg", size=sz, dest="AdSizeR")])
+
+add_insn("loop", "loop", modifiers=[2])
+add_insn("loopz", "loop", modifiers=[1])  # loopz/loope share modifier 1
+add_insn("loope", "loop", modifiers=[1])
+add_insn("loopnz", "loop", modifiers=[0])  # loopnz/loopne share modifier 0
+add_insn("loopne", "loop", modifiers=[0])
+
+#####################################################################
+# Set byte on flag instructions
+#####################################################################
+add_group("setcc",
+    suffix="b",  # single form: 8-bit RM operand, suffix "b"
+    cpu=["386"],
+    modifiers=["Op1Add"],
+    opcode=[0x0F, 0x90],  # add_insn modifier is applied via Op1Add
+    spare=2,
+    operands=[Operand(type="RM", size=8, relaxed=True, dest="EA")])
+
+add_insn("seto", "setcc", modifiers=[0x00])  # modifiers run 0x00 to 0x0F
+add_insn("setno", "setcc", modifiers=[0x01])
+add_insn("setb", "setcc", modifiers=[0x02])  # same values as the jcc names
+add_insn("setc", "setcc", modifiers=[0x02])
+add_insn("setnae", "setcc", modifiers=[0x02])
+add_insn("setnb", "setcc", modifiers=[0x03])
+add_insn("setnc", "setcc", modifiers=[0x03])
+add_insn("setae", "setcc", modifiers=[0x03])
+add_insn("sete", "setcc", modifiers=[0x04])
+add_insn("setz", "setcc", modifiers=[0x04])
+add_insn("setne", "setcc", modifiers=[0x05])
+add_insn("setnz", "setcc", modifiers=[0x05])
+add_insn("setbe", "setcc", modifiers=[0x06])
+add_insn("setna", "setcc", modifiers=[0x06])
+add_insn("setnbe", "setcc", modifiers=[0x07])
+add_insn("seta", "setcc", modifiers=[0x07])
+add_insn("sets", "setcc", modifiers=[0x08])
+add_insn("setns", "setcc", modifiers=[0x09])
+add_insn("setp", "setcc", modifiers=[0x0A])
+add_insn("setpe", "setcc", modifiers=[0x0A])
+add_insn("setnp", "setcc", modifiers=[0x0B])
+add_insn("setpo", "setcc", modifiers=[0x0B])
+add_insn("setl", "setcc", modifiers=[0x0C])
+add_insn("setnge", "setcc", modifiers=[0x0C])
+add_insn("setnl", "setcc", modifiers=[0x0D])
+add_insn("setge", "setcc", modifiers=[0x0D])
+add_insn("setle", "setcc", modifiers=[0x0E])
+add_insn("setng", "setcc", modifiers=[0x0E])
+add_insn("setnle", "setcc", modifiers=[0x0F])
+add_insn("setg", "setcc", modifiers=[0x0F])
+
+#####################################################################
+# String instructions
+#####################################################################
+add_insn("cmpsb", "onebyte", modifiers=[0xA6, 0])  # byte names pass 0
+add_insn("cmpsw", "onebyte", modifiers=[0xA7, 16])  # word names pass 16
+
+# cmpsd has to be non-onebyte for SSE2 forms below
+add_group("cmpsd",
+    parsers=["nasm"],
+    opersize=32,
+    opcode=[0xA7],
+    operands=[])
+
+add_insn("cmpsd", "cmpsd", cpu=[])  # NOTE(review): movsd passes cpu=["386"]
+
+add_insn("cmpsl", "onebyte", parser="gas", modifiers=[0xA7, 32], cpu=["386"])
+add_insn("cmpsq", "onebyte", modifiers=[0xA7, 64], only64=True)
+add_insn("insb", "onebyte", modifiers=[0x6C, 0])
+add_insn("insw", "onebyte", modifiers=[0x6D, 16])
+add_insn("insd", "onebyte", parser="nasm", modifiers=[0x6D, 32], cpu=["386"])
+add_insn("insl", "onebyte", parser="gas", modifiers=[0x6D, 32], cpu=["386"])
+add_insn("outsb", "onebyte", modifiers=[0x6E, 0])
+add_insn("outsw", "onebyte", modifiers=[0x6F, 16])
+add_insn("outsd", "onebyte", parser="nasm", modifiers=[0x6F, 32],
+         cpu=["386"])
+add_insn("outsl", "onebyte", parser="gas", modifiers=[0x6F, 32], cpu=["386"])
+add_insn("lodsb", "onebyte", modifiers=[0xAC, 0])
+add_insn("lodsw", "onebyte", modifiers=[0xAD, 16])
+add_insn("lodsd", "onebyte", parser="nasm", modifiers=[0xAD, 32],
+         cpu=["386"])
+add_insn("lodsl", "onebyte", parser="gas", modifiers=[0xAD, 32], cpu=["386"])
+add_insn("lodsq", "onebyte", modifiers=[0xAD, 64], only64=True)
+add_insn("movsb", "onebyte", modifiers=[0xA4, 0])
+add_insn("movsw", "onebyte", modifiers=[0xA5, 16])
+
+# movsd has to be non-onebyte for SSE2 forms below
+add_group("movsd",
+    parsers=["nasm"],
+    opersize=32,
+    opcode=[0xA5],
+    operands=[])
+
+add_insn("movsd", "movsd", cpu=["386"])
+
+add_insn("movsl", "onebyte", parser="gas", modifiers=[0xA5, 32], cpu=["386"])
+add_insn("movsq", "onebyte", modifiers=[0xA5, 64], only64=True)
+# smov alias for movs in GAS mode (same modifiers as movsb/w/l/q)
+add_insn("smovb", "onebyte", parser="gas", modifiers=[0xA4, 0])
+add_insn("smovw", "onebyte", parser="gas", modifiers=[0xA5, 16])
+add_insn("smovl", "onebyte", parser="gas", modifiers=[0xA5, 32], cpu=["386"])
+add_insn("smovq", "onebyte", parser="gas", modifiers=[0xA5, 64], only64=True)
+add_insn("scasb", "onebyte", modifiers=[0xAE, 0])
+add_insn("scasw", "onebyte", modifiers=[0xAF, 16])
+add_insn("scasd", "onebyte", parser="nasm", modifiers=[0xAF, 32],
+         cpu=["386"])
+add_insn("scasl", "onebyte", parser="gas", modifiers=[0xAF, 32], cpu=["386"])
+add_insn("scasq", "onebyte", modifiers=[0xAF, 64], only64=True)
+# ssca alias for scas in GAS mode (same modifiers as scasb/w/l/q)
+add_insn("sscab", "onebyte", parser="gas", modifiers=[0xAE, 0])
+add_insn("sscaw", "onebyte", parser="gas", modifiers=[0xAF, 16])
+add_insn("sscal", "onebyte", parser="gas", modifiers=[0xAF, 32], cpu=["386"])
+add_insn("sscaq", "onebyte", parser="gas", modifiers=[0xAF, 64], only64=True)
+add_insn("stosb", "onebyte", modifiers=[0xAA, 0])
+add_insn("stosw", "onebyte", modifiers=[0xAB, 16])
+add_insn("stosd", "onebyte", parser="nasm", modifiers=[0xAB, 32],
+         cpu=["386"])
+add_insn("stosl", "onebyte", parser="gas", modifiers=[0xAB, 32], cpu=["386"])
+add_insn("stosq", "onebyte", modifiers=[0xAB, 64], only64=True)
+add_insn("xlatb", "onebyte", modifiers=[0xD7, 0])
+
+#####################################################################
+# Bit manipulation
+#####################################################################
+
+# bit tests
+# Reg form: Op1Add on 0x0F 0x00.  Imm form: fixed 0x0F 0xBA with SpAdd;
+# presumably "Gap" skips the first add_insn modifier (confirm in generator).
+for sfx, sz in zip("wlq", [16, 32, 64]):
+    add_group("bittest",
+        suffix=sfx,
+        cpu=["386"],
+        modifiers=["Op1Add"],
+        opersize=sz,
+        opcode=[0x0F, 0x00],
+        operands=[Operand(type="RM", size=sz, relaxed=True, dest="EA"),
+                  Operand(type="Reg", size=sz, dest="Spare")])
+for sfx, sz in zip("wlq", [16, 32, 64]):
+    add_group("bittest",
+        suffix=sfx,
+        cpu=["386"],
+        modifiers=["Gap", "SpAdd"],
+        opersize=sz,
+        opcode=[0x0F, 0xBA],
+        spare=0,
+        operands=[Operand(type="RM", size=sz, dest="EA"),
+                  Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("bt",  "bittest", modifiers=[0xA3, 4])  # [byte value, spare value]
+add_insn("bts", "bittest", modifiers=[0xAB, 5])
+add_insn("btr", "bittest", modifiers=[0xB3, 6])
+add_insn("btc", "bittest", modifiers=[0xBB, 7])
+
+#
+# bit scans - also used for lar/lsl
+# Op1Add on 0x0F 0x00; bsf/bsr pass 0xBC/0xBD as the modifier.
+for sfx, sz in zip("wlq", [16, 32, 64]):
+    add_group("bsfr",
+        suffix=sfx,
+        cpu=["386"],
+        modifiers=["Op1Add"],
+        opersize=sz,
+        opcode=[0x0F, 0x00],
+        operands=[Operand(type="Reg", size=sz, dest="Spare"),
+                  Operand(type="RM", size=sz, relaxed=True, dest="EA")])
+
+add_insn("bsf", "bsfr", modifiers=[0xBC])
+add_insn("bsr", "bsfr", modifiers=[0xBD])
+
+#####################################################################
+# Interrupts and operating system instructions
+#####################################################################
+add_group("int",
+    opcode=[0xCD],  # single form taking an 8-bit immediate
+    operands=[Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("int", "int")
+add_insn("int3", "onebyte", modifiers=[0xCC])
+add_insn("int03", "onebyte", parser="nasm", modifiers=[0xCC])  # NASM alias
+add_insn("into", "onebyte", modifiers=[0xCE], not64=True)
+add_insn("iret", "onebyte", modifiers=[0xCF])  # no size modifier given
+add_insn("iretw", "onebyte", modifiers=[0xCF, 16])
+add_insn("iretd", "onebyte", parser="nasm", modifiers=[0xCF, 32],
+         cpu=["386"])
+add_insn("iretl", "onebyte", parser="gas", modifiers=[0xCF, 32], cpu=["386"])
+add_insn("iretq", "onebyte", modifiers=[0xCF, 64], only64=True)
+add_insn("rsm", "twobyte", modifiers=[0x0F, 0xAA], cpu=["586", "SMM"])
+
+for sfx, sz in zip("wl", [16, 32]):  # no "q" form; every form is not64
+    add_group("bound",
+        suffix=sfx,
+        cpu=["186"],
+        not64=True,
+        opersize=sz,
+        opcode=[0x62],
+        operands=[Operand(type="Reg", size=sz, dest="Spare"),
+                  Operand(type="Mem", size=sz, relaxed=True, dest="EA")])
+
+add_insn("bound", "bound")
+add_insn("hlt", "onebyte", modifiers=[0xF4], cpu=["Priv"])
+add_insn("nop", "onebyte", modifiers=[0x90])
+
+# Protection control
+# lar/lsl reuse the "bsfr" group (Op1Add on 0x0F 0x00) with modifiers 0x02
+# and 0x03; arpl is a not64 form with 16-bit operands and opcode 0x63.
+add_insn("lar", "bsfr", modifiers=[0x02], cpu=["286", "Prot"])
+add_insn("lsl", "bsfr", modifiers=[0x03], cpu=["286", "Prot"])
+
+add_group("arpl",
+    suffix="w",
+    cpu=["Prot", "286"],
+    not64=True,
+    opcode=[0x63],
+    operands=[Operand(type="RM", size=16, relaxed=True, dest="EA"),
+              Operand(type="Reg", size=16, dest="Spare")])
+
+add_insn("arpl", "arpl")
+
+for sfx in [None, "w", "l", "q"]:  # unsuffixed name plus w/l/q variants
+    add_insn("lgdt"+(sfx or ""), "twobytemem", suffix=sfx,
+             modifiers=[2, 0x0F, 0x01], cpu=["286", "Priv"])
+    add_insn("lidt"+(sfx or ""), "twobytemem", suffix=sfx,
+             modifiers=[3, 0x0F, 0x01], cpu=["286", "Priv"])
+    add_insn("sgdt"+(sfx or ""), "twobytemem", suffix=sfx,
+             modifiers=[0, 0x0F, 0x01], cpu=["286", "Priv"])
+    add_insn("sidt"+(sfx or ""), "twobytemem", suffix=sfx,
+             modifiers=[1, 0x0F, 0x01], cpu=["286", "Priv"])  # only [0] varies
+
+for sfx, sz in zip("wlq", [16, 32, 64]):  # register forms, one per size
+    add_group("str",
+        suffix=sfx,
+        cpu=["Prot", "286"],
+        opersize=sz,
+        opcode=[0x0F, 0x00],
+        spare=1,
+        operands=[Operand(type="Reg", size=sz, dest="EA")])
+add_group("str",
+    suffixes=["w", "l"],  # 16-bit RM form, listed under both suffixes
+    cpu=["Prot", "286"],
+    opcode=[0x0F, 0x00],  # same bytes as above, but no opersize is set
+    spare=1,
+    operands=[Operand(type="RM", size=16, relaxed=True, dest="EA")])
+
+add_insn("str", "str")
+
+add_group("prot286",
+    suffix="w",
+    cpu=["286"],
+    modifiers=["SpAdd", "Op1Add"],  # add_insn passes two values to match
+    opcode=[0x0F, 0x00],
+    spare=0,
+    operands=[Operand(type="RM", size=16, relaxed=True, dest="EA")])
+
+add_insn("lldt", "prot286", modifiers=[2, 0], cpu=["286", "Prot", "Priv"])
+add_insn("ltr", "prot286", modifiers=[3, 0], cpu=["286", "Prot", "Priv"])
+add_insn("verr", "prot286", modifiers=[4, 0], cpu=["286", "Prot"])
+add_insn("verw", "prot286", modifiers=[5, 0], cpu=["286", "Prot"])
+add_insn("lmsw", "prot286", modifiers=[6, 1], cpu=["286", "Priv"])  # 2nd is 1
+
+for sfx, sz in zip("wlq", [16, 32, 64]):  # memory-operand forms
+    add_group("sldtmsw",
+        suffix=sfx,
+        only64=(sz==64),  # only the 64-bit memory form is only64
+        cpu=[(sz==32) and "386" or "286"],  # "386" for 32-bit, else "286"
+        modifiers=["SpAdd", "Op1Add"],
+        opcode=[0x0F, 0x00],
+        spare=0,
+        operands=[Operand(type="Mem", size=sz, relaxed=True, dest="EA")])
+for sfx, sz in zip("wlq", [16, 32, 64]):  # register-operand forms
+    add_group("sldtmsw",
+        suffix=sfx,
+        cpu=["286"],
+        modifiers=["SpAdd", "Op1Add"],
+        opersize=sz,  # unlike the memory forms, these set opersize
+        opcode=[0x0F, 0x00],
+        spare=0,
+        operands=[Operand(type="Reg", size=sz, dest="EA")])
+
+add_insn("sldt", "sldtmsw", modifiers=[0, 0])
+add_insn("smsw", "sldtmsw", modifiers=[4, 1])
+
+#####################################################################
+# Floating point instructions
+#####################################################################
+# x87 instructions registered through the generic "twobyte"/"threebyte"
+# groups.  Each row is (mnemonic, group, byte values passed as modifiers,
+# required CPU features); rows are registered in table order.
+for _fpu_name, _fpu_group, _fpu_bytes, _fpu_cpu in [
+        ("fcompp",  "twobyte",   [0xDE, 0xD9],       ["FPU"]),
+        ("fucompp", "twobyte",   [0xDA, 0xE9],       ["286", "FPU"]),
+        ("ftst",    "twobyte",   [0xD9, 0xE4],       ["FPU"]),
+        ("fxam",    "twobyte",   [0xD9, 0xE5],       ["FPU"]),
+        ("fld1",    "twobyte",   [0xD9, 0xE8],       ["FPU"]),
+        ("fldl2t",  "twobyte",   [0xD9, 0xE9],       ["FPU"]),
+        ("fldl2e",  "twobyte",   [0xD9, 0xEA],       ["FPU"]),
+        ("fldpi",   "twobyte",   [0xD9, 0xEB],       ["FPU"]),
+        ("fldlg2",  "twobyte",   [0xD9, 0xEC],       ["FPU"]),
+        ("fldln2",  "twobyte",   [0xD9, 0xED],       ["FPU"]),
+        ("fldz",    "twobyte",   [0xD9, 0xEE],       ["FPU"]),
+        ("f2xm1",   "twobyte",   [0xD9, 0xF0],       ["FPU"]),
+        ("fyl2x",   "twobyte",   [0xD9, 0xF1],       ["FPU"]),
+        ("fptan",   "twobyte",   [0xD9, 0xF2],       ["FPU"]),
+        ("fpatan",  "twobyte",   [0xD9, 0xF3],       ["FPU"]),
+        ("fxtract", "twobyte",   [0xD9, 0xF4],       ["FPU"]),
+        ("fprem1",  "twobyte",   [0xD9, 0xF5],       ["286", "FPU"]),
+        ("fdecstp", "twobyte",   [0xD9, 0xF6],       ["FPU"]),
+        ("fincstp", "twobyte",   [0xD9, 0xF7],       ["FPU"]),
+        ("fprem",   "twobyte",   [0xD9, 0xF8],       ["FPU"]),
+        ("fyl2xp1", "twobyte",   [0xD9, 0xF9],       ["FPU"]),
+        ("fsqrt",   "twobyte",   [0xD9, 0xFA],       ["FPU"]),
+        ("fsincos", "twobyte",   [0xD9, 0xFB],       ["286", "FPU"]),
+        ("frndint", "twobyte",   [0xD9, 0xFC],       ["FPU"]),
+        ("fscale",  "twobyte",   [0xD9, 0xFD],       ["FPU"]),
+        ("fsin",    "twobyte",   [0xD9, 0xFE],       ["286", "FPU"]),
+        ("fcos",    "twobyte",   [0xD9, 0xFF],       ["286", "FPU"]),
+        ("fchs",    "twobyte",   [0xD9, 0xE0],       ["FPU"]),
+        ("fabs",    "twobyte",   [0xD9, 0xE1],       ["FPU"]),
+        ("fninit",  "twobyte",   [0xDB, 0xE3],       ["FPU"]),
+        ("finit",   "threebyte", [0x9B, 0xDB, 0xE3], ["FPU"]),
+        ("fnclex",  "twobyte",   [0xDB, 0xE2],       ["FPU"]),
+        ("fclex",   "threebyte", [0x9B, 0xDB, 0xE2], ["FPU"]),
+        ]:
+    add_insn(_fpu_name, _fpu_group, modifiers=_fpu_bytes, cpu=_fpu_cpu)
+# FPU environment/state save and restore.  Every mnemonic is registered
+# three times: unsuffixed, and with an explicit "l" or "s" suffix appended
+# to the name and passed as suffix=.
+# The "fn..." variants and the load/restore variants go through
+# onebytemem; fstenv/fsave go through twobytemem with an extra leading
+# 0x9B, the same byte that fwait/wait below pass as their only modifier.
+# NOTE(review): the first modifier (6 or 4) is presumably the ModRM spare
+# (/digit) value -- confirm against the onebytemem/twobytemem groups,
+# which are defined outside this section.
+for sfx in [None, "l", "s"]:
+    add_insn("fnstenv"+(sfx or ""), "onebytemem", suffix=sfx,
+             modifiers=[6, 0xD9], cpu=["FPU"])
+    add_insn("fstenv"+(sfx or ""),  "twobytemem", suffix=sfx,
+             modifiers=[6, 0x9B, 0xD9], cpu=["FPU"])
+    add_insn("fldenv"+(sfx or ""),  "onebytemem", suffix=sfx,
+             modifiers=[4, 0xD9], cpu=["FPU"])
+    add_insn("fnsave"+(sfx or ""),  "onebytemem", suffix=sfx,
+             modifiers=[6, 0xDD], cpu=["FPU"])
+    add_insn("fsave"+(sfx or ""),   "twobytemem", suffix=sfx,
+             modifiers=[6, 0x9B, 0xDD], cpu=["FPU"])
+    add_insn("frstor"+(sfx or ""),  "onebytemem", suffix=sfx,
+             modifiers=[4, 0xDD], cpu=["FPU"])
+add_insn("fnop",    "twobyte", modifiers=[0xD9, 0xD0], cpu=["FPU"])
+# fwait and wait register the same single byte (0x9B); only fwait carries
+# the FPU cpu requirement.
+add_insn("fwait",   "onebyte", modifiers=[0x9B], cpu=["FPU"])
+# Prefixes; should the others be here too? should wait be a prefix?
+add_insn("wait",    "onebyte", modifiers=[0x9B])
+
+#
+# load/store with pop (integer and normal)
+#
+# "fld" group.  Forms, in registration order:
+#   - suffix "s": 32-bit memory operand, opcode 0xD9
+#   - suffix "l": 64-bit memory operand, opcode 0xDD
+#   - 80-bit memory operand, opcode 0xDB with spare=5
+#   - 80-bit register operand with dest="Op1Add"; presumably the register
+#     number is added to the second opcode byte (0xC0) -- confirm against
+#     the Operand/add_group definitions.
+# The first two forms pass no spare value and therefore use whatever
+# default add_group applies.
+add_group("fld",
+    suffix="s",
+    cpu=["FPU"],
+    opcode=[0xD9],
+    operands=[Operand(type="Mem", size=32, dest="EA")])
+add_group("fld",
+    suffix="l",
+    cpu=["FPU"],
+    opcode=[0xDD],
+    operands=[Operand(type="Mem", size=64, dest="EA")])
+add_group("fld",
+    cpu=["FPU"],
+    opcode=[0xDB],
+    spare=5,
+    operands=[Operand(type="Mem", size=80, dest="EA")])
+add_group("fld",
+    cpu=["FPU"],
+    opcode=[0xD9, 0xC0],
+    operands=[Operand(type="Reg", size=80, dest="Op1Add")])
+
+# The mnemonic maps straight onto the group; no modifiers are needed.
+add_insn("fld", "fld")
+
+# "fstp" group.  Same four operand shapes as "fld", with explicit spare
+# values on the memory forms:
+#   - suffix "s": 32-bit memory, opcode 0xD9, spare=3
+#   - suffix "l": 64-bit memory, opcode 0xDD, spare=3
+#   - 80-bit memory, opcode 0xDB, spare=7
+#   - 80-bit register (dest="Op1Add") on opcode bytes 0xDD 0xD8
+add_group("fstp",
+    suffix="s",
+    cpu=["FPU"],
+    opcode=[0xD9],
+    spare=3,
+    operands=[Operand(type="Mem", size=32, dest="EA")])
+add_group("fstp",
+    suffix="l",
+    cpu=["FPU"],
+    opcode=[0xDD],
+    spare=3,
+    operands=[Operand(type="Mem", size=64, dest="EA")])
+add_group("fstp",
+    cpu=["FPU"],
+    opcode=[0xDB],
+    spare=7,
+    operands=[Operand(type="Mem", size=80, dest="EA")])
+add_group("fstp",
+    cpu=["FPU"],
+    opcode=[0xDD, 0xD8],
+    operands=[Operand(type="Reg", size=80, dest="Op1Add")])
+
+# The mnemonic maps straight onto the group; no modifiers are needed.
+add_insn("fstp", "fstp")
+
+#
+# Long memory version of floating point load/store for GAS
+#
+# Single shared form for fldt/fstpt: an 80-bit memory operand on opcode
+# 0xDB, with spare=0 and one "SpAdd" modifier slot.  fldt passes 5 and
+# fstpt passes 7, matching the spare values used by the 80-bit memory
+# forms of the "fld" and "fstp" groups.
+# NOTE(review): "SpAdd" presumably adds the modifier to spare -- confirm
+# in the generator code outside this section.
+add_group("fldstpt",
+    cpu=["FPU"],
+    modifiers=["SpAdd"],
+    opcode=[0xDB],
+    spare=0,
+    operands=[Operand(type="Mem", size=80, dest="EA")])
+
+# NOTE(review): suffix="WEAK" is interpreted by add_insn, which is defined
+# outside this section -- confirm its meaning there.
+add_insn("fldt", "fldstpt", suffix="WEAK", modifiers=[5])
+add_insn("fstpt", "fldstpt", suffix="WEAK", modifiers=[7])
+
+# Integer load / store-and-pop.  Each instruction passes three modifiers;
+# which of them a form consumes depends on that form's modifier list:
+#   - suffix "s" (16-bit memory, opcode 0xDF): ["SpAdd"] -> 1st modifier
+#   - suffix "l" (32-bit memory, opcode 0xDB): ["SpAdd"] -> 1st modifier
+#   - suffix "q" (64-bit memory, opcode 0xDD): ["Gap", "Op0Add", "SpAdd"]
+#     -> the 1st modifier is skipped, the 2nd goes to the opcode byte and
+#     the 3rd to spare.
+# NOTE(review): slot meanings are inferred from the names ("Gap" = skip,
+# "Op0Add" = add to opcode byte 0, "SpAdd" = add to spare); confirm
+# against the modifier handling defined outside this section.
+add_group("fildstp",
+    suffix="s",
+    cpu=["FPU"],
+    modifiers=["SpAdd"],
+    opcode=[0xDF],
+    spare=0,
+    operands=[Operand(type="Mem", size=16, dest="EA")])
+add_group("fildstp",
+    suffix="l",
+    cpu=["FPU"],
+    modifiers=["SpAdd"],
+    opcode=[0xDB],
+    spare=0,
+    operands=[Operand(type="Mem", size=32, dest="EA")])
+add_group("fildstp",
+    suffix="q",
+    cpu=["FPU"],
+    modifiers=["Gap", "Op0Add", "SpAdd"],
+    opcode=[0xDD],
+    spare=0,
+    operands=[Operand(type="Mem", size=64, dest="EA")])
+
+# Modifier order: [value for s/l forms, opcode add for q, spare for q].
+add_insn("fild", "fildstp", modifiers=[0, 2, 5])
+add_insn("fistp", "fildstp", modifiers=[3, 2, 7])
+
+# One form shared by fbld/fbstp and the GAS-only fildll/fistpll
+# mnemonics: opcode 0xDF, a relaxed-size 80-bit memory operand, and a
+# single "SpAdd" modifier slot on top of spare=0.
+add_group("fbldstp",
+    cpu=["FPU"],
+    modifiers=["SpAdd"],
+    opcode=[0xDF],
+    spare=0,
+    operands=[Operand(type="Mem", size=80, relaxed=True, dest="EA")])
+
+# The single modifier is the per-instruction value for the SpAdd slot
+# (4..7).  fildll and fistpll are registered for the gas parser only.
+add_insn("fbld", "fbldstp", modifiers=[4])
+add_insn("fildll", "fbldstp", parser="gas", modifiers=[5])
+add_insn("fbstp", "fbldstp", modifiers=[6])
+add_insn("fistpll", "fbldstp", parser="gas", modifiers=[7])
+
+#
+# store (normal)
+#
+# "fst" group.  Three forms, in registration order:
+#   - suffix "s": 32-bit memory, opcode 0xD9, spare=2
+#   - suffix "l": 64-bit memory, opcode 0xDD, spare=2
+#   - 80-bit register (dest="Op1Add") on opcode bytes 0xDD 0xD0
+# Unlike "fld"/"fstp" there is no 80-bit memory form in this group.
+add_group("fst",
+    suffix="s",
+    cpu=["FPU"],
+    opcode=[0xD9],
+    spare=2,
+    operands=[Operand(type="Mem", size=32, dest="EA")])
+add_group("fst",
+    suffix="l",
+    cpu=["FPU"],
+    opcode=[0xDD],
+    spare=2,
+    operands=[Operand(type="Mem", size=64, dest="EA")])
+add_group("fst",
+    cpu=["FPU"],
+    opcode=[0xDD, 0xD0],
+    operands=[Operand(type="Reg", size=80, dest="Op1Add")])
+
+# The mnemonic maps straight onto the group; no modifiers are needed.
+add_insn("fst", "fst")
+
+#
+# exchange (with ST0)
+#
+# "fxch" group.  All register forms share opcode bytes 0xD9 0xC8 and put
+# the non-ST0 register in the "Op1Add" slot; the accepted operand lists
+# are, in order:
+#   - one register
+#   - ST0, register   (the ST0 operand has dest=None)
+#   - register, ST0   (the ST0 operand has dest=None)
+#   - no operands, fixed at 0xD9 0xC9 (0xC8 + 1)
+add_group("fxch",
+    cpu=["FPU"],
+    opcode=[0xD9, 0xC8],
+    operands=[Operand(type="Reg", size=80, dest="Op1Add")])
+add_group("fxch",
+    cpu=["FPU"],
+    opcode=[0xD9, 0xC8],
+    operands=[Operand(type="ST0", size=80, dest=None),
+              Operand(type="Reg", size=80, dest="Op1Add")])
+add_group("fxch",
+    cpu=["FPU"],
+    opcode=[0xD9, 0xC8],
+    operands=[Operand(type="Reg", size=80, dest="Op1Add"),
+              Operand(type="ST0", size=80, dest=None)])
+add_group("fxch",
+    cpu=["FPU"],
+    opcode=[0xD9, 0xC9],
+    operands=[])
+
+# The mnemonic maps straight onto the group; no modifiers are needed.
+add_insn("fxch", "fxch")
+
+#
+# comparisons
+#
+# "fcom" group, shared by fcom and fcomp.  Each instruction passes two
+# modifiers: [second-opcode-byte value for register forms, spare value
+# for memory forms].
+#   - memory forms (suffix "s"/"l", opcodes 0xD8/0xDC) declare
+#     ["Gap", "SpAdd"], so they skip the first modifier and use the second.
+#   - register forms declare ["Op1Add"] and use only the first modifier,
+#     added to the second opcode byte (0x00, or 0x01 for the operand-less
+#     GAS alias).
+# NOTE(review): slot meanings are inferred from the modifier names;
+# confirm against the modifier handling outside this section.
+add_group("fcom",
+    suffix="s",
+    cpu=["FPU"],
+    modifiers=["Gap", "SpAdd"],
+    opcode=[0xD8],
+    spare=0,
+    operands=[Operand(type="Mem", size=32, dest="EA")])
+add_group("fcom",
+    suffix="l",
+    cpu=["FPU"],
+    modifiers=["Gap", "SpAdd"],
+    opcode=[0xDC],
+    spare=0,
+    operands=[Operand(type="Mem", size=64, dest="EA")])
+add_group("fcom",
+    cpu=["FPU"],
+    modifiers=["Op1Add"],
+    opcode=[0xD8, 0x00],
+    operands=[Operand(type="Reg", size=80, dest="Op1Add")])
+# Alias for fcom %st(1) for GAS compat
+add_group("fcom",
+    cpu=["FPU"],
+    parsers=["gas"],
+    modifiers=["Op1Add"],
+    opcode=[0xD8, 0x01],
+    operands=[])
+# Explicit "ST0, reg" spelling; registered for the nasm parser only.
+add_group("fcom",
+    cpu=["FPU"],
+    parsers=["nasm"],
+    modifiers=["Op1Add"],
+    opcode=[0xD8, 0x00],
+    operands=[Operand(type="ST0", size=80, dest=None),
+              Operand(type="Reg", size=80, dest="Op1Add")])
+
+# Modifier order: [register-form opcode byte, memory-form spare].
+add_insn("fcom", "fcom", modifiers=[0xD0, 2])
+add_insn("fcomp", "fcom", modifiers=[0xD8, 3])
+
+#
+# extended comparisons
+#
+# "fcom2" group: register-only compares whose two opcode bytes are both
+# left as 0x00 and supplied per instruction through the "Op0Add" and
+# "Op1Add" modifier slots.  Accepts either a single register or the
+# explicit "ST0, register" pair.
+add_group("fcom2",
+    cpu=["FPU", "286"],
+    modifiers=["Op0Add", "Op1Add"],
+    opcode=[0x00, 0x00],
+    operands=[Operand(type="Reg", size=80, dest="Op1Add")])
+add_group("fcom2",
+    cpu=["FPU", "286"],
+    modifiers=["Op0Add", "Op1Add"],
+    opcode=[0x00, 0x00],
+    operands=[Operand(type="ST0", size=80, dest=None),
+              Operand(type="Reg", size=80, dest="Op1Add")])
+
+# Modifier order: [first opcode byte, second opcode byte].
+add_insn("fucom", "fcom2", modifiers=[0xDD, 0xE0])
+add_insn("fucomp", "fcom2", modifiers=[0xDD, 0xE8])
+
+#
+# arithmetic
+#
+# "farith" group, shared by fadd/fsub/fsubr/fmul/fdiv/fdivr.  Each
+# instruction passes three modifiers; a form's modifier list decides which
+# one it consumes ("Gap" entries skip a position):
+#   - memory forms (suffix "s"/"l", opcodes 0xD8/0xDC):
+#     ["Gap", "Gap", "SpAdd"] -> 3rd modifier, applied to spare
+#   - 0xD8 register forms (reg, or ST0,reg): ["Gap", "Op1Add"] -> 2nd
+#   - 0xDC form with tmod="To", and the nasm-only "reg, ST0" form:
+#     ["Op1Add"] -> 1st
+#   - gas-only "reg, ST0" form on 0xDC: ["Gap", "Op1Add"] -> 2nd
+# So the nasm and gas "reg, ST0" forms deliberately pick different
+# modifiers for the same operand list.
+# NOTE(review): slot meanings are inferred from the modifier names;
+# confirm against the modifier handling outside this section.
+add_group("farith",
+    suffix="s",
+    cpu=["FPU"],
+    modifiers=["Gap", "Gap", "SpAdd"],
+    opcode=[0xD8],
+    spare=0,
+    operands=[Operand(type="Mem", size=32, dest="EA")])
+add_group("farith",
+    suffix="l",
+    cpu=["FPU"],
+    modifiers=["Gap", "Gap", "SpAdd"],
+    opcode=[0xDC],
+    spare=0,
+    operands=[Operand(type="Mem", size=64, dest="EA")])
+add_group("farith",
+    cpu=["FPU"],
+    modifiers=["Gap", "Op1Add"],
+    opcode=[0xD8, 0x00],
+    operands=[Operand(type="Reg", size=80, dest="Op1Add")])
+add_group("farith",
+    cpu=["FPU"],
+    modifiers=["Gap", "Op1Add"],
+    opcode=[0xD8, 0x00],
+    operands=[Operand(type="ST0", size=80, dest=None),
+              Operand(type="Reg", size=80, dest="Op1Add")])
+add_group("farith",
+    cpu=["FPU"],
+    modifiers=["Op1Add"],
+    opcode=[0xDC, 0x00],
+    operands=[Operand(type="Reg", size=80, tmod="To", dest="Op1Add")])
+add_group("farith",
+    cpu=["FPU"],
+    parsers=["nasm"],
+    modifiers=["Op1Add"],
+    opcode=[0xDC, 0x00],
+    operands=[Operand(type="Reg", size=80, dest="Op1Add"),
+              Operand(type="ST0", size=80, dest=None)])
+add_group("farith",
+    cpu=["FPU"],
+    parsers=["gas"],
+    modifiers=["Gap", "Op1Add"],
+    opcode=[0xDC, 0x00],
+    operands=[Operand(type="Reg", size=80, dest="Op1Add"),
+              Operand(type="ST0", size=80, dest=None)])
+
+# Modifier order: [1st-slot opcode byte, 2nd-slot opcode byte, spare].
+# The two opcode bytes differ only for the sub/div pairs, where they are
+# swapped between the plain and "r" variants.
+add_insn("fadd", "farith", modifiers=[0xC0, 0xC0, 0])
+add_insn("fsub", "farith", modifiers=[0xE8, 0xE0, 4])
+add_insn("fsubr", "farith", modifiers=[0xE0, 0xE8, 5])
+add_insn("fmul", "farith", modifiers=[0xC8, 0xC8, 1])
+add_insn("fdiv", "farith", modifiers=[0xF8, 0xF0, 6])
+add_insn("fdivr", "farith", modifiers=[0xF0, 0xF8, 7])
+
+# "farithp" group: arithmetic-and-pop on opcode 0xDE.  The instruction's
+# single modifier goes to the "Op1Add" slot on the second opcode byte.
+# Accepted operand lists, in order:
+#   - none (second opcode byte starts at 0x01 instead of 0x00)
+#   - one register
+#   - register, ST0
+add_group("farithp",
+    cpu=["FPU"],
+    modifiers=["Op1Add"],
+    opcode=[0xDE, 0x01],
+    operands=[])
+add_group("farithp",
+    cpu=["FPU"],
+    modifiers=["Op1Add"],
+    opcode=[0xDE, 0x00],
+    operands=[Operand(type="Reg", size=80, dest="Op1Add")])
+add_group("farithp",
+    cpu=["FPU"],
+    modifiers=["Op1Add"],
+    opcode=[0xDE, 0x00],
+    operands=[Operand(type="Reg", size=80, dest="Op1Add"),
+              Operand(type="ST0", size=80, dest=None)])
+
+# faddp/fmulp are parser-independent.  The sub/div mnemonics are
+# registered separately per parser, with the plain and "r" modifier
+# values swapped between nasm and gas.
+add_insn("faddp", "farithp", modifiers=[0xC0])
+add_insn("fsubp", "farithp", parser="nasm", modifiers=[0xE8])
+add_insn("fsubp", "farithp", parser="gas", modifiers=[0xE0])
+add_insn("fsubrp", "farithp", parser="nasm", modifiers=[0xE0])
+add_insn("fsubrp", "farithp", parser="gas", modifiers=[0xE8])
+add_insn("fmulp", "farithp", modifiers=[0xC8])
+add_insn("fdivp", "farithp", parser="nasm", modifiers=[0xF8])
+add_insn("fdivp", "farithp", parser="gas", modifiers=[0xF0])
+add_insn("fdivrp", "farithp", parser="nasm", modifiers=[0xF0])
+add_insn("fdivrp", "farithp", parser="gas", modifiers=[0xF8])
+
+#
+# integer arith/store w/o pop/compare
+#
+# "fiarith" group: integer-memory-operand FPU instructions.  Each
+# instruction passes [spare value, base opcode byte], consumed by the
+# ["SpAdd", "Op0Add"] slots.  The two forms differ only in operand size
+# and starting opcode byte:
+#   - suffix "s": 16-bit memory, opcode starts at 0x04
+#   - suffix "l": 32-bit memory, opcode starts at 0x00
+# NOTE(review): presumably the base opcode modifier is added to that
+# starting byte, making the 16-bit encoding base+4 -- confirm against the
+# "Op0Add" handling outside this section.
+add_group("fiarith",
+    suffix="s",
+    cpu=["FPU"],
+    modifiers=["SpAdd", "Op0Add"],
+    opcode=[0x04],
+    spare=0,
+    operands=[Operand(type="Mem", size=16, dest="EA")])
+add_group("fiarith",
+    suffix="l",
+    cpu=["FPU"],
+    modifiers=["SpAdd", "Op0Add"],
+    opcode=[0x00],
+    spare=0,
+    operands=[Operand(type="Mem", size=32, dest="EA")])
+
+# Modifier order: [spare, base opcode byte].  Only fist uses base 0xDB;
+# the compare and arithmetic mnemonics all use 0xDA.
+add_insn("fist",   "fiarith", modifiers=[2, 0xDB])
+add_insn("ficom",  "fiarith", modifiers=[2, 0xDA])
+add_insn("ficomp", "fiarith", modifiers=[3, 0xDA])
+add_insn("fiadd",  "fiarith", modifiers=[0, 0xDA])
+add_insn("fisub",  "fiarith", modifiers=[4, 0xDA])
+add_insn("fisubr", "fiarith", modifiers=[5, 0xDA])
+add_insn("fimul",  "fiarith", modifiers=[1, 0xDA])
+add_insn("fidiv",  "fiarith", modifiers=[6, 0xDA])
+add_insn("fidivr", "fiarith", modifiers=[7, 0xDA])
+
+#
+# processor control
+#
+# Control-word load/store.  "fldnstcw" is a single form (opcode 0xD9,
+# relaxed 16-bit memory operand, suffix "w") whose spare comes from the
+# instruction's one "SpAdd" modifier: 5 for fldcw, 7 for fnstcw.
+add_group("fldnstcw",
+    suffix="w",
+    cpu=["FPU"],
+    modifiers=["SpAdd"],
+    opcode=[0xD9],
+    spare=0,
+    operands=[Operand(type="Mem", size=16, relaxed=True, dest="EA")])
+
+add_insn("fldcw", "fldnstcw", modifiers=[5])
+add_insn("fnstcw", "fldnstcw", modifiers=[7])
+
+# "fstcw" has its own group: same operand and spare (7) as fnstcw, but
+# with 0x9B placed ahead of 0xD9 in the opcode bytes.
+add_group("fstcw",
+    suffix="w",
+    cpu=["FPU"],
+    opcode=[0x9B, 0xD9],
+    spare=7,
+    operands=[Operand(type="Mem", size=16, relaxed=True, dest="EA")])
+
+add_insn("fstcw", "fstcw")
+
+# Status-word store.  "fnstsw" accepts either a relaxed 16-bit memory
+# operand (opcode 0xDD, spare=7) or the 16-bit "Areg" operand, which has
+# dest=None and uses the fixed bytes 0xDF 0xE0.
+add_group("fnstsw",
+    suffix="w",
+    cpu=["FPU"],
+    opcode=[0xDD],
+    spare=7,
+    operands=[Operand(type="Mem", size=16, relaxed=True, dest="EA")])
+add_group("fnstsw",
+    suffix="w",
+    cpu=["FPU"],
+    opcode=[0xDF, 0xE0],
+    operands=[Operand(type="Areg", size=16, dest=None)])
+
+add_insn("fnstsw", "fnstsw")
+
+# "fstsw" mirrors "fnstsw" form for form, with 0x9B placed ahead of the
+# same opcode bytes.
+add_group("fstsw",
+    suffix="w",
+    cpu=["FPU"],
+    opcode=[0x9B, 0xDD],
+    spare=7,
+    operands=[Operand(type="Mem", size=16, relaxed=True, dest="EA")])
+add_group("fstsw",
+    suffix="w",
+    cpu=["FPU"],
+    opcode=[0x9B, 0xDF, 0xE0],
+    operands=[Operand(type="Areg", size=16, dest=None)])
+
+add_insn("fstsw", "fstsw")
+
+# "ffree" group: one 80-bit register operand (dest="Op1Add") on a second
+# opcode byte of 0xC0.  The first opcode byte is left as 0x00 and filled
+# from the instruction's single "Op0Add" modifier.
+add_group("ffree",
+    cpu=["FPU"],
+    modifiers=["Op0Add"],
+    opcode=[0x00, 0xC0],
+    operands=[Operand(type="Reg", size=80, dest="Op1Add")])
+
+# ffreep passes its own cpu list (686, FPU, Undoc) to add_insn in
+# addition to the group's.
+add_insn("ffree", "ffree", modifiers=[0xDD])
+add_insn("ffreep", "ffree", modifiers=[0xDF], cpu=["686", "FPU", "Undoc"])
+
+#####################################################################
+# 486 extensions
+#####################################################################
+# "bswap" group: a single register operand (dest="Op1Add") on opcode
+# bytes 0x0F 0xC8, in a 32-bit (suffix "l") and a 64-bit (suffix "q")
+# form.  Only the 32-bit form passes a cpu list; the 64-bit form passes
+# none.
+add_group("bswap",
+    suffix="l",
+    cpu=["486"],
+    opersize=32,
+    opcode=[0x0F, 0xC8],
+    operands=[Operand(type="Reg", size=32, dest="Op1Add")])
+add_group("bswap",
+    suffix="q",
+    opersize=64,
+    opcode=[0x0F, 0xC8],
+    operands=[Operand(type="Reg", size=64, dest="Op1Add")])
+
+add_insn("bswap", "bswap")
+
+# "cmpxchgxadd" group: one form per operand size (8/16/32/64 bits, suffixes
+# b/w/l/q), each taking "r/m, reg".  The second opcode byte starts at
+# 0x00+(sz!=8), i.e. 0 for the byte form and 1 for every wider form
+# (the comparison result is used as an integer), and the instruction's
+# single modifier goes to the "Op1Add" slot on that byte.
+for sfx, sz in zip("bwlq", [8, 16, 32, 64]):
+    add_group("cmpxchgxadd",
+        suffix=sfx,
+        cpu=["486"],
+        modifiers=["Op1Add"],
+        opersize=sz,
+        opcode=[0x0F, 0x00+(sz!=8)],
+        operands=[Operand(type="RM", size=sz, relaxed=True, dest="EA"),
+                  Operand(type="Reg", size=sz, dest="Spare")])
+
+# The modifier is the byte-sized form's second opcode byte.
+# cmpxchg486 is registered for the nasm parser only and adds "Undoc" to
+# its cpu list.
+add_insn("xadd", "cmpxchgxadd", modifiers=[0xC0])
+add_insn("cmpxchg", "cmpxchgxadd", modifiers=[0xB0])
+add_insn("cmpxchg486", "cmpxchgxadd", parser="nasm", modifiers=[0xA6],
+         cpu=["486", "Undoc"])
+
+# Cache-control instructions.  invd/wbinvd go through the generic
+# "twobyte" group and carry the "Priv" cpu flag; invlpg goes through
+# "twobytemem" with a leading modifier of 7 ahead of bytes 0x0F 0x01.
+add_insn("invd", "twobyte", modifiers=[0x0F, 0x08], cpu=["486", "Priv"])
+add_insn("wbinvd", "twobyte", modifiers=[0x0F, 0x09], cpu=["486", "Priv"])
+add_insn("invlpg", "twobytemem", modifiers=[7, 0x0F, 0x01],
+         cpu=["486", "Priv"])
+
+#####################################################################
+# 586+ and late 486 extensions
+#####################################################################
+# cpuid is registered with a "486" cpu requirement.
+add_insn("cpuid", "twobyte", modifiers=[0x0F, 0xA2], cpu=["486"])
+
+#####################################################################
+# Pentium extensions
+#####################################################################
+# The MSR accessors carry the "Priv" flag; rdtsc does not.
+add_insn("wrmsr", "twobyte", modifiers=[0x0F, 0x30], cpu=["586", "Priv"])
+add_insn("rdtsc", "twobyte", modifiers=[0x0F, 0x31], cpu=["586"])
+add_insn("rdmsr", "twobyte", modifiers=[0x0F, 0x32], cpu=["586", "Priv"])
+
+# "cmpxchg8b" group: a single form taking a relaxed 64-bit memory operand
+# on opcode bytes 0x0F 0xC7 with spare=1 (suffix "q").
+add_group("cmpxchg8b",
+    suffix="q",
+    cpu=["586"],
+    opcode=[0x0F, 0xC7],
+    spare=1,
+    operands=[Operand(type="Mem", size=64, relaxed=True, dest="EA")])
+
+add_insn("cmpxchg8b", "cmpxchg8b")
+
+#####################################################################
+# Pentium II/Pentium Pro extensions
+#####################################################################
+# sysenter/sysexit are registered with not64=True; sysexit additionally
+# carries the "Priv" cpu flag.
+add_insn("sysenter", "twobyte", modifiers=[0x0F, 0x34], cpu=["686"],
+         not64=True)
+add_insn("sysexit",  "twobyte", modifiers=[0x0F, 0x35], cpu=["686", "Priv"],
+         not64=True)
+# fxsave/fxrstor are registered both unsuffixed and with a "q" suffix
+# appended to the mnemonic; the leading modifier (0 or 1) distinguishes
+# the two on the shared 0x0F 0xAE bytes.
+for sfx in [None, "q"]:
+    add_insn("fxsave"+(sfx or ""),  "twobytemem", suffix=sfx,
+             modifiers=[0, 0x0F, 0xAE], cpu=["686", "FPU"])
+    add_insn("fxrstor"+(sfx or ""), "twobytemem", suffix=sfx,
+             modifiers=[1, 0x0F, 0xAE], cpu=["686", "FPU"])
+add_insn("rdpmc", "twobyte", modifiers=[0x0F, 0x33], cpu=["686"])
+# ud2/ud1 are registered with a "286" cpu requirement; ud1 is also
+# flagged "Undoc".
+add_insn("ud2",   "twobyte", modifiers=[0x0F, 0x0B], cpu=["286"])
+add_insn("ud1",   "twobyte", modifiers=[0x0F, 0xB9], cpu=["286", "Undoc"])
+
+# Conditional-move group: one "reg, r/m" form per operand size, keyed by
+# suffix.  The instruction's single modifier goes to the "Op1Add" slot on
+# the 0x0F 0x40 opcode bytes.
+for sfx, sz in [("w", 16), ("l", 32), ("q", 64)]:
+    add_group("cmovcc",
+        suffix=sfx,
+        cpu=["686"],
+        modifiers=["Op1Add"],
+        opersize=sz,
+        opcode=[0x0F, 0x40],
+        operands=[Operand(type="Reg", size=sz, dest="Spare"),
+                  Operand(type="RM", size=sz, relaxed=True, dest="EA")])
+
+# Condition suffixes listed by modifier value: row N holds every alias
+# registered with modifiers=[N], for N = 0x00 .. 0x0F.
+for _cmov_cc, _cmov_aliases in enumerate([
+        ["o"],                  # 0x00
+        ["no"],                 # 0x01
+        ["b", "c", "nae"],      # 0x02
+        ["nb", "nc", "ae"],     # 0x03
+        ["e", "z"],             # 0x04
+        ["ne", "nz"],           # 0x05
+        ["be", "na"],           # 0x06
+        ["nbe", "a"],           # 0x07
+        ["s"],                  # 0x08
+        ["ns"],                 # 0x09
+        ["p", "pe"],            # 0x0A
+        ["np", "po"],           # 0x0B
+        ["l", "nge"],           # 0x0C
+        ["nl", "ge"],           # 0x0D
+        ["le", "ng"],           # 0x0E
+        ["nle", "g"],           # 0x0F
+        ]):
+    for _cmov_alias in _cmov_aliases:
+        add_insn("cmov" + _cmov_alias, "cmovcc", modifiers=[_cmov_cc])
+
+# FPU conditional move: a single "ST0, reg" form whose two opcode bytes
+# both come from the instruction's modifiers (Op0Add, Op1Add slots).
+add_group("fcmovcc",
+    cpu=["FPU", "686"],
+    modifiers=["Op0Add", "Op1Add"],
+    opcode=[0x00, 0x00],
+    operands=[Operand(type="ST0", size=80, dest=None),
+              Operand(type="Reg", size=80, dest="Op1Add")])
+
+# (mnemonic, [first opcode byte, second opcode byte])
+for _fcmov_name, _fcmov_bytes in [
+        ("fcmovb",   [0xDA, 0xC0]),
+        ("fcmove",   [0xDA, 0xC8]),
+        ("fcmovbe",  [0xDA, 0xD0]),
+        ("fcmovu",   [0xDA, 0xD8]),
+        ("fcmovnb",  [0xDB, 0xC0]),
+        ("fcmovne",  [0xDB, 0xC8]),
+        ("fcmovnbe", [0xDB, 0xD0]),
+        ("fcmovnu",  [0xDB, 0xD8]),
+        ]:
+    add_insn(_fcmov_name, "fcmovcc", modifiers=_fcmov_bytes)
+
+# Flag-setting compares reuse the "fcom2" group, with a 686 cpu
+# requirement passed per instruction.
+for _fcomi_name, _fcomi_bytes in [
+        ("fcomi",   [0xDB, 0xF0]),
+        ("fucomi",  [0xDB, 0xE8]),
+        ("fcomip",  [0xDF, 0xF0]),
+        ("fucomip", [0xDF, 0xE8]),
+        ]:
+    add_insn(_fcomi_name, "fcom2", modifiers=_fcomi_bytes,
+             cpu=["686", "FPU"])
+
+#####################################################################
+# Pentium4 extensions
+#####################################################################
+# "movnti" group: "mem, reg" on opcode bytes 0x0F 0xC3, in a 32-bit
+# (suffix "l") and a 64-bit (suffix "q") form.  Only the 64-bit form sets
+# opersize; the 32-bit form leaves it at add_group's default.
+add_group("movnti",
+    suffix="l",
+    cpu=["P4"],
+    opcode=[0x0F, 0xC3],
+    operands=[Operand(type="Mem", size=32, relaxed=True, dest="EA"),
+              Operand(type="Reg", size=32, dest="Spare")])
+add_group("movnti",
+    suffix="q",
+    cpu=["P4"],
+    opersize=64,
+    opcode=[0x0F, 0xC3],
+    operands=[Operand(type="Mem", size=64, relaxed=True, dest="EA"),
+              Operand(type="Reg", size=64, dest="Spare")])
+
+add_insn("movnti", "movnti")
+
+# "clflush" group: a relaxed 8-bit memory operand on opcode bytes
+# 0x0F 0xAE with spare=7.  Registered with a "P3" cpu requirement.
+add_group("clflush",
+    cpu=["P3"],
+    opcode=[0x0F, 0xAE],
+    spare=7,
+    operands=[Operand(type="Mem", size=8, relaxed=True, dest="EA")])
+
+add_insn("clflush", "clflush")
+
+# lfence/mfence share the 0x0F 0xAE bytes with clflush and differ in the
+# third byte.  pause goes through "onebyte_prefix" with the values 0xF3
+# and 0x90.
+add_insn("lfence", "threebyte", modifiers=[0x0F, 0xAE, 0xE8], cpu=["P3"])
+add_insn("mfence", "threebyte", modifiers=[0x0F, 0xAE, 0xF0], cpu=["P3"])
+add_insn("pause", "onebyte_prefix", modifiers=[0xF3, 0x90], cpu=["P4"])
+
+#####################################################################
+# MMX/SSE2 instructions
+#####################################################################
+
+add_insn("emms", "twobyte", modifiers=[0x0F, 0x77], cpu=["MMX"])
+
+#
+# movd
+#
+# Eight forms: {MMX 64-bit SIMD register, SSE2 128-bit SIMD register}
+# x {load into the SIMD register, store from it} x {32-bit, 64-bit r/m}.
+#   - loads use opcode bytes 0x0F 0x6E, stores use 0x0F 0x7E
+#   - SSE2 forms add prefix=0x66
+#   - forms with a 64-bit r/m operand set opersize=64
+# The SIMD register always goes to "Spare" and the r/m operand to "EA".
+add_group("movd",
+    cpu=["MMX"],
+    opcode=[0x0F, 0x6E],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="RM", size=32, relaxed=True, dest="EA")])
+add_group("movd",
+    cpu=["MMX"],
+    opersize=64,
+    opcode=[0x0F, 0x6E],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="RM", size=64, relaxed=True, dest="EA")])
+add_group("movd",
+    cpu=["MMX"],
+    opcode=[0x0F, 0x7E],
+    operands=[Operand(type="RM", size=32, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=64, dest="Spare")])
+add_group("movd",
+    cpu=["MMX"],
+    opersize=64,
+    opcode=[0x0F, 0x7E],
+    operands=[Operand(type="RM", size=64, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=64, dest="Spare")])
+add_group("movd",
+    cpu=["SSE2"],
+    prefix=0x66,
+    opcode=[0x0F, 0x6E],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="RM", size=32, relaxed=True, dest="EA")])
+add_group("movd",
+    cpu=["SSE2"],
+    opersize=64,
+    prefix=0x66,
+    opcode=[0x0F, 0x6E],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="RM", size=64, relaxed=True, dest="EA")])
+add_group("movd",
+    cpu=["SSE2"],
+    prefix=0x66,
+    opcode=[0x0F, 0x7E],
+    operands=[Operand(type="RM", size=32, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+add_group("movd",
+    cpu=["SSE2"],
+    opersize=64,
+    prefix=0x66,
+    opcode=[0x0F, 0x7E],
+    operands=[Operand(type="RM", size=64, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+
+add_insn("movd", "movd")
+
+#
+# movq
+#
+
+# MMX forms
+# Every "movq" form in this group is restricted to the nasm parser.
+# The four MMX forms pair a 64-bit SIMD register with either a 64-bit
+# SIMD r/m operand (0x0F 0x6F load / 0x0F 0x7F store) or a 64-bit
+# general r/m operand (0x0F 0x6E load / 0x0F 0x7E store, opersize=64).
+add_group("movq",
+    cpu=["MMX"],
+    parsers=["nasm"],
+    opcode=[0x0F, 0x6F],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="SIMDRM", size=64, relaxed=True, dest="EA")])
+add_group("movq",
+    cpu=["MMX"],
+    parsers=["nasm"],
+    opersize=64,
+    opcode=[0x0F, 0x6E],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="RM", size=64, relaxed=True, dest="EA")])
+add_group("movq",
+    cpu=["MMX"],
+    parsers=["nasm"],
+    opcode=[0x0F, 0x7F],
+    operands=[Operand(type="SIMDRM", size=64, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=64, dest="Spare")])
+add_group("movq",
+    cpu=["MMX"],
+    parsers=["nasm"],
+    opersize=64,
+    opcode=[0x0F, 0x7E],
+    operands=[Operand(type="RM", size=64, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=64, dest="Spare")])
+
+# SSE2 forms
+# Loads into a 128-bit SIMD register use prefix 0xF3 with 0x0F 0x7E for a
+# SIMD source (register, or relaxed 64-bit SIMD r/m) and prefix 0x66 with
+# 0x0F 0x6E for a 64-bit general r/m source.  Stores use prefix 0x66 with
+# 0x0F 0xD6 to a 64-bit SIMD r/m and 0x0F 0x7E to a 64-bit general r/m.
+# The general-r/m forms set opersize=64.
+add_group("movq",
+    cpu=["SSE2"],
+    parsers=["nasm"],
+    prefix=0xF3,
+    opcode=[0x0F, 0x7E],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("movq",
+    cpu=["SSE2"],
+    parsers=["nasm"],
+    prefix=0xF3,
+    opcode=[0x0F, 0x7E],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=64, relaxed=True, dest="EA")])
+add_group("movq",
+    cpu=["SSE2"],
+    parsers=["nasm"],
+    opersize=64,
+    prefix=0x66,
+    opcode=[0x0F, 0x6E],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="RM", size=64, relaxed=True, dest="EA")])
+add_group("movq",
+    cpu=["SSE2"],
+    parsers=["nasm"],
+    prefix=0x66,
+    opcode=[0x0F, 0xD6],
+    operands=[Operand(type="SIMDRM", size=64, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+add_group("movq",
+    cpu=["SSE2"],
+    parsers=["nasm"],
+    opersize=64,
+    prefix=0x66,
+    opcode=[0x0F, 0x7E],
+    operands=[Operand(type="RM", size=64, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+
+add_insn("movq", "movq")
+
+# Generic "SIMD reg, SIMD r/m" group with two forms: 64-bit operands
+# (MMX) and 128-bit operands with prefix 0x66 (SSE2).  The instruction's
+# single modifier goes to the "Op1Add" slot on the 0x0F 0x00 bytes.
+add_group("mmxsse2",
+    cpu=["MMX"],
+    modifiers=["Op1Add"],
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="SIMDRM", size=64, relaxed=True, dest="EA")])
+add_group("mmxsse2",
+    cpu=["SSE2"],
+    modifiers=["Op1Add"],
+    prefix=0x66,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+
+# (mnemonic, modifier value), registered in table order.
+for _mmx_name, _mmx_op in [
+        ("packssdw",  0x6B),
+        ("packsswb",  0x63),
+        ("packuswb",  0x67),
+        ("paddb",     0xFC),
+        ("paddw",     0xFD),
+        ("paddd",     0xFE),
+        ("paddq",     0xD4),
+        ("paddsb",    0xEC),
+        ("paddsw",    0xED),
+        ("paddusb",   0xDC),
+        ("paddusw",   0xDD),
+        ("pand",      0xDB),
+        ("pandn",     0xDF),
+        ("pcmpeqb",   0x74),
+        ("pcmpeqw",   0x75),
+        ("pcmpeqd",   0x76),
+        ("pcmpgtb",   0x64),
+        ("pcmpgtw",   0x65),
+        ("pcmpgtd",   0x66),
+        ("pmaddwd",   0xF5),
+        ("pmulhw",    0xE5),
+        ("pmullw",    0xD5),
+        ("por",       0xEB),
+        ("psubb",     0xF8),
+        ("psubw",     0xF9),
+        ("psubd",     0xFA),
+        ("psubq",     0xFB),
+        ("psubsb",    0xE8),
+        ("psubsw",    0xE9),
+        ("psubusb",   0xD8),
+        ("psubusw",   0xD9),
+        ("punpckhbw", 0x68),
+        ("punpckhwd", 0x69),
+        ("punpckhdq", 0x6A),
+        ("punpcklbw", 0x60),
+        ("punpcklwd", 0x61),
+        ("punpckldq", 0x62),
+        ("pxor",      0xEF),
+        ]:
+    add_insn(_mmx_name, "mmxsse2", modifiers=[_mmx_op])
+
+# "pshift" group: packed shifts, in MMX (64-bit) and SSE2 (128-bit,
+# prefix 0x66) flavours.  Each flavour has two forms, and each
+# instruction passes three modifiers:
+#   - "SIMD reg, SIMD r/m": ["Op1Add"] -> uses the 1st modifier
+#   - "SIMD reg, imm8": ["Gap", "Op1Add", "SpAdd"] -> skips the 1st
+#     modifier, uses the 2nd for the opcode byte and the 3rd for spare;
+#     the register operand goes to "EA" in this form.
+# NOTE(review): slot meanings are inferred from the modifier names;
+# confirm against the modifier handling outside this section.
+add_group("pshift",
+    cpu=["MMX"],
+    modifiers=["Op1Add"],
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="SIMDRM", size=64, relaxed=True, dest="EA")])
+add_group("pshift",
+    cpu=["MMX"],
+    modifiers=["Gap", "Op1Add", "SpAdd"],
+    opcode=[0x0F, 0x00],
+    spare=0,
+    operands=[Operand(type="SIMDReg", size=64, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("pshift",
+    cpu=["SSE2"],
+    modifiers=["Op1Add"],
+    prefix=0x66,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+add_group("pshift",
+    cpu=["SSE2"],
+    modifiers=["Gap", "Op1Add", "SpAdd"],
+    prefix=0x66,
+    opcode=[0x0F, 0x00],
+    spare=0,
+    operands=[Operand(type="SIMDReg", size=128, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+# Modifier order: [register-form opcode byte, immediate-form opcode byte,
+# immediate-form spare].
+add_insn("psllw", "pshift", modifiers=[0xF1, 0x71, 6])
+add_insn("pslld", "pshift", modifiers=[0xF2, 0x72, 6])
+add_insn("psllq", "pshift", modifiers=[0xF3, 0x73, 6])
+add_insn("psraw", "pshift", modifiers=[0xE1, 0x71, 4])
+add_insn("psrad", "pshift", modifiers=[0xE2, 0x72, 4])
+add_insn("psrlw", "pshift", modifiers=[0xD1, 0x71, 2])
+add_insn("psrld", "pshift", modifiers=[0xD2, 0x72, 2])
+add_insn("psrlq", "pshift", modifiers=[0xD3, 0x73, 2])
+
+#
+# PIII (Katmai) new instructions / SIMD instructions
+#
+# Integer SIMD additions that reuse the "mmxsse2" group but pass their
+# own cpu list.  Rows are (mnemonic, modifier value).
+for _p3_name, _p3_op in [
+        ("pavgb",   0xE0),
+        ("pavgw",   0xE3),
+        ("pmaxsw",  0xEE),
+        ("pmaxub",  0xDE),
+        ("pminsw",  0xEA),
+        ("pminub",  0xDA),
+        ("pmulhuw", 0xE4),
+        ("psadbw",  0xF6),
+        ]:
+    add_insn(_p3_name, "mmxsse2", modifiers=[_p3_op], cpu=["P3", "MMX"])
+
+# Prefetch hints share bytes 0x0F 0x18; the leading modifier is the
+# mnemonic's position in this list (0..3).
+for _pf_hint, _pf_name in enumerate(["prefetchnta", "prefetcht0",
+                                     "prefetcht1", "prefetcht2"]):
+    add_insn(_pf_name, "twobytemem", modifiers=[_pf_hint, 0x0F, 0x18],
+             cpu=["P3"])
+
+add_insn("sfence", "threebyte", modifiers=[0x0F, 0xAE, 0xF8], cpu=["P3"])
+
+# "sseps" group: one unprefixed "SIMD reg, SIMD r/m" form with 128-bit
+# operands.  The instruction's single modifier goes to the "Op1Add" slot
+# on the 0x0F 0x00 bytes.
+add_group("sseps",
+    cpu=["SSE"],
+    modifiers=["Op1Add"],
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+
+# (mnemonic, modifier value), registered in table order.
+for _sseps_name, _sseps_op in [
+        ("addps",    0x58),
+        ("andnps",   0x55),
+        ("andps",    0x54),
+        ("comiss",   0x2F),
+        ("divps",    0x5E),
+        ("maxps",    0x5F),
+        ("minps",    0x5D),
+        ("mulps",    0x59),
+        ("orps",     0x56),
+        ("rcpps",    0x53),
+        ("rsqrtps",  0x52),
+        ("sqrtps",   0x51),
+        ("subps",    0x5C),
+        ("unpckhps", 0x15),
+        ("unpcklps", 0x14),
+        ("xorps",    0x57),
+        ]:
+    add_insn(_sseps_name, "sseps", modifiers=[_sseps_op])
+
+# "cvt_rx_xmm32" group: conversions from a 128-bit SIMD register or a
+# relaxed 32-bit memory operand into a general register.  The prefix and
+# second opcode byte are both left as 0x00 and supplied by the
+# instruction through the ["PreAdd", "Op1Add"] slots.
+# Four forms: {32-bit destination, suffix "l"} and {64-bit destination,
+# suffix "q", opersize=64}, each with a register source and a memory
+# source.  The memory source stays 32 bits wide in the 64-bit forms.
+add_group("cvt_rx_xmm32",
+    suffix="l",
+    cpu=["SSE"],
+    modifiers=["PreAdd", "Op1Add"],
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="Reg", size=32, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("cvt_rx_xmm32",
+    suffix="l",
+    cpu=["SSE"],
+    modifiers=["PreAdd", "Op1Add"],
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="Reg", size=32, dest="Spare"),
+              Operand(type="Mem", size=32, relaxed=True, dest="EA")])
+# REX
+add_group("cvt_rx_xmm32",
+    suffix="q",
+    cpu=["SSE"],
+    modifiers=["PreAdd", "Op1Add"],
+    opersize=64,
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="Reg", size=64, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("cvt_rx_xmm32",
+    suffix="q",
+    cpu=["SSE"],
+    modifiers=["PreAdd", "Op1Add"],
+    opersize=64,
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="Reg", size=64, dest="Spare"),
+              Operand(type="Mem", size=32, relaxed=True, dest="EA")])
+
+# Modifier order: [prefix byte, second opcode byte].
+add_insn("cvtss2si", "cvt_rx_xmm32", modifiers=[0xF3, 0x2D])
+add_insn("cvttss2si", "cvt_rx_xmm32", modifiers=[0xF3, 0x2C])
+
+# "cvt_mm_xmm64" group: a 64-bit SIMD register destination with either a
+# 128-bit SIMD register or a relaxed 64-bit memory source.  No prefix;
+# the instruction's single modifier goes to the "Op1Add" slot.
+add_group("cvt_mm_xmm64",
+    cpu=["SSE"],
+    modifiers=["Op1Add"],
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("cvt_mm_xmm64",
+    cpu=["SSE"],
+    modifiers=["Op1Add"],
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="Mem", size=64, relaxed=True, dest="EA")])
+
+add_insn("cvtps2pi", "cvt_mm_xmm64", modifiers=[0x2D])
+add_insn("cvttps2pi", "cvt_mm_xmm64", modifiers=[0x2C])
+
+# "cvt_xmm_mm_ps" group: a 128-bit SIMD register destination with a
+# relaxed 64-bit SIMD r/m source.  No prefix; the instruction's single
+# modifier goes to the "Op1Add" slot.
+add_group("cvt_xmm_mm_ps",
+    cpu=["SSE"],
+    modifiers=["Op1Add"],
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=64, relaxed=True, dest="EA")])
+
+add_insn("cvtpi2ps", "cvt_xmm_mm_ps", modifiers=[0x2A])
+
+# "cvt_xmm_rmx" group: a 128-bit SIMD register destination with a
+# general r/m source, 32-bit (suffix "l") or 64-bit (suffix "q",
+# opersize=64).  Prefix and second opcode byte are both 0x00 here and
+# are supplied by the instruction via the ["PreAdd", "Op1Add"] slots.
+add_group("cvt_xmm_rmx",
+    suffix="l",
+    cpu=["SSE"],
+    modifiers=["PreAdd", "Op1Add"],
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="RM", size=32, relaxed=True, dest="EA")])
+# REX
+add_group("cvt_xmm_rmx",
+    suffix="q",
+    cpu=["SSE"],
+    modifiers=["PreAdd", "Op1Add"],
+    opersize=64,
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="RM", size=64, relaxed=True, dest="EA")])
+
+# Modifier order: [prefix byte, second opcode byte].
+add_insn("cvtsi2ss", "cvt_xmm_rmx", modifiers=[0xF3, 0x2A])
+
+# "ssess" group: one "SIMD reg, SIMD r/m" form with 128-bit operands in
+# which both the prefix and the second opcode byte start at 0x00 and are
+# supplied per instruction (PreAdd, Op1Add slots).
+add_group("ssess",
+    cpu=["SSE"],
+    modifiers=["PreAdd", "Op1Add"],
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+
+# (mnemonic, prefix modifier, opcode modifier).  ucomiss is the only
+# entry that passes 0 as its prefix modifier.
+for _ss_name, _ss_prefix, _ss_op in [
+        ("addss",   0xF3, 0x58),
+        ("divss",   0xF3, 0x5E),
+        ("maxss",   0xF3, 0x5F),
+        ("minss",   0xF3, 0x5D),
+        ("mulss",   0xF3, 0x59),
+        ("rcpss",   0xF3, 0x53),
+        ("rsqrtss", 0xF3, 0x52),
+        ("sqrtss",  0xF3, 0x51),
+        ("subss",   0xF3, 0x5C),
+        ("ucomiss", 0,    0x2E),
+        ]:
+    add_insn(_ss_name, "ssess", modifiers=[_ss_prefix, _ss_op])
+
+add_group("ssecmpps",
+    cpu=["SSE"],
+    modifiers=["Imm8"],
+    opcode=[0x0F, 0xC2],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+
+add_insn("cmpeqps",    "ssecmpps", modifiers=[0x00])
+add_insn("cmpleps",    "ssecmpps", modifiers=[0x02])
+add_insn("cmpltps",    "ssecmpps", modifiers=[0x01])
+add_insn("cmpneqps",   "ssecmpps", modifiers=[0x04])
+add_insn("cmpnleps",   "ssecmpps", modifiers=[0x06])
+add_insn("cmpnltps",   "ssecmpps", modifiers=[0x05])
+add_insn("cmpordps",   "ssecmpps", modifiers=[0x07])
+add_insn("cmpunordps", "ssecmpps", modifiers=[0x03])
+
+add_group("ssecmpss",
+    cpu=["SSE"],
+    modifiers=["Imm8", "PreAdd"],
+    prefix=0x00,
+    opcode=[0x0F, 0xC2],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+
+add_insn("cmpeqss",    "ssecmpss", modifiers=[0, 0xF3])
+add_insn("cmpless",    "ssecmpss", modifiers=[2, 0xF3])
+add_insn("cmpltss",    "ssecmpss", modifiers=[1, 0xF3])
+add_insn("cmpneqss",   "ssecmpss", modifiers=[4, 0xF3])
+add_insn("cmpnless",   "ssecmpss", modifiers=[6, 0xF3])
+add_insn("cmpnltss",   "ssecmpss", modifiers=[5, 0xF3])
+add_insn("cmpordss",   "ssecmpss", modifiers=[7, 0xF3])
+add_insn("cmpunordss", "ssecmpss", modifiers=[3, 0xF3])
+
+add_group("ssepsimm",
+    cpu=["SSE"],
+    modifiers=["Op1Add"],
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("cmpps", "ssepsimm", modifiers=[0xC2])
+add_insn("shufps", "ssepsimm", modifiers=[0xC6])
+
+add_group("ssessimm",
+    cpu=["SSE"],
+    modifiers=["PreAdd", "Op1Add"],
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("cmpss", "ssessimm", modifiers=[0xF3, 0xC2])
+
+add_group("ldstmxcsr",
+    cpu=["SSE"],
+    modifiers=["SpAdd"],
+    opcode=[0x0F, 0xAE],
+    spare=0,
+    operands=[Operand(type="Mem", size=32, relaxed=True, dest="EA")])
+
+add_insn("ldmxcsr", "ldstmxcsr", modifiers=[2])
+add_insn("stmxcsr", "ldstmxcsr", modifiers=[3])
+
+add_group("maskmovq",
+    cpu=["MMX", "P3"],
+    opcode=[0x0F, 0xF7],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="SIMDReg", size=64, dest="EA")])
+
+add_insn("maskmovq", "maskmovq")
+
+add_group("movaups",
+    cpu=["SSE"],
+    modifiers=["Op1Add"],
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+add_group("movaups",
+    cpu=["SSE"],
+    modifiers=["Op1Add"],
+    opcode=[0x0F, 0x01],
+    operands=[Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+
+add_insn("movaps", "movaups", modifiers=[0x28])
+add_insn("movups", "movaups", modifiers=[0x10])
+
+add_group("movhllhps",
+    cpu=["SSE"],
+    modifiers=["Op1Add"],
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+
+add_insn("movhlps", "movhllhps", modifiers=[0x12])
+add_insn("movlhps", "movhllhps", modifiers=[0x16])
+
+add_group("movhlps",
+    cpu=["SSE"],
+    modifiers=["Op1Add"],
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Mem", size=64, relaxed=True, dest="EA")])
+add_group("movhlps",
+    cpu=["SSE"],
+    modifiers=["Op1Add"],
+    opcode=[0x0F, 0x01],
+    operands=[Operand(type="Mem", size=64, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+
+add_insn("movhps", "movhlps", modifiers=[0x16])
+add_insn("movlps", "movhlps", modifiers=[0x12])
+
+add_group("movmskps",
+    suffix="l",
+    cpu=["SSE"],
+    opcode=[0x0F, 0x50],
+    operands=[Operand(type="Reg", size=32, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("movmskps",
+    suffix="q",
+    cpu=["SSE"],
+    opersize=64,
+    opcode=[0x0F, 0x50],
+    operands=[Operand(type="Reg", size=64, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+
+add_insn("movmskps", "movmskps")
+
+add_group("movntps",
+    cpu=["SSE"],
+    opcode=[0x0F, 0x2B],
+    operands=[Operand(type="Mem", size=128, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+
+add_insn("movntps", "movntps")
+
+add_group("movntq",
+    cpu=["SSE"],
+    opcode=[0x0F, 0xE7],
+    operands=[Operand(type="Mem", size=64, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=64, dest="Spare")])
+
+add_insn("movntq", "movntq")
+
+add_group("movss",
+    cpu=["SSE"],
+    prefix=0xF3,
+    opcode=[0x0F, 0x10],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("movss",
+    cpu=["SSE"],
+    prefix=0xF3,
+    opcode=[0x0F, 0x10],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Mem", size=32, relaxed=True, dest="EA")])
+add_group("movss",
+    cpu=["SSE"],
+    prefix=0xF3,
+    opcode=[0x0F, 0x11],
+    operands=[Operand(type="Mem", size=32, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+    
+add_insn("movss", "movss")
+
+add_group("pextrw",
+    suffix="l",
+    cpu=["MMX", "P3"],
+    opcode=[0x0F, 0xC5],
+    operands=[Operand(type="Reg", size=32, dest="Spare"),
+              Operand(type="SIMDReg", size=64, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("pextrw",
+    suffix="l",
+    cpu=["SSE2"],
+    prefix=0x66,
+    opcode=[0x0F, 0xC5],
+    operands=[Operand(type="Reg", size=32, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("pextrw",
+    suffix="q",
+    cpu=["MMX", "P3"],
+    opersize=64,
+    opcode=[0x0F, 0xC5],
+    operands=[Operand(type="Reg", size=64, dest="Spare"),
+              Operand(type="SIMDReg", size=64, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("pextrw",
+    suffix="q",
+    cpu=["SSE2"],
+    opersize=64,
+    prefix=0x66,
+    opcode=[0x0F, 0xC5],
+    operands=[Operand(type="Reg", size=64, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+# SSE41 instructions
+add_group("pextrw",
+    cpu=["SSE41"],
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x15],
+    operands=[Operand(type="Mem", size=16, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("pextrw",
+    cpu=["SSE41"],
+    opersize=32,
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x15],
+    operands=[Operand(type="Reg", size=32, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("pextrw",
+    cpu=["SSE41"],
+    opersize=64,
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x15],
+    operands=[Operand(type="Reg", size=64, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("pextrw", "pextrw")
+
+add_group("pinsrw",
+    suffix="l",
+    cpu=["MMX", "P3"],
+    opcode=[0x0F, 0xC4],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="Reg", size=32, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("pinsrw",
+    suffix="q",
+    cpu=["MMX", "P3"],
+    opersize=64,
+    opcode=[0x0F, 0xC4],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="Reg", size=64, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("pinsrw",
+    suffix="l",
+    cpu=["MMX", "P3"],
+    opcode=[0x0F, 0xC4],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="Mem", size=16, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("pinsrw",
+    suffix="l",
+    cpu=["SSE2"],
+    prefix=0x66,
+    opcode=[0x0F, 0xC4],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Reg", size=32, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("pinsrw",
+    suffix="q",
+    cpu=["SSE2"],
+    opersize=64,
+    prefix=0x66,
+    opcode=[0x0F, 0xC4],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Reg", size=64, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("pinsrw",
+    suffix="l",
+    cpu=["SSE2"],
+    prefix=0x66,
+    opcode=[0x0F, 0xC4],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Mem", size=16, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("pinsrw", "pinsrw")
+
+add_group("pmovmskb",
+    suffix="l",
+    cpu=["MMX", "P3"],
+    opcode=[0x0F, 0xD7],
+    operands=[Operand(type="Reg", size=32, dest="Spare"),
+              Operand(type="SIMDReg", size=64, dest="EA")])
+add_group("pmovmskb",
+    suffix="l",
+    cpu=["SSE2"],
+    prefix=0x66,
+    opcode=[0x0F, 0xD7],
+    operands=[Operand(type="Reg", size=32, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("pmovmskb",
+    suffix="q",
+    cpu=["MMX", "P3"],
+    opersize=64,
+    opcode=[0x0F, 0xD7],
+    operands=[Operand(type="Reg", size=64, dest="Spare"),
+              Operand(type="SIMDReg", size=64, dest="EA")])
+add_group("pmovmskb",
+    suffix="q",
+    cpu=["SSE2"],
+    opersize=64,
+    prefix=0x66,
+    opcode=[0x0F, 0xD7],
+    operands=[Operand(type="Reg", size=64, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+
+add_insn("pmovmskb", "pmovmskb")
+
+add_group("pshufw",
+    cpu=["MMX", "P3"],
+    opcode=[0x0F, 0x70],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="SIMDRM", size=64, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("pshufw", "pshufw")
+
+#####################################################################
+# SSE2 instructions
+#####################################################################
+add_insn("addpd",    "ssess", modifiers=[0x66, 0x58], cpu=["SSE2"])
+add_insn("addsd",    "ssess", modifiers=[0xF2, 0x58], cpu=["SSE2"])
+add_insn("andnpd",   "ssess", modifiers=[0x66, 0x55], cpu=["SSE2"])
+add_insn("andpd",    "ssess", modifiers=[0x66, 0x54], cpu=["SSE2"])
+add_insn("comisd",   "ssess", modifiers=[0x66, 0x2F], cpu=["SSE2"])
+add_insn("divpd",    "ssess", modifiers=[0x66, 0x5E], cpu=["SSE2"])
+add_insn("divsd",    "ssess", modifiers=[0xF2, 0x5E], cpu=["SSE2"])
+add_insn("maxpd",    "ssess", modifiers=[0x66, 0x5F], cpu=["SSE2"])
+add_insn("maxsd",    "ssess", modifiers=[0xF2, 0x5F], cpu=["SSE2"])
+add_insn("minpd",    "ssess", modifiers=[0x66, 0x5D], cpu=["SSE2"])
+add_insn("minsd",    "ssess", modifiers=[0xF2, 0x5D], cpu=["SSE2"])
+add_insn("mulpd",    "ssess", modifiers=[0x66, 0x59], cpu=["SSE2"])
+add_insn("mulsd",    "ssess", modifiers=[0xF2, 0x59], cpu=["SSE2"])
+add_insn("orpd",     "ssess", modifiers=[0x66, 0x56], cpu=["SSE2"])
+add_insn("sqrtpd",   "ssess", modifiers=[0x66, 0x51], cpu=["SSE2"])
+add_insn("sqrtsd",   "ssess", modifiers=[0xF2, 0x51], cpu=["SSE2"])
+add_insn("subpd",    "ssess", modifiers=[0x66, 0x5C], cpu=["SSE2"])
+add_insn("subsd",    "ssess", modifiers=[0xF2, 0x5C], cpu=["SSE2"])
+add_insn("ucomisd",  "ssess", modifiers=[0x66, 0x2E], cpu=["SSE2"])
+add_insn("unpckhpd", "ssess", modifiers=[0x66, 0x15], cpu=["SSE2"])
+add_insn("unpcklpd", "ssess", modifiers=[0x66, 0x14], cpu=["SSE2"])
+add_insn("xorpd",    "ssess", modifiers=[0x66, 0x57], cpu=["SSE2"])
+add_insn("cvtpd2dq", "ssess", modifiers=[0xF2, 0xE6], cpu=["SSE2"])
+add_insn("cvtpd2ps", "ssess", modifiers=[0x66, 0x5A], cpu=["SSE2"])
+add_insn("cvtps2dq", "ssess", modifiers=[0x66, 0x5B], cpu=["SSE2"])
+
+add_insn("cvtdq2ps", "sseps", modifiers=[0x5B], cpu=["SSE2"])
+
+add_insn("cmpeqpd",    "ssecmpss", modifiers=[0x00, 0x66], cpu=["SSE2"])
+add_insn("cmpeqsd",    "ssecmpss", modifiers=[0x00, 0xF2], cpu=["SSE2"])
+add_insn("cmplepd",    "ssecmpss", modifiers=[0x02, 0x66], cpu=["SSE2"])
+add_insn("cmplesd",    "ssecmpss", modifiers=[0x02, 0xF2], cpu=["SSE2"])
+add_insn("cmpltpd",    "ssecmpss", modifiers=[0x01, 0x66], cpu=["SSE2"])
+add_insn("cmpltsd",    "ssecmpss", modifiers=[0x01, 0xF2], cpu=["SSE2"])
+add_insn("cmpneqpd",   "ssecmpss", modifiers=[0x04, 0x66], cpu=["SSE2"])
+add_insn("cmpneqsd",   "ssecmpss", modifiers=[0x04, 0xF2], cpu=["SSE2"])
+add_insn("cmpnlepd",   "ssecmpss", modifiers=[0x06, 0x66], cpu=["SSE2"])
+add_insn("cmpnlesd",   "ssecmpss", modifiers=[0x06, 0xF2], cpu=["SSE2"])
+add_insn("cmpnltpd",   "ssecmpss", modifiers=[0x05, 0x66], cpu=["SSE2"])
+add_insn("cmpnltsd",   "ssecmpss", modifiers=[0x05, 0xF2], cpu=["SSE2"])
+add_insn("cmpordpd",   "ssecmpss", modifiers=[0x07, 0x66], cpu=["SSE2"])
+add_insn("cmpordsd",   "ssecmpss", modifiers=[0x07, 0xF2], cpu=["SSE2"])
+add_insn("cmpunordpd", "ssecmpss", modifiers=[0x03, 0x66], cpu=["SSE2"])
+add_insn("cmpunordsd", "ssecmpss", modifiers=[0x03, 0xF2], cpu=["SSE2"])
+
+add_insn("cmppd",  "ssessimm", modifiers=[0x66, 0xC2], cpu=["SSE2"])
+add_insn("shufpd", "ssessimm", modifiers=[0x66, 0xC6], cpu=["SSE2"])
+
+add_insn("cvtsi2sd", "cvt_xmm_rmx", modifiers=[0xF2, 0x2A], cpu=["SSE2"])
+
+add_group("cvt_xmm_xmm64_ss",
+    cpu=["SSE2"],
+    modifiers=["PreAdd", "Op1Add"],
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("cvt_xmm_xmm64_ss",
+    cpu=["SSE2"],
+    modifiers=["PreAdd", "Op1Add"],
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Mem", size=64, relaxed=True, dest="EA")])
+
+add_insn("cvtdq2pd", "cvt_xmm_xmm64_ss", modifiers=[0xF3, 0xE6])
+add_insn("cvtsd2ss", "cvt_xmm_xmm64_ss", modifiers=[0xF2, 0x5A])
+
+add_group("cvt_xmm_xmm64_ps",
+    cpu=["SSE2"],
+    modifiers=["Op1Add"],
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("cvt_xmm_xmm64_ps",
+    cpu=["SSE2"],
+    modifiers=["Op1Add"],
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Mem", size=64, relaxed=True, dest="EA")])
+
+add_insn("cvtps2pd", "cvt_xmm_xmm64_ps", modifiers=[0x5A])
+
+add_group("cvt_rx_xmm64",
+    suffix="l",
+    cpu=["SSE2"],
+    modifiers=["PreAdd", "Op1Add"],
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="Reg", size=32, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("cvt_rx_xmm64",
+    suffix="l",
+    cpu=["SSE2"],
+    modifiers=["PreAdd", "Op1Add"],
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="Reg", size=32, dest="Spare"),
+              Operand(type="Mem", size=64, relaxed=True, dest="EA")])
+# REX
+add_group("cvt_rx_xmm64",
+    suffix="q",
+    cpu=["SSE2"],
+    modifiers=["PreAdd", "Op1Add"],
+    opersize=64,
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="Reg", size=64, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("cvt_rx_xmm64",
+    suffix="q",
+    cpu=["SSE2"],
+    modifiers=["PreAdd", "Op1Add"],
+    opersize=64,
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="Reg", size=64, dest="Spare"),
+              Operand(type="Mem", size=64, relaxed=True, dest="EA")])
+
+add_insn("cvtsd2si", "cvt_rx_xmm64", modifiers=[0xF2, 0x2D])
+
+add_group("cvt_mm_xmm",
+    cpu=["SSE2"],
+    modifiers=["PreAdd", "Op1Add"],
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+
+add_insn("cvtpd2pi", "cvt_mm_xmm", modifiers=[0x66, 0x2D], cpu=["SSE2"])
+
+add_group("cvt_xmm_mm_ss",
+    cpu=["SSE"],
+    modifiers=["PreAdd", "Op1Add"],
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=64, relaxed=True, dest="EA")])
+
+add_insn("cvtpi2pd", "cvt_xmm_mm_ss", modifiers=[0x66, 0x2A], cpu=["SSE2"])
+
+# cmpsd SSE2 form
+add_group("cmpsd",
+    cpu=["SSE2"],
+    prefix=0xF2,
+    opcode=[0x0F, 0xC2],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+# cmpsd is added in string instructions above, so don't re-add_insn()
+
+add_group("movaupd",
+    cpu=["SSE2"],
+    modifiers=["Op1Add"],
+    prefix=0x66,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+add_group("movaupd",
+    cpu=["SSE2"],
+    modifiers=["Op1Add"],
+    prefix=0x66,
+    opcode=[0x0F, 0x01],
+    operands=[Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+
+add_insn("movapd", "movaupd", modifiers=[0x28])
+add_insn("movupd", "movaupd", modifiers=[0x10])
+
+add_group("movhlpd",
+    cpu=["SSE2"],
+    modifiers=["Op1Add"],
+    prefix=0x66,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Mem", size=64, relaxed=True, dest="EA")])
+add_group("movhlpd",
+    cpu=["SSE2"],
+    modifiers=["Op1Add"],
+    prefix=0x66,
+    opcode=[0x0F, 0x01],
+    operands=[Operand(type="Mem", size=64, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+
+add_insn("movhpd", "movhlpd", modifiers=[0x16])
+add_insn("movlpd", "movhlpd", modifiers=[0x12])
+
+add_group("movmskpd",
+    suffix="l",
+    cpu=["SSE2"],
+    prefix=0x66,
+    opcode=[0x0F, 0x50],
+    operands=[Operand(type="Reg", size=32, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("movmskpd",
+    suffix="q",
+    cpu=["SSE2"],
+    prefix=0x66,
+    opcode=[0x0F, 0x50],
+    operands=[Operand(type="Reg", size=64, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+
+add_insn("movmskpd", "movmskpd")
+
+add_group("movntpddq",
+    cpu=["SSE2"],
+    modifiers=["Op1Add"],
+    prefix=0x66,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="Mem", size=128, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+
+add_insn("movntpd", "movntpddq", modifiers=[0x2B])
+add_insn("movntdq", "movntpddq", modifiers=[0xE7])
+
+# movsd SSE2 forms
+add_group("movsd",
+    cpu=["SSE2"],
+    prefix=0xF2,
+    opcode=[0x0F, 0x10],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("movsd",
+    cpu=["SSE2"],
+    prefix=0xF2,
+    opcode=[0x0F, 0x10],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Mem", size=64, relaxed=True, dest="EA")])
+add_group("movsd",
+    cpu=["SSE2"],
+    prefix=0xF2,
+    opcode=[0x0F, 0x11],
+    operands=[Operand(type="Mem", size=64, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+# movsd is added in string instructions above, so don't re-add_insn()
+
+#####################################################################
+# P4 VMX Instructions
+#####################################################################
+
+add_insn("vmcall", "threebyte", modifiers=[0x0F, 0x01, 0xC1], cpu=["P4"])
+add_insn("vmlaunch", "threebyte", modifiers=[0x0F, 0x01, 0xC2], cpu=["P4"])
+add_insn("vmresume", "threebyte", modifiers=[0x0F, 0x01, 0xC3], cpu=["P4"])
+add_insn("vmxoff", "threebyte", modifiers=[0x0F, 0x01, 0xC4], cpu=["P4"])
+
+add_group("vmxmemrd",
+    suffix="l",
+    not64=True,
+    cpu=["P4"],
+    opersize=32,
+    opcode=[0x0F, 0x78],
+    operands=[Operand(type="RM", size=32, relaxed=True, dest="EA"),
+              Operand(type="Reg", size=32, dest="Spare")])
+add_group("vmxmemrd",
+    suffix="q",
+    cpu=["P4"],
+    opersize=64,
+    def_opersize_64=64,
+    opcode=[0x0F, 0x78],
+    operands=[Operand(type="RM", size=64, relaxed=True, dest="EA"),
+              Operand(type="Reg", size=64, dest="Spare")])
+add_insn("vmread", "vmxmemrd")
+
+add_group("vmxmemwr",
+    suffix="l",
+    not64=True,
+    cpu=["P4"],
+    opersize=32,
+    opcode=[0x0F, 0x79],
+    operands=[Operand(type="Reg", size=32, dest="Spare"),
+              Operand(type="RM", size=32, relaxed=True, dest="EA")])
+add_group("vmxmemwr",
+    suffix="q",
+    cpu=["P4"],
+    opersize=64,
+    def_opersize_64=64,
+    opcode=[0x0F, 0x79],
+    operands=[Operand(type="Reg", size=64, dest="Spare"),
+              Operand(type="RM", size=64, relaxed=True, dest="EA")])
+add_insn("vmwrite", "vmxmemwr")
+
+add_group("vmxtwobytemem",
+    modifiers=["SpAdd"],
+    cpu=["P4"],
+    opcode=[0x0F, 0xC7],
+    spare=0,
+    operands=[Operand(type="Mem", size=64, relaxed=True, dest="EA")])
+add_insn("vmptrld", "vmxtwobytemem", modifiers=[6])
+add_insn("vmptrst", "vmxtwobytemem", modifiers=[7])
+
+add_group("vmxthreebytemem",
+    modifiers=["PreAdd"],
+    cpu=["P4"],
+    prefix=0x00,
+    opcode=[0x0F, 0xC7],
+    spare=6,
+    operands=[Operand(type="Mem", size=64, relaxed=True, dest="EA")])
+add_insn("vmclear", "vmxthreebytemem", modifiers=[0x66])
+add_insn("vmxon", "vmxthreebytemem", modifiers=[0xF3])
+
+add_insn("cvttpd2pi", "cvt_mm_xmm", modifiers=[0x66, 0x2C], cpu=["SSE2"])
+add_insn("cvttsd2si", "cvt_rx_xmm64", modifiers=[0xF2, 0x2C], cpu=["SSE2"])
+add_insn("cvttpd2dq", "ssess", modifiers=[0x66, 0xE6], cpu=["SSE2"])
+add_insn("cvttps2dq", "ssess", modifiers=[0xF3, 0x5B], cpu=["SSE2"])
+add_insn("pmuludq", "mmxsse2", modifiers=[0xF4], cpu=["SSE2"])
+add_insn("pshufd", "ssessimm", modifiers=[0x66, 0x70], cpu=["SSE2"])
+add_insn("pshufhw", "ssessimm", modifiers=[0xF3, 0x70], cpu=["SSE2"])
+add_insn("pshuflw", "ssessimm", modifiers=[0xF2, 0x70], cpu=["SSE2"])
+add_insn("punpckhqdq", "ssess", modifiers=[0x66, 0x6D], cpu=["SSE2"])
+add_insn("punpcklqdq", "ssess", modifiers=[0x66, 0x6C], cpu=["SSE2"])
+
+add_group("cvt_xmm_xmm32",
+    cpu=["SSE2"],
+    modifiers=["PreAdd", "Op1Add"],
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("cvt_xmm_xmm32",
+    cpu=["SSE2"],
+    modifiers=["PreAdd", "Op1Add"],
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Mem", size=32, relaxed=True, dest="EA")])
+
+add_insn("cvtss2sd", "cvt_xmm_xmm32", modifiers=[0xF3, 0x5A])
+
+add_group("maskmovdqu",
+    cpu=["SSE2"],
+    prefix=0x66,
+    opcode=[0x0F, 0xF7],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+
+add_insn("maskmovdqu", "maskmovdqu")
+
+add_group("movdqau",
+    cpu=["SSE2"],
+    modifiers=["PreAdd"],
+    prefix=0x00,
+    opcode=[0x0F, 0x6F],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+add_group("movdqau",
+    cpu=["SSE2"],
+    modifiers=["PreAdd"],
+    prefix=0x00,
+    opcode=[0x0F, 0x7F],
+    operands=[Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+
+add_insn("movdqa", "movdqau", modifiers=[0x66])
+add_insn("movdqu", "movdqau", modifiers=[0xF3])
+
+add_group("movdq2q",
+    cpu=["SSE2"],
+    prefix=0xF2,
+    opcode=[0x0F, 0xD6],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+
+add_insn("movdq2q", "movdq2q")
+
+add_group("movq2dq",
+    cpu=["SSE2"],
+    prefix=0xF3,
+    opcode=[0x0F, 0xD6],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDReg", size=64, dest="EA")])
+
+add_insn("movq2dq", "movq2dq")
+
+add_group("pslrldq",
+    cpu=["SSE2"],
+    modifiers=["SpAdd"],
+    prefix=0x66,
+    opcode=[0x0F, 0x73],
+    spare=0,
+    operands=[Operand(type="SIMDReg", size=128, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("pslldq", "pslrldq", modifiers=[7])
+add_insn("psrldq", "pslrldq", modifiers=[3])
+
+#####################################################################
+# SSE3 / PNI (Prescott New Instructions) instructions
+#####################################################################
+add_insn("addsubpd", "ssess", modifiers=[0x66, 0xD0], cpu=["SSE3"])
+add_insn("addsubps", "ssess", modifiers=[0xF2, 0xD0], cpu=["SSE3"])
+add_insn("haddpd",   "ssess", modifiers=[0x66, 0x7C], cpu=["SSE3"])
+add_insn("haddps",   "ssess", modifiers=[0xF2, 0x7C], cpu=["SSE3"])
+add_insn("hsubpd",   "ssess", modifiers=[0x66, 0x7D], cpu=["SSE3"])
+add_insn("hsubps",   "ssess", modifiers=[0xF2, 0x7D], cpu=["SSE3"])
+add_insn("movshdup", "ssess", modifiers=[0xF3, 0x16], cpu=["SSE3"])
+add_insn("movsldup", "ssess", modifiers=[0xF3, 0x12], cpu=["SSE3"])
+add_insn("fisttp",   "fildstp", modifiers=[1, 0, 1], cpu=["SSE3"])
+add_insn("fisttpll", "fildstp", suffix="q", modifiers=[7], cpu=["SSE3"])
+add_insn("movddup", "cvt_xmm_xmm64_ss", modifiers=[0xF2, 0x12], cpu=["SSE3"])
+add_insn("monitor", "threebyte", modifiers=[0x0F, 0x01, 0xC8], cpu=["SSE3"])
+add_insn("mwait",   "threebyte", modifiers=[0x0F, 0x01, 0xC9], cpu=["SSE3"])
+
+add_group("lddqu",
+    cpu=["SSE3"],
+    prefix=0xF2,
+    opcode=[0x0F, 0xF0],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Mem", dest="EA")])
+
+add_insn("lddqu", "lddqu")
+
+#####################################################################
+# SSSE3 / TNI (Tejas New Instructions) instructions
+#####################################################################
+
+add_group("ssse3",
+    cpu=["SSSE3"],
+    modifiers=["Op2Add"],
+    opcode=[0x0F, 0x38, 0x00],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="SIMDRM", size=64, relaxed=True, dest="EA")])
+add_group("ssse3",
+    cpu=["SSSE3"],
+    modifiers=["Op2Add"],
+    prefix=0x66,
+    opcode=[0x0F, 0x38, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+
+add_insn("pshufb",    "ssse3", modifiers=[0x00])
+add_insn("phaddw",    "ssse3", modifiers=[0x01])
+add_insn("phaddd",    "ssse3", modifiers=[0x02])
+add_insn("phaddsw",   "ssse3", modifiers=[0x03])
+add_insn("pmaddubsw", "ssse3", modifiers=[0x04])
+add_insn("phsubw",    "ssse3", modifiers=[0x05])
+add_insn("phsubd",    "ssse3", modifiers=[0x06])
+add_insn("phsubsw",   "ssse3", modifiers=[0x07])
+add_insn("psignb",    "ssse3", modifiers=[0x08])
+add_insn("psignw",    "ssse3", modifiers=[0x09])
+add_insn("psignd",    "ssse3", modifiers=[0x0A])
+add_insn("pmulhrsw",  "ssse3", modifiers=[0x0B])
+add_insn("pabsb",     "ssse3", modifiers=[0x1C])
+add_insn("pabsw",     "ssse3", modifiers=[0x1D])
+add_insn("pabsd",     "ssse3", modifiers=[0x1E])
+
+add_group("ssse3imm",
+    cpu=["SSSE3"],
+    modifiers=["Op2Add"],
+    opcode=[0x0F, 0x3A, 0x00],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="SIMDRM", size=64, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("ssse3imm",
+    cpu=["SSSE3"],
+    modifiers=["Op2Add"],
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("palignr", "ssse3imm", modifiers=[0x0F])
+
+#####################################################################
+# SSE4.1 / SSE4.2 instructions
+#####################################################################
+
+add_group("sse4",
+    cpu=["SSE41"],
+    modifiers=["Op2Add"],
+    prefix=0x66,
+    opcode=[0x0F, 0x38, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+
+add_insn("packusdw",   "sse4", modifiers=[0x2B])
+add_insn("pcmpeqq",    "sse4", modifiers=[0x29])
+add_insn("pcmpgtq",    "sse4", modifiers=[0x37])
+add_insn("phminposuw", "sse4", modifiers=[0x41])
+add_insn("pmaxsb",     "sse4", modifiers=[0x3C])
+add_insn("pmaxsd",     "sse4", modifiers=[0x3D])
+add_insn("pmaxud",     "sse4", modifiers=[0x3F])
+add_insn("pmaxuw",     "sse4", modifiers=[0x3E])
+add_insn("pminsb",     "sse4", modifiers=[0x38])
+add_insn("pminsd",     "sse4", modifiers=[0x39])
+add_insn("pminud",     "sse4", modifiers=[0x3B])
+add_insn("pminuw",     "sse4", modifiers=[0x3A])
+add_insn("pmuldq",     "sse4", modifiers=[0x28])
+add_insn("pmulld",     "sse4", modifiers=[0x40])
+add_insn("ptest",      "sse4", modifiers=[0x17])
+
+add_group("sse4imm",  # operands: 128-bit SIMD reg, SIMD reg/mem, 8-bit imm
+    cpu=["SSE41"],
+    modifiers=["Op2Add"],  # NOTE(review): presumably patches opcode[2]; confirm
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+for sz in [32, 64]:  # defines groups "sse4m32imm" and "sse4m64imm"
+    add_group("sse4m%dimm" % sz,  # form 1: SIMD register as second operand
+        cpu=["SSE41"],
+        modifiers=["Op2Add"],
+        prefix=0x66,
+        opcode=[0x0F, 0x3A, 0x00],
+        operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+                  Operand(type="SIMDReg", size=128, dest="EA"),
+                  Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+    add_group("sse4m%dimm" % sz,  # form 2: sz-bit memory as second operand
+        cpu=["SSE41"],
+        modifiers=["Op2Add"],
+        prefix=0x66,
+        opcode=[0x0F, 0x3A, 0x00],
+        operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+                  Operand(type="Mem", size=sz, relaxed=True, dest="EA"),
+                  Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("blendpd", "sse4imm", modifiers=[0x0D])
+add_insn("blendps", "sse4imm", modifiers=[0x0C])
+add_insn("dppd",    "sse4imm", modifiers=[0x41])
+add_insn("dpps",    "sse4imm", modifiers=[0x40])
+add_insn("mpsadbw", "sse4imm", modifiers=[0x42])
+add_insn("pblendw", "sse4imm", modifiers=[0x0E])
+add_insn("roundpd", "sse4imm", modifiers=[0x09])
+add_insn("roundps", "sse4imm", modifiers=[0x08])
+add_insn("roundsd", "sse4m64imm", modifiers=[0x0B])  # 64-bit memory form
+add_insn("roundss", "sse4m32imm", modifiers=[0x0A])  # 32-bit memory form
+
+add_group("sse4xmm0",  # form 1: two explicit operands
+    cpu=["SSE41"],
+    modifiers=["Op2Add"],
+    prefix=0x66,
+    opcode=[0x0F, 0x38, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+add_group("sse4xmm0",  # form 2: same encoding plus a third "XMM0" operand
+    cpu=["SSE41"],
+    modifiers=["Op2Add"],
+    prefix=0x66,
+    opcode=[0x0F, 0x38, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="XMM0", size=128, dest=None)])  # dest=None here
+
+add_insn("blendvpd", "sse4xmm0", modifiers=[0x15])
+add_insn("blendvps", "sse4xmm0", modifiers=[0x14])
+add_insn("pblendvb", "sse4xmm0", modifiers=[0x10])
+
+for sfx, sz in zip("bwl", [8, 16, 32]):  # 32-bit destination register forms
+    add_group("crc32",
+        suffix=sfx,
+        cpu=["SSE42"],
+        opersize=sz,  # operand size follows the source width here
+        prefix=0xF2,
+        opcode=[0x0F, 0x38, 0xF0+(sz!=8)],  # 0xF0 when sz == 8, else 0xF1
+        operands=[Operand(type="Reg", size=32, dest="Spare"),
+                  Operand(type="RM", size=sz, relaxed=(sz==32), dest="EA")])
+for sfx, sz in zip("bq", [8, 64]):  # 64-bit destination register forms
+    add_group("crc32",
+        suffix=sfx,
+        cpu=["SSE42"],
+        opersize=64,  # fixed at 64 even for the 8-bit source
+        prefix=0xF2,
+        opcode=[0x0F, 0x38, 0xF0+(sz!=8)],  # 0xF0 when sz == 8, else 0xF1
+        operands=[Operand(type="Reg", size=64, dest="Spare"),
+                  Operand(type="RM", size=sz, relaxed=(sz==64), dest="EA")])
+
+add_insn("crc32", "crc32")  # no modifiers: opcode is fully given by the group
+
+add_group("extractps",  # form 1: 32-bit reg/mem destination
+    cpu=["SSE41"],
+    opersize=32,
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x17],
+    operands=[Operand(type="RM", size=32, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("extractps",  # form 2: 64-bit register destination (no memory)
+    cpu=["SSE41"],
+    opersize=64,
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x17],
+    operands=[Operand(type="Reg", size=64, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("extractps", "extractps")
+
+add_group("insertps",  # form 1: 32-bit memory as second operand
+    cpu=["SSE41"],
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x21],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Mem", size=32, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("insertps",  # form 2: SIMD register as second operand
+    cpu=["SSE41"],
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x21],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("insertps", "insertps")
+
+add_group("movntdqa",  # single form: SIMD register, 128-bit memory
+    cpu=["SSE41"],
+    prefix=0x66,
+    opcode=[0x0F, 0x38, 0x2A],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Mem", size=128, relaxed=True, dest="EA")])
+
+add_insn("movntdqa", "movntdqa")  # type="Mem": no register-register form
+
+add_group("sse4pcmpstr",  # same shape as "sse4imm" but cpu is "SSE42"
+    cpu=["SSE42"],
+    modifiers=["Op2Add"],
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("pcmpestri", "sse4pcmpstr", modifiers=[0x61])
+add_insn("pcmpestrm", "sse4pcmpstr", modifiers=[0x60])
+add_insn("pcmpistri", "sse4pcmpstr", modifiers=[0x63])
+add_insn("pcmpistrm", "sse4pcmpstr", modifiers=[0x62])
+
+add_group("pextrb",  # form 1: 8-bit memory destination, no opersize given
+    cpu=["SSE41"],
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x14],
+    operands=[Operand(type="Mem", size=8, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+for sz in [32, 64]:  # forms 2 and 3: 32- and 64-bit register destinations
+    add_group("pextrb",
+        cpu=["SSE41"],
+        opersize=sz,  # operand size matches the destination register
+        prefix=0x66,
+        opcode=[0x0F, 0x3A, 0x14],
+        operands=[Operand(type="Reg", size=sz, dest="EA"),
+                  Operand(type="SIMDReg", size=128, dest="Spare"),
+                  Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("pextrb", "pextrb")
+
+add_group("pextrd",  # 32-bit reg/mem destination
+    cpu=["SSE41"],
+    opersize=32,
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x16],  # same bytes as "pextrq"; opersize differs
+    operands=[Operand(type="RM", size=32, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("pextrd", "pextrd")
+
+add_group("pextrq",  # 64-bit reg/mem destination
+    cpu=["SSE41"],
+    opersize=64,
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x16],  # same bytes as "pextrd"; opersize differs
+    operands=[Operand(type="RM", size=64, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("pextrq", "pextrq")
+
+add_group("pinsrb",  # form 1: 8-bit memory as second operand
+    cpu=["SSE41"],
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x20],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Mem", size=8, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("pinsrb",  # form 2: 32-bit general register as second operand
+    cpu=["SSE41"],
+    opersize=32,
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x20],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Reg", size=32, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("pinsrb", "pinsrb")
+
+add_group("pinsrd",  # 32-bit reg/mem as second operand
+    cpu=["SSE41"],
+    opersize=32,
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x22],  # same bytes as "pinsrq"; opersize differs
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="RM", size=32, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("pinsrd", "pinsrd")
+
+add_group("pinsrq",  # 64-bit reg/mem as second operand
+    cpu=["SSE41"],
+    opersize=64,
+    prefix=0x66,
+    opcode=[0x0F, 0x3A, 0x22],  # same bytes as "pinsrd"; opersize differs
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="RM", size=64, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("pinsrq", "pinsrq")
+
+for sz in [16, 32, 64]:  # defines groups "sse4m16", "sse4m32", "sse4m64"
+    add_group("sse4m%d" % sz,  # form 1: sz-bit memory as second operand
+        cpu=["SSE41"],
+        modifiers=["Op2Add"],
+        prefix=0x66,
+        opcode=[0x0F, 0x38, 0x00],
+        operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+                  Operand(type="Mem", size=sz, relaxed=True, dest="EA")])
+    add_group("sse4m%d" % sz,  # form 2: SIMD register as second operand
+        cpu=["SSE41"],
+        modifiers=["Op2Add"],
+        prefix=0x66,
+        opcode=[0x0F, 0x38, 0x00],
+        operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+                  Operand(type="SIMDReg", size=128, dest="EA")])
+
+add_insn("pmovsxbw", "sse4m64", modifiers=[0x20])  # 64-bit memory forms
+add_insn("pmovsxwd", "sse4m64", modifiers=[0x23])
+add_insn("pmovsxdq", "sse4m64", modifiers=[0x25])
+add_insn("pmovzxbw", "sse4m64", modifiers=[0x30])
+add_insn("pmovzxwd", "sse4m64", modifiers=[0x33])
+add_insn("pmovzxdq", "sse4m64", modifiers=[0x35])
+
+add_insn("pmovsxbd", "sse4m32", modifiers=[0x21])  # 32-bit memory forms
+add_insn("pmovsxwq", "sse4m32", modifiers=[0x24])
+add_insn("pmovzxbd", "sse4m32", modifiers=[0x31])
+add_insn("pmovzxwq", "sse4m32", modifiers=[0x34])
+
+add_insn("pmovsxbq", "sse4m16", modifiers=[0x22])  # 16-bit memory forms
+add_insn("pmovzxbq", "sse4m16", modifiers=[0x32])
+
+for sfx, sz in zip("wlq", [16, 32, 64]):  # one "cnt" form per operand size
+    add_group("cnt",  # no cpu= here; each add_insn supplies its own
+        suffix=sfx,
+        modifiers=["Op1Add"],  # NOTE(review): presumably patches opcode[1]; confirm
+        opersize=sz,
+        prefix=0xF3,
+        opcode=[0x0F, 0x00],
+        operands=[Operand(type="Reg", size=sz, dest="Spare"),
+                  Operand(type="RM", size=sz, relaxed=True, dest="EA")])
+
+add_insn("popcnt", "cnt", modifiers=[0xB8], cpu=["SSE42"])  # cpu set per insn
+
+#####################################################################
+# AMD SSE4a instructions
+#####################################################################
+
+add_group("extrq",  # form 1: SIMD register plus two 8-bit immediates
+    cpu=["SSE4a"],
+    prefix=0x66,
+    opcode=[0x0F, 0x78],
+    operands=[Operand(type="SIMDReg", size=128, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="EA"),  # dest="EA"
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("extrq",  # form 2: two SIMD registers, opcode 0x0F 0x79
+    cpu=["SSE4a"],
+    prefix=0x66,
+    opcode=[0x0F, 0x79],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+
+add_insn("extrq", "extrq")
+
+add_group("insertq",  # form 1: two SIMD registers plus two 8-bit immediates
+    cpu=["SSE4a"],
+    prefix=0xF2,
+    opcode=[0x0F, 0x78],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="EA"),  # dest="EA"
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+add_group("insertq",  # form 2: two SIMD registers only, opcode 0x0F 0x79
+    cpu=["SSE4a"],
+    prefix=0xF2,
+    opcode=[0x0F, 0x79],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+
+add_insn("insertq", "insertq")
+
+add_group("movntsd",  # 64-bit memory destination, SIMD register source
+    cpu=["SSE4a"],
+    prefix=0xF2,  # "movntss" uses 0xF3 with the same opcode bytes
+    opcode=[0x0F, 0x2B],
+    operands=[Operand(type="Mem", size=64, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+
+add_insn("movntsd", "movntsd")
+
+add_group("movntss",  # 32-bit memory destination, SIMD register source
+    cpu=["SSE4a"],
+    prefix=0xF3,  # "movntsd" uses 0xF2 with the same opcode bytes
+    opcode=[0x0F, 0x2B],
+    operands=[Operand(type="Mem", size=32, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+
+add_insn("movntss", "movntss")
+
+#####################################################################
+# AMD SSE5 instructions
+#####################################################################
+
+add_group("sse5com",  # operands: SIMD reg, SIMD reg, SIMD reg/mem, imm8
+    cpu=["SSE5"],
+    modifiers=["Op2Add"],
+    opcode=[0x0F, 0x25, 0x00],
+    drex_oc0=0,
+    operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+              Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+for sz in [32, 64]:  # defines groups "sse5com32" and "sse5com64"
+    add_group("sse5com%d" % sz,  # form 1: SIMD register as third operand
+        cpu=["SSE5"],
+        modifiers=["Op2Add"],
+        opcode=[0x0F, 0x25, 0x00],
+        drex_oc0=0,
+        operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+                  Operand(type="SIMDReg", size=128, dest="Spare"),
+                  Operand(type="SIMDReg", size=128, dest="EA"),
+                  Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+    add_group("sse5com%d" % sz,  # form 2: sz-bit memory as third operand
+        cpu=["SSE5"],
+        modifiers=["Op2Add"],
+        opcode=[0x0F, 0x25, 0x00],
+        drex_oc0=0,
+        operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+                  Operand(type="SIMDReg", size=128, dest="Spare"),
+                  Operand(type="Mem", size=sz, relaxed=True, dest="EA"),
+                  Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("comps", "sse5com", modifiers=[0x2C])
+add_insn("compd", "sse5com", modifiers=[0x2D])
+add_insn("comss", "sse5com32", modifiers=[0x2E])  # 32-bit memory form
+add_insn("comsd", "sse5com64", modifiers=[0x2F])  # 64-bit memory form
+
+add_insn("pcomb", "sse5com", modifiers=[0x4C])
+add_insn("pcomw", "sse5com", modifiers=[0x4D])
+add_insn("pcomd", "sse5com", modifiers=[0x4E])
+add_insn("pcomq", "sse5com", modifiers=[0x4F])
+
+add_insn("pcomub", "sse5com", modifiers=[0x6C])
+add_insn("pcomuw", "sse5com", modifiers=[0x6D])
+add_insn("pcomud", "sse5com", modifiers=[0x6E])
+add_insn("pcomuq", "sse5com", modifiers=[0x6F])
+
+add_group("cvtph2ps",  # form 1: SIMD register source
+    cpu=["SSE5"],
+    opcode=[0x0F, 0x7A, 0x30],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("cvtph2ps",  # form 2: 64-bit memory source
+    cpu=["SSE5"],
+    opcode=[0x0F, 0x7A, 0x30],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="Mem", size=64, relaxed=True, dest="EA")])
+
+add_insn("cvtph2ps", "cvtph2ps")
+
+add_group("cvtps2ph",  # form 1: SIMD register destination (dest="EA")
+    cpu=["SSE5"],
+    opcode=[0x0F, 0x7A, 0x31],
+    operands=[Operand(type="SIMDReg", size=128, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+add_group("cvtps2ph",  # form 2: 64-bit memory destination
+    cpu=["SSE5"],
+    opcode=[0x0F, 0x7A, 0x31],
+    operands=[Operand(type="Mem", size=64, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+
+add_insn("cvtps2ph", "cvtps2ph")
+
+add_group("sse5arith",  # form 1: base 0x00, oc0=0; reg/mem is 4th operand
+    cpu=["SSE5"],
+    modifiers=["Op2Add"],
+    opcode=[0x0F, 0x24, 0x00],
+    drex_oc0=0,
+    operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+              Operand(type="SIMDRegMatch0", size=128, dest=None),  # presumably must equal operand 0; confirm
+              Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+add_group("sse5arith",  # form 2: base 0x00, oc0=1; reg/mem is 3rd operand
+    cpu=["SSE5"],
+    modifiers=["Op2Add"],
+    opcode=[0x0F, 0x24, 0x00],
+    drex_oc0=1,
+    operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+              Operand(type="SIMDRegMatch0", size=128, dest=None),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+add_group("sse5arith",  # form 3: base 0x04, oc0=0; Match0 operand is last
+    cpu=["SSE5"],
+    modifiers=["Op2Add"],
+    opcode=[0x0F, 0x24, 0x04],
+    drex_oc0=0,
+    operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+              Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="SIMDRegMatch0", size=128, dest=None)])
+add_group("sse5arith",  # form 4: base 0x04, oc0=1; reg/mem is 2nd operand
+    cpu=["SSE5"],
+    modifiers=["Op2Add"],
+    opcode=[0x0F, 0x24, 0x04],
+    drex_oc0=1,
+    operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRegMatch0", size=128, dest=None)])
+
+for sz in [32, 64]:  # "sse5arith32"/"sse5arith64": each form above, reg + mem
+    add_group("sse5arith%d" % sz,  # base 0x00, oc0=0, register EA
+        cpu=["SSE5"],
+        modifiers=["Op2Add"],
+        opcode=[0x0F, 0x24, 0x00],
+        drex_oc0=0,
+        operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+                  Operand(type="SIMDRegMatch0", size=128, dest=None),
+                  Operand(type="SIMDReg", size=128, dest="Spare"),
+                  Operand(type="SIMDReg", size=128, dest="EA")])
+    add_group("sse5arith%d" % sz,  # base 0x00, oc0=0, sz-bit memory EA
+        cpu=["SSE5"],
+        modifiers=["Op2Add"],
+        opcode=[0x0F, 0x24, 0x00],
+        drex_oc0=0,
+        operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+                  Operand(type="SIMDRegMatch0", size=128, dest=None),
+                  Operand(type="SIMDReg", size=128, dest="Spare"),
+                  Operand(type="Mem", size=sz, relaxed=True, dest="EA")])
+    add_group("sse5arith%d" % sz,  # base 0x00, oc0=1, register EA
+        cpu=["SSE5"],
+        modifiers=["Op2Add"],
+        opcode=[0x0F, 0x24, 0x00],
+        drex_oc0=1,
+        operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+                  Operand(type="SIMDRegMatch0", size=128, dest=None),
+                  Operand(type="SIMDReg", size=128, dest="EA"),
+                  Operand(type="SIMDReg", size=128, dest="Spare")])
+    add_group("sse5arith%d" % sz,  # base 0x00, oc0=1, sz-bit memory EA
+        cpu=["SSE5"],
+        modifiers=["Op2Add"],
+        opcode=[0x0F, 0x24, 0x00],
+        drex_oc0=1,
+        operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+                  Operand(type="SIMDRegMatch0", size=128, dest=None),
+                  Operand(type="Mem", size=sz, relaxed=True, dest="EA"),
+                  Operand(type="SIMDReg", size=128, dest="Spare")])
+    add_group("sse5arith%d" % sz,  # base 0x04, oc0=0, register EA
+        cpu=["SSE5"],
+        modifiers=["Op2Add"],
+        opcode=[0x0F, 0x24, 0x04],
+        drex_oc0=0,
+        operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+                  Operand(type="SIMDReg", size=128, dest="Spare"),
+                  Operand(type="SIMDReg", size=128, dest="EA"),
+                  Operand(type="SIMDRegMatch0", size=128, dest=None)])
+    add_group("sse5arith%d" % sz,  # base 0x04, oc0=0, sz-bit memory EA
+        cpu=["SSE5"],
+        modifiers=["Op2Add"],
+        opcode=[0x0F, 0x24, 0x04],
+        drex_oc0=0,
+        operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+                  Operand(type="SIMDReg", size=128, dest="Spare"),
+                  Operand(type="Mem", size=sz, relaxed=True, dest="EA"),
+                  Operand(type="SIMDRegMatch0", size=128, dest=None)])
+    add_group("sse5arith%d" % sz,  # base 0x04, oc0=1, register EA
+        cpu=["SSE5"],
+        modifiers=["Op2Add"],
+        opcode=[0x0F, 0x24, 0x04],
+        drex_oc0=1,
+        operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+                  Operand(type="SIMDReg", size=128, dest="EA"),
+                  Operand(type="SIMDReg", size=128, dest="Spare"),
+                  Operand(type="SIMDRegMatch0", size=128, dest=None)])
+    add_group("sse5arith%d" % sz,  # base 0x04, oc0=1, sz-bit memory EA
+        cpu=["SSE5"],
+        modifiers=["Op2Add"],
+        opcode=[0x0F, 0x24, 0x04],
+        drex_oc0=1,
+        operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+                  Operand(type="Mem", size=sz, relaxed=True, dest="EA"),
+                  Operand(type="SIMDReg", size=128, dest="Spare"),
+                  Operand(type="SIMDRegMatch0", size=128, dest=None)])
+
+add_insn("fmaddps", "sse5arith", modifiers=[0x00])
+add_insn("fmaddpd", "sse5arith", modifiers=[0x01])
+add_insn("fmaddss", "sse5arith32", modifiers=[0x02])  # 32-bit memory forms
+add_insn("fmaddsd", "sse5arith64", modifiers=[0x03])  # 64-bit memory forms
+
+add_insn("fmsubps", "sse5arith", modifiers=[0x08])
+add_insn("fmsubpd", "sse5arith", modifiers=[0x09])
+add_insn("fmsubss", "sse5arith32", modifiers=[0x0A])
+add_insn("fmsubsd", "sse5arith64", modifiers=[0x0B])
+
+add_insn("fnmaddps", "sse5arith", modifiers=[0x10])
+add_insn("fnmaddpd", "sse5arith", modifiers=[0x11])
+add_insn("fnmaddss", "sse5arith32", modifiers=[0x12])
+add_insn("fnmaddsd", "sse5arith64", modifiers=[0x13])
+
+add_insn("fnmsubps", "sse5arith", modifiers=[0x18])
+add_insn("fnmsubpd", "sse5arith", modifiers=[0x19])
+add_insn("fnmsubss", "sse5arith32", modifiers=[0x1A])
+add_insn("fnmsubsd", "sse5arith64", modifiers=[0x1B])
+
+add_insn("pcmov", "sse5arith", modifiers=[0x22])
+
+add_insn("permps", "sse5arith", modifiers=[0x20])
+add_insn("permpd", "sse5arith", modifiers=[0x21])
+add_insn("pperm", "sse5arith", modifiers=[0x23])
+
+add_group("sse5two",  # two operands: SIMD reg, 128-bit SIMD reg/mem
+    cpu=["SSE5"],
+    modifiers=["Op2Add"],
+    opcode=[0x0F, 0x7A, 0x00],
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+for sz in [32, 64]:  # defines groups "sse5two32" and "sse5two64"
+    add_group("sse5two%d" % sz,  # form 1: SIMD register source
+        cpu=["SSE5"],
+        modifiers=["Op2Add"],
+        opcode=[0x0F, 0x7A, 0x00],
+        operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+                  Operand(type="SIMDReg", size=128, dest="EA")])
+    add_group("sse5two%d" % sz,  # form 2: sz-bit memory source
+        cpu=["SSE5"],
+        modifiers=["Op2Add"],
+        opcode=[0x0F, 0x7A, 0x00],
+        operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+                  Operand(type="Mem", size=sz, relaxed=True, dest="EA")])
+
+add_insn("frczps", "sse5two", modifiers=[0x10])
+add_insn("frczpd", "sse5two", modifiers=[0x11])
+add_insn("frczss", "sse5two32", modifiers=[0x12])  # 32-bit memory form
+add_insn("frczsd", "sse5two64", modifiers=[0x13])  # 64-bit memory form
+
+add_insn("phaddbw", "sse5two", modifiers=[0x41])
+add_insn("phaddbd", "sse5two", modifiers=[0x42])
+add_insn("phaddbq", "sse5two", modifiers=[0x43])
+add_insn("phaddwd", "sse5two", modifiers=[0x46])
+add_insn("phaddwq", "sse5two", modifiers=[0x47])
+add_insn("phadddq", "sse5two", modifiers=[0x4B])
+
+add_insn("phaddubw", "sse5two", modifiers=[0x51])
+add_insn("phaddubd", "sse5two", modifiers=[0x52])
+add_insn("phaddubq", "sse5two", modifiers=[0x53])
+add_insn("phadduwd", "sse5two", modifiers=[0x56])
+add_insn("phadduwq", "sse5two", modifiers=[0x57])
+add_insn("phaddudq", "sse5two", modifiers=[0x5B])
+
+add_insn("phsubbw", "sse5two", modifiers=[0x61])
+add_insn("phsubwd", "sse5two", modifiers=[0x62])
+add_insn("phsubdq", "sse5two", modifiers=[0x63])
+
+add_group("sse5pmacs",  # single form; the "SIMDRegMatch0" operand comes last
+    cpu=["SSE5"],
+    modifiers=["Op2Add"],
+    opcode=[0x0F, 0x24, 0x00],
+    drex_oc0=0,  # only oc0=0 is defined for this group
+    operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+              Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="SIMDRegMatch0", size=128, dest=None)])
+
+add_insn("pmacsww", "sse5pmacs", modifiers=[0x95])
+add_insn("pmacswd", "sse5pmacs", modifiers=[0x96])
+add_insn("pmacsdql", "sse5pmacs", modifiers=[0x97])
+add_insn("pmacsdd", "sse5pmacs", modifiers=[0x9E])
+add_insn("pmacsdqh", "sse5pmacs", modifiers=[0x9F])
+
+add_insn("pmacssww", "sse5pmacs", modifiers=[0x85])
+add_insn("pmacsswd", "sse5pmacs", modifiers=[0x86])
+add_insn("pmacssdql", "sse5pmacs", modifiers=[0x87])
+add_insn("pmacssdd", "sse5pmacs", modifiers=[0x8E])
+add_insn("pmacssdqh", "sse5pmacs", modifiers=[0x8F])
+
+add_insn("pmadcsswd", "sse5pmacs", modifiers=[0xA6])
+add_insn("pmadcswd", "sse5pmacs", modifiers=[0xB6])
+
+add_group("sse5prot",  # form 1: oc0=0; reg/mem is the 3rd operand
+    cpu=["SSE5"],
+    modifiers=["Op2Add"],
+    opcode=[0x0F, 0x24, 0x40],
+    drex_oc0=0,
+    operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+              Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+add_group("sse5prot",  # form 2: oc0=1; reg/mem is the 2nd operand
+    cpu=["SSE5"],
+    modifiers=["Op2Add"],
+    opcode=[0x0F, 0x24, 0x40],
+    drex_oc0=1,
+    operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+add_group("sse5prot",  # form 3: imm8 variant; no DREX operand, no drex_oc0
+    cpu=["SSE5"],
+    modifiers=["Op2Add"],
+    opcode=[0x0F, 0x7B, 0x40],  # second byte is 0x7B here, not 0x24
+    operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_insn("protb", "sse5prot", modifiers=[0x00])
+add_insn("protw", "sse5prot", modifiers=[0x01])
+add_insn("protd", "sse5prot", modifiers=[0x02])
+add_insn("protq", "sse5prot", modifiers=[0x03])
+
+add_group("sse5psh",  # form 1: oc0=0; reg/mem is the 3rd operand
+    cpu=["SSE5"],
+    modifiers=["Op2Add"],
+    opcode=[0x0F, 0x24, 0x44],
+    drex_oc0=0,
+    operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+              Operand(type="SIMDReg", size=128, dest="Spare"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+add_group("sse5psh",  # form 2: oc0=1; reg/mem is the 2nd operand
+    cpu=["SSE5"],
+    modifiers=["Op2Add"],
+    opcode=[0x0F, 0x24, 0x44],
+    drex_oc0=1,
+    operands=[Operand(type="SIMDReg", size=128, dest="DREX"),
+              Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+              Operand(type="SIMDReg", size=128, dest="Spare")])
+
+add_insn("pshlb", "sse5psh", modifiers=[0x00])
+add_insn("pshlw", "sse5psh", modifiers=[0x01])
+add_insn("pshld", "sse5psh", modifiers=[0x02])
+add_insn("pshlq", "sse5psh", modifiers=[0x03])
+
+add_insn("pshab", "sse5psh", modifiers=[0x04])
+add_insn("pshaw", "sse5psh", modifiers=[0x05])
+add_insn("pshad", "sse5psh", modifiers=[0x06])
+add_insn("pshaq", "sse5psh", modifiers=[0x07])
+
+# roundps, roundpd, roundss, roundsd, ptest are in SSE4.1
+
+#####################################################################
+# AMD 3DNow! instructions
+#####################################################################
+
+add_insn("prefetch", "twobytemem", modifiers=[0x00, 0x0F, 0x0D], cpu=["3DNow"])
+add_insn("prefetchw", "twobytemem", modifiers=[0x01, 0x0F, 0x0D], cpu=["3DNow"])
+add_insn("femms", "twobyte", modifiers=[0x0F, 0x0E], cpu=["3DNow"])
+
+add_group("now3d",  # operands: 64-bit SIMD reg, 64-bit SIMD reg/mem
+    cpu=["3DNow"],  # default cpu; some insns below override it
+    modifiers=["Imm8"],  # NOTE(review): presumably modifier becomes an imm8; confirm
+    opcode=[0x0F, 0x0F],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="SIMDRM", size=64, relaxed=True, dest="EA")])
+
+add_insn("pavgusb", "now3d", modifiers=[0xBF])
+add_insn("pf2id", "now3d", modifiers=[0x1D])
+add_insn("pf2iw", "now3d", modifiers=[0x1C], cpu=["Athlon", "3DNow"])
+add_insn("pfacc", "now3d", modifiers=[0xAE])
+add_insn("pfadd", "now3d", modifiers=[0x9E])
+add_insn("pfcmpeq", "now3d", modifiers=[0xB0])
+add_insn("pfcmpge", "now3d", modifiers=[0x90])
+add_insn("pfcmpgt", "now3d", modifiers=[0xA0])
+add_insn("pfmax", "now3d", modifiers=[0xA4])
+add_insn("pfmin", "now3d", modifiers=[0x94])
+add_insn("pfmul", "now3d", modifiers=[0xB4])
+add_insn("pfnacc", "now3d", modifiers=[0x8A], cpu=["Athlon", "3DNow"])
+add_insn("pfpnacc", "now3d", modifiers=[0x8E], cpu=["Athlon", "3DNow"])
+add_insn("pfrcp", "now3d", modifiers=[0x96])
+add_insn("pfrcpit1", "now3d", modifiers=[0xA6])
+add_insn("pfrcpit2", "now3d", modifiers=[0xB6])
+add_insn("pfrsqit1", "now3d", modifiers=[0xA7])
+add_insn("pfrsqrt", "now3d", modifiers=[0x97])
+add_insn("pfsub", "now3d", modifiers=[0x9A])
+add_insn("pfsubr", "now3d", modifiers=[0xAA])
+add_insn("pi2fd", "now3d", modifiers=[0x0D])
+add_insn("pi2fw", "now3d", modifiers=[0x0C], cpu=["Athlon", "3DNow"])
+add_insn("pmulhrwa", "now3d", modifiers=[0xB7])
+add_insn("pswapd", "now3d", modifiers=[0xBB], cpu=["Athlon", "3DNow"])
+
+#####################################################################
+# AMD extensions
+#####################################################################
+
+add_insn("syscall", "twobyte", modifiers=[0x0F, 0x05], cpu=["686", "AMD"])
+for sfx in [None, "l", "q"]:  # registers sysret, sysretl and sysretq
+    add_insn("sysret"+(sfx or ""), "twobyte", suffix=sfx, modifiers=[0x0F, 0x07],
+             cpu=["686", "AMD", "Priv"])  # (sfx or "") turns None into ""
+add_insn("lzcnt", "cnt", modifiers=[0xBD], cpu=["686", "AMD"])  # popcnt's group
+
+#####################################################################
+# AMD x86-64 extensions
+#####################################################################
+
+add_insn("swapgs", "threebyte", modifiers=[0x0F, 0x01, 0xF8], only64=True)
+add_insn("rdtscp", "threebyte", modifiers=[0x0F, 0x01, 0xF9],
+         cpu=["686", "AMD", "Priv"])  # unlike swapgs, not marked only64
+
+add_group("cmpxchg16b",  # single form: one 128-bit memory operand
+    only64=True,
+    opersize=64,
+    opcode=[0x0F, 0xC7],
+    spare=1,  # fixed spare value; no operand here has dest="Spare"
+    operands=[Operand(type="Mem", size=128, relaxed=True, dest="EA")])
+
+add_insn("cmpxchg16b", "cmpxchg16b")
+
+#####################################################################
+# AMD Pacifica SVM instructions
+#####################################################################
+
+add_insn("clgi", "threebyte", modifiers=[0x0F, 0x01, 0xDD], cpu=["SVM"])
+add_insn("stgi", "threebyte", modifiers=[0x0F, 0x01, 0xDC], cpu=["SVM"])
+add_insn("vmmcall", "threebyte", modifiers=[0x0F, 0x01, 0xD9], cpu=["SVM"])
+
+add_group("invlpga",  # form 1: no explicit operands
+    cpu=["SVM"],
+    opcode=[0x0F, 0x01, 0xDF],
+    operands=[])
+add_group("invlpga",  # form 2: same opcode with both operands written out
+    cpu=["SVM"],
+    opcode=[0x0F, 0x01, 0xDF],
+    operands=[Operand(type="MemrAX", dest="AdSizeEA"),
+              Operand(type="Creg", size=32, dest=None)])
+
+add_insn("invlpga", "invlpga")
+
+add_group("skinit",  # form 1: no explicit operands
+    cpu=["SVM"],
+    opcode=[0x0F, 0x01, 0xDE],
+    operands=[])
+add_group("skinit",  # form 2: same opcode with the operand written out
+    cpu=["SVM"],
+    opcode=[0x0F, 0x01, 0xDE],
+    operands=[Operand(type="MemEAX", dest=None)])
+
+add_insn("skinit", "skinit")
+
+add_group("svm_rax",  # form 1: no explicit operands
+    cpu=["SVM"],
+    modifiers=["Op2Add"],
+    opcode=[0x0F, 0x01, 0x00],
+    operands=[])
+add_group("svm_rax",  # form 2: explicit "MemrAX" operand
+    cpu=["SVM"],
+    modifiers=["Op2Add"],
+    opcode=[0x0F, 0x01, 0x00],
+    operands=[Operand(type="MemrAX", dest="AdSizeEA")])
+
+add_insn("vmload", "svm_rax", modifiers=[0xDA])
+add_insn("vmrun", "svm_rax", modifiers=[0xD8])
+add_insn("vmsave", "svm_rax", modifiers=[0xDB])
+
+#####################################################################
+# VIA PadLock instructions
+#####################################################################
+
+add_group("padlock",  # no operands; every insn supplies three modifiers
+    cpu=["PadLock"],
+    modifiers=["Imm8", "PreAdd", "Op1Add"],  # presumably positional; confirm
+    prefix=0x00,
+    opcode=[0x0F, 0x00],
+    operands=[])
+
+add_insn("xstore", "padlock", modifiers=[0xC0, 0x00, 0xA7])
+add_insn("xstorerng", "padlock", modifiers=[0xC0, 0x00, 0xA7])  # same as xstore
+add_insn("xcryptecb", "padlock", modifiers=[0xC8, 0xF3, 0xA7])
+add_insn("xcryptcbc", "padlock", modifiers=[0xD0, 0xF3, 0xA7])
+add_insn("xcryptctr", "padlock", modifiers=[0xD8, 0xF3, 0xA7])
+add_insn("xcryptcfb", "padlock", modifiers=[0xE0, 0xF3, 0xA7])
+add_insn("xcryptofb", "padlock", modifiers=[0xE8, 0xF3, 0xA7])
+add_insn("montmul", "padlock", modifiers=[0xC0, 0xF3, 0xA6])
+add_insn("xsha1", "padlock", modifiers=[0xC8, 0xF3, 0xA6])
+add_insn("xsha256", "padlock", modifiers=[0xD0, 0xF3, 0xA6])
+
+#####################################################################
+# Cyrix MMX instructions
+#####################################################################
+
+add_group("cyrixmmx",  # operands: 64-bit SIMD reg, 64-bit SIMD reg/mem
+    cpu=["MMX", "Cyrix"],
+    modifiers=["Op1Add"],  # NOTE(review): presumably patches opcode[1]; confirm
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="SIMDRM", size=64, relaxed=True, dest="EA")])
+
+add_insn("paddsiw", "cyrixmmx", modifiers=[0x51])
+add_insn("paveb", "cyrixmmx", modifiers=[0x50])
+add_insn("pdistib", "cyrixmmx", modifiers=[0x54])
+add_insn("pmagw", "cyrixmmx", modifiers=[0x52])
+add_insn("pmulhriw", "cyrixmmx", modifiers=[0x5D])
+add_insn("pmulhrwc", "cyrixmmx", modifiers=[0x59])
+add_insn("pmvgezb", "cyrixmmx", modifiers=[0x5C])
+add_insn("pmvlzb", "cyrixmmx", modifiers=[0x5B])
+add_insn("pmvnzb", "cyrixmmx", modifiers=[0x5A])
+add_insn("pmvzb", "cyrixmmx", modifiers=[0x58])
+add_insn("psubsiw", "cyrixmmx", modifiers=[0x55])
+
+add_group("pmachriw",  # own group: second operand is memory-only (type="Mem")
+    cpu=["MMX", "Cyrix"],
+    opcode=[0x0F, 0x5E],
+    operands=[Operand(type="SIMDReg", size=64, dest="Spare"),
+              Operand(type="Mem", size=64, relaxed=True, dest="EA")])
+
+add_insn("pmachriw", "pmachriw")
+
+#####################################################################
+# Cyrix extensions
+#####################################################################
+
+add_insn("smint", "twobyte", modifiers=[0x0F, 0x38], cpu=["686", "Cyrix"])
+add_insn("smintold", "twobyte", modifiers=[0x0F, 0x7E], cpu=["486", "Cyrix", "Obs"])
+
+add_group("rdwrshr",  # single 32-bit reg/mem operand
+    cpu=["Cyrix", "SMM", "686"],
+    modifiers=["Op1Add"],
+    opcode=[0x0F, 0x36],  # base byte 0x36; insn modifiers below are 0 and 1
+    operands=[Operand(type="RM", size=32, relaxed=True, dest="EA")])
+
+add_insn("rdshr", "rdwrshr", modifiers=[0x00])
+add_insn("wrshr", "rdwrshr", modifiers=[0x01])
+
+add_group("rsdc",  # operands: segment register, 80-bit memory
+    cpu=["Cyrix", "SMM", "486"],
+    opcode=[0x0F, 0x79],
+    operands=[Operand(type="SegReg", size=16, relaxed=True, dest="Spare"),
+              Operand(type="Mem", size=80, relaxed=True, dest="EA")])
+
+add_insn("rsdc", "rsdc")
+
+add_group("cyrixsmm",  # single 80-bit memory operand
+    cpu=["Cyrix", "SMM", "486"],
+    modifiers=["Op1Add"],
+    opcode=[0x0F, 0x00],
+    operands=[Operand(type="Mem", size=80, relaxed=True, dest="EA")])
+
+add_insn("rsldt", "cyrixsmm", modifiers=[0x7B])
+add_insn("rsts", "cyrixsmm", modifiers=[0x7D])
+add_insn("svldt", "cyrixsmm", modifiers=[0x7A])
+add_insn("svts", "cyrixsmm", modifiers=[0x7C])
+
+add_group("svdc",  # operands of "rsdc" in reverse order: memory, segment reg
+    cpu=["Cyrix", "SMM", "486"],
+    opcode=[0x0F, 0x78],
+    operands=[Operand(type="Mem", size=80, relaxed=True, dest="EA"),
+              Operand(type="SegReg", size=16, relaxed=True, dest="Spare")])
+
+add_insn("svdc", "svdc")
+
+#####################################################################
+# Obsolete/undocumented instructions
+#####################################################################
+for _mnem, _grp, _bytes, _cpu in [  # (mnemonic, group, modifiers, cpu)
+        ("fsetpm", "twobyte", [0xDB, 0xE4], ["286", "FPU", "Obs"]),
+        ("loadall", "twobyte", [0x0F, 0x07], ["386", "Undoc"]),
+        ("loadall286", "twobyte", [0x0F, 0x05], ["286", "Undoc"]),
+        ("salc", "onebyte", [0xD6], ["Undoc", "Not64"]),
+        ("smi", "onebyte", [0xF1], ["386", "Undoc"])]:
+    add_insn(_mnem, _grp, modifiers=_bytes, cpu=_cpu)
+# ibts: opcode 0F A7, defined once per operand size (16 and 32 bits).
+# Each form takes a relaxed r/m operand (encoded via EA) followed by a
+# register of the same size (encoded via Spare).  The 16-bit form is
+# registered first, then the 32-bit form.
+for _opsz in (16, 32):
+    _rm = Operand(type="RM", size=_opsz, relaxed=True, dest="EA")
+    _reg = Operand(type="Reg", size=_opsz, dest="Spare")
+    add_group("ibts",
+        cpu=["Undoc", "Obs", "386"],
+        opersize=_opsz,
+        opcode=[0x0F, 0xA7],
+        operands=[_rm, _reg])
+
+add_insn("ibts", "ibts")
+
+# umov uses four opcodes:
+#   0F 10 / 0F 11 -- relaxed r/m operand (EA) first, register (Spare) second
+#   0F 12 / 0F 13 -- register (Spare) first, relaxed r/m operand (EA) second
+# The even opcode of each pair is the 8-bit form and passes no opersize;
+# the odd opcode is registered twice, with opersize 16 and then 32.
+# Forms are added in the order: 10, 11 (16), 11 (32), 12, 13 (16), 13 (32).
+
+# r/m operand first
+add_group("umov",
+    cpu=["Undoc", "386"],
+    opcode=[0x0F, 0x10],
+    operands=[Operand(type="RM", size=8, relaxed=True, dest="EA"),
+              Operand(type="Reg", size=8, dest="Spare")])
+for _opsz in (16, 32):
+    add_group("umov",
+        cpu=["Undoc", "386"],
+        opersize=_opsz,
+        opcode=[0x0F, 0x11],
+        operands=[Operand(type="RM", size=_opsz, relaxed=True, dest="EA"),
+                  Operand(type="Reg", size=_opsz, dest="Spare")])
+
+# register operand first
+add_group("umov",
+    cpu=["Undoc", "386"],
+    opcode=[0x0F, 0x12],
+    operands=[Operand(type="Reg", size=8, dest="Spare"),
+              Operand(type="RM", size=8, relaxed=True, dest="EA")])
+for _opsz in (16, 32):
+    add_group("umov",
+        cpu=["Undoc", "386"],
+        opersize=_opsz,
+        opcode=[0x0F, 0x13],
+        operands=[Operand(type="Reg", size=_opsz, dest="Spare"),
+                  Operand(type="RM", size=_opsz, relaxed=True, dest="EA")])
+
+add_insn("umov", "umov")
+
+# xbts: opcode 0F A6, defined once per operand size (16 and 32 bits).
+# Each form takes a register (encoded via Spare) followed by a relaxed
+# memory operand of the same size (encoded via EA).  The 16-bit form is
+# registered first, then the 32-bit form.
+for _opsz in (16, 32):
+    _reg = Operand(type="Reg", size=_opsz, dest="Spare")
+    _mem = Operand(type="Mem", size=_opsz, relaxed=True, dest="EA")
+    add_group("xbts",
+        cpu=["Undoc", "Obs", "386"],
+        opersize=_opsz,
+        opcode=[0x0F, 0xA6],
+        operands=[_reg, _mem])
+
+add_insn("xbts", "xbts")
+
+finalize_insns()
+
+#####################################################################
+# Prefixes
+#####################################################################
+# Operand-size ("OPERSIZE") and address-size ("ADDRSIZE") overrides share one
+# layout: NASM-parser <n>16/32/64 and GAS-parser <g>16/32/64 spellings, plus
+# GAS-only word/dword/qword-style names.  Every 64-bit variant is only64.
+for _kind, _nasm, _gas, _pfx in [("OPERSIZE", "o", "data", ""),
+                                 ("ADDRSIZE", "a", "addr", "a")]:
+    for sz in [16, 32, 64]:
+        add_prefix("%s%d" % (_nasm, sz), _kind, sz, parser="nasm",
+                   only64=(sz==64))
+        add_prefix("%s%d" % (_gas, sz), _kind, sz, parser="gas",
+                   only64=(sz==64))
+    add_prefix(_pfx + "word", _kind, 16, parser="gas")
+    add_prefix(_pfx + "dword", _kind, 32, parser="gas")
+    add_prefix(_pfx + "qword", _kind, 64, parser="gas", only64=True)
+
+# Instruction prefixes ("LOCKREP"): (name, prefix byte) pairs; aliases
+# such as repne/repnz share a byte.
+for _name, _byte in [("lock", 0xF0),
+                     ("repne", 0xF2), ("repnz", 0xF2),
+                     ("rep", 0xF3), ("repe", 0xF3), ("repz", 0xF3)]:
+    add_prefix(_name, "LOCKREP", _byte)
+
+# Other prefixes, limited to GAS-only at the moment.
+# Hint taken/not taken for jumps; registered with the "SEGREG" prefix type.
+for _name, _byte in [("ht", 0x3E), ("hnt", 0x2E)]:
+    add_prefix(_name, "SEGREG", _byte, parser="gas")
+
+# REX byte explicit prefixes: the position of each suffix in the list is
+# added to the base byte (0x40 for rex*, 0x48 for rex64*); all are
+# GAS-only and only64.
+for val, suf in enumerate(["", "z", "y", "yz", "x", "xz", "xy", "xyz"]):
+    for _stem, _base in [("rex", 0x40), ("rex64", 0x48)]:
+        add_prefix(_stem + suf, "REX", _base + val, parser="gas",
+                   only64=True)
+
+#####################################################################
+# Output generation
+#####################################################################
+# Create outputs with open(); the file() builtin exists only in Python 2.
+output_groups(open("x86insns.c", "wt"))
+output_gas_insns(open("x86insn_gas.gperf", "wt"))
+output_nasm_insns(open("x86insn_nasm.gperf", "wt"))
diff --git a/modules/arch/x86/tests/Makefile.inc b/modules/arch/x86/tests/Makefile.inc
index 3fc08da..67e8b5c 100644
--- a/modules/arch/x86/tests/Makefile.inc
+++ b/modules/arch/x86/tests/Makefile.inc
@@ -84,6 +84,8 @@
 EXTRA_DIST += modules/arch/x86/tests/mem64hi32.hex
 EXTRA_DIST += modules/arch/x86/tests/mem64rip.asm
 EXTRA_DIST += modules/arch/x86/tests/mem64rip.hex
+EXTRA_DIST += modules/arch/x86/tests/mixcase.asm
+EXTRA_DIST += modules/arch/x86/tests/mixcase.hex
 EXTRA_DIST += modules/arch/x86/tests/movdq32.asm
 EXTRA_DIST += modules/arch/x86/tests/movdq32.hex
 EXTRA_DIST += modules/arch/x86/tests/movdq64.asm
@@ -124,6 +126,11 @@
 EXTRA_DIST += modules/arch/x86/tests/rep.hex
 EXTRA_DIST += modules/arch/x86/tests/ret.asm
 EXTRA_DIST += modules/arch/x86/tests/ret.hex
+EXTRA_DIST += modules/arch/x86/tests/riprel1.asm
+EXTRA_DIST += modules/arch/x86/tests/riprel1.hex
+EXTRA_DIST += modules/arch/x86/tests/riprel2.asm
+EXTRA_DIST += modules/arch/x86/tests/riprel2.errwarn
+EXTRA_DIST += modules/arch/x86/tests/riprel2.hex
 EXTRA_DIST += modules/arch/x86/tests/segmov.asm
 EXTRA_DIST += modules/arch/x86/tests/segmov.hex
 EXTRA_DIST += modules/arch/x86/tests/shift.asm
@@ -144,6 +151,12 @@
 EXTRA_DIST += modules/arch/x86/tests/sse4.hex
 EXTRA_DIST += modules/arch/x86/tests/sse4-err.asm
 EXTRA_DIST += modules/arch/x86/tests/sse4-err.errwarn
+EXTRA_DIST += modules/arch/x86/tests/sse5-all.asm
+EXTRA_DIST += modules/arch/x86/tests/sse5-all.hex
+EXTRA_DIST += modules/arch/x86/tests/sse5-basic.asm
+EXTRA_DIST += modules/arch/x86/tests/sse5-basic.hex
+EXTRA_DIST += modules/arch/x86/tests/sse5-err.asm
+EXTRA_DIST += modules/arch/x86/tests/sse5-err.errwarn
 EXTRA_DIST += modules/arch/x86/tests/ssse3.asm
 EXTRA_DIST += modules/arch/x86/tests/ssse3.c
 EXTRA_DIST += modules/arch/x86/tests/ssse3.hex
diff --git a/modules/arch/x86/tests/mixcase.asm b/modules/arch/x86/tests/mixcase.asm
new file mode 100644
index 0000000..4964e72
--- /dev/null
+++ b/modules/arch/x86/tests/mixcase.asm
@@ -0,0 +1,3 @@
+CPU SSE5
+MOV AX,5
+
diff --git a/modules/arch/x86/tests/mixcase.hex b/modules/arch/x86/tests/mixcase.hex
new file mode 100644
index 0000000..76dc00b
--- /dev/null
+++ b/modules/arch/x86/tests/mixcase.hex
@@ -0,0 +1,3 @@
+b8 
+05 
+00 
diff --git a/modules/arch/x86/tests/riprel1.asm b/modules/arch/x86/tests/riprel1.asm
new file mode 100644
index 0000000..b1fdbec
--- /dev/null
+++ b/modules/arch/x86/tests/riprel1.asm
@@ -0,0 +1,66 @@
+bits 64
+val:
+default abs
+
+mov rax, val			; 32-bit imm
+mov rax, dword val		; 32-bit imm
+mov rax, qword val		; 64-bit imm
+
+mov rbx, val			; 32-bit imm
+mov rbx, dword val		; 32-bit imm
+mov rbx, qword val		; 64-bit imm
+
+mov rax, [val]			; 48 8b ... (32-bit disp)
+mov rax, [dword val]		; 48 8b ... (32-bit disp)
+mov rax, [qword val]		; 48 a1 ... (64-bit disp)
+a32 mov rax, [val]		; 67 48 a1 ... (32-bit disp)
+a32 mov rax, [dword val]	; 67 48 a1 ... (32-bit disp)
+a32 mov rax, [qword val]	; 67 48 a1 ... (32-bit disp)
+				; [this one is debatable on correctness,
+				; I chose in yasm to make a32 override]
+a64 mov rax, [val]		; 48 8b ... (32-bit disp)
+a64 mov rax, [dword val]	; 48 8b ... (32-bit disp)
+a64 mov rax, [qword val]	; 48 a1 ... (64-bit disp)
+
+mov rbx, [val]			; 48 8b ... (32-bit disp)
+mov rbx, [dword val]		; 48 8b ... (32-bit disp)
+;mov rbx, [qword val]		; illegal (can't have 64-bit disp)
+a32 mov rbx, [val]		; 67 48 8b ... (32-bit disp)
+a32 mov rbx, [dword val]	; 67 48 8b ... (32-bit disp)
+;a32 mov rbx, [qword val]	; illegal (can't have 64-bit disp)
+a64 mov rbx, [val]		; 48 8b ... (32-bit disp)
+a64 mov rbx, [dword val]	; 48 8b ... (32-bit disp)
+;a64 mov rbx, [qword val]	; illegal (can't have 64-bit disp)
+
+default rel
+
+mov rax, val			; 32-bit imm
+mov rax, dword val		; 32-bit imm
+mov rax, qword val		; 64-bit imm
+
+mov rbx, val			; 32-bit imm
+mov rbx, dword val		; 32-bit imm
+mov rbx, qword val		; 64-bit imm
+
+mov rax, [val]			; 48 8b ... (32-bit disp, RIP-rel)
+mov rax, [dword val]		; 48 8b ... (32-bit disp, RIP-rel)
+mov rax, [qword val]		; 48 a1 ... (64-bit disp, ABS)
+a32 mov rax, [val]		; 67 48 8b ... (32-bit disp, RIP-rel)
+a32 mov rax, [dword val]	; 67 48 8b ... (32-bit disp, RIP-rel)
+a32 mov rax, [qword val]	; 67 48 a1 ... (32-bit disp, ABS)
+				; [this one is debatable on correctness,
+				; I chose in yasm to make a32 override]
+a64 mov rax, [val]		; 48 8b ... (32-bit disp, RIP-rel)
+a64 mov rax, [dword val]	; 48 8b ... (32-bit disp, RIP-rel)
+a64 mov rax, [qword val]	; 48 a1 ... (64-bit disp, ABS)
+
+mov rbx, [val]			; 48 8b ... (32-bit disp, RIP-rel)
+mov rbx, [dword val]		; 48 8b ... (32-bit disp, RIP-rel)
+;mov rbx, [qword val]		; illegal (can't have 64-bit disp)
+a32 mov rbx, [val]		; 67 48 8b ... (32-bit disp, RIP-rel)
+a32 mov rbx, [dword val]	; 67 48 8b ... (32-bit disp, RIP-rel)
+;a32 mov rbx, [qword val]	; illegal (can't have 64-bit disp)
+a64 mov rbx, [val]		; 48 8b ... (32-bit disp, RIP-rel)
+a64 mov rbx, [dword val]	; 48 8b ... (32-bit disp, RIP-rel)
+;a64 mov rbx, [qword val]	; illegal (can't have 64-bit disp)
+
diff --git a/modules/arch/x86/tests/riprel1.hex b/modules/arch/x86/tests/riprel1.hex
new file mode 100644
index 0000000..78f28bf
--- /dev/null
+++ b/modules/arch/x86/tests/riprel1.hex
@@ -0,0 +1,334 @@
+48 
+c7 
+c0 
+00 
+00 
+00 
+00 
+48 
+c7 
+c0 
+00 
+00 
+00 
+00 
+48 
+b8 
+00 
+00 
+00 
+00 
+00 
+00 
+00 
+00 
+48 
+c7 
+c3 
+00 
+00 
+00 
+00 
+48 
+c7 
+c3 
+00 
+00 
+00 
+00 
+48 
+bb 
+00 
+00 
+00 
+00 
+00 
+00 
+00 
+00 
+48 
+8b 
+04 
+25 
+00 
+00 
+00 
+00 
+48 
+8b 
+04 
+25 
+00 
+00 
+00 
+00 
+48 
+a1 
+00 
+00 
+00 
+00 
+00 
+00 
+00 
+00 
+67 
+48 
+a1 
+00 
+00 
+00 
+00 
+67 
+48 
+a1 
+00 
+00 
+00 
+00 
+67 
+48 
+a1 
+00 
+00 
+00 
+00 
+48 
+8b 
+04 
+25 
+00 
+00 
+00 
+00 
+48 
+8b 
+04 
+25 
+00 
+00 
+00 
+00 
+48 
+a1 
+00 
+00 
+00 
+00 
+00 
+00 
+00 
+00 
+48 
+8b 
+1c 
+25 
+00 
+00 
+00 
+00 
+48 
+8b 
+1c 
+25 
+00 
+00 
+00 
+00 
+67 
+48 
+8b 
+1c 
+25 
+00 
+00 
+00 
+00 
+67 
+48 
+8b 
+1c 
+25 
+00 
+00 
+00 
+00 
+48 
+8b 
+1c 
+25 
+00 
+00 
+00 
+00 
+48 
+8b 
+1c 
+25 
+00 
+00 
+00 
+00 
+48 
+c7 
+c0 
+00 
+00 
+00 
+00 
+48 
+c7 
+c0 
+00 
+00 
+00 
+00 
+48 
+b8 
+00 
+00 
+00 
+00 
+00 
+00 
+00 
+00 
+48 
+c7 
+c3 
+00 
+00 
+00 
+00 
+48 
+c7 
+c3 
+00 
+00 
+00 
+00 
+48 
+bb 
+00 
+00 
+00 
+00 
+00 
+00 
+00 
+00 
+48 
+8b 
+05 
+1e 
+ff 
+ff 
+ff 
+48 
+8b 
+05 
+17 
+ff 
+ff 
+ff 
+48 
+a1 
+00 
+00 
+00 
+00 
+00 
+00 
+00 
+00 
+67 
+48 
+8b 
+05 
+05 
+ff 
+ff 
+ff 
+67 
+48 
+8b 
+05 
+fd 
+fe 
+ff 
+ff 
+67 
+48 
+a1 
+00 
+00 
+00 
+00 
+48 
+8b 
+05 
+ef 
+fe 
+ff 
+ff 
+48 
+8b 
+05 
+e8 
+fe 
+ff 
+ff 
+48 
+a1 
+00 
+00 
+00 
+00 
+00 
+00 
+00 
+00 
+48 
+8b 
+1d 
+d7 
+fe 
+ff 
+ff 
+48 
+8b 
+1d 
+d0 
+fe 
+ff 
+ff 
+67 
+48 
+8b 
+1d 
+c8 
+fe 
+ff 
+ff 
+67 
+48 
+8b 
+1d 
+c0 
+fe 
+ff 
+ff 
+48 
+8b 
+1d 
+b9 
+fe 
+ff 
+ff 
+48 
+8b 
+1d 
+b2 
+fe 
+ff 
+ff 
diff --git a/modules/arch/x86/tests/riprel2.asm b/modules/arch/x86/tests/riprel2.asm
new file mode 100644
index 0000000..813d9e5
--- /dev/null
+++ b/modules/arch/x86/tests/riprel2.asm
@@ -0,0 +1,110 @@
+	bits 64
+
+	default abs	; default abs, except for explicit rel
+
+	mov rax,[foo]
+	mov rax,[qword 123456789abcdef0h]
+	mov rbx,[foo]
+	mov rax,[dword foo]
+	mov rbx,[dword foo]
+	mov rax,[qword foo]
+	mov rax,[rel foo]		; rel
+	mov rbx,[rel foo]		; rel
+	mov rax,[rel dword foo]		; rel
+	;mov rax,[rel qword foo]	; illegal
+	mov rax,[abs foo]
+	mov rbx,[abs foo]
+	mov rax,[abs dword foo]
+	mov rax,[abs qword foo]
+
+	mov rax,[es:foo]
+	mov rax,[qword es:123456789abcdef0h]
+	mov rbx,[es:foo]
+	mov rax,[dword es:foo]
+	mov rbx,[dword es:foo]
+	mov rax,[qword es:foo]
+	mov rax,[rel es:foo]		; rel
+	mov rbx,[rel es:foo]		; rel
+	mov rax,[rel dword es:foo]	; rel
+	;mov rax,[rel qword es:foo]	; illegal
+	mov rax,[abs es:foo]
+	mov rbx,[abs es:foo]
+	mov rax,[abs dword es:foo]
+	mov rax,[abs qword es:foo]
+
+	mov rax,[fs:foo]
+	mov rax,[qword fs:123456789abcdef0h]
+	mov rbx,[fs:foo]
+	mov rax,[dword fs:foo]
+	mov rbx,[dword fs:foo]
+	mov rax,[qword fs:foo]
+	mov rax,[rel fs:foo]		; rel
+	mov rbx,[rel fs:foo]		; rel
+	mov rax,[rel dword fs:foo]	; rel
+	;mov rax,[rel qword fs:foo]	; illegal
+	mov rax,[abs fs:foo]
+	mov rbx,[abs fs:foo]
+	mov rax,[abs dword fs:foo]
+	mov rax,[abs qword fs:foo]
+
+	mov rax,[rbx]
+	mov rax,[rel rbx]
+	mov rax,[abs rbx]
+
+	default rel
+
+	; all of these are default rel, except for 64-bit displacements
+	mov rax,[foo]
+	mov rax,[qword 123456789abcdef0h]	; abs
+	mov rbx,[foo]
+	mov rax,[dword foo]
+	mov rbx,[dword foo]
+	mov rax,[qword foo]		; abs
+	mov rax,[rel foo]
+	mov rbx,[rel foo]
+	mov rax,[rel dword foo]
+	;mov rax,[rel qword foo]	; illegal
+	mov rax,[abs foo]
+	mov rbx,[abs foo]
+	mov rax,[abs dword foo]
+	mov rax,[abs qword foo]
+
+	; all of these are abs due to es:, except for explicit rel
+	mov rax,[es:foo]
+	mov rax,[qword es:123456789abcdef0h]
+	mov rbx,[es:foo]
+	mov rax,[dword es:foo]
+	mov rbx,[dword es:foo]
+	mov rax,[qword es:foo]
+	mov rax,[rel es:foo]		; rel
+	mov rbx,[rel es:foo]		; rel
+	mov rax,[rel dword es:foo]	; rel
+	;mov rax,[rel qword es:foo]	; illegal
+	mov rax,[abs es:foo]
+	mov rbx,[abs es:foo]
+	mov rax,[abs dword es:foo]
+	mov rax,[abs qword es:foo]
+
+	; all of these are abs due to fs:, except for explicit rel
+	mov rax,[fs:foo]
+	mov rax,[qword fs:123456789abcdef0h]
+	mov rbx,[fs:foo]
+	mov rax,[dword fs:foo]
+	mov rbx,[dword fs:foo]
+	mov rax,[qword fs:foo]
+	mov rax,[rel fs:foo]		; rel
+	mov rbx,[rel fs:foo]		; rel
+	mov rax,[rel dword fs:foo]	; rel
+	;mov rax,[rel qword fs:foo]	; illegal
+	mov rax,[abs fs:foo]
+	mov rbx,[abs fs:foo]
+	mov rax,[abs dword fs:foo]
+	mov rax,[abs qword fs:foo]
+
+	mov rax,[rbx]
+	mov rax,[rel rbx]
+	mov rax,[abs rbx]
+
+	section .data
+foo	equ $
+	
diff --git a/modules/arch/x86/tests/riprel2.errwarn b/modules/arch/x86/tests/riprel2.errwarn
new file mode 100644
index 0000000..b219dc7
--- /dev/null
+++ b/modules/arch/x86/tests/riprel2.errwarn
@@ -0,0 +1,26 @@
+-:20: warning: `es' segment register ignored in 64-bit mode
+-:21: warning: `es' segment register ignored in 64-bit mode
+-:22: warning: `es' segment register ignored in 64-bit mode
+-:23: warning: `es' segment register ignored in 64-bit mode
+-:24: warning: `es' segment register ignored in 64-bit mode
+-:25: warning: `es' segment register ignored in 64-bit mode
+-:26: warning: `es' segment register ignored in 64-bit mode
+-:27: warning: `es' segment register ignored in 64-bit mode
+-:28: warning: `es' segment register ignored in 64-bit mode
+-:30: warning: `es' segment register ignored in 64-bit mode
+-:31: warning: `es' segment register ignored in 64-bit mode
+-:32: warning: `es' segment register ignored in 64-bit mode
+-:33: warning: `es' segment register ignored in 64-bit mode
+-:73: warning: `es' segment register ignored in 64-bit mode
+-:74: warning: `es' segment register ignored in 64-bit mode
+-:75: warning: `es' segment register ignored in 64-bit mode
+-:76: warning: `es' segment register ignored in 64-bit mode
+-:77: warning: `es' segment register ignored in 64-bit mode
+-:78: warning: `es' segment register ignored in 64-bit mode
+-:79: warning: `es' segment register ignored in 64-bit mode
+-:80: warning: `es' segment register ignored in 64-bit mode
+-:81: warning: `es' segment register ignored in 64-bit mode
+-:83: warning: `es' segment register ignored in 64-bit mode
+-:84: warning: `es' segment register ignored in 64-bit mode
+-:85: warning: `es' segment register ignored in 64-bit mode
+-:86: warning: `es' segment register ignored in 64-bit mode
diff --git a/modules/arch/x86/tests/riprel2.hex b/modules/arch/x86/tests/riprel2.hex
new file mode 100644
index 0000000..5d9a918
--- /dev/null
+++ b/modules/arch/x86/tests/riprel2.hex
@@ -0,0 +1,708 @@
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+48 
+a1 
+f0 
+de 
+bc 
+9a 
+78 
+56 
+34 
+12 
+48 
+8b 
+1c 
+25 
+c4 
+02 
+00 
+00 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+48 
+8b 
+1c 
+25 
+c4 
+02 
+00 
+00 
+48 
+a1 
+c4 
+02 
+00 
+00 
+00 
+00 
+00 
+00 
+48 
+8b 
+05 
+89 
+02 
+00 
+00 
+48 
+8b 
+1d 
+82 
+02 
+00 
+00 
+48 
+8b 
+05 
+7b 
+02 
+00 
+00 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+48 
+8b 
+1c 
+25 
+c4 
+02 
+00 
+00 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+48 
+a1 
+c4 
+02 
+00 
+00 
+00 
+00 
+00 
+00 
+26 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+26 
+48 
+a1 
+f0 
+de 
+bc 
+9a 
+78 
+56 
+34 
+12 
+26 
+48 
+8b 
+1c 
+25 
+c4 
+02 
+00 
+00 
+26 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+26 
+48 
+8b 
+1c 
+25 
+c4 
+02 
+00 
+00 
+26 
+48 
+a1 
+c4 
+02 
+00 
+00 
+00 
+00 
+00 
+00 
+26 
+48 
+8b 
+05 
+17 
+02 
+00 
+00 
+26 
+48 
+8b 
+1d 
+0f 
+02 
+00 
+00 
+26 
+48 
+8b 
+05 
+07 
+02 
+00 
+00 
+26 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+26 
+48 
+8b 
+1c 
+25 
+c4 
+02 
+00 
+00 
+26 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+26 
+48 
+a1 
+c4 
+02 
+00 
+00 
+00 
+00 
+00 
+00 
+64 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+64 
+48 
+a1 
+f0 
+de 
+bc 
+9a 
+78 
+56 
+34 
+12 
+64 
+48 
+8b 
+1c 
+25 
+c4 
+02 
+00 
+00 
+64 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+64 
+48 
+8b 
+1c 
+25 
+c4 
+02 
+00 
+00 
+64 
+48 
+a1 
+c4 
+02 
+00 
+00 
+00 
+00 
+00 
+00 
+64 
+48 
+8b 
+05 
+9f 
+01 
+00 
+00 
+64 
+48 
+8b 
+1d 
+97 
+01 
+00 
+00 
+64 
+48 
+8b 
+05 
+8f 
+01 
+00 
+00 
+64 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+64 
+48 
+8b 
+1c 
+25 
+c4 
+02 
+00 
+00 
+64 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+64 
+48 
+a1 
+c4 
+02 
+00 
+00 
+00 
+00 
+00 
+00 
+48 
+8b 
+03 
+48 
+8b 
+03 
+48 
+8b 
+03 
+48 
+8b 
+05 
+59 
+01 
+00 
+00 
+48 
+a1 
+f0 
+de 
+bc 
+9a 
+78 
+56 
+34 
+12 
+48 
+8b 
+1d 
+48 
+01 
+00 
+00 
+48 
+8b 
+05 
+41 
+01 
+00 
+00 
+48 
+8b 
+1d 
+3a 
+01 
+00 
+00 
+48 
+a1 
+c4 
+02 
+00 
+00 
+00 
+00 
+00 
+00 
+48 
+8b 
+05 
+29 
+01 
+00 
+00 
+48 
+8b 
+1d 
+22 
+01 
+00 
+00 
+48 
+8b 
+05 
+1b 
+01 
+00 
+00 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+48 
+8b 
+1c 
+25 
+c4 
+02 
+00 
+00 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+48 
+a1 
+c4 
+02 
+00 
+00 
+00 
+00 
+00 
+00 
+26 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+26 
+48 
+a1 
+f0 
+de 
+bc 
+9a 
+78 
+56 
+34 
+12 
+26 
+48 
+8b 
+1c 
+25 
+c4 
+02 
+00 
+00 
+26 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+26 
+48 
+8b 
+1c 
+25 
+c4 
+02 
+00 
+00 
+26 
+48 
+a1 
+c4 
+02 
+00 
+00 
+00 
+00 
+00 
+00 
+26 
+48 
+8b 
+05 
+b7 
+00 
+00 
+00 
+26 
+48 
+8b 
+1d 
+af 
+00 
+00 
+00 
+26 
+48 
+8b 
+05 
+a7 
+00 
+00 
+00 
+26 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+26 
+48 
+8b 
+1c 
+25 
+c4 
+02 
+00 
+00 
+26 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+26 
+48 
+a1 
+c4 
+02 
+00 
+00 
+00 
+00 
+00 
+00 
+64 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+64 
+48 
+a1 
+f0 
+de 
+bc 
+9a 
+78 
+56 
+34 
+12 
+64 
+48 
+8b 
+1c 
+25 
+c4 
+02 
+00 
+00 
+64 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+64 
+48 
+8b 
+1c 
+25 
+c4 
+02 
+00 
+00 
+64 
+48 
+a1 
+c4 
+02 
+00 
+00 
+00 
+00 
+00 
+00 
+64 
+48 
+8b 
+05 
+3f 
+00 
+00 
+00 
+64 
+48 
+8b 
+1d 
+37 
+00 
+00 
+00 
+64 
+48 
+8b 
+05 
+2f 
+00 
+00 
+00 
+64 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+64 
+48 
+8b 
+1c 
+25 
+c4 
+02 
+00 
+00 
+64 
+48 
+8b 
+04 
+25 
+c4 
+02 
+00 
+00 
+64 
+48 
+a1 
+c4 
+02 
+00 
+00 
+00 
+00 
+00 
+00 
+48 
+8b 
+03 
+48 
+8b 
+03 
+48 
+8b 
+03 
diff --git a/modules/arch/x86/tests/sse5-all.asm b/modules/arch/x86/tests/sse5-all.asm
new file mode 100644
index 0000000..c4fdd6d
--- /dev/null
+++ b/modules/arch/x86/tests/sse5-all.asm
@@ -0,0 +1,509 @@
+; Instructions are ordered in SSE5 databook order
+; BITS=16 to minimize output length
+[bits 16]
+compd xmm1, xmm4, xmm7, 5		; 0F 25 2D 347 10 05
+compd xmm2, xmm5, [0], byte 5		; 0F 25 2D 056 20 00 00 05
+compd xmm3, xmm6, dqword [0], 5		; 0F 25 2D 066 30 00 00 05
+
+comps xmm1, xmm4, xmm7, 5		; 0F 25 2C 347 10 05
+comps xmm2, xmm5, [0], byte 5		; 0F 25 2C 056 20 00 00 05
+comps xmm3, xmm6, dqword [0], 5		; 0F 25 2C 066 30 00 00 05
+
+comsd xmm1, xmm4, xmm7, 5		; 0F 25 2F 347 10 05
+comsd xmm2, xmm5, [0], byte 5		; 0F 25 2F 056 20 00 00 05
+comsd xmm3, xmm6, qword [0], 5		; 0F 25 2F 066 30 00 00 05
+
+comss xmm1, xmm4, xmm7, 5		; 0F 25 2E 347 10 05
+comss xmm2, xmm5, [0], byte 5		; 0F 25 2E 056 20 00 00 05
+comss xmm3, xmm6, dword [0], 5		; 0F 25 2E 066 30 00 00 05
+
+cvtph2ps xmm1, xmm4			; 0F 7A 30 314
+cvtph2ps xmm2, [0]			; 0F 7A 30 026 00 00
+cvtph2ps xmm3, qword [0]		; 0F 7A 30 036 00 00
+
+cvtps2ph xmm1, xmm4			; 0F 7A 31 341
+cvtps2ph [0], xmm2			; 0F 7A 31 026 00 00
+cvtps2ph qword [0], xmm3		; 0F 7A 31 036 00 00
+
+fmaddpd xmm1, xmm1, xmm2, xmm3		; 0F 24 01 323 10 /or/ 0F 24 01 332 18
+fmaddpd xmm1, xmm1, xmm2, [0]		; 0F 24 01 026 10 00 00
+fmaddpd xmm1, xmm1, xmm2, dqword [0]	; 0F 24 01 026 10 00 00
+fmaddpd xmm1, xmm1, [0], xmm3		; 0F 24 01 036 18 00 00
+fmaddpd xmm1, xmm1, dqword [0], xmm3	; 0F 24 01 036 18 00 00
+fmaddpd xmm1, xmm2, xmm3, xmm1		; 0F 24 05 323 10 /or/ 0F 24 05 332 18
+fmaddpd xmm1, xmm2, [0], xmm1		; 0F 24 05 026 10 00 00
+fmaddpd xmm1, xmm2, dqword [0], xmm1	; 0F 24 05 026 10 00 00
+fmaddpd xmm1, [0], xmm3, xmm1		; 0F 24 05 036 18 00 00
+fmaddpd xmm1, dqword [0], xmm3, xmm1	; 0F 24 05 036 18 00 00
+
+fmaddps xmm1, xmm1, xmm2, xmm3		; 0F 24 00 323 10 /or/ 0F 24 00 332 18
+fmaddps xmm1, xmm1, xmm2, [0]		; 0F 24 00 026 10 00 00
+fmaddps xmm1, xmm1, xmm2, dqword [0]	; 0F 24 00 026 10 00 00
+fmaddps xmm1, xmm1, [0], xmm3		; 0F 24 00 036 18 00 00
+fmaddps xmm1, xmm1, dqword [0], xmm3	; 0F 24 00 036 18 00 00
+fmaddps xmm1, xmm2, xmm3, xmm1		; 0F 24 04 323 10 /or/ 0F 24 04 332 18
+fmaddps xmm1, xmm2, [0], xmm1		; 0F 24 04 026 10 00 00
+fmaddps xmm1, xmm2, dqword [0], xmm1	; 0F 24 04 026 10 00 00
+fmaddps xmm1, [0], xmm3, xmm1		; 0F 24 04 036 18 00 00
+fmaddps xmm1, dqword [0], xmm3, xmm1	; 0F 24 04 036 18 00 00
+
+fmaddsd xmm1, xmm1, xmm2, xmm3		; 0F 24 03 323 10 /or/ 0F 24 03 332 18
+fmaddsd xmm1, xmm1, xmm2, [0]		; 0F 24 03 026 10 00 00
+fmaddsd xmm1, xmm1, xmm2, qword [0]	; 0F 24 03 026 10 00 00
+fmaddsd xmm1, xmm1, [0], xmm3		; 0F 24 03 036 18 00 00
+fmaddsd xmm1, xmm1, qword [0], xmm3	; 0F 24 03 036 18 00 00
+fmaddsd xmm1, xmm2, xmm3, xmm1		; 0F 24 07 323 10 /or/ 0F 24 07 332 18
+fmaddsd xmm1, xmm2, [0], xmm1		; 0F 24 07 026 10 00 00
+fmaddsd xmm1, xmm2, qword [0], xmm1	; 0F 24 07 026 10 00 00
+fmaddsd xmm1, [0], xmm3, xmm1		; 0F 24 07 036 18 00 00
+fmaddsd xmm1, qword [0], xmm3, xmm1	; 0F 24 07 036 18 00 00
+
+fmaddss xmm1, xmm1, xmm2, xmm3		; 0F 24 02 323 10 /or/ 0F 24 02 332 18
+fmaddss xmm1, xmm1, xmm2, [0]		; 0F 24 02 026 10 00 00
+fmaddss xmm1, xmm1, xmm2, dword [0]	; 0F 24 02 026 10 00 00
+fmaddss xmm1, xmm1, [0], xmm3		; 0F 24 02 036 18 00 00
+fmaddss xmm1, xmm1, dword [0], xmm3	; 0F 24 02 036 18 00 00
+fmaddss xmm1, xmm2, xmm3, xmm1		; 0F 24 06 323 10 /or/ 0F 24 06 332 18
+fmaddss xmm1, xmm2, [0], xmm1		; 0F 24 06 026 10 00 00
+fmaddss xmm1, xmm2, dword [0], xmm1	; 0F 24 06 026 10 00 00
+fmaddss xmm1, [0], xmm3, xmm1		; 0F 24 06 036 18 00 00
+fmaddss xmm1, dword [0], xmm3, xmm1	; 0F 24 06 036 18 00 00
+
+fmsubpd xmm1, xmm1, xmm2, xmm3		; 0F 24 09 323 10 /or/ 0F 24 09 332 18
+fmsubpd xmm1, xmm1, xmm2, [0]		; 0F 24 09 026 10 00 00
+fmsubpd xmm1, xmm1, xmm2, dqword [0]	; 0F 24 09 026 10 00 00
+fmsubpd xmm1, xmm1, [0], xmm3		; 0F 24 09 036 18 00 00
+fmsubpd xmm1, xmm1, dqword [0], xmm3	; 0F 24 09 036 18 00 00
+fmsubpd xmm1, xmm2, xmm3, xmm1		; 0F 24 0D 323 10 /or/ 0F 24 0D 332 18
+fmsubpd xmm1, xmm2, [0], xmm1		; 0F 24 0D 026 10 00 00
+fmsubpd xmm1, xmm2, dqword [0], xmm1	; 0F 24 0D 026 10 00 00
+fmsubpd xmm1, [0], xmm3, xmm1		; 0F 24 0D 036 18 00 00
+fmsubpd xmm1, dqword [0], xmm3, xmm1	; 0F 24 0D 036 18 00 00
+
+fmsubps xmm1, xmm1, xmm2, xmm3		; 0F 24 08 323 10 /or/ 0F 24 08 332 18
+fmsubps xmm1, xmm1, xmm2, [0]		; 0F 24 08 026 10 00 00
+fmsubps xmm1, xmm1, xmm2, dqword [0]	; 0F 24 08 026 10 00 00
+fmsubps xmm1, xmm1, [0], xmm3		; 0F 24 08 036 18 00 00
+fmsubps xmm1, xmm1, dqword [0], xmm3	; 0F 24 08 036 18 00 00
+fmsubps xmm1, xmm2, xmm3, xmm1		; 0F 24 0C 323 10 /or/ 0F 24 0C 332 18
+fmsubps xmm1, xmm2, [0], xmm1		; 0F 24 0C 026 10 00 00
+fmsubps xmm1, xmm2, dqword [0], xmm1	; 0F 24 0C 026 10 00 00
+fmsubps xmm1, [0], xmm3, xmm1		; 0F 24 0C 036 18 00 00
+fmsubps xmm1, dqword [0], xmm3, xmm1	; 0F 24 0C 036 18 00 00
+
+fmsubsd xmm1, xmm1, xmm2, xmm3		; 0F 24 0B 323 10 /or/ 0F 24 0B 332 18
+fmsubsd xmm1, xmm1, xmm2, [0]		; 0F 24 0B 026 10 00 00
+fmsubsd xmm1, xmm1, xmm2, qword [0]	; 0F 24 0B 026 10 00 00
+fmsubsd xmm1, xmm1, [0], xmm3		; 0F 24 0B 036 18 00 00
+fmsubsd xmm1, xmm1, qword [0], xmm3	; 0F 24 0B 036 18 00 00
+fmsubsd xmm1, xmm2, xmm3, xmm1		; 0F 24 0F 323 10 /or/ 0F 24 0F 332 18
+fmsubsd xmm1, xmm2, [0], xmm1		; 0F 24 0F 026 10 00 00
+fmsubsd xmm1, xmm2, qword [0], xmm1	; 0F 24 0F 026 10 00 00
+fmsubsd xmm1, [0], xmm3, xmm1		; 0F 24 0F 036 18 00 00
+fmsubsd xmm1, qword [0], xmm3, xmm1	; 0F 24 0F 036 18 00 00
+
+fmsubss xmm1, xmm1, xmm2, xmm3		; 0F 24 0A 323 10 /or/ 0F 24 0A 332 18
+fmsubss xmm1, xmm1, xmm2, [0]		; 0F 24 0A 026 10 00 00
+fmsubss xmm1, xmm1, xmm2, dword [0]	; 0F 24 0A 026 10 00 00
+fmsubss xmm1, xmm1, [0], xmm3		; 0F 24 0A 036 18 00 00
+fmsubss xmm1, xmm1, dword [0], xmm3	; 0F 24 0A 036 18 00 00
+fmsubss xmm1, xmm2, xmm3, xmm1		; 0F 24 0E 323 10 /or/ 0F 24 0E 332 18
+fmsubss xmm1, xmm2, [0], xmm1		; 0F 24 0E 026 10 00 00
+fmsubss xmm1, xmm2, dword [0], xmm1	; 0F 24 0E 026 10 00 00
+fmsubss xmm1, [0], xmm3, xmm1		; 0F 24 0E 036 18 00 00
+fmsubss xmm1, dword [0], xmm3, xmm1	; 0F 24 0E 036 18 00 00
+
+fnmaddpd xmm1, xmm1, xmm2, xmm3		; 0F 24 11 323 10 /or/ 0F 24 11 332 18
+fnmaddpd xmm1, xmm1, xmm2, [0]		; 0F 24 11 026 10 00 00
+fnmaddpd xmm1, xmm1, xmm2, dqword [0]	; 0F 24 11 026 10 00 00
+fnmaddpd xmm1, xmm1, [0], xmm3		; 0F 24 11 036 18 00 00
+fnmaddpd xmm1, xmm1, dqword [0], xmm3	; 0F 24 11 036 18 00 00
+fnmaddpd xmm1, xmm2, xmm3, xmm1		; 0F 24 15 323 10 /or/ 0F 24 15 332 18
+fnmaddpd xmm1, xmm2, [0], xmm1		; 0F 24 15 026 10 00 00
+fnmaddpd xmm1, xmm2, dqword [0], xmm1	; 0F 24 15 026 10 00 00
+fnmaddpd xmm1, [0], xmm3, xmm1		; 0F 24 15 036 18 00 00
+fnmaddpd xmm1, dqword [0], xmm3, xmm1	; 0F 24 15 036 18 00 00
+
+fnmaddps xmm1, xmm1, xmm2, xmm3		; 0F 24 10 323 10 /or/ 0F 24 10 332 18
+fnmaddps xmm1, xmm1, xmm2, [0]		; 0F 24 10 026 10 00 00
+fnmaddps xmm1, xmm1, xmm2, dqword [0]	; 0F 24 10 026 10 00 00
+fnmaddps xmm1, xmm1, [0], xmm3		; 0F 24 10 036 18 00 00
+fnmaddps xmm1, xmm1, dqword [0], xmm3	; 0F 24 10 036 18 00 00
+fnmaddps xmm1, xmm2, xmm3, xmm1		; 0F 24 14 323 10 /or/ 0F 24 14 332 18
+fnmaddps xmm1, xmm2, [0], xmm1		; 0F 24 14 026 10 00 00
+fnmaddps xmm1, xmm2, dqword [0], xmm1	; 0F 24 14 026 10 00 00
+fnmaddps xmm1, [0], xmm3, xmm1		; 0F 24 14 036 18 00 00
+fnmaddps xmm1, dqword [0], xmm3, xmm1	; 0F 24 14 036 18 00 00
+
+fnmaddsd xmm1, xmm1, xmm2, xmm3		; 0F 24 13 323 10 /or/ 0F 24 13 332 18
+fnmaddsd xmm1, xmm1, xmm2, [0]		; 0F 24 13 026 10 00 00
+fnmaddsd xmm1, xmm1, xmm2, qword [0]	; 0F 24 13 026 10 00 00
+fnmaddsd xmm1, xmm1, [0], xmm3		; 0F 24 13 036 18 00 00
+fnmaddsd xmm1, xmm1, qword [0], xmm3	; 0F 24 13 036 18 00 00
+fnmaddsd xmm1, xmm2, xmm3, xmm1		; 0F 24 17 323 10 /or/ 0F 24 17 332 18
+fnmaddsd xmm1, xmm2, [0], xmm1		; 0F 24 17 026 10 00 00
+fnmaddsd xmm1, xmm2, qword [0], xmm1	; 0F 24 17 026 10 00 00
+fnmaddsd xmm1, [0], xmm3, xmm1		; 0F 24 17 036 18 00 00
+fnmaddsd xmm1, qword [0], xmm3, xmm1	; 0F 24 17 036 18 00 00
+
+fnmaddss xmm1, xmm1, xmm2, xmm3		; 0F 24 12 323 10 /or/ 0F 24 12 332 18
+fnmaddss xmm1, xmm1, xmm2, [0]		; 0F 24 12 026 10 00 00
+fnmaddss xmm1, xmm1, xmm2, dword [0]	; 0F 24 12 026 10 00 00
+fnmaddss xmm1, xmm1, [0], xmm3		; 0F 24 12 036 18 00 00
+fnmaddss xmm1, xmm1, dword [0], xmm3	; 0F 24 12 036 18 00 00
+fnmaddss xmm1, xmm2, xmm3, xmm1		; 0F 24 16 323 10 /or/ 0F 24 16 332 18
+fnmaddss xmm1, xmm2, [0], xmm1		; 0F 24 16 026 10 00 00
+fnmaddss xmm1, xmm2, dword [0], xmm1	; 0F 24 16 026 10 00 00
+fnmaddss xmm1, [0], xmm3, xmm1		; 0F 24 16 036 18 00 00
+fnmaddss xmm1, dword [0], xmm3, xmm1	; 0F 24 16 036 18 00 00
+
+fnmsubpd xmm1, xmm1, xmm2, xmm3		; 0F 24 19 323 10 /or/ 0F 24 19 332 18
+fnmsubpd xmm1, xmm1, xmm2, [0]		; 0F 24 19 026 10 00 00
+fnmsubpd xmm1, xmm1, xmm2, dqword [0]	; 0F 24 19 026 10 00 00
+fnmsubpd xmm1, xmm1, [0], xmm3		; 0F 24 19 036 18 00 00
+fnmsubpd xmm1, xmm1, dqword [0], xmm3	; 0F 24 19 036 18 00 00
+fnmsubpd xmm1, xmm2, xmm3, xmm1		; 0F 24 1D 323 10 /or/ 0F 24 1D 332 18
+fnmsubpd xmm1, xmm2, [0], xmm1		; 0F 24 1D 026 10 00 00
+fnmsubpd xmm1, xmm2, dqword [0], xmm1	; 0F 24 1D 026 10 00 00
+fnmsubpd xmm1, [0], xmm3, xmm1		; 0F 24 1D 036 18 00 00
+fnmsubpd xmm1, dqword [0], xmm3, xmm1	; 0F 24 1D 036 18 00 00
+
+fnmsubps xmm1, xmm1, xmm2, xmm3		; 0F 24 18 323 10 /or/ 0F 24 18 332 18
+fnmsubps xmm1, xmm1, xmm2, [0]		; 0F 24 18 026 10 00 00
+fnmsubps xmm1, xmm1, xmm2, dqword [0]	; 0F 24 18 026 10 00 00
+fnmsubps xmm1, xmm1, [0], xmm3		; 0F 24 18 036 18 00 00
+fnmsubps xmm1, xmm1, dqword [0], xmm3	; 0F 24 18 036 18 00 00
+fnmsubps xmm1, xmm2, xmm3, xmm1		; 0F 24 1C 323 10 /or/ 0F 24 1C 332 18
+fnmsubps xmm1, xmm2, [0], xmm1		; 0F 24 1C 026 10 00 00
+fnmsubps xmm1, xmm2, dqword [0], xmm1	; 0F 24 1C 026 10 00 00
+fnmsubps xmm1, [0], xmm3, xmm1		; 0F 24 1C 036 18 00 00
+fnmsubps xmm1, dqword [0], xmm3, xmm1	; 0F 24 1C 036 18 00 00
+
+fnmsubsd xmm1, xmm1, xmm2, xmm3		; 0F 24 1B 323 10 /or/ 0F 24 1B 332 18
+fnmsubsd xmm1, xmm1, xmm2, [0]		; 0F 24 1B 026 10 00 00
+fnmsubsd xmm1, xmm1, xmm2, qword [0]	; 0F 24 1B 026 10 00 00
+fnmsubsd xmm1, xmm1, [0], xmm3		; 0F 24 1B 036 18 00 00
+fnmsubsd xmm1, xmm1, qword [0], xmm3	; 0F 24 1B 036 18 00 00
+fnmsubsd xmm1, xmm2, xmm3, xmm1		; 0F 24 1F 323 10 /or/ 0F 24 1F 332 18
+fnmsubsd xmm1, xmm2, [0], xmm1		; 0F 24 1F 026 10 00 00
+fnmsubsd xmm1, xmm2, qword [0], xmm1	; 0F 24 1F 026 10 00 00
+fnmsubsd xmm1, [0], xmm3, xmm1		; 0F 24 1F 036 18 00 00
+fnmsubsd xmm1, qword [0], xmm3, xmm1	; 0F 24 1F 036 18 00 00
+
+fnmsubss xmm1, xmm1, xmm2, xmm3		; 0F 24 1A 323 10 /or/ 0F 24 1A 332 18
+fnmsubss xmm1, xmm1, xmm2, [0]		; 0F 24 1A 026 10 00 00
+fnmsubss xmm1, xmm1, xmm2, dword [0]	; 0F 24 1A 026 10 00 00
+fnmsubss xmm1, xmm1, [0], xmm3		; 0F 24 1A 036 18 00 00
+fnmsubss xmm1, xmm1, dword [0], xmm3	; 0F 24 1A 036 18 00 00
+fnmsubss xmm1, xmm2, xmm3, xmm1		; 0F 24 1E 323 10 /or/ 0F 24 1E 332 18
+fnmsubss xmm1, xmm2, [0], xmm1		; 0F 24 1E 026 10 00 00
+fnmsubss xmm1, xmm2, dword [0], xmm1	; 0F 24 1E 026 10 00 00
+fnmsubss xmm1, [0], xmm3, xmm1		; 0F 24 1E 036 18 00 00
+fnmsubss xmm1, dword [0], xmm3, xmm1	; 0F 24 1E 036 18 00 00
+
+frczpd xmm1, xmm2			; 0F 7A 11 312
+frczpd xmm1, [0]			; 0F 7A 11 016 00 00
+frczpd xmm1, dqword [0]			; 0F 7A 11 016 00 00
+
+frczps xmm1, xmm2			; 0F 7A 10 312
+frczps xmm1, [0]			; 0F 7A 10 016 00 00
+frczps xmm1, dqword [0]			; 0F 7A 10 016 00 00
+
+frczsd xmm1, xmm2			; 0F 7A 13 312
+frczsd xmm1, [0]			; 0F 7A 13 016 00 00
+frczsd xmm1, qword [0]			; 0F 7A 13 016 00 00
+
+frczss xmm1, xmm2			; 0F 7A 12 312
+frczss xmm1, [0]			; 0F 7A 12 016 00 00
+frczss xmm1, dword [0]			; 0F 7A 12 016 00 00
+
+pcmov xmm1, xmm1, xmm2, xmm3		; 0F 24 22 323 10 /or/ 0F 24 22 332 18
+pcmov xmm1, xmm1, xmm2, [0]		; 0F 24 22 026 10 00 00
+pcmov xmm1, xmm1, xmm2, dqword [0]	; 0F 24 22 026 10 00 00
+pcmov xmm1, xmm1, [0], xmm3		; 0F 24 22 036 18 00 00
+pcmov xmm1, xmm1, dqword [0], xmm3	; 0F 24 22 036 18 00 00
+pcmov xmm1, xmm2, xmm3, xmm1		; 0F 24 26 323 10 /or/ 0F 24 26 332 18
+pcmov xmm1, xmm2, [0], xmm1		; 0F 24 26 026 10 00 00
+pcmov xmm1, xmm2, dqword [0], xmm1	; 0F 24 26 026 10 00 00
+pcmov xmm1, [0], xmm3, xmm1		; 0F 24 26 036 18 00 00
+pcmov xmm1, dqword [0], xmm3, xmm1	; 0F 24 26 036 18 00 00
+
+pcomb xmm1, xmm4, xmm7, 5		; 0F 25 4C 347 10 05
+pcomb xmm2, xmm5, [0], byte 5		; 0F 25 4C 056 20 00 00 05
+pcomb xmm3, xmm6, dqword [0], 5		; 0F 25 4C 066 30 00 00 05
+
+pcomd xmm1, xmm4, xmm7, 5		; 0F 25 4E 347 10 05
+pcomd xmm2, xmm5, [0], byte 5		; 0F 25 4E 056 20 00 00 05
+pcomd xmm3, xmm6, dqword [0], 5		; 0F 25 4E 066 30 00 00 05
+
+pcomq xmm1, xmm4, xmm7, 5		; 0F 25 4F 347 10 05
+pcomq xmm2, xmm5, [0], byte 5		; 0F 25 4F 056 20 00 00 05
+pcomq xmm3, xmm6, dqword [0], 5		; 0F 25 4F 066 30 00 00 05
+
+pcomub xmm1, xmm4, xmm7, 5		; 0F 25 6C 347 10 05
+pcomub xmm2, xmm5, [0], byte 5		; 0F 25 6C 056 20 00 00 05
+pcomub xmm3, xmm6, dqword [0], 5	; 0F 25 6C 066 30 00 00 05
+
+pcomud xmm1, xmm4, xmm7, 5		; 0F 25 6E 347 10 05
+pcomud xmm2, xmm5, [0], byte 5		; 0F 25 6E 056 20 00 00 05
+pcomud xmm3, xmm6, dqword [0], 5	; 0F 25 6E 066 30 00 00 05
+
+pcomuq xmm1, xmm4, xmm7, 5		; 0F 25 6F 347 10 05
+pcomuq xmm2, xmm5, [0], byte 5		; 0F 25 6F 056 20 00 00 05
+pcomuq xmm3, xmm6, dqword [0], 5	; 0F 25 6F 066 30 00 00 05
+
+pcomuw xmm1, xmm4, xmm7, 5		; 0F 25 6D 347 10 05
+pcomuw xmm2, xmm5, [0], byte 5		; 0F 25 6D 056 20 00 00 05
+pcomuw xmm3, xmm6, dqword [0], 5	; 0F 25 6D 066 30 00 00 05
+
+pcomw xmm1, xmm4, xmm7, 5		; 0F 25 4D 347 10 05
+pcomw xmm2, xmm5, [0], byte 5		; 0F 25 4D 056 20 00 00 05
+pcomw xmm3, xmm6, dqword [0], 5		; 0F 25 4D 066 30 00 00 05
+
+permpd xmm1, xmm1, xmm2, xmm3		; 0F 24 21 323 10 /or/ 0F 24 21 332 18
+permpd xmm1, xmm1, xmm2, [0]		; 0F 24 21 026 10 00 00
+permpd xmm1, xmm1, xmm2, dqword [0]	; 0F 24 21 026 10 00 00
+permpd xmm1, xmm1, [0], xmm3		; 0F 24 21 036 18 00 00
+permpd xmm1, xmm1, dqword [0], xmm3	; 0F 24 21 036 18 00 00
+permpd xmm1, xmm2, xmm3, xmm1		; 0F 24 25 323 10 /or/ 0F 24 25 332 18
+permpd xmm1, xmm2, [0], xmm1		; 0F 24 25 026 10 00 00
+permpd xmm1, xmm2, dqword [0], xmm1	; 0F 24 25 026 10 00 00
+permpd xmm1, [0], xmm3, xmm1		; 0F 24 25 036 18 00 00
+permpd xmm1, dqword [0], xmm3, xmm1	; 0F 24 25 036 18 00 00
+
+permps xmm1, xmm1, xmm2, xmm3		; 0F 24 20 323 10 /or/ 0F 24 20 332 18
+permps xmm1, xmm1, xmm2, [0]		; 0F 24 20 026 10 00 00
+permps xmm1, xmm1, xmm2, dqword [0]	; 0F 24 20 026 10 00 00
+permps xmm1, xmm1, [0], xmm3		; 0F 24 20 036 18 00 00
+permps xmm1, xmm1, dqword [0], xmm3	; 0F 24 20 036 18 00 00
+permps xmm1, xmm2, xmm3, xmm1		; 0F 24 24 323 10 /or/ 0F 24 24 332 18
+permps xmm1, xmm2, [0], xmm1		; 0F 24 24 026 10 00 00
+permps xmm1, xmm2, dqword [0], xmm1	; 0F 24 24 026 10 00 00
+permps xmm1, [0], xmm3, xmm1		; 0F 24 24 036 18 00 00
+permps xmm1, dqword [0], xmm3, xmm1	; 0F 24 24 036 18 00 00
+
+phaddbd xmm1, xmm2			; 0F 7A 42 312
+phaddbd xmm1, [0]			; 0F 7A 42 016 00 00
+phaddbd xmm1, dqword [0]		; 0F 7A 42 016 00 00
+
+phaddbq xmm1, xmm2			; 0F 7A 43 312
+phaddbq xmm1, [0]			; 0F 7A 43 016 00 00
+phaddbq xmm1, dqword [0]		; 0F 7A 43 016 00 00
+
+phaddbw xmm1, xmm2			; 0F 7A 41 312
+phaddbw xmm1, [0]			; 0F 7A 41 016 00 00
+phaddbw xmm1, dqword [0]		; 0F 7A 41 016 00 00
+
+phadddq xmm1, xmm2			; 0F 7A 4B 312
+phadddq xmm1, [0]			; 0F 7A 4B 016 00 00
+phadddq xmm1, dqword [0]		; 0F 7A 4B 016 00 00
+
+phaddubd xmm1, xmm2			; 0F 7A 52 312
+phaddubd xmm1, [0]			; 0F 7A 52 016 00 00
+phaddubd xmm1, dqword [0]		; 0F 7A 52 016 00 00
+
+phaddubq xmm1, xmm2			; 0F 7A 53 312
+phaddubq xmm1, [0]			; 0F 7A 53 016 00 00
+phaddubq xmm1, dqword [0]		; 0F 7A 53 016 00 00
+
+phaddubw xmm1, xmm2			; 0F 7A 51 312
+phaddubw xmm1, [0]			; 0F 7A 51 016 00 00
+phaddubw xmm1, dqword [0]		; 0F 7A 51 016 00 00
+
+phaddudq xmm1, xmm2			; 0F 7A 5B 312
+phaddudq xmm1, [0]			; 0F 7A 5B 016 00 00
+phaddudq xmm1, dqword [0]		; 0F 7A 5B 016 00 00
+
+phadduwd xmm1, xmm2			; 0F 7A 56 312
+phadduwd xmm1, [0]			; 0F 7A 56 016 00 00
+phadduwd xmm1, dqword [0]		; 0F 7A 56 016 00 00
+
+phadduwq xmm1, xmm2			; 0F 7A 57 312
+phadduwq xmm1, [0]			; 0F 7A 57 016 00 00
+phadduwq xmm1, dqword [0]		; 0F 7A 57 016 00 00
+
+phaddwd xmm1, xmm2			; 0F 7A 46 312
+phaddwd xmm1, [0]			; 0F 7A 46 016 00 00
+phaddwd xmm1, dqword [0]		; 0F 7A 46 016 00 00
+
+phaddwq xmm1, xmm2			; 0F 7A 47 312
+phaddwq xmm1, [0]			; 0F 7A 47 016 00 00
+phaddwq xmm1, dqword [0]		; 0F 7A 47 016 00 00
+
+phsubbw xmm1, xmm2			; 0F 7A 61 312
+phsubbw xmm1, [0]			; 0F 7A 61 016 00 00
+phsubbw xmm1, dqword [0]		; 0F 7A 61 016 00 00
+
+phsubdq xmm1, xmm2			; 0F 7A 63 312
+phsubdq xmm1, [0]			; 0F 7A 63 016 00 00
+phsubdq xmm1, dqword [0]		; 0F 7A 63 016 00 00
+
+phsubwd xmm1, xmm2			; 0F 7A 62 312
+phsubwd xmm1, [0]			; 0F 7A 62 016 00 00
+phsubwd xmm1, dqword [0]		; 0F 7A 62 016 00 00
+
+pmacsdd xmm1, xmm4, xmm7, xmm1		; 0F 24 9E 347 10
+pmacsdd xmm2, xmm5, [0], xmm2		; 0F 24 9E 056 20 00 00
+pmacsdd xmm3, xmm6, dqword [0], xmm3	; 0F 24 9E 066 30 00 00
+
+pmacsdqh xmm1, xmm4, xmm7, xmm1		; 0F 24 9F 347 10
+pmacsdqh xmm2, xmm5, [0], xmm2		; 0F 24 9F 056 20 00 00
+pmacsdqh xmm3, xmm6, dqword [0], xmm3	; 0F 24 9F 066 30 00 00
+
+pmacsdql xmm1, xmm4, xmm7, xmm1		; 0F 24 97 347 10
+pmacsdql xmm2, xmm5, [0], xmm2		; 0F 24 97 056 20 00 00
+pmacsdql xmm3, xmm6, dqword [0], xmm3	; 0F 24 97 066 30 00 00
+
+pmacssdd xmm1, xmm4, xmm7, xmm1		; 0F 24 8E 347 10
+pmacssdd xmm2, xmm5, [0], xmm2		; 0F 24 8E 056 20 00 00
+pmacssdd xmm3, xmm6, dqword [0], xmm3	; 0F 24 8E 066 30 00 00
+
+pmacssdqh xmm1, xmm4, xmm7, xmm1	; 0F 24 8F 347 10
+pmacssdqh xmm2, xmm5, [0], xmm2		; 0F 24 8F 056 20 00 00
+pmacssdqh xmm3, xmm6, dqword [0], xmm3	; 0F 24 8F 066 30 00 00
+
+pmacssdql xmm1, xmm4, xmm7, xmm1	; 0F 24 87 347 10
+pmacssdql xmm2, xmm5, [0], xmm2		; 0F 24 87 056 20 00 00
+pmacssdql xmm3, xmm6, dqword [0], xmm3	; 0F 24 87 066 30 00 00
+
+pmacsswd xmm1, xmm4, xmm7, xmm1		; 0F 24 86 347 10
+pmacsswd xmm2, xmm5, [0], xmm2		; 0F 24 86 056 20 00 00
+pmacsswd xmm3, xmm6, dqword [0], xmm3	; 0F 24 86 066 30 00 00
+
+pmacssww xmm1, xmm4, xmm7, xmm1		; 0F 24 85 347 10
+pmacssww xmm2, xmm5, [0], xmm2		; 0F 24 85 056 20 00 00
+pmacssww xmm3, xmm6, dqword [0], xmm3	; 0F 24 85 066 30 00 00
+
+pmacswd xmm1, xmm4, xmm7, xmm1		; 0F 24 96 347 10
+pmacswd xmm2, xmm5, [0], xmm2		; 0F 24 96 056 20 00 00
+pmacswd xmm3, xmm6, dqword [0], xmm3	; 0F 24 96 066 30 00 00
+
+pmacsww xmm1, xmm4, xmm7, xmm1		; 0F 24 95 347 10
+pmacsww xmm2, xmm5, [0], xmm2		; 0F 24 95 056 20 00 00
+pmacsww xmm3, xmm6, dqword [0], xmm3	; 0F 24 95 066 30 00 00
+
+pmadcsswd xmm1, xmm4, xmm7, xmm1	; 0F 24 A6 347 10
+pmadcsswd xmm2, xmm5, [0], xmm2		; 0F 24 A6 056 20 00 00
+pmadcsswd xmm3, xmm6, dqword [0], xmm3	; 0F 24 A6 066 30 00 00
+
+pmadcswd xmm1, xmm4, xmm7, xmm1		; 0F 24 B6 347 10
+pmadcswd xmm2, xmm5, [0], xmm2		; 0F 24 B6 056 20 00 00
+pmadcswd xmm3, xmm6, dqword [0], xmm3	; 0F 24 B6 066 30 00 00
+
+pperm xmm1, xmm1, xmm2, xmm3		; 0F 24 23 323 10 /or/ 0F 24 23 332 18
+pperm xmm1, xmm1, xmm2, [0]		; 0F 24 23 026 10 00 00
+pperm xmm1, xmm1, xmm2, dqword [0]	; 0F 24 23 026 10 00 00
+pperm xmm1, xmm1, [0], xmm3		; 0F 24 23 036 18 00 00
+pperm xmm1, xmm1, dqword [0], xmm3	; 0F 24 23 036 18 00 00
+pperm xmm1, xmm2, xmm3, xmm1		; 0F 24 27 323 10 /or/ 0F 24 27 332 18
+pperm xmm1, xmm2, [0], xmm1		; 0F 24 27 026 10 00 00
+pperm xmm1, xmm2, dqword [0], xmm1	; 0F 24 27 026 10 00 00
+pperm xmm1, [0], xmm3, xmm1		; 0F 24 27 036 18 00 00
+pperm xmm1, dqword [0], xmm3, xmm1	; 0F 24 27 036 18 00 00
+
+protb xmm1, xmm2, xmm3			; 0F 24 40 323 10 /or/ 0F 24 40 332 18
+protb xmm1, xmm2, [0]			; 0F 24 40 026 10 00 00
+protb xmm1, xmm2, dqword [0]		; 0F 24 40 026 10 00 00
+protb xmm1, [0], xmm3			; 0F 24 40 036 18 00 00
+protb xmm1, dqword [0], xmm3		; 0F 24 40 036 18 00 00
+protb xmm1, xmm2, byte 5		; 0F 7B 40 312 05
+protb xmm1, [0], byte 5			; 0F 7B 40 016 00 00 05
+protb xmm1, dqword [0], 5		; 0F 7B 40 016 00 00 05
+
+protd xmm1, xmm2, xmm3			; 0F 24 42 323 10 /or/ 0F 24 42 332 18
+protd xmm1, xmm2, [0]			; 0F 24 42 026 10 00 00
+protd xmm1, xmm2, dqword [0]		; 0F 24 42 026 10 00 00
+protd xmm1, [0], xmm3			; 0F 24 42 036 18 00 00
+protd xmm1, dqword [0], xmm3		; 0F 24 42 036 18 00 00
+protd xmm1, xmm2, byte 5		; 0F 7B 42 312 05
+protd xmm1, [0], byte 5			; 0F 7B 42 016 00 00 05
+protd xmm1, dqword [0], 5		; 0F 7B 42 016 00 00 05
+
+protq xmm1, xmm2, xmm3			; 0F 24 43 323 10 /or/ 0F 24 43 332 18
+protq xmm1, xmm2, [0]			; 0F 24 43 026 10 00 00
+protq xmm1, xmm2, dqword [0]		; 0F 24 43 026 10 00 00
+protq xmm1, [0], xmm3			; 0F 24 43 036 18 00 00
+protq xmm1, dqword [0], xmm3		; 0F 24 43 036 18 00 00
+protq xmm1, xmm2, byte 5		; 0F 7B 43 312 05
+protq xmm1, [0], byte 5			; 0F 7B 43 016 00 00 05
+protq xmm1, dqword [0], 5		; 0F 7B 43 016 00 00 05
+
+protw xmm1, xmm2, xmm3			; 0F 24 41 323 10 /or/ 0F 24 41 332 18
+protw xmm1, xmm2, [0]			; 0F 24 41 026 10 00 00
+protw xmm1, xmm2, dqword [0]		; 0F 24 41 026 10 00 00
+protw xmm1, [0], xmm3			; 0F 24 41 036 18 00 00
+protw xmm1, dqword [0], xmm3		; 0F 24 41 036 18 00 00
+protw xmm1, xmm2, byte 5		; 0F 7B 41 312 05
+protw xmm1, [0], byte 5			; 0F 7B 41 016 00 00 05
+protw xmm1, dqword [0], 5		; 0F 7B 41 016 00 00 05
+
+pshab xmm1, xmm2, xmm3			; 0F 24 48 323 10 /or/ 0F 24 48 332 18
+pshab xmm1, xmm2, [0]			; 0F 24 48 026 10 00 00
+pshab xmm1, xmm2, dqword [0]		; 0F 24 48 026 10 00 00
+pshab xmm1, [0], xmm3			; 0F 24 48 036 18 00 00
+pshab xmm1, dqword [0], xmm3		; 0F 24 48 036 18 00 00
+
+pshad xmm1, xmm2, xmm3			; 0F 24 4A 323 10 /or/ 0F 24 4A 332 18
+pshad xmm1, xmm2, [0]			; 0F 24 4A 026 10 00 00
+pshad xmm1, xmm2, dqword [0]		; 0F 24 4A 026 10 00 00
+pshad xmm1, [0], xmm3			; 0F 24 4A 036 18 00 00
+pshad xmm1, dqword [0], xmm3		; 0F 24 4A 036 18 00 00
+
+pshaq xmm1, xmm2, xmm3			; 0F 24 4B 323 10 /or/ 0F 24 4B 332 18
+pshaq xmm1, xmm2, [0]			; 0F 24 4B 026 10 00 00
+pshaq xmm1, xmm2, dqword [0]		; 0F 24 4B 026 10 00 00
+pshaq xmm1, [0], xmm3			; 0F 24 4B 036 18 00 00
+pshaq xmm1, dqword [0], xmm3		; 0F 24 4B 036 18 00 00
+
+pshaw xmm1, xmm2, xmm3			; 0F 24 49 323 10 /or/ 0F 24 49 332 18
+pshaw xmm1, xmm2, [0]			; 0F 24 49 026 10 00 00
+pshaw xmm1, xmm2, dqword [0]		; 0F 24 49 026 10 00 00
+pshaw xmm1, [0], xmm3			; 0F 24 49 036 18 00 00
+pshaw xmm1, dqword [0], xmm3		; 0F 24 49 036 18 00 00
+
+pshlb xmm1, xmm2, xmm3			; 0F 24 44 323 10 /or/ 0F 24 44 332 18
+pshlb xmm1, xmm2, [0]			; 0F 24 44 026 10 00 00
+pshlb xmm1, xmm2, dqword [0]		; 0F 24 44 026 10 00 00
+pshlb xmm1, [0], xmm3			; 0F 24 44 036 18 00 00
+pshlb xmm1, dqword [0], xmm3		; 0F 24 44 036 18 00 00
+
+pshld xmm1, xmm2, xmm3			; 0F 24 46 323 10 /or/ 0F 24 46 332 18
+pshld xmm1, xmm2, [0]			; 0F 24 46 026 10 00 00
+pshld xmm1, xmm2, dqword [0]		; 0F 24 46 026 10 00 00
+pshld xmm1, [0], xmm3			; 0F 24 46 036 18 00 00
+pshld xmm1, dqword [0], xmm3		; 0F 24 46 036 18 00 00
+
+pshlq xmm1, xmm2, xmm3			; 0F 24 47 323 10 /or/ 0F 24 47 332 18
+pshlq xmm1, xmm2, [0]			; 0F 24 47 026 10 00 00
+pshlq xmm1, xmm2, dqword [0]		; 0F 24 47 026 10 00 00
+pshlq xmm1, [0], xmm3			; 0F 24 47 036 18 00 00
+pshlq xmm1, dqword [0], xmm3		; 0F 24 47 036 18 00 00
+
+pshlw xmm1, xmm2, xmm3			; 0F 24 45 323 10 /or/ 0F 24 45 332 18
+pshlw xmm1, xmm2, [0]			; 0F 24 45 026 10 00 00
+pshlw xmm1, xmm2, dqword [0]		; 0F 24 45 026 10 00 00
+pshlw xmm1, [0], xmm3			; 0F 24 45 036 18 00 00
+pshlw xmm1, dqword [0], xmm3		; 0F 24 45 036 18 00 00
+
+; SSE5 instructions that are also SSE4.1 instructions
+
+ptest xmm1, xmm2			; 66 0F 38 17 312
+ptest xmm1, [0]				; 66 0F 38 17 016 00 00
+ptest xmm1, dqword [0]			; 66 0F 38 17 016 00 00
+
+roundpd xmm1, xmm2, 5			; 66 0F 3A 09 312 05
+roundpd xmm1, [0], byte 5		; 66 0F 3A 09 016 00 00 05
+roundpd xmm1, dqword [0], 5		; 66 0F 3A 09 016 00 00 05
+
+roundps xmm1, xmm2, 5			; 66 0F 3A 08 312 05
+roundps xmm1, [0], byte 5		; 66 0F 3A 08 016 00 00 05
+roundps xmm1, dqword [0], 5		; 66 0F 3A 08 016 00 00 05
+
+roundsd xmm1, xmm2, 5			; 66 0F 3A 0B 312 05
+roundsd xmm1, [0], byte 5		; 66 0F 3A 0B 016 00 00 05
+roundsd xmm1, qword [0], 5		; 66 0F 3A 0B 016 00 00 05
+
+roundss xmm1, xmm2, 5			; 66 0F 3A 0A 312 05
+roundss xmm1, [0], byte 5		; 66 0F 3A 0A 016 00 00 05
+roundss xmm1, dword [0], 5		; 66 0F 3A 0A 016 00 00 05
+
diff --git a/modules/arch/x86/tests/sse5-all.hex b/modules/arch/x86/tests/sse5-all.hex
new file mode 100644
index 0000000..1c9edac
--- /dev/null
+++ b/modules/arch/x86/tests/sse5-all.hex
@@ -0,0 +1,2727 @@
+0f 
+25 
+2d 
+e7 
+10 
+05 
+0f 
+25 
+2d 
+2e 
+20 
+00 
+00 
+05 
+0f 
+25 
+2d 
+36 
+30 
+00 
+00 
+05 
+0f 
+25 
+2c 
+e7 
+10 
+05 
+0f 
+25 
+2c 
+2e 
+20 
+00 
+00 
+05 
+0f 
+25 
+2c 
+36 
+30 
+00 
+00 
+05 
+0f 
+25 
+2f 
+e7 
+10 
+05 
+0f 
+25 
+2f 
+2e 
+20 
+00 
+00 
+05 
+0f 
+25 
+2f 
+36 
+30 
+00 
+00 
+05 
+0f 
+25 
+2e 
+e7 
+10 
+05 
+0f 
+25 
+2e 
+2e 
+20 
+00 
+00 
+05 
+0f 
+25 
+2e 
+36 
+30 
+00 
+00 
+05 
+0f 
+7a 
+30 
+cc 
+0f 
+7a 
+30 
+16 
+00 
+00 
+0f 
+7a 
+30 
+1e 
+00 
+00 
+0f 
+7a 
+31 
+e1 
+0f 
+7a 
+31 
+16 
+00 
+00 
+0f 
+7a 
+31 
+1e 
+00 
+00 
+0f 
+24 
+01 
+d3 
+10 
+0f 
+24 
+01 
+16 
+10 
+00 
+00 
+0f 
+24 
+01 
+16 
+10 
+00 
+00 
+0f 
+24 
+01 
+1e 
+18 
+00 
+00 
+0f 
+24 
+01 
+1e 
+18 
+00 
+00 
+0f 
+24 
+05 
+d3 
+10 
+0f 
+24 
+05 
+16 
+10 
+00 
+00 
+0f 
+24 
+05 
+16 
+10 
+00 
+00 
+0f 
+24 
+05 
+1e 
+18 
+00 
+00 
+0f 
+24 
+05 
+1e 
+18 
+00 
+00 
+0f 
+24 
+00 
+d3 
+10 
+0f 
+24 
+00 
+16 
+10 
+00 
+00 
+0f 
+24 
+00 
+16 
+10 
+00 
+00 
+0f 
+24 
+00 
+1e 
+18 
+00 
+00 
+0f 
+24 
+00 
+1e 
+18 
+00 
+00 
+0f 
+24 
+04 
+d3 
+10 
+0f 
+24 
+04 
+16 
+10 
+00 
+00 
+0f 
+24 
+04 
+16 
+10 
+00 
+00 
+0f 
+24 
+04 
+1e 
+18 
+00 
+00 
+0f 
+24 
+04 
+1e 
+18 
+00 
+00 
+0f 
+24 
+03 
+d3 
+10 
+0f 
+24 
+03 
+16 
+10 
+00 
+00 
+0f 
+24 
+03 
+16 
+10 
+00 
+00 
+0f 
+24 
+03 
+1e 
+18 
+00 
+00 
+0f 
+24 
+03 
+1e 
+18 
+00 
+00 
+0f 
+24 
+07 
+d3 
+10 
+0f 
+24 
+07 
+16 
+10 
+00 
+00 
+0f 
+24 
+07 
+16 
+10 
+00 
+00 
+0f 
+24 
+07 
+1e 
+18 
+00 
+00 
+0f 
+24 
+07 
+1e 
+18 
+00 
+00 
+0f 
+24 
+02 
+d3 
+10 
+0f 
+24 
+02 
+16 
+10 
+00 
+00 
+0f 
+24 
+02 
+16 
+10 
+00 
+00 
+0f 
+24 
+02 
+1e 
+18 
+00 
+00 
+0f 
+24 
+02 
+1e 
+18 
+00 
+00 
+0f 
+24 
+06 
+d3 
+10 
+0f 
+24 
+06 
+16 
+10 
+00 
+00 
+0f 
+24 
+06 
+16 
+10 
+00 
+00 
+0f 
+24 
+06 
+1e 
+18 
+00 
+00 
+0f 
+24 
+06 
+1e 
+18 
+00 
+00 
+0f 
+24 
+09 
+d3 
+10 
+0f 
+24 
+09 
+16 
+10 
+00 
+00 
+0f 
+24 
+09 
+16 
+10 
+00 
+00 
+0f 
+24 
+09 
+1e 
+18 
+00 
+00 
+0f 
+24 
+09 
+1e 
+18 
+00 
+00 
+0f 
+24 
+0d 
+d3 
+10 
+0f 
+24 
+0d 
+16 
+10 
+00 
+00 
+0f 
+24 
+0d 
+16 
+10 
+00 
+00 
+0f 
+24 
+0d 
+1e 
+18 
+00 
+00 
+0f 
+24 
+0d 
+1e 
+18 
+00 
+00 
+0f 
+24 
+08 
+d3 
+10 
+0f 
+24 
+08 
+16 
+10 
+00 
+00 
+0f 
+24 
+08 
+16 
+10 
+00 
+00 
+0f 
+24 
+08 
+1e 
+18 
+00 
+00 
+0f 
+24 
+08 
+1e 
+18 
+00 
+00 
+0f 
+24 
+0c 
+d3 
+10 
+0f 
+24 
+0c 
+16 
+10 
+00 
+00 
+0f 
+24 
+0c 
+16 
+10 
+00 
+00 
+0f 
+24 
+0c 
+1e 
+18 
+00 
+00 
+0f 
+24 
+0c 
+1e 
+18 
+00 
+00 
+0f 
+24 
+0b 
+d3 
+10 
+0f 
+24 
+0b 
+16 
+10 
+00 
+00 
+0f 
+24 
+0b 
+16 
+10 
+00 
+00 
+0f 
+24 
+0b 
+1e 
+18 
+00 
+00 
+0f 
+24 
+0b 
+1e 
+18 
+00 
+00 
+0f 
+24 
+0f 
+d3 
+10 
+0f 
+24 
+0f 
+16 
+10 
+00 
+00 
+0f 
+24 
+0f 
+16 
+10 
+00 
+00 
+0f 
+24 
+0f 
+1e 
+18 
+00 
+00 
+0f 
+24 
+0f 
+1e 
+18 
+00 
+00 
+0f 
+24 
+0a 
+d3 
+10 
+0f 
+24 
+0a 
+16 
+10 
+00 
+00 
+0f 
+24 
+0a 
+16 
+10 
+00 
+00 
+0f 
+24 
+0a 
+1e 
+18 
+00 
+00 
+0f 
+24 
+0a 
+1e 
+18 
+00 
+00 
+0f 
+24 
+0e 
+d3 
+10 
+0f 
+24 
+0e 
+16 
+10 
+00 
+00 
+0f 
+24 
+0e 
+16 
+10 
+00 
+00 
+0f 
+24 
+0e 
+1e 
+18 
+00 
+00 
+0f 
+24 
+0e 
+1e 
+18 
+00 
+00 
+0f 
+24 
+11 
+d3 
+10 
+0f 
+24 
+11 
+16 
+10 
+00 
+00 
+0f 
+24 
+11 
+16 
+10 
+00 
+00 
+0f 
+24 
+11 
+1e 
+18 
+00 
+00 
+0f 
+24 
+11 
+1e 
+18 
+00 
+00 
+0f 
+24 
+15 
+d3 
+10 
+0f 
+24 
+15 
+16 
+10 
+00 
+00 
+0f 
+24 
+15 
+16 
+10 
+00 
+00 
+0f 
+24 
+15 
+1e 
+18 
+00 
+00 
+0f 
+24 
+15 
+1e 
+18 
+00 
+00 
+0f 
+24 
+10 
+d3 
+10 
+0f 
+24 
+10 
+16 
+10 
+00 
+00 
+0f 
+24 
+10 
+16 
+10 
+00 
+00 
+0f 
+24 
+10 
+1e 
+18 
+00 
+00 
+0f 
+24 
+10 
+1e 
+18 
+00 
+00 
+0f 
+24 
+14 
+d3 
+10 
+0f 
+24 
+14 
+16 
+10 
+00 
+00 
+0f 
+24 
+14 
+16 
+10 
+00 
+00 
+0f 
+24 
+14 
+1e 
+18 
+00 
+00 
+0f 
+24 
+14 
+1e 
+18 
+00 
+00 
+0f 
+24 
+13 
+d3 
+10 
+0f 
+24 
+13 
+16 
+10 
+00 
+00 
+0f 
+24 
+13 
+16 
+10 
+00 
+00 
+0f 
+24 
+13 
+1e 
+18 
+00 
+00 
+0f 
+24 
+13 
+1e 
+18 
+00 
+00 
+0f 
+24 
+17 
+d3 
+10 
+0f 
+24 
+17 
+16 
+10 
+00 
+00 
+0f 
+24 
+17 
+16 
+10 
+00 
+00 
+0f 
+24 
+17 
+1e 
+18 
+00 
+00 
+0f 
+24 
+17 
+1e 
+18 
+00 
+00 
+0f 
+24 
+12 
+d3 
+10 
+0f 
+24 
+12 
+16 
+10 
+00 
+00 
+0f 
+24 
+12 
+16 
+10 
+00 
+00 
+0f 
+24 
+12 
+1e 
+18 
+00 
+00 
+0f 
+24 
+12 
+1e 
+18 
+00 
+00 
+0f 
+24 
+16 
+d3 
+10 
+0f 
+24 
+16 
+16 
+10 
+00 
+00 
+0f 
+24 
+16 
+16 
+10 
+00 
+00 
+0f 
+24 
+16 
+1e 
+18 
+00 
+00 
+0f 
+24 
+16 
+1e 
+18 
+00 
+00 
+0f 
+24 
+19 
+d3 
+10 
+0f 
+24 
+19 
+16 
+10 
+00 
+00 
+0f 
+24 
+19 
+16 
+10 
+00 
+00 
+0f 
+24 
+19 
+1e 
+18 
+00 
+00 
+0f 
+24 
+19 
+1e 
+18 
+00 
+00 
+0f 
+24 
+1d 
+d3 
+10 
+0f 
+24 
+1d 
+16 
+10 
+00 
+00 
+0f 
+24 
+1d 
+16 
+10 
+00 
+00 
+0f 
+24 
+1d 
+1e 
+18 
+00 
+00 
+0f 
+24 
+1d 
+1e 
+18 
+00 
+00 
+0f 
+24 
+18 
+d3 
+10 
+0f 
+24 
+18 
+16 
+10 
+00 
+00 
+0f 
+24 
+18 
+16 
+10 
+00 
+00 
+0f 
+24 
+18 
+1e 
+18 
+00 
+00 
+0f 
+24 
+18 
+1e 
+18 
+00 
+00 
+0f 
+24 
+1c 
+d3 
+10 
+0f 
+24 
+1c 
+16 
+10 
+00 
+00 
+0f 
+24 
+1c 
+16 
+10 
+00 
+00 
+0f 
+24 
+1c 
+1e 
+18 
+00 
+00 
+0f 
+24 
+1c 
+1e 
+18 
+00 
+00 
+0f 
+24 
+1b 
+d3 
+10 
+0f 
+24 
+1b 
+16 
+10 
+00 
+00 
+0f 
+24 
+1b 
+16 
+10 
+00 
+00 
+0f 
+24 
+1b 
+1e 
+18 
+00 
+00 
+0f 
+24 
+1b 
+1e 
+18 
+00 
+00 
+0f 
+24 
+1f 
+d3 
+10 
+0f 
+24 
+1f 
+16 
+10 
+00 
+00 
+0f 
+24 
+1f 
+16 
+10 
+00 
+00 
+0f 
+24 
+1f 
+1e 
+18 
+00 
+00 
+0f 
+24 
+1f 
+1e 
+18 
+00 
+00 
+0f 
+24 
+1a 
+d3 
+10 
+0f 
+24 
+1a 
+16 
+10 
+00 
+00 
+0f 
+24 
+1a 
+16 
+10 
+00 
+00 
+0f 
+24 
+1a 
+1e 
+18 
+00 
+00 
+0f 
+24 
+1a 
+1e 
+18 
+00 
+00 
+0f 
+24 
+1e 
+d3 
+10 
+0f 
+24 
+1e 
+16 
+10 
+00 
+00 
+0f 
+24 
+1e 
+16 
+10 
+00 
+00 
+0f 
+24 
+1e 
+1e 
+18 
+00 
+00 
+0f 
+24 
+1e 
+1e 
+18 
+00 
+00 
+0f 
+7a 
+11 
+ca 
+0f 
+7a 
+11 
+0e 
+00 
+00 
+0f 
+7a 
+11 
+0e 
+00 
+00 
+0f 
+7a 
+10 
+ca 
+0f 
+7a 
+10 
+0e 
+00 
+00 
+0f 
+7a 
+10 
+0e 
+00 
+00 
+0f 
+7a 
+13 
+ca 
+0f 
+7a 
+13 
+0e 
+00 
+00 
+0f 
+7a 
+13 
+0e 
+00 
+00 
+0f 
+7a 
+12 
+ca 
+0f 
+7a 
+12 
+0e 
+00 
+00 
+0f 
+7a 
+12 
+0e 
+00 
+00 
+0f 
+24 
+22 
+d3 
+10 
+0f 
+24 
+22 
+16 
+10 
+00 
+00 
+0f 
+24 
+22 
+16 
+10 
+00 
+00 
+0f 
+24 
+22 
+1e 
+18 
+00 
+00 
+0f 
+24 
+22 
+1e 
+18 
+00 
+00 
+0f 
+24 
+26 
+d3 
+10 
+0f 
+24 
+26 
+16 
+10 
+00 
+00 
+0f 
+24 
+26 
+16 
+10 
+00 
+00 
+0f 
+24 
+26 
+1e 
+18 
+00 
+00 
+0f 
+24 
+26 
+1e 
+18 
+00 
+00 
+0f 
+25 
+4c 
+e7 
+10 
+05 
+0f 
+25 
+4c 
+2e 
+20 
+00 
+00 
+05 
+0f 
+25 
+4c 
+36 
+30 
+00 
+00 
+05 
+0f 
+25 
+4e 
+e7 
+10 
+05 
+0f 
+25 
+4e 
+2e 
+20 
+00 
+00 
+05 
+0f 
+25 
+4e 
+36 
+30 
+00 
+00 
+05 
+0f 
+25 
+4f 
+e7 
+10 
+05 
+0f 
+25 
+4f 
+2e 
+20 
+00 
+00 
+05 
+0f 
+25 
+4f 
+36 
+30 
+00 
+00 
+05 
+0f 
+25 
+6c 
+e7 
+10 
+05 
+0f 
+25 
+6c 
+2e 
+20 
+00 
+00 
+05 
+0f 
+25 
+6c 
+36 
+30 
+00 
+00 
+05 
+0f 
+25 
+6e 
+e7 
+10 
+05 
+0f 
+25 
+6e 
+2e 
+20 
+00 
+00 
+05 
+0f 
+25 
+6e 
+36 
+30 
+00 
+00 
+05 
+0f 
+25 
+6f 
+e7 
+10 
+05 
+0f 
+25 
+6f 
+2e 
+20 
+00 
+00 
+05 
+0f 
+25 
+6f 
+36 
+30 
+00 
+00 
+05 
+0f 
+25 
+6d 
+e7 
+10 
+05 
+0f 
+25 
+6d 
+2e 
+20 
+00 
+00 
+05 
+0f 
+25 
+6d 
+36 
+30 
+00 
+00 
+05 
+0f 
+25 
+4d 
+e7 
+10 
+05 
+0f 
+25 
+4d 
+2e 
+20 
+00 
+00 
+05 
+0f 
+25 
+4d 
+36 
+30 
+00 
+00 
+05 
+0f 
+24 
+21 
+d3 
+10 
+0f 
+24 
+21 
+16 
+10 
+00 
+00 
+0f 
+24 
+21 
+16 
+10 
+00 
+00 
+0f 
+24 
+21 
+1e 
+18 
+00 
+00 
+0f 
+24 
+21 
+1e 
+18 
+00 
+00 
+0f 
+24 
+25 
+d3 
+10 
+0f 
+24 
+25 
+16 
+10 
+00 
+00 
+0f 
+24 
+25 
+16 
+10 
+00 
+00 
+0f 
+24 
+25 
+1e 
+18 
+00 
+00 
+0f 
+24 
+25 
+1e 
+18 
+00 
+00 
+0f 
+24 
+20 
+d3 
+10 
+0f 
+24 
+20 
+16 
+10 
+00 
+00 
+0f 
+24 
+20 
+16 
+10 
+00 
+00 
+0f 
+24 
+20 
+1e 
+18 
+00 
+00 
+0f 
+24 
+20 
+1e 
+18 
+00 
+00 
+0f 
+24 
+24 
+d3 
+10 
+0f 
+24 
+24 
+16 
+10 
+00 
+00 
+0f 
+24 
+24 
+16 
+10 
+00 
+00 
+0f 
+24 
+24 
+1e 
+18 
+00 
+00 
+0f 
+24 
+24 
+1e 
+18 
+00 
+00 
+0f 
+7a 
+42 
+ca 
+0f 
+7a 
+42 
+0e 
+00 
+00 
+0f 
+7a 
+42 
+0e 
+00 
+00 
+0f 
+7a 
+43 
+ca 
+0f 
+7a 
+43 
+0e 
+00 
+00 
+0f 
+7a 
+43 
+0e 
+00 
+00 
+0f 
+7a 
+41 
+ca 
+0f 
+7a 
+41 
+0e 
+00 
+00 
+0f 
+7a 
+41 
+0e 
+00 
+00 
+0f 
+7a 
+4b 
+ca 
+0f 
+7a 
+4b 
+0e 
+00 
+00 
+0f 
+7a 
+4b 
+0e 
+00 
+00 
+0f 
+7a 
+52 
+ca 
+0f 
+7a 
+52 
+0e 
+00 
+00 
+0f 
+7a 
+52 
+0e 
+00 
+00 
+0f 
+7a 
+53 
+ca 
+0f 
+7a 
+53 
+0e 
+00 
+00 
+0f 
+7a 
+53 
+0e 
+00 
+00 
+0f 
+7a 
+51 
+ca 
+0f 
+7a 
+51 
+0e 
+00 
+00 
+0f 
+7a 
+51 
+0e 
+00 
+00 
+0f 
+7a 
+5b 
+ca 
+0f 
+7a 
+5b 
+0e 
+00 
+00 
+0f 
+7a 
+5b 
+0e 
+00 
+00 
+0f 
+7a 
+56 
+ca 
+0f 
+7a 
+56 
+0e 
+00 
+00 
+0f 
+7a 
+56 
+0e 
+00 
+00 
+0f 
+7a 
+57 
+ca 
+0f 
+7a 
+57 
+0e 
+00 
+00 
+0f 
+7a 
+57 
+0e 
+00 
+00 
+0f 
+7a 
+46 
+ca 
+0f 
+7a 
+46 
+0e 
+00 
+00 
+0f 
+7a 
+46 
+0e 
+00 
+00 
+0f 
+7a 
+47 
+ca 
+0f 
+7a 
+47 
+0e 
+00 
+00 
+0f 
+7a 
+47 
+0e 
+00 
+00 
+0f 
+7a 
+61 
+ca 
+0f 
+7a 
+61 
+0e 
+00 
+00 
+0f 
+7a 
+61 
+0e 
+00 
+00 
+0f 
+7a 
+63 
+ca 
+0f 
+7a 
+63 
+0e 
+00 
+00 
+0f 
+7a 
+63 
+0e 
+00 
+00 
+0f 
+7a 
+62 
+ca 
+0f 
+7a 
+62 
+0e 
+00 
+00 
+0f 
+7a 
+62 
+0e 
+00 
+00 
+0f 
+24 
+9e 
+e7 
+10 
+0f 
+24 
+9e 
+2e 
+20 
+00 
+00 
+0f 
+24 
+9e 
+36 
+30 
+00 
+00 
+0f 
+24 
+9f 
+e7 
+10 
+0f 
+24 
+9f 
+2e 
+20 
+00 
+00 
+0f 
+24 
+9f 
+36 
+30 
+00 
+00 
+0f 
+24 
+97 
+e7 
+10 
+0f 
+24 
+97 
+2e 
+20 
+00 
+00 
+0f 
+24 
+97 
+36 
+30 
+00 
+00 
+0f 
+24 
+8e 
+e7 
+10 
+0f 
+24 
+8e 
+2e 
+20 
+00 
+00 
+0f 
+24 
+8e 
+36 
+30 
+00 
+00 
+0f 
+24 
+8f 
+e7 
+10 
+0f 
+24 
+8f 
+2e 
+20 
+00 
+00 
+0f 
+24 
+8f 
+36 
+30 
+00 
+00 
+0f 
+24 
+87 
+e7 
+10 
+0f 
+24 
+87 
+2e 
+20 
+00 
+00 
+0f 
+24 
+87 
+36 
+30 
+00 
+00 
+0f 
+24 
+86 
+e7 
+10 
+0f 
+24 
+86 
+2e 
+20 
+00 
+00 
+0f 
+24 
+86 
+36 
+30 
+00 
+00 
+0f 
+24 
+85 
+e7 
+10 
+0f 
+24 
+85 
+2e 
+20 
+00 
+00 
+0f 
+24 
+85 
+36 
+30 
+00 
+00 
+0f 
+24 
+96 
+e7 
+10 
+0f 
+24 
+96 
+2e 
+20 
+00 
+00 
+0f 
+24 
+96 
+36 
+30 
+00 
+00 
+0f 
+24 
+95 
+e7 
+10 
+0f 
+24 
+95 
+2e 
+20 
+00 
+00 
+0f 
+24 
+95 
+36 
+30 
+00 
+00 
+0f 
+24 
+a6 
+e7 
+10 
+0f 
+24 
+a6 
+2e 
+20 
+00 
+00 
+0f 
+24 
+a6 
+36 
+30 
+00 
+00 
+0f 
+24 
+b6 
+e7 
+10 
+0f 
+24 
+b6 
+2e 
+20 
+00 
+00 
+0f 
+24 
+b6 
+36 
+30 
+00 
+00 
+0f 
+24 
+23 
+d3 
+10 
+0f 
+24 
+23 
+16 
+10 
+00 
+00 
+0f 
+24 
+23 
+16 
+10 
+00 
+00 
+0f 
+24 
+23 
+1e 
+18 
+00 
+00 
+0f 
+24 
+23 
+1e 
+18 
+00 
+00 
+0f 
+24 
+27 
+d3 
+10 
+0f 
+24 
+27 
+16 
+10 
+00 
+00 
+0f 
+24 
+27 
+16 
+10 
+00 
+00 
+0f 
+24 
+27 
+1e 
+18 
+00 
+00 
+0f 
+24 
+27 
+1e 
+18 
+00 
+00 
+0f 
+24 
+40 
+d3 
+10 
+0f 
+24 
+40 
+16 
+10 
+00 
+00 
+0f 
+24 
+40 
+16 
+10 
+00 
+00 
+0f 
+24 
+40 
+1e 
+18 
+00 
+00 
+0f 
+24 
+40 
+1e 
+18 
+00 
+00 
+0f 
+7b 
+40 
+ca 
+05 
+0f 
+7b 
+40 
+0e 
+00 
+00 
+05 
+0f 
+7b 
+40 
+0e 
+00 
+00 
+05 
+0f 
+24 
+42 
+d3 
+10 
+0f 
+24 
+42 
+16 
+10 
+00 
+00 
+0f 
+24 
+42 
+16 
+10 
+00 
+00 
+0f 
+24 
+42 
+1e 
+18 
+00 
+00 
+0f 
+24 
+42 
+1e 
+18 
+00 
+00 
+0f 
+7b 
+42 
+ca 
+05 
+0f 
+7b 
+42 
+0e 
+00 
+00 
+05 
+0f 
+7b 
+42 
+0e 
+00 
+00 
+05 
+0f 
+24 
+43 
+d3 
+10 
+0f 
+24 
+43 
+16 
+10 
+00 
+00 
+0f 
+24 
+43 
+16 
+10 
+00 
+00 
+0f 
+24 
+43 
+1e 
+18 
+00 
+00 
+0f 
+24 
+43 
+1e 
+18 
+00 
+00 
+0f 
+7b 
+43 
+ca 
+05 
+0f 
+7b 
+43 
+0e 
+00 
+00 
+05 
+0f 
+7b 
+43 
+0e 
+00 
+00 
+05 
+0f 
+24 
+41 
+d3 
+10 
+0f 
+24 
+41 
+16 
+10 
+00 
+00 
+0f 
+24 
+41 
+16 
+10 
+00 
+00 
+0f 
+24 
+41 
+1e 
+18 
+00 
+00 
+0f 
+24 
+41 
+1e 
+18 
+00 
+00 
+0f 
+7b 
+41 
+ca 
+05 
+0f 
+7b 
+41 
+0e 
+00 
+00 
+05 
+0f 
+7b 
+41 
+0e 
+00 
+00 
+05 
+0f 
+24 
+48 
+d3 
+10 
+0f 
+24 
+48 
+16 
+10 
+00 
+00 
+0f 
+24 
+48 
+16 
+10 
+00 
+00 
+0f 
+24 
+48 
+1e 
+18 
+00 
+00 
+0f 
+24 
+48 
+1e 
+18 
+00 
+00 
+0f 
+24 
+4a 
+d3 
+10 
+0f 
+24 
+4a 
+16 
+10 
+00 
+00 
+0f 
+24 
+4a 
+16 
+10 
+00 
+00 
+0f 
+24 
+4a 
+1e 
+18 
+00 
+00 
+0f 
+24 
+4a 
+1e 
+18 
+00 
+00 
+0f 
+24 
+4b 
+d3 
+10 
+0f 
+24 
+4b 
+16 
+10 
+00 
+00 
+0f 
+24 
+4b 
+16 
+10 
+00 
+00 
+0f 
+24 
+4b 
+1e 
+18 
+00 
+00 
+0f 
+24 
+4b 
+1e 
+18 
+00 
+00 
+0f 
+24 
+49 
+d3 
+10 
+0f 
+24 
+49 
+16 
+10 
+00 
+00 
+0f 
+24 
+49 
+16 
+10 
+00 
+00 
+0f 
+24 
+49 
+1e 
+18 
+00 
+00 
+0f 
+24 
+49 
+1e 
+18 
+00 
+00 
+0f 
+24 
+44 
+d3 
+10 
+0f 
+24 
+44 
+16 
+10 
+00 
+00 
+0f 
+24 
+44 
+16 
+10 
+00 
+00 
+0f 
+24 
+44 
+1e 
+18 
+00 
+00 
+0f 
+24 
+44 
+1e 
+18 
+00 
+00 
+0f 
+24 
+46 
+d3 
+10 
+0f 
+24 
+46 
+16 
+10 
+00 
+00 
+0f 
+24 
+46 
+16 
+10 
+00 
+00 
+0f 
+24 
+46 
+1e 
+18 
+00 
+00 
+0f 
+24 
+46 
+1e 
+18 
+00 
+00 
+0f 
+24 
+47 
+d3 
+10 
+0f 
+24 
+47 
+16 
+10 
+00 
+00 
+0f 
+24 
+47 
+16 
+10 
+00 
+00 
+0f 
+24 
+47 
+1e 
+18 
+00 
+00 
+0f 
+24 
+47 
+1e 
+18 
+00 
+00 
+0f 
+24 
+45 
+d3 
+10 
+0f 
+24 
+45 
+16 
+10 
+00 
+00 
+0f 
+24 
+45 
+16 
+10 
+00 
+00 
+0f 
+24 
+45 
+1e 
+18 
+00 
+00 
+0f 
+24 
+45 
+1e 
+18 
+00 
+00 
+66 
+0f 
+38 
+17 
+ca 
+66 
+0f 
+38 
+17 
+0e 
+00 
+00 
+66 
+0f 
+38 
+17 
+0e 
+00 
+00 
+66 
+0f 
+3a 
+09 
+ca 
+05 
+66 
+0f 
+3a 
+09 
+0e 
+00 
+00 
+05 
+66 
+0f 
+3a 
+09 
+0e 
+00 
+00 
+05 
+66 
+0f 
+3a 
+08 
+ca 
+05 
+66 
+0f 
+3a 
+08 
+0e 
+00 
+00 
+05 
+66 
+0f 
+3a 
+08 
+0e 
+00 
+00 
+05 
+66 
+0f 
+3a 
+0b 
+ca 
+05 
+66 
+0f 
+3a 
+0b 
+0e 
+00 
+00 
+05 
+66 
+0f 
+3a 
+0b 
+0e 
+00 
+00 
+05 
+66 
+0f 
+3a 
+0a 
+ca 
+05 
+66 
+0f 
+3a 
+0a 
+0e 
+00 
+00 
+05 
+66 
+0f 
+3a 
+0a 
+0e 
+00 
+00 
+05 
diff --git a/modules/arch/x86/tests/sse5-basic.asm b/modules/arch/x86/tests/sse5-basic.asm
new file mode 100644
index 0000000..ed79e77
--- /dev/null
+++ b/modules/arch/x86/tests/sse5-basic.asm
@@ -0,0 +1,12 @@
+[bits 32]
+compd xmm1, xmm4, xmm7, 5			; 0F 25 2D 347 10 05
+compd xmm2, xmm5, [0], byte 5			; 0F 25 2D 055 20 00 00 00 00 05
+compd xmm3, xmm6, dqword [ebx+ecx*4], byte 5	; 0F 25 2D 064 213 30 05
+
+[bits 64]
+compd xmm8, xmm11, xmm3, 5			; 0F 25 2D 333 84 05
+compd xmm12, xmm4, xmm14, 5			; 0F 25 2D 346 C1 05
+compd xmm9, xmm12, [0], byte 5			; 0F 25 2D 044 045 94 00 00 00 00 05
+compd xmm9, xmm12, [r8], byte 5			; 0F 25 2D 040 95 05
+compd xmm10, xmm13, dqword [rbx+r9*4], 5	; 0F 25 2D 054 213 A6 05
+
diff --git a/modules/arch/x86/tests/sse5-basic.hex b/modules/arch/x86/tests/sse5-basic.hex
new file mode 100644
index 0000000..2d6c87b
--- /dev/null
+++ b/modules/arch/x86/tests/sse5-basic.hex
@@ -0,0 +1,59 @@
+0f 
+25 
+2d 
+e7 
+10 
+05 
+0f 
+25 
+2d 
+2d 
+20 
+00 
+00 
+00 
+00 
+05 
+0f 
+25 
+2d 
+34 
+8b 
+30 
+05 
+0f 
+25 
+2d 
+db 
+84 
+05 
+0f 
+25 
+2d 
+e6 
+c1 
+05 
+0f 
+25 
+2d 
+24 
+25 
+94 
+00 
+00 
+00 
+00 
+05 
+0f 
+25 
+2d 
+20 
+95 
+05 
+0f 
+25 
+2d 
+2c 
+8b 
+a6 
+05 
diff --git a/modules/arch/x86/tests/sse5-err.asm b/modules/arch/x86/tests/sse5-err.asm
new file mode 100644
index 0000000..93b474f
--- /dev/null
+++ b/modules/arch/x86/tests/sse5-err.asm
@@ -0,0 +1,116 @@
+fmaddpd xmm1, xmm2, xmm1, xmm3		; illegal
+fmaddpd xmm1, xmm2, xmm3, xmm3		; illegal
+fmaddpd xmm1, xmm2, xmm2, xmm3		; illegal
+
+fmaddps xmm1, xmm2, xmm1, xmm3		; illegal
+fmaddps xmm1, xmm2, xmm3, xmm3		; illegal
+fmaddps xmm1, xmm2, xmm2, xmm3		; illegal
+
+fmaddsd xmm1, xmm2, xmm1, xmm3		; illegal
+fmaddsd xmm1, xmm2, xmm3, xmm3		; illegal
+fmaddsd xmm1, xmm2, xmm2, xmm3		; illegal
+
+fmaddss xmm1, xmm2, xmm1, xmm3		; illegal
+fmaddss xmm1, xmm2, xmm3, xmm3		; illegal
+fmaddss xmm1, xmm2, xmm2, xmm3		; illegal
+
+fmsubpd xmm1, xmm2, xmm1, xmm3		; illegal
+fmsubpd xmm1, xmm2, xmm3, xmm3		; illegal
+fmsubpd xmm1, xmm2, xmm2, xmm3		; illegal
+
+fmsubps xmm1, xmm2, xmm1, xmm3		; illegal
+fmsubps xmm1, xmm2, xmm3, xmm3		; illegal
+fmsubps xmm1, xmm2, xmm2, xmm3		; illegal
+
+fmsubsd xmm1, xmm2, xmm1, xmm3		; illegal
+fmsubsd xmm1, xmm2, xmm3, xmm3		; illegal
+fmsubsd xmm1, xmm2, xmm2, xmm3		; illegal
+
+fmsubss xmm1, xmm2, xmm1, xmm3		; illegal
+fmsubss xmm1, xmm2, xmm3, xmm3		; illegal
+fmsubss xmm1, xmm2, xmm2, xmm3		; illegal
+
+fnmaddpd xmm1, xmm2, xmm1, xmm3		; illegal
+fnmaddpd xmm1, xmm2, xmm3, xmm3		; illegal
+fnmaddpd xmm1, xmm2, xmm2, xmm3		; illegal
+
+fnmaddps xmm1, xmm2, xmm1, xmm3		; illegal
+fnmaddps xmm1, xmm2, xmm3, xmm3		; illegal
+fnmaddps xmm1, xmm2, xmm2, xmm3		; illegal
+
+fnmaddsd xmm1, xmm2, xmm1, xmm3		; illegal
+fnmaddsd xmm1, xmm2, xmm3, xmm3		; illegal
+fnmaddsd xmm1, xmm2, xmm2, xmm3		; illegal
+
+fnmaddss xmm1, xmm2, xmm1, xmm3		; illegal
+fnmaddss xmm1, xmm2, xmm3, xmm3		; illegal
+fnmaddss xmm1, xmm2, xmm2, xmm3		; illegal
+
+fnmsubpd xmm1, xmm2, xmm1, xmm3		; illegal
+fnmsubpd xmm1, xmm2, xmm3, xmm3		; illegal
+fnmsubpd xmm1, xmm2, xmm2, xmm3		; illegal
+
+fnmsubps xmm1, xmm2, xmm1, xmm3		; illegal
+fnmsubps xmm1, xmm2, xmm3, xmm3		; illegal
+fnmsubps xmm1, xmm2, xmm2, xmm3		; illegal
+
+fnmsubsd xmm1, xmm2, xmm1, xmm3		; illegal
+fnmsubsd xmm1, xmm2, xmm3, xmm3		; illegal
+fnmsubsd xmm1, xmm2, xmm2, xmm3		; illegal
+
+fnmsubss xmm1, xmm2, xmm1, xmm3		; illegal
+fnmsubss xmm1, xmm2, xmm3, xmm3		; illegal
+fnmsubss xmm1, xmm2, xmm2, xmm3		; illegal
+
+pcmov xmm1, xmm2, xmm1, xmm3		; illegal
+pcmov xmm1, xmm2, xmm3, xmm3		; illegal
+pcmov xmm1, xmm2, xmm2, xmm3		; illegal
+
+permpd xmm1, xmm2, xmm1, xmm3		; illegal
+permpd xmm1, xmm2, xmm3, xmm3		; illegal
+permpd xmm1, xmm2, xmm2, xmm3		; illegal
+
+permps xmm1, xmm2, xmm1, xmm3		; illegal
+permps xmm1, xmm2, xmm3, xmm3		; illegal
+permps xmm1, xmm2, xmm2, xmm3		; illegal
+
+pmacsdd xmm1, xmm2, xmm1, xmm3		; illegal
+pmacsdd xmm1, xmm1, xmm2, xmm3		; illegal - better message?
+
+pmacsdqh xmm1, xmm2, xmm1, xmm3		; illegal
+pmacsdqh xmm1, xmm1, xmm2, xmm3		; illegal - better message?
+
+pmacsdql xmm1, xmm2, xmm1, xmm3		; illegal
+pmacsdql xmm1, xmm1, xmm2, xmm3		; illegal - better message?
+
+pmacssdd xmm1, xmm2, xmm1, xmm3		; illegal
+pmacssdd xmm1, xmm1, xmm2, xmm3		; illegal - better message?
+
+pmacssdqh xmm1, xmm2, xmm1, xmm3	; illegal
+pmacssdqh xmm1, xmm1, xmm2, xmm3	; illegal - better message?
+
+pmacssdql xmm1, xmm2, xmm1, xmm3	; illegal
+pmacssdql xmm1, xmm1, xmm2, xmm3	; illegal - better message?
+
+pmacsswd xmm1, xmm2, xmm1, xmm3		; illegal
+pmacsswd xmm1, xmm1, xmm2, xmm3		; illegal - better message?
+
+pmacssww xmm1, xmm2, xmm1, xmm3		; illegal
+pmacssww xmm1, xmm1, xmm2, xmm3		; illegal - better message?
+
+pmacswd xmm1, xmm2, xmm1, xmm3		; illegal
+pmacswd xmm1, xmm1, xmm2, xmm3		; illegal - better message?
+
+pmacsww xmm1, xmm2, xmm1, xmm3		; illegal
+pmacsww xmm1, xmm1, xmm2, xmm3		; illegal - better message?
+
+pmadcsswd xmm1, xmm2, xmm1, xmm3	; illegal
+pmadcsswd xmm1, xmm1, xmm2, xmm3	; illegal - better message?
+
+pmadcswd xmm1, xmm2, xmm1, xmm3		; illegal
+pmadcswd xmm1, xmm1, xmm2, xmm3		; illegal - better message?
+
+pperm xmm1, xmm2, xmm1, xmm3		; illegal
+pperm xmm1, xmm2, xmm3, xmm3		; illegal
+pperm xmm1, xmm2, xmm2, xmm3		; illegal
+
diff --git a/modules/arch/x86/tests/sse5-err.errwarn b/modules/arch/x86/tests/sse5-err.errwarn
new file mode 100644
index 0000000..19df6e0
--- /dev/null
+++ b/modules/arch/x86/tests/sse5-err.errwarn
@@ -0,0 +1,84 @@
+-:1: one of source operand 1 or 3 must match dest operand
+-:2: one of source operand 1 or 3 must match dest operand
+-:3: one of source operand 1 or 3 must match dest operand
+-:5: one of source operand 1 or 3 must match dest operand
+-:6: one of source operand 1 or 3 must match dest operand
+-:7: one of source operand 1 or 3 must match dest operand
+-:9: one of source operand 1 or 3 must match dest operand
+-:10: one of source operand 1 or 3 must match dest operand
+-:11: one of source operand 1 or 3 must match dest operand
+-:13: one of source operand 1 or 3 must match dest operand
+-:14: one of source operand 1 or 3 must match dest operand
+-:15: one of source operand 1 or 3 must match dest operand
+-:17: one of source operand 1 or 3 must match dest operand
+-:18: one of source operand 1 or 3 must match dest operand
+-:19: one of source operand 1 or 3 must match dest operand
+-:21: one of source operand 1 or 3 must match dest operand
+-:22: one of source operand 1 or 3 must match dest operand
+-:23: one of source operand 1 or 3 must match dest operand
+-:25: one of source operand 1 or 3 must match dest operand
+-:26: one of source operand 1 or 3 must match dest operand
+-:27: one of source operand 1 or 3 must match dest operand
+-:29: one of source operand 1 or 3 must match dest operand
+-:30: one of source operand 1 or 3 must match dest operand
+-:31: one of source operand 1 or 3 must match dest operand
+-:33: one of source operand 1 or 3 must match dest operand
+-:34: one of source operand 1 or 3 must match dest operand
+-:35: one of source operand 1 or 3 must match dest operand
+-:37: one of source operand 1 or 3 must match dest operand
+-:38: one of source operand 1 or 3 must match dest operand
+-:39: one of source operand 1 or 3 must match dest operand
+-:41: one of source operand 1 or 3 must match dest operand
+-:42: one of source operand 1 or 3 must match dest operand
+-:43: one of source operand 1 or 3 must match dest operand
+-:45: one of source operand 1 or 3 must match dest operand
+-:46: one of source operand 1 or 3 must match dest operand
+-:47: one of source operand 1 or 3 must match dest operand
+-:49: one of source operand 1 or 3 must match dest operand
+-:50: one of source operand 1 or 3 must match dest operand
+-:51: one of source operand 1 or 3 must match dest operand
+-:53: one of source operand 1 or 3 must match dest operand
+-:54: one of source operand 1 or 3 must match dest operand
+-:55: one of source operand 1 or 3 must match dest operand
+-:57: one of source operand 1 or 3 must match dest operand
+-:58: one of source operand 1 or 3 must match dest operand
+-:59: one of source operand 1 or 3 must match dest operand
+-:61: one of source operand 1 or 3 must match dest operand
+-:62: one of source operand 1 or 3 must match dest operand
+-:63: one of source operand 1 or 3 must match dest operand
+-:65: one of source operand 1 or 3 must match dest operand
+-:66: one of source operand 1 or 3 must match dest operand
+-:67: one of source operand 1 or 3 must match dest operand
+-:69: one of source operand 1 or 3 must match dest operand
+-:70: one of source operand 1 or 3 must match dest operand
+-:71: one of source operand 1 or 3 must match dest operand
+-:73: one of source operand 1 or 3 must match dest operand
+-:74: one of source operand 1 or 3 must match dest operand
+-:75: one of source operand 1 or 3 must match dest operand
+-:77: one of source operand 1 or 3 must match dest operand
+-:78: one of source operand 1 or 3 must match dest operand
+-:80: one of source operand 1 or 3 must match dest operand
+-:81: one of source operand 1 or 3 must match dest operand
+-:83: one of source operand 1 or 3 must match dest operand
+-:84: one of source operand 1 or 3 must match dest operand
+-:86: one of source operand 1 or 3 must match dest operand
+-:87: one of source operand 1 or 3 must match dest operand
+-:89: one of source operand 1 or 3 must match dest operand
+-:90: one of source operand 1 or 3 must match dest operand
+-:92: one of source operand 1 or 3 must match dest operand
+-:93: one of source operand 1 or 3 must match dest operand
+-:95: one of source operand 1 or 3 must match dest operand
+-:96: one of source operand 1 or 3 must match dest operand
+-:98: one of source operand 1 or 3 must match dest operand
+-:99: one of source operand 1 or 3 must match dest operand
+-:101: one of source operand 1 or 3 must match dest operand
+-:102: one of source operand 1 or 3 must match dest operand
+-:104: one of source operand 1 or 3 must match dest operand
+-:105: one of source operand 1 or 3 must match dest operand
+-:107: one of source operand 1 or 3 must match dest operand
+-:108: one of source operand 1 or 3 must match dest operand
+-:110: one of source operand 1 or 3 must match dest operand
+-:111: one of source operand 1 or 3 must match dest operand
+-:113: one of source operand 1 or 3 must match dest operand
+-:114: one of source operand 1 or 3 must match dest operand
+-:115: one of source operand 1 or 3 must match dest operand
diff --git a/modules/arch/x86/x86arch.c b/modules/arch/x86/x86arch.c
index 0f8c952..5b8ad70 100644
--- a/modules/arch/x86/x86arch.c
+++ b/modules/arch/x86/x86arch.c
@@ -57,10 +57,17 @@
 
     arch_x86->arch.module = &yasm_x86_LTX_arch;
 
-    arch_x86->cpu_enabled = ~CPU_Any;
+    /* default to all instructions/features enabled */
+    arch_x86->active_cpu = 0;
+    arch_x86->cpu_enables_size = 1;
+    arch_x86->cpu_enables = yasm_xmalloc(sizeof(wordptr));
+    arch_x86->cpu_enables[0] = BitVector_Create(64, FALSE);
+    BitVector_Fill(arch_x86->cpu_enables[0]);
+
     arch_x86->amd64_machine = amd64_machine;
     arch_x86->mode_bits = 0;
     arch_x86->force_strict = 0;
+    arch_x86->default_rel = 0;
 
     if (yasm__strcasecmp(parser, "nasm") == 0)
         arch_x86->parser = X86_PARSER_NASM;
@@ -79,6 +86,11 @@
 static void
 x86_destroy(/*@only@*/ yasm_arch *arch)
 {
+    yasm_arch_x86 *arch_x86 = (yasm_arch_x86 *)arch;
+    unsigned int i;
+    for (i=0; i<arch_x86->cpu_enables_size; i++)
+        BitVector_Destroy(arch_x86->cpu_enables[i]);
+    yasm_xfree(arch_x86->cpu_enables);
     yasm_xfree(arch);
 }
 
@@ -112,7 +124,13 @@
         arch_x86->mode_bits = (unsigned int)val;
     else if (yasm__strcasecmp(var, "force_strict") == 0)
         arch_x86->force_strict = (unsigned int)val;
-    else
+    else if (yasm__strcasecmp(var, "default_rel") == 0) {
+        if (arch_x86->mode_bits != 64)
+            yasm_warn_set(YASM_WARN_GENERAL,
+                          N_("ignoring default rel in non-64-bit mode"));
+        else
+            arch_x86->default_rel = (unsigned int)val;
+    } else
         return 1;
     return 0;
 }
diff --git a/modules/arch/x86/x86arch.h b/modules/arch/x86/x86arch.h
index cbe26f4..b5c0f09 100644
--- a/modules/arch/x86/x86arch.h
+++ b/modules/arch/x86/x86arch.h
@@ -27,53 +27,59 @@
 #ifndef YASM_X86ARCH_H
 #define YASM_X86ARCH_H
 
+#include <libyasm/bitvect.h>
+
 /* Available CPU feature flags */
-#define CPU_Any     (0UL)       /* Any old cpu will do */
+#define CPU_Any     0       /* Any old cpu will do */
 #define CPU_086     CPU_Any
-#define CPU_186     (1UL<<0)    /* i186 or better required */
-#define CPU_286     (1UL<<1)    /* i286 or better required */
-#define CPU_386     (1UL<<2)    /* i386 or better required */
-#define CPU_486     (1UL<<3)    /* i486 or better required */
-#define CPU_586     (1UL<<4)    /* i585 or better required */
-#define CPU_686     (1UL<<5)    /* i686 or better required */
-#define CPU_P3      (1UL<<6)    /* Pentium3 or better required */
-#define CPU_P4      (1UL<<7)    /* Pentium4 or better required */
-#define CPU_IA64    (1UL<<8)    /* IA-64 or better required */
-#define CPU_K6      (1UL<<9)    /* AMD K6 or better required */
-#define CPU_Athlon  (1UL<<10)   /* AMD Athlon or better required */
-#define CPU_Hammer  (1UL<<11)   /* AMD Sledgehammer or better required */
-#define CPU_FPU     (1UL<<12)   /* FPU support required */
-#define CPU_MMX     (1UL<<13)   /* MMX support required */
-#define CPU_SSE     (1UL<<14)   /* Streaming SIMD extensions required */
-#define CPU_SSE2    (1UL<<15)   /* Streaming SIMD extensions 2 required */
-#define CPU_SSE3    (1UL<<16)   /* Streaming SIMD extensions 3 required */
-#define CPU_3DNow   (1UL<<17)   /* 3DNow! support required */
-#define CPU_Cyrix   (1UL<<18)   /* Cyrix-specific instruction */
-#define CPU_AMD     (1UL<<19)   /* AMD-specific inst. (older than K6) */
-#define CPU_SMM     (1UL<<20)   /* System Management Mode instruction */
-#define CPU_Prot    (1UL<<21)   /* Protected mode only instruction */
-#define CPU_Undoc   (1UL<<22)   /* Undocumented instruction */
-#define CPU_Obs     (1UL<<23)   /* Obsolete instruction */
-#define CPU_Priv    (1UL<<24)   /* Priveleged instruction */
-#define CPU_SVM     (1UL<<25)   /* Secure Virtual Machine instruction */
-#define CPU_PadLock (1UL<<25)   /* VIA PadLock instruction */
-#define CPU_EM64T   (1UL<<26)   /* Intel EM64T or better */
-#define CPU_SSSE3   (1UL<<27)   /* Streaming SIMD extensions 3 required */
-#define CPU_SSE41   (1UL<<28)   /* Streaming SIMD extensions 4.1 required */
-#define CPU_SSE42   (1UL<<29)   /* Streaming SIMD extensions 4.2 required */
-#define CPU_SSE4    (CPU_SSE41|CPU_SSE42)
+#define CPU_186     1       /* i186 or better required */
+#define CPU_286     2       /* i286 or better required */
+#define CPU_386     3       /* i386 or better required */
+#define CPU_486     4       /* i486 or better required */
+#define CPU_586     5       /* i586 or better required */
+#define CPU_686     6       /* i686 or better required */
+#define CPU_P3      7       /* Pentium3 or better required */
+#define CPU_P4      8       /* Pentium4 or better required */
+#define CPU_IA64    9       /* IA-64 or better required */
+#define CPU_K6      10      /* AMD K6 or better required */
+#define CPU_Athlon  11      /* AMD Athlon or better required */
+#define CPU_Hammer  12      /* AMD Sledgehammer or better required */
+#define CPU_FPU     13      /* FPU support required */
+#define CPU_MMX     14      /* MMX support required */
+#define CPU_SSE     15      /* Streaming SIMD extensions required */
+#define CPU_SSE2    16      /* Streaming SIMD extensions 2 required */
+#define CPU_SSE3    17      /* Streaming SIMD extensions 3 required */
+#define CPU_3DNow   18      /* 3DNow! support required */
+#define CPU_Cyrix   19      /* Cyrix-specific instruction */
+#define CPU_AMD     20      /* AMD-specific inst. (older than K6) */
+#define CPU_SMM     21      /* System Management Mode instruction */
+#define CPU_Prot    22      /* Protected mode only instruction */
+#define CPU_Undoc   23      /* Undocumented instruction */
+#define CPU_Obs     24      /* Obsolete instruction */
+#define CPU_Priv    25      /* Privileged instruction */
+#define CPU_SVM     26      /* Secure Virtual Machine instruction */
+#define CPU_PadLock 27      /* VIA PadLock instruction */
+#define CPU_EM64T   28      /* Intel EM64T or better */
+#define CPU_SSSE3   29      /* Supplemental Streaming SIMD extensions 3 required */
+#define CPU_SSE41   30      /* Streaming SIMD extensions 4.1 required */
+#define CPU_SSE42   31      /* Streaming SIMD extensions 4.2 required */
+#define CPU_SSE4a   32      /* AMD Streaming SIMD extensions 4a required */
+#define CPU_SSE5    33      /* AMD Streaming SIMD extensions 5 required */
 
 /* Technically not CPU capabilities, they do affect what instructions are
  * available.  These are tested against BITS==64.
  */
-#define CPU_64      (1UL<<30)   /* Only available in 64-bit mode */
-#define CPU_Not64   (1UL<<31)   /* Not available (invalid) in 64-bit mode */
+#define CPU_64      120     /* Only available in 64-bit mode */
+#define CPU_Not64   121     /* Not available (invalid) in 64-bit mode */
 
 typedef struct yasm_arch_x86 {
     yasm_arch_base arch;        /* base structure */
 
     /* What instructions/features are enabled? */
-    unsigned long cpu_enabled;
+    unsigned int active_cpu;        /* active index into cpu_enables table */
+    unsigned int cpu_enables_size;  /* size of cpu_enables table */
+    wordptr *cpu_enables;
+
     unsigned int amd64_machine;
     enum {
         X86_PARSER_NASM = 0,
@@ -81,6 +87,7 @@
     } parser;
     unsigned int mode_bits;
     unsigned int force_strict;
+    unsigned int default_rel;
 } yasm_arch_x86;
 
 /* 0-15 (low 4 bits) used for register number, stored in same data area.
@@ -137,9 +144,9 @@
  * indicates bit of REX to use if REX is needed.  Will not modify REX if not
  * in 64-bit mode or if it wasn't needed to express reg.
  */
-int yasm_x86__set_rex_from_reg(unsigned char *rex, unsigned char *low3,
-                               uintptr_t reg, unsigned int bits,
-                               x86_rex_bit_pos rexbit);
+int yasm_x86__set_rex_from_reg(unsigned char *rex, unsigned char *drex,
+                               unsigned char *low3, uintptr_t reg,
+                               unsigned int bits, x86_rex_bit_pos rexbit);
 
 /* Effective address type */
 typedef struct x86_effaddr {
@@ -157,14 +164,19 @@
     unsigned char valid_sib;    /* 1 if SIB byte currently valid, 0 if not */
     unsigned char need_sib;     /* 1 if SIB byte needed, 0 if not,
                                    0xff if unknown */
+
+    unsigned char drex;         /* DREX SSE5 extension byte */
+    unsigned char need_drex;    /* 1 if DREX byte needed, 0 if not */
 } x86_effaddr;
 
 void yasm_x86__ea_init(x86_effaddr *x86_ea, unsigned int spare,
+                       unsigned int drex, unsigned int need_drex,
                        yasm_bytecode *precbc);
 
 void yasm_x86__ea_set_disponly(x86_effaddr *x86_ea);
 x86_effaddr *yasm_x86__ea_create_reg(x86_effaddr *x86_ea, unsigned long reg,
-                                     unsigned char *rex, unsigned int bits);
+                                     unsigned char *rex, unsigned char *drex,
+                                     unsigned int bits);
 x86_effaddr *yasm_x86__ea_create_imm
     (x86_effaddr *x86_ea, /*@keep@*/ yasm_expr *imm, unsigned int im_len);
 yasm_effaddr *yasm_x86__ea_create_expr(yasm_arch *arch,
diff --git a/modules/arch/x86/x86bc.c b/modules/arch/x86/x86bc.c
index a01031d..f3717bf 100644
--- a/modules/arch/x86/x86bc.c
+++ b/modules/arch/x86/x86bc.c
@@ -103,9 +103,9 @@
 };
 
 int
-yasm_x86__set_rex_from_reg(unsigned char *rex, unsigned char *low3,
-                           uintptr_t reg, unsigned int bits,
-                           x86_rex_bit_pos rexbit)
+yasm_x86__set_rex_from_reg(unsigned char *rex, unsigned char *drex,
+                           unsigned char *low3, uintptr_t reg,
+                           unsigned int bits, x86_rex_bit_pos rexbit)
 {
     *low3 = (unsigned char)(reg&7);
 
@@ -113,13 +113,17 @@
         x86_expritem_reg_size size = (x86_expritem_reg_size)(reg & ~0xFUL);
 
         if (size == X86_REG8X || (reg & 0xF) >= 8) {
-            /* Check to make sure we can set it */
-            if (*rex == 0xff) {
-                yasm_error_set(YASM_ERROR_TYPE,
-                    N_("cannot use A/B/C/DH with instruction needing REX"));
-                return 1;
+            if (drex) {
+                *drex |= ((reg & 8) >> 3) << rexbit;
+            } else {
+                /* Check to make sure we can set it */
+                if (*rex == 0xff) {
+                    yasm_error_set(YASM_ERROR_TYPE,
+                        N_("cannot use A/B/C/DH with instruction needing REX"));
+                    return 1;
+                }
+                *rex |= 0x40 | (((reg & 8) >> 3) << rexbit);
             }
-            *rex |= 0x40 | (((reg & 8) >> 3) << rexbit);
         } else if (size == X86_REG8 && (reg & 7) >= 4) {
             /* AH/BH/CH/DH, so no REX allowed */
             if (*rex != 0 && *rex != 0xff) {
@@ -153,14 +157,16 @@
 }
 
 void
-yasm_x86__ea_init(x86_effaddr *x86_ea, unsigned int spare,
-                  yasm_bytecode *precbc)
+yasm_x86__ea_init(x86_effaddr *x86_ea, unsigned int spare, unsigned int drex,
+                  unsigned int need_drex, yasm_bytecode *precbc)
 {
     if (yasm_value_finalize(&x86_ea->ea.disp, precbc))
         yasm_error_set(YASM_ERROR_TOO_COMPLEX,
                        N_("effective address too complex"));
     x86_ea->modrm &= 0xC7;                  /* zero spare/reg bits */
     x86_ea->modrm |= (spare << 3) & 0x38;   /* plug in provided bits */
+    x86_ea->drex = (unsigned char)drex;
+    x86_ea->need_drex = (unsigned char)need_drex;
 }
 
 void
@@ -170,6 +176,7 @@
     x86_ea->need_modrm = 0;
     x86_ea->valid_sib = 0;
     x86_ea->need_sib = 0;
+    x86_ea->need_drex = 0;
 }
 
 static x86_effaddr *
@@ -183,23 +190,28 @@
     x86_ea->ea.nosplit = 0;
     x86_ea->ea.strong = 0;
     x86_ea->ea.segreg = 0;
+    x86_ea->ea.pc_rel = 0;
+    x86_ea->ea.not_pc_rel = 0;
     x86_ea->modrm = 0;
     x86_ea->valid_modrm = 0;
     x86_ea->need_modrm = 0;
     x86_ea->sib = 0;
     x86_ea->valid_sib = 0;
     x86_ea->need_sib = 0;
+    x86_ea->drex = 0;
+    x86_ea->need_drex = 0;
 
     return x86_ea;
 }
 
 x86_effaddr *
 yasm_x86__ea_create_reg(x86_effaddr *x86_ea, unsigned long reg,
-                        unsigned char *rex, unsigned int bits)
+                        unsigned char *rex, unsigned char *drex,
+                        unsigned int bits)
 {
     unsigned char rm;
 
-    if (yasm_x86__set_rex_from_reg(rex, &rm, reg, bits, X86_REX_B))
+    if (yasm_x86__set_rex_from_reg(rex, drex, &rm, reg, bits, X86_REX_B))
         return NULL;
 
     if (!x86_ea)
@@ -539,6 +551,7 @@
 
         /* Compute length of ea and add to total */
         bc->len += x86_ea->need_modrm + (x86_ea->need_sib ? 1:0);
+        bc->len += x86_ea->need_drex ? 1:0;
         bc->len += (x86_ea->ea.segreg != 0) ? 1 : 0;
     }
 
@@ -807,6 +820,9 @@
             YASM_WRITE_8(*bufp, x86_ea->sib);
         }
 
+        if (x86_ea->need_drex)
+            YASM_WRITE_8(*bufp, x86_ea->drex);
+
         if (x86_ea->ea.need_disp) {
             unsigned int disp_len = x86_ea->ea.disp.size/8;
 
diff --git a/modules/arch/x86/x86cpu.gperf b/modules/arch/x86/x86cpu.gperf
new file mode 100644
index 0000000..45edd07
--- /dev/null
+++ b/modules/arch/x86/x86cpu.gperf
@@ -0,0 +1,370 @@
+#
+# x86 CPU recognition
+#
+#  Copyright (C) 2002-2007  Peter Johnson
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+%{
+#include <util.h>
+RCSID("$Id$");
+
+#include <ctype.h>
+#include <libyasm.h>
+#include <libyasm/phash.h>
+
+#include "modules/arch/x86/x86arch.h"
+
+#define PROC_8086	0
+#define PROC_186	1
+#define PROC_286	2
+#define PROC_386	3
+#define PROC_486	4
+#define PROC_586	5
+#define PROC_686	6
+#define PROC_p2		7
+#define PROC_p3		8
+#define PROC_p4		9
+#define PROC_prescott	10
+#define PROC_conroe	11
+#define PROC_penryn	12
+#define PROC_nehalem	13
+
+static void
+x86_cpu_intel(wordptr cpu, unsigned int data)
+{
+    /* Reset cpu, then enable what an Intel processor of level `data` has. */
+    BitVector_Empty(cpu);
+    /* Set for every level, whatever `data` is */
+    BitVector_Bit_On(cpu, CPU_086);
+    BitVector_Bit_On(cpu, CPU_Priv);
+    if (data >= PROC_186)
+        BitVector_Bit_On(cpu, CPU_186);
+    if (data >= PROC_286) {
+        BitVector_Bit_On(cpu, CPU_286);
+        BitVector_Bit_On(cpu, CPU_Prot);
+    }
+    if (data >= PROC_386) {
+        BitVector_Bit_On(cpu, CPU_386);
+        BitVector_Bit_On(cpu, CPU_SMM);
+    }
+    if (data >= PROC_486) {
+        BitVector_Bit_On(cpu, CPU_486);
+        BitVector_Bit_On(cpu, CPU_FPU);
+    }
+    if (data >= PROC_586)
+        BitVector_Bit_On(cpu, CPU_586);
+    if (data >= PROC_686)
+        BitVector_Bit_On(cpu, CPU_686);
+    if (data >= PROC_p2)
+        BitVector_Bit_On(cpu, CPU_MMX);
+    if (data >= PROC_p3) {
+        BitVector_Bit_On(cpu, CPU_P3);
+        BitVector_Bit_On(cpu, CPU_SSE);
+    }
+    if (data >= PROC_p4) {
+        BitVector_Bit_On(cpu, CPU_P4);
+        BitVector_Bit_On(cpu, CPU_SSE2);
+    }
+    if (data >= PROC_prescott) {
+        BitVector_Bit_On(cpu, CPU_SSE3);
+        BitVector_Bit_On(cpu, CPU_EM64T);
+    }
+    if (data >= PROC_conroe)
+        BitVector_Bit_On(cpu, CPU_SSSE3);
+    if (data >= PROC_penryn)
+        BitVector_Bit_On(cpu, CPU_SSE41);
+    if (data >= PROC_nehalem)
+        BitVector_Bit_On(cpu, CPU_SSE42);
+}
+
+static void
+x86_cpu_ia64(wordptr cpu, unsigned int data)
+{
+    /* Fixed IA-64 feature set; `data` is not consulted. */
+    static const unsigned int ia64_bits[] = {
+        /* privilege / mode related */
+        CPU_Priv, CPU_Prot, CPU_SMM,
+        /* SIMD and floating point */
+        CPU_SSE2, CPU_SSE, CPU_MMX, CPU_FPU,
+        /* processor levels */
+        CPU_IA64, CPU_P4, CPU_P3, CPU_686, CPU_586,
+        CPU_486, CPU_386, CPU_286, CPU_186, CPU_086
+    };
+    const size_t nbits = sizeof(ia64_bits) / sizeof(ia64_bits[0]);
+    size_t k;
+
+    /* Clear everything first so only the listed bits end up set. */
+    BitVector_Empty(cpu);
+
+    for (k = 0; k < nbits; k++)
+        BitVector_Bit_On(cpu, ia64_bits[k]);
+}
+
+#define PROC_bulldozer	11
+#define PROC_k10    10
+#define PROC_venice 9
+#define PROC_hammer 8
+#define PROC_k7     7
+#define PROC_k6     6
+
+static void
+x86_cpu_amd(wordptr cpu, unsigned int data)
+{
+    /* Reset, set bits common to all AMD levels, then level-gated ones. */
+    BitVector_Empty(cpu);
+    BitVector_Bit_On(cpu, CPU_086);
+    BitVector_Bit_On(cpu, CPU_186);
+    BitVector_Bit_On(cpu, CPU_286);
+    BitVector_Bit_On(cpu, CPU_386);
+    BitVector_Bit_On(cpu, CPU_486);
+    BitVector_Bit_On(cpu, CPU_586);
+    BitVector_Bit_On(cpu, CPU_686);
+    BitVector_Bit_On(cpu, CPU_FPU);
+    BitVector_Bit_On(cpu, CPU_3DNow);
+    BitVector_Bit_On(cpu, CPU_SMM);
+    BitVector_Bit_On(cpu, CPU_Prot);
+    BitVector_Bit_On(cpu, CPU_Priv);
+
+    if (data >= PROC_k6) {
+        BitVector_Bit_On(cpu, CPU_K6);
+        BitVector_Bit_On(cpu, CPU_MMX);
+    }
+    if (data >= PROC_k7) {
+        BitVector_Bit_On(cpu, CPU_Athlon);
+        BitVector_Bit_On(cpu, CPU_SSE);
+    }
+    if (data >= PROC_hammer) {
+        BitVector_Bit_On(cpu, CPU_Hammer);
+        BitVector_Bit_On(cpu, CPU_SSE2);
+    }
+    if (data >= PROC_venice)
+        BitVector_Bit_On(cpu, CPU_SSE3);
+    if (data >= PROC_k10)
+        BitVector_Bit_On(cpu, CPU_SSE4a);
+    if (data >= PROC_bulldozer)
+        BitVector_Bit_On(cpu, CPU_SSE5);
+}
+
+static void
+x86_cpu_set(wordptr cpu, unsigned int data)
+{
+    BitVector_Bit_On(cpu, data);    /* data is the bit index to set in cpu */
+}
+
+static void
+x86_cpu_clear(wordptr cpu, unsigned int data)
+{
+    BitVector_Bit_Off(cpu, data);   /* data is the bit index to clear in cpu */
+}
+
+static void
+x86_cpu_set_sse4(wordptr cpu, unsigned int data)
+{
+    BitVector_Bit_On(cpu, CPU_SSE41);   /* sets both SSE4 bits: 4.1 ... */
+    BitVector_Bit_On(cpu, CPU_SSE42);   /* ... and 4.2; data is not read */
+}
+
+static void
+x86_cpu_clear_sse4(wordptr cpu, unsigned int data)
+{
+    BitVector_Bit_Off(cpu, CPU_SSE41);  /* clears both SSE4 bits: 4.1 ... */
+    BitVector_Bit_Off(cpu, CPU_SSE42);  /* ... and 4.2; data is not read */
+}
+
+%}
+%ignore-case
+%language=ANSI-C
+%compare-strncmp
+%readonly-tables
+%enum
+%struct-type
+%define hash-function-name cpu_hash
+%define lookup-function-name cpu_find
+struct cpu_parse_data {
+    const char *name;
+    void (*handler) (wordptr cpu, unsigned int data);
+    unsigned int data;
+};
+%%
+8086,		x86_cpu_intel,	PROC_8086
+186,		x86_cpu_intel,	PROC_186
+80186,		x86_cpu_intel,	PROC_186
+i186,		x86_cpu_intel,	PROC_186
+286,		x86_cpu_intel,	PROC_286
+80286,		x86_cpu_intel,	PROC_286
+i286,		x86_cpu_intel,	PROC_286
+386,		x86_cpu_intel,	PROC_386
+80386,		x86_cpu_intel,	PROC_386
+i386,		x86_cpu_intel,	PROC_386
+486,		x86_cpu_intel,	PROC_486
+80486,		x86_cpu_intel,	PROC_486
+i486,		x86_cpu_intel,	PROC_486
+586,		x86_cpu_intel,	PROC_586
+i586,		x86_cpu_intel,	PROC_586
+pentium,	x86_cpu_intel,	PROC_586
+p5,		x86_cpu_intel,	PROC_586
+686,		x86_cpu_intel,	PROC_686
+i686,		x86_cpu_intel,	PROC_686
+p6,		x86_cpu_intel,	PROC_686
+ppro,		x86_cpu_intel,	PROC_686
+pentiumpro,	x86_cpu_intel,	PROC_686
+p2,		x86_cpu_intel,	PROC_p2
+pentium2,	x86_cpu_intel,	PROC_p2
+pentium-2,	x86_cpu_intel,	PROC_p2
+pentiumii,	x86_cpu_intel,	PROC_p2
+pentium-ii,	x86_cpu_intel,	PROC_p2
+p3,		x86_cpu_intel,	PROC_p3
+pentium3,	x86_cpu_intel,	PROC_p3
+pentium-3,	x86_cpu_intel,	PROC_p3
+pentiumiii,	x86_cpu_intel,	PROC_p3
+pentium-iii,	x86_cpu_intel,	PROC_p3
+katmai,		x86_cpu_intel,	PROC_p3
+p4,		x86_cpu_intel,	PROC_p4
+pentium4,	x86_cpu_intel,	PROC_p4
+pentium-4,	x86_cpu_intel,	PROC_p4
+pentiumiv,	x86_cpu_intel,	PROC_p4
+pentium-iv,	x86_cpu_intel,	PROC_p4
+williamette,	x86_cpu_intel,	PROC_p4
+ia64,		x86_cpu_ia64,	0
+ia-64,		x86_cpu_ia64,	0
+itanium,	x86_cpu_ia64,	0
+k6,		x86_cpu_amd,	PROC_k6
+k7,		x86_cpu_amd,	PROC_k7
+athlon,		x86_cpu_amd,	PROC_k7
+k8,		x86_cpu_amd,	PROC_hammer
+hammer,		x86_cpu_amd,	PROC_hammer
+clawhammer,	x86_cpu_amd,	PROC_hammer
+opteron,	x86_cpu_amd,	PROC_hammer
+athlon64,	x86_cpu_amd,	PROC_hammer
+athlon-64,	x86_cpu_amd,	PROC_hammer
+venice,		x86_cpu_amd,	PROC_venice
+k10,		x86_cpu_amd,	PROC_k10
+bulldozer,	x86_cpu_amd,	PROC_bulldozer
+prescott,	x86_cpu_intel,	PROC_prescott
+conroe,		x86_cpu_intel,	PROC_conroe
+penryn,		x86_cpu_intel,	PROC_penryn
+nehalem,	x86_cpu_intel,	PROC_nehalem
+#
+# Features have "no" versions to disable them, and only set/reset the
+# specific feature being changed.  All other bits are left alone.
+#
+fpu,		x86_cpu_set,	CPU_FPU
+nofpu,		x86_cpu_clear,	CPU_FPU
+mmx,		x86_cpu_set,	CPU_MMX
+nommx,		x86_cpu_clear,	CPU_MMX
+sse,		x86_cpu_set,	CPU_SSE
+nosse,		x86_cpu_clear,	CPU_SSE
+sse2,		x86_cpu_set,	CPU_SSE2
+nosse2,		x86_cpu_clear,	CPU_SSE2
+sse3,		x86_cpu_set,	CPU_SSE3
+nosse3,		x86_cpu_clear,	CPU_SSE3
+#pni,		x86_cpu_set,	CPU_PNI
+#nopni,		x86_cpu_clear,	CPU_PNI
+3dnow,		x86_cpu_set,	CPU_3DNow
+no3dnow,	x86_cpu_clear,	CPU_3DNow
+cyrix,		x86_cpu_set,	CPU_Cyrix
+nocyrix,	x86_cpu_clear,	CPU_Cyrix
+amd,		x86_cpu_set,	CPU_AMD
+noamd,		x86_cpu_clear,	CPU_AMD
+smm,		x86_cpu_set,	CPU_SMM
+nosmm,		x86_cpu_clear,	CPU_SMM
+prot,		x86_cpu_set,	CPU_Prot
+noprot,		x86_cpu_clear,	CPU_Prot
+protected,	x86_cpu_set,	CPU_Prot
+noprotected,	x86_cpu_clear,	CPU_Prot
+undoc,		x86_cpu_set,	CPU_Undoc
+noundoc,	x86_cpu_clear,	CPU_Undoc
+undocumented,	x86_cpu_set,	CPU_Undoc
+noundocumented,	x86_cpu_clear,	CPU_Undoc
+obs,		x86_cpu_set,	CPU_Obs
+noobs,		x86_cpu_clear,	CPU_Obs
+obsolete,	x86_cpu_set,	CPU_Obs
+noobsolete,	x86_cpu_clear,	CPU_Obs
+priv,		x86_cpu_set,	CPU_Priv
+nopriv,		x86_cpu_clear,	CPU_Priv
+privileged,	x86_cpu_set,	CPU_Priv
+noprivileged,	x86_cpu_clear,	CPU_Priv
+svm,		x86_cpu_set,	CPU_SVM
+nosvm,		x86_cpu_clear,	CPU_SVM
+padlock,	x86_cpu_set,	CPU_PadLock
+nopadlock,	x86_cpu_clear,	CPU_PadLock
+em64t,		x86_cpu_set,	CPU_EM64T
+noem64t,	x86_cpu_clear,	CPU_EM64T
+ssse3,		x86_cpu_set,	CPU_SSSE3
+nossse3,	x86_cpu_clear,	CPU_SSSE3
+sse4.1,		x86_cpu_set,	CPU_SSE41
+nosse4.1,	x86_cpu_clear,	CPU_SSE41
+sse41,		x86_cpu_set,	CPU_SSE41
+nosse41,	x86_cpu_clear,	CPU_SSE41
+sse4.2,		x86_cpu_set,	CPU_SSE42
+nosse4.2,	x86_cpu_clear,	CPU_SSE42
+sse42,		x86_cpu_set,	CPU_SSE42
+nosse42,	x86_cpu_clear,	CPU_SSE42
+sse4a,		x86_cpu_set,	CPU_SSE4a
+nosse4a,	x86_cpu_clear,	CPU_SSE4a
+sse4,		x86_cpu_set_sse4,	0
+nosse4,		x86_cpu_clear_sse4,	0
+sse5,		x86_cpu_set,	CPU_SSE5
+nosse5,		x86_cpu_clear,	CPU_SSE5
+%%
+
+/* Apply CPU/feature keyword cpuid to the active CPU set; warn if unknown. */
+void
+yasm_x86__parse_cpu(yasm_arch_x86 *arch_x86, const char *cpuid,
+                    size_t cpuid_len)
+{
+    /*@null@*/ const struct cpu_parse_data *pdata = NULL;
+    wordptr new_cpu;
+    size_t i;
+    char lcaseid[16];
+
+    /* Over-long names match no keyword: skip lookup and warn below. */
+    if (cpuid_len < sizeof(lcaseid)) {
+        for (i=0; i<cpuid_len; i++)
+            lcaseid[i] = (char)tolower((unsigned char)cpuid[i]);
+        lcaseid[cpuid_len] = '\0';
+        pdata = cpu_find(lcaseid, cpuid_len);
+    }
+    if (!pdata) {
+        yasm_warn_set(YASM_WARN_GENERAL,
+                      N_("unrecognized CPU identifier `%s'"), cpuid);
+        return;
+    }
+
+    /* Handlers edit a clone, so entries already in the table are untouched. */
+    new_cpu = BitVector_Clone(arch_x86->cpu_enables[arch_x86->active_cpu]);
+    pdata->handler(new_cpu, pdata->data);
+
+    /* try to find an existing match in the CPU table first */
+    for (i=0; i<arch_x86->cpu_enables_size; i++) {
+        if (BitVector_equal(arch_x86->cpu_enables[i], new_cpu)) {
+            arch_x86->active_cpu = (unsigned int)i;
+            BitVector_Destroy(new_cpu);
+            return;
+        }
+    }
+    /* not found: append new_cpu (the table now owns it) and make it active */
+    arch_x86->active_cpu = arch_x86->cpu_enables_size++;
+    arch_x86->cpu_enables = yasm_xrealloc(arch_x86->cpu_enables,
+        arch_x86->cpu_enables_size*sizeof(wordptr));
+    arch_x86->cpu_enables[arch_x86->active_cpu] = new_cpu;
+}
diff --git a/modules/arch/x86/x86expr.c b/modules/arch/x86/x86expr.c
index 8d11b1a..6358bff 100644
--- a/modules/arch/x86/x86expr.c
+++ b/modules/arch/x86/x86expr.c
@@ -549,6 +549,7 @@
                        yasm_bytecode *bc)
 {
     int retval;
+    unsigned char *drex = x86_ea->need_drex ? &x86_ea->drex : NULL;
 
     if (*addrsize == 0) {
         /* we need to figure out the address size from what we know about:
@@ -635,6 +636,12 @@
             return 1;
         }
 
+        if (x86_ea->ea.pc_rel && bits != 64) {
+            yasm_warn_set(YASM_WARN_GENERAL,
+                N_("RIP-relative directive ignored in non-64-bit mode"));
+            x86_ea->ea.pc_rel = 0;
+        }
+
         reg3264_data.regs = reg3264mult;
         reg3264_data.bits = bits;
         reg3264_data.addrsize = *addrsize;
@@ -754,6 +761,15 @@
          * (optional) SIB bytes.
          */
 
+        /* If we're supposed to be RIP-relative and there's no register
+         * usage, change to RIP-relative.
+         */
+        if (basereg == REG3264_NONE && indexreg == REG3264_NONE &&
+            x86_ea->ea.pc_rel) {
+            basereg = REG64_RIP;
+            yasm_value_set_curpos_rel(&x86_ea->ea.disp, bc, 1);
+        }
+
         /* First determine R/M (Mod is later determined from disp size) */
         x86_ea->need_modrm = 1; /* we always need ModRM */
         if (basereg == REG3264_NONE && indexreg == REG3264_NONE) {
@@ -784,7 +800,7 @@
              * of register basereg is, as x86_set_rex_from_reg doesn't pay
              * much attention.
              */
-            if (yasm_x86__set_rex_from_reg(rex, &low3,
+            if (yasm_x86__set_rex_from_reg(rex, drex, &low3,
                                            (unsigned int)(X86_REG64 | basereg),
                                            bits, X86_REX_B))
                 return 1;
@@ -811,7 +827,7 @@
             if (basereg == REG3264_NONE)
                 x86_ea->sib |= 5;
             else {
-                if (yasm_x86__set_rex_from_reg(rex, &low3, (unsigned int)
+                if (yasm_x86__set_rex_from_reg(rex, drex, &low3, (unsigned int)
                                                (X86_REG64 | basereg), bits,
                                                X86_REX_B))
                     return 1;
@@ -823,7 +839,7 @@
                 x86_ea->sib |= 040;
                 /* Any scale field is valid, just leave at 0. */
             else {
-                if (yasm_x86__set_rex_from_reg(rex, &low3, (unsigned int)
+                if (yasm_x86__set_rex_from_reg(rex, drex, &low3, (unsigned int)
                                                (X86_REG64 | indexreg), bits,
                                                X86_REX_X))
                     return 1;
diff --git a/modules/arch/x86/x86id.c b/modules/arch/x86/x86id.c
index 807e9d4..59e74ae 100644
--- a/modules/arch/x86/x86id.c
+++ b/modules/arch/x86/x86id.c
@@ -34,207 +34,196 @@
 #include "modules/arch/x86/x86arch.h"
 
 
-static const char *cpu_find_reverse(unsigned long cpu);
+static const char *cpu_find_reverse(unsigned int cpu0, unsigned int cpu1,
+                                    unsigned int cpu2);
 
-/* Opcode modifiers.  The opcode bytes are in "reverse" order because the
- * parameters are read from the arch-specific data in LSB->MSB order.
- * (only for asthetic reasons in the lexer code below, no practical reason).
- */
-#define MOD_Gap0    (1UL<<0)    /* Eats a parameter */
-#define MOD_Op2Add  (1UL<<1)    /* Parameter adds to opcode byte 2 */
-#define MOD_Gap1    (1UL<<2)    /* Eats a parameter */
-#define MOD_Op1Add  (1UL<<3)    /* Parameter adds to opcode byte 1 */
-#define MOD_Gap2    (1UL<<4)    /* Eats a parameter */
-#define MOD_Op0Add  (1UL<<5)    /* Parameter adds to opcode byte 0 */
-#define MOD_PreAdd  (1UL<<6)    /* Parameter adds to "special" prefix */
-#define MOD_SpAdd   (1UL<<7)    /* Parameter adds to "spare" value */
-#define MOD_OpSizeR (1UL<<8)    /* Parameter replaces opersize */
-#define MOD_Imm8    (1UL<<9)    /* Parameter is included as immediate byte */
-#define MOD_AdSizeR (1UL<<10)   /* Parameter replaces addrsize (jmp only) */
-#define MOD_DOpS64R (1UL<<11)   /* Parameter replaces default 64-bit opersize */
-#define MOD_Op1AddSp (1UL<<12)  /* Parameter is added as "spare" to opcode byte 2 */
+/* Opcode modifiers. */
+#define MOD_Gap     0   /* Eats a parameter / does nothing */
+#define MOD_PreAdd  1   /* Parameter adds to "special" prefix */
+#define MOD_Op0Add  2   /* Parameter adds to opcode byte 0 */
+#define MOD_Op1Add  3   /* Parameter adds to opcode byte 1 */
+#define MOD_Op2Add  4   /* Parameter adds to opcode byte 2 */
+#define MOD_SpAdd   5   /* Parameter adds to "spare" value */
+#define MOD_OpSizeR 6   /* Parameter replaces opersize */
+#define MOD_Imm8    7   /* Parameter is included as immediate byte */
+#define MOD_AdSizeR 8   /* Parameter replaces addrsize (jmp only) */
+#define MOD_DOpS64R 9   /* Parameter replaces default 64-bit opersize */
+#define MOD_Op1AddSp 10 /* Parameter is added as "spare" to opcode byte 1 */
 
-/* Modifiers that aren't: these are used with the GAS parser to indicate
- * special cases.
- */
-#define MOD_GasOnly     (1UL<<13)       /* Only available in GAS mode */
-#define MOD_GasIllegal  (1UL<<14)       /* Illegal in GAS mode */
-#define MOD_GasNoRev    (1UL<<15)       /* Don't reverse operands */
-#define MOD_GasSufB     (1UL<<16)       /* GAS B suffix ok */
-#define MOD_GasSufW     (1UL<<17)       /* GAS W suffix ok */
-#define MOD_GasSufL     (1UL<<18)       /* GAS L suffix ok */
-#define MOD_GasSufQ     (1UL<<19)       /* GAS Q suffix ok */
-#define MOD_GasSufS     (1UL<<20)       /* GAS S suffix ok */
-#define MOD_GasSuf_SHIFT 16
-#define MOD_GasSuf_MASK (0x1FUL<<16)
+/* GAS suffix flags for instructions */
+enum x86_gas_suffix_flags {
+    NONE = 0,
+    SUF_B = 1<<0,
+    SUF_W = 1<<1,
+    SUF_L = 1<<2,
+    SUF_Q = 1<<3,
+    SUF_S = 1<<4,
+    SUF_MASK = SUF_B|SUF_W|SUF_L|SUF_Q|SUF_S,
 
-/* Operand types.  These are more detailed than the "general" types for all
- * architectures, as they include the size, for instance.
- * Bit Breakdown (from LSB to MSB):
- *  - 5 bits = general type (must be exact match, except for =3):
- *             0 = immediate
- *             1 = any general purpose or FPU register
- *             2 = memory
- *             3 = any general purpose or FPU register OR memory
- *             4 = any MMX or XMM register
- *             5 = any MMX or XMM register OR memory
- *             6 = any segment register
- *             7 = any CR register
- *             8 = any DR register
- *             9 = any TR register
- *             A = ST0
- *             B = AL/AX/EAX/RAX (depending on size)
- *             C = CL/CX/ECX/RCX (depending on size)
- *             D = DL/DX/EDX/RDX (depending on size)
- *             E = CS
- *             F = DS
- *             10 = ES
- *             11 = FS
- *             12 = GS
- *             13 = SS
- *             14 = CR4
- *             15 = memory offset (an EA, but with no registers allowed)
- *                  [special case for MOV opcode]
- *             16 = immediate, value=1 (for special-case shift)
- *             17 = immediate, does not contain SEG:OFF (for jmp/call),
- *             18 = XMM0
- *             19 = AX/EAX/RAX memory operand only (EA)
- *                  [special case for SVM opcodes]
- *             20 = EAX memory operand only (EA)
- *                  [special case for SVM skinit opcode]
- *  - 3 bits = size (user-specified, or from register size):
- *             0 = any size acceptable/no size spec acceptable (dep. on strict)
- *             1/2/3/4 = 8/16/32/64 bits (from user or reg size)
- *             5/6 = 80/128 bits (from user)
- *             7 = current BITS setting; when this is used the size matched
- *                 gets stored into the opersize as well.
- *  - 1 bit = size implicit or explicit ("strictness" of size matching on
- *            non-registers -- registers are always strictly matched):
- *            0 = user size must exactly match size above.
- *            1 = user size either unspecified or exactly match size above.
- *  - 3 bits = target modification.
- *            0 = no target mod acceptable
- *            1 = NEAR
- *            2 = SHORT
- *            3 = FAR (or SEG:OFF immediate)
- *            4 = TO
- *  - 1 bit = effective address size
- *            0 = any address size allowed except for 64-bit
- *            1 = only 64-bit address size allowed
- *
- * MSBs than the above are actions: what to do with the operand if the
- * instruction matches.  Essentially describes what part of the output bytecode
- * gets the operand.  This may require conversion (e.g. a register going into
- * an ea field).  Naturally, only one of each of these may be contained in the
- * operands of a single insn_info structure.
- *  - 4 bits = action:
- *             0 = does nothing (operand data is discarded)
- *             1 = operand data goes into ea field
- *             2 = operand data goes into imm field
- *             3 = operand data goes into sign-extended imm field
- *             4 = operand data goes into "spare" field
- *             5 = operand data is added to opcode byte 0
- *             6 = operand data is added to opcode byte 1
- *             7 = operand data goes into BOTH ea and spare
- *                 [special case for imul opcode]
- *             8 = relative jump (outputs a jmp instead of normal insn)
- *             9 = operand size goes into address size (jmp only)
- *             A = far jump (outputs a farjmp instead of normal insn)
- *             B = ea operand only sets address size (no actual ea field)
- * The below describes postponed actions: actions which can't be completed at
- * parse-time due to possibly dependent expressions.  For these, some
- * additional data (stored in the second byte of the opcode with a one-byte
- * opcode) is passed to later stages of the assembler with flags set to
- * indicate postponed actions.
- *  - 3 bits = postponed action:
- *             0 = none
- *             1 = sign-extended imm8 that could expand to a large imm16/32
- *             2 = could become a short opcode mov with bits=64 and a32 prefix
- *             3 = forced 16-bit address size (override ignored, no prefix)
- *             4 = large imm64 that can become a sign-extended imm32.
- */
-#define OPT_Imm         0x0
-#define OPT_Reg         0x1
-#define OPT_Mem         0x2
-#define OPT_RM          0x3
-#define OPT_SIMDReg     0x4
-#define OPT_SIMDRM      0x5
-#define OPT_SegReg      0x6
-#define OPT_CRReg       0x7
-#define OPT_DRReg       0x8
-#define OPT_TRReg       0x9
-#define OPT_ST0         0xA
-#define OPT_Areg        0xB
-#define OPT_Creg        0xC
-#define OPT_Dreg        0xD
-#define OPT_CS          0xE
-#define OPT_DS          0xF
-#define OPT_ES          0x10
-#define OPT_FS          0x11
-#define OPT_GS          0x12
-#define OPT_SS          0x13
-#define OPT_CR4         0x14
-#define OPT_MemOffs     0x15
-#define OPT_Imm1        0x16
-#define OPT_ImmNotSegOff 0x17
-#define OPT_XMM0        0x18
-#define OPT_MemrAX      0x19
-#define OPT_MemEAX      0x1A
-#define OPT_MASK        0x1F
+    /* Flags only used in x86_insn_info */
+    GAS_ONLY = 1<<5,        /* Only available in GAS mode */
+    GAS_ILLEGAL = 1<<6,     /* Illegal in GAS mode */
+    GAS_NO_REV = 1<<7,      /* Don't reverse operands in GAS mode */
 
-#define OPS_Any         (0UL<<5)
-#define OPS_8           (1UL<<5)
-#define OPS_16          (2UL<<5)
-#define OPS_32          (3UL<<5)
-#define OPS_64          (4UL<<5)
-#define OPS_80          (5UL<<5)
-#define OPS_128         (6UL<<5)
-#define OPS_BITS        (7UL<<5)
-#define OPS_MASK        (7UL<<5)
-#define OPS_SHIFT       5
+    /* Flag only used in insnprefix_parse_data (same bit as GAS_ONLY) */
+    WEAK = 1<<5             /* Relaxed operand mode for GAS */
+};
 
-#define OPS_Relaxed     (1UL<<8)
-#define OPS_RMASK       (1UL<<8)
+enum x86_operand_type {
+    OPT_Imm = 0,        /* immediate */
+    OPT_Reg = 1,        /* any general purpose or FPU register */
+    OPT_Mem = 2,        /* memory */
+    OPT_RM = 3,         /* any general purpose or FPU register OR memory */
+    OPT_SIMDReg = 4,    /* any MMX or XMM register */
+    OPT_SIMDRM = 5,     /* any MMX or XMM register OR memory */
+    OPT_SegReg = 6,     /* any segment register */
+    OPT_CRReg = 7,      /* any CR register */
+    OPT_DRReg = 8,      /* any DR register */
+    OPT_TRReg = 9,      /* any TR register */
+    OPT_ST0 = 10,       /* ST0 */
+    OPT_Areg = 11,      /* AL/AX/EAX/RAX (depending on size) */
+    OPT_Creg = 12,      /* CL/CX/ECX/RCX (depending on size) */
+    OPT_Dreg = 13,      /* DL/DX/EDX/RDX (depending on size) */
+    OPT_CS = 14,        /* CS */
+    OPT_DS = 15,        /* DS */
+    OPT_ES = 16,        /* ES */
+    OPT_FS = 17,        /* FS */
+    OPT_GS = 18,        /* GS */
+    OPT_SS = 19,        /* SS */
+    OPT_CR4 = 20,       /* CR4 */
+    /* memory offset (an EA, but with no registers allowed)
+     * [special case for MOV opcode]
+     */
+    OPT_MemOffs = 21,
+    OPT_Imm1 = 22,      /* immediate, value=1 (for special-case shift) */
+    /* immediate, does not contain SEG:OFF (for jmp/call) */
+    OPT_ImmNotSegOff = 23,
+    OPT_XMM0 = 24,      /* XMM0 */
+    /* AX/EAX/RAX memory operand only (EA) [special case for SVM opcodes]
+     */
+    OPT_MemrAX = 25,
+    /* EAX memory operand only (EA) [special case for SVM skinit opcode] */
+    OPT_MemEAX = 26,
+    /* SIMDReg with value equal to operand 0 SIMDReg */
+    OPT_SIMDRegMatch0 = 27
+};
 
-#define OPEAS_Not64     (0UL<<9)
-#define OPEAS_64        (1UL<<9)
-#define OPEAS_MASK      (1UL<<9)
+enum x86_operand_size {
+    /* any size acceptable/no size spec acceptable (dep. on strict) */
+    OPS_Any = 0,
+    /* 8/16/32/64 bits (from user or reg size) */
+    OPS_8 = 1,
+    OPS_16 = 2,
+    OPS_32 = 3,
+    OPS_64 = 4,
+    /* 80/128 bits (from user) */
+    OPS_80 = 5,
+    OPS_128 = 6,
+    /* current BITS setting; when this is used the size matched
+     * gets stored into the opersize as well.
+     */
+    OPS_BITS = 7
+};
 
-#define OPTM_None       (0UL<<10)
-#define OPTM_Near       (1UL<<10)
-#define OPTM_Short      (2UL<<10)
-#define OPTM_Far        (3UL<<10)
-#define OPTM_To         (4UL<<10)
-#define OPTM_MASK       (7UL<<10)
+enum x86_operand_targetmod {
+    OPTM_None = 0,  /* no target mod acceptable */
+    OPTM_Near = 1,  /* NEAR */
+    OPTM_Short = 2, /* SHORT */
+    OPTM_Far = 3,   /* FAR (or SEG:OFF immediate) */
+    OPTM_To = 4     /* TO */
+};
 
-#define OPA_None        (0UL<<13)
-#define OPA_EA          (1UL<<13)
-#define OPA_Imm         (2UL<<13)
-#define OPA_SImm        (3UL<<13)
-#define OPA_Spare       (4UL<<13)
-#define OPA_Op0Add      (5UL<<13)
-#define OPA_Op1Add      (6UL<<13)
-#define OPA_SpareEA     (7UL<<13)
-#define OPA_JmpRel      (8UL<<13)
-#define OPA_AdSizeR     (9UL<<13)
-#define OPA_JmpFar      (0xAUL<<13)
-#define OPA_AdSizeEA    (0xBUL<<13)
-#define OPA_MASK        (0xFUL<<13)
+enum x86_operand_action {
+    OPA_None = 0,   /* does nothing (operand data is discarded) */
+    OPA_EA = 1,     /* operand data goes into ea field */
+    OPA_Imm = 2,    /* operand data goes into imm field */
+    OPA_SImm = 3,   /* operand data goes into sign-extended imm field */
+    OPA_Spare = 4,  /* operand data goes into "spare" field */
+    OPA_Op0Add = 5, /* operand data is added to opcode byte 0 */
+    OPA_Op1Add = 6, /* operand data is added to opcode byte 1 */
+    /* operand data goes into BOTH ea and spare
+     * (special case for imul opcode)
+     */
+    OPA_SpareEA = 7,
+    /* relative jump (outputs a jmp instead of normal insn) */
+    OPA_JmpRel = 8,
+    /* operand size goes into address size (jmp only) */
+    OPA_AdSizeR = 9,
+    /* far jump (outputs a farjmp instead of normal insn) */
+    OPA_JmpFar = 10,
+    /* ea operand only sets address size (no actual ea field) */
+    OPA_AdSizeEA = 11,
+    OPA_DREX = 12   /* operand data goes into DREX "dest" field */
+};
 
-#define OPAP_None       (0UL<<17)
-#define OPAP_SImm8      (1UL<<17)
-#define OPAP_ShortMov   (2UL<<17)
-#define OPAP_A16        (3UL<<17)
-#define OPAP_SImm32Avail (4UL<<17)
-#define OPAP_MASK       (7UL<<17)
+enum x86_operand_post_action {
+    OPAP_None = 0,
+    /* sign-extended imm8 that could expand to a large imm16/32 */
+    OPAP_SImm8 = 1,
+    /* could become a short opcode mov with bits=64 and a32 prefix */
+    OPAP_ShortMov = 2,
+    /* forced 16-bit address size (override ignored, no prefix) */
+    OPAP_A16 = 3,
+    /* large imm64 that can become a sign-extended imm32 */
+    OPAP_SImm32Avail = 4
+};
+
+typedef struct x86_info_operand {
+    /* Operand types.  These are more detailed than the "general" types for all
+     * architectures, as they include the size, for instance.
+     */
+
+    /* general type (must be exact match, except for RM types): */
+    unsigned int type:5;
+
+    /* size (user-specified, or from register size) */
+    unsigned int size:3;
+
+    /* size implicit or explicit ("strictness" of size matching on
+     * non-registers -- registers are always strictly matched):
+     * 0 = user size must exactly match size above.
+     * 1 = user size either unspecified or exactly match size above.
+     */
+    unsigned int relaxed:1;
+
+    /* effective address size
+     * 0 = any address size allowed except for 64-bit
+     * 1 = only 64-bit address size allowed
+     */
+    unsigned int eas64:1;
+
+    /* target modification */
+    unsigned int targetmod:3;
+
+    /* Actions: what to do with the operand if the instruction matches.
+     * Essentially describes what part of the output bytecode gets the
+     * operand.  This may require conversion (e.g. a register going into
+     * an ea field).  Naturally, only one of each of these may be contained
+     * in the operands of a single insn_info structure.
+     */
+    unsigned int action:4;
+
+    /* Postponed actions: actions which can't be completed at
+     * parse-time due to possibly dependent expressions.  For these, some
+     * additional data (stored in the second byte of the opcode with a
+     * one-byte opcode) is passed to later stages of the assembler with
+     * flags set to indicate postponed actions.
+     */
+    unsigned int post_action:3;
+} x86_info_operand;
 
 typedef struct x86_insn_info {
+    /* GAS suffix flags */
+    unsigned int gas_flags:8;      /* Enabled for these GAS suffixes */
+
     /* The CPU feature flags needed to execute this instruction.  This is OR'ed
      * with arch-specific data[2].  This combined value is compared with
      * cpu_enabled to see if all bits set here are set in cpu_enabled--if so,
      * the instruction is available on this CPU.
      */
-    unsigned long cpu;
+    unsigned int cpu0:8;
+    unsigned int cpu1:8;
+    unsigned int cpu2:8;
 
     /* Opcode modifiers for variations of instruction.  As each modifier reads
      * its parameter in LSB->MSB order from the arch-specific data[1] from the
@@ -242,7 +231,7 @@
      * count of insn_info structures in the instruction grouping, there can
      * only be a maximum of 3 modifiers.
      */
-    unsigned long modifiers;
+    unsigned char modifiers[3];
 
     /* Operand Size */
     unsigned char opersize;
@@ -258,6 +247,14 @@
      */
     unsigned char special_prefix;
 
+    /* The DREX base byte value (almost).  The only bit kept from this
+     * value is the OC0 bit (0x08).  The MSB (0x80) of this value indicates
+     * if the DREX byte needs to be present in the instruction.
+     */
+#define NEED_DREX_MASK 0x80
+#define DREX_OC0_MASK 0x08
+    unsigned char drex_oc0;
+
     /* The length of the basic opcode */
     unsigned char opcode_len;
 
@@ -270,10 +267,12 @@
     unsigned char spare;
 
     /* The number of operands this form of the instruction takes */
-    unsigned char num_operands;
+    unsigned int num_operands:4;
 
-    /* The types of each operand, see above */
-    unsigned long operands[3];
+    /* The index into the insn_operands array which contains the type of each
+     * operand, see above
+     */
+    unsigned int operands_index:12;
 } x86_insn_info;
 
 typedef struct x86_id_insn {
@@ -283,10 +282,10 @@
     /*@null@*/ const x86_insn_info *group;
 
     /* CPU feature flags enabled at the time of parsing the instruction */
-    unsigned long cpu_enabled;
+    wordptr cpu_enabled;
 
     /* Modifier data */
-    unsigned long mod_data;
+    unsigned char mod_data[3];
 
     /* Number of elements in the instruction parse group */
     unsigned int num_info:8;
@@ -302,6 +301,9 @@
 
     /* Strict forced setting at the time of parsing the instruction */
     unsigned int force_strict:1;
+
+    /* Default rel setting at the time of parsing the instruction */
+    unsigned int default_rel:1;
 } x86_id_insn;
 
 static void x86_id_insn_destroy(void *contents);
@@ -318,2027 +320,7 @@
     YASM_BC_SPECIAL_INSN
 };
 
-/*
- * General instruction groupings
- */
-
-/* Empty instruction */
-static const x86_insn_info empty_insn[] = {
-    { CPU_Any, 0, 0, 0, 0, 0, {0, 0, 0}, 0, 0, {0, 0, 0} }
-};
-
-/* Placeholder for instructions invalid in 64-bit mode */
-static const x86_insn_info not64_insn[] = {
-    { CPU_Not64, 0, 0, 0, 0, 0, {0, 0, 0}, 0, 0, {0, 0, 0} }
-};
-
-/* One byte opcode instructions with no operands */
-static const x86_insn_info onebyte_insn[] = {
-    { CPU_Any, MOD_Op0Add|MOD_OpSizeR|MOD_DOpS64R, 0, 0, 0, 1, {0, 0, 0}, 0, 0,
-      {0, 0, 0} }
-};
-
-/* One byte opcode instructions with "special" prefix with no operands */
-static const x86_insn_info onebyte_prefix_insn[] = {
-    { CPU_Any, MOD_Op0Add|MOD_PreAdd, 0, 0, 0x00, 1, {0x00, 0, 0}, 0, 0,
-      {0, 0, 0} }
-};
-
-/* Two byte opcode instructions with no operands */
-static const x86_insn_info twobyte_insn[] = {
-    { CPU_Any, MOD_Op1Add|MOD_Op0Add|MOD_GasSufL|MOD_GasSufQ, 0, 0, 0, 2,
-      {0, 0, 0}, 0, 0, {0, 0, 0} }
-};
-
-/* Three byte opcode instructions with no operands */
-static const x86_insn_info threebyte_insn[] = {
-    { CPU_Any, MOD_Op2Add|MOD_Op1Add|MOD_Op0Add, 0, 0, 0, 3, {0, 0, 0}, 0, 0,
-      {0, 0, 0} }
-};
-
-/* One byte opcode instructions with general memory operand */
-static const x86_insn_info onebytemem_insn[] = {
-    { CPU_Any, MOD_Op0Add|MOD_SpAdd|MOD_GasSufL|MOD_GasSufQ|MOD_GasSufS,
-      0, 0, 0, 1, {0, 0, 0}, 0, 1, {OPT_Mem|OPS_Any|OPA_EA, 0, 0} }
-};
-
-/* Two byte opcode instructions with general memory operand */
-static const x86_insn_info twobytemem_insn[] = {
-    { CPU_Any,
-      MOD_Op1Add|MOD_Op0Add|MOD_SpAdd|MOD_GasSufL|MOD_GasSufQ|MOD_GasSufS,
-      0, 0, 0, 2, {0, 0, 0}, 0, 1, {OPT_Mem|OPS_Any|OPA_EA, 0, 0} }
-};
-
-/* P4 VMX Instructions */
-static const x86_insn_info vmxmemrd_insn[] = {
-    { CPU_Not64, MOD_Op1Add|MOD_Op0Add|MOD_GasSufL, 32, 0, 0, 2, {0, 0, 0}, 0,
-      2, {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_32|OPA_Spare, 0} },
-    { CPU_64, MOD_Op1Add|MOD_Op0Add|MOD_GasSufQ, 64, 64, 0, 2, {0, 0, 0}, 0,
-      2, {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_64|OPA_Spare, 0} }
-};
-static const x86_insn_info vmxmemwr_insn[] = {
-    { CPU_Not64, MOD_Op1Add|MOD_Op0Add|MOD_GasSufL, 32, 0, 0, 2, {0, 0, 0}, 0,
-      2, {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_64, MOD_Op1Add|MOD_Op0Add|MOD_GasSufQ, 64, 64, 0, 2, {0, 0, 0}, 0,
-      2, {OPT_Reg|OPS_64|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, 0} }
-};
-static const x86_insn_info vmxtwobytemem_insn[] = {
-    { CPU_Any, MOD_SpAdd|MOD_Op1Add, 0, 0, 0, 2, {0x0F, 0, 0}, 0, 1,
-      {OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, 0, 0} }
-};
-static const x86_insn_info vmxthreebytemem_insn[] = {
-    { CPU_Any, MOD_SpAdd|MOD_PreAdd|MOD_Op1Add, 0, 0, 0, 2, {0x0F, 0, 0}, 0, 1,
-      {OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, 0, 0} }
-};
-
-/* Move instructions */
-static const x86_insn_info mov_insn[] = {
-    /* Absolute forms for non-64-bit mode */
-    { CPU_Not64, MOD_GasSufB, 0, 0, 0, 1, {0xA0, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_8|OPA_None, OPT_MemOffs|OPS_8|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_Not64, MOD_GasSufW, 16, 0, 0, 1, {0xA1, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_16|OPA_None, OPT_MemOffs|OPS_16|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_386|CPU_Not64, MOD_GasSufL, 32, 0, 0, 1, {0xA1, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_32|OPA_None, OPT_MemOffs|OPS_32|OPS_Relaxed|OPA_EA, 0} },
-
-    { CPU_Not64, MOD_GasSufB, 0, 0, 0, 1, {0xA2, 0, 0}, 0, 2,
-      {OPT_MemOffs|OPS_8|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_8|OPA_None, 0} },
-    { CPU_Not64, MOD_GasSufW, 16, 0, 0, 1, {0xA3, 0, 0}, 0, 2,
-      {OPT_MemOffs|OPS_16|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_16|OPA_None, 0} },
-    { CPU_386|CPU_Not64, MOD_GasSufL, 32, 0, 0, 1, {0xA3, 0, 0}, 0, 2,
-      {OPT_MemOffs|OPS_32|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_32|OPA_None, 0} },
-
-    /* 64-bit absolute forms for 64-bit mode.  Disabled for GAS, see movabs */
-    { CPU_Hammer|CPU_64, 0, 0, 0, 0, 1, {0xA0, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_8|OPA_None,
-       OPT_MemOffs|OPS_8|OPS_Relaxed|OPEAS_64|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, 0, 16, 0, 0, 1, {0xA1, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_16|OPA_None,
-       OPT_MemOffs|OPS_16|OPS_Relaxed|OPEAS_64|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, 0, 32, 0, 0, 1, {0xA1, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_32|OPA_None,
-       OPT_MemOffs|OPS_32|OPS_Relaxed|OPEAS_64|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, 0, 64, 0, 0, 1, {0xA1, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_64|OPA_None,
-       OPT_MemOffs|OPS_64|OPS_Relaxed|OPEAS_64|OPA_EA, 0} },
-
-    { CPU_Hammer|CPU_64, 0, 0, 0, 0, 1, {0xA2, 0, 0}, 0, 2,
-      {OPT_MemOffs|OPS_8|OPS_Relaxed|OPEAS_64|OPA_EA,
-       OPT_Areg|OPS_8|OPA_None, 0} },
-    { CPU_Hammer|CPU_64, 0, 16, 0, 0, 1, {0xA3, 0, 0}, 0, 2,
-      {OPT_MemOffs|OPS_16|OPS_Relaxed|OPEAS_64|OPA_EA,
-       OPT_Areg|OPS_16|OPA_None, 0} },
-    { CPU_Hammer|CPU_64, 0, 32, 0, 0, 1, {0xA3, 0, 0}, 0, 2,
-      {OPT_MemOffs|OPS_32|OPS_Relaxed|OPEAS_64|OPA_EA,
-       OPT_Areg|OPS_32|OPA_None, 0} },
-    { CPU_Hammer|CPU_64, 0, 64, 0, 0, 1, {0xA3, 0, 0}, 0, 2,
-      {OPT_MemOffs|OPS_64|OPS_Relaxed|OPEAS_64|OPA_EA,
-       OPT_Areg|OPS_64|OPA_None, 0} },
-
-    /* General 32-bit forms using Areg / short absolute option */
-    { CPU_Any, MOD_GasSufB, 0, 0, 0, 1, {0x88, 0xA2, 0}, 0, 2,
-      {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA|OPAP_ShortMov, OPT_Areg|OPS_8|OPA_Spare,
-       0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0x89, 0xA3, 0}, 0, 2,
-      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA|OPAP_ShortMov,
-       OPT_Areg|OPS_16|OPA_Spare, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0x89, 0xA3, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA|OPAP_ShortMov,
-       OPT_Areg|OPS_32|OPA_Spare, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0x89, 0xA3, 0}, 0, 2,
-      {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA|OPAP_ShortMov,
-       OPT_Areg|OPS_64|OPA_Spare, 0} },
-
-    /* General 32-bit forms */
-    { CPU_Any, MOD_GasSufB, 0, 0, 0, 1, {0x88, 0, 0}, 0, 2,
-      {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_8|OPA_Spare, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0x89, 0, 0}, 0, 2,
-      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_16|OPA_Spare, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0x89, 0, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_32|OPA_Spare, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0x89, 0, 0}, 0, 2,
-      {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_64|OPA_Spare, 0} },
-
-    /* General 32-bit forms using Areg / short absolute option */
-    { CPU_Any, MOD_GasSufB, 0, 0, 0, 1, {0x8A, 0xA0, 0}, 0, 2,
-      {OPT_Areg|OPS_8|OPA_Spare, OPT_RM|OPS_8|OPS_Relaxed|OPA_EA|OPAP_ShortMov,
-       0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0x8B, 0xA1, 0}, 0, 2,
-      {OPT_Areg|OPS_16|OPA_Spare,
-       OPT_RM|OPS_16|OPS_Relaxed|OPA_EA|OPAP_ShortMov, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0x8B, 0xA1, 0}, 0, 2,
-      {OPT_Areg|OPS_32|OPA_Spare,
-       OPT_RM|OPS_32|OPS_Relaxed|OPA_EA|OPAP_ShortMov, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0x8B, 0xA1, 0}, 0, 2,
-      {OPT_Areg|OPS_64|OPA_Spare,
-       OPT_RM|OPS_64|OPS_Relaxed|OPA_EA|OPAP_ShortMov, 0} },
-
-    /* General 32-bit forms */
-    { CPU_Any, MOD_GasSufB, 0, 0, 0, 1, {0x8A, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_8|OPA_Spare, OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0x8B, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0x8B, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0x8B, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, 0} },
-
-    /* Segment register forms */
-    { CPU_Any, MOD_GasSufW, 0, 0, 0, 1, {0x8C, 0, 0}, 0, 2,
-      {OPT_Mem|OPS_16|OPS_Relaxed|OPA_EA,
-       OPT_SegReg|OPS_16|OPS_Relaxed|OPA_Spare, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0x8C, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_EA, OPT_SegReg|OPS_16|OPS_Relaxed|OPA_Spare, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0x8C, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_EA, OPT_SegReg|OPS_16|OPS_Relaxed|OPA_Spare, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0x8C, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_EA, OPT_SegReg|OPS_16|OPS_Relaxed|OPA_Spare, 0} },
-
-    { CPU_Any, MOD_GasSufW, 0, 0, 0, 1, {0x8E, 0, 0}, 0, 2,
-      {OPT_SegReg|OPS_16|OPS_Relaxed|OPA_Spare,
-       OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_386, MOD_GasSufL, 0, 0, 0, 1, {0x8E, 0, 0}, 0, 2,
-      {OPT_SegReg|OPS_16|OPS_Relaxed|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 0, 0, 0, 1, {0x8E, 0, 0}, 0, 2,
-      {OPT_SegReg|OPS_16|OPS_Relaxed|OPA_Spare, OPT_Reg|OPS_64|OPA_EA, 0} },
-
-    /* Immediate forms */
-    { CPU_Any, MOD_GasSufB, 0, 0, 0, 1, {0xB0, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_8|OPA_Op0Add, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0xB8, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_Op0Add, OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0xB8, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Op0Add, OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm, 0} },
-    /* 64-bit forced size form */
-    { CPU_Hammer|CPU_64, MOD_GasIllegal, 64, 0, 0, 1, {0xB8, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Op0Add, OPT_Imm|OPS_64|OPA_Imm, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0xB8, 0xC7, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Op0Add,
-       OPT_Imm|OPS_64|OPS_Relaxed|OPA_Imm|OPAP_SImm32Avail, 0} },
-    /* Need two sets here, one for strictness on left side, one for right. */
-    { CPU_Any, MOD_GasSufB, 0, 0, 0, 1, {0xC6, 0, 0}, 0, 2,
-      {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_8|OPA_Imm, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0xC7, 0, 0}, 0, 2,
-      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_16|OPA_Imm, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0xC7, 0, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_32|OPA_Imm, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0xC7, 0, 0}, 0, 2,
-      {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_32|OPA_Imm, 0} },
-    { CPU_Any, MOD_GasSufB, 0, 0, 0, 1, {0xC6, 0, 0}, 0, 2,
-      {OPT_RM|OPS_8|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0xC7, 0, 0}, 0, 2,
-      {OPT_RM|OPS_16|OPA_EA, OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0xC7, 0, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPA_EA, OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0xC7, 0, 0}, 0, 2,
-      {OPT_RM|OPS_64|OPA_EA, OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm, 0} },
-
-    /* CR/DR forms */
-    { CPU_586|CPU_Priv|CPU_Not64, MOD_GasSufL, 0, 0, 0, 2, {0x0F, 0x22, 0}, 0,
-      2, {OPT_CR4|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} },
-    { CPU_386|CPU_Priv|CPU_Not64, MOD_GasSufL, 0, 0, 0, 2, {0x0F, 0x22, 0}, 0,
-      2, {OPT_CRReg|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} },
-    { CPU_Hammer|CPU_Priv|CPU_64, MOD_GasSufL, 0, 0, 0, 2, {0x0F, 0x22, 0}, 0,
-      2, {OPT_CRReg|OPS_32|OPA_Spare, OPT_Reg|OPS_64|OPA_EA, 0} },
-    { CPU_586|CPU_Priv|CPU_Not64, MOD_GasSufL, 0, 0, 0, 2, {0x0F, 0x20, 0}, 0,
-      2, {OPT_Reg|OPS_32|OPA_EA, OPT_CR4|OPS_32|OPA_Spare, 0} },
-    { CPU_386|CPU_Priv|CPU_Not64, MOD_GasSufL, 0, 0, 0, 2, {0x0F, 0x20, 0}, 0,
-      2, {OPT_Reg|OPS_32|OPA_EA, OPT_CRReg|OPS_32|OPA_Spare, 0} },
-    { CPU_Hammer|CPU_Priv|CPU_64, MOD_GasSufQ, 0, 0, 0, 2, {0x0F, 0x20, 0}, 0,
-      2, {OPT_Reg|OPS_64|OPA_EA, OPT_CRReg|OPS_32|OPA_Spare, 0} },
-
-    { CPU_386|CPU_Priv|CPU_Not64, MOD_GasSufL, 0, 0, 0, 2, {0x0F, 0x23, 0}, 0,
-      2, {OPT_DRReg|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} },
-    { CPU_Hammer|CPU_Priv|CPU_64, MOD_GasSufL, 0, 0, 0, 2, {0x0F, 0x23, 0}, 0,
-      2, {OPT_DRReg|OPS_32|OPA_Spare, OPT_Reg|OPS_64|OPA_EA, 0} },
-    { CPU_386|CPU_Priv|CPU_Not64, MOD_GasSufL, 0, 0, 0, 2, {0x0F, 0x21, 0}, 0,
-      2, {OPT_Reg|OPS_32|OPA_EA, OPT_DRReg|OPS_32|OPA_Spare, 0} },
-    { CPU_Hammer|CPU_Priv|CPU_64, MOD_GasSufQ, 0, 0, 0, 2, {0x0F, 0x21, 0}, 0,
-      2, {OPT_Reg|OPS_64|OPA_EA, OPT_DRReg|OPS_32|OPA_Spare, 0} },
-
-    /* MMX/SSE2 forms for GAS parser (copied from movq_insn) */
-    { CPU_MMX, MOD_GasOnly|MOD_GasSufQ, 0, 0, 0, 2, {0x0F, 0x6F, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_SIMDRM|OPS_64|OPS_Relaxed|OPA_EA, 0}
-    },
-    { CPU_MMX|CPU_Hammer|CPU_64, MOD_GasOnly|MOD_GasSufQ, 64, 0, 0, 2,
-      {0x0F, 0x6E, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_MMX, MOD_GasOnly|MOD_GasSufQ, 0, 0, 0, 2, {0x0F, 0x7F, 0}, 0, 2,
-      {OPT_SIMDRM|OPS_64|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_64|OPA_Spare, 0}
-    },
-    { CPU_MMX|CPU_Hammer|CPU_64, MOD_GasOnly|MOD_GasSufQ, 64, 0, 0, 2,
-      {0x0F, 0x7E, 0}, 0, 2,
-      {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_64|OPA_Spare, 0} },
-    { CPU_SSE2, MOD_GasOnly|MOD_GasSufQ, 0, 0, 0xF3, 2, {0x0F, 0x7E, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0} },
-    { CPU_SSE2, MOD_GasOnly|MOD_GasSufQ, 0, 0, 0xF3, 2, {0x0F, 0x7E, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_64|OPS_Relaxed|OPA_EA, 0}
-    },
-    { CPU_SSE2|CPU_Hammer|CPU_64, MOD_GasOnly|MOD_GasSufQ, 64, 0, 0x66, 2,
-      {0x0F, 0x6E, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_SSE2, MOD_GasOnly|MOD_GasSufQ, 0, 0, 0x66, 2, {0x0F, 0xD6, 0}, 0, 2,
-      {OPT_SIMDRM|OPS_64|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare, 0}
-    },
-    { CPU_SSE2|CPU_Hammer|CPU_64, MOD_GasOnly|MOD_GasSufQ, 64, 0, 0x66, 2,
-      {0x0F, 0x7E, 0}, 0, 2,
-      {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare, 0} }
-};
-
-/* 64-bit absolute move (for GAS).
- * These are disabled for GAS for normal mov above.
- */
-static const x86_insn_info movabs_insn[] = {
-    { CPU_Hammer|CPU_64, MOD_GasSufB, 0, 0, 0, 1, {0xA0, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_8|OPA_None,
-       OPT_MemOffs|OPS_8|OPS_Relaxed|OPEAS_64|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufW, 16, 0, 0, 1, {0xA1, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_16|OPA_None,
-       OPT_MemOffs|OPS_16|OPS_Relaxed|OPEAS_64|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufL, 32, 0, 0, 1, {0xA1, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_32|OPA_None,
-       OPT_MemOffs|OPS_32|OPS_Relaxed|OPEAS_64|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0xA1, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_64|OPA_None,
-       OPT_MemOffs|OPS_64|OPS_Relaxed|OPEAS_64|OPA_EA, 0} },
-
-    { CPU_Hammer|CPU_64, MOD_GasSufB, 0, 0, 0, 1, {0xA2, 0, 0}, 0, 2,
-      {OPT_MemOffs|OPS_8|OPS_Relaxed|OPEAS_64|OPA_EA,
-       OPT_Areg|OPS_8|OPA_None, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufW, 16, 0, 0, 1, {0xA3, 0, 0}, 0, 2,
-      {OPT_MemOffs|OPS_16|OPS_Relaxed|OPEAS_64|OPA_EA,
-       OPT_Areg|OPS_16|OPA_None, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufL, 32, 0, 0, 1, {0xA3, 0, 0}, 0, 2,
-      {OPT_MemOffs|OPS_32|OPS_Relaxed|OPEAS_64|OPA_EA,
-       OPT_Areg|OPS_32|OPA_None, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0xA3, 0, 0}, 0, 2,
-      {OPT_MemOffs|OPS_64|OPS_Relaxed|OPEAS_64|OPA_EA,
-       OPT_Areg|OPS_64|OPA_None, 0} },
-
-    /* 64-bit immediate form */
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0xB8, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Op0Add, OPT_Imm|OPS_64|OPS_Relaxed|OPA_Imm, 0} },
-};
-
-/* Move with sign/zero extend */
-static const x86_insn_info movszx_insn[] = {
-    { CPU_386, MOD_Op1Add|MOD_GasSufB, 16, 0, 0, 2, {0x0F, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_386, MOD_Op1Add|MOD_GasSufB, 32, 0, 0, 2, {0x0F, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_8|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, MOD_Op1Add|MOD_GasSufB, 64, 0, 0, 2, {0x0F, 0, 0}, 0,
-      2, {OPT_Reg|OPS_64|OPA_Spare, OPT_RM|OPS_8|OPA_EA, 0} },
-    { CPU_386, MOD_Op1Add|MOD_GasSufW, 32, 0, 0, 2, {0x0F, 1, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_16|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, MOD_Op1Add|MOD_GasSufW, 64, 0, 0, 2, {0x0F, 1, 0}, 0,
-      2, {OPT_Reg|OPS_64|OPA_Spare, OPT_RM|OPS_16|OPA_EA, 0} }
-};
-
-/* Move with sign-extend doubleword (64-bit mode only) */
-static const x86_insn_info movsxd_insn[] = {
-    { CPU_Hammer|CPU_64, MOD_GasSufL, 64, 0, 0, 1, {0x63, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_RM|OPS_32|OPA_EA, 0} }
-};
-
-/* Push instructions */
-static const x86_insn_info push_insn[] = {
-    { CPU_Any, MOD_GasSufW, 16, 64, 0, 1, {0x50, 0, 0}, 0, 1,
-      {OPT_Reg|OPS_16|OPA_Op0Add, 0, 0} },
-    { CPU_386|CPU_Not64, MOD_GasSufL, 32, 0, 0, 1, {0x50, 0, 0}, 0, 1,
-      {OPT_Reg|OPS_32|OPA_Op0Add, 0, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 0, 64, 0, 1, {0x50, 0, 0}, 0, 1,
-      {OPT_Reg|OPS_64|OPA_Op0Add, 0, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 64, 0, 1, {0xFF, 0, 0}, 6, 1,
-      {OPT_RM|OPS_16|OPA_EA, 0, 0} },
-    { CPU_386|CPU_Not64, MOD_GasSufL, 32, 0, 0, 1, {0xFF, 0, 0}, 6, 1,
-      {OPT_RM|OPS_32|OPA_EA, 0, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 0, 64, 0, 1, {0xFF, 0, 0}, 6, 1,
-      {OPT_RM|OPS_64|OPA_EA, 0, 0} },
-    { CPU_186, MOD_GasIllegal, 0, 64, 0, 1, {0x6A, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_8|OPA_SImm, 0, 0} },
-    { CPU_186, MOD_GasOnly|MOD_GasSufB, 0, 64, 0, 1, {0x6A, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_8|OPS_Relaxed|OPA_SImm, 0, 0} },
-    { CPU_186, MOD_GasOnly|MOD_GasSufW, 16, 64, 0, 1, {0x6A, 0x68, 0}, 0, 1,
-      {OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm|OPAP_SImm8, 0, 0} },
-    { CPU_386|CPU_Not64, MOD_GasOnly|MOD_GasSufL, 32, 0, 0, 1,
-      {0x6A, 0x68, 0}, 0, 1,
-      {OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm|OPAP_SImm8, 0, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 64, 0, 1,
-      {0x6A, 0x68, 0}, 0, 1,
-      {OPT_Imm|OPS_32|OPS_Relaxed|OPA_SImm|OPAP_SImm8, 0, 0} },
-    { CPU_186|CPU_Not64, MOD_GasIllegal, 0, 0, 0, 1,
-      {0x6A, 0x68, 0}, 0, 1,
-      {OPT_Imm|OPS_BITS|OPS_Relaxed|OPA_Imm|OPAP_SImm8, 0, 0} },
-    /* Need these when we don't match the BITS size, but they need to be
-     * below the above line so the optimizer can kick in by default.
-     */
-    { CPU_186, MOD_GasIllegal, 16, 64, 0, 1, {0x68, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_16|OPA_Imm, 0, 0} },
-    { CPU_386|CPU_Not64, MOD_GasIllegal, 32, 0, 0, 1, {0x68, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_32|OPA_Imm, 0, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasIllegal, 64, 64, 0, 1, {0x68, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_32|OPA_SImm, 0, 0} },
-    { CPU_Not64, 0, 0, 0, 0, 1, {0x0E, 0, 0}, 0, 1,
-      {OPT_CS|OPS_Any|OPA_None, 0, 0} },
-    { CPU_Not64, MOD_GasSufW, 16, 0, 0, 1, {0x0E, 0, 0}, 0, 1,
-      {OPT_CS|OPS_16|OPA_None, 0, 0} },
-    { CPU_Not64, MOD_GasSufL, 32, 0, 0, 1, {0x0E, 0, 0}, 0, 1,
-      {OPT_CS|OPS_32|OPA_None, 0, 0} },
-    { CPU_Not64, 0, 0, 0, 0, 1, {0x16, 0, 0}, 0, 1,
-      {OPT_SS|OPS_Any|OPA_None, 0, 0} },
-    { CPU_Not64, MOD_GasSufW, 16, 0, 0, 1, {0x16, 0, 0}, 0, 1,
-      {OPT_SS|OPS_16|OPA_None, 0, 0} },
-    { CPU_Not64, MOD_GasSufL, 32, 0, 0, 1, {0x16, 0, 0}, 0, 1,
-      {OPT_SS|OPS_32|OPA_None, 0, 0} },
-    { CPU_Not64, 0, 0, 0, 0, 1, {0x1E, 0, 0}, 0, 1,
-      {OPT_DS|OPS_Any|OPA_None, 0, 0} },
-    { CPU_Not64, MOD_GasSufW, 16, 0, 0, 1, {0x1E, 0, 0}, 0, 1,
-      {OPT_DS|OPS_16|OPA_None, 0, 0} },
-    { CPU_Not64, MOD_GasSufL, 32, 0, 0, 1, {0x1E, 0, 0}, 0, 1,
-      {OPT_DS|OPS_32|OPA_None, 0, 0} },
-    { CPU_Not64, 0, 0, 0, 0, 1, {0x06, 0, 0}, 0, 1,
-      {OPT_ES|OPS_Any|OPA_None, 0, 0} },
-    { CPU_Not64, MOD_GasSufW, 16, 0, 0, 1, {0x06, 0, 0}, 0, 1,
-      {OPT_ES|OPS_16|OPA_None, 0, 0} },
-    { CPU_Not64, MOD_GasSufL, 32, 0, 0, 1, {0x06, 0, 0}, 0, 1,
-      {OPT_ES|OPS_32|OPA_None, 0, 0} },
-    { CPU_386, 0, 0, 0, 0, 2, {0x0F, 0xA0, 0}, 0, 1,
-      {OPT_FS|OPS_Any|OPA_None, 0, 0} },
-    { CPU_386, MOD_GasSufW, 16, 0, 0, 2, {0x0F, 0xA0, 0}, 0, 1,
-      {OPT_FS|OPS_16|OPA_None, 0, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 2, {0x0F, 0xA0, 0}, 0, 1,
-      {OPT_FS|OPS_32|OPA_None, 0, 0} },
-    { CPU_386, 0, 0, 0, 0, 2, {0x0F, 0xA8, 0}, 0, 1,
-      {OPT_GS|OPS_Any|OPA_None, 0, 0} },
-    { CPU_386, MOD_GasSufW, 16, 0, 0, 2, {0x0F, 0xA8, 0}, 0, 1,
-      {OPT_GS|OPS_16|OPA_None, 0, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 2, {0x0F, 0xA8, 0}, 0, 1,
-      {OPT_GS|OPS_32|OPA_None, 0, 0} }
-};
-
-/* Pop instructions */
-static const x86_insn_info pop_insn[] = {
-    { CPU_Any, MOD_GasSufW, 16, 64, 0, 1, {0x58, 0, 0}, 0, 1,
-      {OPT_Reg|OPS_16|OPA_Op0Add, 0, 0} },
-    { CPU_386|CPU_Not64, MOD_GasSufL, 32, 0, 0, 1, {0x58, 0, 0}, 0, 1,
-      {OPT_Reg|OPS_32|OPA_Op0Add, 0, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 0, 64, 0, 1, {0x58, 0, 0}, 0, 1,
-      {OPT_Reg|OPS_64|OPA_Op0Add, 0, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 64, 0, 1, {0x8F, 0, 0}, 0, 1,
-      {OPT_RM|OPS_16|OPA_EA, 0, 0} },
-    { CPU_386|CPU_Not64, MOD_GasSufL, 32, 0, 0, 1, {0x8F, 0, 0}, 0, 1,
-      {OPT_RM|OPS_32|OPA_EA, 0, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 0, 64, 0, 1, {0x8F, 0, 0}, 0, 1,
-      {OPT_RM|OPS_64|OPA_EA, 0, 0} },
-    /* POP CS is debateably valid on the 8086, if obsolete and undocumented.
-     * We don't include it because it's VERY unlikely it will ever be used
-     * anywhere.  If someone really wants it they can db 0x0F it.
-     */
-    /*{ CPU_Any|CPU_Undoc|CPU_Obs, 0, 0, 1, {0x0F, 0, 0}, 0, 1,
-        {OPT_CS|OPS_Any|OPA_None, 0, 0} },*/
-    { CPU_Not64, 0, 0, 0, 0, 1, {0x17, 0, 0}, 0, 1,
-      {OPT_SS|OPS_Any|OPA_None, 0, 0} },
-    { CPU_Not64, 0, 16, 0, 0, 1, {0x17, 0, 0}, 0, 1,
-      {OPT_SS|OPS_16|OPA_None, 0, 0} },
-    { CPU_Not64, 0, 32, 0, 0, 1, {0x17, 0, 0}, 0, 1,
-      {OPT_SS|OPS_32|OPA_None, 0, 0} },
-    { CPU_Not64, 0, 0, 0, 0, 1, {0x1F, 0, 0}, 0, 1,
-      {OPT_DS|OPS_Any|OPA_None, 0, 0} },
-    { CPU_Not64, 0, 16, 0, 0, 1, {0x1F, 0, 0}, 0, 1,
-      {OPT_DS|OPS_16|OPA_None, 0, 0} },
-    { CPU_Not64, 0, 32, 0, 0, 1, {0x1F, 0, 0}, 0, 1,
-      {OPT_DS|OPS_32|OPA_None, 0, 0} },
-    { CPU_Not64, 0, 0, 0, 0, 1, {0x07, 0, 0}, 0, 1,
-      {OPT_ES|OPS_Any|OPA_None, 0, 0} },
-    { CPU_Not64, 0, 16, 0, 0, 1, {0x07, 0, 0}, 0, 1,
-      {OPT_ES|OPS_16|OPA_None, 0, 0} },
-    { CPU_Not64, 0, 32, 0, 0, 1, {0x07, 0, 0}, 0, 1,
-      {OPT_ES|OPS_32|OPA_None, 0, 0} },
-    { CPU_386, 0, 0, 0, 0, 2, {0x0F, 0xA1, 0}, 0, 1,
-      {OPT_FS|OPS_Any|OPA_None, 0, 0} },
-    { CPU_386, 0, 16, 0, 0, 2, {0x0F, 0xA1, 0}, 0, 1,
-      {OPT_FS|OPS_16|OPA_None, 0, 0} },
-    { CPU_386, 0, 32, 0, 0, 2, {0x0F, 0xA1, 0}, 0, 1,
-      {OPT_FS|OPS_32|OPA_None, 0, 0} },
-    { CPU_386, 0, 0, 0, 0, 2, {0x0F, 0xA9, 0}, 0, 1,
-      {OPT_GS|OPS_Any|OPA_None, 0, 0} },
-    { CPU_386, 0, 16, 0, 0, 2, {0x0F, 0xA9, 0}, 0, 1,
-      {OPT_GS|OPS_16|OPA_None, 0, 0} },
-    { CPU_386, 0, 32, 0, 0, 2, {0x0F, 0xA9, 0}, 0, 1,
-      {OPT_GS|OPS_32|OPA_None, 0, 0} }
-};
-
-/* Exchange instructions */
-static const x86_insn_info xchg_insn[] = {
-    { CPU_Any, MOD_GasSufB, 0, 0, 0, 1, {0x86, 0, 0}, 0, 2,
-      {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_8|OPA_Spare, 0} },
-    { CPU_Any, MOD_GasSufB, 0, 0, 0, 1, {0x86, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_8|OPA_Spare, OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, 0} },
-    /* We could be extra-efficient in the 64-bit mode case here.
-     * XCHG AX, AX in 64-bit mode is a NOP, as it doesn't clear the
-     * high 48 bits of RAX. Thus we don't need the operand-size prefix.
-     * But this feels too clever, and probably not what the user really
-     * expects in the generated code, so we don't do it.
-     *
-     * { CPU_Any|CPU_64, MOD_GasSufW, 0, 0, 0, 1, {0x90, 0, 0}, 0, 2,
-     *  {OPT_Areg|OPS_16|OPA_None, OPT_Areg|OPS_16|OPA_Op0Add, 0} },
-     */
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0x90, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_16|OPA_None, OPT_Reg|OPS_16|OPA_Op0Add, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0x90, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_Op0Add, OPT_Areg|OPS_16|OPA_None, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0x87, 0, 0}, 0, 2,
-      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_16|OPA_Spare, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0x87, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, 0} },
-    /* Be careful with XCHG EAX, EAX in 64-bit mode.  This needs to use
-     * the long form rather than the NOP form, as the long form clears
-     * the high 32 bits of RAX.  This makes all 32-bit forms in 64-bit
-     * mode have consistent operation.
-     */
-    { CPU_386|CPU_64, MOD_GasSufL, 32, 0, 0, 1, {0x87, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_32|OPA_EA, OPT_Areg|OPS_32|OPA_Spare, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0x90, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_32|OPA_None, OPT_Reg|OPS_32|OPA_Op0Add, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0x90, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Op0Add, OPT_Areg|OPS_32|OPA_None, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0x87, 0, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_32|OPA_Spare, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0x87, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} },
-    /* Be efficient with XCHG RAX, RAX.
-     * This is a NOP and thus doesn't need the REX prefix.
-     */
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 0, 0, 0, 1, {0x90, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_64|OPA_None, OPT_Areg|OPS_64|OPA_Op0Add, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0x90, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_64|OPA_None, OPT_Reg|OPS_64|OPA_Op0Add, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0x90, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Op0Add, OPT_Areg|OPS_64|OPA_None, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0x87, 0, 0}, 0, 2,
-      {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_64|OPA_Spare, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0x87, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, 0} }
-};
-
-/* In/out from ports */
-static const x86_insn_info in_insn[] = {
-    { CPU_Any, MOD_GasSufB, 0, 0, 0, 1, {0xE4, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_8|OPA_None, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0xE5, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_16|OPA_None, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0xE5, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_32|OPA_None, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_Any, MOD_GasSufB, 0, 0, 0, 1, {0xEC, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_8|OPA_None, OPT_Dreg|OPS_16|OPA_None, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0xED, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_16|OPA_None, OPT_Dreg|OPS_16|OPA_None, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0xED, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_32|OPA_None, OPT_Dreg|OPS_16|OPA_None, 0} },
-    /* GAS-only variants (implict accumulator register) */
-    { CPU_Any, MOD_GasOnly|MOD_GasSufB, 0, 0, 0, 1, {0xE4, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0, 0} },
-    { CPU_Any, MOD_GasOnly|MOD_GasSufW, 16, 0, 0, 1, {0xE5, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0, 0} },
-    { CPU_386, MOD_GasOnly|MOD_GasSufL, 32, 0, 0, 1, {0xE5, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0, 0} },
-    { CPU_Any, MOD_GasOnly|MOD_GasSufB, 0, 0, 0, 1, {0xEC, 0, 0}, 0, 1,
-      {OPT_Dreg|OPS_16|OPA_None, 0, 0} },
-    { CPU_Any, MOD_GasOnly|MOD_GasSufW, 16, 0, 0, 1, {0xED, 0, 0}, 0, 1,
-      {OPT_Dreg|OPS_16|OPA_None, 0, 0} },
-    { CPU_386, MOD_GasOnly|MOD_GasSufL, 32, 0, 0, 1, {0xED, 0, 0}, 0, 1,
-      {OPT_Dreg|OPS_16|OPA_None, 0, 0} }
-};
-static const x86_insn_info out_insn[] = {
-    { CPU_Any, MOD_GasSufB, 0, 0, 0, 1, {0xE6, 0, 0}, 0, 2,
-      {OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, OPT_Areg|OPS_8|OPA_None, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0xE7, 0, 0}, 0, 2,
-      {OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, OPT_Areg|OPS_16|OPA_None, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0xE7, 0, 0}, 0, 2,
-      {OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, OPT_Areg|OPS_32|OPA_None, 0} },
-    { CPU_Any, MOD_GasSufB, 0, 0, 0, 1, {0xEE, 0, 0}, 0, 2,
-      {OPT_Dreg|OPS_16|OPA_None, OPT_Areg|OPS_8|OPA_None, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0xEF, 0, 0}, 0, 2,
-      {OPT_Dreg|OPS_16|OPA_None, OPT_Areg|OPS_16|OPA_None, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0xEF, 0, 0}, 0, 2,
-      {OPT_Dreg|OPS_16|OPA_None, OPT_Areg|OPS_32|OPA_None, 0} },
-    /* GAS-only variants (implict accumulator register) */
-    { CPU_Any, MOD_GasOnly|MOD_GasSufB, 0, 0, 0, 1, {0xE6, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0, 0} },
-    { CPU_Any, MOD_GasOnly|MOD_GasSufW, 16, 0, 0, 1, {0xE7, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0, 0} },
-    { CPU_386, MOD_GasOnly|MOD_GasSufL, 32, 0, 0, 1, {0xE7, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0, 0} },
-    { CPU_Any, MOD_GasOnly|MOD_GasSufB, 0, 0, 0, 1, {0xEE, 0, 0}, 0, 1,
-      {OPT_Dreg|OPS_16|OPA_None, 0, 0} },
-    { CPU_Any, MOD_GasOnly|MOD_GasSufW, 16, 0, 0, 1, {0xEF, 0, 0}, 0, 1,
-      {OPT_Dreg|OPS_16|OPA_None, 0, 0} },
-    { CPU_386, MOD_GasOnly|MOD_GasSufL, 32, 0, 0, 1, {0xEF, 0, 0}, 0, 1,
-      {OPT_Dreg|OPS_16|OPA_None, 0, 0} }
-};
-
-/* Load effective address */
-static const x86_insn_info lea_insn[] = {
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0x8D, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_Spare, OPT_Mem|OPS_16|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0x8D, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_Mem|OPS_32|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0x8D, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, 0} }
-};
-
-/* Load segment registers from memory */
-static const x86_insn_info ldes_insn[] = {
-    { CPU_Not64, MOD_Op0Add|MOD_GasSufW, 16, 0, 0, 1, {0, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_Spare, OPT_Mem|OPS_Any|OPA_EA, 0} },
-    { CPU_386|CPU_Not64, MOD_Op0Add|MOD_GasSufL, 32, 0, 0, 1, {0, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_Mem|OPS_Any|OPA_EA, 0} }
-};
-static const x86_insn_info lfgss_insn[] = {
-    { CPU_386, MOD_Op1Add|MOD_GasSufW, 16, 0, 0, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_Spare, OPT_Mem|OPS_Any|OPA_EA, 0} },
-    { CPU_386, MOD_Op1Add|MOD_GasSufL, 32, 0, 0, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_Mem|OPS_Any|OPA_EA, 0} }
-};
-
-/* Arithmetic - general */
-static const x86_insn_info arith_insn[] = {
-    { CPU_Any, MOD_Op0Add|MOD_GasSufB, 0, 0, 0, 1, {0x04, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_8|OPA_None, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_Any, MOD_Op2Add|MOD_Op1AddSp|MOD_GasSufW, 16, 0, 0, 2,
-      {0x83, 0xC0, 0x05}, 0, 2,
-      {OPT_Areg|OPS_16|OPA_None,
-       OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm|OPAP_SImm8, 0} },
-    { CPU_386, MOD_Op2Add|MOD_Op1AddSp|MOD_GasSufL, 32, 0, 0, 2,
-      {0x83, 0xC0, 0x05}, 0, 2,
-      {OPT_Areg|OPS_32|OPA_None,
-       OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm|OPAP_SImm8, 0} },
-    { CPU_Hammer|CPU_64, MOD_Op2Add|MOD_Op1AddSp|MOD_GasSufQ, 64, 0, 0, 2,
-      {0x83, 0xC0, 0x05}, 0,
-      2, {OPT_Areg|OPS_64|OPA_None,
-          OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm|OPAP_SImm8, 0} },
-
-    { CPU_Any, MOD_Gap0|MOD_SpAdd|MOD_GasSufB, 0, 0, 0, 1, {0x80, 0, 0}, 0, 2,
-      {OPT_RM|OPS_8|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_Any, MOD_Gap0|MOD_SpAdd|MOD_GasSufB, 0, 0, 0, 1, {0x80, 0, 0}, 0, 2,
-      {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_8|OPA_Imm, 0} },
-
-    { CPU_Any, MOD_Gap0|MOD_SpAdd|MOD_GasSufW, 16, 0, 0, 1, {0x83, 0, 0}, 0, 2,
-      {OPT_RM|OPS_16|OPA_EA, OPT_Imm|OPS_8|OPA_SImm, 0} },
-    { CPU_Any, MOD_Gap0|MOD_SpAdd|MOD_GasIllegal, 16, 0, 0, 1, {0x83, 0x81, 0},
-      0, 2, {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA,
-             OPT_Imm|OPS_16|OPA_Imm|OPAP_SImm8, 0} },
-    { CPU_Any, MOD_Gap0|MOD_SpAdd|MOD_GasSufW, 16, 0, 0, 1, {0x83, 0x81, 0}, 0,
-      2, {OPT_RM|OPS_16|OPA_EA,
-          OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm|OPAP_SImm8, 0} },
-
-    { CPU_386, MOD_Gap0|MOD_SpAdd|MOD_GasSufL, 32, 0, 0, 1, {0x83, 0, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPA_EA, OPT_Imm|OPS_8|OPA_SImm, 0} },
-    /* Not64 because we can't tell if add [], dword in 64-bit mode is supposed
-     * to be a qword destination or a dword destination.
-     */
-    { CPU_386|CPU_Not64, MOD_Gap0|MOD_SpAdd|MOD_GasIllegal, 32, 0, 0, 1,
-      {0x83, 0x81, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_32|OPA_Imm|OPAP_SImm8, 0} },
-    { CPU_386, MOD_Gap0|MOD_SpAdd|MOD_GasSufL, 32, 0, 0, 1, {0x83, 0x81, 0}, 0,
-      2, {OPT_RM|OPS_32|OPA_EA,
-          OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm|OPAP_SImm8, 0} },
-
-    /* No relaxed-RM mode for 64-bit destinations; see above Not64 comment. */
-    { CPU_Hammer|CPU_64, MOD_Gap0|MOD_SpAdd|MOD_GasSufQ, 64, 0, 0, 1,
-      {0x83, 0, 0}, 0, 2,
-      {OPT_RM|OPS_64|OPA_EA, OPT_Imm|OPS_8|OPA_SImm, 0} },
-    { CPU_Hammer|CPU_64, MOD_Gap0|MOD_SpAdd|MOD_GasSufQ, 64, 0, 0, 1,
-      {0x83, 0x81, 0}, 0, 2,
-      {OPT_RM|OPS_64|OPA_EA,
-       OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm|OPAP_SImm8, 0} },
-
-    { CPU_Any, MOD_Op0Add|MOD_GasSufB, 0, 0, 0, 1, {0x00, 0, 0}, 0, 2,
-      {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_8|OPA_Spare, 0} },
-    { CPU_Any, MOD_Op0Add|MOD_GasSufW, 16, 0, 0, 1, {0x01, 0, 0}, 0, 2,
-      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_16|OPA_Spare, 0} },
-    { CPU_386, MOD_Op0Add|MOD_GasSufL, 32, 0, 0, 1, {0x01, 0, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_32|OPA_Spare, 0} },
-    { CPU_Hammer|CPU_64, MOD_Op0Add|MOD_GasSufQ, 64, 0, 0, 1, {0x01, 0, 0}, 0,
-      2, {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_64|OPA_Spare, 0} },
-    { CPU_Any, MOD_Op0Add|MOD_GasSufB, 0, 0, 0, 1, {0x02, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_8|OPA_Spare, OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_Any, MOD_Op0Add|MOD_GasSufW, 16, 0, 0, 1, {0x03, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_386, MOD_Op0Add|MOD_GasSufL, 32, 0, 0, 1, {0x03, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, MOD_Op0Add|MOD_GasSufQ, 64, 0, 0, 1, {0x03, 0, 0}, 0,
-      2, {OPT_Reg|OPS_64|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, 0} }
-};
-
-/* Arithmetic - inc/dec */
-static const x86_insn_info incdec_insn[] = {
-    { CPU_Any, MOD_Gap0|MOD_SpAdd|MOD_GasSufB, 0, 0, 0, 1, {0xFE, 0, 0}, 0, 1,
-      {OPT_RM|OPS_8|OPA_EA, 0, 0} },
-    { CPU_Not64, MOD_Op0Add|MOD_GasSufW, 16, 0, 0, 1, {0, 0, 0}, 0, 1,
-      {OPT_Reg|OPS_16|OPA_Op0Add, 0, 0} },
-    { CPU_Any, MOD_Gap0|MOD_SpAdd|MOD_GasSufW, 16, 0, 0, 1, {0xFF, 0, 0}, 0,
-      1, {OPT_RM|OPS_16|OPA_EA, 0, 0} },
-    { CPU_386|CPU_Not64, MOD_Op0Add|MOD_GasSufL, 32, 0, 0, 1, {0, 0, 0}, 0, 1,
-      {OPT_Reg|OPS_32|OPA_Op0Add, 0, 0} },
-    { CPU_386, MOD_Gap0|MOD_SpAdd|MOD_GasSufL, 32, 0, 0, 1, {0xFF, 0, 0}, 0, 1,
-      {OPT_RM|OPS_32|OPA_EA, 0, 0} },
-    { CPU_Hammer|CPU_64, MOD_Gap0|MOD_SpAdd|MOD_GasSufQ, 64, 0, 0, 1,
-      {0xFF, 0, 0}, 0, 1, {OPT_RM|OPS_64|OPA_EA, 0, 0} },
-};
-
-/* Arithmetic - mul/neg/not F6 opcodes */
-static const x86_insn_info f6_insn[] = {
-    { CPU_Any, MOD_SpAdd|MOD_GasSufB, 0, 0, 0, 1, {0xF6, 0, 0}, 0, 1,
-      {OPT_RM|OPS_8|OPA_EA, 0, 0} },
-    { CPU_Any, MOD_SpAdd|MOD_GasSufW, 16, 0, 0, 1, {0xF7, 0, 0}, 0, 1,
-      {OPT_RM|OPS_16|OPA_EA, 0, 0} },
-    { CPU_386, MOD_SpAdd|MOD_GasSufL, 32, 0, 0, 1, {0xF7, 0, 0}, 0, 1,
-      {OPT_RM|OPS_32|OPA_EA, 0, 0} },
-    { CPU_Hammer|CPU_64, MOD_SpAdd|MOD_GasSufQ, 64, 0, 0, 1, {0xF7, 0, 0}, 0,
-      1, {OPT_RM|OPS_64|OPA_EA, 0, 0} },
-};
-
-/* Arithmetic - div/idiv F6 opcodes
- * These allow explicit accumulator in GAS mode.
- */
-static const x86_insn_info div_insn[] = {
-    { CPU_Any, MOD_SpAdd|MOD_GasSufB, 0, 0, 0, 1, {0xF6, 0, 0}, 0, 1,
-      {OPT_RM|OPS_8|OPA_EA, 0, 0} },
-    { CPU_Any, MOD_SpAdd|MOD_GasSufW, 16, 0, 0, 1, {0xF7, 0, 0}, 0, 1,
-      {OPT_RM|OPS_16|OPA_EA, 0, 0} },
-    { CPU_386, MOD_SpAdd|MOD_GasSufL, 32, 0, 0, 1, {0xF7, 0, 0}, 0, 1,
-      {OPT_RM|OPS_32|OPA_EA, 0, 0} },
-    { CPU_Hammer|CPU_64, MOD_SpAdd|MOD_GasSufQ, 64, 0, 0, 1, {0xF7, 0, 0}, 0,
-      1, {OPT_RM|OPS_64|OPA_EA, 0, 0} },
-    /* Versions with explicit accumulator */
-    { CPU_Any, MOD_SpAdd|MOD_GasSufB, 0, 0, 0, 1, {0xF6, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_8|OPA_None, OPT_RM|OPS_8|OPA_EA, 0} },
-    { CPU_Any, MOD_SpAdd|MOD_GasSufW, 16, 0, 0, 1, {0xF7, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_16|OPA_None, OPT_RM|OPS_16|OPA_EA, 0} },
-    { CPU_386, MOD_SpAdd|MOD_GasSufL, 32, 0, 0, 1, {0xF7, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_32|OPA_None, OPT_RM|OPS_32|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, MOD_SpAdd|MOD_GasSufQ, 64, 0, 0, 1, {0xF7, 0, 0}, 0,
-      2, {OPT_Areg|OPS_64|OPA_None, OPT_RM|OPS_64|OPA_EA, 0} },
-};
-
-/* Arithmetic - test instruction */
-static const x86_insn_info test_insn[] = {
-    { CPU_Any, MOD_GasSufB, 0, 0, 0, 1, {0xA8, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_8|OPA_None, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0xA9, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_16|OPA_None, OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0xA9, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_32|OPA_None, OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0xA9, 0, 0}, 0, 2,
-      {OPT_Areg|OPS_64|OPA_None, OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm, 0} },
-
-    { CPU_Any, MOD_GasSufB, 0, 0, 0, 1, {0xF6, 0, 0}, 0, 2,
-      {OPT_RM|OPS_8|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_Any, MOD_GasSufB, 0, 0, 0, 1, {0xF6, 0, 0}, 0, 2,
-      {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_8|OPA_Imm, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0xF7, 0, 0}, 0, 2,
-      {OPT_RM|OPS_16|OPA_EA, OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0xF7, 0, 0}, 0, 2,
-      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_16|OPA_Imm, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0xF7, 0, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPA_EA, OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0xF7, 0, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_32|OPA_Imm, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0xF7, 0, 0}, 0, 2,
-      {OPT_RM|OPS_64|OPA_EA, OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0xF7, 0, 0}, 0, 2,
-      {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_32|OPA_Imm, 0} },
-
-    { CPU_Any, MOD_GasSufB, 0, 0, 0, 1, {0x84, 0, 0}, 0, 2,
-      {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_8|OPA_Spare, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0x85, 0, 0}, 0, 2,
-      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_16|OPA_Spare, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0x85, 0, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_32|OPA_Spare, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0x85, 0, 0}, 0, 2,
-      {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_64|OPA_Spare, 0} },
-
-    { CPU_Any, MOD_GasSufB, 0, 0, 0, 1, {0x84, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_8|OPA_Spare, OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0x85, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0x85, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0x85, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, 0} }
-};
-
-/* Arithmetic - aad/aam */
-static const x86_insn_info aadm_insn[] = {
-    { CPU_Any, MOD_Op0Add, 0, 0, 0, 2, {0xD4, 0x0A, 0}, 0, 0, {0, 0, 0} },
-    { CPU_Any, MOD_Op0Add, 0, 0, 0, 1, {0xD4, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0, 0} }
-};
-
-/* Arithmetic - imul */
-static const x86_insn_info imul_insn[] = {
-    { CPU_Any, MOD_GasSufB, 0, 0, 0, 1, {0xF6, 0, 0}, 5, 1,
-      {OPT_RM|OPS_8|OPA_EA, 0, 0} },
-    { CPU_Any, MOD_GasSufW, 16, 0, 0, 1, {0xF7, 0, 0}, 5, 1,
-      {OPT_RM|OPS_16|OPA_EA, 0, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0xF7, 0, 0}, 5, 1,
-      {OPT_RM|OPS_32|OPA_EA, 0, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0xF7, 0, 0}, 5, 1,
-      {OPT_RM|OPS_64|OPA_EA, 0, 0} },
-
-    { CPU_386, MOD_GasSufW, 16, 0, 0, 2, {0x0F, 0xAF, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 2, {0x0F, 0xAF, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 2, {0x0F, 0xAF, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, 0} },
-
-    { CPU_186, MOD_GasSufW, 16, 0, 0, 1, {0x6B, 0, 0}, 0, 3,
-      {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_16|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_8|OPA_SImm} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0x6B, 0, 0}, 0, 3,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_8|OPA_SImm} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0x6B, 0, 0}, 0, 3,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_8|OPA_SImm} },
-
-    { CPU_186, MOD_GasSufW, 16, 0, 0, 1, {0x6B, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_SpareEA, OPT_Imm|OPS_8|OPA_SImm, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0x6B, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_SpareEA, OPT_Imm|OPS_8|OPA_SImm, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0x6B, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_SpareEA, OPT_Imm|OPS_8|OPA_SImm, 0} },
-
-    { CPU_186, MOD_GasSufW, 16, 0, 0, 1, {0x6B, 0x69, 0}, 0, 3,
-      {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_16|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_16|OPS_Relaxed|OPA_SImm|OPAP_SImm8} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0x6B, 0x69, 0}, 0, 3,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_32|OPS_Relaxed|OPA_SImm|OPAP_SImm8} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0x6B, 0x69, 0}, 0, 3,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_32|OPS_Relaxed|OPA_SImm|OPAP_SImm8} },
-
-    { CPU_186, MOD_GasSufW, 16, 0, 0, 1, {0x6B, 0x69, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_SpareEA,
-       OPT_Imm|OPS_16|OPS_Relaxed|OPA_SImm|OPAP_SImm8, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0x6B, 0x69, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_SpareEA,
-       OPT_Imm|OPS_32|OPS_Relaxed|OPA_SImm|OPAP_SImm8, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 1, {0x6B, 0x69, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_SpareEA,
-       OPT_Imm|OPS_32|OPS_Relaxed|OPA_SImm|OPAP_SImm8, 0} }
-};
-
-/* Shifts - standard */
-static const x86_insn_info shift_insn[] = {
-    { CPU_Any, MOD_SpAdd|MOD_GasSufB, 0, 0, 0, 1, {0xD2, 0, 0}, 0, 2,
-      {OPT_RM|OPS_8|OPA_EA, OPT_Creg|OPS_8|OPA_None, 0} },
-    { CPU_Any, MOD_SpAdd|MOD_GasSufB, 0, 0, 0, 1, {0xD0, 0, 0}, 0, 2,
-      {OPT_RM|OPS_8|OPA_EA, OPT_Imm1|OPS_8|OPS_Relaxed|OPA_None, 0} },
-    { CPU_186, MOD_SpAdd|MOD_GasSufB, 0, 0, 0, 1, {0xC0, 0, 0}, 0, 2,
-      {OPT_RM|OPS_8|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_Any, MOD_SpAdd|MOD_GasSufW, 16, 0, 0, 1, {0xD3, 0, 0}, 0, 2,
-      {OPT_RM|OPS_16|OPA_EA, OPT_Creg|OPS_8|OPA_None, 0} },
-    { CPU_Any, MOD_SpAdd|MOD_GasSufW, 16, 0, 0, 1, {0xD1, 0, 0}, 0, 2,
-      {OPT_RM|OPS_16|OPA_EA, OPT_Imm1|OPS_8|OPS_Relaxed|OPA_None, 0} },
-    { CPU_186, MOD_SpAdd|MOD_GasSufW, 16, 0, 0, 1, {0xC1, 0, 0}, 0, 2,
-      {OPT_RM|OPS_16|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_Any, MOD_SpAdd|MOD_GasSufL, 32, 0, 0, 1, {0xD3, 0, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPA_EA, OPT_Creg|OPS_8|OPA_None, 0} },
-    { CPU_386, MOD_SpAdd|MOD_GasSufL, 32, 0, 0, 1, {0xD1, 0, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPA_EA, OPT_Imm1|OPS_8|OPS_Relaxed|OPA_None, 0} },
-    { CPU_386, MOD_SpAdd|MOD_GasSufL, 32, 0, 0, 1, {0xC1, 0, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_Hammer|CPU_64, MOD_SpAdd|MOD_GasSufQ, 64, 0, 0, 1, {0xD3, 0, 0}, 0,
-      2, {OPT_RM|OPS_64|OPA_EA, OPT_Creg|OPS_8|OPA_None, 0} },
-    { CPU_Hammer|CPU_64, MOD_SpAdd|MOD_GasSufQ, 64, 0, 0, 1, {0xD1, 0, 0},
-      0, 2, {OPT_RM|OPS_64|OPA_EA, OPT_Imm1|OPS_8|OPS_Relaxed|OPA_None, 0} },
-    { CPU_Hammer|CPU_64, MOD_SpAdd|MOD_GasSufQ, 64, 0, 0, 1, {0xC1, 0, 0},
-      0, 2, {OPT_RM|OPS_64|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
-    /* In GAS mode, single operands are equivalent to shifting by 1 forms */
-    { CPU_Any, MOD_SpAdd|MOD_GasOnly|MOD_GasSufB, 0, 0, 0, 1, {0xD0, 0, 0},
-      0, 1, {OPT_RM|OPS_8|OPA_EA, 0, 0} },
-    { CPU_Any, MOD_SpAdd|MOD_GasOnly|MOD_GasSufW, 16, 0, 0, 1, {0xD1, 0, 0},
-      0, 1, {OPT_RM|OPS_16|OPA_EA, 0, 0} },
-    { CPU_Any, MOD_SpAdd|MOD_GasOnly|MOD_GasSufL, 32, 0, 0, 1, {0xD1, 0, 0},
-      0, 1, {OPT_RM|OPS_32|OPA_EA, 0, 0} },
-    { CPU_Hammer|CPU_64, MOD_SpAdd|MOD_GasOnly|MOD_GasSufQ, 64, 0, 0, 1,
-      {0xD1, 0, 0}, 0, 1, {OPT_RM|OPS_64|OPA_EA, 0, 0} }
-};
-
-/* Shifts - doubleword */
-static const x86_insn_info shlrd_insn[] = {
-    { CPU_386, MOD_Op1Add|MOD_GasSufW, 16, 0, 0, 2, {0x0F, 0x00, 0}, 0, 3,
-      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_16|OPA_Spare,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_386, MOD_Op1Add|MOD_GasSufW, 16, 0, 0, 2, {0x0F, 0x01, 0}, 0, 3,
-      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_16|OPA_Spare,
-       OPT_Creg|OPS_8|OPA_None} },
-    { CPU_386, MOD_Op1Add|MOD_GasSufL, 32, 0, 0, 2, {0x0F, 0x00, 0}, 0, 3,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_32|OPA_Spare,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_386, MOD_Op1Add|MOD_GasSufL, 32, 0, 0, 2, {0x0F, 0x01, 0}, 0, 3,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_32|OPA_Spare,
-       OPT_Creg|OPS_8|OPA_None} },
-    { CPU_Hammer|CPU_64, MOD_Op1Add|MOD_GasSufQ, 64, 0, 0, 2, {0x0F, 0x00, 0},
-      0, 3, {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_64|OPA_Spare,
-             OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_Hammer|CPU_64, MOD_Op1Add|MOD_GasSufQ, 64, 0, 0, 2, {0x0F, 0x01, 0},
-      0, 3, {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_64|OPA_Spare,
-             OPT_Creg|OPS_8|OPA_None} },
-    /* GAS parser supports two-operand form for shift with CL count */
-    { CPU_386, MOD_Op1Add|MOD_GasOnly|MOD_GasSufW, 16, 0, 0, 2,
-      {0x0F, 0x01, 0}, 0, 2,
-      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_16|OPA_Spare, 0} },
-    { CPU_386, MOD_Op1Add|MOD_GasOnly|MOD_GasSufL, 32, 0, 0, 2,
-      {0x0F, 0x01, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_32|OPA_Spare, 0} },
-    { CPU_Hammer|CPU_64, MOD_Op1Add|MOD_GasOnly|MOD_GasSufQ, 64, 0, 0, 2,
-      {0x0F, 0x01, 0}, 0, 2,
-      {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_64|OPA_Spare, 0} }
-};
-
-/* Control transfer instructions (unconditional) */
-static const x86_insn_info call_insn[] = {
-    { CPU_Any, 0, 0, 0, 0, 0, {0, 0, 0}, 0, 1,
-      {OPT_ImmNotSegOff|OPS_Any|OPA_JmpRel, 0, 0} },
-    { CPU_Any, 0, 16, 0, 0, 0, {0, 0, 0}, 0, 1,
-      {OPT_ImmNotSegOff|OPS_16|OPA_JmpRel, 0, 0} },
-    { CPU_386|CPU_Not64, 0, 32, 0, 0, 0, {0, 0, 0}, 0, 1,
-      {OPT_ImmNotSegOff|OPS_32|OPA_JmpRel, 0, 0} },
-    { CPU_Hammer|CPU_64, 0, 64, 0, 0, 0, {0, 0, 0}, 0, 1,
-      {OPT_ImmNotSegOff|OPS_32|OPA_JmpRel, 0, 0} },
-
-    { CPU_Any, 0, 16, 64, 0, 1, {0xE8, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_16|OPTM_Near|OPA_JmpRel, 0, 0} },
-    { CPU_386|CPU_Not64, 0, 32, 0, 0, 1, {0xE8, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_32|OPTM_Near|OPA_JmpRel, 0, 0} },
-    { CPU_Hammer|CPU_64, 0, 64, 64, 0, 1, {0xE8, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_32|OPTM_Near|OPA_JmpRel, 0, 0} },
-    { CPU_Any, 0, 0, 64, 0, 1, {0xE8, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_Any|OPTM_Near|OPA_JmpRel, 0, 0} },
-
-    { CPU_Any, 0, 16, 0, 0, 1, {0xFF, 0, 0}, 2, 1,
-      {OPT_RM|OPS_16|OPA_EA, 0, 0} },
-    { CPU_386|CPU_Not64, 0, 32, 0, 0, 1, {0xFF, 0, 0}, 2, 1,
-      {OPT_RM|OPS_32|OPA_EA, 0, 0} },
-    { CPU_Hammer|CPU_64, 0, 64, 64, 0, 1, {0xFF, 0, 0}, 2, 1,
-      {OPT_RM|OPS_64|OPA_EA, 0, 0} },
-    { CPU_Any, 0, 0, 64, 0, 1, {0xFF, 0, 0}, 2, 1,
-      {OPT_Mem|OPS_Any|OPA_EA, 0, 0} },
-    { CPU_Any, 0, 16, 64, 0, 1, {0xFF, 0, 0}, 2, 1,
-      {OPT_RM|OPS_16|OPTM_Near|OPA_EA, 0, 0} },
-    { CPU_386|CPU_Not64, 0, 32, 0, 0, 1, {0xFF, 0, 0}, 2, 1,
-      {OPT_RM|OPS_32|OPTM_Near|OPA_EA, 0, 0} },
-    { CPU_Hammer|CPU_64, 0, 64, 64, 0, 1, {0xFF, 0, 0}, 2, 1,
-      {OPT_RM|OPS_64|OPTM_Near|OPA_EA, 0, 0} },
-    { CPU_Any, 0, 0, 64, 0, 1, {0xFF, 0, 0}, 2, 1,
-      {OPT_Mem|OPS_Any|OPTM_Near|OPA_EA, 0, 0} },
-
-    /* Far indirect (through memory).  Needs explicit FAR override. */
-    { CPU_Any, 0, 16, 0, 0, 1, {0xFF, 0, 0}, 3, 1,
-      {OPT_Mem|OPS_16|OPTM_Far|OPA_EA, 0, 0} },
-    { CPU_386, 0, 32, 0, 0, 1, {0xFF, 0, 0}, 3, 1,
-      {OPT_Mem|OPS_32|OPTM_Far|OPA_EA, 0, 0} },
-    { CPU_EM64T|CPU_64, 0, 64, 0, 0, 1, {0xFF, 0, 0}, 3, 1,
-      {OPT_Mem|OPS_64|OPTM_Far|OPA_EA, 0, 0} },
-    { CPU_Any, 0, 0, 0, 0, 1, {0xFF, 0, 0}, 3, 1,
-      {OPT_Mem|OPS_Any|OPTM_Far|OPA_EA, 0, 0} },
-
-    /* With explicit FAR override */
-    { CPU_Not64, 0, 16, 0, 0, 1, {0x9A, 0, 0}, 3, 1,
-      {OPT_Imm|OPS_16|OPTM_Far|OPA_JmpFar, 0, 0} },
-    { CPU_386|CPU_Not64, 0, 32, 0, 0, 1, {0x9A, 0, 0}, 3, 1,
-      {OPT_Imm|OPS_32|OPTM_Far|OPA_JmpFar, 0, 0} },
-    { CPU_Not64, 0, 0, 0, 0, 1, {0x9A, 0, 0}, 3, 1,
-      {OPT_Imm|OPS_Any|OPTM_Far|OPA_JmpFar, 0, 0} },
-
-    /* Since not caught by first ImmNotSegOff group, implicitly FAR. */
-    { CPU_Not64, 0, 16, 0, 0, 1, {0x9A, 0, 0}, 3, 1,
-      {OPT_Imm|OPS_16|OPA_JmpFar, 0, 0} },
-    { CPU_386|CPU_Not64, 0, 32, 0, 0, 1, {0x9A, 0, 0}, 3, 1,
-      {OPT_Imm|OPS_32|OPA_JmpFar, 0, 0} },
-    { CPU_Not64, 0, 0, 0, 0, 1, {0x9A, 0, 0}, 3, 1,
-      {OPT_Imm|OPS_Any|OPA_JmpFar, 0, 0} }
-};
-static const x86_insn_info jmp_insn[] = {
-    { CPU_Any, 0, 0, 0, 0, 0, {0, 0, 0}, 0, 1,
-      {OPT_ImmNotSegOff|OPS_Any|OPA_JmpRel, 0, 0} },
-    { CPU_Any, 0, 16, 0, 0, 0, {0, 0, 0}, 0, 1,
-      {OPT_ImmNotSegOff|OPS_16|OPA_JmpRel, 0, 0} },
-    { CPU_386|CPU_Not64, 0, 32, 0, 0, 1, {0, 0, 0}, 0, 1,
-      {OPT_ImmNotSegOff|OPS_32|OPA_JmpRel, 0, 0} },
-    { CPU_Hammer|CPU_64, 0, 64, 0, 0, 1, {0, 0, 0}, 0, 1,
-      {OPT_ImmNotSegOff|OPS_32|OPA_JmpRel, 0, 0} },
-
-    { CPU_Any, 0, 0, 64, 0, 1, {0xEB, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_Any|OPTM_Short|OPA_JmpRel, 0, 0} },
-    { CPU_Any, 0, 16, 64, 0, 1, {0xE9, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_16|OPTM_Near|OPA_JmpRel, 0, 0} },
-    { CPU_386|CPU_Not64, 0, 32, 0, 0, 1, {0xE9, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_32|OPTM_Near|OPA_JmpRel, 0, 0} },
-    { CPU_Hammer|CPU_64, 0, 64, 64, 0, 1, {0xE9, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_32|OPTM_Near|OPA_JmpRel, 0, 0} },
-    { CPU_Any, 0, 0, 64, 0, 1, {0xE9, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_Any|OPTM_Near|OPA_JmpRel, 0, 0} },
-
-    { CPU_Any, 0, 16, 64, 0, 1, {0xFF, 0, 0}, 4, 1,
-      {OPT_RM|OPS_16|OPA_EA, 0, 0} },
-    { CPU_386|CPU_Not64, 0, 32, 0, 0, 1, {0xFF, 0, 0}, 4, 1,
-      {OPT_RM|OPS_32|OPA_EA, 0, 0} },
-    { CPU_Hammer|CPU_64, 0, 64, 64, 0, 1, {0xFF, 0, 0}, 4, 1,
-      {OPT_RM|OPS_64|OPA_EA, 0, 0} },
-    { CPU_Any, 0, 0, 64, 0, 1, {0xFF, 0, 0}, 4, 1,
-      {OPT_Mem|OPS_Any|OPA_EA, 0, 0} },
-    { CPU_Any, 0, 16, 64, 0, 1, {0xFF, 0, 0}, 4, 1,
-      {OPT_RM|OPS_16|OPTM_Near|OPA_EA, 0, 0} },
-    { CPU_386|CPU_Not64, 0, 32, 0, 0, 1, {0xFF, 0, 0}, 4, 1,
-      {OPT_RM|OPS_32|OPTM_Near|OPA_EA, 0, 0} },
-    { CPU_Hammer|CPU_64, 0, 64, 64, 0, 1, {0xFF, 0, 0}, 4, 1,
-      {OPT_RM|OPS_64|OPTM_Near|OPA_EA, 0, 0} },
-    { CPU_Any, 0, 0, 64, 0, 1, {0xFF, 0, 0}, 4, 1,
-      {OPT_Mem|OPS_Any|OPTM_Near|OPA_EA, 0, 0} },
-
-    /* Far indirect (through memory).  Needs explicit FAR override. */
-    { CPU_Any, 0, 16, 0, 0, 1, {0xFF, 0, 0}, 5, 1,
-      {OPT_Mem|OPS_16|OPTM_Far|OPA_EA, 0, 0} },
-    { CPU_386, 0, 32, 0, 0, 1, {0xFF, 0, 0}, 5, 1,
-      {OPT_Mem|OPS_32|OPTM_Far|OPA_EA, 0, 0} },
-    { CPU_EM64T|CPU_64, 0, 64, 0, 0, 1, {0xFF, 0, 0}, 5, 1,
-      {OPT_Mem|OPS_64|OPTM_Far|OPA_EA, 0, 0} },
-    { CPU_Any, 0, 0, 0, 0, 1, {0xFF, 0, 0}, 5, 1,
-      {OPT_Mem|OPS_Any|OPTM_Far|OPA_EA, 0, 0} },
-
-    /* With explicit FAR override */
-    { CPU_Not64, 0, 16, 0, 0, 1, {0xEA, 0, 0}, 3, 1,
-      {OPT_Imm|OPS_16|OPTM_Far|OPA_JmpFar, 0, 0} },
-    { CPU_386|CPU_Not64, 0, 32, 0, 0, 1, {0xEA, 0, 0}, 3, 1,
-      {OPT_Imm|OPS_32|OPTM_Far|OPA_JmpFar, 0, 0} },
-    { CPU_Not64, 0, 0, 0, 0, 1, {0xEA, 0, 0}, 3, 1,
-      {OPT_Imm|OPS_Any|OPTM_Far|OPA_JmpFar, 0, 0} },
-
-    /* Since not caught by first ImmNotSegOff group, implicitly FAR. */
-    { CPU_Not64, 0, 16, 0, 0, 1, {0xEA, 0, 0}, 3, 1,
-      {OPT_Imm|OPS_16|OPA_JmpFar, 0, 0} },
-    { CPU_386|CPU_Not64, 0, 32, 0, 0, 1, {0xEA, 0, 0}, 3, 1,
-      {OPT_Imm|OPS_32|OPA_JmpFar, 0, 0} },
-    { CPU_Not64, 0, 0, 0, 0, 1, {0xEA, 0, 0}, 3, 1,
-      {OPT_Imm|OPS_Any|OPA_JmpFar, 0, 0} }
-};
-static const x86_insn_info retnf_insn[] = {
-    { CPU_Not64, MOD_Op0Add, 0, 0, 0, 1,
-      {0x01, 0, 0}, 0, 0, {0, 0, 0} },
-    { CPU_Not64, MOD_Op0Add, 0, 0, 0, 1,
-      {0x00, 0, 0}, 0, 1, {OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm, 0, 0} },
-    { CPU_64, MOD_Op0Add|MOD_OpSizeR, 0, 0, 0, 1,
-      {0x01, 0, 0}, 0, 0, {0, 0, 0} },
-    { CPU_64, MOD_Op0Add|MOD_OpSizeR, 0, 0, 0, 1,
-      {0x00, 0, 0}, 0, 1, {OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm, 0, 0} },
-    /* GAS suffix versions */
-    { CPU_Any, MOD_Op0Add|MOD_OpSizeR|MOD_GasSufW|MOD_GasSufL|MOD_GasSufQ, 0,
-      0, 0, 1, {0x01, 0, 0}, 0, 0, {0, 0, 0} },
-    { CPU_Any, MOD_Op0Add|MOD_OpSizeR|MOD_GasSufW|MOD_GasSufL|MOD_GasSufQ, 0,
-      0, 0, 1, {0x00, 0, 0}, 0, 1, {OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm, 0, 0} }
-};
-static const x86_insn_info enter_insn[] = {
-    { CPU_186|CPU_Not64, MOD_GasNoRev|MOD_GasSufL, 0, 0, 0, 1, {0xC8, 0, 0}, 0,
-      2, {OPT_Imm|OPS_16|OPS_Relaxed|OPA_EA|OPAP_A16,
-          OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasNoRev|MOD_GasSufQ, 64, 64, 0, 1, {0xC8, 0, 0},
-      0, 2, {OPT_Imm|OPS_16|OPS_Relaxed|OPA_EA|OPAP_A16,
-             OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
-    /* GAS suffix version */
-    { CPU_186, MOD_GasOnly|MOD_GasNoRev|MOD_GasSufW, 16, 0, 0, 1,
-      {0xC8, 0, 0}, 0, 2, {OPT_Imm|OPS_16|OPS_Relaxed|OPA_EA|OPAP_A16,
-                           OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
-};
-
-/* Conditional jumps */
-static const x86_insn_info jcc_insn[] = {
-    { CPU_Any, 0, 0, 0, 0, 0, {0, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_Any|OPA_JmpRel, 0, 0} },
-    { CPU_Any, 0, 16, 0, 0, 0, {0, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_16|OPA_JmpRel, 0, 0} },
-    { CPU_386|CPU_Not64, 0, 32, 0, 0, 0, {0, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_32|OPA_JmpRel, 0, 0} },
-    { CPU_Hammer|CPU_64, 0, 64, 0, 0, 0, {0, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_32|OPA_JmpRel, 0, 0} },
-
-    { CPU_Any, MOD_Op0Add, 0, 64, 0, 1, {0x70, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_Any|OPTM_Short|OPA_JmpRel, 0, 0} },
-    { CPU_386, MOD_Op1Add, 16, 64, 0, 2, {0x0F, 0x80, 0}, 0, 1,
-      {OPT_Imm|OPS_16|OPTM_Near|OPA_JmpRel, 0, 0} },
-    { CPU_386|CPU_Not64, MOD_Op1Add, 32, 0, 0, 2, {0x0F, 0x80, 0}, 0, 1,
-      {OPT_Imm|OPS_32|OPTM_Near|OPA_JmpRel, 0, 0} },
-    { CPU_Hammer|CPU_64, MOD_Op1Add, 64, 64, 0, 2, {0x0F, 0x80, 0}, 0, 1,
-      {OPT_Imm|OPS_32|OPTM_Near|OPA_JmpRel, 0, 0} },
-    { CPU_386, MOD_Op1Add, 0, 64, 0, 2, {0x0F, 0x80, 0}, 0, 1,
-      {OPT_Imm|OPS_Any|OPTM_Near|OPA_JmpRel, 0, 0} }
-};
-static const x86_insn_info jcxz_insn[] = {
-    { CPU_Any, MOD_AdSizeR, 0, 0, 0, 0, {0, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_Any|OPA_JmpRel, 0, 0} },
-    { CPU_Any, MOD_AdSizeR, 0, 64, 0, 1, {0xE3, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_Any|OPTM_Short|OPA_JmpRel, 0, 0} }
-};
-
-/* Loop instructions */
-static const x86_insn_info loop_insn[] = {
-    { CPU_Any, 0, 0, 0, 0, 0, {0, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_Any|OPA_JmpRel, 0, 0} },
-    { CPU_Not64, 0, 0, 0, 0, 0, {0, 0, 0}, 0, 2,
-      {OPT_Imm|OPS_Any|OPA_JmpRel, OPT_Creg|OPS_16|OPA_AdSizeR, 0} },
-    { CPU_386, 0, 0, 64, 0, 0, {0, 0, 0}, 0, 2,
-      {OPT_Imm|OPS_Any|OPA_JmpRel, OPT_Creg|OPS_32|OPA_AdSizeR, 0} },
-    { CPU_Hammer|CPU_64, 0, 0, 64, 0, 0, {0, 0, 0}, 0, 2,
-      {OPT_Imm|OPS_Any|OPA_JmpRel, OPT_Creg|OPS_64|OPA_AdSizeR, 0} },
-
-    { CPU_Not64, MOD_Op0Add, 0, 0, 0, 1, {0xE0, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_Any|OPTM_Short|OPA_JmpRel, 0, 0} },
-    { CPU_Any, MOD_Op0Add, 0, 64, 0, 1, {0xE0, 0, 0}, 0, 2,
-      {OPT_Imm|OPS_Any|OPTM_Short|OPA_JmpRel, OPT_Creg|OPS_16|OPA_AdSizeR, 0}
-    },
-    { CPU_386, MOD_Op0Add, 0, 64, 0, 1, {0xE0, 0, 0}, 0, 2,
-      {OPT_Imm|OPS_Any|OPTM_Short|OPA_JmpRel, OPT_Creg|OPS_32|OPA_AdSizeR, 0}
-    },
-    { CPU_Hammer|CPU_64, MOD_Op0Add, 0, 64, 0, 1, {0xE0, 0, 0}, 0, 2,
-      {OPT_Imm|OPS_Any|OPTM_Short|OPA_JmpRel, OPT_Creg|OPS_64|OPA_AdSizeR, 0} }
-};
-
-/* Set byte on flag instructions */
-static const x86_insn_info setcc_insn[] = {
-    { CPU_386, MOD_Op1Add|MOD_GasSufB, 0, 0, 0, 2, {0x0F, 0x90, 0}, 2, 1,
-      {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, 0, 0} }
-};
-
-/* Bit manipulation - bit tests */
-static const x86_insn_info bittest_insn[] = {
-    { CPU_386, MOD_Op1Add|MOD_GasSufW, 16, 0, 0, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_16|OPA_Spare, 0} },
-    { CPU_386, MOD_Op1Add|MOD_GasSufL, 32, 0, 0, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_32|OPA_Spare, 0} },
-    { CPU_Hammer|CPU_64, MOD_Op1Add|MOD_GasSufQ, 64, 0, 0, 2, {0x0F, 0x00, 0},
-      0, 2, {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_64|OPA_Spare, 0} },
-    { CPU_386, MOD_Gap0|MOD_SpAdd|MOD_GasSufW, 16, 0, 0, 2, {0x0F, 0xBA, 0},
-      0, 2, {OPT_RM|OPS_16|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_386, MOD_Gap0|MOD_SpAdd|MOD_GasSufL, 32, 0, 0, 2, {0x0F, 0xBA, 0},
-      0, 2, {OPT_RM|OPS_32|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_Hammer|CPU_64, MOD_Gap0|MOD_SpAdd|MOD_GasSufQ, 64, 0, 0, 2,
-      {0x0F, 0xBA, 0}, 0, 2,
-      {OPT_RM|OPS_64|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} }
-};
-
-/* Bit manipulation - bit scans - also used for lar/lsl */
-static const x86_insn_info bsfr_insn[] = {
-    { CPU_286, MOD_Op1Add|MOD_GasSufW, 16, 0, 0, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_386, MOD_Op1Add|MOD_GasSufL, 32, 0, 0, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, MOD_Op1Add|MOD_GasSufQ, 64, 0, 0, 2, {0x0F, 0x00, 0},
-      0, 2, {OPT_Reg|OPS_64|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, 0} }
-};
-
-/* Interrupts and operating system instructions */
-static const x86_insn_info int_insn[] = {
-    { CPU_Any, 0, 0, 0, 0, 1, {0xCD, 0, 0}, 0, 1,
-      {OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0, 0} }
-};
-static const x86_insn_info bound_insn[] = {
-    { CPU_186, MOD_GasSufW, 16, 0, 0, 1, {0x62, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_Spare, OPT_Mem|OPS_16|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_386, MOD_GasSufL, 32, 0, 0, 1, {0x62, 0, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_Mem|OPS_32|OPS_Relaxed|OPA_EA, 0} }
-};
-
-/* Protection control */
-static const x86_insn_info arpl_insn[] = {
-    { CPU_286|CPU_Prot, MOD_GasSufW, 0, 0, 0, 1, {0x63, 0, 0}, 0, 2,
-      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_16|OPA_Spare, 0} }
-};
-static const x86_insn_info str_insn[] = {
-    { CPU_286|CPU_Prot, MOD_GasSufW, 16, 0, 0, 2, {0x0F, 0x00, 0}, 1, 1,
-      {OPT_Reg|OPS_16|OPA_EA, 0, 0} },
-    { CPU_386|CPU_Prot, MOD_GasSufL, 32, 0, 0, 2, {0x0F, 0x00, 0}, 1, 1,
-      {OPT_Reg|OPS_32|OPA_EA, 0, 0} },
-    { CPU_Hammer|CPU_64|CPU_Prot, MOD_GasSufQ, 64, 0, 0, 2, {0x0F, 0x00, 0}, 1,
-      1, {OPT_Reg|OPS_64|OPA_EA, 0, 0} },
-    { CPU_286|CPU_Prot, MOD_GasSufW|MOD_GasSufL, 0, 0, 0, 2, {0x0F, 0x00, 0},
-      1, 1, {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, 0, 0} }
-};
-static const x86_insn_info prot286_insn[] = {
-    { CPU_286, MOD_Op1Add|MOD_SpAdd|MOD_GasSufW, 0, 0, 0, 2, {0x0F, 0x00, 0},
-      0, 1, {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, 0, 0} }
-};
-static const x86_insn_info sldtmsw_insn[] = {
-    { CPU_286, MOD_Op1Add|MOD_SpAdd|MOD_GasSufW, 0, 0, 0, 2, {0x0F, 0x00, 0},
-      0, 1, {OPT_Mem|OPS_16|OPS_Relaxed|OPA_EA, 0, 0} },
-    { CPU_386, MOD_Op1Add|MOD_SpAdd|MOD_GasSufL, 0, 0, 0, 2, {0x0F, 0x00, 0},
-      0, 1, {OPT_Mem|OPS_32|OPS_Relaxed|OPA_EA, 0, 0} },
-    { CPU_Hammer|CPU_64, MOD_Op1Add|MOD_SpAdd|MOD_GasSufQ, 0, 0, 0, 2,
-      {0x0F, 0x00, 0}, 0, 1, {OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, 0, 0} },
-    { CPU_286, MOD_Op1Add|MOD_SpAdd|MOD_GasSufW, 16, 0, 0, 2, {0x0F, 0x00, 0},
-      0, 1, {OPT_Reg|OPS_16|OPA_EA, 0, 0} },
-    { CPU_386, MOD_Op1Add|MOD_SpAdd|MOD_GasSufL, 32, 0, 0, 2, {0x0F, 0x00, 0},
-      0, 1, {OPT_Reg|OPS_32|OPA_EA, 0, 0} },
-    { CPU_Hammer|CPU_64, MOD_Op1Add|MOD_SpAdd|MOD_GasSufQ, 64, 0, 0, 2,
-      {0x0F, 0x00, 0}, 0, 1, {OPT_Reg|OPS_64|OPA_EA, 0, 0} }
-};
-
-/* Floating point instructions - load/store with pop (integer and normal) */
-static const x86_insn_info fld_insn[] = {
-    { CPU_FPU, MOD_GasSufS, 0, 0, 0, 1, {0xD9, 0, 0}, 0, 1,
-      {OPT_Mem|OPS_32|OPA_EA, 0, 0} },
-    { CPU_FPU, MOD_GasSufL, 0, 0, 0, 1, {0xDD, 0, 0}, 0, 1,
-      {OPT_Mem|OPS_64|OPA_EA, 0, 0} },
-    { CPU_FPU, 0, 0, 0, 0, 1, {0xDB, 0, 0}, 5, 1,
-      {OPT_Mem|OPS_80|OPA_EA, 0, 0} },
-    { CPU_FPU, 0, 0, 0, 0, 2, {0xD9, 0xC0, 0}, 0, 1,
-      {OPT_Reg|OPS_80|OPA_Op1Add, 0, 0} }
-};
-static const x86_insn_info fstp_insn[] = {
-    { CPU_FPU, MOD_GasSufS, 0, 0, 0, 1, {0xD9, 0, 0}, 3, 1,
-      {OPT_Mem|OPS_32|OPA_EA, 0, 0} },
-    { CPU_FPU, MOD_GasSufL, 0, 0, 0, 1, {0xDD, 0, 0}, 3, 1,
-      {OPT_Mem|OPS_64|OPA_EA, 0, 0} },
-    { CPU_FPU, 0, 0, 0, 0, 1, {0xDB, 0, 0}, 7, 1,
-      {OPT_Mem|OPS_80|OPA_EA, 0, 0} },
-    { CPU_FPU, 0, 0, 0, 0, 2, {0xDD, 0xD8, 0}, 0, 1,
-      {OPT_Reg|OPS_80|OPA_Op1Add, 0, 0} }
-};
-/* Long memory version of floating point load/store for GAS */
-static const x86_insn_info fldstpt_insn[] = {
-    { CPU_FPU, MOD_SpAdd, 0, 0, 0, 1, {0xDB, 0, 0}, 0, 1,
-      {OPT_Mem|OPS_80|OPA_EA, 0, 0} }
-};
-static const x86_insn_info fildstp_insn[] = {
-    { CPU_FPU, MOD_SpAdd|MOD_GasSufS, 0, 0, 0, 1, {0xDF, 0, 0}, 0, 1,
-      {OPT_Mem|OPS_16|OPA_EA, 0, 0} },
-    { CPU_FPU, MOD_SpAdd|MOD_GasSufL, 0, 0, 0, 1, {0xDB, 0, 0}, 0, 1,
-      {OPT_Mem|OPS_32|OPA_EA, 0, 0} },
-    { CPU_FPU, MOD_Gap0|MOD_Op0Add|MOD_SpAdd|MOD_GasSufQ, 0, 0, 0, 1,
-      {0xDD, 0, 0}, 0, 1, {OPT_Mem|OPS_64|OPA_EA, 0, 0} }
-};
-static const x86_insn_info fbldstp_insn[] = {
-    { CPU_FPU, MOD_SpAdd, 0, 0, 0, 1, {0xDF, 0, 0}, 0, 1,
-      {OPT_Mem|OPS_80|OPS_Relaxed|OPA_EA, 0, 0} }
-};
-/* Floating point instructions - store (normal) */
-static const x86_insn_info fst_insn[] = {
-    { CPU_FPU, MOD_GasSufS, 0, 0, 0, 1, {0xD9, 0, 0}, 2, 1,
-      {OPT_Mem|OPS_32|OPA_EA, 0, 0} },
-    { CPU_FPU, MOD_GasSufL, 0, 0, 0, 1, {0xDD, 0, 0}, 2, 1,
-      {OPT_Mem|OPS_64|OPA_EA, 0, 0} },
-    { CPU_FPU, 0, 0, 0, 0, 2, {0xDD, 0xD0, 0}, 0, 1,
-      {OPT_Reg|OPS_80|OPA_Op1Add, 0, 0} }
-};
-/* Floating point instructions - exchange (with ST0) */
-static const x86_insn_info fxch_insn[] = {
-    { CPU_FPU, 0, 0, 0, 0, 2, {0xD9, 0xC8, 0}, 0, 1,
-      {OPT_Reg|OPS_80|OPA_Op1Add, 0, 0} },
-    { CPU_FPU, 0, 0, 0, 0, 2, {0xD9, 0xC8, 0}, 0, 2,
-      {OPT_ST0|OPS_80|OPA_None, OPT_Reg|OPS_80|OPA_Op1Add, 0} },
-    { CPU_FPU, 0, 0, 0, 0, 2, {0xD9, 0xC8, 0}, 0, 2,
-      {OPT_Reg|OPS_80|OPA_Op1Add, OPT_ST0|OPS_80|OPA_None, 0} },
-    { CPU_FPU, 0, 0, 0, 0, 2, {0xD9, 0xC9, 0}, 0, 0, {0, 0, 0} }
-};
-/* Floating point instructions - comparisons */
-static const x86_insn_info fcom_insn[] = {
-    { CPU_FPU, MOD_Gap0|MOD_SpAdd|MOD_GasSufS, 0, 0, 0, 1, {0xD8, 0, 0}, 0, 1,
-      {OPT_Mem|OPS_32|OPA_EA, 0, 0} },
-    { CPU_FPU, MOD_Gap0|MOD_SpAdd|MOD_GasSufL, 0, 0, 0, 1, {0xDC, 0, 0}, 0, 1,
-      {OPT_Mem|OPS_64|OPA_EA, 0, 0} },
-    { CPU_FPU, MOD_Op1Add, 0, 0, 0, 2, {0xD8, 0x00, 0}, 0, 1,
-      {OPT_Reg|OPS_80|OPA_Op1Add, 0, 0} },
-    /* Alias for fcom %st(1) for GAS compat */
-    { CPU_FPU, MOD_Op1Add|MOD_GasOnly, 0, 0, 0, 2, {0xD8, 0x01, 0}, 0, 0,
-      {0, 0, 0} },
-    { CPU_FPU, MOD_Op1Add|MOD_GasIllegal, 0, 0, 0, 2, {0xD8, 0x00, 0}, 0, 2,
-      {OPT_ST0|OPS_80|OPA_None, OPT_Reg|OPS_80|OPA_Op1Add, 0} }
-};
-/* Floating point instructions - extended comparisons */
-static const x86_insn_info fcom2_insn[] = {
-    { CPU_286|CPU_FPU, MOD_Op0Add|MOD_Op1Add, 0, 0, 0, 2, {0x00, 0x00, 0},
-      0, 1, {OPT_Reg|OPS_80|OPA_Op1Add, 0, 0} },
-    { CPU_286|CPU_FPU, MOD_Op0Add|MOD_Op1Add, 0, 0, 0, 2, {0x00, 0x00, 0},
-      0, 2, {OPT_ST0|OPS_80|OPA_None, OPT_Reg|OPS_80|OPA_Op1Add, 0} }
-};
-/* Floating point instructions - arithmetic */
-static const x86_insn_info farith_insn[] = {
-    { CPU_FPU, MOD_Gap0|MOD_Gap1|MOD_SpAdd|MOD_GasSufS, 0, 0, 0, 1,
-      {0xD8, 0, 0}, 0, 1, {OPT_Mem|OPS_32|OPA_EA, 0, 0} },
-    { CPU_FPU, MOD_Gap0|MOD_Gap1|MOD_SpAdd|MOD_GasSufL, 0, 0, 0, 1,
-      {0xDC, 0, 0}, 0, 1, {OPT_Mem|OPS_64|OPA_EA, 0, 0} },
-    { CPU_FPU, MOD_Gap0|MOD_Op1Add, 0, 0, 0, 2, {0xD8, 0x00, 0}, 0, 1,
-      {OPT_Reg|OPS_80|OPA_Op1Add, 0, 0} },
-    { CPU_FPU, MOD_Gap0|MOD_Op1Add, 0, 0, 0, 2, {0xD8, 0x00, 0}, 0, 2,
-      {OPT_ST0|OPS_80|OPA_None, OPT_Reg|OPS_80|OPA_Op1Add, 0} },
-    { CPU_FPU, MOD_Op1Add, 0, 0, 0, 2, {0xDC, 0x00, 0}, 0, 1,
-      {OPT_Reg|OPS_80|OPTM_To|OPA_Op1Add, 0, 0} },
-    { CPU_FPU, MOD_Op1Add|MOD_GasIllegal, 0, 0, 0, 2, {0xDC, 0x00, 0}, 0, 2,
-      {OPT_Reg|OPS_80|OPA_Op1Add, OPT_ST0|OPS_80|OPA_None, 0} },
-    { CPU_FPU, MOD_Gap0|MOD_Op1Add|MOD_GasOnly, 0, 0, 0, 2, {0xDC, 0x00, 0},
-      0, 2, {OPT_Reg|OPS_80|OPA_Op1Add, OPT_ST0|OPS_80|OPA_None, 0} }
-};
-static const x86_insn_info farithp_insn[] = {
-    { CPU_FPU, MOD_Op1Add, 0, 0, 0, 2, {0xDE, 0x01, 0}, 0, 0, {0, 0, 0} },
-    { CPU_FPU, MOD_Op1Add, 0, 0, 0, 2, {0xDE, 0x00, 0}, 0, 1,
-      {OPT_Reg|OPS_80|OPA_Op1Add, 0, 0} },
-    { CPU_FPU, MOD_Op1Add, 0, 0, 0, 2, {0xDE, 0x00, 0}, 0, 2,
-      {OPT_Reg|OPS_80|OPA_Op1Add, OPT_ST0|OPS_80|OPA_None, 0} }
-};
-/* Floating point instructions - integer arith/store wo pop/compare */
-static const x86_insn_info fiarith_insn[] = {
-    { CPU_FPU, MOD_Op0Add|MOD_SpAdd|MOD_GasSufS, 0, 0, 0, 1, {0x04, 0, 0}, 0,
-      1, {OPT_Mem|OPS_16|OPA_EA, 0, 0} },
-    { CPU_FPU, MOD_Op0Add|MOD_SpAdd|MOD_GasSufL, 0, 0, 0, 1, {0x00, 0, 0}, 0,
-      1, {OPT_Mem|OPS_32|OPA_EA, 0, 0} }
-};
-/* Floating point instructions - processor control */
-static const x86_insn_info fldnstcw_insn[] = {
-    { CPU_FPU, MOD_SpAdd|MOD_GasSufW, 0, 0, 0, 1, {0xD9, 0, 0}, 0, 1,
-      {OPT_Mem|OPS_16|OPS_Relaxed|OPA_EA, 0, 0} }
-};
-static const x86_insn_info fstcw_insn[] = {
-    { CPU_FPU, MOD_GasSufW, 0, 0, 0, 2, {0x9B, 0xD9, 0}, 7, 1,
-      {OPT_Mem|OPS_16|OPS_Relaxed|OPA_EA, 0, 0} }
-};
-static const x86_insn_info fnstsw_insn[] = {
-    { CPU_FPU, MOD_GasSufW, 0, 0, 0, 1, {0xDD, 0, 0}, 7, 1,
-      {OPT_Mem|OPS_16|OPS_Relaxed|OPA_EA, 0, 0} },
-    { CPU_FPU, MOD_GasSufW, 0, 0, 0, 2, {0xDF, 0xE0, 0}, 0, 1,
-      {OPT_Areg|OPS_16|OPA_None, 0, 0} }
-};
-static const x86_insn_info fstsw_insn[] = {
-    { CPU_FPU, MOD_GasSufW, 0, 0, 0, 2, {0x9B, 0xDD, 0}, 7, 1,
-      {OPT_Mem|OPS_16|OPS_Relaxed|OPA_EA, 0, 0} },
-    { CPU_FPU, MOD_GasSufW, 0, 0, 0, 3, {0x9B, 0xDF, 0xE0}, 0, 1,
-      {OPT_Areg|OPS_16|OPA_None, 0, 0} }
-};
-static const x86_insn_info ffree_insn[] = {
-    { CPU_FPU, MOD_Op0Add, 0, 0, 0, 2, {0x00, 0xC0, 0}, 0, 1,
-      {OPT_Reg|OPS_80|OPA_Op1Add, 0, 0} }
-};
-
-/* 486 extensions */
-static const x86_insn_info bswap_insn[] = {
-    { CPU_486, MOD_GasSufL, 32, 0, 0, 2, {0x0F, 0xC8, 0}, 0, 1,
-      {OPT_Reg|OPS_32|OPA_Op1Add, 0, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 2, {0x0F, 0xC8, 0}, 0, 1,
-      {OPT_Reg|OPS_64|OPA_Op1Add, 0, 0} }
-};
-static const x86_insn_info cmpxchgxadd_insn[] = {
-    { CPU_486, MOD_Op1Add|MOD_GasSufB, 0, 0, 0, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_8|OPA_Spare, 0} },
-    { CPU_486, MOD_Op1Add|MOD_GasSufW, 16, 0, 0, 2, {0x0F, 0x01, 0}, 0, 2,
-      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_16|OPA_Spare, 0} },
-    { CPU_486, MOD_Op1Add|MOD_GasSufL, 32, 0, 0, 2, {0x0F, 0x01, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_32|OPA_Spare, 0} },
-    { CPU_Hammer|CPU_64, MOD_Op1Add|MOD_GasSufQ, 64, 0, 0, 2, {0x0F, 0x01, 0},
-      0, 2, {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_64|OPA_Spare, 0} }
-};
-
-/* Pentium extensions */
-static const x86_insn_info cmpxchg8b_insn[] = {
-    { CPU_586, MOD_GasSufQ, 0, 0, 0, 2, {0x0F, 0xC7, 0}, 1, 1,
-      {OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, 0, 0} }
-};
-
-/* Pentium II/Pentium Pro extensions */
-static const x86_insn_info cmovcc_insn[] = {
-    { CPU_686, MOD_Op1Add|MOD_GasSufW, 16, 0, 0, 2, {0x0F, 0x40, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_686, MOD_Op1Add|MOD_GasSufL, 32, 0, 0, 2, {0x0F, 0x40, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, MOD_Op1Add|MOD_GasSufQ, 64, 0, 0, 2, {0x0F, 0x40, 0},
-      0, 2, {OPT_Reg|OPS_64|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, 0} }
-};
-static const x86_insn_info fcmovcc_insn[] = {
-    { CPU_686|CPU_FPU, MOD_Op0Add|MOD_Op1Add, 0, 0, 0, 2, {0x00, 0x00, 0},
-      0, 2, {OPT_ST0|OPS_80|OPA_None, OPT_Reg|OPS_80|OPA_Op1Add, 0} }
-};
-
-/* Pentium4 extensions */
-static const x86_insn_info movnti_insn[] = {
-    { CPU_P4, MOD_GasSufL, 0, 0, 0, 2, {0x0F, 0xC3, 0}, 0, 2,
-      {OPT_Mem|OPS_32|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_32|OPA_Spare, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 2, {0x0F, 0xC3, 0}, 0, 2,
-      {OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_64|OPA_Spare, 0} }
-};
-static const x86_insn_info clflush_insn[] = {
-    { CPU_P3, 0, 0, 0, 0, 2, {0x0F, 0xAE, 0}, 7, 1,
-      {OPT_Mem|OPS_8|OPS_Relaxed|OPA_EA, 0, 0} }
-};
-
-/* MMX/SSE2 instructions */
-static const x86_insn_info movd_insn[] = {
-    { CPU_MMX, 0, 0, 0, 0, 2, {0x0F, 0x6E, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_MMX|CPU_Hammer|CPU_64, 0, 64, 0, 0, 2, {0x0F, 0x6E, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_MMX, 0, 0, 0, 0, 2, {0x0F, 0x7E, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_64|OPA_Spare, 0} },
-    { CPU_MMX|CPU_Hammer|CPU_64, 0, 64, 0, 0, 2, {0x0F, 0x7E, 0}, 0, 2,
-      {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_64|OPA_Spare, 0} },
-    { CPU_SSE2, 0, 0, 0, 0x66, 2, {0x0F, 0x6E, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_SSE2|CPU_Hammer|CPU_64, 0, 64, 0, 0x66, 2, {0x0F, 0x6E, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_SSE2, 0, 0, 0, 0x66, 2, {0x0F, 0x7E, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare, 0} },
-    { CPU_SSE2|CPU_Hammer|CPU_64, 0, 64, 0, 0x66, 2, {0x0F, 0x7E, 0}, 0, 2,
-      {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare, 0} }
-};
-static const x86_insn_info movq_insn[] = {
-    { CPU_MMX, 0, 0, 0, 0, 2, {0x0F, 0x6F, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_SIMDRM|OPS_64|OPS_Relaxed|OPA_EA, 0}
-    },
-    { CPU_MMX|CPU_Hammer|CPU_64, 0, 64, 0, 0, 2, {0x0F, 0x6E, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_MMX, 0, 0, 0, 0, 2, {0x0F, 0x7F, 0}, 0, 2,
-      {OPT_SIMDRM|OPS_64|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_64|OPA_Spare, 0}
-    },
-    { CPU_MMX|CPU_Hammer|CPU_64, 0, 64, 0, 0, 2, {0x0F, 0x7E, 0}, 0, 2,
-      {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_64|OPA_Spare, 0} },
-    { CPU_SSE2, 0, 0, 0, 0xF3, 2, {0x0F, 0x7E, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0} },
-    { CPU_SSE2, 0, 0, 0, 0xF3, 2, {0x0F, 0x7E, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_64|OPS_Relaxed|OPA_EA, 0}
-    },
-    { CPU_SSE2|CPU_Hammer|CPU_64, 0, 64, 0, 0x66, 2, {0x0F, 0x6E, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_SSE2, 0, 0, 0, 0x66, 2, {0x0F, 0xD6, 0}, 0, 2,
-      {OPT_SIMDRM|OPS_64|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare, 0}
-    },
-    { CPU_SSE2|CPU_Hammer|CPU_64, 0, 64, 0, 0x66, 2, {0x0F, 0x7E, 0}, 0, 2,
-      {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare, 0} }
-};
-static const x86_insn_info mmxsse2_insn[] = {
-    { CPU_MMX, MOD_Op1Add, 0, 0, 0, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_SIMDRM|OPS_64|OPS_Relaxed|OPA_EA, 0}
-    },
-    { CPU_SSE2, MOD_Op1Add, 0, 0, 0x66, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA, 0}
-    }
-};
-static const x86_insn_info pshift_insn[] = {
-    { CPU_MMX, MOD_Op1Add, 0, 0, 0, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_SIMDRM|OPS_64|OPS_Relaxed|OPA_EA, 0}
-    },
-    { CPU_MMX, MOD_Gap0|MOD_Op1Add|MOD_SpAdd, 0, 0, 0, 2, {0x0F, 0x00, 0}, 0,
-      2, {OPT_SIMDReg|OPS_64|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
-    { CPU_SSE2, MOD_Op1Add, 0, 0, 0x66, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA, 0}
-    },
-    { CPU_SSE2, MOD_Gap0|MOD_Op1Add|MOD_SpAdd, 0, 0, 0x66, 2, {0x0F, 0x00, 0},
-      0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} }
-};
-
-/* PIII (Katmai) new instructions / SIMD instructiosn */
-static const x86_insn_info sseps_insn[] = {
-    { CPU_SSE, MOD_Op1Add, 0, 0, 0, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA, 0}
-    }
-};
-static const x86_insn_info cvt_xmm_xmm64_ss_insn[] = {
-    { CPU_SSE, MOD_PreAdd|MOD_Op1Add, 0, 0, 0x00, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0}
-    },
-    { CPU_SSE, MOD_PreAdd|MOD_Op1Add, 0, 0, 0x00, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, 0}
-    }
-};
-static const x86_insn_info cvt_xmm_xmm64_ps_insn[] = {
-    { CPU_SSE, MOD_Op1Add, 0, 0, 0, 2, {0x0F, 0x00, 0x00}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0}
-    },
-    { CPU_SSE, MOD_Op1Add, 0, 0, 0, 2, {0x0F, 0x00, 0x00}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, 0}
-    }
-};
-static const x86_insn_info cvt_xmm_xmm32_insn[] = {
-    { CPU_SSE, MOD_PreAdd|MOD_Op1Add, 0, 0, 0x00, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0}
-    },
-    { CPU_SSE, MOD_PreAdd|MOD_Op1Add, 0, 0, 0x00, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Mem|OPS_32|OPS_Relaxed|OPA_EA, 0}
-    }
-};
-static const x86_insn_info cvt_rx_xmm64_insn[] = {
-    { CPU_SSE, MOD_PreAdd|MOD_Op1Add|MOD_GasSufL, 0, 0, 0x00, 2,
-      {0x0F, 0x00, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0}
-    },
-    { CPU_SSE, MOD_PreAdd|MOD_Op1Add|MOD_GasSufL, 0, 0, 0x00, 2,
-      {0x0F, 0x00, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, 0}
-    },
-    /* REX */
-    { CPU_SSE|CPU_Hammer|CPU_64, MOD_PreAdd|MOD_Op1Add|MOD_GasSufQ, 64, 0,
-      0x00, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0}
-    },
-    { CPU_SSE|CPU_Hammer|CPU_64, MOD_PreAdd|MOD_Op1Add|MOD_GasSufQ, 64, 0,
-      0x00, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, 0}
-    }
-};
-static const x86_insn_info cvt_rx_xmm32_insn[] = {
-    { CPU_SSE, MOD_PreAdd|MOD_Op1Add|MOD_GasSufL, 0, 0, 0x00, 2,
-      {0x0F, 0x00, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0}
-    },
-    { CPU_SSE, MOD_PreAdd|MOD_Op1Add|MOD_GasSufL, 0, 0, 0x00, 2,
-      {0x0F, 0x00, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_Mem|OPS_32|OPS_Relaxed|OPA_EA, 0}
-    },
-    /* REX */
-    { CPU_SSE|CPU_Hammer|CPU_64, MOD_PreAdd|MOD_Op1Add|MOD_GasSufQ, 64, 0,
-      0x00, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0}
-    },
-    { CPU_SSE|CPU_Hammer|CPU_64, MOD_PreAdd|MOD_Op1Add|MOD_GasSufQ, 64, 0,
-      0x00, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_Mem|OPS_32|OPS_Relaxed|OPA_EA, 0}
-    }
-};
-static const x86_insn_info cvt_mm_xmm64_insn[] = {
-    { CPU_SSE, MOD_Op1Add, 0, 0, 0, 2, {0x0F, 0x00, 0x00}, 0, 2,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0}
-    },
-    { CPU_SSE, MOD_Op1Add, 0, 0, 0, 2, {0x0F, 0x00, 0x00}, 0, 2,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, 0}
-    }
-};
-static const x86_insn_info cvt_mm_xmm_insn[] = {
-    { CPU_SSE, MOD_PreAdd|MOD_Op1Add, 0, 0, 0x00, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA, 0}
-    }
-};
-static const x86_insn_info cvt_xmm_mm_ss_insn[] = {
-    { CPU_SSE, MOD_PreAdd|MOD_Op1Add, 0, 0, 0x00, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_64|OPS_Relaxed|OPA_EA, 0}
-    }
-};
-static const x86_insn_info cvt_xmm_mm_ps_insn[] = {
-    { CPU_SSE, MOD_Op1Add, 0, 0, 0, 2, {0x0F, 0x00, 0x00}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_64|OPS_Relaxed|OPA_EA, 0}
-    }
-};
-static const x86_insn_info cvt_xmm_rmx_insn[] = {
-    { CPU_SSE, MOD_PreAdd|MOD_Op1Add|MOD_GasSufL, 0, 0, 0x00, 2,
-      {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0}
-    },
-    /* REX */
-    { CPU_Hammer|CPU_64, MOD_PreAdd|MOD_Op1Add|MOD_GasSufQ, 64, 0, 0x00, 2,
-      {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, 0}
-    }
-};
-static const x86_insn_info ssess_insn[] = {
-    { CPU_SSE, MOD_PreAdd|MOD_Op1Add, 0, 0, 0x00, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA, 0}
-    }
-};
-static const x86_insn_info ssecmpps_insn[] = {
-    { CPU_SSE, MOD_Imm8, 0, 0, 0, 2, {0x0F, 0xC2, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA, 0}
-    }
-};
-static const x86_insn_info ssecmpss_insn[] = {
-    { CPU_SSE, MOD_PreAdd|MOD_Imm8, 0, 0, 0x00, 2, {0x0F, 0xC2, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA, 0}
-    }
-};
-static const x86_insn_info ssepsimm_insn[] = {
-    { CPU_SSE, MOD_Op1Add, 0, 0, 0, 2, {0x0F, 0x00, 0}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} }
-};
-static const x86_insn_info ssessimm_insn[] = {
-    { CPU_SSE, MOD_PreAdd|MOD_Op1Add, 0, 0, 0x00, 2, {0x0F, 0x00, 0}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} }
-};
-static const x86_insn_info ldstmxcsr_insn[] = {
-    { CPU_SSE, MOD_SpAdd, 0, 0, 0, 2, {0x0F, 0xAE, 0}, 0, 1,
-      {OPT_Mem|OPS_32|OPS_Relaxed|OPA_EA, 0, 0} }
-};
-static const x86_insn_info maskmovq_insn[] = {
-    { CPU_P3|CPU_MMX, 0, 0, 0, 0, 2, {0x0F, 0xF7, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_SIMDReg|OPS_64|OPA_EA, 0} }
-};
-static const x86_insn_info movaups_insn[] = {
-    { CPU_SSE, MOD_Op1Add, 0, 0, 0, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA, 0}
-    },
-    { CPU_SSE, MOD_Op1Add, 0, 0, 0, 2, {0x0F, 0x01, 0}, 0, 2,
-      {OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare, 0}
-    }
-};
-static const x86_insn_info movhllhps_insn[] = {
-    { CPU_SSE, MOD_Op1Add, 0, 0, 0, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0} }
-};
-static const x86_insn_info movhlps_insn[] = {
-    { CPU_SSE, MOD_Op1Add, 0, 0, 0, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_SSE, MOD_Op1Add, 0, 0, 0, 2, {0x0F, 0x01, 0}, 0, 2,
-      {OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare, 0} }
-};
-static const x86_insn_info movmskps_insn[] = {
-    { CPU_SSE, MOD_GasSufL, 0, 0, 0, 2, {0x0F, 0x50, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 2, {0x0F, 0x50, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0} }
-};
-static const x86_insn_info movntps_insn[] = {
-    { CPU_SSE, 0, 0, 0, 0, 2, {0x0F, 0x2B, 0}, 0, 2,
-      {OPT_Mem|OPS_128|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare, 0} }
-};
-static const x86_insn_info movntq_insn[] = {
-    { CPU_SSE, 0, 0, 0, 0, 2, {0x0F, 0xE7, 0}, 0, 2,
-      {OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_64|OPA_Spare, 0} }
-};
-static const x86_insn_info movss_insn[] = {
-    { CPU_SSE, 0, 0, 0, 0xF3, 2, {0x0F, 0x10, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0} },
-    { CPU_SSE, 0, 0, 0, 0xF3, 2, {0x0F, 0x10, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Mem|OPS_32|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_SSE, 0, 0, 0, 0xF3, 2, {0x0F, 0x11, 0}, 0, 2,
-      {OPT_Mem|OPS_32|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare, 0} }
-};
-static const x86_insn_info pextrw_insn[] = {
-    { CPU_P3|CPU_MMX, MOD_GasSufL, 0, 0, 0, 2, {0x0F, 0xC5, 0}, 0, 3,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_SIMDReg|OPS_64|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_SSE2, MOD_GasSufL, 0, 0, 0x66, 2, {0x0F, 0xC5, 0}, 0, 3,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_SSE2|CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 2, {0x0F, 0xC5, 0},
-      0, 3,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_SIMDReg|OPS_64|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_SSE2|CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0x66, 2, {0x0F, 0xC5, 0},
-      0, 3,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    /* SSE4.1 instructions */
-    { CPU_SSE41, 0, 0, 0, 0x66, 3, {0x0F, 0x3A, 0x15}, 0, 3,
-      {OPT_Mem|OPS_16|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_SSE41, 0, 32, 0, 0x66, 3, {0x0F, 0x3A, 0x15}, 0, 3,
-      {OPT_Reg|OPS_32|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_SSE41|CPU_64, 0, 64, 0, 0x66, 3, {0x0F, 0x3A, 0x15}, 0, 3,
-      {OPT_Reg|OPS_64|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} }
-};
-static const x86_insn_info pinsrw_insn[] = {
-    { CPU_P3|CPU_MMX, MOD_GasSufL, 0, 0, 0, 2, {0x0F, 0xC4, 0}, 0, 3,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_Reg|OPS_32|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 2, {0x0F, 0xC4, 0}, 0, 3,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_Reg|OPS_64|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_P3|CPU_MMX, MOD_GasSufL, 0, 0, 0, 2, {0x0F, 0xC4, 0}, 0, 3,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_Mem|OPS_16|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_SSE2, MOD_GasSufL, 0, 0, 0x66, 2, {0x0F, 0xC4, 0}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Reg|OPS_32|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0x66, 2, {0x0F, 0xC4, 0}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Reg|OPS_64|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_SSE2, MOD_GasSufL, 0, 0, 0x66, 2, {0x0F, 0xC4, 0}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Mem|OPS_16|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} }
-};
-static const x86_insn_info pmovmskb_insn[] = {
-    { CPU_P3|CPU_MMX, MOD_GasSufL, 0, 0, 0, 2, {0x0F, 0xD7, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_SIMDReg|OPS_64|OPA_EA, 0} },
-    { CPU_SSE2, MOD_GasSufL, 0, 0, 0x66, 2, {0x0F, 0xD7, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0, 2, {0x0F, 0xD7, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_SIMDReg|OPS_64|OPA_EA, 0} },
-    { CPU_Hammer|CPU_64, MOD_GasSufQ, 64, 0, 0x66, 2, {0x0F, 0xD7, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0} }
-};
-static const x86_insn_info pshufw_insn[] = {
-    { CPU_P3|CPU_MMX, 0, 0, 0, 0, 2, {0x0F, 0x70, 0}, 0, 3,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_SIMDRM|OPS_64|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} }
-};
-
-/* SSE2 instructions */
-static const x86_insn_info cmpsd_insn[] = {
-    { CPU_Any, MOD_GasIllegal, 32, 0, 0, 1, {0xA7, 0, 0}, 0, 0, {0, 0, 0} },
-    { CPU_SSE2, 0, 0, 0, 0xF2, 2, {0x0F, 0xC2, 0}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} }
-};
-static const x86_insn_info movaupd_insn[] = {
-    { CPU_SSE2, MOD_Op1Add, 0, 0, 0x66, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA, 0}
-    },
-    { CPU_SSE2, MOD_Op1Add, 0, 0, 0x66, 2, {0x0F, 0x01, 0}, 0, 2,
-      {OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare, 0}
-    }
-};
-static const x86_insn_info movhlpd_insn[] = {
-    { CPU_SSE2, MOD_Op1Add, 0, 0, 0x66, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_SSE2, MOD_Op1Add, 0, 0, 0x66, 2, {0x0F, 0x01, 0}, 0, 2,
-      {OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare, 0} }
-};
-static const x86_insn_info movmskpd_insn[] = {
-    { CPU_SSE2, MOD_GasSufL, 0, 0, 0x66, 2, {0x0F, 0x50, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0} }
-};
-static const x86_insn_info movntpddq_insn[] = {
-    { CPU_SSE2, MOD_Op1Add, 0, 0, 0x66, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_Mem|OPS_128|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare, 0} }
-};
-static const x86_insn_info movsd_insn[] = {
-    { CPU_Any, MOD_GasIllegal, 32, 0, 0, 1, {0xA5, 0, 0}, 0, 0, {0, 0, 0} },
-    { CPU_SSE2, 0, 0, 0, 0xF2, 2, {0x0F, 0x10, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0} },
-    { CPU_SSE2, 0, 0, 0, 0xF2, 2, {0x0F, 0x10, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_SSE2, 0, 0, 0, 0xF2, 2, {0x0F, 0x11, 0}, 0, 2,
-      {OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare, 0} }
-};
-static const x86_insn_info maskmovdqu_insn[] = {
-    { CPU_SSE2, 0, 0, 0, 0x66, 2, {0x0F, 0xF7, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0} }
-};
-static const x86_insn_info movdqau_insn[] = {
-    { CPU_SSE2, MOD_PreAdd, 0, 0, 0x00, 2, {0x0F, 0x6F, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA, 0}
-    },
-    { CPU_SSE2, MOD_PreAdd, 0, 0, 0x00, 2, {0x0F, 0x7F, 0}, 0, 2,
-      {OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare, 0}
-    }
-};
-static const x86_insn_info movdq2q_insn[] = {
-    { CPU_SSE2, 0, 0, 0, 0xF2, 2, {0x0F, 0xD6, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0} }
-};
-static const x86_insn_info movq2dq_insn[] = {
-    { CPU_SSE2, 0, 0, 0, 0xF3, 2, {0x0F, 0xD6, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDReg|OPS_64|OPA_EA, 0} }
-};
-static const x86_insn_info pslrldq_insn[] = {
-    { CPU_SSE2, MOD_SpAdd, 0, 0, 0x66, 2, {0x0F, 0x73, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} }
-};
-
-/* SSE3 instructions */
-static const x86_insn_info lddqu_insn[] = {
-    { CPU_SSE3, 0, 0, 0, 0xF2, 2, {0x0F, 0xF0, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Mem|OPS_Any|OPA_EA, 0} }
-};
-
-/* SSSE3 instructions */
-static const x86_insn_info ssse3_insn[] = {
-    { CPU_SSSE3, MOD_Op2Add, 0, 0, 0, 3, {0x0F, 0x38, 0x00}, 0, 2,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_SIMDRM|OPS_64|OPS_Relaxed|OPA_EA, 0}
-    },
-    { CPU_SSSE3, MOD_Op2Add, 0, 0, 0x66, 3, {0x0F, 0x38, 0x00}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA, 0}
-    }
-};
-
-static const x86_insn_info ssse3imm_insn[] = {
-    { CPU_SSSE3, MOD_Op2Add, 0, 0, 0, 3, {0x0F, 0x3A, 0x00}, 0, 3,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_SIMDRM|OPS_64|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm}
-    },
-    { CPU_SSSE3, MOD_Op2Add, 0, 0, 0x66, 3, {0x0F, 0x3A, 0x00}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm}
-    }
-};
-
-/* SSE4 instructions */
-
-static const x86_insn_info sse4_insn[] = {
-    { CPU_Any, MOD_Op2Add, 0, 0, 0x66, 3, {0x0F, 0x38, 0x00}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA, 0}
-    }
-};
-
-static const x86_insn_info sse4imm_insn[] = {
-    { CPU_SSE41, MOD_Op2Add, 0, 0, 0x66, 3, {0x0F, 0x3A, 0x00}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} }
-};
-
-static const x86_insn_info sse4xmm0_insn[] = {
-    { CPU_SSE41, MOD_Op2Add, 0, 0, 0x66, 3, {0x0F, 0x38, 0x00}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA, 0}
-    },
-    { CPU_SSE41, MOD_Op2Add, 0, 0, 0x66, 3, {0x0F, 0x38, 0x00}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA,
-       OPT_XMM0|OPS_128|OPA_None}
-    }
-};
-
-static const x86_insn_info crc32_insn[] = {
-    { CPU_SSE42, MOD_GasSufB, 0, 0, 0xF2, 3, {0x0F, 0x38, 0xF0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_8|OPA_EA, 0} },
-    { CPU_SSE42, MOD_GasSufW, 16, 0, 0xF2, 3, {0x0F, 0x38, 0xF1}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_16|OPA_EA, 0} },
-    { CPU_SSE42, MOD_GasSufL, 32, 0, 0xF2, 3, {0x0F, 0x38, 0xF1}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_SSE42|CPU_64, MOD_GasSufB, 64, 0, 0xF2, 3, {0x0F, 0x38, 0xF0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_RM|OPS_8|OPA_EA, 0} },
-    { CPU_SSE42|CPU_64, MOD_GasSufQ, 64, 0, 0xF2, 3, {0x0F, 0x38, 0xF1}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, 0} }
-};
-
-static const x86_insn_info extractps_insn[] = {
-    { CPU_SSE41, 0, 32, 0, 0x66, 3, {0x0F, 0x3A, 0x17}, 0, 3,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_SSE41|CPU_64, 0, 64, 0, 0x66, 3, {0x0F, 0x3A, 0x17}, 0, 3,
-      {OPT_Reg|OPS_64|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} }
-};
-
-static const x86_insn_info insertps_insn[] = {
-    { CPU_SSE41, 0, 0, 0, 0x66, 3, {0x0F, 0x3A, 0x21}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Mem|OPS_32|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_SSE41, 0, 0, 0, 0x66, 3, {0x0F, 0x3A, 0x21}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} }
-};
-
-static const x86_insn_info movntdqa_insn[] = {
-    { CPU_SSE41, 0, 0, 0, 0x66, 3, {0x0F, 0x38, 0x2A}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Mem|OPS_128|OPS_Relaxed|OPA_EA, 0} }
-};
-
-static const x86_insn_info sse4pcmpstr_insn[] = {
-    { CPU_SSE42, MOD_Op2Add, 0, 0, 0x66, 3, {0x0F, 0x3A, 0x00}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_SSE42, MOD_Op2Add|MOD_GasOnly|MOD_GasSufW, 16, 0, 0x66, 3,
-      {0x0F, 0x3A, 0x00}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm}
-    },
-    { CPU_SSE42, MOD_Op2Add|MOD_GasOnly|MOD_GasSufL, 32, 0, 0x66, 3,
-      {0x0F, 0x3A, 0x00}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm}
-    },
-    { CPU_SSE42, MOD_Op2Add|MOD_GasOnly|MOD_GasSufQ, 64, 0, 0x66, 3,
-      {0x0F, 0x3A, 0x00}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDRM|OPS_128|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm}
-    }
-};
-
-static const x86_insn_info pextrb_insn[] = {
-    { CPU_SSE41, 0, 0, 0, 0x66, 3, {0x0F, 0x3A, 0x14}, 0, 3,
-      {OPT_Mem|OPS_8|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_SSE41, 0, 32, 0, 0x66, 3, {0x0F, 0x3A, 0x14}, 0, 3,
-      {OPT_Reg|OPS_32|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_SSE41|CPU_64, 0, 64, 0, 0x66, 3, {0x0F, 0x3A, 0x14}, 0, 3,
-      {OPT_Reg|OPS_64|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} }
-};
-
-static const x86_insn_info pextrd_insn[] = {
-    { CPU_SSE41, 0, 32, 0, 0x66, 3, {0x0F, 0x3A, 0x16}, 0, 3,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} }
-};
-
-static const x86_insn_info pextrq_insn[] = {
-    { CPU_SSE41|CPU_64, 0, 64, 0, 0x66, 3, {0x0F, 0x3A, 0x16}, 0, 3,
-      {OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} }
-};
-
-static const x86_insn_info pinsrb_insn[] = {
-    { CPU_SSE41, 0, 0, 0, 0x66, 3, {0x0F, 0x3A, 0x20}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Mem|OPS_8|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_SSE41, 0, 32, 0, 0x66, 3, {0x0F, 0x3A, 0x20}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Reg|OPS_32|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} }
-};
-
-static const x86_insn_info pinsrd_insn[] = {
-    { CPU_SSE41, 0, 32, 0, 0x66, 3, {0x0F, 0x3A, 0x22}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} }
-};
-
-static const x86_insn_info pinsrq_insn[] = {
-    { CPU_SSE41|CPU_64, 0, 64, 0, 0x66, 3, {0x0F, 0x3A, 0x22}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} }
-};
-
-static const x86_insn_info sse4m64_insn[] = {
-    { CPU_SSE41, MOD_Op2Add, 0, 0, 0x66, 3, {0x0F, 0x38, 0x00}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_SSE41, MOD_Op2Add, 0, 0, 0x66, 3, {0x0F, 0x38, 0x00}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0} }
-};
-
-static const x86_insn_info sse4m32_insn[] = {
-    { CPU_SSE41, MOD_Op2Add, 0, 0, 0x66, 3, {0x0F, 0x38, 0x00}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Mem|OPS_32|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_SSE41, MOD_Op2Add, 0, 0, 0x66, 3, {0x0F, 0x38, 0x00}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0} }
-};
-
-static const x86_insn_info sse4m16_insn[] = {
-    { CPU_SSE41, MOD_Op2Add, 0, 0, 0x66, 3, {0x0F, 0x38, 0x00}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Mem|OPS_16|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_SSE41, MOD_Op2Add, 0, 0, 0x66, 3, {0x0F, 0x38, 0x00}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0} }
-};
-
-static const x86_insn_info cnt_insn[] = {
-    { CPU_SSE42, MOD_Op1Add|MOD_GasSufW, 16, 0, 0xF3, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_SSE42, MOD_Op1Add|MOD_GasSufL, 32, 0, 0xF3, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_SSE42|CPU_64, MOD_Op1Add|MOD_GasSufQ, 64, 0, 0xF3, 2,
-      {0x0F, 0x00, 0}, 0, 2,
-      {OPT_Reg|OPS_64|OPA_Spare, OPT_RM|OPS_64|OPS_Relaxed|OPA_EA, 0} }
-};
-
-static const x86_insn_info extrq_insn[] = {
-    { CPU_SSE41, 0, 0, 0, 0x66, 2, {0x0F, 0x78, 0}, 0, 3,
-      {OPT_SIMDReg|OPS_128|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_EA,
-       OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm} },
-    { CPU_SSE41, 0, 0, 0, 0x66, 2, {0x0F, 0x79, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0} }
-};
-
-static const unsigned long insertq_4operands[] =
-    {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA,
-     OPT_Imm|OPS_8|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm};
-static const x86_insn_info insertq_insn[] = {
-    { CPU_SSE41, 0, 0, 0, 0xF2, 2, {0x0F, 0x78, 0}, 0, 4, {0, 0, 0} },
-    { CPU_SSE41, 0, 0, 0, 0xF2, 2, {0x0F, 0x79, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_SIMDReg|OPS_128|OPA_EA, 0} }
-};
-
-static const x86_insn_info movntsd_insn[] = {
-    { CPU_SSE41, 0, 0, 0, 0xF2, 2, {0x0F, 0x2B, 0}, 0, 2,
-      {OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare, 0} }
-};
-
-static const x86_insn_info movntss_insn[] = {
-    { CPU_SSE41, 0, 0, 0, 0xF3, 2, {0x0F, 0x2B, 0}, 0, 2,
-      {OPT_Mem|OPS_32|OPS_Relaxed|OPA_EA, OPT_SIMDReg|OPS_128|OPA_Spare, 0} }
-};
-
-/* AMD 3DNow! instructions */
-static const x86_insn_info now3d_insn[] = {
-    { CPU_3DNow, MOD_Imm8, 0, 0, 0, 2, {0x0F, 0x0F, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_SIMDRM|OPS_64|OPS_Relaxed|OPA_EA, 0} }
-};
-
-/* AMD x86-64 extensions */
-static const x86_insn_info cmpxchg16b_insn[] = {
-    { CPU_64|CPU_Hammer, 0, 64, 0, 0, 2, {0x0F, 0xC7, 0}, 1, 1,
-      {OPT_Mem|OPS_128|OPS_Relaxed|OPA_EA, 0, 0} }
-};
-
-/* AMD Pacifica (SVM) instructions */
-static const x86_insn_info invlpga_insn[] = {
-    { CPU_SVM, 0, 0, 0, 0, 3, {0x0F, 0x01, 0xDF}, 0, 0, {0, 0, 0} },
-    { CPU_SVM, 0, 0, 0, 0, 3, {0x0F, 0x01, 0xDF}, 0, 2,
-      {OPT_MemrAX|OPS_Any|OPA_AdSizeEA, OPT_Creg|OPS_32|OPA_None, 0} }
-};
-static const x86_insn_info skinit_insn[] = {
-    { CPU_SVM, 0, 0, 0, 0, 3, {0x0F, 0x01, 0xDE}, 0, 0, {0, 0, 0} },
-    { CPU_SVM, 0, 0, 0, 0, 3, {0x0F, 0x01, 0xDE}, 0, 1,
-      {OPT_MemEAX|OPS_Any|OPA_None, 0, 0} }
-};
-static const x86_insn_info svm_rax_insn[] = {
-    { CPU_SVM, MOD_Op2Add, 0, 0, 0, 3, {0x0F, 0x01, 0x00}, 0, 0, {0, 0, 0} },
-    { CPU_SVM, MOD_Op2Add, 0, 0, 0, 3, {0x0F, 0x01, 0x00}, 0, 1,
-      {OPT_MemrAX|OPS_Any|OPA_AdSizeEA, 0, 0} }
-};
-/* VIA PadLock instructions */
-static const x86_insn_info padlock_insn[] = {
-    { CPU_Any, MOD_Imm8|MOD_PreAdd|MOD_Op1Add, 0, 0, 0x00, 2, {0x0F, 0x00, 0},
-      0, 0, {0, 0, 0} }
-};
-
-/* Cyrix MMX instructions */
-static const x86_insn_info cyrixmmx_insn[] = {
-    { CPU_Cyrix|CPU_MMX, MOD_Op1Add, 0, 0, 0, 2, {0x0F, 0x00, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_SIMDRM|OPS_64|OPS_Relaxed|OPA_EA, 0} }
-};
-static const x86_insn_info pmachriw_insn[] = {
-    { CPU_Cyrix|CPU_MMX, 0, 0, 0, 0, 2, {0x0F, 0x5E, 0}, 0, 2,
-      {OPT_SIMDReg|OPS_64|OPA_Spare, OPT_Mem|OPS_64|OPS_Relaxed|OPA_EA, 0} }
-};
-
-/* Cyrix extensions */
-static const x86_insn_info rdwrshr_insn[] = {
-    { CPU_486|CPU_Cyrix|CPU_SMM, MOD_Op1Add, 0, 0, 0, 2, {0x0F, 0x36, 0}, 0, 1,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} }
-};
-static const x86_insn_info rsdc_insn[] = {
-    { CPU_486|CPU_Cyrix|CPU_SMM, 0, 0, 0, 0, 2, {0x0F, 0x79, 0}, 0, 2,
-      {OPT_SegReg|OPS_16|OPS_Relaxed|OPA_Spare,
-       OPT_Mem|OPS_80|OPS_Relaxed|OPA_EA, 0} }
-};
-static const x86_insn_info cyrixsmm_insn[] = {
-    { CPU_486|CPU_Cyrix|CPU_SMM, MOD_Op1Add, 0, 0, 0, 2, {0x0F, 0x00, 0}, 0, 1,
-      {OPT_Mem|OPS_80|OPS_Relaxed|OPA_EA, 0, 0} }
-};
-static const x86_insn_info svdc_insn[] = {
-    { CPU_486|CPU_Cyrix|CPU_SMM, 0, 0, 0, 0, 2, {0x0F, 0x78, 0}, 0, 2,
-      {OPT_Mem|OPS_80|OPS_Relaxed|OPA_EA,
-       OPT_SegReg|OPS_16|OPS_Relaxed|OPA_Spare, 0} }
-};
-
-/* Obsolete/undocumented instructions */
-static const x86_insn_info ibts_insn[] = {
-    { CPU_386|CPU_Undoc|CPU_Obs, 0, 16, 0, 0, 2, {0x0F, 0xA7, 0}, 0, 2,
-      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_16|OPA_Spare, 0} },
-    { CPU_386|CPU_Undoc|CPU_Obs, 0, 32, 0, 0, 2, {0x0F, 0xA7, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_32|OPA_Spare, 0} }
-};
-static const x86_insn_info umov_insn[] = {
-    { CPU_386|CPU_Undoc, 0, 0, 0, 0, 2, {0x0F, 0x10, 0}, 0, 2,
-      {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_8|OPA_Spare, 0} },
-    { CPU_386|CPU_Undoc, 0, 16, 0, 0, 2, {0x0F, 0x11, 0}, 0, 2,
-      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_16|OPA_Spare, 0} },
-    { CPU_386|CPU_Undoc, 0, 32, 0, 0, 2, {0x0F, 0x11, 0}, 0, 2,
-      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_32|OPA_Spare, 0} },
-    { CPU_386|CPU_Undoc, 0, 0, 0, 0, 2, {0x0F, 0x12, 0}, 0, 2,
-      {OPT_Reg|OPS_8|OPA_Spare, OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_386|CPU_Undoc, 0, 16, 0, 0, 2, {0x0F, 0x13, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_386|CPU_Undoc, 0, 32, 0, 0, 2, {0x0F, 0x13, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} }
-};
-static const x86_insn_info xbts_insn[] = {
-    { CPU_386|CPU_Undoc|CPU_Obs, 0, 16, 0, 0, 2, {0x0F, 0xA6, 0}, 0, 2,
-      {OPT_Reg|OPS_16|OPA_Spare, OPT_Mem|OPS_16|OPS_Relaxed|OPA_EA, 0} },
-    { CPU_386|CPU_Undoc|CPU_Obs, 0, 32, 0, 0, 2, {0x0F, 0xA6, 0}, 0, 2,
-      {OPT_Reg|OPS_32|OPA_Spare, OPT_Mem|OPS_32|OPS_Relaxed|OPA_EA, 0} }
-};
-
+#include "x86insns.c"
 
 static void
 x86_finalize_common(x86_common *common, const x86_insn_info *info,
@@ -2430,11 +412,12 @@
     x86_jmp *jmp;
     int num_info = id_insn->num_info;
     const x86_insn_info *info = id_insn->group;
-    unsigned long mod_data = id_insn->mod_data;
+    unsigned char *mod_data = id_insn->mod_data;
     unsigned int mode_bits = id_insn->mode_bits;
     /*unsigned char suffix = id_insn->suffix;*/
     yasm_insn_operand *op;
     static const unsigned char size_lookup[] = {0, 8, 16, 32, 64, 80, 128, 0};
+    unsigned int i;
 
     /* We know the target is in operand 0, but sanity check for Imm. */
     op = yasm_insn_ops_first(&id_insn->insn);
@@ -2452,7 +435,7 @@
     jmp->target.jump_target = 1;
 
     /* See if the user explicitly specified short/near/far. */
-    switch ((int)(jinfo->operands[0] & OPTM_MASK)) {
+    switch (insn_operands[jinfo->operands_index+0].targetmod) {
         case OPTM_Short:
             jmp->op_sel = JMP_SHORT_FORCED;
             break;
@@ -2465,13 +448,15 @@
 
     /* Check for address size setting in second operand, if present */
     if (jinfo->num_operands > 1 &&
-        (jinfo->operands[1] & OPA_MASK) == OPA_AdSizeR)
+        insn_operands[jinfo->operands_index+1].action == OPA_AdSizeR)
         jmp->common.addrsize = (unsigned char)
-            size_lookup[(jinfo->operands[1] & OPS_MASK)>>OPS_SHIFT];
+            size_lookup[insn_operands[jinfo->operands_index+1].size];
 
     /* Check for address size override */
-    if (jinfo->modifiers & MOD_AdSizeR)
-        jmp->common.addrsize = (unsigned char)(mod_data & 0xFF);
+    for (i=0; i<NELEMS(info->modifiers); i++) {
+        if (jinfo->modifiers[i] == MOD_AdSizeR)
+            jmp->common.addrsize = mod_data[i];
+    }
 
     /* Scan through other infos for this insn looking for short/near versions.
      * Needs to match opersize and number of operands, also be within CPU.
@@ -2480,36 +465,52 @@
     jmp->nearop.len = 0;
     for (; num_info>0 && (jmp->shortop.len == 0 || jmp->nearop.len == 0);
          num_info--, info++) {
-        unsigned long cpu = info->cpu;
+        unsigned int cpu0 = info->cpu0;
+        unsigned int cpu1 = info->cpu1;
+        unsigned int cpu2 = info->cpu2;
 
-        if ((cpu & CPU_64) && mode_bits != 64)
+        /* Match CPU */
+        if (mode_bits != 64 &&
+            (cpu0 == CPU_64 || cpu1 == CPU_64 || cpu2 == CPU_64))
             continue;
-        if ((cpu & CPU_Not64) && mode_bits == 64)
+        if (mode_bits == 64 &&
+            (cpu0 == CPU_Not64 || cpu1 == CPU_Not64 || cpu2 == CPU_Not64))
             continue;
-        cpu &= ~(CPU_64 | CPU_Not64);
 
-        if ((id_insn->cpu_enabled & cpu) != cpu)
+        if (cpu0 == CPU_64 || cpu0 == CPU_Not64)
+            cpu0 = CPU_Any;
+        if (cpu1 == CPU_64 || cpu1 == CPU_Not64)
+            cpu1 = CPU_Any;
+        if (cpu2 == CPU_64 || cpu2 == CPU_Not64)
+            cpu2 = CPU_Any;
+        if (!BitVector_bit_test(id_insn->cpu_enabled, cpu0) ||
+            !BitVector_bit_test(id_insn->cpu_enabled, cpu1) ||
+            !BitVector_bit_test(id_insn->cpu_enabled, cpu2))
             continue;
 
         if (info->num_operands == 0)
             continue;
 
-        if ((info->operands[0] & OPA_MASK) != OPA_JmpRel)
+        if (insn_operands[info->operands_index+0].action != OPA_JmpRel)
             continue;
 
         if (info->opersize != jmp->common.opersize)
             continue;
 
-        switch ((int)(info->operands[0] & OPTM_MASK)) {
+        switch (insn_operands[info->operands_index+0].targetmod) {
             case OPTM_Short:
                 x86_finalize_opcode(&jmp->shortop, info);
-                if (info->modifiers & MOD_Op0Add)
-                    jmp->shortop.opcode[0] += (unsigned char)(mod_data & 0xFF);
+                for (i=0; i<NELEMS(info->modifiers); i++) {
+                    if (info->modifiers[i] == MOD_Op0Add)
+                        jmp->shortop.opcode[0] += mod_data[i];
+                }
                 break;
             case OPTM_Near:
                 x86_finalize_opcode(&jmp->nearop, info);
-                if (info->modifiers & MOD_Op1Add)
-                    jmp->nearop.opcode[1] += (unsigned char)(mod_data & 0xFF);
+                for (i=0; i<NELEMS(info->modifiers); i++) {
+                    if (info->modifiers[i] == MOD_Op1Add)
+                        jmp->nearop.opcode[1] += mod_data[i];
+                }
                 break;
         }
     }
@@ -2546,6 +547,7 @@
     const x86_insn_info *info = id_insn->group;
     unsigned int num_info = id_insn->num_info;
     unsigned int suffix = id_insn->suffix;
+    unsigned int mode_bits = id_insn->mode_bits;
     int found = 0;
 
     /* Just do a simple linear search through the info array for a match.
@@ -2553,22 +555,33 @@
      */
     for (; num_info>0 && !found; num_info--, info++) {
         yasm_insn_operand *op, **use_ops;
-        const unsigned long *info_ops = info->operands;
-        unsigned long icpu;
+        const x86_info_operand *info_ops =
+            &insn_operands[info->operands_index];
+        unsigned int cpu0 = info->cpu0;
+        unsigned int cpu1 = info->cpu1;
+        unsigned int cpu2 = info->cpu2;
+        unsigned int gas_flags = info->gas_flags;
         unsigned int size;
         int mismatch = 0;
         int i;
 
         /* Match CPU */
-        icpu = info->cpu;
-
-        if ((icpu & CPU_64) && id_insn->mode_bits != 64)
+        if (mode_bits != 64 &&
+            (cpu0 == CPU_64 || cpu1 == CPU_64 || cpu2 == CPU_64))
             continue;
-        if ((icpu & CPU_Not64) && id_insn->mode_bits == 64)
+        if (mode_bits == 64 &&
+            (cpu0 == CPU_Not64 || cpu1 == CPU_Not64 || cpu2 == CPU_Not64))
             continue;
-        icpu &= ~(CPU_64 | CPU_Not64);
 
-        if (bypass != 7 && (id_insn->cpu_enabled & icpu) != icpu)
+        if (cpu0 == CPU_64 || cpu0 == CPU_Not64)
+            cpu0 = CPU_Any;
+        if (cpu1 == CPU_64 || cpu1 == CPU_Not64)
+            cpu1 = CPU_Any;
+        if (cpu2 == CPU_64 || cpu2 == CPU_Not64)
+            cpu2 = CPU_Any;
+        if (bypass != 8 && (!BitVector_bit_test(id_insn->cpu_enabled, cpu0) ||
+                            !BitVector_bit_test(id_insn->cpu_enabled, cpu1) ||
+                            !BitVector_bit_test(id_insn->cpu_enabled, cpu2)))
             continue;
 
         /* Match # of operands */
@@ -2576,22 +589,19 @@
             continue;
 
         /* Match parser mode */
-        if ((info->modifiers & MOD_GasOnly)
-            && id_insn->parser != X86_PARSER_GAS)
+        if ((gas_flags & GAS_ONLY) && id_insn->parser != X86_PARSER_GAS)
             continue;
-        if ((info->modifiers & MOD_GasIllegal)
-            && id_insn->parser == X86_PARSER_GAS)
+        if ((gas_flags & GAS_ILLEGAL) && id_insn->parser == X86_PARSER_GAS)
             continue;
 
         /* Match suffix (if required) */
-        if (suffix != 0 && suffix != 0x80
-            && ((suffix<<MOD_GasSuf_SHIFT) & info->modifiers) == 0)
+        if (suffix != 0 && suffix != WEAK
+            && ((suffix & SUF_MASK) & (gas_flags & SUF_MASK)) == 0)
             continue;
 
         /* Use reversed operands in GAS mode if not otherwise specified */
         use_ops = ops;
-        if (id_insn->parser == X86_PARSER_GAS
-            && !(info->modifiers & MOD_GasNoRev))
+        if (id_insn->parser == X86_PARSER_GAS && !(gas_flags & GAS_NO_REV))
             use_ops = rev_ops;
 
         if (id_insn->insn.num_operands == 0) {
@@ -2599,15 +609,11 @@
             break;
         }
 
-        /* 4-operand special case for insertq */
-        if (info->num_operands > 3)
-            info_ops = insertq_4operands;
-
         /* Match each operand type and size */
         for (i = 0, op = use_ops[0]; op && i<info->num_operands && !mismatch;
              op = use_ops[++i]) {
             /* Check operand type */
-            switch ((int)(info_ops[i] & OPT_MASK)) {
+            switch (info_ops[i].type) {
                 case OPT_Imm:
                     if (op->type != YASM_INSN__OPERAND_IMM)
                         mismatch = 1;
@@ -2642,6 +648,7 @@
                     if (op->type == YASM_INSN__OPERAND_MEMORY)
                         break;
                     /*@fallthrough@*/
+                case OPT_SIMDRegMatch0:
                 case OPT_SIMDReg:
                     if (op->type != YASM_INSN__OPERAND_REG)
                         mismatch = 1;
@@ -2655,6 +662,9 @@
                                 break;
                         }
                     }
+                    if (!mismatch && info_ops[i].type == OPT_SIMDRegMatch0 &&
+                        bypass != 7 && op->data.reg != use_ops[0]->data.reg)
+                        mismatch = 1;
                     break;
                 case OPT_SegReg:
                     if (op->type != YASM_INSN__OPERAND_SEGREG)
@@ -2682,40 +692,40 @@
                     break;
                 case OPT_Areg:
                     if (op->type != YASM_INSN__OPERAND_REG ||
-                        ((info_ops[i] & OPS_MASK) == OPS_8 &&
+                        (info_ops[i].size == OPS_8 &&
                          op->data.reg != (X86_REG8 | 0) &&
                          op->data.reg != (X86_REG8X | 0)) ||
-                        ((info_ops[i] & OPS_MASK) == OPS_16 &&
+                        (info_ops[i].size == OPS_16 &&
                          op->data.reg != (X86_REG16 | 0)) ||
-                        ((info_ops[i] & OPS_MASK) == OPS_32 &&
+                        (info_ops[i].size == OPS_32 &&
                          op->data.reg != (X86_REG32 | 0)) ||
-                        ((info_ops[i] & OPS_MASK) == OPS_64 &&
+                        (info_ops[i].size == OPS_64 &&
                          op->data.reg != (X86_REG64 | 0)))
                         mismatch = 1;
                     break;
                 case OPT_Creg:
                     if (op->type != YASM_INSN__OPERAND_REG ||
-                        ((info_ops[i] & OPS_MASK) == OPS_8 &&
+                        (info_ops[i].size == OPS_8 &&
                          op->data.reg != (X86_REG8 | 1) &&
                          op->data.reg != (X86_REG8X | 1)) ||
-                        ((info_ops[i] & OPS_MASK) == OPS_16 &&
+                        (info_ops[i].size == OPS_16 &&
                          op->data.reg != (X86_REG16 | 1)) ||
-                        ((info_ops[i] & OPS_MASK) == OPS_32 &&
+                        (info_ops[i].size == OPS_32 &&
                          op->data.reg != (X86_REG32 | 1)) ||
-                        ((info_ops[i] & OPS_MASK) == OPS_64 &&
+                        (info_ops[i].size == OPS_64 &&
                          op->data.reg != (X86_REG64 | 1)))
                         mismatch = 1;
                     break;
                 case OPT_Dreg:
                     if (op->type != YASM_INSN__OPERAND_REG ||
-                        ((info_ops[i] & OPS_MASK) == OPS_8 &&
+                        (info_ops[i].size == OPS_8 &&
                          op->data.reg != (X86_REG8 | 2) &&
                          op->data.reg != (X86_REG8X | 2)) ||
-                        ((info_ops[i] & OPS_MASK) == OPS_16 &&
+                        (info_ops[i].size == OPS_16 &&
                          op->data.reg != (X86_REG16 | 2)) ||
-                        ((info_ops[i] & OPS_MASK) == OPS_32 &&
+                        (info_ops[i].size == OPS_32 &&
                          op->data.reg != (X86_REG32 | 2)) ||
-                        ((info_ops[i] & OPS_MASK) == OPS_64 &&
+                        (info_ops[i].size == OPS_64 &&
                          op->data.reg != (X86_REG64 | 2)))
                         mismatch = 1;
                     break;
@@ -2757,7 +767,10 @@
                 case OPT_MemOffs:
                     if (op->type != YASM_INSN__OPERAND_MEMORY ||
                         yasm_expr__contains(op->data.ea->disp.abs,
-                                            YASM_EXPR_REG))
+                                            YASM_EXPR_REG) ||
+                        op->data.ea->pc_rel ||
+                        (!op->data.ea->not_pc_rel && id_insn->default_rel &&
+                         op->data.ea->disp.size != 64))
                         mismatch = 1;
                     break;
                 case OPT_Imm1:
@@ -2806,7 +819,7 @@
                 break;
 
             /* Check operand size */
-            size = size_lookup[(info_ops[i] & OPS_MASK)>>OPS_SHIFT];
+            size = size_lookup[info_ops[i].size];
             if (suffix != 0) {
                 /* Require relaxed operands for GAS mode (don't allow
                  * per-operand sizing).
@@ -2815,11 +828,11 @@
                     /* Register size must exactly match */
                     if (yasm_x86__get_reg_size(op->data.reg) != size)
                         mismatch = 1;
-                } else if (((info_ops[i] & OPT_MASK) == OPT_Imm
-                            || (info_ops[i] & OPT_MASK) == OPT_ImmNotSegOff
-                            || (info_ops[i] & OPT_MASK) == OPT_Imm1)
-                    && (info_ops[i] & OPS_RMASK) != OPS_Relaxed
-                    && (info_ops[i] & OPA_MASK) != OPA_JmpRel)
+                } else if ((info_ops[i].type == OPT_Imm
+                            || info_ops[i].type == OPT_ImmNotSegOff
+                            || info_ops[i].type == OPT_Imm1)
+                    && !info_ops[i].relaxed
+                    && info_ops[i].action != OPA_JmpRel)
                     mismatch = 1;
             } else {
                 if (op->type == YASM_INSN__OPERAND_REG && op->size == 0) {
@@ -2833,7 +846,7 @@
                     if ((bypass == 1 && i == 0) || (bypass == 2 && i == 1)
                         || (bypass == 3 && i == 3))
                         ;
-                    else if ((info_ops[i] & OPS_RMASK) == OPS_Relaxed) {
+                    else if (info_ops[i].relaxed) {
                         /* Relaxed checking */
                         if (size != 0 && op->size != size && op->size != 0)
                             mismatch = 1;
@@ -2850,7 +863,7 @@
 
             /* Check for 64-bit effective address size in NASM mode */
             if (suffix == 0 && op->type == YASM_INSN__OPERAND_MEMORY) {
-                if ((info_ops[i] & OPEAS_MASK) == OPEAS_64) {
+                if (info_ops[i].eas64) {
                     if (op->data.ea->disp.size != 64)
                         mismatch = 1;
                 } else if (op->data.ea->disp.size == 64)
@@ -2861,7 +874,7 @@
                 break;
 
             /* Check target modifier */
-            switch ((int)(info_ops[i] & OPTM_MASK)) {
+            switch (info_ops[i].targetmod) {
                 case OPTM_None:
                     if (op->targetmod != 0)
                         mismatch = 1;
@@ -2920,7 +933,7 @@
         return;
     }
 
-    for (bypass=1; bypass<8; bypass++) {
+    for (bypass=1; bypass<9; bypass++) {
         i = x86_find_match(id_insn, ops, rev_ops, size_lookup, bypass);
         if (i)
             break;
@@ -2944,9 +957,16 @@
             break;
         case 7:
             yasm_error_set(YASM_ERROR_TYPE,
-                          N_("requires CPU%s"),
-                          cpu_find_reverse(i->cpu & ~(CPU_64 | CPU_Not64)));
+                N_("one of source operand 1 or 3 must match dest operand"));
             break;
+        case 8:
+        {
+            unsigned int cpu0 = i->cpu0, cpu1 = i->cpu1, cpu2 = i->cpu2;
+            yasm_error_set(YASM_ERROR_TYPE,
+                          N_("requires CPU%s"),
+                          cpu_find_reverse(cpu0, cpu1, cpu2));
+            break;
+        }
         default:
             yasm_error_set(YASM_ERROR_TYPE,
                            N_("invalid combination of opcode and operands"));
@@ -2959,13 +979,15 @@
     x86_id_insn *id_insn = (x86_id_insn *)bc->contents;
     x86_insn *insn;
     const x86_insn_info *info = id_insn->group;
-    unsigned long mod_data = id_insn->mod_data;
     unsigned int mode_bits = id_insn->mode_bits;
+    unsigned char *mod_data = id_insn->mod_data;
     yasm_insn_operand *op, *ops[4], *rev_ops[4];
     /*@null@*/ yasm_expr *imm;
     unsigned char im_len;
     unsigned char im_sign;
     unsigned char spare;
+    unsigned char drex;
+    unsigned char *pdrex;
     unsigned int i;
     unsigned int size_lookup[] = {0, 8, 16, 32, 64, 80, 128, 0};
     unsigned long do_postop = 0;
@@ -2975,11 +997,9 @@
     yasm_insn_finalize(&id_insn->insn);
 
     /* Build local array of operands from list, since we know we have a max
-     * of 3 operands.
+     * of 4 operands.
      */
-    if (id_insn->insn.num_operands == 4 && info == insertq_insn)
-        ;
-    else if (id_insn->insn.num_operands > 3) {
+    if (id_insn->insn.num_operands > 4) {
         yasm_error_set(YASM_ERROR_TYPE, N_("too many operands"));
         return;
     }
@@ -3005,7 +1025,7 @@
      * operands and adjust for dereferences / lack thereof.
      */
     if (id_insn->parser == X86_PARSER_GAS
-        && (info->operands[0] & OPA_MASK) == OPA_JmpRel) {
+        && insn_operands[info->operands_index+0].action == OPA_JmpRel) {
         for (i = 0, op = ops[0]; op; op = ops[++i]) {
             if (!op->deref && (op->type == YASM_INSN__OPERAND_REG
                                || (op->type == YASM_INSN__OPERAND_MEMORY
@@ -3038,7 +1058,7 @@
     }
 
     if (id_insn->insn.num_operands > 0) {
-        switch (info->operands[0] & OPA_MASK) {
+        switch (insn_operands[info->operands_index+0].action) {
             case OPA_JmpRel:
                 /* Shortcut to JmpRel */
                 x86_finalize_jmp(bc, prev_bc, info);
@@ -3059,55 +1079,50 @@
     insn->def_opersize_64 = info->def_opersize_64;
     insn->special_prefix = info->special_prefix;
     spare = info->spare;
+    drex = info->drex_oc0 & DREX_OC0_MASK;
     im_len = 0;
     im_sign = 0;
     insn->postop = X86_POSTOP_NONE;
     insn->rex = 0;
+    pdrex = (info->drex_oc0 & NEED_DREX_MASK) ? &drex : NULL;
 
     /* Apply modifiers */
-    if (info->modifiers & MOD_Gap0)
-        mod_data >>= 8;
-    if (info->modifiers & MOD_Op2Add) {
-        insn->opcode.opcode[2] += (unsigned char)(mod_data & 0xFF);
-        mod_data >>= 8;
-    }
-    if (info->modifiers & MOD_Gap1)
-        mod_data >>= 8;
-    if (info->modifiers & MOD_Op1Add) {
-        insn->opcode.opcode[1] += (unsigned char)(mod_data & 0xFF);
-        mod_data >>= 8;
-    }
-    if (info->modifiers & MOD_Gap2)
-        mod_data >>= 8;
-    if (info->modifiers & MOD_Op0Add) {
-        insn->opcode.opcode[0] += (unsigned char)(mod_data & 0xFF);
-        mod_data >>= 8;
-    }
-    if (info->modifiers & MOD_PreAdd) {
-        insn->special_prefix += (unsigned char)(mod_data & 0xFF);
-        mod_data >>= 8;
-    }
-    if (info->modifiers & MOD_SpAdd) {
-        spare += (unsigned char)(mod_data & 0xFF);
-        mod_data >>= 8;
-    }
-    if (info->modifiers & MOD_OpSizeR) {
-        insn->common.opersize = (unsigned char)(mod_data & 0xFF);
-        mod_data >>= 8;
-    }
-    if (info->modifiers & MOD_Imm8) {
-        imm = yasm_expr_create_ident(yasm_expr_int(
-            yasm_intnum_create_uint(mod_data & 0xFF)), bc->line);
-        im_len = 8;
-        mod_data >>= 8;
-    }
-    if (info->modifiers & MOD_DOpS64R) {
-        insn->def_opersize_64 = (unsigned char)(mod_data & 0xFF);
-        mod_data >>= 8;
-    }
-    if (info->modifiers & MOD_Op1AddSp) {
-        insn->opcode.opcode[1] += (unsigned char)(mod_data & 0xFF)<<3;
-        /*mod_data >>= 8;*/
+    for (i=0; i<NELEMS(info->modifiers); i++) {
+        switch (info->modifiers[i]) {
+            case MOD_Gap:
+                break;
+            case MOD_PreAdd:
+                insn->special_prefix += mod_data[i];
+                break;
+            case MOD_Op0Add:
+                insn->opcode.opcode[0] += mod_data[i];
+                break;
+            case MOD_Op1Add:
+                insn->opcode.opcode[1] += mod_data[i];
+                break;
+            case MOD_Op2Add:
+                insn->opcode.opcode[2] += mod_data[i];
+                break;
+            case MOD_SpAdd:
+                spare += mod_data[i];
+                break;
+            case MOD_OpSizeR:
+                insn->common.opersize = mod_data[i];
+                break;
+            case MOD_Imm8:
+                imm = yasm_expr_create_ident(yasm_expr_int(
+                    yasm_intnum_create_uint(mod_data[i])), bc->line);
+                im_len = 8;
+                break;
+            case MOD_DOpS64R:
+                insn->def_opersize_64 = mod_data[i];
+                break;
+            case MOD_Op1AddSp:
+                insn->opcode.opcode[1] += mod_data[i]<<3;
+                break;
+            default:
+                break;
+        }
     }
 
     /* In 64-bit mode, if opersize is 64 and default is not 64,
@@ -3120,20 +1135,17 @@
     /* Go through operands and assign */
     if (id_insn->insn.num_operands > 0) {
         yasm_insn_operand **use_ops = ops;
-        const unsigned long *info_ops = info->operands;
+        const x86_info_operand *info_ops =
+            &insn_operands[info->operands_index];
 
         /* Use reversed operands in GAS mode if not otherwise specified */
         if (id_insn->parser == X86_PARSER_GAS
-            && !(info->modifiers & MOD_GasNoRev))
+            && !(info->gas_flags & GAS_NO_REV))
             use_ops = rev_ops;
 
-        /* 4-operand special case for insertq */
-        if (info->num_operands > 3)
-            info_ops = insertq_4operands;
-
         for (i = 0, op = use_ops[0]; op && i<info->num_operands;
              op = use_ops[++i]) {
-            switch ((int)(info_ops[i] & OPA_MASK)) {
+            switch (info_ops[i].action) {
                 case OPA_None:
                     /* Throw away the operand contents */
                     switch (op->type) {
@@ -3154,39 +1166,43 @@
                             insn->x86_ea =
                                 yasm_x86__ea_create_reg(insn->x86_ea,
                                     (unsigned long)op->data.reg, &insn->rex,
-                                    mode_bits);
+                                    pdrex, mode_bits);
                             break;
                         case YASM_INSN__OPERAND_SEGREG:
                             yasm_internal_error(
                                 N_("invalid operand conversion"));
                         case YASM_INSN__OPERAND_MEMORY:
                             insn->x86_ea = (x86_effaddr *)op->data.ea;
-                            if ((info_ops[i] & OPT_MASK) == OPT_MemOffs)
+                            if (info_ops[i].type == OPT_MemOffs)
                                 /* Special-case for MOV MemOffs instruction */
                                 yasm_x86__ea_set_disponly(insn->x86_ea);
+                            else if (id_insn->default_rel &&
+                                     !op->data.ea->not_pc_rel &&
+                                     op->data.ea->segreg == 0 &&
+                                     !yasm_expr__contains(
+                                        op->data.ea->disp.abs, YASM_EXPR_REG))
+                                /* Enable default PC-rel if no regs/segregs */
+                                insn->x86_ea->ea.pc_rel = 1;
                             break;
                         case YASM_INSN__OPERAND_IMM:
                             insn->x86_ea =
                                 yasm_x86__ea_create_imm(insn->x86_ea,
                                     op->data.val,
-                                    size_lookup[(info_ops[i] &
-                                                OPS_MASK)>>OPS_SHIFT]);
+                                    size_lookup[info_ops[i].size]);
                             break;
                     }
                     break;
                 case OPA_Imm:
                     if (op->type == YASM_INSN__OPERAND_IMM) {
                         imm = op->data.val;
-                        im_len = size_lookup[(info_ops[i] &
-                                              OPS_MASK)>>OPS_SHIFT];
+                        im_len = size_lookup[info_ops[i].size];
                     } else
                         yasm_internal_error(N_("invalid operand conversion"));
                     break;
                 case OPA_SImm:
                     if (op->type == YASM_INSN__OPERAND_IMM) {
                         imm = op->data.val;
-                        im_len = size_lookup[(info_ops[i] &
-                                              OPS_MASK)>>OPS_SHIFT];
+                        im_len = size_lookup[info_ops[i].size];
                         im_sign = 1;
                     } else
                         yasm_internal_error(N_("invalid operand conversion"));
@@ -3195,8 +1211,8 @@
                     if (op->type == YASM_INSN__OPERAND_SEGREG)
                         spare = (unsigned char)(op->data.reg&7);
                     else if (op->type == YASM_INSN__OPERAND_REG) {
-                        if (yasm_x86__set_rex_from_reg(&insn->rex, &spare,
-                                op->data.reg, mode_bits, X86_REX_R))
+                        if (yasm_x86__set_rex_from_reg(&insn->rex, pdrex,
+                                &spare, op->data.reg, mode_bits, X86_REX_R))
                             return;
                     } else
                         yasm_internal_error(N_("invalid operand conversion"));
@@ -3204,8 +1220,8 @@
                 case OPA_Op0Add:
                     if (op->type == YASM_INSN__OPERAND_REG) {
                         unsigned char opadd;
-                        if (yasm_x86__set_rex_from_reg(&insn->rex, &opadd,
-                                op->data.reg, mode_bits, X86_REX_B))
+                        if (yasm_x86__set_rex_from_reg(&insn->rex, pdrex,
+                                &opadd, op->data.reg, mode_bits, X86_REX_B))
                             return;
                         insn->opcode.opcode[0] += opadd;
                     } else
@@ -3214,8 +1230,8 @@
                 case OPA_Op1Add:
                     if (op->type == YASM_INSN__OPERAND_REG) {
                         unsigned char opadd;
-                        if (yasm_x86__set_rex_from_reg(&insn->rex, &opadd,
-                                op->data.reg, mode_bits, X86_REX_B))
+                        if (yasm_x86__set_rex_from_reg(&insn->rex, pdrex,
+                                &opadd, op->data.reg, mode_bits, X86_REX_B))
                             return;
                         insn->opcode.opcode[1] += opadd;
                     } else
@@ -3225,11 +1241,11 @@
                     if (op->type == YASM_INSN__OPERAND_REG) {
                         insn->x86_ea =
                             yasm_x86__ea_create_reg(insn->x86_ea,
-                                                    (unsigned long)op->data.reg,
-                                                    &insn->rex, mode_bits);
+                                (unsigned long)op->data.reg, &insn->rex,
+                                pdrex, mode_bits);
                         if (!insn->x86_ea ||
-                            yasm_x86__set_rex_from_reg(&insn->rex, &spare,
-                                op->data.reg, mode_bits, X86_REX_R)) {
+                            yasm_x86__set_rex_from_reg(&insn->rex, pdrex,
+                                &spare, op->data.reg, mode_bits, X86_REX_R)) {
                             if (insn->x86_ea)
                                 yasm_xfree(insn->x86_ea);
                             yasm_xfree(insn);
@@ -3262,14 +1278,18 @@
                     yasm_x86__ea_destroy(op->data.ea);
                     break;
                 }
+                case OPA_DREX:
+                    drex &= 0x0F;
+                    drex |= (op->data.reg << 4) & 0xF0;
+                    break;
                 default:
                     yasm_internal_error(N_("unknown operand action"));
             }
 
-            if ((info_ops[i] & OPS_MASK) == OPS_BITS)
+            if (info_ops[i].size == OPS_BITS)
                 insn->common.opersize = (unsigned char)mode_bits;
 
-            switch ((int)(info_ops[i] & OPAP_MASK)) {
+            switch (info_ops[i].post_action) {
                 case OPAP_None:
                     break;
                 case OPAP_SImm8:
@@ -3303,7 +1323,9 @@
     }
 
     if (insn->x86_ea) {
-        yasm_x86__ea_init(insn->x86_ea, spare, prev_bc);
+        yasm_x86__ea_init(insn->x86_ea, spare, drex,
+                          (unsigned int)(info->drex_oc0 & NEED_DREX_MASK),
+                          prev_bc);
         for (i=0; i<id_insn->insn.num_segregs; i++)
             yasm_ea_set_segreg(&insn->x86_ea->ea, id_insn->insn.segregs[i]);
     } else if (id_insn->insn.num_segregs > 0 && insn->special_prefix == 0) {
@@ -3342,8 +1364,12 @@
              * short mov instructions if a 32-bit address override is applied in
              * 64-bit mode to an EA of just an offset (no registers) and the
              * target register is al/ax/eax/rax.
+             *
+             * We don't want to do this if we're in default rel mode.
              */
-            if (insn->common.mode_bits == 64 && insn->common.addrsize == 32 &&
+            if (!id_insn->default_rel &&
+                insn->common.mode_bits == 64 &&
+                insn->common.addrsize == 32 &&
                 (!insn->x86_ea->ea.disp.abs ||
                  !yasm_expr__contains(insn->x86_ea->ea.disp.abs,
                                       YASM_EXPR_REG))) {
@@ -3370,7 +1396,8 @@
                  * opcode 0 being a mov instruction!
                  */
                 insn->x86_ea = yasm_x86__ea_create_reg(insn->x86_ea,
-                    (unsigned long)insn->opcode.opcode[0]-0xB8, &rex_temp, 64);
+                    (unsigned long)insn->opcode.opcode[0]-0xB8, &rex_temp,
+                    NULL, 64);
 
                 /* Make the imm32s form permanent. */
                 insn->opcode.opcode[0] = insn->opcode.opcode[1];
@@ -3395,135 +1422,112 @@
     /* instruction parse group - NULL if prefix */
     /*@null@*/ const x86_insn_info *group;
 
-    /* For instruction, modifier in upper 24 bits, number of elements in group
-     * in lower 8 bits.
-     * For prefix, prefix type.
+    /* For instruction, number of elements in group.
+     * For prefix, prefix type shifted right by 8 (shifted back on use).
      */
-    unsigned long data1;
+    unsigned int num_info:8;
 
-    /* For instruction, cpu flags.
+    /* For instruction, GAS suffix flags.
      * For prefix, prefix value.
      */
-    unsigned long data2;
+    unsigned int flags:8;
 
-    /* suffix flags for instructions */
-    enum {
-        NONE = 0,
-        SUF_B = (MOD_GasSufB >> MOD_GasSuf_SHIFT),
-        SUF_W = (MOD_GasSufW >> MOD_GasSuf_SHIFT),
-        SUF_L = (MOD_GasSufL >> MOD_GasSuf_SHIFT),
-        SUF_Q = (MOD_GasSufQ >> MOD_GasSuf_SHIFT),
-        SUF_S = (MOD_GasSufS >> MOD_GasSuf_SHIFT),
-        WEAK = 0x80     /* Relaxed operand mode for GAS */
-    } flags;
+    /* Instruction modifier data. */
+    unsigned int mod_data0:8;
+    unsigned int mod_data1:8;
+    unsigned int mod_data2:8;
+
+    /* CPU flags */
+    unsigned int cpu0:8;
+    unsigned int cpu1:8;
+    unsigned int cpu2:8;
 } insnprefix_parse_data;
-#define INSN(name, flags, group, mod, cpu) \
-    { name, group##_insn, (mod##UL<<8)|NELEMS(group##_insn), cpu, flags }
-#define PREFIX(name, type, value) \
-    { name, NULL, type, value, NONE }
-
-/* Static parse data structure for CPU feature flags */
-typedef struct cpu_parse_data {
-    const char *name;
-
-    unsigned long cpu;
-    enum {
-        CPU_MODE_VERBATIM,
-        CPU_MODE_SET,
-        CPU_MODE_CLEAR
-    } mode;
-} cpu_parse_data;
-
-typedef struct regtmod_parse_data {
-    const char *name;
-
-    unsigned long regtmod;
-} regtmod_parse_data;
-#define REG(name, type, index, bits) \
-    { name, (((unsigned long)YASM_ARCH_REG) << 24) | \
-            (((unsigned long)bits) << 16) | (type | index) }
-#define REGGROUP(name, group) \
-    { name, (((unsigned long)YASM_ARCH_REGGROUP) << 24) | (group) }
-#define SEGREG(name, prefix, num, bits) \
-    { name, (((unsigned long)YASM_ARCH_SEGREG) << 24) | \
-            (((unsigned long)bits) << 16) | (prefix << 8) | (num) }
-#define TARGETMOD(name, mod) \
-    { name, (((unsigned long)YASM_ARCH_TARGETMOD) << 24) | (mod) }
 
 /* Pull in all parse data */
-#include "x86parse.c"
+#include "x86insn_nasm.c"
+#include "x86insn_gas.c"
 
 static const char *
-cpu_find_reverse(unsigned long cpu)
+cpu_find_reverse(unsigned int cpu0, unsigned int cpu1, unsigned int cpu2)
 {
     static char cpuname[200];
+    wordptr cpu = BitVector_Create(128, TRUE);
+
+    if (cpu0 != CPU_Any)
+        BitVector_Bit_On(cpu, cpu0);
+    if (cpu1 != CPU_Any)
+        BitVector_Bit_On(cpu, cpu1);
+    if (cpu2 != CPU_Any)
+        BitVector_Bit_On(cpu, cpu2);
 
     cpuname[0] = '\0';
 
-    if (cpu & CPU_Prot)
+    if (BitVector_bit_test(cpu, CPU_Prot))
         strcat(cpuname, " Protected");
-    if (cpu & CPU_Undoc)
+    if (BitVector_bit_test(cpu, CPU_Undoc))
         strcat(cpuname, " Undocumented");
-    if (cpu & CPU_Obs)
+    if (BitVector_bit_test(cpu, CPU_Obs))
         strcat(cpuname, " Obsolete");
-    if (cpu & CPU_Priv)
+    if (BitVector_bit_test(cpu, CPU_Priv))
         strcat(cpuname, " Privileged");
 
-    if (cpu & CPU_FPU)
+    if (BitVector_bit_test(cpu, CPU_FPU))
         strcat(cpuname, " FPU");
-    if (cpu & CPU_MMX)
+    if (BitVector_bit_test(cpu, CPU_MMX))
         strcat(cpuname, " MMX");
-    if (cpu & CPU_SSE)
+    if (BitVector_bit_test(cpu, CPU_SSE))
         strcat(cpuname, " SSE");
-    if (cpu & CPU_SSE2)
+    if (BitVector_bit_test(cpu, CPU_SSE2))
         strcat(cpuname, " SSE2");
-    if (cpu & CPU_SSE3)
+    if (BitVector_bit_test(cpu, CPU_SSE3))
         strcat(cpuname, " SSE3");
-    if (cpu & CPU_3DNow)
+    if (BitVector_bit_test(cpu, CPU_3DNow))
         strcat(cpuname, " 3DNow");
-    if (cpu & CPU_Cyrix)
+    if (BitVector_bit_test(cpu, CPU_Cyrix))
         strcat(cpuname, " Cyrix");
-    if (cpu & CPU_AMD)
+    if (BitVector_bit_test(cpu, CPU_AMD))
         strcat(cpuname, " AMD");
-    if (cpu & CPU_SMM)
+    if (BitVector_bit_test(cpu, CPU_SMM))
         strcat(cpuname, " SMM");
-    if (cpu & CPU_SVM)
+    if (BitVector_bit_test(cpu, CPU_SVM))
         strcat(cpuname, " SVM");
-    if (cpu & CPU_PadLock)
+    if (BitVector_bit_test(cpu, CPU_PadLock))
         strcat(cpuname, " PadLock");
-    if (cpu & CPU_EM64T)
+    if (BitVector_bit_test(cpu, CPU_EM64T))
         strcat(cpuname, " EM64T");
-    if (cpu & CPU_SSSE3)
+    if (BitVector_bit_test(cpu, CPU_SSSE3))
         strcat(cpuname, " SSSE3");
-    if (cpu & CPU_SSE41)
+    if (BitVector_bit_test(cpu, CPU_SSE41))
         strcat(cpuname, " SSE4.1");
-    if (cpu & CPU_SSE42)
+    if (BitVector_bit_test(cpu, CPU_SSE42))
         strcat(cpuname, " SSE4.2");
 
-    if (cpu & CPU_186)
+    if (BitVector_bit_test(cpu, CPU_186))
         strcat(cpuname, " 186");
-    if (cpu & CPU_286)
+    if (BitVector_bit_test(cpu, CPU_286))
         strcat(cpuname, " 286");
-    if (cpu & CPU_386)
+    if (BitVector_bit_test(cpu, CPU_386))
         strcat(cpuname, " 386");
-    if (cpu & CPU_486)
+    if (BitVector_bit_test(cpu, CPU_486))
         strcat(cpuname, " 486");
-    if (cpu & CPU_586)
+    if (BitVector_bit_test(cpu, CPU_586))
         strcat(cpuname, " 586");
-    if (cpu & CPU_686)
+    if (BitVector_bit_test(cpu, CPU_686))
         strcat(cpuname, " 686");
-    if (cpu & CPU_P3)
+    if (BitVector_bit_test(cpu, CPU_P3))
         strcat(cpuname, " P3");
-    if (cpu & CPU_P4)
+    if (BitVector_bit_test(cpu, CPU_P4))
         strcat(cpuname, " P4");
-    if (cpu & CPU_IA64)
+    if (BitVector_bit_test(cpu, CPU_IA64))
         strcat(cpuname, " IA64");
-    if (cpu & CPU_K6)
+    if (BitVector_bit_test(cpu, CPU_K6))
         strcat(cpuname, " K6");
-    if (cpu & CPU_Athlon)
+    if (BitVector_bit_test(cpu, CPU_Athlon))
         strcat(cpuname, " Athlon");
-    if (cpu & CPU_Hammer)
+    if (BitVector_bit_test(cpu, CPU_Hammer))
         strcat(cpuname, " Hammer");
+
+    BitVector_Destroy(cpu);
     return cpuname;
 }
 
@@ -3534,6 +1538,7 @@
 {
     yasm_arch_x86 *arch_x86 = (yasm_arch_x86 *)arch;
     /*@null@*/ const insnprefix_parse_data *pdata;
+    unsigned int cpu0, cpu1, cpu2;
     size_t i;
     static char lcaseid[16];
 
@@ -3559,55 +1564,74 @@
     if (!pdata)
         return YASM_ARCH_NOTINSNPREFIX;
 
-    if (pdata->group) {
-        unsigned long cpu = pdata->data2;
-        x86_id_insn *id_insn;
+    cpu0 = pdata->cpu0;
+    cpu1 = pdata->cpu1;
+    cpu2 = pdata->cpu2;
 
-        if ((cpu & CPU_64) && arch_x86->mode_bits != 64) {
+    if (pdata->group) {
+        x86_id_insn *id_insn;
+        wordptr cpu_enabled = arch_x86->cpu_enables[arch_x86->active_cpu];
+
+        if (arch_x86->mode_bits != 64 &&
+            (cpu0 == CPU_64 || cpu1 == CPU_64 || cpu2 == CPU_64)) {
             yasm_warn_set(YASM_WARN_GENERAL,
                           N_("`%s' is an instruction in 64-bit mode"), id);
             return YASM_ARCH_NOTINSNPREFIX;
         }
-        if ((cpu & CPU_Not64) && arch_x86->mode_bits == 64) {
+        if (arch_x86->mode_bits == 64 &&
+            (cpu0 == CPU_Not64 || cpu1 == CPU_Not64 || cpu2 == CPU_Not64)) {
             yasm_error_set(YASM_ERROR_GENERAL,
                            N_("`%s' invalid in 64-bit mode"), id);
             id_insn = yasm_xmalloc(sizeof(x86_id_insn));
             yasm_insn_initialize(&id_insn->insn);
             id_insn->group = not64_insn;
-            id_insn->cpu_enabled = CPU_Not64;
-            id_insn->mod_data = 0;
+            id_insn->cpu_enabled = cpu_enabled;
+            id_insn->mod_data[0] = 0;
+            id_insn->mod_data[1] = 0;
+            id_insn->mod_data[2] = 0;
             id_insn->num_info = NELEMS(not64_insn);
             id_insn->mode_bits = arch_x86->mode_bits;
             id_insn->suffix = 0;
             id_insn->parser = arch_x86->parser;
             id_insn->force_strict = arch_x86->force_strict != 0;
+            id_insn->default_rel = arch_x86->default_rel != 0;
             *bc = yasm_bc_create_common(&x86_id_insn_callback, id_insn, line);
             return YASM_ARCH_INSN;
         }
 
-        cpu &= ~(CPU_64 | CPU_Not64);
-        if ((arch_x86->cpu_enabled & cpu) != cpu) {
+        if (cpu0 == CPU_64 || cpu0 == CPU_Not64)
+            cpu0 = CPU_Any;
+        if (cpu1 == CPU_64 || cpu1 == CPU_Not64)
+            cpu1 = CPU_Any;
+        if (cpu2 == CPU_64 || cpu2 == CPU_Not64)
+            cpu2 = CPU_Any;
+        if (!BitVector_bit_test(cpu_enabled, cpu0) ||
+            !BitVector_bit_test(cpu_enabled, cpu1) ||
+            !BitVector_bit_test(cpu_enabled, cpu2)) {
             yasm_warn_set(YASM_WARN_GENERAL,
                           N_("`%s' is an instruction in CPU%s"), id,
-                          cpu_find_reverse(cpu));
+                          cpu_find_reverse(cpu0, cpu1, cpu2));
             return YASM_ARCH_NOTINSNPREFIX;
         }
 
         id_insn = yasm_xmalloc(sizeof(x86_id_insn));
         yasm_insn_initialize(&id_insn->insn);
         id_insn->group = pdata->group;
-        id_insn->cpu_enabled = arch_x86->cpu_enabled;
-        id_insn->mod_data = pdata->data1 >> 8;
-        id_insn->num_info = pdata->data1 & 0xff;
+        id_insn->cpu_enabled = cpu_enabled;
+        id_insn->mod_data[0] = pdata->mod_data0;
+        id_insn->mod_data[1] = pdata->mod_data1;
+        id_insn->mod_data[2] = pdata->mod_data2;
+        id_insn->num_info = pdata->num_info;
         id_insn->mode_bits = arch_x86->mode_bits;
         id_insn->suffix = pdata->flags;
         id_insn->parser = arch_x86->parser;
         id_insn->force_strict = arch_x86->force_strict != 0;
+        id_insn->default_rel = arch_x86->default_rel != 0;
         *bc = yasm_bc_create_common(&x86_id_insn_callback, id_insn, line);
         return YASM_ARCH_INSN;
     } else {
-        unsigned long type = pdata->data1;
-        unsigned long value = pdata->data2;
+        unsigned long type = pdata->num_info<<8;
+        unsigned long value = pdata->flags;
 
         if (arch_x86->mode_bits == 64 && type == X86_OPERSIZE && value == 32) {
             yasm_error_set(YASM_ERROR_GENERAL,
@@ -3621,9 +1645,8 @@
             return YASM_ARCH_NOTINSNPREFIX;
         }
 
-        if ((type == X86_REX ||
-             (value == 64 && (type == X86_OPERSIZE || type == X86_ADDRSIZE)))
-            && arch_x86->mode_bits != 64) {
+        if (arch_x86->mode_bits != 64 &&
+            (cpu0 == CPU_64 || cpu1 == CPU_64 || cpu2 == CPU_64)) {
             yasm_warn_set(YASM_WARN_GENERAL,
                           N_("`%s' is a prefix in 64-bit mode"), id);
             return YASM_ARCH_NOTINSNPREFIX;
@@ -3633,80 +1656,6 @@
     }
 }
 
-void
-yasm_x86__parse_cpu(yasm_arch_x86 *arch_x86, const char *cpuid,
-                    size_t cpuid_len)
-{
-    /*@null@*/ const cpu_parse_data *pdata;
-    size_t i;
-    static char lcaseid[16];
-
-    if (cpuid_len > 15)
-        return;
-    for (i=0; i<cpuid_len; i++)
-        lcaseid[i] = tolower(cpuid[i]);
-    lcaseid[cpuid_len] = '\0';
-
-    pdata = cpu_find(lcaseid, cpuid_len);
-    if (!pdata) {
-        yasm_warn_set(YASM_WARN_GENERAL,
-                      N_("unrecognized CPU identifier `%s'"), cpuid);
-        return;
-    }
-
-    switch (pdata->mode) {
-        case CPU_MODE_VERBATIM:
-            arch_x86->cpu_enabled = pdata->cpu;
-            break;
-        case CPU_MODE_SET:
-            arch_x86->cpu_enabled |= pdata->cpu;
-            break;
-        case CPU_MODE_CLEAR:
-            arch_x86->cpu_enabled &= ~pdata->cpu;
-            break;
-    }
-}
-
-yasm_arch_regtmod
-yasm_x86__parse_check_regtmod(yasm_arch *arch, const char *id, size_t id_len,
-                              uintptr_t *data)
-{
-    yasm_arch_x86 *arch_x86 = (yasm_arch_x86 *)arch;
-    /*@null@*/ const regtmod_parse_data *pdata;
-    size_t i;
-    static char lcaseid[8];
-    unsigned int bits;
-    yasm_arch_regtmod type;
-
-    if (id_len > 7)
-        return YASM_ARCH_NOTREGTMOD;
-    for (i=0; i<id_len; i++)
-        lcaseid[i] = tolower(id[i]);
-    lcaseid[id_len] = '\0';
-
-    pdata = regtmod_find(lcaseid, id_len);
-    if (!pdata)
-        return YASM_ARCH_NOTREGTMOD;
-
-    type = (yasm_arch_regtmod)(pdata->regtmod >> 24);
-    bits = (pdata->regtmod >> 16) & 0xFF;
-
-    if (type == YASM_ARCH_REG && bits != 0 && arch_x86->mode_bits != bits) {
-        yasm_warn_set(YASM_WARN_GENERAL,
-                      N_("`%s' is a register in %u-bit mode"), id, bits);
-        return YASM_ARCH_NOTREGTMOD;
-    }
-
-    if (type == YASM_ARCH_SEGREG && bits != 0 && arch_x86->mode_bits == bits) {
-        yasm_warn_set(YASM_WARN_GENERAL,
-                      N_("`%s' segment register ignored in %u-bit mode"), id,
-                      bits);
-    }
-
-    *data = pdata->regtmod & 0x0000FFFFUL;
-    return type;
-}
-
 static void
 x86_id_insn_destroy(void *contents)
 {
@@ -3731,13 +1680,16 @@
 
     yasm_insn_initialize(&id_insn->insn);
     id_insn->group = empty_insn;
-    id_insn->cpu_enabled = arch_x86->cpu_enabled;
-    id_insn->mod_data = 0;
+    id_insn->cpu_enabled = arch_x86->cpu_enables[arch_x86->active_cpu];
+    id_insn->mod_data[0] = 0;
+    id_insn->mod_data[1] = 0;
+    id_insn->mod_data[2] = 0;
     id_insn->num_info = NELEMS(empty_insn);
     id_insn->mode_bits = arch_x86->mode_bits;
     id_insn->suffix = 0;
     id_insn->parser = arch_x86->parser;
     id_insn->force_strict = arch_x86->force_strict != 0;
+    id_insn->default_rel = arch_x86->default_rel != 0;
 
     return yasm_bc_create_common(&x86_id_insn_callback, id_insn, line);
 }
diff --git a/modules/arch/x86/x86parse.gap b/modules/arch/x86/x86parse.gap
deleted file mode 100644
index b06a930..0000000
--- a/modules/arch/x86/x86parse.gap
+++ /dev/null
@@ -1,1274 +0,0 @@
-# GAP (gen_arch_parse) input file for x86 architecture
-# $Id$
-#
-#  Copyright (C) 2001-2007  Peter Johnson
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# 1. Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-# Configure GAP for x86 generation mode
-ARCH	x86
-
-# Supported x86 parsers
-PARSERS	nasm gas
-
-# INSN parameters:
-# - parser (- if any)
-# - base name of instruction
-# - if string, each character is an allowed GAS suffix
-#   if defined name, value is GAS suffix mode set (no character suffix reqd)
-# - instruction group (sans _insn suffix)
-# - modifiers (up to 3 bytes)
-# - CPU flags
-#
-# The string mode of the second parameter is a shortcut for GAS forms, e.g.:
-# INSN	-	mov	"bwl"	mov	0	CPU_Any
-# is equivalent to:
-# INSN	-	mov	NONE	mov	0	CPU_Any
-# INSN	gas	movb	SUF_B	mov	0	CPU_Any
-# INSN	gas	movw	SUF_W	mov	0	CPU_Any
-# INSN	gas	movl	SUF_L	mov	0	CPU_Any
-
-# Move
-INSN	-	mov	"bwl"	mov	0		CPU_Any
-INSN	gas	movabs	"bwlq"	movabs	0		CPU_Hammer|CPU_64
-
-# Move with sign/zero extend
-INSN	gas	movsbw	SUF_B	movszx	0xBE		CPU_386
-INSN	gas	movsbl	SUF_B	movszx	0xBE		CPU_386
-INSN	gas	movswl	SUF_W	movszx	0xBE		CPU_386
-INSN	gas	movsbq	SUF_B	movszx	0xBE		CPU_Hammer|CPU_64
-INSN	gas	movswq	SUF_W	movszx	0xBE		CPU_Hammer|CPU_64
-INSN	-	movsx	"bw"	movszx	0xBE		CPU_386
-INSN	gas	movslq	SUF_L	movsxd	0		CPU_Hammer|CPU_64
-INSN	nasm	movsxd	NONE	movsxd	0		CPU_Hammer|CPU_64
-INSN	gas	movzbw	SUF_B	movszx	0xB6		CPU_386
-INSN	gas	movzbl	SUF_B	movszx	0xB6		CPU_386
-INSN	gas	movzwl	SUF_W	movszx	0xB6		CPU_386
-INSN	gas	movzbq	SUF_B	movszx	0xB6		CPU_Hammer|CPU_64
-INSN	gas	movzwq	SUF_W	movszx	0xB6		CPU_Hammer|CPU_64
-INSN	-	movzx	NONE	movszx	0xB6		CPU_386
-
-# Push instructions
-INSN	-	push	"wlq"	push	0		CPU_Any
-INSN	-	pusha	NONE	onebyte	0x0060		CPU_186|CPU_Not64
-INSN	nasm	pushad	NONE	onebyte	0x2060		CPU_386|CPU_Not64
-INSN	gas	pushal	NONE	onebyte	0x2060		CPU_386|CPU_Not64
-INSN	-	pushaw	NONE	onebyte	0x1060		CPU_186|CPU_Not64
-
-# Pop instructions
-INSN	-	pop	"wlq"	pop	0		CPU_Any
-INSN	-	popa	NONE	onebyte	0x0061		CPU_186|CPU_Not64
-INSN	nasm	popad	NONE	onebyte	0x2061		CPU_386|CPU_Not64
-INSN	gas	popal	NONE	onebyte	0x2061		CPU_386|CPU_Not64
-INSN	-	popaw	NONE	onebyte	0x1061		CPU_186|CPU_Not64
-
-# Exchange
-INSN	-	xchg	"bwlq"	xchg	0		CPU_Any
-
-# In/out from ports
-INSN	-	in	"bwl"	in	0		CPU_Any
-INSN	-	out	"bwl"	out	0		CPU_Any
-# Load effective address
-INSN	-	lea	"wlq"	lea	0		CPU_Any
-# Load segment registers from memory
-INSN	-	lds	"wl"	ldes	0xC5		CPU_Not64
-INSN	-	les	"wl"	ldes	0xC4		CPU_Not64
-INSN	-	lfs	"wl"	lfgss	0xB4		CPU_386
-INSN	-	lgs	"wl"	lfgss	0xB5		CPU_386
-INSN	-	lss	"wl"	lfgss	0xB2		CPU_386
-# Flags register instructions
-INSN	-	clc	NONE	onebyte	0x00F8		CPU_Any
-INSN	-	cld	NONE	onebyte	0x00FC		CPU_Any
-INSN	-	cli	NONE	onebyte	0x00FA		CPU_Any
-INSN	-	clts	NONE	twobyte	0x0F06		CPU_286|CPU_Priv
-INSN	-	cmc	NONE	onebyte	0x00F5		CPU_Any
-INSN	-	lahf	NONE	onebyte	0x009F		CPU_Any
-INSN	-	sahf	NONE	onebyte	0x009E		CPU_Any
-INSN	-	pushf	NONE	onebyte	0x40009C	CPU_Any
-INSN	nasm	pushfd	NONE	onebyte	0x00209C	CPU_386|CPU_Not64
-INSN	gas	pushfl	NONE	onebyte	0x00209C	CPU_386|CPU_Not64
-INSN	-	pushfw	NONE	onebyte	0x40109C	CPU_Any
-INSN	-	pushfq	NONE	onebyte	0x40409C	CPU_Hammer|CPU_64
-INSN	-	popf	NONE	onebyte	0x40009D	CPU_Any
-INSN	nasm	popfd	NONE	onebyte	0x00209D	CPU_386|CPU_Not64
-INSN	gas	popfl   NONE	onebyte	0x00209D	CPU_386|CPU_Not64
-INSN	-	popfw   NONE	onebyte	0x40109D	CPU_Any
-INSN	-	popfq   NONE	onebyte	0x40409D	CPU_Hammer|CPU_64
-INSN	-	stc	NONE	onebyte	0x00F9		CPU_Any
-INSN	-	std	NONE	onebyte	0x00FD		CPU_Any
-INSN	-	sti	NONE	onebyte	0x00FB		CPU_Any
-# Arithmetic
-INSN	-	add	"bwlq"	arith	0x0000		CPU_Any
-INSN	-	inc	"bwlq"	incdec	0x0040		CPU_Any
-INSN	-	sub	"bwlq"	arith	0x0528		CPU_Any
-INSN	-	dec	"bwlq"	incdec	0x0148		CPU_Any
-INSN	-	sbb	"bwlq"	arith	0x0318		CPU_Any
-INSN	-	cmp	"bwlq"	arith	0x0738		CPU_Any
-INSN	-	test    "bwlq"	test	0		CPU_Any
-INSN	-	and	"bwlq"	arith	0x0420		CPU_Any
-INSN	-	or	"bwlq"	arith	0x0108		CPU_Any
-INSN	-	xor	"bwlq"	arith	0x0630		CPU_Any
-INSN	-	adc	"bwlq"	arith	0x0210		CPU_Any
-INSN	-	neg	"bwlq"	f6	0x03		CPU_Any
-INSN	-	not	"bwlq"	f6	0x02		CPU_Any
-INSN	-	aaa	NONE	onebyte	0x0037		CPU_Not64
-INSN	-	aas	NONE	onebyte	0x003F		CPU_Not64
-INSN	-	daa	NONE	onebyte	0x0027		CPU_Not64
-INSN	-	das	NONE	onebyte	0x002F		CPU_Not64
-INSN	-	aad	NONE	aadm	0x01		CPU_Not64
-INSN	-	aam	NONE	aadm	0x00		CPU_Not64
-# Conversion instructions
-INSN	-	cbw	NONE	onebyte	0x1098		CPU_Any
-INSN	-	cwde    NONE	onebyte	0x2098		CPU_386
-INSN	-	cdqe    NONE	onebyte	0x4098		CPU_Hammer|CPU_64
-INSN	-	cwd	NONE	onebyte	0x1099		CPU_Any
-INSN	-	cdq	NONE	onebyte	0x2099		CPU_386
-INSN	-	cqo	NONE	onebyte	0x4099		CPU_Hammer|CPU_64
-# Conversion instructions - GAS / AT&T naming
-INSN	gas	cbtw	NONE	onebyte	0x1098		CPU_Any
-INSN	gas	cwtl	NONE	onebyte	0x2098		CPU_386
-INSN	gas	cltq	NONE	onebyte	0x4098		CPU_Hammer|CPU_64
-INSN	gas	cwtd	NONE	onebyte	0x1099		CPU_Any
-INSN	gas	cltd	NONE	onebyte	0x2099		CPU_386
-INSN	gas	cqto	NONE	onebyte	0x4099		CPU_Hammer|CPU_64
-# Multiplication and division
-INSN	-	mul	"bwlq"	f6	0x04		CPU_Any
-INSN	-	imul	"bwlq"	imul	0		CPU_Any
-INSN	-	div	"bwlq"	div	0x06		CPU_Any
-INSN	-	idiv	"bwlq"	div	0x07		CPU_Any
-# Shifts
-INSN	-	rol	"bwlq"	shift	0x00		CPU_Any
-INSN	-	ror	"bwlq"	shift	0x01		CPU_Any
-INSN	-	rcl	"bwlq"	shift	0x02		CPU_Any
-INSN	-	rcr	"bwlq"	shift	0x03		CPU_Any
-INSN	-	sal	"bwlq"	shift	0x04		CPU_Any
-INSN	-	shl	"bwlq"	shift	0x04		CPU_Any
-INSN	-	shr	"bwlq"	shift	0x05		CPU_Any
-INSN	-	sar	"bwlq"	shift	0x07		CPU_Any
-INSN	-	shld	"wlq"	shlrd	0xA4		CPU_386
-INSN	-	shrd	"wlq"	shlrd	0xAC		CPU_386
-# Control transfer instructions unconditional)
-INSN	-	call	NONE	call	0		CPU_Any
-INSN	gas	calll	NONE	call	0		CPU_Not64
-INSN	gas	callq	NONE	call	0		CPU_Hammer|CPU_64
-INSN	-	jmp	NONE	jmp	0		CPU_Any
-INSN	-	ret	NONE	retnf	0x00C2		CPU_Any
-INSN	gas	retw	NONE	retnf	0x10C2		CPU_Any
-INSN	gas	retl	NONE	retnf	0x00C2		CPU_Not64
-INSN	gas	retq	NONE	retnf	0x00C2		CPU_Hammer|CPU_64
-INSN	nasm	retn	NONE	retnf	0x00C2		CPU_Any
-INSN	nasm	retf	NONE	retnf	0x40CA		CPU_Any
-INSN	gas	lretw	NONE	retnf	0x10CA		CPU_Any
-INSN	gas	lretl	NONE	retnf	0x00CA		CPU_Any
-INSN	gas	lretq	NONE	retnf	0x40CA		CPU_Hammer|CPU_64
-INSN	-	enter	"wlq"	enter	0		CPU_186
-INSN	-	leave	NONE	onebyte	0x4000C9	CPU_186
-INSN	gas	leavew	NONE	onebyte	0x0010C9	CPU_186
-INSN	gas	leavel	NONE	onebyte	0x4000C9	CPU_186
-INSN	gas	leaveq	NONE	onebyte	0x4000C9	CPU_Hammer|CPU_64
-# Conditional jumps
-INSN	-	jo	NONE	jcc	0x00		CPU_Any
-INSN	-	jno	NONE	jcc	0x01		CPU_Any
-INSN	-	jb	NONE	jcc	0x02		CPU_Any
-INSN	-	jc	NONE	jcc	0x02		CPU_Any
-INSN	-	jnae	NONE	jcc	0x02		CPU_Any
-INSN	-	jnb	NONE	jcc	0x03		CPU_Any
-INSN	-	jnc	NONE	jcc	0x03		CPU_Any
-INSN	-	jae	NONE	jcc	0x03		CPU_Any
-INSN	-	je	NONE	jcc	0x04		CPU_Any
-INSN	-	jz	NONE	jcc	0x04		CPU_Any
-INSN	-	jne	NONE	jcc	0x05		CPU_Any
-INSN	-	jnz	NONE	jcc	0x05		CPU_Any
-INSN	-	jbe	NONE	jcc	0x06		CPU_Any
-INSN	-	jna	NONE	jcc	0x06		CPU_Any
-INSN	-	jnbe	NONE	jcc	0x07		CPU_Any
-INSN	-	ja	NONE	jcc	0x07		CPU_Any
-INSN	-	js	NONE	jcc	0x08		CPU_Any
-INSN	-	jns	NONE	jcc	0x09		CPU_Any
-INSN	-	jp	NONE	jcc	0x0A		CPU_Any
-INSN	-	jpe	NONE	jcc	0x0A		CPU_Any
-INSN	-	jnp	NONE	jcc	0x0B		CPU_Any
-INSN	-	jpo	NONE	jcc	0x0B		CPU_Any
-INSN	-	jl	NONE	jcc	0x0C		CPU_Any
-INSN	-	jnge	NONE	jcc	0x0C		CPU_Any
-INSN	-	jnl	NONE	jcc	0x0D		CPU_Any
-INSN	-	jge	NONE	jcc	0x0D		CPU_Any
-INSN	-	jle	NONE	jcc	0x0E		CPU_Any
-INSN	-	jng	NONE	jcc	0x0E		CPU_Any
-INSN	-	jnle	NONE	jcc	0x0F		CPU_Any
-INSN	-	jg	NONE	jcc	0x0F		CPU_Any
-INSN	-	jcxz	NONE	jcxz	0x10		CPU_Any
-INSN	-	jecxz	NONE	jcxz	0x20		CPU_386
-INSN	-	jrcxz	NONE	jcxz	0x40		CPU_Hammer|CPU_64
-# Loop instructions
-INSN	-	loop	NONE	loop	0x02		CPU_Any
-INSN	-	loopz	NONE	loop	0x01		CPU_Any
-INSN	-	loope	NONE	loop	0x01		CPU_Any
-INSN	-	loopnz	NONE	loop	0x00		CPU_Any
-INSN	-	loopne	NONE	loop	0x00		CPU_Any
-# Set byte on flag instructions
-INSN	-	seto	"b"	setcc	0x00		CPU_386
-INSN	-	setno	"b"	setcc	0x01		CPU_386
-INSN	-	setb	"b"	setcc	0x02		CPU_386
-INSN	-	setc	"b"	setcc	0x02		CPU_386
-INSN	-	setnae	"b"	setcc	0x02		CPU_386
-INSN	-	setnb	"b"	setcc	0x03		CPU_386
-INSN	-	setnc	"b"	setcc	0x03		CPU_386
-INSN	-	setae	"b"	setcc	0x03		CPU_386
-INSN	-	sete	"b"	setcc	0x04		CPU_386
-INSN	-	setz	"b"	setcc	0x04		CPU_386
-INSN	-	setne	"b"	setcc	0x05		CPU_386
-INSN	-	setnz	"b"	setcc	0x05		CPU_386
-INSN	-	setbe	"b"	setcc	0x06		CPU_386
-INSN	-	setna	"b"	setcc	0x06		CPU_386
-INSN	-	setnbe	"b"	setcc	0x07		CPU_386
-INSN	-	seta	"b"	setcc	0x07		CPU_386
-INSN	-	sets	"b"	setcc	0x08		CPU_386
-INSN	-	setns	"b"	setcc	0x09		CPU_386
-INSN	-	setp	"b"	setcc	0x0A		CPU_386
-INSN	-	setpe	"b"	setcc	0x0A		CPU_386
-INSN	-	setnp	"b"	setcc	0x0B		CPU_386
-INSN	-	setpo	"b"	setcc	0x0B		CPU_386
-INSN	-	setl	"b"	setcc	0x0C		CPU_386
-INSN	-	setnge	"b"	setcc	0x0C		CPU_386
-INSN	-	setnl	"b"	setcc	0x0D		CPU_386
-INSN	-	setge	"b"	setcc	0x0D		CPU_386
-INSN	-	setle	"b"	setcc	0x0E		CPU_386
-INSN	-	setng	"b"	setcc	0x0E		CPU_386
-INSN	-	setnle	"b"	setcc	0x0F		CPU_386
-INSN	-	setg	"b"	setcc	0x0F		CPU_386
-# String instructions
-INSN	-	cmpsb	NONE	onebyte	0x00A6		CPU_Any
-INSN	-	cmpsw	NONE	onebyte	0x10A7		CPU_Any
-INSN	-	cmpsd	NONE	cmpsd	0		CPU_Any
-INSN	gas	cmpsl	NONE	onebyte	0x20A7		CPU_386
-INSN	-	cmpsq	NONE	onebyte	0x40A7		CPU_Hammer|CPU_64
-INSN	-	insb	NONE	onebyte	0x006C		CPU_Any
-INSN	-	insw	NONE	onebyte	0x106D		CPU_Any
-INSN	nasm	insd	NONE	onebyte	0x206D		CPU_386
-INSN	gas	insl	NONE	onebyte	0x206D		CPU_386
-INSN	-	outsb	NONE	onebyte	0x006E		CPU_Any
-INSN	-	outsw	NONE	onebyte	0x106F		CPU_Any
-INSN	nasm	outsd	NONE	onebyte	0x206F		CPU_386
-INSN	gas	outsl	NONE	onebyte	0x206F		CPU_386
-INSN	-	lodsb	NONE	onebyte	0x00AC		CPU_Any
-INSN	-	lodsw	NONE	onebyte	0x10AD		CPU_Any
-INSN	nasm	lodsd	NONE	onebyte	0x20AD		CPU_386
-INSN	gas	lodsl	NONE	onebyte	0x20AD		CPU_386
-INSN	-	lodsq	NONE	onebyte	0x40AD		CPU_Hammer|CPU_64
-INSN	-	movsb	NONE	onebyte	0x00A4		CPU_Any
-INSN	-	movsw	NONE	onebyte	0x10A5		CPU_Any
-INSN	-	movsd	NONE	movsd	0		CPU_386
-INSN	gas	movsl	NONE	onebyte	0x20A5		CPU_386
-INSN	-	movsq	NONE	onebyte	0x40A5		CPU_Hammer|CPU_64
-# smov alias for movs in GAS mode
-INSN	gas	smovb	NONE	onebyte	0x00A4		CPU_Any
-INSN	gas	smovw	NONE	onebyte	0x10A5		CPU_Any
-INSN	gas	smovl	NONE	onebyte	0x20A5		CPU_386
-INSN	gas	smovq	NONE	onebyte	0x40A5		CPU_Hammer|CPU_64
-INSN	-	scasb	NONE	onebyte	0x00AE		CPU_Any
-INSN	-	scasw	NONE	onebyte	0x10AF		CPU_Any
-INSN	nasm	scasd	NONE	onebyte	0x20AF		CPU_386
-INSN	gas	scasl	NONE	onebyte	0x20AF		CPU_386
-INSN	-	scasq	NONE	onebyte	0x40AF		CPU_Hammer|CPU_64
-# ssca alias for scas in GAS mode
-INSN	gas	sscab	NONE	onebyte	0x00AE		CPU_Any
-INSN	gas	sscaw	NONE	onebyte	0x10AF		CPU_Any
-INSN	gas	sscal	NONE	onebyte	0x20AF		CPU_386
-INSN	gas	sscaq	NONE	onebyte	0x40AF		CPU_Hammer|CPU_64
-INSN	-	stosb	NONE	onebyte	0x00AA		CPU_Any
-INSN	-	stosw	NONE	onebyte	0x10AB		CPU_Any
-INSN	nasm	stosd	NONE	onebyte	0x20AB		CPU_386
-INSN	gas	stosl	NONE	onebyte	0x20AB		CPU_386
-INSN	-	stosq	NONE	onebyte	0x40AB		CPU_Hammer|CPU_64
-INSN	-	xlatb	NONE	onebyte	0x00D7		CPU_Any
-# Bit manipulation
-INSN	-	bsf	"wlq"	bsfr	0xBC		CPU_386
-INSN	-	bsr	"wlq"	bsfr	0xBD		CPU_386
-INSN	-	bt	"wlq"	bittest	0x04A3		CPU_386
-INSN	-	btc	"wlq"	bittest	0x07BB		CPU_386
-INSN	-	btr	"wlq"	bittest	0x06B3		CPU_386
-INSN	-	bts	"wlq"	bittest	0x05AB		CPU_386
-# Interrupts and operating system instructions
-INSN	-	int	NONE	int	0		CPU_Any
-INSN	-	int3	NONE	onebyte	0x00CC		CPU_Any
-INSN	nasm	int03	NONE	onebyte	0x00CC		CPU_Any
-INSN	-	into	NONE	onebyte	0x00CE		CPU_Not64
-INSN	-	iret	NONE	onebyte	0x00CF		CPU_Any
-INSN	-	iretw	NONE	onebyte	0x10CF		CPU_Any
-INSN	nasm	iretd	NONE	onebyte	0x20CF		CPU_386
-INSN	gas	iretl	NONE	onebyte	0x20CF		CPU_386
-INSN	-	iretq	NONE	onebyte	0x40CF		CPU_Hammer|CPU_64
-INSN	-	rsm	NONE	twobyte	0x0FAA		CPU_586|CPU_SMM
-INSN	-	bound	"wl"	bound	0		CPU_186|CPU_Not64
-INSN	-	hlt	NONE	onebyte	0x00F4		CPU_Priv
-INSN	-	nop	NONE	onebyte	0x0090		CPU_Any
-# Protection control
-INSN	-	arpl	"w"	arpl	0		CPU_286|CPU_Prot|CPU_Not64
-INSN	-	lar	"wlq"	bsfr	0x02		CPU_286|CPU_Prot
-INSN	-	lgdt	"wlq"	twobytemem  0x020F01	CPU_286|CPU_Priv
-INSN	-	lidt	"wlq"	twobytemem  0x030F01	CPU_286|CPU_Priv
-INSN	-	lldt	"w"	prot286	0x0200		CPU_286|CPU_Prot|CPU_Priv
-INSN	-	lmsw	"w"	prot286	0x0601		CPU_286|CPU_Priv
-INSN	-	lsl	"wlq"	bsfr	0x03		CPU_286|CPU_Prot
-INSN	-	ltr	"w"	prot286	0x0300		CPU_286|CPU_Prot|CPU_Priv
-INSN	-	sgdt	"wlq"	twobytemem  0x000F01	CPU_286|CPU_Priv
-INSN	-	sidt	"wlq"	twobytemem  0x010F01	CPU_286|CPU_Priv
-INSN	-	sldt	"wlq"	sldtmsw	0x0000		CPU_286
-INSN	-	smsw	"wlq"	sldtmsw	0x0401		CPU_286
-INSN	-	str	"wlq"	str	0		CPU_286|CPU_Prot
-INSN	-	verr	"w"	prot286	0x0400		CPU_286|CPU_Prot
-INSN	-	verw	"w"	prot286	0x0500		CPU_286|CPU_Prot
-# Floating point instructions
-INSN	-	fld	"ls"	fld	0		CPU_FPU
-INSN	gas	fldt	WEAK	fldstpt	0x05		CPU_FPU
-INSN	-	fild	"lqs"	fildstp	0x050200	CPU_FPU
-INSN	gas	fildll	NONE	fbldstp	0x05		CPU_FPU
-INSN	-	fbld	NONE	fbldstp	0x04		CPU_FPU
-INSN	-	fst	"ls"	fst	0		CPU_FPU
-INSN	-	fist	"ls"	fiarith	0x02DB		CPU_FPU
-INSN	-	fstp	"ls"	fstp	0		CPU_FPU
-INSN	gas	fstpt	WEAK	fldstpt	0x07		CPU_FPU
-INSN	-	fistp	"lqs"	fildstp	0x070203	CPU_FPU
-INSN	gas	fistpll	NONE	fbldstp	0x07		CPU_FPU
-INSN	-	fbstp	NONE	fbldstp	0x06		CPU_FPU
-INSN	-	fxch	NONE	fxch	0		CPU_FPU
-INSN	-	fcom	"ls"	fcom	0x02D0		CPU_FPU
-INSN	-	ficom	"ls"	fiarith	0x02DA		CPU_FPU
-INSN	-	fcomp	"ls"	fcom	0x03D8		CPU_FPU
-INSN	-	ficomp	"ls"	fiarith	0x03DA		CPU_FPU
-INSN	-	fcompp	NONE	twobyte	0xDED9		CPU_FPU
-INSN	-	fucom	NONE	fcom2	0xDDE0		CPU_286|CPU_FPU
-INSN	-	fucomp	NONE	fcom2	0xDDE8		CPU_286|CPU_FPU
-INSN	-	fucompp	NONE	twobyte	0xDAE9		CPU_286|CPU_FPU
-INSN	-	ftst	NONE	twobyte	0xD9E4		CPU_FPU
-INSN	-	fxam	NONE	twobyte	0xD9E5		CPU_FPU
-INSN	-	fld1	NONE	twobyte	0xD9E8		CPU_FPU
-INSN	-	fldl2t	NONE	twobyte	0xD9E9		CPU_FPU
-INSN	-	fldl2e	NONE	twobyte	0xD9EA		CPU_FPU
-INSN	-	fldpi	NONE	twobyte	0xD9EB		CPU_FPU
-INSN	-	fldlg2	NONE	twobyte	0xD9EC		CPU_FPU
-INSN	-	fldln2	NONE	twobyte	0xD9ED		CPU_FPU
-INSN	-	fldz	NONE	twobyte	0xD9EE		CPU_FPU
-INSN	-	fadd	"ls"	farith	0x00C0C0	CPU_FPU
-INSN	-	faddp	NONE	farithp	0xC0		CPU_FPU
-INSN	-	fiadd	"ls"	fiarith	0x00DA		CPU_FPU
-INSN	-	fsub	"ls"	farith	0x04E0E8	CPU_FPU
-INSN	-	fisub	"ls"	fiarith	0x04DA		CPU_FPU
-INSN	nasm	fsubp	NONE	farithp	0xE8		CPU_FPU
-INSN	gas	fsubp	NONE	farithp	0xE0		CPU_FPU
-INSN	-	fsubr	"ls"	farith	0x05E8E0	CPU_FPU
-INSN	-	fisubr	"ls"	fiarith	0x05DA		CPU_FPU
-INSN	nasm	fsubrp	NONE	farithp	0xE0		CPU_FPU
-INSN	gas	fsubrp	NONE	farithp	0xE8		CPU_FPU
-INSN	-	fmul	"ls"	farith	0x01C8C8	CPU_FPU
-INSN	-	fimul	"ls"	fiarith	0x01DA		CPU_FPU
-INSN	-	fmulp	NONE	farithp	0xC8		CPU_FPU
-INSN	-	fdiv	"ls"	farith	0x06F0F8	CPU_FPU
-INSN	-	fidiv	"ls"	fiarith	0x06DA		CPU_FPU
-INSN	nasm	fdivp	NONE	farithp	0xF8		CPU_FPU
-INSN	gas	fdivp	NONE	farithp	0xF0		CPU_FPU
-INSN	-	fdivr	"ls"	farith	0x07F8F0	CPU_FPU
-INSN	-	fidivr	"ls"	fiarith	0x07DA		CPU_FPU
-INSN	nasm	fdivrp	NONE	farithp	0xF0		CPU_FPU
-INSN	gas	fdivrp	NONE	farithp	0xF8		CPU_FPU
-INSN	-	f2xm1	NONE	twobyte	0xD9F0		CPU_FPU
-INSN	-	fyl2x	NONE	twobyte	0xD9F1		CPU_FPU
-INSN	-	fptan	NONE	twobyte	0xD9F2		CPU_FPU
-INSN	-	fpatan	NONE	twobyte	0xD9F3		CPU_FPU
-INSN	-	fxtract	NONE	twobyte	0xD9F4		CPU_FPU
-INSN	-	fprem1	NONE	twobyte	0xD9F5		CPU_286|CPU_FPU
-INSN	-	fdecstp	NONE	twobyte	0xD9F6		CPU_FPU
-INSN	-	fincstp	NONE	twobyte	0xD9F7		CPU_FPU
-INSN	-	fprem	NONE	twobyte	0xD9F8		CPU_FPU
-INSN	-	fyl2xp1	NONE	twobyte	0xD9F9		CPU_FPU
-INSN	-	fsqrt	NONE	twobyte	0xD9FA		CPU_FPU
-INSN	-	fsincos	NONE	twobyte	0xD9FB		CPU_286|CPU_FPU
-INSN	-	frndint	NONE	twobyte	0xD9FC		CPU_FPU
-INSN	-	fscale	NONE	twobyte	0xD9FD		CPU_FPU
-INSN	-	fsin	NONE	twobyte	0xD9FE		CPU_286|CPU_FPU
-INSN	-	fcos	NONE	twobyte	0xD9FF		CPU_286|CPU_FPU
-INSN	-	fchs	NONE	twobyte	0xD9E0		CPU_FPU
-INSN	-	fabs	NONE	twobyte	0xD9E1		CPU_FPU
-INSN	-	fninit	NONE	twobyte	0xDBE3		CPU_FPU
-INSN	-	finit	NONE	threebyte   0x9BDBE3	CPU_FPU
-INSN	-	fldcw	"w"	fldnstcw	0x05	CPU_FPU
-INSN	-	fnstcw	"w"	fldnstcw	0x07	CPU_FPU
-INSN	-	fstcw	"w"	fstcw	0		CPU_FPU
-INSN	-	fnstsw	"w"	fnstsw	0		CPU_FPU
-INSN	-	fstsw	"w"	fstsw	0		CPU_FPU
-INSN	-	fnclex	NONE	twobyte	0xDBE2		CPU_FPU
-INSN	-	fclex	NONE	threebyte   0x9BDBE2	CPU_FPU
-INSN	-	fnstenv	"ls"	onebytemem 0x06D9	CPU_FPU
-INSN	-	fstenv	"ls"	twobytemem 0x069BD9	CPU_FPU
-INSN	-	fldenv	"ls"	onebytemem 0x04D9	CPU_FPU
-INSN	-	fnsave	"ls"	onebytemem 0x06DD	CPU_FPU
-INSN	-	fsave	"ls"	twobytemem 0x069BDD	CPU_FPU
-INSN	-	frstor	"ls"	onebytemem 0x04DD	CPU_FPU
-INSN	-	ffree	NONE	ffree	0xDD		CPU_FPU
-INSN	-	ffreep	NONE	ffree	0xDF		CPU_686|CPU_FPU|CPU_Undoc
-INSN	-	fnop	NONE	twobyte	0xD9D0		CPU_FPU
-INSN	-	fwait	NONE	onebyte	0x009B		CPU_FPU
-# Prefixes should the others be here too? should wait be a prefix?
-INSN	-	wait	NONE	onebyte	0x009B		CPU_Any
-# 486 extensions
-INSN	-	bswap	"lq"	bswap	0		CPU_486
-INSN	-	xadd	"bwlq"	cmpxchgxadd 0xC0	CPU_486
-INSN	-	cmpxchg	"bwlq"	cmpxchgxadd 0xB0	CPU_486
-INSN	nasm	cmpxchg486 NONE cmpxchgxadd 0xA6	CPU_486|CPU_Undoc
-INSN	-	invd	NONE	twobyte	0x0F08		CPU_486|CPU_Priv
-INSN	-	wbinvd	NONE	twobyte	0x0F09		CPU_486|CPU_Priv
-INSN	-	invlpg	NONE	twobytemem  0x070F01	CPU_486|CPU_Priv
-# 586+ and late 486 extensions
-INSN	-	cpuid	NONE	twobyte	0x0FA2		CPU_486
-# Pentium extensions
-INSN	-	wrmsr	NONE	twobyte	0x0F30		CPU_586|CPU_Priv
-INSN	-	rdtsc	NONE	twobyte	0x0F31		CPU_586
-INSN	-	rdmsr	NONE	twobyte	0x0F32		CPU_586|CPU_Priv
-INSN	-	cmpxchg8b "q"	cmpxchg8b	0	CPU_586
-# Pentium II/Pentium Pro extensions
-INSN	-	sysenter NONE	twobyte	0x0F34		CPU_686|CPU_Not64
-INSN	-	sysexit	NONE	twobyte	0x0F35		CPU_686|CPU_Priv|CPU_Not64
-INSN	-	fxsave	"q"	twobytemem  0x000FAE	CPU_686|CPU_FPU
-INSN	-	fxrstor	"q"	twobytemem  0x010FAE	CPU_686|CPU_FPU
-INSN	-	rdpmc	NONE	twobyte	0x0F33		CPU_686
-INSN	-	ud2	NONE	twobyte	0x0F0B		CPU_286
-INSN	-	ud1	NONE	twobyte	0x0FB9		CPU_286|CPU_Undoc
-INSN	-	cmovo	"wlq"	cmovcc	0x00		CPU_686
-INSN	-	cmovno	"wlq"	cmovcc	0x01		CPU_686
-INSN	-	cmovb	"wlq"	cmovcc	0x02		CPU_686
-INSN	-	cmovc	"wlq"	cmovcc	0x02		CPU_686
-INSN	-	cmovnae	"wlq"	cmovcc	0x02		CPU_686
-INSN	-	cmovnb	"wlq"	cmovcc	0x03		CPU_686
-INSN	-	cmovnc	"wlq"	cmovcc	0x03		CPU_686
-INSN	-	cmovae	"wlq"	cmovcc	0x03		CPU_686
-INSN	-	cmove	"wlq"	cmovcc	0x04		CPU_686
-INSN	-	cmovz	"wlq"	cmovcc	0x04		CPU_686
-INSN	-	cmovne	"wlq"	cmovcc	0x05		CPU_686
-INSN	-	cmovnz	"wlq"	cmovcc	0x05		CPU_686
-INSN	-	cmovbe	"wlq"	cmovcc	0x06		CPU_686
-INSN	-	cmovna	"wlq"	cmovcc	0x06		CPU_686
-INSN	-	cmovnbe	"wlq"	cmovcc	0x07		CPU_686
-INSN	-	cmova	"wlq"	cmovcc	0x07		CPU_686
-INSN	-	cmovs	"wlq"	cmovcc	0x08		CPU_686
-INSN	-	cmovns	"wlq"	cmovcc	0x09		CPU_686
-INSN	-	cmovp	"wlq"	cmovcc	0x0A		CPU_686
-INSN	-	cmovpe	"wlq"	cmovcc	0x0A		CPU_686
-INSN	-	cmovnp	"wlq"	cmovcc	0x0B		CPU_686
-INSN	-	cmovpo	"wlq"	cmovcc	0x0B		CPU_686
-INSN	-	cmovl	"wlq"	cmovcc	0x0C		CPU_686
-INSN	-	cmovnge	"wlq"	cmovcc	0x0C		CPU_686
-INSN	-	cmovnl	"wlq"	cmovcc	0x0D		CPU_686
-INSN	-	cmovge	"wlq"	cmovcc	0x0D		CPU_686
-INSN	-	cmovle	"wlq"	cmovcc	0x0E		CPU_686
-INSN	-	cmovng	"wlq"	cmovcc	0x0E		CPU_686
-INSN	-	cmovnle	"wlq"	cmovcc	0x0F		CPU_686
-INSN	-	cmovg	"wlq"	cmovcc	0x0F		CPU_686
-INSN	-	fcmovb	NONE	fcmovcc	0xDAC0		CPU_686|CPU_FPU
-INSN	-	fcmove	NONE	fcmovcc	0xDAC8		CPU_686|CPU_FPU
-INSN	-	fcmovbe	NONE	fcmovcc	0xDAD0		CPU_686|CPU_FPU
-INSN	-	fcmovu	NONE	fcmovcc	0xDAD8		CPU_686|CPU_FPU
-INSN	-	fcmovnb	NONE	fcmovcc	0xDBC0		CPU_686|CPU_FPU
-INSN	-	fcmovne	NONE	fcmovcc	0xDBC8		CPU_686|CPU_FPU
-INSN	-	fcmovnbe NONE	fcmovcc	0xDBD0		CPU_686|CPU_FPU
-INSN	-	fcmovnu	NONE	fcmovcc	0xDBD8		CPU_686|CPU_FPU
-INSN	-	fcomi	NONE	fcom2	0xDBF0		CPU_686|CPU_FPU
-INSN	-	fucomi	NONE	fcom2	0xDBE8		CPU_686|CPU_FPU
-INSN	-	fcomip	NONE	fcom2	0xDFF0		CPU_686|CPU_FPU
-INSN	-	fucomip	NONE	fcom2	0xDFE8		CPU_686|CPU_FPU
-# Pentium4 extensions
-INSN	-	movnti	"lq"	movnti	0		CPU_P4
-INSN	-	clflush	NONE	clflush	0		CPU_P3
-INSN	-	lfence	NONE	threebyte   0x0FAEE8	CPU_P3
-INSN	-	mfence	NONE	threebyte   0x0FAEF0	CPU_P3
-INSN	-	pause	NONE	onebyte_prefix	0xF390	CPU_P4
-# MMX/SSE2 instructions
-INSN	-	emms	NONE	twobyte	0x0F77		CPU_MMX
-INSN	-	movd	NONE	movd	0		CPU_MMX
-# For GAS movq must use standard mov instruction.
-# For NASM it can use a dedicated instruction.
-INSN	gas	movq	SUF_Q	mov	0		CPU_Any
-INSN	nasm	movq	NONE	movq	0		CPU_MMX
-INSN	-	packssdw NONE	mmxsse2	0x6B		CPU_MMX
-INSN	-	packsswb NONE	mmxsse2	0x63		CPU_MMX
-INSN	-	packuswb NONE	mmxsse2	0x67		CPU_MMX
-INSN	-	paddb	NONE	mmxsse2	0xFC		CPU_MMX
-INSN	-	paddw	NONE	mmxsse2	0xFD		CPU_MMX
-INSN	-	paddd	NONE	mmxsse2	0xFE		CPU_MMX
-INSN	-	paddq	NONE	mmxsse2	0xD4		CPU_MMX
-INSN	-	paddsb	NONE	mmxsse2	0xEC		CPU_MMX
-INSN	-	paddsw	NONE	mmxsse2	0xED		CPU_MMX
-INSN	-	paddusb	NONE	mmxsse2	0xDC		CPU_MMX
-INSN	-	paddusw	NONE	mmxsse2	0xDD		CPU_MMX
-INSN	-	pand	NONE	mmxsse2	0xDB		CPU_MMX
-INSN	-	pandn	NONE	mmxsse2	0xDF		CPU_MMX
-INSN	-	pcmpeqb	NONE	mmxsse2	0x74		CPU_MMX
-INSN	-	pcmpeqw	NONE	mmxsse2	0x75		CPU_MMX
-INSN	-	pcmpeqd	NONE	mmxsse2	0x76		CPU_MMX
-INSN	-	pcmpgtb	NONE	mmxsse2	0x64		CPU_MMX
-INSN	-	pcmpgtw	NONE	mmxsse2	0x65		CPU_MMX
-INSN	-	pcmpgtd	NONE	mmxsse2	0x66		CPU_MMX
-INSN	-	pmaddwd	NONE	mmxsse2	0xF5		CPU_MMX
-INSN	-	pmulhw	NONE	mmxsse2	0xE5		CPU_MMX
-INSN	-	pmullw	NONE	mmxsse2	0xD5		CPU_MMX
-INSN	-	por	NONE	mmxsse2	0xEB		CPU_MMX
-INSN	-	psllw	NONE	pshift	0x0671F1	CPU_MMX
-INSN	-	pslld	NONE	pshift	0x0672F2	CPU_MMX
-INSN	-	psllq	NONE	pshift	0x0673F3	CPU_MMX
-INSN	-	psraw	NONE	pshift	0x0471E1	CPU_MMX
-INSN	-	psrad	NONE	pshift	0x0472E2	CPU_MMX
-INSN	-	psrlw	NONE	pshift	0x0271D1	CPU_MMX
-INSN	-	psrld	NONE	pshift	0x0272D2	CPU_MMX
-INSN	-	psrlq	NONE	pshift	0x0273D3	CPU_MMX
-INSN	-	psubb	NONE	mmxsse2	0xF8		CPU_MMX
-INSN	-	psubw	NONE	mmxsse2	0xF9		CPU_MMX
-INSN	-	psubd	NONE	mmxsse2	0xFA		CPU_MMX
-INSN	-	psubq	NONE	mmxsse2	0xFB		CPU_MMX
-INSN	-	psubsb	NONE	mmxsse2	0xE8		CPU_MMX
-INSN	-	psubsw	NONE	mmxsse2	0xE9		CPU_MMX
-INSN	-	psubusb	NONE	mmxsse2	0xD8		CPU_MMX
-INSN	-	psubusw	NONE	mmxsse2	0xD9		CPU_MMX
-INSN	-	punpckhbw NONE	mmxsse2	0x68		CPU_MMX
-INSN	-	punpckhwd NONE	mmxsse2	0x69		CPU_MMX
-INSN	-	punpckhdq NONE	mmxsse2	0x6A		CPU_MMX
-INSN	-	punpcklbw NONE	mmxsse2	0x60		CPU_MMX
-INSN	-	punpcklwd NONE	mmxsse2	0x61		CPU_MMX
-INSN	-	punpckldq NONE	mmxsse2	0x62		CPU_MMX
-INSN	-	pxor	NONE	mmxsse2	0xEF		CPU_MMX
-# PIII Katmai new instructions / SIMD instructions
-INSN	-	addps	NONE	sseps	0x58		CPU_SSE
-INSN	-	addss	NONE	ssess	0xF358		CPU_SSE
-INSN	-	andnps	NONE	sseps	0x55		CPU_SSE
-INSN	-	andps	NONE	sseps	0x54		CPU_SSE
-INSN	-	cmpeqps	NONE	ssecmpps	0x00	CPU_SSE
-INSN	-	cmpeqss	NONE	ssecmpss	0x00F3	CPU_SSE
-INSN	-	cmpleps	NONE	ssecmpps	0x02	CPU_SSE
-INSN	-	cmpless	NONE	ssecmpss	0x02F3	CPU_SSE
-INSN	-	cmpltps	NONE	ssecmpps	0x01	CPU_SSE
-INSN	-	cmpltss	NONE	ssecmpss	0x01F3	CPU_SSE
-INSN	-	cmpneqps NONE	ssecmpps	0x04	CPU_SSE
-INSN	-	cmpneqss NONE	ssecmpss	0x04F3	CPU_SSE
-INSN	-	cmpnleps NONE	ssecmpps	0x06	CPU_SSE
-INSN	-	cmpnless NONE	ssecmpss	0x06F3	CPU_SSE
-INSN	-	cmpnltps NONE	ssecmpps	0x05	CPU_SSE
-INSN	-	cmpnltss NONE	ssecmpss	0x05F3	CPU_SSE
-INSN	-	cmpordps NONE	ssecmpps	0x07	CPU_SSE
-INSN	-	cmpordss NONE	ssecmpss	0x07F3	CPU_SSE
-INSN	-	cmpunordps NONE	ssecmpps	0x03	CPU_SSE
-INSN	-	cmpunordss NONE	ssecmpss	0x03F3	CPU_SSE
-INSN	-	cmpps	NONE	ssepsimm	0xC2	CPU_SSE
-INSN	-	cmpss	NONE	ssessimm	0xF3C2	CPU_SSE
-INSN	-	comiss	NONE	sseps	0x2F		CPU_SSE
-INSN	-	cvtpi2ps NONE	cvt_xmm_mm_ps  0x2A	CPU_SSE
-INSN	-	cvtps2pi NONE	cvt_mm_xmm64   0x2D	CPU_SSE
-INSN	-	cvtsi2ss "lq"	cvt_xmm_rmx    0xF32A	CPU_SSE
-INSN	-	cvtss2si "lq"	cvt_rx_xmm32   0xF32D	CPU_SSE
-INSN	-	cvttps2pi NONE	cvt_mm_xmm64   0x2C	CPU_SSE
-INSN	-	cvttss2si "lq"	cvt_rx_xmm32   0xF32C	CPU_SSE
-INSN	-	divps	NONE	sseps	0x5E		CPU_SSE
-INSN	-	divss	NONE	ssess	0xF35E		CPU_SSE
-INSN	-	ldmxcsr	NONE	ldstmxcsr	0x02	CPU_SSE
-INSN	-	maskmovq NONE	maskmovq	0	CPU_P3|CPU_MMX
-INSN	-	maxps	NONE	sseps	0x5F		CPU_SSE
-INSN	-	maxss	NONE	ssess	0xF35F		CPU_SSE
-INSN	-	minps	NONE	sseps	0x5D		CPU_SSE
-INSN	-	minss	NONE	ssess	0xF35D		CPU_SSE
-INSN	-	movaps	NONE	movaups	0x28		CPU_SSE
-INSN	-	movhlps	NONE	movhllhps	0x12	CPU_SSE
-INSN	-	movhps	NONE	movhlps	0x16		CPU_SSE
-INSN	-	movlhps	NONE	movhllhps	0x16	CPU_SSE
-INSN	-	movlps	NONE	movhlps	0x12		CPU_SSE
-INSN	-	movmskps "lq"	movmskps	0	CPU_SSE
-INSN	-	movntps	NONE	movntps	0		CPU_SSE
-INSN	-	movntq	NONE	movntq	0		CPU_SSE
-INSN	-	movss	NONE	movss	0		CPU_SSE
-INSN	-	movups	NONE	movaups	0x10		CPU_SSE
-INSN	-	mulps	NONE	sseps	0x59		CPU_SSE
-INSN	-	mulss	NONE	ssess	0xF359		CPU_SSE
-INSN	-	orps	NONE	sseps	0x56		CPU_SSE
-INSN	-	pavgb	NONE	mmxsse2	0xE0		CPU_P3|CPU_MMX
-INSN	-	pavgw	NONE	mmxsse2	0xE3		CPU_P3|CPU_MMX
-INSN	-	pextrw	"lq"	pextrw	0		CPU_P3|CPU_MMX
-INSN	-	pinsrw	"lq"	pinsrw	0		CPU_P3|CPU_MMX
-INSN	-	pmaxsw	NONE	mmxsse2	0xEE		CPU_P3|CPU_MMX
-INSN	-	pmaxub	NONE	mmxsse2	0xDE		CPU_P3|CPU_MMX
-INSN	-	pminsw	NONE	mmxsse2	0xEA		CPU_P3|CPU_MMX
-INSN	-	pminub	NONE	mmxsse2	0xDA		CPU_P3|CPU_MMX
-INSN	-	pmovmskb "lq"	pmovmskb	0	CPU_SSE
-INSN	-	pmulhuw NONE	mmxsse2	0xE4		CPU_P3|CPU_MMX
-INSN	-	prefetchnta NONE twobytemem 0x000F18	CPU_P3
-INSN	-	prefetcht0 NONE	twobytemem  0x010F18	CPU_P3
-INSN	-	prefetcht1 NONE	twobytemem  0x020F18	CPU_P3
-INSN	-	prefetcht2 NONE	twobytemem  0x030F18	CPU_P3
-INSN	-	psadbw	NONE	mmxsse2	0xF6		CPU_P3|CPU_MMX
-INSN	-	pshufw	NONE	pshufw	0		CPU_P3|CPU_MMX
-INSN	-	rcpps	NONE	sseps	0x53		CPU_SSE
-INSN	-	rcpss	NONE	ssess	0xF353		CPU_SSE
-INSN	-	rsqrtps	NONE	sseps	0x52		CPU_SSE
-INSN	-	rsqrtss	NONE	ssess	0xF352		CPU_SSE
-INSN	-	sfence	NONE	threebyte   0x0FAEF8	CPU_P3
-INSN	-	shufps	NONE	ssepsimm	0xC6	CPU_SSE
-INSN	-	sqrtps	NONE	sseps	0x51		CPU_SSE
-INSN	-	sqrtss	NONE	ssess	0xF351		CPU_SSE
-INSN	-	stmxcsr	NONE	ldstmxcsr	0x03	CPU_SSE
-INSN	-	subps	NONE	sseps	0x5C		CPU_SSE
-INSN	-	subss	NONE	ssess	0xF35C		CPU_SSE
-INSN	-	ucomiss	NONE	ssess	0x2E		CPU_SSE
-INSN	-	unpckhps NONE	sseps	0x15		CPU_SSE
-INSN	-	unpcklps NONE	sseps	0x14		CPU_SSE
-INSN	-	xorps	NONE	sseps	0x57		CPU_SSE
-# SSE2 instructions
-INSN	-	addpd	NONE	ssess	0x6658		CPU_SSE2
-INSN	-	addsd	NONE	ssess	0xF258		CPU_SSE2
-INSN	-	andnpd	NONE	ssess	0x6655		CPU_SSE2
-INSN	-	andpd	NONE	ssess	0x6654		CPU_SSE2
-INSN	-	cmpeqpd	NONE	ssecmpss	0x0066	CPU_SSE2
-INSN	-	cmpeqsd	NONE	ssecmpss	0x00F2	CPU_SSE2
-INSN	-	cmplepd	NONE	ssecmpss	0x0266	CPU_SSE2
-INSN	-	cmplesd	NONE	ssecmpss	0x02F2	CPU_SSE2
-INSN	-	cmpltpd	NONE	ssecmpss	0x0166	CPU_SSE2
-INSN	-	cmpltsd	NONE	ssecmpss	0x01F2	CPU_SSE2
-INSN	-	cmpneqpd NONE	ssecmpss	0x0466	CPU_SSE2
-INSN	-	cmpneqsd NONE	ssecmpss	0x04F2	CPU_SSE2
-INSN	-	cmpnlepd NONE	ssecmpss	0x0666	CPU_SSE2
-INSN	-	cmpnlesd NONE	ssecmpss	0x06F2	CPU_SSE2
-INSN	-	cmpnltpd NONE	ssecmpss	0x0566	CPU_SSE2
-INSN	-	cmpnltsd NONE	ssecmpss	0x05F2	CPU_SSE2
-INSN	-	cmpordpd NONE	ssecmpss	0x0766	CPU_SSE2
-INSN	-	cmpordsd NONE	ssecmpss	0x07F2	CPU_SSE2
-INSN	-	cmpunordpd NONE	ssecmpss	0x0366	CPU_SSE2
-INSN	-	cmpunordsd NONE	ssecmpss	0x03F2	CPU_SSE2
-INSN	-	cmppd	NONE	ssessimm	0x66C2	CPU_SSE2
-# cmpsd is in string instructions above
-INSN	-	comisd	NONE	ssess	0x662F		CPU_SSE2
-INSN	-	cvtpi2pd NONE	cvt_xmm_mm_ss	0x662A	CPU_SSE2
-INSN	-	cvtsi2sd "lq"	cvt_xmm_rmx	0xF22A	CPU_SSE2
-INSN	-	divpd	NONE	ssess	0x665E		CPU_SSE2
-INSN	-	divsd	NONE	ssess	0xF25E		CPU_SSE2
-INSN	-	maxpd	NONE	ssess	0x665F		CPU_SSE2
-INSN	-	maxsd	NONE	ssess	0xF25F		CPU_SSE2
-INSN	-	minpd	NONE	ssess	0x665D		CPU_SSE2
-INSN	-	minsd	NONE	ssess	0xF25D		CPU_SSE2
-INSN	-	movapd	NONE	movaupd	0x28		CPU_SSE2
-INSN	-	movhpd	NONE	movhlpd	0x16		CPU_SSE2
-INSN	-	movlpd	NONE	movhlpd	0x12		CPU_SSE2
-INSN	-	movmskpd "lq"	movmskpd	0	CPU_SSE2
-INSN	-	movntpd	NONE	movntpddq	0x2B	CPU_SSE2
-INSN	-	movntdq	NONE	movntpddq	0xE7	CPU_SSE2
-# movsd is in string instructions above
-INSN	-	movupd	NONE	movaupd	0x10		CPU_SSE2
-INSN	-	mulpd	NONE	ssess	0x6659		CPU_SSE2
-INSN	-	mulsd	NONE	ssess	0xF259		CPU_SSE2
-INSN	-	orpd	NONE	ssess	0x6656		CPU_SSE2
-INSN	-	shufpd	NONE	ssessimm	0x66C6	CPU_SSE2
-INSN	-	sqrtpd	NONE	ssess	0x6651		CPU_SSE2
-INSN	-	sqrtsd	NONE	ssess	0xF251		CPU_SSE2
-INSN	-	subpd	NONE	ssess	0x665C		CPU_SSE2
-INSN	-	subsd	NONE	ssess	0xF25C		CPU_SSE2
-INSN	-	ucomisd	NONE	ssess	0x662E		CPU_SSE2
-INSN	-	unpckhpd NONE	ssess	0x6615		CPU_SSE2
-INSN	-	unpcklpd NONE	ssess	0x6614		CPU_SSE2
-INSN	-	xorpd	NONE	ssess	0x6657		CPU_SSE2
-INSN	-	cvtdq2pd NONE	cvt_xmm_xmm64_ss 0xF3E6	CPU_SSE2
-INSN	-	cvtpd2dq NONE	ssess	0xF2E6		CPU_SSE2
-INSN	-	cvtdq2ps NONE	sseps	0x5B		CPU_SSE2
-INSN	-	cvtpd2pi NONE	cvt_mm_xmm	0x662D	CPU_SSE2
-INSN	-	cvtpd2ps NONE	ssess	0x665A		CPU_SSE2
-INSN	-	cvtps2pd NONE	cvt_xmm_xmm64_ps 0x5A	CPU_SSE2
-INSN	-	cvtps2dq NONE	ssess	0x665B		CPU_SSE2
-INSN	-	cvtsd2si "lq"	cvt_rx_xmm64	0xF22D	CPU_SSE2
-INSN	-	cvtsd2ss NONE	cvt_xmm_xmm64_ss 0xF25A	CPU_SSE2
-# P4 VMX Instructions
-INSN	-	vmcall	NONE	threebyte   0x0F01C1	CPU_P4
-INSN	-	vmlaunch NONE	threebyte   0x0F01C2	CPU_P4
-INSN	-	vmresume NONE	threebyte   0x0F01C3	CPU_P4
-INSN	-	vmxoff	NONE	threebyte   0x0F01C4	CPU_P4
-INSN	-	vmread	"lq"	vmxmemrd    0x0F78	CPU_P4
-INSN	-	vmwrite	"lq"	vmxmemwr    0x0F79	CPU_P4
-INSN	-	vmptrld	NONE	vmxtwobytemem	0x06C7	CPU_P4
-INSN	-	vmptrst	NONE	vmxtwobytemem	0x07C7	CPU_P4
-INSN	-	vmclear	NONE	vmxthreebytemem	0x0666C7 CPU_P4
-INSN	-	vmxon	NONE	vmxthreebytemem	0x06F3C7 CPU_P4
-INSN	-	cvtss2sd NONE	cvt_xmm_xmm32	0xF35A	CPU_SSE2
-INSN	-	cvttpd2pi NONE	cvt_mm_xmm	0x662C	CPU_SSE2
-INSN	-	cvttsd2si "lq"	cvt_rx_xmm64	0xF22C	CPU_SSE2
-INSN	-	cvttpd2dq NONE	ssess	0x66E6		CPU_SSE2
-INSN	-	cvttps2dq NONE	ssess	0xF35B		CPU_SSE2
-INSN	-	maskmovdqu NONE	maskmovdqu	0	CPU_SSE2
-INSN	-	movdqa	NONE	movdqau	0x66		CPU_SSE2
-INSN	-	movdqu	NONE	movdqau	0xF3		CPU_SSE2
-INSN	-	movdq2q	NONE	movdq2q	0		CPU_SSE2
-INSN	-	movq2dq	NONE	movq2dq	0		CPU_SSE2
-INSN	-	pmuludq	NONE	mmxsse2	0xF4		CPU_SSE2
-INSN	-	pshufd	NONE	ssessimm	0x6670	CPU_SSE2
-INSN	-	pshufhw	NONE	ssessimm	0xF370	CPU_SSE2
-INSN	-	pshuflw	NONE	ssessimm	0xF270	CPU_SSE2
-INSN	-	pslldq	NONE	pslrldq	0x07		CPU_SSE2
-INSN	-	psrldq	NONE	pslrldq	0x03		CPU_SSE2
-INSN	-	punpckhqdq NONE	ssess	0x666D		CPU_SSE2
-INSN	-	punpcklqdq NONE	ssess	0x666C		CPU_SSE2
-# SSE3 / PNI Prescott New Instructions instructions
-INSN	-	addsubpd NONE	ssess	0x66D0		CPU_SSE3
-INSN	-	addsubps NONE	ssess	0xF2D0		CPU_SSE3
-INSN	-	fisttp	"lqs"	fildstp	0x010001	CPU_SSE3
-INSN	gas	fisttpll SUF_Q	fildstp	0x07		CPU_FPU
-INSN	-	haddpd	NONE	ssess	0x667C		CPU_SSE3
-INSN	-	haddps	NONE	ssess	0xF27C		CPU_SSE3
-INSN	-	hsubpd	NONE	ssess	0x667D		CPU_SSE3
-INSN	-	hsubps	NONE	ssess	0xF27D		CPU_SSE3
-INSN	-	lddqu	NONE	lddqu	0		CPU_SSE3
-INSN	-	monitor	NONE	threebyte   0x0F01C8	CPU_SSE3
-INSN	-	movddup	NONE	cvt_xmm_xmm64_ss 0xF212	CPU_SSE3
-INSN	-	movshdup NONE	ssess	0xF316		CPU_SSE3
-INSN	-	movsldup NONE	ssess	0xF312		CPU_SSE3
-INSN	-	mwait	NONE	threebyte   0x0F01C9	CPU_SSE3
-# SSSE3 / TNI Tejas New Intructions instructions
-INSN	-	pshufb	NONE	ssse3		0x00	CPU_SSSE3
-INSN	-	phaddw	NONE	ssse3		0x01	CPU_SSSE3
-INSN	-	phaddd	NONE	ssse3		0x02	CPU_SSSE3
-INSN	-	phaddsw	NONE	ssse3		0x03	CPU_SSSE3
-INSN	-	pmaddubsw	NONE	ssse3	0x04	CPU_SSSE3
-INSN	-	phsubw	NONE	ssse3		0x05	CPU_SSSE3
-INSN	-	phsubd	NONE	ssse3		0x06	CPU_SSSE3
-INSN	-	phsubsw	NONE	ssse3		0x07	CPU_SSSE3
-INSN	-	psignb	NONE	ssse3		0x08	CPU_SSSE3
-INSN	-	psignw	NONE	ssse3		0x09	CPU_SSSE3
-INSN	-	psignd	NONE	ssse3		0x0A	CPU_SSSE3
-INSN	-	pmulhrsw	NONE	ssse3	0x0B	CPU_SSSE3
-INSN	-	pabsb	NONE	ssse3		0x1C	CPU_SSSE3
-INSN	-	pabsw	NONE	ssse3		0x1D	CPU_SSSE3
-INSN	-	pabsd	NONE	ssse3		0x1E	CPU_SSSE3
-INSN	-	palignr	NONE	ssse3imm	0x0F	CPU_SSSE3
-# SSE4.1 / SSE4.2 instructions
-INSN	-	blendpd	NONE	sse4imm		0x0D	CPU_SSE41
-INSN	-	blendps	NONE	sse4imm		0x0C	CPU_SSE41
-INSN	-	blendvpd NONE	sse4xmm0	0x15	CPU_SSE41
-INSN	-	blendvps NONE	sse4xmm0	0x14	CPU_SSE41
-INSN	-	crc32	"bwlq"	crc32		0	CPU_SSE42
-INSN	-	dppd	NONE	sse4imm		0x41	CPU_SSE41
-INSN	-	dpps	NONE	sse4imm		0x40	CPU_SSE41
-INSN	-	extractps NONE	extractps	0	CPU_SSE41
-INSN	-	insertps NONE	insertps	0	CPU_SSE41
-INSN	-	movntdqa NONE	movntdqa	0	CPU_SSE41
-INSN	-	mpsadbw	NONE	sse4imm		0x42	CPU_SSE41
-INSN	-	packusdw NONE	sse4		0x2B	CPU_SSE41
-INSN	-	pblendvb NONE	sse4xmm0	0x10	CPU_SSE41
-INSN	-	pblendw	NONE	sse4imm		0x0E	CPU_SSE41
-INSN	-	pcmpeqq	NONE	sse4		0x29	CPU_SSE41
-INSN	-	pcmpestri "wlq"	sse4pcmpstr	0x61	CPU_SSE42
-INSN	-	pcmpestrm "wlq"	sse4pcmpstr	0x60	CPU_SSE42
-INSN	-	pcmpistri "wlq"	sse4pcmpstr	0x63	CPU_SSE42
-INSN	-	pcmpistrm "wlq"	sse4pcmpstr	0x62	CPU_SSE42
-INSN	-	pcmpgtq	NONE	sse4		0x37	CPU_SSE42
-INSN	-	pextrb	NONE	pextrb		0	CPU_SSE41
-INSN	-	pextrd	NONE	pextrd		0	CPU_SSE41
-INSN	-	pextrq	NONE	pextrq		0	CPU_SSE41
-#INSN	-	pextrw	NONE	pextrw		0	CPU_SSE41
-INSN	-	phminposuw NONE	sse4		0x41	CPU_SSE41
-INSN	-	pinsrb	NONE	pinsrb		0	CPU_SSE41
-INSN	-	pinsrd	NONE	pinsrd		0	CPU_SSE41
-INSN	-	pinsrq	NONE	pinsrq		0	CPU_SSE41
-INSN	-	pmaxsb	NONE	sse4		0x3C	CPU_SSE41
-INSN	-	pmaxsd	NONE	sse4		0x3D	CPU_SSE41
-INSN	-	pmaxud	NONE	sse4		0x3F	CPU_SSE41
-INSN	-	pmaxuw	NONE	sse4		0x3E	CPU_SSE41
-INSN	-	pminsb	NONE	sse4		0x38	CPU_SSE41
-INSN	-	pminsd	NONE	sse4		0x39	CPU_SSE41
-INSN	-	pminud	NONE	sse4		0x3B	CPU_SSE41
-INSN	-	pminuw	NONE	sse4		0x3A	CPU_SSE41
-INSN	-	pmovsxbw NONE	sse4m64		0x20	CPU_SSE41
-INSN	-	pmovsxbd NONE	sse4m32		0x21	CPU_SSE41
-INSN	-	pmovsxbq NONE	sse4m16		0x22	CPU_SSE41
-INSN	-	pmovsxwd NONE	sse4m64		0x23	CPU_SSE41
-INSN	-	pmovsxwq NONE	sse4m32		0x24	CPU_SSE41
-INSN	-	pmovsxdq NONE	sse4m64		0x25	CPU_SSE41
-INSN	-	pmovzxbw NONE	sse4m64		0x30	CPU_SSE41
-INSN	-	pmovzxbd NONE	sse4m32		0x31	CPU_SSE41
-INSN	-	pmovzxbq NONE	sse4m16		0x32	CPU_SSE41
-INSN	-	pmovzxwd NONE	sse4m64		0x33	CPU_SSE41
-INSN	-	pmovzxwq NONE	sse4m32		0x34	CPU_SSE41
-INSN	-	pmovzxdq NONE	sse4m64		0x35	CPU_SSE41
-INSN	-	pmuldq	NONE	sse4		0x28	CPU_SSE41
-INSN	-	pmulld	NONE	sse4		0x40	CPU_SSE41
-INSN	-	popcnt	"wlq"	cnt		0xB8	CPU_SSE42
-INSN	-	ptest	NONE	sse4		0x17	CPU_SSE41
-INSN	-	roundpd	NONE	sse4imm		0x09	CPU_SSE41
-INSN	-	roundps	NONE	sse4imm		0x08	CPU_SSE41
-INSN	-	roundsd	NONE	sse4imm		0x0B	CPU_SSE41
-INSN	-	roundss	NONE	sse4imm		0x0A	CPU_SSE41
-# AMD SSE4.1 instructions
-INSN	-	extrq	NONE	extrq		0	CPU_SSE41
-INSN	-	insertq	NONE	insertq		0	CPU_SSE41
-INSN	-	movntsd	NONE	movntsd		0	CPU_SSE41
-INSN	-	movntss	NONE	movntss		0	CPU_SSE41
-# AMD 3DNow! instructions
-INSN	-	prefetch NONE	twobytemem  0x000F0D	CPU_3DNow
-INSN	-	prefetchw NONE	twobytemem  0x010F0D	CPU_3DNow
-INSN	-	femms	NONE	twobyte	0x0F0E		CPU_3DNow
-INSN	-	pavgusb	NONE	now3d	0xBF		CPU_3DNow
-INSN	-	pf2id	NONE	now3d	0x1D		CPU_3DNow
-INSN	-	pf2iw	NONE	now3d	0x1C		CPU_Athlon|CPU_3DNow
-INSN	-	pfacc	NONE	now3d	0xAE		CPU_3DNow
-INSN	-	pfadd	NONE	now3d	0x9E		CPU_3DNow
-INSN	-	pfcmpeq	NONE	now3d	0xB0		CPU_3DNow
-INSN	-	pfcmpge NONE	now3d	0x90		CPU_3DNow
-INSN	-	pfcmpgt	NONE	now3d	0xA0		CPU_3DNow
-INSN	-	pfmax	NONE	now3d	0xA4		CPU_3DNow
-INSN	-	pfmin	NONE	now3d	0x94		CPU_3DNow
-INSN	-	pfmul	NONE	now3d	0xB4		CPU_3DNow
-INSN	-	pfnacc	NONE	now3d	0x8A		CPU_Athlon|CPU_3DNow
-INSN	-	pfpnacc	NONE	now3d	0x8E		CPU_Athlon|CPU_3DNow
-INSN	-	pfrcp	NONE	now3d	0x96		CPU_3DNow
-INSN	-	pfrcpit1 NONE	now3d	0xA6		CPU_3DNow
-INSN	-	pfrcpit2 NONE	now3d	0xB6		CPU_3DNow
-INSN	-	pfrsqit1 NONE	now3d	0xA7		CPU_3DNow
-INSN	-	pfrsqrt	NONE	now3d	0x97		CPU_3DNow
-INSN	-	pfsub	NONE	now3d	0x9A		CPU_3DNow
-INSN	-	pfsubr	NONE	now3d	0xAA		CPU_3DNow
-INSN	-	pi2fd	NONE	now3d	0x0D		CPU_3DNow
-INSN	-	pi2fw	NONE	now3d	0x0C		CPU_Athlon|CPU_3DNow
-INSN	-	pmulhrwa NONE	now3d	0xB7		CPU_3DNow
-INSN	-	pswapd	NONE	now3d	0xBB		CPU_Athlon|CPU_3DNow
-# AMD extensions
-INSN	-	syscall	NONE	twobyte	0x0F05		CPU_686|CPU_AMD
-INSN	-	sysret	"lq"	twobyte	0x0F07		CPU_686|CPU_AMD|CPU_Priv
-INSN	-	lzcnt	"wlq"	cnt		0xBD	CPU_686|CPU_AMD
-# AMD x86-64 extensions
-INSN	-	swapgs	NONE	threebyte   0x0F01F8	CPU_Hammer|CPU_64
-INSN	-	rdtscp	NONE	threebyte   0x0F01F9	CPU_686|CPU_AMD|CPU_Priv
-INSN	-	cmpxchg16b NONE	cmpxchg16b	0	CPU_Hammer|CPU_64
-# AMD Pacifica SVM instructions
-INSN	-	clgi	NONE	threebyte   0x0F01DD	CPU_SVM
-INSN	-	invlpga	NONE	invlpga	    0		CPU_SVM
-INSN	-	skinit	NONE	skinit	    0		CPU_SVM
-INSN	-	stgi	NONE	threebyte   0x0F01DC	CPU_SVM
-INSN	-	vmload	NONE	svm_rax	    0xDA	CPU_SVM
-INSN	-	vmmcall	NONE	threebyte   0x0F01D9	CPU_SVM
-INSN	-	vmrun	NONE	svm_rax	    0xD8	CPU_SVM
-INSN	-	vmsave	NONE	svm_rax	    0xDB	CPU_SVM
-# VIA PadLock instructions
-INSN	-	xstore	NONE	padlock	0xC000A7   CPU_PadLock
-INSN	-	xstorerng NONE	padlock	0xC000A7   CPU_PadLock
-INSN	-	xcryptecb NONE	padlock	0xC8F3A7   CPU_PadLock
-INSN	-	xcryptcbc NONE	padlock	0xD0F3A7   CPU_PadLock
-INSN	-	xcryptctr NONE	padlock	0xD8F3A7   CPU_PadLock
-INSN	-	xcryptcfb NONE	padlock	0xE0F3A7   CPU_PadLock
-INSN	-	xcryptofb NONE	padlock	0xE8F3A7   CPU_PadLock
-INSN	-	montmul	NONE	padlock	0xC0F3A6   CPU_PadLock
-INSN	-	xsha1	NONE	padlock	0xC8F3A6   CPU_PadLock
-INSN	-	xsha256	NONE	padlock	0xD0F3A6   CPU_PadLock
-# Cyrix MMX instructions
-INSN	-	paddsiw	NONE	cyrixmmx    0x51	CPU_Cyrix|CPU_MMX
-INSN	-	paveb	NONE	cyrixmmx    0x50	CPU_Cyrix|CPU_MMX
-INSN	-	pdistib	NONE	cyrixmmx    0x54	CPU_Cyrix|CPU_MMX
-INSN	-	pmachriw NONE	pmachriw    0		CPU_Cyrix|CPU_MMX
-INSN	-	pmagw	NONE	cyrixmmx    0x52	CPU_Cyrix|CPU_MMX
-INSN	-	pmulhriw NONE	cyrixmmx    0x5D	CPU_Cyrix|CPU_MMX
-INSN	-	pmulhrwc NONE	cyrixmmx    0x59	CPU_Cyrix|CPU_MMX
-INSN	-	pmvgezb	NONE	cyrixmmx    0x5C	CPU_Cyrix|CPU_MMX
-INSN	-	pmvlzb	NONE	cyrixmmx    0x5B	CPU_Cyrix|CPU_MMX
-INSN	-	pmvnzb	NONE	cyrixmmx    0x5A	CPU_Cyrix|CPU_MMX
-INSN	-	pmvzb	NONE	cyrixmmx    0x58	CPU_Cyrix|CPU_MMX
-INSN	-	psubsiw	NONE	cyrixmmx    0x55	CPU_Cyrix|CPU_MMX
-# Cyrix extensions
-INSN	-	rdshr	NONE	rdwrshr	    0x00	CPU_686|CPU_Cyrix|CPU_SMM
-INSN	-	rsdc	NONE	rsdc	    0		CPU_486|CPU_Cyrix|CPU_SMM
-INSN	-	rsldt	NONE	cyrixsmm    0x7B	CPU_486|CPU_Cyrix|CPU_SMM
-INSN	-	rsts	NONE	cyrixsmm    0x7D	CPU_486|CPU_Cyrix|CPU_SMM
-INSN	-	svdc	NONE	svdc	    0		CPU_486|CPU_Cyrix|CPU_SMM
-INSN	-	svldt	NONE	cyrixsmm    0x7A	CPU_486|CPU_Cyrix|CPU_SMM
-INSN	-	svts	NONE	cyrixsmm    0x7C	CPU_486|CPU_Cyrix|CPU_SMM
-INSN	-	smint	NONE	twobyte	    0x0F38	CPU_686|CPU_Cyrix
-INSN	-	smintold NONE	twobyte	    0x0F7E	CPU_486|CPU_Cyrix|CPU_Obs
-INSN	-	wrshr	NONE	rdwrshr	    0x01	CPU_686|CPU_Cyrix|CPU_SMM
-# Obsolete/undocumented instructions
-INSN	-	fsetpm	NONE	twobyte	0xDBE4		CPU_286|CPU_FPU|CPU_Obs
-INSN	-	ibts	NONE	ibts	0		CPU_386|CPU_Undoc|CPU_Obs
-INSN	-	loadall	NONE	twobyte	0x0F07		CPU_386|CPU_Undoc
-INSN	-	loadall286 NONE	twobyte	0x0F05		CPU_286|CPU_Undoc
-INSN	-	salc	NONE	onebyte	0x00D6		CPU_Undoc|CPU_Not64
-INSN	-	smi	NONE	onebyte	0x00F1		CPU_386|CPU_Undoc
-INSN	-	umov	NONE	umov	0		CPU_386|CPU_Undoc
-INSN	-	xbts	NONE	xbts	0		CPU_386|CPU_Undoc|CPU_Obs
-
-
-# DEF_CPU parameters:
-# - CPU name
-# - CPU flags to set
-# DEF_CPU_ALIAS parameters:
-# - CPU alias name
-# - CPU base name
-# DEF_CPU_FEATURE parameters:
-# - CPU feature name
-# - CPU flag to set feature name alone or unset ("no" + feature name)
-
-# The standard CPU names /set/ cpu_enabled.
-CPU		8086	CPU_Priv
-CPU		186	CPU_186|CPU_Priv
-CPU_ALIAS	80186		186
-CPU_ALIAS	i186		186
-CPU		286	CPU_186|CPU_286|CPU_Priv
-CPU_ALIAS	80286		286
-CPU_ALIAS	i286		286
-CPU		386	CPU_186|CPU_286|CPU_386|CPU_SMM|CPU_Prot|CPU_Priv
-CPU_ALIAS	80386		386
-CPU_ALIAS	i386		386
-CPU		486	CPU_186|CPU_286|CPU_386|CPU_486|CPU_FPU|CPU_SMM|\
-			CPU_Prot|CPU_Priv
-CPU_ALIAS	80486		486
-CPU_ALIAS	i486		486
-CPU		586	CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_FPU|\
-			CPU_SMM|CPU_Prot|CPU_Priv
-CPU_ALIAS	i586		586
-CPU_ALIAS	pentium		586
-CPU_ALIAS	p5		586
-CPU		686	CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|\
-			CPU_FPU|CPU_SMM|CPU_Prot|CPU_Priv
-CPU_ALIAS	i686		686
-CPU_ALIAS	p6		686
-CPU_ALIAS	ppro		686
-CPU_ALIAS	pentiumpro	686
-CPU		p2	CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|\
-			CPU_FPU|CPU_MMX|CPU_SMM|CPU_Prot|CPU_Priv
-CPU_ALIAS	pentium2	p2
-CPU_ALIAS	pentium-2	p2
-CPU_ALIAS	pentiumii	p2
-CPU_ALIAS	pentium-ii	p2
-CPU		p3	CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|\
-			CPU_P3|CPU_FPU|CPU_MMX|CPU_SSE|CPU_SMM|CPU_Prot|\
-			CPU_Priv
-CPU_ALIAS	pentium3	p3
-CPU_ALIAS	pentium-3	p3
-CPU_ALIAS	pentiumiii	p3
-CPU_ALIAS	pentium-iii	p3
-CPU_ALIAS	katmai	p3
-CPU		p4	CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|\
-			CPU_P3|CPU_P4|CPU_FPU|CPU_MMX|CPU_SSE|CPU_SSE2|\
-			CPU_SMM|CPU_Prot|CPU_Priv
-CPU_ALIAS	pentium4	p4
-CPU_ALIAS	pentium-4	p4
-CPU_ALIAS	pentiumiv	p4
-CPU_ALIAS	pentium-iv	p4
-CPU_ALIAS	williamette	p4
-CPU		ia64	CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|\
-			CPU_P3|CPU_P4|CPU_IA64|CPU_FPU|CPU_MMX|CPU_SSE|\
-			CPU_SSE2|CPU_SMM|CPU_Prot|CPU_Priv
-CPU_ALIAS	ia-64		ia64
-CPU_ALIAS	itanium		ia64
-CPU		k6	CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|\
-			CPU_K6|CPU_FPU|CPU_MMX|CPU_3DNow|CPU_SMM|CPU_Prot|\
-			CPU_Priv
-CPU		k7	CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|\
-			CPU_K6|CPU_Athlon|CPU_FPU|CPU_MMX|CPU_SSE|CPU_3DNow|\
-			CPU_SMM|CPU_Prot|CPU_Priv
-CPU_ALIAS	athlon		k7
-CPU		hammer	CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|\
-			CPU_K6|CPU_Athlon|CPU_Hammer|CPU_FPU|CPU_MMX|\
-			CPU_SSE|CPU_SSE2|CPU_3DNow|CPU_SMM|CPU_Prot|\
-			CPU_Priv
-CPU_ALIAS	sledgehammer	hammer
-CPU_ALIAS	opteron		hammer
-CPU_ALIAS	athlon64	hammer
-CPU_ALIAS	athlon-64	hammer
-CPU		prescott CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|\
-			CPU_Hammer|CPU_EM64T|CPU_FPU|CPU_MMX|\
-			CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SMM|\
-			CPU_Prot|CPU_Priv
-CPU		conroe	CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|\
-			CPU_Hammer|CPU_EM64T|CPU_FPU|CPU_MMX|\
-			CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SMM|\
-			CPU_Prot|CPU_Priv
-CPU		penryn	CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|\
-			CPU_Hammer|CPU_EM64T|CPU_FPU|CPU_MMX|\
-			CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE41|CPU_SMM|\
-			CPU_Prot|CPU_Priv
-CPU		nehalem	CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|\
-			CPU_Hammer|CPU_EM64T|CPU_FPU|CPU_MMX|\
-			CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE41|\
-			CPU_SSE42|CPU_SMM|CPU_Prot|CPU_Priv
-
-# Features have "no" versions to disable them, and only set/reset the
-# specific feature being changed.  All other bits are left alone.
-
-CPU_FEATURE	fpu	CPU_FPU
-CPU_FEATURE	mmx	CPU_MMX
-CPU_FEATURE	sse	CPU_SSE
-CPU_FEATURE	sse2	CPU_SSE2
-CPU_FEATURE	sse3	CPU_SSE3
-#CPU_FEATURE	pni	CPU_PNI
-CPU_FEATURE	3dnow	CPU_3DNow
-CPU_FEATURE	cyrix	CPU_Cyrix
-CPU_FEATURE	amd	CPU_AMD
-CPU_FEATURE	smm	CPU_SMM
-CPU_FEATURE	prot	CPU_Prot
-CPU_FEATURE	protected	CPU_Prot
-CPU_FEATURE	undoc	CPU_Undoc
-CPU_FEATURE	undocumented	CPU_Undoc
-CPU_FEATURE	obs	CPU_Obs
-CPU_FEATURE	obsolete	CPU_Obs
-CPU_FEATURE	priv	CPU_Priv
-CPU_FEATURE	privileged	CPU_Priv
-CPU_FEATURE	svm	CPU_SVM
-CPU_FEATURE	padlock	CPU_PadLock
-CPU_FEATURE	em64t	CPU_EM64T
-CPU_FEATURE	ssse3	CPU_SSSE3
-CPU_FEATURE	sse4.1	CPU_SSE41
-CPU_FEATURE	sse4.2	CPU_SSE42
-CPU_FEATURE	sse4	CPU_SSE4
-
-
-# TARGETMOD parameters:
-# - target modifier name
-# - modifier to return
-
-TARGETMOD	near	X86_NEAR
-TARGETMOD	short	X86_SHORT
-TARGETMOD	far	X86_FAR
-TARGETMOD	to	X86_TO
-
-
-# PREFIX parameters:
-# - parser
-# - prefix name
-# - prefix type
-# - prefix value
-
-# operand size overrides
-PREFIX	nasm	o16	X86_OPERSIZE	16
-PREFIX	gas	data16	X86_OPERSIZE	16
-PREFIX	gas	word	X86_OPERSIZE	16
-PREFIX	nasm	o32	X86_OPERSIZE	32
-PREFIX	gas	data32	X86_OPERSIZE	32
-PREFIX	gas	dword	X86_OPERSIZE	32
-PREFIX	nasm	o64	X86_OPERSIZE	64
-PREFIX	gas	data64	X86_OPERSIZE	64
-PREFIX	gas	qword	X86_OPERSIZE	64
-
-# address size overrides
-PREFIX	nasm	a16	X86_ADDRSIZE	16
-PREFIX	gas	addr16	X86_ADDRSIZE	16
-PREFIX	gas	aword	X86_ADDRSIZE	16
-PREFIX	nasm	a32	X86_ADDRSIZE	32
-PREFIX	gas	addr32	X86_ADDRSIZE	32
-PREFIX	gas	adword	X86_ADDRSIZE	32
-PREFIX	nasm	a64	X86_ADDRSIZE	64
-PREFIX	gas	addr64	X86_ADDRSIZE	64
-PREFIX	gas	aqword	X86_ADDRSIZE	64
-
-# instruction prefixes
-PREFIX	-	lock	X86_LOCKREP	0xF0
-PREFIX	-	repne	X86_LOCKREP	0xF2
-PREFIX	-	repnz	X86_LOCKREP	0xF2
-PREFIX	-	rep	X86_LOCKREP	0xF3
-PREFIX	-	repe	X86_LOCKREP	0xF3
-PREFIX	-	repz	X86_LOCKREP	0xF3
-
-# other prefixes, limited to GAS-only at the moment
-# Hint taken/not taken for jumps
-PREFIX	gas	ht	X86_SEGREG	0x3E
-PREFIX	gas	hnt	X86_SEGREG	0x2E
-
-# REX byte explicit prefixes
-PREFIX	gas	rex	X86_REX		0x40
-PREFIX	gas	rexz	X86_REX		0x41
-PREFIX	gas	rexy	X86_REX		0x42
-PREFIX	gas	rexyz	X86_REX		0x43
-PREFIX	gas	rexx	X86_REX		0x44
-PREFIX	gas	rexxz	X86_REX		0x45
-PREFIX	gas	rexxy	X86_REX		0x46
-PREFIX	gas	rexxyz	X86_REX		0x47
-PREFIX	gas	rex64	X86_REX		0x48
-PREFIX	gas	rex64z	X86_REX		0x49
-PREFIX	gas	rex64y	X86_REX		0x4A
-PREFIX	gas	rex64yz	X86_REX		0x4B
-PREFIX	gas	rex64x	X86_REX		0x4C
-PREFIX	gas	rex64xz	X86_REX		0x4D
-PREFIX	gas	rex64xy	X86_REX		0x4E
-PREFIX	gas	rex64xyz X86_REX	0x4F
-
-
-# REG parameters:
-# - register name
-# - register type
-# - register index
-# - required BITS setting (0 for any)
-#
-# REGGROUP parameters:
-# - register group name
-# - register group type
-#
-# SEGREG parameters:
-# - segment register name
-# - prefix encoding
-# - register encoding
-# - BITS in which the segment is ignored
-
-# control, debug, and test registers
-REG	cr0	X86_CRREG	0	0
-REG	cr2	X86_CRREG	2	0
-REG	cr3	X86_CRREG	3	0
-REG	cr4	X86_CRREG	4	0
-REG	cr8	X86_CRREG	8	64
-
-REG	dr0	X86_DRREG	0	0
-REG	dr1	X86_DRREG	1	0
-REG	dr2	X86_DRREG	2	0
-REG	dr3	X86_DRREG	3	0
-REG	dr4	X86_DRREG	4	0
-REG	dr5	X86_DRREG	5	0
-REG	dr6	X86_DRREG	6	0
-REG	dr7	X86_DRREG	7	0
-
-REG	tr0	X86_TRREG	0	0
-REG	tr1	X86_TRREG	1	0
-REG	tr2	X86_TRREG	2	0
-REG	tr3	X86_TRREG	3	0
-REG	tr4	X86_TRREG	4	0
-REG	tr5	X86_TRREG	5	0
-REG	tr6	X86_TRREG	6	0
-REG	tr7	X86_TRREG	7	0
-
-# floating point, MMX, and SSE/SSE2 registers
-REG	st0	X86_FPUREG	0	0
-REG	st1	X86_FPUREG	1	0
-REG	st2	X86_FPUREG	2	0
-REG	st3	X86_FPUREG	3	0
-REG	st4	X86_FPUREG	4	0
-REG	st5	X86_FPUREG	5	0
-REG	st6	X86_FPUREG	6	0
-REG	st7	X86_FPUREG	7	0
-
-REG	mm0	X86_MMXREG	0	0
-REG	mm1	X86_MMXREG	1	0
-REG	mm2	X86_MMXREG	2	0
-REG	mm3	X86_MMXREG	3	0
-REG	mm4	X86_MMXREG	4	0
-REG	mm5	X86_MMXREG	5	0
-REG	mm6	X86_MMXREG	6	0
-REG	mm7	X86_MMXREG	7	0
-
-REG	xmm0	X86_XMMREG	0	0
-REG	xmm1	X86_XMMREG	1	0
-REG	xmm2	X86_XMMREG	2	0
-REG	xmm3	X86_XMMREG	3	0
-REG	xmm4	X86_XMMREG	4	0
-REG	xmm5	X86_XMMREG	5	0
-REG	xmm6	X86_XMMREG	6	0
-REG	xmm7	X86_XMMREG	7	0
-REG	xmm8	X86_XMMREG	8	64
-REG	xmm9	X86_XMMREG	9	64
-REG	xmm10	X86_XMMREG	10	64
-REG	xmm11	X86_XMMREG	11	64
-REG	xmm12	X86_XMMREG	12	64
-REG	xmm13	X86_XMMREG	13	64
-REG	xmm14	X86_XMMREG	14	64
-REG	xmm15	X86_XMMREG	15	64
-
-# integer registers
-REG	rax	X86_REG64	0	64
-REG	rcx	X86_REG64	1	64
-REG	rdx	X86_REG64	2	64
-REG	rbx	X86_REG64	3	64
-REG	rsp	X86_REG64	4	64
-REG	rbp	X86_REG64	5	64
-REG	rsi	X86_REG64	6	64
-REG	rdi	X86_REG64	7	64
-REG	r8	X86_REG64	8	64
-REG	r9	X86_REG64	9	64
-REG	r10	X86_REG64	10	64
-REG	r11	X86_REG64	11	64
-REG	r12	X86_REG64	12	64
-REG	r13	X86_REG64	13	64
-REG	r14	X86_REG64	14	64
-REG	r15	X86_REG64	15	64
-
-REG	eax	X86_REG32	0	0
-REG	ecx	X86_REG32	1	0
-REG	edx	X86_REG32	2	0
-REG	ebx	X86_REG32	3	0
-REG	esp	X86_REG32	4	0
-REG	ebp	X86_REG32	5	0
-REG	esi	X86_REG32	6	0
-REG	edi	X86_REG32	7	0
-REG	r8d	X86_REG32	8	64
-REG	r9d	X86_REG32	9	64
-REG	r10d	X86_REG32	10	64
-REG	r11d	X86_REG32	11	64
-REG	r12d	X86_REG32	12	64
-REG	r13d	X86_REG32	13	64
-REG	r14d	X86_REG32	14	64
-REG	r15d	X86_REG32	15	64
-
-REG	ax	X86_REG16	0	0
-REG	cx	X86_REG16	1	0
-REG	dx	X86_REG16	2	0
-REG	bx	X86_REG16	3	0
-REG	sp	X86_REG16	4	0
-REG	bp	X86_REG16	5	0
-REG	si	X86_REG16	6	0
-REG	di	X86_REG16	7	0
-REG	r8w	X86_REG16	8	64
-REG	r9w	X86_REG16	9	64
-REG	r10w	X86_REG16	10	64
-REG	r11w	X86_REG16	11	64
-REG	r12w	X86_REG16	12	64
-REG	r13w	X86_REG16	13	64
-REG	r14w	X86_REG16	14	64
-REG	r15w	X86_REG16	15	64
-
-REG	al	X86_REG8	0	0
-REG	cl	X86_REG8	1	0
-REG	dl	X86_REG8	2	0
-REG	bl	X86_REG8	3	0
-REG	ah	X86_REG8	4	0
-REG	ch	X86_REG8	5	0
-REG	dh	X86_REG8	6	0
-REG	bh	X86_REG8	7	0
-REG	r8b	X86_REG8	8	64
-REG	r9b	X86_REG8	9	64
-REG	r10b	X86_REG8	10	64
-REG	r11b	X86_REG8	11	64
-REG	r12b	X86_REG8	12	64
-REG	r13b	X86_REG8	13	64
-REG	r14b	X86_REG8	14	64
-REG	r15b	X86_REG8	15	64
-
-REG	spl	X86_REG8X	4	64
-REG	bpl	X86_REG8X	5	64
-REG	sil	X86_REG8X	6	64
-REG	dil	X86_REG8X	7	64
-
-REG	rip	X86_RIP		0	64
-
-# floating point, MMX, and SSE/SSE2 registers
-REGGROUP	st	X86_FPUREG
-REGGROUP	mm	X86_MMXREG
-REGGROUP	xmm	X86_XMMREG
-
-# segment registers
-SEGREG	es	0x26	0x00	64
-SEGREG	cs	0x2e	0x01	0
-SEGREG	ss	0x36	0x02	64
-SEGREG	ds	0x3e	0x03	64
-SEGREG	fs	0x64	0x04	0
-SEGREG	gs	0x65	0x05	0
-
diff --git a/modules/arch/x86/x86regtmod.gperf b/modules/arch/x86/x86regtmod.gperf
new file mode 100644
index 0000000..a576343
--- /dev/null
+++ b/modules/arch/x86/x86regtmod.gperf
@@ -0,0 +1,280 @@
+#
+# x86 register and target modifier recognition
+#
+#  Copyright (C) 2002-2007  Peter Johnson
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+%{
+#include <util.h>
+RCSID("$Id$");
+
+#include <ctype.h>
+#include <libyasm.h>
+#include <libyasm/phash.h>
+
+#include "modules/arch/x86/x86arch.h"
+
+enum regtmod_type {
+    REG = 1,
+    REGGROUP,
+    SEGREG,
+    TARGETMOD
+};
+%}
+%ignore-case
+%language=ANSI-C
+%compare-strncmp
+%readonly-tables
+%enum
+%struct-type
+%define hash-function-name regtmod_hash
+%define lookup-function-name regtmod_find
+struct regtmod_parse_data {
+    const char *name;                   /* keyword (first column below) */
+    unsigned int type:8;                /* regtmod_type */
+
+    /* REG: register type (X86_*REG* value, e.g. X86_REG32 or X86_XMMREG)
+     * SEGREG: prefix encoding
+     * Others: 0
+     */
+    unsigned int size_prefix:8;
+
+    /* REG: register index
+     * REGGROUP: register group type
+     * SEGREG: register encoding
+     * TARGETMOD: target modifier
+     */
+    unsigned int data:8;
+
+    /* REG: required bits setting
+     * SEGREG: BITS in which the segment is ignored
+     * Others: 0
+     */
+    unsigned int bits:8;
+};
+%%
+#
+# control, debug, and test registers
+#
+cr0,	REG,	X86_CRREG,	0,	0
+cr2,	REG,	X86_CRREG,	2,	0
+cr3,	REG,	X86_CRREG,	3,	0
+cr4,	REG,	X86_CRREG,	4,	0
+cr8,	REG,	X86_CRREG,	8,	64
+#
+dr0,	REG,	X86_DRREG,	0,	0
+dr1,	REG,	X86_DRREG,	1,	0
+dr2,	REG,	X86_DRREG,	2,	0
+dr3,	REG,	X86_DRREG,	3,	0
+dr4,	REG,	X86_DRREG,	4,	0
+dr5,	REG,	X86_DRREG,	5,	0
+dr6,	REG,	X86_DRREG,	6,	0
+dr7,	REG,	X86_DRREG,	7,	0
+#
+tr0,	REG,	X86_TRREG,	0,	0
+tr1,	REG,	X86_TRREG,	1,	0
+tr2,	REG,	X86_TRREG,	2,	0
+tr3,	REG,	X86_TRREG,	3,	0
+tr4,	REG,	X86_TRREG,	4,	0
+tr5,	REG,	X86_TRREG,	5,	0
+tr6,	REG,	X86_TRREG,	6,	0
+tr7,	REG,	X86_TRREG,	7,	0
+#
+# floating point, MMX, and SSE/SSE2 registers
+#
+st0,	REG,	X86_FPUREG,	0,	0
+st1,	REG,	X86_FPUREG,	1,	0
+st2,	REG,	X86_FPUREG,	2,	0
+st3,	REG,	X86_FPUREG,	3,	0
+st4,	REG,	X86_FPUREG,	4,	0
+st5,	REG,	X86_FPUREG,	5,	0
+st6,	REG,	X86_FPUREG,	6,	0
+st7,	REG,	X86_FPUREG,	7,	0
+#
+mm0,	REG,	X86_MMXREG,	0,	0
+mm1,	REG,	X86_MMXREG,	1,	0
+mm2,	REG,	X86_MMXREG,	2,	0
+mm3,	REG,	X86_MMXREG,	3,	0
+mm4,	REG,	X86_MMXREG,	4,	0
+mm5,	REG,	X86_MMXREG,	5,	0
+mm6,	REG,	X86_MMXREG,	6,	0
+mm7,	REG,	X86_MMXREG,	7,	0
+#
+xmm0,	REG,	X86_XMMREG,	0,	0
+xmm1,	REG,	X86_XMMREG,	1,	0
+xmm2,	REG,	X86_XMMREG,	2,	0
+xmm3,	REG,	X86_XMMREG,	3,	0
+xmm4,	REG,	X86_XMMREG,	4,	0
+xmm5,	REG,	X86_XMMREG,	5,	0
+xmm6,	REG,	X86_XMMREG,	6,	0
+xmm7,	REG,	X86_XMMREG,	7,	0
+xmm8,	REG,	X86_XMMREG,	8,	64
+xmm9,	REG,	X86_XMMREG,	9,	64
+xmm10,	REG,	X86_XMMREG,	10,	64
+xmm11,	REG,	X86_XMMREG,	11,	64
+xmm12,	REG,	X86_XMMREG,	12,	64
+xmm13,	REG,	X86_XMMREG,	13,	64
+xmm14,	REG,	X86_XMMREG,	14,	64
+xmm15,	REG,	X86_XMMREG,	15,	64
+#
+# integer registers
+#
+rax,	REG,	X86_REG64,	0,	64
+rcx,	REG,	X86_REG64,	1,	64
+rdx,	REG,	X86_REG64,	2,	64
+rbx,	REG,	X86_REG64,	3,	64
+rsp,	REG,	X86_REG64,	4,	64
+rbp,	REG,	X86_REG64,	5,	64
+rsi,	REG,	X86_REG64,	6,	64
+rdi,	REG,	X86_REG64,	7,	64
+r8,	REG,	X86_REG64,	8,	64
+r9,	REG,	X86_REG64,	9,	64
+r10,	REG,	X86_REG64,	10,	64
+r11,	REG,	X86_REG64,	11,	64
+r12,	REG,	X86_REG64,	12,	64
+r13,	REG,	X86_REG64,	13,	64
+r14,	REG,	X86_REG64,	14,	64
+r15,	REG,	X86_REG64,	15,	64
+#
+eax,	REG,	X86_REG32,	0,	0
+ecx,	REG,	X86_REG32,	1,	0
+edx,	REG,	X86_REG32,	2,	0
+ebx,	REG,	X86_REG32,	3,	0
+esp,	REG,	X86_REG32,	4,	0
+ebp,	REG,	X86_REG32,	5,	0
+esi,	REG,	X86_REG32,	6,	0
+edi,	REG,	X86_REG32,	7,	0
+r8d,	REG,	X86_REG32,	8,	64
+r9d,	REG,	X86_REG32,	9,	64
+r10d,	REG,	X86_REG32,	10,	64
+r11d,	REG,	X86_REG32,	11,	64
+r12d,	REG,	X86_REG32,	12,	64
+r13d,	REG,	X86_REG32,	13,	64
+r14d,	REG,	X86_REG32,	14,	64
+r15d,	REG,	X86_REG32,	15,	64
+#
+ax,	REG,	X86_REG16,	0,	0
+cx,	REG,	X86_REG16,	1,	0
+dx,	REG,	X86_REG16,	2,	0
+bx,	REG,	X86_REG16,	3,	0
+sp,	REG,	X86_REG16,	4,	0
+bp,	REG,	X86_REG16,	5,	0
+si,	REG,	X86_REG16,	6,	0
+di,	REG,	X86_REG16,	7,	0
+r8w,	REG,	X86_REG16,	8,	64
+r9w,	REG,	X86_REG16,	9,	64
+r10w,	REG,	X86_REG16,	10,	64
+r11w,	REG,	X86_REG16,	11,	64
+r12w,	REG,	X86_REG16,	12,	64
+r13w,	REG,	X86_REG16,	13,	64
+r14w,	REG,	X86_REG16,	14,	64
+r15w,	REG,	X86_REG16,	15,	64
+#
+al,	REG,	X86_REG8,	0,	0
+cl,	REG,	X86_REG8,	1,	0
+dl,	REG,	X86_REG8,	2,	0
+bl,	REG,	X86_REG8,	3,	0
+ah,	REG,	X86_REG8,	4,	0
+ch,	REG,	X86_REG8,	5,	0
+dh,	REG,	X86_REG8,	6,	0
+bh,	REG,	X86_REG8,	7,	0
+r8b,	REG,	X86_REG8,	8,	64
+r9b,	REG,	X86_REG8,	9,	64
+r10b,	REG,	X86_REG8,	10,	64
+r11b,	REG,	X86_REG8,	11,	64
+r12b,	REG,	X86_REG8,	12,	64
+r13b,	REG,	X86_REG8,	13,	64
+r14b,	REG,	X86_REG8,	14,	64
+r15b,	REG,	X86_REG8,	15,	64
+#
+spl,	REG,	X86_REG8X,	4,	64
+bpl,	REG,	X86_REG8X,	5,	64
+sil,	REG,	X86_REG8X,	6,	64
+dil,	REG,	X86_REG8X,	7,	64
+#
+rip,	REG,	X86_RIP,	0,	64
+#
+# floating point, MMX, and SSE/SSE2 registers
+#
+st,	REGGROUP,	0,	X86_FPUREG,	0
+mm,	REGGROUP,	0,	X86_MMXREG,	0
+xmm,	REGGROUP,	0,	X86_XMMREG,	0
+#
+# segment registers
+#
+es,	SEGREG,	0x26,	0x00,	64
+cs,	SEGREG,	0x2e,	0x01,	0
+ss,	SEGREG,	0x36,	0x02,	64
+ds,	SEGREG,	0x3e,	0x03,	64
+fs,	SEGREG,	0x64,	0x04,	0
+gs,	SEGREG,	0x65,	0x05,	0
+#
+# target modifiers
+#
+near,	TARGETMOD,	0,	X86_NEAR,	0
+short,	TARGETMOD,	0,	X86_SHORT,	0
+far,	TARGETMOD,	0,	X86_FAR,	0
+to,	TARGETMOD,	0,	X86_TO,		0
+%%
+
+/* Classify id (id_len characters, any case) as a register, register group,
+ * segment register or target modifier.  On success the entry's encoding is
+ * stored in *data and its kind returned; else YASM_ARCH_NOTREGTMOD. */
+yasm_arch_regtmod
+yasm_x86__parse_check_regtmod(yasm_arch *arch, const char *id, size_t id_len,
+                              uintptr_t *data)
+{
+    yasm_arch_x86 *arch_x86 = (yasm_arch_x86 *)arch;
+    /*@null@*/ const struct regtmod_parse_data *pdata;
+    size_t i;
+    static char lcaseid[8];     /* lowercased copy of id: 7 chars + NUL */
+    unsigned int bits;
+    yasm_arch_regtmod type;
+
+    if (id_len > 7)
+        return YASM_ARCH_NOTREGTMOD;
+    for (i=0; i<id_len; i++)    /* negative char into tolower() is UB */
+        lcaseid[i] = (char)tolower((unsigned char)id[i]);
+    lcaseid[id_len] = '\0';
+
+    pdata = regtmod_find(lcaseid, id_len);
+    if (!pdata)
+        return YASM_ARCH_NOTREGTMOD;
+    type = (yasm_arch_regtmod)pdata->type;  /* NOTE: assumes matching enums */
+    bits = pdata->bits;
+    /* A register tied to one BITS mode is not a register in other modes. */
+    if (type == YASM_ARCH_REG && bits != 0 && arch_x86->mode_bits != bits) {
+        yasm_warn_set(YASM_WARN_GENERAL,
+            N_("`%s' is a register in %u-bit mode"), id, bits);
+        return YASM_ARCH_NOTREGTMOD;
+    }
+    /* A segment register is still accepted in the mode that ignores it. */
+    if (type == YASM_ARCH_SEGREG && bits != 0 && arch_x86->mode_bits == bits) {
+        yasm_warn_set(YASM_WARN_GENERAL,
+            N_("`%s' segment register ignored in %u-bit mode"), id, bits);
+    }
+    if (type == YASM_ARCH_SEGREG)   /* prefix byte above the reg encoding */
+        *data = (pdata->size_prefix<<8) | pdata->data;
+    else
+        *data = pdata->size_prefix | pdata->data;
+    return type;
+}
diff --git a/modules/objfmts/bin/tests/float-err.asm b/modules/objfmts/bin/tests/float-err.asm
index 56a766c..7c608ba 100644
--- a/modules/objfmts/bin/tests/float-err.asm
+++ b/modules/objfmts/bin/tests/float-err.asm
@@ -1,6 +1,6 @@
 ; Tests illegal float handling
 db 1.2
-dw 3.14
+dw 3.14e500
 dd 5.12e100000
 dq 3.141592653589793e-158105
 dt 5653894745.318293470142875104710284019245e-1999
diff --git a/modules/objfmts/bin/tests/float-err.errwarn b/modules/objfmts/bin/tests/float-err.errwarn
index 3efe445..33fcebe 100644
--- a/modules/objfmts/bin/tests/float-err.errwarn
+++ b/modules/objfmts/bin/tests/float-err.errwarn
@@ -1,8 +1,8 @@
 -:2: invalid floating point constant size
--:3: invalid floating point constant size
+-:3: warning: overflow in floating point expression
 -:4: warning: overflow in floating point expression
 -:5: warning: underflow in floating point expression
 -:8: invalid floating point constant size
--:9: invalid floating point constant size
+-:9: warning: overflow in floating point expression
 -:11: warning: overflow in floating point expression
 -:12: warning: underflow in floating point expression
diff --git a/modules/objfmts/bin/tests/float.asm b/modules/objfmts/bin/tests/float.asm
index a2823f3..32d2777 100644
--- a/modules/objfmts/bin/tests/float.asm
+++ b/modules/objfmts/bin/tests/float.asm
@@ -1,8 +1,10 @@
 ; Tests float handling
+dw 3.14
 dd 5.12
 dq 3.141592653589793
 dt 5653894745.318293470142875104710284019245e335
 
+dw -62000.0
 dd -47102940.467103581
 dq -45102571092751092341095.5827509174509178450917845019
 dt -1.e-1000
diff --git a/modules/objfmts/bin/tests/float.hex b/modules/objfmts/bin/tests/float.hex
index 066fa76..33b0d97 100644
--- a/modules/objfmts/bin/tests/float.hex
+++ b/modules/objfmts/bin/tests/float.hex
@@ -1,3 +1,5 @@
+48 
+42 
 0a 
 d7 
 a3 
@@ -20,6 +22,8 @@
 97 
 78 
 44 
+92 
+fb 
 f7 
 ae 
 33 
diff --git a/modules/objfmts/elf/elf-machine.h b/modules/objfmts/elf/elf-machine.h
index 562e098..f2bb8db 100644
--- a/modules/objfmts/elf/elf-machine.h
+++ b/modules/objfmts/elf/elf-machine.h
@@ -69,6 +69,11 @@
                                     unsigned long secthead_count,
                                     elf_section_index shstrtab_index);
 
+enum {  /* flag bits for elf_machine_ssym.sym_rel */
+    ELF_SSYM_SYM_RELATIVE = 1 << 0,     /* tested by elf_is_wrt_sym_relative() */
+    ELF_SSYM_CURPOS_ADJUST = 1 << 1     /* tested by elf_is_wrt_pos_adjusted() */
+};
+
 typedef struct {
     const char *name;       /* should be something like ..name */
     const int sym_rel;      /* symbol or section-relative? */
diff --git a/modules/objfmts/elf/elf-objfmt.c b/modules/objfmts/elf/elf-objfmt.c
index 664d135..5ec17b7 100644
--- a/modules/objfmts/elf/elf-objfmt.c
+++ b/modules/objfmts/elf/elf-objfmt.c
@@ -514,6 +514,8 @@
             wrt = NULL;
         else if (wrt && elf_is_wrt_sym_relative(wrt))
             ;
+        else if (wrt && elf_is_wrt_pos_adjusted(wrt))
+            intn_val = offset + bc->offset;
         else if (vis == YASM_SYM_LOCAL) {
             yasm_bytecode *sym_precbc;
             /* Local symbols need relocation to their section's start, and
diff --git a/modules/objfmts/elf/elf-x86-amd64.c b/modules/objfmts/elf/elf-x86-amd64.c
index ce0cdda..4bc53b2 100644
--- a/modules/objfmts/elf/elf-x86-amd64.c
+++ b/modules/objfmts/elf/elf-x86-amd64.c
@@ -209,8 +209,8 @@
 }
 
 static elf_machine_ssym elf_x86_amd64_ssyms[] = {
-    {"..gotpcrel", 1},
-    {"..got", 1},
+    {"..gotpcrel", ELF_SSYM_SYM_RELATIVE},
+    {"..got", ELF_SSYM_SYM_RELATIVE},
     {"..plt", 0}
 };
 
diff --git a/modules/objfmts/elf/elf-x86-x86.c b/modules/objfmts/elf/elf-x86-x86.c
index 827c861..67a23d0 100644
--- a/modules/objfmts/elf/elf-x86-x86.c
+++ b/modules/objfmts/elf/elf-x86-x86.c
@@ -194,9 +194,9 @@
 }
 
 static elf_machine_ssym elf_x86_x86_ssyms[] = {
-    {"..gotpc", 0},
+    {"..gotpc", ELF_SSYM_CURPOS_ADJUST},
     {"..gotoff", 0},
-    {"..got", 1},
+    {"..got", ELF_SSYM_SYM_RELATIVE},
     {"..plt", 0}
 };
 
diff --git a/modules/objfmts/elf/elf.c b/modules/objfmts/elf/elf.c
index f187b9e..87ee982 100644
--- a/modules/objfmts/elf/elf.c
+++ b/modules/objfmts/elf/elf.c
@@ -99,13 +99,34 @@
 }
 
 /* reloc functions */
+/* Return nonzero if wrt matches an entry of elf_ssyms[] whose flag word
+ * (elf_march->ssyms[i].sym_rel, a mask of ELF_SSYM_* bits) has any bit of
+ * flag set; return 0 otherwise, including when wrt is not found.
+ * File-local helper, defined after the public wrappers that call it.
+ */
+static int elf_ssym_has_flag(yasm_symrec *wrt, int flag);
+
+/* Return nonzero if wrt is an ssym flagged ELF_SSYM_SYM_RELATIVE. */
 int
 elf_is_wrt_sym_relative(yasm_symrec *wrt)
 {
+    return elf_ssym_has_flag(wrt, ELF_SSYM_SYM_RELATIVE);
+}
+
+/* Return nonzero if wrt is an ssym flagged ELF_SSYM_CURPOS_ADJUST. */
+int
+elf_is_wrt_pos_adjusted(yasm_symrec *wrt)
+{
+    return elf_ssym_has_flag(wrt, ELF_SSYM_CURPOS_ADJUST);
+}
+
+static int
+elf_ssym_has_flag(yasm_symrec *wrt, int flag)
+{
     int i;
     for (i=0; (unsigned int)i<elf_march->num_ssyms; i++) {
         if (elf_ssyms[i] == wrt)
-            return elf_march->ssyms[i].sym_rel;
+            return (elf_march->ssyms[i].sym_rel & flag) != 0;
     }
     return 0;
 }
diff --git a/modules/objfmts/elf/elf.h b/modules/objfmts/elf/elf.h
index b876a75..954e052 100644
--- a/modules/objfmts/elf/elf.h
+++ b/modules/objfmts/elf/elf.h
@@ -412,6 +412,7 @@
 
 /* reloc functions */
 int elf_is_wrt_sym_relative(yasm_symrec *wrt);
+int elf_is_wrt_pos_adjusted(yasm_symrec *wrt);
 elf_reloc_entry *elf_reloc_entry_create(yasm_symrec *sym,
                                         /*@null@*/ yasm_symrec *wrt,
                                         yasm_intnum *addr,
diff --git a/modules/objfmts/elf/tests/elfso.hex b/modules/objfmts/elf/tests/elfso.hex
index 9d99c0f..206cce9 100644
--- a/modules/objfmts/elf/tests/elfso.hex
+++ b/modules/objfmts/elf/tests/elfso.hex
@@ -88,10 +88,10 @@
 5b 
 81 
 c3 
-e9 
-ff 
-ff 
-ff 
+03 
+00 
+00 
+00 
 8b 
 83 
 00 
diff --git a/modules/parsers/nasm/nasm-parse.c b/modules/parsers/nasm/nasm-parse.c
index b449dd1..b3927e0 100644
--- a/modules/parsers/nasm/nasm-parse.c
+++ b/modules/parsers/nasm/nasm-parse.c
@@ -755,8 +755,11 @@
             }
             get_next_token();
             ea = parse_memaddr(parser_nasm);
-            if (ea)
+            if (ea) {
                 yasm_ea_set_segreg(ea, segreg);
+                ea->pc_rel = 0;
+                ea->not_pc_rel = 1;
+            }
             return ea;
         }
         case SIZE_OVERRIDE:
@@ -774,6 +777,22 @@
             if (ea)
                 ea->nosplit = 1;
             return ea;
+        case REL:
+            get_next_token();
+            ea = parse_memaddr(parser_nasm);
+            if (ea) {
+                ea->pc_rel = 1;
+                ea->not_pc_rel = 0;
+            }
+            return ea;
+        case ABS:
+            get_next_token();
+            ea = parse_memaddr(parser_nasm);
+            if (ea) {
+                ea->pc_rel = 0;
+                ea->not_pc_rel = 1;
+            }
+            return ea;
         default:
         {
             yasm_expr *e = parse_expr(parser_nasm, NORM_EXPR);
@@ -1131,15 +1150,21 @@
                                objext_valparams, line))
         ;
     else if (yasm__strcasecmp(name, "absolute") == 0) {
-        vp = yasm_vps_first(valparams);
-        if (parser_nasm->absstart)
-            yasm_expr_destroy(parser_nasm->absstart);
-        if (parser_nasm->abspos)
-            yasm_expr_destroy(parser_nasm->abspos);
-        parser_nasm->absstart = yasm_vp_expr(vp, p_object->symtab, line);
-        parser_nasm->abspos = yasm_expr_copy(parser_nasm->absstart);
-        cursect = NULL;
-        parser_nasm->prev_bc = NULL;
+        if (!valparams) {
+            yasm_error_set(YASM_ERROR_SYNTAX,
+                           N_("directive `%s' requires an argument"),
+                           "absolute");
+        } else {
+            vp = yasm_vps_first(valparams);
+            if (parser_nasm->absstart)
+                yasm_expr_destroy(parser_nasm->absstart);
+            if (parser_nasm->abspos)
+                yasm_expr_destroy(parser_nasm->abspos);
+            parser_nasm->absstart = yasm_vp_expr(vp, p_object->symtab, line);
+            parser_nasm->abspos = yasm_expr_copy(parser_nasm->absstart);
+            cursect = NULL;
+            parser_nasm->prev_bc = NULL;
+        }
     } else if (yasm__strcasecmp(name, "align") == 0) {
         /* Really, we shouldn't end up with an align directive in an absolute
          * section (as it's supposed to be only used for nop fill), but handle
@@ -1166,6 +1191,27 @@
                            N_("directive `%s' requires an argument"), "align");
         } else
             dir_align(p_object, valparams, objext_valparams, line);
+    } else if (yasm__strcasecmp(name, "default") == 0) {
+        if (!valparams)
+            ;
+        else {
+            vp = yasm_vps_first(valparams);
+            while (vp) {
+                const char *id = yasm_vp_id(vp);
+                if (id) {
+                    if (yasm__strcasecmp(id, "rel") == 0)
+                        yasm_arch_set_var(p_object->arch, "default_rel", 1);
+                    else if (yasm__strcasecmp(id, "abs") == 0)
+                        yasm_arch_set_var(p_object->arch, "default_rel", 0);
+                    else
+                        yasm_error_set(YASM_ERROR_SYNTAX,
+                                       N_("unrecognized default `%s'"), id);
+                } else
+                    yasm_error_set(YASM_ERROR_SYNTAX,
+                                   N_("unrecognized default value"));
+                vp = yasm_vps_next(vp);
+            }
+        }
     } else
         yasm_error_set(YASM_ERROR_SYNTAX, N_("unrecognized directive `%s'"),
                        name);
diff --git a/modules/parsers/nasm/nasm-parser.h b/modules/parsers/nasm/nasm-parser.h
index 72e4c6a..2f14522 100644
--- a/modules/parsers/nasm/nasm-parser.h
+++ b/modules/parsers/nasm/nasm-parser.h
@@ -45,6 +45,8 @@
     TIMES,
     SEG,
     WRT,
+    ABS,
+    REL,
     NOSPLIT,
     STRICT,
     INSN,
diff --git a/modules/parsers/nasm/nasm-token.re b/modules/parsers/nasm/nasm-token.re
index de58cf0..cfaad6d 100644
--- a/modules/parsers/nasm/nasm-token.re
+++ b/modules/parsers/nasm/nasm-token.re
@@ -246,6 +246,10 @@
             lvalp->int_info = yasm_arch_wordsize(p_object->arch)*8;
             RETURN(SIZE_OVERRIDE);
         }
+        'oword'        {
+            lvalp->int_info = yasm_arch_wordsize(p_object->arch)*8;
+            RETURN(SIZE_OVERRIDE);
+        }
 
         /* pseudo-instructions */
         'db'            { lvalp->int_info = 8; RETURN(DECLARE_DATA); }
@@ -270,6 +274,10 @@
             lvalp->int_info = yasm_arch_wordsize(p_object->arch)*8;
             RETURN(DECLARE_DATA);
         }
+        'do'           {
+            lvalp->int_info = yasm_arch_wordsize(p_object->arch)*8;
+            RETURN(DECLARE_DATA);
+        }
 
         'resb'          { lvalp->int_info = 8; RETURN(RESERVE_SPACE); }
         'reshw'         {
@@ -293,6 +301,10 @@
             lvalp->int_info = yasm_arch_wordsize(p_object->arch)*8;
             RETURN(RESERVE_SPACE);
         }
+        'reso'         {
+            lvalp->int_info = yasm_arch_wordsize(p_object->arch)*8;
+            RETURN(RESERVE_SPACE);
+        }
 
         'incbin'        { RETURN(INCBIN); }
 
@@ -303,6 +315,9 @@
         'seg'           { RETURN(SEG); }
         'wrt'           { RETURN(WRT); }
 
+        'abs'           { RETURN(ABS); }
+        'rel'           { RETURN(REL); }
+
         'nosplit'       { RETURN(NOSPLIT); }
         'strict'        { RETURN(STRICT); }
 
diff --git a/modules/preprocs/nasm/standard.mac b/modules/preprocs/nasm/standard.mac
index 6d285d9..6efd3c8 100644
--- a/modules/preprocs/nasm/standard.mac
+++ b/modules/preprocs/nasm/standard.mac
@@ -114,6 +114,10 @@
 [cpu %1]
 %endmacro
 
+%imacro default 1+.nolist
+[default %1]
+%endmacro
+
 ; NASM compatibility shim
 %define __OUTPUT_FORMAT__ __YASM_OBJFMT__
 
diff --git a/tools/Makefile.inc b/tools/Makefile.inc
index 56a4697..984578b 100644
--- a/tools/Makefile.inc
+++ b/tools/Makefile.inc
@@ -1,9 +1,9 @@
 # $Id$
 
 EXTRA_DIST += tools/re2c/Makefile.inc
-EXTRA_DIST += tools/gap/Makefile.inc
+EXTRA_DIST += tools/genperf/Makefile.inc
 EXTRA_DIST += tools/python-yasm/Makefile.inc
 
 include tools/re2c/Makefile.inc
-include tools/gap/Makefile.inc
+include tools/genperf/Makefile.inc
 include tools/python-yasm/Makefile.inc
diff --git a/tools/gap/Makefile.inc b/tools/gap/Makefile.inc
deleted file mode 100644
index 4457af7..0000000
--- a/tools/gap/Makefile.inc
+++ /dev/null
@@ -1,34 +0,0 @@
-# $Id$
-
-# These utility programs have to be built for BUILD host in cross-build.
-# This makes things rather non-standard automake
-
-noinst_PROGRAMS += gap
-
-gap_SOURCES =
-EXTRA_DIST += tools/gap/gap.c
-EXTRA_DIST += tools/gap/perfect.c
-EXTRA_DIST += tools/gap/perfect.h
-EXTRA_DIST += tools/gap/standard.h
-gap_LDADD  = gap.$(OBJEXT)
-gap_LDADD += gap-perfect.$(OBJEXT)
-gap_LDADD += gap-phash.$(OBJEXT)
-gap_LDADD += gap-xmalloc.$(OBJEXT)
-gap_LDADD += gap-xstrdup.$(OBJEXT)
-gap_LINK = $(CCLD_FOR_BUILD) -o $@
-
-gap.$(OBJEXT): tools/gap/gap.c
-	$(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/gap/gap.c || echo '$(srcdir)/'`tools/gap/gap.c
-
-gap-perfect.$(OBJEXT): tools/gap/perfect.c
-	$(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/gap/perfect.c || echo '$(srcdir)/'`tools/gap/perfect.c
-
-gap-phash.$(OBJEXT): libyasm/phash.c
-	$(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f libyasm/phash.c || echo '$(srcdir)/'`libyasm/phash.c
-
-gap-xmalloc.$(OBJEXT): libyasm/xmalloc.c
-	$(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f libyasm/xmalloc.c || echo '$(srcdir)/'`libyasm/xmalloc.c
-
-gap-xstrdup.$(OBJEXT): libyasm/xstrdup.c
-	$(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f libyasm/xstrdup.c || echo '$(srcdir)/'`libyasm/xstrdup.c
-
diff --git a/tools/gap/gap.c b/tools/gap/gap.c
deleted file mode 100644
index 0f135bf..0000000
--- a/tools/gap/gap.c
+++ /dev/null
@@ -1,854 +0,0 @@
-/* $Id$
- *
- * Generate Arch Parser (GAP): generates ARCHparse.c from ARCHparse.gap.
- *
- *  Copyright (C) 2006-2007  Peter Johnson
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-#include <stdio.h>
-#include <ctype.h>
-#include <stdarg.h>
-#include <string.h>
-#include "tools/gap/perfect.h"
-#include "libyasm/compat-queue.h"
-#include "libyasm/coretype.h"
-#include "libyasm/errwarn.h"
-
-typedef STAILQ_HEAD(slist, sval) slist;
-typedef struct sval {
-    STAILQ_ENTRY(sval) link;
-    char *str;
-} sval;
-
-typedef STAILQ_HEAD(dir_list, dir) dir_list;
-typedef struct dir {
-    STAILQ_ENTRY(dir) link;
-    char *name;
-    const char *func;
-    slist args;
-} dir;
-
-typedef STAILQ_HEAD(dir_byp_list, dir_byp) dir_byp_list;
-typedef struct dir_byp {
-    STAILQ_ENTRY(dir_byp) link;
-    /*@null@*/ char *parser;
-    dir_list dirs;
-} dir_byp;
-
-typedef enum {
-    ARCH = 0,
-    PARSERS,
-    INSN,
-    CPU,
-    CPU_ALIAS,
-    CPU_FEATURE,
-    TARGETMOD,
-    PREFIX,
-    REG,
-    REGGROUP,
-    SEGREG,
-    NUM_DIRS
-} dir_type;
-
-typedef struct {
-    void (*parse_insn) (void);  /* arch-specific parse_insn */
-    int multi_parser[NUM_DIRS]; /* whether it has an initial parser field */
-} arch_handler;
-
-static void x86_parse_insn(void);
-static const arch_handler arch_x86 = {
-    x86_parse_insn,
-    {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}
-};
-
-static struct {
-    const char *name;
-    const arch_handler *arch;
-} archs[] = {
-    {"x86", &arch_x86},
-};
-
-static char line[1024];
-static unsigned int cur_line = 0, next_line = 1;
-static int errors = 0;
-static const arch_handler *arch = NULL;
-
-/* Lists of directives, keyed by parser name */
-static dir_byp_list insnprefix_byp;
-static dir_byp_list cpu_byp;
-static dir_byp_list regtmod_byp;
-
-static void
-report_error(const char *fmt, ...)
-{
-    va_list ap;
-
-    fprintf(stderr, "%u: ", cur_line);
-    va_start(ap, fmt);
-    vfprintf(stderr, fmt, ap);
-    va_end(ap);
-    fputc('\n', stderr);
-    errors++;
-}
-
-void
-yasm__fatal(const char *message, ...)
-{
-    abort();
-}
-
-static void
-dup_slist(slist *out, slist *in)
-{
-    sval *sv;
-
-    STAILQ_INIT(out);
-    STAILQ_FOREACH(sv, in, link) {
-        sval *nsv = yasm_xmalloc(sizeof(sval));
-        nsv->str = yasm__xstrdup(sv->str);
-        STAILQ_INSERT_TAIL(out, nsv, link);
-    }
-}
-
-static dir *
-dup_dir(dir *in)
-{
-    dir *out = yasm_xmalloc(sizeof(dir));
-    out->name = yasm__xstrdup(in->name);
-    out->func = in->func;
-    dup_slist(&out->args, &in->args);
-    return out;
-}
-
-static dir_list *
-get_dirs(dir_byp_list *byp, /*@null@*/ const char *parser)
-{
-    dir_list *found = NULL;
-    dir_byp *db;
-
-    if (STAILQ_EMPTY(byp)) {
-        report_error("PARSERS not yet specified");
-        return NULL;
-    }
-
-    STAILQ_FOREACH(db, byp, link) {
-        if ((!parser && !db->parser) ||
-            (parser && db->parser && strcmp(parser, db->parser) == 0)) {
-            found = &db->dirs;
-            break;
-        }
-    }
-
-    return found;
-}
-
-/* Add a keyword/data to a slist of slist keyed by parser name.
- * Returns nonzero on error.
- */
-static int
-add_dir(dir_byp_list *byp, /*@null@*/ const char *parser, dir *d)
-{
-    dir_list *found = get_dirs(byp, parser);
-
-    if (found) {
-        STAILQ_INSERT_TAIL(found, d, link);
-        return 0;
-    } else if (!parser) {
-        /* Add separately to all */
-        dir_byp *db;
-        int first = 1;
-        STAILQ_FOREACH(db, byp, link) {
-            if (!first)
-                d = dup_dir(d);
-            first = 0;
-            STAILQ_INSERT_TAIL(&db->dirs, d, link);
-        }
-        return 0;
-    } else {
-        report_error("parser not found");
-        return 1;
-    }
-}
-
-static char *
-check_parser(dir_type type)
-{
-    char *parser = NULL;
-
-    if (arch->multi_parser[type]) {
-        parser = strtok(NULL, " \t\n");
-        if (strcmp(parser, "-") == 0)
-            parser = NULL;
-    }
-
-    return parser;
-}
-
-static void
-parse_args(slist *args)
-{
-    char *tok;
-    sval *sv;
-
-    STAILQ_INIT(args);
-
-    tok = strtok(NULL, " \t\n");
-    if (!tok) {
-        report_error("no args");
-        return;
-    }
-
-    while (tok) {
-        sv = yasm_xmalloc(sizeof(sval));
-        sv->str = yasm__xstrdup(tok);
-        STAILQ_INSERT_TAIL(args, sv, link);
-        tok = strtok(NULL, " \t\n");
-    }
-}
-
-static dir *
-parse_generic(dir_type type, const char *func, dir_byp_list *byp)
-{
-    char *parser = check_parser(type);
-    char *name = strtok(NULL, " \t\n");
-    dir *d = yasm_xmalloc(sizeof(dir));
-
-    d->name = yasm__xstrdup(name);
-    d->func = func;
-    parse_args(&d->args);
-
-    add_dir(byp, parser, d);
-    return d;
-}
-
-static void
-parse_arch(void)
-{
-    size_t i;
-    int found = 0;
-    char *tok = strtok(NULL, " \t\n");
-
-    if (!tok) {
-        report_error("ARCH requires an operand");
-        return;
-    }
-    for (i=0; i<sizeof(archs)/sizeof(archs[0]); i++) {
-        if (strcmp(archs[i].name, tok) == 0) {
-            found = 1;
-            break;
-        }
-    }
-    if (!found) {
-        report_error("unrecognized ARCH");
-        return;
-    }
-
-    arch = archs[i].arch;
-}
-
-static void
-parse_parsers(void)
-{
-    dir_byp *db;
-    char *tok;
-
-    if (!arch) {
-        report_error("ARCH not specified before PARSERS");
-        return;
-    }
-
-    tok = strtok(NULL, " \t\n");
-    if (!tok) {
-        report_error("no PARSERS parameter");
-        return;
-    }
-
-    while (tok) {
-        /* Insert into each slist of slist if broken out by parser */
-        if (arch->multi_parser[INSN] || arch->multi_parser[PREFIX]) {
-            db = yasm_xmalloc(sizeof(dir_byp));
-            db->parser = yasm__xstrdup(tok);
-            STAILQ_INIT(&db->dirs);
-
-            STAILQ_INSERT_TAIL(&insnprefix_byp, db, link);
-        }
-        if (arch->multi_parser[CPU] || arch->multi_parser[CPU_ALIAS] ||
-            arch->multi_parser[CPU_FEATURE]) {
-            db = yasm_xmalloc(sizeof(dir_byp));
-            db->parser = yasm__xstrdup(tok);
-            STAILQ_INIT(&db->dirs);
-
-            STAILQ_INSERT_TAIL(&cpu_byp, db, link);
-        }
-        if (arch->multi_parser[TARGETMOD] || arch->multi_parser[REG] ||
-            arch->multi_parser[REGGROUP] || arch->multi_parser[SEGREG]) {
-            db = yasm_xmalloc(sizeof(dir_byp));
-            db->parser = yasm__xstrdup(tok);
-            STAILQ_INIT(&db->dirs);
-
-            STAILQ_INSERT_TAIL(&regtmod_byp, db, link);
-        }
-        tok = strtok(NULL, " \t\n");
-    }
-
-    /* Add NULL (global) versions if not already created */
-    if (STAILQ_EMPTY(&insnprefix_byp)) {
-        db = yasm_xmalloc(sizeof(dir_byp));
-        db->parser = NULL;
-        STAILQ_INIT(&db->dirs);
-
-        STAILQ_INSERT_TAIL(&insnprefix_byp, db, link);
-    }
-    if (STAILQ_EMPTY(&cpu_byp)) {
-        db = yasm_xmalloc(sizeof(dir_byp));
-        db->parser = NULL;
-        STAILQ_INIT(&db->dirs);
-
-        STAILQ_INSERT_TAIL(&cpu_byp, db, link);
-    }
-    if (STAILQ_EMPTY(&regtmod_byp)) {
-        db = yasm_xmalloc(sizeof(dir_byp));
-        db->parser = NULL;
-        STAILQ_INIT(&db->dirs);
-
-        STAILQ_INSERT_TAIL(&regtmod_byp, db, link);
-    }
-}
-
-static void
-x86_parse_insn(void)
-{
-    char *parser = check_parser(INSN);
-    char *bname = strtok(NULL, " \t\n");
-    char *suffix = strtok(NULL, " \t\n");
-    dir *d;
-    slist args;
-    sval *sv;
-
-    if (!suffix) {
-        report_error("INSN requires suffix");
-        return;
-    }
-
-    /* save the remainder of args */
-    parse_args(&args);
-
-    if (suffix[0] != '"') {
-        /* Just one instruction to generate */
-        sv = yasm_xmalloc(sizeof(sval));
-        sv->str = yasm__xstrdup(suffix);
-        STAILQ_INSERT_HEAD(&args, sv, link);
-
-        d = yasm_xmalloc(sizeof(dir));
-        d->name = yasm__xstrdup(bname);
-        d->func = "INSN";
-        d->args = args;
-        add_dir(&insnprefix_byp, parser, d);
-    } else {
-        /* Need to generate with suffixes for gas */
-        char *p;
-        char sufstr[6];
-        size_t bnamelen = strlen(bname);
-
-        strcpy(sufstr, "SUF_X");
-
-        for (p = &suffix[1]; *p != '"'; p++) {
-            sufstr[4] = toupper(*p);
-
-            d = yasm_xmalloc(sizeof(dir));
-
-            d->name = yasm_xmalloc(bnamelen+2);
-            strcpy(d->name, bname);
-            d->name[bnamelen] = tolower(*p);
-            d->name[bnamelen+1] = '\0';
-
-            d->func = "INSN";
-            dup_slist(&d->args, &args);
-
-            sv = yasm_xmalloc(sizeof(sval));
-            sv->str = yasm__xstrdup(sufstr);
-            STAILQ_INSERT_HEAD(&d->args, sv, link);
-
-            add_dir(&insnprefix_byp, "gas", d);
-        }
-
-        /* And finally the version sans suffix */
-        sv = yasm_xmalloc(sizeof(sval));
-        sv->str = yasm__xstrdup("NONE");
-        STAILQ_INSERT_HEAD(&args, sv, link);
-
-        d = yasm_xmalloc(sizeof(dir));
-        d->name = yasm__xstrdup(bname);
-        d->func = "INSN";
-        d->args = args;
-        add_dir(&insnprefix_byp, parser, d);
-    }
-}
-
-static void
-parse_insn(void)
-{
-    if (!arch) {
-        report_error("ARCH not defined prior to INSN");
-        return;
-    }
-    arch->parse_insn();
-}
-
-static void
-parse_cpu(void)
-{
-    dir *d = parse_generic(CPU, "CPU", &cpu_byp);
-    sval *sv = yasm_xmalloc(sizeof(sval));
-    sv->str = yasm__xstrdup("CPU_MODE_VERBATIM");
-    STAILQ_INSERT_TAIL(&d->args, sv, link);
-}
-
-static void
-parse_cpu_alias(void)
-{
-    char *parser = check_parser(CPU_ALIAS);
-    char *name = strtok(NULL, " \t\n");
-    char *alias = strtok(NULL, " \t\n");
-    dir_list *dirs = get_dirs(&cpu_byp, parser);
-    dir *aliasd, *d;
-
-    if (!alias) {
-        report_error("CPU_ALIAS requires an operand");
-        return;
-    }
-
-    STAILQ_FOREACH(aliasd, dirs, link) {
-        if (strcmp(aliasd->name, alias) == 0)
-            break;
-    }
-    if (!aliasd) {
-        report_error("could not find `%s'", alias);
-        return;
-    }
-
-    d = yasm_xmalloc(sizeof(dir));
-    d->name = yasm__xstrdup(name);
-    d->func = "CPU";
-    dup_slist(&d->args, &aliasd->args);
-
-    add_dir(&cpu_byp, parser, d);
-}
-
-static void
-parse_cpu_feature(void)
-{
-    char *parser = check_parser(CPU_FEATURE);
-    char *name = strtok(NULL, " \t\n");
-    dir *name_dir = yasm_xmalloc(sizeof(dir));
-    dir *noname_dir = yasm_xmalloc(sizeof(dir));
-    sval *sv;
-
-    name_dir->name = yasm__xstrdup(name);
-    name_dir->func = "CPU_FEATURE";
-    parse_args(&name_dir->args);
-
-    noname_dir->name = yasm_xmalloc(strlen(name)+3);
-    strcpy(noname_dir->name, "no");
-    strcat(noname_dir->name, name);
-    noname_dir->func = name_dir->func;
-    dup_slist(&noname_dir->args, &name_dir->args);
-
-    sv = yasm_xmalloc(sizeof(sval));
-    sv->str = yasm__xstrdup("CPU_MODE_SET");
-    STAILQ_INSERT_TAIL(&name_dir->args, sv, link);
-
-    sv = yasm_xmalloc(sizeof(sval));
-    sv->str = yasm__xstrdup("CPU_MODE_CLEAR");
-    STAILQ_INSERT_TAIL(&noname_dir->args, sv, link);
-
-    add_dir(&cpu_byp, parser, name_dir);
-    add_dir(&cpu_byp, parser, noname_dir);
-}
-
-static void
-parse_targetmod(void)
-{
-    parse_generic(TARGETMOD, "TARGETMOD", &regtmod_byp);
-}
-
-static void
-parse_prefix(void)
-{
-    parse_generic(PREFIX, "PREFIX", &insnprefix_byp);
-}
-
-static void
-parse_reg(void)
-{
-    parse_generic(REG, "REG", &regtmod_byp);
-}
-
-static void
-parse_reggroup(void)
-{
-    parse_generic(REGGROUP, "REGGROUP", &regtmod_byp);
-}
-
-static void
-parse_segreg(void)
-{
-    parse_generic(SEGREG, "SEGREG", &regtmod_byp);
-}
-
-/* make the c output for the perfect hash tab array */
-static void
-make_c_tab(
-    FILE *f,
-    const char *which,
-    const char *parser,
-    bstuff *tab,        /* table indexed by b */
-    ub4 smax,           /* range of scramble[] */
-    ub4 blen,           /* b in 0..blen-1, power of 2 */
-    ub4 *scramble)      /* used in final hash */
-{
-    ub4   i;
-    /* table for the mapping for the perfect hash */
-    if (blen >= USE_SCRAMBLE) {
-        /* A way to make the 1-byte values in tab bigger */
-        if (smax > UB2MAXVAL+1) {
-            fprintf(f, "static const unsigned long %s_", which);
-            if (parser)
-                fprintf(f, "%s_", parser);
-            fprintf(f, "scramble[] = {\n");
-            for (i=0; i<=UB1MAXVAL; i+=4)
-                fprintf(f, "0x%.8lx, 0x%.8lx, 0x%.8lx, 0x%.8lx,\n",
-                    scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3]);
-        } else {
-            fprintf(f, "static const unsigned short %s_", which);
-            if (parser)
-                fprintf(f, "%s_", parser);
-            fprintf(f, "scramble[] = {\n");
-            for (i=0; i<=UB1MAXVAL; i+=8)
-                fprintf(f, 
-"0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx,\n",
-                    scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3],
-                    scramble[i+4], scramble[i+5], scramble[i+6], scramble[i+7]);
-        }
-        fprintf(f, "};\n");
-        fprintf(f, "\n");
-    }
-
-    if (blen > 0) {
-        /* small adjustments to _a_ to make values distinct */
-        if (smax <= UB1MAXVAL+1 || blen >= USE_SCRAMBLE)
-            fprintf(f, "static const unsigned char %s_", which);
-        else
-            fprintf(f, "static const unsigned short %s_", which);
-        if (parser)
-            fprintf(f, "%s_", parser);
-        fprintf(f, "tab[] = {\n");
-
-        if (blen < 16) {
-            for (i=0; i<blen; ++i)
-                fprintf(f, "%3ld,", scramble[tab[i].val_b]);
-        } else if (blen <= 1024) {
-            for (i=0; i<blen; i+=16)
-                fprintf(f, "%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n",
-                    scramble[tab[i+0].val_b], scramble[tab[i+1].val_b], 
-                    scramble[tab[i+2].val_b], scramble[tab[i+3].val_b], 
-                    scramble[tab[i+4].val_b], scramble[tab[i+5].val_b], 
-                    scramble[tab[i+6].val_b], scramble[tab[i+7].val_b], 
-                    scramble[tab[i+8].val_b], scramble[tab[i+9].val_b], 
-                    scramble[tab[i+10].val_b], scramble[tab[i+11].val_b], 
-                    scramble[tab[i+12].val_b], scramble[tab[i+13].val_b], 
-                    scramble[tab[i+14].val_b], scramble[tab[i+15].val_b]); 
-        } else if (blen < USE_SCRAMBLE) {
-            for (i=0; i<blen; i+=8)
-                fprintf(f, "%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n",
-                    scramble[tab[i+0].val_b], scramble[tab[i+1].val_b], 
-                    scramble[tab[i+2].val_b], scramble[tab[i+3].val_b], 
-                    scramble[tab[i+4].val_b], scramble[tab[i+5].val_b], 
-                    scramble[tab[i+6].val_b], scramble[tab[i+7].val_b]); 
-        } else {
-            for (i=0; i<blen; i+=16)
-                fprintf(f, "%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,\n",
-                    tab[i+0].val_b, tab[i+1].val_b, 
-                    tab[i+2].val_b, tab[i+3].val_b, 
-                    tab[i+4].val_b, tab[i+5].val_b, 
-                    tab[i+6].val_b, tab[i+7].val_b, 
-                    tab[i+8].val_b, tab[i+9].val_b, 
-                    tab[i+10].val_b, tab[i+11].val_b, 
-                    tab[i+12].val_b, tab[i+13].val_b, 
-                    tab[i+14].val_b, tab[i+15].val_b); 
-        }
-        fprintf(f, "};\n");
-        fprintf(f, "\n");
-    }
-}
-
-static void
-perfect_dir(FILE *out, const char *which, const char *parser, dir_list *dirs)
-{
-    ub4 nkeys;
-    key *keys;
-    hashform form;
-    bstuff *tab;                /* table indexed by b */
-    hstuff *tabh;               /* table indexed by hash value */
-    ub4 smax;           /* scramble[] values in 0..smax-1, a power of 2 */
-    ub4 alen;                   /* a in 0..alen-1, a power of 2 */
-    ub4 blen;                   /* b in 0..blen-1, a power of 2 */
-    ub4 salt;                   /* a parameter to the hash function */
-    gencode final;              /* code for final hash */
-    ub4 i;
-    ub4 scramble[SCRAMBLE_LEN]; /* used in final hash function */
-    char buf[10][80];           /* buffer for generated code */
-    char *buf2[10];             /* also for generated code */
-    int cpumode = strcmp(which, "cpu") == 0;
-    dir *d;
-
-    /* perfect hash configuration */
-    form.mode = NORMAL_HM;
-    form.hashtype = STRING_HT;
-    form.perfect = MINIMAL_HP;
-    form.speed = SLOW_HS;
-
-    /* set up code for final hash */
-    final.line = buf2;
-    final.used = 0;
-    final.len  = 10;
-    for (i=0; i<10; i++)
-        final.line[i] = buf[i];
-
-    /* build list of keys */
-    nkeys = 0;
-    keys = NULL;
-    STAILQ_FOREACH(d, dirs, link) {
-        key *k = yasm_xmalloc(sizeof(key));
-
-        k->name_k = yasm__xstrdup(d->name);
-        k->len_k = (ub4)strlen(d->name);
-        k->next_k = keys;
-        keys = k;
-        nkeys++;
-    }
-
-    /* find the hash */
-    findhash(&tab, &tabh, &alen, &blen, &salt, &final, 
-             scramble, &smax, keys, nkeys, &form);
-
-    /* output the dir table: this should loop up to smax for NORMAL_HP,
-     * or up to pakd.nkeys for MINIMAL_HP.
-     */
-    fprintf(out, "static const %s_parse_data %s_", which, which);
-    if (parser)
-        fprintf(out, "%s_", parser);
-    fprintf(out, "pd[%lu] = {\n", nkeys);
-    for (i=0; i<nkeys; i++) {
-        if (tabh[i].key_h) {
-            sval *sv;
-            STAILQ_FOREACH(d, dirs, link) {
-                if (strcmp(d->name, tabh[i].key_h->name_k) == 0)
-                    break;
-            }
-            if (!d) {
-                report_error("internal error: could not find `%s'",
-                             tabh[i].key_h->name_k);
-                break;
-            }
-            if (cpumode)
-                fprintf(out, "{\"%s\",", d->name);
-            else
-                fprintf(out, "%s(\"%s\",", d->func, d->name);
-            STAILQ_FOREACH(sv, &d->args, link) {
-                fprintf(out, " %s", sv->str);
-                if (STAILQ_NEXT(sv, link))
-                    fprintf(out, ",");
-            }
-            fprintf(out, cpumode ? "}" : ")");
-        } else
-            fprintf(out, "  { NULL }");
-
-        if (i < nkeys-1)
-            fprintf(out, ",");
-        fprintf(out, "\n");
-    }
-    fprintf(out, "};\n");
-
-    /* output the hash tab[] array */
-    make_c_tab(out, which, parser, tab, smax, blen, scramble);
-
-    /* The hash function */
-    fprintf(out, "#define tab %s_", which);
-    if (parser)
-        fprintf(out, "%s_", parser);
-    fprintf(out, "tab\n");
-    fprintf(out, "static const %s_parse_data *\n%s_", which, which);
-    if (parser)
-        fprintf(out, "%s_", parser);
-    fprintf(out, "find(const char *key, size_t len)\n");
-    fprintf(out, "{\n");
-    fprintf(out, "  const %s_parse_data *ret;\n", which);
-    for (i=0; i<final.used; ++i)
-        fprintf(out, final.line[i]);
-    fprintf(out, "  if (rsl >= %lu) return NULL;\n", nkeys);
-    fprintf(out, "  ret = &%s_", which);
-    if (parser)
-        fprintf(out, "%s_", parser);
-    fprintf(out, "pd[rsl];\n");
-    fprintf(out, "  if (strcmp(key, ret->name) != 0) return NULL;\n");
-    fprintf(out, "  return ret;\n");
-    fprintf(out, "}\n");
-    fprintf(out, "#undef tab\n\n");
-
-    free(tab);
-    free(tabh);
-}
-
-/* Get an entire "real" line from the input file by combining any
- * \\\n continuations.
- */
-static int get_line(FILE *in)
-{
-    char *p = line;
-    cur_line = next_line;
-
-    if (feof(in))
-        return 0;
-
-    while (p < &line[1023-128]) {
-        if (!fgets(p, 128, in))
-            return 1;
-        next_line++;
-        /* if continuation, strip out leading whitespace */
-        if (p > line) {
-            char *p2 = p;
-            while (isspace(*p2)) p2++;
-            if (p2 > p)
-                memmove(p, p2, strlen(p2)+1);
-        }
-        while (*p) p++;
-        if (p[-2] != '\\' || p[-1] != '\n') {
-            if (p[-1] == '\n')
-                p[-1] = '\0';
-            return 1;
-        }
-        p -= 2;
-    }
-    return 0;
-}
-
-static struct {
-    const char *name;
-    int indx;
-    void (*handler) (void);
-} directives[] = {
-    {"ARCH", ARCH, parse_arch},
-    {"PARSERS", PARSERS, parse_parsers},
-    {"INSN", INSN, parse_insn},
-    {"CPU", CPU, parse_cpu},
-    {"CPU_ALIAS", CPU_ALIAS, parse_cpu_alias},
-    {"CPU_FEATURE", CPU_FEATURE, parse_cpu_feature},
-    {"TARGETMOD", TARGETMOD, parse_targetmod},
-    {"PREFIX", PREFIX, parse_prefix},
-    {"REG", REG, parse_reg},
-    {"REGGROUP", REGGROUP, parse_reggroup},
-    {"SEGREG", SEGREG, parse_segreg},
-};
-
-int
-main(int argc, char *argv[])
-{
-    FILE *in, *out;
-    size_t i;
-    char *tok;
-    int count[NUM_DIRS];
-    dir_byp *db;
-
-    for (i=0; i<NUM_DIRS; i++)
-        count[i] = 0;
-
-    if (argc != 3) {
-        fprintf(stderr, "Usage: gap <in> <out>\n");
-        return EXIT_FAILURE;
-    }
-
-    in = fopen(argv[1], "rt");
-    if (!in) {
-        fprintf(stderr, "Could not open `%s' for reading\n", argv[1]);
-        return EXIT_FAILURE;
-    }
-
-    STAILQ_INIT(&insnprefix_byp);
-    STAILQ_INIT(&cpu_byp);
-    STAILQ_INIT(&regtmod_byp);
-
-    /* Parse input file */
-    while (get_line(in)) {
-        int found;
-        /*printf("%s\n", line);*/
-        tok = strtok(line, " \t\n");
-        if (!tok)
-            continue;
-
-        /* Comments start with # as the first thing on a line */
-        if (tok[0] == '#')
-            continue;
-
-        /* Look for directive */
-        found = 0;
-        for (i=0; i<sizeof(directives)/sizeof(directives[0]); i++) {
-            if (strcmp(tok, directives[i].name) == 0) {
-                count[directives[i].indx]++;
-                directives[i].handler();
-                found = 1;
-                break;
-            }
-        }
-        if (!found)
-            report_error("unknown directive `%s'\n", tok);
-    }
-
-    /* Output some informational statistics */
-    printf("Directives read:\n");
-    for (i=0; i<sizeof(directives)/sizeof(directives[0]); i++)
-        printf("\t%d\t%s\n", count[directives[i].indx], directives[i].name);
-
-    if (errors > 0)
-        return EXIT_FAILURE;
-
-    out = fopen(argv[2], "wt");
-    if (!out) {
-        fprintf(stderr, "Could not open `%s' for writing\n", argv[2]);
-        return EXIT_FAILURE;
-    }
-
-    /* Get perfect hashes for the three lists of directives */
-    STAILQ_FOREACH(db, &insnprefix_byp, link)
-        perfect_dir(out, "insnprefix", db->parser, &db->dirs);
-    STAILQ_FOREACH(db, &cpu_byp, link)
-        perfect_dir(out, "cpu", db->parser, &db->dirs);
-    STAILQ_FOREACH(db, &regtmod_byp, link)
-        perfect_dir(out, "regtmod", db->parser, &db->dirs);
-
-    if (errors > 0)
-        return EXIT_FAILURE;
-
-    return EXIT_SUCCESS;
-}
-
diff --git a/tools/genperf/Makefile.inc b/tools/genperf/Makefile.inc
new file mode 100644
index 0000000..b7e80ed
--- /dev/null
+++ b/tools/genperf/Makefile.inc
@@ -0,0 +1,39 @@
+# $Id$
+# genperf runs during the build, so it is compiled for the BUILD host by hand.
+
+noinst_PROGRAMS += genperf
+
+# Suffix rule for genperf.  It lists no prerequisites on purpose: make ignores
+# them on a suffix rule (or stops treating it as one), so targets generated
+# this way must depend on genperf$(EXEEXT) themselves.
+SUFFIXES += .gperf
+.gperf.c:
+	$(top_builddir)/genperf$(EXEEXT) $< $@
+
+genperf_SOURCES =
+EXTRA_DIST += tools/genperf/genperf.c
+EXTRA_DIST += tools/genperf/perfect.c
+EXTRA_DIST += tools/genperf/perfect.h
+EXTRA_DIST += tools/genperf/standard.h
+genperf_LDADD  = genperf.$(OBJEXT)
+genperf_LDADD += gp-perfect.$(OBJEXT)
+genperf_LDADD += gp-phash.$(OBJEXT)
+genperf_LDADD += gp-xmalloc.$(OBJEXT)
+genperf_LDADD += gp-xstrdup.$(OBJEXT)
+genperf_LINK = $(CCLD_FOR_BUILD) -o $@
+
+genperf.$(OBJEXT): tools/genperf/genperf.c
+	$(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/genperf/genperf.c || echo '$(srcdir)/'`tools/genperf/genperf.c
+
+gp-perfect.$(OBJEXT): tools/genperf/perfect.c
+	$(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/genperf/perfect.c || echo '$(srcdir)/'`tools/genperf/perfect.c
+
+gp-phash.$(OBJEXT): libyasm/phash.c
+	$(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f libyasm/phash.c || echo '$(srcdir)/'`libyasm/phash.c
+
+gp-xmalloc.$(OBJEXT): libyasm/xmalloc.c
+	$(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f libyasm/xmalloc.c || echo '$(srcdir)/'`libyasm/xmalloc.c
+
+gp-xstrdup.$(OBJEXT): libyasm/xstrdup.c
+	$(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f libyasm/xstrdup.c || echo '$(srcdir)/'`libyasm/xstrdup.c
+
diff --git a/tools/genperf/genperf.c b/tools/genperf/genperf.c
new file mode 100644
index 0000000..1fcf2da
--- /dev/null
+++ b/tools/genperf/genperf.c
@@ -0,0 +1,540 @@
+/* $Id$
+ *
+ * Generate Minimal Perfect Hash (genperf)
+ *
+ *  Copyright (C) 2006-2007  Peter Johnson
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdio.h>
+#include <ctype.h>
+#include <stdarg.h>
+#include <string.h>
+#include "tools/genperf/perfect.h"
+#include "libyasm/compat-queue.h"
+#include "libyasm/coretype.h"
+#include "libyasm/errwarn.h"
+
+typedef STAILQ_HEAD(slist, sval) slist;     /* queue of sval entries */
+typedef struct sval {
+    STAILQ_ENTRY(sval) link;    /* linkage within an slist */
+    char *str;                  /* text held by this entry */
+} sval;
+
+typedef STAILQ_HEAD(keyword_list, keyword) keyword_list;    /* queue of keywords */
+typedef struct keyword {
+    STAILQ_ENTRY(keyword) link; /* linkage within a keyword_list */
+    char *name;                 /* keyword text, up to the first delimiter */
+    char *args;                 /* rest of the input line, EOL stripped */
+    unsigned int line;          /* value of cur_line when the keyword was read */
+} keyword;
+
+static unsigned int cur_line = 1;   /* input line counter used in messages and #line output */
+static int errors = 0;              /* incremented by every report_error() call */
+
+static void
+report_error(const char *fmt, ...)
+{
+    va_list args;
+    /* Emits "<cur_line>: <formatted message>" plus a newline on stderr. */
+    errors++;
+    fprintf(stderr, "%u: ", cur_line);
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+    fputs("\n", stderr);
+}
+
+void
+yasm__fatal(const char *message, ...)
+{
+    abort();    /* message and its arguments are ignored; the process just aborts */
+}
+
+/* Emit the C tables used by the generated hash: scramble[] (large blen) and tab[]. */
+static void
+make_c_tab(
+    FILE *f,            /* stream the generated C is written to */
+    bstuff *tab,        /* table indexed by b */
+    ub4 smax,           /* range of scramble[] */
+    ub4 blen,           /* b in 0..blen-1, power of 2 */
+    ub4 *scramble)      /* used in final hash */
+{
+    ub4   i;
+    /* scramble[] is only emitted when tab[] will hold raw val_b values */
+    if (blen >= USE_SCRAMBLE) {
+        /* A way to make the 1-byte values in tab bigger */
+        if (smax > UB2MAXVAL+1) {
+            fprintf(f, "  static const unsigned long scramble[] = {\n");
+            for (i=0; i<=UB1MAXVAL; i+=4)
+                fprintf(f, "    0x%.8lx, 0x%.8lx, 0x%.8lx, 0x%.8lx,\n",
+                    scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3]);
+        } else {
+            fprintf(f, "  static const unsigned short scramble[] = {\n");
+            for (i=0; i<=UB1MAXVAL; i+=8)
+                fprintf(f,
+"    0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx,\n",
+                    scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3],
+                    scramble[i+4], scramble[i+5], scramble[i+6], scramble[i+7]);
+        }
+        fprintf(f, "  };\n");
+        fprintf(f, "\n");
+    }
+
+    if (blen > 0) {
+        /* small adjustments to _a_ to make values distinct */
+        if (smax <= UB1MAXVAL+1 || blen >= USE_SCRAMBLE)
+            fprintf(f, "  static const unsigned char ");
+        else
+            fprintf(f, "  static const unsigned short ");
+        fprintf(f, "tab[] = {\n");
+        /* scramble[] entries are ub4, printed unsigned (%lu) like the rest of the file */
+        if (blen < 16) {
+            for (i=0; i<blen; ++i)
+                fprintf(f, "%3lu,", scramble[tab[i].val_b]);
+        } else if (blen <= 1024) {
+            for (i=0; i<blen; i+=16)
+                fprintf(f, "    %lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,\n",
+                    scramble[tab[i+0].val_b], scramble[tab[i+1].val_b],
+                    scramble[tab[i+2].val_b], scramble[tab[i+3].val_b],
+                    scramble[tab[i+4].val_b], scramble[tab[i+5].val_b],
+                    scramble[tab[i+6].val_b], scramble[tab[i+7].val_b],
+                    scramble[tab[i+8].val_b], scramble[tab[i+9].val_b],
+                    scramble[tab[i+10].val_b], scramble[tab[i+11].val_b],
+                    scramble[tab[i+12].val_b], scramble[tab[i+13].val_b],
+                    scramble[tab[i+14].val_b], scramble[tab[i+15].val_b]);
+        } else if (blen < USE_SCRAMBLE) {
+            for (i=0; i<blen; i+=8)
+                fprintf(f, "    %lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,\n",
+                    scramble[tab[i+0].val_b], scramble[tab[i+1].val_b],
+                    scramble[tab[i+2].val_b], scramble[tab[i+3].val_b],
+                    scramble[tab[i+4].val_b], scramble[tab[i+5].val_b],
+                    scramble[tab[i+6].val_b], scramble[tab[i+7].val_b]);
+        } else {    /* blen >= USE_SCRAMBLE: raw val_b; scramble[] was emitted above */
+            for (i=0; i<blen; i+=16)
+                fprintf(f, "    %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,\n",
+                    tab[i+0].val_b, tab[i+1].val_b,
+                    tab[i+2].val_b, tab[i+3].val_b,
+                    tab[i+4].val_b, tab[i+5].val_b,
+                    tab[i+6].val_b, tab[i+7].val_b,
+                    tab[i+8].val_b, tab[i+9].val_b,
+                    tab[i+10].val_b, tab[i+11].val_b,
+                    tab[i+12].val_b, tab[i+13].val_b,
+                    tab[i+14].val_b, tab[i+15].val_b);
+        }
+        fprintf(f, "  };\n");
+        fprintf(f, "\n");
+    }
+}
+
+/* Write to out the function lookup_function_name, which maps a key to its
+ * struct struct_name entry (or NULL) using a minimal perfect hash of kws. */
+static void
+perfect_gen(FILE *out, const char *lookup_function_name, const char *struct_name,
+            keyword_list *kws, const char *filename)
+{
+    ub4 nkeys;
+    key *keys;
+    hashform form;
+    bstuff *tab;                /* table indexed by b */
+    hstuff *tabh;               /* table indexed by hash value */
+    ub4 smax;           /* scramble[] values in 0..smax-1, a power of 2 */
+    ub4 alen;                   /* a in 0..alen-1, a power of 2 */
+    ub4 blen;                   /* b in 0..blen-1, a power of 2 */
+    ub4 salt;                   /* a parameter to the hash function */
+    gencode final;              /* code for final hash */
+    ub4 i;
+    ub4 scramble[SCRAMBLE_LEN]; /* used in final hash function */
+    char buf[10][80];           /* buffer for generated code */
+    char *buf2[10];             /* also for generated code */
+    keyword *kw;
+
+    /* perfect hash configuration */
+    form.mode = NORMAL_HM;
+    form.hashtype = STRING_HT;
+    form.perfect = MINIMAL_HP;
+    form.speed = SLOW_HS;
+
+    /* set up code for final hash: final.line[i] points at buf[i] */
+    final.line = buf2;
+    final.used = 0;
+    final.len  = 10;
+    for (i=0; i<10; i++)
+        final.line[i] = buf[i];
+
+    /* build list of keys (each new key is pushed on the front) */
+    nkeys = 0;
+    keys = NULL;
+    STAILQ_FOREACH(kw, kws, link) {
+        key *k = yasm_xmalloc(sizeof(key));
+
+        k->name_k = yasm__xstrdup(kw->name);
+        k->len_k = (ub4)strlen(kw->name);
+        k->next_k = keys;
+        keys = k;
+        nkeys++;
+    }
+
+    /* find the hash */
+    findhash(&tab, &tabh, &alen, &blen, &salt, &final,
+             scramble, &smax, keys, nkeys, &form);
+
+    /* The hash function beginning */
+    fprintf(out, "static const struct %s *\n", struct_name);
+    fprintf(out, "%s(const char *key, size_t len)\n", lookup_function_name);
+    fprintf(out, "{\n");
+
+    /* output the pd[] table, one entry per hash slot: nkeys entries because
+     * the hash is MINIMAL_HP (a NORMAL_HP hash would need smax entries).
+     */
+    fprintf(out, "  static const struct %s pd[%lu] = {\n", struct_name, nkeys);
+    for (i=0; i<nkeys; i++) {
+        if (tabh[i].key_h) {
+            STAILQ_FOREACH(kw, kws, link) {
+                if (strcmp(kw->name, tabh[i].key_h->name_k) == 0)
+                    break;
+            }
+            if (!kw) {  /* the loop ran off the end without a match */
+                report_error("internal error: could not find `%s'",
+                             tabh[i].key_h->name_k);
+                break;
+            }
+            fprintf(out, "#line %u \"%s\"\n", kw->line, filename);
+            fprintf(out, "    {\"%s\"%s}", kw->name, kw->args);
+        } else
+            fprintf(out, "    { NULL }");
+
+        if (i < nkeys-1)
+            fprintf(out, ",");
+        fprintf(out, "\n");
+    }
+    fprintf(out, "  };\n");
+
+    /* output the hash tab[] array */
+    make_c_tab(out, tab, smax, blen, scramble);
+
+    /* The hash function body; generated lines are data, never a format string */
+    fprintf(out, "  const struct %s *ret;\n", struct_name);
+    for (i=0; i<final.used; ++i)
+        fputs(final.line[i], out);
+    fprintf(out, "  if (rsl >= %lu) return NULL;\n", nkeys);
+    fprintf(out, "  ret = &pd[rsl];\n");
+    fprintf(out, "  if (strcmp(key, ret->name) != 0) return NULL;\n");
+    fprintf(out, "  return ret;\n");
+    fprintf(out, "}\n\n");
+
+    free(tab);
+    free(tabh);
+}
+
+/* genperf <in> <out>: parse the .gperf input <in> and write the generated C code to <out>. */
+int
+main(int argc, char *argv[])
+{
+    FILE *in, *out;
+    size_t i;
+    char *ch;
+    static char line[1024], tmp[1024];
+    static char struct_name[128] = "";
+    static char lookup_function_name[128] = "in_word_set";
+    static char language[16] = "";
+    static char delimiters[16] = ",\r\n";
+    static char name[128];
+    static char filename[768];
+    int need_struct = 0, have_struct = 0;
+    int go_keywords = 0;
+    int ignore_case = 0;        /* this and the next two options are parsed but unused */
+    int compare_strncmp = 0;
+    int readonly_tables = 0;
+    slist usercode, usercode2;  /* verbatim code written before/after the lookup function */
+    keyword_list keywords;
+    sval *sv;
+    keyword *kw;
+
+    if (argc != 3) {
+        fprintf(stderr, "Usage: genperf <in> <out>\n");
+        return EXIT_FAILURE;
+    }
+
+    in = fopen(argv[1], "rt");
+    if (!in) {
+        fprintf(stderr, "Could not open `%s' for reading\n", argv[1]);
+        return EXIT_FAILURE;
+    }
+
+    ch = argv[1];       /* copy the input name for #line output, turning \ into / */
+    i = 0;
+    while (*ch && i < 767) {
+        if (*ch == '\\') {
+            filename[i++] = '/';
+            ch++;
+        } else
+            filename[i++] = *ch++;
+    }
+    filename[i] = '\0';
+
+    STAILQ_INIT(&usercode);
+    STAILQ_INIT(&usercode2);
+    STAILQ_INIT(&keywords);
+
+    /* Parse declarations section; cur_line is the number of the line in `line' */
+    while (fgets(line, 1024, in)) {
+        /* Comments start with # as the first thing on a line */
+        if (line[0] == '#') {
+            cur_line++;
+            continue;
+        }
+
+        /* Handle structure declaration */
+        if (strncmp(line, "struct", 6) == 0) {
+            int braces;
+
+            if (!need_struct) {
+                report_error("struct without %%struct-type declaration");
+                return EXIT_FAILURE;
+            }
+            if (have_struct) {
+                report_error("more than one struct declaration");
+                return EXIT_FAILURE;
+            }
+            have_struct = 1;
+
+            /* copy struct name (ctype functions need an unsigned char value) */
+            ch = &line[6];
+            while (isspace((unsigned char)*ch))
+                ch++;
+            i = 0;
+            while ((isalnum((unsigned char)*ch) || *ch == '_') && i < 127)
+                struct_name[i++] = *ch++;
+            if (i == 127) {
+                report_error("struct name too long");
+                return EXIT_FAILURE;
+            }
+            struct_name[i] = '\0';
+
+            sv = yasm_xmalloc(sizeof(sval));
+            sprintf(tmp, "#line %u \"%s\"\n", cur_line, filename);
+            sv->str = yasm__xstrdup(tmp);
+            STAILQ_INSERT_TAIL(&usercode, sv, link);
+
+            braces = 0;
+            do {
+                /* count braces to determine when we're done */
+                ch = line;
+                while (*ch != '\0') {
+                    if (*ch == '{')
+                        braces++;
+                    if (*ch == '}')
+                        braces--;
+                    ch++;
+                }
+                sv = yasm_xmalloc(sizeof(sval));
+                sv->str = yasm__xstrdup(line);
+                STAILQ_INSERT_TAIL(&usercode, sv, link);
+                cur_line++;
+                if (braces <= 0)
+                    break;
+            } while (fgets(line, 1024, in));
+            continue;   /* cur_line was already advanced once per struct line above */
+        }
+
+        /* Ignore non-declaration lines */
+        if (line[0] != '%') {
+            cur_line++;
+            continue;
+        }
+
+        /* %% terminates declarations section */
+        if (line[1] == '%') {
+            if (need_struct && !have_struct) {
+                report_error("%%struct-type declaration, but no struct found");
+                return EXIT_FAILURE;
+            }
+            go_keywords = 1;
+            cur_line++; /* count the %% line so keyword line numbers are right */
+            break;      /* move on to keywords section */
+        }
+
+        /* %{ begins a verbatim code section that ends with %} */
+        if (line[1] == '{') {
+            sv = yasm_xmalloc(sizeof(sval));
+            sprintf(tmp, "#line %u \"%s\"\n\n", cur_line, filename);
+            sv->str = yasm__xstrdup(tmp);
+            STAILQ_INSERT_TAIL(&usercode, sv, link);
+
+            while (fgets(line, 1024, in)) {
+                cur_line++;
+                if (line[0] == '%' && line[1] == '}')
+                    break;
+                sv = yasm_xmalloc(sizeof(sval));
+                sv->str = yasm__xstrdup(line);
+                STAILQ_INSERT_TAIL(&usercode, sv, link);
+            }
+            cur_line++;
+            continue;
+        }
+
+        if (strncmp(&line[1], "ignore-case", 11) == 0) {
+            ignore_case = 1;
+        } else if (strncmp(&line[1], "compare-strncmp", 15) == 0) {
+            compare_strncmp = 1;
+        } else if (strncmp(&line[1], "readonly-tables", 15) == 0) {
+            readonly_tables = 1;
+        } else if (strncmp(&line[1], "language=", 9) == 0) {
+            ch = &line[10];
+            i = 0;
+            while (*ch != '\0' && *ch != '\n' && i<15)
+                language[i++] = *ch++;
+            language[i] = '\0';
+        } else if (strncmp(&line[1], "delimiters=", 11) == 0) {
+            ch = &line[12];
+            i = 0;
+            while (*ch != '\0' && i<15)     /* keeps the EOL characters as delimiters */
+                delimiters[i++] = *ch++;
+            delimiters[i] = '\0';
+        } else if (strncmp(&line[1], "enum", 4) == 0) {
+            /* unused */
+        } else if (strncmp(&line[1], "struct-type", 11) == 0) {
+            need_struct = 1;
+        } else if (strncmp(&line[1], "define", 6) == 0) {
+            /* Several different defines we need to handle */
+            ch = &line[7];
+            while (isspace((unsigned char)*ch))
+                ch++;
+
+            if (strncmp(ch, "hash-function-name", 18) == 0) {
+                /* unused */
+            } else if (strncmp(ch, "lookup-function-name", 20) == 0) {
+                ch += 20;       /* skip the define name wherever it started */
+                while (isspace((unsigned char)*ch))
+                    ch++;
+                i = 0;
+                while ((isalnum((unsigned char)*ch) || *ch == '_') && i < 127)
+                    lookup_function_name[i++] = *ch++;
+                if (i == 127) {
+                    report_error("lookup function name too long");
+                    return EXIT_FAILURE;
+                }
+                lookup_function_name[i] = '\0';
+            } else {
+                fprintf(stderr, "%u: unrecognized define `%s'\n", cur_line,
+                        line);
+            }
+        } else {
+            fprintf(stderr, "%u: unrecognized declaration `%s'\n", cur_line,
+                    line);
+        }
+
+        cur_line++;
+    }
+
+    if (!go_keywords) {
+        report_error("no keywords section found");
+        return EXIT_FAILURE;
+    }
+
+    /* Parse keywords section */
+    while (fgets(line, 1024, in)) {
+        char *d;
+
+        /* Comments start with # as the first thing on a line */
+        if (line[0] == '#') {
+            cur_line++;
+            continue;
+        }
+
+        /* Keywords section terminated with %% */
+        if (line[0] == '%' && line[1] == '%')
+            break;
+
+        /* Look for name: everything up to the first delimiter (or the NUL) */
+        ch = &line[0];
+        i = 0;
+        while (strchr(delimiters, *ch) == NULL && i < 127)
+            name[i++] = *ch++;
+        if (i == 127) {
+            report_error("keyword name too long");
+            return EXIT_FAILURE;
+        }
+        name[i] = '\0';
+
+        /* Strip EOL */
+        d = strrchr(ch, '\n');
+        if (d)
+            *d = '\0';
+        d = strrchr(ch, '\r');
+        if (d)
+            *d = '\0';
+        kw = yasm_xmalloc(sizeof(keyword));
+        kw->name = yasm__xstrdup(name);
+        kw->args = yasm__xstrdup(ch);
+        kw->line = cur_line;
+        STAILQ_INSERT_TAIL(&keywords, kw, link);
+        cur_line++;
+    }
+
+    if (errors > 0)
+        return EXIT_FAILURE;
+
+    /* Pull in any end code */
+    if (!feof(in)) {
+        sv = yasm_xmalloc(sizeof(sval));
+        sprintf(tmp, "#line %u \"%s\"\n\n", cur_line, filename);
+        sv->str = yasm__xstrdup(tmp);
+        STAILQ_INSERT_TAIL(&usercode2, sv, link);
+
+        while (fgets(line, 1024, in)) {
+            sv = yasm_xmalloc(sizeof(sval));
+            sv->str = yasm__xstrdup(line);
+            STAILQ_INSERT_TAIL(&usercode2, sv, link);
+        }
+    }
+
+    /* output code */
+    out = fopen(argv[2], "wt");
+    if (!out) {
+        fprintf(stderr, "Could not open `%s' for writing\n", argv[2]);
+        return EXIT_FAILURE;
+    }
+
+    fprintf(out, "/* %s code produced by genperf */\n", language);
+    fprintf(out, "/* Command-line: genperf %s %s */\n", argv[1], argv[2]);
+
+    STAILQ_FOREACH(sv, &usercode, link)
+        fprintf(out, "%s", sv->str);
+
+    /* Get perfect hash */
+    perfect_gen(out, lookup_function_name, struct_name, &keywords, filename);
+
+    STAILQ_FOREACH(sv, &usercode2, link)
+        fprintf(out, "%s", sv->str);
+
+    fclose(out);
+
+    if (errors > 0) {   /* do not leave a partial output file behind */
+        remove(argv[2]);
+        return EXIT_FAILURE;
+    }
+
+    return EXIT_SUCCESS;
+}
+
diff --git a/tools/gap/perfect.c b/tools/genperf/perfect.c
similarity index 98%
rename from tools/gap/perfect.c
rename to tools/genperf/perfect.c
index d121804..579d360 100644
--- a/tools/gap/perfect.c
+++ b/tools/genperf/perfect.c
@@ -50,10 +50,10 @@
 */
 
 #include <string.h>
-#include "tools/gap/standard.h"
+#include "tools/genperf/standard.h"
 #include "libyasm/coretype.h"
 #include "libyasm/phash.h"
-#include "tools/gap/perfect.h"
+#include "tools/genperf/perfect.h"
 
 #define CHECKSTATE 8
 
@@ -565,7 +565,7 @@
         if (!augment(tabb, tabh, tabq, blen, scramble, smax, &tabb[i], nkeys, 
                      i+1, form))
         {
-          printf("fail to map group of size %ld for tab size %ld\n", j, blen);
+          fprintf(stderr, "fail to map group of size %ld for tab size %ld\n", j, blen);
           return FALSE;
         }
 
@@ -631,7 +631,7 @@
   {
     if (form->perfect == MINIMAL_HP)
     {
-      printf("fatal error: Cannot find perfect hash for user (A,B) pairs\n");
+      fprintf(stderr, "fatal error: Cannot find perfect hash for user (A,B) pairs\n");
       exit(EXIT_FAILURE);
     }
     else
@@ -644,7 +644,7 @@
                                                 nkeys : *smax));
       if (!perfect(*tabb, tabh, tabq, *blen, *smax, scramble, nkeys, form))
       {
-        printf("fatal error: Cannot find perfect hash for user (A,B) pairs\n");
+        fprintf(stderr, "fatal error: Cannot find perfect hash for user (A,B) pairs\n");
         exit(EXIT_FAILURE);
       }
     }
@@ -894,7 +894,7 @@
         else
         {
           duplicates(*tabb, *blen, keys, form);      /* check for duplicates */
-          printf("fatal error: Cannot perfect hash: cannot find distinct (A,B)\n");
+          fprintf(stderr, "fatal error: Cannot perfect hash: cannot find distinct (A,B)\n");
           exit(EXIT_FAILURE);
         }
         bad_initkey = 0;
@@ -922,7 +922,7 @@
         }
         else
         {
-          printf("fatal error: Cannot perfect hash: cannot build tab[]\n");
+          fprintf(stderr, "fatal error: Cannot perfect hash: cannot build tab[]\n");
           exit(EXIT_FAILURE);
         }
         bad_perfect = 0;
diff --git a/tools/gap/perfect.h b/tools/genperf/perfect.h
similarity index 100%
rename from tools/gap/perfect.h
rename to tools/genperf/perfect.h
diff --git a/tools/gap/standard.h b/tools/genperf/standard.h
similarity index 100%
rename from tools/gap/standard.h
rename to tools/genperf/standard.h
diff --git a/tools/python-yasm/Makefile.inc b/tools/python-yasm/Makefile.inc
index 9a1193c..b427558 100644
--- a/tools/python-yasm/Makefile.inc
+++ b/tools/python-yasm/Makefile.inc
@@ -20,7 +20,7 @@
 EXTRA_DIST += tools/python-yasm/yasm.pyx
 EXTRA_DIST += $(PYBINDING_DEPS)
 
-if HAVE_PYTHON
+if HAVE_PYTHON_BINDINGS
 
 # Use Pyxelator to generate Pyrex function headers.
 _yasm.pxi: ${HEADERS}
diff --git a/tools/python-yasm/pyxelator/wrap_yasm.py b/tools/python-yasm/pyxelator/wrap_yasm.py
index 45ee623..58553ab 100755
--- a/tools/python-yasm/pyxelator/wrap_yasm.py
+++ b/tools/python-yasm/pyxelator/wrap_yasm.py
@@ -25,7 +25,7 @@
     CPPFLAGS += " -DYASM_LIB_INTERNAL"
     CPPFLAGS += " -DYASM_BC_INTERNAL"
     CPPFLAGS += " -DYASM_EXPR_INTERNAL"
-    files = [ 'libyasm.h', 'libyasm/assocdat.h' ]
+    files = [ 'libyasm.h', 'libyasm/assocdat.h', 'libyasm/bitvect.h' ]
 
     syms = get_syms( ['yasm'], [YASM_DIR] )
     def cb(trans_unit, node, *args):
diff --git a/tools/python-yasm/tests/Makefile.inc b/tools/python-yasm/tests/Makefile.inc
index 6bf72fd..50c499d 100644
--- a/tools/python-yasm/tests/Makefile.inc
+++ b/tools/python-yasm/tests/Makefile.inc
@@ -7,7 +7,7 @@
 EXTRA_DIST += tools/python-yasm/tests/test_intnum.py
 EXTRA_DIST += tools/python-yasm/tests/test_symrec.py
 
-if HAVE_PYTHON
+if HAVE_PYTHON_BINDINGS
 
 TESTS_ENVIRONMENT += PYTHON=${PYTHON}
 TESTS += tools/python-yasm/tests/python_test.sh
