Optimize non-strict push with a 66h operand-size prefix to the byte-immediate
form when possible in NASM syntax.

Previously, the forms of push that perform this optimization were disabled in
NASM syntax because they conflicted with the size=BITS case.  Fix this by
reordering them after the size=BITS form so they can be active in NASM syntax.

svn path=/trunk/yasm/; revision=2127
diff --git a/modules/arch/x86/gen_x86_insn.py b/modules/arch/x86/gen_x86_insn.py
index fa4069b..4a7c715 100755
--- a/modules/arch/x86/gen_x86_insn.py
+++ b/modules/arch/x86/gen_x86_insn.py
@@ -1139,25 +1139,6 @@
     opcode=[0x6A],
     operands=[Operand(type="Imm", size=8, relaxed=True, dest="SImm")])
 add_group("push",
-    suffix="w",
-    cpu=["186"],
-    parsers=["gas"],
-    opersize=16,
-    def_opersize_64=64,
-    opcode1=[0x6A],
-    opcode2=[0x68],
-    operands=[Operand(type="Imm", size=16, relaxed=True, dest="Imm",
-                      opt="SImm8")])
-add_group("push",
-    suffix="l",
-    not64=True,
-    parsers=["gas"],
-    opersize=32,
-    opcode1=[0x6A],
-    opcode2=[0x68],
-    operands=[Operand(type="Imm", size=32, relaxed=True, dest="Imm",
-                      opt="SImm8")])
-add_group("push",
     suffix="q",
     only64=True,
     opersize=64,
@@ -1174,6 +1155,23 @@
     opcode2=[0x68],
     operands=[Operand(type="Imm", size="BITS", relaxed=True, dest="Imm",
                       opt="SImm8")])
+add_group("push",
+    suffix="w",
+    cpu=["186"],
+    opersize=16,
+    def_opersize_64=64,
+    opcode1=[0x6A],
+    opcode2=[0x68],
+    operands=[Operand(type="Imm", size=16, relaxed=True, dest="Imm",
+                      opt="SImm8")])
+add_group("push",
+    suffix="l",
+    not64=True,
+    opersize=32,
+    opcode1=[0x6A],
+    opcode2=[0x68],
+    operands=[Operand(type="Imm", size=32, relaxed=True, dest="Imm",
+                      opt="SImm8")])
 # Need these when we don't match the BITS size, but they need to be
 # below the above line so the optimizer can kick in by default.
 add_group("push",
diff --git a/modules/arch/x86/tests/pushnosize.asm b/modules/arch/x86/tests/pushnosize.asm
index 3d99977..ada9be1 100644
--- a/modules/arch/x86/tests/pushnosize.asm
+++ b/modules/arch/x86/tests/pushnosize.asm
@@ -2,7 +2,7 @@
 push 0		; 6A 00 - equivalent to push byte 0
 push byte 0	; 6A 00
 push word 0	; 6A 00 - optimized
-push dword 0	; 66 68 00000000
+push dword 0	; 66 6A 00 - optimized
 push strict byte 0	; 6A 00
 push strict word 0	; 68 0000
 push strict dword 0	; 66 68 00000000
@@ -17,7 +17,7 @@
 [bits 32]
 push 0		; 6A 00 - equivalent to push byte 0
 push byte 0	; 6A 00
-push word 0	; 66 68 0000
+push word 0	; 66 6A 00 - optimized
 push dword 0	; 6A 00 - optimized
 push strict byte 0	; 6A 00
 push strict word 0	; 66 68 0000
@@ -32,12 +32,12 @@
 
 [bits 64]
 push 0		; same as bits 32 output
-push byte 0
-push word 0
-push dword 0	; optimized to byte
-push strict byte 0
-push strict word 0
-push strict dword 0
+push byte 0	; 6A 00; 64 bits pushed onto stack
+push word 0	; 66 6A 00 - 66h prefix, optimized to byte
+push dword 0	; 6A 00 - optimized to byte; note 64 bits pushed onto stack
+push strict byte 0	; 6A 00; 64 bits pushed onto stack
+push strict word 0	; 66 68 0000
+push strict dword 0	; 68 00000000; note 64 bits pushed onto stack
 push 128
 push byte 128	; warning
 push word 128
diff --git a/modules/arch/x86/tests/pushnosize.hex b/modules/arch/x86/tests/pushnosize.hex
index de28feb..0896020 100644
--- a/modules/arch/x86/tests/pushnosize.hex
+++ b/modules/arch/x86/tests/pushnosize.hex
@@ -5,10 +5,7 @@
 6a 
 00 
 66 
-68 
-00 
-00 
-00 
+6a 
 00 
 6a 
 00 
@@ -51,8 +48,7 @@
 6a 
 00 
 66 
-68 
-00 
+6a 
 00 
 6a 
 00 
@@ -99,8 +95,7 @@
 6a 
 00 
 66 
-68 
-00 
+6a 
 00 
 6a 
 00