| #!/usr/bin/python |
| # print list of instructions LLVM inc files, for Capstone disassembler. |
| # this will be put into capstone/<arch>.h |
| # by Nguyen Anh Quynh, 2019 |
| |
| import sys |
| |
# All three input files are read further down (sys.argv[1], [2] and [3]),
# so show the usage line unless every one of them was supplied.
if len(sys.argv) < 4:
    print("Syntax: %s <GenAsmMatcher.inc> <GenInstrInfo.inc> MappingInsn.inc" %sys.argv[0])
    sys.exit(1)

# MappingInsn.inc: kept as raw lines, searched later whenever a mnemonic
# has to be taken from the existing mapping instead of being derived.
with open(sys.argv[3]) as f:
    mapping = f.readlines()

# Banner that opens the generated output.
print("""/* Capstone Disassembly Engine, http://www.capstone-engine.org */
/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2019 */
""")
| |
| # lib/Target/X86/X86GenAsmMatcher.inc |
| # static const MatchEntry MatchTable1[] = { |
| # { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, { }, }, |
| |
| # extract insn from GenAsmMatcher Table |
| # return (arch, mnem, insn_id) |
def extract_insn(line):
    """Parse one MatchEntry row of a GenAsmMatcher table.

    A row has the form
        { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, { }, },
    The mnemonic is the token inside the comment of the first field, and the
    second field is the ``<arch>::<insn_id>`` pair.

    Returns a tuple ``(arch, mnem, insn_id)``.  A dotted mnemonic such as
    ``mov.s`` is cut at the first dot, and the ``AArch64`` namespace is
    reported as ``ARM64``.

    Raises IndexError when the row has fewer fields than expected or the
    second field contains no ``::``.
    """
    fields = line.split(',')
    insn_raw = fields[1].strip()

    # fields[0] is "{ <index> /* <mnem> */".  Splitting on any whitespace keeps
    # the mnemonic at position 3 even when the row is still indented.
    insn_mnem = fields[0].split()[3]
    # X86 mov.s: keep only the part before the dot
    insn_mnem = insn_mnem.split('.')[0]

    parts = insn_raw.split('::')
    arch = parts[0]
    # AArch64 -> ARM64 (compare case-insensitively against the upper-case form)
    if arch.upper() == 'AARCH64':
        arch = 'ARM64'
    return (arch, insn_mnem, parts[1])
| |
| |
| |
| # extract all insn lines from GenAsmMatcher |
| # return arch, insn_id_list, insn_lines |
def extract_matcher(filename):
    """Collect instruction ids and mnemonics from a GenAsmMatcher.inc file.

    The rows of one match table are parsed with extract_insn().  MatchTable1
    is used when the file declares it, otherwise MatchTable0.  Rows whose
    mnemonic starts with ``__`` are ignored.

    Returns a tuple ``(arch, first_insn, insn_id_list)``:
      arch          arch name reported for the first accepted row (or None)
      first_insn    instruction id of the first accepted row (or None)
      insn_id_list  dict mapping instruction id -> mnemonic; the first
                    mnemonic seen for an id wins

    Raises ValueError when the file declares neither match table.
    """
    with open(filename) as f:
        lines = f.readlines()

    table0 = 'MatchEntry MatchTable0[] = {'
    table1 = 'MatchEntry MatchTable1[] = {'

    # Pick the table to read: Table1 wins as soon as it is seen, Table0 is
    # only used when no Table1 declaration exists anywhere in the file.
    pattern = None
    for line in lines:
        if table0 in line:
            pattern = table0
        elif table1 in line:
            pattern = table1
            break

    if pattern is None:
        raise ValueError("no MatchTable0/MatchTable1 declaration found in %s" % filename)

    tables_seen = 0
    insn_id_list = {}
    arch = None
    first_insn = None

    for line in lines:
        line = line.strip()
        if not line:
            continue

        if pattern in line:
            tables_seen += 1
            continue

        # only rows between the table declaration and its closing "};" count
        if tables_seen != 1:
            continue
        if line == '};':
            break

        _arch, mnem, insn_id = extract_insn(line)
        if mnem.startswith('__'):
            continue
        if not first_insn:
            arch, first_insn = _arch, insn_id
        if insn_id not in insn_id_list:
            insn_id_list[insn_id] = mnem

    return arch, first_insn, insn_id_list
| |
| # GenAsmMatcher.inc |
| #arch, first_insn, insn_id_list, match_lines = extract_matcher(sys.argv[1]) |
arch, first_insn, insn_id_list = extract_matcher(sys.argv[1])
if arch is None:
    # extract_matcher() accepted no row, so there is no arch name and no
    # starting instruction to work with; stop with a readable message.
    sys.exit("%s: no instruction found in the matcher table" % sys.argv[1])
arch = arch.upper()
| |
| #for line in insn_id_list: |
| # print(line) |
| |
| |
# Capstone instruction ids already written to stdout, in emission order.
insn_list = []


# Shape of an entry in MappingInsn.inc (only its first line is used here):
#{
#	X86_AAA, X86_INS_AAA,
##ifndef CAPSTONE_DIET
#	{ 0 }, { 0 }, { X86_GRP_NOT64BITMODE, 0 }, 0, 0
##endif
#},
def print_entry(arch, insn_id, mnem, mapping, mnem_can_be_wrong):
    """Print the Capstone instruction id for one LLVM instruction, once.

    arch               arch name; upper-cased before use
    insn_id            LLVM instruction id, e.g. ``AAA``
    mnem               mnemonic; only used when mnem_can_be_wrong is false
    mapping            lines of MappingInsn.inc
    mnem_can_be_wrong  when false the id is built as ``<ARCH>_INS_<MNEM>``;
                       when true it is taken from the first mapping line
                       whose first field is ``<ARCH>_<insn_id>``, and nothing
                       is printed if no such line exists

    Each id is printed as ``<id>,`` on stdout and remembered in insn_list so
    that it is never printed twice.
    """
    # Trace line goes to stderr so that stdout carries only the generated list.
    sys.stderr.write("%s %s %s %s\n" % (arch, insn_id, mnem, mnem_can_be_wrong))

    if not mnem_can_be_wrong:
        insn = "%s_INS_%s" % (arch.upper(), mnem.upper())
    else:
        # so mnem can be wrong, we need to verify with MappingInsn.inc
        key = "%s_%s" % (arch.upper(), insn_id)
        insn = None
        for entry in mapping:
            fields = entry.split(',')
            # a usable row has at least "<key>, <capstone id>"
            if len(fields) > 1 and fields[0].strip() == key:
                insn = fields[1].strip()
                break
        if insn is None:
            return

    if insn in insn_list:
        return
    print("%s," % insn)
    insn_list.append(insn)
| |
| |
# extract from GenInstrInfo.inc, because the insn id is in order
enum_count = 0      # number of "enum {" lines seen so far
meet_insn = False   # becomes True once the line starting with first_insn is reached

# GenInstrInfo.inc
with open(sys.argv[2]) as f:
    lines = f.readlines()

count = 0           # number of instruction ids handled
last_mnem = None    # mnemonic chosen for the most recent id that had a trusted one
| |
# The instruction ids are read from the first "enum {" of GenInstrInfo.inc,
# starting at first_insn and stopping at INSTRUCTION_LIST_END.

# Instruction ids whose mnemonic is fixed by hand.
# NOTE(review): 'CMOVP_Fp3' looks like a typo for 'CMOVP_Fp32' (compare
# 'CMOVNP_Fp32'); as written it can only match an id spelled exactly that
# way.  Left untouched because changing it alters the generated list - confirm.
_FIXED_MNEM = {
    'BSWAP16r_BAD': 'BSWAP',
    'CMOVNP_Fp32': 'FCMOVNP',
    'CMOVP_Fp3': 'FCMOVP',
    'CMPSDrm_Int': 'CMPSD',
    'MOVSX16rm16': 'MOVSX',
    'MOVZX16rm16': 'MOVZX',
    'ST_Fp32m': 'FST',
    'CMOVNP_Fp64': 'FCMOVNU',
    'CMPSDrr_Int': 'CMPSD',
    'CMPSSrm_Int': 'CMPSS',
    'VCMPSDrm_Int': 'VCMPSD',
    'VCMPSSrm_Int': 'VCMPSS',
    'VPCMOVYrrr_REV': 'VPCMOV',
    'VRNDSCALESDZm': 'VRNDSCALESD',
    'VRNDSCALESSZm': 'VRNDSCALESS',
    'VMAXCPDZ128rm': 'VMAXPD',
    'VMAXCPSZ128rm': 'VMAXPS',
    'VMAXCSDZrm': 'VMAXSD',
    'VMAXCSSZrm': 'VMAXSS',
    'VMINCPDZ128rm': 'VMINPD',
    'VMINCPSZ128rm': 'VMINPS',
    'VMINCSDZrm': 'VMINSD',
    'VMINCSSZrm': 'VMINSS',
    'VMOV64toPQIZrm': 'VMOVQ',
    'VPERMIL2PDYrr_REV': 'VPERMILPD',
    'VPERMIL2PSYrr_REV': 'VPERMILPS',
    'VCVTSD2SI64Zrm_Int': 'VCVTSD2SI',
    'VCVTSD2SSrm_Int': 'VCVTSD2SS',
    'VCVTSS2SI64Zrm_Int': 'VCVTSS2SI',
    'VCVTTSD2SI64Zrm_Int': 'VCVTTSD2SI',
    'VCVTTSS2SI64Zrm_Int': 'VCVTTSS2SI',
}

# Id prefixes whose mnemonic is a leading slice of the id itself.  Order
# matters: VFMSUBADD / VFMADDSUB must be tried before VFMSUB / VFMADD.
_FMA_PREFIXES = (
    'VFMSUBADD',
    'VFMADDSUB',
    'VFMADD',
    'VFMSUB',
    'VFNMADD',
    'VFNMSUB',
)


def _known_mnem(insn, matcher_mnems):
    """Return the trusted mnemonic for instruction id insn, or None.

    Lookup order: the hand-written table, then the FMA prefix rule, then
    matcher_mnems (instruction id -> mnemonic, upper-cased on return).
    """
    if insn in _FIXED_MNEM:
        return _FIXED_MNEM[insn]

    for prefix in _FMA_PREFIXES:
        if insn.startswith(prefix):
            # "<prefix>123xy..." keeps prefix + 3 digits + 2 letters,
            # "<prefix>xy..." keeps prefix + 2 letters.  Slicing (instead of
            # indexing) also copes with an id that is just the bare prefix.
            has_digits = insn[len(prefix):len(prefix) + 1].isdigit()
            return insn[:len(prefix) + (5 if has_digits else 2)]

    if insn in matcher_mnems:
        # trust old mapping table
        return matcher_mnems[insn].upper()

    return None


for line in lines:
    line = line.strip()
    if not line:
        continue

    if line == 'enum {':
        enum_count += 1
        continue

    # only the first enum holds the instruction ids
    if enum_count != 1:
        continue

    if 'INSTRUCTION_LIST_END' in line:
        break

    insn = None
    if meet_insn:
        # enum items
        insn = line.split('=')[0].strip()
        if 'CALLSTACK' in insn or 'TAILJUMP' in insn:
            # pseudo instruction
            insn = None
    elif line.startswith(first_insn):
        insn = line.split('=')[0].strip()
        meet_insn = True

    if not insn:
        continue

    count += 1
    mnem = _known_mnem(insn, insn_id_list)
    if mnem is not None:
        last_mnem = mnem
        print_entry(arch.upper(), insn, last_mnem, mapping, False)
    else:
        # the last option when we cannot find mnem: use the last good mnem
        print_entry(arch.upper(), insn, last_mnem, mapping, True)