blob: 3e84cf18ff01d38955e942511a89b2500d9120bf [file] [log] [blame]
#!/usr/bin/env python
import argparse
import re
import subprocess
import sys
# Module-level output switches. main() sets each one to True when the matching
# command-line flag is given; the reporting code further down reads them.
# Print the per-category summary rows as ';'-separated values (-csv).
useCSV = False
# Additionally group generic specializations of the stdlib
# (-group-specializations).
groupSpecializations = False
# When printing specialization groups, also list each group's symbols
# (-list-group-specializations).
listGroupSpecializations = False
def main(arguments):
    """Parse command-line `arguments` and run the selected report.

    `arguments` is the option list without the program name. The binary is
    parsed once with parse_segments() and then handed to exactly one of
    categorize(), uncategorized(), list_category() or show_all(), checked in
    that order of precedence. Returns None.
    """
    global useCSV, groupSpecializations, listGroupSpecializations

    parser = argparse.ArgumentParser(
        description='Analyze the code size in a binary')
    option = parser.add_argument
    option('-arch', type=str, help='the arch to look at', default='arm64')
    option('-categorize', action='store_true', help='categorize symbols',
           dest='build_categories', default=False)
    option('-list-category', type=str, help='list symbols in category')
    option('-group-specializations', action='store_true',
           help='group specializations')
    option('-list-group-specializations', action='store_true',
           help='list group specializations')
    option('-csv', dest='use_csv', action='store_true',
           help='print results as csv')
    option('-uncategorized', action='store_true',
           help='show all uncategorized symbols', dest='show_uncategorized',
           default=False)
    option('bin', help='the binary')
    parser.set_defaults(use_csv=False)
    args = parser.parse_args(arguments)

    # The module-level switches are only ever turned on here, never reset.
    if args.use_csv:
        useCSV = True
        print("Using csv")
    if args.group_specializations:
        groupSpecializations = True
    if args.list_group_specializations:
        listGroupSpecializations = True

    segments = parse_segments(args.bin, args.arch)

    # Exactly one report runs; the first requested mode wins.
    if args.build_categories:
        categorize(segments)
        return
    if args.show_uncategorized:
        uncategorized(segments)
        return
    if args.list_category:
        list_category(segments, args.list_category)
        return
    show_all(segments)
class Symbol(object):
    """A single symbol: its name, its mangled name, a count and a size."""

    def __init__(self, name, mangled_name, size):
        """Store both names; `size` is converted with int()."""
        numeric_size = int(size)
        self.name = name
        self.mangled_name = mangled_name
        # Every symbol starts with a count of one.
        self.count = 1
        self.size = numeric_size
def get_symbol_size(sym):
    """Return `sym.size`; usable as a sort key for symbols."""
    size = sym.size
    return size
class Segment(object):
    """A named segment together with the list of its sections."""

    def __init__(self, name):
        """Create a segment called `name` with an empty section list."""
        # Each instance gets its own list; sections are appended later.
        self.sections = list()
        self.name = name
class Section(object):
    """A named section with a size and the list of symbols it contains."""

    def __init__(self, name, size):
        """Create a section; `size` is stored exactly as given."""
        # Each instance gets its own list; symbols are appended later.
        self.symbols = list()
        self.size = size
        self.name = name
class Category(object):
    """A named bucket of symbols that tracks the sum of their sizes."""

    def __init__(self, name):
        """Create an empty category called `name` with a size of zero."""
        self.name = name
        self.symbols = []
        self.size = 0

    def add(self, symbol):
        """Append `symbol` and add its `size` to the running total."""
        members = self.symbols
        members.append(symbol)
        self.size += symbol.size
class GenericSpecializationGroupKey(object):
    """Hashable (module_name, type_name, specialization) triple.

    Two keys are equal, and hash alike, when all three fields are equal, so
    instances can be used as dictionary keys.
    """

    def __init__(self, module_name, type_name, specialization):
        self.module_name = module_name
        self.type_name = type_name
        self.specialization = specialization

    def _fields(self):
        """Return the three identifying fields as a tuple."""
        return (self.module_name, self.type_name, self.specialization)

    def __hash__(self):
        return hash(self._fields())

    def __eq__(self, other):
        # Stay duck-typed: anything exposing the three fields can compare
        # equal. An object missing one of them is simply "not comparable"
        # rather than an AttributeError escaping from ==.
        try:
            other_fields = (other.module_name, other.type_name,
                            other.specialization)
        except AttributeError:
            return NotImplemented
        return self._fields() == other_fields

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__, so define it explicitly to
        # keep the two operators consistent on both interpreter lines.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
class GenericSpecialization(object):
    """Symbols grouped under one (module, type, specialization) triple.

    Keeps the member symbols and the sum of their sizes.
    """

    def __init__(self, module_name, type_name, specialization):
        """Create an empty group with a size of zero."""
        self.module_name = module_name
        self.type_name = type_name
        self.specialization = specialization
        self.symbols = []
        self.size = 0

    def add(self, symbol):
        """Append `symbol` and add its `size` to the group's total."""
        members = self.symbols
        members.append(symbol)
        self.size += symbol.size

    def list_symbols(self):
        """Print one `<size> <name>` line per member, largest size first."""
        # Snapshot (name, size) pairs before sorting; the sort is stable, so
        # symbols of equal size keep their insertion order.
        rows = [(member.name, member.size) for member in self.symbols]
        rows.sort(key=lambda row: row[1], reverse=True)
        for name, size in rows:
            print("%9d %s" % (size, name))
class Categories(object):
    """Sorts symbols into named categories and prints size reports.

    A symbol is classified first by its `name` and, if no name pattern
    applies, by its `mangled_name`; anything left over goes to 'Unknown'.
    When the module-level `groupSpecializations` flag is set, symbols in the
    'Generic specialization of stdlib' category are additionally collected
    into specialization groups.
    """

    def __init__(self):
        # [category name, pattern] pairs tried in order against symbol.name
        # with re.match, i.e. anchored at the start of the name unless the
        # pattern begins with '.*'. The first hit wins, so order matters.
        self.category_matching = [
            ['Objective-C function', re.compile(r'.*[+-]\[')],
            ['C++', re.compile(r'_+swift')],
            ['Generic specialization of stdlib',
             re.compile(
                 r'.*generic specialization.* of ' +
                 r'(static )?(\(extension in Swift\):)?Swift\.'
             )],
            ['Generic specialization',
             re.compile(r'.*generic specialization')],
            ['Merged function', re.compile(r'merged ')],
            ['Key path', re.compile(r'key path')],
            ['Function signature specialization',
             re.compile(r'function signature specialization')],
            ['Reabstraction thunk helper',
             re.compile(r'reabstraction thunk helper')],
            ['vtable thunk', re.compile(r'vtable thunk for')],
            ['@objc thunk', re.compile(r'@objc')],
            ['@nonobjc thunk', re.compile(r'@nonobjc')],
            ['Value witness', re.compile(r'.*value witness for')],
            ['Block copy helper', re.compile(r'_block_copy_helper')],
            ['Block destroy helper', re.compile(r'_block_destroy_helper')],
            ['Block literal global', re.compile(r'___block_literal_global')],
            ['Destroy helper block', re.compile(r'___destroy_helper_block')],
            ['Copy helper block', re.compile(r'___copy_helper_block')],
            ['Object destroy', re.compile(r'_objectdestroy')],
            ['Partial apply forwarder',
             re.compile(r'partial apply forwarder')],
            ['Closure function', re.compile(r'closure #')],
            ['ObjC metadata update function',
             re.compile(r'ObjC metadata update function for')],
            ['Variable initialization expression',
             re.compile(r'variable initialization expression of')],
            ['Global initialization', re.compile(r'_globalinit_')],
            ['Unnamed', re.compile(r'___unnamed_')],
            ['Dyld stubs', re.compile(r'DYLD-STUB\$')],
            ['Witness table accessor',
             re.compile(r'.*witness table accessor for')],
            ['Protocol witness', re.compile(r'protocol witness for')],
            ['Outlined variable', re.compile(r'outlined variable #')],
            ['Outlined value function (copy,destroy,release...)',
             re.compile(r'outlined')],
            ['_symbolic', re.compile(r'_symbolic')],
            ['_associated conformance',
             re.compile(r'_associated conformance')],
            ['Direct field offset', re.compile(r'direct field offset for')],
            ['Value witness tables', re.compile(r'.*value witness table')],
            ['Protocol witness table',
             re.compile(r'.*protocol witness table for')],
            ['Protocol conformance descriptor',
             re.compile(r'protocol conformance descriptor for')],
            ['Lazy protocol witness table cache var',
             re.compile(
                 r'lazy protocol witness table cache variable for type')],
            ['Nominal type descriptor',
             re.compile(r'nominal type descriptor for')],
            ['ObjC class', re.compile(r'_OBJC_CLASS_')],
            ['ObjC metaclass', re.compile(r'_OBJC_METACLASS')],
            ['ObjC ivar', re.compile(r'_OBJC_IVAR')],
            ['Metaclass', re.compile(r'metaclass for')],
            ['Block descriptor', re.compile(r'_+block_descriptor')],
            ['Extension descriptor', re.compile(r'extension descriptor')],
            ['Module descriptor', re.compile(r'module descriptor')],
            ['Associated type descriptor',
             re.compile(r'associated type descriptor for')],
            ['Associated conformance descriptor',
             re.compile(r'associated conformance descriptor for')],
            ['Protocol descriptor', re.compile(r'protocol descriptor for')],
            ['Base conformance descriptor',
             re.compile(r'base conformance descriptor for')],
            ['Protocol requirements base descriptor',
             re.compile(r'protocol requirements base descriptor for')],
            ['Property descriptor', re.compile(r'property descriptor for')],
            ['Method descriptor', re.compile(r'method descriptor for')],
            ['Anonymous descriptor', re.compile(r'anonymous descriptor')],
            ['Type metadata accessor',
             re.compile(r'.*type metadata accessor')],
            ['Type metadata', re.compile(r'.*type metadata')],
            ['Reflection metadata descriptor',
             re.compile(r'reflection metadata .* descriptor')],
        ]
        # Fallback [category name, pattern] pairs tried in order against
        # symbol.mangled_name; the last entry catches every remaining
        # '_$s'-prefixed name.
        self.category_mangled_matching = [
            ['Swift variable storage', re.compile(r'^_\$s.*[v][p][Z]?$')],
            ['Swift constructor', re.compile(r'^_\$s.*[f][cC]$')],
            ['Swift initializer', re.compile(r'^_\$s.*[f][ie]$')],
            ['Swift destructor/destroyer', re.compile(r'^_\$s.*[f][dDE]$')],
            ['Swift getter', re.compile(r'^_\$s.*[iv][gG]$')],
            ['Swift setter', re.compile(r'^_\$s.*[iv][swW]$')],
            ['Swift materializeForSet', re.compile(r'^_\$s.*[iv][m]$')],
            ['Swift modify', re.compile(r'^_\$s.*[iv][M]$')],
            ['Swift read', re.compile(r'^_\$s.*[iv][r]$')],
            ['Swift addressor', re.compile(r'^_\$s.*[iv][al][uOop]$')],
            ['Swift function', re.compile(r'^_\$s.*F$')],
            ['Swift unknown', re.compile(r'^_\$s.*')],
        ]
        # category name -> Category
        self.categories = {}
        # GenericSpecializationGroupKey -> GenericSpecialization
        self.specializations = {}
        # Splits a generic-specialization symbol name into the specialized
        # type list, the module, an optional first type and the function.
        self.specialization_matcher = re.compile(
            r'.*generic specialization <(?P<spec_list>.*)> of' +
            r' (static )?(\(extension in Swift\):)?(?P<module_name>[^.]*)\.' +
            r'(?:(?P<first_type>[^.^(^<]*)\.){0,1}' +
            r'(?:(?P<last_type>[^.^(^<]*)\.)*(?P<function_name>[^(^<]*)'
        )
        self.single_stdlib_specialized_type_matcher = re.compile(
            r'(Swift\.)?[^,^.]*$'
        )
        self.two_specialized_stdlib_types_matcher = re.compile(
            r'(Swift\.)?[^,^.]*, (Swift\.)?[^,^.]*$'
        )
        self.single_specialized_foundation_type_matcher = re.compile(
            r'(Foundation\.)?[^,^.]*$'
        )
        self.two_specialized_foundation_types_matcher = re.compile(
            r'(Swift\.)?[^,^.]*, (Foundation\.)?[^,^.]*$'
        )
        self.two_specialized_foundation_types_matcher2 = re.compile(
            r'(Foundation\.)?[^,^.]*, (Foundation\.)?[^,^.]*$'
        )
        self.two_specialized_foundation_types_matcher3 = re.compile(
            r'(Foundation\.)?[^,^.]*, (Swift\.)?[^,^.]*$'
        )
        self.array_type_matcher = re.compile(r'Array')
        # Was a copy of the 'Array' pattern above; the attribute name says it
        # is meant to recognise dictionaries. Neither matcher is used inside
        # this class.
        self.dictionary = re.compile(r'Dictionary')
        self.single_specialized_types_matcher = re.compile(
            r'(?P<module_name>[^,^.]*)\.([^,^.]*\.)*(?P<type_name>[^,^.]*)$'
        )
        # type name -> bool, memoizing is_class_type() answers.
        self.is_class_type_dict = {}
        self.stdlib_and_other_type_matcher = re.compile(
            r'(Swift\.)?[^,^.]*, (?P<module_name>[^,^.]*)\.(?P<type_name>[^,^.]*)$'
        )
        self.foundation_and_other_type_matcher = re.compile(
            r'(Foundation\.)?[^,^.]*, (?P<module_name>[^,^.]*)\.' +
            r'(?P<type_name>[^,^.]*)$'
        )

    def categorize_by_name(self, symbol):
        """Return the first category whose pattern matches `symbol.name`.

        Returns None when no name pattern matches.
        """
        for category_name, pattern in self.category_matching:
            if pattern.match(symbol.name):
                return category_name
        return None

    def categorize_by_mangled_name(self, symbol):
        """Return the first category matching `symbol.mangled_name`.

        Returns None when no mangled-name pattern matches.
        """
        for category_name, pattern in self.category_mangled_matching:
            if pattern.match(symbol.mangled_name):
                return category_name
        return None

    def add_symbol(self, category_name, symbol):
        """Add `symbol` to the named category, creating it on first use."""
        category = self.categories.get(category_name)
        if category is None:
            category = self.categories[category_name] = Category(category_name)
        category.add(symbol)

    def add(self, symbol):
        """Classify `symbol` and record it under exactly one category.

        Name patterns take precedence over mangled-name patterns; a symbol
        matching neither is filed under 'Unknown'.
        """
        category_name = self.categorize_by_name(symbol)
        if category_name:
            self.add_symbol(category_name, symbol)
            if (groupSpecializations and
                    category_name == 'Generic specialization of stdlib'):
                self.add_specialization(symbol)
            return
        # No mangled-name category is called 'Generic specialization of
        # stdlib', so the specialization grouping can only apply in the
        # name-based branch above; the former second check here was dead code.
        category_name = self.categorize_by_mangled_name(symbol)
        self.add_symbol(category_name or 'Unknown', symbol)

    def is_class_type_(self, type_name, mangled_name):
        """Return True if `<len(type_name)><type_name>C` occurs in `mangled_name`."""
        return (str(len(type_name)) + type_name + 'C') in mangled_name

    def is_class_type(self, type_name, mangled_name):
        """Memoized is_class_type_().

        The cache is keyed by `type_name` alone, so the answer computed for
        the first mangled name seen with a given type name is reused for
        every later call with that type name.
        """
        if type_name not in self.is_class_type_dict:
            self.is_class_type_dict[type_name] = self.is_class_type_(
                type_name, mangled_name)
        return self.is_class_type_dict[type_name]

    def is_dictionary_like_type(self, type_name):
        """Return True if `type_name` contains 'Dictionary' or 'Set'."""
        return 'Dictionary' in type_name or 'Set' in type_name

    def group_library_types(self, module, type_name, specialization, mangled_name):
        """Collapse a specialization of a 'Swift' module function into a group.

        Symbols from any other module are returned unchanged as
        (module, type_name, specialization). For module 'Swift' the result is
        (module, 'stdlib', label) where label is one of 'stdlib',
        'foundation', 'class', 'class(dict)', 'stdlib, class',
        'foundation, class' or 'other', chosen from the shape of the
        `specialization` type list.
        """
        if module != 'Swift':
            return module, type_name, specialization

        # Specializations made up only of stdlib / Foundation types; the
        # matchers are tried in this order and the first hit decides.
        library_matchers = (
            (self.single_stdlib_specialized_type_matcher, 'stdlib'),
            (self.two_specialized_stdlib_types_matcher, 'stdlib'),
            (self.single_specialized_foundation_type_matcher, 'foundation'),
            (self.two_specialized_foundation_types_matcher, 'foundation'),
            (self.two_specialized_foundation_types_matcher2, 'foundation'),
            (self.two_specialized_foundation_types_matcher3, 'foundation'),
        )
        for matcher, label in library_matchers:
            if matcher.match(specialization):
                return module, 'stdlib', label

        # A single type from some other module.
        single_spec = self.single_specialized_types_matcher.match(specialization)
        if single_spec:
            is_class = self.is_class_type(single_spec.group('type_name'),
                                          mangled_name)
            is_dict = (type_name is not None and
                       self.is_dictionary_like_type(type_name))
            if is_class:
                return module, 'stdlib', 'class(dict)' if is_dict else 'class'

        # A stdlib type followed by a type from some other module.
        stdlib_other_spec = self.stdlib_and_other_type_matcher.match(
            specialization)
        if stdlib_other_spec:
            if self.is_class_type(stdlib_other_spec.group('type_name'),
                                  mangled_name):
                return module, 'stdlib', 'stdlib, class'

        # A Foundation type followed by a type from some other module.
        foundation_other_spec = self.foundation_and_other_type_matcher.match(
            specialization)
        if foundation_other_spec:
            if self.is_class_type(foundation_other_spec.group('type_name'),
                                  mangled_name):
                return module, 'stdlib', 'foundation, class'

        return module, 'stdlib', 'other'

    def add_specialization(self, symbol):
        """Record `symbol` in its specialization group.

        If `symbol.name` does not have the expected generic-specialization
        shape, the name and 'not matched' are printed and nothing is recorded.
        """
        specialization_match = self.specialization_matcher.match(symbol.name)
        if not specialization_match:
            print(symbol.name)
            print('not matched')
            return

        module, type_name, specialization = self.group_library_types(
            specialization_match.group('module_name'),
            specialization_match.group('first_type'),
            specialization_match.group('spec_list'),
            symbol.mangled_name)
        key = GenericSpecializationGroupKey(module, type_name, specialization)
        group = self.specializations.get(key)
        if group is None:
            group = self.specializations[key] = GenericSpecialization(
                module, type_name, specialization)
        group.add(symbol)

    def print_specializations(self):
        """Print the specialization groups ordered by module, type, specialization.

        Prints nothing when no group was recorded. When the module-level
        `listGroupSpecializations` flag is set, each group's symbols are
        listed below its summary line. Always returns None.
        """
        sorted_specializations = list(self.specializations.values())
        if not sorted_specializations:
            return None

        # One sort on a tuple key gives the same order as the previous three
        # stable sorts. type_name comes from an optional regex group and can
        # be None, which Python 3 cannot order against str.
        sorted_specializations.sort(
            key=lambda entry: (entry.module_name, entry.type_name or '',
                               entry.specialization))
        print("Specialization info")
        for spec in sorted_specializations:
            print("%20s.%s %20s %8d" % (spec.module_name, spec.type_name,
                                        spec.specialization, spec.size))
            if listGroupSpecializations:
                spec.list_symbols()
        print("")
        return None

    def categorize(self, symbols):
        """Classify every symbol in `symbols` via add()."""
        for sym in symbols:
            self.add(sym)

    def print_summary(self, section_size):
        """Print one line per non-empty category, largest first, then a total.

        Each line shows the category size and its share of `section_size`.
        Rows use ';' separators when the module-level `useCSV` flag is set.
        Only the categories named in the two pattern tables plus 'Unknown'
        are reported.
        """
        names = [entry[0] for entry in self.category_matching]
        names.extend(entry[0] for entry in self.category_mangled_matching)
        names.append('Unknown')

        total_size = 0
        sorted_categories = []
        for name in names:
            category = self.categories.get(name)
            size = category.size if category else 0
            total_size += size
            if size > 0:
                # Guard the share computation: a section reported with size 0
                # would otherwise raise ZeroDivisionError.
                if section_size:
                    percent = (float(size) * 100) / section_size
                else:
                    percent = 0.0
                sorted_categories.append((name, size, percent))
        sorted_categories.sort(key=lambda entry: entry[1], reverse=True)

        for category in sorted_categories:
            if useCSV:
                print("%s;%d;%.2f%%" %
                      (category[0], category[1], category[2]))
            else:
                print("%60s: %8d (%6.2f%%)" %
                      (category[0], category[1], category[2]))
        print("%60s: %8d (%6.2f%%)" % ('TOTAL', total_size, float(100)))

    def uncatorizedSymbols(self):
        """Return the symbols filed under 'Unknown', or None if there are none."""
        category = self.categories.get('Unknown')
        if category:
            return category.symbols
        return None

    def print_uncategorizedSymbols(self):
        """Print `<mangled name> <name> <size>` for each 'Unknown' symbol."""
        syms = self.uncatorizedSymbols()
        if syms:
            for symbol in syms:
                print(symbol.mangled_name + " " + symbol.name + " " +
                      str(symbol.size))

    def print_category(self, category):
        """Print the symbols of the named category, smallest size first."""
        category = self.categories.get(category)
        if category and category.symbols:
            for sym in sorted(category.symbols, key=get_symbol_size):
                print('%8d %s %s' % (sym.size, sym.name, sym.mangled_name))

    def has_category(self, category):
        """Return True if the named category exists and holds a symbol."""
        category = self.categories.get(category)
        return bool(category and category.symbols)
def parse_segments(path, arch):
    """Run `symbols` on a binary and build its Segment/Section/Symbol tree.

    The raw `symbols` listing for architecture `arch` of the binary at `path`
    is piped through `xcrun swift-demangle`; the two listings are then walked
    line by line in step so each symbol gets both its demangled and its
    mangled name. Symbols sharing a demangled name are merged into the first
    occurrence by adding up their sizes.

    Returns the list of Segment objects found.

    Raises AssertionError if a line is a symbol entry in the demangled
    listing but not in the mangled one, and subprocess.CalledProcessError if
    `symbols` exits with a non-zero status.
    """
    # universal_newlines=True makes both tools exchange text instead of
    # bytes. On Python 3 the str patterns below cannot be matched against
    # bytes; on Python 2 the flag leaves the data as str.
    mangled = subprocess.check_output(
        ['symbols', '-noSources', '-noDemangling', '-arch', arch, path],
        universal_newlines=True)
    demangle = subprocess.Popen(
        ['xcrun', 'swift-demangle'], stdin=subprocess.PIPE,
        stdout=subprocess.PIPE, universal_newlines=True)
    demangled = demangle.communicate(mangled)[0]

    symbols = {}
    segments = []
    # NOTE(review): as written, segment_regex and section_regex are the same
    # pattern, and any line accepted by symbol_regex is also accepted by
    # segment_regex, so the section and symbol branches below cannot be
    # reached. Presumably the patterns once differed in leading whitespace;
    # confirm against real `symbols` output before relying on this parser.
    segment_regex = re.compile(
        r"^ 0x[0-9a-f]+ \(\s*0x(?P<size>[0-9a-f]+)\) "
        r"(?P<name>.+?) (?P<name2>.+?)$")
    object_file_segment_regex = re.compile(
        r"^ 0x[0-9a-f]+ \(\s*0x(?P<size>[0-9a-f]+)\) "
        r"SEGMENT$")
    section_regex = re.compile(
        r"^ 0x[0-9a-f]+ \(\s*0x(?P<size>[0-9a-f]+)\) "
        r"(?P<name>.+?) (?P<name2>.+?)$")
    symbol_regex = re.compile(
        r"^ 0x[0-9a-f]+ \(\s*0x(?P<size>[0-9a-f]+)\) "
        r"(?P<name>.+?) \[[^\]]+\] $")

    mangled_lines = mangled.splitlines()
    for line_number, line in enumerate(demangled.splitlines()):
        # Line N of the demangled listing corresponds to line N of the raw one.
        mangled_line = mangled_lines[line_number]

        # Match a segment entry.
        segment_match = segment_regex.match(line)
        if segment_match:
            segments.append(Segment(segment_match.group('name')))
            continue

        object_file_segment_match = object_file_segment_regex.match(line)
        if object_file_segment_match:
            segments.append(Segment("SEGMENT"))
            continue

        # Match a section entry.
        section_match = section_regex.match(line)
        if section_match:
            new_section = Section(section_match.group('name2'),
                                  int(section_match.group('size'), 16))
            segments[-1].sections.append(new_section)
            continue

        # Match a symbol entry.
        symbol_match = symbol_regex.match(line)
        if not symbol_match:
            continue
        mangled_symbol_match = symbol_regex.match(mangled_line)
        if not mangled_symbol_match:
            print('mangled and demangled mismatch')
            print(mangled_line)
            print(line)
            # Raised explicitly: a bare `assert False` is removed under -O
            # and would let the None match be dereferenced below.
            raise AssertionError('mangled and demangled mismatch')

        symbol = Symbol(symbol_match.group('name'),
                        mangled_symbol_match.group('name'),
                        int(symbol_match.group('size'), 16))
        existing = symbols.get(symbol.name)
        if existing:
            # Same demangled name seen before: fold the size into the first
            # occurrence instead of listing the symbol twice.
            existing.size += symbol.size
        else:
            symbols[symbol.name] = symbol
            segments[-1].sections[-1].symbols.append(symbol)

    return segments
def show_all(segments):
    """Print `<size> <name> <mangled name>` for every symbol of every section."""
    for segment in segments:
        for section in segment.sections:
            for sym in section.symbols:
                fields = (str(sym.size), sym.name, sym.mangled_name)
                print(' '.join(fields))
def categorize(segments):
    """Print a per-category size summary for every section of every segment.

    Each section is classified with a fresh Categories instance. When the
    module-level `groupSpecializations` flag is set, the section's
    specialization groups are printed after its summary.
    """
    for segment in segments:
        for section in segment.sections:
            heading = segment.name + ';' + section.name
            print('Section %52s: %8d' % (heading, section.size))
            report = Categories()
            report.categorize(section.symbols)
            report.print_summary(section.size)
            print('')
            if groupSpecializations:
                report.print_specializations()
def uncategorized(segments):
    """Print, section by section, the symbols no category pattern matched.

    Each section is classified with a fresh Categories instance, whose
    print_uncategorizedSymbols() does the printing.
    """
    for segment in segments:
        for section in segment.sections:
            report = Categories()
            report.categorize(section.symbols)
            report.print_uncategorizedSymbols()
def list_category(segments, category):
    """Print the symbols of `category` for every section that contains it.

    Each section is classified with a fresh Categories instance. Sections
    without the category print no heading. When the module-level
    `groupSpecializations` flag is set, the section's specialization groups
    are printed as well.
    """
    for segment in segments:
        for section in segment.sections:
            report = Categories()
            report.categorize(section.symbols)
            if report.has_category(category):
                heading = segment.name + ';' + section.name
                print('Section %22s: %8d' % (heading, section.size))
                report.print_category(category)
                print('')
            if groupSpecializations:
                report.print_specializations()
# Script entry point: pass the command-line arguments (without the program
# name) to main() and use its return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))