utils/analyze_code_size.py - third_party/swift - Git at Google

 #!/usr/bin/env python

 import argparse
 import re
 import subprocess
 import sys


 # Module-level output switches. main() sets each one to True when the
 # matching command-line flag (-csv, -group-specializations,
 # -list-group-specializations) is given; the reporting code reads them.
 useCSV = False
 groupSpecializations = False
 listGroupSpecializations = False


 def main(arguments):
     """Parse *arguments* and run the requested report on the binary.

     The first of -categorize, -uncategorized and -list-category that is
     set selects the report; with none of them every symbol is listed.
     The -csv and specialization flags are published through the
     module-level switches read by the reporting code.
     """
     global useCSV, groupSpecializations, listGroupSpecializations

     cli = argparse.ArgumentParser(
         description='Analyze the code size in a binary')
     cli.add_argument('-arch', type=str, default='arm64',
                      help='the arch to look at')
     cli.add_argument('-categorize', dest='build_categories',
                      action='store_true', default=False,
                      help='categorize symbols')
     cli.add_argument('-list-category', type=str,
                      help='list symbols in category')
     cli.add_argument('-group-specializations', action='store_true',
                      help='group specializations')
     cli.add_argument('-list-group-specializations', action='store_true',
                      help='list group specializations')
     cli.add_argument('-csv', dest='use_csv', action='store_true',
                      help='print results as csv')
     cli.add_argument('-uncategorized', dest='show_uncategorized',
                      action='store_true', default=False,
                      help='show all uncategorized symbols')
     cli.add_argument('bin', help='the binary')
     cli.set_defaults(use_csv=False)
     options = cli.parse_args(arguments)

     # Flags only ever switch a global on; an absent flag leaves it alone.
     if options.use_csv:
         useCSV = True
         print("Using csv")
     if options.group_specializations:
         groupSpecializations = True
     if options.list_group_specializations:
         listGroupSpecializations = True

     segments = parse_segments(options.bin, options.arch)

     if options.build_categories:
         categorize(segments)
     elif options.show_uncategorized:
         uncategorized(segments)
     elif options.list_category:
         list_category(segments, options.list_category)
     else:
         show_all(segments)


 class Symbol(object):
     """A symbol with its demangled name, mangled name and size."""

     def __init__(self, name, mangled_name, size):
         # size is coerced with int(), so numeric strings are accepted.
         self.name, self.mangled_name = name, mangled_name
         self.count = 1
         self.size = int(size)


 def get_symbol_size(sym):
     """Return the ``size`` attribute of *sym* (used as a sort key)."""
     size = sym.size
     return size


 class Segment(object):
     """A named segment holding an initially empty list of sections."""

     def __init__(self, name):
         self.sections = list()
         self.name = name


 class Section(object):
     """A named section with its size and an initially empty symbol list."""

     def __init__(self, name, size):
         self.name, self.size = name, size
         self.symbols = list()


 class Category(object):
     """A named bucket of symbols that keeps a running total of their sizes."""

     def __init__(self, name):
         self.name = name
         self.symbols = list()
         self.size = 0

     def add(self, symbol):
         """Record *symbol* and add its size to the bucket total."""
         self.symbols.append(symbol)
         self.size = self.size + symbol.size


 class GenericSpecializationGroupKey(object):
     """Hashable (module, type, specialization) triple used as a dict key.

     Two keys are equal when all three fields are equal. Comparing a key
     with any other kind of object yields "not equal" instead of raising.
     """

     def __init__(self, module_name, type_name, specialization):
         self.module_name = module_name
         self.type_name = type_name
         self.specialization = specialization

     def _fields(self):
         """Return the three identifying fields as a tuple."""
         return (self.module_name, self.type_name, self.specialization)

     def __hash__(self):
         return hash(self._fields())

     def __eq__(self, other):
         # Defer to the other operand for foreign types rather than
         # failing with AttributeError on a missing field.
         if not isinstance(other, GenericSpecializationGroupKey):
             return NotImplemented
         return self._fields() == other._fields()

     def __ne__(self, other):
         # Python 2 does not derive != from ==, so spell it out.
         outcome = self.__eq__(other)
         if outcome is NotImplemented:
             return outcome
         return not outcome


 class GenericSpecialization(object):
     """Symbols grouped under one (module, type, specialization) triple."""

     def __init__(self, module_name, type_name, specialization):
         self.module_name = module_name
         self.type_name = type_name
         self.specialization = specialization
         self.symbols = list()
         self.size = 0

     def add(self, symbol):
         """Record *symbol* and add its size to the group total."""
         self.symbols.append(symbol)
         self.size = self.size + symbol.size

     def list_symbols(self):
         """Print the group's symbols as "size name", largest first."""
         pairs = [(entry.name, entry.size) for entry in self.symbols]
         # sorted() is stable, so equally sized symbols keep insertion order.
         for name, size in sorted(pairs, key=lambda pair: pair[1],
                                  reverse=True):
             print("%9d %s" % (size, name))


 class Categories(object):
     def __init__(self):
         """Build the matcher tables and the empty result containers."""
         # Ordered [category name, compiled regex] pairs. categorize_by_name
         # applies each regex with re.match (anchored at the start of the
         # demangled name) and returns the first hit, so the order of this
         # list decides which category wins when several patterns match.
         self.category_matching = [
             ['Objective-C function', re.compile(r'.*[+-]\[')],
             ['C++', re.compile(r'_+swift')],
             ['Generic specialization of stdlib',
                 re.compile(
                     r'.*generic specialization.* of ' +
                     r'(static )?(\(extension in Swift\):)?Swift\.'
                 )],
             ['Generic specialization',
                 re.compile(r'.*generic specialization')],
             ['Merged function', re.compile(r'merged ')],
             ['Key path', re.compile(r'key path')],
             ['Function signature specialization',
                 re.compile(r'function signature specialization')],
             ['Reabstraction thunk helper',
                 re.compile(r'reabstraction thunk helper')],
             ['vtable thunk', re.compile(r'vtable thunk for')],
             ['@objc thunk', re.compile(r'@objc')],
             ['@nonobjc thunk', re.compile(r'@nonobjc')],
             ['Value witness', re.compile(r'.*value witness for')],
             ['Block copy helper', re.compile(r'_block_copy_helper')],
             ['Block destroy helper', re.compile(r'_block_destroy_helper')],
             ['Block literal global', re.compile(r'___block_literal_global')],
             ['Destroy helper block', re.compile(r'___destroy_helper_block')],
             ['Copy helper block', re.compile(r'___copy_helper_block')],
             ['Object destroy', re.compile(r'_objectdestroy')],
             ['Partial apply forwarder',
                 re.compile(r'partial apply forwarder')],
             ['Closure function', re.compile(r'closure #')],
             ['ObjC metadata update function',
                 re.compile(r'ObjC metadata update function for')],
             ['Variable initialization expression',
                 re.compile(r'variable initialization expression of')],
             ['Global initialization', re.compile(r'_globalinit_')],
             ['Unnamed', re.compile(r'___unnamed_')],
             ['Dyld stubs', re.compile(r'DYLD-STUB\$')],
             ['Witness table accessor',
                 re.compile(r'.*witness table accessor for')],
             ['Protocol witness', re.compile(r'protocol witness for')],
             ['Outlined variable', re.compile(r'outlined variable #')],
             ['Outlined value function (copy,destroy,release...)',
                 re.compile(r'outlined')],
             ['_symbolic', re.compile(r'_symbolic')],
             ['_associated conformance',
                 re.compile(r'_associated conformance')],
             ['Direct field offset', re.compile(r'direct field offset for')],
             ['Value witness tables', re.compile(r'.*value witness table')],
             ['Protocol witness table',
                 re.compile(r'.*protocol witness table for')],
             ['Protocol conformance descriptor',
                 re.compile(r'protocol conformance descriptor for')],
             ['Lazy protocol witness table cache var',
                 re.compile(
                     r'lazy protocol witness table cache variable for type')],
             ['Nominal type descriptor',
                 re.compile(r'nominal type descriptor for')],
             ['ObjC class', re.compile(r'_OBJC_CLASS_')],
             ['ObjC metaclass', re.compile(r'_OBJC_METACLASS')],
             ['ObjC ivar', re.compile(r'_OBJC_IVAR')],
             ['Metaclass', re.compile(r'metaclass for')],
             ['Block descriptor', re.compile(r'_+block_descriptor')],
             ['Extension descriptor', re.compile(r'extension descriptor')],
             ['Module descriptor', re.compile(r'module descriptor')],
             ['Associated type descriptor',
                 re.compile(r'associated type descriptor for')],
             ['Associated conformance descriptor',
                 re.compile(r'associated conformance descriptor for')],
             ['Protocol descriptor', re.compile(r'protocol descriptor for')],
             ['Base conformance descriptor',
                 re.compile(r'base conformance descriptor for')],
             ['Protocol requirements base descriptor',
                 re.compile(r'protocol requirements base descriptor for')],
             ['Property descriptor', re.compile(r'property descriptor for')],
             ['Method descriptor', re.compile(r'method descriptor for')],
             ['Anonymous descriptor', re.compile(r'anonymous descriptor')],
             ['Type metadata accessor',
                 re.compile(r'.*type metadata accessor')],
             ['Type metadata', re.compile(r'.*type metadata')],
             ['Reflection metadata descriptor',
                 re.compile(r'reflection metadata .* descriptor')],
         ]

         # Same [name, regex] layout, but matched against the mangled name
         # (categorize_by_mangled_name). Every pattern requires the '_$s'
         # prefix; the last entry accepts anything with that prefix, so it
         # acts as the catch-all for this table.
         self.category_mangled_matching = [
             ['Swift variable storage', re.compile(r'^_\$s.*[v][p][Z]?$')],
             ['Swift constructor', re.compile(r'^_\$s.*[f][cC]$')],
             ['Swift initializer', re.compile(r'^_\$s.*[f][ie]$')],
             ['Swift destructor/destroyer', re.compile(r'^_\$s.*[f][dDE]$')],
             ['Swift getter', re.compile(r'^_\$s.*[iv][gG]$')],
             ['Swift setter', re.compile(r'^_\$s.*[iv][swW]$')],
             ['Swift materializeForSet', re.compile(r'^_\$s.*[iv][m]$')],
             ['Swift modify', re.compile(r'^_\$s.*[iv][M]$')],
             ['Swift read', re.compile(r'^_\$s.*[iv][r]$')],
             ['Swift addressor', re.compile(r'^_\$s.*[iv][al][uOop]$')],
             ['Swift function', re.compile(r'^_\$s.*F$')],
             ['Swift unknown', re.compile(r'^_\$s.*')],
         ]
         # category name -> Category, filled in by add_symbol.
         self.categories = {}
         # GenericSpecializationGroupKey -> GenericSpecialization, filled in
         # by add_specialization.
         self.specializations = {}
         # Splits a demangled generic-specialization name into the named
         # groups spec_list, module_name, first_type, last_type and
         # function_name.
         self.specialization_matcher = re.compile(
             r'.*generic specialization <(?P<spec_list>.*)> of' +
             r' (static )?(\(extension in Swift\):)?(?P<module_name>[^.]*)\.' +
             r'(?:(?P<first_type>[^.^(^<]*)\.){0,1}' +
             r'(?:(?P<last_type>[^.^(^<]*)\.)*(?P<function_name>[^(^<]*)'
         )
         # The matchers below are applied to the spec_list text by
         # group_library_types. Note that '^' inside '[^,^.]' after the
         # leading negation is a literal caret, so the class excludes
         # ',', '^' and '.'.
         self.single_stdlib_specialized_type_matcher = re.compile(
             r'(Swift\.)?[^,^.]*$'
         )
         self.two_specialized_stdlib_types_matcher = re.compile(
             r'(Swift\.)?[^,^.]*, (Swift\.)?[^,^.]*$'
         )
         self.single_specialized_foundation_type_matcher = re.compile(
             r'(Foundation\.)?[^,^.]*$'
         )
         self.two_specialized_foundation_types_matcher = re.compile(
             r'(Swift\.)?[^,^.]*, (Foundation\.)?[^,^.]*$'
         )
         self.two_specialized_foundation_types_matcher2 = re.compile(
             r'(Foundation\.)?[^,^.]*, (Foundation\.)?[^,^.]*$'
         )
         self.two_specialized_foundation_types_matcher3 = re.compile(
             r'(Foundation\.)?[^,^.]*, (Swift\.)?[^,^.]*$'
         )
         # NOTE(review): the next two attributes compile the same 'Array'
         # pattern and neither is referenced by the methods visible in this
         # class; 'dictionary' may have been meant to match 'Dictionary' --
         # confirm before relying on either.
         self.array_type_matcher = re.compile(r'Array')
         self.dictionary = re.compile(r'Array')
         self.single_specialized_types_matcher = re.compile(
             r'(?P<module_name>[^,^.]*)\.([^,^.]*\.)*(?P<type_name>[^,^.]*)$'
         )
         # type name -> bool cache maintained by is_class_type.
         self.is_class_type_dict = {}
         self.stdlib_and_other_type_matcher = re.compile(
             r'(Swift\.)?[^,^.]*, (?P<module_name>[^,^.]*)\.(?P<type_name>[^,^.]*)$'
         )
         self.foundation_and_other_type_matcher = re.compile(
             r'(Foundation\.)?[^,^.]*, (?P<module_name>[^,^.]*)\.' +
             r'(?P<type_name>[^,^.]*)$'
         )

     def categorize_by_name(self, symbol):
         for c in self.category_matching:
             if c[1].match(symbol.name):
                 return c[0]
         return None

     def categorize_by_mangled_name(self, symbol):
         for c in self.category_mangled_matching:
             if c[1].match(symbol.mangled_name):
                 return c[0]
         return None

     def add_symbol(self, category_name, symbol):
         existing_category = self.categories.get(category_name)
         if existing_category:
             existing_category.add(symbol)
         else:
             new_category = Category(category_name)
             new_category.add(symbol)
             self.categories[category_name] = new_category

     def add(self, symbol):
         category_name = self.categorize_by_name(symbol)
         if category_name:
             self.add_symbol(category_name, symbol)
             if (groupSpecializations and
                     category_name == 'Generic specialization of stdlib'):
                 self.add_specialization(symbol)
             return
         category_name = self.categorize_by_mangled_name(symbol)
         if category_name:
             self.add_symbol(category_name, symbol)
         else:
             self.add_symbol('Unknown', symbol)
         if (groupSpecializations and
                 category_name == 'Generic specialization of stdlib'):
             self.add_specialization(symbol)

     def is_class_type_(self, type_name, mangled_name):
         """Report whether "<len><type_name>C" occurs in *mangled_name*."""
         needle = '%d%sC' % (len(type_name), type_name)
         return needle in mangled_name

     def is_class_type(self, type_name, mangled_name):
         existing_categorization = self.is_class_type_dict.get(type_name, 3)
         if existing_categorization == 3:
             is_class = self.is_class_type_(type_name, mangled_name)
             self.is_class_type_dict[type_name] = is_class
             return is_class
         else:
             return existing_categorization

     def is_dictionary_like_type(self, type_name):
         """Report whether *type_name* contains 'Dictionary' or 'Set'."""
         return 'Dictionary' in type_name or 'Set' in type_name

     def group_library_types(self, module, type_name, specialization, mangled_name):
         if module != 'Swift':
             return module, type_name, specialization
         if self.single_stdlib_specialized_type_matcher.match(specialization):
             return module, 'stdlib', 'stdlib'
         if self.two_specialized_stdlib_types_matcher.match(specialization):
             return module, 'stdlib', 'stdlib'
         if self.single_specialized_foundation_type_matcher.match(specialization):
             return module, 'stdlib', 'foundation'
         if self.two_specialized_foundation_types_matcher.match(specialization):
             return module, 'stdlib', 'foundation'
         if self.two_specialized_foundation_types_matcher2.match(specialization):
             return module, 'stdlib', 'foundation'
         if self.two_specialized_foundation_types_matcher3.match(specialization):
             return module, 'stdlib', 'foundation'
         single_spec = self.single_specialized_types_matcher.match(specialization)
         if single_spec:
             is_class = self.is_class_type(single_spec.group('type_name'), mangled_name)
             is_dict = type_name is not None and self.is_dictionary_like_type(type_name)
             if not is_dict and is_class:
                 return module, 'stdlib', 'class'
             if is_dict and is_class:
                 return module, 'stdlib', 'class(dict)'
         stdlib_other_spec = self.stdlib_and_other_type_matcher.match(specialization)
         if stdlib_other_spec:
             is_class = self.is_class_type(stdlib_other_spec.group('type_name'),
                                           mangled_name)
             if is_class:
                 return module, 'stdlib', 'stdlib, class'
         foundation_other_spec = self.foundation_and_other_type_matcher.match(
             specialization)
         if foundation_other_spec:
             is_class = self.is_class_type(foundation_other_spec.group('type_name'),
                                           mangled_name)
             if is_class:
                 return module, 'stdlib', 'foundation, class'
         return module, 'stdlib', 'other'

     def add_specialization(self, symbol):
         specialization_match = self.specialization_matcher.match(symbol.name)
         if specialization_match:
             module = specialization_match.group('module_name')
             type_name = specialization_match.group('first_type')
             specialization = specialization_match.group('spec_list')
             module, type_name, specialization = self.group_library_types(
                 module, type_name, specialization, symbol.mangled_name)
             key = GenericSpecializationGroupKey(module, type_name, specialization)
             existing_specialization = self.specializations.get(key)
             if existing_specialization:
                 existing_specialization.add(symbol)
             else:
                 new_specialization = GenericSpecialization(module, type_name,
                                                            specialization)
                 new_specialization.add(symbol)
                 self.specializations[key] = new_specialization
         else:
             print(symbol.name)
             print('not matched')
         return

     def print_specializations(self):
         values = self.specializations.values()
         sorted_specializations = []
         for v in values:
             sorted_specializations.append(v)

         if not sorted_specializations:
             return None
         else:
             sorted_specializations.sort(key=lambda entry: entry.specialization)
             sorted_specializations.sort(key=lambda entry: entry.type_name)
             sorted_specializations.sort(key=lambda entry: entry.module_name)
             print("Specialization info")
             for spec in sorted_specializations:
                 print("%20s.%s %20s %8d" % (spec.module_name, spec.type_name,
                                             spec.specialization, spec.size))
                 if listGroupSpecializations:
                     spec.list_symbols()
             print("")
             return None

     def categorize(self, symbols):
         for sym in symbols:
             self.add(sym)

     def print_summary(self, section_size):
         names = [c[0] for c in self.category_matching]
         names.extend([c[0] for c in self.category_mangled_matching])
         names.append('Unknown')
         total_size = 0
         sorted_categories = []
         for name in names:
             category = self.categories.get(name)
             size = 0
             if category:
                 size = category.size
             total_size += size
             if size > 0:
                 sorted_categories.append(
                     (name, size, (float(size) * 100) / section_size))
         sorted_categories.sort(key=lambda entry: entry[1], reverse=True)
         for category in sorted_categories:
             if useCSV:
                 print("%s;%d;%.2f%%" %
                       (category[0], category[1], category[2]))
             else:
                 print("%60s: %8d (%6.2f%%)" %
                       (category[0], category[1], category[2]))
         print("%60s: %8d (%6.2f%%)" % ('TOTAL', total_size, float(100)))

     def uncatorizedSymbols(self):
         category = self.categories.get('Unknown')
         if category:
             return category.symbols
         return None

     def print_uncategorizedSymbols(self):
         syms = self.uncatorizedSymbols()
         if syms:
             for symbol in syms:
                 print(symbol.mangled_name + " " + symbol.name + " " +
                       str(symbol.size))

     def print_category(self, category):
         category = self.categories.get(category)
         if category:
             if category.symbols:
                 sorted_symbols = sorted(category.symbols, key=get_symbol_size)
                 for sym in sorted_symbols:
                     print('%8d %s %s' % (sym.size, sym.name, sym.mangled_name))

     def has_category(self, category):
         category = self.categories.get(category)
         if category:
             if category.symbols:
                 return True
         return False


 def parse_segments(path, arch):
     """Run ``symbols`` on *path* and return the parsed list of Segments.

     The raw ``symbols`` output for *arch* is also piped through
     ``xcrun swift-demangle``. Both texts are walked line by line in
     lockstep, so each symbol is recorded with its demangled and its
     mangled name. Symbols sharing a demangled name are merged: the first
     occurrence is kept in its section and later sizes are added to it.

     Raises:
         subprocess.CalledProcessError: if ``symbols`` exits non-zero.
         AssertionError: if a demangled symbol line has no matching
             mangled symbol line at the same position.
     """
     mangled = subprocess.check_output(
         ['symbols', '-noSources', '-noDemangling', '-arch', arch, path])
     demangle = subprocess.Popen(
         ['xcrun', 'swift-demangle'], stdin=subprocess.PIPE,
         stdout=subprocess.PIPE)
     demangled = demangle.communicate(mangled)[0]

     # The pipes carry bytes. On Python 3 the str regexes below cannot be
     # applied to bytes, so decode first (on Python 2 bytes is str and
     # nothing changes).
     if not isinstance(mangled, str):
         mangled = mangled.decode('utf-8', 'replace')
     if not isinstance(demangled, str):
         demangled = demangled.decode('utf-8', 'replace')

     symbols = {}
     segments = []
     # Nesting is told apart by the amount of leading whitespace:
     # 8 spaces for segments, 12 for sections, 16 for symbols.
     segment_regex = re.compile(
         r"^        0x[0-9a-f]+ \(\s*0x(?P<size>[0-9a-f]+)\) "
         r"(?P<name>.+?) (?P<name2>.+?)$")
     object_file_segment_regex = re.compile(
         r"^        0x[0-9a-f]+ \(\s*0x(?P<size>[0-9a-f]+)\)  "
         r"SEGMENT$")
     section_regex = re.compile(
         r"^            0x[0-9a-f]+ \(\s*0x(?P<size>[0-9a-f]+)\) "
         r"(?P<name>.+?) (?P<name2>.+?)$")
     symbol_regex = re.compile(
         r"^                0x[0-9a-f]+ \(\s*0x(?P<size>[0-9a-f]+)\) "
         r"(?P<name>.+?) \[[^\]]+\] $")

     mangled_lines = mangled.splitlines()

     for line_number, line in enumerate(demangled.splitlines()):
         mangled_line = mangled_lines[line_number]

         # Match a segment entry.
         segment_match = segment_regex.match(line)
         if segment_match:
             segments.append(Segment(segment_match.group('name')))
             continue

         object_file_segment_match = object_file_segment_regex.match(line)
         if object_file_segment_match:
             segments.append(Segment("SEGMENT"))
             continue

         # Match a section entry.
         section_match = section_regex.match(line)
         if section_match:
             new_section = Section(section_match.group('name2'),
                                   int(section_match.group('size'), 16))
             segments[-1].sections.append(new_section)
             continue

         # Match a symbol entry.
         symbol_match = symbol_regex.match(line)
         if not symbol_match:
             continue
         mangled_symbol_match = symbol_regex.match(mangled_line)
         if not mangled_symbol_match:
             print('mangled and demangled mismatch')
             print(mangled_line)
             print(line)
             # Raised explicitly so the check survives ``python -O``.
             raise AssertionError('mangled and demangled mismatch')

         symbol = Symbol(symbol_match.group('name'),
                         mangled_symbol_match.group('name'),
                         int(symbol_match.group('size'), 16))
         existing = symbols.get(symbol.name)
         if existing:
             existing.size += symbol.size
         else:
             symbols[symbol.name] = symbol
             segments[-1].sections[-1].symbols.append(symbol)

     return segments


 def show_all(segments):
     """Print "size demangled mangled" for every symbol of every section."""
     sections = (section for segment in segments
                 for section in segment.sections)
     for section in sections:
         for sym in section.symbols:
             print(' '.join((str(sym.size), sym.name, sym.mangled_name)))


 def categorize(segments):
     """Print a per-section category summary for every section.

     Each section gets a header line, its category breakdown and a blank
     line, followed by the specialization groups when grouping is enabled.
     """
     pairs = ((segment, section) for segment in segments
              for section in segment.sections)
     for segment, section in pairs:
         header = segment.name + ';' + section.name
         print('Section %52s: %8d' % (header, section.size))
         report = Categories()
         report.categorize(section.symbols)
         report.print_summary(section.size)
         print('')
         if groupSpecializations:
             report.print_specializations()


 def uncategorized(segments):
     """Print the symbols left in 'Unknown' for every section, in order."""
     sections = (section for segment in segments
                 for section in segment.sections)
     for section in sections:
         report = Categories()
         report.categorize(section.symbols)
         report.print_uncategorizedSymbols()


 def list_category(segments, category):
     """List the symbols of *category* for each section that has any.

     Sections without symbols in the category are skipped silently. Each
     reported section gets a header line, its symbols and a blank line,
     followed by the specialization groups when grouping is enabled.
     """
     pairs = ((segment, section) for segment in segments
              for section in segment.sections)
     for segment, section in pairs:
         report = Categories()
         report.categorize(section.symbols)
         if not report.has_category(category):
             continue
         header = segment.name + ';' + section.name
         print('Section %22s: %8d' % (header, section.size))
         report.print_category(category)
         print('')
         if groupSpecializations:
             report.print_specializations()


 # Script entry point: run main() on the command-line arguments (without
 # the program name) and pass its return value to sys.exit().
 if __name__ == '__main__':
     sys.exit(main(sys.argv[1:]))
	#!/usr/bin/env python

	import argparse
	import re
	import subprocess
	import sys


	# Module-level output switches; main() sets each one to True when the
	# matching command-line flag is given.
	useCSV = False
	groupSpecializations = False
	listGroupSpecializations = False


	def main(arguments):
	    """Parse *arguments* and run the requested report on the binary.

	    The first of -categorize, -uncategorized and -list-category that
	    is set selects the report; with none of them every symbol is
	    listed. Block indentation, which this copy had lost, is restored.
	    """
	    global useCSV, groupSpecializations, listGroupSpecializations

	    parser = argparse.ArgumentParser(
	        description='Analyze the code size in a binary')
	    parser.add_argument('-arch', type=str,
	                        help='the arch to look at', default='arm64')
	    parser.add_argument('-categorize', action='store_true',
	                        help='categorize symbols', dest='build_categories',
	                        default=False)
	    parser.add_argument('-list-category', type=str,
	                        help='list symbols in category')
	    parser.add_argument('-group-specializations', action='store_true',
	                        help='group specializations')
	    parser.add_argument('-list-group-specializations', action='store_true',
	                        help='list group specializations')
	    parser.add_argument('-csv', dest='use_csv', action='store_true',
	                        help='print results as csv')
	    parser.add_argument('-uncategorized', action='store_true',
	                        help='show all uncategorized symbols',
	                        dest='show_uncategorized',
	                        default=False)
	    parser.add_argument('bin', help='the binary')
	    parser.set_defaults(use_csv=False)

	    args = parser.parse_args(arguments)
	    # Flags only ever switch a global on; an absent flag leaves it alone.
	    if args.use_csv:
	        useCSV = True
	        print("Using csv")
	    if args.group_specializations:
	        groupSpecializations = True
	    if args.list_group_specializations:
	        listGroupSpecializations = True

	    segments = parse_segments(args.bin, args.arch)

	    if args.build_categories:
	        categorize(segments)
	    elif args.show_uncategorized:
	        uncategorized(segments)
	    elif args.list_category:
	        list_category(segments, args.list_category)
	    else:
	        show_all(segments)


	class Symbol(object):
	    """A symbol with its demangled name, mangled name and size."""

	    def __init__(self, name, mangled_name, size):
	        self.name = name
	        self.mangled_name = mangled_name
	        self.count = 1
	        # size is coerced with int(), so numeric strings are accepted.
	        self.size = int(size)


	def get_symbol_size(sym):
	    """Return the ``size`` attribute of *sym* (used as a sort key)."""
	    return sym.size


	class Segment(object):
	    """A named segment holding an initially empty list of sections."""

	    def __init__(self, name):
	        self.name = name
	        self.sections = []


	class Section(object):
	    """A named section with its size and an initially empty symbol list."""

	    def __init__(self, name, size):
	        self.name = name
	        self.size = size
	        self.symbols = []


	class Category(object):
	    """A named bucket of symbols that keeps a running size total."""

	    def __init__(self, name):
	        self.name = name
	        self.size = 0
	        self.symbols = []

	    def add(self, symbol):
	        """Record *symbol* and add its size to the bucket total."""
	        self.symbols.append(symbol)
	        self.size += symbol.size


	class GenericSpecializationGroupKey(object):
	    """Hashable (module, type, specialization) triple used as a dict key.

	    Two keys are equal when all three fields are equal. Comparing a
	    key with any other kind of object yields "not equal" instead of
	    raising.
	    """

	    def __init__(self, module_name, type_name, specialization):
	        self.module_name = module_name
	        self.type_name = type_name
	        self.specialization = specialization

	    def __hash__(self):
	        return hash((self.module_name, self.type_name, self.specialization))

	    def __eq__(self, other):
	        # Defer to the other operand for foreign types rather than
	        # failing with AttributeError on a missing field.
	        if not isinstance(other, GenericSpecializationGroupKey):
	            return NotImplemented
	        return (self.module_name == other.module_name
	                and self.type_name == other.type_name
	                and self.specialization == other.specialization)

	    def __ne__(self, other):
	        # Python 2 does not derive != from ==, so spell it out.
	        outcome = self.__eq__(other)
	        if outcome is NotImplemented:
	            return outcome
	        return not outcome


	class GenericSpecialization(object):
	    """Symbols grouped under one (module, type, specialization) triple."""

	    def __init__(self, module_name, type_name, specialization):
	        self.module_name = module_name
	        self.type_name = type_name
	        self.specialization = specialization
	        self.size = 0
	        self.symbols = []

	    def add(self, symbol):
	        """Record *symbol* and add its size to the group total."""
	        self.symbols.append(symbol)
	        self.size += symbol.size

	    def list_symbols(self):
	        """Print the group's symbols as "size name", largest first."""
	        sorted_symbols = [(symbol.name, symbol.size)
	                          for symbol in self.symbols]
	        # Stable sort: equally sized symbols keep insertion order.
	        sorted_symbols.sort(key=lambda entry: entry[1], reverse=True)
	        for name, size in sorted_symbols:
	            print("%9d %s" % (size, name))


	class Categories(object):
	def __init__(self):
	self.category_matching = [
	['Objective-C function', re.compile(r'.*[+-]\[')],
	['C++', re.compile(r'_+swift')],
	['Generic specialization of stdlib',
	re.compile(
	r'.generic specialization. of ' +
	r'(static )?(\(extension in Swift\):)?Swift\.'
	)],
	['Generic specialization',
	re.compile(r'.*generic specialization')],
	['Merged function', re.compile(r'merged ')],
	['Key path', re.compile(r'key path')],
	['Function signature specialization',
	re.compile(r'function signature specialization')],
	['Reabstraction thunk helper',
	re.compile(r'reabstraction thunk helper')],
	['vtable thunk', re.compile(r'vtable thunk for')],
	['@objc thunk', re.compile(r'@objc')],
	['@nonobjc thunk', re.compile(r'@nonobjc')],
	['Value witness', re.compile(r'.*value witness for')],
	['Block copy helper', re.compile(r'_block_copy_helper')],
	['Block destroy helper', re.compile(r'_block_destroy_helper')],
	['Block literal global', re.compile(r'___block_literal_global')],
	['Destroy helper block', re.compile(r'___destroy_helper_block')],
	['Copy helper block', re.compile(r'___copy_helper_block')],
	['Object destroy', re.compile(r'_objectdestroy')],
	['Partial apply forwarder',
	re.compile(r'partial apply forwarder')],
	['Closure function', re.compile(r'closure #')],
	['ObjC metadata update function',
	re.compile(r'ObjC metadata update function for')],
	['Variable initialization expression',
	re.compile(r'variable initialization expression of')],
	['Global initialization', re.compile(r'_globalinit_')],
	['Unnamed', re.compile(r'___unnamed_')],
	['Dyld stubs', re.compile(r'DYLD-STUB\$')],
	['Witness table accessor',
	re.compile(r'.*witness table accessor for')],
	['Protocol witness', re.compile(r'protocol witness for')],
	['Outlined variable', re.compile(r'outlined variable #')],
	['Outlined value function (copy,destroy,release...)',
	re.compile(r'outlined')],
	['_symbolic', re.compile(r'_symbolic')],
	['_associated conformance',
	re.compile(r'_associated conformance')],
	['Direct field offset', re.compile(r'direct field offset for')],
	['Value witness tables', re.compile(r'.*value witness table')],
	['Protocol witness table',
	re.compile(r'.*protocol witness table for')],
	['Protocol conformance descriptor',
	re.compile(r'protocol conformance descriptor for')],
	['Lazy protocol witness table cache var',
	re.compile(
	r'lazy protocol witness table cache variable for type')],
	['Nominal type descriptor',
	re.compile(r'nominal type descriptor for')],
	['ObjC class', re.compile(r'_OBJC_CLASS_')],
	['ObjC metaclass', re.compile(r'_OBJC_METACLASS')],
	['ObjC ivar', re.compile(r'_OBJC_IVAR')],
	['Metaclass', re.compile(r'metaclass for')],
	['Block descriptor', re.compile(r'_+block_descriptor')],
	['Extension descriptor', re.compile(r'extension descriptor')],
	['Module descriptor', re.compile(r'module descriptor')],
	['Associated type descriptor',
	re.compile(r'associated type descriptor for')],
	['Associated conformance descriptor',
	re.compile(r'associated conformance descriptor for')],
	['Protocol descriptor', re.compile(r'protocol descriptor for')],
	['Base conformance descriptor',
	re.compile(r'base conformance descriptor for')],
	['Protocol requirements base descriptor',
	re.compile(r'protocol requirements base descriptor for')],
	['Property descriptor', re.compile(r'property descriptor for')],
	['Method descriptor', re.compile(r'method descriptor for')],
	['Anonymous descriptor', re.compile(r'anonymous descriptor')],
	['Type metadata accessor',
	re.compile(r'.*type metadata accessor')],
	['Type metadata', re.compile(r'.*type metadata')],
	['Reflection metadata descriptor',
	re.compile(r'reflection metadata .* descriptor')],
	]

	self.category_mangled_matching = [
	['Swift variable storage', re.compile(r'^_\$s.*[v][p][Z]?$')],
	['Swift constructor', re.compile(r'^_\$s.*[f][cC]$')],
	['Swift initializer', re.compile(r'^_\$s.*[f][ie]$')],
	['Swift destructor/destroyer', re.compile(r'^_\$s.*[f][dDE]$')],
	['Swift getter', re.compile(r'^_\$s.*[iv][gG]$')],
	['Swift setter', re.compile(r'^_\$s.*[iv][swW]$')],
	['Swift materializeForSet', re.compile(r'^_\$s.*[iv][m]$')],
	['Swift modify', re.compile(r'^_\$s.*[iv][M]$')],
	['Swift read', re.compile(r'^_\$s.*[iv][r]$')],
	['Swift addressor', re.compile(r'^_\$s.*[iv][al][uOop]$')],
	['Swift function', re.compile(r'^_\$s.*F$')],
	['Swift unknown', re.compile(r'^_\$s.*')],
	]
	self.categories = {}
	self.specializations = {}
	self.specialization_matcher = re.compile(
	r'.generic specialization <(?P<spec_list>.)> of' +
	r' (static )?(\(extension in Swift\):)?(?P<module_name>[^.]*)\.' +
	r'(?:(?P<first_type>[^.^(^<]*)\.){0,1}' +
	r'(?:(?P<last_type>[^.^(^<])\.)(?P<function_name>[^(^<]*)'
	)
	self.single_stdlib_specialized_type_matcher = re.compile(
	r'(Swift\.)?[^,^.]*$'
	)
	self.two_specialized_stdlib_types_matcher = re.compile(
	r'(Swift\.)?[^,^.], (Swift\.)?[^,^.]$'
	)
	self.single_specialized_foundation_type_matcher = re.compile(
	r'(Foundation\.)?[^,^.]*$'
	)
	self.two_specialized_foundation_types_matcher = re.compile(
	r'(Swift\.)?[^,^.], (Foundation\.)?[^,^.]$'
	)
	self.two_specialized_foundation_types_matcher2 = re.compile(
	r'(Foundation\.)?[^,^.], (Foundation\.)?[^,^.]$'
	)
	self.two_specialized_foundation_types_matcher3 = re.compile(
	r'(Foundation\.)?[^,^.], (Swift\.)?[^,^.]$'
	)
	self.array_type_matcher = re.compile(r'Array')
	self.dictionary = re.compile(r'Array')
	self.single_specialized_types_matcher = re.compile(
	r'(?P<module_name>[^,^.])\.([^,^.]\.)(?P<type_name>[^,^.])$'
	)
	self.is_class_type_dict = {}
	self.stdlib_and_other_type_matcher = re.compile(
	r'(Swift\.)?[^,^.], (?P<module_name>[^,^.])\.(?P<type_name>[^,^.]*)$'
	)
	self.foundation_and_other_type_matcher = re.compile(
	r'(Foundation\.)?[^,^.], (?P<module_name>[^,^.])\.' +
	r'(?P<type_name>[^,^.]*)$'
	)

	def categorize_by_name(self, symbol):
	    """Return the label of the first name pattern that matches ``symbol``.

	    ``self.category_matching`` holds ``[label, compiled_regex]`` entries.
	    They are tried in list order against ``symbol.name`` using
	    ``regex.match`` (anchored at the start of the string), and the first
	    hit wins.  ``None`` is returned when no entry matches.
	    """
	    hits = (entry[0] for entry in self.category_matching
	            if entry[1].match(symbol.name))
	    return next(hits, None)

	def categorize_by_mangled_name(self, symbol):
	    """Return the label of the first mangled-name pattern that matches.

	    Works like the name-based lookup, but walks
	    ``self.category_mangled_matching`` and applies each compiled regex to
	    ``symbol.mangled_name``.  Entries are ordered from specific to
	    generic, so the first hit is returned; ``None`` means no entry
	    matched.
	    """
	    hits = (entry[0] for entry in self.category_mangled_matching
	            if entry[1].match(symbol.mangled_name))
	    return next(hits, None)

	def add_symbol(self, category_name, symbol):
	    """File ``symbol`` under the category called ``category_name``.

	    A ``Category`` is created the first time a name is seen and kept in
	    ``self.categories`` keyed by that name; later symbols are added to the
	    stored object.
	    """
	    bucket = self.categories.get(category_name)
	    if not bucket:
	        # First symbol for this name: populate the new category before
	        # publishing it in the lookup table.
	        bucket = Category(category_name)
	        bucket.add(symbol)
	        self.categories[category_name] = bucket
	        return
	    bucket.add(symbol)

	def add(self, symbol):
	    """Place ``symbol`` in exactly one category.

	    The name-based patterns are consulted first.  On a hit the symbol is
	    filed under that category and, when specialization grouping is
	    enabled and the category is 'Generic specialization of stdlib', it is
	    additionally passed to ``add_specialization``.

	    Otherwise the mangled-name patterns are tried; a symbol that matches
	    neither table is filed under 'Unknown'.
	    """
	    category_name = self.categorize_by_name(symbol)
	    if category_name:
	        self.add_symbol(category_name, symbol)
	        if (category_name == 'Generic specialization of stdlib' and
	                groupSpecializations):
	            self.add_specialization(symbol)
	        return

	    # The mangled-name table only produces 'Swift ...' labels (or None),
	    # so the specialization check that used to follow this lookup could
	    # never succeed and has been dropped.
	    category_name = self.categorize_by_mangled_name(symbol)
	    self.add_symbol(category_name or 'Unknown', symbol)

	def is_class_type_(self, type_name, mangled_name):
	    """Return True if ``mangled_name`` contains ``type_name`` tagged 'C'.

	    The test is purely textual: it searches the mangled symbol for the
	    length-prefixed identifier followed by ``C`` (``3FooC`` for ``Foo``).
	    Presumably this is how the mangling spells a reference to a class
	    type -- confirm against the Swift mangling documentation.
	    """
	    needle = ''.join((str(len(type_name)), type_name, 'C'))
	    return needle in mangled_name

	def is_class_type(self, type_name, mangled_name):
	    """Memoized front end for ``is_class_type_``.

	    The verdict is cached in ``self.is_class_type_dict`` keyed by
	    ``type_name`` alone, so ``mangled_name`` only matters the first time
	    a given type name is asked about; later calls return the cached
	    answer.
	    """
	    cache = self.is_class_type_dict
	    if type_name in cache:
	        return cache[type_name]
	    verdict = self.is_class_type_(type_name, mangled_name)
	    cache[type_name] = verdict
	    return verdict

	def is_dictionary_like_type(self, type_name):
	    """Return True when ``type_name`` contains 'Dictionary' or 'Set'.

	    This is a plain substring test, so any name embedding either word
	    qualifies.
	    """
	    return any(marker in type_name for marker in ('Dictionary', 'Set'))

	def group_library_types(self, module, type_name, specialization,
	                        mangled_name):
	    """Collapse a specialization into a coarse ``(module, type, kind)`` key.

	    Anything whose ``module`` is not 'Swift' is returned unchanged.  For
	    'Swift' the middle element becomes 'stdlib' and the last element is a
	    bucket chosen by matching ``specialization`` against the precompiled
	    patterns, in a fixed order:

	    * only stdlib argument types              -> 'stdlib'
	    * Foundation argument types               -> 'foundation'
	    * a single ``Module.X.Type`` class type   -> 'class' / 'class(dict)'
	    * stdlib type plus a class type           -> 'stdlib, class'
	    * Foundation type plus a class type       -> 'foundation, class'
	    * everything else                         -> 'other'

	    ``mangled_name`` is only used to decide whether a captured type name
	    refers to a class.
	    """
	    if module != 'Swift':
	        return module, type_name, specialization

	    # Patterns whose match alone decides the bucket, in priority order.
	    direct_buckets = (
	        (self.single_stdlib_specialized_type_matcher, 'stdlib'),
	        (self.two_specialized_stdlib_types_matcher, 'stdlib'),
	        (self.single_specialized_foundation_type_matcher, 'foundation'),
	        (self.two_specialized_foundation_types_matcher, 'foundation'),
	        (self.two_specialized_foundation_types_matcher2, 'foundation'),
	        (self.two_specialized_foundation_types_matcher3, 'foundation'),
	    )
	    for matcher, bucket in direct_buckets:
	        if matcher.match(specialization):
	            return module, 'stdlib', bucket

	    lone_type = self.single_specialized_types_matcher.match(specialization)
	    if lone_type and self.is_class_type(lone_type.group('type_name'),
	                                        mangled_name):
	        # The specialized container itself decides between the two class
	        # buckets; ``type_name`` may be None when no type was captured.
	        dict_like = (type_name is not None and
	                     self.is_dictionary_like_type(type_name))
	        return module, 'stdlib', ('class(dict)' if dict_like else 'class')

	    # Patterns that additionally require the captured type to be a class.
	    class_buckets = (
	        (self.stdlib_and_other_type_matcher, 'stdlib, class'),
	        (self.foundation_and_other_type_matcher, 'foundation, class'),
	    )
	    for matcher, bucket in class_buckets:
	        found = matcher.match(specialization)
	        if found and self.is_class_type(found.group('type_name'),
	                                        mangled_name):
	            return module, 'stdlib', bucket

	    return module, 'stdlib', 'other'

	def add_specialization(self, symbol):
	    """Add ``symbol`` to the specialization group it belongs to.

	    ``symbol.name`` is parsed with ``self.specialization_matcher``; the
	    captured module, first type and specialization list are coarsened by
	    ``group_library_types`` and used as the grouping key in
	    ``self.specializations``.  A name the matcher cannot parse is printed
	    followed by 'not matched' and is otherwise ignored.
	    """
	    parsed = self.specialization_matcher.match(symbol.name)
	    if not parsed:
	        print(symbol.name)
	        print('not matched')
	        return

	    module, type_name, specialization = self.group_library_types(
	        parsed.group('module_name'),
	        parsed.group('first_type'),
	        parsed.group('spec_list'),
	        symbol.mangled_name)
	    key = GenericSpecializationGroupKey(module, type_name, specialization)
	    group = self.specializations.get(key)
	    if group:
	        group.add(symbol)
	        return

	    # First symbol for this key: fill the group, then register it.
	    group = GenericSpecialization(module, type_name, specialization)
	    group.add(symbol)
	    self.specializations[key] = group

	def print_specializations(self):
	    """Print every specialization group, ordered by module, type, kind.

	    Nothing is printed when no groups were collected.  Otherwise a
	    'Specialization info' header is followed by one formatted line per
	    group (plus the group's symbols when ``listGroupSpecializations`` is
	    set) and a closing blank line.  Always returns ``None``.
	    """
	    groups = list(self.specializations.values())
	    if not groups:
	        return None

	    # Stable sorts applied from the least to the most significant key
	    # leave the list ordered by module, then type, then specialization.
	    for attribute in ('specialization', 'type_name', 'module_name'):
	        groups.sort(key=lambda entry, attribute=attribute:
	                    getattr(entry, attribute))

	    print("Specialization info")
	    for group in groups:
	        print("%20s.%s %20s %8d" % (group.module_name, group.type_name,
	                                    group.specialization, group.size))
	        if listGroupSpecializations:
	            group.list_symbols()
	    print("")
	    return None

	def categorize(self, symbols):
	    """Run every element of ``symbols`` through ``self.add``, in order."""
	    register = self.add
	    for entry in symbols:
	        register(entry)

	def print_summary(self, section_size):
	    """Print the size of each populated category, largest first.

	    Categories are looked up by every label of the two pattern tables
	    plus 'Unknown'.  Each one with a positive size gets a row showing its
	    size and its share of ``section_size`` in percent; rows use a
	    ';'-separated layout when ``useCSV`` is set and an aligned layout
	    otherwise.  A final 'TOTAL' row (always in the aligned layout, always
	    shown as 100%) reports the sum of all category sizes.
	    """
	    labels = ([entry[0] for entry in self.category_matching] +
	              [entry[0] for entry in self.category_mangled_matching] +
	              ['Unknown'])

	    grand_total = 0
	    rows = []
	    for label in labels:
	        bucket = self.categories.get(label)
	        bucket_size = bucket.size if bucket else 0
	        grand_total += bucket_size
	        if bucket_size > 0:
	            share = (float(bucket_size) * 100) / section_size
	            rows.append((label, bucket_size, share))

	    for label, bucket_size, share in sorted(
	            rows, key=lambda row: row[1], reverse=True):
	        if useCSV:
	            print("%s;%d;%.2f%%" % (label, bucket_size, share))
	        else:
	            print("%60s: %8d (%6.2f%%)" % (label, bucket_size, share))
	    print("%60s: %8d (%6.2f%%)" % ('TOTAL', grand_total, float(100)))

	def uncatorizedSymbols(self):
	    """Return the symbols filed under 'Unknown', or ``None`` if none were."""
	    unknown = self.categories.get('Unknown')
	    return unknown.symbols if unknown else None

	def print_uncategorizedSymbols(self):
	    """Print 'mangled_name name size' for each uncategorized symbol."""
	    leftovers = self.uncatorizedSymbols()
	    if not leftovers:
	        return
	    for entry in leftovers:
	        print(" ".join((entry.mangled_name, entry.name, str(entry.size))))

	def print_category(self, category):
	    """Print the symbols of the category named ``category``.

	    Symbols are listed in the order given by ``get_symbol_size``, one per
	    line as 'size name mangled_name'.  An unknown or empty category
	    prints nothing.
	    """
	    bucket = self.categories.get(category)
	    if not bucket or not bucket.symbols:
	        return
	    for sym in sorted(bucket.symbols, key=get_symbol_size):
	        print('%8d %s %s' % (sym.size, sym.name, sym.mangled_name))

	def has_category(self, category):
	    """Return True if ``category`` exists and holds at least one symbol."""
	    bucket = self.categories.get(category)
	    return bool(bucket and bucket.symbols)


	def parse_segments(path, arch):
	    """Build the segment/section/symbol tree of the binary at ``path``.

	    ``symbols -noSources -noDemangling -arch <arch> <path>`` supplies the
	    raw (mangled) listing, which is piped through ``xcrun swift-demangle``
	    to obtain a demangled copy.  Both listings are walked line by line in
	    lockstep, so line N of one must describe the same entry as line N of
	    the other.

	    Returns:
	        A list of ``Segment`` objects; each carries its ``Section``
	        objects in ``sections`` and each section its ``Symbol`` objects
	        in ``symbols``.  Symbols sharing a demangled name are merged into
	        the first occurrence (sizes summed), across all sections.

	    Raises:
	        subprocess.CalledProcessError: if ``symbols`` exits non-zero.
	        AssertionError: if a demangled symbol line has no matching
	            mangled symbol line.
	    """
	    def text_lines(raw):
	        # The tools hand back bytes while the patterns below are text
	        # patterns.  Split on the byte level (same boundaries for both
	        # listings) and decode afterwards so matching works on Python 3.
	        return [chunk.decode('utf-8', 'replace')
	                for chunk in raw.splitlines()]

	    mangled = subprocess.check_output(
	        ['symbols', '-noSources', '-noDemangling', '-arch', arch, path])
	    demangle = subprocess.Popen(
	        ['xcrun', 'swift-demangle'], stdin=subprocess.PIPE,
	        stdout=subprocess.PIPE)
	    demangled = demangle.communicate(mangled)[0]
	    symbols = {}
	    segments = []
	    # NOTE(review): segment_regex and section_regex are textually
	    # identical here, so any line matching one is consumed by the segment
	    # branch first and the section branch cannot fire.  Presumably the
	    # patterns originally differed (e.g. in leading whitespace) --
	    # confirm against the upstream file.
	    segment_regex = re.compile(
	        r"^ 0x[0-9a-f]+ \(\s*0x(?P<size>[0-9a-f]+)\) "
	        r"(?P<name>.+?) (?P<name2>.+?)$")
	    object_file_segment_regex = re.compile(
	        r"^ 0x[0-9a-f]+ \(\s*0x(?P<size>[0-9a-f]+)\) "
	        r"SEGMENT$")
	    section_regex = re.compile(
	        r"^ 0x[0-9a-f]+ \(\s*0x(?P<size>[0-9a-f]+)\) "
	        r"(?P<name>.+?) (?P<name2>.+?)$")
	    symbol_regex = re.compile(
	        r"^ 0x[0-9a-f]+ \(\s*0x(?P<size>[0-9a-f]+)\) "
	        r"(?P<name>.+?) \[[^\]]+\] $")

	    mangled_lines = text_lines(mangled)

	    for line_number, line in enumerate(text_lines(demangled)):
	        mangled_line = mangled_lines[line_number]

	        # Match a segment entry.
	        segment_match = segment_regex.match(line)
	        if segment_match:
	            segments.append(Segment(segment_match.group('name')))
	            continue

	        object_file_segment_match = object_file_segment_regex.match(line)
	        if object_file_segment_match:
	            segments.append(Segment("SEGMENT"))
	            continue

	        # Match a section entry.
	        section_match = section_regex.match(line)
	        if section_match:
	            new_section = Section(section_match.group('name2'),
	                                  int(section_match.group('size'), 16))
	            segments[-1].sections.append(new_section)
	            continue

	        # Match a symbol entry.
	        symbol_match = symbol_regex.match(line)
	        if not symbol_match:
	            continue
	        mangled_symbol_match = symbol_regex.match(mangled_line)
	        if not mangled_symbol_match:
	            print('mangled and demangled mismatch')
	            print(mangled_line)
	            print(line)
	            # Raised explicitly so the check survives ``python -O``.
	            raise AssertionError('mangled and demangled mismatch')

	        symbol = Symbol(symbol_match.group('name'),
	                        mangled_symbol_match.group('name'),
	                        int(symbol_match.group('size'), 16))
	        existing = symbols.get(symbol.name)
	        if existing:
	            # Same demangled name seen before: fold the size into the
	            # first occurrence instead of listing it twice.
	            existing.size += symbol.size
	        else:
	            symbols[symbol.name] = symbol
	            segments[-1].sections[-1].symbols.append(symbol)

	    return segments


	def show_all(segments):
	    """Print 'size name mangled_name' for every symbol in ``segments``."""
	    for segment in segments:
	        for section in segment.sections:
	            for sym in section.symbols:
	                print(' '.join((str(sym.size), sym.name, sym.mangled_name)))


	def categorize(segments):
	    """Print a per-section category summary for every section.

	    Each section gets a 'Section <segment>;<section>: <size>' heading, the
	    category summary of its symbols and a blank line; specialization
	    groups follow when ``groupSpecializations`` is set.
	    """
	    for segment in segments:
	        for section in segment.sections:
	            heading = segment.name + ';' + section.name
	            print('Section %52s: %8d' % (heading, section.size))
	            summary = Categories()
	            summary.categorize(section.symbols)
	            summary.print_summary(section.size)
	            print('')
	            if groupSpecializations:
	                summary.print_specializations()


	def uncategorized(segments):
	    """Print, section by section, the symbols no category pattern matched."""
	    for segment in segments:
	        for section in segment.sections:
	            leftovers = Categories()
	            leftovers.categorize(section.symbols)
	            leftovers.print_uncategorizedSymbols()


	def list_category(segments, category):
	    """Print the symbols of ``category`` for each section that has any.

	    A section containing the category gets a
	    'Section <segment>;<section>: <size>' heading, the category's symbols
	    and a blank line.  When ``groupSpecializations`` is set the
	    section's specialization groups are printed as well, whether or not
	    the category was present.
	    """
	    for segment in segments:
	        for section in segment.sections:
	            breakdown = Categories()
	            breakdown.categorize(section.symbols)
	            if breakdown.has_category(category):
	                heading = segment.name + ';' + section.name
	                print('Section %22s: %8d' % (heading, section.size))
	                breakdown.print_category(category)
	                print('')
	            if groupSpecializations:
	                breakdown.print_specializations()


	# Script entry point: hand the command-line arguments (minus the program
	# name) to main() and use its return value as the process exit status.
	if __name__ == '__main__':
	    exit_status = main(sys.argv[1:])
	    sys.exit(exit_status)