# build/images/elfinfo.py - fuchsia/ - Git at Google

 #!/usr/bin/env python
 # Copyright 2017 The Fuchsia Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 from contextlib import contextmanager
 from collections import namedtuple
 import mmap
 import os
 import struct
 import uuid

 # Standard ELF constants.
 # Magic bytes that get_elf_accessor compares against the start of a file.
 ELFMAG = '\x7fELF'
 # Index of the class byte in the file, and the class values that key the
 # ELF decoder table (32-bit vs. 64-bit struct layouts).
 EI_CLASS = 4
 ELFCLASS32 = 1
 ELFCLASS64 = 2
 # Index of the byte-order byte in the file, and the values that select
 # little-endian ('<') or big-endian ('>') struct decoding.
 EI_DATA = 5
 ELFDATA2LSB = 1
 ELFDATA2MSB = 2
 # e_machine values that ELF_MACHINE_TO_CPU knows how to name.
 EM_386 = 3
 EM_ARM = 40
 EM_X86_64 = 62
 EM_AARCH64 = 183
 # p_type values of the program headers this module looks for.
 PT_LOAD = 1
 PT_DYNAMIC = 2
 PT_INTERP = 3
 PT_NOTE = 4
 # d_tag values of the dynamic entries this module looks for.
 DT_NEEDED = 1
 DT_STRTAB = 5
 DT_SONAME = 14
 # Note type that, together with the name 'GNU\0', marks a build ID note.
 NT_GNU_BUILD_ID = 3
 # sh_type value whose presence means the file is not stripped.
 SHT_SYMTAB = 2


 class elf_note(namedtuple('elf_note', ['name', 'type', 'desc'])):
     # One decoded ELF note: `name` and `desc` are the raw strings sliced
     # out of the file, `type` is the integer n_type from the note header.

     # An ELF note is identified by (name_string, type_integer).
     def ident(self):
         note_name, note_type, _ = self
         return (note_name, note_type)

     # True iff this note carries a GNU build ID.
     def is_build_id(self):
         gnu_build_id = ('GNU\0', NT_GNU_BUILD_ID)
         return self.ident() == gnu_build_id

     # Lowercase hex rendering of the build ID bytes, or None when this
     # note is not a build ID note.
     def build_id_hex(self):
         if not self.is_build_id():
             return None
         hex_pairs = ['%02x' % ord(byte) for byte in self.desc]
         return ''.join(hex_pairs)

     # Show the size of desc rather than its (binary) contents.
     def __repr__(self):
         summary = (self.name, self.type, len(self.desc))
         return 'elf_note(%r, %#x, <%d bytes>)' % summary


 def gen_elf():
     """Yield ((class_char, byte_order_char), elf) for every ELF format.

     There is one entry per combination of ELFCLASS32/ELFCLASS64 and
     ELFDATA2LSB/ELFDATA2MSB.  The key is the pair of one-character
     strings found at file[EI_CLASS] and file[EI_DATA]; the value is an
     `elf` namedtuple with one accessor per struct described below
     (elf.Ehdr, elf.Phdr, ...).  Each accessor has:
       size              -- size of the struct in bytes
       read(buffer, offset=0)   -- decode into a namedtuple of fields
       write(buffer, offset, x) -- encode x into a writable buffer
       pack(x)           -- encode x into a new string
     """
     # { 'Struct1': (ELFCLASS32 fields, ELFCLASS64 fields),
     #   'Struct2': fields_same_for_both, ... }
     # Each field is (name, struct-module format code); order is the
     # on-disk order.
     elf_types = {
         'Ehdr': ([
             ('e_ident', '16s'), ('e_type', 'H'), ('e_machine', 'H'),
             ('e_version', 'I'), ('e_entry', 'I'), ('e_phoff', 'I'),
             ('e_shoff', 'I'), ('e_flags', 'I'), ('e_ehsize', 'H'),
             ('e_phentsize', 'H'), ('e_phnum', 'H'), ('e_shentsize', 'H'),
             ('e_shnum', 'H'), ('e_shstrndx', 'H'),
         ], [
             ('e_ident', '16s'), ('e_type', 'H'), ('e_machine', 'H'),
             ('e_version', 'I'), ('e_entry', 'Q'), ('e_phoff', 'Q'),
             ('e_shoff', 'Q'), ('e_flags', 'I'), ('e_ehsize', 'H'),
             ('e_phentsize', 'H'), ('e_phnum', 'H'), ('e_shentsize', 'H'),
             ('e_shnum', 'H'), ('e_shstrndx', 'H'),
         ]),
         'Phdr': ([
             ('p_type', 'I'), ('p_offset', 'I'), ('p_vaddr', 'I'),
             ('p_paddr', 'I'), ('p_filesz', 'I'), ('p_memsz', 'I'),
             ('p_flags', 'I'), ('p_align', 'I'),
         ], [
             ('p_type', 'I'), ('p_flags', 'I'), ('p_offset', 'Q'),
             ('p_vaddr', 'Q'), ('p_paddr', 'Q'), ('p_filesz', 'Q'),
             ('p_memsz', 'Q'), ('p_align', 'Q'),
         ]),
         'Shdr': ([
             ('sh_name', 'L'), ('sh_type', 'L'), ('sh_flags', 'L'),
             ('sh_addr', 'L'), ('sh_offset', 'L'), ('sh_size', 'L'),
             ('sh_link', 'L'), ('sh_info', 'L'), ('sh_addralign', 'L'),
             ('sh_entsize', 'L'),
         ], [
             ('sh_name', 'L'), ('sh_type', 'L'), ('sh_flags', 'Q'),
             ('sh_addr', 'Q'), ('sh_offset', 'Q'), ('sh_size', 'Q'),
             ('sh_link', 'L'), ('sh_info', 'L'), ('sh_addralign', 'Q'),
             ('sh_entsize', 'Q'),
         ]),
         'Dyn': ([('d_tag', 'i'), ('d_val', 'I')],
                 [('d_tag', 'q'), ('d_val', 'Q')]),
         'Nhdr': [('n_namesz', 'I'), ('n_descsz', 'I'), ('n_type', 'I')],
         'dwarf2_line_header': [
             ('unit_length', 'L'), ('version', 'H'), ('header_length', 'L'),
             ('minimum_instruction_length', 'B'), ('default_is_stmt', 'B'),
             ('line_base', 'b'), ('line_range', 'B'), ('opcode_base', 'B'),
         ],
         'dwarf4_line_header': [
             ('unit_length', 'L'), ('version', 'H'), ('header_length', 'L'),
             ('minimum_instruction_length', 'B'),
             ('maximum_operations_per_instruction', 'B'),
             ('default_is_stmt', 'B'),
             # Only line_base is signed; line_range is an unsigned byte,
             # exactly as in the version 2 header above.
             ('line_base', 'b'), ('line_range', 'B'), ('opcode_base', 'B'),
         ],
     }

     # Fix the struct order once so the fields of `elf` and the accessors
     # generated below are guaranteed to line up.
     struct_names = list(elf_types)

     # There is an accessor for each struct, e.g. Ehdr.
     # Ehdr.read is a function like Struct.unpack_from.
     # Ehdr.size is the size of the struct.
     elf_accessor = namedtuple('elf_accessor',
                               ['size', 'read', 'write', 'pack'])

     # All the accessors for a format (class, byte-order) form one elf,
     # e.g. use elf.Ehdr and elf.Phdr.
     elf = namedtuple('elf', struct_names)

     def gen_accessors(is64, struct_byte_order):
         # A separate function so each set of lambdas closes over its own
         # record type and decoder rather than the loop variables.
         def make_accessor(record_type, decoder):
             return elf_accessor(
                 size=decoder.size,
                 read=lambda buffer, offset=0: record_type._make(
                     decoder.unpack_from(buffer, offset)),
                 write=lambda buffer, offset, x: decoder.pack_into(
                     buffer, offset, *x),
                 pack=lambda x: decoder.pack(*x))
         for name in struct_names:
             fields = elf_types[name]
             if isinstance(fields, tuple):
                 fields = fields[1 if is64 else 0]
             record_type = namedtuple(
                 name, [field_name for field_name, _ in fields])
             decoder = struct.Struct(
                 struct_byte_order + ''.join(fmt for _, fmt in fields))
             yield make_accessor(record_type, decoder)

     for elfclass, is64 in [(ELFCLASS32, False), (ELFCLASS64, True)]:
         for elf_bo, struct_bo in [(ELFDATA2LSB, '<'), (ELFDATA2MSB, '>')]:
             yield ((chr(elfclass), chr(elf_bo)),
                    elf(*gen_accessors(is64, struct_bo)))

 # Table of decoders built once when the module is loaded, keyed by the
 # (class, byte-order) characters that gen_elf yields.
 # e.g. ELF[file[EI_CLASS], file[EI_DATA]].Ehdr.read(file).e_phnum
 ELF = dict(gen_elf())

 def get_elf_accessor(file):
     """Return the `elf` decoder set for the given file contents, or None.

     file: the file's contents as a string or mmap.

     Returns the entry of ELF selected by the file's class and byte-order
     bytes.  Returns None if the contents do not start with ELFMAG, are
     too short to contain both of those bytes, or name a class/byte order
     for which there is no decoder.
     """
     # If it looks like an ELF file, whip out the decoder ring.
     if file[:len(ELFMAG)] != ELFMAG:
         return None
     # A truncated header cannot be indexed safely.
     if len(file) <= max(EI_CLASS, EI_DATA):
         return None
     # Unknown class or byte order: treat as "not an ELF file we can read".
     return ELF.get((file[EI_CLASS], file[EI_DATA]))


 def gen_phdrs(file, elf, ehdr):
     """Yield each program header of the file as an elf.Phdr tuple.

     file: buffer holding the file contents.
     elf: decoder set providing Phdr.size and Phdr.read.
     ehdr: decoded ELF header; e_phoff and e_phnum locate the table.
     """
     entry_size = elf.Phdr.size
     # range (not xrange) so this also runs on Python 3.
     for index in range(ehdr.e_phnum):
         yield elf.Phdr.read(file, ehdr.e_phoff + index * entry_size)


 def gen_shdrs(file, elf, ehdr):
     """Yield each section header of the file as an elf.Shdr tuple.

     file: buffer holding the file contents.
     elf: decoder set providing Shdr.size and Shdr.read.
     ehdr: decoded ELF header; e_shoff and e_shnum locate the table.
     """
     entry_size = elf.Shdr.size
     # range (not xrange) so this also runs on Python 3.
     for index in range(ehdr.e_shnum):
         yield elf.Shdr.read(file, ehdr.e_shoff + index * entry_size)


 # The different names one CPU architecture goes by.
 cpu = namedtuple('cpu', [
     'e_machine',  # ELF e_machine int
     'llvm',       # LLVM triple CPU component
     'gn',         # GN target_cpu
 ])

 # Lookup from an ELF header's e_machine value to its cpu tuple.
 ELF_MACHINE_TO_CPU = dict(
     (known.e_machine, known) for known in (
         cpu(EM_386, 'i386', 'x86'),
         cpu(EM_ARM, 'arm', 'arm'),
         cpu(EM_X86_64, 'x86_64', 'x64'),
         cpu(EM_AARCH64, 'aarch64', 'arm64'),
     ))


 @contextmanager
 def mmapper(filename):
     """A context manager that yields (fd, file_contents) given a file name.

     file_contents is a read-only mmap of the whole file, or the empty
     string '' when the file is empty (mmap can't map an empty file).
     The mmap and file objects are closed at the end of the 'with'
     statement, and the file is also closed if mapping it fails.
     """
     with open(filename, 'rb') as fileobj:
         fd = fileobj.fileno()
         if os.fstat(fd).st_size == 0:
             # mmap can't handle empty files.
             yield fd, ''
             return
         mmapobj = mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
         try:
             yield fd, mmapobj
         finally:
             # Unmap before the enclosing 'with' closes the file.
             mmapobj.close()

 def makedirs(dirs):
     """Create directory `dirs` and any missing parents.

     It is not an error for the path to exist already; any other OSError
     from os.makedirs is propagated unchanged.
     """
     # os.errno was only ever an accidental alias and is gone in
     # Python 3.7+; use the real module.
     import errno
     try:
         os.makedirs(dirs)
     except OSError as e:
         if e.errno != errno.EEXIST:
             # Bare raise keeps the original traceback.
             raise

 # elf_info objects are only created by `get_elf_info` or the `copy` or
 # `rename` methods.  Besides the tuple fields, the creator attaches two
 # instance attributes: `elf` (the decoder set for the file) and
 # `get_sources` (a closure returning the set of source file names).
 class elf_info(
     namedtuple('elf_info', [
         'filename',
         'cpu',       # cpu tuple, or None for an unknown e_machine
         'notes',     # list of elf_note: selected notes
         'build_id',  # string: lowercase hex, or None
         'stripped',  # bool: Has no symbols or .debug_* sections
         'interp',    # string or None: PT_INTERP (without \0)
         'soname',    # string or None: DT_SONAME
         'needed',    # set of strings: DT_NEEDED
     ])):

     def rename(self, filename):
         """Return a copy of this object that uses `filename` instead.

         `filename` must refer to the same file as self.filename.
         Raises Exception if this object was never initialized by
         get_elf_info.
         """
         assert os.path.samefile(self.filename, filename)
         # Copy the tuple.
         clone = self.__class__(filename, *self[1:])
         # Copy the lazy state.
         clone.elf = self.elf
         # An initialized object carries get_sources as an instance
         # attribute (a plain closure).  Still seeing the class's
         # placeholder bound to self means it was never filled in.
         if getattr(self.get_sources, '__self__', None) is self:
             raise Exception("uninitialized elf_info object!")
         clone.get_sources = self.get_sources
         return clone

     def copy(self):
         """Return a copy of this object, including its lazy state."""
         return self.rename(self.filename)

     # This is replaced with a closure by the creator in get_elf_info.
     def get_sources(self):
         raise Exception("uninitialized elf_info object!")

     def strip(self, stripped_filename):
         """Write stripped output to the given file unless it already exists
         with identical contents.  Returns True iff the file was changed.

         The output is the original file truncated after the last byte
         covered by a PT_LOAD segment, with the section header fields of
         the ELF header zeroed.
         """
         with mmapper(self.filename) as mapped:
             fd, contents = mapped
             ehdr = self.elf.Ehdr.read(contents)

             stripped_ehdr = ehdr._replace(e_shoff=0, e_shnum=0, e_shstrndx=0)
             stripped_size = max(phdr.p_offset + phdr.p_filesz
                                 for phdr in gen_phdrs(contents, self.elf, ehdr)
                                 if phdr.p_type == PT_LOAD)
             # The program headers must survive the truncation.
             assert ehdr.e_phoff + (ehdr.e_phnum *
                                    ehdr.e_phentsize) <= stripped_size

             # The pieces of the output file, in order.
             def gen_stripped_contents():
                 yield self.elf.Ehdr.pack(stripped_ehdr)
                 yield contents[self.elf.Ehdr.size:stripped_size]

             def old_file_matches():
                 old_size = os.path.getsize(stripped_filename)
                 new_size = sum(len(x) for x in gen_stripped_contents())
                 if old_size != new_size:
                     return False
                 with open(stripped_filename, 'rb') as f:
                     for chunk in gen_stripped_contents():
                         if f.read(len(chunk)) != chunk:
                             return False
                 return True

             if os.path.exists(stripped_filename):
                 if old_file_matches():
                     return False
                 os.remove(stripped_filename)
             # Create the new file with the same mode as the original.
             with os.fdopen(os.open(stripped_filename,
                                    os.O_WRONLY | os.O_CREAT | os.O_EXCL,
                                    os.fstat(fd).st_mode & 0o777),
                            'wb') as stripped_file:
                 for chunk in gen_stripped_contents():
                     stripped_file.write(chunk)
             return True

 def get_elf_info(filename, match_notes=False):
     """Return an elf_info describing the file `filename`, or None.

     filename: path of the file to examine.
     match_notes: False to collect no notes, True to collect every note,
         or a container of (name, type) idents selecting which notes go
         into the `notes` field.

     Returns None if get_elf_accessor does not recognize the contents.
     The returned object's get_sources() decodes .debug_line on first
     use (mapping the file again to do so) and caches the resulting set
     of normalized source paths.
     """
     # Filled in below; the nested helpers read these from this scope.
     elf = None
     ehdr = None
     phdrs = None

     # Yields an elf_note for each note in any PT_NOTE segment.
     def gen_notes(data):
         # Name and desc each occupy a multiple of 4 bytes in the file.
         def round_up_to(size):
             return ((size + 3) // 4) * 4
         for phdr in phdrs:
             if phdr.p_type != PT_NOTE:
                 continue
             pos = phdr.p_offset
             limit = phdr.p_offset + phdr.p_filesz
             while pos < limit:
                 nhdr = elf.Nhdr.read(data, pos)
                 pos += elf.Nhdr.size
                 name = data[pos:pos + nhdr.n_namesz]
                 pos += round_up_to(nhdr.n_namesz)
                 desc = data[pos:pos + nhdr.n_descsz]
                 pos += round_up_to(nhdr.n_descsz)
                 yield elf_note(name, nhdr.n_type, desc)

     # Yields (shdr, name_string) for every section except index 0.
     def gen_sections(data):
         shdrs = list(gen_shdrs(data, elf, ehdr))
         if not shdrs:
             return
         strtab_shdr = shdrs[ehdr.e_shstrndx]
         for shdr in shdrs[1:]:
             assert shdr.sh_name < strtab_shdr.sh_size, (
                 "%s: invalid sh_name" % filename)
             yield (shdr,
                    extract_C_string(data,
                                     strtab_shdr.sh_offset + shdr.sh_name))

     # Generates '\0'-terminated strings starting at the given offset,
     # until an empty string.
     def gen_strings(data, start):
         while True:
             end = data.find('\0', start)
             assert end >= start, (
                 "%s: Unterminated string at %#x" % (filename, start))
             if start == end:
                 break
             yield data[start:end]
             start = end + 1

     # Returns the single string at the given offset ('' if it is empty).
     def extract_C_string(data, start):
         for string in gen_strings(data, start):
             return string
         return ''

     # Returns a string of hex digits (or None).
     def get_build_id(data):
         build_id = None
         for note in gen_notes(data):
             # Note that the last build_id note needs to be used due to TO-442.
             possible_build_id = note.build_id_hex()
             if possible_build_id:
                 build_id = possible_build_id
         return build_id

     # Returns a list of elf_note objects.
     def get_matching_notes(data):
         if isinstance(match_notes, bool):
             return list(gen_notes(data)) if match_notes else []
         # If not a bool, it's an iterable of ident pairs.
         return [note for note in gen_notes(data)
                 if note.ident() in match_notes]

     # Returns a string (without trailing '\0'), or None.
     def get_interp(data):
         # PT_INTERP points directly to a string in the file.
         for phdr in phdrs:
             if phdr.p_type != PT_INTERP:
                 continue
             interp = data[phdr.p_offset:phdr.p_offset + phdr.p_filesz]
             if interp[-1:] == '\0':
                 interp = interp[:-1]
             return interp
         return None

     # Returns (soname string or None, set of DT_NEEDED strings).
     def get_soname_and_needed(data):
         # PT_DYNAMIC points to the list of ElfNN_Dyn tags.
         for dynamic in phdrs:
             if dynamic.p_type != PT_DYNAMIC:
                 continue
             dyn = [elf.Dyn.read(data, dynamic.p_offset + dyn_offset)
                    for dyn_offset in range(0, dynamic.p_filesz,
                                            elf.Dyn.size)]

             # DT_STRTAB points to the string table's vaddr (.dynstr).
             [strtab_vaddr] = [dt.d_val for dt in dyn if dt.d_tag == DT_STRTAB]

             # Find the PT_LOAD containing the vaddr to compute the file offset.
             [strtab_offset] = [
                 strtab_vaddr - phdr.p_vaddr + phdr.p_offset
                 for phdr in phdrs
                 if (phdr.p_type == PT_LOAD and
                     phdr.p_vaddr <= strtab_vaddr and
                     strtab_vaddr - phdr.p_vaddr < phdr.p_filesz)
             ]

             # Each DT_NEEDED or DT_SONAME points to a string in the
             # .dynstr table.
             def gen_dt_strings(tag):
                 return (extract_C_string(data, strtab_offset + dt.d_val)
                         for dt in dyn if dt.d_tag == tag)

             # Only the first DT_SONAME (if any) is used.
             soname = next(gen_dt_strings(DT_SONAME), None)
             return soname, set(gen_dt_strings(DT_NEEDED))
         return None, set()

     # True iff there is no symbol table and no .debug_* section.
     def get_stripped(data):
         return all(
             shdr.sh_type != SHT_SYMTAB and not name.startswith('.debug_')
             for shdr, name in gen_sections(data))

     # Returns the cpu tuple for the file's e_machine, or None if unknown.
     def get_cpu():
         return ELF_MACHINE_TO_CPU.get(ehdr.e_machine)

     # Yields the normalized path of every file named in .debug_line.
     def gen_source_files(data):
         # Given the file position of a CU header (starting with the
         # beginning of the .debug_line section), return the position
         # of the include_directories portion and the position of the
         # next CU header.
         def read_line_header(pos):
             # Decode DWARF .debug_line per-CU header.
             hdr_type = elf.dwarf2_line_header
             hdr = hdr_type.read(data, pos)
             assert hdr.unit_length < 0xfffffff0, (
                 "%s: 64-bit DWARF" % filename)
             assert hdr.version in [2, 3, 4], (
                 "%s: DWARF .debug_line version %r" %
                 (filename, hdr.version))
             if hdr.version == 4:
                 # Version 4 has one more field; decode again with it.
                 hdr_type = elf.dwarf4_line_header
                 hdr = hdr_type.read(data, pos)
             # Skip the fixed header and the opcode_base - 1 bytes that
             # follow it; unit_length does not count its own 4 bytes.
             return (pos + hdr_type.size + hdr.opcode_base - 1,
                     pos + 4 + hdr.unit_length)

         # Decode include_directories portion of DWARF .debug_line format.
         def read_include_dirs(pos):
             include_dirs = list(gen_strings(data, pos))
             # Step over each string's terminator plus the final empty one.
             pos += sum(len(dir) + 1 for dir in include_dirs) + 1
             return pos, include_dirs

         # Decode file_paths portion of DWARF .debug_line format.
         def gen_file_paths(start, limit):
             while start < limit:
                 end = data.find('\0', start, limit)
                 assert end >= start, (
                     "%s: Unterminated string at %#x" % (filename, start))
                 if start == end:
                     break
                 name = data[start:end]
                 start = end + 1
                 # Decode 3 ULEB128s to advance start, but only use the first.
                 for i in range(3):
                     value = 0
                     bits = 0
                     while start < limit:
                         byte = ord(data[start])
                         start += 1
                         value |= (byte & 0x7f) << bits
                         if (byte & 0x80) == 0:
                             break
                         bits += 7
                     if i == 0:
                         include_idx = value
                 # Ignore the fake file names the compiler leaks into the DWARF.
                 if name not in ['<stdin>', '<command-line>']:
                     yield name, include_idx

         for shdr, section_name in gen_sections(data):
             if section_name != '.debug_line':
                 continue
             next_unit = shdr.sh_offset
             section_end = shdr.sh_offset + shdr.sh_size
             while next_unit < section_end:
                 pos, next_unit = read_line_header(next_unit)

                 pos, include_dirs = read_include_dirs(pos)
                 assert pos <= next_unit

                 # 0 means relative to DW_AT_comp_dir, which should be ".".
                 # Indices into the actual table start at 1.
                 include_dirs.insert(0, '')

                 # Decode file_paths and apply include directories.
                 for name, i in gen_file_paths(pos, next_unit):
                     name = os.path.join(include_dirs[i], name)
                     yield os.path.normpath(name)

     # This closure becomes the elf_info object's `get_sources` method.
     def lazy_get_sources():
         # The mapping used to build `info` was closed when get_elf_info
         # returned, so map the file again for the duration of the scan.
         with mmapper(filename) as remapped:
             # Run the generator and cache its results as a set.
             sources_cache = set(gen_source_files(remapped[1]))
         # Replace the method to just return the cached set next time.
         info.get_sources = lambda: sources_cache
         return sources_cache

     # Map in the whole file's contents and use it as a string.
     with mmapper(filename) as mapped:
         data = mapped[1]
         elf = get_elf_accessor(data)
         if elf is None:
             return None
         # ELF header leads to program headers.
         ehdr = elf.Ehdr.read(data)
         assert ehdr.e_phentsize == elf.Phdr.size, (
             "%s: invalid e_phentsize" % filename)
         phdrs = list(gen_phdrs(data, elf, ehdr))
         info = elf_info(filename,
                         get_cpu(),
                         get_matching_notes(data),
                         get_build_id(data),
                         get_stripped(data),
                         get_interp(data),
                         *get_soname_and_needed(data))
         info.elf = elf
         info.get_sources = lazy_get_sources
         return info


 # Module public API.
 # These are the names exported by `from <module> import *`; helpers such
 # as mmapper, makedirs, gen_phdrs and gen_shdrs are not listed.
 __all__ = ['cpu', 'elf_info', 'elf_note', 'get_elf_accessor', 'get_elf_info']


 def test_main_strip(filenames):
     """Manual test: strip each file to `<name>.ei-strip` and print sizes.

     Prints the elf_info for each file; files that get_elf_info does not
     recognize (it returns None) are printed and skipped.
     """
     for filename in filenames:
         info = get_elf_info(filename)
         print(info)
         if info is None:
             # Not an ELF file: there is nothing to strip.
             continue
         stripped_filename = info.filename + '.ei-strip'
         info.strip(stripped_filename)
         print('\t%s: %u -> %u' % (stripped_filename,
                                   os.stat(filename).st_size,
                                   os.stat(stripped_filename).st_size))


 def test_main_get_info(filenames):
     """Manual test: print each file's elf_info and its source files.

     Files that get_elf_info does not recognize (it returns None) are
     printed and skipped.
     """
     for filename in filenames:
         info = get_elf_info(filename)
         print(info)
         if info is None:
             # Not an ELF file: there are no sources to list.
             continue
         for source in info.get_sources():
             print('\t' + source)


 # For manual testing.
 #   elfinfo.py FILE...          print info and source files for each FILE
 #   elfinfo.py -strip FILE...   also write FILE.ei-strip for each FILE
 if __name__ == "__main__":
     import sys
     args = sys.argv[1:]
     # Checking `args` first avoids an IndexError when run with no arguments.
     if args and args[0] == '-strip':
         test_main_strip(args[1:])
     else:
         test_main_get_info(args)
	#!/usr/bin/env python
	# Copyright 2017 The Fuchsia Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	from contextlib import contextmanager
	from collections import namedtuple
	import mmap
	import os
	import struct
	import uuid

	# Standard ELF constants.
	# Magic bytes that get_elf_accessor compares against the start of a file.
	ELFMAG = '\x7fELF'
	# Index of the class byte in the file, and the class values that key the
	# ELF decoder table (32-bit vs. 64-bit struct layouts).
	EI_CLASS = 4
	ELFCLASS32 = 1
	ELFCLASS64 = 2
	# Index of the byte-order byte in the file, and the values that select
	# little-endian ('<') or big-endian ('>') struct decoding.
	EI_DATA = 5
	ELFDATA2LSB = 1
	ELFDATA2MSB = 2
	# e_machine values that ELF_MACHINE_TO_CPU knows how to name.
	EM_386 = 3
	EM_ARM = 40
	EM_X86_64 = 62
	EM_AARCH64 = 183
	# p_type values of the program headers this module looks for.
	PT_LOAD = 1
	PT_DYNAMIC = 2
	PT_INTERP = 3
	PT_NOTE = 4
	# d_tag values of the dynamic entries this module looks for.
	DT_NEEDED = 1
	DT_STRTAB = 5
	DT_SONAME = 14
	# Note type that, together with the name 'GNU\0', marks a build ID note.
	NT_GNU_BUILD_ID = 3
	# sh_type value whose presence means the file is not stripped.
	SHT_SYMTAB = 2


	class elf_note(namedtuple('elf_note', ['name', 'type', 'desc'])):
	    # One decoded ELF note: `name` and `desc` are the raw strings sliced
	    # out of the file, `type` is the integer n_type from the note header.

	    # An ELF note is identified by (name_string, type_integer).
	    def ident(self):
	        return (self.name, self.type)

	    # True iff this note carries a GNU build ID.
	    def is_build_id(self):
	        return self.ident() == ('GNU\0', NT_GNU_BUILD_ID)

	    # Lowercase hex rendering of the build ID bytes, or None when this
	    # note is not a build ID note.
	    def build_id_hex(self):
	        if self.is_build_id():
	            return ''.join(('%02x' % ord(byte)) for byte in self.desc)
	        return None

	    # Show the size of desc rather than its (binary) contents.
	    def __repr__(self):
	        return ('elf_note(%r, %#x, <%d bytes>)' %
	                (self.name, self.type, len(self.desc)))


	def gen_elf():
	    """Yield ((class_char, byte_order_char), elf) for every ELF format.

	    There is one entry per combination of ELFCLASS32/ELFCLASS64 and
	    ELFDATA2LSB/ELFDATA2MSB.  The key is the pair of one-character
	    strings found at file[EI_CLASS] and file[EI_DATA]; the value is an
	    `elf` namedtuple with one accessor per struct described below
	    (elf.Ehdr, elf.Phdr, ...).  Each accessor has:
	      size              -- size of the struct in bytes
	      read(buffer, offset=0)   -- decode into a namedtuple of fields
	      write(buffer, offset, x) -- encode x into a writable buffer
	      pack(x)           -- encode x into a new string
	    """
	    # { 'Struct1': (ELFCLASS32 fields, ELFCLASS64 fields),
	    #   'Struct2': fields_same_for_both, ... }
	    # Each field is (name, struct-module format code); order is the
	    # on-disk order.
	    elf_types = {
	        'Ehdr': ([
	            ('e_ident', '16s'), ('e_type', 'H'), ('e_machine', 'H'),
	            ('e_version', 'I'), ('e_entry', 'I'), ('e_phoff', 'I'),
	            ('e_shoff', 'I'), ('e_flags', 'I'), ('e_ehsize', 'H'),
	            ('e_phentsize', 'H'), ('e_phnum', 'H'), ('e_shentsize', 'H'),
	            ('e_shnum', 'H'), ('e_shstrndx', 'H'),
	        ], [
	            ('e_ident', '16s'), ('e_type', 'H'), ('e_machine', 'H'),
	            ('e_version', 'I'), ('e_entry', 'Q'), ('e_phoff', 'Q'),
	            ('e_shoff', 'Q'), ('e_flags', 'I'), ('e_ehsize', 'H'),
	            ('e_phentsize', 'H'), ('e_phnum', 'H'), ('e_shentsize', 'H'),
	            ('e_shnum', 'H'), ('e_shstrndx', 'H'),
	        ]),
	        'Phdr': ([
	            ('p_type', 'I'), ('p_offset', 'I'), ('p_vaddr', 'I'),
	            ('p_paddr', 'I'), ('p_filesz', 'I'), ('p_memsz', 'I'),
	            ('p_flags', 'I'), ('p_align', 'I'),
	        ], [
	            ('p_type', 'I'), ('p_flags', 'I'), ('p_offset', 'Q'),
	            ('p_vaddr', 'Q'), ('p_paddr', 'Q'), ('p_filesz', 'Q'),
	            ('p_memsz', 'Q'), ('p_align', 'Q'),
	        ]),
	        'Shdr': ([
	            ('sh_name', 'L'), ('sh_type', 'L'), ('sh_flags', 'L'),
	            ('sh_addr', 'L'), ('sh_offset', 'L'), ('sh_size', 'L'),
	            ('sh_link', 'L'), ('sh_info', 'L'), ('sh_addralign', 'L'),
	            ('sh_entsize', 'L'),
	        ], [
	            ('sh_name', 'L'), ('sh_type', 'L'), ('sh_flags', 'Q'),
	            ('sh_addr', 'Q'), ('sh_offset', 'Q'), ('sh_size', 'Q'),
	            ('sh_link', 'L'), ('sh_info', 'L'), ('sh_addralign', 'Q'),
	            ('sh_entsize', 'Q'),
	        ]),
	        'Dyn': ([('d_tag', 'i'), ('d_val', 'I')],
	                [('d_tag', 'q'), ('d_val', 'Q')]),
	        'Nhdr': [('n_namesz', 'I'), ('n_descsz', 'I'), ('n_type', 'I')],
	        'dwarf2_line_header': [
	            ('unit_length', 'L'), ('version', 'H'), ('header_length', 'L'),
	            ('minimum_instruction_length', 'B'), ('default_is_stmt', 'B'),
	            ('line_base', 'b'), ('line_range', 'B'), ('opcode_base', 'B'),
	        ],
	        'dwarf4_line_header': [
	            ('unit_length', 'L'), ('version', 'H'), ('header_length', 'L'),
	            ('minimum_instruction_length', 'B'),
	            ('maximum_operations_per_instruction', 'B'),
	            ('default_is_stmt', 'B'),
	            # Only line_base is signed; line_range is an unsigned byte,
	            # exactly as in the version 2 header above.
	            ('line_base', 'b'), ('line_range', 'B'), ('opcode_base', 'B'),
	        ],
	    }

	    # Fix the struct order once so the fields of `elf` and the accessors
	    # generated below are guaranteed to line up.
	    struct_names = list(elf_types)

	    # There is an accessor for each struct, e.g. Ehdr.
	    # Ehdr.read is a function like Struct.unpack_from.
	    # Ehdr.size is the size of the struct.
	    elf_accessor = namedtuple('elf_accessor',
	                              ['size', 'read', 'write', 'pack'])

	    # All the accessors for a format (class, byte-order) form one elf,
	    # e.g. use elf.Ehdr and elf.Phdr.
	    elf = namedtuple('elf', struct_names)

	    def gen_accessors(is64, struct_byte_order):
	        # A separate function so each set of lambdas closes over its own
	        # record type and decoder rather than the loop variables.
	        def make_accessor(record_type, decoder):
	            return elf_accessor(
	                size=decoder.size,
	                read=lambda buffer, offset=0: record_type._make(
	                    decoder.unpack_from(buffer, offset)),
	                write=lambda buffer, offset, x: decoder.pack_into(
	                    buffer, offset, *x),
	                pack=lambda x: decoder.pack(*x))
	        for name in struct_names:
	            fields = elf_types[name]
	            if isinstance(fields, tuple):
	                fields = fields[1 if is64 else 0]
	            record_type = namedtuple(
	                name, [field_name for field_name, _ in fields])
	            decoder = struct.Struct(
	                struct_byte_order + ''.join(fmt for _, fmt in fields))
	            yield make_accessor(record_type, decoder)

	    for elfclass, is64 in [(ELFCLASS32, False), (ELFCLASS64, True)]:
	        for elf_bo, struct_bo in [(ELFDATA2LSB, '<'), (ELFDATA2MSB, '>')]:
	            yield ((chr(elfclass), chr(elf_bo)),
	                   elf(*gen_accessors(is64, struct_bo)))

	# Table of decoders built once when the module is loaded, keyed by the
	# (class, byte-order) characters that gen_elf yields.
	# e.g. ELF[file[EI_CLASS], file[EI_DATA]].Ehdr.read(file).e_phnum
	ELF = dict(gen_elf())

	def get_elf_accessor(file):
	    """Return the `elf` decoder set for the given file contents, or None.

	    file: the file's contents as a string or mmap.

	    Returns the entry of ELF selected by the file's class and byte-order
	    bytes.  Returns None if the contents do not start with ELFMAG, are
	    too short to contain both of those bytes, or name a class/byte order
	    for which there is no decoder.
	    """
	    # If it looks like an ELF file, whip out the decoder ring.
	    if file[:len(ELFMAG)] != ELFMAG:
	        return None
	    # A truncated header cannot be indexed safely.
	    if len(file) <= max(EI_CLASS, EI_DATA):
	        return None
	    # Unknown class or byte order: treat as "not an ELF file we can read".
	    return ELF.get((file[EI_CLASS], file[EI_DATA]))


	def gen_phdrs(file, elf, ehdr):
	    """Yield each program header of the file as an elf.Phdr tuple.

	    file: buffer holding the file contents.
	    elf: decoder set providing Phdr.size and Phdr.read.
	    ehdr: decoded ELF header; e_phoff and e_phnum locate the table.
	    """
	    entry_size = elf.Phdr.size
	    # range (not xrange) so this also runs on Python 3.
	    for index in range(ehdr.e_phnum):
	        yield elf.Phdr.read(file, ehdr.e_phoff + index * entry_size)


	def gen_shdrs(file, elf, ehdr):
	    """Yield each section header of the file as an elf.Shdr tuple.

	    file: buffer holding the file contents.
	    elf: decoder set providing Shdr.size and Shdr.read.
	    ehdr: decoded ELF header; e_shoff and e_shnum locate the table.
	    """
	    entry_size = elf.Shdr.size
	    # range (not xrange) so this also runs on Python 3.
	    for index in range(ehdr.e_shnum):
	        yield elf.Shdr.read(file, ehdr.e_shoff + index * entry_size)


	# The different names one CPU architecture goes by.
	cpu = namedtuple('cpu', [
	    'e_machine',  # ELF e_machine int
	    'llvm',       # LLVM triple CPU component
	    'gn',         # GN target_cpu
	])

	# Lookup from an ELF header's e_machine value to its cpu tuple.
	ELF_MACHINE_TO_CPU = dict(
	    (known.e_machine, known) for known in (
	        cpu(EM_386, 'i386', 'x86'),
	        cpu(EM_ARM, 'arm', 'arm'),
	        cpu(EM_X86_64, 'x86_64', 'x64'),
	        cpu(EM_AARCH64, 'aarch64', 'arm64'),
	    ))


	@contextmanager
	def mmapper(filename):
	    """A context manager that yields (fd, file_contents) given a file name.

	    file_contents is a read-only mmap of the whole file, or the empty
	    string '' when the file is empty (mmap can't map an empty file).
	    The mmap and file objects are closed at the end of the 'with'
	    statement, and the file is also closed if mapping it fails.
	    """
	    with open(filename, 'rb') as fileobj:
	        fd = fileobj.fileno()
	        if os.fstat(fd).st_size == 0:
	            # mmap can't handle empty files.
	            yield fd, ''
	            return
	        mmapobj = mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
	        try:
	            yield fd, mmapobj
	        finally:
	            # Unmap before the enclosing 'with' closes the file.
	            mmapobj.close()

	def makedirs(dirs):
	    """Create directory `dirs` and any missing parents.

	    It is not an error for the path to exist already; any other OSError
	    from os.makedirs is propagated unchanged.
	    """
	    # os.errno was only ever an accidental alias and is gone in
	    # Python 3.7+; use the real module.
	    import errno
	    try:
	        os.makedirs(dirs)
	    except OSError as e:
	        if e.errno != errno.EEXIST:
	            # Bare raise keeps the original traceback.
	            raise

	# elf_info objects are only created by `get_elf_info` or the `copy` or
	# `rename` methods.  Besides the tuple fields, the creator attaches two
	# instance attributes: `elf` (the decoder set for the file) and
	# `get_sources` (a closure returning the set of source file names).
	class elf_info(
	    namedtuple('elf_info', [
	        'filename',
	        'cpu',       # cpu tuple, or None for an unknown e_machine
	        'notes',     # list of elf_note: selected notes
	        'build_id',  # string: lowercase hex, or None
	        'stripped',  # bool: Has no symbols or .debug_* sections
	        'interp',    # string or None: PT_INTERP (without \0)
	        'soname',    # string or None: DT_SONAME
	        'needed',    # set of strings: DT_NEEDED
	    ])):

	    def rename(self, filename):
	        """Return a copy of this object that uses `filename` instead.

	        `filename` must refer to the same file as self.filename.
	        Raises Exception if this object was never initialized by
	        get_elf_info.
	        """
	        assert os.path.samefile(self.filename, filename)
	        # Copy the tuple.
	        clone = self.__class__(filename, *self[1:])
	        # Copy the lazy state.
	        clone.elf = self.elf
	        # An initialized object carries get_sources as an instance
	        # attribute (a plain closure).  Still seeing the class's
	        # placeholder bound to self means it was never filled in.
	        if getattr(self.get_sources, '__self__', None) is self:
	            raise Exception("uninitialized elf_info object!")
	        clone.get_sources = self.get_sources
	        return clone

	    def copy(self):
	        """Return a copy of this object, including its lazy state."""
	        return self.rename(self.filename)

	    # This is replaced with a closure by the creator in get_elf_info.
	    def get_sources(self):
	        raise Exception("uninitialized elf_info object!")

	    def strip(self, stripped_filename):
	        """Write stripped output to the given file unless it already exists
	        with identical contents.  Returns True iff the file was changed.

	        The output is the original file truncated after the last byte
	        covered by a PT_LOAD segment, with the section header fields of
	        the ELF header zeroed.
	        """
	        with mmapper(self.filename) as mapped:
	            fd, contents = mapped
	            ehdr = self.elf.Ehdr.read(contents)

	            stripped_ehdr = ehdr._replace(e_shoff=0, e_shnum=0, e_shstrndx=0)
	            stripped_size = max(phdr.p_offset + phdr.p_filesz
	                                for phdr in gen_phdrs(contents, self.elf, ehdr)
	                                if phdr.p_type == PT_LOAD)
	            # The program headers must survive the truncation.
	            assert ehdr.e_phoff + (ehdr.e_phnum *
	                                   ehdr.e_phentsize) <= stripped_size

	            # The pieces of the output file, in order.
	            def gen_stripped_contents():
	                yield self.elf.Ehdr.pack(stripped_ehdr)
	                yield contents[self.elf.Ehdr.size:stripped_size]

	            def old_file_matches():
	                old_size = os.path.getsize(stripped_filename)
	                new_size = sum(len(x) for x in gen_stripped_contents())
	                if old_size != new_size:
	                    return False
	                with open(stripped_filename, 'rb') as f:
	                    for chunk in gen_stripped_contents():
	                        if f.read(len(chunk)) != chunk:
	                            return False
	                return True

	            if os.path.exists(stripped_filename):
	                if old_file_matches():
	                    return False
	                os.remove(stripped_filename)
	            # Create the new file with the same mode as the original.
	            with os.fdopen(os.open(stripped_filename,
	                                   os.O_WRONLY | os.O_CREAT | os.O_EXCL,
	                                   os.fstat(fd).st_mode & 0o777),
	                           'wb') as stripped_file:
	                for chunk in gen_stripped_contents():
	                    stripped_file.write(chunk)
	            return True

	def get_elf_info(filename, match_notes=False):
	file = None
	elf = None
	ehdr = None
	phdrs = None

	# Yields an elf_note for each note in any PT_NOTE segment.
	def gen_notes():
	def round_up_to(size):
	return ((size + 3) / 4) * 4
	for phdr in phdrs:
	if phdr.p_type == PT_NOTE:
	pos = phdr.p_offset
	while pos < phdr.p_offset + phdr.p_filesz:
	nhdr = elf.Nhdr.read(file, pos)
	pos += elf.Nhdr.size
	name = file[pos:pos + nhdr.n_namesz]
	pos += round_up_to(nhdr.n_namesz)
	desc = file[pos:pos + nhdr.n_descsz]
	pos += round_up_to(nhdr.n_descsz)
	yield elf_note(name, nhdr.n_type, desc)

	def gen_sections():
	shdrs = list(gen_shdrs(file, elf, ehdr))
	if not shdrs:
	return
	strtab_shdr = shdrs[ehdr.e_shstrndx]
	for shdr, i in zip(shdrs, xrange(len(shdrs))):
	if i == 0:
	continue
	assert shdr.sh_name < strtab_shdr.sh_size, (
	"%s: invalid sh_name" % filename)
	yield (shdr,
	extract_C_string(strtab_shdr.sh_offset + shdr.sh_name))

	# Generates '\0'-terminated strings starting at the given offset,
	# until an empty string.
	def gen_strings(start):
	while True:
	end = file.find('\0', start)
	assert end >= start, (
	"%s: Unterminated string at %#x" % (filename, start))
	if start == end:
	break
	yield file[start:end]
	start = end + 1

	def extract_C_string(start):
	for string in gen_strings(start):
	return string
	return ''

	# Returns a string of hex digits (or None).
	def get_build_id():
	build_id = None
	for note in gen_notes():
	# Note that the last build_id note needs to be used due to TO-442.
	possible_build_id = note.build_id_hex()
	if possible_build_id:
	build_id = possible_build_id
	return build_id

	# Returns a list of elf_note objects.
	def get_matching_notes():
	if isinstance(match_notes, bool):
	if match_notes:
	return list(gen_notes())
	else:
	return []
	# If not a bool, it's an iterable of ident pairs.
	return [note for note in gen_notes() if note.ident() in match_notes]

	# Returns a string (without trailing '\0'), or None.
	def get_interp():
	# PT_INTERP points directly to a string in the file.
	for interp in (phdr for phdr in phdrs if phdr.p_type == PT_INTERP):
	interp = file[interp.p_offset:interp.p_offset + interp.p_filesz]
	if interp[-1:] == '\0':
	interp = interp[:-1]
	return interp
	return None

	# Returns a set of strings.
	def get_soname_and_needed():
	    """Return (soname, needed) decoded from the first PT_DYNAMIC segment.

	    soname is the string named by the first DT_SONAME entry, or None
	    when there is none.  needed is the set of strings named by the
	    DT_NEEDED entries.  Without a PT_DYNAMIC program header the
	    result is (None, set()).

	    Raises ValueError (from the single-element unpacking) unless
	    there is exactly one DT_STRTAB entry and exactly one PT_LOAD
	    segment whose file-backed range contains its address.
	    """
	    dynamic_segments = [p for p in phdrs if p.p_type == PT_DYNAMIC]
	    if not dynamic_segments:
	        return None, set()
	    dynamic = dynamic_segments[0]

	    # PT_DYNAMIC points to the list of ElfNN_Dyn tags.
	    entries = [
	        elf.Dyn.read(file, dynamic.p_offset + delta)
	        for delta in xrange(0, dynamic.p_filesz, elf.Dyn.size)
	    ]

	    # DT_STRTAB points to the string table's vaddr (.dynstr).
	    [dynstr_vaddr] = [e.d_val for e in entries if e.d_tag == DT_STRTAB]

	    # Find the PT_LOAD containing the vaddr to compute the file offset.
	    [dynstr_offset] = [
	        dynstr_vaddr - load.p_vaddr + load.p_offset
	        for load in phdrs
	        if (load.p_type == PT_LOAD and
	            load.p_vaddr <= dynstr_vaddr and
	            dynstr_vaddr - load.p_vaddr < load.p_filesz)
	    ]

	    # Each DT_NEEDED or DT_SONAME points to a string in the .dynstr table.
	    # The strings are decoded lazily, one per matching entry.
	    def dt_strings(tag):
	        return (extract_C_string(dynstr_offset + e.d_val)
	                for e in entries if e.d_tag == tag)

	    # Only the first DT_SONAME string is decoded; any others are ignored.
	    soname = next(dt_strings(DT_SONAME), None)
	    return soname, set(dt_strings(DT_NEEDED))

	def get_stripped():
	    """Return True when no symbol-table or debug section is present.

	    The result is False as soon as gen_sections() yields a section of
	    type SHT_SYMTAB or one whose name starts with '.debug_'.
	    """
	    for shdr, name in gen_sections():
	        if shdr.sh_type == SHT_SYMTAB or name.startswith('.debug_'):
	            return False
	    return True

	def get_cpu():
	    """Return the ELF_MACHINE_TO_CPU entry for this file's e_machine.

	    The lookup goes through .get(), so a machine with no entry does
	    not raise (presumably yielding None -- confirm against the
	    definition of ELF_MACHINE_TO_CPU).
	    """
	    machine = ehdr.e_machine
	    return ELF_MACHINE_TO_CPU.get(machine)

	def gen_source_files():
	    """Generate the source file paths listed in '.debug_line' sections.

	    For each CU header found in a section named '.debug_line', every
	    entry of its file table is joined onto the include directory it
	    refers to and yielded after os.path.normpath.  The fake names
	    '<stdin>' and '<command-line>' are skipped.  A path is yielded
	    once per table entry, so duplicates are possible.

	    Raises AssertionError on 64-bit DWARF, on a .debug_line version
	    other than 2, 3 or 4, or on an unterminated string.
	    """

	    # Given the file position of a CU header (starting with the
	    # beginning of the .debug_line section), return the position
	    # of the include_directories portion and the position of the
	    # next CU header.
	    def read_line_header(pos):
	        # Decode DWARF .debug_line per-CU header.
	        hdr_type = elf.dwarf2_line_header
	        hdr = hdr_type.read(file, pos)
	        assert hdr.unit_length < 0xfffffff0, (
	            "%s: 64-bit DWARF" % filename)
	        assert hdr.version in [2, 3, 4], (
	            "%s: DWARF .debug_line version %r" %
	            (filename, hdr.version))
	        if hdr.version == 4:
	            # Version 4 has its own header struct, so decode again.
	            hdr_type = elf.dwarf4_line_header
	            hdr = hdr_type.read(file, pos)
	        # The fixed-size header is followed by opcode_base - 1 bytes
	        # (the standard_opcode_lengths array in the DWARF spec).
	        # unit_length is counted from just past its own 4-byte field.
	        return (pos + hdr_type.size + hdr.opcode_base - 1,
	                pos + 4 + hdr.unit_length)

	    # Decode include_directories portion of DWARF .debug_line format.
	    # Returns the position just past the table and the list of strings.
	    def read_include_dirs(pos):
	        include_dirs = list(gen_strings(pos))
	        # Each string takes its length plus a '\0'; one more '\0'
	        # (the empty string) terminates the table.
	        pos += sum(len(path) + 1 for path in include_dirs) + 1
	        return pos, include_dirs

	    # Decode file_paths portion of DWARF .debug_line format.
	    # Generates (name, include directory index) pairs.
	    def gen_file_paths(start, limit):
	        while start < limit:
	            end = file.find('\0', start, limit)
	            assert end >= start, (
	                "%s: Unterminated string at %#x" % (filename, start))
	            if start == end:
	                # An empty name terminates the table.
	                break
	            name = file[start:end]
	            start = end + 1
	            # Decode 3 ULEB128s to advance start, but only use the first.
	            for i in range(3):
	                value = 0
	                bits = 0
	                while start < limit:
	                    byte = ord(file[start])
	                    start += 1
	                    # Low 7 bits carry data; the high bit means "more".
	                    value |= (byte & 0x7f) << bits
	                    if (byte & 0x80) == 0:
	                        break
	                    bits += 7
	                if i == 0:
	                    include_idx = value
	            # Ignore the fake file names the compiler leaks into the DWARF.
	            if name not in ['<stdin>', '<command-line>']:
	                yield name, include_idx

	    for shdr, section_name in gen_sections():
	        if section_name != '.debug_line':
	            continue
	        next_cu = shdr.sh_offset
	        while next_cu < shdr.sh_offset + shdr.sh_size:
	            pos, next_cu = read_line_header(next_cu)

	            pos, include_dirs = read_include_dirs(pos)
	            assert pos <= next_cu

	            # 0 means relative to DW_AT_comp_dir, which should be ".".
	            # Indices into the actual table start at 1.
	            include_dirs.insert(0, '')

	            # Decode file_paths and apply include directories.
	            for file_name, dir_idx in gen_file_paths(pos, next_cu):
	                joined = os.path.join(include_dirs[dir_idx], file_name)
	                yield os.path.normpath(joined)

	# This closure becomes the elf_info object's `get_sources` method.
	def lazy_get_sources():
	    """Compute the set of source files on first use and memoize it.

	    The output of gen_source_files() is collected into a set, and
	    info.get_sources is rebound so that later calls return that same
	    set without decoding anything again.
	    """
	    # NOTE(review): gen_source_files() reads `file`; confirm the
	    # mapping is still readable when this runs after get_elf_info()
	    # has already returned.
	    found = set(gen_source_files())
	    # Replace the method to just return the cached set next time.
	    info.get_sources = lambda: found
	    return found

	# Map in the whole file's contents and use it as a string.
	with mmapper(filename) as mapped:
	fd, file = mapped
	elf = get_elf_accessor(file)
	if elf is not None:
	# ELF header leads to program headers.
	ehdr = elf.Ehdr.read(file)
	assert ehdr.e_phentsize == elf.Phdr.size, (
	"%s: invalid e_phentsize" % filename)
	phdrs = list(gen_phdrs(file, elf, ehdr))
	info = elf_info(filename,
	get_cpu(),
	get_matching_notes(),
	get_build_id(),
	get_stripped(),
	get_interp(),
	*get_soname_and_needed())
	info.elf = elf
	info.get_sources = lazy_get_sources
	return info

	return None


	# Names exported by `from <this module> import *`.
	__all__ = [
	    'cpu',
	    'elf_info',
	    'elf_note',
	    'get_elf_accessor',
	    'get_elf_info',
	]


	def test_main_strip(filenames):
	    """Manual test: strip each file and print the size change.

	    For every name in filenames the elf_info is printed, info.strip()
	    is asked to write '<info.filename>.ei-strip', and the sizes of
	    the input and output files are printed.  Files for which
	    get_elf_info() returns None are reported and skipped.
	    """
	    for filename in filenames:
	        info = get_elf_info(filename)
	        if info is None:
	            # get_elf_info() returns None when it finds no ELF accessor
	            # for the file; there is nothing to strip in that case.
	            print('%s: not a recognized ELF file' % filename)
	            continue
	        # Single-argument print(...) behaves the same under Python 2
	        # and Python 3.
	        print(info)
	        stripped_filename = info.filename + '.ei-strip'
	        info.strip(stripped_filename)
	        print('\t%s: %u -> %u' % (stripped_filename,
	                                  os.stat(filename).st_size,
	                                  os.stat(stripped_filename).st_size))


	def test_main_get_info(filenames):
	    """Manual test: print the elf_info and source files of each file.

	    Each source path from info.get_sources() is printed on its own
	    line, indented by a tab.  Files for which get_elf_info() returns
	    None are reported and skipped.
	    """
	    for filename in filenames:
	        info = get_elf_info(filename)
	        if info is None:
	            # get_elf_info() returns None when it finds no ELF accessor
	            # for the file, so there are no sources to list.
	            print('%s: not a recognized ELF file' % filename)
	            continue
	        # Single-argument print(...) behaves the same under Python 2
	        # and Python 3.
	        print(info)
	        for source in info.get_sources():
	            print('\t' + source)


	# For manual testing.
	#   [-strip] FILE...
	# With a leading '-strip' each FILE is passed to test_main_strip;
	# otherwise every argument is passed to test_main_get_info.
	if __name__ == "__main__":
	    import sys
	    args = sys.argv[1:]
	    # Compare a slice rather than indexing so that running with no
	    # arguments does not raise IndexError.
	    if args[:1] == ['-strip']:
	        test_main_strip(args[1:])
	    else:
	        test_main_get_info(args)