| // Copyright 2016 Google Inc. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include <algorithm> |
| #include <string> |
| #include <iostream> |
| #include <fstream> |
| #include <sstream> |
| #include "absl/numeric/int128.h" |
| #include "absl/strings/escaping.h" |
| #include "absl/strings/string_view.h" |
| #include "absl/strings/substitute.h" |
| #include "third_party/freebsd_elf/elf.h" |
| #include "bloaty.h" |
| #include "link_map.h" |
| |
| #include <assert.h> |
| #include <limits.h> |
| #include <stdlib.h> |
| |
| using absl::string_view; |
| |
| namespace bloaty { |
| |
| namespace { |
| |
// Field converter that reverses the byte order of a value via ByteSwap().
// The struct readers pass it to a munger when the file's endianness differs
// from the host's.
struct ByteSwapFunc {
  template <class T>
  T operator()(T value) {
    const T swapped = ByteSwap(value);
    return swapped;
  }
};
| |
// Identity field converter: hands every value back untouched. Used when the
// file is already in host byte order and only the struct layout needs
// converting.
struct NullFunc {
  template <class T>
  T operator()(T value) {
    return value;
  }
};
| |
| size_t StringViewToSize(string_view str) { |
| size_t ret; |
| if (!absl::SimpleAtoi(str, &ret)) { |
| THROWF("couldn't convert string '$0' to integer.", str); |
| } |
| return ret; |
| } |
| |
// Drops the first sizeof(T) bytes from `*data`, i.e. steps the view past one
// on-disk structure of type T.
template <class T>
void AdvancePastStruct(string_view* data) {
  constexpr size_t kStructSize = sizeof(T);
  *data = data->substr(kStructSize);
}
| |
| // ElfFile ///////////////////////////////////////////////////////////////////// |
| |
| // For parsing the pieces we need out of an ELF file (.o, .so, and binaries). |
| |
| class ElfFile { |
| public: |
| ElfFile(string_view data) : data_(data) { |
| ok_ = Initialize(); |
| } |
| |
| bool IsOpen() { return ok_; } |
| |
| // Regions of the file where different headers live. |
| string_view entire_file() const { return data_; } |
| string_view header_region() const { return header_region_; } |
| string_view section_headers() const { return section_headers_; } |
| string_view segment_headers() const { return segment_headers_; } |
| |
| const Elf64_Ehdr& header() const { return header_; } |
| Elf64_Xword section_count() const { return section_count_; } |
| Elf64_Xword section_string_index() const { return section_string_index_; } |
| |
| // Represents an ELF segment (data used by the loader / dynamic linker). |
| class Segment { |
| public: |
| const Elf64_Phdr& header() const { return header_; } |
| string_view contents() const { return contents_; } |
| string_view range() const { return range_; } |
| |
| private: |
| friend class ElfFile; |
| Elf64_Phdr header_; |
| string_view contents_; |
| string_view range_; |
| }; |
| |
| // Represents an ELF section (.text, .data, .bss, etc.) |
| class Section { |
| public: |
| const Elf64_Shdr& header() const { return header_; } |
| string_view contents() const { return contents_; } |
| string_view range() const { return range_; } |
| |
| // For SHN_UNDEF (undefined name), returns [nullptr, 0]. |
| string_view GetName() const; |
| |
| // Requires: this is a section with fixed-width entries (symbol table, |
| // relocation table, etc). |
| Elf64_Word GetEntryCount() const; |
| |
| // Requires: header().sh_type == SHT_STRTAB. |
| string_view ReadString(Elf64_Word index) const; |
| |
| // Requires: header().sh_type == SHT_SYMTAB || header().sh_type == |
| // SHT_DYNSYM |
| void ReadSymbol(Elf64_Word index, Elf64_Sym* sym, |
| string_view* file_range) const; |
| |
| // Requires: header().sh_type == SHT_REL |
| void ReadRelocation(Elf64_Word index, Elf64_Rel* rel, |
| string_view* file_range) const; |
| |
| // Requires: header().sh_type == SHT_RELA |
| void ReadRelocationWithAddend(Elf64_Word index, Elf64_Rela* rel, |
| string_view* file_range) const; |
| |
| const ElfFile& elf() const { return *elf_; } |
| |
| private: |
| friend class ElfFile; |
| const ElfFile* elf_; |
| Elf64_Shdr header_; |
| string_view contents_; |
| string_view range_; |
| }; |
| |
| class NoteIter { |
| public: |
| NoteIter(const Section& section) |
| : elf_(§ion.elf()), remaining_(section.contents()) { |
| Next(); |
| } |
| NoteIter(const Segment& segment, const ElfFile* elf) |
| : elf_(elf), remaining_(segment.contents()) { |
| Next(); |
| } |
| |
| bool IsDone() const { return done_; } |
| uint32_t type() const { return type_; } |
| string_view name() const { return name_; } |
| string_view descriptor() const { return descriptor_; } |
| |
| void Next(); |
| |
| public: |
| const ElfFile* elf_; |
| string_view name_; |
| string_view descriptor_; |
| string_view remaining_; |
| uint32_t type_; |
| bool done_ = false; |
| }; |
| |
| void ReadSegment(Elf64_Word index, Segment* segment) const; |
| void ReadSection(Elf64_Word index, Section* section) const; |
| |
| bool FindSectionByName(absl::string_view name, Section* section) const; |
| |
| bool is_64bit() const { return is_64bit_; } |
| bool is_native_endian() const { return is_native_endian_; } |
| |
| template <class T32, class T64, class Munger> |
| void ReadStruct(absl::string_view contents, uint64_t offset, Munger munger, |
| absl::string_view* range, T64* out) const { |
| StructReader(*this, contents).Read<T32>(offset, munger, range, out); |
| } |
| |
| private: |
| friend class Section; |
| |
| bool Initialize(); |
| |
| string_view GetRegion(uint64_t start, uint64_t n) const { |
| return StrictSubstr(data_, start, n); |
| } |
| |
| // Shared code for reading various ELF structures. Handles endianness |
| // conversion and 32->64 bit conversion, when necessary. |
| class StructReader { |
| public: |
| StructReader(const ElfFile& elf, string_view data) |
| : elf_(elf), data_(data) {} |
| |
| template <class T32, class T64, class Munger> |
| void Read(uint64_t offset, Munger /*munger*/, absl::string_view* range, |
| T64* out) const { |
| if (elf_.is_64bit() && elf_.is_native_endian()) { |
| return Memcpy(offset, range, out); |
| } else { |
| return ReadFallback<T32, T64, Munger>(offset, range, out); |
| } |
| } |
| |
| private: |
| const ElfFile& elf_; |
| string_view data_; |
| |
| template <class T32, class T64, class Munger> |
| void ReadFallback(uint64_t offset, absl::string_view* range, |
| T64* out) const; |
| |
| template <class T> |
| void Memcpy(uint64_t offset, absl::string_view* out_range, T* out) const { |
| absl::string_view range = StrictSubstr(data_, offset, sizeof(*out)); |
| if (out_range) { |
| *out_range = range; |
| } |
| memcpy(out, data_.data() + offset, sizeof(*out)); |
| } |
| }; |
| |
| bool ok_; |
| bool is_64bit_; |
| bool is_native_endian_; |
| string_view data_; |
| Elf64_Ehdr header_; |
| Elf64_Xword section_count_; |
| Elf64_Xword section_string_index_; |
| string_view header_region_; |
| string_view section_headers_; |
| string_view segment_headers_; |
| Section section_name_table_; |
| }; |
| |
| // ELF uses different structure definitions for 32/64 bit files. The sizes of |
| // members are different, and members are even in a different order! |
| // |
| // These mungers can convert 32 bit structures to 64-bit ones. They can also |
| // handle converting endianness. We use templates so a single template function |
| // can handle all three patterns: |
| // |
| // 32 native -> 64 native |
| // 32 swapped -> 64 native |
| // 64 swapped -> 64 native |
| |
| struct EhdrMunger { |
| template <class From, class Func> |
| void operator()(const From& from, Elf64_Ehdr* to, Func func) { |
| memmove(&to->e_ident[0], &from.e_ident[0], EI_NIDENT); |
| to->e_type = func(from.e_type); |
| to->e_machine = func(from.e_machine); |
| to->e_version = func(from.e_version); |
| to->e_entry = func(from.e_entry); |
| to->e_phoff = func(from.e_phoff); |
| to->e_shoff = func(from.e_shoff); |
| to->e_flags = func(from.e_flags); |
| to->e_ehsize = func(from.e_ehsize); |
| to->e_phentsize = func(from.e_phentsize); |
| to->e_phnum = func(from.e_phnum); |
| to->e_shentsize = func(from.e_shentsize); |
| to->e_shnum = func(from.e_shnum); |
| to->e_shstrndx = func(from.e_shstrndx); |
| } |
| }; |
| |
| struct ShdrMunger { |
| template <class From, class Func> |
| void operator()(const From& from, Elf64_Shdr* to, Func func) { |
| to->sh_name = func(from.sh_name); |
| to->sh_type = func(from.sh_type); |
| to->sh_flags = func(from.sh_flags); |
| to->sh_addr = func(from.sh_addr); |
| to->sh_offset = func(from.sh_offset); |
| to->sh_size = func(from.sh_size); |
| to->sh_link = func(from.sh_link); |
| to->sh_info = func(from.sh_info); |
| to->sh_addralign = func(from.sh_addralign); |
| to->sh_entsize = func(from.sh_entsize); |
| } |
| }; |
| |
| struct PhdrMunger { |
| template <class From, class Func> |
| void operator()(const From& from, Elf64_Phdr* to, Func func) { |
| to->p_type = func(from.p_type); |
| to->p_flags = func(from.p_flags); |
| to->p_offset = func(from.p_offset); |
| to->p_vaddr = func(from.p_vaddr); |
| to->p_paddr = func(from.p_paddr); |
| to->p_filesz = func(from.p_filesz); |
| to->p_memsz = func(from.p_memsz); |
| to->p_align = func(from.p_align); |
| } |
| }; |
| |
| struct SymMunger { |
| template <class From, class Func> |
| void operator()(const From& from, Elf64_Sym* to, Func func) { |
| to->st_name = func(from.st_name); |
| to->st_info = func(from.st_info); |
| to->st_other = func(from.st_other); |
| to->st_shndx = func(from.st_shndx); |
| to->st_value = func(from.st_value); |
| to->st_size = func(from.st_size); |
| } |
| }; |
| |
| struct RelMunger { |
| template <class From, class Func> |
| void operator()(const From& from, Elf64_Rel* to, Func func) { |
| to->r_offset = func(from.r_offset); |
| to->r_info = func(from.r_info); |
| } |
| }; |
| |
| struct RelaMunger { |
| template <class From, class Func> |
| void operator()(const From& from, Elf64_Rela* to, Func func) { |
| to->r_offset = func(from.r_offset); |
| to->r_info = func(from.r_info); |
| to->r_addend = func(from.r_addend); |
| } |
| }; |
| |
| struct NoteMunger { |
| template <class From, class Func> |
| void operator()(const From& from, Elf64_Nhdr* to, Func func) { |
| to->n_namesz = func(from.n_namesz); |
| to->n_descsz = func(from.n_descsz); |
| to->n_type = func(from.n_type); |
| } |
| }; |
| |
| struct ChdrMunger { |
| template <class From, class Func> |
| void operator()(const From& from, Elf64_Chdr* to, Func func) { |
| to->ch_type = func(from.ch_type); |
| to->ch_size = func(from.ch_size); |
| to->ch_addralign = func(from.ch_addralign); |
| } |
| }; |
| |
| template <class T32, class T64, class Munger> |
| void ElfFile::StructReader::ReadFallback(uint64_t offset, |
| absl::string_view* range, |
| T64* out) const { |
| // Fallback for either 32-bit ELF file or non-native endian. |
| if (elf_.is_64bit()) { |
| assert(!elf_.is_native_endian()); |
| Memcpy(offset, range, out); |
| Munger()(*out, out, ByteSwapFunc()); |
| } else { |
| T32 data32; |
| Memcpy(offset, range, &data32); |
| if (elf_.is_native_endian()) { |
| Munger()(data32, out, NullFunc()); |
| } else { |
| Munger()(data32, out, ByteSwapFunc()); |
| } |
| } |
| } |
| |
| string_view ElfFile::Section::GetName() const { |
| if (header_.sh_name == SHN_UNDEF) { |
| return string_view(nullptr, 0); |
| } |
| return elf_->section_name_table_.ReadString(header_.sh_name); |
| } |
| |
| string_view ElfFile::Section::ReadString(Elf64_Word index) const { |
| assert(header().sh_type == SHT_STRTAB); |
| |
| if (index == SHN_UNDEF || index >= contents_.size()) { |
| THROWF("can't read index $0 from strtab, total size is $1", index, |
| contents_.size()); |
| } |
| |
| string_view ret = StrictSubstr(contents_, index); |
| |
| const char* null_pos = |
| static_cast<const char*>(memchr(ret.data(), '\0', ret.size())); |
| |
| if (null_pos == NULL) { |
| THROW("no NULL terminator found"); |
| } |
| |
| size_t len = null_pos - ret.data(); |
| ret = ret.substr(0, len); |
| return ret; |
| } |
| |
| Elf64_Word ElfFile::Section::GetEntryCount() const { |
| if (header_.sh_entsize == 0) { |
| THROW("sh_entsize is zero"); |
| } |
| return contents_.size() / header_.sh_entsize; |
| } |
| |
| void ElfFile::Section::ReadSymbol(Elf64_Word index, Elf64_Sym* sym, |
| string_view* file_range) const { |
| assert(header().sh_type == SHT_SYMTAB || header().sh_type == SHT_DYNSYM); |
| size_t offset = header_.sh_entsize * index; |
| elf_->ReadStruct<Elf32_Sym>(contents(), offset, SymMunger(), file_range, sym); |
| } |
| |
| void ElfFile::Section::ReadRelocation(Elf64_Word index, Elf64_Rel* rel, |
| string_view* file_range) const { |
| assert(header().sh_type == SHT_REL); |
| size_t offset = header_.sh_entsize * index; |
| elf_->ReadStruct<Elf32_Rel>(contents(), offset, RelMunger(), file_range, rel); |
| } |
| |
| void ElfFile::Section::ReadRelocationWithAddend(Elf64_Word index, |
| Elf64_Rela* rela, |
| string_view* file_range) const { |
| assert(header().sh_type == SHT_RELA); |
| size_t offset = header_.sh_entsize * index; |
| elf_->ReadStruct<Elf32_Rela>(contents(), offset, RelaMunger(), file_range, |
| rela); |
| } |
| |
| void ElfFile::NoteIter::Next() { |
| if (remaining_.empty()) { |
| done_ = true; |
| return; |
| } |
| |
| Elf_Note note; |
| elf_->ReadStruct<Elf_Note>(remaining_, 0, NoteMunger(), nullptr, ¬e); |
| |
| // 32-bit and 64-bit note are the same size, so we don't have to treat |
| // them separately when advancing. |
| AdvancePastStruct<Elf_Note>(&remaining_); |
| |
| type_ = note.n_type; |
| name_ = StrictSubstr(remaining_, 0, note.n_namesz); |
| |
| // Size might include NULL terminator. |
| if (name_[name_.size() - 1] == 0) { |
| name_ = name_.substr(0, name_.size() - 1); |
| } |
| |
| remaining_ = StrictSubstr(remaining_, AlignUp(note.n_namesz, 4)); |
| descriptor_ = StrictSubstr(remaining_, 0, note.n_descsz); |
| remaining_ = StrictSubstr(remaining_, AlignUp(note.n_descsz, 4)); |
| } |
| |
| bool ElfFile::Initialize() { |
| if (data_.size() < EI_NIDENT) { |
| return false; |
| } |
| |
| unsigned char ident[EI_NIDENT]; |
| memcpy(ident, data_.data(), EI_NIDENT); |
| |
| if (memcmp(ident, "\177ELF", 4) != 0) { |
| // Not an ELF file. |
| return false; |
| } |
| |
| switch (ident[EI_CLASS]) { |
| case ELFCLASS32: |
| is_64bit_ = false; |
| break; |
| case ELFCLASS64: |
| is_64bit_ = true; |
| break; |
| default: |
| THROWF("unexpected ELF class: $0", ident[EI_CLASS]); |
| } |
| |
| switch (ident[EI_DATA]) { |
| case ELFDATA2LSB: |
| is_native_endian_ = GetMachineEndian() == Endian::kLittle; |
| break; |
| case ELFDATA2MSB: |
| is_native_endian_ = GetMachineEndian() == Endian::kBig; |
| break; |
| default: |
| THROWF("unexpected ELF data: $0", ident[EI_DATA]); |
| } |
| |
| absl::string_view range; |
| ReadStruct<Elf32_Ehdr>(entire_file(), 0, EhdrMunger(), &range, &header_); |
| |
| Section section0; |
| bool has_section0 = 0; |
| |
| // ELF extensions: if certain fields overflow, we have to find their true data |
| // from elsewhere. For more info see: |
| // https://docs.oracle.com/cd/E19683-01/817-3677/chapter6-94076/index.html |
| if (header_.e_shoff > 0 && |
| data_.size() > (header_.e_shoff + header_.e_shentsize)) { |
| section_count_ = 1; |
| ReadSection(0, §ion0); |
| has_section0 = true; |
| } |
| |
| section_count_ = header_.e_shnum; |
| section_string_index_ = header_.e_shstrndx; |
| |
| if (section_count_ == 0 && has_section0) { |
| section_count_ = section0.header().sh_size; |
| } |
| |
| if (section_string_index_ == SHN_XINDEX && has_section0) { |
| section_string_index_ = section0.header().sh_link; |
| } |
| |
| header_region_ = GetRegion(0, header_.e_ehsize); |
| section_headers_ = GetRegion(header_.e_shoff, |
| CheckedMul(header_.e_shentsize, section_count_)); |
| segment_headers_ = GetRegion( |
| header_.e_phoff, CheckedMul(header_.e_phentsize, header_.e_phnum)); |
| |
| if (section_count_ > 0) { |
| ReadSection(section_string_index_, §ion_name_table_); |
| if (section_name_table_.header().sh_type != SHT_STRTAB) { |
| THROW("section string index pointed to non-strtab"); |
| } |
| } |
| |
| return true; |
| } |
| |
| void ElfFile::ReadSegment(Elf64_Word index, Segment* segment) const { |
| if (index >= header_.e_phnum) { |
| THROWF("segment $0 doesn't exist, only $1 segments", index, |
| header_.e_phnum); |
| } |
| |
| Elf64_Phdr* header = &segment->header_; |
| ReadStruct<Elf32_Phdr>( |
| entire_file(), |
| CheckedAdd(header_.e_phoff, CheckedMul(header_.e_phentsize, index)), |
| PhdrMunger(), &segment->range_, header); |
| segment->contents_ = GetRegion(header->p_offset, header->p_filesz); |
| } |
| |
| void ElfFile::ReadSection(Elf64_Word index, Section* section) const { |
| if (index >= section_count_) { |
| THROWF("tried to read section $0, but there are only $1", index, |
| section_count_); |
| } |
| |
| Elf64_Shdr* header = §ion->header_; |
| ReadStruct<Elf32_Shdr>( |
| entire_file(), |
| CheckedAdd(header_.e_shoff, CheckedMul(header_.e_shentsize, index)), |
| ShdrMunger(), §ion->range_, header); |
| |
| if (header->sh_type == SHT_NOBITS) { |
| section->contents_ = string_view(); |
| } else { |
| section->contents_ = GetRegion(header->sh_offset, header->sh_size); |
| } |
| |
| section->elf_ = this; |
| } |
| |
| bool ElfFile::FindSectionByName(absl::string_view name, Section* section) const { |
| for (Elf64_Word i = 0; i < section_count_; i++) { |
| ReadSection(i, section); |
| if (section->GetName() == name) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| |
| // ArFile ////////////////////////////////////////////////////////////////////// |
| |
| // For parsing .a files (static libraries). |
| // |
| // The best documentation I've been able to find for this file format is |
| // Wikipedia: https://en.wikipedia.org/wiki/Ar_(Unix) |
| // |
| // So far we only parse the System V / GNU variant. |
| |
| class ArFile { |
| public: |
| ArFile(string_view data) |
| : magic_(StrictSubstr(data, 0, kMagicSize)), |
| contents_(data.substr(std::min<size_t>(data.size(), kMagicSize))) {} |
| |
| bool IsOpen() const { return magic() == string_view(kMagic); } |
| |
| string_view magic() const { return magic_; } |
| string_view contents() const { return contents_; } |
| |
| struct MemberFile { |
| enum { |
| kSymbolTable, // Stores a symbol table. |
| kLongFilenameTable, // Stores long filenames, users should ignore. |
| kNormal, // Regular data file. |
| } file_type; |
| string_view filename; // Only when file_type == kNormal |
| size_t size; |
| string_view header; |
| string_view contents; |
| }; |
| |
| class MemberReader { |
| public: |
| MemberReader(const ArFile& ar) : remaining_(ar.contents()) {} |
| bool ReadMember(MemberFile* file); |
| bool IsEof() const { return remaining_.size() == 0; } |
| |
| private: |
| string_view Consume(size_t n) { |
| n = (n % 2 == 0 ? n : n + 1); |
| if (remaining_.size() < n) { |
| THROW("premature end of file"); |
| } |
| string_view ret = remaining_.substr(0, n); |
| remaining_.remove_prefix(n); |
| return ret; |
| } |
| |
| string_view long_filenames_; |
| string_view remaining_; |
| }; |
| |
| private: |
| const string_view magic_; |
| const string_view contents_; |
| |
| static constexpr const char* kMagic = "!<arch>\n"; |
| static constexpr int kMagicSize = 8; |
| }; |
| |
| bool ArFile::MemberReader::ReadMember(MemberFile* file) { |
| struct Header { |
| char file_id[16]; |
| char modified_timestamp[12]; |
| char owner_id[6]; |
| char group_id[6]; |
| char mode[8]; |
| char size[10]; |
| char end[2]; |
| }; |
| |
| if (remaining_.size() == 0) { |
| return false; |
| } else if (remaining_.size() < sizeof(Header)) { |
| THROW("Premature EOF in AR data"); |
| } |
| |
| const Header* header = reinterpret_cast<const Header*>(remaining_.data()); |
| file->header = Consume(sizeof(Header)); |
| |
| string_view file_id(&header->file_id[0], sizeof(header->file_id)); |
| string_view size_str(&header->size[0], sizeof(header->size)); |
| file->size = StringViewToSize(size_str); |
| file->contents = Consume(file->size); |
| file->file_type = MemberFile::kNormal; |
| |
| if (file_id[0] == '/') { |
| // Special filename, internal to the format. |
| if (file_id[1] == ' ') { |
| file->file_type = MemberFile::kSymbolTable; |
| } else if (file_id[1] == '/') { |
| file->file_type = MemberFile::kLongFilenameTable; |
| long_filenames_ = file->contents; |
| } else if (isdigit(file_id[1])) { |
| size_t offset = StringViewToSize(file_id.substr(1)); |
| size_t end = long_filenames_.find('/', offset); |
| |
| if (end == std::string::npos) { |
| THROW("Unterminated long filename"); |
| } |
| |
| file->filename = long_filenames_.substr(offset, end - offset); |
| } else { |
| THROW("Unexpected special filename in AR archive"); |
| } |
| } else { |
| // Normal filename, slash-terminated. |
| size_t slash = file_id.find('/'); |
| |
| if (slash == std::string::npos) { |
| THROW("BSD-style AR not yet implemented"); |
| } |
| |
| file->filename = file_id.substr(0, slash); |
| } |
| |
| return true; |
| } |
| |
| void MaybeAddFileRange(const char* analyzer, RangeSink* sink, string_view label, |
| string_view range) { |
| if (sink) { |
| sink->AddFileRange(analyzer, label, range); |
| } |
| } |
| |
| // Iterate over each ELF file, agnostic to whether it is inside a .a (AR) file |
| // or not. |
| template <class Func> |
| void ForEachElf(const InputFile& file, RangeSink* sink, Func func) { |
| ArFile ar_file(file.data()); |
| uint64_t index_base = 0; |
| |
| if (ar_file.IsOpen()) { |
| ArFile::MemberFile member; |
| ArFile::MemberReader reader(ar_file); |
| |
| MaybeAddFileRange("ar_archive", sink, "[AR Headers]", ar_file.magic()); |
| |
| while (reader.ReadMember(&member)) { |
| MaybeAddFileRange("ar_archive", sink, "[AR Headers]", member.header); |
| switch (member.file_type) { |
| case ArFile::MemberFile::kNormal: { |
| ElfFile elf(member.contents); |
| if (elf.IsOpen()) { |
| func(elf, member.filename, index_base); |
| index_base += elf.section_count(); |
| } else { |
| MaybeAddFileRange("ar_archive", sink, "[AR Non-ELF Member File]", |
| member.contents); |
| } |
| break; |
| } |
| case ArFile::MemberFile::kSymbolTable: |
| MaybeAddFileRange("ar_archive", sink, "[AR Symbol Table]", |
| member.contents); |
| break; |
| case ArFile::MemberFile::kLongFilenameTable: |
| MaybeAddFileRange("ar_archive", sink, "[AR Headers]", |
| member.contents); |
| break; |
| } |
| } |
| } else { |
| ElfFile elf(file.data()); |
| if (!elf.IsOpen()) { |
| THROWF("Not an ELF or Archive file: $0", file.filename()); |
| } |
| |
| func(elf, file.filename(), index_base); |
| } |
| } |
| |
| // For object files, addresses are relative to the section they live in, which |
| // is indicated by ndx. We split this into: |
| // |
| // - 24 bits for index (up to 16M symbols with -ffunction-sections) |
| // - 40 bits for address (up to 1TB section) |
| static uint64_t ToVMAddr(uint64_t addr, uint64_t ndx, bool is_object) { |
| if (is_object) { |
| if (ndx >= 1 << 24) { |
| THROW("ndx overflow: too many sections"); |
| } |
| if (addr >= ((uint64_t)1) << 40) { |
| THROW("address overflow: section too big"); |
| } |
| return (ndx << 40) | addr; |
| } else { |
| return addr; |
| } |
| } |
| |
| static bool IsArchiveFile(string_view data) { |
| ArFile ar(data); |
| return ar.IsOpen(); |
| } |
| |
| static bool IsObjectFile(string_view data) { |
| ElfFile elf(data); |
| return IsArchiveFile(data) || (elf.IsOpen() && elf.header().e_type == ET_REL); |
| } |
| |
| static void CheckNotObject(const char* source, RangeSink* sink) { |
| if (IsObjectFile(sink->input_file().data())) { |
| THROWF( |
| "can't use data source '$0' on object files (only binaries and shared " |
| "libraries)", |
| source); |
| } |
| } |
| |
| static bool ElfMachineToCapstone(Elf64_Half e_machine, cs_arch* arch, |
| cs_mode* mode) { |
| switch (e_machine) { |
| case EM_386: |
| *arch = CS_ARCH_X86; |
| *mode = CS_MODE_32; |
| return true; |
| case EM_X86_64: |
| *arch = CS_ARCH_X86; |
| *mode = CS_MODE_64; |
| return true; |
| |
| // These aren't tested, but we include them on the off-chance |
| // that it will work. |
| case EM_ARM: |
| *arch = CS_ARCH_ARM; |
| *mode = CS_MODE_LITTLE_ENDIAN; |
| return true; |
| case EM_AARCH64: |
| *arch = CS_ARCH_ARM64; |
| *mode = CS_MODE_ARM; |
| return true; |
| case EM_MIPS: |
| *arch = CS_ARCH_MIPS; |
| return true; |
| case EM_PPC: |
| *arch = CS_ARCH_PPC; |
| *mode = CS_MODE_32; |
| return true; |
| case EM_PPC64: |
| *arch = CS_ARCH_PPC; |
| *mode = CS_MODE_64; |
| return true; |
| case EM_SPARC: |
| *arch = CS_ARCH_SPARC; |
| *mode = CS_MODE_BIG_ENDIAN; |
| return true; |
| case EM_SPARCV9: |
| *arch = CS_ARCH_SPARC; |
| *mode = CS_MODE_V9; |
| return true; |
| |
| default: |
| if (verbose_level > 1) { |
| printf( |
| "Unable to map to capstone target, disassembly will be " |
| "unavailable"); |
| } |
| return false; |
| } |
| } |
| |
| static bool ReadElfArchMode(const InputFile& file, cs_arch* arch, cs_mode* mode) { |
| bool capstone_available = true; |
| ForEachElf(file, nullptr, |
| [&capstone_available, arch, mode](const ElfFile& elf, |
| string_view /*filename*/, |
| uint32_t /*index_base*/) { |
| // Last .o file wins? (For .a files)? It's kind of arbitrary, |
| // but a single .a file shouldn't have multiple archs in it. |
| capstone_available &= |
| ElfMachineToCapstone(elf.header().e_machine, arch, mode); |
| }); |
| return capstone_available; |
| } |
| |
| static void ReadELFSymbols(const InputFile& file, RangeSink* sink, |
| SymbolTable* table, bool disassemble) { |
| bool is_object = IsObjectFile(file.data()); |
| DisassemblyInfo info; |
| DisassemblyInfo* infop = &info; |
| bool capstone_available = ReadElfArchMode(file, &info.arch, &info.mode); |
| |
| ForEachElf( |
| file, sink, |
| [=](const ElfFile& elf, string_view /*filename*/, uint64_t index_base) { |
| for (Elf64_Xword i = 1; i < elf.section_count(); i++) { |
| ElfFile::Section section; |
| elf.ReadSection(i, §ion); |
| |
| if (section.header().sh_type != SHT_SYMTAB) { |
| continue; |
| } |
| |
| Elf64_Word symbol_count = section.GetEntryCount(); |
| |
| // Find the corresponding section where the strings for the symbol |
| // table can be found. |
| ElfFile::Section strtab_section; |
| elf.ReadSection(section.header().sh_link, &strtab_section); |
| if (strtab_section.header().sh_type != SHT_STRTAB) { |
| THROW("symtab section pointed to non-strtab section"); |
| } |
| |
| for (Elf64_Word i = 1; i < symbol_count; i++) { |
| Elf64_Sym sym; |
| |
| section.ReadSymbol(i, &sym, nullptr); |
| |
| if (ELF64_ST_TYPE(sym.st_info) == STT_SECTION) { |
| continue; |
| } |
| |
| if (sym.st_shndx == STN_UNDEF) { |
| continue; |
| } |
| |
| if (sym.st_size == 0) { |
| // Maybe try to refine? See ReadELFSectionsRefineSymbols below. |
| continue; |
| } |
| |
| string_view name = strtab_section.ReadString(sym.st_name); |
| uint64_t full_addr = |
| ToVMAddr(sym.st_value, index_base + sym.st_shndx, is_object); |
| if (sink && !(capstone_available && disassemble)) { |
| // Checks for a negative number in two's complement |
| if (sym.st_size > 0x7fffffffffffffffULL) { |
| fprintf(stderr, "Invalid symbol size at 0x%" PRIx64 \ |
| ", size: 0x%" PRIx64 ", shndx: %d, name: %.*s\n", |
| full_addr, sym.st_size, sym.st_shndx, |
| static_cast<int>(name.size()), name.data()); |
| } else { |
| sink->AddVMRangeAllowAlias( |
| "elf_symbols", full_addr, sym.st_size, |
| ItaniumDemangle(name, sink->data_source())); |
| } |
| } |
| if (table) { |
| table->insert( |
| std::make_pair(name, std::make_pair(full_addr, sym.st_size))); |
| } |
| if (capstone_available && disassemble && |
| ELF64_ST_TYPE(sym.st_info) == STT_FUNC) { |
| if (verbose_level > 1) { |
| printf("Disassembling function: %s\n", name.data()); |
| } |
| // TODO(brandonvu) Continue if VM pointer cannot be translated. Issue #315 |
| uint64_t unused; |
| if (!sink->Translator()->vm_map.Translate(full_addr, &unused)) { |
| WARN("Can't translate VM pointer ($0) to file", full_addr); |
| continue; |
| } |
| infop->text = sink->TranslateVMToFile(full_addr).substr(0, sym.st_size); |
| infop->start_address = full_addr; |
| DisassembleFindReferences(*infop, sink); |
| } |
| } |
| } |
| }); |
| } |
| |
| static void ReadELFSymbolTableEntries(const ElfFile& elf, |
| const ElfFile::Section& section, |
| uint64_t index_base, bool is_object, |
| RangeSink* sink) { |
| Elf64_Word symbol_count = section.GetEntryCount(); |
| |
| // Find the corresponding section where the strings for the symbol |
| // table can be found. |
| ElfFile::Section strtab_section; |
| elf.ReadSection(section.header().sh_link, &strtab_section); |
| if (strtab_section.header().sh_type != SHT_STRTAB) { |
| THROW("symtab section pointed to non-strtab section"); |
| } |
| |
| for (Elf64_Word i = 1; i < symbol_count; i++) { |
| Elf64_Sym sym; |
| string_view sym_range; |
| section.ReadSymbol(i, &sym, &sym_range); |
| |
| if (ELF64_ST_TYPE(sym.st_info) == STT_SECTION || |
| sym.st_shndx == STN_UNDEF || |
| sym.st_name == SHN_UNDEF) { |
| continue; |
| } |
| |
| string_view name = strtab_section.ReadString(sym.st_name); |
| uint64_t full_addr = |
| ToVMAddr(sym.st_value, index_base + sym.st_shndx, is_object); |
| // Capture the trailing NULL. |
| name = string_view(name.data(), name.size() + 1); |
| sink->AddFileRangeForVMAddr("elf_symtab_name", full_addr, name); |
| sink->AddFileRangeForVMAddr("elf_symtab_sym", full_addr, sym_range); |
| } |
| } |
| |
| static void ReadELFRelaEntries(const ElfFile::Section& section, |
| uint64_t index_base, bool is_object, |
| RangeSink* sink) { |
| Elf64_Word rela_count = section.GetEntryCount(); |
| Elf64_Word sh_info = section.header().sh_info; |
| for (Elf64_Word i = 1; i < rela_count; i++) { |
| Elf64_Rela rela; |
| string_view rela_range; |
| section.ReadRelocationWithAddend(i, &rela, &rela_range); |
| uint64_t full_addr = |
| ToVMAddr(rela.r_offset, index_base + sh_info, is_object); |
| sink->AddFileRangeForVMAddr("elf_rela", full_addr, rela_range); |
| } |
| } |
| |
| // Adds file ranges for the symbol tables and string tables *themselves* (ie. |
| // the space that the symtab/strtab take up in the file). This will cover |
| // .symtab |
| // .strtab |
| // .dynsym |
| // .dynstr |
| static void ReadELFTables(const InputFile& file, RangeSink* sink) { |
| bool is_object = IsObjectFile(file.data()); |
| |
| // Disassemble first, because sometimes other tables will refer to things we |
| // discovered through disassembling. |
| ReadELFSymbols(file, sink, nullptr, true); |
| |
| // Now scan other tables. |
| ForEachElf(file, sink, |
| [sink, is_object](const ElfFile& elf, string_view /*filename*/, |
| uint32_t index_base) { |
| for (Elf64_Xword i = 1; i < elf.section_count(); i++) { |
| ElfFile::Section section; |
| elf.ReadSection(i, §ion); |
| |
| switch (section.header().sh_type) { |
| case SHT_SYMTAB: |
| case SHT_DYNSYM: |
| ReadELFSymbolTableEntries(elf, section, index_base, |
| is_object, sink); |
| break; |
| case SHT_RELA: |
| ReadELFRelaEntries(section, index_base, is_object, sink); |
| break; |
| } |
| |
| // We are looking by section name, which is a little different |
| // than what the loader actually does (which is find |
| // eh_frame_hdr from the program headers and then find eh_frame |
| // fde entries from there). But these section names should be |
| // standard enough that this approach works also. |
| if (section.GetName() == ".eh_frame") { |
| ReadEhFrame(section.contents(), sink); |
| } else if (section.GetName() == ".eh_frame_hdr") { |
| ReadEhFrameHdr(section.contents(), sink); |
| } |
| } |
| }); |
| } |
| |
// Selects the label DoReadELFSections() attaches to each section's range.
enum ReportSectionsBy {
  // Label is the section name itself.
  kReportBySectionName,
  // Label is the section name wrapped as "[section NAME]".
  kReportByEscapedSectionName,
  // Label is "Section [...]" built from the A/W/X section flags.
  kReportByFlags,
  // Label is the filename passed in for the ELF file being read.
  kReportByArchiveMember,
};
| |
| static void DoReadELFSections(RangeSink* sink, enum ReportSectionsBy report_by) { |
| bool is_object = IsObjectFile(sink->input_file().data()); |
| ForEachElf( |
| sink->input_file(), sink, |
| [=](const ElfFile& elf, string_view filename, uint32_t index_base) { |
| std::string name_from_flags; |
| for (Elf64_Xword i = 1; i < elf.section_count(); i++) { |
| ElfFile::Section section; |
| elf.ReadSection(i, §ion); |
| string_view name = section.GetName(); |
| |
| if (name.size() == 0) { |
| return; |
| } |
| |
| const auto& header = section.header(); |
| auto addr = header.sh_addr; |
| auto size = header.sh_size; |
| auto filesize = (header.sh_type == SHT_NOBITS) ? 0 : size; |
| auto vmsize = (header.sh_flags & SHF_ALLOC) ? size : 0; |
| |
| string_view contents = StrictSubstr(section.contents(), 0, filesize); |
| |
| uint64_t full_addr = ToVMAddr(addr, index_base + i, is_object); |
| |
| if (report_by == kReportByFlags) { |
| name_from_flags = std::string(name); |
| |
| name_from_flags = "Section ["; |
| |
| if (header.sh_flags & SHF_ALLOC) { |
| name_from_flags += 'A'; |
| } |
| |
| if (header.sh_flags & SHF_WRITE) { |
| name_from_flags += 'W'; |
| } |
| |
| if (header.sh_flags & SHF_EXECINSTR) { |
| name_from_flags += 'X'; |
| } |
| |
| name_from_flags += ']'; |
| sink->AddRange("elf_section", name_from_flags, full_addr, vmsize, |
| contents); |
| } else if (report_by == kReportBySectionName) { |
| sink->AddRange("elf_section", name, full_addr, vmsize, contents); |
| } else if (report_by == kReportByEscapedSectionName) { |
| sink->AddRange("elf_section", |
| std::string("[section ") + std::string(name) + "]", |
| full_addr, vmsize, contents); |
| } else if (report_by == kReportByArchiveMember) { |
| sink->AddRange("elf_section", filename, full_addr, vmsize, |
| contents); |
| } |
| } |
| |
| if (report_by == kReportByArchiveMember) { |
| // Cover unmapped parts of the file. |
| sink->AddFileRange("unmapped_armember", filename, elf.entire_file()); |
| } |
| }); |
| } |
| |
| enum ReportSegmentsBy { |
| kReportBySegmentName, |
| kReportByEscapedSegmentName, |
| }; |
| |
| std::string GetSegmentName(const ElfFile::Segment& segment, Elf64_Xword i, |
| ReportSegmentsBy report_by) { |
| const auto& header = segment.header(); |
| |
| // Include the segment index in the label, to support embedded. |
| // |
| // Including the index in the segment label differentiates |
| // segments with the same access control (e.g. RWX vs RW). In |
| // ELF files built for embedded microcontroller projects, a |
| // segment is used for each distinct type of memory. In simple |
| // cases, there is a segment for the flash (which will store |
| // code and read-only data) and a segment for RAM (which |
| // usually stores globals, stacks, and maybe a heap). In more |
| // involved projects, there may be special segments for faster |
| // RAM (e.g. core coupled RAM or CCRAM), or there may even be |
| // memory overlays to support manual paging of code from flash |
| // (which may be slow) into RAM. |
| std::string name(absl::StrCat("LOAD #", i, " [")); |
| |
| if (header.p_flags & PF_R) { |
| name += 'R'; |
| } |
| |
| if (header.p_flags & PF_W) { |
| name += 'W'; |
| } |
| |
| if (header.p_flags & PF_X) { |
| name += 'X'; |
| } |
| |
| name += ']'; |
| |
| if (report_by == kReportByEscapedSegmentName) { |
| return absl::StrCat("[", name, "]"); |
| } else { |
| return name; |
| } |
| } |
| |
| static void DoReadELFSegments(RangeSink* sink, ReportSegmentsBy report_by) { |
| ForEachElf(sink->input_file(), sink, |
| [=](const ElfFile& elf, string_view /*filename*/, |
| uint32_t /*index_base*/) { |
| for (Elf64_Xword i = 0; i < elf.header().e_phnum; i++) { |
| ElfFile::Segment segment; |
| elf.ReadSegment(i, &segment); |
| std::string name = GetSegmentName(segment, i, report_by); |
| |
| if (segment.header().p_type != PT_LOAD) { |
| continue; |
| } |
| |
| sink->AddRange("elf_segment", name, segment.header().p_vaddr, |
| segment.header().p_memsz, segment.contents()); |
| } |
| }); |
| |
| ForEachElf(sink->input_file(), sink, |
| [=](const ElfFile& elf, string_view /*filename*/, |
| uint32_t /*index_base*/) { |
| for (Elf64_Xword i = 0; i < elf.header().e_phnum; i++) { |
| ElfFile::Segment segment; |
| elf.ReadSegment(i, &segment); |
| const auto& header = segment.header(); |
| if (header.p_type != PT_TLS) continue; |
| std::string name = "TLS"; |
| sink->AddRange("elf_segment", "TLS", header.p_vaddr, |
| header.p_memsz, segment.contents()); |
| } |
| }); |
| } |
| |
| static void ReadELFSegments(RangeSink* sink) { |
| if (IsObjectFile(sink->input_file().data())) { |
| // Object files don't actually have segments. But we can cheat a little bit |
| // and make up "segments" based on section flags. This can be really useful |
| // when you are compiling with -ffunction-sections and -fdata-sections, |
| // because in those cases the actual "sections" report becomes pretty |
| // useless (since every function/data has its own section, it's like the |
| // "symbols" report except less readable). |
| DoReadELFSections(sink, kReportByFlags); |
| } else { |
| DoReadELFSegments(sink, kReportBySegmentName); |
| } |
| } |
| |
| // ELF files put debug info directly into the binary, so we call the DWARF |
| // reader directly on them. At the moment we don't attempt to make these |
| // work with object files. |
| |
| void ReadDWARFSections(const InputFile &file, dwarf::File *dwarf, |
| RangeSink *sink) { |
| ElfFile elf(file.data()); |
| assert(elf.IsOpen()); |
| dwarf->file = &file; |
| dwarf->open = &ReadDWARFSections; |
| for (Elf64_Xword i = 1; i < elf.section_count(); i++) { |
| ElfFile::Section section; |
| elf.ReadSection(i, §ion); |
| string_view name = section.GetName(); |
| string_view contents = section.contents(); |
| uint64_t uncompressed_size = 0; |
| |
| if (section.header().sh_flags & SHF_COMPRESSED) { |
| // Standard ELF section compression, produced when you link with |
| // --compress-debug-sections=zlib-gabi |
| Elf64_Chdr chdr; |
| absl::string_view range; |
| elf.ReadStruct<Elf32_Chdr>(contents, 0, ChdrMunger(), &range, &chdr); |
| if (chdr.ch_type != ELFCOMPRESS_ZLIB) { |
| // Unknown compression format. |
| continue; |
| } |
| uncompressed_size = chdr.ch_size; |
| contents.remove_prefix(range.size()); |
| } |
| |
| if (name.find(".debug_") == 0) { |
| name.remove_prefix(string_view(".debug_").size()); |
| } else if (name.find(".zdebug_") == 0) { |
| // GNU format compressed debug info, produced when you link with |
| // --compress-debug-sections=zlib-gnu |
| name.remove_prefix(string_view(".zdebug_").size()); |
| if (ReadBytes(4, &contents) != "ZLIB") { |
| continue; // Bad compression header. |
| } |
| uncompressed_size = ReadBigEndian<uint64_t>(&contents); |
| } |
| |
| static constexpr string_view dwo_str(".dwo"); |
| if (name.size() >= dwo_str.size() && |
| name.rfind(".dwo") == name.size() - dwo_str.size()) { |
| name.remove_suffix(dwo_str.size()); |
| } |
| |
| if (string_view* member = dwarf->GetFieldByName(name)) { |
| if (uncompressed_size) { |
| *member = sink->ZlibDecompress(contents, uncompressed_size); |
| } else { |
| *member = section.contents(); |
| } |
| } |
| } |
| } |
| |
| void AddCatchAll(RangeSink* sink) { |
| // The last-line fallback to make sure we cover the entire VM space. |
| if (sink->IsBaseMap() || sink->data_source() != DataSource::kSegments) { |
| if (!sink->IsBaseMap()) { |
| DoReadELFSections(sink, kReportByEscapedSectionName); |
| } |
| ForEachElf(sink->input_file(), sink, |
| [sink](const ElfFile& elf, string_view /*filename*/, |
| uint32_t /*index_base*/) { |
| sink->AddFileRange("elf_catchall", "[ELF Header]", |
| elf.header_region()); |
| sink->AddFileRange("elf_catchall", "[ELF Section Headers]", |
| elf.section_headers()); |
| sink->AddFileRange("elf_catchall", "[ELF Program Headers]", |
| elf.segment_headers()); |
| }); |
| } |
| DoReadELFSegments(sink, kReportByEscapedSegmentName); |
| |
| |
| // The last-line fallback to make sure we cover the entire file. |
| sink->AddFileRange("elf_catchall", "[Unmapped]", sink->input_file().data()); |
| } |
| |
| class ElfObjectFile : public ObjectFile { |
| public: |
| ElfObjectFile(std::unique_ptr<InputFile> file, std::optional<std::string> link_map_file) |
| : ObjectFile(std::move(file)) { |
| if (link_map_file.has_value()) { |
| std::ifstream infile(*link_map_file); |
| std::string link_map; |
| |
| // Strip comments and empty lines. |
| for (std::string line; getline(infile, line);) { |
| if (line.empty()) continue; |
| if (line[0] == '#') continue; |
| link_map += line; |
| link_map += '\n'; |
| } |
| |
| absl::StripLeadingAsciiWhitespace(&link_map); |
| absl::StripTrailingAsciiWhitespace(&link_map); |
| |
| link_map_symbols_ = bloaty_link_map::ParseLldLinkMap(link_map); |
| link_map_sections_ = bloaty_link_map::ParseLldLinkMapSections(link_map); |
| } |
| } |
| |
| std::string GetBuildId() const override { |
| if (IsObjectFile(file_data().data())) { |
| // Object files don't have a build ID. |
| return std::string(); |
| } |
| |
| ElfFile elf(file_data().data()); |
| assert(elf.IsOpen()); |
| // Search for a build-id section. |
| for (Elf64_Xword i = 1; i < elf.section_count(); i++) { |
| ElfFile::Section section; |
| elf.ReadSection(i, §ion); |
| if (section.header().sh_type != SHT_NOTE) { |
| continue; |
| } |
| |
| for (ElfFile::NoteIter notes(section); !notes.IsDone(); notes.Next()) { |
| if (notes.name() == "GNU" && notes.type() == NT_GNU_BUILD_ID) { |
| return std::string(notes.descriptor()); |
| } |
| } |
| } |
| // Search for a build-id segment. |
| for (Elf64_Xword i = 0; i < elf.header().e_phnum; i++) { |
| ElfFile::Segment segment; |
| elf.ReadSegment(i, &segment); |
| const auto &header = segment.header(); |
| if (header.p_type != PT_NOTE) { |
| continue; |
| } |
| for (ElfFile::NoteIter notes(segment, &elf); !notes.IsDone(); notes.Next()) { |
| if (notes.name() == "GNU" && notes.type() == NT_GNU_BUILD_ID) { |
| return std::string(notes.descriptor()); |
| } |
| } |
| } |
| |
| // No build id section found. |
| return std::string(); |
| } |
| |
| void ReadAccessPattern(RangeSink* sink) const { |
| if (!sink->options().has_cold_bytes_filter()) { |
| THROW("need to specify cold bytes filter"); |
| } |
| // Each element corresponds to |kAccessPatternFrameSize| bytes. |
| std::vector<bool> access_pattern; |
| uint64_t kAccessPatternFrameSize = sink->options().access_pattern_frame_size(); |
| auto frequencies = sink->options().cold_bytes_filter(); |
| size_t file_size = sink->input_file().data().size(); |
| size_t num_frames = (file_size + kAccessPatternFrameSize - 1) / kAccessPatternFrameSize; |
| access_pattern.reserve(num_frames); |
| for (size_t i = 0; i < num_frames; i ++) { |
| access_pattern.push_back(false); |
| } |
| std::vector<std::string> frequencies_vec = absl::StrSplit(frequencies, ','); |
| for (const auto& part : frequencies_vec) { |
| std::vector<std::string> frame_and_count = absl::StrSplit(part, ':'); |
| if (frame_and_count.size() != 2) { |
| THROWF("Invalid format in cold bytes filter: $0", part); |
| } |
| size_t frame = std::stoi(frame_and_count[0]); |
| size_t count = std::stoi(frame_and_count[1]); |
| if (frame >= access_pattern.size()) { |
| THROW("access pattern exceeded end of file"); |
| } |
| if (count > 0) { |
| access_pattern[frame] = true; |
| } |
| } |
| for (size_t i = 0; i < access_pattern.size(); i++) { |
| std::string label = access_pattern[i] ? "Hot" : "Cold"; |
| size_t length; |
| size_t file_size = sink->input_file().data().size(); |
| if (i * kAccessPatternFrameSize > file_size) { |
| THROW("access pattern exceeded end of file"); |
| } |
| if (i * kAccessPatternFrameSize + kAccessPatternFrameSize > file_size) { |
| // We're at the last frame in the ELF, and it is not fully 32 KiB. |
| length = file_size % kAccessPatternFrameSize; |
| } else { |
| length = kAccessPatternFrameSize; |
| } |
| sink->AddFileRange("access_pattern", label, |
| i * kAccessPatternFrameSize, length); |
| } |
| } |
| |
| void ReadLinkMapSymbols(RangeSink* sink) const { |
| if (!link_map_symbols_.has_value()) return; |
| const auto& symbols = *link_map_symbols_; |
| for (const auto& symbol : symbols) { |
| auto maybe_transformed_compile_unit = |
| bloaty_link_map::TransformCompileUnitForFuchsia(symbol.compile_unit); |
| auto demangled = ItaniumDemangle(symbol.name, sink->data_source()); |
| if (maybe_transformed_compile_unit.has_value()) { |
| auto [transformed_compile_unit, maybe_rust_crate] = |
| *maybe_transformed_compile_unit; |
| if (maybe_rust_crate.has_value()) { |
| auto symbol_with_crate_id = |
| EncodeSymbolWithCrateId(demangled, *maybe_rust_crate); |
| sink->AddVMRange("link_map", symbol.addr, symbol.size, |
| symbol_with_crate_id); |
| continue; |
| } |
| } |
| sink->AddVMRange("link_map", symbol.addr, symbol.size, demangled); |
| } |
| |
| if (!link_map_sections_.has_value()) return; |
| const auto& sections = *link_map_sections_; |
| for (const auto& section : sections) { |
| sink->AddVMRange("link_map", section.addr, section.size, |
| "[section " + section.name + "]"); |
| } |
| } |
| |
| void ReadLinkMapCompileUnits(RangeSink* sink) const { |
| if (!link_map_symbols_.has_value()) return; |
| const auto& symbols = *link_map_symbols_; |
| for (const auto& symbol : symbols) { |
| auto maybe_transformed_compile_unit = |
| bloaty_link_map::TransformCompileUnitForFuchsia(symbol.compile_unit); |
| if (maybe_transformed_compile_unit.has_value()) { |
| auto [transformed_compile_unit, maybe_rust_crate] = |
| *maybe_transformed_compile_unit; |
| sink->AddVMRange("link_map", symbol.addr, symbol.size, |
| transformed_compile_unit); |
| } |
| } |
| |
| if (!link_map_sections_.has_value()) return; |
| const auto& sections = *link_map_sections_; |
| for (const auto& section : sections) { |
| sink->AddVMRange("link_map", section.addr, section.size, |
| "[section " + section.name + "]"); |
| } |
| } |
| |
| void ProcessFile(const std::vector<RangeSink*>& sinks) const override { |
| for (auto sink : sinks) { |
| if (verbose_level > 1) { |
| printf("Scanning source %d\n", (int)sink->data_source()); |
| } |
| switch (sink->data_source()) { |
| case DataSource::kSegments: |
| ReadELFSegments(sink); |
| break; |
| case DataSource::kSections: |
| DoReadELFSections(sink, kReportBySectionName); |
| break; |
| case DataSource::kRawSymbols: |
| case DataSource::kShortSymbols: |
| case DataSource::kFullSymbols: |
| ReadLinkMapSymbols(sink); |
| ReadELFSymbols(debug_file().file_data(), sink, nullptr, false); |
| break; |
| case DataSource::kArchiveMembers: |
| DoReadELFSections(sink, kReportByArchiveMember); |
| break; |
| case DataSource::kAccessPattern: { |
| ReadAccessPattern(sink); |
| break; |
| } |
| case DataSource::kCompileUnits: { |
| CheckNotObject("compileunits", sink); |
| SymbolTable symtab; |
| DualMap symbol_map; |
| NameMunger empty_munger; |
| RangeSink symbol_sink(&debug_file().file_data(), |
| sink->options(), |
| DataSource::kRawSymbols, |
| &sinks[0]->MapAtIndex(0), nullptr); |
| symbol_sink.AddOutput(&symbol_map, &empty_munger); |
| ReadELFSymbols(debug_file().file_data(), &symbol_sink, &symtab, |
| false); |
| dwarf::File dwarf; |
| ReadDWARFSections(debug_file().file_data(), &dwarf, sink); |
| ReadDWARFCompileUnits(dwarf, symbol_map, sink); |
| ReadLinkMapCompileUnits(sink); |
| break; |
| } |
| case DataSource::kInlines: { |
| CheckNotObject("lineinfo", sink); |
| dwarf::File dwarf; |
| ReadDWARFSections(debug_file().file_data(), &dwarf, sink); |
| ReadDWARFInlines(dwarf, sink, true); |
| DoReadELFSections(sink, kReportByEscapedSectionName); |
| break; |
| } |
| default: |
| THROW("unknown data source"); |
| } |
| |
| switch (sink->data_source()) { |
| case DataSource::kSegments: |
| case DataSource::kSections: |
| case DataSource::kArchiveMembers: |
| case DataSource::kAccessPattern: |
| break; |
| default: |
| // Add these *after* processing all other data sources. |
| ReadELFTables(sink->input_file(), sink); |
| break; |
| } |
| |
| AddCatchAll(sink); |
| } |
| } |
| |
| bool GetDisassemblyInfo(const absl::string_view symbol, |
| DataSource symbol_source, |
| DisassemblyInfo* info) const override { |
| return DoGetDisassemblyInfo(&symbol, symbol_source, info); |
| } |
| |
| bool DoGetDisassemblyInfo(const absl::string_view* symbol, |
| DataSource symbol_source, |
| DisassemblyInfo* info) const { |
| // Find the corresponding file range. This also could be optimized not to |
| // build the entire map. |
| DualMap base_map; |
| NameMunger empty_munger; |
| RangeSink base_sink(&file_data(), bloaty::Options(), DataSource::kSegments, |
| nullptr, nullptr); |
| base_sink.AddOutput(&base_map, &empty_munger); |
| std::vector<RangeSink*> sink_ptrs{&base_sink}; |
| ProcessFile(sink_ptrs); |
| |
| // Could optimize this not to build the whole table if necessary. |
| SymbolTable symbol_table; |
| RangeSink symbol_sink(&file_data(), bloaty::Options(), symbol_source, |
| &base_map, nullptr); |
| symbol_sink.AddOutput(&info->symbol_map, &empty_munger); |
| ReadELFSymbols(debug_file().file_data(), &symbol_sink, &symbol_table, |
| false); |
| |
| if (symbol) { |
| auto entry = symbol_table.find(*symbol); |
| if (entry == symbol_table.end()) { |
| entry = symbol_table.find(ItaniumDemangle(*symbol, symbol_source)); |
| if (entry == symbol_table.end()) { |
| return false; |
| } |
| } |
| uint64_t vmaddr = entry->second.first; |
| uint64_t size = entry->second.second; |
| |
| // TODO(haberman); Add PLT entries to symbol map, so call <plt stub> gets |
| // symbolized. |
| |
| uint64_t fileoff; |
| if (!base_map.vm_map.Translate(vmaddr, &fileoff)) { |
| THROWF("Couldn't translate VM address for function $0", symbol); |
| } |
| |
| info->text = StrictSubstr(file_data().data(), fileoff, size); |
| info->start_address = vmaddr; |
| } |
| |
| return ReadElfArchMode(file_data(), &info->arch, &info->mode); |
| } |
| |
| private: |
| std::optional<std::vector<bloaty_link_map::Symbol>> link_map_symbols_ = std::nullopt; |
| std::optional<std::vector<bloaty_link_map::Section>> link_map_sections_ = std::nullopt; |
| }; |
| |
| } // namespace |
| |
| std::unique_ptr<ObjectFile> TryOpenELFFile(std::unique_ptr<InputFile>& file, |
| std::optional<std::string> link_map_file) { |
| ElfFile elf(file->data()); |
| ArFile ar(file->data()); |
| if (elf.IsOpen() || ar.IsOpen()) { |
| if (link_map_file.has_value()) { |
| std::cerr << "Using link map: " << *link_map_file << std::endl; |
| } |
| return std::unique_ptr<ObjectFile>(new ElfObjectFile(std::move(file), link_map_file)); |
| } else { |
| return nullptr; |
| } |
| |
| // A few functions that have been defined but are not yet used. |
| (void)&ElfFile::FindSectionByName; |
| (void)&ElfFile::Section::ReadRelocation; |
| } |
| |
| } // namespace bloaty |