blob: 9832566ecd3e5484ecde8af42d73d93250d4062c [file] [log] [blame] [edit]
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <string>
#include <iostream>
#include <fstream>
#include <sstream>
#include "absl/numeric/int128.h"
#include "absl/strings/escaping.h"
#include "absl/strings/string_view.h"
#include "absl/strings/substitute.h"
#include "third_party/freebsd_elf/elf.h"
#include "bloaty.h"
#include "link_map.h"
#include <assert.h>
#include <limits.h>
#include <stdlib.h>
using absl::string_view;
namespace bloaty {
namespace {
// Field converter used by the mungers when the file's byte order is not the
// host's: every value is routed through ByteSwap().
struct ByteSwapFunc {
  template <class T>
  T operator()(T field) {
    return ByteSwap(field);
  }
};
// Identity field converter: used by the mungers when no byte swapping is
// needed, so the value is handed back untouched.
struct NullFunc {
  template <class T>
  T operator()(T field) {
    return field;
  }
};
// Parses `str` as an integer with absl::SimpleAtoi and returns it as a size_t.
// Raises via THROWF when the text cannot be converted.
size_t StringViewToSize(string_view str) {
  size_t parsed;
  const bool converted = absl::SimpleAtoi(str, &parsed);
  if (!converted) {
    THROWF("couldn't convert string '$0' to integer.", str);
  }
  return parsed;
}
// Moves *data forward by sizeof(T) bytes, i.e. past one T-sized record at the
// front of the view.
template <class T>
void AdvancePastStruct(string_view* data) {
  constexpr size_t kRecordSize = sizeof(T);
  *data = data->substr(kRecordSize);
}
// ElfFile /////////////////////////////////////////////////////////////////////

// For parsing the pieces we need out of an ELF file (.o, .so, and binaries).
//
// An ElfFile is a non-owning view: it only stores a string_view of the bytes
// passed to the constructor, so that buffer must stay alive for as long as the
// ElfFile (and any Section/Segment read from it) is in use.
//
// The constructor parses the file header.  IsOpen() is false when the data is
// shorter than the ELF identification bytes or does not start with the ELF
// magic; other malformed input is reported by throwing (THROW/THROWF).
//
// All structures are exposed in their 64-bit form (Elf64_*), regardless of the
// class or byte order of the underlying file.
class ElfFile {
 public:
  ElfFile(string_view data) : data_(data) {
    ok_ = Initialize();
  }

  // True when the data was recognized as ELF.  The remaining accessors are
  // only meaningful when this returns true.
  bool IsOpen() const { return ok_; }

  // Regions of the file where different headers live.
  string_view entire_file() const { return data_; }
  string_view header_region() const { return header_region_; }
  string_view section_headers() const { return section_headers_; }
  string_view segment_headers() const { return segment_headers_; }

  const Elf64_Ehdr& header() const { return header_; }

  // Effective section count / section-name string table index.  These can
  // differ from header().e_shnum / e_shstrndx: Initialize() replaces them
  // with values taken from section 0 when the header fields overflow.
  Elf64_Xword section_count() const { return section_count_; }
  Elf64_Xword section_string_index() const { return section_string_index_; }

  // Represents an ELF segment (data used by the loader / dynamic linker).
  class Segment {
   public:
    const Elf64_Phdr& header() const { return header_; }
    // The segment's bytes in the file (p_offset, p_filesz).
    string_view contents() const { return contents_; }
    // The file range occupied by the program header entry itself.
    string_view range() const { return range_; }

   private:
    friend class ElfFile;
    Elf64_Phdr header_{};
    string_view contents_;
    string_view range_;
  };

  // Represents an ELF section (.text, .data, .bss, etc.)
  class Section {
   public:
    const Elf64_Shdr& header() const { return header_; }
    // The section's bytes in the file; empty for SHT_NOBITS sections.
    string_view contents() const { return contents_; }
    // The file range occupied by the section header entry itself.
    string_view range() const { return range_; }

    // For SHN_UNDEF (undefined name), returns [nullptr, 0].
    string_view GetName() const;

    // Requires: this is a section with fixed-width entries (symbol table,
    // relocation table, etc).
    Elf64_Word GetEntryCount() const;

    // Requires: header().sh_type == SHT_STRTAB.
    string_view ReadString(Elf64_Word index) const;

    // Requires: header().sh_type == SHT_SYMTAB || header().sh_type ==
    // SHT_DYNSYM
    void ReadSymbol(Elf64_Word index, Elf64_Sym* sym,
                    string_view* file_range) const;

    // Requires: header().sh_type == SHT_REL
    void ReadRelocation(Elf64_Word index, Elf64_Rel* rel,
                        string_view* file_range) const;

    // Requires: header().sh_type == SHT_RELA
    void ReadRelocationWithAddend(Elf64_Word index, Elf64_Rela* rel,
                                  string_view* file_range) const;

    // Only valid after the section was filled in by ElfFile::ReadSection().
    const ElfFile& elf() const { return *elf_; }

   private:
    friend class ElfFile;
    const ElfFile* elf_ = nullptr;
    Elf64_Shdr header_{};
    string_view contents_;
    string_view range_;
  };

  // Iterates over the notes stored in a note section or segment.  The
  // constructor already positions the iterator on the first note (or marks it
  // done when there is none).
  class NoteIter {
   public:
    NoteIter(const Section& section)
        : elf_(&section.elf()), remaining_(section.contents()) {
      Next();
    }

    NoteIter(const Segment& segment, const ElfFile* elf)
        : elf_(elf), remaining_(segment.contents()) {
      Next();
    }

    bool IsDone() const { return done_; }
    uint32_t type() const { return type_; }
    string_view name() const { return name_; }
    string_view descriptor() const { return descriptor_; }

    void Next();

    // NOTE(review): these look like they were meant to be private; they are
    // left public so that any existing outside access keeps compiling.
   public:
    const ElfFile* elf_;
    string_view name_;
    string_view descriptor_;
    string_view remaining_;
    uint32_t type_ = 0;
    bool done_ = false;
  };

  void ReadSegment(Elf64_Word index, Segment* segment) const;
  void ReadSection(Elf64_Word index, Section* section) const;

  // Scans the sections in order and leaves the first one called `name` in
  // *section.  Returns false when no section has that name.
  bool FindSectionByName(absl::string_view name, Section* section) const;

  bool is_64bit() const { return is_64bit_; }
  bool is_native_endian() const { return is_native_endian_; }

  // Reads one structure located `offset` bytes into `contents`, converting it
  // to its 64-bit, host-endian form in *out.  T32 is the 32-bit layout of the
  // structure and Munger the functor that copies it field by field.  When
  // `range` is non-null it receives the bytes the structure was read from.
  template <class T32, class T64, class Munger>
  void ReadStruct(absl::string_view contents, uint64_t offset, Munger munger,
                  absl::string_view* range, T64* out) const {
    StructReader(*this, contents).Read<T32>(offset, munger, range, out);
  }

 private:
  friend class Section;

  bool Initialize();

  string_view GetRegion(uint64_t start, uint64_t n) const {
    return StrictSubstr(data_, start, n);
  }

  // Shared code for reading various ELF structures.  Handles endianness
  // conversion and 32->64 bit conversion, when necessary.
  class StructReader {
   public:
    StructReader(const ElfFile& elf, string_view data)
        : elf_(elf), data_(data) {}

    template <class T32, class T64, class Munger>
    void Read(uint64_t offset, Munger /*munger*/, absl::string_view* range,
              T64* out) const {
      if (elf_.is_64bit() && elf_.is_native_endian()) {
        // Fast path: the on-disk layout already is the in-memory layout.
        return Memcpy(offset, range, out);
      } else {
        return ReadFallback<T32, T64, Munger>(offset, range, out);
      }
    }

   private:
    const ElfFile& elf_;
    string_view data_;

    template <class T32, class T64, class Munger>
    void ReadFallback(uint64_t offset, absl::string_view* range,
                      T64* out) const;

    // Copies sizeof(T) raw bytes found at `offset` into *out.  The source
    // range is obtained through StrictSubstr before anything is copied.
    template <class T>
    void Memcpy(uint64_t offset, absl::string_view* out_range, T* out) const {
      absl::string_view range = StrictSubstr(data_, offset, sizeof(*out));
      if (out_range) {
        *out_range = range;
      }
      memcpy(out, range.data(), sizeof(*out));
    }
  };

  // Everything below gets a defined value up front so that an ElfFile whose
  // Initialize() bailed out early (IsOpen() == false) never exposes
  // indeterminate state.
  bool ok_ = false;
  bool is_64bit_ = false;
  bool is_native_endian_ = false;
  string_view data_;
  Elf64_Ehdr header_{};
  Elf64_Xword section_count_ = 0;
  Elf64_Xword section_string_index_ = 0;
  string_view header_region_;
  string_view section_headers_;
  string_view segment_headers_;
  Section section_name_table_;
};
// ELF uses different structure definitions for 32/64 bit files. The sizes of
// members are different, and members are even in a different order!
//
// These mungers can convert 32 bit structures to 64-bit ones. They can also
// handle converting endianness. We use templates so a single template function
// can handle all three patterns:
//
// 32 native -> 64 native
// 32 swapped -> 64 native
// 64 swapped -> 64 native
struct EhdrMunger {
template <class From, class Func>
void operator()(const From& from, Elf64_Ehdr* to, Func func) {
memmove(&to->e_ident[0], &from.e_ident[0], EI_NIDENT);
to->e_type = func(from.e_type);
to->e_machine = func(from.e_machine);
to->e_version = func(from.e_version);
to->e_entry = func(from.e_entry);
to->e_phoff = func(from.e_phoff);
to->e_shoff = func(from.e_shoff);
to->e_flags = func(from.e_flags);
to->e_ehsize = func(from.e_ehsize);
to->e_phentsize = func(from.e_phentsize);
to->e_phnum = func(from.e_phnum);
to->e_shentsize = func(from.e_shentsize);
to->e_shnum = func(from.e_shnum);
to->e_shstrndx = func(from.e_shstrndx);
}
};
struct ShdrMunger {
template <class From, class Func>
void operator()(const From& from, Elf64_Shdr* to, Func func) {
to->sh_name = func(from.sh_name);
to->sh_type = func(from.sh_type);
to->sh_flags = func(from.sh_flags);
to->sh_addr = func(from.sh_addr);
to->sh_offset = func(from.sh_offset);
to->sh_size = func(from.sh_size);
to->sh_link = func(from.sh_link);
to->sh_info = func(from.sh_info);
to->sh_addralign = func(from.sh_addralign);
to->sh_entsize = func(from.sh_entsize);
}
};
struct PhdrMunger {
template <class From, class Func>
void operator()(const From& from, Elf64_Phdr* to, Func func) {
to->p_type = func(from.p_type);
to->p_flags = func(from.p_flags);
to->p_offset = func(from.p_offset);
to->p_vaddr = func(from.p_vaddr);
to->p_paddr = func(from.p_paddr);
to->p_filesz = func(from.p_filesz);
to->p_memsz = func(from.p_memsz);
to->p_align = func(from.p_align);
}
};
struct SymMunger {
template <class From, class Func>
void operator()(const From& from, Elf64_Sym* to, Func func) {
to->st_name = func(from.st_name);
to->st_info = func(from.st_info);
to->st_other = func(from.st_other);
to->st_shndx = func(from.st_shndx);
to->st_value = func(from.st_value);
to->st_size = func(from.st_size);
}
};
struct RelMunger {
template <class From, class Func>
void operator()(const From& from, Elf64_Rel* to, Func func) {
to->r_offset = func(from.r_offset);
to->r_info = func(from.r_info);
}
};
struct RelaMunger {
template <class From, class Func>
void operator()(const From& from, Elf64_Rela* to, Func func) {
to->r_offset = func(from.r_offset);
to->r_info = func(from.r_info);
to->r_addend = func(from.r_addend);
}
};
struct NoteMunger {
template <class From, class Func>
void operator()(const From& from, Elf64_Nhdr* to, Func func) {
to->n_namesz = func(from.n_namesz);
to->n_descsz = func(from.n_descsz);
to->n_type = func(from.n_type);
}
};
struct ChdrMunger {
template <class From, class Func>
void operator()(const From& from, Elf64_Chdr* to, Func func) {
to->ch_type = func(from.ch_type);
to->ch_size = func(from.ch_size);
to->ch_addralign = func(from.ch_addralign);
}
};
// Slow path of StructReader::Read(), taken for 32-bit files and/or files whose
// byte order differs from the host's.  Produces the 64-bit, host-endian
// structure in *out; `range` (if non-null) receives the bytes that were read.
template <class T32, class T64, class Munger>
void ElfFile::StructReader::ReadFallback(uint64_t offset,
                                         absl::string_view* range,
                                         T64* out) const {
  if (!elf_.is_64bit()) {
    // 32-bit file: read the narrow layout, then widen it field by field,
    // swapping bytes on the way if the file is not host-endian.
    T32 narrow;
    Memcpy(offset, range, &narrow);
    if (elf_.is_native_endian()) {
      Munger()(narrow, out, NullFunc());
    } else {
      Munger()(narrow, out, ByteSwapFunc());
    }
    return;
  }

  // 64-bit file that ended up here: it can only be the foreign-endian case.
  // Read the raw bytes and swap every field in place.
  assert(!elf_.is_native_endian());
  Memcpy(offset, range, out);
  Munger()(*out, out, ByteSwapFunc());
}
// Looks the section's name up in the file's section-name string table.
// A section whose sh_name is SHN_UNDEF yields an empty [nullptr, 0] view.
string_view ElfFile::Section::GetName() const {
  const bool has_name = header_.sh_name != SHN_UNDEF;
  if (has_name) {
    return elf_->section_name_table_.ReadString(header_.sh_name);
  }
  return string_view(nullptr, 0);
}
// Returns the NUL-terminated string that starts `index` bytes into this
// string table, without the terminator.  Throws when `index` is zero
// (SHN_UNDEF), lies outside the table, or no terminator follows it.
string_view ElfFile::Section::ReadString(Elf64_Word index) const {
  assert(header().sh_type == SHT_STRTAB);

  const bool readable = index != SHN_UNDEF && index < contents_.size();
  if (!readable) {
    THROWF("can't read index $0 from strtab, total size is $1", index,
           contents_.size());
  }

  // Everything from `index` to the end of the table; the string is the prefix
  // of this up to the first NUL byte.
  const string_view tail = StrictSubstr(contents_, index);
  const void* terminator = memchr(tail.data(), '\0', tail.size());

  if (terminator == nullptr) {
    THROW("no NULL terminator found");
  }

  const size_t length = static_cast<const char*>(terminator) - tail.data();
  return tail.substr(0, length);
}
// Number of fixed-size entries in this section: contents size divided by
// sh_entsize.  Throws when sh_entsize is zero.
Elf64_Word ElfFile::Section::GetEntryCount() const {
  const auto entry_size = header_.sh_entsize;
  if (entry_size == 0) {
    THROW("sh_entsize is zero");
  }
  return contents_.size() / entry_size;
}
// Reads symbol number `index` of this symbol table into *sym (always in
// Elf64_Sym form).  When `file_range` is non-null it receives the bytes of the
// file the entry was read from.
//
// Requires: header().sh_type == SHT_SYMTAB || header().sh_type == SHT_DYNSYM
void ElfFile::Section::ReadSymbol(Elf64_Word index, Elf64_Sym* sym,
                                  string_view* file_range) const {
  assert(header().sh_type == SHT_SYMTAB || header().sh_type == SHT_DYNSYM);
  // Compute the offset in 64 bits with an overflow check, like the section
  // and segment header lookups do; a size_t product would truncate on 32-bit
  // hosts and could silently wrap around for a bogus sh_entsize.
  const uint64_t offset = CheckedMul(header_.sh_entsize, index);
  elf_->ReadStruct<Elf32_Sym>(contents(), offset, SymMunger(), file_range, sym);
}
// Reads relocation number `index` of this SHT_REL section into *rel (always
// in Elf64_Rel form).  When `file_range` is non-null it receives the bytes of
// the file the entry was read from.
//
// Requires: header().sh_type == SHT_REL
void ElfFile::Section::ReadRelocation(Elf64_Word index, Elf64_Rel* rel,
                                      string_view* file_range) const {
  assert(header().sh_type == SHT_REL);
  // Compute the offset in 64 bits with an overflow check, like the section
  // and segment header lookups do; a size_t product would truncate on 32-bit
  // hosts and could silently wrap around for a bogus sh_entsize.
  const uint64_t offset = CheckedMul(header_.sh_entsize, index);
  elf_->ReadStruct<Elf32_Rel>(contents(), offset, RelMunger(), file_range, rel);
}
// Reads relocation number `index` of this SHT_RELA section into *rela (always
// in Elf64_Rela form).  When `file_range` is non-null it receives the bytes of
// the file the entry was read from.
//
// Requires: header().sh_type == SHT_RELA
void ElfFile::Section::ReadRelocationWithAddend(Elf64_Word index,
                                                Elf64_Rela* rela,
                                                string_view* file_range) const {
  assert(header().sh_type == SHT_RELA);
  // Compute the offset in 64 bits with an overflow check, like the section
  // and segment header lookups do; a size_t product would truncate on 32-bit
  // hosts and could silently wrap around for a bogus sh_entsize.
  const uint64_t offset = CheckedMul(header_.sh_entsize, index);
  elf_->ReadStruct<Elf32_Rela>(contents(), offset, RelaMunger(), file_range,
                               rela);
}
// Advances to the next note.  After the call either IsDone() is true (no data
// was left) or type()/name()/descriptor() describe the note just parsed.
// Truncated note data is reported by the bounds-checked substring helpers.
void ElfFile::NoteIter::Next() {
  if (remaining_.empty()) {
    done_ = true;
    return;
  }

  Elf_Note note;
  elf_->ReadStruct<Elf_Note>(remaining_, 0, NoteMunger(), nullptr, &note);

  // 32-bit and 64-bit note are the same size, so we don't have to treat
  // them separately when advancing.
  AdvancePastStruct<Elf_Note>(&remaining_);

  type_ = note.n_type;
  name_ = StrictSubstr(remaining_, 0, note.n_namesz);

  // Size might include NULL terminator.  A note may also have no name at all
  // (n_namesz == 0); in that case there is no last byte to look at.
  if (!name_.empty() && name_[name_.size() - 1] == 0) {
    name_ = name_.substr(0, name_.size() - 1);
  }

  // Name and descriptor are each padded to a multiple of 4 bytes.
  remaining_ = StrictSubstr(remaining_, AlignUp(note.n_namesz, 4));
  descriptor_ = StrictSubstr(remaining_, 0, note.n_descsz);
  remaining_ = StrictSubstr(remaining_, AlignUp(note.n_descsz, 4));
}
bool ElfFile::Initialize() {
if (data_.size() < EI_NIDENT) {
return false;
}
unsigned char ident[EI_NIDENT];
memcpy(ident, data_.data(), EI_NIDENT);
if (memcmp(ident, "\177ELF", 4) != 0) {
// Not an ELF file.
return false;
}
switch (ident[EI_CLASS]) {
case ELFCLASS32:
is_64bit_ = false;
break;
case ELFCLASS64:
is_64bit_ = true;
break;
default:
THROWF("unexpected ELF class: $0", ident[EI_CLASS]);
}
switch (ident[EI_DATA]) {
case ELFDATA2LSB:
is_native_endian_ = GetMachineEndian() == Endian::kLittle;
break;
case ELFDATA2MSB:
is_native_endian_ = GetMachineEndian() == Endian::kBig;
break;
default:
THROWF("unexpected ELF data: $0", ident[EI_DATA]);
}
absl::string_view range;
ReadStruct<Elf32_Ehdr>(entire_file(), 0, EhdrMunger(), &range, &header_);
Section section0;
bool has_section0 = 0;
// ELF extensions: if certain fields overflow, we have to find their true data
// from elsewhere. For more info see:
// https://docs.oracle.com/cd/E19683-01/817-3677/chapter6-94076/index.html
if (header_.e_shoff > 0 &&
data_.size() > (header_.e_shoff + header_.e_shentsize)) {
section_count_ = 1;
ReadSection(0, &section0);
has_section0 = true;
}
section_count_ = header_.e_shnum;
section_string_index_ = header_.e_shstrndx;
if (section_count_ == 0 && has_section0) {
section_count_ = section0.header().sh_size;
}
if (section_string_index_ == SHN_XINDEX && has_section0) {
section_string_index_ = section0.header().sh_link;
}
header_region_ = GetRegion(0, header_.e_ehsize);
section_headers_ = GetRegion(header_.e_shoff,
CheckedMul(header_.e_shentsize, section_count_));
segment_headers_ = GetRegion(
header_.e_phoff, CheckedMul(header_.e_phentsize, header_.e_phnum));
if (section_count_ > 0) {
ReadSection(section_string_index_, &section_name_table_);
if (section_name_table_.header().sh_type != SHT_STRTAB) {
THROW("section string index pointed to non-strtab");
}
}
return true;
}
// Fills *segment with program header number `index` and the file contents it
// refers to.  Throws when `index` is not below e_phnum.
void ElfFile::ReadSegment(Elf64_Word index, Segment* segment) const {
  const bool exists = index < header_.e_phnum;
  if (!exists) {
    THROWF("segment $0 doesn't exist, only $1 segments", index,
           header_.e_phnum);
  }

  // Location of the index-th entry of the program header table.
  const uint64_t entry_offset =
      CheckedAdd(header_.e_phoff, CheckedMul(header_.e_phentsize, index));

  Elf64_Phdr& phdr = segment->header_;
  ReadStruct<Elf32_Phdr>(entire_file(), entry_offset, PhdrMunger(),
                         &segment->range_, &phdr);
  segment->contents_ = GetRegion(phdr.p_offset, phdr.p_filesz);
}
// Fills *section with section header number `index`, the file contents it
// refers to (none for SHT_NOBITS), and a back-pointer to this file.  Throws
// when `index` is not below the section count.
void ElfFile::ReadSection(Elf64_Word index, Section* section) const {
  const bool exists = index < section_count_;
  if (!exists) {
    THROWF("tried to read section $0, but there are only $1", index,
           section_count_);
  }

  // Location of the index-th entry of the section header table.
  const uint64_t entry_offset =
      CheckedAdd(header_.e_shoff, CheckedMul(header_.e_shentsize, index));

  Elf64_Shdr& shdr = section->header_;
  ReadStruct<Elf32_Shdr>(entire_file(), entry_offset, ShdrMunger(),
                         &section->range_, &shdr);

  // SHT_NOBITS sections occupy no space in the file.
  section->contents_ = (shdr.sh_type == SHT_NOBITS)
                           ? string_view()
                           : GetRegion(shdr.sh_offset, shdr.sh_size);
  section->elf_ = this;
}
// Linear search over all sections.  On success *section holds the first
// section named `name`; on failure it holds the last section examined (or is
// untouched when the file has no sections).
bool ElfFile::FindSectionByName(absl::string_view name, Section* section) const {
  Elf64_Word candidate = 0;
  while (candidate < section_count_) {
    ReadSection(candidate, section);
    if (section->GetName() == name) {
      return true;
    }
    candidate++;
  }
  return false;
}
// ArFile //////////////////////////////////////////////////////////////////////

// For parsing .a files (static libraries).
//
// The best documentation I've been able to find for this file format is
// Wikipedia: https://en.wikipedia.org/wiki/Ar_(Unix)
//
// So far we only parse the System V / GNU variant.
//
// Like ElfFile this is a non-owning view over the bytes passed in.
class ArFile {
 public:
  // Splits `data` into the 8-byte magic and everything after it.  Input
  // shorter than the magic is accepted: magic() is then simply too short to
  // match, so IsOpen() returns false (both substr calls clamp to the data).
  ArFile(string_view data)
      : magic_(data.substr(0, kMagicSize)),
        contents_(data.substr(std::min<size_t>(data.size(), kMagicSize))) {}

  // True when the data starts with the archive magic.
  bool IsOpen() const { return magic() == string_view(kMagic); }

  string_view magic() const { return magic_; }
  string_view contents() const { return contents_; }

  struct MemberFile {
    enum {
      kSymbolTable,        // Stores a symbol table.
      kLongFilenameTable,  // Stores long filenames, users should ignore.
      kNormal,             // Regular data file.
    } file_type;
    string_view filename;  // Only when file_type == kNormal
    size_t size;           // Size as recorded in the member header.
    string_view header;    // The member's header bytes.
    string_view contents;  // The member's data.
  };

  // Sequentially reads the members of an archive.
  class MemberReader {
   public:
    MemberReader(const ArFile& ar) : remaining_(ar.contents()) {}
    // Reads the next member into *file.  Returns false once no data is left.
    bool ReadMember(MemberFile* file);
    bool IsEof() const { return remaining_.size() == 0; }

   private:
    // Takes `n` bytes, rounded up to an even count, off the front of the
    // unread data.  Note that the returned view has the rounded-up length, so
    // for odd `n` it includes one extra byte.
    string_view Consume(size_t n) {
      n = (n % 2 == 0 ? n : n + 1);
      if (remaining_.size() < n) {
        THROW("premature end of file");
      }
      string_view ret = remaining_.substr(0, n);
      remaining_.remove_prefix(n);
      return ret;
    }

    // Contents of the most recently seen long-filename table member.
    string_view long_filenames_;
    string_view remaining_;
  };

 private:
  const string_view magic_;
  const string_view contents_;

  static constexpr const char* kMagic = "!<arch>\n";
  static constexpr int kMagicSize = 8;
};
// Parses the next archive member (header + data) into *file.
//
// Returns false when the archive is exhausted.  Throws on truncated data, an
// unparsable size field, an unknown special member name, or a name that is
// not slash-terminated (BSD-style archives are not supported).
//
// file->filename is only assigned for regular members; for the symbol table
// and the long-filename table it is left as it was.
bool ArFile::MemberReader::ReadMember(MemberFile* file) {
  // On-disk member header: fixed-width text fields.
  struct Header {
    char file_id[16];
    char modified_timestamp[12];
    char owner_id[6];
    char group_id[6];
    char mode[8];
    char size[10];
    char end[2];
  };

  if (remaining_.size() == 0) {
    return false;
  } else if (remaining_.size() < sizeof(Header)) {
    THROW("Premature EOF in AR data");
  }

  const Header* header = reinterpret_cast<const Header*>(remaining_.data());
  file->header = Consume(sizeof(Header));

  string_view file_id(&header->file_id[0], sizeof(header->file_id));
  string_view size_str(&header->size[0], sizeof(header->size));
  file->size = StringViewToSize(size_str);
  file->contents = Consume(file->size);
  file->file_type = MemberFile::kNormal;

  if (file_id[0] == '/') {
    // Special filename, internal to the format.
    if (file_id[1] == ' ') {
      file->file_type = MemberFile::kSymbolTable;
    } else if (file_id[1] == '/') {
      file->file_type = MemberFile::kLongFilenameTable;
      long_filenames_ = file->contents;
    } else if (isdigit(static_cast<unsigned char>(file_id[1]))) {
      // "/<offset>": the real name lives in the long-filename table, starting
      // at <offset> and terminated by '/'.  (The cast matters: passing a
      // negative char to isdigit is undefined behavior.)
      size_t offset = StringViewToSize(file_id.substr(1));
      size_t end = long_filenames_.find('/', offset);

      if (end == std::string::npos) {
        THROW("Unterminated long filename");
      }

      file->filename = long_filenames_.substr(offset, end - offset);
    } else {
      THROW("Unexpected special filename in AR archive");
    }
  } else {
    // Normal filename, slash-terminated.
    size_t slash = file_id.find('/');

    if (slash == std::string::npos) {
      THROW("BSD-style AR not yet implemented");
    }

    file->filename = file_id.substr(0, slash);
  }

  return true;
}
// Forwards to sink->AddFileRange(analyzer, label, range); does nothing when
// `sink` is null.
void MaybeAddFileRange(const char* analyzer, RangeSink* sink, string_view label,
                       string_view range) {
  if (sink == nullptr) {
    return;
  }
  sink->AddFileRange(analyzer, label, range);
}
// Iterate over each ELF file, agnostic to whether it is inside a .a (AR) file
// or not.
//
// `func` is invoked as func(elf, filename, index_base) once per ELF file:
// for a plain ELF input with the input's own filename and index_base 0; for an
// archive once per ELF member, with the member's name and index_base equal to
// the total section count of the ELF members visited before it.
//
// When `sink` is non-null, the archive's own structures (magic, member
// headers, symbol table, long-filename table) and non-ELF members are reported
// to it as file ranges.  Throws if the input is neither ELF nor an archive.
template <class Func>
void ForEachElf(const InputFile& file, RangeSink* sink, Func func) {
  uint64_t index_base = 0;
  ArFile ar_file(file.data());

  if (!ar_file.IsOpen()) {
    // Not an archive: the input must be a single ELF file.
    ElfFile elf(file.data());
    if (!elf.IsOpen()) {
      THROWF("Not an ELF or Archive file: $0", file.filename());
    }
    func(elf, file.filename(), index_base);
    return;
  }

  const char* const analyzer = "ar_archive";
  ArFile::MemberFile member;
  ArFile::MemberReader reader(ar_file);

  MaybeAddFileRange(analyzer, sink, "[AR Headers]", ar_file.magic());

  while (reader.ReadMember(&member)) {
    MaybeAddFileRange(analyzer, sink, "[AR Headers]", member.header);

    if (member.file_type == ArFile::MemberFile::kSymbolTable) {
      MaybeAddFileRange(analyzer, sink, "[AR Symbol Table]", member.contents);
    } else if (member.file_type == ArFile::MemberFile::kLongFilenameTable) {
      MaybeAddFileRange(analyzer, sink, "[AR Headers]", member.contents);
    } else if (member.file_type == ArFile::MemberFile::kNormal) {
      ElfFile elf(member.contents);
      if (elf.IsOpen()) {
        func(elf, member.filename, index_base);
        index_base += elf.section_count();
      } else {
        MaybeAddFileRange(analyzer, sink, "[AR Non-ELF Member File]",
                          member.contents);
      }
    }
  }
}
// For object files, addresses are relative to the section they live in, which
// is indicated by ndx.  We split this into:
//
// - 24 bits for index (up to 16M symbols with -ffunction-sections)
// - 40 bits for address (up to 1TB section)
//
// For anything that is not an object file the address is returned unchanged.
// Throws when either component does not fit in its share of the bits.
static uint64_t ToVMAddr(uint64_t addr, uint64_t ndx, bool is_object) {
  if (!is_object) {
    return addr;
  }

  constexpr int kAddrBits = 40;
  constexpr uint64_t kIndexLimit = uint64_t{1} << 24;
  constexpr uint64_t kAddrLimit = uint64_t{1} << kAddrBits;

  if (ndx >= kIndexLimit) {
    THROW("ndx overflow: too many sections");
  }
  if (addr >= kAddrLimit) {
    THROW("address overflow: section too big");
  }
  return (ndx << kAddrBits) | addr;
}
// True when `data` starts with the AR archive magic.
static bool IsArchiveFile(string_view data) {
  return ArFile(data).IsOpen();
}
// True when `data` is an archive, or an ELF file of type ET_REL (a relocatable
// object).
static bool IsObjectFile(string_view data) {
  if (IsArchiveFile(data)) {
    return true;
  }
  ElfFile elf(data);
  return elf.IsOpen() && elf.header().e_type == ET_REL;
}
// Throws when the sink's input file is an object file or archive; `source`
// names the data source for the error message.
static void CheckNotObject(const char* source, RangeSink* sink) {
  const bool is_object = IsObjectFile(sink->input_file().data());
  if (!is_object) {
    return;
  }
  THROWF(
      "can't use data source '$0' on object files (only binaries and shared "
      "libraries)",
      source);
}
// Maps an ELF e_machine value to the capstone architecture/mode pair used for
// disassembly.  Returns true and fills *arch and *mode on success; returns
// false (leaving both untouched) when the machine type has no mapping.
static bool ElfMachineToCapstone(Elf64_Half e_machine, cs_arch* arch,
                                 cs_mode* mode) {
  switch (e_machine) {
    case EM_386:
      *arch = CS_ARCH_X86;
      *mode = CS_MODE_32;
      return true;
    case EM_X86_64:
      *arch = CS_ARCH_X86;
      *mode = CS_MODE_64;
      return true;

    // These aren't tested, but we include them on the off-chance
    // that it will work.
    case EM_ARM:
      *arch = CS_ARCH_ARM;
      *mode = CS_MODE_LITTLE_ENDIAN;
      return true;
    case EM_AARCH64:
      *arch = CS_ARCH_ARM64;
      *mode = CS_MODE_ARM;
      return true;
    case EM_MIPS:
      *arch = CS_ARCH_MIPS;
      // Every successful return must define *mode; previously it was left
      // untouched here and callers read an indeterminate value.
      // NOTE(review): 32-bit little-endian is a best-effort default -- the
      // ELF class and byte order are not available in this function.
      *mode = CS_MODE_32;
      return true;
    case EM_PPC:
      *arch = CS_ARCH_PPC;
      *mode = CS_MODE_32;
      return true;
    case EM_PPC64:
      *arch = CS_ARCH_PPC;
      *mode = CS_MODE_64;
      return true;
    case EM_SPARC:
      *arch = CS_ARCH_SPARC;
      *mode = CS_MODE_BIG_ENDIAN;
      return true;
    case EM_SPARCV9:
      *arch = CS_ARCH_SPARC;
      *mode = CS_MODE_V9;
      return true;

    default:
      if (verbose_level > 1) {
        printf(
            "Unable to map to capstone target, disassembly will be "
            "unavailable");
      }
      return false;
  }
}
static bool ReadElfArchMode(const InputFile& file, cs_arch* arch, cs_mode* mode) {
bool capstone_available = true;
ForEachElf(file, nullptr,
[&capstone_available, arch, mode](const ElfFile& elf,
string_view /*filename*/,
uint32_t /*index_base*/) {
// Last .o file wins? (For .a files)? It's kind of arbitrary,
// but a single .a file shouldn't have multiple archs in it.
capstone_available &=
ElfMachineToCapstone(elf.header().e_machine, arch, mode);
});
return capstone_available;
}
// Walks every SHT_SYMTAB section of every ELF file in `file`.  For each
// symbol that is not a section symbol, is defined, and has a non-zero size:
//   - when `sink` is set and we are not disassembling, its VM range is added
//     to the sink under the demangled name;
//   - when `table` is set, (name -> (address, size)) is inserted into it;
//   - when disassembling (requested and capstone supports the target) and the
//     symbol is a function, its code is scanned for references.
static void ReadELFSymbols(const InputFile& file, RangeSink* sink,
                           SymbolTable* table, bool disassemble) {
  const bool is_object = IsObjectFile(file.data());
  DisassemblyInfo info;
  // The lambda below captures by value, so it reaches `info` via a pointer.
  DisassemblyInfo* infop = &info;
  const bool capstone_available =
      ReadElfArchMode(file, &info.arch, &info.mode);

  ForEachElf(
      file, sink,
      [=](const ElfFile& elf, string_view /*filename*/, uint64_t index_base) {
        const bool do_disassemble = capstone_available && disassemble;

        for (Elf64_Xword sec_idx = 1; sec_idx < elf.section_count();
             sec_idx++) {
          ElfFile::Section symtab;
          elf.ReadSection(sec_idx, &symtab);

          if (symtab.header().sh_type != SHT_SYMTAB) {
            continue;
          }

          const Elf64_Word symbol_count = symtab.GetEntryCount();

          // Find the corresponding section where the strings for the symbol
          // table can be found.
          ElfFile::Section strtab;
          elf.ReadSection(symtab.header().sh_link, &strtab);
          if (strtab.header().sh_type != SHT_STRTAB) {
            THROW("symtab section pointed to non-strtab section");
          }

          // Entry 0 of a symbol table is skipped.
          for (Elf64_Word sym_idx = 1; sym_idx < symbol_count; sym_idx++) {
            Elf64_Sym sym;
            symtab.ReadSymbol(sym_idx, &sym, nullptr);

            // Zero-sized symbols are skipped too.  Maybe try to refine?  See
            // ReadELFSectionsRefineSymbols below.
            const bool skip = ELF64_ST_TYPE(sym.st_info) == STT_SECTION ||
                              sym.st_shndx == STN_UNDEF || sym.st_size == 0;
            if (skip) {
              continue;
            }

            string_view name = strtab.ReadString(sym.st_name);
            uint64_t full_addr =
                ToVMAddr(sym.st_value, index_base + sym.st_shndx, is_object);

            if (sink && !do_disassemble) {
              // Checks for a negative number in two's complement
              if (sym.st_size > 0x7fffffffffffffffULL) {
                fprintf(stderr, "Invalid symbol size at 0x%" PRIx64
                        ", size: 0x%" PRIx64 ", shndx: %d, name: %.*s\n",
                        full_addr, sym.st_size, sym.st_shndx,
                        static_cast<int>(name.size()), name.data());
              } else {
                sink->AddVMRangeAllowAlias(
                    "elf_symbols", full_addr, sym.st_size,
                    ItaniumDemangle(name, sink->data_source()));
              }
            }

            if (table) {
              table->insert(
                  std::make_pair(name, std::make_pair(full_addr, sym.st_size)));
            }

            if (do_disassemble && ELF64_ST_TYPE(sym.st_info) == STT_FUNC) {
              if (verbose_level > 1) {
                printf("Disassembling function: %s\n", name.data());
              }
              // TODO(brandonvu) Continue if VM pointer cannot be translated. Issue #315
              uint64_t unused;
              if (!sink->Translator()->vm_map.Translate(full_addr, &unused)) {
                WARN("Can't translate VM pointer ($0) to file", full_addr);
                continue;
              }
              infop->text =
                  sink->TranslateVMToFile(full_addr).substr(0, sym.st_size);
              infop->start_address = full_addr;
              DisassembleFindReferences(*infop, sink);
            }
          }
        }
      });
}
// For every named, defined, non-section symbol in `section` (a symbol table),
// attributes two file ranges to the symbol's VM address: the symbol's name in
// the string table (including its trailing NUL) and the symbol table entry
// itself.
static void ReadELFSymbolTableEntries(const ElfFile& elf,
                                      const ElfFile::Section& section,
                                      uint64_t index_base, bool is_object,
                                      RangeSink* sink) {
  const Elf64_Word entry_count = section.GetEntryCount();

  // Find the corresponding section where the strings for the symbol
  // table can be found.
  ElfFile::Section strtab;
  elf.ReadSection(section.header().sh_link, &strtab);
  if (strtab.header().sh_type != SHT_STRTAB) {
    THROW("symtab section pointed to non-strtab section");
  }

  // Entry 0 of a symbol table is skipped.
  for (Elf64_Word entry = 1; entry < entry_count; entry++) {
    Elf64_Sym sym;
    string_view entry_bytes;
    section.ReadSymbol(entry, &sym, &entry_bytes);

    if (ELF64_ST_TYPE(sym.st_info) == STT_SECTION) continue;
    if (sym.st_shndx == STN_UNDEF) continue;
    if (sym.st_name == SHN_UNDEF) continue;

    const string_view bare_name = strtab.ReadString(sym.st_name);
    const uint64_t vm_addr =
        ToVMAddr(sym.st_value, index_base + sym.st_shndx, is_object);

    // Capture the trailing NULL.
    const string_view name_with_nul(bare_name.data(), bare_name.size() + 1);
    sink->AddFileRangeForVMAddr("elf_symtab_name", vm_addr, name_with_nul);
    sink->AddFileRangeForVMAddr("elf_symtab_sym", vm_addr, entry_bytes);
  }
}
// Attributes the file range of every entry in `section` (an SHT_RELA
// relocation table) to the VM address the relocation applies to.  For object
// files that address is relative to the section named by sh_info.
static void ReadELFRelaEntries(const ElfFile::Section& section,
                               uint64_t index_base, bool is_object,
                               RangeSink* sink) {
  Elf64_Word rela_count = section.GetEntryCount();
  Elf64_Word sh_info = section.header().sh_info;
  // Unlike symbol tables, relocation tables have no reserved entry 0, so the
  // walk has to start at the very first entry.
  for (Elf64_Word i = 0; i < rela_count; i++) {
    Elf64_Rela rela;
    string_view rela_range;
    section.ReadRelocationWithAddend(i, &rela, &rela_range);
    uint64_t full_addr =
        ToVMAddr(rela.r_offset, index_base + sh_info, is_object);
    sink->AddFileRangeForVMAddr("elf_rela", full_addr, rela_range);
  }
}
// Adds file ranges for the symbol tables and string tables *themselves* (i.e.
// the space that the symtab/strtab take up in the file).  This will cover
//   .symtab
//   .strtab
//   .dynsym
//   .dynstr
//
// It also attributes SHT_RELA relocation entries and the contents of
// .eh_frame / .eh_frame_hdr.
static void ReadELFTables(const InputFile& file, RangeSink* sink) {
  bool is_object = IsObjectFile(file.data());

  // Disassemble first, because sometimes other tables will refer to things we
  // discovered through disassembling.
  ReadELFSymbols(file, sink, nullptr, true);

  // Now scan other tables.
  ForEachElf(file, sink,
             // index_base is taken as uint64_t, the type ForEachElf passes,
             // so it is not narrowed before ToVMAddr's overflow check sees it.
             [sink, is_object](const ElfFile& elf, string_view /*filename*/,
                               uint64_t index_base) {
               for (Elf64_Xword i = 1; i < elf.section_count(); i++) {
                 ElfFile::Section section;
                 elf.ReadSection(i, &section);

                 switch (section.header().sh_type) {
                   case SHT_SYMTAB:
                   case SHT_DYNSYM:
                     ReadELFSymbolTableEntries(elf, section, index_base,
                                               is_object, sink);
                     break;
                   case SHT_RELA:
                     ReadELFRelaEntries(section, index_base, is_object, sink);
                     break;
                 }

                 // We are looking by section name, which is a little different
                 // than what the loader actually does (which is find
                 // eh_frame_hdr from the program headers and then find eh_frame
                 // fde entries from there).  But these section names should be
                 // standard enough that this approach works also.
                 const string_view section_name = section.GetName();
                 if (section_name == ".eh_frame") {
                   ReadEhFrame(section.contents(), sink);
                 } else if (section_name == ".eh_frame_hdr") {
                   ReadEhFrameHdr(section.contents(), sink);
                 }
               }
             });
}
// Selects the label DoReadELFSections() attaches to each section's range.
enum ReportSectionsBy {
// Label is the section name as-is.
kReportBySectionName,
// Label is the section name wrapped as "[section NAME]".
kReportByEscapedSectionName,
// Label is "Section [...]" listing the A/W/X flags set on the section.
kReportByFlags,
// Label is the filename passed along by ForEachElf (the archive member name
// when the input is an archive).
kReportByArchiveMember,
};
// Adds one range per section of every ELF file in the sink's input file.
// `report_by` selects the label used for the range (section name, escaped
// section name, flag summary, or containing file name).  The VM size of a
// section is its size if SHF_ALLOC is set and 0 otherwise; the file size is 0
// for SHT_NOBITS sections.
static void DoReadELFSections(RangeSink* sink, enum ReportSectionsBy report_by) {
  bool is_object = IsObjectFile(sink->input_file().data());
  ForEachElf(
      sink->input_file(), sink,
      // index_base is taken as uint64_t, the type ForEachElf passes, so it is
      // not narrowed before ToVMAddr's overflow check sees it.
      [=](const ElfFile& elf, string_view filename, uint64_t index_base) {
        // Declared outside the loop so the buffer is reused across sections.
        std::string name_from_flags;
        for (Elf64_Xword i = 1; i < elf.section_count(); i++) {
          ElfFile::Section section;
          elf.ReadSection(i, &section);
          string_view name = section.GetName();

          if (name.size() == 0) {
            // NOTE(review): this abandons the whole file at the first unnamed
            // section (later sections and the archive-member coverage below
            // are skipped).  Possibly `continue` was intended -- confirm.
            return;
          }

          const auto& header = section.header();
          auto addr = header.sh_addr;
          auto size = header.sh_size;
          auto filesize = (header.sh_type == SHT_NOBITS) ? 0 : size;
          auto vmsize = (header.sh_flags & SHF_ALLOC) ? size : 0;

          string_view contents = StrictSubstr(section.contents(), 0, filesize);

          uint64_t full_addr = ToVMAddr(addr, index_base + i, is_object);

          if (report_by == kReportByFlags) {
            // Build "Section [AWX]" from the flags that are set.
            name_from_flags = "Section [";

            if (header.sh_flags & SHF_ALLOC) {
              name_from_flags += 'A';
            }
            if (header.sh_flags & SHF_WRITE) {
              name_from_flags += 'W';
            }
            if (header.sh_flags & SHF_EXECINSTR) {
              name_from_flags += 'X';
            }

            name_from_flags += ']';
            sink->AddRange("elf_section", name_from_flags, full_addr, vmsize,
                           contents);
          } else if (report_by == kReportBySectionName) {
            sink->AddRange("elf_section", name, full_addr, vmsize, contents);
          } else if (report_by == kReportByEscapedSectionName) {
            sink->AddRange("elf_section",
                           std::string("[section ") + std::string(name) + "]",
                           full_addr, vmsize, contents);
          } else if (report_by == kReportByArchiveMember) {
            sink->AddRange("elf_section", filename, full_addr, vmsize,
                           contents);
          }
        }

        if (report_by == kReportByArchiveMember) {
          // Cover unmapped parts of the file.
          sink->AddFileRange("unmapped_armember", filename, elf.entire_file());
        }
      });
}
// Selects the label format produced by GetSegmentName().
enum ReportSegmentsBy {
// Plain label, e.g. "LOAD #0 [RX]".
kReportBySegmentName,
// Same label wrapped in an extra pair of square brackets.
kReportByEscapedSegmentName,
};
// Builds the label for segment number `i`: "LOAD #<i> [<perms>]", where
// <perms> lists R/W/X for the permission flags set on the segment.  With
// kReportByEscapedSegmentName the whole label is wrapped in another pair of
// square brackets.
//
// Include the segment index in the label, to support embedded.
//
// Including the index in the segment label differentiates
// segments with the same access control (e.g. RWX vs RW).  In
// ELF files built for embedded microcontroller projects, a
// segment is used for each distinct type of memory.  In simple
// cases, there is a segment for the flash (which will store
// code and read-only data) and a segment for RAM (which
// usually stores globals, stacks, and maybe a heap).  In more
// involved projects, there may be special segments for faster
// RAM (e.g. core coupled RAM or CCRAM), or there may even be
// memory overlays to support manual paging of code from flash
// (which may be slow) into RAM.
std::string GetSegmentName(const ElfFile::Segment& segment, Elf64_Xword i,
                           ReportSegmentsBy report_by) {
  const auto flags = segment.header().p_flags;

  std::string perms;
  if (flags & PF_R) perms += 'R';
  if (flags & PF_W) perms += 'W';
  if (flags & PF_X) perms += 'X';

  std::string label = absl::StrCat("LOAD #", i, " [", perms, "]");

  if (report_by != kReportByEscapedSegmentName) {
    return label;
  }
  return absl::StrCat("[", label, "]");
}
// Adds a range for each PT_LOAD segment (labelled via GetSegmentName) and
// then, in a second pass over the file, a range labelled "TLS" for each
// PT_TLS segment.  The two passes are kept separate so that all LOAD ranges
// are added before any TLS range.
static void DoReadELFSegments(RangeSink* sink, ReportSegmentsBy report_by) {
  // Pass 1: loadable segments.
  ForEachElf(sink->input_file(), sink,
             [=](const ElfFile& elf, string_view /*filename*/,
                 uint64_t /*index_base*/) {
               for (Elf64_Xword i = 0; i < elf.header().e_phnum; i++) {
                 ElfFile::Segment segment;
                 elf.ReadSegment(i, &segment);
                 const auto& header = segment.header();

                 if (header.p_type != PT_LOAD) {
                   continue;
                 }

                 // Only build the label for segments that are reported.
                 const std::string name =
                     GetSegmentName(segment, i, report_by);
                 sink->AddRange("elf_segment", name, header.p_vaddr,
                                header.p_memsz, segment.contents());
               }
             });

  // Pass 2: thread-local storage segments.
  ForEachElf(sink->input_file(), sink,
             [sink](const ElfFile& elf, string_view /*filename*/,
                    uint64_t /*index_base*/) {
               for (Elf64_Xword i = 0; i < elf.header().e_phnum; i++) {
                 ElfFile::Segment segment;
                 elf.ReadSegment(i, &segment);
                 const auto& header = segment.header();
                 if (header.p_type != PT_TLS) continue;
                 sink->AddRange("elf_segment", "TLS", header.p_vaddr,
                                header.p_memsz, segment.contents());
               }
             });
}
// Entry point for the segment report: real program headers for binaries and
// shared libraries, flag-based pseudo segments for object files/archives.
static void ReadELFSegments(RangeSink* sink) {
  const bool is_object = IsObjectFile(sink->input_file().data());
  if (!is_object) {
    DoReadELFSegments(sink, kReportBySegmentName);
    return;
  }

  // Object files don't actually have segments.  But we can cheat a little bit
  // and make up "segments" based on section flags.  This can be really useful
  // when you are compiling with -ffunction-sections and -fdata-sections,
  // because in those cases the actual "sections" report becomes pretty
  // useless (since every function/data has its own section, it's like the
  // "symbols" report except less readable).
  DoReadELFSections(sink, kReportByFlags);
}
// ELF files put debug info directly into the binary, so we call the DWARF
// reader directly on them. At the moment we don't attempt to make these
// work with object files.
void ReadDWARFSections(const InputFile &file, dwarf::File *dwarf,
RangeSink *sink) {
ElfFile elf(file.data());
assert(elf.IsOpen());
dwarf->file = &file;
dwarf->open = &ReadDWARFSections;
for (Elf64_Xword i = 1; i < elf.section_count(); i++) {
ElfFile::Section section;
elf.ReadSection(i, &section);
string_view name = section.GetName();
string_view contents = section.contents();
uint64_t uncompressed_size = 0;
if (section.header().sh_flags & SHF_COMPRESSED) {
// Standard ELF section compression, produced when you link with
// --compress-debug-sections=zlib-gabi
Elf64_Chdr chdr;
absl::string_view range;
elf.ReadStruct<Elf32_Chdr>(contents, 0, ChdrMunger(), &range, &chdr);
if (chdr.ch_type != ELFCOMPRESS_ZLIB) {
// Unknown compression format.
continue;
}
uncompressed_size = chdr.ch_size;
contents.remove_prefix(range.size());
}
if (name.find(".debug_") == 0) {
name.remove_prefix(string_view(".debug_").size());
} else if (name.find(".zdebug_") == 0) {
// GNU format compressed debug info, produced when you link with
// --compress-debug-sections=zlib-gnu
name.remove_prefix(string_view(".zdebug_").size());
if (ReadBytes(4, &contents) != "ZLIB") {
continue; // Bad compression header.
}
uncompressed_size = ReadBigEndian<uint64_t>(&contents);
}
static constexpr string_view dwo_str(".dwo");
if (name.size() >= dwo_str.size() &&
name.rfind(".dwo") == name.size() - dwo_str.size()) {
name.remove_suffix(dwo_str.size());
}
if (string_view* member = dwarf->GetFieldByName(name)) {
if (uncompressed_size) {
*member = sink->ZlibDecompress(contents, uncompressed_size);
} else {
*member = section.contents();
}
}
}
}
// Adds the fallback ranges that make sure the whole VM space and the whole
// file end up covered in |sink|.
//
// For the base map, and for every data source other than segments, the ELF
// header, section-header and program-header regions are added; non-base maps
// additionally get the sections (escaped names) first. After that the segments
// (escaped names) are added, and finally the entire file as "[Unmapped]".
void AddCatchAll(RangeSink* sink) {
  const bool is_base_map = sink->IsBaseMap();

  if (is_base_map || sink->data_source() != DataSource::kSegments) {
    if (!is_base_map) {
      DoReadELFSections(sink, kReportByEscapedSectionName);
    }

    const auto add_header_regions = [sink](const ElfFile& elf,
                                           string_view /*filename*/,
                                           uint32_t /*index_base*/) {
      sink->AddFileRange("elf_catchall", "[ELF Header]", elf.header_region());
      sink->AddFileRange("elf_catchall", "[ELF Section Headers]",
                         elf.section_headers());
      sink->AddFileRange("elf_catchall", "[ELF Program Headers]",
                         elf.segment_headers());
    };
    ForEachElf(sink->input_file(), sink, add_header_regions);
  }

  // The last-line fallback to make sure we cover the entire VM space.
  DoReadELFSegments(sink, kReportByEscapedSegmentName);

  // The last-line fallback to make sure we cover the entire file.
  sink->AddFileRange("elf_catchall", "[Unmapped]", sink->input_file().data());
}
class ElfObjectFile : public ObjectFile {
public:
ElfObjectFile(std::unique_ptr<InputFile> file, std::optional<std::string> link_map_file)
: ObjectFile(std::move(file)) {
if (link_map_file.has_value()) {
std::ifstream infile(*link_map_file);
std::string link_map;
// Strip comments and empty lines.
for (std::string line; getline(infile, line);) {
if (line.empty()) continue;
if (line[0] == '#') continue;
link_map += line;
link_map += '\n';
}
absl::StripLeadingAsciiWhitespace(&link_map);
absl::StripTrailingAsciiWhitespace(&link_map);
link_map_symbols_ = bloaty_link_map::ParseLldLinkMap(link_map);
link_map_sections_ = bloaty_link_map::ParseLldLinkMapSections(link_map);
}
}
std::string GetBuildId() const override {
if (IsObjectFile(file_data().data())) {
// Object files don't have a build ID.
return std::string();
}
ElfFile elf(file_data().data());
assert(elf.IsOpen());
// Search for a build-id section.
for (Elf64_Xword i = 1; i < elf.section_count(); i++) {
ElfFile::Section section;
elf.ReadSection(i, &section);
if (section.header().sh_type != SHT_NOTE) {
continue;
}
for (ElfFile::NoteIter notes(section); !notes.IsDone(); notes.Next()) {
if (notes.name() == "GNU" && notes.type() == NT_GNU_BUILD_ID) {
return std::string(notes.descriptor());
}
}
}
// Search for a build-id segment.
for (Elf64_Xword i = 0; i < elf.header().e_phnum; i++) {
ElfFile::Segment segment;
elf.ReadSegment(i, &segment);
const auto &header = segment.header();
if (header.p_type != PT_NOTE) {
continue;
}
for (ElfFile::NoteIter notes(segment, &elf); !notes.IsDone(); notes.Next()) {
if (notes.name() == "GNU" && notes.type() == NT_GNU_BUILD_ID) {
return std::string(notes.descriptor());
}
}
}
// No build id section found.
return std::string();
}
// Splits the input file into fixed-size frames and adds one "access_pattern"
// file range per frame to |sink|, labelled "Hot" or "Cold".
//
// The frame size comes from options().access_pattern_frame_size(). The cold
// bytes filter (options().cold_bytes_filter()) is a comma-separated list of
// "frame:count" entries; a frame is "Hot" if any entry names it with a
// non-zero count and "Cold" otherwise.
//
// Throws if no cold bytes filter was specified, if the frame size is zero, if
// an entry is malformed or not a valid unsigned integer, or if an entry names
// a frame beyond the end of the file.
void ReadAccessPattern(RangeSink* sink) const {
  if (!sink->options().has_cold_bytes_filter()) {
    THROW("need to specify cold bytes filter");
  }

  const uint64_t frame_size = sink->options().access_pattern_frame_size();
  if (frame_size == 0) {
    // Guard the divisions below.
    THROW("access pattern frame size must be non-zero");
  }

  const size_t file_size = sink->input_file().data().size();
  // Round up so that a trailing partial frame gets its own entry. Written
  // without "file_size + frame_size - 1" so a huge frame size cannot overflow.
  const size_t num_frames =
      file_size / frame_size + (file_size % frame_size != 0 ? 1 : 0);

  // One element per frame; true means the frame was accessed at least once.
  std::vector<bool> is_hot(num_frames, false);

  // Local copy: the string_views produced by the splits below point into it.
  const auto filter = sink->options().cold_bytes_filter();
  for (string_view entry : absl::StrSplit(filter, ',')) {
    const std::vector<string_view> fields = absl::StrSplit(entry, ':');
    if (fields.size() != 2) {
      THROWF("Invalid format in cold bytes filter: $0", entry);
    }
    // StringViewToSize parses straight into size_t and reports bad input
    // through the regular error path, unlike std::stoi, which truncates to
    // int and throws std::invalid_argument / std::out_of_range.
    const size_t frame = StringViewToSize(fields[0]);
    const size_t count = StringViewToSize(fields[1]);
    if (frame >= is_hot.size()) {
      THROW("access pattern exceeded end of file");
    }
    if (count > 0) {
      is_hot[frame] = true;
    }
  }

  for (size_t i = 0; i < is_hot.size(); i++) {
    const uint64_t offset = i * frame_size;
    // The final frame is shorter when the file size is not a multiple of the
    // frame size.
    const uint64_t length = std::min<uint64_t>(frame_size, file_size - offset);
    const std::string label = is_hot[i] ? "Hot" : "Cold";
    sink->AddFileRange("access_pattern", label, offset, length);
  }
}
// Adds a "link_map" VM range to |sink| for every symbol from the link map,
// followed by one "[section NAME]" range for every link map section.
//
// Symbols are labelled with their demangled name. When the symbol's compile
// unit transforms (TransformCompileUnitForFuchsia) into something carrying a
// Rust crate, the label is instead produced by EncodeSymbolWithCrateId().
// Does nothing if no link map symbols were loaded; skips the section ranges if
// no link map sections were loaded.
void ReadLinkMapSymbols(RangeSink* sink) const {
  if (!link_map_symbols_.has_value()) return;

  for (const auto& symbol : *link_map_symbols_) {
    auto maybe_transformed_compile_unit =
        bloaty_link_map::TransformCompileUnitForFuchsia(symbol.compile_unit);
    auto demangled = ItaniumDemangle(symbol.name, sink->data_source());

    if (maybe_transformed_compile_unit.has_value()) {
      // Bind by reference: copying the whole tuple for every symbol is
      // unnecessary work.
      const auto& [transformed_compile_unit, maybe_rust_crate] =
          *maybe_transformed_compile_unit;
      if (maybe_rust_crate.has_value()) {
        auto symbol_with_crate_id =
            EncodeSymbolWithCrateId(demangled, *maybe_rust_crate);
        sink->AddVMRange("link_map", symbol.addr, symbol.size,
                         symbol_with_crate_id);
        continue;
      }
    }
    sink->AddVMRange("link_map", symbol.addr, symbol.size, demangled);
  }

  if (!link_map_sections_.has_value()) return;

  for (const auto& section : *link_map_sections_) {
    sink->AddVMRange("link_map", section.addr, section.size,
                     "[section " + section.name + "]");
  }
}
// Adds a "link_map" VM range to |sink| for every link map symbol whose compile
// unit can be transformed by TransformCompileUnitForFuchsia(), labelled with
// the transformed compile unit, followed by one "[section NAME]" range for
// every link map section.
//
// Symbols whose compile unit does not transform are skipped. Does nothing if
// no link map symbols were loaded; skips the section ranges if no link map
// sections were loaded.
void ReadLinkMapCompileUnits(RangeSink* sink) const {
  if (!link_map_symbols_.has_value()) return;

  for (const auto& symbol : *link_map_symbols_) {
    auto maybe_transformed_compile_unit =
        bloaty_link_map::TransformCompileUnitForFuchsia(symbol.compile_unit);
    if (!maybe_transformed_compile_unit.has_value()) {
      continue;
    }
    // Bind by reference: copying the whole tuple for every symbol is
    // unnecessary work. Only the compile unit is needed here.
    const auto& [transformed_compile_unit, maybe_rust_crate] =
        *maybe_transformed_compile_unit;
    sink->AddVMRange("link_map", symbol.addr, symbol.size,
                     transformed_compile_unit);
  }

  if (!link_map_sections_.has_value()) return;

  for (const auto& section : *link_map_sections_) {
    sink->AddVMRange("link_map", section.addr, section.size,
                     "[section " + section.name + "]");
  }
}
// Fills every sink in |sinks| according to that sink's data source.
//
// For each sink: the reader(s) matching its data source run first, then (for
// all sources except segments, sections, archive members and access pattern)
// the ELF tables are added, and finally the catch-all ranges. Throws on an
// unrecognised data source.
void ProcessFile(const std::vector<RangeSink*>& sinks) const override {
  for (RangeSink* sink : sinks) {
    const auto source = sink->data_source();

    if (verbose_level > 1) {
      printf("Scanning source %d\n", static_cast<int>(source));
    }

    switch (source) {
      case DataSource::kSegments:
        ReadELFSegments(sink);
        break;

      case DataSource::kSections:
        DoReadELFSections(sink, kReportBySectionName);
        break;

      case DataSource::kRawSymbols:
      case DataSource::kShortSymbols:
      case DataSource::kFullSymbols:
        ReadLinkMapSymbols(sink);
        ReadELFSymbols(debug_file().file_data(), sink, nullptr, false);
        break;

      case DataSource::kArchiveMembers:
        DoReadELFSections(sink, kReportByArchiveMember);
        break;

      case DataSource::kAccessPattern: {
        ReadAccessPattern(sink);
        break;
      }

      case DataSource::kCompileUnits: {
        CheckNotObject("compileunits", sink);

        // Build a raw-symbol map first; the DWARF compile-unit reader
        // consumes it.
        SymbolTable symbol_table;
        DualMap raw_symbol_map;
        NameMunger no_op_munger;
        RangeSink raw_symbol_sink(&debug_file().file_data(),
                                  sink->options(),
                                  DataSource::kRawSymbols,
                                  &sinks[0]->MapAtIndex(0), nullptr);
        raw_symbol_sink.AddOutput(&raw_symbol_map, &no_op_munger);
        ReadELFSymbols(debug_file().file_data(), &raw_symbol_sink,
                       &symbol_table, false);

        dwarf::File dwarf_file;
        ReadDWARFSections(debug_file().file_data(), &dwarf_file, sink);
        ReadDWARFCompileUnits(dwarf_file, raw_symbol_map, sink);
        ReadLinkMapCompileUnits(sink);
        break;
      }

      case DataSource::kInlines: {
        // NOTE(review): the label passed here is "lineinfo" although the
        // data source is kInlines -- confirm this is intended.
        CheckNotObject("lineinfo", sink);
        dwarf::File dwarf_file;
        ReadDWARFSections(debug_file().file_data(), &dwarf_file, sink);
        ReadDWARFInlines(dwarf_file, sink, true);
        DoReadELFSections(sink, kReportByEscapedSectionName);
        break;
      }

      default:
        THROW("unknown data source");
    }

    // The ELF tables are added *after* processing all other data sources, and
    // not at all for these four sources.
    const bool skip_elf_tables = source == DataSource::kSegments ||
                                 source == DataSource::kSections ||
                                 source == DataSource::kArchiveMembers ||
                                 source == DataSource::kAccessPattern;
    if (!skip_elf_tables) {
      ReadELFTables(sink->input_file(), sink);
    }

    AddCatchAll(sink);
  }
}
// ObjectFile override: collects disassembly information for |symbol| into
// |info| by forwarding to DoGetDisassemblyInfo() with a pointer to |symbol|.
// Returns whatever DoGetDisassemblyInfo() returns.
bool GetDisassemblyInfo(const absl::string_view symbol,
DataSource symbol_source,
DisassemblyInfo* info) const override {
return DoGetDisassemblyInfo(&symbol, symbol_source, info);
}
// Fills |info| with what is needed to disassemble a symbol of this file.
//
// |symbol|         Name of the symbol to locate, or nullptr to skip the symbol
//                  lookup and only fill in the symbol map and arch/mode.
// |symbol_source|  Data source used for the symbol sink and for demangling.
// |info|           Receives the symbol map and, when |symbol| is given, the
//                  symbol's bytes (info->text) and VM address
//                  (info->start_address), plus arch and mode.
//
// Returns false if |symbol| is given but is found neither under its given
// name nor under its demangled name; otherwise returns the result of
// ReadElfArchMode(). Throws if the symbol's VM address cannot be translated to
// a file offset.
bool DoGetDisassemblyInfo(const absl::string_view* symbol,
                          DataSource symbol_source,
                          DisassemblyInfo* info) const {
  // Find the corresponding file range. This also could be optimized not to
  // build the entire map.
  DualMap base_map;
  NameMunger empty_munger;
  RangeSink base_sink(&file_data(), bloaty::Options(), DataSource::kSegments,
                      nullptr, nullptr);
  base_sink.AddOutput(&base_map, &empty_munger);
  std::vector<RangeSink*> sink_ptrs{&base_sink};
  ProcessFile(sink_ptrs);

  // Could optimize this not to build the whole table if necessary.
  SymbolTable symbol_table;
  RangeSink symbol_sink(&file_data(), bloaty::Options(), symbol_source,
                        &base_map, nullptr);
  symbol_sink.AddOutput(&info->symbol_map, &empty_munger);
  ReadELFSymbols(debug_file().file_data(), &symbol_sink, &symbol_table,
                 false);

  if (symbol) {
    // Try the name as given first, then its demangled form.
    auto entry = symbol_table.find(*symbol);
    if (entry == symbol_table.end()) {
      entry = symbol_table.find(ItaniumDemangle(*symbol, symbol_source));
      if (entry == symbol_table.end()) {
        return false;
      }
    }
    uint64_t vmaddr = entry->second.first;
    uint64_t size = entry->second.second;

    // TODO(haberman): Add PLT entries to symbol map, so call <plt stub> gets
    // symbolized.
    uint64_t fileoff;
    if (!base_map.vm_map.Translate(vmaddr, &fileoff)) {
      // Dereference |symbol|: passing the pointer itself would put an address
      // in the message instead of the symbol name.
      THROWF("Couldn't translate VM address for function $0", *symbol);
    }
    info->text = StrictSubstr(file_data().data(), fileoff, size);
    info->start_address = vmaddr;
  }

  return ReadElfArchMode(file_data(), &info->arch, &info->mode);
}
private:
std::optional<std::vector<bloaty_link_map::Symbol>> link_map_symbols_ = std::nullopt;
std::optional<std::vector<bloaty_link_map::Section>> link_map_sections_ = std::nullopt;
};
} // namespace
std::unique_ptr<ObjectFile> TryOpenELFFile(std::unique_ptr<InputFile>& file,
std::optional<std::string> link_map_file) {
ElfFile elf(file->data());
ArFile ar(file->data());
if (elf.IsOpen() || ar.IsOpen()) {
if (link_map_file.has_value()) {
std::cerr << "Using link map: " << *link_map_file << std::endl;
}
return std::unique_ptr<ObjectFile>(new ElfObjectFile(std::move(file), link_map_file));
} else {
return nullptr;
}
// A few functions that have been defined but are not yet used.
(void)&ElfFile::FindSectionByName;
(void)&ElfFile::Section::ReadRelocation;
}
} // namespace bloaty