Merge branch 'dataquality' of github.com:haberman/bloaty into dataquality
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e381e3e..abc1ef0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -40,11 +40,15 @@
# Baseline build flags.
set(CMAKE_CXX_FLAGS "-std=c++11 -W -Wall -Wno-sign-compare")
-set(CMAKE_EXE_LINKER_FLAGS "-Wl,--build-id")
set(CMAKE_CXX_FLAGS_DEBUG "-g")
set(CMAKE_CXX_FLAGS_RELEASE "-O2")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g")
+if(APPLE)
+elseif(UNIX)
+ set(CMAKE_EXE_LINKER_FLAGS "-Wl,--build-id")
+endif()
+
# When using Ninja, compiler output won't be colorized without this.
include(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG(-fdiagnostics-color=always SUPPORTS_COLOR_ALWAYS)
diff --git a/src/elf.cc b/src/elf.cc
index 9235fa9..94ce2ad 100644
--- a/src/elf.cc
+++ b/src/elf.cc
@@ -41,6 +41,8 @@
#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
#define WARN(x) fprintf(stderr, "bloaty: %s\n", x);
+namespace bloaty {
+
namespace {
uint64_t CheckedAdd(uint64_t a, uint64_t b) {
@@ -61,12 +63,6 @@
return static_cast<uint64_t>(c);
}
-}
-
-namespace bloaty {
-
-namespace {
-
struct ByteSwapFunc {
template <class T>
T operator()(T val) {
@@ -1215,8 +1211,6 @@
sink->AddFileRange("elf_catchall", "[Unmapped]", sink->input_file().data());
}
-} // namespace
-
class ElfObjectFile : public ObjectFile {
public:
ElfObjectFile(std::unique_ptr<InputFile> file)
@@ -1371,6 +1365,8 @@
}
};
+} // namespace
+
std::unique_ptr<ObjectFile> TryOpenELFFile(std::unique_ptr<InputFile>& file) {
ElfFile elf(file->data());
ArFile ar(file->data());
diff --git a/src/macho.cc b/src/macho.cc
index 79c9758..68729e9 100644
--- a/src/macho.cc
+++ b/src/macho.cc
@@ -38,6 +38,9 @@
#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
#define WARN(x) fprintf(stderr, "bloaty: %s\n", x);
+namespace bloaty {
+namespace macho {
+
// segname (& sectname) may NOT be NULL-terminated,
// i.e. can use up all 16 chars, e.g. '__gcc_except_tab' (no '\0'!)
// hence specifying size when constructing std::string
@@ -62,8 +65,6 @@
return data.substr(off, n);
}
-namespace bloaty {
-
uint32_t ReadMagic(string_view data) {
if (data.size() < sizeof(uint32_t)) {
THROW("Malformed Mach-O file");
@@ -93,9 +94,127 @@
return ret;
}
+void MaybeAddOverhead(RangeSink* sink, const char* label, string_view data) {
+ if (sink) {
+ sink->AddFileRange("macho_overhead", label, data);
+ }
+}
+
+template <class Struct, class Func>
+void ParseMachOHeaderImpl(string_view macho_data, RangeSink* overhead_sink,
+ Func&& loadcmd_func) {
+ string_view header_data = macho_data;
+ auto header = GetStructPointerAndAdvance<Struct>(&header_data);
+ MaybeAddOverhead(overhead_sink,
+ "[Mach-O Headers]",
+ macho_data.substr(0, sizeof(Struct)));
+ uint32_t ncmds = header->ncmds;
+
+ for (uint32_t i = 0; i < ncmds; i++) {
+ auto command = GetStructPointer<load_command>(header_data);
+ string_view command_data = StrictSubstr(header_data, 0, command->cmdsize);
+ std::forward<Func>(loadcmd_func)(command->cmd, command_data, macho_data);
+ MaybeAddOverhead(overhead_sink, "[Mach-O Headers]", command_data);
+ header_data = header_data.substr(command->cmdsize);
+ }
+}
+
+template <class Func>
+void ParseMachOHeader(string_view macho_file, RangeSink* overhead_sink,
+ Func&& loadcmd_func) {
+ uint32_t magic = ReadMagic(macho_file);
+ switch (magic) {
+ case MH_MAGIC:
+ // We don't expect to see many 32-bit binaries out in the wild.
+ // Apple is aggressively phasing out support for 32-bit binaries:
+ // https://www.macrumors.com/2017/06/06/apple-to-phase-out-32-bit-mac-apps/
+ //
+ // Still, you can build 32-bit binaries as of this writing, and
+ // there are existing 32-bit binaries floating around, so we might
+ // as well support them.
+ ParseMachOHeaderImpl<mach_header>(macho_file, overhead_sink,
+ std::forward<Func>(loadcmd_func));
+ break;
+ case MH_MAGIC_64:
+ ParseMachOHeaderImpl<mach_header_64>(
+ macho_file, overhead_sink, std::forward<Func>(loadcmd_func));
+ break;
+ case MH_CIGAM:
+ case MH_CIGAM_64:
+ // OS X and Darwin currently only run on x86/x86-64 (little-endian
+ // platforms), so we expect basically all Mach-O files to be
+ // little-endian. Additionally, pretty much all CPU architectures
+ // are little-endian these days. ARM has the option to be
+ // big-endian, but I can't find any OS that is actually compiled to
+ // use big-endian mode. debian-mips is the only big-endian OS I can
+ // find (and maybe SPARC).
+ //
+ // All of this is to say, this case should only happen if you are
+ // running Bloaty on debian-mips. I consider that uncommon enough
+ // (and hard enough to test) that we don't support this until there
+ // is a demonstrated need.
+ THROW("We don't support cross-endian Mach-O files.");
+ default:
+ THROW("Corrupt Mach-O file");
+ }
+}
+
+template <class Func>
+void ParseFatHeader(string_view fat_file, RangeSink* overhead_sink,
+ Func&& loadcmd_func) {
+ string_view header_data = fat_file;
+ auto header = GetStructPointerAndAdvance<fat_header>(&header_data);
+ MaybeAddOverhead(overhead_sink, "[Mach-O Headers]",
+ fat_file.substr(0, sizeof(fat_header)));
+ assert(ByteSwap(header->magic) == FAT_MAGIC);
+ uint32_t nfat_arch = ByteSwap(header->nfat_arch);
+ for (uint32_t i = 0; i < nfat_arch; i++) {
+ auto arch = GetStructPointerAndAdvance<fat_arch>(&header_data);
+ string_view macho_data = StrictSubstr(
+ fat_file, ByteSwap(arch->offset), ByteSwap(arch->size));
+ ParseMachOHeader(macho_data, overhead_sink,
+ std::forward<Func>(loadcmd_func));
+ }
+}
+
+template <class Func>
+void ForEachLoadCommand(string_view maybe_fat_file, RangeSink* overhead_sink,
+ Func&& loadcmd_func) {
+ uint32_t magic = ReadMagic(maybe_fat_file);
+ switch (magic) {
+ case MH_MAGIC:
+ case MH_MAGIC_64:
+ case MH_CIGAM:
+ case MH_CIGAM_64:
+ ParseMachOHeader(maybe_fat_file, overhead_sink,
+ std::forward<Func>(loadcmd_func));
+ break;
+ case FAT_CIGAM:
+ ParseFatHeader(maybe_fat_file, overhead_sink,
+ std::forward<Func>(loadcmd_func));
+ break;
+ }
+}
+
template <class Segment, class Section>
-void ParseMachOSegment(string_view command_data, string_view file_data,
- RangeSink* sink) {
+void AddSegmentAsFallback(string_view command_data, string_view file_data,
+ RangeSink* sink) {
+ auto segment = GetStructPointerAndAdvance<Segment>(&command_data);
+
+ if (segment->maxprot == VM_PROT_NONE) {
+ return;
+ }
+
+ string_view segname = ArrayToStr(segment->segname, 16);
+
+ sink->AddRange("macho_segment", "[" + std::string(segname) + "]",
+ segment->vmaddr, segment->vmsize,
+ StrictSubstr(file_data, segment->fileoff, segment->filesize));
+}
+
+template <class Segment, class Section>
+void ParseSegment(string_view command_data, string_view file_data,
+ RangeSink* sink) {
auto segment = GetStructPointerAndAdvance<Segment>(&command_data);
if (segment->maxprot == VM_PROT_NONE) {
@@ -135,8 +254,8 @@
}
}
-static void ParseMachODyldInfo(string_view command_data, string_view file_data,
- RangeSink* sink) {
+static void ParseDyldInfo(string_view command_data, string_view file_data,
+ RangeSink* sink) {
auto info = GetStructPointer<dyld_info_command>(command_data);
sink->AddFileRange(
@@ -202,20 +321,19 @@
StrictSubstr(file_data, linkedit->dataoff, linkedit->datasize));
}
-void ParseMachOLoadCommand(uint32_t cmd, string_view command_data,
- string_view file_data, RangeSink* sink) {
+void ParseLoadCommand(uint32_t cmd, string_view command_data,
+ string_view file_data, RangeSink* sink) {
switch (cmd) {
case LC_SEGMENT_64:
- ParseMachOSegment<segment_command_64, section_64>(command_data, file_data,
- sink);
+ ParseSegment<segment_command_64, section_64>(command_data, file_data,
+ sink);
break;
case LC_SEGMENT:
- ParseMachOSegment<segment_command, section>(command_data, file_data,
- sink);
+ ParseSegment<segment_command, section>(command_data, file_data, sink);
break;
case LC_DYLD_INFO:
case LC_DYLD_INFO_ONLY:
- ParseMachODyldInfo(command_data, file_data, sink);
+ ParseDyldInfo(command_data, file_data, sink);
break;
case LC_SYMTAB:
ParseSymbolTable(command_data, file_data, sink);
@@ -246,10 +364,15 @@
}
}
-static void ParseMachOSymbols(string_view command_data, string_view file_data,
- RangeSink* sink) {
- (void)command_data;
- (void)file_data;
+void ParseLoadCommands(RangeSink* sink) {
+ ForEachLoadCommand(
+ sink->input_file().data(), sink,
+ [sink](uint32_t cmd, string_view command_data, string_view file_data) {
+ ParseLoadCommand(cmd, command_data, file_data, sink);
+ });
+}
+
+static void ParseSymbols(RangeSink* sink) {
#if 0
auto symtab = GetStructPointer<symtab_command>(command_data);
@@ -285,103 +408,21 @@
}
-template <class Struct>
-void ParseMachOHeaderImpl(string_view file_data, RangeSink* sink) {
- string_view header_data = file_data;
- auto header = GetStructPointerAndAdvance<Struct>(&header_data);
- uint32_t ncmds = header->ncmds;
-
- for (uint32_t i = 0; i < ncmds; i++) {
- auto command = GetStructPointer<load_command>(header_data);
- string_view command_data = StrictSubstr(header_data, 0, command->cmdsize);
- switch (sink->data_source()) {
- case DataSource::kSegments:
- case DataSource::kSections:
- ParseMachOLoadCommand(command->cmd, command_data, file_data, sink);
- break;
- case DataSource::kSymbols:
- case DataSource::kRawSymbols:
- case DataSource::kShortSymbols:
- case DataSource::kFullSymbols:
- if (command->cmd == LC_SYMTAB) {
- ParseMachOSymbols(command_data, file_data, sink);
- }
- break;
- default:
- THROW("Unexpected data source");
- }
- header_data = header_data.substr(command->cmdsize);
- }
-}
-
-static void ParseMachOHeader(string_view file_data, RangeSink* sink) {
- uint32_t magic = ReadMagic(file_data);
- switch (magic) {
- case MH_MAGIC:
- // We don't expect to see many 32-bit binaries out in the wild. Apple is
- // aggressively phasing out support for 32-bit binaries:
- // https://www.macrumors.com/2017/06/06/apple-to-phase-out-32-bit-mac-apps/
- //
- // Still, you can build 32-bit binaries as of this writing, and there are
- // existing 32-bit binaries floating around, so we might as well support
- // them.
- ParseMachOHeaderImpl<mach_header>(file_data, sink);
- break;
- case MH_MAGIC_64:
- ParseMachOHeaderImpl<mach_header_64>(file_data, sink);
- break;
- case MH_CIGAM:
- case MH_CIGAM_64:
- // OS X and Darwin currently only run on x86/x86-64 (little-endian
- // platforms), so we expect basically all Mach-O files to be
- // little-endian. Additionally, pretty much all CPU architectures are
- // little-endian these days. ARM has the option to be big-endian, but I
- // can't find any OS that is actually compiled to use big-endian mode.
- // debian-mips is the only big-endian OS I can find (and maybe SPARC).
- //
- // All of this is to say, this case should only happen if you are running
- // Bloaty on debian-mips. I consider that uncommon enough (and hard
- // enough to test) that we don't support this until there is a
- // demonstrated need.
- THROW("We don't support cross-endian Mach-O files.");
- default:
- THROW("Corrupt Mach-O file");
- }
-}
-
-static void ParseMachOFatHeader(string_view file_data, RangeSink* sink) {
- string_view header_data = file_data;
- auto header = GetStructPointerAndAdvance<fat_header>(&header_data);
- assert(ByteSwap(header->magic) == FAT_MAGIC);
- uint32_t nfat_arch = ByteSwap(header->nfat_arch);
- for (uint32_t i = 0; i < nfat_arch; i++) {
- auto arch = GetStructPointerAndAdvance<fat_arch>(&header_data);
- string_view arch_data =
- StrictSubstr(file_data, ByteSwap(arch->offset), ByteSwap(arch->size));
- ParseMachOHeader(arch_data, sink);
- }
-}
-
-static void ParseMachOFile(RangeSink* sink) {
- string_view file_data = sink->input_file().data();
- uint32_t magic = ReadMagic(file_data);
- switch (magic) {
- case MH_MAGIC:
- case MH_MAGIC_64:
- case MH_CIGAM:
- case MH_CIGAM_64:
- ParseMachOHeader(file_data, sink);
- break;
- case FAT_CIGAM:
- ParseMachOFatHeader(file_data, sink);
- break;
- default:
- // TODO: .a file (AR).
- THROWF("Unrecognized Mach-O file, magic: $0", magic);
- }
-}
-
static void AddMachOFallback(RangeSink* sink) {
+ ForEachLoadCommand(
+ sink->input_file().data(), sink,
+ [sink](uint32_t cmd, string_view command_data, string_view file_data) {
+ switch (cmd) {
+ case LC_SEGMENT_64:
+ AddSegmentAsFallback<segment_command_64, section_64>(
+ command_data, file_data, sink);
+ break;
+ case LC_SEGMENT:
+ AddSegmentAsFallback<segment_command, section>(command_data,
+ file_data, sink);
+ break;
+ }
+ });
sink->AddFileRange("macho_fallback", "[Unmapped]", sink->input_file().data());
}
@@ -400,11 +441,13 @@
switch (sink->data_source()) {
case DataSource::kSegments:
case DataSource::kSections:
+ ParseLoadCommands(sink);
+ break;
case DataSource::kSymbols:
case DataSource::kRawSymbols:
case DataSource::kShortSymbols:
case DataSource::kFullSymbols:
- ParseMachOFile(sink);
+ ParseSymbols(sink);
break;
case DataSource::kArchiveMembers:
case DataSource::kCompileUnits:
@@ -424,14 +467,17 @@
}
};
+} // namespace macho
+
std::unique_ptr<ObjectFile> TryOpenMachOFile(std::unique_ptr<InputFile> &file) {
- uint32_t magic = ReadMagic(file->data());
+ uint32_t magic = macho::ReadMagic(file->data());
// We only support little-endian host and little endian binaries (see
// ParseMachOHeader() for more rationale). Fat headers are always on disk as
// big-endian.
if (magic == MH_MAGIC || magic == MH_MAGIC_64 || magic == FAT_CIGAM) {
- return std::unique_ptr<ObjectFile>(new MachOObjectFile(std::move(file)));
+ return std::unique_ptr<ObjectFile>(
+ new macho::MachOObjectFile(std::move(file)));
}
return nullptr;
diff --git a/src/range_map.h b/src/range_map.h
index d6e2941..c227382 100644
--- a/src/range_map.h
+++ b/src/range_map.h
@@ -188,9 +188,6 @@
}
} else {
uint64_t ret = iter->first + iter->second.size;
- if (ret <= iter->first) {
- printf("%lx %lx %lx\n", iter->first, iter->second.size, ret);
- }
assert(ret > iter->first);
return ret;
}