Merge pull request #136 from haberman/macho
Enabled "-d compileunits" for Mach-O.
diff --git a/README.md b/README.md
index 63951a9..6d48013 100644
--- a/README.md
+++ b/README.md
@@ -329,10 +329,14 @@
so no special configuration is needed to make sure you get
them.
-TODO: Mach-O puts debug info in separate files, which are
-created using `dsymutil`. DWARF is not yet supported for
-Mach-O, but once it is then `--debug-file` will be necessary
-to help Bloaty find these separate debug files also.
+Mach-O puts debug information in separate files which you
+can create with `dsymutil`:
+
+```
+$ dsymutil bloaty
+$ strip bloaty  # optional
+$ ./bloaty -d symbols --debug-file=bloaty.dSYM/Contents/Resources/DWARF/bloaty bloaty
+```
# Configuration Files
diff --git a/src/bloaty.cc b/src/bloaty.cc
index 3ec0a88..0f09756 100644
--- a/src/bloaty.cc
+++ b/src/bloaty.cc
@@ -98,6 +98,7 @@
"raw ranges of previous data source."},
{DataSource::kSections, "sections", "object file section"},
{DataSource::kSegments, "segments", "load commands in the binary"},
+ // We require that all symbol data sources are >= kSymbols.
{DataSource::kSymbols, "symbols",
"symbols from symbol table (configure demangling with --demangle)"},
{DataSource::kRawSymbols, "rawsymbols", "unmangled symbols"},
@@ -817,6 +818,8 @@
// RangeMap ////////////////////////////////////////////////////////////////////
+// Namespace-scope definition of the in-class static constexpr member. Before
+// C++17 this is required whenever the constant is odr-used (for example,
+// bound to a reference parameter); from C++17 on it is redundant.
+constexpr uint64_t RangeSink::kUnknownSize;
+
// MmapInputFile ///////////////////////////////////////////////////////////////
diff --git a/src/bloaty.h b/src/bloaty.h
index d37e9a0..8e1cbce 100644
--- a/src/bloaty.h
+++ b/src/bloaty.h
@@ -220,7 +220,7 @@
uint64_t TranslateFileToVM(const char* ptr);
absl::string_view TranslateVMToFile(uint64_t address);
- static const uint64_t kUnknownSize = RangeMap::kUnknownSize;
+ static constexpr uint64_t kUnknownSize = RangeMap::kUnknownSize;
private:
BLOATY_DISALLOW_COPY_AND_ASSIGN(RangeSink);
diff --git a/src/dwarf.cc b/src/dwarf.cc
index f795e67..20fad8d 100644
--- a/src/dwarf.cc
+++ b/src/dwarf.cc
@@ -183,6 +183,9 @@
// The size of addresses. Guaranteed to be either 4 or 8.
uint8_t address_size() const { return address_size_; }
+ // DWARF version of this unit, as stored by ReadDWARFVersion(). Returned at
+ // the full 16-bit width of the header field so that an out-of-range value
+ // (e.g. 0x0102) is not truncated into a small number that would slip past
+ // the callers' "version too new" checks.
+ uint16_t dwarf_version() const { return dwarf_version_; }
+
void SetAddressSize(uint8_t address_size) {
if (address_size != 4 && address_size != 8) {
THROWF("Unexpected address size: $0", address_size);
@@ -243,7 +246,12 @@
return unit;
}
+ // Consumes the two-byte version field from the front of |data| and records
+ // it as this unit's DWARF version.
+ void ReadDWARFVersion(string_view* data) {
+   const uint16_t version = ReadMemcpy<uint16_t>(data);
+   dwarf_version_ = version;
+ }
+
private:
+ uint16_t dwarf_version_;
bool dwarf64_;
uint8_t address_size_;
};
@@ -422,9 +430,9 @@
}
unit_remaining_ = sizes_.ReadInitialLength(&next_unit_);
- uint16_t version = ReadMemcpy<uint16_t>(&unit_remaining_);
+ sizes_.ReadDWARFVersion(&unit_remaining_);
- if (version > 2) {
+ if (sizes_.dwarf_version() > 2) {
THROW("DWARF data is too new for us");
}
@@ -686,11 +694,13 @@
bool DIEReader::ReadCode() {
uint32_t code;
+ size_t offset;
again:
if (remaining_.empty()) {
state_ = State::kEof;
return false;
}
+ offset = remaining_.data() - unit_range_.data();
code = ReadLEB128<uint32_t>(&remaining_);
if (code == 0) {
// null entry terminates a chain of sibling entries.
@@ -748,9 +758,9 @@
unit_range_ = unit_range_.substr(
0, remaining_.size() + (remaining_.data() - unit_range_.data()));
- uint16_t version = ReadMemcpy<uint16_t>(&remaining_);
+ unit_sizes_.ReadDWARFVersion(&remaining_);
- if (version > 4) {
+ if (unit_sizes_.dwarf_version() > 4) {
THROW("Data is in new DWARF format we don't understand");
}
@@ -874,7 +884,7 @@
case DW_FORM_ref8:
return AttrValue(ReadMemcpy<uint64_t>(data));
case DW_FORM_addr:
- case DW_FORM_ref_addr:
+ address_size:
switch (reader.unit_sizes().address_size()) {
case 4:
return AttrValue(ReadMemcpy<uint32_t>(data));
@@ -883,6 +893,10 @@
default:
BLOATY_UNREACHABLE();
}
+ case DW_FORM_ref_addr:
+ if (reader.unit_sizes().dwarf_version() <= 2) {
+ goto address_size;
+ }
case DW_FORM_sec_offset:
if (reader.unit_sizes().dwarf64()) {
return AttrValue(ReadMemcpy<uint64_t>(data));
@@ -1108,13 +1122,13 @@
sizes_.SetAddressSize(address_size);
data = sizes_.ReadInitialLength(&data);
- uint16_t version = ReadMemcpy<uint16_t>(&data);
+ sizes_.ReadDWARFVersion(&data);
uint64_t header_length = sizes_.ReadDWARFOffset(&data);
string_view program = data;
SkipBytes(header_length, &program);
params_.minimum_instruction_length = ReadMemcpy<uint8_t>(&data);
- if (version == 4) {
+ if (sizes_.dwarf_version() == 4) {
params_.maximum_operations_per_instruction = ReadMemcpy<uint8_t>(&data);
if (params_.maximum_operations_per_instruction == 0) {
@@ -1630,7 +1644,7 @@
string_view unit = sizes.ReadInitialLength(&remaining);
full_unit =
full_unit.substr(0, unit.size() + (unit.data() - full_unit.data()));
- dwarf::SkipBytes(2, &unit);
+ sizes.ReadDWARFVersion(&unit);
uint64_t debug_info_offset = sizes.ReadDWARFOffset(&unit);
bool ok = die_reader.SeekToCompilationUnit(
dwarf::DIEReader::Section::kDebugInfo, debug_info_offset);
diff --git a/src/macho.cc b/src/macho.cc
index e318c90..3ba5239 100644
--- a/src/macho.cc
+++ b/src/macho.cc
@@ -427,7 +427,8 @@
}
template <class NList>
-void ParseSymbolsFromSymbolTable(const LoadCommand& cmd, RangeSink* sink) {
+void ParseSymbolsFromSymbolTable(const LoadCommand& cmd, SymbolTable* table,
+ RangeSink* sink) {
auto symtab_cmd = GetStructPointer<symtab_command>(cmd.command_data);
string_view symtab = StrictSubstr(cmd.file_data, symtab_cmd->symoff,
@@ -438,27 +439,41 @@
uint32_t nsyms = symtab_cmd->nsyms;
for (uint32_t i = 0; i < nsyms; i++) {
auto sym = GetStructPointerAndAdvance<NList>(&symtab);
+ string_view sym_range(reinterpret_cast<const char*>(sym), sizeof(NList));
if (sym->n_type & N_STAB || sym->n_value == 0) {
continue;
}
string_view name = ReadNullTerminated(strtab.substr(sym->n_un.n_strx));
- sink->AddVMRange("macho_symbols", sym->n_value, RangeSink::kUnknownSize,
- ItaniumDemangle(name, sink->data_source()));
+
+ if (sink->data_source() >= DataSource::kSymbols) {
+ sink->AddVMRange("macho_symbols", sym->n_value, RangeSink::kUnknownSize,
+ ItaniumDemangle(name, sink->data_source()));
+ }
+
+ if (table) {
+ table->insert(std::make_pair(
+ name, std::make_pair(sym->n_value, RangeSink::kUnknownSize)));
+ }
+
+ // Capture the trailing NULL.
+ name = string_view(name.data(), name.size() + 1);
+ sink->AddFileRangeFor("macho_symtab_name", sym->n_value, name);
+ sink->AddFileRangeFor("macho_symtab_sym", sym->n_value, sym_range);
}
}
-void ParseSymbols(string_view file_data, RangeSink* sink) {
+void ParseSymbols(string_view file_data, SymbolTable* symtab, RangeSink* sink) {
ForEachLoadCommand(
file_data, sink,
- [sink](const LoadCommand& cmd) {
+ [symtab, sink](const LoadCommand& cmd) {
switch (cmd.cmd) {
case LC_SYMTAB:
if (cmd.is64bit) {
- ParseSymbolsFromSymbolTable<nlist_64>(cmd, sink);
+ ParseSymbolsFromSymbolTable<nlist_64>(cmd, symtab, sink);
} else {
- ParseSymbolsFromSymbolTable<struct nlist>(cmd, sink);
+ ParseSymbolsFromSymbolTable<struct nlist>(cmd, symtab, sink);
}
break;
case LC_DYSYMTAB:
@@ -486,6 +501,78 @@
sink->AddFileRange("macho_fallback", "[Unmapped]", sink->input_file().data());
}
+// Scans one segment load command and, if it is the "__DWARF" segment, records
+// the file contents of each recognized __debug_* section in |dwarf|.
+//
+// |cmd| is taken by value because its command_data view is advanced while the
+// segment header and the section headers that follow it are read.
+// Segment/Section select which Mach-O segment and section structs are read.
+template <class Segment, class Section>
+void ReadDebugSectionsFromSegment(LoadCommand cmd, dwarf::File* dwarf) {
+  auto seg = GetStructPointerAndAdvance<Segment>(&cmd.command_data);
+
+  // Segments with no permitted access are ignored.
+  if (seg->maxprot == VM_PROT_NONE) return;
+
+  // Only the __DWARF segment carries the sections we are looking for.
+  const string_view seg_name = ArrayToStr(seg->segname, 16);
+  if (seg_name != "__DWARF") return;
+
+  for (uint32_t left = seg->nsects; left != 0; --left) {
+    auto sect = GetStructPointerAndAdvance<Section>(&cmd.command_data);
+    const string_view sect_name = ArrayToStr(sect->sectname, 16);
+
+    // Zero-fill sections occupy no bytes in the file; for every other section
+    // type the size in the file equals the section size.
+    const auto sect_type = sect->flags & SECTION_TYPE;
+    const bool zero_fill = sect_type == S_ZEROFILL ||
+                           sect_type == S_GB_ZEROFILL ||
+                           sect_type == S_THREAD_LOCAL_ZEROFILL;
+    const uint64_t file_size = zero_fill ? uint64_t{0} : sect->size;
+
+    const string_view data =
+        StrictSubstr(cmd.file_data, sect->offset, file_size);
+
+    // Route the section contents to the matching dwarf::File member;
+    // unrecognized section names are skipped.
+    if (sect_name == "__debug_aranges") {
+      dwarf->debug_aranges = data;
+    } else if (sect_name == "__debug_str") {
+      dwarf->debug_str = data;
+    } else if (sect_name == "__debug_info") {
+      dwarf->debug_info = data;
+    } else if (sect_name == "__debug_types") {
+      dwarf->debug_types = data;
+    } else if (sect_name == "__debug_abbrev") {
+      dwarf->debug_abbrev = data;
+    } else if (sect_name == "__debug_line") {
+      dwarf->debug_line = data;
+    } else if (sect_name == "__debug_loc") {
+      dwarf->debug_loc = data;
+    } else if (sect_name == "__debug_pubnames") {
+      dwarf->debug_pubnames = data;
+    } else if (sect_name == "__debug_pubtypes") {
+      dwarf->debug_pubtypes = data;
+    } else if (sect_name == "__debug_ranges") {
+      dwarf->debug_ranges = data;
+    }
+  }
+}
+
+// Walks every load command of |file| and hands each LC_SEGMENT_64 /
+// LC_SEGMENT command to ReadDebugSectionsFromSegment() so that the debug
+// sections it finds are recorded in |dwarf|. Other commands are ignored.
+static void ReadDebugSectionsFromMachO(const InputFile& file,
+                                       dwarf::File* dwarf) {
+  auto visit_command = [dwarf](const LoadCommand& cmd) {
+    if (cmd.cmd == LC_SEGMENT_64) {
+      ReadDebugSectionsFromSegment<segment_command_64, section_64>(cmd, dwarf);
+    } else if (cmd.cmd == LC_SEGMENT) {
+      ReadDebugSectionsFromSegment<segment_command, section>(cmd, dwarf);
+    }
+  };
+  // No RangeSink is supplied for this pass.
+  ForEachLoadCommand(file.data(), nullptr, visit_command);
+}
+
class MachOObjectFile : public ObjectFile {
public:
MachOObjectFile(std::unique_ptr<InputFile> file_data)
@@ -520,10 +607,25 @@
case DataSource::kRawSymbols:
case DataSource::kShortSymbols:
case DataSource::kFullSymbols:
- ParseSymbols(debug_file().file_data().data(), sink);
+ ParseSymbols(debug_file().file_data().data(), nullptr, sink);
break;
+ case DataSource::kCompileUnits: {
+ SymbolTable symtab;
+ DualMap symbol_map;
+ NameMunger empty_munger;
+ RangeSink symbol_sink(&debug_file().file_data(),
+ sink->options(),
+ DataSource::kRawSymbols,
+ &sinks[0]->MapAtIndex(0));
+ symbol_sink.AddOutput(&symbol_map, &empty_munger);
+ ParseSymbols(debug_file().file_data().data(), &symtab, &symbol_sink);
+ dwarf::File dwarf;
+ ReadDebugSectionsFromMachO(debug_file().file_data(), &dwarf);
+ ReadDWARFCompileUnits(dwarf, symtab, symbol_map, sink);
+ ParseSymbols(sink->input_file().data(), nullptr, sink);
+ break;
+ }
case DataSource::kArchiveMembers:
- case DataSource::kCompileUnits:
case DataSource::kInlines:
default:
THROW("Mach-O doesn't support this data source");
diff --git a/src/range_map.cc b/src/range_map.cc
index 5c3658e..39184b2 100644
--- a/src/range_map.cc
+++ b/src/range_map.cc
@@ -18,6 +18,8 @@
namespace bloaty {
+// Namespace-scope definition of the in-class static constexpr member. Before
+// C++17 this is required whenever the constant is odr-used (for example,
+// bound to a reference parameter); from C++17 on it is redundant.
+constexpr uint64_t RangeMap::kUnknownSize;
+
template <class T>
uint64_t RangeMap::TranslateWithEntry(T iter, uint64_t addr) const {
assert(EntryContains(iter, addr));
diff --git a/src/range_map.h b/src/range_map.h
index d16aea0..d2c62b2 100644
--- a/src/range_map.h
+++ b/src/range_map.h
@@ -136,7 +136,7 @@
}
}
- static const uint64_t kUnknownSize = UINT64_MAX;
+ static constexpr uint64_t kUnknownSize = UINT64_MAX;
private:
friend class RangeMapTest;