Merge branch 'master' into dataquality
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 91eb0b1..b86941e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -40,6 +40,7 @@
# Baseline build flags.
set(CMAKE_CXX_FLAGS "-std=c++11 -W -Wall -Wno-sign-compare")
+# Embed a build ID in linked executables. Append rather than overwrite so that
+# linker flags supplied by the user (LDFLAGS, -DCMAKE_EXE_LINKER_FLAGS=...) are
+# preserved.
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id")
set(CMAKE_CXX_FLAGS_DEBUG "-g")
set(CMAKE_CXX_FLAGS_RELEASE "-O2")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g")
diff --git a/src/bloaty.cc b/src/bloaty.cc
index fa3ba4f..47bb060 100644
--- a/src/bloaty.cc
+++ b/src/bloaty.cc
@@ -270,21 +270,16 @@
}
std::string NameMunger::Munge(string_view name) const {
- re2::StringPiece piece(name.data(), name.size());
- std::string ret;
-
- if (!name.empty() && name[0] == '[') {
- // This is a special symbol, don't mangle.
- return std::string(name);
- }
+ std::string name_str(name);
+ std::string ret(name);
for (const auto& pair : regexes_) {
- if (RE2::Extract(piece, *pair.first, pair.second, &ret)) {
+ if (RE2::Extract(name_str, *pair.first, pair.second, &ret)) {
return ret;
}
}
- return std::string(name);
+ return name_str;
}
@@ -863,6 +858,30 @@
}
}
+// Reports the size of the mapping whose start address is exactly |addr|.
+// Returns false, leaving |*size| untouched, when no mapping starts there.
+bool RangeMap::TryGetSize(uint64_t addr, uint64_t* size) const {
+  const auto match = mappings_.find(addr);
+  if (match != mappings_.end()) {
+    // |addr| is the key of the entry found, so end - addr is the length of
+    // the whole mapping.
+    *size = match->second.end - addr;
+    return true;
+  }
+  return false;
+}
+
+// Renders every mapping as one line of the form
+//   [<key>, <end>]: <label>[, other_start=<addr>]
+// with the numbers in hex. The other_start suffix is omitted when that field
+// holds the UINT64_MAX sentinel.
+std::string RangeMap::DebugString() const {
+  std::string out;
+  for (const auto& mapping : mappings_) {
+    const auto& entry = mapping.second;
+    absl::StrAppend(&out, "[", absl::Hex(mapping.first), ", ",
+                    absl::Hex(entry.end), "]: ", entry.label);
+    const bool has_other_start = entry.other_start != UINT64_MAX;
+    if (has_other_start) {
+      absl::StrAppend(&out, ", other_start=", absl::Hex(entry.other_start));
+    }
+    out.push_back('\n');
+  }
+  return out;
+}
+
void RangeMap::AddRange(uint64_t addr, uint64_t size, const std::string& val) {
AddDualRange(addr, size, UINT64_MAX, val);
}
@@ -879,7 +898,7 @@
while (1) {
while (it != mappings_.end() && EntryContains(it, addr)) {
if (verbose_level > 1) {
- fprintf(stderr,
+ fprintf(stdout,
"WARN: adding mapping [%" PRIx64 "x, %" PRIx64 "x] for label"
"%s, this conflicts with existing mapping [%" PRIx64 ", %"
PRIx64 "] for label %s\n",
@@ -898,7 +917,7 @@
if (it != mappings_.end() && end > it->first) {
this_end = std::min(end, it->first);
if (verbose_level > 1) {
- fprintf(stderr,
+ fprintf(stdout,
"WARN(2): adding mapping [%" PRIx64 ", %" PRIx64 "] for label "
"%s, this conflicts with existing mapping [%" PRIx64 ", %"
PRIx64 "] for label %s\n",
@@ -940,7 +959,7 @@
if (translator.TranslateAndTrimRangeWithEntry(it, addr, end, &this_addr,
&this_size)) {
if (verbose_level > 2) {
- fprintf(stderr, " -> translates to: [%" PRIx64 " %" PRIx64 "]\n",
+ fprintf(stdout, " -> translates to: [%" PRIx64 " %" PRIx64 "]\n",
this_addr, this_size);
}
other->AddRange(this_addr, this_size, val);
@@ -1126,7 +1145,7 @@
void RangeSink::AddFileRange(string_view name, uint64_t fileoff,
uint64_t filesize) {
if (verbose_level > 2) {
- fprintf(stderr, "[%s] AddFileRange(%.*s, %" PRIx64 ", %" PRIx64 ")\n",
+ fprintf(stdout, "[%s] AddFileRange(%.*s, %" PRIx64 ", %" PRIx64 ")\n",
GetDataSourceLabel(data_source_), (int)name.size(), name.data(),
fileoff, filesize);
}
@@ -1142,10 +1161,33 @@
}
}
+// Attributes |file_range| to whatever label each output's VM map already holds
+// for |label_from_vmaddr|. Outputs that have no label at that address are left
+// untouched. |file_range| is located in the file by its distance from the
+// start of the input file's data, so it must be a view into that data.
+void RangeSink::AddFileRangeFor(uint64_t label_from_vmaddr,
+                                string_view file_range) {
+  // Checked first: the verbose dump below already dereferences translator_.
+  assert(translator_);
+  const uint64_t file_offset = file_range.data() - file_->data().data();
+  // size() is a size_t; widen it explicitly so it matches the PRIx64
+  // conversion on platforms where size_t is not a 64-bit type.
+  const uint64_t file_size = file_range.size();
+  if (verbose_level > 2) {
+    fprintf(stdout,
+            "[%s] AddFileRangeFor(%" PRIx64 ", [%" PRIx64 ", %" PRIx64 "])\n",
+            GetDataSourceLabel(data_source_), label_from_vmaddr, file_offset,
+            file_size);
+    fprintf(stdout, "Translation map:\n%s",
+            translator_->file_map.DebugString().c_str());
+  }
+  for (auto& pair : outputs_) {
+    std::string label;
+    uint64_t offset;  // Required by TryGetLabel(); not used here.
+    if (pair.first->vm_map.TryGetLabel(label_from_vmaddr, &label, &offset)) {
+      pair.first->file_map.AddRangeWithTranslation(
+          file_offset, file_size, label, translator_->file_map,
+          &pair.first->vm_map);
+    }
+  }
+}
+
void RangeSink::AddVMRange(uint64_t vmaddr, uint64_t vmsize,
const std::string& name) {
if (verbose_level > 2) {
- fprintf(stderr, "[%s] AddVMRange(%.*s, %" PRIx64 ", %" PRIx64 ")\n",
+ fprintf(stdout, "[%s] AddVMRange(%.*s, %" PRIx64 ", %" PRIx64 ")\n",
GetDataSourceLabel(data_source_), (int)name.size(), name.data(),
vmaddr, vmsize);
}
@@ -1173,7 +1215,7 @@
void RangeSink::AddRange(string_view name, uint64_t vmaddr, uint64_t vmsize,
uint64_t fileoff, uint64_t filesize) {
if (verbose_level > 2) {
- fprintf(stderr, "[%s] AddRange(%.*s, %" PRIx64 ", %" PRIx64 ", %" PRIx64
+ fprintf(stdout, "[%s] AddRange(%.*s, %" PRIx64 ", %" PRIx64 ", %" PRIx64
", %" PRIx64 ")\n",
GetDataSourceLabel(data_source_), (int)name.size(), name.data(),
vmaddr, vmsize, fileoff, filesize);
@@ -1536,9 +1578,9 @@
assert(filesize == file->file_data().data().size());
if (verbose_level > 0) {
- fprintf(stderr, "FILE MAP:\n");
+ fprintf(stdout, "FILE MAP:\n");
maps.PrintFileMaps(filename, filename_position_);
- fprintf(stderr, "VM MAP:\n");
+ fprintf(stdout, "VM MAP:\n");
maps.PrintVMMaps(filename, filename_position_);
}
}
diff --git a/src/bloaty.h b/src/bloaty.h
index 3ecd574..4ee8795 100644
--- a/src/bloaty.h
+++ b/src/bloaty.h
@@ -153,6 +153,12 @@
void AddFileRange(absl::string_view name,
uint64_t fileoff, uint64_t filesize);
+ // Like AddFileRange(), but the label is whatever label was previously
+ // assigned to VM address |label_from_vmaddr|. If no existing label is
+ // assigned to |label_from_vmaddr|, this function does nothing.
+ void AddFileRangeFor(uint64_t label_from_vmaddr,
+ absl::string_view file_range);
+
void AddFileRange(absl::string_view name, absl::string_view file_range) {
AddFileRange(name, file_range.data() - file_->data().data(),
file_range.size());
@@ -183,6 +189,10 @@
void AddVMRangeIgnoreDuplicate(uint64_t vmaddr, uint64_t size,
const std::string& name);
+ // Returns the DualMap stored at position |index| of outputs_. No bounds
+ // check is performed here.
+ const DualMap& MapAtIndex(size_t index) const {
+ return *outputs_[index].first;
+ }
+
private:
BLOATY_DISALLOW_COPY_AND_ASSIGN(RangeSink);
@@ -262,6 +272,10 @@
absl::string_view debug_abbrev;
absl::string_view debug_aranges;
absl::string_view debug_line;
+ absl::string_view debug_loc;
+ absl::string_view debug_pubnames;
+ absl::string_view debug_pubtypes;
+ absl::string_view debug_ranges;
};
} // namespace dwarf
@@ -269,7 +283,7 @@
// Provided by dwarf.cc. To use these, a module should fill in a dwarf::File
// and then call these functions.
void ReadDWARFCompileUnits(const dwarf::File& file, const SymbolTable& symtab,
- RangeSink* sink);
+ const DualMap& map, RangeSink* sink);
void ReadDWARFInlines(const dwarf::File& file, RangeSink* sink,
bool include_line);
@@ -385,6 +399,12 @@
// offset from the beginning of this range.
bool TryGetLabel(uint64_t addr, std::string* label, uint64_t* offset) const;
+ // Looks for a range that starts exactly on |addr|. If it exists, returns
+ // true and sets |size| to its size.
+ bool TryGetSize(uint64_t addr, uint64_t* size) const;
+
+ std::string DebugString() const;
+
template <class Func>
static void ComputeRollup(const std::vector<const RangeMap*>& range_maps,
const std::string& filename, int filename_position,
diff --git a/src/dwarf.cc b/src/dwarf.cc
index 1c1a619..98e2255 100644
--- a/src/dwarf.cc
+++ b/src/dwarf.cc
@@ -254,7 +254,7 @@
class AbbrevTable {
public:
// Reads abbreviations until a terminating abbreviation is seen.
- void ReadAbbrevs(string_view data);
+ string_view ReadAbbrevs(string_view data);
// In a DWARF abbreviation, each attribute has a name and a form.
struct Attribute {
@@ -291,12 +291,12 @@
std::unordered_map<uint32_t, Abbrev> abbrev_;
};
-void AbbrevTable::ReadAbbrevs(string_view data) {
+string_view AbbrevTable::ReadAbbrevs(string_view data) {
while (true) {
uint32_t code = ReadLEB128<uint32_t>(&data);
if (code == 0) {
- return; // Terminator entry.
+ return data; // Terminator entry.
}
Abbrev& abbrev = abbrev_[code];
@@ -436,6 +436,87 @@
}
+// LocationList ////////////////////////////////////////////////////////////////
+
+// Code for reading entries out of a location list.
+// For the moment we only care about finding the bounds of a list given its
+// offset, so we don't actually vend any of the data.
+
+// Cursor over the entries of one location list. Entries are consumed from the
+// front of |data|; read_offset() reports how far the cursor has advanced.
+class LocationList {
+ public:
+ LocationList(CompilationUnitSizes sizes, string_view data)
+ : sizes_(sizes), remaining_(data) {}
+
+ // Pointer to the first byte that has not been consumed yet.
+ const char* read_offset() const { return remaining_.data(); }
+ // Consumes one entry. Returns false once the (0, 0) end-of-list entry has
+ // been consumed, true otherwise.
+ bool NextEntry();
+
+ private:
+ CompilationUnitSizes sizes_;
+ // Unconsumed tail of the data handed to the constructor.
+ string_view remaining_;
+};
+
+// Advances past one location list entry. Returns false after consuming the
+// terminating (0, 0) pair and true for every other entry.
+bool LocationList::NextEntry() {
+  const uint64_t first = sizes_.ReadAddress(&remaining_);
+  const uint64_t second = sizes_.ReadAddress(&remaining_);
+  if (first == 0 && second == 0) {
+    return false;
+  }
+
+  // A first word holding the largest address value marks a base address
+  // selection entry, which carries no location description.
+  const bool selects_base =
+      first == UINT64_MAX ||
+      (first == UINT32_MAX && sizes_.address_size == 4);
+  if (!selects_base) {
+    // Ordinary entry: step over the 2-byte length and the description bytes.
+    const uint16_t description_size = ReadMemcpy<uint16_t>(&remaining_);
+    SkipBytes(description_size, &remaining_);
+  }
+  return true;
+}
+
+// Returns the prefix of |available| occupied by the location list that starts
+// at its first byte, including the terminating entry.
+string_view GetLocationListRange(CompilationUnitSizes sizes,
+                                 string_view available) {
+  LocationList cursor(sizes, available);
+  for (;;) {
+    if (!cursor.NextEntry()) break;
+  }
+  const size_t consumed = cursor.read_offset() - available.data();
+  return available.substr(0, consumed);
+}
+
+
+// RangeList ///////////////////////////////////////////////////////////////////
+
+// Code for reading entries out of a range list.
+// For the moment we only care about finding the bounds of a list given its
+// offset, so we don't actually vend any of the data.
+
+// Cursor over the entries of one range list. Entries are consumed from the
+// front of |data|; read_offset() reports how far the cursor has advanced.
+class RangeList {
+ public:
+ RangeList(CompilationUnitSizes sizes, string_view data)
+ : sizes_(sizes), remaining_(data) {}
+
+ // Pointer to the first byte that has not been consumed yet.
+ const char* read_offset() const { return remaining_.data(); }
+ // Consumes one (start, end) pair. Returns false once the (0, 0) pair has
+ // been consumed, true otherwise.
+ bool NextEntry();
+
+ private:
+ CompilationUnitSizes sizes_;
+ // Unconsumed tail of the data handed to the constructor.
+ string_view remaining_;
+};
+
+// Advances past one range list entry, which is a pair of addresses. Returns
+// false after consuming the terminating (0, 0) pair and true otherwise.
+bool RangeList::NextEntry() {
+  const uint64_t first = sizes_.ReadAddress(&remaining_);
+  const uint64_t second = sizes_.ReadAddress(&remaining_);
+  const bool is_terminator = (first == 0 && second == 0);
+  return !is_terminator;
+}
+
+// Returns the prefix of |available| occupied by the range list that starts at
+// its first byte, including the terminating entry.
+string_view GetRangeListRange(CompilationUnitSizes sizes,
+                              string_view available) {
+  RangeList cursor(sizes, available);
+  for (;;) {
+    if (!cursor.NextEntry()) break;
+  }
+  const size_t consumed = cursor.read_offset() - available.data();
+  return available.substr(0, consumed);
+}
+
// DIEReader ///////////////////////////////////////////////////////////////////
// Reads a sequence of DWARF DIE's (Debugging Information Entries) from the
@@ -495,8 +576,25 @@
const File& dwarf() const { return dwarf_; }
+ string_view unit_range() const { return unit_range_; }
CompilationUnitSizes unit_sizes() const { return unit_sizes_; }
uint32_t abbrev_version() const { return abbrev_version_; }
+ uint64_t debug_abbrev_offset() const { return debug_abbrev_offset_; }
+
+  // If both compileunit_name and strp_sink are set (a non-empty name and a
+  // non-null sink), every DW_FORM_strp attribute encountered is reported via
+  // strp_sink->AddFileRange(compileunit_name, <string range>). These strings
+  // occur in the .debug_str section.
+  void set_compileunit_name(absl::string_view name) {
+    unit_name_ = std::string(name);
+  }
+  void set_strp_sink(RangeSink* sink) { strp_sink_ = sink; }
+
+  // Called by the form readers for each indirect string they decode; |range|
+  // is the string's bytes.
+  void AddIndirectString(string_view range) const {
+    // Without a unit name the range would be filed under an empty label, so
+    // skip it, as the contract above promises.
+    if (strp_sink_ == nullptr || unit_name_.empty()) {
+      return;
+    }
+    strp_sink_->AddFileRange(unit_name_, range);
+  }
private:
BLOATY_DISALLOW_COPY_AND_ASSIGN(DIEReader);
@@ -540,6 +638,7 @@
std::string error_;
const File& dwarf_;
+ RangeSink* strp_sink_ = nullptr;
// Abbreviation for the current entry.
const AbbrevTable::Abbrev* current_abbrev_;
@@ -562,6 +661,9 @@
Section section_;
// Information about the current compilation unit.
+ uint64_t debug_abbrev_offset_;
+ std::string unit_name_;
+ string_view unit_range_;
CompilationUnitSizes unit_sizes_;
AbbrevTable* unit_abbrev_;
@@ -625,7 +727,10 @@
return false;
}
+ unit_range_ = next_unit_;
remaining_ = unit_sizes_.ReadInitialLength(&next_unit_);
+ unit_range_ = unit_range_.substr(
+ 0, remaining_.size() + (remaining_.data() - unit_range_.data()));
uint16_t version = ReadMemcpy<uint16_t>(&remaining_);
@@ -633,14 +738,14 @@
THROW("Data is in new DWARF format we don't understand");
}
- uint64_t debug_abbrev_offset = unit_sizes_.ReadDWARFOffset(&remaining_);
- unit_abbrev_ = &abbrev_tables_[debug_abbrev_offset];
+ debug_abbrev_offset_ = unit_sizes_.ReadDWARFOffset(&remaining_);
+ unit_abbrev_ = &abbrev_tables_[debug_abbrev_offset_];
- // If we haven't already read abbreviations for this debug_abbrev_offset, we
+ // If we haven't already read abbreviations for this debug_abbrev_offset_, we
// need to do so now.
if (unit_abbrev_->IsEmpty()) {
string_view abbrev_data = dwarf_.debug_abbrev;
- SkipBytes(debug_abbrev_offset, &abbrev_data);
+ SkipBytes(debug_abbrev_offset_, &abbrev_data);
unit_abbrev_->ReadAbbrevs(abbrev_data);
}
@@ -733,6 +838,132 @@
}
};
+// FormReader for void. For skipping the data instead of reading it somewhere.
+// The one exception is DW_FORM_strp: its offset is consumed like any other
+// attribute, but the string it refers to is also looked up and passed to
+// DIEReader::AddIndirectString().
+template <>
+class FormReader<void> : public FormReaderBase<FormReader<void>> {
+ public:
+ typedef FormReader ME;
+ typedef FormReaderBase<ME> Base;
+ typedef void type;
+ using Base::data_;
+
+ // The value pointer is ignored: nothing is stored when skipping.
+ FormReader(const DIEReader& reader, string_view data, void* /*val*/)
+ : Base(reader, data) {}
+
+ // Calls |func| with the member function that skips one attribute encoded as
+ // |form|. Reports an error through THROWF for forms, or address sizes, that
+ // it does not know how to skip.
+ template <class Func>
+ static void GetFunctionForForm(CompilationUnitSizes sizes, uint8_t form,
+ Func func) {
+ switch (form) {
+ case DW_FORM_flag_present:
+ func(&Base::template ReadAttr<&ME::DoNothing>);
+ return;
+ case DW_FORM_data1:
+ case DW_FORM_ref1:
+ case DW_FORM_flag:
+ func(&Base::template ReadAttr<&ME::SkipFixed<1>>);
+ return;
+ case DW_FORM_data2:
+ case DW_FORM_ref2:
+ func(&Base::template ReadAttr<&ME::SkipFixed<2>>);
+ return;
+ case DW_FORM_data4:
+ case DW_FORM_ref4:
+ func(&Base::template ReadAttr<&ME::SkipFixed<4>>);
+ return;
+ case DW_FORM_data8:
+ case DW_FORM_ref8:
+ case DW_FORM_ref_sig8:
+ func(&Base::template ReadAttr<&ME::SkipFixed<8>>);
+ return;
+ case DW_FORM_addr:
+ case DW_FORM_ref_addr:
+ // Width follows the compilation unit's address size.
+ if (sizes.address_size == 8) {
+ func(&Base::template ReadAttr<&ME::SkipFixed<8>>);
+ } else if (sizes.address_size == 4) {
+ func(&Base::template ReadAttr<&ME::SkipFixed<4>>);
+ } else {
+ THROWF("don't know how to skip address size $0", sizes.address_size);
+ }
+ return;
+ case DW_FORM_sec_offset:
+ // Width follows the 32-bit/64-bit DWARF format of the unit.
+ if (sizes.dwarf64) {
+ func(&Base::template ReadAttr<&ME::SkipFixed<8>>);
+ } else {
+ func(&Base::template ReadAttr<&ME::SkipFixed<4>>);
+ }
+ return;
+ case DW_FORM_strp:
+ // Same widths as DW_FORM_sec_offset, but the referenced string is also
+ // reported to the DIEReader.
+ if (sizes.dwarf64) {
+ func(&Base::template ReadAttr<&ME::SkipIndirectString<uint64_t>>);
+ } else {
+ func(&Base::template ReadAttr<&ME::SkipIndirectString<uint32_t>>);
+ }
+ return;
+ case DW_FORM_sdata:
+ case DW_FORM_udata:
+ case DW_FORM_ref_udata:
+ func(&Base::template ReadAttr<&ME::SkipVariable>);
+ return;
+ case DW_FORM_block1:
+ func(&Base::template ReadAttr<&ME::SkipBlock<uint8_t>>);
+ return;
+ case DW_FORM_block2:
+ func(&Base::template ReadAttr<&ME::SkipBlock<uint16_t>>);
+ return;
+ case DW_FORM_block4:
+ func(&Base::template ReadAttr<&ME::SkipBlock<uint32_t>>);
+ return;
+ case DW_FORM_block:
+ case DW_FORM_exprloc:
+ func(&Base::template ReadAttr<&ME::SkipVariableBlock>);
+ return;
+ case DW_FORM_string:
+ func(&Base::template ReadAttr<&ME::SkipString>);
+ return;
+ case DW_FORM_indirect:
+ func(&ME::ReadIndirect);
+ return;
+ default:
+ THROWF("don't know how to skip DWARF form $0", form);
+ }
+ }
+
+ private:
+ // For forms that occupy no bytes in the data stream.
+ void DoNothing() {}
+
+ // Skips exactly N bytes.
+ template <size_t N>
+ void SkipFixed() {
+ SkipBytes(N, &data_);
+ }
+
+ // Skips one LEB128-encoded value.
+ void SkipVariable() {
+ SkipLEB128(&data_);
+ }
+
+ // Skips a block whose length is stored in a fixed-width prefix of type D.
+ template <class D>
+ void SkipBlock() {
+ D len = ReadMemcpy<D>(&data_);
+ SkipBytes(len, &data_);
+ }
+
+ // Skips a block whose length is stored as a LEB128 prefix.
+ void SkipVariableBlock() {
+ uint64_t len = ReadLEB128<uint64_t>(&data_);
+ SkipBytes(len, &data_);
+ }
+
+ // Skips an inline NUL-terminated string.
+ void SkipString() {
+ SkipNullTerminated(&data_);
+ }
+
+ // Consumes an offset of type D, looks up the string at that offset in
+ // debug_str and hands it to the reader via AddIndirectString().
+ template <class D>
+ void SkipIndirectString() {
+ D ofs = ReadMemcpy<D>(&data_);
+ StringTable table(reader_.dwarf().debug_str);
+ string_view str = table.ReadEntry(ofs);
+ reader_.AddIndirectString(str);
+ }
+};
+
// FormReader for string_view. We accept the true string forms (DW_FORM_string
// and DW_FORM_strp) as well as a number of other forms that contain delimited
// string data. We also accept the generic/opaque DW_FORM_data* types; the
@@ -792,7 +1023,9 @@
func(&FormReader::ReadIndirect);
return;
default:
- THROWF("don't know how to translate form $0 to string_view", form);
+ // Skip it.
+ FormReader<void>::GetFunctionForForm(sizes, form, func);
+ return;
}
}
@@ -824,6 +1057,7 @@
D ofs = ReadMemcpy<D>(&data_);
StringTable table(reader_.dwarf().debug_str);
*val_ = table.ReadEntry(ofs);
+ reader_.AddIndirectString(*val_);
}
};
@@ -915,7 +1149,9 @@
func(&Base::ReadIndirect);
return;
default:
- THROWF("don't know how to translate form $0 to integer", form);
+ // Skip it.
+ FormReader<void>::GetFunctionForForm(sizes, form, func);
+ return;
}
}
@@ -982,118 +1218,6 @@
}
};
-// FormReader for void. For skipping the data instead of reading it somewhere.
-template <>
-class FormReader<void> : public FormReaderBase<FormReader<void>> {
- public:
- typedef FormReader ME;
- typedef FormReaderBase<ME> Base;
- typedef void type;
- using Base::data_;
-
- FormReader(const DIEReader& reader, string_view data, void* /*val*/)
- : Base(reader, data) {}
-
- template <class Func>
- static void GetFunctionForForm(CompilationUnitSizes sizes, uint8_t form,
- Func func) {
- switch (form) {
- case DW_FORM_flag_present:
- func(&Base::template ReadAttr<&ME::DoNothing>);
- return;
- case DW_FORM_data1:
- case DW_FORM_ref1:
- case DW_FORM_flag:
- func(&Base::template ReadAttr<&ME::SkipFixed<1>>);
- return;
- case DW_FORM_data2:
- case DW_FORM_ref2:
- func(&Base::template ReadAttr<&ME::SkipFixed<2>>);
- return;
- case DW_FORM_data4:
- case DW_FORM_ref4:
- func(&Base::template ReadAttr<&ME::SkipFixed<4>>);
- return;
- case DW_FORM_data8:
- case DW_FORM_ref8:
- case DW_FORM_ref_sig8:
- func(&Base::template ReadAttr<&ME::SkipFixed<8>>);
- return;
- case DW_FORM_addr:
- case DW_FORM_ref_addr:
- if (sizes.address_size == 8) {
- func(&Base::template ReadAttr<&ME::SkipFixed<8>>);
- } else if (sizes.address_size == 4) {
- func(&Base::template ReadAttr<&ME::SkipFixed<4>>);
- } else {
- THROWF("don't know how to skip address size $0", sizes.address_size);
- }
- return;
- case DW_FORM_sec_offset:
- case DW_FORM_strp:
- if (sizes.dwarf64) {
- func(&Base::template ReadAttr<&ME::SkipFixed<8>>);
- } else {
- func(&Base::template ReadAttr<&ME::SkipFixed<4>>);
- }
- return;
- case DW_FORM_sdata:
- case DW_FORM_udata:
- case DW_FORM_ref_udata:
- func(&Base::template ReadAttr<&ME::SkipVariable>);
- return;
- case DW_FORM_block1:
- func(&Base::template ReadAttr<&ME::SkipBlock<uint8_t>>);
- return;
- case DW_FORM_block2:
- func(&Base::template ReadAttr<&ME::SkipBlock<uint16_t>>);
- return;
- case DW_FORM_block4:
- func(&Base::template ReadAttr<&ME::SkipBlock<uint32_t>>);
- return;
- case DW_FORM_block:
- case DW_FORM_exprloc:
- func(&Base::template ReadAttr<&ME::SkipVariableBlock>);
- return;
- case DW_FORM_string:
- func(&Base::template ReadAttr<&ME::SkipString>);
- return;
- case DW_FORM_indirect:
- func(&ME::ReadIndirect);
- return;
- default:
- THROWF("don't know how to skip DWARF form $0", form);
- }
- }
-
- private:
- void DoNothing() {}
-
- template <size_t N>
- void SkipFixed() {
- SkipBytes(N, &data_);
- }
-
- void SkipVariable() {
- SkipLEB128(&data_);
- }
-
- template <class D>
- void SkipBlock() {
- D len = ReadMemcpy<D>(&data_);
- SkipBytes(len, &data_);
- }
-
- void SkipVariableBlock() {
- uint64_t len = ReadLEB128<uint64_t>(&data_);
- SkipBytes(len, &data_);
- }
-
- void SkipString() {
- SkipNullTerminated(&data_);
- }
-};
-
// ActionBuf ///////////////////////////////////////////////////////////////////
@@ -1157,7 +1281,9 @@
// Overwrite any entries for attributes we actually want to store somewhere.
for (const auto& action : indexed_actions) {
- if (action.action.func) {
+ const auto& attr = abbrev.attr[action.index];
+ if (action.action.func &&
+ action.action.func != GetFormDecodeFunc<void>(attr.form, sizes)) {
assert(action.index < action_list_.size());
if (action_list_[action.index].data) {
THROW(
@@ -1733,17 +1859,28 @@
return true;
}
-void AddDIE(const std::string& name,
+void AddDIE(const dwarf::File& file, const std::string& name,
const dwarf::FixedAttrReader<string_view, string_view, uint64_t,
- uint64_t>& attr,
- const SymbolTable& symtab, RangeSink* sink) {
+ uint64_t, string_view, uint64_t,
+ uint64_t, uint64_t, uint64_t>& attr,
+ const SymbolTable& symtab, const DualMap& symbol_map,
+ const dwarf::CompilationUnitSizes& sizes, RangeSink* sink) {
uint64_t low_pc = attr.GetAttribute<2>();
uint64_t high_pc = attr.GetAttribute<3>();
+ // Some DIEs mark address ranges with high_pc/low_pc pairs (especially
+ // functions).
if (attr.HasAttribute<2>() && attr.HasAttribute<3>()) {
- sink->AddVMRangeIgnoreDuplicate(low_pc, high_pc - low_pc, name);
+ // It appears that some compilers make high_pc a size, and others make it an
+ // address.
+ if (high_pc > low_pc) {
+ high_pc -= low_pc;
+ }
+ sink->AddVMRangeIgnoreDuplicate(low_pc, high_pc, name);
}
+ // Sometimes a DIE has a linkage_name, which we can look up in the symbol
+ // table.
if (attr.HasAttribute<1>()) {
auto it = symtab.find(attr.GetAttribute<1>());
if (it != symtab.end()) {
@@ -1751,17 +1888,114 @@
name);
}
}
+
+ // Sometimes the DIE has a "location", which gives the location as an address.
+ // This parses a very small subset of the overall DWARF expression grammar.
+ if (attr.HasAttribute<4>()) {
+ string_view location = attr.GetAttribute<4>();
+ if (location.size() == sizes.address_size + 1 &&
+ location[0] == DW_OP_addr) {
+ location.remove_prefix(1);
+ uint64_t addr;
+ // TODO(haberman): endian?
+ if (sizes.address_size == 4) {
+ addr = dwarf::ReadMemcpy<uint32_t>(&location);
+ } else if (sizes.address_size == 8) {
+ addr = dwarf::ReadMemcpy<uint64_t>(&location);
+ } else {
+ THROW("Unexpected address size");
+ }
+
+ // Unfortunately the location doesn't include a size, so we look that part
+ // up in the symbol map.
+ uint64_t size;
+ if (symbol_map.vm_map.TryGetSize(addr, &size)) {
+ sink->AddVMRangeIgnoreDuplicate(addr, size, name);
+ } else {
+ if (verbose_level > 0) {
+ fprintf(stderr,
+ "bloaty: warning: couldn't find DWARF location in symbol "
+ "table, address: %" PRIx64 "\n",
+ addr);
+ }
+ }
+ }
+ }
+
+ if (attr.HasAttribute<5>()) {
+ absl::string_view loc_range = file.debug_loc.substr(attr.GetAttribute<5>());
+ loc_range = GetLocationListRange(sizes, loc_range);
+ sink->AddFileRange(name, loc_range);
+ }
+
+ uint64_t ranges_offset = UINT64_MAX;
+
+ if (attr.HasAttribute<7>()) {
+ ranges_offset = attr.GetAttribute<7>();
+ } else if (attr.HasAttribute<8>()) {
+ ranges_offset = attr.GetAttribute<8>();
+ }
+
+ if (ranges_offset != UINT64_MAX) {
+ absl::string_view ranges_range = file.debug_ranges.substr(ranges_offset);
+ ranges_range = GetRangeListRange(sizes, ranges_range);
+ sink->AddFileRange(name, ranges_range);
+ }
+}
+
+// Walks the length-prefixed units in |section| and attributes each whole unit,
+// initial length field included, to the compile unit found at the .debug_info
+// offset stored in the unit's header. Units whose compile unit yields an empty
+// DW_AT_name are left unattributed.
+static void ReadDWARFPubNames(const dwarf::File& file, string_view section,
+                              RangeSink* sink) {
+  dwarf::DIEReader reader(file);
+  dwarf::FixedAttrReader<string_view> name_reader(&reader, {DW_AT_name});
+
+  for (string_view rest = section; !rest.empty();) {
+    dwarf::CompilationUnitSizes sizes;
+    const string_view unit_start = rest;
+    string_view body = sizes.ReadInitialLength(&rest);
+    // Widen the range so that it also covers the initial length field.
+    const size_t length_field_size = body.data() - unit_start.data();
+    const string_view whole_unit =
+        unit_start.substr(0, body.size() + length_field_size);
+
+    // Two bytes precede the .debug_info offset (presumably the version).
+    dwarf::SkipBytes(2, &body);
+    const uint64_t info_offset = sizes.ReadDWARFOffset(&body);
+    if (!reader.SeekToCompilationUnit(dwarf::DIEReader::Section::kDebugInfo,
+                                      info_offset)) {
+      THROW("Couldn't seek to debug_info section");
+    }
+
+    name_reader.ReadAttributes(&reader);
+    const std::string unit_name(name_reader.GetAttribute<0>());
+    if (!unit_name.empty()) {
+      sink->AddFileRange(unit_name, whole_unit);
+    }
+  }
+}
+
+// Attributes to |unit_name| the length-prefixed .debug_line unit that begins
+// |offset| bytes into the section, initial length field included.
+static void ReadDWARFStmtListRange(const dwarf::File& file, uint64_t offset,
+                                   string_view unit_name, RangeSink* sink) {
+  string_view from_offset = file.debug_line;
+  dwarf::SkipBytes(offset, &from_offset);
+
+  // ReadInitialLength() advances its argument, so parse a scratch copy and
+  // keep |from_offset| anchored at the first byte of the unit.
+  string_view cursor = from_offset;
+  dwarf::CompilationUnitSizes sizes;
+  const string_view body = sizes.ReadInitialLength(&cursor);
+  const size_t length_field_size = body.data() - from_offset.data();
+  sink->AddFileRange(unit_name,
+                     from_offset.substr(0, body.size() + length_field_size));
}
// The DWARF debug info can help us get compileunits info. DIEs for compilation
// units, functions, and global variables often have attributes that will
// resolve to addresses.
static void ReadDWARFDebugInfo(const dwarf::File& file,
- const SymbolTable& symtab, RangeSink* sink) {
+ const SymbolTable& symtab,
+ const DualMap& symbol_map, RangeSink* sink) {
dwarf::DIEReader die_reader(file);
- dwarf::FixedAttrReader<string_view, string_view, uint64_t, uint64_t>
- attr_reader(&die_reader, {DW_AT_name, DW_AT_linkage_name, DW_AT_low_pc,
- DW_AT_high_pc});
+ die_reader.set_strp_sink(sink);
+ dwarf::FixedAttrReader<string_view, string_view, uint64_t, uint64_t,
+ string_view, uint64_t, uint64_t, uint64_t, uint64_t>
+ attr_reader(&die_reader,
+ {DW_AT_name, DW_AT_linkage_name, DW_AT_low_pc, DW_AT_high_pc,
+ DW_AT_location, DW_AT_location, DW_AT_stmt_list,
+ DW_AT_ranges, DW_AT_start_scope});
if (!die_reader.SeekToStart(dwarf::DIEReader::Section::kDebugInfo)) {
THROW("debug info is present, but empty");
@@ -1770,19 +2004,34 @@
do {
attr_reader.ReadAttributes(&die_reader);
std::string compileunit_name = std::string(attr_reader.GetAttribute<0>());
+ die_reader.set_compileunit_name(compileunit_name);
if (!compileunit_name.empty()) {
- AddDIE(compileunit_name, attr_reader, symtab, sink);
+ sink->AddFileRange(compileunit_name, die_reader.unit_range());
+ AddDIE(file, compileunit_name, attr_reader, symtab, symbol_map,
+ die_reader.unit_sizes(), sink);
+
+ if (attr_reader.HasAttribute<6>()) {
+ uint64_t offset = attr_reader.GetAttribute<6>();
+ ReadDWARFStmtListRange(file, offset, compileunit_name, sink);
+ }
+
+ string_view abbrev_data = file.debug_abbrev;
+ dwarf::SkipBytes(die_reader.debug_abbrev_offset(), &abbrev_data);
+ dwarf::AbbrevTable unit_abbrev;
+ abbrev_data = unit_abbrev.ReadAbbrevs(abbrev_data);
+ sink->AddFileRange(compileunit_name, abbrev_data);
while (die_reader.NextDIE()) {
attr_reader.ReadAttributes(&die_reader);
- AddDIE(compileunit_name, attr_reader, symtab, sink);
+ AddDIE(file, compileunit_name, attr_reader, symtab, symbol_map,
+ die_reader.unit_sizes(), sink);
}
}
} while (die_reader.NextCompilationUnit());
}
void ReadDWARFCompileUnits(const dwarf::File& file, const SymbolTable& symtab,
- RangeSink* sink) {
+ const DualMap& symbol_map, RangeSink* sink) {
if (!file.debug_info.size()) {
THROW("missing debug info");
}
@@ -1791,7 +2040,9 @@
ReadDWARFAddressRanges(file, sink);
}
- ReadDWARFDebugInfo(file, symtab, sink);
+ ReadDWARFDebugInfo(file, symtab, symbol_map, sink);
+ ReadDWARFPubNames(file, file.debug_pubnames, sink);
+ ReadDWARFPubNames(file, file.debug_pubtypes, sink);
}
static std::string LineInfoKey(const std::string& file, uint32_t line,
diff --git a/src/elf.cc b/src/elf.cc
index 08598d3..981b73a 100644
--- a/src/elf.cc
+++ b/src/elf.cc
@@ -170,8 +170,10 @@
// Requires: header().sh_type == SHT_STRTAB.
string_view ReadString(Elf64_Word index) const;
- // Requires: header().sh_type == SHT_SYMTAB
- void ReadSymbol(Elf64_Word index, Elf64_Sym* sym) const;
+ // Requires: header().sh_type == SHT_SYMTAB || header().sh_type ==
+ // SHT_DYNSYM
+ void ReadSymbol(Elf64_Word index, Elf64_Sym* sym,
+ string_view* file_range) const;
// Requires: header().sh_type == SHT_REL
void ReadRelocation(Elf64_Word index, Elf64_Rel* rel) const;
@@ -442,10 +444,16 @@
return contents_.size() / header_.sh_entsize;
}
-void ElfFile::Section::ReadSymbol(Elf64_Word index, Elf64_Sym* sym) const {
- assert(header().sh_type == SHT_SYMTAB);
+void ElfFile::Section::ReadSymbol(Elf64_Word index, Elf64_Sym* sym,
+ string_view* file_range) const {
+ assert(header().sh_type == SHT_SYMTAB || header().sh_type == SHT_DYNSYM);
ElfFile::StructReader reader(*elf_, contents());
- reader.Read<Elf32_Sym>(header_.sh_entsize * index, SymMunger(), sym);
+ size_t offset = header_.sh_entsize * index;
+ reader.Read<Elf32_Sym>(offset, SymMunger(), sym);
+ if (file_range) {
+ size_t size = elf_->is_64bit() ? sizeof(Elf64_Sym) : sizeof(Elf32_Sym);
+ *file_range = contents().substr(offset, size);
+ }
}
void ElfFile::Section::ReadRelocation(Elf64_Word index, Elf64_Rel* rel) const {
@@ -707,8 +715,10 @@
}
}
+// Iterate over each ELF file, agnostic to whether it is inside a .a (AR) file
+// or not.
template <class Func>
-bool ForEachElf(const InputFile& file, RangeSink* sink, Func func) {
+void ForEachElf(const InputFile& file, RangeSink* sink, Func func) {
ArFile ar_file(file.data());
unsigned long index_base = 0;
@@ -743,18 +753,14 @@
} else {
ElfFile elf(file.data());
if (!elf.IsOpen()) {
- fprintf(stderr, "Not an ELF or Archive file: %s\n",
- file.filename().c_str());
- return false;
+ THROWF("Not an ELF or Archive file: $0", file.filename());
}
func(elf, file.filename(), index_base);
}
-
- return true;
}
-void AddELFFallback(RangeSink* sink) {
+void AddCatchAll(RangeSink* sink) {
ForEachElf(sink->input_file(), sink,
[sink](const ElfFile& elf, string_view /*filename*/,
uint32_t /*index_base*/) {
@@ -827,10 +833,13 @@
for (Elf64_Word i = 1; i < symbol_count; i++) {
Elf64_Sym sym;
- section.ReadSymbol(i, &sym);
- int type = ELF64_ST_TYPE(sym.st_info);
+ section.ReadSymbol(i, &sym, nullptr);
- if (type != STT_OBJECT && type != STT_FUNC) {
+ if (ELF64_ST_TYPE(sym.st_info) == STT_SECTION) {
+ continue;
+ }
+
+ if (sym.st_shndx == STN_UNDEF) {
continue;
}
@@ -856,15 +865,70 @@
});
}
+// Adds file ranges for the symbol tables and string tables *themselves* (i.e.
+// the space that the symtab/strtab take up in the file). This will cover
+//   .symtab
+//   .strtab
+//   .dynsym
+//   .dynstr
+// Each symbol's table entry and its name string (with the trailing NUL) are
+// attributed to whatever label the sink already holds for the symbol's VM
+// address.
+static void ReadELFSymbolTables(const InputFile& file, RangeSink* sink) {
+  bool is_object = IsObjectFile(file.data());
+
+  ForEachElf(
+      file, sink,
+      [=](const ElfFile& elf, string_view /*filename*/, uint32_t index_base) {
+        for (Elf64_Xword i = 1; i < elf.section_count(); i++) {
+          ElfFile::Section section;
+          elf.ReadSection(i, &section);
+
+          if (section.header().sh_type != SHT_SYMTAB &&
+              section.header().sh_type != SHT_DYNSYM) {
+            continue;
+          }
+
+          Elf64_Word symbol_count = section.GetEntryCount();
+
+          // Find the corresponding section where the strings for the symbol
+          // table can be found.
+          ElfFile::Section strtab_section;
+          elf.ReadSection(section.header().sh_link, &strtab_section);
+          if (strtab_section.header().sh_type != SHT_STRTAB) {
+            THROW("symtab section pointed to non-strtab section");
+          }
+
+          // A distinct index name avoids shadowing the section index |i|.
+          for (Elf64_Word sym_index = 1; sym_index < symbol_count;
+               sym_index++) {
+            Elf64_Sym sym;
+            string_view sym_range;
+            section.ReadSymbol(sym_index, &sym, &sym_range);
+
+            // Skip section symbols, symbols with an undefined section index,
+            // and symbols without a name (string table offset 0).
+            if (ELF64_ST_TYPE(sym.st_info) == STT_SECTION ||
+                sym.st_shndx == SHN_UNDEF || sym.st_name == 0) {
+              continue;
+            }
+
+            string_view name = strtab_section.ReadString(sym.st_name);
+            uint64_t full_addr =
+                ToVMAddr(sym.st_value, index_base + sym.st_shndx, is_object);
+            // Capture the trailing NULL.
+            name = string_view(name.data(), name.size() + 1);
+            sink->AddFileRangeFor(full_addr, name);
+            sink->AddFileRangeFor(full_addr, sym_range);
+          }
+        }
+      });
+}
+
enum ReportSectionsBy {
kReportBySectionName,
+ kReportByEscapedSectionName,
kReportByFlags,
kReportByFilename,
};
-static bool DoReadELFSections(RangeSink* sink, enum ReportSectionsBy report_by) {
+static void DoReadELFSections(RangeSink* sink, enum ReportSectionsBy report_by) {
bool is_object = IsObjectFile(sink->input_file().data());
- return ForEachElf(
+ ForEachElf(
sink->input_file(), sink,
[=](const ElfFile& elf, string_view filename, uint32_t index_base) {
std::string name_from_flags;
@@ -908,6 +972,9 @@
sink->AddRange(name_from_flags, full_addr, vmsize, contents);
} else if (report_by == kReportBySectionName) {
sink->AddRange(name, full_addr, vmsize, contents);
+ } else if (report_by == kReportByEscapedSectionName) {
+ sink->AddRange(std::string("[section ") + std::string(name) + "]",
+ full_addr, vmsize, contents);
} else if (report_by == kReportByFilename) {
sink->AddRange(filename, full_addr, vmsize, contents);
}
@@ -988,6 +1055,14 @@
dwarf->debug_abbrev = section.contents();
} else if (name == ".debug_line") {
dwarf->debug_line = section.contents();
+ } else if (name == ".debug_loc") {
+ dwarf->debug_loc = section.contents();
+ } else if (name == ".debug_pubnames") {
+ dwarf->debug_pubnames = section.contents();
+ } else if (name == ".debug_pubtypes") {
+ dwarf->debug_pubtypes = section.contents();
+ } else if (name == ".debug_ranges") {
+ dwarf->debug_ranges = section.contents();
}
}
}
@@ -1090,6 +1165,8 @@
case DataSource::kShortSymbols:
case DataSource::kFullSymbols:
ReadELFSymbols(debug_file().file_data(), sink, nullptr);
+ ReadELFSymbolTables(sink->input_file(), sink);
+ DoReadELFSections(sink, kReportByEscapedSectionName);
break;
case DataSource::kArchiveMembers:
DoReadELFSections(sink, kReportByFilename);
@@ -1097,10 +1174,18 @@
case DataSource::kCompileUnits: {
CheckNotObject("compileunits", sink);
SymbolTable symtab;
- ReadELFSymbols(debug_file().file_data(), nullptr, &symtab);
+ DualMap symbol_map;
+ NameMunger empty_munger;
+ RangeSink symbol_sink(&debug_file().file_data(),
+ DataSource::kRawSymbols,
+ &sinks[0]->MapAtIndex(0));
+ symbol_sink.AddOutput(&symbol_map, &empty_munger);
+ ReadELFSymbols(debug_file().file_data(), &symbol_sink, &symtab);
dwarf::File dwarf;
ReadDWARFSections(debug_file().file_data(), &dwarf);
- ReadDWARFCompileUnits(dwarf, symtab, sink);
+ ReadDWARFCompileUnits(dwarf, symtab, symbol_map, sink);
+ ReadELFSymbolTables(sink->input_file(), sink);
+ DoReadELFSections(sink, kReportByEscapedSectionName);
break;
}
case DataSource::kInlines: {
@@ -1108,13 +1193,16 @@
dwarf::File dwarf;
ReadDWARFSections(debug_file().file_data(), &dwarf);
ReadDWARFInlines(dwarf, sink, true);
+ DoReadELFSections(sink, kReportByEscapedSectionName);
break;
}
default:
THROW("unknown data source");
}
- AddELFFallback(sink);
+ // Add these *after* processing all other data sources.
+ AddCatchAll(sink);
}
+
}
bool GetDisassemblyInfo(absl::string_view symbol, DataSource symbol_source,