| // Copyright 2016 Google Inc. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include <assert.h> |
| #include <stdio.h> |
| |
| #include <algorithm> |
| #include <initializer_list> |
| #include <iostream> |
| #include <memory> |
| #include <stack> |
| #include <unordered_map> |
| #include <unordered_set> |
| #include <vector> |
| |
| #include "bloaty.h" |
| #include "dwarf_constants.h" |
| #include "re2/re2.h" |
| |
| using namespace dwarf2reader; |
| |
// Rounds |offset| up to the next multiple of |granularity| (values that are
// already a multiple are returned unchanged).  |granularity| must be a power
// of two, because the rounding is done with a bit mask.
static size_t AlignUpTo(size_t offset, size_t granularity) {
  const size_t mask = granularity - 1;
  return (offset + mask) & ~mask;
}
| |
| namespace bloaty { |
| namespace dwarf { |
| |
// Divides |n| by |d|, biasing the numerator by (d - 1) first so that the
// truncating integer division rounds up for non-negative |n| and positive |d|.
int DivRoundUp(int n, int d) {
  const int bias = d - 1;
  return (n + bias) / d;
}
| |
// Early-exit helpers for the parsing code below.  Each one evaluates |call|
// exactly once and, if the result is false, returns a failure value from the
// *enclosing* function.

// For functions returning bool: bails out with false.
#define CHECK_RETURN(call) \
  if (!(call)) {           \
    return false;          \
  }

// For functions returning StringPiece: bails out with a default-constructed
// StringPiece.
#define CHECK_RETURN_STRINGPIECE(call) \
  if (!(call)) {                       \
    return StringPiece();              \
  }
| |
| |
| // Low-level Parsing Routines ////////////////////////////////////////////////// |
| |
| // For parsing the low-level values found in DWARF files. These are the only |
| // routines that touch the bytes of the input buffer directly. Everything else |
| // is layered on top of these. |
| |
| template <class T> |
| bool ReadMemcpy(StringPiece* data, T* val) { |
| CHECK_RETURN(data->size() >= sizeof(T)); |
| memcpy(val, data->data(), sizeof(T)); |
| data->remove_prefix(sizeof(T)); |
| return true; |
| } |
| |
| bool ReadPiece(size_t bytes, StringPiece* data, StringPiece* val) { |
| CHECK_RETURN(data->size() >= bytes); |
| *val = data->substr(0, bytes); |
| data->remove_prefix(bytes); |
| return true; |
| } |
| |
| bool SkipBytes(size_t bytes, StringPiece* data) { |
| CHECK_RETURN(data->size() >= bytes); |
| data->remove_prefix(bytes); |
| return true; |
| } |
| |
| bool ReadNullTerminated(StringPiece* data, StringPiece* val) { |
| const char* nullz = |
| static_cast<const char*>(memchr(data->data(), '\0', data->size())); |
| |
| // Return false if not NULL-terminated. |
| CHECK_RETURN(nullz != NULL); |
| |
| size_t len = nullz - data->data(); |
| *val = data->substr(0, len); |
| data->remove_prefix(len + 1); // Remove NULL also. |
| return true; |
| } |
| |
| // Parses the LEB128 format defined by DWARF (both signed and unsigned |
| // versions). |
| |
| template <class T> |
| typename std::enable_if<std::is_unsigned<T>::value, bool>::type ReadLEB128( |
| StringPiece* data, T* out) { |
| uint64_t ret = 0; |
| int shift = 0; |
| int maxshift = 70; |
| const char* ptr = data->data(); |
| const char* limit = ptr + data->size(); |
| |
| for (; ptr < limit && shift < maxshift; shift += 7) { |
| char byte = *(ptr++); |
| ret |= (byte & 0x7f) << shift; |
| if ((byte & 0x80) == 0) { |
| data->remove_prefix(ptr - data->data()); |
| if (ret > std::numeric_limits<T>::max()) { |
| fprintf(stderr, |
| "DWARF data contained larger LEB128 than we were expecting.\n"); |
| return false; |
| } |
| *out = static_cast<T>(ret); |
| return true; |
| } |
| } |
| |
| fprintf(stderr, "Corrupt DWARF data, unterminated LEB128.\n"); |
| return false; |
| } |
| |
| template <class T> |
| typename std::enable_if<std::is_signed<T>::value, bool>::type ReadLEB128( |
| StringPiece* data, T* out) { |
| int64_t ret = 0; |
| int shift = 0; |
| int maxshift = 70; |
| const char* ptr = data->data(); |
| const char* limit = ptr + data->size(); |
| |
| while (ptr < limit && shift < maxshift) { |
| char byte = *(ptr++); |
| ret |= (byte & 0x7f) << shift; |
| shift += 7; |
| if ((byte & 0x80) == 0) { |
| data->remove_prefix(ptr - data->data()); |
| if (byte & 0x40) { |
| ret |= -(1 << shift); |
| } |
| if (ret > std::numeric_limits<T>::max() || |
| ret < std::numeric_limits<T>::min()) { |
| fprintf(stderr, |
| "DWARF data contained larger LEB128 than we were expecting.\n"); |
| return false; |
| } |
| *out = ret; |
| return true; |
| } |
| } |
| |
| fprintf(stderr, "Corrupt DWARF data, unterminated LEB128.\n"); |
| return false; |
| } |
| |
| bool SkipLEB128(StringPiece* data) { |
| size_t limit = |
| std::min(static_cast<size_t>(data->size()), static_cast<size_t>(10)); |
| for (size_t i = 0; i < limit; i++) { |
| if (((*data)[i] & 0x80) == 0) { |
| data->remove_prefix(i + 1); |
| return true; |
| } |
| } |
| |
| fprintf(stderr, "Corrupt DWARF data, unterminated LEB128.\n"); |
| return false; |
| } |
| |
| // Some size information attached to each compilation unit. The size of an |
| // address or offset in the DWARF data depends on this state which is parsed |
| // from the header. |
| struct CompilationUnitSizes { |
| // When true, DWARF offsets are 64 bits, otherwise they are 32 bit. |
| bool dwarf64; |
| |
| // The size of addresses. |
| uint8_t address_size; |
| |
| // To allow this as the key in a map. |
| bool operator<(const CompilationUnitSizes& rhs) const { |
| return std::tie(dwarf64, address_size) < |
| std::tie(rhs.dwarf64, rhs.address_size); |
| } |
| |
| // Reads a DWARF offset based on whether we are reading dwarf32 or dwarf64 |
| // format. |
| bool ReadDWARFOffset(StringPiece* data, uint64_t* ofs) const { |
| if (dwarf64) { |
| return ReadMemcpy(data, ofs); |
| } else { |
| uint32_t ofs32; |
| CHECK_RETURN(ReadMemcpy(data, &ofs32)); |
| *ofs = ofs32; |
| return true; |
| } |
| } |
| |
| // Reads an address according to the expected address_size. |
| bool ReadAddress(StringPiece* data, uint64_t* addr) const { |
| if (address_size == 8) { |
| return ReadMemcpy(data, addr); |
| } else if (address_size == 4) { |
| uint32_t addr32; |
| CHECK_RETURN(ReadMemcpy(data, &addr32)); |
| *addr = addr32; |
| return true; |
| } else { |
| fprintf(stderr, "bloaty: unexpected address size: %d\n", |
| static_cast<int>(address_size)); |
| return false; |
| } |
| } |
| |
| // Reads an "initial length" as specified in many DWARF headers. This |
| // contains either a 32-bit or a 64-bit length, and signals whether we are |
| // using the 32-bit or 64-bit DWARF format (so it sets dwarf64 appropriately). |
| // |
| // Stores the range for this section in |data| and all of the remaining data |
| // in |next|. |
| bool ReadInitialLength(StringPiece* data, StringPiece* next) { |
| uint64_t len; |
| uint32_t len32; |
| CHECK_RETURN(ReadMemcpy(data, &len32)); |
| |
| if (len32 == 0xffffffff) { |
| dwarf64 = true; |
| CHECK_RETURN(ReadMemcpy(data, &len)); |
| } else { |
| dwarf64 = false; |
| len = len32; |
| } |
| |
| CHECK_RETURN(data->size() >= len); |
| |
| if (next) *next = data->substr(len); |
| data->remove_suffix(data->size() - len); |
| return true; |
| } |
| }; |
| |
| |
| // AbbrevTable ///////////////////////////////////////////////////////////////// |
| |
| // Parses and stores a representation of (a portion of) the .debug_abbrev |
| // section of a DWARF file. An abbreviation is defined by a unique "code" |
| // (unique within one table), and defines the DIE tag and set of attributes. |
| // The encoding of the DIE then contains just the abbreviation code and the |
| // attribute values -- thanks to the abbreviation table, the tag and attribute |
| // keys/names are not required. |
| // |
| // The abbreviations are an internal detail of the DWARF format and users should |
| // not need to care about them. |
| |
| class AbbrevTable { |
| public: |
| // Reads abbreviations until a terminating abbreviation is seen. Returns |
| // false if there is a parse error or a premature EOF. |
| bool ReadAbbrevs(StringPiece data); |
| |
| // In a DWARF abbreviation, each attribute has a name and a form. |
| struct Attribute { |
| uint16_t name; |
| uint8_t form; |
| }; |
| |
| // The representation of a single abbreviation. |
| struct Abbrev { |
| uint32_t code; |
| uint16_t tag; |
| bool has_child; |
| std::vector<Attribute> attr; |
| }; |
| |
| bool IsEmpty() const { return abbrev_.empty(); } |
| |
| // Looks for an abbreviation with the given code. Returns true if the lookup |
| // succeeded. |
| bool GetAbbrev(uint32_t code, const Abbrev** abbrev) const { |
| auto it = abbrev_.find(code); |
| if (it != abbrev_.end()) { |
| *abbrev = &it->second; |
| return true; |
| } else { |
| return false; |
| } |
| } |
| |
| private: |
| // Keyed by abbreviation code. |
| // Generally we expect these to be small, so we could almost use a vector<>. |
| // But you never know what crazy input data is going to do... |
| std::unordered_map<uint32_t, Abbrev> abbrev_; |
| }; |
| |
| bool AbbrevTable::ReadAbbrevs(StringPiece data) { |
| while (true) { |
| uint32_t code; |
| CHECK_RETURN(ReadLEB128(&data, &code)); |
| |
| if (code == 0) { |
| return true; // Terminator entry. |
| } |
| |
| Abbrev& abbrev = abbrev_[code]; |
| |
| if (abbrev.code) { |
| fprintf(stderr, "bloaty: DWARF data contained duplicate abbrev code.\n"); |
| return false; |
| } |
| |
| uint8_t has_child; |
| |
| abbrev.code = code; |
| CHECK_RETURN(ReadLEB128(&data, &abbrev.tag)); |
| CHECK_RETURN(ReadMemcpy(&data, &has_child)); |
| |
| switch (has_child) { |
| case DW_children_yes: |
| abbrev.has_child = true; |
| break; |
| case DW_children_no: |
| abbrev.has_child = false; |
| break; |
| default: |
| return false; |
| } |
| |
| while (true) { |
| Attribute attr; |
| CHECK_RETURN(ReadLEB128(&data, &attr.name)); |
| CHECK_RETURN(ReadLEB128(&data, &attr.form)); |
| |
| if (attr.name == 0 && attr.form == 0) { |
| break; // End of this abbrev |
| } |
| |
| abbrev.attr.push_back(attr); |
| } |
| } |
| } |
| |
| |
| // StringTable ///////////////////////////////////////////////////////////////// |
| |
| // Represents the .debug_str portion of a DWARF file and contains code for |
| // reading strings out of it. This is an internal detail of the DWARF format |
| // and users should not need to care about it. |
| |
| class StringTable { |
| public: |
| // Construct with the debug_str data from a DWARF file. |
| StringTable(StringPiece debug_str) : debug_str_(debug_str) {} |
| |
| // Read a string from the table. |
| bool ReadEntry(size_t ofs, StringPiece* val) const; |
| |
| private: |
| StringPiece debug_str_; |
| }; |
| |
| bool StringTable::ReadEntry(size_t ofs, StringPiece* val) const { |
| CHECK_RETURN(ofs < debug_str_.size()); |
| StringPiece str = debug_str_.substr(ofs); |
| CHECK_RETURN(ReadNullTerminated(&str, val)); |
| return true; |
| } |
| |
| |
| // AddressRanges /////////////////////////////////////////////////////////////// |
| |
| // Code for reading address ranges out of .debug_aranges. |
| |
| class AddressRanges { |
| public: |
| AddressRanges(StringPiece data) : section_(data), next_unit_(data) {} |
| |
| // Offset into .debug_info for the current compilation unit. |
| uint64_t debug_info_offset() { return debug_info_offset_; } |
| |
| // Address and length for this range. |
| uint64_t address() { return address_; } |
| uint64_t length() { return length_; } |
| |
| // Advance to the next range. The values will be available in address() and |
| // length(). Returns false when the end of this compilation unit is hit. |
| // Must call this once before reading the first range. |
| bool NextRange(); |
| |
| // Advance to the next compilation unit. The unit offset will be available in |
| // debug_info_offset(). Must call this once before reading the first unit. |
| bool NextUnit(); |
| |
| private: |
| CompilationUnitSizes sizes_; |
| StringPiece section_; |
| StringPiece unit_remaining_; |
| StringPiece next_unit_; |
| uint64_t debug_info_offset_; |
| uint64_t address_; |
| uint64_t length_; |
| }; |
| |
| bool AddressRanges::NextRange() { |
| CHECK_RETURN(sizes_.ReadAddress(&unit_remaining_, &address_)); |
| CHECK_RETURN(sizes_.ReadAddress(&unit_remaining_, &length_)); |
| return true; |
| } |
| |
| bool AddressRanges::NextUnit() { |
| unit_remaining_ = next_unit_; |
| CHECK_RETURN(sizes_.ReadInitialLength(&unit_remaining_, &next_unit_)); |
| |
| uint16_t version; |
| CHECK_RETURN(ReadMemcpy(&unit_remaining_, &version)); |
| |
| if (version > 2) { |
| fprintf(stderr, "bloaty: DWARF data is too new for us.\n"); |
| return false; |
| } |
| |
| CHECK_RETURN(sizes_.ReadDWARFOffset(&unit_remaining_, &debug_info_offset_)); |
| |
| uint8_t segment_size; |
| |
| CHECK_RETURN(ReadMemcpy(&unit_remaining_, &sizes_.address_size)); |
| CHECK_RETURN(ReadMemcpy(&unit_remaining_, &segment_size)); |
| |
| if (segment_size) { |
| fprintf(stderr, |
| "bloaty: we don't know how to handle segmented addresses.\n"); |
| return false; |
| } |
| |
| size_t ofs = unit_remaining_.data() - section_.data(); |
| size_t aligned_ofs = AlignUpTo(ofs, sizes_.address_size * 2); |
| unit_remaining_.remove_prefix(aligned_ofs - ofs); |
| |
| return true; |
| } |
| |
| |
| // DIEReader /////////////////////////////////////////////////////////////////// |
| |
| // Reads a sequence of DWARF DIE's (Debugging Information Entries) from the |
| // .debug_info or .debug_types section of a binary. |
| // |
| // Each DIE contains a tag and a set of attribute/value pairs. We rely on the |
| // abbreviations in an AbbrevTable to decode the DIEs. |
| |
| template <class T, class Enable = void> |
| class FormReader; |
| |
| class DIEReader { |
| public: |
| // Constructs a new DIEReader. Cannot be used until you call one of the |
| // Seek() methods below. |
| DIEReader(const File& file) : dwarf_(file) {} |
| |
| // Returns true if we are at the end of DIEs for the current depth and no |
| // error occurred. |
| bool IsEof() const { return state_ == State::kEof; } |
| |
| // Returns true if an error has occurred in reading. |
| bool IsError() const { return state_ == State::kError; } |
| |
| // DIEs exist in both .debug_info and .debug_types. |
| enum class Section { |
| kDebugInfo, |
| kDebugTypes |
| }; |
| |
| // Seeks to the overall start or the start of a specific compilation unit. |
| // Note that |header_offset| is the offset of the compilation unit *header*, |
| // not the offset of the first DIE. |
| bool SeekToCompilationUnit(Section section, uint64_t header_offset); |
| bool SeekToStart(Section section) { |
| return SeekToCompilationUnit(section, 0); |
| } |
| |
| bool NextCompilationUnit(); |
| |
| // Advances to the next overall DIE, ignoring whether it happens to be a |
| // child, a sibling, or an uncle/aunt. Returns false at error or EOF. |
| bool NextDIE(); |
| |
| const AbbrevTable::Abbrev& GetAbbrev() const { |
| assert(!IsEof()); |
| return *current_abbrev_; |
| } |
| |
| // Returns the tag of the current DIE. |
| // Requires that ReadCode() has been called at least once. |
| uint16_t GetTag() const { return GetAbbrev().tag; } |
| |
| // Returns whether the current DIE has a child. |
| // Requires that ReadCode() has been called at least once. |
| bool HasChild() const { return GetAbbrev().has_child; } |
| |
| const File& dwarf() const { return dwarf_; } |
| |
| CompilationUnitSizes unit_sizes() const { return unit_sizes_; } |
| uint32_t abbrev_version() const { return abbrev_version_; } |
| |
| private: |
| BLOATY_DISALLOW_COPY_AND_ASSIGN(DIEReader); |
| |
| template<typename...> friend class FixedAttrReader; |
| |
| // APIs for our friends to use to update our state. |
| |
| // Call to get the current read head where attributes should be parsed. |
| StringPiece ReadAttributesBegin() { |
| assert(state_ == State::kReadyToReadAttributes); |
| return remaining_; |
| } |
| |
| // When some data has been parsed, this updates our read head. |
| bool ReadAttributesEnd(StringPiece remaining, uint64_t sibling) { |
| assert(state_ == State::kReadyToReadAttributes); |
| if (remaining.data() == nullptr) { |
| state_ = State::kError; |
| return false; |
| } else { |
| remaining_ = remaining; |
| sibling_offset_ = sibling; |
| state_ = State::kReadyToNext; |
| return true; |
| } |
| } |
| |
| // Internal APIs. |
| |
| bool ReadCompilationUnitHeader(StringPiece data); |
| bool ReadCode(); |
| |
| enum class State { |
| kReadyToReadAttributes, |
| kReadyToNext, |
| kEof, |
| kError |
| } state_; |
| |
| std::string error_; |
| |
| const File& dwarf_; |
| |
| // Abbreviation for the current entry. |
| const AbbrevTable::Abbrev* current_abbrev_; |
| |
| // Our current read position. |
| StringPiece remaining_; |
| uint64_t sibling_offset_; |
| |
| // The read position of the next entry at each level, or size()==0 for levels |
| // where we don't know (because we're not at the top-level and the previous |
| // DIE didn't include DW_AT_sibling). Length of this array indicates the |
| // current depth. |
| StringPiece next_unit_; |
| |
| // All of the AbbrevTables we've read from .debug_abbrev, indexed by their |
| // offset within .debug_abbrev. |
| std::unordered_map<uint64_t, AbbrevTable> abbrev_tables_; |
| |
| // Whether we are in .debug_types or .debug_info. |
| Section section_; |
| |
| // Information about the current compilation unit. |
| StringPiece unit_data_; |
| CompilationUnitSizes unit_sizes_; |
| AbbrevTable* unit_abbrev_; |
| |
| // A small integer that uniquely identifies the combination of unit_abbrev_ |
| // and unit_sizes_. Attribute readers use this to know when they can reuse an |
| // existing (abbrev code) -> (Actions) mapping, since this table depends on |
| // both the current abbrev. table and the sizes. |
| uint32_t abbrev_version_; |
| |
| std::map<std::pair<AbbrevTable*, CompilationUnitSizes>, uint32_t> |
| abbrev_versions_; |
| |
| // Only for .debug_types |
| uint64_t unit_type_signature_; |
| uint64_t unit_type_offset_; |
| }; |
| |
| bool DIEReader::ReadCode() { |
| uint32_t code; |
| state_ = State::kError; |
| StringPiece data = remaining_; |
| |
| CHECK_RETURN(ReadLEB128(&data, &code)); |
| |
| if (code == 0) { |
| remaining_ = data; |
| state_ = State::kEof; |
| return false; |
| } else { |
| CHECK_RETURN(unit_abbrev_->GetAbbrev(code, ¤t_abbrev_)); |
| remaining_ = data; |
| state_ = State::kReadyToReadAttributes; |
| sibling_offset_ = 0; |
| return true; |
| } |
| } |
| |
| bool DIEReader::NextCompilationUnit() { |
| if (next_unit_.size() == 0) { |
| state_ = State::kEof; |
| return false; |
| } |
| |
| CHECK_RETURN(ReadCompilationUnitHeader(next_unit_)); |
| CHECK_RETURN(ReadCode()); |
| return true; |
| } |
| |
| bool DIEReader::NextDIE() { |
| do { |
| if (remaining_.size() == 0) { |
| state_ = State::kEof; |
| return false; |
| } |
| ReadCode(); |
| } while (state_ == State::kEof); |
| |
| return state_ == State::kReadyToReadAttributes; |
| } |
| |
| bool DIEReader::SeekToCompilationUnit(Section section, uint64_t offset) { |
| StringPiece data; |
| section_ = section; |
| |
| if (section == Section::kDebugInfo) { |
| data = dwarf_.debug_info; |
| } else { |
| data = dwarf_.debug_types; |
| } |
| |
| CHECK_RETURN(offset < data.size()); |
| data.remove_prefix(offset); |
| CHECK_RETURN(ReadCompilationUnitHeader(data)); |
| CHECK_RETURN(ReadCode()); |
| |
| return true; |
| } |
| |
| bool DIEReader::ReadCompilationUnitHeader(StringPiece data) { |
| if (data.size() == 0) { |
| state_ = State::kEof; |
| return false; |
| } |
| |
| StringPiece unit_data = data; |
| StringPiece next_unit; |
| unit_sizes_.ReadInitialLength(&data, &next_unit); |
| |
| uint16_t version; |
| CHECK_RETURN(ReadMemcpy(&data, &version)); |
| |
| if (version > 4) { |
| fprintf(stderr, "Data is in new DWARF format we don't understand.\n"); |
| return false; |
| } |
| |
| uint64_t debug_abbrev_offset; |
| CHECK_RETURN(unit_sizes_.ReadDWARFOffset(&data, &debug_abbrev_offset)); |
| unit_abbrev_ = &abbrev_tables_[debug_abbrev_offset]; |
| |
| // If we haven't already read abbreviations for this debug_abbrev_offset, we |
| // need to do so now. |
| if (unit_abbrev_->IsEmpty()) { |
| StringPiece abbrev_data = dwarf_.debug_abbrev; |
| abbrev_data.remove_prefix(debug_abbrev_offset); |
| CHECK_RETURN(unit_abbrev_->ReadAbbrevs(abbrev_data)); |
| } |
| |
| CHECK_RETURN(ReadMemcpy(&data, &unit_sizes_.address_size)); |
| |
| if (section_ == Section::kDebugTypes) { |
| CHECK_RETURN(ReadMemcpy(&data, &unit_type_signature_)); |
| CHECK_RETURN(unit_sizes_.ReadDWARFOffset(&data, &unit_type_offset_)); |
| } |
| |
| unit_data_ = unit_data; |
| remaining_ = data; |
| next_unit_ = next_unit; |
| |
| auto abbrev_id = std::make_pair(unit_abbrev_, unit_sizes_); |
| auto insert_pair = abbrev_versions_.insert( |
| std::make_pair(abbrev_id, abbrev_versions_.size())); |
| |
| // This will be either the newly inserted value or the existing one, if there |
| // was one. |
| abbrev_version_ = insert_pair.first->second; |
| |
| return true; |
| } |
| |
| |
| // FormReader ////////////////////////////////////////////////////////////////// |
| |
| // A mapping of DWARF "forms" into C++ datatypes, and code to parse an attribute |
| // into those C++ types. This is the main parsing code for parsing DIE |
| // attributes, and there's a lot going on here because DWARF specifies a lot of |
| // forms/encodings with ambiguous/overloaded semantics in some cases. |
| // |
| // Note that this code is only concerned with mapping DWARF data into C++. It |
| // is not concerned with any possible *semantic* differences between the forms. |
| // For example, DW_FORM_block and DW_FORM_exprloc both represent delimited |
| // sections of the input, so this code treats them identically (both map to |
| // StringPiece) even though DW_FORM_exprloc carries extra semantic meaning about |
| // the *interpretation* of those bytes. |
| |
| // The type of the decoding function yielded from all GetFunctionForForm() |
| // functions. The return value indicates the data that remains after we parsed |
| // our value out. If return_value.data() == nullptr, there was an error. |
| typedef StringPiece FormDecodeFunc(const DIEReader& reader, StringPiece data, |
| void* val); |
| |
| // Helper to get decoding function as a function pointer. |
| template <class T> |
| FormDecodeFunc* GetFormDecodeFunc(uint8_t form, CompilationUnitSizes sizes) { |
| FormDecodeFunc* func = nullptr; |
| FormReader<T>::GetFunctionForForm(sizes, form, [&func](FormDecodeFunc* f) { |
| func = f; |
| return true; |
| }); |
| return func; |
| } |
| |
| template <class Derived> |
| class FormReaderBase { |
| public: |
| FormReaderBase(const DIEReader& reader, StringPiece data) |
| : reader_(reader), data_(data) {} |
| |
| StringPiece data() const { return data_; } |
| |
| protected: |
| const DIEReader& reader_; |
| StringPiece data_; |
| |
| // Function for parsing a specific, known form. This function compiles into |
| // extremely tight/optimized code for parsing this specific form into one |
| // specific C++ type. |
| template <bool (Derived::*mf)()> |
| static StringPiece ReadAttr(const DIEReader& reader, StringPiece data, |
| void* val) { |
| Derived form_reader(reader, data, |
| static_cast<typename Derived::type*>(val)); |
| if ((form_reader.*mf)() == false) { return StringPiece(); } |
| return form_reader.data(); |
| } |
| |
| // Function for parsing the "indirect" form, which only gives you the concrete |
| // form when you see the data. This compiles into a switch() statement based |
| // on the form we parse. |
| static StringPiece ReadIndirect(const DIEReader& reader, StringPiece data, |
| void* value) { |
| uint16_t form; |
| CHECK_RETURN_STRINGPIECE(ReadLEB128(&data, &form)); |
| CHECK_RETURN_STRINGPIECE(form != DW_FORM_indirect); |
| bool ok = Derived::GetFunctionForForm(reader.unit_sizes(), form, |
| [&](FormDecodeFunc* func) { |
| data = func(reader, data, value); |
| return data.data() != nullptr; |
| }); |
| CHECK_RETURN_STRINGPIECE(ok); |
| return data; |
| } |
| }; |
| |
| // FormReader for StringPiece. We accept the true string forms (DW_FORM_string |
| // and DW_FORM_strp) as well as a number of other forms that contain delimited |
| // string data. We also accept the generic/opaque DW_FORM_data* types; the |
| // StringPiece can store the uninterpreted data which can then be interpreted by |
| // a higher layer. |
| template <> |
| class FormReader<StringPiece> : public FormReaderBase<FormReader<StringPiece>> { |
| public: |
| typedef FormReader ME; |
| typedef FormReaderBase<ME> Base; |
| typedef StringPiece type; |
| using Base::data_; |
| |
| FormReader(const DIEReader& reader, StringPiece data, StringPiece* val) |
| : Base(reader, data), val_(val) {} |
| |
| template <class Func> |
| static bool GetFunctionForForm(CompilationUnitSizes sizes, uint8_t form, |
| Func func) { |
| switch (form) { |
| case DW_FORM_block1: |
| return func(&ReadAttr<&FormReader::ReadBlock<uint8_t>>); |
| case DW_FORM_block2: |
| return func(&ReadAttr<&FormReader::ReadBlock<uint16_t>>); |
| case DW_FORM_block4: |
| return func(&ReadAttr<&FormReader::ReadBlock<uint32_t>>); |
| case DW_FORM_block: |
| case DW_FORM_exprloc: |
| return func(&ReadAttr<&FormReader::ReadVariableBlock>); |
| case DW_FORM_string: |
| return func(&ReadAttr<&FormReader::ReadString>); |
| case DW_FORM_strp: |
| if (sizes.dwarf64) { |
| return func(&ReadAttr<&FormReader::ReadIndirectString<uint64_t>>); |
| } else { |
| return func(&ReadAttr<&FormReader::ReadIndirectString<uint32_t>>); |
| } |
| case DW_FORM_data1: |
| return func(&ReadAttr<&FormReader::ReadFixed<1>>); |
| case DW_FORM_data2: |
| return func(&ReadAttr<&FormReader::ReadFixed<2>>); |
| case DW_FORM_data4: |
| return func(&ReadAttr<&FormReader::ReadFixed<4>>); |
| case DW_FORM_data8: |
| return func(&ReadAttr<&FormReader::ReadFixed<8>>); |
| case DW_FORM_indirect: |
| return func(&FormReader::ReadIndirect); |
| default: |
| return false; |
| } |
| } |
| |
| private: |
| StringPiece* val_; |
| |
| template <size_t N> |
| bool ReadFixed() { |
| return ReadPiece(N, &data_, val_); |
| } |
| |
| template <class D> |
| bool ReadBlock() { |
| D len; |
| CHECK_RETURN(ReadMemcpy(&data_, &len)); |
| CHECK_RETURN(ReadPiece(len, &data_, val_)); |
| return true; |
| } |
| |
| bool ReadVariableBlock() { |
| uint64_t len; |
| CHECK_RETURN(ReadLEB128(&data_, &len)); |
| CHECK_RETURN(ReadPiece(len, &data_, val_)); |
| return true; |
| } |
| |
| bool ReadString() { |
| return ReadNullTerminated(&data_, val_); |
| } |
| |
| template <class D> |
| bool ReadIndirectString() { |
| D ofs; |
| StringTable table(reader_.dwarf().debug_str); |
| CHECK_RETURN(ReadMemcpy(&data_, &ofs)); |
| CHECK_RETURN(table.ReadEntry(ofs, val_)); |
| return true; |
| } |
| }; |
| |
| // FormReader for all integral types. We accept any DW_FORM_data* forms (sign |
| // or zero-extending as necessary), as well as the true integer and address |
| // types. |
| template <class T> |
| class FormReader<T, typename std::enable_if<std::is_integral<T>::value>::type> |
| : public FormReaderBase<FormReader<T>> { |
| public: |
| typedef FormReader ME; |
| typedef FormReaderBase<ME> Base; |
| typedef T type; |
| using Base::data_; |
| |
| FormReader(const DIEReader& reader, StringPiece data, T* val) |
| : Base(reader, data), val_(val) {} |
| |
| template <class Func> |
| static bool GetFunctionForForm(CompilationUnitSizes sizes, uint8_t form, |
| Func func) { |
| switch (form) { |
| case DW_FORM_data1: |
| case DW_FORM_ref1: |
| return func(&Base::template ReadAttr<&ME::ReadFixed<int8_t>>); |
| case DW_FORM_data2: |
| case DW_FORM_ref2: |
| CHECK_RETURN(sizeof(T) >= 2); |
| return func(&Base::template ReadAttr<&ME::ReadFixed<int16_t>>); |
| case DW_FORM_data4: |
| case DW_FORM_ref4: |
| CHECK_RETURN(sizeof(T) >= 4); |
| return func(&Base::template ReadAttr<&ME::ReadFixed<int32_t>>); |
| case DW_FORM_data8: |
| case DW_FORM_ref8: |
| CHECK_RETURN(sizeof(T) >= 8); |
| return func(&Base::template ReadAttr<&ME::ReadFixed<int64_t>>); |
| case DW_FORM_addr: |
| // We require FORM_addr to be parsed into 8 bytes, since there is always |
| // the possibility of running into 64-bit files. |
| CHECK_RETURN(sizeof(T) >= 8); |
| CHECK_RETURN(!std::is_signed<T>::value); |
| if (sizes.address_size == 8) { |
| return func(&Base::template ReadAttr<&ME::ReadFixed<int64_t>>); |
| } else if (sizes.address_size == 4) { |
| return func(&Base::template ReadAttr<&ME::ReadFixed<int32_t>>); |
| } else { |
| return false; |
| } |
| return true; |
| case DW_FORM_sec_offset: |
| // We require FORM_addr to be parsed into 8 bytes, since there is always |
| // the possibility of running into 64-bit files. |
| CHECK_RETURN(sizeof(T) >= 8); |
| CHECK_RETURN(!std::is_signed<T>::value); |
| if (sizes.dwarf64) { |
| return func(&Base::template ReadAttr<&ME::ReadFixed<int64_t>>); |
| } else { |
| return func(&Base::template ReadAttr<&ME::ReadFixed<int32_t>>); |
| } |
| case DW_FORM_sdata: |
| CHECK_RETURN(std::is_signed<T>::value); |
| return func(&Base::template ReadAttr<&ME::ReadVariable>); |
| case DW_FORM_udata: |
| CHECK_RETURN(!std::is_signed<T>::value); |
| return func(&Base::template ReadAttr<&ME::ReadVariable>); |
| case DW_FORM_indirect: |
| return func(&Base::ReadIndirect); |
| default: |
| return false; |
| } |
| } |
| |
| private: |
| T* val_; |
| |
| template <class U> |
| bool ReadFixed() { |
| if (std::is_signed<T>::value) { |
| // I don't know if this case exists or not in practice. Do producers ever |
| // ship a data1 that is meant to represent a signed number? |
| typename std::make_signed<U>::type tmp; |
| CHECK_RETURN(ReadMemcpy(&data_, &tmp)); |
| *val_ = tmp; |
| } else { |
| typename std::make_unsigned<U>::type tmp; |
| CHECK_RETURN(ReadMemcpy(&data_, &tmp)); |
| *val_ = tmp; |
| } |
| return true; |
| } |
| |
| bool ReadVariable() { |
| return ReadLEB128(&data_, val_); |
| } |
| }; |
| |
| // FormReader for bool. The only types we expect for a bool field are |
| // DW_FORM_flag and DW_FORM_flag_present. |
| template <> |
| class FormReader<bool> : public FormReaderBase<FormReader<bool>> { |
| public: |
| typedef FormReader ME; |
| typedef FormReaderBase<ME> Base; |
| typedef bool type; |
| using Base::data_; |
| |
| FormReader(const DIEReader& reader, StringPiece data, bool* val) |
| : Base(reader, data), val_(val) {} |
| |
| template <class Func> |
| static bool GetFunctionForForm(const DIEReader& /*reader*/, uint8_t form, |
| Func func) { |
| switch (form) { |
| case DW_FORM_flag: |
| return func(&Base::template ReadAttr<&FormReader::ReadFlag>); |
| case DW_FORM_flag_present: |
| return func(&Base::template ReadAttr<&FormReader::ReadFlagPresent>); |
| case DW_FORM_indirect: |
| return func(&ME::ReadIndirect); |
| default: |
| return false; |
| } |
| } |
| |
| private: |
| bool* val_; |
| |
| bool ReadFlag() { |
| uint8_t byte; |
| CHECK_RETURN(ReadMemcpy(&data_, &byte)); |
| *val_ = byte; |
| return true; |
| } |
| |
| bool ReadFlagPresent() { |
| *val_ = true; |
| return true; |
| } |
| }; |
| |
| // FormReader for void. For skipping the data instead of reading it somewhere. |
| template <> |
| class FormReader<void> : public FormReaderBase<FormReader<void>> { |
| public: |
| typedef FormReader ME; |
| typedef FormReaderBase<ME> Base; |
| typedef void type; |
| using Base::data_; |
| |
| FormReader(const DIEReader& reader, StringPiece data, void* /*val*/) |
| : Base(reader, data) {} |
| |
| template <class Func> |
| static bool GetFunctionForForm(CompilationUnitSizes sizes, uint8_t form, |
| Func func) { |
| switch (form) { |
| case DW_FORM_flag_present: |
| return func(&Base::template ReadAttr<&ME::DoNothing>); |
| case DW_FORM_data1: |
| case DW_FORM_ref1: |
| case DW_FORM_flag: |
| return func(&Base::template ReadAttr<&ME::SkipFixed<1>>); |
| case DW_FORM_data2: |
| case DW_FORM_ref2: |
| return func(&Base::template ReadAttr<&ME::SkipFixed<2>>); |
| case DW_FORM_data4: |
| case DW_FORM_ref4: |
| return func(&Base::template ReadAttr<&ME::SkipFixed<4>>); |
| case DW_FORM_data8: |
| case DW_FORM_ref8: |
| case DW_FORM_ref_sig8: |
| return func(&Base::template ReadAttr<&ME::SkipFixed<8>>); |
| case DW_FORM_addr: |
| case DW_FORM_ref_addr: |
| if (sizes.address_size) { |
| return func(&Base::template ReadAttr<&ME::SkipFixed<8>>); |
| } else if (sizes.address_size == 4) { |
| return func(&Base::template ReadAttr<&ME::SkipFixed<4>>); |
| } else { |
| return false; |
| } |
| case DW_FORM_sec_offset: |
| case DW_FORM_strp: |
| if (sizes.dwarf64) { |
| return func(&Base::template ReadAttr<&ME::SkipFixed<8>>); |
| } else { |
| return func(&Base::template ReadAttr<&ME::SkipFixed<4>>); |
| } |
| case DW_FORM_sdata: |
| case DW_FORM_udata: |
| case DW_FORM_ref_udata: |
| return func(&Base::template ReadAttr<&ME::SkipVariable>); |
| return true; |
| case DW_FORM_block1: |
| return func(&Base::template ReadAttr<&ME::SkipBlock<uint8_t>>); |
| return true; |
| case DW_FORM_block2: |
| return func(&Base::template ReadAttr<&ME::SkipBlock<uint16_t>>); |
| case DW_FORM_block4: |
| return func(&Base::template ReadAttr<&ME::SkipBlock<uint32_t>>); |
| case DW_FORM_block: |
| case DW_FORM_exprloc: |
| return func(&Base::template ReadAttr<&ME::SkipVariableBlock>); |
| case DW_FORM_string: |
| return func(&Base::template ReadAttr<&ME::SkipString>); |
| case DW_FORM_indirect: |
| return func(&ME::ReadIndirect); |
| default: |
| return false; |
| } |
| } |
| |
| private: |
| bool DoNothing() { return true; } |
| |
| template <size_t N> |
| bool SkipFixed() { |
| return SkipBytes(N, &data_); |
| } |
| |
| bool SkipVariable() { |
| return SkipLEB128(&data_); |
| } |
| |
| template <class D> |
| bool SkipBlock() { |
| D len; |
| CHECK_RETURN(ReadMemcpy(&data_, &len)); |
| CHECK_RETURN(SkipBytes(len, &data_)); |
| return true; |
| } |
| |
| bool SkipVariableBlock() { |
| uint64_t len; |
| CHECK_RETURN(ReadLEB128(&data_, &len)); |
| CHECK_RETURN(SkipBytes(len, &data_)); |
| return true; |
| } |
| |
| bool SkipString() { |
| const char* nullz = |
| static_cast<const char*>(memchr(data_.data(), '\0', data_.size())); |
| CHECK_RETURN(nullz != NULL); // String must be null terminated. |
| CHECK_RETURN(SkipBytes(nullz - data_.data(), &data_)); |
| return true; |
| } |
| }; |
| |
| |
| // ActionBuf /////////////////////////////////////////////////////////////////// |
| |
| // ActionBuf is an optimized list of decoding functions to call (and pointers to |
| // where to store the data) when a particular abbreviation is seen. It is used |
| // by the attribute readers. |
| |
| class ActionBuf { |
| private: |
| struct AttrAction { |
| AttrAction(FormDecodeFunc* func_, void* data_, bool* has_) |
| : func(func_), data(data_), has(has_) {} |
| FormDecodeFunc* func; |
| void* data; |
| bool* has; |
| }; |
| |
| struct IndexedAction { |
| IndexedAction(size_t index_, FormDecodeFunc* func_, void* data_, bool* has_) |
| : index(index_), action(func_, data_, has_) {} |
| size_t index; // The index where this action should go. |
| AttrAction action; // The action, but func will be nullptr if invalid. |
| }; |
| |
| public: |
| // Build a list of actions to perform for the given abbreviation in a |
| // compilation unit with the given sizes. Any attributes you want to parse |
| // should be listed in "actions" (which came from calling GetAction()). |
| ActionBuf(const AbbrevTable::Abbrev& abbrev, CompilationUnitSizes sizes, |
| std::initializer_list<IndexedAction> actions); |
| |
| // For the given |attr_name| and destination type |T|, destination data |data| |
| // and |has| bool locations, returns an action suitable for passing to the |
| // ActionBuf constructor. |
| template <class T> |
| static IndexedAction GetAction(uint16_t attr_name, |
| const AbbrevTable::Abbrev& abbrev, |
| CompilationUnitSizes sizes, void* data, |
| bool* has); |
| |
| StringPiece ReadAttributes(const DIEReader& reader, StringPiece data) const; |
| |
| private: |
| std::vector<AttrAction> action_list_; |
| }; |
| |
| ActionBuf::ActionBuf(const AbbrevTable::Abbrev& abbrev, |
| CompilationUnitSizes sizes, |
| std::initializer_list<IndexedAction> indexed_actions) { |
| // Initialize list with functions that will just skip the fields. |
| for (size_t i = 0; i < abbrev.attr.size(); i++) { |
| const auto& attr = abbrev.attr[i]; |
| auto func = GetFormDecodeFunc<void>(attr.form, sizes); |
| |
| if (!func) { |
| fprintf(stderr, "bloaty: don't know how to skip DWARF form %d\n", |
| attr.form); |
| exit(1); |
| } |
| |
| action_list_.push_back(AttrAction(func, nullptr, nullptr)); |
| } |
| |
| // Overwrite any entries for attributes we actually want to store somewhere. |
| for (const auto& action : indexed_actions) { |
| if (action.action.func) { |
| assert(action.index < action_list_.size()); |
| if (action_list_[action.index].data) { |
| fprintf(stderr, |
| "bloaty: internal error, specified same DWARF attribute more " |
| "than once\n"); |
| exit(1); |
| } |
| action_list_[action.index] = action.action; |
| } |
| } |
| } |
| |
| template <class T> |
| ActionBuf::IndexedAction ActionBuf::GetAction(uint16_t attr_name, |
| const AbbrevTable::Abbrev& abbrev, |
| CompilationUnitSizes sizes, |
| void* data, bool* has) { |
| for (size_t i = 0; i < abbrev.attr.size(); i++) { |
| if (attr_name == abbrev.attr[i].name) { |
| FormDecodeFunc* func = GetFormDecodeFunc<T>(abbrev.attr[i].form, sizes); |
| |
| if (!func) { |
| fprintf(stderr, |
| "Warning: don't know how to convert form %d to type %s\n", |
| abbrev.attr[i].form, typeid(T).name()); |
| } |
| |
| return IndexedAction(i, func, data, has); |
| } |
| } |
| |
| // This attribute doesn't occur. |
| return IndexedAction(0, nullptr, nullptr, nullptr); |
| } |
| |
| // The fast path function that reads all attributes by simply calling a list of |
| // function pointers to super-specialized functions. |
| StringPiece ActionBuf::ReadAttributes(const DIEReader& reader, |
| StringPiece data) const { |
| for (const auto& action : action_list_) { |
| assert(action.func); |
| data = action.func(reader, data, action.data); |
| if (data.data() == nullptr) { |
| return data; // Propagate error. |
| } |
| if (action.has) { |
| *action.has = true; |
| } |
| } |
| return data; |
| } |
| |
| |
| // FixedAttrReader ///////////////////////////////////////////////////////////// |
| |
| // Parses a DIE's attributes into a tuple of values. The user specifies the |
| // attributes they are expecting to see and the C++ types they want to parse |
| // into. Any attributes that we don't list, or that have a type that doesn't |
| // fit our expected type, are skipped/ignored. This is more convenient and more |
| // efficient than parsing all attributes into a generic representation and then |
| // selecting/converting them in a second phase. |
| // |
| // For the moment we don't distinguish between "data was not present" and "data |
| // was present but in a bad form." |
| |
| template <class... Args> |
| class FixedAttrReader { |
| public: |
| typedef std::tuple<Args...> ValueTuple; |
| |
| // Constructs a decoder for the given attributes. We will accept any |
| // attribute forms that can decode to our target types (the template params on |
| // this class). If we want to be more restrictive about this later, we could |
| // let users specify that only certain forms should be allowed. |
| template <size_t N> |
| FixedAttrReader(DIEReader* /*reader*/, const DwarfAttribute (&attributes)[N]) { |
| static_assert(N == sizeof...(Args), "must match number of template params"); |
| std::copy(std::begin(attributes), std::end(attributes), |
| std::begin(attributes_)); |
| } |
| |
| FixedAttrReader(DIEReader* /*reader*/, |
| std::initializer_list<DwarfAttribute> attributes) { |
| assert(attributes.size() == sizeof...(Args)); |
| std::copy(std::begin(attributes), std::end(attributes), |
| std::begin(attributes_)); |
| } |
| |
| // Returns true if the DIE's attributes were successfully parsed and all |
| // expected attributes were present. The values are available from values(). |
| // |
| // If we wanted to allow some parameters to be optional, we could support |
| // having params have an optional<> type. |
| bool ReadAttributes(DIEReader* reader) { |
| StringPiece data = reader->ReadAttributesBegin(); |
| |
| // Clear all existing attributes. |
| values_ = std::tuple<Args...>(); |
| memset(&has_attr_, 0, sizeof...(Args)); |
| |
| // Parse all attributes. |
| data = GetActionBuf(*reader).ReadAttributes(*reader, data); |
| return reader->ReadAttributesEnd(data, 0); |
| } |
| |
| template <size_t N> |
| bool HasAttribute() const { |
| static_assert(N < sizeof...(Args), "index too large"); |
| return has_attr_[N]; |
| } |
| |
| template <size_t N> |
| typename std::tuple_element<N, ValueTuple>::type GetAttribute() const { |
| return std::get<N>(values_); |
| } |
| |
| const ValueTuple& values() const { return values_; } |
| |
| private: |
| static const size_t kCount = sizeof...(Args); |
| |
| // Template to generate a compile-time sequence of integers, so we can do |
| // "foreach element in the tuple". |
| // |
| // With C++14 we'll be able to use simple std:index_sequence instead of these |
| // custom sequence-making templates. |
| template <size_t... Indexes> |
| struct IndexSequence {}; |
| |
| template <size_t N, size_t... Indexes> |
| struct MakeIndexSequence : MakeIndexSequence<N - 1, N - 1, Indexes...> {}; |
| |
| template <size_t... Indexes> |
| struct MakeIndexSequence<0, Indexes...> : IndexSequence<Indexes...> {}; |
| |
| // Keyed by abbrev code, this stores a list of attribute actions and |
| // associated data pointers. |
| typedef std::unordered_map<uint32_t, ActionBuf> AbbrevCodeMap; |
| |
| const ActionBuf& GetActionBuf(const DIEReader& reader) { |
| if (actions_.size() <= reader.abbrev_version()) { |
| actions_.resize(reader.abbrev_version() + 1); |
| } |
| |
| auto code = reader.GetAbbrev().code; |
| auto& map = actions_[reader.abbrev_version()]; |
| auto it = map.find(code); |
| auto sizes = reader.unit_sizes(); |
| |
| if (it == map.end()) { |
| return BuildActionBuf(reader.GetAbbrev(), sizes, |
| MakeIndexSequence<sizeof...(Args)>(), &map); |
| } else { |
| return it->second; |
| } |
| } |
| |
| template <size_t... I> |
| const ActionBuf& BuildActionBuf(const AbbrevTable::Abbrev& abbrev, |
| CompilationUnitSizes sizes, |
| IndexSequence<I...>, AbbrevCodeMap* map); |
| |
| // Specifies for each attribute whether it was present or not. |
| bool has_attr_[sizeof...(Args)]; |
| |
| // The set of attributes we are expecting. |
| DwarfAttribute attributes_[sizeof...(Args)]; |
| |
| // The slots where we store the values we have parsed. |
| ValueTuple values_; |
| |
| // We always store the sibling if we see one. |
| uint64_t sibling_; |
| |
| // Indexed by DIEReader::abbrev_version(), so we have a different code map |
| // when the abbreviation table or compilation unit sizes change. |
| std::vector<AbbrevCodeMap> actions_; |
| }; |
| |
| template <class... Args> |
| template <size_t... I> |
| const ActionBuf& FixedAttrReader<Args...>::BuildActionBuf( |
| const AbbrevTable::Abbrev& abbrev, CompilationUnitSizes sizes, |
| FixedAttrReader<Args...>::IndexSequence<I...>, AbbrevCodeMap* map) { |
| auto actions = { |
| ActionBuf::GetAction<Args>(attributes_[I], abbrev, sizes, |
| &std::get<I>(values_), &has_attr_[I])..., |
| ActionBuf::GetAction<uint64_t>(DW_AT_sibling, abbrev, sizes, &sibling_, |
| nullptr)}; |
| auto pair = |
| map->emplace(std::piecewise_construct, std::make_tuple(abbrev.code), |
| std::make_tuple(abbrev, sizes, actions)); |
| |
| // Must have inserted. |
| assert(pair.second); |
| return pair.first->second; |
| } |
| |
| |
| // LineInfoReader ////////////////////////////////////////////////////////////// |
| |
| // Code to read the .debug_line programs in a DWARF file. |
| |
| class LineInfoReader { |
| public: |
| LineInfoReader(const File& file) : file_(file), info_(0) {} |
| |
| struct LineInfo { |
| LineInfo(bool default_is_stmt) : is_stmt(default_is_stmt) {} |
| uint64_t address = 0; |
| uint32_t file = 1; |
| uint32_t line = 1; |
| uint32_t column = 0; |
| uint32_t discriminator = 0; |
| bool end_sequence = false; |
| bool basic_block = false; |
| bool prologue_end = false; |
| bool epilogue_begin = false; |
| bool is_stmt; |
| uint8_t op_index = 0; |
| uint8_t isa = 0; |
| }; |
| |
| struct FileName { |
| StringPiece name; |
| uint32_t directory_index; |
| uint64_t modified_time; |
| uint64_t file_size; |
| }; |
| |
| bool SeekToOffset(uint64_t offset, uint8_t address_size); |
| bool ReadLineInfo(); |
| const LineInfo& lineinfo() const { return info_; } |
| const FileName& filename(size_t i) const { return file_names_[i]; } |
| StringPiece include_directory(size_t i) const { |
| return include_directories_[i]; |
| } |
| |
| private: |
| struct Params { |
| uint8_t minimum_instruction_length; |
| uint8_t maximum_operations_per_instruction; |
| uint8_t default_is_stmt; |
| int8_t line_base; |
| uint8_t line_range; |
| uint8_t opcode_base; |
| } params_; |
| |
| const File& file_; |
| |
| CompilationUnitSizes sizes_; |
| std::vector<StringPiece> include_directories_; |
| std::vector<FileName> file_names_; |
| std::vector<uint8_t> standard_opcode_lengths_; |
| |
| StringPiece program_; |
| StringPiece remaining_; |
| |
| // Whether we are in a "shadow" part of the bytecode program. Sometimes parts |
| // of the line info program make it into the final binary even though the |
| // corresponding code was stripped. We can tell when this happened by looking |
| // for DW_LNE_set_address ops where the operand is 0. This indicates that a |
| // relocation for that argument never got applied, which probably means that |
| // the code got stripped. |
| // |
| // While this is true, we don't yield any LineInfo entries, because the |
| // "address" value is garbage. |
| bool shadow_; |
| |
| LineInfo info_; |
| |
| void DoAdvance(uint64_t advance, uint8_t max_per_instr) { |
| info_.address += params_.minimum_instruction_length * |
| ((info_.op_index + advance) / max_per_instr); |
| info_.op_index = (info_.op_index + advance) % max_per_instr; |
| } |
| |
| void Advance(uint64_t amount) { |
| if (params_.maximum_operations_per_instruction == 1) { |
| // This is by far the common case (only false on VLIW architectuers), and |
| // this inlining/specialization avoids a costly division. |
| DoAdvance(amount, 1); |
| } else { |
| DoAdvance(amount, params_.maximum_operations_per_instruction); |
| } |
| } |
| |
| uint8_t AdjustedOpcode(uint8_t op) { return op - params_.opcode_base; } |
| |
| void SpecialOpcodeAdvance(uint8_t op) { |
| Advance(AdjustedOpcode(op) / params_.line_range); |
| } |
| }; |
| |
| bool LineInfoReader::SeekToOffset(uint64_t offset, uint8_t address_size) { |
| CHECK_RETURN(file_.debug_line.size() > offset); |
| StringPiece data = file_.debug_line.substr(offset); |
| program_ = data; |
| |
| uint16_t version; |
| uint64_t header_length; |
| sizes_.address_size = address_size; |
| CHECK_RETURN(sizes_.ReadInitialLength(&data, nullptr)); |
| CHECK_RETURN(ReadMemcpy(&data, &version)); |
| CHECK_RETURN(sizes_.ReadDWARFOffset(&data, &header_length)); |
| |
| StringPiece program = data.substr(header_length); |
| |
| CHECK_RETURN(ReadMemcpy(&data, ¶ms_.minimum_instruction_length)); |
| if (version == 4) { |
| CHECK_RETURN( |
| ReadMemcpy(&data, ¶ms_.maximum_operations_per_instruction)); |
| } else { |
| params_.maximum_operations_per_instruction = 1; |
| } |
| CHECK_RETURN(ReadMemcpy(&data, ¶ms_.default_is_stmt)); |
| CHECK_RETURN(ReadMemcpy(&data, ¶ms_.line_base)); |
| CHECK_RETURN(ReadMemcpy(&data, ¶ms_.line_range)); |
| CHECK_RETURN(ReadMemcpy(&data, ¶ms_.opcode_base)); |
| |
| standard_opcode_lengths_.resize(params_.opcode_base); |
| for (size_t i = 1; i < params_.opcode_base; i++) { |
| CHECK_RETURN(ReadMemcpy(&data, &standard_opcode_lengths_[i])); |
| } |
| |
| // Read include_directories. |
| include_directories_.clear(); |
| |
| // Implicit current directory entry. |
| include_directories_.push_back(StringPiece()); |
| |
| while (true) { |
| StringPiece dir; |
| CHECK_RETURN(ReadNullTerminated(&data, &dir)); |
| if (dir.size() == 0) { |
| break; |
| } |
| include_directories_.push_back(dir); |
| } |
| |
| // Read file_names. |
| file_names_.clear(); |
| |
| // Filename 0 is unused. |
| file_names_.push_back(FileName()); |
| while (true) { |
| FileName file_name; |
| CHECK_RETURN(ReadNullTerminated(&data, &file_name.name)); |
| if (file_name.name.size() == 0) { |
| break; |
| } |
| CHECK_RETURN(ReadLEB128(&data, &file_name.directory_index)); |
| CHECK_RETURN(ReadLEB128(&data, &file_name.modified_time)); |
| CHECK_RETURN(ReadLEB128(&data, &file_name.file_size)); |
| file_names_.push_back(file_name); |
| } |
| |
| info_ = LineInfo(params_.default_is_stmt); |
| remaining_ = program; |
| shadow_ = false; |
| return true; |
| } |
| |
| bool LineInfoReader::ReadLineInfo() { |
| // Final step of last DW_LNS_copy / special opcode. |
| info_.discriminator = 0; |
| info_.basic_block = false; |
| info_.prologue_end = false; |
| info_.epilogue_begin = false; |
| |
| // Final step of DW_LNE_end_sequence. |
| info_.end_sequence = false; |
| |
| StringPiece data = remaining_; |
| |
| while (true) { |
| if (data.size() == 0) { |
| remaining_ = data; |
| return false; |
| } |
| |
| uint8_t op; |
| CHECK_RETURN(ReadMemcpy(&data, &op)); |
| |
| if (op >= params_.opcode_base) { |
| SpecialOpcodeAdvance(op); |
| info_.line += |
| params_.line_base + (AdjustedOpcode(op) % params_.line_range); |
| if (!shadow_) { |
| remaining_ = data; |
| return true; |
| } |
| } else { |
| switch (op) { |
| case DW_LNS_extended_op: { |
| uint16_t len; |
| uint8_t extended_op; |
| CHECK_RETURN(ReadLEB128(&data, &len)); |
| CHECK_RETURN(ReadMemcpy(&data, &extended_op)); |
| switch (extended_op) { |
| case DW_LNE_end_sequence: { |
| // Preserve address and set end_sequence, but reset everything |
| // else. |
| uint64_t addr = info_.address; |
| info_ = LineInfo(params_.default_is_stmt); |
| info_.address = addr; |
| info_.end_sequence = true; |
| if (!shadow_) { |
| remaining_ = data; |
| return true; |
| } |
| break; |
| } |
| case DW_LNE_set_address: |
| CHECK_RETURN(sizes_.ReadAddress(&data, &info_.address)); |
| info_.op_index = 0; |
| shadow_ = (info_.address == 0); |
| break; |
| case DW_LNE_define_file: { |
| FileName file_name; |
| CHECK_RETURN(ReadNullTerminated(&data, &file_name.name)); |
| CHECK_RETURN(ReadLEB128(&data, &file_name.directory_index)); |
| CHECK_RETURN(ReadLEB128(&data, &file_name.modified_time)); |
| CHECK_RETURN(ReadLEB128(&data, &file_name.file_size)); |
| file_names_.push_back(file_name); |
| break; |
| } |
| case DW_LNE_set_discriminator: |
| CHECK_RETURN(ReadLEB128(&data, &info_.discriminator)); |
| break; |
| default: |
| // We don't understand this opcode, skip it. |
| CHECK_RETURN(SkipBytes(len, &data)); |
| fprintf(stderr, |
| "bloaty: warning: unknown DWARF line table extended " |
| "opcode: %d\n", |
| extended_op); |
| break; |
| } |
| break; |
| } |
| case DW_LNS_copy: |
| if (!shadow_) { |
| remaining_ = data; |
| return true; |
| } |
| break; |
| case DW_LNS_advance_pc: { |
| uint64_t operand; |
| CHECK_RETURN(ReadLEB128(&data, &operand)); |
| Advance(operand); |
| break; |
| } |
| case DW_LNS_advance_line: { |
| int32_t operand; |
| CHECK_RETURN(ReadLEB128(&data, &operand)); |
| info_.line += operand; |
| break; |
| } |
| case DW_LNS_set_file: { |
| uint32_t operand; |
| CHECK_RETURN(ReadLEB128(&data, &operand)); |
| info_.file = operand; |
| break; |
| } |
| case DW_LNS_set_column: { |
| uint32_t operand; |
| CHECK_RETURN(ReadLEB128(&data, &operand)); |
| info_.column = operand; |
| break; |
| } |
| case DW_LNS_negate_stmt: |
| info_.is_stmt = !info_.is_stmt; |
| break; |
| case DW_LNS_set_basic_block: |
| info_.basic_block = true; |
| break; |
| case DW_LNS_const_add_pc: |
| SpecialOpcodeAdvance(255); |
| break; |
| case DW_LNS_fixed_advance_pc: { |
| uint16_t operand; |
| CHECK_RETURN(ReadMemcpy(&data, &operand)); |
| info_.address += operand; |
| info_.op_index = 0; |
| break; |
| } |
| case DW_LNS_set_prologue_end: |
| info_.prologue_end = true; |
| break; |
| case DW_LNS_set_epilogue_begin: |
| info_.epilogue_begin = true; |
| break; |
| case DW_LNS_set_isa: |
| CHECK_RETURN(ReadLEB128(&data, &info_.isa)); |
| break; |
| default: |
| // Unknown opcode, but we know its length so can skip it. |
| CHECK_RETURN(SkipBytes(standard_opcode_lengths_[op], &data)); |
| fprintf(stderr, |
| "bloaty: warning: unknown DWARF line table opcode: %d\n", op); |
| break; |
| } |
| } |
| } |
| } |
| |
| } // namespace dwarf |
| |
| |
| // Bloaty DWARF Data Sources /////////////////////////////////////////////////// |
| |
| // The DWARF .debug_aranges section should, in theory, give us exactly the |
| // information we need to map file ranges in linked binaries to compilation |
| // units from where that code came. However, .debug_aranges is often incomplete |
| // or missing completely, so we use it as just one of several data sources for |
| // the "compileunits" data source. |
| static bool ReadDWARFAddressRanges(const dwarf::File& file, RangeSink* sink) { |
| // Maps compilation unit offset -> source filename |
| // Lazily initialized. |
| class FilenameMap { |
| public: |
| FilenameMap(const dwarf::File& file) |
| : die_reader_(file), |
| attr_reader_(&die_reader_, {DW_AT_name}), |
| missing_("[DWARF is missing filename]") {} |
| |
| std::string GetFilename(uint64_t compilation_unit_offset) { |
| auto& name = map_[compilation_unit_offset]; |
| if (name.size() == 0) { |
| name = LookupFilename(compilation_unit_offset); |
| } |
| return name; |
| } |
| |
| private: |
| std::string LookupFilename(uint64_t compilation_unit_offset) { |
| auto section = dwarf::DIEReader::Section::kDebugInfo; |
| if (die_reader_.SeekToCompilationUnit(section, compilation_unit_offset) && |
| die_reader_.GetTag() == DW_TAG_compile_unit && |
| attr_reader_.ReadAttributes(&die_reader_) && |
| attr_reader_.HasAttribute<0>()) { |
| return attr_reader_.GetAttribute<0>().as_string(); |
| } else { |
| return missing_; |
| } |
| } |
| |
| dwarf::DIEReader die_reader_; |
| dwarf::FixedAttrReader<StringPiece> attr_reader_; |
| std::unordered_map<uint64_t, std::string> map_; |
| std::string missing_; |
| } map(file); |
| |
| dwarf::AddressRanges ranges(file.debug_aranges); |
| |
| while (ranges.NextUnit()) { |
| std::string filename = map.GetFilename(ranges.debug_info_offset()); |
| |
| while (ranges.NextRange()) { |
| sink->AddVMRangeIgnoreDuplicate(ranges.address(), ranges.length(), |
| filename); |
| } |
| } |
| |
| return true; |
| } |
| |
| // The DWARF debug info can help us get compileunits info. DIEs for compilation |
| // units, functions, and global variables often have attributes that will |
| // resolve to addresses. |
| static bool ReadDWARFDebugInfo(const dwarf::File& file, |
| const SymbolTable& symtab, RangeSink* sink) { |
| dwarf::DIEReader die_reader(file); |
| dwarf::FixedAttrReader<StringPiece, StringPiece, uint64_t, uint64_t> |
| attr_reader(&die_reader, {DW_AT_name, DW_AT_linkage_name, DW_AT_low_pc, |
| DW_AT_high_pc}); |
| |
| CHECK_RETURN(die_reader.SeekToStart(dwarf::DIEReader::Section::kDebugInfo)); |
| |
| do { |
| CHECK_RETURN(attr_reader.ReadAttributes(&die_reader)); |
| std::string name = attr_reader.GetAttribute<0>().as_string(); |
| if (name.empty()) { |
| continue; |
| } |
| |
| do { |
| uint64_t low_pc = attr_reader.GetAttribute<2>(); |
| uint64_t high_pc = attr_reader.GetAttribute<3>(); |
| |
| if (attr_reader.HasAttribute<2>() && attr_reader.HasAttribute<3>()) { |
| sink->AddVMRangeIgnoreDuplicate(low_pc, high_pc - low_pc, name); |
| } |
| |
| if (attr_reader.HasAttribute<1>()) { |
| auto it = symtab.find(attr_reader.GetAttribute<1>()); |
| if (it != symtab.end()) { |
| sink->AddVMRangeIgnoreDuplicate(it->second.first, it->second.second, |
| name); |
| } |
| } |
| } while (die_reader.NextDIE() && attr_reader.ReadAttributes(&die_reader)); |
| } while (die_reader.NextCompilationUnit()); |
| |
| return die_reader.IsEof(); |
| } |
| |
| bool ReadDWARFCompileUnits(const dwarf::File& file, const SymbolTable& symtab, |
| RangeSink* sink) { |
| if (!file.debug_info.size()) { |
| if (file.zdebug_info.size()) { |
| fprintf(stderr, "bloaty: can't read compressed debug info: \n"); |
| } else { |
| fprintf(stderr, "bloaty: missing debug info\n"); |
| } |
| return false; |
| } |
| |
| if (file.debug_aranges.size()) { |
| CHECK_RETURN(ReadDWARFAddressRanges(file, sink)); |
| } |
| |
| CHECK_RETURN(ReadDWARFDebugInfo(file, symtab, sink)); |
| |
| return true; |
| } |
| |
// Builds the label for a line-table row: "<file>:<line>" when |include_line|
// is set, otherwise just "<file>".
static std::string LineInfoKey(const std::string& file, uint32_t line,
                               bool include_line) {
  std::string key(file);
  if (include_line) {
    key += ":";
    key += std::to_string(line);
  }
  return key;
}
| |
| bool ReadDWARFInlines(const dwarf::File& file, RangeSink* sink, |
| bool include_line) { |
| if (!file.debug_info.size() || !file.debug_line.size()) { |
| if (file.zdebug_info.size() && file.zdebug_line.size()) { |
| fprintf(stderr, "bloaty: can't read compressed debug info: \n"); |
| } else { |
| fprintf(stderr, "bloaty: missing debug info\n"); |
| } |
| return false; |
| } |
| |
| dwarf::DIEReader die_reader(file); |
| dwarf::LineInfoReader line_info_reader(file); |
| dwarf::FixedAttrReader<uint64_t> attr_reader(&die_reader, {DW_AT_stmt_list}); |
| std::unordered_map<uint64_t, std::string> map_; |
| std::string missing_; |
| |
| class FilenameMap { |
| public: |
| FilenameMap(const dwarf::LineInfoReader& reader) : reader_(reader) {} |
| const std::string& GetSourceFilename(size_t index) { |
| auto& ret = filenames_[index]; |
| if (ret.empty()) { |
| const dwarf::LineInfoReader::FileName& filename = |
| reader_.filename(index); |
| StringPiece directory = |
| reader_.include_directory(filename.directory_index); |
| ret = directory.as_string(); |
| if (!ret.empty()) { |
| ret += "/"; |
| } |
| ret += filename.name.as_string(); |
| } |
| return ret; |
| } |
| |
| private: |
| const dwarf::LineInfoReader& reader_; |
| std::unordered_map<uint32_t, std::string> filenames_; |
| }; |
| |
| die_reader.SeekToStart(dwarf::DIEReader::Section::kDebugInfo); |
| |
| while (true) { |
| CHECK_RETURN(attr_reader.ReadAttributes(&die_reader)); |
| FilenameMap map(line_info_reader); |
| |
| if (!attr_reader.HasAttribute<0>()) { |
| continue; |
| } |
| |
| CHECK_RETURN(line_info_reader.SeekToOffset(attr_reader.GetAttribute<0>(), |
| die_reader.unit_sizes().address_size)); |
| uint64_t span_startaddr = 0; |
| std::string last_source; |
| |
| while (line_info_reader.ReadLineInfo()) { |
| const auto& line_info = line_info_reader.lineinfo(); |
| auto addr = line_info.address; |
| auto number = line_info.line; |
| auto name = line_info.end_sequence |
| ? last_source |
| : LineInfoKey(map.GetSourceFilename(line_info.file), |
| number, include_line); |
| if (!span_startaddr) { |
| span_startaddr = addr; |
| } else if (line_info.end_sequence || |
| (!last_source.empty() && name != last_source)) { |
| sink->AddVMRange(span_startaddr, addr - span_startaddr, last_source); |
| if (line_info.end_sequence) { |
| span_startaddr = 0; |
| } else { |
| span_startaddr = addr; |
| } |
| } |
| last_source = name; |
| } |
| |
| if (!die_reader.NextCompilationUnit()) { |
| return die_reader.IsEof(); |
| } |
| } |
| |
| return true; |
| } |
| |
| } // namespace bloaty |