// src/dwarf.cc - third_party/bloaty - Git at Google

 // Copyright 2016 Google Inc. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include <assert.h>
 #include <stdio.h>

 #include <algorithm>
 #include <initializer_list>
 #include <iostream>
 #include <memory>
 #include <stack>
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>

 #include "absl/base/attributes.h"
 #include "absl/base/macros.h"
 #include "absl/strings/string_view.h"
 #include "absl/strings/substitute.h"
 #include "absl/types/optional.h"
 #include "bloaty.h"
 #include "bloaty.pb.h"
 #include "dwarf_constants.h"
 #include "re2/re2.h"

 using namespace dwarf2reader;
 using absl::string_view;

 // Rounds |offset| up to the nearest multiple of |granularity|.
 // |granularity| must be a power of two; the result is computed by masking.
 static size_t AlignUpTo(size_t offset, size_t granularity) {
   const size_t mask = granularity - 1;
   return (offset + mask) & ~mask;
 }

 // Throws a bloaty::Error built from |str|, this file's name (__FILE__) and
 // |line|.  Declared noreturn, so callers need no return statement after it.
 ABSL_ATTRIBUTE_NORETURN
 static void Throw(const char *str, int line) {
   throw bloaty::Error(str, __FILE__, line);
 }

 // THROW(msg) throws with a fixed message and the call site's line number.
 // THROWF(fmt, ...) first formats its arguments with absl::Substitute().
 #define THROW(msg) Throw(msg, __LINE__)
 #define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)

 namespace bloaty {

 extern int verbose_level;

 namespace dwarf {

 // Integer division of |n| by |d|, computed as (n + d - 1) / d.
 int DivRoundUp(int n, int d) {
   const int bias = d - 1;
   return (n + bias) / d;
 }


 // Low-level Parsing Routines //////////////////////////////////////////////////

 // For parsing the low-level values found in DWARF files.  These are the only
 // routines that touch the bytes of the input buffer directly.  Everything else
 // is layered on top of these.

 // Copies sizeof(T) bytes from the front of |*data| into a T (in host byte
 // order, via memcpy) and advances |*data| past them.  Throws if fewer than
 // sizeof(T) bytes remain.
 template <class T>
 T ReadMemcpy(string_view* data) {
   constexpr size_t kWidth = sizeof(T);
   if (kWidth > data->size()) {
     THROW("premature EOF reading fixed-length DWARF data");
   }
   T value;
   memcpy(&value, data->data(), kWidth);
   data->remove_prefix(kWidth);
   return value;
 }

 // Splits the first |bytes| bytes off the front of |*data| and returns them
 // as a view; |*data| is advanced past them.  Throws if |*data| is too short.
 string_view ReadPiece(size_t bytes, string_view* data) {
   if (bytes > data->size()) {
     THROW("premature EOF reading variable-length DWARF data");
   }
   const string_view piece = data->substr(0, bytes);
   data->remove_prefix(bytes);
   return piece;
 }

 // Advances |*data| by |bytes| bytes.  Throws if |*data| is too short.
 void SkipBytes(size_t bytes, string_view* data) {
   const bool enough = bytes <= data->size();
   if (!enough) {
     THROW("premature EOF skipping DWARF data");
   }
   data->remove_prefix(bytes);
 }

 // Reads a NUL-terminated string from the front of |*data|.  The returned
 // view excludes the terminator; |*data| is advanced past the terminator.
 // Throws if no NUL byte is found in the remaining data.
 string_view ReadNullTerminated(string_view* data) {
   const size_t len = data->find('\0');
   if (len == string_view::npos) {
     THROW("DWARF string was not NULL-terminated");
   }

   const string_view text = data->substr(0, len);
   data->remove_prefix(len + 1);  // Consume the terminator as well.
   return text;
 }

 // Advances |*data| past a NUL-terminated string, including the terminator.
 // Throws if no NUL byte is found in the remaining data.
 void SkipNullTerminated(string_view* data) {
   const size_t len = data->find('\0');
   if (len == string_view::npos) {
     THROW("DWARF string was not NULL-terminated");
   }
   data->remove_prefix(len + 1);  // Consume the terminator as well.
 }

 // Parses the LEB128 format defined by DWARF (both signed and unsigned
 // versions).

 uint64_t ReadLEB128Internal(bool is_signed, string_view* data) {
   uint64_t ret = 0;
   int shift = 0;
   int maxshift = 70;
   const char* ptr = data->data();
   const char* limit = ptr + data->size();

   while (ptr < limit && shift < maxshift) {
     char byte = *(ptr++);
     ret |= static_cast<uint64_t>(byte & 0x7f) << shift;
     shift += 7;
     if ((byte & 0x80) == 0) {
       data->remove_prefix(ptr - data->data());
       if (is_signed && shift < 64 && (byte & 0x40)) {
         ret |= -(1ULL << shift);
       }
       return ret;
     }
   }

   THROW("corrupt DWARF data, unterminated LEB128");
 }

 // Reads a LEB128 value and narrows it to T.  The signedness of T selects
 // signed vs. unsigned decoding.  Throws if the decoded value does not fit
 // in T.
 template <typename T>
 T ReadLEB128(string_view* data) {
   using Limits = std::numeric_limits<T>;
   using Wide = typename std::conditional<std::is_signed<T>::value, int64_t,
                                          uint64_t>::type;
   const Wide decoded = ReadLEB128Internal(std::is_signed<T>::value, data);
   const bool fits = decoded <= Limits::max() && decoded >= Limits::min();
   if (!fits) {
     THROW("DWARF data contained larger LEB128 than we were expecting");
   }
   return static_cast<T>(decoded);
 }

 // Advances |*data| past one LEB128 value without decoding it.  Looks at no
 // more than ten bytes; throws if none of them has a clear high bit.
 void SkipLEB128(string_view* data) {
   const size_t kMaxBytes = 10;
   const size_t limit = std::min<size_t>(data->size(), kMaxBytes);
   size_t pos = 0;
   while (pos < limit) {
     const bool is_last = ((*data)[pos] & 0x80) == 0;
     ++pos;
     if (is_last) {
       data->remove_prefix(pos);
       return;
     }
   }

   THROW("corrupt DWARF data, unterminated LEB128");
 }

 // Some size information attached to each compilation unit.  The size of an
 // address or offset in the DWARF data depends on this state which is parsed
 // from the header.
 //
 // Instances are copied by value in several places, so every member has a
 // defined initial value; the real values are filled in by
 // ReadInitialLength(), ReadDWARFVersion() and SetAddressSize().
 class CompilationUnitSizes {
  public:
   // When true, DWARF offsets are 64 bits, otherwise they are 32 bit.
   bool dwarf64() const { return dwarf64_; }

   // The size of addresses.  Either 4 or 8 once SetAddressSize() has
   // succeeded; 0 before that.
   uint8_t address_size() const { return address_size_; }

   // DWARF version of this unit, exactly as read from the 16-bit header
   // field.  Returned at full width so that out-of-range versions are not
   // truncated into a supported version number.
   uint16_t dwarf_version() const { return dwarf_version_; }

   // Sets the address size.  Throws unless |address_size| is 4 or 8.
   void SetAddressSize(uint8_t address_size) {
     if (address_size != 4 && address_size != 8) {
       THROWF("Unexpected address size: $0", address_size);
     }
     address_size_ = address_size;
   }

   // To allow this as the key in a map.
   // NOTE(review): only dwarf64_ and address_size_ take part in the ordering;
   // dwarf_version_ does not.  Confirm that is intended for all map users.
   bool operator<(const CompilationUnitSizes& rhs) const {
     return std::tie(dwarf64_, address_size_) <
            std::tie(rhs.dwarf64_, rhs.address_size_);
   }

   // Reads a DWARF offset based on whether we are reading dwarf32 or dwarf64
   // format.
   uint64_t ReadDWARFOffset(string_view* data) const {
     if (dwarf64_) {
       return ReadMemcpy<uint64_t>(data);
     } else {
       return ReadMemcpy<uint32_t>(data);
     }
   }

   // Reads an address according to the expected address_size.
   uint64_t ReadAddress(string_view* data) const {
     if (address_size_ == 8) {
       return ReadMemcpy<uint64_t>(data);
     } else if (address_size_ == 4) {
       return ReadMemcpy<uint32_t>(data);
     } else {
       BLOATY_UNREACHABLE();
     }
   }

   // Reads an "initial length" as specified in many DWARF headers.  This
   // contains either a 32-bit or a 64-bit length, and signals whether we are
   // using the 32-bit or 64-bit DWARF format (so it sets dwarf64 appropriately).
   //
   // Returns the range for this section and stores the remaining data
   // in |remaining|.  Throws if the declared length exceeds the data left.
   string_view ReadInitialLength(string_view* remaining) {
     uint64_t len = ReadMemcpy<uint32_t>(remaining);

     // 0xffffffff is the escape value announcing a 64-bit length.
     if (len == 0xffffffff) {
       dwarf64_ = true;
       len = ReadMemcpy<uint64_t>(remaining);
     } else {
       dwarf64_ = false;
     }

     if (remaining->size() < len) {
       THROW("short DWARF compilation unit");
     }

     string_view unit = *remaining;
     unit.remove_suffix(remaining->size() - len);
     *remaining = remaining->substr(len);
     return unit;
   }

   // Reads the 16-bit version field from the front of |*data|.
   void ReadDWARFVersion(string_view* data) {
     dwarf_version_ = ReadMemcpy<uint16_t>(data);
   }

  private:
   uint16_t dwarf_version_ = 0;
   bool dwarf64_ = false;
   uint8_t address_size_ = 0;
 };


 // AbbrevTable /////////////////////////////////////////////////////////////////

 // Parses and stores a representation of (a portion of) the .debug_abbrev
 // section of a DWARF file.  An abbreviation is defined by a unique "code"
 // (unique within one table), and defines the DIE tag and set of attributes.
 // The encoding of the DIE then contains just the abbreviation code and the
 // attribute values -- thanks to the abbreviation table, the tag and attribute
 // keys/names are not required.
 //
 // The abbreviations are an internal detail of the DWARF format and users should
 // not need to care about them.

 class AbbrevTable {
  public:
   // Parses abbreviation declarations from |data| until the terminating
   // (code 0) entry, storing each one.  Returns the data that follows the
   // terminator.
   string_view ReadAbbrevs(string_view data);

   // One attribute specification inside an abbreviation: a name and a form.
   struct Attribute {
     uint16_t name;
     uint8_t form;
   };

   // A single abbreviation declaration.
   struct Abbrev {
     uint32_t code;
     uint16_t tag;
     bool has_child;
     std::vector<Attribute> attr;
   };

   // True when no abbreviation has been stored in this table.
   bool IsEmpty() const { return abbrev_.empty(); }

   // Looks up the abbreviation with the given |code|.  On success stores a
   // pointer to it in |*abbrev| and returns true; otherwise returns false and
   // leaves |*abbrev| untouched.
   bool GetAbbrev(uint32_t code, const Abbrev** abbrev) const {
     const auto found = abbrev_.find(code);
     if (found == abbrev_.end()) {
       return false;
     }
     *abbrev = &found->second;
     return true;
   }

  private:
   // Abbreviations keyed by their code.  The tables are expected to be small
   // (a vector<> would nearly do), but a map copes with arbitrary input.
   std::unordered_map<uint32_t, Abbrev> abbrev_;
 };

 // Parses abbreviation declarations until the terminator entry (code 0) and
 // returns the data following it.  Throws on a duplicate code or on a
 // has-children byte that is neither DW_children_yes nor DW_children_no.
 string_view AbbrevTable::ReadAbbrevs(string_view data) {
   for (;;) {
     const uint32_t code = ReadLEB128<uint32_t>(&data);
     if (code == 0) {
       return data;  // Terminator entry.
     }

     // A freshly inserted entry has code == 0, so a non-zero code here means
     // this code was already declared.
     Abbrev& entry = abbrev_[code];
     if (entry.code != 0) {
       THROW("DWARF data contained duplicate abbrev code");
     }

     entry.code = code;
     entry.tag = ReadLEB128<uint16_t>(&data);

     const uint8_t children = ReadMemcpy<uint8_t>(&data);
     switch (children) {
       case DW_children_yes:
         entry.has_child = true;
         break;
       case DW_children_no:
         entry.has_child = false;
         break;
       default:
         THROW("DWARF has_child is neither true nor false.");
     }

     // Attribute specifications run until a (0, 0) pair.
     for (;;) {
       Attribute spec;
       spec.name = ReadLEB128<uint16_t>(&data);
       spec.form = ReadLEB128<uint8_t>(&data);
       const bool end_of_abbrev = spec.name == 0 && spec.form == 0;
       if (end_of_abbrev) {
         break;
       }
       entry.attr.push_back(spec);
     }
   }
 }


 // StringTable /////////////////////////////////////////////////////////////////

 // Represents the .debug_str portion of a DWARF file and contains code for
 // reading strings out of it.  This is an internal detail of the DWARF format
 // and users should not need to care about it.

 class StringTable {
  public:
   // Wraps the contents of the .debug_str section.  The view is stored, not
   // copied, so the underlying data must outlive this object.
   StringTable(string_view debug_str) : debug_str_(debug_str) {}

   // Returns the NUL-terminated string starting at byte offset |ofs|.
   string_view ReadEntry(size_t ofs) const;

  private:
   string_view debug_str_;
 };

 // Returns the string that starts |ofs| bytes into the table.  Throws if
 // |ofs| is past the end or the string is not NUL-terminated.
 string_view StringTable::ReadEntry(size_t ofs) const {
   string_view tail = debug_str_;
   SkipBytes(ofs, &tail);
   return ReadNullTerminated(&tail);
 }


 // AddressRanges ///////////////////////////////////////////////////////////////

 // Code for reading address ranges out of .debug_aranges.

 class AddressRanges {
  public:
   // |data| is the section contents; iteration starts at its beginning.
   AddressRanges(string_view data) : section_(data), next_unit_(data) {}

   // Offset into .debug_info for the current compilation unit.  Valid after
   // a successful NextUnit().
   uint64_t debug_info_offset() { return debug_info_offset_; }

   // Address and length of the current range.  Valid after a successful
   // NextRange().
   uint64_t address() { return address_; }
   uint64_t length() { return length_; }

   // Moves to the next range of the current unit, making it available via
   // address() and length().  Returns false once the unit's data is used up.
   // Must be called once before the first range is read.
   bool NextRange();

   // Moves to the next unit, making its offset available via
   // debug_info_offset().  Returns false once the section is used up.
   // Must be called once before the first unit is read.
   bool NextUnit();

  private:
   CompilationUnitSizes sizes_;
   string_view section_;
   string_view unit_remaining_;
   string_view next_unit_;
   uint64_t debug_info_offset_;
   uint64_t address_;
   uint64_t length_;
 };

 // Reads the next (address, length) pair of the current unit.  Returns false
 // without reading when the unit has no data left.
 bool AddressRanges::NextRange() {
   const bool has_more = !unit_remaining_.empty();
   if (has_more) {
     address_ = sizes_.ReadAddress(&unit_remaining_);
     length_ = sizes_.ReadAddress(&unit_remaining_);
   }
   return has_more;
 }

 // Parses the header of the next unit and positions unit_remaining_ at its
 // first range tuple.  Returns false when the section is exhausted.  Throws
 // on versions above 4 and on a non-zero segment size.
 bool AddressRanges::NextUnit() {
   if (next_unit_.empty()) {
     return false;
   }

   unit_remaining_ = sizes_.ReadInitialLength(&next_unit_);
   sizes_.ReadDWARFVersion(&unit_remaining_);
   if (sizes_.dwarf_version() > 4) {
     THROW("DWARF data is too new for us");
   }

   debug_info_offset_ = sizes_.ReadDWARFOffset(&unit_remaining_);
   sizes_.SetAddressSize(ReadMemcpy<uint8_t>(&unit_remaining_));

   const uint8_t segment_size = ReadMemcpy<uint8_t>(&unit_remaining_);
   if (segment_size != 0) {
     THROW("we don't know how to handle segmented addresses.");
   }

   // Skip padding so that the offset within the section becomes a multiple
   // of twice the address size.
   const size_t header_end = unit_remaining_.data() - section_.data();
   const size_t first_tuple = AlignUpTo(header_end, sizes_.address_size() * 2);
   SkipBytes(first_tuple - header_end, &unit_remaining_);
   return true;
 }


 // LocationList ////////////////////////////////////////////////////////////////

 // Code for reading entries out of a location list.
 // For the moment we only care about finding the bounds of a list given its
 // offset, so we don't actually vend any of the data.

 class LocationList {
  public:
   // |data| starts at the first entry of the list; |sizes| gives the address
   // width used to decode entries.
   LocationList(CompilationUnitSizes sizes, string_view data)
       : sizes_(sizes), remaining_(data) {}

   // Pointer to the first byte not yet consumed.
   const char* read_offset() const { return remaining_.data(); }

   // Consumes one entry.  Returns false after consuming the terminator.
   bool NextEntry();

  private:
   CompilationUnitSizes sizes_;
   string_view remaining_;
 };

 // Consumes one location list entry.  Returns false for the (0, 0)
 // terminator, true otherwise.
 bool LocationList::NextEntry() {
   const uint64_t start = sizes_.ReadAddress(&remaining_);
   const uint64_t end = sizes_.ReadAddress(&remaining_);
   if (start == 0 && end == 0) {
     return false;
   }

   // An all-ones start address (at the unit's address width) marks a base
   // address selection entry, which carries no location description.
   const bool base_selection =
       start == UINT64_MAX ||
       (start == UINT32_MAX && sizes_.address_size() == 4);
   if (!base_selection) {
     // Skip the location description: a 2-byte length plus that many bytes.
     const uint16_t length = ReadMemcpy<uint16_t>(&remaining_);
     SkipBytes(length, &remaining_);
   }
   return true;
 }

 // Returns the prefix of |available| occupied by the location list that
 // starts at its beginning, terminator included.
 string_view GetLocationListRange(CompilationUnitSizes sizes,
                                  string_view available) {
   LocationList list(sizes, available);
   bool more = true;
   while (more) {
     more = list.NextEntry();
   }
   const size_t consumed = list.read_offset() - available.data();
   return available.substr(0, consumed);
 }


 // RangeList ///////////////////////////////////////////////////////////////////

 // Code for reading entries out of a range list.
 // For the moment we only care about finding the bounds of a list given its
 // offset, so we don't actually vend any of the data.

 class RangeList {
  public:
   // |data| starts at the first entry of the list; |sizes| gives the address
   // width used to decode entries.
   RangeList(CompilationUnitSizes sizes, string_view data)
       : sizes_(sizes), remaining_(data) {}

   // Pointer to the first byte not yet consumed.
   const char* read_offset() const { return remaining_.data(); }

   // Consumes one entry.  Returns false after consuming the terminator.
   bool NextEntry();

  private:
   CompilationUnitSizes sizes_;
   string_view remaining_;
 };

 // Consumes one (start, end) pair.  Returns false when both are zero (the
 // list terminator), true otherwise.
 bool RangeList::NextEntry() {
   const uint64_t start = sizes_.ReadAddress(&remaining_);
   const uint64_t end = sizes_.ReadAddress(&remaining_);
   return start != 0 || end != 0;
 }

 // Returns the prefix of |available| occupied by the range list that starts
 // at its beginning, terminator included.
 string_view GetRangeListRange(CompilationUnitSizes sizes,
                               string_view available) {
   RangeList list(sizes, available);
   bool more = true;
   while (more) {
     more = list.NextEntry();
   }
   const size_t consumed = list.read_offset() - available.data();
   return available.substr(0, consumed);
 }

 // DIEReader ///////////////////////////////////////////////////////////////////

 // Reads a sequence of DWARF DIE's (Debugging Information Entries) from the
 // .debug_info or .debug_types section of a binary.
 //
 // Each DIE contains a tag and a set of attribute/value pairs.  We rely on the
 // abbreviations in an AbbrevTable to decode the DIEs.

 class DIEReader {
  public:
   // Constructs a new DIEReader.  Cannot be used until you call one of the
   // Seek() methods below.  Only a reference to |file| is stored, so |file|
   // must outlive the reader.
   DIEReader(const File& file) : dwarf_(file) {}

   // Returns true if we are at the end of DIEs for this compilation unit.
   bool IsEof() const { return state_ == State::kEof; }

   // DIEs exist in both .debug_info and .debug_types.
   enum class Section {
     kDebugInfo,
     kDebugTypes
   };

   // Seeks to the overall start or the start of a specific compilation unit.
   // Note that |header_offset| is the offset of the compilation unit *header*,
   // not the offset of the first DIE.
   bool SeekToCompilationUnit(Section section, uint64_t header_offset);
   bool SeekToStart(Section section) {
     return SeekToCompilationUnit(section, 0);
   }

   // Parses the header of the unit that follows the current one and reads its
   // first DIE code.  Returns false when no further unit data is available.
   bool NextCompilationUnit();

   // Advances to the next overall DIE, ignoring whether it happens to be a
   // child, a sibling, or an uncle/aunt.  Returns false at EOF; malformed
   // data is reported by throwing.
   bool NextDIE();

   // Skips children of the current DIE, so that the next call to NextDIE()
   // will read the next sibling (or parent, if no sibling exists).
   bool SkipChildren();

   // Abbreviation of the current DIE.  Must not be called at EOF.
   const AbbrevTable::Abbrev& GetAbbrev() const {
     assert(!IsEof());
     return *current_abbrev_;
   }

   // Returns the tag of the current DIE.
   // Requires that ReadCode() has been called at least once.
   uint16_t GetTag() const { return GetAbbrev().tag; }

   // Returns whether the current DIE has a child.
   // Requires that ReadCode() has been called at least once.
   bool HasChild() const { return GetAbbrev().has_child; }

   const File& dwarf() const { return dwarf_; }

   // Accessors for the state of the current compilation unit.
   string_view unit_range() const { return unit_range_; }
   CompilationUnitSizes unit_sizes() const { return unit_sizes_; }
   uint32_t abbrev_version() const { return abbrev_version_; }
   uint64_t debug_abbrev_offset() const { return debug_abbrev_offset_; }

   // If both compileunit_name and strp_sink are set, this will automatically
   // call strp_sink->AddFileRange(compileunit_name, <string range>) for every
   // DW_FORM_strp attribute encountered.  These strings occur in the .debug_str
   // section.
   void set_compileunit_name(absl::string_view name) {
     unit_name_ = std::string(name);
   }
   void set_strp_sink(RangeSink* sink) { strp_sink_ = sink; }

   // Reports |range| to the strp sink under the current unit name.  Does
   // nothing when no sink has been set.
   void AddIndirectString(string_view range) const {
     if (strp_sink_) {
       strp_sink_->AddFileRange("dwarf_strp", unit_name_, range);
     }
   }

  private:
   BLOATY_DISALLOW_COPY_AND_ASSIGN(DIEReader);

   template<typename> friend class AttrReader;

   // APIs for our friends to use to update our state.

   // Call to get the current read head where attributes should be parsed.
   string_view ReadAttributesBegin() {
     assert(state_ == State::kReadyToReadAttributes);
     return remaining_;
   }

   // When some data has been parsed, this updates our read head.
   // Throws if |remaining| has a null data pointer; otherwise records the new
   // read head and |sibling| and moves to the kReadyToNext state.
   bool ReadAttributesEnd(string_view remaining, uint64_t sibling) {
     assert(state_ == State::kReadyToReadAttributes);
     if (remaining.data() == nullptr) {
       THROW("premature EOF reading DWARF attributes");
     } else {
       remaining_ = remaining;
       sibling_offset_ = sibling;
       state_ = State::kReadyToNext;
       return true;
     }
   }

   // Internal APIs.

   bool ReadCompilationUnitHeader();
   bool ReadCode();

   // Parse state.  ReadCode() moves to kReadyToReadAttributes (or kEof),
   // ReadAttributesEnd() moves to kReadyToNext.
   // NOTE: state_ has no initializer; it is first assigned when a unit header
   // or DIE code is read.
   enum class State {
     kReadyToReadAttributes,
     kReadyToNext,
     kEof,
   } state_;

   std::string error_;

   const File& dwarf_;
   RangeSink* strp_sink_ = nullptr;

   // Abbreviation for the current entry.
   const AbbrevTable::Abbrev* current_abbrev_;

   // Our current read position.
   string_view remaining_;
   uint64_t sibling_offset_;
   // Incremented for each DIE that has children, decremented for each null
   // entry read by ReadCode().
   int depth_ = 0;

   // Data for the next compilation unit.
   string_view next_unit_;

   // All of the AbbrevTables we've read from .debug_abbrev, indexed by their
   // offset within .debug_abbrev.
   std::unordered_map<uint64_t, AbbrevTable> abbrev_tables_;

   // Whether we are in .debug_types or .debug_info.
   Section section_;

   // Information about the current compilation unit.
   uint64_t debug_abbrev_offset_;
   std::string unit_name_;
   string_view unit_range_;
   CompilationUnitSizes unit_sizes_;
   AbbrevTable* unit_abbrev_;

   // A small integer that uniquely identifies the combination of unit_abbrev_
   // and unit_sizes_.  Attribute readers use this to know when they can reuse an
   // existing (abbrev code) -> (Actions) mapping, since this table depends on
   // both the current abbrev. table and the sizes.
   uint32_t abbrev_version_;

   // Maps each (abbrev table, sizes) combination seen so far to the version
   // number assigned to it.
   std::map<std::pair<AbbrevTable*, CompilationUnitSizes>, uint32_t>
       abbrev_versions_;

   // Only for .debug_types
   uint64_t unit_type_signature_;
   uint64_t unit_type_offset_;
 };

 // Reads the abbreviation code of the next DIE and looks up its
 // abbreviation.  Null entries (code 0) are consumed along the way, each one
 // lowering depth_ by one.  Returns false, with state_ set to kEof, when the
 // unit's data runs out.  Throws if the code has no abbreviation.
 bool DIEReader::ReadCode() {
   uint32_t code = 0;
   while (true) {
     if (remaining_.empty()) {
       state_ = State::kEof;
       return false;
     }
     code = ReadLEB128<uint32_t>(&remaining_);
     if (code != 0) {
       break;
     }
     // null entry terminates a chain of sibling entries.
     depth_--;
   }

   const bool known = unit_abbrev_->GetAbbrev(code, &current_abbrev_);
   if (!known) {
     THROW("couldn't find abbreviation for code");
   }
   state_ = State::kReadyToReadAttributes;
   sibling_offset_ = 0;

   if (HasChild()) {
     depth_++;
   }

   return true;
 }

 // Moves on to the unit stored in next_unit_ by parsing its header.
 bool DIEReader::NextCompilationUnit() {
   const bool has_unit = ReadCompilationUnitHeader();
   return has_unit;
 }

 bool DIEReader::NextDIE() {
   if (state_ == State::kEof) {
     return false;
   }

   assert(state_ == State::kReadyToNext);
   return ReadCode();
 }

 // Positions the reader at the unit header located |offset| bytes into the
 // chosen section and parses it.  Throws if |offset| is past the end of the
 // section.
 bool DIEReader::SeekToCompilationUnit(Section section, uint64_t offset) {
   section_ = section;

   switch (section) {
     case Section::kDebugInfo:
       next_unit_ = dwarf_.debug_info;
       break;
     default:
       next_unit_ = dwarf_.debug_types;
       break;
   }

   SkipBytes(offset, &next_unit_);
   return ReadCompilationUnitHeader();
 }

 // Parses the unit header at the front of next_unit_, loads (or reuses) the
 // abbreviation table it refers to, and reads the code of the unit's first
 // DIE.  Returns false, with state_ set to kEof, when next_unit_ is empty.
 // Throws if the unit's version is greater than 4.
 bool DIEReader::ReadCompilationUnitHeader() {
   if (next_unit_.empty()) {
     state_ = State::kEof;
     return false;
   }

   // unit_range_ ends up covering the whole unit: the initial-length field
   // plus the body that ReadInitialLength() returns.
   unit_range_ = next_unit_;
   remaining_ = unit_sizes_.ReadInitialLength(&next_unit_);
   unit_range_ = unit_range_.substr(
       0, remaining_.size() + (remaining_.data() - unit_range_.data()));

   unit_sizes_.ReadDWARFVersion(&remaining_);

   if (unit_sizes_.dwarf_version() > 4) {
     THROW("Data is in new DWARF format we don't understand");
   }

   debug_abbrev_offset_ = unit_sizes_.ReadDWARFOffset(&remaining_);
   unit_abbrev_ = &abbrev_tables_[debug_abbrev_offset_];

   // If we haven't already read abbreviations for this debug_abbrev_offset_, we
   // need to do so now.
   if (unit_abbrev_->IsEmpty()) {
     string_view abbrev_data = dwarf_.debug_abbrev;
     SkipBytes(debug_abbrev_offset_, &abbrev_data);
     unit_abbrev_->ReadAbbrevs(abbrev_data);
   }

   unit_sizes_.SetAddressSize(ReadMemcpy<uint8_t>(&remaining_));

   // Units in .debug_types carry two extra header fields.
   if (section_ == Section::kDebugTypes) {
     unit_type_signature_ = ReadMemcpy<uint64_t>(&remaining_);
     unit_type_offset_ = unit_sizes_.ReadDWARFOffset(&remaining_);
   }

   // Assign the next free version number to this (table, sizes) combination
   // unless it already has one.
   auto abbrev_id = std::make_pair(unit_abbrev_, unit_sizes_);
   auto insert_pair = abbrev_versions_.insert(
       std::make_pair(abbrev_id, abbrev_versions_.size()));

   // This will be either the newly inserted value or the existing one, if there
   // was one.
   abbrev_version_ = insert_pair.first->second;

   return ReadCode();
 }


 // DWARF form parsing //////////////////////////////////////////////////////////

 // The value of one parsed DIE attribute: either an unsigned integer or a
 // view of raw bytes.  A string value does not own its bytes.
 class AttrValue {
  public:
   AttrValue(uint64_t val) : uint_(val), type_(Type::kUint) {}
   AttrValue(string_view val) : string_(val), type_(Type::kString) {}

   enum class Type {
     kUint,
     kString
   };

   Type type() const { return type_; }
   bool IsUint() const { return type_ == Type::kUint; }
   bool IsString() const { return type_ == Type::kString; }

   // Returns the value as an unsigned integer.  A kUint value is returned
   // as-is.  A kString value is reinterpreted as a fixed-width integer when
   // it is exactly 1, 2, 4 or 8 bytes long (this is how the DW_FORM_data*
   // forms are stored); any other length yields nullopt.
   absl::optional<uint64_t> ToUint() const {
     if (IsUint()) return uint_;
     string_view str = string_;
     switch (str.size()) {
       case 1:
         return ReadMemcpy<uint8_t>(&str);
       case 2:
         // Must read both bytes; a uint8_t read would drop the high byte.
         return ReadMemcpy<uint16_t>(&str);
       case 4:
         return ReadMemcpy<uint32_t>(&str);
       case 8:
         return ReadMemcpy<uint64_t>(&str);
     }
     return absl::nullopt;
   }

   // Returns the integer payload.  Requires IsUint().
   uint64_t GetUint() const {
     assert(type_ == Type::kUint);
     return uint_;
   }

   // Returns the byte payload.  Requires IsString().
   string_view GetString() const {
     assert(type_ == Type::kString);
     return string_;
   }

  private:
   // Which member is active is recorded in type_.
   union {
     uint64_t uint_;
     string_view string_;
   };

   Type type_;
 };

 // Reads a block whose length is stored in a leading fixed-width field of
 // type D, and returns the block's bytes.
 template <class D>
 string_view ReadBlock(string_view* data) {
   const D block_size = ReadMemcpy<D>(data);
   return ReadPiece(block_size, data);
 }

 // Reads a block whose length is stored as a leading unsigned LEB128, and
 // returns the block's bytes.
 string_view ReadVariableBlock(string_view* data) {
   const uint64_t block_size = ReadLEB128<uint64_t>(data);
   return ReadPiece(block_size, data);
 }

 // Reads an offset of type D from |*data|, fetches the string stored at that
 // offset in the file's debug_str data, reports the string to |reader|, and
 // returns it.
 template <class D>
 string_view ReadIndirectString(const DIEReader& reader, string_view* data) {
   const D offset = ReadMemcpy<D>(data);
   const StringTable strings(reader.dwarf().debug_str);
   const string_view entry = strings.ReadEntry(offset);
   reader.AddIndirectString(entry);
   return entry;
 }

 // Parses one attribute value of the given |form| from the front of |*data|,
 // advancing |*data| past it.  Integer-like forms yield a kUint AttrValue;
 // blocks, strings and the fixed-size DW_FORM_data* forms yield a kString
 // AttrValue viewing the raw bytes.  Sizes that depend on the unit (address
 // size, 32/64-bit DWARF, version) are taken from |reader|.  Throws on an
 // unknown form or truncated data.
 AttrValue ParseAttr(const DIEReader& reader, uint8_t form, string_view* data) {
   switch (form) {
     case DW_FORM_indirect: {
       // The actual form is stored inline ahead of the value.
       uint16_t indirect_form = ReadLEB128<uint16_t>(data);
       if (indirect_form == DW_FORM_indirect) {
         THROW("indirect attribute has indirect form type");
       }
       // Forms are dispatched as uint8_t.  Reject anything wider instead of
       // letting it truncate into an unrelated form code.
       if (indirect_form > 0xff) {
         THROWF("Don't know how to parse DWARF form: $0", indirect_form);
       }
       return ParseAttr(reader, static_cast<uint8_t>(indirect_form), data);
     }
     case DW_FORM_ref1:
       return AttrValue(ReadMemcpy<uint8_t>(data));
     case DW_FORM_ref2:
       return AttrValue(ReadMemcpy<uint16_t>(data));
     case DW_FORM_ref4:
       return AttrValue(ReadMemcpy<uint32_t>(data));
     case DW_FORM_ref_sig8:
     case DW_FORM_ref8:
       return AttrValue(ReadMemcpy<uint64_t>(data));
     case DW_FORM_ref_udata:
       return AttrValue(ReadLEB128<uint64_t>(data));
     case DW_FORM_addr:
     address_size:
       switch (reader.unit_sizes().address_size()) {
         case 4:
           return AttrValue(ReadMemcpy<uint32_t>(data));
         case 8:
           return AttrValue(ReadMemcpy<uint64_t>(data));
         default:
           BLOATY_UNREACHABLE();
       }
     case DW_FORM_ref_addr:
       // For versions <= 2 this form is address-sized; afterwards it is
       // offset-sized like DW_FORM_sec_offset.
       if (reader.unit_sizes().dwarf_version() <= 2) {
         goto address_size;
       }
       ABSL_FALLTHROUGH_INTENDED;
     case DW_FORM_sec_offset:
       if (reader.unit_sizes().dwarf64()) {
         return AttrValue(ReadMemcpy<uint64_t>(data));
       } else {
         return AttrValue(ReadMemcpy<uint32_t>(data));
       }
     case DW_FORM_udata:
       return AttrValue(ReadLEB128<uint64_t>(data));
     case DW_FORM_block1:
       return AttrValue(ReadBlock<uint8_t>(data));
     case DW_FORM_block2:
       return AttrValue(ReadBlock<uint16_t>(data));
     case DW_FORM_block4:
       return AttrValue(ReadBlock<uint32_t>(data));
     case DW_FORM_block:
     case DW_FORM_exprloc:
       return AttrValue(ReadVariableBlock(data));
     case DW_FORM_string:
       return AttrValue(ReadNullTerminated(data));
     case DW_FORM_strp:
       if (reader.unit_sizes().dwarf64()) {
         return AttrValue(ReadIndirectString<uint64_t>(reader, data));
       } else {
         return AttrValue(ReadIndirectString<uint32_t>(reader, data));
       }
     // The fixed-size data forms are kept as raw bytes; AttrValue::ToUint()
     // converts them on demand.
     case DW_FORM_data1:
       return AttrValue(ReadPiece(1, data));
     case DW_FORM_data2:
       return AttrValue(ReadPiece(2, data));
     case DW_FORM_data4:
       return AttrValue(ReadPiece(4, data));
     case DW_FORM_data8:
       return AttrValue(ReadPiece(8, data));

     // Bloaty doesn't currently care about any bool or signed data.
     // So we fudge it a bit and just stuff these in a uint64.
     case DW_FORM_flag_present:
       return AttrValue(1);
     case DW_FORM_flag:
       return AttrValue(ReadMemcpy<uint8_t>(data));
     case DW_FORM_sdata:
       return AttrValue(ReadLEB128<uint64_t>(data));
     default:
       THROWF("Don't know how to parse DWARF form: $0", form);
   }
 }


 // AttrReader //////////////////////////////////////////////////////////////////

 // Parses a DIE's attributes, calling user callbacks with the parsed values.

 template <class T>
 class AttrReader {
  public:
   typedef void CallbackFunc(T* container, AttrValue val);

   void OnAttribute(DwarfAttribute attr, CallbackFunc* func) {
     attributes_[attr] = func;
   }

   // Reads all attributes for this DIE, storing the ones we were expecting.
   void ReadAttributes(DIEReader* reader, T* container) {
     string_view data = reader->ReadAttributesBegin();
     const AbbrevTable::Abbrev& abbrev = reader->GetAbbrev();

     for (auto attr : abbrev.attr) {
       AttrValue value = ParseAttr(*reader, attr.form, &data);
       auto it = attributes_.find(attr.name);
       if (it != attributes_.end()) {
         it->second(container, value);
       }
     }

     reader->ReadAttributesEnd(data, 0);
   }

  private:
   std::unordered_map<int, CallbackFunc*> attributes_;
 };

 // From DIEReader, defined here because it depends on FixedAttrReader.
 bool DIEReader::SkipChildren() {
   assert(state_ == State::kReadyToNext);
   if (!HasChild()) {
     return true;
   }

   int target_depth = depth_ - 1;
   dwarf::AttrReader<void> attr_reader;
   while (depth_ > target_depth) {
     // TODO(haberman): use DW_AT_sibling to optimize skipping when it is
     // available.
     if (!NextDIE()) {
       return false;
     }
     attr_reader.ReadAttributes(this, nullptr);
   }
   return true;
 }

 // LineInfoReader //////////////////////////////////////////////////////////////

 // Code to read the line number programs in the .debug_line section of a DWARF file.

 class LineInfoReader {
  public:
   LineInfoReader(const File& file) : file_(file), info_(0) {}

   struct LineInfo {
     LineInfo(bool default_is_stmt) : is_stmt(default_is_stmt) {}
     uint64_t address = 0;
     uint32_t file = 1;
     uint32_t line = 1;
     uint32_t column = 0;
     uint32_t discriminator = 0;
     bool end_sequence = false;
     bool basic_block = false;
     bool prologue_end = false;
     bool epilogue_begin = false;
     bool is_stmt;
     uint8_t op_index = 0;
     uint8_t isa = 0;
   };

   struct FileName {
     string_view name;
     uint32_t directory_index;
     uint64_t modified_time;
     uint64_t file_size;
   };

   void SeekToOffset(uint64_t offset, uint8_t address_size);
   bool ReadLineInfo();
   const LineInfo& lineinfo() const { return info_; }
   const FileName& filename(size_t i) const { return filenames_[i]; }
   string_view include_directory(size_t i) const {
     return include_directories_[i];
   }

   const std::string& GetExpandedFilename(size_t index) {
     if (index >= filenames_.size()) {
       THROW("filename index out of range");
     }

     // Generate these lazily.
     if (expanded_filenames_.size() <= index) {
       expanded_filenames_.resize(filenames_.size());
     }

     std::string& ret = expanded_filenames_[index];
     if (ret.empty()) {
       const FileName& filename = filenames_[index];
       string_view directory = include_directories_[filename.directory_index];
       ret = std::string(directory);
       if (!ret.empty()) {
         ret += "/";
       }
       ret += std::string(filename.name);
     }
     return ret;
   }

  private:
   struct Params {
     uint8_t minimum_instruction_length;
     uint8_t maximum_operations_per_instruction;
     uint8_t default_is_stmt;
     int8_t line_base;
     uint8_t line_range;
     uint8_t opcode_base;
   } params_;

   // The DWARF file whose debug_line data is being read.
   const File& file_;

   // Address size and DWARF version/format of the current line program.
   CompilationUnitSizes sizes_;
   // Directory table; entry 0 is an implicit empty (current directory) entry.
   std::vector<string_view> include_directories_;
   // File table; entry 0 is an unused placeholder.
   std::vector<FileName> filenames_;
   // Per-opcode values read from the header, indexed by standard opcode.
   // Entry 0 is not read from the header.
   std::vector<uint8_t> standard_opcode_lengths_;
   // Lazily built cache for GetExpandedFilename(), indexed like filenames_.
   std::vector<std::string> expanded_filenames_;

   // The part of the line program that has not been executed yet.
   string_view remaining_;

   // Whether we are in a "shadow" part of the bytecode program.  Sometimes
   // parts of the line info program make it into the final binary even though
   // the corresponding code was stripped.  We can tell when this happened by
   // looking for DW_LNE_set_address ops where the operand is 0.  This
   // indicates that a relocation for that argument never got applied, which
   // probably means that the code got stripped.
   //
   // While this is true, we don't yield any LineInfo entries, because the
   // "address" value is garbage.
   bool shadow_;

   // Current state-machine registers; exposed through lineinfo().
   LineInfo info_;

   // Applies an "operation advance" to the address and op_index registers.
   // `max_per_instr` is the number of operations per instruction and must be
   // non-zero.
   void DoAdvance(uint64_t advance, uint8_t max_per_instr) {
     const uint64_t total_ops = info_.op_index + advance;
     info_.address +=
         params_.minimum_instruction_length * (total_ops / max_per_instr);
     info_.op_index = total_ops % max_per_instr;
   }

   // Advances the address/op_index registers by `amount` operations, using the
   // operations-per-instruction value from the header.
   void Advance(uint64_t amount) {
     const uint8_t max_ops = params_.maximum_operations_per_instruction;
     if (max_ops != 1) {
       DoAdvance(amount, max_ops);
       return;
     }
     // This is by far the common case (only false on VLIW architectures), and
     // calling with a literal 1 lets the inlined/specialized code avoid a
     // costly division.
     DoAdvance(amount, 1);
   }

   // Returns `op` rebased so that the first special opcode maps to 0.  The
   // result is truncated to 8 bits.
   uint8_t AdjustedOpcode(uint8_t op) {
     const int rebased = op - params_.opcode_base;
     return static_cast<uint8_t>(rebased);
   }

   // Performs the address advance encoded in special opcode `op`.
   void SpecialOpcodeAdvance(uint8_t op) {
     const uint8_t adjusted = AdjustedOpcode(op);
     Advance(adjusted / params_.line_range);
   }
 };

 void LineInfoReader::SeekToOffset(uint64_t offset, uint8_t address_size) {
   string_view data = file_.debug_line;
   SkipBytes(offset, &data);

   sizes_.SetAddressSize(address_size);
   data = sizes_.ReadInitialLength(&data);
   sizes_.ReadDWARFVersion(&data);
   uint64_t header_length = sizes_.ReadDWARFOffset(&data);
   string_view program = data;
   SkipBytes(header_length, &program);

   params_.minimum_instruction_length = ReadMemcpy<uint8_t>(&data);
   if (sizes_.dwarf_version() == 4) {
     params_.maximum_operations_per_instruction = ReadMemcpy<uint8_t>(&data);

     if (params_.maximum_operations_per_instruction == 0) {
       THROW("DWARF line info had maximum_operations_per_instruction=0");
     }
   } else {
     params_.maximum_operations_per_instruction = 1;
   }
   params_.default_is_stmt = ReadMemcpy<uint8_t>(&data);
   params_.line_base = ReadMemcpy<int8_t>(&data);
   params_.line_range = ReadMemcpy<uint8_t>(&data);
   params_.opcode_base = ReadMemcpy<uint8_t>(&data);
   if (params_.line_range == 0) {
     THROW("line_range of zero will cause divide by zero");
   }

   standard_opcode_lengths_.resize(params_.opcode_base);
   for (size_t i = 1; i < params_.opcode_base; i++) {
     standard_opcode_lengths_[i] = ReadMemcpy<uint8_t>(&data);
   }

   // Read include_directories.
   include_directories_.clear();

   // Implicit current directory entry.
   include_directories_.push_back(string_view());

   while (true) {
     string_view dir = ReadNullTerminated(&data);
     if (dir.empty()) {
       break;
     }
     include_directories_.push_back(dir);
   }

   // Read file_names.
   filenames_.clear();
   expanded_filenames_.clear();

   // Filename 0 is unused.
   filenames_.push_back(FileName());
   while (true) {
     FileName file_name;
     file_name.name = ReadNullTerminated(&data);
     if (file_name.name.empty()) {
       break;
     }
     file_name.directory_index = ReadLEB128<uint32_t>(&data);
     file_name.modified_time = ReadLEB128<uint64_t>(&data);
     file_name.file_size = ReadLEB128<uint64_t>(&data);
     if (file_name.directory_index >= include_directories_.size()) {
       THROW("directory index out of range");
     }
     filenames_.push_back(file_name);
   }

   info_ = LineInfo(params_.default_is_stmt);
   remaining_ = program;
   shadow_ = false;
 }

 // Executes the line number program from where the previous call stopped
 // until it emits the next row of the line table.
 //
 // Returns true when a row was emitted; it is then available via lineinfo().
 // Returns false when the program is exhausted.  Rows computed while in a
 // "shadow" region (after a DW_LNE_set_address with operand 0) are not
 // reported.  Throws on out-of-range file or directory indexes.
 bool LineInfoReader::ReadLineInfo() {
   // Final step of last DW_LNS_copy / special opcode.
   info_.discriminator = 0;
   info_.basic_block = false;
   info_.prologue_end = false;
   info_.epilogue_begin = false;

   // Final step of DW_LNE_end_sequence.
   info_.end_sequence = false;

   string_view data = remaining_;

   while (true) {
     if (data.empty()) {
       remaining_ = data;
       return false;
     }

     uint8_t op = ReadMemcpy<uint8_t>(&data);

     if (op >= params_.opcode_base) {
       // Special opcode: advances both the address and the line, then emits a
       // row.
       SpecialOpcodeAdvance(op);
       info_.line +=
           params_.line_base + (AdjustedOpcode(op) % params_.line_range);
       if (!shadow_) {
         remaining_ = data;
         return true;
       }
     } else {
       switch (op) {
         case DW_LNS_extended_op: {
           // `len` is the size of the instruction that follows, which
           // includes the extended opcode byte itself.
           uint16_t len = ReadLEB128<uint16_t>(&data);
           uint8_t extended_op = ReadMemcpy<uint8_t>(&data);
           switch (extended_op) {
             case DW_LNE_end_sequence: {
               // Preserve address and set end_sequence, but reset everything
               // else.
               uint64_t addr = info_.address;
               info_ = LineInfo(params_.default_is_stmt);
               info_.address = addr;
               info_.end_sequence = true;
               if (!shadow_) {
                 remaining_ = data;
                 return true;
               }
               break;
             }
             case DW_LNE_set_address:
               info_.address = sizes_.ReadAddress(&data);
               info_.op_index = 0;
               // An address of 0 marks code that was probably stripped; see
               // the comment on shadow_.
               shadow_ = (info_.address == 0);
               break;
             case DW_LNE_define_file: {
               FileName file_name;
               file_name.name = ReadNullTerminated(&data);
               file_name.directory_index = ReadLEB128<uint32_t>(&data);
               file_name.modified_time = ReadLEB128<uint64_t>(&data);
               file_name.file_size = ReadLEB128<uint64_t>(&data);
               if (file_name.directory_index >= include_directories_.size()) {
                 THROW("directory index out of range");
               }
               filenames_.push_back(file_name);
               break;
             }
             case DW_LNE_set_discriminator:
               info_.discriminator = ReadLEB128<uint32_t>(&data);
               break;
             default:
               // We don't understand this opcode, skip it.  The opcode byte
               // counted by `len` has already been consumed, so only len - 1
               // operand bytes remain.
               if (len > 0) {
                 SkipBytes(len - 1, &data);
               }
               if (verbose_level > 0) {
                 fprintf(stderr,
                         "bloaty: warning: unknown DWARF line table extended "
                         "opcode: %d\n",
                         extended_op);
               }
               break;
           }
           break;
         }
         case DW_LNS_copy:
           if (!shadow_) {
             remaining_ = data;
             return true;
           }
           break;
         case DW_LNS_advance_pc:
           Advance(ReadLEB128<uint64_t>(&data));
           break;
         case DW_LNS_advance_line:
           info_.line += ReadLEB128<int32_t>(&data);
           break;
         case DW_LNS_set_file:
           info_.file = ReadLEB128<uint32_t>(&data);
           if (info_.file >= filenames_.size()) {
             THROW("filename index too big");
           }
           break;
         case DW_LNS_set_column:
           info_.column = ReadLEB128<uint32_t>(&data);
           break;
         case DW_LNS_negate_stmt:
           info_.is_stmt = !info_.is_stmt;
           break;
         case DW_LNS_set_basic_block:
           info_.basic_block = true;
           break;
         case DW_LNS_const_add_pc:
           // Same address advance as special opcode 255, without touching the
           // line or emitting a row.
           SpecialOpcodeAdvance(255);
           break;
         case DW_LNS_fixed_advance_pc:
           info_.address += ReadMemcpy<uint16_t>(&data);
           info_.op_index = 0;
           break;
         case DW_LNS_set_prologue_end:
           info_.prologue_end = true;
           break;
         case DW_LNS_set_epilogue_begin:
           info_.epilogue_begin = true;
           break;
         case DW_LNS_set_isa:
           info_.isa = ReadLEB128<uint8_t>(&data);
           break;
         default: {
           // Unknown standard opcode.  The header's standard_opcode_lengths
           // entry is the number of LEB128 operands the opcode takes (not a
           // byte count), so decode and discard that many operands.
           const uint8_t operand_count = standard_opcode_lengths_[op];
           for (uint8_t i = 0; i < operand_count; i++) {
             ReadLEB128<uint64_t>(&data);
           }
           if (verbose_level > 0) {
             fprintf(stderr,
                     "bloaty: warning: unknown DWARF line table opcode: %d\n",
                     op);
           }
           break;
         }
       }
     }
   }
 }

 }  // namespace dwarf

 // Bloaty DWARF Data Sources ///////////////////////////////////////////////////

 // The DWARF .debug_aranges section should, in theory, give us exactly the
 // information we need to map file ranges in linked binaries to compilation
 // units from where that code came.  However, .debug_aranges is often incomplete
 // or missing completely, so we use it as just one of several data sources for
 // the "compileunits" data source.
 //
 // Reports every non-zero-address range in file.debug_aranges to `sink`,
 // labeled with the name of the compilation unit the range belongs to.  Always
 // returns true.
 static bool ReadDWARFAddressRanges(const dwarf::File& file, RangeSink* sink) {
   // Resolves a compilation unit's debug_info offset to the DW_AT_name of its
   // compile-unit DIE.  Answers are remembered, so each unit is looked up at
   // most once.
   class FilenameMap {
    public:
     FilenameMap(const dwarf::File& file)
         : die_reader_(file),
           missing_("[DWARF is missing filename]") {
       attr_reader_.OnAttribute(
           DW_AT_name, [](string_view* out, dwarf::AttrValue value) {
             if (value.IsString()) {
               *out = value.GetString();
             }
           });
     }

     // Returns the unit's name, or a placeholder when it cannot be found.
     std::string GetFilename(uint64_t compilation_unit_offset) {
       std::string& cached = map_[compilation_unit_offset];
       if (cached.empty()) {
         cached = LookupFilename(compilation_unit_offset);
       }
       return cached;
     }

    private:
     std::string LookupFilename(uint64_t compilation_unit_offset) {
       const auto section = dwarf::DIEReader::Section::kDebugInfo;
       if (!die_reader_.SeekToCompilationUnit(section,
                                              compilation_unit_offset)) {
         return missing_;
       }
       if (die_reader_.GetTag() != DW_TAG_compile_unit) {
         return missing_;
       }

       string_view unit_name;
       attr_reader_.ReadAttributes(&die_reader_, &unit_name);
       if (unit_name.empty()) {
         return missing_;
       }
       return std::string(unit_name);
     }

     dwarf::DIEReader die_reader_;
     dwarf::AttrReader<string_view> attr_reader_;
     std::unordered_map<uint64_t, std::string> map_;
     std::string missing_;
   };

   FilenameMap unit_names(file);
   dwarf::AddressRanges ranges(file.debug_aranges);

   while (ranges.NextUnit()) {
     const std::string unit_name =
         unit_names.GetFilename(ranges.debug_info_offset());

     while (ranges.NextRange()) {
       // Ranges that start at address 0 are not reported.
       if (ranges.address() == 0) {
         continue;
       }
       sink->AddVMRangeIgnoreDuplicate("dwarf_aranges", ranges.address(),
                                       ranges.length(), unit_name);
     }
   }

   return true;
 }

 // TODO(haberman): make these into real protobufs once proto supports
 // string_view.
 class GeneralDIE {
  public:
   bool has_name() const { return has_name_; }
   bool has_linkage_name() const { return has_linkage_name_; }
   bool has_location_string() const { return has_location_string_; }
   bool has_low_pc() const { return has_low_pc_; }
   bool has_high_pc() const { return has_high_pc_; }
   bool has_location_uint64() const { return has_location_uint64_; }
   bool has_stmt_list() const { return has_stmt_list_; }
   bool has_ranges() const { return has_ranges_; }
   bool has_start_scope() const { return has_start_scope_; }

   std::string DebugString() {
     std::string ret;
     if (has_name()) {
       ret += absl::Substitute("name: $0\n", name());
     }
     if (has_linkage_name()) {
       ret += absl::Substitute("linkage_name: $0\n", linkage_name());
     }
     if (has_location_string()) {
       ret += absl::Substitute("location_string: $0\n", location_string());
     }
     if (has_low_pc()) {
       ret += absl::Substitute("low_pc: $0\n", low_pc());
     }
     if (has_high_pc()) {
       ret += absl::Substitute("high_pc: $0\n", high_pc());
     }
     if (has_location_uint64()) {
       ret += absl::Substitute("location_uint64: $0\n", location_uint64());
     }
     if (has_stmt_list()) {
       ret += absl::Substitute("stmt_list: $0\n", stmt_list());
     }
     if (has_ranges()) {
       ret += absl::Substitute("ranges: $0\n", ranges());
     }
     if (has_start_scope()) {
       ret += absl::Substitute("start_scope: $0\n", start_scope());
     }
     return ret;
   }

   string_view name() const { return name_; }
   string_view linkage_name() const { return linkage_name_; }
   string_view location_string() const { return location_string_; }
   uint64_t low_pc() const { return low_pc_; }
   uint64_t high_pc() const { return high_pc_; }
   uint64_t location_uint64() const { return location_uint64_; }
   uint64_t stmt_list() const { return stmt_list_; }
   uint64_t ranges() const { return ranges_; }
   uint64_t start_scope() const { return start_scope_; }

   void set_name(string_view val) {
     has_name_ = true;
     name_ = val;
   }
   void set_linkage_name(string_view val) {
     has_linkage_name_ = true;
     location_string_ = val;
   }
   void set_location_string(string_view val) {
     has_location_string_ = true;
     location_string_ = val;
   }
   void set_low_pc(uint64_t val) {
     has_low_pc_ = true;
     low_pc_ = val;
   }
   void set_high_pc(uint64_t val) {
     has_high_pc_ = true;
     high_pc_ = val;
   }
   void set_location_uint64(uint64_t val) {
     has_location_uint64_ = true;
     location_uint64_ = val;
   }
   void set_stmt_list(uint64_t val) {
     has_stmt_list_ = true;
     stmt_list_ = val;
   }
   void set_ranges(uint64_t val) {
     has_ranges_ = true;
     ranges_ = val;
   }
   void set_start_scope(uint64_t val) {
     has_start_scope_ = true;
     start_scope_ = val;
   }

  private:
   bool has_name_ = false;
   bool has_linkage_name_ = false;
   bool has_location_string_ = false;
   bool has_low_pc_ = false;
   bool has_high_pc_ = false;
   bool has_location_uint64_ = false;
   bool has_stmt_list_ = false;
   bool has_ranges_ = false;
   bool has_start_scope_ = false;

   string_view name_;
   string_view linkage_name_;
   string_view location_string_;
   uint64_t low_pc_ = 0;
   uint64_t high_pc_ = 0;
   uint64_t location_uint64_ = 0;
   uint64_t stmt_list_ = 0;
   uint64_t ranges_ = 0;
   uint64_t start_scope_ = 0;
 };

 // Holds the single attribute tracked for the "inlines" data: an optional
 // stmt_list value.  has_stmt_list() is false and stmt_list() is 0 until
 // set_stmt_list() has been called.
 class InlinesDIE {
  public:
   void set_stmt_list(uint64_t val) {
     stmt_list_ = val;
     has_stmt_list_ = true;
   }

   uint64_t stmt_list() const { return stmt_list_; }

   bool has_stmt_list() const { return has_stmt_list_; }

  private:
   bool has_stmt_list_ = false;
   uint64_t stmt_list_ = 0;
 };

 // Reports to `sink`, under the label `name`, every address or file range
 // that can be derived from the attributes collected in `die`:
 //   * a low_pc/high_pc pair,
 //   * a linkage name that is present in `symtab`,
 //   * a location expression consisting of a single DW_OP_addr,
 //   * a location given as an offset into debug_loc,
 //   * a ranges / start_scope offset into debug_ranges.
 // `sizes` supplies the address size of the unit that contains the DIE.
 void AddDIE(const dwarf::File& file, const std::string& name,
             const GeneralDIE& die, const SymbolTable& symtab,
             const DualMap& symbol_map, const dwarf::CompilationUnitSizes& sizes,
             RangeSink* sink) {
   // Address ranges marked with a high_pc/low_pc pair (especially functions).
   if (die.has_low_pc() && die.has_high_pc() && die.low_pc() != 0) {
     const uint64_t low = die.low_pc();
     const uint64_t high = die.high_pc();

     // It appears that some compilers make high_pc a size, and others make it
     // an address.  A value that is not below low_pc is taken to be an
     // address.
     const uint64_t size = (high >= low) ? high - low : high;
     sink->AddVMRangeIgnoreDuplicate("dwarf_pcpair", low, size, name);
   }

   // A linkage_name can be resolved through the symbol table.
   if (die.has_linkage_name()) {
     auto symbol = symtab.find(die.linkage_name());
     if (symbol != symtab.end()) {
       sink->AddVMRangeIgnoreDuplicate("dwarf_linkagename",
                                       symbol->second.first,
                                       symbol->second.second, name);
     }
   }

   // A "location" expression that gives the location as an address.  Only a
   // very small subset of the DWARF expression grammar is understood: a lone
   // DW_OP_addr followed by one address.
   if (die.has_location_string()) {
     string_view expr = die.location_string();
     const bool is_single_addr = expr.size() == sizes.address_size() + 1 &&
                                 expr[0] == DW_OP_addr;
     if (is_single_addr) {
       expr.remove_prefix(1);
       uint64_t addr;
       // TODO(haberman): endian?
       if (sizes.address_size() == 4) {
         addr = dwarf::ReadMemcpy<uint32_t>(&expr);
       } else if (sizes.address_size() == 8) {
         addr = dwarf::ReadMemcpy<uint64_t>(&expr);
       } else {
         BLOATY_UNREACHABLE();
       }

       // The location carries no size, so that part comes from the symbol
       // map.
       uint64_t size;
       if (!symbol_map.vm_map.TryGetSize(addr, &size)) {
         if (verbose_level > 0) {
           fprintf(stderr,
                   "bloaty: warning: couldn't find DWARF location in symbol "
                   "table, address: %" PRIx64 "\n",
                   addr);
         }
       } else {
         sink->AddVMRangeIgnoreDuplicate("dwarf_location", addr, size, name);
       }
     }
   }

   // A location given as an offset into debug_loc.
   if (die.has_location_uint64()) {
     const uint64_t loc_offset = die.location_uint64();
     if (loc_offset >= file.debug_loc.size()) {
       if (verbose_level > 0) {
         fprintf(stderr,
                 "bloaty: warning: DWARF location out of range, "
                 "location=%" PRIx64 "\n",
                 die.location_uint64());
       }
     } else {
       absl::string_view loc_list = file.debug_loc.substr(loc_offset);
       loc_list = GetLocationListRange(sizes, loc_list);
       sink->AddFileRange("dwarf_locrange", name, loc_list);
     }
   }

   // Two different attributes sometimes contain an offset into debug_ranges;
   // DW_AT_ranges wins when both are present.  UINT64_MAX means "neither".
   const uint64_t ranges_offset = die.has_ranges()        ? die.ranges()
                                  : die.has_start_scope() ? die.start_scope()
                                                          : UINT64_MAX;
   if (ranges_offset == UINT64_MAX) {
     return;
   }

   if (ranges_offset >= file.debug_ranges.size()) {
     if (verbose_level > 0) {
       fprintf(stderr,
               "bloaty: warning: DWARF debug range out of range, "
               "ranges_offset=%" PRIx64 "\n",
               ranges_offset);
     }
     return;
   }

   absl::string_view range_list = file.debug_ranges.substr(ranges_offset);
   range_list = GetRangeListRange(sizes, range_list);
   sink->AddFileRange("dwarf_debugrange", name, range_list);
 }

 // Walks the units of a pubnames/pubtypes-style `section` and attributes each
 // whole unit (length field included) to the compilation unit its header
 // points at in debug_info.  Units whose compilation unit has no string
 // DW_AT_name are not reported.  Throws if the compilation unit cannot be
 // located.
 static void ReadDWARFPubNames(const dwarf::File& file, string_view section,
                               RangeSink* sink) {
   dwarf::DIEReader die_reader(file);
   dwarf::AttrReader<string_view> attr_reader;
   string_view remaining = section;

   attr_reader.OnAttribute(
       DW_AT_name, [](string_view* out, dwarf::AttrValue value) {
         if (value.type() != dwarf::AttrValue::Type::kString) {
           return;
         }
         *out = value.GetString();
       });

   while (!remaining.empty()) {
     dwarf::CompilationUnitSizes sizes;

     // Remember where the unit starts so the reported range can include the
     // initial length field.
     const string_view unit_start = remaining;
     string_view unit = sizes.ReadInitialLength(&remaining);
     const size_t length_field_bytes = unit.data() - unit_start.data();
     const string_view full_unit =
         unit_start.substr(0, length_field_bytes + unit.size());

     sizes.ReadDWARFVersion(&unit);
     const uint64_t debug_info_offset = sizes.ReadDWARFOffset(&unit);
     if (!die_reader.SeekToCompilationUnit(
             dwarf::DIEReader::Section::kDebugInfo, debug_info_offset)) {
       THROW("Couldn't seek to debug_info section");
     }

     string_view compileunit_name;
     attr_reader.ReadAttributes(&die_reader, &compileunit_name);
     if (compileunit_name.empty()) {
       continue;
     }
     sink->AddFileRange("dwarf_pubnames", compileunit_name, full_unit);
   }
 }

 uint64_t ReadEncodedPointer(uint8_t encoding, bool is_64bit, string_view* data,
                             const char* data_base, RangeSink* sink) {
   uint64_t value;
   const char* ptr = data->data();
   uint8_t format = encoding & DW_EH_PE_FORMAT_MASK;

   switch (format) {
     case DW_EH_PE_omit:
       return 0;
     case DW_EH_PE_absptr:
       if (is_64bit) {
         value = dwarf::ReadMemcpy<uint64_t>(data);
       } else {
         value = dwarf::ReadMemcpy<uint32_t>(data);
       }
       break;
     case DW_EH_PE_uleb128:
       value = dwarf::ReadLEB128<uint64_t>(data);
       break;
     case DW_EH_PE_udata2:
       value = dwarf::ReadMemcpy<uint16_t>(data);
       break;
     case DW_EH_PE_udata4:
       value = dwarf::ReadMemcpy<uint32_t>(data);
       break;
     case DW_EH_PE_udata8:
       value = dwarf::ReadMemcpy<uint64_t>(data);
       break;
     case DW_EH_PE_sleb128:
       value = dwarf::ReadLEB128<int64_t>(data);
       break;
     case DW_EH_PE_sdata2:
       value = dwarf::ReadMemcpy<int16_t>(data);
       break;
     case DW_EH_PE_sdata4:
       value = dwarf::ReadMemcpy<int32_t>(data);
       break;
     case DW_EH_PE_sdata8:
       value = dwarf::ReadMemcpy<int64_t>(data);
       break;
     default:
       THROWF("Unexpected eh_frame format value: $0", format);
   }

   uint8_t application = encoding & DW_EH_PE_APPLICATION_MASK;

   switch (application) {
     case 0:
       break;
     case DW_EH_PE_pcrel:
       value += sink->TranslateFileToVM(ptr);
       break;
     case DW_EH_PE_datarel:
       if (data_base == nullptr) {
         THROW("datarel requested but no data_base provided");
       }
       value += sink->TranslateFileToVM(data_base);
       break;
     case DW_EH_PE_textrel:
     case DW_EH_PE_funcrel:
     case DW_EH_PE_aligned:
       THROWF("Unimplemented eh_frame application value: $0", application);
   }

   if (encoding & DW_EH_PE_indirect) {
     string_view location = sink->TranslateVMToFile(value);
     if (is_64bit) {
       value = dwarf::ReadMemcpy<uint64_t>(&location);
     } else {
       value = dwarf::ReadMemcpy<uint32_t>(&location);
     }
   }

   return value;
 }

 // Code to read the .eh_frame section.  This is not technically DWARF, but it
 // is similar to .debug_frame (which is DWARF) so it's convenient to put it
 // here.
 //
 // The best documentation I can find for this format comes from:
 //
 // *
 // http://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
 // * https://www.airs.com/blog/archives/460
 //
 // However these are both under-specified.  Some details are not mentioned in
 // either of these (for example, the fact that the function length uses the FDE
 // encoding, but always absolute).  libdwarf's implementation contains a comment
 // saying "It is not clear if this is entirely correct".  Basically the only
 // thing you can trust for some of these details is the code that actually
 // implements unwinding in production:
 //
 // * libunwind http://www.nongnu.org/libunwind/
 //   https://github.com/pathscale/libunwind/blob/master/src/dwarf/Gfde.c
 // * LLVM libunwind (a different project!!)
 //   https://github.com/llvm-mirror/libunwind/blob/master/src/DwarfParser.hpp
 // * libgcc
 //   https://github.com/gcc-mirror/gcc/blob/master/libgcc/unwind-dw2-fde.c
 //
 // Walks every entry of the section contents in `data`.  CIEs are parsed and
 // remembered, keyed by their start position; each FDE is reported to `sink`
 // as a file range attributed to the VM address stored in its first field.
 // Throws on an unexpected CIE version or augmentation character, and on an
 // FDE whose CIE has not been seen.
 void ReadEhFrame(string_view data, RangeSink* sink) {
   string_view remaining = data;

   // The fields parsed out of a CIE.  Only fde_encoding is consulted when
   // FDEs are read below.
   struct CIEInfo {
     int version = 0;
     uint32_t code_align = 0;
     int32_t data_align = 0;
     uint8_t fde_encoding = 0;
     uint8_t lsda_encoding = 0;
     bool is_signal_handler = false;
     bool has_augmentation_length = false;
     uint64_t personality_function = 0;
     uint32_t return_address_reg = 0;
   };

   // Maps the position of a CIE's first byte (its length field) to its parsed
   // contents.
   std::unordered_map<const void*, CIEInfo> cie_map;

   while (remaining.size() > 0) {
     dwarf::CompilationUnitSizes sizes;
     string_view full_entry = remaining;
     string_view entry = sizes.ReadInitialLength(&remaining);
     // An empty entry with nothing after it ends the section.
     if (entry.size() == 0 && remaining.size() == 0) {
       return;
     }
     // Extend the entry backwards so that it includes its length field.
     full_entry =
         full_entry.substr(0, entry.size() + (entry.data() - full_entry.data()));
     // The first word distinguishes CIEs (zero) from FDEs (non-zero).
     uint32_t id = dwarf::ReadMemcpy<uint32_t>(&entry);
     if (id == 0) {
       // CIE, we don't attribute this yet.
       CIEInfo& cie_info = cie_map[full_entry.data()];
       cie_info.version = dwarf::ReadMemcpy<uint8_t>(&entry);
       string_view aug_string = dwarf::ReadNullTerminated(&entry);
       cie_info.code_align = dwarf::ReadLEB128<uint32_t>(&entry);
       cie_info.data_align = dwarf::ReadLEB128<int32_t>(&entry);
       // The return address register is a single byte in version 1 and a
       // LEB128 in version 3.
       switch (cie_info.version) {
         case 1:
           cie_info.return_address_reg = dwarf::ReadMemcpy<uint8_t>(&entry);
           break;
         case 3:
           cie_info.return_address_reg = dwarf::ReadLEB128<uint32_t>(&entry);
           break;
         default:
           THROW("Unexpected eh_frame CIE version");
       }
       // Each augmentation character announces data that follows, in the
       // same order as the characters.
       while (aug_string.size() > 0) {
         switch (aug_string[0]) {
           case 'z':
             // Length until the end of augmentation data.
             cie_info.has_augmentation_length = true;
             dwarf::ReadLEB128<uint32_t>(&entry);
             break;
           case 'L':
             cie_info.lsda_encoding = dwarf::ReadMemcpy<uint8_t>(&entry);
             break;
           case 'R':
             cie_info.fde_encoding = dwarf::ReadMemcpy<uint8_t>(&entry);
             break;
           case 'S':
             cie_info.is_signal_handler = true;
             break;
           case 'P': {
             // An encoding byte followed by the personality function pointer
             // in that encoding.
             uint8_t encoding = dwarf::ReadMemcpy<uint8_t>(&entry);
             cie_info.personality_function =
                 ReadEncodedPointer(encoding, true, &entry, nullptr, sink);
             break;
           }
           default:
             THROW("Unexepcted augmentation character");
         }
         aug_string.remove_prefix(1);
       }
     } else {
       // FDE.  `id` is the distance back from the id field to the start of
       // the owning CIE; `entry` already points 4 bytes past the id field.
       auto iter = cie_map.find(entry.data() - id - 4);
       if (iter == cie_map.end()) {
         THROW("Couldn't find CIE for FDE");
       }
       const CIEInfo& cie_info = iter->second;
       // TODO(haberman): don't hard-code 64-bit.
       uint64_t address = ReadEncodedPointer(cie_info.fde_encoding, true, &entry,
                                             nullptr, sink);
       // TODO(haberman); Technically the FDE addresses could span a
       // function/compilation unit?  They can certainly span inlines.
       /*
       uint64_t length =
         ReadEncodedPointer(cie_info.fde_encoding & 0xf, true, &entry, sink);
       (void)length;

       if (cie_info.has_augmentation_length) {
         uint32_t augmentation_length = dwarf::ReadLEB128<uint32_t>(&entry);
         (void)augmentation_length;
       }

       uint64_t lsda =
           ReadEncodedPointer(cie_info.lsda_encoding, true, &entry, sink);
       if (lsda) {
       }
       */

       // Attribute the whole FDE to whatever owns the decoded address.
       sink->AddFileRangeForVMAddr("dwarf_fde", address, full_entry);
     }
   }
 }

 // See documentation here:
 //   http://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html#EHFRAME
 //
 // Parses the .eh_frame_hdr contents in `data` and reports each entry of its
 // lookup table to `sink`, attributed to the entry's initial_location
 // address.  Throws if the header version is not 1.
 void ReadEhFrameHdr(string_view data, RangeSink* sink) {
   // datarel pointers in this section are relative to its first byte.
   const char* const section_start = data.data();

   const uint8_t version = dwarf::ReadMemcpy<uint8_t>(&data);
   const uint8_t eh_frame_ptr_enc = dwarf::ReadMemcpy<uint8_t>(&data);
   const uint8_t fde_count_enc = dwarf::ReadMemcpy<uint8_t>(&data);
   const uint8_t table_enc = dwarf::ReadMemcpy<uint8_t>(&data);

   if (version != 1) {
     THROWF("Unknown eh_frame_hdr version: $0", version);
   }

   // TODO(haberman): don't hard-code 64-bit.
   // The eh_frame pointer is not used, but it has to be consumed to reach the
   // fields that follow it.
   const uint64_t eh_frame_ptr =
       ReadEncodedPointer(eh_frame_ptr_enc, true, &data, section_start, sink);
   (void)eh_frame_ptr;
   const uint64_t fde_count =
       ReadEncodedPointer(fde_count_enc, true, &data, section_start, sink);

   for (uint64_t left = fde_count; left > 0; --left) {
     const string_view before_entry = data;
     const uint64_t initial_location =
         ReadEncodedPointer(table_enc, true, &data, section_start, sink);
     const uint64_t fde_addr =
         ReadEncodedPointer(table_enc, true, &data, section_start, sink);

     // We could add fde_addr with an unknown length if we wanted to skip
     // reading eh_frame.  We can't count on this table being available though,
     // so we don't want to remove the eh_frame reading code altogether.
     (void)fde_addr;

     // The table entry is exactly the bytes consumed by the two reads above.
     const string_view table_entry =
         before_entry.substr(0, before_entry.size() - data.size());
     sink->AddFileRangeForVMAddr("dwarf_fde_table", initial_location,
                                 table_entry);
   }
 }

 // Attributes the line number program that starts `offset` bytes into
 // debug_line (its initial length field included) to `unit_name`.
 static void ReadDWARFStmtListRange(const dwarf::File& file, uint64_t offset,
                                    string_view unit_name, RangeSink* sink) {
   string_view cursor = file.debug_line;
   dwarf::SkipBytes(offset, &cursor);

   // Keep the start of the unit so the length field can be counted too.
   const string_view unit_start = cursor;
   dwarf::CompilationUnitSizes sizes;
   const string_view unit_body = sizes.ReadInitialLength(&cursor);

   const size_t length_field_bytes = unit_body.data() - unit_start.data();
   const string_view whole_unit =
       unit_start.substr(0, length_field_bytes + unit_body.size());
   sink->AddFileRange("dwarf_stmtlistrange", unit_name, whole_unit);
 }

 // The DWARF debug info can help us get compileunits info.  DIEs for compilation
 // units, functions, and global variables often have attributes that will
 // resolve to addresses.
 //
 // Walks every compilation unit of `section`.  For each unit that can be
 // named, the unit itself, its line program, its abbreviation table and all
 // of its DIEs are reported to `sink` under the unit's name.  `stmt_list_map`
 // remembers which name goes with which stmt_list offset, so that a unit
 // without a name can borrow the name of a unit sharing its line program.
 static void ReadDWARFDebugInfo(
     const dwarf::File& file, dwarf::DIEReader::Section section,
     const SymbolTable& symtab, const DualMap& symbol_map, RangeSink* sink,
     std::unordered_map<uint64_t, std::string>* stmt_list_map) {
   dwarf::DIEReader die_reader(file);
   die_reader.set_strp_sink(sink);
   dwarf::AttrReader<GeneralDIE> attr_reader;

   // String-valued attributes; values of any other kind are ignored.
   attr_reader.OnAttribute(DW_AT_name,
                           [](GeneralDIE* out, dwarf::AttrValue attr) {
                             if (attr.IsString()) {
                               out->set_name(attr.GetString());
                             }
                           });
   attr_reader.OnAttribute(DW_AT_linkage_name,
                           [](GeneralDIE* out, dwarf::AttrValue attr) {
                             if (attr.IsString()) {
                               out->set_linkage_name(attr.GetString());
                             }
                           });

   // A location is either an expression (string) or an offset (integer).
   attr_reader.OnAttribute(DW_AT_location,
                           [](GeneralDIE* out, dwarf::AttrValue attr) {
                             if (!attr.IsString()) {
                               out->set_location_uint64(attr.GetUint());
                             } else {
                               out->set_location_string(attr.GetString());
                             }
                           });

   // Integer-valued attributes; values that ToUint() cannot convert are
   // ignored.
   attr_reader.OnAttribute(DW_AT_low_pc,
                           [](GeneralDIE* out, dwarf::AttrValue attr) {
                             const absl::optional<uint64_t> n = attr.ToUint();
                             if (n.has_value()) {
                               out->set_low_pc(n.value());
                             }
                           });
   attr_reader.OnAttribute(DW_AT_high_pc,
                           [](GeneralDIE* out, dwarf::AttrValue attr) {
                             const absl::optional<uint64_t> n = attr.ToUint();
                             if (n.has_value()) {
                               out->set_high_pc(n.value());
                             }
                           });
   attr_reader.OnAttribute(DW_AT_stmt_list,
                           [](GeneralDIE* out, dwarf::AttrValue attr) {
                             const absl::optional<uint64_t> n = attr.ToUint();
                             if (n.has_value()) {
                               out->set_stmt_list(n.value());
                             }
                           });
   attr_reader.OnAttribute(DW_AT_ranges,
                           [](GeneralDIE* out, dwarf::AttrValue attr) {
                             const absl::optional<uint64_t> n = attr.ToUint();
                             if (n.has_value()) {
                               out->set_ranges(n.value());
                             }
                           });
   attr_reader.OnAttribute(DW_AT_start_scope,
                           [](GeneralDIE* out, dwarf::AttrValue attr) {
                             const absl::optional<uint64_t> n = attr.ToUint();
                             if (n.has_value()) {
                               out->set_start_scope(n.value());
                             }
                           });

   if (!die_reader.SeekToStart(section)) {
     return;
   }

   do {
     GeneralDIE unit_die;
     attr_reader.ReadAttributes(&die_reader, &unit_die);
     std::string unit_name = std::string(unit_die.name());

     if (unit_die.has_stmt_list()) {
       const uint64_t stmt_list = unit_die.stmt_list();
       if (!unit_name.empty()) {
         // Record the name for units that share this line program.
         (*stmt_list_map)[stmt_list] = unit_name;
       } else {
         auto known = stmt_list_map->find(stmt_list);
         if (known != stmt_list_map->end()) {
           unit_name = known->second;
         }
       }
     }

     // Without a name there is nothing to attribute this unit to.
     if (unit_name.empty()) {
       continue;
     }

     die_reader.set_compileunit_name(unit_name);
     sink->AddFileRange("dwarf_debuginfo", unit_name, die_reader.unit_range());
     AddDIE(file, unit_name, unit_die, symtab, symbol_map,
            die_reader.unit_sizes(), sink);

     if (unit_die.has_stmt_list()) {
       ReadDWARFStmtListRange(file, unit_die.stmt_list(), unit_name, sink);
     }

     // Attribute the unit's abbreviation table as well.
     string_view abbrevs = file.debug_abbrev;
     dwarf::SkipBytes(die_reader.debug_abbrev_offset(), &abbrevs);
     dwarf::AbbrevTable unit_abbrev;
     abbrevs = unit_abbrev.ReadAbbrevs(abbrevs);
     sink->AddFileRange("dwarf_abbrev", unit_name, abbrevs);

     while (die_reader.NextDIE()) {
       GeneralDIE child;
       attr_reader.ReadAttributes(&die_reader, &child);

       // low_pc == 0 is a signal that this routine was stripped out of the
       // final binary.  Skip this DIE and all of its children.
       const bool stripped = child.has_low_pc() && child.low_pc() == 0;
       if (stripped) {
         die_reader.SkipChildren();
         continue;
       }
       AddDIE(file, unit_name, child, symtab, symbol_map,
              die_reader.unit_sizes(), sink);
     }
   } while (die_reader.NextCompilationUnit());
 }

 // Top-level driver that reports DWARF-derived ranges to `sink`.
 //
 // Throws if `file.debug_info` is empty.  Otherwise, in order:
 //   1. reads the address ranges, but only when `file.debug_aranges` is
 //      non-empty;
 //   2. walks the kDebugInfo section and then the kDebugTypes section;
 //   3. reads the pubnames and pubtypes data.
 void ReadDWARFCompileUnits(const dwarf::File& file, const SymbolTable& symtab,
                            const DualMap& symbol_map, RangeSink* sink) {
   if (file.debug_info.empty()) {
     THROW("missing debug info");
   }

   if (!file.debug_aranges.empty()) {
     ReadDWARFAddressRanges(file, sink);
   }

   // One map is shared across both section walks, so stmt_list -> name
   // entries recorded during the kDebugInfo pass remain available to the
   // kDebugTypes pass.
   std::unordered_map<uint64_t, std::string> stmt_list_map;
   for (const auto section : {dwarf::DIEReader::Section::kDebugInfo,
                              dwarf::DIEReader::Section::kDebugTypes}) {
     ReadDWARFDebugInfo(file, section, symtab, symbol_map, sink,
                        &stmt_list_map);
   }

   ReadDWARFPubNames(file, file.debug_pubnames, sink);
   ReadDWARFPubNames(file, file.debug_pubtypes, sink);
 }

 // Builds the label for a line-table entry: "<file>:<line>" when
 // `include_line` is true, otherwise just `file`.
 static std::string LineInfoKey(const std::string& file, uint32_t line,
                                bool include_line) {
   std::string key = file;
   if (include_line) {
     key += ":";
     key += std::to_string(line);
   }
   return key;
 }

 // Consumes rows from `line_info_reader` and reports each run of consecutive
 // rows that share the same key (see LineInfoKey()) to `sink` as one
 // "dwarf_stmtlist" VM range.
 //
 // A run is closed when the key changes or when an end_sequence row is seen;
 // the closing row's address is the exclusive end of the range.
 static void ReadDWARFStmtList(bool include_line,
                               dwarf::LineInfoReader* line_info_reader,
                               RangeSink* sink) {
   // Start address of the currently open run.  Zero doubles as "no run is
   // open", so a row whose address is 0 never opens one.
   uint64_t run_start = 0;
   std::string prev_key;

   while (line_info_reader->ReadLineInfo()) {
     const auto& row = line_info_reader->lineinfo();
     const bool ends_sequence = row.end_sequence;
     const auto row_addr = row.address;

     // An end_sequence row just inherits the key of the run it terminates.
     std::string key;
     if (ends_sequence) {
       key = prev_key;
     } else {
       key = LineInfoKey(line_info_reader->GetExpandedFilename(row.file),
                         row.line, include_line);
     }

     if (run_start == 0) {
       run_start = row_addr;
     } else {
       const bool key_changed = !prev_key.empty() && key != prev_key;
       if (ends_sequence || key_changed) {
         sink->AddVMRange("dwarf_stmtlist", run_start, row_addr - run_start,
                          prev_key);
         // After end_sequence no run is open; otherwise this row opens the
         // next one.
         if (ends_sequence) {
           run_start = 0;
         } else {
           run_start = row_addr;
         }
       }
     }

     prev_key = key;
   }
 }

 // Walks the compilation units of the kDebugInfo section and, for each unit
 // whose first-read DIE carries DW_AT_stmt_list, feeds the line table at that
 // offset through ReadDWARFStmtList().
 //
 // `include_line` is forwarded unchanged to ReadDWARFStmtList().
 //
 // Throws if `file.debug_info` or `file.debug_line` is empty, or if the
 // reader cannot seek to the start of the kDebugInfo section.
 void ReadDWARFInlines(const dwarf::File& file, RangeSink* sink,
                       bool include_line) {
   if (file.debug_info.empty() || file.debug_line.empty()) {
     THROW("no debug info");
   }

   dwarf::DIEReader die_reader(file);
   dwarf::LineInfoReader line_info_reader(file);
   dwarf::AttrReader<InlinesDIE> attr_reader;

   // Only the statement-list offset is needed from each DIE; an attribute
   // value that does not convert to an unsigned integer is ignored.
   attr_reader.OnAttribute(
       DW_AT_stmt_list, [](InlinesDIE* die, dwarf::AttrValue data) {
         if (const absl::optional<uint64_t> offset = data.ToUint()) {
           die->set_stmt_list(*offset);
         }
       });

   if (!die_reader.SeekToStart(dwarf::DIEReader::Section::kDebugInfo)) {
     THROW("debug info is present, but empty");
   }

   do {
     InlinesDIE unit_die;
     attr_reader.ReadAttributes(&die_reader, &unit_die);

     if (!unit_die.has_stmt_list()) {
       continue;
     }

     // The line program is decoded with the address size of the unit that
     // references it.
     line_info_reader.SeekToOffset(unit_die.stmt_list(),
                                   die_reader.unit_sizes().address_size());
     ReadDWARFStmtList(include_line, &line_info_reader, sink);
   } while (die_reader.NextCompilationUnit());
 }

 }  // namespace bloaty