| // Copyright 2019 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "src/developer/debug/zxdb/symbols/index.h" |
| |
| #include "llvm/DebugInfo/DWARF/DWARFContext.h" |
| #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" |
| #include "llvm/DebugInfo/DWARF/DWARFUnit.h" |
| #include "src/developer/debug/zxdb/common/adapters.h" |
| #include "src/developer/debug/zxdb/common/file_util.h" |
| #include "src/developer/debug/zxdb/common/string_util.h" |
| #include "src/developer/debug/zxdb/symbols/dwarf_die_decoder.h" |
| #include "src/developer/debug/zxdb/symbols/dwarf_die_scanner.h" |
| #include "src/developer/debug/zxdb/symbols/dwarf_tag.h" |
| |
| namespace zxdb { |
| |
| namespace { |
| |
| // Don't index more than this number of levels to prevent infinite recursion. |
| constexpr size_t kMaxParentPath = 16; |
| |
| // Stores a name with a SymbolRef for later indexing. |
| class NamedSymbolRef : public IndexNode::SymbolRef { |
| public: |
| NamedSymbolRef() = default; |
| |
| // Creates a SymbolRef we should index. The pointed-to string must outlive this class. |
| NamedSymbolRef(SymbolRef::Kind kind, uint64_t offset, IndexNode::Kind k, const char* name, |
| uint64_t decl_offset, bool has_abstract_origin) |
| : SymbolRef(kind, offset), |
| kind_(k), |
| name_(name), |
| decl_offset_(decl_offset), |
| has_abstract_origin_(has_abstract_origin) {} |
| |
| bool should_index() const { return kind_ != IndexNode::Kind::kNone; } |
| |
| IndexNode::Kind kind() const { return kind_; } |
| |
| // The name associated with the DIE. Could be null. |
| // |
| // It's also possible for this to be valid for an otherwise !should_index() SymbolRef. In the case |
| // of a function with a specification, the implementation will have should_index set, but we'll |
| // traverse the specification to fill in the name. This will generate a valid but not indexable |
| // item for the specification. |
| const char* name() const { return name_; } |
| void set_name(const char* n) { name_ = n; } |
| |
| // If this DIE has a declaration associated with it (a DW_AT_declaration tag), this indicates the |
| // absolute offset of the declaration DIE. Will be 0 if none. It may or may not be inside the |
| // current unit (it normally will be though). |
| uint64_t decl_offset() const { return decl_offset_; } |
| |
| // The indexing layer uses this to cache the node found for a given thing. This allows us to |
| // bypass lookup for the common case of things that are all in the same scope. |
| IndexNode* index_node() const { return index_node_; } |
| void set_index_node(IndexNode* n) { index_node_ = n; } |
| |
| // Sometimes we need to know whether an abstract origin is present for parent computations. |
| // |
| // When walking the dependency path, the abstract origin (if any) encodes the lexical scope. |
| // As an example, DW_TAG_inlined_subroutine DIEs are inside of the function they're inlined into |
| // (the calling function will be die.getParent()). These will then have an abstract origin of a |
| // DIE outside of the function containing the common info for all inlined instances. |
| // |
| // When there's no separate declaration, this abstract origin will be the scope that the function |
| // was declared in where we index from. For: |
| // |
| // namespace ns { |
| // inline void InlinedFunction() { ... } |
| // } |
| // void CallingFunction() { |
| // ns::InlinedFunction(); |
| // } |
| // |
| // The DWARF would look like: |
| // |
| // 1: DW_TAG_namespace (name = "ns") |
| // 2: DW_TAG_subprogram (name = "InlinedFunction") |
| // 2: DW_TAG_subprogram (name = "CallingFunction") |
| // 3: DW_TAG_inlined_subroutine (abstract_origin = 2) |
| // |
| // When there is a declaration, that declaration will encode the scope: |
| // |
| // 1: DW_TAG_namespace (name = "ns") |
| // 2: DW_TAG_subprogram (name = "InlinedFunction", is_declaration = true) |
| // 3: DW_TAG_subprogram (declaration = 2) |
| // 2: DW_TAG_subprogram (name = "CallingFunction") |
| // 3: DW_TAG_inlined_subroutine (abstract_origin = 3) |
| bool has_abstract_origin() const { return has_abstract_origin_; } |
| void set_has_abstract_origin(bool b) { has_abstract_origin_ = b; } |
| |
| private: |
| IndexNode::Kind kind_ = IndexNode::Kind::kNone; |
| const char* name_ = nullptr; |
| uint64_t decl_offset_ = 0; |
| IndexNode* index_node_ = nullptr; |
| bool has_abstract_origin_ = false; |
| }; |
| |
| // Returns true if the given abbreviation defines a PC range. |
| bool AbbrevHasCode(const llvm::DWARFAbbreviationDeclaration* abbrev) { |
| for (const auto spec : abbrev->attributes()) { |
| if (spec.Attr == llvm::dwarf::DW_AT_low_pc || spec.Attr == llvm::dwarf::DW_AT_high_pc || |
| spec.Attr == llvm::dwarf::DW_AT_ranges) |
| return true; |
| } |
| return false; |
| } |
| |
| // Returns true if the given abbreviation defines a "location". |
| bool AbbrevHasLocation(const llvm::DWARFAbbreviationDeclaration* abbrev) { |
| for (const auto spec : abbrev->attributes()) { |
| if (spec.Attr == llvm::dwarf::DW_AT_location) |
| return true; |
| } |
| return false; |
| } |
| |
| size_t RecursiveCountDies(const IndexNode& node) { |
| size_t result = node.dies().size(); |
| |
| for (int i = 0; i < static_cast<int>(IndexNode::Kind::kEndPhysical); i++) { |
| for (const auto& pair : node.MapForKind(static_cast<IndexNode::Kind>(i))) |
| result += RecursiveCountDies(pair.second); |
| } |
| return result; |
| } |
| |
| // This helper class is used to index the symbols of one unit. It keeps some state to avoid |
| // reallocating for each call. |
| // |
| // Indexing is two passes. In the first pass we scan the DIEs in the unit. We identify which ones |
| // will need indexing and save information on the nesting. The parent chain information is important |
| // because we need to go from a DIE to its parent chain. |
| // |
| // In the second pass we actually index the items identified, using the saved parent and name |
| // information from the scan pass. |
| // |
| // In the second pass we can encounter some DIEs in the hierarchy chain that were not decoded in |
| // the first pass. An example is when going to a function declaration. We only identify the |
| // implementations in the first pass, but need to take the name from the declaration. These missing |
| // ones are filled in by FillDieInfo(). |
| class UnitIndexer { |
| public: |
| // All passed-in objects must outlive this class. |
| explicit UnitIndexer(llvm::DWARFContext* context, llvm::DWARFUnit* unit) |
| : context_(context), unit_(unit), scanner_(unit), name_decoder_(context) { |
| // The indexable array is 1:1 with the scanner entries. |
| indexable_.resize(scanner_.die_count()); |
| |
| path_.reserve(8); // Don't want to reallocate. |
| |
| // Set up the name decoder to extract to this local. |
| name_decoder_.AddCString(llvm::dwarf::DW_AT_name, &name_decoder_name_); |
| } |
| |
| // Forces indexing to go through the slow path (AddStandaloneEntryToIndex() instead of |
| // AddEntryToIndex()) which can handle cross-unit references. This allows us to test the slow path |
| // on the same data as the fast path and make sure they match. |
| void set_force_slow_path(bool force) { force_slow_path_ = force; } |
| |
| // To use, first call Scan() to populate the indexable_ array, then call Index() to add the |
| // items to the given index node root. The Scan pass will additionally add any entrypoint |
| // functions it finds to the main_functions vector. |
| void Scan(std::vector<IndexNode::SymbolRef>* main_functions); |
| void Index(IndexNode* root); |
| |
| private: |
| // Returns kNone for non-indexable items. |
| // |
| // The kVar case is also returned for collection members. These need to be treated as variables |
| // when they have const data, but not otherwise, and this function does not decode the attributes. |
| IndexNode::Kind GetKindForDie(const llvm::DWARFDebugInfoEntry* die) const; |
| |
| // Computes in the name and type for a DIE entry that wasn't filled in in the first pass (see |
| // class-level comment). Returns empty string if there is no name (this is important for the |
| // caller, see that code for more). |
| // |
| // This requires that the DIE be in the current unit_ (the decoder references the unit). |
| const char* GetDieName(uint32_t index); |
| |
| // Adds the entry at the given index. If the entry is not in the current compilation unit, this |
| // will fall back to the slow path: AddStandaloneEntryToIndex(). |
| void AddEntryToIndex(uint32_t index_me, IndexNode* root); |
| |
| // Slow path for adding an entry. |
| // |
| // This takes the index of a SymbolRef we want to index and adds it to the index without using |
| // anything that references the compilation unit, notably the scanner_ which computes parent |
| // information. |
| // |
| // This is used for the uncommon case of cross-unit references, where the declaration might be |
| // in a different unit from the implementation. This means that the scanner's parent tree |
| // doesn't cover the object we want. This walks the tree using DWARFDie.getParent() which is |
| // conceptually simpler but requires a binary search at each step. |
| void AddStandaloneEntryToIndex(uint32_t index_me, IndexNode* root); |
| |
| // Given the index of a NamedSymbolRef known to have an abstract origin, fills in the index of the |
| // abstract origin to the given output variable if it exists in the same unit and returns true. |
| // |
| // If it doesn't exist or is in a different unit, returns false. Being in the same unit is |
| // required to stay in the index fast path. |
| bool GetAbstractOriginIndex(uint32_t source, uint32_t* abstract_origin_index) const; |
| |
| llvm::DWARFContext* context_; |
| llvm::DWARFUnit* unit_; |
| |
| bool force_slow_path_ = false; // See setter above. |
| |
| DwarfDieScanner2 scanner_; |
| std::vector<NamedSymbolRef> indexable_; |
| |
| // Variable used for collecting the path of parents in AddDIE. This would make more sense as a |
| // local variable but having it here prevents reallocating each time. |
| std::vector<NamedSymbolRef*> path_; |
| |
| // Used to decode names for DIEs in the second pass when we find one we need that wasn't extracted |
| // in the first. |
| DwarfDieDecoder name_decoder_; |
| llvm::Optional<const char*> name_decoder_name_; |
| }; |
| |
| // The symbol storage will be filled with the indexable entries. |
| void UnitIndexer::Scan(std::vector<IndexNode::SymbolRef>* main_functions) { |
| DwarfDieDecoder decoder(context_); |
| |
| // The offset of the declaration. This can be unit-relative or .debug_info-relative (global). |
| llvm::DWARFDie decl_die; |
| decoder.AddReference(llvm::dwarf::DW_AT_specification, &decl_die); |
| |
| llvm::Optional<bool> is_declaration; |
| decoder.AddBool(llvm::dwarf::DW_AT_declaration, &is_declaration); |
| |
| bool has_const_value = false; |
| decoder.AddPresenceCheck(llvm::dwarf::DW_AT_const_value, &has_const_value); |
| |
| llvm::Optional<bool> is_main_subprogram; |
| decoder.AddBool(llvm::dwarf::DW_AT_main_subprogram, &is_main_subprogram); |
| |
| llvm::Optional<const char*> name; |
| decoder.AddCString(llvm::dwarf::DW_AT_name, &name); |
| |
| bool has_abstract_origin = false; |
| decoder.AddPresenceCheck(llvm::dwarf::DW_AT_abstract_origin, &has_abstract_origin); |
| |
| // IF YOU ADD MORE ATTRIBUTES HERE don't forget to reset() them before Decode(). |
| |
| for (; !scanner_.done(); scanner_.Advance()) { |
| const llvm::DWARFDebugInfoEntry* die = scanner_.Prepare(); |
| |
| // Check whether we should consider this before decoding since decoding is slow. |
| IndexNode::Kind kind = GetKindForDie(die); |
| if (kind == IndexNode::Kind::kNone) |
| continue; |
| |
| // This DIE is of the type we want to index so decode. Must reset all output vars first. |
| is_declaration.reset(); |
| has_const_value = false; |
| decl_die = llvm::DWARFDie(); |
| is_main_subprogram.reset(); |
| name.reset(); |
| has_abstract_origin = false; |
| if (!decoder.Decode(llvm::DWARFDie(unit_, die))) |
| continue; |
| |
| // Compute the offset of a separate declaration if this DIE has one. |
| uint64_t decl_offset = 0; |
| if (decl_die) |
| decl_offset = decl_die.getOffset(); |
| |
| if (kind == IndexNode::Kind::kVar && die->getTag() == llvm::dwarf::DW_TAG_member && |
| !has_const_value) { |
| // Don't need to index structure members that don't have const values. This needs to be |
| // disambiguated because GetKindForDie doesn't have access to the attributes and we don't |
| // want to decode twice. |
| // |
| // In C++ everything with a const_value will generally also be external (i.e. "static") which |
| // are things we want to index. Theoretically the compiler could generated a const_value |
| // member if it notices the member is never modified and optimize it. In that case, the user |
| // would never expect to reference it outside of a known Collection object and it doesn't need |
| // to be in the index. But that requires some extra work checking for the external flag in |
| // this time-critical indexing step, and the worse thing is that "print MyClass::kMyConstant" |
| // evaluates to a correct value where it might not be allowed in the actual language. |
| // |
| // As a result, we don't also check DW_AT_external. |
| continue; |
| } |
| |
| FX_DCHECK(scanner_.die_index() < indexable_.size()); |
| auto ref_kind = is_declaration && *is_declaration ? IndexNode::SymbolRef::kDwarfDeclaration |
| : IndexNode::SymbolRef::kDwarf; |
| indexable_[scanner_.die_index()] = NamedSymbolRef( |
| ref_kind, die->getOffset(), kind, name ? *name : nullptr, decl_offset, has_abstract_origin); |
| |
| // Check for "main" function annotation. |
| if (kind == IndexNode::Kind::kFunction && is_main_subprogram && *is_main_subprogram) |
| main_functions->emplace_back(IndexNode::SymbolRef::kDwarf, die->getOffset()); |
| } |
| } |
| |
| void UnitIndexer::Index(IndexNode* root) { |
| if (force_slow_path_) { |
| // Index everything with the slow path for testing purposes. |
| for (uint32_t i = 0; i < indexable_.size(); i++) { |
| if (indexable_[i].should_index()) |
| AddStandaloneEntryToIndex(i, root); |
| } |
| } else { |
| // Normal fast-path. This is about 6x faster than the slow path for large programs. |
| for (uint32_t i = 0; i < indexable_.size(); i++) { |
| if (indexable_[i].should_index()) |
| AddEntryToIndex(i, root); |
| } |
| } |
| } |
| |
| IndexNode::Kind UnitIndexer::GetKindForDie(const llvm::DWARFDebugInfoEntry* die) const { |
| const llvm::DWARFAbbreviationDeclaration* abbrev = die->getAbbreviationDeclarationPtr(); |
| if (!abbrev) |
| return IndexNode::Kind::kNone; // Corrupt. |
| |
| switch (static_cast<DwarfTag>(abbrev->getTag())) { |
| case DwarfTag::kSubprogram: |
| case DwarfTag::kInlinedSubroutine: |
| if (AbbrevHasCode(abbrev)) |
| return IndexNode::Kind::kFunction; |
| return IndexNode::Kind::kNone; // Skip functions with no code. |
| |
| case DwarfTag::kNamespace: |
| return IndexNode::Kind::kNamespace; |
| |
| case DwarfTag::kBaseType: |
| case DwarfTag::kClassType: |
| case DwarfTag::kEnumerationType: |
| case DwarfTag::kPtrToMemberType: |
| case DwarfTag::kStringType: |
| case DwarfTag::kStructureType: |
| case DwarfTag::kSubroutineType: |
| case DwarfTag::kTypedef: |
| case DwarfTag::kUnionType: |
| return IndexNode::Kind::kType; |
| |
| case DwarfTag::kVariable: |
| if (!scanner_.is_inside_function() && AbbrevHasLocation(abbrev)) { |
| // Found variable storage outside of a function (variables inside functions are local so |
| // don't get added to the global index). |
| // TODO(bug 36671): index function-static variables. |
| return IndexNode::Kind::kVar; |
| } |
| return IndexNode::Kind::kNone; // Variable with no location. |
| |
| case DwarfTag::kMember: |
| // Caller needs to check this case (see declaration comment). |
| return IndexNode::Kind::kVar; |
| |
| default: |
| // Don't index anything else. |
| return IndexNode::Kind::kNone; |
| } |
| } |
| |
| const char* UnitIndexer::GetDieName(uint32_t index) { |
| name_decoder_name_.reset(); |
| if (name_decoder_.Decode(unit_->getDIEAtIndex(index)) && name_decoder_name_) |
| return *name_decoder_name_; |
| return ""; |
| } |
| |
| // NOTE: Changes in this function may require updates in the slow path: AddStandaloneEntryToIndex(). |
| void UnitIndexer::AddEntryToIndex(uint32_t index_me, IndexNode* root) { |
| // The path to index always ends with the last thing being indexed (the path_ is in reverse). |
| path_.clear(); |
| path_.push_back(&indexable_[index_me]); |
| |
| uint32_t cur = index_me; |
| if (indexable_[index_me].decl_offset()) { |
| // When the entry has a decl_offset, that means it's the implementation for e.g. a function. |
| // The actual name comes from the declaration so start from that index. |
| // |
| // 99% of all declarations are within the same unit so look up in the current unit first. If |
| // the current unit doesn't cover the offset, getDIEForOffset will return a null DIE. |
| llvm::DWARFDie die = unit_->getDIEForOffset(indexable_[index_me].decl_offset()); |
| if (!die) { |
| // DIE not found in this unit, try adding it to the index using the slow path which allows |
| // cross-unit references. |
| AddStandaloneEntryToIndex(index_me, root); |
| return; |
| } |
| cur = unit_->getDIEIndex(die); |
| |
| if (!indexable_[index_me].name()) { |
| // When there's no name, take the name from the declaration. |
| if (!indexable_[cur].name()) { |
| // The declaration has no name because the first pass didn't need to index it. Compute |
| // the name now. Caching it on both the declaration and the implementation is useful because |
| // many implementation can share the same declaration and this saves multiple name |
| // retrievals. |
| // |
| // Here GetDieName() returns the empty string if there's no name which allows us to cache |
| // the lack of a name and not recompute. |
| indexable_[cur].set_name(GetDieName(cur)); |
| } |
| indexable_[index_me].set_name(indexable_[cur].name()); |
| } |
| } |
| |
| // If at this point we still don't have a name for the thing being indexed, give up trying to |
| // index it. |
| if (!indexable_[cur].name() || !indexable_[cur].name()[0]) |
| return; |
| |
| // Move to the abstract origin if present to start walking the scopes. The abstract origin (if |
| // any) encodes the lexical scope (see has_abstract_origin() declaration above). |
| if (indexable_[cur].has_abstract_origin()) { |
| if (!GetAbstractOriginIndex(cur, &cur)) { |
| // Fall back to the slow path. This will be all error cases as well as when the abstract |
| // origin is in a different complication unit (I have not seen this in practice). |
| AddStandaloneEntryToIndex(index_me, root); |
| return; |
| } |
| } |
| |
| // Goes to the parent. The first item was added above, the loop below will add going up the |
| // parent chain from there. |
| cur = scanner_.GetParentIndex(cur); |
| |
| // Start indexing from here. We may find a cached one that will prevent us from having to |
| // go to the root. |
| IndexNode* index_from = root; |
| |
| // Collect the path from the current item (path_[0]) to its ultimate parent (path_.back()). |
| while (cur != DwarfDieScanner2::kNoParent && indexable_[cur].should_index()) { |
| if (path_.size() > kMaxParentPath) |
| return; // Too many components, consider this item corrupt and don't index. |
| |
| if (indexable_[cur].index_node()) { |
| // Already indexed this node (for example, this is a namespace that was already referenced). |
| // so we can start inserting the path from this node. |
| index_from = indexable_[cur].index_node(); |
| break; |
| } |
| path_.push_back(&indexable_[cur]); |
| cur = scanner_.GetParentIndex(cur); |
| } |
| |
| // Add the path to the index (walk in reverse to start from the root). |
| for (int path_i = static_cast<int>(path_.size()) - 1; path_i >= 0; path_i--) { |
| NamedSymbolRef* named_ref = path_[path_i]; |
| const char* name = named_ref->name() ? named_ref->name() : ""; |
| |
| // Only save the DIE reference for the thing we're attempting to index (the leaf node at |
| // path_[0]). Intermediate things like the namespaces and classes along the path don't need DIE |
| // references unless Scan() independently decided those need indexing (should_index()). Not only |
| // is adding these DIEs unnecessary, it can create unnamed type entries for things like |
| // anonymous enums which we don't want. |
| if (path_i == 0) |
| index_from = index_from->AddChild(named_ref->kind(), name, *named_ref); |
| else |
| index_from = index_from->AddChild(named_ref->kind(), name); |
| named_ref->set_index_node(index_from); |
| } |
| } |
| |
| // NOTE: Changes in this function may require updates in the fast path: AddEntryToIndex(). |
| void UnitIndexer::AddStandaloneEntryToIndex(uint32_t index_me, IndexNode* index_root) { |
| // This function can not use the unit_, scanner_, or name_decoder_ (and hence GetDieName()) |
| // because those all reference the current compilation unit. This code path must be able to handle |
| // cross-unit references. |
| |
| // Thing to add to the index. |
| const NamedSymbolRef& named_ref = indexable_[index_me]; |
| |
| // Compute the name (avoiding GetDieName()) and the DIE to start indexing from. |
| const char* name = named_ref.name(); |
| llvm::DWARFDie die; |
| if (named_ref.decl_offset()) { |
| // When there's a separate declaration, its parent encodes the scope information. |
| die = context_->getDIEForOffset(named_ref.decl_offset()); |
| if (!die) |
| return; // Invalid decl offset, skip indexing. |
| if (!name) { |
| // The declaration can fill in the name if the name is not present on the implementation |
| // (normally it's not there). |
| name = die.getName(llvm::DINameKind::ShortName); |
| } |
| } else { |
| // When there's no declaration, the name will already have been filled in (if present) to the |
| // named_ref. |
| die = context_->getDIEForOffset(named_ref.offset()); |
| } |
| |
| if (!name) |
| return; // This item has no name, can't index it. |
| |
| // When walking the dependency path, the abstract origin (if any) encodes the lexical scope |
| // (see has_abstract_origin() declaration above). |
| if (llvm::DWARFDie ao = die.getAttributeValueAsReferencedDie(llvm::dwarf::DW_AT_abstract_origin)) |
| die = ao; |
| |
| // Stores the reverse path to the node we're inserting. This includes all scopes like namespaces |
| // and classes in reverse order, but does not include the thing we're inserting itself. |
| // So when insertng std::vector::vector this will be { "vector" (type), "std" (namespace) }. |
| struct NameKind { |
| NameKind(const char* n, IndexNode::Kind k) : name(n), kind(k) {} |
| |
| const char* name = nullptr; |
| IndexNode::Kind kind = IndexNode::Kind::kNone; |
| }; |
| std::vector<NameKind> path; |
| |
| // Walk the path upward saving the path. Don't include the leaf DIE. |
| while ((die = die.getParent())) { |
| if (path.size() > kMaxParentPath) |
| return; // Too deep nesting, give up. |
| |
| auto kind = GetKindForDie(die.getDebugInfoEntry()); |
| if (kind == IndexNode::Kind::kNone) |
| break; // Hit the top of what we want to index (like the unit). |
| |
| path.emplace_back(die.getName(llvm::DINameKind::ShortName), kind); |
| } |
| |
| // Insert the containing elements (in reverse order to start from the top level and work inwards). |
| IndexNode* cur_index = index_root; |
| for (const auto& name_kind : Reversed(path)) |
| cur_index = cur_index->AddChild(name_kind.kind, name_kind.name ? name_kind.name : ""); |
| |
| // Add the leaf item (holding the DIE reference) to the index. |
| cur_index->AddChild(named_ref.kind(), name, named_ref); |
| } |
| |
| bool UnitIndexer::GetAbstractOriginIndex(uint32_t source, uint32_t* abstract_origin_index) const { |
| llvm::DWARFDie die = unit_->getDIEForOffset(indexable_[source].offset()); |
| if (!die) |
| return false; // Internal error, maybe symbols corrupt. |
| |
| llvm::DWARFDie ao = die.getAttributeValueAsReferencedDie(llvm::dwarf::DW_AT_abstract_origin); |
| if (!ao) |
| return false; // Symbols corrupt. |
| if (ao.getDwarfUnit() != unit_) |
| return false; // Different compilation unit. |
| |
| *abstract_origin_index = unit_->getDIEIndex(ao); |
| return true; |
| } |
| |
| void RecursiveFindExact(const IndexNode* node, const Identifier& input, size_t input_index, |
| std::vector<IndexNode::SymbolRef>* result) { |
| if (input_index == input.components().size()) { |
| result->insert(result->end(), node->dies().begin(), node->dies().end()); |
| return; |
| } |
| |
| // Recursively search each category in this node. |
| for (auto* map : {&node->namespaces(), &node->types(), &node->functions(), &node->vars()}) { |
| auto found = map->find(input.components()[input_index].GetName(false)); |
| if (found != map->end()) // Got a match for this category. |
| RecursiveFindExact(&found->second, input, input_index + 1, result); |
| } |
| |
| // Also implicitly search anonymous namespaces (without advancing the input index). |
| auto found = node->namespaces().find(std::string()); |
| if (found != node->namespaces().end()) |
| RecursiveFindExact(&found->second, input, input_index, result); |
| } |
| |
| } // namespace |
| |
| void Index::CreateIndex(llvm::object::ObjectFile* object_file, bool force_slow_path) { |
| std::unique_ptr<llvm::DWARFContext> context = llvm::DWARFContext::create(*object_file); |
| |
| // Extracts the units to a place where we can destroy them after indexing is complete. This |
| // construction order matches that of LLVM's DWARFContext so the indexes into this vector will |
| // match the indices into DWARFContext's. |
| llvm::DWARFUnitVector compile_units; |
| context->getDWARFObj().forEachInfoSections([&](const llvm::DWARFSection& s) { |
| compile_units.addUnitsForSection(*context, s, llvm::DW_SECT_INFO); |
| }); |
| |
| for (unsigned i = 0; i < compile_units.size(); i++) |
| IndexCompileUnit(context.get(), compile_units[i].get(), i, force_slow_path); |
| |
| IndexFileNames(); |
| |
| // Free compilation units after we process them. They will hold all of the parsed DIE data that we |
| // don't need any more which can be multiple GB's for large programs. |
| // |
| // This must be done after indexing since some internal LLVM functions assume the units exist. |
| for (unsigned i = 0; i < compile_units.size(); i++) |
| compile_units[i].reset(); |
| } |
| |
| void Index::DumpFileIndex(std::ostream& out) const { |
| for (const auto& [filename, file_index_entry] : file_name_index_) { |
| const auto& [filepath, compilation_units] = *file_index_entry; |
| out << filename << " -> " << filepath << " -> " << compilation_units.size() << " units\n"; |
| } |
| } |
| |
| std::vector<IndexNode::SymbolRef> Index::FindExact(const Identifier& input) const { |
| std::vector<IndexNode::SymbolRef> result; |
| RecursiveFindExact(&root_, input, 0, &result); |
| return result; |
| } |
| |
| std::vector<std::string> Index::FindFileMatches(std::string_view name) const { |
| std::string_view name_last_comp = ExtractLastFileComponent(name); |
| |
| std::vector<std::string> result; |
| |
| // Search all files whose last component matches (the input may contain more than one component). |
| FileNameIndex::const_iterator iter = file_name_index_.lower_bound(name_last_comp); |
| while (iter != file_name_index_.end() && iter->first == name_last_comp) { |
| const auto& pair = *iter->second; |
| if (PathEndsWith(pair.first, name)) |
| result.push_back(pair.first); |
| ++iter; |
| } |
| |
| return result; |
| } |
| |
| std::vector<std::string> Index::FindFilePrefixes(const std::string& prefix) const { |
| std::vector<std::string> result; |
| |
| auto found = file_name_index_.lower_bound(prefix); |
| while (found != file_name_index_.end() && StringStartsWith(found->first, prefix)) { |
| result.push_back(std::string(found->first)); |
| ++found; |
| } |
| return result; |
| } |
| |
| const std::vector<unsigned>* Index::FindFileUnitIndices(const std::string& name) const { |
| auto found = files_.find(name); |
| if (found == files_.end()) |
| return nullptr; |
| return &found->second; |
| } |
| |
| size_t Index::CountSymbolsIndexed() const { return RecursiveCountDies(root_); } |
| |
| void Index::IndexCompileUnit(llvm::DWARFContext* context, llvm::DWARFUnit* unit, |
| unsigned unit_index, bool force_slow_path) { |
| UnitIndexer indexer(context, unit); |
| indexer.set_force_slow_path(force_slow_path); |
| |
| indexer.Scan(&main_functions_); |
| indexer.Index(&root_); |
| |
| IndexCompileUnitSourceFiles(context, unit, unit_index); |
| } |
| |
| void Index::IndexCompileUnitSourceFiles(llvm::DWARFContext* context, llvm::DWARFUnit* unit, |
| unsigned unit_index) { |
| const llvm::DWARFDebugLine::LineTable* line_table = context->getLineTableForUnit(unit); |
| if (!line_table) |
| return; // No line table for this unit. |
| |
| // This table is the size of the file name table (which confusingly counts from 0 up to the |
| // the "size"). Entries are set to 1 when we've added them to the index already. |
| std::vector<int> added_file; |
| added_file.resize(line_table->Prologue.FileNames.size() + 1, 0); |
| |
| // We don't want to just add all the files from the line table to the index. The line table will |
| // contain entries for every file referenced by the compilation unit, which includes declarations. |
| // We want only files that contribute code, which in practice is a tiny fraction of the total. |
| // |
| // To get this, iterate through the unit's row table and collect all referenced file names. |
| std::string file_name; |
| for (size_t i = 0; i < line_table->Rows.size(); i++) { |
| auto file_id = line_table->Rows[i].File; |
| if (file_id > added_file.size()) |
| continue; |
| |
| if (!added_file[file_id]) { |
| added_file[file_id] = 1; |
| if (line_table->getFileNameByIndex( |
| file_id, "", llvm::DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath, |
| file_name)) { |
| // The files here can contain relative components like "/foo/bar/../baz". This is OK because |
| // we want it to match other places in the symbol code that do a similar computation to get |
| // a file name. |
| files_[NormalizePath(file_name)].push_back(unit_index); |
| } |
| } |
| } |
| } |
| |
| void Index::IndexFileNames() { |
| for (FileIndex::const_iterator iter = files_.begin(); iter != files_.end(); ++iter) |
| file_name_index_.insert(std::make_pair(ExtractLastFileComponent(iter->first), iter)); |
| } |
| |
| } // namespace zxdb |