| // Copyright 2018 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #pragma once |
| |
| #include <iosfwd> |
| #include <map> |
| #include <string> |
| #include <vector> |
| |
| namespace llvm { |
| class DWARFContext; |
| class DWARFDie; |
| } // namespace llvm |
| |
| namespace zxdb { |
| |
| // One node in the ModuleSymbolIndex tree. One node represents the set of |
| // things with the same name inside a given named scope (namespace, class, |
| // type, etc.) of a module. The index contains things that might need to be |
| // named globally: types, functions, globals, and class statics. It does not |
| // contain function-level statics. Variables in functions are searched when in |
| // the context of that function, and can't be referenced outside of it. |
| // |
| // There could be multiple types of things with the same name in different |
| // compilation unit, and a single function can have multiple locations. So one |
| // one can represent many namespaces and functions. |
| // |
| // DUPLICATE TYPE HANDLING |
| // ----------------------- |
| // We assume there is only one definition for a given type name. Usually there |
| // will be one definition of a type in each compilation unit it appears in, so |
| // there is epic duplication of common type definitions in each module |
| // (covering many compilation units). |
| // |
| // The duplications aren't necessarily the same since the programmer could have |
| // multiple different types with the same names in different contexts. |
| // Depending on how things are linked, the user may not even notice |
| // (technically violating the "one definition rule" leads to undefined |
| // behavior, not failure). |
| // |
| // The main time this will come up is types defined in anonymous namespaces |
| // which can easily be legally duplicated. For this, we need specific |
| // disambiguation for anonymous namespaces associated with a given file. Once |
| // we can express the difference between different anonymous namespaces, these |
| // will no longer collide without having to do special handling in this |
| // function. |
| // |
| // We do want to upgrade forward-declarations to full definitions when we |
| // find them. Some compilation units won't even have full definitions for |
| // a type they use (say when a pointer is passed through a file without being |
| // dereferenced). Therefore, "types" will overwrite "type declarations." |
| // |
| // NAMESPACE HANDLING |
| // ------------------ |
| // Namespaces are de-duplicated, with only one DIE saved per namespace name. |
| // This means that one won't be able to enumerate the contents of a namespace |
| // with the symbol returned from the index. This is because currently we only |
| // need to know that a namespace exists with that name, not exactly where all |
| // of its declarations are. |
| class ModuleSymbolIndexNode { |
| private: |
| using Map = std::map<std::string, ModuleSymbolIndexNode>; |
| |
| public: |
| using Iterator = Map::iterator; |
| using ConstIterator = Map::const_iterator; |
| |
| // Type for a DieRef. |
| enum class RefType { |
| kNamespace, // Namespaces. |
| kFunction, // Any kind of code. |
| kVariable, // Any kind of data. |
| kTypeDecl, // Forward declaration of a type. |
| kType, // Full type definition. |
| }; |
| |
| // A reference to a DIE that doesn't need the unit or the underlying |
| // llvm::DwarfDebugInfoEntry to be kept. This allows the discarding of the |
| // full parsed DIE structures after indexing. It can be converted back to a |
| // DIE, which will cause the DWARFUnit to be re-parsed. |
| // |
| // The offset stored in this structure is the offset from the beginning of |
| // the .debug_info section, which is the same as the offset stored in the |
| // llvm::DWARFDebugInfoEntry. |
| // |
| // Random code reading the index can convert a DieRef to a Symbol object |
| // using ModuleSymbols::IndexDieRefToSymbol(). |
| class DieRef { |
| public: |
| explicit DieRef() : offset_(0) {} |
| DieRef(RefType type, uint32_t offset) : type_(type), offset_(offset) {} |
| |
| RefType type() const { return type_; } |
| uint32_t offset() const { return offset_; } |
| |
| // For use by ModuleSymbols. Other callers read the DieRef comments above. |
| llvm::DWARFDie ToDie(llvm::DWARFContext* context) const; |
| |
| private: |
| RefType type_; |
| uint32_t offset_; |
| }; |
| |
| ModuleSymbolIndexNode(); |
| |
| // Makes a node pointing to one DIE. |
| explicit ModuleSymbolIndexNode(const DieRef& ref); |
| |
| // Move-only. |
| ModuleSymbolIndexNode(ModuleSymbolIndexNode&&) = default; |
| ModuleSymbolIndexNode(const ModuleSymbolIndexNode&) = delete; |
| |
| ~ModuleSymbolIndexNode(); |
| |
| bool empty() const { return sub_.empty() && dies_.empty(); } |
| |
| const Map& sub() const { return sub_; } |
| const std::vector<DieRef>& dies() const { return dies_; } |
| |
| // Dump DIEs for debugging. A node does not contain its own name (this |
| // is stored in the parent's map. If printing some node other than the root, |
| // specify the name. |
| void Dump(std::ostream& out, int indent_level = 0) const; |
| void Dump(const std::string& name, std::ostream& out, |
| int indent_level = 0) const; |
| |
| // AsString is useful only in small unit tests since even a small module can |
| // have many megabytes of dump. |
| std::string AsString(int indent_level = 0) const; |
| |
| // Adds a DIE with the name of this node. |
| void AddDie(const DieRef& ref); |
| |
| // Adds a child node with the given name and returns it. If one already exits |
| // with the name, returns the existing one. |
| // |
| // Even with move semantics, having the map construct the std::string as |
| // needed upon insertion is slightly faster than creating a std::string and |
| // passing it in. Use the char* version if the source string is a |
| // null-terminated C-string. |
| ModuleSymbolIndexNode* AddChild(std::string name); |
| ModuleSymbolIndexNode* AddChild(const char* name); |
| |
| // Adds a child to this node. If a node with this key already exists in this |
| // node, they will be merged. |
| void AddChild(const std::string& name, ModuleSymbolIndexNode&& child); |
| |
| // Merges another node's children into this one. It's assumed there are no |
| // duplicate DIEs so the lists are just appended. |
| void Merge(ModuleSymbolIndexNode&& other); |
| |
| // Finds the first child node that contains the input string a prefix. The |
| // first return iterator points to this node. |
| // |
| // The second returned iterator points to the last node IN THE CONTAINER. |
| // This does not indicate the last node with the prefix. Many callers won't |
| // need all of the matches and doing it this way avoids a second lookup. |
| // |
| // If there are no matches both iterators will be the same (found == end). |
| // |
| // If the caller wants to find all matching prefixes, it can advance the |
| // iterator as long as the last input component is a prefix if the current |
| // iterator key and less than the end. |
| std::pair<ModuleSymbolIndexNode::ConstIterator, |
| ModuleSymbolIndexNode::ConstIterator> |
| FindPrefix(const std::string& input) const; |
| |
| private: |
| // Performance note: The strings are all null-terminated C strings that come |
| // from the mapped DWARF data. We should use that in the map instead to avoid |
| // copying all the strings again. |
| Map sub_; |
| |
| // For any DIES matching this name, lists the DIEs that implement it. |
| // If a function or static variable has the same name as a namespace, there |
| // could be sub_ entries as well as dies_. |
| std::vector<DieRef> dies_; |
| }; |
| |
| } // namespace zxdb |