src/developer/debug/zxdb/symbols/module_symbol_index.h - fuchsia - Git at Google

 // Copyright 2018 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #pragma once

 #include <map>
 #include <memory>
 #include <optional>
 #include <string_view>
 #include <vector>

 #include "src/developer/debug/zxdb/symbols/module_symbol_index_node.h"
 #include "src/lib/fxl/macros.h"

 // Forward declarations of LLVM types so this header can name them without
 // including the LLVM headers. The declarations below that use them
 // (ObjectFile, DWARFContext, DWARFUnit) only take them by pointer, for which
 // an incomplete type is sufficient.
 //
 // NOTE(review): DWARFCompileUnit and DWARFDie are not referenced by any
 // declaration visible in this header - confirm whether they are still needed
 // before removing them.
 namespace llvm {

 class DWARFCompileUnit;
 class DWARFContext;
 class DWARFDie;
 class DWARFUnit;

 namespace object {
 class ObjectFile;
 }  // namespace object

 }  // namespace llvm

 namespace zxdb {

 class Identifier;

 // Holds the index of symbols for a given module.
 //
 // The index has two parts, both visible in the members below:
 //  - A hierarchy of ModuleSymbolIndexNode objects rooted at root(), used to
 //    look up symbols by (possibly qualified) name.
 //  - A file index that maps source file paths to the compilation units that
 //    reference them, plus a secondary index keyed by the last path component.
 //
 // Copying and assignment are disabled (FXL_DISALLOW_COPY_AND_ASSIGN).
 class ModuleSymbolIndex {
  public:
   ModuleSymbolIndex();
   ~ModuleSymbolIndex();

   // Builds the index from the given object file.
   //
   // This function takes an object file rather than a context so it can create
   // its own context, and then discard the context when it's done. Since most
   // debugging information is not needed after indexing, this saves a lot of
   // memory.
   void CreateIndex(llvm::object::ObjectFile* object_file);

   // Accessors for the root node of the symbol index.
   const ModuleSymbolIndexNode& root() const { return root_; }
   ModuleSymbolIndexNode& root() { return root_; }

   // Returns the number of entries in the file name index. Note that this is
   // the size of file_name_index_, not of files_.
   // NOTE(review): presumably there is one file name entry per indexed path,
   // making the two sizes equal - confirm in IndexFileNames().
   size_t files_indexed() const { return file_name_index_.size(); }

   // Returns how many symbols are indexed. This iterates through everything so
   // can be slow.
   size_t CountSymbolsIndexed() const;

   // Takes a fully-qualified name with namespaces and classes and template
   // parameters and returns the list of symbols which match exactly.
   //
   // NOTE(review): the result is returned by reference, so a lookup with no
   // match presumably refers to some shared empty vector - confirm in the
   // implementation.
   const std::vector<ModuleSymbolIndexNode::DieRef>& FindExact(
       const Identifier& input) const;

   // Takes a fully-qualified name with namespaces and classes and returns a
   // pair of iterators.
   //
   // The first iterator points to the first node that has the input as a
   // prefix.
   //
   // The second returned iterator is the end OF THE CONTAINER the first
   // iterator belongs to. It does not indicate the end of the nodes with the
   // prefix. Many callers won't need all of the matches and doing it this way
   // avoids a second lookup.
   //
   // Non-last input nodes must match exactly with "std::string::operator==".
   // For example, the input:
   //   { "std", "vector<" }
   // Would look in the "std" node and would return an iterator to the
   // "vector<Aardvark>" node inside it and the end of the "std" node. Nodes are
   // sorted by "std::string::operator<".
   //
   // If there are no matches both iterators will be the same (found == end).
   //
   // If the caller wants to find all matching prefixes, it can advance the
   // iterator as long as the last input component is a prefix of the current
   // iterator key and the iterator has not reached the end.
   std::pair<ModuleSymbolIndexNode::ConstIterator,
             ModuleSymbolIndexNode::ConstIterator>
   FindPrefix(const Identifier& input) const;

   // Looks up the name in the file index and returns the set of matches. The
   // name is matched from the right side with a left boundary of either a slash
   // or the beginning of the full path. This may match more than one file name,
   // and the caller is left to decide which one(s) it wants.
   std::vector<std::string> FindFileMatches(std::string_view name) const;

   // Same as FindFileMatches but does a prefix search. This only matches the
   // file name component (not directory paths).
   //
   // In the future it would be nice to match directories if there was a "/".
   std::vector<std::string> FindFilePrefixes(const std::string& prefix) const;

   // Looks up the given exact file path and returns all compile units it
   // appears in. The file must be an exact match (normally it's one of the
   // results from FindFileMatches).
   //
   // The contents of the vector are indices into the compilation unit array.
   // (see llvm::DWARFContext::getCompileUnitAtIndex).
   //
   // NOTE(review): the result is a pointer, presumably null when the path is
   // not in the index - confirm in the implementation.
   const std::vector<unsigned>* FindFileUnitIndices(
       const std::string& name) const;

   // Dumps the file index to the stream for debugging.
   void DumpFileIndex(std::ostream& out) const;

  private:
   // Indexing helpers for a single unit.
   // NOTE(review): presumably called from CreateIndex() once per compilation
   // unit, with unit_index being the value stored in the files_ vectors -
   // confirm in the implementation.
   void IndexCompileUnit(llvm::DWARFContext* context, llvm::DWARFUnit* unit,
                         unsigned unit_index);

   void IndexCompileUnitSourceFiles(llvm::DWARFContext* context,
                                    llvm::DWARFUnit* unit, unsigned unit_index);

   // Populates the file_name_index_ given a now-unchanging files_ map.
   void IndexFileNames();

   // Root of the symbol index hierarchy; exposed through root().
   ModuleSymbolIndexNode root_;

   // Maps full path names to compile units that reference them. This must not
   // be mutated once the file_name_index_ is built.
   //
   // The contents of the vector are indices into the compilation unit array.
   // (see llvm::DWARFContext::getCompileUnitAtIndex). These are "unsigned"
   // type because that's what LLVM uses for these indices.
   //
   // This is a map, not a multimap, because some files will appear in many
   // compilation units. I suspect it's better to avoid duplicating the names
   // (like a multimap would) and eating the cost of indirect heap allocations
   // for vectors in the single-item case.
   using FileIndex = std::map<std::string, std::vector<unsigned>>;
   FileIndex files_;

   // Maps the last file name component (the part following the last slash) to
   // the set of entries in the files_ index that have that name.
   //
   // This is a multimap because the name parts will generally be unique so we
   // should get few duplicates. The cost of using a vector for most items
   // containing one element becomes higher in that case.
   //
   // NOTE(review): the string_view keys and the stored iterators presumably
   // refer to entries owned by files_, which would be why files_ must not be
   // mutated after this index is built - confirm in IndexFileNames().
   using FileNameIndex =
       std::multimap<std::string_view, FileIndex::const_iterator>;
   FileNameIndex file_name_index_;

   FXL_DISALLOW_COPY_AND_ASSIGN(ModuleSymbolIndex);
 };

 }  // namespace zxdb
	// Copyright 2018 The Fuchsia Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#pragma once

	#include <map>
	#include <memory>
	#include <optional>
	#include <string_view>
	#include <vector>

	#include "src/developer/debug/zxdb/symbols/module_symbol_index_node.h"
	#include "src/lib/fxl/macros.h"

	// Forward declarations of LLVM types so this header can name them without
	// including the LLVM headers. The declarations below that use them
	// (ObjectFile, DWARFContext, DWARFUnit) only take them by pointer, for which
	// an incomplete type is sufficient.
	//
	// NOTE(review): DWARFCompileUnit and DWARFDie are not referenced by any
	// declaration visible in this header - confirm whether they are still needed
	// before removing them.
	namespace llvm {

	class DWARFCompileUnit;
	class DWARFContext;
	class DWARFDie;
	class DWARFUnit;

	namespace object {
	class ObjectFile;
	} // namespace object

	} // namespace llvm

	namespace zxdb {

	class Identifier;

	// Holds the index of symbols for a given module.
	//
	// The index has two parts, both visible in the members below:
	// - A hierarchy of ModuleSymbolIndexNode objects rooted at root(), used to
	//   look up symbols by (possibly qualified) name.
	// - A file index that maps source file paths to the compilation units that
	//   reference them, plus a secondary index keyed by the last path component.
	//
	// Copying and assignment are disabled (FXL_DISALLOW_COPY_AND_ASSIGN).
	class ModuleSymbolIndex {
	public:
	ModuleSymbolIndex();
	~ModuleSymbolIndex();

	// Builds the index from the given object file.
	//
	// This function takes an object file rather than a context so it can create
	// its own context, and then discard the context when it's done. Since most
	// debugging information is not needed after indexing, this saves a lot of
	// memory.
	void CreateIndex(llvm::object::ObjectFile* object_file);

	// Accessors for the root node of the symbol index.
	const ModuleSymbolIndexNode& root() const { return root_; }
	ModuleSymbolIndexNode& root() { return root_; }

	// Returns the number of entries in the file name index. Note that this is
	// the size of file_name_index_, not of files_.
	// NOTE(review): presumably there is one file name entry per indexed path,
	// making the two sizes equal - confirm in IndexFileNames().
	size_t files_indexed() const { return file_name_index_.size(); }

	// Returns how many symbols are indexed. This iterates through everything so
	// can be slow.
	size_t CountSymbolsIndexed() const;

	// Takes a fully-qualified name with namespaces and classes and template
	// parameters and returns the list of symbols which match exactly.
	//
	// NOTE(review): the result is returned by reference, so a lookup with no
	// match presumably refers to some shared empty vector - confirm in the
	// implementation.
	const std::vector<ModuleSymbolIndexNode::DieRef>& FindExact(
	const Identifier& input) const;

	// Takes a fully-qualified name with namespaces and classes and returns a
	// pair of iterators.
	//
	// The first iterator points to the first node that has the input as a
	// prefix.
	//
	// The second returned iterator is the end OF THE CONTAINER the first
	// iterator belongs to. It does not indicate the end of the nodes with the
	// prefix. Many callers won't need all of the matches and doing it this way
	// avoids a second lookup.
	//
	// Non-last input nodes must match exactly with "std::string::operator==".
	// For example, the input:
	//   { "std", "vector<" }
	// Would look in the "std" node and would return an iterator to the
	// "vector<Aardvark>" node inside it and the end of the "std" node. Nodes are
	// sorted by "std::string::operator<".
	//
	// If there are no matches both iterators will be the same (found == end).
	//
	// If the caller wants to find all matching prefixes, it can advance the
	// iterator as long as the last input component is a prefix of the current
	// iterator key and the iterator has not reached the end.
	std::pair<ModuleSymbolIndexNode::ConstIterator,
	ModuleSymbolIndexNode::ConstIterator>
	FindPrefix(const Identifier& input) const;

	// Looks up the name in the file index and returns the set of matches. The
	// name is matched from the right side with a left boundary of either a slash
	// or the beginning of the full path. This may match more than one file name,
	// and the caller is left to decide which one(s) it wants.
	std::vector<std::string> FindFileMatches(std::string_view name) const;

	// Same as FindFileMatches but does a prefix search. This only matches the
	// file name component (not directory paths).
	//
	// In the future it would be nice to match directories if there was a "/".
	std::vector<std::string> FindFilePrefixes(const std::string& prefix) const;

	// Looks up the given exact file path and returns all compile units it
	// appears in. The file must be an exact match (normally it's one of the
	// results from FindFileMatches).
	//
	// The contents of the vector are indices into the compilation unit array.
	// (see llvm::DWARFContext::getCompileUnitAtIndex).
	//
	// NOTE(review): the result is a pointer, presumably null when the path is
	// not in the index - confirm in the implementation.
	const std::vector<unsigned>* FindFileUnitIndices(
	const std::string& name) const;

	// Dumps the file index to the stream for debugging.
	void DumpFileIndex(std::ostream& out) const;

	private:
	// Indexing helpers for a single unit.
	// NOTE(review): presumably called from CreateIndex() once per compilation
	// unit, with unit_index being the value stored in the files_ vectors -
	// confirm in the implementation.
	void IndexCompileUnit(llvm::DWARFContext* context, llvm::DWARFUnit* unit,
	unsigned unit_index);

	void IndexCompileUnitSourceFiles(llvm::DWARFContext* context,
	llvm::DWARFUnit* unit, unsigned unit_index);

	// Populates the file_name_index_ given a now-unchanging files_ map.
	void IndexFileNames();

	// Root of the symbol index hierarchy; exposed through root().
	ModuleSymbolIndexNode root_;

	// Maps full path names to compile units that reference them. This must not
	// be mutated once the file_name_index_ is built.
	//
	// The contents of the vector are indices into the compilation unit array.
	// (see llvm::DWARFContext::getCompileUnitAtIndex). These are "unsigned"
	// type because that's what LLVM uses for these indices.
	//
	// This is a map, not a multimap, because some files will appear in many
	// compilation units. I suspect it's better to avoid duplicating the names
	// (like a multimap would) and eating the cost of indirect heap allocations
	// for vectors in the single-item case.
	using FileIndex = std::map<std::string, std::vector<unsigned>>;
	FileIndex files_;

	// Maps the last file name component (the part following the last slash) to
	// the set of entries in the files_ index that have that name.
	//
	// This is a multimap because the name parts will generally be unique so we
	// should get few duplicates. The cost of using a vector for most items
	// containing one element becomes higher in that case.
	//
	// NOTE(review): the string_view keys and the stored iterators presumably
	// refer to entries owned by files_, which would be why files_ must not be
	// mutated after this index is built - confirm in IndexFileNames().
	using FileNameIndex =
	std::multimap<std::string_view, FileIndex::const_iterator>;
	FileNameIndex file_name_index_;

	FXL_DISALLOW_COPY_AND_ASSIGN(ModuleSymbolIndex);
	};

	} // namespace zxdb