[debugger] Symbol Listing/searching.

Introduce a new command, sym-search, that lists the symbols loaded by a
process, optionally filtered by a regular expression.

TEST=Manual

Change-Id: I18e8c45394dd238392f781425b3605eb8a4c36bd
diff --git a/bin/zxdb/console/verbs.h b/bin/zxdb/console/verbs.h
index 7067ff9..ae65f3b 100644
--- a/bin/zxdb/console/verbs.h
+++ b/bin/zxdb/console/verbs.h
@@ -72,6 +72,7 @@
   kStepi,
   kSymInfo,
   kSymNear,
+  kSymSearch,
   kSymStat,
   kUntil,
 
diff --git a/bin/zxdb/console/verbs_symbol.cc b/bin/zxdb/console/verbs_symbol.cc
index b94ac34..ac702e7 100644
--- a/bin/zxdb/console/verbs_symbol.cc
+++ b/bin/zxdb/console/verbs_symbol.cc
@@ -15,6 +15,7 @@
 #include "garnet/bin/zxdb/client/session.h"
 #include "garnet/bin/zxdb/client/target.h"
 #include "garnet/bin/zxdb/common/err.h"
+#include "garnet/bin/zxdb/common/regex.h"
 #include "garnet/bin/zxdb/console/command.h"
 #include "garnet/bin/zxdb/console/command_utils.h"
 #include "garnet/bin/zxdb/console/console.h"
@@ -29,7 +30,10 @@
 #include "garnet/bin/zxdb/symbols/collection.h"
 #include "garnet/bin/zxdb/symbols/data_member.h"
 #include "garnet/bin/zxdb/symbols/location.h"
+#include "garnet/bin/zxdb/symbols/loaded_module_symbols.h"
+#include "garnet/bin/zxdb/symbols/module_symbol_index.h"
 #include "garnet/bin/zxdb/symbols/module_symbol_status.h"
+#include "garnet/bin/zxdb/symbols/module_symbols.h"
 #include "garnet/bin/zxdb/symbols/process_symbols.h"
 #include "garnet/bin/zxdb/symbols/resolve_options.h"
 #include "garnet/bin/zxdb/symbols/system_symbols.h"
@@ -37,6 +41,8 @@
 #include "garnet/bin/zxdb/symbols/type.h"
 #include "garnet/bin/zxdb/symbols/variable.h"
 #include "lib/fxl/logging.h"
+#include "lib/fxl/strings/ascii.h"
+#include "lib/fxl/strings/join_strings.h"
 #include "lib/fxl/strings/string_printf.h"
 
 namespace zxdb {
@@ -559,6 +565,224 @@
   return Err();
 }
 
+// sym-search ------------------------------------------------------------------
+
+constexpr size_t kSymSearchListLimit = 200;  // Max entries without --all.
+
+constexpr int kSymSearchUnfold = 1;  // Switch id for --unfold.
+constexpr int kSymSearchListAll= 2;  // Switch id for --all.
+
+const char kSymSearchShortHelp[] = "sym-search: Search for symbols.";
+const char kSymSearchHelp[] =
+  R"(sym-search [--all] [--unfold] [<regexp>]
+
+  Searches for symbols loaded by a process.
+
+  By default will display all the symbols loaded by the process, truncated to a
+  limit. It is possible to use a regular expression to limit the search to a
+  desired symbol(s).
+
+  Default display is nested scoping (namespaces, classes) to be joined by "::".
+  While this looks similar to what C++ symbols are, they are not meant to be
+  literal C++ symbols, but rather to have a relatively familiar way of
+  displaying symbols.
+
+  The symbols are displayed by loaded modules.
+
+Arguments
+
+  <regexp>
+      Case insensitive regular expression. Uses the POSIX Extended Regular
+      Expression syntax. This regexp will be compared with every symbol. Any
+      successful matches will be included in the output.
+
+      NOTE: Currently using both regexp and unfold (-u) result in the scoping
+            symbols to not be outputted. In order to see the complete scopes,
+            don't unfold the output.
+
+  --all | -a
+      Don't limit the output. By default zxdb will limit the amount of output
+      in order not to print thousands of entries.
+
+  --unfold | -u
+      This changes to use a "nesting" formatting, in which scoping symbols,
+      such as namespaces or classes, indent other symbols.
+
+Examples
+
+  sym-search
+      List all the symbols with the default C++-ish nesting collapsing.
+
+      some_module.so
+
+      nested::scoping::symbol
+      nested::scoping::other_symbol
+      ...
+
+  pr 3 sym-search other
+      Filter using "other" as a regular expression for process 3.
+
+      some_module.so
+
+      nested::scoping::other_symbol
+      ...
+
+  sym-search --unfold
+      List all the symbols in an unfolded fashion.
+      This will be truncated.
+
+      some_module.so
+
+      nested
+        scoping
+          symbol
+          other_symbol
+      ...
+)";
+
+// Orders two strings (passed by pointer) while ignoring ASCII case.
+struct CaseInsensitiveCompare {
+  bool operator()(const std::string* lhs, const std::string* rhs) const {
+    const std::string& a = *lhs;
+    const std::string& b = *rhs;
+    const size_t common = a.size() < b.size() ? a.size() : b.size();
+
+    // Walk the shared prefix; the first differing lowercased char decides.
+    for (size_t i = 0; i < common; ++i) {
+      const char a_low = fxl::ToLowerASCII(a[i]);
+      const char b_low = fxl::ToLowerASCII(b[i]);
+      if (a_low != b_low)
+        return a_low < b_low;
+    }
+
+    // One string is a prefix of the other: the shorter one sorts first.
+    return a.size() < b.size();
+  }
+};
+
+// Unfolded: the leaf name indented by |indent_level|. Folded: "a::b::leaf".
+std::string CreateSymbolName(const Command& cmd,
+                             const std::vector<std::string>& names,
+                             int indent_level) {
+  if (!cmd.HasSwitch(kSymSearchUnfold)) return fxl::JoinStrings(names, "::");
+  return fxl::StringPrintf("%*s%s", indent_level, "", names.back().c_str());
+}
+
+struct DumpModuleContext {
+  std::vector<std::string>* names = nullptr;   // Scope path to current node.
+  std::vector<std::string>* output = nullptr;  // Receives the matching lines.
+  Regex* regex = nullptr;   // Optional filter; nullptr accepts every symbol.
+};
+
+// Depth-first walk of |node| appending every symbol that passes the filter to
+// the context's output. Returns true if the list was truncated.
+bool DumpModule(const Command& cmd, const ModuleSymbolIndexNode& node,
+                DumpModuleContext* context, int indent_level = 0) {
+  std::vector<std::string>& scope = *context->names;
+  std::vector<std::string>& lines = *context->output;
+  // An empty scope means this is the nameless root, which is never printed.
+  const bool is_root = scope.empty();
+  if (!is_root) {
+    std::string line = CreateSymbolName(cmd, scope, indent_level);
+    if (context->regex == nullptr || context->regex->Match(line))
+      lines.push_back(std::move(line));
+  }
+
+  const bool limited = !cmd.HasSwitch(kSymSearchListAll);
+  if (limited && lines.size() >= kSymSearchListLimit)
+    return true;
+
+  // Children of the root start at column zero; deeper levels indent by two.
+  const int child_indent = is_root ? 0 : indent_level + 2;
+  for (const auto& [child_name, child] : node.sub()) {
+    scope.push_back(child_name);
+    if (DumpModule(cmd, child, context, child_indent))
+      return true;
+    scope.pop_back();
+  }
+  return false;
+}
+
+// Implements "sym-search": prints the process' symbols grouped by module.
+Err DoSymSearch(ConsoleContext* context, const Command& cmd) {
+  if (cmd.args().size() > 1)
+    return Err("Too many arguments. See \"help sym-search\".");
+
+  Process* process = cmd.target()->GetProcess();
+  if (!process)
+    return Err("No process is running.");
+
+  ProcessSymbols* process_symbols = process->GetSymbols();
+  auto process_status = process_symbols->GetStatus();
+
+  // Sort the modules by name so that every run lists them in the same order.
+  std::sort(process_status.begin(), process_status.end(),
+            [](const ModuleSymbolStatus& lhs, const ModuleSymbolStatus& rhs) {
+              return lhs.name < rhs.name;
+            });
+
+  Console* console = Console::get();
+
+  Regex regex;
+  if (cmd.args().size() == 1) {
+    Err err = regex.Init(cmd.args().front());
+    if (err.has_error())
+      return err;
+  }
+
+  // The collected symbols that pass the filter.
+  std::vector<std::string> dump;
+  // Marks where within the dump vector each module ends.
+  std::vector<std::pair<ModuleSymbolStatus, size_t>> module_symbol_indices;
+  bool truncated = false;
+  for (auto& module_status : process_status) {
+    if (!module_status.symbols)
+      continue;
+
+    const auto& index = module_status.symbols->module_symbols()->GetIndex();
+    const auto& root = index.root();
+
+    std::vector<std::string> names;
+    size_t size_before = dump.size();
+
+    DumpModuleContext dump_context;
+    dump_context.names = &names;
+    dump_context.output = &dump;
+    dump_context.regex = regex.valid() ? &regex : nullptr;
+    truncated = DumpModule(cmd, root, &dump_context);
+
+    // Only track this module if symbols were actually added.
+    if (size_before < dump.size())
+      module_symbol_indices.push_back({module_status, dump.size()});
+    if (truncated)
+      break;
+  }
+
+  size_t current_index = 0;
+  for (const auto& [module_info, limit] : module_symbol_indices) {
+    console->Output(OutputBuffer(
+        Syntax::kHeading,
+        fxl::StringPrintf("%s\n\n", module_info.name.c_str())));
+
+    while (current_index < limit) {
+      console->Output(dump[current_index]);
+      current_index++;
+    }
+    console->Output("\n");
+  }
+
+  if (truncated) {
+    console->Output(
+        Err("Limiting results to %zu. Make a more specific filter or use "
+            "--all.",
+            dump.size()));
+  } else {
+    console->Output(fxl::StringPrintf("Displaying %zu entries.", dump.size()));
+  }
+
+  return Err();
+}
+
 }  // namespace
 
 void AppendSymbolVerbs(std::map<Verb, VerbRecord>* verbs) {
@@ -581,6 +805,12 @@
   (*verbs)[Verb::kSymNear] =
       VerbRecord(&DoSymNear, {"sym-near", "sn"}, kSymNearShortHelp,
                  kSymNearHelp, CommandGroup::kQuery);
+
+  VerbRecord search(&DoSymSearch, {"sym-search"}, kSymSearchShortHelp,
+                    kSymSearchHelp, CommandGroup::kQuery);
+  search.switches.emplace_back(kSymSearchListAll, false, "all", 'a');
+  search.switches.emplace_back(kSymSearchUnfold, false, "unfold", 'u');
+  (*verbs)[Verb::kSymSearch] = std::move(search);
 }
 
 }  // namespace zxdb
diff --git a/bin/zxdb/console/verbs_thread.cc b/bin/zxdb/console/verbs_thread.cc
index 1618723..ea049b0 100644
--- a/bin/zxdb/console/verbs_thread.cc
+++ b/bin/zxdb/console/verbs_thread.cc
@@ -909,7 +909,7 @@
 
   <regexp>
       Case insensitive regular expression. Any register that matches will be
-      shown. Uses POSIX Basic Regular Expression syntax. If not specified, it
+      shown. Uses POSIX Extended Regular Expression syntax. If not specified, it
       will match all registers.
 
 Examples
diff --git a/bin/zxdb/symbols/module_symbol_index.cc b/bin/zxdb/symbols/module_symbol_index.cc
index f5b0e63..49f675b 100644
--- a/bin/zxdb/symbols/module_symbol_index.cc
+++ b/bin/zxdb/symbols/module_symbol_index.cc
@@ -414,11 +414,11 @@
   return &found->second;
 }
 
-void ModuleSymbolIndex::DumpFileIndex(std::ostream& out) {
-  for (const auto& name_pair : file_name_index_) {
-    const auto& full_pair = *name_pair.second;
-    out << name_pair.first << " -> " << full_pair.first << " -> "
-        << full_pair.second.size() << " units\n";
+void ModuleSymbolIndex::DumpFileIndex(std::ostream& out) const {
+  for (const auto& [filename, file_index_entry]: file_name_index_) {
+    const auto& [filepath, compilation_units] = *file_index_entry;
+    out << filename << " -> " << filepath << " -> "
+        << compilation_units.size() << " units\n";
   }
 }
 
diff --git a/bin/zxdb/symbols/module_symbol_index.h b/bin/zxdb/symbols/module_symbol_index.h
index b238e9a..5e1b380 100644
--- a/bin/zxdb/symbols/module_symbol_index.h
+++ b/bin/zxdb/symbols/module_symbol_index.h
@@ -69,7 +69,7 @@
       const std::string& name) const;
 
   // Dumps the file index to the stream for debugging.
-  void DumpFileIndex(std::ostream& out);
+  void DumpFileIndex(std::ostream& out) const;
 
  private:
   void IndexCompileUnit(llvm::DWARFContext* context, llvm::DWARFUnit* unit,
diff --git a/bin/zxdb/symbols/module_symbol_status.h b/bin/zxdb/symbols/module_symbol_status.h
index fb78858..4464dab 100644
--- a/bin/zxdb/symbols/module_symbol_status.h
+++ b/bin/zxdb/symbols/module_symbol_status.h
@@ -10,6 +10,8 @@
 
 namespace zxdb {
 
+class LoadedModuleSymbols;
+
 struct ModuleSymbolStatus {
   // Name of the executable or shared library on the system.
   std::string name;
@@ -28,6 +30,10 @@
 
   // Local file name with the symbols if the symbols were loaded.
   std::string symbol_file;
+
+  // Represents a handle to the actual symbols.
+  // nullptr if the symbols are not loaded.
+  LoadedModuleSymbols* symbols = nullptr;
 };
 
 }  // namespace zxdb
diff --git a/bin/zxdb/symbols/process_symbols.cc b/bin/zxdb/symbols/process_symbols.cc
index baa86c1..a3e357d 100644
--- a/bin/zxdb/symbols/process_symbols.cc
+++ b/bin/zxdb/symbols/process_symbols.cc
@@ -83,10 +83,10 @@
 
   // Update the TargetSymbols.
   target_symbols_->RemoveAllModules();
-  for (auto& pair : modules_) {
-    if (pair.second.symbols) {
+  for (auto& [base, mod_info] : modules_) {
+    if (mod_info.symbols) {
       target_symbols_->AddModule(fxl::RefPtr<SystemSymbols::ModuleRef>(
-          pair.second.symbols->module_ref()));
+          mod_info.symbols->module_ref()));
     }
   }
 
@@ -118,18 +118,19 @@
 
 std::vector<ModuleSymbolStatus> ProcessSymbols::GetStatus() const {
   std::vector<ModuleSymbolStatus> result;
-  for (const auto& pair : modules_) {
-    if (pair.second.symbols.get()) {
-      result.push_back(pair.second.symbols->module_symbols()->GetStatus());
+  for (const auto& [base, mod_info] : modules_) {
+    if (mod_info.symbols) {
+      result.push_back(mod_info.symbols->module_symbols()->GetStatus());
       // ModuleSymbols doesn't know the name or base address so fill in now.
-      result.back().name = pair.second.name;
-      result.back().base = pair.second.base;
+      result.back().name = mod_info.name;
+      result.back().base = mod_info.base;
+      result.back().symbols = mod_info.symbols.get();
     } else {
       // No symbols, make an empty record.
       ModuleSymbolStatus status;
-      status.name = pair.second.name;
-      status.build_id = pair.second.build_id;
-      status.base = pair.second.base;
+      status.name = mod_info.name;
+      status.build_id = mod_info.build_id;
+      status.base = mod_info.base;
       status.symbols_loaded = false;
       result.push_back(std::move(status));
     }
@@ -141,7 +142,7 @@
 ProcessSymbols::GetLoadedModuleSymbols() const {
   std::vector<const LoadedModuleSymbols*> result;
   result.reserve(modules_.size());
-  for (const auto & [ base, mod_info ] : modules_) {
+  for (const auto& [base, mod_info] : modules_) {
     if (mod_info.symbols)
       result.push_back(mod_info.symbols.get());
   }
@@ -174,9 +175,9 @@
 
   // Symbol and file/line resolution both requires iterating over all modules.
   std::vector<Location> result;
-  for (const auto& pair : modules_) {
-    if (pair.second.symbols) {
-      const LoadedModuleSymbols* loaded = pair.second.symbols.get();
+  for (const auto& [base, mod_info] : modules_) {
+    if (mod_info.symbols) {
+      const LoadedModuleSymbols* loaded = mod_info.symbols.get();
       for (Location& location : loaded->module_symbols()->ResolveInputLocation(
                loaded->symbol_context(), input_location, options))
         result.push_back(std::move(location));