Merge pull request #109 from haberman/debug-print

Added options to debug print a VM address or file offset.
diff --git a/README.md b/README.md
index bd33781..1f4fa3c 100644
--- a/README.md
+++ b/README.md
@@ -150,12 +150,19 @@
                        -s vm
                        -s file
                        -s both (the default: sorts by max(vm, file)).
-  -v                 Verbose output.  Dumps warnings encountered during
-                     processing and full VM/file maps at the end.
-                     Add more v's (-vv, -vvv) for even more.
   -w                 Wide output; don't truncate long labels.
   --help             Display this message and exit.
   --list-sources     Show a list of available sources and exit.
+
+Options for debugging Bloaty:
+
+  --debug-vmaddr=ADDR
+  --debug-fileoff=OFF
+                     Print extended debugging information for the given
+                     VM address and/or file offset.
+  -v                 Verbose output.  Dumps warnings encountered during
+                     processing and full VM/file maps at the end.
+                     Add more v's (-vv, -vvv) for even more.
 ```
 
 # Size Diffs
diff --git a/src/bloaty.cc b/src/bloaty.cc
index f6097c7..71af7e8 100644
--- a/src/bloaty.cc
+++ b/src/bloaty.cc
@@ -39,6 +39,7 @@
 #include <unistd.h>
 
 #include "absl/memory/memory.h"
+#include "absl/strings/numbers.h"
 #include "absl/strings/string_view.h"
 #include "absl/strings/str_join.h"
 #include "absl/strings/substitute.h"
@@ -857,30 +858,93 @@
 
 // RangeSink ///////////////////////////////////////////////////////////////////
 
-RangeSink::RangeSink(const InputFile* file, DataSource data_source,
-                     const DualMap* translator)
+RangeSink::RangeSink(const InputFile* file, const Options& options,
+                     DataSource data_source, const DualMap* translator)
     : file_(file),
+      options_(options),
       data_source_(data_source),
       translator_(translator) {}
 
 RangeSink::~RangeSink() {}
 
+uint64_t debug_vmaddr = -1;
+uint64_t debug_fileoff = -1;
+
+bool RangeSink::ContainsVerboseVMAddr(uint64_t vmstart, uint64_t vmsize) {
+  return options_.verbose_level() > 2 ||
+         (options_.has_debug_vmaddr() && options_.debug_vmaddr() >= vmstart &&
+          options_.debug_vmaddr() < (vmstart + vmsize));
+}
+
+bool RangeSink::ContainsVerboseFileOffset(uint64_t fileoff, uint64_t filesize) {
+  return options_.verbose_level() > 2 ||
+         (options_.has_debug_fileoff() && options_.debug_fileoff() >= fileoff &&
+          options_.debug_fileoff() < (fileoff + filesize));
+}
+
+bool RangeSink::IsVerboseForVMRange(uint64_t vmstart, uint64_t vmsize) {
+  if (ContainsVerboseVMAddr(vmstart, vmsize)) {
+    return true;
+  }
+
+  if (translator_ && options_.has_debug_fileoff()) {
+    RangeMap vm_map;
+    RangeMap file_map;
+    bool contains = false;
+    vm_map.AddRangeWithTranslation(vmstart, vmsize, "", translator_->vm_map,
+                                   false, &file_map);
+    file_map.ForEachRange(
+        [this, &contains](uint64_t fileoff, uint64_t filesize) {
+          if (ContainsVerboseFileOffset(fileoff, filesize)) {
+            contains = true;
+          }
+        });
+    return contains;
+  }
+
+  return false;
+}
+
+bool RangeSink::IsVerboseForFileRange(uint64_t fileoff, uint64_t filesize) {
+  if (ContainsVerboseFileOffset(fileoff, filesize)) {
+    return true;
+  }
+
+  if (translator_ && options_.has_debug_vmaddr()) {
+    RangeMap vm_map;
+    RangeMap file_map;
+    bool contains = false;
+    file_map.AddRangeWithTranslation(fileoff, filesize, "",
+                                     translator_->file_map, false, &vm_map);
+    vm_map.ForEachRange([this, &contains](uint64_t vmstart, uint64_t vmsize) {
+      if (ContainsVerboseVMAddr(vmstart, vmsize)) {
+        contains = true;
+      }
+    });
+    return contains;
+  }
+
+  return false;
+}
+
 void RangeSink::AddOutput(DualMap* map, const NameMunger* munger) {
   outputs_.push_back(std::make_pair(map, munger));
 }
 
 void RangeSink::AddFileRange(const char* analyzer, string_view name,
                              uint64_t fileoff, uint64_t filesize) {
-  if (verbose_level > 2) {
-    fprintf(stdout, "[%s, %s] AddFileRange(%.*s, %" PRIx64 ", %" PRIx64 ")\n",
-            GetDataSourceLabel(data_source_), analyzer, (int)name.size(),
-            name.data(), fileoff, filesize);
+  bool verbose = IsVerboseForFileRange(fileoff, filesize);
+  if (verbose) {
+    printf("[%s, %s] AddFileRange(%.*s, %" PRIx64 ", %" PRIx64 ")\n",
+           GetDataSourceLabel(data_source_), analyzer, (int)name.size(),
+           name.data(), fileoff, filesize);
   }
   for (auto& pair : outputs_) {
     const std::string label = pair.second->Munge(name);
     if (translator_) {
       bool ok = pair.first->file_map.AddRangeWithTranslation(
-          fileoff, filesize, label, translator_->file_map, &pair.first->vm_map);
+          fileoff, filesize, label, translator_->file_map, verbose,
+          &pair.first->vm_map);
       if (!ok) {
         THROWF("File range ($0, $1) for label $2 extends beyond base map",
                fileoff, filesize, name);
@@ -895,12 +959,11 @@
                                 uint64_t label_from_vmaddr,
                                 string_view file_range) {
   uint64_t file_offset = file_range.data() - file_->data().data();
-  if (verbose_level > 2) {
-    fprintf(stdout,
-            "[%s, %s] AddFileRangeFor(%" PRIx64 ", [%" PRIx64
-            ", %zx])\n",
-            GetDataSourceLabel(data_source_), analyzer, label_from_vmaddr,
-            file_offset, file_range.size());
+  bool verbose = IsVerboseForFileRange(file_offset, file_range.size());
+  if (verbose) {
+    printf("[%s, %s] AddFileRangeFor(%" PRIx64 ", [%" PRIx64 ", %zx])\n",
+           GetDataSourceLabel(data_source_), analyzer, label_from_vmaddr,
+           file_offset, file_range.size());
   }
   assert(translator_);
   for (auto& pair : outputs_) {
@@ -908,7 +971,7 @@
     uint64_t offset;
     if (pair.first->vm_map.TryGetLabel(label_from_vmaddr, &label, &offset)) {
       bool ok = pair.first->file_map.AddRangeWithTranslation(
-          file_offset, file_range.size(), label, translator_->file_map,
+          file_offset, file_range.size(), label, translator_->file_map, verbose,
           &pair.first->vm_map);
       if (!ok) {
         THROWF("File range ($0, $1) for label $2 extends beyond base map",
@@ -922,11 +985,11 @@
 
 void RangeSink::AddVMRangeFor(const char* analyzer, uint64_t label_from_vmaddr,
                               uint64_t addr, uint64_t size) {
-  if (verbose_level > 2) {
-    fprintf(stdout,
-            "[%s, %s] AddVMRangeFor(%" PRIx64 ", [%" PRIx64 ", %" PRIx64 "])\n",
-            GetDataSourceLabel(data_source_), analyzer, label_from_vmaddr, addr,
-            size);
+  bool verbose = IsVerboseForVMRange(addr, size);
+  if (verbose) {
+    printf("[%s, %s] AddVMRangeFor(%" PRIx64 ", [%" PRIx64 ", %" PRIx64 "])\n",
+           GetDataSourceLabel(data_source_), analyzer, label_from_vmaddr, addr,
+           size);
   }
   assert(translator_);
   for (auto& pair : outputs_) {
@@ -934,7 +997,8 @@
     uint64_t offset;
     if (pair.first->vm_map.TryGetLabel(label_from_vmaddr, &label, &offset)) {
       bool ok = pair.first->vm_map.AddRangeWithTranslation(
-          addr, size, label, translator_->vm_map, &pair.first->file_map);
+          addr, size, label, translator_->vm_map, verbose,
+          &pair.first->file_map);
       if (!ok) {
         THROWF("VM range ($0, $1) for label $2 extends beyond base map", addr,
                size, label);
@@ -947,16 +1011,18 @@
 
 void RangeSink::AddVMRange(const char* analyzer, uint64_t vmaddr,
                            uint64_t vmsize, const std::string& name) {
-  if (verbose_level > 2) {
-    fprintf(stdout, "[%s, %s] AddVMRange(%.*s, %" PRIx64 ", %" PRIx64 ")\n",
-            GetDataSourceLabel(data_source_), analyzer, (int)name.size(),
-            name.data(), vmaddr, vmsize);
+  bool verbose = IsVerboseForVMRange(vmaddr, vmsize);
+  if (verbose) {
+    printf("[%s, %s] AddVMRange(%.*s, %" PRIx64 ", %" PRIx64 ")\n",
+           GetDataSourceLabel(data_source_), analyzer, (int)name.size(),
+           name.data(), vmaddr, vmsize);
   }
   assert(translator_);
   for (auto& pair : outputs_) {
     const std::string label = pair.second->Munge(name);
     bool ok = pair.first->vm_map.AddRangeWithTranslation(
-        vmaddr, vmsize, label, translator_->vm_map, &pair.first->file_map);
+        vmaddr, vmsize, label, translator_->vm_map, verbose,
+        &pair.first->file_map);
     if (!ok) {
       THROWF("VM range ($0, $1) for label $2 extends beyond base map", vmaddr,
              vmsize, name);
@@ -981,12 +1047,12 @@
 void RangeSink::AddRange(const char* analyzer, string_view name,
                          uint64_t vmaddr, uint64_t vmsize, uint64_t fileoff,
                          uint64_t filesize) {
-  if (verbose_level > 2) {
-    fprintf(stdout,
-            "[%s, %s] AddRange(%.*s, %" PRIx64 ", %" PRIx64 ", %" PRIx64
-            ", %" PRIx64 ")\n",
-            GetDataSourceLabel(data_source_), analyzer, (int)name.size(),
-            name.data(), vmaddr, vmsize, fileoff, filesize);
+  if (IsVerboseForVMRange(vmaddr, vmsize) ||
+      IsVerboseForFileRange(fileoff, filesize)) {
+    printf("[%s, %s] AddRange(%.*s, %" PRIx64 ", %" PRIx64 ", %" PRIx64
+           ", %" PRIx64 ")\n",
+           GetDataSourceLabel(data_source_), analyzer, (int)name.size(),
+           name.data(), vmaddr, vmsize, fileoff, filesize);
   }
 
   if (translator_) {
@@ -1151,6 +1217,7 @@
                          std::string* out_build_id) const;
 
   const InputFileFactory& file_factory_;
+  const Options options_;
 
   // All data sources, indexed by name.
   // Contains both built-in sources and custom sources.
@@ -1168,7 +1235,7 @@
 };
 
 Bloaty::Bloaty(const InputFileFactory& factory, const Options& options)
-    : file_factory_(factory) {
+    : file_factory_(factory), options_(options) {
   AddBuiltInSources(data_sources, options);
 }
 
@@ -1335,15 +1402,16 @@
   std::vector<RangeSink*> filename_sink_ptrs;
 
   // Base map always goes first.
-  sinks.push_back(absl::make_unique<RangeSink>(
-      &file->file_data(), DataSource::kSegments, nullptr));
+  sinks.push_back(absl::make_unique<RangeSink>(&file->file_data(), options_,
+                                               DataSource::kSegments, nullptr));
   NameMunger empty_munger;
   sinks.back()->AddOutput(maps.base_map(), &empty_munger);
   sink_ptrs.push_back(sinks.back().get());
 
   for (auto source : sources_) {
-    sinks.push_back(absl::make_unique<RangeSink>(
-        &file->file_data(), source->effective_source, maps.base_map()));
+    sinks.push_back(absl::make_unique<RangeSink>(&file->file_data(), options_,
+                                                 source->effective_source,
+                                                 maps.base_map()));
     sinks.back()->AddOutput(maps.AppendMap(), source->munger.get());
     // We handle the kInputFiles data source internally, without handing it off
     // to the file format implementation.  This seems slightly simpler, since
@@ -1414,9 +1482,9 @@
   assert(filesize == file->file_data().data().size());
 
   if (verbose_level > 0) {
-    fprintf(stdout, "FILE MAP:\n");
+    printf("FILE MAP:\n");
     maps.PrintFileMaps();
-    fprintf(stdout, "VM MAP:\n");
+    printf("VM MAP:\n");
     maps.PrintVMMaps();
   }
 }
@@ -1562,12 +1630,19 @@
                        -s vm
                        -s file
                        -s both (the default: sorts by max(vm, file)).
-  -v                 Verbose output.  Dumps warnings encountered during
-                     processing and full VM/file maps at the end.
-                     Add more v's (-vv, -vvv) for even more.
   -w                 Wide output; don't truncate long labels.
   --help             Display this message and exit.
   --list-sources     Show a list of available sources and exit.
+
+Options for debugging Bloaty:
+
+  --debug-vmaddr=ADDR
+  --debug-fileoff=OFF
+                     Print extended debugging information for the given
+                     VM address and/or file offset.
+  -v                 Verbose output.  Dumps warnings encountered during
+                     processing and full VM/file maps at the end.
+                     Add more v's (-vv, -vvv) for even more.
 )";
 
 class ArgParser {
@@ -1636,7 +1711,30 @@
 
   bool TryParseIntegerOption(string_view flag, int* val) {
     string_view val_str;
-    return TryParseOption(flag, &val_str) && absl::SimpleAtoi(val_str, val);
+    if (!TryParseOption(flag, &val_str)) {
+      return false;
+    }
+
+    if (!absl::SimpleAtoi(val_str, val)) {
+      THROWF("option '$0' had non-integral argument: $1", flag, val_str);
+    }
+
+    return true;
+  }
+
+  bool TryParseUint64Option(string_view flag, uint64_t* val) {
+    string_view val_str;
+    if (!TryParseOption(flag, &val_str)) {
+      return false;
+    }
+
+    try {
+      *val = std::stoull(std::string(val_str));
+    } catch (...) {
+      THROWF("option '$0' had non-integral argument: $1", flag, val_str);
+    }
+
+    return true;
   }
 
  public:
@@ -1653,6 +1751,7 @@
   ArgParser args(argc, argv);
   string_view option;
   int int_option;
+  uint64_t uint64_option;
 
   while (!args.IsDone()) {
     if (args.TryParseFlag("--")) {
@@ -1689,6 +1788,16 @@
       }
     } else if (args.TryParseOption("--debug-file", &option)) {
       options->add_debug_filename(std::string(option));
+    } else if (args.TryParseUint64Option("--debug-fileoff", &uint64_option)) {
+      if (options->has_debug_fileoff()) {
+        THROW("currently we only support a single debug fileoff");
+      }
+      options->set_debug_fileoff(uint64_option);
+    } else if (args.TryParseUint64Option("--debug-vmaddr", &uint64_option)) {
+      if (options->has_debug_vmaddr()) {
+        THROW("currently we only support a single debug vmaddr");
+      }
+      options->set_debug_vmaddr(uint64_option);
     } else if (args.TryParseOption("--disassemble", &option)) {
       options->mutable_disassemble_function()->assign(std::string(option));
     } else if (args.TryParseIntegerOption("-n", &int_option)) {
diff --git a/src/bloaty.h b/src/bloaty.h
index 8e36592..1a39e5f 100644
--- a/src/bloaty.h
+++ b/src/bloaty.h
@@ -33,6 +33,8 @@
 #include "absl/strings/strip.h"
 #include "capstone.h"
 #include "re2/re2.h"
+
+#include "bloaty.pb.h"
 #include "range_map.h"
 
 #define BLOATY_DISALLOW_COPY_AND_ASSIGN(class_name) \
@@ -134,10 +136,12 @@
 // space and/or file offsets.
 class RangeSink {
  public:
-  RangeSink(const InputFile* file, DataSource data_source,
-            const DualMap* translator);
+  RangeSink(const InputFile* file, const Options& options,
+            DataSource data_source, const DualMap* translator);
   ~RangeSink();
 
+  const Options& options() const { return options_; }
+
   void AddOutput(DualMap* map, const NameMunger* munger);
 
   DataSource data_source() const { return data_source_; }
@@ -225,7 +229,13 @@
     return ptr >= file_data.data() && ptr < file_data.data() + file_data.size();
   }
 
+  bool ContainsVerboseVMAddr(uint64_t vmstart, uint64_t vmsize);
+  bool ContainsVerboseFileOffset(uint64_t fileoff, uint64_t filesize);
+  bool IsVerboseForVMRange(uint64_t vmstart, uint64_t vmsize);
+  bool IsVerboseForFileRange(uint64_t fileoff, uint64_t filesize);
+
   const InputFile* file_;
+  const Options options_;
   DataSource data_source_;
   const DualMap* translator_;
   std::vector<std::pair<DualMap*, const NameMunger*>> outputs_;
diff --git a/src/bloaty.proto b/src/bloaty.proto
index 8b771ef..5f2a1bf 100644
--- a/src/bloaty.proto
+++ b/src/bloaty.proto
@@ -55,10 +55,15 @@
   }
   optional SortBy sort_by = 6 [default = SORTBY_BOTH];
 
-  // When greater than zero, Bloaty will print verbose output to stderr.
+  // When greater than zero, Bloaty will print verbose output to stdout.
   // TODO(haberman): should this be in the output object instead?
   optional int32 verbose_level = 7;
 
+  // If set, Bloaty prints verbose debugging information about the given
+  // VM address and/or file offset.
+  optional uint64 debug_vmaddr = 11;
+  optional uint64 debug_fileoff = 12;
+
   // Custom data sources for this analysis.
   repeated CustomDataSource custom_data_source = 8;
 
diff --git a/src/elf.cc b/src/elf.cc
index 94ce2ad..9401847 100644
--- a/src/elf.cc
+++ b/src/elf.cc
@@ -1265,6 +1265,7 @@
           DualMap symbol_map;
           NameMunger empty_munger;
           RangeSink symbol_sink(&debug_file().file_data(),
+                                sink->options(),
                                 DataSource::kRawSymbols,
                                 &sinks[0]->MapAtIndex(0));
           symbol_sink.AddOutput(&symbol_map, &empty_munger);
@@ -1325,14 +1326,16 @@
     // build the entire map.
     DualMap base_map;
     NameMunger empty_munger;
-    RangeSink base_sink(&file_data(), DataSource::kSegments, nullptr);
+    RangeSink base_sink(&file_data(), bloaty::Options(), DataSource::kSegments,
+                        nullptr);
     base_sink.AddOutput(&base_map, &empty_munger);
     std::vector<RangeSink*> sink_ptrs{&base_sink};
     ProcessFile(sink_ptrs);
 
     // Could optimize this not to build the whole table if necessary.
     SymbolTable symbol_table;
-    RangeSink symbol_sink(&file_data(), symbol_source, &base_map);
+    RangeSink symbol_sink(&file_data(), bloaty::Options(), symbol_source,
+                          &base_map);
     symbol_sink.AddOutput(&info->symbol_map, &empty_munger);
     ReadELFSymbols(debug_file().file_data(), &symbol_sink, &symbol_table,
                    false);
diff --git a/src/range_map.cc b/src/range_map.cc
index de8b91c..1444fd5 100644
--- a/src/range_map.cc
+++ b/src/range_map.cc
@@ -230,6 +230,7 @@
 bool RangeMap::AddRangeWithTranslation(uint64_t addr, uint64_t size,
                                        const std::string& val,
                                        const RangeMap& translator,
+                                       bool verbose,
                                        RangeMap* other) {
   AddRange(addr, size, val);
 
@@ -251,7 +252,7 @@
     uint64_t this_size;
     if (translator.TranslateAndTrimRangeWithEntry(it, addr, size, &this_addr,
                                                   &this_size)) {
-      if (verbose_level > 2) {
+      if (verbose_level > 2 || verbose) {
         printf("  -> translates to: [%" PRIx64 " %" PRIx64 "]\n", this_addr,
                this_size);
       }
diff --git a/src/range_map.h b/src/range_map.h
index 1153545..7b39ce8 100644
--- a/src/range_map.h
+++ b/src/range_map.h
@@ -71,7 +71,8 @@
   // case).
   bool AddRangeWithTranslation(uint64_t addr, uint64_t size,
                                const std::string& val,
-                               const RangeMap& translator, RangeMap* other);
+                               const RangeMap& translator, bool verbose,
+                               RangeMap* other);
 
   // Returns whether this RangeMap fully covers the given range.
   bool CoversRange(uint64_t addr, uint64_t size) const;
diff --git a/tests/range_map_test.cc b/tests/range_map_test.cc
index 565d14d..4b5b1ae 100644
--- a/tests/range_map_test.cc
+++ b/tests/range_map_test.cc
@@ -284,8 +284,8 @@
     {20, 25, 120, "foo"}
   });
 
-  ASSERT_TRUE(
-      map2_.AddRangeWithTranslation(20, 5, "translate me", map_, &map3_));
+  ASSERT_TRUE(map2_.AddRangeWithTranslation(20, 5, "translate me", map_, false,
+                                            &map3_));
 
   CheckConsistency();
   AssertMapEquals(map2_, {
@@ -296,8 +296,8 @@
   });
 
   map_.AddDualRange(1000, 30, 1100, "bar");
-  ASSERT_TRUE(
-      map2_.AddRangeWithTranslation(1000, 5, "translate me2", map_, &map3_));
+  ASSERT_TRUE(map2_.AddRangeWithTranslation(1000, 5, "translate me2", map_,
+                                            false, &map3_));
   AssertMapEquals(map2_, {
     {20, 25, kNoTranslation, "translate me"},
     {1000, 1005, kNoTranslation, "translate me2"}
@@ -308,16 +308,16 @@
   });
 
   // Starts before base map.
-  ASSERT_FALSE(
-      map2_.AddRangeWithTranslation(15, 8, "translate me", map_, &map3_));
+  ASSERT_FALSE(map2_.AddRangeWithTranslation(15, 8, "translate me", map_, false,
+                                             &map3_));
 
   // Extends past base map.
-  ASSERT_FALSE(
-      map2_.AddRangeWithTranslation(22, 15, "translate me", map_, &map3_));
+  ASSERT_FALSE(map2_.AddRangeWithTranslation(22, 15, "translate me", map_,
+                                             false, &map3_));
 
   // Starts and ends in base map, but skips range in the middle.
-  ASSERT_FALSE(
-      map2_.AddRangeWithTranslation(20, 1000, "translate me", map_, &map3_));
+  ASSERT_FALSE(map2_.AddRangeWithTranslation(20, 1000, "translate me", map_,
+                                             false, &map3_));
 }
 
 TEST_F(RangeMapTest, Translation2) {
@@ -333,8 +333,8 @@
     {30, 35, 130, "quux"}
   });
 
-  ASSERT_TRUE(
-      map2_.AddRangeWithTranslation(20, 15, "translate me", map_, &map3_));
+  ASSERT_TRUE(map2_.AddRangeWithTranslation(20, 15, "translate me", map_, false,
+                                            &map3_));
   CheckConsistency();
   AssertMapEquals(map2_, {
     {20, 35, kNoTranslation, "translate me"}
@@ -352,7 +352,8 @@
     {20, 30, 120, "foo"}
   });
 
-  map2_.AddRangeWithTranslation(25, kUnknownSize, "translate me", map_, &map3_);
+  map2_.AddRangeWithTranslation(25, kUnknownSize, "translate me", map_, false,
+                                &map3_);
   CheckConsistency();
   AssertMapEquals(map2_, {
     {25, UINT64_MAX, kNoTranslation, "translate me"}