Merge branch 'master' of https://github.com/google/bloaty into pack
diff --git a/Makefile b/Makefile
index 94f6cc6..4ceff71 100644
--- a/Makefile
+++ b/Makefile
@@ -6,11 +6,12 @@
 CXXFLAGS=-std=c++11 -W -Wall -Wno-sign-compare -g -I third_party/re2 -I. -Isrc
 RE2_H=third_party/re2/re2/re2.h
 RE2_A=third_party/re2/obj/libre2.a
+LEVELDB_A=third_party/leveldb/out-static/libleveldb.a
 
-bloaty: src/main.cc src/libbloaty.a $(RE2_A)
+bloaty: src/main.cc src/libbloaty.a $(RE2_A) $(LEVELDB_A)
 	$(CXX) $(GC_SECTIONS) $(CXXFLAGS) -O2 -o $@ $^ -lpthread
 
-OBJS=src/bloaty.o src/dwarf.o src/elf.o src/macho.o
+OBJS=src/bloaty.o src/dwarf.o src/elf.o src/macho.o src/pack.o
 
 $(OBJS): %.o : %.cc src/bloaty.h src/dwarf_constants.h $(RE2_H)
 	$(CXX) $(CXXFLAGS) -O2 -c -o $@ $<
@@ -21,9 +22,12 @@
 third_party/re2/obj/libre2.a: third_party/re2/Makefile
 	$(MAKE) -C third_party/re2 CPPFLAGS="-ffunction-sections -fdata-sections -g"
 
+third_party/leveldb/out-static/libleveldb.a: third_party/leveldb/Makefile
+	$(MAKE) -C third_party/leveldb CPPFLAGS="-ffunction-sections -fdata-sections -g" out-static/libleveldb.a
+
 # These targets share a pattern match to coerce make into only executing once
 # See this discussion: http://stackoverflow.com/a/3077254/1780018
-third%party/re2/Makefile third%party/re2/re2/re2.h third%party/googletest/CMakeLists.txt third%party/libFuzzer/build.sh: .gitmodules
+third%party/re2/Makefile third%party/re2/re2/re2.h third%party/googletest/CMakeLists.txt third%party/libFuzzer/build.sh third%party/leveldb/Makefile: .gitmodules
 	git submodule init && git submodule update
 	@# Ensure .gitmodules cannot be newer
 	touch -r .gitmodules $@
@@ -70,7 +74,7 @@
 	$(CXX) $(CXXFLAGS) $(TESTFLAGS) -o $@ $^ -lpthread
 
 third_party/googletest/googlemock/gtest/libgtest_main.a: third_party/googletest/CMakeLists.txt
-	cd third_party/googletest && cmake . && $(MAKE)
+	cd third_party/googletest/googletest && cmake . && $(MAKE)
 
 ## Fuzzing #####################################################################
 
diff --git a/src/bloaty.cc b/src/bloaty.cc
index 74fa54e..83fe980 100644
--- a/src/bloaty.cc
+++ b/src/bloaty.cc
@@ -1200,33 +1200,10 @@
 
 // MemoryMap ///////////////////////////////////////////////////////////////////
 
-// Contains a RangeMap for VM space and file space.
+MemoryMap::MemoryMap(DataSource source, std::unique_ptr<NameMunger>&& munger)
+    : source_(source), munger_(std::move(munger)) {}
 
-class MemoryMap {
- public:
-  MemoryMap(std::unique_ptr<NameMunger>&& munger) : munger_(std::move(munger)) {}
-  virtual ~MemoryMap() {}
-
-  bool FindAtAddr(uint64_t vmaddr, std::string* name) const;
-  bool FindContainingAddr(uint64_t vmaddr, uint64_t* start,
-                          std::string* name) const;
-
-  const RangeMap* file_map() const { return &file_map_; }
-  const RangeMap* vm_map() const { return &vm_map_; }
-  RangeMap* file_map() { return &file_map_; }
-  RangeMap* vm_map() { return &vm_map_; }
-
- protected:
-  std::string ApplyNameRegexes(string_view name);
-
- private:
-  BLOATY_DISALLOW_COPY_AND_ASSIGN(MemoryMap);
-  friend class RangeSink;
-
-  RangeMap vm_map_;
-  RangeMap file_map_;
-  std::unique_ptr<NameMunger> munger_;
-};
+MemoryMap::~MemoryMap() {}
 
 std::string MemoryMap::ApplyNameRegexes(string_view name) {
   return munger_ ? munger_->Munge(name) : std::string(name);
@@ -1470,8 +1447,12 @@
 }
 
 bool Bloaty::ScanAndRollupFile(const InputFile& file, Rollup* rollup) {
-  const std::string& filename = file.filename();
-  auto file_handler = TryOpenELFFile(file);
+  std::string filename = file.filename();
+  auto file_handler = TryOpenPackFile(file);
+
+  if (!file_handler.get()) {
+    file_handler = TryOpenELFFile(file);
+  }
 
   if (!file_handler.get()) {
     file_handler = TryOpenMachOFile(file);
@@ -1484,7 +1465,10 @@
 
   struct Maps {
    public:
-    Maps() : base_map_(nullptr) { PushMap(&base_map_); }
+    Maps()
+        : base_map_(DataSource::kInputFiles, nullptr) {
+      PushMap(&base_map_);
+    }
 
     void PushAndOwnMap(MemoryMap* map) {
       maps_.emplace_back(map);
@@ -1558,23 +1542,25 @@
 
   } maps;
 
-  RangeSink sink(&file, DataSource::kSegments, nullptr, maps.base_map());
-  file_handler->ProcessBaseMap(&sink);
-  maps.base_map()->file_map()->AddRange(0, file.data().size(), "[None]");
-
   std::vector<std::unique_ptr<RangeSink>> sinks;
   std::vector<RangeSink*> sink_ptrs;
 
   for (size_t i = 0; i < sources_.size(); i++) {
     auto& source = sources_[i];
-    auto map = new MemoryMap(std::move(source.munger));
+    auto map = new MemoryMap(source.source, std::move(source.munger));
     maps.PushAndOwnMap(map);
     sinks.push_back(std::unique_ptr<RangeSink>(
         new RangeSink(&file, source.source, maps.base_map(), map)));
     sink_ptrs.push_back(sinks.back().get());
   }
 
-  CHECK_RETURN(file_handler->ProcessFile(sink_ptrs));
+  RangeSink sink(&file, DataSource::kInputFiles, nullptr, maps.base_map());
+  CHECK_RETURN(file_handler->ProcessFile({&sink}));
+  maps.base_map()->file_map()->AddRange(0, file.data().size(), "[None]");
+
+  do {
+    CHECK_RETURN(file_handler->ProcessFile(sink_ptrs));
+  } while (!file_handler->IsDone());
 
   maps.ComputeRollup(filename, filename_position_, rollup);
   if (verbose_level > 0) {
@@ -1625,6 +1611,8 @@
   -n <num>         How many rows to show per level before collapsing
                    other keys into '[Other]'.  Set to '0' for unlimited.
                    Defaults to 20.
+  -p <filename>    Write a packfile to <filename>.  This can be used as an
+                   input file for a subsequent invocation.
   -r <regex>       Add regex to the list of regexes.
                    Format for regex is:
                      SOURCE:s/PATTERN/REPLACEMENT/
@@ -1663,6 +1651,7 @@
 
   RE2 regex_pattern("(\\w+)\\:s/(.*)/(.*)/");
   bool base_files = false;
+  std::string packfile_output;
 
   for (int i = 1; i < argc; i++) {
     if (strcmp(argv[i], "--") == 0) {
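The usage text above documents the new -p flag and packfile_output is declared to hold its argument, but none of the hunks shown here parse or consume it yet. Below is a minimal sketch of one way the flag could be wired up; ParsePackfileFlag is a hypothetical helper, not part of the patch, and presumably the collected MemoryMaps would eventually be handed to WritePackFile() in src/pack.cc (still a stub below).

    #include <cstdio>
    #include <cstdlib>
    #include <cstring>
    #include <string>

    // Hypothetical helper, not part of this patch: consume "-p <filename>"
    // from the argument list and record the destination in |packfile_output|.
    // Intended call site is the existing argv loop, e.g.
    //   else if (ParsePackfileFlag(argc, argv, &i, &packfile_output)) continue;
    static bool ParsePackfileFlag(int argc, char** argv, int* i,
                                  std::string* packfile_output) {
      if (strcmp(argv[*i], "-p") != 0) return false;
      if (++*i >= argc) {
        fprintf(stderr, "bloaty: -p requires an output filename\n");
        exit(1);
      }
      *packfile_output = argv[*i];
      return true;
    }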
diff --git a/src/bloaty.h b/src/bloaty.h
index 1d88925..e1147e0 100644
--- a/src/bloaty.h
+++ b/src/bloaty.h
@@ -39,14 +39,20 @@
 namespace bloaty {
 
 class MemoryMap;
+class NameMunger;
 
 enum class DataSource {
-  kArchiveMembers,
-  kCompileUnits,
-  kInlines,
-  kSections,
-  kSegments,
-  kSymbols,
+  // src/pack.cc depends on kInputFiles being the smallest number.
+  kInputFiles = 0,
+
+  // The order of these is not significant, but these numbers are persisted
+  // in our pack files, so they must not change.
+  kArchiveMembers = 1,
+  kCompileUnits = 2,
+  kInlines = 3,
+  kSections = 4,
+  kSegments = 5,
+  kSymbols = 6,
 };
 
 class InputFile {
@@ -57,9 +63,15 @@
   const std::string& filename() const { return filename_; }
   absl::string_view data() const { return data_; }
 
+  // Allows data sources to change the reported input file name.
+  // This is only intended to be used by pack files.
+  void SetFilename(const std::string& filename) {
+    filename_ = filename;
+  }
+
  private:
   BLOATY_DISALLOW_COPY_AND_ASSIGN(InputFile);
-  const std::string filename_;
+  std::string filename_;
 
  protected:
   absl::string_view data_;
@@ -94,9 +106,25 @@
             const MemoryMap* translator, MemoryMap* map);
   ~RangeSink();
 
-  DataSource data_source() const { return data_source_; }
+  // Returns the data source that is expected for this RangeSink.  Clients
+  // use this to determine what kind of info to parse from the input file and
+  // push to the sink.
+  //
+  // kInputFiles is special-cased a bit.  When you call RangeSink::Add*() for
+  // a kInputFiles sink, Bloaty ignores the name you pass and uses
+  // input_file().filename() instead.  So you can make your
+  // kInputFiles handler just delegate to any other handler you have that will
+  // definitely cover 100% of the input file (probably segments or sections).
+  DataSource data_source() const { return data_source_; }
+
   const InputFile& input_file() const { return *file_; }
 
+  // AddRange() and AddFileRange() label the given range with "name".  If this
+  // overlaps with any previously labeled range, the existing mapping takes
+  // precedence.  So any specific labels should be added first, and fallback
+  // labels can be added last for parts of the file that didn't have a specific
+  // label.
+  //
   // If vmsize or filesize is zero, this mapping is presumed not to exist in
   // that domain.  For example, .bss mappings don't exist in the file, and
   // .debug_* mappings don't exist in memory.
@@ -117,7 +145,8 @@
                  file_range.size());
   }
 
-  // The VM-only functions below may not be used to populate the base map!
+  // The VM-only functions below may not be used if
+  // data_source() == kInputFiles!
 
   // Adds a region to the memory map.  It should not overlap any previous
   // region added with Add(), but it should overlap the base memory map.
@@ -157,15 +186,21 @@
  public:
   virtual ~FileHandler() {}
 
-  virtual bool ProcessBaseMap(RangeSink* sink) = 0;
-
   // Process this file, pushing data to |sinks| as appropriate for each data
-  // source.
+  // source.  The first sink in |sinks| will be for kInputFiles, and this *must*
+  // be populated before any other sink.  The other sinks can be populated in
+  // any order.
   virtual bool ProcessFile(const std::vector<RangeSink*>& sinks) = 0;
+
+  // Returns true when there are no more files to process.  This is only
+  // called after the first file has been processed, so a FileHandler that
+  // only ever represents a single file can unconditionally return true.
+  virtual bool IsDone() { return true; }
 };
 
 std::unique_ptr<FileHandler> TryOpenELFFile(const InputFile& file);
 std::unique_ptr<FileHandler> TryOpenMachOFile(const InputFile& file);
+std::unique_ptr<FileHandler> TryOpenPackFile(const InputFile& file);
 
 namespace dwarf {
 
@@ -341,6 +376,38 @@
 };
 
 
+// MemoryMap ///////////////////////////////////////////////////////////////////
+
+// Contains a RangeMap for VM space and file space.
+
+class MemoryMap {
+ public:
+  MemoryMap(DataSource source, std::unique_ptr<NameMunger>&& munger);
+  virtual ~MemoryMap();
+
+  bool FindAtAddr(uint64_t vmaddr, std::string* name) const;
+  bool FindContainingAddr(uint64_t vmaddr, uint64_t* start,
+                          std::string* name) const;
+
+  DataSource source() const { return source_; }
+  const RangeMap* file_map() const { return &file_map_; }
+  const RangeMap* vm_map() const { return &vm_map_; }
+  RangeMap* file_map() { return &file_map_; }
+  RangeMap* vm_map() { return &vm_map_; }
+
+ private:
+  BLOATY_DISALLOW_COPY_AND_ASSIGN(MemoryMap);
+  friend class RangeSink;
+
+  std::string ApplyNameRegexes(string_view name);
+
+  DataSource source_;
+  RangeMap vm_map_;
+  RangeMap file_map_;
+  std::unique_ptr<NameMunger> munger_;
+};
+
+
 // Top-level API ///////////////////////////////////////////////////////////////
 
 // This should only be used by main.cc and unit tests.
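The FileHandler comments above spell out the new contract: the kInputFiles sink must be covered before anything else, and IsDone() lets a handler that wraps several files (such as a pack file) ask to be run again. The elf.cc and macho.cc hunks below apply it to real formats; as a self-contained illustration, here is a minimal single-file handler following the same pattern. MyFileHandler, ParseMySegments and ParseMySymbols are hypothetical stand-ins, not part of the patch.

    #include <cstdio>
    #include "bloaty.h"

    namespace bloaty {

    // Hypothetical format parsers, stubbed out here; a real handler would
    // label ranges through sink->AddRange()/AddFileRange().
    inline bool ParseMySegments(RangeSink*) { return true; }
    inline bool ParseMySymbols(RangeSink*) { return true; }

    class MyFileHandler : public FileHandler {
      bool ProcessFile(const std::vector<RangeSink*>& sinks) override {
        for (auto sink : sinks) {
          switch (sink->data_source()) {
            case DataSource::kInputFiles:
              // kInputFiles ignores the labels we push, so delegating to the
              // segment parser is fine as long as it covers the whole file.
              CHECK_RETURN(ParseMySegments(sink));
              break;
            case DataSource::kSegments:
              CHECK_RETURN(ParseMySegments(sink));
              break;
            case DataSource::kSymbols:
              CHECK_RETURN(ParseMySymbols(sink));
              break;
            default:
              fprintf(stderr, "unsupported data source\n");
              return false;
          }
        }
        return true;
      }

      // This handler represents a single input file, so the base-class
      // IsDone() (which returns true) already stops the driver's do/while
      // loop after one pass; the override is shown only for clarity.
      bool IsDone() override { return true; }
    };

    }  // namespace bloaty

Hooking such a handler up would mean adding a TryOpenMyFile() factory next to TryOpenELFFile() and TryOpenMachOFile(), since that is how ScanAndRollupFile() discovers handlers.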
diff --git a/src/elf.cc b/src/elf.cc
index 97191ea..5418e69 100644
--- a/src/elf.cc
+++ b/src/elf.cc
@@ -744,11 +744,6 @@
           }
         }
 
-        if (report_by == kReportByFilename) {
-          // Cover unmapped parts of the file.
-          sink->AddFileRange(filename, elf.entire_file());
-        }
-
         return true;
       });
 }
@@ -840,19 +835,17 @@
 }  // namespace
 
 class ElfFileHandler : public FileHandler {
-  bool ProcessBaseMap(RangeSink* sink) override {
-    if (IsObjectFile(sink->input_file().data())) {
-      return DoReadELFSections(sink, kReportBySectionName);
-    } else {
-      // Slightly more complete for executables, but not present in object
-      // files.
-      return ReadELFSegments(sink);
-    }
-  }
-
   bool ProcessFile(const std::vector<RangeSink*>& sinks) override {
     for (auto sink : sinks) {
       switch (sink->data_source()) {
+        case DataSource::kInputFiles:
+          if (IsObjectFile(sink->input_file().data())) {
+            return DoReadELFSections(sink, kReportBySectionName);
+          } else {
+            // Slightly more complete for executables, but not present in object
+            // files.
+            return ReadELFSegments(sink);
+          }
         case DataSource::kSegments:
           CHECK_RETURN(ReadELFSegments(sink));
           break;
diff --git a/src/macho.cc b/src/macho.cc
index 9bf3b96..16d60fd 100644
--- a/src/macho.cc
+++ b/src/macho.cc
@@ -206,13 +206,11 @@
 }
 
 class MachOFileHandler : public FileHandler {
-  bool ProcessBaseMap(RangeSink* sink) override {
-    return ParseMachOSegments(sink);
-  }
-
   bool ProcessFile(const std::vector<RangeSink*>& sinks) override {
     for (auto sink : sinks) {
       switch (sink->data_source()) {
+        case DataSource::kInputFiles:
+          return ParseMachOSegments(sink);
         case DataSource::kSegments:
           CHECK_RETURN(ParseMachOSegments(sink));
           break;
diff --git a/src/pack.cc b/src/pack.cc
new file mode 100644
index 0000000..844f8ed
--- /dev/null
+++ b/src/pack.cc
@@ -0,0 +1,77 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// A pack file stores the parsed results from reading one or more input files.
+// This lets users save the results of parsing so that further processing can
+// be done on them later.
+//
+// We use an SSTable file to store the data.  This provides random access so
+// that we only need to scan the parts of the file corresponding to the "-d"
+// options the user passes.
+
+#include "bloaty.h"
+
+namespace bloaty {
+
+// We pack several values into the key and value.
+struct Key {
+  DataSource data_source;
+  std::string filename;
+  uint64_t start_address;
+
+  void Pack(std::string* serialized) const {}
+  void Unpack(string_view serialized) {}
+};
+
+struct Value {
+  int64_t range_size;
+  std::string label;
+
+  // Only for DataSource::kSegments and DataSource::kSections.
+  uint64_t file_start_address;
+  int64_t file_size;
+
+  void Pack(std::string* serialized) const {}
+  void Unpack(string_view serialized) {}
+};
+
+
+class PackFileHandler : public FileHandler {
+  bool ProcessFile(const std::vector<RangeSink*>& sinks) override {
+    for (auto sink : sinks) {
+      switch (sink->data_source()) {
+        case DataSource::kSegments:
+        case DataSource::kSections:
+        case DataSource::kSymbols:
+        case DataSource::kArchiveMembers:
+        case DataSource::kCompileUnits:
+        case DataSource::kInlines:
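+          // Reading ranges back out of the pack file is not implemented yet;
+          // every data source falls through to the failure case below.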
+        default:
+          return false;
+      }
+    }
+
+    return true;
+  }
+};
+
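+// Both entry points are stubs for now: TryOpenPackFile() never claims a file
+// (so ScanAndRollupFile() falls through to the ELF and Mach-O handlers), and
+// WritePackFile() always reports failure.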
+std::unique_ptr<FileHandler> TryOpenPackFile(const InputFile& file) {
+  return nullptr;
+}
+
+bool WritePackFile(std::vector<MemoryMap*> maps) {
+  return false;
+}
+
+}  // namespace bloaty
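The Key and Value structs above have empty Pack()/Unpack() stubs, and the file comment only fixes two requirements: the data lives in an SSTable, and lookups should touch only the data sources named by -d. One possible wire format (an assumption, not something this patch defines) puts the persisted data-source number first so a reader can restrict its scan, and stores the address big-endian so lexicographic SSTable order matches numeric order:

    #include <cstddef>
    #include <cstdint>
    #include <string>

    namespace bloaty_pack_sketch {  // hypothetical, not part of the patch

    inline void AppendBigEndian64(uint64_t val, std::string* out) {
      for (int shift = 56; shift >= 0; shift -= 8) {
        out->push_back(static_cast<char>((val >> shift) & 0xff));
      }
    }

    inline uint64_t ReadBigEndian64(const std::string& s, std::size_t pos) {
      uint64_t val = 0;
      for (int i = 0; i < 8; i++) {
        val = (val << 8) | static_cast<unsigned char>(s[pos + i]);
      }
      return val;
    }

    // key := data-source byte, filename, NUL terminator, 8-byte big-endian
    // start address.  The persisted DataSource values 0..6 fit in one byte.
    inline std::string PackKey(int data_source, const std::string& filename,
                               uint64_t start_address) {
      std::string out;
      out.push_back(static_cast<char>(data_source));
      out.append(filename);
      out.push_back('\0');
      AppendBigEndian64(start_address, &out);
      return out;
    }

    }  // namespace bloaty_pack_sketch

Value could be serialized the same way (fixed-width or varint fields), as long as readers and writers agree; the only hard constraint the patch states is in the DataSource comment in bloaty.h, namely that the numeric enum values are persisted and must not change.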
diff --git a/third_party/googletest b/third_party/googletest
index 7b6561c..a2b8a8e 160000
--- a/third_party/googletest
+++ b/third_party/googletest
@@ -1 +1 @@
-Subproject commit 7b6561c56e353100aca8458d7bc49c4e0119bae8
+Subproject commit a2b8a8e07628e5fd60644b6dd99c1b5e7d7f1f47
diff --git a/third_party/leveldb b/third_party/leveldb
new file mode 160000
index 0000000..646c358
--- /dev/null
+++ b/third_party/leveldb
@@ -0,0 +1 @@
+Subproject commit 646c3588de84ac532a0e3525eae03edae1ea759f
diff --git a/third_party/re2 b/third_party/re2
index 16dd885..c964d9b 160000
--- a/third_party/re2
+++ b/third_party/re2
@@ -1 +1 @@
-Subproject commit 16dd8856b79b3c6163a5b6da40aa45267031a79d
+Subproject commit c964d9b07816f9b52ca692e23b8b3dba33beec54