Merge branch 'master' of https://github.com/google/bloaty into pack
diff --git a/Makefile b/Makefile
index 94f6cc6..4ceff71 100644
--- a/Makefile
+++ b/Makefile
@@ -6,11 +6,12 @@
CXXFLAGS=-std=c++11 -W -Wall -Wno-sign-compare -g -I third_party/re2 -I. -Isrc
RE2_H=third_party/re2/re2/re2.h
RE2_A=third_party/re2/obj/libre2.a
+LEVELDB_A=third_party/leveldb/out-static/libleveldb.a
-bloaty: src/main.cc src/libbloaty.a $(RE2_A)
+bloaty: src/main.cc src/libbloaty.a $(RE2_A) $(LEVELDB_A)
$(CXX) $(GC_SECTIONS) $(CXXFLAGS) -O2 -o $@ $^ -lpthread
-OBJS=src/bloaty.o src/dwarf.o src/elf.o src/macho.o
+OBJS=src/bloaty.o src/dwarf.o src/elf.o src/macho.o src/pack.o
$(OBJS): %.o : %.cc src/bloaty.h src/dwarf_constants.h $(RE2_H)
$(CXX) $(CXXFLAGS) -O2 -c -o $@ $<
@@ -21,9 +22,12 @@
third_party/re2/obj/libre2.a: third_party/re2/Makefile
$(MAKE) -C third_party/re2 CPPFLAGS="-ffunction-sections -fdata-sections -g"
+third_party/leveldb/out-static/libleveldb.a: third_party/leveldb/Makefile
+ $(MAKE) -C third_party/leveldb CPPFLAGS="-ffunction-sections -fdata-sections -g" out-static/libleveldb.a
+
# These targets share a pattern match to coerce make into only executing once
# See this discussion: http://stackoverflow.com/a/3077254/1780018
-third%party/re2/Makefile third%party/re2/re2/re2.h third%party/googletest/CMakeLists.txt third%party/libFuzzer/build.sh: .gitmodules
+third%party/re2/Makefile third%party/re2/re2/re2.h third%party/googletest/CMakeLists.txt third%party/libFuzzer/build.sh third%party/leveldb/Makefile: .gitmodules
git submodule init && git submodule update
@# Ensure .gitmodules cannot be newer
touch -r .gitmodules $@
@@ -70,7 +74,7 @@
$(CXX) $(CXXFLAGS) $(TESTFLAGS) -o $@ $^ -lpthread
third_party/googletest/googlemock/gtest/libgtest_main.a: third_party/googletest/CMakeLists.txt
- cd third_party/googletest && cmake . && $(MAKE)
+ cd third_party/googletest/googletest && cmake . && $(MAKE)
## Fuzzing #####################################################################
diff --git a/src/bloaty.cc b/src/bloaty.cc
index 74fa54e..83fe980 100644
--- a/src/bloaty.cc
+++ b/src/bloaty.cc
@@ -1200,33 +1200,10 @@
// MemoryMap ///////////////////////////////////////////////////////////////////
-// Contains a RangeMap for VM space and file space.
+MemoryMap::MemoryMap(DataSource source, std::unique_ptr<NameMunger>&& munger)
+ : source_(source), munger_(std::move(munger)) {}  // Takes ownership of |munger|, which may be null.
-class MemoryMap {
- public:
- MemoryMap(std::unique_ptr<NameMunger>&& munger) : munger_(std::move(munger)) {}
- virtual ~MemoryMap() {}
-
- bool FindAtAddr(uint64_t vmaddr, std::string* name) const;
- bool FindContainingAddr(uint64_t vmaddr, uint64_t* start,
- std::string* name) const;
-
- const RangeMap* file_map() const { return &file_map_; }
- const RangeMap* vm_map() const { return &vm_map_; }
- RangeMap* file_map() { return &file_map_; }
- RangeMap* vm_map() { return &vm_map_; }
-
- protected:
- std::string ApplyNameRegexes(string_view name);
-
- private:
- BLOATY_DISALLOW_COPY_AND_ASSIGN(MemoryMap);
- friend class RangeSink;
-
- RangeMap vm_map_;
- RangeMap file_map_;
- std::unique_ptr<NameMunger> munger_;
-};
+MemoryMap::~MemoryMap() = default;
std::string MemoryMap::ApplyNameRegexes(string_view name) {
return munger_ ? munger_->Munge(name) : std::string(name);
@@ -1470,8 +1447,12 @@
}
bool Bloaty::ScanAndRollupFile(const InputFile& file, Rollup* rollup) {
- const std::string& filename = file.filename();
- auto file_handler = TryOpenELFFile(file);
+ std::string filename = file.filename();
+ auto file_handler = TryOpenPackFile(file);
+
+ if (!file_handler.get()) {
+ file_handler = TryOpenELFFile(file);
+ }
if (!file_handler.get()) {
file_handler = TryOpenMachOFile(file);
@@ -1484,7 +1465,10 @@
struct Maps {
public:
- Maps() : base_map_(nullptr) { PushMap(&base_map_); }
+ Maps()  // The base map is a kInputFiles map without a name munger.
+ : base_map_(DataSource::kInputFiles, nullptr) {
+ PushMap(&base_map_);  // The base map is pushed first; it is not owned via PushAndOwnMap().
+ }
void PushAndOwnMap(MemoryMap* map) {
maps_.emplace_back(map);
@@ -1558,23 +1542,25 @@
} maps;
- RangeSink sink(&file, DataSource::kSegments, nullptr, maps.base_map());
- file_handler->ProcessBaseMap(&sink);
- maps.base_map()->file_map()->AddRange(0, file.data().size(), "[None]");
-
std::vector<std::unique_ptr<RangeSink>> sinks;
std::vector<RangeSink*> sink_ptrs;
for (size_t i = 0; i < sources_.size(); i++) {
auto& source = sources_[i];
- auto map = new MemoryMap(std::move(source.munger));
+ auto map = new MemoryMap(source.source, std::move(source.munger));
maps.PushAndOwnMap(map);
sinks.push_back(std::unique_ptr<RangeSink>(
new RangeSink(&file, source.source, maps.base_map(), map)));
sink_ptrs.push_back(sinks.back().get());
}
- CHECK_RETURN(file_handler->ProcessFile(sink_ptrs));
+ RangeSink sink(&file, DataSource::kInputFiles, nullptr, maps.base_map());
+ file_handler->ProcessFile({&sink});
+ maps.base_map()->file_map()->AddRange(0, file.data().size(), "[None]");
+
+ do {
+ CHECK_RETURN(file_handler->ProcessFile(sink_ptrs));
+ } while (!file_handler->IsDone());
maps.ComputeRollup(filename, filename_position_, rollup);
if (verbose_level > 0) {
@@ -1625,6 +1611,8 @@
-n <num> How many rows to show per level before collapsing
other keys into '[Other]'. Set to '0' for unlimited.
Defaults to 20.
+ -p <filename> Write a packfile to <filename>. This can be used as an
+ input file for a subsequent invocation.
-r <regex> Add regex to the list of regexes.
Format for regex is:
SOURCE:s/PATTERN/REPLACEMENT/
@@ -1663,6 +1651,7 @@
RE2 regex_pattern("(\\w+)\\:s/(.*)/(.*)/");
bool base_files = false;
+ std::string packfile_output;
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "--") == 0) {
diff --git a/src/bloaty.h b/src/bloaty.h
index 1d88925..e1147e0 100644
--- a/src/bloaty.h
+++ b/src/bloaty.h
@@ -39,14 +39,20 @@
namespace bloaty {
class MemoryMap;
+class NameMunger;
enum class DataSource {
- kArchiveMembers,
- kCompileUnits,
- kInlines,
- kSections,
- kSegments,
- kSymbols,
+ // Must remain the smallest value: src/pack.cc depends on that.
+ kInputFiles = 0,
+
+ // The relative order of these is not significant, but the numeric values are
+ // persisted in our pack files, so existing values must not change.
+ kArchiveMembers = 1,
+ kCompileUnits = 2,
+ kInlines = 3,
+ kSections = 4,
+ kSegments = 5,
+ kSymbols = 6,
};
class InputFile {
@@ -57,9 +63,15 @@
const std::string& filename() const { return filename_; }
absl::string_view data() const { return data_; }
+  // Overrides the name reported for this input file.  Data sources may call
+  // this, but it is only intended to be used by pack files.
+  void SetFilename(const std::string& filename) {
+    filename_.assign(filename);
+  }
+
private:
BLOATY_DISALLOW_COPY_AND_ASSIGN(InputFile);
- const std::string filename_;
+ std::string filename_;
protected:
absl::string_view data_;
@@ -94,9 +106,25 @@
const MemoryMap* translator, MemoryMap* map);
~RangeSink();
- DataSource data_source() const { return data_source_; }
+  // Returns (by value) the data source that is expected for this RangeSink.
+  // Clients use this to determine what kind of info to parse from the input
+  // file and push to the sink.
+  //
+  // kInputFiles is special-cased a bit.  When you call RangeSink::Add*() for
+  // a kInputFiles sink, Bloaty won't pay attention to the name you pass, it
+  // will just use input_file().filename() instead.  So you can make your
+  // kInputFiles handler just delegate to any other handler you have that will
+  // definitely cover 100% of the input file (probably segments or sections).
+  DataSource data_source() const { return data_source_; }
+
const InputFile& input_file() const { return *file_; }
+ // AddRange() and AddFileRange() label the given range with "name". If this
+ // overlaps with any previously labeled range, the existing mapping takes
+ // precedence. So any specific labels should be added first, and fallback
+ // labels can be added last for parts of the file that didn't have a specific
+ // label.
+ //
// If vmsize or filesize is zero, this mapping is presumed not to exist in
// that domain. For example, .bss mappings don't exist in the file, and
// .debug_* mappings don't exist in memory.
@@ -117,7 +145,8 @@
file_range.size());
}
- // The VM-only functions below may not be used to populate the base map!
+ // The VM-only functions below may not be used if
+ // data_source() == kInputFiles!
// Adds a region to the memory map. It should not overlap any previous
// region added with Add(), but it should overlap the base memory map.
@@ -157,15 +186,21 @@
public:
virtual ~FileHandler() {}
- virtual bool ProcessBaseMap(RangeSink* sink) = 0;
-
// Process this file, pushing data to |sinks| as appropriate for each data
- // source.
+ // source. A kInputFiles sink, when present, comes first in |sinks| and *must*
+ // be populated before any other sink; the others can be populated in any
+ // order.
virtual bool ProcessFile(const std::vector<RangeSink*>& sinks) = 0;
+
+ // Returns true once there are no more files to process. It is only called
+ // after ProcessFile() has run at least once, so the default (always true) is
+ // correct for any FileHandler that only processes one file at a time.
+ virtual bool IsDone() { return true; }
};
std::unique_ptr<FileHandler> TryOpenELFFile(const InputFile& file);
std::unique_ptr<FileHandler> TryOpenMachOFile(const InputFile& file);
+std::unique_ptr<FileHandler> TryOpenPackFile(const InputFile& file);
namespace dwarf {
@@ -341,6 +376,38 @@
};
+// MemoryMap ///////////////////////////////////////////////////////////////////
+
+// Contains a RangeMap for VM space and one for file space, tagged with the
+// DataSource they describe.  |munger| may be null (names are then unchanged).
+class MemoryMap {
+ public:
+  MemoryMap(DataSource source, std::unique_ptr<NameMunger>&& munger);
+  virtual ~MemoryMap();
+
+  bool FindAtAddr(uint64_t vmaddr, std::string* name) const;
+  bool FindContainingAddr(uint64_t vmaddr, uint64_t* start,
+                          std::string* name) const;
+
+  DataSource source() const { return source_; }
+  const RangeMap* file_map() const { return &file_map_; }
+  const RangeMap* vm_map() const { return &vm_map_; }
+  RangeMap* file_map() { return &file_map_; }
+  RangeMap* vm_map() { return &vm_map_; }
+
+ private:
+  BLOATY_DISALLOW_COPY_AND_ASSIGN(MemoryMap);
+  friend class RangeSink;
+  // Runs |name| through munger_, or copies it unchanged when munger_ is null.
+  std::string ApplyNameRegexes(absl::string_view name);
+
+  DataSource source_;
+  RangeMap vm_map_;
+  RangeMap file_map_;
+  std::unique_ptr<NameMunger> munger_;
+};
+
+
// Top-level API ///////////////////////////////////////////////////////////////
// This should only be used by main.cc and unit tests.
diff --git a/src/elf.cc b/src/elf.cc
index 97191ea..5418e69 100644
--- a/src/elf.cc
+++ b/src/elf.cc
@@ -744,11 +744,6 @@
}
}
- if (report_by == kReportByFilename) {
- // Cover unmapped parts of the file.
- sink->AddFileRange(filename, elf.entire_file());
- }
-
return true;
});
}
@@ -840,19 +835,17 @@
} // namespace
class ElfFileHandler : public FileHandler {
- bool ProcessBaseMap(RangeSink* sink) override {
- if (IsObjectFile(sink->input_file().data())) {
- return DoReadELFSections(sink, kReportBySectionName);
- } else {
- // Slightly more complete for executables, but not present in object
- // files.
- return ReadELFSegments(sink);
- }
- }
-
bool ProcessFile(const std::vector<RangeSink*>& sinks) override {
for (auto sink : sinks) {
switch (sink->data_source()) {
+      case DataSource::kInputFiles:
+        // Object files have no segments, so cover them by section instead.
+        if (IsObjectFile(sink->input_file().data())) {
+          CHECK_RETURN(DoReadELFSections(sink, kReportBySectionName));
+        } else {
+          CHECK_RETURN(ReadELFSegments(sink));
+        }
+        break;  // Not `return`: the sinks after this one must still be processed.
case DataSource::kSegments:
CHECK_RETURN(ReadELFSegments(sink));
break;
diff --git a/src/macho.cc b/src/macho.cc
index 9bf3b96..16d60fd 100644
--- a/src/macho.cc
+++ b/src/macho.cc
@@ -206,13 +206,11 @@
}
class MachOFileHandler : public FileHandler {
- bool ProcessBaseMap(RangeSink* sink) override {
- return ParseMachOSegments(sink);
- }
-
bool ProcessFile(const std::vector<RangeSink*>& sinks) override {
for (auto sink : sinks) {
switch (sink->data_source()) {
+      case DataSource::kInputFiles:
+        // Same handling as kSegments; no early return, so later sinks still run.
case DataSource::kSegments:
CHECK_RETURN(ParseMachOSegments(sink));
break;
diff --git a/src/pack.cc b/src/pack.cc
new file mode 100644
index 0000000..844f8ed
--- /dev/null
+++ b/src/pack.cc
@@ -0,0 +1,77 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// A pack file stores the parsed results from reading one or more input files.
+// This lets users store the results of parsing in a way that is easy to do
+// further processing on later.
+//
+// We use an SSTable file to store the data. This provides random access so
+// that we only need to scan the parts of the file corresponding to the "-d"
+// options the user passes.
+
+#include "bloaty.h"
+
+namespace bloaty {
+
+// We pack several values into the key and value.  This is the key half.
+struct Key {
+  DataSource data_source;
+  std::string filename;
+  uint64_t start_address;
+
+  void Pack(std::string* serialized) const {}   // TODO: currently a no-op.
+  void Unpack(absl::string_view serialized) {}  // TODO: currently a no-op.
+};
+
+struct Value {  // Value half of a packed entry.
+  int64_t range_size;
+  std::string label;
+
+  // Only for DataSource::kSegments and DataSource::kSections.
+  uint64_t file_start_address;
+  int64_t file_size;
+
+  void Pack(std::string* serialized) const {}   // TODO: currently a no-op.
+  void Unpack(absl::string_view serialized) {}  // TODO: currently a no-op.
+};
+
+// Placeholder handler for pack files: every requested data source is rejected.
+class PackFileHandler : public FileHandler {
+  bool ProcessFile(const std::vector<RangeSink*>& sinks) override {
+    for (RangeSink* requested : sinks) {
+      switch (requested->data_source()) {
+        case DataSource::kArchiveMembers:
+        case DataSource::kCompileUnits:
+        case DataSource::kInlines:
+        case DataSource::kSections:
+        case DataSource::kSegments:
+        case DataSource::kSymbols:
+        default:
+          return false;
+      }
+    }
+    // Only reached when |sinks| is empty.
+    return true;
+  }
+};
+// Stub: always returns nullptr, so no input is recognized as a pack file yet.
+std::unique_ptr<FileHandler> TryOpenPackFile(const InputFile& file) {
+ return nullptr;
+}
+// Stub: nothing is written; always returns false and |maps| is unused.
+bool WritePackFile(std::vector<MemoryMap*> maps) {
+ return false;
+}
+
+} // namespace
diff --git a/third_party/googletest b/third_party/googletest
index 7b6561c..a2b8a8e 160000
--- a/third_party/googletest
+++ b/third_party/googletest
@@ -1 +1 @@
-Subproject commit 7b6561c56e353100aca8458d7bc49c4e0119bae8
+Subproject commit a2b8a8e07628e5fd60644b6dd99c1b5e7d7f1f47
diff --git a/third_party/leveldb b/third_party/leveldb
new file mode 160000
index 0000000..646c358
--- /dev/null
+++ b/third_party/leveldb
@@ -0,0 +1 @@
+Subproject commit 646c3588de84ac532a0e3525eae03edae1ea759f
diff --git a/third_party/re2 b/third_party/re2
index 16dd885..c964d9b 160000
--- a/third_party/re2
+++ b/third_party/re2
@@ -1 +1 @@
-Subproject commit 16dd8856b79b3c6163a5b6da40aa45267031a79d
+Subproject commit c964d9b07816f9b52ca692e23b8b3dba33beec54