Added experimental support for WebAssembly. Currently, only sections and symbols are supported.

commit: bd1e2a521928c7d6f83b8918b430ce026c97a01a [log] [tgz]
author: Joshua Haberman <jhaberman@gmail.com> Sat Jul 21 13:18:41 2018 -0700
committer: Joshua Haberman <jhaberman@gmail.com> Sat Jul 21 13:18:41 2018 -0700
tree: c97856a1a251beddaddb744cbde5da426dc6aa4c
parent: 2adf8706bdfa4ae93d43e4f829f53fa7c0983e0e [diff]
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5ae4569..63c0df2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt

@@ -89,6 +89,7 @@
     src/elf.cc
     src/macho.cc
     src/range_map.cc
+    src/webassembly.cc
     # Until Abseil has a proper CMake build system
     third_party/abseil-cpp/absl/base/internal/raw_logging.cc # Grrrr...
     third_party/abseil-cpp/absl/base/internal/throw_delegate.cc

diff --git a/README.md b/README.md
index 1f4fa3c..da8e515 100644
--- a/README.md
+++ b/README.md

@@ -3,15 +3,24 @@
 
 [![Build Status](https://travis-ci.org/google/bloaty.svg?branch=master)](https://travis-ci.org/google/bloaty)
 
-Ever wondered what's making your ELF or Mach-O binary big?
-Bloaty McBloatface will show you a size profile of the binary
-so you can understand what's taking up space inside.
+Ever wondered what's making your binary big?  Bloaty
+McBloatface will show you a size profile of the binary so
+you can understand what's taking up space inside.
 
 Bloaty works on binaries, shared objects, object files, and
-static libraries (`.a` files).  It supports ELF/DWARF and
-Mach-O, though the Mach-O support is much more preliminary
-(it shells out to `otool`/`symbols` instead of parsing the
-file directly).
+static libraries (`.a` files).  The following file formats
+are supported:
+
+* ELF
+* Mach-O
+* WebAssembly (experimental)
+
+These formats are NOT supported yet, but I am very interested
+in adding support for them (I may implement these myself, but
+would also be happy to get contributions!):
+
+* PE/COFF (not supported)
+* Android APK (not supported, might be tricky due to compression)
 
 This is not an official Google product.
 

diff --git a/src/bloaty.cc b/src/bloaty.cc
index 8834c0c..acb1606 100644
--- a/src/bloaty.cc
+++ b/src/bloaty.cc

@@ -706,16 +706,36 @@
        << PercentString(row.filepercent, diff_mode_) << "\n";
 }
 
+// Returns true when labels |a| and |b| should be treated as the same name:
+// either they compare equal, or one of them ends with the other immediately
+// followed by "]" (for example "Code" and "[section Code]").
+bool RollupOutput::IsSame(const std::string& a, const std::string& b) {
+  if (a == b) {
+    return true;
+  }
+
+  // Suffix match in either direction: one label is "...<other>]".
+  if (absl::EndsWith(b, a + "]") || absl::EndsWith(a, b + "]")) {
+    return true;
+  }
+
+  return false;
+}
+
 void RollupOutput::PrettyPrintTree(const RollupRow& row, size_t indent,
                                    size_t longest_label,
                                    std::ostream* out) const {
   // Rows are printed before their sub-rows.
   PrettyPrintRow(row, indent, longest_label, out);
 
-  if (row.vmsize || row.filesize) {
-    for (const auto& child : row.sorted_children) {
-      PrettyPrintTree(child, indent + 4, longest_label, out);
-    }
+  // A row whose VM size and file size are both zero does not get its
+  // children printed.
+  if (!row.vmsize && !row.filesize) {
+    return;
+  }
+
+  // Do not descend into a lone leaf child whose name matches this row's name
+  // according to IsSame() -- presumably because it would only repeat the
+  // line that was just printed.
+  if (row.sorted_children.size() == 1 &&
+      row.sorted_children[0].sorted_children.size() == 0 &&
+      IsSame(row.name, row.sorted_children[0].name)) {
+    return;
+  }
+
+  // Children are printed recursively, indented four more columns.
+  for (const auto& child : row.sorted_children) {
+    PrettyPrintTree(child, indent + 4, longest_label, out);
   }
 }
 
@@ -1267,6 +1287,10 @@
   }
 
   if (!object_file.get()) {
+    object_file = TryOpenWebAssemblyFile(file);
+  }
+
+  if (!object_file.get()) {
     THROWF("unknown file type for file '$0'", filename.c_str());
   }
 

diff --git a/src/bloaty.h b/src/bloaty.h
index 8f6f8e3..780b1c8 100644
--- a/src/bloaty.h
+++ b/src/bloaty.h

@@ -302,6 +302,7 @@
 
 std::unique_ptr<ObjectFile> TryOpenELFFile(std::unique_ptr<InputFile>& file);
 std::unique_ptr<ObjectFile> TryOpenMachOFile(std::unique_ptr<InputFile>& file);
+std::unique_ptr<ObjectFile> TryOpenWebAssemblyFile(std::unique_ptr<InputFile>& file);
 
 namespace dwarf {
 
@@ -499,6 +500,7 @@
   // When we are in diff mode, rollup sizes are relative to the baseline.
   bool diff_mode_ = false;
 
+  static bool IsSame(const std::string& a, const std::string& b);
   void PrettyPrint(size_t max_label_len, std::ostream* out) const;
   void PrintToCSV(std::ostream* out) const;
   size_t CalculateLongestLabel(const RollupRow& row, int indent) const;

diff --git a/src/webassembly.cc b/src/webassembly.cc
new file mode 100644
index 0000000..c358a34
--- /dev/null
+++ b/src/webassembly.cc

@@ -0,0 +1,327 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "bloaty.h"
+
+#include "absl/strings/substitute.h"
+
+// Throws a bloaty::Error built from |str|, this source file's name and
+// |line|.  Never returns.  Called through the THROW/THROWF macros, which pass
+// __LINE__ so that |line| identifies the call site.
+ABSL_ATTRIBUTE_NORETURN
+static void Throw(const char *str, int line) {
+  throw bloaty::Error(str, __FILE__, line);
+}
+
+// THROW(msg):   raise a bloaty::Error with a fixed message.
+// THROWF(...):  same, with the message formatted by absl::Substitute.
+// WARN(x):      print "bloaty: <x>" followed by a newline to stderr.
+// NOTE(review): WARN's expansion already ends in ';', so writing `WARN(...);`
+// produces an extra empty statement.
+#define THROW(msg) Throw(msg, __LINE__)
+#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
+#define WARN(x) fprintf(stderr, "bloaty: %s\n", x);
+
+using absl::string_view;
+
+namespace bloaty {
+namespace wasm {
+
+// Reads a fixed-size value of type T from the front of |*data| and advances
+// |*data| past it.  The bytes are copied with memcpy(), so they are
+// interpreted in the host's byte order.  Throws if fewer than sizeof(T)
+// bytes remain.
+template <class T>
+T ReadMemcpy(string_view* data) {
+  T ret;
+  if (data->size() < sizeof(T)) {
+    THROW("premature EOF reading fixed-length wasm data");
+  }
+  memcpy(&ret, data->data(), sizeof(T));
+  data->remove_prefix(sizeof(T));
+  return ret;
+}
+
+// Decodes one LEB128-encoded integer from the front of |*data| and advances
+// |*data| past the bytes consumed.
+//
+//   is_signed: when true, the result is sign-extended if the final byte has
+//              its 0x40 bit set and fewer than |size| bits were consumed.
+//   size:      bit width of the value being read; it is only consulted for
+//              the sign-extension check above (the decoded value is not
+//              range-checked against it).
+//
+// Throws if no terminating byte (high bit clear) is found before the end of
+// |*data| or within the first 10 bytes.
+uint64_t ReadLEB128Internal(bool is_signed, size_t size, string_view* data) {
+  uint64_t ret = 0;
+  int shift = 0;
+  // 10 groups of 7 bits; the loop stops once |shift| reaches this value.
+  int maxshift = 70;
+  const char* ptr = data->data();
+  const char* limit = ptr + data->size();
+
+  while (ptr < limit && shift < maxshift) {
+    char byte = *(ptr++);
+    // The low 7 bits of each byte are payload, least-significant group first.
+    ret |= static_cast<uint64_t>(byte & 0x7f) << shift;
+    shift += 7;
+    // A clear high bit marks the last byte of the encoding.
+    if ((byte & 0x80) == 0) {
+      data->remove_prefix(ptr - data->data());
+      // NOTE(review): |shift| is int and |size| is size_t, so this is a
+      // signed/unsigned comparison.
+      if (is_signed && shift < size && (byte & 0x40)) {
+        ret |= -(1ULL << shift);
+      }
+      return ret;
+    }
+  }
+
+  THROW("corrupt wasm data, unterminated LEB128");
+}
+
+// Reads an unsigned LEB128 value from |*data| and returns whether it is
+// non-zero.  Advances |*data|; throws on an unterminated encoding.
+bool ReadVarUInt1(string_view* data) {
+  return static_cast<bool>(ReadLEB128Internal(false, 1, data));
+}
+
+// Reads an unsigned LEB128 value from |*data| and returns it narrowed to
+// char.  Advances |*data|; throws on an unterminated encoding.
+// NOTE(review): a decoded value that does not fit is not rejected, it is
+// simply converted by the static_cast.
+char ReadVarUInt7(string_view* data) {
+  return static_cast<char>(ReadLEB128Internal(false, 7, data));
+}
+
+// Reads an unsigned LEB128 value from |*data| and returns it narrowed to
+// uint32_t.  Advances |*data|; throws on an unterminated encoding.
+// NOTE(review): a decoded value wider than 32 bits is not rejected, it is
+// truncated by the static_cast.
+uint32_t ReadVarUInt32(string_view* data) {
+  return static_cast<uint32_t>(ReadLEB128Internal(false, 32, data));
+}
+
+// Returns a view of the first |bytes| bytes of |*data| and advances |*data|
+// past them.  Throws if fewer than |bytes| bytes remain.
+string_view ReadPiece(size_t bytes, string_view* data) {
+  if (data->size() < bytes) {
+    // This is the WebAssembly parser, so the message names wasm data.
+    THROW("premature EOF reading variable-length wasm data");
+  }
+  string_view ret = data->substr(0, bytes);
+  data->remove_prefix(bytes);
+  return ret;
+}
+
+// Checks for the WebAssembly file header at the front of |*data|.
+//
+// Returns false if |*data| is too short to hold the 4-byte magic (in which
+// case |*data| is left untouched) or if the magic does not match.  Otherwise
+// also consumes the 4-byte version field and returns true; throws if the
+// version field is truncated.
+bool ReadMagic(string_view* data) {
+  // The bytes 00 61 73 6d ("\0asm") read as a little-endian uint32_t.
+  // NOTE(review): ReadMemcpy() uses host byte order, so this comparison
+  // assumes a little-endian host.
+  const uint32_t wasm_magic = 0x6d736100;
+
+  // An input too short to contain the magic is simply "not wasm".  Returning
+  // false (rather than letting ReadMemcpy() throw) lets file-type probing
+  // continue and report the file as unrecognized.
+  if (data->size() < sizeof(uint32_t)) {
+    return false;
+  }
+
+  uint32_t magic = ReadMemcpy<uint32_t>(data);
+
+  if (magic != wasm_magic) {
+    return false;
+  }
+
+  // TODO(haberman): do we need to fail if this is >1?
+  uint32_t version = ReadMemcpy<uint32_t>(data);
+  (void)version;
+
+  return true;
+}
+
+// One section of a WebAssembly module, as located by Section::Read().
+class Section {
+ public:
+  // Section id.  0 denotes a custom section, which carries its own name.
+  int id;
+  // Name read from the file for custom sections, otherwise names[id].
+  std::string name;
+  // The whole section, including its id/size header.
+  string_view data;
+  // The section payload: everything after the header and, for custom
+  // sections, after the name.
+  string_view contents;
+
+  // Parses the section at the front of |*data_param| and advances
+  // |*data_param| to the start of the following section.  Throws if the
+  // section id is unknown or the declared size runs past the end of the
+  // input.
+  static Section Read(string_view* data_param) {
+    Section ret;
+    string_view data = *data_param;
+    string_view section_data = data;
+
+    // ReadVarUInt7() returns a char, which may be signed.  Convert through
+    // unsigned char so that an out-of-range id can never become negative and
+    // slip past the range check below into an out-of-bounds names[] lookup.
+    ret.id = static_cast<unsigned char>(ReadVarUInt7(&data));
+    uint32_t size = ReadVarUInt32(&data);
+    size_t header_size = data.data() - section_data.data();
+
+    // Report a truncated section with this file's own error type instead of
+    // relying on substr() to raise std::out_of_range.
+    if (size > data.size()) {
+      THROW("premature EOF reading wasm section");
+    }
+
+    string_view next_section = data.substr(size);
+    data = data.substr(0, size);
+    ret.data = section_data.substr(0, size + header_size);
+
+    if (ret.id == 0) {
+      // Custom section: the payload starts with a length-prefixed name.
+      uint32_t name_len = ReadVarUInt32(&data);
+      ret.name = std::string(ReadPiece(name_len, &data));
+    } else if (ret.id >= kType && ret.id <= kData) {
+      ret.name = names[ret.id];
+    } else {
+      THROWF("Unknown section id: $0", ret.id);
+    }
+
+    ret.contents = data;
+    *data_param = next_section;
+    return ret;
+  }
+
+  // Ids of the known (non-custom) sections.
+  enum Name {
+    kType      = 1,
+    kImport    = 2,
+    kFunction  = 3,
+    kTable     = 4,
+    kMemory    = 5,
+    kGlobal    = 6,
+    kExport    = 7,
+    kStart     = 8,
+    kElement   = 9,
+    kCode      = 10,
+    kData      = 11,
+  };
+
+  // Display names indexed by section id.
+  static const char* names[];
+};
+
+// Display names for the known sections, indexed by section id.  Entry 0 is a
+// placeholder: Section::Read() takes the name of an id-0 (custom) section
+// from the file instead of from this table.
+const char* Section::names[] = {
+  "<none>",    // 0
+  "Type",      // 1
+  "Import",    // 2
+  "Function",  // 3
+  "Table",     // 4
+  "Memory",    // 5
+  "Global",    // 6
+  "Export",    // 7
+  "Start",     // 8
+  "Element",   // 9
+  "Code",      // 10
+  "Data",      // 11
+};
+
+// Calls |section_func| once for every section in |file|, in file order.
+// ReadMagic() is used to step over the file header; its return value is
+// ignored here.  Any exception thrown while parsing a section propagates to
+// the caller.
+template <class Func>
+void ForEachSection(string_view file, Func&& section_func) {
+  string_view data = file;
+  ReadMagic(&data);
+
+  // Section::Read() advances |data| to the next section each time.
+  while (!data.empty()) {
+    Section section = Section::Read(&data);
+    section_func(section);
+  }
+}
+
+// Reports every section of the sink's input file to |sink| as a file range
+// labeled with the section's name.  The range covers the whole section,
+// header included.
+void ParseSections(RangeSink* sink) {
+  ForEachSection(sink->input_file().data(), [sink](const Section& section) {
+    sink->AddFileRange("wasm_sections", section.name, section.data);
+  });
+}
+
+typedef std::unordered_map<int, std::string> FuncNames;
+
+// Parses the payload of a custom "name" section.
+//
+// For every entry of each function-names subsection, stores index -> name in
+// |*names| and reports the entry's bytes (index, length and name) to |sink|
+// as a file range labeled with the function's name.  Subsections of any
+// other type are skipped.  Throws if a subsection or a name runs past the
+// end of the available data.
+void ReadFunctionNames(const Section& section, FuncNames* names,
+                       RangeSink* sink) {
+  enum class NameType {
+    kModule = 0,
+    kFunction = 1,
+    kLocal = 2,
+  };
+
+  string_view data = section.contents;
+
+  while (!data.empty()) {
+    char type = ReadVarUInt7(&data);
+    uint32_t size = ReadVarUInt32(&data);
+    // ReadPiece() bounds-checks |size| and advances |data| to the next
+    // subsection, so a corrupt length is reported like every other parse
+    // error in this file rather than being silently truncated.
+    string_view subsection = ReadPiece(size, &data);
+
+    if (static_cast<NameType>(type) == NameType::kFunction) {
+      uint32_t count = ReadVarUInt32(&subsection);
+      for (uint32_t i = 0; i < count; i++) {
+        // Remember where this entry starts so its full extent can be
+        // reported once the name has been read.
+        string_view entry = subsection;
+        uint32_t index = ReadVarUInt32(&subsection);
+        uint32_t name_len = ReadVarUInt32(&subsection);
+        string_view name = ReadPiece(name_len, &subsection);
+        entry = entry.substr(0, name.data() - entry.data() + name.size());
+        sink->AddFileRange("wasm_funcname", name, entry);
+        (*names)[index] = std::string(name);
+      }
+    }
+  }
+}
+
+// Walks the function bodies of a Code section and reports each one to |sink|
+// as a file range covering its size prefix plus body.  The range is labeled
+// with the name found in |names| for the body's position in the section, or
+// with "func[N]" when no name is known.  Throws if a body runs past the end
+// of the section.
+//
+// NOTE(review): |names| is looked up by the body's position within the Code
+// section.  If the name section's indices also count imported functions, the
+// two would be offset by the number of imports -- confirm against the
+// WebAssembly specification.
+void ReadCodeSection(const Section& section, const FuncNames& names,
+                     RangeSink* sink) {
+  string_view data = section.contents;
+
+  uint32_t count = ReadVarUInt32(&data);
+
+  for (uint32_t i = 0; i < count; i++) {
+    string_view func = data;
+    uint32_t size = ReadVarUInt32(&data);
+    // Number of bytes taken by the LEB128 size prefix.
+    size_t prefix_size = data.data() - func.data();
+
+    // ReadPiece() bounds-checks |size| and advances |data| to the next body.
+    ReadPiece(size, &data);
+    // size_t arithmetic: the sum cannot wrap the way a uint32_t total could.
+    func = func.substr(0, prefix_size + size);
+
+    auto iter = names.find(i);
+
+    if (iter == names.end()) {
+      std::string name = "func[" + std::to_string(i) + "]";
+      sink->AddFileRange("wasm_function", name, func);
+    } else {
+      sink->AddFileRange("wasm_function", iter->second, func);
+    }
+  }
+}
+
+// Reports per-function ranges of the sink's input file to |sink|.  The file
+// is scanned twice because the function names must be collected before the
+// Code section is labeled with them.
+void ParseSymbols(RangeSink* sink) {
+  // First pass: read the custom naming section to get function names.
+  std::unordered_map<int, std::string> func_names;
+
+  ForEachSection(sink->input_file().data(),
+                 [&func_names, sink](const Section& section) {
+                   if (section.name == "name") {
+                     ReadFunctionNames(section, &func_names, sink);
+                   }
+                 });
+
+  // Second pass: read the function/code sections.
+  ForEachSection(sink->input_file().data(),
+                 [&func_names, sink](const Section& section) {
+                   if (section.id == Section::kCode) {
+                     ReadCodeSection(section, func_names, sink);
+                   }
+                 });
+}
+
+// Reports coarse ranges for the whole file to |sink|: each section labeled
+// "[section <name>]", plus the first 8 bytes (the magic and version fields
+// consumed by ReadMagic()) labeled "[WASM Header]".
+// NOTE(review): presumably these only take effect for bytes that no more
+// specific range has claimed -- that depends on RangeSink, which is not
+// shown here.
+void AddWebAssemblyFallback(RangeSink* sink) {
+  ForEachSection(sink->input_file().data(), [sink](const Section& section) {
+    std::string name2 =
+        std::string("[section ") + std::string(section.name) + std::string("]");
+    sink->AddFileRange("wasm_overhead", name2, section.data);
+  });
+  sink->AddFileRange("wasm_overhead", "[WASM Header]",
+                     sink->input_file().data().substr(0, 8));
+}
+
+// ObjectFile implementation for WebAssembly modules.  Supports the
+// section/segment and symbol data sources; every other data source is
+// rejected.
+class WebAssemblyObjectFile : public ObjectFile {
+ public:
+  // Takes ownership of |file_data| and hands it to the ObjectFile base.
+  WebAssemblyObjectFile(std::unique_ptr<InputFile> file_data)
+      : ObjectFile(std::move(file_data)) {}
+
+  // Always returns an empty string.
+  std::string GetBuildId() const override {
+    // TODO(haberman): does WebAssembly support this?
+    return std::string();
+  }
+
+  // Fills each sink in |sinks| according to its data source, then adds the
+  // fallback ranges from AddWebAssemblyFallback().  Throws for a data source
+  // this format does not handle.
+  void ProcessFile(const std::vector<RangeSink*>& sinks) const override {
+    for (auto sink : sinks) {
+      switch (sink->data_source()) {
+        // Segments are reported the same way as sections.
+        case DataSource::kSegments:
+        case DataSource::kSections:
+          ParseSections(sink);
+          break;
+        // All symbol flavors share one implementation.
+        case DataSource::kSymbols:
+        case DataSource::kRawSymbols:
+        case DataSource::kShortSymbols:
+        case DataSource::kFullSymbols:
+          ParseSymbols(sink);
+          break;
+        case DataSource::kArchiveMembers:
+        case DataSource::kCompileUnits:
+        case DataSource::kInlines:
+        default:
+          THROW("WebAssembly doesn't support this data source");
+      }
+      AddWebAssemblyFallback(sink);
+    }
+  }
+
+  // Disassembly is not implemented: prints a warning to stderr and returns
+  // false without touching any of the arguments.
+  bool GetDisassemblyInfo(absl::string_view /*symbol*/,
+                          DataSource /*symbol_source*/,
+                          DisassemblyInfo* /*info*/) const override {
+    WARN("WebAssembly files do not support disassembly yet");
+    return false;
+  }
+};
+
+}  // namespace wasm
+
+// Returns a WebAssemblyObjectFile for |file| if wasm::ReadMagic() accepts its
+// header, otherwise nullptr.  Ownership of |file| is taken (moved from) only
+// in the success case.  The header check runs on a local copy of the view,
+// so it does not consume any of the file's data.
+std::unique_ptr<ObjectFile> TryOpenWebAssemblyFile(
+    std::unique_ptr<InputFile>& file) {
+  string_view data = file->data();
+  if (wasm::ReadMagic(&data)) {
+    return std::unique_ptr<ObjectFile>(
+        new wasm::WebAssemblyObjectFile(std::move(file)));
+  }
+
+  return nullptr;
+}
+
+}  // namespace bloaty
commit	bd1e2a521928c7d6f83b8918b430ce026c97a01a	[log] [tgz]
author	Joshua Haberman <jhaberman@gmail.com>	Sat Jul 21 13:18:41 2018 -0700
committer	Joshua Haberman <jhaberman@gmail.com>	Sat Jul 21 13:18:41 2018 -0700
tree	c97856a1a251beddaddb744cbde5da426dc6aa4c
parent	2adf8706bdfa4ae93d43e4f829f53fa7c0983e0e [diff]