Merge pull request #140 from juliehockett/cmake
Propagate linker flags from the command line
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 918b2fc..6f8da51 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,6 @@
-cmake_minimum_required (VERSION 2.6)
-project (Bloaty)
+cmake_minimum_required (VERSION 3.0)
+cmake_policy(SET CMP0048 NEW)
+project (Bloaty VERSION 1.0)
# Options we define for users.
option(BLOATY_ENABLE_ASAN "Enable address sanitizer." OFF)
@@ -89,10 +90,14 @@
src/elf.cc
src/macho.cc
src/range_map.cc
+ src/webassembly.cc
# Until Abseil has a proper CMake build system
third_party/abseil-cpp/absl/base/internal/raw_logging.cc # Grrrr...
third_party/abseil-cpp/absl/base/internal/throw_delegate.cc
third_party/abseil-cpp/absl/strings/ascii.cc
+ third_party/abseil-cpp/absl/strings/charconv.cc
+ third_party/abseil-cpp/absl/strings/internal/charconv_bigint.cc
+ third_party/abseil-cpp/absl/strings/internal/charconv_parse.cc
third_party/abseil-cpp/absl/strings/escaping.cc
third_party/abseil-cpp/absl/strings/internal/memutil.cc
third_party/abseil-cpp/absl/strings/internal/utf8.cc
@@ -135,34 +140,36 @@
RUNTIME DESTINATION bin
)
- enable_testing()
+ if (IS_DIRECTORY "${PROJECT_SOURCE_DIR}/tests")
+ enable_testing()
- if(BUILD_TESTING)
- add_subdirectory(third_party/googletest)
- include_directories(third_party/googletest/googletest/include)
- include_directories(third_party/googletest/googlemock/include)
+ if(BUILD_TESTING)
+ add_subdirectory(third_party/googletest)
+ include_directories(third_party/googletest/googletest/include)
+ include_directories(third_party/googletest/googlemock/include)
- set(TEST_TARGETS
- bloaty_test
- bloaty_misc_test
- range_map_test
- )
+ set(TEST_TARGETS
+ bloaty_test
+ bloaty_misc_test
+ range_map_test
+ )
- foreach(target ${TEST_TARGETS})
- add_executable(${target} tests/${target}.cc)
- target_link_libraries(${target} "${LIBBLOATY_LIBS}" gtest_main gmock "${CMAKE_THREAD_LIBS_INIT}")
- endforeach(target)
+ foreach(target ${TEST_TARGETS})
+ add_executable(${target} tests/${target}.cc)
+ target_link_libraries(${target} "${LIBBLOATY_LIBS}" gtest_main gmock "${CMAKE_THREAD_LIBS_INIT}")
+ endforeach(target)
- add_executable(fuzz_test tests/fuzz_target.cc tests/fuzz_driver.cc)
- target_link_libraries(fuzz_test "${LIBBLOATY_LIBS}" "${CMAKE_THREAD_LIBS_INIT}")
+ add_executable(fuzz_test tests/fuzz_target.cc tests/fuzz_driver.cc)
+ target_link_libraries(fuzz_test "${LIBBLOATY_LIBS}" "${CMAKE_THREAD_LIBS_INIT}")
- file(GLOB fuzz_corpus tests/testdata/fuzz_corpus/*)
+ file(GLOB fuzz_corpus tests/testdata/fuzz_corpus/*)
- add_test(NAME range_map_test COMMAND range_map_test)
- add_test(NAME bloaty_test_x86-64 COMMAND bloaty_test WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/testdata/linux-x86_64)
- add_test(NAME bloaty_test_x86 COMMAND bloaty_test WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/testdata/linux-x86)
- add_test(NAME bloaty_misc_test COMMAND bloaty_misc_test WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/testdata/misc)
- add_test(NAME fuzz_test COMMAND fuzz_test ${fuzz_corpus} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/testdata/fuzz_corpus)
+ add_test(NAME range_map_test COMMAND range_map_test)
+ add_test(NAME bloaty_test_x86-64 COMMAND bloaty_test WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/testdata/linux-x86_64)
+ add_test(NAME bloaty_test_x86 COMMAND bloaty_test WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/testdata/linux-x86)
+ add_test(NAME bloaty_misc_test COMMAND bloaty_misc_test WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/testdata/misc)
+ add_test(NAME fuzz_test COMMAND fuzz_test ${fuzz_corpus} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/testdata/fuzz_corpus)
+ endif()
endif()
install(EXPORT ${PROJECT_NAME}Targets NAMESPACE ${PROJECT_NAME} DESTINATION lib/${PROJECT_NAME})
diff --git a/README.md b/README.md
index 11ed7dd..1992099 100644
--- a/README.md
+++ b/README.md
@@ -3,15 +3,24 @@
[![Build Status](https://travis-ci.org/google/bloaty.svg?branch=master)](https://travis-ci.org/google/bloaty)
-Ever wondered what's making your ELF or Mach-O binary big?
-Bloaty McBloatface will show you a size profile of the binary
-so you can understand what's taking up space inside.
+Ever wondered what's making your binary big? Bloaty
+McBloatface will show you a size profile of the binary so
+you can understand what's taking up space inside.
Bloaty works on binaries, shared objects, object files, and
-static libraries (`.a` files). It supports ELF/DWARF and
-Mach-O, though the Mach-O support is much more preliminary
-(it shells out to `otool`/`symbols` instead of parsing the
-file directly).
+static libraries (`.a` files). The following file formats
+are supported:
+
+* ELF
+* Mach-O
+* WebAssembly (experimental)
+
+These formats are NOT supported, but I am very interested
+in adding support for them (I may implement these myself, but
+would also be happy to get contributions!):
+
+* PE/COFF (not supported)
+* Android APK (not supported, might be tricky due to compression)
This is not an official Google product.
@@ -25,7 +34,7 @@
$ make -j6
```
-To run tests, type:
+To run tests (Git checkouts only; they are not included in the release tarball), type:
```
$ make test
@@ -53,28 +62,28 @@
```
VM SIZE FILE SIZE
-------------- --------------
- 0.0% 0 .debug_info 13.0Mi 37.6%
- 0.0% 0 .debug_loc 7.45Mi 21.5%
- 0.0% 0 .debug_str 5.14Mi 14.8%
- 40.1% 2.17Mi .text 2.17Mi 6.3%
- 0.0% 0 .debug_ranges 1.83Mi 5.3%
- 30.6% 1.66Mi .rodata 1.66Mi 4.8%
- 0.0% 0 .debug_line 878Ki 2.5%
- 0.0% 0 .strtab 458Ki 1.3%
- 7.1% 394Ki .rela.dyn 394Ki 1.1%
- 6.4% 357Ki .dynstr 357Ki 1.0%
- 5.5% 307Ki .data.rel.ro 307Ki 0.9%
- 0.0% 0 .debug_abbrev 283Ki 0.8%
- 4.2% 235Ki .eh_frame 235Ki 0.7%
- 0.0% 0 .symtab 187Ki 0.5%
- 2.2% 123Ki .dynsym 123Ki 0.3%
- 1.0% 54.1Ki .data 54.1Ki 0.2%
- 0.8% 44.6Ki .gcc_except_table 44.6Ki 0.1%
- 0.7% 39.6Ki .gnu.hash 39.6Ki 0.1%
- 0.7% 36.5Ki .eh_frame_hdr 36.5Ki 0.1%
- 0.5% 30.0Ki [24 Others] 29.7Ki 0.1%
- 0.0% 0 .debug_aranges 27.3Ki 0.1%
- 100.0% 5.42Mi TOTAL 34.7Mi 100.0%
+ 0.0% 0 .debug_info 10.7Mi 37.1%
+ 0.0% 0 .debug_loc 5.39Mi 18.6%
+ 0.0% 0 .debug_str 4.48Mi 15.5%
+ 37.7% 1.86Mi .text 1.86Mi 6.4%
+ 0.0% 0 .debug_ranges 1.67Mi 5.8%
+ 32.6% 1.61Mi .rodata 1.61Mi 5.6%
+ 0.0% 0 .debug_line 856Ki 2.9%
+ 0.0% 0 .strtab 470Ki 1.6%
+ 7.2% 362Ki .dynstr 362Ki 1.2%
+ 6.4% 321Ki .rela.dyn 321Ki 1.1%
+ 6.1% 307Ki .data.rel.ro 307Ki 1.0%
+ 0.0% 0 .debug_abbrev 241Ki 0.8%
+ 4.6% 232Ki .eh_frame 232Ki 0.8%
+ 0.0% 0 .symtab 188Ki 0.6%
+ 2.5% 123Ki .dynsym 123Ki 0.4%
+ 1.0% 48.4Ki .gcc_except_table 48.4Ki 0.2%
+ 0.8% 39.8Ki .gnu.hash 39.8Ki 0.1%
+ 0.7% 36.6Ki .eh_frame_hdr 36.6Ki 0.1%
+ 0.0% 0 .debug_aranges 27.1Ki 0.1%
+ 0.4% 17.7Ki [23 Others] 14.5Ki 0.0%
+ 0.2% 10.3Ki .gnu.version 10.3Ki 0.0%
+ 100.0% 4.93Mi TOTAL 28.9Mi 100.0%
```
The "VM SIZE" column tells you how much space the binary
@@ -95,58 +104,58 @@
```
$ ./bloaty bloaty -d compileunits
- VM SIZE FILE SIZE
- -------------- --------------
- 62.3% 3.04Mi [None] 31.1Mi 94.4%
- 11.2% 557Ki [91 Others] 556Ki 1.7%
- 3.7% 182Ki third_party/protobuf/src/google/protobuf/descriptor.cc 179Ki 0.5%
- 3.2% 162Ki third_party/protobuf/src/google/protobuf/descriptor.pb.cc 161Ki 0.5%
- 2.4% 117Ki third_party/capstone/arch/AArch64/AArch64InstPrinter.c 117Ki 0.3%
- 2.1% 103Ki third_party/capstone/arch/ARM/ARMDisassembler.c 103Ki 0.3%
- 1.9% 96.5Ki third_party/capstone/arch/Sparc/SparcInstPrinter.c 96.5Ki 0.3%
- 1.6% 82.1Ki third_party/demumble/third_party/libcxxabi/cxa_demangle.cpp 82.1Ki 0.2%
- 1.5% 74.7Ki third_party/capstone/arch/PowerPC/PPCInstPrinter.c 74.7Ki 0.2%
- 1.2% 61.8Ki third_party/protobuf/src/google/protobuf/generated_message_reflection.cc 61.8Ki 0.2%
- 1.2% 59.8Ki src/bloaty.cc 59.7Ki 0.2%
- 1.1% 55.1Ki third_party/protobuf/src/google/protobuf/text_format.cc 55.1Ki 0.2%
- 0.9% 43.3Ki third_party/capstone/arch/ARM/ARMInstPrinter.c 43.3Ki 0.1%
- 0.8% 41.9Ki third_party/re2/re2/parse.cc 41.9Ki 0.1%
- 0.8% 39.1Ki third_party/protobuf/src/google/protobuf/map_field.cc 39.1Ki 0.1%
- 0.7% 36.1Ki third_party/protobuf/src/google/protobuf/wire_format.cc 36.1Ki 0.1%
- 0.7% 36.0Ki src/dwarf.cc 36.0Ki 0.1%
- 0.7% 35.3Ki third_party/re2/re2/re2.cc 35.3Ki 0.1%
- 0.7% 33.8Ki third_party/protobuf/src/google/protobuf/extension_set.cc 33.8Ki 0.1%
- 0.6% 30.8Ki third_party/capstone/arch/AArch64/AArch64Disassembler.c 30.8Ki 0.1%
- 0.6% 29.4Ki third_party/re2/re2/dfa.cc 29.4Ki 0.1%
- 100.0% 4.87Mi TOTAL 32.9Mi 100.0%
+ VM SIZE FILE SIZE
+ -------------- --------------
+ 44.2% 2.18Mi [137 Others] 10.7Mi 36.9%
+ 5.4% 271Ki ../third_party/protobuf/src/google/protobuf/descriptor.cc 3.92Mi 13.5%
+ 7.1% 360Ki ../third_party/protobuf/src/google/protobuf/descriptor.pb.cc 2.39Mi 8.2%
+ 8.3% 416Ki ../third_party/capstone/arch/ARM/ARMDisassembler.c 1.57Mi 5.4%
+ 1.7% 87.4Ki ../third_party/protobuf/src/google/protobuf/text_format.cc 1.00Mi 3.5%
+ 2.1% 106Ki ../third_party/protobuf/src/google/protobuf/generated_message_reflection.cc 959Ki 3.2%
+ 0.8% 38.1Ki ../third_party/protobuf/src/google/protobuf/descriptor_database.cc 771Ki 2.6%
+ 1.5% 73.4Ki ../third_party/protobuf/src/google/protobuf/message.cc 754Ki 2.5%
+ 2.5% 126Ki ../src/bloaty.cc 753Ki 2.5%
+ 0.9% 43.5Ki ../third_party/re2/re2/dfa.cc 648Ki 2.2%
+ 1.2% 60.5Ki ../third_party/protobuf/src/google/protobuf/extension_set.cc 610Ki 2.1%
+ 0.8% 42.0Ki ../third_party/re2/re2/re2.cc 595Ki 2.0%
+ 0.6% 28.2Ki ../third_party/protobuf/src/google/protobuf/generated_message_util.cc 572Ki 1.9%
+ 1.1% 56.5Ki ../third_party/protobuf/src/google/protobuf/map_field.cc 565Ki 1.9%
+ 0.8% 42.5Ki ../third_party/re2/re2/regexp.cc 543Ki 1.8%
+ 1.8% 91.3Ki ../third_party/capstone/arch/AArch64/AArch64Disassembler.c 523Ki 1.8%
+ 1.0% 50.9Ki ../third_party/protobuf/src/google/protobuf/wire_format.cc 520Ki 1.8%
+ 1.8% 88.8Ki ../third_party/demumble/third_party/libcxxabi/cxa_demangle.cpp 490Ki 1.7%
+ 3.2% 163Ki ../third_party/capstone/arch/AArch64/AArch64InstPrinter.c 456Ki 1.5%
+ 6.5% 329Ki ../third_party/capstone/arch/X86/X86DisassemblerDecoder.c 427Ki 1.4%
+ 6.7% 337Ki ../third_party/capstone/arch/X86/X86Mapping.c 417Ki 1.4%
+ 100.0% 4.93Mi TOTAL 28.9Mi 100.0%
```
Run Bloaty with `--help` to see a list of available options:
```
-$ ./bloaty --help
Bloaty McBloatface: a size profiler for binaries.
-USAGE: bloaty [options] file... [-- base_file...]
+USAGE: bloaty [OPTION]... FILE... [-- BASE_FILE...]
Options:
--csv Output in CSV format instead of human-readable.
--tsv Output in TSV format instead of human-readable.
- -c <file> Load configuration from <file>.
- -d <sources> Comma-separated list of sources to scan.
- -C <mode> How to demangle symbols. Possible values are:
- --demangle=<mode> --demangle=none no demangling, print raw symbols
+ -c FILE Load configuration from <file>.
+ -d SOURCE,SOURCE Comma-separated list of sources to scan.
+ --debug-file=FILE Use this file for debug symbols and/or symbol table.
+ -C MODE How to demangle symbols. Possible values are:
+ --demangle=MODE --demangle=none no demangling, print raw symbols
--demangle=short demangle, but omit arg/return types
--demangle=full print full demangled type
The default is --demangle=short.
- --disassemble=<function>
+ --disassemble=FUNCTION
Disassemble this function (EXPERIMENTAL)
- -n <num> How many rows to show per level before collapsing
+ -n NUM How many rows to show per level before collapsing
other keys into '[Other]'. Set to '0' for unlimited.
Defaults to 20.
- -s <sortby> Whether to sort by VM or File size. Possible values
+ -s SORTBY Whether to sort by VM or File size. Possible values
are:
-s vm
-s file
@@ -217,55 +226,56 @@
```
$ bloaty -d segments,sections bloaty
- VM SIZE FILE SIZE
+ VM SIZE FILE SIZE
-------------- --------------
- 0.0% 0 [Unmapped] 7.31Mi 94.2%
- -NAN% 0 .debug_info 2.97Mi 40.6%
- -NAN% 0 .debug_loc 2.30Mi 31.5%
- -NAN% 0 .debug_str 1.03Mi 14.2%
- -NAN% 0 .debug_ranges 611Ki 8.2%
- -NAN% 0 .debug_line 218Ki 2.9%
- -NAN% 0 .debug_abbrev 85.4Ki 1.1%
- -NAN% 0 .strtab 62.8Ki 0.8%
- -NAN% 0 .symtab 27.8Ki 0.4%
- -NAN% 0 .debug_aranges 13.5Ki 0.2%
- -NAN% 0 [Unmapped] 2.82Ki 0.0%
- -NAN% 0 .shstrtab 371 0.0%
- -NAN% 0 .comment 43 0.0%
- 99.2% 452Ki LOAD [RX] 452Ki 5.7%
- 73.4% 332Ki .text 332Ki 73.4%
- 13.3% 60.0Ki .rodata 60.0Ki 13.3%
- 7.0% 31.8Ki .eh_frame 31.8Ki 7.0%
- 2.3% 10.5Ki .gcc_except_table 10.5Ki 2.3%
- 0.9% 4.18Ki .eh_frame_hdr 4.18Ki 0.9%
- 0.8% 3.54Ki .dynsym 3.54Ki 0.8%
- 0.8% 3.52Ki .dynstr 3.52Ki 0.8%
- 0.7% 2.98Ki .rela.plt 2.98Ki 0.7%
- 0.4% 2.00Ki .plt 2.00Ki 0.4%
- 0.1% 568 [ELF Headers] 568 0.1%
- 0.1% 408 .rela.dyn 408 0.1%
- 0.1% 304 .gnu.version_r 304 0.1%
- 0.1% 302 .gnu.version 302 0.1%
- 0.0% 216 .gnu.hash 216 0.0%
+ 0.0% 0 [Unmapped] 24.0Mi 83.0%
+ NAN% 0 .debug_info 10.7Mi 44.7%
+ NAN% 0 .debug_loc 5.39Mi 22.4%
+ NAN% 0 .debug_str 4.48Mi 18.7%
+ NAN% 0 .debug_ranges 1.67Mi 6.9%
+ NAN% 0 .debug_line 856Ki 3.5%
+ NAN% 0 .strtab 470Ki 1.9%
+ NAN% 0 .debug_abbrev 241Ki 1.0%
+ NAN% 0 .symtab 188Ki 0.8%
+ NAN% 0 .debug_aranges 27.1Ki 0.1%
+ NAN% 0 .shstrtab 390 0.0%
+ NAN% 0 [Unmapped] 118 0.0%
+ NAN% 0 .comment 28 0.0%
+ 93.7% 4.62Mi LOAD [RX] 4.62Mi 16.0%
+ 40.2% 1.86Mi .text 1.86Mi 40.2%
+ 34.8% 1.61Mi .rodata 1.61Mi 34.8%
+ 7.7% 362Ki .dynstr 362Ki 7.7%
+ 6.8% 321Ki .rela.dyn 321Ki 6.8%
+ 4.9% 232Ki .eh_frame 232Ki 4.9%
+ 2.6% 123Ki .dynsym 123Ki 2.6%
+ 1.0% 48.4Ki .gcc_except_table 48.4Ki 1.0%
+ 0.8% 39.8Ki .gnu.hash 39.8Ki 0.8%
+ 0.8% 36.6Ki .eh_frame_hdr 36.6Ki 0.8%
+ 0.2% 10.3Ki .gnu.version 10.3Ki 0.2%
+ 0.1% 4.36Ki .rela.plt 4.36Ki 0.1%
+ 0.1% 2.92Ki .plt 2.92Ki 0.1%
+ 0.0% 624 [ELF Headers] 624 0.0%
+ 0.0% 384 .gnu.version_r 384 0.0%
+ 0.0% 104 .plt.got 104 0.0%
+ 0.0% 39 [LOAD [RX]] 39 0.0%
0.0% 36 .note.gnu.build-id 36 0.0%
0.0% 32 .note.ABI-tag 32 0.0%
0.0% 28 .interp 28 0.0%
- 0.0% 26 .init 26 0.0%
- 0.0% 18 [Unmapped] 18 0.0%
- 0.0% 9 .fini 9 0.0%
- 0.8% 3.46Ki LOAD [RW] 1.88Ki 0.0%
- 45.6% 1.58Ki .bss 0 0.0%
- 29.3% 1.02Ki .got.plt 1.02Ki 54.1%
- 14.9% 528 .dynamic 528 27.4%
- 7.1% 252 .data 252 13.1%
- 1.4% 48 .init_array 48 2.5%
- 0.7% 24 .got 24 1.2%
- 0.5% 16 [Unmapped] 16 0.8%
- 0.2% 8 .fini_array 8 0.4%
- 0.2% 8 .jcr 8 0.4%
- 0.1% 4 [None] 0 0.0%
- 0.0% 0 [ELF Headers] 2.38Ki 0.0%
- 100.0% 456Ki TOTAL 7.75Mi 100.0%
+ 0.0% 23 .init 23 0.0%
+ 0.0% 9 [1 Others] 9 0.0%
+ 6.3% 316Ki LOAD [RW] 310Ki 1.0%
+ 97.1% 307Ki .data.rel.ro 307Ki 99.1%
+ 2.0% 6.20Ki .bss 0 0.0%
+ 0.5% 1.48Ki .got.plt 1.48Ki 0.5%
+ 0.2% 560 .dynamic 560 0.2%
+ 0.1% 352 .init_array 352 0.1%
+ 0.1% 328 .data 328 0.1%
+ 0.1% 192 .got 192 0.1%
+ 0.0% 56 [LOAD [RW]] 32 0.0%
+ 0.0% 16 .tdata 16 0.0%
+ 0.0% 8 .fini_array 8 0.0%
+ 0.0% 0 [ELF Headers] 2.50Ki 0.0%
+ 100.0% 4.93Mi TOTAL 28.9Mi 100.0%
```
Bloaty displays a maximum of 20 lines for each level; other
@@ -286,27 +296,26 @@
easy mistake to make, and one that I made several times even
as Bloaty's author!).
-Make sure you are compiling with build IDs enabled. For gcc
-this happens automatically, but [Clang decided not to make
-this the default, since it makes the link
-slower](http://releases.llvm.org/3.9.0/tools/clang/docs/ReleaseNotes.html#major-new-features).
-For Clang add `-Wl,--build-id` to your link line. (If you
-want a slightly faster link and don't care about
-reproducibility, you can use `-Wl,--build-id=uuid` instead).
-
-Then you can strip the binary and uses the unstripped binary
-as your debug file. For example, with bloaty itself:
+If your binary has a build ID, then using separate debug
+files is as simple as:
```
$ cp bloaty bloaty.stripped
$ strip bloaty.stripped
-$ ./bloaty -d compileunits --debug-file=bloaty bloaty.stripped
+$ ./bloaty -d symbols --debug-file=bloaty bloaty.stripped
```
-It is also possible to remove debug sections only (see
-`objcopy --strip-debug`) while keeping the symbol table.
-You can also create debug file that contain *only* debug
-info (see `objcopy --only-keep-debug`).
+Some format-specific notes follow.
+
+## ELF
+
+For ELF, make sure you are compiling with build IDs enabled.
+With gcc this happens automatically, but [Clang decided not
+to make this the default, since it makes the link
+slower](http://releases.llvm.org/3.9.0/tools/clang/docs/ReleaseNotes.html#major-new-features).
+For Clang add `-Wl,--build-id` to your link line. (If you
+want a slightly faster link and don't care about
+reproducibility, you can use `-Wl,--build-id=uuid` instead).
Bloaty does not currently support the GNU debuglink or
looking up debug files by build ID, [which are the methods
@@ -315,6 +324,21 @@
If there are use cases where Bloaty's `--debug-file` option
won't work, we can reconsider implementing these.
+## Mach-O
+
+Mach-O files always have build IDs (as far as I can tell),
+so no special configuration is needed to make sure you get
+them.
+
+Mach-O puts debug information in separate files which you
+can create with `dsymutil`:
+
+```
+$ dsymutil bloaty
+$ strip bloaty (optional)
+$ ./bloaty -d symbols --debug-file=bloaty.dSYM/Contents/Resources/DWARF/bloaty bloaty
+```
+
# Configuration Files
Any options that you can specify on the command-line, you
@@ -442,30 +466,30 @@
```
$ ./bloaty -d symbols bloaty
- VM SIZE FILE SIZE
- -------------- --------------
- 17.9% 81.9Ki [Unmapped] 7.39Mi 95.3%
- 62.3% 283Ki [Other] 284Ki 3.6%
- 2.7% 12.3Ki re2::RE2::Match(re2::StringPiece const&, int, int, re2::RE2::Anchor, re2::String 12.3Ki 0.2%
- 1.7% 7.83Ki re2::unicode_groups 7.83Ki 0.1%
- 1.7% 7.56Ki re2::NFA::Search 7.56Ki 0.1%
- 1.3% 5.76Ki re2::BitState::TrySearch 5.76Ki 0.1%
- 1.2% 5.43Ki bloaty::Bloaty::ScanAndRollupFile 5.43Ki 0.1%
- 1.0% 4.49Ki re2::DFA::DFA 4.49Ki 0.1%
- 1.0% 4.35Ki bool bloaty::(anonymous namespace)::ForEachElf<bloaty::(anonymous namespace)::Do 4.35Ki 0.1%
- 1.0% 4.34Ki re2::Regexp::Parse 4.34Ki 0.1%
- 0.9% 4.20Ki re2::RE2::Init 4.20Ki 0.1%
- 0.9% 4.09Ki re2::Prog::IsOnePass 4.09Ki 0.1%
- 0.9% 4.04Ki re2::Compiler::PostVisit 4.04Ki 0.1%
- 0.9% 4.04Ki bloaty::ReadDWARFInlines 4.04Ki 0.1%
- 0.9% 3.91Ki re2::Regexp::FactorAlternationRecursive 3.91Ki 0.0%
- 0.8% 3.77Ki re2::DFA::RunStateOnByte 3.77Ki 0.0%
- 0.8% 3.68Ki re2::unicode_casefold 3.68Ki 0.0%
- 0.8% 3.52Ki bloaty::ElfFileHandler::ProcessFile 3.52Ki 0.0%
- 0.7% 3.40Ki re2::DFA::InlinedSearchLoop(re2::DFA::SearchParams*, bool, bool, bool) [clone .c 3.40Ki 0.0%
- 0.7% 3.38Ki re2::DFA::InlinedSearchLoop(re2::DFA::SearchParams*, bool, bool, bool) [clone .c 3.38Ki 0.0%
- 0.0% 165 [None] 0 0.0%
- 100.0% 456Ki TOTAL 7.75Mi 100.0%
+ VM SIZE FILE SIZE
+ -------------- --------------
+ 0.0% 0 [section .debug_info] 10.7Mi 37.1%
+ 0.0% 0 [section .debug_loc] 5.39Mi 18.6%
+ 0.0% 0 [section .debug_str] 4.48Mi 15.5%
+ 64.8% 3.20Mi [5661 Others] 3.86Mi 13.3%
+ 0.0% 0 [section .debug_ranges] 1.67Mi 5.8%
+ 0.0% 0 [section .debug_line] 856Ki 2.9%
+ 12.9% 648Ki insns 648Ki 2.2%
+ 0.0% 0 [section .debug_abbrev] 241Ki 0.8%
+ 4.3% 217Ki ARMInsts 217Ki 0.7%
+ 3.7% 185Ki insn_name_maps 185Ki 0.6%
+ 2.3% 117Ki AArch64_printInst 117Ki 0.4%
+ 2.3% 117Ki x86DisassemblerTwoByteOpcodes 117Ki 0.4%
+ 2.0% 101Ki Sparc_printInst 101Ki 0.3%
+ 1.5% 74.3Ki PPC_printInst 74.4Ki 0.3%
+ 1.1% 54.0Ki x86DisassemblerThreeByte38Opcodes 54.0Ki 0.2%
+ 1.1% 53.0Ki DecoderTable32 53.1Ki 0.2%
+ 1.0% 48.4Ki [section .gcc_except_table] 48.4Ki 0.2%
+ 0.8% 41.5Ki reg_name_maps 41.7Ki 0.1%
+ 0.8% 39.8Ki [section .gnu.hash] 39.8Ki 0.1%
+ 0.8% 38.7Ki decodeInstruction_4.constprop.128 38.8Ki 0.1%
+ 0.7% 37.8Ki printInstruction 37.8Ki 0.1%
+ 100.0% 4.93Mi TOTAL 28.9Mi 100.0%
```
You can control how symbols are demangled with the `-C MODE`
@@ -540,30 +564,30 @@
```
$ ./bloaty -d compileunits bloaty
- VM SIZE FILE SIZE
- -------------- --------------
- 27.9% 128Ki [None] 7.43Mi 95.9%
- 12.9% 59.2Ki src/bloaty.cc 59.0Ki 0.7%
- 7.3% 33.4Ki re2/re2.cc 32.3Ki 0.4%
- 6.9% 31.6Ki re2/dfa.cc 31.6Ki 0.4%
- 6.8% 31.4Ki re2/parse.cc 31.4Ki 0.4%
- 6.7% 30.9Ki src/dwarf.cc 30.9Ki 0.4%
- 6.7% 30.6Ki re2/regexp.cc 27.8Ki 0.4%
- 5.1% 23.7Ki re2/compile.cc 23.7Ki 0.3%
- 4.3% 19.7Ki re2/simplify.cc 19.7Ki 0.2%
- 3.2% 14.8Ki src/elf.cc 14.8Ki 0.2%
- 3.1% 14.2Ki re2/nfa.cc 14.2Ki 0.2%
- 1.8% 8.34Ki re2/bitstate.cc 8.34Ki 0.1%
- 1.7% 7.84Ki re2/prog.cc 7.84Ki 0.1%
- 1.6% 7.13Ki re2/tostring.cc 7.13Ki 0.1%
- 1.5% 6.67Ki re2/onepass.cc 6.67Ki 0.1%
- 1.4% 6.58Ki src/macho.cc 6.58Ki 0.1%
- 0.7% 3.27Ki src/main.cc 3.27Ki 0.0%
- 0.2% 797 [Other] 797 0.0%
- 0.1% 666 util/stringprintf.cc 666 0.0%
- 0.1% 573 util/strutil.cc 573 0.0%
- 0.1% 476 util/rune.cc 476 0.0%
- 100.0% 460Ki TOTAL 7.75Mi 100.0%
+ VM SIZE FILE SIZE
+ -------------- --------------
+ 44.2% 2.18Mi [137 Others] 10.7Mi 36.9%
+ 5.4% 271Ki ../third_party/protobuf/src/google/protobuf/descriptor.cc 3.92Mi 13.5%
+ 7.1% 360Ki ../third_party/protobuf/src/google/protobuf/descriptor.pb.cc 2.39Mi 8.2%
+ 8.3% 416Ki ../third_party/capstone/arch/ARM/ARMDisassembler.c 1.57Mi 5.4%
+ 1.7% 87.4Ki ../third_party/protobuf/src/google/protobuf/text_format.cc 1.00Mi 3.5%
+ 2.1% 106Ki ../third_party/protobuf/src/google/protobuf/generated_message_reflection.cc 959Ki 3.2%
+ 0.8% 38.1Ki ../third_party/protobuf/src/google/protobuf/descriptor_database.cc 771Ki 2.6%
+ 1.5% 73.4Ki ../third_party/protobuf/src/google/protobuf/message.cc 754Ki 2.5%
+ 2.5% 126Ki ../src/bloaty.cc 753Ki 2.5%
+ 0.9% 43.5Ki ../third_party/re2/re2/dfa.cc 648Ki 2.2%
+ 1.2% 60.5Ki ../third_party/protobuf/src/google/protobuf/extension_set.cc 610Ki 2.1%
+ 0.8% 42.0Ki ../third_party/re2/re2/re2.cc 595Ki 2.0%
+ 0.6% 28.2Ki ../third_party/protobuf/src/google/protobuf/generated_message_util.cc 572Ki 1.9%
+ 1.1% 56.5Ki ../third_party/protobuf/src/google/protobuf/map_field.cc 565Ki 1.9%
+ 0.8% 42.5Ki ../third_party/re2/re2/regexp.cc 543Ki 1.8%
+ 1.8% 91.3Ki ../third_party/capstone/arch/AArch64/AArch64Disassembler.c 523Ki 1.8%
+ 1.0% 50.9Ki ../third_party/protobuf/src/google/protobuf/wire_format.cc 520Ki 1.8%
+ 1.8% 88.8Ki ../third_party/demumble/third_party/libcxxabi/cxa_demangle.cpp 490Ki 1.7%
+ 3.2% 163Ki ../third_party/capstone/arch/AArch64/AArch64InstPrinter.c 456Ki 1.5%
+ 6.5% 329Ki ../third_party/capstone/arch/X86/X86DisassemblerDecoder.c 427Ki 1.4%
+ 6.7% 337Ki ../third_party/capstone/arch/X86/X86Mapping.c 417Ki 1.4%
+ 100.0% 4.93Mi TOTAL 28.9Mi 100.0%
```
## Inlines
@@ -821,12 +845,3 @@
- Visualize the dependency tree of symbols (probably as a
dominator tree) so users can see the weight of their
binary in this way.
-
-## Improving the quality of data sources
-
-One of the things we have to do in Bloaty is deal with
-incomplete information. For examples, `.debug_aranges`
-which we use for the `compileunits` data source is often
-missing or incomplete. Refining the input sources to be
-more complete and accurate will make help Bloaty's numbers
-be even more accurate.
diff --git a/make-release-tarball.sh b/make-release-tarball.sh
new file mode 100755
index 0000000..45873be
--- /dev/null
+++ b/make-release-tarball.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+
+# Makes a release tarball. We include our dependencies/submodules,
+# but we heavily prune their file lists to avoid including lots of
+# extraneous baggage. We also leave out Bloaty's tests, especially
+# because some of the test data is large.
+
+set -e
+
+if [ "$#" -ne 1 ]; then
+ echo "Usage: make-release-tarball.sh VERSION"
+ exit 1
+fi
+
+VERSION=$1
+
+FILES=$(git ls-files --exclude-standard --recurse-submodules |
+ grep -v googletest |
+ grep -v ^tests |
+ grep -v third_party/protobuf |
+ grep -v 'third_party/capstone/\(suite\|bindings\|xcode\|msvc\|contrib\)' |
+ grep -v third_party/abseil-cpp/absl/time/internal/cctz/testdata |
+ grep -v ^.git)
+FILES="$FILES $(git ls-files --exclude-standard --recurse-submodules |
+ grep 'third_party/protobuf/\(src\|cmake\|configure.ac\)')"
+
+# Unfortunately tar on Mac doesn't support --transform, so we have to
+# actually move our files to a different directory to get the prefix.
+DIR=/tmp/bloaty-$VERSION
+rm -rf $DIR
+mkdir $DIR
+rsync -R $FILES $DIR
+
+BASE=$PWD
+cd /tmp
+OUT=bloaty-$VERSION.tar.bz2
+tar cjf $BASE/$OUT bloaty-$VERSION
+
+echo "Created $OUT"
+
diff --git a/src/bloaty.cc b/src/bloaty.cc
index 4d4b21d..2e44166 100644
--- a/src/bloaty.cc
+++ b/src/bloaty.cc
@@ -98,6 +98,7 @@
"raw ranges of previous data source."},
{DataSource::kSections, "sections", "object file section"},
{DataSource::kSegments, "segments", "load commands in the binary"},
+ // We require that all symbol sources are >= kSymbols.
{DataSource::kSymbols, "symbols",
"symbols from symbol table (configure demangling with --demangle)"},
{DataSource::kRawSymbols, "rawsymbols", "unmangled symbols"},
@@ -706,16 +707,36 @@
<< PercentString(row.filepercent, diff_mode_) << "\n";
}
+bool RollupOutput::IsSame(const std::string& a, const std::string& b) {
+ if (a == b) {
+ return true;
+ }
+
+ if (absl::EndsWith(b, a + "]") || absl::EndsWith(a, b + "]")) {
+ return true;
+ }
+
+ return false;
+}
+
void RollupOutput::PrettyPrintTree(const RollupRow& row, size_t indent,
size_t longest_label,
std::ostream* out) const {
// Rows are printed before their sub-rows.
PrettyPrintRow(row, indent, longest_label, out);
- if (row.vmsize || row.filesize) {
- for (const auto& child : row.sorted_children) {
- PrettyPrintTree(child, indent + 4, longest_label, out);
- }
+ if (!row.vmsize && !row.filesize) {
+ return;
+ }
+
+ if (row.sorted_children.size() == 1 &&
+ row.sorted_children[0].sorted_children.size() == 0 &&
+ IsSame(row.name, row.sorted_children[0].name)) {
+ return;
+ }
+
+ for (const auto& child : row.sorted_children) {
+ PrettyPrintTree(child, indent + 4, longest_label, out);
}
}
@@ -797,6 +818,8 @@
// RangeMap ////////////////////////////////////////////////////////////////////
+constexpr uint64_t RangeSink::kUnknownSize;
+
// MmapInputFile ///////////////////////////////////////////////////////////////
@@ -1269,6 +1292,10 @@
}
if (!object_file.get()) {
+ object_file = TryOpenWebAssemblyFile(file);
+ }
+
+ if (!object_file.get()) {
THROWF("unknown file type for file '$0'", filename.c_str());
}
@@ -1857,6 +1884,9 @@
} else if (args.TryParseFlag("--help")) {
fputs(usage, stderr);
return false;
+ } else if (args.TryParseFlag("--version")) {
+ printf("Bloaty McBloatface 1.0\n");
+ exit(0);
} else if (absl::StartsWith(args.Arg(), "-")) {
if (skip_unknown) {
args.ConsumeAndSaveArg();
diff --git a/src/bloaty.h b/src/bloaty.h
index accc0ae..58572ee 100644
--- a/src/bloaty.h
+++ b/src/bloaty.h
@@ -32,7 +32,7 @@
#include "absl/strings/string_view.h"
#include "absl/strings/strip.h"
-#include "capstone.h"
+#include "capstone/capstone.h"
#include "re2/re2.h"
#include "bloaty.pb.h"
@@ -220,7 +220,7 @@
uint64_t TranslateFileToVM(const char* ptr);
absl::string_view TranslateVMToFile(uint64_t address);
- static const uint64_t kUnknownSize = RangeMap::kUnknownSize;
+ static constexpr uint64_t kUnknownSize = RangeMap::kUnknownSize;
private:
BLOATY_DISALLOW_COPY_AND_ASSIGN(RangeSink);
@@ -302,6 +302,7 @@
std::unique_ptr<ObjectFile> TryOpenELFFile(std::unique_ptr<InputFile>& file);
std::unique_ptr<ObjectFile> TryOpenMachOFile(std::unique_ptr<InputFile>& file);
+std::unique_ptr<ObjectFile> TryOpenWebAssemblyFile(std::unique_ptr<InputFile>& file);
namespace dwarf {
@@ -503,6 +504,7 @@
// When we are in diff mode, rollup sizes are relative to the baseline.
bool diff_mode_ = false;
+ static bool IsSame(const std::string& a, const std::string& b);
void PrettyPrint(size_t max_label_len, std::ostream* out) const;
void PrintToCSV(std::ostream* out, bool tabs) const;
size_t CalculateLongestLabel(const RollupRow& row, int indent) const;
diff --git a/src/disassemble.cc b/src/disassemble.cc
index b1bb710..875b082 100644
--- a/src/disassemble.cc
+++ b/src/disassemble.cc
@@ -20,7 +20,7 @@
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/strings/substitute.h"
-#include "capstone.h"
+#include "capstone/capstone.h"
static void Throw(const char *str, int line) {
throw bloaty::Error(str, __FILE__, line);
diff --git a/src/dwarf.cc b/src/dwarf.cc
index f795e67..cb7dd0c 100644
--- a/src/dwarf.cc
+++ b/src/dwarf.cc
@@ -25,6 +25,7 @@
#include <vector>
#include "absl/base/attributes.h"
+#include "absl/base/macros.h"
#include "absl/strings/string_view.h"
#include "absl/strings/substitute.h"
#include "absl/types/optional.h"
@@ -183,6 +184,9 @@
// The size of addresses. Guaranteed to be either 4 or 8.
uint8_t address_size() const { return address_size_; }
+ // DWARF version of this unit.
+ uint8_t dwarf_version() const { return dwarf_version_; }
+
void SetAddressSize(uint8_t address_size) {
if (address_size != 4 && address_size != 8) {
THROWF("Unexpected address size: $0", address_size);
@@ -243,7 +247,12 @@
return unit;
}
+ void ReadDWARFVersion(string_view* data) {
+ dwarf_version_ = ReadMemcpy<uint16_t>(data);
+ }
+
private:
+ uint16_t dwarf_version_;
bool dwarf64_;
uint8_t address_size_;
};
@@ -422,9 +431,9 @@
}
unit_remaining_ = sizes_.ReadInitialLength(&next_unit_);
- uint16_t version = ReadMemcpy<uint16_t>(&unit_remaining_);
+ sizes_.ReadDWARFVersion(&unit_remaining_);
- if (version > 2) {
+ if (sizes_.dwarf_version() > 2) {
THROW("DWARF data is too new for us");
}
@@ -748,9 +757,9 @@
unit_range_ = unit_range_.substr(
0, remaining_.size() + (remaining_.data() - unit_range_.data()));
- uint16_t version = ReadMemcpy<uint16_t>(&remaining_);
+ unit_sizes_.ReadDWARFVersion(&remaining_);
- if (version > 4) {
+ if (unit_sizes_.dwarf_version() > 4) {
THROW("Data is in new DWARF format we don't understand");
}
@@ -874,7 +883,7 @@
case DW_FORM_ref8:
return AttrValue(ReadMemcpy<uint64_t>(data));
case DW_FORM_addr:
- case DW_FORM_ref_addr:
+ address_size:
switch (reader.unit_sizes().address_size()) {
case 4:
return AttrValue(ReadMemcpy<uint32_t>(data));
@@ -883,6 +892,11 @@
default:
BLOATY_UNREACHABLE();
}
+ case DW_FORM_ref_addr:
+ if (reader.unit_sizes().dwarf_version() <= 2) {
+ goto address_size;
+ }
+ ABSL_FALLTHROUGH_INTENDED;
case DW_FORM_sec_offset:
if (reader.unit_sizes().dwarf64()) {
return AttrValue(ReadMemcpy<uint64_t>(data));
@@ -1108,13 +1122,13 @@
sizes_.SetAddressSize(address_size);
data = sizes_.ReadInitialLength(&data);
- uint16_t version = ReadMemcpy<uint16_t>(&data);
+ sizes_.ReadDWARFVersion(&data);
uint64_t header_length = sizes_.ReadDWARFOffset(&data);
string_view program = data;
SkipBytes(header_length, &program);
params_.minimum_instruction_length = ReadMemcpy<uint8_t>(&data);
- if (version == 4) {
+ if (sizes_.dwarf_version() == 4) {
params_.maximum_operations_per_instruction = ReadMemcpy<uint8_t>(&data);
if (params_.maximum_operations_per_instruction == 0) {
@@ -1630,7 +1644,7 @@
string_view unit = sizes.ReadInitialLength(&remaining);
full_unit =
full_unit.substr(0, unit.size() + (unit.data() - full_unit.data()));
- dwarf::SkipBytes(2, &unit);
+ sizes.ReadDWARFVersion(&unit);
uint64_t debug_info_offset = sizes.ReadDWARFOffset(&unit);
bool ok = die_reader.SeekToCompilationUnit(
dwarf::DIEReader::Section::kDebugInfo, debug_info_offset);
diff --git a/src/elf.cc b/src/elf.cc
index 3bd92c7..d82c96a 100644
--- a/src/elf.cc
+++ b/src/elf.cc
@@ -988,8 +988,7 @@
// .strtab
// .dynsym
// .dynstr
-static void ReadELFTables(const InputFile& file, DisassemblyInfo* info,
- RangeSink* sink) {
+static void ReadELFTables(const InputFile& file, RangeSink* sink) {
bool is_object = IsObjectFile(file.data());
// Disassemble first, because sometimes other tables will refer to things we
@@ -997,37 +996,36 @@
ReadELFSymbols(file, sink, nullptr, true);
// Now scan other tables.
- ForEachElf(
- file, sink,
- [&file, info, sink, is_object](
- const ElfFile& elf, string_view /*filename*/, uint32_t index_base) {
- for (Elf64_Xword i = 1; i < elf.section_count(); i++) {
- ElfFile::Section section;
- elf.ReadSection(i, &section);
+ ForEachElf(file, sink,
+ [sink, is_object](const ElfFile& elf, string_view /*filename*/,
+ uint32_t index_base) {
+ for (Elf64_Xword i = 1; i < elf.section_count(); i++) {
+ ElfFile::Section section;
+ elf.ReadSection(i, &section);
- switch (section.header().sh_type) {
- case SHT_SYMTAB:
- case SHT_DYNSYM:
- ReadELFSymbolTableEntries(elf, section, index_base, is_object,
- sink);
- break;
- case SHT_RELA:
- ReadELFRelaEntries(section, index_base, is_object, sink);
- break;
- }
+ switch (section.header().sh_type) {
+ case SHT_SYMTAB:
+ case SHT_DYNSYM:
+ ReadELFSymbolTableEntries(elf, section, index_base,
+ is_object, sink);
+ break;
+ case SHT_RELA:
+ ReadELFRelaEntries(section, index_base, is_object, sink);
+ break;
+ }
- // We are looking by section name, which is a little different than
- // what the loader actually does (which is find eh_frame_hdr from the
- // program headers and then find eh_frame fde entries from there).
- // But these section names should be standard enough that this
- // approach works also.
- if (section.GetName() == ".eh_frame") {
- ReadEhFrame(section.contents(), sink);
- } else if (section.GetName() == ".eh_frame_hdr") {
- ReadEhFrameHdr(section.contents(), sink);
- }
- }
- });
+ // We are looking by section name, which is a little different
+ // than what the loader actually does (which is find
+ // eh_frame_hdr from the program headers and then find eh_frame
+ // fde entries from there). But these section names should be
+ // standard enough that this approach works also.
+ if (section.GetName() == ".eh_frame") {
+ ReadEhFrame(section.contents(), sink);
+ } else if (section.GetName() == ".eh_frame_hdr") {
+ ReadEhFrameHdr(section.contents(), sink);
+ }
+ }
+ });
}
enum ReportSectionsBy {
@@ -1314,11 +1312,7 @@
case DataSource::kArchiveMembers:
break;
default: {
- DisassemblyInfo info;
- if (!DoGetDisassemblyInfo(nullptr, DataSource::kRawSymbols, &info)) {
- THROW("Failed to get disassembly info!");
- }
- ReadELFTables(sink->input_file(), &info, sink);
+ ReadELFTables(sink->input_file(), sink);
DoReadELFSections(sink, kReportByEscapedSectionName);
if (!IsObjectFile(sink->input_file().data())) {
DoReadELFSegments(sink, kReportByEscapedSegmentName);
diff --git a/src/macho.cc b/src/macho.cc
index a027952..3ba5239 100644
--- a/src/macho.cc
+++ b/src/macho.cc
@@ -113,6 +113,19 @@
}
}
+struct LoadCommand {
+ bool is64bit;
+ uint32_t cmd;
+ string_view command_data;
+ string_view file_data;
+};
+
+template <class Struct>
+bool Is64Bit() { return false; }
+
+template <>
+bool Is64Bit<mach_header_64>() { return true; }
+
template <class Struct, class Func>
void ParseMachOHeaderImpl(string_view macho_data, RangeSink* overhead_sink,
Func&& loadcmd_func) {
@@ -133,9 +146,14 @@
THROW("Mach-O load command had zero size.");
}
- string_view command_data = StrictSubstr(header_data, 0, command->cmdsize);
- std::forward<Func>(loadcmd_func)(command->cmd, command_data, macho_data);
- MaybeAddOverhead(overhead_sink, "[Mach-O Headers]", command_data);
+ LoadCommand data;
+ data.is64bit = Is64Bit<Struct>();
+ data.cmd = command->cmd;
+ data.command_data = StrictSubstr(header_data, 0, command->cmdsize);
+ data.file_data = macho_data;
+ std::forward<Func>(loadcmd_func)(data);
+
+ MaybeAddOverhead(overhead_sink, "[Mach-O Headers]", data.command_data);
header_data = header_data.substr(command->cmdsize);
}
}
@@ -257,9 +275,8 @@
}
template <class Segment, class Section>
-void ParseSegment(string_view command_data, string_view file_data,
- RangeSink* sink) {
- auto segment = GetStructPointerAndAdvance<Segment>(&command_data);
+void ParseSegment(LoadCommand cmd, RangeSink* sink) {
+ auto segment = GetStructPointerAndAdvance<Segment>(&cmd.command_data);
if (segment->maxprot == VM_PROT_NONE) {
return;
@@ -270,11 +287,11 @@
if (sink->data_source() == DataSource::kSegments) {
sink->AddRange(
"macho_segment", segname, segment->vmaddr, segment->vmsize,
- StrictSubstr(file_data, segment->fileoff, segment->filesize));
+ StrictSubstr(cmd.file_data, segment->fileoff, segment->filesize));
} else if (sink->data_source() == DataSource::kSections) {
uint32_t nsects = segment->nsects;
for (uint32_t j = 0; j < nsects; j++) {
- auto section = GetStructPointerAndAdvance<Section>(&command_data);
+ auto section = GetStructPointerAndAdvance<Section>(&cmd.command_data);
// filesize equals vmsize unless the section is zerofill
uint64_t filesize = section->size;
@@ -291,119 +308,114 @@
std::string label = absl::StrJoin(
std::make_tuple(segname, ArrayToStr(section->sectname, 16)), ",");
sink->AddRange("macho_section", label, section->addr, section->size,
- StrictSubstr(file_data, section->offset, filesize));
+ StrictSubstr(cmd.file_data, section->offset, filesize));
}
} else {
BLOATY_UNREACHABLE();
}
}
-static void ParseDyldInfo(string_view command_data, string_view file_data,
- RangeSink* sink) {
- auto info = GetStructPointer<dyld_info_command>(command_data);
+static void ParseDyldInfo(const LoadCommand& cmd, RangeSink* sink) {
+ auto info = GetStructPointer<dyld_info_command>(cmd.command_data);
sink->AddFileRange(
"macho_dyld", "Rebase Info",
- StrictSubstr(file_data, info->rebase_off, info->rebase_size));
- sink->AddFileRange("macho_dyld", "Binding Info",
- StrictSubstr(file_data, info->bind_off, info->bind_size));
+ StrictSubstr(cmd.file_data, info->rebase_off, info->rebase_size));
+ sink->AddFileRange(
+ "macho_dyld", "Binding Info",
+ StrictSubstr(cmd.file_data, info->bind_off, info->bind_size));
sink->AddFileRange(
"macho_dyld", "Weak Binding Info",
- StrictSubstr(file_data, info->weak_bind_off, info->weak_bind_size));
+ StrictSubstr(cmd.file_data, info->weak_bind_off, info->weak_bind_size));
sink->AddFileRange(
"macho_dyld", "Lazy Binding Info",
- StrictSubstr(file_data, info->lazy_bind_off, info->lazy_bind_size));
+ StrictSubstr(cmd.file_data, info->lazy_bind_off, info->lazy_bind_size));
sink->AddFileRange(
"macho_dyld", "Export Info",
- StrictSubstr(file_data, info->export_off, info->export_size));
+ StrictSubstr(cmd.file_data, info->export_off, info->export_size));
}
-static void ParseSymbolTable(string_view command_data, string_view file_data,
- RangeSink* sink) {
- auto symtab = GetStructPointer<symtab_command>(command_data);
+static void ParseSymbolTable(const LoadCommand& cmd, RangeSink* sink) {
+ auto symtab = GetStructPointer<symtab_command>(cmd.command_data);
- // TODO(haberman): use 32-bit symbol size where appropriate.
- sink->AddFileRange("macho_symtab", "Symbol Table",
- StrictSubstr(file_data, symtab->symoff,
- symtab->nsyms * sizeof(nlist_64)));
- sink->AddFileRange("macho_symtab", "String Table",
- StrictSubstr(file_data, symtab->stroff, symtab->strsize));
+ size_t size = cmd.is64bit ? sizeof(nlist_64) : sizeof(struct nlist);
+ sink->AddFileRange(
+ "macho_symtab", "Symbol Table",
+ StrictSubstr(cmd.file_data, symtab->symoff, symtab->nsyms * size));
+ sink->AddFileRange(
+ "macho_symtab", "String Table",
+ StrictSubstr(cmd.file_data, symtab->stroff, symtab->strsize));
}
-static void ParseDynamicSymbolTable(string_view command_data,
- string_view file_data, RangeSink* sink) {
- auto dysymtab = GetStructPointer<dysymtab_command>(command_data);
+static void ParseDynamicSymbolTable(const LoadCommand& cmd, RangeSink* sink) {
+ auto dysymtab = GetStructPointer<dysymtab_command>(cmd.command_data);
sink->AddFileRange(
"macho_dynsymtab", "Table of Contents",
- StrictSubstr(file_data, dysymtab->tocoff,
+ StrictSubstr(cmd.file_data, dysymtab->tocoff,
dysymtab->ntoc * sizeof(dylib_table_of_contents)));
sink->AddFileRange("macho_dynsymtab", "Module Table",
- StrictSubstr(file_data, dysymtab->modtaboff,
+ StrictSubstr(cmd.file_data, dysymtab->modtaboff,
dysymtab->nmodtab * sizeof(dylib_module_64)));
sink->AddFileRange(
"macho_dynsymtab", "Referenced Symbol Table",
- StrictSubstr(file_data, dysymtab->extrefsymoff,
+ StrictSubstr(cmd.file_data, dysymtab->extrefsymoff,
dysymtab->nextrefsyms * sizeof(dylib_reference)));
sink->AddFileRange("macho_dynsymtab", "Indirect Symbol Table",
- StrictSubstr(file_data, dysymtab->indirectsymoff,
+ StrictSubstr(cmd.file_data, dysymtab->indirectsymoff,
dysymtab->nindirectsyms * sizeof(uint32_t)));
sink->AddFileRange("macho_dynsymtab", "External Relocation Entries",
- StrictSubstr(file_data, dysymtab->extreloff,
+ StrictSubstr(cmd.file_data, dysymtab->extreloff,
dysymtab->nextrel * sizeof(relocation_info)));
sink->AddFileRange(
"macho_dynsymtab", "Local Relocation Entries",
- StrictSubstr(file_data, dysymtab->locreloff,
+ StrictSubstr(cmd.file_data, dysymtab->locreloff,
dysymtab->nlocrel * sizeof(struct relocation_info)));
}
-static void ParseLinkeditCommand(string_view label, string_view command_data,
- string_view file_data, RangeSink* sink) {
- auto linkedit = GetStructPointer<linkedit_data_command>(command_data);
+static void ParseLinkeditCommand(string_view label, const LoadCommand& cmd,
+ RangeSink* sink) {
+ auto linkedit = GetStructPointer<linkedit_data_command>(cmd.command_data);
sink->AddFileRange(
"macho_linkedit", label,
- StrictSubstr(file_data, linkedit->dataoff, linkedit->datasize));
+ StrictSubstr(cmd.file_data, linkedit->dataoff, linkedit->datasize));
}
-void ParseLoadCommand(uint32_t cmd, string_view command_data,
- string_view file_data, RangeSink* sink) {
- switch (cmd) {
+void ParseLoadCommand(const LoadCommand& cmd, RangeSink* sink) {
+ switch (cmd.cmd) {
case LC_SEGMENT_64:
- ParseSegment<segment_command_64, section_64>(command_data, file_data,
- sink);
+ ParseSegment<segment_command_64, section_64>(cmd, sink);
break;
case LC_SEGMENT:
- ParseSegment<segment_command, section>(command_data, file_data, sink);
+ ParseSegment<segment_command, section>(cmd, sink);
break;
case LC_DYLD_INFO:
case LC_DYLD_INFO_ONLY:
- ParseDyldInfo(command_data, file_data, sink);
+ ParseDyldInfo(cmd, sink);
break;
case LC_SYMTAB:
- ParseSymbolTable(command_data, file_data, sink);
+ ParseSymbolTable(cmd, sink);
break;
case LC_DYSYMTAB:
- ParseDynamicSymbolTable(command_data, file_data, sink);
+ ParseDynamicSymbolTable(cmd, sink);
break;
case LC_CODE_SIGNATURE:
- ParseLinkeditCommand("Code Signature", command_data, file_data, sink);
+ ParseLinkeditCommand("Code Signature", cmd, sink);
break;
case LC_SEGMENT_SPLIT_INFO:
- ParseLinkeditCommand("Segment Split Info", command_data, file_data, sink);
+ ParseLinkeditCommand("Segment Split Info", cmd, sink);
break;
case LC_FUNCTION_STARTS:
- ParseLinkeditCommand("Function Start Addresses", command_data, file_data,
- sink);
+ ParseLinkeditCommand("Function Start Addresses", cmd, sink);
break;
case LC_DATA_IN_CODE:
- ParseLinkeditCommand("Table of Non-instructions", command_data, file_data,
- sink);
+ ParseLinkeditCommand("Table of Non-instructions", cmd, sink);
break;
case LC_DYLIB_CODE_SIGN_DRS:
- ParseLinkeditCommand("Code Signing DRs", command_data, file_data, sink);
+ ParseLinkeditCommand("Code Signing DRs", cmd, sink);
break;
case LC_LINKER_OPTIMIZATION_HINT:
- ParseLinkeditCommand("Optimization Hints", command_data, file_data, sink);
+ ParseLinkeditCommand("Optimization Hints", cmd, sink);
break;
}
}
@@ -411,41 +423,58 @@
void ParseLoadCommands(RangeSink* sink) {
ForEachLoadCommand(
sink->input_file().data(), sink,
- [sink](uint32_t cmd, string_view command_data, string_view file_data) {
- ParseLoadCommand(cmd, command_data, file_data, sink);
- });
+ [sink](const LoadCommand& cmd) { ParseLoadCommand(cmd, sink); });
}
-void ParseSymbolsFromSymbolTable(string_view command_data, string_view file_data, RangeSink* sink) {
- auto symtab_cmd = GetStructPointer<symtab_command>(command_data);
+template <class NList>
+void ParseSymbolsFromSymbolTable(const LoadCommand& cmd, SymbolTable* table,
+ RangeSink* sink) {
+ auto symtab_cmd = GetStructPointer<symtab_command>(cmd.command_data);
- // TODO(haberman): use 32-bit symbol size where appropriate.
- string_view symtab = StrictSubstr(file_data, symtab_cmd->symoff,
- symtab_cmd->nsyms * sizeof(nlist_64));
+ string_view symtab = StrictSubstr(cmd.file_data, symtab_cmd->symoff,
+ symtab_cmd->nsyms * sizeof(NList));
string_view strtab =
- StrictSubstr(file_data, symtab_cmd->stroff, symtab_cmd->strsize);
+ StrictSubstr(cmd.file_data, symtab_cmd->stroff, symtab_cmd->strsize);
uint32_t nsyms = symtab_cmd->nsyms;
for (uint32_t i = 0; i < nsyms; i++) {
- auto sym = GetStructPointerAndAdvance<nlist_64>(&symtab);
+ auto sym = GetStructPointerAndAdvance<NList>(&symtab);
+ string_view sym_range(reinterpret_cast<const char*>(sym), sizeof(NList));
if (sym->n_type & N_STAB || sym->n_value == 0) {
continue;
}
string_view name = ReadNullTerminated(strtab.substr(sym->n_un.n_strx));
- sink->AddVMRange("macho_symbols", sym->n_value, RangeSink::kUnknownSize,
- ItaniumDemangle(name, sink->data_source()));
+
+ if (sink->data_source() >= DataSource::kSymbols) {
+ sink->AddVMRange("macho_symbols", sym->n_value, RangeSink::kUnknownSize,
+ ItaniumDemangle(name, sink->data_source()));
+ }
+
+ if (table) {
+ table->insert(std::make_pair(
+ name, std::make_pair(sym->n_value, RangeSink::kUnknownSize)));
+ }
+
+ // Capture the trailing NULL.
+ name = string_view(name.data(), name.size() + 1);
+ sink->AddFileRangeFor("macho_symtab_name", sym->n_value, name);
+ sink->AddFileRangeFor("macho_symtab_sym", sym->n_value, sym_range);
}
}
-void ParseSymbols(RangeSink* sink) {
+void ParseSymbols(string_view file_data, SymbolTable* symtab, RangeSink* sink) {
ForEachLoadCommand(
- sink->input_file().data(), sink,
- [sink](uint32_t cmd, string_view command_data, string_view file_data) {
- switch (cmd) {
+ file_data, sink,
+ [symtab, sink](const LoadCommand& cmd) {
+ switch (cmd.cmd) {
case LC_SYMTAB:
- ParseSymbolsFromSymbolTable(command_data, file_data, sink);
+ if (cmd.is64bit) {
+ ParseSymbolsFromSymbolTable<nlist_64>(cmd, symtab, sink);
+ } else {
+ ParseSymbolsFromSymbolTable<struct nlist>(cmd, symtab, sink);
+ }
break;
case LC_DYSYMTAB:
//ParseSymbolsFromDynamicSymbolTable(command_data, file_data, sink);
@@ -457,29 +486,114 @@
static void AddMachOFallback(RangeSink* sink) {
ForEachLoadCommand(
sink->input_file().data(), sink,
- [sink](uint32_t cmd, string_view command_data, string_view file_data) {
- switch (cmd) {
+ [sink](const LoadCommand& cmd) {
+ switch (cmd.cmd) {
case LC_SEGMENT_64:
AddSegmentAsFallback<segment_command_64, section_64>(
- command_data, file_data, sink);
+ cmd.command_data, cmd.file_data, sink);
break;
case LC_SEGMENT:
- AddSegmentAsFallback<segment_command, section>(command_data,
- file_data, sink);
+ AddSegmentAsFallback<segment_command, section>(cmd.command_data,
+ cmd.file_data, sink);
break;
}
});
sink->AddFileRange("macho_fallback", "[Unmapped]", sink->input_file().data());
}
+template <class Segment, class Section>
+void ReadDebugSectionsFromSegment(LoadCommand cmd, dwarf::File* dwarf) {
+ auto segment = GetStructPointerAndAdvance<Segment>(&cmd.command_data);
+
+ if (segment->maxprot == VM_PROT_NONE) {
+ return;
+ }
+
+ string_view segname = ArrayToStr(segment->segname, 16);
+
+ if (segname != "__DWARF") {
+ return;
+ }
+
+ uint32_t nsects = segment->nsects;
+ for (uint32_t j = 0; j < nsects; j++) {
+ auto section = GetStructPointerAndAdvance<Section>(&cmd.command_data);
+ string_view sectname = ArrayToStr(section->sectname, 16);
+
+ // filesize equals vmsize unless the section is zerofill
+ uint64_t filesize = section->size;
+ switch (section->flags & SECTION_TYPE) {
+ case S_ZEROFILL:
+ case S_GB_ZEROFILL:
+ case S_THREAD_LOCAL_ZEROFILL:
+ filesize = 0;
+ break;
+ default:
+ break;
+ }
+
+ string_view contents =
+ StrictSubstr(cmd.file_data, section->offset, filesize);
+
+ if (sectname == "__debug_aranges") {
+ dwarf->debug_aranges = contents;
+ } else if (sectname == "__debug_str") {
+ dwarf->debug_str = contents;
+ } else if (sectname == "__debug_info") {
+ dwarf->debug_info = contents;
+ } else if (sectname == "__debug_types") {
+ dwarf->debug_types = contents;
+ } else if (sectname == "__debug_abbrev") {
+ dwarf->debug_abbrev = contents;
+ } else if (sectname == "__debug_line") {
+ dwarf->debug_line = contents;
+ } else if (sectname == "__debug_loc") {
+ dwarf->debug_loc = contents;
+ } else if (sectname == "__debug_pubnames") {
+ dwarf->debug_pubnames = contents;
+ } else if (sectname == "__debug_pubtypes") {
+ dwarf->debug_pubtypes = contents;
+ } else if (sectname == "__debug_ranges") {
+ dwarf->debug_ranges = contents;
+ }
+ }
+}
+
+static void ReadDebugSectionsFromMachO(const InputFile& file, dwarf::File* dwarf) {
+ ForEachLoadCommand(file.data(), nullptr, [dwarf](const LoadCommand& cmd) {
+ switch (cmd.cmd) {
+ case LC_SEGMENT_64:
+ ReadDebugSectionsFromSegment<segment_command_64, section_64>(cmd,
+ dwarf);
+ break;
+ case LC_SEGMENT:
+ ReadDebugSectionsFromSegment<segment_command, section>(cmd, dwarf);
+ break;
+ }
+ });
+}
+
class MachOObjectFile : public ObjectFile {
public:
MachOObjectFile(std::unique_ptr<InputFile> file_data)
: ObjectFile(std::move(file_data)) {}
std::string GetBuildId() const override {
- // TODO(haberman): implement.
- return std::string();
+ std::string id;
+
+ ForEachLoadCommand(file_data().data(), nullptr, [&id](LoadCommand cmd) {
+ if (cmd.cmd == LC_UUID) {
+ auto uuid_cmd =
+ GetStructPointerAndAdvance<uuid_command>(&cmd.command_data);
+ if (!cmd.command_data.empty()) {
+ THROWF("Unexpected excess uuid data: $0", cmd.command_data.size());
+ }
+ id.resize(sizeof(uuid_cmd->uuid));
+ memcpy(&id[0], &uuid_cmd->uuid[0], sizeof(uuid_cmd->uuid));
+ }
+ });
+
+ return id;
}
void ProcessFile(const std::vector<RangeSink*>& sinks) const override {
@@ -493,10 +607,25 @@
case DataSource::kRawSymbols:
case DataSource::kShortSymbols:
case DataSource::kFullSymbols:
- ParseSymbols(sink);
+ ParseSymbols(debug_file().file_data().data(), nullptr, sink);
break;
+ case DataSource::kCompileUnits: {
+ SymbolTable symtab;
+ DualMap symbol_map;
+ NameMunger empty_munger;
+ RangeSink symbol_sink(&debug_file().file_data(),
+ sink->options(),
+ DataSource::kRawSymbols,
+ &sinks[0]->MapAtIndex(0));
+ symbol_sink.AddOutput(&symbol_map, &empty_munger);
+ ParseSymbols(debug_file().file_data().data(), &symtab, &symbol_sink);
+ dwarf::File dwarf;
+ ReadDebugSectionsFromMachO(debug_file().file_data(), &dwarf);
+ ReadDWARFCompileUnits(dwarf, symtab, symbol_map, sink);
+ ParseSymbols(sink->input_file().data(), nullptr, sink);
+ break;
+ }
case DataSource::kArchiveMembers:
- case DataSource::kCompileUnits:
case DataSource::kInlines:
default:
THROW("Mach-O doesn't support this data source");
diff --git a/src/range_map.cc b/src/range_map.cc
index 5c3658e..39184b2 100644
--- a/src/range_map.cc
+++ b/src/range_map.cc
@@ -18,6 +18,8 @@
namespace bloaty {
+constexpr uint64_t RangeMap::kUnknownSize;
+
template <class T>
uint64_t RangeMap::TranslateWithEntry(T iter, uint64_t addr) const {
assert(EntryContains(iter, addr));
diff --git a/src/range_map.h b/src/range_map.h
index d16aea0..d2c62b2 100644
--- a/src/range_map.h
+++ b/src/range_map.h
@@ -136,7 +136,7 @@
}
}
- static const uint64_t kUnknownSize = UINT64_MAX;
+ static constexpr uint64_t kUnknownSize = UINT64_MAX;
private:
friend class RangeMapTest;
diff --git a/src/webassembly.cc b/src/webassembly.cc
new file mode 100644
index 0000000..afbbd92
--- /dev/null
+++ b/src/webassembly.cc
@@ -0,0 +1,409 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "bloaty.h"
+
+#include "absl/strings/substitute.h"
+
+ABSL_ATTRIBUTE_NORETURN
+static void Throw(const char *str, int line) {
+ throw bloaty::Error(str, __FILE__, line);
+}
+
+#define THROW(msg) Throw(msg, __LINE__)
+#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
+#define WARN(x) fprintf(stderr, "bloaty: %s\n", x);
+
+using absl::string_view;
+
+namespace bloaty {
+namespace wasm {
+
+template <class T>
+T ReadMemcpy(string_view* data) {
+ T ret;
+ if (data->size() < sizeof(T)) {
+ THROW("premature EOF reading fixed-length wasm data");
+ }
+ memcpy(&ret, data->data(), sizeof(T));
+ data->remove_prefix(sizeof(T));
+ return ret;
+}
+
+uint64_t ReadLEB128Internal(bool is_signed, size_t size, string_view* data) {
+ uint64_t ret = 0;
+ int shift = 0;
+ int maxshift = 70;
+ const char* ptr = data->data();
+ const char* limit = ptr + data->size();
+
+ while (ptr < limit && shift < maxshift) {
+ char byte = *(ptr++);
+ ret |= static_cast<uint64_t>(byte & 0x7f) << shift;
+ shift += 7;
+ if ((byte & 0x80) == 0) {
+ data->remove_prefix(ptr - data->data());
+ if (is_signed && shift < size && (byte & 0x40)) {
+ ret |= -(1ULL << shift);
+ }
+ return ret;
+ }
+ }
+
+ THROW("corrupt wasm data, unterminated LEB128");
+}
+
+bool ReadVarUInt1(string_view* data) {
+ return static_cast<bool>(ReadLEB128Internal(false, 1, data));
+}
+
+uint8_t ReadVarUInt7(string_view* data) {
+ return static_cast<char>(ReadLEB128Internal(false, 7, data));
+}
+
+uint32_t ReadVarUInt32(string_view* data) {
+ return static_cast<uint32_t>(ReadLEB128Internal(false, 32, data));
+}
+
+int8_t ReadVarint7(string_view* data) {
+ return static_cast<int8_t>(ReadLEB128Internal(true, 7, data));
+}
+
+string_view ReadPiece(size_t bytes, string_view* data) {
+ if(data->size() < bytes) {
+ THROW("premature EOF reading variable-length DWARF data");
+ }
+ string_view ret = data->substr(0, bytes);
+ data->remove_prefix(bytes);
+ return ret;
+}
+
+bool ReadMagic(string_view* data) {
+ const uint32_t wasm_magic = 0x6d736100;
+ uint32_t magic = ReadMemcpy<uint32_t>(data);
+
+ if (magic != wasm_magic) {
+ return false;
+ }
+
+ // TODO(haberman): do we need to fail if this is >1?
+ uint32_t version = ReadMemcpy<uint32_t>(data);
+ (void)version;
+
+ return true;
+}
+
+class Section {
+ public:
+ uint32_t id;
+ std::string name;
+ string_view data;
+ string_view contents;
+
+ static Section Read(string_view* data_param) {
+ Section ret;
+ string_view data = *data_param;
+ string_view section_data = data;
+
+ ret.id = ReadVarUInt7(&data);
+ uint32_t size = ReadVarUInt32(&data);
+ string_view next_section = data.substr(size);
+ data = data.substr(0, size);
+ size_t header_size = data.data() - section_data.data();
+ ret.data = section_data.substr(0, size + header_size);
+
+ if (ret.id == 0) {
+ uint32_t name_len = ReadVarUInt32(&data);
+ ret.name = std::string(ReadPiece(name_len, &data));
+ } else if (ret.id <= 11) {
+ ret.name = names[ret.id];
+ } else {
+ THROWF("Unknown section id: $0", ret.id);
+ }
+
+ ret.contents = data;
+ *data_param = next_section;
+ return ret;
+ }
+
+ enum Name {
+ kType = 1,
+ kImport = 2,
+ kFunction = 3,
+ kTable = 4,
+ kMemory = 5,
+ kGlobal = 6,
+ kExport = 7,
+ kStart = 8,
+ kElement = 9,
+ kCode = 10,
+ kData = 11,
+ };
+
+ static const char* names[];
+};
+
+const char* Section::names[] = {
+ "<none>", // 0
+ "Type", // 1
+ "Import", // 2
+ "Function", // 3
+ "Table", // 4
+ "Memory", // 5
+ "Global", // 6
+ "Export", // 7
+ "Start", // 8
+ "Element", // 9
+ "Code", // 10
+ "Data", // 11
+};
+
+struct ExternalKind {
+ enum Kind {
+ kFunction = 0,
+ kTable = 1,
+ kMemory = 2,
+ kGlobal = 3,
+ };
+};
+
+template <class Func>
+void ForEachSection(string_view file, Func&& section_func) {
+ string_view data = file;
+ ReadMagic(&data);
+
+ while (!data.empty()) {
+ Section section = Section::Read(&data);
+ section_func(section);
+ }
+}
+
+void ParseSections(RangeSink* sink) {
+ ForEachSection(sink->input_file().data(), [sink](const Section& section) {
+ sink->AddFileRange("wasm_sections", section.name, section.data);
+ });
+}
+
+typedef std::unordered_map<int, std::string> FuncNames;
+
+void ReadFunctionNames(const Section& section, FuncNames* names,
+ RangeSink* sink) {
+ enum class NameType {
+ kModule = 0,
+ kFunction = 1,
+ kLocal = 2,
+ };
+
+ string_view data = section.contents;
+
+ while (!data.empty()) {
+ char type = ReadVarUInt7(&data);
+ uint32_t size = ReadVarUInt32(&data);
+ string_view section = data.substr(0, size);
+ data = data.substr(size);
+
+ if (static_cast<NameType>(type) == NameType::kFunction) {
+ uint32_t count = ReadVarUInt32(&section);
+ for (uint32_t i = 0; i < count; i++) {
+ string_view entry = section;
+ uint32_t index = ReadVarUInt32(&section);
+ uint32_t name_len = ReadVarUInt32(&section);
+ string_view name = ReadPiece(name_len, &section);
+ entry = entry.substr(0, name.data() - entry.data() + name.size());
+ sink->AddFileRange("wasm_funcname", name, entry);
+ (*names)[index] = std::string(name);
+ }
+ }
+ }
+}
+
+int ReadValueType(string_view* data) {
+ return ReadVarint7(data);
+}
+
+int ReadElemType(string_view* data) {
+ return ReadVarint7(data);
+}
+
+void ReadResizableLimits(string_view* data) {
+ auto flags = ReadVarUInt1(data);
+ ReadVarUInt32(data);
+ if (flags) {
+ ReadVarUInt32(data);
+ }
+}
+
+void ReadGlobalType(string_view* data) {
+ ReadValueType(data);
+ ReadVarUInt1(data);
+}
+
+void ReadTableType(string_view* data) {
+ ReadElemType(data);
+ ReadResizableLimits(data);
+}
+
+void ReadMemoryType(string_view* data) {
+ ReadResizableLimits(data);
+}
+
+uint32_t GetNumFunctionImports(const Section& section) {
+ assert(section.id == Section::kImport);
+ string_view data = section.contents;
+
+ uint32_t count = ReadVarUInt32(&data);
+ uint32_t func_count = 0;
+
+ for (uint32_t i = 0; i < count; i++) {
+ uint32_t module_len = ReadVarUInt32(&data);
+ ReadPiece(module_len, &data);
+ uint32_t field_len = ReadVarUInt32(&data);
+ ReadPiece(field_len, &data);
+ auto kind = ReadMemcpy<uint8_t>(&data);
+
+ switch (kind) {
+ case ExternalKind::kFunction:
+ func_count++;
+ ReadVarUInt32(&data);
+ break;
+ case ExternalKind::kTable:
+ ReadTableType(&data);
+ break;
+ case ExternalKind::kMemory:
+ ReadMemoryType(&data);
+ break;
+ case ExternalKind::kGlobal:
+ ReadGlobalType(&data);
+ break;
+ default:
+ THROWF("Unrecognized import kind: $0", kind);
+ }
+ }
+
+ return func_count;
+}
+
+void ReadCodeSection(const Section& section, const FuncNames& names,
+ uint32_t num_imports, RangeSink* sink) {
+ string_view data = section.contents;
+
+ uint32_t count = ReadVarUInt32(&data);
+
+ for (uint32_t i = 0; i < count; i++) {
+ string_view func = data;
+ uint32_t size = ReadVarUInt32(&data);
+ uint32_t total_size = size + (data.data() - func.data());
+
+ func = func.substr(0, total_size);
+ data = data.substr(size);
+
+ auto iter = names.find(num_imports + i);
+
+ if (iter == names.end()) {
+ std::string name = "func[" + std::to_string(i) + "]";
+ sink->AddFileRange("wasm_function", name, func);
+ } else {
+ sink->AddFileRange("wasm_function", iter->second, func);
+ }
+ }
+}
+
+void ParseSymbols(RangeSink* sink) {
+ // First pass: read the custom naming section to get function names.
+ std::unordered_map<int, std::string> func_names;
+ uint32_t num_imports = 0;
+
+ ForEachSection(sink->input_file().data(),
+ [&func_names, sink](const Section& section) {
+ if (section.name == "name") {
+ ReadFunctionNames(section, &func_names, sink);
+ }
+ });
+
+ // Second pass: read the function/code sections.
+ ForEachSection(sink->input_file().data(),
+ [&func_names, &num_imports, sink](const Section& section) {
+ if (section.id == Section::kImport) {
+ num_imports = GetNumFunctionImports(section);
+ } else if (section.id == Section::kCode) {
+ ReadCodeSection(section, func_names, num_imports, sink);
+ }
+ });
+}
+
+void AddWebAssemblyFallback(RangeSink* sink) {
+ ForEachSection(sink->input_file().data(), [sink](const Section& section) {
+ std::string name2 =
+ std::string("[section ") + std::string(section.name) + std::string("]");
+ sink->AddFileRange("wasm_overhead", name2, section.data);
+ });
+ sink->AddFileRange("wasm_overhead", "[WASM Header]",
+ sink->input_file().data().substr(0, 8));
+}
+
+class WebAssemblyObjectFile : public ObjectFile {
+ public:
+ WebAssemblyObjectFile(std::unique_ptr<InputFile> file_data)
+ : ObjectFile(std::move(file_data)) {}
+
+ std::string GetBuildId() const override {
+ // TODO(haberman): does WebAssembly support this?
+ return std::string();
+ }
+
+ void ProcessFile(const std::vector<RangeSink*>& sinks) const override {
+ for (auto sink : sinks) {
+ switch (sink->data_source()) {
+ case DataSource::kSegments:
+ case DataSource::kSections:
+ ParseSections(sink);
+ break;
+ case DataSource::kSymbols:
+ case DataSource::kRawSymbols:
+ case DataSource::kShortSymbols:
+ case DataSource::kFullSymbols:
+ ParseSymbols(sink);
+ break;
+ case DataSource::kArchiveMembers:
+ case DataSource::kCompileUnits:
+ case DataSource::kInlines:
+ default:
+ THROW("WebAssembly doesn't support this data source");
+ }
+ AddWebAssemblyFallback(sink);
+ }
+ }
+
+ bool GetDisassemblyInfo(absl::string_view /*symbol*/,
+ DataSource /*symbol_source*/,
+ DisassemblyInfo* /*info*/) const override {
+ WARN("WebAssembly files do not support disassembly yet");
+ return false;
+ }
+};
+
+} // namespace wasm
+
+std::unique_ptr<ObjectFile> TryOpenWebAssemblyFile(
+ std::unique_ptr<InputFile>& file) {
+ string_view data = file->data();
+ if (wasm::ReadMagic(&data)) {
+ return std::unique_ptr<ObjectFile>(
+ new wasm::WebAssemblyObjectFile(std::move(file)));
+ }
+
+ return nullptr;
+}
+
+} // namespace bloaty
diff --git a/tests/testdata/fuzz_corpus/5b69d5a1654e6f7a84538fa8af16414ea44a00f5 b/tests/testdata/fuzz_corpus/5b69d5a1654e6f7a84538fa8af16414ea44a00f5
new file mode 100644
index 0000000..791827c
--- /dev/null
+++ b/tests/testdata/fuzz_corpus/5b69d5a1654e6f7a84538fa8af16414ea44a00f5
Binary files differ
diff --git a/third_party/abseil-cpp b/third_party/abseil-cpp
index cc4bed2..bea85b5 160000
--- a/third_party/abseil-cpp
+++ b/third_party/abseil-cpp
@@ -1 +1 @@
-Subproject commit cc4bed2d74f7c8717e31f9579214ab52a9c9c610
+Subproject commit bea85b52733022294eef108a2e42d77b616ddca2
diff --git a/third_party/capstone b/third_party/capstone
index 1145eb2..bfa649f 160000
--- a/third_party/capstone
+++ b/third_party/capstone
@@ -1 +1 @@
-Subproject commit 1145eb273460151821d265c3bb9766fe59d1bd03
+Subproject commit bfa649ff1a0ed3807f94ac2d5690c5c3108cc75f
diff --git a/third_party/demumble b/third_party/demumble
index 2fb6ff4..01098ea 160000
--- a/third_party/demumble
+++ b/third_party/demumble
@@ -1 +1 @@
-Subproject commit 2fb6ff499ca3659b0d80d82373c3de419e2fefd7
+Subproject commit 01098eab821b33bd31b9778aea38565cd796aa85
diff --git a/third_party/googletest b/third_party/googletest
index 7b6561c..b95a702 160000
--- a/third_party/googletest
+++ b/third_party/googletest
@@ -1 +1 @@
-Subproject commit 7b6561c56e353100aca8458d7bc49c4e0119bae8
+Subproject commit b95a702d5f5f25e76f61ba8f9140de7a37b91b11
diff --git a/third_party/protobuf b/third_party/protobuf
index f850188..f504d95 160000
--- a/third_party/protobuf
+++ b/third_party/protobuf
@@ -1 +1 @@
-Subproject commit f850188e6e1021b4fe21ecb0aca548a54c272ce5
+Subproject commit f504d95d461319ad9fc11e0925569b84a4c65d7d
diff --git a/third_party/re2 b/third_party/re2
index 16dd885..3b4a3d5 160000
--- a/third_party/re2
+++ b/third_party/re2
@@ -1 +1 @@
-Subproject commit 16dd8856b79b3c6163a5b6da40aa45267031a79d
+Subproject commit 3b4a3d57f3a0231cfb70ad649099c3aed0499555