Merge branch 'upstream/master' into fuchsia
Change-Id: I86bf67113dbb17d7e77e97fa71125f5cb2c78a6d
diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..4e34e72
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,2 @@
+
+BasedOnStyle: google
diff --git a/.gitignore b/.gitignore
index 1a6c563..67d9094 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,8 +9,10 @@
.cache
/build
/CMakeFiles
+/Testing
CMakeCache.txt
CTestTestfile.cmake
+DartConfiguration.tcl
Makefile
bloaty_package.bloaty
capstone.pc
diff --git a/.gitmodules b/.gitmodules
index c17f611..e6b7b11 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -15,7 +15,10 @@
url = https://github.com/aquynh/capstone.git
[submodule "third_party/demumble"]
path = third_party/demumble
- url = https://github.com/tengyifei/demumble.git
+ url = https://github.com/nico/demumble.git
[submodule "third_party/rustc-demangle"]
path = third_party/rustc-demangle
url = https://github.com/alexcrichton/rustc-demangle.git
+[submodule "third_party/zlib"]
+ path = third_party/zlib
+ url = https://github.com/madler/zlib
diff --git a/.travis.yml b/.travis.yml
index 70af010..be51f42 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,7 @@
language: cpp
-dist: trusty
+dist: bionic
+osx_image: xcode11
compiler:
- clang
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 92c3944..bbebe3f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,12 +3,15 @@
project (Bloaty VERSION 1.1)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_GENERATOR "Ninja")
+include(CTest)
+set_property(GLOBAL PROPERTY USE_FOLDERS ON) # Group projects in visual studio
# Options we define for users.
option(BLOATY_ENABLE_ASAN "Enable address sanitizer." OFF)
option(BLOATY_ENABLE_UBSAN "Enable undefined behavior sanitizer." OFF)
option(BLOATY_ENABLE_CMAKETARGETS "Enable installing cmake target files." ON)
option(BLOATY_ENABLE_BUILDID "Enable build id." ON)
+option(BLOATY_ENABLE_RE2 "Enable the support for regular expression functions." ON)
if(APPLE)
# When building bloaty on macOS infra builders,
@@ -24,31 +27,37 @@
if(UNIX)
find_package(PkgConfig)
-if(${PKG_CONFIG_FOUND})
-pkg_search_module(RE2 re2)
+find_package(ZLIB)
+if(BLOATY_ENABLE_RE2)
+ pkg_search_module(RE2 re2)
+endif()
pkg_search_module(CAPSTONE capstone)
# Always use bundled protobuf, to accommodate building for other systems
# without protobuf installed.
# pkg_search_module(PROTOBUF protobuf)
-if(${RE2_FOUND})
- MESSAGE(STATUS "System re2 found, using")
-else(${RE2_FOUND})
- MESSAGE(STATUS "System re2 not found, using bundled version")
-endif(${RE2_FOUND})
-if(${CAPSTONE_FOUND})
+if(BLOATY_ENABLE_RE2)
+ if(RE2_FOUND)
+ MESSAGE(STATUS "System re2 found, using")
+ else()
+ MESSAGE(STATUS "System re2 not found, using bundled version")
+ endif()
+endif()
+if(CAPSTONE_FOUND)
MESSAGE(STATUS "System capstone found, using")
-else(${CAPSTONE_FOUND})
+else()
MESSAGE(STATUS "System capstone not found, using bundled version")
-endif(${CAPSTONE_FOUND})
-if(${PROTOBUF_FOUND})
+endif()
+if(PROTOBUF_FOUND)
MESSAGE(STATUS "System protobuf found, using")
-else(${PROTOBUF_FOUND})
+else()
MESSAGE(STATUS "System protobuf not found, using bundled version")
-endif(${PROTOBUF_FOUND})
-else(${PKG_CONFIG_FOUND})
- MESSAGE(STATUS "pkg-config not found, using bundled dependencies")
-endif(${PKG_CONFIG_FOUND})
-endif(UNIX)
+endif()
+if (ZLIB_FOUND)
+ MESSAGE(STATUS "System zlib found, using")
+else()
+ MESSAGE(STATUS "System zlib not found, using bundled version")
+endif()
+endif()
# Set default build type.
if(NOT CMAKE_BUILD_TYPE)
@@ -67,41 +76,90 @@
# Add third_party libraries, disabling as much as we can of their builds.
add_definitions(-D_LIBCXXABI_FUNC_VIS=) # For Demumble.
+if(BLOATY_ENABLE_RE2)
+ add_definitions(-DUSE_RE2)
+endif()
+
+# Set MSVC runtime before including thirdparty libraries
+if(MSVC)
+  # Link the runtime library statically (/MD -> /MT, /MDd -> /MTd) so that MSVCR*.DLL is
+  # not required at runtime: https://msdn.microsoft.com/en-us/library/2kzt1wy3.aspx
+  # http://www.cmake.org/Wiki/CMake_FAQ#How_can_I_build_my_MSVC_application_with_a_static_runtime.3F
+  # if() needs ${flag_var} (a bare flag_var tests the variable's name); C flags keep C code on the same runtime.
+  foreach(flag_var
+      CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
+      CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO)
+    if(${flag_var} MATCHES "/MD")
+      string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
+    endif()
+  endforeach()
+endif()
+
+set(THREADS_PREFER_PTHREAD_FLAG TRUE)
+find_package(Threads REQUIRED)
if(UNIX)
- if(${RE2_FOUND})
- include_directories(${RE2_INCLUDE_DIRS})
- else(${RE2_FOUND})
- set(RE2_BUILD_TESTING OFF CACHE BOOL "enable testing for RE2" FORCE)
- add_subdirectory(third_party/re2)
- include_directories(third_party/re2)
- endif(${RE2_FOUND})
- if(${CAPSTONE_FOUND})
+ if(BLOATY_ENABLE_RE2)
+ if(RE2_FOUND)
+ include_directories(${RE2_INCLUDE_DIRS})
+ else()
+ set(RE2_BUILD_TESTING OFF CACHE BOOL "enable testing for RE2" FORCE)
+ add_subdirectory(third_party/re2)
+ include_directories(third_party/re2)
+ endif()
+ endif()
+ if(CAPSTONE_FOUND)
include_directories(${CAPSTONE_INCLUDE_DIRS})
- else(${CAPSTONE_FOUND})
+ else()
set(CAPSTONE_BUILD_SHARED OFF CACHE BOOL "Build shared library" FORCE)
set(CAPSTONE_BUILD_TESTS OFF CACHE BOOL "Build tests" FORCE)
add_subdirectory(third_party/capstone)
include_directories(third_party/capstone/include)
- endif(${CAPSTONE_FOUND})
- if(${PROTOBUF_FOUND})
+ endif()
+ if(PROTOBUF_FOUND)
include_directories(${PROTOBUF_INCLUDE_DIRS})
- else(${PROTOBUF_FOUND})
+ else()
set(protobuf_BUILD_TESTS OFF CACHE BOOL "enable tests for proto2" FORCE)
set(protobuf_BUILD_SHARED_LIBS OFF CACHE BOOL "enable shared libs for proto2" FORCE)
add_subdirectory(third_party/protobuf/cmake)
include_directories(SYSTEM third_party/protobuf/src)
- endif(${PROTOBUF_FOUND})
-else(UNIX)
+ endif()
+ if(NOT ZLIB_FOUND)
+ add_subdirectory(third_party/zlib)
+ include_directories(third_party/zlib)
+ endif()
+else()
+ if(BLOATY_ENABLE_RE2)
+ set(RE2_BUILD_TESTING OFF CACHE BOOL "enable testing for RE2" FORCE)
+ add_subdirectory(third_party/re2)
+ include_directories(third_party/re2)
+ set_property(TARGET re2 PROPERTY FOLDER "third_party")
+ endif()
+
+ set(CAPSTONE_BUILD_SHARED OFF CACHE BOOL "Build shared library" FORCE)
+ set(CAPSTONE_BUILD_TESTS OFF CACHE BOOL "Build tests" FORCE)
+
+ add_subdirectory(third_party/capstone)
+ include_directories(third_party/capstone/include)
+ set_property(TARGET capstone-static PROPERTY FOLDER "third_party")
+
set(protobuf_BUILD_TESTS OFF CACHE BOOL "enable tests for proto2" FORCE)
set(protobuf_BUILD_SHARED_LIBS OFF CACHE BOOL "enable shared libs for proto2" FORCE)
- add_subdirectory(third_party/re2)
- add_subdirectory(third_party/capstone)
add_subdirectory(third_party/protobuf/cmake)
- include_directories(third_party/re2)
- include_directories(third_party/capstone/include)
include_directories(SYSTEM third_party/protobuf/src)
-endif(UNIX)
+
+ add_subdirectory(third_party/zlib)
+ include_directories(third_party/zlib)
+ include_directories(${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib)
+ set_property(TARGET example PROPERTY FOLDER "third_party")
+ set_property(TARGET minigzip PROPERTY FOLDER "third_party")
+ set_property(TARGET zlib PROPERTY FOLDER "third_party")
+ set_property(TARGET zlibstatic PROPERTY FOLDER "third_party")
+ set_property(TARGET libprotobuf PROPERTY FOLDER "third_party")
+ set_property(TARGET libprotobuf-lite PROPERTY FOLDER "third_party")
+ set_property(TARGET libprotoc PROPERTY FOLDER "third_party")
+ set_property(TARGET protoc PROPERTY FOLDER "third_party")
+endif()
include_directories(.)
include_directories(src)
@@ -110,11 +168,15 @@
include_directories("${CMAKE_CURRENT_BINARY_DIR}/src")
# Baseline build flags.
-set(CMAKE_CXX_FLAGS "-std=c++17 -W -Wall -Wno-sign-compare")
-set(CMAKE_CXX_FLAGS_DEBUG "-g1")
-set(CMAKE_CXX_FLAGS_RELEASE "-O3")
-set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g1")
-set_source_files_properties(third_party/demumble/third_party/libcxxabi/cxa_demangle.cpp PROPERTIES COMPILE_FLAGS -Wno-implicit-fallthrough)
+if(MSVC)
+ set(CMAKE_CXX_FLAGS "/EHsc /wd4018 /D_CRT_SECURE_NO_WARNINGS /DNOMINMAX")
+else()
+ set(CMAKE_CXX_FLAGS "-std=c++17 -W -Wall -Wno-sign-compare")
+ set(CMAKE_CXX_FLAGS_DEBUG "-g1")
+ set(CMAKE_CXX_FLAGS_RELEASE "-O3")
+ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g1")
+ set_source_files_properties(third_party/demumble/third_party/libcxxabi/cxa_demangle.cpp PROPERTIES COMPILE_FLAGS -Wno-implicit-fallthrough)
+endif()
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexceptions -ffunction-sections -fdata-sections")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions -ffunction-sections -fdata-sections")
@@ -129,7 +191,7 @@
elseif(UNIX)
if(BLOATY_ENABLE_BUILDID)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id")
- endif(BLOATY_ENABLE_BUILDID)
+ endif()
endif()
# When using Ninja, compiler output won't be colorized without this.
@@ -155,7 +217,7 @@
endif()
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/src)
-if(${PROTOC_FOUND})
+if(PROTOC_FOUND)
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/src/bloaty.pb.cc
DEPENDS protoc ${CMAKE_CURRENT_SOURCE_DIR}/src/bloaty.proto
@@ -170,7 +232,7 @@
--cpp_out=${CMAKE_CURRENT_BINARY_DIR}/src
-I${CMAKE_CURRENT_SOURCE_DIR}/src
)
-else(${PROTOC_FOUND})
+else()
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/src/bloaty.pb.cc
COMMAND protoc ${CMAKE_CURRENT_SOURCE_DIR}/src/bloaty.proto
@@ -183,33 +245,42 @@
--cpp_out=${CMAKE_CURRENT_BINARY_DIR}/src
-I${CMAKE_CURRENT_SOURCE_DIR}/src
)
-endif(${PROTOC_FOUND})
+endif()
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/src/bloaty_package.bloaty
DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
add_library(libbloaty STATIC
src/bloaty.cc
- src/demangle.cc
+ src/bloaty.h
src/disassemble.cc
${CMAKE_CURRENT_BINARY_DIR}/src/bloaty.pb.cc
${CMAKE_CURRENT_BINARY_DIR}/src/report.pb.cc
src/dwarf.cc
+ src/dwarf_constants.h
src/elf.cc
src/link_map.cc
src/macho.cc
+ src/pe.cc
+ third_party/lief_pe/pe_structures.h
src/range_map.cc
+ src/range_map.h
+ src/re.h
+ src/util.cc
+ src/util.h
src/webassembly.cc
src/write_bloaty_report.cc
# Until Abseil has a proper CMake build system
third_party/abseil-cpp/absl/base/internal/raw_logging.cc # Grrrr...
third_party/abseil-cpp/absl/base/internal/throw_delegate.cc
+ third_party/abseil-cpp/absl/debugging/internal/demangle.cc
third_party/abseil-cpp/absl/numeric/int128.cc
third_party/abseil-cpp/absl/strings/ascii.cc
third_party/abseil-cpp/absl/strings/charconv.cc
third_party/abseil-cpp/absl/strings/escaping.cc
third_party/abseil-cpp/absl/strings/internal/charconv_bigint.cc
third_party/abseil-cpp/absl/strings/internal/charconv_parse.cc
+ third_party/abseil-cpp/absl/strings/internal/escaping.cc
third_party/abseil-cpp/absl/strings/internal/memutil.cc
third_party/abseil-cpp/absl/strings/internal/utf8.cc
third_party/abseil-cpp/absl/strings/match.cc
@@ -222,6 +293,7 @@
# One source file, no special build system needed.
third_party/demumble/third_party/libcxxabi/cxa_demangle.cpp
)
+set_property(TARGET libbloaty PROPERTY FOLDER "bloaty")
# Teach CMake how to build rustc-demangle
# RUST_TOOLCHAIN_PREFIX is the directory containing the `cargo` executable.
@@ -272,48 +344,65 @@
if(UNIX)
set(LIBBLOATY_LIBS libbloaty)
- if(${PROTOBUF_FOUND})
- set(LIBBLOATY_LIBS ${LIBBLOATY_LIBS} ${PROTOBUF_LIBRARIES})
- else(${PROTOBUF_FOUND})
- set(LIBBLOATY_LIBS ${LIBBLOATY_LIBS} libprotoc)
- endif(${PROTOBUF_FOUND})
- if(${RE2_FOUND})
- set(LIBBLOATY_LIBS ${LIBBLOATY_LIBS} ${RE2_LIBRARIES})
- else(${RE2_FOUND})
- set(LIBBLOATY_LIBS ${LIBBLOATY_LIBS} re2)
- endif(${RE2_FOUND})
- if(${CAPSTONE_FOUND})
- set(LIBBLOATY_LIBS ${LIBBLOATY_LIBS} ${CAPSTONE_LIBRARIES})
- else(${CAPSTONE_FOUND})
- set(LIBBLOATY_LIBS ${LIBBLOATY_LIBS} capstone-static)
- endif(${CAPSTONE_FOUND})
+ if(PROTOBUF_FOUND)
+ list(APPEND LIBBLOATY_LIBS ${PROTOBUF_LIBRARIES})
+ else()
+ list(APPEND LIBBLOATY_LIBS libprotoc)
+ endif()
+ if(BLOATY_ENABLE_RE2)
+ if(RE2_FOUND)
+ list(APPEND LIBBLOATY_LIBS ${RE2_LIBRARIES})
+ else()
+ list(APPEND LIBBLOATY_LIBS re2)
+ endif()
+ endif()
+ if(CAPSTONE_FOUND)
+ list(APPEND LIBBLOATY_LIBS ${CAPSTONE_LIBRARIES})
+ else()
+ list(APPEND LIBBLOATY_LIBS capstone-static)
+ endif()
+ if(ZLIB_FOUND)
+ list(APPEND LIBBLOATY_LIBS ZLIB::ZLIB)
+ else()
+ list(APPEND LIBBLOATY_LIBS zlib)
+ endif()
+
set(LIBBLOATY_LIBS ${LIBBLOATY_LIBS} "${CMAKE_CURRENT_SOURCE_DIR}/third_party/rustc-demangle/target/release/librustc_demangle.a" dl)
-else(UNIX)
- set(LIBBLOATY_LIBS libbloaty libprotoc re2 capstone-static "${CMAKE_CURRENT_SOURCE_DIR}/third_party/rustc-demangle/target/release/librustc_demangle.a" dl)
-endif(UNIX)
+
+else()
+ set(LIBBLOATY_LIBS libbloaty libprotoc capstone-static "${CMAKE_CURRENT_SOURCE_DIR}/third_party/rustc-demangle/target/release/librustc_demangle.a" dl)
+ if(BLOATY_ENABLE_RE2)
+ list(APPEND LIBBLOATY_LIBS re2)
+ endif()
+ list(APPEND LIBBLOATY_LIBS zlibstatic)
+endif()
if(UNIX)
- if(${RE2_FOUND})
- link_directories(${RE2_LIBRARY_DIRS})
- endif(${RE2_FOUND})
- if(${CAPSTONE_FOUND})
+ if(BLOATY_ENABLE_RE2)
+ if(RE2_FOUND)
+ link_directories(${RE2_LIBRARY_DIRS})
+ endif()
+ endif()
+ if(CAPSTONE_FOUND)
link_directories(${CAPSTONE_LIBRARY_DIRS})
- endif(${CAPSTONE_FOUND})
- if(${PROTOBUF_FOUND})
+ endif()
+ if(PROTOBUF_FOUND)
link_directories(${PROTOBUF_LIBRARY_DIRS})
- endif(${PROTOBUF_FOUND})
-endif(UNIX)
+ endif()
+endif()
+
+list(APPEND LIBBLOATY_LIBS Threads::Threads)
if(DEFINED ENV{LIB_FUZZING_ENGINE})
message("LIB_FUZZING_ENGINE set, building fuzz_target instead of Bloaty")
add_executable(fuzz_target tests/fuzz_target.cc)
- target_link_libraries(fuzz_target "${LIBBLOATY_LIBS}" "${CMAKE_THREAD_LIBS_INIT}" $ENV{LIB_FUZZING_ENGINE})
+ target_link_libraries(fuzz_target ${LIBBLOATY_LIBS} $ENV{LIB_FUZZING_ENGINE})
else()
include(CheckIPOSupported)
check_ipo_supported(RESULT ipo_supported OUTPUT ipo_error)
add_executable(bloaty src/main.cc)
- target_link_libraries(bloaty "${LIBBLOATY_LIBS}")
+ target_link_libraries(bloaty ${LIBBLOATY_LIBS})
if(ipo_supported)
message(STATUS "IPO / LTO enabled")
@@ -332,18 +421,20 @@
target_link_libraries(bloaty "${CMAKE_THREAD_LIBS_INIT}")
endif()
+ set_property(TARGET bloaty PROPERTY FOLDER "bloaty")
+
if(BLOATY_ENABLE_CMAKETARGETS)
install(
TARGETS bloaty
EXPORT ${PROJECT_NAME}Targets
RUNTIME DESTINATION bin
)
- else(BLOATY_ENABLE_CMAKETARGETS)
+ else()
install(
TARGETS bloaty
RUNTIME DESTINATION bin
)
- endif(BLOATY_ENABLE_CMAKETARGETS)
+ endif()
if (IS_DIRECTORY "${PROJECT_SOURCE_DIR}/tests")
enable_testing()
@@ -357,6 +448,7 @@
set(TEST_TARGETS
bloaty_test
bloaty_report_test
+ bloaty_test_pe
bloaty_misc_test
link_map_test
range_map_test
@@ -364,11 +456,17 @@
foreach(target ${TEST_TARGETS})
add_executable(${target} tests/${target}.cc)
- target_link_libraries(${target} "${LIBBLOATY_LIBS}" gtest_main gmock "${CMAKE_THREAD_LIBS_INIT}")
+ target_link_libraries(${target} ${LIBBLOATY_LIBS} gtest_main gmock)
+ set_property(TARGET ${target} PROPERTY FOLDER "tests")
endforeach(target)
add_executable(fuzz_test tests/fuzz_target.cc tests/fuzz_driver.cc)
- target_link_libraries(fuzz_test "${LIBBLOATY_LIBS}" "${CMAKE_THREAD_LIBS_INIT}")
+ target_link_libraries(fuzz_test ${LIBBLOATY_LIBS})
+ set_property(TARGET fuzz_test PROPERTY FOLDER "tests")
+
+ foreach(testlib gmock gmock_main gtest gtest_main)
+ set_property(TARGET ${testlib} PROPERTY FOLDER "tests/libs")
+ endforeach(testlib)
file(GLOB fuzz_corpus tests/testdata/fuzz_corpus/*)
@@ -377,6 +475,8 @@
add_test(NAME bloaty_test_x86-64 COMMAND bloaty_test WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/testdata/linux-x86_64)
add_test(NAME bloaty_test_x86 COMMAND bloaty_test WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/testdata/linux-x86)
add_test(NAME bloaty_report_test_x86-64 COMMAND bloaty_report_test WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/testdata/linux-x86_64)
+ add_test(NAME bloaty_test_pe_x64 COMMAND bloaty_test_pe WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/testdata/PE/x64)
+ add_test(NAME bloaty_test_pe_x86 COMMAND bloaty_test_pe WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/testdata/PE/x86)
add_test(NAME bloaty_misc_test COMMAND bloaty_misc_test WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/testdata/misc)
add_test(NAME fuzz_test COMMAND fuzz_test ${fuzz_corpus} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests/testdata/fuzz_corpus)
endif()
@@ -384,5 +484,5 @@
if(BLOATY_ENABLE_CMAKETARGETS)
install(EXPORT ${PROJECT_NAME}Targets NAMESPACE ${PROJECT_NAME} DESTINATION lib/${PROJECT_NAME})
- endif(BLOATY_ENABLE_CMAKETARGETS)
+ endif()
endif()
diff --git a/README.md b/README.md
index f655fde..096d7a8 100644
--- a/README.md
+++ b/README.md
@@ -34,13 +34,27 @@
## Building Bloaty
-Building Bloaty requires CMake and ``protoc``, the protobuf compiler. On Ubuntu, install them with:
+Building Bloaty requires CMake. On Ubuntu, install this with:
```
-$ sudo apt install cmake protobuf-compiler
+$ sudo apt install cmake
```
-Bloaty bundles ``libprotobuf``, ``re2``, ``capstone``, and ``pkg-config`` as Git submodules, but it will prefer the system's versions of those dependencies if available. All other dependencies are included as Git submodules. To build, run:
+and on macOS, this can be installed using homebrew with:
+```
+$ brew install cmake
+```
+
+
+Bloaty bundles ``libprotobuf``, ``re2``, ``capstone``, and ``pkg-config`` as Git submodules, and uses ``protoc`` built from libprotobuf, but it will prefer the system's versions of those dependencies if available. All other dependencies are included as Git submodules.
+
+If the Git repository hasn't been cloned with the `--recursive` flag, the submodules can be checked out with:
+
+```
+$ git submodule update --init --recursive
+```
+
+To build, run:
```
$ cmake .
diff --git a/appveyor.yml b/appveyor.yml
new file mode 100644
index 0000000..933bc45
--- /dev/null
+++ b/appveyor.yml
@@ -0,0 +1,8 @@
+version: 1.0.{build}
+image: Visual Studio 2019
+build_script:
+ - cmd: mkdir build && cd build
+ - cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo
+ - cmake --build . --config RelWithDebInfo
+test_script:
+ - ctest -C RelWithDebInfo
diff --git a/src/bloaty.cc b/src/bloaty.cc
index fb28cd7..ac3628b 100644
--- a/src/bloaty.cc
+++ b/src/bloaty.cc
@@ -34,12 +34,18 @@
#include <math.h>
#include <signal.h>
#include <stdlib.h>
+#if !defined(_MSC_VER)
#include <sys/mman.h>
-#include <sys/stat.h>
-#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
+#else
+#include <Windows.h>
+#endif
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <zlib.h>
+#include "absl/debugging/internal/demangle.h"
#include "absl/memory/memory.h"
#include "absl/strings/numbers.h"
#include "absl/strings/string_view.h"
@@ -47,35 +53,15 @@
#include "absl/strings/substitute.h"
#include "google/protobuf/io/zero_copy_stream_impl.h"
#include "google/protobuf/text_format.h"
-#include "re2/re2.h"
#include "bloaty.h"
#include "bloaty.pb.h"
#include "demangle.h"
#include "rustc_demangle.h"
+#include "re.h"
using absl::string_view;
-#define STRINGIFY(x) #x
-#define TOSTRING(x) STRINGIFY(x)
-#define CHECK_SYSCALL(call) \
- if (call < 0) { \
- perror(#call " " __FILE__ ":" TOSTRING(__LINE__)); \
- exit(1); \
- }
-
-ABSL_ATTRIBUTE_NORETURN
-static void Throw(const char *str, int line) {
- throw bloaty::Error(str, __FILE__, line);
-}
-
-#define THROW(msg) Throw(msg, __LINE__)
-#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
-#define WARN(...) \
- if (verbose_level > 0) { \
- printf("WARNING: %s\n", absl::Substitute(__VA_ARGS__).c_str()); \
- }
-
namespace bloaty {
// Use a global since we would have to plumb it through so many call-stacks
@@ -134,18 +120,15 @@
}
}
-template <typename A, typename B>
-void CheckedAdd(A* accum, B val) {
- // We've only implemented the portable version for a subset of possible types.
- static_assert(std::is_signed<A>::value, "requires signed A");
- static_assert(sizeof(A) == sizeof(B), "requires integers of the same type");
+void CheckedAdd(int64_t* accum, int64_t val) {
#if ABSL_HAVE_BUILTIN(__builtin_add_overflow)
if (__builtin_add_overflow(*accum, val, accum)) {
THROW("integer overflow");
}
#else
- bool safe = *accum < 0 ? (val >= std::numeric_limits<B>::max() - *accum)
- : (val <= std::numeric_limits<B>::max() - *accum);
+  bool safe = *accum < 0
+                  ? (val >= std::numeric_limits<int64_t>::min() - *accum)  // lower bound; max() - negative overflows
+                  : (val <= std::numeric_limits<int64_t>::max() - *accum);
if (!safe) {
THROW("integer overflow");
}
@@ -184,6 +167,7 @@
// Convenience code for iterating over lines of a pipe.
+#if !defined(_MSC_VER)
LineReader::LineReader(LineReader&& other) {
Close();
@@ -237,6 +221,7 @@
return LineReader(pipe, true);
}
+#endif
namespace {
@@ -296,7 +281,8 @@
if (source == DataSource::kShortSymbols) {
char demangled[4096];
- if (::Demangle(demangle_from.data(), demangled, sizeof(demangled))) {
+ if (absl::debugging_internal::Demangle(demangle_from.data(), demangled,
+ sizeof(demangled))) {
return std::string(demangled);
} else {
// TODO(yifeit): Certain symbols have dots (".") in them. Those are not allowed.
@@ -305,7 +291,7 @@
if (pos != absl::string_view::npos) {
demangle_from.remove_suffix(demangle_from.length() - pos);
std::string shortened(demangle_from);
- if (::Demangle(shortened.c_str(), demangled, sizeof(demangled))) {
+ if (absl::debugging_internal::Demangle(shortened.c_str(), demangled, sizeof(demangled))) {
return std::string(demangled);
}
}
@@ -345,8 +331,8 @@
// NameMunger //////////////////////////////////////////////////////////////////
void NameMunger::AddRegex(const std::string& regex, const std::string& replacement) {
- auto re2 = absl::make_unique<RE2>(regex);
- regexes_.push_back(std::make_pair(std::move(re2), replacement));
+ auto reg = absl::make_unique<ReImpl>(regex);
+ regexes_.push_back(std::make_pair(std::move(reg), replacement));
}
std::string NameMunger::Munge(string_view name) const {
@@ -354,7 +340,7 @@
std::string ret(name);
for (const auto& pair : regexes_) {
- if (RE2::Extract(name_str, *pair.first, pair.second, &ret)) {
+ if (ReImpl::Extract(name_str, *pair.first, pair.second, &ret)) {
return ret;
}
}
@@ -392,6 +378,8 @@
class Rollup {
public:
Rollup() {}
+ Rollup(const Rollup&) = delete;
+ Rollup& operator=(const Rollup&) = delete;
Rollup(Rollup&& other) = default;
Rollup& operator=(Rollup&& other) = default;
@@ -421,7 +409,7 @@
CreateRows(row, base, options, true);
}
- void SetFilterRegex(const RE2* regex) {
+ void SetFilterRegex(const ReImpl* regex) {
filter_regex_ = regex;
}
@@ -457,14 +445,12 @@
int64_t filtered_file_total() const { return filtered_file_total_; }
private:
- BLOATY_DISALLOW_COPY_AND_ASSIGN(Rollup);
-
int64_t vm_total_ = 0;
int64_t file_total_ = 0;
int64_t filtered_vm_total_ = 0;
int64_t filtered_file_total_ = 0;
- const RE2* filter_regex_ = nullptr;
+ const ReImpl* filter_regex_ = nullptr;
// Putting Rollup by value seems to work on some compilers/libs but not
// others.
@@ -490,7 +476,7 @@
bool any_matched = false;
for (const auto& name : names) {
- if (RE2::PartialMatch(name, *filter_regex_)) {
+ if (ReImpl::PartialMatch(name, *filter_regex_)) {
any_matched = true;
break;
}
@@ -977,15 +963,16 @@
// MmapInputFile ///////////////////////////////////////////////////////////////
+#if !defined(_MSC_VER)
class MmapInputFile : public InputFile {
public:
MmapInputFile(const std::string& filename);
+ MmapInputFile(const MmapInputFile&) = delete;
+ MmapInputFile& operator=(const MmapInputFile&) = delete;
~MmapInputFile() override;
-
- private:
- BLOATY_DISALLOW_COPY_AND_ASSIGN(MmapInputFile);
};
+
class FileDescriptor {
public:
FileDescriptor(int fd) : fd_(fd) {}
@@ -1038,15 +1025,86 @@
return absl::make_unique<MmapInputFile>(filename);
}
+#else // !_MSC_VER
+
+// MmapInputFile ///////////////////////////////////////////////////////////////
+
+class Win32MMapInputFile : public InputFile {
+ public:
+ Win32MMapInputFile(const std::string& filename);
+ Win32MMapInputFile(const Win32MMapInputFile&) = delete;
+ Win32MMapInputFile& operator=(const Win32MMapInputFile&) = delete;
+ ~Win32MMapInputFile() override;
+};
+
+// Owns a Win32 HANDLE; closes it on destruction unless it is null or INVALID_HANDLE_VALUE.
+class Win32Handle {
+ public:
+  Win32Handle(HANDLE h) : h_(h) {}
+  Win32Handle(const Win32Handle&) = delete;  // a copy would close the same handle twice
+  Win32Handle& operator=(const Win32Handle&) = delete;
+  ~Win32Handle() {
+    if (h_ && h_ != INVALID_HANDLE_VALUE && !CloseHandle(h_)) {
+      // GetLastError() returns a DWORD (unsigned long): print it with %lu, not %d.
+      fprintf(stderr, "bloaty: error calling CloseHandle(): %lu\n", GetLastError());
+    }
+  }
+  HANDLE h() { return h_; }  // borrowed: the wrapper keeps ownership
+ private:
+  HANDLE h_;
+};
+
+Win32MMapInputFile::Win32MMapInputFile(const std::string& filename)  // maps `filename` read-only into data_
+ : InputFile(filename) {
+ Win32Handle fd(::CreateFileA(filename.c_str(), FILE_GENERIC_READ,
+ FILE_SHARE_READ, NULL, OPEN_EXISTING,
+ FILE_ATTRIBUTE_NORMAL, NULL));
+ LARGE_INTEGER li = {};
+ const char* map;
+ // fd and mapfd are Win32Handle objects, so both are closed on return or on a throw below.
+ if (fd.h() == INVALID_HANDLE_VALUE) {
+ THROWF("couldn't open file '$0': $1", filename, ::GetLastError());
+ }
+
+ if (!::GetFileSizeEx(fd.h(), &li)) {
+ THROWF("couldn't stat file '$0': $1", filename, ::GetLastError());
+ }
+ // A maximum size of 0/0 makes the mapping object as large as the file's current size.
+ Win32Handle mapfd(
+ ::CreateFileMappingA(fd.h(), NULL, PAGE_READONLY, 0, 0, nullptr));
+ if (!mapfd.h()) {
+ THROWF("couldn't create file mapping '$0': $1", filename, ::GetLastError());
+ }
+ // Offset 0 and length 0 request a view covering the whole mapping.
+ map = static_cast<char*>(::MapViewOfFile(mapfd.h(), FILE_MAP_READ, 0, 0, 0));
+ if (!map) {
+ THROWF("couldn't MapViewOfFile file '$0': $1", filename, ::GetLastError());
+ }
+ // data_ spans the view (length = file size); the destructor unmaps it via data_.data().
+ data_ = string_view(map, li.QuadPart);
+}
+
+// Unmaps the view held in data_, if any; a failure is reported on stderr rather than thrown.
+Win32MMapInputFile::~Win32MMapInputFile() {
+  if (data_.data() != nullptr && !::UnmapViewOfFile(data_.data())) {
+    fprintf(stderr, "bloaty: error calling UnmapViewOfFile(): %lu\n", ::GetLastError());  // DWORD -> %lu
+  }
+}
+
+std::unique_ptr<InputFile> MmapInputFileFactory::OpenFile(
+ const std::string& filename) const {
+ return absl::make_unique<Win32MMapInputFile>(filename);
+}
+
+#endif
// RangeSink ///////////////////////////////////////////////////////////////////
-RangeSink::RangeSink(const InputFile* file, const Options& options,
- DataSource data_source, const DualMap* translator)
- : file_(file),
- options_(options),
- data_source_(data_source),
- translator_(translator) {}
+RangeSink::RangeSink(const InputFile *file, const Options &options,
+ DataSource data_source, const DualMap *translator,
+ google::protobuf::Arena *arena)
+ : file_(file), options_(options), data_source_(data_source),
+ translator_(translator), arena_(arena) {}
RangeSink::~RangeSink() {}
@@ -1335,6 +1393,22 @@
return file_->data().substr(translated);
}
+absl::string_view RangeSink::ZlibDecompress(absl::string_view data,  // inflates zlib-formatted `data`
+ uint64_t uncompressed_size) {  // size of the output buffer to allocate
+ if (!arena_) {  // no arena was supplied at construction, so there is nowhere to put the output
+ THROW("This range sink isn't prepared to zlib decompress.");
+ }
+ unsigned char *dbuf =  // output buffer comes from arena_; presumably lives as long as the arena -- confirm
+ arena_->google::protobuf::Arena::CreateArray<unsigned char>(
+ arena_, uncompressed_size);
+ uLongf zliblen = uncompressed_size;  // in: capacity of dbuf; out: bytes uncompress() actually wrote
+ if (uncompress(dbuf, &zliblen, (unsigned char*)(data.data()), data.size()) != Z_OK) {
+ THROW("Error decompressing debug info");
+ }
+ string_view sv(reinterpret_cast<char *>(dbuf), zliblen);  // view length is what zlib reported
+ return sv;
+}
+
// ThreadSafeIterIndex /////////////////////////////////////////////////////////
class ThreadSafeIterIndex {
@@ -1395,6 +1469,8 @@
class Bloaty {
public:
Bloaty(const InputFileFactory& factory, const Options& options);
+ Bloaty(const Bloaty&) = delete;
+ Bloaty& operator=(const Bloaty&) = delete;
void AddFilename(const std::string& filename, bool base_file);
void AddDebugFilename(const std::string& filename);
@@ -1411,8 +1487,6 @@
RollupOutput* output);
private:
- BLOATY_DISALLOW_COPY_AND_ASSIGN(Bloaty);
-
template <size_t T>
void AddBuiltInSources(const DataSourceDefinition (&sources)[T],
const Options& options) {
@@ -1472,10 +1546,13 @@
// "foo" -> "some/path/foo.map"
std::map<std::string, std::string> link_map_files_;
+ // For allocating memory, like to decompress compressed sections.
+ std::unique_ptr<google::protobuf::Arena> arena_;
};
-Bloaty::Bloaty(const InputFileFactory& factory, const Options& options)
- : file_factory_(factory), options_(options) {
+Bloaty::Bloaty(const InputFileFactory &factory, const Options &options)
+ : file_factory_(factory), options_(options),
+ arena_(std::make_unique<google::protobuf::Arena>()) {
AddBuiltInSources(data_sources, options);
}
@@ -1510,6 +1587,10 @@
}
if (!object_file.get()) {
+ object_file = TryOpenPEFile(file);
+ }
+
+ if (!object_file.get()) {
THROWF("unknown file type for file '$0'", filename.c_str());
}
@@ -1553,7 +1634,7 @@
auto iter = all_known_sources_.find(source.base_data_source());
if (iter == all_known_sources_.end()) {
- THROWF("custom data source '$0': no such base source '$1'", source.name(),
+ THROWF("custom data source '$0': no such base source '$1'.\nTry --list-sources to see valid sources.", source.name(),
source.base_data_source());
} else if (!iter->second->munger->IsEmpty()) {
THROWF("custom data source '$0' tries to depend on custom data source '$1'",
@@ -1572,7 +1653,7 @@
source_names_.emplace_back(name);
auto it = all_known_sources_.find(name);
if (it == all_known_sources_.end()) {
- THROWF("no such data source: $0", name);
+ THROWF("no such data source: $0.\nTry --list-sources to see valid sources.", name);
}
sources_.emplace_back(it->second.get());
@@ -1677,8 +1758,8 @@
std::vector<RangeSink*> filename_sink_ptrs;
// Base map always goes first.
- sinks.push_back(absl::make_unique<RangeSink>(&file->file_data(), options_,
- DataSource::kSegments, nullptr));
+ sinks.push_back(absl::make_unique<RangeSink>(
+ &file->file_data(), options_, DataSource::kSegments, nullptr, nullptr));
NameMunger empty_munger;
sinks.back()->AddOutput(maps.base_map(), &empty_munger);
sink_ptrs.push_back(sinks.back().get());
@@ -1686,7 +1767,7 @@
for (auto source : sources_) {
sinks.push_back(absl::make_unique<RangeSink>(&file->file_data(), options_,
source->effective_source,
- maps.base_map()));
+ maps.base_map(), arena_.get()));
sinks.back()->AddOutput(maps.AppendMap(), source->munger.get());
// We handle the kInputFiles data source internally, without handing it off
// to the file format implementation. This seems slightly simpler, since
@@ -1784,9 +1865,9 @@
std::vector<std::thread> threads(num_threads);
ThreadSafeIterIndex index(filenames.size());
- std::unique_ptr<RE2> regex = nullptr;
+ std::unique_ptr<ReImpl> regex = nullptr;
if (options_.has_source_filter()) {
- regex = absl::make_unique<RE2>(options_.source_filter());
+ regex = absl::make_unique<ReImpl>(options_.source_filter());
}
for (int i = 0; i < num_threads; i++) {
@@ -2266,7 +2347,7 @@
}
if (options.has_source_filter()) {
- RE2 re(options.source_filter());
+ ReImpl re(options.source_filter());
if (!re.ok()) {
THROW("invalid regex for source_filter");
}
diff --git a/src/bloaty.h b/src/bloaty.h
index a6722a3..ecedc73 100644
--- a/src/bloaty.h
+++ b/src/bloaty.h
@@ -34,26 +34,11 @@
#include "absl/strings/string_view.h"
#include "absl/strings/strip.h"
#include "capstone/capstone.h"
-#include "re2/re2.h"
#include "bloaty.pb.h"
#include "range_map.h"
-
-#define BLOATY_DISALLOW_COPY_AND_ASSIGN(class_name) \
- class_name(const class_name&) = delete; \
- void operator=(const class_name&) = delete;
-
-#define BLOATY_UNREACHABLE() do { \
- assert(false); \
- __builtin_unreachable(); \
-} while (0)
-
-#ifdef NDEBUG
-// Prevent "unused variable" warnings.
-#define BLOATY_ASSERT(expr) do {} while (false && (expr))
-#else
-#define BLOATY_ASSERT(expr) assert(expr)
-#endif
+#include "re.h"
+#include "util.h"
namespace bloaty {
@@ -83,31 +68,17 @@
kShortSymbols
};
-class Error : public std::runtime_error {
- public:
- Error(const char* msg, const char* file, int line)
- : std::runtime_error(msg), file_(file), line_(line) {}
-
- // TODO(haberman): add these to Bloaty's error message when verbose is
- // enabled.
- const char* file() const { return file_; }
- int line() const { return line_; }
-
- private:
- const char* file_;
- int line_;
-};
-
class InputFile {
public:
InputFile(const std::string& filename) : filename_(filename) {}
+ InputFile(const InputFile&) = delete;
+ InputFile& operator=(const InputFile&) = delete;
virtual ~InputFile() {}
const std::string& filename() const { return filename_; }
absl::string_view data() const { return data_; }
private:
- BLOATY_DISALLOW_COPY_AND_ASSIGN(InputFile);
const std::string filename_;
protected:
@@ -138,26 +109,29 @@
// A RangeSink allows data sources to assign labels to ranges of VM address
// space and/or file offsets.
class RangeSink {
- public:
- RangeSink(const InputFile* file, const Options& options,
- DataSource data_source, const DualMap* translator);
+public:
+ RangeSink(const InputFile *file, const Options &options,
+ DataSource data_source, const DualMap *translator,
+ google::protobuf::Arena *arena);
+ RangeSink(const RangeSink &) = delete;
+ RangeSink &operator=(const RangeSink &) = delete;
~RangeSink();
- const Options& options() const { return options_; }
+ const Options &options() const { return options_; }
- void AddOutput(DualMap* map, const NameMunger* munger);
+ void AddOutput(DualMap *map, const NameMunger *munger);
DataSource data_source() const { return data_source_; }
- const InputFile& input_file() const { return *file_; }
+ const InputFile &input_file() const { return *file_; }
bool IsBaseMap() const { return translator_ == nullptr; }
// If vmsize or filesize is zero, this mapping is presumed not to exist in
// that domain. For example, .bss mappings don't exist in the file, and
// .debug_* mappings don't exist in memory.
- void AddRange(const char* analyzer, absl::string_view name, uint64_t vmaddr,
+ void AddRange(const char *analyzer, absl::string_view name, uint64_t vmaddr,
uint64_t vmsize, uint64_t fileoff, uint64_t filesize);
- void AddRange(const char* analyzer, absl::string_view name, uint64_t vmaddr,
+ void AddRange(const char *analyzer, absl::string_view name, uint64_t vmaddr,
uint64_t vmsize, absl::string_view file_range) {
AddRange(analyzer, name, vmaddr, vmsize,
file_range.data() - file_->data().data(), file_range.size());
@@ -229,11 +203,15 @@
uint64_t TranslateFileToVM(const char* ptr);
absl::string_view TranslateVMToFile(uint64_t address);
+ // Decompresses zlib-formatted data and returns the decompressed data.
+ // Since the decompressed data is not actually part of the file, any
+ // Add*Range() calls to this region will be no-ops.
+ absl::string_view ZlibDecompress(absl::string_view contents,
+ uint64_t uncompressed_size);
+
static constexpr uint64_t kUnknownSize = RangeMap::kUnknownSize;
private:
- BLOATY_DISALLOW_COPY_AND_ASSIGN(RangeSink);
-
bool FileContainsPointer(const void* ptr) const {
absl::string_view file_data = file_->data();
return ptr >= file_data.data() && ptr < file_data.data() + file_data.size();
@@ -249,9 +227,9 @@
DataSource data_source_;
const DualMap* translator_;
std::vector<std::pair<DualMap*, const NameMunger*>> outputs_;
+ google::protobuf::Arena *arena_;
};
-
// NameMunger //////////////////////////////////////////////////////////////////
// Use to transform input names according to the user's configuration.
@@ -259,18 +237,18 @@
class NameMunger {
public:
NameMunger() {}
+ NameMunger(const NameMunger&) = delete;
+ NameMunger& operator=(const NameMunger&) = delete;
// Adds a regex that will be applied to all names. All regexes will be
// applied in sequence.
void AddRegex(const std::string& regex, const std::string& replacement);
-
std::string Munge(absl::string_view name) const;
bool IsEmpty() const { return regexes_.empty(); }
private:
- BLOATY_DISALLOW_COPY_AND_ASSIGN(NameMunger);
- std::vector<std::pair<std::unique_ptr<RE2>, std::string>> regexes_;
+ std::vector<std::pair<std::unique_ptr<ReImpl>, std::string>> regexes_;
};
typedef std::map<absl::string_view, std::pair<uint64_t, uint64_t>> SymbolTable;
@@ -319,20 +297,30 @@
std::unique_ptr<ObjectFile> TryOpenELFFile(std::unique_ptr<InputFile>& file, std::optional<std::string> link_map_file);
std::unique_ptr<ObjectFile> TryOpenMachOFile(std::unique_ptr<InputFile>& file);
std::unique_ptr<ObjectFile> TryOpenWebAssemblyFile(std::unique_ptr<InputFile>& file);
+std::unique_ptr<ObjectFile> TryOpenPEFile(std::unique_ptr<InputFile>& file);
namespace dwarf {
struct File {
- absl::string_view debug_info;
- absl::string_view debug_types;
- absl::string_view debug_str;
absl::string_view debug_abbrev;
+ absl::string_view debug_addr;
absl::string_view debug_aranges;
+ absl::string_view debug_info;
absl::string_view debug_line;
absl::string_view debug_loc;
absl::string_view debug_pubnames;
absl::string_view debug_pubtypes;
absl::string_view debug_ranges;
+ absl::string_view debug_rnglists;
+ absl::string_view debug_str;
+ absl::string_view debug_str_offsets;
+ absl::string_view debug_types;
+
+ // Maps a name to one of the string_view members above.
+ // NOTE(review): defined elsewhere; presumably returns nullptr when |name|
+ // does not correspond to any member -- confirm against the definition.
+ absl::string_view* GetFieldByName(absl::string_view name);
+ // Stores |contents| into the member selected by |name|. If GetFieldByName()
+ // returns a null pointer the call is silently a no-op.
+ void SetFieldByName(absl::string_view name, absl::string_view contents) {
+ absl::string_view *member = GetFieldByName(name);
+ if (member) *member = contents;
+ }
};
} // namespace dwarf
@@ -360,6 +348,9 @@
public:
LineReader(FILE* file, bool pclose) : file_(file), pclose_(pclose) {}
LineReader(LineReader&& other);
+ // LineReader is move-only: copying is explicitly deleted (this replaces the
+ // former BLOATY_DISALLOW_COPY_AND_ASSIGN macro). Both operations are deleted
+ // so that an accidental copy-assignment fails at compile time instead of
+ // surfacing later as an unresolved symbol at link time.
+ LineReader(const LineReader&) = delete;
+ LineReader& operator=(const LineReader&) = delete;
+
~LineReader() { Close(); }
@@ -372,8 +363,6 @@
bool eof() { return eof_; }
private:
- BLOATY_DISALLOW_COPY_AND_ASSIGN(LineReader);
-
void Close();
FILE* file_;
@@ -496,6 +485,8 @@
struct RollupOutput {
public:
RollupOutput() : toplevel_row_("TOTAL") {}
+ RollupOutput(const RollupOutput&) = delete;
+ RollupOutput& operator=(const RollupOutput&) = delete;
void AddDataSourceName(absl::string_view name) {
source_names_.emplace_back(std::string(name));
@@ -522,7 +513,6 @@
BLOATY_UNREACHABLE();
}
}
-
if (!disassembly_.empty()) {
*out << disassembly_;
}
@@ -539,7 +529,6 @@
bool diff_mode() const { return diff_mode_; }
private:
- BLOATY_DISALLOW_COPY_AND_ASSIGN(RollupOutput);
friend class Rollup;
std::vector<std::string> source_names_;
@@ -573,65 +562,6 @@
bool BloatyMain(const Options& options, const InputFileFactory& file_factory,
RollupOutput* output, std::string* error);
-// Endianness utilities ////////////////////////////////////////////////////////
-
-inline bool IsLittleEndian() {
- int x = 1;
- return *(char*)&x == 1;
-}
-
-// It seems like it would be simpler to just specialize on:
-// template <class T> T ByteSwap(T val);
-// template <> T ByteSwap<uint16>(T val) { /* ... */ }
-// template <> T ByteSwap<uint32>(T val) { /* ... */ }
-// // etc...
-//
-// But this doesn't work out so well. Consider that on LP32, uint32 could
-// be either "unsigned int" or "unsigned long". Specializing ByteSwap<uint32>
-// will leave one of those two unspecialized. C++ is annoying in this regard.
-// Our approach here handles both cases with just one specialization.
-template <class T, size_t size> struct ByteSwapper { T operator()(T val); };
-
-template <class T>
-struct ByteSwapper<T, 1> {
- T operator()(T val) { return val; }
-};
-
-template <class T>
-struct ByteSwapper<T, 2> {
- T operator()(T val) {
- return ((val & 0xff) << 8) |
- ((val & 0xff00) >> 8);
- }
-};
-
-template <class T>
-struct ByteSwapper<T, 4> {
- T operator()(T val) {
- return ((val & 0xff) << 24) |
- ((val & 0xff00) << 8) |
- ((val & 0xff0000ULL) >> 8) |
- ((val & 0xff000000ULL) >> 24);
- }
-};
-
-template <class T>
-struct ByteSwapper<T, 8> {
- T operator()(T val) {
- return ((val & 0xff) << 56) |
- ((val & 0xff00) << 40) |
- ((val & 0xff0000) << 24) |
- ((val & 0xff000000) << 8) |
- ((val & 0xff00000000ULL) >> 8) |
- ((val & 0xff0000000000ULL) >> 24) |
- ((val & 0xff000000000000ULL) >> 40) |
- ((val & 0xff00000000000000ULL) >> 56);
- }
-};
-
-template <class T>
-T ByteSwap(T val) { return ByteSwapper<T, sizeof(T)>()(val); }
-
} // namespace bloaty
#endif
diff --git a/src/demangle.h b/src/demangle.h
index 806b7fd..823b0da 100644
--- a/src/demangle.h
+++ b/src/demangle.h
@@ -11,12 +11,10 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
-
// Author: satorux@google.com (Satoru Takabayashi)
//
// An async-signal-safe and thread-safe demangler for Itanium C++ ABI
// (aka G++ V3 ABI).
-
// The demangler is implemented to be used in async signal handlers to
// symbolize stack traces. We cannot use libstdc++'s
// abi::__cxa_demangle() in such signal handlers since it's not async
@@ -51,13 +49,10 @@
// Note: we might want to write demanglers for ABIs other than Itanium
// C++ ABI in the future.
//
-
#ifndef BASE_DEMANGLE_H_
#define BASE_DEMANGLE_H_
-
// Demangle "mangled". On success, return true and write the
// demangled symbol name to "out". Otherwise, return false.
// "out" is modified even if demangling is unsuccessful.
bool Demangle(const char *mangled, char *out, int out_size);
-
#endif // BASE_DEMANGLE_H_
diff --git a/src/disassemble.cc b/src/disassemble.cc
index 3de593c..6e9cdc4 100644
--- a/src/disassemble.cc
+++ b/src/disassemble.cc
@@ -14,20 +14,15 @@
#include <string>
-#include "bloaty.h"
#include "absl/strings/ascii.h"
#include "absl/strings/escaping.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/strings/substitute.h"
+#include "bloaty.h"
#include "capstone/capstone.h"
-
-static void Throw(const char *str, int line) {
- throw bloaty::Error(str, __FILE__, line);
-}
-
-#define THROW(msg) Throw(msg, __LINE__)
-#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
+#include "re.h"
+#include "util.h"
using absl::string_view;
@@ -186,20 +181,20 @@
if (info.arch == CS_ARCH_X86) {
if (in->id == X86_INS_LEA) {
- RE2::GlobalReplace(&op_str, "\\w?word ptr ", "");
+ ReImpl::GlobalReplace(&op_str, "\\w?word ptr ", "");
} else if (in->id == X86_INS_NOP) {
op_str.clear();
} else {
// qword ptr => QWORD
- while (RE2::PartialMatch(op_str, "(\\w?word) ptr", &match)) {
+ while (ReImpl::PartialMatch(op_str, "(\\w?word) ptr", &match)) {
std::string upper_match = match;
absl::AsciiStrToUpper(&upper_match);
- RE2::Replace(&op_str, match + " ptr", upper_match);
+ ReImpl::Replace(&op_str, match + " ptr", upper_match);
}
}
}
- RE2::GlobalReplace(&op_str, " ", "");
+ ReImpl::GlobalReplace(&op_str, " ", "");
auto iter = local_labels.find(in->address);
if (iter != local_labels.end()) {
diff --git a/src/dwarf.cc b/src/dwarf.cc
index 282c915..dad4317 100644
--- a/src/dwarf.cc
+++ b/src/dwarf.cc
@@ -33,24 +33,11 @@
#include "bloaty.h"
#include "bloaty.pb.h"
#include "dwarf_constants.h"
-#include "re2/re2.h"
+#include "util.h"
using namespace dwarf2reader;
using absl::string_view;
-static size_t AlignUpTo(size_t offset, size_t granularity) {
- // Granularity must be a power of two.
- return (offset + granularity - 1) & ~(granularity - 1);
-}
-
-ABSL_ATTRIBUTE_NORETURN
-static void Throw(const char *str, int line) {
- throw bloaty::Error(str, __FILE__, line);
-}
-
-#define THROW(msg) Throw(msg, __LINE__)
-#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
-
namespace bloaty {
extern int verbose_level;
@@ -82,61 +69,6 @@
// routines that touch the bytes of the input buffer directly. Everything else
// is layered on top of these.
-template <class T>
-T ReadMemcpy(string_view* data) {
- T ret;
- if (data->size() < sizeof(T)) {
- THROW("premature EOF reading fixed-length DWARF data");
- }
- memcpy(&ret, data->data(), sizeof(T));
- data->remove_prefix(sizeof(T));
- return ret;
-}
-
-string_view ReadPiece(size_t bytes, string_view* data) {
- if(data->size() < bytes) {
- THROW("premature EOF reading variable-length DWARF data");
- }
- string_view ret = data->substr(0, bytes);
- data->remove_prefix(bytes);
- return ret;
-}
-
-void SkipBytes(size_t bytes, string_view* data) {
- if (data->size() < bytes) {
- THROW("premature EOF skipping DWARF data");
- }
- data->remove_prefix(bytes);
-}
-
-string_view ReadNullTerminated(string_view* data) {
- const char* nullz =
- static_cast<const char*>(memchr(data->data(), '\0', data->size()));
-
- // Return false if not NULL-terminated.
- if (nullz == NULL) {
- THROW("DWARF string was not NULL-terminated");
- }
-
- size_t len = nullz - data->data();
- string_view val = data->substr(0, len);
- data->remove_prefix(len + 1); // Remove NULL also.
- return val;
-}
-
-void SkipNullTerminated(string_view* data) {
- const char* nullz =
- static_cast<const char*>(memchr(data->data(), '\0', data->size()));
-
- // Return false if not NULL-terminated.
- if (nullz == NULL) {
- THROW("DWARF string was not NULL-terminated");
- }
-
- size_t len = nullz - data->data();
- data->remove_prefix(len + 1); // Remove NULL also.
-}
-
// Parses the LEB128 format defined by DWARF (both signed and unsigned
// versions).
@@ -202,6 +134,10 @@
// DWARF version of this unit.
uint8_t dwarf_version() const { return dwarf_version_; }
+ // Per-unit base offsets used when resolving indexed forms. Each one is 0
+ // until the corresponding Set*Base() method is called.
+ //
+ // addr_base(): byte offset added to (index * address size) when looking up
+ // an entry in the debug_addr data.
+ uint64_t addr_base() const { return addr_base_; }
+ // str_offsets_base(): byte offset added to (index * offset size) when looking
+ // up an entry in the debug_str_offsets data.
+ uint64_t str_offsets_base() const { return str_offsets_base_; }
+ // range_lists_base(): NOTE(review): presumably the analogous base for
+ // debug_rnglists -- confirm against the code that consumes it.
+ uint64_t range_lists_base() const { return range_lists_base_; }
+
void SetAddressSize(uint8_t address_size) {
if (address_size != 4 && address_size != 8) {
THROWF("Unexpected address size: $0", address_size);
@@ -209,6 +145,18 @@
address_size_ = address_size;
}
+ // Records the base offset returned by addr_base().
+ // NOTE(review): presumably fed from the unit's DW_AT_addr_base attribute --
+ // confirm against the caller.
+ void SetAddrBase(uint64_t addr_base) {
+ addr_base_ = addr_base;
+ }
+
+ // Records the base offset returned by str_offsets_base().
+ // NOTE(review): presumably fed from DW_AT_str_offsets_base -- confirm.
+ void SetStrOffsetsBase(uint64_t str_offsets_base) {
+ str_offsets_base_ = str_offsets_base;
+ }
+
+ // Records the base offset returned by range_lists_base().
+ // NOTE(review): presumably fed from DW_AT_rnglists_base -- confirm.
+ void SetRangeListsBase(uint64_t range_lists_base) {
+ range_lists_base_ = range_lists_base;
+ }
+
// To allow this as the key in a map.
bool operator<(const CompilationUnitSizes& rhs) const {
return std::tie(dwarf64_, address_size_) <
@@ -219,18 +167,18 @@
// format.
uint64_t ReadDWARFOffset(string_view* data) const {
if (dwarf64_) {
- return ReadMemcpy<uint64_t>(data);
+ return ReadFixed<uint64_t>(data);
} else {
- return ReadMemcpy<uint32_t>(data);
+ return ReadFixed<uint32_t>(data);
}
}
// Reads an address according to the expected address_size.
uint64_t ReadAddress(string_view* data) const {
if (address_size_ == 8) {
- return ReadMemcpy<uint64_t>(data);
+ return ReadFixed<uint64_t>(data);
} else if (address_size_ == 4) {
- return ReadMemcpy<uint32_t>(data);
+ return ReadFixed<uint32_t>(data);
} else {
BLOATY_UNREACHABLE();
}
@@ -243,11 +191,11 @@
// Returns the range for this section and stores the remaining data
// in |remaining|.
string_view ReadInitialLength(string_view* remaining) {
- uint64_t len = ReadMemcpy<uint32_t>(remaining);
+ uint64_t len = ReadFixed<uint32_t>(remaining);
if (len == 0xffffffff) {
dwarf64_ = true;
- len = ReadMemcpy<uint64_t>(remaining);
+ len = ReadFixed<uint64_t>(remaining);
} else {
dwarf64_ = false;
}
@@ -263,13 +211,16 @@
}
void ReadDWARFVersion(string_view* data) {
- dwarf_version_ = ReadMemcpy<uint16_t>(data);
+ dwarf_version_ = ReadFixed<uint16_t>(data);
}
private:
uint16_t dwarf_version_;
bool dwarf64_;
uint8_t address_size_;
+ uint64_t addr_base_ = 0;
+ uint64_t str_offsets_base_ = 0;
+ uint64_t range_lists_base_ = 0;
};
@@ -343,7 +294,7 @@
abbrev.code = code;
abbrev.tag = ReadLEB128<uint16_t>(&data);
- has_child = ReadMemcpy<uint8_t>(&data);
+ has_child = ReadFixed<uint8_t>(&data);
switch (has_child) {
case DW_children_yes:
@@ -353,7 +304,8 @@
abbrev.has_child = false;
break;
default:
- THROW("DWARF has_child is neither true nor false.");
+ THROWF("DWARF has_child is neither true nor false: $0, code=$1, tag=$2",
+ has_child, abbrev.code, abbrev.tag);
}
while (true) {
@@ -458,15 +410,15 @@
uint8_t segment_size;
- sizes_.SetAddressSize(ReadMemcpy<uint8_t>(&unit_remaining_));
- segment_size = ReadMemcpy<uint8_t>(&unit_remaining_);
+ sizes_.SetAddressSize(ReadFixed<uint8_t>(&unit_remaining_));
+ segment_size = ReadFixed<uint8_t>(&unit_remaining_);
if (segment_size) {
THROW("we don't know how to handle segmented addresses.");
}
size_t ofs = unit_remaining_.data() - section_.data();
- size_t aligned_ofs = AlignUpTo(ofs, sizes_.address_size() * 2);
+ size_t aligned_ofs = AlignUp(ofs, sizes_.address_size() * 2);
SkipBytes(aligned_ofs - ofs, &unit_remaining_);
return true;
}
@@ -502,7 +454,7 @@
// Base address selection, nothing more to do.
} else {
// Need to skip the location description.
- uint16_t length = ReadMemcpy<uint16_t>(&remaining_);
+ uint16_t length = ReadFixed<uint16_t>(&remaining_);
SkipBytes(length, &remaining_);
}
return true;
@@ -515,44 +467,6 @@
return available.substr(0, list.read_offset() - available.data());
}
-
-// RangeList ///////////////////////////////////////////////////////////////////
-
-// Code for reading entries out of a range list.
-// For the moment we only care about finding the bounds of a list given its
-// offset, so we don't actually vend any of the data.
-
-class RangeList {
- public:
- RangeList(CompilationUnitSizes sizes, string_view data)
- : sizes_(sizes), remaining_(data) {}
-
- const char* read_offset() const { return remaining_.data(); }
- bool NextEntry();
-
- private:
- CompilationUnitSizes sizes_;
- string_view remaining_;
-};
-
-bool RangeList::NextEntry() {
- uint64_t start, end;
- start = sizes_.ReadAddress(&remaining_);
- end = sizes_.ReadAddress(&remaining_);
- if (start == 0 && end == 0) {
- return false;
- }
- return true;
-}
-
-string_view GetRangeListRange(CompilationUnitSizes sizes,
- string_view available) {
- RangeList list(sizes, available);
- while (list.NextEntry()) {
- }
- return available.substr(0, list.read_offset() - available.data());
-}
-
// DIEReader ///////////////////////////////////////////////////////////////////
// Reads a sequence of DWARF DIE's (Debugging Information Entries) from the
@@ -566,6 +480,8 @@
// Constructs a new DIEReader. Cannot be used until you call one of the
// Seek() methods below.
DIEReader(const File& file) : dwarf_(file) {}
+ DIEReader(const DIEReader&) = delete;
+ DIEReader& operator=(const DIEReader&) = delete;
// Returns true if we are at the end of DIEs for this compilation unit.
bool IsEof() const { return state_ == State::kEof; }
@@ -599,6 +515,11 @@
return *current_abbrev_;
}
+ // Returns the current read position as a byte distance from start_.
+ // NOTE(review): start_ is assigned from next_unit_.data() before the seek
+ // skips forward to the requested unit, so this looks like an offset from the
+ // start of the section data rather than from the start of the compilation
+ // unit -- confirm which one callers expect.
+ int64_t GetReadOffset() const { return remaining_.data() - start_; }
+
+ // Returns the current nesting depth counter (decremented once for every null
+ // entry skipped by SkipNullEntries()).
+ int GetDepth() const { return depth_; }
+
// Returns the tag of the current DIE.
// Requires that ReadCode() has been called at least once.
uint16_t GetTag() const { return GetAbbrev().tag; }
@@ -607,10 +528,13 @@
// Requires that ReadCode() has been called at least once.
bool HasChild() const { return GetAbbrev().has_child; }
+ template <class T>
+ void ReadAttributes(T&& func);
+
const File& dwarf() const { return dwarf_; }
string_view unit_range() const { return unit_range_; }
- CompilationUnitSizes unit_sizes() const { return unit_sizes_; }
+ const CompilationUnitSizes& unit_sizes() const { return unit_sizes_; }
uint32_t abbrev_version() const { return abbrev_version_; }
uint64_t debug_abbrev_offset() const { return debug_abbrev_offset_; }
@@ -629,36 +553,14 @@
}
}
+ CompilationUnitSizes* mutable_unit_sizes() { return &unit_sizes_; }
+
private:
- BLOATY_DISALLOW_COPY_AND_ASSIGN(DIEReader);
-
- template<typename> friend class AttrReader;
-
- // APIs for our friends to use to update our state.
-
- // Call to get the current read head where attributes should be parsed.
- string_view ReadAttributesBegin() {
- assert(state_ == State::kReadyToReadAttributes);
- return remaining_;
- }
-
- // When some data has been parsed, this updates our read head.
- bool ReadAttributesEnd(string_view remaining, uint64_t sibling) {
- assert(state_ == State::kReadyToReadAttributes);
- if (remaining.data() == nullptr) {
- THROW("premature EOF reading DWARF attributes");
- } else {
- remaining_ = remaining;
- sibling_offset_ = sibling;
- state_ = State::kReadyToNext;
- return true;
- }
- }
-
// Internal APIs.
bool ReadCompilationUnitHeader();
bool ReadCode();
+ void SkipNullEntries();
enum class State {
kReadyToReadAttributes,
@@ -670,6 +572,7 @@
const File& dwarf_;
RangeSink* strp_sink_ = nullptr;
+ const char *start_ = nullptr;
// Abbreviation for the current entry.
const AbbrevTable::Abbrev* current_abbrev_;
@@ -710,20 +613,311 @@
uint64_t unit_type_offset_;
};
+// A single parsed DWARF attribute value.
+//
+// A value is either an unsigned integer or a string/byte block. Indexed forms
+// (DW_FORM_strx*, DW_FORM_addrx*) are stored "unresolved": only the index is
+// kept at parse time, and the real value is looked up lazily through the
+// DIEReader when GetString()/GetUint() is called.
+class AttrValue {
+ public:
+  // Parses one value encoded with DWARF form |form| from the front of |data|,
+  // advancing |data| past the bytes that were consumed.
+  static AttrValue ParseAttr(const DIEReader &reader, uint8_t form,
+                             string_view *data);
+
+  AttrValue(const AttrValue &) = default;
+  AttrValue &operator=(const AttrValue &) = default;
+
+  // True if GetUint() may be called (resolved or unresolved integer).
+  bool IsUint() const {
+    return type_ == Type::kUint || type_ == Type::kUnresolvedUint;
+  }
+
+  // True if GetString() may be called (resolved or unresolved string).
+  bool IsString() const {
+    return type_ == Type::kString || type_ == Type::kUnresolvedString;
+  }
+
+  // Returns the value as an integer. Integer values are returned directly;
+  // string/block values are reinterpreted as a fixed-width integer when they
+  // are exactly 1, 2, 4 or 8 bytes long. Any other length yields nullopt.
+  absl::optional<uint64_t> ToUint(const DIEReader& reader) const {
+    if (IsUint()) return GetUint(reader);
+    string_view str = GetString(reader);
+    switch (str.size()) {
+      case 1:
+        return ReadFixed<uint8_t>(&str);
+      case 2:
+        // A two-byte payload must be read as 16 bits; reading it as uint8_t
+        // silently dropped the second byte.
+        return ReadFixed<uint16_t>(&str);
+      case 4:
+        return ReadFixed<uint32_t>(&str);
+      case 8:
+        return ReadFixed<uint64_t>(&str);
+    }
+    return absl::nullopt;
+  }
+
+  // Returns the integer value, resolving an indexed address through |reader|
+  // if necessary. Requires IsUint().
+  uint64_t GetUint(const DIEReader& reader) const {
+    if (type_ == Type::kUnresolvedUint) {
+      return ResolveIndirectAddress(reader);
+    } else {
+      assert(type_ == Type::kUint);
+      return uint_;
+    }
+  }
+
+  // Returns the string value, resolving an indexed string through |reader| if
+  // necessary. Requires IsString().
+  string_view GetString(const DIEReader& reader) const {
+    if (type_ == Type::kUnresolvedString) {
+      return ResolveDoubleIndirectString(reader);
+    } else {
+      assert(type_ == Type::kString);
+      return string_;
+    }
+  }
+
+  void SetForm(uint16_t form) {
+    form_ = form;
+  }
+
+  // The form recorded by SetForm(), or 0 if SetForm() was never called.
+  uint16_t form() const { return form_; }
+
+  // Human-readable dump for debugging. Unresolved values store their index in
+  // uint_, so they must be printed from uint_; string_ is only the active
+  // union member for Type::kString.
+  std::string DebugString() const {
+    switch (type_) {
+      case Type::kUint:
+        return absl::Substitute(
+            "AttrValue{type_=Type::kUint, form_=$0, uint_=$1}", form_, uint_);
+      case Type::kString:
+        return absl::Substitute(
+            "AttrValue{type_=Type::kString, form_=$0, string_=$1}", form_,
+            string_);
+      case Type::kUnresolvedUint:
+        return absl::Substitute(
+            "AttrValue{type_=Type::kUnresolvedUint, form_=$0, uint_=$1}",
+            form_, uint_);
+      case Type::kUnresolvedString:
+        return absl::Substitute(
+            "AttrValue{type_=Type::kUnresolvedString, form_=$0, uint_=$1}",
+            form_, uint_);
+    }
+    BLOATY_UNREACHABLE();
+  }
+
+ private:
+  explicit AttrValue(uint64_t val) : uint_(val), type_(Type::kUint) {}
+  explicit AttrValue(string_view val) : string_(val), type_(Type::kString) {}
+
+  // Some attribute values remain unresolved after being parsed.
+  // We have to delay the resolution of some indirect values because they are
+  // dependent on bases that come after it in the sequence of attributes, eg.
+  //
+  // $ dwarfdump -i bloaty
+  // COMPILE_UNIT<header overall offset = 0x00000000>:
+  // < 0><0x0000000c> DW_TAG_compile_unit
+  //   DW_AT_producer (indexed string: 0x00000000)Debian clang version 11.0.1-2
+  //   DW_AT_language DW_LANG_C_plus_plus_14
+  //   DW_AT_name (indexed string: 0x00000001)../src/main.cc
+  //   DW_AT_str_offsets_base 0x00000008
+  //
+  // Note that DW_AT_name comes before DW_AT_str_offsets_base, but the latter
+  // value is required to resolve the name attribute.
+  enum class Type {
+    kUint,
+    kString,
+    kUnresolvedUint,
+    kUnresolvedString
+  };
+
+  Type type() const { return type_; }
+
+  // Builds a value holding an address index that GetUint() resolves later.
+  static AttrValue UnresolvedUint(uint64_t val) {
+    AttrValue ret(val);
+    ret.type_ = Type::kUnresolvedUint;
+    return ret;
+  }
+
+  // Builds a value holding a string index that GetString() resolves later.
+  static AttrValue UnresolvedString(uint64_t val) {
+    AttrValue ret(val);
+    ret.type_ = Type::kUnresolvedString;
+    return ret;
+  }
+
+  // uint_ is active for kUint and both unresolved types; string_ is active
+  // only for kString.
+  union {
+    uint64_t uint_;
+    string_view string_;
+  };
+
+  Type type_;
+  // Default-initialized so form()/DebugString() never read an indeterminate
+  // value when SetForm() has not been called.
+  uint16_t form_ = 0;
+
+  template <class D>
+  static string_view ReadBlock(string_view* data);
+  static string_view ReadVariableBlock(string_view* data);
+  template <class D>
+  static string_view ReadIndirectString(const DIEReader &reader,
+                                        string_view *data);
+  static string_view ResolveIndirectString(const DIEReader &reader,
+                                           uint64_t ofs);
+
+  string_view ResolveDoubleIndirectString(const DIEReader &reader) const;
+  uint64_t ResolveIndirectAddress(const DIEReader& reader) const;
+};
+
+// Fetches entry number |val| from the debug_addr data. Entries are as wide
+// as the unit's address size (4 or 8 bytes) and the table begins at the
+// unit's addr_base() offset. Any other address size is unreachable.
+uint64_t ReadIndirectAddress(const DIEReader& reader, uint64_t val) {
+  const dwarf::CompilationUnitSizes& unit = reader.unit_sizes();
+  string_view table = reader.dwarf().debug_addr;
+  const auto width = unit.address_size();
+
+  if (width == 4) {
+    SkipBytes((val * 4) + unit.addr_base(), &table);
+    return ReadFixed<uint32_t>(&table);
+  }
+  if (width == 8) {
+    SkipBytes((val * 8) + unit.addr_base(), &table);
+    return ReadFixed<uint64_t>(&table);
+  }
+  BLOATY_UNREACHABLE();
+}
+
+// Reads a block whose byte length is stored in a fixed-width prefix of type D,
+// then returns the block contents. |data| is advanced past prefix and block.
+template <class D>
+string_view AttrValue::ReadBlock(string_view* data) {
+  const D block_size = ReadFixed<D>(data);
+  string_view block = ReadBytes(block_size, data);
+  return block;
+}
+
+// Reads a block whose byte length is stored as a LEB128 prefix, then returns
+// the block contents. |data| is advanced past prefix and block.
+string_view AttrValue::ReadVariableBlock(string_view* data) {
+  const uint64_t block_size = ReadLEB128<uint64_t>(data);
+  string_view block = ReadBytes(block_size, data);
+  return block;
+}
+
+// Looks up the string stored at offset |ofs| of the debug_str data and
+// reports it to the reader via AddIndirectString() before returning it.
+string_view AttrValue::ResolveIndirectString(const DIEReader &reader,
+                                             uint64_t ofs) {
+  StringTable strings(reader.dwarf().debug_str);
+  const string_view entry = strings.ReadEntry(ofs);
+  reader.AddIndirectString(entry);
+  return entry;
+}
+
+// Reads a fixed-width debug_str offset of type D from |data| and returns the
+// string it refers to.
+template <class D>
+string_view AttrValue::ReadIndirectString(const DIEReader &reader,
+                                          string_view *data) {
+  const D str_offset = ReadFixed<D>(data);
+  return ResolveIndirectString(reader, str_offset);
+}
+
+// Resolves an indexed string in two hops: the stored index selects an entry
+// in the debug_str_offsets data (8-byte entries for 64-bit DWARF, 4-byte
+// otherwise, starting at the unit's str_offsets_base()), and that entry is
+// the offset of the string inside debug_str. The resulting string is reported
+// to the reader via AddIndirectString().
+string_view
+AttrValue::ResolveDoubleIndirectString(const DIEReader &reader) const {
+  const auto& sizes = reader.unit_sizes();
+  string_view offset_table = reader.dwarf().debug_str_offsets;
+  const uint64_t index = uint_;
+
+  uint64_t str_offset;
+  if (!sizes.dwarf64()) {
+    SkipBytes((index * 4) + sizes.str_offsets_base(), &offset_table);
+    str_offset = ReadFixed<uint32_t>(&offset_table);
+  } else {
+    SkipBytes((index * 8) + sizes.str_offsets_base(), &offset_table);
+    str_offset = ReadFixed<uint64_t>(&offset_table);
+  }
+
+  StringTable strings(reader.dwarf().debug_str);
+  const string_view entry = strings.ReadEntry(str_offset);
+  reader.AddIndirectString(entry);
+  return entry;
+}
+
+// Treats the stored integer as an address-table index and resolves it with
+// ReadIndirectAddress().
+uint64_t AttrValue::ResolveIndirectAddress(const DIEReader& reader) const {
+ return ReadIndirectAddress(reader, uint_);
+}
+
+// Parses one attribute value encoded with DWARF form |form| from the front of
+// |data|, advancing |data| past the consumed bytes.
+//
+// Integer-like forms produce a uint value, block/string/dataN forms produce a
+// string value, and indexed forms (strx*/addrx*) produce an unresolved value
+// whose lookup is deferred until it is read. Throws for forms that are not
+// handled here.
+AttrValue AttrValue::ParseAttr(const DIEReader &reader, uint8_t form,
+                               string_view *data) {
+  switch (form) {
+    case DW_FORM_indirect: {
+      // The real form is stored inline, ahead of the value.
+      uint16_t indirect_form = ReadLEB128<uint16_t>(data);
+      if (indirect_form == DW_FORM_indirect) {
+        THROW("indirect attribute has indirect form type");
+      }
+      return ParseAttr(reader, indirect_form, data);
+    }
+    case DW_FORM_ref1:
+      return AttrValue(ReadFixed<uint8_t>(data));
+    case DW_FORM_ref2:
+      return AttrValue(ReadFixed<uint16_t>(data));
+    case DW_FORM_ref4:
+      return AttrValue(ReadFixed<uint32_t>(data));
+    case DW_FORM_ref_sig8:
+    case DW_FORM_ref8:
+      return AttrValue(ReadFixed<uint64_t>(data));
+    case DW_FORM_ref_udata:
+      // A reference encoded as an unsigned LEB128 offset. This must not fall
+      // through into DW_FORM_strx1, which would consume exactly one byte and
+      // mislabel the value as a string index.
+      return AttrValue(ReadLEB128<uint64_t>(data));
+    case DW_FORM_strx1:
+      return AttrValue::UnresolvedString(ReadFixed<uint8_t>(data));
+    case DW_FORM_strx2:
+      return AttrValue::UnresolvedString(ReadFixed<uint16_t>(data));
+    case DW_FORM_strx4:
+      return AttrValue::UnresolvedString(ReadFixed<uint32_t>(data));
+    case DW_FORM_strx:
+      return AttrValue::UnresolvedString(ReadLEB128<uint64_t>(data));
+    case DW_FORM_addrx1:
+      return AttrValue::UnresolvedUint(ReadFixed<uint8_t>(data));
+    case DW_FORM_addrx2:
+      return AttrValue::UnresolvedUint(ReadFixed<uint16_t>(data));
+    case DW_FORM_addrx3:
+      return AttrValue::UnresolvedUint(ReadFixed<uint32_t, 3>(data));
+    case DW_FORM_addrx4:
+      return AttrValue::UnresolvedUint(ReadFixed<uint32_t>(data));
+    case DW_FORM_addrx:
+      return AttrValue::UnresolvedUint(ReadLEB128<uint64_t>(data));
+    case DW_FORM_addr:
+    address_size:
+      // Address-sized value; also the target for DW_FORM_ref_addr in
+      // DWARF <= 2 (see below).
+      switch (reader.unit_sizes().address_size()) {
+        case 4:
+          return AttrValue(ReadFixed<uint32_t>(data));
+        case 8:
+          return AttrValue(ReadFixed<uint64_t>(data));
+        default:
+          BLOATY_UNREACHABLE();
+      }
+    case DW_FORM_ref_addr:
+      // In DWARF <= 2 this form is address-sized; later versions use the
+      // 32/64-bit offset size, shared with DW_FORM_sec_offset.
+      if (reader.unit_sizes().dwarf_version() <= 2) {
+        goto address_size;
+      }
+      ABSL_FALLTHROUGH_INTENDED;
+    case DW_FORM_sec_offset:
+      if (reader.unit_sizes().dwarf64()) {
+        return AttrValue(ReadFixed<uint64_t>(data));
+      } else {
+        return AttrValue(ReadFixed<uint32_t>(data));
+      }
+    case DW_FORM_udata:
+      return AttrValue(ReadLEB128<uint64_t>(data));
+    case DW_FORM_block1:
+      return AttrValue(ReadBlock<uint8_t>(data));
+    case DW_FORM_block2:
+      return AttrValue(ReadBlock<uint16_t>(data));
+    case DW_FORM_block4:
+      return AttrValue(ReadBlock<uint32_t>(data));
+    case DW_FORM_block:
+    case DW_FORM_exprloc:
+      return AttrValue(ReadVariableBlock(data));
+    case DW_FORM_string:
+      return AttrValue(ReadNullTerminated(data));
+    case DW_FORM_strp:
+      if (reader.unit_sizes().dwarf64()) {
+        return AttrValue(ReadIndirectString<uint64_t>(reader, data));
+      } else {
+        return AttrValue(ReadIndirectString<uint32_t>(reader, data));
+      }
+    // Fixed-size data is kept as raw bytes; ToUint() converts it on demand.
+    case DW_FORM_data1:
+      return AttrValue(ReadBytes(1, data));
+    case DW_FORM_data2:
+      return AttrValue(ReadBytes(2, data));
+    case DW_FORM_data4:
+      return AttrValue(ReadBytes(4, data));
+    case DW_FORM_data8:
+      return AttrValue(ReadBytes(8, data));
+    case DW_FORM_rnglistx: {
+      auto val = AttrValue(ReadLEB128<uint64_t>(data));
+      return val;
+    }
+
+    // Bloaty doesn't currently care about any bool or signed data.
+    // So we fudge it a bit and just stuff these in a uint64.
+    case DW_FORM_flag_present:
+      return AttrValue(1);
+    case DW_FORM_flag:
+      return AttrValue(ReadFixed<uint8_t>(data));
+    case DW_FORM_sdata:
+      return AttrValue(ReadLEB128<uint64_t>(data));
+    default:
+      THROWF("Don't know how to parse DWARF form: $0", form);
+  }
+}
+
+// Consumes every leading zero byte of the unread data. Each one is a null
+// entry that terminates a chain of sibling entries, so the depth counter is
+// decremented once per byte consumed.
+void DIEReader::SkipNullEntries() {
+  for (;;) {
+    if (remaining_.empty() || remaining_[0] != 0) {
+      return;
+    }
+    remaining_.remove_prefix(1);
+    depth_--;
+  }
+}
+
bool DIEReader::ReadCode() {
- uint32_t code;
-again:
+ SkipNullEntries();
if (remaining_.empty()) {
state_ = State::kEof;
return false;
}
- code = ReadLEB128<uint32_t>(&remaining_);
- if (code == 0) {
- // null entry terminates a chain of sibling entries.
- depth_--;
- goto again;
- }
-
+ uint32_t code = ReadLEB128<uint32_t>(&remaining_);
 if (!unit_abbrev_->GetAbbrev(code, &current_abbrev_)) {
THROW("couldn't find abbreviation for code");
}
@@ -759,6 +953,7 @@
next_unit_ = dwarf_.debug_types;
}
+ start_ = next_unit_.data();
SkipBytes(offset, &next_unit_);
return ReadCompilationUnitHeader();
}
@@ -776,11 +971,26 @@
unit_sizes_.ReadDWARFVersion(&remaining_);
- if (unit_sizes_.dwarf_version() > 4) {
- THROW("Data is in new DWARF format we don't understand");
+ if (unit_sizes_.dwarf_version() > 5) {
+ THROWF("Data for $0 is in DWARF $1 format which we don't understand",
+ unit_name_, unit_sizes_.dwarf_version());
}
- debug_abbrev_offset_ = unit_sizes_.ReadDWARFOffset(&remaining_);
+ if (unit_sizes_.dwarf_version() == 5) {
+ uint8_t unit_type = ReadFixed<uint8_t>(&remaining_);
+ (void)unit_type; // We don't use this currently.
+ unit_sizes_.SetAddressSize(ReadFixed<uint8_t>(&remaining_));
+ debug_abbrev_offset_ = unit_sizes_.ReadDWARFOffset(&remaining_);
+ } else {
+ debug_abbrev_offset_ = unit_sizes_.ReadDWARFOffset(&remaining_);
+ unit_sizes_.SetAddressSize(ReadFixed<uint8_t>(&remaining_));
+
+ if (section_ == Section::kDebugTypes) {
+ unit_type_signature_ = ReadFixed<uint64_t>(&remaining_);
+ unit_type_offset_ = unit_sizes_.ReadDWARFOffset(&remaining_);
+ }
+ }
+
unit_abbrev_ = &abbrev_tables_[debug_abbrev_offset_];
// If we haven't already read abbreviations for this debug_abbrev_offset_, we
@@ -791,13 +1001,6 @@
unit_abbrev_->ReadAbbrevs(abbrev_data);
}
- unit_sizes_.SetAddressSize(ReadMemcpy<uint8_t>(&remaining_));
-
- if (section_ == Section::kDebugTypes) {
- unit_type_signature_ = ReadMemcpy<uint64_t>(&remaining_);
- unit_type_offset_ = unit_sizes_.ReadDWARFOffset(&remaining_);
- }
-
auto abbrev_id = std::make_pair(unit_abbrev_, unit_sizes_);
auto insert_pair = abbrev_versions_.insert(
std::make_pair(abbrev_id, abbrev_versions_.size()));
@@ -809,195 +1012,6 @@
return ReadCode();
}
-
-// DWARF form parsing //////////////////////////////////////////////////////////
-
-class AttrValue {
- public:
- AttrValue(uint64_t val) : uint_(val), type_(Type::kUint) {}
- AttrValue(string_view val) : string_(val), type_(Type::kString) {}
-
- enum class Type {
- kUint,
- kString
- };
-
- Type type() const { return type_; }
- bool IsUint() const { return type_ == Type::kUint; }
- bool IsString() const { return type_ == Type::kString; }
-
- absl::optional<uint64_t> ToUint() const {
- if (IsUint()) return uint_;
- string_view str = string_;
- switch (str.size()) {
- case 1:
- return ReadMemcpy<uint8_t>(&str);
- case 2:
- return ReadMemcpy<uint8_t>(&str);
- case 4:
- return ReadMemcpy<uint32_t>(&str);
- case 8:
- return ReadMemcpy<uint64_t>(&str);
- }
- return absl::nullopt;
- }
-
- uint64_t GetUint() const {
- assert(type_ == Type::kUint);
- return uint_;
- }
-
- string_view GetString() const {
- assert(type_ == Type::kString);
- return string_;
- }
-
- private:
- union {
- uint64_t uint_;
- string_view string_;
- };
-
- Type type_;
-};
-
-template <class D>
-string_view ReadBlock(string_view* data) {
- D len = ReadMemcpy<D>(data);
- return ReadPiece(len, data);
-}
-
-string_view ReadVariableBlock(string_view* data) {
- uint64_t len = ReadLEB128<uint64_t>(data);
- return ReadPiece(len, data);
-}
-
-template <class D>
-string_view ReadIndirectString(const DIEReader& reader, string_view* data) {
- D ofs = ReadMemcpy<D>(data);
- StringTable table(reader.dwarf().debug_str);
- string_view ret = table.ReadEntry(ofs);
- reader.AddIndirectString(ret);
- return ret;
-}
-
-AttrValue ParseAttr(const DIEReader& reader, uint8_t form, string_view* data) {
- switch (form) {
- case DW_FORM_indirect: {
- uint16_t indirect_form = ReadLEB128<uint16_t>(data);
- if (indirect_form == DW_FORM_indirect) {
- THROW("indirect attribute has indirect form type");
- }
- return ParseAttr(reader, indirect_form, data);
- }
- case DW_FORM_ref1:
- return AttrValue(ReadMemcpy<uint8_t>(data));
- case DW_FORM_ref2:
- return AttrValue(ReadMemcpy<uint16_t>(data));
- case DW_FORM_ref4:
- return AttrValue(ReadMemcpy<uint32_t>(data));
- case DW_FORM_ref_sig8:
- case DW_FORM_ref8:
- return AttrValue(ReadMemcpy<uint64_t>(data));
- case DW_FORM_ref_udata:
- return AttrValue(ReadLEB128<uint64_t>(data));
- case DW_FORM_addr:
- address_size:
- switch (reader.unit_sizes().address_size()) {
- case 4:
- return AttrValue(ReadMemcpy<uint32_t>(data));
- case 8:
- return AttrValue(ReadMemcpy<uint64_t>(data));
- default:
- BLOATY_UNREACHABLE();
- }
- case DW_FORM_ref_addr:
- if (reader.unit_sizes().dwarf_version() <= 2) {
- goto address_size;
- }
- ABSL_FALLTHROUGH_INTENDED;
- case DW_FORM_sec_offset:
- if (reader.unit_sizes().dwarf64()) {
- return AttrValue(ReadMemcpy<uint64_t>(data));
- } else {
- return AttrValue(ReadMemcpy<uint32_t>(data));
- }
- case DW_FORM_udata:
- return AttrValue(ReadLEB128<uint64_t>(data));
- case DW_FORM_block1:
- return AttrValue(ReadBlock<uint8_t>(data));
- case DW_FORM_block2:
- return AttrValue(ReadBlock<uint16_t>(data));
- case DW_FORM_block4:
- return AttrValue(ReadBlock<uint32_t>(data));
- case DW_FORM_block:
- case DW_FORM_exprloc:
- return AttrValue(ReadVariableBlock(data));
- case DW_FORM_string:
- return AttrValue(ReadNullTerminated(data));
- case DW_FORM_strp:
- if (reader.unit_sizes().dwarf64()) {
- return AttrValue(ReadIndirectString<uint64_t>(reader, data));
- } else {
- return AttrValue(ReadIndirectString<uint32_t>(reader, data));
- }
- case DW_FORM_data1:
- return AttrValue(ReadPiece(1, data));
- case DW_FORM_data2:
- return AttrValue(ReadPiece(2, data));
- case DW_FORM_data4:
- return AttrValue(ReadPiece(4, data));
- case DW_FORM_data8:
- return AttrValue(ReadPiece(8, data));
-
- // Bloaty doesn't currently care about any bool or signed data.
- // So we fudge it a bit and just stuff these in a uint64.
- case DW_FORM_flag_present:
- return AttrValue(1);
- case DW_FORM_flag:
- return AttrValue(ReadMemcpy<uint8_t>(data));
- case DW_FORM_sdata:
- return AttrValue(ReadLEB128<uint64_t>(data));
- default:
- THROWF("Don't know how to parse DWARF form: $0", form);
- }
-}
-
-
-// AttrReader //////////////////////////////////////////////////////////////////
-
-// Parses a DIE's attributes, calling user callbacks with the parsed values.
-
-template <class T>
-class AttrReader {
- public:
- typedef void CallbackFunc(T* container, AttrValue val);
-
- void OnAttribute(DwarfAttribute attr, CallbackFunc* func) {
- attributes_[attr] = func;
- }
-
- // Reads all attributes for this DIE, storing the ones we were expecting.
- void ReadAttributes(DIEReader* reader, T* container) {
- string_view data = reader->ReadAttributesBegin();
- const AbbrevTable::Abbrev& abbrev = reader->GetAbbrev();
-
- for (auto attr : abbrev.attr) {
- AttrValue value = ParseAttr(*reader, attr.form, &data);
- auto it = attributes_.find(attr.name);
- if (it != attributes_.end()) {
- it->second(container, value);
- }
- }
-
- reader->ReadAttributesEnd(data, 0);
- }
-
- private:
- std::unordered_map<int, CallbackFunc*> attributes_;
-};
-
-// From DIEReader, defined here because it depends on FixedAttrReader.
bool DIEReader::SkipChildren() {
assert(state_ == State::kReadyToNext);
if (!HasChild()) {
@@ -1005,18 +1019,56 @@
}
int target_depth = depth_ - 1;
- dwarf::AttrReader<void> attr_reader;
+ SkipNullEntries();
while (depth_ > target_depth) {
// TODO(haberman): use DW_AT_sibling to optimize skipping when it is
// available.
if (!NextDIE()) {
return false;
}
- attr_reader.ReadAttributes(this, nullptr);
+ ReadAttributes([](uint16_t, dwarf::AttrValue) {});
+ SkipNullEntries();
}
return true;
}
+// Reads all attributes for this DIE, invoking func(name, value) for each one.
+template <class T>
+void DIEReader::ReadAttributes(T&& func) {
+ assert(state_ == State::kReadyToReadAttributes);
+
+ for (auto attr : GetAbbrev().attr) {
+ AttrValue value = AttrValue::ParseAttr(*this, attr.form, &remaining_);
+ value.SetForm(attr.form);
+ func(attr.name, value);
+ }
+
+ if (remaining_.data() == nullptr) {
+ THROW("premature EOF reading DWARF attributes");
+ } else {
+ sibling_offset_ = 0;
+ state_ = State::kReadyToNext;
+ }
+}
+
+// RangeList ///////////////////////////////////////////////////////////////////
+
+void ReadRangeList(const DIEReader& die_reader, uint64_t low_pc,
+ string_view name, RangeSink* sink, string_view* data) {
+ std::string name_str(name);
+ while (true) {
+ uint64_t start, end;
+ start = die_reader.unit_sizes().ReadAddress(data);
+ end = die_reader.unit_sizes().ReadAddress(data);
+ if (start == 0 && end == 0) {
+ return;
+ }
+ uint64_t size = end - start;
+ sink->AddVMRangeIgnoreDuplicate("dwarf_rangelist", low_pc + start, size,
+ name_str);
+ }
+}
+
// LineInfoReader //////////////////////////////////////////////////////////////
// Code to read the .line_info programs in a DWARF file.
@@ -1146,9 +1198,9 @@
string_view program = data;
SkipBytes(header_length, &program);
- params_.minimum_instruction_length = ReadMemcpy<uint8_t>(&data);
+ params_.minimum_instruction_length = ReadFixed<uint8_t>(&data);
if (sizes_.dwarf_version() == 4) {
- params_.maximum_operations_per_instruction = ReadMemcpy<uint8_t>(&data);
+ params_.maximum_operations_per_instruction = ReadFixed<uint8_t>(&data);
if (params_.maximum_operations_per_instruction == 0) {
THROW("DWARF line info had maximum_operations_per_instruction=0");
@@ -1156,17 +1208,17 @@
} else {
params_.maximum_operations_per_instruction = 1;
}
- params_.default_is_stmt = ReadMemcpy<uint8_t>(&data);
- params_.line_base = ReadMemcpy<int8_t>(&data);
- params_.line_range = ReadMemcpy<uint8_t>(&data);
- params_.opcode_base = ReadMemcpy<uint8_t>(&data);
+ params_.default_is_stmt = ReadFixed<uint8_t>(&data);
+ params_.line_base = ReadFixed<int8_t>(&data);
+ params_.line_range = ReadFixed<uint8_t>(&data);
+ params_.opcode_base = ReadFixed<uint8_t>(&data);
if (params_.line_range == 0) {
THROW("line_range of zero will cause divide by zero");
}
standard_opcode_lengths_.resize(params_.opcode_base);
for (size_t i = 1; i < params_.opcode_base; i++) {
- standard_opcode_lengths_[i] = ReadMemcpy<uint8_t>(&data);
+ standard_opcode_lengths_[i] = ReadFixed<uint8_t>(&data);
}
// Read include_directories.
@@ -1227,7 +1279,7 @@
return false;
}
- uint8_t op = ReadMemcpy<uint8_t>(&data);
+ uint8_t op = ReadFixed<uint8_t>(&data);
if (op >= params_.opcode_base) {
SpecialOpcodeAdvance(op);
@@ -1241,7 +1293,7 @@
switch (op) {
case DW_LNS_extended_op: {
uint16_t len = ReadLEB128<uint16_t>(&data);
- uint8_t extended_op = ReadMemcpy<uint8_t>(&data);
+ uint8_t extended_op = ReadFixed<uint8_t>(&data);
switch (extended_op) {
case DW_LNE_end_sequence: {
// Preserve address and set end_sequence, but reset everything
@@ -1320,7 +1372,7 @@
SpecialOpcodeAdvance(255);
break;
case DW_LNS_fixed_advance_pc:
- info_.address += ReadMemcpy<uint16_t>(&data);
+ info_.address += ReadFixed<uint16_t>(&data);
info_.op_index = 0;
break;
case DW_LNS_set_prologue_end:
@@ -1346,6 +1398,38 @@
}
}
+string_view* File::GetFieldByName(string_view name) {
+ if (name == "aranges") {
+ return &debug_aranges;
+ } else if (name == "addr") {
+ return &debug_addr;
+ } else if (name == "str") {
+ return &debug_str;
+ } else if (name == "str_offsets") {
+ return &debug_str_offsets;
+ } else if (name == "info") {
+ return &debug_info;
+ } else if (name == "types") {
+ return &debug_types;
+ } else if (name == "abbrev") {
+ return &debug_abbrev;
+ } else if (name == "line") {
+ return &debug_line;
+ } else if (name == "loc") {
+ return &debug_loc;
+ } else if (name == "pubnames") {
+ return &debug_pubnames;
+ } else if (name == "pubtypes") {
+ return &debug_pubtypes;
+ } else if (name == "ranges") {
+ return &debug_ranges;
+ } else if (name == "rnglists") {
+ return &debug_rnglists;
+ } else {
+ return nullptr;
+ }
+}
+
} // namespace dwarf
// Bloaty DWARF Data Sources ///////////////////////////////////////////////////
@@ -1362,13 +1446,7 @@
public:
FilenameMap(const dwarf::File& file)
: die_reader_(file),
- missing_("[DWARF is missing filename]") {
- attr_reader_.OnAttribute(
- DW_AT_name, [](string_view* s, dwarf::AttrValue data) {
- if (!data.IsString()) return;
- *s = data.GetString();
- });
- }
+ missing_("[DWARF is missing filename]") {}
std::string GetFilename(uint64_t compilation_unit_offset) {
auto& name = map_[compilation_unit_offset];
@@ -1379,13 +1457,32 @@
}
private:
+ bool ReadName(string_view* name, uint64_t offset) {
+ auto sec = dwarf::DIEReader::Section::kDebugInfo;
+ if (!die_reader_.SeekToCompilationUnit(sec, offset) ||
+ die_reader_.GetTag() != DW_TAG_compile_unit) {
+ return false;
+ }
+
+ absl::optional<dwarf::AttrValue> attr;
+
+ die_reader_.ReadAttributes([&attr](uint16_t tag, dwarf::AttrValue data) {
+ if (tag == DW_AT_name && data.IsString()) {
+ attr = data;
+ }
+ });
+
+ if (attr && attr->IsString()) {
+ *name = attr->GetString(die_reader_);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
std::string LookupFilename(uint64_t compilation_unit_offset) {
- auto section = dwarf::DIEReader::Section::kDebugInfo;
string_view name;
- if (die_reader_.SeekToCompilationUnit(section, compilation_unit_offset) &&
- die_reader_.GetTag() == DW_TAG_compile_unit &&
- (attr_reader_.ReadAttributes(&die_reader_, &name),
- !name.empty())) {
+ if (ReadName(&name, compilation_unit_offset)) {
return std::string(name);
} else {
return missing_;
@@ -1393,7 +1490,6 @@
}
dwarf::DIEReader die_reader_;
- dwarf::AttrReader<string_view> attr_reader_;
std::unordered_map<uint64_t, std::string> map_;
std::string missing_;
} map(file);
@@ -1414,121 +1510,46 @@
return true;
}
-// TODO(haberman): make these into real protobufs once proto supports
-// string_view.
-class GeneralDIE {
- public:
- bool has_name() const { return has_name_; }
- bool has_linkage_name() const { return has_linkage_name_; }
- bool has_location_string() const { return has_location_string_; }
- bool has_low_pc() const { return has_low_pc_; }
- bool has_high_pc() const { return has_high_pc_; }
- bool has_location_uint64() const { return has_location_uint64_; }
- bool has_stmt_list() const { return has_stmt_list_; }
- bool has_ranges() const { return has_ranges_; }
- bool has_start_scope() const { return has_start_scope_; }
-
- std::string DebugString() {
- std::string ret;
- if (has_name()) {
- ret += absl::Substitute("name: $0\n", name());
- }
- if (has_linkage_name()) {
- ret += absl::Substitute("linkage_name: $0\n", linkage_name());
- }
- if (has_location_string()) {
- ret += absl::Substitute("location_string: $0\n", location_string());
- }
- if (has_low_pc()) {
- ret += absl::Substitute("low_pc: $0\n", low_pc());
- }
- if (has_high_pc()) {
- ret += absl::Substitute("high_pc: $0\n", high_pc());
- }
- if (has_location_uint64()) {
- ret += absl::Substitute("location_uint64: $0\n", location_uint64());
- }
- if (has_stmt_list()) {
- ret += absl::Substitute("stmt_list: $0\n", stmt_list());
- }
- if (has_ranges()) {
- ret += absl::Substitute("ranges: $0\n", ranges());
- }
- if (has_start_scope()) {
- ret += absl::Substitute("start_scope: $0\n", start_scope());
- }
- return ret;
- }
-
- string_view name() const { return name_; }
- string_view linkage_name() const { return linkage_name_; }
- string_view location_string() const { return location_string_; }
- uint64_t low_pc() const { return low_pc_; }
- uint64_t high_pc() const { return high_pc_; }
- uint64_t location_uint64() const { return location_uint64_; }
- uint64_t stmt_list() const { return stmt_list_; }
- uint64_t ranges() const { return ranges_; }
- uint64_t start_scope() const { return start_scope_; }
-
- void set_name(string_view val) {
- has_name_ = true;
- name_ = val;
- }
- void set_linkage_name(string_view val) {
- has_linkage_name_ = true;
- location_string_ = val;
- }
- void set_location_string(string_view val) {
- has_location_string_ = true;
- location_string_ = val;
- }
- void set_low_pc(uint64_t val) {
- has_low_pc_ = true;
- low_pc_ = val;
- }
- void set_high_pc(uint64_t val) {
- has_high_pc_ = true;
- high_pc_ = val;
- }
- void set_location_uint64(uint64_t val) {
- has_location_uint64_ = true;
- location_uint64_ = val;
- }
- void set_stmt_list(uint64_t val) {
- has_stmt_list_ = true;
- stmt_list_ = val;
- }
- void set_ranges(uint64_t val) {
- has_ranges_ = true;
- ranges_ = val;
- }
- void set_start_scope(uint64_t val) {
- has_start_scope_ = true;
- start_scope_ = val;
- }
-
- private:
- bool has_name_ = false;
- bool has_linkage_name_ = false;
- bool has_location_string_ = false;
- bool has_low_pc_ = false;
- bool has_high_pc_ = false;
- bool has_location_uint64_ = false;
- bool has_stmt_list_ = false;
- bool has_ranges_ = false;
- bool has_start_scope_ = false;
-
- string_view name_;
- string_view linkage_name_;
- string_view location_string_;
- uint64_t low_pc_ = 0;
- uint64_t high_pc_ = 0;
- uint64_t location_uint64_ = 0;
- uint64_t stmt_list_ = 0;
- uint64_t ranges_ = 0;
- uint64_t start_scope_ = 0;
+struct GeneralDIE {
+ absl::optional<dwarf::AttrValue> name;
+ absl::optional<dwarf::AttrValue> linkage_name;
+ absl::optional<dwarf::AttrValue> location;
+ absl::optional<dwarf::AttrValue> low_pc;
+ absl::optional<dwarf::AttrValue> high_pc;
+ absl::optional<dwarf::AttrValue> stmt_list;
+ absl::optional<dwarf::AttrValue> ranges;
+ absl::optional<dwarf::AttrValue> start_scope;
};
+void ReadGeneralDIEAttr(uint16_t tag, dwarf::AttrValue val, GeneralDIE *die) {
+ switch (tag) {
+ case DW_AT_name:
+ die->name = val;
+ break;
+ case DW_AT_linkage_name:
+ die->linkage_name = val;
+ break;
+ case DW_AT_location:
+ die->location = val;
+ break;
+ case DW_AT_low_pc:
+ die->low_pc = val;
+ break;
+ case DW_AT_high_pc:
+ die->high_pc = val;
+ break;
+ case DW_AT_stmt_list:
+ die->stmt_list = val;
+ break;
+ case DW_AT_ranges:
+ die->ranges = val;
+ break;
+ case DW_AT_start_scope:
+ die->start_scope = val;
+ break;
+ }
+}
+
class InlinesDIE {
public:
bool has_stmt_list() const { return has_stmt_list_; }
@@ -1545,29 +1566,34 @@
uint64_t stmt_list_ = 0;
};
+// To view DIEs for a given file, try:
+// readelf --debug-dump=info foo.bin
void AddDIE(const dwarf::File& file, const std::string& name,
const GeneralDIE& die, const SymbolTable& symtab,
- const DualMap& symbol_map, const dwarf::CompilationUnitSizes& sizes,
+ const DualMap& symbol_map, const dwarf::DIEReader& die_reader,
RangeSink* sink) {
+ uint64_t low_pc = 0;
// Some DIEs mark address ranges with high_pc/low_pc pairs (especially
// functions).
- if (die.has_low_pc() && die.has_high_pc() &&
- dwarf::IsValidDwarfAddress(die.low_pc(), sizes.address_size())) {
- uint64_t high_pc = die.high_pc();
+ if (die.low_pc && die.low_pc->IsUint() && die.high_pc &&
+ die.high_pc->IsUint() &&
+ dwarf::IsValidDwarfAddress(die.low_pc->GetUint(die_reader),
+ die_reader.unit_sizes().address_size())) {
+ low_pc = die.low_pc->GetUint(die_reader);
+ uint64_t high_pc = die.high_pc->GetUint(die_reader);
// It appears that some compilers make high_pc a size, and others make it an
// address.
- if (high_pc >= die.low_pc()) {
- high_pc -= die.low_pc();
+ if (high_pc >= low_pc) {
+ high_pc -= low_pc;
}
- sink->AddVMRangeIgnoreDuplicate("dwarf_pcpair", die.low_pc(), high_pc,
- name);
+ sink->AddVMRangeIgnoreDuplicate("dwarf_pcpair", low_pc, high_pc, name);
}
// Sometimes a DIE has a linkage_name, which we can look up in the symbol
// table.
- if (die.has_linkage_name()) {
- auto it = symtab.find(die.linkage_name());
+ if (die.linkage_name && die.linkage_name->IsString()) {
+ auto it = symtab.find(die.linkage_name->GetString(die_reader));
if (it != symtab.end()) {
sink->AddVMRangeIgnoreDuplicate("dwarf_linkagename", it->second.first,
it->second.second, name);
@@ -1576,17 +1602,17 @@
// Sometimes the DIE has a "location", which gives the location as an address.
// This parses a very small subset of the overall DWARF expression grammar.
- if (die.has_location_string()) {
- string_view location = die.location_string();
- if (location.size() == sizes.address_size() + 1 &&
+ if (die.location && die.location->IsString()) {
+ string_view location = die.location->GetString(die_reader);
+ if (location.size() == die_reader.unit_sizes().address_size() + 1 &&
location[0] == DW_OP_addr) {
location.remove_prefix(1);
uint64_t addr;
// TODO(haberman): endian?
- if (sizes.address_size() == 4) {
- addr = dwarf::ReadMemcpy<uint32_t>(&location);
- } else if (sizes.address_size() == 8) {
- addr = dwarf::ReadMemcpy<uint64_t>(&location);
+ if (die_reader.unit_sizes().address_size() == 4) {
+ addr = ReadFixed<uint32_t>(&location);
+ } else if (die_reader.unit_sizes().address_size() == 8) {
+ addr = ReadFixed<uint64_t>(&location);
} else {
BLOATY_UNREACHABLE();
}
@@ -1600,47 +1626,107 @@
if (verbose_level > 0) {
fprintf(stderr,
"bloaty: warning: couldn't find DWARF location in symbol "
- "table, address: %" PRIx64 "\n",
- addr);
+ "table, address: %" PRIx64 ", name: %s\n",
+ addr, name.c_str());
}
}
}
}
// Sometimes a location is given as an offset into debug_loc.
- if (die.has_location_uint64()) {
- if (die.location_uint64() < file.debug_loc.size()) {
- absl::string_view loc_range = file.debug_loc.substr(die.location_uint64());
- loc_range = GetLocationListRange(sizes, loc_range);
+ if (die.location && die.location->IsUint()) {
+ uint64_t location = die.location->GetUint(die_reader);
+ if (location < file.debug_loc.size()) {
+ absl::string_view loc_range = file.debug_loc.substr(location);
+ loc_range = GetLocationListRange(die_reader.unit_sizes(), loc_range);
sink->AddFileRange("dwarf_locrange", name, loc_range);
} else if (verbose_level > 0) {
fprintf(stderr,
"bloaty: warning: DWARF location out of range, location=%" PRIx64
"\n",
- die.location_uint64());
+ location);
}
}
- uint64_t ranges_offset = UINT64_MAX;
+ // DWARF 5 range list is the same information as "ranges" but in a different
+ // format.
+ if (die.ranges && die.ranges->form() == DW_FORM_rnglistx && die.ranges->IsUint()) {
+ uint64_t range_list = die.ranges->GetUint(die_reader);
+ const dwarf::CompilationUnitSizes& sizes = die_reader.unit_sizes();
+ string_view offset_data = StrictSubstr(
+ file.debug_rnglists, die_reader.unit_sizes().range_lists_base() + range_list);
+ uint64_t offset = die_reader.unit_sizes().ReadDWARFOffset(&offset_data);
+ string_view data = StrictSubstr(
+ file.debug_rnglists, die_reader.unit_sizes().range_lists_base() + offset);
+ const char* start = data.data();
+ bool done = false;
+ uint64_t base_address = sizes.addr_base();
+ while (!done) {
+ switch (ReadFixed<uint8_t>(&data)) {
+ case DW_RLE_end_of_list:
+ done = true;
+ break;
+ case DW_RLE_base_addressx:
+ base_address = ReadIndirectAddress(
+ die_reader, dwarf::ReadLEB128<uint64_t>(&data));
+ break;
+ case DW_RLE_startx_endx: {
+ uint64_t start = ReadIndirectAddress(
+ die_reader, dwarf::ReadLEB128<uint64_t>(&data));
+ uint64_t end = ReadIndirectAddress(
+ die_reader, dwarf::ReadLEB128<uint64_t>(&data));
+ sink->AddVMRangeIgnoreDuplicate("dwarf_rangelst", start, end - start,
+ name);
+ break;
+ }
+ case DW_RLE_startx_length: {
+ uint64_t start = ReadIndirectAddress(
+ die_reader, dwarf::ReadLEB128<uint64_t>(&data));
+ uint64_t length = dwarf::ReadLEB128<uint64_t>(&data);
+ sink->AddVMRangeIgnoreDuplicate("dwarf_rangelst", start, length,
+ name);
+ break;
+ }
+ case DW_RLE_offset_pair: {
+ uint64_t start = dwarf::ReadLEB128<uint64_t>(&data) + base_address;
+ uint64_t end = dwarf::ReadLEB128<uint64_t>(&data) + base_address;
+ sink->AddVMRangeIgnoreDuplicate("dwarf_rangelst", start, end - start,
+ name);
+ break;
+ }
+ case DW_RLE_base_address:
+ case DW_RLE_start_end:
+ case DW_RLE_start_length:
+ THROW("NYI");
+ break;
+ }
+ }
+ string_view all(start, data.data() - start);
+ sink->AddFileRange("dwarf_rangelst_addrs", name, all);
+ } else {
+ uint64_t ranges_offset = UINT64_MAX;
- // There are two different attributes that sometimes contain an offset into
- // debug_ranges.
- if (die.has_ranges()) {
- ranges_offset = die.ranges();
- } else if (die.has_start_scope()) {
- ranges_offset = die.start_scope();
- }
+ // There are two different attributes that sometimes contain an offset into
+ // debug_ranges.
+ if (die.ranges && die.ranges->IsUint()) {
+ ranges_offset = die.ranges->GetUint(die_reader);
+ } else if (die.start_scope && die.start_scope->IsUint()) {
+ ranges_offset = die.start_scope->GetUint(die_reader);
+ }
- if (ranges_offset != UINT64_MAX) {
- if (ranges_offset < file.debug_ranges.size()) {
- absl::string_view ranges_range = file.debug_ranges.substr(ranges_offset);
- ranges_range = GetRangeListRange(sizes, ranges_range);
- sink->AddFileRange("dwarf_debugrange", name, ranges_range);
- } else if (verbose_level > 0) {
- fprintf(stderr,
- "bloaty: warning: DWARF debug range out of range, "
- "ranges_offset=%" PRIx64 "\n",
- ranges_offset);
+ if (ranges_offset != UINT64_MAX) {
+ if (ranges_offset < file.debug_ranges.size()) {
+ absl::string_view data = file.debug_ranges.substr(ranges_offset);
+ const char* start = data.data();
+ ReadRangeList(die_reader, low_pc, name, sink, &data);
+ string_view all(start, data.data() - start);
+ sink->AddFileRange("dwarf_debugrange", name, all);
+ } else if (verbose_level > 0) {
+ fprintf(stderr,
+ "bloaty: warning: DWARF debug range out of range, "
+ "ranges_offset=%" PRIx64 "\n",
+ ranges_offset);
+ }
}
}
}
@@ -1648,16 +1734,8 @@
static void ReadDWARFPubNames(const dwarf::File& file, string_view section,
RangeSink* sink) {
dwarf::DIEReader die_reader(file);
- dwarf::AttrReader<string_view> attr_reader;
string_view remaining = section;
- attr_reader.OnAttribute(
- DW_AT_name, [](string_view* s, dwarf::AttrValue data) {
- if (data.type() == dwarf::AttrValue::Type::kString) {
- *s = data.GetString();
- }
- });
-
while (remaining.size() > 0) {
dwarf::CompilationUnitSizes sizes;
string_view full_unit = remaining;
@@ -1672,7 +1750,12 @@
THROW("Couldn't seek to debug_info section");
}
string_view compileunit_name;
- attr_reader.ReadAttributes(&die_reader, &compileunit_name);
+ die_reader.ReadAttributes(
+ [&compileunit_name, &die_reader](uint16_t tag, dwarf::AttrValue data) {
+ if (tag == DW_AT_name && data.IsString()) {
+ compileunit_name = data.GetString(die_reader);
+ }
+ });
if (!compileunit_name.empty()) {
sink->AddFileRange("dwarf_pubnames", compileunit_name, full_unit);
}
@@ -1690,34 +1773,34 @@
return 0;
case DW_EH_PE_absptr:
if (is_64bit) {
- value = dwarf::ReadMemcpy<uint64_t>(data);
+ value = ReadFixed<uint64_t>(data);
} else {
- value = dwarf::ReadMemcpy<uint32_t>(data);
+ value = ReadFixed<uint32_t>(data);
}
break;
case DW_EH_PE_uleb128:
value = dwarf::ReadLEB128<uint64_t>(data);
break;
case DW_EH_PE_udata2:
- value = dwarf::ReadMemcpy<uint16_t>(data);
+ value = ReadFixed<uint16_t>(data);
break;
case DW_EH_PE_udata4:
- value = dwarf::ReadMemcpy<uint32_t>(data);
+ value = ReadFixed<uint32_t>(data);
break;
case DW_EH_PE_udata8:
- value = dwarf::ReadMemcpy<uint64_t>(data);
+ value = ReadFixed<uint64_t>(data);
break;
case DW_EH_PE_sleb128:
value = dwarf::ReadLEB128<int64_t>(data);
break;
case DW_EH_PE_sdata2:
- value = dwarf::ReadMemcpy<int16_t>(data);
+ value = ReadFixed<int16_t>(data);
break;
case DW_EH_PE_sdata4:
- value = dwarf::ReadMemcpy<int32_t>(data);
+ value = ReadFixed<int32_t>(data);
break;
case DW_EH_PE_sdata8:
- value = dwarf::ReadMemcpy<int64_t>(data);
+ value = ReadFixed<int64_t>(data);
break;
default:
THROWF("Unexpected eh_frame format value: $0", format);
@@ -1746,9 +1829,9 @@
if (encoding & DW_EH_PE_indirect) {
string_view location = sink->TranslateVMToFile(value);
if (is_64bit) {
- value = dwarf::ReadMemcpy<uint64_t>(&location);
+ value = ReadFixed<uint64_t>(&location);
} else {
- value = dwarf::ReadMemcpy<uint32_t>(&location);
+ value = ReadFixed<uint32_t>(&location);
}
}
@@ -1804,17 +1887,17 @@
}
full_entry =
full_entry.substr(0, entry.size() + (entry.data() - full_entry.data()));
- uint32_t id = dwarf::ReadMemcpy<uint32_t>(&entry);
+ uint32_t id = ReadFixed<uint32_t>(&entry);
if (id == 0) {
// CIE, we don't attribute this yet.
CIEInfo& cie_info = cie_map[full_entry.data()];
- cie_info.version = dwarf::ReadMemcpy<uint8_t>(&entry);
- string_view aug_string = dwarf::ReadNullTerminated(&entry);
+ cie_info.version = ReadFixed<uint8_t>(&entry);
+ string_view aug_string = ReadNullTerminated(&entry);
cie_info.code_align = dwarf::ReadLEB128<uint32_t>(&entry);
cie_info.data_align = dwarf::ReadLEB128<int32_t>(&entry);
switch (cie_info.version) {
case 1:
- cie_info.return_address_reg = dwarf::ReadMemcpy<uint8_t>(&entry);
+ cie_info.return_address_reg = ReadFixed<uint8_t>(&entry);
break;
case 3:
cie_info.return_address_reg = dwarf::ReadLEB128<uint32_t>(&entry);
@@ -1830,16 +1913,16 @@
dwarf::ReadLEB128<uint32_t>(&entry);
break;
case 'L':
- cie_info.lsda_encoding = dwarf::ReadMemcpy<uint8_t>(&entry);
+ cie_info.lsda_encoding = ReadFixed<uint8_t>(&entry);
break;
case 'R':
- cie_info.fde_encoding = dwarf::ReadMemcpy<uint8_t>(&entry);
+ cie_info.fde_encoding = ReadFixed<uint8_t>(&entry);
break;
case 'S':
cie_info.is_signal_handler = true;
break;
case 'P': {
- uint8_t encoding = dwarf::ReadMemcpy<uint8_t>(&entry);
+ uint8_t encoding = ReadFixed<uint8_t>(&entry);
cie_info.personality_function =
ReadEncodedPointer(encoding, true, &entry, nullptr, sink);
break;
@@ -1885,10 +1968,10 @@
// http://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html#EHFRAME
void ReadEhFrameHdr(string_view data, RangeSink* sink) {
const char* base = data.data();
- uint8_t version = dwarf::ReadMemcpy<uint8_t>(&data);
- uint8_t eh_frame_ptr_enc = dwarf::ReadMemcpy<uint8_t>(&data);
- uint8_t fde_count_enc = dwarf::ReadMemcpy<uint8_t>(&data);
- uint8_t table_enc = dwarf::ReadMemcpy<uint8_t>(&data);
+ uint8_t version = ReadFixed<uint8_t>(&data);
+ uint8_t eh_frame_ptr_enc = ReadFixed<uint8_t>(&data);
+ uint8_t fde_count_enc = ReadFixed<uint8_t>(&data);
+ uint8_t table_enc = ReadFixed<uint8_t>(&data);
if (version != 1) {
THROWF("Unknown eh_frame_hdr version: $0", version);
@@ -1920,7 +2003,7 @@
static void ReadDWARFStmtListRange(const dwarf::File& file, uint64_t offset,
string_view unit_name, RangeSink* sink) {
string_view data = file.debug_line;
- dwarf::SkipBytes(offset, &data);
+ SkipBytes(offset, &data);
string_view data_with_length = data;
dwarf::CompilationUnitSizes sizes;
data = sizes.ReadInitialLength(&data);
@@ -1938,56 +2021,6 @@
std::unordered_map<uint64_t, std::string>* stmt_list_map) {
dwarf::DIEReader die_reader(file);
die_reader.set_strp_sink(sink);
- dwarf::AttrReader<GeneralDIE> attr_reader;
-
- attr_reader.OnAttribute(DW_AT_name,
- [](GeneralDIE* die, dwarf::AttrValue val) {
- if (!val.IsString()) return;
- die->set_name(val.GetString());
- });
- attr_reader.OnAttribute(DW_AT_linkage_name,
- [](GeneralDIE* die, dwarf::AttrValue val) {
- if (!val.IsString()) return;
- die->set_linkage_name(val.GetString());
- });
- attr_reader.OnAttribute(DW_AT_location,
- [](GeneralDIE* die, dwarf::AttrValue val) {
- if (val.IsString()) {
- die->set_location_string(val.GetString());
- } else {
- die->set_location_uint64(val.GetUint());
- }
- });
- attr_reader.OnAttribute(DW_AT_low_pc,
- [](GeneralDIE* die, dwarf::AttrValue val) {
- absl::optional<uint64_t> uint = val.ToUint();
- if (!uint.has_value()) return;
- die->set_low_pc(uint.value());
- });
- attr_reader.OnAttribute(DW_AT_high_pc,
- [](GeneralDIE* die, dwarf::AttrValue val) {
- absl::optional<uint64_t> uint = val.ToUint();
- if (!uint.has_value()) return;
- die->set_high_pc(uint.value());
- });
- attr_reader.OnAttribute(DW_AT_stmt_list,
- [](GeneralDIE* die, dwarf::AttrValue val) {
- absl::optional<uint64_t> uint = val.ToUint();
- if (!uint.has_value()) return;
- die->set_stmt_list(uint.value());
- });
- attr_reader.OnAttribute(DW_AT_ranges,
- [](GeneralDIE* die, dwarf::AttrValue val) {
- absl::optional<uint64_t> uint = val.ToUint();
- if (!uint.has_value()) return;
- die->set_ranges(uint.value());
- });
- attr_reader.OnAttribute(DW_AT_start_scope,
- [](GeneralDIE* die, dwarf::AttrValue val) {
- absl::optional<uint64_t> uint = val.ToUint();
- if (!uint.has_value()) return;
- die->set_start_scope(uint.value());
- });
if (!die_reader.SeekToStart(section)) {
return;
@@ -1995,11 +2028,33 @@
do {
GeneralDIE compileunit_die;
- attr_reader.ReadAttributes(&die_reader, &compileunit_die);
- std::string compileunit_name = std::string(compileunit_die.name());
+ die_reader.ReadAttributes(
+ [&die_reader, &compileunit_die](uint16_t tag, dwarf::AttrValue value) {
+ switch (tag) {
+ case DW_AT_addr_base:
+ die_reader.mutable_unit_sizes()->SetAddrBase(value.GetUint(die_reader));
+ break;
+ case DW_AT_str_offsets_base:
+ die_reader.mutable_unit_sizes()->SetStrOffsetsBase(value.GetUint(die_reader));
+ break;
+ case DW_AT_rnglists_base:
+ die_reader.mutable_unit_sizes()->SetRangeListsBase(value.GetUint(die_reader));
+ break;
+ default:
+ ReadGeneralDIEAttr(tag, value, &compileunit_die);
+ break;
+ }
+ });
+ std::string compileunit_name;
+ if (compileunit_die.name && compileunit_die.name->IsString()) {
+ compileunit_name =
+ std::string(compileunit_die.name->GetString(die_reader));
+ }
- if (compileunit_die.has_stmt_list()) {
- uint64_t stmt_list = compileunit_die.stmt_list();
+ uint64_t stmt_list = UINT64_MAX;
+
+ if (compileunit_die.stmt_list && compileunit_die.stmt_list->IsUint()) {
+ stmt_list = compileunit_die.stmt_list->GetUint(die_reader);
if (compileunit_name.empty()) {
auto iter = stmt_list_map->find(stmt_list);
if (iter != stmt_list_map->end()) {
@@ -2018,30 +2073,32 @@
sink->AddFileRange("dwarf_debuginfo", compileunit_name,
die_reader.unit_range());
AddDIE(file, compileunit_name, compileunit_die, symtab, symbol_map,
- die_reader.unit_sizes(), sink);
+ die_reader, sink);
- if (compileunit_die.has_stmt_list()) {
- uint64_t offset = compileunit_die.stmt_list();
- ReadDWARFStmtListRange(file, offset, compileunit_name, sink);
+ if (stmt_list != UINT64_MAX) {
+ ReadDWARFStmtListRange(file, stmt_list, compileunit_name, sink);
}
string_view abbrev_data = file.debug_abbrev;
- dwarf::SkipBytes(die_reader.debug_abbrev_offset(), &abbrev_data);
+ SkipBytes(die_reader.debug_abbrev_offset(), &abbrev_data);
dwarf::AbbrevTable unit_abbrev;
abbrev_data = unit_abbrev.ReadAbbrevs(abbrev_data);
sink->AddFileRange("dwarf_abbrev", compileunit_name, abbrev_data);
while (die_reader.NextDIE()) {
GeneralDIE die;
- attr_reader.ReadAttributes(&die_reader, &die);
+ die_reader.ReadAttributes([&die](uint16_t tag, dwarf::AttrValue value) {
+ ReadGeneralDIEAttr(tag, value, &die);
+ });
// low_pc == 0 is a signal that this routine was stripped out of the
// final binary. Skip this DIE and all of its children.
- if (die.has_low_pc() && die.low_pc() == 0) {
+ if (die.low_pc && die.low_pc->IsUint() &&
+ die.low_pc->GetUint(die_reader) == 0) {
die_reader.SkipChildren();
} else {
- AddDIE(file, compileunit_name, die, symtab, symbol_map,
- die_reader.unit_sizes(), sink);
+ AddDIE(file, compileunit_name, die, symtab, symbol_map, die_reader,
+ sink);
}
}
} while (die_reader.NextCompilationUnit());
@@ -2114,14 +2171,6 @@
dwarf::DIEReader die_reader(file);
dwarf::LineInfoReader line_info_reader(file);
- dwarf::AttrReader<InlinesDIE> attr_reader;
-
- attr_reader.OnAttribute(
- DW_AT_stmt_list, [](InlinesDIE* die, dwarf::AttrValue data) {
- absl::optional<uint64_t> uint = data.ToUint();
- if (!uint.has_value()) return;
- die->set_stmt_list(uint.value());
- });
if (!die_reader.SeekToStart(dwarf::DIEReader::Section::kDebugInfo)) {
THROW("debug info is present, but empty");
@@ -2129,7 +2178,15 @@
while (true) {
InlinesDIE die;
- attr_reader.ReadAttributes(&die_reader, &die);
+ die_reader.ReadAttributes(
+ [&die, &die_reader](uint16_t tag, dwarf::AttrValue val) {
+ switch (tag) {
+ case DW_AT_stmt_list:
+ if (auto uint = val.ToUint(die_reader))
+ die.set_stmt_list(uint.value());
+ break;
+ }
+ });
if (die.has_stmt_list()) {
uint64_t offset = die.stmt_list();
@@ -2144,4 +2201,4 @@
}
}
-} // namespace bloaty
+} // namespace bloaty
diff --git a/src/dwarf_constants.h b/src/dwarf_constants.h
index ba6eb2c..78891fb 100644
--- a/src/dwarf_constants.h
+++ b/src/dwarf_constants.h
@@ -152,9 +152,27 @@
DW_FORM_exprloc = 0x18,
DW_FORM_flag_present = 0x19,
// DWARF 5.
+ DW_FORM_strx = 0x1a,
+ DW_FORM_addrx = 0x1b,
+ DW_FORM_ref_sup4 = 0x1c,
+ DW_FORM_strp_sup = 0x1d,
+ DW_FORM_data16 = 0x1e,
DW_FORM_line_strp = 0x1f,
// DWARF 4.
DW_FORM_ref_sig8 = 0x20,
+ // DWARF 5.
+ DW_FORM_implicit_const = 0x21,
+ DW_FORM_loclistx = 0x22,
+ DW_FORM_rnglistx = 0x23,
+ DW_FORM_ref_sup8 = 0x24,
+ DW_FORM_strx1 = 0x25,
+ DW_FORM_strx2 = 0x26,
+ DW_FORM_strx3 = 0x27,
+ DW_FORM_strx4 = 0x28,
+ DW_FORM_addrx1 = 0x29,
+ DW_FORM_addrx2 = 0x2a,
+ DW_FORM_addrx3 = 0x2b,
+ DW_FORM_addrx4 = 0x2c,
// Extensions for Fission. See http://gcc.gnu.org/wiki/DebugFission.
DW_FORM_GNU_addr_index = 0x1f01,
DW_FORM_GNU_str_index = 0x1f02
@@ -261,6 +279,36 @@
DW_AT_const_expr = 0x6c,
DW_AT_enum_class = 0x6d,
DW_AT_linkage_name = 0x6e,
+ // DWARF 5 values.
+ DW_AT_string_length_bit_size = 0x6f,
+ DW_AT_string_length_byte_size = 0x70,
+ DW_AT_rank = 0x71,
+ DW_AT_str_offsets_base = 0x72,
+ DW_AT_addr_base = 0x73,
+ DW_AT_rnglists_base = 0x74,
+ DW_AT_dwo_name = 0x76,
+ DW_AT_reference = 0x77,
+ DW_AT_rvalue_reference = 0x78,
+ DW_AT_macros = 0x79,
+ DW_AT_call_all_calls = 0x7a,
+ DW_AT_call_all_source_calls = 0x7b,
+ DW_AT_call_all_tail_calls = 0x7c,
+ DW_AT_call_return_pc = 0x7d,
+ DW_AT_call_value = 0x7e,
+ DW_AT_call_origin = 0x7f,
+ DW_AT_call_parameter = 0x80,
+ DW_AT_call_pc = 0x81,
+ DW_AT_call_tail_call = 0x82,
+ DW_AT_call_target = 0x83,
+ DW_AT_call_target_clobbered = 0x84,
+ DW_AT_call_data_location = 0x85,
+ DW_AT_call_data_value = 0x86,
+ DW_AT_noreturn = 0x87,
+ DW_AT_alignment = 0x88,
+ DW_AT_export_symbols = 0x89,
+ DW_AT_deleted = 0x8a,
+ DW_AT_defaulted = 0x8b,
+ DW_AT_loclists_base = 0x8c,
// SGI/MIPS extensions.
DW_AT_MIPS_fde = 0x2001,
DW_AT_MIPS_loop_begin = 0x2002,
@@ -645,5 +693,16 @@
DW_EH_PE_omit = 0xff
};
+// Range list entry kinds (DW_RLE_*) used by DWARF 5 range lists.
+enum RangeListEntry {
+ DW_RLE_end_of_list = 0x00,
+ DW_RLE_base_addressx = 0x01,
+ DW_RLE_startx_endx = 0x02,
+ DW_RLE_startx_length = 0x03,
+ DW_RLE_offset_pair = 0x04,
+ DW_RLE_base_address = 0x05,
+ DW_RLE_start_end = 0x06,
+ DW_RLE_start_length = 0x07,
+};
+
} // namespace dwarf2reader
#endif // UTIL_DEBUGINFO_DWARF2ENUMS_H__
diff --git a/src/elf.cc b/src/elf.cc
index 4430a0c..9195261 100644
--- a/src/elf.cc
+++ b/src/elf.cc
@@ -21,7 +21,6 @@
#include "absl/strings/escaping.h"
#include "absl/strings/string_view.h"
#include "absl/strings/substitute.h"
-#include "re2/re2.h"
#include "third_party/freebsd_elf/elf.h"
#include "bloaty.h"
#include "link_map.h"
@@ -30,42 +29,12 @@
#include <limits.h>
#include <stdlib.h>
-// Not present in the FreeBSD ELF headers.
-#define NT_GNU_BUILD_ID 3
-
using absl::string_view;
-ABSL_ATTRIBUTE_NORETURN
-static void Throw(const char *str, int line) {
- throw bloaty::Error(str, __FILE__, line);
-}
-
-#define THROW(msg) Throw(msg, __LINE__)
-#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
-#define WARN(x) fprintf(stderr, "bloaty: %s\n", x);
-
namespace bloaty {
namespace {
-uint64_t CheckedAdd(uint64_t a, uint64_t b) {
- absl::uint128 a_128(a), b_128(b);
- absl::uint128 c_128 = a_128 + b_128;
- if (c_128 > UINT64_MAX) {
- THROW("integer overflow in addition");
- }
- return static_cast<uint64_t>(c_128);
-}
-
-uint64_t CheckedMul(uint64_t a, uint64_t b) {
- absl::uint128 a_128(a), b_128(b);
- absl::uint128 c = a * b;
- if (c > UINT64_MAX) {
- THROW("integer overflow in multiply");
- }
- return static_cast<uint64_t>(c);
-}
-
struct ByteSwapFunc {
template <class T>
T operator()(T val) {
@@ -87,38 +56,10 @@
}
template <class T>
-const T* GetStructPointer(string_view data) {
- if (sizeof(T) > data.size()) {
- THROW("Premature EOF reading ELF data.");
- }
- return reinterpret_cast<const T*>(data.data());
-}
-
-template <class T>
void AdvancePastStruct(string_view* data) {
*data = data->substr(sizeof(T));
}
-static string_view StrictSubstr(string_view data, size_t off, size_t n) {
- uint64_t end = CheckedAdd(off, n);
- if (end > data.size()) {
- THROW("ELF region out-of-bounds");
- }
- return data.substr(off, n);
-}
-
-static string_view StrictSubstr(string_view data, size_t off) {
- if (off > data.size()) {
- THROW("ELF region out-of-bounds");
- }
- return data.substr(off);
-}
-
-static size_t AlignUp(size_t offset, size_t granularity) {
- // Granularity must be a power of two.
- return (offset + granularity - 1) & ~(granularity - 1);
-}
-
// ElfFile /////////////////////////////////////////////////////////////////////
// For parsing the pieces we need out of an ELF file (.o, .so, and binaries).
@@ -230,6 +171,12 @@
bool is_64bit() const { return is_64bit_; }
bool is_native_endian() const { return is_native_endian_; }
+ // Reads the struct at `offset` within `contents` into `*out` by delegating
+ // to StructReader::Read<T32>. `munger` and `range` are passed through
+ // unchanged.
+ template <class T32, class T64, class Munger>
+ void ReadStruct(absl::string_view contents, uint64_t offset, Munger munger,
+ absl::string_view* range, T64* out) const {
+ StructReader(*this, contents).Read<T32>(offset, munger, range, out);
+ }
+
private:
friend class Section;
@@ -265,24 +212,15 @@
T64* out) const;
+ // Copies sizeof(*out) bytes starting at `offset` in data_ into `*out`.
+ // The region is first obtained through StrictSubstr(); when `out_range`
+ // is non-null it is set to that region.
+ // NOTE(review): bounds checking now relies on StrictSubstr() rejecting an
+ // out-of-range request, as the removed inline check did -- confirm.
template <class T>
- void Memcpy(uint64_t offset, absl::string_view* range, T* out) const {
- uint64_t end = CheckedAdd(offset, sizeof(T));
- if (end > data_.size()) {
- THROW("out-of-bounds read to ELF file");
- }
- if (range) {
- *range = absl::string_view(data_.data() + offset, sizeof(*out));
+ void Memcpy(uint64_t offset, absl::string_view* out_range, T* out) const {
+ absl::string_view range = StrictSubstr(data_, offset, sizeof(*out));
+ if (out_range) {
+ *out_range = range;
}
memcpy(out, data_.data() + offset, sizeof(*out));
}
};
- template <class T32, class T64, class Munger>
- void ReadStruct(absl::string_view contents, uint64_t offset, Munger munger,
- absl::string_view* range, T64* out) const {
- StructReader(*this, contents).Read<T32>(offset, munger, range, out);
- }
-
bool ok_;
bool is_64bit_;
bool is_native_endian_;
@@ -395,10 +333,20 @@
}
};
+// Field-by-field converter for ELF compression headers. Copies ch_type,
+// ch_size and ch_addralign from `src` into the Elf64_Chdr `dst`, passing
+// each value through `convert` on the way.
+struct ChdrMunger {
+  template <class Src, class Convert>
+  void operator()(const Src& src, Elf64_Chdr* dst, Convert convert) {
+    dst->ch_type = convert(src.ch_type);
+    dst->ch_size = convert(src.ch_size);
+    dst->ch_addralign = convert(src.ch_addralign);
+  }
+};
+
template <class T32, class T64, class Munger>
void ElfFile::StructReader::ReadFallback(uint64_t offset,
absl::string_view* range,
T64* out) const {
+ // Fallback for either 32-bit ELF file or non-native endian.
if (elf_.is_64bit()) {
assert(!elf_.is_native_endian());
Memcpy(offset, range, out);
@@ -525,10 +473,10 @@
switch (ident[EI_DATA]) {
case ELFDATA2LSB:
- is_native_endian_ = IsLittleEndian();
+ is_native_endian_ = GetMachineEndian() == Endian::kLittle;
break;
case ELFDATA2MSB:
- is_native_endian_ = !IsLittleEndian();
+ is_native_endian_ = GetMachineEndian() == Endian::kBig;
break;
default:
THROWF("unexpected ELF data: $0", ident[EI_DATA]);
@@ -663,6 +611,7 @@
private:
string_view Consume(size_t n) {
+ n = (n % 2 == 0 ? n : n + 1);
if (remaining_.size() < n) {
THROW("premature end of file");
}
@@ -754,7 +703,7 @@
template <class Func>
void ForEachElf(const InputFile& file, RangeSink* sink, Func func) {
ArFile ar_file(file.data());
- unsigned long index_base = 0;
+ uint64_t index_base = 0;
if (ar_file.IsOpen()) {
ArFile::MemberFile member;
@@ -801,12 +750,12 @@
//
// - 24 bits for index (up to 16M symbols with -ffunction-sections)
// - 40 bits for address (up to 1TB section)
-static uint64_t ToVMAddr(size_t addr, long ndx, bool is_object) {
+static uint64_t ToVMAddr(size_t addr, uint64_t ndx, bool is_object) {
if (is_object) {
if (ndx >= 1 << 24) {
THROW("ndx overflow: too many sections");
}
- if (addr >= 1UL << 40) {
+ if (addr >= 1ULL << 40) {
THROW("address overflow: section too big");
}
return (ndx << 40) | addr;
@@ -899,7 +848,7 @@
ForEachElf(
file, sink,
- [=](const ElfFile& elf, string_view /*filename*/, uint32_t index_base) {
+ [=](const ElfFile& elf, string_view /*filename*/, uint64_t index_base) {
for (Elf64_Xword i = 1; i < elf.section_count(); i++) {
ElfFile::Section section;
elf.ReadSection(i, &section);
@@ -1220,34 +1169,49 @@
// reader directly on them. At the moment we don't attempt to make these
// work with object files.
-static void ReadDWARFSections(const InputFile& file, dwarf::File* dwarf) {
+// Populates `dwarf` with the contents of every DWARF debug section in `file`.
+//
+// Three on-disk representations are handled:
+//   - plain ".debug_*" sections, used as-is;
+//   - sections flagged SHF_COMPRESSED (zlib-gabi), whose payload follows an
+//     ELF compression header;
+//   - ".zdebug_*" sections (zlib-gnu), whose payload follows a "ZLIB" magic
+//     and a big-endian 64-bit uncompressed size.
+// Compressed payloads are inflated through sink->ZlibDecompress(). Sections
+// with an unknown compression type, a bad ".zdebug_" header, or a name
+// without a debug prefix are skipped.
+static void ReadDWARFSections(const InputFile &file, dwarf::File *dwarf,
+                              RangeSink *sink) {
   ElfFile elf(file.data());
   assert(elf.IsOpen());
   for (Elf64_Xword i = 1; i < elf.section_count(); i++) {
     ElfFile::Section section;
     elf.ReadSection(i, &section);
     string_view name = section.GetName();
+    string_view contents = section.contents();
+    uint64_t uncompressed_size = 0;
-    if (name == ".debug_aranges") {
-      dwarf->debug_aranges = section.contents();
-    } else if (name == ".debug_str") {
-      dwarf->debug_str = section.contents();
-    } else if (name == ".debug_info") {
-      dwarf->debug_info = section.contents();
-    } else if (name == ".debug_types") {
-      dwarf->debug_types = section.contents();
-    } else if (name == ".debug_abbrev") {
-      dwarf->debug_abbrev = section.contents();
-    } else if (name == ".debug_line") {
-      dwarf->debug_line = section.contents();
-    } else if (name == ".debug_loc") {
-      dwarf->debug_loc = section.contents();
-    } else if (name == ".debug_pubnames") {
-      dwarf->debug_pubnames = section.contents();
-    } else if (name == ".debug_pubtypes") {
-      dwarf->debug_pubtypes = section.contents();
-    } else if (name == ".debug_ranges") {
-      dwarf->debug_ranges = section.contents();
+    if (section.header().sh_flags & SHF_COMPRESSED) {
+      // Standard ELF section compression, produced when you link with
+      //   --compress-debug-sections=zlib-gabi
+      Elf64_Chdr chdr;
+      absl::string_view range;
+      elf.ReadStruct<Elf32_Chdr>(contents, 0, ChdrMunger(), &range, &chdr);
+      if (chdr.ch_type != ELFCOMPRESS_ZLIB) {
+        // Unknown compression format.
+        continue;
+      }
+      uncompressed_size = chdr.ch_size;
+      // Step over the compression header to reach the zlib stream.
+      contents.remove_prefix(range.size());
+    }
+
+    if (name.find(".debug_") == 0) {
+      name.remove_prefix(string_view(".debug_").size());
+    } else if (name.find(".zdebug_") == 0) {
+      // GNU format compressed debug info, produced when you link with
+      //   --compress-debug-sections=zlib-gnu
+      name.remove_prefix(string_view(".zdebug_").size());
+      if (ReadBytes(4, &contents) != "ZLIB") {
+        continue;  // Bad compression header.
+      }
+      uncompressed_size = ReadBigEndian<uint64_t>(&contents);
+    } else {
+      // Not a debug section. Skip it so that an unprefixed section name
+      // (e.g. one literally called "str") can never alias a DWARF field.
+      continue;
+    }
+
+    if (string_view* member = dwarf->GetFieldByName(name)) {
+      if (uncompressed_size) {
+        *member = sink->ZlibDecompress(contents, uncompressed_size);
+      } else {
+        *member = section.contents();
+      }
     }
   }
 }
@@ -1467,12 +1431,12 @@
RangeSink symbol_sink(&debug_file().file_data(),
sink->options(),
DataSource::kRawSymbols,
- &sinks[0]->MapAtIndex(0));
+ &sinks[0]->MapAtIndex(0), nullptr);
symbol_sink.AddOutput(&symbol_map, &empty_munger);
ReadELFSymbols(debug_file().file_data(), &symbol_sink, &symtab,
false);
dwarf::File dwarf;
- ReadDWARFSections(debug_file().file_data(), &dwarf);
+ ReadDWARFSections(debug_file().file_data(), &dwarf, sink);
ReadDWARFCompileUnits(dwarf, symtab, symbol_map, sink);
ReadLinkMapCompileUnits(sink);
break;
@@ -1480,7 +1444,7 @@
case DataSource::kInlines: {
CheckNotObject("lineinfo", sink);
dwarf::File dwarf;
- ReadDWARFSections(debug_file().file_data(), &dwarf);
+ ReadDWARFSections(debug_file().file_data(), &dwarf, sink);
ReadDWARFInlines(dwarf, sink, true);
DoReadELFSections(sink, kReportByEscapedSectionName);
break;
@@ -1519,7 +1483,7 @@
DualMap base_map;
NameMunger empty_munger;
RangeSink base_sink(&file_data(), bloaty::Options(), DataSource::kSegments,
- nullptr);
+ nullptr, nullptr);
base_sink.AddOutput(&base_map, &empty_munger);
std::vector<RangeSink*> sink_ptrs{&base_sink};
ProcessFile(sink_ptrs);
@@ -1527,7 +1491,7 @@
// Could optimize this not to build the whole table if necessary.
SymbolTable symbol_table;
RangeSink symbol_sink(&file_data(), bloaty::Options(), symbol_source,
- &base_map);
+ &base_map, nullptr);
symbol_sink.AddOutput(&info->symbol_map, &empty_munger);
ReadELFSymbols(debug_file().file_data(), &symbol_sink, &symbol_table,
false);
diff --git a/src/macho.cc b/src/macho.cc
index 0ff52da..2c25c5b 100644
--- a/src/macho.cc
+++ b/src/macho.cc
@@ -15,7 +15,7 @@
#include <iostream>
#include "string.h"
#include "bloaty.h"
-#include "re2/re2.h"
+#include "util.h"
#include <cassert>
@@ -27,17 +27,8 @@
#include "third_party/darwin_xnu_macho/mach-o/nlist.h"
#include "third_party/darwin_xnu_macho/mach-o/reloc.h"
-ABSL_ATTRIBUTE_NORETURN
-static void Throw(const char *str, int line) {
- throw bloaty::Error(str, __FILE__, line);
-}
-
using absl::string_view;
-#define THROW(msg) Throw(msg, __LINE__)
-#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
-#define WARN(x) fprintf(stderr, "bloaty: %s\n", x);
-
namespace bloaty {
namespace macho {
@@ -48,23 +39,6 @@
return string_view(s, strnlen(s, maxlen));
}
-static uint64_t CheckedAdd(uint64_t a, uint64_t b) {
- absl::uint128 a_128(a), b_128(b);
- absl::uint128 c_128 = a_128 + b_128;
- if (c_128 > absl::uint128(UINT64_MAX)) {
- THROW("integer overflow in addition");
- }
- return static_cast<uint64_t>(c_128);
-}
-
-static string_view StrictSubstr(string_view data, size_t off, size_t n) {
- uint64_t end = CheckedAdd(off, n);
- if (end > data.size()) {
- THROW("Mach-O region out-of-bounds");
- }
- return data.substr(off, n);
-}
-
uint32_t ReadMagic(string_view data) {
if (data.size() < sizeof(uint32_t)) {
THROW("Malformed Mach-O file");
@@ -83,33 +57,9 @@
}
template <class T>
-void AdvancePastStruct(string_view* data) {
- *data = data->substr(sizeof(T));
-}
-
-string_view ReadNullTerminated(string_view data, size_t offset) {
- if (offset >= data.size()) {
- THROW("Invalid Mach-O string table offset.");
- }
-
- data = data.substr(offset);
-
- const char* nullz =
- static_cast<const char*>(memchr(data.data(), '\0', data.size()));
-
- // Return false if not NULL-terminated.
- if (nullz == NULL) {
- THROW("Mach-O string was not NULL-terminated");
- }
-
- size_t len = nullz - data.data();
- return data.substr(0, len);
-}
-
-template <class T>
const T* GetStructPointerAndAdvance(string_view* data) {
const T* ret = GetStructPointer<T>(*data);
- AdvancePastStruct<T>(data);
+ *data = data->substr(sizeof(T));
return ret;
}
@@ -283,17 +233,28 @@
template <class Segment, class Section>
void ParseSegment(LoadCommand cmd, RangeSink* sink) {
auto segment = GetStructPointerAndAdvance<Segment>(&cmd.command_data);
-
- if (segment->maxprot == VM_PROT_NONE) {
- return;
- }
-
string_view segname = ArrayToStr(segment->segname, 16);
+ // For unknown reasons, some load commands will have maxprot = NONE
+ // indicating they are not accessible, but will also contain a vmaddr
+ // and vmsize. In practice the vmaddr/vmsize of a section sometimes
+ // fall within the segment, but sometimes exceed it, leading to an
+ // error about exceeding the base map.
+ //
+ // Since such segments should not be mapped, we simply ignore the
+ // vmaddr/vmsize of such segments.
+ bool unmapped = segment->maxprot == VM_PROT_NONE;
+
if (sink->data_source() == DataSource::kSegments) {
- sink->AddRange(
- "macho_segment", segname, segment->vmaddr, segment->vmsize,
- StrictSubstr(cmd.file_data, segment->fileoff, segment->filesize));
+ if (unmapped) {
+ sink->AddFileRange(
+ "macho_segment", segname,
+ StrictSubstr(cmd.file_data, segment->fileoff, segment->filesize));
+ } else {
+ sink->AddRange(
+ "macho_segment", segname, segment->vmaddr, segment->vmsize,
+ StrictSubstr(cmd.file_data, segment->fileoff, segment->filesize));
+ }
} else if (sink->data_source() == DataSource::kSections) {
uint32_t nsects = segment->nsects;
for (uint32_t j = 0; j < nsects; j++) {
@@ -313,8 +274,14 @@
std::string label = absl::StrJoin(
std::make_tuple(segname, ArrayToStr(section->sectname, 16)), ",");
- sink->AddRange("macho_section", label, section->addr, section->size,
- StrictSubstr(cmd.file_data, section->offset, filesize));
+ if (unmapped) {
+ sink->AddFileRange(
+ "macho_section", label,
+ StrictSubstr(cmd.file_data, section->offset, filesize));
+ } else {
+ sink->AddRange("macho_section", label, section->addr, section->size,
+ StrictSubstr(cmd.file_data, section->offset, filesize));
+ }
}
} else {
BLOATY_UNREACHABLE();
@@ -451,7 +418,8 @@
continue;
}
- string_view name = ReadNullTerminated(strtab, sym->n_un.n_strx);
+ string_view name_region = StrictSubstr(strtab, sym->n_un.n_strx);
+ string_view name = ReadNullTerminated(&name_region);
if (sink->data_source() >= DataSource::kSymbols) {
sink->AddVMRange("macho_symbols", sym->n_value, RangeSink::kUnknownSize,
@@ -508,13 +476,9 @@
}
template <class Segment, class Section>
-void ReadDebugSectionsFromSegment(LoadCommand cmd, dwarf::File* dwarf) {
+void ReadDebugSectionsFromSegment(LoadCommand cmd, dwarf::File *dwarf,
+ RangeSink *sink) {
auto segment = GetStructPointerAndAdvance<Segment>(&cmd.command_data);
-
- if (segment->maxprot == VM_PROT_NONE) {
- return;
- }
-
string_view segname = ArrayToStr(segment->segname, 16);
if (segname != "__DWARF") {
@@ -541,42 +505,36 @@
string_view contents =
StrictSubstr(cmd.file_data, section->offset, filesize);
- if (sectname == "__debug_aranges") {
- dwarf->debug_aranges = contents;
- } else if (sectname == "__debug_str") {
- dwarf->debug_str = contents;
- } else if (sectname == "__debug_info") {
- dwarf->debug_info = contents;
- } else if (sectname == "__debug_types") {
- dwarf->debug_types = contents;
- } else if (sectname == "__debug_abbrev") {
- dwarf->debug_abbrev = contents;
- } else if (sectname == "__debug_line") {
- dwarf->debug_line = contents;
- } else if (sectname == "__debug_loc") {
- dwarf->debug_loc = contents;
- } else if (sectname == "__debug_pubnames") {
- dwarf->debug_pubnames = contents;
- } else if (sectname == "__debug_pubtypes") {
- dwarf->debug_pubtypes = contents;
- } else if (sectname == "__debug_ranges") {
- dwarf->debug_ranges = contents;
+ if (sectname.find("__debug_") == 0) {
+ sectname.remove_prefix(string_view("__debug_").size());
+ dwarf->SetFieldByName(sectname, contents);
+ } else if (sectname.find("__zdebug_") == 0) {
+ sectname.remove_prefix(string_view("__zdebug_").size());
+ string_view *member = dwarf->GetFieldByName(sectname);
+ if (!member || ReadBytes(4, &contents) != "ZLIB") {
+ continue;
+ }
+ auto uncompressed_size = ReadBigEndian<uint64_t>(&contents);
+ *member = sink->ZlibDecompress(contents, uncompressed_size);
}
}
}
-static void ReadDebugSectionsFromMachO(const InputFile& file, dwarf::File* dwarf) {
- ForEachLoadCommand(file.data(), nullptr, [dwarf](const LoadCommand& cmd) {
- switch (cmd.cmd) {
- case LC_SEGMENT_64:
- ReadDebugSectionsFromSegment<segment_command_64, section_64>(cmd,
- dwarf);
- break;
- case LC_SEGMENT:
- ReadDebugSectionsFromSegment<segment_command, section>(cmd, dwarf);
- break;
- }
- });
+// Walks the load commands of `file` and hands every LC_SEGMENT_64 /
+// LC_SEGMENT command to ReadDebugSectionsFromSegment, which fills `dwarf`.
+// `sink` is forwarded because compressed sections are decompressed through
+// it. All other load commands are ignored.
+static void ReadDebugSectionsFromMachO(const InputFile &file,
+ dwarf::File *dwarf, RangeSink *sink) {
+ ForEachLoadCommand(
+ file.data(), nullptr, [dwarf, sink](const LoadCommand &cmd) {
+ switch (cmd.cmd) {
+ case LC_SEGMENT_64:
+ ReadDebugSectionsFromSegment<segment_command_64, section_64>(
+ cmd, dwarf, sink);
+ break;
+ case LC_SEGMENT:
+ ReadDebugSectionsFromSegment<segment_command, section>(cmd, dwarf,
+ sink);
+ break;
+ }
+ });
}
class MachOObjectFile : public ObjectFile {
@@ -619,14 +577,13 @@
SymbolTable symtab;
DualMap symbol_map;
NameMunger empty_munger;
- RangeSink symbol_sink(&debug_file().file_data(),
- sink->options(),
+ RangeSink symbol_sink(&debug_file().file_data(), sink->options(),
DataSource::kRawSymbols,
- &sinks[0]->MapAtIndex(0));
+ &sinks[0]->MapAtIndex(0), nullptr);
symbol_sink.AddOutput(&symbol_map, &empty_munger);
ParseSymbols(debug_file().file_data().data(), &symtab, &symbol_sink);
dwarf::File dwarf;
- ReadDebugSectionsFromMachO(debug_file().file_data(), &dwarf);
+ ReadDebugSectionsFromMachO(debug_file().file_data(), &dwarf, sink);
ReadDWARFCompileUnits(dwarf, symtab, symbol_map, sink);
ParseSymbols(sink->input_file().data(), nullptr, sink);
break;
diff --git a/src/pe.cc b/src/pe.cc
new file mode 100644
index 0000000..d5e01d3
--- /dev/null
+++ b/src/pe.cc
@@ -0,0 +1,280 @@
+// Copyright 2021 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "bloaty.h"
+#include "absl/strings/substitute.h"
+#include "util.h"
+
+using absl::string_view;
+
+namespace bloaty {
+namespace pe {
+// Value of the first two bytes of a DOS/PE image ("MZ") when loaded into a
+// uint16_t with memcpy.
+// NOTE(review): 0x5A4D only matches on a little-endian host -- confirm
+// whether big-endian hosts need to be supported.
+const uint16_t dos_magic = 0x5A4D; // MZ
+
+//! Sizes in bytes of various things in the COFF format.
+namespace STRUCT_SIZES {
+enum {
+ Header16Size = 20,
+ Header32Size = 56,
+ NameSize = 8,
+ Symbol16Size = 18,
+ Symbol32Size = 20,
+ SectionSize = 40,
+ RelocationSize = 10,
+ BaseRelocationBlockSize = 8,
+ ImportDirectoryTableEntrySize = 20,
+ ResourceDirectoryTableSize = 16,
+ ResourceDirectoryEntriesSize = 8,
+ ResourceDataEntrySize = 16
+};
+}
+
+// NOTE(review): this header is included inside namespace bloaty::pe,
+// presumably so that its struct definitions land in this namespace and can
+// see STRUCT_SIZES -- confirm before moving it.
+#include "third_party/lief_pe/pe_structures.h"
+
+// Compile-time checks that the structs from pe_structures.h have the sizes
+// listed in STRUCT_SIZES. Header16Size, Header32Size, NameSize,
+// Symbol16Size and Symbol32Size are not checked here.
+static_assert(STRUCT_SIZES::SectionSize == sizeof(pe_section), "Compiler options broke LIEF struct layout");
+static_assert(STRUCT_SIZES::RelocationSize == sizeof(pe_relocation), "Compiler options broke LIEF struct layout");
+static_assert(STRUCT_SIZES::BaseRelocationBlockSize ==
+ sizeof(pe_base_relocation_block),
+ "Compiler options broke LIEF struct layout");
+static_assert(STRUCT_SIZES::ImportDirectoryTableEntrySize == sizeof(pe_import),
+ "Compiler options broke LIEF struct layout");
+static_assert(STRUCT_SIZES::ResourceDirectoryTableSize ==
+ sizeof(pe_resource_directory_table),
+ "Compiler options broke LIEF struct layout");
+static_assert(STRUCT_SIZES::ResourceDirectoryEntriesSize ==
+ sizeof(pe_resource_directory_entries),
+ "Compiler options broke LIEF struct layout");
+static_assert(STRUCT_SIZES::ResourceDataEntrySize ==
+ sizeof(pe_resource_data_entry),
+ "Compiler options broke LIEF struct layout");
+
+// Minimal parser for the headers of a PE image. On construction it checks
+// the DOS and PE signatures and locates the section header table. It keeps a
+// view onto the caller's bytes and does not own them.
+class PeFile {
+ public:
+  PeFile(string_view data) : data_(data) { ok_ = Initialize(); }
+
+  // True when Initialize() accepted the data as a PE image. The accessors
+  // below return empty/zero values otherwise.
+  bool IsOpen() const { return ok_; }
+
+  // Bytes from the start of the file up to the section header table.
+  string_view header_region() const { return header_region_; }
+
+  uint32_t section_count() const { return section_count_; }
+  // The whole section header table.
+  string_view section_headers() const { return section_headers_; }
+  // The n-th entry of the section header table (bounds-checked by
+  // StrictSubstr).
+  string_view section_header(size_t n) const {
+    return StrictSubstr(section_headers_, n * sizeof(pe_section),
+                        sizeof(pe_section));
+  }
+
+ private:
+  bool Initialize();
+
+  string_view GetRegion(uint64_t start, uint64_t n) const {
+    return StrictSubstr(data_, start, n);
+  }
+
+  // Every member has a defined value even when Initialize() bails out early,
+  // so the public accessors never read indeterminate data.
+  bool ok_ = false;
+  bool is_64bit_ = false;
+  string_view data_;
+
+  pe_dos_header dos_header_{};
+  pe_header pe_header_{};
+  string_view header_region_;
+  uint32_t section_count_ = 0;
+  string_view section_headers_;
+};
+
+// Validates the DOS and PE headers and records where the section header
+// table lives. Returns false when the data is not a PE image or is too small
+// to hold the headers it describes; returns true once header_region_ and
+// section_headers_ have been set.
+bool PeFile::Initialize() {
+  if (data_.size() < sizeof(dos_header_)) {
+    return false;
+  }
+
+  memcpy(&dos_header_, data_.data(), sizeof(dos_header_));
+
+  if (dos_header_.Magic != dos_magic) {
+    // Not a PE file.
+    return false;
+  }
+
+  // AddressOfNewExeHeader comes straight from the file. Do the offset
+  // arithmetic in 64 bits so the sums cannot wrap (with a 32-bit size_t the
+  // original expression could wrap and slip past the bounds check).
+  const uint64_t pe_header_offset = dos_header_.AddressOfNewExeHeader;
+  const uint64_t pe_header_end = pe_header_offset + sizeof(pe_header);
+  if (pe_header_end > data_.size()) {
+    // Cannot fit the headers
+    return false;
+  }
+
+  memcpy(&pe_header_, data_.data() + pe_header_offset, sizeof(pe_header_));
+
+  if (!std::equal(pe_header_.signature, pe_header_.signature + sizeof(PE_Magic),
+                  std::begin(PE_Magic))) {
+    // Not a PE file.
+    return false;
+  }
+
+  // TODO(mj): Parse PE header further to determine this
+  is_64bit_ = false;
+
+  section_count_ = pe_header_.NumberOfSections;
+
+  // The section table follows the PE header and the optional header.
+  const uint64_t sections_offset =
+      pe_header_end + pe_header_.SizeOfOptionalHeader;
+
+  const uint64_t sections_size =
+      CheckedMul(section_count_, sizeof(pe_section));
+  if ((sections_offset + sections_size) > data_.size()) {
+    // Cannot fit the headers
+    return false;
+  }
+
+  header_region_ = GetRegion(0, sections_offset);
+  section_headers_ = GetRegion(sections_offset, sections_size);
+
+  return true;
+}
+
+// One entry of the PE section table, decoded from its raw header bytes.
+class Section {
+ public:
+  // Short section name taken from the header's fixed-size Name field.
+  std::string name;
+  // The raw header bytes this object was built from (not the section
+  // payload).
+  string_view data;
+
+  Section(string_view header_data) {
+    assert(header_data.size() == sizeof(header_));
+    memcpy(&header_, header_data.data(), sizeof(header_));
+    data = header_data;
+
+    // TODO(mj): Handle long section names:
+    // For longer names, this member contains a forward slash (/) followed by an
+    // ASCII representation of a decimal number that is an offset into the
+    // string table.
+    const size_t name_len = strnlen(header_.Name, STRUCT_SIZES::NameSize);
+    name = std::string(header_.Name, name_len);
+  }
+
+  // Placement of the section's bytes within the file.
+  uint32_t raw_offset() const { return header_.PointerToRawData; }
+  uint32_t raw_size() const { return header_.SizeOfRawData; }
+
+  // Placement of the section in the virtual address space.
+  uint32_t virtual_addr() const { return header_.VirtualAddress; }
+  uint32_t virtual_size() const { return header_.VirtualSize; }
+
+ private:
+  pe_section header_;
+};
+
+// Calls section_func(section) once for each entry of the section header
+// table of `pe`, in table order.
+template <class Func>
+void ForEachSection(const PeFile& pe, Func&& section_func) {
+  // Index with the same unsigned type as section_count(); `auto n = 0`
+  // deduced int and compared signed against unsigned.
+  const uint32_t count = pe.section_count();
+  for (uint32_t n = 0; n < count; ++n) {
+    Section section(pe.section_header(n));
+    section_func(section);
+  }
+}
+
+// Reports every section of `pe` to `sink` under its section name, giving
+// both its virtual-memory placement and its placement in the file.
+void ParseSections(const PeFile& pe, RangeSink* sink) {
+  assert(pe.IsOpen());
+  ForEachSection(pe, [sink](const Section& sec) {
+    // Widen the 32-bit header fields to the 64-bit values AddRange is given.
+    const uint64_t file_start = sec.raw_offset();
+    const uint64_t file_len = sec.raw_size();
+    const uint64_t vm_start = sec.virtual_addr();
+    const uint64_t vm_len = sec.virtual_size();
+
+    sink->AddRange("pe_sections", sec.name, vm_start, vm_len, file_start,
+                   file_len);
+  });
+}
+
+// Fallback attribution for bytes no other range claimed. The PE headers and
+// the section header table are reported as "[PE Headers]", using their
+// offset within the input file as the VM address; the entire file is then
+// offered as "[Unmapped]".
+void AddCatchAll(const PeFile& pe, RangeSink* sink) {
+  assert(pe.IsOpen());
+
+  const string_view headers = pe.header_region();
+  auto headers_begin = headers.data() - sink->input_file().data().data();
+  sink->AddRange("pe_catchall", "[PE Headers]", headers_begin, headers.size(),
+                 headers);
+
+  const string_view section_table = pe.section_headers();
+  auto table_begin = section_table.data() - sink->input_file().data().data();
+  sink->AddRange("pe_catchall", "[PE Headers]", table_begin,
+                 section_table.size(), section_table);
+
+  // The last-line fallback to make sure we cover the entire file.
+  sink->AddFileRange("pe_catchall", "[Unmapped]", sink->input_file().data());
+}
+
+// ObjectFile implementation backed by a parsed PeFile. Only the segments and
+// sections data sources are handled; every other source throws.
+class PEObjectFile : public ObjectFile {
+ public:
+ PEObjectFile(std::unique_ptr<InputFile> file_data,
+ std::unique_ptr<pe::PeFile> pe)
+ : ObjectFile(std::move(file_data)), pe_file(std::move(pe)) {}
+
+ // Build IDs are not implemented for PE yet; always returns an empty string.
+ std::string GetBuildId() const override {
+ // TODO(mj): Read from pe_pdb_??
+ return std::string();
+ }
+
+ // For each sink: reports the PE sections, then adds the catch-all ranges.
+ // Unsupported data sources end in THROW.
+ void ProcessFile(const std::vector<RangeSink*>& sinks) const override {
+ for (auto sink : sinks) {
+ switch (sink->data_source()) {
+ // kSegments deliberately shares the kSections handling below.
+ case DataSource::kSegments:
+ // TODO(mj): sections: list out imports and other stuff!
+ case DataSource::kSections:
+ ParseSections(*pe_file, sink);
+ break;
+ // Everything from here on falls through to the THROW in `default`.
+ case DataSource::kSymbols:
+ case DataSource::kRawSymbols:
+ case DataSource::kShortSymbols:
+ case DataSource::kFullSymbols:
+ // TODO(mj): Generate symbols from debug info, exports and other known
+ // structures
+ case DataSource::kArchiveMembers:
+ case DataSource::kCompileUnits:
+ case DataSource::kInlines:
+ default:
+ THROW("PE doesn't support this data source");
+ }
+ AddCatchAll(*pe_file, sink);
+ }
+ }
+
+ // Disassembly is not implemented for PE: emits a warning and returns false.
+ bool GetDisassemblyInfo(absl::string_view /*symbol*/,
+ DataSource /*symbol_source*/,
+ DisassemblyInfo* /*info*/) const override {
+ WARN("PE files do not support disassembly yet");
+ return false;
+ }
+
+ protected:
+ std::unique_ptr<pe::PeFile> pe_file;
+};
+
+// Cheap pre-check for a PE image: true when `data` is at least as large as
+// a DOS header and its first two bytes equal dos_magic.
+bool ReadMagic(const string_view& data) {
+  // Anything smaller than a DOS header cannot be a PE file.
+  if (data.size() >= sizeof(pe_dos_header)) {
+    uint16_t leading_word;
+    memcpy(&leading_word, data.data(), sizeof(leading_word));
+    return leading_word == dos_magic;
+  }
+
+  return false;
+}
+} // namespace pe
+
+// Tries to interpret `file` as a PE image. On success ownership of `file`
+// moves into the returned ObjectFile; on failure nullptr is returned and
+// `file` is left untouched.
+std::unique_ptr<ObjectFile> TryOpenPEFile(std::unique_ptr<InputFile>& file) {
+  // Do not bother creating an object if the first magic is not even there
+  if (!pe::ReadMagic(file->data())) {
+    return nullptr;
+  }
+
+  std::unique_ptr<pe::PeFile> parsed(new pe::PeFile(file->data()));
+  if (!parsed->IsOpen()) {
+    return nullptr;
+  }
+
+  return std::unique_ptr<ObjectFile>(
+      new pe::PEObjectFile(std::move(file), std::move(parsed)));
+}
+
+} // namespace bloaty
diff --git a/src/range_map.cc b/src/range_map.cc
index 2886bd7..08e1f18 100644
--- a/src/range_map.cc
+++ b/src/range_map.cc
@@ -293,7 +293,8 @@
auto it = prev;
while (it != mappings_.end()) {
if (prev->first + prev->second.size == it->first &&
- prev->second.label == it->second.label) {
+ (prev->second.label == it->second.label ||
+ (!prev->second.HasFallbackLabel() && it->second.IsShortFallback()))) {
prev->second.size += it->second.size;
mappings_.erase(it++);
} else {
diff --git a/src/range_map.h b/src/range_map.h
index 7be627a..9c13bbc 100644
--- a/src/range_map.h
+++ b/src/range_map.h
@@ -165,6 +165,12 @@
uint64_t other_start; // kNoTranslation if there is no mapping.
bool HasTranslation() const { return other_start != kNoTranslation; }
+ bool HasFallbackLabel() const { return !label.empty() && label[0] == '['; }
+
+ // We assume that short regions that were unattributed (have fallback
+ // labels) are actually padding. We could probably make this heuristic
+ // a bit more robust.
+ bool IsShortFallback() const { return size <= 16 && HasFallbackLabel(); }
};
typedef std::map<uint64_t, Entry> Map;
@@ -380,7 +386,6 @@
}
}
-
} // namespace bloaty
#endif // BLOATY_RANGE_MAP_H_
diff --git a/src/re.h b/src/re.h
new file mode 100644
index 0000000..d990767
--- /dev/null
+++ b/src/re.h
@@ -0,0 +1,92 @@
+// Copyright 2020 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef BLOATY_RE_H_
+#define BLOATY_RE_H_
+
+#include <string>
+
+#ifdef USE_RE2
+#include "re2/re2.h"
+#endif
+
+#include "absl/base/attributes.h"
+#include "bloaty.h"
+
+namespace bloaty {
+
+#ifdef USE_RE2
+// Regex wrapper used when RE2 is available (USE_RE2 defined). Each static
+// helper forwards its arguments to the RE2 function of the same name.
+class ReImpl {
+ public:
+  // Constructors are left non-explicit so call sites that rely on the
+  // implicit conversion from a pattern keep compiling.
+  ReImpl(const char* pattern) : re2_(pattern) {}
+  ReImpl(const std::string& pattern) : re2_(pattern) {}
+
+  // Forwards to RE2::ok(). Const so it can be queried through the
+  // `const ReImpl&` that every helper below receives.
+  bool ok() const { return re2_.ok(); }
+
+  static bool Extract(std::string text, const ReImpl& re, std::string rewrite,
+                      std::string* out) {
+    return RE2::Extract(text, re.re2_, rewrite, out);
+  }
+
+  template <typename... A>
+  static bool PartialMatch(const std::string& text, const ReImpl& re,
+                           A&&... a) {
+    return RE2::PartialMatch(text, re.re2_, a...);
+  }
+
+  static int GlobalReplace(std::string* str, const ReImpl& re,
+                           std::string rewrite) {
+    return RE2::GlobalReplace(str, re.re2_, rewrite);
+  }
+
+  static bool Replace(std::string* str, const ReImpl& re, std::string rewrite) {
+    return RE2::Replace(str, re.re2_, rewrite);
+  }
+
+ private:
+  RE2 re2_;
+};
+#else
+// Without RE2 the namespace is closed here so that _abort() is declared at
+// global scope, then reopened for the stub class.
+}
+
+// Single failure path for the stub below: always throws a string literal.
+// NOTE(review): identifiers starting with an underscore are reserved at
+// global scope; consider renaming once all users of this header are audited.
+ABSL_ATTRIBUTE_NORETURN
+static void _abort() { throw "No support for regular expressions"; }
+
+namespace bloaty {
+// Stub with the same interface as the RE2-backed ReImpl. Constructing it or
+// calling any member throws via _abort(), so nothing here ever returns.
+class ReImpl {
+ public:
+  ReImpl(const char*) { _abort(); }
+  ReImpl(const std::string&) { _abort(); }
+
+  // Const to match the RE2-backed variant's signature.
+  bool ok() const { _abort(); }
+
+  ABSL_ATTRIBUTE_NORETURN
+  static bool Extract(std::string, const ReImpl&, std::string, std::string*) {
+    _abort();
+  }
+
+  template <typename... A>
+  ABSL_ATTRIBUTE_NORETURN static bool PartialMatch(const std::string&,
+                                                   const ReImpl&, A&&...) {
+    _abort();
+  }
+
+  ABSL_ATTRIBUTE_NORETURN
+  static int GlobalReplace(std::string*, const ReImpl&, std::string) {
+    _abort();
+  }
+
+  ABSL_ATTRIBUTE_NORETURN
+  static bool Replace(std::string*, const ReImpl&, std::string) { _abort(); }
+};
+#endif
+
+} // namespace bloaty
+
+#endif // BLOATY_RE_H_
diff --git a/src/util.cc b/src/util.cc
new file mode 100644
index 0000000..b85378b
--- /dev/null
+++ b/src/util.cc
@@ -0,0 +1,41 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "util.h"
+
+using absl::string_view;
+
+namespace bloaty {
+
+// Out-of-line helper behind the THROW/THROWF macros: throws a bloaty::Error
+// carrying `str` as the message and `line` as the line number.
+// NOTE(review): __FILE__ is expanded here, so the recorded file is always
+// this translation unit rather than the file that invoked THROW - confirm
+// that this is intended.
+ABSL_ATTRIBUTE_NORETURN
+void Throw(const char *str, int line) {
+ throw bloaty::Error(str, __FILE__, line);
+}
+
+// Splits the leading NUL-terminated string off the front of `*data`.
+//
+// Returns a view of the bytes that precede the first '\0' and advances
+// `*data` past that terminator. Throws (via THROW) when `*data` contains no
+// '\0' at all.
+absl::string_view ReadNullTerminated(absl::string_view* data) {
+  // string_view::find is bounded by the view's own size and is well-defined
+  // for an empty view, whose data() may be null.
+  const size_t len = data->find('\0');
+
+  // Unterminated input is a hard error.
+  if (len == absl::string_view::npos) {
+    THROW("DWARF string was not NULL-terminated");
+  }
+
+  absl::string_view val = data->substr(0, len);
+  data->remove_prefix(len + 1);  // Remove NULL also.
+  return val;
+}
+
+} // namespace bloaty
diff --git a/src/util.h b/src/util.h
new file mode 100644
index 0000000..1aad13f
--- /dev/null
+++ b/src/util.h
@@ -0,0 +1,179 @@
+// Copyright 2016 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef BLOATY_UTIL_H_
+#define BLOATY_UTIL_H_
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <stdexcept>
+
+#include "absl/base/attributes.h"
+#include "absl/numeric/int128.h"
+#include "absl/strings/string_view.h"
+#include "absl/strings/substitute.h"
+
+namespace bloaty {
+
+// Exception type used for Bloaty errors. The message is kept by
+// std::runtime_error; the file name and line number passed to the
+// constructor are stored alongside it.
+class Error : public std::runtime_error {
+ public:
+  // `file` is stored as a pointer and is not copied.
+  Error(const char* msg, const char* file, int line)
+      : std::runtime_error(msg) {
+    file_ = file;
+    line_ = line;
+  }
+
+  // TODO(haberman): add these to Bloaty's error message when verbose is
+  // enabled.
+  int line() const { return line_; }
+  const char* file() const { return file_; }
+
+ private:
+  const char* file_;
+  int line_;
+};
+
+// Throwing emits a lot of code, so we do it out-of-line.
+ABSL_ATTRIBUTE_NORETURN
+void Throw(const char *str, int line);
+
+// THROW(msg): calls Throw() with `msg` and the line of the macro invocation.
+#define THROW(msg) Throw(msg, __LINE__)
+// THROWF(...): like THROW, but the message is produced by passing the
+// arguments to absl::Substitute.
+#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
+// WARN(...): prints "WARNING: <absl::Substitute result>" to stdout when
+// `verbose_level` is greater than zero. `verbose_level` is not declared in
+// this header, so it must be visible wherever the macro is expanded.
+// NOTE(review): the expansion is a bare `if { }` block, not a
+// do { } while (0), so using it as the body of an unbraced if/else will not
+// parse as intended.
+#define WARN(...) \
+ if (verbose_level > 0) { \
+ printf("WARNING: %s\n", absl::Substitute(__VA_ARGS__).c_str()); \
+ }
+
+// BLOATY_UNREACHABLE(): marks a point that control flow must never reach.
+// It first runs assert(false), then gives the compiler an unreachability
+// hint: __builtin_unreachable() on non-MSVC compilers, __assume(0) on MSVC.
+#if !defined(_MSC_VER)
+#define BLOATY_UNREACHABLE() do { \
+ assert(false); \
+ __builtin_unreachable(); \
+} while (0)
+#else
+#define BLOATY_UNREACHABLE() do { \
+ assert(false); \
+ __assume(0); \
+} while (0)
+#endif
+
+// BLOATY_ASSERT(expr): plain assert(expr) unless NDEBUG is defined. With
+// NDEBUG, `expr` only appears in `false && (expr)`, so it is still compiled
+// (keeping the variables it names "used") but never evaluated.
+#ifdef NDEBUG
+// Prevent "unused variable" warnings.
+#define BLOATY_ASSERT(expr) do {} while (false && (expr))
+#else
+#define BLOATY_ASSERT(expr) assert(expr)
+#endif
+
+// Returns a + b, throwing (via THROW) when the sum does not fit in 64 bits.
+inline uint64_t CheckedAdd(uint64_t a, uint64_t b) {
+  // Add in 128 bits so the full sum can be inspected before narrowing.
+  const absl::uint128 sum = absl::uint128(a) + absl::uint128(b);
+  if (sum > UINT64_MAX) {
+    THROW("integer overflow in addition");
+  }
+  return static_cast<uint64_t>(sum);
+}
+
+// Returns a * b, throwing (via THROW) when the product does not fit in 64
+// bits.
+inline uint64_t CheckedMul(uint64_t a, uint64_t b) {
+  // Multiply in 128 bits so the full product can be inspected before
+  // narrowing.
+  const absl::uint128 product = absl::uint128(a) * absl::uint128(b);
+  if (product > UINT64_MAX) {
+    THROW("integer overflow in multiply");
+  }
+  return static_cast<uint64_t>(product);
+}
+
+// Returns the `n` bytes of `data` starting at `off`. Throws (via THROW) when
+// off + n extends past the end of `data`; the sum is formed with CheckedAdd,
+// so an overflowing off + n throws as well.
+inline absl::string_view StrictSubstr(absl::string_view data, size_t off,
+                                      size_t n) {
+  if (CheckedAdd(off, n) > data.size()) {
+    THROW("region out-of-bounds");
+  }
+  return data.substr(off, n);
+}
+
+// Returns everything in `data` from offset `off` to the end. Throws (via
+// THROW) when `off` lies beyond the end of `data`.
+inline absl::string_view StrictSubstr(absl::string_view data, size_t off) {
+  const bool in_bounds = off <= data.size();
+  if (!in_bounds) {
+    THROW("region out-of-bounds");
+  }
+  return data.substr(off);
+}
+
+// Rounds `offset` up to the next multiple of `granularity`.
+inline size_t AlignUp(size_t offset, size_t granularity) {
+  // Granularity must be a power of two.
+  BLOATY_ASSERT((granularity & (granularity - 1)) == 0);
+  const size_t mask = granularity - 1;
+  return (offset + mask) & ~mask;
+}
+
+// Endianness utilities ////////////////////////////////////////////////////////
+
+enum class Endian { kBig, kLittle };
+
+// Detects the byte order of the running machine by checking which byte of an
+// int holding the value 1 is stored at the lowest address.
+inline Endian GetMachineEndian() {
+  const int probe = 1;
+  const char first_byte = *reinterpret_cast<const char*>(&probe);
+  if (first_byte == 1) {
+    return Endian::kLittle;
+  }
+  return Endian::kBig;
+}
+
+// Generic algorithm for byte-swapping an integer of arbitrary size.
+//
+// Recursively splits the low N bytes of `val` in half, swaps each half and
+// exchanges their positions; the N == 1 base case keeps only the low byte.
+//
+// With modern GCC/Clang this optimizes to a "bswap" instruction.
+//
+// NOTE(review): names beginning with an underscore followed by an uppercase
+// letter are reserved identifiers; the name is kept because other files may
+// refer to it. Presumably only meant for unsigned T - confirm.
+template <size_t N, class T> constexpr T _BS(T val) {
+  if constexpr (N == 1) {
+    return val & 0xff;
+  } else {
+    constexpr size_t bits = 8 * (N / 2);
+    return (_BS<N / 2>(val) << bits) | _BS<N / 2>(val >> bits);
+  }
+}
+
+// Byte swaps the given integer, and returns the byte-swapped value.
+template <class T> constexpr T ByteSwap(T val) {
+  return _BS<sizeof(T)>(val);
+}
+
+// Copies the first N bytes of `*data` into a zero-initialised T, without any
+// byte swapping, and advances `*data` by N. Throws (via THROW) when fewer
+// than N bytes remain.
+template <class T, size_t N = sizeof(T)> T ReadFixed(absl::string_view *data) {
+  // The memcpy below writes N bytes into `val`, so N must fit inside T.
+  static_assert(N <= sizeof(T), "N too big for this data type");
+  T val = 0;
+  if (data->size() < N) {
+    THROW("premature EOF reading fixed-length data");
+  }
+  memcpy(&val, data->data(), N);
+  data->remove_prefix(N);
+  return val;
+}
+
+// Reads a T from the front of `*data` (see ReadFixed) whose bytes are stored
+// in `endian` order, byte-swapping it when that differs from the machine's
+// own order.
+template <class T> T ReadEndian(absl::string_view *data, Endian endian) {
+  const T raw = ReadFixed<T>(data);
+  if (endian != GetMachineEndian()) {
+    return ByteSwap(raw);
+  }
+  return raw;
+}
+
+// Shorthand for ReadEndian<T>(data, Endian::kLittle).
+template <class T> T ReadLittleEndian(absl::string_view *data) {
+ return ReadEndian<T>(data, Endian::kLittle);
+}
+
+// Shorthand for ReadEndian<T>(data, Endian::kBig).
+template <class T> T ReadBigEndian(absl::string_view *data) {
+ return ReadEndian<T>(data, Endian::kBig);
+}
+
+// General data reading ///////////////////////////////////////////////////////
+
+absl::string_view ReadNullTerminated(absl::string_view* data);
+
+// Returns the first `bytes` bytes of `*data` and advances `*data` past them.
+// Throws (via THROW) when `*data` holds fewer than `bytes` bytes.
+inline absl::string_view ReadBytes(size_t bytes, absl::string_view* data) {
+  if (bytes > data->size()) {
+    THROW("premature EOF reading variable-length DWARF data");
+  }
+  const absl::string_view head = data->substr(0, bytes);
+  data->remove_prefix(bytes);
+  return head;
+}
+
+// Advances `*data` by `bytes` by calling ReadBytes and dropping the returned
+// view, so it throws under the same condition as ReadBytes.
+inline void SkipBytes(size_t bytes, absl::string_view* data) {
+ ReadBytes(bytes, data); // Discard result.
+}
+
+} // namespace bloaty
+
+#endif // BLOATY_UTIL_H_
diff --git a/src/webassembly.cc b/src/webassembly.cc
index 044fffa..c2c6e9a 100644
--- a/src/webassembly.cc
+++ b/src/webassembly.cc
@@ -13,34 +13,15 @@
// limitations under the License.
#include "bloaty.h"
+#include "util.h"
#include "absl/strings/substitute.h"
-ABSL_ATTRIBUTE_NORETURN
-static void Throw(const char *str, int line) {
- throw bloaty::Error(str, __FILE__, line);
-}
-
-#define THROW(msg) Throw(msg, __LINE__)
-#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
-#define WARN(x) fprintf(stderr, "bloaty: %s\n", x);
-
using absl::string_view;
namespace bloaty {
namespace wasm {
-template <class T>
-T ReadMemcpy(string_view* data) {
- T ret;
- if (data->size() < sizeof(T)) {
- THROW("premature EOF reading fixed-length wasm data");
- }
- memcpy(&ret, data->data(), sizeof(T));
- data->remove_prefix(sizeof(T));
- return ret;
-}
-
uint64_t ReadLEB128Internal(bool is_signed, size_t size, string_view* data) {
uint64_t ret = 0;
int shift = 0;
@@ -91,14 +72,14 @@
bool ReadMagic(string_view* data) {
const uint32_t wasm_magic = 0x6d736100;
- uint32_t magic = ReadMemcpy<uint32_t>(data);
+ auto magic = ReadFixed<uint32_t>(data);
if (magic != wasm_magic) {
return false;
}
// TODO(haberman): do we need to fail if this is >1?
- uint32_t version = ReadMemcpy<uint32_t>(data);
+ auto version = ReadFixed<uint32_t>(data);
(void)version;
return true;
@@ -272,7 +253,7 @@
ReadPiece(module_len, &data);
uint32_t field_len = ReadVarUInt32(&data);
ReadPiece(field_len, &data);
- auto kind = ReadMemcpy<uint8_t>(&data);
+ auto kind = ReadFixed<uint8_t>(&data);
switch (kind) {
case ExternalKind::kFunction:
diff --git a/src/write_bloaty_report.cc b/src/write_bloaty_report.cc
index 298d3cb..a22fbad 100644
--- a/src/write_bloaty_report.cc
+++ b/src/write_bloaty_report.cc
@@ -23,14 +23,6 @@
#include "bloaty.h"
#include "report.pb.h"
-ABSL_ATTRIBUTE_NORETURN
-static void Throw(const char *str, int line) {
- throw bloaty::Error(str, __FILE__, line);
-}
-
-#define THROW(msg) Throw(msg, __LINE__)
-#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
-
namespace bloaty {
void RollupOutput::PrintToProtobuf(std::ostream* out) const {
diff --git a/tests/bloaty_report_test.cc b/tests/bloaty_report_test.cc
index 355991a..d9b44e1 100644
--- a/tests/bloaty_report_test.cc
+++ b/tests/bloaty_report_test.cc
@@ -113,7 +113,7 @@
// This test is only run on x86_64.
// The test binary `05-binary.bin` is around 14 KiB.
// We would expect to see some compile units.
- ASSERT_EQ(report.compile_units().size(), 13);
+ EXPECT_NEAR(13, report.compile_units().size(), 1);
EXPECT_NEAR(14.1, static_cast<double>(size) / 1024.0, .1);
EXPECT_NEAR(14.1, report.file_total() / 1024.0, .1);
diff --git a/tests/bloaty_test.cc b/tests/bloaty_test.cc
index c0d7340..2783ac8 100644
--- a/tests/bloaty_test.cc
+++ b/tests/bloaty_test.cc
@@ -187,12 +187,9 @@
RunBloaty({"bloaty", "-d", "symbols", "-n", "50", file});
AssertChildren(*top_row_, {
std::make_tuple("bar_x", 4000, 4000),
- std::make_tuple("foo_x", 4000, 0),
+ std::make_tuple("foo_x", 4000, kUnknown),
std::make_tuple("bar_func", kUnknown, kSameAsVM),
std::make_tuple("foo_func", kUnknown, kSameAsVM),
- std::make_tuple("bar_y", 4, 4),
- std::make_tuple("bar_z", 4, 0),
- std::make_tuple("foo_y", 4, 0)
});
}
diff --git a/tests/bloaty_test_pe.cc b/tests/bloaty_test_pe.cc
new file mode 100644
index 0000000..14a247f
--- /dev/null
+++ b/tests/bloaty_test_pe.cc
@@ -0,0 +1,127 @@
+// Copyright 2021 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test.h"
+
+// One parameterised test case for BloatyOutputTest.
+struct BloatyTestEntry {
+  std::string name;                      // Test-case name (see TestEntryName).
+  std::vector<std::string> commandline;  // Extra arguments passed to bloaty.
+  std::string input_file;                // File bloaty is run on.
+  std::string result_file;               // File holding the expected output.
+};
+
+// Name generator passed to INSTANTIATE_TEST_SUITE_P: returns the `name`
+// field of the entry held in `entry.param`.
+std::string TestEntryName(const testing::TestParamInfo<struct BloatyTestEntry>& entry) {
+ return entry.param.name;
+}
+
+// Streams an entry as "{ <arg>, <arg>, ..., <input_file>, <result_file> }".
+// The `name` field is not printed.
+std::ostream& operator<<(std::ostream& os, const BloatyTestEntry& entry) {
+  os << "{ ";
+  for (size_t i = 0; i < entry.commandline.size(); ++i) {
+    os << entry.commandline[i] << ", ";
+  }
+  return os << entry.input_file << ", " << entry.result_file << " }";
+}
+
+// Rewrites `contents` in place, line by line: trailing tabs, spaces and
+// '\r' are removed from every line and the lines are re-joined with '\n',
+// with no newline after the last one. Because the separator is only added
+// once the result is non-empty, empty lines at the very start are dropped.
+void Normalize(std::string& contents) {
+  std::stringstream lines(contents);
+  std::string result;
+  std::string line;
+  while (std::getline(lines, line)) {
+    const auto last_kept = line.find_last_not_of("\t \r");
+    // Keep [0, last_kept]; a line made only of whitespace becomes empty.
+    line.erase(last_kept == std::string::npos ? 0 : last_kept + 1);
+    if (!result.empty()) {
+      result += "\n";
+    }
+    result += line;
+  }
+  contents.swap(result);
+}
+
+// Reads the whole of `filename` into `contents` and then runs Normalize() on
+// it.
+//
+// Returns true only when the file could be opened, its size determined and
+// every byte read. When the size cannot be determined `contents` is left
+// untouched; after a short read it holds the normalized bytes that were
+// read.
+inline bool GetFileContents(const std::string& filename, std::string& contents) {
+  FILE* file = fopen(filename.c_str(), "rb");
+  if (!file) {
+    std::cerr << "Couldn't get file size for: " << filename << "\n";
+    return false;
+  }
+
+  // ftell reports failure as -1L; converting that straight to size_t would
+  // ask resize() for an enormous buffer, so bail out first.
+  long end_pos = -1L;
+  if (fseek(file, 0L, SEEK_END) == 0) {
+    end_pos = ftell(file);
+  }
+  if (end_pos < 0 || fseek(file, 0L, SEEK_SET) != 0) {
+    fclose(file);
+    return false;
+  }
+
+  const size_t size = static_cast<size_t>(end_pos);
+  contents.resize(size);
+  size_t result = fread(&contents[0], 1, size, file);
+  fclose(file);
+  contents.resize(result);  // Drop the unread tail after a short read.
+  Normalize(contents);
+  return result == size;
+}
+
+// Fixture for tests parameterised by a BloatyTestEntry. The members are
+// references bound to fields of the object returned by GetParam(); nothing
+// is copied.
+class BloatyOutputTest : public BloatyTest,
+                         public testing::WithParamInterface<BloatyTestEntry> {
+ public:
+  BloatyOutputTest()
+      : commandline(GetParam().commandline),
+        input_file(GetParam().input_file),
+        result_file(GetParam().result_file) {}
+
+  const std::vector<std::string>& commandline;
+  const std::string& input_file;
+  const std::string& result_file;
+};
+
+
+// Runs bloaty with the entry's arguments on its input file, prints the
+// result as TSV and compares it, after Normalize(), with the contents of the
+// entry's result file.
+TEST_P(BloatyOutputTest, CheckOutput) {
+  // Only the success of GetFileSize is checked; the size itself is not used.
+  uint64_t size;
+  ASSERT_TRUE(GetFileSize(input_file, &size));
+
+  std::string expect_result;
+  ASSERT_TRUE(GetFileContents(result_file, expect_result));
+
+  // Argument vector: "bloaty", the entry's extra arguments, the input file.
+  std::vector<std::string> args = {"bloaty"};
+  args.insert(args.end(), commandline.begin(), commandline.end());
+  args.push_back(input_file);
+  RunBloaty(args);
+
+  bloaty::OutputOptions tsv_options;
+  tsv_options.output_format = bloaty::OutputFormat::kTSV;
+  std::stringstream rendered;
+  output_->Print(tsv_options, &rendered);
+
+  std::string tmp = rendered.str();
+  Normalize(tmp);
+  EXPECT_EQ(tmp, expect_result);
+}
+
+// Test matrix. Each entry lists, in BloatyTestEntry field order: the test
+// name, the extra bloaty arguments, the input binary and the file holding
+// the expected output.
+static BloatyTestEntry tests[] = {
+ { "MSVCR15DLL", {}, "msvc-15.0-foo-bar.dll", "msvc-15.0-foo-bar.dll.txt" },
+ { "MSVCR15DLLSEG", {"-d", "segments"}, "msvc-15.0-foo-bar.dll", "msvc-15.0-foo-bar.dll.seg.txt" },
+ { "MSVC15EXE", {}, "msvc-15.0-foo-bar-main-cv.bin", "msvc-15.0-foo-bar-main-cv.bin.txt" },
+ { "MSVC15EXESEG", {"-d", "segments"}, "msvc-15.0-foo-bar-main-cv.bin", "msvc-15.0-foo-bar-main-cv.bin.seg.txt" },
+
+ { "MSVCR16DLL", {}, "msvc-16.0-foo-bar.dll", "msvc-16.0-foo-bar.dll.txt" },
+ { "MSVCR16DLLSEG", {"-d", "segments"}, "msvc-16.0-foo-bar.dll", "msvc-16.0-foo-bar.dll.seg.txt" },
+ { "MSVC16EXE", {}, "msvc-16.0-foo-bar-main-cv.bin", "msvc-16.0-foo-bar-main-cv.bin.txt" },
+ { "MSVC16EXESEG", {"-d", "segments"}, "msvc-16.0-foo-bar-main-cv.bin", "msvc-16.0-foo-bar-main-cv.bin.seg.txt" },
+};
+
+// Instantiates BloatyOutputTest once per entry of `tests`, naming each
+// instance with TestEntryName.
+INSTANTIATE_TEST_SUITE_P(BloatyTest,
+ BloatyOutputTest,
+ testing::ValuesIn(tests),
+ TestEntryName);
diff --git a/tests/test.h b/tests/test.h
index 9fa9ecb..dc864f0 100644
--- a/tests/test.h
+++ b/tests/test.h
@@ -31,6 +31,10 @@
#include "bloaty.h"
#include "bloaty.pb.h"
+#if defined(_MSC_VER)
+#define PATH_MAX 4096
+#endif
+
inline bool GetFileSize(const std::string& filename, uint64_t* size) {
FILE* file = fopen(filename.c_str(), "rb");
if (!file) {
diff --git a/tests/testdata/PE/x64/msvc-15.0-foo-bar-main-cv.bin b/tests/testdata/PE/x64/msvc-15.0-foo-bar-main-cv.bin
new file mode 100644
index 0000000..8b386c7
--- /dev/null
+++ b/tests/testdata/PE/x64/msvc-15.0-foo-bar-main-cv.bin
Binary files differ
diff --git a/tests/testdata/PE/x64/msvc-15.0-foo-bar-main-cv.bin.seg.txt b/tests/testdata/PE/x64/msvc-15.0-foo-bar-main-cv.bin.seg.txt
new file mode 100644
index 0000000..c340143
--- /dev/null
+++ b/tests/testdata/PE/x64/msvc-15.0-foo-bar-main-cv.bin.seg.txt
@@ -0,0 +1,8 @@
+segments vmsize filesize
+.rdata 7840 8192
+.data 5608 512
+.text 5000 5120
+.pdata 552 1024
+[PE Headers] 696 696
+.reloc 40 512
+[Unmapped] 0 328
diff --git a/tests/testdata/PE/x64/msvc-15.0-foo-bar-main-cv.bin.txt b/tests/testdata/PE/x64/msvc-15.0-foo-bar-main-cv.bin.txt
new file mode 100644
index 0000000..f56e9fd
--- /dev/null
+++ b/tests/testdata/PE/x64/msvc-15.0-foo-bar-main-cv.bin.txt
@@ -0,0 +1,8 @@
+sections vmsize filesize
+.rdata 7840 8192
+.data 5608 512
+.text 5000 5120
+.pdata 552 1024
+[PE Headers] 696 696
+.reloc 40 512
+[Unmapped] 0 328
diff --git a/tests/testdata/PE/x64/msvc-15.0-foo-bar-main-cv.pdb b/tests/testdata/PE/x64/msvc-15.0-foo-bar-main-cv.pdb
new file mode 100644
index 0000000..b467213
--- /dev/null
+++ b/tests/testdata/PE/x64/msvc-15.0-foo-bar-main-cv.pdb
Binary files differ
diff --git a/tests/testdata/PE/x64/msvc-15.0-foo-bar.dll b/tests/testdata/PE/x64/msvc-15.0-foo-bar.dll
new file mode 100644
index 0000000..ca757e3
--- /dev/null
+++ b/tests/testdata/PE/x64/msvc-15.0-foo-bar.dll
Binary files differ
diff --git a/tests/testdata/PE/x64/msvc-15.0-foo-bar.dll.seg.txt b/tests/testdata/PE/x64/msvc-15.0-foo-bar.dll.seg.txt
new file mode 100644
index 0000000..ebdc542
--- /dev/null
+++ b/tests/testdata/PE/x64/msvc-15.0-foo-bar.dll.seg.txt
@@ -0,0 +1,8 @@
+segments vmsize filesize
+.text 3587 4096
+.rdata 2520 2560
+.data 1592 512
+[PE Headers] 696 696
+.pdata 408 512
+.reloc 24 512
+[Unmapped] 0 328
diff --git a/tests/testdata/PE/x64/msvc-15.0-foo-bar.dll.txt b/tests/testdata/PE/x64/msvc-15.0-foo-bar.dll.txt
new file mode 100644
index 0000000..9ad01d6
--- /dev/null
+++ b/tests/testdata/PE/x64/msvc-15.0-foo-bar.dll.txt
@@ -0,0 +1,8 @@
+sections vmsize filesize
+.text 3587 4096
+.rdata 2520 2560
+.data 1592 512
+[PE Headers] 696 696
+.pdata 408 512
+.reloc 24 512
+[Unmapped] 0 328
diff --git a/tests/testdata/PE/x64/msvc-16.0-foo-bar-main-cv.bin b/tests/testdata/PE/x64/msvc-16.0-foo-bar-main-cv.bin
new file mode 100644
index 0000000..5ee1e64
--- /dev/null
+++ b/tests/testdata/PE/x64/msvc-16.0-foo-bar-main-cv.bin
Binary files differ
diff --git a/tests/testdata/PE/x64/msvc-16.0-foo-bar-main-cv.bin.seg.txt b/tests/testdata/PE/x64/msvc-16.0-foo-bar-main-cv.bin.seg.txt
new file mode 100644
index 0000000..e91f2df
--- /dev/null
+++ b/tests/testdata/PE/x64/msvc-16.0-foo-bar-main-cv.bin.seg.txt
@@ -0,0 +1,8 @@
+segments vmsize filesize
+.rdata 8236 8704
+.text 5340 5632
+.data 5624 512
+.pdata 588 1024
+[PE Headers] 696 696
+.reloc 52 512
+[Unmapped] 0 328
diff --git a/tests/testdata/PE/x64/msvc-16.0-foo-bar-main-cv.bin.txt b/tests/testdata/PE/x64/msvc-16.0-foo-bar-main-cv.bin.txt
new file mode 100644
index 0000000..e18bde9
--- /dev/null
+++ b/tests/testdata/PE/x64/msvc-16.0-foo-bar-main-cv.bin.txt
@@ -0,0 +1,8 @@
+sections vmsize filesize
+.rdata 8236 8704
+.text 5340 5632
+.data 5624 512
+.pdata 588 1024
+[PE Headers] 696 696
+.reloc 52 512
+[Unmapped] 0 328
diff --git a/tests/testdata/PE/x64/msvc-16.0-foo-bar-main-cv.pdb b/tests/testdata/PE/x64/msvc-16.0-foo-bar-main-cv.pdb
new file mode 100644
index 0000000..12fd795
--- /dev/null
+++ b/tests/testdata/PE/x64/msvc-16.0-foo-bar-main-cv.pdb
Binary files differ
diff --git a/tests/testdata/PE/x64/msvc-16.0-foo-bar.dll b/tests/testdata/PE/x64/msvc-16.0-foo-bar.dll
new file mode 100644
index 0000000..e2b8dea
--- /dev/null
+++ b/tests/testdata/PE/x64/msvc-16.0-foo-bar.dll
Binary files differ
diff --git a/tests/testdata/PE/x64/msvc-16.0-foo-bar.dll.seg.txt b/tests/testdata/PE/x64/msvc-16.0-foo-bar.dll.seg.txt
new file mode 100644
index 0000000..053ef6a
--- /dev/null
+++ b/tests/testdata/PE/x64/msvc-16.0-foo-bar.dll.seg.txt
@@ -0,0 +1,8 @@
+segments vmsize filesize
+.text 3592 4096
+.rdata 2680 3072
+.data 1608 512
+[PE Headers] 696 696
+.pdata 432 512
+.reloc 36 512
+[Unmapped] 0 328
diff --git a/tests/testdata/PE/x64/msvc-16.0-foo-bar.dll.txt b/tests/testdata/PE/x64/msvc-16.0-foo-bar.dll.txt
new file mode 100644
index 0000000..e996d4b
--- /dev/null
+++ b/tests/testdata/PE/x64/msvc-16.0-foo-bar.dll.txt
@@ -0,0 +1,8 @@
+sections vmsize filesize
+.text 3592 4096
+.rdata 2680 3072
+.data 1608 512
+[PE Headers] 696 696
+.pdata 432 512
+.reloc 36 512
+[Unmapped] 0 328
diff --git a/tests/testdata/PE/x86/msvc-15.0-foo-bar-main-cv.bin b/tests/testdata/PE/x86/msvc-15.0-foo-bar-main-cv.bin
new file mode 100644
index 0000000..94e93c8
--- /dev/null
+++ b/tests/testdata/PE/x86/msvc-15.0-foo-bar-main-cv.bin
Binary files differ
diff --git a/tests/testdata/PE/x86/msvc-15.0-foo-bar-main-cv.bin.seg.txt b/tests/testdata/PE/x86/msvc-15.0-foo-bar-main-cv.bin.seg.txt
new file mode 100644
index 0000000..3984cdd
--- /dev/null
+++ b/tests/testdata/PE/x86/msvc-15.0-foo-bar-main-cv.bin.seg.txt
@@ -0,0 +1,7 @@
+segments vmsize filesize
+.rdata 6724 7168
+.data 4912 512
+.text 4268 4608
+[PE Headers] 640 640
+.reloc 488 512
+[Unmapped] 0 384
diff --git a/tests/testdata/PE/x86/msvc-15.0-foo-bar-main-cv.bin.txt b/tests/testdata/PE/x86/msvc-15.0-foo-bar-main-cv.bin.txt
new file mode 100644
index 0000000..c58f6fe
--- /dev/null
+++ b/tests/testdata/PE/x86/msvc-15.0-foo-bar-main-cv.bin.txt
@@ -0,0 +1,7 @@
+sections vmsize filesize
+.rdata 6724 7168
+.data 4912 512
+.text 4268 4608
+[PE Headers] 640 640
+.reloc 488 512
+[Unmapped] 0 384
diff --git a/tests/testdata/PE/x86/msvc-15.0-foo-bar-main-cv.pdb b/tests/testdata/PE/x86/msvc-15.0-foo-bar-main-cv.pdb
new file mode 100644
index 0000000..76b0695
--- /dev/null
+++ b/tests/testdata/PE/x86/msvc-15.0-foo-bar-main-cv.pdb
Binary files differ
diff --git a/tests/testdata/PE/x86/msvc-15.0-foo-bar.dll b/tests/testdata/PE/x86/msvc-15.0-foo-bar.dll
new file mode 100644
index 0000000..2477499
--- /dev/null
+++ b/tests/testdata/PE/x86/msvc-15.0-foo-bar.dll
Binary files differ
diff --git a/tests/testdata/PE/x86/msvc-15.0-foo-bar.dll.seg.txt b/tests/testdata/PE/x86/msvc-15.0-foo-bar.dll.seg.txt
new file mode 100644
index 0000000..470bad6
--- /dev/null
+++ b/tests/testdata/PE/x86/msvc-15.0-foo-bar.dll.seg.txt
@@ -0,0 +1,7 @@
+segments vmsize filesize
+.text 3172 3584
+.rdata 1772 2048
+.data 908 512
+[PE Headers] 656 656
+.reloc 296 512
+[Unmapped] 0 368
diff --git a/tests/testdata/PE/x86/msvc-15.0-foo-bar.dll.txt b/tests/testdata/PE/x86/msvc-15.0-foo-bar.dll.txt
new file mode 100644
index 0000000..6d47fc3
--- /dev/null
+++ b/tests/testdata/PE/x86/msvc-15.0-foo-bar.dll.txt
@@ -0,0 +1,7 @@
+sections vmsize filesize
+.text 3172 3584
+.rdata 1772 2048
+.data 908 512
+[PE Headers] 656 656
+.reloc 296 512
+[Unmapped] 0 368
diff --git a/tests/testdata/PE/x86/msvc-16.0-foo-bar-main-cv.bin b/tests/testdata/PE/x86/msvc-16.0-foo-bar-main-cv.bin
new file mode 100644
index 0000000..98c7191
--- /dev/null
+++ b/tests/testdata/PE/x86/msvc-16.0-foo-bar-main-cv.bin
Binary files differ
diff --git a/tests/testdata/PE/x86/msvc-16.0-foo-bar-main-cv.bin.seg.txt b/tests/testdata/PE/x86/msvc-16.0-foo-bar-main-cv.bin.seg.txt
new file mode 100644
index 0000000..8844551
--- /dev/null
+++ b/tests/testdata/PE/x86/msvc-16.0-foo-bar-main-cv.bin.seg.txt
@@ -0,0 +1,7 @@
+segments vmsize filesize
+.rdata 6944 7168
+.text 4716 5120
+.data 4912 512
+.reloc 544 1024
+[PE Headers] 648 648
+[Unmapped] 0 376
diff --git a/tests/testdata/PE/x86/msvc-16.0-foo-bar-main-cv.bin.txt b/tests/testdata/PE/x86/msvc-16.0-foo-bar-main-cv.bin.txt
new file mode 100644
index 0000000..68b42bf
--- /dev/null
+++ b/tests/testdata/PE/x86/msvc-16.0-foo-bar-main-cv.bin.txt
@@ -0,0 +1,7 @@
+sections vmsize filesize
+.rdata 6944 7168
+.text 4716 5120
+.data 4912 512
+.reloc 544 1024
+[PE Headers] 648 648
+[Unmapped] 0 376
diff --git a/tests/testdata/PE/x86/msvc-16.0-foo-bar-main-cv.pdb b/tests/testdata/PE/x86/msvc-16.0-foo-bar-main-cv.pdb
new file mode 100644
index 0000000..c4ef096
--- /dev/null
+++ b/tests/testdata/PE/x86/msvc-16.0-foo-bar-main-cv.pdb
Binary files differ
diff --git a/tests/testdata/PE/x86/msvc-16.0-foo-bar.dll b/tests/testdata/PE/x86/msvc-16.0-foo-bar.dll
new file mode 100644
index 0000000..4bec075
--- /dev/null
+++ b/tests/testdata/PE/x86/msvc-16.0-foo-bar.dll
Binary files differ
diff --git a/tests/testdata/PE/x86/msvc-16.0-foo-bar.dll.seg.txt b/tests/testdata/PE/x86/msvc-16.0-foo-bar.dll.seg.txt
new file mode 100644
index 0000000..e52851a
--- /dev/null
+++ b/tests/testdata/PE/x86/msvc-16.0-foo-bar.dll.seg.txt
@@ -0,0 +1,7 @@
+segments vmsize filesize
+.text 3360 3584
+.rdata 1812 2048
+.data 912 512
+[PE Headers] 648 648
+.reloc 312 512
+[Unmapped] 0 376
diff --git a/tests/testdata/PE/x86/msvc-16.0-foo-bar.dll.txt b/tests/testdata/PE/x86/msvc-16.0-foo-bar.dll.txt
new file mode 100644
index 0000000..1599574
--- /dev/null
+++ b/tests/testdata/PE/x86/msvc-16.0-foo-bar.dll.txt
@@ -0,0 +1,7 @@
+sections vmsize filesize
+.text 3360 3584
+.rdata 1812 2048
+.data 912 512
+[PE Headers] 648 648
+.reloc 312 512
+[Unmapped] 0 376
diff --git a/tests/testdata/bar.c b/tests/testdata/bar.c
new file mode 100644
index 0000000..0c835fd
--- /dev/null
+++ b/tests/testdata/bar.c
@@ -0,0 +1,5 @@
+
+int bar_x[1000] = {1};
+int bar_y = 1;
+int bar_z = 0;
+int bar_func() { return bar_y / 17; }
diff --git a/tests/testdata/foo.c b/tests/testdata/foo.c
new file mode 100644
index 0000000..9add3b6
--- /dev/null
+++ b/tests/testdata/foo.c
@@ -0,0 +1,4 @@
+
+int foo_x[1000] = {0};
+int foo_y = 0;
+int foo_func() { return foo_y / 17; }
diff --git a/tests/testdata/main.c b/tests/testdata/main.c
new file mode 100644
index 0000000..c272dab
--- /dev/null
+++ b/tests/testdata/main.c
@@ -0,0 +1 @@
+int main() {}
\ No newline at end of file
diff --git a/tests/testdata/make_all_msvc_test_files.bat b/tests/testdata/make_all_msvc_test_files.bat
new file mode 100644
index 0000000..edcef32
--- /dev/null
+++ b/tests/testdata/make_all_msvc_test_files.bat
@@ -0,0 +1,17 @@
+
+@if not defined _echo echo off
+
+@REM For every Visual Studio installation that vswhere reports in the
+@REM requested version range, set up the x86 and the x64 developer
+@REM environment in turn and run make_msvc_test_files.bat for it.
+@REM
+@REM VisualStudioVersion is set by vsdevcmd.bat inside the FOR block. A plain
+@REM percent expansion there would be resolved when the whole block is
+@REM parsed, before vsdevcmd.bat has run, so the ECHO goes through "call"
+@REM with doubled percent signs to expand the variable at run time.
+set VSWHERE="%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe"
+
+for /f "usebackq delims=" %%i in (`%VSWHERE% -version [15.0^,17.0^) -property installationPath`) do (
+  if exist "%%i\Common7\Tools\vsdevcmd.bat" (
+    for %%A in (x86 x64) do (
+      SETLOCAL
+      call "%%i\Common7\Tools\vsdevcmd.bat" -arch=%%A
+      call ECHO Building VS %%VisualStudioVersion%% %%A
+      call make_msvc_test_files.bat PE\%%A
+      ENDLOCAL
+    )
+  )
+)
+
diff --git a/tests/testdata/make_msvc_test_files.bat b/tests/testdata/make_msvc_test_files.bat
new file mode 100644
index 0000000..dea7167
--- /dev/null
+++ b/tests/testdata/make_msvc_test_files.bat
@@ -0,0 +1,44 @@
+@echo off
+
+@REM Usage: make_msvc_test_files.bat ^<output dir^>
+@REM
+@REM Compiles foo.c, bar.c and main.c (two directories above the output dir)
+@REM and links them into a DLL and an executable with a PDB inside the
+@REM output dir. Needs cl on PATH and VisualStudioVersion set.
+
+if "%~1"=="" goto noargs
+
+@REM Stop if the output directory cannot be entered; otherwise the build and
+@REM the cleanup below would run in the caller's directory.
+pushd "%~1" || exit /B 1
+
+@REM /Gm- disables minimal rebuild, /O1 favor size, /MD selects external runtime,
+@REM /GL enable cross-module optimization
+set CL_COMMON_FLAGS=/nologo /Gm- /O1 /MD /GL
+
+
+call :make_obj ..\..\foo.c
+call :make_obj ..\..\bar.c
+call :make_obj ..\..\main.c
+
+call :make_dll msvc-%VisualStudioVersion%-foo-bar.dll foo.obj bar.obj
+call :make_binary_with_pdb msvc-%VisualStudioVersion%-foo-bar-main-cv.bin msvc-%VisualStudioVersion%-foo-bar-main-cv.pdb foo.obj bar.obj main.obj
+
+goto cleanup
+
+@REM :make_dll ^<out.dll^> ^<objs...^> - links the objects into a DLL.
+:make_dll
+for /f "tokens=1,* delims= " %%a in ("%*") do set ALL_BUT_FIRST=%%b
+cl %CL_COMMON_FLAGS% /LD %ALL_BUT_FIRST% /link /OUT:%1
+exit /B 0
+
+@REM :make_binary_with_pdb ^<out.bin^> ^<out.pdb^> ^<objs...^>
+@REM Here ALL_BUT_FIRST receives everything after the first two arguments.
+:make_binary_with_pdb
+for /f "tokens=2,* delims= " %%a in ("%*") do set ALL_BUT_FIRST=%%b
+cl %CL_COMMON_FLAGS% %ALL_BUT_FIRST% /link /OUT:%1 /PDB:%2 /DEBUG
+exit /B 0
+
+@REM :make_obj ^<source.c^> - compiles one source file to an object file.
+:make_obj
+cl %CL_COMMON_FLAGS% /c %1
+exit /B 0
+
+:noargs
+echo Usage: make_msvc_test_files.bat ^<output dir^>
+@REM Nothing was built and no directory was pushed, so skip the cleanup.
+exit /B 1
+
+:cleanup
+
+del foo.obj
+del bar.obj
+del main.obj
+
+popd
diff --git a/third_party/abseil-cpp b/third_party/abseil-cpp
index 1948f6f..5dd2407 160000
--- a/third_party/abseil-cpp
+++ b/third_party/abseil-cpp
@@ -1 +1 @@
-Subproject commit 1948f6f967e34db9793cfa8b4bcbaf370d039fd8
+Subproject commit 5dd240724366295970c613ed23d0092bcf392f18
diff --git a/third_party/freebsd_elf/elf32.h b/third_party/freebsd_elf/elf32.h
index a4d4d6a..63df8a9 100644
--- a/third_party/freebsd_elf/elf32.h
+++ b/third_party/freebsd_elf/elf32.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 1996-1998 John D. Polstra.
* All rights reserved.
*
@@ -256,4 +258,10 @@
Elf32_Half si_flags; /* per symbol flags */
} Elf32_Syminfo;
+typedef struct {
+ Elf32_Word ch_type;
+ Elf32_Word ch_size;
+ Elf32_Word ch_addralign;
+} Elf32_Chdr;
+
#endif /* !_FREEBSD_ELF_SYS_ELF32_H_ */
diff --git a/third_party/freebsd_elf/elf64.h b/third_party/freebsd_elf/elf64.h
index c440048..6be26b2 100644
--- a/third_party/freebsd_elf/elf64.h
+++ b/third_party/freebsd_elf/elf64.h
@@ -1,4 +1,6 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
* Copyright (c) 1996-1998 John D. Polstra.
* All rights reserved.
*
@@ -259,4 +261,11 @@
Elf64_Half si_flags; /* per symbol flags */
} Elf64_Syminfo;
+typedef struct {
+ Elf64_Word ch_type;
+ Elf64_Word ch_reserved;
+ Elf64_Xword ch_size;
+ Elf64_Xword ch_addralign;
+} Elf64_Chdr;
+
#endif /* !_FREEBSD_ELF_SYS_ELF64_H_ */
diff --git a/third_party/freebsd_elf/elf_common.h b/third_party/freebsd_elf/elf_common.h
index 44a97e0..f242cf5 100644
--- a/third_party/freebsd_elf/elf_common.h
+++ b/third_party/freebsd_elf/elf_common.h
@@ -1,4 +1,7 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2017, 2018 Dell EMC
* Copyright (c) 2000, 2001, 2008, 2011, David E. O'Brien
* Copyright (c) 1998 John D. Polstra.
* All rights reserved.
@@ -30,7 +33,7 @@
#include <stdint.h>
#ifndef _FREEBSD_ELF_SYS_ELF_COMMON_H_
-#define _FREEBSD_ELF_SYS_ELF_COMMON_H_ 1
+#define _FREEBSD_ELF_SYS_ELF_COMMON_H_ 1
/*
* ELF definitions that are independent of architecture or word size.
@@ -50,6 +53,7 @@
uint32_t n_descsz; /* Length of descriptor. */
uint32_t n_type; /* Type of this note. */
} Elf_Note;
+typedef Elf_Note Elf_Nhdr;
/*
* Option kinds.
@@ -173,6 +177,7 @@
#define ELFOSABI_AROS 15 /* Amiga Research OS */
#define ELFOSABI_FENIXOS 16 /* FenixOS */
#define ELFOSABI_CLOUDABI 17 /* Nuxi CloudABI */
+#define ELFOSABI_OPENVOS 18 /* Stratus Technologies OpenVOS */
#define ELFOSABI_ARM_AEABI 64 /* ARM EABI */
#define ELFOSABI_ARM 97 /* ARM */
#define ELFOSABI_STANDALONE 255 /* Standalone (embedded) application */
@@ -332,8 +337,10 @@
#define EF_ARM_ALIGN8 0x00000040
#define EF_ARM_NEW_ABI 0x00000080
#define EF_ARM_OLD_ABI 0x00000100
-#define EF_ARM_SOFT_FLOAT 0x00000200
-#define EF_ARM_VFP_FLOAT 0x00000400
+#define EF_ARM_ABI_FLOAT_SOFT 0x00000200
+#define EF_ARM_SOFT_FLOAT EF_ARM_ABI_FLOAT_SOFT /* Pre-V5 ABI name */
+#define EF_ARM_ABI_FLOAT_HARD 0x00000400
+#define EF_ARM_VFP_FLOAT EF_ARM_ABI_FLOAT_HARD /* Pre-V5 ABI name */
#define EF_ARM_MAVERICK_FLOAT 0x00000800
#define EF_MIPS_NOREORDER 0x00000001
@@ -342,15 +349,38 @@
#define EF_MIPS_UCODE 0x00000010
#define EF_MIPS_ABI2 0x00000020 /* N32 */
#define EF_MIPS_OPTIONS_FIRST 0x00000080
+#define EF_MIPS_ABI 0x0000F000
+#define EF_MIPS_ABI_O32 0x00001000
+#define EF_MIPS_ABI_O64 0x00002000
+#define EF_MIPS_ABI_EABI32 0x00003000
+#define EF_MIPS_ABI_EABI64 0x00004000
#define EF_MIPS_ARCH_ASE 0x0F000000 /* Architectural extensions */
#define EF_MIPS_ARCH_ASE_MDMX 0x08000000 /* MDMX multimedia extension */
#define EF_MIPS_ARCH_ASE_M16 0x04000000 /* MIPS-16 ISA extensions */
#define EF_MIPS_ARCH 0xF0000000 /* Architecture field */
+#define EF_MIPS_ARCH_1 0x00000000 /* -mips1 code */
+#define EF_MIPS_ARCH_2 0x10000000 /* -mips2 code */
+#define EF_MIPS_ARCH_3 0x20000000 /* -mips3 code */
+#define EF_MIPS_ARCH_4 0x30000000 /* -mips4 code */
+#define EF_MIPS_ARCH_5 0x40000000 /* -mips5 code */
+#define EF_MIPS_ARCH_32 0x50000000 /* -mips32 code */
+#define EF_MIPS_ARCH_64 0x60000000 /* -mips64 code */
+#define EF_MIPS_ARCH_32R2 0x70000000 /* -mips32r2 code */
+#define EF_MIPS_ARCH_64R2 0x80000000 /* -mips64r2 code */
#define EF_PPC_EMB 0x80000000
#define EF_PPC_RELOCATABLE 0x00010000
#define EF_PPC_RELOCATABLE_LIB 0x00008000
+#define EF_RISCV_RVC 0x00000001
+#define EF_RISCV_FLOAT_ABI_MASK 0x00000006
+#define EF_RISCV_FLOAT_ABI_SOFT 0x00000000
+#define EF_RISCV_FLOAT_ABI_SINGLE 0x000002
+#define EF_RISCV_FLOAT_ABI_DOUBLE 0x000004
+#define EF_RISCV_FLOAT_ABI_QUAD 0x00000006
+#define EF_RISCV_RVE 0x00000008
+#define EF_RISCV_TSO 0x00000010
+
#define EF_SPARC_EXT_MASK 0x00ffff00
#define EF_SPARC_32PLUS 0x00000100
#define EF_SPARC_SUN_US1 0x00000200
@@ -515,8 +545,16 @@
#define PT_LOPROC 0x70000000 /* First processor-specific type. */
#define PT_ARM_ARCHEXT 0x70000000 /* ARM arch compat information. */
#define PT_ARM_EXIDX 0x70000001 /* ARM exception unwind tables. */
+#define PT_MIPS_REGINFO 0x70000000 /* MIPS register usage info */
+#define PT_MIPS_RTPROC 0x70000001 /* MIPS runtime procedure tbl */
+#define PT_MIPS_OPTIONS 0x70000002 /* MIPS e_flags value*/
+#define PT_MIPS_ABIFLAGS 0x70000003 /* MIPS fp mode */
#define PT_HIPROC 0x7fffffff /* Last processor-specific type. */
+#define PT_OPENBSD_RANDOMIZE 0x65A3DBE6 /* OpenBSD random data segment */
+#define PT_OPENBSD_WXNEEDED 0x65A3DBE7 /* OpenBSD EXEC/WRITE pages needed */
+#define PT_OPENBSD_BOOTDATA 0x65A41BE6 /* OpenBSD section for boot args */
+
/* Values for p_flags. */
#define PF_X 0x1 /* Executable. */
#define PF_W 0x2 /* Writable. */
@@ -582,6 +620,7 @@
#define DT_SUNW_RTLDINF 0x6000000e /* ld.so.1 info (private) */
#define DT_SUNW_FILTER 0x6000000f /* symbol filter name */
#define DT_SUNW_CAP 0x60000010 /* hardware/software */
+#define DT_SUNW_ASLR 0x60000023 /* ASLR control */
#define DT_HIOS 0x6ffff000 /* Last OS-specific */
/*
@@ -688,6 +727,7 @@
#define DT_MIPS_PLTGOT 0x70000032
#define DT_MIPS_RLD_OBJ_UPDATE 0x70000033
#define DT_MIPS_RWPLT 0x70000034
+#define DT_MIPS_RLD_MAP_REL 0x70000035
#define DT_PPC_GOT 0x70000000
#define DT_PPC_TLSOPT 0x70000001
@@ -726,6 +766,7 @@
#define DF_1_ORIGIN 0x00000080 /* Process $ORIGIN */
#define DF_1_INTERPOSE 0x00000400 /* Interpose all objects but main */
#define DF_1_NODEFLIB 0x00000800 /* Do not search default paths */
+#define DF_1_PIE 0x08000000 /* Is position-independent executable */
/* Values for l_flags. */
#define LL_NONE 0x0 /* no flags */
@@ -736,10 +777,25 @@
#define LL_DELAY_LOAD 0x10
#define LL_DELTA 0x20
+/* Note section names */
+#define ELF_NOTE_FREEBSD "FreeBSD"
+#define ELF_NOTE_NETBSD "NetBSD"
+#define ELF_NOTE_SOLARIS "SUNW Solaris"
+#define ELF_NOTE_GNU "GNU"
+
/* Values for n_type used in executables. */
#define NT_FREEBSD_ABI_TAG 1
#define NT_FREEBSD_NOINIT_TAG 2
#define NT_FREEBSD_ARCH_TAG 3
+#define NT_FREEBSD_FEATURE_CTL 4
+
+/* NT_FREEBSD_FEATURE_CTL desc[0] bits */
+#define NT_FREEBSD_FCTL_ASLR_DISABLE 0x00000001
+#define NT_FREEBSD_FCTL_PROTMAX_DISABLE 0x00000002
+#define NT_FREEBSD_FCTL_STKGAP_DISABLE 0x00000004
+#define NT_FREEBSD_FCTL_WXNEEDED 0x00000008
+#define NT_FREEBSD_FCTL_LA48 0x00000010
+#define NT_FREEBSD_FCTL_ASG_DISABLE 0x00000020 /* ASLR STACK GAP Disable */
/* Values for n_type. Used in core files. */
#define NT_PRSTATUS 1 /* Process status. */
@@ -755,8 +811,26 @@
#define NT_PROCSTAT_OSREL 14 /* Procstat osreldate data. */
#define NT_PROCSTAT_PSSTRINGS 15 /* Procstat ps_strings data. */
#define NT_PROCSTAT_AUXV 16 /* Procstat auxv data. */
+#define NT_PTLWPINFO 17 /* Thread ptrace miscellaneous info. */
#define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */
+#define NT_PPC_VSX 0x102 /* PowerPC VSX registers */
#define NT_X86_XSTATE 0x202 /* x86 XSAVE extended state. */
+#define NT_ARM_VFP 0x400 /* ARM VFP registers */
+
+/* GNU note types. */
+#define NT_GNU_ABI_TAG 1
+#define NT_GNU_HWCAP 2
+#define NT_GNU_BUILD_ID 3
+#define NT_GNU_GOLD_VERSION 4
+#define NT_GNU_PROPERTY_TYPE_0 5
+
+#define GNU_PROPERTY_LOPROC 0xc0000000
+#define GNU_PROPERTY_HIPROC 0xdfffffff
+
+#define GNU_PROPERTY_X86_FEATURE_1_AND 0xc0000002
+
+#define GNU_PROPERTY_X86_FEATURE_1_IBT 0x00000001
+#define GNU_PROPERTY_X86_FEATURE_1_SHSTK 0x00000002
/* Symbol Binding - ELFNN_ST_BIND - st_info */
#define STB_LOCAL 0 /* Local symbol */
@@ -851,6 +925,51 @@
#define SYMINFO_CURRENT 1
#define SYMINFO_NUM 2
+/* Values for ch_type (compressed section headers). */
+#define ELFCOMPRESS_ZLIB 1 /* ZLIB/DEFLATE */
+#define ELFCOMPRESS_LOOS 0x60000000 /* OS-specific */
+#define ELFCOMPRESS_HIOS 0x6fffffff
+#define ELFCOMPRESS_LOPROC 0x70000000 /* Processor-specific */
+#define ELFCOMPRESS_HIPROC 0x7fffffff
+
+/* Values for a_type. */
+#define AT_NULL 0 /* Terminates the vector. */
+#define AT_IGNORE 1 /* Ignored entry. */
+#define AT_EXECFD 2 /* File descriptor of program to load. */
+#define AT_PHDR 3 /* Program header of program already loaded. */
+#define AT_PHENT 4 /* Size of each program header entry. */
+#define AT_PHNUM 5 /* Number of program header entries. */
+#define AT_PAGESZ 6 /* Page size in bytes. */
+#define AT_BASE 7 /* Interpreter's base address. */
+#define AT_FLAGS 8 /* Flags. */
+#define AT_ENTRY 9 /* Where interpreter should transfer control. */
+#define AT_NOTELF 10 /* Program is not ELF ?? */
+#define AT_UID 11 /* Real uid. */
+#define AT_EUID 12 /* Effective uid. */
+#define AT_GID 13 /* Real gid. */
+#define AT_EGID 14 /* Effective gid. */
+#define AT_EXECPATH 15 /* Path to the executable. */
+#define AT_CANARY 16 /* Canary for SSP. */
+#define AT_CANARYLEN 17 /* Length of the canary. */
+#define AT_OSRELDATE 18 /* OSRELDATE. */
+#define AT_NCPUS 19 /* Number of CPUs. */
+#define AT_PAGESIZES 20 /* Pagesizes. */
+#define AT_PAGESIZESLEN 21 /* Number of pagesizes. */
+#define AT_TIMEKEEP 22 /* Pointer to timehands. */
+#define AT_STACKPROT 23 /* Initial stack protection. */
+#define AT_EHDRFLAGS 24 /* e_flags field from elf hdr */
+#define AT_HWCAP 25 /* CPU feature flags. */
+#define AT_HWCAP2 26 /* CPU feature flags 2. */
+#define AT_BSDFLAGS 27 /* ELF BSD Flags. */
+#define AT_ARGC 28 /* Argument count */
+#define AT_ARGV 29 /* Argument vector */
+#define AT_ENVC 30 /* Environment count */
+#define AT_ENVV 31 /* Environment vector */
+#define AT_PS_STRINGS 32 /* struct ps_strings */
+#define AT_FXRNG 33 /* Pointer to root RNG seed version. */
+
+#define AT_COUNT 34 /* Count of defined aux entry types. */
+
/*
* Relocation types.
*
@@ -869,12 +988,17 @@
#define R_386_RELATIVE 8 /* Add load address of shared object. */
#define R_386_GOTOFF 9 /* Add GOT-relative symbol address. */
#define R_386_GOTPC 10 /* Add PC-relative GOT table address. */
+#define R_386_32PLT 11
#define R_386_TLS_TPOFF 14 /* Negative offset in static TLS block */
#define R_386_TLS_IE 15 /* Absolute address of GOT for -ve static TLS */
#define R_386_TLS_GOTIE 16 /* GOT entry for negative static TLS block */
#define R_386_TLS_LE 17 /* Negative offset relative to static TLS */
#define R_386_TLS_GD 18 /* 32 bit offset to GOT (index,off) pair */
#define R_386_TLS_LDM 19 /* 32 bit offset to GOT (index,zero) pair */
+#define R_386_16 20
+#define R_386_PC16 21
+#define R_386_8 22
+#define R_386_PC8 23
#define R_386_TLS_GD_32 24 /* 32 bit offset to GOT (index,off) pair */
#define R_386_TLS_GD_PUSH 25 /* pushl instruction for Sun ABI GD sequence */
#define R_386_TLS_GD_CALL 26 /* call instruction for Sun ABI GD sequence */
@@ -889,7 +1013,12 @@
#define R_386_TLS_DTPMOD32 35 /* GOT entry containing TLS index */
#define R_386_TLS_DTPOFF32 36 /* GOT entry containing TLS offset */
#define R_386_TLS_TPOFF32 37 /* GOT entry of -ve static TLS offset */
+#define R_386_SIZE32 38
+#define R_386_TLS_GOTDESC 39
+#define R_386_TLS_DESC_CALL 40
+#define R_386_TLS_DESC 41
#define R_386_IRELATIVE 42 /* PLT entry resolved indirectly at runtime */
+#define R_386_GOT32X 43
#define R_AARCH64_NONE 0 /* No relocation */
#define R_AARCH64_ABS64 257 /* Absolute offset */
@@ -898,6 +1027,10 @@
#define R_AARCH64_PREL64 260 /* PC relative */
#define R_AARCH64_PREL32 261 /* PC relative, 32-bit overflow check */
#define R_AARCH64_PREL16 262 /* PC relative, 16-bit overflow check */
+#define R_AARCH64_TSTBR14 279 /* TBZ/TBNZ immediate */
+#define R_AARCH64_CONDBR19 280 /* Conditional branch immediate */
+#define R_AARCH64_JUMP26 282 /* Branch immediate */
+#define R_AARCH64_CALL26 283 /* Call immediate */
#define R_AARCH64_COPY 1024 /* Copy data from shared object */
#define R_AARCH64_GLOB_DAT 1025 /* Set GOT entry to data address */
#define R_AARCH64_JUMP_SLOT 1026 /* Set GOT entry to code address */
@@ -1052,6 +1185,8 @@
#define R_MIPS_CALLLO16 31 /* lower 16 bit GOT entry for function */
#define R_MIPS_JALR 37
#define R_MIPS_TLS_GD 42
+#define R_MIPS_COPY 126
+#define R_MIPS_JUMP_SLOT 127
#define R_PPC_NONE 0 /* No relocation. */
#define R_PPC_ADDR32 1
@@ -1090,6 +1225,7 @@
#define R_PPC_SECTOFF_LO 34
#define R_PPC_SECTOFF_HI 35
#define R_PPC_SECTOFF_HA 36
+#define R_PPC_IRELATIVE 248
/*
* 64-bit relocations
@@ -1211,6 +1347,19 @@
#define R_RISCV_ALIGN 43
#define R_RISCV_RVC_BRANCH 44
#define R_RISCV_RVC_JUMP 45
+#define R_RISCV_RVC_LUI 46
+#define R_RISCV_GPREL_I 47
+#define R_RISCV_GPREL_S 48
+#define R_RISCV_TPREL_I 49
+#define R_RISCV_TPREL_S 50
+#define R_RISCV_RELAX 51
+#define R_RISCV_SUB6 52
+#define R_RISCV_SET6 53
+#define R_RISCV_SET8 54
+#define R_RISCV_SET16 55
+#define R_RISCV_SET32 56
+#define R_RISCV_32_PCREL 57
+#define R_RISCV_IRELATIVE 58
#define R_SPARC_NONE 0
#define R_SPARC_8 1
@@ -1331,6 +1480,11 @@
#define R_X86_64_TLSDESC_CALL 35
#define R_X86_64_TLSDESC 36
#define R_X86_64_IRELATIVE 37
+#define R_X86_64_RELATIVE64 38
+/* 39 and 40 were BND-related, already decommissioned */
+#define R_X86_64_GOTPCRELX 41
+#define R_X86_64_REX_GOTPCRELX 42
+#define ELF_BSDF_SIGFASTBLK 0x0001 /* Kernel supports fast sigblock */
#endif /* !_FREEBSD_ELF_SYS_ELF_COMMON_H_ */
diff --git a/third_party/lief_pe/LICENSE b/third_party/lief_pe/LICENSE
new file mode 100644
index 0000000..87b30a9
--- /dev/null
+++ b/third_party/lief_pe/LICENSE
@@ -0,0 +1,202 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "{}"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2017 - 2021 R. Thomas
+ Copyright 2017 - 2021 Quarkslab
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/third_party/lief_pe/METADATA b/third_party/lief_pe/METADATA
new file mode 100644
index 0000000..f0cd410
--- /dev/null
+++ b/third_party/lief_pe/METADATA
@@ -0,0 +1,25 @@
+name: "LIEF: Library to Instrument Executable Formats (PE header)"
+description:
+ "This contains structures as defined by the LIEF project to parse PE headers. "
+ "They only contain struct and constant definitions (no code) and their "
+ "contents are almost entirely derivable from the PE/COFF specification."
+
+third_party {
+ url {
+ type: HOMEPAGE
+ value: "https://lief.quarkslab.com/"
+ }
+ url {
+ type: GIT
+ value: "https://github.com/lief-project/LIEF"
+ }
+ version: "1a4a93c36fab9b0d198e85459b6afe0fb2b0157b"
+ last_upgrade_date {
+ year: 2021
+ month: 4
+ day: 3
+ }
+ local_modifications:
+ "Renamed include/LIEF/PE/structures.inc to third_party/lief_pe/pe_structures.h. "
+ "Added license header to the file."
+}
diff --git a/third_party/lief_pe/pe_structures.h b/third_party/lief_pe/pe_structures.h
new file mode 100644
index 0000000..71b7e61
--- /dev/null
+++ b/third_party/lief_pe/pe_structures.h
@@ -0,0 +1,516 @@
+/* Copyright 2017 - 2021 R. Thomas
+ * Copyright 2017 - 2021 Quarkslab
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+//! The maximum number of sections that a COFF object can have (inclusive).
+static const int32_t MaxNumberOfSections16 = 65279; // 0xFEFF
+
+//! The PE signature bytes that follow the DOS stub header.
+static const char PE_Magic[] = { 'P', 'E', '\0', '\0' }; // 4 bytes; also sizes pe_header::signature
+
+static const char Rich_Magic[] = {'R', 'i', 'c', 'h'}; // 4 bytes, no NUL terminator
+static const char DanS_Magic[] = {'D', 'a', 'n', 'S'}; // 4 bytes, no NUL terminator
+
+static const uint32_t DanS_Magic_number = 0x536E6144; // the bytes 'D','a','n','S' read as a little-endian uint32_t
+
+static const char BigObjMagic[] = { // 16 raw bytes
+ '\xc7', '\xa1', '\xba', '\xd1', '\xee', '\xba', '\xa9', '\x4b',
+ '\xaf', '\x20', '\xfa', '\xf6', '\x6a', '\xa4', '\xdc', '\xb8',
+};
+
+static const uint8_t DEFAULT_NUMBER_DATA_DIRECTORIES = 15;
+
+#pragma pack(push,1)
+struct pe_header { // 24 bytes under the pack(1) pragma that wraps these definitions
+ char signature[sizeof(PE_Magic)]; // 4 bytes: same length as the PE_Magic array
+ uint16_t Machine;
+ uint16_t NumberOfSections;
+ uint32_t TimeDateStamp;
+ uint32_t PointerToSymbolTable;
+ uint32_t NumberOfSymbols;
+ uint16_t SizeOfOptionalHeader;
+ uint16_t Characteristics;
+};
+
+
+struct pe_relocation {
+ uint32_t VirtualAddress;
+ uint32_t SymbolTableIndex;
+ uint16_t Type;
+};
+
+struct pe_base_relocation_block {
+ uint32_t PageRVA;
+ uint32_t BlockSize;
+};
+
+
+struct pe_symbol { // 18 bytes under 1-byte packing
+ union { // two views of the same 8 bytes
+ char ShortName[8];
+ struct
+ {
+ uint32_t Zeroes;
+ uint32_t Offset;
+ } Name; // overlays ShortName as two 32-bit words
+ } Name;
+ uint32_t Value;
+ int16_t SectionNumber; // signed, unlike the other integer fields here
+ uint16_t Type;
+ uint8_t StorageClass;
+ uint8_t NumberOfAuxSymbols;
+};
+
+
+struct pe_section { // 40 bytes under 1-byte packing
+ char Name[8]; // fixed 8-byte field; no room for a terminator when all 8 bytes are used
+ uint32_t VirtualSize;
+ uint32_t VirtualAddress;
+ uint32_t SizeOfRawData;
+ uint32_t PointerToRawData;
+ uint32_t PointerToRelocations;
+ uint32_t PointerToLineNumbers;
+ uint16_t NumberOfRelocations;
+ uint16_t NumberOfLineNumbers;
+ uint32_t Characteristics;
+};
+
+struct AuxiliaryFunctionDefinition {
+ uint32_t TagIndex;
+ uint32_t TotalSize;
+ uint32_t PointerToLinenumber;
+ uint32_t PointerToNextFunction;
+ char unused[2];
+};
+
+struct AuxiliarybfAndefSymbol {
+ uint8_t unused1[4];
+ uint16_t Linenumber;
+ uint8_t unused2[6];
+ uint32_t PointerToNextFunction;
+ uint8_t unused3[2];
+};
+
+struct AuxiliaryWeakExternal {
+ uint32_t TagIndex;
+ uint32_t Characteristics;
+ uint8_t unused[10];
+};
+
+
+struct AuxiliarySectionDefinition {
+ uint32_t Length;
+ uint16_t NumberOfRelocations;
+ uint16_t NumberOfLinenumbers;
+ uint32_t CheckSum;
+ uint32_t Number;
+ uint8_t Selection;
+ char unused;
+};
+
+struct AuxiliaryCLRToken {
+ uint8_t AuxType;
+ uint8_t unused1;
+ uint32_t SymbolTableIndex;
+ char unused2[12];
+};
+
+union Auxiliary { // every member below is 18 bytes under 1-byte packing
+ AuxiliaryFunctionDefinition FunctionDefinition; // struct tags used as bare type names: valid C++, not valid C
+ AuxiliarybfAndefSymbol bfAndefSymbol;
+ AuxiliaryWeakExternal WeakExternal;
+ AuxiliarySectionDefinition SectionDefinition;
+}; // NOTE(review): AuxiliaryCLRToken (also 18 bytes) is defined above but is not a member here
+
+
+/// The Import Directory Table.
+///
+/// There is a single array of these and one entry per imported DLL.
+struct pe_import {
+ uint32_t ImportLookupTableRVA;
+ uint32_t TimeDateStamp;
+ uint32_t ForwarderChain;
+ uint32_t NameRVA;
+ uint32_t ImportAddressTableRVA;
+};
+
+
+struct ImportLookupTableEntry32 {
+ uint32_t data;
+};
+
+struct ImportLookupTableEntry64 {
+ uint64_t data;
+};
+
+
+struct pe32_tls {
+ uint32_t RawDataStartVA;
+ uint32_t RawDataEndVA;
+ uint32_t AddressOfIndex;
+ uint32_t AddressOfCallback;
+ uint32_t SizeOfZeroFill;
+ uint32_t Characteristics;
+};
+
+
+struct pe64_tls {
+ uint64_t RawDataStartVA;
+ uint64_t RawDataEndVA;
+ uint64_t AddressOfIndex;
+ uint64_t AddressOfCallback;
+ uint32_t SizeOfZeroFill;
+ uint32_t Characteristics;
+};
+
+
+/// The DOS compatible header at the front of all PEs.
+struct pe_dos_header { // 64 bytes under 1-byte packing
+ uint16_t Magic;
+ uint16_t UsedBytesInTheLastPage;
+ uint16_t FileSizeInPages;
+ uint16_t NumberOfRelocationItems;
+ uint16_t HeaderSizeInParagraphs;
+ uint16_t MinimumExtraParagraphs;
+ uint16_t MaximumExtraParagraphs;
+ uint16_t InitialRelativeSS;
+ uint16_t InitialSP;
+ uint16_t Checksum;
+ uint16_t InitialIP;
+ uint16_t InitialRelativeCS;
+ uint16_t AddressOfRelocationTable;
+ uint16_t OverlayNumber;
+ uint16_t Reserved[4];
+ uint16_t OEMid;
+ uint16_t OEMinfo;
+ uint16_t Reserved2[10];
+ uint32_t AddressOfNewExeHeader; // last field, at byte offset 60 (0x3C)
+};
+
+struct pe64_optional_header { // 112 bytes under 1-byte packing
+ uint16_t Magic;
+ uint8_t MajorLinkerVersion;
+ uint8_t MinorLinkerVersion;
+ uint32_t SizeOfCode;
+ uint32_t SizeOfInitializedData;
+ uint32_t SizeOfUninitializedData;
+ uint32_t AddressOfEntryPoint; // RVA
+ uint32_t BaseOfCode; // RVA
+ //uint32_t BaseOfData; // RVA -- deliberately absent here; only pe32_optional_header has this field
+ uint64_t ImageBase; // 64-bit here, 32-bit in pe32_optional_header
+ uint32_t SectionAlignment;
+ uint32_t FileAlignment;
+ uint16_t MajorOperatingSystemVersion;
+ uint16_t MinorOperatingSystemVersion;
+ uint16_t MajorImageVersion;
+ uint16_t MinorImageVersion;
+ uint16_t MajorSubsystemVersion;
+ uint16_t MinorSubsystemVersion;
+ uint32_t Win32VersionValue;
+ uint32_t SizeOfImage;
+ uint32_t SizeOfHeaders;
+ uint32_t CheckSum;
+ uint16_t Subsystem;
+ uint16_t DLLCharacteristics;
+ uint64_t SizeOfStackReserve; // the four stack/heap sizes are 64-bit here, 32-bit in pe32_optional_header
+ uint64_t SizeOfStackCommit;
+ uint64_t SizeOfHeapReserve;
+ uint64_t SizeOfHeapCommit;
+ uint32_t LoaderFlags;
+ uint32_t NumberOfRvaAndSize;
+};
+
+
+struct pe32_optional_header { // 96 bytes under 1-byte packing
+ uint16_t Magic;
+ uint8_t MajorLinkerVersion;
+ uint8_t MinorLinkerVersion;
+ uint32_t SizeOfCode;
+ uint32_t SizeOfInitializedData;
+ uint32_t SizeOfUninitializedData;
+ uint32_t AddressOfEntryPoint; // RVA
+ uint32_t BaseOfCode; // RVA
+ uint32_t BaseOfData; // RVA; this field has no counterpart in pe64_optional_header
+ uint32_t ImageBase; // 32-bit here, 64-bit in pe64_optional_header
+ uint32_t SectionAlignment;
+ uint32_t FileAlignment;
+ uint16_t MajorOperatingSystemVersion;
+ uint16_t MinorOperatingSystemVersion;
+ uint16_t MajorImageVersion;
+ uint16_t MinorImageVersion;
+ uint16_t MajorSubsystemVersion;
+ uint16_t MinorSubsystemVersion;
+ uint32_t Win32VersionValue;
+ uint32_t SizeOfImage;
+ uint32_t SizeOfHeaders;
+ uint32_t CheckSum;
+ uint16_t Subsystem;
+ uint16_t DLLCharacteristics;
+ uint32_t SizeOfStackReserve; // the four stack/heap sizes are 32-bit here, 64-bit in pe64_optional_header
+ uint32_t SizeOfStackCommit;
+ uint32_t SizeOfHeapReserve;
+ uint32_t SizeOfHeapCommit;
+ uint32_t LoaderFlags;
+ uint32_t NumberOfRvaAndSize;
+};
+
+
+struct pe_data_directory {
+ uint32_t RelativeVirtualAddress;
+ uint32_t Size;
+};
+
+
+struct pe_debug {
+ uint32_t Characteristics;
+ uint32_t TimeDateStamp;
+ uint16_t MajorVersion;
+ uint16_t MinorVersion;
+ uint32_t Type;
+ uint32_t SizeOfData;
+ uint32_t AddressOfRawData;
+ uint32_t PointerToRawData;
+};
+
+
+struct pe_pdb_70 { // fixed part is 24 bytes (4 + 16 + 4) before the pointer member
+ uint32_t cv_signature;
+ uint8_t signature[16];
+ uint32_t age;
+ char* filename; // NOTE(review): pointer member, so sizeof() varies with the target's pointer width; presumably the file stores the name inline after 'age' -- confirm before overlaying this struct on file data
+};
+
+struct pe_pdb_20 { // fixed part is 16 bytes (4 x uint32_t) before the pointer member
+ uint32_t cv_signature;
+ uint32_t offset;
+ uint32_t signature;
+ uint32_t age;
+ char* filename; // NOTE(review): pointer member, so sizeof() varies with the target's pointer width; presumably the file stores the name inline after 'age' -- confirm before overlaying this struct on file data
+};
+
+struct pe_pogo {
+ uint32_t start_rva;
+ uint32_t size;
+ char name[1];
+};
+
+
+struct pe_resource_directory_table {
+ uint32_t Characteristics;
+ uint32_t TimeDateStamp;
+ uint16_t MajorVersion;
+ uint16_t MinorVersion;
+ uint16_t NumberOfNameEntries;
+ uint16_t NumberOfIDEntries;
+};
+
+struct pe_resource_directory_entries {
+ union {
+ uint32_t NameRVA;
+ uint32_t IntegerID;
+ } NameID;
+ uint32_t RVA;
+};
+
+struct pe_resource_data_entry {
+ uint32_t DataRVA;
+ uint32_t Size;
+ uint32_t Codepage;
+ uint32_t Reserved;
+};
+
+struct pe_resource_string {
+ int16_t Length;
+ uint16_t Name[1];
+};
+
+struct pe_resource_acceltableentry {
+ int16_t fFlags;
+ int16_t wAnsi;
+ int16_t wId;
+ int16_t padding;
+};
+
+//
+// Export structures
+//
+struct pe_export_directory_table {
+ uint32_t ExportFlags; ///< Reserved, must be 0
+ uint32_t Timestamp; ///< The time and date that the export data was created
+ uint16_t MajorVersion; ///< The Major version number
+ uint16_t MinorVersion; ///< The Minor version number
+ uint32_t NameRVA; ///< The address of the ASCII DLL's name (RVA)
+ uint32_t OrdinalBase; ///< The starting ordinal number for exports. (Usually 1)
+ uint32_t AddressTableEntries; ///< Number of entries in the export address table
+ uint32_t NumberOfNamePointers; ///< Number of entries in the name pointer table
+ uint32_t ExportAddressTableRVA; ///< Address of the export address table (RVA)
+ uint32_t NamePointerRVA; ///< Address of the name pointer table (RVA)
+ uint32_t OrdinalTableRVA; ///< Address of the ordinal table (RVA)
+};
+
+
+struct pe_resource_fixed_file_info {
+ uint32_t signature; // e.g. 0xfeef04bd
+ uint32_t struct_version; // e.g. 0x00000042 = "0.42"
+ uint32_t file_version_MS; // e.g. 0x00030075 = "3.75"
+ uint32_t file_version_LS; // e.g. 0x00000031 = "0.31"
+ uint32_t product_version_MS; // e.g. 0x00030010 = "3.10"
+ uint32_t product_version_LS; // e.g. 0x00000031 = "0.31"
+ uint32_t file_flags_mask; // = 0x3F for version "0.42"
+ uint32_t file_flags; // e.g. VFF_DEBUG | VFF_PRERELEASE
+ uint32_t file_OS; // e.g. VOS_DOS_WINDOWS16
+ uint32_t file_type; // e.g. VFT_DRIVER
+ uint32_t file_subtype; // e.g. VFT2_DRV_KEYBOARD
+ uint32_t file_date_MS; // e.g. 0
+ uint32_t file_date_LS; // e.g. 0
+};
+
+
+struct pe_resource_version_info {
+ uint16_t length;
+ uint16_t sizeof_value;
+ uint16_t type;
+ char16_t key[16];
+ // uint16_t padding;
+ //
+ // uint16_t padding;
+ // uint16_t children
+};
+
+//! Resource icons directory structure
+//! Based on https://docs.microsoft.com/en-us/windows/win32/menurc/resources-reference
+//!
+//! This is the beginning of the RESOURCE_TYPES::GROUP_ICON content
+struct pe_resource_icon_dir {
+ uint16_t reserved; ///< Reserved
+ uint16_t type; ///< Resource type (1 for icons)
+ uint16_t count; ///< Number of icons
+};
+
+
+//! Structure that follows pe_resource_icon_dir in a resource entry
+struct pe_resource_icon_group {
+ uint8_t width; ///< Width, in pixels, of the image
+ uint8_t height; ///< Height, in pixels, of the image
+ uint8_t color_count; ///< Number of colors in image (0 if >=8bpp)
+ uint8_t reserved; ///< Reserved (must be 0)
+ uint16_t planes; ///< Color Planes
+ uint16_t bit_count; ///< Bits per pixel
+ uint32_t size; ///< Size of the image in bytes
+ uint16_t ID; ///< The associated ID
+};
+
+//! Structure that follows pe_resource_icon_dir in a icon **file**
+struct pe_icon_header {
+ uint8_t width; ///< Width, in pixels, of the image
+ uint8_t height; ///< Height, in pixels, of the image
+ uint8_t color_count; ///< Number of colors in image (0 if >=8bpp)
+ uint8_t reserved; ///< Reserved (must be 0)
+ uint16_t planes; ///< Color Planes
+ uint16_t bit_count; ///< Bits per pixel
+ uint32_t size; ///< Size of the image in bytes
+ uint32_t offset; ///< Offset to the pixels
+};
+
+//! Extended dialog box template
+struct pe_dialog_template_ext {
+ uint16_t version;
+ uint16_t signature;
+ uint32_t help_id;
+ uint32_t ext_style;
+ uint32_t style;
+ uint16_t nbof_items;
+ int16_t x;
+ int16_t y;
+ int16_t cx;
+ int16_t cy;
+ // sz_Or_Ord menu;
+ // sz_Or_Ord windowClass;
+ // char16_t title[titleLen];
+ // uint16_t pointsize;
+ // uint16_t weight;
+ // uint8_t italic;
+ // uint8_t charset;
+ // char16_t typeface[stringLen];
+};
+
+//! Dialog box template
+struct pe_dialog_template {
+ uint32_t style;
+ uint32_t ext_style;
+ uint16_t nbof_items;
+ int16_t x;
+ int16_t y;
+ int16_t cx;
+ int16_t cy;
+};
+
+
+//! Extended dialog box template item
+struct pe_dialog_item_template_ext {
+ uint32_t help_id;
+ uint32_t ext_style;
+ uint32_t style;
+ int16_t x;
+ int16_t y;
+ int16_t cx;
+ int16_t cy;
+ uint32_t id;
+ // sz_Or_Ord windowClass;
+ // sz_Or_Ord title;
+ // uint16_t extra_count;
+};
+
+
+//! Dialog box template item
+struct pe_dialog_item_template {
+ uint32_t style;
+ uint32_t ext_style;
+ int16_t x;
+ int16_t y;
+ int16_t cx;
+ int16_t cy;
+ uint16_t id;
+};
+
+struct pe_code_integrity {
+ uint16_t Flags;
+ uint16_t Catalog;
+ uint32_t CatalogOffset;
+ uint32_t Reserved;
+};
+
+struct pe_exception_entry_x64 {
+ uint32_t address_start_rva;
+ uint32_t address_end_rva;
+ uint32_t unwind_info_rva;
+};
+
+
+struct pe_exception_entry_mips {
+ uint32_t address_start_va;
+ uint32_t address_end_va;
+ uint32_t exception_handler;
+ uint32_t handler_data;
+ uint32_t prolog_end_address;
+};
+
+struct pe_exception_entry_arm {
+ uint32_t address_start_va;
+ uint32_t data;
+};
+
+#pragma pack(pop)
diff --git a/third_party/zlib b/third_party/zlib
new file mode 160000
index 0000000..cacf7f1
--- /dev/null
+++ b/third_party/zlib
@@ -0,0 +1 @@
+Subproject commit cacf7f1d4e3d44d871b605da3b647f07d718623f