Updating branches/google/testing to r297704

git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/branches/google/testing@298145 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5b1591e..cbde831 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -142,9 +142,16 @@
 append_list_if(COMPILER_RT_HAS_FNO_SANITIZE_SAFE_STACK_FLAG -fno-sanitize=safe-stack SANITIZER_COMMON_CFLAGS)
 append_list_if(COMPILER_RT_HAS_FVISIBILITY_HIDDEN_FLAG -fvisibility=hidden SANITIZER_COMMON_CFLAGS)
 append_list_if(COMPILER_RT_HAS_FVISIBILITY_INLINES_HIDDEN_FLAG -fvisibility-inlines-hidden SANITIZER_COMMON_CFLAGS)
-append_list_if(COMPILER_RT_HAS_FNO_FUNCTION_SECTIONS_FLAG -fno-function-sections SANITIZER_COMMON_CFLAGS)
 append_list_if(COMPILER_RT_HAS_FNO_LTO_FLAG -fno-lto SANITIZER_COMMON_CFLAGS)
 
+# The following is a workaround for powerpc64le. This is the only architecture
+# that requires -fno-function-sections to work properly. Without it, the ASan
+# Linux test function-sections-are-bad.cc fails with the following error:
+# 'undefined symbol: __sanitizer_unaligned_load32'.
+if(DEFINED TARGET_powerpc64le_CFLAGS)
+  append_list_if(COMPILER_RT_HAS_FNO_FUNCTION_SECTIONS_FLAG -fno-function-sections TARGET_powerpc64le_CFLAGS)
+endif()
+
 if(MSVC)
   # Replace the /M[DT][d] flags with /MT, and strip any definitions of _DEBUG,
   # which cause definition mismatches at link time.
@@ -225,6 +232,17 @@
 # Warnings to turn off for all libraries, not just sanitizers.
 append_string_if(COMPILER_RT_HAS_WUNUSED_PARAMETER_FLAG -Wno-unused-parameter CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
 
+if (CMAKE_LINKER MATCHES "link.exe$")
+  # Silence MSVC linker warnings caused by empty object files. The
+  # sanitizer libraries intentionally use ifdefs that result in empty
+  # files, rather than skipping these files in the build system.
+  # Ideally, we would pass this flag only for the libraries that need
+  # it, but CMake doesn't seem to have a way to set linker flags for
+  # individual static libraries, so we enable the suppression flag for
+  # the whole compiler-rt project.
+  append("/IGNORE:4221" CMAKE_STATIC_LINKER_FLAGS)
+endif()
+
 add_subdirectory(include)
 
 set(COMPILER_RT_LIBCXX_PATH ${LLVM_MAIN_SRC_DIR}/projects/libcxx)
diff --git a/cmake/Modules/AddCompilerRT.cmake b/cmake/Modules/AddCompilerRT.cmake
index 42ae7ad..d4533e6 100644
--- a/cmake/Modules/AddCompilerRT.cmake
+++ b/cmake/Modules/AddCompilerRT.cmake
@@ -94,7 +94,7 @@
 #                         OS <os list>
 #                         SOURCES <source files>
 #                         CFLAGS <compile flags>
-#                         LINKFLAGS <linker flags>
+#                         LINK_FLAGS <linker flags>
 #                         DEFS <compile definitions>
 #                         LINK_LIBS <linked libraries> (only for shared library)
 #                         OBJECT_LIBS <object libraries to use as sources>
@@ -107,7 +107,7 @@
   cmake_parse_arguments(LIB
     ""
     "PARENT_TARGET"
-    "OS;ARCHS;SOURCES;CFLAGS;LINKFLAGS;DEFS;LINK_LIBS;OBJECT_LIBS"
+    "OS;ARCHS;SOURCES;CFLAGS;LINK_FLAGS;DEFS;LINK_LIBS;OBJECT_LIBS"
     ${ARGN})
   set(libnames)
   if(APPLE)
@@ -116,7 +116,7 @@
         set(libname "${name}_${os}")
       else()
         set(libname "${name}_${os}_dynamic")
-        set(extra_linkflags_${libname} ${DARWIN_${os}_LINKFLAGS} ${LIB_LINKFLAGS})
+        set(extra_link_flags_${libname} ${DARWIN_${os}_LINK_FLAGS} ${LIB_LINK_FLAGS})
       endif()
       list_intersect(LIB_ARCHS_${libname} DARWIN_${os}_ARCHS LIB_ARCHS)
       if(LIB_ARCHS_${libname})
@@ -139,7 +139,7 @@
       else()
         set(libname "${name}-dynamic-${arch}")
         set(extra_cflags_${libname} ${TARGET_${arch}_CFLAGS} ${LIB_CFLAGS})
-        set(extra_linkflags_${libname} ${TARGET_${arch}_LINKFLAGS} ${LIB_LINKFLAGS})
+        set(extra_link_flags_${libname} ${TARGET_${arch}_LINK_FLAGS} ${LIB_LINK_FLAGS})
         if(WIN32)
           set(output_name_${libname} ${name}_dynamic-${arch}${COMPILER_RT_OS_SUFFIX})
         else()
@@ -188,7 +188,7 @@
 
     add_library(${libname} ${type} ${sources_${libname}})
     set_target_compile_flags(${libname} ${extra_cflags_${libname}})
-    set_target_link_flags(${libname} ${extra_linkflags_${libname}})
+    set_target_link_flags(${libname} ${extra_link_flags_${libname}})
     set_property(TARGET ${libname} APPEND PROPERTY
                 COMPILE_DEFINITIONS ${LIB_DEFS})
     set_target_output_directories(${libname} ${COMPILER_RT_LIBRARY_OUTPUT_DIR})
@@ -243,7 +243,7 @@
 # when cross compiling, COMPILER_RT_TEST_COMPILER_CFLAGS help
 # in compilation and linking of unittests.
 string(REPLACE " " ";" COMPILER_RT_UNITTEST_CFLAGS "${COMPILER_RT_TEST_COMPILER_CFLAGS}")
-set(COMPILER_RT_UNITTEST_LINKFLAGS ${COMPILER_RT_UNITTEST_CFLAGS})
+set(COMPILER_RT_UNITTEST_LINK_FLAGS ${COMPILER_RT_UNITTEST_CFLAGS})
 
 # Unittests support.
 set(COMPILER_RT_GTEST_PATH ${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest)
@@ -256,6 +256,7 @@
 )
 
 append_list_if(COMPILER_RT_DEBUG -DSANITIZER_DEBUG=1 COMPILER_RT_UNITTEST_CFLAGS)
+append_list_if(COMPILER_RT_HAS_WCOVERED_SWITCH_DEFAULT_FLAG -Wno-covered-switch-default COMPILER_RT_UNITTEST_CFLAGS)
 
 if(MSVC)
   # clang doesn't support exceptions on Windows yet.
@@ -379,6 +380,7 @@
                -DCMAKE_BUILD_TYPE=Release
                -DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>
                -DLLVM_PATH=${LLVM_MAIN_SRC_DIR}
+               -DLIBCXX_STANDALONE_BUILD=On
     LOG_BUILD 1
     LOG_CONFIGURE 1
     LOG_INSTALL 1
diff --git a/cmake/Modules/CompilerRTDarwinUtils.cmake b/cmake/Modules/CompilerRTDarwinUtils.cmake
index 28d3986..3c89381 100644
--- a/cmake/Modules/CompilerRTDarwinUtils.cmake
+++ b/cmake/Modules/CompilerRTDarwinUtils.cmake
@@ -66,7 +66,7 @@
     file(WRITE ${SIMPLE_C} "#include <stdio.h>\nint main() { printf(__FILE__); return 0; }\n")
   
     set(os_linker_flags)
-    foreach(flag ${DARWIN_${os}_LINKFLAGS})
+    foreach(flag ${DARWIN_${os}_LINK_FLAGS})
       set(os_linker_flags "${os_linker_flags} ${flag}")
     endforeach()
   endif()
diff --git a/cmake/Modules/CompilerRTLink.cmake b/cmake/Modules/CompilerRTLink.cmake
index bb96869..05c535f 100644
--- a/cmake/Modules/CompilerRTLink.cmake
+++ b/cmake/Modules/CompilerRTLink.cmake
@@ -1,16 +1,16 @@
 # Link a shared library with COMPILER_RT_TEST_COMPILER.
 # clang_link_shared(<output.so>
 #                   OBJECTS <list of input objects>
-#                   LINKFLAGS <list of link flags>
+#                   LINK_FLAGS <list of link flags>
 #                   DEPS <list of dependencies>)
 macro(clang_link_shared so_file)
-  cmake_parse_arguments(SOURCE "" "" "OBJECTS;LINKFLAGS;DEPS" ${ARGN})
+  cmake_parse_arguments(SOURCE "" "" "OBJECTS;LINK_FLAGS;DEPS" ${ARGN})
   if(NOT COMPILER_RT_STANDALONE_BUILD)
     list(APPEND SOURCE_DEPS clang)
   endif()
   add_custom_command(
     OUTPUT ${so_file}
     COMMAND ${COMPILER_RT_TEST_COMPILER} -o "${so_file}" -shared
-            ${SOURCE_LINKFLAGS} ${SOURCE_OBJECTS}
+            ${SOURCE_LINK_FLAGS} ${SOURCE_OBJECTS}
     DEPENDS ${SOURCE_DEPS})
 endmacro()
diff --git a/cmake/Modules/CompilerRTUtils.cmake b/cmake/Modules/CompilerRTUtils.cmake
index ae389ba..ed946d8 100644
--- a/cmake/Modules/CompilerRTUtils.cmake
+++ b/cmake/Modules/CompilerRTUtils.cmake
@@ -100,6 +100,13 @@
   set(${out_var} ${archs} PARENT_SCOPE)
 endfunction()
 
+# Add $arch as supported with no additional flags.
+macro(add_default_target_arch arch)
+  set(TARGET_${arch}_CFLAGS "")
+  set(CAN_TARGET_${arch} 1)
+  list(APPEND COMPILER_RT_SUPPORTED_ARCH ${arch})
+endmacro()
+
 function(check_compile_definition def argstring out_var)
   if("${def}" STREQUAL "")
     set(${out_var} TRUE PARENT_SCOPE)
@@ -119,7 +126,7 @@
 # If successful, saves target flags for this architecture.
 macro(test_target_arch arch def)
   set(TARGET_${arch}_CFLAGS ${ARGN})
-  set(TARGET_${arch}_LINKFLAGS ${ARGN})
+  set(TARGET_${arch}_LINK_FLAGS ${ARGN})
   set(argstring "")
   foreach(arg ${ARGN})
     set(argstring "${argstring} ${arg}")
@@ -212,8 +219,19 @@
   set(LLVM_MAIN_SRC_DIR ${MAIN_SRC_DIR} CACHE PATH "Path to LLVM source tree")
 
   # Make use of LLVM CMake modules.
-  file(TO_CMAKE_PATH ${LLVM_BINARY_DIR} LLVM_BINARY_DIR_CMAKE_STYLE)
-  set(LLVM_CMAKE_PATH "${LLVM_BINARY_DIR_CMAKE_STYLE}/lib${LLVM_LIBDIR_SUFFIX}/cmake/llvm")
+  # --cmakedir is supported since llvm r291218 (4.0 release)
+  execute_process(
+    COMMAND ${LLVM_CONFIG_PATH} --cmakedir
+    RESULT_VARIABLE HAD_ERROR
+    OUTPUT_VARIABLE CONFIG_OUTPUT)
+  if(NOT HAD_ERROR)
+    string(STRIP "${CONFIG_OUTPUT}" LLVM_CMAKE_PATH_FROM_LLVM_CONFIG)
+    file(TO_CMAKE_PATH ${LLVM_CMAKE_PATH_FROM_LLVM_CONFIG} LLVM_CMAKE_PATH)
+  else()
+    file(TO_CMAKE_PATH ${LLVM_BINARY_DIR} LLVM_BINARY_DIR_CMAKE_STYLE)
+    set(LLVM_CMAKE_PATH "${LLVM_BINARY_DIR_CMAKE_STYLE}/lib${LLVM_LIBDIR_SUFFIX}/cmake/llvm")
+  endif()
+
   list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_PATH}")
   # Get some LLVM variables from LLVMConfig.
   include("${LLVM_CMAKE_PATH}/LLVMConfig.cmake")
@@ -223,8 +241,16 @@
 endmacro()
 
 macro(construct_compiler_rt_default_triple)
-  set(COMPILER_RT_DEFAULT_TARGET_TRIPLE ${TARGET_TRIPLE} CACHE STRING
-      "Default triple for which compiler-rt runtimes will be built.")
+  if(COMPILER_RT_DEFAULT_TARGET_ONLY)
+    if(DEFINED COMPILER_RT_DEFAULT_TARGET_TRIPLE)
+      message(FATAL_ERROR "COMPILER_RT_DEFAULT_TARGET_TRIPLE isn't supported when building for default target only")
+    endif()
+    set(COMPILER_RT_DEFAULT_TARGET_TRIPLE ${CMAKE_C_COMPILER_TARGET})
+  else()
+    set(COMPILER_RT_DEFAULT_TARGET_TRIPLE ${TARGET_TRIPLE} CACHE STRING
+          "Default triple for which compiler-rt runtimes will be built.")
+  endif()
+
   if(DEFINED COMPILER_RT_TEST_TARGET_TRIPLE)
     # Backwards compatibility: this variable used to be called
     # COMPILER_RT_TEST_TARGET_TRIPLE.
@@ -234,7 +260,10 @@
   string(REPLACE "-" ";" TARGET_TRIPLE_LIST ${COMPILER_RT_DEFAULT_TARGET_TRIPLE})
   list(GET TARGET_TRIPLE_LIST 0 COMPILER_RT_DEFAULT_TARGET_ARCH)
   list(GET TARGET_TRIPLE_LIST 1 COMPILER_RT_DEFAULT_TARGET_OS)
-  list(GET TARGET_TRIPLE_LIST 2 COMPILER_RT_DEFAULT_TARGET_ABI)
+  list(LENGTH TARGET_TRIPLE_LIST TARGET_TRIPLE_LIST_LENGTH)
+  if(TARGET_TRIPLE_LIST_LENGTH GREATER 2)
+    list(GET TARGET_TRIPLE_LIST 2 COMPILER_RT_DEFAULT_TARGET_ABI)
+  endif()
   # Determine if test target triple is specified explicitly, and doesn't match the
   # default.
   if(NOT COMPILER_RT_DEFAULT_TARGET_TRIPLE STREQUAL TARGET_TRIPLE)
diff --git a/cmake/Modules/SanitizerUtils.cmake b/cmake/Modules/SanitizerUtils.cmake
index c66083c..c80fc3b 100644
--- a/cmake/Modules/SanitizerUtils.cmake
+++ b/cmake/Modules/SanitizerUtils.cmake
@@ -46,6 +46,17 @@
   endforeach()
 endmacro()
 
+# This function is only used on Darwin, where undefined symbols must be specified
+# in the linker invocation.
+function(add_weak_symbols libname link_flags)
+  file(STRINGS "${COMPILER_RT_SOURCE_DIR}/lib/${libname}/weak_symbols.txt" WEAK_SYMBOLS)
+  set(local_link_flags ${${link_flags}})
+  foreach(SYMBOL ${WEAK_SYMBOLS})
+    set(local_link_flags ${local_link_flags} -Wl,-U,${SYMBOL})
+  endforeach()
+  set(${link_flags} ${local_link_flags} PARENT_SCOPE)
+endfunction()
+
 macro(add_sanitizer_rt_version_list name)
   set(vers ${CMAKE_CURRENT_BINARY_DIR}/${name}.vers)
   cmake_parse_arguments(ARG "" "" "LIBS;EXTRA" ${ARGN})
diff --git a/cmake/base-config-ix.cmake b/cmake/base-config-ix.cmake
index 9780867..6f9f151 100644
--- a/cmake/base-config-ix.cmake
+++ b/cmake/base-config-ix.cmake
@@ -84,6 +84,8 @@
   option(COMPILER_RT_ENABLE_IOS "Enable building for iOS" On)
   option(COMPILER_RT_ENABLE_WATCHOS "Enable building for watchOS - Experimental" Off)
   option(COMPILER_RT_ENABLE_TVOS "Enable building for tvOS - Experimental" Off)
+else()
+  option(COMPILER_RT_DEFAULT_TARGET_ONLY "Build builtins only for the default target" Off)
 endif()
 
 if(WIN32 AND NOT MINGW AND NOT CYGWIN)
@@ -127,7 +129,9 @@
     detect_target_arch()
     set(COMPILER_RT_OS_SUFFIX "-android")
   elseif(NOT APPLE) # Supported archs for Apple platforms are generated later
-    if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "i[2-6]86|x86|amd64")
+    if(COMPILER_RT_DEFAULT_TARGET_ONLY)
+      add_default_target_arch(${COMPILER_RT_DEFAULT_TARGET_ARCH})
+    elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "i[2-6]86|x86|amd64")
       if(NOT MSVC)
         test_target_arch(x86_64 "" "-m64")
         # FIXME: We build runtimes for both i686 and i386, as "clang -m32" may
@@ -168,6 +172,7 @@
       else()
         test_target_arch(arm "" "-march=armv7-a" "-mfloat-abi=soft")
         test_target_arch(armhf "" "-march=armv7-a" "-mfloat-abi=hard")
+        test_target_arch(armv6m "" "-march=armv6m" "-mfloat-abi=soft")
       endif()
     elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "aarch32")
       test_target_arch(aarch32 "" "-march=armv8-a")
diff --git a/cmake/builtin-config-ix.cmake b/cmake/builtin-config-ix.cmake
index 8cb4ca1..dc2ec16 100644
--- a/cmake/builtin-config-ix.cmake
+++ b/cmake/builtin-config-ix.cmake
@@ -24,7 +24,7 @@
 
 
 set(ARM64 aarch64)
-set(ARM32 arm armhf)
+set(ARM32 arm armhf armv6m)
 set(X86 i386 i686)
 set(X86_64 x86_64)
 set(MIPS32 mips mipsel)
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index e18ec40..26eb532 100644
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -31,6 +31,7 @@
 check_cxx_compiler_flag("-Werror -msse3" COMPILER_RT_HAS_MSSE3_FLAG)
 check_cxx_compiler_flag("-Werror -msse4.2"   COMPILER_RT_HAS_MSSE4_2_FLAG)
 check_cxx_compiler_flag(--sysroot=.          COMPILER_RT_HAS_SYSROOT_FLAG)
+check_cxx_compiler_flag("-Werror -mcrc"      COMPILER_RT_HAS_MCRC_FLAG)
 
 if(NOT WIN32 AND NOT CYGWIN)
   # MinGW warns if -fvisibility-inlines-hidden is used.
@@ -57,6 +58,7 @@
 check_cxx_compiler_flag("-Werror -Wnon-virtual-dtor"   COMPILER_RT_HAS_WNON_VIRTUAL_DTOR_FLAG)
 check_cxx_compiler_flag("-Werror -Wvariadic-macros"    COMPILER_RT_HAS_WVARIADIC_MACROS_FLAG)
 check_cxx_compiler_flag("-Werror -Wunused-parameter"   COMPILER_RT_HAS_WUNUSED_PARAMETER_FLAG)
+check_cxx_compiler_flag("-Werror -Wcovered-switch-default" COMPILER_RT_HAS_WCOVERED_SWITCH_DEFAULT_FLAG)
 
 check_cxx_compiler_flag(/W4 COMPILER_RT_HAS_W4_FLAG)
 check_cxx_compiler_flag(/WX COMPILER_RT_HAS_WX_FLAG)
@@ -96,13 +98,6 @@
 set(SIMPLE_SOURCE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/simple.cc)
 file(WRITE ${SIMPLE_SOURCE} "#include <stdlib.h>\n#include <stdio.h>\nint main() { printf(\"hello, world\"); }\n")
 
-# Add $arch as supported with no additional flags.
-macro(add_default_target_arch arch)
-  set(TARGET_${arch}_CFLAGS "")
-  set(CAN_TARGET_${arch} 1)
-  list(APPEND COMPILER_RT_SUPPORTED_ARCH ${arch})
-endmacro()
-
 # Detect whether the current target platform is 32-bit or 64-bit, and setup
 # the correct commandline flags needed to attempt to target 32-bit and 64-bit.
 if (NOT CMAKE_SIZEOF_VOID_P EQUAL 4 AND
@@ -169,7 +164,15 @@
 set(ALL_ASAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
     ${MIPS32} ${MIPS64} ${PPC64} ${S390X})
 set(ALL_DFSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64})
-set(ALL_LSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64})
+
+# Darwin does not support 32-bit thread-local storage on iOS versions
+# below 9.0. Until the min iOS version is bumped to 9.0, lsan will
+# not build for 32-bit darwin targets.
+if(APPLE)
+  set(ALL_LSAN_SUPPORTED_ARCH ${X86_64} ${ARM64})
+else()
+  set(ALL_LSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64} ${ARM64})
+endif()
 set(ALL_MSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64})
 set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC64}
     ${MIPS32} ${MIPS64} ${S390X})
@@ -179,8 +182,8 @@
 set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64})
 set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64})
 set(ALL_ESAN_SUPPORTED_ARCH ${X86_64} ${MIPS64})
-set(ALL_SCUDO_SUPPORTED_ARCH ${X86} ${X86_64})
-set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64})
+set(ALL_SCUDO_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64})
+set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64} powerpc64le)
 
 if(APPLE)
   include(CompilerRTDarwinUtils)
@@ -248,26 +251,26 @@
   set(CMAKE_OSX_DEPLOYMENT_TARGET "")
   
   set(DARWIN_COMMON_CFLAGS -stdlib=libc++)
-  set(DARWIN_COMMON_LINKFLAGS
+  set(DARWIN_COMMON_LINK_FLAGS
     -stdlib=libc++
     -lc++
     -lc++abi)
   
   check_linker_flag("-fapplication-extension" COMPILER_RT_HAS_APP_EXTENSION)
   if(COMPILER_RT_HAS_APP_EXTENSION)
-    list(APPEND DARWIN_COMMON_LINKFLAGS "-fapplication-extension")
+    list(APPEND DARWIN_COMMON_LINK_FLAGS "-fapplication-extension")
   endif()
 
   set(DARWIN_osx_CFLAGS
     ${DARWIN_COMMON_CFLAGS}
     -mmacosx-version-min=${SANITIZER_MIN_OSX_VERSION})
-  set(DARWIN_osx_LINKFLAGS
-    ${DARWIN_COMMON_LINKFLAGS}
+  set(DARWIN_osx_LINK_FLAGS
+    ${DARWIN_COMMON_LINK_FLAGS}
     -mmacosx-version-min=${SANITIZER_MIN_OSX_VERSION})
 
   if(DARWIN_osx_SYSROOT)
     list(APPEND DARWIN_osx_CFLAGS -isysroot ${DARWIN_osx_SYSROOT})
-    list(APPEND DARWIN_osx_LINKFLAGS -isysroot ${DARWIN_osx_SYSROOT})
+    list(APPEND DARWIN_osx_LINK_FLAGS -isysroot ${DARWIN_osx_SYSROOT})
   endif()
 
   # Figure out which arches to use for each OS
@@ -290,8 +293,8 @@
           ${DARWIN_COMMON_CFLAGS}
           ${DARWIN_${platform}_SANITIZER_MIN_VER_FLAG}
           -isysroot ${DARWIN_${platform}sim_SYSROOT})
-        set(DARWIN_${platform}sim_LINKFLAGS
-          ${DARWIN_COMMON_LINKFLAGS}
+        set(DARWIN_${platform}sim_LINK_FLAGS
+          ${DARWIN_COMMON_LINK_FLAGS}
           ${DARWIN_${platform}_SANITIZER_MIN_VER_FLAG}
           -isysroot ${DARWIN_${platform}sim_SYSROOT})
 
@@ -318,8 +321,8 @@
           ${DARWIN_COMMON_CFLAGS}
           ${DARWIN_${platform}_SANITIZER_MIN_VER_FLAG}
           -isysroot ${DARWIN_${platform}_SYSROOT})
-        set(DARWIN_${platform}_LINKFLAGS
-          ${DARWIN_COMMON_LINKFLAGS}
+        set(DARWIN_${platform}_LINK_FLAGS
+          ${DARWIN_COMMON_LINK_FLAGS}
           ${DARWIN_${platform}_SANITIZER_MIN_VER_FLAG}
           -isysroot ${DARWIN_${platform}_SYSROOT})
 
@@ -424,7 +427,9 @@
 
 find_program(GOLD_EXECUTABLE NAMES ${LLVM_DEFAULT_TARGET_TRIPLE}-ld.gold ld.gold ${LLVM_DEFAULT_TARGET_TRIPLE}-ld ld DOC "The gold linker")
 
-list(REMOVE_DUPLICATES COMPILER_RT_SUPPORTED_ARCH)
+if(COMPILER_RT_SUPPORTED_ARCH)
+  list(REMOVE_DUPLICATES COMPILER_RT_SUPPORTED_ARCH)
+endif()
 message(STATUS "Compiler-RT supported architectures: ${COMPILER_RT_SUPPORTED_ARCH}")
 
 if(ANDROID)
@@ -480,6 +485,13 @@
   set(COMPILER_RT_HAS_LSAN FALSE)
 endif()
 
+if(APPLE)
+  option(COMPILER_RT_ENABLE_LSAN_OSX "Enable building LSan for OS X - Experimental" Off)
+  if(COMPILER_RT_ENABLE_LSAN_OSX)
+    set(COMPILER_RT_HAS_LSAN TRUE)
+  endif()
+endif()
+
 if (COMPILER_RT_HAS_SANITIZER_COMMON AND MSAN_SUPPORTED_ARCH AND
     OS_NAME MATCHES "Linux")
   set(COMPILER_RT_HAS_MSAN TRUE)
@@ -488,21 +500,21 @@
 endif()
 
 if (PROFILE_SUPPORTED_ARCH AND NOT LLVM_USE_SANITIZER AND
-    OS_NAME MATCHES "Darwin|Linux|FreeBSD|Windows")
+    OS_NAME MATCHES "Darwin|Linux|FreeBSD|Windows|Android")
   set(COMPILER_RT_HAS_PROFILE TRUE)
 else()
   set(COMPILER_RT_HAS_PROFILE FALSE)
 endif()
 
 if (COMPILER_RT_HAS_SANITIZER_COMMON AND TSAN_SUPPORTED_ARCH AND
-    OS_NAME MATCHES "Darwin|Linux|FreeBSD")
+    OS_NAME MATCHES "Darwin|Linux|FreeBSD|Android")
   set(COMPILER_RT_HAS_TSAN TRUE)
 else()
   set(COMPILER_RT_HAS_TSAN FALSE)
 endif()
 
 if (COMPILER_RT_HAS_SANITIZER_COMMON AND UBSAN_SUPPORTED_ARCH AND
-    OS_NAME MATCHES "Darwin|Linux|FreeBSD|Windows")
+    OS_NAME MATCHES "Darwin|Linux|FreeBSD|Windows|Android")
   set(COMPILER_RT_HAS_UBSAN TRUE)
 else()
   set(COMPILER_RT_HAS_UBSAN FALSE)
diff --git a/include/sanitizer/coverage_interface.h b/include/sanitizer/coverage_interface.h
index ecbc502..b44c5ac 100644
--- a/include/sanitizer/coverage_interface.h
+++ b/include/sanitizer/coverage_interface.h
@@ -23,6 +23,11 @@
   void __sanitizer_cov_init();
   // Record and dump coverage info.
   void __sanitizer_cov_dump();
+
+  //  Dump collected coverage info. Sorts pcs by module into individual
+  //  .sancov files.
+  void __sanitizer_dump_coverage(const uintptr_t *pcs, uintptr_t len);
+
   // Open <name>.sancov.packed in the coverage directory and return the file
   // descriptor. Returns -1 on failure, or if coverage dumping is disabled.
   // This is intended for use by sandboxing code.
diff --git a/include/xray/xray_interface.h b/include/xray/xray_interface.h
index 9e712b1..52a7e1d 100644
--- a/include/xray/xray_interface.h
+++ b/include/xray/xray_interface.h
@@ -18,7 +18,13 @@
 
 extern "C" {
 
-enum XRayEntryType { ENTRY = 0, EXIT = 1, TAIL = 2 };
+// Synchronize this with AsmPrinter::SledKind in LLVM.
+enum XRayEntryType {
+  ENTRY = 0,
+  EXIT = 1,
+  TAIL = 2,
+  LOG_ARGS_ENTRY = 3,
+};
 
 // Provide a function to invoke for when instrumentation points are hit. This is
 // a user-visible control surface that overrides the default implementation. The
@@ -60,6 +66,17 @@
 // Reverses the effect of __xray_patch(). See XRayPatchingStatus for possible
 // result values.
 extern XRayPatchingStatus __xray_unpatch();
+
+// Use XRay to log the first argument of each (instrumented) function call.
+// When this function exits, all threads will have observed the effect and
+// start logging their subsequent affected function calls (if patched).
+//
+// Returns 1 on success, 0 on error.
+extern int __xray_set_handler_arg1(void (*)(int32_t, XRayEntryType, uint64_t));
+
+// Disables the XRay handler used to log first arguments of function calls.
+// Returns 1 on success, 0 on error.
+extern int __xray_remove_handler_arg1();
 }
 
 #endif
diff --git a/include/xray/xray_log_interface.h b/include/xray/xray_log_interface.h
new file mode 100644
index 0000000..f98b331
--- /dev/null
+++ b/include/xray/xray_log_interface.h
@@ -0,0 +1,51 @@
+//===-- xray_log_interface.h ----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+// APIs for installing a new logging implementation.
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_LOG_INTERFACE_H
+#define XRAY_XRAY_LOG_INTERFACE_H
+
+#include "xray/xray_interface.h"
+#include <stddef.h>
+
+extern "C" {
+
+enum XRayLogInitStatus {
+  XRAY_LOG_UNINITIALIZED = 0,
+  XRAY_LOG_INITIALIZING = 1,
+  XRAY_LOG_INITIALIZED = 2,
+  XRAY_LOG_FINALIZING = 3,
+  XRAY_LOG_FINALIZED = 4,
+};
+
+enum XRayLogFlushStatus {
+  XRAY_LOG_NOT_FLUSHING = 0,
+  XRAY_LOG_FLUSHING = 1,
+  XRAY_LOG_FLUSHED = 2,
+};
+
+struct XRayLogImpl {
+  XRayLogInitStatus (*log_init)(size_t, size_t, void *, size_t);
+  XRayLogInitStatus (*log_finalize)();
+  void (*handle_arg0)(int32_t, XRayEntryType);
+  XRayLogFlushStatus (*flush_log)();
+};
+
+void __xray_set_log_impl(XRayLogImpl Impl);
+XRayLogInitStatus __xray_log_init(size_t BufferSize, size_t MaxBuffers,
+                                  void *Args, size_t ArgsSize);
+XRayLogInitStatus __xray_log_finalize();
+XRayLogFlushStatus __xray_log_flushLog();
+
+} // extern "C"
+
+#endif // XRAY_XRAY_LOG_INTERFACE_H
diff --git a/include/xray/xray_records.h b/include/xray/xray_records.h
index 34c236b..71637d1 100644
--- a/include/xray/xray_records.h
+++ b/include/xray/xray_records.h
@@ -21,6 +21,7 @@
 
 enum FileTypes {
   NAIVE_LOG = 0,
+  FDR_LOG = 1,
 };
 
 // This data structure is used to describe the contents of the file. We use this
@@ -40,6 +41,11 @@
 
   // The frequency by which TSC increases per-second.
   alignas(8) uint64_t CycleFrequency = 0;
+
+  // The current civiltime timestamp, as retrieved from 'clock_gettime'. This
+  // allows readers of the file to determine when the file was created or
+  // written down.
+  struct timespec TS;
 } __attribute__((packed));
 
 static_assert(sizeof(XRayFileHeader) == 32, "XRayFileHeader != 32 bytes");
diff --git a/lib/asan/CMakeLists.txt b/lib/asan/CMakeLists.txt
index e89d8bd..e940cc7 100644
--- a/lib/asan/CMakeLists.txt
+++ b/lib/asan/CMakeLists.txt
@@ -36,13 +36,16 @@
 include_directories(..)
 
 set(ASAN_CFLAGS ${SANITIZER_COMMON_CFLAGS})
+
+# FIXME(fjricci) - remove this once lsan for darwin is fully enabled
+if(APPLE AND COMPILER_RT_HAS_LSAN)
+  set(ASAN_CFLAGS ${ASAN_CFLAGS} -DCAN_SANITIZE_LEAKS_MAC=1)
+endif()
 append_rtti_flag(OFF ASAN_CFLAGS)
 
 set(ASAN_DYNAMIC_LINK_FLAGS)
 
 if(ANDROID)
-  list(APPEND ASAN_COMMON_DEFINITIONS
-    ASAN_LOW_MEMORY=1)
 # On Android, -z global does not do what it is documented to do.
 # On Android, -z global moves the library ahead in the lookup order,
 # placing it right after the LD_PRELOADs. This is used to compensate for the fact
@@ -108,6 +111,11 @@
 add_compiler_rt_component(asan)
 
 if(APPLE)
+  add_weak_symbols("asan" WEAK_SYMBOL_LINK_FLAGS)
+  add_weak_symbols("lsan" WEAK_SYMBOL_LINK_FLAGS)
+  add_weak_symbols("ubsan" WEAK_SYMBOL_LINK_FLAGS)
+  add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
+
   add_compiler_rt_runtime(clang_rt.asan
     SHARED
     OS ${SANITIZER_COMMON_SUPPORTED_OS}
@@ -119,6 +127,7 @@
                 RTLSanCommon
                 RTUbsan
     CFLAGS ${ASAN_DYNAMIC_CFLAGS}
+    LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS}
     DEFS ${ASAN_DYNAMIC_DEFINITIONS}
     PARENT_TARGET asan)
 else()
@@ -173,6 +182,21 @@
       set(VERSION_SCRIPT_FLAG)
     endif()
 
+    set(ASAN_DYNAMIC_WEAK_INTERCEPTION)
+    if (MSVC)
+      add_compiler_rt_object_libraries(AsanWeakInterception
+        ${SANITIZER_COMMON_SUPPORTED_OS}
+        ARCHS ${arch}
+        SOURCES asan_win_weak_interception.cc
+        CFLAGS ${ASAN_CFLAGS} -DSANITIZER_DYNAMIC
+        DEFS ${ASAN_COMMON_DEFINITIONS})
+      set(ASAN_DYNAMIC_WEAK_INTERCEPTION
+          AsanWeakInterception
+          UbsanWeakInterception
+          SancovWeakInterception
+          SanitizerCommonWeakInterception)
+    endif()
+
     add_compiler_rt_runtime(clang_rt.asan
       SHARED
       ARCHS ${arch}
@@ -184,8 +208,9 @@
               # add_dependencies(clang_rt.asan-dynamic-${arch} clang_rt.asan-dynamic-${arch}-version-list)
               RTAsan_dynamic_version_script_dummy
               RTUbsan_cxx
+              ${ASAN_DYNAMIC_WEAK_INTERCEPTION}
       CFLAGS ${ASAN_DYNAMIC_CFLAGS}
-      LINKFLAGS ${ASAN_DYNAMIC_LINK_FLAGS}
+      LINK_FLAGS ${ASAN_DYNAMIC_LINK_FLAGS}
                 ${VERSION_SCRIPT_FLAG}
       LINK_LIBS ${ASAN_DYNAMIC_LIBS}
       DEFS ${ASAN_DYNAMIC_DEFINITIONS}
@@ -202,28 +227,46 @@
     endif()
 
     if (WIN32)
+      add_compiler_rt_object_libraries(AsanDllThunk
+        ${SANITIZER_COMMON_SUPPORTED_OS}
+        ARCHS ${arch}
+        SOURCES asan_globals_win.cc
+                asan_win_dll_thunk.cc
+        CFLAGS ${ASAN_CFLAGS} -DSANITIZER_DLL_THUNK
+        DEFS ${ASAN_COMMON_DEFINITIONS})
+
       add_compiler_rt_runtime(clang_rt.asan_dll_thunk
         STATIC
         ARCHS ${arch}
-        SOURCES asan_win_dll_thunk.cc
-                asan_globals_win.cc
-                $<TARGET_OBJECTS:RTInterception.${arch}>
-        CFLAGS ${ASAN_CFLAGS} -DASAN_DLL_THUNK
-        DEFS ${ASAN_COMMON_DEFINITIONS}
+        OBJECT_LIBS AsanDllThunk
+                    UbsanDllThunk
+                    SancovDllThunk
+                    SanitizerCommonDllThunk
+        SOURCES $<TARGET_OBJECTS:RTInterception.${arch}>
         PARENT_TARGET asan)
 
-      set(DYNAMIC_RUNTIME_THUNK_CFLAGS "-DASAN_DYNAMIC_RUNTIME_THUNK")
+      set(DYNAMIC_RUNTIME_THUNK_CFLAGS "-DSANITIZER_DYNAMIC_RUNTIME_THUNK")
       if(MSVC)
         list(APPEND DYNAMIC_RUNTIME_THUNK_CFLAGS "-Zl")
       elseif(CMAKE_C_COMPILER_ID MATCHES Clang)
         list(APPEND DYNAMIC_RUNTIME_THUNK_CFLAGS "-nodefaultlibs")
       endif()
 
+      add_compiler_rt_object_libraries(AsanDynamicRuntimeThunk
+        ${SANITIZER_COMMON_SUPPORTED_OS}
+        ARCHS ${arch}
+        SOURCES asan_globals_win.cc
+                asan_win_dynamic_runtime_thunk.cc
+        CFLAGS ${ASAN_CFLAGS} ${DYNAMIC_RUNTIME_THUNK_CFLAGS}
+        DEFS ${ASAN_COMMON_DEFINITIONS})
+
       add_compiler_rt_runtime(clang_rt.asan_dynamic_runtime_thunk
         STATIC
         ARCHS ${arch}
-        SOURCES asan_win_dynamic_runtime_thunk.cc
-                asan_globals_win.cc
+        OBJECT_LIBS AsanDynamicRuntimeThunk
+                    UbsanDynamicRuntimeThunk
+                    SancovDynamicRuntimeThunk
+                    SanitizerCommonDynamicRuntimeThunk
         CFLAGS ${ASAN_CFLAGS} ${DYNAMIC_RUNTIME_THUNK_CFLAGS}
         DEFS ${ASAN_COMMON_DEFINITIONS}
         PARENT_TARGET asan)
diff --git a/lib/asan/asan.syms.extra b/lib/asan/asan.syms.extra
index 007aafe..f8e9b3a 100644
--- a/lib/asan/asan.syms.extra
+++ b/lib/asan/asan.syms.extra
@@ -1,3 +1,4 @@
 __asan_*
 __lsan_*
 __ubsan_*
+__sancov_*
diff --git a/lib/asan/asan_activation.cc b/lib/asan/asan_activation.cc
index bb41a0e..7e4e604 100644
--- a/lib/asan/asan_activation.cc
+++ b/lib/asan/asan_activation.cc
@@ -77,12 +77,13 @@
 
   void Print() {
     Report(
-        "quarantine_size_mb %d, max_redzone %d, poison_heap %d, "
-        "malloc_context_size %d, alloc_dealloc_mismatch %d, "
-        "allocator_may_return_null %d, coverage %d, coverage_dir %s, "
-        "allocator_release_to_os_interval_ms %d\n",
-        allocator_options.quarantine_size_mb, allocator_options.max_redzone,
-        poison_heap, malloc_context_size,
+        "quarantine_size_mb %d, thread_local_quarantine_size_kb %d, "
+        "max_redzone %d, poison_heap %d, malloc_context_size %d, "
+        "alloc_dealloc_mismatch %d, allocator_may_return_null %d, coverage %d, "
+        "coverage_dir %s, allocator_release_to_os_interval_ms %d\n",
+        allocator_options.quarantine_size_mb,
+        allocator_options.thread_local_quarantine_size_kb,
+        allocator_options.max_redzone, poison_heap, malloc_context_size,
         allocator_options.alloc_dealloc_mismatch,
         allocator_options.may_return_null, coverage, coverage_dir,
         allocator_options.release_to_os_interval_ms);
@@ -109,6 +110,7 @@
 
   AllocatorOptions disabled = asan_deactivated_flags.allocator_options;
   disabled.quarantine_size_mb = 0;
+  disabled.thread_local_quarantine_size_kb = 0;
   disabled.min_redzone = 16;  // Redzone must be at least 16 bytes long.
   disabled.max_redzone = 16;
   disabled.alloc_dealloc_mismatch = false;
diff --git a/lib/asan/asan_activation_flags.inc b/lib/asan/asan_activation_flags.inc
index 67440e6..1c66e5b 100644
--- a/lib/asan/asan_activation_flags.inc
+++ b/lib/asan/asan_activation_flags.inc
@@ -24,6 +24,7 @@
 ASAN_ACTIVATION_FLAG(int, redzone)
 ASAN_ACTIVATION_FLAG(int, max_redzone)
 ASAN_ACTIVATION_FLAG(int, quarantine_size_mb)
+ASAN_ACTIVATION_FLAG(int, thread_local_quarantine_size_kb)
 ASAN_ACTIVATION_FLAG(bool, alloc_dealloc_mismatch)
 ASAN_ACTIVATION_FLAG(bool, poison_heap)
 
diff --git a/lib/asan/asan_allocator.cc b/lib/asan/asan_allocator.cc
index baba537..4be1f1c 100644
--- a/lib/asan/asan_allocator.cc
+++ b/lib/asan/asan_allocator.cc
@@ -207,6 +207,7 @@
 
 void AllocatorOptions::SetFrom(const Flags *f, const CommonFlags *cf) {
   quarantine_size_mb = f->quarantine_size_mb;
+  thread_local_quarantine_size_kb = f->thread_local_quarantine_size_kb;
   min_redzone = f->redzone;
   max_redzone = f->max_redzone;
   may_return_null = cf->allocator_may_return_null;
@@ -216,6 +217,7 @@
 
 void AllocatorOptions::CopyTo(Flags *f, CommonFlags *cf) {
   f->quarantine_size_mb = quarantine_size_mb;
+  f->thread_local_quarantine_size_kb = thread_local_quarantine_size_kb;
   f->redzone = min_redzone;
   f->max_redzone = max_redzone;
   cf->allocator_may_return_null = may_return_null;
@@ -226,8 +228,6 @@
 struct Allocator {
   static const uptr kMaxAllowedMallocSize =
       FIRST_32_SECOND_64(3UL << 30, 1ULL << 40);
-  static const uptr kMaxThreadLocalQuarantine =
-      FIRST_32_SECOND_64(1 << 18, 1 << 20);
 
   AsanAllocator allocator;
   AsanQuarantine quarantine;
@@ -256,7 +256,7 @@
   void SharedInitCode(const AllocatorOptions &options) {
     CheckOptions(options);
     quarantine.Init((uptr)options.quarantine_size_mb << 20,
-                    kMaxThreadLocalQuarantine);
+                    (uptr)options.thread_local_quarantine_size_kb << 10);
     atomic_store(&alloc_dealloc_mismatch, options.alloc_dealloc_mismatch,
                  memory_order_release);
     atomic_store(&min_redzone, options.min_redzone, memory_order_release);
@@ -269,24 +269,24 @@
   }
 
   void RePoisonChunk(uptr chunk) {
-    // This could a user-facing chunk (with redzones), or some internal
+    // This could be a user-facing chunk (with redzones), or some internal
     // housekeeping chunk, like TransferBatch. Start by assuming the former.
     AsanChunk *ac = GetAsanChunk((void *)chunk);
     uptr allocated_size = allocator.GetActuallyAllocatedSize((void *)ac);
     uptr beg = ac->Beg();
     uptr end = ac->Beg() + ac->UsedSize(true);
     uptr chunk_end = chunk + allocated_size;
-    if (chunk < beg && beg < end && end <= chunk_end) {
-      // Looks like a valid AsanChunk. Or maybe not. Be conservative and only
-      // poison the redzones.
+    if (chunk < beg && beg < end && end <= chunk_end &&
+        ac->chunk_state == CHUNK_ALLOCATED) {
+      // Looks like a valid AsanChunk in use, poison redzones only.
       PoisonShadow(chunk, beg - chunk, kAsanHeapLeftRedzoneMagic);
       uptr end_aligned_down = RoundDownTo(end, SHADOW_GRANULARITY);
       FastPoisonShadowPartialRightRedzone(
           end_aligned_down, end - end_aligned_down,
           chunk_end - end_aligned_down, kAsanHeapLeftRedzoneMagic);
     } else {
-      // This can not be an AsanChunk. Poison everything. It may be reused as
-      // AsanChunk later.
+      // This is either not an AsanChunk or a freed or quarantined AsanChunk.
+      // In either case, poison everything.
       PoisonShadow(chunk, allocated_size, kAsanHeapLeftRedzoneMagic);
     }
   }
@@ -310,6 +310,7 @@
 
   void GetOptions(AllocatorOptions *options) const {
     options->quarantine_size_mb = quarantine.GetSize() >> 20;
+    options->thread_local_quarantine_size_kb = quarantine.GetCacheSize() >> 10;
     options->min_redzone = atomic_load(&min_redzone, memory_order_acquire);
     options->max_redzone = atomic_load(&max_redzone, memory_order_acquire);
     options->may_return_null = allocator.MayReturnNull();
@@ -553,7 +554,17 @@
     uptr chunk_beg = p - kChunkHeaderSize;
     AsanChunk *m = reinterpret_cast<AsanChunk *>(chunk_beg);
 
+    // On Windows, uninstrumented DLLs may allocate memory before ASan hooks
+    // malloc. Don't report an invalid free in this case.
+    if (SANITIZER_WINDOWS &&
+        !get_allocator().PointerIsMine(ptr)) {
+      if (!IsSystemHeapAddress(p))
+        ReportFreeNotMalloced(p, stack);
+      return;
+    }
+
     ASAN_FREE_HOOK(ptr);
+
     // Must mark the chunk as quarantined before any changes to its metadata.
     // Do not quarantine given chunk if we failed to set CHUNK_QUARANTINE flag.
     if (!AtomicallySetQuarantineFlagIfAllocated(m, ptr, stack)) return;
@@ -680,6 +691,7 @@
 
   void PrintStats() {
     allocator.PrintStats();
+    quarantine.PrintStats();
   }
 
   void ForceLock() {
@@ -699,18 +711,21 @@
   return instance.allocator;
 }
 
-bool AsanChunkView::IsValid() {
+bool AsanChunkView::IsValid() const {
   return chunk_ && chunk_->chunk_state != CHUNK_AVAILABLE;
 }
-bool AsanChunkView::IsAllocated() {
+bool AsanChunkView::IsAllocated() const {
   return chunk_ && chunk_->chunk_state == CHUNK_ALLOCATED;
 }
-uptr AsanChunkView::Beg() { return chunk_->Beg(); }
-uptr AsanChunkView::End() { return Beg() + UsedSize(); }
-uptr AsanChunkView::UsedSize() { return chunk_->UsedSize(); }
-uptr AsanChunkView::AllocTid() { return chunk_->alloc_tid; }
-uptr AsanChunkView::FreeTid() { return chunk_->free_tid; }
-AllocType AsanChunkView::GetAllocType() {
+bool AsanChunkView::IsQuarantined() const {
+  return chunk_ && chunk_->chunk_state == CHUNK_QUARANTINE;
+}
+uptr AsanChunkView::Beg() const { return chunk_->Beg(); }
+uptr AsanChunkView::End() const { return Beg() + UsedSize(); }
+uptr AsanChunkView::UsedSize() const { return chunk_->UsedSize(); }
+uptr AsanChunkView::AllocTid() const { return chunk_->alloc_tid; }
+uptr AsanChunkView::FreeTid() const { return chunk_->free_tid; }
+AllocType AsanChunkView::GetAllocType() const {
   return (AllocType)chunk_->alloc_type;
 }
 
@@ -721,14 +736,14 @@
   return res;
 }
 
-u32 AsanChunkView::GetAllocStackId() { return chunk_->alloc_context_id; }
-u32 AsanChunkView::GetFreeStackId() { return chunk_->free_context_id; }
+u32 AsanChunkView::GetAllocStackId() const { return chunk_->alloc_context_id; }
+u32 AsanChunkView::GetFreeStackId() const { return chunk_->free_context_id; }
 
-StackTrace AsanChunkView::GetAllocStack() {
+StackTrace AsanChunkView::GetAllocStack() const {
   return GetStackTraceFromId(GetAllocStackId());
 }
 
-StackTrace AsanChunkView::GetFreeStack() {
+StackTrace AsanChunkView::GetFreeStack() const {
   return GetStackTraceFromId(GetFreeStackId());
 }
 
@@ -953,15 +968,13 @@
 
 #if !SANITIZER_SUPPORTS_WEAK_HOOKS
 // Provide default (no-op) implementation of malloc hooks.
-extern "C" {
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-void __sanitizer_malloc_hook(void *ptr, uptr size) {
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_malloc_hook,
+                             void *ptr, uptr size) {
   (void)ptr;
   (void)size;
 }
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-void __sanitizer_free_hook(void *ptr) {
+
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_free_hook, void *ptr) {
   (void)ptr;
 }
-} // extern "C"
 #endif
diff --git a/lib/asan/asan_allocator.h b/lib/asan/asan_allocator.h
index 62415f3..ee28ecf 100644
--- a/lib/asan/asan_allocator.h
+++ b/lib/asan/asan_allocator.h
@@ -33,6 +33,7 @@
 
 struct AllocatorOptions {
   u32 quarantine_size_mb;
+  u32 thread_local_quarantine_size_kb;
   u16 min_redzone;
   u16 max_redzone;
   u8 may_return_null;
@@ -50,28 +51,29 @@
 class AsanChunkView {
  public:
   explicit AsanChunkView(AsanChunk *chunk) : chunk_(chunk) {}
-  bool IsValid();        // Checks if AsanChunkView points to a valid allocated
-                         // or quarantined chunk.
-  bool IsAllocated();    // Checks if the memory is currently allocated.
-  uptr Beg();            // First byte of user memory.
-  uptr End();            // Last byte of user memory.
-  uptr UsedSize();       // Size requested by the user.
-  uptr AllocTid();
-  uptr FreeTid();
+  bool IsValid() const;        // Checks if AsanChunkView points to a valid
+                               // allocated or quarantined chunk.
+  bool IsAllocated() const;    // Checks if the memory is currently allocated.
+  bool IsQuarantined() const;  // Checks if the memory is currently quarantined.
+  uptr Beg() const;            // First byte of user memory.
+  uptr End() const;            // First byte past the user memory.
+  uptr UsedSize() const;       // Size requested by the user.
+  uptr AllocTid() const;
+  uptr FreeTid() const;
   bool Eq(const AsanChunkView &c) const { return chunk_ == c.chunk_; }
-  u32 GetAllocStackId();
-  u32 GetFreeStackId();
-  StackTrace GetAllocStack();
-  StackTrace GetFreeStack();
-  AllocType GetAllocType();
-  bool AddrIsInside(uptr addr, uptr access_size, sptr *offset) {
+  u32 GetAllocStackId() const;
+  u32 GetFreeStackId() const;
+  StackTrace GetAllocStack() const;
+  StackTrace GetFreeStack() const;
+  AllocType GetAllocType() const;
+  bool AddrIsInside(uptr addr, uptr access_size, sptr *offset) const {
     if (addr >= Beg() && (addr + access_size) <= End()) {
       *offset = addr - Beg();
       return true;
     }
     return false;
   }
-  bool AddrIsAtLeft(uptr addr, uptr access_size, sptr *offset) {
+  bool AddrIsAtLeft(uptr addr, uptr access_size, sptr *offset) const {
     (void)access_size;
     if (addr < Beg()) {
       *offset = Beg() - addr;
@@ -79,7 +81,7 @@
     }
     return false;
   }
-  bool AddrIsAtRight(uptr addr, uptr access_size, sptr *offset) {
+  bool AddrIsAtRight(uptr addr, uptr access_size, sptr *offset) const {
     if (addr + access_size > End()) {
       *offset = addr - End();
       return true;
diff --git a/lib/asan/asan_errors.cc b/lib/asan/asan_errors.cc
index c287ba1..57490ad 100644
--- a/lib/asan/asan_errors.cc
+++ b/lib/asan/asan_errors.cc
@@ -58,10 +58,22 @@
   SignalContext::DumpAllRegisters(context);
 }
 
+static void MaybeReportNonExecRegion(uptr pc) {
+#if SANITIZER_FREEBSD || SANITIZER_LINUX
+  MemoryMappingLayout proc_maps(/*cache_enabled*/ true);
+  uptr start, end, protection;
+  while (proc_maps.Next(&start, &end, nullptr, nullptr, 0, &protection)) {
+    if (pc >= start && pc < end &&
+        !(protection & MemoryMappingLayout::kProtectionExecute))
+      Report("Hint: PC is at a non-executable region. Maybe a wild jump?\n");
+  }
+#endif
+}
+
 void ErrorDeadlySignal::Print() {
   Decorator d;
   Printf("%s", d.Warning());
-  const char *description = DescribeSignalOrException(signo);
+  const char *description = __sanitizer::DescribeSignalOrException(signo);
   Report(
       "ERROR: AddressSanitizer: %s on unknown address %p (pc %p bp %p sp %p "
       "T%d)\n",
@@ -77,6 +89,7 @@
     if (addr < GetPageSizeCached())
       Report("Hint: address points to the zero page.\n");
   }
+  MaybeReportNonExecRegion(pc);
   scariness.Print();
   BufferedStackTrace stack;
   GetStackTraceWithPcBpAndContext(&stack, kStackTraceMax, pc, bp, context,
diff --git a/lib/asan/asan_flags.cc b/lib/asan/asan_flags.cc
index 345a35c..74c441a 100644
--- a/lib/asan/asan_flags.cc
+++ b/lib/asan/asan_flags.cc
@@ -156,9 +156,24 @@
     f->quarantine_size_mb = f->quarantine_size >> 20;
   if (f->quarantine_size_mb < 0) {
     const int kDefaultQuarantineSizeMb =
-        (ASAN_LOW_MEMORY) ? 1UL << 6 : 1UL << 8;
+        (ASAN_LOW_MEMORY) ? 1UL << 4 : 1UL << 8;
     f->quarantine_size_mb = kDefaultQuarantineSizeMb;
   }
+  if (f->thread_local_quarantine_size_kb < 0) {
+    const u32 kDefaultThreadLocalQuarantineSizeKb =
+        // It is not advised to go lower than 64Kb, otherwise quarantine batches
+        // pushed from thread local quarantine to global one will create too
+        // much overhead. One quarantine batch size is 8Kb and it holds up to
+        // 1021 chunks, which amounts to 1/8 memory overhead per batch when
+        // thread local quarantine is set to 64Kb.
+        (ASAN_LOW_MEMORY) ? 1 << 6 : FIRST_32_SECOND_64(1 << 8, 1 << 10);
+    f->thread_local_quarantine_size_kb = kDefaultThreadLocalQuarantineSizeKb;
+  }
+  if (f->thread_local_quarantine_size_kb == 0 && f->quarantine_size_mb > 0) {
+    Report("%s: thread_local_quarantine_size_kb can be set to 0 only when "
+           "quarantine_size_mb is set to 0\n", SanitizerToolName);
+    Die();
+  }
   if (!f->replace_str && common_flags()->intercept_strlen) {
     Report("WARNING: strlen interceptor is enabled even though replace_str=0. "
            "Use intercept_strlen=0 to disable it.");
@@ -171,9 +186,6 @@
 
 }  // namespace __asan
 
-#if !SANITIZER_SUPPORTS_WEAK_HOOKS
-extern "C" {
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-const char* __asan_default_options() { return ""; }
-}  // extern "C"
-#endif
+SANITIZER_INTERFACE_WEAK_DEF(const char*, __asan_default_options, void) {
+  return "";
+}
diff --git a/lib/asan/asan_flags.inc b/lib/asan/asan_flags.inc
index 5272477..4712efb 100644
--- a/lib/asan/asan_flags.inc
+++ b/lib/asan/asan_flags.inc
@@ -23,6 +23,12 @@
           "Size (in Mb) of quarantine used to detect use-after-free "
           "errors. Lower value may reduce memory usage but increase the "
           "chance of false negatives.")
+ASAN_FLAG(int, thread_local_quarantine_size_kb, -1,
+          "Size (in Kb) of thread local quarantine used to detect "
+          "use-after-free errors. Lower value may reduce memory usage but "
+          "increase the chance of false negatives. It is not advised to go "
+          "lower than 64Kb, otherwise frequent transfers to global quarantine "
+          "might affect performance.")
 ASAN_FLAG(int, redzone, 16,
           "Minimal size (in bytes) of redzones around heap objects. "
           "Requirement: redzone >= 16, is a power of two.")
diff --git a/lib/asan/asan_globals_win.cc b/lib/asan/asan_globals_win.cc
index 56c0d1a..261762b 100644
--- a/lib/asan/asan_globals_win.cc
+++ b/lib/asan/asan_globals_win.cc
@@ -29,7 +29,7 @@
   __asan_global *end = &__asan_globals_end;
   uptr bytediff = (uptr)end - (uptr)start;
   if (bytediff % sizeof(__asan_global) != 0) {
-#ifdef ASAN_DLL_THUNK
+#ifdef SANITIZER_DLL_THUNK
     __debugbreak();
 #else
     CHECK("corrupt asan global array");
diff --git a/lib/asan/asan_globals_win.h b/lib/asan/asan_globals_win.h
deleted file mode 100644
index d4ed9c1..0000000
--- a/lib/asan/asan_globals_win.h
+++ /dev/null
@@ -1,34 +0,0 @@
-//===-- asan_globals_win.h --------------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Interface to the Windows-specific global management code. Separated into a
-// standalone header to allow inclusion from asan_win_dynamic_runtime_thunk,
-// which defines symbols that clash with other sanitizer headers.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ASAN_GLOBALS_WIN_H
-#define ASAN_GLOBALS_WIN_H
-
-#if !defined(_MSC_VER)
-#error "this file is Windows-only, and uses MSVC pragmas"
-#endif
-
-#if defined(_WIN64)
-#define SANITIZER_SYM_PREFIX
-#else
-#define SANITIZER_SYM_PREFIX "_"
-#endif
-
-// Use this macro to force linking asan_globals_win.cc into the DSO.
-#define ASAN_LINK_GLOBALS_WIN() \
-  __pragma(                     \
-      comment(linker, "/include:" SANITIZER_SYM_PREFIX "__asan_dso_reg_hook"))
-
-#endif // ASAN_GLOBALS_WIN_H
diff --git a/lib/asan/asan_interceptors.cc b/lib/asan/asan_interceptors.cc
index 23f2e44..6ee3266 100644
--- a/lib/asan/asan_interceptors.cc
+++ b/lib/asan/asan_interceptors.cc
@@ -81,6 +81,51 @@
     }                                                                   \
   } while (0)
 
+// memcpy is called during __asan_init() from the internals of printf(...).
+// We do not treat memcpy with to==from as a bug.
+// See http://llvm.org/bugs/show_bug.cgi?id=11763.
+#define ASAN_MEMCPY_IMPL(ctx, to, from, size)                           \
+  do {                                                                  \
+    if (UNLIKELY(!asan_inited)) return internal_memcpy(to, from, size); \
+    if (asan_init_is_running) {                                         \
+      return REAL(memcpy)(to, from, size);                              \
+    }                                                                   \
+    ENSURE_ASAN_INITED();                                               \
+    if (flags()->replace_intrin) {                                      \
+      if (to != from) {                                                 \
+        CHECK_RANGES_OVERLAP("memcpy", to, size, from, size);           \
+      }                                                                 \
+      ASAN_READ_RANGE(ctx, from, size);                                 \
+      ASAN_WRITE_RANGE(ctx, to, size);                                  \
+    }                                                                   \
+    return REAL(memcpy)(to, from, size);                                \
+  } while (0)
+
+// memset is called inside Printf.
+#define ASAN_MEMSET_IMPL(ctx, block, c, size)                           \
+  do {                                                                  \
+    if (UNLIKELY(!asan_inited)) return internal_memset(block, c, size); \
+    if (asan_init_is_running) {                                         \
+      return REAL(memset)(block, c, size);                              \
+    }                                                                   \
+    ENSURE_ASAN_INITED();                                               \
+    if (flags()->replace_intrin) {                                      \
+      ASAN_WRITE_RANGE(ctx, block, size);                               \
+    }                                                                   \
+    return REAL(memset)(block, c, size);                                \
+  } while (0)
+
+#define ASAN_MEMMOVE_IMPL(ctx, to, from, size)                           \
+  do {                                                                   \
+    if (UNLIKELY(!asan_inited)) return internal_memmove(to, from, size); \
+    ENSURE_ASAN_INITED();                                                \
+    if (flags()->replace_intrin) {                                       \
+      ASAN_READ_RANGE(ctx, from, size);                                  \
+      ASAN_WRITE_RANGE(ctx, to, size);                                   \
+    }                                                                    \
+    return internal_memmove(to, from, size);                             \
+  } while (0)
+
 #define ASAN_READ_RANGE(ctx, offset, size) \
   ACCESS_MEMORY_RANGE(ctx, offset, size, false)
 #define ASAN_WRITE_RANGE(ctx, offset, size) \
@@ -183,9 +228,11 @@
 // Strict init-order checking is dlopen-hostile:
 // https://github.com/google/sanitizers/issues/178
 #define COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag)                           \
-  if (flags()->strict_init_order) {                                            \
-    StopInitOrderChecking();                                                   \
-  }
+  do {                                                                         \
+    if (flags()->strict_init_order)                                            \
+      StopInitOrderChecking();                                                 \
+    CheckNoDeepBind(filename, flag);                                           \
+  } while (false)
 #define COMMON_INTERCEPTOR_ON_EXIT(ctx) OnExit()
 #define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle) \
   CoverageUpdateMapping()
@@ -198,10 +245,25 @@
   } else {                                                                     \
     *begin = *end = 0;                                                         \
   }
-// Asan needs custom handling of these:
-#undef SANITIZER_INTERCEPT_MEMSET
-#undef SANITIZER_INTERCEPT_MEMMOVE
-#undef SANITIZER_INTERCEPT_MEMCPY
+
+#define COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size) \
+  do {                                                       \
+    ASAN_INTERCEPTOR_ENTER(ctx, memmove);                    \
+    ASAN_MEMMOVE_IMPL(ctx, to, from, size);                  \
+  } while (false)
+
+#define COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size) \
+  do {                                                      \
+    ASAN_INTERCEPTOR_ENTER(ctx, memcpy);                    \
+    ASAN_MEMCPY_IMPL(ctx, to, from, size);                  \
+  } while (false)
+
+#define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size) \
+  do {                                                      \
+    ASAN_INTERCEPTOR_ENTER(ctx, memset);                    \
+    ASAN_MEMSET_IMPL(ctx, block, c, size);                  \
+  } while (false)
+
 #include "sanitizer_common/sanitizer_common_interceptors.inc"
 
 // Syscall interceptors don't have contexts, we don't support suppressions
@@ -389,90 +451,18 @@
 }
 #endif
 
-// memcpy is called during __asan_init() from the internals of printf(...).
-// We do not treat memcpy with to==from as a bug.
-// See http://llvm.org/bugs/show_bug.cgi?id=11763.
-#define ASAN_MEMCPY_IMPL(ctx, to, from, size) do {                             \
-    if (UNLIKELY(!asan_inited)) return internal_memcpy(to, from, size);        \
-    if (asan_init_is_running) {                                                \
-      return REAL(memcpy)(to, from, size);                                     \
-    }                                                                          \
-    ENSURE_ASAN_INITED();                                                      \
-    if (flags()->replace_intrin) {                                             \
-      if (to != from) {                                                        \
-        CHECK_RANGES_OVERLAP("memcpy", to, size, from, size);                  \
-      }                                                                        \
-      ASAN_READ_RANGE(ctx, from, size);                                        \
-      ASAN_WRITE_RANGE(ctx, to, size);                                         \
-    }                                                                          \
-    return REAL(memcpy)(to, from, size);                                       \
-  } while (0)
-
-
 void *__asan_memcpy(void *to, const void *from, uptr size) {
   ASAN_MEMCPY_IMPL(nullptr, to, from, size);
 }
 
-// memset is called inside Printf.
-#define ASAN_MEMSET_IMPL(ctx, block, c, size) do {                             \
-    if (UNLIKELY(!asan_inited)) return internal_memset(block, c, size);        \
-    if (asan_init_is_running) {                                                \
-      return REAL(memset)(block, c, size);                                     \
-    }                                                                          \
-    ENSURE_ASAN_INITED();                                                      \
-    if (flags()->replace_intrin) {                                             \
-      ASAN_WRITE_RANGE(ctx, block, size);                                      \
-    }                                                                          \
-    return REAL(memset)(block, c, size);                                       \
-  } while (0)
-
 void *__asan_memset(void *block, int c, uptr size) {
   ASAN_MEMSET_IMPL(nullptr, block, c, size);
 }
 
-#define ASAN_MEMMOVE_IMPL(ctx, to, from, size) do {                            \
-    if (UNLIKELY(!asan_inited))                                                \
-      return internal_memmove(to, from, size);                                 \
-    ENSURE_ASAN_INITED();                                                      \
-    if (flags()->replace_intrin) {                                             \
-      ASAN_READ_RANGE(ctx, from, size);                                        \
-      ASAN_WRITE_RANGE(ctx, to, size);                                         \
-    }                                                                          \
-    return internal_memmove(to, from, size);                                   \
-  } while (0)
-
 void *__asan_memmove(void *to, const void *from, uptr size) {
   ASAN_MEMMOVE_IMPL(nullptr, to, from, size);
 }
 
-INTERCEPTOR(void*, memmove, void *to, const void *from, uptr size) {
-  void *ctx;
-  ASAN_INTERCEPTOR_ENTER(ctx, memmove);
-  ASAN_MEMMOVE_IMPL(ctx, to, from, size);
-}
-
-INTERCEPTOR(void*, memcpy, void *to, const void *from, uptr size) {
-  void *ctx;
-  ASAN_INTERCEPTOR_ENTER(ctx, memcpy);
-  if (PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE) {
-    ASAN_MEMCPY_IMPL(ctx, to, from, size);
-  } else {
-    // At least on 10.7 and 10.8 both memcpy() and memmove() are being replaced
-    // with WRAP(memcpy). As a result, false positives are reported for
-    // memmove() calls. If we just disable error reporting with
-    // ASAN_OPTIONS=replace_intrin=0, memmove() is still replaced with
-    // internal_memcpy(), which may lead to crashes, see
-    // http://llvm.org/bugs/show_bug.cgi?id=16362.
-    ASAN_MEMMOVE_IMPL(ctx, to, from, size);
-  }
-}
-
-INTERCEPTOR(void*, memset, void *block, int c, uptr size) {
-  void *ctx;
-  ASAN_INTERCEPTOR_ENTER(ctx, memset);
-  ASAN_MEMSET_IMPL(ctx, block, c, size);
-}
-
 #if ASAN_INTERCEPT_INDEX
 # if ASAN_USE_ALIAS_ATTRIBUTE_FOR_INDEX
 INTERCEPTOR(char*, index, const char *string, int c)
@@ -724,17 +714,6 @@
   was_called_once = true;
   InitializeCommonInterceptors();
 
-  // Intercept mem* functions.
-  ASAN_INTERCEPT_FUNC(memmove);
-  ASAN_INTERCEPT_FUNC(memset);
-  if (PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE) {
-    // In asan, REAL(memmove) is not used, but it is used in msan.
-    ASAN_INTERCEPT_FUNC(memcpy);
-  } else {
-    ASSIGN_REAL(memcpy, memmove);
-  }
-  CHECK(REAL(memcpy));
-
   // Intercept str* functions.
   ASAN_INTERCEPT_FUNC(strcat);  // NOLINT
   ASAN_INTERCEPT_FUNC(strcpy);  // NOLINT
diff --git a/lib/asan/asan_interface.inc b/lib/asan/asan_interface.inc
new file mode 100644
index 0000000..351be4d
--- /dev/null
+++ b/lib/asan/asan_interface.inc
@@ -0,0 +1,167 @@
+//===-- asan_interface.inc ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Asan interface list.
+//===----------------------------------------------------------------------===//
+INTERFACE_FUNCTION(__asan_addr_is_in_fake_stack)
+INTERFACE_FUNCTION(__asan_address_is_poisoned)
+INTERFACE_FUNCTION(__asan_after_dynamic_init)
+INTERFACE_FUNCTION(__asan_alloca_poison)
+INTERFACE_FUNCTION(__asan_allocas_unpoison)
+INTERFACE_FUNCTION(__asan_before_dynamic_init)
+INTERFACE_FUNCTION(__asan_describe_address)
+INTERFACE_FUNCTION(__asan_exp_load1)
+INTERFACE_FUNCTION(__asan_exp_load2)
+INTERFACE_FUNCTION(__asan_exp_load4)
+INTERFACE_FUNCTION(__asan_exp_load8)
+INTERFACE_FUNCTION(__asan_exp_load16)
+INTERFACE_FUNCTION(__asan_exp_loadN)
+INTERFACE_FUNCTION(__asan_exp_store1)
+INTERFACE_FUNCTION(__asan_exp_store2)
+INTERFACE_FUNCTION(__asan_exp_store4)
+INTERFACE_FUNCTION(__asan_exp_store8)
+INTERFACE_FUNCTION(__asan_exp_store16)
+INTERFACE_FUNCTION(__asan_exp_storeN)
+INTERFACE_FUNCTION(__asan_get_alloc_stack)
+INTERFACE_FUNCTION(__asan_get_current_fake_stack)
+INTERFACE_FUNCTION(__asan_get_free_stack)
+INTERFACE_FUNCTION(__asan_get_report_access_size)
+INTERFACE_FUNCTION(__asan_get_report_access_type)
+INTERFACE_FUNCTION(__asan_get_report_address)
+INTERFACE_FUNCTION(__asan_get_report_bp)
+INTERFACE_FUNCTION(__asan_get_report_description)
+INTERFACE_FUNCTION(__asan_get_report_pc)
+INTERFACE_FUNCTION(__asan_get_report_sp)
+INTERFACE_FUNCTION(__asan_get_shadow_mapping)
+INTERFACE_FUNCTION(__asan_handle_no_return)
+INTERFACE_FUNCTION(__asan_init)
+INTERFACE_FUNCTION(__asan_load_cxx_array_cookie)
+INTERFACE_FUNCTION(__asan_load1)
+INTERFACE_FUNCTION(__asan_load2)
+INTERFACE_FUNCTION(__asan_load4)
+INTERFACE_FUNCTION(__asan_load8)
+INTERFACE_FUNCTION(__asan_load16)
+INTERFACE_FUNCTION(__asan_loadN)
+INTERFACE_FUNCTION(__asan_load1_noabort)
+INTERFACE_FUNCTION(__asan_load2_noabort)
+INTERFACE_FUNCTION(__asan_load4_noabort)
+INTERFACE_FUNCTION(__asan_load8_noabort)
+INTERFACE_FUNCTION(__asan_load16_noabort)
+INTERFACE_FUNCTION(__asan_loadN_noabort)
+INTERFACE_FUNCTION(__asan_locate_address)
+INTERFACE_FUNCTION(__asan_memcpy)
+INTERFACE_FUNCTION(__asan_memmove)
+INTERFACE_FUNCTION(__asan_memset)
+INTERFACE_FUNCTION(__asan_poison_cxx_array_cookie)
+INTERFACE_FUNCTION(__asan_poison_intra_object_redzone)
+INTERFACE_FUNCTION(__asan_poison_memory_region)
+INTERFACE_FUNCTION(__asan_poison_stack_memory)
+INTERFACE_FUNCTION(__asan_print_accumulated_stats)
+INTERFACE_FUNCTION(__asan_region_is_poisoned)
+INTERFACE_FUNCTION(__asan_register_globals)
+INTERFACE_FUNCTION(__asan_register_image_globals)
+INTERFACE_FUNCTION(__asan_report_error)
+INTERFACE_FUNCTION(__asan_report_exp_load1)
+INTERFACE_FUNCTION(__asan_report_exp_load2)
+INTERFACE_FUNCTION(__asan_report_exp_load4)
+INTERFACE_FUNCTION(__asan_report_exp_load8)
+INTERFACE_FUNCTION(__asan_report_exp_load16)
+INTERFACE_FUNCTION(__asan_report_exp_load_n)
+INTERFACE_FUNCTION(__asan_report_exp_store1)
+INTERFACE_FUNCTION(__asan_report_exp_store2)
+INTERFACE_FUNCTION(__asan_report_exp_store4)
+INTERFACE_FUNCTION(__asan_report_exp_store8)
+INTERFACE_FUNCTION(__asan_report_exp_store16)
+INTERFACE_FUNCTION(__asan_report_exp_store_n)
+INTERFACE_FUNCTION(__asan_report_load1)
+INTERFACE_FUNCTION(__asan_report_load2)
+INTERFACE_FUNCTION(__asan_report_load4)
+INTERFACE_FUNCTION(__asan_report_load8)
+INTERFACE_FUNCTION(__asan_report_load16)
+INTERFACE_FUNCTION(__asan_report_load_n)
+INTERFACE_FUNCTION(__asan_report_load1_noabort)
+INTERFACE_FUNCTION(__asan_report_load2_noabort)
+INTERFACE_FUNCTION(__asan_report_load4_noabort)
+INTERFACE_FUNCTION(__asan_report_load8_noabort)
+INTERFACE_FUNCTION(__asan_report_load16_noabort)
+INTERFACE_FUNCTION(__asan_report_load_n_noabort)
+INTERFACE_FUNCTION(__asan_report_present)
+INTERFACE_FUNCTION(__asan_report_store1)
+INTERFACE_FUNCTION(__asan_report_store2)
+INTERFACE_FUNCTION(__asan_report_store4)
+INTERFACE_FUNCTION(__asan_report_store8)
+INTERFACE_FUNCTION(__asan_report_store16)
+INTERFACE_FUNCTION(__asan_report_store_n)
+INTERFACE_FUNCTION(__asan_report_store1_noabort)
+INTERFACE_FUNCTION(__asan_report_store2_noabort)
+INTERFACE_FUNCTION(__asan_report_store4_noabort)
+INTERFACE_FUNCTION(__asan_report_store8_noabort)
+INTERFACE_FUNCTION(__asan_report_store16_noabort)
+INTERFACE_FUNCTION(__asan_report_store_n_noabort)
+INTERFACE_FUNCTION(__asan_set_death_callback)
+INTERFACE_FUNCTION(__asan_set_error_report_callback)
+INTERFACE_FUNCTION(__asan_set_shadow_00)
+INTERFACE_FUNCTION(__asan_set_shadow_f1)
+INTERFACE_FUNCTION(__asan_set_shadow_f2)
+INTERFACE_FUNCTION(__asan_set_shadow_f3)
+INTERFACE_FUNCTION(__asan_set_shadow_f5)
+INTERFACE_FUNCTION(__asan_set_shadow_f8)
+INTERFACE_FUNCTION(__asan_stack_free_0)
+INTERFACE_FUNCTION(__asan_stack_free_1)
+INTERFACE_FUNCTION(__asan_stack_free_2)
+INTERFACE_FUNCTION(__asan_stack_free_3)
+INTERFACE_FUNCTION(__asan_stack_free_4)
+INTERFACE_FUNCTION(__asan_stack_free_5)
+INTERFACE_FUNCTION(__asan_stack_free_6)
+INTERFACE_FUNCTION(__asan_stack_free_7)
+INTERFACE_FUNCTION(__asan_stack_free_8)
+INTERFACE_FUNCTION(__asan_stack_free_9)
+INTERFACE_FUNCTION(__asan_stack_free_10)
+INTERFACE_FUNCTION(__asan_stack_malloc_0)
+INTERFACE_FUNCTION(__asan_stack_malloc_1)
+INTERFACE_FUNCTION(__asan_stack_malloc_2)
+INTERFACE_FUNCTION(__asan_stack_malloc_3)
+INTERFACE_FUNCTION(__asan_stack_malloc_4)
+INTERFACE_FUNCTION(__asan_stack_malloc_5)
+INTERFACE_FUNCTION(__asan_stack_malloc_6)
+INTERFACE_FUNCTION(__asan_stack_malloc_7)
+INTERFACE_FUNCTION(__asan_stack_malloc_8)
+INTERFACE_FUNCTION(__asan_stack_malloc_9)
+INTERFACE_FUNCTION(__asan_stack_malloc_10)
+INTERFACE_FUNCTION(__asan_store1)
+INTERFACE_FUNCTION(__asan_store2)
+INTERFACE_FUNCTION(__asan_store4)
+INTERFACE_FUNCTION(__asan_store8)
+INTERFACE_FUNCTION(__asan_store16)
+INTERFACE_FUNCTION(__asan_storeN)
+INTERFACE_FUNCTION(__asan_store1_noabort)
+INTERFACE_FUNCTION(__asan_store2_noabort)
+INTERFACE_FUNCTION(__asan_store4_noabort)
+INTERFACE_FUNCTION(__asan_store8_noabort)
+INTERFACE_FUNCTION(__asan_store16_noabort)
+INTERFACE_FUNCTION(__asan_storeN_noabort)
+INTERFACE_FUNCTION(__asan_unpoison_intra_object_redzone)
+INTERFACE_FUNCTION(__asan_unpoison_memory_region)
+INTERFACE_FUNCTION(__asan_unpoison_stack_memory)
+INTERFACE_FUNCTION(__asan_unregister_globals)
+INTERFACE_FUNCTION(__asan_unregister_image_globals)
+INTERFACE_FUNCTION(__asan_version_mismatch_check_v8)
+INTERFACE_FUNCTION(__sanitizer_finish_switch_fiber)
+INTERFACE_FUNCTION(__sanitizer_print_stack_trace)
+INTERFACE_FUNCTION(__sanitizer_ptr_cmp)
+INTERFACE_FUNCTION(__sanitizer_ptr_sub)
+INTERFACE_FUNCTION(__sanitizer_start_switch_fiber)
+INTERFACE_FUNCTION(__sanitizer_unaligned_load16)
+INTERFACE_FUNCTION(__sanitizer_unaligned_load32)
+INTERFACE_FUNCTION(__sanitizer_unaligned_load64)
+INTERFACE_FUNCTION(__sanitizer_unaligned_store16)
+INTERFACE_FUNCTION(__sanitizer_unaligned_store32)
+INTERFACE_FUNCTION(__sanitizer_unaligned_store64)
+INTERFACE_WEAK_FUNCTION(__asan_default_options)
+INTERFACE_WEAK_FUNCTION(__asan_default_suppressions)
+INTERFACE_WEAK_FUNCTION(__asan_on_error)
diff --git a/lib/asan/asan_interface_internal.h b/lib/asan/asan_interface_internal.h
index 8cd424c..b18c315 100644
--- a/lib/asan/asan_interface_internal.h
+++ b/lib/asan/asan_interface_internal.h
@@ -165,12 +165,12 @@
   void __asan_set_error_report_callback(void (*callback)(const char*));
 
   SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-  /* OPTIONAL */ void __asan_on_error();
+  void __asan_on_error();
 
   SANITIZER_INTERFACE_ATTRIBUTE void __asan_print_accumulated_stats();
 
   SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-  /* OPTIONAL */ const char* __asan_default_options();
+  const char* __asan_default_options();
 
   SANITIZER_INTERFACE_ATTRIBUTE
   extern uptr __asan_shadow_memory_dynamic_address;
@@ -242,6 +242,9 @@
   void __asan_alloca_poison(uptr addr, uptr size);
   SANITIZER_INTERFACE_ATTRIBUTE
   void __asan_allocas_unpoison(uptr top, uptr bottom);
+
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  const char* __asan_default_suppressions();
 }  // extern "C"
 
 #endif  // ASAN_INTERFACE_INTERNAL_H
diff --git a/lib/asan/asan_internal.h b/lib/asan/asan_internal.h
index 84d7f08..3b70695 100644
--- a/lib/asan/asan_internal.h
+++ b/lib/asan/asan_internal.h
@@ -36,7 +36,7 @@
 // If set, values like allocator chunk size, as well as defaults for some flags
 // will be changed towards less memory overhead.
 #ifndef ASAN_LOW_MEMORY
-# if SANITIZER_IOS || (SANITIZER_WORDSIZE == 32)
+# if SANITIZER_IOS || SANITIZER_ANDROID
 #  define ASAN_LOW_MEMORY 1
 # else
 #  define ASAN_LOW_MEMORY 0
@@ -64,9 +64,9 @@
 
 // asan_win.cc
 void InitializePlatformExceptionHandlers();
-
-// asan_win.cc / asan_posix.cc
-const char *DescribeSignalOrException(int signo);
+// Returns whether an address is a valid allocated system heap block.
+// 'addr' must point to the beginning of the block.
+bool IsSystemHeapAddress(uptr addr);
 
 // asan_rtl.cc
 void NORETURN ShowStatsAndAbort();
@@ -103,17 +103,6 @@
 
 void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name);
 
-// Platform-specific options.
-#if SANITIZER_MAC
-bool PlatformHasDifferentMemcpyAndMemmove();
-# define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE \
-    (PlatformHasDifferentMemcpyAndMemmove())
-#elif SANITIZER_WINDOWS64
-# define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE false
-#else
-# define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE true
-#endif  // SANITIZER_MAC
-
 // Add convenient macro for interface functions that may be represented as
 // weak hooks.
 #define ASAN_MALLOC_HOOK(ptr, size)                                   \
diff --git a/lib/asan/asan_linux.cc b/lib/asan/asan_linux.cc
index c051573..6d150de 100644
--- a/lib/asan/asan_linux.cc
+++ b/lib/asan/asan_linux.cc
@@ -70,6 +70,7 @@
 
 void InitializePlatformInterceptors() {}
 void InitializePlatformExceptionHandlers() {}
+bool IsSystemHeapAddress(uptr addr) { return false; }
 
 void *AsanDoesNotSupportStaticLinkage() {
   // This will fail to link with -static.
diff --git a/lib/asan/asan_mac.cc b/lib/asan/asan_mac.cc
index 525864f..3c93b26 100644
--- a/lib/asan/asan_mac.cc
+++ b/lib/asan/asan_mac.cc
@@ -48,15 +48,7 @@
 
 void InitializePlatformInterceptors() {}
 void InitializePlatformExceptionHandlers() {}
-
-bool PlatformHasDifferentMemcpyAndMemmove() {
-  // On OS X 10.7 memcpy() and memmove() are both resolved
-  // into memmove$VARIANT$sse42.
-  // See also https://github.com/google/sanitizers/issues/34.
-  // TODO(glider): need to check dynamically that memcpy() and memmove() are
-  // actually the same function.
-  return GetMacosVersion() == MACOS_VERSION_SNOW_LEOPARD;
-}
+bool IsSystemHeapAddress(uptr addr) { return false; }
 
 // No-op. Mac does not support static linkage anyway.
 void *AsanDoesNotSupportStaticLinkage() {
@@ -147,7 +139,8 @@
     t = AsanThread::Create(/* start_routine */ nullptr, /* arg */ nullptr,
                            parent_tid, stack, /* detached */ true);
     t->Init();
-    asanThreadRegistry().StartThread(t->tid(), 0, 0);
+    asanThreadRegistry().StartThread(t->tid(), GetTid(),
+                                     /* workerthread */ true, 0);
     SetCurrentThread(t);
   }
 }
diff --git a/lib/asan/asan_malloc_linux.cc b/lib/asan/asan_malloc_linux.cc
index a78767c..8c99d3b 100644
--- a/lib/asan/asan_malloc_linux.cc
+++ b/lib/asan/asan_malloc_linux.cc
@@ -50,12 +50,14 @@
   asan_free(ptr, &stack, FROM_MALLOC);
 }
 
+#if SANITIZER_INTERCEPT_CFREE
 INTERCEPTOR(void, cfree, void *ptr) {
   GET_STACK_TRACE_FREE;
   if (UNLIKELY(IsInDlsymAllocPool(ptr)))
     return;
   asan_free(ptr, &stack, FROM_MALLOC);
 }
+#endif // SANITIZER_INTERCEPT_CFREE
 
 INTERCEPTOR(void*, malloc, uptr size) {
   if (UNLIKELY(!asan_inited))
@@ -91,22 +93,24 @@
   return asan_realloc(ptr, size, &stack);
 }
 
+#if SANITIZER_INTERCEPT_MEMALIGN
 INTERCEPTOR(void*, memalign, uptr boundary, uptr size) {
   GET_STACK_TRACE_MALLOC;
   return asan_memalign(boundary, size, &stack, FROM_MALLOC);
 }
 
-INTERCEPTOR(void*, aligned_alloc, uptr boundary, uptr size) {
-  GET_STACK_TRACE_MALLOC;
-  return asan_memalign(boundary, size, &stack, FROM_MALLOC);
-}
-
 INTERCEPTOR(void*, __libc_memalign, uptr boundary, uptr size) {
   GET_STACK_TRACE_MALLOC;
   void *res = asan_memalign(boundary, size, &stack, FROM_MALLOC);
   DTLS_on_libc_memalign(res, size);
   return res;
 }
+#endif // SANITIZER_INTERCEPT_MEMALIGN
+
+INTERCEPTOR(void*, aligned_alloc, uptr boundary, uptr size) {
+  GET_STACK_TRACE_MALLOC;
+  return asan_memalign(boundary, size, &stack, FROM_MALLOC);
+}
 
 INTERCEPTOR(uptr, malloc_usable_size, void *ptr) {
   GET_CURRENT_PC_BP_SP;
@@ -114,6 +118,7 @@
   return asan_malloc_usable_size(ptr, pc, bp);
 }
 
+#if SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO
 // We avoid including malloc.h for portability reasons.
 // man mallinfo says the fields are "long", but the implementation uses int.
 // It doesn't matter much -- we just need to make sure that the libc's mallinfo
@@ -131,6 +136,7 @@
 INTERCEPTOR(int, mallopt, int cmd, int value) {
   return -1;
 }
+#endif // SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO
 
 INTERCEPTOR(int, posix_memalign, void **memptr, uptr alignment, uptr size) {
   GET_STACK_TRACE_MALLOC;
@@ -143,10 +149,12 @@
   return asan_valloc(size, &stack);
 }
 
+#if SANITIZER_INTERCEPT_PVALLOC
 INTERCEPTOR(void*, pvalloc, uptr size) {
   GET_STACK_TRACE_MALLOC;
   return asan_pvalloc(size, &stack);
 }
+#endif // SANITIZER_INTERCEPT_PVALLOC
 
 INTERCEPTOR(void, malloc_stats, void) {
   __asan_print_accumulated_stats();
diff --git a/lib/asan/asan_malloc_win.cc b/lib/asan/asan_malloc_win.cc
index 05148d5..5163c04 100644
--- a/lib/asan/asan_malloc_win.cc
+++ b/lib/asan/asan_malloc_win.cc
@@ -56,11 +56,6 @@
 }
 
 ALLOCATION_FUNCTION_ATTRIBUTE
-void cfree(void *ptr) {
-  CHECK(!"cfree() should not be used on Windows");
-}
-
-ALLOCATION_FUNCTION_ATTRIBUTE
 void *malloc(size_t size) {
   GET_STACK_TRACE_MALLOC;
   return asan_malloc(size, &stack);
diff --git a/lib/asan/asan_memory_profile.cc b/lib/asan/asan_memory_profile.cc
index c55264e..c2678b9 100644
--- a/lib/asan/asan_memory_profile.cc
+++ b/lib/asan/asan_memory_profile.cc
@@ -32,9 +32,56 @@
 class HeapProfile {
  public:
   HeapProfile() : allocations_(1024) {}
+
+  void ProcessChunk(const AsanChunkView& cv) {
+    if (cv.IsAllocated()) {
+      total_allocated_user_size_ += cv.UsedSize();
+      total_allocated_count_++;
+      u32 id = cv.GetAllocStackId();
+      if (id)
+        Insert(id, cv.UsedSize());
+    } else if (cv.IsQuarantined()) {
+      total_quarantined_user_size_ += cv.UsedSize();
+      total_quarantined_count_++;
+    } else {
+      total_other_count_++;
+    }
+  }
+
+  void Print(uptr top_percent) {  // Reports the largest allocation sites.
+    InternalSort(&allocations_, allocations_.size(),
+                 [](const AllocationSite &a, const AllocationSite &b) {
+                   return a.total_size > b.total_size;
+                 });  // Descending by total_size.
+    CHECK(total_allocated_user_size_);  // Used as a divisor below.
+    uptr total_shown = 0;
+    Printf("Live Heap Allocations: %zu bytes in %zu chunks; quarantined: "
+           "%zu bytes in %zu chunks; %zu other chunks; total chunks: %zu; "
+           "showing top %zu%%\n",
+           total_allocated_user_size_, total_allocated_count_,
+           total_quarantined_user_size_, total_quarantined_count_,
+           total_other_count_, total_allocated_count_ +
+           total_quarantined_count_ + total_other_count_, top_percent);
+    for (uptr i = 0; i < allocations_.size(); i++) {
+      auto &a = allocations_[i];
+      Printf("%zu byte(s) (%zu%%) in %zu allocation(s)\n", a.total_size,
+             a.total_size * 100 / total_allocated_user_size_, a.count);
+      StackDepotGet(a.id).Print();
+      total_shown += a.total_size;
+      if (total_shown * 100 / total_allocated_user_size_ > top_percent)
+        break;  // Shown sites now exceed top_percent of live bytes.
+    }
+  }
+
+ private:
+  uptr total_allocated_user_size_ = 0;
+  uptr total_allocated_count_ = 0;
+  uptr total_quarantined_user_size_ = 0;
+  uptr total_quarantined_count_ = 0;
+  uptr total_other_count_ = 0;
+  InternalMmapVector<AllocationSite> allocations_;
+
   void Insert(u32 id, uptr size) {
-    total_allocated_ += size;
-    total_count_++;
     // Linear lookup will be good enough for most cases (although not all).
     for (uptr i = 0; i < allocations_.size(); i++) {
       if (allocations_[i].id == id) {
@@ -45,40 +92,11 @@
     }
     allocations_.push_back({id, size, 1});
   }
-
-  void Print(uptr top_percent) {
-    InternalSort(&allocations_, allocations_.size(),
-                 [](const AllocationSite &a, const AllocationSite &b) {
-                   return a.total_size > b.total_size;
-                 });
-    CHECK(total_allocated_);
-    uptr total_shown = 0;
-    Printf("Live Heap Allocations: %zd bytes from %zd allocations; "
-           "showing top %zd%%\n", total_allocated_, total_count_, top_percent);
-    for (uptr i = 0; i < allocations_.size(); i++) {
-      auto &a = allocations_[i];
-      Printf("%zd byte(s) (%zd%%) in %zd allocation(s)\n", a.total_size,
-             a.total_size * 100 / total_allocated_, a.count);
-      StackDepotGet(a.id).Print();
-      total_shown += a.total_size;
-      if (total_shown * 100 / total_allocated_ > top_percent)
-        break;
-    }
-  }
-
- private:
-  uptr total_allocated_ = 0;
-  uptr total_count_ = 0;
-  InternalMmapVector<AllocationSite> allocations_;
 };
 
 static void ChunkCallback(uptr chunk, void *arg) {
-  HeapProfile *hp = reinterpret_cast<HeapProfile*>(arg);
-  AsanChunkView cv = FindHeapChunkByAllocBeg(chunk);
-  if (!cv.IsAllocated()) return;
-  u32 id = cv.GetAllocStackId();
-  if (!id) return;
-  hp->Insert(id, cv.UsedSize());
+  reinterpret_cast<HeapProfile*>(arg)->ProcessChunk(
+      FindHeapChunkByAllocBeg(chunk));
 }
 
 static void MemoryProfileCB(const SuspendedThreadsList &suspended_threads_list,
diff --git a/lib/asan/asan_posix.cc b/lib/asan/asan_posix.cc
index 8e56763..68fde91 100644
--- a/lib/asan/asan_posix.cc
+++ b/lib/asan/asan_posix.cc
@@ -33,19 +33,6 @@
 
 namespace __asan {
 
-const char *DescribeSignalOrException(int signo) {
-  switch (signo) {
-    case SIGFPE:
-      return "FPE";
-    case SIGILL:
-      return "ILL";
-    case SIGABRT:
-      return "ABRT";
-    default:
-      return "SEGV";
-  }
-}
-
 void AsanOnDeadlySignal(int signo, void *siginfo, void *context) {
   ScopedDeadlySignal signal_scope(GetCurrentThread());
   int code = (int)((siginfo_t*)siginfo)->si_code;
diff --git a/lib/asan/asan_report.cc b/lib/asan/asan_report.cc
index 937ba40..cd44ba8 100644
--- a/lib/asan/asan_report.cc
+++ b/lib/asan/asan_report.cc
@@ -179,6 +179,8 @@
     if (common_flags()->print_cmdline)
       PrintCmdline();
 
+    if (common_flags()->print_module_map == 2) PrintModuleMap();
+
     // Copy the message buffer so that we could start logging without holding a
     // lock that gets aquired during printing.
     InternalScopedBuffer<char> buffer_copy(kErrorMessageBufferSize);
@@ -486,9 +488,6 @@
 }
 } // extern "C"
 
-#if !SANITIZER_SUPPORTS_WEAK_HOOKS
 // Provide default implementation of __asan_on_error that does nothing
 // and may be overriden by user.
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE NOINLINE
-void __asan_on_error() {}
-#endif
+SANITIZER_INTERFACE_WEAK_DEF(void, __asan_on_error, void) {}
diff --git a/lib/asan/asan_rtl.cc b/lib/asan/asan_rtl.cc
index 5c191fe..d9d7d7e 100644
--- a/lib/asan/asan_rtl.cc
+++ b/lib/asan/asan_rtl.cc
@@ -46,6 +46,7 @@
     // Don't die twice - run a busy loop.
     while (1) { }
   }
+  if (common_flags()->print_module_map >= 1) PrintModuleMap();
   if (flags()->sleep_before_dying) {
     Report("Sleeping for %d second(s)\n", flags()->sleep_before_dying);
     SleepForSeconds(flags()->sleep_before_dying);
@@ -410,6 +411,8 @@
   Printf("redzone=%zu\n", (uptr)flags()->redzone);
   Printf("max_redzone=%zu\n", (uptr)flags()->max_redzone);
   Printf("quarantine_size_mb=%zuM\n", (uptr)flags()->quarantine_size_mb);
+  Printf("thread_local_quarantine_size_kb=%zuK\n",
+         (uptr)flags()->thread_local_quarantine_size_kb);
   Printf("malloc_context_size=%zu\n",
          (uptr)common_flags()->malloc_context_size);
 
diff --git a/lib/asan/asan_suppressions.cc b/lib/asan/asan_suppressions.cc
index 62c868d..ac8aa02 100644
--- a/lib/asan/asan_suppressions.cc
+++ b/lib/asan/asan_suppressions.cc
@@ -31,15 +31,9 @@
     kInterceptorName, kInterceptorViaFunction, kInterceptorViaLibrary,
     kODRViolation};
 
-extern "C" {
-#if SANITIZER_SUPPORTS_WEAK_HOOKS
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-const char *__asan_default_suppressions();
-#else
-// No week hooks, provide empty implementation.
-const char *__asan_default_suppressions() { return ""; }
-#endif  // SANITIZER_SUPPORTS_WEAK_HOOKS
-}  // extern "C"
+SANITIZER_INTERFACE_WEAK_DEF(const char *, __asan_default_suppressions, void) {
+  return "";
+}
 
 void InitializeSuppressions() {
   CHECK_EQ(nullptr, suppression_ctx);
diff --git a/lib/asan/asan_thread.cc b/lib/asan/asan_thread.cc
index 537b53d..2f9fa81 100644
--- a/lib/asan/asan_thread.cc
+++ b/lib/asan/asan_thread.cc
@@ -239,7 +239,8 @@
 thread_return_t AsanThread::ThreadStart(
     uptr os_id, atomic_uintptr_t *signal_thread_is_registered) {
   Init();
-  asanThreadRegistry().StartThread(tid(), os_id, nullptr);
+  asanThreadRegistry().StartThread(tid(), os_id, /*workerthread*/ false,
+                                   nullptr);
   if (signal_thread_is_registered)
     atomic_store(signal_thread_is_registered, 1, memory_order_release);
 
diff --git a/lib/asan/asan_win.cc b/lib/asan/asan_win.cc
index 78268d8..4ab535c 100644
--- a/lib/asan/asan_win.cc
+++ b/lib/asan/asan_win.cc
@@ -19,7 +19,6 @@
 
 #include <stdlib.h>
 
-#include "asan_globals_win.h"
 #include "asan_interceptors.h"
 #include "asan_internal.h"
 #include "asan_report.h"
@@ -28,6 +27,8 @@
 #include "asan_mapping.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_mutex.h"
+#include "sanitizer_common/sanitizer_win.h"
+#include "sanitizer_common/sanitizer_win_defs.h"
 
 using namespace __asan;  // NOLINT
 
@@ -43,35 +44,50 @@
   __asan_init();
   return __asan_shadow_memory_dynamic_address;
 }
-
-// -------------------- A workaround for the absence of weak symbols ----- {{{
-// We don't have a direct equivalent of weak symbols when using MSVC, but we can
-// use the /alternatename directive to tell the linker to default a specific
-// symbol to a specific value, which works nicely for allocator hooks and
-// __asan_default_options().
-void __sanitizer_default_malloc_hook(void *ptr, uptr size) { }
-void __sanitizer_default_free_hook(void *ptr) { }
-const char* __asan_default_default_options() { return ""; }
-const char* __asan_default_default_suppressions() { return ""; }
-void __asan_default_on_error() {}
-// 64-bit msvc will not prepend an underscore for symbols.
-#ifdef _WIN64
-#pragma comment(linker, "/alternatename:__sanitizer_malloc_hook=__sanitizer_default_malloc_hook")  // NOLINT
-#pragma comment(linker, "/alternatename:__sanitizer_free_hook=__sanitizer_default_free_hook")      // NOLINT
-#pragma comment(linker, "/alternatename:__asan_default_options=__asan_default_default_options")    // NOLINT
-#pragma comment(linker, "/alternatename:__asan_default_suppressions=__asan_default_default_suppressions")    // NOLINT
-#pragma comment(linker, "/alternatename:__asan_on_error=__asan_default_on_error")                  // NOLINT
-#else
-#pragma comment(linker, "/alternatename:___sanitizer_malloc_hook=___sanitizer_default_malloc_hook")  // NOLINT
-#pragma comment(linker, "/alternatename:___sanitizer_free_hook=___sanitizer_default_free_hook")      // NOLINT
-#pragma comment(linker, "/alternatename:___asan_default_options=___asan_default_default_options")    // NOLINT
-#pragma comment(linker, "/alternatename:___asan_default_suppressions=___asan_default_default_suppressions")    // NOLINT
-#pragma comment(linker, "/alternatename:___asan_on_error=___asan_default_on_error")                  // NOLINT
-#endif
-// }}}
 }  // extern "C"
 
 // ---------------------- Windows-specific interceptors ---------------- {{{
+static LPTOP_LEVEL_EXCEPTION_FILTER default_seh_handler;
+static LPTOP_LEVEL_EXCEPTION_FILTER user_seh_handler;
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE
+long __asan_unhandled_exception_filter(EXCEPTION_POINTERS *info) {
+  EXCEPTION_RECORD *exception_record = info->ExceptionRecord;
+  CONTEXT *context = info->ContextRecord;
+
+  // FIXME: Handle EXCEPTION_STACK_OVERFLOW here.
+
+  SignalContext sig = SignalContext::Create(exception_record, context);
+  ReportDeadlySignal(exception_record->ExceptionCode, sig);
+  UNREACHABLE("returned from reporting deadly signal");
+}
+
+// Wrapper SEH Handler. If the exception should be handled by asan, we call
+// __asan_unhandled_exception_filter, otherwise, we execute the user provided
+// exception handler or the default.
+static long WINAPI SEHHandler(EXCEPTION_POINTERS *info) {
+  DWORD exception_code = info->ExceptionRecord->ExceptionCode;
+  if (__sanitizer::IsHandledDeadlyException(exception_code))
+    return __asan_unhandled_exception_filter(info);
+  if (user_seh_handler)
+    return user_seh_handler(info);
+  // Bubble out to the default exception filter.
+  if (default_seh_handler)
+    return default_seh_handler(info);
+  return EXCEPTION_CONTINUE_SEARCH;
+}
+
+INTERCEPTOR_WINAPI(LPTOP_LEVEL_EXCEPTION_FILTER, SetUnhandledExceptionFilter,
+    LPTOP_LEVEL_EXCEPTION_FILTER ExceptionFilter) {
+  CHECK(REAL(SetUnhandledExceptionFilter));
+  if (ExceptionFilter == &SEHHandler || common_flags()->allow_user_segv_handler)
+    return REAL(SetUnhandledExceptionFilter)(ExceptionFilter);
+  // We record the user provided exception handler to be called for all the
+  // exceptions unhandled by asan.
+  Swap(ExceptionFilter, user_seh_handler);
+  return ExceptionFilter;
+}
+
 INTERCEPTOR_WINAPI(void, RtlRaiseException, EXCEPTION_RECORD *ExceptionRecord) {
   CHECK(REAL(RtlRaiseException));
   // This is a noreturn function, unless it's one of the exceptions raised to
@@ -144,6 +160,7 @@
 
 void InitializePlatformInterceptors() {
   ASAN_INTERCEPT_FUNC(CreateThread);
+  ASAN_INTERCEPT_FUNC(SetUnhandledExceptionFilter);
 
 #ifdef _WIN64
   ASAN_INTERCEPT_FUNC(__C_specific_handler);
@@ -260,60 +277,8 @@
 #endif
 }
 
-static LPTOP_LEVEL_EXCEPTION_FILTER default_seh_handler;
-
-// Check based on flags if we should report this exception.
-static bool ShouldReportDeadlyException(unsigned code) {
-  switch (code) {
-    case EXCEPTION_ACCESS_VIOLATION:
-    case EXCEPTION_IN_PAGE_ERROR:
-      return common_flags()->handle_segv;
-    case EXCEPTION_BREAKPOINT:
-    case EXCEPTION_ILLEGAL_INSTRUCTION: {
-      return common_flags()->handle_sigill;
-    }
-  }
-  return false;
-}
-
-// Return the textual name for this exception.
-const char *DescribeSignalOrException(int signo) {
-  unsigned code = signo;
-  // Get the string description of the exception if this is a known deadly
-  // exception.
-  switch (code) {
-    case EXCEPTION_ACCESS_VIOLATION:
-      return "access-violation";
-    case EXCEPTION_IN_PAGE_ERROR:
-      return "in-page-error";
-    case EXCEPTION_BREAKPOINT:
-      return "breakpoint";
-    case EXCEPTION_ILLEGAL_INSTRUCTION:
-      return "illegal-instruction";
-  }
-  return nullptr;
-}
-
-extern "C" SANITIZER_INTERFACE_ATTRIBUTE
-long __asan_unhandled_exception_filter(EXCEPTION_POINTERS *info) {
-  EXCEPTION_RECORD *exception_record = info->ExceptionRecord;
-  CONTEXT *context = info->ContextRecord;
-
-  // Continue the search if the signal wasn't deadly.
-  if (!ShouldReportDeadlyException(exception_record->ExceptionCode))
-    return EXCEPTION_CONTINUE_SEARCH;
-  // FIXME: Handle EXCEPTION_STACK_OVERFLOW here.
-
-  SignalContext sig = SignalContext::Create(exception_record, context);
-  ReportDeadlySignal(exception_record->ExceptionCode, sig);
-  UNREACHABLE("returned from reporting deadly signal");
-}
-
-static long WINAPI SEHHandler(EXCEPTION_POINTERS *info) {
-  __asan_unhandled_exception_filter(info);
-
-  // Bubble out to the default exception filter.
-  return default_seh_handler(info);
+bool IsSystemHeapAddress(uptr addr) {
+  return ::HeapValidate(GetProcessHeap(), 0, (void*)addr) != FALSE;
 }
 
 // We want to install our own exception handler (EH) to print helpful reports
@@ -368,7 +333,7 @@
     unsigned long, void *) = asan_thread_init;
 #endif
 
-ASAN_LINK_GLOBALS_WIN()
+WIN_FORCE_LINK(__asan_dso_reg_hook)
 
 // }}}
 }  // namespace __asan
diff --git a/lib/asan/asan_win_dll_thunk.cc b/lib/asan/asan_win_dll_thunk.cc
index 0f62928..651886c 100644
--- a/lib/asan/asan_win_dll_thunk.cc
+++ b/lib/asan/asan_win_dll_thunk.cc
@@ -15,384 +15,41 @@
 // See https://github.com/google/sanitizers/issues/209 for the details.
 //===----------------------------------------------------------------------===//
 
-// Only compile this code when building asan_dll_thunk.lib
-// Using #ifdef rather than relying on Makefiles etc.
-// simplifies the build procedure.
-#ifdef ASAN_DLL_THUNK
+#ifdef SANITIZER_DLL_THUNK
 #include "asan_init_version.h"
-#include "asan_globals_win.h"
 #include "interception/interception.h"
+#include "sanitizer_common/sanitizer_win_defs.h"
+#include "sanitizer_common/sanitizer_win_dll_thunk.h"
 #include "sanitizer_common/sanitizer_platform_interceptors.h"
 
-#ifdef _M_IX86
-#define WINAPI __stdcall
-#else
-#define WINAPI
-#endif
+// ASan's own interface functions.
+#define INTERFACE_FUNCTION(Name) INTERCEPT_SANITIZER_FUNCTION(Name)
+#define INTERFACE_WEAK_FUNCTION(Name) INTERCEPT_SANITIZER_WEAK_FUNCTION(Name)
+#include "asan_interface.inc"
 
-// ---------- Function interception helper functions and macros ----------- {{{1
-extern "C" {
-void *WINAPI GetModuleHandleA(const char *module_name);
-void *WINAPI GetProcAddress(void *module, const char *proc_name);
-void abort();
-}
+// Memory allocation functions.
+INTERCEPT_WRAP_V_W(free)
+INTERCEPT_WRAP_V_W(_free_base)
+INTERCEPT_WRAP_V_WW(_free_dbg)
 
-using namespace __sanitizer;
+INTERCEPT_WRAP_W_W(malloc)
+INTERCEPT_WRAP_W_W(_malloc_base)
+INTERCEPT_WRAP_W_WWWW(_malloc_dbg)
 
-static uptr getRealProcAddressOrDie(const char *name) {
-  uptr ret =
-      __interception::InternalGetProcAddress((void *)GetModuleHandleA(0), name);
-  if (!ret)
-    abort();
-  return ret;
-}
+INTERCEPT_WRAP_W_WW(calloc)
+INTERCEPT_WRAP_W_WW(_calloc_base)
+INTERCEPT_WRAP_W_WWWWW(_calloc_dbg)
+INTERCEPT_WRAP_W_WWW(_calloc_impl)
 
-// We need to intercept some functions (e.g. ASan interface, memory allocator --
-// let's call them "hooks") exported by the DLL thunk and forward the hooks to
-// the runtime in the main module.
-// However, we don't want to keep two lists of these hooks.
-// To avoid that, the list of hooks should be defined using the
-// INTERCEPT_WHEN_POSSIBLE macro. Then, all these hooks can be intercepted
-// at once by calling INTERCEPT_HOOKS().
+INTERCEPT_WRAP_W_WW(realloc)
+INTERCEPT_WRAP_W_WW(_realloc_base)
+INTERCEPT_WRAP_W_WWW(_realloc_dbg)
+INTERCEPT_WRAP_W_WWW(_recalloc)
+INTERCEPT_WRAP_W_WWW(_recalloc_base)
 
-// Use macro+template magic to automatically generate the list of hooks.
-// Each hook at line LINE defines a template class with a static
-// FunctionInterceptor<LINE>::Execute() method intercepting the hook.
-// The default implementation of FunctionInterceptor<LINE> is to call
-// the Execute() method corresponding to the previous line.
-template<int LINE>
-struct FunctionInterceptor {
-  static void Execute() { FunctionInterceptor<LINE-1>::Execute(); }
-};
-
-// There shouldn't be any hooks with negative definition line number.
-template<>
-struct FunctionInterceptor<0> {
-  static void Execute() {}
-};
-
-#define INTERCEPT_WHEN_POSSIBLE(main_function, dll_function)                   \
-  template <> struct FunctionInterceptor<__LINE__> {                           \
-    static void Execute() {                                                    \
-      uptr wrapper = getRealProcAddressOrDie(main_function);                   \
-      if (!__interception::OverrideFunction((uptr)dll_function, wrapper, 0))   \
-        abort();                                                               \
-      FunctionInterceptor<__LINE__ - 1>::Execute();                            \
-    }                                                                          \
-  };
-
-// Special case of hooks -- ASan own interface functions.  Those are only called
-// after __asan_init, thus an empty implementation is sufficient.
-#define INTERFACE_FUNCTION(name)                                               \
-  extern "C" __declspec(noinline) void name() {                                \
-    volatile int prevent_icf = (__LINE__ << 8); (void)prevent_icf;             \
-    __debugbreak();                                                            \
-  }                                                                            \
-  INTERCEPT_WHEN_POSSIBLE(#name, name)
-
-// INTERCEPT_HOOKS must be used after the last INTERCEPT_WHEN_POSSIBLE.
-#define INTERCEPT_HOOKS FunctionInterceptor<__LINE__>::Execute
-
-// We can't define our own version of strlen etc. because that would lead to
-// link-time or even type mismatch errors.  Instead, we can declare a function
-// just to be able to get its address.  Me may miss the first few calls to the
-// functions since it can be called before __asan_init, but that would lead to
-// false negatives in the startup code before user's global initializers, which
-// isn't a big deal.
-#define INTERCEPT_LIBRARY_FUNCTION(name)                                       \
-  extern "C" void name();                                                      \
-  INTERCEPT_WHEN_POSSIBLE(WRAPPER_NAME(name), name)
-
-// Disable compiler warnings that show up if we declare our own version
-// of a compiler intrinsic (e.g. strlen).
-#pragma warning(disable: 4391)
-#pragma warning(disable: 4392)
-
-static void InterceptHooks();
-// }}}
-
-// ---------- Function wrapping helpers ----------------------------------- {{{1
-#define WRAP_V_V(name)                                                         \
-  extern "C" void name() {                                                     \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
-    fn();                                                                      \
-  }                                                                            \
-  INTERCEPT_WHEN_POSSIBLE(#name, name);
-
-#define WRAP_V_W(name)                                                         \
-  extern "C" void name(void *arg) {                                            \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
-    fn(arg);                                                                   \
-  }                                                                            \
-  INTERCEPT_WHEN_POSSIBLE(#name, name);
-
-#define WRAP_V_WW(name)                                                        \
-  extern "C" void name(void *arg1, void *arg2) {                               \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
-    fn(arg1, arg2);                                                            \
-  }                                                                            \
-  INTERCEPT_WHEN_POSSIBLE(#name, name);
-
-#define WRAP_V_WWW(name)                                                       \
-  extern "C" void name(void *arg1, void *arg2, void *arg3) {                   \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
-    fn(arg1, arg2, arg3);                                                      \
-  }                                                                            \
-  INTERCEPT_WHEN_POSSIBLE(#name, name);
-
-#define WRAP_W_V(name)                                                         \
-  extern "C" void *name() {                                                    \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
-    return fn();                                                               \
-  }                                                                            \
-  INTERCEPT_WHEN_POSSIBLE(#name, name);
-
-#define WRAP_W_W(name)                                                         \
-  extern "C" void *name(void *arg) {                                           \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
-    return fn(arg);                                                            \
-  }                                                                            \
-  INTERCEPT_WHEN_POSSIBLE(#name, name);
-
-#define WRAP_W_WW(name)                                                        \
-  extern "C" void *name(void *arg1, void *arg2) {                              \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
-    return fn(arg1, arg2);                                                     \
-  }                                                                            \
-  INTERCEPT_WHEN_POSSIBLE(#name, name);
-
-#define WRAP_W_WWW(name)                                                       \
-  extern "C" void *name(void *arg1, void *arg2, void *arg3) {                  \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
-    return fn(arg1, arg2, arg3);                                               \
-  }                                                                            \
-  INTERCEPT_WHEN_POSSIBLE(#name, name);
-
-#define WRAP_W_WWWW(name)                                                      \
-  extern "C" void *name(void *arg1, void *arg2, void *arg3, void *arg4) {      \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
-    return fn(arg1, arg2, arg3, arg4);                                         \
-  }                                                                            \
-  INTERCEPT_WHEN_POSSIBLE(#name, name);
-
-#define WRAP_W_WWWWW(name)                                                     \
-  extern "C" void *name(void *arg1, void *arg2, void *arg3, void *arg4,        \
-                        void *arg5) {                                          \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
-    return fn(arg1, arg2, arg3, arg4, arg5);                                   \
-  }                                                                            \
-  INTERCEPT_WHEN_POSSIBLE(#name, name);
-
-#define WRAP_W_WWWWWW(name)                                                    \
-  extern "C" void *name(void *arg1, void *arg2, void *arg3, void *arg4,        \
-                        void *arg5, void *arg6) {                              \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
-    return fn(arg1, arg2, arg3, arg4, arg5, arg6);                             \
-  }                                                                            \
-  INTERCEPT_WHEN_POSSIBLE(#name, name);
-// }}}
-
-// ----------------- ASan own interface functions --------------------
-// Don't use the INTERFACE_FUNCTION machinery for this function as we actually
-// want to call it in the __asan_init interceptor.
-WRAP_W_V(__asan_should_detect_stack_use_after_return)
-WRAP_W_V(__asan_get_shadow_memory_dynamic_address)
-
-extern "C" {
-  int __asan_option_detect_stack_use_after_return;
-  uptr __asan_shadow_memory_dynamic_address;
-
-  // Manually wrap __asan_init as we need to initialize
-  // __asan_option_detect_stack_use_after_return afterwards.
-  void __asan_init() {
-    typedef void (*fntype)();
-    static fntype fn = 0;
-    // __asan_init is expected to be called by only one thread.
-    if (fn) return;
-
-    fn = (fntype)getRealProcAddressOrDie("__asan_init");
-    fn();
-    __asan_option_detect_stack_use_after_return =
-        (__asan_should_detect_stack_use_after_return() != 0);
-    __asan_shadow_memory_dynamic_address =
-        (uptr)__asan_get_shadow_memory_dynamic_address();
-    InterceptHooks();
-  }
-}
-
-extern "C" void __asan_version_mismatch_check() {
-  // Do nothing.
-}
-
-INTERFACE_FUNCTION(__asan_handle_no_return)
-INTERFACE_FUNCTION(__asan_unhandled_exception_filter)
-
-INTERFACE_FUNCTION(__asan_report_store1)
-INTERFACE_FUNCTION(__asan_report_store2)
-INTERFACE_FUNCTION(__asan_report_store4)
-INTERFACE_FUNCTION(__asan_report_store8)
-INTERFACE_FUNCTION(__asan_report_store16)
-INTERFACE_FUNCTION(__asan_report_store_n)
-
-INTERFACE_FUNCTION(__asan_report_load1)
-INTERFACE_FUNCTION(__asan_report_load2)
-INTERFACE_FUNCTION(__asan_report_load4)
-INTERFACE_FUNCTION(__asan_report_load8)
-INTERFACE_FUNCTION(__asan_report_load16)
-INTERFACE_FUNCTION(__asan_report_load_n)
-
-INTERFACE_FUNCTION(__asan_store1)
-INTERFACE_FUNCTION(__asan_store2)
-INTERFACE_FUNCTION(__asan_store4)
-INTERFACE_FUNCTION(__asan_store8)
-INTERFACE_FUNCTION(__asan_store16)
-INTERFACE_FUNCTION(__asan_storeN)
-
-INTERFACE_FUNCTION(__asan_load1)
-INTERFACE_FUNCTION(__asan_load2)
-INTERFACE_FUNCTION(__asan_load4)
-INTERFACE_FUNCTION(__asan_load8)
-INTERFACE_FUNCTION(__asan_load16)
-INTERFACE_FUNCTION(__asan_loadN)
-
-INTERFACE_FUNCTION(__asan_memcpy);
-INTERFACE_FUNCTION(__asan_memset);
-INTERFACE_FUNCTION(__asan_memmove);
-
-INTERFACE_FUNCTION(__asan_set_shadow_00);
-INTERFACE_FUNCTION(__asan_set_shadow_f1);
-INTERFACE_FUNCTION(__asan_set_shadow_f2);
-INTERFACE_FUNCTION(__asan_set_shadow_f3);
-INTERFACE_FUNCTION(__asan_set_shadow_f5);
-INTERFACE_FUNCTION(__asan_set_shadow_f8);
-
-INTERFACE_FUNCTION(__asan_alloca_poison);
-INTERFACE_FUNCTION(__asan_allocas_unpoison);
-
-INTERFACE_FUNCTION(__asan_register_globals)
-INTERFACE_FUNCTION(__asan_unregister_globals)
-
-INTERFACE_FUNCTION(__asan_before_dynamic_init)
-INTERFACE_FUNCTION(__asan_after_dynamic_init)
-
-INTERFACE_FUNCTION(__asan_poison_stack_memory)
-INTERFACE_FUNCTION(__asan_unpoison_stack_memory)
-
-INTERFACE_FUNCTION(__asan_poison_memory_region)
-INTERFACE_FUNCTION(__asan_unpoison_memory_region)
-
-INTERFACE_FUNCTION(__asan_address_is_poisoned)
-INTERFACE_FUNCTION(__asan_region_is_poisoned)
-
-INTERFACE_FUNCTION(__asan_get_current_fake_stack)
-INTERFACE_FUNCTION(__asan_addr_is_in_fake_stack)
-
-INTERFACE_FUNCTION(__asan_stack_malloc_0)
-INTERFACE_FUNCTION(__asan_stack_malloc_1)
-INTERFACE_FUNCTION(__asan_stack_malloc_2)
-INTERFACE_FUNCTION(__asan_stack_malloc_3)
-INTERFACE_FUNCTION(__asan_stack_malloc_4)
-INTERFACE_FUNCTION(__asan_stack_malloc_5)
-INTERFACE_FUNCTION(__asan_stack_malloc_6)
-INTERFACE_FUNCTION(__asan_stack_malloc_7)
-INTERFACE_FUNCTION(__asan_stack_malloc_8)
-INTERFACE_FUNCTION(__asan_stack_malloc_9)
-INTERFACE_FUNCTION(__asan_stack_malloc_10)
-
-INTERFACE_FUNCTION(__asan_stack_free_0)
-INTERFACE_FUNCTION(__asan_stack_free_1)
-INTERFACE_FUNCTION(__asan_stack_free_2)
-INTERFACE_FUNCTION(__asan_stack_free_4)
-INTERFACE_FUNCTION(__asan_stack_free_5)
-INTERFACE_FUNCTION(__asan_stack_free_6)
-INTERFACE_FUNCTION(__asan_stack_free_7)
-INTERFACE_FUNCTION(__asan_stack_free_8)
-INTERFACE_FUNCTION(__asan_stack_free_9)
-INTERFACE_FUNCTION(__asan_stack_free_10)
-
-// FIXME: we might want to have a sanitizer_win_dll_thunk?
-INTERFACE_FUNCTION(__sanitizer_annotate_contiguous_container)
-INTERFACE_FUNCTION(__sanitizer_contiguous_container_find_bad_address)
-INTERFACE_FUNCTION(__sanitizer_cov)
-INTERFACE_FUNCTION(__sanitizer_cov_dump)
-INTERFACE_FUNCTION(__sanitizer_cov_indir_call16)
-INTERFACE_FUNCTION(__sanitizer_cov_init)
-INTERFACE_FUNCTION(__sanitizer_cov_module_init)
-INTERFACE_FUNCTION(__sanitizer_cov_trace_basic_block)
-INTERFACE_FUNCTION(__sanitizer_cov_trace_func_enter)
-INTERFACE_FUNCTION(__sanitizer_cov_with_check)
-INTERFACE_FUNCTION(__sanitizer_get_allocated_size)
-INTERFACE_FUNCTION(__sanitizer_get_coverage_guards)
-INTERFACE_FUNCTION(__sanitizer_get_current_allocated_bytes)
-INTERFACE_FUNCTION(__sanitizer_get_estimated_allocated_size)
-INTERFACE_FUNCTION(__sanitizer_get_free_bytes)
-INTERFACE_FUNCTION(__sanitizer_get_heap_size)
-INTERFACE_FUNCTION(__sanitizer_get_ownership)
-INTERFACE_FUNCTION(__sanitizer_get_total_unique_caller_callee_pairs)
-INTERFACE_FUNCTION(__sanitizer_get_total_unique_coverage)
-INTERFACE_FUNCTION(__sanitizer_get_unmapped_bytes)
-INTERFACE_FUNCTION(__sanitizer_maybe_open_cov_file)
-INTERFACE_FUNCTION(__sanitizer_print_stack_trace)
-INTERFACE_FUNCTION(__sanitizer_symbolize_pc)
-INTERFACE_FUNCTION(__sanitizer_symbolize_global)
-INTERFACE_FUNCTION(__sanitizer_ptr_cmp)
-INTERFACE_FUNCTION(__sanitizer_ptr_sub)
-INTERFACE_FUNCTION(__sanitizer_report_error_summary)
-INTERFACE_FUNCTION(__sanitizer_reset_coverage)
-INTERFACE_FUNCTION(__sanitizer_get_number_of_counters)
-INTERFACE_FUNCTION(__sanitizer_update_counter_bitset_and_clear_counters)
-INTERFACE_FUNCTION(__sanitizer_sandbox_on_notify)
-INTERFACE_FUNCTION(__sanitizer_set_death_callback)
-INTERFACE_FUNCTION(__sanitizer_set_report_path)
-INTERFACE_FUNCTION(__sanitizer_set_report_fd)
-INTERFACE_FUNCTION(__sanitizer_unaligned_load16)
-INTERFACE_FUNCTION(__sanitizer_unaligned_load32)
-INTERFACE_FUNCTION(__sanitizer_unaligned_load64)
-INTERFACE_FUNCTION(__sanitizer_unaligned_store16)
-INTERFACE_FUNCTION(__sanitizer_unaligned_store32)
-INTERFACE_FUNCTION(__sanitizer_unaligned_store64)
-INTERFACE_FUNCTION(__sanitizer_verify_contiguous_container)
-INTERFACE_FUNCTION(__sanitizer_install_malloc_and_free_hooks)
-INTERFACE_FUNCTION(__sanitizer_start_switch_fiber)
-INTERFACE_FUNCTION(__sanitizer_finish_switch_fiber)
-INTERFACE_FUNCTION(__sanitizer_get_module_and_offset_for_pc)
-
-// TODO(timurrrr): Add more interface functions on the as-needed basis.
-
-// ----------------- Memory allocation functions ---------------------
-WRAP_V_W(free)
-WRAP_V_W(_free_base)
-WRAP_V_WW(_free_dbg)
-
-WRAP_W_W(malloc)
-WRAP_W_W(_malloc_base)
-WRAP_W_WWWW(_malloc_dbg)
-
-WRAP_W_WW(calloc)
-WRAP_W_WW(_calloc_base)
-WRAP_W_WWWWW(_calloc_dbg)
-WRAP_W_WWW(_calloc_impl)
-
-WRAP_W_WW(realloc)
-WRAP_W_WW(_realloc_base)
-WRAP_W_WWW(_realloc_dbg)
-WRAP_W_WWW(_recalloc)
-WRAP_W_WWW(_recalloc_base)
-
-WRAP_W_W(_msize)
-WRAP_W_W(_expand)
-WRAP_W_W(_expand_dbg)
+INTERCEPT_WRAP_W_W(_msize)
+INTERCEPT_WRAP_W_W(_expand)
+INTERCEPT_WRAP_W_W(_expand_dbg)
 
 // TODO(timurrrr): Might want to add support for _aligned_* allocation
 // functions to detect a bit more bugs.  Those functions seem to wrap malloc().
@@ -401,20 +58,6 @@
 
 INTERCEPT_LIBRARY_FUNCTION(atoi);
 INTERCEPT_LIBRARY_FUNCTION(atol);
-
-#ifdef _WIN64
-INTERCEPT_LIBRARY_FUNCTION(__C_specific_handler);
-#else
-INTERCEPT_LIBRARY_FUNCTION(_except_handler3);
-
-// _except_handler4 checks -GS cookie which is different for each module, so we
-// can't use INTERCEPT_LIBRARY_FUNCTION(_except_handler4).
-INTERCEPTOR(int, _except_handler4, void *a, void *b, void *c, void *d) {
-  __asan_handle_no_return();
-  return REAL(_except_handler4)(a, b, c, d);
-}
-#endif
-
 INTERCEPT_LIBRARY_FUNCTION(frexp);
 INTERCEPT_LIBRARY_FUNCTION(longjmp);
 #if SANITIZER_INTERCEPT_MEMCHR
@@ -442,38 +85,66 @@
 INTERCEPT_LIBRARY_FUNCTION(strtol);
 INTERCEPT_LIBRARY_FUNCTION(wcslen);
 
-// Must be after all the interceptor declarations due to the way INTERCEPT_HOOKS
-// is defined.
-void InterceptHooks() {
-  INTERCEPT_HOOKS();
+#ifdef _WIN64
+INTERCEPT_LIBRARY_FUNCTION(__C_specific_handler);
+#else
+INTERCEPT_LIBRARY_FUNCTION(_except_handler3);
+// _except_handler4 checks -GS cookie which is different for each module, so we
+// can't use INTERCEPT_LIBRARY_FUNCTION(_except_handler4).
+INTERCEPTOR(int, _except_handler4, void *a, void *b, void *c, void *d) {
+  __asan_handle_no_return();
+  return REAL(_except_handler4)(a, b, c, d);
+}
+#endif
+
+// Windows-specific functions not included in asan_interface.inc.
+INTERCEPT_WRAP_W_V(__asan_should_detect_stack_use_after_return)
+INTERCEPT_WRAP_W_V(__asan_get_shadow_memory_dynamic_address)
+INTERCEPT_WRAP_W_W(__asan_unhandled_exception_filter)
+
+using namespace __sanitizer;
+
+extern "C" {
+int __asan_option_detect_stack_use_after_return;
+uptr __asan_shadow_memory_dynamic_address;
+} // extern "C"
+
+static int asan_dll_thunk_init() {
+  typedef void (*fntype)();
+  static fntype fn = 0;
+  // asan_dll_thunk_init is expected to be called by only one thread.
+  if (fn) return 0;
+
+  // Ensure all interceptions have been executed.
+  __dll_thunk_init();
+
+  fn = (fntype) dllThunkGetRealAddrOrDie("__asan_init");
+  fn();
+  __asan_option_detect_stack_use_after_return =
+      (__asan_should_detect_stack_use_after_return() != 0);
+  __asan_shadow_memory_dynamic_address =
+      (uptr)__asan_get_shadow_memory_dynamic_address();
+
 #ifndef _WIN64
   INTERCEPT_FUNCTION(_except_handler4);
 #endif
-}
-
-// We want to call __asan_init before C/C++ initializers/constructors are
-// executed, otherwise functions like memset might be invoked.
-// For some strange reason, merely linking in asan_preinit.cc doesn't work
-// as the callback is never called...  Is link.exe doing something too smart?
-
-// In DLLs, the callbacks are expected to return 0,
-// otherwise CRT initialization fails.
-static int call_asan_init() {
-  __asan_init();
+  // In DLLs, the callbacks are expected to return 0,
+  // otherwise CRT initialization fails.
   return 0;
 }
+
 #pragma section(".CRT$XIB", long, read)  // NOLINT
-__declspec(allocate(".CRT$XIB")) int (*__asan_preinit)() = call_asan_init;
+__declspec(allocate(".CRT$XIB")) int (*__asan_preinit)() = asan_dll_thunk_init;
 
 static void WINAPI asan_thread_init(void *mod, unsigned long reason,
-                                   void *reserved) {
-  if (reason == /*DLL_PROCESS_ATTACH=*/1) __asan_init();
+                                    void *reserved) {
+  if (reason == /*DLL_PROCESS_ATTACH=*/1) asan_dll_thunk_init();
 }
 
 #pragma section(".CRT$XLAB", long, read)  // NOLINT
 __declspec(allocate(".CRT$XLAB")) void (WINAPI *__asan_tls_init)(void *,
     unsigned long, void *) = asan_thread_init;
 
-ASAN_LINK_GLOBALS_WIN()
+WIN_FORCE_LINK(__asan_dso_reg_hook)
 
-#endif // ASAN_DLL_THUNK
+#endif // SANITIZER_DLL_THUNK
diff --git a/lib/asan/asan_win_dynamic_runtime_thunk.cc b/lib/asan/asan_win_dynamic_runtime_thunk.cc
index 8e42f03..416c73b 100644
--- a/lib/asan/asan_win_dynamic_runtime_thunk.cc
+++ b/lib/asan/asan_win_dynamic_runtime_thunk.cc
@@ -14,20 +14,24 @@
 // using the default "import library" generated when linking the DLL RTL.
 //
 // This includes:
+//  - creating weak aliases to default implementation imported from asan dll.
 //  - forwarding the detect_stack_use_after_return runtime option
 //  - working around deficiencies of the MD runtime
 //  - installing a custom SEH handler
 //
 //===----------------------------------------------------------------------===//
 
-// Only compile this code when building asan_dynamic_runtime_thunk.lib
-// Using #ifdef rather than relying on Makefiles etc.
-// simplifies the build procedure.
-#ifdef ASAN_DYNAMIC_RUNTIME_THUNK
-#include "asan_globals_win.h"
+#ifdef SANITIZER_DYNAMIC_RUNTIME_THUNK
+#define SANITIZER_IMPORT_INTERFACE 1
+#include "sanitizer_common/sanitizer_win_defs.h"
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 
+// Define a weak alias for every weak function imported from the asan dll.
+#define INTERFACE_FUNCTION(Name)
+#define INTERFACE_WEAK_FUNCTION(Name) WIN_WEAK_IMPORT_DEF(Name)
+#include "asan_interface.inc"
+
 // First, declare CRT sections we'll be using in this file
 #pragma section(".CRT$XIB", long, read)  // NOLINT
 #pragma section(".CRT$XID", long, read)  // NOLINT
@@ -122,6 +126,6 @@
     SetSEHFilter;
 }
 
-ASAN_LINK_GLOBALS_WIN()
+WIN_FORCE_LINK(__asan_dso_reg_hook)
 
-#endif // ASAN_DYNAMIC_RUNTIME_THUNK
+#endif // SANITIZER_DYNAMIC_RUNTIME_THUNK
diff --git a/lib/asan/asan_win_weak_interception.cc b/lib/asan/asan_win_weak_interception.cc
new file mode 100644
index 0000000..ca26f91
--- /dev/null
+++ b/lib/asan/asan_win_weak_interception.cc
@@ -0,0 +1,23 @@
+//===-- asan_win_weak_interception.cc -------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This module should be included in Address Sanitizer when it is implemented as
+// a shared library on Windows (dll), in order to delegate the calls of weak
+// functions to the implementation in the main executable when a strong
+// definition is provided.
+//===----------------------------------------------------------------------===//
+#ifdef SANITIZER_DYNAMIC
+#include "sanitizer_common/sanitizer_win_weak_interception.h"
+#include "asan_interface_internal.h"
+// Check if strong definitions for weak functions are present in the main
+// executable. If that is the case, override dll functions to point to strong
+// implementations.
+#define INTERFACE_FUNCTION(Name)
+#define INTERFACE_WEAK_FUNCTION(Name) INTERCEPT_SANITIZER_WEAK_FUNCTION(Name)
+#include "asan_interface.inc"
+#endif // SANITIZER_DYNAMIC
diff --git a/lib/asan/scripts/asan_symbolize.py b/lib/asan/scripts/asan_symbolize.py
index 8e6fb61..1a56e44 100755
--- a/lib/asan/scripts/asan_symbolize.py
+++ b/lib/asan/scripts/asan_symbolize.py
@@ -24,6 +24,7 @@
 fix_filename_patterns = None
 logfile = sys.stdin
 allow_system_symbolizer = True
+force_system_symbolizer = False
 
 # FIXME: merge the code that calls fix_filename().
 def fix_filename(file_name):
@@ -37,6 +38,10 @@
 def sysroot_path_filter(binary_name):
   return sysroot_path + binary_name
 
+def is_valid_arch(s):  # True iff s is one of the arch names listed below.
+  return s in ["i386", "x86_64", "x86_64h", "arm", "armv6", "armv7", "armv7s",
+               "armv7k", "arm64", "powerpc64", "powerpc64le", "s390x", "s390"]
+
 def guess_arch(addr):
   # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
   if len(addr) > 10:
@@ -84,10 +89,12 @@
       for hint in self.dsym_hints:
         cmd.append('--dsym-hint=%s' % hint)
     if DEBUG:
-      print ' '.join(cmd)
+      print(' '.join(cmd))
     try:
       result = subprocess.Popen(cmd, stdin=subprocess.PIPE,
-                                stdout=subprocess.PIPE)
+                                stdout=subprocess.PIPE,
+                                bufsize=0,
+                                universal_newlines=True)
     except OSError:
       result = None
     return result
@@ -100,8 +107,8 @@
     try:
       symbolizer_input = '"%s" %s' % (binary, offset)
       if DEBUG:
-        print symbolizer_input
-      print >> self.pipe.stdin, symbolizer_input
+        print(symbolizer_input)
+      self.pipe.stdin.write("%s\n" % symbolizer_input)
       while True:
         function_name = self.pipe.stdout.readline().rstrip()
         if not function_name:
@@ -146,9 +153,11 @@
       cmd += ['--demangle']
     cmd += ['-e', self.binary]
     if DEBUG:
-      print ' '.join(cmd)
+      print(' '.join(cmd))
     return subprocess.Popen(cmd,
-                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+                            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+                            bufsize=0,
+                            universal_newlines=True)
 
   def symbolize(self, addr, binary, offset):
     """Overrides Symbolizer.symbolize."""
@@ -156,8 +165,8 @@
       return None
     lines = []
     try:
-      print >> self.pipe.stdin, offset
-      print >> self.pipe.stdin, self.output_terminator
+      self.pipe.stdin.write("%s\n" % offset)
+      self.pipe.stdin.write("%s\n" % self.output_terminator)
       is_first_frame = True
       while True:
         function_name = self.pipe.stdout.readline().rstrip()
@@ -206,15 +215,15 @@
 
 
 class DarwinSymbolizer(Symbolizer):
-  def __init__(self, addr, binary):
+  def __init__(self, addr, binary, arch):
     super(DarwinSymbolizer, self).__init__()
     self.binary = binary
-    self.arch = guess_arch(addr)
+    self.arch = arch
     self.open_atos()
 
   def open_atos(self):
     if DEBUG:
-      print 'atos -o %s -arch %s' % (self.binary, self.arch)
+      print('atos -o %s -arch %s' % (self.binary, self.arch))
     cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
     self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)
 
@@ -229,7 +238,7 @@
     #   foo(type1, type2) (in object.name) (filename.cc:80)
     match = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
     if DEBUG:
-      print 'atos_line: ', atos_line
+      print('atos_line: ', atos_line)
     if match:
       function_name = match.group(1)
       function_name = re.sub('\(.*?\)', '', function_name)
@@ -268,9 +277,9 @@
   return None
 
 
-def SystemSymbolizerFactory(system, addr, binary):
+def SystemSymbolizerFactory(system, addr, binary, arch):
   if system == 'Darwin':
-    return DarwinSymbolizer(addr, binary)
+    return DarwinSymbolizer(addr, binary, arch)
   elif system == 'Linux' or system == 'FreeBSD':
     return Addr2LineSymbolizer(binary)
 
@@ -343,7 +352,7 @@
       function_name, file_name, line_no = res
       result = ['%s in %s %s:%d' % (
           addr, function_name, file_name, line_no)]
-      print result
+      print(result)
       return result
     else:
       return None
@@ -369,7 +378,7 @@
       self.frame_no = 0
       self.process_line = self.process_line_posix
 
-  def symbolize_address(self, addr, binary, offset):
+  def symbolize_address(self, addr, binary, offset, arch):
     # On non-Darwin (i.e. on platforms without .dSYM debug info) always use
     # a single symbolizer binary.
     # On Darwin, if the dsym hint producer is present:
@@ -381,31 +390,35 @@
     #     if so, reuse |last_llvm_symbolizer| which has the full set of hints;
     #  3. otherwise create a new symbolizer and pass all currently known
     #     .dSYM hints to it.
-    if not binary in self.llvm_symbolizers:
-      use_new_symbolizer = True
-      if self.system == 'Darwin' and self.dsym_hint_producer:
-        dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
-        use_new_symbolizer = bool(dsym_hints_for_binary - self.dsym_hints)
-        self.dsym_hints |= dsym_hints_for_binary
-      if self.last_llvm_symbolizer and not use_new_symbolizer:
+    result = None
+    if not force_system_symbolizer:
+      if not binary in self.llvm_symbolizers:
+        use_new_symbolizer = True
+        if self.system == 'Darwin' and self.dsym_hint_producer:
+          dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
+          use_new_symbolizer = bool(dsym_hints_for_binary - self.dsym_hints)
+          self.dsym_hints |= dsym_hints_for_binary
+        if self.last_llvm_symbolizer and not use_new_symbolizer:
+            self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
+        else:
+          self.last_llvm_symbolizer = LLVMSymbolizerFactory(
+              self.system, arch, self.dsym_hints)
           self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
-      else:
-        self.last_llvm_symbolizer = LLVMSymbolizerFactory(
-            self.system, guess_arch(addr), self.dsym_hints)
-        self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
-    # Use the chain of symbolizers:
-    # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
-    # (fall back to next symbolizer if the previous one fails).
-    if not binary in symbolizers:
-      symbolizers[binary] = ChainSymbolizer(
-          [BreakpadSymbolizerFactory(binary), self.llvm_symbolizers[binary]])
-    result = symbolizers[binary].symbolize(addr, binary, offset)
+      # Use the chain of symbolizers:
+      # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
+      # (fall back to next symbolizer if the previous one fails).
+      if not binary in symbolizers:
+        symbolizers[binary] = ChainSymbolizer(
+            [BreakpadSymbolizerFactory(binary), self.llvm_symbolizers[binary]])
+      result = symbolizers[binary].symbolize(addr, binary, offset)
+    else:
+      symbolizers[binary] = ChainSymbolizer([])
     if result is None:
       if not allow_system_symbolizer:
         raise Exception('Failed to launch or use llvm-symbolizer.')
       # Initialize system symbolizer only if other symbolizers failed.
       symbolizers[binary].append_symbolizer(
-          SystemSymbolizerFactory(self.system, addr, binary))
+          SystemSymbolizerFactory(self.system, addr, binary, arch))
       result = symbolizers[binary].symbolize(addr, binary, offset)
     # The system symbolizer must produce some result.
     assert result
@@ -425,7 +438,7 @@
     self.frame_no = 0
     for line in logfile:
       processed = self.process_line(line)
-      print '\n'.join(processed)
+      print('\n'.join(processed))
 
   def process_line_echo(self, line):
     return [line.rstrip()]
@@ -439,18 +452,28 @@
     if not match:
       return [self.current_line]
     if DEBUG:
-      print line
+      print(line)
     _, frameno_str, addr, binary, offset = match.groups()
+    arch = ""
+    # Arch can be embedded in the filename, e.g.: "libabc.dylib:x86_64h"
+    colon_pos = binary.rfind(":")
+    if colon_pos != -1:
+      maybe_arch = binary[colon_pos+1:]
+      if is_valid_arch(maybe_arch):
+        arch = maybe_arch
+        binary = binary[0:colon_pos]
+    if arch == "":
+      arch = guess_arch(addr)
     if frameno_str == '0':
       # Assume that frame #0 is the first frame of new stack trace.
       self.frame_no = 0
     original_binary = binary
     if self.binary_name_filter:
       binary = self.binary_name_filter(binary)
-    symbolized_line = self.symbolize_address(addr, binary, offset)
+    symbolized_line = self.symbolize_address(addr, binary, offset, arch)
     if not symbolized_line:
       if original_binary != binary:
-        symbolized_line = self.symbolize_address(addr, binary, offset)
+        symbolized_line = self.symbolize_address(addr, binary, offset, arch)
     return self.get_symbolized_lines(symbolized_line)
 
 
@@ -472,6 +495,8 @@
   parser.add_argument('-l','--logfile', default=sys.stdin,
                       type=argparse.FileType('r'),
                       help='set log file name to parse, default is stdin')
+  parser.add_argument('--force-system-symbolizer', action='store_true',
+                      help='don\'t use llvm-symbolizer')
   args = parser.parse_args()
   if args.path_to_cut:
     fix_filename_patterns = args.path_to_cut
@@ -486,5 +511,9 @@
     logfile = args.logfile
   else:
     logfile = sys.stdin
+  if args.force_system_symbolizer:
+    force_system_symbolizer = True
+  if force_system_symbolizer:
+    assert(allow_system_symbolizer)
   loop = SymbolizationLoop(binary_name_filter)
   loop.process_logfile()
diff --git a/lib/asan/tests/CMakeLists.txt b/lib/asan/tests/CMakeLists.txt
index 3e56763..8089d51 100644
--- a/lib/asan/tests/CMakeLists.txt
+++ b/lib/asan/tests/CMakeLists.txt
@@ -36,8 +36,8 @@
 
 # This will ensure the target linker is used
 # during cross compilation
-set(ASAN_UNITTEST_COMMON_LINKFLAGS
-  ${COMPILER_RT_UNITTEST_LINKFLAGS})
+set(ASAN_UNITTEST_COMMON_LINK_FLAGS
+  ${COMPILER_RT_UNITTEST_LINK_FLAGS})
 
 # -gline-tables-only must be enough for ASan, so use it if possible.
 if(COMPILER_RT_TEST_COMPILER_ID MATCHES "Clang")
@@ -48,7 +48,7 @@
 if(MSVC)
   list(APPEND ASAN_UNITTEST_COMMON_CFLAGS -gcodeview)
 endif()
-list(APPEND ASAN_UNITTEST_COMMON_LINKFLAGS -g)
+list(APPEND ASAN_UNITTEST_COMMON_LINK_FLAGS -g)
 
 # Use -D instead of definitions to please custom compile command.
 list(APPEND ASAN_UNITTEST_COMMON_CFLAGS
@@ -58,7 +58,12 @@
 
 if(APPLE)
   list(APPEND ASAN_UNITTEST_COMMON_CFLAGS ${DARWIN_osx_CFLAGS})
-  list(APPEND ASAN_UNITTEST_COMMON_LINKFLAGS ${DARWIN_osx_LINKFLAGS})
+  list(APPEND ASAN_UNITTEST_COMMON_LINK_FLAGS ${DARWIN_osx_LINK_FLAGS})
+
+  add_weak_symbols("asan" WEAK_SYMBOL_LINK_FLAGS)
+  add_weak_symbols("ubsan" WEAK_SYMBOL_LINK_FLAGS)
+  add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
+  list(APPEND ASAN_UNITTEST_COMMON_LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS})
 endif()
 
 if(MSVC)
@@ -77,41 +82,41 @@
 endif()
 
 if(NOT MSVC)
-  list(APPEND ASAN_UNITTEST_COMMON_LINKFLAGS --driver-mode=g++)
+  list(APPEND ASAN_UNITTEST_COMMON_LINK_FLAGS --driver-mode=g++)
 endif()
 
 # x86_64 FreeBSD 9.2 additionally requires libc++ to build the tests.
 if(CMAKE_SYSTEM MATCHES "FreeBSD-9.2-RELEASE")
-  list(APPEND ASAN_UNITTEST_COMMON_LINKFLAGS "-lc++")
+  list(APPEND ASAN_UNITTEST_COMMON_LINK_FLAGS "-lc++")
 endif()
 
 # Unit tests on Mac depend on Foundation.
 if(APPLE)
-  list(APPEND ASAN_UNITTEST_COMMON_LINKFLAGS -framework Foundation)
+  list(APPEND ASAN_UNITTEST_COMMON_LINK_FLAGS -framework Foundation)
 endif()
 if(ANDROID)
-  list(APPEND ASAN_UNITTEST_COMMON_LINKFLAGS -pie)
+  list(APPEND ASAN_UNITTEST_COMMON_LINK_FLAGS -pie)
 endif()
 
-set(ASAN_UNITTEST_INSTRUMENTED_LINKFLAGS
-  ${ASAN_UNITTEST_COMMON_LINKFLAGS})
-list(APPEND ASAN_UNITTEST_INSTRUMENTED_LINKFLAGS -fsanitize=address)
+set(ASAN_UNITTEST_INSTRUMENTED_LINK_FLAGS
+  ${ASAN_UNITTEST_COMMON_LINK_FLAGS})
+list(APPEND ASAN_UNITTEST_INSTRUMENTED_LINK_FLAGS -fsanitize=address)
 
-set(ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINKFLAGS
-  ${ASAN_UNITTEST_INSTRUMENTED_LINKFLAGS}
+set(ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS
+  ${ASAN_UNITTEST_INSTRUMENTED_LINK_FLAGS}
   -shared-libasan)
 
 set(ASAN_UNITTEST_INSTRUMENTED_LIBS)
 # NDK r10 requires -latomic almost always.
 append_list_if(ANDROID atomic ASAN_UNITTEST_INSTRUMENTED_LIBS)
 
-set(ASAN_UNITTEST_NOINST_LINKFLAGS ${ASAN_UNITTEST_COMMON_LINKFLAGS})
+set(ASAN_UNITTEST_NOINST_LINK_FLAGS ${ASAN_UNITTEST_COMMON_LINK_FLAGS})
 if(NOT APPLE)
-  append_list_if(COMPILER_RT_HAS_LIBM -lm ASAN_UNITTEST_NOINST_LINKFLAGS)
-  append_list_if(COMPILER_RT_HAS_LIBDL -ldl ASAN_UNITTEST_NOINST_LINKFLAGS)
-  append_list_if(COMPILER_RT_HAS_LIBRT -lrt ASAN_UNITTEST_NOINST_LINKFLAGS)
-  append_list_if(COMPILER_RT_HAS_LIBPTHREAD -pthread ASAN_UNITTEST_NOINST_LINKFLAGS)
-  append_list_if(COMPILER_RT_HAS_LIBPTHREAD -pthread ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINKFLAGS)
+  append_list_if(COMPILER_RT_HAS_LIBM -lm ASAN_UNITTEST_NOINST_LINK_FLAGS)
+  append_list_if(COMPILER_RT_HAS_LIBDL -ldl ASAN_UNITTEST_NOINST_LINK_FLAGS)
+  append_list_if(COMPILER_RT_HAS_LIBRT -lrt ASAN_UNITTEST_NOINST_LINK_FLAGS)
+  append_list_if(COMPILER_RT_HAS_LIBPTHREAD -pthread ASAN_UNITTEST_NOINST_LINK_FLAGS)
+  append_list_if(COMPILER_RT_HAS_LIBPTHREAD -pthread ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS)
 endif()
 
 # TODO(eugenis): move all -l flags above to _LIBS?
@@ -143,7 +148,7 @@
 # Link ASan unit test for a given architecture from a set
 # of objects in with given linker flags.
 macro(add_asan_test test_suite test_name arch kind)
-  cmake_parse_arguments(TEST "WITH_TEST_RUNTIME" "" "OBJECTS;LINKFLAGS;SUBDIR" ${ARGN})
+  cmake_parse_arguments(TEST "WITH_TEST_RUNTIME" "" "OBJECTS;LINK_FLAGS;SUBDIR" ${ARGN})
   get_target_flags_for_arch(${arch} TARGET_LINK_FLAGS)
   set(TEST_DEPS ${TEST_OBJECTS})
   if(NOT COMPILER_RT_STANDALONE_BUILD)
@@ -167,7 +172,7 @@
                        SUBDIR ${TEST_SUBDIR}
                        OBJECTS ${TEST_OBJECTS}
                        DEPS ${TEST_DEPS}
-                       LINK_FLAGS ${TEST_LINKFLAGS}
+                       LINK_FLAGS ${TEST_LINK_FLAGS}
                                   ${TARGET_LINK_FLAGS})
 endmacro()
 
@@ -232,8 +237,8 @@
     endforeach()
     # Clang links the static CRT by default. Override that to use the dynamic
     # CRT.
-    set(ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINKFLAGS
-      ${ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINKFLAGS}
+    set(ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS
+      ${ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS}
       -Wl,-nodefaultlib:libcmt,-defaultlib:msvcrt,-defaultlib:oldnames)
   else()
     set(ASAN_INST_DYNAMIC_TEST_OBJECTS ${ASAN_INST_TEST_OBJECTS})
@@ -251,7 +256,7 @@
   add_asan_test(AsanUnitTests "Asan-${arch}${kind}-Test"
                 ${arch} ${kind} SUBDIR "default"
                 OBJECTS ${ASAN_INST_TEST_OBJECTS}
-                LINKFLAGS ${ASAN_UNITTEST_INSTRUMENTED_LINKFLAGS})
+                LINK_FLAGS ${ASAN_UNITTEST_INSTRUMENTED_LINK_FLAGS})
   if(COMPILER_RT_ASAN_HAS_STATIC_RUNTIME)
     # Create the 'dynamic' folder where ASAN tests are produced.
     if(CMAKE_CONFIGURATION_TYPES)
@@ -265,7 +270,7 @@
     add_asan_test(AsanDynamicUnitTests "Asan-${arch}${kind}-Dynamic-Test"
                   ${arch} ${kind} SUBDIR "dynamic"
                   OBJECTS ${ASAN_INST_DYNAMIC_TEST_OBJECTS}
-                  LINKFLAGS ${ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINKFLAGS})
+                  LINK_FLAGS ${ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS})
   endif()
 
   # Add static ASan runtime that will be linked with uninstrumented tests.
@@ -302,7 +307,7 @@
   add_asan_test(AsanUnitTests "Asan-${arch}${kind}-Noinst-Test"
                 ${arch} ${kind} SUBDIR "default"
                 OBJECTS ${ASAN_NOINST_TEST_OBJECTS}
-                LINKFLAGS ${ASAN_UNITTEST_NOINST_LINKFLAGS}
+                LINK_FLAGS ${ASAN_UNITTEST_NOINST_LINK_FLAGS}
                 WITH_TEST_RUNTIME)
 
   # Benchmarks.
@@ -314,7 +319,7 @@
   add_asan_test(AsanBenchmarks "Asan-${arch}${kind}-Benchmark"
                 ${arch} ${kind} SUBDIR "default"
                 OBJECTS ${ASAN_BENCHMARKS_OBJECTS}
-                LINKFLAGS ${ASAN_UNITTEST_INSTRUMENTED_LINKFLAGS})
+                LINK_FLAGS ${ASAN_UNITTEST_INSTRUMENTED_LINK_FLAGS})
 endmacro()
 
 if(COMPILER_RT_CAN_EXECUTE_TESTS AND NOT ANDROID)
@@ -342,7 +347,7 @@
       ${COMPILER_RT_GTEST_SOURCE}
       ${ASAN_NOINST_TEST_SOURCES})
     set_target_compile_flags(AsanNoinstTest ${ASAN_UNITTEST_COMMON_CFLAGS})
-    set_target_link_flags(AsanNoinstTest ${ASAN_UNITTEST_NOINST_LINKFLAGS})
+    set_target_link_flags(AsanNoinstTest ${ASAN_UNITTEST_NOINST_LINK_FLAGS})
     target_link_libraries(AsanNoinstTest ${ASAN_UNITTEST_NOINST_LIBS})
 
     # Test with ASan instrumentation. Link with ASan dynamic runtime.
@@ -350,7 +355,7 @@
       ${COMPILER_RT_GTEST_SOURCE}
       ${ASAN_INST_TEST_SOURCES})
     set_target_compile_flags(AsanTest ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS})
-    set_target_link_flags(AsanTest ${ASAN_UNITTEST_INSTRUMENTED_LINKFLAGS})
+    set_target_link_flags(AsanTest ${ASAN_UNITTEST_INSTRUMENTED_LINK_FLAGS})
     target_link_libraries(AsanTest ${ASAN_UNITTEST_INSTRUMENTED_LIBS})
 
     # Setup correct output directory and link flags.
diff --git a/lib/asan/tests/asan_asm_test.cc b/lib/asan/tests/asan_asm_test.cc
index 09af5c3..2bb3794 100644
--- a/lib/asan/tests/asan_asm_test.cc
+++ b/lib/asan/tests/asan_asm_test.cc
@@ -57,12 +57,13 @@
   return res;                                      \
 }
 
-#define DECLARE_ASM_REP_MOVS(Type, Movs)                                       \
-  template <> void asm_rep_movs<Type>(Type * dst, Type * src, size_t size) {   \
-    __asm__("rep " Movs " \n\t"                                                \
-            :                                                                  \
-            : "D"(dst), "S"(src), "c"(size)                                    \
-            : "rsi", "rdi", "rcx", "memory");                                  \
+#define DECLARE_ASM_REP_MOVS(Type, Movs)                         \
+  template <>                                                    \
+  void asm_rep_movs<Type>(Type * dst, Type * src, size_t size) { \
+    __asm__("rep " Movs " \n\t"                                  \
+            : "+D"(dst), "+S"(src), "+c"(size)                   \
+            :                                                    \
+            : "memory");                                         \
   }
 
 DECLARE_ASM_WRITE(U8, "8", "movq", "r");
@@ -99,12 +100,13 @@
   return res;                                      \
 }
 
-#define DECLARE_ASM_REP_MOVS(Type, Movs)                                       \
-  template <> void asm_rep_movs<Type>(Type * dst, Type * src, size_t size) {   \
-    __asm__("rep " Movs " \n\t"                                                \
-            :                                                                  \
-            : "D"(dst), "S"(src), "c"(size)                                    \
-            : "esi", "edi", "ecx", "memory");                                  \
+#define DECLARE_ASM_REP_MOVS(Type, Movs)                         \
+  template <>                                                    \
+  void asm_rep_movs<Type>(Type * dst, Type * src, size_t size) { \
+    __asm__("rep " Movs " \n\t"                                  \
+            : "+D"(dst), "+S"(src), "+c"(size)                   \
+            :                                                    \
+            : "memory");                                         \
   }
 
 } // End of anonymous namespace
diff --git a/lib/asan/tests/asan_interface_test.cc b/lib/asan/tests/asan_interface_test.cc
index fd43f17..d13962b 100644
--- a/lib/asan/tests/asan_interface_test.cc
+++ b/lib/asan/tests/asan_interface_test.cc
@@ -11,8 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 #include "asan_test_utils.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
 #include <sanitizer/allocator_interface.h>
 #include <sanitizer/asan_interface.h>
+#include <vector>
 
 TEST(AddressSanitizerInterface, GetEstimatedAllocatedSize) {
   EXPECT_EQ(0U, __sanitizer_get_estimated_allocated_size(0));
@@ -399,7 +401,7 @@
 
 TEST(AddressSanitizerInterface, SetErrorReportCallbackTest) {
   __asan_set_error_report_callback(ErrorReportCallbackOneToZ);
-  EXPECT_DEATH(__asan_report_error(0, 0, 0, 0, true, 1),
+  EXPECT_DEATH(__asan_report_error((void *)GET_CALLER_PC(), 0, 0, 0, true, 1),
                ASAN_PCRE_DOTALL "ABCDEF.*AddressSanitizer.*WRITE.*ABCDEF");
   __asan_set_error_report_callback(NULL);
 }
diff --git a/lib/asan/tests/asan_internal_interface_test.cc b/lib/asan/tests/asan_internal_interface_test.cc
index ae47594..e247bb4 100644
--- a/lib/asan/tests/asan_internal_interface_test.cc
+++ b/lib/asan/tests/asan_internal_interface_test.cc
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 #include "asan_interface_internal.h"
 #include "asan_test_utils.h"
+#include <vector>
 
 TEST(AddressSanitizerInternalInterface, SetShadow) {
   std::vector<char> buffer(17, 0xff);
diff --git a/lib/asan/tests/asan_mem_test.cc b/lib/asan/tests/asan_mem_test.cc
index 4a941fa..c320886 100644
--- a/lib/asan/tests/asan_mem_test.cc
+++ b/lib/asan/tests/asan_mem_test.cc
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "asan_test_utils.h"
+#include <vector>
 
 template<typename T>
 void MemSetOOBTestTemplate(size_t length) {
@@ -76,7 +77,7 @@
 // Strictly speaking we are not guaranteed to find such two pointers,
 // but given the structure of asan's allocator we will.
 static bool AllocateTwoAdjacentArrays(char **x1, char **x2, size_t size) {
-  vector<uintptr_t> v;
+  std::vector<uintptr_t> v;
   bool res = false;
   for (size_t i = 0; i < 1000U && !res; i++) {
     v.push_back(reinterpret_cast<uintptr_t>(new char[size]));
diff --git a/lib/asan/tests/asan_noinst_test.cc b/lib/asan/tests/asan_noinst_test.cc
index 90c6f40..b3a235e 100644
--- a/lib/asan/tests/asan_noinst_test.cc
+++ b/lib/asan/tests/asan_noinst_test.cc
@@ -97,6 +97,9 @@
   MallocStress(ASAN_LOW_MEMORY ? 300000 : 1000000);
 }
 
+#ifndef __powerpc64__
+// FIXME: This has not reliably worked on powerpc since r279664.  Re-enable
+// this once the problem is tracked down and fixed.
 TEST(AddressSanitizer, ThreadedMallocStressTest) {
   const int kNumThreads = 4;
   const int kNumIterations = (ASAN_LOW_MEMORY) ? 10000 : 100000;
@@ -109,6 +112,7 @@
     PTHREAD_JOIN(t[i], 0);
   }
 }
+#endif
 
 static void PrintShadow(const char *tag, uptr ptr, size_t size) {
   fprintf(stderr, "%s shadow: %lx size % 3ld: ", tag, (long)ptr, (long)size);
@@ -170,6 +174,12 @@
 // Check that the thread local allocators are flushed when threads are
 // destroyed.
 TEST(AddressSanitizer, ThreadedQuarantineTest) {
+  // Run the routine once to warm up ASAN internal structures to get more
+  // predictable incremental memory changes.
+  pthread_t t;
+  PTHREAD_CREATE(&t, NULL, ThreadedQuarantineTestWorker, 0);
+  PTHREAD_JOIN(t, 0);
+
   const int n_threads = 3000;
   size_t mmaped1 = __sanitizer_get_heap_size();
   for (int i = 0; i < n_threads; i++) {
@@ -177,6 +187,7 @@
     PTHREAD_CREATE(&t, NULL, ThreadedQuarantineTestWorker, 0);
     PTHREAD_JOIN(t, 0);
     size_t mmaped2 = __sanitizer_get_heap_size();
+    // TODO: Figure out why this much memory is required.
     EXPECT_LT(mmaped2 - mmaped1, 320U * (1 << 20));
   }
 }
@@ -199,6 +210,10 @@
   return NULL;
 }
 
+#ifndef __powerpc64__
+// FIXME: This has not reliably worked on powerpc since r279664.  Re-enable
+// this once the problem is tracked down and fixed.
+
 TEST(AddressSanitizer, ThreadedOneSizeMallocStressTest) {
   const int kNumThreads = 4;
   pthread_t t[kNumThreads];
@@ -209,6 +224,7 @@
     PTHREAD_JOIN(t[i], 0);
   }
 }
+#endif
 
 TEST(AddressSanitizer, ShadowRegionIsPoisonedTest) {
   using __asan::kHighMemEnd;
diff --git a/lib/asan/tests/asan_test.cc b/lib/asan/tests/asan_test.cc
index 424a79e..7bc230a 100644
--- a/lib/asan/tests/asan_test.cc
+++ b/lib/asan/tests/asan_test.cc
@@ -945,7 +945,7 @@
   char *addr = (char*)0x0000100000080000;
 # endif
 #endif
-  EXPECT_DEATH(*addr = 1, "AddressSanitizer: SEGV on unknown");
+  EXPECT_DEATH(*addr = 1, "AddressSanitizer: BUS on unknown");
 }
 #endif  // ASAN_NEEDS_SEGV
 
diff --git a/lib/asan/tests/asan_test_config.h b/lib/asan/tests/asan_test_config.h
index 92f2763..8493f41 100644
--- a/lib/asan/tests/asan_test_config.h
+++ b/lib/asan/tests/asan_test_config.h
@@ -17,13 +17,9 @@
 #ifndef ASAN_TEST_CONFIG_H
 #define ASAN_TEST_CONFIG_H
 
-#include <vector>
 #include <string>
-#include <map>
 
 using std::string;
-using std::vector;
-using std::map;
 
 #ifndef ASAN_UAR
 # error "please define ASAN_UAR"
diff --git a/lib/asan/tests/asan_test_utils.h b/lib/asan/tests/asan_test_utils.h
index 03d17cf..f16d939 100644
--- a/lib/asan/tests/asan_test_utils.h
+++ b/lib/asan/tests/asan_test_utils.h
@@ -62,7 +62,9 @@
 
 static const int kPageSize = 4096;
 
-const size_t kLargeMalloc = 1 << 24;
+// Big enough to be handled by secondary allocator and small enough to fit into
+// quarantine for all configurations.
+const size_t kLargeMalloc = 1 << 22;
 
 extern void free_aaa(void *p);
 extern void *malloc_aaa(size_t size);
diff --git a/lib/asan/weak_symbols.txt b/lib/asan/weak_symbols.txt
new file mode 100644
index 0000000..ba7b027
--- /dev/null
+++ b/lib/asan/weak_symbols.txt
@@ -0,0 +1,3 @@
+___asan_default_options
+___asan_default_suppressions
+___asan_on_error
diff --git a/lib/builtins/CMakeLists.txt b/lib/builtins/CMakeLists.txt
index b33786a..ad9059c 100644
--- a/lib/builtins/CMakeLists.txt
+++ b/lib/builtins/CMakeLists.txt
@@ -132,6 +132,7 @@
   negvdi2.c
   negvsi2.c
   negvti2.c
+  os_version_check.c
   paritydi2.c
   paritysi2.c
   parityti2.c
@@ -302,6 +303,13 @@
   arm/umodsi3.S
   ${GENERIC_SOURCES})
 
+set(thumb1_SOURCES
+  arm/divsi3.S
+  arm/udivsi3.S
+  arm/comparesf2.S
+  arm/addsf3.S
+  ${GENERIC_SOURCES})
+
 set(arm_EABI_SOURCES
   arm/aeabi_cdcmp.S
   arm/aeabi_cdcmpeq_check_nan.c
@@ -320,6 +328,7 @@
   arm/aeabi_memset.S
   arm/aeabi_uidivmod.S
   arm/aeabi_uldivmod.S)
+
 set(arm_Thumb1_JT_SOURCES
   arm/switch16.S
   arm/switch32.S
@@ -401,6 +410,10 @@
     ${arm_SOURCES}
     ${arm_EABI_SOURCES}
     ${arm_Thumb1_SOURCES})
+
+  set(thumb1_SOURCES
+    ${thumb1_SOURCES}
+    ${arm_EABI_SOURCES})
 endif()
 
 set(aarch64_SOURCES
@@ -415,8 +428,10 @@
   fixunstfti.c
   floatditf.c
   floatsitf.c
+  floattitf.c
   floatunditf.c
   floatunsitf.c
+  floatuntitf.c
   multc3.c
   trunctfdf2.c
   trunctfsf2.c
@@ -429,7 +444,7 @@
 set(arm64_SOURCES ${aarch64_SOURCES})
 
 # macho_embedded archs
-set(armv6m_SOURCES ${GENERIC_SOURCES})
+set(armv6m_SOURCES ${thumb1_SOURCES})
 set(armv7m_SOURCES ${arm_SOURCES})
 set(armv7em_SOURCES ${arm_SOURCES})
 
diff --git a/lib/builtins/arm/adddf3vfp.S b/lib/builtins/arm/adddf3vfp.S
index f4c00a0..8e476ca 100644
--- a/lib/builtins/arm/adddf3vfp.S
+++ b/lib/builtins/arm/adddf3vfp.S
@@ -18,10 +18,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__adddf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vadd.f64 d0, d0, d1
+#else
 	vmov	d6, r0, r1		// move first param from r0/r1 pair into d6
 	vmov	d7, r2, r3		// move second param from r2/r3 pair into d7
 	vadd.f64 d6, d6, d7		
 	vmov	r0, r1, d6		// move result back to r0/r1 pair
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__adddf3vfp)
 
diff --git a/lib/builtins/arm/addsf3.S b/lib/builtins/arm/addsf3.S
new file mode 100644
index 0000000..362b5c1
--- /dev/null
+++ b/lib/builtins/arm/addsf3.S
@@ -0,0 +1,277 @@
+/*===-- addsf3.S - Adds two single precision floating point numbers ------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __addsf3 function (single precision floating point
+ * addition with the IEEE-754 default rounding: to nearest, ties to even) for
+ * the ARM Thumb1 ISA.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+#define significandBits 23
+#define typeWidth 32
+
+	.syntax unified
+	.text
+  .thumb
+  .p2align 2
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fadd, __addsf3)
+
+DEFINE_COMPILERRT_THUMB_FUNCTION(__addsf3)
+  push {r4, r5, r6, r7, lr}
+  // Get the absolute value of a and b.
+  lsls r2, r0, #1
+  lsls r3, r1, #1
+  lsrs r2, r2, #1  /* aAbs */
+  beq  LOCAL_LABEL(a_zero_nan_inf)
+  lsrs r3, r3, #1  /* bAbs */
+  beq  LOCAL_LABEL(zero_nan_inf)
+
+  // Detect if a or b is infinity or NaN.
+  lsrs r6, r2, #(significandBits)
+  lsrs r7, r3, #(significandBits)
+  cmp  r6, #0xFF
+  beq  LOCAL_LABEL(zero_nan_inf)
+  cmp  r7, #0xFF
+  beq  LOCAL_LABEL(zero_nan_inf)
+
+  // Swap Rep and Abs so that a and aAbs have the larger absolute value.
+  cmp r2, r3
+  bhs LOCAL_LABEL(no_swap)
+  movs r4, r0
+  movs r5, r2
+  movs r0, r1
+  movs r2, r3
+  movs r1, r4
+  movs r3, r5
+LOCAL_LABEL(no_swap):
+
+  // Get the significands and shift them to give us round, guard and sticky.
+  lsls r4, r0, #(typeWidth - significandBits)
+  lsrs r4, r4, #(typeWidth - significandBits - 3) /* aSignificand << 3 */
+  lsls r5, r1, #(typeWidth - significandBits)
+  lsrs r5, r5, #(typeWidth - significandBits - 3) /* bSignificand << 3 */
+
+  // Get the implicitBit.
+  movs r6, #1
+  lsls r6, r6, #(significandBits + 3)
+
+  // Get aExponent and set implicit bit if necessary.
+  lsrs r2, r2, #(significandBits)
+  beq LOCAL_LABEL(a_done_implicit_bit)
+  orrs r4, r6
+LOCAL_LABEL(a_done_implicit_bit):
+
+  // Get bExponent and set implicit bit if necessary.
+  lsrs r3, r3, #(significandBits)
+  beq LOCAL_LABEL(b_done_implicit_bit)
+  orrs r5, r6
+LOCAL_LABEL(b_done_implicit_bit):
+
+  // Get the difference in exponents.
+  subs r6, r2, r3
+  beq LOCAL_LABEL(done_align)
+
+  // If b is denormal, then a must be normal as align > 0, and we only need to
+  // right shift bSignificand by (align - 1) bits.
+  cmp  r3, #0
+  bne  1f
+  subs r6, r6, #1
+1:
+
+  // No longer needs bExponent. r3 is dead here.
+  // Set sticky bits of b: sticky = bSignificand << (typeWidth - align).
+  movs r3, #(typeWidth)
+  subs r3, r3, r6
+  movs r7, r5
+  lsls r7, r3
+  beq 1f
+  movs r7, #1
+1:
+
+  // bSignificand = bSignificand >> align | sticky;
+  lsrs r5, r6
+  orrs r5, r7
+  bne LOCAL_LABEL(done_align)
+  movs r5, #1 //  sticky; b is known to be non-zero.
+
+LOCAL_LABEL(done_align):
+  // isSubtraction = (aRep ^ bRep) >> 31;
+  movs r7, r0
+  eors r7, r1
+  lsrs r7, #31
+  bne LOCAL_LABEL(do_substraction)
+
+  // Same sign, do Addition.
+
+  // aSignificand += bSignificand;
+  adds r4, r4, r5
+
+  // Check carry bit.
+  movs r6, #1
+  lsls r6, r6, #(significandBits + 3 + 1)
+  movs r7, r4
+  ands r7, r6
+  beq LOCAL_LABEL(form_result)
+  // If the addition carried up, we need to right-shift the result and
+  // adjust the exponent.
+  movs r7, r4
+  movs r6, #1
+  ands r7, r6 // sticky = aSignificand & 1;
+  lsrs r4, #1
+  orrs r4, r7  // result Significand
+  adds r2, #1  // result Exponent
+  // If we have overflowed the type, return +/- infinity.
+  cmp  r2, 0xFF
+  beq  LOCAL_LABEL(ret_inf)
+
+LOCAL_LABEL(form_result):
+  // Shift the sign, exponent and significand into place.
+  lsrs r0, #(typeWidth - 1)
+  lsls r0, #(typeWidth - 1) // Get Sign.
+  lsls r2, #(significandBits)
+  orrs r0, r2
+  movs r1, r4
+  lsls r4, #(typeWidth - significandBits - 3)
+  lsrs r4, #(typeWidth - significandBits)
+  orrs r0, r4
+
+  // Final rounding.  The result may overflow to infinity, but that is the
+  // correct result in that case.
+  // roundGuardSticky = aSignificand & 0x7;
+  movs r2, #0x7
+  ands r1, r2
+  // if (roundGuardSticky > 0x4) result++;
+
+  cmp r1, #0x4
+  blt LOCAL_LABEL(done_round)
+  beq 1f
+  adds r0, #1
+  pop {r4, r5, r6, r7, pc}
+1:
+
+  // if (roundGuardSticky == 0x4) result += result & 1;
+  movs r1, r0
+  lsrs r1, #1
+  bcc  LOCAL_LABEL(done_round)
+  adds r0, r0, #1
+LOCAL_LABEL(done_round):
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(do_substraction):
+  subs r4, r4, r5 // aSignificand -= bSignificand;
+  beq  LOCAL_LABEL(ret_zero)
+  movs r6, r4
+  cmp  r2, 0
+  beq  LOCAL_LABEL(form_result) // if a's exp is 0, no need to normalize.
+  // If partial cancellation occurred, we need to left-shift the result
+  // and adjust the exponent:
+  lsrs r6, r6, #(significandBits + 3)
+  bne LOCAL_LABEL(form_result)
+
+  push {r0, r1, r2, r3}
+  movs r0, r4
+  bl   __clzsi2
+  movs r5, r0
+  pop {r0, r1, r2, r3}
+  // shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
+  subs r5, r5, #(typeWidth - significandBits - 3 - 1)
+  // aSignificand <<= shift; aExponent -= shift;
+  lsls r4, r5
+  subs  r2, r2, r5
+  bgt LOCAL_LABEL(form_result)
+
+  // aExponent <= 0: the result is denormal, so shift the significand right.
+  movs r6, #1
+  subs r6, r6, r2 // 1 - aExponent;
+  movs r2, #0 // aExponent = 0;
+  movs r3, #(typeWidth) // bExponent is dead.
+  subs r3, r3, r6
+  movs r7, r4
+  lsls r7, r3  // stickyBit = (bool)(aSignificand << (typeWidth - align))
+  beq 1f
+  movs r7, #1
+1:
+  lsrs r4, r6 /* aSignificand >> shift */
+  orrs r4, r7
+  b LOCAL_LABEL(form_result)
+
+LOCAL_LABEL(ret_zero):
+  movs r0, #0
+  pop {r4, r5, r6, r7, pc}
+
+
+LOCAL_LABEL(a_zero_nan_inf):
+  lsrs r3, r3, #1
+
+LOCAL_LABEL(zero_nan_inf):
+  // Here  r2 has aAbs, r3 has bAbs
+  movs r4, #0xFF
+  lsls r4, r4, #(significandBits) // Make +inf.
+
+  cmp r2, r4
+  bhi LOCAL_LABEL(a_is_nan)
+  cmp r3, r4
+  bhi LOCAL_LABEL(b_is_nan)
+
+  cmp r2, r4
+  bne LOCAL_LABEL(a_is_rational)
+  // aAbs is INF.
+  eors r1, r0 // aRep ^ bRep.
+  movs r6, #1
+  lsls r6, r6, #(typeWidth - 1) // get sign mask.
+  cmp r1, r6 // if they only differ on sign bit, it's -INF + INF
+  beq LOCAL_LABEL(a_is_nan)
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(a_is_rational):
+  cmp r3, r4
+  bne LOCAL_LABEL(b_is_rational)
+  movs r0, r1
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(b_is_rational):
+  // either a or b or both are zero.
+  adds r4, r2, r3
+  beq  LOCAL_LABEL(both_zero)
+  cmp r2, #0 // is absA 0 ?
+  beq LOCAL_LABEL(ret_b)
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(both_zero):
+  ands r0, r1 // +0 + -0 = +0
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(ret_b):
+  movs r0, r1
+
+LOCAL_LABEL(ret):
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(b_is_nan):
+  movs r0, r1
+LOCAL_LABEL(a_is_nan):
+  movs r1, #1
+  lsls r1, r1, #(significandBits -1) // r1 is quiet bit.
+  orrs r0, r1
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(ret_inf):
+  movs r4, #0xFF
+  lsls r4, r4, #(significandBits)
+  orrs r0, r4
+  lsrs r0, r0, #(significandBits)
+  lsls r0, r0, #(significandBits)
+  pop {r4, r5, r6, r7, pc}
+
+
+END_COMPILERRT_FUNCTION(__addsf3)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/builtins/arm/addsf3vfp.S b/lib/builtins/arm/addsf3vfp.S
index af40c1c..8871efd 100644
--- a/lib/builtins/arm/addsf3vfp.S
+++ b/lib/builtins/arm/addsf3vfp.S
@@ -18,10 +18,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__addsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vadd.f32 s0, s0, s1
+#else
 	vmov	s14, r0		// move first param from r0 into float register
 	vmov	s15, r1		// move second param from r1 into float register
 	vadd.f32 s14, s14, s15
 	vmov	r0, s14		// move result back to r0
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__addsf3vfp)
 
diff --git a/lib/builtins/arm/aeabi_cdcmp.S b/lib/builtins/arm/aeabi_cdcmp.S
index 8008f5f..b67814d 100644
--- a/lib/builtins/arm/aeabi_cdcmp.S
+++ b/lib/builtins/arm/aeabi_cdcmp.S
@@ -30,6 +30,19 @@
         push {r0-r3, lr}
         bl __aeabi_cdcmpeq_check_nan
         cmp r0, #1
+#if __ARM_ARCH_ISA_THUMB == 1
+        beq 1f
+        // NaN has been ruled out, so __aeabi_cdcmple can't trap
+        mov r0, sp
+        ldm r0, {r0-r3}
+        bl __aeabi_cdcmple
+        pop {r0-r3, pc}
+1:
+        // Z = 0, C = 1
+        movs r0, #0xF
+        lsls r0, r0, #31
+        pop {r0-r3, pc}
+#else
         pop {r0-r3, lr}
 
         // NaN has been ruled out, so __aeabi_cdcmple can't trap
@@ -37,6 +50,7 @@
 
         msr CPSR_f, #APSR_C
         JMP(lr)
+#endif
 END_COMPILERRT_FUNCTION(__aeabi_cdcmpeq)
 
 
@@ -59,6 +73,28 @@
 
         bl __aeabi_dcmplt
         cmp r0, #1
+#if __ARM_ARCH_ISA_THUMB == 1
+        bne 1f
+        // Z = 0, C = 0
+        movs r0, #1
+        lsls r0, r0, #1
+        pop {r0-r3, pc}
+1:
+        mov r0, sp
+        ldm r0, {r0-r3}
+        bl __aeabi_dcmpeq
+        cmp r0, #1
+        bne 2f
+        // Z = 1, C = 1
+        movs r0, #2
+        lsls r0, r0, #31
+        pop {r0-r3, pc}
+2:
+        // Z = 0, C = 1
+        movs r0, #0xF
+        lsls r0, r0, #31
+        pop {r0-r3, pc}
+#else
         moveq ip, #0
         beq 1f
 
@@ -72,6 +108,7 @@
         msr CPSR_f, ip
         pop {r0-r3}
         POP_PC()
+#endif
 END_COMPILERRT_FUNCTION(__aeabi_cdcmple)
 
 // int __aeabi_cdrcmple(double a, double b) {
diff --git a/lib/builtins/arm/aeabi_cfcmp.S b/lib/builtins/arm/aeabi_cfcmp.S
index 274baf7..e37aa3d 100644
--- a/lib/builtins/arm/aeabi_cfcmp.S
+++ b/lib/builtins/arm/aeabi_cfcmp.S
@@ -30,6 +30,19 @@
         push {r0-r3, lr}
         bl __aeabi_cfcmpeq_check_nan
         cmp r0, #1
+#if __ARM_ARCH_ISA_THUMB == 1
+        beq 1f
+        // NaN has been ruled out, so __aeabi_cfcmple can't trap
+        mov r0, sp
+        ldm r0, {r0-r3}
+        bl __aeabi_cfcmple
+        pop {r0-r3, pc}
+1:
+        // Z = 0, C = 1
+        movs r0, #0xF
+        lsls r0, r0, #31
+        pop {r0-r3, pc}
+#else
         pop {r0-r3, lr}
 
         // NaN has been ruled out, so __aeabi_cfcmple can't trap
@@ -37,6 +50,7 @@
 
         msr CPSR_f, #APSR_C
         JMP(lr)
+#endif
 END_COMPILERRT_FUNCTION(__aeabi_cfcmpeq)
 
 
@@ -59,6 +73,28 @@
 
         bl __aeabi_fcmplt
         cmp r0, #1
+#if __ARM_ARCH_ISA_THUMB == 1
+        bne 1f
+        // Z = 0, C = 0
+        movs r0, #1
+        lsls r0, r0, #1
+        pop {r0-r3, pc}
+1:
+        mov r0, sp
+        ldm r0, {r0-r3}
+        bl __aeabi_fcmpeq
+        cmp r0, #1
+        bne 2f
+        // Z = 1, C = 1
+        movs r0, #2
+        lsls r0, r0, #31
+        pop {r0-r3, pc}
+2:
+        // Z = 0, C = 1
+        movs r0, #0xF
+        lsls r0, r0, #31
+        pop {r0-r3, pc}
+#else
         moveq ip, #0
         beq 1f
 
@@ -72,6 +108,7 @@
         msr CPSR_f, ip
         pop {r0-r3}
         POP_PC()
+#endif
 END_COMPILERRT_FUNCTION(__aeabi_cfcmple)
 
 // int __aeabi_cfrcmple(float a, float b) {
diff --git a/lib/builtins/arm/aeabi_dcmp.S b/lib/builtins/arm/aeabi_dcmp.S
index 43e4392..51539c0 100644
--- a/lib/builtins/arm/aeabi_dcmp.S
+++ b/lib/builtins/arm/aeabi_dcmp.S
@@ -26,10 +26,10 @@
         bl        SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \
         cmp       r0, #0                         SEPARATOR \
         b ## cond 1f                             SEPARATOR \
-        mov       r0, #0                         SEPARATOR \
+        movs      r0, #0                         SEPARATOR \
         pop       { r4, pc }                     SEPARATOR \
 1:                                               SEPARATOR \
-        mov       r0, #1                         SEPARATOR \
+        movs      r0, #1                         SEPARATOR \
         pop       { r4, pc }                     SEPARATOR \
 END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)
 
diff --git a/lib/builtins/arm/aeabi_fcmp.S b/lib/builtins/arm/aeabi_fcmp.S
index 0a1d92a..8e7774b 100644
--- a/lib/builtins/arm/aeabi_fcmp.S
+++ b/lib/builtins/arm/aeabi_fcmp.S
@@ -26,10 +26,10 @@
         bl        SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \
         cmp       r0, #0                         SEPARATOR \
         b ## cond 1f                             SEPARATOR \
-        mov       r0, #0                         SEPARATOR \
+        movs      r0, #0                         SEPARATOR \
         pop       { r4, pc }                     SEPARATOR \
 1:                                               SEPARATOR \
-        mov       r0, #1                         SEPARATOR \
+        movs      r0, #1                         SEPARATOR \
         pop       { r4, pc }                     SEPARATOR \
 END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)
 
diff --git a/lib/builtins/arm/aeabi_idivmod.S b/lib/builtins/arm/aeabi_idivmod.S
index b43ea69..0164b15 100644
--- a/lib/builtins/arm/aeabi_idivmod.S
+++ b/lib/builtins/arm/aeabi_idivmod.S
@@ -26,7 +26,7 @@
         push    {r0, r1, lr}
         bl      SYMBOL_NAME(__divsi3)
         pop     {r1, r2, r3} // now r0 = quot, r1 = num, r2 = denom
-        muls    r2, r2, r0   // r2 = quot * denom
+        muls    r2, r0, r2   // r2 = quot * denom
         subs    r1, r1, r2
         JMP     (r3)
 #else
diff --git a/lib/builtins/arm/aeabi_ldivmod.S b/lib/builtins/arm/aeabi_ldivmod.S
index 3dae14e..038ae5d 100644
--- a/lib/builtins/arm/aeabi_ldivmod.S
+++ b/lib/builtins/arm/aeabi_ldivmod.S
@@ -23,23 +23,23 @@
         .syntax unified
         .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod)
-        push    {r11, lr}
+        push    {r6, lr}
         sub     sp, sp, #16
-        add     r12, sp, #8
-        str     r12, [sp]
+        add     r6, sp, #8
+        str     r6, [sp]
 #if defined(__MINGW32__)
-        mov     r12, r0
-        mov     r0, r2
-        mov     r2, r12
-        mov     r12, r1
-        mov     r1, r3
-        mov     r3, r12
+        movs    r6, r0
+        movs    r0, r2
+        movs    r2, r6
+        movs    r6, r1
+        movs    r1, r3
+        movs    r3, r6
 #endif
         bl      SYMBOL_NAME(__divmoddi4)
         ldr     r2, [sp, #8]
         ldr     r3, [sp, #12]
         add     sp, sp, #16
-        pop     {r11, pc}
+        pop     {r6, pc}
 END_COMPILERRT_FUNCTION(__aeabi_ldivmod)
 
 NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/builtins/arm/aeabi_memset.S b/lib/builtins/arm/aeabi_memset.S
index 48edd89..633f592 100644
--- a/lib/builtins/arm/aeabi_memset.S
+++ b/lib/builtins/arm/aeabi_memset.S
@@ -26,7 +26,7 @@
 
 DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr)
         mov     r2, r1
-        mov     r1, #0
+        movs    r1, #0
         b       memset
 END_COMPILERRT_FUNCTION(__aeabi_memclr)
 
diff --git a/lib/builtins/arm/aeabi_uidivmod.S b/lib/builtins/arm/aeabi_uidivmod.S
index 7098bc6..a627fc7 100644
--- a/lib/builtins/arm/aeabi_uidivmod.S
+++ b/lib/builtins/arm/aeabi_uidivmod.S
@@ -29,7 +29,7 @@
         push    {r0, r1, lr}
         bl      SYMBOL_NAME(__aeabi_uidiv)
         pop     {r1, r2, r3}
-        muls    r2, r2, r0 // r2 = quot * denom
+        muls    r2, r0, r2 // r2 = quot * denom
         subs    r1, r1, r2
         JMP     (r3)
 LOCAL_LABEL(case_denom_larger):
diff --git a/lib/builtins/arm/aeabi_uldivmod.S b/lib/builtins/arm/aeabi_uldivmod.S
index bc26e56..be343b6 100644
--- a/lib/builtins/arm/aeabi_uldivmod.S
+++ b/lib/builtins/arm/aeabi_uldivmod.S
@@ -23,23 +23,23 @@
         .syntax unified
         .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod)
-        push	{r11, lr}
+        push	{r6, lr}
         sub	sp, sp, #16
-        add	r12, sp, #8
-        str	r12, [sp]
+        add	r6, sp, #8
+        str	r6, [sp]
 #if defined(__MINGW32__)
-        mov     r12, r0
-        mov     r0, r2
-        mov     r2, r12
-        mov     r12, r1
-        mov     r1, r3
-        mov     r3, r12
+        movs    r6, r0
+        movs    r0, r2
+        movs    r2, r6
+        movs    r6, r1
+        movs    r1, r3
+        movs    r3, r6
 #endif
         bl	SYMBOL_NAME(__udivmoddi4)
         ldr	r2, [sp, #8]
         ldr	r3, [sp, #12]
         add	sp, sp, #16
-        pop	{r11, pc}
+        pop	{r6, pc}
 END_COMPILERRT_FUNCTION(__aeabi_uldivmod)
 
 NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/builtins/arm/comparesf2.S b/lib/builtins/arm/comparesf2.S
index 6d70195..ef7091b 100644
--- a/lib/builtins/arm/comparesf2.S
+++ b/lib/builtins/arm/comparesf2.S
@@ -43,31 +43,60 @@
 .thumb
 #endif
 
-.p2align 2
+@ int __eqsf2(float a, float b)
+
+    .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__eqsf2)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+    vmov r0, s0
+    vmov r1, s1
+#endif
     // Make copies of a and b with the sign bit shifted off the top.  These will
     // be used to detect zeros and NaNs.
+#if __ARM_ARCH_ISA_THUMB == 1
+    push    {r6, lr}
+    lsls    r2,         r0, #1
+    lsls    r3,         r1, #1
+#else
     mov     r2,         r0, lsl #1
     mov     r3,         r1, lsl #1
+#endif
 
     // We do the comparison in three stages (ignoring NaN values for the time
     // being).  First, we orr the absolute values of a and b; this sets the Z
     // flag if both a and b are zero (of either sign).  The shift of r3 doesn't
     // effect this at all, but it *does* make sure that the C flag is clear for
     // the subsequent operations.
+#if __ARM_ARCH_ISA_THUMB == 1
+    lsrs    r6,     r3, #1
+    orrs    r6,     r2
+#else
     orrs    r12,    r2, r3, lsr #1
-
+#endif
     // Next, we check if a and b have the same or different signs.  If they have
     // opposite signs, this eor will set the N flag.
+#if __ARM_ARCH_ISA_THUMB == 1
+    beq     1f
+    movs    r6,     r0
+    eors    r6,     r1
+1:
+#else
     it ne
     eorsne  r12,    r0, r1
+#endif
 
     // If a and b are equal (either both zeros or bit identical; again, we're
     // ignoring NaNs for now), this subtract will zero out r0.  If they have the
     // same sign, the flags are updated as they would be for a comparison of the
     // absolute values of a and b.
+#if __ARM_ARCH_ISA_THUMB == 1
+    bmi     1f
+    subs    r0,     r2, r3
+1:
+#else
     it pl
     subspl  r0,     r2, r3
+#endif
 
     // If a is smaller in magnitude than b and both have the same sign, place
     // the negation of the sign of b in r0.  Thus, if both are negative and
@@ -79,41 +108,126 @@
     // still clear from the shift argument in orrs; if a is positive and b
     // negative, this places 0 in r0; if a is negative and b positive, -1 is
     // placed in r0.
+#if __ARM_ARCH_ISA_THUMB == 1
+    bhs     1f
+    // Here if a and b have the same sign and absA < absB, the result is thus
+    // b < 0 ? 1 : -1. Same if a and b have the opposite sign (ignoring NaN).
+    movs    r0,         #1
+    lsrs    r1,         #31
+    bne     LOCAL_LABEL(CHECK_NAN)
+    negs    r0,         r0
+    b       LOCAL_LABEL(CHECK_NAN)
+1:
+#else
     it lo
     mvnlo   r0,         r1, asr #31
+#endif
 
     // If a is greater in magnitude than b and both have the same sign, place
     // the sign of b in r0.  Thus, if both are negative and a < b, -1 is placed
     // in r0, which is the desired result.  Conversely, if both are positive
     // and a > b, zero is placed in r0.
+#if __ARM_ARCH_ISA_THUMB == 1
+    bls     1f
+    // Here both have the same sign and absA > absB.
+    movs    r0,         #1
+    lsrs    r1,         #31
+    beq     LOCAL_LABEL(CHECK_NAN)
+    negs    r0, r0
+1:
+#else
     it hi
     movhi   r0,         r1, asr #31
+#endif
 
     // If you've been keeping track, at this point r0 contains -1 if a < b and
     // 0 if a >= b.  All that remains to be done is to set it to 1 if a > b.
     // If a == b, then the Z flag is set, so we can get the correct final value
     // into r0 by simply or'ing with 1 if Z is clear.
+    // For Thumb-1, r0 contains -1 if a < b, 1 if a > b and 0 if a == b.
+#if __ARM_ARCH_ISA_THUMB != 1
     it ne
     orrne   r0,     r0, #1
+#endif
 
     // Finally, we need to deal with NaNs.  If either argument is NaN, replace
     // the value in r0 with 1.
+#if __ARM_ARCH_ISA_THUMB == 1
+LOCAL_LABEL(CHECK_NAN):
+    movs    r6,         #0xff
+    lsls    r6,         #24
+    cmp     r2,         r6
+    bhi     1f
+    cmp     r3,         r6
+1:
+    bls     2f
+    movs    r0,         #1
+2:
+    pop     {r6, pc}
+#else
     cmp     r2,         #0xff000000
     ite ls
     cmpls   r3,         #0xff000000
     movhi   r0,         #1
     JMP(lr)
+#endif
 END_COMPILERRT_FUNCTION(__eqsf2)
+
 DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2)
 DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2)
 DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2)
 
-.p2align 2
+@ int __gtsf2(float a, float b)
+
+    .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__gtsf2)
     // Identical to the preceding except in that we return -1 for NaN values.
-    // Given that the two paths share so much code, one might be tempted to 
+    // Given that the two paths share so much code, one might be tempted to
     // unify them; however, the extra code needed to do so makes the code size
     // to performance tradeoff very hard to justify for such small functions.
+#if defined(COMPILER_RT_ARMHF_TARGET)
+    vmov r0, s0
+    vmov r1, s1
+#endif
+#if __ARM_ARCH_ISA_THUMB == 1
+    push    {r6, lr}
+    lsls    r2,        r0, #1
+    lsls    r3,        r1, #1
+    lsrs    r6,        r3, #1
+    orrs    r6,        r2
+    beq     1f
+    movs    r6,        r0
+    eors    r6,        r1
+1:
+    bmi     2f
+    subs    r0,        r2, r3
+2:
+    bhs     3f
+    movs    r0,        #1
+    lsrs    r1,        #31
+    bne     LOCAL_LABEL(CHECK_NAN_2)
+    negs    r0, r0
+    b       LOCAL_LABEL(CHECK_NAN_2)
+3:
+    bls     4f
+    movs    r0,         #1
+    lsrs    r1,         #31
+    beq     LOCAL_LABEL(CHECK_NAN_2)
+    negs    r0, r0
+4:
+LOCAL_LABEL(CHECK_NAN_2):
+    movs    r6,         #0xff
+    lsls    r6,         #24
+    cmp     r2,         r6
+    bhi     5f
+    cmp     r3,         r6
+5:
+    bls     6f
+    movs    r0,         #1
+    negs    r0,         r0
+6:
+    pop     {r6, pc}
+#else
     mov     r2,         r0, lsl #1
     mov     r3,         r1, lsl #1
     orrs    r12,    r2, r3, lsr #1
@@ -132,23 +246,51 @@
     cmpls   r3,         #0xff000000
     movhi   r0,         #-1
     JMP(lr)
+#endif
 END_COMPILERRT_FUNCTION(__gtsf2)
+
 DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2)
 
-.p2align 2
+@ int __unordsf2(float a, float b)
+
+    .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__unordsf2)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+    vmov    r0,         s0
+    vmov    r1,         s1
+#endif
     // Return 1 for NaN values, 0 otherwise.
-    mov     r2,         r0, lsl #1
-    mov     r3,         r1, lsl #1
-    mov     r0,         #0
+    lsls    r2,         r0, #1
+    lsls    r3,         r1, #1
+    movs    r0,         #0
+#if __ARM_ARCH_ISA_THUMB == 1
+    movs    r1,         #0xff
+    lsls    r1,         #24
+    cmp     r2,         r1
+    bhi     1f
+    cmp     r3,         r1
+1:
+    bls     2f
+    movs    r0,         #1
+2:
+#else
     cmp     r2,         #0xff000000
     ite ls
     cmpls   r3,         #0xff000000
     movhi   r0,         #1
+#endif
     JMP(lr)
 END_COMPILERRT_FUNCTION(__unordsf2)
 
+#if defined(COMPILER_RT_ARMHF_TARGET)
+DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpun)	// same symbol the #else branch aliases
+	vmov s0, r0	// __unordsf2 reads a from s0 when COMPILER_RT_ARMHF_TARGET is set
+	vmov s1, r1	// ... and b from s1, so copy the operands over from r0/r1
+	b SYMBOL_NAME(__unordsf2)	// plain branch, lr untouched: __unordsf2 returns to our caller
+END_COMPILERRT_FUNCTION(__aeabi_fcmpun)
+#else
 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2)
+#endif
 
 NO_EXEC_STACK_DIRECTIVE
 
diff --git a/lib/builtins/arm/divdf3vfp.S b/lib/builtins/arm/divdf3vfp.S
index 928f538..776ba4f 100644
--- a/lib/builtins/arm/divdf3vfp.S
+++ b/lib/builtins/arm/divdf3vfp.S
@@ -18,10 +18,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__divdf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vdiv.f64 d0, d0, d1
+#else
 	vmov	d6, r0, r1		// move first param from r0/r1 pair into d6
 	vmov	d7, r2, r3		// move second param from r2/r3 pair into d7
-	vdiv.f64 d5, d6, d7		
+	vdiv.f64 d5, d6, d7
 	vmov	r0, r1, d5		// move result back to r0/r1 pair
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__divdf3vfp)
 
diff --git a/lib/builtins/arm/divsf3vfp.S b/lib/builtins/arm/divsf3vfp.S
index a2e297f..130318f 100644
--- a/lib/builtins/arm/divsf3vfp.S
+++ b/lib/builtins/arm/divsf3vfp.S
@@ -18,10 +18,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__divsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vdiv.f32 s0, s0, s1
+#else
 	vmov	s14, r0		// move first param from r0 into float register
 	vmov	s15, r1		// move second param from r1 into float register
 	vdiv.f32 s13, s14, s15
 	vmov	r0, s13		// move result back to r0
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__divsf3vfp)
 
diff --git a/lib/builtins/arm/eqdf2vfp.S b/lib/builtins/arm/eqdf2vfp.S
index 95e6bb3..8fa0b2d 100644
--- a/lib/builtins/arm/eqdf2vfp.S
+++ b/lib/builtins/arm/eqdf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov	d6, r0, r1	// load r0/r1 pair in double register
 	vmov	d7, r2, r3	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7		
+#endif
 	vmrs	apsr_nzcv, fpscr
 	moveq	r0, #1		// set result register to 1 if equal
 	movne	r0, #0
diff --git a/lib/builtins/arm/eqsf2vfp.S b/lib/builtins/arm/eqsf2vfp.S
index fbac139..3776bf4 100644
--- a/lib/builtins/arm/eqsf2vfp.S
+++ b/lib/builtins/arm/eqsf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0     // move from GPR 0 to float register
 	vmov	s15, r1	    // move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
 	moveq	r0, #1      // set result register to 1 if equal
 	movne	r0, #0
diff --git a/lib/builtins/arm/extendsfdf2vfp.S b/lib/builtins/arm/extendsfdf2vfp.S
index 563bf92..1079f97 100644
--- a/lib/builtins/arm/extendsfdf2vfp.S
+++ b/lib/builtins/arm/extendsfdf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.f64.f32 d0, s0
+#else
 	vmov	s15, r0      // load float register from R0
 	vcvt.f64.f32 d7, s15 // convert single to double
 	vmov	r0, r1, d7   // return result in r0/r1 pair
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__extendsfdf2vfp)
 
diff --git a/lib/builtins/arm/fixdfsivfp.S b/lib/builtins/arm/fixdfsivfp.S
index 8263ff9..5d7b0f8 100644
--- a/lib/builtins/arm/fixdfsivfp.S
+++ b/lib/builtins/arm/fixdfsivfp.S
@@ -19,9 +19,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.s32.f64 s0, d0
+	vmov r0, s0
+#else
 	vmov	d7, r0, r1    // load double register from R0/R1
 	vcvt.s32.f64 s15, d7  // convert double to 32-bit int into s15
 	vmov	r0, s15	      // move s15 to result register
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__fixdfsivfp)
 
diff --git a/lib/builtins/arm/fixsfsivfp.S b/lib/builtins/arm/fixsfsivfp.S
index c7c3b81..805a277 100644
--- a/lib/builtins/arm/fixsfsivfp.S
+++ b/lib/builtins/arm/fixsfsivfp.S
@@ -19,9 +19,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.s32.f32 s0, s0
+	vmov r0, s0
+#else
 	vmov	s15, r0        // load float register from R0
 	vcvt.s32.f32 s15, s15  // convert single to 32-bit int into s15
 	vmov	r0, s15	       // move s15 to result register
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__fixsfsivfp)
 
diff --git a/lib/builtins/arm/fixunsdfsivfp.S b/lib/builtins/arm/fixunsdfsivfp.S
index 9cc1e62..4f1b2c8 100644
--- a/lib/builtins/arm/fixunsdfsivfp.S
+++ b/lib/builtins/arm/fixunsdfsivfp.S
@@ -20,9 +20,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.u32.f64 s0, d0
+	vmov r0, s0
+#else
 	vmov	d7, r0, r1    // load double register from R0/R1
 	vcvt.u32.f64 s15, d7  // convert double to 32-bit int into s15
 	vmov	r0, s15	      // move s15 to result register
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__fixunsdfsivfp)
 
diff --git a/lib/builtins/arm/fixunssfsivfp.S b/lib/builtins/arm/fixunssfsivfp.S
index 79d7082..e5d7782 100644
--- a/lib/builtins/arm/fixunssfsivfp.S
+++ b/lib/builtins/arm/fixunssfsivfp.S
@@ -20,9 +20,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.u32.f32 s0, s0
+	vmov r0, s0
+#else
 	vmov	s15, r0        // load float register from R0
 	vcvt.u32.f32 s15, s15  // convert single to 32-bit unsigned into s15
 	vmov	r0, s15	       // move s15 to result register
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__fixunssfsivfp)
 
diff --git a/lib/builtins/arm/floatsidfvfp.S b/lib/builtins/arm/floatsidfvfp.S
index 7623f26..3297ad4 100644
--- a/lib/builtins/arm/floatsidfvfp.S
+++ b/lib/builtins/arm/floatsidfvfp.S
@@ -19,9 +19,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmov s0, r0
+	vcvt.f64.s32 d0, s0
+#else
 	vmov	s15, r0        // move int to float register s15
 	vcvt.f64.s32 d7, s15   // convert 32-bit int in s15 to double in d7
 	vmov	r0, r1, d7     // move d7 to result register pair r0/r1
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__floatsidfvfp)
 
diff --git a/lib/builtins/arm/floatsisfvfp.S b/lib/builtins/arm/floatsisfvfp.S
index c73dfac..65408b5 100644
--- a/lib/builtins/arm/floatsisfvfp.S
+++ b/lib/builtins/arm/floatsisfvfp.S
@@ -19,9 +19,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmov s0, r0
+	vcvt.f32.s32 s0, s0
+#else
 	vmov	s15, r0	       // move int to float register s15
 	vcvt.f32.s32 s15, s15  // convert 32-bit int in s15 to float in s15
 	vmov	r0, s15        // move s15 to result register
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__floatsisfvfp)
 
diff --git a/lib/builtins/arm/floatunssidfvfp.S b/lib/builtins/arm/floatunssidfvfp.S
index 2a59fdb..d7a7024 100644
--- a/lib/builtins/arm/floatunssidfvfp.S
+++ b/lib/builtins/arm/floatunssidfvfp.S
@@ -19,9 +19,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmov s0, r0
+	vcvt.f64.u32 d0, s0
+#else
 	vmov	s15, r0        // move int to float register s15
 	vcvt.f64.u32 d7, s15   // convert 32-bit int in s15 to double in d7
 	vmov	r0, r1, d7     // move d7 to result register pair r0/r1
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__floatunssidfvfp)
 
diff --git a/lib/builtins/arm/floatunssisfvfp.S b/lib/builtins/arm/floatunssisfvfp.S
index c096263..1ca8565 100644
--- a/lib/builtins/arm/floatunssisfvfp.S
+++ b/lib/builtins/arm/floatunssisfvfp.S
@@ -19,9 +19,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmov s0, r0
+	vcvt.f32.u32 s0, s0
+#else
 	vmov	s15, r0	       // move int to float register s15
 	vcvt.f32.u32 s15, s15  // convert 32-bit int in s15 to float in s15
 	vmov	r0, s15        // move s15 to result register
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__floatunssisfvfp)
 
diff --git a/lib/builtins/arm/gedf2vfp.S b/lib/builtins/arm/gedf2vfp.S
index 72f13ef..14899f0 100644
--- a/lib/builtins/arm/gedf2vfp.S
+++ b/lib/builtins/arm/gedf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__gedf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov 	d6, r0, r1	// load r0/r1 pair in double register
 	vmov 	d7, r2, r3	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movge	r0, #1      // set result register to 1 if greater than or equal
 	movlt	r0, #0
diff --git a/lib/builtins/arm/gesf2vfp.S b/lib/builtins/arm/gesf2vfp.S
index c9ee52c..b49d04d 100644
--- a/lib/builtins/arm/gesf2vfp.S
+++ b/lib/builtins/arm/gesf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__gesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0	    // move from GPR 0 to float register
 	vmov	s15, r1	    // move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movge	r0, #1      // set result register to 1 if greater than or equal
 	movlt	r0, #0
diff --git a/lib/builtins/arm/gtdf2vfp.S b/lib/builtins/arm/gtdf2vfp.S
index c7f2775..8166305 100644
--- a/lib/builtins/arm/gtdf2vfp.S
+++ b/lib/builtins/arm/gtdf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov 	d6, r0, r1	// load r0/r1 pair in double register
 	vmov 	d7, r2, r3	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movgt	r0, #1		// set result register to 1 if equal
 	movle	r0, #0
diff --git a/lib/builtins/arm/gtsf2vfp.S b/lib/builtins/arm/gtsf2vfp.S
index 7d49e45..d2d8a23 100644
--- a/lib/builtins/arm/gtsf2vfp.S
+++ b/lib/builtins/arm/gtsf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0		// move from GPR 0 to float register
 	vmov	s15, r1		// move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movgt	r0, #1		// set result register to 1 if equal
 	movle	r0, #0
diff --git a/lib/builtins/arm/ledf2vfp.S b/lib/builtins/arm/ledf2vfp.S
index ca5b553..a9dab77 100644
--- a/lib/builtins/arm/ledf2vfp.S
+++ b/lib/builtins/arm/ledf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__ledf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov 	d6, r0, r1	// load r0/r1 pair in double register
 	vmov 	d7, r2, r3	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movls	r0, #1		// set result register to 1 if equal
 	movhi	r0, #0
diff --git a/lib/builtins/arm/lesf2vfp.S b/lib/builtins/arm/lesf2vfp.S
index f25422e..7e127f4 100644
--- a/lib/builtins/arm/lesf2vfp.S
+++ b/lib/builtins/arm/lesf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__lesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0     // move from GPR 0 to float register
 	vmov	s15, r1     // move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movls	r0, #1      // set result register to 1 if equal
 	movhi	r0, #0
diff --git a/lib/builtins/arm/ltdf2vfp.S b/lib/builtins/arm/ltdf2vfp.S
index 6e2c099..8b6f8e4 100644
--- a/lib/builtins/arm/ltdf2vfp.S
+++ b/lib/builtins/arm/ltdf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov 	d6, r0, r1	// load r0/r1 pair in double register
 	vmov 	d7, r2, r3	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movmi	r0, #1		// set result register to 1 if equal
 	movpl	r0, #0
diff --git a/lib/builtins/arm/ltsf2vfp.S b/lib/builtins/arm/ltsf2vfp.S
index 95febb6..c4ff812 100644
--- a/lib/builtins/arm/ltsf2vfp.S
+++ b/lib/builtins/arm/ltsf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0     // move from GPR 0 to float register
 	vmov	s15, r1     // move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movmi	r0, #1      // set result register to 1 if equal
 	movpl	r0, #0
diff --git a/lib/builtins/arm/muldf3vfp.S b/lib/builtins/arm/muldf3vfp.S
index f638de1..aa7b234 100644
--- a/lib/builtins/arm/muldf3vfp.S
+++ b/lib/builtins/arm/muldf3vfp.S
@@ -18,10 +18,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__muldf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmul.f64 d0, d0, d1
+#else
 	vmov 	d6, r0, r1         // move first param from r0/r1 pair into d6
 	vmov 	d7, r2, r3         // move second param from r2/r3 pair into d7
-	vmul.f64 d6, d6, d7		
+	vmul.f64 d6, d6, d7
 	vmov 	r0, r1, d6         // move result back to r0/r1 pair
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__muldf3vfp)
 
diff --git a/lib/builtins/arm/mulsf3vfp.S b/lib/builtins/arm/mulsf3vfp.S
index bef58d3..a1da789 100644
--- a/lib/builtins/arm/mulsf3vfp.S
+++ b/lib/builtins/arm/mulsf3vfp.S
@@ -18,9 +18,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmul.f32 s0, s0, s1	// multiply in place: s0 = s0 * s1, no core-register traffic
+#else
 	vmov	s14, r0		// move first param from r0 into float register
 	vmov	s15, r1		// move second param from r1 into float register
 	vmul.f32 s13, s14, s15
 	vmov	r0, s13		// move result back to r0
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__mulsf3vfp)
diff --git a/lib/builtins/arm/nedf2vfp.S b/lib/builtins/arm/nedf2vfp.S
index 78cf529..7d884e0 100644
--- a/lib/builtins/arm/nedf2vfp.S
+++ b/lib/builtins/arm/nedf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__nedf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov 	d6, r0, r1	// load r0/r1 pair in double register
 	vmov 	d7, r2, r3	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7		
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movne	r0, #1		// set result register to 0 if unequal
 	moveq	r0, #0
diff --git a/lib/builtins/arm/negdf2vfp.S b/lib/builtins/arm/negdf2vfp.S
index 01c8ba6..81f0ab8 100644
--- a/lib/builtins/arm/negdf2vfp.S
+++ b/lib/builtins/arm/negdf2vfp.S
@@ -18,7 +18,11 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__negdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vneg.f64 d0, d0
+#else
 	eor	r1, r1, #-2147483648	// flip sign bit on double in r0/r1 pair
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__negdf2vfp)
 
diff --git a/lib/builtins/arm/negsf2vfp.S b/lib/builtins/arm/negsf2vfp.S
index 797abb3..46ab4a9 100644
--- a/lib/builtins/arm/negsf2vfp.S
+++ b/lib/builtins/arm/negsf2vfp.S
@@ -18,7 +18,11 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__negsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vneg.f32 s0, s0
+#else
 	eor	r0, r0, #-2147483648	// flip sign bit on float in r0
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__negsf2vfp)
 
diff --git a/lib/builtins/arm/nesf2vfp.S b/lib/builtins/arm/nesf2vfp.S
index 554d3e4..97c764f 100644
--- a/lib/builtins/arm/nesf2vfp.S
+++ b/lib/builtins/arm/nesf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__nesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0	    // move from GPR 0 to float register
 	vmov	s15, r1	    // move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movne	r0, #1      // set result register to 1 if unequal
 	moveq	r0, #0
diff --git a/lib/builtins/arm/subdf3vfp.S b/lib/builtins/arm/subdf3vfp.S
index 1fc7d18..2b6f2bd 100644
--- a/lib/builtins/arm/subdf3vfp.S
+++ b/lib/builtins/arm/subdf3vfp.S
@@ -18,10 +18,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__subdf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vsub.f64 d0, d0, d1
+#else
 	vmov 	d6, r0, r1         // move first param from r0/r1 pair into d6
 	vmov 	d7, r2, r3         // move second param from r2/r3 pair into d7
 	vsub.f64 d6, d6, d7		
 	vmov 	r0, r1, d6         // move result back to r0/r1 pair
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__subdf3vfp)
 
diff --git a/lib/builtins/arm/subsf3vfp.S b/lib/builtins/arm/subsf3vfp.S
index 11fe386..3e83ea2 100644
--- a/lib/builtins/arm/subsf3vfp.S
+++ b/lib/builtins/arm/subsf3vfp.S
@@ -12,17 +12,21 @@
 //
 // extern float __subsf3vfp(float a, float b);
 //
-// Returns the difference between two single precision floating point numbers 
+// Returns the difference between two single precision floating point numbers
 // using the Darwin calling convention where single arguments are passsed
 // like 32-bit ints.
 //
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__subsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vsub.f32 s0, s0, s1
+#else
 	vmov	s14, r0		// move first param from r0 into float register
 	vmov	s15, r1		// move second param from r1 into float register
 	vsub.f32 s14, s14, s15
 	vmov	r0, s14		// move result back to r0
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__subsf3vfp)
 
diff --git a/lib/builtins/arm/truncdfsf2vfp.S b/lib/builtins/arm/truncdfsf2vfp.S
index 04287ad..682e54d 100644
--- a/lib/builtins/arm/truncdfsf2vfp.S
+++ b/lib/builtins/arm/truncdfsf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.f32.f64 s0, d0
+#else
 	vmov 	d7, r0, r1   // load double from r0/r1 pair
 	vcvt.f32.f64 s15, d7 // convert double to single (trucate precision)
 	vmov 	r0, s15      // return result in r0
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__truncdfsf2vfp)
 
diff --git a/lib/builtins/arm/unorddf2vfp.S b/lib/builtins/arm/unorddf2vfp.S
index 022dd7a..8556375 100644
--- a/lib/builtins/arm/unorddf2vfp.S
+++ b/lib/builtins/arm/unorddf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov 	d6, r0, r1	// load r0/r1 pair in double register
 	vmov 	d7, r2, r3	// load r2/r3 pair in double register
-	vcmp.f64 d6, d7		
+	vcmp.f64 d6, d7
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movvs	r0, #1      // set result register to 1 if "overflow" (any NaNs)
 	movvc	r0, #0
diff --git a/lib/builtins/arm/unordsf2vfp.S b/lib/builtins/arm/unordsf2vfp.S
index 5ebdd3d..2b16b49 100644
--- a/lib/builtins/arm/unordsf2vfp.S
+++ b/lib/builtins/arm/unordsf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0     // move from GPR 0 to float register
 	vmov	s15, r1	    // move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movvs	r0, #1      // set result register to 1 if "overflow" (any NaNs)
 	movvc	r0, #0
diff --git a/lib/builtins/clear_cache.c b/lib/builtins/clear_cache.c
index 4c2ac3b..a68f9fc 100644
--- a/lib/builtins/clear_cache.c
+++ b/lib/builtins/clear_cache.c
@@ -82,10 +82,6 @@
   #endif
 #endif
 
-#if defined(__linux__) && defined(__arm__)
-  #include <asm/unistd.h>
-#endif
-
 /*
  * The compiler generates calls to __clear_cache() when creating 
  * trampoline functions on the stack for use with nested functions.
@@ -108,6 +104,15 @@
 
         sysarch(ARM_SYNC_ICACHE, &arg);
     #elif defined(__linux__)
+    /*
+     * We used to include asm/unistd.h for the __ARM_NR_cacheflush define, but
+     * it also brought many other unused defines, as well as a dependency on
+     * kernel headers to be installed.
+     *
+     * This value is stable at least since Linux 3.13 and should remain so for
+     * compatibility reasons, warranting its re-definition here.
+     */
+    #define __ARM_NR_cacheflush 0x0f0002
          register int start_reg __asm("r0") = (int) (intptr_t) start;
          const register int end_reg __asm("r1") = (int) (intptr_t) end;
          const register int flags __asm("r2") = 0;
diff --git a/lib/builtins/floattitf.c b/lib/builtins/floattitf.c
new file mode 100644
index 0000000..994fded
--- /dev/null
+++ b/lib/builtins/floattitf.c
@@ -0,0 +1,82 @@
+//===-- lib/floattitf.c - int128 -> quad-precision conversion -----*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ti_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+
+/* Returns: convert a ti_int to a fp_t, rounding toward even. */
+
+/* Assumption: fp_t is an IEEE 128 bit floating point type
+ *             ti_int is a 128 bit integral type
+ */
+
+/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm |
+ * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t
+__floattitf(ti_int a) {
+    if (a == 0)
+        return 0.0;
+    const unsigned N = sizeof(ti_int) * CHAR_BIT;
+    const ti_int s = a >> (N-1);
+    a = (a ^ s) - s;
+    int sd = N - __clzti2(a);  /* number of significant digits */
+    int e = sd - 1;            /* exponent */
+    if (sd > LDBL_MANT_DIG) {
+        /*  start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+         *  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+         *                                                12345678901234567890123456
+         *  1 = msb 1 bit
+         *  P = bit LDBL_MANT_DIG-1 bits to the right of 1
+         *  Q = bit LDBL_MANT_DIG bits to the right of 1
+         *  R = "or" of all bits to the right of Q
+         */
+        switch (sd) {
+        case LDBL_MANT_DIG + 1:
+            a <<= 1;
+            break;
+        case LDBL_MANT_DIG + 2:
+            break;
+        default:
+            a = ((tu_int)a >> (sd - (LDBL_MANT_DIG+2))) |
+                ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0);
+        };
+        /* finish: */
+        a |= (a & 4) != 0;  /* Or P into R */
+        ++a;  /* round - this step may add a significant bit */
+        a >>= 2;  /* dump Q and R */
+        /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */
+        if (a & ((tu_int)1 << LDBL_MANT_DIG)) {
+            a >>= 1;
+            ++e;
+        }
+        /* a is now rounded to LDBL_MANT_DIG bits */
+    } else {
+        a <<= (LDBL_MANT_DIG - sd);
+        /* a is now rounded to LDBL_MANT_DIG bits */
+    }
+
+    long_double_bits fb;
+    fb.u.high.all = (s & 0x8000000000000000LL)           /* sign */
+                  | (du_int)(e + 16383) << 48            /* exponent */
+                  | ((a >> 64) & 0x0000ffffffffffffLL);  /* significand */
+    fb.u.low.all = (du_int)(a);
+    return fb.f;
+}
+
+#endif
diff --git a/lib/builtins/floatuntitf.c b/lib/builtins/floatuntitf.c
new file mode 100644
index 0000000..e2518c9
--- /dev/null
+++ b/lib/builtins/floatuntitf.c
@@ -0,0 +1,79 @@
+//===-- lib/floatuntitf.c - uint128 -> quad-precision conversion --*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements tu_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+
+/* Returns: convert a tu_int to a fp_t, rounding toward even. */
+
+/* Assumption: fp_t is an IEEE 128 bit floating point type
+ *             tu_int is a 128 bit integral type
+ */
+
+/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm |
+ * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t
+__floatuntitf(tu_int a) {
+    if (a == 0)
+        return 0.0;
+    const unsigned N = sizeof(tu_int) * CHAR_BIT;
+    int sd = N - __clzti2(a);  /* number of significant digits */
+    int e = sd - 1;            /* exponent */
+    if (sd > LDBL_MANT_DIG) {
+        /*  start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+         *  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+         *                                                12345678901234567890123456
+         *  1 = msb 1 bit
+         *  P = bit LDBL_MANT_DIG-1 bits to the right of 1
+         *  Q = bit LDBL_MANT_DIG bits to the right of 1
+         *  R = "or" of all bits to the right of Q
+         */
+        switch (sd) {
+        case LDBL_MANT_DIG + 1:
+            a <<= 1;
+            break;
+        case LDBL_MANT_DIG + 2:
+            break;
+        default:
+            a = (a >> (sd - (LDBL_MANT_DIG+2))) |
+                ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0);
+        };
+        /* finish: */
+        a |= (a & 4) != 0;  /* Or P into R */
+        ++a;  /* round - this step may add a significant bit */
+        a >>= 2;  /* dump Q and R */
+        /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */
+        if (a & ((tu_int)1 << LDBL_MANT_DIG)) {
+            a >>= 1;
+            ++e;
+        }
+        /* a is now rounded to LDBL_MANT_DIG bits */
+    } else {
+        a <<= (LDBL_MANT_DIG - sd);
+        /* a is now rounded to LDBL_MANT_DIG bits */
+    }
+
+    long_double_bits fb;
+    fb.u.high.all = (du_int)(e + 16383) << 48            /* exponent */
+                  | ((a >> 64) & 0x0000ffffffffffffLL);  /* significand */
+    fb.u.low.all = (du_int)(a);
+    return fb.f;
+}
+
+#endif
diff --git a/lib/builtins/os_version_check.c b/lib/builtins/os_version_check.c
new file mode 100644
index 0000000..b36ae54
--- /dev/null
+++ b/lib/builtins/os_version_check.c
@@ -0,0 +1,124 @@
+/* ===-- os_version_check.c - OS version checking  -------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements the function __isOSVersionAtLeast, used by
+ * Objective-C's @available
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#ifdef __APPLE__
+
+#include <CoreFoundation/CoreFoundation.h>
+#include <dispatch/dispatch.h>
+#include <TargetConditionals.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* These three variables hold the host's OS version. */
+static int32_t GlobalMajor, GlobalMinor, GlobalSubminor;
+static dispatch_once_t DispatchOnceCounter;
+
+/* Find and parse the SystemVersion.plist file. */
+static void parseSystemVersionPList(void *Unused) {
+  (void)Unused;
+
+  char *PListPath = "/System/Library/CoreServices/SystemVersion.plist";
+
+#if TARGET_OS_SIMULATOR
+  char *PListPathPrefix = getenv("IPHONE_SIMULATOR_ROOT");
+  if (!PListPathPrefix)
+    return;
+  char FullPath[strlen(PListPathPrefix) + strlen(PListPath) + 1];
+  strcpy(FullPath, PListPathPrefix);
+  strcat(FullPath, PListPath);
+  PListPath = FullPath;
+#endif
+  FILE *PropertyList = fopen(PListPath, "r");
+  if (!PropertyList)
+    return;
+
+  /* Dynamically allocated stuff. */
+  CFDictionaryRef PListRef = NULL;
+  CFDataRef FileContentsRef = NULL;
+  UInt8 *PListBuf = NULL;
+
+  fseek(PropertyList, 0, SEEK_END);
+  long PListFileSize = ftell(PropertyList);
+  if (PListFileSize < 0)
+    goto Fail;
+  rewind(PropertyList);
+
+  PListBuf = malloc((size_t)PListFileSize);
+  if (!PListBuf)
+    goto Fail;
+
+  size_t NumRead = fread(PListBuf, 1, (size_t)PListFileSize, PropertyList);
+  if (NumRead != (size_t)PListFileSize)
+    goto Fail;
+
+  /* Get the file buffer into CF's format. We pass in a null allocator here
+   * because we free PListBuf ourselves. */
+  FileContentsRef = CFDataCreateWithBytesNoCopy(
+      NULL, PListBuf, (CFIndex)NumRead, kCFAllocatorNull);
+  if (!FileContentsRef)
+    goto Fail;
+
+  if (&CFPropertyListCreateWithData)
+    PListRef = CFPropertyListCreateWithData(
+        NULL, FileContentsRef, kCFPropertyListImmutable, NULL, NULL);
+  else {
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+    PListRef = CFPropertyListCreateFromXMLData(NULL, FileContentsRef,
+                                               kCFPropertyListImmutable, NULL);
+#pragma clang diagnostic pop
+  }
+  if (!PListRef)
+    goto Fail;
+
+  CFTypeRef OpaqueValue =
+      CFDictionaryGetValue(PListRef, CFSTR("ProductVersion"));
+  if (!OpaqueValue || CFGetTypeID(OpaqueValue) != CFStringGetTypeID())
+    goto Fail;
+
+  char VersionStr[32];
+  if (!CFStringGetCString((CFStringRef)OpaqueValue, VersionStr,
+                          sizeof(VersionStr), kCFStringEncodingUTF8))
+    goto Fail;
+  sscanf(VersionStr, "%d.%d.%d", &GlobalMajor, &GlobalMinor, &GlobalSubminor);
+
+Fail:
+  if (PListRef)
+    CFRelease(PListRef);
+  if (FileContentsRef)
+    CFRelease(FileContentsRef);
+  free(PListBuf);
+  fclose(PropertyList);
+}
+
+int32_t __isOSVersionAtLeast(int32_t Major, int32_t Minor, int32_t Subminor) {
+  /* Populate the global version variables, if they haven't already. */
+  dispatch_once_f(&DispatchOnceCounter, NULL, parseSystemVersionPList);
+
+  if (Major < GlobalMajor) return 1;
+  if (Major > GlobalMajor) return 0;
+  if (Minor < GlobalMinor) return 1;
+  if (Minor > GlobalMinor) return 0;
+  return Subminor <= GlobalSubminor;
+}
+
+#else
+
+/* Silence an empty translation unit warning. */
+typedef int unused;
+
+#endif
diff --git a/lib/interception/interception_win.cc b/lib/interception/interception_win.cc
index 91abecf..e4f3d35 100644
--- a/lib/interception/interception_win.cc
+++ b/lib/interception/interception_win.cc
@@ -878,6 +878,8 @@
 
   IMAGE_DATA_DIRECTORY *export_directory =
       &headers->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT];
+  if (export_directory->Size == 0)
+    return 0;
   RVAPtr<IMAGE_EXPORT_DIRECTORY> exports(module,
                                          export_directory->VirtualAddress);
   RVAPtr<DWORD> functions(module, exports->AddressOfFunctions);
diff --git a/lib/interception/tests/interception_win_test.cc b/lib/interception/tests/interception_win_test.cc
index 684ee03..a705768 100644
--- a/lib/interception/tests/interception_win_test.cc
+++ b/lib/interception/tests/interception_win_test.cc
@@ -613,6 +613,13 @@
   EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode6, override, prefix));
 }
 
+TEST(Interception, EmptyExportTable) {
+  // We try to get a pointer to a function from an executable that doesn't
+  // export any symbol (empty export table).
+  uptr FunPtr = InternalGetProcAddress((void *)GetModuleHandleA(0), "example");
+  EXPECT_EQ(0U, FunPtr);
+}
+
 }  // namespace __interception
 
 #endif  // SANITIZER_WINDOWS
diff --git a/lib/lsan/CMakeLists.txt b/lib/lsan/CMakeLists.txt
index 73e475d..a48b85f 100644
--- a/lib/lsan/CMakeLists.txt
+++ b/lib/lsan/CMakeLists.txt
@@ -5,7 +5,8 @@
 
 set(LSAN_COMMON_SOURCES
   lsan_common.cc
-  lsan_common_linux.cc)
+  lsan_common_linux.cc
+  lsan_common_mac.cc)
 
 set(LSAN_SOURCES
   lsan.cc
@@ -16,6 +17,10 @@
 
 set(LSAN_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 
+# FIXME(fjricci): drop this define once LSan is fully enabled on Darwin.
+if(APPLE AND COMPILER_RT_HAS_LSAN)
+  list(APPEND LSAN_CFLAGS -DCAN_SANITIZE_LEAKS_MAC=1)
+endif()
 add_compiler_rt_object_libraries(RTLSanCommon
     OS ${SANITIZER_COMMON_SUPPORTED_OS}
     ARCHS ${LSAN_COMMON_SUPPORTED_ARCH}
@@ -23,18 +28,35 @@
     CFLAGS ${LSAN_CFLAGS})
 
 if(COMPILER_RT_HAS_LSAN)
-  foreach(arch ${LSAN_SUPPORTED_ARCH})
-    add_compiler_rt_component(lsan)
-    
+  add_compiler_rt_component(lsan)
+  if(APPLE)
+    add_weak_symbols("lsan" WEAK_SYMBOL_LINK_FLAGS)
+    add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
+
     add_compiler_rt_runtime(clang_rt.lsan
-      STATIC
-      ARCHS ${arch}
+      SHARED
+      OS ${SANITIZER_COMMON_SUPPORTED_OS}
+      ARCHS ${LSAN_SUPPORTED_ARCH}
       SOURCES ${LSAN_SOURCES}
-              $<TARGET_OBJECTS:RTInterception.${arch}>
-              $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
-              $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>
-              $<TARGET_OBJECTS:RTLSanCommon.${arch}>
+      OBJECT_LIBS RTLSanCommon
+                  RTInterception
+                  RTSanitizerCommon
+                  RTSanitizerCommonLibc
       CFLAGS ${LSAN_CFLAGS}
+      LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS}
       PARENT_TARGET lsan)
-  endforeach()
+  else()
+    foreach(arch ${LSAN_SUPPORTED_ARCH})
+      add_compiler_rt_runtime(clang_rt.lsan
+        STATIC
+        ARCHS ${arch}
+        SOURCES ${LSAN_SOURCES}
+                $<TARGET_OBJECTS:RTInterception.${arch}>
+                $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
+                $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>
+                $<TARGET_OBJECTS:RTLSanCommon.${arch}>
+        CFLAGS ${LSAN_CFLAGS}
+        PARENT_TARGET lsan)
+    endforeach()
+  endif()
 endif()
diff --git a/lib/lsan/lsan_allocator.cc b/lib/lsan/lsan_allocator.cc
index 1f6efc0..4284857 100644
--- a/lib/lsan/lsan_allocator.cc
+++ b/lib/lsan/lsan_allocator.cc
@@ -28,12 +28,21 @@
 struct ChunkMetadata {
   u8 allocated : 8;  // Must be first.
   ChunkTag tag : 2;
+#if SANITIZER_WORDSIZE == 64
   uptr requested_size : 54;
+#else
+  uptr requested_size : 32;
+  uptr padding : 22;
+#endif
   u32 stack_trace_id;
 };
 
-#if defined(__mips64) || defined(__aarch64__)
+#if defined(__mips64) || defined(__aarch64__) || defined(__i386__)
+#if defined(__i386__)
+static const uptr kMaxAllowedMallocSize = 1UL << 30;
+#else
 static const uptr kMaxAllowedMallocSize = 4UL << 30;
+#endif
 static const uptr kRegionSizeLog = 20;
 static const uptr kNumRegions = SANITIZER_MMAP_RANGE_SIZE >> kRegionSizeLog;
 typedef TwoLevelByteMap<(kNumRegions >> 12), 1 << 12> ByteMap;
@@ -258,4 +267,17 @@
 uptr __sanitizer_get_allocated_size(const void *p) {
   return GetMallocUsableSize(p);
 }
+
+#if !SANITIZER_SUPPORTS_WEAK_HOOKS
+// Provide default (no-op) implementation of malloc hooks.
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+void __sanitizer_malloc_hook(void *ptr, uptr size) {
+  (void)ptr;
+  (void)size;
+}
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+void __sanitizer_free_hook(void *ptr) {
+  (void)ptr;
+}
+#endif
 } // extern "C"
diff --git a/lib/lsan/lsan_common.cc b/lib/lsan/lsan_common.cc
index b20941e..5ae3ad2 100644
--- a/lib/lsan/lsan_common.cc
+++ b/lib/lsan/lsan_common.cc
@@ -32,20 +32,15 @@
 // also to protect the global list of root regions.
 BlockingMutex global_mutex(LINKER_INITIALIZED);
 
-__attribute__((tls_model("initial-exec")))
-THREADLOCAL int disable_counter;
-bool DisabledInThisThread() { return disable_counter > 0; }
-void DisableInThisThread() { disable_counter++; }
-void EnableInThisThread() {
-  if (!disable_counter && common_flags()->detect_leaks) {
+Flags lsan_flags;
+
+void DisableCounterUnderflow() {
+  if (common_flags()->detect_leaks) {
     Report("Unmatched call to __lsan_enable().\n");
     Die();
   }
-  disable_counter--;
 }
 
-Flags lsan_flags;
-
 void Flags::SetDefaults() {
 #define LSAN_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
 #include "lsan_flags.inc"
@@ -758,5 +753,10 @@
 int __lsan_is_turned_off() {
   return 0;
 }
+
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+const char *__lsan_default_suppressions() {
+  return "";
+}
 #endif
 } // extern "C"
diff --git a/lib/lsan/lsan_common.h b/lib/lsan/lsan_common.h
index 890ce65..c457d1b 100644
--- a/lib/lsan/lsan_common.h
+++ b/lib/lsan/lsan_common.h
@@ -22,8 +22,19 @@
 #include "sanitizer_common/sanitizer_stoptheworld.h"
 #include "sanitizer_common/sanitizer_symbolizer.h"
 
-#if (SANITIZER_LINUX && !SANITIZER_ANDROID) && (SANITIZER_WORDSIZE == 64) \
-     && (defined(__x86_64__) ||  defined(__mips64) ||  defined(__aarch64__))
+// LeakSanitizer relies on some Glibc internals (e.g. TLS machinery) and is
+// thus supported on Linux only. Also, LSan doesn't like 32 bit architectures
+// because of "small" (4 bytes) pointer size that leads to high false negative
+// ratio on large leaks. But we still want to have it for some 32 bit arches
+// (e.g. x86), see https://github.com/google/sanitizers/issues/403.
+// To enable LeakSanitizer on a new architecture, one needs to implement the
+// internal_clone function as well as (probably) adjust the TLS machinery for
+// the new architecture inside the sanitizer library.
+#if (SANITIZER_LINUX && !SANITIZER_ANDROID || CAN_SANITIZE_LEAKS_MAC) \
+     && (SANITIZER_WORDSIZE == 64) && (defined(__x86_64__) \
+     ||  defined(__mips64) ||  defined(__aarch64__))
+#define CAN_SANITIZE_LEAKS 1
+#elif SANITIZER_LINUX && !SANITIZER_ANDROID && defined(__i386__)
 #define CAN_SANITIZE_LEAKS 1
 #else
 #define CAN_SANITIZE_LEAKS 0
@@ -44,6 +55,8 @@
   kIgnored = 3
 };
 
+const u32 kInvalidTid = (u32) -1;
+
 struct Flags {
 #define LSAN_FLAG(Type, Name, DefaultValue, Description) Type Name;
 #include "lsan_flags.inc"
@@ -117,6 +130,7 @@
 // Functions called from the parent tool.
 void InitCommonLsan();
 void DoLeakCheck();
+void DisableCounterUnderflow();
 bool DisabledInThisThread();
 
 // Used to implement __lsan::ScopedDisabler.
diff --git a/lib/lsan/lsan_common_linux.cc b/lib/lsan/lsan_common_linux.cc
index f6154d8..0e10d41 100644
--- a/lib/lsan/lsan_common_linux.cc
+++ b/lib/lsan/lsan_common_linux.cc
@@ -34,6 +34,21 @@
   return LibraryNameIs(full_name, kLinkerName);
 }
 
+static THREADLOCAL u32 current_thread_tid = kInvalidTid;
+u32 GetCurrentThread() { return current_thread_tid; }
+void SetCurrentThread(u32 tid) { current_thread_tid = tid; }
+
+__attribute__((tls_model("initial-exec")))
+THREADLOCAL int disable_counter;
+bool DisabledInThisThread() { return disable_counter > 0; }
+void DisableInThisThread() { disable_counter++; }
+void EnableInThisThread() {
+  // An enable without a matching disable is reported before decrementing.
+  if (!disable_counter)
+    DisableCounterUnderflow();
+  --disable_counter;
+}
+
 void InitializePlatformSpecificModules() {
   ListOfModules modules;
   modules.init();
diff --git a/lib/lsan/lsan_common_mac.cc b/lib/lsan/lsan_common_mac.cc
new file mode 100644
index 0000000..7f5e055
--- /dev/null
+++ b/lib/lsan/lsan_common_mac.cc
@@ -0,0 +1,84 @@
+//=-- lsan_common_mac.cc --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of LeakSanitizer.
+// Implementation of common leak checking functionality. Darwin-specific code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_platform.h"
+#include "lsan_common.h"
+
+#if CAN_SANITIZE_LEAKS && SANITIZER_MAC
+
+#include <pthread.h>
+
+namespace __lsan {
+
+typedef struct {
+  int disable_counter;
+  u32 current_thread_id;
+} thread_local_data_t;
+
+static pthread_key_t key;
+static pthread_once_t key_once = PTHREAD_ONCE_INIT;
+
+static void make_tls_key() { CHECK_EQ(pthread_key_create(&key, NULL), 0); }
+
+static thread_local_data_t *get_tls_val() {
+  pthread_once(&key_once, make_tls_key);
+  // Allocate and initialize this thread's record the first time it is needed.
+  thread_local_data_t *ptr = (thread_local_data_t *)pthread_getspecific(key);
+  if (ptr == NULL) {
+    ptr = (thread_local_data_t *)InternalAlloc(sizeof(*ptr));
+    ptr->disable_counter = 0;
+    ptr->current_thread_id = kInvalidTid;
+    pthread_setspecific(key, ptr);
+  }
+  // NOTE(review): the key has no destructor, so the record is not freed here.
+  return ptr;
+}
+
+bool DisabledInThisThread() { return get_tls_val()->disable_counter > 0; }
+
+void DisableInThisThread() { ++get_tls_val()->disable_counter; }
+
+void EnableInThisThread() {
+  thread_local_data_t *data = get_tls_val();
+  // An enable without a matching disable is reported before decrementing.
+  if (!data->disable_counter)
+    DisableCounterUnderflow();
+  data->disable_counter--;
+}
+
+u32 GetCurrentThread() { return get_tls_val()->current_thread_id; }
+
+void SetCurrentThread(u32 tid) { get_tls_val()->current_thread_id = tid; }
+
+void InitializePlatformSpecificModules() {
+  CHECK(0 && "unimplemented");
+}
+
+// Scans global variables for heap pointers.
+void ProcessGlobalRegions(Frontier *frontier) {
+  CHECK(0 && "unimplemented");
+}
+
+void ProcessPlatformSpecificAllocations(Frontier *frontier) {
+  CHECK(0 && "unimplemented");
+}
+
+void DoStopTheWorld(StopTheWorldCallback callback, void *argument) {
+  CHECK(0 && "unimplemented");
+}
+
+} // namespace __lsan
+
+#endif // CAN_SANITIZE_LEAKS && SANITIZER_MAC
diff --git a/lib/lsan/lsan_interceptors.cc b/lib/lsan/lsan_interceptors.cc
index 28f1786..876b39d 100644
--- a/lib/lsan/lsan_interceptors.cc
+++ b/lib/lsan/lsan_interceptors.cc
@@ -19,6 +19,7 @@
 #include "sanitizer_common/sanitizer_flags.h"
 #include "sanitizer_common/sanitizer_internal_defs.h"
 #include "sanitizer_common/sanitizer_linux.h"
+#include "sanitizer_common/sanitizer_platform_interceptors.h"
 #include "sanitizer_common/sanitizer_platform_limits_posix.h"
 #include "sanitizer_common/sanitizer_tls_get_addr.h"
 #include "lsan.h"
@@ -26,6 +27,8 @@
 #include "lsan_common.h"
 #include "lsan_thread.h"
 
+#include <stddef.h>
+
 using namespace __lsan;
 
 extern "C" {
@@ -86,25 +89,13 @@
   return Reallocate(stack, q, size, 1);
 }
 
+#if SANITIZER_INTERCEPT_MEMALIGN
 INTERCEPTOR(void*, memalign, uptr alignment, uptr size) {
   ENSURE_LSAN_INITED;
   GET_STACK_TRACE_MALLOC;
   return Allocate(stack, size, alignment, kAlwaysClearMemory);
 }
-
-INTERCEPTOR(void*, aligned_alloc, uptr alignment, uptr size) {
-  ENSURE_LSAN_INITED;
-  GET_STACK_TRACE_MALLOC;
-  return Allocate(stack, size, alignment, kAlwaysClearMemory);
-}
-
-INTERCEPTOR(int, posix_memalign, void **memptr, uptr alignment, uptr size) {
-  ENSURE_LSAN_INITED;
-  GET_STACK_TRACE_MALLOC;
-  *memptr = Allocate(stack, size, alignment, kAlwaysClearMemory);
-  // FIXME: Return ENOMEM if user requested more than max alloc size.
-  return 0;
-}
+#define LSAN_MAYBE_INTERCEPT_MEMALIGN INTERCEPT_FUNCTION(memalign)
 
 INTERCEPTOR(void *, __libc_memalign, uptr alignment, uptr size) {
   ENSURE_LSAN_INITED;
@@ -113,6 +104,30 @@
   DTLS_on_libc_memalign(res, size);
   return res;
 }
+#define LSAN_MAYBE_INTERCEPT___LIBC_MEMALIGN INTERCEPT_FUNCTION(__libc_memalign)
+#else
+#define LSAN_MAYBE_INTERCEPT_MEMALIGN
+#define LSAN_MAYBE_INTERCEPT___LIBC_MEMALIGN
+#endif // SANITIZER_INTERCEPT_MEMALIGN
+
+#if SANITIZER_INTERCEPT_ALIGNED_ALLOC
+INTERCEPTOR(void*, aligned_alloc, uptr alignment, uptr size) {
+  ENSURE_LSAN_INITED;
+  GET_STACK_TRACE_MALLOC;
+  return Allocate(stack, size, alignment, kAlwaysClearMemory);
+}
+#define LSAN_MAYBE_INTERCEPT_ALIGNED_ALLOC INTERCEPT_FUNCTION(aligned_alloc)
+#else
+#define LSAN_MAYBE_INTERCEPT_ALIGNED_ALLOC
+#endif
+
+INTERCEPTOR(int, posix_memalign, void **memptr, uptr alignment, uptr size) {
+  ENSURE_LSAN_INITED;
+  GET_STACK_TRACE_MALLOC;
+  *memptr = Allocate(stack, size, alignment, kAlwaysClearMemory);
+  // FIXME: Return ENOMEM if user requested more than max alloc size.
+  return 0;
+}
 
 INTERCEPTOR(void*, valloc, uptr size) {
   ENSURE_LSAN_INITED;
@@ -122,11 +137,18 @@
   return Allocate(stack, size, GetPageSizeCached(), kAlwaysClearMemory);
 }
 
+#if SANITIZER_INTERCEPT_MALLOC_USABLE_SIZE
 INTERCEPTOR(uptr, malloc_usable_size, void *ptr) {
   ENSURE_LSAN_INITED;
   return GetMallocUsableSize(ptr);
 }
+#define LSAN_MAYBE_INTERCEPT_MALLOC_USABLE_SIZE \
+        INTERCEPT_FUNCTION(malloc_usable_size)
+#else
+#define LSAN_MAYBE_INTERCEPT_MALLOC_USABLE_SIZE
+#endif
 
+#if SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO
 struct fake_mallinfo {
   int x[10];
 };
@@ -136,11 +158,18 @@
   internal_memset(&res, 0, sizeof(res));
   return res;
 }
+#define LSAN_MAYBE_INTERCEPT_MALLINFO INTERCEPT_FUNCTION(mallinfo)
 
 INTERCEPTOR(int, mallopt, int cmd, int value) {
   return -1;
 }
+#define LSAN_MAYBE_INTERCEPT_MALLOPT INTERCEPT_FUNCTION(mallopt)
+#else
+#define LSAN_MAYBE_INTERCEPT_MALLINFO
+#define LSAN_MAYBE_INTERCEPT_MALLOPT
+#endif // SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO
 
+#if SANITIZER_INTERCEPT_PVALLOC
 INTERCEPTOR(void*, pvalloc, uptr size) {
   ENSURE_LSAN_INITED;
   GET_STACK_TRACE_MALLOC;
@@ -152,8 +181,17 @@
   }
   return Allocate(stack, size, GetPageSizeCached(), kAlwaysClearMemory);
 }
+#define LSAN_MAYBE_INTERCEPT_PVALLOC INTERCEPT_FUNCTION(pvalloc)
+#else
+#define LSAN_MAYBE_INTERCEPT_PVALLOC
+#endif // SANITIZER_INTERCEPT_PVALLOC
 
+#if SANITIZER_INTERCEPT_CFREE
 INTERCEPTOR(void, cfree, void *p) ALIAS(WRAPPER_NAME(free));
+#define LSAN_MAYBE_INTERCEPT_CFREE INTERCEPT_FUNCTION(cfree)
+#else
+#define LSAN_MAYBE_INTERCEPT_CFREE
+#endif // SANITIZER_INTERCEPT_CFREE
 
 #define OPERATOR_NEW_BODY                              \
   ENSURE_LSAN_INITED;                                  \
@@ -161,13 +199,13 @@
   return Allocate(stack, size, 1, kAlwaysClearMemory);
 
 INTERCEPTOR_ATTRIBUTE
-void *operator new(uptr size) { OPERATOR_NEW_BODY; }
+void *operator new(size_t size) { OPERATOR_NEW_BODY; }
 INTERCEPTOR_ATTRIBUTE
-void *operator new[](uptr size) { OPERATOR_NEW_BODY; }
+void *operator new[](size_t size) { OPERATOR_NEW_BODY; }
 INTERCEPTOR_ATTRIBUTE
-void *operator new(uptr size, std::nothrow_t const&) { OPERATOR_NEW_BODY; }
+void *operator new(size_t size, std::nothrow_t const&) { OPERATOR_NEW_BODY; }
 INTERCEPTOR_ATTRIBUTE
-void *operator new[](uptr size, std::nothrow_t const&) { OPERATOR_NEW_BODY; }
+void *operator new[](size_t size, std::nothrow_t const&) { OPERATOR_NEW_BODY; }
 
 #define OPERATOR_DELETE_BODY \
   ENSURE_LSAN_INITED;        \
@@ -277,17 +315,18 @@
 void InitializeInterceptors() {
   INTERCEPT_FUNCTION(malloc);
   INTERCEPT_FUNCTION(free);
-  INTERCEPT_FUNCTION(cfree);
+  LSAN_MAYBE_INTERCEPT_CFREE;
   INTERCEPT_FUNCTION(calloc);
   INTERCEPT_FUNCTION(realloc);
-  INTERCEPT_FUNCTION(memalign);
+  LSAN_MAYBE_INTERCEPT_MEMALIGN;
+  LSAN_MAYBE_INTERCEPT___LIBC_MEMALIGN;
+  LSAN_MAYBE_INTERCEPT_ALIGNED_ALLOC;
   INTERCEPT_FUNCTION(posix_memalign);
-  INTERCEPT_FUNCTION(__libc_memalign);
   INTERCEPT_FUNCTION(valloc);
-  INTERCEPT_FUNCTION(pvalloc);
-  INTERCEPT_FUNCTION(malloc_usable_size);
-  INTERCEPT_FUNCTION(mallinfo);
-  INTERCEPT_FUNCTION(mallopt);
+  LSAN_MAYBE_INTERCEPT_PVALLOC;
+  LSAN_MAYBE_INTERCEPT_MALLOC_USABLE_SIZE;
+  LSAN_MAYBE_INTERCEPT_MALLINFO;
+  LSAN_MAYBE_INTERCEPT_MALLOPT;
   INTERCEPT_FUNCTION(pthread_create);
   INTERCEPT_FUNCTION(pthread_join);
 
diff --git a/lib/lsan/lsan_thread.cc b/lib/lsan/lsan_thread.cc
index 5dff4f7..ebec6cd 100644
--- a/lib/lsan/lsan_thread.cc
+++ b/lib/lsan/lsan_thread.cc
@@ -19,13 +19,11 @@
 #include "sanitizer_common/sanitizer_thread_registry.h"
 #include "sanitizer_common/sanitizer_tls_get_addr.h"
 #include "lsan_allocator.h"
+#include "lsan_common.h"
 
 namespace __lsan {
 
-const u32 kInvalidTid = (u32) -1;
-
 static ThreadRegistry *thread_registry;
-static THREADLOCAL u32 current_thread_tid = kInvalidTid;
 
 static ThreadContextBase *CreateThreadContext(u32 tid) {
   void *mem = MmapOrDie(sizeof(ThreadContext), "ThreadContext");
@@ -41,14 +39,6 @@
     ThreadRegistry(CreateThreadContext, kMaxThreads, kThreadQuarantineSize);
 }
 
-u32 GetCurrentThread() {
-  return current_thread_tid;
-}
-
-void SetCurrentThread(u32 tid) {
-  current_thread_tid = tid;
-}
-
 ThreadContext::ThreadContext(int tid)
     : ThreadContextBase(tid),
       stack_begin_(0),
@@ -97,7 +87,7 @@
   args.tls_end = args.tls_begin + tls_size;
   GetAllocatorCacheRange(&args.cache_begin, &args.cache_end);
   args.dtls = DTLS_Get();
-  thread_registry->StartThread(tid, os_id, &args);
+  thread_registry->StartThread(tid, os_id, /*workerthread*/ false, &args);
 }
 
 void ThreadFinish() {
diff --git a/lib/lsan/weak_symbols.txt b/lib/lsan/weak_symbols.txt
new file mode 100644
index 0000000..da4f994
--- /dev/null
+++ b/lib/lsan/weak_symbols.txt
@@ -0,0 +1,2 @@
+___lsan_default_suppressions
+___lsan_is_turned_off
diff --git a/lib/msan/msan_interceptors.cc b/lib/msan/msan_interceptors.cc
index 53f10ab..6447bb1 100644
--- a/lib/msan/msan_interceptors.cc
+++ b/lib/msan/msan_interceptors.cc
@@ -45,6 +45,8 @@
 
 DECLARE_REAL(SIZE_T, strlen, const char *s)
 DECLARE_REAL(SIZE_T, strnlen, const char *s, SIZE_T maxlen)
+DECLARE_REAL(void *, memcpy, void *dest, const void *src, uptr n)
+DECLARE_REAL(void *, memset, void *dest, int c, uptr n)
 
 #if SANITIZER_FREEBSD
 #define __errno_location __error
@@ -152,10 +154,6 @@
   return res;
 }
 
-INTERCEPTOR(void *, memcpy, void *dest, const void *src, SIZE_T n) {
-  return __msan_memcpy(dest, src, n);
-}
-
 INTERCEPTOR(void *, mempcpy, void *dest, const void *src, SIZE_T n) {
   return (char *)__msan_memcpy(dest, src, n) + n;
 }
@@ -170,14 +168,6 @@
   return res;
 }
 
-INTERCEPTOR(void *, memmove, void *dest, const void *src, SIZE_T n) {
-  return __msan_memmove(dest, src, n);
-}
-
-INTERCEPTOR(void *, memset, void *s, int c, SIZE_T n) {
-  return __msan_memset(s, c, n);
-}
-
 INTERCEPTOR(void *, bcopy, const void *src, void *dest, SIZE_T n) {
   return __msan_memmove(dest, src, n);
 }
@@ -1354,11 +1344,23 @@
     *begin = *end = 0;                                                         \
   }
 
+#define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size) \
+  {                                                         \
+    (void)ctx;                                              \
+    return __msan_memset(block, c, size);                   \
+  }
+#define COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size) \
+  {                                                          \
+    (void)ctx;                                               \
+    return __msan_memmove(to, from, size);                   \
+  }
+#define COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size) \
+  {                                                         \
+    (void)ctx;                                              \
+    return __msan_memcpy(to, from, size);                   \
+  }
+
 #include "sanitizer_common/sanitizer_platform_interceptors.h"
-// Msan needs custom handling of these:
-#undef SANITIZER_INTERCEPT_MEMSET
-#undef SANITIZER_INTERCEPT_MEMMOVE
-#undef SANITIZER_INTERCEPT_MEMCPY
 #include "sanitizer_common/sanitizer_common_interceptors.inc"
 
 #define COMMON_SYSCALL_PRE_READ_RANGE(p, s) CHECK_UNPOISONED(p, s)
@@ -1514,11 +1516,8 @@
   INTERCEPT_FUNCTION(fread);
   MSAN_MAYBE_INTERCEPT_FREAD_UNLOCKED;
   INTERCEPT_FUNCTION(readlink);
-  INTERCEPT_FUNCTION(memcpy);
   INTERCEPT_FUNCTION(memccpy);
   INTERCEPT_FUNCTION(mempcpy);
-  INTERCEPT_FUNCTION(memset);
-  INTERCEPT_FUNCTION(memmove);
   INTERCEPT_FUNCTION(bcopy);
   INTERCEPT_FUNCTION(wmemset);
   INTERCEPT_FUNCTION(wmemcpy);
diff --git a/lib/msan/tests/CMakeLists.txt b/lib/msan/tests/CMakeLists.txt
index 130a872..8e911dc 100644
--- a/lib/msan/tests/CMakeLists.txt
+++ b/lib/msan/tests/CMakeLists.txt
@@ -69,15 +69,15 @@
 endmacro()
 
 macro(msan_link_shared so_list so_name arch kind)
-  cmake_parse_arguments(SOURCE "" "" "OBJECTS;LINKFLAGS;DEPS" ${ARGN})
+  cmake_parse_arguments(SOURCE "" "" "OBJECTS;LINK_FLAGS;DEPS" ${ARGN})
   set(output_so "${CMAKE_CURRENT_BINARY_DIR}/${so_name}.${arch}${kind}.so")
-  get_target_flags_for_arch(${arch} TARGET_LINKFLAGS)
+  get_target_flags_for_arch(${arch} TARGET_LINK_FLAGS)
   if(NOT COMPILER_RT_STANDALONE_BUILD)
     list(APPEND SOURCE_DEPS msan)
   endif()
   clang_link_shared(${output_so}
                 OBJECTS ${SOURCE_OBJECTS}
-                LINKFLAGS ${TARGET_LINKFLAGS} ${SOURCE_LINKFLAGS}
+                LINK_FLAGS ${TARGET_LINK_FLAGS} ${SOURCE_LINK_FLAGS}
                 DEPS ${SOURCE_DEPS})
   list(APPEND ${so_list} ${output_so})
 endmacro()
diff --git a/lib/msan/tests/msan_test.cc b/lib/msan/tests/msan_test.cc
index 9ec1e28..6f4dd99 100644
--- a/lib/msan/tests/msan_test.cc
+++ b/lib/msan/tests/msan_test.cc
@@ -3602,6 +3602,18 @@
   EXPECT_POISONED(_mm_cmpgt_epi16(poisoned(_mm_set1_epi16(6), _mm_set1_epi16(0xF)),
                                poisoned(_mm_set1_epi16(7), _mm_set1_epi16(0))));
 }
+
+TEST(MemorySanitizer, stmxcsr_ldmxcsr) {
+  U4 x = _mm_getcsr();
+  EXPECT_NOT_POISONED(x);
+
+  _mm_setcsr(x);
+
+  __msan_poison(&x, sizeof(x));
+  U4 origin = __LINE__;
+  __msan_set_origin(&x, sizeof(x), origin);
+  EXPECT_UMR_O(_mm_setcsr(x), origin);
+}
 #endif
 
 // Volatile bitfield store is implemented as load-mask-store
diff --git a/lib/profile/InstrProfilingFile.c b/lib/profile/InstrProfilingFile.c
index f82080c..cd3590e 100644
--- a/lib/profile/InstrProfilingFile.c
+++ b/lib/profile/InstrProfilingFile.c
@@ -172,6 +172,16 @@
   return 0;
 }
 
+/* Make sure the directory that is to hold Filename exists. */
+static void createProfileDir(const char *Filename) {
+  size_t BufSize = strlen(Filename) + 1; /* Includes the terminating NUL. */
+  char *PathCopy;
+  if (!lprofFindFirstDirSeparator(Filename)) return; /* No directory part. */
+  PathCopy = (char *)COMPILER_RT_ALLOCA(BufSize);
+  strncpy(PathCopy, Filename, BufSize);
+  __llvm_profile_recursive_mkdir(PathCopy);
+}
+
 /* Open the profile data for merging. It opens the file in r+b mode with
  * file locking.  If the file has content which is compatible with the
  * current process, it also reads in the profile data in the file and merge
@@ -184,6 +194,7 @@
   FILE *ProfileFile;
   int rc;
 
+  createProfileDir(ProfileFileName);
   ProfileFile = lprofOpenFileEx(ProfileFileName);
   if (!ProfileFile)
     return NULL;
@@ -233,18 +244,13 @@
   if (!Filename)
     return;
 
-  /* Create the directory holding the file, if needed. */
-  if (lprofFindFirstDirSeparator(Filename)) {
-    char *Copy = (char *)COMPILER_RT_ALLOCA(Length + 1);
-    strncpy(Copy, Filename, Length + 1);
-    __llvm_profile_recursive_mkdir(Copy);
-  }
-
   /* By pass file truncation to allow online raw profile
    * merging. */
   if (lprofCurFilename.MergePoolSize)
     return;
 
+  createProfileDir(Filename);
+
   /* Truncate the file.  Later we'll reopen and append. */
   File = fopen(Filename, "w");
   if (!File)
diff --git a/lib/sanitizer_common/CMakeLists.txt b/lib/sanitizer_common/CMakeLists.txt
index 007d93c..6cdc918 100644
--- a/lib/sanitizer_common/CMakeLists.txt
+++ b/lib/sanitizer_common/CMakeLists.txt
@@ -25,6 +25,7 @@
   sanitizer_stackdepot.cc
   sanitizer_stacktrace.cc
   sanitizer_stacktrace_printer.cc
+  sanitizer_stoptheworld_mac.cc
   sanitizer_suppressions.cc
   sanitizer_symbolizer.cc
   sanitizer_symbolizer_libbacktrace.cc
@@ -52,8 +53,11 @@
 
 set(SANITIZER_LIBCDEP_SOURCES
   sanitizer_common_libcdep.cc
+  sancov_flags.cc
   sanitizer_coverage_libcdep.cc
+  sanitizer_coverage_libcdep_new.cc
   sanitizer_coverage_mapping_libcdep.cc
+  sanitizer_coverage_win_sections.cc
   sanitizer_linux_libcdep.cc
   sanitizer_posix_libcdep.cc
   sanitizer_stacktrace_libcdep.cc
@@ -124,7 +128,10 @@
   sanitizer_syscall_generic.inc
   sanitizer_syscall_linux_x86_64.inc
   sanitizer_syscall_linux_aarch64.inc
-  sanitizer_thread_registry.h)
+  sanitizer_thread_registry.h
+  sanitizer_win.h)
+
+include_directories(..)
 
 set(SANITIZER_COMMON_DEFINITIONS)
 
@@ -182,6 +189,55 @@
   CFLAGS ${SANITIZER_CFLAGS}
   DEFS ${SANITIZER_COMMON_DEFINITIONS})
 
+if(WIN32)  # Extra object libraries that are only built for Windows.
+  add_compiler_rt_object_libraries(SanitizerCommonWeakInterception
+    OS ${SANITIZER_COMMON_SUPPORTED_OS}
+    ARCHS ${SANITIZER_COMMON_SUPPORTED_ARCH}
+    SOURCES sanitizer_win_weak_interception.cc
+    CFLAGS ${SANITIZER_CFLAGS} -DSANITIZER_DYNAMIC
+    DEFS ${SANITIZER_COMMON_DEFINITIONS})
+  add_compiler_rt_object_libraries(SancovWeakInterception
+    OS ${SANITIZER_COMMON_SUPPORTED_OS}
+    ARCHS ${SANITIZER_COMMON_SUPPORTED_ARCH}
+    SOURCES sanitizer_coverage_win_weak_interception.cc
+    CFLAGS ${SANITIZER_CFLAGS} -DSANITIZER_DYNAMIC
+    DEFS ${SANITIZER_COMMON_DEFINITIONS})
+  # Object libraries compiled with -DSANITIZER_DLL_THUNK.
+  add_compiler_rt_object_libraries(SanitizerCommonDllThunk
+    OS ${SANITIZER_COMMON_SUPPORTED_OS}
+    ARCHS ${SANITIZER_COMMON_SUPPORTED_ARCH}
+    SOURCES sanitizer_win_dll_thunk.cc
+    CFLAGS ${SANITIZER_CFLAGS} -DSANITIZER_DLL_THUNK
+    DEFS ${SANITIZER_COMMON_DEFINITIONS})
+  add_compiler_rt_object_libraries(SancovDllThunk
+    OS ${SANITIZER_COMMON_SUPPORTED_OS}
+    ARCHS ${SANITIZER_COMMON_SUPPORTED_ARCH}
+    SOURCES sanitizer_coverage_win_dll_thunk.cc
+            sanitizer_coverage_win_sections.cc
+    CFLAGS ${SANITIZER_CFLAGS} -DSANITIZER_DLL_THUNK
+    DEFS ${SANITIZER_COMMON_DEFINITIONS})
+  # Object libraries compiled with DYNAMIC_RUNTIME_THUNK_CFLAGS (set below).
+  set(DYNAMIC_RUNTIME_THUNK_CFLAGS "-DSANITIZER_DYNAMIC_RUNTIME_THUNK")
+  if(MSVC)
+    list(APPEND DYNAMIC_RUNTIME_THUNK_CFLAGS "-Zl")
+  elseif(CMAKE_C_COMPILER_ID MATCHES Clang)
+    list(APPEND DYNAMIC_RUNTIME_THUNK_CFLAGS "-nodefaultlibs")
+  endif()
+  add_compiler_rt_object_libraries(SanitizerCommonDynamicRuntimeThunk
+    OS ${SANITIZER_COMMON_SUPPORTED_OS}
+    ARCHS ${SANITIZER_COMMON_SUPPORTED_ARCH}
+    SOURCES sanitizer_win_dynamic_runtime_thunk.cc
+    CFLAGS ${SANITIZER_CFLAGS} ${DYNAMIC_RUNTIME_THUNK_CFLAGS}
+    DEFS ${SANITIZER_COMMON_DEFINITIONS})
+  add_compiler_rt_object_libraries(SancovDynamicRuntimeThunk
+    OS ${SANITIZER_COMMON_SUPPORTED_OS}
+    ARCHS ${SANITIZER_COMMON_SUPPORTED_ARCH}
+    SOURCES sanitizer_coverage_win_dynamic_runtime_thunk.cc
+            sanitizer_coverage_win_sections.cc
+    CFLAGS ${SANITIZER_CFLAGS} ${DYNAMIC_RUNTIME_THUNK_CFLAGS}
+    DEFS ${SANITIZER_COMMON_DEFINITIONS})
+endif()
+
 # Unit tests for common sanitizer runtime.
 if(COMPILER_RT_INCLUDE_TESTS)
   add_subdirectory(tests)
diff --git a/lib/sanitizer_common/sancov_flags.cc b/lib/sanitizer_common/sancov_flags.cc
new file mode 100644
index 0000000..9abb5b5
--- /dev/null
+++ b/lib/sanitizer_common/sancov_flags.cc
@@ -0,0 +1,59 @@
+//===-- sancov_flags.cc -----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Sanitizer Coverage runtime flags.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sancov_flags.h"
+#include "sanitizer_flag_parser.h"
+#include "sanitizer_platform.h"
+
+SANITIZER_INTERFACE_WEAK_DEF(const char*, __sancov_default_options, void) {
+  return "";
+}
+
+using namespace __sanitizer;
+
+namespace __sancov {
+
+SancovFlags sancov_flags_dont_use_directly;  // use via flags();
+
+void SancovFlags::SetDefaults() {
+#define SANCOV_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
+#include "sancov_flags.inc"
+#undef SANCOV_FLAG
+}
+
+static void RegisterSancovFlags(FlagParser *parser, SancovFlags *f) {
+#define SANCOV_FLAG(Type, Name, DefaultValue, Description) \
+  RegisterFlag(parser, #Name, Description, &f->Name);
+#include "sancov_flags.inc"
+#undef SANCOV_FLAG
+}
+
+static const char *MaybeCallSancovDefaultOptions() {
+  return (&__sancov_default_options) ? __sancov_default_options() : "";
+}
+
+void InitializeSancovFlags() {
+  SancovFlags *f = sancov_flags();
+  f->SetDefaults();
+
+  FlagParser parser;
+  RegisterSancovFlags(&parser, f);
+
+  parser.ParseString(MaybeCallSancovDefaultOptions());
+  parser.ParseString(GetEnv("SANCOV_OPTIONS"));
+
+  ReportUnrecognizedFlags();
+  if (f->help) parser.PrintFlagDescriptions();
+}
+
+}  // namespace __sancov
diff --git a/lib/sanitizer_common/sancov_flags.h b/lib/sanitizer_common/sancov_flags.h
new file mode 100644
index 0000000..627d9a3
--- /dev/null
+++ b/lib/sanitizer_common/sancov_flags.h
@@ -0,0 +1,40 @@
+//===-- sancov_flags.h ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Sanitizer Coverage runtime flags.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SANCOV_FLAGS_H
+#define SANCOV_FLAGS_H
+
+#include "sanitizer_flag_parser.h"
+#include "sanitizer_internal_defs.h"
+
+namespace __sancov {
+
+struct SancovFlags {
+#define SANCOV_FLAG(Type, Name, DefaultValue, Description) Type Name;
+#include "sancov_flags.inc"
+#undef SANCOV_FLAG
+
+  void SetDefaults();
+};
+
+extern SancovFlags sancov_flags_dont_use_directly;
+
+inline SancovFlags* sancov_flags() { return &sancov_flags_dont_use_directly; }
+
+void InitializeSancovFlags();
+
+}  // namespace __sancov
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE const char*
+__sancov_default_options();
+
+#endif
diff --git a/lib/sanitizer_common/sancov_flags.inc b/lib/sanitizer_common/sancov_flags.inc
new file mode 100644
index 0000000..63a1f0c
--- /dev/null
+++ b/lib/sanitizer_common/sancov_flags.inc
@@ -0,0 +1,21 @@
+//===-- sancov_flags.inc ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Sanitizer Coverage runtime flags.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SANCOV_FLAG
+#error "Define SANCOV_FLAG prior to including this file!"
+#endif
+// Each entry is SANCOV_FLAG(Type, Name, DefaultValue, Description).
+SANCOV_FLAG(bool, symbolize, true,
+            "If set, coverage information will be symbolized by sancov tool "
+            "after dumping.")
+
+SANCOV_FLAG(bool, help, false, "Print flags help.")
diff --git a/lib/sanitizer_common/sanitizer_allocator_combined.h b/lib/sanitizer_common/sanitizer_allocator_combined.h
index de96e27..19e1ae9 100644
--- a/lib/sanitizer_common/sanitizer_allocator_combined.h
+++ b/lib/sanitizer_common/sanitizer_allocator_combined.h
@@ -49,16 +49,30 @@
       size = 1;
     if (size + alignment < size) return ReturnNullOrDieOnBadRequest();
     if (check_rss_limit && RssLimitIsExceeded()) return ReturnNullOrDieOnOOM();
+    uptr original_size = size;
+    // If alignment requirements are to be fulfilled by the frontend allocator
+    // rather than by the primary or secondary, passing an alignment lower than
+    // or equal to 8 will prevent any further rounding up, as well as the later
+    // alignment check.
     if (alignment > 8)
       size = RoundUpTo(size, alignment);
     void *res;
     bool from_primary = primary_.CanAllocate(size, alignment);
+    // The primary allocator should return a 2^x aligned allocation when
+    // requested 2^x bytes, hence using the rounded up 'size' when being
+    // serviced by the primary (this is no longer true when the primary is
+    // using a non-fixed base address). The secondary takes care of the
+    // alignment without such requirement, and allocating 'size' would use
+    // extraneous memory, so we employ 'original_size'.
     if (from_primary)
       res = cache->Allocate(&primary_, primary_.ClassID(size));
     else
-      res = secondary_.Allocate(&stats_, size, alignment);
+      res = secondary_.Allocate(&stats_, original_size, alignment);
     if (alignment > 8)
       CHECK_EQ(reinterpret_cast<uptr>(res) & (alignment - 1), 0);
+    // When serviced by the secondary, the chunk comes from a mmap allocation
+    // and will be zero'd out anyway. We only need to clear out the chunk if
+    // it was serviced by the primary, hence using the rounded up 'size'.
     if (cleared && res && from_primary)
       internal_bzero_aligned16(res, RoundUpTo(size, 16));
     return res;
diff --git a/lib/sanitizer_common/sanitizer_allocator_interface.h b/lib/sanitizer_common/sanitizer_allocator_interface.h
index 5ff6edb..74ee903 100644
--- a/lib/sanitizer_common/sanitizer_allocator_interface.h
+++ b/lib/sanitizer_common/sanitizer_allocator_interface.h
@@ -34,9 +34,9 @@
     void (*free_hook)(const void *));
 
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-    /* OPTIONAL */ void __sanitizer_malloc_hook(void *ptr, uptr size);
+    void __sanitizer_malloc_hook(void *ptr, uptr size);
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-    /* OPTIONAL */ void __sanitizer_free_hook(void *ptr);
+    void __sanitizer_free_hook(void *ptr);
 
 
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
diff --git a/lib/sanitizer_common/sanitizer_allocator_primary64.h b/lib/sanitizer_common/sanitizer_allocator_primary64.h
index f2d94a0..035d92b 100644
--- a/lib/sanitizer_common/sanitizer_allocator_primary64.h
+++ b/lib/sanitizer_common/sanitizer_allocator_primary64.h
@@ -227,9 +227,8 @@
     uptr in_use = region->n_allocated - region->n_freed;
     uptr avail_chunks = region->allocated_user / ClassIdToSize(class_id);
     Printf(
-        "  %02zd (%zd): mapped: %zdK allocs: %zd frees: %zd inuse: %zd "
-        "num_freed_chunks %zd"
-        " avail: %zd rss: %zdK releases: %zd\n",
+        "  %02zd (%6zd): mapped: %6zdK allocs: %7zd frees: %7zd inuse: %6zd "
+        "num_freed_chunks %7zd avail: %6zd rss: %6zdK releases: %6zd\n",
         class_id, ClassIdToSize(class_id), region->mapped_user >> 10,
         region->n_allocated, region->n_freed, in_use,
         region->num_freed_chunks, avail_chunks, rss >> 10,
diff --git a/lib/sanitizer_common/sanitizer_common.cc b/lib/sanitizer_common/sanitizer_common.cc
index 1c6fc3e..9078a90 100644
--- a/lib/sanitizer_common/sanitizer_common.cc
+++ b/lib/sanitizer_common/sanitizer_common.cc
@@ -260,17 +260,22 @@
 }
 
 void LoadedModule::set(const char *module_name, uptr base_address,
-                       ModuleArch arch, u8 uuid[kModuleUUIDSize]) {
+                       ModuleArch arch, u8 uuid[kModuleUUIDSize],
+                       bool instrumented) {
   set(module_name, base_address);
   arch_ = arch;
   internal_memcpy(uuid_, uuid, sizeof(uuid_));
+  instrumented_ = instrumented;
 }
 
 void LoadedModule::clear() {
   InternalFree(full_name_);
+  base_address_ = 0;
+  max_executable_address_ = 0;
   full_name_ = nullptr;
   arch_ = kModuleArchUnknown;
   internal_memset(uuid_, 0, kModuleUUIDSize);
+  instrumented_ = false;
   while (!ranges_.empty()) {
     AddressRange *r = ranges_.front();
     ranges_.pop_front();
@@ -282,6 +287,8 @@
   void *mem = InternalAlloc(sizeof(AddressRange));
   AddressRange *r = new(mem) AddressRange(beg, end, executable);
   ranges_.push_back(r);
+  if (executable && end > max_executable_address_)
+    max_executable_address_ = end;
 }
 
 bool LoadedModule::containsAddress(uptr address) const {
@@ -482,7 +489,8 @@
   report_file.fd_pid = internal_getpid();
 }
 
-void __sanitizer_report_error_summary(const char *error_summary) {
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_report_error_summary,
+                             const char *error_summary) {
   Printf("%s\n", error_summary);
 }
 
@@ -498,9 +506,9 @@
   return InstallMallocFreeHooks(malloc_hook, free_hook);
 }
 
-#if !SANITIZER_GO && !SANITIZER_SUPPORTS_WEAK_HOOKS
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-void __sanitizer_print_memory_profile(int top_percent) {
+#if !SANITIZER_GO
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_print_memory_profile,
+                             int top_percent) {
   (void)top_percent;
 }
 #endif
diff --git a/lib/sanitizer_common/sanitizer_common.h b/lib/sanitizer_common/sanitizer_common.h
index 57ed35b..aa4d9e5 100644
--- a/lib/sanitizer_common/sanitizer_common.h
+++ b/lib/sanitizer_common/sanitizer_common.h
@@ -283,6 +283,7 @@
 void CacheBinaryName();
 void DisableCoreDumperIfNecessary();
 void DumpProcessMap();
+void PrintModuleMap();
 bool FileExists(const char *filename);
 const char *GetEnv(const char *name);
 bool SetEnv(const char *name, const char *value);
@@ -381,6 +382,7 @@
 typedef void (*SignalHandlerType)(int, void *, void *);
 bool IsHandledDeadlySignal(int signum);
 void InstallDeadlySignalHandlers(SignalHandlerType handler);
+const char *DescribeSignalOrException(int signo);
 // Alternative signal stack (POSIX-only).
 void SetAlternateSignalStack();
 void UnsetAlternateSignalStack();
@@ -547,6 +549,13 @@
   uptr capacity() const {
     return capacity_;
   }
+  void resize(uptr new_size) {
+    // NOTE(review): assumes Resize() only reallocates; grow only when needed.
+    if (new_size > capacity_) Resize(new_size);
+    if (new_size > size_)  // Zero-fill the newly exposed elements.
+      internal_memset(&data_[size_], 0, sizeof(T) * (new_size - size_));
+    size_ = new_size;
+  }
 
   void clear() { size_ = 0; }
   bool empty() const { return size() == 0; }
@@ -658,6 +667,33 @@
   kModuleArchARM64
 };
 
+// When adding a new architecture, don't forget to also update
+// script/asan_symbolize.py and sanitizer_symbolizer_libcdep.cc.
+inline const char *ModuleArchToString(ModuleArch arch) {
+  switch (arch) {
+    case kModuleArchUnknown:
+      return "";
+    case kModuleArchI386:
+      return "i386";
+    case kModuleArchX86_64:
+      return "x86_64";
+    case kModuleArchX86_64H:
+      return "x86_64h";
+    case kModuleArchARMV6:
+      return "armv6";
+    case kModuleArchARMV7:
+      return "armv7";
+    case kModuleArchARMV7S:
+      return "armv7s";
+    case kModuleArchARMV7K:
+      return "armv7k";
+    case kModuleArchARM64:
+      return "arm64";
+  }
+  CHECK(0 && "Invalid module arch");
+  return "";
+}
+
 const uptr kModuleUUIDSize = 16;
 
 // Represents a binary loaded into virtual memory (e.g. this can be an
@@ -665,21 +701,27 @@
 class LoadedModule {
  public:
   LoadedModule()
-      : full_name_(nullptr), base_address_(0), arch_(kModuleArchUnknown) {
+      : full_name_(nullptr),
+        base_address_(0),
+        max_executable_address_(0),
+        arch_(kModuleArchUnknown),
+        instrumented_(false) {
     internal_memset(uuid_, 0, kModuleUUIDSize);
     ranges_.clear();
   }
   void set(const char *module_name, uptr base_address);
   void set(const char *module_name, uptr base_address, ModuleArch arch,
-           u8 uuid[kModuleUUIDSize]);
+           u8 uuid[kModuleUUIDSize], bool instrumented);
   void clear();
   void addAddressRange(uptr beg, uptr end, bool executable);
   bool containsAddress(uptr address) const;
 
   const char *full_name() const { return full_name_; }
   uptr base_address() const { return base_address_; }
+  uptr max_executable_address() const { return max_executable_address_; }
   ModuleArch arch() const { return arch_; }
   const u8 *uuid() const { return uuid_; }
+  bool instrumented() const { return instrumented_; }
 
   struct AddressRange {
     AddressRange *next;
@@ -696,8 +738,10 @@
  private:
   char *full_name_;  // Owned.
   uptr base_address_;
+  uptr max_executable_address_;
   ModuleArch arch_;
   u8 uuid_[kModuleUUIDSize];
+  bool instrumented_;
   IntrusiveList<AddressRange> ranges_;
 };
 
@@ -867,6 +911,8 @@
 // indicate that sanitizer allocator should not attempt to release memory to OS.
 const s32 kReleaseToOSIntervalNever = -1;
 
+void CheckNoDeepBind(const char *filename, int flag);
+
 }  // namespace __sanitizer
 
 inline void *operator new(__sanitizer::operator_new_size_type size,
diff --git a/lib/sanitizer_common/sanitizer_common_interceptors.inc b/lib/sanitizer_common/sanitizer_common_interceptors.inc
index 90fb434..fbffb56 100644
--- a/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -30,6 +30,9 @@
 //   COMMON_INTERCEPTOR_SET_PTHREAD_NAME
 //   COMMON_INTERCEPTOR_HANDLE_RECVMSG
 //   COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED
+//   COMMON_INTERCEPTOR_MEMSET_IMPL
+//   COMMON_INTERCEPTOR_MEMMOVE_IMPL
+//   COMMON_INTERCEPTOR_MEMCPY_IMPL
 //===----------------------------------------------------------------------===//
 
 #include "interception/interception.h"
@@ -41,15 +44,9 @@
 #include <stdarg.h>
 
 #if SANITIZER_INTERCEPTOR_HOOKS
-#define CALL_WEAK_INTERCEPTOR_HOOK(f, ...)                                     \
-  do {                                                                         \
-    if (f)                                                                     \
-      f(__VA_ARGS__);                                                          \
-  } while (false);
-#define DECLARE_WEAK_INTERCEPTOR_HOOK(f, ...)                                  \
-  extern "C" {                                                                 \
-  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void f(__VA_ARGS__);  \
-  } // extern "C"
+#define CALL_WEAK_INTERCEPTOR_HOOK(f, ...) f(__VA_ARGS__);
+#define DECLARE_WEAK_INTERCEPTOR_HOOK(f, ...) \
+  SANITIZER_INTERFACE_WEAK_DEF(void, f, __VA_ARGS__) {}
 #else
 #define DECLARE_WEAK_INTERCEPTOR_HOOK(f, ...)
 #define CALL_WEAK_INTERCEPTOR_HOOK(f, ...)
@@ -67,6 +64,19 @@
 #define iconv __bsd_iconv
 #endif
 
+// Platform-specific options.
+#if SANITIZER_MAC
+namespace __sanitizer {
+bool PlatformHasDifferentMemcpyAndMemmove();
+}
+#define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE \
+  (__sanitizer::PlatformHasDifferentMemcpyAndMemmove())
+#elif SANITIZER_WINDOWS64
+#define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE false
+#else
+#define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE true
+#endif  // SANITIZER_MAC
+
 #ifndef COMMON_INTERCEPTOR_INITIALIZE_RANGE
 #define COMMON_INTERCEPTOR_INITIALIZE_RANGE(p, size) {}
 #endif
@@ -132,7 +142,8 @@
     COMMON_INTERCEPTOR_READ_STRING_OF_LEN((ctx), (s), REAL(strlen)(s), (n))
 
 #ifndef COMMON_INTERCEPTOR_ON_DLOPEN
-#define COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag) {}
+#define COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag) \
+  CheckNoDeepBind(filename, flag);
 #endif
 
 #ifndef COMMON_INTERCEPTOR_GET_TLS_RANGE
@@ -163,6 +174,47 @@
     COMMON_INTERCEPT_FUNCTION(fn)
 #endif
 
+#ifndef COMMON_INTERCEPTOR_MEMSET_IMPL
+#define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, dst, v, size) \
+  {                                                       \
+    if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)        \
+      return internal_memset(dst, v, size);               \
+    COMMON_INTERCEPTOR_ENTER(ctx, memset, dst, v, size);  \
+    if (common_flags()->intercept_intrin)                 \
+      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size);     \
+    return REAL(memset)(dst, v, size);                    \
+  }
+#endif
+
+#ifndef COMMON_INTERCEPTOR_MEMMOVE_IMPL
+#define COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, dst, src, size) \
+  {                                                          \
+    if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)           \
+      return internal_memmove(dst, src, size);               \
+    COMMON_INTERCEPTOR_ENTER(ctx, memmove, dst, src, size);  \
+    if (common_flags()->intercept_intrin) {                  \
+      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size);        \
+      COMMON_INTERCEPTOR_READ_RANGE(ctx, src, size);         \
+    }                                                        \
+    return REAL(memmove)(dst, src, size);                    \
+  }
+#endif
+
+#ifndef COMMON_INTERCEPTOR_MEMCPY_IMPL
+#define COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, dst, src, size) \
+  {                                                         \
+    if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED) {        \
+      return internal_memmove(dst, src, size);              \
+    }                                                       \
+    COMMON_INTERCEPTOR_ENTER(ctx, memcpy, dst, src, size);  \
+    if (common_flags()->intercept_intrin) {                 \
+      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size);       \
+      COMMON_INTERCEPTOR_READ_RANGE(ctx, src, size);        \
+    }                                                       \
+    return REAL(memcpy)(dst, src, size);                    \
+  }
+#endif
+
 struct FileMetadata {
   // For open_memstream().
   char **addr;
@@ -565,14 +617,9 @@
 #endif
 
 #if SANITIZER_INTERCEPT_MEMSET
-INTERCEPTOR(void*, memset, void *dst, int v, uptr size) {
-  if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
-    return internal_memset(dst, v, size);
+INTERCEPTOR(void *, memset, void *dst, int v, uptr size) {
   void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, memset, dst, v, size);
-  if (common_flags()->intercept_intrin)
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size);
-  return REAL(memset)(dst, v, size);
+  COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, dst, v, size);
 }
 
 #define INIT_MEMSET COMMON_INTERCEPT_FUNCTION(memset)
@@ -581,16 +628,9 @@
 #endif
 
 #if SANITIZER_INTERCEPT_MEMMOVE
-INTERCEPTOR(void*, memmove, void *dst, const void *src, uptr size) {
-  if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
-    return internal_memmove(dst, src, size);
+INTERCEPTOR(void *, memmove, void *dst, const void *src, uptr size) {
   void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, memmove, dst, src, size);
-  if (common_flags()->intercept_intrin) {
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size);
-    COMMON_INTERCEPTOR_READ_RANGE(ctx, src, size);
-  }
-  return REAL(memmove)(dst, src, size);
+  COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, dst, src, size);
 }
 
 #define INIT_MEMMOVE COMMON_INTERCEPT_FUNCTION(memmove)
@@ -599,25 +639,30 @@
 #endif
 
 #if SANITIZER_INTERCEPT_MEMCPY
-INTERCEPTOR(void*, memcpy, void *dst, const void *src, uptr size) {
-  if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED) {
-    // On OS X, calling internal_memcpy here will cause memory corruptions,
-    // because memcpy and memmove are actually aliases of the same
-    // implementation.  We need to use internal_memmove here.
-    return internal_memmove(dst, src, size);
-  }
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, memcpy, dst, src, size);
-  if (common_flags()->intercept_intrin) {
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size);
-    COMMON_INTERCEPTOR_READ_RANGE(ctx, src, size);
-  }
+INTERCEPTOR(void *, memcpy, void *dst, const void *src, uptr size) {
+  // On OS X, calling internal_memcpy here will cause memory corruptions,
+  // because memcpy and memmove are actually aliases of the same
+  // implementation.  We need to use internal_memmove here.
   // N.B.: If we switch this to internal_ we'll have to use internal_memmove
   // due to memcpy being an alias of memmove on OS X.
-  return REAL(memcpy)(dst, src, size);
+  void *ctx;
+  if (PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE) {
+    COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, dst, src, size);
+  } else {
+    COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, dst, src, size);
+  }
 }
 
-#define INIT_MEMCPY COMMON_INTERCEPT_FUNCTION(memcpy)
+#define INIT_MEMCPY                                  \
+  do {                                               \
+    if (PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE) { \
+      COMMON_INTERCEPT_FUNCTION(memcpy);             \
+    } else {                                         \
+      ASSIGN_REAL(memcpy, memmove);                  \
+    }                                                \
+    CHECK(REAL(memcpy));                             \
+  } while (false)
+
 #else
 #define INIT_MEMCPY
 #endif
@@ -3201,6 +3246,30 @@
 #endif
 
 #if SANITIZER_INTERCEPT_STRERROR_R
+// There are 2 versions of strerror_r:
+//  * POSIX version returns 0 on success, negative error code on failure,
+//    writes message to buf.
+//  * GNU version returns message pointer, which points to either buf or some
+//    static storage.
+#if ((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && !_GNU_SOURCE) || \
+    SANITIZER_MAC
+// POSIX version. Spec is not clear on whether buf is NULL-terminated.
+// At least on OSX, buf contents are valid even when the call fails.
+INTERCEPTOR(int, strerror_r, int errnum, char *buf, SIZE_T buflen) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, strerror_r, errnum, buf, buflen);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://github.com/google/sanitizers/issues/321.
+  int res = REAL(strerror_r)(errnum, buf, buflen);
+
+  SIZE_T sz = internal_strnlen(buf, buflen);
+  if (sz < buflen) ++sz;
+  COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, sz);
+  return res;
+}
+#else
+// GNU version.
 INTERCEPTOR(char *, strerror_r, int errnum, char *buf, SIZE_T buflen) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, strerror_r, errnum, buf, buflen);
@@ -3208,24 +3277,11 @@
   // its metadata. See
   // https://github.com/google/sanitizers/issues/321.
   char *res = REAL(strerror_r)(errnum, buf, buflen);
-  // There are 2 versions of strerror_r:
-  //  * POSIX version returns 0 on success, negative error code on failure,
-  //    writes message to buf.
-  //  * GNU version returns message pointer, which points to either buf or some
-  //    static storage.
-  SIZE_T posix_res = (SIZE_T)res;
-  if (posix_res < 1024 || posix_res > (SIZE_T) - 1024) {
-    // POSIX version. Spec is not clear on whether buf is NULL-terminated.
-    // At least on OSX, buf contents are valid even when the call fails.
-    SIZE_T sz = internal_strnlen(buf, buflen);
-    if (sz < buflen) ++sz;
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, sz);
-  } else {
-    // GNU version.
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
-  }
+  COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
   return res;
 }
+#endif //(_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && !_GNU_SOURCE ||
+       //SANITIZER_MAC
 #define INIT_STRERROR_R COMMON_INTERCEPT_FUNCTION(strerror_r);
 #else
 #define INIT_STRERROR_R
@@ -4564,11 +4620,15 @@
 //   descriptor offset as an argument instead of a pointer.  GOT address
 //   is passed in r12, so it's necessary to write it in assembly.  This is
 //   the function used by the compiler.
-#define INIT_TLS_GET_ADDR COMMON_INTERCEPT_FUNCTION(__tls_get_addr_internal)
+extern "C" uptr __tls_get_offset_wrapper(void *arg, uptr (*fn)(void *arg));
+#define INIT_TLS_GET_ADDR COMMON_INTERCEPT_FUNCTION(__tls_get_offset)
+DEFINE_REAL(uptr, __tls_get_offset, void *arg)
+extern "C" uptr __tls_get_offset(void *arg);
+extern "C" uptr __interceptor___tls_get_offset(void *arg);
 INTERCEPTOR(uptr, __tls_get_addr_internal, void *arg) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, __tls_get_addr_internal, arg);
-  uptr res = REAL(__tls_get_addr_internal)(arg);
+  uptr res = __tls_get_offset_wrapper(arg, REAL(__tls_get_offset));
   uptr tp = reinterpret_cast<uptr>(__builtin_thread_pointer());
   void *ptr = reinterpret_cast<void *>(res + tp);
   uptr tls_begin, tls_end;
@@ -4580,32 +4640,43 @@
   }
   return res;
 }
-// We need a protected symbol aliasing the above, so that we can jump
+// We need a hidden symbol aliasing the above, so that we can jump
 // directly to it from the assembly below.
 extern "C" __attribute__((alias("__interceptor___tls_get_addr_internal"),
-                          visibility("protected")))
-uptr __interceptor___tls_get_addr_internal_protected(void *arg);
+                          visibility("hidden")))
+uptr __tls_get_addr_hidden(void *arg);
 // Now carefully intercept __tls_get_offset.
 asm(
   ".text\n"
-  ".global __tls_get_offset\n"
-  "__tls_get_offset:\n"
 // The __intercept_ version has to exist, so that gen_dynamic_list.py
 // exports our symbol.
+  ".weak __tls_get_offset\n"
+  ".type __tls_get_offset, @function\n"
+  "__tls_get_offset:\n"
   ".global __interceptor___tls_get_offset\n"
+  ".type __interceptor___tls_get_offset, @function\n"
   "__interceptor___tls_get_offset:\n"
 #ifdef __s390x__
   "la %r2, 0(%r2,%r12)\n"
-  "jg __interceptor___tls_get_addr_internal_protected\n"
+  "jg __tls_get_addr_hidden\n"
 #else
   "basr %r3,0\n"
   "0: la %r2,0(%r2,%r12)\n"
   "l %r4,1f-0b(%r3)\n"
   "b 0(%r4,%r3)\n"
-  "1: .long __interceptor___tls_get_addr_internal_protected - 0b\n"
+  "1: .long __tls_get_addr_hidden - 0b\n"
 #endif
-  ".type __tls_get_offset, @function\n"
-  ".size __tls_get_offset, .-__tls_get_offset\n"
+  ".size __interceptor___tls_get_offset, .-__interceptor___tls_get_offset\n"
+// Assembly wrapper to call REAL(__tls_get_offset)(arg)
+  ".type __tls_get_offset_wrapper, @function\n"
+  "__tls_get_offset_wrapper:\n"
+#ifdef __s390x__
+  "sgr %r2,%r12\n"
+#else
+  "sr %r2,%r12\n"
+#endif
+  "br %r3\n"
+  ".size __tls_get_offset_wrapper, .-__tls_get_offset_wrapper\n"
 );
 #endif // SANITIZER_S390
 #else
@@ -4837,47 +4908,67 @@
 #endif
 
 #if SANITIZER_INTERCEPT_AEABI_MEM
-DECLARE_REAL_AND_INTERCEPTOR(void *, memmove, void *, const void *, uptr)
-DECLARE_REAL_AND_INTERCEPTOR(void *, memcpy, void *, const void *, uptr)
-DECLARE_REAL_AND_INTERCEPTOR(void *, memset, void *, int, uptr)
-
 INTERCEPTOR(void *, __aeabi_memmove, void *to, const void *from, uptr size) {
-  return WRAP(memmove)(to, from, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memmove4, void *to, const void *from, uptr size) {
-  return WRAP(memmove)(to, from, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memmove8, void *to, const void *from, uptr size) {
-  return WRAP(memmove)(to, from, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memcpy, void *to, const void *from, uptr size) {
-  return WRAP(memcpy)(to, from, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memcpy4, void *to, const void *from, uptr size) {
-  return WRAP(memcpy)(to, from, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memcpy8, void *to, const void *from, uptr size) {
-  return WRAP(memcpy)(to, from, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size);
 }
+
 // Note the argument order.
 INTERCEPTOR(void *, __aeabi_memset, void *block, uptr size, int c) {
-  return WRAP(memset)(block, c, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memset4, void *block, uptr size, int c) {
-  return WRAP(memset)(block, c, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memset8, void *block, uptr size, int c) {
-  return WRAP(memset)(block, c, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memclr, void *block, uptr size) {
-  return WRAP(memset)(block, 0, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memclr4, void *block, uptr size) {
-  return WRAP(memset)(block, 0, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memclr8, void *block, uptr size) {
-  return WRAP(memset)(block, 0, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
 }
+
 #define INIT_AEABI_MEM                         \
   COMMON_INTERCEPT_FUNCTION(__aeabi_memmove);  \
   COMMON_INTERCEPT_FUNCTION(__aeabi_memmove4); \
@@ -4896,11 +4987,11 @@
 #endif  // SANITIZER_INTERCEPT_AEABI_MEM
 
 #if SANITIZER_INTERCEPT___BZERO
-DECLARE_REAL_AND_INTERCEPTOR(void *, memset, void *, int, uptr);
-
 INTERCEPTOR(void *, __bzero, void *block, uptr size) {
-  return WRAP(memset)(block, 0, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
 }
+
 #define INIT___BZERO COMMON_INTERCEPT_FUNCTION(__bzero);
 #else
 #define INIT___BZERO
@@ -5890,6 +5981,72 @@
 
 // FIXME: add other *stat interceptor
 
+#if SANITIZER_INTERCEPT_UTMP
+INTERCEPTOR(void *, getutent, int dummy) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, getutent, dummy);
+  void *res = REAL(getutent)(dummy);
+  if (res)
+    COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmp_sz);
+  return res;
+}
+INTERCEPTOR(void *, getutid, void *ut) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, getutid, ut);
+  void *res = REAL(getutid)(ut);
+  if (res)
+    COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmp_sz);
+  return res;
+}
+INTERCEPTOR(void *, getutline, void *ut) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, getutline, ut);
+  void *res = REAL(getutline)(ut);
+  if (res)
+    COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmp_sz);
+  return res;
+}
+#define INIT_UTMP                      \
+  COMMON_INTERCEPT_FUNCTION(getutent); \
+  COMMON_INTERCEPT_FUNCTION(getutid);  \
+  COMMON_INTERCEPT_FUNCTION(getutline);
+#else
+#define INIT_UTMP
+#endif
+
+#if SANITIZER_INTERCEPT_UTMPX
+INTERCEPTOR(void *, getutxent, int dummy) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, getutxent, dummy);
+  void *res = REAL(getutxent)(dummy);
+  if (res)
+    COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmpx_sz);
+  return res;
+}
+INTERCEPTOR(void *, getutxid, void *ut) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, getutxid, ut);
+  void *res = REAL(getutxid)(ut);
+  if (res)
+    COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmpx_sz);
+  return res;
+}
+INTERCEPTOR(void *, getutxline, void *ut) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, getutxline, ut);
+  void *res = REAL(getutxline)(ut);
+  if (res)
+    COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmpx_sz);
+  return res;
+}
+#define INIT_UTMPX                      \
+  COMMON_INTERCEPT_FUNCTION(getutxent); \
+  COMMON_INTERCEPT_FUNCTION(getutxid);  \
+  COMMON_INTERCEPT_FUNCTION(getutxline);
+#else
+#define INIT_UTMPX
+#endif
+
 static void InitializeCommonInterceptors() {
   static u64 metadata_mem[sizeof(MetadataHashMap) / sizeof(u64) + 1];
   interceptor_metadata_map = new((void *)&metadata_mem) MetadataHashMap();
@@ -6086,4 +6243,6 @@
   INIT___LXSTAT;
   INIT___LXSTAT64;
   // FIXME: add other *stat interceptors.
+  INIT_UTMP;
+  INIT_UTMPX;
 }
diff --git a/lib/sanitizer_common/sanitizer_common_interceptors_format.inc b/lib/sanitizer_common/sanitizer_common_interceptors_format.inc
index 92318cd..1256349 100644
--- a/lib/sanitizer_common/sanitizer_common_interceptors_format.inc
+++ b/lib/sanitizer_common/sanitizer_common_interceptors_format.inc
@@ -435,10 +435,6 @@
 }
 
 static int printf_get_value_size(PrintfDirective *dir) {
-  if (dir->convSpecifier == 'm') {
-    return sizeof(char *);
-  }
-
   if (char_is_one_of(dir->convSpecifier, "cCsS")) {
     unsigned charSize =
         format_get_char_size(dir->convSpecifier, dir->lengthModifier);
@@ -519,6 +515,9 @@
       // Dynamic precision
       SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
     }
+    // %m does not require an argument: it prints strerror(errno).
+    if (dir.convSpecifier == 'm')
+      continue;
     int size = printf_get_value_size(&dir);
     if (size == FSS_INVALID) {
       Report("WARNING: unexpected format specifier in printf "
diff --git a/lib/sanitizer_common/sanitizer_common_interface.inc b/lib/sanitizer_common/sanitizer_common_interface.inc
new file mode 100644
index 0000000..4f0e940
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_common_interface.inc
@@ -0,0 +1,39 @@
+//===-- sanitizer_common_interface.inc ------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Sanitizer Common interface list.
+//===----------------------------------------------------------------------===//
+INTERFACE_FUNCTION(__sanitizer_annotate_contiguous_container)
+INTERFACE_FUNCTION(__sanitizer_contiguous_container_find_bad_address)
+INTERFACE_FUNCTION(__sanitizer_set_death_callback)
+INTERFACE_FUNCTION(__sanitizer_set_report_path)
+INTERFACE_FUNCTION(__sanitizer_set_report_fd)
+INTERFACE_FUNCTION(__sanitizer_verify_contiguous_container)
+INTERFACE_WEAK_FUNCTION(__sanitizer_report_error_summary)
+INTERFACE_WEAK_FUNCTION(__sanitizer_sandbox_on_notify)
+// Sanitizer weak hooks
+INTERFACE_WEAK_FUNCTION(__sanitizer_weak_hook_memcmp)
+INTERFACE_WEAK_FUNCTION(__sanitizer_weak_hook_strcmp)
+INTERFACE_WEAK_FUNCTION(__sanitizer_weak_hook_strncmp)
+INTERFACE_WEAK_FUNCTION(__sanitizer_weak_hook_strstr)
+// Stacktrace interface.
+INTERFACE_FUNCTION(__sanitizer_get_module_and_offset_for_pc)
+INTERFACE_FUNCTION(__sanitizer_symbolize_global)
+INTERFACE_FUNCTION(__sanitizer_symbolize_pc)
+// Allocator interface.
+INTERFACE_FUNCTION(__sanitizer_get_allocated_size)
+INTERFACE_FUNCTION(__sanitizer_get_current_allocated_bytes)
+INTERFACE_FUNCTION(__sanitizer_get_estimated_allocated_size)
+INTERFACE_FUNCTION(__sanitizer_get_free_bytes)
+INTERFACE_FUNCTION(__sanitizer_get_heap_size)
+INTERFACE_FUNCTION(__sanitizer_get_ownership)
+INTERFACE_FUNCTION(__sanitizer_get_unmapped_bytes)
+INTERFACE_FUNCTION(__sanitizer_install_malloc_and_free_hooks)
+INTERFACE_WEAK_FUNCTION(__sanitizer_free_hook)
+INTERFACE_WEAK_FUNCTION(__sanitizer_malloc_hook)
+INTERFACE_WEAK_FUNCTION(__sanitizer_print_memory_profile)
diff --git a/lib/sanitizer_common/sanitizer_common_interface_posix.inc b/lib/sanitizer_common/sanitizer_common_interface_posix.inc
new file mode 100644
index 0000000..bbc725a
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_common_interface_posix.inc
@@ -0,0 +1,14 @@
+//===-- sanitizer_common_interface_posix.inc ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Sanitizer Common interface list only available for Posix systems.
+//===----------------------------------------------------------------------===//
+INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_code)
+INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_data)
+INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_demangle)
+INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_flush)
diff --git a/lib/sanitizer_common/sanitizer_common_libcdep.cc b/lib/sanitizer_common/sanitizer_common_libcdep.cc
index 49ca961..e96db6d 100644
--- a/lib/sanitizer_common/sanitizer_common_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_common_libcdep.cc
@@ -162,8 +162,8 @@
 
 }  // namespace __sanitizer
 
-void NOINLINE
-__sanitizer_sandbox_on_notify(__sanitizer_sandbox_arguments *args) {
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_sandbox_on_notify,
+                             __sanitizer_sandbox_arguments *args) {
   __sanitizer::PrepareForSandboxing(args);
   if (__sanitizer::sandboxing_callback)
     __sanitizer::sandboxing_callback();
diff --git a/lib/sanitizer_common/sanitizer_coverage_interface.inc b/lib/sanitizer_common/sanitizer_coverage_interface.inc
new file mode 100644
index 0000000..ae691bd
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_coverage_interface.inc
@@ -0,0 +1,40 @@
+//===-- sanitizer_coverage_interface.inc ----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Sanitizer Coverage interface list.
+//===----------------------------------------------------------------------===//
+INTERFACE_FUNCTION(__sanitizer_cov)
+INTERFACE_FUNCTION(__sanitizer_cov_dump)
+INTERFACE_FUNCTION(__sanitizer_cov_indir_call16)
+INTERFACE_FUNCTION(__sanitizer_cov_init)
+INTERFACE_FUNCTION(__sanitizer_cov_module_init)
+INTERFACE_FUNCTION(__sanitizer_cov_trace_basic_block)
+INTERFACE_FUNCTION(__sanitizer_cov_trace_func_enter)
+INTERFACE_FUNCTION(__sanitizer_cov_with_check)
+INTERFACE_FUNCTION(__sanitizer_dump_coverage)
+INTERFACE_FUNCTION(__sanitizer_dump_trace_pc_guard_coverage)
+INTERFACE_FUNCTION(__sanitizer_get_coverage_guards)
+INTERFACE_FUNCTION(__sanitizer_get_number_of_counters)
+INTERFACE_FUNCTION(__sanitizer_get_total_unique_caller_callee_pairs)
+INTERFACE_FUNCTION(__sanitizer_get_total_unique_coverage)
+INTERFACE_FUNCTION(__sanitizer_maybe_open_cov_file)
+INTERFACE_FUNCTION(__sanitizer_reset_coverage)
+INTERFACE_FUNCTION(__sanitizer_update_counter_bitset_and_clear_counters)
+INTERFACE_WEAK_FUNCTION(__sancov_default_options)
+INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_cmp)
+INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_cmp1)
+INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_cmp2)
+INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_cmp4)
+INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_cmp8)
+INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_div4)
+INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_div8)
+INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_gep)
+INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_pc_guard)
+INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_pc_guard_init)
+INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_pc_indir)
+INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_switch)
diff --git a/lib/sanitizer_common/sanitizer_coverage_libcdep.cc b/lib/sanitizer_common/sanitizer_coverage_libcdep.cc
index 0661e5f..e934af3 100644
--- a/lib/sanitizer_common/sanitizer_coverage_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_coverage_libcdep.cc
@@ -171,7 +171,11 @@
   //   - not thread-safe;
   //   - does not support long traces;
   //   - not tuned for performance.
-  static const uptr kTrEventArrayMaxSize = FIRST_32_SECOND_64(1 << 22, 1 << 30);
+  // Windows doesn't do overcommit (committed virtual memory costs swap), so
+  // programs can't reliably map such large amounts of virtual memory.
+  // TODO(etienneb): Find a way to support coverage of larger executables.
+  static const uptr kTrEventArrayMaxSize =
+      (SANITIZER_WORDSIZE == 32 || SANITIZER_WINDOWS) ? 1 << 22 : 1 << 30;
   u32 *tr_event_array;
   uptr tr_event_array_size;
   u32 *tr_event_pointer;
@@ -415,8 +419,7 @@
   uptr idx = -guard_value - 1;
   if (idx >= atomic_load(&pc_array_index, memory_order_acquire))
     return;  // May happen after fork when pc_array_index becomes 0.
-  CHECK_LT(idx * sizeof(uptr),
-           atomic_load(&pc_array_size, memory_order_acquire));
+  CHECK_LT(idx, atomic_load(&pc_array_size, memory_order_acquire));
   uptr counter = atomic_fetch_add(&coverage_counter, 1, memory_order_relaxed);
   pc_array[idx] = BundlePcAndCounter(pc, counter);
 }
@@ -940,7 +943,8 @@
   atomic_uint32_t *atomic_guard = reinterpret_cast<atomic_uint32_t*>(guard);
   if (static_cast<s32>(
           __sanitizer::atomic_load(atomic_guard, memory_order_relaxed)) < 0)
-    __sanitizer_cov(guard);
+    coverage_data.Add(StackTrace::GetPreviousInstructionPc(GET_CALLER_PC()),
+                      guard);
 }
 SANITIZER_INTERFACE_ATTRIBUTE void
 __sanitizer_cov_indir_call16(uptr callee, uptr callee_cache16[]) {
@@ -954,6 +958,7 @@
 }
 SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_dump() {
   coverage_data.DumpAll();
+  __sanitizer_dump_trace_pc_guard_coverage();
 }
 SANITIZER_INTERFACE_ATTRIBUTE void
 __sanitizer_cov_module_init(s32 *guards, uptr npcs, u8 *counters,
@@ -1015,31 +1020,16 @@
 uptr __sanitizer_update_counter_bitset_and_clear_counters(u8 *bitset) {
   return coverage_data.Update8bitCounterBitsetAndClearCounters(bitset);
 }
+
 // Default empty implementations (weak). Users should redefine them.
-#if !SANITIZER_WINDOWS  // weak does not work on Windows.
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-void __sanitizer_cov_trace_cmp() {}
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-void __sanitizer_cov_trace_cmp1() {}
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-void __sanitizer_cov_trace_cmp2() {}
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-void __sanitizer_cov_trace_cmp4() {}
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-void __sanitizer_cov_trace_cmp8() {}
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-void __sanitizer_cov_trace_switch() {}
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-void __sanitizer_cov_trace_div4() {}
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-void __sanitizer_cov_trace_div8() {}
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-void __sanitizer_cov_trace_gep() {}
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-void __sanitizer_cov_trace_pc_guard() {}
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-void __sanitizer_cov_trace_pc_indir() {}
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-void __sanitizer_cov_trace_pc_guard_init() {}
-#endif  // !SANITIZER_WINDOWS
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp1, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp2, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp4, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp8, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_switch, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_div4, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_div8, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_gep, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_indir, void) {}
 } // extern "C"
diff --git a/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cc b/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cc
new file mode 100644
index 0000000..73c3608
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cc
@@ -0,0 +1,173 @@
+//===-- sanitizer_coverage_libcdep_new.cc ---------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Sanitizer Coverage Controller for Trace PC Guard.
+
+#include "sancov_flags.h"
+#include "sanitizer_allocator_internal.h"
+#include "sanitizer_atomic.h"
+#include "sanitizer_common.h"
+#include "sanitizer_symbolizer.h"
+
+using namespace __sanitizer;
+
+using AddressRange = LoadedModule::AddressRange;
+
+namespace __sancov {
+namespace {
+
+static const u64 Magic64 = 0xC0BFFFFFFFFFFF64ULL;
+static const u64 Magic32 = 0xC0BFFFFFFFFFFF32ULL;
+static const u64 Magic = SANITIZER_WORDSIZE == 64 ? Magic64 : Magic32;
+
+static fd_t OpenFile(const char* path) {
+  error_t err;
+  fd_t fd = OpenFile(path, WrOnly, &err);
+  if (fd == kInvalidFd)
+    Report("SanitizerCoverage: failed to open %s for writing (reason: %d)\n",
+           path, err);
+  return fd;
+}
+
+static void GetCoverageFilename(char* path, const char* name,
+                                const char* extension) {
+  CHECK(name);
+  internal_snprintf(path, kMaxPathLength, "%s/%s.%zd.%s",
+                    common_flags()->coverage_dir, name, internal_getpid(),
+                    extension);
+}
+
+static void WriteModuleCoverage(char* file_path, const char* module_name,
+                                const uptr* pcs, uptr len) {
+  GetCoverageFilename(file_path, StripModuleName(module_name), "sancov");
+  fd_t fd = OpenFile(file_path);  // NOTE(review): kInvalidFd is not checked.
+  WriteToFile(fd, &Magic, sizeof(Magic));  // File header: word-size magic.
+  WriteToFile(fd, pcs, len * sizeof(*pcs));  // Payload: len uptr entries.
+  CloseFile(fd);
+  Printf("SanitizerCoverage: %s %zd PCs written\n", file_path, len);
+}
+
+static void SanitizerDumpCoverage(const uptr* unsorted_pcs, uptr len) {
+  if (!len) return;
+
+  char* file_path = static_cast<char*>(InternalAlloc(kMaxPathLength));
+  char* module_name = static_cast<char*>(InternalAlloc(kMaxPathLength));
+  uptr* pcs = static_cast<uptr*>(InternalAlloc(len * sizeof(uptr)));
+
+  internal_memcpy(pcs, unsorted_pcs, len * sizeof(uptr));
+  SortArray(pcs, len);  // Work on a sorted copy; the caller's array is const.
+
+  bool module_found = false;
+  uptr last_base = 0;
+  uptr module_start_idx = 0;
+
+  for (uptr i = 0; i < len; ++i) {
+    const uptr pc = pcs[i];
+    if (!pc) continue;  // Skip empty (zero) entries.
+
+    if (!__sanitizer_get_module_and_offset_for_pc(pc, nullptr, 0, &pcs[i])) {
+      Printf("ERROR: bad pc %zx\n", pc);  // pc is uptr: needs the 'z' flag.
+      continue;
+    }
+    uptr module_base = pc - pcs[i];  // pcs[i] now holds the module offset.
+
+    if (module_base != last_base || !module_found) {
+      if (module_found) {
+        WriteModuleCoverage(file_path, module_name, &pcs[module_start_idx],
+                            i - module_start_idx);
+      }
+
+      last_base = module_base;
+      module_start_idx = i;
+      module_found = true;
+      __sanitizer_get_module_and_offset_for_pc(pc, module_name, kMaxPathLength,
+                                               &pcs[i]);
+    }
+  }
+
+  if (module_found) {
+    WriteModuleCoverage(file_path, module_name, &pcs[module_start_idx],
+                        len - module_start_idx);
+  }
+
+  InternalFree(file_path);
+  InternalFree(module_name);
+  InternalFree(pcs);
+
+  if (sancov_flags()->symbolize) {
+    Printf("TODO(aizatsky): call sancov to symbolize\n");
+  }
+}
+
+// Collects trace-pc guard coverage.
+// This class relies on zero-initialization.
+class TracePcGuardController {
+ public:
+  void Initialize() {
+    CHECK(!initialized);
+
+    initialized = true;
+    InitializeSancovFlags();
+
+    pc_vector.Initialize(0);
+  }
+
+  void InitTracePcGuard(u32* start, u32* end) {
+    if (!initialized) Initialize();
+    CHECK_NE(start, end);  // Validate the range before reading *start.
+    CHECK(!*start);
+
+    u32 i = pc_vector.size();
+    for (u32* p = start; p < end; p++) *p = ++i;  // 1-based slot ids.
+    pc_vector.resize(i);
+  }
+
+  void TracePcGuard(u32* guard, uptr pc) {
+    atomic_uint32_t* guard_ptr = reinterpret_cast<atomic_uint32_t*>(guard);
+    u32 idx = atomic_exchange(guard_ptr, 0, memory_order_relaxed);
+    if (!idx) return;  // Guard was already zero: nothing to record.
+    // we start indices from 1.
+    pc_vector[idx - 1] = pc;
+  }
+
+  void Dump() {
+    if (!initialized || !common_flags()->coverage) return;
+    __sanitizer_dump_coverage(pc_vector.data(), pc_vector.size());
+  }
+
+ private:
+  bool initialized;
+  InternalMmapVectorNoCtor<uptr> pc_vector;
+};
+
+static TracePcGuardController pc_guard_controller;
+
+}  // namespace
+}  // namespace __sancov
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_dump_coverage(  // NOLINT
+    const uptr* pcs, uptr len) {
+  return __sancov::SanitizerDumpCoverage(pcs, len);
+}
+
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_guard, u32* guard) {
+  if (!*guard) return;
+  __sancov::pc_guard_controller.TracePcGuard(guard, GET_CALLER_PC() - 1);
+}
+
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_guard_init,
+                             u32* start, u32* end) {
+  if (start == end || *start) return;
+  __sancov::pc_guard_controller.InitTracePcGuard(start, end);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_dump_trace_pc_guard_coverage() {
+  __sancov::pc_guard_controller.Dump();
+}
+}  // extern "C"
diff --git a/lib/sanitizer_common/sanitizer_coverage_win_dll_thunk.cc b/lib/sanitizer_common/sanitizer_coverage_win_dll_thunk.cc
new file mode 100644
index 0000000..d5e459f
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_coverage_win_dll_thunk.cc
@@ -0,0 +1,21 @@
+//===-- sanitizer_coverage_win_dll_thunk.cc -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a family of thunks that should be statically linked into
+// the DLLs that have instrumentation in order to delegate the calls to the
+// shared runtime that lives in the main binary.
+// See https://github.com/google/sanitizers/issues/209 for the details.
+//===----------------------------------------------------------------------===//
+#ifdef SANITIZER_DLL_THUNK
+#include "sanitizer_win_dll_thunk.h"
+// Sanitizer Coverage interface functions.
+#define INTERFACE_FUNCTION(Name) INTERCEPT_SANITIZER_FUNCTION(Name)
+#define INTERFACE_WEAK_FUNCTION(Name) INTERCEPT_SANITIZER_WEAK_FUNCTION(Name)
+#include "sanitizer_coverage_interface.inc"
+#endif // SANITIZER_DLL_THUNK
diff --git a/lib/sanitizer_common/sanitizer_coverage_win_dynamic_runtime_thunk.cc b/lib/sanitizer_common/sanitizer_coverage_win_dynamic_runtime_thunk.cc
new file mode 100644
index 0000000..988a206
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_coverage_win_dynamic_runtime_thunk.cc
@@ -0,0 +1,21 @@
+//===-- sanitizer_coverage_win_dynamic_runtime_thunk.cc -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines things that need to be present in the application modules
+// to interact with Sanitizer Coverage, when it is included in a dll.
+//
+//===----------------------------------------------------------------------===//
+#ifdef SANITIZER_DYNAMIC_RUNTIME_THUNK
+#define SANITIZER_IMPORT_INTERFACE 1
+#include "sanitizer_win_defs.h"
+// Define weak alias for all weak functions imported from sanitizer coverage.
+#define INTERFACE_FUNCTION(Name)
+#define INTERFACE_WEAK_FUNCTION(Name) WIN_WEAK_IMPORT_DEF(Name)
+#include "sanitizer_coverage_interface.inc"
+#endif // SANITIZER_DYNAMIC_RUNTIME_THUNK
diff --git a/lib/sanitizer_common/sanitizer_coverage_win_sections.cc b/lib/sanitizer_common/sanitizer_coverage_win_sections.cc
new file mode 100644
index 0000000..4b0bbf1
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_coverage_win_sections.cc
@@ -0,0 +1,22 @@
+//===-- sanitizer_coverage_win_sections.cc --------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines delimiters for Sanitizer Coverage's section.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_platform.h"
+#if SANITIZER_WINDOWS
+#include <stdint.h>
+#pragma section(".SCOV$A", read, write)  // NOLINT
+#pragma section(".SCOV$Z", read, write)  // NOLINT
+extern "C" {
+__declspec(allocate(".SCOV$A")) uint32_t __start___sancov_guards = 0;
+__declspec(allocate(".SCOV$Z")) uint32_t __stop___sancov_guards = 0;
+}
+#endif // SANITIZER_WINDOWS
diff --git a/lib/sanitizer_common/sanitizer_coverage_win_weak_interception.cc b/lib/sanitizer_common/sanitizer_coverage_win_weak_interception.cc
new file mode 100644
index 0000000..0926f46
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_coverage_win_weak_interception.cc
@@ -0,0 +1,24 @@
+//===-- sanitizer_coverage_win_weak_interception.cc -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This module should be included in Sanitizer Coverage when it is implemented
+// as a shared library on Windows (dll), in order to delegate the calls of weak
+// functions to the implementation in the main executable when a strong
+// definition is provided.
+//===----------------------------------------------------------------------===//
+#ifdef SANITIZER_DYNAMIC
+#include "sanitizer_win_weak_interception.h"
+#include "sanitizer_interface_internal.h"
+#include "sancov_flags.h"
+// Check if strong definitions for weak functions are present in the main
+// executable. If that is the case, override dll functions to point to strong
+// implementations.
+#define INTERFACE_FUNCTION(Name)
+#define INTERFACE_WEAK_FUNCTION(Name) INTERCEPT_SANITIZER_WEAK_FUNCTION(Name)
+#include "sanitizer_coverage_interface.inc"
+#endif // SANITIZER_DYNAMIC
diff --git a/lib/sanitizer_common/sanitizer_flags.inc b/lib/sanitizer_common/sanitizer_flags.inc
index 43900f8..1306c72 100644
--- a/lib/sanitizer_common/sanitizer_flags.inc
+++ b/lib/sanitizer_common/sanitizer_flags.inc
@@ -62,7 +62,8 @@
 COMMON_FLAG(
     int, verbosity, 0,
     "Verbosity level (0 - silent, 1 - a bit of output, 2+ - more output).")
-COMMON_FLAG(bool, detect_leaks, true, "Enable memory leak detection.")
+COMMON_FLAG(bool, detect_leaks, SANITIZER_WORDSIZE == 64,
+            "Enable memory leak detection.")
 COMMON_FLAG(
     bool, leak_check_at_exit, true,
     "Invoke leak checking in an atexit handler. Has no effect if "
@@ -74,9 +75,14 @@
 COMMON_FLAG(bool, print_summary, true,
             "If false, disable printing error summaries in addition to error "
             "reports.")
+COMMON_FLAG(int, print_module_map, 0,
+            "OS X only. 0 = don't print, 1 = print only once before process "
+            "exits, 2 = print after each report.")
 COMMON_FLAG(bool, check_printf, true, "Check printf arguments.")
 COMMON_FLAG(bool, handle_segv, true,
-            "If set, registers the tool's custom SIGSEGV/SIGBUS handler.")
+            "If set, registers the tool's custom SIGSEGV handler.")
+COMMON_FLAG(bool, handle_sigbus, true,
+            "If set, registers the tool's custom SIGBUS handler.")
 COMMON_FLAG(bool, handle_abort, false,
             "If set, registers the tool's custom SIGABRT handler.")
 COMMON_FLAG(bool, handle_sigill, false,
diff --git a/lib/sanitizer_common/sanitizer_interface_internal.h b/lib/sanitizer_common/sanitizer_interface_internal.h
index 7f43c84..b28d8f0 100644
--- a/lib/sanitizer_common/sanitizer_interface_internal.h
+++ b/lib/sanitizer_common/sanitizer_interface_internal.h
@@ -46,8 +46,12 @@
   SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
   void __sanitizer_report_error_summary(const char *error_summary);
 
-  SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_dump();
   SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_init();
+  SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_dump();
+  SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_dump_coverage(
+      const __sanitizer::uptr *pcs, const __sanitizer::uptr len);
+  SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_dump_trace_pc_guard_coverage();
+
   SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov(__sanitizer::u32 *guard);
   SANITIZER_INTERFACE_ATTRIBUTE
   void __sanitizer_annotate_contiguous_container(const void *beg,
@@ -60,6 +64,37 @@
   SANITIZER_INTERFACE_ATTRIBUTE
   const void *__sanitizer_contiguous_container_find_bad_address(
       const void *beg, const void *mid, const void *end);
-  } // extern "C"
+
+  SANITIZER_INTERFACE_ATTRIBUTE
+  int __sanitizer_get_module_and_offset_for_pc(
+      __sanitizer::uptr pc, char *module_path,
+      __sanitizer::uptr module_path_len, __sanitizer::uptr *pc_offset);
+
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  void __sanitizer_cov_trace_cmp();
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  void __sanitizer_cov_trace_cmp1();
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  void __sanitizer_cov_trace_cmp2();
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  void __sanitizer_cov_trace_cmp4();
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  void __sanitizer_cov_trace_cmp8();
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  void __sanitizer_cov_trace_switch();
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  void __sanitizer_cov_trace_div4();
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  void __sanitizer_cov_trace_div8();
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  void __sanitizer_cov_trace_gep();
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  void __sanitizer_cov_trace_pc_indir();
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  void __sanitizer_cov_trace_pc_guard(__sanitizer::u32*);
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  void __sanitizer_cov_trace_pc_guard_init(__sanitizer::u32*,
+                                           __sanitizer::u32*);
+} // extern "C"
 
 #endif  // SANITIZER_INTERFACE_INTERNAL_H
diff --git a/lib/sanitizer_common/sanitizer_internal_defs.h b/lib/sanitizer_common/sanitizer_internal_defs.h
index 02a1e52..ea5022e 100644
--- a/lib/sanitizer_common/sanitizer_internal_defs.h
+++ b/lib/sanitizer_common/sanitizer_internal_defs.h
@@ -21,8 +21,11 @@
 
 // Only use SANITIZER_*ATTRIBUTE* before the function return type!
 #if SANITIZER_WINDOWS
+#if SANITIZER_IMPORT_INTERFACE
+# define SANITIZER_INTERFACE_ATTRIBUTE __declspec(dllimport)
+#else
 # define SANITIZER_INTERFACE_ATTRIBUTE __declspec(dllexport)
-// FIXME find out what we need on Windows, if anything.
+#endif
 # define SANITIZER_WEAK_ATTRIBUTE
 #elif SANITIZER_GO
 # define SANITIZER_INTERFACE_ATTRIBUTE
@@ -32,11 +35,46 @@
 # define SANITIZER_WEAK_ATTRIBUTE  __attribute__((weak))
 #endif
 
-#if (SANITIZER_LINUX || SANITIZER_WINDOWS) && !SANITIZER_GO
+//--------------------------- WEAK FUNCTIONS ---------------------------------//
+// When working with weak functions, to simplify the code and make it more
+// portable, when possible define a default implementation using this macro:
+//
+// SANITIZER_INTERFACE_WEAK_DEF(<return_type>, <name>, <parameter list>)
+//
+// For example:
+//   SANITIZER_INTERFACE_WEAK_DEF(bool, compare, int a, int b) { return a > b; }
+//
+#if SANITIZER_WINDOWS
+#include "sanitizer_win_defs.h"
+# define SANITIZER_INTERFACE_WEAK_DEF(ReturnType, Name, ...)                   \
+  WIN_WEAK_EXPORT_DEF(ReturnType, Name, __VA_ARGS__)
+#else
+# define SANITIZER_INTERFACE_WEAK_DEF(ReturnType, Name, ...)                   \
+  extern "C" SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE            \
+  ReturnType Name(__VA_ARGS__)
+#endif
+
+// SANITIZER_SUPPORTS_WEAK_HOOKS means that we support real weak functions that
+// will evaluate to a null pointer when not defined.
+#if (SANITIZER_LINUX || SANITIZER_MAC) && !SANITIZER_GO
 # define SANITIZER_SUPPORTS_WEAK_HOOKS 1
 #else
 # define SANITIZER_SUPPORTS_WEAK_HOOKS 0
 #endif
+// For some weak hooks that will be called very often and we want to avoid the
+// overhead of executing the default implementation when it is not necessary,
+// we can use the flag SANITIZER_SUPPORTS_WEAK_HOOKS to only define the default
+// implementation for platforms that don't support weak symbols. For example:
+//
+//   #if !SANITIZER_SUPPORTS_WEAK_HOOKS
+//     SANITIZER_INTERFACE_WEAK_DEF(bool, compare_hook, int a, int b) {
+//       return a > b;
+//     }
+//   #endif
+//
+// And then use it as: if (compare_hook) compare_hook(a, b);
+//----------------------------------------------------------------------------//
+
 
 // We can use .preinit_array section on Linux to call sanitizer initialization
 // functions very early in the process startup (unless PIC macro is defined).
@@ -289,8 +327,13 @@
 enum LinkerInitialized { LINKER_INITIALIZED = 0 };
 
 #if !defined(_MSC_VER) || defined(__clang__)
-# define GET_CALLER_PC() (uptr)__builtin_return_address(0)
-# define GET_CURRENT_FRAME() (uptr)__builtin_frame_address(0)
+#if SANITIZER_S390_31
+#define GET_CALLER_PC() \
+  (__sanitizer::uptr) __builtin_extract_return_addr(__builtin_return_address(0))
+#else
+#define GET_CALLER_PC() (__sanitizer::uptr) __builtin_return_address(0)
+#endif
+#define GET_CURRENT_FRAME() (__sanitizer::uptr) __builtin_frame_address(0)
 inline void Trap() {
   __builtin_trap();
 }
@@ -299,9 +342,10 @@
 extern "C" void* _AddressOfReturnAddress(void);
 # pragma intrinsic(_ReturnAddress)
 # pragma intrinsic(_AddressOfReturnAddress)
-# define GET_CALLER_PC() (uptr)_ReturnAddress()
+#define GET_CALLER_PC() (__sanitizer::uptr) _ReturnAddress()
 // CaptureStackBackTrace doesn't need to know BP on Windows.
-# define GET_CURRENT_FRAME() (((uptr)_AddressOfReturnAddress()) + sizeof(uptr))
+#define GET_CURRENT_FRAME() \
+  (((__sanitizer::uptr)_AddressOfReturnAddress()) + sizeof(__sanitizer::uptr))
 
 extern "C" void __ud2(void);
 # pragma intrinsic(__ud2)
@@ -319,11 +363,11 @@
   }
 
 // Forces the compiler to generate a frame pointer in the function.
-#define ENABLE_FRAME_POINTER                                       \
-  do {                                                             \
-    volatile uptr enable_fp;                                       \
-    enable_fp = GET_CURRENT_FRAME();                               \
-    (void)enable_fp;                                               \
+#define ENABLE_FRAME_POINTER              \
+  do {                                    \
+    volatile __sanitizer::uptr enable_fp; \
+    enable_fp = GET_CURRENT_FRAME();      \
+    (void)enable_fp;                      \
   } while (0)
 
 }  // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_libignore.cc b/lib/sanitizer_common/sanitizer_libignore.cc
index 33a1763..aa4fa88 100644
--- a/lib/sanitizer_common/sanitizer_libignore.cc
+++ b/lib/sanitizer_common/sanitizer_libignore.cc
@@ -78,10 +78,12 @@
                 lib->templ, mod.full_name());
         lib->loaded = true;
         lib->name = internal_strdup(mod.full_name());
-        const uptr idx = atomic_load(&loaded_count_, memory_order_relaxed);
-        code_ranges_[idx].begin = range.beg;
-        code_ranges_[idx].end = range.end;
-        atomic_store(&loaded_count_, idx + 1, memory_order_release);
+        const uptr idx =
+            atomic_load(&ignored_ranges_count_, memory_order_relaxed);
+        CHECK_LT(idx, kMaxLibs);
+        ignored_code_ranges_[idx].begin = range.beg;
+        ignored_code_ranges_[idx].end = range.end;
+        atomic_store(&ignored_ranges_count_, idx + 1, memory_order_release);
         break;
       }
     }
@@ -92,6 +94,29 @@
       Die();
     }
   }
+
+  // Track instrumented ranges.
+  if (track_instrumented_libs_) {
+    for (const auto &mod : modules) {
+      if (!mod.instrumented())
+        continue;
+      for (const auto &range : mod.ranges()) {
+        if (!range.executable)
+          continue;
+        if (IsPcInstrumented(range.beg) && IsPcInstrumented(range.end - 1))
+          continue;
+        VReport(1, "Adding instrumented range %p-%p from library '%s'\n",
+                range.beg, range.end, mod.full_name());
+        const uptr idx =
+            atomic_load(&instrumented_ranges_count_, memory_order_relaxed);
+        CHECK_LT(idx, kMaxLibs);
+        instrumented_code_ranges_[idx].begin = range.beg;
+        instrumented_code_ranges_[idx].end = range.end;
+        atomic_store(&instrumented_ranges_count_, idx + 1,
+                     memory_order_release);
+      }
+    }
+  }
 }
 
 void LibIgnore::OnLibraryUnloaded() {
diff --git a/lib/sanitizer_common/sanitizer_libignore.h b/lib/sanitizer_common/sanitizer_libignore.h
index cd56c36..17b0f56 100644
--- a/lib/sanitizer_common/sanitizer_libignore.h
+++ b/lib/sanitizer_common/sanitizer_libignore.h
@@ -30,6 +30,9 @@
 
   // Must be called during initialization.
   void AddIgnoredLibrary(const char *name_templ);
+  void IgnoreNoninstrumentedModules(bool enable) {
+    track_instrumented_libs_ = enable;
+  }
 
   // Must be called after a new dynamic library is loaded.
   void OnLibraryLoaded(const char *name);
@@ -37,8 +40,14 @@
   // Must be called after a dynamic library is unloaded.
   void OnLibraryUnloaded();
 
-  // Checks whether the provided PC belongs to one of the ignored libraries.
-  bool IsIgnored(uptr pc) const;
+  // Checks whether the provided PC belongs to one of the ignored libraries or
+  // the PC should be ignored because it belongs to a non-instrumented module
+  // (when ignore_noninstrumented_modules=1). Also returns true via
+  // "pc_in_ignored_lib" if the PC is in an ignored library, false otherwise.
+  bool IsIgnored(uptr pc, bool *pc_in_ignored_lib) const;
+
+  // Checks whether the provided PC belongs to an instrumented module.
+  bool IsPcInstrumented(uptr pc) const;
 
  private:
   struct Lib {
@@ -53,26 +62,48 @@
     uptr end;
   };
 
+  inline bool IsInRange(uptr pc, const LibCodeRange &range) const {
+    return (pc >= range.begin && pc < range.end);
+  }
+
   static const uptr kMaxLibs = 128;
 
   // Hot part:
-  atomic_uintptr_t loaded_count_;
-  LibCodeRange code_ranges_[kMaxLibs];
+  atomic_uintptr_t ignored_ranges_count_;
+  LibCodeRange ignored_code_ranges_[kMaxLibs];
+
+  atomic_uintptr_t instrumented_ranges_count_;
+  LibCodeRange instrumented_code_ranges_[kMaxLibs];
 
   // Cold part:
   BlockingMutex mutex_;
   uptr count_;
   Lib libs_[kMaxLibs];
+  bool track_instrumented_libs_;
 
   // Disallow copying of LibIgnore objects.
   LibIgnore(const LibIgnore&);  // not implemented
   void operator = (const LibIgnore&);  // not implemented
 };
 
-inline bool LibIgnore::IsIgnored(uptr pc) const {
-  const uptr n = atomic_load(&loaded_count_, memory_order_acquire);
+inline bool LibIgnore::IsIgnored(uptr pc, bool *pc_in_ignored_lib) const {
+  const uptr n = atomic_load(&ignored_ranges_count_, memory_order_acquire);
   for (uptr i = 0; i < n; i++) {
-    if (pc >= code_ranges_[i].begin && pc < code_ranges_[i].end)
+    if (IsInRange(pc, ignored_code_ranges_[i])) {
+      *pc_in_ignored_lib = true;
+      return true;
+    }
+  }
+  *pc_in_ignored_lib = false;
+  if (track_instrumented_libs_ && !IsPcInstrumented(pc))
+    return true;
+  return false;
+}
+
+inline bool LibIgnore::IsPcInstrumented(uptr pc) const {
+  const uptr n = atomic_load(&instrumented_ranges_count_, memory_order_acquire);
+  for (uptr i = 0; i < n; i++) {
+    if (IsInRange(pc, instrumented_code_ranges_[i]))
       return true;
   }
   return false;
diff --git a/lib/sanitizer_common/sanitizer_linux.cc b/lib/sanitizer_common/sanitizer_linux.cc
index 76cdc72..46dd085 100644
--- a/lib/sanitizer_common/sanitizer_linux.cc
+++ b/lib/sanitizer_common/sanitizer_linux.cc
@@ -1175,6 +1175,71 @@
                "r0", "r29", "r27", "r28");
   return res;
 }
+#elif defined(__i386__) && SANITIZER_LINUX
+uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
+                    int *parent_tidptr, void *newtls, int *child_tidptr) {
+  int res;
+  if (!fn || !child_stack)
+    return -EINVAL;
+  CHECK_EQ(0, (uptr)child_stack % 16);
+  child_stack = (char *)child_stack - 7 * sizeof(unsigned int);
+  ((unsigned int *)child_stack)[0] = (uptr)flags;
+  ((unsigned int *)child_stack)[1] = (uptr)0;
+  ((unsigned int *)child_stack)[2] = (uptr)fn;
+  ((unsigned int *)child_stack)[3] = (uptr)arg;
+  __asm__ __volatile__(
+                       /* %eax = syscall(%eax = SYSCALL(clone),
+                        *                %ebx = flags,
+                        *                %ecx = child_stack,
+                        *                %edx = parent_tidptr,
+                        *                %esi  = new_tls,
+                        *                %edi = child_tidptr)
+                        */
+
+                        /* Obtain flags */
+                        "movl    (%%ecx), %%ebx\n"
+                        /* Do the system call */
+                        "pushl   %%ebx\n"
+                        "pushl   %%esi\n"
+                        "pushl   %%edi\n"
+                        /* Remember the flag value.  */
+                        "movl    %%ebx, (%%ecx)\n"
+                        "int     $0x80\n"
+                        "popl    %%edi\n"
+                        "popl    %%esi\n"
+                        "popl    %%ebx\n"
+
+                        /* if (%eax != 0)
+                         *   return;
+                         */
+
+                        "test    %%eax,%%eax\n"
+                        "jnz    1f\n"
+
+                        /* terminate the stack frame */
+                        "xorl   %%ebp,%%ebp\n"
+                        /* Call FN. */
+                        "call    *%%ebx\n"
+#ifdef PIC
+                        "call    here\n"
+                        "here:\n"
+                        "popl    %%ebx\n"
+                        "addl    $_GLOBAL_OFFSET_TABLE_+[.-here], %%ebx\n"
+#endif
+                        /* Call exit */
+                        "movl    %%eax, %%ebx\n"
+                        "movl    %2, %%eax\n"
+                        "int     $0x80\n"
+                        "1:\n"
+                       : "=a" (res)
+                       : "a"(SYSCALL(clone)), "i"(SYSCALL(exit)),
+                         "c"(child_stack),
+                         "d"(parent_tidptr),
+                         "S"(newtls),
+                         "D"(child_tidptr)
+                       : "memory");
+  return res;
+}
 #endif  // defined(__x86_64__) && SANITIZER_LINUX
 
 #if SANITIZER_ANDROID
@@ -1227,7 +1292,9 @@
     return true;
   if (common_flags()->handle_sigfpe && signum == SIGFPE)
     return true;
-  return (signum == SIGSEGV || signum == SIGBUS) && common_flags()->handle_segv;
+  if (common_flags()->handle_segv && signum == SIGSEGV)
+    return true;
+  return common_flags()->handle_sigbus && signum == SIGBUS;
 }
 
 #if !SANITIZER_GO
@@ -1393,6 +1460,23 @@
   // No need to re-exec on Linux.
 }
 
+void PrintModuleMap() { }
+
+void CheckNoDeepBind(const char *filename, int flag) {
+#if !SANITIZER_ANDROID
+  if (flag & RTLD_DEEPBIND) {  // Deep binding was requested.
+    Report(
+        "You are trying to dlopen a %s shared library with RTLD_DEEPBIND flag"
+        " which is incompatible with sanitizer runtime "
+        "(see https://github.com/google/sanitizers/issues/611 for details"
+        "). If you want to run %s library under sanitizers please remove "
+        "RTLD_DEEPBIND from dlopen flags.\n",
+        filename, filename);
+    Die();  // Terminate right after reporting.
+  }
+#endif  // !SANITIZER_ANDROID
+}
+
 uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding) {
   UNREACHABLE("FindAvailableMemoryRange is not available");
   return 0;
diff --git a/lib/sanitizer_common/sanitizer_linux.h b/lib/sanitizer_common/sanitizer_linux.h
index d4d0f47..bba8624 100644
--- a/lib/sanitizer_common/sanitizer_linux.h
+++ b/lib/sanitizer_common/sanitizer_linux.h
@@ -48,7 +48,7 @@
 #endif
 void internal_sigdelset(__sanitizer_sigset_t *set, int signum);
 #if defined(__x86_64__) || defined(__mips__) || defined(__aarch64__) \
-  || defined(__powerpc64__) || defined(__s390__)
+  || defined(__powerpc64__) || defined(__s390__) || defined(__i386__)
 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
                     int *parent_tidptr, void *newtls, int *child_tidptr);
 #endif
diff --git a/lib/sanitizer_common/sanitizer_linux_libcdep.cc b/lib/sanitizer_common/sanitizer_linux_libcdep.cc
index eb14c97..a5e2840 100644
--- a/lib/sanitizer_common/sanitizer_linux_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_linux_libcdep.cc
@@ -26,10 +26,7 @@
 #include "sanitizer_procmaps.h"
 #include "sanitizer_stacktrace.h"
 
-#if SANITIZER_ANDROID || SANITIZER_FREEBSD
 #include <dlfcn.h>  // for dlsym()
-#endif
-
 #include <link.h>
 #include <pthread.h>
 #include <signal.h>
@@ -273,9 +270,7 @@
 # endif
   const uptr kTlsAlign = 16;
   const uptr kTlsPreTcbSize =
-    (ThreadDescriptorSize() + kTcbHead + kTlsAlign - 1) & ~(kTlsAlign - 1);
-  InitTlsSize();
-  g_tls_size = (g_tls_size + kTlsPreTcbSize + kTlsAlign -1) & ~(kTlsAlign - 1);
+      RoundUpTo(ThreadDescriptorSize() + kTcbHead, kTlsAlign);
   return kTlsPreTcbSize;
 }
 #endif
@@ -382,6 +377,8 @@
   uptr addr, size;
   GetTls(&addr, &size);
   return size;
+#elif defined(__mips__) || defined(__powerpc64__)
+  return RoundUpTo(g_tls_size + TlsPreTcbSize(), 16);
 #else
   return g_tls_size;
 #endif
diff --git a/lib/sanitizer_common/sanitizer_linux_s390.cc b/lib/sanitizer_common/sanitizer_linux_s390.cc
index 053fd17..c2b03b2 100644
--- a/lib/sanitizer_common/sanitizer_linux_s390.cc
+++ b/lib/sanitizer_common/sanitizer_linux_s390.cc
@@ -136,6 +136,18 @@
   if (ptr[0] == '.')
     patch = internal_simple_strtoll(ptr+1, &ptr, 10);
   if (major < 3) {
+    if (major == 2 && minor == 6 && patch == 32 && ptr[0] == '-' &&
+        internal_strstr(ptr, ".el6")) {
+      // Check RHEL6
+      int r1 = internal_simple_strtoll(ptr+1, &ptr, 10);
+      if (r1 >= 657) // 2.6.32-657.el6 or later
+        return true;
+      if (r1 == 642 && ptr[0] == '.') {
+        int r2 = internal_simple_strtoll(ptr+1, &ptr, 10);
+        if (r2 >= 9) // 2.6.32-642.9.1.el6 or later
+          return true;
+      }
+    }
     // <3.0 is bad.
     return false;
   } else if (major == 3) {
@@ -145,6 +157,18 @@
     // 3.12.58+ is OK.
     if (minor == 12 && patch >= 58)
       return true;
+    if (minor == 10 && patch == 0 && ptr[0] == '-' &&
+        internal_strstr(ptr, ".el7")) {
+      // Check RHEL7
+      int r1 = internal_simple_strtoll(ptr+1, &ptr, 10);
+      if (r1 >= 426) // 3.10.0-426.el7 or later
+        return true;
+      if (r1 == 327 && ptr[0] == '.') {
+        int r2 = internal_simple_strtoll(ptr+1, &ptr, 10);
+        if (r2 >= 27) // 3.10.0-327.27.1.el7 or later
+          return true;
+      }
+    }
     // Otherwise, bad.
     return false;
   } else if (major == 4) {
diff --git a/lib/sanitizer_common/sanitizer_list.h b/lib/sanitizer_common/sanitizer_list.h
index c78cb4c..598ce51 100644
--- a/lib/sanitizer_common/sanitizer_list.h
+++ b/lib/sanitizer_common/sanitizer_list.h
@@ -70,6 +70,17 @@
     size_--;
   }
 
+  void extract(Item *prev, Item *x) {
+    CHECK(!empty());
+    CHECK_NE(prev, nullptr);
+    CHECK_NE(x, nullptr);
+    CHECK_EQ(prev->next, x);
+    prev->next = x->next;
+    if (last_ == x)
+      last_ = prev;
+    size_--;
+  }
+
   Item *front() { return first_; }
   const Item *front() const { return first_; }
   Item *back() { return last_; }
diff --git a/lib/sanitizer_common/sanitizer_mac.cc b/lib/sanitizer_common/sanitizer_mac.cc
index cf9465b..7e85505 100644
--- a/lib/sanitizer_common/sanitizer_mac.cc
+++ b/lib/sanitizer_common/sanitizer_mac.cc
@@ -93,20 +93,22 @@
 
 #include "sanitizer_syscall_generic.inc"
 
-// Direct syscalls, don't call libmalloc hooks.
+// Direct syscalls, don't call libmalloc hooks (but not available on 10.6).
 extern "C" void *__mmap(void *addr, size_t len, int prot, int flags, int fildes,
-                        off_t off);
-extern "C" int __munmap(void *, size_t);
+                        off_t off) SANITIZER_WEAK_ATTRIBUTE;
+extern "C" int __munmap(void *, size_t) SANITIZER_WEAK_ATTRIBUTE;
 
 // ---------------------- sanitizer_libc.h
 uptr internal_mmap(void *addr, size_t length, int prot, int flags,
                    int fd, u64 offset) {
   if (fd == -1) fd = VM_MAKE_TAG(VM_MEMORY_ANALYSIS_TOOL);
-  return (uptr)__mmap(addr, length, prot, flags, fd, offset);
+  if (__mmap) return (uptr)__mmap(addr, length, prot, flags, fd, offset);
+  return (uptr)mmap(addr, length, prot, flags, fd, offset);
 }
 
 uptr internal_munmap(void *addr, uptr length) {
-  return __munmap(addr, length);
+  if (__munmap) return __munmap(addr, length);
+  return munmap(addr, length);
 }
 
 int internal_mprotect(void *addr, uptr length, int prot) {
@@ -192,17 +194,19 @@
   return sigprocmask(how, set, oldset);
 }
 
-// Doesn't call pthread_atfork() handlers.
-extern "C" pid_t __fork(void);
+// Doesn't call pthread_atfork() handlers (but not available on 10.6).
+extern "C" pid_t __fork(void) SANITIZER_WEAK_ATTRIBUTE;
 
 int internal_fork() {
-  return __fork();
+  if (__fork)
+    return __fork();
+  return fork();
 }
 
 int internal_forkpty(int *amaster) {
   int master, slave;
   if (openpty(&master, &slave, nullptr, nullptr, nullptr) == -1) return -1;
-  int pid = __fork();
+  int pid = internal_fork();
   if (pid == -1) {
     close(master);
     close(slave);
@@ -344,20 +348,16 @@
 void BlockingMutex::Lock() {
   CHECK(sizeof(OSSpinLock) <= sizeof(opaque_storage_));
   CHECK_EQ(OS_SPINLOCK_INIT, 0);
-  CHECK_NE(owner_, (uptr)pthread_self());
+  CHECK_EQ(owner_, 0);
   OSSpinLockLock((OSSpinLock*)&opaque_storage_);
-  CHECK(!owner_);
-  owner_ = (uptr)pthread_self();
 }
 
 void BlockingMutex::Unlock() {
-  CHECK(owner_ == (uptr)pthread_self());
-  owner_ = 0;
   OSSpinLockUnlock((OSSpinLock*)&opaque_storage_);
 }
 
 void BlockingMutex::CheckLocked() {
-  CHECK_EQ((uptr)pthread_self(), owner_);
+  CHECK_NE(*(OSSpinLock*)&opaque_storage_, 0);
 }
 
 u64 NanoTime() {
@@ -402,7 +402,11 @@
     return true;
   if (common_flags()->handle_sigill && signum == SIGILL)
     return true;
-  return (signum == SIGSEGV || signum == SIGBUS) && common_flags()->handle_segv;
+  if (common_flags()->handle_sigfpe && signum == SIGFPE)
+    return true;
+  if (common_flags()->handle_segv && signum == SIGSEGV)
+    return true;
+  return common_flags()->handle_sigbus && signum == SIGBUS;
 }
 
 MacosVersion cached_macos_version = MACOS_VERSION_UNINITIALIZED;
@@ -448,6 +452,15 @@
   return result;
 }
 
+bool PlatformHasDifferentMemcpyAndMemmove() {
+  // On OS X 10.7 memcpy() and memmove() are both resolved
+  // into memmove$VARIANT$sse42; only Snow Leopard is treated as differing.
+  // See also https://github.com/google/sanitizers/issues/34.
+  // TODO(glider): need to check dynamically that memcpy() and memmove() are
+  // actually the same function.
+  return GetMacosVersion() == MACOS_VERSION_SNOW_LEOPARD;
+}
+
 uptr GetRSS() {
   struct task_basic_info info;
   unsigned count = TASK_BASIC_INFO_COUNT;
@@ -845,6 +858,40 @@
 # undef DUMPREG
 }
 
+static inline bool CompareBaseAddress(const LoadedModule &a,
+                                      const LoadedModule &b) {
+  return a.base_address() < b.base_address();
+}
+
+void FormatUUID(char *out, uptr size, const u8 *uuid) {
+  internal_snprintf(out, size,
+                    "<%02X%02X%02X%02X-%02X%02X-%02X%02X-%02X%02X-"
+                    "%02X%02X%02X%02X%02X%02X>",
+                    uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5],
+                    uuid[6], uuid[7], uuid[8], uuid[9], uuid[10], uuid[11],
+                    uuid[12], uuid[13], uuid[14], uuid[15]);
+}
+
+void PrintModuleMap() {
+  Printf("Process module map:\n");
+  MemoryMappingLayout memory_mapping(false);
+  InternalMmapVector<LoadedModule> modules(/*initial_capacity*/ 128);
+  memory_mapping.DumpListOfModules(&modules);
+  InternalSort(&modules, modules.size(), CompareBaseAddress);
+  for (uptr i = 0; i < modules.size(); ++i) {
+    char uuid_str[128];
+    FormatUUID(uuid_str, sizeof(uuid_str), modules[i].uuid());
+    Printf("0x%zx-0x%zx %s (%s) %s\n", modules[i].base_address(),
+           modules[i].max_executable_address(), modules[i].full_name(),
+           ModuleArchToString(modules[i].arch()), uuid_str);
+  }
+  Printf("End of module map.\n");
+}
+
+void CheckNoDeepBind(const char *filename, int flag) {
+  // Do nothing.
+}
+
 }  // namespace __sanitizer
 
 #endif  // SANITIZER_MAC
diff --git a/lib/sanitizer_common/sanitizer_malloc_mac.inc b/lib/sanitizer_common/sanitizer_malloc_mac.inc
index 149857c..6fbee07 100644
--- a/lib/sanitizer_common/sanitizer_malloc_mac.inc
+++ b/lib/sanitizer_common/sanitizer_malloc_mac.inc
@@ -46,9 +46,45 @@
     // This matches the behavior of malloc_create_zone() on OSX 10.7 and higher.
     mprotect(new_zone, allocated_size, PROT_READ);
   }
+  // We're explicitly *NOT* registering the zone.
   return new_zone;
 }
 
+INTERCEPTOR(void, malloc_destroy_zone, malloc_zone_t *zone) {
+  COMMON_MALLOC_ENTER();
+  // We don't need to do anything here.  We're not registering new zones, so we
+  // don't need to unregister.  Just un-mprotect and free() the zone.
+  if (GetMacosVersion() >= MACOS_VERSION_LION) {
+    uptr page_size = GetPageSizeCached();
+    uptr allocated_size = RoundUpTo(sizeof(sanitizer_zone), page_size);
+    mprotect(zone, allocated_size, PROT_READ | PROT_WRITE);
+  }
+  COMMON_MALLOC_FREE(zone);
+}
+
+extern unsigned malloc_num_zones;
+extern malloc_zone_t **malloc_zones;
+
+// We need to make sure that sanitizer_zone is registered as malloc_zones[0]. If
+// libmalloc tries to set up a different zone as malloc_zones[0], it will call
+// mprotect(malloc_zones, ..., PROT_READ).  This interceptor will catch that and
+// make sure we are still the first (default) zone.
+INTERCEPTOR(int, mprotect, void *addr, size_t len, int prot) {
+  if (addr == malloc_zones && prot == PROT_READ) {
+    if (malloc_num_zones > 1 && malloc_zones[0] != &sanitizer_zone) {
+      for (unsigned i = 1; i < malloc_num_zones; i++) {
+        if (malloc_zones[i] == &sanitizer_zone) {
+          // Swap malloc_zones[0] and malloc_zones[i].
+          malloc_zones[i] = malloc_zones[0];
+          malloc_zones[0] = &sanitizer_zone;
+          break;
+        }
+      }
+    }
+  }
+  return REAL(mprotect)(addr, len, prot);
+}
+
 INTERCEPTOR(malloc_zone_t *, malloc_default_zone, void) {
   COMMON_MALLOC_ENTER();
   return &sanitizer_zone;
diff --git a/lib/sanitizer_common/sanitizer_mutex.h b/lib/sanitizer_common/sanitizer_mutex.h
index d06fc45..1759bf1 100644
--- a/lib/sanitizer_common/sanitizer_mutex.h
+++ b/lib/sanitizer_common/sanitizer_mutex.h
@@ -83,6 +83,14 @@
   BlockingMutex();
   void Lock();
   void Unlock();
+
+  // This function does not guarantee an explicit check that the calling thread
+  // is the thread which owns the mutex. Such a check, while more strictly
+  // correct, causes problems in cases like StopTheWorld, where a parent thread
+  // owns the mutex but a child checks that it is locked. Rather than
+  // maintaining complex state to work around those situations, the check only
+  // checks that the mutex is owned, and assumes callers to be generally
+  // well-behaved.
   void CheckLocked();
  private:
   uptr opaque_storage_[10];
diff --git a/lib/sanitizer_common/sanitizer_platform_interceptors.h b/lib/sanitizer_common/sanitizer_platform_interceptors.h
index 8904a12..480e63a 100644
--- a/lib/sanitizer_common/sanitizer_platform_interceptors.h
+++ b/lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -17,9 +17,11 @@
 #include "sanitizer_internal_defs.h"
 
 #if !SANITIZER_WINDOWS
+# define SI_WINDOWS 0
 # define SI_NOT_WINDOWS 1
 # include "sanitizer_platform_limits_posix.h"
 #else
+# define SI_WINDOWS 1
 # define SI_NOT_WINDOWS 0
 #endif
 
@@ -83,8 +85,16 @@
 #define SANITIZER_INTERCEPT_MEMMOVE 1
 #define SANITIZER_INTERCEPT_MEMCPY 1
 #define SANITIZER_INTERCEPT_MEMCMP 1
+#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && \
+    __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 1070
+# define SI_MAC_DEPLOYMENT_BELOW_10_7 1
+#else
+# define SI_MAC_DEPLOYMENT_BELOW_10_7 0
+#endif
+// memmem on Darwin doesn't exist on 10.6
 // FIXME: enable memmem on Windows.
-#define SANITIZER_INTERCEPT_MEMMEM SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_MEMMEM \
+  SI_NOT_WINDOWS && !SI_MAC_DEPLOYMENT_BELOW_10_7
 #define SANITIZER_INTERCEPT_MEMCHR 1
 #define SANITIZER_INTERCEPT_MEMRCHR SI_FREEBSD || SI_LINUX
 
@@ -302,7 +312,7 @@
 #define SANITIZER_INTERCEPT_CTERMID SI_LINUX || SI_MAC || SI_FREEBSD
 #define SANITIZER_INTERCEPT_CTERMID_R SI_MAC || SI_FREEBSD
 
-#define SANITIZER_INTERCEPTOR_HOOKS SI_LINUX
+#define SANITIZER_INTERCEPTOR_HOOKS SI_LINUX || SI_MAC || SI_WINDOWS
 #define SANITIZER_INTERCEPT_RECV_RECVFROM SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_SEND_SENDTO SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_EVENTFD_READ_WRITE SI_LINUX
@@ -312,4 +322,15 @@
 #define SANITIZER_INTERCEPT___XSTAT64 SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT___LXSTAT SANITIZER_INTERCEPT___XSTAT
 #define SANITIZER_INTERCEPT___LXSTAT64 SI_LINUX_NOT_ANDROID
+
+#define SANITIZER_INTERCEPT_UTMP SI_NOT_WINDOWS && !SI_MAC && !SI_FREEBSD
+#define SANITIZER_INTERCEPT_UTMPX SI_LINUX_NOT_ANDROID || SI_MAC || SI_FREEBSD
+
+#define SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO (!SI_FREEBSD && !SI_MAC)
+#define SANITIZER_INTERCEPT_MEMALIGN (!SI_FREEBSD && !SI_MAC)
+#define SANITIZER_INTERCEPT_PVALLOC (!SI_FREEBSD && !SI_MAC)
+#define SANITIZER_INTERCEPT_CFREE (!SI_FREEBSD && !SI_MAC)
+#define SANITIZER_INTERCEPT_ALIGNED_ALLOC (!SI_MAC)
+#define SANITIZER_INTERCEPT_MALLOC_USABLE_SIZE (!SI_MAC)
+
 #endif  // #ifndef SANITIZER_PLATFORM_INTERCEPTORS_H
diff --git a/lib/sanitizer_common/sanitizer_platform_limits_posix.cc b/lib/sanitizer_common/sanitizer_platform_limits_posix.cc
index 10c6321..683f019 100644
--- a/lib/sanitizer_common/sanitizer_platform_limits_posix.cc
+++ b/lib/sanitizer_common/sanitizer_platform_limits_posix.cc
@@ -23,11 +23,6 @@
 #ifdef _FILE_OFFSET_BITS
 #undef _FILE_OFFSET_BITS
 #endif
-#if SANITIZER_FREEBSD
-#define _WANT_RTENTRY
-#include <sys/param.h>
-#include <sys/socketvar.h>
-#endif
 #include <arpa/inet.h>
 #include <dirent.h>
 #include <errno.h>
@@ -51,6 +46,9 @@
 #include <termios.h>
 #include <time.h>
 #include <wchar.h>
+#if !SANITIZER_MAC && !SANITIZER_FREEBSD
+#include <utmp.h>
+#endif
 
 #if !SANITIZER_IOS
 #include <net/route.h>
@@ -59,6 +57,7 @@
 #if !SANITIZER_ANDROID
 #include <sys/mount.h>
 #include <sys/timeb.h>
+#include <utmpx.h>
 #endif
 
 #if SANITIZER_LINUX
@@ -284,6 +283,13 @@
   int shmctl_shm_stat = (int)SHM_STAT;
 #endif
 
+#if !SANITIZER_MAC && !SANITIZER_FREEBSD
+  unsigned struct_utmp_sz = sizeof(struct utmp);
+#endif
+#if !SANITIZER_ANDROID
+  unsigned struct_utmpx_sz = sizeof(struct utmpx);
+#endif
+
   int map_fixed = MAP_FIXED;
 
   int af_inet = (int)AF_INET;
@@ -422,6 +428,7 @@
   unsigned struct_input_absinfo_sz = sizeof(struct input_absinfo);
   unsigned struct_input_id_sz = sizeof(struct input_id);
   unsigned struct_mtpos_sz = sizeof(struct mtpos);
+  unsigned struct_rtentry_sz = sizeof(struct rtentry);
   unsigned struct_termio_sz = sizeof(struct termio);
   unsigned struct_vt_consize_sz = sizeof(struct vt_consize);
   unsigned struct_vt_sizes_sz = sizeof(struct vt_sizes);
@@ -441,7 +448,6 @@
   unsigned struct_midi_info_sz = sizeof(struct midi_info);
   unsigned struct_mtget_sz = sizeof(struct mtget);
   unsigned struct_mtop_sz = sizeof(struct mtop);
-  unsigned struct_rtentry_sz = sizeof(struct rtentry);
   unsigned struct_sbi_instrument_sz = sizeof(struct sbi_instrument);
   unsigned struct_seq_event_rec_sz = sizeof(struct seq_event_rec);
   unsigned struct_synth_info_sz = sizeof(struct synth_info);
diff --git a/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/lib/sanitizer_common/sanitizer_platform_limits_posix.h
index 555dcce..c2d9f2c 100644
--- a/lib/sanitizer_common/sanitizer_platform_limits_posix.h
+++ b/lib/sanitizer_common/sanitizer_platform_limits_posix.h
@@ -635,9 +635,12 @@
 #ifndef __mips__
 #if defined(__sparc__)
 #if __GLIBC_PREREQ (2, 20)
-    // On sparc glibc 2.19 and earlier sa_flags was unsigned long, and
-    // __glibc_reserved0 didn't exist.
+    // On sparc glibc 2.19 and earlier sa_flags was unsigned long.
+#if defined(__arch64__)
+    // To maintain ABI compatibility on sparc64 when switching to an int,
+    // __glibc_reserved0 was added.
     int __glibc_reserved0;
+#endif
     int sa_flags;
 #else
     unsigned long sa_flags;
@@ -862,6 +865,13 @@
   extern int shmctl_shm_stat;
 #endif
 
+#if !SANITIZER_MAC && !SANITIZER_FREEBSD
+  extern unsigned struct_utmp_sz;
+#endif
+#if !SANITIZER_ANDROID
+  extern unsigned struct_utmpx_sz;
+#endif
+
   extern int map_fixed;
 
   // ioctl arguments
diff --git a/lib/sanitizer_common/sanitizer_posix.cc b/lib/sanitizer_common/sanitizer_posix.cc
index c70d5a4..9916f4d 100644
--- a/lib/sanitizer_common/sanitizer_posix.cc
+++ b/lib/sanitizer_common/sanitizer_posix.cc
@@ -358,6 +358,22 @@
   return SignalContext(context, addr, pc, sp, bp, is_memory_access, write_flag);
 }
 
+const char *DescribeSignalOrException(int signo) {
+  switch (signo) {
+    case SIGFPE:
+      return "FPE";
+    case SIGILL:
+      return "ILL";
+    case SIGABRT:
+      return "ABRT";
+    case SIGSEGV:
+      return "SEGV";
+    case SIGBUS:
+      return "BUS";
+  }
+  return "UNKNOWN SIGNAL";
+}
+
 } // namespace __sanitizer
 
 #endif // SANITIZER_POSIX
diff --git a/lib/sanitizer_common/sanitizer_printf.cc b/lib/sanitizer_common/sanitizer_printf.cc
index f394e75..99b7ff1 100644
--- a/lib/sanitizer_common/sanitizer_printf.cc
+++ b/lib/sanitizer_common/sanitizer_printf.cc
@@ -43,7 +43,7 @@
 // on the value of |pad_with_zero|.
 static int AppendNumber(char **buff, const char *buff_end, u64 absolute_value,
                         u8 base, u8 minimal_num_length, bool pad_with_zero,
-                        bool negative) {
+                        bool negative, bool uppercase) {
   uptr const kMaxLen = 30;
   RAW_CHECK(base == 10 || base == 16);
   RAW_CHECK(base == 10 || !negative);
@@ -76,23 +76,25 @@
   if (negative && !pad_with_zero) result += AppendChar(buff, buff_end, '-');
   for (; pos >= 0; pos--) {
     char digit = static_cast<char>(num_buffer[pos]);
-    result += AppendChar(buff, buff_end, (digit < 10) ? '0' + digit
-                                                      : 'a' + digit - 10);
+    digit = (digit < 10) ? '0' + digit : (uppercase ? 'A' : 'a') + digit - 10;
+    result += AppendChar(buff, buff_end, digit);
   }
   return result;
 }
 
 static int AppendUnsigned(char **buff, const char *buff_end, u64 num, u8 base,
-                          u8 minimal_num_length, bool pad_with_zero) {
+                          u8 minimal_num_length, bool pad_with_zero,
+                          bool uppercase) {
   return AppendNumber(buff, buff_end, num, base, minimal_num_length,
-                      pad_with_zero, false /* negative */);
+                      pad_with_zero, false /* negative */, uppercase);
 }
 
 static int AppendSignedDecimal(char **buff, const char *buff_end, s64 num,
                                u8 minimal_num_length, bool pad_with_zero) {
   bool negative = (num < 0);
   return AppendNumber(buff, buff_end, (u64)(negative ? -num : num), 10,
-                      minimal_num_length, pad_with_zero, negative);
+                      minimal_num_length, pad_with_zero, negative,
+                      false /* uppercase */);
 }
 
 static int AppendString(char **buff, const char *buff_end, int precision,
@@ -112,14 +114,16 @@
   int result = 0;
   result += AppendString(buff, buff_end, -1, "0x");
   result += AppendUnsigned(buff, buff_end, ptr_value, 16,
-                           SANITIZER_POINTER_FORMAT_LENGTH, true);
+                           SANITIZER_POINTER_FORMAT_LENGTH,
+                           true /* pad_with_zero */, false /* uppercase */);
   return result;
 }
 
 int VSNPrintf(char *buff, int buff_length,
               const char *format, va_list args) {
   static const char *kPrintfFormatsHelp =
-    "Supported Printf formats: %([0-9]*)?(z|ll)?{d,u,x}; %p; %(\\.\\*)?s; %c\n";
+      "Supported Printf formats: %([0-9]*)?(z|ll)?{d,u,x,X}; %p; %(\\.\\*)?s; "
+      "%c\n";
   RAW_CHECK(format);
   RAW_CHECK(buff_length > 0);
   const char *buff_end = &buff[buff_length - 1];
@@ -164,12 +168,14 @@
         break;
       }
       case 'u':
-      case 'x': {
+      case 'x':
+      case 'X': {
         uval = have_ll ? va_arg(args, u64)
              : have_z ? va_arg(args, uptr)
              : va_arg(args, unsigned);
-        result += AppendUnsigned(&buff, buff_end, uval,
-                                 (*cur == 'u') ? 10 : 16, width, pad_with_zero);
+        bool uppercase = (*cur == 'X');
+        result += AppendUnsigned(&buff, buff_end, uval, (*cur == 'u') ? 10 : 16,
+                                 width, pad_with_zero, uppercase);
         break;
       }
       case 'p': {
@@ -208,15 +214,11 @@
 }
 
 // Can be overriden in frontend.
-#if SANITIZER_SUPPORTS_WEAK_HOOKS
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-void OnPrint(const char *str) {
-  (void)str;
-}
-#elif SANITIZER_GO && defined(TSAN_EXTERNAL_HOOKS)
-void OnPrint(const char *str);
+#if SANITIZER_GO && defined(TSAN_EXTERNAL_HOOKS)
+// Implementation must be defined in frontend.
+extern "C" void OnPrint(const char *str);
 #else
-void OnPrint(const char *str) {
+SANITIZER_INTERFACE_WEAK_DEF(void, OnPrint, const char *str) {
   (void)str;
 }
 #endif
diff --git a/lib/sanitizer_common/sanitizer_procmaps.h b/lib/sanitizer_common/sanitizer_procmaps.h
index 5c26fb7..9dbb5ef 100644
--- a/lib/sanitizer_common/sanitizer_procmaps.h
+++ b/lib/sanitizer_common/sanitizer_procmaps.h
@@ -77,6 +77,7 @@
   u8 current_uuid_[kModuleUUIDSize];
   int current_load_cmd_count_;
   char *current_load_cmd_addr_;
+  bool current_instrumented_;
 # endif
 };
 
diff --git a/lib/sanitizer_common/sanitizer_procmaps_mac.cc b/lib/sanitizer_common/sanitizer_procmaps_mac.cc
index 2b4ad5c..2831f28 100644
--- a/lib/sanitizer_common/sanitizer_procmaps_mac.cc
+++ b/lib/sanitizer_common/sanitizer_procmaps_mac.cc
@@ -19,6 +19,20 @@
 #include <mach-o/dyld.h>
 #include <mach-o/loader.h>
 
+// These are not available in older macOS SDKs.
+#ifndef CPU_SUBTYPE_X86_64_H
+#define CPU_SUBTYPE_X86_64_H  ((cpu_subtype_t)8)   /* Haswell */
+#endif
+#ifndef CPU_SUBTYPE_ARM_V7S
+#define CPU_SUBTYPE_ARM_V7S   ((cpu_subtype_t)11)  /* Swift */
+#endif
+#ifndef CPU_SUBTYPE_ARM_V7K
+#define CPU_SUBTYPE_ARM_V7K   ((cpu_subtype_t)12)
+#endif
+#ifndef CPU_TYPE_ARM64
+#define CPU_TYPE_ARM64        (CPU_TYPE_ARM | CPU_ARCH_ABI64)
+#endif
+
 namespace __sanitizer {
 
 MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) {
@@ -136,22 +150,36 @@
   }
 }
 
-static void FindUUID(const load_command *first_lc, u8 *uuid_output) {
-  const load_command *current_lc = first_lc;
-  while (1) {
-    if (current_lc->cmd == 0) return;
-    if (current_lc->cmd == LC_UUID) {
-      const uuid_command *uuid_lc = (const uuid_command *)current_lc;
-      const uint8_t *uuid = &uuid_lc->uuid[0];
-      internal_memcpy(uuid_output, uuid, kModuleUUIDSize);
-      return;
-    }
+static const load_command *NextCommand(const load_command *lc) {
+  return (const load_command *)((char *)lc + lc->cmdsize);
+}
 
-    current_lc =
-        (const load_command *)(((char *)current_lc) + current_lc->cmdsize);
+static void FindUUID(const load_command *first_lc, u8 *uuid_output) {
+  for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) {
+    if (lc->cmd != LC_UUID) continue;
+
+    const uuid_command *uuid_lc = (const uuid_command *)lc;
+    const uint8_t *uuid = &uuid_lc->uuid[0];
+    internal_memcpy(uuid_output, uuid, kModuleUUIDSize);
+    return;
   }
 }
 
+static bool IsModuleInstrumented(const load_command *first_lc) {
+  for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) {
+    if (lc->cmd != LC_LOAD_DYLIB) continue;
+
+    const dylib_command *dylib_lc = (const dylib_command *)lc;
+    uint32_t dylib_name_offset = dylib_lc->dylib.name.offset;
+    const char *dylib_name = ((const char *)dylib_lc) + dylib_name_offset;
+    dylib_name = StripModuleName(dylib_name);
+    if (dylib_name != 0 && (internal_strstr(dylib_name, "libclang_rt."))) {
+      return true;
+    }
+  }
+  return false;
+}
+
 bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset,
                                char filename[], uptr filename_size,
                                uptr *protection, ModuleArch *arch, u8 *uuid) {
@@ -179,10 +207,11 @@
           continue;
         }
       }
+      FindUUID((const load_command *)current_load_cmd_addr_, &current_uuid_[0]);
+      current_instrumented_ =
+          IsModuleInstrumented((const load_command *)current_load_cmd_addr_);
     }
 
-    FindUUID((const load_command *)current_load_cmd_addr_, &current_uuid_[0]);
-
     for (; current_load_cmd_count_ >= 0; current_load_cmd_count_--) {
       switch (current_magic_) {
         // current_magic_ may be only one of MH_MAGIC, MH_MAGIC_64.
@@ -230,7 +259,8 @@
     } else {
       modules->push_back(LoadedModule());
       cur_module = &modules->back();
-      cur_module->set(cur_name, cur_beg, cur_arch, cur_uuid);
+      cur_module->set(cur_name, cur_beg, cur_arch, cur_uuid,
+                      current_instrumented_);
     }
     cur_module->addAddressRange(cur_beg, cur_end, prot & kProtectionExecute);
   }
diff --git a/lib/sanitizer_common/sanitizer_quarantine.h b/lib/sanitizer_common/sanitizer_quarantine.h
index ccc22bf..db38867 100644
--- a/lib/sanitizer_common/sanitizer_quarantine.h
+++ b/lib/sanitizer_common/sanitizer_quarantine.h
@@ -31,6 +31,40 @@
   uptr size;
   uptr count;
   void *batch[kSize];
+
+  void init(void *ptr, uptr size) {
+    count = 1;
+    batch[0] = ptr;
+    this->size = size + sizeof(QuarantineBatch);  // Account for the batch size.
+  }
+
+  // The total size of quarantined nodes recorded in this batch.
+  uptr quarantined_size() const {
+    return size - sizeof(QuarantineBatch);
+  }
+
+  void push_back(void *ptr, uptr size) {
+    CHECK_LT(count, kSize);
+    batch[count++] = ptr;
+    this->size += size;
+  }
+
+  bool can_merge(const QuarantineBatch* const from) const {
+    return count + from->count <= kSize;
+  }
+
+  void merge(QuarantineBatch* const from) {
+    CHECK_LE(count + from->count, kSize);
+    CHECK_GE(size, sizeof(QuarantineBatch));
+
+    for (uptr i = 0; i < from->count; ++i)
+      batch[count + i] = from->batch[i];
+    count += from->count;
+    size += from->quarantined_size();
+
+    from->count = 0;
+    from->size = sizeof(QuarantineBatch);
+  }
 };
 
 COMPILER_CHECK(sizeof(QuarantineBatch) <= (1 << 13));  // 8Kb.
@@ -49,17 +83,31 @@
   }
 
   void Init(uptr size, uptr cache_size) {
-    atomic_store(&max_size_, size, memory_order_release);
+    // Thread local quarantine size can be zero only when global quarantine size
+    // is zero (it allows us to perform just one atomic read per Put() call).
+    CHECK((size == 0 && cache_size == 0) || cache_size != 0);
+
+    atomic_store(&max_size_, size, memory_order_relaxed);
     atomic_store(&min_size_, size / 10 * 9,
-                 memory_order_release); // 90% of max size.
-    max_cache_size_ = cache_size;
+                 memory_order_relaxed);  // 90% of max size.
+    atomic_store(&max_cache_size_, cache_size, memory_order_relaxed);
   }
 
-  uptr GetSize() const { return atomic_load(&max_size_, memory_order_acquire); }
+  uptr GetSize() const { return atomic_load(&max_size_, memory_order_relaxed); }
+  uptr GetCacheSize() const {
+    return atomic_load(&max_cache_size_, memory_order_relaxed);
+  }
 
   void Put(Cache *c, Callback cb, Node *ptr, uptr size) {
-    c->Enqueue(cb, ptr, size);
-    if (c->Size() > max_cache_size_)
+    uptr cache_size = GetCacheSize();
+    if (cache_size) {
+      c->Enqueue(cb, ptr, size);
+    } else {
+      // GetCacheSize() == 0 only when GetSize() == 0 (see Init).
+      cb.Recycle(ptr);
+    }
+    // Check cache size anyway to accommodate runtime cache_size changes.
+    if (c->Size() > cache_size)
       Drain(c, cb);
   }
 
@@ -72,12 +120,19 @@
       Recycle(cb);
   }
 
+  void PrintStats() const {
+    // It assumes that the world is stopped, just as the allocator's PrintStats.
+    Printf("Quarantine limits: global: %zdMb; thread local: %zdKb\n",
+           GetSize() >> 20, GetCacheSize() >> 10);
+    cache_.PrintStats();
+  }
+
  private:
   // Read-only data.
   char pad0_[kCacheLineSize];
   atomic_uintptr_t max_size_;
   atomic_uintptr_t min_size_;
-  uptr max_cache_size_;
+  atomic_uintptr_t max_cache_size_;
   char pad1_[kCacheLineSize];
   SpinMutex cache_mutex_;
   SpinMutex recycle_mutex_;
@@ -86,12 +141,30 @@
 
   void NOINLINE Recycle(Callback cb) {
     Cache tmp;
-    uptr min_size = atomic_load(&min_size_, memory_order_acquire);
+    uptr min_size = atomic_load(&min_size_, memory_order_relaxed);
     {
       SpinMutexLock l(&cache_mutex_);
+      // Go over the batches and merge partially filled ones to save some
+      // memory. Otherwise the batches themselves (whose memory is counted
+      // against the quarantine limit) can outweigh the user's actual
+      // quarantined chunks, which defeats the purpose of the
+      // quarantine.
+      uptr cache_size = cache_.Size();
+      uptr overhead_size = cache_.OverheadSize();
+      CHECK_GE(cache_size, overhead_size);
+      // Do the merge only when overhead exceeds this predefined limit (might
+      // require some tuning). It saves us a merge attempt when the quarantine
+      // batch list is unlikely to contain batches suitable for merging.
+      const uptr kOverheadThresholdPercents = 100;
+      if (cache_size > overhead_size &&
+          overhead_size * (100 + kOverheadThresholdPercents) >
+              cache_size * kOverheadThresholdPercents) {
+        cache_.MergeBatches(&tmp);
+      }
+      // Extract enough chunks from the quarantine to get below the max
+      // quarantine size and leave some leeway for the newly quarantined chunks.
       while (cache_.Size() > min_size) {
-        QuarantineBatch *b = cache_.DequeueBatch();
-        tmp.EnqueueBatch(b);
+        tmp.EnqueueBatch(cache_.DequeueBatch());
       }
     }
     recycle_mutex_.Unlock();
@@ -126,26 +199,33 @@
     list_.clear();
   }
 
+  // Total memory used, including internal accounting.
   uptr Size() const {
     return atomic_load(&size_, memory_order_relaxed);
   }
 
-  void Enqueue(Callback cb, void *ptr, uptr size) {
-    if (list_.empty() || list_.back()->count == QuarantineBatch::kSize) {
-      AllocBatch(cb);
-      size += sizeof(QuarantineBatch);  // Count the batch in Quarantine size.
-    }
-    QuarantineBatch *b = list_.back();
-    CHECK(b);
-    b->batch[b->count++] = ptr;
-    b->size += size;
-    SizeAdd(size);
+  // Memory used for internal accounting.
+  uptr OverheadSize() const {
+    return list_.size() * sizeof(QuarantineBatch);
   }
 
-  void Transfer(QuarantineCache *c) {
-    list_.append_back(&c->list_);
-    SizeAdd(c->Size());
-    atomic_store(&c->size_, 0, memory_order_relaxed);
+  void Enqueue(Callback cb, void *ptr, uptr size) {
+    if (list_.empty() || list_.back()->count == QuarantineBatch::kSize) {
+      QuarantineBatch *b = (QuarantineBatch *)cb.Allocate(sizeof(*b));
+      CHECK(b);
+      b->init(ptr, size);
+      EnqueueBatch(b);
+    } else {
+      list_.back()->push_back(ptr, size);
+      SizeAdd(size);
+    }
+  }
+
+  void Transfer(QuarantineCache *from_cache) {
+    list_.append_back(&from_cache->list_);
+    SizeAdd(from_cache->Size());
+
+    atomic_store(&from_cache->size_, 0, memory_order_relaxed);
   }
 
   void EnqueueBatch(QuarantineBatch *b) {
@@ -162,8 +242,57 @@
     return b;
   }
 
+  void MergeBatches(QuarantineCache *to_deallocate) {
+    uptr extracted_size = 0;
+    QuarantineBatch *current = list_.front();
+    while (current && current->next) {
+      if (current->can_merge(current->next)) {
+        QuarantineBatch *extracted = current->next;
+        // Move all the chunks into the current batch.
+        current->merge(extracted);
+        CHECK_EQ(extracted->count, 0);
+        CHECK_EQ(extracted->size, sizeof(QuarantineBatch));
+        // Remove the next batch from the list and account for its size.
+        list_.extract(current, extracted);
+        extracted_size += extracted->size;
+        // Add it to deallocation list.
+        to_deallocate->EnqueueBatch(extracted);
+      } else {
+        current = current->next;
+      }
+    }
+    SizeSub(extracted_size);
+  }
+
+  void PrintStats() const {
+    uptr batch_count = 0;
+    uptr total_overhead_bytes = 0;
+    uptr total_bytes = 0;
+    uptr total_quarantine_chunks = 0;
+    for (List::ConstIterator it = list_.begin(); it != list_.end(); ++it) {
+      batch_count++;
+      total_bytes += (*it).size;
+      total_overhead_bytes += (*it).size - (*it).quarantined_size();
+      total_quarantine_chunks += (*it).count;
+    }
+    uptr quarantine_chunks_capacity = batch_count * QuarantineBatch::kSize;
+    int chunks_usage_percent = quarantine_chunks_capacity == 0 ?
+        0 : total_quarantine_chunks * 100 / quarantine_chunks_capacity;
+    uptr total_quarantined_bytes = total_bytes - total_overhead_bytes;
+    int memory_overhead_percent = total_quarantined_bytes == 0 ?
+        0 : total_overhead_bytes * 100 / total_quarantined_bytes;
+    Printf("Global quarantine stats: batches: %zd; bytes: %zd (user: %zd); "
+           "chunks: %zd (capacity: %zd); %d%% chunks used; %d%% memory overhead"
+           "\n",
+           batch_count, total_bytes, total_quarantined_bytes,
+           total_quarantine_chunks, quarantine_chunks_capacity,
+           chunks_usage_percent, memory_overhead_percent);
+  }
+
  private:
-  IntrusiveList<QuarantineBatch> list_;
+  typedef IntrusiveList<QuarantineBatch> List;
+
+  List list_;
   atomic_uintptr_t size_;
 
   void SizeAdd(uptr add) {
@@ -172,16 +301,8 @@
   void SizeSub(uptr sub) {
     atomic_store(&size_, Size() - sub, memory_order_relaxed);
   }
-
-  NOINLINE QuarantineBatch* AllocBatch(Callback cb) {
-    QuarantineBatch *b = (QuarantineBatch *)cb.Allocate(sizeof(*b));
-    CHECK(b);
-    b->count = 0;
-    b->size = 0;
-    list_.push_back(b);
-    return b;
-  }
 };
+
 } // namespace __sanitizer
 
 #endif // SANITIZER_QUARANTINE_H
diff --git a/lib/sanitizer_common/sanitizer_stacktrace.cc b/lib/sanitizer_common/sanitizer_stacktrace.cc
index 7ad1f1f..2741dde 100644
--- a/lib/sanitizer_common/sanitizer_stacktrace.cc
+++ b/lib/sanitizer_common/sanitizer_stacktrace.cc
@@ -106,10 +106,6 @@
   }
 }
 
-static bool MatchPc(uptr cur_pc, uptr trace_pc, uptr threshold) {
-  return cur_pc - trace_pc <= threshold || trace_pc - cur_pc <= threshold;
-}
-
 void BufferedStackTrace::PopStackFrames(uptr count) {
   CHECK_LT(count, size);
   size -= count;
@@ -118,15 +114,14 @@
   }
 }
 
+static uptr Distance(uptr a, uptr b) { return a < b ? b - a : a - b; }
+
 uptr BufferedStackTrace::LocatePcInTrace(uptr pc) {
-  // Use threshold to find PC in stack trace, as PC we want to unwind from may
-  // slightly differ from return address in the actual unwinded stack trace.
-  const int kPcThreshold = 350;
-  for (uptr i = 0; i < size; ++i) {
-    if (MatchPc(pc, trace[i], kPcThreshold))
-      return i;
+  uptr best = 0;
+  for (uptr i = 1; i < size; ++i) {
+    if (Distance(trace[i], pc) < Distance(trace[best], pc)) best = i;
   }
-  return 0;
+  return best;
 }
 
 }  // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_stacktrace_printer.cc b/lib/sanitizer_common/sanitizer_stacktrace_printer.cc
index 6fba581..377f1ce 100644
--- a/lib/sanitizer_common/sanitizer_stacktrace_printer.cc
+++ b/lib/sanitizer_common/sanitizer_stacktrace_printer.cc
@@ -93,7 +93,7 @@
                              vs_style, strip_path_prefix);
       } else if (info.module) {
         RenderModuleLocation(buffer, info.module, info.module_offset,
-                             strip_path_prefix);
+                             info.module_arch, strip_path_prefix);
       } else {
         buffer->append("(<unknown module>)");
       }
@@ -103,8 +103,9 @@
       if (info.address & kExternalPCBit)
         {} // There PCs are not meaningful.
       else if (info.module)
-        buffer->append("(%s+%p)", StripModuleName(info.module),
-                       (void *)info.module_offset);
+        // Always strip the module name for %M.
+        RenderModuleLocation(buffer, StripModuleName(info.module),
+                             info.module_offset, info.module_arch, "");
       else
         buffer->append("(%p)", (void *)info.address);
       break;
@@ -165,9 +166,13 @@
 }
 
 void RenderModuleLocation(InternalScopedString *buffer, const char *module,
-                          uptr offset, const char *strip_path_prefix) {
-  buffer->append("(%s+0x%zx)", StripPathPrefix(module, strip_path_prefix),
-                 offset);
+                          uptr offset, ModuleArch arch,
+                          const char *strip_path_prefix) {
+  buffer->append("(%s", StripPathPrefix(module, strip_path_prefix));
+  if (arch != kModuleArchUnknown) {
+    buffer->append(":%s", ModuleArchToString(arch));
+  }
+  buffer->append("+0x%zx)", offset);
 }
 
 } // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_stacktrace_printer.h b/lib/sanitizer_common/sanitizer_stacktrace_printer.h
index 7be1d19..ce85bd7 100644
--- a/lib/sanitizer_common/sanitizer_stacktrace_printer.h
+++ b/lib/sanitizer_common/sanitizer_stacktrace_printer.h
@@ -57,7 +57,8 @@
                           const char *strip_path_prefix);
 
 void RenderModuleLocation(InternalScopedString *buffer, const char *module,
-                          uptr offset, const char *strip_path_prefix);
+                          uptr offset, ModuleArch arch,
+                          const char *strip_path_prefix);
 
 // Same as RenderFrame, but for data section (global variables).
 // Accepts %s, %l from above.
diff --git a/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc b/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
index eb4c403..ce8873b 100644
--- a/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
@@ -16,7 +16,7 @@
 
 #if SANITIZER_LINUX && (defined(__x86_64__) || defined(__mips__) || \
                         defined(__aarch64__) || defined(__powerpc64__) || \
-                        defined(__s390__))
+                        defined(__s390__) || defined(__i386__))
 
 #include "sanitizer_stoptheworld.h"
 
@@ -528,4 +528,4 @@
 
 #endif  // SANITIZER_LINUX && (defined(__x86_64__) || defined(__mips__)
         // || defined(__aarch64__) || defined(__powerpc64__)
-        // || defined(__s390__)
+        // || defined(__s390__) || defined(__i386__)
diff --git a/lib/sanitizer_common/sanitizer_stoptheworld_mac.cc b/lib/sanitizer_common/sanitizer_stoptheworld_mac.cc
new file mode 100644
index 0000000..4cccc29
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_stoptheworld_mac.cc
@@ -0,0 +1,38 @@
+//===-- sanitizer_stoptheworld_mac.cc -------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// See sanitizer_stoptheworld.h for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_platform.h"
+
+#if SANITIZER_MAC && (defined(__x86_64__) || defined(__aarch64__))
+
+#include "sanitizer_stoptheworld.h"
+
+namespace __sanitizer {
+void StopTheWorld(StopTheWorldCallback callback, void *argument) {
+  CHECK(0 && "unimplemented");
+}
+
+int SuspendedThreadsList::GetRegistersAndSP(uptr index,
+                                            uptr *buffer,
+                                            uptr *sp) const {
+  CHECK(0 && "unimplemented");
+  return 0;
+}
+
+uptr SuspendedThreadsList::RegisterCount() {
+  CHECK(0 && "unimplemented");
+  return 0;
+}
+} // namespace __sanitizer
+
+#endif  // SANITIZER_MAC && (defined(__x86_64__) || defined(__aarch64__))
diff --git a/lib/sanitizer_common/sanitizer_symbolizer.cc b/lib/sanitizer_common/sanitizer_symbolizer.cc
index 534e55f..1cd5b6e 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer.cc
@@ -33,9 +33,11 @@
   function_offset = kUnknown;
 }
 
-void AddressInfo::FillModuleInfo(const char *mod_name, uptr mod_offset) {
+void AddressInfo::FillModuleInfo(const char *mod_name, uptr mod_offset,
+                                 ModuleArch mod_arch) {
   module = internal_strdup(mod_name);
   module_offset = mod_offset;
+  module_arch = mod_arch;
 }
 
 SymbolizedStack::SymbolizedStack() : next(nullptr), info() {}
diff --git a/lib/sanitizer_common/sanitizer_symbolizer.h b/lib/sanitizer_common/sanitizer_symbolizer.h
index 572f1dd..4fc7742 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer.h
+++ b/lib/sanitizer_common/sanitizer_symbolizer.h
@@ -31,6 +31,7 @@
 
   char *module;
   uptr module_offset;
+  ModuleArch module_arch;
 
   static const uptr kUnknown = ~(uptr)0;
   char *function;
@@ -43,7 +44,7 @@
   AddressInfo();
   // Deletes all strings and resets all fields.
   void Clear();
-  void FillModuleInfo(const char *mod_name, uptr mod_offset);
+  void FillModuleInfo(const char *mod_name, uptr mod_offset, ModuleArch arch);
 };
 
 // Linked list of symbolized frames (each frame is described by AddressInfo).
@@ -65,6 +66,8 @@
   // (de)allocated using sanitizer internal allocator.
   char *module;
   uptr module_offset;
+  ModuleArch module_arch;
+
   char *file;
   uptr line;
   char *name;
@@ -143,7 +146,8 @@
   static Symbolizer *PlatformInit();
 
   bool FindModuleNameAndOffsetForAddress(uptr address, const char **module_name,
-                                         uptr *module_offset);
+                                         uptr *module_offset,
+                                         ModuleArch *module_arch);
   ListOfModules modules_;
   // If stale, need to reload the modules before looking up addresses.
   bool modules_fresh_;
diff --git a/lib/sanitizer_common/sanitizer_symbolizer_internal.h b/lib/sanitizer_common/sanitizer_symbolizer_internal.h
index ada059c..2ae42b3 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer_internal.h
+++ b/lib/sanitizer_common/sanitizer_symbolizer_internal.h
@@ -124,8 +124,8 @@
   bool SymbolizeData(uptr addr, DataInfo *info) override;
 
  private:
-  const char *SendCommand(bool is_data, const char *module_name,
-                          uptr module_offset);
+  const char *FormatAndSendCommand(bool is_data, const char *module_name,
+                                   uptr module_offset, ModuleArch arch);
 
   LLVMSymbolizerProcess *symbolizer_process_;
   static const uptr kBufferSize = 16 * 1024;
diff --git a/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc b/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc
index 31506fe..614470a 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc
@@ -64,11 +64,13 @@
   BlockingMutexLock l(&mu_);
   const char *module_name;
   uptr module_offset;
+  ModuleArch arch;
   SymbolizedStack *res = SymbolizedStack::New(addr);
-  if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset))
+  if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset,
+                                         &arch))
     return res;
   // Always fill data about module name and offset.
-  res->info.FillModuleInfo(module_name, module_offset);
+  res->info.FillModuleInfo(module_name, module_offset, arch);
   for (auto &tool : tools_) {
     SymbolizerScope sym_scope(this);
     if (tool.SymbolizePC(addr, res)) {
@@ -82,11 +84,14 @@
   BlockingMutexLock l(&mu_);
   const char *module_name;
   uptr module_offset;
-  if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset))
+  ModuleArch arch;
+  if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset,
+                                         &arch))
     return false;
   info->Clear();
   info->module = internal_strdup(module_name);
   info->module_offset = module_offset;
+  info->module_arch = arch;
   for (auto &tool : tools_) {
     SymbolizerScope sym_scope(this);
     if (tool.SymbolizeData(addr, info)) {
@@ -100,8 +105,9 @@
                                              uptr *module_address) {
   BlockingMutexLock l(&mu_);
   const char *internal_module_name = nullptr;
+  ModuleArch arch;
   if (!FindModuleNameAndOffsetForAddress(pc, &internal_module_name,
-                                         module_address))
+                                         module_address, &arch))
     return false;
 
   if (module_name)
@@ -134,12 +140,14 @@
 
 bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address,
                                                    const char **module_name,
-                                                   uptr *module_offset) {
+                                                   uptr *module_offset,
+                                                   ModuleArch *module_arch) {
   const LoadedModule *module = FindModuleForAddress(address);
   if (module == nullptr)
     return false;
   *module_name = module->full_name();
   *module_offset = address - module->base_address();
+  *module_arch = module->arch();
   return true;
 }
 
@@ -197,6 +205,8 @@
            buffer[length - 2] == '\n';
   }
 
+  // When adding a new architecture, don't forget to also update
+  // script/asan_symbolize.py and sanitizer_common.h.
   void GetArgV(const char *path_to_binary,
                const char *(&argv)[kArgVMax]) const override {
 #if defined(__x86_64h__)
@@ -284,7 +294,8 @@
       top_frame = false;
     } else {
       cur = SymbolizedStack::New(res->info.address);
-      cur->info.FillModuleInfo(res->info.module, res->info.module_offset);
+      cur->info.FillModuleInfo(res->info.module, res->info.module_offset,
+                               res->info.module_arch);
       last->next = cur;
       last = cur;
     }
@@ -317,8 +328,10 @@
 }
 
 bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
-  if (const char *buf = SendCommand(/*is_data*/ false, stack->info.module,
-                                    stack->info.module_offset)) {
+  AddressInfo *info = &stack->info;
+  const char *buf = FormatAndSendCommand(
+      /*is_data*/ false, info->module, info->module_offset, info->module_arch);
+  if (buf) {
     ParseSymbolizePCOutput(buf, stack);
     return true;
   }
@@ -326,8 +339,9 @@
 }
 
 bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) {
-  if (const char *buf =
-          SendCommand(/*is_data*/ true, info->module, info->module_offset)) {
+  const char *buf = FormatAndSendCommand(
+      /*is_data*/ true, info->module, info->module_offset, info->module_arch);
+  if (buf) {
     ParseSymbolizeDataOutput(buf, info);
     info->start += (addr - info->module_offset); // Add the base address.
     return true;
@@ -335,11 +349,27 @@
   return false;
 }
 
-const char *LLVMSymbolizer::SendCommand(bool is_data, const char *module_name,
-                                        uptr module_offset) {
+const char *LLVMSymbolizer::FormatAndSendCommand(bool is_data,
+                                                 const char *module_name,
+                                                 uptr module_offset,
+                                                 ModuleArch arch) {
   CHECK(module_name);
-  internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n",
-                    is_data ? "DATA " : "", module_name, module_offset);
+  const char *is_data_str = is_data ? "DATA " : "";
+  if (arch == kModuleArchUnknown) {
+    if (internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n", is_data_str,
+                          module_name,
+                          module_offset) >= static_cast<int>(kBufferSize)) {
+      Report("WARNING: Command buffer too small");
+      return nullptr;
+    }
+  } else {
+    if (internal_snprintf(buffer_, kBufferSize, "%s\"%s:%s\" 0x%zx\n",
+                          is_data_str, module_name, ModuleArchToString(arch),
+                          module_offset) >= static_cast<int>(kBufferSize)) {
+      Report("WARNING: Command buffer too small");
+      return nullptr;
+    }
+  }
   return symbolizer_process_->SendCommand(buffer_);
 }
 
@@ -355,7 +385,23 @@
   CHECK_NE(path_[0], '\0');
 }
 
+static bool IsSameModule(const char* path) {
+  if (const char* ProcessName = GetProcessName()) {
+    if (const char* SymbolizerName = StripModuleName(path)) {
+      return !internal_strcmp(ProcessName, SymbolizerName);
+    }
+  }
+  return false;
+}
+
 const char *SymbolizerProcess::SendCommand(const char *command) {
+  if (failed_to_start_)
+    return nullptr;
+  if (IsSameModule(path_)) {
+    Report("WARNING: Symbolizer was blocked from starting itself!\n");
+    failed_to_start_ = true;
+    return nullptr;
+  }
   for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) {
     // Start or restart symbolizer if we failed to send command to it.
     if (const char *res = SendCommandImpl(command))
@@ -404,6 +450,11 @@
     read_len += just_read;
     if (ReachedEndOfOutput(buffer, read_len))
       break;
+    if (read_len + 1 == max_length) {
+      Report("WARNING: Symbolizer buffer too small");
+      read_len = 0;
+      break;
+    }
   }
   buffer[read_len] = '\0';
   return true;
diff --git a/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc b/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc
index f50d8b1..d3c77b5 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc
@@ -424,7 +424,6 @@
   InternalSymbolizer() { }
 
   static const int kBufferSize = 16 * 1024;
-  static const int kMaxDemangledNameSize = 1024;
   char buffer_[kBufferSize];
 };
 #else  // SANITIZER_SUPPORTS_WEAK_HOOKS
diff --git a/lib/sanitizer_common/sanitizer_thread_registry.cc b/lib/sanitizer_common/sanitizer_thread_registry.cc
index c2b75e6..c5b2e09 100644
--- a/lib/sanitizer_common/sanitizer_thread_registry.cc
+++ b/lib/sanitizer_common/sanitizer_thread_registry.cc
@@ -19,7 +19,7 @@
 ThreadContextBase::ThreadContextBase(u32 tid)
     : tid(tid), unique_id(0), reuse_count(), os_id(0), user_id(0),
       status(ThreadStatusInvalid),
-      detached(false), parent_tid(0), next(0) {
+      detached(false), workerthread(false), parent_tid(0), next(0) {
   name[0] = '\0';
 }
 
@@ -59,9 +59,10 @@
   OnFinished();
 }
 
-void ThreadContextBase::SetStarted(uptr _os_id, void *arg) {
+void ThreadContextBase::SetStarted(uptr _os_id, bool _workerthread, void *arg) {
   status = ThreadStatusRunning;
   os_id = _os_id;
+  workerthread = _workerthread;
   OnStarted(arg);
 }
 
@@ -266,14 +267,15 @@
   }
 }
 
-void ThreadRegistry::StartThread(u32 tid, uptr os_id, void *arg) {
+void ThreadRegistry::StartThread(u32 tid, uptr os_id, bool workerthread,
+                                 void *arg) {
   BlockingMutexLock l(&mtx_);
   running_threads_++;
   CHECK_LT(tid, n_contexts_);
   ThreadContextBase *tctx = threads_[tid];
   CHECK_NE(tctx, 0);
   CHECK_EQ(ThreadStatusCreated, tctx->status);
-  tctx->SetStarted(os_id, arg);
+  tctx->SetStarted(os_id, workerthread, arg);
 }
 
 void ThreadRegistry::QuarantinePush(ThreadContextBase *tctx) {
diff --git a/lib/sanitizer_common/sanitizer_thread_registry.h b/lib/sanitizer_common/sanitizer_thread_registry.h
index a27bbb3..17b1d5d 100644
--- a/lib/sanitizer_common/sanitizer_thread_registry.h
+++ b/lib/sanitizer_common/sanitizer_thread_registry.h
@@ -45,6 +45,7 @@
 
   ThreadStatus status;
   bool detached;
+  bool workerthread;
 
   u32 parent_tid;
   ThreadContextBase *next;  // For storing thread contexts in a list.
@@ -54,7 +55,7 @@
   void SetDead();
   void SetJoined(void *arg);
   void SetFinished();
-  void SetStarted(uptr _os_id, void *arg);
+  void SetStarted(uptr _os_id, bool _workerthread, void *arg);
   void SetCreated(uptr _user_id, u64 _unique_id, bool _detached,
                   u32 _parent_tid, void *arg);
   void Reset();
@@ -115,7 +116,7 @@
   void DetachThread(u32 tid, void *arg);
   void JoinThread(u32 tid, void *arg);
   void FinishThread(u32 tid);
-  void StartThread(u32 tid, uptr os_id, void *arg);
+  void StartThread(u32 tid, uptr os_id, bool workerthread, void *arg);
 
  private:
   const ThreadContextFactory context_factory_;
diff --git a/lib/sanitizer_common/sanitizer_win.cc b/lib/sanitizer_common/sanitizer_win.cc
index 0b7a38c..b1a2a53 100644
--- a/lib/sanitizer_common/sanitizer_win.cc
+++ b/lib/sanitizer_common/sanitizer_win.cc
@@ -30,6 +30,22 @@
 #include "sanitizer_procmaps.h"
 #include "sanitizer_stacktrace.h"
 #include "sanitizer_symbolizer.h"
+#include "sanitizer_win_defs.h"
+
+// A macro to tell the compiler that this part of the code cannot be reached,
+// if the compiler supports this feature. Since we're using this in
+// code that is called when terminating the process, the expansion of the
+// macro should not terminate the process to avoid infinite recursion.
+#if defined(__clang__)
+# define BUILTIN_UNREACHABLE() __builtin_unreachable()
+#elif defined(__GNUC__) && \
+    (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))
+# define BUILTIN_UNREACHABLE() __builtin_unreachable()
+#elif defined(_MSC_VER)
+# define BUILTIN_UNREACHABLE() __assume(0)
+#else
+# define BUILTIN_UNREACHABLE()
+#endif
 
 namespace __sanitizer {
 
@@ -373,6 +389,8 @@
 }
 #endif
 
+void PrintModuleMap() { }
+
 void DisableCoreDumperIfNecessary() {
   // Do nothing.
 }
@@ -659,6 +677,7 @@
   if (::IsDebuggerPresent())
     __debugbreak();
   TerminateProcess(GetCurrentProcess(), exitcode);
+  BUILTIN_UNREACHABLE();
 }
 
 uptr internal_ftruncate(fd_t fd, uptr size) {
@@ -817,6 +836,59 @@
   return false;
 }
 
+// Check based on flags if we should handle this exception.
+bool IsHandledDeadlyException(DWORD exceptionCode) {
+  switch (exceptionCode) {
+    case EXCEPTION_ACCESS_VIOLATION:
+    case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
+    case EXCEPTION_STACK_OVERFLOW:
+    case EXCEPTION_DATATYPE_MISALIGNMENT:
+    case EXCEPTION_IN_PAGE_ERROR:
+      return common_flags()->handle_segv;
+    case EXCEPTION_ILLEGAL_INSTRUCTION:
+    case EXCEPTION_PRIV_INSTRUCTION:
+    case EXCEPTION_BREAKPOINT:
+      return common_flags()->handle_sigill;
+    case EXCEPTION_FLT_DENORMAL_OPERAND:
+    case EXCEPTION_FLT_DIVIDE_BY_ZERO:
+    case EXCEPTION_FLT_INEXACT_RESULT:
+    case EXCEPTION_FLT_INVALID_OPERATION:
+    case EXCEPTION_FLT_OVERFLOW:
+    case EXCEPTION_FLT_STACK_CHECK:
+    case EXCEPTION_FLT_UNDERFLOW:
+    case EXCEPTION_INT_DIVIDE_BY_ZERO:
+    case EXCEPTION_INT_OVERFLOW:
+      return common_flags()->handle_sigfpe;
+  }
+  return false;
+}
+
+const char *DescribeSignalOrException(int signo) {
+  unsigned code = signo;
+  // Get the string description of the exception if this is a known deadly
+  // exception.
+  switch (code) {
+    case EXCEPTION_ACCESS_VIOLATION: return "access-violation";
+    case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: return "array-bounds-exceeded";
+    case EXCEPTION_STACK_OVERFLOW: return "stack-overflow";
+    case EXCEPTION_DATATYPE_MISALIGNMENT: return "datatype-misalignment";
+    case EXCEPTION_IN_PAGE_ERROR: return "in-page-error";
+    case EXCEPTION_ILLEGAL_INSTRUCTION: return "illegal-instruction";
+    case EXCEPTION_PRIV_INSTRUCTION: return "priv-instruction";
+    case EXCEPTION_BREAKPOINT: return "breakpoint";
+    case EXCEPTION_FLT_DENORMAL_OPERAND: return "flt-denormal-operand";
+    case EXCEPTION_FLT_DIVIDE_BY_ZERO: return "flt-divide-by-zero";
+    case EXCEPTION_FLT_INEXACT_RESULT: return "flt-inexact-result";
+    case EXCEPTION_FLT_INVALID_OPERATION: return "flt-invalid-operation";
+    case EXCEPTION_FLT_OVERFLOW: return "flt-overflow";
+    case EXCEPTION_FLT_STACK_CHECK: return "flt-stack-check";
+    case EXCEPTION_FLT_UNDERFLOW: return "flt-underflow";
+    case EXCEPTION_INT_DIVIDE_BY_ZERO: return "int-divide-by-zero";
+    case EXCEPTION_INT_OVERFLOW: return "int-overflow";
+  }
+  return "unknown exception";
+}
+
 bool IsAccessibleMemoryRange(uptr beg, uptr size) {
   SYSTEM_INFO si;
   GetNativeSystemInfo(&si);
@@ -918,21 +990,10 @@
 // FIXME implement on this platform.
 void GetMemoryProfile(fill_profile_f cb, uptr *stats, uptr stats_size) { }
 
+void CheckNoDeepBind(const char *filename, int flag) {
+  // Do nothing.
+}
 
 }  // namespace __sanitizer
 
-#if !SANITIZER_GO
-// Workaround to implement weak hooks on Windows. COFF doesn't directly support
-// weak symbols, but it does support /alternatename, which is similar. If the
-// user does not override the hook, we will use this default definition instead
-// of null.
-extern "C" void __sanitizer_print_memory_profile(int top_percent) {}
-
-#ifdef _WIN64
-#pragma comment(linker, "/alternatename:__sanitizer_print_memory_profile=__sanitizer_default_print_memory_profile") // NOLINT
-#else
-#pragma comment(linker, "/alternatename:___sanitizer_print_memory_profile=___sanitizer_default_print_memory_profile") // NOLINT
-#endif
-#endif
-
 #endif  // _WIN32
diff --git a/lib/sanitizer_common/sanitizer_win.h b/lib/sanitizer_common/sanitizer_win.h
new file mode 100644
index 0000000..23e01ab
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_win.h
@@ -0,0 +1,26 @@
+//===-- sanitizer_win.h -----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Windows-specific declarations.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_WIN_H
+#define SANITIZER_WIN_H
+
+#include "sanitizer_platform.h"
+#if SANITIZER_WINDOWS
+#include "sanitizer_internal_defs.h"
+
+namespace __sanitizer {
+// Check based on flags if we should handle the exception.
+bool IsHandledDeadlyException(DWORD exceptionCode);
+}  // namespace __sanitizer
+
+#endif  // SANITIZER_WINDOWS
+#endif  // SANITIZER_WIN_H
diff --git a/lib/sanitizer_common/sanitizer_win_defs.h b/lib/sanitizer_common/sanitizer_win_defs.h
new file mode 100644
index 0000000..077ff9c
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_win_defs.h
@@ -0,0 +1,153 @@
+//===-- sanitizer_win_defs.h ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Common definitions for Windows-specific code.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_WIN_DEFS_H
+#define SANITIZER_WIN_DEFS_H
+
+#include "sanitizer_platform.h"
+#if SANITIZER_WINDOWS
+
+#ifndef WINAPI
+#ifdef _M_IX86
+#define WINAPI __stdcall
+#else
+#define WINAPI
+#endif
+#endif
+
+#if defined(_WIN64)
+#define WIN_SYM_PREFIX
+#else
+#define WIN_SYM_PREFIX "_"
+#endif
+
+// Intermediate macro to ensure the parameter is expanded before stringified.
+#define STRINGIFY_(A) #A
+#define STRINGIFY(A) STRINGIFY_(A)
+
+// ----------------- A workaround for the absence of weak symbols --------------
+// We don't have a direct equivalent of weak symbols when using MSVC, but we can
+// use the /alternatename directive to tell the linker to default a specific
+// symbol to a specific value.
+// Take into account that this is a pragma directive for the linker, so it will
+// be ignored by the compiler and the function will be marked as UNDEF in the
+// symbol table of the resulting object file. The linker won't find the default
+// implementation until it links with that object file.
+// So, suppose we provide a default implementation "fundef" for "fun", and this
+// is compiled into the object file "test.obj" including the pragma directive.
+// If we have some code with references to "fun" and we link that code with
+// "test.obj", it will work because the linker always links object files.
+// But, if "test.obj" is included in a static library, like "test.lib", then the
+// linker will only link to "test.obj" if necessary. If we only included the
+// definition of "fun", it won't link to "test.obj" (from test.lib) because
+// "fun" appears as UNDEF, so it doesn't resolve the symbol "fun", and will
+// result in a link error (the linker doesn't find the pragma directive).
+// So, a workaround is to force linkage with the modules that include weak
+// definitions, with the following macro: WIN_FORCE_LINK()
+
+#define WIN_WEAK_ALIAS(Name, Default)                                          \
+  __pragma(comment(linker, "/alternatename:" WIN_SYM_PREFIX STRINGIFY(Name) "="\
+                                             WIN_SYM_PREFIX STRINGIFY(Default)))
+
+#define WIN_FORCE_LINK(Name)                                                   \
+  __pragma(comment(linker, "/include:" WIN_SYM_PREFIX STRINGIFY(Name)))
+
+#define WIN_EXPORT(ExportedName, Name)                                         \
+  __pragma(comment(linker, "/export:" WIN_SYM_PREFIX STRINGIFY(ExportedName)   \
+                                  "=" WIN_SYM_PREFIX STRINGIFY(Name)))
+
+// We cannot define weak functions on Windows, but we can use WIN_WEAK_ALIAS()
+// which defines an alias to a default implementation, and only works when
+// linking statically.
+// So, to define a weak function "fun", we define a default implementation with
+// a different name "fun__def" and we create a "weak alias" fun = fun__def.
+// Then, users can override it just defining "fun".
+// We impose "extern "C"" because otherwise WIN_WEAK_ALIAS() will fail because
+// of name mangling.
+
+// Dummy name for default implementation of weak function.
+# define WEAK_DEFAULT_NAME(Name) Name##__def
+// Name for exported implementation of weak function.
+# define WEAK_EXPORT_NAME(Name) Name##__dll
+
+// Use this macro when you need to define and export a weak function from a
+// library. For example:
+//   WIN_WEAK_EXPORT_DEF(bool, compare, int a, int b) { return a > b; }
+# define WIN_WEAK_EXPORT_DEF(ReturnType, Name, ...)                            \
+  WIN_WEAK_ALIAS(Name, WEAK_DEFAULT_NAME(Name))                                \
+  WIN_EXPORT(WEAK_EXPORT_NAME(Name), Name)                                     \
+  extern "C" ReturnType Name(__VA_ARGS__);                                     \
+  extern "C" ReturnType WEAK_DEFAULT_NAME(Name)(__VA_ARGS__)
+
+// Use this macro when you need to import a weak function from a library. It
+// defines a weak alias to the imported function from the dll. For example:
+//   WIN_WEAK_IMPORT_DEF(compare)
+# define WIN_WEAK_IMPORT_DEF(Name)                                             \
+  WIN_WEAK_ALIAS(Name, WEAK_EXPORT_NAME(Name))
+
+// So, for Windows we provide something similar to weak symbols in Linux, with
+// some differences:
+// + A default implementation must always be provided.
+//
+// + When linking statically it works quite similarly. For example:
+//
+//   // libExample.cc
+//   WIN_WEAK_EXPORT_DEF(bool, compare, int a, int b) { return a > b; }
+//
+//   // client.cc
+//   // We can use the default implementation from the library:
+//   compare(1, 2);
+//   // Or we can override it:
+//   extern "C" bool compare (int a, int b) { return a >= b; }
+//
+//  And it will work fine. If we don't override the function, we need to ensure
+//  that the linker includes the object file with the default implementation.
+//  We can do so with the linker option "-wholearchive:".
+//
+// + When linking dynamically with a library (dll), weak functions are exported
+//  with "__dll" suffix. Clients can use the macro WIN_WEAK_IMPORT_DEF(fun)
+//  which defines a "weak alias" fun = fun__dll.
+//
+//   // libExample.cc
+//   WIN_WEAK_EXPORT_DEF(bool, compare, int a, int b) { return a > b; }
+//
+//   // client.cc
+//   WIN_WEAK_IMPORT_DEF(compare)
+//   // We can use the default implementation from the library:
+//   compare(1, 2);
+//   // Or we can override it:
+//   extern "C" bool compare (int a, int b) { return a >= b; }
+//
+//  But if we override the function, the dlls don't have access to it (which
+//  is different in linux). If that is desired, the strong definition must be
+//  exported and interception can be used from the rest of the dlls.
+//
+//   // libExample.cc
+//   WIN_WEAK_EXPORT_DEF(bool, compare, int a, int b) { return a > b; }
+//   // When initialized, check if the main executable defined "compare".
+//   int libExample_init() {
+//     uptr fnptr = __interception::InternalGetProcAddress(
+//         (void *)GetModuleHandleA(0), "compare");
+//     if (fnptr && !__interception::OverrideFunction((uptr)compare, fnptr, 0))
+//       abort();
+//     return 0;
+//   }
+//
+//   // client.cc
+//   WIN_WEAK_IMPORT_DEF(compare)
+//   // We override and export compare:
+//   extern "C" __declspec(dllexport) bool compare (int a, int b) {
+//     return a >= b;
+//   }
+//
+#endif // SANITIZER_WINDOWS
+#endif // SANITIZER_WIN_DEFS_H
diff --git a/lib/sanitizer_common/sanitizer_win_dll_thunk.cc b/lib/sanitizer_common/sanitizer_win_dll_thunk.cc
new file mode 100644
index 0000000..4fb4650
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_win_dll_thunk.cc
@@ -0,0 +1,102 @@
+//===-- sanitizer_win_dll_thunk.cc ----------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file defines a family of thunks that should be statically linked into
+// the DLLs that have instrumentation in order to delegate the calls to the
+// shared runtime that lives in the main binary.
+// See https://github.com/google/sanitizers/issues/209 for the details.
+//===----------------------------------------------------------------------===//
+
+#ifdef SANITIZER_DLL_THUNK
+#include "sanitizer_win_defs.h"
+#include "sanitizer_win_dll_thunk.h"
+#include "interception/interception.h"
+
+extern "C" {
+void *WINAPI GetModuleHandleA(const char *module_name);
+void abort();
+}
+
+namespace __sanitizer {
+uptr dllThunkGetRealAddrOrDie(const char *name) {
+  uptr ret =
+      __interception::InternalGetProcAddress((void *)GetModuleHandleA(0), name);
+  if (!ret)
+    abort();
+  return ret;
+}
+
+int dllThunkIntercept(const char* main_function, uptr dll_function) {
+  uptr wrapper = dllThunkGetRealAddrOrDie(main_function);
+  if (!__interception::OverrideFunction(dll_function, wrapper, 0))
+    abort();
+  return 0;
+}
+
+int dllThunkInterceptWhenPossible(const char* main_function,
+    const char* default_function, uptr dll_function) {
+  uptr wrapper = __interception::InternalGetProcAddress(
+    (void *)GetModuleHandleA(0), main_function);
+  if (!wrapper)
+    wrapper = dllThunkGetRealAddrOrDie(default_function);
+  if (!__interception::OverrideFunction(dll_function, wrapper, 0))
+    abort();
+  return 0;
+}
+} // namespace __sanitizer
+
+// Include Sanitizer Common interface.
+#define INTERFACE_FUNCTION(Name) INTERCEPT_SANITIZER_FUNCTION(Name)
+#define INTERFACE_WEAK_FUNCTION(Name) INTERCEPT_SANITIZER_WEAK_FUNCTION(Name)
+#include "sanitizer_common_interface.inc"
+
+#pragma section(".DLLTH$A", read)  // NOLINT
+#pragma section(".DLLTH$Z", read)  // NOLINT
+
+typedef void (*DllThunkCB)();
+extern "C" {
+__declspec(allocate(".DLLTH$A")) DllThunkCB __start_dll_thunk;
+__declspec(allocate(".DLLTH$Z")) DllThunkCB __stop_dll_thunk;
+}
+
+// Disable compiler warnings that show up if we declare our own version
+// of a compiler intrinsic (e.g. strlen).
+#pragma warning(disable: 4391)
+#pragma warning(disable: 4392)
+
+extern "C" int __dll_thunk_init() {
+  static bool flag = false;
+  // __dll_thunk_init is expected to be called by only one thread.
+  if (flag) return 0;
+  flag = true;
+
+  for (DllThunkCB *it = &__start_dll_thunk; it < &__stop_dll_thunk; ++it)
+    if (*it)
+      (*it)();
+
+  // In DLLs, the callbacks are expected to return 0,
+  // otherwise CRT initialization fails.
+  return 0;
+}
+
+// We want to call __dll_thunk_init before C/C++ initializers / constructors are
+// executed, otherwise functions like memset might be invoked.
+#pragma section(".CRT$XIB", long, read)  // NOLINT
+__declspec(allocate(".CRT$XIB")) int (*__dll_thunk_preinit)() =
+    __dll_thunk_init;
+
+static void WINAPI dll_thunk_thread_init(void *mod, unsigned long reason,
+                                         void *reserved) {
+  if (reason == /*DLL_PROCESS_ATTACH=*/1) __dll_thunk_init();
+}
+
+#pragma section(".CRT$XLAB", long, read)  // NOLINT
+__declspec(allocate(".CRT$XLAB")) void (WINAPI *__dll_thunk_tls_init)(void *,
+    unsigned long, void *) = dll_thunk_thread_init;
+
+#endif // SANITIZER_DLL_THUNK
diff --git a/lib/sanitizer_common/sanitizer_win_dll_thunk.h b/lib/sanitizer_common/sanitizer_win_dll_thunk.h
new file mode 100644
index 0000000..2f9ebda
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_win_dll_thunk.h
@@ -0,0 +1,182 @@
+//===-- sanitizer_win_dll_thunk.h -----------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This header provides helper macros to delegate calls to the shared runtime
+// that lives in the main executable. It should be included in dll_thunks that
+// will be linked to the dlls, when the sanitizer is a static library included
+// in the main executable.
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_WIN_DLL_THUNK_H
+#define SANITIZER_WIN_DLL_THUNK_H
+#include "sanitizer_internal_defs.h"
+
+namespace __sanitizer {
+uptr dllThunkGetRealAddrOrDie(const char *name);
+
+int dllThunkIntercept(const char* main_function, uptr dll_function);
+
+int dllThunkInterceptWhenPossible(const char* main_function,
+    const char* default_function, uptr dll_function);
+}
+
+extern "C" int __dll_thunk_init();
+
+// ----------------- Function interception helper macros -------------------- //
+// Override dll_function with main_function from main executable.
+#define INTERCEPT_OR_DIE(main_function, dll_function)                          \
+  static int intercept_##dll_function() {                                      \
+    return __sanitizer::dllThunkIntercept(main_function, (__sanitizer::uptr)   \
+        dll_function);                                                         \
+  }                                                                            \
+  __pragma(section(".DLLTH$M", long, read))                                    \
+  __declspec(allocate(".DLLTH$M")) int (*__dll_thunk_##dll_function)() =       \
+    intercept_##dll_function;
+
+// Try to override dll_function with main_function from main executable.
+// If main_function is not present, override dll_function with default_function.
+#define INTERCEPT_WHEN_POSSIBLE(main_function, default_function, dll_function) \
+  static int intercept_##dll_function() {                                      \
+    return __sanitizer::dllThunkInterceptWhenPossible(main_function,           \
+        default_function, (__sanitizer::uptr)dll_function);                    \
+  }                                                                            \
+  __pragma(section(".DLLTH$M", long, read))                                    \
+  __declspec(allocate(".DLLTH$M")) int (*__dll_thunk_##dll_function)() =       \
+    intercept_##dll_function;
+
+// -------------------- Function interception macros ------------------------ //
+// Special case of hooks -- ASan own interface functions.  Those are only called
+// after __asan_init, thus an empty implementation is sufficient.
+#define INTERCEPT_SANITIZER_FUNCTION(name)                                     \
+  extern "C" __declspec(noinline) void name() {                                \
+    volatile int prevent_icf = (__LINE__ << 8) ^ __COUNTER__;                  \
+    static const char function_name[] = #name;                                 \
+    for (const char* ptr = &function_name[0]; *ptr; ++ptr)                     \
+      prevent_icf ^= *ptr;                                                     \
+    (void)prevent_icf;                                                         \
+    __debugbreak();                                                            \
+  }                                                                            \
+  INTERCEPT_OR_DIE(#name, name)
+
+// Special case of hooks -- Weak functions, could be redefined in the main
+// executable, but that is not necessary, so we shouldn't die if we can not find
+// a reference. Instead, when the function is not present in the main executable
+// we consider the default impl provided by asan library.
+#define INTERCEPT_SANITIZER_WEAK_FUNCTION(name)                                \
+  extern "C" __declspec(noinline) void name() {                                \
+    volatile int prevent_icf = (__LINE__ << 8) ^ __COUNTER__;                  \
+    static const char function_name[] = #name;                                 \
+    for (const char* ptr = &function_name[0]; *ptr; ++ptr)                     \
+      prevent_icf ^= *ptr;                                                     \
+    (void)prevent_icf;                                                         \
+    __debugbreak();                                                            \
+  }                                                                            \
+  INTERCEPT_WHEN_POSSIBLE(#name, STRINGIFY(WEAK_EXPORT_NAME(name)), name)
+
+// We can't define our own version of strlen etc. because that would lead to
+// link-time or even type mismatch errors.  Instead, we can declare a function
+// just to be able to get its address.  We may miss the first few calls to the
+// functions since it can be called before __dll_thunk_init, but that would lead
+// to false negatives in the startup code before user's global initializers,
+// which isn't a big deal.
+#define INTERCEPT_LIBRARY_FUNCTION(name)                                       \
+  extern "C" void name();                                                      \
+  INTERCEPT_OR_DIE(WRAPPER_NAME(name), name)
+
+// Use these macros for functions that could be called before __dll_thunk_init()
+// is executed and don't lead to errors if defined (free, malloc, etc).
+#define INTERCEPT_WRAP_V_V(name)                                               \
+  extern "C" void name() {                                                     \
+    typedef decltype(name) *fntype;                                            \
+    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
+    fn();                                                                      \
+  }                                                                            \
+  INTERCEPT_OR_DIE(#name, name);
+
+#define INTERCEPT_WRAP_V_W(name)                                               \
+  extern "C" void name(void *arg) {                                            \
+    typedef decltype(name) *fntype;                                            \
+    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
+    fn(arg);                                                                   \
+  }                                                                            \
+  INTERCEPT_OR_DIE(#name, name);
+
+#define INTERCEPT_WRAP_V_WW(name)                                              \
+  extern "C" void name(void *arg1, void *arg2) {                               \
+    typedef decltype(name) *fntype;                                            \
+    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
+    fn(arg1, arg2);                                                            \
+  }                                                                            \
+  INTERCEPT_OR_DIE(#name, name);
+
+#define INTERCEPT_WRAP_V_WWW(name)                                             \
+  extern "C" void name(void *arg1, void *arg2, void *arg3) {                   \
+    typedef decltype(name) *fntype;                                            \
+    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
+    fn(arg1, arg2, arg3);                                                      \
+  }                                                                            \
+  INTERCEPT_OR_DIE(#name, name);
+
+#define INTERCEPT_WRAP_W_V(name)                                               \
+  extern "C" void *name() {                                                    \
+    typedef decltype(name) *fntype;                                            \
+    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
+    return fn();                                                               \
+  }                                                                            \
+  INTERCEPT_OR_DIE(#name, name);
+
+#define INTERCEPT_WRAP_W_W(name)                                               \
+  extern "C" void *name(void *arg) {                                           \
+    typedef decltype(name) *fntype;                                            \
+    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
+    return fn(arg);                                                            \
+  }                                                                            \
+  INTERCEPT_OR_DIE(#name, name);
+
+#define INTERCEPT_WRAP_W_WW(name)                                              \
+  extern "C" void *name(void *arg1, void *arg2) {                              \
+    typedef decltype(name) *fntype;                                            \
+    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
+    return fn(arg1, arg2);                                                     \
+  }                                                                            \
+  INTERCEPT_OR_DIE(#name, name);
+
+#define INTERCEPT_WRAP_W_WWW(name)                                             \
+  extern "C" void *name(void *arg1, void *arg2, void *arg3) {                  \
+    typedef decltype(name) *fntype;                                            \
+    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
+    return fn(arg1, arg2, arg3);                                               \
+  }                                                                            \
+  INTERCEPT_OR_DIE(#name, name);
+
+#define INTERCEPT_WRAP_W_WWWW(name)                                            \
+  extern "C" void *name(void *arg1, void *arg2, void *arg3, void *arg4) {      \
+    typedef decltype(name) *fntype;                                            \
+    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
+    return fn(arg1, arg2, arg3, arg4);                                         \
+  }                                                                            \
+  INTERCEPT_OR_DIE(#name, name);
+
+#define INTERCEPT_WRAP_W_WWWWW(name)                                           \
+  extern "C" void *name(void *arg1, void *arg2, void *arg3, void *arg4,        \
+                        void *arg5) {                                          \
+    typedef decltype(name) *fntype;                                            \
+    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
+    return fn(arg1, arg2, arg3, arg4, arg5);                                   \
+  }                                                                            \
+  INTERCEPT_OR_DIE(#name, name);
+
+#define INTERCEPT_WRAP_W_WWWWWW(name)                                          \
+  extern "C" void *name(void *arg1, void *arg2, void *arg3, void *arg4,        \
+                        void *arg5, void *arg6) {                              \
+    typedef decltype(name) *fntype;                                            \
+    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
+    return fn(arg1, arg2, arg3, arg4, arg5, arg6);                             \
+  }                                                                            \
+  INTERCEPT_OR_DIE(#name, name);
+
+#endif // SANITIZER_WIN_DLL_THUNK_H
diff --git a/lib/sanitizer_common/sanitizer_win_dynamic_runtime_thunk.cc b/lib/sanitizer_common/sanitizer_win_dynamic_runtime_thunk.cc
new file mode 100644
index 0000000..f8f9164
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_win_dynamic_runtime_thunk.cc
@@ -0,0 +1,21 @@
+//===-- sanitizer_win_dynamic_runtime_thunk.cc ----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines things that need to be present in the application modules
+// to interact with Sanitizer Common, when it is included in a dll.
+//
+//===----------------------------------------------------------------------===//
+#ifdef SANITIZER_DYNAMIC_RUNTIME_THUNK
+#define SANITIZER_IMPORT_INTERFACE 1
+#include "sanitizer_win_defs.h"
+// Define weak alias for all weak functions imported from sanitizer common.
+#define INTERFACE_FUNCTION(Name)
+#define INTERFACE_WEAK_FUNCTION(Name) WIN_WEAK_IMPORT_DEF(Name)
+#include "sanitizer_common_interface.inc"
+#endif // SANITIZER_DYNAMIC_RUNTIME_THUNK
diff --git a/lib/sanitizer_common/sanitizer_win_weak_interception.cc b/lib/sanitizer_common/sanitizer_win_weak_interception.cc
new file mode 100644
index 0000000..3643193
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_win_weak_interception.cc
@@ -0,0 +1,94 @@
+//===-- sanitizer_win_weak_interception.cc --------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This module should be included in the sanitizer when it is implemented as a
+// shared library on Windows (dll), in order to delegate the calls of weak
+// functions to the implementation in the main executable when a strong
+// definition is provided.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_WINDOWS && SANITIZER_DYNAMIC
+#include "sanitizer_win_weak_interception.h"
+#include "sanitizer_allocator_interface.h"
+#include "sanitizer_interface_internal.h"
+#include "sanitizer_win_defs.h"
+#include "interception/interception.h"
+
+extern "C" {
+void *WINAPI GetModuleHandleA(const char *module_name);
+void abort();
+}
+
+namespace __sanitizer {
+// Try to get a pointer to real_function in the main module and override
+// dll_function with that pointer. If the function isn't found, nothing changes.
+int interceptWhenPossible(uptr dll_function, const char *real_function) {
+  uptr real = __interception::InternalGetProcAddress(
+      (void *)GetModuleHandleA(0), real_function);
+  if (real && !__interception::OverrideFunction((uptr)dll_function, real, 0))
+    abort();
+  return 0;
+}
+} // namespace __sanitizer
+
+// Declare weak hooks.
+extern "C" {
+void __sanitizer_weak_hook_memcmp(uptr called_pc, const void *s1,
+                                  const void *s2, uptr n, int result);
+void __sanitizer_weak_hook_strcmp(uptr called_pc, const char *s1,
+                                  const char *s2, int result);
+void __sanitizer_weak_hook_strncmp(uptr called_pc, const char *s1,
+                                   const char *s2, uptr n, int result);
+void __sanitizer_weak_hook_strstr(uptr called_pc, const char *s1,
+                                  const char *s2, char *result);
+}
+
+// Include Sanitizer Common interface.
+#define INTERFACE_FUNCTION(Name)
+#define INTERFACE_WEAK_FUNCTION(Name) INTERCEPT_SANITIZER_WEAK_FUNCTION(Name)
+#include "sanitizer_common_interface.inc"
+
+#pragma section(".WEAK$A", read)  // NOLINT
+#pragma section(".WEAK$Z", read)  // NOLINT
+
+typedef void (*InterceptCB)();
+extern "C" {
+__declspec(allocate(".WEAK$A")) InterceptCB __start_weak_list;
+__declspec(allocate(".WEAK$Z")) InterceptCB __stop_weak_list;
+}
+
+static int weak_intercept_init() {
+  static bool flag = false;
+  // weak_intercept_init is expected to be called by only one thread.
+  if (flag) return 0;
+  flag = true;
+
+  for (InterceptCB *it = &__start_weak_list; it < &__stop_weak_list; ++it)
+    if (*it)
+      (*it)();
+
+  // In DLLs, the callbacks are expected to return 0,
+  // otherwise CRT initialization fails.
+  return 0;
+}
+
+#pragma section(".CRT$XIB", long, read)  // NOLINT
+__declspec(allocate(".CRT$XIB")) int (*__weak_intercept_preinit)() =
+    weak_intercept_init;
+
+static void WINAPI weak_intercept_thread_init(void *mod, unsigned long reason,
+                                              void *reserved) {
+  if (reason == /*DLL_PROCESS_ATTACH=*/1) weak_intercept_init();
+}
+
+#pragma section(".CRT$XLAB", long, read)  // NOLINT
+__declspec(allocate(".CRT$XLAB")) void(WINAPI *__weak_intercept_tls_init)(
+    void *, unsigned long, void *) = weak_intercept_thread_init;
+
+#endif // SANITIZER_WINDOWS && SANITIZER_DYNAMIC
diff --git a/lib/sanitizer_common/sanitizer_win_weak_interception.h b/lib/sanitizer_common/sanitizer_win_weak_interception.h
new file mode 100644
index 0000000..5b12297
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_win_weak_interception.h
@@ -0,0 +1,33 @@
+//===-- sanitizer_win_weak_interception.h ---------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This header provides helper macros to delegate calls of weak functions to the
+// implementation in the main executable when a strong definition is present.
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_WIN_WEAK_INTERCEPTION_H
+#define SANITIZER_WIN_WEAK_INTERCEPTION_H
+#include "sanitizer_internal_defs.h"
+
+namespace __sanitizer {
+int interceptWhenPossible(uptr dll_function, const char *real_function);
+}
+
+// ----------------- Function interception helper macros -------------------- //
+// Weak functions, could be redefined in the main executable, but that is not
+// necessary, so we shouldn't die if we can not find a reference.
+#define INTERCEPT_WEAK(Name) interceptWhenPossible((uptr) Name, #Name);
+
+#define INTERCEPT_SANITIZER_WEAK_FUNCTION(Name)                                \
+  static int intercept_##Name() {                                              \
+    return __sanitizer::interceptWhenPossible((__sanitizer::uptr) Name, #Name);\
+  }                                                                            \
+  __pragma(section(".WEAK$M", long, read))                                     \
+  __declspec(allocate(".WEAK$M")) int (*__weak_intercept_##Name)() =           \
+      intercept_##Name;
+
+#endif // SANITIZER_WIN_WEAK_INTERCEPTION_H
diff --git a/lib/sanitizer_common/scripts/sancov.py b/lib/sanitizer_common/scripts/sancov.py
index e19afdb..e2eba36 100755
--- a/lib/sanitizer_common/scripts/sancov.py
+++ b/lib/sanitizer_common/scripts/sancov.py
@@ -14,12 +14,14 @@
 prog_name = ""
 
 def Usage():
-  print >> sys.stderr, "Usage: \n" + \
-      " " + prog_name + " merge FILE [FILE...] > OUTPUT\n" \
-      " " + prog_name + " print FILE [FILE...]\n" \
-      " " + prog_name + " unpack FILE [FILE...]\n" \
-      " " + prog_name + " rawunpack FILE [FILE ...]\n" \
-      " " + prog_name + " missing BINARY < LIST_OF_PCS\n"
+  sys.stderr.write(
+    "Usage: \n" + \
+    " " + prog_name + " merge FILE [FILE...] > OUTPUT\n" \
+    " " + prog_name + " print FILE [FILE...]\n" \
+    " " + prog_name + " unpack FILE [FILE...]\n" \
+    " " + prog_name + " rawunpack FILE [FILE ...]\n" \
+    " " + prog_name + " missing BINARY < LIST_OF_PCS\n" \
+    "\n")
   exit(1)
 
 def CheckBits(bits):
@@ -68,16 +70,19 @@
       raise Exception('File %s is short (< 8 bytes)' % path)
     bits = ReadMagicAndReturnBitness(f, path)
     size -= 8
-    s = struct.unpack_from(TypeCodeForStruct(bits) * (size * 8 / bits), f.read(size))
-  print >>sys.stderr, "%s: read %d %d-bit PCs from %s" % (prog_name, size * 8 / bits, bits, path)
+    w = size * 8 // bits
+    s = struct.unpack_from(TypeCodeForStruct(bits) * (w), f.read(size))
+  sys.stderr.write(
+    "%s: read %d %d-bit PCs from %s\n" % (prog_name, w, bits, path))
   return s
 
 def Merge(files):
   s = set()
   for f in files:
     s = s.union(set(ReadOneFile(f)))
-  print >> sys.stderr, "%s: %d files merged; %d PCs total" % \
-    (prog_name, len(files), len(s))
+  sys.stderr.write(
+    "%s: %d files merged; %d PCs total\n" % (prog_name, len(files), len(s))
+  )
   return sorted(s)
 
 def PrintFiles(files):
@@ -85,10 +90,9 @@
     s = Merge(files)
   else:  # If there is just on file, print the PCs in order.
     s = ReadOneFile(files[0])
-    print >> sys.stderr, "%s: 1 file merged; %d PCs total" % \
-      (prog_name, len(s))
+    sys.stderr.write("%s: 1 file merged; %d PCs total\n" % (prog_name, len(s)))
   for i in s:
-    print "0x%x" % i
+    print("0x%x" % i)
 
 def MergeAndPrint(files):
   if sys.stdout.isatty():
@@ -97,27 +101,27 @@
   bits = 32
   if max(s) > 0xFFFFFFFF:
     bits = 64
-  array.array('I', MagicForBits(bits)).tofile(sys.stdout)
+  stdout_buf = getattr(sys.stdout, 'buffer', sys.stdout)
+  array.array('I', MagicForBits(bits)).tofile(stdout_buf)
   a = struct.pack(TypeCodeForStruct(bits) * len(s), *s)
-  sys.stdout.write(a)
+  stdout_buf.write(a)
 
 
 def UnpackOneFile(path):
   with open(path, mode="rb") as f:
-    print >> sys.stderr, "%s: unpacking %s" % (prog_name, path)
+    sys.stderr.write("%s: unpacking %s\n" % (prog_name, path))
     while True:
       header = f.read(12)
       if not header: return
       if len(header) < 12:
         break
       pid, module_length, blob_size = struct.unpack('iII', header)
-      module = f.read(module_length)
+      module = f.read(module_length).decode('utf-8')
       blob = f.read(blob_size)
       assert(len(module) == module_length)
       assert(len(blob) == blob_size)
       extracted_file = "%s.%d.sancov" % (module, pid)
-      print >> sys.stderr, "%s: extracting %s" % \
-        (prog_name, extracted_file)
+      sys.stderr.write("%s: extracting %s\n" % (prog_name, extracted_file))
       # The packed file may contain multiple blobs for the same pid/module
       # pair. Append to the end of the file instead of overwriting.
       with open(extracted_file, 'ab') as f2:
@@ -133,7 +137,7 @@
 def UnpackOneRawFile(path, map_path):
   mem_map = []
   with open(map_path, mode="rt") as f_map:
-    print >> sys.stderr, "%s: reading map %s" % (prog_name, map_path)
+    sys.stderr.write("%s: reading map %s\n" % (prog_name, map_path))
     bits = int(f_map.readline())
     if bits != 32 and bits != 64:
       raise Exception('Wrong bits size in the map')
@@ -147,12 +151,12 @@
   mem_map_keys = [m[0] for m in mem_map]
 
   with open(path, mode="rb") as f:
-    print >> sys.stderr, "%s: unpacking %s" % (prog_name, path)
+    sys.stderr.write("%s: unpacking %s\n" % (prog_name, path))
 
     f.seek(0, 2)
     size = f.tell()
     f.seek(0, 0)
-    pcs = struct.unpack_from(TypeCodeForStruct(bits) * (size * 8 / bits), f.read(size))
+    pcs = struct.unpack_from(TypeCodeForStruct(bits) * (size * 8 // bits), f.read(size))
     mem_map_pcs = [[] for i in range(0, len(mem_map))]
 
     for pc in pcs:
@@ -161,7 +165,7 @@
       (start, end, base, module_path) = mem_map[map_idx]
       assert pc >= start
       if pc >= end:
-        print >> sys.stderr, "warning: %s: pc %x outside of any known mapping" % (prog_name, pc)
+        sys.stderr.write("warning: %s: pc %x outside of any known mapping\n" % (prog_name, pc))
         continue
       mem_map_pcs[map_idx].append(pc - base)
 
@@ -169,7 +173,7 @@
       if len(pc_list) == 0: continue
       assert path.endswith('.sancov.raw')
       dst_path = module_path + '.' + os.path.basename(path)[:-4]
-      print >> sys.stderr, "%s: writing %d PCs to %s" % (prog_name, len(pc_list), dst_path)
+      sys.stderr.write("%s: writing %d PCs to %s\n" % (prog_name, len(pc_list), dst_path))
       sorted_pc_list = sorted(pc_list)
       pc_buffer = struct.pack(TypeCodeForStruct(bits) * len(pc_list), *sorted_pc_list)
       with open(dst_path, 'ab+') as f2:
@@ -204,18 +208,19 @@
   if not os.path.isfile(binary):
     raise Exception('File not found: %s' % binary)
   instrumented = GetInstrumentedPCs(binary)
-  print >> sys.stderr, "%s: found %d instrumented PCs in %s" % (prog_name,
-                                                                len(instrumented),
-                                                                binary)
+  sys.stderr.write("%s: found %d instrumented PCs in %s\n" % (prog_name,
+                                                              len(instrumented),
+                                                              binary))
   covered = set(int(line, 16) for line in sys.stdin)
-  print >> sys.stderr, "%s: read %d PCs from stdin" % (prog_name, len(covered))
+  sys.stderr.write("%s: read %d PCs from stdin\n" % (prog_name, len(covered)))
   missing = instrumented - covered
-  print >> sys.stderr, "%s: %d PCs missing from coverage" % (prog_name, len(missing))
+  sys.stderr.write("%s: %d PCs missing from coverage\n" % (prog_name, len(missing)))
   if (len(missing) > len(instrumented) - len(covered)):
-    print >> sys.stderr, \
-        "%s: WARNING: stdin contains PCs not found in binary" % prog_name
+    sys.stderr.write(
+      "%s: WARNING: stdin contains PCs not found in binary\n" % prog_name
+    )
   for pc in sorted(missing):
-    print "0x%x" % pc
+    print("0x%x" % pc)
 
 if __name__ == '__main__':
   prog_name = sys.argv[0]
diff --git a/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cc b/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cc
index bd315a0..b25a53d 100644
--- a/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cc
+++ b/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cc
@@ -18,8 +18,9 @@
 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
 
 static llvm::symbolize::LLVMSymbolizer *getDefaultSymbolizer() {
-  static llvm::symbolize::LLVMSymbolizer DefaultSymbolizer;
-  return &DefaultSymbolizer;
+  static llvm::symbolize::LLVMSymbolizer *DefaultSymbolizer =
+      new llvm::symbolize::LLVMSymbolizer();
+  return DefaultSymbolizer;
 }
 
 namespace __sanitizer {
@@ -41,8 +42,8 @@
         getDefaultSymbolizer()->symbolizeInlinedCode(ModuleName, ModuleOffset);
     Printer << (ResOrErr ? ResOrErr.get() : llvm::DIInliningInfo());
   }
-  __sanitizer::internal_snprintf(Buffer, MaxLength, "%s", Result.c_str());
-  return true;
+  return __sanitizer::internal_snprintf(Buffer, MaxLength, "%s",
+                                        Result.c_str()) < MaxLength;
 }
 
 bool __sanitizer_symbolize_data(const char *ModuleName, uint64_t ModuleOffset,
@@ -55,8 +56,8 @@
         getDefaultSymbolizer()->symbolizeData(ModuleName, ModuleOffset);
     Printer << (ResOrErr ? ResOrErr.get() : llvm::DIGlobal());
   }
-  __sanitizer::internal_snprintf(Buffer, MaxLength, "%s", Result.c_str());
-  return true;
+  return __sanitizer::internal_snprintf(Buffer, MaxLength, "%s",
+                                        Result.c_str()) < MaxLength;
 }
 
 void __sanitizer_symbolize_flush() { getDefaultSymbolizer()->flush(); }
@@ -65,8 +66,10 @@
                                    int MaxLength) {
   std::string Result =
       llvm::symbolize::LLVMSymbolizer::DemangleName(Name, nullptr);
-  __sanitizer::internal_snprintf(Buffer, MaxLength, "%s", Result.c_str());
-  return static_cast<int>(Result.size() + 1);
+  return __sanitizer::internal_snprintf(Buffer, MaxLength, "%s",
+                                        Result.c_str()) < MaxLength
+             ? static_cast<int>(Result.size() + 1)
+             : 0;
 }
 
 }  // extern "C"
diff --git a/lib/sanitizer_common/symbolizer/sanitizer_wrappers.cc b/lib/sanitizer_common/symbolizer/sanitizer_wrappers.cc
index 0a796d9..66d089a 100644
--- a/lib/sanitizer_common/symbolizer/sanitizer_wrappers.cc
+++ b/lib/sanitizer_common/symbolizer/sanitizer_wrappers.cc
@@ -172,4 +172,28 @@
 LLVM_SYMBOLIZER_INTERCEPTOR4(pread64, ssize_t(int, void *, size_t, off64_t))
 LLVM_SYMBOLIZER_INTERCEPTOR2(realpath, char *(const char *, char *))
 
+LLVM_SYMBOLIZER_INTERCEPTOR1(pthread_cond_broadcast, int(pthread_cond_t *))
+LLVM_SYMBOLIZER_INTERCEPTOR2(pthread_cond_wait,
+                             int(pthread_cond_t *, pthread_mutex_t *))
+LLVM_SYMBOLIZER_INTERCEPTOR1(pthread_mutex_lock, int(pthread_mutex_t *))
+LLVM_SYMBOLIZER_INTERCEPTOR1(pthread_mutex_unlock, int(pthread_mutex_t *))
+LLVM_SYMBOLIZER_INTERCEPTOR1(pthread_mutex_destroy, int(pthread_mutex_t *))
+LLVM_SYMBOLIZER_INTERCEPTOR2(pthread_mutex_init,
+                             int(pthread_mutex_t *,
+                                 const pthread_mutexattr_t *))
+LLVM_SYMBOLIZER_INTERCEPTOR1(pthread_mutexattr_destroy,
+                             int(pthread_mutexattr_t *))
+LLVM_SYMBOLIZER_INTERCEPTOR1(pthread_mutexattr_init, int(pthread_mutexattr_t *))
+LLVM_SYMBOLIZER_INTERCEPTOR2(pthread_mutexattr_settype,
+                             int(pthread_mutexattr_t *, int))
+LLVM_SYMBOLIZER_INTERCEPTOR1(pthread_getspecific, void *(pthread_key_t))
+LLVM_SYMBOLIZER_INTERCEPTOR2(pthread_key_create,
+                             int(pthread_key_t *, void (*)(void *)))
+LLVM_SYMBOLIZER_INTERCEPTOR2(pthread_once,
+                             int(pthread_once_t *, void (*)(void)))
+LLVM_SYMBOLIZER_INTERCEPTOR2(pthread_setspecific,
+                             int(pthread_key_t, const void *))
+LLVM_SYMBOLIZER_INTERCEPTOR3(pthread_sigmask,
+                             int(int, const sigset_t *, sigset_t *))
+
 }  // extern "C"
diff --git a/lib/sanitizer_common/symbolizer/scripts/ar_to_bc.sh b/lib/sanitizer_common/symbolizer/scripts/ar_to_bc.sh
new file mode 100755
index 0000000..788cef8
--- /dev/null
+++ b/lib/sanitizer_common/symbolizer/scripts/ar_to_bc.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+function usage() {
+  echo "Usage: $0 INPUT... OUTPUT"
+  exit 1
+}
+
+if [ "$#" -le 1 ]; then
+  usage
+fi
+
+AR=$(readlink -f $AR)
+LINK=$(readlink -f $LINK)
+
+INPUTS=
+OUTPUT=
+for ARG in $@; do
+  INPUTS="$INPUTS $OUTPUT"
+  OUTPUT=$(readlink -f $ARG)
+done
+
+echo Inputs: $INPUTS
+echo Output: $OUTPUT
+
+SCRATCH_DIR=$(mktemp -d)
+ln -s $INPUTS $SCRATCH_DIR/
+
+pushd $SCRATCH_DIR
+
+for INPUT in *; do
+  for OBJ in $($AR t $INPUT); do
+    $AR x $INPUT $OBJ
+    mv -f $OBJ $(basename $INPUT).$OBJ
+  done
+done
+
+$LINK *.o -o $OUTPUT
+
+rm -rf $SCRATCH_DIR
diff --git a/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh b/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
index 07239eb..4a0fb03 100755
--- a/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
+++ b/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
@@ -58,9 +58,9 @@
 CC=$CLANG_DIR/clang
 CXX=$CLANG_DIR/clang++
 TBLGEN=$CLANG_DIR/llvm-tblgen
-LINK=$CLANG_DIR/llvm-link
 OPT=$CLANG_DIR/opt
-AR=$CLANG_DIR/llvm-ar
+export AR=$CLANG_DIR/llvm-ar
+export LINK=$CLANG_DIR/llvm-link
 
 for F in $CC $CXX $TBLGEN $LINK $OPT $AR; do
   if [[ ! -x "$F" ]]; then
@@ -136,29 +136,26 @@
 mkdir ${SYMBOLIZER_BUILD}
 cd ${SYMBOLIZER_BUILD}
 
-for A in $LIBCXX_BUILD/lib/libc++.a \
-         $LIBCXX_BUILD/lib/libc++abi.a \
-         $LLVM_BUILD/lib/libLLVMSymbolize.a \
-         $LLVM_BUILD/lib/libLLVMObject.a \
-         $LLVM_BUILD/lib/libLLVMDebugInfoDWARF.a \
-         $LLVM_BUILD/lib/libLLVMSupport.a \
-         $LLVM_BUILD/lib/libLLVMDebugInfoPDB.a \
-         $LLVM_BUILD/lib/libLLVMMC.a \
-         $ZLIB_BUILD/libz.a ; do
-  for O in $($AR t $A); do
-    $AR x $A $O
-    mv -f $O "$(basename $A).$O" # Rename to avoid collisions between libs.
-  done
-done
-
 echo "Compiling..."
 SYMBOLIZER_FLAGS="$FLAGS -std=c++11 -I${LLVM_SRC}/include -I${LLVM_BUILD}/include -I${LIBCXX_BUILD}/include/c++/v1"
 $CXX $SYMBOLIZER_FLAGS ${SRC_DIR}/sanitizer_symbolize.cc ${SRC_DIR}/sanitizer_wrappers.cc -c
+$AR rc symbolizer.a sanitizer_symbolize.o sanitizer_wrappers.o
 
 SYMBOLIZER_API_LIST=__sanitizer_symbolize_code,__sanitizer_symbolize_data,__sanitizer_symbolize_flush,__sanitizer_symbolize_demangle
 
 # Merge all the object files together and copy the resulting library back.
-$LINK *.o -o all.bc
+$SCRIPT_DIR/ar_to_bc.sh $LIBCXX_BUILD/lib/libc++.a \
+                        $LIBCXX_BUILD/lib/libc++abi.a \
+                        $LLVM_BUILD/lib/libLLVMSymbolize.a \
+                        $LLVM_BUILD/lib/libLLVMObject.a \
+                        $LLVM_BUILD/lib/libLLVMDebugInfoDWARF.a \
+                        $LLVM_BUILD/lib/libLLVMSupport.a \
+                        $LLVM_BUILD/lib/libLLVMDebugInfoPDB.a \
+                        $LLVM_BUILD/lib/libLLVMMC.a \
+                        $ZLIB_BUILD/libz.a \
+                        symbolizer.a \
+                        all.bc
+
 echo "Optimizing..."
 $OPT -internalize -internalize-public-api-list=${SYMBOLIZER_API_LIST} all.bc -o opt.bc
 $CC $FLAGS -fno-lto -c opt.bc -o symbolizer.o
diff --git a/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt b/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
index 45f3ea2..033acf7 100644
--- a/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
+++ b/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
@@ -64,6 +64,7 @@
 isxdigit U
 log10 U
 lseek U
+lseek64 U
 malloc U
 mbrlen U
 mbrtowc U
diff --git a/lib/sanitizer_common/tests/CMakeLists.txt b/lib/sanitizer_common/tests/CMakeLists.txt
index b66f756..b310f93 100644
--- a/lib/sanitizer_common/tests/CMakeLists.txt
+++ b/lib/sanitizer_common/tests/CMakeLists.txt
@@ -26,6 +26,7 @@
   sanitizer_posix_test.cc
   sanitizer_printf_test.cc
   sanitizer_procmaps_test.cc
+  sanitizer_quarantine_test.cc
   sanitizer_stackdepot_test.cc
   sanitizer_stacktrace_printer_test.cc
   sanitizer_stacktrace_test.cc
@@ -80,7 +81,10 @@
 
 if(APPLE)
   list(APPEND SANITIZER_TEST_CFLAGS_COMMON ${DARWIN_osx_CFLAGS})
-  list(APPEND SANITIZER_TEST_LINK_FLAGS_COMMON ${DARWIN_osx_LINKFLAGS})
+  list(APPEND SANITIZER_TEST_LINK_FLAGS_COMMON ${DARWIN_osx_LINK_FLAGS})
+
+  add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
+  list(APPEND SANITIZER_TEST_LINK_FLAGS_COMMON ${WEAK_SYMBOL_LINK_FLAGS})
 endif()
 
 # MSVC linker is allocating 1M for the stack by default, which is not
diff --git a/lib/sanitizer_common/tests/sanitizer_allocator_test.cc b/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
index 8df5efd..e14517f 100644
--- a/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
@@ -23,6 +23,7 @@
 #include <stdlib.h>
 #include <algorithm>
 #include <vector>
+#include <random>
 #include <set>
 
 using namespace __sanitizer;
@@ -539,6 +540,7 @@
       Allocator;
   Allocator *a = new Allocator;
   a->Init(/* may_return_null */ true, kReleaseToOSIntervalNever);
+  std::mt19937 r;
 
   AllocatorCache cache;
   memset(&cache, 0, sizeof(cache));
@@ -570,7 +572,7 @@
       allocated.push_back(x);
     }
 
-    random_shuffle(allocated.begin(), allocated.end());
+    std::shuffle(allocated.begin(), allocated.end(), r);
 
     for (uptr i = 0; i < kNumAllocs; i++) {
       void *x = allocated[i];
diff --git a/lib/sanitizer_common/tests/sanitizer_allocator_testlib.cc b/lib/sanitizer_common/tests/sanitizer_allocator_testlib.cc
index c6dd3c4..d2920d8 100644
--- a/lib/sanitizer_common/tests/sanitizer_allocator_testlib.cc
+++ b/lib/sanitizer_common/tests/sanitizer_allocator_testlib.cc
@@ -139,6 +139,7 @@
   return p;
 }
 
+#if SANITIZER_INTERCEPT_MEMALIGN
 void *memalign(size_t alignment, size_t size) {
   if (UNLIKELY(!thread_inited))
     thread_init();
@@ -146,6 +147,7 @@
   SANITIZER_MALLOC_HOOK(p, size);
   return p;
 }
+#endif // SANITIZER_INTERCEPT_MEMALIGN
 
 int posix_memalign(void **memptr, size_t alignment, size_t size) {
   if (UNLIKELY(!thread_inited))
@@ -165,18 +167,26 @@
   return p;
 }
 
+#if SANITIZER_INTERCEPT_CFREE
 void cfree(void *p) ALIAS("free");
+#endif // SANITIZER_INTERCEPT_CFREE
+#if SANITIZER_INTERCEPT_PVALLOC
 void *pvalloc(size_t size) ALIAS("valloc");
+#endif // SANITIZER_INTERCEPT_PVALLOC
+#if SANITIZER_INTERCEPT_MEMALIGN
 void *__libc_memalign(size_t alignment, size_t size) ALIAS("memalign");
+#endif // SANITIZER_INTERCEPT_MEMALIGN
 
 void malloc_usable_size() {
 }
 
+#if SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO
 void mallinfo() {
 }
 
 void mallopt() {
 }
+#endif // SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO
 }  // extern "C"
 
 namespace std {
diff --git a/lib/sanitizer_common/tests/sanitizer_bitvector_test.cc b/lib/sanitizer_common/tests/sanitizer_bitvector_test.cc
index 706b4c5..dec5459 100644
--- a/lib/sanitizer_common/tests/sanitizer_bitvector_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_bitvector_test.cc
@@ -19,6 +19,7 @@
 
 #include <algorithm>
 #include <vector>
+#include <random>
 #include <set>
 
 using namespace __sanitizer;
@@ -75,6 +76,7 @@
 
 template <class BV>
 void TestBitVector(uptr expected_size) {
+  std::mt19937 r;
   BV bv, bv1, t_bv;
   EXPECT_EQ(expected_size, BV::kSize);
   bv.clear();
@@ -112,7 +114,7 @@
   for (uptr it = 0; it < 30; it++) {
     // iota
     for (size_t j = 0; j < bits.size(); j++) bits[j] = j;
-    random_shuffle(bits.begin(), bits.end());
+    std::shuffle(bits.begin(), bits.end(), r);
     set<uptr> s, s1, t_s;
     bv.clear();
     bv1.clear();
diff --git a/lib/sanitizer_common/tests/sanitizer_format_interceptor_test.cc b/lib/sanitizer_common/tests/sanitizer_format_interceptor_test.cc
index 13918af..2f0494f 100644
--- a/lib/sanitizer_common/tests/sanitizer_format_interceptor_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_format_interceptor_test.cc
@@ -256,4 +256,8 @@
 
   // Checks for wide-character strings are not implemented yet.
   testPrintf("%ls", 1, 0);
+
+  testPrintf("%m", 0);
+  testPrintf("%m%s", 1, test_buf_size);
+  testPrintf("%s%m%s", 2, test_buf_size, test_buf_size);
 }
diff --git a/lib/sanitizer_common/tests/sanitizer_list_test.cc b/lib/sanitizer_common/tests/sanitizer_list_test.cc
index fbe53c0..ede9771 100644
--- a/lib/sanitizer_common/tests/sanitizer_list_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_list_test.cc
@@ -125,6 +125,22 @@
   CHECK(l.empty());
   l.CheckConsistency();
 
+  l.push_back(x);
+  l.push_back(y);
+  l.push_back(z);
+  l.extract(x, y);
+  CHECK_EQ(l.size(), 2);
+  CHECK_EQ(l.front(), x);
+  CHECK_EQ(l.back(), z);
+  l.CheckConsistency();
+  l.extract(x, z);
+  CHECK_EQ(l.size(), 1);
+  CHECK_EQ(l.front(), x);
+  CHECK_EQ(l.back(), x);
+  l.CheckConsistency();
+  l.pop_front();
+  CHECK(l.empty());
+
   List l1, l2;
   l1.clear();
   l2.clear();
diff --git a/lib/sanitizer_common/tests/sanitizer_quarantine_test.cc b/lib/sanitizer_common/tests/sanitizer_quarantine_test.cc
new file mode 100644
index 0000000..23ed5f9
--- /dev/null
+++ b/lib/sanitizer_common/tests/sanitizer_quarantine_test.cc
@@ -0,0 +1,180 @@
+//===-- sanitizer_quarantine_test.cc --------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer/AddressSanitizer runtime.
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_quarantine.h"
+#include "gtest/gtest.h"
+
+#include <stdlib.h>
+
+namespace __sanitizer {
+
+struct QuarantineCallback {
+  void Recycle(void *m) {}
+  void *Allocate(uptr size) {
+    return malloc(size);
+  }
+  void Deallocate(void *p) {
+    free(p);
+  }
+};
+
+typedef QuarantineCache<QuarantineCallback> Cache;
+
+static void* kFakePtr = reinterpret_cast<void*>(0xFA83FA83);
+static const size_t kBlockSize = 8;
+
+static QuarantineCallback cb;
+
+static void DeallocateCache(Cache *cache) {
+  while (QuarantineBatch *batch = cache->DequeueBatch())
+    cb.Deallocate(batch);
+}
+
+TEST(SanitizerCommon, QuarantineBatchMerge) {
+  // Verify the trivial case.
+  QuarantineBatch into;
+  into.init(kFakePtr, 4UL);
+  QuarantineBatch from;
+  from.init(kFakePtr, 8UL);
+
+  into.merge(&from);
+
+  ASSERT_EQ(into.count, 2UL);
+  ASSERT_EQ(into.batch[0], kFakePtr);
+  ASSERT_EQ(into.batch[1], kFakePtr);
+  ASSERT_EQ(into.size, 12UL + sizeof(QuarantineBatch));
+  ASSERT_EQ(into.quarantined_size(), 12UL);
+
+  ASSERT_EQ(from.count, 0UL);
+  ASSERT_EQ(from.size, sizeof(QuarantineBatch));
+  ASSERT_EQ(from.quarantined_size(), 0UL);
+
+  // Merge the batch to the limit.
+  for (uptr i = 2; i < QuarantineBatch::kSize; ++i)
+    from.push_back(kFakePtr, 8UL);
+  ASSERT_TRUE(into.count + from.count == QuarantineBatch::kSize);
+  ASSERT_TRUE(into.can_merge(&from));
+
+  into.merge(&from);
+  ASSERT_TRUE(into.count == QuarantineBatch::kSize);
+
+  // No more space, not even for one element.
+  from.init(kFakePtr, 8UL);
+
+  ASSERT_FALSE(into.can_merge(&from));
+}
+
+TEST(SanitizerCommon, QuarantineCacheMergeBatchesEmpty) {
+  Cache cache;
+  Cache to_deallocate;
+  cache.MergeBatches(&to_deallocate);
+
+  ASSERT_EQ(to_deallocate.Size(), 0UL);
+  ASSERT_EQ(to_deallocate.DequeueBatch(), nullptr);
+}
+
+TEST(SanitizerCommon, QuarantineCacheMergeBatchesOneBatch) {
+  Cache cache;
+  cache.Enqueue(cb, kFakePtr, kBlockSize);
+  ASSERT_EQ(kBlockSize + sizeof(QuarantineBatch), cache.Size());
+
+  Cache to_deallocate;
+  cache.MergeBatches(&to_deallocate);
+
+  // Nothing to merge, nothing to deallocate.
+  ASSERT_EQ(kBlockSize + sizeof(QuarantineBatch), cache.Size());
+
+  ASSERT_EQ(to_deallocate.Size(), 0UL);
+  ASSERT_EQ(to_deallocate.DequeueBatch(), nullptr);
+
+  DeallocateCache(&cache);
+}
+
+TEST(SanitizerCommon, QuarantineCacheMergeBatchesSmallBatches) {
+  // Make a cache with two batches small enough to merge.
+  Cache from;
+  from.Enqueue(cb, kFakePtr, kBlockSize);
+  Cache cache;
+  cache.Enqueue(cb, kFakePtr, kBlockSize);
+
+  cache.Transfer(&from);
+  ASSERT_EQ(kBlockSize * 2 + sizeof(QuarantineBatch) * 2, cache.Size());
+
+  Cache to_deallocate;
+  cache.MergeBatches(&to_deallocate);
+
+  // Batches merged, one batch to deallocate.
+  ASSERT_EQ(kBlockSize * 2 + sizeof(QuarantineBatch), cache.Size());
+  ASSERT_EQ(to_deallocate.Size(), sizeof(QuarantineBatch));
+
+  DeallocateCache(&cache);
+  DeallocateCache(&to_deallocate);
+}
+
+TEST(SanitizerCommon, QuarantineCacheMergeBatchesTooBigToMerge) {
+  const uptr kNumBlocks = QuarantineBatch::kSize - 1;
+
+  // Make a cache with two batches that are too big to merge.
+  Cache from;
+  Cache cache;
+  for (uptr i = 0; i < kNumBlocks; ++i) {
+    from.Enqueue(cb, kFakePtr, kBlockSize);
+    cache.Enqueue(cb, kFakePtr, kBlockSize);
+  }
+  cache.Transfer(&from);
+  ASSERT_EQ(kBlockSize * kNumBlocks * 2 +
+            sizeof(QuarantineBatch) * 2, cache.Size());
+
+  Cache to_deallocate;
+  cache.MergeBatches(&to_deallocate);
+
+  // Batches cannot be merged.
+  ASSERT_EQ(kBlockSize * kNumBlocks * 2 +
+            sizeof(QuarantineBatch) * 2, cache.Size());
+  ASSERT_EQ(to_deallocate.Size(), 0UL);
+
+  DeallocateCache(&cache);
+}
+
+TEST(SanitizerCommon, QuarantineCacheMergeBatchesALotOfBatches) {
+  const uptr kNumBatchesAfterMerge = 3;
+  const uptr kNumBlocks = QuarantineBatch::kSize * kNumBatchesAfterMerge;
+  const uptr kNumBatchesBeforeMerge = kNumBlocks;
+
+  // Make a cache with many small batches.
+  Cache cache;
+  for (uptr i = 0; i < kNumBlocks; ++i) {
+    Cache from;
+    from.Enqueue(cb, kFakePtr, kBlockSize);
+    cache.Transfer(&from);
+  }
+
+  ASSERT_EQ(kBlockSize * kNumBlocks +
+            sizeof(QuarantineBatch) * kNumBatchesBeforeMerge, cache.Size());
+
+  Cache to_deallocate;
+  cache.MergeBatches(&to_deallocate);
+
+  // All blocks should fit into 3 batches.
+  ASSERT_EQ(kBlockSize * kNumBlocks +
+            sizeof(QuarantineBatch) * kNumBatchesAfterMerge, cache.Size());
+
+  ASSERT_EQ(to_deallocate.Size(),
+            sizeof(QuarantineBatch) *
+                (kNumBatchesBeforeMerge - kNumBatchesAfterMerge));
+
+  DeallocateCache(&cache);
+  DeallocateCache(&to_deallocate);
+}
+
+}  // namespace __sanitizer
diff --git a/lib/sanitizer_common/tests/sanitizer_stacktrace_printer_test.cc b/lib/sanitizer_common/tests/sanitizer_stacktrace_printer_test.cc
index 05796fc..405f8d8 100644
--- a/lib/sanitizer_common/tests/sanitizer_stacktrace_printer_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_stacktrace_printer_test.cc
@@ -52,13 +52,18 @@
 
 TEST(SanitizerStacktracePrinter, RenderModuleLocation) {
   InternalScopedString str(128);
-  RenderModuleLocation(&str, "/dir/exe", 0x123, "");
+  RenderModuleLocation(&str, "/dir/exe", 0x123, kModuleArchUnknown, "");
   EXPECT_STREQ("(/dir/exe+0x123)", str.data());
 
   // Check that we strip file prefix if necessary.
   str.clear();
-  RenderModuleLocation(&str, "/dir/exe", 0x123, "/dir/");
+  RenderModuleLocation(&str, "/dir/exe", 0x123, kModuleArchUnknown, "/dir/");
   EXPECT_STREQ("(exe+0x123)", str.data());
+
+  // Check that we render the arch.
+  str.clear();
+  RenderModuleLocation(&str, "/dir/exe", 0x123, kModuleArchX86_64H, "/dir/");
+  EXPECT_STREQ("(exe:x86_64h+0x123)", str.data());
 }
 
 TEST(SanitizerStacktracePrinter, RenderFrame) {
diff --git a/lib/sanitizer_common/tests/sanitizer_thread_registry_test.cc b/lib/sanitizer_common/tests/sanitizer_thread_registry_test.cc
index 1132bfd..f8b8c12 100644
--- a/lib/sanitizer_common/tests/sanitizer_thread_registry_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_thread_registry_test.cc
@@ -67,7 +67,7 @@
 static void TestRegistry(ThreadRegistry *registry, bool has_quarantine) {
   // Create and start a main thread.
   EXPECT_EQ(0U, registry->CreateThread(get_uid(0), true, -1, 0));
-  registry->StartThread(0, 0, 0);
+  registry->StartThread(0, 0, false, 0);
   // Create a bunch of threads.
   for (u32 i = 1; i <= 10; i++) {
     EXPECT_EQ(i, registry->CreateThread(get_uid(i), is_detached(i), 0, 0));
@@ -75,7 +75,7 @@
   CheckThreadQuantity(registry, 11, 1, 11);
   // Start some of them.
   for (u32 i = 1; i <= 5; i++) {
-    registry->StartThread(i, 0, 0);
+    registry->StartThread(i, 0, false, 0);
   }
   CheckThreadQuantity(registry, 11, 6, 11);
   // Finish, create and start more threads.
@@ -85,7 +85,7 @@
       registry->JoinThread(i, 0);
   }
   for (u32 i = 6; i <= 10; i++) {
-    registry->StartThread(i, 0, 0);
+    registry->StartThread(i, 0, false, 0);
   }
   std::vector<u32> new_tids;
   for (u32 i = 11; i <= 15; i++) {
@@ -112,7 +112,7 @@
   }
   for (u32 i = 0; i < new_tids.size(); i++) {
     u32 tid = new_tids[i];
-    registry->StartThread(tid, 0, 0);
+    registry->StartThread(tid, 0, false, 0);
     registry->DetachThread(tid, 0);
     registry->FinishThread(tid);
   }
@@ -189,7 +189,7 @@
     tids.push_back(
         args->registry->CreateThread(0, false, 0, (void*)args->shard));
   for (int i = 0; i < kThreadsPerShard; i++)
-    args->registry->StartThread(tids[i], 0, (void*)args->shard);
+    args->registry->StartThread(tids[i], 0, false, (void*)args->shard);
   for (int i = 0; i < kThreadsPerShard; i++)
     args->registry->FinishThread(tids[i]);
   for (int i = 0; i < kThreadsPerShard; i++)
@@ -200,7 +200,7 @@
 static void ThreadedTestRegistry(ThreadRegistry *registry) {
   // Create and start a main thread.
   EXPECT_EQ(0U, registry->CreateThread(0, true, -1, 0));
-  registry->StartThread(0, 0, 0);
+  registry->StartThread(0, 0, false, 0);
   pthread_t threads[kNumShards];
   RunThreadArgs args[kNumShards];
   for (int i = 0; i < kNumShards; i++) {
diff --git a/lib/sanitizer_common/weak_symbols.txt b/lib/sanitizer_common/weak_symbols.txt
new file mode 100644
index 0000000..5a2b275
--- /dev/null
+++ b/lib/sanitizer_common/weak_symbols.txt
@@ -0,0 +1,8 @@
+___sanitizer_free_hook
+___sanitizer_malloc_hook
+___sanitizer_report_error_summary
+___sanitizer_sandbox_on_notify
+___sanitizer_symbolize_code
+___sanitizer_symbolize_data
+___sanitizer_symbolize_demangle
+___sanitizer_symbolize_flush
diff --git a/lib/scudo/CMakeLists.txt b/lib/scudo/CMakeLists.txt
index 332c3a9..ba5e8ac 100644
--- a/lib/scudo/CMakeLists.txt
+++ b/lib/scudo/CMakeLists.txt
@@ -3,17 +3,30 @@
 include_directories(..)
 
 set(SCUDO_CFLAGS ${SANITIZER_COMMON_CFLAGS})
+# SANITIZER_COMMON_CFLAGS include -fno-builtin, but we actually want builtins!
+list(APPEND SCUDO_CFLAGS -fbuiltin)
 append_rtti_flag(OFF SCUDO_CFLAGS)
-append_list_if(COMPILER_RT_HAS_MSSE4_2_FLAG -msse4.2 SCUDO_CFLAGS)
 
 set(SCUDO_SOURCES
   scudo_allocator.cpp
   scudo_flags.cpp
+  scudo_crc32.cpp
   scudo_interceptors.cpp
   scudo_new_delete.cpp
   scudo_termination.cpp
   scudo_utils.cpp)
 
+# Enable the SSE 4.2 instruction set for scudo_crc32.cpp, if available.
+if (COMPILER_RT_HAS_MSSE4_2_FLAG)
+  set_source_files_properties(scudo_crc32.cpp PROPERTIES COMPILE_FLAGS -msse4.2)
+endif()
+
+# Enable the AArch64 CRC32 feature for scudo_crc32.cpp, if available.
+# Note that it is enabled by default starting with armv8.1-a.
+if (COMPILER_RT_HAS_MCRC_FLAG)
+  set_source_files_properties(scudo_crc32.cpp PROPERTIES COMPILE_FLAGS -mcrc)
+endif()
+
 if(COMPILER_RT_HAS_SCUDO)
   foreach(arch ${SCUDO_SUPPORTED_ARCH})
     add_compiler_rt_runtime(clang_rt.scudo
diff --git a/lib/scudo/scudo_allocator.cpp b/lib/scudo/scudo_allocator.cpp
index 57a2be4..dab6abe 100644
--- a/lib/scudo/scudo_allocator.cpp
+++ b/lib/scudo/scudo_allocator.cpp
@@ -25,22 +25,6 @@
 
 #include <cstring>
 
-// Hardware CRC32 is supported at compilation via the following:
-// - for i386 & x86_64: -msse4.2
-// - for ARM & AArch64: -march=armv8-a+crc
-// An additional check must be performed at runtime as well to make sure the
-// emitted instructions are valid on the target host.
-#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
-# ifdef __SSE4_2__
-#  include <smmintrin.h>
-#  define HW_CRC32 FIRST_32_SECOND_64(_mm_crc32_u32, _mm_crc32_u64)
-# endif
-# ifdef __ARM_FEATURE_CRC32
-#  include <arm_acle.h>
-#  define HW_CRC32 FIRST_32_SECOND_64(__crc32cw, __crc32cd)
-# endif
-#endif
-
 namespace __scudo {
 
 #if SANITIZER_CAN_USE_ALLOCATOR64
@@ -68,7 +52,7 @@
 # elif SANITIZER_WORDSIZE == 64
 typedef TwoLevelByteMap<(NumRegions >> 12), 1 << 12> ByteMap;
 # endif  // SANITIZER_WORDSIZE
-typedef SizeClassMap<3, 4, 8, 16, 64, 14> SizeClassMap;
+typedef DefaultSizeClassMap SizeClassMap;
 typedef SizeClassAllocator32<0, SANITIZER_MMAP_RANGE_SIZE, 0, SizeClassMap,
     RegionSizeLog, ByteMap> PrimaryAllocator;
 #endif  // SANITIZER_CAN_USE_ALLOCATOR64
@@ -84,31 +68,25 @@
 // Global static cookie, initialized at start-up.
 static uptr Cookie;
 
-enum : u8 {
-  CRC32Software = 0,
-  CRC32Hardware = 1,
-};
 // We default to software CRC32 if the alternatives are not supported, either
 // at compilation or at runtime.
 static atomic_uint8_t HashAlgorithm = { CRC32Software };
 
-// Helper function that will compute the chunk checksum, being passed all the
-// the needed information as uptrs. It will opt for the hardware version of
-// the checksumming function if available.
-INLINE u32 hashUptrs(uptr Pointer, uptr *Array, uptr ArraySize, u8 HashType) {
-  u32 Crc;
+SANITIZER_WEAK_ATTRIBUTE u32 computeHardwareCRC32(u32 Crc, uptr Data);
+
+INLINE u32 computeCRC32(u32 Crc, uptr Data, u8 HashType) {
+  // If SSE4.2 is defined here, it was enabled everywhere, as opposed to only
+  // for scudo_crc32.cpp. This means that other SSE instructions were likely
+  // emitted at other places, and as a result there is no reason to not use
+  // the hardware version of the CRC32.
 #if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
-  if (HashType == CRC32Hardware) {
-    Crc = HW_CRC32(Cookie, Pointer);
-    for (uptr i = 0; i < ArraySize; i++)
-      Crc = HW_CRC32(Crc, Array[i]);
-    return Crc;
-  }
-#endif
-  Crc = computeCRC32(Cookie, Pointer);
-  for (uptr i = 0; i < ArraySize; i++)
-    Crc = computeCRC32(Crc, Array[i]);
-  return Crc;
+  return computeHardwareCRC32(Crc, Data);
+#else
+  if (computeHardwareCRC32 && HashType == CRC32Hardware)
+    return computeHardwareCRC32(Crc, Data);
+  else
+    return computeSoftwareCRC32(Crc, Data);
+#endif  // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
 }
 
 struct ScudoChunk : UnpackedHeader {
@@ -135,11 +113,11 @@
     ZeroChecksumHeader.Checksum = 0;
     uptr HeaderHolder[sizeof(UnpackedHeader) / sizeof(uptr)];
     memcpy(&HeaderHolder, &ZeroChecksumHeader, sizeof(HeaderHolder));
-    u32 Hash = hashUptrs(reinterpret_cast<uptr>(this),
-                         HeaderHolder,
-                         ARRAY_SIZE(HeaderHolder),
-                         atomic_load_relaxed(&HashAlgorithm));
-    return static_cast<u16>(Hash);
+    u8 HashType = atomic_load_relaxed(&HashAlgorithm);
+    u32 Crc = computeCRC32(Cookie, reinterpret_cast<uptr>(this), HashType);
+    for (uptr i = 0; i < ARRAY_SIZE(HeaderHolder); i++)
+      Crc = computeCRC32(Crc, HeaderHolder[i], HashType);
+    return static_cast<u16>(Crc);
   }
 
   // Checks the validity of a chunk by verifying its checksum.
@@ -147,8 +125,7 @@
     UnpackedHeader NewUnpackedHeader;
     const AtomicPackedHeader *AtomicHeader =
         reinterpret_cast<const AtomicPackedHeader *>(this);
-    PackedHeader NewPackedHeader =
-        AtomicHeader->load(std::memory_order_relaxed);
+    PackedHeader NewPackedHeader = atomic_load_relaxed(AtomicHeader);
     NewUnpackedHeader = bit_cast<UnpackedHeader>(NewPackedHeader);
     return (NewUnpackedHeader.Checksum == computeChecksum(&NewUnpackedHeader));
   }
@@ -157,8 +134,7 @@
   void loadHeader(UnpackedHeader *NewUnpackedHeader) const {
     const AtomicPackedHeader *AtomicHeader =
         reinterpret_cast<const AtomicPackedHeader *>(this);
-    PackedHeader NewPackedHeader =
-        AtomicHeader->load(std::memory_order_relaxed);
+    PackedHeader NewPackedHeader = atomic_load_relaxed(AtomicHeader);
     *NewUnpackedHeader = bit_cast<UnpackedHeader>(NewPackedHeader);
     if (NewUnpackedHeader->Checksum != computeChecksum(NewUnpackedHeader)) {
       dieWithMessage("ERROR: corrupted chunk header at address %p\n", this);
@@ -171,7 +147,7 @@
     PackedHeader NewPackedHeader = bit_cast<PackedHeader>(*NewUnpackedHeader);
     AtomicPackedHeader *AtomicHeader =
         reinterpret_cast<AtomicPackedHeader *>(this);
-    AtomicHeader->store(NewPackedHeader, std::memory_order_relaxed);
+    atomic_store_relaxed(AtomicHeader, NewPackedHeader);
   }
 
   // Packs and stores the header, computing the checksum in the process. We
@@ -184,10 +160,10 @@
     PackedHeader OldPackedHeader = bit_cast<PackedHeader>(*OldUnpackedHeader);
     AtomicPackedHeader *AtomicHeader =
         reinterpret_cast<AtomicPackedHeader *>(this);
-    if (!AtomicHeader->compare_exchange_strong(OldPackedHeader,
-                                               NewPackedHeader,
-                                               std::memory_order_relaxed,
-                                               std::memory_order_relaxed)) {
+    if (!atomic_compare_exchange_strong(AtomicHeader,
+                                        &OldPackedHeader,
+                                        NewPackedHeader,
+                                        memory_order_relaxed)) {
       dieWithMessage("ERROR: race on chunk header at address %p\n", this);
     }
   }
@@ -354,11 +330,11 @@
                      "header\n");
     }
     // Verify that we can fit the maximum amount of unused bytes in the header.
-    // The worst case scenario would be when allocating 1 byte on a MaxAlignment
-    // alignment. Since the combined allocator currently rounds the size up to
-    // the alignment before passing it to the secondary, we end up with
-    // MaxAlignment - 1 extra bytes.
-    uptr MaxUnusedBytes = MaxAlignment - 1;
+    // Given that the Secondary fits the allocation to a page, the worst case
+    // scenario happens in the Primary. It will depend on the second to last
+    // and last class sizes, as well as the dynamic base for the Primary. The
+    // following is an over-approximation that works for our needs.
+    uptr MaxUnusedBytes = SizeClassMap::kMaxSize - 1 - AlignedChunkHeaderSize;
     Header.UnusedBytes = MaxUnusedBytes;
     if (Header.UnusedBytes != MaxUnusedBytes) {
       dieWithMessage("ERROR: the maximum possible unused bytes doesn't fit in "
@@ -378,6 +354,8 @@
 
   // Helper function that checks for a valid Scudo chunk.
   bool isValidPointer(const void *UserPtr) {
+    if (UNLIKELY(!ThreadInited))
+      initThread();
     uptr ChunkBeg = reinterpret_cast<uptr>(UserPtr);
     if (!IsAligned(ChunkBeg, MinAlignment)) {
       return false;
@@ -402,12 +380,18 @@
       Size = 1;
     if (Size >= MaxAllowedMallocSize)
       return BackendAllocator.ReturnNullOrDieOnBadRequest();
-    uptr RoundedSize = RoundUpTo(Size, MinAlignment);
-    uptr NeededSize = RoundedSize + AlignedChunkHeaderSize;
+
+    uptr NeededSize = RoundUpTo(Size, MinAlignment) + AlignedChunkHeaderSize;
     if (Alignment > MinAlignment)
       NeededSize += Alignment;
     if (NeededSize >= MaxAllowedMallocSize)
       return BackendAllocator.ReturnNullOrDieOnBadRequest();
+
+    // Primary backed and Secondary backed allocations have a different
+    // treatment. We deal with alignment requirements of Primary serviced
+    // allocations here, but the Secondary will take care of its own alignment
+    // needs, which means we also have to work around some limitations of the
+    // combined allocator to accommodate the situation.
     bool FromPrimary = PrimaryAllocator::CanAllocate(NeededSize, MinAlignment);
 
     void *Ptr;
@@ -426,8 +410,11 @@
     // If the allocation was serviced by the secondary, the returned pointer
     // accounts for ChunkHeaderSize to pass the alignment check of the combined
     // allocator. Adjust it here.
-    if (!FromPrimary)
+    if (!FromPrimary) {
       AllocBeg -= AlignedChunkHeaderSize;
+      if (Alignment > MinAlignment)
+        NeededSize -= Alignment;
+    }
 
     uptr ActuallyAllocatedSize = BackendAllocator.GetActuallyAllocatedSize(
         reinterpret_cast<void *>(AllocBeg));
@@ -595,6 +582,14 @@
     AllocatorQuarantine.Drain(&ThreadQuarantineCache,
                               QuarantineCallback(&Cache));
   }
+
+  uptr getStats(AllocatorStat StatType) {
+    if (UNLIKELY(!ThreadInited))
+      initThread();
+    uptr stats[AllocatorStatCount];
+    BackendAllocator.GetStats(stats);
+    return stats[StatType];
+  }
 };
 
 static Allocator Instance(LINKER_INITIALIZED);
@@ -679,15 +674,11 @@
 // MallocExtension helper functions
 
 uptr __sanitizer_get_current_allocated_bytes() {
-  uptr stats[AllocatorStatCount];
-  getAllocator().GetStats(stats);
-  return stats[AllocatorStatAllocated];
+  return Instance.getStats(AllocatorStatAllocated);
 }
 
 uptr __sanitizer_get_heap_size() {
-  uptr stats[AllocatorStatCount];
-  getAllocator().GetStats(stats);
-  return stats[AllocatorStatMapped];
+  return Instance.getStats(AllocatorStatMapped);
 }
 
 uptr __sanitizer_get_free_bytes() {
diff --git a/lib/scudo/scudo_allocator.h b/lib/scudo/scudo_allocator.h
index 484b7ea..5f5225b 100644
--- a/lib/scudo/scudo_allocator.h
+++ b/lib/scudo/scudo_allocator.h
@@ -18,7 +18,9 @@
 
 #include "sanitizer_common/sanitizer_allocator.h"
 
-#include <atomic>
+#if !SANITIZER_LINUX
+# error "The Scudo hardened allocator is currently only supported on Linux."
+#endif
 
 namespace __scudo {
 
@@ -44,17 +46,17 @@
 typedef u64 PackedHeader;
 struct UnpackedHeader {
   u64 Checksum    : 16;
-  u64 UnusedBytes : 24; // Needed for reallocation purposes.
+  u64 UnusedBytes : 20; // Needed for reallocation purposes.
   u64 State       : 2;  // available, allocated, or quarantined
   u64 AllocType   : 2;  // malloc, new, new[], or memalign
-  u64 Offset      : 12; // Offset from the beginning of the backend
+  u64 Offset      : 16; // Offset from the beginning of the backend
                         // allocation to the beginning of the chunk itself,
                         // in multiples of MinAlignment. See comment about
                         // its maximum value and test in init().
   u64 Salt        : 8;
 };
 
-typedef std::atomic<PackedHeader> AtomicPackedHeader;
+typedef atomic_uint64_t AtomicPackedHeader;
 COMPILER_CHECK(sizeof(UnpackedHeader) == sizeof(PackedHeader));
 
 // Minimum alignment of 8 bytes for 32-bit, 16 for 64-bit
diff --git a/lib/scudo/scudo_allocator_secondary.h b/lib/scudo/scudo_allocator_secondary.h
index d3468f8..b984f0d 100644
--- a/lib/scudo/scudo_allocator_secondary.h
+++ b/lib/scudo/scudo_allocator_secondary.h
@@ -32,32 +32,39 @@
   void *Allocate(AllocatorStats *Stats, uptr Size, uptr Alignment) {
     // The Scudo frontend prevents us from allocating more than
     // MaxAllowedMallocSize, so integer overflow checks would be superfluous.
-    uptr HeadersSize = sizeof(SecondaryHeader) + AlignedChunkHeaderSize;
-    uptr MapSize = RoundUpTo(Size + sizeof(SecondaryHeader), PageSize);
+    uptr MapSize = Size + SecondaryHeaderSize;
+    MapSize = RoundUpTo(MapSize, PageSize);
     // Account for 2 guard pages, one before and one after the chunk.
     MapSize += 2 * PageSize;
-    // Adding an extra Alignment is not required, it was done by the frontend.
+    // The size passed to the Secondary comprises the alignment, if large
+    // enough. Subtract it here to get the requested size, including header.
+    if (Alignment > MinAlignment)
+      Size -= Alignment;
+
     uptr MapBeg = reinterpret_cast<uptr>(MmapNoAccess(MapSize));
     if (MapBeg == ~static_cast<uptr>(0))
       return ReturnNullOrDieOnOOM();
     // A page-aligned pointer is assumed after that, so check it now.
     CHECK(IsAligned(MapBeg, PageSize));
     uptr MapEnd = MapBeg + MapSize;
+    // The beginning of the user area for that allocation comes after the
+    // initial guard page, and both headers. This is the pointer that has to
+    // abide by alignment requirements.
     uptr UserBeg = MapBeg + PageSize + HeadersSize;
-    // In the event of larger alignments, we will attempt to fit the mmap area
-    // better and unmap extraneous memory. This will also ensure that the
+
+    // In the rare event of larger alignments, we will attempt to fit the mmap
+    // area better and unmap extraneous memory. This will also ensure that the
     // offset and unused bytes field of the header stay small.
     if (Alignment > MinAlignment) {
       if (UserBeg & (Alignment - 1))
         UserBeg += Alignment - (UserBeg & (Alignment - 1));
       CHECK_GE(UserBeg, MapBeg);
-      uptr NewMapBeg = UserBeg - HeadersSize;
-      NewMapBeg = RoundDownTo(NewMapBeg, PageSize) - PageSize;
+      uptr NewMapBeg = RoundDownTo(UserBeg - HeadersSize, PageSize) - PageSize;
       CHECK_GE(NewMapBeg, MapBeg);
-      uptr NewMapSize = RoundUpTo(MapSize - Alignment, PageSize);
-      uptr NewMapEnd = NewMapBeg + NewMapSize;
+      uptr NewMapEnd = RoundUpTo(UserBeg + (Size - AlignedChunkHeaderSize),
+                                 PageSize) + PageSize;
       CHECK_LE(NewMapEnd, MapEnd);
-      // Unmap the extra memory if it's large enough.
+      // Unmap the extra memory if it's large enough, on both sides.
       uptr Diff = NewMapBeg - MapBeg;
       if (Diff > PageSize)
         UnmapOrDie(reinterpret_cast<void *>(MapBeg), Diff);
@@ -65,14 +72,13 @@
       if (Diff > PageSize)
         UnmapOrDie(reinterpret_cast<void *>(NewMapEnd), Diff);
       MapBeg = NewMapBeg;
-      MapSize = NewMapSize;
       MapEnd = NewMapEnd;
+      MapSize = NewMapEnd - NewMapBeg;
     }
-    uptr UserEnd = UserBeg - AlignedChunkHeaderSize + Size;
-    // For larger alignments, Alignment was added by the frontend to Size.
-    if (Alignment > MinAlignment)
-      UserEnd -= Alignment;
+
+    uptr UserEnd = UserBeg + (Size - AlignedChunkHeaderSize);
     CHECK_LE(UserEnd, MapEnd - PageSize);
+    // Actually mmap the memory, preserving the guard pages on either side.
     CHECK_EQ(MapBeg + PageSize, reinterpret_cast<uptr>(
         MmapFixedOrDie(MapBeg + PageSize, MapSize - 2 * PageSize)));
     uptr Ptr = UserBeg - AlignedChunkHeaderSize;
@@ -84,7 +90,7 @@
     // the guard pages.
     Stats->Add(AllocatorStatAllocated, MapSize - 2 * PageSize);
     Stats->Add(AllocatorStatMapped, MapSize - 2 * PageSize);
-    CHECK(IsAligned(UserBeg, Alignment));
+
     return reinterpret_cast<void *>(UserBeg);
   }
 
@@ -173,6 +179,8 @@
     return getHeader(reinterpret_cast<uptr>(Ptr));
   }
 
+  const uptr SecondaryHeaderSize = sizeof(SecondaryHeader);
+  const uptr HeadersSize = SecondaryHeaderSize + AlignedChunkHeaderSize;
   uptr PageSize;
   atomic_uint8_t MayReturnNull;
 };
diff --git a/lib/scudo/scudo_crc32.cpp b/lib/scudo/scudo_crc32.cpp
new file mode 100644
index 0000000..56be22f
--- /dev/null
+++ b/lib/scudo/scudo_crc32.cpp
@@ -0,0 +1,43 @@
+//===-- scudo_crc32.cpp -----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Hardware-accelerated CRC32 step. It lives in its own translation unit so
+/// that the compiler flags enabling the CRC32 instructions apply to this file
+/// only.
+///
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+// Compile-time support for the hardware CRC32 is enabled with:
+// - i386 & x86_64: -msse4.2
+// - ARM & AArch64: -march=armv8-a+crc or -mcrc
+// Callers are still expected to check at runtime that the host implements
+// these instructions before using the function defined below.
+
+#if defined(__SSE4_2__)
+# include <smmintrin.h>
+# define CRC32_INTRINSIC FIRST_32_SECOND_64(_mm_crc32_u32, _mm_crc32_u64)
+#endif  // defined(__SSE4_2__)
+#if defined(__ARM_FEATURE_CRC32)
+# include <arm_acle.h>
+# define CRC32_INTRINSIC FIRST_32_SECOND_64(__crc32cw, __crc32cd)
+#endif  // defined(__ARM_FEATURE_CRC32)
+
+namespace __scudo {
+
+// Only defined when one of the instruction sets above is available; no
+// definition is emitted otherwise.
+#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
+u32 computeHardwareCRC32(u32 Crc, uptr Data) {
+  return CRC32_INTRINSIC(Crc, Data);
+}
+#endif  // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
+
+}  // namespace __scudo
diff --git a/lib/scudo/scudo_flags.cpp b/lib/scudo/scudo_flags.cpp
index b9c8381..64da1d9 100644
--- a/lib/scudo/scudo_flags.cpp
+++ b/lib/scudo/scudo_flags.cpp
@@ -68,7 +68,7 @@
   // Sanity checks and default settings for the Quarantine parameters.
 
   if (f->QuarantineSizeMb < 0) {
-    const int DefaultQuarantineSizeMb = 64;
+    const int DefaultQuarantineSizeMb = FIRST_32_SECOND_64(16, 64);
     f->QuarantineSizeMb = DefaultQuarantineSizeMb;
   }
   // We enforce an upper limit for the quarantine size of 4Gb.
@@ -76,7 +76,8 @@
     dieWithMessage("ERROR: the quarantine size is too large\n");
   }
   if (f->ThreadLocalQuarantineSizeKb < 0) {
-    const int DefaultThreadLocalQuarantineSizeKb = 1024;
+    const int DefaultThreadLocalQuarantineSizeKb =
+        FIRST_32_SECOND_64(256, 1024);
     f->ThreadLocalQuarantineSizeKb = DefaultThreadLocalQuarantineSizeKb;
   }
   // And an upper limit of 128Mb for the thread quarantine cache.
@@ -84,6 +85,10 @@
     dieWithMessage("ERROR: the per thread quarantine cache size is too "
                    "large\n");
   }
+  if (f->ThreadLocalQuarantineSizeKb == 0 && f->QuarantineSizeMb > 0) {
+    dieWithMessage("ERROR: ThreadLocalQuarantineSizeKb can be set to 0 only "
+                   "when QuarantineSizeMb is set to 0\n");
+  }
 }
 
 Flags *getFlags() {
diff --git a/lib/scudo/scudo_flags.inc b/lib/scudo/scudo_flags.inc
index c7a2acf..45f9ea8 100644
--- a/lib/scudo/scudo_flags.inc
+++ b/lib/scudo/scudo_flags.inc
@@ -15,12 +15,14 @@
 # error "Define SCUDO_FLAG prior to including this file!"
 #endif
 
-SCUDO_FLAG(int, QuarantineSizeMb, 64,
+// Default value is set in scudo_flags.cpp based on architecture.
+SCUDO_FLAG(int, QuarantineSizeMb, -1,
            "Size (in Mb) of quarantine used to delay the actual deallocation "
            "of chunks. Lower value may reduce memory usage but decrease the "
            "effectiveness of the mitigation.")
 
-SCUDO_FLAG(int, ThreadLocalQuarantineSizeKb, 1024,
+// Default value is set in scudo_flags.cpp based on architecture.
+SCUDO_FLAG(int, ThreadLocalQuarantineSizeKb, -1,
           "Size (in Kb) of per-thread cache used to offload the global "
           "quarantine. Lower value may reduce memory usage but might increase "
           "the contention on the global quarantine.")
diff --git a/lib/scudo/scudo_utils.cpp b/lib/scudo/scudo_utils.cpp
index c0269ec..4e2f6e0 100644
--- a/lib/scudo/scudo_utils.cpp
+++ b/lib/scudo/scudo_utils.cpp
@@ -20,8 +20,9 @@
 #if defined(__x86_64__) || defined(__i386__)
 # include <cpuid.h>
 #endif
-
-#include <cstring>
+#if defined(__arm__) || defined(__aarch64__)
+# include <sys/auxv.h>
+#endif
 
 // TODO(kostyak): remove __sanitizer *Printf uses in favor for our own less
 //                complicated string formatting code. The following is a
@@ -82,12 +83,12 @@
 }
 
 #ifndef bit_SSE4_2
-#define bit_SSE4_2 bit_SSE42  // clang and gcc have different defines.
+# define bit_SSE4_2 bit_SSE42  // clang and gcc have different defines.
 #endif
 
 bool testCPUFeature(CPUFeature Feature)
 {
-  static CPUIDRegs FeaturesRegs = getCPUFeatures();
+  CPUIDRegs FeaturesRegs = getCPUFeatures();
 
   switch (Feature) {
     case CRC32CPUFeature:  // CRC32 is provided by SSE 4.2.
@@ -97,6 +98,25 @@
   }
   return false;
 }
+#elif defined(__arm__) || defined(__aarch64__)
+// For ARM and AArch64, hardware CRC32 support is indicated in the
+// AT_HWCAP auxiliary vector.
+
+#ifndef HWCAP_CRC32
+# define HWCAP_CRC32 (1<<7)  // HWCAP_CRC32 is missing on older platforms.
+#endif
+
+bool testCPUFeature(CPUFeature Feature) {
+  uptr HWCap = getauxval(AT_HWCAP);
+
+  switch (Feature) {
+    case CRC32CPUFeature:
+      return !!(HWCap & HWCAP_CRC32);
+    default:
+      break;
+  }
+  return false;
+}
 #else
 bool testCPUFeature(CPUFeature Feature) {
   return false;
@@ -185,8 +205,7 @@
   0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
 };
 
-u32 computeCRC32(u32 Crc, uptr Data)
-{
+u32 computeSoftwareCRC32(u32 Crc, uptr Data) {
   for (uptr i = 0; i < sizeof(Data); i++) {
     Crc = CRC32Table[(Crc ^ Data) & 0xff] ^ (Crc >> 8);
     Data >>= 8;
diff --git a/lib/scudo/scudo_utils.h b/lib/scudo/scudo_utils.h
index f93f26e..5082d79 100644
--- a/lib/scudo/scudo_utils.h
+++ b/lib/scudo/scudo_utils.h
@@ -53,8 +53,12 @@
   u64 State[2];
 };
 
-// Software CRC32 functions, to be used when SSE 4.2 support is not detected.
-u32 computeCRC32(u32 Crc, uptr Data);
+enum : u8 {
+  CRC32Software = 0,
+  CRC32Hardware = 1,
+};
+
+u32 computeSoftwareCRC32(u32 Crc, uptr Data);
 
 }  // namespace __scudo
 
diff --git a/lib/stats/CMakeLists.txt b/lib/stats/CMakeLists.txt
index 33ab1ae..2b3d647 100644
--- a/lib/stats/CMakeLists.txt
+++ b/lib/stats/CMakeLists.txt
@@ -5,8 +5,14 @@
 
 if(APPLE)
   set(STATS_LIB_FLAVOR SHARED)
+
+  add_weak_symbols("asan" WEAK_SYMBOL_LINK_FLAGS)
+  add_weak_symbols("ubsan" WEAK_SYMBOL_LINK_FLAGS)
+  add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
 else()
   set(STATS_LIB_FLAVOR STATIC)
+
+  set(WEAK_SYMBOL_LINK_FLAGS)
 endif()
 
 add_compiler_rt_runtime(clang_rt.stats
@@ -17,6 +23,7 @@
   OBJECT_LIBS RTSanitizerCommon
               RTSanitizerCommonLibc
   CFLAGS ${SANITIZER_COMMON_CFLAGS}
+  LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS}
   PARENT_TARGET stats)
 
 add_compiler_rt_runtime(clang_rt.stats_client
@@ -25,4 +32,5 @@
   OS ${SANITIZER_COMMON_SUPPORTED_OS}
   SOURCES stats_client.cc
   CFLAGS ${SANITIZER_COMMON_CFLAGS}
+  LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS}
   PARENT_TARGET stats)
diff --git a/lib/tsan/CMakeLists.txt b/lib/tsan/CMakeLists.txt
index b26a884..195ecb5 100644
--- a/lib/tsan/CMakeLists.txt
+++ b/lib/tsan/CMakeLists.txt
@@ -25,6 +25,7 @@
 set(TSAN_SOURCES
   rtl/tsan_clock.cc
   rtl/tsan_debugging.cc
+  rtl/tsan_external.cc
   rtl/tsan_fd.cc
   rtl/tsan_flags.cc
   rtl/tsan_ignoreset.cc
@@ -107,6 +108,10 @@
     # Pass ASM file directly to the C++ compiler.
     set_source_files_properties(${TSAN_ASM_SOURCES} PROPERTIES LANGUAGE C)
   endif()
+
+  add_weak_symbols("ubsan" WEAK_SYMBOL_LINK_FLAGS)
+  add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
+
   add_compiler_rt_runtime(clang_rt.tsan
     SHARED
     OS ${TSAN_SUPPORTED_OS}
@@ -117,6 +122,7 @@
                 RTSanitizerCommonLibc
                 RTUbsan
     CFLAGS ${TSAN_RTL_CFLAGS}
+    LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS}
     PARENT_TARGET tsan)
   add_compiler_rt_object_libraries(RTTsan_dynamic
     OS ${TSAN_SUPPORTED_OS}
diff --git a/lib/tsan/go/tsan_go.cc b/lib/tsan/go/tsan_go.cc
index 34625c8..7fb4eb2 100644
--- a/lib/tsan/go/tsan_go.cc
+++ b/lib/tsan/go/tsan_go.cc
@@ -214,7 +214,7 @@
   ThreadState *thr = AllocGoroutine();
   *pthr = thr;
   int goid = ThreadCreate(parent, (uptr)pc, 0, true);
-  ThreadStart(thr, goid, 0);
+  ThreadStart(thr, goid, 0, /*workerthread*/ false);
 }
 
 void __tsan_go_end(ThreadState *thr) {
diff --git a/lib/tsan/rtl/tsan.syms.extra b/lib/tsan/rtl/tsan.syms.extra
index 1bc1d93..22dfde9 100644
--- a/lib/tsan/rtl/tsan.syms.extra
+++ b/lib/tsan/rtl/tsan.syms.extra
@@ -1,4 +1,5 @@
 __tsan_init
+__tsan_flush_memory
 __tsan_read*
 __tsan_write*
 __tsan_vptr*
diff --git a/lib/tsan/rtl/tsan_debugging.cc b/lib/tsan/rtl/tsan_debugging.cc
index ac24c89..06154bc 100644
--- a/lib/tsan/rtl/tsan_debugging.cc
+++ b/lib/tsan/rtl/tsan_debugging.cc
@@ -15,6 +15,8 @@
 #include "tsan_report.h"
 #include "tsan_rtl.h"
 
+#include "sanitizer_common/sanitizer_stackdepot.h"
+
 using namespace __tsan;
 
 static const char *ReportTypeDescription(ReportType typ) {
@@ -22,6 +24,7 @@
   if (typ == ReportTypeVptrRace) return "data-race-vptr";
   if (typ == ReportTypeUseAfterFree) return "heap-use-after-free";
   if (typ == ReportTypeVptrUseAfterFree) return "heap-use-after-free-vptr";
+  if (typ == ReportTypeExternalRace) return "external-race";
   if (typ == ReportTypeThreadLeak) return "thread-leak";
   if (typ == ReportTypeMutexDestroyLocked) return "locked-mutex-destroy";
   if (typ == ReportTypeMutexDoubleLock) return "mutex-double-lock";
@@ -125,6 +128,16 @@
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_loc_object_type(void *report, uptr idx,
+                                      const char **object_type) {
+  const ReportDesc *rep = (ReportDesc *)report;
+  CHECK_LT(idx, rep->locs.Size());
+  ReportLocation *loc = rep->locs[idx];
+  *object_type = GetObjectTypeFromTag(loc->external_tag);
+  return 1;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_get_report_mutex(void *report, uptr idx, uptr *mutex_id, void **addr,
                             int *destroyed, void **trace, uptr trace_size) {
   const ReportDesc *rep = (ReportDesc *)report;
@@ -160,3 +173,96 @@
   *tid = rep->unique_tids[idx];
   return 1;
 }
+
+// Classifies |addr| as "meta shadow", "shadow", "heap", "stack", "tls" or
+// "global" and returns that string. For heap blocks and symbolized globals,
+// the region start and size are stored through |region_address_ptr| and
+// |region_size_ptr| (each may be null); every other case stores 0 for both.
+// For symbolized globals the symbol name is also copied into |name| (may be
+// null, holds |name_size| bytes), truncated if needed and NUL-terminated.
+SANITIZER_INTERFACE_ATTRIBUTE
+const char *__tsan_locate_address(uptr addr, char *name, uptr name_size,
+                                  uptr *region_address_ptr,
+                                  uptr *region_size_ptr) {
+  uptr region_address = 0;
+  uptr region_size = 0;
+  const char *region_kind = nullptr;
+  // |name| is optional; only touch it when there is room for a terminator.
+  const bool has_name_buf = name && name_size > 0;
+  if (has_name_buf) name[0] = 0;
+
+  if (IsMetaMem(addr)) {
+    region_kind = "meta shadow";
+  } else if (IsShadowMem(addr)) {
+    region_kind = "shadow";
+  } else {
+    bool is_stack = false;
+    MBlock *b = 0;
+    void *block_begin = nullptr;
+    Allocator *a = allocator();
+    if (a->PointerIsMine((void *)addr)) {
+      block_begin = a->GetBlockBegin((void *)addr);
+      if (block_begin) b = ctx->metamap.GetBlock((uptr)block_begin);
+    }
+
+    if (b != 0) {
+      region_address = (uptr)block_begin;
+      region_size = b->siz;
+      region_kind = "heap";
+    } else {
+      // TODO(kuba.brecka): We should not lock. This is supposed to be called
+      // from within the debugger when other threads are stopped.
+      ctx->thread_registry->Lock();
+      ThreadContext *tctx = IsThreadStackOrTls(addr, &is_stack);
+      ctx->thread_registry->Unlock();
+      if (tctx) {
+        region_kind = is_stack ? "stack" : "tls";
+      } else {
+        region_kind = "global";
+        DataInfo info;
+        if (Symbolizer::GetOrInit()->SymbolizeData(addr, &info)) {
+          if (has_name_buf) {
+            internal_strncpy(name, info.name, name_size);
+            // Force termination in case the copy was truncated.
+            name[name_size - 1] = 0;
+          }
+          region_address = info.start;
+          region_size = info.size;
+        }
+      }
+    }
+  }
+
+  CHECK(region_kind);
+  if (region_address_ptr) *region_address_ptr = region_address;
+  if (region_size_ptr) *region_size_ptr = region_size;
+  return region_kind;
+}
+
+// Looks up the heap block containing |addr| and returns 0 if there is none.
+// Otherwise stores the tid recorded in the block into |thread_id| and that
+// thread's os_id into |os_id|, copies up to |size| frames of the block's
+// stored stack into |trace|, walking the stored trace from its last entry
+// backwards, and returns the number of frames copied.
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_alloc_stack(uptr addr, uptr *trace, uptr size, int *thread_id,
+                           uptr *os_id) {
+  MBlock *b = 0;
+  Allocator *a = allocator();
+  if (a->PointerIsMine((void *)addr)) {
+    void *block_begin = a->GetBlockBegin((void *)addr);
+    if (block_begin) b = ctx->metamap.GetBlock((uptr)block_begin);
+  }
+  if (b == 0) return 0;
+
+  *thread_id = b->tid;
+  // No locking.  This is supposed to be called from within the debugger when
+  // other threads are stopped.
+  ThreadContextBase *tctx = ctx->thread_registry->GetThreadLocked(b->tid);
+  *os_id = tctx->os_id;
+
+  StackTrace stack = StackDepotGet(b->stk);
+  size = Min(size, (uptr)stack.size);
+  for (uptr i = 0; i < size; i++) trace[i] = stack.trace[stack.size - i - 1];
+  return size;
+}
diff --git a/lib/tsan/rtl/tsan_defs.h b/lib/tsan/rtl/tsan_defs.h
index 55580a5..8a0381e 100644
--- a/lib/tsan/rtl/tsan_defs.h
+++ b/lib/tsan/rtl/tsan_defs.h
@@ -149,7 +149,8 @@
 
 // Descriptor of user's memory block.
 struct MBlock {
-  u64  siz;
+  u64  siz : 48;
+  u64  tag : 16;
   u32  stk;
   u16  tid;
 };
diff --git a/lib/tsan/rtl/tsan_external.cc b/lib/tsan/rtl/tsan_external.cc
new file mode 100644
index 0000000..dc8ec62
--- /dev/null
+++ b/lib/tsan/rtl/tsan_external.cc
@@ -0,0 +1,86 @@
+//===-- tsan_external.cc --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_rtl.h"
+
+namespace __tsan {
+
+#define CALLERPC ((uptr)__builtin_return_address(0))
+
+// Capacity of the tag table below. It cannot grow past 65,536, since MBlock
+// only stores tags as 16-bit values, see tsan_defs.h.
+const uptr kMaxTag = 128;
+
+// Object type names, indexed by tag.
+const char *registered_tags[kMaxTag];
+// Next tag to hand out. Tag 0 means "no tag".
+static atomic_uint32_t used_tags{1};  // NOLINT
+
+const char *GetObjectTypeFromTag(uptr tag) {
+  // Tag 0, as well as invalid/corrupted tags, yield NULL; the caller has to
+  // deal with it.
+  const uptr tag_count = atomic_load(&used_tags, memory_order_relaxed);
+  if (tag == 0 || tag >= tag_count) return nullptr;
+  return registered_tags[tag];
+}
+
+// Shared body of __tsan_external_read/write: validates |tag|, then performs a
+// kSizeLog8 read or write of |addr| at |pc| between FuncEntry(|caller_pc|) and
+// FuncExit, with the thread's external_tag set to |tag| for that duration.
+static void ExternalAccess(uptr addr, uptr caller_pc, void *tag, uptr pc,
+                           bool is_write) {
+  CHECK_LT(tag, atomic_load(&used_tags, memory_order_relaxed));
+  ThreadState *thr = cur_thread();
+  thr->external_tag = (uptr)tag;
+  FuncEntry(thr, caller_pc);
+  if (is_write)
+    MemoryWrite(thr, pc, addr, kSizeLog8);
+  else
+    MemoryRead(thr, pc, addr, kSizeLog8);
+  FuncExit(thr);
+  thr->external_tag = 0;
+}
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE
+void *__tsan_external_register_tag(const char *object_type) {
+  const uptr tag = atomic_fetch_add(&used_tags, 1, memory_order_relaxed);
+  CHECK_LT(tag, kMaxTag);
+  registered_tags[tag] = internal_strdup(object_type);
+  return (void *)tag;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_external_assign_tag(void *addr, void *tag) {
+  CHECK_LT(tag, atomic_load(&used_tags, memory_order_relaxed));
+  Allocator *a = allocator();
+  if (!a->PointerIsMine(addr)) return;
+  void *block_begin = a->GetBlockBegin(addr);
+  if (!block_begin) return;
+  if (MBlock *b = ctx->metamap.GetBlock((uptr)block_begin))
+    b->tag = (uptr)tag;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_external_read(void *addr, void *caller_pc, void *tag) {
+  ExternalAccess((uptr)addr, (uptr)caller_pc, tag, CALLERPC,
+                 /*is_write*/ false);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_external_write(void *addr, void *caller_pc, void *tag) {
+  ExternalAccess((uptr)addr, (uptr)caller_pc, tag, CALLERPC,
+                 /*is_write*/ true);
+}
+}  // extern "C"
+
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_flags.inc b/lib/tsan/rtl/tsan_flags.inc
index 071cf42..e9b3e35 100644
--- a/lib/tsan/rtl/tsan_flags.inc
+++ b/lib/tsan/rtl/tsan_flags.inc
@@ -79,5 +79,8 @@
 TSAN_FLAG(const char *, suppressions, "", "Suppressions file name.")
 TSAN_FLAG(bool, ignore_interceptors_accesses, false,
           "Ignore reads and writes from all interceptors.")
+TSAN_FLAG(bool, ignore_noninstrumented_modules, SANITIZER_MAC ? true : false,
+          "Interceptors should only detect races when called from instrumented "
+          "modules.")
 TSAN_FLAG(bool, shared_ptr_interceptor, true,
           "Track atomic reference counting in libc++ shared_ptr and weak_ptr.")
diff --git a/lib/tsan/rtl/tsan_interceptors.cc b/lib/tsan/rtl/tsan_interceptors.cc
index a3a50e1..9bf1b28 100644
--- a/lib/tsan/rtl/tsan_interceptors.cc
+++ b/lib/tsan/rtl/tsan_interceptors.cc
@@ -231,6 +231,8 @@
     if (0 == internal_strcmp(s->type, kSuppressionLib))
       libignore()->AddIgnoredLibrary(s->templ);
   }
+  if (flags()->ignore_noninstrumented_modules)
+    libignore()->IgnoreNoninstrumentedModules(true);
   libignore()->OnLibraryLoaded(0);
 }
 
@@ -252,31 +254,20 @@
 
 ScopedInterceptor::ScopedInterceptor(ThreadState *thr, const char *fname,
                                      uptr pc)
-    : thr_(thr)
-    , pc_(pc)
-    , in_ignored_lib_(false) {
+    : thr_(thr), pc_(pc), in_ignored_lib_(false), ignoring_(false) {
   Initialize(thr);
-  if (!thr_->is_inited)
-    return;
-  if (!thr_->ignore_interceptors)
-    FuncEntry(thr, pc);
+  if (!thr_->is_inited) return;
+  if (!thr_->ignore_interceptors) FuncEntry(thr, pc);
   DPrintf("#%d: intercept %s()\n", thr_->tid, fname);
-  if (!thr_->in_ignored_lib && libignore()->IsIgnored(pc)) {
-    in_ignored_lib_ = true;
-    thr_->in_ignored_lib = true;
-    ThreadIgnoreBegin(thr_, pc_);
-  }
-  if (flags()->ignore_interceptors_accesses) ThreadIgnoreBegin(thr_, pc_);
+  ignoring_ =
+      !thr_->in_ignored_lib && (flags()->ignore_interceptors_accesses ||
+                                libignore()->IsIgnored(pc, &in_ignored_lib_));
+  EnableIgnores();
 }
 
 ScopedInterceptor::~ScopedInterceptor() {
-  if (!thr_->is_inited)
-    return;
-  if (flags()->ignore_interceptors_accesses) ThreadIgnoreEnd(thr_, pc_);
-  if (in_ignored_lib_) {
-    thr_->in_ignored_lib = false;
-    ThreadIgnoreEnd(thr_, pc_);
-  }
+  if (!thr_->is_inited) return;
+  DisableIgnores();
   if (!thr_->ignore_interceptors) {
     ProcessPendingSignals(thr_);
     FuncExit(thr_);
@@ -284,20 +275,24 @@
   }
 }
 
-void ScopedInterceptor::UserCallbackStart() {
-  if (flags()->ignore_interceptors_accesses) ThreadIgnoreEnd(thr_, pc_);
-  if (in_ignored_lib_) {
-    thr_->in_ignored_lib = false;
-    ThreadIgnoreEnd(thr_, pc_);
+void ScopedInterceptor::EnableIgnores() {
+  if (ignoring_) {
+    ThreadIgnoreBegin(thr_, pc_);
+    if (in_ignored_lib_) {
+      DCHECK(!thr_->in_ignored_lib);
+      thr_->in_ignored_lib = true;
+    }
   }
 }
 
-void ScopedInterceptor::UserCallbackEnd() {
-  if (in_ignored_lib_) {
-    thr_->in_ignored_lib = true;
-    ThreadIgnoreBegin(thr_, pc_);
+void ScopedInterceptor::DisableIgnores() {
+  if (ignoring_) {
+    ThreadIgnoreEnd(thr_, pc_);
+    if (in_ignored_lib_) {
+      DCHECK(thr_->in_ignored_lib);
+      thr_->in_ignored_lib = false;
+    }
   }
-  if (flags()->ignore_interceptors_accesses) ThreadIgnoreBegin(thr_, pc_);
 }
 
 #define TSAN_INTERCEPT(func) INTERCEPT_FUNCTION(func)
@@ -886,7 +881,7 @@
       internal_sched_yield();
     Processor *proc = ProcCreate();
     ProcWire(proc, thr);
-    ThreadStart(thr, tid, GetTid());
+    ThreadStart(thr, tid, GetTid(), /*workerthread*/ false);
     atomic_store(&p->tid, 0, memory_order_release);
   }
   void *res = callback(param);
diff --git a/lib/tsan/rtl/tsan_interceptors.h b/lib/tsan/rtl/tsan_interceptors.h
index a0f9a07..72534f4 100644
--- a/lib/tsan/rtl/tsan_interceptors.h
+++ b/lib/tsan/rtl/tsan_interceptors.h
@@ -10,12 +10,13 @@
  public:
   ScopedInterceptor(ThreadState *thr, const char *fname, uptr pc);
   ~ScopedInterceptor();
-  void UserCallbackStart();
-  void UserCallbackEnd();
+  void DisableIgnores();
+  void EnableIgnores();
  private:
   ThreadState *const thr_;
   const uptr pc_;
   bool in_ignored_lib_;
+  bool ignoring_;
 };
 
 }  // namespace __tsan
@@ -39,10 +40,10 @@
 /**/
 
 #define SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START() \
-    si.UserCallbackStart();
+    si.DisableIgnores();
 
 #define SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END() \
-    si.UserCallbackEnd();
+    si.EnableIgnores();
 
 #define TSAN_INTERCEPTOR(ret, func, ...) INTERCEPTOR(ret, func, __VA_ARGS__)
 
diff --git a/lib/tsan/rtl/tsan_interceptors_mac.cc b/lib/tsan/rtl/tsan_interceptors_mac.cc
index 015e48e..fc5eb04 100644
--- a/lib/tsan/rtl/tsan_interceptors_mac.cc
+++ b/lib/tsan/rtl/tsan_interceptors_mac.cc
@@ -297,18 +297,20 @@
 };
 }  // namespace
 
-// This adds a libc++ interceptor for:
+// The following code adds libc++ interceptors for:
 //     void __shared_weak_count::__release_shared() _NOEXCEPT;
+//     bool __shared_count::__release_shared() _NOEXCEPT;
 // Shared and weak pointers in C++ maintain reference counts via atomics in
 // libc++.dylib, which are TSan-invisible, and this leads to false positives in
-// destructor code.  This interceptor re-implements the whole function so that
+// destructor code. These interceptors re-implement the whole functions so that
 // the mo_acq_rel semantics of the atomic decrement are visible.
 //
-// Unfortunately, this interceptor cannot simply Acquire/Release some sync
+// Unfortunately, the interceptors cannot simply Acquire/Release some sync
 // object and call the original function, because it would have a race between
 // the sync and the destruction of the object.  Calling both under a lock will
 // not work because the destructor can invoke this interceptor again (and even
 // in a different thread, so recursive locks don't help).
+
 STDCXX_INTERCEPTOR(void, _ZNSt3__119__shared_weak_count16__release_sharedEv,
                    fake_shared_weak_count *o) {
   if (!flags()->shared_ptr_interceptor)
@@ -327,6 +329,20 @@
   }
 }
 
+STDCXX_INTERCEPTOR(bool, _ZNSt3__114__shared_count16__release_sharedEv,
+                   fake_shared_weak_count *o) {
+  if (!flags()->shared_ptr_interceptor)
+    return REAL(_ZNSt3__114__shared_count16__release_sharedEv)(o);
+
+  SCOPED_TSAN_INTERCEPTOR(_ZNSt3__114__shared_count16__release_sharedEv, o);
+  if (__tsan_atomic64_fetch_add(&o->shared_owners, -1, mo_release) == 0) {
+    Acquire(thr, pc, (uptr)&o->shared_owners);
+    o->on_zero_shared();
+    return true;
+  }
+  return false;
+}
+
 namespace {
 struct call_once_callback_args {
   void (*orig_func)(void *arg);
diff --git a/lib/tsan/rtl/tsan_interface.cc b/lib/tsan/rtl/tsan_interface.cc
index 809d2ab..ad9b1fe 100644
--- a/lib/tsan/rtl/tsan_interface.cc
+++ b/lib/tsan/rtl/tsan_interface.cc
@@ -28,6 +28,10 @@
   Initialize(cur_thread());
 }
 
+void __tsan_flush_memory() {
+  FlushShadowMemory();
+}
+
 void __tsan_read16(void *addr) {
   MemoryRead(cur_thread(), CALLERPC, (uptr)addr, kSizeLog8);
   MemoryRead(cur_thread(), CALLERPC, (uptr)addr + 8, kSizeLog8);
diff --git a/lib/tsan/rtl/tsan_interface.h b/lib/tsan/rtl/tsan_interface.h
index 17171a2..496a871 100644
--- a/lib/tsan/rtl/tsan_interface.h
+++ b/lib/tsan/rtl/tsan_interface.h
@@ -32,6 +32,8 @@
 // before any instrumented code is executed and before any call to malloc.
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_init();
 
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_flush_memory();
+
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read1(void *addr);
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read2(void *addr);
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read4(void *addr);
@@ -77,6 +79,15 @@
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_ignore_thread_end();
 
 SANITIZER_INTERFACE_ATTRIBUTE
+void *__tsan_external_register_tag(const char *object_type);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_external_assign_tag(void *addr, void *tag);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_external_read(void *addr, void *caller_pc, void *tag);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_external_write(void *addr, void *caller_pc, void *tag);
+
+SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_read_range(void *addr, unsigned long size);  // NOLINT
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_write_range(void *addr, unsigned long size);  // NOLINT
@@ -121,6 +132,10 @@
                           int *fd, int *suppressable, void **trace,
                           uptr trace_size);
 
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_loc_object_type(void *report, uptr idx,
+                                      const char **object_type);
+
 // Returns information about mutexes included in the report.
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_get_report_mutex(void *report, uptr idx, uptr *mutex_id, void **addr,
@@ -136,6 +151,17 @@
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_get_report_unique_tid(void *report, uptr idx, int *tid);
 
+// Returns the type of the pointer (heap, stack, global, ...) and if possible
+// also the starting address (e.g. of a heap allocation) and size.
+SANITIZER_INTERFACE_ATTRIBUTE
+const char *__tsan_locate_address(uptr addr, char *name, uptr name_size,
+                                  uptr *region_address, uptr *region_size);
+
+// Returns the allocation stack for a heap pointer.
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_alloc_stack(uptr addr, uptr *trace, uptr size, int *thread_id,
+                           uptr *os_id);
+
 #endif  // SANITIZER_GO
 
 #ifdef __cplusplus
diff --git a/lib/tsan/rtl/tsan_interface_java.cc b/lib/tsan/rtl/tsan_interface_java.cc
index 95be859..5bdc04f 100644
--- a/lib/tsan/rtl/tsan_interface_java.cc
+++ b/lib/tsan/rtl/tsan_interface_java.cc
@@ -150,6 +150,23 @@
   }
 }
 
+jptr __tsan_java_find(jptr *from_ptr, jptr to) {
+  SCOPED_JAVA_FUNC(__tsan_java_find);
+  DPrintf("#%d: java_find(&%p, %p)\n", thr->tid, *from_ptr, to);
+  CHECK_EQ((*from_ptr) % kHeapAlignment, 0);
+  CHECK_EQ(to % kHeapAlignment, 0);
+  CHECK_GE(*from_ptr, jctx->heap_begin);
+  CHECK_LE(to, jctx->heap_begin + jctx->heap_size);
+  for (uptr from = *from_ptr; from < to; from += kHeapAlignment) {
+    MBlock *b = ctx->metamap.GetBlock(from);
+    if (b) {
+      *from_ptr = from;  // Tell the caller where the block was found.
+      return b->siz;     // Size recorded in the block's metadata.
+    }
+  }
+  return 0;  // No block with metadata in [*from_ptr, to).
+}
+
 void __tsan_java_finalize() {
   SCOPED_JAVA_FUNC(__tsan_java_finalize);
   DPrintf("#%d: java_mutex_finalize()\n", thr->tid);
diff --git a/lib/tsan/rtl/tsan_interface_java.h b/lib/tsan/rtl/tsan_interface_java.h
index 30153a1..0bd49ac 100644
--- a/lib/tsan/rtl/tsan_interface_java.h
+++ b/lib/tsan/rtl/tsan_interface_java.h
@@ -57,6 +57,10 @@
 // It ensures necessary synchronization between
 // java object creation and finalization.
 void __tsan_java_finalize() INTERFACE_ATTRIBUTE;
+// Finds the first allocated memory block in the [*from_ptr, to) range, saves
+// its address in *from_ptr and returns its size. Returns 0 if there are no
+// allocated memory blocks in the range.
+jptr __tsan_java_find(jptr *from_ptr, jptr to) INTERFACE_ATTRIBUTE;
 
 // Mutex lock.
 // Addr is any unique address associated with the mutex.
diff --git a/lib/tsan/rtl/tsan_platform_mac.cc b/lib/tsan/rtl/tsan_platform_mac.cc
index 25dd241..b8d3d55 100644
--- a/lib/tsan/rtl/tsan_platform_mac.cc
+++ b/lib/tsan/rtl/tsan_platform_mac.cc
@@ -207,7 +207,7 @@
       ThreadState *parent_thread_state = nullptr;  // No parent.
       int tid = ThreadCreate(parent_thread_state, 0, (uptr)thread, true);
       CHECK_NE(tid, 0);
-      ThreadStart(thr, tid, GetTid());
+      ThreadStart(thr, tid, GetTid(), /*workerthread*/ true);
     }
   } else if (event == PTHREAD_INTROSPECTION_THREAD_TERMINATE) {
     if (thread == pthread_self()) {
diff --git a/lib/tsan/rtl/tsan_report.cc b/lib/tsan/rtl/tsan_report.cc
index 156876e..7de0084 100644
--- a/lib/tsan/rtl/tsan_report.cc
+++ b/lib/tsan/rtl/tsan_report.cc
@@ -90,6 +90,8 @@
     return "heap-use-after-free";
   if (typ == ReportTypeVptrUseAfterFree)
     return "heap-use-after-free (virtual call vs free)";
+  if (typ == ReportTypeExternalRace)
+    return "race on a library object";
   if (typ == ReportTypeThreadLeak)
     return "thread leak";
   if (typ == ReportTypeMutexDestroyLocked)
@@ -152,14 +154,25 @@
                 : (write ? "Previous write" : "Previous read"));
 }
 
+static const char *ExternalMopDesc(bool first, bool write) {
+  if (first) return write ? "Mutating" : "Read-only";  // Current access.
+  return write ? "Previous mutating" : "Previous read-only";
+}
+
 static void PrintMop(const ReportMop *mop, bool first) {
   Decorator d;
   char thrbuf[kThreadBufSize];
   Printf("%s", d.Access());
-  Printf("  %s of size %d at %p by %s",
-      MopDesc(first, mop->write, mop->atomic),
-      mop->size, (void*)mop->addr,
-      thread_name(thrbuf, mop->tid));
+  const char *object_type = GetObjectTypeFromTag(mop->external_tag);
+  if (!object_type) {
+    Printf("  %s of size %d at %p by %s",
+           MopDesc(first, mop->write, mop->atomic), mop->size,
+           (void *)mop->addr, thread_name(thrbuf, mop->tid));
+  } else {
+    Printf("  %s access of object %s at %p by %s",
+           ExternalMopDesc(first, mop->write), object_type,
+           (void *)mop->addr, thread_name(thrbuf, mop->tid));
+  }
   PrintMutexSet(mop->mset);
   Printf(":\n");
   Printf("%s", d.EndAccess());
@@ -183,9 +196,16 @@
              global.module_offset);
   } else if (loc->type == ReportLocationHeap) {
     char thrbuf[kThreadBufSize];
-    Printf("  Location is heap block of size %zu at %p allocated by %s:\n",
-           loc->heap_chunk_size, loc->heap_chunk_start,
-           thread_name(thrbuf, loc->tid));
+    const char *object_type = GetObjectTypeFromTag(loc->external_tag);
+    if (!object_type) {
+      Printf("  Location is heap block of size %zu at %p allocated by %s:\n",
+             loc->heap_chunk_size, loc->heap_chunk_start,
+             thread_name(thrbuf, loc->tid));
+    } else {
+      Printf("  Location is %s object of size %zu at %p allocated by %s:\n",
+             object_type, loc->heap_chunk_size, loc->heap_chunk_start,
+             thread_name(thrbuf, loc->tid));
+    }
     print_stack = true;
   } else if (loc->type == ReportLocationStack) {
     Printf("  Location is stack of %s.\n\n", thread_name(thrbuf, loc->tid));
@@ -235,9 +255,15 @@
   if (rt->name && rt->name[0] != '\0')
     Printf(" '%s'", rt->name);
   char thrbuf[kThreadBufSize];
-  Printf(" (tid=%zu, %s) created by %s",
-    rt->os_id, rt->running ? "running" : "finished",
-    thread_name(thrbuf, rt->parent_tid));
+  const char *thread_status = rt->running ? "running" : "finished";
+  if (rt->workerthread) {
+    Printf(" (tid=%zu, %s) is a GCD worker thread\n", rt->os_id, thread_status);
+    Printf("\n");
+    Printf("%s", d.EndThreadDescription());
+    return;
+  }
+  Printf(" (tid=%zu, %s) created by %s", rt->os_id, thread_status,
+         thread_name(thrbuf, rt->parent_tid));
   if (rt->stack)
     Printf(" at:");
   Printf("\n");
@@ -358,6 +384,8 @@
       ReportErrorSummary(rep_typ_str, frame->info);
   }
 
+  if (common_flags()->print_module_map == 2) PrintModuleMap();
+
   Printf("==================\n");
 }
 
diff --git a/lib/tsan/rtl/tsan_report.h b/lib/tsan/rtl/tsan_report.h
index d0b9d74..8d8ae0f 100644
--- a/lib/tsan/rtl/tsan_report.h
+++ b/lib/tsan/rtl/tsan_report.h
@@ -24,6 +24,7 @@
   ReportTypeVptrRace,
   ReportTypeUseAfterFree,
   ReportTypeVptrUseAfterFree,
+  ReportTypeExternalRace,
   ReportTypeThreadLeak,
   ReportTypeMutexDestroyLocked,
   ReportTypeMutexDoubleLock,
@@ -56,6 +57,7 @@
   int size;
   bool write;
   bool atomic;
+  uptr external_tag;
   Vector<ReportMopMutex> mset;
   ReportStack *stack;
 
@@ -75,6 +77,7 @@
   DataInfo global;
   uptr heap_chunk_start;
   uptr heap_chunk_size;
+  uptr external_tag;
   int tid;
   int fd;
   bool suppressable;
@@ -89,8 +92,9 @@
   int id;
   uptr os_id;
   bool running;
+  bool workerthread;
   char *name;
-  int parent_tid;
+  u32 parent_tid;
   ReportStack *stack;
 };
 
diff --git a/lib/tsan/rtl/tsan_rtl.cc b/lib/tsan/rtl/tsan_rtl.cc
index 804f3cf..bc5991c 100644
--- a/lib/tsan/rtl/tsan_rtl.cc
+++ b/lib/tsan/rtl/tsan_rtl.cc
@@ -381,7 +381,7 @@
   // Initialize thread 0.
   int tid = ThreadCreate(thr, 0, 0, true);
   CHECK_EQ(tid, 0);
-  ThreadStart(thr, tid, internal_getpid());
+  ThreadStart(thr, tid, GetTid(), /*workerthread*/ false);
 #if TSAN_CONTAINS_UBSAN
   __ubsan::InitAsPlugin();
 #endif
@@ -404,6 +404,8 @@
 int Finalize(ThreadState *thr) {
   bool failed = false;
 
+  if (common_flags()->print_module_map == 1) PrintModuleMap();
+
   if (flags()->atexit_sleep_ms > 0 && ThreadCount(thr) > 1)
     SleepForMillis(flags()->atexit_sleep_ms);
 
diff --git a/lib/tsan/rtl/tsan_rtl.h b/lib/tsan/rtl/tsan_rtl.h
index 8fdb6a9..8853941 100644
--- a/lib/tsan/rtl/tsan_rtl.h
+++ b/lib/tsan/rtl/tsan_rtl.h
@@ -410,6 +410,7 @@
   bool is_dead;
   bool is_freeing;
   bool is_vptr_access;
+  uptr external_tag;
   const uptr stk_addr;
   const uptr stk_size;
   const uptr tls_addr;
@@ -564,7 +565,7 @@
   explicit ScopedReport(ReportType typ);
   ~ScopedReport();
 
-  void AddMemoryAccess(uptr addr, Shadow s, StackTrace stack,
+  void AddMemoryAccess(uptr addr, uptr external_tag, Shadow s, StackTrace stack,
                        const MutexSet *mset);
   void AddStack(StackTrace stack, bool suppressable = false);
   void AddThread(const ThreadContext *tctx, bool suppressable = false);
@@ -590,6 +591,7 @@
   void operator = (const ScopedReport&);
 };
 
+ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack);
 void RestoreStack(int tid, const u64 epoch, VarSizeStackTrace *stk,
                   MutexSet *mset);
 
@@ -639,6 +641,8 @@
 bool IsExpectedReport(uptr addr, uptr size);
 void PrintMatchedBenignRaces();
 
+const char *GetObjectTypeFromTag(uptr tag);
+
 #if defined(TSAN_DEBUG_OUTPUT) && TSAN_DEBUG_OUTPUT >= 1
 # define DPrintf Printf
 #else
@@ -712,7 +716,7 @@
 void FuncExit(ThreadState *thr);
 
 int ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached);
-void ThreadStart(ThreadState *thr, int tid, uptr os_id);
+void ThreadStart(ThreadState *thr, int tid, uptr os_id, bool workerthread);
 void ThreadFinish(ThreadState *thr);
 int ThreadTid(ThreadState *thr, uptr pc, uptr uid);
 void ThreadJoin(ThreadState *thr, uptr pc, int tid);
diff --git a/lib/tsan/rtl/tsan_rtl_report.cc b/lib/tsan/rtl/tsan_rtl_report.cc
index bc8944f..31b9e97 100644
--- a/lib/tsan/rtl/tsan_rtl_report.cc
+++ b/lib/tsan/rtl/tsan_rtl_report.cc
@@ -164,8 +164,8 @@
   (*rs)->suppressable = suppressable;
 }
 
-void ScopedReport::AddMemoryAccess(uptr addr, Shadow s, StackTrace stack,
-                                   const MutexSet *mset) {
+void ScopedReport::AddMemoryAccess(uptr addr, uptr external_tag, Shadow s,
+                                   StackTrace stack, const MutexSet *mset) {
   void *mem = internal_alloc(MBlockReportMop, sizeof(ReportMop));
   ReportMop *mop = new(mem) ReportMop;
   rep_->mops.PushBack(mop);
@@ -175,6 +175,7 @@
   mop->write = s.IsWrite();
   mop->atomic = s.IsAtomic();
   mop->stack = SymbolizeStack(stack);
+  mop->external_tag = external_tag;
   if (mop->stack)
     mop->stack->suppressable = true;
   for (uptr i = 0; i < mset->Size(); i++) {
@@ -202,6 +203,7 @@
   rt->running = (tctx->status == ThreadStatusRunning);
   rt->name = internal_strdup(tctx->name);
   rt->parent_tid = tctx->parent_tid;
+  rt->workerthread = tctx->workerthread;
   rt->stack = 0;
   rt->stack = SymbolizeStackId(tctx->creation_stack_id);
   if (rt->stack)
@@ -336,6 +338,7 @@
     ReportLocation *loc = ReportLocation::New(ReportLocationHeap);
     loc->heap_chunk_start = (uptr)allocator()->GetBlockBegin((void *)addr);
     loc->heap_chunk_size = b->siz;
+    loc->external_tag = b->tag;
     loc->tid = tctx ? tctx->tid : b->tid;
     loc->stack = SymbolizeStackId(b->stk);
     rep_->locs.PushBack(loc);
@@ -622,6 +625,8 @@
     typ = ReportTypeVptrRace;
   else if (freed)
     typ = ReportTypeUseAfterFree;
+  else if (thr->external_tag > 0)
+    typ = ReportTypeExternalRace;
 
   if (IsFiredSuppression(ctx, typ, addr))
     return;
@@ -650,7 +655,8 @@
   ScopedReport rep(typ);
   for (uptr i = 0; i < kMop; i++) {
     Shadow s(thr->racy_state[i]);
-    rep.AddMemoryAccess(addr, s, traces[i], i == 0 ? &thr->mset : mset2);
+    rep.AddMemoryAccess(addr, thr->external_tag, s, traces[i],
+                        i == 0 ? &thr->mset : mset2);
   }
 
   for (uptr i = 0; i < kMop; i++) {
diff --git a/lib/tsan/rtl/tsan_rtl_thread.cc b/lib/tsan/rtl/tsan_rtl_thread.cc
index 5b17dc6..7357d97 100644
--- a/lib/tsan/rtl/tsan_rtl_thread.cc
+++ b/lib/tsan/rtl/tsan_rtl_thread.cc
@@ -236,7 +236,7 @@
   return tid;
 }
 
-void ThreadStart(ThreadState *thr, int tid, uptr os_id) {
+void ThreadStart(ThreadState *thr, int tid, uptr os_id, bool workerthread) {
   uptr stk_addr = 0;
   uptr stk_size = 0;
   uptr tls_addr = 0;
@@ -266,7 +266,7 @@
 
   ThreadRegistry *tr = ctx->thread_registry;
   OnStartedArgs args = { thr, stk_addr, stk_size, tls_addr, tls_size };
-  tr->StartThread(tid, os_id, &args);
+  tr->StartThread(tid, os_id, workerthread, &args);
 
   tr->Lock();
   thr->tctx = (ThreadContext*)tr->GetThreadLocked(tid);
diff --git a/lib/tsan/rtl/tsan_suppressions.cc b/lib/tsan/rtl/tsan_suppressions.cc
index bfb64e0..e39702b 100644
--- a/lib/tsan/rtl/tsan_suppressions.cc
+++ b/lib/tsan/rtl/tsan_suppressions.cc
@@ -74,6 +74,8 @@
     return kSuppressionRace;
   else if (typ == ReportTypeVptrUseAfterFree)
     return kSuppressionRace;
+  else if (typ == ReportTypeExternalRace)
+    return kSuppressionRace;
   else if (typ == ReportTypeThreadLeak)
     return kSuppressionThread;
   else if (typ == ReportTypeMutexDestroyLocked)
diff --git a/lib/tsan/rtl/tsan_sync.cc b/lib/tsan/rtl/tsan_sync.cc
index 44c6a26..2be0474 100644
--- a/lib/tsan/rtl/tsan_sync.cc
+++ b/lib/tsan/rtl/tsan_sync.cc
@@ -64,6 +64,7 @@
   u32 idx = block_alloc_.Alloc(&thr->proc()->block_cache);
   MBlock *b = block_alloc_.Map(idx);
   b->siz = sz;
+  b->tag = 0;
   b->tid = thr->tid;
   b->stk = CurrentStackId(thr, pc);
   u32 *meta = MemToMeta(p);
diff --git a/lib/tsan/tests/CMakeLists.txt b/lib/tsan/tests/CMakeLists.txt
index 4587e47..87e1417 100644
--- a/lib/tsan/tests/CMakeLists.txt
+++ b/lib/tsan/tests/CMakeLists.txt
@@ -76,14 +76,18 @@
           ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
         list(APPEND TEST_OBJECTS lib${TSAN_TEST_RUNTIME}.a)
         list(APPEND TEST_DEPS ${TSAN_TEST_RUNTIME})
+
+        add_weak_symbols("ubsan" WEAK_SYMBOL_LINK_FLAGS)
+        add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
+
         # Intentionally do *not* link with `-fsanitize=thread`. We already link
         # against a static version of the runtime, and we don't want the dynamic
         # one.
         add_compiler_rt_test(TsanUnitTests "${testname}-${arch}-Test"
                 OBJECTS ${TEST_OBJECTS}
                 DEPS ${TEST_DEPS}
-                LINK_FLAGS ${TARGET_LINK_FLAGS} ${DARWIN_osx_LINKFLAGS}
-                           -lc++)
+                LINK_FLAGS ${TARGET_LINK_FLAGS} ${DARWIN_osx_LINK_FLAGS}
+                           ${WEAK_SYMBOL_LINK_FLAGS} -lc++)
       endif()
     endforeach()
   endif()
diff --git a/lib/tsan/tests/rtl/tsan_test_util_posix.cc b/lib/tsan/tests/rtl/tsan_test_util_posix.cc
index 01e3f7c..834a271 100644
--- a/lib/tsan/tests/rtl/tsan_test_util_posix.cc
+++ b/lib/tsan/tests/rtl/tsan_test_util_posix.cc
@@ -60,11 +60,11 @@
     if (rep->typ != expect_report_type) {
       printf("Expected report of type %d, got type %d\n",
              (int)expect_report_type, (int)rep->typ);
-      EXPECT_FALSE("Wrong report type");
+      EXPECT_TRUE(false) << "Wrong report type";
       return false;
     }
   } else {
-    EXPECT_FALSE("Unexpected report");
+    EXPECT_TRUE(false) << "Unexpected report";
     return false;
   }
   expect_report_reported = true;
@@ -323,7 +323,7 @@
   }
   if (expect_report && !expect_report_reported) {
     printf("Missed expected report of type %d\n", (int)ev->report_type);
-    EXPECT_FALSE("Missed expected race");
+    EXPECT_TRUE(false) << "Missed expected race";
   }
   expect_report = false;
 }
diff --git a/lib/ubsan/CMakeLists.txt b/lib/ubsan/CMakeLists.txt
index 036c65a..f35b40f 100644
--- a/lib/ubsan/CMakeLists.txt
+++ b/lib/ubsan/CMakeLists.txt
@@ -56,6 +56,9 @@
       SOURCES ${UBSAN_STANDALONE_SOURCES}
       CFLAGS ${UBSAN_STANDALONE_CFLAGS})
 
+    add_weak_symbols("ubsan" WEAK_SYMBOL_LINK_FLAGS)
+    add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
+
     add_compiler_rt_runtime(clang_rt.ubsan
       SHARED
       OS ${SANITIZER_COMMON_SUPPORTED_OS}
@@ -64,6 +67,7 @@
                   RTUbsan_standalone
                   RTSanitizerCommon
                   RTSanitizerCommonLibc
+      LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS}
       PARENT_TARGET ubsan)
   endif()
 
@@ -86,6 +90,35 @@
     ARCHS ${UBSAN_COMMON_SUPPORTED_ARCH}
     SOURCES ${UBSAN_CXX_SOURCES} CFLAGS ${UBSAN_CXXFLAGS})
 
+  if (WIN32)
+    add_compiler_rt_object_libraries(UbsanWeakInterception
+      ${SANITIZER_COMMON_SUPPORTED_OS}
+      ARCHS ${UBSAN_SUPPORTED_ARCH}
+      SOURCES ubsan_win_weak_interception.cc
+      CFLAGS ${UBSAN_CFLAGS} -DSANITIZER_DYNAMIC
+      DEFS ${UBSAN_COMMON_DEFINITIONS})
+
+    add_compiler_rt_object_libraries(UbsanDllThunk
+      ${SANITIZER_COMMON_SUPPORTED_OS}
+      ARCHS ${UBSAN_SUPPORTED_ARCH}
+      SOURCES ubsan_win_dll_thunk.cc
+      CFLAGS ${UBSAN_CFLAGS} -DSANITIZER_DLL_THUNK
+      DEFS ${UBSAN_COMMON_DEFINITIONS})
+
+    set(DYNAMIC_RUNTIME_THUNK_CFLAGS "-DSANITIZER_DYNAMIC_RUNTIME_THUNK")
+    if(MSVC)
+      list(APPEND DYNAMIC_RUNTIME_THUNK_CFLAGS "-Zl")
+    elseif(CMAKE_C_COMPILER_ID MATCHES Clang)
+      list(APPEND DYNAMIC_RUNTIME_THUNK_CFLAGS "-nodefaultlibs")
+    endif()
+    add_compiler_rt_object_libraries(UbsanDynamicRuntimeThunk
+      ${SANITIZER_COMMON_SUPPORTED_OS}
+      ARCHS ${UBSAN_SUPPORTED_ARCH}
+      SOURCES ubsan_win_dynamic_runtime_thunk.cc
+      CFLAGS ${UBSAN_CFLAGS} ${DYNAMIC_RUNTIME_THUNK_CFLAGS}
+      DEFS ${UBSAN_COMMON_DEFINITIONS})
+  endif()
+
   if(COMPILER_RT_HAS_UBSAN)
     # Initializer of standalone UBSan runtime.
     add_compiler_rt_object_libraries(RTUbsan_standalone
diff --git a/lib/ubsan/ubsan_diag.cc b/lib/ubsan/ubsan_diag.cc
index d842694..c531c5f 100644
--- a/lib/ubsan/ubsan_diag.cc
+++ b/lib/ubsan/ubsan_diag.cc
@@ -157,7 +157,7 @@
                            common_flags()->strip_path_prefix);
     else if (Info.module)
       RenderModuleLocation(Buffer, Info.module, Info.module_offset,
-                           common_flags()->strip_path_prefix);
+                           Info.module_arch, common_flags()->strip_path_prefix);
     else
       Buffer->append("%p", Info.address);
     return;
diff --git a/lib/ubsan/ubsan_flags.cc b/lib/ubsan/ubsan_flags.cc
index e77ba55..3d404c1 100644
--- a/lib/ubsan/ubsan_flags.cc
+++ b/lib/ubsan/ubsan_flags.cc
@@ -67,22 +67,8 @@
 
 }  // namespace __ubsan
 
-extern "C" {
-
-#if !SANITIZER_SUPPORTS_WEAK_HOOKS
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-const char *__ubsan_default_options() { return ""; }
-#endif
-
-#if SANITIZER_WINDOWS
-const char *__ubsan_default_default_options() { return ""; }
-# ifdef _WIN64
-#  pragma comment(linker, "/alternatename:__ubsan_default_options=__ubsan_default_default_options")
-# else
-#  pragma comment(linker, "/alternatename:___ubsan_default_options=___ubsan_default_default_options")
-# endif
-#endif
-
-}  // extern "C"
+SANITIZER_INTERFACE_WEAK_DEF(const char *, __ubsan_default_options, void) {
+  return "";
+}
 
 #endif  // CAN_SANITIZE_UB
diff --git a/lib/ubsan/ubsan_handlers.cc b/lib/ubsan/ubsan_handlers.cc
index 4ede388..6ffffae 100644
--- a/lib/ubsan/ubsan_handlers.cc
+++ b/lib/ubsan/ubsan_handlers.cc
@@ -45,10 +45,11 @@
                                    ReportOptions Opts) {
   Location Loc = Data->Loc.acquire();
 
+  uptr Alignment = (uptr)1 << Data->LogAlignment;
   ErrorType ET;
   if (!Pointer)
     ET = ErrorType::NullPointerUse;
-  else if (Data->Alignment && (Pointer & (Data->Alignment - 1)))
+  else if (Pointer & (Alignment - 1))
     ET = ErrorType::MisalignedPointerUse;
   else
     ET = ErrorType::InsufficientObjectSize;
@@ -74,8 +75,8 @@
   case ErrorType::MisalignedPointerUse:
     Diag(Loc, DL_Error, "%0 misaligned address %1 for type %3, "
                         "which requires %2 byte alignment")
-        << TypeCheckKinds[Data->TypeCheckKind] << (void *)Pointer
-        << Data->Alignment << Data->Type;
+        << TypeCheckKinds[Data->TypeCheckKind] << (void *)Pointer << Alignment
+        << Data->Type;
     break;
   case ErrorType::InsufficientObjectSize:
     Diag(Loc, DL_Error, "%0 address %1 with insufficient space "
@@ -90,13 +91,13 @@
     Diag(Pointer, DL_Note, "pointer points here");
 }
 
-void __ubsan::__ubsan_handle_type_mismatch(TypeMismatchData *Data,
-                                           ValueHandle Pointer) {
+void __ubsan::__ubsan_handle_type_mismatch_v1(TypeMismatchData *Data,
+                                              ValueHandle Pointer) {
   GET_REPORT_OPTIONS(false);
   handleTypeMismatchImpl(Data, Pointer, Opts);
 }
-void __ubsan::__ubsan_handle_type_mismatch_abort(TypeMismatchData *Data,
-                                                 ValueHandle Pointer) {
+void __ubsan::__ubsan_handle_type_mismatch_v1_abort(TypeMismatchData *Data,
+                                                    ValueHandle Pointer) {
   GET_REPORT_OPTIONS(true);
   handleTypeMismatchImpl(Data, Pointer, Opts);
   Die();
diff --git a/lib/ubsan/ubsan_handlers.h b/lib/ubsan/ubsan_handlers.h
index e0cfd5b..350eb91 100644
--- a/lib/ubsan/ubsan_handlers.h
+++ b/lib/ubsan/ubsan_handlers.h
@@ -20,7 +20,7 @@
 struct TypeMismatchData {
   SourceLocation Loc;
   const TypeDescriptor &Type;
-  uptr Alignment;
+  unsigned char LogAlignment;
   unsigned char TypeCheckKind;
 };
 
@@ -37,7 +37,7 @@
 /// \brief Handle a runtime type check failure, caused by either a misaligned
 /// pointer, a null pointer, or a pointer to insufficient storage for the
 /// type.
-RECOVERABLE(type_mismatch, TypeMismatchData *Data, ValueHandle Pointer)
+RECOVERABLE(type_mismatch_v1, TypeMismatchData *Data, ValueHandle Pointer)
 
 struct OverflowData {
   SourceLocation Loc;
diff --git a/lib/ubsan/ubsan_interface.inc b/lib/ubsan/ubsan_interface.inc
new file mode 100644
index 0000000..75f080d
--- /dev/null
+++ b/lib/ubsan/ubsan_interface.inc
@@ -0,0 +1,43 @@
+//===-- ubsan_interface.inc -----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Ubsan interface list.
+//===----------------------------------------------------------------------===//
+INTERFACE_FUNCTION(__ubsan_handle_add_overflow)
+INTERFACE_FUNCTION(__ubsan_handle_add_overflow_abort)
+INTERFACE_FUNCTION(__ubsan_handle_builtin_unreachable)
+INTERFACE_FUNCTION(__ubsan_handle_cfi_check_fail)
+INTERFACE_FUNCTION(__ubsan_handle_cfi_check_fail_abort)
+INTERFACE_FUNCTION(__ubsan_handle_divrem_overflow)
+INTERFACE_FUNCTION(__ubsan_handle_divrem_overflow_abort)
+INTERFACE_FUNCTION(__ubsan_handle_float_cast_overflow)
+INTERFACE_FUNCTION(__ubsan_handle_float_cast_overflow_abort)
+INTERFACE_FUNCTION(__ubsan_handle_function_type_mismatch)
+INTERFACE_FUNCTION(__ubsan_handle_function_type_mismatch_abort)
+INTERFACE_FUNCTION(__ubsan_handle_load_invalid_value)
+INTERFACE_FUNCTION(__ubsan_handle_load_invalid_value_abort)
+INTERFACE_FUNCTION(__ubsan_handle_missing_return)
+INTERFACE_FUNCTION(__ubsan_handle_mul_overflow)
+INTERFACE_FUNCTION(__ubsan_handle_mul_overflow_abort)
+INTERFACE_FUNCTION(__ubsan_handle_negate_overflow)
+INTERFACE_FUNCTION(__ubsan_handle_negate_overflow_abort)
+INTERFACE_FUNCTION(__ubsan_handle_nonnull_arg)
+INTERFACE_FUNCTION(__ubsan_handle_nonnull_arg_abort)
+INTERFACE_FUNCTION(__ubsan_handle_nonnull_return)
+INTERFACE_FUNCTION(__ubsan_handle_nonnull_return_abort)
+INTERFACE_FUNCTION(__ubsan_handle_out_of_bounds)
+INTERFACE_FUNCTION(__ubsan_handle_out_of_bounds_abort)
+INTERFACE_FUNCTION(__ubsan_handle_shift_out_of_bounds)
+INTERFACE_FUNCTION(__ubsan_handle_shift_out_of_bounds_abort)
+INTERFACE_FUNCTION(__ubsan_handle_sub_overflow)
+INTERFACE_FUNCTION(__ubsan_handle_sub_overflow_abort)
+INTERFACE_FUNCTION(__ubsan_handle_type_mismatch_v1)
+INTERFACE_FUNCTION(__ubsan_handle_type_mismatch_v1_abort)
+INTERFACE_FUNCTION(__ubsan_handle_vla_bound_not_positive)
+INTERFACE_FUNCTION(__ubsan_handle_vla_bound_not_positive_abort)
+INTERFACE_WEAK_FUNCTION(__ubsan_default_options)
diff --git a/lib/ubsan/ubsan_win_dll_thunk.cc b/lib/ubsan/ubsan_win_dll_thunk.cc
new file mode 100644
index 0000000..a1d0dbd
--- /dev/null
+++ b/lib/ubsan/ubsan_win_dll_thunk.cc
@@ -0,0 +1,21 @@
+//===-- ubsan_win_dll_thunk.cc --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a family of thunks that should be statically linked into
+// the DLLs that have instrumentation in order to delegate the calls to the
+// shared runtime that lives in the main binary.
+// See https://github.com/google/sanitizers/issues/209 for the details.
+//===----------------------------------------------------------------------===//
+#ifdef SANITIZER_DLL_THUNK
+#include "sanitizer_common/sanitizer_win_dll_thunk.h"
+// Ubsan interface functions.
+#define INTERFACE_FUNCTION(Name) INTERCEPT_SANITIZER_FUNCTION(Name)
+#define INTERFACE_WEAK_FUNCTION(Name) INTERCEPT_SANITIZER_WEAK_FUNCTION(Name)
+#include "ubsan_interface.inc"
+#endif // SANITIZER_DLL_THUNK
diff --git a/lib/ubsan/ubsan_win_dynamic_runtime_thunk.cc b/lib/ubsan/ubsan_win_dynamic_runtime_thunk.cc
new file mode 100644
index 0000000..c9b74a4
--- /dev/null
+++ b/lib/ubsan/ubsan_win_dynamic_runtime_thunk.cc
@@ -0,0 +1,21 @@
+//===-- ubsan_win_dynamic_runtime_thunk.cc --------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines things that need to be present in the application modules
+// to interact with Ubsan, when it is included in a dll.
+//
+//===----------------------------------------------------------------------===//
+#ifdef SANITIZER_DYNAMIC_RUNTIME_THUNK
+#define SANITIZER_IMPORT_INTERFACE 1
+#include "sanitizer_common/sanitizer_win_defs.h"
+// Define weak alias for all weak functions imported from ubsan.
+#define INTERFACE_FUNCTION(Name)
+#define INTERFACE_WEAK_FUNCTION(Name) WIN_WEAK_IMPORT_DEF(Name)
+#include "ubsan_interface.inc"
+#endif // SANITIZER_DYNAMIC_RUNTIME_THUNK
diff --git a/lib/ubsan/ubsan_win_weak_interception.cc b/lib/ubsan/ubsan_win_weak_interception.cc
new file mode 100644
index 0000000..353719e
--- /dev/null
+++ b/lib/ubsan/ubsan_win_weak_interception.cc
@@ -0,0 +1,23 @@
+//===-- ubsan_win_weak_interception.cc ------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This module should be included in Ubsan when it is implemented as a shared
+// library on Windows (dll), in order to delegate the calls of weak functions to
+// the implementation in the main executable when a strong definition is
+// provided.
+//===----------------------------------------------------------------------===//
+#ifdef SANITIZER_DYNAMIC
+#include "sanitizer_common/sanitizer_win_weak_interception.h"
+#include "ubsan_flags.h"
+// Check if strong definitions for weak functions are present in the main
+// executable. If that is the case, override dll functions to point to strong
+// implementations.
+#define INTERFACE_FUNCTION(Name)
+#define INTERFACE_WEAK_FUNCTION(Name) INTERCEPT_SANITIZER_WEAK_FUNCTION(Name)
+#include "ubsan_interface.inc"
+#endif // SANITIZER_DYNAMIC
diff --git a/lib/ubsan/weak_symbols.txt b/lib/ubsan/weak_symbols.txt
new file mode 100644
index 0000000..69e1bc1
--- /dev/null
+++ b/lib/ubsan/weak_symbols.txt
@@ -0,0 +1 @@
+___ubsan_default_options
diff --git a/lib/xray/CMakeLists.txt b/lib/xray/CMakeLists.txt
index 9c7cf6c..72caa9f 100644
--- a/lib/xray/CMakeLists.txt
+++ b/lib/xray/CMakeLists.txt
@@ -1,15 +1,15 @@
 # Build for the XRay runtime support library.
 
-# Core XRay runtime library implementation files.
+# XRay runtime library implementation files.
 set(XRAY_SOURCES
+  xray_inmemory_log.cc
   xray_init.cc
-  xray_interface.cc
   xray_flags.cc
-  xray_inmemory_log.cc)
-
-# XRay flight data recorder (FDR) implementation files.
-set(XRAY_FDR_SOURCES
-  xray_buffer_queue.cc)
+  xray_interface.cc
+  xray_buffer_queue.cc
+  xray_log_interface.cc
+  xray_fdr_logging.cc
+  xray_utils.cc)
 
 set(x86_64_SOURCES
     xray_x86_64.cc
@@ -21,11 +21,38 @@
     xray_trampoline_arm.S
     ${XRAY_SOURCES})
 
-set(armhf_SOURCES ${arm_SOURCES})
+set(armhf_SOURCES
+  ${arm_SOURCES})
 
 set(aarch64_SOURCES
-        xray_AArch64.cc
-        xray_trampoline_AArch64.S
+  xray_AArch64.cc
+  xray_trampoline_AArch64.S
+  ${XRAY_SOURCES})
+
+set(mips_SOURCES
+    xray_mips.cc
+    xray_trampoline_mips.S
+    ${XRAY_SOURCES})
+
+set(mipsel_SOURCES
+    xray_mips.cc
+    xray_trampoline_mips.S
+    ${XRAY_SOURCES})
+
+set(mips64_SOURCES
+    xray_mips64.cc
+    xray_trampoline_mips64.S
+    ${XRAY_SOURCES})
+
+set(mips64el_SOURCES
+    xray_mips64.cc
+    xray_trampoline_mips64.S
+    ${XRAY_SOURCES})
+
+set(powerpc64le_SOURCES
+        xray_powerpc64.cc
+        xray_trampoline_powerpc64.cc
+        xray_trampoline_powerpc64_asm.S
         ${XRAY_SOURCES})
 
 include_directories(..)
@@ -41,13 +68,7 @@
   SOURCES ${XRAY_SOURCES} CFLAGS ${XRAY_CFLAGS}
   DEFS ${XRAY_COMMON_DEFINITIONS})
 
-add_compiler_rt_object_libraries(RTXrayFDR
-  ARCHS ${XRAY_SUPPORTED_ARCH}
-  SOURCES ${XRAY_FDR_SOURCES} CFLAGS ${XRAY_CFLAGS}
-  DEFS ${XRAY_COMMON_DEFINITIONS})
-
 add_compiler_rt_component(xray)
-add_compiler_rt_component(xray-fdr)
 
 set(XRAY_COMMON_RUNTIME_OBJECT_LIBS
     RTSanitizerCommon
@@ -63,14 +84,6 @@
      DEFS ${XRAY_COMMON_DEFINITIONS}
      OBJECT_LIBS ${XRAY_COMMON_RUNTIME_OBJECT_LIBS}
      PARENT_TARGET xray)
-   add_compiler_rt_runtime(clang_rt.xray-fdr
-     STATIC
-     ARCHS ${arch}
-     SOURCES ${XRAY_FDR_SOURCES}
-     CFLAGS ${XRAY_CFLAGS}
-     DEFS ${XRAY_COMMON_DEFINITIONS}
-     OBJECT_LIBS ${XRAY_COMMON_RUNTIME_OBJECT_LIBS}
-     PARENT_TARGET xray-fdr)
   endif()
 endforeach()
 
diff --git a/lib/xray/tests/CMakeLists.txt b/lib/xray/tests/CMakeLists.txt
index 6cb1793..43878cb 100644
--- a/lib/xray/tests/CMakeLists.txt
+++ b/lib/xray/tests/CMakeLists.txt
@@ -8,14 +8,15 @@
   ${COMPILER_RT_UNITTEST_CFLAGS}
   ${COMPILER_RT_GTEST_CFLAGS}
   -I${COMPILER_RT_SOURCE_DIR}/include
-  -I${COMPILER_RT_SOURCE_DIR}/lib/xray)
+  -I${COMPILER_RT_SOURCE_DIR}/lib/xray
+  -I${COMPILER_RT_SOURCE_DIR}/lib)
 
 macro(xray_compile obj_list source arch)
   get_filename_component(basename ${source} NAME)
   set(output_obj "${basename}.${arch}.o")
   get_target_flags_for_arch(${arch} TARGET_CFLAGS)
   if(NOT COMPILER_RT_STANDALONE_BUILD)
-    list(APPEND COMPILE_DEPS gtest_main xray-fdr)
+    list(APPEND COMPILE_DEPS gtest_main xray)
   endif()
   clang_compile(${output_obj} ${source}
     CFLAGS ${XRAY_UNITTEST_CFLAGS} ${TARGET_CFLAGS}
@@ -38,16 +39,17 @@
       get_target_flags_for_arch(${arch} TARGET_LINK_FLAGS)
       set(TEST_DEPS ${TEST_OBJECTS})
       if(NOT COMPILER_RT_STANDALONE_BUILD)
-        list(APPEND TEST_DEPS gtest_main xray-fdr)
+        list(APPEND TEST_DEPS gtest_main xray)
       endif()
       if(NOT APPLE)
-        add_compiler_rt_test(XRayUnitTests ${testname}
+        add_compiler_rt_test(XRayUnitTests ${testname}-${arch}
           OBJECTS ${TEST_OBJECTS}
           DEPS ${TEST_DEPS}
           LINK_FLAGS ${TARGET_LINK_FLAGS}
           -lstdc++ -lm ${CMAKE_THREAD_LIBS_INIT}
           -lpthread
-          -L${COMPILER_RT_LIBRARY_OUTPUT_DIR} -lclang_rt.xray-fdr-${arch})
+          -L${COMPILER_RT_LIBRARY_OUTPUT_DIR} -lclang_rt.xray-${arch}
+          -latomic -ldl -lrt)
       endif()
       # FIXME: Figure out how to run even just the unit tests on APPLE.
     endforeach()
diff --git a/lib/xray/tests/unit/CMakeLists.txt b/lib/xray/tests/unit/CMakeLists.txt
index 3e5412d..62d01f2 100644
--- a/lib/xray/tests/unit/CMakeLists.txt
+++ b/lib/xray/tests/unit/CMakeLists.txt
@@ -1,2 +1,4 @@
 add_xray_unittest(XRayBufferQueueTest SOURCES
   buffer_queue_test.cc xray_unit_test_main.cc)
+add_xray_unittest(XRayFDRLoggingTest SOURCES
+  fdr_logging_test.cc xray_unit_test_main.cc)
diff --git a/lib/xray/tests/unit/buffer_queue_test.cc b/lib/xray/tests/unit/buffer_queue_test.cc
index fd7d5af..4db762d 100644
--- a/lib/xray/tests/unit/buffer_queue_test.cc
+++ b/lib/xray/tests/unit/buffer_queue_test.cc
@@ -14,54 +14,73 @@
 #include "gtest/gtest.h"
 
 #include <future>
+#include <system_error>
 #include <unistd.h>
 
 namespace __xray {
 
 static constexpr size_t kSize = 4096;
 
-TEST(BufferQueueTest, API) { BufferQueue Buffers(kSize, 1); }
+TEST(BufferQueueTest, API) {
+  bool Success = false;
+  BufferQueue Buffers(kSize, 1, Success);
+  ASSERT_TRUE(Success);
+}
 
 TEST(BufferQueueTest, GetAndRelease) {
-  BufferQueue Buffers(kSize, 1);
+  bool Success = false;
+  BufferQueue Buffers(kSize, 1, Success);
+  ASSERT_TRUE(Success);
   BufferQueue::Buffer Buf;
-  ASSERT_FALSE(Buffers.getBuffer(Buf));
+  ASSERT_EQ(Buffers.getBuffer(Buf), std::error_code());
   ASSERT_NE(nullptr, Buf.Buffer);
-  ASSERT_FALSE(Buffers.releaseBuffer(Buf));
+  ASSERT_EQ(Buffers.releaseBuffer(Buf), std::error_code());
   ASSERT_EQ(nullptr, Buf.Buffer);
 }
 
 TEST(BufferQueueTest, GetUntilFailed) {
-  BufferQueue Buffers(kSize, 1);
+  bool Success = false;
+  BufferQueue Buffers(kSize, 1, Success);
+  ASSERT_TRUE(Success);
   BufferQueue::Buffer Buf0;
-  EXPECT_FALSE(Buffers.getBuffer(Buf0));
+  EXPECT_EQ(Buffers.getBuffer(Buf0), std::error_code());
   BufferQueue::Buffer Buf1;
-  EXPECT_EQ(std::errc::not_enough_memory, Buffers.getBuffer(Buf1));
-  EXPECT_FALSE(Buffers.releaseBuffer(Buf0));
+  EXPECT_EQ(std::make_error_code(std::errc::not_enough_memory),
+            Buffers.getBuffer(Buf1));
+  EXPECT_EQ(Buffers.releaseBuffer(Buf0), std::error_code());
 }
 
 TEST(BufferQueueTest, ReleaseUnknown) {
-  BufferQueue Buffers(kSize, 1);
+  bool Success = false;
+  BufferQueue Buffers(kSize, 1, Success);
+  ASSERT_TRUE(Success);
   BufferQueue::Buffer Buf;
   Buf.Buffer = reinterpret_cast<void *>(0xdeadbeef);
   Buf.Size = kSize;
-  EXPECT_EQ(std::errc::argument_out_of_domain, Buffers.releaseBuffer(Buf));
+  EXPECT_EQ(std::make_error_code(std::errc::argument_out_of_domain),
+            Buffers.releaseBuffer(Buf));
 }
 
 TEST(BufferQueueTest, ErrorsWhenFinalising) {
-  BufferQueue Buffers(kSize, 2);
+  bool Success = false;
+  BufferQueue Buffers(kSize, 2, Success);
+  ASSERT_TRUE(Success);
   BufferQueue::Buffer Buf;
-  ASSERT_FALSE(Buffers.getBuffer(Buf));
+  ASSERT_EQ(Buffers.getBuffer(Buf), std::error_code());
   ASSERT_NE(nullptr, Buf.Buffer);
-  ASSERT_FALSE(Buffers.finalize());
+  ASSERT_EQ(Buffers.finalize(), std::error_code());
   BufferQueue::Buffer OtherBuf;
-  ASSERT_EQ(std::errc::state_not_recoverable, Buffers.getBuffer(OtherBuf));
-  ASSERT_EQ(std::errc::state_not_recoverable, Buffers.finalize());
-  ASSERT_FALSE(Buffers.releaseBuffer(Buf));
+  ASSERT_EQ(std::make_error_code(std::errc::state_not_recoverable),
+            Buffers.getBuffer(OtherBuf));
+  ASSERT_EQ(std::make_error_code(std::errc::state_not_recoverable),
+            Buffers.finalize());
+  ASSERT_EQ(Buffers.releaseBuffer(Buf), std::error_code());
 }
 
 TEST(BufferQueueTest, MultiThreaded) {
-  BufferQueue Buffers(kSize, 100);
+  bool Success = false;
+  BufferQueue Buffers(kSize, 100, Success);
+  ASSERT_TRUE(Success);
   auto F = [&] {
     BufferQueue::Buffer B;
     while (!Buffers.getBuffer(B)) {
@@ -77,4 +96,18 @@
   F();
 }
 
+TEST(BufferQueueTest, Apply) {  // apply() must visit every released buffer.
+  bool Success = false;
+  BufferQueue Buffers(kSize, 10, Success);
+  ASSERT_TRUE(Success);
+  int Count = 0;
+  BufferQueue::Buffer B;
+  for (int I = 0; I < 10; ++I) {  // Cycle each buffer so it is marked "used".
+    ASSERT_EQ(Buffers.getBuffer(B), std::error_code());
+    ASSERT_EQ(Buffers.releaseBuffer(B), std::error_code());
+  }
+  Buffers.apply([&](const BufferQueue::Buffer &) { ++Count; });
+  ASSERT_EQ(Count, 10);
+}
+
 } // namespace __xray
diff --git a/lib/xray/tests/unit/fdr_logging_test.cc b/lib/xray/tests/unit/fdr_logging_test.cc
new file mode 100644
index 0000000..0d5e99a
--- /dev/null
+++ b/lib/xray/tests/unit/fdr_logging_test.cc
@@ -0,0 +1,127 @@
+//===-- fdr_logging_test.cc -----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+#include "xray_fdr_logging.h"
+#include "gtest/gtest.h"
+
+#include <fcntl.h>
+#include <iostream>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <system_error>
+#include <unistd.h>
+
+#include "xray/xray_records.h"
+
+namespace __xray {
+namespace {
+
+constexpr auto kBufferSize = 16384;
+constexpr auto kBufferMax = 10;
+
+struct ScopedFileCloserAndDeleter {
+  explicit ScopedFileCloserAndDeleter(int Fd, const char *Filename)
+      : Fd(Fd), Filename(Filename) {}
+  // Closes Fd and unlinks Filename; -1 means "no descriptor" (0 is valid).
+  ~ScopedFileCloserAndDeleter() {
+    if (Fd != -1) {
+      close(Fd);
+      unlink(Filename);
+    }
+  }
+
+  int Fd;
+  const char *Filename;
+};
+
+TEST(FDRLoggingTest, Simple) {  // One entry/exit pair, then inspect the log.
+  FDRLoggingOptions Options;
+  Options.ReportErrors = true;
+  char TmpFilename[] = "fdr-logging-test.XXXXXX";
+  Options.Fd = mkstemp(TmpFilename);
+  ASSERT_NE(Options.Fd, -1);
+  ASSERT_EQ(fdrLoggingInit(kBufferSize, kBufferMax, &Options,
+                            sizeof(FDRLoggingOptions)),
+            XRayLogInitStatus::XRAY_LOG_INITIALIZED);
+  fdrLoggingHandleArg0(1, XRayEntryType::ENTRY);
+  fdrLoggingHandleArg0(1, XRayEntryType::EXIT);
+  ASSERT_EQ(fdrLoggingFinalize(), XRayLogInitStatus::XRAY_LOG_FINALIZED);
+  ASSERT_EQ(fdrLoggingFlush(), XRayLogFlushStatus::XRAY_LOG_FLUSHED);
+  ASSERT_EQ(fdrLoggingReset(), XRayLogInitStatus::XRAY_LOG_UNINITIALIZED);
+
+  // To do this properly, we have to close the file descriptor then re-open the
+  // file for reading this time.
+  ASSERT_EQ(close(Options.Fd), 0);
+  int Fd = open(TmpFilename, O_RDONLY);
+  ASSERT_NE(-1, Fd);
+  ScopedFileCloserAndDeleter Guard(Fd, TmpFilename);
+  auto Size = lseek(Fd, 0, SEEK_END);
+  ASSERT_GT(Size, 0);  // -1 (lseek failure) must fail this check too.
+  // Map the file contents; mmap() signals failure with MAP_FAILED, not NULL.
+  const char *Contents = static_cast<const char *>(
+      mmap(NULL, Size, PROT_READ, MAP_PRIVATE, Fd, 0));
+  ASSERT_NE(static_cast<const void *>(Contents), MAP_FAILED);
+
+  XRayFileHeader H;
+  memcpy(&H, Contents, sizeof(XRayFileHeader));
+  ASSERT_EQ(H.Version, 1);
+  ASSERT_EQ(H.Type, FileTypes::FDR_LOG);
+
+  // We require one buffer at least to have the "start of buffer" metadata
+  // record.
+  MetadataRecord MDR;
+  memcpy(&MDR, Contents + sizeof(XRayFileHeader), sizeof(MetadataRecord));
+  ASSERT_EQ(MDR.RecordKind, uint8_t(MetadataRecord::RecordKinds::NewBuffer));
+}
+
+TEST(FDRLoggingTest, Multiple) {  // 100 entry/exit pairs, then inspect log.
+  FDRLoggingOptions Options;
+  char TmpFilename[] = "fdr-logging-test.XXXXXX";
+  Options.Fd = mkstemp(TmpFilename);
+  ASSERT_NE(Options.Fd, -1);
+  ASSERT_EQ(fdrLoggingInit(kBufferSize, kBufferMax, &Options,
+                            sizeof(FDRLoggingOptions)),
+            XRayLogInitStatus::XRAY_LOG_INITIALIZED);
+  for (uint64_t I = 0; I < 100; ++I) {
+    fdrLoggingHandleArg0(1, XRayEntryType::ENTRY);
+    fdrLoggingHandleArg0(1, XRayEntryType::EXIT);
+  }
+  ASSERT_EQ(fdrLoggingFinalize(), XRayLogInitStatus::XRAY_LOG_FINALIZED);
+  ASSERT_EQ(fdrLoggingFlush(), XRayLogFlushStatus::XRAY_LOG_FLUSHED);
+  ASSERT_EQ(fdrLoggingReset(), XRayLogInitStatus::XRAY_LOG_UNINITIALIZED);
+
+  // To do this properly, we have to close the file descriptor then re-open the
+  // file for reading this time.
+  ASSERT_EQ(close(Options.Fd), 0);
+  int Fd = open(TmpFilename, O_RDONLY);
+  ASSERT_NE(-1, Fd);
+  ScopedFileCloserAndDeleter Guard(Fd, TmpFilename);
+  auto Size = lseek(Fd, 0, SEEK_END);
+  ASSERT_GT(Size, 0);  // -1 (lseek failure) must fail this check too.
+  // Map the file contents; mmap() signals failure with MAP_FAILED, not NULL.
+  const char *Contents = static_cast<const char *>(
+      mmap(NULL, Size, PROT_READ, MAP_PRIVATE, Fd, 0));
+  ASSERT_NE(static_cast<const void *>(Contents), MAP_FAILED);
+
+  XRayFileHeader H;
+  memcpy(&H, Contents, sizeof(XRayFileHeader));
+  ASSERT_EQ(H.Version, 1);
+  ASSERT_EQ(H.Type, FileTypes::FDR_LOG);
+
+  MetadataRecord MDR0;
+  memcpy(&MDR0, Contents + sizeof(XRayFileHeader), sizeof(MetadataRecord));
+  ASSERT_EQ(MDR0.RecordKind, uint8_t(MetadataRecord::RecordKinds::NewBuffer));
+}
+
+} // namespace
+} // namespace __xray
diff --git a/lib/xray/xray_AArch64.cc b/lib/xray/xray_AArch64.cc
index c2d33a2..8d1c7c5 100644
--- a/lib/xray/xray_AArch64.cc
+++ b/lib/xray/xray_AArch64.cc
@@ -18,6 +18,9 @@
 #include <atomic>
 #include <cassert>
 
+
+extern "C" void __clear_cache(void* start, void* end);
+
 namespace __xray {
 
 // The machine codes for some instructions used in runtime patching.
@@ -61,8 +64,8 @@
   //   B #32
 
   uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address);
+  uint32_t *CurAddress = FirstAddress + 1;
   if (Enable) {
-    uint32_t *CurAddress = FirstAddress + 1;
     *CurAddress = uint32_t(PatchOpcodes::PO_LdrW0_12);
     CurAddress++;
     *CurAddress = uint32_t(PatchOpcodes::PO_LdrX16_12);
@@ -74,6 +77,7 @@
     *reinterpret_cast<void (**)()>(CurAddress) = TracingHook;
     CurAddress += 2;
     *CurAddress = uint32_t(PatchOpcodes::PO_LdpX0X30SP_16);
+    CurAddress++;
     std::atomic_store_explicit(
         reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
         uint32_t(PatchOpcodes::PO_StpX0X30SP_m16e), std::memory_order_release);
@@ -82,12 +86,15 @@
         reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
         uint32_t(PatchOpcodes::PO_B32), std::memory_order_release);
   }
+  __clear_cache(reinterpret_cast<char*>(FirstAddress),
+      reinterpret_cast<char*>(CurAddress));
   return true;
 }
 
 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
-                        const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
-  return patchSled(Enable, FuncId, Sled, __xray_FunctionEntry);
+                        const XRaySledEntry &Sled,
+                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+  return patchSled(Enable, FuncId, Sled, Trampoline);
 }
 
 bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
@@ -97,9 +104,14 @@
 
 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
                            const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
-  // FIXME: In the future we'd need to distinguish between non-tail exits and
-  // tail exits for better information preservation.
-  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+  return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit);
 }
 
+// FIXME: Maybe implement this better?
+bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
+
 } // namespace __xray
+
+extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
+  // FIXME: this will have to be implemented in the trampoline assembly file
+}
diff --git a/lib/xray/xray_arm.cc b/lib/xray/xray_arm.cc
index 9ce8451..26d673e 100644
--- a/lib/xray/xray_arm.cc
+++ b/lib/xray/xray_arm.cc
@@ -18,6 +18,8 @@
 #include <atomic>
 #include <cassert>
 
+extern "C" void __clear_cache(void* start, void* end);
+
 namespace __xray {
 
 // The machine codes for some instructions used in runtime patching.
@@ -58,7 +60,7 @@
 //   MOVW r0, #<lower 16 bits of the |Value|>
 //   MOVT r0, #<higher 16 bits of the |Value|>
 inline static uint32_t *
-Write32bitLoadR0(uint32_t *Address,
+write32bitLoadR0(uint32_t *Address,
                  const uint32_t Value) XRAY_NEVER_INSTRUMENT {
   return write32bitLoadReg(0, Address, Value);
 }
@@ -67,7 +69,7 @@
 //   MOVW ip, #<lower 16 bits of the |Value|>
 //   MOVT ip, #<higher 16 bits of the |Value|>
 inline static uint32_t *
-Write32bitLoadIP(uint32_t *Address,
+write32bitLoadIP(uint32_t *Address,
                  const uint32_t Value) XRAY_NEVER_INSTRUMENT {
   return write32bitLoadReg(12, Address, Value);
 }
@@ -102,15 +104,16 @@
   //   B #20
 
   uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address);
+  uint32_t *CurAddress = FirstAddress + 1;
   if (Enable) {
-    uint32_t *CurAddress = FirstAddress + 1;
     CurAddress =
-        Write32bitLoadR0(CurAddress, reinterpret_cast<uint32_t>(FuncId));
+        write32bitLoadR0(CurAddress, reinterpret_cast<uint32_t>(FuncId));
     CurAddress =
-        Write32bitLoadIP(CurAddress, reinterpret_cast<uint32_t>(TracingHook));
+        write32bitLoadIP(CurAddress, reinterpret_cast<uint32_t>(TracingHook));
     *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp);
     CurAddress++;
     *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr);
+    CurAddress++;
     std::atomic_store_explicit(
         reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
         uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release);
@@ -119,12 +122,15 @@
         reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
         uint32_t(PatchOpcodes::PO_B20), std::memory_order_release);
   }
+  __clear_cache(reinterpret_cast<char*>(FirstAddress),
+      reinterpret_cast<char*>(CurAddress));
   return true;
 }
 
 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
-                        const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
-  return patchSled(Enable, FuncId, Sled, __xray_FunctionEntry);
+                        const XRaySledEntry &Sled,
+                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+  return patchSled(Enable, FuncId, Sled, Trampoline);
 }
 
 bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
@@ -134,9 +140,14 @@
 
 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
                            const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
-  // FIXME: In the future we'd need to distinguish between non-tail exits and
-  // tail exits for better information preservation.
-  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+  return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit);
 }
 
+// FIXME: Maybe implement this better?
+bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
+
 } // namespace __xray
+
+extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
+  // FIXME: this will have to be implemented in the trampoline assembly file
+}
diff --git a/lib/xray/xray_buffer_queue.cc b/lib/xray/xray_buffer_queue.cc
index 7e5462f..bd8f496 100644
--- a/lib/xray/xray_buffer_queue.cc
+++ b/lib/xray/xray_buffer_queue.cc
@@ -18,15 +18,21 @@
 
 using namespace __xray;
 
-BufferQueue::BufferQueue(std::size_t B, std::size_t N)
+BufferQueue::BufferQueue(std::size_t B, std::size_t N, bool &Success)
     : BufferSize(B), Buffers(N), Mutex(), OwnedBuffers(), Finalizing(false) {
-  for (auto &Buf : Buffers) {
+  for (auto &T : Buffers) {
     void *Tmp = malloc(BufferSize);
+    if (Tmp == nullptr) {
+      Success = false;
+      return;
+    }
+
+    auto &Buf = std::get<0>(T);
     Buf.Buffer = Tmp;
     Buf.Size = B;
-    if (Tmp != 0)
-      OwnedBuffers.insert(Tmp);
+    OwnedBuffers.emplace(Tmp);
   }
+  Success = true;
 }
 
 std::error_code BufferQueue::getBuffer(Buffer &Buf) {
@@ -35,7 +41,11 @@
   std::lock_guard<std::mutex> Guard(Mutex);
   if (Buffers.empty())
     return std::make_error_code(std::errc::not_enough_memory);
-  Buf = Buffers.front();
+  auto &T = Buffers.front();
+  auto &B = std::get<0>(T);
+  Buf = B;
+  B.Buffer = nullptr;
+  B.Size = 0;
   Buffers.pop_front();
   return {};
 }
@@ -44,9 +54,11 @@
   if (OwnedBuffers.count(Buf.Buffer) == 0)
     return std::make_error_code(std::errc::argument_out_of_domain);
   std::lock_guard<std::mutex> Guard(Mutex);
-  Buffers.push_back(Buf);
+
+  // Now that the buffer has been released, we mark it as "used".
+  Buffers.emplace(Buffers.end(), Buf, true /* used */);
   Buf.Buffer = nullptr;
-  Buf.Size = BufferSize;
+  Buf.Size = 0;
   return {};
 }
 
@@ -57,9 +69,8 @@
 }
 
 BufferQueue::~BufferQueue() {
-  for (auto &Buf : Buffers) {
+  for (auto &T : Buffers) {
+    auto &Buf = std::get<0>(T);
     free(Buf.Buffer);
-    Buf.Buffer = nullptr;
-    Buf.Size = 0;
   }
 }
diff --git a/lib/xray/xray_buffer_queue.h b/lib/xray/xray_buffer_queue.h
index bf0b7af..3898437 100644
--- a/lib/xray/xray_buffer_queue.h
+++ b/lib/xray/xray_buffer_queue.h
@@ -21,6 +21,7 @@
 #include <mutex>
 #include <system_error>
 #include <unordered_set>
+#include <utility>
 
 namespace __xray {
 
@@ -38,14 +39,18 @@
 
 private:
   std::size_t BufferSize;
-  std::deque<Buffer> Buffers;
+
+  // We use a bool to indicate whether the Buffer has been used in this
+  // freelist implementation.
+  std::deque<std::tuple<Buffer, bool>> Buffers;
   std::mutex Mutex;
   std::unordered_set<void *> OwnedBuffers;
   std::atomic<bool> Finalizing;
 
 public:
-  /// Initialise a queue of size |N| with buffers of size |B|.
-  BufferQueue(std::size_t B, std::size_t N);
+  /// Initialise a queue of size |N| with buffers of size |B|. We report success
+  /// through |Success|.
+  BufferQueue(std::size_t B, std::size_t N, bool &Success);
 
   /// Updates |Buf| to contain the pointer to an appropriate buffer. Returns an
   /// error in case there are no available buffers to return when we will run
@@ -68,15 +73,26 @@
 
   bool finalizing() const { return Finalizing.load(std::memory_order_acquire); }
 
-  // Sets the state of the BufferQueue to finalizing, which ensures that:
-  //
-  //   - All subsequent attempts to retrieve a Buffer will fail.
-  //   - All releaseBuffer operations will not fail.
-  //
-  // After a call to finalize succeeds, all subsequent calls to finalize will
-  // fail with std::errc::state_not_recoverable.
+  /// Sets the state of the BufferQueue to finalizing, which ensures that:
+  ///
+  ///   - All subsequent attempts to retrieve a Buffer will fail.
+  ///   - All releaseBuffer operations will not fail.
+  ///
+  /// After a call to finalize succeeds, all subsequent calls to finalize will
+  /// fail with std::errc::state_not_recoverable.
   std::error_code finalize();
 
+  /// Applies |Fn| to each Buffer in the queue that is marked 'used' (i.e. has
+  /// been through a getBuffer(...) and a releaseBuffer(...) operation). The
+  /// queue's mutex is held while |Fn| runs.
+  template <class F> void apply(F Fn) {
+    std::lock_guard<std::mutex> G(Mutex);
+    for (const auto &T : Buffers) {
+      if (std::get<1>(T))
+        Fn(std::get<0>(T));
+    }
+  }
+
   // Cleans up allocated buffers.
   ~BufferQueue();
 };
diff --git a/lib/xray/xray_fdr_logging.cc b/lib/xray/xray_fdr_logging.cc
new file mode 100644
index 0000000..bae7d4c
--- /dev/null
+++ b/lib/xray/xray_fdr_logging.cc
@@ -0,0 +1,537 @@
+//===-- xray_fdr_logging.cc ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Here we implement the Flight Data Recorder mode for XRay, where we use
+// compact structures to store records in memory as well as when writing out the
+// data to files.
+//
+//===----------------------------------------------------------------------===//
+#include "xray_fdr_logging.h"
+#include <algorithm>
+#include <bitset>
+#include <cassert>
+#include <cstring>
+#include <memory>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+#include <unordered_map>
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray/xray_interface.h"
+#include "xray/xray_records.h"
+#include "xray_buffer_queue.h"
+#include "xray_defs.h"
+#include "xray_flags.h"
+#include "xray_tsc.h"
+#include "xray_utils.h"
+
+namespace __xray {
+
+// Global BufferQueue.
+std::shared_ptr<BufferQueue> BQ;
+
+std::atomic<XRayLogInitStatus> LoggingStatus{
+    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
+
+std::atomic<XRayLogFlushStatus> LogFlushStatus{
+    XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
+
+std::unique_ptr<FDRLoggingOptions> FDROptions;
+
+XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax,
+                                  void *Options,
+                                  size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
+  assert(OptionsSize == sizeof(FDRLoggingOptions));
+  // Only the UNINITIALIZED -> INITIALIZING transition may proceed.
+  XRayLogInitStatus CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+  if (!LoggingStatus.compare_exchange_strong(
+          CurrentStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZING,
+          std::memory_order_release, std::memory_order_relaxed))
+    return CurrentStatus;
+  FDROptions.reset(new FDRLoggingOptions());
+  *FDROptions = *reinterpret_cast<FDRLoggingOptions *>(Options);
+  if (FDROptions->ReportErrors)
+    SetPrintfAndReportCallback(printToStdErr);
+  bool Success = false;
+  BQ = std::make_shared<BufferQueue>(BufferSize, BufferMax, Success);
+  if (!Success) {
+    Report("BufferQueue init failed.\n");
+    // Roll the state back; otherwise every later call sees INITIALIZING.
+    LoggingStatus.store(XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
+                        std::memory_order_release);
+    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+  }
+  // Install the actual handleArg0 handler after initialising the buffers.
+  __xray_set_handler(fdrLoggingHandleArg0);
+  LoggingStatus.store(XRayLogInitStatus::XRAY_LOG_INITIALIZED,
+                      std::memory_order_release);
+  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
+}
+
+// Must finalize before flushing.
+XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
+  if (LoggingStatus.load(std::memory_order_acquire) !=
+      XRayLogInitStatus::XRAY_LOG_FINALIZED)
+    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+
+  XRayLogFlushStatus Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+  if (!LogFlushStatus.compare_exchange_strong(
+          Result, XRayLogFlushStatus::XRAY_LOG_FLUSHING,
+          std::memory_order_release, std::memory_order_relaxed))
+    return Result;
+
+  // Make a copy of the BufferQueue pointer to prevent other threads that may be
+  // resetting it from blowing away the queue prematurely while we're dealing
+  // with it.
+  auto LocalBQ = BQ;
+
+  // We write out the file in the following format:
+  //
+  //   1) We write down the XRay file header with version 1, type FDR_LOG.
+  //   2) Then we use the 'apply' member of the BufferQueue that's live, to
+  //      ensure that at this point in time we write down the buffers that have
+  //      been released (and marked "used") -- we dump the full buffer for now
+  //      (fixed-sized) and let the tools reading the buffers deal with the data
+  //      afterwards.
+  //
+  int Fd = FDROptions->Fd;
+  if (Fd == -1)
+    Fd = getLogFD();
+  if (Fd == -1) {
+    auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+    LogFlushStatus.store(Result, std::memory_order_release);
+    return Result;
+  }
+
+  XRayFileHeader Header;
+  Header.Version = 1;
+  Header.Type = FileTypes::FDR_LOG;
+  Header.CycleFrequency = getTSCFrequency();
+  // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
+  // before setting the values in the header.
+  Header.ConstantTSC = 1;
+  Header.NonstopTSC = 1;
+  clock_gettime(CLOCK_REALTIME, &Header.TS);
+  retryingWriteAll(Fd, reinterpret_cast<char *>(&Header),
+                   reinterpret_cast<char *>(&Header) + sizeof(Header));
+  LocalBQ->apply([&](const BufferQueue::Buffer &B) {
+    retryingWriteAll(Fd, reinterpret_cast<char *>(B.Buffer),
+                     reinterpret_cast<char *>(B.Buffer) + B.Size);
+  });
+  LogFlushStatus.store(XRayLogFlushStatus::XRAY_LOG_FLUSHED,
+                       std::memory_order_release);
+  return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
+}
+
+XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT {
+  XRayLogInitStatus CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
+  if (!LoggingStatus.compare_exchange_strong(
+          CurrentStatus, XRayLogInitStatus::XRAY_LOG_FINALIZING,
+          std::memory_order_release, std::memory_order_relaxed))
+    return CurrentStatus;
+
+  // Do special things to make the log finalize itself, and not allow any more
+  // operations to be performed until re-initialized.
+  BQ->finalize();
+
+  LoggingStatus.store(XRayLogInitStatus::XRAY_LOG_FINALIZED,
+                      std::memory_order_release);
+  return XRayLogInitStatus::XRAY_LOG_FINALIZED;
+}
+
+XRayLogInitStatus fdrLoggingReset() XRAY_NEVER_INSTRUMENT {
+  XRayLogInitStatus CurrentStatus = XRayLogInitStatus::XRAY_LOG_FINALIZED;
+  if (!LoggingStatus.compare_exchange_strong(
+          CurrentStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
+          std::memory_order_release, std::memory_order_relaxed))
+    return CurrentStatus;
+
+  // Release the in-memory buffer queue.
+  BQ.reset();
+
+  // Spin until the flushing status is flushed.
+  XRayLogFlushStatus CurrentFlushingStatus =
+      XRayLogFlushStatus::XRAY_LOG_FLUSHED;
+  while (!LogFlushStatus.compare_exchange_weak(
+      CurrentFlushingStatus, XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
+      std::memory_order_release, std::memory_order_relaxed)) {
+    if (CurrentFlushingStatus == XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING)
+      break;
+    CurrentFlushingStatus = XRayLogFlushStatus::XRAY_LOG_FLUSHED;
+  }
+
+  // At this point, the flush status has been reset to not-flushing as well.
+  return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+}
+
+namespace {
+thread_local BufferQueue::Buffer Buffer;
+thread_local char *RecordPtr = nullptr;
+
+// Prepares `Buffer` for use by the calling thread: points RecordPtr at the
+// start of the buffer's memory and writes the two metadata records that open
+// the buffer (a NewBuffer record and a WalltimeMarker record). On return,
+// RecordPtr points just past those two records.
+void setupNewBuffer(const BufferQueue::Buffer &Buffer) XRAY_NEVER_INSTRUMENT {
+  RecordPtr = static_cast<char *>(Buffer.Buffer);
+
+  static constexpr int InitRecordsCount = 2;
+  std::aligned_storage<sizeof(MetadataRecord)>::type Records[InitRecordsCount];
+
+  // Zero the staging area first. Each record below fills in only a prefix of
+  // its Data payload; without this, the remaining bytes would be copied into
+  // the log as uninitialized stack contents.
+  std::memset(Records, 0, sizeof(Records));
+  {
+    // Write out a MetadataRecord to signify that this is the start of a new
+    // buffer, associated with a particular thread, with a new CPU.  For the
+    // data, we have 15 bytes to squeeze as much information as we can.  At this
+    // point we only write down the following bytes:
+    //   - Thread ID (pid_t, 4 bytes)
+    auto &NewBuffer = *reinterpret_cast<MetadataRecord *>(&Records[0]);
+    NewBuffer.Type = uint8_t(RecordType::Metadata);
+    NewBuffer.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewBuffer);
+    pid_t Tid = syscall(SYS_gettid);
+    std::memcpy(&NewBuffer.Data, &Tid, sizeof(pid_t));
+  }
+
+  // Also write the WalltimeMarker record.
+  {
+    static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes");
+    auto &WalltimeMarker = *reinterpret_cast<MetadataRecord *>(&Records[1]);
+    WalltimeMarker.Type = uint8_t(RecordType::Metadata);
+    WalltimeMarker.RecordKind =
+        uint8_t(MetadataRecord::RecordKinds::WalltimeMarker);
+    // NOTE(review): the record is named WalltimeMarker, but the timestamp is
+    // read from CLOCK_MONOTONIC rather than CLOCK_REALTIME -- confirm that
+    // consumers of the log expect a monotonic reading here.
+    timespec TS{0, 0};
+    clock_gettime(CLOCK_MONOTONIC, &TS);
+
+    // We only really need microsecond precision here, and enforce across
+    // platforms that we need 64-bit seconds and 32-bit microseconds encoded in
+    // the Metadata record.
+    int32_t Micros = TS.tv_nsec / 1000;
+    int64_t Seconds = TS.tv_sec;
+    std::memcpy(WalltimeMarker.Data, &Seconds, sizeof(Seconds));
+    std::memcpy(WalltimeMarker.Data + sizeof(Seconds), &Micros, sizeof(Micros));
+  }
+
+  // Copy both staged records into the buffer and advance the cursor past them.
+  std::memcpy(RecordPtr, Records, sizeof(MetadataRecord) * InitRecordsCount);
+  RecordPtr += sizeof(MetadataRecord) * InitRecordsCount;
+}
+
+// Appends a NewCPUId metadata record, carrying the CPU ID and the full TSC
+// reading, at RecordPtr and advances RecordPtr past the record.
+void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC) XRAY_NEVER_INSTRUMENT {
+  MetadataRecord NewCPUId;
+  NewCPUId.Type = uint8_t(RecordType::Metadata);
+  NewCPUId.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewCPUId);
+
+  // Clear the payload so that the bytes we do not fill in below are written to
+  // the log as zeroes rather than as uninitialized stack contents.
+  std::memset(NewCPUId.Data, 0, sizeof(NewCPUId.Data));
+
+  // The data for the New CPU will contain the following bytes:
+  //   - CPU ID (uint16_t, 2 bytes)
+  //   - Full TSC (uint64_t, 8 bytes)
+  // Total = 10 bytes.
+  std::memcpy(&NewCPUId.Data, &CPU, sizeof(CPU));
+  std::memcpy(&NewCPUId.Data[sizeof(CPU)], &TSC, sizeof(TSC));
+  std::memcpy(RecordPtr, &NewCPUId, sizeof(MetadataRecord));
+  RecordPtr += sizeof(MetadataRecord);
+}
+
+// Appends an EndOfBuffer metadata record at RecordPtr and advances RecordPtr
+// past the record.
+void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
+  MetadataRecord EOBMeta;
+  EOBMeta.Type = uint8_t(RecordType::Metadata);
+  EOBMeta.RecordKind = uint8_t(MetadataRecord::RecordKinds::EndOfBuffer);
+  // For now this record carries no payload. Zero the Data field anyway so that
+  // uninitialized stack contents are never written to the log.
+  std::memset(EOBMeta.Data, 0, sizeof(EOBMeta.Data));
+  std::memcpy(RecordPtr, &EOBMeta, sizeof(MetadataRecord));
+  RecordPtr += sizeof(MetadataRecord);
+}
+
+// Appends a TSCWrap metadata record, carrying the full TSC reading, at
+// RecordPtr and advances RecordPtr past the record.
+void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
+  MetadataRecord TSCWrap;
+  TSCWrap.Type = uint8_t(RecordType::Metadata);
+  TSCWrap.RecordKind = uint8_t(MetadataRecord::RecordKinds::TSCWrap);
+
+  // Clear the payload so that the bytes after the TSC are written to the log
+  // as zeroes rather than as uninitialized stack contents.
+  std::memset(TSCWrap.Data, 0, sizeof(TSCWrap.Data));
+
+  // The data for the TSCWrap record contains the following bytes:
+  //   - Full TSC (uint64_t, 8 bytes)
+  // Total = 8 bytes.
+  std::memcpy(&TSCWrap.Data, &TSC, sizeof(TSC));
+  std::memcpy(RecordPtr, &TSCWrap, sizeof(MetadataRecord));
+  RecordPtr += sizeof(MetadataRecord);
+}
+
+constexpr auto MetadataRecSize = sizeof(MetadataRecord); // Size in bytes.
+constexpr auto FunctionRecSize = sizeof(FunctionRecord); // Size in bytes.
+
+// Scope guard that, on destruction, terminates the thread-local log with an
+// EndOfBuffer record and returns the associated buffer to its queue. Judging
+// by the name it is meant to be destroyed at thread exit.
+//
+// The queue is held through a weak_ptr, so if the queue is already gone by the
+// time the guard is destroyed, nothing is written or released.
+class ThreadExitBufferCleanup {
+  std::weak_ptr<BufferQueue> Queue;
+  BufferQueue::Buffer &ThreadBuffer;
+
+public:
+  explicit ThreadExitBufferCleanup(std::weak_ptr<BufferQueue> BQ,
+                                   BufferQueue::Buffer &Buffer)
+      XRAY_NEVER_INSTRUMENT : Queue(BQ),
+                              ThreadBuffer(Buffer) {}
+
+  ~ThreadExitBufferCleanup() noexcept XRAY_NEVER_INSTRUMENT {
+    // No record cursor has been set up for this thread: nothing to clean up.
+    if (RecordPtr == nullptr)
+      return;
+
+    // Sanity check on the cursor position relative to the start of the
+    // buffer before the EOB MetadataRecord is appended.
+    assert((RecordPtr + MetadataRecSize) -
+               static_cast<char *>(ThreadBuffer.Buffer) >=
+           static_cast<ptrdiff_t>(MetadataRecSize));
+
+    // The queue may already have been destroyed; in that case there is
+    // nowhere to release the buffer to.
+    auto LiveQueue = Queue.lock();
+    if (!LiveQueue)
+      return;
+
+    // Close the thread-local log with the EOB MetadataRecord, then give the
+    // buffer back to the queue.
+    writeEOBMetadata();
+    if (auto EC = LiveQueue->releaseBuffer(ThreadBuffer))
+      Report("Failed to release buffer at %p; error=%s\n", ThreadBuffer.Buffer,
+             EC.message().c_str());
+  }
+};
+
+// Scope guard around a caller-owned "currently running" flag.
+//
+// If the flag is clear at construction, the guard sets it, converts to true,
+// and clears the flag again when destroyed. If the flag is already set, the
+// guard converts to false and never touches the flag.
+class RecursionGuard {
+  bool &Flag;          // The caller-owned flag being guarded.
+  const bool Acquired; // True iff this guard is the one that set Flag.
+
+public:
+  explicit RecursionGuard(bool &R) : Flag(R), Acquired(!R) {
+    if (!Acquired)
+      return;
+    Flag = true;
+  }
+
+  // Neither copyable nor movable: exactly one guard may own the flag.
+  RecursionGuard(const RecursionGuard &) = delete;
+  RecursionGuard(RecursionGuard &&) = delete;
+  RecursionGuard &operator=(const RecursionGuard &) = delete;
+  RecursionGuard &operator=(RecursionGuard &&) = delete;
+
+  // True when this guard set the flag, i.e. this is not a re-entrant call.
+  explicit operator bool() const { return Acquired; }
+
+  ~RecursionGuard() noexcept {
+    if (!Acquired)
+      return;
+    Flag = false;
+  }
+};
+
+// Returns true iff the logging status, read with acquire ordering, is
+// currently XRAY_LOG_INITIALIZED.
+inline bool loggingInitialized() {
+  const auto Status = LoggingStatus.load(std::memory_order_acquire);
+  return Status == XRayLogInitStatus::XRAY_LOG_INITIALIZED;
+}
+
+} // namespace
+
+void fdrLoggingHandleArg0(int32_t FuncId,
+                           XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
+  // We want to get the TSC as early as possible, so that we can check whether
+  // we've seen this CPU before. We also do it before we load anything else, to
+  // allow for forward progress with the scheduling.
+  unsigned char CPU;
+  uint64_t TSC = __xray::readTSC(CPU);
+
+  // Bail out right away if logging is not initialized yet.
+  if (LoggingStatus.load(std::memory_order_acquire) !=
+      XRayLogInitStatus::XRAY_LOG_INITIALIZED)
+    return;
+
+  // We use a thread_local variable to keep track of which CPUs we've already
+  // run, and the TSC times for these CPUs. This allows us to stop repeating the
+  // CPU field in the function records.