Merge remote-tracking branch 'origin/stable' into stable

STABLE MERGE COMMIT for SWIFT-3.1-branch
diff --git a/.arcconfig b/.arcconfig
index ef3e327..bc39977 100644
--- a/.arcconfig
+++ b/.arcconfig
@@ -1,4 +1,4 @@
 {
   "project_id" : "compiler-rt",
-  "conduit_uri" : "http://reviews.llvm.org/"
+  "conduit_uri" : "https://reviews.llvm.org/"
 }
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d4cc417..f72fb01 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,112 +11,38 @@
 if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   project(CompilerRT C CXX ASM)
   set(COMPILER_RT_STANDALONE_BUILD TRUE)
-else()
-  set(COMPILER_RT_STANDALONE_BUILD FALSE)
 endif()
 
-# The CompilerRT build system requires CMake version 2.8.8 or higher in order
-# to use its support for building convenience "libraries" as a collection of
-# .o files. This is particularly useful in producing larger, more complex
-# runtime libraries.
-if (NOT MSVC)
-  cmake_minimum_required(VERSION 2.8.8)
-else()
-  # Version 2.8.12.1 is required to build with Visual Studio 2013.
-  cmake_minimum_required(VERSION 2.8.12.1)
-endif()
+cmake_minimum_required(VERSION 3.4.3)
+# FIXME:
+# The OLD behavior (pre 3.2) for this policy is to not set the value of the
+# CMAKE_EXE_LINKER_FLAGS variable in the generated test project. The NEW behavior
+# for this policy is to set the value of the CMAKE_EXE_LINKER_FLAGS variable
+# in the test project to the same as it is in the calling project. The NEW
+# behavior causes the compiler-rt try_compile checks to fail (see
+# projects/compiler-rt/cmake/Modules/CompilerRTUtils.cmake:121), so that
+# CAN_TARGET_${arch} is not set properly. This results in COMPILER_RT_SUPPORTED_ARCH
+# not being updated properly, leading to problems.
+cmake_policy(SET CMP0056 OLD)
 
-# FIXME: It may be removed when we use 2.8.12.
-if(CMAKE_VERSION VERSION_LESS 2.8.12)
-  # Invalidate a couple of keywords.
-  set(cmake_2_8_12_INTERFACE)
-  set(cmake_2_8_12_PRIVATE)
-  if(APPLE)
-    set(CMAKE_MACOSX_RPATH On)
-  endif()
-else()
-  # Use ${cmake_2_8_12_KEYWORD} intead of KEYWORD in target_link_libraries().
-  set(cmake_2_8_12_INTERFACE INTERFACE)
-  set(cmake_2_8_12_PRIVATE PRIVATE)
-  if(POLICY CMP0022)
-    cmake_policy(SET CMP0022 NEW) # automatic when 2.8.12 is required
-  endif()
-  if(POLICY CMP0042)
-    cmake_policy(SET CMP0042 NEW) # automatic when 2.8.12 is required
-  endif()
-endif()
+# Add path for custom compiler-rt modules.
+list(INSERT CMAKE_MODULE_PATH 0
+  "${CMAKE_CURRENT_SOURCE_DIR}/cmake"
+  "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules"
+  )
 
-# Top level target used to build all compiler-rt libraries.
-add_custom_target(compiler-rt ALL)
+include(base-config-ix)
+include(CompilerRTUtils)
 
 option(COMPILER_RT_BUILD_BUILTINS "Build builtins" ON)
 mark_as_advanced(COMPILER_RT_BUILD_BUILTINS)
 option(COMPILER_RT_BUILD_SANITIZERS "Build sanitizers" ON)
 mark_as_advanced(COMPILER_RT_BUILD_SANITIZERS)
+option(COMPILER_RT_BUILD_XRAY "Build xray" ON)
+mark_as_advanced(COMPILER_RT_BUILD_XRAY)
 
-if (NOT COMPILER_RT_STANDALONE_BUILD)
-  # Compute the Clang version from the LLVM version.
-  # FIXME: We should be able to reuse CLANG_VERSION variable calculated
-  #        in Clang cmake files, instead of copying the rules here.
-  string(REGEX MATCH "[0-9]+\\.[0-9]+(\\.[0-9]+)?" CLANG_VERSION
-         ${PACKAGE_VERSION})
-  # Setup the paths where compiler-rt runtimes and headers should be stored.
-  set(COMPILER_RT_OUTPUT_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}/clang/${CLANG_VERSION})
-  set(COMPILER_RT_EXEC_OUTPUT_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR})
-  set(COMPILER_RT_INSTALL_PATH lib${LLVM_LIBDIR_SUFFIX}/clang/${CLANG_VERSION})
-  option(COMPILER_RT_INCLUDE_TESTS "Generate and build compiler-rt unit tests."
-         ${LLVM_INCLUDE_TESTS})
- option(COMPILER_RT_ENABLE_WERROR "Fail and stop if warning is triggered"
-        ${LLVM_ENABLE_WERROR})
-  # Use just-built Clang to compile/link tests on all platforms, except for
-  # Windows where we need to use clang-cl instead.
-  if(NOT MSVC)
-    set(COMPILER_RT_TEST_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
-  else()
-    set(COMPILER_RT_TEST_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang.exe)
-  endif()
-else()
-  # Take output dir and install path from the user.
-  set(COMPILER_RT_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR} CACHE PATH
-    "Path where built compiler-rt libraries should be stored.")
-  set(COMPILER_RT_EXEC_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/bin CACHE PATH
-    "Path where built compiler-rt executables should be stored.")
-  set(COMPILER_RT_INSTALL_PATH ${CMAKE_INSTALL_PREFIX} CACHE PATH
-    "Path where built compiler-rt libraries should be installed.")
-  option(COMPILER_RT_INCLUDE_TESTS "Generate and build compiler-rt unit tests." OFF)
-  option(COMPILER_RT_ENABLE_WERROR "Fail and stop if warning is triggered" OFF)
-  # Use a host compiler to compile/link tests.
-  set(COMPILER_RT_TEST_COMPILER ${CMAKE_C_COMPILER} CACHE PATH "Compiler to use for testing")
-
-  if (NOT LLVM_CONFIG_PATH)
-    find_program(LLVM_CONFIG_PATH "llvm-config"
-                 DOC "Path to llvm-config binary")
-    if (NOT LLVM_CONFIG_PATH)
-      message(FATAL_ERROR "llvm-config not found: specify LLVM_CONFIG_PATH")
-    endif()
-  endif()
-  execute_process(
-    COMMAND ${LLVM_CONFIG_PATH} "--obj-root" "--bindir" "--libdir" "--src-root"
-    RESULT_VARIABLE HAD_ERROR
-    OUTPUT_VARIABLE CONFIG_OUTPUT)
-  if (HAD_ERROR)
-    message(FATAL_ERROR "llvm-config failed with status ${HAD_ERROR}")
-  endif()
-  string(REGEX REPLACE "[ \t]*[\r\n]+[ \t]*" ";" CONFIG_OUTPUT ${CONFIG_OUTPUT})
-  list(GET CONFIG_OUTPUT 0 LLVM_BINARY_DIR)
-  list(GET CONFIG_OUTPUT 1 LLVM_TOOLS_BINARY_DIR)
-  list(GET CONFIG_OUTPUT 2 LLVM_LIBRARY_DIR)
-  list(GET CONFIG_OUTPUT 3 LLVM_MAIN_SRC_DIR)
-
-  # Make use of LLVM CMake modules.
-  file(TO_CMAKE_PATH ${LLVM_BINARY_DIR} LLVM_BINARY_DIR_CMAKE_STYLE)
-  set(LLVM_CMAKE_PATH "${LLVM_BINARY_DIR_CMAKE_STYLE}/share/llvm/cmake")
-  list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_PATH}")
-  # Get some LLVM variables from LLVMConfig.
-  include("${LLVM_CMAKE_PATH}/LLVMConfig.cmake")
-
-  set(LLVM_LIBRARY_OUTPUT_INTDIR
-    ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${LLVM_LIBDIR_SUFFIX})
+if (COMPILER_RT_STANDALONE_BUILD)
+  load_llvm_config()
 
   # Find Python interpreter.
   set(Python_ADDITIONAL_VERSIONS 2.7 2.6 2.5)
@@ -135,52 +61,11 @@
   set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit")
 endif()
 
-if("${COMPILER_RT_TEST_COMPILER}" MATCHES "clang[+]*$")
-  set(COMPILER_RT_TEST_COMPILER_ID Clang)
-elseif("${COMPILER_RT_TEST_COMPILER}" MATCHES "clang.*.exe$")
-  set(COMPILER_RT_TEST_COMPILER_ID Clang)
-else()
-  set(COMPILER_RT_TEST_COMPILER_ID GNU)
-endif()
-
-set(COMPILER_RT_DEFAULT_TARGET_TRIPLE ${TARGET_TRIPLE} CACHE STRING
-    "Default triple for which compiler-rt runtimes will be built.")
-if(DEFINED COMPILER_RT_TEST_TARGET_TRIPLE)
-  # Backwards compatibility: this variable used to be called
-  # COMPILER_RT_TEST_TARGET_TRIPLE.
-  set(COMPILER_RT_DEFAULT_TARGET_TRIPLE ${COMPILER_RT_TEST_TARGET_TRIPLE})
-endif()
-
-string(REPLACE "-" ";" TARGET_TRIPLE_LIST ${COMPILER_RT_DEFAULT_TARGET_TRIPLE})
-list(GET TARGET_TRIPLE_LIST 0 COMPILER_RT_DEFAULT_TARGET_ARCH)
-list(GET TARGET_TRIPLE_LIST 1 COMPILER_RT_DEFAULT_TARGET_OS)
-list(GET TARGET_TRIPLE_LIST 2 COMPILER_RT_DEFAULT_TARGET_ABI)
-# Determine if test target triple is specified explicitly, and doesn't match the
-# default.
-if(NOT COMPILER_RT_DEFAULT_TARGET_TRIPLE STREQUAL TARGET_TRIPLE)
-  set(COMPILER_RT_HAS_EXPLICIT_DEFAULT_TARGET_TRIPLE TRUE)
-else()
-  set(COMPILER_RT_HAS_EXPLICIT_DEFAULT_TARGET_TRIPLE FALSE)
-endif()
-
+construct_compiler_rt_default_triple()
 if ("${COMPILER_RT_DEFAULT_TARGET_ABI}" STREQUAL "androideabi")
   set(ANDROID 1)
 endif()
 
-string(TOLOWER ${CMAKE_SYSTEM_NAME} COMPILER_RT_OS_DIR)
-set(COMPILER_RT_LIBRARY_OUTPUT_DIR
-  ${COMPILER_RT_OUTPUT_DIR}/lib/${COMPILER_RT_OS_DIR})
-set(COMPILER_RT_LIBRARY_INSTALL_DIR
-  ${COMPILER_RT_INSTALL_PATH}/lib/${COMPILER_RT_OS_DIR})
-
-# Add path for custom compiler-rt modules.
-set(CMAKE_MODULE_PATH
-  "${CMAKE_CURRENT_SOURCE_DIR}/cmake"
-  "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules"
-  ${CMAKE_MODULE_PATH}
-  )
-include(CompilerRTUtils)
-
 set(COMPILER_RT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 set(COMPILER_RT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
 
@@ -203,13 +88,18 @@
 #================================
 # Setup Compiler Flags
 #================================
-include(CheckIncludeFile)
-check_include_file(unwind.h HAVE_UNWIND_H)
 
 include(config-ix)
 
 if(MSVC)
-  append_string_if(COMPILER_RT_HAS_W3_FLAG /W3 CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+  # Override any existing /W flags with /W4. This is what LLVM does.  Failing to
+  # remove other /W[0-4] flags will result in a warning about overriding a
+  # previous flag.
+  if (COMPILER_RT_HAS_W4_FLAG)
+    string(REGEX REPLACE " /W[0-4]" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+    string(REGEX REPLACE " /W[0-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+    append_string_if(COMPILER_RT_HAS_W4_FLAG /W4 CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+  endif()
 else()
   append_string_if(COMPILER_RT_HAS_WALL_FLAG -Wall CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
 endif()
@@ -229,7 +119,9 @@
 append_list_if(COMPILER_RT_HAS_FPIC_FLAG -fPIC SANITIZER_COMMON_CFLAGS)
 append_list_if(COMPILER_RT_HAS_FNO_BUILTIN_FLAG -fno-builtin SANITIZER_COMMON_CFLAGS)
 append_list_if(COMPILER_RT_HAS_FNO_EXCEPTIONS_FLAG -fno-exceptions SANITIZER_COMMON_CFLAGS)
-append_list_if(COMPILER_RT_HAS_FOMIT_FRAME_POINTER_FLAG -fomit-frame-pointer SANITIZER_COMMON_CFLAGS)
+if(NOT COMPILER_RT_DEBUG)
+  append_list_if(COMPILER_RT_HAS_FOMIT_FRAME_POINTER_FLAG -fomit-frame-pointer SANITIZER_COMMON_CFLAGS)
+endif()
 append_list_if(COMPILER_RT_HAS_FUNWIND_TABLES_FLAG -funwind-tables SANITIZER_COMMON_CFLAGS)
 append_list_if(COMPILER_RT_HAS_FNO_STACK_PROTECTOR_FLAG -fno-stack-protector SANITIZER_COMMON_CFLAGS)
 append_list_if(COMPILER_RT_HAS_FNO_SANITIZE_SAFE_STACK_FLAG -fno-sanitize=safe-stack SANITIZER_COMMON_CFLAGS)
@@ -244,6 +136,8 @@
   # FIXME: In fact, sanitizers should support both /MT and /MD, see PR20214.
   if(COMPILER_RT_HAS_MT_FLAG)
     foreach(flag_var
+      CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
+      CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
       CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
       CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
       string(REGEX REPLACE "/M[DT]d" "/MT" ${flag_var} "${${flag_var}}")
@@ -253,6 +147,12 @@
   endif()
   append_list_if(COMPILER_RT_HAS_Oy_FLAG /Oy- SANITIZER_COMMON_CFLAGS)
   append_list_if(COMPILER_RT_HAS_GS_FLAG /GS- SANITIZER_COMMON_CFLAGS)
+  # VS 2015 (version 1900) added support for thread safe static initialization.
+  # However, ASan interceptors run before CRT initialization, which causes the
+  # new thread safe code to crash. Disable this feature for now.
+  if (MSVC_VERSION GREATER 1899 OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+    list(APPEND SANITIZER_COMMON_CFLAGS /Zc:threadSafeInit-)
+  endif()
 endif()
 
 append_list_if(COMPILER_RT_DEBUG -DSANITIZER_DEBUG=1 SANITIZER_COMMON_CFLAGS)
@@ -280,8 +180,14 @@
   list(APPEND SANITIZER_COMMON_CFLAGS -gline-tables-only)
 elseif(COMPILER_RT_HAS_G_FLAG)
   list(APPEND SANITIZER_COMMON_CFLAGS -g)
-elseif(COMPILER_RT_HAS_Zi_FLAG)
-  list(APPEND SANITIZER_COMMON_CFLAGS /Zi)
+elseif(MSVC)
+  # Use /Z7 instead of /Zi for the asan runtime. This avoids the LNK4099
+  # warning from the MS linker complaining that it can't find the 'vc140.pdb'
+  # file used by our object library compilations.
+  list(APPEND SANITIZER_COMMON_CFLAGS /Z7)
+  llvm_replace_compiler_option(CMAKE_CXX_FLAGS "/Z[i7I]" "/Z7")
+  llvm_replace_compiler_option(CMAKE_CXX_FLAGS_DEBUG "/Z[i7I]" "/Z7")
+  llvm_replace_compiler_option(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Z[i7I]" "/Z7")
 endif()
 
 # Turn off several warnings.
@@ -295,10 +201,15 @@
 append_list_if(COMPILER_RT_HAS_WD4722_FLAG /wd4722 SANITIZER_COMMON_CFLAGS)
 append_list_if(COMPILER_RT_HAS_WD4800_FLAG /wd4800 SANITIZER_COMMON_CFLAGS)
 
+# Warnings to turn off for all libraries, not just sanitizers.
+append_string_if(COMPILER_RT_HAS_WUNUSED_PARAMETER_FLAG -Wno-unused-parameter CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+
 if(APPLE AND SANITIZER_MIN_OSX_VERSION VERSION_LESS "10.9")
   # Mac OS X prior to 10.9 had problems with exporting symbols from
   # libc++/libc++abi.
   set(SANITIZER_CAN_USE_CXXABI FALSE)
+elseif(MSVC)
+  set(SANITIZER_CAN_USE_CXXABI FALSE)
 else()
   set(SANITIZER_CAN_USE_CXXABI TRUE)
 endif()
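
A minimal sketch of the CMP0056 problem described in the FIXME near the top of this file's diff: with the NEW policy behavior, the caller's CMAKE_EXE_LINKER_FLAGS leaks into the project that try_compile() generates for the CAN_TARGET_${arch} checks. SIMPLE_SOURCE is the probe source defined in cmake/config-ix.cmake; the linker flag value below is only an example, not part of the patch.

    cmake_policy(SET CMP0056 NEW)                         # the behavior the patch avoids by pinning OLD
    set(CMAKE_EXE_LINKER_FLAGS "-Wl,-no_compact_unwind")  # example flag set by the calling project
    try_compile(CAN_TARGET_x86_64 ${CMAKE_BINARY_DIR} ${SIMPLE_SOURCE}
                COMPILE_DEFINITIONS "-m64"
                OUTPUT_VARIABLE TARGET_x86_64_OUTPUT)
    # Under NEW the generated test project links with the flag above as well; if
    # that link fails, CAN_TARGET_x86_64 stays FALSE and COMPILER_RT_SUPPORTED_ARCH
    # is never updated. Under OLD the test project's linker flags stay empty.
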
diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT
index 2159ad7..1254878 100644
--- a/CODE_OWNERS.TXT
+++ b/CODE_OWNERS.TXT
@@ -24,10 +24,6 @@
 E: howard.hinnant@gmail.com
 D: builtins library
 
-N: Sergey Matveev
-E: earthdok@google.com
-D: LeakSanitizer
-
 N: Alexander Potapenko
 E: glider@google.com
 D: MacOS/iOS port of sanitizers
@@ -38,7 +34,7 @@
 
 N: Kostya Serebryany
 E: kcc@google.com
-D: AddressSanitizer, sanitizer_common, porting sanitizers to another platforms
+D: AddressSanitizer, sanitizer_common, porting sanitizers to other platforms, LeakSanitizer
 
 N: Richard Smith
 E: richard-llvm@metafoo.co.uk
diff --git a/README.txt b/README.txt
index fc88432..2d64f00 100644
--- a/README.txt
+++ b/README.txt
@@ -8,4 +8,3 @@
 terms of the license agreement found in LICENSE.txt.
 
 ================================
-
diff --git a/cmake/Modules/AddCompilerRT.cmake b/cmake/Modules/AddCompilerRT.cmake
index feeb3ff..3342248 100644
--- a/cmake/Modules/AddCompilerRT.cmake
+++ b/cmake/Modules/AddCompilerRT.cmake
@@ -1,7 +1,28 @@
-include(AddLLVM)
 include(ExternalProject)
 include(CompilerRTUtils)
 
+function(set_target_output_directories target output_dir)
+  # For the *_OUTPUT_DIRECTORY target properties, multi-configuration generators
+  # append a per-configuration subdirectory to the specified directory.
+  # To avoid the appended folder, the configuration-specific variants must be
+  # set instead, i.e. '<KIND>_OUTPUT_DIRECTORY_${CONF}':
+  # RUNTIME_OUTPUT_DIRECTORY_DEBUG, RUNTIME_OUTPUT_DIRECTORY_RELEASE, ...
+  if(CMAKE_CONFIGURATION_TYPES)
+    foreach(build_mode ${CMAKE_CONFIGURATION_TYPES})
+      string(TOUPPER "${build_mode}" CONFIG_SUFFIX)
+      set_target_properties("${target}" PROPERTIES
+          "ARCHIVE_OUTPUT_DIRECTORY_${CONFIG_SUFFIX}" ${output_dir}
+          "LIBRARY_OUTPUT_DIRECTORY_${CONFIG_SUFFIX}" ${output_dir}
+          "RUNTIME_OUTPUT_DIRECTORY_${CONFIG_SUFFIX}" ${output_dir})
+    endforeach()
+  else()
+    set_target_properties("${target}" PROPERTIES
+        ARCHIVE_OUTPUT_DIRECTORY ${output_dir}
+        LIBRARY_OUTPUT_DIRECTORY ${output_dir}
+        RUNTIME_OUTPUT_DIRECTORY ${output_dir})
+  endif()
+endfunction()
+
 # Tries to add an "object library" target for a given list of OSs and/or
 # architectures with name "<name>.<arch>" for non-Darwin platforms if
 # architecture can be targeted, and "<name>.<os>" for Darwin platforms.
@@ -19,7 +40,7 @@
       set(libname "${name}.${os}")
       set(libnames ${libnames} ${libname})
       set(extra_cflags_${libname} ${DARWIN_${os}_CFLAGS})
-      list_union(LIB_ARCHS_${libname} DARWIN_${os}_ARCHS LIB_ARCHS)
+      list_intersect(LIB_ARCHS_${libname} DARWIN_${os}_ARCHS LIB_ARCHS)
     endforeach()
   else()
     foreach(arch ${LIB_ARCHS})
@@ -32,13 +53,14 @@
       endif()
     endforeach()
   endif()
-  
+
   foreach(libname ${libnames})
     add_library(${libname} OBJECT ${LIB_SOURCES})
     set_target_compile_flags(${libname}
       ${CMAKE_CXX_FLAGS} ${extra_cflags_${libname}} ${LIB_CFLAGS})
     set_property(TARGET ${libname} APPEND PROPERTY
       COMPILE_DEFINITIONS ${LIB_DEFS})
+    set_target_properties(${libname} PROPERTIES FOLDER "Compiler-RT Libraries")
     if(APPLE)
       set_target_properties(${libname} PROPERTIES
         OSX_ARCHITECTURES "${LIB_ARCHS_${libname}}")
@@ -87,7 +109,7 @@
         set(libname "${name}_${os}_dynamic")
         set(extra_linkflags_${libname} ${DARWIN_${os}_LINKFLAGS} ${LIB_LINKFLAGS})
       endif()
-      list_union(LIB_ARCHS_${libname} DARWIN_${os}_ARCHS LIB_ARCHS)
+      list_intersect(LIB_ARCHS_${libname} DARWIN_${os}_ARCHS LIB_ARCHS)
       if(LIB_ARCHS_${libname})
         list(APPEND libnames ${libname})
         set(extra_cflags_${libname} ${DARWIN_${os}_CFLAGS} ${LIB_CFLAGS})
@@ -107,7 +129,8 @@
         set(output_name_${libname} ${libname}${COMPILER_RT_OS_SUFFIX})
       else()
         set(libname "${name}-dynamic-${arch}")
-        set(extra_linkflags_${libname} ${TARGET_${arch}_CFLAGS} ${LIB_CFLAGS} ${LIB_LINKFLAGS})
+        set(extra_cflags_${libname} ${TARGET_${arch}_CFLAGS} ${LIB_CFLAGS})
+        set(extra_linkflags_${libname} ${TARGET_${arch}_LINKFLAGS} ${LIB_LINKFLAGS})
         if(WIN32)
           set(output_name_${libname} ${name}_dynamic-${arch}${COMPILER_RT_OS_SUFFIX})
         else()
@@ -126,21 +149,42 @@
   endif()
 
   if(LIB_PARENT_TARGET)
-    set(COMPONENT_OPTION COMPONENT ${LIB_PARENT_TARGET})
+    # If the parent targets aren't created yet, we should create them
+    if(NOT TARGET ${LIB_PARENT_TARGET})
+      add_custom_target(${LIB_PARENT_TARGET})
+    endif()
+    if(NOT TARGET install-${LIB_PARENT_TARGET})
+      # The parent install target specifies the parent component to scrape up
+      # anything not installed by the individual install targets, and to handle
+      # installation when running the multi-configuration generators.
+      add_custom_target(install-${LIB_PARENT_TARGET}
+                        DEPENDS ${LIB_PARENT_TARGET}
+                        COMMAND "${CMAKE_COMMAND}"
+                                -DCMAKE_INSTALL_COMPONENT=${LIB_PARENT_TARGET}
+                                -P "${CMAKE_BINARY_DIR}/cmake_install.cmake")
+      set_target_properties(install-${LIB_PARENT_TARGET} PROPERTIES
+                            FOLDER "Compiler-RT Misc")
+    endif()
   endif()
 
   foreach(libname ${libnames})
+    # If you are using a multi-configuration generator we don't generate
+    # per-library install rules, so we fall back to the parent target COMPONENT
+    if(CMAKE_CONFIGURATION_TYPES AND LIB_PARENT_TARGET)
+      set(COMPONENT_OPTION COMPONENT ${LIB_PARENT_TARGET})
+    else()
+      set(COMPONENT_OPTION COMPONENT ${libname})
+    endif()
+
     add_library(${libname} ${type} ${sources_${libname}})
     set_target_compile_flags(${libname} ${extra_cflags_${libname}})
     set_target_link_flags(${libname} ${extra_linkflags_${libname}})
-    set_property(TARGET ${libname} APPEND PROPERTY 
+    set_property(TARGET ${libname} APPEND PROPERTY
                 COMPILE_DEFINITIONS ${LIB_DEFS})
-    set_target_properties(${libname} PROPERTIES
-        ARCHIVE_OUTPUT_DIRECTORY ${COMPILER_RT_LIBRARY_OUTPUT_DIR}
-        LIBRARY_OUTPUT_DIRECTORY ${COMPILER_RT_LIBRARY_OUTPUT_DIR}
-        RUNTIME_OUTPUT_DIRECTORY ${COMPILER_RT_LIBRARY_OUTPUT_DIR})
+    set_target_output_directories(${libname} ${COMPILER_RT_LIBRARY_OUTPUT_DIR})
     set_target_properties(${libname} PROPERTIES
         OUTPUT_NAME ${output_name_${libname}})
+    set_target_properties(${libname} PROPERTIES FOLDER "Compiler-RT Runtime")
     if(LIB_LINK_LIBS AND ${type} STREQUAL "SHARED")
       target_link_libraries(${libname} ${LIB_LINK_LIBS})
     endif()
@@ -151,6 +195,21 @@
               ${COMPONENT_OPTION}
       RUNTIME DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR}
               ${COMPONENT_OPTION})
+
+    # We only want to generate per-library install targets if you aren't using
+    # an IDE, because the extra targets clutter up IDEs.
+    if(NOT CMAKE_CONFIGURATION_TYPES)
+      add_custom_target(install-${libname}
+                        DEPENDS ${libname}
+                        COMMAND "${CMAKE_COMMAND}"
+                                -DCMAKE_INSTALL_COMPONENT=${libname}
+                                -P "${CMAKE_BINARY_DIR}/cmake_install.cmake")
+      # If you have a parent target specified, we bind the new install target
+      # to the parent install target.
+      if(LIB_PARENT_TARGET)
+        add_dependencies(install-${LIB_PARENT_TARGET} install-${libname})
+      endif()
+    endif()
     if(APPLE)
       set_target_properties(${libname} PROPERTIES
       OSX_ARCHITECTURES "${LIB_ARCHS_${libname}}")
@@ -212,14 +271,18 @@
 #                      LINK_FLAGS <link flags>)
 macro(add_compiler_rt_test test_suite test_name)
   cmake_parse_arguments(TEST "" "SUBDIR" "OBJECTS;DEPS;LINK_FLAGS" "" ${ARGN})
+  set(output_bin ${CMAKE_CURRENT_BINARY_DIR})
   if(TEST_SUBDIR)
-    set(output_bin "${CMAKE_CURRENT_BINARY_DIR}/${TEST_SUBDIR}/${test_name}")
-  else()
-    set(output_bin "${CMAKE_CURRENT_BINARY_DIR}/${test_name}")
+    set(output_bin "${output_bin}/${TEST_SUBDIR}")
   endif()
+  if(CMAKE_CONFIGURATION_TYPES)
+    set(output_bin "${output_bin}/${CMAKE_CFG_INTDIR}")
+  endif()
+  set(output_bin "${output_bin}/${test_name}")
   if(MSVC)
     set(output_bin "${output_bin}.exe")
   endif()
+
   # Use host compiler in a standalone build, and just-built Clang otherwise.
   if(NOT COMPILER_RT_STANDALONE_BUILD)
     list(APPEND TEST_DEPS clang)
@@ -239,11 +302,13 @@
             -o "${output_bin}"
             ${TEST_LINK_FLAGS}
     DEPENDS ${TEST_DEPS})
+  set_target_properties(${test_name} PROPERTIES FOLDER "Compiler-RT Tests")
+
   # Make the test suite depend on the binary.
   add_dependencies(${test_suite} ${test_name})
 endmacro()
 
-macro(add_compiler_rt_resource_file target_name file_name)
+macro(add_compiler_rt_resource_file target_name file_name component)
   set(src_file "${CMAKE_CURRENT_SOURCE_DIR}/${file_name}")
   set(dst_file "${COMPILER_RT_OUTPUT_DIR}/${file_name}")
   add_custom_command(OUTPUT ${dst_file}
@@ -252,7 +317,12 @@
     COMMENT "Copying ${file_name}...")
   add_custom_target(${target_name} DEPENDS ${dst_file})
   # Install in Clang resource directory.
-  install(FILES ${file_name} DESTINATION ${COMPILER_RT_INSTALL_PATH})
+  install(FILES ${file_name}
+    DESTINATION ${COMPILER_RT_INSTALL_PATH}
+    COMPONENT ${component})
+  add_dependencies(${component} ${target_name})
+
+  set_target_properties(${target_name} PROPERTIES FOLDER "Compiler-RT Misc")
 endmacro()
 
 macro(add_compiler_rt_script name)
@@ -293,11 +363,12 @@
     SOURCE_DIR ${COMPILER_RT_LIBCXX_PATH}
     CMAKE_ARGS -DCMAKE_MAKE_PROGRAM:STRING=${CMAKE_MAKE_PROGRAM}
                -DCMAKE_C_COMPILER=${COMPILER_RT_TEST_COMPILER}
-               -DCMAKE_CXX_COMPILER=${COMPILER_RT_TEST_COMPILER}
+               -DCMAKE_CXX_COMPILER=${COMPILER_RT_TEST_CXX_COMPILER}
                -DCMAKE_C_FLAGS=${LIBCXX_CFLAGS}
                -DCMAKE_CXX_FLAGS=${LIBCXX_CFLAGS}
                -DCMAKE_BUILD_TYPE=Release
                -DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>
+               -DLLVM_PATH=${LLVM_MAIN_SRC_DIR}
     LOG_BUILD 1
     LOG_CONFIGURE 1
     LOG_INSTALL 1
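
A short usage sketch for the set_target_output_directories() helper added in this file; the target name and source file are placeholders, not part of the patch.

    add_library(clang_rt.example STATIC example.c)
    # Single-configuration generators get ARCHIVE/LIBRARY/RUNTIME_OUTPUT_DIRECTORY
    # set directly; multi-configuration generators get the per-configuration
    # variants (_DEBUG, _RELEASE, ...) so the library always lands in
    # ${COMPILER_RT_LIBRARY_OUTPUT_DIR} rather than a per-config subdirectory.
    set_target_output_directories(clang_rt.example ${COMPILER_RT_LIBRARY_OUTPUT_DIR})
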
diff --git a/cmake/Modules/BuiltinTests.cmake b/cmake/Modules/BuiltinTests.cmake
new file mode 100644
index 0000000..6bbf449
--- /dev/null
+++ b/cmake/Modules/BuiltinTests.cmake
@@ -0,0 +1,91 @@
+include(CMakeCheckCompilerFlagCommonPatterns)
+
+# try_compile_only(<output> [SOURCE <source>] [FLAGS <flags>...])
+# Checks whether the C compiler can compile (but not link) the given source
+# with the given flags, and stores the result in <output>.
+function(try_compile_only output)
+  cmake_parse_arguments(ARG "" "" "SOURCE;FLAGS" ${ARGN})
+  if(NOT ARG_SOURCE)
+    set(ARG_SOURCE "int foo(int x, int y) { return x + y; }\n")
+  endif()
+  set(SIMPLE_C ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/src.c)
+  file(WRITE ${SIMPLE_C} "${ARG_SOURCE}\n")
+  string(REGEX MATCHALL "<[A-Za-z0-9_]*>" substitutions
+         ${CMAKE_C_COMPILE_OBJECT})
+  string(REPLACE ";" " " extra_flags "${ARG_FLAGS}")
+
+  set(test_compile_command "${CMAKE_C_COMPILE_OBJECT}")
+  foreach(substitution ${substitutions})
+    if(substitution STREQUAL "<CMAKE_C_COMPILER>")
+      string(REPLACE "<CMAKE_C_COMPILER>"
+             "${CMAKE_C_COMPILER}" test_compile_command ${test_compile_command})
+    elseif(substitution STREQUAL "<OBJECT>")
+      string(REPLACE "<OBJECT>"
+             "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/test.o"
+             test_compile_command ${test_compile_command})
+    elseif(substitution STREQUAL "<SOURCE>")
+      string(REPLACE "<SOURCE>" "${SIMPLE_C}" test_compile_command
+             ${test_compile_command})
+    elseif(substitution STREQUAL "<FLAGS>")
+      string(REPLACE "<FLAGS>" "${CMAKE_C_FLAGS} ${extra_flags}"
+             test_compile_command ${test_compile_command})
+    else()
+      string(REPLACE "${substitution}" "" test_compile_command
+             ${test_compile_command})
+    endif()
+  endforeach()
+
+  string(REPLACE " " ";" test_compile_command "${test_compile_command}")
+
+  execute_process(
+    COMMAND ${test_compile_command}
+    RESULT_VARIABLE result
+    OUTPUT_VARIABLE TEST_OUTPUT
+    ERROR_VARIABLE TEST_ERROR
+  )
+
+  CHECK_COMPILER_FLAG_COMMON_PATTERNS(_CheckCCompilerFlag_COMMON_PATTERNS)
+  foreach(var ${_CheckCCompilerFlag_COMMON_PATTERNS})
+    if("${var}" STREQUAL "FAIL_REGEX")
+      continue()
+    endif()
+    if("${var}" MATCHES "${_CheckCCompilerFlag_COMMON_PATTERNS}")
+      set(ERRORS_FOUND True)
+    endif()
+  endforeach()
+
+  if(result EQUAL 0 AND NOT ERRORS_FOUND)
+    set(${output} True PARENT_SCOPE)
+  else()
+    file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
+        "Testing compiler for supporting " ${ARGN} ":\n"
+        "Command: ${test_compile_command}\n"
+        "${TEST_OUTPUT}\n${TEST_ERROR}\n${result}\n")
+    set(${output} False PARENT_SCOPE)
+  endif()
+endfunction()
+
+function(builtin_check_c_compiler_flag flag output)
+  if(NOT DEFINED ${output})
+    message(STATUS "Performing Test ${output}")
+    try_compile_only(result FLAGS ${flag})
+    set(${output} ${result} CACHE INTERNAL "Compiler supports ${flag}")
+    if(${result})
+      message(STATUS "Performing Test ${output} - Success")
+    else()
+      message(STATUS "Performing Test ${output} - Failed")
+    endif()
+  endif()
+endfunction()
+
+function(builtin_check_c_compiler_source output source)
+  if(NOT DEFINED ${output})
+    message(STATUS "Performing Test ${output}")
+    try_compile_only(result SOURCE ${source})
+    set(${output} ${result} CACHE INTERNAL "Compiler can compile test source for ${output}")
+    if(${result})
+      message(STATUS "Performing Test ${output} - Success")
+    else()
+      message(STATUS "Performing Test ${output} - Failed")
+    endif()
+  endif()
+endfunction()
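
A sketch of how these helpers are meant to be called (the real call sites are in cmake/builtin-config-ix.cmake later in this patch; the result variable names below are illustrative only).

    include(BuiltinTests)
    # Compile-only flag probe; it succeeds even without a working linker or
    # sysroot, which is the point of try_compile_only().
    builtin_check_c_compiler_flag(-fomit-frame-pointer EXAMPLE_HAS_OMIT_FP_FLAG)
    # Compile-only source probe.
    builtin_check_c_compiler_source(EXAMPLE_HAS_ATOMIC_KEYWORD
    "
    int foo(int x) { _Atomic int r = x; return r; }
    ")
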
diff --git a/cmake/Modules/CompilerRTCompile.cmake b/cmake/Modules/CompilerRTCompile.cmake
index 48f40bf..30663b6 100644
--- a/cmake/Modules/CompilerRTCompile.cmake
+++ b/cmake/Modules/CompilerRTCompile.cmake
@@ -90,8 +90,8 @@
       "  fi"
       "  echo 'This can also be fixed by checking out the libcxx project from llvm.org and installing the headers'"
       "  echo 'into your build directory:'"
-      "  echo '  cd ${LLVM_SOURCE_DIR}/projects && svn co http://llvm.org/svn/llvm-project/libcxx/trunk libcxx'"
-      "  echo '  cd ${LLVM_BINARY_DIR} && make -C ${LLVM_SOURCE_DIR}/projects/libcxx installheaders HEADER_DIR=${LLVM_BINARY_DIR}/include'"
+      "  echo '  cd ${LLVM_MAIN_SRC_DIR}/projects && svn co http://llvm.org/svn/llvm-project/libcxx/trunk libcxx'"
+      "  echo '  cd ${LLVM_BINARY_DIR} && make -C ${LLVM_MAIN_SRC_DIR}/projects/libcxx installheaders HEADER_DIR=${LLVM_BINARY_DIR}/include'"
       "  echo"
       "  false"
       "fi"
diff --git a/cmake/Modules/CompilerRTDarwinUtils.cmake b/cmake/Modules/CompilerRTDarwinUtils.cmake
index 8be28d9..fd19ff9 100644
--- a/cmake/Modules/CompilerRTDarwinUtils.cmake
+++ b/cmake/Modules/CompilerRTDarwinUtils.cmake
@@ -1,3 +1,5 @@
+include(CMakeParseArguments)
+
 # On OS X SDKs can be installed anywhere on the base system and xcode-select can
 # set the default Xcode to use. This function finds the SDKs that are present in
 # the current Xcode.
@@ -16,6 +18,8 @@
       OUTPUT_STRIP_TRAILING_WHITESPACE
       ERROR_FILE /dev/null
     )
+  else()
+    set(${var}_INTERNAL ${var_internal} PARENT_SCOPE)
   endif()
   set(${var} ${var_internal} PARENT_SCOPE)
 endfunction()
@@ -52,30 +56,36 @@
   endif()
 
   set(archs ${ARGN})
-  message(STATUS "Finding valid architectures for ${os}...")
-  set(SIMPLE_CPP ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/src.cpp)
-  file(WRITE ${SIMPLE_CPP} "#include <iostream>\nint main() { std::cout << std::endl; return 0; }\n")
-
-  set(os_linker_flags)
-  foreach(flag ${DARWIN_${os}_LINKFLAGS})
-    set(os_linker_flags "${os_linker_flags} ${flag}")
-  endforeach()
+  if(NOT TEST_COMPILE_ONLY)
+    message(STATUS "Finding valid architectures for ${os}...")
+    set(SIMPLE_C ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/src.c)
+    file(WRITE ${SIMPLE_C} "#include <stdio.h>\nint main() { printf(__FILE__); return 0; }\n")
+  
+    set(os_linker_flags)
+    foreach(flag ${DARWIN_${os}_LINKFLAGS})
+      set(os_linker_flags "${os_linker_flags} ${flag}")
+    endforeach()
+  endif()
 
   # The simple program will build for x86_64h on the simulator because it is 
   # compatible with x86_64 libraries (mostly), but since x86_64h isn't actually
   # a valid or useful architecture for the iOS simulator we should drop it.
-  if(${os} STREQUAL "iossim")
+  if(${os} MATCHES "^(iossim|tvossim|watchossim)$")
     list(REMOVE_ITEM archs "x86_64h")
   endif()
 
   set(working_archs)
   foreach(arch ${archs})
-    
+   
     set(arch_linker_flags "-arch ${arch} ${os_linker_flags}")
-    try_compile(CAN_TARGET_${os}_${arch} ${CMAKE_BINARY_DIR} ${SIMPLE_CPP}
-                COMPILE_DEFINITIONS "-v -arch ${arch}" ${DARWIN_${os}_CFLAGS}
-                CMAKE_FLAGS "-DCMAKE_EXE_LINKER_FLAGS=${arch_linker_flags}"
-                OUTPUT_VARIABLE TEST_OUTPUT)
+    if(TEST_COMPILE_ONLY)
+      try_compile_only(CAN_TARGET_${os}_${arch} -v -arch ${arch} ${DARWIN_${os}_CFLAGS})
+    else()
+      try_compile(CAN_TARGET_${os}_${arch} ${CMAKE_BINARY_DIR} ${SIMPLE_C}
+                  COMPILE_DEFINITIONS "-v -arch ${arch}" ${DARWIN_${os}_CFLAGS}
+                  CMAKE_FLAGS "-DCMAKE_EXE_LINKER_FLAGS=${arch_linker_flags}"
+                  OUTPUT_VARIABLE TEST_OUTPUT)
+    endif()
     if(${CAN_TARGET_${os}_${arch}})
       list(APPEND working_archs ${arch})
     else()
@@ -91,7 +101,7 @@
 # This function checks the host cpusubtype to see if it is post-haswell. Haswell
 # and later machines can run x86_64h binaries. Haswell is cpusubtype 8.
 function(darwin_filter_host_archs input output)
-  list_union(tmp_var DARWIN_osx_ARCHS ${input})
+  list_intersect(tmp_var DARWIN_osx_ARCHS ${input})
   execute_process(
     COMMAND sysctl hw.cpusubtype
     OUTPUT_VARIABLE SUBTYPE)
@@ -285,7 +295,7 @@
                       ../profile/InstrProfilingPlatformDarwin
                       ../profile/InstrProfilingWriter)
   foreach (os ${ARGN})
-    list_union(DARWIN_BUILTIN_ARCHS DARWIN_${os}_ARCHS BUILTIN_SUPPORTED_ARCH)
+    list_intersect(DARWIN_BUILTIN_ARCHS DARWIN_${os}_ARCHS BUILTIN_SUPPORTED_ARCH)
     foreach (arch ${DARWIN_BUILTIN_ARCHS})
       darwin_find_excluded_builtins_list(${arch}_${os}_EXCLUDED_BUILTINS
                               OS ${os}
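
The list_union -> list_intersect rename used throughout this file (the macro itself is defined in CompilerRTUtils.cmake below) computes the intersection of two lists; a small illustration with made-up values:

    set(DARWIN_EXAMPLE_ARCHS x86_64 i386 arm64)
    set(LIB_EXAMPLE_ARCHS    x86_64 arm64 armv7)
    list_intersect(COMMON_ARCHS DARWIN_EXAMPLE_ARCHS LIB_EXAMPLE_ARCHS)
    # COMMON_ARCHS is now "x86_64;arm64": only the items present in both inputs,
    # which is what the new name says, unlike "union".
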
diff --git a/cmake/Modules/CompilerRTUtils.cmake b/cmake/Modules/CompilerRTUtils.cmake
index cf690f4..2569297 100644
--- a/cmake/Modules/CompilerRTUtils.cmake
+++ b/cmake/Modules/CompilerRTUtils.cmake
@@ -1,3 +1,6 @@
+include(CMakePushCheckState)
+include(CheckSymbolExists)
+
 # Because compiler-rt spends a lot of time setting up custom compile flags,
 # define a handy helper function for it. The compile flags setting in CMake
 # has serious issues that make its syntax challenging at best.
@@ -45,9 +48,14 @@
   endif()
 endmacro()
 
-macro(append_no_rtti_flag list)
-  append_list_if(COMPILER_RT_HAS_FNO_RTTI_FLAG -fno-rtti ${list})
-  append_list_if(COMPILER_RT_HAS_GR_FLAG /GR- ${list})
+macro(append_rtti_flag polarity list)
+  if(polarity)
+    append_list_if(COMPILER_RT_HAS_FRTTI_FLAG -frtti ${list})
+    append_list_if(COMPILER_RT_HAS_GR_FLAG /GR ${list})
+  else()
+    append_list_if(COMPILER_RT_HAS_FNO_RTTI_FLAG -fno-rtti ${list})
+    append_list_if(COMPILER_RT_HAS_GR_FLAG /GR- ${list})
+  endif()
 endmacro()
 
 macro(append_have_file_definition filename varname list)
@@ -58,7 +66,7 @@
   list(APPEND ${list} "${varname}=${${varname}}")
 endmacro()
 
-macro(list_union output input1 input2)
+macro(list_intersect output input1 input2)
   set(${output})
   foreach(it ${${input1}})
     list(FIND ${input2} ${it} index)
@@ -67,3 +75,150 @@
     endif()
   endforeach()
 endmacro()
+
+# Takes ${ARGN} and puts only supported architectures in @out_var list.
+function(filter_available_targets out_var)
+  set(archs ${${out_var}})
+  foreach(arch ${ARGN})
+    list(FIND COMPILER_RT_SUPPORTED_ARCH ${arch} ARCH_INDEX)
+    if(NOT (ARCH_INDEX EQUAL -1) AND CAN_TARGET_${arch})
+      list(APPEND archs ${arch})
+    endif()
+  endforeach()
+  set(${out_var} ${archs} PARENT_SCOPE)
+endfunction()
+
+function(check_compile_definition def argstring out_var)
+  if("${def}" STREQUAL "")
+    set(${out_var} TRUE PARENT_SCOPE)
+    return()
+  endif()
+  cmake_push_check_state()
+  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${argstring}")
+  check_symbol_exists(${def} "" ${out_var})
+  cmake_pop_check_state()
+endfunction()
+
+# test_target_arch(<arch> <def> <target flags...>)
+# Checks if architecture is supported: runs host compiler with provided
+# flags to verify that:
+#   1) <def> is defined (if non-empty)
+#   2) simple file can be successfully built.
+# If successful, saves target flags for this architecture.
+macro(test_target_arch arch def)
+  set(TARGET_${arch}_CFLAGS ${ARGN})
+  set(TARGET_${arch}_LINKFLAGS ${ARGN})
+  set(argstring "")
+  foreach(arg ${ARGN})
+    set(argstring "${argstring} ${arg}")
+  endforeach()
+  check_compile_definition("${def}" "${argstring}" HAS_${arch}_DEF)
+  if(NOT DEFINED CAN_TARGET_${arch})
+    if(NOT HAS_${arch}_DEF)
+      set(CAN_TARGET_${arch} FALSE)
+    elseif(TEST_COMPILE_ONLY)
+      try_compile_only(CAN_TARGET_${arch} ${TARGET_${arch}_CFLAGS})
+    else()
+      set(argstring "${CMAKE_EXE_LINKER_FLAGS} ${argstring}")
+      try_compile(CAN_TARGET_${arch} ${CMAKE_BINARY_DIR} ${SIMPLE_SOURCE}
+                  COMPILE_DEFINITIONS "${TARGET_${arch}_CFLAGS}"
+                  OUTPUT_VARIABLE TARGET_${arch}_OUTPUT
+                  CMAKE_FLAGS "-DCMAKE_EXE_LINKER_FLAGS:STRING=${argstring}")
+    endif()
+  endif()
+  if(${CAN_TARGET_${arch}})
+    list(APPEND COMPILER_RT_SUPPORTED_ARCH ${arch})
+  elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" STREQUAL "${arch}" AND
+         COMPILER_RT_HAS_EXPLICIT_DEFAULT_TARGET_TRIPLE)
+    # Bail out if we cannot target the architecture we plan to test.
+    message(FATAL_ERROR "Cannot compile for ${arch}:\n${TARGET_${arch}_OUTPUT}")
+  endif()
+endmacro()
+
+macro(detect_target_arch)
+  check_symbol_exists(__arm__ "" __ARM)
+  check_symbol_exists(__aarch64__ "" __AARCH64)
+  check_symbol_exists(__x86_64__ "" __X86_64)
+  check_symbol_exists(__i686__ "" __I686)
+  check_symbol_exists(__i386__ "" __I386)
+  check_symbol_exists(__mips__ "" __MIPS)
+  check_symbol_exists(__mips64__ "" __MIPS64)
+  check_symbol_exists(__s390x__ "" __S390X)
+  check_symbol_exists(__wasm32__ "" __WEBASSEMBLY32)
+  check_symbol_exists(__wasm64__ "" __WEBASSEMBLY64)
+  if(__ARM)
+    add_default_target_arch(arm)
+  elseif(__AARCH64)
+    add_default_target_arch(aarch64)
+  elseif(__X86_64)
+    add_default_target_arch(x86_64)
+  elseif(__I686)
+    add_default_target_arch(i686)
+  elseif(__I386)
+    add_default_target_arch(i386)
+  elseif(__MIPS64) # must be checked before __MIPS
+    add_default_target_arch(mips64)
+  elseif(__MIPS)
+    add_default_target_arch(mips)
+  elseif(__S390X)
+    add_default_target_arch(s390x)
+  elseif(__WEBASSEMBLY32)
+    add_default_target_arch(wasm32)
+  elseif(__WEBASSEMBLY64)
+    add_default_target_arch(wasm64)
+  endif()
+endmacro()
+
+macro(load_llvm_config)
+  if (NOT LLVM_CONFIG_PATH)
+    find_program(LLVM_CONFIG_PATH "llvm-config"
+                 DOC "Path to llvm-config binary")
+    if (NOT LLVM_CONFIG_PATH)
+      message(FATAL_ERROR "llvm-config not found: specify LLVM_CONFIG_PATH")
+    endif()
+  endif()
+  execute_process(
+    COMMAND ${LLVM_CONFIG_PATH} "--obj-root" "--bindir" "--libdir" "--src-root"
+    RESULT_VARIABLE HAD_ERROR
+    OUTPUT_VARIABLE CONFIG_OUTPUT)
+  if (HAD_ERROR)
+    message(FATAL_ERROR "llvm-config failed with status ${HAD_ERROR}")
+  endif()
+  string(REGEX REPLACE "[ \t]*[\r\n]+[ \t]*" ";" CONFIG_OUTPUT ${CONFIG_OUTPUT})
+  list(GET CONFIG_OUTPUT 0 LLVM_BINARY_DIR)
+  list(GET CONFIG_OUTPUT 1 LLVM_TOOLS_BINARY_DIR)
+  list(GET CONFIG_OUTPUT 2 LLVM_LIBRARY_DIR)
+  list(GET CONFIG_OUTPUT 3 LLVM_MAIN_SRC_DIR)
+
+  # Make use of LLVM CMake modules.
+  file(TO_CMAKE_PATH ${LLVM_BINARY_DIR} LLVM_BINARY_DIR_CMAKE_STYLE)
+  set(LLVM_CMAKE_PATH "${LLVM_BINARY_DIR_CMAKE_STYLE}/lib${LLVM_LIBDIR_SUFFIX}/cmake/llvm")
+  list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_PATH}")
+  # Get some LLVM variables from LLVMConfig.
+  include("${LLVM_CMAKE_PATH}/LLVMConfig.cmake")
+
+  set(LLVM_LIBRARY_OUTPUT_INTDIR
+    ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${LLVM_LIBDIR_SUFFIX})
+endmacro()
+
+macro(construct_compiler_rt_default_triple)
+  set(COMPILER_RT_DEFAULT_TARGET_TRIPLE ${TARGET_TRIPLE} CACHE STRING
+      "Default triple for which compiler-rt runtimes will be built.")
+  if(DEFINED COMPILER_RT_TEST_TARGET_TRIPLE)
+    # Backwards compatibility: this variable used to be called
+    # COMPILER_RT_TEST_TARGET_TRIPLE.
+    set(COMPILER_RT_DEFAULT_TARGET_TRIPLE ${COMPILER_RT_TEST_TARGET_TRIPLE})
+  endif()
+
+  string(REPLACE "-" ";" TARGET_TRIPLE_LIST ${COMPILER_RT_DEFAULT_TARGET_TRIPLE})
+  list(GET TARGET_TRIPLE_LIST 0 COMPILER_RT_DEFAULT_TARGET_ARCH)
+  list(GET TARGET_TRIPLE_LIST 1 COMPILER_RT_DEFAULT_TARGET_OS)
+  list(GET TARGET_TRIPLE_LIST 2 COMPILER_RT_DEFAULT_TARGET_ABI)
+  # Determine if test target triple is specified explicitly, and doesn't match the
+  # default.
+  if(NOT COMPILER_RT_DEFAULT_TARGET_TRIPLE STREQUAL TARGET_TRIPLE)
+    set(COMPILER_RT_HAS_EXPLICIT_DEFAULT_TARGET_TRIPLE TRUE)
+  else()
+    set(COMPILER_RT_HAS_EXPLICIT_DEFAULT_TARGET_TRIPLE FALSE)
+  endif()
+endmacro()
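
For reference, what construct_compiler_rt_default_triple() yields for an example triple (the triple value is illustrative):

    # With COMPILER_RT_DEFAULT_TARGET_TRIPLE = "arm-linux-androideabi", the
    # string(REPLACE ...) / list(GET ...) calls above produce:
    #   COMPILER_RT_DEFAULT_TARGET_ARCH = arm
    #   COMPILER_RT_DEFAULT_TARGET_OS   = linux
    #   COMPILER_RT_DEFAULT_TARGET_ABI  = androideabi
    # The ABI value is what the top-level CMakeLists.txt compares against
    # "androideabi" to set ANDROID=1.
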
diff --git a/cmake/Modules/SanitizerUtils.cmake b/cmake/Modules/SanitizerUtils.cmake
index 3eb49c8..c66083c 100644
--- a/cmake/Modules/SanitizerUtils.cmake
+++ b/cmake/Modules/SanitizerUtils.cmake
@@ -38,22 +38,8 @@
       DEPENDS ${stamp}
       SOURCES ${SANITIZER_GEN_DYNAMIC_LIST} ${ARG_EXTRA})
 
-    if(NOT CMAKE_VERSION VERSION_LESS 3.0)
-      install(FILES $<TARGET_FILE:${target_name}>.syms
-              DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR})
-    else()
-      # Per-config install location.
-      if(CMAKE_CONFIGURATION_TYPES)
-        foreach(c ${CMAKE_CONFIGURATION_TYPES})
-          get_target_property(libfile ${target_name} LOCATION_${c})
-          install(FILES ${libfile}.syms CONFIGURATIONS ${c}
+    install(FILES $<TARGET_FILE:${target_name}>.syms
             DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR})
-        endforeach()
-      else()
-        get_target_property(libfile ${target_name} LOCATION_${CMAKE_BUILD_TYPE})
-        install(FILES ${libfile}.syms DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR})
-      endif()
-    endif()
     if(ARG_PARENT_TARGET)
       add_dependencies(${ARG_PARENT_TARGET} ${target_name}-symbols)
     endif()
@@ -84,9 +70,9 @@
 endmacro()
 
 # Add target to check code style for sanitizer runtimes.
-if(UNIX)
+if(CMAKE_HOST_UNIX)
   add_custom_target(SanitizerLintCheck
-    COMMAND LLVM_CHECKOUT=${LLVM_MAIN_SRC_DIR} SILENT=1 TMPDIR=
+    COMMAND env LLVM_CHECKOUT=${LLVM_MAIN_SRC_DIR} SILENT=1 TMPDIR=
       PYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}
       COMPILER_RT=${COMPILER_RT_SOURCE_DIR}
       ${SANITIZER_LINT_SCRIPT}
diff --git a/cmake/base-config-ix.cmake b/cmake/base-config-ix.cmake
new file mode 100644
index 0000000..4f3976d
--- /dev/null
+++ b/cmake/base-config-ix.cmake
@@ -0,0 +1,173 @@
+# The CompilerRT build system requires CMake version 2.8.8 or higher in order
+# to use its support for building convenience "libraries" as a collection of
+# .o files. This is particularly useful in producing larger, more complex
+# runtime libraries.
+
+include(CheckIncludeFile)
+check_include_file(unwind.h HAVE_UNWIND_H)
+
+# Top level target used to build all compiler-rt libraries.
+add_custom_target(compiler-rt ALL)
+set_target_properties(compiler-rt PROPERTIES FOLDER "Compiler-RT Misc")
+
+# Setting these variables from an LLVM build is sufficient for compiler-rt to
+# construct the output paths, so it can behave as if it were an in-tree build here.
+if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION)
+  set(LLVM_TREE_AVAILABLE On)
+endif()
+
+if (LLVM_TREE_AVAILABLE)
+  # Compute the Clang version from the LLVM version.
+  # FIXME: We should be able to reuse CLANG_VERSION variable calculated
+  #        in Clang cmake files, instead of copying the rules here.
+  string(REGEX MATCH "[0-9]+\\.[0-9]+(\\.[0-9]+)?" CLANG_VERSION
+         ${PACKAGE_VERSION})
+  # Setup the paths where compiler-rt runtimes and headers should be stored.
+  set(COMPILER_RT_OUTPUT_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}/clang/${CLANG_VERSION})
+  set(COMPILER_RT_EXEC_OUTPUT_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR})
+  set(COMPILER_RT_INSTALL_PATH lib${LLVM_LIBDIR_SUFFIX}/clang/${CLANG_VERSION})
+  option(COMPILER_RT_INCLUDE_TESTS "Generate and build compiler-rt unit tests."
+         ${LLVM_INCLUDE_TESTS})
+  option(COMPILER_RT_ENABLE_WERROR "Fail and stop if warning is triggered"
+         ${LLVM_ENABLE_WERROR})
+  # Use just-built Clang to compile/link tests on all platforms, except for
+  # Windows where we need to use clang-cl instead.
+  if(NOT MSVC)
+    set(COMPILER_RT_TEST_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
+    set(COMPILER_RT_TEST_CXX_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++)
+  else()
+    set(COMPILER_RT_TEST_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang.exe)
+    set(COMPILER_RT_TEST_CXX_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++.exe)
+  endif()
+else()
+  # Take output dir and install path from the user.
+  set(COMPILER_RT_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR} CACHE PATH
+    "Path where built compiler-rt libraries should be stored.")
+  set(COMPILER_RT_EXEC_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/bin CACHE PATH
+    "Path where built compiler-rt executables should be stored.")
+  set(COMPILER_RT_INSTALL_PATH ${CMAKE_INSTALL_PREFIX} CACHE PATH
+    "Path where built compiler-rt libraries should be installed.")
+  option(COMPILER_RT_INCLUDE_TESTS "Generate and build compiler-rt unit tests." OFF)
+  option(COMPILER_RT_ENABLE_WERROR "Fail and stop if warning is triggered" OFF)
+  # Use a host compiler to compile/link tests.
+  set(COMPILER_RT_TEST_COMPILER ${CMAKE_C_COMPILER} CACHE PATH "Compiler to use for testing")
+  set(COMPILER_RT_TEST_CXX_COMPILER ${CMAKE_CXX_COMPILER} CACHE PATH "C++ Compiler to use for testing")
+endif()
+
+if("${COMPILER_RT_TEST_COMPILER}" MATCHES "clang[+]*$")
+  set(COMPILER_RT_TEST_COMPILER_ID Clang)
+elseif("${COMPILER_RT_TEST_COMPILER}" MATCHES "clang.*.exe$")
+  set(COMPILER_RT_TEST_COMPILER_ID Clang)
+else()
+  set(COMPILER_RT_TEST_COMPILER_ID GNU)
+endif()
+
+string(TOLOWER ${CMAKE_SYSTEM_NAME} COMPILER_RT_OS_DIR)
+set(COMPILER_RT_LIBRARY_OUTPUT_DIR
+  ${COMPILER_RT_OUTPUT_DIR}/lib/${COMPILER_RT_OS_DIR})
+set(COMPILER_RT_LIBRARY_INSTALL_DIR
+  ${COMPILER_RT_INSTALL_PATH}/lib/${COMPILER_RT_OS_DIR})
+
+if(APPLE)
+  # On Darwin if /usr/include doesn't exist, the user probably has Xcode but not
+  # the command line tools. If this is the case, we need to find the OS X
+  # sysroot to pass to clang.
+  if(NOT EXISTS /usr/include)
+    execute_process(COMMAND xcodebuild -version -sdk macosx Path
+       OUTPUT_VARIABLE OSX_SYSROOT
+       ERROR_QUIET
+       OUTPUT_STRIP_TRAILING_WHITESPACE)
+    set(OSX_SYSROOT_FLAG "-isysroot${OSX_SYSROOT}")
+  endif()
+
+  option(COMPILER_RT_ENABLE_IOS "Enable building for iOS" Off)
+  option(COMPILER_RT_ENABLE_WATCHOS "Enable building for watchOS - Experimental" Off)
+  option(COMPILER_RT_ENABLE_TVOS "Enable building for tvOS - Experimental" Off)
+endif()
+
+macro(test_targets)
+  set(MSVC_VERSION_FLAG "")
+  if (MSVC)
+    # Find and run MSVC (not clang-cl) and get its version. This will tell
+    # clang-cl what version of MSVC to pretend to be so that the STL works.
+    execute_process(COMMAND "$ENV{VSINSTALLDIR}/VC/bin/cl.exe"
+      OUTPUT_QUIET
+      ERROR_VARIABLE MSVC_COMPAT_VERSION
+      )
+    string(REGEX REPLACE "^.*Compiler Version ([0-9.]+) for .*$" "\\1"
+      MSVC_COMPAT_VERSION "${MSVC_COMPAT_VERSION}")
+    if (MSVC_COMPAT_VERSION MATCHES "^[0-9].+$")
+      set(MSVC_VERSION_FLAG "-fms-compatibility-version=${MSVC_COMPAT_VERSION}")
+      # Add this flag into the host build if this is clang-cl.
+      if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+        append("${MSVC_VERSION_FLAG}" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+      elseif (COMPILER_RT_TEST_COMPILER_ID MATCHES "Clang")
+        # Add this flag to test compiles to suppress clang's auto-detection
+        # logic.
+        append("${MSVC_VERSION_FLAG}" COMPILER_RT_TEST_COMPILER_CFLAGS)
+      endif()
+    endif()
+  endif()
+
+  # Generate the COMPILER_RT_SUPPORTED_ARCH list.
+  if(ANDROID)
+    # Examine compiler output to determine target architecture.
+    detect_target_arch()
+    set(COMPILER_RT_OS_SUFFIX "-android")
+  elseif(NOT APPLE) # Supported archs for Apple platforms are generated later
+    if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "i[2-6]86|x86|amd64")
+      if(NOT MSVC)
+        test_target_arch(x86_64 "" "-m64")
+        # FIXME: We build runtimes for both i686 and i386, as "clang -m32" may
+        # target different variant than "$CMAKE_C_COMPILER -m32". This part should
+        # be gone after we resolve PR14109.
+        test_target_arch(i686 __i686__ "-m32")
+        test_target_arch(i386 __i386__ "-m32")
+      else()
+        if (CMAKE_SIZEOF_VOID_P EQUAL 4)
+          test_target_arch(i386 "" "")
+        else()
+          test_target_arch(x86_64 "" "")
+        endif()
+      endif()
+    elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "powerpc")
+      TEST_BIG_ENDIAN(HOST_IS_BIG_ENDIAN)
+      if(HOST_IS_BIG_ENDIAN)
+        test_target_arch(powerpc64 "" "-m64")
+      else()
+        test_target_arch(powerpc64le "" "-m64")
+      endif()
+    elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "s390x")
+      test_target_arch(s390x "" "")
+    elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "mipsel|mips64el")
+      # GCC doesn't accept -m32/-m64, so we do the next best thing and use
+      # -mips32r2/-mips64r2. We don't use -mips1/-mips3 because we want to match
+      # Clang's default CPUs. In the 64-bit case, we must also specify the ABI
+      # since the default ABI differs between GCC and Clang.
+      # FIXME: Ideally, we would build the N32 library too.
+      test_target_arch(mipsel "" "-mips32r2" "--target=mipsel-linux-gnu")
+      test_target_arch(mips64el "" "-mips64r2" "--target=mips64el-linux-gnu" "-mabi=64")
+    elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "mips")
+      test_target_arch(mips "" "-mips32r2" "--target=mips-linux-gnu")
+      test_target_arch(mips64 "" "-mips64r2" "--target=mips64-linux-gnu" "-mabi=64")
+    elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "arm")
+      if(WIN32)
+        test_target_arch(arm "" "" "")
+      else()
+        test_target_arch(arm "" "-march=armv7-a" "-mfloat-abi=soft")
+        test_target_arch(armhf "" "-march=armv7-a" "-mfloat-abi=hard")
+      endif()
+    elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "aarch32")
+      test_target_arch(aarch32 "" "-march=armv8-a")
+    elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "aarch64")
+      test_target_arch(aarch64 "" "-march=armv8-a")
+    elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "wasm32")
+      test_target_arch(wasm32 "" "--target=wasm32-unknown-unknown")
+    elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "wasm64")
+      test_target_arch(wasm64 "" "--target=wasm64-unknown-unknown")
+    endif()
+    set(COMPILER_RT_OS_SUFFIX "")
+  endif()
+endmacro()
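
A sketch of the "behave as if in-tree" case described at the top of this file: a standalone configure that defines the three variables below takes the LLVM_TREE_AVAILABLE branch and reuses the in-tree output layout. The paths and version are placeholders.

    # Hypothetical cache entries for a standalone build mirroring an LLVM tree:
    set(LLVM_LIBRARY_OUTPUT_INTDIR /path/to/llvm-build/lib CACHE PATH   "")
    set(LLVM_RUNTIME_OUTPUT_INTDIR /path/to/llvm-build/bin CACHE PATH   "")
    set(PACKAGE_VERSION            3.9.0                   CACHE STRING "")
    # With all three defined, COMPILER_RT_OUTPUT_DIR becomes
    # /path/to/llvm-build/lib/clang/3.9.0, the same layout an in-tree build uses.
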
diff --git a/cmake/builtin-config-ix.cmake b/cmake/builtin-config-ix.cmake
new file mode 100644
index 0000000..da055b5
--- /dev/null
+++ b/cmake/builtin-config-ix.cmake
@@ -0,0 +1,179 @@
+include(BuiltinTests)
+include(CheckCSourceCompiles)
+
+# Make all the tests only check the compiler
+set(TEST_COMPILE_ONLY On)
+
+builtin_check_c_compiler_flag(-fPIC                 COMPILER_RT_HAS_FPIC_FLAG)
+builtin_check_c_compiler_flag(-fPIE                 COMPILER_RT_HAS_FPIE_FLAG)
+builtin_check_c_compiler_flag(-fno-builtin          COMPILER_RT_HAS_FNO_BUILTIN_FLAG)
+builtin_check_c_compiler_flag(-std=c99              COMPILER_RT_HAS_STD_C99_FLAG)
+builtin_check_c_compiler_flag(-fvisibility=hidden   COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG)
+builtin_check_c_compiler_flag(-fomit-frame-pointer  COMPILER_RT_HAS_OMIT_FRAME_POINTER_FLAG)
+builtin_check_c_compiler_flag(-ffreestanding        COMPILER_RT_HAS_FREESTANDING_FLAG)
+builtin_check_c_compiler_flag(-mfloat-abi=soft      COMPILER_RT_HAS_FLOAT_ABI_SOFT_FLAG)
+builtin_check_c_compiler_flag(-mfloat-abi=hard      COMPILER_RT_HAS_FLOAT_ABI_HARD_FLAG)
+builtin_check_c_compiler_flag(-static               COMPILER_RT_HAS_STATIC_FLAG)
+
+builtin_check_c_compiler_source(COMPILER_RT_HAS_ATOMIC_KEYWORD
+"
+int foo(int x, int y) {
+ _Atomic int result = x * y;
+ return result;
+}
+")
+
+
+set(ARM64 aarch64)
+set(ARM32 arm armhf)
+set(X86 i386 i686)
+set(X86_64 x86_64)
+set(MIPS32 mips mipsel)
+set(MIPS64 mips64 mips64el)
+set(PPC64 powerpc64 powerpc64le)
+set(WASM32 wasm32)
+set(WASM64 wasm64)
+
+if(APPLE)
+  set(ARM64 arm64)
+  set(ARM32 armv7 armv7k armv7s)
+  set(X86_64 x86_64 x86_64h)
+endif()
+
+set(ALL_BUILTIN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
+    ${MIPS32} ${MIPS64} ${WASM32} ${WASM64})
+
+include(CompilerRTUtils)
+include(CompilerRTDarwinUtils)
+
+if(APPLE)
+
+  find_darwin_sdk_dir(DARWIN_osx_SYSROOT macosx)
+  find_darwin_sdk_dir(DARWIN_iossim_SYSROOT iphonesimulator)
+  find_darwin_sdk_dir(DARWIN_ios_SYSROOT iphoneos)
+  find_darwin_sdk_dir(DARWIN_watchossim_SYSROOT watchsimulator)
+  find_darwin_sdk_dir(DARWIN_watchos_SYSROOT watchos)
+  find_darwin_sdk_dir(DARWIN_tvossim_SYSROOT appletvsimulator)
+  find_darwin_sdk_dir(DARWIN_tvos_SYSROOT appletvos)
+
+  set(DARWIN_EMBEDDED_PLATFORMS)
+  set(DARWIN_osx_BUILTIN_MIN_VER 10.5)
+  set(DARWIN_osx_BUILTIN_MIN_VER_FLAG
+      -mmacosx-version-min=${DARWIN_osx_BUILTIN_MIN_VER})
+
+  if(COMPILER_RT_ENABLE_IOS)
+    list(APPEND DARWIN_EMBEDDED_PLATFORMS ios)
+    set(DARWIN_ios_MIN_VER_FLAG -miphoneos-version-min)
+    set(DARWIN_ios_BUILTIN_MIN_VER 6.0)
+    set(DARWIN_ios_BUILTIN_MIN_VER_FLAG
+      ${DARWIN_ios_MIN_VER_FLAG}=${DARWIN_ios_BUILTIN_MIN_VER})
+  endif()
+  if(COMPILER_RT_ENABLE_WATCHOS)
+    list(APPEND DARWIN_EMBEDDED_PLATFORMS watchos)
+    set(DARWIN_watchos_MIN_VER_FLAG -mwatchos-version-min)
+    set(DARWIN_watchos_BUILTIN_MIN_VER 2.0)
+    set(DARWIN_watchos_BUILTIN_MIN_VER_FLAG
+      ${DARWIN_watchos_MIN_VER_FLAG}=${DARWIN_watchos_BUILTIN_MIN_VER})
+  endif()
+  if(COMPILER_RT_ENABLE_TVOS)
+    list(APPEND DARWIN_EMBEDDED_PLATFORMS tvos)
+    set(DARWIN_tvos_MIN_VER_FLAG -mtvos-version-min)
+    set(DARWIN_tvos_BUILTIN_MIN_VER 9.0)
+    set(DARWIN_tvos_BUILTIN_MIN_VER_FLAG
+      ${DARWIN_tvos_MIN_VER_FLAG}=${DARWIN_tvos_BUILTIN_MIN_VER})
+  endif()
+
+  set(BUILTIN_SUPPORTED_OS osx)
+
+  # We're setting the flag manually for each target OS
+  set(CMAKE_OSX_DEPLOYMENT_TARGET "")
+
+  if(NOT DARWIN_osx_ARCHS)
+    set(DARWIN_osx_ARCHS i386 x86_64 x86_64h)
+  endif()
+
+  set(DARWIN_sim_ARCHS i386 x86_64)
+  set(DARWIN_device_ARCHS armv7 armv7s armv7k arm64)
+
+  message(STATUS "OSX supported arches: ${DARWIN_osx_ARCHS}")
+  foreach(arch ${DARWIN_osx_ARCHS})
+    list(APPEND COMPILER_RT_SUPPORTED_ARCH ${arch})
+    set(CAN_TARGET_${arch} 1)
+  endforeach()
+
+  # Need to build a 10.4 compatible libclang_rt
+  set(DARWIN_10.4_SYSROOT ${DARWIN_osx_SYSROOT})
+  set(DARWIN_10.4_BUILTIN_MIN_VER 10.4)
+  set(DARWIN_10.4_BUILTIN_MIN_VER_FLAG
+      -mmacosx-version-min=${DARWIN_10.4_BUILTIN_MIN_VER})
+  set(DARWIN_10.4_SKIP_CC_KEXT On)
+  darwin_test_archs(10.4 DARWIN_10.4_ARCHS i386 x86_64)
+  message(STATUS "OSX 10.4 supported builtin arches: ${DARWIN_10.4_ARCHS}")
+  if(DARWIN_10.4_ARCHS)
+    # don't include the Haswell slice in the 10.4 compatibility library
+    list(REMOVE_ITEM DARWIN_10.4_ARCHS x86_64h)
+    list(APPEND BUILTIN_SUPPORTED_OS 10.4)
+  endif()
+
+  foreach(platform ${DARWIN_EMBEDDED_PLATFORMS})
+    if(DARWIN_${platform}sim_SYSROOT)
+      set(DARWIN_${platform}sim_BUILTIN_MIN_VER
+        ${DARWIN_${platform}_BUILTIN_MIN_VER})
+      set(DARWIN_${platform}sim_BUILTIN_MIN_VER_FLAG
+        ${DARWIN_${platform}_BUILTIN_MIN_VER_FLAG})
+
+      set(DARWIN_${platform}sim_SKIP_CC_KEXT On)
+
+      set(test_arches ${DARWIN_sim_ARCHS})
+      if(DARWIN_${platform}sim_ARCHS)
+        set(test_arches DARWIN_${platform}sim_ARCHS)
+      endif()
+
+      darwin_test_archs(${platform}sim
+        DARWIN_${platform}sim_ARCHS
+        ${test_arches})
+      message(STATUS "${platform} Simulator supported builtin arches: ${DARWIN_${platform}sim_ARCHS}")
+      if(DARWIN_${platform}sim_ARCHS)
+        list(APPEND BUILTIN_SUPPORTED_OS ${platform}sim)
+      endif()
+      foreach(arch ${DARWIN_${platform}sim_ARCHS})
+        list(APPEND COMPILER_RT_SUPPORTED_ARCH ${arch})
+        set(CAN_TARGET_${arch} 1)
+      endforeach()
+    endif()
+
+    if(DARWIN_${platform}_SYSROOT)
+      set(test_arches ${DARWIN_device_ARCHS})
+      if(DARWIN_${platform}_ARCHS)
+        set(test_arches DARWIN_${platform}_ARCHS)
+      endif()
+
+      darwin_test_archs(${platform}
+        DARWIN_${platform}_ARCHS
+        ${test_arches})
+      message(STATUS "${platform} supported builtin arches: ${DARWIN_${platform}_ARCHS}")
+      if(DARWIN_${platform}_ARCHS)
+        list(APPEND BUILTIN_SUPPORTED_OS ${platform})
+      endif()
+      foreach(arch ${DARWIN_${platform}_ARCHS})
+        list(APPEND COMPILER_RT_SUPPORTED_ARCH ${arch})
+        set(CAN_TARGET_${arch} 1)
+      endforeach()
+    endif()
+  endforeach()
+
+  list_intersect(BUILTIN_SUPPORTED_ARCH ALL_BUILTIN_SUPPORTED_ARCH COMPILER_RT_SUPPORTED_ARCH)
+
+else()
+  # If we're not building the builtins standalone, just rely on the tests in
+  # config-ix.cmake to tell us what to build. Otherwise we need to do some leg
+  # work here...
+  if(COMPILER_RT_BUILTINS_STANDALONE_BUILD)
+    test_targets()
+  endif()
+  # Architectures supported by compiler-rt libraries.
+  filter_available_targets(BUILTIN_SUPPORTED_ARCH
+    ${ALL_BUILTIN_SUPPORTED_ARCH})
+endif()
+
+message(STATUS "Builtin supported architectures: ${BUILTIN_SUPPORTED_ARCH}")
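
A small worked example of the filter_available_targets() call just above (architecture values are illustrative):

    # Suppose the compile checks produced
    #   COMPILER_RT_SUPPORTED_ARCH = x86_64;i386  with CAN_TARGET_x86_64 and
    #   CAN_TARGET_i386 both TRUE.
    # Then
    #   filter_available_targets(BUILTIN_SUPPORTED_ARCH x86_64 i386 arm)
    # keeps x86_64 and i386 but drops arm, because arm is neither in
    # COMPILER_RT_SUPPORTED_ARCH nor marked CAN_TARGET_arm.
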
diff --git a/cmake/caches/Apple.cmake b/cmake/caches/Apple.cmake
new file mode 100644
index 0000000..cdee3c0
--- /dev/null
+++ b/cmake/caches/Apple.cmake
@@ -0,0 +1,15 @@
+# This file sets up a CMakeCache for Apple-style builds of compiler-rt.
+# This configuration matches what Apple uses when shipping Xcode releases.
+
+set(COMPILER_RT_INCLUDE_TESTS OFF CACHE BOOL "")
+set(COMPILER_RT_HAS_SAFESTACK OFF CACHE BOOL "")
+set(COMPILER_RT_EXTERNALIZE_DEBUGINFO ON CACHE BOOL "")
+set(CMAKE_MACOSX_RPATH ON CACHE BOOL "")
+
+set(CMAKE_C_FLAGS_RELEASE "-O3" CACHE STRING "")
+set(CMAKE_CXX_FLAGS_RELEASE "-O3" CACHE STRING "")
+set(CMAKE_ASM_FLAGS_RELEASE "-O3" CACHE STRING "")
+set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -gline-tables-only -DNDEBUG" CACHE STRING "")
+set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -gline-tables-only -DNDEBUG" CACHE STRING "")
+set(CMAKE_ASM_FLAGS_RELWITHDEBINFO "-O3 -gline-tables-only -DNDEBUG" CACHE STRING "")
+set(CMAKE_BUILD_TYPE RELEASE CACHE STRING "")
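
How a cache file like this is typically consumed (a sketch; the generator and paths are placeholders):

    # cmake -G Ninja -C <compiler-rt>/cmake/caches/Apple.cmake <compiler-rt source dir>
    # The -C option pre-loads these set(... CACHE ...) entries before the first
    # configure, so the build defaults to Apple's release-style flags.
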
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index 58623c4..92c3452 100644
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -21,6 +21,7 @@
 check_cxx_compiler_flag(-fno-stack-protector COMPILER_RT_HAS_FNO_STACK_PROTECTOR_FLAG)
 check_cxx_compiler_flag(-fno-sanitize=safe-stack COMPILER_RT_HAS_FNO_SANITIZE_SAFE_STACK_FLAG)
 check_cxx_compiler_flag(-fvisibility=hidden  COMPILER_RT_HAS_FVISIBILITY_HIDDEN_FLAG)
+check_cxx_compiler_flag(-frtti               COMPILER_RT_HAS_FRTTI_FLAG)
 check_cxx_compiler_flag(-fno-rtti            COMPILER_RT_HAS_FNO_RTTI_FLAG)
 check_cxx_compiler_flag(-ffreestanding       COMPILER_RT_HAS_FFREESTANDING_FLAG)
 check_cxx_compiler_flag("-Werror -fno-function-sections" COMPILER_RT_HAS_FNO_FUNCTION_SECTIONS_FLAG)
@@ -28,7 +29,6 @@
 check_cxx_compiler_flag(-ftls-model=initial-exec COMPILER_RT_HAS_FTLS_MODEL_INITIAL_EXEC)
 check_cxx_compiler_flag(-fno-lto             COMPILER_RT_HAS_FNO_LTO_FLAG)
 check_cxx_compiler_flag("-Werror -msse3" COMPILER_RT_HAS_MSSE3_FLAG)
-check_cxx_compiler_flag(-std=c99             COMPILER_RT_HAS_STD_C99_FLAG)
 check_cxx_compiler_flag(--sysroot=.          COMPILER_RT_HAS_SYSROOT_FLAG)
 
 if(NOT WIN32 AND NOT CYGWIN)
@@ -55,11 +55,13 @@
 check_cxx_compiler_flag("-Werror -Wgnu"                COMPILER_RT_HAS_WGNU_FLAG)
 check_cxx_compiler_flag("-Werror -Wnon-virtual-dtor"   COMPILER_RT_HAS_WNON_VIRTUAL_DTOR_FLAG)
 check_cxx_compiler_flag("-Werror -Wvariadic-macros"    COMPILER_RT_HAS_WVARIADIC_MACROS_FLAG)
+check_cxx_compiler_flag("-Werror -Wunused-parameter"   COMPILER_RT_HAS_WUNUSED_PARAMETER_FLAG)
 
-check_cxx_compiler_flag(/W3 COMPILER_RT_HAS_W3_FLAG)
+check_cxx_compiler_flag(/W4 COMPILER_RT_HAS_W4_FLAG)
 check_cxx_compiler_flag(/WX COMPILER_RT_HAS_WX_FLAG)
 check_cxx_compiler_flag(/wd4146 COMPILER_RT_HAS_WD4146_FLAG)
 check_cxx_compiler_flag(/wd4291 COMPILER_RT_HAS_WD4291_FLAG)
+check_cxx_compiler_flag(/wd4221 COMPILER_RT_HAS_WD4221_FLAG)
 check_cxx_compiler_flag(/wd4391 COMPILER_RT_HAS_WD4391_FLAG)
 check_cxx_compiler_flag(/wd4722 COMPILER_RT_HAS_WD4722_FLAG)
 check_cxx_compiler_flag(/wd4800 COMPILER_RT_HAS_WD4800_FLAG)
@@ -90,50 +92,8 @@
 # platform. We use the results of these tests to build only the various target
 # runtime libraries supported by our current compilers cross-compiling
 # abilities.
-set(SIMPLE_SOURCE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/simple.cc)
-file(WRITE ${SIMPLE_SOURCE} "#include <stdlib.h>\n#include <limits>\nint main() {}\n")
-
-function(check_compile_definition def argstring out_var)
-  if("${def}" STREQUAL "")
-    set(${out_var} TRUE PARENT_SCOPE)
-    return()
-  endif()
-  cmake_push_check_state()
-  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${argstring}")
-  check_symbol_exists(${def} "" ${out_var})
-  cmake_pop_check_state()
-endfunction()
-
-# test_target_arch(<arch> <def> <target flags...>)
-# Checks if architecture is supported: runs host compiler with provided
-# flags to verify that:
-#   1) <def> is defined (if non-empty)
-#   2) simple file can be successfully built.
-# If successful, saves target flags for this architecture.
-macro(test_target_arch arch def)
-  set(TARGET_${arch}_CFLAGS ${ARGN})
-  set(argstring "")
-  foreach(arg ${ARGN})
-    set(argstring "${argstring} ${arg}")
-  endforeach()
-  check_compile_definition("${def}" "${argstring}" HAS_${arch}_DEF)
-  if(NOT HAS_${arch}_DEF)
-    set(CAN_TARGET_${arch} FALSE)
-  else()
-    set(argstring "${CMAKE_EXE_LINKER_FLAGS} ${argstring}")
-    try_compile(CAN_TARGET_${arch} ${CMAKE_BINARY_DIR} ${SIMPLE_SOURCE}
-                COMPILE_DEFINITIONS "${TARGET_${arch}_CFLAGS}"
-                OUTPUT_VARIABLE TARGET_${arch}_OUTPUT
-                CMAKE_FLAGS "-DCMAKE_EXE_LINKER_FLAGS:STRING=${argstring}")
-  endif()
-  if(${CAN_TARGET_${arch}})
-    list(APPEND COMPILER_RT_SUPPORTED_ARCH ${arch})
-  elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "${arch}" AND
-         COMPILER_RT_HAS_EXPLICIT_DEFAULT_TARGET_TRIPLE)
-    # Bail out if we cannot target the architecture we plan to test.
-    message(FATAL_ERROR "Cannot compile for ${arch}:\n${TARGET_${arch}_OUTPUT}")
-  endif()
-endmacro()
+set(SIMPLE_SOURCE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/simple.c)
+file(WRITE ${SIMPLE_SOURCE} "#include <stdlib.h>\n#include <stdio.h>\nint main() { printf(\"hello, world\"); }\n")
 
 # Add $arch as supported with no additional flags.
 macro(add_default_target_arch arch)
@@ -142,37 +102,6 @@
   list(APPEND COMPILER_RT_SUPPORTED_ARCH ${arch})
 endmacro()
 
-macro(detect_target_arch)
-  check_symbol_exists(__arm__ "" __ARM)
-  check_symbol_exists(__aarch64__ "" __AARCH64)
-  check_symbol_exists(__x86_64__ "" __X86_64)
-  check_symbol_exists(__i686__ "" __I686)
-  check_symbol_exists(__i386__ "" __I386)
-  check_symbol_exists(__mips__ "" __MIPS)
-  check_symbol_exists(__mips64__ "" __MIPS64)
-  check_symbol_exists(__wasm32__ "" __WEBASSEMBLY32)
-  check_symbol_exists(__wasm64__ "" __WEBASSEMBLY64)
-  if(__ARM)
-    add_default_target_arch(arm)
-  elseif(__AARCH64)
-    add_default_target_arch(aarch64)
-  elseif(__X86_64)
-    add_default_target_arch(x86_64)
-  elseif(__I686)
-    add_default_target_arch(i686)
-  elseif(__I386)
-    add_default_target_arch(i386)
-  elseif(__MIPS64) # must be checked before __MIPS
-    add_default_target_arch(mips64)
-  elseif(__MIPS)
-    add_default_target_arch(mips)
-  elseif(__WEBASSEMBLY32)
-    add_default_target_arch(wasm32)
-  elseif(__WEBASSEMBLY64)
-    add_default_target_arch(wasm64)
-  endif()
-endmacro()
-
 # Detect whether the current target platform is 32-bit or 64-bit, and setup
 # the correct commandline flags needed to attempt to target 32-bit and 64-bit.
 if (NOT CMAKE_SIZEOF_VOID_P EQUAL 4 AND
@@ -180,71 +109,7 @@
   message(FATAL_ERROR "Please use architecture with 4 or 8 byte pointers.")
 endif()
 
-# Generate the COMPILER_RT_SUPPORTED_ARCH list.
-if(ANDROID)
-  # Examine compiler output to determine target architecture.
-  detect_target_arch()
-  set(COMPILER_RT_OS_SUFFIX "-android")
-elseif(NOT APPLE) # Supported archs for Apple platforms are generated later
-  if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "i[2-6]86|x86|amd64")
-    if(NOT MSVC)
-      test_target_arch(x86_64 "" "-m64")
-      # FIXME: We build runtimes for both i686 and i386, as "clang -m32" may
-      # target different variant than "$CMAKE_C_COMPILER -m32". This part should
-      # be gone after we resolve PR14109.
-      test_target_arch(i686 __i686__ "-m32")
-      test_target_arch(i386 __i386__ "-m32")
-    else()
-      if (CMAKE_SIZEOF_VOID_P EQUAL 4)
-        test_target_arch(i386 "" "")
-      else()
-        test_target_arch(x86_64 "" "")
-      endif()
-    endif()
-  elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "powerpc")
-    TEST_BIG_ENDIAN(HOST_IS_BIG_ENDIAN)
-    if(HOST_IS_BIG_ENDIAN)
-      test_target_arch(powerpc64 "" "-m64")
-    else()
-      test_target_arch(powerpc64le "" "-m64")
-    endif()
-  elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "mipsel|mips64el")
-    # Gcc doesn't accept -m32/-m64 so we do the next best thing and use
-    # -mips32r2/-mips64r2. We don't use -mips1/-mips3 because we want to match
-    # clang's default CPU's. In the 64-bit case, we must also specify the ABI
-    # since the default ABI differs between gcc and clang.
-    # FIXME: Ideally, we would build the N32 library too.
-    test_target_arch(mipsel "" "-mips32r2" "--target=mipsel-linux-gnu")
-    test_target_arch(mips64el "" "-mips64r2" "--target=mips64el-linux-gnu" "-mabi=n64")
-  elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "mips")
-    test_target_arch(mips "" "-mips32r2" "--target=mips-linux-gnu")
-    test_target_arch(mips64 "" "-mips64r2" "--target=mips64-linux-gnu" "-mabi=n64")
-  elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "arm")
-    test_target_arch(arm "" "-march=armv7-a" "-mfloat-abi=soft")
-    test_target_arch(armhf "" "-march=armv7-a" "-mfloat-abi=hard")
-  elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "aarch32")
-    test_target_arch(aarch32 "" "-march=armv8-a")
-  elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "aarch64")
-    test_target_arch(aarch64 "" "-march=armv8-a")
-  elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "wasm32")
-    test_target_arch(wasm32 "" "--target=wasm32-unknown-unknown")
-  elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "wasm64")
-    test_target_arch(wasm64 "" "--target=wasm64-unknown-unknown")
-  endif()
-  set(COMPILER_RT_OS_SUFFIX "")
-endif()
-
-# Takes ${ARGN} and puts only supported architectures in @out_var list.
-function(filter_available_targets out_var)
-  set(archs ${${out_var}})
-  foreach(arch ${ARGN})
-    list(FIND COMPILER_RT_SUPPORTED_ARCH ${arch} ARCH_INDEX)
-    if(NOT (ARCH_INDEX EQUAL -1) AND CAN_TARGET_${arch})
-      list(APPEND archs ${arch})
-    endif()
-  endforeach()
-  set(${out_var} ${archs} PARENT_SCOPE)
-endfunction()
+test_targets()
 
 # Returns a list of architecture specific target cflags in @out_var list.
 function(get_target_flags_for_arch arch out_var)
@@ -270,50 +135,37 @@
 set(MIPS32 mips mipsel)
 set(MIPS64 mips64 mips64el)
 set(PPC64 powerpc64 powerpc64le)
+set(S390X s390x)
 set(WASM32 wasm32)
 set(WASM64 wasm64)
 
 if(APPLE)
   set(ARM64 arm64)
-  set(ARM32 armv7 armv7s)
+  set(ARM32 armv7 armv7s armv7k)
   set(X86_64 x86_64 x86_64h)
 endif()
 
-set(ALL_BUILTIN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
-    ${MIPS32} ${MIPS64} ${WASM32} ${WASM64})
 set(ALL_SANITIZER_COMMON_SUPPORTED_ARCH ${X86} ${X86_64} ${PPC64}
-    ${ARM32} ${ARM64} ${MIPS32} ${MIPS64})
+    ${ARM32} ${ARM64} ${MIPS32} ${MIPS64} ${S390X})
 set(ALL_ASAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
     ${MIPS32} ${MIPS64} ${PPC64})
 set(ALL_DFSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64})
 set(ALL_LSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64})
-set(ALL_MSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64})
+set(ALL_MSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64})
 set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC64}
     ${MIPS32} ${MIPS64})
 set(ALL_TSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64})
 set(ALL_UBSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
-    ${MIPS32} ${MIPS64} ${PPC64})
-set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64})
-set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64})
+    ${MIPS32} ${MIPS64} ${PPC64} ${S390X})
+set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64})
+set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64})
+set(ALL_ESAN_SUPPORTED_ARCH ${X86_64})
+set(ALL_SCUDO_SUPPORTED_ARCH ${X86_64})
+set(ALL_XRAY_SUPPORTED_ARCH ${X86_64})
 
 if(APPLE)
   include(CompilerRTDarwinUtils)
 
-  # On Darwin if /usr/include doesn't exist, the user probably has Xcode but not
-  # the command line tools. If this is the case, we need to find the OS X
-  # sysroot to pass to clang.
-  if(NOT EXISTS /usr/include)
-    execute_process(COMMAND xcodebuild -version -sdk macosx Path
-       OUTPUT_VARIABLE OSX_SYSROOT
-       ERROR_QUIET
-       OUTPUT_STRIP_TRAILING_WHITESPACE)
-    set(OSX_SYSROOT_FLAG "-isysroot${OSX_SYSROOT}")
-  endif()
-
-  option(COMPILER_RT_ENABLE_IOS "Enable building for iOS - Experimental" Off)
-  option(COMPILER_RT_ENABLE_WATCHOS "Enable building for watchOS - Experimental" Off)
-  option(COMPILER_RT_ENABLE_TVOS "Enable building for tvOS - Experimental" Off)
-
   find_darwin_sdk_dir(DARWIN_osx_SYSROOT macosx)
   find_darwin_sdk_dir(DARWIN_iossim_SYSROOT iphonesimulator)
   find_darwin_sdk_dir(DARWIN_ios_SYSROOT iphoneos)
@@ -327,33 +179,23 @@
     set(DARWIN_ios_MIN_VER_FLAG -miphoneos-version-min)
     set(DARWIN_ios_SANITIZER_MIN_VER_FLAG
       ${DARWIN_ios_MIN_VER_FLAG}=7.0)
-    set(DARWIN_ios_BUILTIN_MIN_VER 6.0)
-    set(DARWIN_ios_BUILTIN_MIN_VER_FLAG
-      ${DARWIN_ios_MIN_VER_FLAG}=${DARWIN_ios_BUILTIN_MIN_VER})
   endif()
   if(COMPILER_RT_ENABLE_WATCHOS)
     list(APPEND DARWIN_EMBEDDED_PLATFORMS watchos)
     set(DARWIN_watchos_MIN_VER_FLAG -mwatchos-version-min)
     set(DARWIN_watchos_SANITIZER_MIN_VER_FLAG
       ${DARWIN_watchos_MIN_VER_FLAG}=2.0)
-    set(DARWIN_watchos_BUILTIN_MIN_VER 2.0)
-    set(DARWIN_watchos_BUILTIN_MIN_VER_FLAG
-      ${DARWIN_watchos_MIN_VER_FLAG}=${DARWIN_watchos_BUILTIN_MIN_VER})
   endif()
   if(COMPILER_RT_ENABLE_TVOS)
     list(APPEND DARWIN_EMBEDDED_PLATFORMS tvos)
     set(DARWIN_tvos_MIN_VER_FLAG -mtvos-version-min)
     set(DARWIN_tvos_SANITIZER_MIN_VER_FLAG
       ${DARWIN_tvos_MIN_VER_FLAG}=9.0)
-    set(DARWIN_tvos_BUILTIN_MIN_VER 9.0)
-    set(DARWIN_tvos_BUILTIN_MIN_VER_FLAG
-      ${DARWIN_tvos_MIN_VER_FLAG}=${DARWIN_tvos_BUILTIN_MIN_VER})
   endif()
 
   # Note: In order to target x86_64h on OS X the minimum deployment target must
   # be 10.8 or higher.
   set(SANITIZER_COMMON_SUPPORTED_OS osx)
-  set(BUILTIN_SUPPORTED_OS osx)
   set(PROFILE_SUPPORTED_OS osx)
   set(TSAN_SUPPORTED_OS osx)
   if(NOT SANITIZER_MIN_OSX_VERSION)
@@ -391,9 +233,6 @@
   set(DARWIN_osx_LINKFLAGS
     ${DARWIN_COMMON_LINKFLAGS}
     -mmacosx-version-min=${SANITIZER_MIN_OSX_VERSION})
-  set(DARWIN_osx_BUILTIN_MIN_VER 10.5)
-  set(DARWIN_osx_BUILTIN_MIN_VER_FLAG
-      -mmacosx-version-min=${DARWIN_osx_BUILTIN_MIN_VER})
 
   if(DARWIN_osx_SYSROOT)
     list(APPEND DARWIN_osx_CFLAGS -isysroot ${DARWIN_osx_SYSROOT})
@@ -414,22 +253,6 @@
       set(CAN_TARGET_${arch} 1)
     endforeach()
 
-    # Need to build a 10.4 compatible libclang_rt
-    set(DARWIN_10.4_SYSROOT ${DARWIN_osx_SYSROOT})
-    set(DARWIN_10.4_BUILTIN_MIN_VER 10.4)
-    set(DARWIN_10.4_BUILTIN_MIN_VER_FLAG
-        -mmacosx-version-min=${DARWIN_10.4_BUILTIN_MIN_VER})
-    set(DARWIN_10.4_SKIP_CC_KEXT On)
-    darwin_test_archs(10.4
-      DARWIN_10.4_ARCHS
-      ${toolchain_arches})
-    message(STATUS "OSX 10.4 supported arches: ${DARWIN_10.4_ARCHS}")
-    if(DARWIN_10.4_ARCHS)
-      # don't include the Haswell slice in the 10.4 compatibility library
-      list(REMOVE_ITEM DARWIN_10.4_ARCHS x86_64h)
-      list(APPEND BUILTIN_SUPPORTED_OS 10.4)
-    endif()
-
     foreach(platform ${DARWIN_EMBEDDED_PLATFORMS})
       if(DARWIN_${platform}sim_SYSROOT)
         set(DARWIN_${platform}sim_CFLAGS
@@ -440,21 +263,18 @@
           ${DARWIN_COMMON_LINKFLAGS}
           ${DARWIN_${platform}_SANITIZER_MIN_VER_FLAG}
           -isysroot ${DARWIN_${platform}sim_SYSROOT})
-        set(DARWIN_${platform}sim_BUILTIN_MIN_VER
-          ${DARWIN_${platform}_BUILTIN_MIN_VER})
-        set(DARWIN_${platform}sim_BUILTIN_MIN_VER_FLAG
-          ${DARWIN_${platform}_BUILTIN_MIN_VER_FLAG})
 
         set(DARWIN_${platform}sim_SKIP_CC_KEXT On)
         darwin_test_archs(${platform}sim
           DARWIN_${platform}sim_ARCHS
           ${toolchain_arches})
         message(STATUS "${platform} Simulator supported arches: ${DARWIN_${platform}sim_ARCHS}")
-        if(DARWIN_iossim_ARCHS)
+        if(DARWIN_${platform}_ARCHS)
           list(APPEND SANITIZER_COMMON_SUPPORTED_OS ${platform}sim)
-          list(APPEND BUILTIN_SUPPORTED_OS ${platform}sim)
           list(APPEND PROFILE_SUPPORTED_OS ${platform}sim)
-          list(APPEND TSAN_SUPPORTED_OS ${platform}sim)
+          if(DARWIN_${platform}_SYSROOT_INTERNAL)
+            list(APPEND TSAN_SUPPORTED_OS ${platform}sim)
+          endif()
         endif()
         foreach(arch ${DARWIN_${platform}sim_ARCHS})
           list(APPEND COMPILER_RT_SUPPORTED_ARCH ${arch})
@@ -478,7 +298,6 @@
         message(STATUS "${platform} supported arches: ${DARWIN_${platform}_ARCHS}")
         if(DARWIN_${platform}_ARCHS)
           list(APPEND SANITIZER_COMMON_SUPPORTED_OS ${platform})
-          list(APPEND BUILTIN_SUPPORTED_OS ${platform})
           list(APPEND PROFILE_SUPPORTED_OS ${platform})
         endif()
         foreach(arch ${DARWIN_${platform}_ARCHS})
@@ -489,48 +308,54 @@
     endforeach()
   endif()
 
-  # for list_union
+  # for list_intersect
   include(CompilerRTUtils)
 
-  list_union(BUILTIN_SUPPORTED_ARCH ALL_BUILTIN_SUPPORTED_ARCH toolchain_arches)
 
-  list_union(SANITIZER_COMMON_SUPPORTED_ARCH
+  list_intersect(SANITIZER_COMMON_SUPPORTED_ARCH
     ALL_SANITIZER_COMMON_SUPPORTED_ARCH
     COMPILER_RT_SUPPORTED_ARCH
     )
   set(LSAN_COMMON_SUPPORTED_ARCH ${SANITIZER_COMMON_SUPPORTED_ARCH})
   set(UBSAN_COMMON_SUPPORTED_ARCH ${SANITIZER_COMMON_SUPPORTED_ARCH})
-  list_union(ASAN_SUPPORTED_ARCH
+  list_intersect(ASAN_SUPPORTED_ARCH
     ALL_ASAN_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
-  list_union(DFSAN_SUPPORTED_ARCH
+  list_intersect(DFSAN_SUPPORTED_ARCH
     ALL_DFSAN_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
-  list_union(LSAN_SUPPORTED_ARCH
+  list_intersect(LSAN_SUPPORTED_ARCH
     ALL_LSAN_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
-  list_union(MSAN_SUPPORTED_ARCH
+  list_intersect(MSAN_SUPPORTED_ARCH
     ALL_MSAN_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
-  list_union(PROFILE_SUPPORTED_ARCH
+  list_intersect(PROFILE_SUPPORTED_ARCH
     ALL_PROFILE_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
-  list_union(TSAN_SUPPORTED_ARCH
+  list_intersect(TSAN_SUPPORTED_ARCH
     ALL_TSAN_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
-  list_union(UBSAN_SUPPORTED_ARCH
+  list_intersect(UBSAN_SUPPORTED_ARCH
     ALL_UBSAN_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
-  list_union(SAFESTACK_SUPPORTED_ARCH
+  list_intersect(SAFESTACK_SUPPORTED_ARCH
     ALL_SAFESTACK_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
-  list_union(CFI_SUPPORTED_ARCH
+  list_intersect(CFI_SUPPORTED_ARCH
     ALL_CFI_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
+  list_intersect(ESAN_SUPPORTED_ARCH
+    ALL_ESAN_SUPPORTED_ARCH
+    SANITIZER_COMMON_SUPPORTED_ARCH)
+  list_intersect(SCUDO_SUPPORTED_ARCH
+    ALL_SCUDO_SUPPORTED_ARCH
+    SANITIZER_COMMON_SUPPORTED_ARCH)
+  list_intersect(XRAY_SUPPORTED_ARCH
+    ALL_XRAY_SUPPORTED_ARCH
+    SANITIZER_COMMON_SUPPORTED_ARCH)
 else()
   # Architectures supported by compiler-rt libraries.
-  filter_available_targets(BUILTIN_SUPPORTED_ARCH
-    ${ALL_BUILTIN_SUPPORTED_ARCH})
   filter_available_targets(SANITIZER_COMMON_SUPPORTED_ARCH
     ${ALL_SANITIZER_COMMON_SUPPORTED_ARCH})
   # LSan and UBSan common files should be available on all architectures
@@ -549,6 +374,22 @@
   filter_available_targets(SAFESTACK_SUPPORTED_ARCH
     ${ALL_SAFESTACK_SUPPORTED_ARCH})
   filter_available_targets(CFI_SUPPORTED_ARCH ${ALL_CFI_SUPPORTED_ARCH})
+  filter_available_targets(ESAN_SUPPORTED_ARCH ${ALL_ESAN_SUPPORTED_ARCH})
+  filter_available_targets(SCUDO_SUPPORTED_ARCH
+    ${ALL_SCUDO_SUPPORTED_ARCH})
+  filter_available_targets(XRAY_SUPPORTED_ARCH ${ALL_XRAY_SUPPORTED_ARCH})
+endif()
+
+if (MSVC)
+  # See if the DIA SDK is available and usable.
+  set(MSVC_DIA_SDK_DIR "$ENV{VSINSTALLDIR}DIA SDK")
+  if (IS_DIRECTORY ${MSVC_DIA_SDK_DIR})
+    set(CAN_SYMBOLIZE 1)
+  else()
+    set(CAN_SYMBOLIZE 0)
+  endif()
+else()
+  set(CAN_SYMBOLIZE 1)
 endif()
 
 message(STATUS "Compiler-RT supported architectures: ${COMPILER_RT_SUPPORTED_ARCH}")
@@ -567,15 +408,13 @@
   set(COMPILER_RT_HAS_SANITIZER_COMMON FALSE)
 endif()
 
-if (COMPILER_RT_HAS_SANITIZER_COMMON AND
-    (NOT OS_NAME MATCHES "Windows" OR CMAKE_SIZEOF_VOID_P EQUAL 4))
+if (COMPILER_RT_HAS_SANITIZER_COMMON)
   set(COMPILER_RT_HAS_INTERCEPTION TRUE)
 else()
   set(COMPILER_RT_HAS_INTERCEPTION FALSE)
 endif()
 
-if (COMPILER_RT_HAS_SANITIZER_COMMON AND ASAN_SUPPORTED_ARCH AND
-    (NOT OS_NAME MATCHES "Windows" OR CMAKE_SIZEOF_VOID_P EQUAL 4))
+if (COMPILER_RT_HAS_SANITIZER_COMMON AND ASAN_SUPPORTED_ARCH)
   set(COMPILER_RT_HAS_ASAN TRUE)
 else()
   set(COMPILER_RT_HAS_ASAN FALSE)
@@ -644,3 +483,24 @@
 else()
   set(COMPILER_RT_HAS_CFI FALSE)
 endif()
+
+if (COMPILER_RT_HAS_SANITIZER_COMMON AND ESAN_SUPPORTED_ARCH AND
+    OS_NAME MATCHES "Linux")
+  set(COMPILER_RT_HAS_ESAN TRUE)
+else()
+  set(COMPILER_RT_HAS_ESAN FALSE)
+endif()
+
+if (COMPILER_RT_HAS_SANITIZER_COMMON AND SCUDO_SUPPORTED_ARCH AND
+    OS_NAME MATCHES "Linux")
+  set(COMPILER_RT_HAS_SCUDO TRUE)
+else()
+  set(COMPILER_RT_HAS_SCUDO FALSE)
+endif()
+
+if (COMPILER_RT_HAS_SANITIZER_COMMON AND XRAY_SUPPORTED_ARCH AND
+    OS_NAME MATCHES "Linux")
+  set(COMPILER_RT_HAS_XRAY TRUE)
+else()
+  set(COMPILER_RT_HAS_XRAY FALSE)
+endif()
diff --git a/docs/TestingGuide.rst b/docs/TestingGuide.rst
new file mode 100644
index 0000000..4edda67
--- /dev/null
+++ b/docs/TestingGuide.rst
@@ -0,0 +1,66 @@
+========================================
+Compiler-rt Testing Infrastructure Guide
+========================================
+
+.. contents::
+   :local:
+
+Overview
+========
+
+This document is the reference manual for the compiler-rt modifications to the
+testing infrastructure. Documentation for the infrastructure itself can be found at
+:ref:`llvm_testing_guide`.
+
+LLVM testing infrastructure organization
+========================================
+
+The compiler-rt testing infrastructure contains regression tests which are run
+as part of the usual ``make check-all`` and are expected to always pass -- they
+should be run before every commit.
+
+Quick start
+===========
+
+The regression tests are in the "compiler-rt" module and are normally checked
+out in the directory ``llvm/projects/compiler-rt/test``. Use ``make check-all``
+to run the regression tests after building compiler-rt.
+
+REQUIRES, XFAIL, etc.
+---------------------
+
+Sometimes it is necessary to restrict a test to a specific target or mark it as
+an "expected fail" or XFAIL. This is normally achieved using ``REQUIRES:`` or
+``XFAIL:`` with a substring of LLVM's default target triple. Unfortunately, the
+behaviour of this is somewhat quirky in compiler-rt. There are two main
+pitfalls to avoid.
+
+The first pitfall is that these directives perform a substring match on the
+triple and as such ``XFAIL: mips`` affects more triples than expected. For
+example, ``mips-linux-gnu``, ``mipsel-linux-gnu``, ``mips64-linux-gnu``, and
+``mips64el-linux-gnu`` will all match an ``XFAIL: mips`` directive. Including a
+trailing ``-`` such as in ``XFAIL: mips-`` can help to mitigate this quirk but
+even that has issues as described below.
+
+The second pitfall is that the default target triple is often inappropriate for
+compiler-rt tests since compiler-rt tests may be compiled for multiple targets.
+For example, a typical build on an ``x86_64-linux-gnu`` host will often run the
+tests for both x86_64 and i386. In this situation ``XFAIL: x86_64`` will mark
+both the x86_64 and i386 tests as an expected failure while ``XFAIL: i386``
+will have no effect at all.
+
+To remedy both pitfalls, compiler-rt tests provide a feature string which can
+be used to specify a single target. This string is of the form
+``target-is-${arch}`` where ``${arch}`` is one of the values from the
+following lines of the CMake output::
+
+  -- Compiler-RT supported architectures: x86_64;i386
+  -- Builtin supported architectures: i386;x86_64
+
+So for example ``XFAIL: target-is-x86_64`` will mark a test as expected to fail
+on x86_64 without also affecting the i386 test and ``XFAIL: target-is-i386``
+will mark a test as expected to fail on i386 even if the default target triple
+is ``x86_64-linux-gnu``. Directives that use these ``target-is-${arch}`` strings
+require exact matches, so ``XFAIL: target-is-mips``,
+``XFAIL: target-is-mipsel``, ``XFAIL: target-is-mips64``, and
+``XFAIL: target-is-mips64el`` all refer to different MIPS targets.
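As a concrete illustration of the exact-match feature strings described above, a
compiler-rt lit test that should be marked as failing only for the 32-bit x86 run
of a multi-arch build might look like the following sketch (the ``%clang``/``%t``/``%run``
substitutions are the usual lit placeholders and are assumed here, not prescribed
by this patch):

    // RUN: %clang %s -o %t && %run %t
    //
    // Exact-match feature string: only the i386 run of this test is XFAILed;
    // the x86_64 run of the same source file is unaffected.
    // XFAIL: target-is-i386
    #include <stdio.h>

    int main() {
      printf("hello\n");
      return 0;  // the architecture-specific failure would be exercised here
    }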
diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt
index ad1437e..1f8b481 100644
--- a/include/CMakeLists.txt
+++ b/include/CMakeLists.txt
@@ -4,16 +4,24 @@
   sanitizer/common_interface_defs.h
   sanitizer/coverage_interface.h
   sanitizer/dfsan_interface.h
+  sanitizer/esan_interface.h
   sanitizer/linux_syscall_hooks.h
   sanitizer/lsan_interface.h
   sanitizer/msan_interface.h
   sanitizer/tsan_interface_atomic.h)
 
+set(XRAY_HEADERS
+  xray/xray_interface.h)
+
+set(COMPILER_RT_HEADERS
+  ${SANITIZER_HEADERS}
+  ${XRAY_HEADERS})
+
 set(output_dir ${COMPILER_RT_OUTPUT_DIR}/include)
 
 # Copy compiler-rt headers to the build tree.
 set(out_files)
-foreach( f ${SANITIZER_HEADERS} )
+foreach( f ${COMPILER_RT_HEADERS} )
   set( src ${CMAKE_CURRENT_SOURCE_DIR}/${f} )
   set( dst ${output_dir}/${f} )
   add_custom_command(OUTPUT ${dst}
@@ -25,8 +33,13 @@
 
 add_custom_target(compiler-rt-headers ALL DEPENDS ${out_files})
 add_dependencies(compiler-rt compiler-rt-headers)
+set_target_properties(compiler-rt-headers PROPERTIES FOLDER "Compiler-RT Misc")
 
 # Install sanitizer headers.
 install(FILES ${SANITIZER_HEADERS}
   PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
   DESTINATION ${COMPILER_RT_INSTALL_PATH}/include/sanitizer)
+# Install xray headers.
+install(FILES ${XRAY_HEADERS}
+  PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
+  DESTINATION ${COMPILER_RT_INSTALL_PATH}/include/xray)
diff --git a/include/sanitizer/allocator_interface.h b/include/sanitizer/allocator_interface.h
index ab251f8..5220631 100644
--- a/include/sanitizer/allocator_interface.h
+++ b/include/sanitizer/allocator_interface.h
@@ -59,6 +59,23 @@
        deallocation of "ptr". */
   void __sanitizer_malloc_hook(const volatile void *ptr, size_t size);
   void __sanitizer_free_hook(const volatile void *ptr);
+
+  /* Installs a pair of hooks for malloc/free.
+     Several (currently, 5) hook pairs may be installed, they are executed
+     in the order they were installed and after calling
+     __sanitizer_malloc_hook/__sanitizer_free_hook.
+     Unlike __sanitizer_malloc_hook/__sanitizer_free_hook these hooks can be
+     chained and do not rely on weak symbols working on the platform, but
+     require __sanitizer_install_malloc_and_free_hooks to be called at startup
+     and thus will not be called on malloc/free very early in the process.
+     Returns the number of hooks currently installed or 0 on failure.
+     Not thread-safe, should be called in the main thread before starting
+     other threads.
+  */
+  int __sanitizer_install_malloc_and_free_hooks(
+      void (*malloc_hook)(const volatile void *, size_t),
+      void (*free_hook)(const volatile void *));
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
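A minimal sketch of how the chained hook pair is meant to be installed, assuming
the program is linked against a sanitizer runtime that provides the allocator
interface (for example, built with -fsanitize=address); the counter names are
illustrative only:

    #include <sanitizer/allocator_interface.h>
    #include <stdio.h>
    #include <stdlib.h>

    static size_t g_allocs, g_frees;  // simple counters; fine for a single thread

    static void count_malloc(const volatile void *ptr, size_t size) {
      (void)ptr; (void)size;
      ++g_allocs;
    }
    static void count_free(const volatile void *ptr) {
      (void)ptr;
      ++g_frees;
    }

    int main() {
      // Not thread-safe: install the hook pair before spawning other threads.
      if (!__sanitizer_install_malloc_and_free_hooks(count_malloc, count_free))
        fprintf(stderr, "no more hook slots available\n");
      free(malloc(16));
      printf("mallocs seen: %zu, frees seen: %zu\n", g_allocs, g_frees);
      return 0;
    }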
diff --git a/include/sanitizer/common_interface_defs.h b/include/sanitizer/common_interface_defs.h
index b2a4bb7..1c90a60 100644
--- a/include/sanitizer/common_interface_defs.h
+++ b/include/sanitizer/common_interface_defs.h
@@ -41,6 +41,9 @@
 
   // Tell the tools to write their reports to "path.<pid>" instead of stderr.
   void __sanitizer_set_report_path(const char *path);
+  // Tell the tools to write their reports to the provided file descriptor
+  // (casted to void *).
+  void __sanitizer_set_report_fd(void *fd);
 
   // Notify the tools that the sandbox is going to be turned on. The reserved
   // parameter will be used in the future to hold a structure with functions
@@ -128,8 +131,45 @@
                                     const void *s2, size_t n, int result);
   void __sanitizer_weak_hook_strncmp(void *called_pc, const char *s1,
                                     const char *s2, size_t n, int result);
+  void __sanitizer_weak_hook_strncasecmp(void *called_pc, const char *s1,
+                                         const char *s2, size_t n, int result);
   void __sanitizer_weak_hook_strcmp(void *called_pc, const char *s1,
                                     const char *s2, int result);
+  void __sanitizer_weak_hook_strcasecmp(void *called_pc, const char *s1,
+                                        const char *s2, int result);
+  void __sanitizer_weak_hook_strstr(void *called_pc, const char *s1,
+                                    const char *s2, char *result);
+  void __sanitizer_weak_hook_strcasestr(void *called_pc, const char *s1,
+                                        const char *s2, char *result);
+  void __sanitizer_weak_hook_memmem(void *called_pc,
+                                    const void *s1, size_t len1,
+                                    const void *s2, size_t len2, void *result);
+
+  // Prints stack traces for all live heap allocations ordered by total
+  // allocation size until `top_percent` of total live heap is shown.
+  // `top_percent` should be between 1 and 100.
+  // Experimental feature currently available only with asan on Linux/x86_64.
+  void __sanitizer_print_memory_profile(size_t top_percent);
+
+  // Fiber annotation interface.
+  // Before switching to a different stack, one must call
+  // __sanitizer_start_switch_fiber with a pointer to the bottom of the
+  // destination stack and its size. When code starts running on the new stack,
+  // it must call __sanitizer_finish_switch_fiber to finalize the switch.
+  // The start_switch function takes a void** to store the current fake stack if
+  // there is one (it is needed when detect_stack_use_after_return is enabled).
+  // When restoring a stack, this pointer must be given to the finish_switch
+  // function. In most cases, this void* can be stored on the stack just before
+  // switching.  When leaving a fiber permanently, null must be passed as the
+  // first argument to the start_switch function so that the fake stack is
+  // destroyed.
+  // If you do not want support for stack use-after-return detection, you can
+  // always pass null to these two functions.
+  // Note that the fake stack mechanism is disabled during fiber switch, so if a
+  // signal callback runs during the switch, it will not benefit from the stack
+  // use-after-return detection.
+  void __sanitizer_start_switch_fiber(void **fake_stack_save,
+                                      const void *bottom, size_t size);
+  void __sanitizer_finish_switch_fiber(void *fake_stack_save);
 #ifdef __cplusplus
 }  // extern "C"
 #endif
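A minimal sketch of the fiber-annotation calling convention, assuming a build
with -fsanitize=address and a POSIX <ucontext.h>; to keep it short, the fiber
exits the process instead of switching back, and the matching finish call on the
original stack is indicated by a comment:

    #include <sanitizer/common_interface_defs.h>
    #include <ucontext.h>
    #include <cstdio>
    #include <cstdlib>

    static ucontext_t main_ctx, fiber_ctx;
    static char fiber_stack[1 << 16];

    static void fiber_entry() {
      // We just started running on the fiber's stack: finalize the switch.
      // A brand-new fiber has no saved fake stack, so pass null.
      __sanitizer_finish_switch_fiber(nullptr);
      std::puts("running on the fiber stack");
      std::exit(0);  // leave without switching back, to keep the sketch short
    }

    int main() {
      getcontext(&fiber_ctx);
      fiber_ctx.uc_stack.ss_sp = fiber_stack;
      fiber_ctx.uc_stack.ss_size = sizeof(fiber_stack);
      fiber_ctx.uc_link = nullptr;
      makecontext(&fiber_ctx, fiber_entry, 0);

      void *fake_stack_save = nullptr;
      // Announce the destination stack before leaving the current one.
      __sanitizer_start_switch_fiber(&fake_stack_save, fiber_stack,
                                     sizeof(fiber_stack));
      swapcontext(&main_ctx, &fiber_ctx);
      // If the fiber switched back here, we would now call
      // __sanitizer_finish_switch_fiber(fake_stack_save).
      return 0;
    }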
diff --git a/include/sanitizer/coverage_interface.h b/include/sanitizer/coverage_interface.h
index 2dcc09f..72ac843 100644
--- a/include/sanitizer/coverage_interface.h
+++ b/include/sanitizer/coverage_interface.h
@@ -41,13 +41,6 @@
   // Some of the entries in *data will be zero.
   uintptr_t __sanitizer_get_coverage_guards(uintptr_t **data);
 
-  // Set *data to the growing buffer with covered PCs and return the size
-  // of the buffer. The entries are never zero.
-  // When only unique pcs are collected, the size is equal to
-  // __sanitizer_get_total_unique_coverage.
-  // WARNING: EXPERIMENTAL API.
-  uintptr_t __sanitizer_get_coverage_pc_buffer(uintptr_t **data);
-
   // The coverage instrumentation may optionally provide imprecise counters.
   // Rather than exposing the counter values to the user we instead map
   // the counters to a bitset.
@@ -65,6 +58,15 @@
   // __sanitizer_get_number_of_counters bytes long and 8-aligned.
   uintptr_t
   __sanitizer_update_counter_bitset_and_clear_counters(uint8_t *bitset);
+
+  // EXPERIMENTAL API
+  // Set allocated buffer to record new coverage PCs as they are executed.
+  // Buffer length is specified in uptrs.
+  void __sanitizer_set_coverage_pc_buffer(uintptr_t *buffer, uintptr_t length);
+  // Number of pcs recorded in the buffer.
+  // Reset by __sanitizer_reset_coverage();
+  uintptr_t __sanitizer_get_coverage_pc_buffer_pos();
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
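A sketch of the new PC-buffer API, assuming the program is built with a
sanitizer plus a coverage instrumentation mode that records PCs (the exact
-fsanitize-coverage spelling depends on the clang in use):

    #include <sanitizer/coverage_interface.h>
    #include <cstdint>
    #include <cstdio>

    static uintptr_t pc_buffer[4096];

    __attribute__((noinline)) static void do_work() { std::puts("working"); }

    int main() {
      // Hand the runtime a buffer (length in uintptr_t slots) for new PCs.
      __sanitizer_set_coverage_pc_buffer(pc_buffer, 4096);
      do_work();
      uintptr_t filled = __sanitizer_get_coverage_pc_buffer_pos();
      std::printf("%zu PCs recorded\n", (size_t)filled);
      for (uintptr_t i = 0; i < filled; ++i)
        std::printf("  %p\n", (void *)pc_buffer[i]);
      return 0;
    }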
diff --git a/include/sanitizer/esan_interface.h b/include/sanitizer/esan_interface.h
new file mode 100644
index 0000000..4aff8d4
--- /dev/null
+++ b/include/sanitizer/esan_interface.h
@@ -0,0 +1,50 @@
+//===-- sanitizer/esan_interface.h ------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Public interface header.
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_ESAN_INTERFACE_H
+#define SANITIZER_ESAN_INTERFACE_H
+
+#include <sanitizer/common_interface_defs.h>
+
+// We declare our interface routines as weak to allow the user to avoid
+// ifdefs and instead use this pattern to allow building the same sources
+// with and without our runtime library:
+//     if (__esan_report)
+//       __esan_report();
+#ifdef _MSC_VER
+/* selectany is as close to weak as we'll get. */
+#define COMPILER_RT_WEAK __declspec(selectany)
+#elif __GNUC__
+#define COMPILER_RT_WEAK __attribute__((weak))
+#else
+#define COMPILER_RT_WEAK
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// This function can be called mid-run (or at the end of a run for
+// a server process that doesn't shut down normally) to request that
+// data for that point in the run be reported from the tool.
+void COMPILER_RT_WEAK __esan_report();
+
+// This function returns the number of samples that the esan tool has collected
+// to this point.  This is useful for testing.
+unsigned int COMPILER_RT_WEAK __esan_get_sample_count();
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // SANITIZER_ESAN_INTERFACE_H
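The weak-symbol pattern documented in the header lets the same source build and
run whether or not the esan runtime is linked in; a small sketch of that usage:

    #include <sanitizer/esan_interface.h>
    #include <cstdio>

    int main() {
      // Weak declarations: these checks are false when the runtime is absent.
      if (__esan_get_sample_count)
        std::printf("esan samples so far: %u\n", __esan_get_sample_count());
      if (__esan_report)
        __esan_report();  // ask the tool to emit a mid-run report
      return 0;
    }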
diff --git a/include/sanitizer/linux_syscall_hooks.h b/include/sanitizer/linux_syscall_hooks.h
index 89867c1..09f261d 100644
--- a/include/sanitizer/linux_syscall_hooks.h
+++ b/include/sanitizer/linux_syscall_hooks.h
@@ -1835,6 +1835,17 @@
   __sanitizer_syscall_pre_impl_vfork()
 #define __sanitizer_syscall_post_vfork(res) \
   __sanitizer_syscall_post_impl_vfork(res)
+#define __sanitizer_syscall_pre_sigaction(signum, act, oldact)                 \
+  __sanitizer_syscall_pre_impl_sigaction((long)signum, (long)act, (long)oldact)
+#define __sanitizer_syscall_post_sigaction(res, signum, act, oldact)           \
+  __sanitizer_syscall_post_impl_sigaction(res, (long)signum, (long)act,        \
+                                          (long)oldact)
+#define __sanitizer_syscall_pre_rt_sigaction(signum, act, oldact, sz)          \
+  __sanitizer_syscall_pre_impl_rt_sigaction((long)signum, (long)act,           \
+                                            (long)oldact, (long)sz)
+#define __sanitizer_syscall_post_rt_sigaction(res, signum, act, oldact, sz)    \
+  __sanitizer_syscall_post_impl_rt_sigaction(res, (long)signum, (long)act,     \
+                                             (long)oldact, (long)sz)
 
 // And now a few syscalls we don't handle yet.
 #define __sanitizer_syscall_pre_afs_syscall(...)
@@ -1889,7 +1900,6 @@
 #define __sanitizer_syscall_pre_query_module(...)
 #define __sanitizer_syscall_pre_readahead(...)
 #define __sanitizer_syscall_pre_readdir(...)
-#define __sanitizer_syscall_pre_rt_sigaction(...)
 #define __sanitizer_syscall_pre_rt_sigreturn(...)
 #define __sanitizer_syscall_pre_rt_sigsuspend(...)
 #define __sanitizer_syscall_pre_security(...)
@@ -1903,7 +1913,6 @@
 #define __sanitizer_syscall_pre_setreuid32(...)
 #define __sanitizer_syscall_pre_set_thread_area(...)
 #define __sanitizer_syscall_pre_setuid32(...)
-#define __sanitizer_syscall_pre_sigaction(...)
 #define __sanitizer_syscall_pre_sigaltstack(...)
 #define __sanitizer_syscall_pre_sigreturn(...)
 #define __sanitizer_syscall_pre_sigsuspend(...)
@@ -1971,7 +1980,6 @@
 #define __sanitizer_syscall_post_query_module(res, ...)
 #define __sanitizer_syscall_post_readahead(res, ...)
 #define __sanitizer_syscall_post_readdir(res, ...)
-#define __sanitizer_syscall_post_rt_sigaction(res, ...)
 #define __sanitizer_syscall_post_rt_sigreturn(res, ...)
 #define __sanitizer_syscall_post_rt_sigsuspend(res, ...)
 #define __sanitizer_syscall_post_security(res, ...)
@@ -1985,7 +1993,6 @@
 #define __sanitizer_syscall_post_setreuid32(res, ...)
 #define __sanitizer_syscall_post_set_thread_area(res, ...)
 #define __sanitizer_syscall_post_setuid32(res, ...)
-#define __sanitizer_syscall_post_sigaction(res, ...)
 #define __sanitizer_syscall_post_sigaltstack(res, ...)
 #define __sanitizer_syscall_post_sigreturn(res, ...)
 #define __sanitizer_syscall_post_sigsuspend(res, ...)
@@ -3062,7 +3069,13 @@
 void __sanitizer_syscall_post_impl_fork(long res);
 void __sanitizer_syscall_pre_impl_vfork();
 void __sanitizer_syscall_post_impl_vfork(long res);
-
+void __sanitizer_syscall_pre_impl_sigaction(long signum, long act, long oldact);
+void __sanitizer_syscall_post_impl_sigaction(long res, long signum, long act,
+                                             long oldact);
+void __sanitizer_syscall_pre_impl_rt_sigaction(long signum, long act,
+                                               long oldact, long sz);
+void __sanitizer_syscall_post_impl_rt_sigaction(long res, long signum, long act,
+                                                long oldact, long sz);
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/include/xray/xray_interface.h b/include/xray/xray_interface.h
new file mode 100644
index 0000000..22f137d
--- /dev/null
+++ b/include/xray/xray_interface.h
@@ -0,0 +1,58 @@
+//===-- xray_interface.h ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// APIs for controlling XRay functionality explicitly.
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_INTERFACE_H
+#define XRAY_XRAY_INTERFACE_H
+
+#include <cstdint>
+
+extern "C" {
+
+enum XRayEntryType { ENTRY = 0, EXIT = 1 };
+
+// Provide a function to invoke when instrumentation points are hit. This is
+// a user-visible control surface that overrides the default implementation. The
+// function provided should take the following arguments:
+//
+//   - function id: an identifier that indicates the id of a function; this id
+//                  is generated by xray; the mapping between the function id
+//                  and the actual function pointer is available through
+//                  __xray_table.
+//   - entry type: identifies what kind of instrumentation point was encountered
+//                 (function entry, function exit, etc.). See the enum
+//                 XRayEntryType for more details.
+//
+// Returns 1 on success, 0 on error.
+extern int __xray_set_handler(void (*entry)(int32_t, XRayEntryType));
+
+// This removes whatever the currently provided handler is. Returns 1 on
+// success, 0 on error.
+extern int __xray_remove_handler();
+
+enum XRayPatchingStatus {
+  NOT_INITIALIZED = 0,
+  SUCCESS = 1,
+  ONGOING = 2,
+  FAILED = 3,
+};
+
+// This tells XRay to patch the instrumentation points. See XRayPatchingStatus
+// for possible result values.
+extern XRayPatchingStatus __xray_patch();
+
+// Reverses the effect of __xray_patch(). See XRayPatchingStatus for possible
+// result values.
+extern XRayPatchingStatus __xray_unpatch();
+}
+
+#endif
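A sketch of the handler/patching flow, assuming a clang with -fxray-instrument
and the xray_always_instrument attribute (both assumptions, not part of this
patch); the handler only bumps a counter, since it runs inside every
instrumented entry and exit:

    #include <xray/xray_interface.h>
    #include <atomic>
    #include <cstdio>

    static std::atomic<int> events{0};

    // Keep the handler trivial; heavy work here would run on every sled hit.
    static void count_event(int32_t func_id, XRayEntryType type) {
      (void)func_id;
      (void)type;
      events.fetch_add(1, std::memory_order_relaxed);
    }

    __attribute__((xray_always_instrument)) void do_work() { std::puts("work"); }

    int main() {
      if (!__xray_set_handler(count_event)) return 1;  // returns 1 on success
      if (__xray_patch() != SUCCESS) return 1;         // rewrite the sleds
      do_work();
      __xray_unpatch();
      __xray_remove_handler();
      std::printf("instrumentation events seen: %d\n", events.load());
      return 0;
    }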
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index 718b128..ce96fe4 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -4,6 +4,15 @@
 include(AddCompilerRT)
 include(SanitizerUtils)
 
+# Gate the building of sanitizer_common on whether we're building either the
+# sanitizers or xray (or both).
+#
+# TODO: Refactor sanitizer_common into smaller pieces (e.g. flag parsing, utils).
+if (COMPILER_RT_HAS_SANITIZER_COMMON AND
+    (COMPILER_RT_BUILD_SANITIZERS OR COMPILER_RT_BUILD_XRAY))
+  add_subdirectory(sanitizer_common)
+endif()
+
 if(COMPILER_RT_BUILD_BUILTINS)
   add_subdirectory(builtins)
 endif()
@@ -14,7 +23,6 @@
   endif()
 
   if(COMPILER_RT_HAS_SANITIZER_COMMON)
-    add_subdirectory(sanitizer_common)
     add_subdirectory(stats)
     add_subdirectory(lsan)
     add_subdirectory(ubsan)
@@ -48,4 +56,16 @@
   if(COMPILER_RT_HAS_CFI)
     add_subdirectory(cfi)
   endif()
+
+  if(COMPILER_RT_HAS_ESAN)
+    add_subdirectory(esan)
+  endif()
+
+  if(COMPILER_RT_HAS_SCUDO)
+    add_subdirectory(scudo)
+  endif()
+endif()
+
+if(COMPILER_RT_BUILD_XRAY AND COMPILER_RT_HAS_XRAY)
+  add_subdirectory(xray)
 endif()
diff --git a/lib/Makefile.mk b/lib/Makefile.mk
index 7eb6489..b1540bd 100644
--- a/lib/Makefile.mk
+++ b/lib/Makefile.mk
@@ -10,10 +10,4 @@
 SubDirs :=
 
 # Add submodules.
-SubDirs += asan
 SubDirs += builtins
-SubDirs += interception
-SubDirs += lsan
-SubDirs += profile
-SubDirs += sanitizer_common
-SubDirs += ubsan
diff --git a/lib/asan/CMakeLists.txt b/lib/asan/CMakeLists.txt
index 6716f48..7325932 100644
--- a/lib/asan/CMakeLists.txt
+++ b/lib/asan/CMakeLists.txt
@@ -4,6 +4,7 @@
   asan_allocator.cc
   asan_activation.cc
   asan_debugging.cc
+  asan_descriptions.cc
   asan_fake_stack.cc
   asan_flags.cc
   asan_globals.cc
@@ -13,6 +14,7 @@
   asan_malloc_linux.cc
   asan_malloc_mac.cc
   asan_malloc_win.cc
+  asan_memory_profile.cc
   asan_poisoning.cc
   asan_posix.cc
   asan_report.cc
@@ -32,7 +34,7 @@
 include_directories(..)
 
 set(ASAN_CFLAGS ${SANITIZER_COMMON_CFLAGS})
-append_no_rtti_flag(ASAN_CFLAGS)
+append_rtti_flag(OFF ASAN_CFLAGS)
 
 set(ASAN_COMMON_DEFINITIONS
   ASAN_HAS_EXCEPTIONS=1)
@@ -62,7 +64,7 @@
 set(ASAN_DYNAMIC_CFLAGS ${ASAN_CFLAGS})
 append_list_if(COMPILER_RT_HAS_FTLS_MODEL_INITIAL_EXEC
   -ftls-model=initial-exec ASAN_DYNAMIC_CFLAGS)
-append_list_if(MSVC /DEBUG ASAN_DYNAMIC_CFLAGS)
+append_list_if(MSVC /DEBUG ASAN_DYNAMIC_LINK_FLAGS)
 
 append_list_if(COMPILER_RT_HAS_LIBC c ASAN_DYNAMIC_LIBS)
 append_list_if(COMPILER_RT_HAS_LIBDL dl ASAN_DYNAMIC_LIBS)
@@ -74,7 +76,7 @@
 
 # Compile ASan sources into an object library.
 
-add_compiler_rt_object_libraries(RTAsan_dynamic 
+add_compiler_rt_object_libraries(RTAsan_dynamic
   OS ${SANITIZER_COMMON_SUPPORTED_OS}
   ARCHS ${ASAN_SUPPORTED_ARCH}
   SOURCES ${ASAN_SOURCES} ${ASAN_CXX_SOURCES}
@@ -82,15 +84,15 @@
   DEFS ${ASAN_DYNAMIC_DEFINITIONS})
 
 if(NOT APPLE)
-  add_compiler_rt_object_libraries(RTAsan 
+  add_compiler_rt_object_libraries(RTAsan
     ARCHS ${ASAN_SUPPORTED_ARCH}
     SOURCES ${ASAN_SOURCES} CFLAGS ${ASAN_CFLAGS}
     DEFS ${ASAN_COMMON_DEFINITIONS})
-  add_compiler_rt_object_libraries(RTAsan_cxx 
+  add_compiler_rt_object_libraries(RTAsan_cxx
     ARCHS ${ASAN_SUPPORTED_ARCH}
     SOURCES ${ASAN_CXX_SOURCES} CFLAGS ${ASAN_CFLAGS}
     DEFS ${ASAN_COMMON_DEFINITIONS})
-  add_compiler_rt_object_libraries(RTAsan_preinit 
+  add_compiler_rt_object_libraries(RTAsan_preinit
     ARCHS ${ASAN_SUPPORTED_ARCH}
     SOURCES ${ASAN_PREINIT_SOURCES} CFLAGS ${ASAN_CFLAGS}
     DEFS ${ASAN_COMMON_DEFINITIONS})
@@ -105,6 +107,8 @@
 
 # Build ASan runtimes shipped with Clang.
 add_custom_target(asan)
+set_target_properties(asan PROPERTIES FOLDER "Compiler-RT Misc")
+
 if(APPLE)
   add_compiler_rt_runtime(clang_rt.asan
     SHARED
@@ -121,40 +125,40 @@
     PARENT_TARGET asan)
 else()
   # Build separate libraries for each target.
-  
-    set(ASAN_COMMON_RUNTIME_OBJECT_LIBS
-      RTInterception
-      RTSanitizerCommon
-      RTSanitizerCommonLibc
-      RTLSanCommon
-      RTUbsan)
 
-    add_compiler_rt_runtime(clang_rt.asan
-      STATIC
-      ARCHS ${ASAN_SUPPORTED_ARCH}
-      OBJECT_LIBS RTAsan_preinit
-                  RTAsan
-                  ${ASAN_COMMON_RUNTIME_OBJECT_LIBS}
-      CFLAGS ${ASAN_CFLAGS}
-      DEFS ${ASAN_COMMON_DEFINITIONS}
-      PARENT_TARGET asan)
+  set(ASAN_COMMON_RUNTIME_OBJECT_LIBS
+    RTInterception
+    RTSanitizerCommon
+    RTSanitizerCommonLibc
+    RTLSanCommon
+    RTUbsan)
 
-    add_compiler_rt_runtime(clang_rt.asan_cxx
-      STATIC
-      ARCHS ${ASAN_SUPPORTED_ARCH}
-      OBJECT_LIBS RTAsan_cxx
-                  RTUbsan_cxx
-      CFLAGS ${ASAN_CFLAGS}
-      DEFS ${ASAN_COMMON_DEFINITIONS}
-      PARENT_TARGET asan)
+  add_compiler_rt_runtime(clang_rt.asan
+    STATIC
+    ARCHS ${ASAN_SUPPORTED_ARCH}
+    OBJECT_LIBS RTAsan_preinit
+                RTAsan
+                ${ASAN_COMMON_RUNTIME_OBJECT_LIBS}
+    CFLAGS ${ASAN_CFLAGS}
+    DEFS ${ASAN_COMMON_DEFINITIONS}
+    PARENT_TARGET asan)
 
-    add_compiler_rt_runtime(clang_rt.asan-preinit
-      STATIC
-      ARCHS ${ASAN_SUPPORTED_ARCH}
-      OBJECT_LIBS RTAsan_preinit
-      CFLAGS ${ASAN_CFLAGS}
-      DEFS ${ASAN_COMMON_DEFINITIONS}
-      PARENT_TARGET asan)
+  add_compiler_rt_runtime(clang_rt.asan_cxx
+    STATIC
+    ARCHS ${ASAN_SUPPORTED_ARCH}
+    OBJECT_LIBS RTAsan_cxx
+                RTUbsan_cxx
+    CFLAGS ${ASAN_CFLAGS}
+    DEFS ${ASAN_COMMON_DEFINITIONS}
+    PARENT_TARGET asan)
+
+  add_compiler_rt_runtime(clang_rt.asan-preinit
+    STATIC
+    ARCHS ${ASAN_SUPPORTED_ARCH}
+    OBJECT_LIBS RTAsan_preinit
+    CFLAGS ${ASAN_CFLAGS}
+    DEFS ${ASAN_COMMON_DEFINITIONS}
+    PARENT_TARGET asan)
 
   foreach(arch ${ASAN_SUPPORTED_ARCH})
     if (UNIX AND NOT ${arch} MATCHES "i386|i686")
@@ -165,8 +169,8 @@
            -Wl,--version-script,${CMAKE_CURRENT_BINARY_DIR}/clang_rt.asan-dynamic-${arch}.vers)
       set_source_files_properties(
         ${CMAKE_CURRENT_BINARY_DIR}/dummy.cc
-	PROPERTIES
-	OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/clang_rt.asan-dynamic-${arch}.vers)
+        PROPERTIES
+        OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/clang_rt.asan-dynamic-${arch}.vers)
     else()
       set(VERSION_SCRIPT_FLAG)
     endif()
@@ -194,7 +198,7 @@
         ARCHS ${arch})
       add_dependencies(asan clang_rt.asan_cxx-${arch}-symbols)
       add_sanitizer_rt_symbols(clang_rt.asan
-        ARCHS ${arch} 
+        ARCHS ${arch}
         EXTRA asan.syms.extra)
       add_dependencies(asan clang_rt.asan-${arch}-symbols)
     endif()
@@ -219,8 +223,7 @@
   endforeach()
 endif()
 
-add_compiler_rt_resource_file(asan_blacklist asan_blacklist.txt)
-add_dependencies(asan asan_blacklist)
+add_compiler_rt_resource_file(asan_blacklist asan_blacklist.txt asan)
 add_dependencies(compiler-rt asan)
 
 add_subdirectory(scripts)
diff --git a/lib/asan/Makefile.mk b/lib/asan/Makefile.mk
deleted file mode 100644
index 0dafefc..0000000
--- a/lib/asan/Makefile.mk
+++ /dev/null
@@ -1,29 +0,0 @@
-#===- lib/asan/Makefile.mk ---------------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := asan
-SubDirs := 
-
-CCSources := $(foreach file,$(wildcard $(Dir)/*.cc),$(notdir $(file)))
-CXXOnlySources := asan_new_delete.cc
-COnlySources := $(filter-out $(CXXOnlySources),$(CCSources))
-SSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(CCSources) $(SSources)
-ObjNames := $(CCSources:%.cc=%.o) $(SSources:%.S=%.o)
-
-Implementation := Generic
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard $(Dir)/*.h)
-Dependencies += $(wildcard $(Dir)/../interception/*.h)
-Dependencies += $(wildcard $(Dir)/../sanitizer_common/*.h)
-
-# Define a convenience variable for all the asan functions.
-AsanFunctions := $(COnlySources:%.cc=%) $(SSources:%.S=%)
-AsanCXXFunctions := $(CXXOnlySources:%.cc=%)
diff --git a/lib/asan/asan_activation.cc b/lib/asan/asan_activation.cc
index 9df3b97..a5ace85 100644
--- a/lib/asan/asan_activation.cc
+++ b/lib/asan/asan_activation.cc
@@ -47,6 +47,7 @@
     FlagParser parser;
     RegisterActivationFlags(&parser, &f, &cf);
 
+    cf.SetDefaults();
     // Copy the current activation flags.
     allocator_options.CopyTo(&f, &cf);
     cf.malloc_context_size = malloc_context_size;
@@ -61,7 +62,7 @@
       parser.ParseString(env);
     }
 
-    SetVerbosity(cf.verbosity);
+    InitializeCommonFlags(&cf);
 
     if (Verbosity()) ReportUnrecognizedFlags();
 
diff --git a/lib/asan/asan_allocator.cc b/lib/asan/asan_allocator.cc
index 56f184a..6a5d227 100644
--- a/lib/asan/asan_allocator.cc
+++ b/lib/asan/asan_allocator.cc
@@ -223,7 +223,7 @@
 
 struct Allocator {
   static const uptr kMaxAllowedMallocSize =
-      FIRST_32_SECOND_64(3UL << 30, 1UL << 40);
+      FIRST_32_SECOND_64(3UL << 30, 1ULL << 40);
   static const uptr kMaxThreadLocalQuarantine =
       FIRST_32_SECOND_64(1 << 18, 1 << 20);
 
@@ -457,29 +457,28 @@
     return res;
   }
 
-  void AtomicallySetQuarantineFlag(AsanChunk *m, void *ptr,
+  // Set the quarantine flag if the chunk is allocated; issue an ASan error
+  // report for available and quarantined chunks. Returns true on success,
+  // false otherwise.
+  bool AtomicallySetQuarantineFlagIfAllocated(AsanChunk *m, void *ptr,
                                    BufferedStackTrace *stack) {
     u8 old_chunk_state = CHUNK_ALLOCATED;
     // Flip the chunk_state atomically to avoid race on double-free.
-    if (!atomic_compare_exchange_strong((atomic_uint8_t*)m, &old_chunk_state,
-                                        CHUNK_QUARANTINE, memory_order_acquire))
+    if (!atomic_compare_exchange_strong((atomic_uint8_t *)m, &old_chunk_state,
+                                        CHUNK_QUARANTINE,
+                                        memory_order_acquire)) {
       ReportInvalidFree(ptr, old_chunk_state, stack);
+      // It's not safe to push a chunk in quarantine on invalid free.
+      return false;
+    }
     CHECK_EQ(CHUNK_ALLOCATED, old_chunk_state);
+    return true;
   }
 
   // Expects the chunk to already be marked as quarantined by using
-  // AtomicallySetQuarantineFlag.
+  // AtomicallySetQuarantineFlagIfAllocated.
   void QuarantineChunk(AsanChunk *m, void *ptr, BufferedStackTrace *stack,
                        AllocType alloc_type) {
     CHECK_EQ(m->chunk_state, CHUNK_QUARANTINE);
-
-    if (m->alloc_type != alloc_type) {
-      if (atomic_load(&alloc_dealloc_mismatch, memory_order_acquire)) {
-        ReportAllocTypeMismatch((uptr)ptr, stack, (AllocType)m->alloc_type,
-                                (AllocType)alloc_type);
-      }
-    }
-
     CHECK_GE(m->alloc_tid, 0);
     if (SANITIZER_WORDSIZE == 64)  // On 32-bits this resides in user area.
       CHECK_EQ(m->free_tid, kInvalidTid);
@@ -516,13 +515,24 @@
 
     uptr chunk_beg = p - kChunkHeaderSize;
     AsanChunk *m = reinterpret_cast<AsanChunk *>(chunk_beg);
-    if (delete_size && flags()->new_delete_type_mismatch &&
-        delete_size != m->UsedSize()) {
-      ReportNewDeleteSizeMismatch(p, delete_size, stack);
-    }
+
     ASAN_FREE_HOOK(ptr);
     // Must mark the chunk as quarantined before any changes to its metadata.
-    AtomicallySetQuarantineFlag(m, ptr, stack);
+    // Do not quarantine given chunk if we failed to set CHUNK_QUARANTINE flag.
+    if (!AtomicallySetQuarantineFlagIfAllocated(m, ptr, stack)) return;
+
+    if (m->alloc_type != alloc_type) {
+      if (atomic_load(&alloc_dealloc_mismatch, memory_order_acquire)) {
+        ReportAllocTypeMismatch((uptr)ptr, stack, (AllocType)m->alloc_type,
+                                (AllocType)alloc_type);
+      }
+    }
+
+    if (delete_size && flags()->new_delete_type_mismatch &&
+        delete_size != m->UsedSize()) {
+      ReportNewDeleteSizeMismatch(p, m->UsedSize(), delete_size, stack);
+    }
+
     QuarantineChunk(m, ptr, stack, alloc_type);
   }
 
@@ -655,6 +665,9 @@
 bool AsanChunkView::IsValid() {
   return chunk_ && chunk_->chunk_state != CHUNK_AVAILABLE;
 }
+bool AsanChunkView::IsAllocated() {
+  return chunk_ && chunk_->chunk_state == CHUNK_ALLOCATED;
+}
 uptr AsanChunkView::Beg() { return chunk_->Beg(); }
 uptr AsanChunkView::End() { return Beg() + UsedSize(); }
 uptr AsanChunkView::UsedSize() { return chunk_->UsedSize(); }
@@ -668,12 +681,15 @@
   return res;
 }
 
+u32 AsanChunkView::GetAllocStackId() { return chunk_->alloc_context_id; }
+u32 AsanChunkView::GetFreeStackId() { return chunk_->free_context_id; }
+
 StackTrace AsanChunkView::GetAllocStack() {
-  return GetStackTraceFromId(chunk_->alloc_context_id);
+  return GetStackTraceFromId(GetAllocStackId());
 }
 
 StackTrace AsanChunkView::GetFreeStack() {
-  return GetStackTraceFromId(chunk_->free_context_id);
+  return GetStackTraceFromId(GetFreeStackId());
 }
 
 void InitializeAllocator(const AllocatorOptions &options) {
@@ -754,7 +770,7 @@
   return 0;
 }
 
-uptr asan_malloc_usable_size(void *ptr, uptr pc, uptr bp) {
+uptr asan_malloc_usable_size(const void *ptr, uptr pc, uptr bp) {
   if (!ptr) return 0;
   uptr usable_size = instance.AllocationSize(reinterpret_cast<uptr>(ptr));
   if (flags()->check_malloc_usable_size && (usable_size == 0)) {
diff --git a/lib/asan/asan_allocator.h b/lib/asan/asan_allocator.h
index e3d5333..2f9f7aa 100644
--- a/lib/asan/asan_allocator.h
+++ b/lib/asan/asan_allocator.h
@@ -49,14 +49,17 @@
 class AsanChunkView {
  public:
   explicit AsanChunkView(AsanChunk *chunk) : chunk_(chunk) {}
-  bool IsValid();   // Checks if AsanChunkView points to a valid allocated
-                    // or quarantined chunk.
-  uptr Beg();       // First byte of user memory.
-  uptr End();       // Last byte of user memory.
-  uptr UsedSize();  // Size requested by the user.
+  bool IsValid();        // Checks if AsanChunkView points to a valid allocated
+                         // or quarantined chunk.
+  bool IsAllocated();    // Checks if the memory is currently allocated.
+  uptr Beg();            // First byte of user memory.
+  uptr End();            // Last byte of user memory.
+  uptr UsedSize();       // Size requested by the user.
   uptr AllocTid();
   uptr FreeTid();
   bool Eq(const AsanChunkView &c) const { return chunk_ == c.chunk_; }
+  u32 GetAllocStackId();
+  u32 GetFreeStackId();
   StackTrace GetAllocStack();
   StackTrace GetFreeStack();
   bool AddrIsInside(uptr addr, uptr access_size, sptr *offset) {
@@ -171,7 +174,7 @@
 
 int asan_posix_memalign(void **memptr, uptr alignment, uptr size,
                         BufferedStackTrace *stack);
-uptr asan_malloc_usable_size(void *ptr, uptr pc, uptr bp);
+uptr asan_malloc_usable_size(const void *ptr, uptr pc, uptr bp);
 
 uptr asan_mz_size(const void *ptr);
 void asan_mz_force_lock();
diff --git a/lib/asan/asan_debugging.cc b/lib/asan/asan_debugging.cc
index 7c3a8a7..b7481ff 100644
--- a/lib/asan/asan_debugging.cc
+++ b/lib/asan/asan_debugging.cc
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "asan_allocator.h"
+#include "asan_descriptions.h"
 #include "asan_flags.h"
 #include "asan_internal.h"
 #include "asan_mapping.h"
@@ -65,7 +66,9 @@
 }
 
 void AsanLocateAddress(uptr addr, AddressDescription *descr) {
-  if (DescribeAddressIfShadow(addr, descr, /* print */ false)) {
+  ShadowAddressDescription shadow_descr;
+  if (GetShadowAddressInformation(addr, &shadow_descr)) {
+    descr->region_kind = ShadowNames[shadow_descr.kind];
     return;
   }
   if (GetInfoForAddressIfGlobal(addr, descr)) {
diff --git a/lib/asan/asan_descriptions.cc b/lib/asan/asan_descriptions.cc
new file mode 100644
index 0000000..9b39360
--- /dev/null
+++ b/lib/asan/asan_descriptions.cc
@@ -0,0 +1,104 @@
+//===-- asan_descriptions.cc ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// ASan functions for getting information about an address and/or printing it.
+//===----------------------------------------------------------------------===//
+
+#include "asan_descriptions.h"
+#include "asan_mapping.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+
+namespace __asan {
+
+// Return " (thread_name) " or an empty string if the name is empty.
+const char *ThreadNameWithParenthesis(AsanThreadContext *t, char buff[],
+                                      uptr buff_len) {
+  const char *name = t->name;
+  if (name[0] == '\0') return "";
+  buff[0] = 0;
+  internal_strncat(buff, " (", 3);
+  internal_strncat(buff, name, buff_len - 4);
+  internal_strncat(buff, ")", 2);
+  return buff;
+}
+
+const char *ThreadNameWithParenthesis(u32 tid, char buff[], uptr buff_len) {
+  if (tid == kInvalidTid) return "";
+  asanThreadRegistry().CheckLocked();
+  AsanThreadContext *t = GetThreadContextByTidLocked(tid);
+  return ThreadNameWithParenthesis(t, buff, buff_len);
+}
+
+void DescribeThread(AsanThreadContext *context) {
+  CHECK(context);
+  asanThreadRegistry().CheckLocked();
+  // No need to announce the main thread.
+  if (context->tid == 0 || context->announced) {
+    return;
+  }
+  context->announced = true;
+  char tname[128];
+  InternalScopedString str(1024);
+  str.append("Thread T%d%s", context->tid,
+             ThreadNameWithParenthesis(context->tid, tname, sizeof(tname)));
+  if (context->parent_tid == kInvalidTid) {
+    str.append(" created by unknown thread\n");
+    Printf("%s", str.data());
+    return;
+  }
+  str.append(
+      " created by T%d%s here:\n", context->parent_tid,
+      ThreadNameWithParenthesis(context->parent_tid, tname, sizeof(tname)));
+  Printf("%s", str.data());
+  StackDepotGet(context->stack_id).Print();
+  // Recursively describe the parent thread if needed.
+  if (flags()->print_full_thread_history) {
+    AsanThreadContext *parent_context =
+        GetThreadContextByTidLocked(context->parent_tid);
+    DescribeThread(parent_context);
+  }
+}
+
+// Shadow descriptions
+static bool GetShadowKind(uptr addr, ShadowKind *shadow_kind) {
+  CHECK(!AddrIsInMem(addr));
+  if (AddrIsInShadowGap(addr)) {
+    *shadow_kind = kShadowKindGap;
+  } else if (AddrIsInHighShadow(addr)) {
+    *shadow_kind = kShadowKindHigh;
+  } else if (AddrIsInLowShadow(addr)) {
+    *shadow_kind = kShadowKindLow;
+  } else {
+    CHECK(0 && "Address is not in memory and not in shadow?");
+    return false;
+  }
+  return true;
+}
+
+bool DescribeAddressIfShadow(uptr addr) {
+  ShadowAddressDescription descr;
+  if (!GetShadowAddressInformation(addr, &descr)) return false;
+  Printf("Address %p is located in the %s area.\n", addr,
+         ShadowNames[descr.kind]);
+  return true;
+}
+
+bool GetShadowAddressInformation(uptr addr, ShadowAddressDescription *descr) {
+  if (AddrIsInMem(addr)) return false;
+  ShadowKind shadow_kind;
+  if (!GetShadowKind(addr, &shadow_kind)) return false;
+  if (shadow_kind != kShadowKindGap) descr->shadow_byte = *(u8 *)addr;
+  descr->addr = addr;
+  descr->kind = shadow_kind;
+  return true;
+}
+
+}  // namespace __asan
diff --git a/lib/asan/asan_descriptions.h b/lib/asan/asan_descriptions.h
new file mode 100644
index 0000000..e93adec
--- /dev/null
+++ b/lib/asan/asan_descriptions.h
@@ -0,0 +1,97 @@
+//===-- asan_descriptions.h -------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// ASan-private header for asan_descriptions.cc.
+// TODO(filcab): Most struct definitions should move to the interface headers.
+//===----------------------------------------------------------------------===//
+
+#include "asan_internal.h"
+#include "asan_thread.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_report_decorator.h"
+
+namespace __asan {
+
+void DescribeThread(AsanThreadContext *context);
+static inline void DescribeThread(AsanThread *t) {
+  if (t) DescribeThread(t->context());
+}
+const char *ThreadNameWithParenthesis(AsanThreadContext *t, char buff[],
+                                      uptr buff_len);
+const char *ThreadNameWithParenthesis(u32 tid, char buff[], uptr buff_len);
+
+class Decorator : public __sanitizer::SanitizerCommonDecorator {
+ public:
+  Decorator() : SanitizerCommonDecorator() {}
+  const char *Access() { return Blue(); }
+  const char *EndAccess() { return Default(); }
+  const char *Location() { return Green(); }
+  const char *EndLocation() { return Default(); }
+  const char *Allocation() { return Magenta(); }
+  const char *EndAllocation() { return Default(); }
+
+  const char *ShadowByte(u8 byte) {
+    switch (byte) {
+      case kAsanHeapLeftRedzoneMagic:
+      case kAsanHeapRightRedzoneMagic:
+      case kAsanArrayCookieMagic:
+        return Red();
+      case kAsanHeapFreeMagic:
+        return Magenta();
+      case kAsanStackLeftRedzoneMagic:
+      case kAsanStackMidRedzoneMagic:
+      case kAsanStackRightRedzoneMagic:
+      case kAsanStackPartialRedzoneMagic:
+        return Red();
+      case kAsanStackAfterReturnMagic:
+        return Magenta();
+      case kAsanInitializationOrderMagic:
+        return Cyan();
+      case kAsanUserPoisonedMemoryMagic:
+      case kAsanContiguousContainerOOBMagic:
+      case kAsanAllocaLeftMagic:
+      case kAsanAllocaRightMagic:
+        return Blue();
+      case kAsanStackUseAfterScopeMagic:
+        return Magenta();
+      case kAsanGlobalRedzoneMagic:
+        return Red();
+      case kAsanInternalHeapMagic:
+        return Yellow();
+      case kAsanIntraObjectRedzone:
+        return Yellow();
+      default:
+        return Default();
+    }
+  }
+  const char *EndShadowByte() { return Default(); }
+  const char *MemoryByte() { return Magenta(); }
+  const char *EndMemoryByte() { return Default(); }
+};
+
+enum ShadowKind : u8 {
+  kShadowKindLow,
+  kShadowKindGap,
+  kShadowKindHigh,
+};
+static const char *const ShadowNames[] = {"low shadow", "shadow gap",
+                                          "high shadow"};
+
+struct ShadowAddressDescription {
+  uptr addr;
+  ShadowKind kind;
+  u8 shadow_byte;
+};
+
+bool GetShadowAddressInformation(uptr addr, ShadowAddressDescription *descr);
+bool DescribeAddressIfShadow(uptr addr);
+
+}  // namespace __asan
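
For readers following the new description API, here is a minimal, self-contained sketch (not taken from the patch) of the classification that GetShadowAddressInformation and DescribeAddressIfShadow perform. The In*Shadow predicates and the address ranges below are illustrative stand-ins for the real AddrIsIn* macros in asan_mapping.h:

#include <cstdint>
#include <cstdio>

enum ShadowKind { kShadowKindLow, kShadowKindGap, kShadowKindHigh };
static const char *const kShadowNames[] = {"low shadow", "shadow gap",
                                           "high shadow"};

struct ShadowAddressDescription {
  uintptr_t addr;
  ShadowKind kind;
};

// Hypothetical boundaries; the real ones come from asan_mapping.h.
static bool InLowShadow(uintptr_t a)  { return a >= 0x00007fff8000ULL && a < 0x00008fff7000ULL; }
static bool InShadowGap(uintptr_t a)  { return a >= 0x00008fff7000ULL && a < 0x02008fff7000ULL; }
static bool InHighShadow(uintptr_t a) { return a >= 0x02008fff7000ULL && a < 0x10007fff8000ULL; }

static bool GetShadowAddressInformation(uintptr_t addr,
                                        ShadowAddressDescription *d) {
  if (InShadowGap(addr))       d->kind = kShadowKindGap;
  else if (InHighShadow(addr)) d->kind = kShadowKindHigh;
  else if (InLowShadow(addr))  d->kind = kShadowKindLow;
  else return false;  // ordinary application memory, not a shadow address
  d->addr = addr;
  return true;
}

int main() {
  ShadowAddressDescription d;
  if (GetShadowAddressInformation(0x00008000a000ULL, &d))
    std::printf("Address %llx is located in the %s area.\n",
                (unsigned long long)d.addr, kShadowNames[d.kind]);
}
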
diff --git a/lib/asan/asan_fake_stack.cc b/lib/asan/asan_fake_stack.cc
index 91fdf0a..017b7d2 100644
--- a/lib/asan/asan_fake_stack.cc
+++ b/lib/asan/asan_fake_stack.cc
@@ -31,7 +31,7 @@
   CHECK_EQ(SHADOW_SCALE, 3);  // This code expects SHADOW_SCALE=3.
   u64 *shadow = reinterpret_cast<u64*>(MemToShadow(ptr));
   if (class_id <= 6) {
-    for (uptr i = 0; i < (1U << class_id); i++) {
+    for (uptr i = 0; i < (((uptr)1) << class_id); i++) {
       shadow[i] = magic;
       // Make sure this does not become memset.
       SanitizerBreakOptimization(nullptr);
@@ -100,7 +100,7 @@
     // if the signal arrives between checking and setting flags[pos], the
     // signal handler's fake stack will start from a different hint_position
     // and so will not touch this particular byte. So, it is safe to do this
-    // with regular non-atimic load and store (at least I was not able to make
+    // with regular non-atomic load and store (at least I was not able to make
     // this code crash).
     if (flags[pos]) continue;
     flags[pos] = 1;
@@ -121,7 +121,7 @@
   uptr class_id = (ptr - beg) >> stack_size_log;
   uptr base = beg + (class_id << stack_size_log);
   CHECK_LE(base, ptr);
-  CHECK_LT(ptr, base + (1UL << stack_size_log));
+  CHECK_LT(ptr, base + (((uptr)1) << stack_size_log));
   uptr pos = (ptr - base) >> (kMinStackFrameSizeLog + class_id);
   uptr res = base + pos * BytesInSizeClass(class_id);
   *frame_end = res + BytesInSizeClass(class_id);
diff --git a/lib/asan/asan_fake_stack.h b/lib/asan/asan_fake_stack.h
index 3b1d9eb..da9a91c 100644
--- a/lib/asan/asan_fake_stack.h
+++ b/lib/asan/asan_fake_stack.h
@@ -52,7 +52,7 @@
 // Allocate() flips the appropriate allocation flag atomically, thus achieving
 // async-signal safety.
 // This allocator does not have quarantine per se, but it tries to allocate the
-// frames in round robin fasion to maximize the delay between a deallocation
+// frames in round robin fashion to maximize the delay between a deallocation
 // and the next allocation.
 class FakeStack {
   static const uptr kMinStackFrameSizeLog = 6;  // Min frame is 64B.
@@ -69,12 +69,12 @@
 
   // stack_size_log is at least 15 (stack_size >= 32K).
   static uptr SizeRequiredForFlags(uptr stack_size_log) {
-    return 1UL << (stack_size_log + 1 - kMinStackFrameSizeLog);
+    return ((uptr)1) << (stack_size_log + 1 - kMinStackFrameSizeLog);
   }
 
   // Each size class occupies stack_size bytes.
   static uptr SizeRequiredForFrames(uptr stack_size_log) {
-    return (1ULL << stack_size_log) * kNumberOfSizeClasses;
+    return (((uptr)1) << stack_size_log) * kNumberOfSizeClasses;
   }
 
   // Number of bytes requires for the whole object.
@@ -91,20 +91,20 @@
   // and so on.
   static uptr FlagsOffset(uptr stack_size_log, uptr class_id) {
     uptr t = kNumberOfSizeClasses - 1 - class_id;
-    const uptr all_ones = (1 << (kNumberOfSizeClasses - 1)) - 1;
+    const uptr all_ones = (((uptr)1) << (kNumberOfSizeClasses - 1)) - 1;
     return ((all_ones >> t) << t) << (stack_size_log - 15);
   }
 
   static uptr NumberOfFrames(uptr stack_size_log, uptr class_id) {
-    return 1UL << (stack_size_log - kMinStackFrameSizeLog - class_id);
+    return ((uptr)1) << (stack_size_log - kMinStackFrameSizeLog - class_id);
   }
 
-  // Divide n by the numbe of frames in size class.
+  // Divide n by the number of frames in size class.
   static uptr ModuloNumberOfFrames(uptr stack_size_log, uptr class_id, uptr n) {
     return n & (NumberOfFrames(stack_size_log, class_id) - 1);
   }
 
-  // The the pointer to the flags of the given class_id.
+  // The pointer to the flags of the given class_id.
   u8 *GetFlags(uptr stack_size_log, uptr class_id) {
     return reinterpret_cast<u8 *>(this) + kFlagsOffset +
            FlagsOffset(stack_size_log, class_id);
@@ -114,7 +114,8 @@
   u8 *GetFrame(uptr stack_size_log, uptr class_id, uptr pos) {
     return reinterpret_cast<u8 *>(this) + kFlagsOffset +
            SizeRequiredForFlags(stack_size_log) +
-           (1 << stack_size_log) * class_id + BytesInSizeClass(class_id) * pos;
+           (((uptr)1) << stack_size_log) * class_id +
+           BytesInSizeClass(class_id) * pos;
   }
 
   // Allocate the fake frame.
@@ -137,7 +138,7 @@
 
   // Number of bytes in a fake frame of this size class.
   static uptr BytesInSizeClass(uptr class_id) {
-    return 1UL << (class_id + kMinStackFrameSizeLog);
+    return ((uptr)1) << (class_id + kMinStackFrameSizeLog);
   }
 
   // The fake frame is guaranteed to have a right redzone.
@@ -159,7 +160,7 @@
   static const uptr kFlagsOffset = 4096;  // This is where the flags begin.
   // Must match the number of uses of DEFINE_STACK_MALLOC_FREE_WITH_CLASS_ID
   COMPILER_CHECK(kNumberOfSizeClasses == 11);
-  static const uptr kMaxStackMallocSize = 1 << kMaxStackFrameSizeLog;
+  static const uptr kMaxStackMallocSize = ((uptr)1) << kMaxStackFrameSizeLog;
 
   uptr hint_position_[kNumberOfSizeClasses];
   uptr stack_size_log_;
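
The recurring change from 1UL << n to ((uptr)1) << n in asan_fake_stack.* is about the width of the shift operand: on LLP64 targets such as 64-bit Windows, unsigned long is only 32 bits, so the shift and any arithmetic on its result happen in 32 bits before widening. A small sketch under that assumption, with uintptr_t standing in for uptr:

#include <cstdint>
#include <cstdio>

// Pointer-sized unsigned integer, analogous to ASan's uptr.
using uptr = uintptr_t;

// Shift performed in a type that may be only 32 bits wide (unsigned long on
// 64-bit Windows). If log >= 32 the shift is undefined there; if the product
// needs more than 32 bits it truncates before the widening conversion.
static uint64_t SizeNarrow(unsigned log, unsigned classes) {
  return (unsigned long)((1UL << log) * classes);
}

// Shift performed in the pointer-wide type, as the patch does with ((uptr)1).
static uint64_t SizeWide(unsigned log, unsigned classes) {
  return (((uptr)1) << log) * classes;
}

int main() {
  // With a modest exponent both forms agree; the widened form stays correct
  // even when the exponent or the product grows past 32 bits on LLP64.
  std::printf("%llu %llu\n",
              (unsigned long long)SizeNarrow(20, 11),
              (unsigned long long)SizeWide(20, 11));
}
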
diff --git a/lib/asan/asan_flags.cc b/lib/asan/asan_flags.cc
index 363ee67..345a35c 100644
--- a/lib/asan/asan_flags.cc
+++ b/lib/asan/asan_flags.cc
@@ -116,7 +116,7 @@
   ubsan_parser.ParseString(GetEnv("UBSAN_OPTIONS"));
 #endif
 
-  SetVerbosity(common_flags()->verbosity);
+  InitializeCommonFlags();
 
   // TODO(eugenis): dump all flags at verbosity>=2?
   if (Verbosity()) ReportUnrecognizedFlags();
@@ -159,6 +159,14 @@
         (ASAN_LOW_MEMORY) ? 1UL << 6 : 1UL << 8;
     f->quarantine_size_mb = kDefaultQuarantineSizeMb;
   }
+  if (!f->replace_str && common_flags()->intercept_strlen) {
+    Report("WARNING: strlen interceptor is enabled even though replace_str=0. "
+           "Use intercept_strlen=0 to disable it.");
+  }
+  if (!f->replace_str && common_flags()->intercept_strchr) {
+    Report("WARNING: strchr* interceptors are enabled even though "
+           "replace_str=0. Use intercept_strchr=0 to disable them.");
+  }
 }
 
 }  // namespace __asan
diff --git a/lib/asan/asan_flags.inc b/lib/asan/asan_flags.inc
index 5e69242..805002a 100644
--- a/lib/asan/asan_flags.inc
+++ b/lib/asan/asan_flags.inc
@@ -43,9 +43,11 @@
     "If set, uses custom wrappers and replacements for libc string functions "
     "to find more errors.")
 ASAN_FLAG(bool, replace_intrin, true,
-          "If set, uses custom wrappers for memset/memcpy/memmove intinsics.")
+          "If set, uses custom wrappers for memset/memcpy/memmove intrinsics.")
 ASAN_FLAG(bool, detect_stack_use_after_return, false,
           "Enables stack-use-after-return checking at run-time.")
+ASAN_FLAG(bool, detect_stack_use_after_scope, true,
+          "Enables stack-use-after-scope checking at run-time.")
 ASAN_FLAG(int, min_uar_stack_size_log, 16, // We can't do smaller anyway.
           "Minimum fake stack size log.")
 ASAN_FLAG(int, max_uar_stack_size_log,
@@ -77,6 +79,8 @@
           "Print various statistics after printing an error message or if "
           "atexit=1.")
 ASAN_FLAG(bool, print_legend, true, "Print the legend for the shadow bytes.")
+ASAN_FLAG(bool, print_scariness, false,
+          "Print the scariness score. Experimental.")
 ASAN_FLAG(bool, atexit, false,
           "If set, prints ASan exit stats even after program terminates "
           "successfully.")
@@ -104,7 +108,7 @@
           "Report errors on malloc/delete, new/free, new/delete[], etc.")
 
 ASAN_FLAG(bool, new_delete_type_mismatch, true,
-          "Report errors on mismatch betwen size of new and delete.")
+          "Report errors on mismatch between size of new and delete.")
 ASAN_FLAG(
     bool, strict_init_order, false,
     "If true, assume that dynamic initializers can never access globals from "
@@ -135,3 +139,5 @@
 ASAN_FLAG(bool, halt_on_error, true,
           "Crash the program after printing the first error report "
           "(WARNING: USE AT YOUR OWN RISK!)")
+ASAN_FLAG(bool, use_odr_indicator, false,
+          "Use special ODR indicator symbol for ODR violation detection")
diff --git a/lib/asan/asan_globals.cc b/lib/asan/asan_globals.cc
index 77e10ea..f185761 100644
--- a/lib/asan/asan_globals.cc
+++ b/lib/asan/asan_globals.cc
@@ -135,6 +135,70 @@
   return false;
 }
 
+enum GlobalSymbolState {
+  UNREGISTERED = 0,
+  REGISTERED = 1
+};
+
+// Check ODR violation for the given global G via its special ODR indicator.
+// We use this method when the compiler instruments global variables through
+// their local aliases.
+static void CheckODRViolationViaIndicator(const Global *g) {
+  u8 *odr_indicator = reinterpret_cast<u8 *>(g->odr_indicator);
+  if (*odr_indicator == UNREGISTERED) {
+    *odr_indicator = REGISTERED;
+    return;
+  }
+  // If *odr_indicator is already REGISTERED, some module has already
+  // registered an externally visible symbol with the same name. This is an
+  // ODR violation.
+  for (ListOfGlobals *l = list_of_all_globals; l; l = l->next) {
+    if (g->odr_indicator == l->g->odr_indicator &&
+        (flags()->detect_odr_violation >= 2 || g->size != l->g->size) &&
+        !IsODRViolationSuppressed(g->name))
+      ReportODRViolation(g, FindRegistrationSite(g),
+                         l->g, FindRegistrationSite(l->g));
+  }
+}
+
+// Check ODR violation for the given global G by checking whether it is
+// already poisoned. We use this method when the compiler doesn't use private
+// aliases for global variables.
+static void CheckODRViolationViaPoisoning(const Global *g) {
+  if (__asan_region_is_poisoned(g->beg, g->size_with_redzone)) {
+    // This check may not be enough: if the first global is much larger, the
+    // entire redzone of the second global may lie within the first global.
+    for (ListOfGlobals *l = list_of_all_globals; l; l = l->next) {
+      if (g->beg == l->g->beg &&
+          (flags()->detect_odr_violation >= 2 || g->size != l->g->size) &&
+          !IsODRViolationSuppressed(g->name))
+        ReportODRViolation(g, FindRegistrationSite(g),
+                           l->g, FindRegistrationSite(l->g));
+    }
+  }
+}
+
+// Clang provides two different ways to protect global variables: it can
+// poison the global itself or its private alias. In the former case we may
+// poison the same symbol multiple times, which lets us detect ODR violations
+// cheaply: if we try to poison an already poisoned global, we have an ODR
+// violation. In the latter case we poison each symbol exactly once, so we use
+// a special indicator symbol to perform a similar check.
+// In either case, the compiler provides a special odr_indicator field in the
+// Global structure, which can contain two kinds of values:
+//   1) A non-zero value. In this case, odr_indicator is the address of the
+//      corresponding indicator variable for the given global.
+//   2) Zero. This means that we don't use private aliases for global variables
+//      and can freely check ODR violations with the first method.
+//
+// This routine chooses between two different methods of ODR violation
+// detection.
+static inline bool UseODRIndicator(const Global *g) {
+  // Use the ODR indicator method iff the use_odr_indicator flag is set and
+  // the indicator symbol address is non-zero.
+  return flags()->use_odr_indicator && g->odr_indicator > 0;
+}
+
 // Register a global variable.
 // This function may be called more than once for every global
 // so we store the globals in a map.
@@ -144,22 +208,24 @@
     ReportGlobal(*g, "Added");
   CHECK(flags()->report_globals);
   CHECK(AddrIsInMem(g->beg));
-  CHECK(AddrIsAlignedByGranularity(g->beg));
+  if (!AddrIsAlignedByGranularity(g->beg)) {
+    Report("The following global variable is not properly aligned.\n");
+    Report("This may happen if another global with the same name\n");
+    Report("resides in another non-instrumented module.\n");
+    Report("Or the global comes from a C file built w/o -fno-common.\n");
+    Report("In either case this is likely an ODR violation bug,\n");
+    Report("but AddressSanitizer can not provide more details.\n");
+    ReportODRViolation(g, FindRegistrationSite(g), g, FindRegistrationSite(g));
+    CHECK(AddrIsAlignedByGranularity(g->beg));
+  }
   CHECK(AddrIsAlignedByGranularity(g->size_with_redzone));
   if (flags()->detect_odr_violation) {
     // Try detecting ODR (One Definition Rule) violation, i.e. the situation
     // where two globals with the same name are defined in different modules.
-    if (__asan_region_is_poisoned(g->beg, g->size_with_redzone)) {
-      // This check may not be enough: if the first global is much larger
-      // the entire redzone of the second global may be within the first global.
-      for (ListOfGlobals *l = list_of_all_globals; l; l = l->next) {
-        if (g->beg == l->g->beg &&
-            (flags()->detect_odr_violation >= 2 || g->size != l->g->size) &&
-            !IsODRViolationSuppressed(g->name))
-          ReportODRViolation(g, FindRegistrationSite(g),
-                             l->g, FindRegistrationSite(l->g));
-      }
-    }
+    if (UseODRIndicator(g))
+      CheckODRViolationViaIndicator(g);
+    else
+      CheckODRViolationViaPoisoning(g);
   }
   if (CanPoisonMemory())
     PoisonRedZones(*g);
@@ -190,6 +256,12 @@
   // We unpoison the shadow memory for the global but we do not remove it from
   // the list because that would require O(n^2) time with the current list
   // implementation. It might not be worth doing anyway.
+
+  // Release ODR indicator.
+  if (UseODRIndicator(g)) {
+    u8 *odr_indicator = reinterpret_cast<u8 *>(g->odr_indicator);
+    *odr_indicator = UNREGISTERED;
+  }
 }
 
 void StopInitOrderChecking() {
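
A rough, self-contained illustration of the indicator-based path described in the comments above: the indicator byte flips from UNREGISTERED to REGISTERED on first registration, and a second registration with the same indicator address signals a potential ODR violation. The Global struct, the std::vector registry, and RegisterGlobal below are toy stand-ins, not ASan's actual data structures:

#include <cstdint>
#include <cstdio>
#include <vector>

enum GlobalSymbolState : uint8_t { UNREGISTERED = 0, REGISTERED = 1 };

struct Global {            // toy stand-in for __asan_global
  const char *name;
  uintptr_t beg, size;
  uint8_t *odr_indicator;  // null when private aliases are not used
};

static std::vector<const Global *> g_list;

static void CheckODRViolationViaIndicator(const Global *g) {
  if (*g->odr_indicator == UNREGISTERED) {
    *g->odr_indicator = REGISTERED;  // first registration of this symbol
    return;
  }
  for (const Global *other : g_list)
    if (other->odr_indicator == g->odr_indicator)
      std::printf("ODR violation: %s defined in two modules\n", g->name);
}

static void RegisterGlobal(const Global *g) {
  if (g->odr_indicator) CheckODRViolationViaIndicator(g);
  g_list.push_back(g);
}

int main() {
  static uint8_t indicator = UNREGISTERED;
  Global a{"dup", 0x1000, 8, &indicator};
  Global b{"dup", 0x2000, 8, &indicator};  // same symbol from another module
  RegisterGlobal(&a);
  RegisterGlobal(&b);  // reports a potential ODR violation
}
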
diff --git a/lib/asan/asan_init_version.h b/lib/asan/asan_init_version.h
index bc8a622..f48cc19 100644
--- a/lib/asan/asan_init_version.h
+++ b/lib/asan/asan_init_version.h
@@ -19,16 +19,20 @@
   // Every time the ASan ABI changes we also change the version number in the
   // __asan_init function name.  Objects built with incompatible ASan ABI
   // versions will not link with run-time.
+  //
   // Changes between ABI versions:
   // v1=>v2: added 'module_name' to __asan_global
   // v2=>v3: stack frame description (created by the compiler)
-  //         contains the function PC as the 3-rd field (see
-  //         DescribeAddressIfStack).
-  // v3=>v4: added '__asan_global_source_location' to __asan_global.
+  //         contains the function PC as the 3rd field (see
+  //         DescribeAddressIfStack)
+  // v3=>v4: added '__asan_global_source_location' to __asan_global
   // v4=>v5: changed the semantics and format of __asan_stack_malloc_ and
-  //         __asan_stack_free_ functions.
+  //         __asan_stack_free_ functions
   // v5=>v6: changed the name of the version check symbol
-  #define __asan_version_mismatch_check __asan_version_mismatch_check_v6
+  // v6=>v7: added 'odr_indicator' to __asan_global
+  // v7=>v8: added '__asan_(un)register_image_globals' functions for dead
+  //         stripping support on Mach-O platforms
+  #define __asan_version_mismatch_check __asan_version_mismatch_check_v8
 }
 
 #endif  // ASAN_INIT_VERSION_H
diff --git a/lib/asan/asan_interceptors.cc b/lib/asan/asan_interceptors.cc
index 712b6b9..8f38587 100644
--- a/lib/asan/asan_interceptors.cc
+++ b/lib/asan/asan_interceptors.cc
@@ -111,7 +111,7 @@
 } while (0)
 
 static inline uptr MaybeRealStrnlen(const char *s, uptr maxlen) {
-#if ASAN_INTERCEPT_STRNLEN
+#if SANITIZER_INTERCEPT_STRNLEN
   if (REAL(strnlen)) {
     return REAL(strnlen)(s, maxlen);
   }
@@ -144,6 +144,8 @@
   (void) ctx;                                                                  \
 
 #define COMMON_INTERCEPT_FUNCTION(name) ASAN_INTERCEPT_FUNC(name)
+#define COMMON_INTERCEPT_FUNCTION_VER(name, ver)                          \
+  ASAN_INTERCEPT_FUNC_VER(name, ver)
 #define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \
   ASAN_WRITE_RANGE(ctx, ptr, size)
 #define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size) \
@@ -196,6 +198,10 @@
   } else {                                                                     \
     *begin = *end = 0;                                                         \
   }
+// Asan needs custom handling of these:
+#undef SANITIZER_INTERCEPT_MEMSET
+#undef SANITIZER_INTERCEPT_MEMMOVE
+#undef SANITIZER_INTERCEPT_MEMCPY
 #include "sanitizer_common/sanitizer_common_interceptors.inc"
 
 // Syscall interceptors don't have contexts, we don't support suppressions
@@ -219,6 +225,7 @@
   atomic_uintptr_t is_registered;
 };
 
+#if ASAN_INTERCEPT_PTHREAD_CREATE
 static thread_return_t THREAD_CALLING_CONV asan_thread_start(void *arg) {
   ThreadStartParam *param = reinterpret_cast<ThreadStartParam *>(arg);
   AsanThread *t = nullptr;
@@ -229,7 +236,6 @@
   return t->ThreadStart(GetTid(), &param->is_registered);
 }
 
-#if ASAN_INTERCEPT_PTHREAD_CREATE
 INTERCEPTOR(int, pthread_create, void *thread,
     void *attr, void *(*start_routine)(void*), void *arg) {
   EnsureMainThreadIDIsCorrect();
@@ -282,7 +288,8 @@
 
 #if SANITIZER_ANDROID
 INTERCEPTOR(void*, bsd_signal, int signum, void *handler) {
-  if (!IsDeadlySignal(signum) || common_flags()->allow_user_segv_handler) {
+  if (!IsHandledDeadlySignal(signum) ||
+      common_flags()->allow_user_segv_handler) {
     return REAL(bsd_signal)(signum, handler);
   }
   return 0;
@@ -290,7 +297,8 @@
 #endif
 
 INTERCEPTOR(void*, signal, int signum, void *handler) {
-  if (!IsDeadlySignal(signum) || common_flags()->allow_user_segv_handler) {
+  if (!IsHandledDeadlySignal(signum) ||
+      common_flags()->allow_user_segv_handler) {
     return REAL(signal)(signum, handler);
   }
   return nullptr;
@@ -298,7 +306,8 @@
 
 INTERCEPTOR(int, sigaction, int signum, const struct sigaction *act,
                             struct sigaction *oldact) {
-  if (!IsDeadlySignal(signum) || common_flags()->allow_user_segv_handler) {
+  if (!IsHandledDeadlySignal(signum) ||
+      common_flags()->allow_user_segv_handler) {
     return REAL(sigaction)(signum, act, oldact);
   }
   return 0;
@@ -464,25 +473,6 @@
   ASAN_MEMSET_IMPL(ctx, block, c, size);
 }
 
-INTERCEPTOR(char*, strchr, const char *str, int c) {
-  void *ctx;
-  ASAN_INTERCEPTOR_ENTER(ctx, strchr);
-  if (UNLIKELY(!asan_inited)) return internal_strchr(str, c);
-  // strchr is called inside create_purgeable_zone() when MallocGuardEdges=1 is
-  // used.
-  if (asan_init_is_running) {
-    return REAL(strchr)(str, c);
-  }
-  ENSURE_ASAN_INITED();
-  char *result = REAL(strchr)(str, c);
-  if (flags()->replace_str) {
-    uptr len = REAL(strlen)(str);
-    uptr bytes_read = (result ? result - str : len) + 1;
-    ASAN_READ_STRING_OF_LEN(ctx, str, len, bytes_read);
-  }
-  return result;
-}
-
 #if ASAN_INTERCEPT_INDEX
 # if ASAN_USE_ALIAS_ATTRIBUTE_FOR_INDEX
 INTERCEPTOR(char*, index, const char *string, int c)
@@ -560,7 +550,6 @@
   return REAL(strcpy)(to, from);  // NOLINT
 }
 
-#if ASAN_INTERCEPT_STRDUP
 INTERCEPTOR(char*, strdup, const char *s) {
   void *ctx;
   ASAN_INTERCEPTOR_ENTER(ctx, strdup);
@@ -575,29 +564,28 @@
   REAL(memcpy)(new_mem, s, length + 1);
   return reinterpret_cast<char*>(new_mem);
 }
-#endif
 
-INTERCEPTOR(SIZE_T, strlen, const char *s) {
+#if ASAN_INTERCEPT___STRDUP
+INTERCEPTOR(char*, __strdup, const char *s) {
   void *ctx;
-  ASAN_INTERCEPTOR_ENTER(ctx, strlen);
-  if (UNLIKELY(!asan_inited)) return internal_strlen(s);
-  // strlen is called from malloc_default_purgeable_zone()
-  // in __asan::ReplaceSystemAlloc() on Mac.
-  if (asan_init_is_running) {
-    return REAL(strlen)(s);
-  }
+  ASAN_INTERCEPTOR_ENTER(ctx, strdup);
+  if (UNLIKELY(!asan_inited)) return internal_strdup(s);
   ENSURE_ASAN_INITED();
-  SIZE_T length = REAL(strlen)(s);
+  uptr length = REAL(strlen)(s);
   if (flags()->replace_str) {
     ASAN_READ_RANGE(ctx, s, length + 1);
   }
-  return length;
+  GET_STACK_TRACE_MALLOC;
+  void *new_mem = asan_malloc(length + 1, &stack);
+  REAL(memcpy)(new_mem, s, length + 1);
+  return reinterpret_cast<char*>(new_mem);
 }
+#endif // ASAN_INTERCEPT___STRDUP
 
 INTERCEPTOR(SIZE_T, wcslen, const wchar_t *s) {
   void *ctx;
   ASAN_INTERCEPTOR_ENTER(ctx, wcslen);
-  SIZE_T length = REAL(wcslen)(s);
+  SIZE_T length = internal_wcslen(s);
   if (!asan_init_is_running) {
     ENSURE_ASAN_INITED();
     ASAN_READ_RANGE(ctx, s, (length + 1) * sizeof(wchar_t));
@@ -618,19 +606,6 @@
   return REAL(strncpy)(to, from, size);
 }
 
-#if ASAN_INTERCEPT_STRNLEN
-INTERCEPTOR(uptr, strnlen, const char *s, uptr maxlen) {
-  void *ctx;
-  ASAN_INTERCEPTOR_ENTER(ctx, strnlen);
-  ENSURE_ASAN_INITED();
-  uptr length = REAL(strnlen)(s, maxlen);
-  if (flags()->replace_str) {
-    ASAN_READ_RANGE(ctx, s, Min(length + 1, maxlen));
-  }
-  return length;
-}
-#endif  // ASAN_INTERCEPT_STRNLEN
-
 INTERCEPTOR(long, strtol, const char *nptr,  // NOLINT
             char **endptr, int base) {
   void *ctx;
@@ -713,12 +688,12 @@
 }
 #endif  // ASAN_INTERCEPT_ATOLL_AND_STRTOLL
 
+#if ASAN_INTERCEPT___CXA_ATEXIT
 static void AtCxaAtexit(void *unused) {
   (void)unused;
   StopInitOrderChecking();
 }
 
-#if ASAN_INTERCEPT___CXA_ATEXIT
 INTERCEPTOR(int, __cxa_atexit, void (*func)(void *), void *arg,
             void *dso_handle) {
 #if SANITIZER_MAC
@@ -745,7 +720,7 @@
 namespace __asan {
 void InitializeAsanInterceptors() {
   static bool was_called_once;
-  CHECK(was_called_once == false);
+  CHECK(!was_called_once);
   was_called_once = true;
   InitializeCommonInterceptors();
 
@@ -753,22 +728,22 @@
   ASAN_INTERCEPT_FUNC(memmove);
   ASAN_INTERCEPT_FUNC(memset);
   if (PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE) {
+    // In asan, REAL(memmove) is not used, but it is used in msan.
     ASAN_INTERCEPT_FUNC(memcpy);
+  } else {
+    ASSIGN_REAL(memcpy, memmove);
   }
+  CHECK(REAL(memcpy));
 
   // Intercept str* functions.
   ASAN_INTERCEPT_FUNC(strcat);  // NOLINT
-  ASAN_INTERCEPT_FUNC(strchr);
   ASAN_INTERCEPT_FUNC(strcpy);  // NOLINT
-  ASAN_INTERCEPT_FUNC(strlen);
   ASAN_INTERCEPT_FUNC(wcslen);
   ASAN_INTERCEPT_FUNC(strncat);
   ASAN_INTERCEPT_FUNC(strncpy);
-#if ASAN_INTERCEPT_STRDUP
   ASAN_INTERCEPT_FUNC(strdup);
-#endif
-#if ASAN_INTERCEPT_STRNLEN
-  ASAN_INTERCEPT_FUNC(strnlen);
+#if ASAN_INTERCEPT___STRDUP
+  ASAN_INTERCEPT_FUNC(__strdup);
 #endif
 #if ASAN_INTERCEPT_INDEX && ASAN_USE_ALIAS_ATTRIBUTE_FOR_INDEX
   ASAN_INTERCEPT_FUNC(index);
diff --git a/lib/asan/asan_interceptors.h b/lib/asan/asan_interceptors.h
index 279c5f3..d747c31 100644
--- a/lib/asan/asan_interceptors.h
+++ b/lib/asan/asan_interceptors.h
@@ -23,14 +23,12 @@
 #if !SANITIZER_WINDOWS
 # define ASAN_INTERCEPT_ATOLL_AND_STRTOLL 1
 # define ASAN_INTERCEPT__LONGJMP 1
-# define ASAN_INTERCEPT_STRDUP 1
 # define ASAN_INTERCEPT_INDEX 1
 # define ASAN_INTERCEPT_PTHREAD_CREATE 1
 # define ASAN_INTERCEPT_FORK 1
 #else
 # define ASAN_INTERCEPT_ATOLL_AND_STRTOLL 0
 # define ASAN_INTERCEPT__LONGJMP 0
-# define ASAN_INTERCEPT_STRDUP 0
 # define ASAN_INTERCEPT_INDEX 0
 # define ASAN_INTERCEPT_PTHREAD_CREATE 0
 # define ASAN_INTERCEPT_FORK 0
@@ -42,12 +40,6 @@
 # define ASAN_USE_ALIAS_ATTRIBUTE_FOR_INDEX 0
 #endif
 
-#if !SANITIZER_MAC
-# define ASAN_INTERCEPT_STRNLEN 1
-#else
-# define ASAN_INTERCEPT_STRNLEN 0
-#endif
-
 #if SANITIZER_LINUX && !SANITIZER_ANDROID
 # define ASAN_INTERCEPT_SWAPCONTEXT 1
 #else
@@ -80,6 +72,12 @@
 # define ASAN_INTERCEPT___CXA_ATEXIT 0
 #endif
 
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
+# define ASAN_INTERCEPT___STRDUP 1
+#else
+# define ASAN_INTERCEPT___STRDUP 0
+#endif
+
 DECLARE_REAL(int, memcmp, const void *a1, const void *a2, uptr size)
 DECLARE_REAL(void*, memcpy, void *to, const void *from, uptr size)
 DECLARE_REAL(void*, memset, void *block, int c, uptr size)
diff --git a/lib/asan/asan_interface_internal.h b/lib/asan/asan_interface_internal.h
index 66867f2..e6aede0 100644
--- a/lib/asan/asan_interface_internal.h
+++ b/lib/asan/asan_interface_internal.h
@@ -54,6 +54,7 @@
     uptr has_dynamic_init;   // Non-zero if the global has dynamic initializer.
     __asan_global_source_location *location;  // Source location of a global,
                                               // or NULL if it is unknown.
+    uptr odr_indicator;      // The address of the ODR indicator symbol.
   };
 
   // These functions can be called on some platforms to find globals in the same
@@ -159,6 +160,10 @@
   SANITIZER_INTERFACE_ATTRIBUTE
   extern int __asan_option_detect_stack_use_after_return;
 
+// Global flag, copy of ASAN_OPTIONS=detect_stack_use_after_scope
+  SANITIZER_INTERFACE_ATTRIBUTE
+  extern int __asan_option_detect_stack_use_after_scope;
+
   SANITIZER_INTERFACE_ATTRIBUTE
   extern uptr *__asan_test_only_reported_buggy_pointer;
 
diff --git a/lib/asan/asan_internal.h b/lib/asan/asan_internal.h
index d9e41ee..2014237 100644
--- a/lib/asan/asan_internal.h
+++ b/lib/asan/asan_internal.h
@@ -36,9 +36,9 @@
 // If set, values like allocator chunk size, as well as defaults for some flags
 // will be changed towards less memory overhead.
 #ifndef ASAN_LOW_MEMORY
-#if SANITIZER_WORDSIZE == 32
+# if SANITIZER_IOS || (SANITIZER_WORDSIZE == 32)
 #  define ASAN_LOW_MEMORY 1
-#else
+# else
 #  define ASAN_LOW_MEMORY 0
 # endif
 #endif
@@ -62,6 +62,9 @@
 
 void AsanInitFromRtl();
 
+// asan_win.cc
+void InitializePlatformExceptionHandlers();
+
 // asan_rtl.cc
 void NORETURN ShowStatsAndAbort();
 
@@ -102,16 +105,24 @@
 bool PlatformHasDifferentMemcpyAndMemmove();
 # define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE \
     (PlatformHasDifferentMemcpyAndMemmove())
+#elif SANITIZER_WINDOWS64
+# define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE false
 #else
 # define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE true
 #endif  // SANITIZER_MAC
 
 // Add convenient macro for interface functions that may be represented as
 // weak hooks.
-#define ASAN_MALLOC_HOOK(ptr, size) \
-  if (&__sanitizer_malloc_hook) __sanitizer_malloc_hook(ptr, size)
-#define ASAN_FREE_HOOK(ptr) \
-  if (&__sanitizer_free_hook) __sanitizer_free_hook(ptr)
+#define ASAN_MALLOC_HOOK(ptr, size)                                   \
+  do {                                                                \
+    if (&__sanitizer_malloc_hook) __sanitizer_malloc_hook(ptr, size); \
+    RunMallocHooks(ptr, size);                                        \
+  } while (false)
+#define ASAN_FREE_HOOK(ptr)                                 \
+  do {                                                      \
+    if (&__sanitizer_free_hook) __sanitizer_free_hook(ptr); \
+    RunFreeHooks(ptr);                                      \
+  } while (false)
 #define ASAN_ON_ERROR() \
   if (&__asan_on_error) __asan_on_error()
 
@@ -119,7 +130,6 @@
 // Used to avoid infinite recursion in __asan_init().
 extern bool asan_init_is_running;
 extern void (*death_callback)(void);
-
 // These magic values are written to shadow for better error reporting.
 const int kAsanHeapLeftRedzoneMagic = 0xfa;
 const int kAsanHeapRightRedzoneMagic = 0xfb;
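
The new ASAN_MALLOC_HOOK/ASAN_FREE_HOOK definitions wrap their two statements in do { ... } while (false), the usual way to make a multi-statement macro behave as a single statement inside unbraced if/else. A minimal illustration with hypothetical hook macros:

#include <cstdio>

static int g_hook_calls = 0;

// Multi-statement macro without the idiom: it expands to two statements.
#define HOOK_BAD(p)  std::printf("hook %p\n", (void *)(p)); ++g_hook_calls
// Wrapped form, as in the patched ASAN_MALLOC_HOOK: behaves as one statement.
#define HOOK_GOOD(p) do { std::printf("hook %p\n", (void *)(p)); ++g_hook_calls; } while (false)

int main() {
  int x = 0;
  if (false)
    HOOK_GOOD(&x);   // neither statement runs, as intended
  else
    std::printf("skipped\n");
  // With HOOK_BAD the 'else' above would not even compile against the 'if',
  // because the macro body is two statements rather than one.
  std::printf("calls: %d\n", g_hook_calls);
}
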
diff --git a/lib/asan/asan_linux.cc b/lib/asan/asan_linux.cc
index c5c08ed..c051573 100644
--- a/lib/asan/asan_linux.cc
+++ b/lib/asan/asan_linux.cc
@@ -69,6 +69,7 @@
 namespace __asan {
 
 void InitializePlatformInterceptors() {}
+void InitializePlatformExceptionHandlers() {}
 
 void *AsanDoesNotSupportStaticLinkage() {
   // This will fail to link with -static.
diff --git a/lib/asan/asan_mac.cc b/lib/asan/asan_mac.cc
index 5093e2d..525864f 100644
--- a/lib/asan/asan_mac.cc
+++ b/lib/asan/asan_mac.cc
@@ -38,9 +38,16 @@
 #include <sys/ucontext.h>
 #include <unistd.h>
 
+// from <crt_externs.h>, but we don't have that file on iOS
+extern "C" {
+  extern char ***_NSGetArgv(void);
+  extern char ***_NSGetEnviron(void);
+}
+
 namespace __asan {
 
 void InitializePlatformInterceptors() {}
+void InitializePlatformExceptionHandlers() {}
 
 bool PlatformHasDifferentMemcpyAndMemmove() {
   // On OS X 10.7 memcpy() and memmove() are both resolved
diff --git a/lib/asan/asan_malloc_linux.cc b/lib/asan/asan_malloc_linux.cc
index a81f19f..162abd2 100644
--- a/lib/asan/asan_malloc_linux.cc
+++ b/lib/asan/asan_malloc_linux.cc
@@ -26,52 +26,58 @@
 // ---------------------- Replacement functions ---------------- {{{1
 using namespace __asan;  // NOLINT
 
-static const uptr kCallocPoolSize = 1024;
-static uptr calloc_memory_for_dlsym[kCallocPoolSize];
+static uptr allocated_for_dlsym;
+static const uptr kDlsymAllocPoolSize = 1024;
+static uptr alloc_memory_for_dlsym[kDlsymAllocPoolSize];
 
-static bool IsInCallocPool(const void *ptr) {
-  sptr off = (sptr)ptr - (sptr)calloc_memory_for_dlsym;
-  return 0 <= off && off < (sptr)kCallocPoolSize;
+static bool IsInDlsymAllocPool(const void *ptr) {
+  uptr off = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
+  return off < sizeof(alloc_memory_for_dlsym);
+}
+
+static void *AllocateFromLocalPool(uptr size_in_bytes) {
+  uptr size_in_words = RoundUpTo(size_in_bytes, kWordSize) / kWordSize;
+  void *mem = (void*)&alloc_memory_for_dlsym[allocated_for_dlsym];
+  allocated_for_dlsym += size_in_words;
+  CHECK_LT(allocated_for_dlsym, kDlsymAllocPoolSize);
+  return mem;
 }
 
 INTERCEPTOR(void, free, void *ptr) {
   GET_STACK_TRACE_FREE;
-  if (UNLIKELY(IsInCallocPool(ptr)))
+  if (UNLIKELY(IsInDlsymAllocPool(ptr)))
     return;
   asan_free(ptr, &stack, FROM_MALLOC);
 }
 
 INTERCEPTOR(void, cfree, void *ptr) {
   GET_STACK_TRACE_FREE;
-  if (UNLIKELY(IsInCallocPool(ptr)))
+  if (UNLIKELY(IsInDlsymAllocPool(ptr)))
     return;
   asan_free(ptr, &stack, FROM_MALLOC);
 }
 
 INTERCEPTOR(void*, malloc, uptr size) {
+  if (UNLIKELY(!asan_inited))
+    // Hack: dlsym calls malloc before REAL(malloc) is retrieved from dlsym.
+    return AllocateFromLocalPool(size);
   GET_STACK_TRACE_MALLOC;
   return asan_malloc(size, &stack);
 }
 
 INTERCEPTOR(void*, calloc, uptr nmemb, uptr size) {
-  if (UNLIKELY(!asan_inited)) {
+  if (UNLIKELY(!asan_inited))
     // Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym.
-    static uptr allocated;
-    uptr size_in_words = ((nmemb * size) + kWordSize - 1) / kWordSize;
-    void *mem = (void*)&calloc_memory_for_dlsym[allocated];
-    allocated += size_in_words;
-    CHECK(allocated < kCallocPoolSize);
-    return mem;
-  }
+    return AllocateFromLocalPool(nmemb * size);
   GET_STACK_TRACE_MALLOC;
   return asan_calloc(nmemb, size, &stack);
 }
 
 INTERCEPTOR(void*, realloc, void *ptr, uptr size) {
   GET_STACK_TRACE_MALLOC;
-  if (UNLIKELY(IsInCallocPool(ptr))) {
-    uptr offset = (uptr)ptr - (uptr)calloc_memory_for_dlsym;
-    uptr copy_size = Min(size, kCallocPoolSize - offset);
+  if (UNLIKELY(IsInDlsymAllocPool(ptr))) {
+    uptr offset = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
+    uptr copy_size = Min(size, kDlsymAllocPoolSize - offset);
     void *new_ptr = asan_malloc(size, &stack);
     internal_memcpy(new_ptr, ptr, copy_size);
     return new_ptr;
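
A hedged sketch of the bootstrap-allocation pattern introduced above: before the runtime is initialized, malloc/calloc requests (for example, those made by dlsym itself) are bump-allocated from a small static array, and free recognises such pointers by address range. All names below are illustrative, not ASan's:

#include <cassert>
#include <cstdint>
#include <cstdlib>

static bool g_allocator_ready = false;  // stand-in for asan_inited
static uintptr_t g_pool[1024];          // word-sized slots, like alloc_memory_for_dlsym
static size_t g_pool_used = 0;          // in words

static void *AllocateFromLocalPool(size_t bytes) {
  size_t words = (bytes + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
  void *mem = &g_pool[g_pool_used];
  g_pool_used += words;
  assert(g_pool_used < sizeof(g_pool) / sizeof(g_pool[0]));
  return mem;
}

static bool IsInLocalPool(const void *p) {
  uintptr_t off = (uintptr_t)p - (uintptr_t)g_pool;
  return off < sizeof(g_pool);  // unsigned wrap-around makes one compare enough
}

static void *my_malloc(size_t n) {
  if (!g_allocator_ready) return AllocateFromLocalPool(n);
  return std::malloc(n);
}

static void my_free(void *p) {
  if (IsInLocalPool(p)) return;  // pool memory is never released
  std::free(p);
}

int main() {
  void *early = my_malloc(32);   // e.g. an allocation made from inside dlsym
  g_allocator_ready = true;
  void *late = my_malloc(32);
  my_free(early);                // ignored: came from the static pool
  my_free(late);
}
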
diff --git a/lib/asan/asan_malloc_win.cc b/lib/asan/asan_malloc_win.cc
index c99e312..4a233df 100644
--- a/lib/asan/asan_malloc_win.cc
+++ b/lib/asan/asan_malloc_win.cc
@@ -14,6 +14,8 @@
 
 #include "sanitizer_common/sanitizer_platform.h"
 #if SANITIZER_WINDOWS
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
 
 #include "asan_allocator.h"
 #include "asan_interceptors.h"
@@ -49,6 +51,11 @@
 }
 
 ALLOCATION_FUNCTION_ATTRIBUTE
+void _free_base(void *ptr) {
+  free(ptr);
+}
+
+ALLOCATION_FUNCTION_ATTRIBUTE
 void cfree(void *ptr) {
   CHECK(!"cfree() should not be used on Windows");
 }
@@ -60,6 +67,11 @@
 }
 
 ALLOCATION_FUNCTION_ATTRIBUTE
+void *_malloc_base(size_t size) {
+  return malloc(size);
+}
+
+ALLOCATION_FUNCTION_ATTRIBUTE
 void *_malloc_dbg(size_t size, int, const char *, int) {
   return malloc(size);
 }
@@ -71,6 +83,11 @@
 }
 
 ALLOCATION_FUNCTION_ATTRIBUTE
+void *_calloc_base(size_t nmemb, size_t size) {
+  return calloc(nmemb, size);
+}
+
+ALLOCATION_FUNCTION_ATTRIBUTE
 void *_calloc_dbg(size_t nmemb, size_t size, int, const char *, int) {
   return calloc(nmemb, size);
 }
@@ -93,6 +110,11 @@
 }
 
 ALLOCATION_FUNCTION_ATTRIBUTE
+void *_realloc_base(void *ptr, size_t size) {
+  return realloc(ptr, size);
+}
+
+ALLOCATION_FUNCTION_ATTRIBUTE
 void *_recalloc(void *p, size_t n, size_t elem_size) {
   if (!p)
     return calloc(n, elem_size);
@@ -103,7 +125,7 @@
 }
 
 ALLOCATION_FUNCTION_ATTRIBUTE
-size_t _msize(void *ptr) {
+size_t _msize(const void *ptr) {
   GET_CURRENT_PC_BP_SP;
   (void)sp;
   return asan_malloc_usable_size(ptr, pc, bp);
@@ -139,38 +161,89 @@
 }
 }  // extern "C"
 
+INTERCEPTOR_WINAPI(LPVOID, HeapAlloc, HANDLE hHeap, DWORD dwFlags,
+                   SIZE_T dwBytes) {
+  GET_STACK_TRACE_MALLOC;
+  void *p = asan_malloc(dwBytes, &stack);
+  // Reading MSDN suggests that the *entire* usable allocation is zeroed out.
+  // Otherwise it is difficult to HeapReAlloc with HEAP_ZERO_MEMORY.
+  // https://blogs.msdn.microsoft.com/oldnewthing/20120316-00/?p=8083
+  if (dwFlags == HEAP_ZERO_MEMORY)
+    internal_memset(p, 0, asan_mz_size(p));
+  else
+    CHECK(dwFlags == 0 && "unsupported heap flags");
+  return p;
+}
+
+INTERCEPTOR_WINAPI(BOOL, HeapFree, HANDLE hHeap, DWORD dwFlags, LPVOID lpMem) {
+  CHECK(dwFlags == 0 && "unsupported heap flags");
+  GET_STACK_TRACE_FREE;
+  asan_free(lpMem, &stack, FROM_MALLOC);
+  return true;
+}
+
+INTERCEPTOR_WINAPI(LPVOID, HeapReAlloc, HANDLE hHeap, DWORD dwFlags,
+                   LPVOID lpMem, SIZE_T dwBytes) {
+  GET_STACK_TRACE_MALLOC;
+  // ASan's realloc never reallocates in place, so we cannot honor
+  // HEAP_REALLOC_IN_PLACE_ONLY.
+  if (dwFlags & HEAP_REALLOC_IN_PLACE_ONLY)
+    return nullptr;
+  CHECK(dwFlags == 0 && "unsupported heap flags");
+  return asan_realloc(lpMem, dwBytes, &stack);
+}
+
+INTERCEPTOR_WINAPI(SIZE_T, HeapSize, HANDLE hHeap, DWORD dwFlags,
+                   LPCVOID lpMem) {
+  CHECK(dwFlags == 0 && "unsupported heap flags");
+  GET_CURRENT_PC_BP_SP;
+  (void)sp;
+  return asan_malloc_usable_size(lpMem, pc, bp);
+}
+
 namespace __asan {
+
+static void TryToOverrideFunction(const char *fname, uptr new_func) {
+  // Failure here is not fatal. The CRT may not be present, and different CRT
+  // versions use different symbols.
+  if (!__interception::OverrideFunction(fname, new_func))
+    VPrintf(2, "Failed to override function %s\n", fname);
+}
+
 void ReplaceSystemMalloc() {
 #if defined(ASAN_DYNAMIC)
-  // We don't check the result because CRT might not be used in the process.
-  __interception::OverrideFunction("free", (uptr)free);
-  __interception::OverrideFunction("malloc", (uptr)malloc);
-  __interception::OverrideFunction("_malloc_crt", (uptr)malloc);
-  __interception::OverrideFunction("calloc", (uptr)calloc);
-  __interception::OverrideFunction("_calloc_crt", (uptr)calloc);
-  __interception::OverrideFunction("realloc", (uptr)realloc);
-  __interception::OverrideFunction("_realloc_crt", (uptr)realloc);
-  __interception::OverrideFunction("_recalloc", (uptr)_recalloc);
-  __interception::OverrideFunction("_recalloc_crt", (uptr)_recalloc);
-  __interception::OverrideFunction("_msize", (uptr)_msize);
-  __interception::OverrideFunction("_expand", (uptr)_expand);
+  TryToOverrideFunction("free", (uptr)free);
+  TryToOverrideFunction("_free_base", (uptr)free);
+  TryToOverrideFunction("malloc", (uptr)malloc);
+  TryToOverrideFunction("_malloc_base", (uptr)malloc);
+  TryToOverrideFunction("_malloc_crt", (uptr)malloc);
+  TryToOverrideFunction("calloc", (uptr)calloc);
+  TryToOverrideFunction("_calloc_base", (uptr)calloc);
+  TryToOverrideFunction("_calloc_crt", (uptr)calloc);
+  TryToOverrideFunction("realloc", (uptr)realloc);
+  TryToOverrideFunction("_realloc_base", (uptr)realloc);
+  TryToOverrideFunction("_realloc_crt", (uptr)realloc);
+  TryToOverrideFunction("_recalloc", (uptr)_recalloc);
+  TryToOverrideFunction("_recalloc_crt", (uptr)_recalloc);
+  TryToOverrideFunction("_msize", (uptr)_msize);
+  TryToOverrideFunction("_expand", (uptr)_expand);
+  TryToOverrideFunction("_expand_base", (uptr)_expand);
 
-  // Override different versions of 'operator new' and 'operator delete'.
-  // No need to override the nothrow versions as they just wrap the throw
-  // versions.
-  // FIXME: Unfortunately, MSVC miscompiles the statements that take the
-  // addresses of the array versions of these operators,
-  // see https://connect.microsoft.com/VisualStudio/feedbackdetail/view/946992
-  // We might want to try to work around this by [inline] assembly or compiling
-  // parts of the RTL with Clang.
-  void *(*op_new)(size_t sz) = operator new;
-  void (*op_delete)(void *p) = operator delete;
-  void *(*op_array_new)(size_t sz) = operator new[];
-  void (*op_array_delete)(void *p) = operator delete[];
-  __interception::OverrideFunction("??2@YAPAXI@Z", (uptr)op_new);
-  __interception::OverrideFunction("??3@YAXPAX@Z", (uptr)op_delete);
-  __interception::OverrideFunction("??_U@YAPAXI@Z", (uptr)op_array_new);
-  __interception::OverrideFunction("??_V@YAXPAX@Z", (uptr)op_array_delete);
+  // Recent versions of ucrtbase.dll appear to be built with PGO and LTCG, which
+  // enable cross-module inlining. This means our _malloc_base hook won't catch
+  // all CRT allocations. This code patches the import table of ucrtbase.dll so
+  // that all attempts to use the lower-level win32 heap allocation API will be
+  // directed to ASan's heap. We don't currently intercept all calls to
+  // HeapAlloc. If we did, we would have to check on HeapFree whether the
+  // pointer came from ASan or from the system.
+#define INTERCEPT_UCRT_FUNCTION(func)                                         \
+  if (!INTERCEPT_FUNCTION_DLLIMPORT("ucrtbase.dll",                           \
+                                    "api-ms-win-core-heap-l1-1-0.dll", func)) \
+    VPrintf(2, "Failed to intercept ucrtbase.dll import %s\n", #func);
+  INTERCEPT_UCRT_FUNCTION(HeapAlloc);
+  INTERCEPT_UCRT_FUNCTION(HeapFree);
+  INTERCEPT_UCRT_FUNCTION(HeapReAlloc);
+  INTERCEPT_UCRT_FUNCTION(HeapSize);
+#undef INTERCEPT_UCRT_FUNCTION
 #endif
 }
 }  // namespace __asan
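
A simplified, platform-neutral sketch of the HEAP_ZERO_MEMORY handling in the HeapAlloc interceptor above. The toy allocator and its usable-size query stand in for asan_malloc and asan_mz_size; only the flag-handling logic mirrors the interceptor:

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>

static const uint32_t kHeapZeroMemory = 0x8;  // documented value of HEAP_ZERO_MEMORY

// Toy allocator that remembers each block's usable size in a header word.
static void *toy_alloc(size_t bytes) {
  size_t *p = (size_t *)std::malloc(sizeof(size_t) + bytes);
  if (!p) return nullptr;
  *p = bytes;
  return p + 1;
}
static size_t toy_usable_size(void *p) { return ((size_t *)p)[-1]; }

static void *heap_alloc_like(size_t bytes, uint32_t flags) {
  void *p = toy_alloc(bytes);
  // Zero the entire usable allocation, not just the requested size, mirroring
  // the interceptor's reading of the HeapAlloc documentation.
  if (p && (flags & kHeapZeroMemory)) std::memset(p, 0, toy_usable_size(p));
  return p;
}

int main() {
  unsigned char *p = (unsigned char *)heap_alloc_like(16, kHeapZeroMemory);
  std::printf("first byte: %u\n", p ? p[0] : 0u);  // prints 0
  std::free(p ? (size_t *)p - 1 : nullptr);
}
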
diff --git a/lib/asan/asan_mapping.h b/lib/asan/asan_mapping.h
index 8fe347c..52c4f67 100644
--- a/lib/asan/asan_mapping.h
+++ b/lib/asan/asan_mapping.h
@@ -87,6 +87,20 @@
 // || `[0x08000000000, 0x08fffffffff]` || lowshadow  ||
 // || `[0x00000000000, 0x07fffffffff]` || lowmem     ||
 //
+// Default Linux/S390 mapping:
+// || `[0x30000000, 0x7fffffff]` || HighMem    ||
+// || `[0x26000000, 0x2fffffff]` || HighShadow ||
+// || `[0x24000000, 0x25ffffff]` || ShadowGap  ||
+// || `[0x20000000, 0x23ffffff]` || LowShadow  ||
+// || `[0x00000000, 0x1fffffff]` || LowMem     ||
+//
+// Default Linux/SystemZ mapping:
+// || `[0x14000000000000, 0x1fffffffffffff]` || HighMem    ||
+// || `[0x12800000000000, 0x13ffffffffffff]` || HighShadow ||
+// || `[0x12000000000000, 0x127fffffffffff]` || ShadowGap  ||
+// || `[0x10000000000000, 0x11ffffffffffff]` || LowShadow  ||
+// || `[0x00000000000000, 0x0fffffffffffff]` || LowMem     ||
+//
 // Shadow mapping on FreeBSD/x86-64 with SHADOW_OFFSET == 0x400000000000:
 // || `[0x500000000000, 0x7fffffffffff]` || HighMem    ||
 // || `[0x4a0000000000, 0x4fffffffffff]` || HighShadow ||
@@ -115,16 +129,18 @@
 static const u64 kDefaultShadowOffset64 = 1ULL << 44;
 static const u64 kDefaultShort64bitShadowOffset = 0x7FFF8000;  // < 2G.
 static const u64 kIosShadowOffset32 = 1ULL << 30;  // 0x40000000
-static const u64 kIosShadowOffset64 = 0x130000000;
+static const u64 kIosShadowOffset64 = 0x120200000;
 static const u64 kIosSimShadowOffset32 = 1ULL << 30;
 static const u64 kIosSimShadowOffset64 = kDefaultShadowOffset64;
 static const u64 kAArch64_ShadowOffset64 = 1ULL << 36;
 static const u64 kMIPS32_ShadowOffset32 = 0x0aaa0000;
 static const u64 kMIPS64_ShadowOffset64 = 1ULL << 37;
 static const u64 kPPC64_ShadowOffset64 = 1ULL << 41;
+static const u64 kSystemZ_ShadowOffset64 = 1ULL << 52;
 static const u64 kFreeBSD_ShadowOffset32 = 1ULL << 30;  // 0x40000000
 static const u64 kFreeBSD_ShadowOffset64 = 1ULL << 46;  // 0x400000000000
 static const u64 kWindowsShadowOffset32 = 3ULL << 28;  // 0x30000000
+static const u64 kWindowsShadowOffset64 = 1ULL << 45;  // 32TB
 
 #define SHADOW_SCALE kDefaultShadowScale
 
@@ -138,28 +154,36 @@
 #    define SHADOW_OFFSET kFreeBSD_ShadowOffset32
 #  elif SANITIZER_WINDOWS
 #    define SHADOW_OFFSET kWindowsShadowOffset32
-#  elif SANITIZER_IOSSIM
-#    define SHADOW_OFFSET kIosSimShadowOffset32
 #  elif SANITIZER_IOS
-#    define SHADOW_OFFSET kIosShadowOffset32
+#    if SANITIZER_IOSSIM
+#      define SHADOW_OFFSET kIosSimShadowOffset32
+#    else
+#      define SHADOW_OFFSET kIosShadowOffset32
+#    endif
 #  else
 #    define SHADOW_OFFSET kDefaultShadowOffset32
 #  endif
 #else
-#  if defined(__aarch64__)
+#  if SANITIZER_IOS
+#    if SANITIZER_IOSSIM
+#      define SHADOW_OFFSET kIosSimShadowOffset64
+#    else
+#      define SHADOW_OFFSET kIosShadowOffset64
+#    endif
+#  elif defined(__aarch64__)
 #    define SHADOW_OFFSET kAArch64_ShadowOffset64
 #  elif defined(__powerpc64__)
 #    define SHADOW_OFFSET kPPC64_ShadowOffset64
+#  elif defined(__s390x__)
+#    define SHADOW_OFFSET kSystemZ_ShadowOffset64
 #  elif SANITIZER_FREEBSD
 #    define SHADOW_OFFSET kFreeBSD_ShadowOffset64
 #  elif SANITIZER_MAC
 #   define SHADOW_OFFSET kDefaultShadowOffset64
 #  elif defined(__mips64)
 #   define SHADOW_OFFSET kMIPS64_ShadowOffset64
-#  elif SANITIZER_IOSSIM
-#    define SHADOW_OFFSET kIosSimShadowOffset64
-#  elif SANITIZER_IOS
-#    define SHADOW_OFFSET kIosShadowOffset64
+#  elif SANITIZER_WINDOWS64
+#   define SHADOW_OFFSET kWindowsShadowOffset64
 #  else
 #   define SHADOW_OFFSET kDefaultShort64bitShadowOffset
 #  endif
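
The per-platform SHADOW_OFFSET constants above plug into ASan's usual shadow mapping, shadow = (addr >> SHADOW_SCALE) + SHADOW_OFFSET with SHADOW_SCALE = 3 by default. A small worked example using the 64-bit Windows offset added in this patch; the application address is made up:

#include <cstdint>
#include <cstdio>

static const unsigned kShadowScale = 3;                     // kDefaultShadowScale
static const uint64_t kWindowsShadowOffset64 = 1ULL << 45;  // 32TB, as above

static uint64_t MemToShadow(uint64_t addr) {
  return (addr >> kShadowScale) + kWindowsShadowOffset64;
}

int main() {
  uint64_t app = 0x000001c2a0000000ULL;  // hypothetical application address
  std::printf("app %llx -> shadow %llx (1 shadow byte per %u app bytes)\n",
              (unsigned long long)app, (unsigned long long)MemToShadow(app),
              1u << kShadowScale);
}
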
diff --git a/lib/asan/asan_memory_profile.cc b/lib/asan/asan_memory_profile.cc
new file mode 100644
index 0000000..ba00516
--- /dev/null
+++ b/lib/asan/asan_memory_profile.cc
@@ -0,0 +1,100 @@
+//===-- asan_memory_profile.cc --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// This file implements __sanitizer_print_memory_profile.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include "sanitizer_common/sanitizer_stoptheworld.h"
+#include "lsan/lsan_common.h"
+#include "asan/asan_allocator.h"
+
+#if CAN_SANITIZE_LEAKS
+
+namespace __asan {
+
+struct AllocationSite {
+  u32 id;
+  uptr total_size;
+  uptr count;
+};
+
+class HeapProfile {
+ public:
+  HeapProfile() : allocations_(1024) {}
+  void Insert(u32 id, uptr size) {
+    total_allocated_ += size;
+    total_count_++;
+    // Linear lookup will be good enough for most cases (although not all).
+    for (uptr i = 0; i < allocations_.size(); i++) {
+      if (allocations_[i].id == id) {
+        allocations_[i].total_size += size;
+        allocations_[i].count++;
+        return;
+      }
+    }
+    allocations_.push_back({id, size, 1});
+  }
+
+  void Print(uptr top_percent) {
+    InternalSort(&allocations_, allocations_.size(),
+                 [](const AllocationSite &a, const AllocationSite &b) {
+                   return a.total_size > b.total_size;
+                 });
+    CHECK(total_allocated_);
+    uptr total_shown = 0;
+    Printf("Live Heap Allocations: %zd bytes from %zd allocations; "
+           "showing top %zd%%\n", total_allocated_, total_count_, top_percent);
+    for (uptr i = 0; i < allocations_.size(); i++) {
+      auto &a = allocations_[i];
+      Printf("%zd byte(s) (%zd%%) in %zd allocation(s)\n", a.total_size,
+             a.total_size * 100 / total_allocated_, a.count);
+      StackDepotGet(a.id).Print();
+      total_shown += a.total_size;
+      if (total_shown * 100 / total_allocated_ > top_percent)
+        break;
+    }
+  }
+
+ private:
+  uptr total_allocated_ = 0;
+  uptr total_count_ = 0;
+  InternalMmapVector<AllocationSite> allocations_;
+};
+
+static void ChunkCallback(uptr chunk, void *arg) {
+  HeapProfile *hp = reinterpret_cast<HeapProfile*>(arg);
+  AsanChunkView cv = FindHeapChunkByAddress(chunk);
+  if (!cv.IsAllocated()) return;
+  u32 id = cv.GetAllocStackId();
+  if (!id) return;
+  hp->Insert(id, cv.UsedSize());
+}
+
+static void MemoryProfileCB(const SuspendedThreadsList &suspended_threads_list,
+                            void *argument) {
+  HeapProfile hp;
+  __lsan::ForEachChunk(ChunkCallback, &hp);
+  hp.Print(reinterpret_cast<uptr>(argument));
+}
+
+}  // namespace __asan
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE
+void __sanitizer_print_memory_profile(uptr top_percent) {
+  __sanitizer::StopTheWorld(__asan::MemoryProfileCB, (void*)top_percent);
+}
+}  // extern "C"
+
+#endif  // CAN_SANITIZE_LEAKS
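
A condensed sketch of what HeapProfile above does: aggregate live allocations by allocation-stack id, sort by total size, and print entries until the cumulative share exceeds top_percent. std::vector and std::sort replace ASan's internal containers here:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct AllocationSite { uint32_t id; uint64_t total_size; uint64_t count; };

static void PrintProfile(std::vector<AllocationSite> sites, uint64_t top_percent) {
  uint64_t total = 0;
  for (const auto &s : sites) total += s.total_size;
  if (!total) return;
  std::sort(sites.begin(), sites.end(),
            [](const AllocationSite &a, const AllocationSite &b) {
              return a.total_size > b.total_size;
            });
  uint64_t shown = 0;
  for (const auto &s : sites) {
    std::printf("%llu byte(s) (%llu%%) in %llu allocation(s) at stack id %u\n",
                (unsigned long long)s.total_size,
                (unsigned long long)(s.total_size * 100 / total),
                (unsigned long long)s.count, s.id);
    shown += s.total_size;
    if (shown * 100 / total > top_percent) break;
  }
}

int main() {
  PrintProfile({{1, 900, 3}, {2, 90, 1}, {3, 10, 5}}, 90);
}
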
diff --git a/lib/asan/asan_new_delete.cc b/lib/asan/asan_new_delete.cc
index b5ba13e..fef6604 100644
--- a/lib/asan/asan_new_delete.cc
+++ b/lib/asan/asan_new_delete.cc
@@ -20,9 +20,25 @@
 
 #include <stddef.h>
 
-// C++ operators can't have visibility attributes on Windows.
+// C++ operators can't have dllexport attributes on Windows. We export them
+// anyway by passing extra -export flags to the linker, which is exactly what
+// dllexport would normally do. We need to export them in order to make the
+// VS2015 dynamic CRT (MD) work.
 #if SANITIZER_WINDOWS
 # define CXX_OPERATOR_ATTRIBUTE
+# ifdef _WIN64
+#  pragma comment(linker, "/export:??2@YAPEAX_K@Z")   // operator new
+#  pragma comment(linker, "/export:??3@YAXPEAX@Z")    // operator delete
+#  pragma comment(linker, "/export:??3@YAXPEAX_K@Z")  // sized operator delete
+#  pragma comment(linker, "/export:??_U@YAPEAX_K@Z")  // operator new[]
+#  pragma comment(linker, "/export:??_V@YAXPEAX@Z")   // operator delete[]
+# else
+#  pragma comment(linker, "/export:??2@YAPAXI@Z")   // operator new
+#  pragma comment(linker, "/export:??3@YAXPAX@Z")   // operator delete
+#  pragma comment(linker, "/export:??3@YAXPAXI@Z")  // sized operator delete
+#  pragma comment(linker, "/export:??_U@YAPAXI@Z")  // operator new[]
+#  pragma comment(linker, "/export:??_V@YAXPAX@Z")  // operator delete[]
+# endif
 #else
 # define CXX_OPERATOR_ATTRIBUTE INTERCEPTOR_ATTRIBUTE
 #endif
diff --git a/lib/asan/asan_poisoning.cc b/lib/asan/asan_poisoning.cc
index f77ab87..65c4401 100644
--- a/lib/asan/asan_poisoning.cc
+++ b/lib/asan/asan_poisoning.cc
@@ -117,9 +117,9 @@
   ShadowSegmentEndpoint beg(beg_addr);
   ShadowSegmentEndpoint end(end_addr);
   if (beg.chunk == end.chunk) {
-    CHECK(beg.offset < end.offset);
+    CHECK_LT(beg.offset, end.offset);
     s8 value = beg.value;
-    CHECK(value == end.value);
+    CHECK_EQ(value, end.value);
     // We can only poison memory if the byte in end.offset is unaddressable.
     // No need to re-poison memory if it is poisoned already.
     if (value > 0 && value <= end.offset) {
@@ -131,7 +131,7 @@
     }
     return;
   }
-  CHECK(beg.chunk < end.chunk);
+  CHECK_LT(beg.chunk, end.chunk);
   if (beg.offset > 0) {
     // Mark bytes from beg.offset as unaddressable.
     if (beg.value == 0) {
@@ -157,9 +157,9 @@
   ShadowSegmentEndpoint beg(beg_addr);
   ShadowSegmentEndpoint end(end_addr);
   if (beg.chunk == end.chunk) {
-    CHECK(beg.offset < end.offset);
+    CHECK_LT(beg.offset, end.offset);
     s8 value = beg.value;
-    CHECK(value == end.value);
+    CHECK_EQ(value, end.value);
     // We unpoison memory bytes up to end.offset if they are not
     // unpoisoned already.
     if (value != 0) {
@@ -167,7 +167,7 @@
     }
     return;
   }
-  CHECK(beg.chunk < end.chunk);
+  CHECK_LT(beg.chunk, end.chunk);
   if (beg.offset > 0) {
     *beg.chunk = 0;
     beg.chunk++;
@@ -315,11 +315,13 @@
 }
 
 void __asan_poison_stack_memory(uptr addr, uptr size) {
+  if (!__asan_option_detect_stack_use_after_scope) return;
   VReport(1, "poisoning: %p %zx\n", (void *)addr, size);
   PoisonAlignedStackMemory(addr, size, true);
 }
 
 void __asan_unpoison_stack_memory(uptr addr, uptr size) {
+  if (!__asan_option_detect_stack_use_after_scope) return;
   VReport(1, "unpoisoning: %p %zx\n", (void *)addr, size);
   PoisonAlignedStackMemory(addr, size, false);
 }
@@ -343,7 +345,7 @@
                                                  &stack);
   }
   CHECK_LE(end - beg,
-           FIRST_32_SECOND_64(1UL << 30, 1UL << 34)); // Sanity check.
+           FIRST_32_SECOND_64(1UL << 30, 1ULL << 34)); // Sanity check.
 
   uptr a = RoundDownTo(Min(old_mid, new_mid), granularity);
   uptr c = RoundUpTo(Max(old_mid, new_mid), granularity);
diff --git a/lib/asan/asan_posix.cc b/lib/asan/asan_posix.cc
index 9e01bcd..84a29ec 100644
--- a/lib/asan/asan_posix.cc
+++ b/lib/asan/asan_posix.cc
@@ -36,14 +36,23 @@
 void AsanOnDeadlySignal(int signo, void *siginfo, void *context) {
   ScopedDeadlySignal signal_scope(GetCurrentThread());
   int code = (int)((siginfo_t*)siginfo)->si_code;
-  // Write the first message using the bullet-proof write.
-  if (18 != internal_write(2, "ASAN:DEADLYSIGNAL\n", 18)) Die();
+  // Write the first message using fd=2, just in case.
+  // It may actually fail to write in case stderr is closed.
+  internal_write(2, "ASAN:DEADLYSIGNAL\n", 18);
   SignalContext sig = SignalContext::Create(siginfo, context);
 
   // Access at a reasonable offset above SP, or slightly below it (to account
   // for x86_64 or PowerPC redzone, ARM push of multiple registers, etc) is
   // probably a stack overflow.
+#ifdef __s390__
+  // On s390, the fault address in siginfo points to the start of the page, not
+  // to the precise word that was accessed.  Mask off the low bits of sp to
+  // take it into account.
+  bool IsStackAccess = sig.addr >= (sig.sp & ~0xFFF) &&
+                       sig.addr < sig.sp + 0xFFFF;
+#else
   bool IsStackAccess = sig.addr + 512 > sig.sp && sig.addr < sig.sp + 0xFFFF;
+#endif
 
 #if __powerpc__
   // Large stack frames can be allocated with e.g.
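
A small numeric check of the s390 heuristic added above: since the fault address reported in siginfo is page-aligned on s390, sp is masked to page granularity before the near-the-stack-pointer comparison. The values below are made up:

#include <cstdint>
#include <cstdio>

static bool IsStackAccessS390(uint64_t fault_addr, uint64_t sp) {
  // Compare against sp rounded down to the 4K page, mirroring the patch.
  return fault_addr >= (sp & ~0xFFFULL) && fault_addr < sp + 0xFFFF;
}

int main() {
  uint64_t sp = 0x3ffffffe6a8;      // hypothetical stack pointer
  uint64_t fault = sp & ~0xFFFULL;  // kernel reports the page start
  std::printf("stack access: %d\n", IsStackAccessS390(fault, sp));  // 1
}
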
diff --git a/lib/asan/asan_report.cc b/lib/asan/asan_report.cc
index 69bda82..220eba3 100644
--- a/lib/asan/asan_report.cc
+++ b/lib/asan/asan_report.cc
@@ -13,9 +13,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "asan_flags.h"
+#include "asan_descriptions.h"
 #include "asan_internal.h"
 #include "asan_mapping.h"
 #include "asan_report.h"
+#include "asan_scariness_score.h"
 #include "asan_stack.h"
 #include "asan_thread.h"
 #include "sanitizer_common/sanitizer_common.h"
@@ -64,56 +66,6 @@
   error_message_buffer_pos += Min(remaining, length);
 }
 
-// ---------------------- Decorator ------------------------------ {{{1
-class Decorator: public __sanitizer::SanitizerCommonDecorator {
- public:
-  Decorator() : SanitizerCommonDecorator() { }
-  const char *Access()     { return Blue(); }
-  const char *EndAccess()  { return Default(); }
-  const char *Location()   { return Green(); }
-  const char *EndLocation() { return Default(); }
-  const char *Allocation()  { return Magenta(); }
-  const char *EndAllocation()  { return Default(); }
-
-  const char *ShadowByte(u8 byte) {
-    switch (byte) {
-      case kAsanHeapLeftRedzoneMagic:
-      case kAsanHeapRightRedzoneMagic:
-      case kAsanArrayCookieMagic:
-        return Red();
-      case kAsanHeapFreeMagic:
-        return Magenta();
-      case kAsanStackLeftRedzoneMagic:
-      case kAsanStackMidRedzoneMagic:
-      case kAsanStackRightRedzoneMagic:
-      case kAsanStackPartialRedzoneMagic:
-        return Red();
-      case kAsanStackAfterReturnMagic:
-        return Magenta();
-      case kAsanInitializationOrderMagic:
-        return Cyan();
-      case kAsanUserPoisonedMemoryMagic:
-      case kAsanContiguousContainerOOBMagic:
-      case kAsanAllocaLeftMagic:
-      case kAsanAllocaRightMagic:
-        return Blue();
-      case kAsanStackUseAfterScopeMagic:
-        return Magenta();
-      case kAsanGlobalRedzoneMagic:
-        return Red();
-      case kAsanInternalHeapMagic:
-        return Yellow();
-      case kAsanIntraObjectRedzone:
-        return Yellow();
-      default:
-        return Default();
-    }
-  }
-  const char *EndShadowByte() { return Default(); }
-  const char *MemoryByte() { return Magenta(); }
-  const char *EndMemoryByte() { return Default(); }
-};
-
 // ---------------------- Helper functions ----------------------- {{{1
 
 static void PrintMemoryByte(InternalScopedString *str, const char *before,
@@ -233,11 +185,6 @@
   }
 }
 
-static void DescribeThread(AsanThread *t) {
-  if (t)
-    DescribeThread(t->context());
-}
-
 // ---------------------- Address Descriptions ------------------- {{{1
 
 static bool IsASCII(unsigned char c) {
@@ -335,46 +282,6 @@
   return true;
 }
 
-bool DescribeAddressIfShadow(uptr addr, AddressDescription *descr, bool print) {
-  if (AddrIsInMem(addr))
-    return false;
-  const char *area_type = nullptr;
-  if (AddrIsInShadowGap(addr)) area_type = "shadow gap";
-  else if (AddrIsInHighShadow(addr)) area_type = "high shadow";
-  else if (AddrIsInLowShadow(addr)) area_type = "low shadow";
-  if (area_type != nullptr) {
-    if (print) {
-      Printf("Address %p is located in the %s area.\n", addr, area_type);
-    } else {
-      CHECK(descr);
-      descr->region_kind = area_type;
-    }
-    return true;
-  }
-  CHECK(0 && "Address is not in memory and not in shadow?");
-  return false;
-}
-
-// Return " (thread_name) " or an empty string if the name is empty.
-const char *ThreadNameWithParenthesis(AsanThreadContext *t, char buff[],
-                                      uptr buff_len) {
-  const char *name = t->name;
-  if (name[0] == '\0') return "";
-  buff[0] = 0;
-  internal_strncat(buff, " (", 3);
-  internal_strncat(buff, name, buff_len - 4);
-  internal_strncat(buff, ")", 2);
-  return buff;
-}
-
-const char *ThreadNameWithParenthesis(u32 tid, char buff[],
-                                      uptr buff_len) {
-  if (tid == kInvalidTid) return "";
-  asanThreadRegistry().CheckLocked();
-  AsanThreadContext *t = GetThreadContextByTidLocked(tid);
-  return ThreadNameWithParenthesis(t, buff, buff_len);
-}
-
 static void PrintAccessAndVarIntersection(const StackVarDescr &var, uptr addr,
                                           uptr access_size, uptr prev_var_end,
                                           uptr next_var_beg) {
@@ -470,7 +377,7 @@
   // previously. That's unfortunate, but I have no better solution,
   // especially given that the alloca may be from an entirely different place
   // (e.g. use-after-scope, or a different thread's stack).
-#if defined(__powerpc64__) && defined(__BIG_ENDIAN__)
+#if SANITIZER_PPC64V1
   // On PowerPC64 ELFv1, the address of a function actually points to a
   // three-doubleword data structure with the first field containing
   // the address of the function's code.
@@ -544,7 +451,7 @@
     return;
   }
   DescribeAccessToHeapChunk(chunk, addr, access_size);
-  CHECK(chunk.AllocTid() != kInvalidTid);
+  CHECK_NE(chunk.AllocTid(), kInvalidTid);
   asanThreadRegistry().CheckLocked();
   AsanThreadContext *alloc_thread =
       GetThreadContextByTidLocked(chunk.AllocTid());
@@ -590,38 +497,6 @@
   DescribeHeapAddress(addr, access_size);
 }
 
-// ------------------- Thread description -------------------- {{{1
-
-void DescribeThread(AsanThreadContext *context) {
-  CHECK(context);
-  asanThreadRegistry().CheckLocked();
-  // No need to announce the main thread.
-  if (context->tid == 0 || context->announced) {
-    return;
-  }
-  context->announced = true;
-  char tname[128];
-  InternalScopedString str(1024);
-  str.append("Thread T%d%s", context->tid,
-             ThreadNameWithParenthesis(context->tid, tname, sizeof(tname)));
-  if (context->parent_tid == kInvalidTid) {
-    str.append(" created by unknown thread\n");
-    Printf("%s", str.data());
-    return;
-  }
-  str.append(
-      " created by T%d%s here:\n", context->parent_tid,
-      ThreadNameWithParenthesis(context->parent_tid, tname, sizeof(tname)));
-  Printf("%s", str.data());
-  StackDepotGet(context->stack_id).Print();
-  // Recursively described parent thread if needed.
-  if (flags()->print_full_thread_history) {
-    AsanThreadContext *parent_context =
-        GetThreadContextByTidLocked(context->parent_tid);
-    DescribeThread(parent_context);
-  }
-}
-
 // -------------------- Different kinds of reports ----------------- {{{1
 
 // Use ScopedInErrorReport to run common actions just before and
@@ -738,7 +613,7 @@
 u32 ScopedInErrorReport::reporting_thread_tid_ = kInvalidTid;
 
 void ReportStackOverflow(const SignalContext &sig) {
-  ScopedInErrorReport in_report;
+  ScopedInErrorReport in_report(/*report*/ nullptr, /*fatal*/ true);
   Decorator d;
   Printf("%s", d.Warning());
   Report(
@@ -747,13 +622,14 @@
       (void *)sig.addr, (void *)sig.pc, (void *)sig.bp, (void *)sig.sp,
       GetCurrentTidOrInvalid());
   Printf("%s", d.EndWarning());
+  ScarinessScore::PrintSimple(10, "stack-overflow");
   GET_STACK_TRACE_SIGNAL(sig);
   stack.Print();
   ReportErrorSummary("stack-overflow", &stack);
 }
 
 void ReportDeadlySignal(const char *description, const SignalContext &sig) {
-  ScopedInErrorReport in_report(/*report*/nullptr, /*fatal*/true);
+  ScopedInErrorReport in_report(/*report*/ nullptr, /*fatal*/ true);
   Decorator d;
   Printf("%s", d.Warning());
   Report(
@@ -761,10 +637,32 @@
       " (pc %p bp %p sp %p T%d)\n",
       description, (void *)sig.addr, (void *)sig.pc, (void *)sig.bp,
       (void *)sig.sp, GetCurrentTidOrInvalid());
-  if (sig.pc < GetPageSizeCached()) {
-    Report("Hint: pc points to the zero page.\n");
-  }
   Printf("%s", d.EndWarning());
+  ScarinessScore SS;
+  if (sig.pc < GetPageSizeCached())
+    Report("Hint: pc points to the zero page.\n");
+  if (sig.is_memory_access) {
+    const char *access_type =
+        sig.write_flag == SignalContext::WRITE
+            ? "WRITE"
+            : (sig.write_flag == SignalContext::READ ? "READ" : "UNKNOWN");
+    Report("The signal is caused by a %s memory access.\n", access_type);
+    if (sig.addr < GetPageSizeCached()) {
+      Report("Hint: address points to the zero page.\n");
+      SS.Scare(10, "null-deref");
+    } else if (sig.addr == sig.pc) {
+      SS.Scare(60, "wild-jump");
+    } else if (sig.write_flag == SignalContext::WRITE) {
+      SS.Scare(30, "wild-addr-write");
+    } else if (sig.write_flag == SignalContext::READ) {
+      SS.Scare(20, "wild-addr-read");
+    } else {
+      SS.Scare(25, "wild-addr");
+    }
+  } else {
+    SS.Scare(10, "signal");
+  }
+  SS.Print();
   GET_STACK_TRACE_SIGNAL(sig);
   stack.Print();
   MaybeDumpInstructionBytes(sig.pc);
@@ -784,13 +682,14 @@
          ThreadNameWithParenthesis(curr_tid, tname, sizeof(tname)));
   Printf("%s", d.EndWarning());
   CHECK_GT(free_stack->size, 0);
+  ScarinessScore::PrintSimple(42, "double-free");
   GET_STACK_TRACE_FATAL(free_stack->trace[0], free_stack->top_frame_bp);
   stack.Print();
   DescribeHeapAddress(addr, 1);
   ReportErrorSummary("double-free", &stack);
 }
 
-void ReportNewDeleteSizeMismatch(uptr addr, uptr delete_size,
+void ReportNewDeleteSizeMismatch(uptr addr, uptr alloc_size, uptr delete_size,
                                  BufferedStackTrace *free_stack) {
   ScopedInErrorReport in_report;
   Decorator d;
@@ -804,8 +703,9 @@
   Printf("%s  object passed to delete has wrong type:\n", d.EndWarning());
   Printf("  size of the allocated type:   %zd bytes;\n"
          "  size of the deallocated type: %zd bytes.\n",
-         asan_mz_size(reinterpret_cast<void*>(addr)), delete_size);
+         alloc_size, delete_size);
   CHECK_GT(free_stack->size, 0);
+  ScarinessScore::PrintSimple(10, "new-delete-type-mismatch");
   GET_STACK_TRACE_FATAL(free_stack->trace[0], free_stack->top_frame_bp);
   stack.Print();
   DescribeHeapAddress(addr, 1);
@@ -825,6 +725,7 @@
          curr_tid, ThreadNameWithParenthesis(curr_tid, tname, sizeof(tname)));
   Printf("%s", d.EndWarning());
   CHECK_GT(free_stack->size, 0);
+  ScarinessScore::PrintSimple(40, "bad-free");
   GET_STACK_TRACE_FATAL(free_stack->trace[0], free_stack->top_frame_bp);
   stack.Print();
   DescribeHeapAddress(addr, 1);
@@ -846,6 +747,7 @@
         alloc_names[alloc_type], dealloc_names[dealloc_type], addr);
   Printf("%s", d.EndWarning());
   CHECK_GT(free_stack->size, 0);
+  ScarinessScore::PrintSimple(10, "alloc-dealloc-mismatch");
   GET_STACK_TRACE_FATAL(free_stack->trace[0], free_stack->top_frame_bp);
   stack.Print();
   DescribeHeapAddress(addr, 1);
@@ -894,6 +796,7 @@
              "memory ranges [%p,%p) and [%p, %p) overlap\n", \
              bug_type, offset1, offset1 + length1, offset2, offset2 + length2);
   Printf("%s", d.EndWarning());
+  ScarinessScore::PrintSimple(10, bug_type);
   stack->Print();
   DescribeAddress((uptr)offset1, length1, bug_type);
   DescribeAddress((uptr)offset2, length2, bug_type);
@@ -908,6 +811,7 @@
   Printf("%s", d.Warning());
   Report("ERROR: AddressSanitizer: %s: (size=%zd)\n", bug_type, size);
   Printf("%s", d.EndWarning());
+  ScarinessScore::PrintSimple(10, bug_type);
   stack->Print();
   DescribeAddress(offset, size, bug_type);
   ReportErrorSummary(bug_type, stack);
@@ -982,10 +886,10 @@
   uptr a2 = reinterpret_cast<uptr>(p2);
   AsanChunkView chunk1 = FindHeapChunkByAddress(a1);
   AsanChunkView chunk2 = FindHeapChunkByAddress(a2);
-  bool valid1 = chunk1.IsValid();
-  bool valid2 = chunk2.IsValid();
-  if ((valid1 != valid2) || (valid1 && valid2 && !chunk1.Eq(chunk2))) {
-    GET_CALLER_PC_BP_SP;                                              \
+  bool valid1 = chunk1.IsAllocated();
+  bool valid2 = chunk2.IsAllocated();
+  if (!valid1 || !valid2 || !chunk1.Eq(chunk2)) {
+    GET_CALLER_PC_BP_SP;
     return ReportInvalidPointerPair(pc, bp, sp, a1, a2);
   }
 }
@@ -1024,10 +928,26 @@
          "AddressSanitizerContainerOverflow.\n");
 }
 
+static bool AdjacentShadowValuesAreFullyPoisoned(u8 *s) {
+  return s[-1] > 127 && s[1] > 127;
+}
+
 void ReportGenericError(uptr pc, uptr bp, uptr sp, uptr addr, bool is_write,
                         uptr access_size, u32 exp, bool fatal) {
   if (!fatal && SuppressErrorReport(pc)) return;
   ENABLE_FRAME_POINTER;
+  ScarinessScore SS;
+
+  if (access_size) {
+    if (access_size <= 9) {
+      char desr[] = "?-byte";
+      desr[0] = '0' + access_size;
+      SS.Scare(access_size + access_size / 2, desr);
+    } else if (access_size >= 10) {
+      SS.Scare(15, "multi-byte");
+    }
+    is_write ? SS.Scare(20, "write") : SS.Scare(1, "read");
+  }
 
   // Optimization experiments.
   // The experiments can be used to evaluate potential optimizations that remove
@@ -1049,50 +969,76 @@
     // If we are in the partial right redzone, look at the next shadow byte.
     if (*shadow_addr > 0 && *shadow_addr < 128)
       shadow_addr++;
+    bool far_from_bounds = false;
     shadow_val = *shadow_addr;
+    int bug_type_score = 0;
+    // For use-after-frees, reads are almost as bad as writes.
+    int read_after_free_bonus = 0;
     switch (shadow_val) {
       case kAsanHeapLeftRedzoneMagic:
       case kAsanHeapRightRedzoneMagic:
       case kAsanArrayCookieMagic:
         bug_descr = "heap-buffer-overflow";
+        bug_type_score = 10;
+        far_from_bounds = AdjacentShadowValuesAreFullyPoisoned(shadow_addr);
         break;
       case kAsanHeapFreeMagic:
         bug_descr = "heap-use-after-free";
+        bug_type_score = 20;
+        if (!is_write) read_after_free_bonus = 18;
         break;
       case kAsanStackLeftRedzoneMagic:
         bug_descr = "stack-buffer-underflow";
+        bug_type_score = 25;
+        far_from_bounds = AdjacentShadowValuesAreFullyPoisoned(shadow_addr);
         break;
       case kAsanInitializationOrderMagic:
         bug_descr = "initialization-order-fiasco";
+        bug_type_score = 1;
         break;
       case kAsanStackMidRedzoneMagic:
       case kAsanStackRightRedzoneMagic:
       case kAsanStackPartialRedzoneMagic:
         bug_descr = "stack-buffer-overflow";
+        bug_type_score = 25;
+        far_from_bounds = AdjacentShadowValuesAreFullyPoisoned(shadow_addr);
         break;
       case kAsanStackAfterReturnMagic:
         bug_descr = "stack-use-after-return";
+        bug_type_score = 30;
+        if (!is_write) read_after_free_bonus = 18;
         break;
       case kAsanUserPoisonedMemoryMagic:
         bug_descr = "use-after-poison";
+        bug_type_score = 20;
         break;
       case kAsanContiguousContainerOOBMagic:
         bug_descr = "container-overflow";
+        bug_type_score = 10;
         break;
       case kAsanStackUseAfterScopeMagic:
         bug_descr = "stack-use-after-scope";
+        bug_type_score = 10;
         break;
       case kAsanGlobalRedzoneMagic:
         bug_descr = "global-buffer-overflow";
+        bug_type_score = 10;
+        far_from_bounds = AdjacentShadowValuesAreFullyPoisoned(shadow_addr);
         break;
       case kAsanIntraObjectRedzone:
         bug_descr = "intra-object-overflow";
+        bug_type_score = 10;
         break;
       case kAsanAllocaLeftMagic:
       case kAsanAllocaRightMagic:
         bug_descr = "dynamic-stack-buffer-overflow";
+        bug_type_score = 25;
+        far_from_bounds = AdjacentShadowValuesAreFullyPoisoned(shadow_addr);
         break;
     }
+    SS.Scare(bug_type_score + read_after_free_bonus, bug_descr);
+    if (far_from_bounds)
+      SS.Scare(10, "far-from-bounds");
   }
 
   ReportData report = { pc, sp, bp, addr, (bool)is_write, access_size,
@@ -1115,6 +1061,7 @@
          ThreadNameWithParenthesis(curr_tid, tname, sizeof(tname)),
          d.EndAccess());
 
+  SS.Print();
   GET_STACK_TRACE_FATAL(pc, bp);
   stack.Print();
 
diff --git a/lib/asan/asan_report.h b/lib/asan/asan_report.h
index 559b8ad..7b7122f 100644
--- a/lib/asan/asan_report.h
+++ b/lib/asan/asan_report.h
@@ -41,19 +41,16 @@
 // The following functions prints address description depending
 // on the memory type (shadow/heap/stack/global).
 void DescribeHeapAddress(uptr addr, uptr access_size);
-bool DescribeAddressIfShadow(uptr addr, AddressDescription *descr = nullptr,
-                             bool print = true);
 bool ParseFrameDescription(const char *frame_descr,
                            InternalMmapVector<StackVarDescr> *vars);
 bool DescribeAddressIfStack(uptr addr, uptr access_size);
-void DescribeThread(AsanThreadContext *context);
 
 // Different kinds of error reports.
 void ReportGenericError(uptr pc, uptr bp, uptr sp, uptr addr, bool is_write,
                         uptr access_size, u32 exp, bool fatal);
 void ReportStackOverflow(const SignalContext &sig);
 void ReportDeadlySignal(const char *description, const SignalContext &sig);
-void ReportNewDeleteSizeMismatch(uptr addr, uptr delete_size,
+void ReportNewDeleteSizeMismatch(uptr addr, uptr alloc_size, uptr delete_size,
                                  BufferedStackTrace *free_stack);
 void ReportDoubleFree(uptr addr, BufferedStackTrace *free_stack);
 void ReportFreeNotMalloced(uptr addr, BufferedStackTrace *free_stack);
diff --git a/lib/asan/asan_rtl.cc b/lib/asan/asan_rtl.cc
index 7b8b5dd..b6297da 100644
--- a/lib/asan/asan_rtl.cc
+++ b/lib/asan/asan_rtl.cc
@@ -33,6 +33,7 @@
 #include "ubsan/ubsan_platform.h"
 
 int __asan_option_detect_stack_use_after_return;  // Global interface symbol.
+int __asan_option_detect_stack_use_after_scope;  // Global interface symbol.
 uptr *__asan_test_only_reported_buggy_pointer;  // Used only for testing asan.
 
 namespace __asan {
@@ -86,8 +87,8 @@
 // Reserve memory range [beg, end].
 // We need to use inclusive range because end+1 may not be representable.
 void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name) {
-  CHECK_EQ((beg % GetPageSizeCached()), 0);
-  CHECK_EQ(((end + 1) % GetPageSizeCached()), 0);
+  CHECK_EQ((beg % GetMmapGranularity()), 0);
+  CHECK_EQ(((end + 1) % GetMmapGranularity()), 0);
   uptr size = end - beg + 1;
   DecreaseTotalMmap(size);  // Don't count the shadow against mmap_limit_mb.
   void *res = MmapFixedNoReserve(beg, size, name);
@@ -320,26 +321,26 @@
   kHighMemEnd = GetMaxVirtualAddress();
   // Increase kHighMemEnd to make sure it's properly
   // aligned together with kHighMemBeg:
-  kHighMemEnd |= SHADOW_GRANULARITY * GetPageSizeCached() - 1;
+  kHighMemEnd |= SHADOW_GRANULARITY * GetMmapGranularity() - 1;
 #endif  // !ASAN_FIXED_MAPPING
-  CHECK_EQ((kHighMemBeg % GetPageSizeCached()), 0);
+  CHECK_EQ((kHighMemBeg % GetMmapGranularity()), 0);
 }
 
 static void ProtectGap(uptr addr, uptr size) {
   if (!flags()->protect_shadow_gap)
     return;
-  void *res = MmapNoAccess(addr, size, "shadow gap");
+  void *res = MmapFixedNoAccess(addr, size, "shadow gap");
   if (addr == (uptr)res)
     return;
   // A few pages at the start of the address space can not be protected.
   // But we really want to protect as much as possible, to prevent this memory
   // being returned as a result of a non-FIXED mmap().
   if (addr == kZeroBaseShadowStart) {
-    uptr step = GetPageSizeCached();
+    uptr step = GetMmapGranularity();
     while (size > step && addr < kZeroBaseMaxShadowStart) {
       addr += step;
       size -= step;
-      void *res = MmapNoAccess(addr, size, "shadow gap");
+      void *res = MmapFixedNoAccess(addr, size, "shadow gap");
       if (addr == (uptr)res)
         return;
     }
@@ -415,10 +416,13 @@
 
   AsanCheckIncompatibleRT();
   AsanCheckDynamicRTPrereqs();
+  AvoidCVE_2016_2143();
 
   SetCanPoisonMemory(flags()->poison_heap);
   SetMallocContextSize(common_flags()->malloc_context_size);
 
+  InitializePlatformExceptionHandlers();
+
   InitializeHighMemEnd();
 
   // Make sure we are not statically linked.
@@ -431,10 +435,12 @@
 
   __sanitizer_set_report_path(common_flags()->log_path);
 
-  // Enable UAR detection, if required.
   __asan_option_detect_stack_use_after_return =
       flags()->detect_stack_use_after_return;
 
+  __asan_option_detect_stack_use_after_scope =
+      flags()->detect_stack_use_after_scope;
+
   // Re-exec ourselves if we need to set additional env or command line args.
   MaybeReexec();
 
@@ -462,6 +468,12 @@
     kMidMemBeg = kLowMemEnd < 0x3000000000ULL ? 0x3000000000ULL : 0;
     kMidMemEnd = kLowMemEnd < 0x3000000000ULL ? 0x4fffffffffULL : 0;
   }
+#elif SANITIZER_WINDOWS64
+  // Disable the "mid mem" shadow layout.
+  if (!full_shadow_is_available) {
+    kMidMemBeg = 0;
+    kMidMemEnd = 0;
+  }
 #endif
 
   if (Verbosity()) PrintAddressSpaceLayout();
@@ -539,12 +551,12 @@
   force_interface_symbols();  // no-op.
   SanitizerInitializeUnwinder();
 
-#if CAN_SANITIZE_LEAKS
-  __lsan::InitCommonLsan();
-  if (common_flags()->detect_leaks && common_flags()->leak_check_at_exit) {
-    Atexit(__lsan::DoLeakCheck);
+  if (CAN_SANITIZE_LEAKS) {
+    __lsan::InitCommonLsan();
+    if (common_flags()->detect_leaks && common_flags()->leak_check_at_exit) {
+      Atexit(__lsan::DoLeakCheck);
+    }
   }
-#endif  // CAN_SANITIZE_LEAKS
 
 #if CAN_SANITIZE_UB
   __ubsan::InitAsPlugin();
@@ -552,6 +564,15 @@
 
   InitializeSuppressions();
 
+  if (CAN_SANITIZE_LEAKS) {
+    // LateInitialize() calls dlsym, which can allocate an error string buffer
+    // in the TLS.  Let's ignore the allocation to avoid reporting a leak.
+    __lsan::ScopedInterceptorDisabler disabler;
+    Symbolizer::LateInitialize();
+  } else {
+    Symbolizer::LateInitialize();
+  }
+
   VReport(1, "AddressSanitizer Init done\n");
 }
 
diff --git a/lib/asan/asan_scariness_score.h b/lib/asan/asan_scariness_score.h
new file mode 100644
index 0000000..492eb56
--- /dev/null
+++ b/lib/asan/asan_scariness_score.h
@@ -0,0 +1,67 @@
+//===-- asan_scariness_score.h ----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Compute the level of scariness of the error message.
+// Don't expect any deep science here, just a set of heuristics that suggest
+// that e.g. 1-byte-read-global-buffer-overflow is less scary than
+// 8-byte-write-stack-use-after-return.
+//
+// Every error report has one or more features, such as memory access size,
+// type (read or write), type of accessed memory (e.g. free-d heap, or a global
+// redzone), etc. Every such feature has an int score and a string description.
+// The overall score is the sum of all feature scores and the description
+// is a concatenation of feature descriptions.
+// Examples:
+//  17 (4-byte-read-heap-buffer-overflow)
+//  65 (multi-byte-write-stack-use-after-return)
+//  10 (null-deref)
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ASAN_SCARINESS_SCORE_H
+#define ASAN_SCARINESS_SCORE_H
+
+#include "asan_flags.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_libc.h"
+
+namespace __asan {
+class ScarinessScore {
+ public:
+  ScarinessScore() {
+    descr[0] = 0;
+  }
+  void Scare(int add_to_score, const char *reason) {
+    if (descr[0])
+      internal_strlcat(descr, "-", sizeof(descr));
+    internal_strlcat(descr, reason, sizeof(descr));
+    score += add_to_score;
+  };
+  int GetScore() const { return score; }
+  const char *GetDescription() const { return descr; }
+  void Print() {
+    if (score && flags()->print_scariness)
+      Printf("SCARINESS: %d (%s)\n", score, descr);
+  }
+  static void PrintSimple(int score, const char *descr) {
+    ScarinessScore SS;
+    SS.Scare(score, descr);
+    SS.Print();
+  }
+
+ private:
+  int score = 0;
+  char descr[1024];
+};
+
+}  // namespace __asan
+
+#endif  // ASAN_SCARINESS_SCORE_H
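A minimal standalone analogue of the class above (using the C library in place of the sanitizer's internal_* helpers and flags(), so it builds outside the runtime), showing how the feature scores compose into the documented "17 (4-byte-read-heap-buffer-overflow)" example: ReportGenericError charges 4 + 4/2 = 6 for the 4-byte access, 1 for a read, and 10 for a heap buffer overflow.

#include <cstdio>
#include <cstring>

// Mirrors ScarinessScore: a running total plus a dash-joined description.
struct Scariness {
  int score = 0;
  char descr[1024] = {};
  void Scare(int add, const char *reason) {
    if (descr[0]) strncat(descr, "-", sizeof(descr) - strlen(descr) - 1);
    strncat(descr, reason, sizeof(descr) - strlen(descr) - 1);
    score += add;
  }
  void Print() const { printf("SCARINESS: %d (%s)\n", score, descr); }
};

int main() {
  Scariness ss;
  ss.Scare(4 + 4 / 2, "4-byte");         // access size contribution
  ss.Scare(1, "read");                   // reads are less scary than writes
  ss.Scare(10, "heap-buffer-overflow");  // bug-type contribution
  ss.Print();  // SCARINESS: 17 (4-byte-read-heap-buffer-overflow)
  return 0;
}
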
diff --git a/lib/asan/asan_stack.h b/lib/asan/asan_stack.h
index 5c51815..cc95e0f 100644
--- a/lib/asan/asan_stack.h
+++ b/lib/asan/asan_stack.h
@@ -48,7 +48,10 @@
       uptr stack_top = t->stack_top();
       uptr stack_bottom = t->stack_bottom();
       ScopedUnwinding unwind_scope(t);
-      stack->Unwind(max_depth, pc, bp, context, stack_top, stack_bottom, fast);
+      if (!SANITIZER_MIPS || IsValidFrame(bp, stack_top, stack_bottom)) {
+        stack->Unwind(max_depth, pc, bp, context, stack_top, stack_bottom,
+                      fast);
+      }
     } else if (!t && !fast) {
       /* If GetCurrentThread() has failed, try to do slow unwind anyways. */
       stack->Unwind(max_depth, pc, bp, context, 0, 0, false);
diff --git a/lib/asan/asan_suppressions.cc b/lib/asan/asan_suppressions.cc
index 41887b5..62c868d 100644
--- a/lib/asan/asan_suppressions.cc
+++ b/lib/asan/asan_suppressions.cc
@@ -89,6 +89,7 @@
 
     if (suppression_ctx->HasSuppressionType(kInterceptorViaFunction)) {
       SymbolizedStack *frames = symbolizer->SymbolizePC(addr);
+      CHECK(frames);
       for (SymbolizedStack *cur = frames; cur; cur = cur->next) {
         const char *function_name = cur->info.function;
         if (!function_name) {
diff --git a/lib/asan/asan_thread.cc b/lib/asan/asan_thread.cc
index 526ef3d..d7e2cca 100644
--- a/lib/asan/asan_thread.cc
+++ b/lib/asan/asan_thread.cc
@@ -120,6 +120,71 @@
   DTLS_Destroy();
 }
 
+void AsanThread::StartSwitchFiber(FakeStack **fake_stack_save, uptr bottom,
+                                  uptr size) {
+  if (atomic_load(&stack_switching_, memory_order_relaxed)) {
+    Report("ERROR: starting fiber switch while in fiber switch\n");
+    Die();
+  }
+
+  next_stack_bottom_ = bottom;
+  next_stack_top_ = bottom + size;
+  atomic_store(&stack_switching_, 1, memory_order_release);
+
+  FakeStack *current_fake_stack = fake_stack_;
+  if (fake_stack_save)
+    *fake_stack_save = fake_stack_;
+  fake_stack_ = nullptr;
+  SetTLSFakeStack(nullptr);
+  // If fake_stack_save is null, the fiber will die; destroy its fake stack.
+  if (!fake_stack_save && current_fake_stack)
+    current_fake_stack->Destroy(this->tid());
+}
+
+void AsanThread::FinishSwitchFiber(FakeStack *fake_stack_save) {
+  if (!atomic_load(&stack_switching_, memory_order_relaxed)) {
+    Report("ERROR: finishing a fiber switch that has not started\n");
+    Die();
+  }
+
+  if (fake_stack_save) {
+    SetTLSFakeStack(fake_stack_save);
+    fake_stack_ = fake_stack_save;
+  }
+
+  stack_bottom_ = next_stack_bottom_;
+  stack_top_ = next_stack_top_;
+  atomic_store(&stack_switching_, 0, memory_order_release);
+  next_stack_top_ = 0;
+  next_stack_bottom_ = 0;
+}
+
+inline AsanThread::StackBounds AsanThread::GetStackBounds() const {
+  if (!atomic_load(&stack_switching_, memory_order_acquire))
+    return StackBounds{stack_bottom_, stack_top_};  // NOLINT
+  char local;
+  const uptr cur_stack = (uptr)&local;
+  // Note: check the next stack first, because FinishSwitchFiber may be in
+  // the process of overwriting stack_top_/bottom_. In that case we are
+  // already running on the next stack.
+  if (cur_stack >= next_stack_bottom_ && cur_stack < next_stack_top_)
+    return StackBounds{next_stack_bottom_, next_stack_top_};  // NOLINT
+  return StackBounds{stack_bottom_, stack_top_};              // NOLINT
+}
+
+uptr AsanThread::stack_top() {
+  return GetStackBounds().top;
+}
+
+uptr AsanThread::stack_bottom() {
+  return GetStackBounds().bottom;
+}
+
+uptr AsanThread::stack_size() {
+  const auto bounds = GetStackBounds();
+  return bounds.top - bounds.bottom;
+}
+
 // We want to create the FakeStack lazily on the first use, but not earlier
 // than the stack size is known, and the procedure has to be async-signal safe.
 FakeStack *AsanThread::AsyncSignalSafeLazyInitFakeStack() {
@@ -150,6 +215,8 @@
 }
 
 void AsanThread::Init() {
+  next_stack_top_ = next_stack_bottom_ = 0;
+  atomic_store(&stack_switching_, false, memory_order_release);
   fake_stack_ = nullptr;  // Will be initialized lazily if needed.
   CHECK_EQ(this->stack_size(), 0U);
   SetThreadStackAndTls();
@@ -195,9 +262,10 @@
 
 void AsanThread::SetThreadStackAndTls() {
   uptr tls_size = 0;
-  GetThreadStackAndTls(tid() == 0, &stack_bottom_, &stack_size_, &tls_begin_,
-                       &tls_size);
-  stack_top_ = stack_bottom_ + stack_size_;
+  uptr stack_size = 0;
+  GetThreadStackAndTls(tid() == 0, const_cast<uptr *>(&stack_bottom_),
+                       const_cast<uptr *>(&stack_size), &tls_begin_, &tls_size);
+  stack_top_ = stack_bottom_ + stack_size;
   tls_end_ = tls_begin_ + tls_size;
   dtls_ = DTLS_Get();
 
@@ -250,6 +318,11 @@
   return true;
 }
 
+bool AsanThread::AddrIsInStack(uptr addr) {
+  const auto bounds = GetStackBounds();
+  return addr >= bounds.bottom && addr < bounds.top;
+}
+
 static bool ThreadStackContainsAddress(ThreadContextBase *tctx_base,
                                        void *addr) {
   AsanThreadContext *tctx = static_cast<AsanThreadContext*>(tctx_base);
@@ -357,3 +430,29 @@
   __asan::EnsureMainThreadIDIsCorrect();
 }
 } // namespace __lsan
+
+// ---------------------- Interface ---------------- {{{1
+using namespace __asan;  // NOLINT
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE
+void __sanitizer_start_switch_fiber(void **fakestacksave, const void *bottom,
+                                    uptr size) {
+  AsanThread *t = GetCurrentThread();
+  if (!t) {
+    VReport(1, "__asan_start_switch_fiber called from unknown thread\n");
+    return;
+  }
+  t->StartSwitchFiber((FakeStack**)fakestacksave, (uptr)bottom, size);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __sanitizer_finish_switch_fiber(void* fakestack) {
+  AsanThread *t = GetCurrentThread();
+  if (!t) {
+    VReport(1, "__asan_finish_switch_fiber called from unknown thread\n");
+    return;
+  }
+  t->FinishSwitchFiber((FakeStack*)fakestack);
+}
+}
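A hypothetical usage sketch of the two annotations above around a ucontext-based fiber switch; it is not part of the patch, assumes Linux/glibc (pthread_getattr_np for the main stack bounds), and declares the entry points locally with size_t standing in for uptr. It must be built with -fsanitize=address so the __sanitizer_* symbols resolve against the runtime.

#ifndef _GNU_SOURCE
#define _GNU_SOURCE 1
#endif
#include <pthread.h>
#include <ucontext.h>
#include <cstddef>
#include <cstdlib>

extern "C" {
// Prototypes written locally for this sketch.
void __sanitizer_start_switch_fiber(void **fake_stack_save, const void *bottom,
                                    size_t size);
void __sanitizer_finish_switch_fiber(void *fake_stack);
}

static ucontext_t main_ctx, fiber_ctx;
static void *main_stack_bottom;
static size_t main_stack_size;
static const size_t kFiberStackSize = 1 << 16;

static void FiberBody() {
  // Just switched to: commit this fiber's stack bounds; there is no fake
  // stack to restore because the fiber is brand new.
  __sanitizer_finish_switch_fiber(nullptr);
  // ... work running on the fiber stack ...
  // The fiber is about to die: pass nullptr so ASan destroys its fake stack,
  // and announce the switch back to the main thread's stack.
  __sanitizer_start_switch_fiber(nullptr, main_stack_bottom, main_stack_size);
  swapcontext(&fiber_ctx, &main_ctx);
}

int main() {
  // Main thread stack bounds (glibc-specific).
  pthread_attr_t attr;
  pthread_getattr_np(pthread_self(), &attr);
  pthread_attr_getstack(&attr, &main_stack_bottom, &main_stack_size);
  pthread_attr_destroy(&attr);

  char *fiber_stack = static_cast<char *>(malloc(kFiberStackSize));
  getcontext(&fiber_ctx);
  fiber_ctx.uc_stack.ss_sp = fiber_stack;
  fiber_ctx.uc_stack.ss_size = kFiberStackSize;
  fiber_ctx.uc_link = &main_ctx;
  makecontext(&fiber_ctx, FiberBody, 0);

  void *fake_stack = nullptr;
  // Announce the switch to the fiber stack, saving the current fake stack.
  __sanitizer_start_switch_fiber(&fake_stack, fiber_stack, kFiberStackSize);
  swapcontext(&main_ctx, &fiber_ctx);
  // Back on the main stack: restore its bounds and the saved fake stack.
  __sanitizer_finish_switch_fiber(fake_stack);

  free(fiber_stack);
  return 0;
}

The call pattern mirrors StartSwitchFiber/FinishSwitchFiber: the side being switched away from calls start (passing nullptr for the save slot only when its fiber will not be resumed), and the side being switched to calls finish.
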
diff --git a/lib/asan/asan_thread.h b/lib/asan/asan_thread.h
index b05d720..92a92a2 100644
--- a/lib/asan/asan_thread.h
+++ b/lib/asan/asan_thread.h
@@ -66,9 +66,9 @@
   thread_return_t ThreadStart(uptr os_id,
                               atomic_uintptr_t *signal_thread_is_registered);
 
-  uptr stack_top() { return stack_top_; }
-  uptr stack_bottom() { return stack_bottom_; }
-  uptr stack_size() { return stack_size_; }
+  uptr stack_top();
+  uptr stack_bottom();
+  uptr stack_size();
   uptr tls_begin() { return tls_begin_; }
   uptr tls_end() { return tls_end_; }
   DTLS *dtls() { return dtls_; }
@@ -83,9 +83,7 @@
   };
   bool GetStackFrameAccessByAddr(uptr addr, StackFrameAccess *access);
 
-  bool AddrIsInStack(uptr addr) {
-    return addr >= stack_bottom_ && addr < stack_top_;
-  }
+  bool AddrIsInStack(uptr addr);
 
   void DeleteFakeStack(int tid) {
     if (!fake_stack_) return;
@@ -95,13 +93,19 @@
     t->Destroy(tid);
   }
 
+  void StartSwitchFiber(FakeStack **fake_stack_save, uptr bottom, uptr size);
+  void FinishSwitchFiber(FakeStack *fake_stack_save);
+
   bool has_fake_stack() {
-    return (reinterpret_cast<uptr>(fake_stack_) > 1);
+    return !atomic_load(&stack_switching_, memory_order_relaxed) &&
+           (reinterpret_cast<uptr>(fake_stack_) > 1);
   }
 
   FakeStack *fake_stack() {
     if (!__asan_option_detect_stack_use_after_return)
       return nullptr;
+    if (atomic_load(&stack_switching_, memory_order_relaxed))
+      return nullptr;
     if (!has_fake_stack())
       return AsyncSignalSafeLazyInitFakeStack();
     return fake_stack_;
@@ -127,14 +131,24 @@
   void ClearShadowForThreadStackAndTLS();
   FakeStack *AsyncSignalSafeLazyInitFakeStack();
 
+  struct StackBounds {
+    uptr bottom;
+    uptr top;
+  };
+  StackBounds GetStackBounds() const;
+
   AsanThreadContext *context_;
   thread_callback_t start_routine_;
   void *arg_;
+
   uptr stack_top_;
   uptr stack_bottom_;
-  // stack_size_ == stack_top_ - stack_bottom_;
-  // It needs to be set in a async-signal-safe manner.
-  uptr stack_size_;
+  // These variables are used when the thread is about to switch stacks.
+  uptr next_stack_top_;
+  uptr next_stack_bottom_;
+  // True if a stack switch is in progress.
+  atomic_uint8_t stack_switching_;
+
   uptr tls_begin_;
   uptr tls_end_;
   DTLS *dtls_;
diff --git a/lib/asan/asan_win.cc b/lib/asan/asan_win.cc
index dc20757..0616e40 100644
--- a/lib/asan/asan_win.cc
+++ b/lib/asan/asan_win.cc
@@ -24,6 +24,7 @@
 #include "asan_report.h"
 #include "asan_stack.h"
 #include "asan_thread.h"
+#include "asan_mapping.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_mutex.h"
 
@@ -36,7 +37,13 @@
   return __asan_option_detect_stack_use_after_return;
 }
 
-// -------------------- A workaround for the abscence of weak symbols ----- {{{
+SANITIZER_INTERFACE_ATTRIBUTE
+int __asan_should_detect_stack_use_after_scope() {
+  __asan_init();
+  return __asan_option_detect_stack_use_after_scope;
+}
+
+// -------------------- A workaround for the absence of weak symbols ----- {{{
 // We don't have a direct equivalent of weak symbols when using MSVC, but we can
 // use the /alternatename directive to tell the linker to default a specific
 // symbol to a specific value, which works nicely for allocator hooks and
@@ -46,21 +53,49 @@
 const char* __asan_default_default_options() { return ""; }
 const char* __asan_default_default_suppressions() { return ""; }
 void __asan_default_on_error() {}
+// 64-bit MSVC does not prepend an underscore to symbol names.
+#ifdef _WIN64
+#pragma comment(linker, "/alternatename:__sanitizer_malloc_hook=__sanitizer_default_malloc_hook")  // NOLINT
+#pragma comment(linker, "/alternatename:__sanitizer_free_hook=__sanitizer_default_free_hook")      // NOLINT
+#pragma comment(linker, "/alternatename:__asan_default_options=__asan_default_default_options")    // NOLINT
+#pragma comment(linker, "/alternatename:__asan_default_suppressions=__asan_default_default_suppressions")    // NOLINT
+#pragma comment(linker, "/alternatename:__asan_on_error=__asan_default_on_error")                  // NOLINT
+#else
 #pragma comment(linker, "/alternatename:___sanitizer_malloc_hook=___sanitizer_default_malloc_hook")  // NOLINT
 #pragma comment(linker, "/alternatename:___sanitizer_free_hook=___sanitizer_default_free_hook")      // NOLINT
 #pragma comment(linker, "/alternatename:___asan_default_options=___asan_default_default_options")    // NOLINT
 #pragma comment(linker, "/alternatename:___asan_default_suppressions=___asan_default_default_suppressions")    // NOLINT
 #pragma comment(linker, "/alternatename:___asan_on_error=___asan_default_on_error")                  // NOLINT
+#endif
 // }}}
 }  // extern "C"
 
-// ---------------------- Windows-specific inteceptors ---------------- {{{
+// ---------------------- Windows-specific interceptors ---------------- {{{
+INTERCEPTOR_WINAPI(void, RtlRaiseException, EXCEPTION_RECORD *ExceptionRecord) {
+  CHECK(REAL(RtlRaiseException));
+  // This is a noreturn function, unless it's one of the exceptions raised to
+  // communicate with the debugger, such as the one from OutputDebugString.
+  if (ExceptionRecord->ExceptionCode != DBG_PRINTEXCEPTION_C)
+    __asan_handle_no_return();
+  REAL(RtlRaiseException)(ExceptionRecord);
+}
+
 INTERCEPTOR_WINAPI(void, RaiseException, void *a, void *b, void *c, void *d) {
   CHECK(REAL(RaiseException));
   __asan_handle_no_return();
   REAL(RaiseException)(a, b, c, d);
 }
 
+#ifdef _WIN64
+
+INTERCEPTOR_WINAPI(int, __C_specific_handler, void *a, void *b, void *c, void *d) {  // NOLINT
+  CHECK(REAL(__C_specific_handler));
+  __asan_handle_no_return();
+  return REAL(__C_specific_handler)(a, b, c, d);
+}
+
+#else
+
 INTERCEPTOR(int, _except_handler3, void *a, void *b, void *c, void *d) {
   CHECK(REAL(_except_handler3));
   __asan_handle_no_return();
@@ -76,6 +111,7 @@
   __asan_handle_no_return();
   return REAL(_except_handler4)(a, b, c, d);
 }
+#endif
 
 static thread_return_t THREAD_CALLING_CONV asan_thread_start(void *arg) {
   AsanThread *t = (AsanThread*)arg;
@@ -101,52 +137,29 @@
                             asan_thread_start, t, thr_flags, tid);
 }
 
-namespace {
-BlockingMutex mu_for_thread_tracking(LINKER_INITIALIZED);
-
-void EnsureWorkerThreadRegistered() {
-  // FIXME: GetCurrentThread relies on TSD, which might not play well with
-  // system thread pools.  We might want to use something like reference
-  // counting to zero out GetCurrentThread() underlying storage when the last
-  // work item finishes?  Or can we disable reclaiming of threads in the pool?
-  BlockingMutexLock l(&mu_for_thread_tracking);
-  if (__asan::GetCurrentThread())
-    return;
-
-  AsanThread *t = AsanThread::Create(
-      /* start_routine */ nullptr, /* arg */ nullptr,
-      /* parent_tid */ -1, /* stack */ nullptr, /* detached */ true);
-  t->Init();
-  asanThreadRegistry().StartThread(t->tid(), 0, 0);
-  SetCurrentThread(t);
-}
-}  // namespace
-
-INTERCEPTOR_WINAPI(DWORD, NtWaitForWorkViaWorkerFactory, DWORD a, DWORD b) {
-  // NtWaitForWorkViaWorkerFactory is called from system worker pool threads to
-  // query work scheduled by BindIoCompletionCallback, QueueUserWorkItem, etc.
-  // System worker pool threads are created at arbitraty point in time and
-  // without using CreateThread, so we wrap NtWaitForWorkViaWorkerFactory
-  // instead and don't register a specific parent_tid/stack.
-  EnsureWorkerThreadRegistered();
-  return REAL(NtWaitForWorkViaWorkerFactory)(a, b);
-}
-
 // }}}
 
 namespace __asan {
 
 void InitializePlatformInterceptors() {
   ASAN_INTERCEPT_FUNC(CreateThread);
-  ASAN_INTERCEPT_FUNC(RaiseException);
+
+#ifdef _WIN64
+  ASAN_INTERCEPT_FUNC(__C_specific_handler);
+#else
   ASAN_INTERCEPT_FUNC(_except_handler3);
   ASAN_INTERCEPT_FUNC(_except_handler4);
+#endif
 
-  // NtWaitForWorkViaWorkerFactory is always linked dynamically.
-  CHECK(::__interception::OverrideFunction(
-      "NtWaitForWorkViaWorkerFactory",
-      (uptr)WRAP(NtWaitForWorkViaWorkerFactory),
-      (uptr *)&REAL(NtWaitForWorkViaWorkerFactory)));
+  // Try to intercept kernel32!RaiseException, and if that fails, intercept
+  // ntdll!RtlRaiseException instead.
+  if (!::__interception::OverrideFunction("RaiseException",
+                                          (uptr)WRAP(RaiseException),
+                                          (uptr *)&REAL(RaiseException))) {
+    CHECK(::__interception::OverrideFunction("RtlRaiseException",
+                                             (uptr)WRAP(RtlRaiseException),
+                                             (uptr *)&REAL(RtlRaiseException)));
+  }
 }
 
 void AsanApplyToGlobals(globals_op_fptr op, const void *needle) {
@@ -198,18 +211,94 @@
   UNIMPLEMENTED();
 }
 
+#if SANITIZER_WINDOWS64
+// Exception handler for dealing with shadow memory.
+static LONG CALLBACK
+ShadowExceptionHandler(PEXCEPTION_POINTERS exception_pointers) {
+  uptr page_size = GetPageSizeCached();
+  // Only handle access violations.
+  if (exception_pointers->ExceptionRecord->ExceptionCode !=
+      EXCEPTION_ACCESS_VIOLATION) {
+    return EXCEPTION_CONTINUE_SEARCH;
+  }
+
+  // Only handle access violations that land within the shadow memory.
+  uptr addr =
+      (uptr)(exception_pointers->ExceptionRecord->ExceptionInformation[1]);
+
+  // Check valid shadow range.
+  if (!AddrIsInShadow(addr)) return EXCEPTION_CONTINUE_SEARCH;
+
+  // This is an access violation while trying to read from the shadow. Commit
+  // the relevant page and let execution continue.
+
+  // Determine the address of the page that is being accessed.
+  uptr page = RoundDownTo(addr, page_size);
+
+  // Query the existing page.
+  MEMORY_BASIC_INFORMATION mem_info = {};
+  if (::VirtualQuery((LPVOID)page, &mem_info, sizeof(mem_info)) == 0)
+    return EXCEPTION_CONTINUE_SEARCH;
+
+  // Commit the page.
+  uptr result =
+      (uptr)::VirtualAlloc((LPVOID)page, page_size, MEM_COMMIT, PAGE_READWRITE);
+  if (result != page) return EXCEPTION_CONTINUE_SEARCH;
+
+  // The page mapping succeeded, so continue execution as usual.
+  return EXCEPTION_CONTINUE_EXECUTION;
+}
+
+#endif
+
+void InitializePlatformExceptionHandlers() {
+#if SANITIZER_WINDOWS64
+  // On Win64, we map memory on demand with access violation handler.
+  // Install our exception handler.
+  CHECK(AddVectoredExceptionHandler(TRUE, &ShadowExceptionHandler));
+#endif
+}
+
 static LPTOP_LEVEL_EXCEPTION_FILTER default_seh_handler;
 
+// Check based on flags if we should report this exception.
+static bool ShouldReportDeadlyException(unsigned code) {
+  switch (code) {
+    case EXCEPTION_ACCESS_VIOLATION:
+    case EXCEPTION_IN_PAGE_ERROR:
+      return common_flags()->handle_segv;
+    case EXCEPTION_BREAKPOINT:
+    case EXCEPTION_ILLEGAL_INSTRUCTION: {
+      return common_flags()->handle_sigill;
+    }
+  }
+  return false;
+}
+
+// Return the textual name for this exception.
+static const char *DescribeDeadlyException(unsigned code) {
+  switch (code) {
+    case EXCEPTION_ACCESS_VIOLATION:
+      return "access-violation";
+    case EXCEPTION_IN_PAGE_ERROR:
+      return "in-page-error";
+    case EXCEPTION_BREAKPOINT:
+      return "breakpoint";
+    case EXCEPTION_ILLEGAL_INSTRUCTION:
+      return "illegal-instruction";
+  }
+  return nullptr;
+}
+
 static long WINAPI SEHHandler(EXCEPTION_POINTERS *info) {
   EXCEPTION_RECORD *exception_record = info->ExceptionRecord;
   CONTEXT *context = info->ContextRecord;
 
-  if (exception_record->ExceptionCode == EXCEPTION_ACCESS_VIOLATION ||
-      exception_record->ExceptionCode == EXCEPTION_IN_PAGE_ERROR) {
+  if (ShouldReportDeadlyException(exception_record->ExceptionCode)) {
+    // Get the string description of the exception if this is a known deadly
+    // exception.
     const char *description =
-        (exception_record->ExceptionCode == EXCEPTION_ACCESS_VIOLATION)
-            ? "access-violation"
-            : "in-page-error";
+        DescribeDeadlyException(exception_record->ExceptionCode);
     SignalContext sig = SignalContext::Create(exception_record, context);
     ReportDeadlySignal(description, sig);
   }
@@ -246,10 +335,16 @@
 }
 
 #if !ASAN_DYNAMIC
-// Put a pointer to __asan_set_seh_filter at the end of the global list
-// of C initializers, after the default EH is set by the CRT.
-#pragma section(".CRT$XIZ", long, read)  // NOLINT
-__declspec(allocate(".CRT$XIZ"))
+// The CRT runs initializers in this order:
+// - C initializers, from XIA to XIZ
+// - C++ initializers, from XCA to XCZ
+// Prior to 2015, the CRT set the unhandled exception filter at priority XIY,
+// near the end of C initialization. Starting in 2015, it was moved to the
+// beginning of C++ initialization. We set our priority to XCAB to run
+// immediately after the CRT runs. This way, our exception filter is called
+// first and we can delegate to their filter if appropriate.
+#pragma section(".CRT$XCAB", long, read)  // NOLINT
+__declspec(allocate(".CRT$XCAB"))
     int (*__intercept_seh)() = __asan_set_seh_filter;
 #endif
 // }}}
diff --git a/lib/asan/asan_win_dll_thunk.cc b/lib/asan/asan_win_dll_thunk.cc
index 672cabf..09c3b20 100644
--- a/lib/asan/asan_win_dll_thunk.cc
+++ b/lib/asan/asan_win_dll_thunk.cc
@@ -15,12 +15,13 @@
 // See https://github.com/google/sanitizers/issues/209 for the details.
 //===----------------------------------------------------------------------===//
 
-// Only compile this code when buidling asan_dll_thunk.lib
+// Only compile this code when building asan_dll_thunk.lib
 // Using #ifdef rather than relying on Makefiles etc.
 // simplifies the build procedure.
 #ifdef ASAN_DLL_THUNK
 #include "asan_init_version.h"
 #include "interception/interception.h"
+#include "sanitizer_common/sanitizer_platform_interceptors.h"
 
 // ---------- Function interception helper functions and macros ----------- {{{1
 extern "C" {
@@ -197,9 +198,11 @@
 // Don't use the INTERFACE_FUNCTION machinery for this function as we actually
 // want to call it in the __asan_init interceptor.
 WRAP_W_V(__asan_should_detect_stack_use_after_return)
+WRAP_W_V(__asan_should_detect_stack_use_after_scope)
 
 extern "C" {
   int __asan_option_detect_stack_use_after_return;
+  int __asan_option_detect_stack_use_after_scope;
 
   // Manually wrap __asan_init as we need to initialize
   // __asan_option_detect_stack_use_after_return afterwards.
@@ -213,6 +216,8 @@
     fn();
     __asan_option_detect_stack_use_after_return =
         (__asan_should_detect_stack_use_after_return() != 0);
+    __asan_option_detect_stack_use_after_scope =
+        (__asan_should_detect_stack_use_after_scope() != 0);
 
     InterceptHooks();
   }
@@ -315,7 +320,7 @@
 INTERFACE_FUNCTION(__sanitizer_cov_with_check)
 INTERFACE_FUNCTION(__sanitizer_get_allocated_size)
 INTERFACE_FUNCTION(__sanitizer_get_coverage_guards)
-INTERFACE_FUNCTION(__sanitizer_get_coverage_pc_buffer)
+INTERFACE_FUNCTION(__sanitizer_get_coverage_pc_buffer_pos)
 INTERFACE_FUNCTION(__sanitizer_get_current_allocated_bytes)
 INTERFACE_FUNCTION(__sanitizer_get_estimated_allocated_size)
 INTERFACE_FUNCTION(__sanitizer_get_free_bytes)
@@ -333,8 +338,10 @@
 INTERFACE_FUNCTION(__sanitizer_get_number_of_counters)
 INTERFACE_FUNCTION(__sanitizer_update_counter_bitset_and_clear_counters)
 INTERFACE_FUNCTION(__sanitizer_sandbox_on_notify)
+INTERFACE_FUNCTION(__sanitizer_set_coverage_pc_buffer)
 INTERFACE_FUNCTION(__sanitizer_set_death_callback)
 INTERFACE_FUNCTION(__sanitizer_set_report_path)
+INTERFACE_FUNCTION(__sanitizer_set_report_fd)
 INTERFACE_FUNCTION(__sanitizer_unaligned_load16)
 INTERFACE_FUNCTION(__sanitizer_unaligned_load32)
 INTERFACE_FUNCTION(__sanitizer_unaligned_load64)
@@ -342,21 +349,28 @@
 INTERFACE_FUNCTION(__sanitizer_unaligned_store32)
 INTERFACE_FUNCTION(__sanitizer_unaligned_store64)
 INTERFACE_FUNCTION(__sanitizer_verify_contiguous_container)
+INTERFACE_FUNCTION(__sanitizer_install_malloc_and_free_hooks)
+INTERFACE_FUNCTION(__sanitizer_start_switch_fiber)
+INTERFACE_FUNCTION(__sanitizer_finish_switch_fiber)
 
 // TODO(timurrrr): Add more interface functions on the as-needed basis.
 
 // ----------------- Memory allocation functions ---------------------
 WRAP_V_W(free)
+WRAP_V_W(_free_base)
 WRAP_V_WW(_free_dbg)
 
 WRAP_W_W(malloc)
+WRAP_W_W(_malloc_base)
 WRAP_W_WWWW(_malloc_dbg)
 
 WRAP_W_WW(calloc)
+WRAP_W_WW(_calloc_base)
 WRAP_W_WWWWW(_calloc_dbg)
 WRAP_W_WWW(_calloc_impl)
 
 WRAP_W_WW(realloc)
+WRAP_W_WW(_realloc_base)
 WRAP_W_WWW(_realloc_dbg)
 WRAP_W_WWW(_recalloc)
 
@@ -371,6 +385,10 @@
 
 INTERCEPT_LIBRARY_FUNCTION(atoi);
 INTERCEPT_LIBRARY_FUNCTION(atol);
+
+#ifdef _WIN64
+INTERCEPT_LIBRARY_FUNCTION(__C_specific_handler);
+#else
 INTERCEPT_LIBRARY_FUNCTION(_except_handler3);
 
 // _except_handler4 checks -GS cookie which is different for each module, so we
@@ -379,10 +397,13 @@
   __asan_handle_no_return();
   return REAL(_except_handler4)(a, b, c, d);
 }
+#endif
 
 INTERCEPT_LIBRARY_FUNCTION(frexp);
 INTERCEPT_LIBRARY_FUNCTION(longjmp);
+#if SANITIZER_INTERCEPT_MEMCHR
 INTERCEPT_LIBRARY_FUNCTION(memchr);
+#endif
 INTERCEPT_LIBRARY_FUNCTION(memcmp);
 INTERCEPT_LIBRARY_FUNCTION(memcpy);
 INTERCEPT_LIBRARY_FUNCTION(memmove);
@@ -392,12 +413,14 @@
 INTERCEPT_LIBRARY_FUNCTION(strcmp);
 INTERCEPT_LIBRARY_FUNCTION(strcpy);  // NOLINT
 INTERCEPT_LIBRARY_FUNCTION(strcspn);
+INTERCEPT_LIBRARY_FUNCTION(strdup);
 INTERCEPT_LIBRARY_FUNCTION(strlen);
 INTERCEPT_LIBRARY_FUNCTION(strncat);
 INTERCEPT_LIBRARY_FUNCTION(strncmp);
 INTERCEPT_LIBRARY_FUNCTION(strncpy);
 INTERCEPT_LIBRARY_FUNCTION(strnlen);
 INTERCEPT_LIBRARY_FUNCTION(strpbrk);
+INTERCEPT_LIBRARY_FUNCTION(strrchr);
 INTERCEPT_LIBRARY_FUNCTION(strspn);
 INTERCEPT_LIBRARY_FUNCTION(strstr);
 INTERCEPT_LIBRARY_FUNCTION(strtol);
@@ -407,7 +430,9 @@
 // is defined.
 void InterceptHooks() {
   INTERCEPT_HOOKS();
+#ifndef _WIN64
   INTERCEPT_FUNCTION(_except_handler4);
+#endif
 }
 
 // We want to call __asan_init before C/C++ initializers/constructors are
diff --git a/lib/asan/asan_win_dynamic_runtime_thunk.cc b/lib/asan/asan_win_dynamic_runtime_thunk.cc
index 73e5207..c790714 100644
--- a/lib/asan/asan_win_dynamic_runtime_thunk.cc
+++ b/lib/asan/asan_win_dynamic_runtime_thunk.cc
@@ -1,4 +1,4 @@
-//===-- asan_win_uar_thunk.cc ---------------------------------------------===//
+//===-- asan_win_dynamic_runtime_thunk.cc ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,12 +15,13 @@
 //
 // This includes:
 //  - forwarding the detect_stack_use_after_return runtime option
+//  - forwarding the detect_stack_use_after_scope runtime option
 //  - working around deficiencies of the MD runtime
-//  - installing a custom SEH handlerx
+//  - installing a custom SEH handler
 //
 //===----------------------------------------------------------------------===//
 
-// Only compile this code when buidling asan_dynamic_runtime_thunk.lib
+// Only compile this code when building asan_dynamic_runtime_thunk.lib
 // Using #ifdef rather than relying on Makefiles etc.
 // simplifies the build procedure.
 #ifdef ASAN_DYNAMIC_RUNTIME_THUNK
@@ -29,7 +30,7 @@
 
 // First, declare CRT sections we'll be using in this file
 #pragma section(".CRT$XID", long, read)  // NOLINT
-#pragma section(".CRT$XIZ", long, read)  // NOLINT
+#pragma section(".CRT$XCAB", long, read)  // NOLINT
 #pragma section(".CRT$XTW", long, read)  // NOLINT
 #pragma section(".CRT$XTY", long, read)  // NOLINT
 
@@ -42,8 +43,8 @@
 // attribute adds __imp_ prefix to the symbol name of a variable.
 // Since in general we don't know if a given TU is going to be used
 // with a MT or MD runtime and we don't want to use ugly __imp_ names on Windows
-// just to work around this issue, let's clone the a variable that is
-// constant after initialization anyways.
+// just to work around this issue, let's clone the variable that is constant
+// after initialization anyways.
 extern "C" {
 __declspec(dllimport) int __asan_should_detect_stack_use_after_return();
 int __asan_option_detect_stack_use_after_return =
@@ -51,6 +52,23 @@
 }
 
 ////////////////////////////////////////////////////////////////////////////////
+// Define a copy of __asan_option_detect_stack_use_after_scope that should be
+// used when linking an MD runtime with a set of object files on Windows.
+//
+// The ASan MD runtime dllexports '__asan_option_detect_stack_use_after_scope',
+// so normally we would just dllimport it.  Unfortunately, the dllimport
+// attribute adds __imp_ prefix to the symbol name of a variable.
+// Since in general we don't know if a given TU is going to be used
+// with a MT or MD runtime and we don't want to use ugly __imp_ names on Windows
+// just to work around this issue, let's clone the variable that is constant
+// after initialization anyways.
+extern "C" {
+__declspec(dllimport) int __asan_should_detect_stack_use_after_scope();
+int __asan_option_detect_stack_use_after_scope =
+    __asan_should_detect_stack_use_after_scope();
+}
+
+////////////////////////////////////////////////////////////////////////////////
 // For some reason, the MD CRT doesn't call the C/C++ terminators on DLL
 // unload or on exit.  ASan relies on LLVM global_dtors to call
 // __asan_unregister_globals on these events, which unfortunately doesn't work
@@ -93,7 +111,8 @@
 
 // Unfortunately, putting a pointer to __asan_set_seh_filter into
 // __asan_intercept_seh gets optimized out, so we have to use an extra function.
-__declspec(allocate(".CRT$XIZ")) int (*__asan_seh_interceptor)() = SetSEHFilter;
+__declspec(allocate(".CRT$XCAB")) int (*__asan_seh_interceptor)() =
+    SetSEHFilter;
 }
 
 #endif // ASAN_DYNAMIC_RUNTIME_THUNK
diff --git a/lib/asan/scripts/asan_device_setup b/lib/asan/scripts/asan_device_setup
index 6cb7b94..52794b1 100755
--- a/lib/asan/scripts/asan_device_setup
+++ b/lib/asan/scripts/asan_device_setup
@@ -308,11 +308,18 @@
   local _from=$1
   local _to=$2
   local _asan_rt=$3
+  if [[ PRE_L -eq 0 ]]; then
+    # LD_PRELOAD parsing is broken in N if it starts with ":". Luckily, it is
+    # unset in the system environment since L.
+    local _ld_preload=$_asan_rt
+  else
+    local _ld_preload=\$LD_PRELOAD:$_asan_rt
+  fi
   cat <<EOF >"$TMPDIR/$_from"
 #!/system/bin/sh-from-zygote
 ASAN_OPTIONS=$ASAN_OPTIONS \\
 ASAN_ACTIVATION_OPTIONS=include_if_exists=/data/local/tmp/asan.options.%b \\
-LD_PRELOAD=\$LD_PRELOAD:$_asan_rt \\
+LD_PRELOAD=$_ld_preload \\
 exec $_to \$@
 
 EOF
diff --git a/lib/asan/tests/CMakeLists.txt b/lib/asan/tests/CMakeLists.txt
index 08d19d2..e67d0fb 100644
--- a/lib/asan/tests/CMakeLists.txt
+++ b/lib/asan/tests/CMakeLists.txt
@@ -45,6 +45,10 @@
 else()
   list(APPEND ASAN_UNITTEST_COMMON_CFLAGS -g)
 endif()
+if(MSVC)
+  list(APPEND ASAN_UNITTEST_COMMON_CFLAGS -gcodeview)
+endif()
+list(APPEND ASAN_UNITTEST_COMMON_LINKFLAGS -g)
 
 # Use -D instead of definitions to please custom compile command.
 list(APPEND ASAN_UNITTEST_COMMON_CFLAGS
@@ -119,7 +123,11 @@
 # options in ${ARGN}, and add it to the object list.
 macro(asan_compile obj_list source arch kind)
   get_filename_component(basename ${source} NAME)
-  set(output_obj "${obj_list}.${basename}.${arch}${kind}.o")
+  if(CMAKE_CONFIGURATION_TYPES)
+    set(output_obj "${CMAKE_CFG_INTDIR}/${obj_list}.${basename}.${arch}${kind}.o")
+  else()
+    set(output_obj "${obj_list}.${basename}.${arch}${kind}.o")
+  endif()
   get_target_flags_for_arch(${arch} TARGET_CFLAGS)
   set(COMPILE_DEPS ${ASAN_UNITTEST_HEADERS} ${ASAN_BLACKLIST_FILE})
   if(NOT COMPILER_RT_STANDALONE_BUILD)
@@ -142,11 +150,17 @@
   endif()
   if(TEST_WITH_TEST_RUNTIME)
     list(APPEND TEST_DEPS ${ASAN_TEST_RUNTIME})
-    if(NOT MSVC)
-      list(APPEND TEST_OBJECTS lib${ASAN_TEST_RUNTIME}.a)
+    if(CMAKE_CONFIGURATION_TYPES)
+     set(configuration_path "${CMAKE_CFG_INTDIR}/")
     else()
-      list(APPEND TEST_OBJECTS ${ASAN_TEST_RUNTIME}.lib)
+     set(configuration_path "")
     endif()
+    if(NOT MSVC)
+      set(asan_test_runtime_path ${configuration_path}lib${ASAN_TEST_RUNTIME}.a)
+    else()
+      set(asan_test_runtime_path ${configuration_path}${ASAN_TEST_RUNTIME}.lib)
+    endif()
+    list(APPEND TEST_OBJECTS ${asan_test_runtime_path})
   endif()
   add_compiler_rt_test(${test_suite} ${test_name}
                        SUBDIR ${TEST_SUBDIR}
@@ -158,15 +172,15 @@
 
 # Main AddressSanitizer unit tests.
 add_custom_target(AsanUnitTests)
-set_target_properties(AsanUnitTests PROPERTIES FOLDER "ASan unit tests")
+set_target_properties(AsanUnitTests PROPERTIES FOLDER "Compiler-RT Tests")
+
 # AddressSanitizer unit tests with dynamic runtime (on platforms where it's
 # not the default).
 add_custom_target(AsanDynamicUnitTests)
-set_target_properties(AsanDynamicUnitTests
-  PROPERTIES FOLDER "ASan unit tests with dynamic runtime")
+set_target_properties(AsanDynamicUnitTests PROPERTIES FOLDER "Compiler-RT Tests")
 # ASan benchmarks (not actively used now).
 add_custom_target(AsanBenchmarks)
-set_target_properties(AsanBenchmarks PROPERTIES FOLDER "Asan benchmarks")
+set_target_properties(AsanBenchmarks PROPERTIES FOLDER "Compiler-RT Tests")
 
 set(ASAN_NOINST_TEST_SOURCES
   ${COMPILER_RT_GTEST_SOURCE}
@@ -205,13 +219,30 @@
     asan_compile(ASAN_INST_TEST_OBJECTS asan_mac_test_helpers.mm ${arch} ${kind}
                  ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS} -ObjC ${ARGN})
   endif()
-  file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/default")
+
+  # Create the 'default' folder where ASAN tests are produced.
+  if(CMAKE_CONFIGURATION_TYPES)
+    foreach(build_mode ${CMAKE_CONFIGURATION_TYPES})
+      file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/default/${build_mode}")
+    endforeach()
+  else()
+    file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/default")
+  endif()
+
   add_asan_test(AsanUnitTests "Asan-${arch}${kind}-Test"
                 ${arch} ${kind} SUBDIR "default"
                 OBJECTS ${ASAN_INST_TEST_OBJECTS}
                 LINKFLAGS ${ASAN_UNITTEST_INSTRUMENTED_LINKFLAGS})
   if(COMPILER_RT_ASAN_HAS_STATIC_RUNTIME)
-    file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/dynamic")
+    # Create the 'dynamic' folder where ASAN tests are produced.
+    if(CMAKE_CONFIGURATION_TYPES)
+      foreach(build_mode ${CMAKE_CONFIGURATION_TYPES})
+        file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/dynamic/${build_mode}")
+      endforeach()
+    else()
+      file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/dynamic")
+    endif()
+
     add_asan_test(AsanDynamicUnitTests "Asan-${arch}${kind}-Dynamic-Test"
                   ${arch} ${kind} SUBDIR "dynamic"
                   OBJECTS ${ASAN_INST_TEST_OBJECTS}
@@ -241,7 +272,8 @@
   endif()
   add_library(${ASAN_TEST_RUNTIME} STATIC ${ASAN_TEST_RUNTIME_OBJECTS})
   set_target_properties(${ASAN_TEST_RUNTIME} PROPERTIES
-    ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+    ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+    FOLDER "Compiler-RT Runtime tests")
   # Uninstrumented tests.
   set(ASAN_NOINST_TEST_OBJECTS)
   foreach(src ${ASAN_NOINST_TEST_SOURCES})
diff --git a/lib/asan/tests/asan_str_test.cc b/lib/asan/tests/asan_str_test.cc
index 89b0d3d..0b86702 100644
--- a/lib/asan/tests/asan_str_test.cc
+++ b/lib/asan/tests/asan_str_test.cc
@@ -20,10 +20,41 @@
 static char global_string[] = "global";
 static size_t global_string_length = 6;
 
+const char kStackReadUnderflow[] =
+#if !GTEST_USES_SIMPLE_RE
+    ASAN_PCRE_DOTALL
+    "READ.*"
+#endif
+    "underflows this variable";
+const char kStackReadOverflow[] =
+#if !GTEST_USES_SIMPLE_RE
+    ASAN_PCRE_DOTALL
+    "READ.*"
+#endif
+    "overflows this variable";
+
+namespace {
+enum class OOBKind {
+  Heap,
+  Stack,
+  Global,
+};
+
+string LeftOOBReadMessage(OOBKind oob_kind, int oob_distance) {
+  return oob_kind == OOBKind::Stack ? kStackReadUnderflow
+                                    : ::LeftOOBReadMessage(oob_distance);
+}
+
+string RightOOBReadMessage(OOBKind oob_kind, int oob_distance) {
+  return oob_kind == OOBKind::Stack ? kStackReadOverflow
+                                    : ::RightOOBReadMessage(oob_distance);
+}
+}  // namespace
+
 // Input to a test is a zero-terminated string str with given length
 // Accesses to the bytes to the left and to the right of str
 // are presumed to produce OOB errors
-void StrLenOOBTestTemplate(char *str, size_t length, bool is_global) {
+void StrLenOOBTestTemplate(char *str, size_t length, OOBKind oob_kind) {
   // Normal strlen calls
   EXPECT_EQ(strlen(str), length);
   if (length > 0) {
@@ -31,17 +62,18 @@
     EXPECT_EQ(0U, strlen(str + length));
   }
   // Arg of strlen is not malloced, OOB access
-  if (!is_global) {
+  if (oob_kind != OOBKind::Global) {
     // We don't insert RedZones to the left of global variables
-    EXPECT_DEATH(Ident(strlen(str - 1)), LeftOOBReadMessage(1));
-    EXPECT_DEATH(Ident(strlen(str - 5)), LeftOOBReadMessage(5));
+    EXPECT_DEATH(Ident(strlen(str - 1)), LeftOOBReadMessage(oob_kind, 1));
+    EXPECT_DEATH(Ident(strlen(str - 5)), LeftOOBReadMessage(oob_kind, 5));
   }
-  EXPECT_DEATH(Ident(strlen(str + length + 1)), RightOOBReadMessage(0));
+  EXPECT_DEATH(Ident(strlen(str + length + 1)),
+               RightOOBReadMessage(oob_kind, 0));
   // Overwrite terminator
   str[length] = 'a';
   // String is not zero-terminated, strlen will lead to OOB access
-  EXPECT_DEATH(Ident(strlen(str)), RightOOBReadMessage(0));
-  EXPECT_DEATH(Ident(strlen(str + length)), RightOOBReadMessage(0));
+  EXPECT_DEATH(Ident(strlen(str)), RightOOBReadMessage(oob_kind, 0));
+  EXPECT_DEATH(Ident(strlen(str + length)), RightOOBReadMessage(oob_kind, 0));
   // Restore terminator
   str[length] = 0;
 }
@@ -57,11 +89,9 @@
   }
   heap_string[length] = 0;
   stack_string[length] = 0;
-  StrLenOOBTestTemplate(heap_string, length, false);
-  // TODO(samsonov): Fix expected messages in StrLenOOBTestTemplate to
-  //      make test for stack_string work. Or move it to output tests.
-  // StrLenOOBTestTemplate(stack_string, length, false);
-  StrLenOOBTestTemplate(global_string, global_string_length, true);
+  StrLenOOBTestTemplate(heap_string, length, OOBKind::Heap);
+  StrLenOOBTestTemplate(stack_string, length, OOBKind::Stack);
+  StrLenOOBTestTemplate(global_string, global_string_length, OOBKind::Global);
   free(heap_string);
 }
 
@@ -186,23 +216,8 @@
 typedef char*(*PointerToStrChr1)(const char*, int);
 typedef char*(*PointerToStrChr2)(char*, int);
 
-UNUSED static void RunStrChrTest(PointerToStrChr1 StrChr) {
-  size_t size = Ident(100);
-  char *str = MallocAndMemsetString(size);
-  str[10] = 'q';
-  str[11] = '\0';
-  EXPECT_EQ(str, StrChr(str, 'z'));
-  EXPECT_EQ(str + 10, StrChr(str, 'q'));
-  EXPECT_EQ(NULL, StrChr(str, 'a'));
-  // StrChr argument points to not allocated memory.
-  EXPECT_DEATH(Ident(StrChr(str - 1, 'z')), LeftOOBReadMessage(1));
-  EXPECT_DEATH(Ident(StrChr(str + size, 'z')), RightOOBReadMessage(0));
-  // Overwrite the terminator and hit not allocated memory.
-  str[11] = 'z';
-  EXPECT_DEATH(Ident(StrChr(str, 'a')), RightOOBReadMessage(0));
-  free(str);
-}
-UNUSED static void RunStrChrTest(PointerToStrChr2 StrChr) {
+template<typename StrChrFn>
+static void RunStrChrTestImpl(StrChrFn *StrChr) {
   size_t size = Ident(100);
   char *str = MallocAndMemsetString(size);
   str[10] = 'q';
@@ -219,11 +234,19 @@
   free(str);
 }
 
+// Prefer to use the standard signature if both are available.
+UNUSED static void RunStrChrTest(PointerToStrChr1 StrChr, ...) {
+  RunStrChrTestImpl(StrChr);
+}
+UNUSED static void RunStrChrTest(PointerToStrChr2 StrChr, int) {
+  RunStrChrTestImpl(StrChr);
+}
+
 TEST(AddressSanitizer, StrChrAndIndexOOBTest) {
-  RunStrChrTest(&strchr);
+  RunStrChrTest(&strchr, 0);
 // No index() on Windows and on Android L.
 #if !defined(_WIN32) && !defined(__ANDROID__)
-  RunStrChrTest(&index);
+  RunStrChrTest(&index, 0);
 #endif
 }
 
@@ -434,12 +457,14 @@
 #if !defined(__APPLE__) || !defined(MAC_OS_X_VERSION_10_7) || \
     (MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_7)
   // Check "memcpy". Use Ident() to avoid inlining.
+#if PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE
   memset(str, 'z', size);
   Ident(memcpy)(str + 1, str + 11, 10);
   Ident(memcpy)(str, str, 0);
   EXPECT_DEATH(Ident(memcpy)(str, str + 14, 15), OverlapErrorMessage("memcpy"));
   EXPECT_DEATH(Ident(memcpy)(str + 14, str, 15), OverlapErrorMessage("memcpy"));
 #endif
+#endif
 
   // We do not treat memcpy with to==from as a bug.
   // See http://llvm.org/bugs/show_bug.cgi?id=11763.
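
The two thin RunStrChrTest wrappers above rely on a standard C++ overload-resolution rule: a match through an ellipsis parameter ranks below any other conversion, so the (pointer, int) overload is chosen whenever the platform's strchr/index signature makes it viable, and the varargs overload only serves as the fallback for the legacy const-char* signature. A minimal standalone sketch of that idiom, with hypothetical names that are not part of the patch:

    #include <cstdio>

    typedef char *(*TakesConstCharPtr)(const char *, int);  // legacy-style signature
    typedef char *(*TakesCharPtr)(char *, int);              // "standard"-style signature

    // The '...' overload is the fallback: an ellipsis conversion is the
    // lowest-ranked conversion sequence, so the (ptr, int) overload wins
    // whenever it is viable.
    void Dispatch(TakesConstCharPtr, ...) { std::puts("picked const char* signature"); }
    void Dispatch(TakesCharPtr, int)      { std::puts("picked char* signature"); }

    // An overload set mimicking a C++ <cstring>-style strchr...
    char *Find(const char *s, int) { return const_cast<char *>(s); }
    char *Find(char *s, int)       { return s; }
    // ...and a lone C-style declaration.
    char *LegacyFind(const char *s, int) { return const_cast<char *>(s); }

    int main() {
      Dispatch(&Find, 0);       // both overloads viable; exact 'int' beats '...'
      Dispatch(&LegacyFind, 0); // only the '...' overload is viable
      return 0;
    }
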
diff --git a/lib/asan/tests/asan_test.cc b/lib/asan/tests/asan_test.cc
index 71fb27a..6a95c3f 100644
--- a/lib/asan/tests/asan_test.cc
+++ b/lib/asan/tests/asan_test.cc
@@ -300,6 +300,7 @@
   }
 }
 
+#if !GTEST_USES_SIMPLE_RE
 TEST(AddressSanitizer, HugeMallocTest) {
   if (SANITIZER_WORDSIZE != 64 || ASAN_AVOID_EXPENSIVE_TESTS) return;
   size_t n_megs = 4100;
@@ -307,6 +308,7 @@
                "is located 1 bytes to the left|"
                "AddressSanitizer failed to allocate");
 }
+#endif
 
 #if SANITIZER_TEST_HAS_MEMALIGN
 void MemalignRun(size_t align, size_t size, int idx) {
@@ -595,9 +597,8 @@
 }
 
 #if !defined(__ANDROID__) && !defined(__arm__) && \
-    !defined(__powerpc64__) && !defined(__powerpc__) && \
     !defined(__aarch64__) && !defined(__mips__) && \
-    !defined(__mips64)
+    !defined(__mips64) && !defined(__s390__)
 NOINLINE void BuiltinLongJmpFunc1(jmp_buf buf) {
   // create three red zones for these two stack objects.
   int a;
@@ -609,7 +610,7 @@
   __builtin_longjmp((void**)buf, 1);
 }
 
-// Does not work on Power and ARM:
+// Does not work on ARM:
 // https://github.com/google/sanitizers/issues/185
 TEST(AddressSanitizer, BuiltinLongJmpTest) {
   static jmp_buf buf;
@@ -619,9 +620,9 @@
     TouchStackFunc();
   }
 }
-#endif  // !defined(__ANDROID__) && !defined(__powerpc64__) &&
-        // !defined(__powerpc__) && !defined(__arm__) &&
-        // !defined(__mips__) && !defined(__mips64)
+#endif  // !defined(__ANDROID__) && !defined(__arm__) &&
+        // !defined(__aarch64__) && !defined(__mips__) &&
+        // !defined(__mips64) && !defined(__s390__)
 
 TEST(AddressSanitizer, UnderscopeLongJmpTest) {
   static jmp_buf buf;
@@ -809,9 +810,6 @@
   free(s);
 }
 
-// TODO(samsonov): Add a test with malloc(0)
-// TODO(samsonov): Add tests for str* and mem* functions.
-
 NOINLINE static int LargeFunction(bool do_bad_access) {
   int *x = new int[100];
   x[0]++;
@@ -941,6 +939,8 @@
 #else
 # if defined(__powerpc64__)
   char *addr = (char*)0x024000800000;
+# elif defined(__s390x__)
+  char *addr = (char*)0x11000000000000;
 # else
   char *addr = (char*)0x0000100000080000;
 # endif
@@ -1166,15 +1166,21 @@
   return string("AddressSanitizer: alloc-dealloc-mismatch \\(") + str;
 }
 
+static string MismatchOrNewDeleteTypeStr(const string &mismatch_str) {
+  return "(" + MismatchStr(mismatch_str) +
+         ")|(AddressSanitizer: new-delete-type-mismatch)";
+}
+
 TEST(AddressSanitizer, AllocDeallocMismatch) {
   EXPECT_DEATH(free(Ident(new int)),
                MismatchStr("operator new vs free"));
   EXPECT_DEATH(free(Ident(new int[2])),
                MismatchStr("operator new \\[\\] vs free"));
-  EXPECT_DEATH(delete (Ident(new int[2])),
-               MismatchStr("operator new \\[\\] vs operator delete"));
-  EXPECT_DEATH(delete (Ident((int*)malloc(2 * sizeof(int)))),
-               MismatchStr("malloc vs operator delete"));
+  EXPECT_DEATH(
+      delete (Ident(new int[2])),
+      MismatchOrNewDeleteTypeStr("operator new \\[\\] vs operator delete"));
+  EXPECT_DEATH(delete (Ident((int *)malloc(2 * sizeof(int)))),
+               MismatchOrNewDeleteTypeStr("malloc vs operator delete"));
   EXPECT_DEATH(delete [] (Ident(new int)),
                MismatchStr("operator new vs operator delete \\[\\]"));
   EXPECT_DEATH(delete [] (Ident((int*)malloc(2 * sizeof(int)))),
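
MismatchOrNewDeleteTypeStr widens the expected death pattern because the same bad deallocation can be reported either as an alloc-dealloc-mismatch or as a new-delete-type-mismatch, so the test accepts either report. A stripped-down sketch of the two offending shapes the test drives through EXPECT_DEATH (illustration only; under ASan with the alloc_dealloc_mismatch check enabled the process dies at the first bad delete, which is why the real test runs each case in its own death-test child):

    #include <cstdlib>

    int main() {
      int *a = new int[2];
      delete a;   // array new freed with scalar delete: reported as
                  // alloc-dealloc-mismatch or new-delete-type-mismatch

      int *b = static_cast<int *>(std::malloc(2 * sizeof(int)));
      delete b;   // malloc'd memory freed with operator delete: likewise fatal
      return 0;
    }
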
diff --git a/lib/builtins/CMakeLists.txt b/lib/builtins/CMakeLists.txt
index 7ac4eea..9d2154b 100644
--- a/lib/builtins/CMakeLists.txt
+++ b/lib/builtins/CMakeLists.txt
@@ -2,9 +2,31 @@
 # generic implementations of the core runtime library along with optimized
 # architecture-specific code in various subdirectories.
 
+if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
+  cmake_minimum_required(VERSION 3.4.3)
+
+  project(CompilerRTBuiltins C ASM)
+  set(COMPILER_RT_STANDALONE_BUILD TRUE)
+  set(COMPILER_RT_BUILTINS_STANDALONE_BUILD TRUE)
+  list(INSERT CMAKE_MODULE_PATH 0
+    "${CMAKE_SOURCE_DIR}/../../cmake"
+    "${CMAKE_SOURCE_DIR}/../../cmake/Modules")
+  include(base-config-ix)
+  include(CompilerRTUtils)
+
+  load_llvm_config()
+  construct_compiler_rt_default_triple()
+
+  if(APPLE)
+    include(CompilerRTDarwinUtils)
+  endif()
+  include(AddCompilerRT)
+endif()
+
+include(builtin-config-ix)
+
 # TODO: Need to add a mechanism for logging errors when builtin source files are
 # added to a sub-directory and not this CMakeLists file.
-
 set(GENERIC_SOURCES
   absvdi2.c
   absvsi2.c
@@ -20,8 +42,6 @@
   ashlti3.c
   ashrdi3.c
   ashrti3.c
-  # FIXME: atomic.c may only be compiled if host compiler understands _Atomic
-  # atomic.c
   clear_cache.c
   clzdi2.c
   clzsi2.c
@@ -30,6 +50,7 @@
   cmpti2.c
   comparedf2.c
   comparesf2.c
+  cpu_model.c
   ctzdi2.c
   ctzsi2.c
   ctzti2.c
@@ -143,6 +164,21 @@
   umodsi3.c
   umodti3.c)
 
+if(COMPILER_RT_HAS_ATOMIC_KEYWORD)
+  set(GENERIC_SOURCES
+    ${GENERIC_SOURCES}
+    atomic.c)
+endif()
+
+set(MSVC_SOURCES
+ divsc3.c
+ divdc3.c
+ divxc3.c
+ mulsc3.c
+ muldc3.c
+ mulxc3.c)
+
+
 if(APPLE)
   set(GENERIC_SOURCES
     ${GENERIC_SOURCES}
@@ -216,80 +252,27 @@
       ${i386_SOURCES})
 else () # MSVC
   # Use C versions of functions when building on MSVC
-  # MSVC's assembler takes Intel syntax, not AT&T syntax
+  # MSVC's assembler takes Intel syntax, not AT&T syntax.
+  # Also use only MSVC compilable builtin implementations.
   set(x86_64_SOURCES
       x86_64/floatdidf.c
       x86_64/floatdisf.c
       x86_64/floatdixf.c
-      ${GENERIC_SOURCES})
+      ${MSVC_SOURCES})
   set(x86_64h_SOURCES ${x86_64_SOURCES})
-  set(i386_SOURCES ${GENERIC_SOURCES})
+  set(i386_SOURCES ${MSVC_SOURCES})
   set(i686_SOURCES ${i386_SOURCES})
 endif () # if (NOT MSVC)
 
 set(arm_SOURCES
-  arm/adddf3vfp.S
-  arm/addsf3vfp.S
-  arm/aeabi_cdcmp.S
-  arm/aeabi_cdcmpeq_check_nan.c
-  arm/aeabi_cfcmp.S
-  arm/aeabi_cfcmpeq_check_nan.c
-  arm/aeabi_dcmp.S
-  arm/aeabi_div0.c
-  arm/aeabi_drsub.c
-  arm/aeabi_fcmp.S
-  arm/aeabi_frsub.c
-  arm/aeabi_idivmod.S
-  arm/aeabi_ldivmod.S
-  arm/aeabi_memcmp.S
-  arm/aeabi_memcpy.S
-  arm/aeabi_memmove.S
-  arm/aeabi_memset.S
-  arm/aeabi_uidivmod.S
-  arm/aeabi_uldivmod.S
   arm/bswapdi2.S
   arm/bswapsi2.S
   arm/clzdi2.S
   arm/clzsi2.S
   arm/comparesf2.S
-  arm/divdf3vfp.S
   arm/divmodsi4.S
-  arm/divsf3vfp.S
   arm/divsi3.S
-  arm/eqdf2vfp.S
-  arm/eqsf2vfp.S
-  arm/extendsfdf2vfp.S
-  arm/fixdfsivfp.S
-  arm/fixsfsivfp.S
-  arm/fixunsdfsivfp.S
-  arm/fixunssfsivfp.S
-  arm/floatsidfvfp.S
-  arm/floatsisfvfp.S
-  arm/floatunssidfvfp.S
-  arm/floatunssisfvfp.S
-  arm/gedf2vfp.S
-  arm/gesf2vfp.S
-  arm/gtdf2vfp.S
-  arm/gtsf2vfp.S
-  arm/ledf2vfp.S
-  arm/lesf2vfp.S
-  arm/ltdf2vfp.S
-  arm/ltsf2vfp.S
   arm/modsi3.S
-  arm/muldf3vfp.S
-  arm/mulsf3vfp.S
-  arm/nedf2vfp.S
-  arm/negdf2vfp.S
-  arm/negsf2vfp.S
-  arm/nesf2vfp.S
-  arm/restore_vfp_d8_d15_regs.S
-  arm/save_vfp_d8_d15_regs.S
-  arm/subdf3vfp.S
-  arm/subsf3vfp.S
-  arm/switch16.S
-  arm/switch32.S
-  arm/switch8.S
-  arm/switchu8.S
   arm/sync_fetch_and_add_4.S
   arm/sync_fetch_and_add_8.S
   arm/sync_fetch_and_and_4.S
@@ -310,15 +293,88 @@
   arm/sync_fetch_and_umin_8.S
   arm/sync_fetch_and_xor_4.S
   arm/sync_fetch_and_xor_8.S
-  arm/sync_synchronize.S
-  arm/truncdfsf2vfp.S
   arm/udivmodsi4.S
   arm/udivsi3.S
   arm/umodsi3.S
-  arm/unorddf2vfp.S
-  arm/unordsf2vfp.S
   ${GENERIC_SOURCES})
 
+set(arm_EABI_SOURCES
+  arm/aeabi_cdcmp.S
+  arm/aeabi_cdcmpeq_check_nan.c
+  arm/aeabi_cfcmp.S
+  arm/aeabi_cfcmpeq_check_nan.c
+  arm/aeabi_dcmp.S
+  arm/aeabi_div0.c
+  arm/aeabi_drsub.c
+  arm/aeabi_fcmp.S
+  arm/aeabi_frsub.c
+  arm/aeabi_idivmod.S
+  arm/aeabi_ldivmod.S
+  arm/aeabi_memcmp.S
+  arm/aeabi_memcpy.S
+  arm/aeabi_memmove.S
+  arm/aeabi_memset.S
+  arm/aeabi_uidivmod.S
+  arm/aeabi_uldivmod.S)
+set(arm_Thumb1_JT_SOURCES
+  arm/switch16.S
+  arm/switch32.S
+  arm/switch8.S
+  arm/switchu8.S)
+set(arm_Thumb1_SjLj_EH_SOURCES
+  arm/restore_vfp_d8_d15_regs.S
+  arm/save_vfp_d8_d15_regs.S)
+set(arm_Thumb1_VFPv2_SOURCES
+  arm/adddf3vfp.S
+  arm/addsf3vfp.S
+  arm/divdf3vfp.S
+  arm/divsf3vfp.S
+  arm/eqdf2vfp.S
+  arm/eqsf2vfp.S
+  arm/extendsfdf2vfp.S
+  arm/fixdfsivfp.S
+  arm/fixsfsivfp.S
+  arm/fixunsdfsivfp.S
+  arm/fixunssfsivfp.S
+  arm/floatsidfvfp.S
+  arm/floatsisfvfp.S
+  arm/floatunssidfvfp.S
+  arm/floatunssisfvfp.S
+  arm/gedf2vfp.S
+  arm/gesf2vfp.S
+  arm/gtdf2vfp.S
+  arm/gtsf2vfp.S
+  arm/ledf2vfp.S
+  arm/lesf2vfp.S
+  arm/ltdf2vfp.S
+  arm/ltsf2vfp.S
+  arm/muldf3vfp.S
+  arm/mulsf3vfp.S
+  arm/nedf2vfp.S
+  arm/negdf2vfp.S
+  arm/negsf2vfp.S
+  arm/nesf2vfp.S
+  arm/subdf3vfp.S
+  arm/subsf3vfp.S
+  arm/truncdfsf2vfp.S
+  arm/unorddf2vfp.S
+  arm/unordsf2vfp.S)
+set(arm_Thumb1_icache_SOURCES
+  arm/sync_synchronize.S)
+set(arm_Thumb1_SOURCES
+  ${arm_Thumb1_JT_SOURCES}
+  ${arm_Thumb1_SjLj_EH_SOURCES}
+  ${arm_Thumb1_VFPv2_SOURCES}
+  ${arm_Thumb1_icache_SOURCES})
+
+if(NOT WIN32)
+  # TODO the EABI sources should only be added to EABI targets
+  set(arm_SOURCES
+    ${arm_SOURCES}
+    ${arm_EABI_SOURCES}
+    ${arm_Thumb1_SOURCES})
+endif()
+
 set(aarch64_SOURCES
   comparetf2.c
   extenddftf2.c
@@ -341,6 +397,7 @@
 set(armhf_SOURCES ${arm_SOURCES})
 set(armv7_SOURCES ${arm_SOURCES})
 set(armv7s_SOURCES ${arm_SOURCES})
+set(armv7k_SOURCES ${arm_SOURCES})
 set(arm64_SOURCES ${aarch64_SOURCES})
 
 # macho_embedded archs
@@ -357,13 +414,14 @@
 set(wasm64_SOURCES ${GENERIC_SOURCES})
 
 add_custom_target(builtins)
+set_target_properties(builtins PROPERTIES FOLDER "Compiler-RT Misc")
 
 if (APPLE)
   add_subdirectory(Darwin-excludes)
   add_subdirectory(macho_embedded)
   darwin_add_builtin_libraries(${BUILTIN_SUPPORTED_OS})
-elseif (NOT WIN32 OR MINGW)
-  append_string_if(COMPILER_RT_HAS_STD_C99_FLAG -std=c99 maybe_stdc99)
+else ()
+  append_string_if(COMPILER_RT_HAS_STD_C99_FLAG -std=gnu99 maybe_stdc99)
 
   foreach (arch ${BUILTIN_SUPPORTED_ARCH})
     if (CAN_TARGET_${arch})
diff --git a/lib/builtins/Darwin-excludes/10.4-x86_64.txt b/lib/builtins/Darwin-excludes/10.4-x86_64.txt
deleted file mode 100644
index f2ee7fe..0000000
--- a/lib/builtins/Darwin-excludes/10.4-x86_64.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-absvti2
-addvti3
-ashlti3
-ashrti3
-clzti2
-cmpti2
-ctzti2
-divti3
-ffsti2
-fixdfti
-fixsfti
-fixunsdfti
-fixunssfti
-fixunsxfti
-fixxfti
-floattidf
-floattisf
-floattixf
-floatuntidf
-floatuntisf
-floatuntixf
-lshrti3
-modti3
-muloti4
-multi3
-mulvti3
-negti2
-negvti2
-parityti2
-popcountti2
-subvti3
-ucmpti2
-udivmodti4
-udivti3
-umodti3
diff --git a/lib/builtins/Darwin-excludes/10.4.txt b/lib/builtins/Darwin-excludes/10.4.txt
index 70d3644..603c0b3 100644
--- a/lib/builtins/Darwin-excludes/10.4.txt
+++ b/lib/builtins/Darwin-excludes/10.4.txt
@@ -1,18 +1,34 @@
-apple_versioning
 absvdi2
 absvsi2
+absvti2
 adddf3
 addsf3
+addtf3
 addvdi3
 addvsi3
+addvti3
+apple_versioning
 ashldi3
+ashlti3
 ashrdi3
+ashrti3
+atomic_flag_clear
+atomic_flag_clear_explicit
+atomic_flag_test_and_set
+atomic_flag_test_and_set_explicit
+atomic_signal_fence
+atomic_thread_fence
 clear_cache
 clzdi2
 clzsi2
+clzti2
 cmpdi2
+cmpti2
+comparedf2
+comparesf2
 ctzdi2
 ctzsi2
+ctzti2
 divdc3
 divdf3
 divdi3
@@ -21,76 +37,101 @@
 divsc3
 divsf3
 divsi3
+divtf3
+divti3
 divxc3
 enable_execute_stack
-comparedf2
-comparesf2
 extendhfsf2
 extendsfdf2
 ffsdi2
+ffsti2
 fixdfdi
 fixdfsi
+fixdfti
 fixsfdi
 fixsfsi
+fixsfti
 fixunsdfdi
 fixunsdfsi
+fixunsdfti
 fixunssfdi
 fixunssfsi
+fixunssfti
 fixunsxfdi
 fixunsxfsi
+fixunsxfti
 fixxfdi
+fixxfti
 floatdidf
 floatdisf
 floatdixf
 floatsidf
 floatsisf
+floattidf
+floattisf
+floattixf
 floatunsidf
 floatunsisf
+floatuntidf
+floatuntisf
+floatuntixf
 gcc_personality_v0
 gnu_f2h_ieee
 gnu_h2f_ieee
 lshrdi3
+lshrti3
 moddi3
 modsi3
+modti3
 muldc3
 muldf3
 muldi3
 mulodi4
 mulosi4
+muloti4
 mulsc3
 mulsf3
+multf3
+multi3
 mulvdi3
 mulvsi3
+mulvti3
 mulxc3
 negdf2
 negdi2
 negsf2
+negti2
 negvdi2
 negvsi2
+negvti2
 paritydi2
 paritysi2
+parityti2
 popcountdi2
 popcountsi2
+popcountti2
 powidf2
 powisf2
+powitf2
 powixf2
 subdf3
 subsf3
+subtf3
 subvdi3
 subvsi3
+subvti3
+trampoline_setup
 truncdfhf2
 truncdfsf2
 truncsfhf2
 ucmpdi2
+ucmpti2
 udivdi3
 udivmoddi4
 udivmodsi4
+udivmodti4
 udivsi3
+udivti3
 umoddi3
 umodsi3
-atomic_flag_clear
-atomic_flag_clear_explicit
-atomic_flag_test_and_set
-atomic_flag_test_and_set_explicit
-atomic_signal_fence
-atomic_thread_fence
\ No newline at end of file
+umodti3
diff --git a/lib/builtins/Darwin-excludes/osx-i386.txt b/lib/builtins/Darwin-excludes/osx-i386.txt
index 60c0e2d..f2ee7fe 100644
--- a/lib/builtins/Darwin-excludes/osx-i386.txt
+++ b/lib/builtins/Darwin-excludes/osx-i386.txt
@@ -1,5 +1,4 @@
 absvti2
-addtf3
 addvti3
 ashlti3
 ashrti3
@@ -7,7 +6,6 @@
 cmpti2
 ctzti2
 divti3
-divtf3
 ffsti2
 fixdfti
 fixsfti
@@ -25,57 +23,12 @@
 modti3
 muloti4
 multi3
-multf3
 mulvti3
 negti2
 negvti2
 parityti2
 popcountti2
-powitf2
 subvti3
-subtf3
-trampoline_setup
-ucmpti2
-udivmodti4
-udivti3
-umodti3
-absvti2
-addtf3
-addvti3
-ashlti3
-ashrti3
-clzti2
-cmpti2
-ctzti2
-divti3
-divtf3
-ffsti2
-fixdfti
-fixsfti
-fixunsdfti
-fixunssfti
-fixunsxfti
-fixxfti
-floattidf
-floattisf
-floattixf
-floatuntidf
-floatuntisf
-floatuntixf
-lshrti3
-modti3
-muloti4
-multi3
-multf3
-mulvti3
-negti2
-negvti2
-parityti2
-popcountti2
-powitf2
-subvti3
-subtf3
-trampoline_setup
 ucmpti2
 udivmodti4
 udivti3
diff --git a/lib/builtins/Darwin-excludes/osx-x86_64.txt b/lib/builtins/Darwin-excludes/osx-x86_64.txt
deleted file mode 100644
index de1574e..0000000
--- a/lib/builtins/Darwin-excludes/osx-x86_64.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-addtf3
-divtf3
-multf3
-powitf2
-subtf3
-trampoline_setup
-addtf3
-divtf3
-multf3
-powitf2
-subtf3
-trampoline_setup
diff --git a/lib/builtins/Darwin-excludes/osx.txt b/lib/builtins/Darwin-excludes/osx.txt
index 5db2400..6f9d0a7 100644
--- a/lib/builtins/Darwin-excludes/osx.txt
+++ b/lib/builtins/Darwin-excludes/osx.txt
@@ -1 +1,7 @@
 apple_versioning
+addtf3
+divtf3
+multf3
+powitf2
+subtf3
+trampoline_setup
diff --git a/lib/builtins/arm/adddf3vfp.S b/lib/builtins/arm/adddf3vfp.S
index 2825ae9..f4c00a0 100644
--- a/lib/builtins/arm/adddf3vfp.S
+++ b/lib/builtins/arm/adddf3vfp.S
@@ -24,3 +24,6 @@
 	vmov	r0, r1, d6		// move result back to r0/r1 pair
 	bx	lr
 END_COMPILERRT_FUNCTION(__adddf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/addsf3vfp.S b/lib/builtins/arm/addsf3vfp.S
index bff5a7e..af40c1c 100644
--- a/lib/builtins/arm/addsf3vfp.S
+++ b/lib/builtins/arm/addsf3vfp.S
@@ -24,3 +24,6 @@
 	vmov	r0, s14		// move result back to r0
 	bx	lr
 END_COMPILERRT_FUNCTION(__addsf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/aeabi_cdcmp.S b/lib/builtins/arm/aeabi_cdcmp.S
index 036a6f5..8008f5f 100644
--- a/lib/builtins/arm/aeabi_cdcmp.S
+++ b/lib/builtins/arm/aeabi_cdcmp.S
@@ -94,3 +94,5 @@
         b __aeabi_cdcmple
 END_COMPILERRT_FUNCTION(__aeabi_cdrcmple)
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/aeabi_cfcmp.S b/lib/builtins/arm/aeabi_cfcmp.S
index 43594e5..274baf7 100644
--- a/lib/builtins/arm/aeabi_cfcmp.S
+++ b/lib/builtins/arm/aeabi_cfcmp.S
@@ -89,3 +89,5 @@
         b __aeabi_cfcmple
 END_COMPILERRT_FUNCTION(__aeabi_cfrcmple)
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/aeabi_dcmp.S b/lib/builtins/arm/aeabi_dcmp.S
index 310c35b..43e4392 100644
--- a/lib/builtins/arm/aeabi_dcmp.S
+++ b/lib/builtins/arm/aeabi_dcmp.S
@@ -38,3 +38,6 @@
 DEFINE_AEABI_DCMP(le)
 DEFINE_AEABI_DCMP(ge)
 DEFINE_AEABI_DCMP(gt)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/aeabi_fcmp.S b/lib/builtins/arm/aeabi_fcmp.S
index 55f49a2..0a1d92a 100644
--- a/lib/builtins/arm/aeabi_fcmp.S
+++ b/lib/builtins/arm/aeabi_fcmp.S
@@ -38,3 +38,6 @@
 DEFINE_AEABI_FCMP(le)
 DEFINE_AEABI_FCMP(ge)
 DEFINE_AEABI_FCMP(gt)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/aeabi_idivmod.S b/lib/builtins/arm/aeabi_idivmod.S
index 384add3..2fcad86 100644
--- a/lib/builtins/arm/aeabi_idivmod.S
+++ b/lib/builtins/arm/aeabi_idivmod.S
@@ -26,3 +26,6 @@
         add     sp, sp, #4
         pop     { pc }
 END_COMPILERRT_FUNCTION(__aeabi_idivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/aeabi_ldivmod.S b/lib/builtins/arm/aeabi_ldivmod.S
index ad06f1d..9f161f3 100644
--- a/lib/builtins/arm/aeabi_ldivmod.S
+++ b/lib/builtins/arm/aeabi_ldivmod.S
@@ -29,3 +29,6 @@
         add     sp, sp, #16
         pop     {r11, pc}
 END_COMPILERRT_FUNCTION(__aeabi_ldivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/aeabi_memcmp.S b/lib/builtins/arm/aeabi_memcmp.S
index 051ce43..33ea548 100644
--- a/lib/builtins/arm/aeabi_memcmp.S
+++ b/lib/builtins/arm/aeabi_memcmp.S
@@ -11,6 +11,7 @@
 
 //  void __aeabi_memcmp(void *dest, void *src, size_t n) { memcmp(dest, src, n); }
 
+        .syntax unified
         .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp)
         b       memcmp
@@ -18,3 +19,6 @@
 
 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp4, __aeabi_memcmp)
 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp8, __aeabi_memcmp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/aeabi_memcpy.S b/lib/builtins/arm/aeabi_memcpy.S
index cf02332..eabfa49 100644
--- a/lib/builtins/arm/aeabi_memcpy.S
+++ b/lib/builtins/arm/aeabi_memcpy.S
@@ -11,6 +11,7 @@
 
 //  void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); }
 
+        .syntax unified
         .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy)
         b       memcpy
@@ -18,3 +19,6 @@
 
 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy4, __aeabi_memcpy)
 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy8, __aeabi_memcpy)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/aeabi_memmove.S b/lib/builtins/arm/aeabi_memmove.S
index 4dda06f..1bf08c0 100644
--- a/lib/builtins/arm/aeabi_memmove.S
+++ b/lib/builtins/arm/aeabi_memmove.S
@@ -18,3 +18,6 @@
 
 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove4, __aeabi_memmove)
 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove8, __aeabi_memmove)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/aeabi_memset.S b/lib/builtins/arm/aeabi_memset.S
index c8b49c7..48edd89 100644
--- a/lib/builtins/arm/aeabi_memset.S
+++ b/lib/builtins/arm/aeabi_memset.S
@@ -12,6 +12,7 @@
 //  void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); }
 //  void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); }
 
+        .syntax unified
         .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__aeabi_memset)
         mov     r3, r1
@@ -32,3 +33,5 @@
 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr4, __aeabi_memclr)
 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr8, __aeabi_memclr)
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/aeabi_uidivmod.S b/lib/builtins/arm/aeabi_uidivmod.S
index 8ea474d..e1e12d9 100644
--- a/lib/builtins/arm/aeabi_uidivmod.S
+++ b/lib/builtins/arm/aeabi_uidivmod.S
@@ -27,3 +27,6 @@
         add     sp, sp, #4
         pop     { pc }
 END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/aeabi_uldivmod.S b/lib/builtins/arm/aeabi_uldivmod.S
index 4e1f8e2..e8aaef2 100644
--- a/lib/builtins/arm/aeabi_uldivmod.S
+++ b/lib/builtins/arm/aeabi_uldivmod.S
@@ -29,3 +29,6 @@
         add	sp, sp, #16
         pop	{r11, pc}
 END_COMPILERRT_FUNCTION(__aeabi_uldivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/bswapdi2.S b/lib/builtins/arm/bswapdi2.S
index 86f3bba..fb226ce 100644
--- a/lib/builtins/arm/bswapdi2.S
+++ b/lib/builtins/arm/bswapdi2.S
@@ -45,3 +45,6 @@
     mov r1, r2  // r1 = r2 = rev(r0)
     JMP(lr)
 END_COMPILERRT_FUNCTION(__bswapdi2)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/bswapsi2.S b/lib/builtins/arm/bswapsi2.S
index 59ba815..553c3c2 100644
--- a/lib/builtins/arm/bswapsi2.S
+++ b/lib/builtins/arm/bswapsi2.S
@@ -37,3 +37,6 @@
 #endif
     JMP(lr)
 END_COMPILERRT_FUNCTION(__bswapsi2)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/clzdi2.S b/lib/builtins/arm/clzdi2.S
index a55abac..6068c17 100644
--- a/lib/builtins/arm/clzdi2.S
+++ b/lib/builtins/arm/clzdi2.S
@@ -95,3 +95,6 @@
 	JMP(lr)
 #endif // __ARM_FEATURE_CLZ
 END_COMPILERRT_FUNCTION(__clzdi2)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/clzsi2.S b/lib/builtins/arm/clzsi2.S
index 1cd379b..c2ba3a8 100644
--- a/lib/builtins/arm/clzsi2.S
+++ b/lib/builtins/arm/clzsi2.S
@@ -74,3 +74,6 @@
 	JMP(lr)
 #endif // __ARM_FEATURE_CLZ
 END_COMPILERRT_FUNCTION(__clzsi2)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/comparesf2.S b/lib/builtins/arm/comparesf2.S
index cf71d36..52597b6 100644
--- a/lib/builtins/arm/comparesf2.S
+++ b/lib/builtins/arm/comparesf2.S
@@ -146,3 +146,6 @@
 END_COMPILERRT_FUNCTION(__unordsf2)
 
 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/divdf3vfp.S b/lib/builtins/arm/divdf3vfp.S
index 6eebef1..928f538 100644
--- a/lib/builtins/arm/divdf3vfp.S
+++ b/lib/builtins/arm/divdf3vfp.S
@@ -24,3 +24,6 @@
 	vmov	r0, r1, d5		// move result back to r0/r1 pair
 	bx	lr
 END_COMPILERRT_FUNCTION(__divdf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/divmodsi4.S b/lib/builtins/arm/divmodsi4.S
index 646b9ab..999c310 100644
--- a/lib/builtins/arm/divmodsi4.S
+++ b/lib/builtins/arm/divmodsi4.S
@@ -72,3 +72,6 @@
     CLEAR_FRAME_AND_RETURN
 #endif
 END_COMPILERRT_FUNCTION(__divmodsi4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/divsf3vfp.S b/lib/builtins/arm/divsf3vfp.S
index fdbaebc..a2e297f 100644
--- a/lib/builtins/arm/divsf3vfp.S
+++ b/lib/builtins/arm/divsf3vfp.S
@@ -24,3 +24,6 @@
 	vmov	r0, s13		// move result back to r0
 	bx	lr
 END_COMPILERRT_FUNCTION(__divsf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/divsi3.S b/lib/builtins/arm/divsi3.S
index adf8f94..7e23ba4 100644
--- a/lib/builtins/arm/divsi3.S
+++ b/lib/builtins/arm/divsi3.S
@@ -63,3 +63,6 @@
     CLEAR_FRAME_AND_RETURN
 #endif
 END_COMPILERRT_FUNCTION(__divsi3)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/eqdf2vfp.S b/lib/builtins/arm/eqdf2vfp.S
index 7f2fbc3..95e6bb3 100644
--- a/lib/builtins/arm/eqdf2vfp.S
+++ b/lib/builtins/arm/eqdf2vfp.S
@@ -27,3 +27,6 @@
 	movne	r0, #0
 	bx	lr
 END_COMPILERRT_FUNCTION(__eqdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/eqsf2vfp.S b/lib/builtins/arm/eqsf2vfp.S
index a318b33..fbac139 100644
--- a/lib/builtins/arm/eqsf2vfp.S
+++ b/lib/builtins/arm/eqsf2vfp.S
@@ -27,3 +27,6 @@
 	movne	r0, #0
 	bx	lr
 END_COMPILERRT_FUNCTION(__eqsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/extendsfdf2vfp.S b/lib/builtins/arm/extendsfdf2vfp.S
index b998e58..563bf92 100644
--- a/lib/builtins/arm/extendsfdf2vfp.S
+++ b/lib/builtins/arm/extendsfdf2vfp.S
@@ -24,3 +24,6 @@
 	vmov	r0, r1, d7   // return result in r0/r1 pair
 	bx	lr
 END_COMPILERRT_FUNCTION(__extendsfdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/fixdfsivfp.S b/lib/builtins/arm/fixdfsivfp.S
index e3bd8e0..8263ff9 100644
--- a/lib/builtins/arm/fixdfsivfp.S
+++ b/lib/builtins/arm/fixdfsivfp.S
@@ -24,3 +24,6 @@
 	vmov	r0, s15	      // move s15 to result register
 	bx	lr
 END_COMPILERRT_FUNCTION(__fixdfsivfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/fixsfsivfp.S b/lib/builtins/arm/fixsfsivfp.S
index 3d0d0f5..c7c3b81 100644
--- a/lib/builtins/arm/fixsfsivfp.S
+++ b/lib/builtins/arm/fixsfsivfp.S
@@ -24,3 +24,6 @@
 	vmov	r0, s15	       // move s15 to result register
 	bx	lr
 END_COMPILERRT_FUNCTION(__fixsfsivfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/fixunsdfsivfp.S b/lib/builtins/arm/fixunsdfsivfp.S
index 35dda5b..9cc1e62 100644
--- a/lib/builtins/arm/fixunsdfsivfp.S
+++ b/lib/builtins/arm/fixunsdfsivfp.S
@@ -25,3 +25,6 @@
 	vmov	r0, s15	      // move s15 to result register
 	bx	lr
 END_COMPILERRT_FUNCTION(__fixunsdfsivfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/fixunssfsivfp.S b/lib/builtins/arm/fixunssfsivfp.S
index 5c3a7d9..79d7082 100644
--- a/lib/builtins/arm/fixunssfsivfp.S
+++ b/lib/builtins/arm/fixunssfsivfp.S
@@ -25,3 +25,6 @@
 	vmov	r0, s15	       // move s15 to result register
 	bx	lr
 END_COMPILERRT_FUNCTION(__fixunssfsivfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/floatsidfvfp.S b/lib/builtins/arm/floatsidfvfp.S
index d691849..7623f26 100644
--- a/lib/builtins/arm/floatsidfvfp.S
+++ b/lib/builtins/arm/floatsidfvfp.S
@@ -24,3 +24,6 @@
 	vmov	r0, r1, d7     // move d7 to result register pair r0/r1
 	bx	lr
 END_COMPILERRT_FUNCTION(__floatsidfvfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/floatsisfvfp.S b/lib/builtins/arm/floatsisfvfp.S
index 4a0cb39..c73dfac 100644
--- a/lib/builtins/arm/floatsisfvfp.S
+++ b/lib/builtins/arm/floatsisfvfp.S
@@ -24,3 +24,6 @@
 	vmov	r0, s15        // move s15 to result register
 	bx	lr
 END_COMPILERRT_FUNCTION(__floatsisfvfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/floatunssidfvfp.S b/lib/builtins/arm/floatunssidfvfp.S
index d92969e..2a59fdb 100644
--- a/lib/builtins/arm/floatunssidfvfp.S
+++ b/lib/builtins/arm/floatunssidfvfp.S
@@ -24,3 +24,6 @@
 	vmov	r0, r1, d7     // move d7 to result register pair r0/r1
 	bx	lr
 END_COMPILERRT_FUNCTION(__floatunssidfvfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/floatunssisfvfp.S b/lib/builtins/arm/floatunssisfvfp.S
index f6aeba5..c096263 100644
--- a/lib/builtins/arm/floatunssisfvfp.S
+++ b/lib/builtins/arm/floatunssisfvfp.S
@@ -24,3 +24,6 @@
 	vmov	r0, s15        // move s15 to result register
 	bx	lr
 END_COMPILERRT_FUNCTION(__floatunssisfvfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/gedf2vfp.S b/lib/builtins/arm/gedf2vfp.S
index 9e23527..72f13ef 100644
--- a/lib/builtins/arm/gedf2vfp.S
+++ b/lib/builtins/arm/gedf2vfp.S
@@ -27,3 +27,6 @@
 	movlt	r0, #0
 	bx	lr
 END_COMPILERRT_FUNCTION(__gedf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/gesf2vfp.S b/lib/builtins/arm/gesf2vfp.S
index 0ff6084..c9ee52c 100644
--- a/lib/builtins/arm/gesf2vfp.S
+++ b/lib/builtins/arm/gesf2vfp.S
@@ -27,3 +27,6 @@
 	movlt	r0, #0
 	bx	lr
 END_COMPILERRT_FUNCTION(__gesf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/gtdf2vfp.S b/lib/builtins/arm/gtdf2vfp.S
index 3dc5d5b..c7f2775 100644
--- a/lib/builtins/arm/gtdf2vfp.S
+++ b/lib/builtins/arm/gtdf2vfp.S
@@ -27,3 +27,6 @@
 	movle	r0, #0
 	bx	lr
 END_COMPILERRT_FUNCTION(__gtdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/gtsf2vfp.S b/lib/builtins/arm/gtsf2vfp.S
index ddd843a..7d49e45 100644
--- a/lib/builtins/arm/gtsf2vfp.S
+++ b/lib/builtins/arm/gtsf2vfp.S
@@ -27,3 +27,6 @@
 	movle	r0, #0
 	bx	lr
 END_COMPILERRT_FUNCTION(__gtsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/ledf2vfp.S b/lib/builtins/arm/ledf2vfp.S
index b06ff6d..ca5b553 100644
--- a/lib/builtins/arm/ledf2vfp.S
+++ b/lib/builtins/arm/ledf2vfp.S
@@ -27,3 +27,6 @@
 	movhi	r0, #0
 	bx	lr
 END_COMPILERRT_FUNCTION(__ledf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/lesf2vfp.S b/lib/builtins/arm/lesf2vfp.S
index 9b33c0c..f25422e 100644
--- a/lib/builtins/arm/lesf2vfp.S
+++ b/lib/builtins/arm/lesf2vfp.S
@@ -27,3 +27,6 @@
 	movhi	r0, #0
 	bx	lr
 END_COMPILERRT_FUNCTION(__lesf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/ltdf2vfp.S b/lib/builtins/arm/ltdf2vfp.S
index 9f794b0..6e2c099 100644
--- a/lib/builtins/arm/ltdf2vfp.S
+++ b/lib/builtins/arm/ltdf2vfp.S
@@ -27,3 +27,6 @@
 	movpl	r0, #0
 	bx	lr
 END_COMPILERRT_FUNCTION(__ltdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/ltsf2vfp.S b/lib/builtins/arm/ltsf2vfp.S
index ba190d9..95febb6 100644
--- a/lib/builtins/arm/ltsf2vfp.S
+++ b/lib/builtins/arm/ltsf2vfp.S
@@ -27,3 +27,6 @@
 	movpl	r0, #0
 	bx	lr
 END_COMPILERRT_FUNCTION(__ltsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/modsi3.S b/lib/builtins/arm/modsi3.S
index 295a227..1d302ed 100644
--- a/lib/builtins/arm/modsi3.S
+++ b/lib/builtins/arm/modsi3.S
@@ -61,3 +61,6 @@
     CLEAR_FRAME_AND_RETURN
 #endif
 END_COMPILERRT_FUNCTION(__modsi3)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/muldf3vfp.S b/lib/builtins/arm/muldf3vfp.S
index 636cc71..f638de1 100644
--- a/lib/builtins/arm/muldf3vfp.S
+++ b/lib/builtins/arm/muldf3vfp.S
@@ -24,3 +24,6 @@
 	vmov 	r0, r1, d6         // move result back to r0/r1 pair
 	bx	lr
 END_COMPILERRT_FUNCTION(__muldf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/mulsf3vfp.S b/lib/builtins/arm/mulsf3vfp.S
index 7f40082..bef58d3 100644
--- a/lib/builtins/arm/mulsf3vfp.S
+++ b/lib/builtins/arm/mulsf3vfp.S
@@ -24,3 +24,6 @@
 	vmov	r0, s13		// move result back to r0
 	bx	lr
 END_COMPILERRT_FUNCTION(__mulsf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/nedf2vfp.S b/lib/builtins/arm/nedf2vfp.S
index 7ab2f55..78cf529 100644
--- a/lib/builtins/arm/nedf2vfp.S
+++ b/lib/builtins/arm/nedf2vfp.S
@@ -27,3 +27,6 @@
 	moveq	r0, #0
 	bx	lr
 END_COMPILERRT_FUNCTION(__nedf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/negdf2vfp.S b/lib/builtins/arm/negdf2vfp.S
index 56d73c6..01c8ba6 100644
--- a/lib/builtins/arm/negdf2vfp.S
+++ b/lib/builtins/arm/negdf2vfp.S
@@ -21,3 +21,6 @@
 	eor	r1, r1, #-2147483648	// flip sign bit on double in r0/r1 pair
 	bx	lr
 END_COMPILERRT_FUNCTION(__negdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/negsf2vfp.S b/lib/builtins/arm/negsf2vfp.S
index a6e32e1..797abb3 100644
--- a/lib/builtins/arm/negsf2vfp.S
+++ b/lib/builtins/arm/negsf2vfp.S
@@ -21,3 +21,6 @@
 	eor	r0, r0, #-2147483648	// flip sign bit on float in r0
 	bx	lr
 END_COMPILERRT_FUNCTION(__negsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/nesf2vfp.S b/lib/builtins/arm/nesf2vfp.S
index 9fe8ecd..554d3e4 100644
--- a/lib/builtins/arm/nesf2vfp.S
+++ b/lib/builtins/arm/nesf2vfp.S
@@ -27,3 +27,6 @@
 	moveq	r0, #0
 	bx	lr
 END_COMPILERRT_FUNCTION(__nesf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/restore_vfp_d8_d15_regs.S b/lib/builtins/arm/restore_vfp_d8_d15_regs.S
index 0f6ea51..0692cf3 100644
--- a/lib/builtins/arm/restore_vfp_d8_d15_regs.S
+++ b/lib/builtins/arm/restore_vfp_d8_d15_regs.S
@@ -31,3 +31,5 @@
 	bx      lr                      // return to prolog
 END_COMPILERRT_FUNCTION(__restore_vfp_d8_d15_regs)
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/save_vfp_d8_d15_regs.S b/lib/builtins/arm/save_vfp_d8_d15_regs.S
index f1d90e7..544dd54 100644
--- a/lib/builtins/arm/save_vfp_d8_d15_regs.S
+++ b/lib/builtins/arm/save_vfp_d8_d15_regs.S
@@ -31,3 +31,5 @@
 	bx      lr                      // return to prolog
 END_COMPILERRT_FUNCTION(__save_vfp_d8_d15_regs)
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/subdf3vfp.S b/lib/builtins/arm/subdf3vfp.S
index 5f3c0f7..1fc7d18 100644
--- a/lib/builtins/arm/subdf3vfp.S
+++ b/lib/builtins/arm/subdf3vfp.S
@@ -24,3 +24,6 @@
 	vmov 	r0, r1, d6         // move result back to r0/r1 pair
 	bx	lr
 END_COMPILERRT_FUNCTION(__subdf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/subsf3vfp.S b/lib/builtins/arm/subsf3vfp.S
index d6e06df..11fe386 100644
--- a/lib/builtins/arm/subsf3vfp.S
+++ b/lib/builtins/arm/subsf3vfp.S
@@ -25,3 +25,6 @@
 	vmov	r0, s14		// move result back to r0
 	bx	lr
 END_COMPILERRT_FUNCTION(__subsf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/switch16.S b/lib/builtins/arm/switch16.S
index 3c3a6b1..df9e38e 100644
--- a/lib/builtins/arm/switch16.S
+++ b/lib/builtins/arm/switch16.S
@@ -42,3 +42,5 @@
 	bx      ip                      // jump to computed label
 END_COMPILERRT_FUNCTION(__switch16)
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/switch32.S b/lib/builtins/arm/switch32.S
index b38cd2b..d97b536 100644
--- a/lib/builtins/arm/switch32.S
+++ b/lib/builtins/arm/switch32.S
@@ -42,3 +42,5 @@
 	bx      ip                       // jump to computed label
 END_COMPILERRT_FUNCTION(__switch32)
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/switch8.S b/lib/builtins/arm/switch8.S
index d7c2042..4d9e0ea 100644
--- a/lib/builtins/arm/switch8.S
+++ b/lib/builtins/arm/switch8.S
@@ -40,3 +40,5 @@
 	bx      ip                      // jump to computed label
 END_COMPILERRT_FUNCTION(__switch8)
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/switchu8.S b/lib/builtins/arm/switchu8.S
index 1844f11..4ffe35f 100644
--- a/lib/builtins/arm/switchu8.S
+++ b/lib/builtins/arm/switchu8.S
@@ -40,3 +40,5 @@
 	bx      ip                      // jump to computed label
 END_COMPILERRT_FUNCTION(__switchu8)
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_add_4.S b/lib/builtins/arm/sync_fetch_and_add_4.S
index 54c33e2..7877d6c 100644
--- a/lib/builtins/arm/sync_fetch_and_add_4.S
+++ b/lib/builtins/arm/sync_fetch_and_add_4.S
@@ -19,3 +19,5 @@
 
 SYNC_OP_4(add_4)
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_add_8.S b/lib/builtins/arm/sync_fetch_and_add_8.S
index 5724bb1..1df07a3 100644
--- a/lib/builtins/arm/sync_fetch_and_add_8.S
+++ b/lib/builtins/arm/sync_fetch_and_add_8.S
@@ -22,3 +22,5 @@
 SYNC_OP_8(add_8)
 #endif
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_and_4.S b/lib/builtins/arm/sync_fetch_and_and_4.S
index e2b77a1..720ff02 100644
--- a/lib/builtins/arm/sync_fetch_and_and_4.S
+++ b/lib/builtins/arm/sync_fetch_and_and_4.S
@@ -17,3 +17,6 @@
 #define and_4(rD, rN, rM)  and rD, rN, rM
 
 SYNC_OP_4(and_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_and_8.S b/lib/builtins/arm/sync_fetch_and_and_8.S
index a74163a..4f7b5ca 100644
--- a/lib/builtins/arm/sync_fetch_and_and_8.S
+++ b/lib/builtins/arm/sync_fetch_and_and_8.S
@@ -21,3 +21,6 @@
 
 SYNC_OP_8(and_8)
 #endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_max_4.S b/lib/builtins/arm/sync_fetch_and_max_4.S
index 01e4f44..43da9c7 100644
--- a/lib/builtins/arm/sync_fetch_and_max_4.S
+++ b/lib/builtins/arm/sync_fetch_and_max_4.S
@@ -18,3 +18,5 @@
 
 SYNC_OP_4(max_4)
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_max_8.S b/lib/builtins/arm/sync_fetch_and_max_8.S
index 1eef2b2..898fc62 100644
--- a/lib/builtins/arm/sync_fetch_and_max_8.S
+++ b/lib/builtins/arm/sync_fetch_and_max_8.S
@@ -19,3 +19,6 @@
 
 SYNC_OP_8(max_8)
 #endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_min_4.S b/lib/builtins/arm/sync_fetch_and_min_4.S
index 015626b..bba31a0 100644
--- a/lib/builtins/arm/sync_fetch_and_min_4.S
+++ b/lib/builtins/arm/sync_fetch_and_min_4.S
@@ -18,3 +18,5 @@
 
 SYNC_OP_4(min_4)
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_min_8.S b/lib/builtins/arm/sync_fetch_and_min_8.S
index ad5cce0..e7ccf9f 100644
--- a/lib/builtins/arm/sync_fetch_and_min_8.S
+++ b/lib/builtins/arm/sync_fetch_and_min_8.S
@@ -19,3 +19,6 @@
 
 SYNC_OP_8(min_8)
 #endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_nand_4.S b/lib/builtins/arm/sync_fetch_and_nand_4.S
index b32a314..c13dd39 100644
--- a/lib/builtins/arm/sync_fetch_and_nand_4.S
+++ b/lib/builtins/arm/sync_fetch_and_nand_4.S
@@ -18,3 +18,5 @@
 
 SYNC_OP_4(nand_4)
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_nand_8.S b/lib/builtins/arm/sync_fetch_and_nand_8.S
index a2c17c0..e8107ab 100644
--- a/lib/builtins/arm/sync_fetch_and_nand_8.S
+++ b/lib/builtins/arm/sync_fetch_and_nand_8.S
@@ -22,3 +22,5 @@
 SYNC_OP_8(nand_8)
 #endif
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_or_4.S b/lib/builtins/arm/sync_fetch_and_or_4.S
index f2e0857..6726571 100644
--- a/lib/builtins/arm/sync_fetch_and_or_4.S
+++ b/lib/builtins/arm/sync_fetch_and_or_4.S
@@ -18,3 +18,5 @@
 
 SYNC_OP_4(or_4)
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_or_8.S b/lib/builtins/arm/sync_fetch_and_or_8.S
index 87b940b..f7f162c 100644
--- a/lib/builtins/arm/sync_fetch_and_or_8.S
+++ b/lib/builtins/arm/sync_fetch_and_or_8.S
@@ -22,3 +22,5 @@
 SYNC_OP_8(or_8)
 #endif
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_sub_4.S b/lib/builtins/arm/sync_fetch_and_sub_4.S
index 460b2bc..b9326b1 100644
--- a/lib/builtins/arm/sync_fetch_and_sub_4.S
+++ b/lib/builtins/arm/sync_fetch_and_sub_4.S
@@ -19,3 +19,5 @@
 
 SYNC_OP_4(sub_4)
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_sub_8.S b/lib/builtins/arm/sync_fetch_and_sub_8.S
index a8035a2..6ce743e 100644
--- a/lib/builtins/arm/sync_fetch_and_sub_8.S
+++ b/lib/builtins/arm/sync_fetch_and_sub_8.S
@@ -22,3 +22,5 @@
 SYNC_OP_8(sub_8)
 #endif
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_umax_4.S b/lib/builtins/arm/sync_fetch_and_umax_4.S
index c591530..b8d19ff 100644
--- a/lib/builtins/arm/sync_fetch_and_umax_4.S
+++ b/lib/builtins/arm/sync_fetch_and_umax_4.S
@@ -18,3 +18,5 @@
 
 SYNC_OP_4(umax_4)
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_umax_8.S b/lib/builtins/arm/sync_fetch_and_umax_8.S
index d9b7965..34442fd 100644
--- a/lib/builtins/arm/sync_fetch_and_umax_8.S
+++ b/lib/builtins/arm/sync_fetch_and_umax_8.S
@@ -19,3 +19,6 @@
 
 SYNC_OP_8(umax_8)
 #endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_umin_4.S b/lib/builtins/arm/sync_fetch_and_umin_4.S
index 9f3896f..0998e3e 100644
--- a/lib/builtins/arm/sync_fetch_and_umin_4.S
+++ b/lib/builtins/arm/sync_fetch_and_umin_4.S
@@ -18,3 +18,5 @@
 
 SYNC_OP_4(umin_4)
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_umin_8.S b/lib/builtins/arm/sync_fetch_and_umin_8.S
index 7bf5e23..558f913 100644
--- a/lib/builtins/arm/sync_fetch_and_umin_8.S
+++ b/lib/builtins/arm/sync_fetch_and_umin_8.S
@@ -19,3 +19,6 @@
 
 SYNC_OP_8(umin_8)
 #endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_xor_4.S b/lib/builtins/arm/sync_fetch_and_xor_4.S
index 7e7c90c..824f491 100644
--- a/lib/builtins/arm/sync_fetch_and_xor_4.S
+++ b/lib/builtins/arm/sync_fetch_and_xor_4.S
@@ -18,3 +18,5 @@
 
 SYNC_OP_4(xor_4)
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_fetch_and_xor_8.S b/lib/builtins/arm/sync_fetch_and_xor_8.S
index ea9aa6d..073fb9c 100644
--- a/lib/builtins/arm/sync_fetch_and_xor_8.S
+++ b/lib/builtins/arm/sync_fetch_and_xor_8.S
@@ -22,3 +22,5 @@
 SYNC_OP_8(xor_8)
 #endif
 
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/sync_synchronize.S b/lib/builtins/arm/sync_synchronize.S
index 178f245..61d1db9 100644
--- a/lib/builtins/arm/sync_synchronize.S
+++ b/lib/builtins/arm/sync_synchronize.S
@@ -33,3 +33,6 @@
 	.subsections_via_symbols
 		
 #endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/truncdfsf2vfp.S b/lib/builtins/arm/truncdfsf2vfp.S
index fa4362c..04287ad 100644
--- a/lib/builtins/arm/truncdfsf2vfp.S
+++ b/lib/builtins/arm/truncdfsf2vfp.S
@@ -24,3 +24,6 @@
 	vmov 	r0, s15      // return result in r0
 	bx	lr
 END_COMPILERRT_FUNCTION(__truncdfsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/udivmodsi4.S b/lib/builtins/arm/udivmodsi4.S
index 85b8493..1ad8ee3 100644
--- a/lib/builtins/arm/udivmodsi4.S
+++ b/lib/builtins/arm/udivmodsi4.S
@@ -182,3 +182,6 @@
 #endif
 
 END_COMPILERRT_FUNCTION(__udivmodsi4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/udivsi3.S b/lib/builtins/arm/udivsi3.S
index 165b2b5..085f8fb 100644
--- a/lib/builtins/arm/udivsi3.S
+++ b/lib/builtins/arm/udivsi3.S
@@ -168,3 +168,6 @@
 #endif
 
 END_COMPILERRT_FUNCTION(__udivsi3)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/umodsi3.S b/lib/builtins/arm/umodsi3.S
index 9e7a148..672487e 100644
--- a/lib/builtins/arm/umodsi3.S
+++ b/lib/builtins/arm/umodsi3.S
@@ -159,3 +159,6 @@
 #endif
 
 END_COMPILERRT_FUNCTION(__umodsi3)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/unorddf2vfp.S b/lib/builtins/arm/unorddf2vfp.S
index c4bea2d..022dd7a 100644
--- a/lib/builtins/arm/unorddf2vfp.S
+++ b/lib/builtins/arm/unorddf2vfp.S
@@ -27,3 +27,6 @@
 	movvc	r0, #0
 	bx	lr
 END_COMPILERRT_FUNCTION(__unorddf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/arm/unordsf2vfp.S b/lib/builtins/arm/unordsf2vfp.S
index 886e965..5ebdd3d 100644
--- a/lib/builtins/arm/unordsf2vfp.S
+++ b/lib/builtins/arm/unordsf2vfp.S
@@ -27,3 +27,6 @@
 	movvc	r0, #0
 	bx	lr
 END_COMPILERRT_FUNCTION(__unordsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/assembly.h b/lib/builtins/assembly.h
index c289705..5fc74f6 100644
--- a/lib/builtins/assembly.h
+++ b/lib/builtins/assembly.h
@@ -30,6 +30,8 @@
 #define SYMBOL_IS_FUNC(name)
 #define CONST_SECTION .const
 
+#define NO_EXEC_STACK_DIRECTIVE
+
 #elif defined(__ELF__)
 
 #define HIDDEN(name) .hidden name
@@ -42,6 +44,12 @@
 #endif
 #define CONST_SECTION .section .rodata
 
+#if defined(__GNU__) || defined(__ANDROID__) || defined(__FreeBSD__)
+#define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits
+#else
+#define NO_EXEC_STACK_DIRECTIVE
+#endif
+
 #else // !__APPLE__ && !__ELF__
 
 #define HIDDEN(name)
@@ -54,6 +62,8 @@
   .endef
 #define CONST_SECTION .section .rdata,"rd"
 
+#define NO_EXEC_STACK_DIRECTIVE
+
 #endif
 
 #if defined(__arm__)
diff --git a/lib/builtins/clear_cache.c b/lib/builtins/clear_cache.c
index ede7659..55bbdd3 100644
--- a/lib/builtins/clear_cache.c
+++ b/lib/builtins/clear_cache.c
@@ -14,6 +14,15 @@
 #if __APPLE__
   #include <libkern/OSCacheControl.h>
 #endif
+
+#if defined(_WIN32)
+/* Forward declare Win32 APIs since the GCC mode driver does not handle the
+   newer SDKs as well as needed.  */
+uint32_t FlushInstructionCache(uintptr_t hProcess, void *lpBaseAddress,
+                               uintptr_t dwSize);
+uintptr_t GetCurrentProcess(void);
+#endif
+
 #if (defined(__FreeBSD__) || defined(__Bitrig__)) && defined(__arm__)
   #include <sys/types.h>
   #include <machine/sysarch.h>
@@ -73,7 +82,7 @@
   #endif
 #endif
 
-#if defined(__ANDROID__) && defined(__arm__)
+#if defined(__linux__) && defined(__arm__)
   #include <asm/unistd.h>
 #endif
 
@@ -98,16 +107,18 @@
         arg.len = (uintptr_t)end - (uintptr_t)start;
 
         sysarch(ARM_SYNC_ICACHE, &arg);
-    #elif defined(__ANDROID__)
+    #elif defined(__linux__)
          register int start_reg __asm("r0") = (int) (intptr_t) start;
          const register int end_reg __asm("r1") = (int) (intptr_t) end;
-         const register int flags __asm("r2") = 0;
          const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush;
-        __asm __volatile("svc 0x0" : "=r"(start_reg)
-            : "r"(syscall_nr), "r"(start_reg), "r"(end_reg), "r"(flags) : "r0");
+         __asm __volatile("svc 0x0"
+                          : "=r"(start_reg)
+                          : "r"(syscall_nr), "r"(start_reg), "r"(end_reg));
          if (start_reg != 0) {
              compilerrt_abort();
          }
+    #elif defined(_WIN32)
+        FlushInstructionCache(GetCurrentProcess(), start, end - start);
     #else
         compilerrt_abort();
     #endif
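
The clear_cache.c changes above widen the cacheflush syscall path from Android-only to any Linux/ARM target, drop the unused r2 flags operand from the svc sequence, and add a Windows path through FlushInstructionCache. Callers normally reach this code via the compiler's __builtin___clear_cache after writing instructions to memory, roughly as in the POSIX-oriented sketch below (publish_code is a hypothetical helper, not part of compiler-rt):

    #include <cstddef>
    #include <cstring>
    #include <sys/mman.h>

    void *publish_code(const void *code, std::size_t len) {
      // W^X-hardened systems may refuse PROT_WRITE|PROT_EXEC; a real JIT would
      // map RW first and mprotect() the range to RX after copying.
      void *buf = mmap(nullptr, len, PROT_READ | PROT_WRITE | PROT_EXEC,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (buf == MAP_FAILED)
        return nullptr;
      std::memcpy(buf, code, len);
      // clang/gcc lower this builtin to compiler-rt's __clear_cache on targets
      // (ARM among them) where the instruction cache must be synced explicitly.
      __builtin___clear_cache(static_cast<char *>(buf),
                              static_cast<char *>(buf) + len);
      return buf;  // the caller may now cast buf to a function pointer
    }
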
diff --git a/lib/builtins/cpu_model.c b/lib/builtins/cpu_model.c
new file mode 100644
index 0000000..9a37370
--- /dev/null
+++ b/lib/builtins/cpu_model.c
@@ -0,0 +1,797 @@
+//===-- cpu_model.c - Support for __cpu_model builtin  ------------*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file is based on LLVM's lib/Support/Host.cpp.
+//  It implements the operating system Host concept and builtin
+//  __cpu_model for the compiler_rt library, for x86 only.
+//
+//===----------------------------------------------------------------------===//
+
+#if (defined(__i386__) || defined(_M_IX86) || \
+     defined(__x86_64__) || defined(_M_X64)) && \
+    (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
+
+#include <assert.h>
+
+#define bool int
+#define true 1
+#define false 0
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+enum VendorSignatures {
+  SIG_INTEL = 0x756e6547 /* Genu */,
+  SIG_AMD = 0x68747541 /* Auth */
+};
+
+enum ProcessorVendors {
+  VENDOR_INTEL = 1,
+  VENDOR_AMD,
+  VENDOR_OTHER,
+  VENDOR_MAX
+};
+
+enum ProcessorTypes {
+  INTEL_ATOM = 1,
+  INTEL_CORE2,
+  INTEL_COREI7,
+  AMDFAM10H,
+  AMDFAM15H,
+  INTEL_i386,
+  INTEL_i486,
+  INTEL_PENTIUM,
+  INTEL_PENTIUM_PRO,
+  INTEL_PENTIUM_II,
+  INTEL_PENTIUM_III,
+  INTEL_PENTIUM_IV,
+  INTEL_PENTIUM_M,
+  INTEL_CORE_DUO,
+  INTEL_XEONPHI,
+  INTEL_X86_64,
+  INTEL_NOCONA,
+  INTEL_PRESCOTT,
+  AMD_i486,
+  AMDPENTIUM,
+  AMDATHLON,
+  AMDFAM14H,
+  AMDFAM16H,
+  CPU_TYPE_MAX
+};
+
+enum ProcessorSubtypes {
+  INTEL_COREI7_NEHALEM = 1,
+  INTEL_COREI7_WESTMERE,
+  INTEL_COREI7_SANDYBRIDGE,
+  AMDFAM10H_BARCELONA,
+  AMDFAM10H_SHANGHAI,
+  AMDFAM10H_ISTANBUL,
+  AMDFAM15H_BDVER1,
+  AMDFAM15H_BDVER2,
+  INTEL_PENTIUM_MMX,
+  INTEL_CORE2_65,
+  INTEL_CORE2_45,
+  INTEL_COREI7_IVYBRIDGE,
+  INTEL_COREI7_HASWELL,
+  INTEL_COREI7_BROADWELL,
+  INTEL_COREI7_SKYLAKE,
+  INTEL_COREI7_SKYLAKE_AVX512,
+  INTEL_ATOM_BONNELL,
+  INTEL_ATOM_SILVERMONT,
+  INTEL_KNIGHTS_LANDING,
+  AMDPENTIUM_K6,
+  AMDPENTIUM_K62,
+  AMDPENTIUM_K63,
+  AMDPENTIUM_GEODE,
+  AMDATHLON_TBIRD,
+  AMDATHLON_MP,
+  AMDATHLON_XP,
+  AMDATHLON_K8SSE3,
+  AMDATHLON_OPTERON,
+  AMDATHLON_FX,
+  AMDATHLON_64,
+  AMD_BTVER1,
+  AMD_BTVER2,
+  AMDFAM15H_BDVER3,
+  AMDFAM15H_BDVER4,
+  CPU_SUBTYPE_MAX
+};
+
+enum ProcessorFeatures {
+  FEATURE_CMOV = 0,
+  FEATURE_MMX,
+  FEATURE_POPCNT,
+  FEATURE_SSE,
+  FEATURE_SSE2,
+  FEATURE_SSE3,
+  FEATURE_SSSE3,
+  FEATURE_SSE4_1,
+  FEATURE_SSE4_2,
+  FEATURE_AVX,
+  FEATURE_AVX2,
+  FEATURE_AVX512,
+  FEATURE_AVX512SAVE,
+  FEATURE_MOVBE,
+  FEATURE_ADX,
+  FEATURE_EM64T
+};
+
+// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
+// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
+// support. Consequently, for i386, the presence of CPUID is checked first
+// via the corresponding eflags bit.
+static bool isCpuIdSupported() {
+#if defined(__GNUC__) || defined(__clang__)
+#if defined(__i386__)
+  int __cpuid_supported;
+  __asm__("  pushfl\n"
+          "  popl   %%eax\n"
+          "  movl   %%eax,%%ecx\n"
+          "  xorl   $0x00200000,%%eax\n"
+          "  pushl  %%eax\n"
+          "  popfl\n"
+          "  pushfl\n"
+          "  popl   %%eax\n"
+          "  movl   $0,%0\n"
+          "  cmpl   %%eax,%%ecx\n"
+          "  je     1f\n"
+          "  movl   $1,%0\n"
+          "1:"
+          : "=r"(__cpuid_supported)
+          :
+          : "eax", "ecx");
+  if (!__cpuid_supported)
+    return false;
+#endif
+  return true;
+#endif
+  return true;
+}
+
+// This code is copied from lib/Support/Host.cpp.
+// Changes to either file should be mirrored in the other.
+
+/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
+/// the specified arguments.  If we can't run cpuid on the host, return true.
+static void getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
+                               unsigned *rECX, unsigned *rEDX) {
+#if defined(__GNUC__) || defined(__clang__)
+#if defined(__x86_64__)
+  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+  __asm__("movq\t%%rbx, %%rsi\n\t"
+          "cpuid\n\t"
+          "xchgq\t%%rbx, %%rsi\n\t"
+          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
+          : "a"(value));
+#elif defined(__i386__)
+  __asm__("movl\t%%ebx, %%esi\n\t"
+          "cpuid\n\t"
+          "xchgl\t%%ebx, %%esi\n\t"
+          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
+          : "a"(value));
+// pedantic #else returns to appease -Wunreachable-code (so we don't generate
+// postprocessed code that looks like "return true; return false;")
+#else
+  assert(0 && "This method is defined only for x86.");
+#endif
+#elif defined(_MSC_VER)
+  // The MSVC intrinsic is portable across x86 and x64.
+  int registers[4];
+  __cpuid(registers, value);
+  *rEAX = registers[0];
+  *rEBX = registers[1];
+  *rECX = registers[2];
+  *rEDX = registers[3];
+#else
+  assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
+#endif
+}
+
+/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
+/// the 4 values in the specified arguments.  If we can't run cpuid on the host,
+/// return true.
+static void getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
+                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
+                                 unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_X64)
+#if defined(__GNUC__) || defined(__clang__)
+  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+  // FIXME: should we save this for Clang?
+  __asm__("movq\t%%rbx, %%rsi\n\t"
+          "cpuid\n\t"
+          "xchgq\t%%rbx, %%rsi\n\t"
+          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
+          : "a"(value), "c"(subleaf));
+#elif defined(_MSC_VER)
+  int registers[4];
+  __cpuidex(registers, value, subleaf);
+  *rEAX = registers[0];
+  *rEBX = registers[1];
+  *rECX = registers[2];
+  *rEDX = registers[3];
+#else
+  assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
+#endif
+#elif defined(__i386__) || defined(_M_IX86)
+#if defined(__GNUC__) || defined(__clang__)
+  __asm__("movl\t%%ebx, %%esi\n\t"
+          "cpuid\n\t"
+          "xchgl\t%%ebx, %%esi\n\t"
+          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
+          : "a"(value), "c"(subleaf));
+#elif defined(_MSC_VER)
+  __asm {
+      mov   eax,value
+      mov   ecx,subleaf
+      cpuid
+      mov   esi,rEAX
+      mov   dword ptr [esi],eax
+      mov   esi,rEBX
+      mov   dword ptr [esi],ebx
+      mov   esi,rECX
+      mov   dword ptr [esi],ecx
+      mov   esi,rEDX
+      mov   dword ptr [esi],edx
+  }
+#else
+  assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
+#endif
+#else
+  assert(0 && "This method is defined only for x86.");
+#endif
+}
+
+// Read control register 0 (XCR0). Used to detect features such as AVX.
+static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
+#if defined(__GNUC__) || defined(__clang__)
+  // Check xgetbv; this uses a .byte sequence instead of the instruction
+  // directly because older assemblers do not include support for xgetbv and
+  // there is no easy way to conditionally compile based on the assembler used.
+  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
+  return false;
+#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
+  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+  *rEAX = Result;
+  *rEDX = Result >> 32;
+  return false;
+#else
+  return true;
+#endif
+}
+
+static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
+                                 unsigned *Model) {
+  *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+  *Model = (EAX >> 4) & 0xf;  // Bits 4 - 7
+  if (*Family == 6 || *Family == 0xf) {
+    if (*Family == 0xf)
+      // Examine extended family ID if family ID is F.
+      *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
+    // Examine extended model ID if family ID is 6 or F.
+    *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+  }
+}
+
+static void getIntelProcessorTypeAndSubtype(unsigned int Family,
+                                            unsigned int Model,
+                                            unsigned int Brand_id,
+                                            unsigned int Features,
+                                            unsigned *Type, unsigned *Subtype) {
+  if (Brand_id != 0)
+    return;
+  switch (Family) {
+  case 3:
+    *Type = INTEL_i386;
+    break;
+  case 4:
+    switch (Model) {
+    case 0: // Intel486 DX processors
+    case 1: // Intel486 DX processors
+    case 2: // Intel486 SX processors
+    case 3: // Intel487 processors, IntelDX2 OverDrive processors,
+            // IntelDX2 processors
+    case 4: // Intel486 SL processor
+    case 5: // IntelSX2 processors
+    case 7: // Write-Back Enhanced IntelDX2 processors
+    case 8: // IntelDX4 OverDrive processors, IntelDX4 processors
+    default:
+      *Type = INTEL_i486;
+      break;
+    }
+  case 5:
+    switch (Model) {
+    case 1: // Pentium OverDrive processor for Pentium processor (60, 66),
+            // Pentium processors (60, 66)
+    case 2: // Pentium OverDrive processor for Pentium processor (75, 90,
+            // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133,
+            // 150, 166, 200)
+    case 3: // Pentium OverDrive processors for Intel486 processor-based
+            // systems
+      *Type = INTEL_PENTIUM;
+      break;
+    case 4: // Pentium OverDrive processor with MMX technology for Pentium
+            // processor (75, 90, 100, 120, 133), Pentium processor with
+            // MMX technology (166, 200)
+      *Type = INTEL_PENTIUM;
+      *Subtype = INTEL_PENTIUM_MMX;
+      break;
+    default:
+      *Type = INTEL_PENTIUM;
+      break;
+    }
+  case 6:
+    switch (Model) {
+    case 0x01: // Pentium Pro processor
+      *Type = INTEL_PENTIUM_PRO;
+      break;
+    case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor,
+               // model 03
+    case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor,
+               // model 05, and Intel Celeron processor, model 05
+    case 0x06: // Celeron processor, model 06
+      *Type = INTEL_PENTIUM_II;
+      break;
+    case 0x07: // Pentium III processor, model 07, and Pentium III Xeon
+               // processor, model 07
+    case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor,
+               // model 08, and Celeron processor, model 08
+    case 0x0a: // Pentium III Xeon processor, model 0Ah
+    case 0x0b: // Pentium III processor, model 0Bh
+      *Type = INTEL_PENTIUM_III;
+      break;
+    case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09.
+    case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model
+               // 0Dh. All processors are manufactured using the 90 nm process.
+    case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579
+               // Integrated Processor with Intel QuickAssist Technology
+      *Type = INTEL_PENTIUM_M;
+      break;
+    case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model
+               // 0Eh. All processors are manufactured using the 65 nm process.
+      *Type = INTEL_CORE_DUO;
+      break;   // yonah
+    case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
+               // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
+               // mobile processor, Intel Core 2 Extreme processor, Intel
+               // Pentium Dual-Core processor, Intel Xeon processor, model
+               // 0Fh. All processors are manufactured using the 65 nm process.
+    case 0x16: // Intel Celeron processor model 16h. All processors are
+               // manufactured using the 65 nm process
+      *Type = INTEL_CORE2; // "core2"
+      *Subtype = INTEL_CORE2_65;
+      break;
+    case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
+               // 17h. All processors are manufactured using the 45 nm process.
+               //
+               // 45nm: Penryn , Wolfdale, Yorkfield (XE)
+    case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
+               // the 45 nm process.
+      *Type = INTEL_CORE2; // "penryn"
+      *Subtype = INTEL_CORE2_45;
+      break;
+    case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
+               // processors are manufactured using the 45 nm process.
+    case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
+               // As found in a Summer 2010 model iMac.
+    case 0x1f:
+    case 0x2e:              // Nehalem EX
+      *Type = INTEL_COREI7; // "nehalem"
+      *Subtype = INTEL_COREI7_NEHALEM;
+      break;
+    case 0x25: // Intel Core i7, laptop version.
+    case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
+               // processors are manufactured using the 32 nm process.
+    case 0x2f: // Westmere EX
+      *Type = INTEL_COREI7; // "westmere"
+      *Subtype = INTEL_COREI7_WESTMERE;
+      break;
+    case 0x2a: // Intel Core i7 processor. All processors are manufactured
+               // using the 32 nm process.
+    case 0x2d:
+      *Type = INTEL_COREI7; //"sandybridge"
+      *Subtype = INTEL_COREI7_SANDYBRIDGE;
+      break;
+    case 0x3a:
+    case 0x3e:              // Ivy Bridge EP
+      *Type = INTEL_COREI7; // "ivybridge"
+      *Subtype = INTEL_COREI7_IVYBRIDGE;
+      break;
+
+    // Haswell:
+    case 0x3c:
+    case 0x3f:
+    case 0x45:
+    case 0x46:
+      *Type = INTEL_COREI7; // "haswell"
+      *Subtype = INTEL_COREI7_HASWELL;
+      break;
+
+    // Broadwell:
+    case 0x3d:
+    case 0x47:
+    case 0x4f:
+    case 0x56:
+      *Type = INTEL_COREI7; // "broadwell"
+      *Subtype = INTEL_COREI7_BROADWELL;
+      break;
+
+    // Skylake:
+    case 0x4e:
+      *Type = INTEL_COREI7; // "skylake-avx512"
+      *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
+      break;
+    case 0x5e:
+      *Type = INTEL_COREI7; // "skylake"
+      *Subtype = INTEL_COREI7_SKYLAKE;
+      break;
+
+    case 0x1c: // Most 45 nm Intel Atom processors
+    case 0x26: // 45 nm Atom Lincroft
+    case 0x27: // 32 nm Atom Medfield
+    case 0x35: // 32 nm Atom Midview
+    case 0x36: // 32 nm Atom Midview
+      *Type = INTEL_ATOM;
+      *Subtype = INTEL_ATOM_BONNELL;
+      break; // "bonnell"
+
+    // Atom Silvermont codes from the Intel software optimization guide.
+    case 0x37:
+    case 0x4a:
+    case 0x4d:
+    case 0x5a:
+    case 0x5d:
+    case 0x4c: // really airmont
+      *Type = INTEL_ATOM;
+      *Subtype = INTEL_ATOM_SILVERMONT;
+      break; // "silvermont"
+
+    case 0x57:
+      *Type = INTEL_XEONPHI; // knl
+      *Subtype = INTEL_KNIGHTS_LANDING;
+      break;
+
+    default: // Unknown family 6 CPU, try to guess.
+      if (Features & (1 << FEATURE_AVX512)) {
+        *Type = INTEL_XEONPHI; // knl
+        *Subtype = INTEL_KNIGHTS_LANDING;
+        break;
+      }
+      if (Features & (1 << FEATURE_ADX)) {
+        *Type = INTEL_COREI7;
+        *Subtype = INTEL_COREI7_BROADWELL;
+        break;
+      }
+      if (Features & (1 << FEATURE_AVX2)) {
+        *Type = INTEL_COREI7;
+        *Subtype = INTEL_COREI7_HASWELL;
+        break;
+      }
+      if (Features & (1 << FEATURE_AVX)) {
+        *Type = INTEL_COREI7;
+        *Subtype = INTEL_COREI7_SANDYBRIDGE;
+        break;
+      }
+      if (Features & (1 << FEATURE_SSE4_2)) {
+        if (Features & (1 << FEATURE_MOVBE)) {
+          *Type = INTEL_ATOM;
+          *Subtype = INTEL_ATOM_SILVERMONT;
+        } else {
+          *Type = INTEL_COREI7;
+          *Subtype = INTEL_COREI7_NEHALEM;
+        }
+        break;
+      }
+      if (Features & (1 << FEATURE_SSE4_1)) {
+        *Type = INTEL_CORE2; // "penryn"
+        *Subtype = INTEL_CORE2_45;
+        break;
+      }
+      if (Features & (1 << FEATURE_SSSE3)) {
+        if (Features & (1 << FEATURE_MOVBE)) {
+          *Type = INTEL_ATOM;
+          *Subtype = INTEL_ATOM_BONNELL; // "bonnell"
+        } else {
+          *Type = INTEL_CORE2; // "core2"
+          *Subtype = INTEL_CORE2_65;
+        }
+        break;
+      }
+      if (Features & (1 << FEATURE_EM64T)) {
+        *Type = INTEL_X86_64;
+        break; // x86-64
+      }
+      if (Features & (1 << FEATURE_SSE2)) {
+        *Type = INTEL_PENTIUM_M;
+        break;
+      }
+      if (Features & (1 << FEATURE_SSE)) {
+        *Type = INTEL_PENTIUM_III;
+        break;
+      }
+      if (Features & (1 << FEATURE_MMX)) {
+        *Type = INTEL_PENTIUM_II;
+        break;
+      }
+      *Type = INTEL_PENTIUM_PRO;
+      break;
+    }
+    break;
+  case 15: {
+    switch (Model) {
+    case 0: // Pentium 4 processor, Intel Xeon processor. All processors are
+            // model 00h and manufactured using the 0.18 micron process.
+    case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon
+            // processor MP, and Intel Celeron processor. All processors are
+            // model 01h and manufactured using the 0.18 micron process.
+    case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor - M,
+            // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron
+            // processor, and Mobile Intel Celeron processor. All processors
+            // are model 02h and manufactured using the 0.13 micron process.
+      *Type =
+          ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
+      break;
+
+    case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D
+            // processor. All processors are model 03h and manufactured using
+            // the 90 nm process.
+    case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition,
+            // Pentium D processor, Intel Xeon processor, Intel Xeon
+            // processor MP, Intel Celeron D processor. All processors are
+            // model 04h and manufactured using the 90 nm process.
+    case 6: // Pentium 4 processor, Pentium D processor, Pentium processor
+            // Extreme Edition, Intel Xeon processor, Intel Xeon processor
+            // MP, Intel Celeron D processor. All processors are model 06h
+            // and manufactured using the 65 nm process.
+      *Type =
+          ((Features & (1 << FEATURE_EM64T)) ? INTEL_NOCONA : INTEL_PRESCOTT);
+      break;
+
+    default:
+      *Type =
+          ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
+      break;
+    }
+  }
+  default:
+    break; /*"generic"*/
+  }
+}
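+
+// Continuing the worked example: with Family == 6, Model == 0x3c and a zero
+// Brand_id, the switch above selects *Type = INTEL_COREI7 and
+// *Subtype = INTEL_COREI7_HASWELL.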
+
+static void getAMDProcessorTypeAndSubtype(unsigned int Family,
+                                          unsigned int Model,
+                                          unsigned int Features, unsigned *Type,
+                                          unsigned *Subtype) {
+  // FIXME: this poorly matches the generated SubtargetFeatureKV table.  There
+  // appears to be no way to generate the wide variety of AMD-specific targets
+  // from the information returned from CPUID.
+  switch (Family) {
+  case 4:
+    *Type = AMD_i486;
+    break; // Do not fall through into the next family's handling.
+  case 5:
+    *Type = AMDPENTIUM;
+    switch (Model) {
+    case 6:
+    case 7:
+      *Subtype = AMDPENTIUM_K6;
+      break; // "k6"
+    case 8:
+      *Subtype = AMDPENTIUM_K62;
+      break; // "k6-2"
+    case 9:
+    case 13:
+      *Subtype = AMDPENTIUM_K63;
+      break; // "k6-3"
+    case 10:
+      *Subtype = AMDPENTIUM_GEODE;
+      break; // "geode"
+    default:
+      break;
+    }
+    break;
+  case 6:
+    *Type = AMDATHLON;
+    switch (Model) {
+    case 4:
+      *Subtype = AMDATHLON_TBIRD;
+      break; // "athlon-tbird"
+    case 6:
+    case 7:
+    case 8:
+      *Subtype = AMDATHLON_MP;
+      break; // "athlon-mp"
+    case 10:
+      *Subtype = AMDATHLON_XP;
+      break; // "athlon-xp"
+    default:
+      break;
+    }
+    break;
+  case 15:
+    *Type = AMDATHLON;
+    if (Features & (1 << FEATURE_SSE3)) {
+      *Subtype = AMDATHLON_K8SSE3;
+      break; // "k8-sse3"
+    }
+    switch (Model) {
+    case 1:
+      *Subtype = AMDATHLON_OPTERON;
+      break; // "opteron"
+    case 5:
+      *Subtype = AMDATHLON_FX;
+      break; // "athlon-fx"; also opteron
+    default:
+      *Subtype = AMDATHLON_64;
+      break; // "athlon64"
+    }
+    break;
+  case 16:
+    *Type = AMDFAM10H; // "amdfam10"
+    switch (Model) {
+    case 2:
+      *Subtype = AMDFAM10H_BARCELONA;
+      break;
+    case 4:
+      *Subtype = AMDFAM10H_SHANGHAI;
+      break;
+    case 8:
+      *Subtype = AMDFAM10H_ISTANBUL;
+      break;
+    default:
+      break;
+    }
+    break;
+  case 20:
+    *Type = AMDFAM14H;
+    *Subtype = AMD_BTVER1;
+    break; // "btver1";
+  case 21:
+    *Type = AMDFAM15H;
+    if (!(Features &
+          (1 << FEATURE_AVX))) { // If no AVX support, provide a sane fallback.
+      *Subtype = AMD_BTVER1;
+      break; // "btver1"
+    }
+    if (Model >= 0x50 && Model <= 0x6f) {
+      *Subtype = AMDFAM15H_BDVER4;
+      break; // "bdver4"; 50h-6Fh: Excavator
+    }
+    if (Model >= 0x30 && Model <= 0x3f) {
+      *Subtype = AMDFAM15H_BDVER3;
+      break; // "bdver3"; 30h-3Fh: Steamroller
+    }
+    if (Model >= 0x10 && Model <= 0x1f) {
+      *Subtype = AMDFAM15H_BDVER2;
+      break; // "bdver2"; 10h-1Fh: Piledriver
+    }
+    if (Model <= 0x0f) {
+      *Subtype = AMDFAM15H_BDVER1;
+      break; // "bdver1"; 00h-0Fh: Bulldozer
+    }
+    break;
+  case 22:
+    *Type = AMDFAM16H;
+    if (!(Features &
+          (1 << FEATURE_AVX))) { // If no AVX support, provide a sane fallback.
+      *Subtype = AMD_BTVER1;
+      break; // "btver1";
+    }
+    *Subtype = AMD_BTVER2;
+    break; // "btver2"
+  default:
+    break; // "generic"
+  }
+}
+
+static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX,
+                                     unsigned MaxLeaf) {
+  unsigned Features = 0;
+  unsigned int EAX, EBX;
+  Features |= (((EDX >> 23) & 1) << FEATURE_MMX);
+  Features |= (((EDX >> 25) & 1) << FEATURE_SSE);
+  Features |= (((EDX >> 26) & 1) << FEATURE_SSE2);
+  Features |= (((ECX >> 0) & 1) << FEATURE_SSE3);
+  Features |= (((ECX >> 9) & 1) << FEATURE_SSSE3);
+  Features |= (((ECX >> 19) & 1) << FEATURE_SSE4_1);
+  Features |= (((ECX >> 20) & 1) << FEATURE_SSE4_2);
+  Features |= (((ECX >> 22) & 1) << FEATURE_MOVBE);
+
+  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
+  // indicates that the AVX registers will be saved and restored on context
+  // switch, then we have full AVX support.
+  const unsigned AVXBits = (1 << 27) | (1 << 28);
+  bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
+                ((EAX & 0x6) == 0x6);
+  bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
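+  // The XCR0 masks above: bit 1 covers SSE (XMM) state and bit 2 covers AVX
+  // (upper YMM) state, hence the 0x6 check; bits 5-7 cover the opmask and
+  // upper ZMM state required for AVX-512, hence the 0xe0 check.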
+  bool HasLeaf7 = MaxLeaf >= 0x7;
+  getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
+  bool HasADX = HasLeaf7 && ((EBX >> 19) & 1);
+  bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20);
+  bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1);
+  Features |= (HasAVX << FEATURE_AVX);
+  Features |= (HasAVX2 << FEATURE_AVX2);
+  Features |= (HasAVX512 << FEATURE_AVX512);
+  Features |= (HasAVX512Save << FEATURE_AVX512SAVE);
+  Features |= (HasADX << FEATURE_ADX);
+
+  getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+  Features |= (((EDX >> 29) & 0x1) << FEATURE_EM64T);
+  return Features;
+}
+
+#ifdef HAVE_INIT_PRIORITY
+#define CONSTRUCTOR_PRIORITY (101)
+#else
+#define CONSTRUCTOR_PRIORITY
+#endif
+
+int __cpu_indicator_init(void)
+    __attribute__((constructor CONSTRUCTOR_PRIORITY));
+
+struct __processor_model {
+  unsigned int __cpu_vendor;
+  unsigned int __cpu_type;
+  unsigned int __cpu_subtype;
+  unsigned int __cpu_features[1];
+} __cpu_model = {0, 0, 0, {0}};
+
+/* A constructor function that sets __cpu_model and __cpu_features to
+   the right values.  This needs to run only once.  This constructor is
+   given the highest priority and it should run before constructors without
+   the priority set.  However, it still runs after ifunc initializers and
+   needs to be called explicitly there.  */
+
+int __attribute__((constructor CONSTRUCTOR_PRIORITY))
+__cpu_indicator_init(void) {
+  unsigned int EAX, EBX, ECX, EDX;
+  unsigned int MaxLeaf = 5;
+  unsigned int Vendor;
+  unsigned int Model, Family, Brand_id;
+  unsigned int Features = 0;
+
+  /* This function needs to run just once.  */
+  if (__cpu_model.__cpu_vendor)
+    return 0;
+
+  if (!isCpuIdSupported())
+    return -1;
+
+  /* Assume cpuid insn present. Run in level 0 to get vendor id. */
+  getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX);
+
+  if (MaxLeaf < 1) {
+    __cpu_model.__cpu_vendor = VENDOR_OTHER;
+    return -1;
+  }
+  getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
+  detectX86FamilyModel(EAX, &Family, &Model);
+  Brand_id = EBX & 0xff;
+
+  /* Find available features. */
+  Features = getAvailableFeatures(ECX, EDX, MaxLeaf);
+  __cpu_model.__cpu_features[0] = Features;
+
+  if (Vendor == SIG_INTEL) {
+    /* Get CPU type.  */
+    getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
+                                    &(__cpu_model.__cpu_type),
+                                    &(__cpu_model.__cpu_subtype));
+    __cpu_model.__cpu_vendor = VENDOR_INTEL;
+  } else if (Vendor == SIG_AMD) {
+    /* Get CPU type.  */
+    getAMDProcessorTypeAndSubtype(Family, Model, Features,
+                                  &(__cpu_model.__cpu_type),
+                                  &(__cpu_model.__cpu_subtype));
+    __cpu_model.__cpu_vendor = VENDOR_AMD;
+  } else
+    __cpu_model.__cpu_vendor = VENDOR_OTHER;
+
+  assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
+  assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
+  assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
+
+  return 0;
+}
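+
+/* Minimal usage sketch (illustrative only; the usual consumers are the
+   compiler-generated __builtin_cpu_is / __builtin_cpu_supports expansions,
+   and use_avx2_path() is a hypothetical function):
+
+       __cpu_indicator_init();
+       if (__cpu_model.__cpu_vendor == VENDOR_INTEL &&
+           __cpu_model.__cpu_type == INTEL_COREI7 &&
+           __cpu_model.__cpu_subtype == INTEL_COREI7_HASWELL)
+         use_avx2_path();
+*/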
+
+#endif
diff --git a/lib/builtins/emutls.c b/lib/builtins/emutls.c
index 09e7956..eccbf53 100644
--- a/lib/builtins/emutls.c
+++ b/lib/builtins/emutls.c
@@ -27,9 +27,14 @@
  * If xyz has non-zero initial value, __emutls_v.xyz's "value"
  * will point to __emutls_t.xyz, which has the initial value.
  */
+typedef unsigned int gcc_word __attribute__((mode(word)));
 typedef struct __emutls_control {
-    size_t size;  /* size of the object in bytes */
-    size_t align;  /* alignment of the object in bytes */
+    /* Must use gcc_word here, instead of size_t, to match GCC.  When
+       gcc_word is larger than size_t, the upper extra bits are all
+       zeros.  We can use variables of size_t to operate on size and
+       align.  */
+    gcc_word size;  /* size of the object in bytes */
+    gcc_word align;  /* alignment of the object in bytes */
     union {
         uintptr_t index;  /* data[index-1] is the object address */
         void* address;  /* object address, when in single thread env */
@@ -67,21 +72,20 @@
 /* Emulated TLS objects are always allocated at run-time. */
 static __inline void *emutls_allocate_object(__emutls_control *control) {
     /* Use standard C types, check with gcc's emutls.o. */
-    typedef unsigned int gcc_word __attribute__((mode(word)));
     typedef unsigned int gcc_pointer __attribute__((mode(pointer)));
-    COMPILE_TIME_ASSERT(sizeof(size_t) == sizeof(gcc_word));
     COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer));
     COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void*));
 
     size_t size = control->size;
     size_t align = control->align;
+    void* base;
     if (align < sizeof(void*))
         align = sizeof(void*);
     /* Make sure that align is power of 2. */
     if ((align & (align - 1)) != 0)
         abort();
 
-    void* base = emutls_memalign_alloc(align, size);
+    base = emutls_memalign_alloc(align, size);
     if (control->value)
         memcpy(base, control->value, size);
     else
@@ -160,12 +164,14 @@
     emutls_address_array* array = pthread_getspecific(emutls_pthread_key);
     if (array == NULL) {
         uintptr_t new_size = emutls_new_data_array_size(index);
-        array = calloc(new_size + 1, sizeof(void*));
+        array = malloc(new_size * sizeof(void *) + sizeof(emutls_address_array));
+        if (array)
+            memset(array->data, 0, new_size * sizeof(void*));
         emutls_check_array_set_size(array, new_size);
     } else if (index > array->size) {
         uintptr_t orig_size = array->size;
         uintptr_t new_size = emutls_new_data_array_size(index);
-        array = realloc(array, (new_size + 1) * sizeof(void*));
+        array = realloc(array, new_size * sizeof(void *) + sizeof(emutls_address_array));
         if (array)
             memset(array->data + orig_size, 0,
                    (new_size - orig_size) * sizeof(void*));
diff --git a/lib/builtins/floatdidf.c b/lib/builtins/floatdidf.c
index a300c9f..2b023ad 100644
--- a/lib/builtins/floatdidf.c
+++ b/lib/builtins/floatdidf.c
@@ -16,7 +16,7 @@
 
 /* Returns: convert a to a double, rounding toward even. */
 
-/* Assumption: double is a IEEE 64 bit floating point type 
+/* Assumption: double is a IEEE 64 bit floating point type
  *             di_int is a 64 bit integral type
  */
 
@@ -32,16 +32,16 @@
 COMPILER_RT_ABI double
 __floatdidf(di_int a)
 {
-	static const double twop52 = 4503599627370496.0; // 0x1.0p52
-	static const double twop32 = 4294967296.0; // 0x1.0p32
-	
-	union { int64_t x; double d; } low = { .d = twop52 };
-	
-	const double high = (int32_t)(a >> 32) * twop32;
-	low.x |= a & INT64_C(0x00000000ffffffff);
-	
-	const double result = (high - twop52) + low.d;
-	return result;
+    static const double twop52 = 4503599627370496.0; // 0x1.0p52
+    static const double twop32 = 4294967296.0; // 0x1.0p32
+
+    union { int64_t x; double d; } low = { .d = twop52 };
+
+    const double high = (int32_t)(a >> 32) * twop32;
+    low.x |= a & INT64_C(0x00000000ffffffff);
+
+    const double result = (high - twop52) + low.d;
+    return result;
 }
 
 #else
@@ -98,10 +98,10 @@
         /* a is now rounded to DBL_MANT_DIG bits */
     }
     double_bits fb;
-    fb.u.high = ((su_int)s & 0x80000000) |        /* sign */
-                ((e + 1023) << 20)      |        /* exponent */
-                ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */
-    fb.u.low = (su_int)a;                         /* mantissa-low */
+    fb.u.s.high = ((su_int)s & 0x80000000) |        /* sign */
+                  ((e + 1023) << 20)       |        /* exponent */
+                  ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */
+    fb.u.s.low = (su_int)a;                         /* mantissa-low */
     return fb.f;
 }
 #endif
diff --git a/lib/builtins/floattidf.c b/lib/builtins/floattidf.c
index 6331ba5..2702a3c 100644
--- a/lib/builtins/floattidf.c
+++ b/lib/builtins/floattidf.c
@@ -10,7 +10,7 @@
  * This file implements __floattidf for the compiler_rt library.
  *
  * ===----------------------------------------------------------------------===
- */ 
+ */
 
 #include "int_lib.h"
 
@@ -18,11 +18,11 @@
 
 /* Returns: convert a to a double, rounding toward even.*/
 
-/* Assumption: double is a IEEE 64 bit floating point type 
+/* Assumption: double is a IEEE 64 bit floating point type
  *            ti_int is a 128 bit integral type
  */
 
-/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */ 
+/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */
 
 COMPILER_RT_ABI double
 __floattidf(ti_int a)
diff --git a/lib/builtins/floatundidf.c b/lib/builtins/floatundidf.c
index 67aa86e..cfd3a7a 100644
--- a/lib/builtins/floatundidf.c
+++ b/lib/builtins/floatundidf.c
@@ -14,7 +14,7 @@
 
 /* Returns: convert a to a double, rounding toward even. */
 
-/* Assumption: double is a IEEE 64 bit floating point type 
+/* Assumption: double is a IEEE 64 bit floating point type
  *             du_int is a 64 bit integral type
  */
 
@@ -32,24 +32,24 @@
 COMPILER_RT_ABI double
 __floatundidf(du_int a)
 {
-	static const double twop52 = 4503599627370496.0; // 0x1.0p52
-	static const double twop84 = 19342813113834066795298816.0; // 0x1.0p84
-	static const double twop84_plus_twop52 = 19342813118337666422669312.0; // 0x1.00000001p84
-	
-	union { uint64_t x; double d; } high = { .d = twop84 };
-	union { uint64_t x; double d; } low = { .d = twop52 };
-	
-	high.x |= a >> 32;
-	low.x |= a & UINT64_C(0x00000000ffffffff);
-	
-	const double result = (high.d - twop84_plus_twop52) + low.d;
-	return result;
+    static const double twop52 = 4503599627370496.0; // 0x1.0p52
+    static const double twop84 = 19342813113834066795298816.0; // 0x1.0p84
+    static const double twop84_plus_twop52 = 19342813118337666422669312.0; // 0x1.00000001p84
+
+    union { uint64_t x; double d; } high = { .d = twop84 };
+    union { uint64_t x; double d; } low = { .d = twop52 };
+
+    high.x |= a >> 32;
+    low.x |= a & UINT64_C(0x00000000ffffffff);
+
+    const double result = (high.d - twop84_plus_twop52) + low.d;
+    return result;
 }
 
 #else
 /* Support for systems that don't have hardware floating-point; there are no flags to
  * set, and we don't want to code-gen to an unknown soft-float implementation.
- */ 
+ */
 
 COMPILER_RT_ABI double
 __floatundidf(du_int a)
@@ -98,9 +98,9 @@
         /* a is now rounded to DBL_MANT_DIG bits */
     }
     double_bits fb;
-    fb.u.high = ((e + 1023) << 20)      |        /* exponent */
-                ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */
-    fb.u.low = (su_int)a;                         /* mantissa-low  */
+    fb.u.s.high = ((e + 1023) << 20)       |        /* exponent */
+                  ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */
+    fb.u.s.low = (su_int)a;                         /* mantissa-low  */
     return fb.f;
 }
 #endif
diff --git a/lib/builtins/floatuntidf.c b/lib/builtins/floatuntidf.c
index 06202d9..960265d 100644
--- a/lib/builtins/floatuntidf.c
+++ b/lib/builtins/floatuntidf.c
@@ -18,7 +18,7 @@
 
 /* Returns: convert a to a double, rounding toward even. */
 
-/* Assumption: double is a IEEE 64 bit floating point type 
+/* Assumption: double is a IEEE 64 bit floating point type
  *             tu_int is a 128 bit integral type
  */
 
diff --git a/lib/builtins/gcc_personality_v0.c b/lib/builtins/gcc_personality_v0.c
index ed544d3..29e5be3 100644
--- a/lib/builtins/gcc_personality_v0.c
+++ b/lib/builtins/gcc_personality_v0.c
@@ -131,6 +131,26 @@
     return result;
 }
 
+#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) &&                 \
+    !defined(__ARM_DWARF_EH__)
+#define USING_ARM_EHABI 1
+_Unwind_Reason_Code __gnu_unwind_frame(struct _Unwind_Exception *,
+                                       struct _Unwind_Context *);
+#endif
+
+static inline _Unwind_Reason_Code
+continueUnwind(struct _Unwind_Exception *exceptionObject,
+               struct _Unwind_Context *context) {
+#if USING_ARM_EHABI
+    /*
+     * On ARM EHABI the personality routine is responsible for actually
+     * unwinding a single stack frame before returning (ARM EHABI Sec. 6.1).
+     */
+    if (__gnu_unwind_frame(exceptionObject, context) != _URC_OK)
+        return _URC_FAILURE;
+#endif
+    return _URC_CONTINUE_UNWIND;
+}
 
 /*
  * The C compiler makes references to __gcc_personality_v0 in
@@ -147,6 +167,11 @@
 __gcc_personality_sj0(int version, _Unwind_Action actions,
          uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject,
          struct _Unwind_Context *context)
+#elif USING_ARM_EHABI
+/* The ARM EHABI personality routine has a different signature. */
+COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_v0(
+         _Unwind_State state, struct _Unwind_Exception *exceptionObject,
+         struct _Unwind_Context *context)
 #else
 COMPILER_RT_ABI _Unwind_Reason_Code
 __gcc_personality_v0(int version, _Unwind_Action actions,
@@ -156,13 +181,19 @@
 {
     /* Since C does not have catch clauses, there is nothing to do during */
     /* phase 1 (the search phase). */
-    if ( actions & _UA_SEARCH_PHASE ) 
-        return _URC_CONTINUE_UNWIND;
-        
+#if USING_ARM_EHABI
+    /* After resuming from a cleanup we should also continue on to the next
+     * frame straight away. */
+    if ((state & _US_ACTION_MASK) != _US_UNWIND_FRAME_STARTING)
+#else
+    if ( actions & _UA_SEARCH_PHASE )
+#endif
+        return continueUnwind(exceptionObject, context);
+
     /* There is nothing to do if there is no LSDA for this frame. */
     const uint8_t* lsda = (uint8_t*)_Unwind_GetLanguageSpecificData(context);
     if ( lsda == (uint8_t*) 0 )
-        return _URC_CONTINUE_UNWIND;
+        return continueUnwind(exceptionObject, context);
 
     uintptr_t pc = _Unwind_GetIP(context)-1;
     uintptr_t funcStart = _Unwind_GetRegionStart(context);
@@ -205,6 +236,6 @@
     }
 
     /* No landing pad found, continue unwinding. */
-    return _URC_CONTINUE_UNWIND;
+    return continueUnwind(exceptionObject, context);
 }
 
diff --git a/lib/builtins/i386/ashldi3.S b/lib/builtins/i386/ashldi3.S
index 3fbd739..6f05dcf 100644
--- a/lib/builtins/i386/ashldi3.S
+++ b/lib/builtins/i386/ashldi3.S
@@ -56,3 +56,6 @@
 
 #endif // __SSE2__
 #endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/i386/ashrdi3.S b/lib/builtins/i386/ashrdi3.S
index 8f47424..206369f 100644
--- a/lib/builtins/i386/ashrdi3.S
+++ b/lib/builtins/i386/ashrdi3.S
@@ -67,3 +67,6 @@
 
 #endif // __SSE2__
 #endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/i386/divdi3.S b/lib/builtins/i386/divdi3.S
index 2cb0ddd..2fb4bdc 100644
--- a/lib/builtins/i386/divdi3.S
+++ b/lib/builtins/i386/divdi3.S
@@ -160,3 +160,6 @@
 END_COMPILERRT_FUNCTION(__divdi3)
 
 #endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/i386/floatdidf.S b/lib/builtins/i386/floatdidf.S
index dcc32f8..d75dfe6 100644
--- a/lib/builtins/i386/floatdidf.S
+++ b/lib/builtins/i386/floatdidf.S
@@ -37,3 +37,6 @@
 END_COMPILERRT_FUNCTION(__floatdidf)
 
 #endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/i386/floatdisf.S b/lib/builtins/i386/floatdisf.S
index f642767..0874eaa 100644
--- a/lib/builtins/i386/floatdisf.S
+++ b/lib/builtins/i386/floatdisf.S
@@ -30,3 +30,6 @@
 END_COMPILERRT_FUNCTION(__floatdisf)
 
 #endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/i386/floatdixf.S b/lib/builtins/i386/floatdixf.S
index 839b043..1044ef5 100644
--- a/lib/builtins/i386/floatdixf.S
+++ b/lib/builtins/i386/floatdixf.S
@@ -28,3 +28,6 @@
 END_COMPILERRT_FUNCTION(__floatdixf)
 
 #endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/i386/floatundidf.S b/lib/builtins/i386/floatundidf.S
index 8058c2a..fe03234 100644
--- a/lib/builtins/i386/floatundidf.S
+++ b/lib/builtins/i386/floatundidf.S
@@ -50,3 +50,6 @@
 END_COMPILERRT_FUNCTION(__floatundidf)
 
 #endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/i386/floatundisf.S b/lib/builtins/i386/floatundisf.S
index 94c97e2..16000b5 100644
--- a/lib/builtins/i386/floatundisf.S
+++ b/lib/builtins/i386/floatundisf.S
@@ -103,3 +103,6 @@
 END_COMPILERRT_FUNCTION(__floatundisf)
 
 #endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/i386/floatundixf.S b/lib/builtins/i386/floatundixf.S
index 814b52f..c935670 100644
--- a/lib/builtins/i386/floatundixf.S
+++ b/lib/builtins/i386/floatundixf.S
@@ -41,3 +41,6 @@
 END_COMPILERRT_FUNCTION(__floatundixf)
 
 #endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/i386/lshrdi3.S b/lib/builtins/i386/lshrdi3.S
index b80f11a..53e95cf 100644
--- a/lib/builtins/i386/lshrdi3.S
+++ b/lib/builtins/i386/lshrdi3.S
@@ -57,3 +57,6 @@
 
 #endif // __SSE2__
 #endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/i386/moddi3.S b/lib/builtins/i386/moddi3.S
index b9cee9d..a5bf9ce 100644
--- a/lib/builtins/i386/moddi3.S
+++ b/lib/builtins/i386/moddi3.S
@@ -164,3 +164,6 @@
 END_COMPILERRT_FUNCTION(__moddi3)
 
 #endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/i386/muldi3.S b/lib/builtins/i386/muldi3.S
index 15b6b49..1239460 100644
--- a/lib/builtins/i386/muldi3.S
+++ b/lib/builtins/i386/muldi3.S
@@ -28,3 +28,6 @@
 END_COMPILERRT_FUNCTION(__muldi3)
 
 #endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/i386/udivdi3.S b/lib/builtins/i386/udivdi3.S
index 41b2edf..7276136 100644
--- a/lib/builtins/i386/udivdi3.S
+++ b/lib/builtins/i386/udivdi3.S
@@ -113,3 +113,6 @@
 END_COMPILERRT_FUNCTION(__udivdi3)
 
 #endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/i386/umoddi3.S b/lib/builtins/i386/umoddi3.S
index a190a7d..763e821 100644
--- a/lib/builtins/i386/umoddi3.S
+++ b/lib/builtins/i386/umoddi3.S
@@ -124,3 +124,6 @@
 END_COMPILERRT_FUNCTION(__umoddi3)
 
 #endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/int_lib.h b/lib/builtins/int_lib.h
index e66cda3..39eee18 100644
--- a/lib/builtins/int_lib.h
+++ b/lib/builtins/int_lib.h
@@ -35,11 +35,7 @@
 # define COMPILER_RT_ABI __attribute__((pcs("aapcs")))
 #else
 # define ARM_EABI_FNALIAS(aeabi_name, name)
-# if defined(__arm__) && defined(_WIN32) && (!defined(_MSC_VER) || defined(__clang__))
-#   define COMPILER_RT_ABI __attribute__((pcs("aapcs")))
-# else
-#   define COMPILER_RT_ABI
-# endif
+# define COMPILER_RT_ABI
 #endif
 
 #ifdef _MSC_VER
@@ -95,14 +91,14 @@
 #include <intrin.h>
 
 uint32_t __inline __builtin_ctz(uint32_t value) {
-  uint32_t trailing_zero = 0;
+  unsigned long trailing_zero = 0;
   if (_BitScanForward(&trailing_zero, value))
     return trailing_zero;
   return 32;
 }
 
 uint32_t __inline __builtin_clz(uint32_t value) {
-  uint32_t leading_zero = 0;
+  unsigned long leading_zero = 0;
   if (_BitScanReverse(&leading_zero, value))
     return 31 - leading_zero;
   return 32;
@@ -110,7 +106,7 @@
 
 #if defined(_M_ARM) || defined(_M_X64)
 uint32_t __inline __builtin_clzll(uint64_t value) {
-  uint32_t leading_zero = 0;
+  unsigned long leading_zero = 0;
   if (_BitScanReverse64(&leading_zero, value))
     return 63 - leading_zero;
   return 64;
diff --git a/lib/builtins/ppc/restFP.S b/lib/builtins/ppc/restFP.S
index 9503289..507e756 100644
--- a/lib/builtins/ppc/restFP.S
+++ b/lib/builtins/ppc/restFP.S
@@ -41,3 +41,6 @@
         lwz     r0,8(r1)
         mtlr	r0
         blr
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/ppc/saveFP.S b/lib/builtins/ppc/saveFP.S
index 72bd459..20b06ff 100644
--- a/lib/builtins/ppc/saveFP.S
+++ b/lib/builtins/ppc/saveFP.S
@@ -38,3 +38,6 @@
         stfd    f31,-8(r1)
         stw      r0,8(r1)
         blr
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/x86_64/floatundidf.S b/lib/builtins/x86_64/floatundidf.S
index 3cd5d02..094a68d 100644
--- a/lib/builtins/x86_64/floatundidf.S
+++ b/lib/builtins/x86_64/floatundidf.S
@@ -47,3 +47,6 @@
 END_COMPILERRT_FUNCTION(__floatundidf)
 
 #endif // __x86_64__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/x86_64/floatundisf.S b/lib/builtins/x86_64/floatundisf.S
index 61952f4..7c9f75e 100644
--- a/lib/builtins/x86_64/floatundisf.S
+++ b/lib/builtins/x86_64/floatundisf.S
@@ -33,3 +33,6 @@
 END_COMPILERRT_FUNCTION(__floatundisf)
 
 #endif // __x86_64__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/builtins/x86_64/floatundixf.S b/lib/builtins/x86_64/floatundixf.S
index 92961c8..28a096b 100644
--- a/lib/builtins/x86_64/floatundixf.S
+++ b/lib/builtins/x86_64/floatundixf.S
@@ -66,3 +66,6 @@
 #endif // __x86_64__
 
 */
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/cfi/CMakeLists.txt b/lib/cfi/CMakeLists.txt
index 24e5181..56ef882 100644
--- a/lib/cfi/CMakeLists.txt
+++ b/lib/cfi/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_custom_target(cfi)
+set_target_properties(cfi PROPERTIES FOLDER "Compiler-RT Misc")
 
 set(CFI_SOURCES cfi.cc)
 
@@ -30,11 +31,9 @@
                 RTSanitizerCommon
                 RTSanitizerCommonLibc
 		RTUbsan
-		RTUbsan_cxx
     CFLAGS ${CFI_CFLAGS} ${CFI_DIAG_CFLAGS}
     PARENT_TARGET cfi)
 endforeach()
 
-add_compiler_rt_resource_file(cfi_blacklist cfi_blacklist.txt)
-add_dependencies(cfi cfi_blacklist)
+add_compiler_rt_resource_file(cfi_blacklist cfi_blacklist.txt cfi)
 add_dependencies(compiler-rt cfi)
diff --git a/lib/cfi/cfi.cc b/lib/cfi/cfi.cc
index 711866f..ca2cf8f 100644
--- a/lib/cfi/cfi.cc
+++ b/lib/cfi/cfi.cc
@@ -11,16 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-// FIXME: Intercept dlopen/dlclose.
-// FIXME: Support diagnostic mode.
-// FIXME: Harden:
-//  * mprotect shadow, use mremap for updates
-//  * something else equally important
-
 #include <assert.h>
 #include <elf.h>
 #include <link.h>
 #include <string.h>
+#include <sys/mman.h>
 
 typedef ElfW(Phdr) Elf_Phdr;
 typedef ElfW(Ehdr) Elf_Ehdr;
@@ -31,19 +26,55 @@
 #include "ubsan/ubsan_init.h"
 #include "ubsan/ubsan_flags.h"
 
-static uptr __cfi_shadow;
+#ifdef CFI_ENABLE_DIAG
+#include "ubsan/ubsan_handlers.h"
+#endif
+
+namespace __cfi {
+
+#define kCfiShadowLimitsStorageSize 4096 // 1 page
+// Let's hope that the data segment is mapped with 4K pages.
+// The pointer to the cfi shadow region is stored at the start of this page.
+// The rest of the page is unused and re-mapped read-only.
+static union {
+  char space[kCfiShadowLimitsStorageSize];
+  struct {
+    uptr start;
+    uptr size;
+  } limits;
+} cfi_shadow_limits_storage
+    __attribute__((aligned(kCfiShadowLimitsStorageSize)));
 static constexpr uptr kShadowGranularity = 12;
 static constexpr uptr kShadowAlign = 1UL << kShadowGranularity; // 4096
 
 static constexpr uint16_t kInvalidShadow = 0;
 static constexpr uint16_t kUncheckedShadow = 0xFFFFU;
 
-static uint16_t *mem_to_shadow(uptr x) {
-  return (uint16_t *)(__cfi_shadow + ((x >> kShadowGranularity) << 1));
+// Get the start address of the CFI shadow region.
+uptr GetShadow() {
+  return cfi_shadow_limits_storage.limits.start;
 }
 
-typedef int (*CFICheckFn)(u64, void *);
+uptr GetShadowSize() {
+  return cfi_shadow_limits_storage.limits.size;
+}
 
+// This will only work while the shadow is not allocated.
+void SetShadowSize(uptr size) {
+  cfi_shadow_limits_storage.limits.size = size;
+}
+
+uptr MemToShadowOffset(uptr x) {
+  return (x >> kShadowGranularity) << 1;
+}
+
+uint16_t *MemToShadow(uptr x, uptr shadow_base) {
+  return (uint16_t *)(shadow_base + MemToShadowOffset(x));
+}
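+
+// Worked example: with kShadowGranularity == 12, an application address such
+// as 0x7f0000001234 maps to shadow offset (0x7f0000001234 >> 12) << 1 ==
+// 0xfe0000002, i.e. each 4096-byte page of application memory is described by
+// one 2-byte shadow entry.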
+
+typedef int (*CFICheckFn)(u64, void *, void *);
+
+// This class reads and decodes the shadow contents.
 class ShadowValue {
   uptr addr;
   uint16_t v;
@@ -61,49 +92,91 @@
     return reinterpret_cast<CFICheckFn>(p);
   }
 
-  // Load a shadow valud for the given application memory address.
+  // Load a shadow value for the given application memory address.
   static const ShadowValue load(uptr addr) {
-    return ShadowValue(addr, *mem_to_shadow(addr));
+    uptr shadow_base = GetShadow();
+    uptr shadow_offset = MemToShadowOffset(addr);
+    if (shadow_offset > GetShadowSize())
+      return ShadowValue(addr, kInvalidShadow);
+    else
+      return ShadowValue(
+          addr, *reinterpret_cast<uint16_t *>(shadow_base + shadow_offset));
   }
 };
 
-static void fill_shadow_constant(uptr begin, uptr end, uint16_t v) {
-  assert(v == kInvalidShadow || v == kUncheckedShadow);
-  uint16_t *shadow_begin = mem_to_shadow(begin);
-  uint16_t *shadow_end = mem_to_shadow(end - 1) + 1;
-  memset(shadow_begin, v, (shadow_end - shadow_begin) * sizeof(*shadow_begin));
+class ShadowBuilder {
+  uptr shadow_;
+
+public:
+  // Allocate a new empty shadow (for the entire address space) on the side.
+  void Start();
+  // Mark the given address range as unchecked.
+  // This is used for uninstrumented libraries like libc.
+  // Any CFI check with a target in that range will pass.
+  void AddUnchecked(uptr begin, uptr end);
+  // Mark the given address range as belonging to a library with the given
+  // cfi_check function.
+  void Add(uptr begin, uptr end, uptr cfi_check);
+  // Finish shadow construction. Atomically switch the current active shadow
+  // region with the newly constructed one and deallocate the former.
+  void Install();
+};
+
+void ShadowBuilder::Start() {
+  shadow_ = (uptr)MmapNoReserveOrDie(GetShadowSize(), "CFI shadow");
+  VReport(1, "CFI: shadow at %zx .. %zx\n", shadow_, shadow_ + GetShadowSize());
 }
 
-static void fill_shadow(uptr begin, uptr end, uptr cfi_check) {
+void ShadowBuilder::AddUnchecked(uptr begin, uptr end) {
+  uint16_t *shadow_begin = MemToShadow(begin, shadow_);
+  uint16_t *shadow_end = MemToShadow(end - 1, shadow_) + 1;
+  memset(shadow_begin, kUncheckedShadow,
+         (shadow_end - shadow_begin) * sizeof(*shadow_begin));
+}
+
+void ShadowBuilder::Add(uptr begin, uptr end, uptr cfi_check) {
   assert((cfi_check & (kShadowAlign - 1)) == 0);
 
   // Don't fill anything below cfi_check. We can not represent those addresses
   // in the shadow, and must make sure at codegen to place all valid call
   // targets above cfi_check.
-  uptr p = Max(begin, cfi_check);
-  uint16_t *s = mem_to_shadow(p);
-  uint16_t *s_end = mem_to_shadow(end - 1) + 1;
-  uint16_t sv = ((p - cfi_check) >> kShadowGranularity) + 1;
+  begin = Max(begin, cfi_check);
+  uint16_t *s = MemToShadow(begin, shadow_);
+  uint16_t *s_end = MemToShadow(end - 1, shadow_) + 1;
+  uint16_t sv = ((begin - cfi_check) >> kShadowGranularity) + 1;
   for (; s < s_end; s++, sv++)
     *s = sv;
+}
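+
+// Worked example for the encoding above: with kShadowAlign == 4096, a check
+// target that lies two pages above its cfi_check gets shadow value
+// ((2 * 4096) >> kShadowGranularity) + 1 == 3, which lets the load path
+// recover the cfi_check address from the target address and the stored value.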
 
-  // Sanity checks.
-  uptr q = p & ~(kShadowAlign - 1);
-  for (; q < end; q += kShadowAlign) {
-    assert((uptr)ShadowValue::load(q).get_cfi_check() == cfi_check);
-    assert((uptr)ShadowValue::load(q + kShadowAlign / 2).get_cfi_check() ==
-           cfi_check);
-    assert((uptr)ShadowValue::load(q + kShadowAlign - 1).get_cfi_check() ==
-           cfi_check);
+#if SANITIZER_LINUX
+void ShadowBuilder::Install() {
+  MprotectReadOnly(shadow_, GetShadowSize());
+  uptr main_shadow = GetShadow();
+  if (main_shadow) {
+    // Update.
+    void *res = mremap((void *)shadow_, GetShadowSize(), GetShadowSize(),
+                       MREMAP_MAYMOVE | MREMAP_FIXED, (void *)main_shadow);
+    CHECK(res != MAP_FAILED);
+  } else {
+    // Initial setup.
+    CHECK_EQ(kCfiShadowLimitsStorageSize, GetPageSizeCached());
+    CHECK_EQ(0, GetShadow());
+    cfi_shadow_limits_storage.limits.start = shadow_;
+    MprotectReadOnly((uptr)&cfi_shadow_limits_storage,
+                     sizeof(cfi_shadow_limits_storage));
+    CHECK_EQ(shadow_, GetShadow());
   }
 }
+#else
+#error not implemented
+#endif
 
 // This is a workaround for a glibc bug:
 // https://sourceware.org/bugzilla/show_bug.cgi?id=15199
 // Other platforms can, hopefully, just do
 //    dlopen(RTLD_NOLOAD | RTLD_LAZY)
 //    dlsym("__cfi_check").
-static uptr find_cfi_check_in_dso(dl_phdr_info *info) {
+uptr find_cfi_check_in_dso(dl_phdr_info *info) {
   const ElfW(Dyn) *dynamic = nullptr;
   for (int i = 0; i < info->dlpi_phnum; ++i) {
     if (info->dlpi_phdr[i].p_type == PT_DYNAMIC) {
@@ -157,11 +230,13 @@
   return 0;
 }
 
-static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *data) {
+int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *data) {
   uptr cfi_check = find_cfi_check_in_dso(info);
   if (cfi_check)
     VReport(1, "Module '%s' __cfi_check %zx\n", info->dlpi_name, cfi_check);
 
+  ShadowBuilder *b = reinterpret_cast<ShadowBuilder *>(data);
+
   for (int i = 0; i < info->dlpi_phnum; i++) {
     const Elf_Phdr *phdr = &info->dlpi_phdr[i];
     if (phdr->p_type == PT_LOAD) {
@@ -174,28 +249,69 @@
       uptr cur_end = cur_beg + phdr->p_memsz;
       if (cfi_check) {
         VReport(1, "   %zx .. %zx\n", cur_beg, cur_end);
-        fill_shadow(cur_beg, cur_end, cfi_check ? cfi_check : (uptr)(-1));
+        b->Add(cur_beg, cur_end, cfi_check);
       } else {
-        fill_shadow_constant(cur_beg, cur_end, kUncheckedShadow);
+        b->AddUnchecked(cur_beg, cur_end);
       }
     }
   }
   return 0;
 }
 
-// Fill shadow for the initial libraries.
-static void init_shadow() {
-  dl_iterate_phdr(dl_iterate_phdr_cb, nullptr);
+// Init or update shadow for the current set of loaded libraries.
+void UpdateShadow() {
+  ShadowBuilder b;
+  b.Start();
+  dl_iterate_phdr(dl_iterate_phdr_cb, &b);
+  b.Install();
 }
 
-extern "C" SANITIZER_INTERFACE_ATTRIBUTE
-void __cfi_slowpath(u64 CallSiteTypeId, void *Ptr) {
+void InitShadow() {
+  CHECK_EQ(0, GetShadow());
+  CHECK_EQ(0, GetShadowSize());
+
+  uptr vma = GetMaxVirtualAddress();
+  // Every 2**kShadowGranularity bytes of application memory map to 2 bytes
+  // of shadow.
+  SetShadowSize((vma >> (kShadowGranularity - 1)) + 1);
+  VReport(1, "CFI: VMA size %zx, shadow size %zx\n", vma, GetShadowSize());
+
+  UpdateShadow();
+}
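+
+// Rough size example: for a 47-bit virtual address space (vma == 2^47), the
+// computation above reserves (2^47 >> 11) + 1 bytes, i.e. about 64 GiB of
+// NORESERVE shadow address space that is only committed as it is written.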
+
+THREADLOCAL int in_loader;
+BlockingMutex shadow_update_lock(LINKER_INITIALIZED);
+
+void EnterLoader() {
+  if (in_loader == 0) {
+    shadow_update_lock.Lock();
+  }
+  ++in_loader;
+}
+
+void ExitLoader() {
+  CHECK(in_loader > 0);
+  --in_loader;
+  UpdateShadow();
+  if (in_loader == 0) {
+    shadow_update_lock.Unlock();
+  }
+}
+
+ALWAYS_INLINE void CfiSlowPathCommon(u64 CallSiteTypeId, void *Ptr,
+                                     void *DiagData) {
   uptr Addr = (uptr)Ptr;
   VReport(3, "__cfi_slowpath: %llx, %p\n", CallSiteTypeId, Ptr);
   ShadowValue sv = ShadowValue::load(Addr);
   if (sv.is_invalid()) {
-    VReport(2, "CFI: invalid memory region for a function pointer (shadow==0): %p\n", Ptr);
-    Die();
+    VReport(1, "CFI: invalid memory region for a check target: %p\n", Ptr);
+#ifdef CFI_ENABLE_DIAG
+    if (DiagData) {
+      __ubsan_handle_cfi_check_fail(
+          reinterpret_cast<__ubsan::CFICheckFailData *>(DiagData), Addr, false);
+      return;
+    }
+#endif
+    Trap();
   }
   if (sv.is_unchecked()) {
     VReport(2, "CFI: unchecked call (shadow=FFFF): %p\n", Ptr);
@@ -203,10 +319,10 @@
   }
   CFICheckFn cfi_check = sv.get_cfi_check();
   VReport(2, "__cfi_check at %p\n", cfi_check);
-  cfi_check(CallSiteTypeId, Ptr);
+  cfi_check(CallSiteTypeId, Ptr, DiagData);
 }
 
-static void InitializeFlags() {
+void InitializeFlags() {
   SetCommonFlagsDefaults();
 #ifdef CFI_ENABLE_DIAG
   __ubsan::Flags *uf = __ubsan::flags();
@@ -227,15 +343,54 @@
   ubsan_parser.ParseString(GetEnv("UBSAN_OPTIONS"));
 #endif
 
-  SetVerbosity(common_flags()->verbosity);
+  InitializeCommonFlags();
 
-  if (Verbosity()) ReportUnrecognizedFlags();
+  if (Verbosity())
+    ReportUnrecognizedFlags();
 
   if (common_flags()->help) {
     cfi_parser.PrintFlagDescriptions();
   }
 }
 
+} // namespace __cfi
+
+using namespace __cfi;
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__cfi_slowpath(u64 CallSiteTypeId, void *Ptr) {
+  CfiSlowPathCommon(CallSiteTypeId, Ptr, nullptr);
+}
+
+#ifdef CFI_ENABLE_DIAG
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__cfi_slowpath_diag(u64 CallSiteTypeId, void *Ptr, void *DiagData) {
+  CfiSlowPathCommon(CallSiteTypeId, Ptr, DiagData);
+}
+#endif
+
+// Setup shadow for dlopen()ed libraries.
+// The actual shadow setup happens after dlopen() returns, which means that
+// a library can not be a target of any CFI checks while its constructors are
+// running. It's unclear how to fix this without some extra help from libc.
+// In glibc, mmap inside dlopen is not interceptable.
+// Maybe a seccomp-bpf filter?
+// We could insert a high-priority constructor into the library, but that would
+// not help with the uninstrumented libraries.
+INTERCEPTOR(void*, dlopen, const char *filename, int flag) {
+  EnterLoader();
+  void *handle = REAL(dlopen)(filename, flag);
+  ExitLoader();
+  return handle;
+}
+
+INTERCEPTOR(int, dlclose, void *handle) {
+  EnterLoader();
+  int res = REAL(dlclose)(handle);
+  ExitLoader();
+  return res;
+}
+
 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
 #if !SANITIZER_CAN_USE_PREINIT_ARRAY
 // On ELF platforms, the constructor is invoked using .preinit_array (see below)
@@ -244,16 +399,10 @@
 void __cfi_init() {
   SanitizerToolName = "CFI";
   InitializeFlags();
+  InitShadow();
 
-  uptr vma = GetMaxVirtualAddress();
-  // Shadow is 2 -> 2**kShadowGranularity.
-  uptr shadow_size = (vma >> (kShadowGranularity - 1)) + 1;
-  VReport(1, "CFI: VMA size %zx, shadow size %zx\n", vma, shadow_size);
-  void *shadow = MmapNoReserveOrDie(shadow_size, "CFI shadow");
-  VReport(1, "CFI: shadow at %zx .. %zx\n", shadow,
-          reinterpret_cast<uptr>(shadow) + shadow_size);
-  __cfi_shadow = (uptr)shadow;
-  init_shadow();
+  INTERCEPT_FUNCTION(dlopen);
+  INTERCEPT_FUNCTION(dlclose);
 
 #ifdef CFI_ENABLE_DIAG
   __ubsan::InitAsPlugin();
diff --git a/lib/dfsan/CMakeLists.txt b/lib/dfsan/CMakeLists.txt
index 19a7909..eca402d 100644
--- a/lib/dfsan/CMakeLists.txt
+++ b/lib/dfsan/CMakeLists.txt
@@ -6,12 +6,14 @@
   dfsan_custom.cc
   dfsan_interceptors.cc)
 set(DFSAN_COMMON_CFLAGS ${SANITIZER_COMMON_CFLAGS})
-append_no_rtti_flag(DFSAN_COMMON_CFLAGS)
+append_rtti_flag(OFF DFSAN_COMMON_CFLAGS)
 # Prevent clang from generating libc calls.
 append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG -ffreestanding DFSAN_COMMON_CFLAGS)
 
 # Static runtime library.
 add_custom_target(dfsan)
+set_target_properties(dfsan PROPERTIES FOLDER "Compiler-RT Misc")
+
 foreach(arch ${DFSAN_SUPPORTED_ARCH})
   set(DFSAN_CFLAGS ${DFSAN_COMMON_CFLAGS})
   append_list_if(COMPILER_RT_HAS_FPIE_FLAG -fPIE DFSAN_CFLAGS)
diff --git a/lib/dfsan/dfsan.cc b/lib/dfsan/dfsan.cc
index 7285f20..4156000 100644
--- a/lib/dfsan/dfsan.cc
+++ b/lib/dfsan/dfsan.cc
@@ -362,12 +362,13 @@
   RegisterCommonFlags(&parser);
   RegisterDfsanFlags(&parser, &flags());
   parser.ParseString(GetEnv("DFSAN_OPTIONS"));
-  SetVerbosity(common_flags()->verbosity);
+  InitializeCommonFlags();
   if (Verbosity()) ReportUnrecognizedFlags();
   if (common_flags()->help) parser.PrintFlagDescriptions();
 }
 
 static void InitializePlatformEarly() {
+  AvoidCVE_2016_2143();
 #ifdef DFSAN_RUNTIME_VMA
   __dfsan::vmaSize =
     (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1);
@@ -411,7 +412,7 @@
   // case by disabling memory protection when ASLR is disabled.
   uptr init_addr = (uptr)&dfsan_init;
   if (!(init_addr >= UnusedAddr() && init_addr < AppAddr()))
-    MmapNoAccess(UnusedAddr(), AppAddr() - UnusedAddr());
+    MmapFixedNoAccess(UnusedAddr(), AppAddr() - UnusedAddr());
 
   InitializeInterceptors();
 
diff --git a/lib/esan/CMakeLists.txt b/lib/esan/CMakeLists.txt
new file mode 100644
index 0000000..2a0a71b
--- /dev/null
+++ b/lib/esan/CMakeLists.txt
@@ -0,0 +1,43 @@
+# Build for the EfficiencySanitizer runtime support library.
+
+add_custom_target(esan)
+set_target_properties(esan PROPERTIES FOLDER "Compiler-RT Misc")
+
+set(ESAN_RTL_CFLAGS ${SANITIZER_COMMON_CFLAGS})
+append_rtti_flag(OFF ESAN_RTL_CFLAGS)
+
+include_directories(..)
+
+set(ESAN_SOURCES
+  esan.cpp
+  esan_flags.cpp
+  esan_interface.cpp
+  esan_interceptors.cpp
+  esan_linux.cpp
+  esan_sideline_linux.cpp
+  cache_frag.cpp
+  working_set.cpp
+  working_set_posix.cpp)
+
+foreach (arch ${ESAN_SUPPORTED_ARCH})
+  add_compiler_rt_runtime(clang_rt.esan
+    STATIC
+    ARCHS ${arch}
+    SOURCES ${ESAN_SOURCES}
+            $<TARGET_OBJECTS:RTInterception.${arch}>
+            $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
+            $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>
+    CFLAGS ${ESAN_RTL_CFLAGS})
+  add_sanitizer_rt_symbols(clang_rt.esan
+    ARCHS ${arch}
+    EXTRA esan.syms.extra)
+  add_dependencies(esan
+    clang_rt.esan-${arch}
+    clang_rt.esan-${arch}-symbols)
+endforeach()
+
+add_dependencies(compiler-rt esan)
+
+if (COMPILER_RT_INCLUDE_TESTS)
+  # TODO(bruening): add tests via add_subdirectory(tests)
+endif()
diff --git a/lib/esan/cache_frag.cpp b/lib/esan/cache_frag.cpp
new file mode 100644
index 0000000..a3e612d
--- /dev/null
+++ b/lib/esan/cache_frag.cpp
@@ -0,0 +1,208 @@
+//===-- cache_frag.cpp ----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// This file contains cache fragmentation-specific code.
+//===----------------------------------------------------------------------===//
+
+#include "esan.h"
+#include "esan_flags.h"
+#include "sanitizer_common/sanitizer_addrhashmap.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include <string.h>
+
+namespace __esan {
+
+//===-- Struct field access counter runtime -------------------------------===//
+
+// This should be kept consistent with LLVM's EfficiencySanitizer StructInfo.
+struct StructInfo {
+  const char *StructName;
+  u32 Size;
+  u32 NumFields;
+  u32 *FieldOffset;           // auxiliary struct field info.
+  u32 *FieldSize;             // auxiliary struct field info.
+  const char **FieldTypeName; // auxiliary struct field info.
+  u64 *FieldCounters;
+  u64 *ArrayCounter;
+  bool hasAuxFieldInfo() { return FieldOffset != nullptr; }
+};
+
+// This should be kept consistent with LLVM's EfficiencySanitizer CacheFragInfo.
+// The tool-specific information per compilation unit (module).
+struct CacheFragInfo {
+  const char *UnitName;
+  u32 NumStructs;
+  StructInfo *Structs;
+};
+
+struct StructCounter {
+  StructInfo *Struct;
+  u64 Count; // The total access count of the struct.
+  u64 Ratio; // Difference ratio for the struct layout access.
+};
+
+// We use StructHashMap to keep track of a unique copy of StructCounter.
+typedef AddrHashMap<StructCounter, 31051> StructHashMap;
+struct Context {
+  StructHashMap StructMap;
+  u32 NumStructs;
+  u64 TotalCount; // The total access count of all structs.
+};
+static Context *Ctx;
+
+static void reportStructSummary() {
+  // FIXME: provide a better struct field access summary report.
+  Report("%s: total struct field access count = %llu\n", SanitizerToolName,
+         Ctx->TotalCount);
+}
+
+// FIXME: we are still exploring proper ways to evaluate the difference between
+// struct field counts.  Currently, we use a simple formula to calculate the
+// difference ratio: V1/V2.
+static inline u64 computeDifferenceRatio(u64 Val1, u64 Val2) {
+  if (Val2 > Val1) {
+    Swap(Val1, Val2);
+  }
+  if (Val2 == 0)
+    Val2 = 1;
+  return (Val1 / Val2);
+}
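+
+// For example, computeDifferenceRatio(12, 3) returns 4, while
+// computeDifferenceRatio(0, 5) swaps the operands so the larger value is the
+// numerator, clamps the divisor to 1, and returns 5.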
+
+static void reportStructCounter(StructHashMap::Handle &Handle) {
+  const u32 TypePrintLimit = 512;
+  const char *type, *start, *end;
+  StructInfo *Struct = Handle->Struct;
+  // Union field address calculation is done via bitcast instead of GEP,
+  // so the count for union is always 0.
+  // We skip the union report to avoid confusion.
+  if (strncmp(Struct->StructName, "union.", 6) == 0)
+    return;
+  // Remove the '.' after class/struct during print.
+  if (strncmp(Struct->StructName, "class.", 6) == 0) {
+    type = "class";
+    start = &Struct->StructName[6];
+  } else {
+    type = "struct";
+    start = &Struct->StructName[7];
+  }
+  // Remove the suffixes with '#' during print.
+  end = strchr(start, '#');
+  CHECK(end != nullptr);
+  Report("  %s %.*s\n", type, end - start, start);
+  Report("   size = %u, count = %llu, ratio = %llu, array access = %llu\n",
+         Struct->Size, Handle->Count, Handle->Ratio, *Struct->ArrayCounter);
+  if (Struct->hasAuxFieldInfo()) {
+    for (u32 i = 0; i < Struct->NumFields; ++i) {
+      Report("   #%2u: offset = %u,\t size = %u,"
+             "\t count = %llu,\t type = %.*s\n",
+             i, Struct->FieldOffset[i], Struct->FieldSize[i],
+             Struct->FieldCounters[i], TypePrintLimit, Struct->FieldTypeName[i]);
+    }
+  } else {
+    for (u32 i = 0; i < Struct->NumFields; ++i) {
+      Report("   #%2u: count = %llu\n", i, Struct->FieldCounters[i]);
+    }
+  }
+}
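+
+// With hypothetical counters, the code above prints a report of the form
+// (values made up for illustration):
+//   struct Node
+//    size = 24, count = 1024, ratio = 3, array access = 0
+//    # 0: offset = 0,  size = 8,   count = 640,  type = i64
+//    # 1: offset = 8,  size = 16,  count = 384,  type = %struct.Node*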
+
+static void computeStructRatio(StructHashMap::Handle &Handle) {
+  Handle->Ratio = 0;
+  Handle->Count = Handle->Struct->FieldCounters[0];
+  for (u32 i = 1; i < Handle->Struct->NumFields; ++i) {
+    Handle->Count += Handle->Struct->FieldCounters[i];
+    Handle->Ratio += computeDifferenceRatio(
+        Handle->Struct->FieldCounters[i - 1], Handle->Struct->FieldCounters[i]);
+  }
+  Ctx->TotalCount += Handle->Count;
+  if (Handle->Ratio >= (u64)getFlags()->report_threshold ||
+      (Verbosity() >= 1 && Handle->Count > 0))
+    reportStructCounter(Handle);
+}
+
+static void registerStructInfo(CacheFragInfo *CacheFrag) {
+  for (u32 i = 0; i < CacheFrag->NumStructs; ++i) {
+    StructInfo *Struct = &CacheFrag->Structs[i];
+    StructHashMap::Handle H(&Ctx->StructMap, (uptr)Struct->FieldCounters);
+    if (H.created()) {
+      VPrintf(2, " Register %s: %u fields\n", Struct->StructName,
+              Struct->NumFields);
+      H->Struct = Struct;
+      ++Ctx->NumStructs;
+    } else {
+      VPrintf(2, " Duplicated %s: %u fields\n", Struct->StructName,
+              Struct->NumFields);
+    }
+  }
+}
+
+static void unregisterStructInfo(CacheFragInfo *CacheFrag) {
+  // FIXME: if the library is unloaded before finalizeCacheFrag, we should
+  // collect the result for later report.
+  for (u32 i = 0; i < CacheFrag->NumStructs; ++i) {
+    StructInfo *Struct = &CacheFrag->Structs[i];
+    StructHashMap::Handle H(&Ctx->StructMap, (uptr)Struct->FieldCounters, true);
+    if (H.exists()) {
+      VPrintf(2, " Unregister %s: %u fields\n", Struct->StructName,
+              Struct->NumFields);
+      // FIXME: we should move this call to finalizeCacheFrag once we can
+      // iterate over the hash map there.
+      computeStructRatio(H);
+      --Ctx->NumStructs;
+    } else {
+      VPrintf(2, " Duplicated %s: %u fields\n", Struct->StructName,
+              Struct->NumFields);
+    }
+  }
+  static bool Reported = false;
+  if (Ctx->NumStructs == 0 && !Reported) {
+    Reported = true;
+    reportStructSummary();
+  }
+}
+
+//===-- Init/exit functions -----------------------------------------------===//
+
+void processCacheFragCompilationUnitInit(void *Ptr) {
+  CacheFragInfo *CacheFrag = (CacheFragInfo *)Ptr;
+  VPrintf(2, "in esan::%s: %s with %u class(es)/struct(s)\n", __FUNCTION__,
+          CacheFrag->UnitName, CacheFrag->NumStructs);
+  registerStructInfo(CacheFrag);
+}
+
+void processCacheFragCompilationUnitExit(void *Ptr) {
+  CacheFragInfo *CacheFrag = (CacheFragInfo *)Ptr;
+  VPrintf(2, "in esan::%s: %s with %u class(es)/struct(s)\n", __FUNCTION__,
+          CacheFrag->UnitName, CacheFrag->NumStructs);
+  unregisterStructInfo(CacheFrag);
+}
+
+void initializeCacheFrag() {
+  VPrintf(2, "in esan::%s\n", __FUNCTION__);
+  // We use placement new to initialize Ctx before C++ static initialization.
+  // We make CtxMem 8-byte aligned for atomic operations in AddrHashMap.
+  static u64 CtxMem[sizeof(Context) / sizeof(u64) + 1];
+  Ctx = new (CtxMem) Context();
+  Ctx->NumStructs = 0;
+}
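+
+// Note: sizing CtxMem as sizeof(Context) / sizeof(u64) + 1 slots of u64 both
+// rounds the buffer up to at least sizeof(Context) bytes and gives it the
+// 8-byte alignment needed for the atomic operations mentioned above.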
+
+int finalizeCacheFrag() {
+  VPrintf(2, "in esan::%s\n", __FUNCTION__);
+  return 0;
+}
+
+void reportCacheFrag() {
+  VPrintf(2, "in esan::%s\n", __FUNCTION__);
+  // FIXME: Not yet implemented.  We need to iterate over all of the
+  // compilation unit data.
+}
+
+} // namespace __esan
diff --git a/lib/esan/cache_frag.h b/lib/esan/cache_frag.h
new file mode 100644
index 0000000..646d3f8
--- /dev/null
+++ b/lib/esan/cache_frag.h
@@ -0,0 +1,29 @@
+//===-- cache_frag.h --------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Header for cache-fragmentation-specific code.
+//===----------------------------------------------------------------------===//
+
+#ifndef CACHE_FRAG_H
+#define CACHE_FRAG_H
+
+namespace __esan {
+
+void processCacheFragCompilationUnitInit(void *Ptr);
+void processCacheFragCompilationUnitExit(void *Ptr);
+
+void initializeCacheFrag();
+int finalizeCacheFrag();
+void reportCacheFrag();
+
+} // namespace __esan
+
+#endif  // CACHE_FRAG_H
diff --git a/lib/esan/esan.cpp b/lib/esan/esan.cpp
new file mode 100644
index 0000000..2fb7789
--- /dev/null
+++ b/lib/esan/esan.cpp
@@ -0,0 +1,270 @@
+//===-- esan.cpp ----------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Main file (entry points) for the Esan run-time.
+//===----------------------------------------------------------------------===//
+
+#include "esan.h"
+#include "esan_flags.h"
+#include "esan_interface_internal.h"
+#include "esan_shadow.h"
+#include "cache_frag.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_flags.h"
+#include "working_set.h"
+
+// See comment below.
+extern "C" {
+extern void __cxa_atexit(void (*function)(void));
+}
+
+namespace __esan {
+
+bool EsanIsInitialized;
+bool EsanDuringInit;
+ShadowMapping Mapping;
+
+// Different tools use different scales within the same shadow mapping scheme.
+// The scale used here must match that used by the compiler instrumentation.
+// This array is indexed by the ToolType enum.
+static const uptr ShadowScale[] = {
+  0, // ESAN_None.
+  2, // ESAN_CacheFrag: 4B:1B, so 4 to 1 == >>2.
+  6, // ESAN_WorkingSet: 64B:1B, so 64 to 1 == >>6.
+};
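+// As a worked example of these ratios: at the working-set scale of 6, one
+// shadow byte covers a 64-byte cache line, so shadowing a 1GB app region
+// takes 1GB >> 6 = 16MB; at the cache-frag scale of 2 it would take
+// 1GB >> 2 = 256MB.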
+
+// We are combining multiple performance tuning tools under the umbrella of
+// one EfficiencySanitizer super-tool.  Most of our tools have very similar
+// memory access instrumentation, shadow memory mapping, libc interception,
+// etc., and there is typically more shared code than distinct code.
+//
+// We are not willing to dispatch on tool dynamically in our fastpath
+// instrumentation: thus, which tool to use is a static option selected
+// at compile time and passed to __esan_init().
+//
+// We are willing to pay the overhead of tool dispatch in the slowpath to more
+// easily share code.  We expect to only come here rarely.
+// If this becomes a performance hit, we can add separate interface
+// routines for each subtool (e.g., __esan_cache_frag_aligned_load_4).
+// But for libc interceptors, we'll have to do one of the following:
+// A) Add multiple-include support to sanitizer_common_interceptors.inc,
+//    instantiate it separately for each tool, and call the selected
+//    tool's intercept setup code.
+// B) Build separate static runtime libraries, one for each tool.
+// C) Completely split the tools into separate sanitizers.
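+//
+// For illustration only (not part of the current interface), a per-subtool
+// fastpath entry point as suggested above might look roughly like this,
+// avoiding the __esan_which_tool checks used in the slowpath below:
+//
+//   extern "C" void __esan_cache_frag_aligned_load_4(void *Addr) {
+//     // Statically dispatch straight to the cache-frag shadow update.
+//   }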
+
+void processRangeAccess(uptr PC, uptr Addr, int Size, bool IsWrite) {
+  VPrintf(3, "in esan::%s %p: %c %p %d\n", __FUNCTION__, PC,
+          IsWrite ? 'w' : 'r', Addr, Size);
+  if (__esan_which_tool == ESAN_CacheFrag) {
+    // TODO(bruening): add shadow mapping and update shadow bits here.
+    // We'll move this to cache_frag.cpp once we have something.
+  } else if (__esan_which_tool == ESAN_WorkingSet) {
+    processRangeAccessWorkingSet(PC, Addr, Size, IsWrite);
+  }
+}
+
+bool processSignal(int SigNum, void (*Handler)(int), void (**Result)(int)) {
+  if (__esan_which_tool == ESAN_WorkingSet)
+    return processWorkingSetSignal(SigNum, Handler, Result);
+  return true;
+}
+
+bool processSigaction(int SigNum, const void *Act, void *OldAct) {
+  if (__esan_which_tool == ESAN_WorkingSet)
+    return processWorkingSetSigaction(SigNum, Act, OldAct);
+  return true;
+}
+
+bool processSigprocmask(int How, void *Set, void *OldSet) {
+  if (__esan_which_tool == ESAN_WorkingSet)
+    return processWorkingSetSigprocmask(How, Set, OldSet);
+  return true;
+}
+
+#if SANITIZER_DEBUG
+static bool verifyShadowScheme() {
+  // Sanity checks for our shadow mapping scheme.
+  uptr AppStart, AppEnd;
+  if (Verbosity() >= 3) {
+    for (int i = 0; getAppRegion(i, &AppStart, &AppEnd); ++i) {
+      VPrintf(3, "App #%d: [%zx-%zx) (%zuGB)\n", i, AppStart, AppEnd,
+              (AppEnd - AppStart) >> 30);
+    }
+  }
+  for (int Scale = 0; Scale < 8; ++Scale) {
+    Mapping.initialize(Scale);
+    if (Verbosity() >= 3) {
+      VPrintf(3, "\nChecking scale %d\n", Scale);
+      uptr ShadowStart, ShadowEnd;
+      for (int i = 0; getShadowRegion(i, &ShadowStart, &ShadowEnd); ++i) {
+        VPrintf(3, "Shadow #%d: [%zx-%zx) (%zuGB)\n", i, ShadowStart,
+                ShadowEnd, (ShadowEnd - ShadowStart) >> 30);
+      }
+      for (int i = 0; getShadowRegion(i, &ShadowStart, &ShadowEnd); ++i) {
+        VPrintf(3, "Shadow(Shadow) #%d: [%zx-%zx)\n", i,
+                appToShadow(ShadowStart), appToShadow(ShadowEnd - 1) + 1);
+      }
+    }
+    for (int i = 0; getAppRegion(i, &AppStart, &AppEnd); ++i) {
+      DCHECK(isAppMem(AppStart));
+      DCHECK(!isAppMem(AppStart - 1));
+      DCHECK(isAppMem(AppEnd - 1));
+      DCHECK(!isAppMem(AppEnd));
+      DCHECK(!isShadowMem(AppStart));
+      DCHECK(!isShadowMem(AppEnd - 1));
+      DCHECK(isShadowMem(appToShadow(AppStart)));
+      DCHECK(isShadowMem(appToShadow(AppEnd - 1)));
+      // Double-shadow checks.
+      DCHECK(!isShadowMem(appToShadow(appToShadow(AppStart))));
+      DCHECK(!isShadowMem(appToShadow(appToShadow(AppEnd - 1))));
+    }
+    // Ensure no shadow regions overlap each other.
+    uptr ShadowAStart, ShadowBStart, ShadowAEnd, ShadowBEnd;
+    for (int i = 0; getShadowRegion(i, &ShadowAStart, &ShadowAEnd); ++i) {
+      for (int j = 0; getShadowRegion(j, &ShadowBStart, &ShadowBEnd); ++j) {
+        DCHECK(i == j || ShadowAStart >= ShadowBEnd ||
+               ShadowAEnd <= ShadowBStart);
+      }
+    }
+  }
+  return true;
+}
+#endif
+
+static void initializeShadow() {
+  verifyAddressSpace();
+
+  DCHECK(verifyShadowScheme());
+
+  Mapping.initialize(ShadowScale[__esan_which_tool]);
+
+  VPrintf(1, "Shadow scale=%d offset=%p\n", Mapping.Scale, Mapping.Offset);
+
+  uptr ShadowStart, ShadowEnd;
+  for (int i = 0; getShadowRegion(i, &ShadowStart, &ShadowEnd); ++i) {
+    VPrintf(1, "Shadow #%d: [%zx-%zx) (%zuGB)\n", i, ShadowStart, ShadowEnd,
+            (ShadowEnd - ShadowStart) >> 30);
+
+    uptr Map;
+    if (__esan_which_tool == ESAN_WorkingSet) {
+      // We want to identify all shadow pages that are touched so we start
+      // out inaccessible.
+      Map = (uptr)MmapFixedNoAccess(ShadowStart, ShadowEnd - ShadowStart,
+                                    "shadow");
+    } else {
+      Map = (uptr)MmapFixedNoReserve(ShadowStart, ShadowEnd - ShadowStart,
+                                     "shadow");
+    }
+    if (Map != ShadowStart) {
+      Printf("FATAL: EfficiencySanitizer failed to map its shadow memory.\n");
+      Die();
+    }
+
+    if (common_flags()->no_huge_pages_for_shadow)
+      NoHugePagesInRegion(ShadowStart, ShadowEnd - ShadowStart);
+    if (common_flags()->use_madv_dontdump)
+      DontDumpShadowMemory(ShadowStart, ShadowEnd - ShadowStart);
+
+    // TODO: Call MmapNoAccess() on in-between regions.
+  }
+}
+
+void initializeLibrary(ToolType Tool) {
+  // We assume there is only one thread during init, but we need to
+  // guard against double-init when we're (re-)called from an
+  // early interceptor.
+  if (EsanIsInitialized || EsanDuringInit)
+    return;
+  EsanDuringInit = true;
+  CHECK(Tool == __esan_which_tool);
+  SanitizerToolName = "EfficiencySanitizer";
+  CacheBinaryName();
+  initializeFlags();
+
+  // Intercepting libc _exit or exit via COMMON_INTERCEPTOR_ON_EXIT only
+  // finalizes on an explicit exit call by the app.  To handle a normal
+  // exit we register an atexit handler.
+  ::__cxa_atexit((void (*)())finalizeLibrary);
+
+  VPrintf(1, "in esan::%s\n", __FUNCTION__);
+  if (__esan_which_tool <= ESAN_None || __esan_which_tool >= ESAN_Max) {
+    Printf("ERROR: unknown tool %d requested\n", __esan_which_tool);
+    Die();
+  }
+
+  initializeShadow();
+  if (__esan_which_tool == ESAN_WorkingSet)
+    initializeShadowWorkingSet();
+
+  initializeInterceptors();
+
+  if (__esan_which_tool == ESAN_CacheFrag) {
+    initializeCacheFrag();
+  } else if (__esan_which_tool == ESAN_WorkingSet) {
+    initializeWorkingSet();
+  }
+
+  EsanIsInitialized = true;
+  EsanDuringInit = false;
+}
+
+int finalizeLibrary() {
+  VPrintf(1, "in esan::%s\n", __FUNCTION__);
+  if (__esan_which_tool == ESAN_CacheFrag) {
+    return finalizeCacheFrag();
+  } else if (__esan_which_tool == ESAN_WorkingSet) {
+    return finalizeWorkingSet();
+  }
+  return 0;
+}
+
+void reportResults() {
+  VPrintf(1, "in esan::%s\n", __FUNCTION__);
+  if (__esan_which_tool == ESAN_CacheFrag) {
+    return reportCacheFrag();
+  } else if (__esan_which_tool == ESAN_WorkingSet) {
+    return reportWorkingSet();
+  }
+}
+
+void processCompilationUnitInit(void *Ptr) {
+  VPrintf(2, "in esan::%s\n", __FUNCTION__);
+  if (__esan_which_tool == ESAN_CacheFrag) {
+    DCHECK(Ptr != nullptr);
+    processCacheFragCompilationUnitInit(Ptr);
+  } else {
+    DCHECK(Ptr == nullptr);
+  }
+}
+
+// This is called when the containing module is unloaded.
+// For the main executable module, this is called after finalizeLibrary.
+void processCompilationUnitExit(void *Ptr) {
+  VPrintf(2, "in esan::%s\n", __FUNCTION__);
+  if (__esan_which_tool == ESAN_CacheFrag) {
+    DCHECK(Ptr != nullptr);
+    processCacheFragCompilationUnitExit(Ptr);
+  } else {
+    DCHECK(Ptr == nullptr);
+  }
+}
+
+unsigned int getSampleCount() {
+  VPrintf(1, "in esan::%s\n", __FUNCTION__);
+  if (__esan_which_tool == ESAN_WorkingSet) {
+    return getSampleCountWorkingSet();
+  }
+  return 0;
+}
+
+} // namespace __esan
diff --git a/lib/esan/esan.h b/lib/esan/esan.h
new file mode 100644
index 0000000..5a0dde6
--- /dev/null
+++ b/lib/esan/esan.h
@@ -0,0 +1,60 @@
+//===-- esan.h --------------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Main internal esan header file.
+//
+// Ground rules:
+//   - C++ run-time should not be used (static CTORs, RTTI, exceptions, static
+//     function-scope locals)
+//   - All functions/classes/etc reside in namespace __esan, except for those
+//     declared in esan_interface_internal.h.
+//   - Platform-specific files should be used instead of ifdefs (*).
+//   - No system headers included in header files (*).
+//   - Platform specific headers included only into platform-specific files (*).
+//
+//  (*) Except when inlining is critical for performance.
+//===----------------------------------------------------------------------===//
+
+#ifndef ESAN_H
+#define ESAN_H
+
+#include "interception/interception.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "esan_interface_internal.h"
+
+namespace __esan {
+
+extern bool EsanIsInitialized;
+extern bool EsanDuringInit;
+
+void initializeLibrary(ToolType Tool);
+int finalizeLibrary();
+void reportResults();
+unsigned int getSampleCount();
+// Esan creates a per-tool variable in each compilation unit at compile time
+// and passes its pointer Ptr to the runtime library.
+void processCompilationUnitInit(void *Ptr);
+void processCompilationUnitExit(void *Ptr);
+void processRangeAccess(uptr PC, uptr Addr, int Size, bool IsWrite);
+void initializeInterceptors();
+
+// Platform-dependent routines.
+void verifyAddressSpace();
+bool fixMmapAddr(void **Addr, SIZE_T Size, int Flags);
+uptr checkMmapResult(uptr Addr, SIZE_T Size);
+// The return value indicates whether to call the real version or not.
+bool processSignal(int SigNum, void (*Handler)(int), void (**Result)(int));
+bool processSigaction(int SigNum, const void *Act, void *OldAct);
+bool processSigprocmask(int How, void *Set, void *OldSet);
+
+} // namespace __esan
+
+#endif // ESAN_H
diff --git a/lib/esan/esan.syms.extra b/lib/esan/esan.syms.extra
new file mode 100644
index 0000000..d6397d4
--- /dev/null
+++ b/lib/esan/esan.syms.extra
@@ -0,0 +1,4 @@
+__esan_init
+__esan_exit
+__esan_aligned*
+__esan_unaligned*
diff --git a/lib/esan/esan_circular_buffer.h b/lib/esan/esan_circular_buffer.h
new file mode 100644
index 0000000..9ce102d
--- /dev/null
+++ b/lib/esan/esan_circular_buffer.h
@@ -0,0 +1,96 @@
+//===-- esan_circular_buffer.h ----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Circular buffer data structure.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_common.h"
+
+namespace __esan {
+
+// A circular buffer for POD data whose memory is allocated using mmap.
+// There are two usage models: one is to use initialize/free (for global
+// instances) and the other is to use placement new with the
+// constructor and to call the destructor or free (they are equivalent).
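+//
+// Illustrative usage (a sketch only; the element type, capacity, and buffer
+// names below are hypothetical):
+//
+//   // Global instance: default-construct, then initialize/free explicitly.
+//   static CircularBuffer<uptr> GlobalBuf;
+//   ... GlobalBuf.initialize(1024); ... GlobalBuf.free(); ...
+//
+//   // Placement-new instance: the destructor (or free()) releases the memory.
+//   static u64 BufMem[sizeof(CircularBuffer<uptr>) / sizeof(u64) + 1];
+//   CircularBuffer<uptr> *Buf = new (BufMem) CircularBuffer<uptr>(1024);
+//   Buf->push_back(0x42);
+//   Buf->~CircularBuffer();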
+template<typename T>
+class CircularBuffer {
+ public:
+  // To support global instances we cannot initialize any field in the
+  // default constructor.
+  CircularBuffer() {}
+  explicit CircularBuffer(uptr BufferCapacity) {
+    initialize(BufferCapacity);
+    WasConstructed = true;
+  }
+  ~CircularBuffer() {
+    if (WasConstructed) // Else caller will call free() explicitly.
+      free();
+  }
+  void initialize(uptr BufferCapacity) {
+    Capacity = BufferCapacity;
+    // MmapOrDie rounds up to the page size for us.
+    Data = (T *)MmapOrDie(Capacity * sizeof(T), "CircularBuffer");
+    StartIdx = 0;
+    Count = 0;
+    WasConstructed = false;
+  }
+  void free() {
+    UnmapOrDie(Data, Capacity * sizeof(T));
+  }
+  T &operator[](uptr Idx) {
+    CHECK_LT(Idx, Count);
+    uptr ArrayIdx = (StartIdx + Idx) % Capacity;
+    return Data[ArrayIdx];
+  }
+  const T &operator[](uptr Idx) const {
+    CHECK_LT(Idx, Count);
+    uptr ArrayIdx = (StartIdx + Idx) % Capacity;
+    return Data[ArrayIdx];
+  }
+  void push_back(const T &Item) {
+    CHECK_GT(Capacity, 0);
+    uptr ArrayIdx = (StartIdx + Count) % Capacity;
+    Data[ArrayIdx] = Item;
+    if (Count < Capacity)
+      ++Count;
+    else
+      StartIdx = (StartIdx + 1) % Capacity;
+  }
+  T &back() {
+    CHECK_GT(Count, 0);
+    uptr ArrayIdx = (StartIdx + Count - 1) % Capacity;
+    return Data[ArrayIdx];
+  }
+  void pop_back() {
+    CHECK_GT(Count, 0);
+    --Count;
+  }
+  uptr size() const {
+    return Count;
+  }
+  void clear() {
+    StartIdx = 0;
+    Count = 0;
+  }
+  bool empty() const { return size() == 0; }
+
+ private:
+  CircularBuffer(const CircularBuffer&);
+  void operator=(const CircularBuffer&);
+
+  bool WasConstructed;
+  T *Data;
+  uptr Capacity;
+  uptr StartIdx;
+  uptr Count;
+};
+
+} // namespace __esan
diff --git a/lib/esan/esan_flags.cpp b/lib/esan/esan_flags.cpp
new file mode 100644
index 0000000..3b047e2
--- /dev/null
+++ b/lib/esan/esan_flags.cpp
@@ -0,0 +1,58 @@
+//===-- esan_flags.cpp ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Esan flag parsing logic.
+//===----------------------------------------------------------------------===//
+
+#include "esan_flags.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_flags.h"
+
+namespace __esan {
+
+static const char EsanOptsEnv[] = "ESAN_OPTIONS";
+
+Flags EsanFlagsDontUseDirectly;
+
+void Flags::setDefaults() {
+#define ESAN_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
+#include "esan_flags.inc"
+#undef ESAN_FLAG
+}
+
+static void registerEsanFlags(FlagParser *Parser, Flags *F) {
+#define ESAN_FLAG(Type, Name, DefaultValue, Description) \
+  RegisterFlag(Parser, #Name, Description, &F->Name);
+#include "esan_flags.inc"
+#undef ESAN_FLAG
+}
+
+void initializeFlags() {
+  SetCommonFlagsDefaults();
+  Flags *F = getFlags();
+  F->setDefaults();
+
+  FlagParser Parser;
+  registerEsanFlags(&Parser, F);
+  RegisterCommonFlags(&Parser);
+  Parser.ParseString(GetEnv(EsanOptsEnv));
+
+  InitializeCommonFlags();
+  if (Verbosity())
+    ReportUnrecognizedFlags();
+  if (common_flags()->help)
+    Parser.PrintFlagDescriptions();
+
+  __sanitizer_set_report_path(common_flags()->log_path);
+}
+
+} // namespace __esan
diff --git a/lib/esan/esan_flags.h b/lib/esan/esan_flags.h
new file mode 100644
index 0000000..c8f4ef5
--- /dev/null
+++ b/lib/esan/esan_flags.h
@@ -0,0 +1,41 @@
+//===-- esan_flags.h --------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Esan runtime flags.
+//===----------------------------------------------------------------------===//
+
+#ifndef ESAN_FLAGS_H
+#define ESAN_FLAGS_H
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+
+namespace __esan {
+
+class Flags {
+public:
+#define ESAN_FLAG(Type, Name, DefaultValue, Description) Type Name;
+#include "esan_flags.inc"
+#undef ESAN_FLAG
+
+  void setDefaults();
+};
+
+extern Flags EsanFlagsDontUseDirectly;
+inline Flags *getFlags() {
+  return &EsanFlagsDontUseDirectly;
+}
+
+void initializeFlags();
+
+} // namespace __esan
+
+#endif // ESAN_FLAGS_H
diff --git a/lib/esan/esan_flags.inc b/lib/esan/esan_flags.inc
new file mode 100644
index 0000000..5687cac
--- /dev/null
+++ b/lib/esan/esan_flags.inc
@@ -0,0 +1,56 @@
+//===-- esan_flags.inc ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Esan runtime flags.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ESAN_FLAG
+# error "Define ESAN_FLAG prior to including this file!"
+#endif
+
+// ESAN_FLAG(Type, Name, DefaultValue, Description)
+// See COMMON_FLAG in sanitizer_flags.inc for more details.
+
+//===----------------------------------------------------------------------===//
+// Cross-tool options
+//===----------------------------------------------------------------------===//
+
+ESAN_FLAG(int, cache_line_size, 64,
+          "The number of bytes in a cache line.  For the working-set tool, this "
+          "cannot be changed without also changing the compiler "
+          "instrumentation.")
+
+//===----------------------------------------------------------------------===//
+// Working set tool options
+//===----------------------------------------------------------------------===//
+
+ESAN_FLAG(bool, record_snapshots, true,
+          "Working set tool: whether to sample snapshots during a run.")
+
+// Typical profiling uses a 10ms timer.  Our snapshots take some work
+// to scan memory, so we lower the frequency to one sample per 20ms.
+// To disable sampling, turn off record_snapshots.
+ESAN_FLAG(int, sample_freq, 20,
+          "Working set tool: the sampling interval, in milliseconds.")
+
+// This controls the difference in frequency between successive series of
+// snapshots.  There are 8 series in total, with series 0 using sample_freq.
+// Series N keeps one of every (1 << snapshot_step) snapshots of series N-1.
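+// For example, with the defaults (snapshot_step of 2 and sample_freq of 20ms),
+// series 1 keeps every 4th snapshot of series 0, i.e., one per 80ms, series 2
+// one per 320ms, and so on.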
+ESAN_FLAG(int, snapshot_step, 2, "Working set tool: the log base 2 of the "
+          "sampling ratio between successive snapshot series.")
+
+//===----------------------------------------------------------------------===//
+// Cache Fragmentation tool options
+//===----------------------------------------------------------------------===//
+
+// A struct's difference information is reported if its difference score
+// reaches report_threshold.
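+// For example, with the default threshold of 1 << 10 (1024), a struct is
+// reported individually only once its accumulated field-difference ratio
+// is at least 1024.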
+ESAN_FLAG(int, report_threshold, 1 << 10, "Cache-frag tool: the struct "
+          "difference score threshold for reporting.")
diff --git a/lib/esan/esan_hashtable.h b/lib/esan/esan_hashtable.h
new file mode 100644
index 0000000..7bd8297
--- /dev/null
+++ b/lib/esan/esan_hashtable.h
@@ -0,0 +1,381 @@
+//===-- esan_hashtable.h ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Generic resizing hashtable.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+#include <stddef.h>
+
+namespace __esan {
+
+//===----------------------------------------------------------------------===//
+// Default hash and comparison functions
+//===----------------------------------------------------------------------===//
+
+template <typename T> struct DefaultHash {
+  size_t operator()(const T &Key) const {
+    return (size_t)Key;
+  }
+};
+
+template <typename T> struct DefaultEqual {
+  bool operator()(const T &Key1, const T &Key2) const {
+    return Key1 == Key2;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// HashTable declaration
+//===----------------------------------------------------------------------===//
+
+// A simple resizing and mutex-locked hashtable.
+//
+// If the default hash functor is used, KeyTy must have an operator size_t().
+// If the default comparison functor is used, KeyTy must have an operator ==.
+//
+// By default all operations are internally-synchronized with a mutex, with no
+// synchronization for payloads once hashtable functions return.  If
+// ExternalLock is set to true, the caller should call the lock() and unlock()
+// routines around all hashtable operations and subsequent manipulation of
+// payloads.
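+//
+// Illustrative usage (a sketch only; the key/payload types and the use()
+// call are hypothetical):
+//
+//   // Internally-synchronized table: each operation locks internally.
+//   HashTable<uptr, u32> Table;
+//   Table.add(0x1234, 1);
+//   u32 Val;
+//   if (Table.lookup(0x1234, Val)) { /* use Val */ }
+//
+//   // Externally-synchronized table: the caller holds the lock around all
+//   // operations, payload use, and any iteration.
+//   HashTable<uptr, u32, true> BigTable;
+//   BigTable.lock();
+//   for (auto Iter = BigTable.begin(); Iter != BigTable.end(); ++Iter)
+//     use((*Iter).Data);
+//   BigTable.unlock();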
+template <typename KeyTy, typename DataTy, bool ExternalLock = false,
+          typename HashFuncTy = DefaultHash<KeyTy>,
+          typename EqualFuncTy = DefaultEqual<KeyTy> >
+class HashTable {
+public:
+  // InitialCapacity must be a power of 2.
+  // ResizeFactor must be between 1 and 99 and indicates the
+  // maximum percentage full that the table should ever be.
+  HashTable(u32 InitialCapacity = 2048, u32 ResizeFactor = 70);
+  ~HashTable();
+  bool lookup(const KeyTy &Key, DataTy &Payload); // Const except for Mutex.
+  bool add(const KeyTy &Key, const DataTy &Payload);
+  bool remove(const KeyTy &Key);
+  u32 size(); // Const except for Mutex.
+  // If the table is internally-synchronized, this lock must not be held
+  // while a hashtable function is called as it will deadlock: the lock
+  // is not recursive.  This is meant for use with externally-synchronized
+  // tables or with an iterator.
+  void lock();
+  void unlock();
+
+private:
+  struct HashEntry {
+    KeyTy Key;
+    DataTy Payload;
+    HashEntry *Next;
+  };
+
+public:
+  struct HashPair {
+    HashPair(KeyTy Key, DataTy Data) : Key(Key), Data(Data) {}
+    KeyTy Key;
+    DataTy Data;
+  };
+
+  // This iterator does not perform any synchronization.
+  // It expects the caller to lock the table across the whole iteration.
+  // Calling HashTable functions while using the iterator is not supported.
+  // The iterator returns copies of the keys and data.
+  class iterator {
+  public:
+    iterator(
+        HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy> *Table);
+    iterator(const iterator &Src) = default;
+    iterator &operator=(const iterator &Src) = default;
+    HashPair operator*();
+    iterator &operator++();
+    iterator operator++(int);
+    bool operator==(const iterator &Cmp) const;
+    bool operator!=(const iterator &Cmp) const;
+
+  private:
+    iterator(
+        HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy> *Table,
+        int Idx);
+    friend HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>;
+    HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy> *Table;
+    int Idx;
+    HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::HashEntry
+        *Entry;
+  };
+
+  // No erase or insert iterator supported
+  iterator begin();
+  iterator end();
+
+private:
+  void resize();
+
+  HashEntry **Table;
+  u32 Capacity;
+  u32 Entries;
+  const u32 ResizeFactor;
+  BlockingMutex Mutex;
+  const HashFuncTy HashFunc;
+  const EqualFuncTy EqualFunc;
+};
+
+//===----------------------------------------------------------------------===//
+// Hashtable implementation
+//===----------------------------------------------------------------------===//
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::HashTable(
+    u32 InitialCapacity, u32 ResizeFactor)
+    : Capacity(InitialCapacity), Entries(0), ResizeFactor(ResizeFactor),
+      HashFunc(HashFuncTy()), EqualFunc(EqualFuncTy()) {
+  CHECK(IsPowerOfTwo(Capacity));
+  CHECK(ResizeFactor >= 1 && ResizeFactor <= 99);
+  Table = (HashEntry **)InternalAlloc(Capacity * sizeof(HashEntry *));
+  internal_memset(Table, 0, Capacity * sizeof(HashEntry *));
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::~HashTable() {
+  for (u32 i = 0; i < Capacity; ++i) {
+    HashEntry *Entry = Table[i];
+    while (Entry != nullptr) {
+      HashEntry *Next = Entry->Next;
+      Entry->Payload.~DataTy();
+      InternalFree(Entry);
+      Entry = Next;
+    }
+  }
+  InternalFree(Table);
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+u32 HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::size() {
+  u32 Res;
+  if (!ExternalLock)
+    Mutex.Lock();
+  Res = Entries;
+  if (!ExternalLock)
+    Mutex.Unlock();
+  return Res;
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+bool HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::lookup(
+    const KeyTy &Key, DataTy &Payload) {
+  if (!ExternalLock)
+    Mutex.Lock();
+  bool Found = false;
+  size_t Hash = HashFunc(Key) % Capacity;
+  HashEntry *Entry = Table[Hash];
+  for (; Entry != nullptr; Entry = Entry->Next) {
+    if (EqualFunc(Entry->Key, Key)) {
+      Payload = Entry->Payload;
+      Found = true;
+      break;
+    }
+  }
+  if (!ExternalLock)
+    Mutex.Unlock();
+  return Found;
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+void HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::resize() {
+  if (!ExternalLock)
+    Mutex.CheckLocked();
+  size_t OldCapacity = Capacity;
+  HashEntry **OldTable = Table;
+  Capacity *= 2;
+  Table = (HashEntry **)InternalAlloc(Capacity * sizeof(HashEntry *));
+  internal_memset(Table, 0, Capacity * sizeof(HashEntry *));
+  // Re-hash
+  for (u32 i = 0; i < OldCapacity; ++i) {
+    HashEntry *OldEntry = OldTable[i];
+    while (OldEntry != nullptr) {
+      HashEntry *Next = OldEntry->Next;
+      size_t Hash = HashFunc(OldEntry->Key) % Capacity;
+      OldEntry->Next = Table[Hash];
+      Table[Hash] = OldEntry;
+      OldEntry = Next;
+    }
+  }
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+bool HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::add(
+    const KeyTy &Key, const DataTy &Payload) {
+  if (!ExternalLock)
+    Mutex.Lock();
+  bool Exists = false;
+  size_t Hash = HashFunc(Key) % Capacity;
+  HashEntry *Entry = Table[Hash];
+  for (; Entry != nullptr; Entry = Entry->Next) {
+    if (EqualFunc(Entry->Key, Key)) {
+      Exists = true;
+      break;
+    }
+  }
+  if (!Exists) {
+    Entries++;
+    if (Entries * 100 >= Capacity * ResizeFactor) {
+      resize();
+      Hash = HashFunc(Key) % Capacity;
+    }
+    HashEntry *Add = (HashEntry *)InternalAlloc(sizeof(*Add));
+    Add->Key = Key;
+    Add->Payload = Payload;
+    Add->Next = Table[Hash];
+    Table[Hash] = Add;
+  }
+  if (!ExternalLock)
+    Mutex.Unlock();
+  return !Exists;
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+bool HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::remove(
+    const KeyTy &Key) {
+  if (!ExternalLock)
+    Mutex.Lock();
+  bool Found = false;
+  size_t Hash = HashFunc(Key) % Capacity;
+  HashEntry *Entry = Table[Hash];
+  HashEntry *Prev = nullptr;
+  for (; Entry != nullptr; Prev = Entry, Entry = Entry->Next) {
+    if (EqualFunc(Entry->Key, Key)) {
+      Found = true;
+      Entries--;
+      if (Prev == nullptr)
+        Table[Hash] = Entry->Next;
+      else
+        Prev->Next = Entry->Next;
+      Entry->Payload.~DataTy();
+      InternalFree(Entry);
+      break;
+    }
+  }
+  if (!ExternalLock)
+    Mutex.Unlock();
+  return Found;
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+void HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::lock() {
+  Mutex.Lock();
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+void HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::unlock() {
+  Mutex.Unlock();
+}
+
+//===----------------------------------------------------------------------===//
+// Iterator implementation
+//===----------------------------------------------------------------------===//
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::iterator::
+    iterator(
+        HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy> *Table)
+    : Table(Table), Idx(-1), Entry(nullptr) {
+  operator++();
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::iterator::
+    iterator(
+        HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy> *Table,
+        int Idx)
+    : Table(Table), Idx(Idx), Entry(nullptr) {
+  CHECK(Idx >= (int)Table->Capacity); // Only used to create end().
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+typename HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy,
+                   EqualFuncTy>::HashPair
+    HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::iterator::
+    operator*() {
+  CHECK(Idx >= 0 && Idx < (int)Table->Capacity);
+  CHECK(Entry != nullptr);
+  return HashPair(Entry->Key, Entry->Payload);
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+typename HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy,
+                   EqualFuncTy>::iterator &
+    HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::iterator::
+    operator++() {
+  if (Entry != nullptr)
+    Entry = Entry->Next;
+  while (Entry == nullptr) {
+    ++Idx;
+    if (Idx >= (int)Table->Capacity)
+      break; // At end().
+    Entry = Table->Table[Idx];
+  }
+  return *this;
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+typename HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy,
+                   EqualFuncTy>::iterator
+    HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::iterator::
+    operator++(int) {
+  iterator Temp(*this);
+  operator++();
+  return Temp;
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+bool HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::iterator::
+operator==(const iterator &Cmp) const {
+  return Cmp.Table == Table && Cmp.Idx == Idx && Cmp.Entry == Entry;
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+bool HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::iterator::
+operator!=(const iterator &Cmp) const {
+  return Cmp.Table != Table || Cmp.Idx != Idx || Cmp.Entry != Entry;
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+typename HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy,
+                   EqualFuncTy>::iterator
+HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::begin() {
+  return iterator(this);
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+typename HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy,
+                   EqualFuncTy>::iterator
+HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::end() {
+  return iterator(this, Capacity);
+}
+
+} // namespace __esan
diff --git a/lib/esan/esan_interceptors.cpp b/lib/esan/esan_interceptors.cpp
new file mode 100644
index 0000000..647f010
--- /dev/null
+++ b/lib/esan/esan_interceptors.cpp
@@ -0,0 +1,547 @@
+//===-- esan_interceptors.cpp ---------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Interception routines for the esan run-time.
+//===----------------------------------------------------------------------===//
+
+#include "esan.h"
+#include "esan_shadow.h"
+#include "interception/interception.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_linux.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+
+using namespace __esan; // NOLINT
+
+#define CUR_PC() (StackTrace::GetCurrentPc())
+
+//===----------------------------------------------------------------------===//
+// Interception via sanitizer common interceptors
+//===----------------------------------------------------------------------===//
+
+// Get the per-platform defines for what is possible to intercept
+#include "sanitizer_common/sanitizer_platform_interceptors.h"
+
+// TODO(bruening): tsan disables several interceptors (getpwent, etc.) claiming
+// that interception is a perf hit: should we do the same?
+
+// We have no need to intercept:
+#undef SANITIZER_INTERCEPT_TLS_GET_ADDR
+
+// TODO(bruening): the common realpath interceptor assumes malloc is
+// intercepted!  We should try to parametrize that, though we'll
+// intercept malloc soon ourselves and can then remove this undef.
+#undef SANITIZER_INTERCEPT_REALPATH
+
+// We provide our own version:
+#undef SANITIZER_INTERCEPT_SIGPROCMASK
+
+#define COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED (!EsanIsInitialized)
+
+#define COMMON_INTERCEPT_FUNCTION(name) INTERCEPT_FUNCTION(name)
+#define COMMON_INTERCEPT_FUNCTION_VER(name, ver)                          \
+  INTERCEPT_FUNCTION_VER(name, ver)
+
+// We must initialize during early interceptors, to support tcmalloc.
+// This means that for some apps we fully initialize prior to
+// __esan_init() being called.
+// We currently do not use ctx.
+#define COMMON_INTERCEPTOR_ENTER(ctx, func, ...)                               \
+  do {                                                                         \
+    if (UNLIKELY(COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)) {                 \
+      if (!UNLIKELY(EsanDuringInit))                                           \
+        initializeLibrary(__esan_which_tool);                                  \
+      return REAL(func)(__VA_ARGS__);                                          \
+    }                                                                          \
+    ctx = nullptr;                                                             \
+    (void)ctx;                                                                 \
+  } while (false)
+
+#define COMMON_INTERCEPTOR_ENTER_NOIGNORE(ctx, func, ...)                      \
+  COMMON_INTERCEPTOR_ENTER(ctx, func, __VA_ARGS__)
+
+#define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size)                         \
+  processRangeAccess(CUR_PC(), (uptr)ptr, size, true)
+
+#define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size)                          \
+  processRangeAccess(CUR_PC(), (uptr)ptr, size, false)
+
+// This is only called if the app explicitly calls exit(), not on
+// a normal exit.
+#define COMMON_INTERCEPTOR_ON_EXIT(ctx) finalizeLibrary()
+
+#define COMMON_INTERCEPTOR_FILE_OPEN(ctx, file, path)                          \
+  do {                                                                         \
+    (void)(ctx);                                                               \
+    (void)(file);                                                              \
+    (void)(path);                                                              \
+  } while (false)
+#define COMMON_INTERCEPTOR_FILE_CLOSE(ctx, file)                               \
+  do {                                                                         \
+    (void)(ctx);                                                               \
+    (void)(file);                                                              \
+  } while (false)
+#define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle)                    \
+  do {                                                                         \
+    (void)(filename);                                                          \
+    (void)(handle);                                                            \
+  } while (false)
+#define COMMON_INTERCEPTOR_LIBRARY_UNLOADED()                                  \
+  do {                                                                         \
+  } while (false)
+#define COMMON_INTERCEPTOR_ACQUIRE(ctx, u)                                     \
+  do {                                                                         \
+    (void)(ctx);                                                               \
+    (void)(u);                                                                 \
+  } while (false)
+#define COMMON_INTERCEPTOR_RELEASE(ctx, u)                                     \
+  do {                                                                         \
+    (void)(ctx);                                                               \
+    (void)(u);                                                                 \
+  } while (false)
+#define COMMON_INTERCEPTOR_DIR_ACQUIRE(ctx, path)                              \
+  do {                                                                         \
+    (void)(ctx);                                                               \
+    (void)(path);                                                              \
+  } while (false)
+#define COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd)                                 \
+  do {                                                                         \
+    (void)(ctx);                                                               \
+    (void)(fd);                                                                \
+  } while (false)
+#define COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd)                                 \
+  do {                                                                         \
+    (void)(ctx);                                                               \
+    (void)(fd);                                                                \
+  } while (false)
+#define COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd)                                  \
+  do {                                                                         \
+    (void)(ctx);                                                               \
+    (void)(fd);                                                                \
+  } while (false)
+#define COMMON_INTERCEPTOR_FD_SOCKET_ACCEPT(ctx, fd, newfd)                    \
+  do {                                                                         \
+    (void)(ctx);                                                               \
+    (void)(fd);                                                                \
+    (void)(newfd);                                                             \
+  } while (false)
+#define COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name)                          \
+  do {                                                                         \
+    (void)(ctx);                                                               \
+    (void)(name);                                                              \
+  } while (false)
+#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name)                 \
+  do {                                                                         \
+    (void)(ctx);                                                               \
+    (void)(thread);                                                            \
+    (void)(name);                                                              \
+  } while (false)
+#define COMMON_INTERCEPTOR_BLOCK_REAL(name) REAL(name)
+#define COMMON_INTERCEPTOR_MUTEX_LOCK(ctx, m)                                  \
+  do {                                                                         \
+    (void)(ctx);                                                               \
+    (void)(m);                                                                 \
+  } while (false)
+#define COMMON_INTERCEPTOR_MUTEX_UNLOCK(ctx, m)                                \
+  do {                                                                         \
+    (void)(ctx);                                                               \
+    (void)(m);                                                                 \
+  } while (false)
+#define COMMON_INTERCEPTOR_MUTEX_REPAIR(ctx, m)                                \
+  do {                                                                         \
+    (void)(ctx);                                                               \
+    (void)(m);                                                                 \
+  } while (false)
+#define COMMON_INTERCEPTOR_HANDLE_RECVMSG(ctx, msg)                            \
+  do {                                                                         \
+    (void)(ctx);                                                               \
+    (void)(msg);                                                               \
+  } while (false)
+#define COMMON_INTERCEPTOR_USER_CALLBACK_START()                               \
+  do {                                                                         \
+  } while (false)
+#define COMMON_INTERCEPTOR_USER_CALLBACK_END()                                 \
+  do {                                                                         \
+  } while (false)
+
+#include "sanitizer_common/sanitizer_common_interceptors.inc"
+
+//===----------------------------------------------------------------------===//
+// Syscall interception
+//===----------------------------------------------------------------------===//
+
+// We want the caller's PC because, unlike the other function interceptors,
+// these are separate pre and post functions called around the app's syscall().
+
+#define COMMON_SYSCALL_PRE_READ_RANGE(ptr, size)                               \
+  processRangeAccess(GET_CALLER_PC(), (uptr)ptr, size, false)
+
+#define COMMON_SYSCALL_PRE_WRITE_RANGE(ptr, size)                              \
+  do {                                                                         \
+    (void)(ptr);                                                               \
+    (void)(size);                                                              \
+  } while (false)
+
+#define COMMON_SYSCALL_POST_READ_RANGE(ptr, size)                              \
+  do {                                                                         \
+    (void)(ptr);                                                               \
+    (void)(size);                                                              \
+  } while (false)
+
+// The actual amount written is in post, not pre.
+#define COMMON_SYSCALL_POST_WRITE_RANGE(ptr, size)                             \
+  processRangeAccess(GET_CALLER_PC(), (uptr)ptr, size, true)
+
+#define COMMON_SYSCALL_ACQUIRE(addr)                                           \
+  do {                                                                         \
+    (void)(addr);                                                              \
+  } while (false)
+#define COMMON_SYSCALL_RELEASE(addr)                                           \
+  do {                                                                         \
+    (void)(addr);                                                              \
+  } while (false)
+#define COMMON_SYSCALL_FD_CLOSE(fd)                                            \
+  do {                                                                         \
+    (void)(fd);                                                                \
+  } while (false)
+#define COMMON_SYSCALL_FD_ACQUIRE(fd)                                          \
+  do {                                                                         \
+    (void)(fd);                                                                \
+  } while (false)
+#define COMMON_SYSCALL_FD_RELEASE(fd)                                          \
+  do {                                                                         \
+    (void)(fd);                                                                \
+  } while (false)
+#define COMMON_SYSCALL_PRE_FORK()                                              \
+  do {                                                                         \
+  } while (false)
+#define COMMON_SYSCALL_POST_FORK(res)                                          \
+  do {                                                                         \
+    (void)(res);                                                               \
+  } while (false)
+
+#include "sanitizer_common/sanitizer_common_syscalls.inc"
+
+//===----------------------------------------------------------------------===//
+// Custom interceptors
+//===----------------------------------------------------------------------===//
+
+// TODO(bruening): move more of these to the common interception pool as they
+// are shared with tsan and asan.
+// While our other files match LLVM style, here we match sanitizer style as we
+// expect to move these to the common pool.
+
+INTERCEPTOR(char *, strcpy, char *dst, const char *src) { // NOLINT
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, strcpy, dst, src);
+  uptr srclen = internal_strlen(src);
+  COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, srclen + 1);
+  COMMON_INTERCEPTOR_READ_RANGE(ctx, src, srclen + 1);
+  return REAL(strcpy)(dst, src); // NOLINT
+}
+
+INTERCEPTOR(char *, strncpy, char *dst, char *src, uptr n) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, strncpy, dst, src, n);
+  uptr srclen = internal_strnlen(src, n);
+  uptr copied_size = srclen + 1 > n ? n : srclen + 1;
+  COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, copied_size);
+  COMMON_INTERCEPTOR_READ_RANGE(ctx, src, copied_size);
+  return REAL(strncpy)(dst, src, n);
+}
+
+INTERCEPTOR(int, open, const char *name, int flags, int mode) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, open, name, flags, mode);
+  COMMON_INTERCEPTOR_READ_STRING(ctx, name, 0);
+  return REAL(open)(name, flags, mode);
+}
+
+#if SANITIZER_LINUX
+INTERCEPTOR(int, open64, const char *name, int flags, int mode) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, open64, name, flags, mode);
+  COMMON_INTERCEPTOR_READ_STRING(ctx, name, 0);
+  return REAL(open64)(name, flags, mode);
+}
+#define ESAN_MAYBE_INTERCEPT_OPEN64 INTERCEPT_FUNCTION(open64)
+#else
+#define ESAN_MAYBE_INTERCEPT_OPEN64
+#endif
+
+INTERCEPTOR(int, creat, const char *name, int mode) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, creat, name, mode);
+  COMMON_INTERCEPTOR_READ_STRING(ctx, name, 0);
+  return REAL(creat)(name, mode);
+}
+
+#if SANITIZER_LINUX
+INTERCEPTOR(int, creat64, const char *name, int mode) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, creat64, name, mode);
+  COMMON_INTERCEPTOR_READ_STRING(ctx, name, 0);
+  return REAL(creat64)(name, mode);
+}
+#define ESAN_MAYBE_INTERCEPT_CREAT64 INTERCEPT_FUNCTION(creat64)
+#else
+#define ESAN_MAYBE_INTERCEPT_CREAT64
+#endif
+
+INTERCEPTOR(int, unlink, char *path) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, unlink, path);
+  COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
+  return REAL(unlink)(path);
+}
+
+INTERCEPTOR(uptr, fread, void *ptr, uptr size, uptr nmemb, void *f) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, fread, ptr, size, nmemb, f);
+  COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size * nmemb);
+  return REAL(fread)(ptr, size, nmemb, f);
+}
+
+INTERCEPTOR(uptr, fwrite, const void *p, uptr size, uptr nmemb, void *f) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, fwrite, p, size, nmemb, f);
+  COMMON_INTERCEPTOR_READ_RANGE(ctx, p, size * nmemb);
+  return REAL(fwrite)(p, size, nmemb, f);
+}
+
+INTERCEPTOR(int, puts, const char *s) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, puts, s);
+  COMMON_INTERCEPTOR_READ_RANGE(ctx, s, internal_strlen(s));
+  return REAL(puts)(s);
+}
+
+INTERCEPTOR(int, rmdir, char *path) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, rmdir, path);
+  COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
+  return REAL(rmdir)(path);
+}
+
+//===----------------------------------------------------------------------===//
+// Shadow-related interceptors
+//===----------------------------------------------------------------------===//
+
+// These are candidates for sharing with all sanitizers if shadow memory
+// support is also standardized.
+
+INTERCEPTOR(void *, mmap, void *addr, SIZE_T sz, int prot, int flags,
+                 int fd, OFF_T off) {
+  if (UNLIKELY(REAL(mmap) == nullptr)) {
+    // With esan init during interceptor init and a static libc preventing
+    // our early-calloc from triggering, we can end up here before our
+    // REAL pointer is set up.
+    return (void *)internal_mmap(addr, sz, prot, flags, fd, off);
+  }
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, mmap, addr, sz, prot, flags, fd, off);
+  if (!fixMmapAddr(&addr, sz, flags))
+    return (void *)-1;
+  void *result = REAL(mmap)(addr, sz, prot, flags, fd, off);
+  return (void *)checkMmapResult((uptr)result, sz);
+}
+
+#if SANITIZER_LINUX
+INTERCEPTOR(void *, mmap64, void *addr, SIZE_T sz, int prot, int flags,
+                 int fd, OFF64_T off) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, mmap64, addr, sz, prot, flags, fd, off);
+  if (!fixMmapAddr(&addr, sz, flags))
+    return (void *)-1;
+  void *result = REAL(mmap64)(addr, sz, prot, flags, fd, off);
+  return (void *)checkMmapResult((uptr)result, sz);
+}
+#define ESAN_MAYBE_INTERCEPT_MMAP64 INTERCEPT_FUNCTION(mmap64)
+#else
+#define ESAN_MAYBE_INTERCEPT_MMAP64
+#endif
+
+//===----------------------------------------------------------------------===//
+// Signal-related interceptors
+//===----------------------------------------------------------------------===//
+
+#if SANITIZER_LINUX
+typedef void (*signal_handler_t)(int);
+INTERCEPTOR(signal_handler_t, signal, int signum, signal_handler_t handler) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, signal, signum, handler);
+  signal_handler_t result;
+  if (!processSignal(signum, handler, &result))
+    return result;
+  else
+    return REAL(signal)(signum, handler);
+}
+#define ESAN_MAYBE_INTERCEPT_SIGNAL INTERCEPT_FUNCTION(signal)
+#else
+#error Platform not supported
+#define ESAN_MAYBE_INTERCEPT_SIGNAL
+#endif
+
+#if SANITIZER_LINUX
+DECLARE_REAL(int, sigaction, int signum, const struct sigaction *act,
+             struct sigaction *oldact)
+INTERCEPTOR(int, sigaction, int signum, const struct sigaction *act,
+            struct sigaction *oldact) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, sigaction, signum, act, oldact);
+  if (!processSigaction(signum, act, oldact))
+    return 0;
+  else
+    return REAL(sigaction)(signum, act, oldact);
+}
+
+// This is required to properly use internal_sigaction.
+namespace __sanitizer {
+int real_sigaction(int signum, const void *act, void *oldact) {
+  if (REAL(sigaction) == nullptr) {
+    // With an instrumented allocator, this is called during interceptor init
+    // and we need a raw syscall solution.
+    return internal_sigaction_syscall(signum, act, oldact);
+  }
+  return REAL(sigaction)(signum, (const struct sigaction *)act,
+                         (struct sigaction *)oldact);
+}
+} // namespace __sanitizer
+
+#define ESAN_MAYBE_INTERCEPT_SIGACTION INTERCEPT_FUNCTION(sigaction)
+#else
+#error Platform not supported
+#define ESAN_MAYBE_INTERCEPT_SIGACTION
+#endif
+
+#if SANITIZER_LINUX
+INTERCEPTOR(int, sigprocmask, int how, __sanitizer_sigset_t *set,
+            __sanitizer_sigset_t *oldset) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, sigprocmask, how, set, oldset);
+  int res = 0;
+  if (processSigprocmask(how, set, oldset))
+    res = REAL(sigprocmask)(how, set, oldset);
+  if (!res && oldset)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldset, sizeof(*oldset));
+  return res;
+}
+#define ESAN_MAYBE_INTERCEPT_SIGPROCMASK INTERCEPT_FUNCTION(sigprocmask)
+#else
+#define ESAN_MAYBE_INTERCEPT_SIGPROCMASK
+#endif
+
+#if !SANITIZER_WINDOWS
+INTERCEPTOR(int, pthread_sigmask, int how, __sanitizer_sigset_t *set,
+            __sanitizer_sigset_t *oldset) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, pthread_sigmask, how, set, oldset);
+  int res = 0;
+  if (processSigprocmask(how, set, oldset))
+    res = REAL(pthread_sigmask)(how, set, oldset);
+  if (!res && oldset)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldset, sizeof(*oldset));
+  return res;
+}
+#define ESAN_MAYBE_INTERCEPT_PTHREAD_SIGMASK INTERCEPT_FUNCTION(pthread_sigmask)
+#else
+#define ESAN_MAYBE_INTERCEPT_PTHREAD_SIGMASK
+#endif
+
+//===----------------------------------------------------------------------===//
+// Malloc interceptors
+//===----------------------------------------------------------------------===//
+
+static char early_alloc_buf[128];
+static bool used_early_alloc_buf;
+
+static void *handleEarlyAlloc(uptr size) {
+  // If esan is initialized during an interceptor (which happens with some
+  // tcmalloc implementations that call pthread_mutex_lock), the call from
+  // dlsym to calloc will deadlock.  There is only one such calloc (dlsym
+  // allocates a single pthread key), so we work around it by using a
+  // static buffer for the calloc request.  The loader currently needs
+  // 32 bytes but we size at 128 to allow for future changes.
+  // This solution will also allow us to deliberately intercept malloc & family
+  // in the future (to perform tool actions on each allocation, without
+  // replacing the allocator), as it also solves the problem of intercepting
+  // calloc when it will itself be called before its REAL pointer is
+  // initialized.
+  CHECK(!used_early_alloc_buf && size < sizeof(early_alloc_buf));
+  // We do not handle multiple threads here.  This only happens at process init
+  // time, and while it's possible for a shared library to create early threads
+  // that race here, we consider that to be a corner case extreme enough that
+  // it's not worth the effort to handle.
+  used_early_alloc_buf = true;
+  return (void *)early_alloc_buf;
+}
+
+INTERCEPTOR(void*, calloc, uptr size, uptr n) {
+  if (EsanDuringInit && REAL(calloc) == nullptr)
+    return handleEarlyAlloc(size * n);
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, calloc, size, n);
+  void *res = REAL(calloc)(size, n);
+  // The memory is zeroed and thus is all written.
+  COMMON_INTERCEPTOR_WRITE_RANGE(nullptr, (uptr)res, size * n);
+  return res;
+}
+
+INTERCEPTOR(void, free, void *p) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, free, p);
+  if (p == (void *)early_alloc_buf) {
+    // We expect just a singleton use but we clear this for cleanliness.
+    used_early_alloc_buf = false;
+    return;
+  }
+  REAL(free)(p);
+}
+
+namespace __esan {
+
+void initializeInterceptors() {
+  InitializeCommonInterceptors();
+
+  INTERCEPT_FUNCTION(strcpy); // NOLINT
+  INTERCEPT_FUNCTION(strncpy);
+
+  INTERCEPT_FUNCTION(open);
+  ESAN_MAYBE_INTERCEPT_OPEN64;
+  INTERCEPT_FUNCTION(creat);
+  ESAN_MAYBE_INTERCEPT_CREAT64;
+  INTERCEPT_FUNCTION(unlink);
+  INTERCEPT_FUNCTION(fread);
+  INTERCEPT_FUNCTION(fwrite);
+  INTERCEPT_FUNCTION(puts);
+  INTERCEPT_FUNCTION(rmdir);
+
+  INTERCEPT_FUNCTION(mmap);
+  ESAN_MAYBE_INTERCEPT_MMAP64;
+
+  ESAN_MAYBE_INTERCEPT_SIGNAL;
+  ESAN_MAYBE_INTERCEPT_SIGACTION;
+  ESAN_MAYBE_INTERCEPT_SIGPROCMASK;
+  ESAN_MAYBE_INTERCEPT_PTHREAD_SIGMASK;
+
+  INTERCEPT_FUNCTION(calloc);
+  INTERCEPT_FUNCTION(free);
+
+  // TODO(bruening): intercept routines that other sanitizers intercept that
+  // are not in the common pool or here yet, ideally by adding to the common
+  // pool.  Examples include wcslen and bcopy.
+
+  // TODO(bruening): there are many more libc routines that read or write data
+  // structures that no sanitizer is intercepting: sigaction, strtol, etc.
+}
+
+} // namespace __esan
diff --git a/lib/esan/esan_interface.cpp b/lib/esan/esan_interface.cpp
new file mode 100644
index 0000000..43b3dff
--- /dev/null
+++ b/lib/esan/esan_interface.cpp
@@ -0,0 +1,122 @@
+//===-- esan_interface.cpp ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+//===----------------------------------------------------------------------===//
+
+#include "esan_interface_internal.h"
+#include "esan.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+using namespace __esan; // NOLINT
+
+void __esan_init(ToolType Tool, void *Ptr) {
+  if (Tool != __esan_which_tool) {
+    Printf("ERROR: tool mismatch: %d vs %d\n", Tool, __esan_which_tool);
+    Die();
+  }
+  initializeLibrary(Tool);
+  processCompilationUnitInit(Ptr);
+}
+
+void __esan_exit(void *Ptr) {
+  processCompilationUnitExit(Ptr);
+}
+
+void __esan_aligned_load1(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 1, false);
+}
+
+void __esan_aligned_load2(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 2, false);
+}
+
+void __esan_aligned_load4(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 4, false);
+}
+
+void __esan_aligned_load8(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 8, false);
+}
+
+void __esan_aligned_load16(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 16, false);
+}
+
+void __esan_aligned_store1(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 1, true);
+}
+
+void __esan_aligned_store2(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 2, true);
+}
+
+void __esan_aligned_store4(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 4, true);
+}
+
+void __esan_aligned_store8(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 8, true);
+}
+
+void __esan_aligned_store16(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 16, true);
+}
+
+void __esan_unaligned_load2(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 2, false);
+}
+
+void __esan_unaligned_load4(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 4, false);
+}
+
+void __esan_unaligned_load8(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 8, false);
+}
+
+void __esan_unaligned_load16(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 16, false);
+}
+
+void __esan_unaligned_store2(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 2, true);
+}
+
+void __esan_unaligned_store4(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 4, true);
+}
+
+void __esan_unaligned_store8(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 8, true);
+}
+
+void __esan_unaligned_store16(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 16, true);
+}
+
+void __esan_unaligned_loadN(void *Addr, uptr Size) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, Size, false);
+}
+
+void __esan_unaligned_storeN(void *Addr, uptr Size) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, Size, true);
+}
+
+// Public interface:
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_report() {
+  reportResults();
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE unsigned int __esan_get_sample_count() {
+  return getSampleCount();
+}
+} // extern "C"
diff --git a/lib/esan/esan_interface_internal.h b/lib/esan/esan_interface_internal.h
new file mode 100644
index 0000000..3b915d0
--- /dev/null
+++ b/lib/esan/esan_interface_internal.h
@@ -0,0 +1,80 @@
+//===-- esan_interface_internal.h -------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Calls to the functions declared in this header will be inserted by
+// the instrumentation module.
+//===----------------------------------------------------------------------===//
+
+#ifndef ESAN_INTERFACE_INTERNAL_H
+#define ESAN_INTERFACE_INTERNAL_H
+
+#include <sanitizer_common/sanitizer_internal_defs.h>
+
+// This header should NOT include any other headers.
+// All functions in this header are extern "C" and start with __esan_.
+
+extern "C" {
+
+// This should be kept consistent with LLVM's EfficiencySanitizerOptions.
+// The value is passed as a 32-bit integer by the compiler.
+typedef enum Type : u32 {
+  ESAN_None = 0,
+  ESAN_CacheFrag,
+  ESAN_WorkingSet,
+  ESAN_Max,
+} ToolType;
+
+// To handle interceptors that invoke instrumented code prior to
+// __esan_init() being called, the instrumentation module creates this
+// global variable specifying the tool.
+extern ToolType __esan_which_tool;
+
+// This function should be called at the very beginning of the process,
+// before any instrumented code is executed and before any call to malloc.
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_init(ToolType Tool, void *Ptr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_exit(void *Ptr);
+
+// The instrumentation module will insert a call to one of these routines prior
+// to each load and store instruction for which we do not have "fastpath"
+// inlined instrumentation.  These calls constitute the "slowpath" for our
+// tools.  We have separate routines for each type of memory access to enable
+// targeted optimization.
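+//
+// As a rough illustration (the exact lowering is up to the compiler pass),
+// a 4-byte aligned load such as "int x = *p;" is conceptually instrumented
+// as:
+//
+//   __esan_aligned_load4(p);
+//   int x = *p;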
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_load1(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_load2(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_load4(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_load8(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_load16(void *Addr);
+
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_store1(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_store2(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_store4(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_store8(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_store16(void *Addr);
+
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_unaligned_load2(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_unaligned_load4(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_unaligned_load8(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_unaligned_load16(void *Addr);
+
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_unaligned_store2(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_unaligned_store4(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_unaligned_store8(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_unaligned_store16(void *Addr);
+
+// These cover unusually-sized accesses.
+SANITIZER_INTERFACE_ATTRIBUTE
+void __esan_unaligned_loadN(void *Addr, uptr Size);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __esan_unaligned_storeN(void *Addr, uptr Size);
+
+} // extern "C"
+
+#endif // ESAN_INTERFACE_INTERNAL_H
diff --git a/lib/esan/esan_linux.cpp b/lib/esan/esan_linux.cpp
new file mode 100644
index 0000000..aa961b6
--- /dev/null
+++ b/lib/esan/esan_linux.cpp
@@ -0,0 +1,83 @@
+//===-- esan_linux.cpp ----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Linux-specific code for the Esan run-time.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_FREEBSD || SANITIZER_LINUX
+
+#include "esan.h"
+#include "esan_shadow.h"
+#include "interception/interception.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include <sys/mman.h>
+#include <errno.h>
+
+namespace __esan {
+
+void verifyAddressSpace() {
+#if SANITIZER_LINUX && defined(__x86_64__)
+  // The kernel determines its mmap base from the stack size limit.
+  // Our Linux 64-bit shadow mapping assumes the stack limit is less than a
+  // terabyte, which keeps the mmap region above 0x7e00'.
+  uptr StackLimit = GetStackSizeLimitInBytes();
+  if (StackSizeIsUnlimited() || StackLimit > MaxStackSize) {
+    VReport(1, "The stack size limit is beyond the maximum supported.\n"
+            "Re-execing with a stack size below 1TB.\n");
+    SetStackSizeLimitInBytes(MaxStackSize);
+    ReExec();
+  }
+#endif
+}
+
+static bool liesWithinSingleAppRegion(uptr Start, SIZE_T Size) {
+  uptr AppStart, AppEnd;
+  for (int i = 0; getAppRegion(i, &AppStart, &AppEnd); ++i) {
+    if (Start >= AppStart && Start + Size - 1 <= AppEnd) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool fixMmapAddr(void **Addr, SIZE_T Size, int Flags) {
+  if (*Addr) {
+    if (!liesWithinSingleAppRegion((uptr)*Addr, Size)) {
+      VPrintf(1, "mmap conflict: [%p-%p) is not in an app region\n",
+              *Addr, (uptr)*Addr + Size);
+      if (Flags & MAP_FIXED) {
+        errno = EINVAL;
+        return false;
+      } else {
+        *Addr = 0;
+      }
+    }
+  }
+  return true;
+}
+
+uptr checkMmapResult(uptr Addr, SIZE_T Size) {
+  if ((void *)Addr == MAP_FAILED)
+    return Addr;
+  if (!liesWithinSingleAppRegion(Addr, Size)) {
+    // FIXME: attempt to dynamically add this as an app region if it
+    // fits our shadow criteria.
+    // We could also try to remap somewhere else.
+    Printf("ERROR: unsupported mapping at [%p-%p)\n", Addr, Addr+Size);
+    Die();
+  }
+  return Addr;
+}
+
+} // namespace __esan
+
+#endif // SANITIZER_FREEBSD || SANITIZER_LINUX
diff --git a/lib/esan/esan_shadow.h b/lib/esan/esan_shadow.h
new file mode 100644
index 0000000..f8f154e
--- /dev/null
+++ b/lib/esan/esan_shadow.h
@@ -0,0 +1,203 @@
+//===-- esan_shadow.h -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Shadow memory mappings for the esan run-time.
+//===----------------------------------------------------------------------===//
+
+#ifndef ESAN_SHADOW_H
+#define ESAN_SHADOW_H
+
+#include <sanitizer_common/sanitizer_platform.h>
+
+#if SANITIZER_WORDSIZE != 64
+#error Only 64-bit is supported
+#endif
+
+namespace __esan {
+
+#if SANITIZER_LINUX && defined(__x86_64__)
+// Linux x86_64
+//
+// Application memory falls into these 5 regions (ignoring the corner case
+// of PIE with a non-zero PT_LOAD base):
+//
+// [0x00000000'00000000, 0x00000100'00000000) non-PIE + heap
+// [0x00005500'00000000, 0x00005700'00000000) PIE
+// [0x00007e00'00000000, 0x00007fff'ff600000) libraries + stack, part 1
+// [0x00007fff'ff601000, 0x00008000'00000000) libraries + stack, part 2
+// [0xffffffff'ff600000, 0xffffffff'ff601000) vsyscall
+//
+// Although we can ignore the vsyscall for the most part as there are few data
+// references there (other sanitizers ignore it), we enforce a gap inside the
+// library region to distinguish the vsyscall's shadow, considering this gap to
+// be an invalid app region.
+// We disallow application memory outside of those 5 regions.
+// Our regions assume that the stack rlimit is less than a terabyte (otherwise
+// the Linux kernel's default mmap region drops below 0x7e00'), which we enforce
+// at init time (we can support larger and unlimited sizes for shadow
+// scaledowns, but it is difficult for 1:1 mappings).
+//
+// Our shadow memory is scaled from a 1:1 mapping and supports a scale
+// specified at library initialization time that can be any power-of-2
+// scaledown (1x, 2x, 4x, 8x, 16x, etc.).
+//
+// We model our shadow memory after Umbra, a library used by the Dr. Memory
+// tool: https://github.com/DynamoRIO/drmemory/blob/master/umbra/umbra_x64.c.
+// We use Umbra's scheme as it was designed to support different
+// offsets, it supports two different shadow mappings (which we may want to
+// use for future tools), and it ensures that the shadow of a shadow will
+// not overlap either shadow memory or application memory.
+//
+// This formula translates from application memory to shadow memory:
+//
+//   shadow(app) = ((app & 0x00000fff'ffffffff) + offset) >> scale
+//
+// Where the offset for 1:1 is 0x00001300'00000000.  For other scales, the
+// offset is shifted left by the scale, except for scales of 1 and 2 where
+// it must be tweaked in order to pass the double-shadow test
+// (see the "shadow(shadow)" comments below):
+//   scale == 0: 0x00001300'00000000
+//   scale == 1: 0x00002200'00000000
+//   scale == 2: 0x00004400'00000000
+//   scale >= 3: (0x00001300'00000000 << scale)
+//
+// Do not pass in the open-ended end value to the formula as it will fail.
+//
+// The resulting shadow memory regions for a 0 scaling are:
+//
+// [0x00001300'00000000, 0x00001400'00000000)
+// [0x00001800'00000000, 0x00001a00'00000000)
+// [0x00002100'00000000, 0x000022ff'ff600000)
+// [0x000022ff'ff601000, 0x00002300'00000000)
+// [0x000022ff'ff600000, 0x000022ff'ff601000]
+//
+// We also want to ensure that a wild access by the application into the shadow
+// regions will not corrupt our own shadow memory.  shadow(shadow) ends up
+// disjoint from shadow(app):
+//
+// [0x00001600'00000000, 0x00001700'00000000)
+// [0x00001b00'00000000, 0x00001d00'00000000)
+// [0x00001400'00000000, 0x000015ff'ff600000]
+// [0x000015ff'ff601000, 0x00001600'00000000]
+// [0x000015ff'ff600000, 0x000015ff'ff601000]
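+//
+// As a sanity check of the formula, consider the default 1:1 mapping
+// (scale == 0, offset == 0x00001300'00000000) applied to a library address
+// such as 0x00007e00'12345678:
+//
+//   (0x00007e00'12345678 & 0x00000fff'ffffffff) == 0x00000e00'12345678
+//    0x00000e00'12345678 + 0x00001300'00000000  == 0x00002100'12345678
+//
+// which lands in the third shadow region listed above.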
+
+struct ApplicationRegion {
+  uptr Start;
+  uptr End;
+  bool ShadowMergedWithPrev;
+};
+
+static const struct ApplicationRegion AppRegions[] = {
+  {0x0000000000000000ull, 0x0000010000000000u, false},
+  {0x0000550000000000u,   0x0000570000000000u, false},
+  // We make one shadow mapping to hold the shadow regions for all 3 of these
+  // app regions, as the mappings interleave, and the gap between the 3rd and
+  // 4th scales down below a page.
+  {0x00007e0000000000u,   0x00007fffff600000u, false},
+  {0x00007fffff601000u,   0x0000800000000000u, true},
+  {0xffffffffff600000u,   0xffffffffff601000u, true},
+};
+static const u32 NumAppRegions = sizeof(AppRegions)/sizeof(AppRegions[0]);
+
+// See the comment above: we do not currently support a stack size rlimit
+// equal to or larger than 1TB.
+static const uptr MaxStackSize = (1ULL << 40) - 4096;
+
+class ShadowMapping {
+public:
+  static const uptr Mask = 0x00000fffffffffffu;
+  // The scale and offset vary by tool.
+  uptr Scale;
+  uptr Offset;
+  void initialize(uptr ShadowScale) {
+    static const uptr OffsetArray[3] = {
+        0x0000130000000000u,
+        0x0000220000000000u,
+        0x0000440000000000u,
+    };
+    Scale = ShadowScale;
+    if (Scale <= 2)
+      Offset = OffsetArray[Scale];
+    else
+      Offset = OffsetArray[0] << Scale;
+  }
+};
+extern ShadowMapping Mapping;
+#else
+// We'll want to use templatized functions over the ShadowMapping once
+// we support more platforms.
+#error Platform not supported
+#endif
+
+static inline bool getAppRegion(u32 i, uptr *Start, uptr *End) {
+  if (i >= NumAppRegions)
+    return false;
+  *Start = AppRegions[i].Start;
+  *End = AppRegions[i].End;
+  return true;
+}
+
+ALWAYS_INLINE
+bool isAppMem(uptr Mem) {
+  for (u32 i = 0; i < NumAppRegions; ++i) {
+    if (Mem >= AppRegions[i].Start && Mem < AppRegions[i].End)
+      return true;
+  }
+  return false;
+}
+
+ALWAYS_INLINE
+uptr appToShadow(uptr App) {
+  return (((App & ShadowMapping::Mask) + Mapping.Offset) >> Mapping.Scale);
+}
+
+static inline bool getShadowRegion(u32 i, uptr *Start, uptr *End) {
+  if (i >= NumAppRegions)
+    return false;
+  u32 UnmergedShadowCount = 0;
+  u32 AppIdx;
+  for (AppIdx = 0; AppIdx < NumAppRegions; ++AppIdx) {
+    if (!AppRegions[AppIdx].ShadowMergedWithPrev) {
+      if (UnmergedShadowCount == i)
+        break;
+      UnmergedShadowCount++;
+    }
+  }
+  if (AppIdx >= NumAppRegions || UnmergedShadowCount != i)
+    return false;
+  *Start = appToShadow(AppRegions[AppIdx].Start);
+  // The formula fails for the end itself.
+  *End = appToShadow(AppRegions[AppIdx].End - 1) + 1;
+  // Merge with adjacent shadow regions:
+  for (++AppIdx; AppIdx < NumAppRegions; ++AppIdx) {
+    if (!AppRegions[AppIdx].ShadowMergedWithPrev)
+      break;
+    *Start = Min(*Start, appToShadow(AppRegions[AppIdx].Start));
+    *End = Max(*End, appToShadow(AppRegions[AppIdx].End - 1) + 1);
+  }
+  return true;
+}
+
+ALWAYS_INLINE
+bool isShadowMem(uptr Mem) {
+  // We assume this is not used on any critical performance path and so there's
+  // no need to hardcode the mapping results.
+  for (uptr i = 0; i < NumAppRegions; ++i) {
+    if (Mem >= appToShadow(AppRegions[i].Start) &&
+        Mem < appToShadow(AppRegions[i].End - 1) + 1)
+      return true;
+  }
+  return false;
+}
+
+} // namespace __esan
+
+#endif /* ESAN_SHADOW_H */
diff --git a/lib/esan/esan_sideline.h b/lib/esan/esan_sideline.h
new file mode 100644
index 0000000..aa3fae1
--- /dev/null
+++ b/lib/esan/esan_sideline.h
@@ -0,0 +1,61 @@
+//===-- esan_sideline.h -----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Esan sideline thread support.
+//===----------------------------------------------------------------------===//
+
+#ifndef ESAN_SIDELINE_H
+#define ESAN_SIDELINE_H
+
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+namespace __esan {
+
+typedef void (*SidelineFunc)(void *Arg);
+
+// Currently only one sideline thread is supported.
+// It calls the SidelineFunc passed to launchThread once on each sample at the
+// given frequency in real time (i.e., wall clock time).
+class SidelineThread {
+public:
+  // We cannot initialize any fields in the constructor as it will be called
+  // *after* launchThread for a static instance, as esan.module_ctor is called
+  // before static initializers.
+  SidelineThread() {}
+  ~SidelineThread() {}
+
+  // To simplify declaration in sanitizer code where we want to avoid
+  // heap allocations, the constructor and destructor do nothing and
+  // launchThread and joinThread do the real work.
+  // They should each be called just once.
+  bool launchThread(SidelineFunc takeSample, void *Arg, u32 FreqMilliSec);
+  bool joinThread();
+
+  // Must be called from the sideline thread itself.
+  bool adjustTimer(u32 FreqMilliSec);
+
+private:
+  static int runSideline(void *Arg);
+  static void registerSignal(int SigNum);
+  static void handleSidelineSignal(int SigNum, void *SigInfo, void *Ctx);
+
+  char *Stack;
+  SidelineFunc sampleFunc;
+  void *FuncArg;
+  u32 Freq;
+  uptr SidelineId;
+  atomic_uintptr_t SidelineExit;
+};
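+
+// Typical usage, as in working_set.cpp: a file-scope SidelineThread is
+// launched during tool initialization and joined at finalization, roughly:
+//
+//   static SidelineThread Thread;
+//   Thread.launchThread(takeSample, nullptr, getFlags()->sample_freq);
+//   ...
+//   Thread.joinThread();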
+
+} // namespace __esan
+
+#endif  // ESAN_SIDELINE_H
diff --git a/lib/esan/esan_sideline_linux.cpp b/lib/esan/esan_sideline_linux.cpp
new file mode 100644
index 0000000..d04f590
--- /dev/null
+++ b/lib/esan/esan_sideline_linux.cpp
@@ -0,0 +1,177 @@
+//===-- esan_sideline_linux.cpp ---------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Support for a separate or "sideline" tool thread on Linux.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_LINUX
+
+#include "esan_sideline.h"
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_linux.h"
+#include <errno.h>
+#include <sched.h>
+#include <sys/prctl.h>
+#include <sys/signal.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+namespace __esan {
+
+static const int SigAltStackSize = 4*1024;
+static const int SidelineStackSize = 4*1024;
+static const uptr SidelineIdUninitialized = 1;
+
+// FIXME: we'll need some kind of TLS (can we trust that a pthread key will
+// work in our non-POSIX thread?) to access our data in our signal handler
+// with multiple sideline threads.  For now we assume there is only one
+// sideline thread and we use a dirty solution of a global var.
+static SidelineThread *TheThread;
+
+// We aren't passing SA_NODEFER so the same signal is blocked while here.
+void SidelineThread::handleSidelineSignal(int SigNum, void *SigInfo,
+                                          void *Ctx) {
+  VPrintf(3, "Sideline signal %d\n", SigNum);
+  CHECK_EQ(SigNum, SIGALRM);
+  // See above about needing TLS to avoid this global var.
+  SidelineThread *Thread = TheThread;
+  if (atomic_load(&Thread->SidelineExit, memory_order_relaxed) != 0)
+    return;
+  Thread->sampleFunc(Thread->FuncArg);
+}
+
+void SidelineThread::registerSignal(int SigNum) {
+  __sanitizer_sigaction SigAct;
+  internal_memset(&SigAct, 0, sizeof(SigAct));
+  SigAct.sigaction = handleSidelineSignal;
+  // We do not pass SA_NODEFER as we want to block the same signal.
+  SigAct.sa_flags = SA_ONSTACK | SA_SIGINFO;
+  int Res = internal_sigaction(SigNum, &SigAct, nullptr);
+  CHECK_EQ(Res, 0);
+}
+
+int SidelineThread::runSideline(void *Arg) {
+  VPrintf(1, "Sideline thread starting\n");
+  SidelineThread *Thread = static_cast<SidelineThread*>(Arg);
+
+  // If the parent dies, we want to exit also.
+  internal_prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
+
+  // Set up a signal handler on an alternate stack for safety.
+  InternalScopedBuffer<char> StackMap(SigAltStackSize);
+  struct sigaltstack SigAltStack;
+  SigAltStack.ss_sp = StackMap.data();
+  SigAltStack.ss_size = SigAltStackSize;
+  SigAltStack.ss_flags = 0;
+  internal_sigaltstack(&SigAltStack, nullptr);
+
+  // We inherit the signal mask from the app thread.  In case
+  // we weren't created at init time, we ensure the mask is empty.
+  __sanitizer_sigset_t SigSet;
+  internal_sigfillset(&SigSet);
+  int Res = internal_sigprocmask(SIG_UNBLOCK, &SigSet, nullptr);
+  CHECK_EQ(Res, 0);
+
+  registerSignal(SIGALRM);
+
+  bool TimerSuccess = Thread->adjustTimer(Thread->Freq);
+  CHECK(TimerSuccess);
+
+  // We loop, doing nothing but handling itimer signals.
+  while (atomic_load(&TheThread->SidelineExit, memory_order_relaxed) == 0)
+    sched_yield();
+
+  if (!Thread->adjustTimer(0))
+    VPrintf(1, "Failed to disable timer\n");
+
+  VPrintf(1, "Sideline thread exiting\n");
+  return 0;
+}
+
+bool SidelineThread::launchThread(SidelineFunc takeSample, void *Arg,
+                                  u32 FreqMilliSec) {
+  // This can only be called once.  However, we can't clear a field in
+  // the constructor and check for that here as the constructor for
+  // a static instance is called *after* our module_ctor and thus after
+  // this routine!  Thus we rely on the TheThread check below.
+  CHECK(TheThread == nullptr); // Only one sideline thread is supported.
+  TheThread = this;
+  sampleFunc = takeSample;
+  FuncArg = Arg;
+  Freq = FreqMilliSec;
+  atomic_store(&SidelineExit, 0, memory_order_relaxed);
+
+  // We do without a guard page.
+  Stack = static_cast<char*>(MmapOrDie(SidelineStackSize, "SidelineStack"));
+  // We need to handle the return value from internal_clone() not having been
+  // assigned yet (for our CHECK in adjustTimer()) so we ensure this has a
+  // sentinel value.
+  SidelineId = SidelineIdUninitialized;
+  // By omitting CLONE_THREAD, the child is in its own thread group and will not
+  // receive any of the application's signals.
+  SidelineId = internal_clone(
+      runSideline, Stack + SidelineStackSize,
+      CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_UNTRACED,
+      this, nullptr /* parent_tidptr */,
+      nullptr /* newtls */, nullptr /* child_tidptr */);
+  int ErrCode;
+  if (internal_iserror(SidelineId, &ErrCode)) {
+    Printf("FATAL: EfficiencySanitizer failed to spawn a thread (code %d).\n",
+           ErrCode);
+    Die();
+    return false; // Not reached.
+  }
+  return true;
+}
+
+bool SidelineThread::joinThread() {
+  VPrintf(1, "Joining sideline thread\n");
+  bool Res = true;
+  atomic_store(&SidelineExit, 1, memory_order_relaxed);
+  while (true) {
+    uptr Status = internal_waitpid(SidelineId, nullptr, __WALL);
+    int ErrCode;
+    if (!internal_iserror(Status, &ErrCode))
+      break;
+    if (ErrCode == EINTR)
+      continue;
+    VPrintf(1, "Failed to join sideline thread (errno %d)\n", ErrCode);
+    Res = false;
+    break;
+  }
+  UnmapOrDie(Stack, SidelineStackSize);
+  return Res;
+}
+
+// Must be called from the sideline thread itself.
+bool SidelineThread::adjustTimer(u32 FreqMilliSec) {
+  // The return value of internal_clone() may not have been assigned yet:
+  CHECK(internal_getpid() == SidelineId ||
+        SidelineId == SidelineIdUninitialized);
+  Freq = FreqMilliSec;
+  struct itimerval TimerVal;
+  TimerVal.it_interval.tv_sec = (time_t) Freq / 1000;
+  TimerVal.it_interval.tv_usec = (time_t) (Freq % 1000) * 1000;
+  TimerVal.it_value.tv_sec = (time_t) Freq / 1000;
+  TimerVal.it_value.tv_usec = (time_t) (Freq % 1000) * 1000;
+  // As we're in a different thread group, we cannot use either
+  // ITIMER_PROF or ITIMER_VIRTUAL without taking up scheduled
+  // time ourselves: thus we must use real time.
+  int Res = setitimer(ITIMER_REAL, &TimerVal, nullptr);
+  return (Res == 0);
+}
+
+} // namespace __esan
+
+#endif // SANITIZER_LINUX
diff --git a/lib/esan/working_set.cpp b/lib/esan/working_set.cpp
new file mode 100644
index 0000000..f391119
--- /dev/null
+++ b/lib/esan/working_set.cpp
@@ -0,0 +1,279 @@
+//===-- working_set.cpp ---------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// This file contains working-set-specific code.
+//===----------------------------------------------------------------------===//
+
+#include "working_set.h"
+#include "esan.h"
+#include "esan_circular_buffer.h"
+#include "esan_flags.h"
+#include "esan_shadow.h"
+#include "esan_sideline.h"
+#include "sanitizer_common/sanitizer_procmaps.h"
+
+// We shadow every cache line of app memory with one shadow byte.
+// - The highest bit of each shadow byte indicates whether the corresponding
+//   cache line has ever been accessed.
+// - The lowest bit of each shadow byte indicates whether the corresponding
+//   cache line was accessed since the last sample.
+// - The other bits are used for working set snapshots at successively
+//   lower frequencies, each bit to the left from the lowest bit stepping
+//   down the frequency by 2 to the power of getFlags()->snapshot_step.
+// Thus we have something like this:
+//   Bit 0: Since last sample
+//   Bit 1: Since last 2^2 samples
+//   Bit 2: Since last 2^4 samples
+//   Bit 3: ...
+//   Bit 7: Ever accessed.
+// We live with races in accessing each shadow byte.
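+// For example, a shadow byte value of 0x83 (bits 7, 1 and 0 set) means the
+// corresponding cache line has been accessed at some point in the run, during
+// the most recent sample period, and during the most recent lower-frequency
+// snapshot period tracked by bit 1.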
+typedef unsigned char byte;
+
+namespace __esan {
+
+// Our shadow memory assumes that the line size is 64.
+static const u32 CacheLineSize = 64;
+
+// See the shadow byte layout description above.
+static const u32 TotalWorkingSetBitIdx = 7;
+// We accumulate to the left until we hit this bit.
+// We don't need to accumulate to the final bit as it's set on each ref
+// by the compiler instrumentation.
+static const u32 MaxAccumBitIdx = 6;
+static const u32 CurWorkingSetBitIdx = 0;
+static const byte ShadowAccessedVal =
+  (1 << TotalWorkingSetBitIdx) | (1 << CurWorkingSetBitIdx);
+
+static SidelineThread Thread;
+// If we use real-time-based timer samples this won't overflow in any realistic
+// scenario, but if we switch to some other unit (such as memory accesses) we
+// may want to consider a 64-bit int.
+static u32 SnapshotNum;
+
+// We store the wset size for each of 8 different sampling frequencies.
+static const u32 NumFreq = 8; // One for each bit of our shadow bytes.
+// We cannot use static objects as the global destructor is called
+// prior to our finalize routine.
+// These are each circular buffers, sized up front.
+CircularBuffer<u32> SizePerFreq[NumFreq];
+// We cannot rely on static initializers (they may run too late) but
+// we record the size here for clarity:
+u32 CircularBufferSizes[NumFreq] = {
+  // These are each mmap-ed so our minimum is one page.
+  32*1024,
+  16*1024,
+  8*1024,
+  4*1024,
+  4*1024,
+  4*1024,
+  4*1024,
+  4*1024,
+};
+
+void processRangeAccessWorkingSet(uptr PC, uptr Addr, SIZE_T Size,
+                                  bool IsWrite) {
+  if (Size == 0)
+    return;
+  SIZE_T I = 0;
+  uptr LineSize = getFlags()->cache_line_size;
+  // As Addr+Size could overflow at the top of a 32-bit address space,
+  // we avoid the simpler formula that rounds the start and end.
+  SIZE_T NumLines = Size / LineSize +
+    // Add any extra at the start or end adding on an extra line:
+    (LineSize - 1 + Addr % LineSize + Size % LineSize) / LineSize;
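+  // E.g., with LineSize == 64, a 10-byte access starting at line offset 60
+  // yields 0 + (63 + 60 + 10) / 64 == 2, matching the two cache lines touched.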
+  byte *Shadow = (byte *)appToShadow(Addr);
+  // Write shadow bytes until we're word-aligned.
+  while (I < NumLines && (uptr)Shadow % 4 != 0) {
+    if ((*Shadow & ShadowAccessedVal) != ShadowAccessedVal)
+      *Shadow |= ShadowAccessedVal;
+    ++Shadow;
+    ++I;
+  }
+  // Write whole shadow words at a time.
+  // Using a word-stride loop improves the runtime of a microbenchmark of
+  // memset calls by 10%.
+  u32 WordValue = ShadowAccessedVal | ShadowAccessedVal << 8 |
+    ShadowAccessedVal << 16 | ShadowAccessedVal << 24;
+  while (I + 4 <= NumLines) {
+    if ((*(u32*)Shadow & WordValue) != WordValue)
+      *(u32*)Shadow |= WordValue;
+    Shadow += 4;
+    I += 4;
+  }
+  // Write any trailing shadow bytes.
+  while (I < NumLines) {
+    if ((*Shadow & ShadowAccessedVal) != ShadowAccessedVal)
+      *Shadow |= ShadowAccessedVal;
+    ++Shadow;
+    ++I;
+  }
+}
+
+// This routine will word-align ShadowStart and ShadowEnd prior to scanning.
+// It does *not* clear for BitIdx==TotalWorkingSetBitIdx, as that top bit
+// measures the access during the entire execution and should never be cleared.
+static u32 countAndClearShadowValues(u32 BitIdx, uptr ShadowStart,
+                                     uptr ShadowEnd) {
+  u32 WorkingSetSize = 0;
+  u32 ByteValue = 0x1 << BitIdx;
+  u32 WordValue = ByteValue | ByteValue << 8 | ByteValue << 16 |
+    ByteValue << 24;
+  // Get word aligned start.
+  ShadowStart = RoundDownTo(ShadowStart, sizeof(u32));
+  bool Accum = getFlags()->record_snapshots && BitIdx < MaxAccumBitIdx;
+  // Do not clear the bit that measures access during the entire execution.
+  bool Clear = BitIdx < TotalWorkingSetBitIdx;
+  for (u32 *Ptr = (u32 *)ShadowStart; Ptr < (u32 *)ShadowEnd; ++Ptr) {
+    if ((*Ptr & WordValue) != 0) {
+      byte *BytePtr = (byte *)Ptr;
+      for (u32 j = 0; j < sizeof(u32); ++j) {
+        if (BytePtr[j] & ByteValue) {
+          ++WorkingSetSize;
+          if (Accum) {
+            // Accumulate to the lower-frequency bit to the left.
+            BytePtr[j] |= (ByteValue << 1);
+          }
+        }
+      }
+      if (Clear) {
+        // Clear this bit from every shadow byte.
+        *Ptr &= ~WordValue;
+      }
+    }
+  }
+  return WorkingSetSize;
+}
+
+// Scan shadow memory to calculate the number of cache lines being accessed,
+// i.e., the number of non-zero bits indexed by BitIdx in each shadow byte.
+// We also clear the lowest bits (most recent working set snapshot).
+// We do *not* clear for BitIdx==TotalWorkingSetBitIdx, as that top bit
+// measures the access during the entire execution and should never be cleared.
+static u32 computeWorkingSizeAndReset(u32 BitIdx) {
+  u32 WorkingSetSize = 0;
+  MemoryMappingLayout MemIter(true/*cache*/);
+  uptr Start, End, Prot;
+  while (MemIter.Next(&Start, &End, nullptr/*offs*/, nullptr/*file*/,
+                      0/*file size*/, &Prot)) {
+    VPrintf(4, "%s: considering %p-%p app=%d shadow=%d prot=%u\n",
+            __FUNCTION__, Start, End, isAppMem(Start), isShadowMem(Start),
+            Prot);
+    if (isShadowMem(Start) && (Prot & MemoryMappingLayout::kProtectionWrite)) {
+      VPrintf(3, "%s: walking %p-%p\n", __FUNCTION__, Start, End);
+      WorkingSetSize += countAndClearShadowValues(BitIdx, Start, End);
+    }
+  }
+  return WorkingSetSize;
+}
+
+// This is invoked from a signal handler but in a sideline thread doing nothing
+// else so it is a little less fragile than a typical signal handler.
+static void takeSample(void *Arg) {
+  u32 BitIdx = CurWorkingSetBitIdx;
+  u32 Freq = 1;
+  ++SnapshotNum; // Simpler to skip 0 whose mod matches everything.
+  while (BitIdx <= MaxAccumBitIdx && (SnapshotNum % Freq) == 0) {
+    u32 NumLines = computeWorkingSizeAndReset(BitIdx);
+    VReport(1, "%s: snapshot #%5d bit %d freq %4d: %8u\n", SanitizerToolName,
+            SnapshotNum, BitIdx, Freq, NumLines);
+    SizePerFreq[BitIdx].push_back(NumLines);
+    Freq = Freq << getFlags()->snapshot_step;
+    BitIdx++;
+  }
+}
+
+unsigned int getSampleCountWorkingSet() {
+  return SnapshotNum;
+}
+
+// Initialization that must be done before any instrumented code is executed.
+void initializeShadowWorkingSet() {
+  CHECK(getFlags()->cache_line_size == CacheLineSize);
+  registerMemoryFaultHandler();
+}
+
+void initializeWorkingSet() {
+  if (getFlags()->record_snapshots) {
+    for (u32 i = 0; i < NumFreq; ++i)
+      SizePerFreq[i].initialize(CircularBufferSizes[i]);
+    Thread.launchThread(takeSample, nullptr, getFlags()->sample_freq);
+  }
+}
+
+static u32 getPeriodForPrinting(u32 MilliSec, const char *&Unit) {
+  if (MilliSec > 600000) {
+    Unit = "min";
+    return MilliSec / 60000;
+  } else if (MilliSec > 10000) {
+    Unit = "sec";
+    return MilliSec / 1000;
+  } else {
+    Unit = "ms";
+    return MilliSec;
+  }
+}
+
+static u32 getSizeForPrinting(u32 NumOfCachelines, const char *&Unit) {
+  // We need a constant to avoid software divide support:
+  static const u32 KilobyteCachelines = (0x1 << 10) / CacheLineSize;
+  static const u32 MegabyteCachelines = KilobyteCachelines << 10;
+
+  if (NumOfCachelines > 10 * MegabyteCachelines) {
+    Unit = "MB";
+    return NumOfCachelines / MegabyteCachelines;
+  } else if (NumOfCachelines > 10 * KilobyteCachelines) {
+    Unit = "KB";
+    return NumOfCachelines / KilobyteCachelines;
+  } else {
+    Unit = "Bytes";
+    return NumOfCachelines * CacheLineSize;
+  }
+}
+
+void reportWorkingSet() {
+  const char *Unit;
+  if (getFlags()->record_snapshots) {
+    u32 Freq = 1;
+    Report(" Total number of samples: %u\n", SnapshotNum);
+    for (u32 i = 0; i < NumFreq; ++i) {
+      u32 Time = getPeriodForPrinting(getFlags()->sample_freq*Freq, Unit);
+      Report(" Samples array #%d at period %u %s\n", i, Time, Unit);
+      // FIXME: report whether we wrapped around and thus whether we
+      // have data on the whole run or just the last N samples.
+      for (u32 j = 0; j < SizePerFreq[i].size(); ++j) {
+        u32 Size = getSizeForPrinting(SizePerFreq[i][j], Unit);
+        Report("#%4d: %8u %s (%9u cache lines)\n", j, Size, Unit,
+               SizePerFreq[i][j]);
+      }
+      Freq = Freq << getFlags()->snapshot_step;
+    }
+  }
+
+  // Get the working set size for the entire execution.
+  u32 NumOfCachelines = computeWorkingSizeAndReset(TotalWorkingSetBitIdx);
+  u32 Size = getSizeForPrinting(NumOfCachelines, Unit);
+  Report(" %s: the total working set size: %u %s (%u cache lines)\n",
+         SanitizerToolName, Size, Unit, NumOfCachelines);
+}
+
+int finalizeWorkingSet() {
+  if (getFlags()->record_snapshots)
+    Thread.joinThread();
+  reportWorkingSet();
+  if (getFlags()->record_snapshots) {
+    for (u32 i = 0; i < NumFreq; ++i)
+      SizePerFreq[i].free();
+  }
+  return 0;
+}
+
+} // namespace __esan
diff --git a/lib/esan/working_set.h b/lib/esan/working_set.h
new file mode 100644
index 0000000..6a976c3
--- /dev/null
+++ b/lib/esan/working_set.h
@@ -0,0 +1,40 @@
+//===-- working_set.h -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Header for working-set-specific code.
+//===----------------------------------------------------------------------===//
+
+#ifndef WORKING_SET_H
+#define WORKING_SET_H
+
+#include "interception/interception.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+namespace __esan {
+
+void initializeWorkingSet();
+void initializeShadowWorkingSet();
+int finalizeWorkingSet();
+void reportWorkingSet();
+unsigned int getSampleCountWorkingSet();
+void processRangeAccessWorkingSet(uptr PC, uptr Addr, SIZE_T Size,
+                                  bool IsWrite);
+
+// Platform-dependent.
+void registerMemoryFaultHandler();
+bool processWorkingSetSignal(int SigNum, void (*Handler)(int),
+                             void (**Result)(int));
+bool processWorkingSetSigaction(int SigNum, const void *Act, void *OldAct);
+bool processWorkingSetSigprocmask(int How, void *Set, void *OldSet);
+
+} // namespace __esan
+
+#endif // WORKING_SET_H
diff --git a/lib/esan/working_set_posix.cpp b/lib/esan/working_set_posix.cpp
new file mode 100644
index 0000000..fcfa871
--- /dev/null
+++ b/lib/esan/working_set_posix.cpp
@@ -0,0 +1,133 @@
+//===-- working_set_posix.cpp -----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// POSIX-specific working set tool code.
+//===----------------------------------------------------------------------===//
+
+#include "working_set.h"
+#include "esan_flags.h"
+#include "esan_shadow.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_linux.h"
+#include <signal.h>
+#include <sys/mman.h>
+
+namespace __esan {
+
+// We only support regular POSIX threads with a single signal handler
+// for the whole process == thread group.
+// Thus we only need to store one app signal handler.
+// FIXME: Store and use any alternate stack and signal flags set by
+// the app.  For now we just call the app handler from our handler.
+static __sanitizer_sigaction AppSigAct;
+
+bool processWorkingSetSignal(int SigNum, void (*Handler)(int),
+                             void (**Result)(int)) {
+  VPrintf(2, "%s: %d\n", __FUNCTION__, SigNum);
+  if (SigNum == SIGSEGV) {
+    *Result = AppSigAct.handler;
+    AppSigAct.sigaction = (void (*)(int, void*, void*))Handler;
+    return false; // Skip real call.
+  }
+  return true;
+}
+
+bool processWorkingSetSigaction(int SigNum, const void *ActVoid,
+                                void *OldActVoid) {
+  VPrintf(2, "%s: %d\n", __FUNCTION__, SigNum);
+  if (SigNum == SIGSEGV) {
+    const struct sigaction *Act = (const struct sigaction *) ActVoid;
+    struct sigaction *OldAct = (struct sigaction *) OldActVoid;
+    if (OldAct)
+      internal_memcpy(OldAct, &AppSigAct, sizeof(*OldAct));
+    if (Act)
+      internal_memcpy(&AppSigAct, Act, sizeof(AppSigAct));
+    return false; // Skip real call.
+  }
+  return true;
+}
+
+bool processWorkingSetSigprocmask(int How, void *Set, void *OldSet) {
+  VPrintf(2, "%s\n", __FUNCTION__);
+  // All we need to do is ensure that SIGSEGV is not blocked.
+  // FIXME: we are not fully transparent as we do not pretend that
+  // SIGSEGV is still blocked on app queries: that would require
+  // per-thread mask tracking.
+  if (Set && (How == SIG_BLOCK || How == SIG_SETMASK)) {
+    if (internal_sigismember((__sanitizer_sigset_t *)Set, SIGSEGV)) {
+      VPrintf(1, "%s: removing SIGSEGV from the blocked set\n", __FUNCTION__);
+      internal_sigdelset((__sanitizer_sigset_t *)Set, SIGSEGV);
+    }
+  }
+  return true;
+}
+
+static void reinstateDefaultHandler(int SigNum) {
+  __sanitizer_sigaction SigAct;
+  internal_memset(&SigAct, 0, sizeof(SigAct));
+  SigAct.sigaction = (void (*)(int, void*, void*)) SIG_DFL;
+  int Res = internal_sigaction(SigNum, &SigAct, nullptr);
+  CHECK(Res == 0);
+  VPrintf(1, "Unregistered for %d handler\n", SigNum);
+}
+
+// If this is a shadow fault, we handle it here; otherwise, we pass it to the
+// app to handle it just as the app would do without our tool in place.
+static void handleMemoryFault(int SigNum, void *Info, void *Ctx) {
+  if (SigNum == SIGSEGV) {
+    // We rely on si_addr being filled in (thus we do not support old kernels).
+    siginfo_t *SigInfo = (siginfo_t *)Info;
+    uptr Addr = (uptr)SigInfo->si_addr;
+    if (isShadowMem(Addr)) {
+      VPrintf(3, "Shadow fault @%p\n", Addr);
+      uptr PageSize = GetPageSizeCached();
+      int Res = internal_mprotect((void *)RoundDownTo(Addr, PageSize),
+                                  PageSize, PROT_READ|PROT_WRITE);
+      CHECK(Res == 0);
+    } else if (AppSigAct.sigaction) {
+      // FIXME: For simplicity we ignore app options including its signal stack
+      // (we just use ours) and all the delivery flags.
+      AppSigAct.sigaction(SigNum, Info, Ctx);
+    } else {
+      // Crash instead of spinning with infinite faults.
+      reinstateDefaultHandler(SigNum);
+    }
+  } else
+    UNREACHABLE("signal not registered");
+}
+
+void registerMemoryFaultHandler() {
+  // We do not use an alternate signal stack, as doing so would require
+  // setting it up for each app thread.
+  // FIXME: This could result in problems with emulating the app's signal
+  // handling if the app relies on an alternate stack for SIGSEGV.
+
+  // We require that SIGSEGV is not blocked.  We use a sigprocmask
+  // interceptor to ensure that in the future.  Here we ensure it for
+  // the current thread.  We assume there are no other threads at this
+  // point during initialization, or that at least they do not block
+  // SIGSEGV.
+  __sanitizer_sigset_t SigSet;
+  internal_sigemptyset(&SigSet);
+  internal_sigprocmask(SIG_BLOCK, &SigSet, nullptr);
+
+  __sanitizer_sigaction SigAct;
+  internal_memset(&SigAct, 0, sizeof(SigAct));
+  SigAct.sigaction = handleMemoryFault;
+  // We want to handle nested signals b/c we need to handle a
+  // shadow fault in an app signal handler.
+  SigAct.sa_flags = SA_SIGINFO | SA_NODEFER;
+  int Res = internal_sigaction(SIGSEGV, &SigAct, &AppSigAct);
+  CHECK(Res == 0);
+  VPrintf(1, "Registered for SIGSEGV handler\n");
+}
+
+} // namespace __esan
diff --git a/lib/interception/CMakeLists.txt b/lib/interception/CMakeLists.txt
index 16b41c9..18d2594 100644
--- a/lib/interception/CMakeLists.txt
+++ b/lib/interception/CMakeLists.txt
@@ -10,10 +10,14 @@
 include_directories(..)
 
 set(INTERCEPTION_CFLAGS ${SANITIZER_COMMON_CFLAGS})
-append_no_rtti_flag(INTERCEPTION_CFLAGS)
+append_rtti_flag(OFF INTERCEPTION_CFLAGS)
 
 add_compiler_rt_object_libraries(RTInterception
     OS ${SANITIZER_COMMON_SUPPORTED_OS}
     ARCHS ${SANITIZER_COMMON_SUPPORTED_ARCH}
     SOURCES ${INTERCEPTION_SOURCES}
     CFLAGS ${INTERCEPTION_CFLAGS})
+
+if(COMPILER_RT_INCLUDE_TESTS)
+  add_subdirectory(tests)
+endif()
diff --git a/lib/interception/Makefile.mk b/lib/interception/Makefile.mk
deleted file mode 100644
index 88aa6cb..0000000
--- a/lib/interception/Makefile.mk
+++ /dev/null
@@ -1,23 +0,0 @@
-#===- lib/interception/Makefile.mk -------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := interception
-SubDirs :=
-
-Sources := $(foreach file,$(wildcard $(Dir)/*.cc),$(notdir $(file)))
-ObjNames := $(Sources:%.cc=%.o)
-
-Implementation := Generic
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard $(Dir)/*.h)
-Dependencies += $(wildcard $(Dir)/../sanitizer_common/*.h)
-
-# Define a convenience variable for all the interception functions.
-InterceptionFunctions := $(Sources:%.cc=%)
diff --git a/lib/interception/interception.h b/lib/interception/interception.h
index 9e9aca2..f490c39 100644
--- a/lib/interception/interception.h
+++ b/lib/interception/interception.h
@@ -158,10 +158,12 @@
     namespace __interception { \
       extern FUNC_TYPE(func) PTR_TO_REAL(func); \
     }
+# define ASSIGN_REAL(dst, src) REAL(dst) = REAL(src)
 #else  // __APPLE__
 # define REAL(x) x
 # define DECLARE_REAL(ret_type, func, ...) \
     extern "C" ret_type func(__VA_ARGS__);
+# define ASSIGN_REAL(x, y)
 #endif  // __APPLE__
 
 #define DECLARE_REAL_AND_INTERCEPTOR(ret_type, func, ...) \
diff --git a/lib/interception/interception_win.cc b/lib/interception/interception_win.cc
index 4c04c83..c8d67b9 100644
--- a/lib/interception/interception_win.cc
+++ b/lib/interception/interception_win.cc
@@ -10,19 +10,178 @@
 // This file is a part of AddressSanitizer, an address sanity checker.
 //
 // Windows-specific interception methods.
+//
+// This file implements several hooking techniques used to intercept calls
+// to functions. The hooks are dynamically installed by modifying the assembly
+// code.
+//
+// The hooking techniques make assumptions about the way the code is
+// generated and are only safe under these assumptions.
+//
+// On a 64-bit architecture, there is no direct 64-bit jump instruction. To
+// allow arbitrary branching across the whole memory space, the notion of a
+// trampoline region is used. A trampoline region is a memory space within a
+// 2G boundary where it is safe to add custom assembly code to build 64-bit
+// jumps.
+//
+// Hooking techniques
+// ==================
+//
+// 1) Detour
+//
+//    The Detour hooking technique assumes the presence of a header with
+//    padding and an overridable 2-byte nop instruction (mov edi, edi). The
+//    nop instruction can safely be replaced by a 2-byte jump without any need
+//    to save the instruction. A jump to the target is encoded in the function
+//    header and the nop instruction is replaced by a short jump to the header.
+//
+//        head:  5 x nop                 head:  jmp <hook>
+//        func:  mov edi, edi    -->     func:  jmp short <head>
+//               [...]                   real:  [...]
+//
+//    This technique is only implemented on the 32-bit architecture.
+//    Most of the time, Windows APIs are hookable with the detour technique.
+//
+// 2) Redirect Jump
+//
+//    The redirect jump is applicable when the first instruction is a direct
+//    jump. The instruction is replaced by a jump to the hook.
+//
+//        func:  jmp <label>     -->     func:  jmp <hook>
+//
+//    On a 64-bit architecture, a trampoline is inserted.
+//
+//        func:  jmp <label>     -->     func:  jmp <tramp>
+//                                              [...]
+//
+//                                   [trampoline]
+//                                      tramp:  jmp QWORD [addr]
+//                                       addr:  .bytes <hook>
+//
+//    Note: <real> is equivalent to <label>.
+//
+// 3) HotPatch
+//
+//    The HotPatch hooking technique assumes the presence of a header with
+//    padding and a first instruction that is at least 2 bytes long.
+//
+//    The 2-byte limitation is enforced to provide the minimal space needed to
+//    encode a short jump. The HotPatch technique rewrites only one
+//    instruction, to avoid breaking a sequence of instructions containing a
+//    branching target.
+//
+//    These assumptions are enforced by the MSVC compiler when the /HOTPATCH
+//    flag is used.
+//      see: https://msdn.microsoft.com/en-us/library/ms173507.aspx
+//    The default padding length is 5 bytes on 32-bit and 6 bytes on 64-bit.
+//
+//        head:   5 x nop                head:  jmp <hook>
+//        func:   <instr>        -->     func:  jmp short <head>
+//                [...]                  body:  [...]
+//
+//                                   [trampoline]
+//                                       real:  <instr>
+//                                              jmp <body>
+//
+//    On a 64-bit architecture:
+//
+//        head:   6 x nop                head:  jmp QWORD [addr1]
+//        func:   <instr>        -->     func:  jmp short <head>
+//                [...]                  body:  [...]
+//
+//                                   [trampoline]
+//                                      addr1:  .bytes <hook>
+//                                       real:  <instr>
+//                                              jmp QWORD [addr2]
+//                                      addr2:  .bytes <body>
+//
+// 4) Trampoline
+//
+//    The Trampoline hooking technique is the most aggressive one. It assumes
+//    that there is a sequence of instructions that can be safely replaced by
+//    a jump (enough room and no incoming branches).
+//
+//    Unfortunately, these assumptions cannot be verified up front, so code
+//    may be broken after hooking.
+//
+//        func:   <instr>        -->     func:  jmp <hook>
+//                <instr>
+//                [...]                  body:  [...]
+//
+//                                   [trampoline]
+//                                       real:  <instr>
+//                                              <instr>
+//                                              jmp <body>
+//
+//    On a 64-bit architecture:
+//
+//        func:   <instr>        -->     func:  jmp QWORD [addr1]
+//                <instr>
+//                [...]                  body:  [...]
+//
+//                                   [trampoline]
+//                                      addr1:  .bytes <hook>
+//                                       real:  <instr>
+//                                              <instr>
+//                                              jmp QWORD [addr2]
+//                                      addr2:  .bytes <body>
 //===----------------------------------------------------------------------===//
 
 #ifdef _WIN32
 
 #include "interception.h"
+#include "sanitizer_common/sanitizer_platform.h"
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 
 namespace __interception {
 
+static const int kAddressLength = FIRST_32_SECOND_64(4, 8);
+static const int kJumpInstructionLength = 5;
+static const int kShortJumpInstructionLength = 2;
+static const int kIndirectJumpInstructionLength = 6;
+static const int kBranchLength =
+    FIRST_32_SECOND_64(kJumpInstructionLength, kIndirectJumpInstructionLength);
+static const int kDirectBranchLength = kBranchLength + kAddressLength;
+
+static void InterceptionFailed() {
+  // Do we have a good way to abort with an error message here?
+  __debugbreak();
+}
+
+static bool DistanceIsWithin2Gig(uptr from, uptr target) {
+  if (from < target)
+    return target - from <= (uptr)0x7FFFFFFFU;
+  else
+    return from - target <= (uptr)0x80000000U;
+}
+
+static uptr GetMmapGranularity() {
+  SYSTEM_INFO si;
+  GetSystemInfo(&si);
+  return si.dwAllocationGranularity;
+}
+
+static uptr RoundUpTo(uptr size, uptr boundary) {
+  return (size + boundary - 1) & ~(boundary - 1);
+}
+
 // FIXME: internal_str* and internal_mem* functions should be moved from the
 // ASan sources into interception/.
 
+static size_t _strlen(const char *str) {
+  const char* p = str;
+  while (*p != '\0') ++p;
+  return p - str;
+}
+
+static char* _strchr(char* str, char c) {
+  while (*str) {
+    if (*str == c)
+      return str;
+    ++str;
+  }
+  return nullptr;
+}
+
 static void _memset(void *p, int value, size_t sz) {
   for (size_t i = 0; i < sz; ++i)
     ((char*)p)[i] = (char)value;
@@ -35,163 +194,640 @@
     dst_c[i] = src_c[i];
 }
 
-static void WriteJumpInstruction(char *jmp_from, char *to) {
-  // jmp XXYYZZWW = E9 WW ZZ YY XX, where XXYYZZWW is an offset fromt jmp_from
-  // to the next instruction to the destination.
-  ptrdiff_t offset = to - jmp_from - 5;
-  *jmp_from = '\xE9';
-  *(ptrdiff_t*)(jmp_from + 1) = offset;
+static bool ChangeMemoryProtection(
+    uptr address, uptr size, DWORD *old_protection) {
+  return ::VirtualProtect((void*)address, size,
+                          PAGE_EXECUTE_READWRITE,
+                          old_protection) != FALSE;
 }
 
-static char *GetMemoryForTrampoline(size_t size) {
-  // Trampolines are allocated from a common pool.
-  const int POOL_SIZE = 1024;
-  static char *pool = NULL;
-  static size_t pool_used = 0;
-  if (!pool) {
-    pool = (char *)VirtualAlloc(NULL, POOL_SIZE, MEM_RESERVE | MEM_COMMIT,
-                                PAGE_EXECUTE_READWRITE);
-    // FIXME: Might want to apply PAGE_EXECUTE_READ access after all the
-    // interceptors are in place.
-    if (!pool)
-      return NULL;
-    _memset(pool, 0xCC /* int 3 */, POOL_SIZE);
+static bool RestoreMemoryProtection(
+    uptr address, uptr size, DWORD old_protection) {
+  DWORD unused;
+  return ::VirtualProtect((void*)address, size,
+                          old_protection,
+                          &unused) != FALSE;
+}
+
+static bool IsMemoryPadding(uptr address, uptr size) {
+  u8* function = (u8*)address;
+  for (size_t i = 0; i < size; ++i)
+    if (function[i] != 0x90 && function[i] != 0xCC)
+      return false;
+  return true;
+}
+
+static const u8 kHintNop10Bytes[] = {
+  0x66, 0x66, 0x0F, 0x1F, 0x84,
+  0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+template<class T>
+static bool FunctionHasPrefix(uptr address, const T &pattern) {
+  u8* function = (u8*)address - sizeof(pattern);
+  for (size_t i = 0; i < sizeof(pattern); ++i)
+    if (function[i] != pattern[i])
+      return false;
+  return true;
+}
+
+static bool FunctionHasPadding(uptr address, uptr size) {
+  if (IsMemoryPadding(address - size, size))
+    return true;
+  if (size <= sizeof(kHintNop10Bytes) &&
+      FunctionHasPrefix(address, kHintNop10Bytes))
+    return true;
+  return false;
+}
+
+static void WritePadding(uptr from, uptr size) {
+  _memset((void*)from, 0xCC, (size_t)size);
+}
+
+static void WriteJumpInstruction(uptr from, uptr target) {
+  if (!DistanceIsWithin2Gig(from + kJumpInstructionLength, target))
+    InterceptionFailed();
+  ptrdiff_t offset = target - from - kJumpInstructionLength;
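+  // Worked example (hypothetical addresses): from = 0x401000 and
+  // target = 0x401030 give offset = 0x401030 - 0x401000 - 5 = 0x2B, so the
+  // bytes written below are E9 2B 00 00 00 (jmp rel32, little-endian offset).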
+  *(u8*)from = 0xE9;
+  *(u32*)(from + 1) = offset;
+}
+
+static void WriteShortJumpInstruction(uptr from, uptr target) {
+  sptr offset = target - from - kShortJumpInstructionLength;
+  if (offset < -128 || offset > 127)
+    InterceptionFailed();
+  *(u8*)from = 0xEB;
+  *(u8*)(from + 1) = (u8)offset;
+}
+
+#if SANITIZER_WINDOWS64
+static void WriteIndirectJumpInstruction(uptr from, uptr indirect_target) {
+  // jmp [rip + <offset>] = FF 25 <offset> where <offset> is a relative
+  // offset.
+  // The offset is the distance from the end of the jump instruction to the
+  // memory location containing the targeted address. The displacement is
+  // still 32-bit in x64, so indirect_target must be located within a +/- 2GB
+  // range.
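+  // Worked example (hypothetical addresses): from = 0x1000 and
+  // indirect_target = 0x2000 give offset = 0x2000 - 0x1000 - 6 = 0xFFA, so
+  // the bytes written below are FF 25 FA 0F 00 00.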
+  int offset = indirect_target - from - kIndirectJumpInstructionLength;
+  if (!DistanceIsWithin2Gig(from + kIndirectJumpInstructionLength,
+                            indirect_target)) {
+    InterceptionFailed();
+  }
+  *(u16*)from = 0x25FF;
+  *(u32*)(from + 2) = offset;
+}
+#endif
+
+static void WriteBranch(
+    uptr from, uptr indirect_target, uptr target) {
+#if SANITIZER_WINDOWS64
+  WriteIndirectJumpInstruction(from, indirect_target);
+  *(u64*)indirect_target = target;
+#else
+  (void)indirect_target;
+  WriteJumpInstruction(from, target);
+#endif
+}
+
+static void WriteDirectBranch(uptr from, uptr target) {
+#if SANITIZER_WINDOWS64
+  // Emit an indirect jump through immediately following bytes:
+  //   jmp [rip + kBranchLength]
+  //   .quad <target>
+  WriteBranch(from, from + kBranchLength, target);
+#else
+  WriteJumpInstruction(from, target);
+#endif
+}
+
+struct TrampolineMemoryRegion {
+  uptr content;
+  uptr allocated_size;
+  uptr max_size;
+};
+
+static const uptr kTrampolineScanLimitRange = 1 << 31;  // 2 gig
+static const int kMaxTrampolineRegion = 1024;
+static TrampolineMemoryRegion TrampolineRegions[kMaxTrampolineRegion];
+
+static void *AllocateTrampolineRegion(uptr image_address, size_t granularity) {
+#if SANITIZER_WINDOWS64
+  uptr address = image_address;
+  uptr scanned = 0;
+  while (scanned < kTrampolineScanLimitRange) {
+    MEMORY_BASIC_INFORMATION info;
+    if (!::VirtualQuery((void*)address, &info, sizeof(info)))
+      return nullptr;
+
+    // Check whether a region can be allocated at |address|.
+    if (info.State == MEM_FREE && info.RegionSize >= granularity) {
+      void *page = ::VirtualAlloc((void*)RoundUpTo(address, granularity),
+                                  granularity,
+                                  MEM_RESERVE | MEM_COMMIT,
+                                  PAGE_EXECUTE_READWRITE);
+      return page;
+    }
+
+    // Move to the next region.
+    address = (uptr)info.BaseAddress + info.RegionSize;
+    scanned += info.RegionSize;
+  }
+  return nullptr;
+#else
+  return ::VirtualAlloc(nullptr,
+                        granularity,
+                        MEM_RESERVE | MEM_COMMIT,
+                        PAGE_EXECUTE_READWRITE);
+#endif
+}
+
+// Used by unittests to release mapped memory space.
+void TestOnlyReleaseTrampolineRegions() {
+  for (size_t bucket = 0; bucket < kMaxTrampolineRegion; ++bucket) {
+    TrampolineMemoryRegion *current = &TrampolineRegions[bucket];
+    if (current->content == 0)
+      return;
+    ::VirtualFree((void*)current->content, 0, MEM_RELEASE);
+    current->content = 0;
+  }
+}
+
+static uptr AllocateMemoryForTrampoline(uptr image_address, size_t size) {
+  // Find a region within 2G with enough space to allocate |size| bytes.
+  TrampolineMemoryRegion *region = nullptr;
+  for (size_t bucket = 0; bucket < kMaxTrampolineRegion; ++bucket) {
+    TrampolineMemoryRegion* current = &TrampolineRegions[bucket];
+    if (current->content == 0) {
+      // No valid region found, allocate a new region.
+      size_t bucket_size = GetMmapGranularity();
+      void *content = AllocateTrampolineRegion(image_address, bucket_size);
+      if (content == nullptr)
+        return 0U;
+
+      current->content = (uptr)content;
+      current->allocated_size = 0;
+      current->max_size = bucket_size;
+      region = current;
+      break;
+    } else if (current->max_size - current->allocated_size > size) {
+#if SANITIZER_WINDOWS64
+        // On 64-bit, the memory must be allocated within a 2GB range of the
+        // image address.
+        uptr next_address = current->content + current->allocated_size;
+        if (next_address < image_address ||
+            next_address - image_address >= 0x7FFF0000)
+          continue;
+#endif
+      // The space can be allocated in the current region.
+      region = current;
+      break;
+    }
   }
 
-  if (pool_used + size > POOL_SIZE)
-    return NULL;
+  // Failed to find a region.
+  if (region == nullptr)
+    return 0U;
 
-  char *ret = pool + pool_used;
-  pool_used += size;
-  return ret;
+  // Allocate the space in the current region.
+  uptr allocated_space = region->content + region->allocated_size;
+  region->allocated_size += size;
+  WritePadding(allocated_space, size);
+
+  return allocated_space;
 }
 
 // Returns 0 on error.
-static size_t RoundUpToInstrBoundary(size_t size, char *code) {
-  size_t cursor = 0;
-  while (cursor < size) {
-    switch (code[cursor]) {
-      case '\x51':  // push ecx
-      case '\x52':  // push edx
-      case '\x53':  // push ebx
-      case '\x54':  // push esp
-      case '\x55':  // push ebp
-      case '\x56':  // push esi
-      case '\x57':  // push edi
-      case '\x5D':  // pop ebp
-        cursor++;
-        continue;
-      case '\x6A':  // 6A XX = push XX
-        cursor += 2;
-        continue;
-      case '\xE9':  // E9 XX YY ZZ WW = jmp WWZZYYXX
-      case '\xB8':  // B8 XX YY ZZ WW = mov eax, WWZZYYXX
-        cursor += 5;
-        continue;
-    }
-    switch (*(unsigned short*)(code + cursor)) {  // NOLINT
-      case 0xFF8B:  // 8B FF = mov edi, edi
-      case 0xEC8B:  // 8B EC = mov ebp, esp
-      case 0xC033:  // 33 C0 = xor eax, eax
-        cursor += 2;
-        continue;
-      case 0x458B:  // 8B 45 XX = mov eax, dword ptr [ebp+XXh]
-      case 0x5D8B:  // 8B 5D XX = mov ebx, dword ptr [ebp+XXh]
-      case 0xEC83:  // 83 EC XX = sub esp, XX
-      case 0x75FF:  // FF 75 XX = push dword ptr [ebp+XXh]
-        cursor += 3;
-        continue;
-      case 0xC1F7:  // F7 C1 XX YY ZZ WW = test ecx, WWZZYYXX
-      case 0x25FF:  // FF 25 XX YY ZZ WW = jmp dword ptr ds:[WWZZYYXX]
-        cursor += 6;
-        continue;
-      case 0x3D83:  // 83 3D XX YY ZZ WW TT = cmp TT, WWZZYYXX
-        cursor += 7;
-        continue;
-    }
-    switch (0x00FFFFFF & *(unsigned int*)(code + cursor)) {
-      case 0x24448A:  // 8A 44 24 XX = mov eal, dword ptr [esp+XXh]
-      case 0x24448B:  // 8B 44 24 XX = mov eax, dword ptr [esp+XXh]
-      case 0x244C8B:  // 8B 4C 24 XX = mov ecx, dword ptr [esp+XXh]
-      case 0x24548B:  // 8B 54 24 XX = mov edx, dword ptr [esp+XXh]
-      case 0x24748B:  // 8B 74 24 XX = mov esi, dword ptr [esp+XXh]
-      case 0x247C8B:  // 8B 7C 24 XX = mov edi, dword ptr [esp+XXh]
-        cursor += 4;
-        continue;
-    }
-
-    // Unknown instruction!
-    // FIXME: Unknown instruction failures might happen when we add a new
-    // interceptor or a new compiler version. In either case, they should result
-    // in visible and readable error messages. However, merely calling abort()
-    // leads to an infinite recursion in CheckFailed.
-    // Do we have a good way to abort with an error message here?
-    __debugbreak();
-    return 0;
+static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
+  switch (*(u64*)address) {
+    case 0x90909090909006EB:  // stub: jmp over 6 x nop.
+      return 8;
   }
 
+  switch (*(u8*)address) {
+    case 0x90:  // 90 : nop
+      return 1;
+
+    case 0x50:  // push eax / rax
+    case 0x51:  // push ecx / rcx
+    case 0x52:  // push edx / rdx
+    case 0x53:  // push ebx / rbx
+    case 0x54:  // push esp / rsp
+    case 0x55:  // push ebp / rbp
+    case 0x56:  // push esi / rsi
+    case 0x57:  // push edi / rdi
+    case 0x5D:  // pop ebp / rbp
+      return 1;
+
+    case 0x6A:  // 6A XX = push XX
+      return 2;
+
+    case 0xb8:  // b8 XX XX XX XX : mov eax, XX XX XX XX
+    case 0xB9:  // b9 XX XX XX XX : mov ecx, XX XX XX XX
+      return 5;
+
+    // Cannot overwrite a control instruction. Return 0 to indicate failure.
+    case 0xE9:  // E9 XX XX XX XX : jmp <label>
+    case 0xE8:  // E8 XX XX XX XX : call <func>
+    case 0xC3:  // C3 : ret
+    case 0xEB:  // EB XX : jmp XX (short jump)
+    case 0x70:  // 70-7F YY : jcc YY (short conditional jump)
+    case 0x71:
+    case 0x72:
+    case 0x73:
+    case 0x74:
+    case 0x75:
+    case 0x76:
+    case 0x77:
+    case 0x78:
+    case 0x79:
+    case 0x7A:
+    case 0x7B:
+    case 0x7C:
+    case 0x7D:
+    case 0x7E:
+    case 0x7F:
+      return 0;
+  }
+
+  switch (*(u16*)(address)) {
+    case 0xFF8B:  // 8B FF : mov edi, edi
+    case 0xEC8B:  // 8B EC : mov ebp, esp
+    case 0xc889:  // 89 C8 : mov eax, ecx
+    case 0xC18B:  // 8B C1 : mov eax, ecx
+    case 0xC033:  // 33 C0 : xor eax, eax
+    case 0xC933:  // 33 C9 : xor ecx, ecx
+    case 0xD233:  // 33 D2 : xor edx, edx
+      return 2;
+
+    // Cannot overwrite a control instruction. Return 0 to indicate failure.
+    case 0x25FF:  // FF 25 XX XX XX XX : jmp [XXXXXXXX]
+      return 0;
+  }
+
+  switch (0x00FFFFFF & *(u32*)address) {
+    case 0x24A48D:  // 8D A4 24 XX XX XX XX : lea esp, [esp + XX XX XX XX]
+      return 7;
+  }
+
+#if SANITIZER_WINDOWS64
+  switch (*(u8*)address) {
+    case 0xA1:  // A1 XX XX XX XX XX XX XX XX :
+                //   movabs eax, dword ptr ds:[XXXXXXXX]
+      return 8;
+  }
+
+  switch (*(u16*)address) {
+    case 0x5040:  // push rax
+    case 0x5140:  // push rcx
+    case 0x5240:  // push rdx
+    case 0x5340:  // push rbx
+    case 0x5440:  // push rsp
+    case 0x5540:  // push rbp
+    case 0x5640:  // push rsi
+    case 0x5740:  // push rdi
+    case 0x5441:  // push r12
+    case 0x5541:  // push r13
+    case 0x5641:  // push r14
+    case 0x5741:  // push r15
+    case 0x9066:  // Two-byte NOP
+      return 2;
+  }
+
+  switch (0x00FFFFFF & *(u32*)address) {
+    case 0xe58948:    // 48 89 e5 : mov rbp, rsp
+    case 0xc18b48:    // 48 8b c1 : mov rax, rcx
+    case 0xc48b48:    // 48 8b c4 : mov rax, rsp
+    case 0xd9f748:    // 48 f7 d9 : neg rcx
+    case 0xd12b48:    // 48 2b d1 : sub rdx, rcx
+    case 0x07c1f6:    // f6 c1 07 : test cl, 0x7
+    case 0xc0854d:    // 4d 85 c0 : test r8, r8
+    case 0xc2b60f:    // 0f b6 c2 : movzx eax, dl
+    case 0xc03345:    // 45 33 c0 : xor r8d, r8d
+    case 0xdb3345:    // 45 33 DB : xor r11d, r11d
+    case 0xd98b4c:    // 4c 8b d9 : mov r11, rcx
+    case 0xd28b4c:    // 4c 8b d2 : mov r10, rdx
+    case 0xc98b4c:    // 4C 8B C9 : mov r9, rcx
+    case 0xd2b60f:    // 0f b6 d2 : movzx edx, dl
+    case 0xca2b48:    // 48 2b ca : sub rcx, rdx
+    case 0x10b70f:    // 0f b7 10 : movzx edx, WORD PTR [rax]
+    case 0xc00b4d:    // 4d 0b c0 : or r8, r8
+    case 0xd18b48:    // 48 8b d1 : mov rdx, rcx
+    case 0xdc8b4c:    // 4c 8b dc : mov r11, rsp
+    case 0xd18b4c:    // 4c 8b d1 : mov r10, rcx
+      return 3;
+
+    case 0xec8348:    // 48 83 ec XX : sub rsp, XX
+    case 0xf88349:    // 49 83 f8 XX : cmp r8, XX
+    case 0x588948:    // 48 89 58 XX : mov QWORD PTR[rax + XX], rbx
+      return 4;
+
+    case 0xec8148:    // 48 81 EC XX XX XX XX : sub rsp, XXXXXXXX
+      return 7;
+
+    case 0x058b48:    // 48 8b 05 XX XX XX XX :
+                      //   mov rax, QWORD PTR [rip + XXXXXXXX]
+    case 0x25ff48:    // 48 ff 25 XX XX XX XX :
+                      //   rex.W jmp QWORD PTR [rip + XXXXXXXX]
+
+      // Instructions having offset relative to 'rip' need offset adjustment.
+      if (rel_offset)
+        *rel_offset = 3;
+      return 7;
+
+    case 0x2444c7:    // C7 44 24 XX YY YY YY YY
+                      //   mov dword ptr [rsp + XX], YYYYYYYY
+      return 8;
+  }
+
+  switch (*(u32*)(address)) {
+    case 0x24448b48:  // 48 8b 44 24 XX : mov rax, QWORD ptr [rsp + XX]
+    case 0x246c8948:  // 48 89 6C 24 XX : mov QWORD ptr [rsp + XX], rbp
+    case 0x245c8948:  // 48 89 5c 24 XX : mov QWORD PTR [rsp + XX], rbx
+    case 0x24748948:  // 48 89 74 24 XX : mov QWORD PTR [rsp + XX], rsi
+      return 5;
+  }
+
+#else
+
+  switch (*(u8*)address) {
+    case 0xA1:  // A1 XX XX XX XX :  mov eax, dword ptr ds:[XXXXXXXX]
+      return 5;
+  }
+  switch (*(u16*)address) {
+    case 0x458B:  // 8B 45 XX : mov eax, dword ptr [ebp + XX]
+    case 0x5D8B:  // 8B 5D XX : mov ebx, dword ptr [ebp + XX]
+    case 0x7D8B:  // 8B 7D XX : mov edi, dword ptr [ebp + XX]
+    case 0xEC83:  // 83 EC XX : sub esp, XX
+    case 0x75FF:  // FF 75 XX : push dword ptr [ebp + XX]
+      return 3;
+    case 0xC1F7:  // F7 C1 XX YY ZZ WW : test ecx, WWZZYYXX
+    case 0x25FF:  // FF 25 XX YY ZZ WW : jmp dword ptr ds:[WWZZYYXX]
+      return 6;
+    case 0x3D83:  // 83 3D XX YY ZZ WW TT : cmp TT, WWZZYYXX
+      return 7;
+    case 0x7D83:  // 83 7D XX YY : cmp dword ptr [ebp + XX], YY
+      return 4;
+  }
+
+  switch (0x00FFFFFF & *(u32*)address) {
+    case 0x24448A:  // 8A 44 24 XX : mov al, byte ptr [esp + XX]
+    case 0x24448B:  // 8B 44 24 XX : mov eax, dword ptr [esp + XX]
+    case 0x244C8B:  // 8B 4C 24 XX : mov ecx, dword ptr [esp + XX]
+    case 0x24548B:  // 8B 54 24 XX : mov edx, dword ptr [esp + XX]
+    case 0x24748B:  // 8B 74 24 XX : mov esi, dword ptr [esp + XX]
+    case 0x247C8B:  // 8B 7C 24 XX : mov edi, dword ptr [esp + XX]
+      return 4;
+  }
+
+  switch (*(u32*)address) {
+    case 0x2444B60F:  // 0F B6 44 24 XX : movzx eax, byte ptr [esp + XX]
+      return 5;
+  }
+#endif
+
+  // Unknown instruction!
+  // FIXME: Unknown instruction failures might happen when we add a new
+  // interceptor or a new compiler version. In either case, they should result
+  // in visible and readable error messages. However, merely calling abort()
+  // leads to an infinite recursion in CheckFailed.
+  InterceptionFailed();
+  return 0;
+}
+
+// Returns 0 on error.
+static size_t RoundUpToInstrBoundary(size_t size, uptr address) {
+  size_t cursor = 0;
+  while (cursor < size) {
+    size_t instruction_size = GetInstructionSize(address + cursor);
+    if (!instruction_size)
+      return 0;
+    cursor += instruction_size;
+  }
   return cursor;
 }
 
-bool OverrideFunction(uptr old_func, uptr new_func, uptr *orig_old_func) {
-#ifdef _WIN64
-#error OverrideFunction is not yet supported on x64
+static bool CopyInstructions(uptr to, uptr from, size_t size) {
+  size_t cursor = 0;
+  while (cursor != size) {
+    size_t rel_offset = 0;
+    size_t instruction_size = GetInstructionSize(from + cursor, &rel_offset);
+    _memcpy((void*)(to + cursor), (void*)(from + cursor),
+            (size_t)instruction_size);
+    if (rel_offset) {
+      uptr delta = to - from;
+      uptr relocated_offset = *(u32*)(to + cursor + rel_offset) - delta;
+#if SANITIZER_WINDOWS64
+      if (relocated_offset + 0x80000000U >= 0xFFFFFFFFU)
+        return false;
 #endif
-  // Function overriding works basically like this:
-  // We write "jmp <new_func>" (5 bytes) at the beginning of the 'old_func'
-  // to override it.
-  // We might want to be able to execute the original 'old_func' from the
-  // wrapper, in this case we need to keep the leading 5+ bytes ('head')
-  // of the original code somewhere with a "jmp <old_func+head>".
-  // We call these 'head'+5 bytes of instructions a "trampoline".
-  char *old_bytes = (char *)old_func;
-
-  // We'll need at least 5 bytes for a 'jmp'.
-  size_t head = 5;
-  if (orig_old_func) {
-    // Find out the number of bytes of the instructions we need to copy
-    // to the trampoline and store it in 'head'.
-    head = RoundUpToInstrBoundary(head, old_bytes);
-    if (!head)
-      return false;
-
-    // Put the needed instructions into the trampoline bytes.
-    char *trampoline = GetMemoryForTrampoline(head + 5);
-    if (!trampoline)
-      return false;
-    _memcpy(trampoline, old_bytes, head);
-    WriteJumpInstruction(trampoline + head, old_bytes + head);
-    *orig_old_func = (uptr)trampoline;
+      *(u32*)(to + cursor + rel_offset) = relocated_offset;
+    }
+    cursor += instruction_size;
   }
+  return true;
+}
 
-  // Now put the "jmp <new_func>" instruction at the original code location.
-  // We should preserve the EXECUTE flag as some of our own code might be
-  // located in the same page (sic!).  FIXME: might consider putting the
-  // __interception code into a separate section or something?
-  DWORD old_prot, unused_prot;
-  if (!VirtualProtect((void *)old_bytes, head, PAGE_EXECUTE_READWRITE,
-                      &old_prot))
+
+#if !SANITIZER_WINDOWS64
+bool OverrideFunctionWithDetour(
+    uptr old_func, uptr new_func, uptr *orig_old_func) {
+  const int kDetourHeaderLen = 5;
+  const u16 kDetourInstruction = 0xFF8B;
+
+  uptr header = (uptr)old_func - kDetourHeaderLen;
+  uptr patch_length = kDetourHeaderLen + kShortJumpInstructionLength;
+
+  // Validate that the function is hookable.
+  if (*(u16*)old_func != kDetourInstruction ||
+      !IsMemoryPadding(header, kDetourHeaderLen))
     return false;
 
-  WriteJumpInstruction(old_bytes, (char *)new_func);
-  _memset(old_bytes + 5, 0xCC /* int 3 */, head - 5);
+  // Change memory protection to writable.
+  DWORD protection = 0;
+  if (!ChangeMemoryProtection(header, patch_length, &protection))
+    return false;
 
-  // Restore the original permissions.
-  if (!VirtualProtect((void *)old_bytes, head, old_prot, &unused_prot))
-    return false;  // not clear if this failure bothers us.
+  // Write a relative jump to the redirected function.
+  WriteJumpInstruction(header, new_func);
+
+  // Write the short jump to the function prefix.
+  WriteShortJumpInstruction(old_func, header);
+
+  // Restore previous memory protection.
+  if (!RestoreMemoryProtection(header, patch_length, protection))
+    return false;
+
+  if (orig_old_func)
+    *orig_old_func = old_func + kShortJumpInstructionLength;
+
+  return true;
+}
+#endif
+
+bool OverrideFunctionWithRedirectJump(
+    uptr old_func, uptr new_func, uptr *orig_old_func) {
+  // Check whether the first instruction is a relative jump.
+  if (*(u8*)old_func != 0xE9)
+    return false;
+
+  if (orig_old_func) {
+    uptr relative_offset = *(u32*)(old_func + 1);
+    uptr absolute_target = old_func + relative_offset + kJumpInstructionLength;
+    *orig_old_func = absolute_target;
+  }
+
+#if SANITIZER_WINDOWS64
+  // If needed, get memory space for a trampoline jump.
+  uptr trampoline = AllocateMemoryForTrampoline(old_func, kDirectBranchLength);
+  if (!trampoline)
+    return false;
+  WriteDirectBranch(trampoline, new_func);
+#endif
+
+  // Change memory protection to writable.
+  DWORD protection = 0;
+  if (!ChangeMemoryProtection(old_func, kJumpInstructionLength, &protection))
+    return false;
+
+  // Write a relative jump to the redirected function.
+  WriteJumpInstruction(old_func, FIRST_32_SECOND_64(new_func, trampoline));
+
+  // Restore previous memory protection.
+  if (!RestoreMemoryProtection(old_func, kJumpInstructionLength, protection))
+    return false;
 
   return true;
 }
 
+bool OverrideFunctionWithHotPatch(
+    uptr old_func, uptr new_func, uptr *orig_old_func) {
+  const int kHotPatchHeaderLen = kBranchLength;
+
+  uptr header = (uptr)old_func - kHotPatchHeaderLen;
+  uptr patch_length = kHotPatchHeaderLen + kShortJumpInstructionLength;
+
+  // Validate that the function is hot patchable.
+  size_t instruction_size = GetInstructionSize(old_func);
+  if (instruction_size < kShortJumpInstructionLength ||
+      !FunctionHasPadding(old_func, kHotPatchHeaderLen))
+    return false;
+
+  if (orig_old_func) {
+    // Put the needed instructions into the trampoline bytes.
+    uptr trampoline_length = instruction_size + kDirectBranchLength;
+    uptr trampoline = AllocateMemoryForTrampoline(old_func, trampoline_length);
+    if (!trampoline)
+      return false;
+    if (!CopyInstructions(trampoline, old_func, instruction_size))
+      return false;
+    WriteDirectBranch(trampoline + instruction_size,
+                      old_func + instruction_size);
+    *orig_old_func = trampoline;
+  }
+
+  // If needed, get memory space for indirect address.
+  uptr indirect_address = 0;
+#if SANITIZER_WINDOWS64
+  indirect_address = AllocateMemoryForTrampoline(old_func, kAddressLength);
+  if (!indirect_address)
+    return false;
+#endif
+
+  // Change memory protection to writable.
+  DWORD protection = 0;
+  if (!ChangeMemoryProtection(header, patch_length, &protection))
+    return false;
+
+  // Write jumps to the redirected function.
+  WriteBranch(header, indirect_address, new_func);
+  WriteShortJumpInstruction(old_func, header);
+
+  // Restore previous memory protection.
+  if (!RestoreMemoryProtection(header, patch_length, protection))
+    return false;
+
+  return true;
+}
+
+bool OverrideFunctionWithTrampoline(
+    uptr old_func, uptr new_func, uptr *orig_old_func) {
+
+  size_t instructions_length = kBranchLength;
+  size_t padding_length = 0;
+  uptr indirect_address = 0;
+
+  if (orig_old_func) {
+    // Find out the number of bytes of the instructions we need to copy
+    // to the trampoline.
+    instructions_length = RoundUpToInstrBoundary(kBranchLength, old_func);
+    if (!instructions_length)
+      return false;
+
+    // Put the needed instructions into the trampoline bytes.
+    uptr trampoline_length = instructions_length + kDirectBranchLength;
+    uptr trampoline = AllocateMemoryForTrampoline(old_func, trampoline_length);
+    if (!trampoline)
+      return false;
+    if (!CopyInstructions(trampoline, old_func, instructions_length))
+      return false;
+    WriteDirectBranch(trampoline + instructions_length,
+                      old_func + instructions_length);
+    *orig_old_func = trampoline;
+  }
+
+#if SANITIZER_WINDOWS64
+  // Check if the targeted address can be encoded in the function padding.
+  // Otherwise, allocate it in the trampoline region.
+  if (IsMemoryPadding(old_func - kAddressLength, kAddressLength)) {
+    indirect_address = old_func - kAddressLength;
+    padding_length = kAddressLength;
+  } else {
+    indirect_address = AllocateMemoryForTrampoline(old_func, kAddressLength);
+    if (!indirect_address)
+      return false;
+  }
+#endif
+
+  // Change memory protection to writable.
+  uptr patch_address = old_func - padding_length;
+  uptr patch_length = instructions_length + padding_length;
+  DWORD protection = 0;
+  if (!ChangeMemoryProtection(patch_address, patch_length, &protection))
+    return false;
+
+  // Patch the original function.
+  WriteBranch(old_func, indirect_address, new_func);
+
+  // Restore previous memory protection.
+  if (!RestoreMemoryProtection(patch_address, patch_length, protection))
+    return false;
+
+  return true;
+}
+
+bool OverrideFunction(
+    uptr old_func, uptr new_func, uptr *orig_old_func) {
+#if !SANITIZER_WINDOWS64
+  if (OverrideFunctionWithDetour(old_func, new_func, orig_old_func))
+    return true;
+#endif
+  if (OverrideFunctionWithRedirectJump(old_func, new_func, orig_old_func))
+    return true;
+  if (OverrideFunctionWithHotPatch(old_func, new_func, orig_old_func))
+    return true;
+  if (OverrideFunctionWithTrampoline(old_func, new_func, orig_old_func))
+    return true;
+  return false;
+}
+
 static void **InterestingDLLsAvailable() {
-  const char *InterestingDLLs[] = {
-    "kernel32.dll",
-    "msvcr110.dll", // VS2012
-    "msvcr120.dll", // VS2013
-    // NTDLL should go last as it exports some functions that we should override
-    // in the CRT [presumably only used internally].
-    "ntdll.dll", NULL
-  };
+  static const char *InterestingDLLs[] = {
+      "kernel32.dll",
+      "msvcr110.dll",      // VS2012
+      "msvcr120.dll",      // VS2013
+      "vcruntime140.dll",  // VS2015
+      "ucrtbase.dll",      // Universal CRT
+      // NTDLL should go last as it exports some functions that we should
+      // override in the CRT [presumably only used internally].
+      "ntdll.dll", NULL};
   static void *result[ARRAY_SIZE(InterestingDLLs)] = { 0 };
   if (!result[0]) {
     for (size_t i = 0, j = 0; InterestingDLLs[i]; ++i) {
@@ -246,6 +882,32 @@
     if (!strcmp(func_name, name)) {
       DWORD index = ordinals[i];
       RVAPtr<char> func(module, functions[index]);
+
+      // Handle forwarded functions.
+      DWORD offset = functions[index];
+      if (offset >= export_directory->VirtualAddress &&
+          offset < export_directory->VirtualAddress + export_directory->Size) {
+        // An entry for a forwarded function is a string of the form
+        // "<module>.<function_name>" stored inside the export directory.
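+        // For example, kernel32.dll's HeapAlloc export is typically the
+        // forwarder string "NTDLL.RtlAllocateHeap".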
+        char function_name[256];
+        size_t function_name_length = _strlen(func);
+        if (function_name_length >= sizeof(function_name) - 1)
+          InterceptionFailed();
+
+        _memcpy(function_name, func, function_name_length);
+        function_name[function_name_length] = '\0';
+        char* separator = _strchr(function_name, '.');
+        if (!separator)
+          InterceptionFailed();
+        *separator = '\0';
+
+        void* redirected_module = GetModuleHandleA(function_name);
+        if (!redirected_module)
+          InterceptionFailed();
+        return InternalGetProcAddress(redirected_module, separator + 1);
+      }
+
       return (uptr)(char *)func;
     }
   }
@@ -268,6 +930,71 @@
   return OverrideFunction(orig_func, new_func, orig_old_func);
 }
 
+bool OverrideImportedFunction(const char *module_to_patch,
+                              const char *imported_module,
+                              const char *function_name, uptr new_function,
+                              uptr *orig_old_func) {
+  HMODULE module = GetModuleHandleA(module_to_patch);
+  if (!module)
+    return false;
+
+  // Check that the module header is full and present.
+  RVAPtr<IMAGE_DOS_HEADER> dos_stub(module, 0);
+  RVAPtr<IMAGE_NT_HEADERS> headers(module, dos_stub->e_lfanew);
+  if (!module || dos_stub->e_magic != IMAGE_DOS_SIGNATURE || // "MZ"
+      headers->Signature != IMAGE_NT_SIGNATURE ||            // "PE\0\0"
+      headers->FileHeader.SizeOfOptionalHeader <
+          sizeof(IMAGE_OPTIONAL_HEADER)) {
+    return false;
+  }
+
+  IMAGE_DATA_DIRECTORY *import_directory =
+      &headers->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT];
+
+  // Iterate the list of imported DLLs. FirstThunk will be null for the last
+  // entry.
+  RVAPtr<IMAGE_IMPORT_DESCRIPTOR> imports(module,
+                                          import_directory->VirtualAddress);
+  for (; imports->FirstThunk != 0; ++imports) {
+    RVAPtr<const char> modname(module, imports->Name);
+    if (_stricmp(&*modname, imported_module) == 0)
+      break;
+  }
+  if (imports->FirstThunk == 0)
+    return false;
+
+  // We have two parallel arrays: the import address table (IAT) and the table
+  // of names. They start out containing the same data, but the loader rewrites
+  // the IAT to hold imported addresses and leaves the name table in
+  // OriginalFirstThunk alone.
+  RVAPtr<IMAGE_THUNK_DATA> name_table(module, imports->OriginalFirstThunk);
+  RVAPtr<IMAGE_THUNK_DATA> iat(module, imports->FirstThunk);
+  for (; name_table->u1.Ordinal != 0; ++name_table, ++iat) {
+    if (!IMAGE_SNAP_BY_ORDINAL(name_table->u1.Ordinal)) {
+      RVAPtr<IMAGE_IMPORT_BY_NAME> import_by_name(
+          module, name_table->u1.ForwarderString);
+      const char *funcname = &import_by_name->Name[0];
+      if (strcmp(funcname, function_name) == 0)
+        break;
+    }
+  }
+  if (name_table->u1.Ordinal == 0)
+    return false;
+
+  // Now we have the correct IAT entry. Do the swap. We have to make the page
+  // read/write first.
+  if (orig_old_func)
+    *orig_old_func = iat->u1.AddressOfData;
+  DWORD old_prot, unused_prot;
+  if (!VirtualProtect(&iat->u1.AddressOfData, 4, PAGE_EXECUTE_READWRITE,
+                      &old_prot))
+    return false;
+  iat->u1.AddressOfData = new_function;
+  if (!VirtualProtect(&iat->u1.AddressOfData, 4, old_prot, &unused_prot))
+    return false;  // Not clear if this failure bothers us.
+  return true;
+}
+
 }  // namespace __interception
 
 #endif  // _WIN32
diff --git a/lib/interception/interception_win.h b/lib/interception/interception_win.h
index 96c4a0c..9061f9e 100644
--- a/lib/interception/interception_win.h
+++ b/lib/interception/interception_win.h
@@ -34,6 +34,31 @@
 // Windows-only replacement for GetProcAddress. Useful for some sanitizers.
 uptr InternalGetProcAddress(void *module, const char *func_name);
 
+// Overrides a function only when it is called from a specific DLL. For example,
+// this is used to override calls to HeapAlloc/HeapFree from ucrtbase without
+// affecting other third party libraries.
+bool OverrideImportedFunction(const char *module_to_patch,
+                              const char *imported_module,
+                              const char *function_name, uptr new_function,
+                              uptr *orig_old_func);
+
+#if !SANITIZER_WINDOWS64
+// Exposed for unittests
+bool OverrideFunctionWithDetour(
+    uptr old_func, uptr new_func, uptr *orig_old_func);
+#endif
+
+// Exposed for unittests
+bool OverrideFunctionWithRedirectJump(
+    uptr old_func, uptr new_func, uptr *orig_old_func);
+bool OverrideFunctionWithHotPatch(
+    uptr old_func, uptr new_func, uptr *orig_old_func);
+bool OverrideFunctionWithTrampoline(
+    uptr old_func, uptr new_func, uptr *orig_old_func);
+
+// Exposed for unittests
+void TestOnlyReleaseTrampolineRegions();
+
 }  // namespace __interception
 
 #if defined(INTERCEPTION_DYNAMIC_CRT)
@@ -50,5 +75,10 @@
 
 #define INTERCEPT_FUNCTION_VER_WIN(func, symver) INTERCEPT_FUNCTION_WIN(func)
 
+#define INTERCEPT_FUNCTION_DLLIMPORT(user_dll, provider_dll, func)       \
+  ::__interception::OverrideImportedFunction(                            \
+      user_dll, provider_dll, #func, (::__interception::uptr)WRAP(func), \
+      (::__interception::uptr *)&REAL(func))
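+
+// Illustrative use (a sketch, not part of this patch; "kernel32.dll" is an
+// assumed provider module -- the real call sites in the sanitizer runtimes
+// name whichever DLL the patched module actually imports from):
+//   INTERCEPT_FUNCTION_DLLIMPORT("ucrtbase.dll", "kernel32.dll", HeapAlloc);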
+
 #endif  // INTERCEPTION_WIN_H
 #endif  // _WIN32
diff --git a/lib/interception/tests/CMakeLists.txt b/lib/interception/tests/CMakeLists.txt
new file mode 100644
index 0000000..bfe41fe
--- /dev/null
+++ b/lib/interception/tests/CMakeLists.txt
@@ -0,0 +1,142 @@
+include(CompilerRTCompile)
+
+filter_available_targets(INTERCEPTION_UNITTEST_SUPPORTED_ARCH x86_64 i386 mips64 mips64el)
+
+set(INTERCEPTION_UNITTESTS
+  interception_linux_test.cc
+  interception_test_main.cc
+  interception_win_test.cc
+)
+
+set(INTERCEPTION_TEST_HEADERS)
+
+set(INTERCEPTION_TEST_CFLAGS_COMMON
+  ${COMPILER_RT_UNITTEST_CFLAGS}
+  ${COMPILER_RT_GTEST_CFLAGS}
+  -I${COMPILER_RT_SOURCE_DIR}/include
+  -I${COMPILER_RT_SOURCE_DIR}/lib
+  -I${COMPILER_RT_SOURCE_DIR}/lib/interception
+  -fno-rtti
+  -O2
+  -Werror=sign-compare
+  -Wno-non-virtual-dtor)
+
+# -gline-tables-only must be enough for these tests, so use it if possible.
+if(COMPILER_RT_TEST_COMPILER_ID MATCHES "Clang")
+  list(APPEND INTERCEPTION_TEST_CFLAGS_COMMON -gline-tables-only)
+else()
+  list(APPEND INTERCEPTION_TEST_CFLAGS_COMMON -g)
+endif()
+if(MSVC)
+  list(APPEND INTERCEPTION_TEST_CFLAGS_COMMON -gcodeview)
+endif()
+list(APPEND INTERCEPTION_TEST_LINK_FLAGS_COMMON -g)
+
+if(NOT MSVC)
+  list(APPEND INTERCEPTION_TEST_LINK_FLAGS_COMMON --driver-mode=g++)
+endif()
+
+if(ANDROID)
+  list(APPEND INTERCEPTION_TEST_LINK_FLAGS_COMMON -pie)
+endif()
+
+set(INTERCEPTION_TEST_LINK_LIBS)
+append_list_if(COMPILER_RT_HAS_LIBLOG log INTERCEPTION_TEST_LINK_LIBS)
+# NDK r10 requires -latomic almost always.
+append_list_if(ANDROID atomic INTERCEPTION_TEST_LINK_LIBS)
+
+append_list_if(COMPILER_RT_HAS_LIBDL -ldl INTERCEPTION_TEST_LINK_FLAGS_COMMON)
+append_list_if(COMPILER_RT_HAS_LIBRT -lrt INTERCEPTION_TEST_LINK_FLAGS_COMMON)
+append_list_if(COMPILER_RT_HAS_LIBPTHREAD -pthread INTERCEPTION_TEST_LINK_FLAGS_COMMON)
+# x86_64 FreeBSD 9.2 additionally requires libc++ to build the tests. Also,
+# 'libm' shall be specified explicitly to build i386 tests.
+if(CMAKE_SYSTEM MATCHES "FreeBSD-9.2-RELEASE")
+  list(APPEND INTERCEPTION_TEST_LINK_FLAGS_COMMON "-lc++ -lm")
+endif()
+
+include_directories(..)
+include_directories(../..)
+
+# Adds static library which contains interception object file
+# (universal binary on Mac and arch-specific object files on Linux).
+macro(add_interceptor_lib library)
+  add_library(${library} STATIC ${ARGN})
+  set_target_properties(${library} PROPERTIES
+    ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+    FOLDER "Compiler-RT Runtime tests")
+endmacro()
+
+function(get_interception_lib_for_arch arch lib lib_name)
+  if(APPLE)
+    set(tgt_name "RTInterception.test.osx")
+  else()
+    set(tgt_name "RTInterception.test.${arch}")
+  endif()
+  set(${lib} "${tgt_name}" PARENT_SCOPE)
+  if(CMAKE_CONFIGURATION_TYPES)
+   set(configuration_path "${CMAKE_CFG_INTDIR}/")
+  else()
+   set(configuration_path "")
+  endif()
+  if(NOT MSVC)
+    set(${lib_name} "${configuration_path}lib${tgt_name}.a" PARENT_SCOPE)
+  else()
+    set(${lib_name} "${configuration_path}${tgt_name}.lib" PARENT_SCOPE)
+  endif()
+endfunction()
+
+# Interception unit tests testsuite.
+add_custom_target(InterceptionUnitTests)
+set_target_properties(InterceptionUnitTests PROPERTIES
+  FOLDER "Compiler-RT Tests")
+
+# Adds interception tests for architecture.
+macro(add_interception_tests_for_arch arch)
+  get_target_flags_for_arch(${arch} TARGET_FLAGS)
+  set(INTERCEPTION_TEST_SOURCES ${INTERCEPTION_UNITTESTS}
+                             ${COMPILER_RT_GTEST_SOURCE})
+  set(INTERCEPTION_TEST_COMPILE_DEPS ${INTERCEPTION_TEST_HEADERS})
+  if(NOT COMPILER_RT_STANDALONE_BUILD)
+    list(APPEND INTERCEPTION_TEST_COMPILE_DEPS gtest)
+  endif()
+  set(INTERCEPTION_TEST_OBJECTS)
+  foreach(source ${INTERCEPTION_TEST_SOURCES})
+    get_filename_component(basename ${source} NAME)
+    if(CMAKE_CONFIGURATION_TYPES)
+      set(output_obj "${CMAKE_CFG_INTDIR}/${basename}.${arch}.o")
+    else()
+      set(output_obj "${basename}.${arch}.o")
+    endif()
+    clang_compile(${output_obj} ${source}
+                  CFLAGS ${INTERCEPTION_TEST_CFLAGS_COMMON} ${TARGET_FLAGS}
+                  DEPS ${INTERCEPTION_TEST_COMPILE_DEPS})
+    list(APPEND INTERCEPTION_TEST_OBJECTS ${output_obj})
+  endforeach()
+  get_interception_lib_for_arch(${arch} INTERCEPTION_COMMON_LIB
+                                INTERCEPTION_COMMON_LIB_NAME)
+  # Add unittest target.
+  set(INTERCEPTION_TEST_NAME "Interception-${arch}-Test")
+  add_compiler_rt_test(InterceptionUnitTests ${INTERCEPTION_TEST_NAME}
+                       OBJECTS ${INTERCEPTION_TEST_OBJECTS}
+                               ${INTERCEPTION_COMMON_LIB_NAME}
+                       DEPS ${INTERCEPTION_TEST_OBJECTS} ${INTERCEPTION_COMMON_LIB}
+                       LINK_FLAGS ${INTERCEPTION_TEST_LINK_FLAGS_COMMON}
+                                  ${TARGET_FLAGS})
+endmacro()
+
+if(COMPILER_RT_CAN_EXECUTE_TESTS AND NOT ANDROID AND NOT APPLE)
+  # We use the just-built clang to build the interception unittests, so we
+  # must be sure that the produced binaries will work.
+  if(APPLE)
+    add_interceptor_lib("RTInterception.test.osx"
+                        $<TARGET_OBJECTS:RTInterception.osx>)
+  else()
+    foreach(arch ${INTERCEPTION_UNITTEST_SUPPORTED_ARCH})
+      add_interceptor_lib("RTInterception.test.${arch}"
+                          $<TARGET_OBJECTS:RTInterception.${arch}>)
+    endforeach()
+  endif()
+  foreach(arch ${INTERCEPTION_UNITTEST_SUPPORTED_ARCH})
+    add_interception_tests_for_arch(${arch})
+  endforeach()
+endif()
diff --git a/lib/interception/tests/interception_linux_test.cc b/lib/interception/tests/interception_linux_test.cc
new file mode 100644
index 0000000..08619d8
--- /dev/null
+++ b/lib/interception/tests/interception_linux_test.cc
@@ -0,0 +1,64 @@
+//===-- interception_linux_test.cc ----------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer/AddressSanitizer runtime.
+// Tests for interception_linux.h.
+//
+//===----------------------------------------------------------------------===//
+#include "interception/interception.h"
+
+#include "gtest/gtest.h"
+
+// Too slow for debug build
+#if !SANITIZER_DEBUG
+#if SANITIZER_LINUX
+
+static int InterceptorFunctionCalled;
+
+DECLARE_REAL(int, isdigit, int);
+
+INTERCEPTOR(int, isdigit, int d) {
+  ++InterceptorFunctionCalled;
+  return d >= '0' && d <= '9';
+}
+
+namespace __interception {
+
+TEST(Interception, GetRealFunctionAddress) {
+  uptr malloc_address = 0;
+  EXPECT_TRUE(GetRealFunctionAddress("malloc", &malloc_address, 0, 0));
+  EXPECT_NE(0U, malloc_address);
+
+  uptr dummy_address = 0;
+  EXPECT_TRUE(
+      GetRealFunctionAddress("dummy_doesnt_exist__", &dummy_address, 0, 0));
+  EXPECT_EQ(0U, dummy_address);
+}
+
+TEST(Interception, Basic) {
+  ASSERT_TRUE(INTERCEPT_FUNCTION(isdigit));
+
+  // After interception, the counter should be incremented.
+  InterceptorFunctionCalled = 0;
+  EXPECT_NE(0, isdigit('1'));
+  EXPECT_EQ(1, InterceptorFunctionCalled);
+  EXPECT_EQ(0, isdigit('a'));
+  EXPECT_EQ(2, InterceptorFunctionCalled);
+
+  // Calling the REAL function should not affect the counter.
+  InterceptorFunctionCalled = 0;
+  EXPECT_NE(0, REAL(isdigit)('1'));
+  EXPECT_EQ(0, REAL(isdigit)('a'));
+  EXPECT_EQ(0, InterceptorFunctionCalled);
+}
+
+}  // namespace __interception
+
+#endif  // SANITIZER_LINUX
+#endif  // #if !SANITIZER_DEBUG
diff --git a/lib/interception/tests/interception_test_main.cc b/lib/interception/tests/interception_test_main.cc
new file mode 100644
index 0000000..311da51
--- /dev/null
+++ b/lib/interception/tests/interception_test_main.cc
@@ -0,0 +1,22 @@
+//===-- interception_test_main.cc------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Testing the machinery for providing replacements/wrappers for system
+// functions.
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+
+int main(int argc, char **argv) {
+  testing::GTEST_FLAG(death_test_style) = "threadsafe";
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/lib/interception/tests/interception_win_test.cc b/lib/interception/tests/interception_win_test.cc
new file mode 100644
index 0000000..67b40f7
--- /dev/null
+++ b/lib/interception/tests/interception_win_test.cc
@@ -0,0 +1,600 @@
+//===-- interception_win_test.cc ------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer/AddressSanitizer runtime.
+// Tests for interception_win.h.
+//
+//===----------------------------------------------------------------------===//
+#include "interception/interception.h"
+
+#include "gtest/gtest.h"
+
+// Too slow for debug build
+#if !SANITIZER_DEBUG
+#if SANITIZER_WINDOWS
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+namespace __interception {
+namespace {
+
+enum FunctionPrefixKind {
+  FunctionPrefixNone,
+  FunctionPrefixPadding,
+  FunctionPrefixHotPatch,
+  FunctionPrefixDetour,
+};
+
+typedef bool (*TestOverrideFunction)(uptr, uptr, uptr*);
+typedef int (*IdentityFunction)(int);
+
+#if SANITIZER_WINDOWS64
+
+const u8 kIdentityCodeWithPrologue[] = {
+    0x55,                   // push        rbp
+    0x48, 0x89, 0xE5,       // mov         rbp,rsp
+    0x8B, 0xC1,             // mov         eax,ecx
+    0x5D,                   // pop         rbp
+    0xC3,                   // ret
+};
+
+const u8 kIdentityCodeWithPushPop[] = {
+    0x55,                   // push        rbp
+    0x48, 0x89, 0xE5,       // mov         rbp,rsp
+    0x53,                   // push        rbx
+    0x50,                   // push        rax
+    0x58,                   // pop         rax
+    0x8B, 0xC1,             // mov         eax,ecx
+    0x5B,                   // pop         rbx
+    0x5D,                   // pop         rbp
+    0xC3,                   // ret
+};
+
+const u8 kIdentityTwiceOffset = 16;
+const u8 kIdentityTwice[] = {
+    0x55,                   // push        rbp
+    0x48, 0x89, 0xE5,       // mov         rbp,rsp
+    0x8B, 0xC1,             // mov         eax,ecx
+    0x5D,                   // pop         rbp
+    0xC3,                   // ret
+    0x90, 0x90, 0x90, 0x90,
+    0x90, 0x90, 0x90, 0x90,
+    0x55,                   // push        rbp
+    0x48, 0x89, 0xE5,       // mov         rbp,rsp
+    0x8B, 0xC1,             // mov         eax,ecx
+    0x5D,                   // pop         rbp
+    0xC3,                   // ret
+};
+
+const u8 kIdentityCodeWithMov[] = {
+    0x89, 0xC8,             // mov         eax, ecx
+    0xC3,                   // ret
+};
+
+const u8 kIdentityCodeWithJump[] = {
+    0xE9, 0x04, 0x00, 0x00,
+    0x00,                   // jmp + 4
+    0xCC, 0xCC, 0xCC, 0xCC,
+    0x89, 0xC8,             // mov         eax, ecx
+    0xC3,                   // ret
+};
+
+#else
+
+const u8 kIdentityCodeWithPrologue[] = {
+    0x55,                   // push        ebp
+    0x8B, 0xEC,             // mov         ebp,esp
+    0x8B, 0x45, 0x08,       // mov         eax,dword ptr [ebp + 8]
+    0x5D,                   // pop         ebp
+    0xC3,                   // ret
+};
+
+const u8 kIdentityCodeWithPushPop[] = {
+    0x55,                   // push        ebp
+    0x8B, 0xEC,             // mov         ebp,esp
+    0x53,                   // push        ebx
+    0x50,                   // push        eax
+    0x58,                   // pop         eax
+    0x8B, 0x45, 0x08,       // mov         eax,dword ptr [ebp + 8]
+    0x5B,                   // pop         ebx
+    0x5D,                   // pop         ebp
+    0xC3,                   // ret
+};
+
+const u8 kIdentityTwiceOffset = 8;
+const u8 kIdentityTwice[] = {
+    0x55,                   // push        ebp
+    0x8B, 0xEC,             // mov         ebp,esp
+    0x8B, 0x45, 0x08,       // mov         eax,dword ptr [ebp + 8]
+    0x5D,                   // pop         ebp
+    0xC3,                   // ret
+    0x55,                   // push        ebp
+    0x8B, 0xEC,             // mov         ebp,esp
+    0x8B, 0x45, 0x08,       // mov         eax,dword ptr [ebp + 8]
+    0x5D,                   // pop         ebp
+    0xC3,                   // ret
+};
+
+const u8 kIdentityCodeWithMov[] = {
+    0x8B, 0x44, 0x24, 0x04, // mov         eax,dword ptr [esp + 4]
+    0xC3,                   // ret
+};
+
+const u8 kIdentityCodeWithJump[] = {
+    0xE9, 0x04, 0x00, 0x00,
+    0x00,                   // jmp + 4
+    0xCC, 0xCC, 0xCC, 0xCC,
+    0x8B, 0x44, 0x24, 0x04, // mov         eax,dword ptr [esp + 4]
+    0xC3,                   // ret
+};
+
+#endif
+
+const u8 kPatchableCode1[] = {
+    0xB8, 0x4B, 0x00, 0x00, 0x00,   // mov eax,4B
+    0x33, 0xC9,                     // xor ecx,ecx
+    0xC3,                           // ret
+};
+
+const u8 kPatchableCode2[] = {
+    0x55,                           // push ebp
+    0x8B, 0xEC,                     // mov ebp,esp
+    0x33, 0xC0,                     // xor eax,eax
+    0x5D,                           // pop ebp
+    0xC3,                           // ret
+};
+
+const u8 kPatchableCode3[] = {
+    0x55,                           // push ebp
+    0x8B, 0xEC,                     // mov ebp,esp
+    0x6A, 0x00,                     // push 0
+    0xE8, 0x3D, 0xFF, 0xFF, 0xFF,   // call <func>
+};
+
+const u8 kPatchableCode4[] = {
+    0xE9, 0xCC, 0xCC, 0xCC, 0xCC,   // jmp <label>
+    0x90, 0x90, 0x90, 0x90,
+};
+
+const u8 kPatchableCode5[] = {
+    0x55,                                      // push    ebp
+    0x8b, 0xec,                                // mov     ebp,esp
+    0x8d, 0xa4, 0x24, 0x30, 0xfd, 0xff, 0xff,  // lea     esp,[esp-2D0h]
+    0x54,                                      // push    esp
+};
+
+const u8 kUnpatchableCode1[] = {
+    0xC3,                           // ret
+};
+
+const u8 kUnpatchableCode2[] = {
+    0x33, 0xC9,                     // xor ecx,ecx
+    0xC3,                           // ret
+};
+
+const u8 kUnpatchableCode3[] = {
+    0x75, 0xCC,                     // jne <label>
+    0x33, 0xC9,                     // xor ecx,ecx
+    0xC3,                           // ret
+};
+
+const u8 kUnpatchableCode4[] = {
+    0x74, 0xCC,                     // je <label>
+    0x33, 0xC9,                     // xor ecx,ecx
+    0xC3,                           // ret
+};
+
+const u8 kUnpatchableCode5[] = {
+    0xEB, 0x02,                     // jmp <label>
+    0x33, 0xC9,                     // xor ecx,ecx
+    0xC3,                           // ret
+};
+
+const u8 kUnpatchableCode6[] = {
+    0xE8, 0xCC, 0xCC, 0xCC, 0xCC,   // call <func>
+    0x90, 0x90, 0x90, 0x90,
+};
+
+// A buffer holding the dynamically generated code under test.
+u8* ActiveCode;
+size_t ActiveCodeLength = 4096;
+
+template<class T>
+static void LoadActiveCode(
+    const T &code,
+    uptr *entry_point,
+    FunctionPrefixKind prefix_kind = FunctionPrefixNone) {
+  if (ActiveCode == nullptr) {
+    ActiveCode =
+        (u8*)::VirtualAlloc(nullptr, ActiveCodeLength,
+                            MEM_COMMIT | MEM_RESERVE,
+                            PAGE_EXECUTE_READWRITE);
+    ASSERT_NE(ActiveCode, nullptr);
+  }
+
+  size_t position = 0;
+
+  // Add padding to avoid memory violation when scanning the prefix.
+  for (int i = 0; i < 16; ++i)
+    ActiveCode[position++] = 0xC3;  // Instruction 'ret'.
+
+  // Add function padding.
+  size_t padding = 0;
+  if (prefix_kind == FunctionPrefixPadding)
+    padding = 16;
+  else if (prefix_kind == FunctionPrefixDetour ||
+           prefix_kind == FunctionPrefixHotPatch)
+    padding = FIRST_32_SECOND_64(5, 6);
+  // Insert |padding| instructions 'nop'.
+  for (size_t i = 0; i < padding; ++i)
+    ActiveCode[position++] = 0x90;
+
+  // Keep track of the entry point.
+  *entry_point = (uptr)&ActiveCode[position];
+
+  // Add the detour instruction (i.e. mov edi, edi)
+  if (prefix_kind == FunctionPrefixDetour) {
+#if SANITIZER_WINDOWS64
+    // Note that "mov edi,edi" is a NOP only in 32-bit mode; in 64-bit mode it
+    // clears the upper bits of RDI. Use 66,90H as the NOP on Windows64.
+    ActiveCode[position++] = 0x66;
+    ActiveCode[position++] = 0x90;
+#else
+    // mov edi,edi.
+    ActiveCode[position++] = 0x8B;
+    ActiveCode[position++] = 0xFF;
+#endif
+
+  }
+
+  // Copy the function body.
+  for (size_t i = 0; i < sizeof(T); ++i)
+    ActiveCode[position++] = code[i];
+}
+
+int InterceptorFunctionCalled;
+IdentityFunction InterceptedRealFunction;
+
+int InterceptorFunction(int x) {
+  ++InterceptorFunctionCalled;
+  return InterceptedRealFunction(x);
+}
+
+}  // namespace
+
+// Tests for interception_win.h
+TEST(Interception, InternalGetProcAddress) {
+  HMODULE ntdll_handle = ::GetModuleHandle("ntdll");
+  ASSERT_NE(nullptr, ntdll_handle);
+  uptr DbgPrint_expected = (uptr)::GetProcAddress(ntdll_handle, "DbgPrint");
+  uptr isdigit_expected = (uptr)::GetProcAddress(ntdll_handle, "isdigit");
+  uptr DbgPrint_address = InternalGetProcAddress(ntdll_handle, "DbgPrint");
+  uptr isdigit_address = InternalGetProcAddress(ntdll_handle, "isdigit");
+
+  EXPECT_EQ(DbgPrint_expected, DbgPrint_address);
+  EXPECT_EQ(isdigit_expected, isdigit_address);
+  EXPECT_NE(DbgPrint_address, isdigit_address);
+}
+
+template<class T>
+static void TestIdentityFunctionPatching(
+    const T &code,
+    TestOverrideFunction override,
+    FunctionPrefixKind prefix_kind = FunctionPrefixNone) {
+  uptr identity_address;
+  LoadActiveCode(code, &identity_address, prefix_kind);
+  IdentityFunction identity = (IdentityFunction)identity_address;
+
+  // Validate behavior before dynamic patching.
+  InterceptorFunctionCalled = 0;
+  EXPECT_EQ(0, identity(0));
+  EXPECT_EQ(42, identity(42));
+  EXPECT_EQ(0, InterceptorFunctionCalled);
+
+  // Patch the function.
+  uptr real_identity_address = 0;
+  bool success = override(identity_address,
+                         (uptr)&InterceptorFunction,
+                         &real_identity_address);
+  EXPECT_TRUE(success);
+  EXPECT_NE(0U, real_identity_address);
+  IdentityFunction real_identity = (IdentityFunction)real_identity_address;
+  InterceptedRealFunction = real_identity;
+
+  // Don't run tests if hooking failed or the real function is not valid.
+  if (!success || !real_identity_address)
+    return;
+
+  // Calling the redirected function.
+  InterceptorFunctionCalled = 0;
+  EXPECT_EQ(0, identity(0));
+  EXPECT_EQ(42, identity(42));
+  EXPECT_EQ(2, InterceptorFunctionCalled);
+
+  // Calling the real function.
+  InterceptorFunctionCalled = 0;
+  EXPECT_EQ(0, real_identity(0));
+  EXPECT_EQ(42, real_identity(42));
+  EXPECT_EQ(0, InterceptorFunctionCalled);
+
+  TestOnlyReleaseTrampolineRegions();
+}
+
+#if !SANITIZER_WINDOWS64
+TEST(Interception, OverrideFunctionWithDetour) {
+  TestOverrideFunction override = OverrideFunctionWithDetour;
+  FunctionPrefixKind prefix = FunctionPrefixDetour;
+  TestIdentityFunctionPatching(kIdentityCodeWithPrologue, override, prefix);
+  TestIdentityFunctionPatching(kIdentityCodeWithPushPop, override, prefix);
+  TestIdentityFunctionPatching(kIdentityCodeWithMov, override, prefix);
+  TestIdentityFunctionPatching(kIdentityCodeWithJump, override, prefix);
+}
+#endif  // !SANITIZER_WINDOWS64
+
+TEST(Interception, OverrideFunctionWithRedirectJump) {
+  TestOverrideFunction override = OverrideFunctionWithRedirectJump;
+  TestIdentityFunctionPatching(kIdentityCodeWithJump, override);
+}
+
+TEST(Interception, OverrideFunctionWithHotPatch) {
+  TestOverrideFunction override = OverrideFunctionWithHotPatch;
+  FunctionPrefixKind prefix = FunctionPrefixHotPatch;
+  TestIdentityFunctionPatching(kIdentityCodeWithMov, override, prefix);
+}
+
+TEST(Interception, OverrideFunctionWithTrampoline) {
+  TestOverrideFunction override = OverrideFunctionWithTrampoline;
+  FunctionPrefixKind prefix = FunctionPrefixNone;
+  TestIdentityFunctionPatching(kIdentityCodeWithPrologue, override, prefix);
+  TestIdentityFunctionPatching(kIdentityCodeWithPushPop, override, prefix);
+
+  prefix = FunctionPrefixPadding;
+  TestIdentityFunctionPatching(kIdentityCodeWithPrologue, override, prefix);
+  TestIdentityFunctionPatching(kIdentityCodeWithPushPop, override, prefix);
+}
+
+TEST(Interception, OverrideFunction) {
+  TestOverrideFunction override = OverrideFunction;
+  FunctionPrefixKind prefix = FunctionPrefixNone;
+  TestIdentityFunctionPatching(kIdentityCodeWithPrologue, override, prefix);
+  TestIdentityFunctionPatching(kIdentityCodeWithPushPop, override, prefix);
+  TestIdentityFunctionPatching(kIdentityCodeWithJump, override, prefix);
+
+  prefix = FunctionPrefixPadding;
+  TestIdentityFunctionPatching(kIdentityCodeWithPrologue, override, prefix);
+  TestIdentityFunctionPatching(kIdentityCodeWithPushPop, override, prefix);
+  TestIdentityFunctionPatching(kIdentityCodeWithMov, override, prefix);
+  TestIdentityFunctionPatching(kIdentityCodeWithJump, override, prefix);
+
+  prefix = FunctionPrefixHotPatch;
+  TestIdentityFunctionPatching(kIdentityCodeWithPrologue, override, prefix);
+  TestIdentityFunctionPatching(kIdentityCodeWithPushPop, override, prefix);
+  TestIdentityFunctionPatching(kIdentityCodeWithMov, override, prefix);
+  TestIdentityFunctionPatching(kIdentityCodeWithJump, override, prefix);
+
+  prefix = FunctionPrefixDetour;
+  TestIdentityFunctionPatching(kIdentityCodeWithPrologue, override, prefix);
+  TestIdentityFunctionPatching(kIdentityCodeWithPushPop, override, prefix);
+  TestIdentityFunctionPatching(kIdentityCodeWithMov, override, prefix);
+  TestIdentityFunctionPatching(kIdentityCodeWithJump, override, prefix);
+}
+
+template<class T>
+static void TestIdentityFunctionMultiplePatching(
+    const T &code,
+    TestOverrideFunction override,
+    FunctionPrefixKind prefix_kind = FunctionPrefixNone) {
+  uptr identity_address;
+  LoadActiveCode(code, &identity_address, prefix_kind);
+
+  // Patch the function.
+  uptr real_identity_address = 0;
+  bool success = override(identity_address,
+                          (uptr)&InterceptorFunction,
+                          &real_identity_address);
+  EXPECT_TRUE(success);
+  EXPECT_NE(0U, real_identity_address);
+
+  // Re-patching the function should not work.
+  success = override(identity_address,
+                     (uptr)&InterceptorFunction,
+                     &real_identity_address);
+  EXPECT_FALSE(success);
+
+  TestOnlyReleaseTrampolineRegions();
+}
+
+TEST(Interception, OverrideFunctionMultiplePatchingIsFailing) {
+#if !SANITIZER_WINDOWS64
+  TestIdentityFunctionMultiplePatching(kIdentityCodeWithPrologue,
+                                       OverrideFunctionWithDetour,
+                                       FunctionPrefixDetour);
+#endif
+
+  TestIdentityFunctionMultiplePatching(kIdentityCodeWithMov,
+                                       OverrideFunctionWithHotPatch,
+                                       FunctionPrefixHotPatch);
+
+  TestIdentityFunctionMultiplePatching(kIdentityCodeWithPushPop,
+                                       OverrideFunctionWithTrampoline,
+                                       FunctionPrefixPadding);
+}
+
+TEST(Interception, OverrideFunctionTwice) {
+  uptr identity_address1;
+  LoadActiveCode(kIdentityTwice, &identity_address1);
+  uptr identity_address2 = identity_address1 + kIdentityTwiceOffset;
+  IdentityFunction identity1 = (IdentityFunction)identity_address1;
+  IdentityFunction identity2 = (IdentityFunction)identity_address2;
+
+  // Patch the two functions.
+  uptr real_identity_address = 0;
+  EXPECT_TRUE(OverrideFunction(identity_address1,
+                               (uptr)&InterceptorFunction,
+                               &real_identity_address));
+  EXPECT_TRUE(OverrideFunction(identity_address2,
+                               (uptr)&InterceptorFunction,
+                               &real_identity_address));
+  IdentityFunction real_identity = (IdentityFunction)real_identity_address;
+  InterceptedRealFunction = real_identity;
+
+  // Calling the redirected function.
+  InterceptorFunctionCalled = 0;
+  EXPECT_EQ(42, identity1(42));
+  EXPECT_EQ(42, identity2(42));
+  EXPECT_EQ(2, InterceptorFunctionCalled);
+
+  TestOnlyReleaseTrampolineRegions();
+}
+
+template<class T>
+static bool TestFunctionPatching(
+    const T &code,
+    TestOverrideFunction override,
+    FunctionPrefixKind prefix_kind = FunctionPrefixNone) {
+  uptr address;
+  LoadActiveCode(code, &address, prefix_kind);
+  uptr unused_real_address = 0;
+  bool result = override(
+      address, (uptr)&InterceptorFunction, &unused_real_address);
+
+  TestOnlyReleaseTrampolineRegions();
+  return result;
+}
+
+TEST(Interception, PatchableFunction) {
+  TestOverrideFunction override = OverrideFunction;
+  // Test without function padding.
+  EXPECT_TRUE(TestFunctionPatching(kPatchableCode1, override));
+  EXPECT_TRUE(TestFunctionPatching(kPatchableCode2, override));
+#if SANITIZER_WINDOWS64
+  EXPECT_FALSE(TestFunctionPatching(kPatchableCode3, override));
+#else
+  EXPECT_TRUE(TestFunctionPatching(kPatchableCode3, override));
+#endif
+  EXPECT_TRUE(TestFunctionPatching(kPatchableCode4, override));
+  EXPECT_TRUE(TestFunctionPatching(kPatchableCode5, override));
+
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode1, override));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode2, override));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode3, override));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode4, override));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode5, override));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode6, override));
+}
+
+#if !SANITIZER_WINDOWS64
+TEST(Interception, PatchableFunctionWithDetour) {
+  TestOverrideFunction override = OverrideFunctionWithDetour;
+  // Without the prefix, no function can be detoured.
+  EXPECT_FALSE(TestFunctionPatching(kPatchableCode1, override));
+  EXPECT_FALSE(TestFunctionPatching(kPatchableCode2, override));
+  EXPECT_FALSE(TestFunctionPatching(kPatchableCode3, override));
+  EXPECT_FALSE(TestFunctionPatching(kPatchableCode4, override));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode1, override));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode2, override));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode3, override));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode4, override));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode5, override));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode6, override));
+
+  // With the prefix, all functions can be detoured.
+  FunctionPrefixKind prefix = FunctionPrefixDetour;
+  EXPECT_TRUE(TestFunctionPatching(kPatchableCode1, override, prefix));
+  EXPECT_TRUE(TestFunctionPatching(kPatchableCode2, override, prefix));
+  EXPECT_TRUE(TestFunctionPatching(kPatchableCode3, override, prefix));
+  EXPECT_TRUE(TestFunctionPatching(kPatchableCode4, override, prefix));
+  EXPECT_TRUE(TestFunctionPatching(kUnpatchableCode1, override, prefix));
+  EXPECT_TRUE(TestFunctionPatching(kUnpatchableCode2, override, prefix));
+  EXPECT_TRUE(TestFunctionPatching(kUnpatchableCode3, override, prefix));
+  EXPECT_TRUE(TestFunctionPatching(kUnpatchableCode4, override, prefix));
+  EXPECT_TRUE(TestFunctionPatching(kUnpatchableCode5, override, prefix));
+  EXPECT_TRUE(TestFunctionPatching(kUnpatchableCode6, override, prefix));
+}
+#endif  // !SANITIZER_WINDOWS64
+
+TEST(Interception, PatchableFunctionWithRedirectJump) {
+  TestOverrideFunction override = OverrideFunctionWithRedirectJump;
+  EXPECT_FALSE(TestFunctionPatching(kPatchableCode1, override));
+  EXPECT_FALSE(TestFunctionPatching(kPatchableCode2, override));
+  EXPECT_FALSE(TestFunctionPatching(kPatchableCode3, override));
+  EXPECT_TRUE(TestFunctionPatching(kPatchableCode4, override));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode1, override));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode2, override));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode3, override));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode4, override));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode5, override));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode6, override));
+}
+
+TEST(Interception, PatchableFunctionWithHotPatch) {
+  TestOverrideFunction override = OverrideFunctionWithHotPatch;
+  FunctionPrefixKind prefix = FunctionPrefixHotPatch;
+
+  EXPECT_TRUE(TestFunctionPatching(kPatchableCode1, override, prefix));
+  EXPECT_FALSE(TestFunctionPatching(kPatchableCode2, override, prefix));
+  EXPECT_FALSE(TestFunctionPatching(kPatchableCode3, override, prefix));
+  EXPECT_FALSE(TestFunctionPatching(kPatchableCode4, override, prefix));
+
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode1, override, prefix));
+  EXPECT_TRUE(TestFunctionPatching(kUnpatchableCode2, override, prefix));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode3, override, prefix));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode4, override, prefix));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode5, override, prefix));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode6, override, prefix));
+}
+
+TEST(Interception, PatchableFunctionWithTrampoline) {
+  TestOverrideFunction override = OverrideFunctionWithTrampoline;
+  FunctionPrefixKind prefix = FunctionPrefixPadding;
+
+  EXPECT_TRUE(TestFunctionPatching(kPatchableCode1, override, prefix));
+  EXPECT_TRUE(TestFunctionPatching(kPatchableCode2, override, prefix));
+#if SANITIZER_WINDOWS64
+  EXPECT_FALSE(TestFunctionPatching(kPatchableCode3, override, prefix));
+#else
+  EXPECT_TRUE(TestFunctionPatching(kPatchableCode3, override, prefix));
+#endif
+  EXPECT_FALSE(TestFunctionPatching(kPatchableCode4, override, prefix));
+
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode1, override, prefix));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode2, override, prefix));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode3, override, prefix));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode4, override, prefix));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode5, override, prefix));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode6, override, prefix));
+}
+
+TEST(Interception, PatchableFunctionPadding) {
+  TestOverrideFunction override = OverrideFunction;
+  FunctionPrefixKind prefix = FunctionPrefixPadding;
+
+  EXPECT_TRUE(TestFunctionPatching(kPatchableCode1, override, prefix));
+  EXPECT_TRUE(TestFunctionPatching(kPatchableCode2, override, prefix));
+#if SANITIZER_WINDOWS64
+  EXPECT_FALSE(TestFunctionPatching(kPatchableCode3, override, prefix));
+#else
+  EXPECT_TRUE(TestFunctionPatching(kPatchableCode3, override, prefix));
+#endif
+  EXPECT_TRUE(TestFunctionPatching(kPatchableCode4, override, prefix));
+
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode1, override, prefix));
+  EXPECT_TRUE(TestFunctionPatching(kUnpatchableCode2, override, prefix));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode3, override, prefix));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode4, override, prefix));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode5, override, prefix));
+  EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode6, override, prefix));
+}
+
+}  // namespace __interception
+
+#endif  // SANITIZER_WINDOWS
+#endif  // #if !SANITIZER_DEBUG
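
The tests above all exercise the same contract: an override routine takes the address of the function to patch, the address of the replacement, and an out-parameter that receives a callable address for the original code. A minimal sketch of that calling pattern; OverrideFunctionStub below is a stand-in for the interception library's OverrideFunction and performs no real patching.

  // Sketch only: illustrates how a patched function forwards to the original.
  #include <cstdint>
  #include <cstdio>

  using uptr = uintptr_t;

  static int (*RealAdd)(int, int) = nullptr;

  static int MyAdd(int a, int b) { return a + b; }

  static int HookedAdd(int a, int b) {
    std::printf("intercepted add(%d, %d)\n", a, b);
    return RealAdd(a, b);  // forward to the original implementation
  }

  // Stand-in for the real OverrideFunction: reports the "original" address
  // but does not rewrite any code.
  static bool OverrideFunctionStub(uptr func, uptr new_func, uptr *orig_func) {
    *orig_func = func;  // a real override would install a jump to new_func
    (void)new_func;
    return true;
  }

  int main() {
    uptr real = 0;
    if (!OverrideFunctionStub((uptr)&MyAdd, (uptr)&HookedAdd, &real)) return 1;
    RealAdd = (int (*)(int, int))real;
    return HookedAdd(2, 3) == 5 ? 0 : 1;
  }
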
diff --git a/lib/lsan/CMakeLists.txt b/lib/lsan/CMakeLists.txt
index 20e4093..9412c7a 100644
--- a/lib/lsan/CMakeLists.txt
+++ b/lib/lsan/CMakeLists.txt
@@ -1,7 +1,7 @@
 include_directories(..)
 
 set(LSAN_CFLAGS ${SANITIZER_COMMON_CFLAGS})
-append_no_rtti_flag(LSAN_CFLAGS)
+append_rtti_flag(OFF LSAN_CFLAGS)
 
 set(LSAN_COMMON_SOURCES
   lsan_common.cc
@@ -17,6 +17,7 @@
 set(LSAN_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 
 add_custom_target(lsan)
+set_target_properties(lsan PROPERTIES FOLDER "Compiler-RT Misc")
 
 add_compiler_rt_object_libraries(RTLSanCommon
     OS ${SANITIZER_COMMON_SUPPORTED_OS}
diff --git a/lib/lsan/Makefile.mk b/lib/lsan/Makefile.mk
deleted file mode 100644
index 5e70634..0000000
--- a/lib/lsan/Makefile.mk
+++ /dev/null
@@ -1,25 +0,0 @@
-#===- lib/lsan/Makefile.mk ---------------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := lsan
-SubDirs := 
-
-Sources := $(foreach file,$(wildcard $(Dir)/*.cc),$(notdir $(file)))
-ObjNames := $(Sources:%.cc=%.o)
-
-Implementation := Generic
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard $(Dir)/*.h)
-Dependencies += $(wildcard $(Dir)/../interception/*.h)
-Dependencies += $(wildcard $(Dir)/../sanitizer_common/*.h)
-
-# lsan functions used in another sanitizers.
-LsanCommonSources := $(foreach file,$(wildcard $(Dir)/lsan_common*.cc),$(notdir $(file)))
-LsanCommonFunctions := $(LsanCommonSources:%.cc=%)
diff --git a/lib/lsan/lsan.cc b/lib/lsan/lsan.cc
index 9c67359..c7c3429 100644
--- a/lib/lsan/lsan.cc
+++ b/lib/lsan/lsan.cc
@@ -72,6 +72,7 @@
   lsan_init_is_running = true;
   SanitizerToolName = "LeakSanitizer";
   CacheBinaryName();
+  AvoidCVE_2016_2143();
   InitializeFlags();
   InitCommonLsan();
   InitializeAllocator();
diff --git a/lib/lsan/lsan.h b/lib/lsan/lsan.h
index 53783cd..ec5eb93 100644
--- a/lib/lsan/lsan.h
+++ b/lib/lsan/lsan.h
@@ -24,8 +24,11 @@
       stack_top = t->stack_end();                                              \
       stack_bottom = t->stack_begin();                                         \
     }                                                                          \
-    stack.Unwind(max_size, StackTrace::GetCurrentPc(), GET_CURRENT_FRAME(),    \
-                 /* context */ 0, stack_top, stack_bottom, fast);              \
+    if (!SANITIZER_MIPS ||                                                     \
+        IsValidFrame(GET_CURRENT_FRAME(), stack_top, stack_bottom)) {          \
+      stack.Unwind(max_size, StackTrace::GetCurrentPc(), GET_CURRENT_FRAME(),  \
+                   /* context */ 0, stack_top, stack_bottom, fast);            \
+    }                                                                          \
   }
 
 #define GET_STACK_TRACE_FATAL \
diff --git a/lib/lsan/lsan_allocator.cc b/lib/lsan/lsan_allocator.cc
index 0a36781..a5220f1 100644
--- a/lib/lsan/lsan_allocator.cc
+++ b/lib/lsan/lsan_allocator.cc
@@ -99,11 +99,13 @@
     memset(p, 0, size);
   RegisterAllocation(stack, p, size);
   if (&__sanitizer_malloc_hook) __sanitizer_malloc_hook(p, size);
+  RunMallocHooks(p, size);
   return p;
 }
 
 void Deallocate(void *p) {
   if (&__sanitizer_free_hook) __sanitizer_free_hook(p);
+  RunFreeHooks(p);
   RegisterDeallocation(p);
   allocator.Deallocate(&cache, p);
 }
diff --git a/lib/lsan/lsan_common.cc b/lib/lsan/lsan_common.cc
index 0385c37..888a25b 100644
--- a/lib/lsan/lsan_common.cc
+++ b/lib/lsan/lsan_common.cc
@@ -221,9 +221,18 @@
       LOG_THREADS("Stack at %p-%p (SP = %p).\n", stack_begin, stack_end, sp);
       if (sp < stack_begin || sp >= stack_end) {
         // SP is outside the recorded stack range (e.g. the thread is running a
-        // signal handler on alternate stack). Again, consider the entire stack
-        // range to be reachable.
+        // signal handler on an alternate stack, or swapcontext was used).
+        // Again, consider the entire stack range to be reachable.
         LOG_THREADS("WARNING: stack pointer not in stack range.\n");
+        uptr page_size = GetPageSizeCached();
+        int skipped = 0;
+        while (stack_begin < stack_end &&
+               !IsAccessibleMemoryRange(stack_begin, 1)) {
+          skipped++;
+          stack_begin += page_size;
+        }
+        LOG_THREADS("Skipped %d guard page(s) to obtain stack %p-%p.\n",
+                    skipped, stack_begin, stack_end);
       } else {
         // Shrink the stack range to ignore out-of-scope values.
         stack_begin = sp;
@@ -437,6 +446,9 @@
 
   if (!param.success) {
     Report("LeakSanitizer has encountered a fatal error.\n");
+    Report(
+        "HINT: For debugging, try setting environment variable "
+        "LSAN_OPTIONS=verbosity=1:log_threads=1\n");
     Die();
   }
   param.leak_report.ApplySuppressions();
@@ -638,6 +650,13 @@
 }
 
 } // namespace __lsan
+#else // CAN_SANITIZE_LEAKS
+namespace __lsan {
+void InitCommonLsan() { }
+void DoLeakCheck() { }
+void DisableInThisThread() { }
+void EnableInThisThread() { }
+}
 #endif // CAN_SANITIZE_LEAKS
 
 using namespace __lsan;  // NOLINT
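
The lsan_common.cc hunk above handles a stack pointer that falls outside the recorded stack range by walking stack_begin forward one page at a time until the first readable page. A standalone sketch of that probe loop, with a caller-supplied predicate standing in for IsAccessibleMemoryRange and GetPageSizeCached:

  #include <cstdint>

  using uptr = uintptr_t;

  // Probe the first byte of each page; pages that fault are guard pages and
  // are skipped so the later scan only touches readable stack memory.
  static uptr SkipGuardPages(uptr stack_begin, uptr stack_end, uptr page_size,
                             bool (*is_accessible)(uptr addr, uptr size)) {
    while (stack_begin < stack_end && !is_accessible(stack_begin, 1))
      stack_begin += page_size;
    return stack_begin;
  }
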
diff --git a/lib/lsan/lsan_common_linux.cc b/lib/lsan/lsan_common_linux.cc
index 09f1502..1f54303 100644
--- a/lib/lsan/lsan_common_linux.cc
+++ b/lib/lsan/lsan_common_linux.cc
@@ -26,9 +26,8 @@
 namespace __lsan {
 
 static const char kLinkerName[] = "ld";
-// We request 2 modules matching "ld", so we can print a warning if there's more
-// than one match. But only the first one is actually used.
-static char linker_placeholder[2 * sizeof(LoadedModule)] ALIGNED(64);
+
+static char linker_placeholder[sizeof(LoadedModule)] ALIGNED(64);
 static LoadedModule *linker = nullptr;
 
 static bool IsLinker(const char* full_name) {
@@ -36,20 +35,24 @@
 }
 
 void InitializePlatformSpecificModules() {
-  internal_memset(linker_placeholder, 0, sizeof(linker_placeholder));
-  uptr num_matches = GetListOfModules(
-      reinterpret_cast<LoadedModule *>(linker_placeholder), 2, IsLinker);
-  if (num_matches == 1) {
-    linker = reinterpret_cast<LoadedModule *>(linker_placeholder);
-    return;
+  ListOfModules modules;
+  modules.init();
+  for (LoadedModule &module : modules) {
+    if (!IsLinker(module.full_name())) continue;
+    if (linker == nullptr) {
+      linker = reinterpret_cast<LoadedModule *>(linker_placeholder);
+      *linker = module;
+      module = LoadedModule();
+    } else {
+      VReport(1, "LeakSanitizer: Multiple modules match \"%s\". "
+              "TLS will not be handled correctly.\n", kLinkerName);
+      linker->clear();
+      linker = nullptr;
+      return;
+    }
   }
-  if (num_matches == 0)
-    VReport(1, "LeakSanitizer: Dynamic linker not found. "
-            "TLS will not be handled correctly.\n");
-  else if (num_matches > 1)
-    VReport(1, "LeakSanitizer: Multiple modules match \"%s\". "
-            "TLS will not be handled correctly.\n", kLinkerName);
-  linker = nullptr;
+  VReport(1, "LeakSanitizer: Dynamic linker not found. "
+             "TLS will not be handled correctly.\n");
 }
 
 static int ProcessGlobalRegionsCallback(struct dl_phdr_info *info, size_t size,
diff --git a/lib/msan/CMakeLists.txt b/lib/msan/CMakeLists.txt
index 1b48def..e7f2877 100644
--- a/lib/msan/CMakeLists.txt
+++ b/lib/msan/CMakeLists.txt
@@ -17,7 +17,7 @@
 
 
 set(MSAN_RTL_CFLAGS ${SANITIZER_COMMON_CFLAGS})
-append_no_rtti_flag(MSAN_RTL_CFLAGS)
+append_rtti_flag(OFF MSAN_RTL_CFLAGS)
 append_list_if(COMPILER_RT_HAS_FPIE_FLAG -fPIE MSAN_RTL_CFLAGS)
 # Prevent clang from generating libc calls.
 append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG -ffreestanding MSAN_RTL_CFLAGS)
@@ -26,6 +26,8 @@
 
 # Static runtime library.
 add_custom_target(msan)
+set_target_properties(msan PROPERTIES FOLDER "Compiler-RT Misc")
+
 foreach(arch ${MSAN_SUPPORTED_ARCH})
   add_compiler_rt_runtime(clang_rt.msan
     STATIC
@@ -58,8 +60,7 @@
   endif()
 endforeach()
 
-add_compiler_rt_resource_file(msan_blacklist msan_blacklist.txt)
-add_dependencies(msan msan_blacklist)
+add_compiler_rt_resource_file(msan_blacklist msan_blacklist.txt msan)
 add_dependencies(compiler-rt msan)
 
 if(COMPILER_RT_INCLUDE_TESTS)
diff --git a/lib/msan/msan.cc b/lib/msan/msan.cc
index 9949db4..d2981f0 100644
--- a/lib/msan/msan.cc
+++ b/lib/msan/msan.cc
@@ -178,7 +178,7 @@
 #endif
   VPrintf(1, "MSAN_OPTIONS: %s\n", msan_options ? msan_options : "<empty>");
 
-  SetVerbosity(common_flags()->verbosity);
+  InitializeCommonFlags();
 
   if (Verbosity()) ReportUnrecognizedFlags();
 
@@ -375,6 +375,7 @@
   msan_init_is_running = 1;
   SanitizerToolName = "MemorySanitizer";
 
+  AvoidCVE_2016_2143();
   InitTlsSize();
 
   CacheBinaryName();
@@ -462,13 +463,8 @@
   }
 
   unsigned char *s = (unsigned char*)MEM_TO_SHADOW(x);
-  for (uptr i = 0; i < size; i++) {
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-    Printf("%x%x ", s[i] & 0xf, s[i] >> 4);
-#else
+  for (uptr i = 0; i < size; i++)
     Printf("%x%x ", s[i] >> 4, s[i] & 0xf);
-#endif
-  }
   Printf("\n");
 }
 
@@ -542,6 +538,13 @@
     u32 idx = atomic_fetch_add(&NumStackOriginDescrs, 1, memory_order_relaxed);
     CHECK_LT(idx, kNumStackOriginDescrs);
     StackOriginDescr[idx] = descr + 4;
+#if SANITIZER_PPC64V1
+    // On PowerPC64 ELFv1, the address of a function actually points to a
+    // three-doubleword data structure with the first field containing
+    // the address of the function's code.
+    if (pc)
+      pc = *reinterpret_cast<uptr*>(pc);
+#endif
     StackOriginPC[idx] = pc;
     id = Origin::CreateStackOrigin(idx).raw_id();
     *id_ptr = id;
@@ -580,13 +583,13 @@
 }
 
 u16 __sanitizer_unaligned_load16(const uu16 *p) {
-  __msan_retval_tls[0] = *(uu16 *)MEM_TO_SHADOW((uptr)p);
+  *(uu16 *)&__msan_retval_tls[0] = *(uu16 *)MEM_TO_SHADOW((uptr)p);
   if (__msan_get_track_origins())
     __msan_retval_origin_tls = GetOriginIfPoisoned((uptr)p, sizeof(*p));
   return *p;
 }
 u32 __sanitizer_unaligned_load32(const uu32 *p) {
-  __msan_retval_tls[0] = *(uu32 *)MEM_TO_SHADOW((uptr)p);
+  *(uu32 *)&__msan_retval_tls[0] = *(uu32 *)MEM_TO_SHADOW((uptr)p);
   if (__msan_get_track_origins())
     __msan_retval_origin_tls = GetOriginIfPoisoned((uptr)p, sizeof(*p));
   return *p;
@@ -598,7 +601,7 @@
   return *p;
 }
 void __sanitizer_unaligned_store16(uu16 *p, u16 x) {
-  u16 s = __msan_param_tls[1];
+  u16 s = *(uu16 *)&__msan_param_tls[1];
   *(uu16 *)MEM_TO_SHADOW((uptr)p) = s;
   if (s && __msan_get_track_origins())
     if (uu32 o = __msan_param_origin_tls[2])
@@ -606,7 +609,7 @@
   *p = x;
 }
 void __sanitizer_unaligned_store32(uu32 *p, u32 x) {
-  u32 s = __msan_param_tls[1];
+  u32 s = *(uu32 *)&__msan_param_tls[1];
   *(uu32 *)MEM_TO_SHADOW((uptr)p) = s;
   if (s && __msan_get_track_origins())
     if (uu32 o = __msan_param_origin_tls[2])
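
The msan.cc change above dereferences pc on SANITIZER_PPC64V1 because, as the comment notes, an ELFv1 "function address" is really the address of a descriptor whose first doubleword holds the entry point. A sketch of that layout, with field names invented for illustration:

  #include <cstdint>

  struct OpdEntry {          // ELFv1 "official procedure descriptor"
    uint64_t entry_point;    // address of the function's first instruction
    uint64_t toc_pointer;    // TOC base the function expects in r2
    uint64_t environment;    // unused by C/C++
  };

  static uint64_t EntryPC(uint64_t function_address) {
    return reinterpret_cast<const OpdEntry *>(function_address)->entry_point;
  }
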
diff --git a/lib/msan/msan.h b/lib/msan/msan.h
index 2079a59..c714bff 100644
--- a/lib/msan/msan.h
+++ b/lib/msan/msan.h
@@ -42,15 +42,27 @@
 
 #if SANITIZER_LINUX && defined(__mips64)
 
-// Everything is above 0x00e000000000.
+// MIPS64 memory layout:
+// - 0x0000000000-0x0200000000: the program's own segments
+// - 0xa200000000-0xc000000000: PIE program segments
+// - 0xe200000000-0xffffffffff: library segments.
 const MappingDesc kMemoryLayout[] = {
-    {0x000000000000ULL, 0x00a000000000ULL, MappingDesc::INVALID, "invalid"},
-    {0x00a000000000ULL, 0x00c000000000ULL, MappingDesc::SHADOW, "shadow"},
-    {0x00c000000000ULL, 0x00e000000000ULL, MappingDesc::ORIGIN, "origin"},
-    {0x00e000000000ULL, 0x010000000000ULL, MappingDesc::APP, "app"}};
+    {0x000000000000ULL, 0x000200000000ULL, MappingDesc::APP, "app-1"},
+    {0x000200000000ULL, 0x002200000000ULL, MappingDesc::INVALID, "invalid"},
+    {0x002200000000ULL, 0x004000000000ULL, MappingDesc::SHADOW, "shadow-2"},
+    {0x004000000000ULL, 0x004200000000ULL, MappingDesc::INVALID, "invalid"},
+    {0x004200000000ULL, 0x006000000000ULL, MappingDesc::ORIGIN, "origin-2"},
+    {0x006000000000ULL, 0x006200000000ULL, MappingDesc::INVALID, "invalid"},
+    {0x006200000000ULL, 0x008000000000ULL, MappingDesc::SHADOW, "shadow-3"},
+    {0x008000000000ULL, 0x008200000000ULL, MappingDesc::SHADOW, "shadow-1"},
+    {0x008200000000ULL, 0x00a000000000ULL, MappingDesc::ORIGIN, "origin-3"},
+    {0x00a000000000ULL, 0x00a200000000ULL, MappingDesc::ORIGIN, "origin-1"},
+    {0x00a200000000ULL, 0x00c000000000ULL, MappingDesc::APP, "app-2"},
+    {0x00c000000000ULL, 0x00e200000000ULL, MappingDesc::INVALID, "invalid"},
+    {0x00e200000000ULL, 0x00ffffffffffULL, MappingDesc::APP, "app-3"}};
 
-#define MEM_TO_SHADOW(mem) (((uptr)(mem)) & ~0x4000000000ULL)
-#define SHADOW_TO_ORIGIN(shadow) (((uptr)(shadow)) + 0x002000000000)
+#define MEM_TO_SHADOW(mem) (((uptr)(mem)) ^ 0x8000000000ULL)
+#define SHADOW_TO_ORIGIN(shadow) (((uptr)(shadow)) + 0x2000000000ULL)
 
 #elif SANITIZER_LINUX && defined(__aarch64__)
 
@@ -107,7 +119,7 @@
 # define MEM_TO_SHADOW(mem) ((uptr)mem ^ 0x6000000000ULL)
 # define SHADOW_TO_ORIGIN(shadow) (((uptr)(shadow)) + 0x1000000000ULL)
 
-#elif SANITIZER_LINUX && defined(__powerpc64__)
+#elif SANITIZER_LINUX && SANITIZER_PPC64
 
 const MappingDesc kMemoryLayout[] = {
     {0x000000000000ULL, 0x000100000000ULL, MappingDesc::APP, "low memory"},
@@ -309,9 +321,21 @@
 
 }  // namespace __msan
 
-#define MSAN_MALLOC_HOOK(ptr, size) \
-  if (&__sanitizer_malloc_hook) __sanitizer_malloc_hook(ptr, size)
-#define MSAN_FREE_HOOK(ptr) \
-  if (&__sanitizer_free_hook) __sanitizer_free_hook(ptr)
+#define MSAN_MALLOC_HOOK(ptr, size)       \
+  do {                                    \
+    if (&__sanitizer_malloc_hook) {       \
+      UnpoisonParam(2);                   \
+      __sanitizer_malloc_hook(ptr, size); \
+    }                                     \
+    RunMallocHooks(ptr, size);            \
+  } while (false)
+#define MSAN_FREE_HOOK(ptr)       \
+  do {                            \
+    if (&__sanitizer_free_hook) { \
+      UnpoisonParam(1);           \
+      __sanitizer_free_hook(ptr); \
+    }                             \
+    RunFreeHooks(ptr);            \
+  } while (false)
 
 #endif  // MSAN_H
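
The new MIPS64 scheme in the msan.h hunk reaches shadow by XOR-ing bit 39 (0x8000000000) and origin by adding 0x2000000000, so each app-N region lands in the matching shadow-N/origin-N region of the table above. A small self-check, assuming the macros behave as written:

  #include <cassert>
  #include <cstdint>

  using uptr = uint64_t;

  static uptr MemToShadow(uptr mem) { return mem ^ 0x8000000000ULL; }
  static uptr ShadowToOrigin(uptr shadow) { return shadow + 0x2000000000ULL; }

  int main() {
    assert(MemToShadow(0x0000000010ULL) == 0x8000000010ULL);     // app-1 -> shadow-1
    assert(ShadowToOrigin(0x8000000010ULL) == 0xa000000010ULL);  // shadow-1 -> origin-1
    assert(MemToShadow(0xa200000010ULL) == 0x2200000010ULL);     // app-2 -> shadow-2
    assert(ShadowToOrigin(0x2200000010ULL) == 0x4200000010ULL);  // shadow-2 -> origin-2
    return 0;
  }
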
diff --git a/lib/msan/msan_allocator.cc b/lib/msan/msan_allocator.cc
index b7d3947..fdde4b4 100644
--- a/lib/msan/msan_allocator.cc
+++ b/lib/msan/msan_allocator.cc
@@ -56,7 +56,7 @@
 #else
   static const uptr kAllocatorSpace = 0x600000000000ULL;
 #endif
-  static const uptr kAllocatorSize = 0x80000000000; // 8T.
+  static const uptr kAllocatorSize = 0x40000000000; // 4T.
   static const uptr kMetadataSize  = sizeof(Metadata);
   static const uptr kMaxAllowedMallocSize = 8UL << 30;
 
diff --git a/lib/msan/msan_interceptors.cc b/lib/msan/msan_interceptors.cc
index 65671ee..f23d3ee 100644
--- a/lib/msan/msan_interceptors.cc
+++ b/lib/msan/msan_interceptors.cc
@@ -43,6 +43,9 @@
 using __sanitizer::atomic_store;
 using __sanitizer::atomic_uintptr_t;
 
+DECLARE_REAL(SIZE_T, strlen, const char *s)
+DECLARE_REAL(SIZE_T, strnlen, const char *s, SIZE_T maxlen)
+
 #if SANITIZER_FREEBSD
 #define __errno_location __error
 #endif
@@ -280,23 +283,6 @@
 #define MSAN_MAYBE_INTERCEPT_MALLOC_STATS
 #endif
 
-INTERCEPTOR(SIZE_T, strlen, const char *s) {
-  if (msan_init_is_running)
-    return REAL(strlen)(s);
-  ENSURE_MSAN_INITED();
-  SIZE_T res = REAL(strlen)(s);
-  CHECK_UNPOISONED(s, res + 1);
-  return res;
-}
-
-INTERCEPTOR(SIZE_T, strnlen, const char *s, SIZE_T n) {
-  ENSURE_MSAN_INITED();
-  SIZE_T res = REAL(strnlen)(s, n);
-  SIZE_T scan_size = (res == n) ? res : res + 1;
-  CHECK_UNPOISONED(s, scan_size);
-  return res;
-}
-
 INTERCEPTOR(char *, strcpy, char *dest, const char *src) {  // NOLINT
   ENSURE_MSAN_INITED();
   GET_STORE_STACK_TRACE;
@@ -756,65 +742,6 @@
 #define MSAN_MAYBE_INTERCEPT___FXSTATAT64
 #endif
 
-#if SANITIZER_FREEBSD
-INTERCEPTOR(int, stat, char *path, void *buf) {
-  ENSURE_MSAN_INITED();
-  int res = REAL(stat)(path, buf);
-  if (!res)
-    __msan_unpoison(buf, __sanitizer::struct_stat_sz);
-  return res;
-}
-# define MSAN_INTERCEPT_STAT INTERCEPT_FUNCTION(stat)
-#else
-INTERCEPTOR(int, __xstat, int magic, char *path, void *buf) {
-  ENSURE_MSAN_INITED();
-  int res = REAL(__xstat)(magic, path, buf);
-  if (!res)
-    __msan_unpoison(buf, __sanitizer::struct_stat_sz);
-  return res;
-}
-# define MSAN_INTERCEPT_STAT INTERCEPT_FUNCTION(__xstat)
-#endif
-
-#if !SANITIZER_FREEBSD
-INTERCEPTOR(int, __xstat64, int magic, char *path, void *buf) {
-  ENSURE_MSAN_INITED();
-  int res = REAL(__xstat64)(magic, path, buf);
-  if (!res)
-    __msan_unpoison(buf, __sanitizer::struct_stat64_sz);
-  return res;
-}
-#define MSAN_MAYBE_INTERCEPT___XSTAT64 INTERCEPT_FUNCTION(__xstat64)
-#else
-#define MSAN_MAYBE_INTERCEPT___XSTAT64
-#endif
-
-#if !SANITIZER_FREEBSD
-INTERCEPTOR(int, __lxstat, int magic, char *path, void *buf) {
-  ENSURE_MSAN_INITED();
-  int res = REAL(__lxstat)(magic, path, buf);
-  if (!res)
-    __msan_unpoison(buf, __sanitizer::struct_stat_sz);
-  return res;
-}
-#define MSAN_MAYBE_INTERCEPT___LXSTAT INTERCEPT_FUNCTION(__lxstat)
-#else
-#define MSAN_MAYBE_INTERCEPT___LXSTAT
-#endif
-
-#if !SANITIZER_FREEBSD
-INTERCEPTOR(int, __lxstat64, int magic, char *path, void *buf) {
-  ENSURE_MSAN_INITED();
-  int res = REAL(__lxstat64)(magic, path, buf);
-  if (!res)
-    __msan_unpoison(buf, __sanitizer::struct_stat64_sz);
-  return res;
-}
-#define MSAN_MAYBE_INTERCEPT___LXSTAT64 INTERCEPT_FUNCTION(__lxstat64)
-#else
-#define MSAN_MAYBE_INTERCEPT___LXSTAT64
-#endif
-
 INTERCEPTOR(int, pipe, int pipefd[2]) {
   if (msan_init_is_running)
     return REAL(pipe)(pipefd);
@@ -874,17 +801,42 @@
 
 #if !SANITIZER_FREEBSD
 INTERCEPTOR(int, getrlimit64, int resource, void *rlim) {
-  if (msan_init_is_running)
-    return REAL(getrlimit64)(resource, rlim);
+  if (msan_init_is_running) return REAL(getrlimit64)(resource, rlim);
   ENSURE_MSAN_INITED();
   int res = REAL(getrlimit64)(resource, rlim);
-  if (!res)
-    __msan_unpoison(rlim, __sanitizer::struct_rlimit64_sz);
+  if (!res) __msan_unpoison(rlim, __sanitizer::struct_rlimit64_sz);
   return res;
 }
+
+INTERCEPTOR(int, prlimit, int pid, int resource, void *new_rlimit,
+            void *old_rlimit) {
+  if (msan_init_is_running)
+    return REAL(prlimit)(pid, resource, new_rlimit, old_rlimit);
+  ENSURE_MSAN_INITED();
+  CHECK_UNPOISONED(new_rlimit, __sanitizer::struct_rlimit_sz);
+  int res = REAL(prlimit)(pid, resource, new_rlimit, old_rlimit);
+  if (!res) __msan_unpoison(old_rlimit, __sanitizer::struct_rlimit_sz);
+  return res;
+}
+
+INTERCEPTOR(int, prlimit64, int pid, int resource, void *new_rlimit,
+            void *old_rlimit) {
+  if (msan_init_is_running)
+    return REAL(prlimit64)(pid, resource, new_rlimit, old_rlimit);
+  ENSURE_MSAN_INITED();
+  CHECK_UNPOISONED(new_rlimit, __sanitizer::struct_rlimit64_sz);
+  int res = REAL(prlimit64)(pid, resource, new_rlimit, old_rlimit);
+  if (!res) __msan_unpoison(old_rlimit, __sanitizer::struct_rlimit64_sz);
+  return res;
+}
+
 #define MSAN_MAYBE_INTERCEPT_GETRLIMIT64 INTERCEPT_FUNCTION(getrlimit64)
+#define MSAN_MAYBE_INTERCEPT_PRLIMIT INTERCEPT_FUNCTION(prlimit)
+#define MSAN_MAYBE_INTERCEPT_PRLIMIT64 INTERCEPT_FUNCTION(prlimit64)
 #else
 #define MSAN_MAYBE_INTERCEPT_GETRLIMIT64
+#define MSAN_MAYBE_INTERCEPT_PRLIMIT
+#define MSAN_MAYBE_INTERCEPT_PRLIMIT64
 #endif
 
 #if SANITIZER_FREEBSD
@@ -953,30 +905,6 @@
 #define MSAN_MAYBE_INTERCEPT_EPOLL_PWAIT
 #endif
 
-INTERCEPTOR(SSIZE_T, recv, int fd, void *buf, SIZE_T len, int flags) {
-  ENSURE_MSAN_INITED();
-  SSIZE_T res = REAL(recv)(fd, buf, len, flags);
-  if (res > 0)
-    __msan_unpoison(buf, res);
-  return res;
-}
-
-INTERCEPTOR(SSIZE_T, recvfrom, int fd, void *buf, SIZE_T len, int flags,
-            void *srcaddr, int *addrlen) {
-  ENSURE_MSAN_INITED();
-  SIZE_T srcaddr_sz;
-  if (srcaddr) srcaddr_sz = *addrlen;
-  SSIZE_T res = REAL(recvfrom)(fd, buf, len, flags, srcaddr, addrlen);
-  if (res > 0) {
-    __msan_unpoison(buf, res);
-    if (srcaddr) {
-      SIZE_T sz = *addrlen;
-      __msan_unpoison(srcaddr, Min(sz, srcaddr_sz));
-    }
-  }
-  return res;
-}
-
 INTERCEPTOR(void *, calloc, SIZE_T nmemb, SIZE_T size) {
   GET_MALLOC_STACK_TRACE;
   if (UNLIKELY(!msan_inited)) {
@@ -1065,63 +993,6 @@
 #define MSAN_MAYBE_INTERCEPT_MMAP64
 #endif
 
-struct dlinfo {
-  char *dli_fname;
-  void *dli_fbase;
-  char *dli_sname;
-  void *dli_saddr;
-};
-
-INTERCEPTOR(int, dladdr, void *addr, dlinfo *info) {
-  ENSURE_MSAN_INITED();
-  int res = REAL(dladdr)(addr, info);
-  if (res != 0) {
-    __msan_unpoison(info, sizeof(*info));
-    if (info->dli_fname)
-      __msan_unpoison(info->dli_fname, REAL(strlen)(info->dli_fname) + 1);
-    if (info->dli_sname)
-      __msan_unpoison(info->dli_sname, REAL(strlen)(info->dli_sname) + 1);
-  }
-  return res;
-}
-
-INTERCEPTOR(char *, dlerror, int fake) {
-  ENSURE_MSAN_INITED();
-  char *res = REAL(dlerror)(fake);
-  if (res) __msan_unpoison(res, REAL(strlen)(res) + 1);
-  return res;
-}
-
-typedef int (*dl_iterate_phdr_cb)(__sanitizer_dl_phdr_info *info, SIZE_T size,
-                                  void *data);
-struct dl_iterate_phdr_data {
-  dl_iterate_phdr_cb callback;
-  void *data;
-};
-
-static int msan_dl_iterate_phdr_cb(__sanitizer_dl_phdr_info *info, SIZE_T size,
-                                   void *data) {
-  if (info) {
-    __msan_unpoison(info, size);
-    if (info->dlpi_phdr && info->dlpi_phnum)
-      __msan_unpoison(info->dlpi_phdr, struct_ElfW_Phdr_sz * info->dlpi_phnum);
-    if (info->dlpi_name)
-      __msan_unpoison(info->dlpi_name, REAL(strlen)(info->dlpi_name) + 1);
-  }
-  dl_iterate_phdr_data *cbdata = (dl_iterate_phdr_data *)data;
-  UnpoisonParam(3);
-  return cbdata->callback(info, size, cbdata->data);
-}
-
-INTERCEPTOR(int, dl_iterate_phdr, dl_iterate_phdr_cb callback, void *data) {
-  ENSURE_MSAN_INITED();
-  dl_iterate_phdr_data cbdata;
-  cbdata.callback = callback;
-  cbdata.data = data;
-  int res = REAL(dl_iterate_phdr)(msan_dl_iterate_phdr_cb, (void *)&cbdata);
-  return res;
-}
-
 INTERCEPTOR(int, getrusage, int who, void *usage) {
   ENSURE_MSAN_INITED();
   int res = REAL(getrusage)(who, usage);
@@ -1397,7 +1268,16 @@
       VReport(1, "MemorySanitizer: failed to intercept '" #name "'\n"); \
   } while (0)
 
+#define MSAN_INTERCEPT_FUNC_VER(name, ver)                                    \
+  do {                                                                        \
+    if ((!INTERCEPT_FUNCTION_VER(name, ver) || !REAL(name)))                  \
+      VReport(                                                                \
+          1, "MemorySanitizer: failed to intercept '" #name "@@" #ver "'\n"); \
+  } while (0)
+
 #define COMMON_INTERCEPT_FUNCTION(name) MSAN_INTERCEPT_FUNC(name)
+#define COMMON_INTERCEPT_FUNCTION_VER(name, ver)                          \
+  MSAN_INTERCEPT_FUNC_VER(name, ver)
 #define COMMON_INTERCEPTOR_UNPOISON_PARAM(count)  \
   UnpoisonParam(count)
 #define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \
@@ -1408,12 +1288,12 @@
   __msan_unpoison(ptr, size)
 #define COMMON_INTERCEPTOR_ENTER(ctx, func, ...)                  \
   if (msan_init_is_running) return REAL(func)(__VA_ARGS__);       \
+  ENSURE_MSAN_INITED();                                           \
   MSanInterceptorContext msan_ctx = {IsInInterceptorScope()};     \
   ctx = (void *)&msan_ctx;                                        \
   (void)ctx;                                                      \
   InterceptorScope interceptor_scope;                             \
-  __msan_unpoison(__errno_location(), sizeof(int)); /* NOLINT */  \
-  ENSURE_MSAN_INITED();
+  __msan_unpoison(__errno_location(), sizeof(int)); /* NOLINT */
 #define COMMON_INTERCEPTOR_DIR_ACQUIRE(ctx, path) \
   do {                                            \
   } while (false)
@@ -1449,6 +1329,11 @@
     *begin = *end = 0;                                                         \
   }
 
+#include "sanitizer_common/sanitizer_platform_interceptors.h"
+// Msan needs custom handling of these:
+#undef SANITIZER_INTERCEPT_MEMSET
+#undef SANITIZER_INTERCEPT_MEMMOVE
+#undef SANITIZER_INTERCEPT_MEMCPY
 #include "sanitizer_common/sanitizer_common_interceptors.inc"
 
 #define COMMON_SYSCALL_PRE_READ_RANGE(p, s) CHECK_UNPOISONED(p, s)
@@ -1461,6 +1346,66 @@
 #define COMMON_SYSCALL_POST_WRITE_RANGE(p, s) __msan_unpoison(p, s)
 #include "sanitizer_common/sanitizer_common_syscalls.inc"
 
+struct dlinfo {
+  char *dli_fname;
+  void *dli_fbase;
+  char *dli_sname;
+  void *dli_saddr;
+};
+
+INTERCEPTOR(int, dladdr, void *addr, dlinfo *info) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, dladdr, addr, info);
+  int res = REAL(dladdr)(addr, info);
+  if (res != 0) {
+    __msan_unpoison(info, sizeof(*info));
+    if (info->dli_fname)
+      __msan_unpoison(info->dli_fname, REAL(strlen)(info->dli_fname) + 1);
+    if (info->dli_sname)
+      __msan_unpoison(info->dli_sname, REAL(strlen)(info->dli_sname) + 1);
+  }
+  return res;
+}
+
+INTERCEPTOR(char *, dlerror, int fake) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, dlerror, fake);
+  char *res = REAL(dlerror)(fake);
+  if (res) __msan_unpoison(res, REAL(strlen)(res) + 1);
+  return res;
+}
+
+typedef int (*dl_iterate_phdr_cb)(__sanitizer_dl_phdr_info *info, SIZE_T size,
+                                  void *data);
+struct dl_iterate_phdr_data {
+  dl_iterate_phdr_cb callback;
+  void *data;
+};
+
+static int msan_dl_iterate_phdr_cb(__sanitizer_dl_phdr_info *info, SIZE_T size,
+                                   void *data) {
+  if (info) {
+    __msan_unpoison(info, size);
+    if (info->dlpi_phdr && info->dlpi_phnum)
+      __msan_unpoison(info->dlpi_phdr, struct_ElfW_Phdr_sz * info->dlpi_phnum);
+    if (info->dlpi_name)
+      __msan_unpoison(info->dlpi_name, REAL(strlen)(info->dlpi_name) + 1);
+  }
+  dl_iterate_phdr_data *cbdata = (dl_iterate_phdr_data *)data;
+  UnpoisonParam(3);
+  return cbdata->callback(info, size, cbdata->data);
+}
+
+INTERCEPTOR(int, dl_iterate_phdr, dl_iterate_phdr_cb callback, void *data) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, dl_iterate_phdr, callback, data);
+  dl_iterate_phdr_data cbdata;
+  cbdata.callback = callback;
+  cbdata.data = data;
+  int res = REAL(dl_iterate_phdr)(msan_dl_iterate_phdr_cb, (void *)&cbdata);
+  return res;
+}
+
 // These interface functions reside here so that they can use
 // REAL(memset), etc.
 void __msan_unpoison(const void *a, uptr size) {
@@ -1561,8 +1506,6 @@
   INTERCEPT_FUNCTION(strndup);
   MSAN_MAYBE_INTERCEPT___STRNDUP;
   INTERCEPT_FUNCTION(strncpy);  // NOLINT
-  INTERCEPT_FUNCTION(strlen);
-  INTERCEPT_FUNCTION(strnlen);
   INTERCEPT_FUNCTION(gcvt);
   INTERCEPT_FUNCTION(strcat);  // NOLINT
   INTERCEPT_FUNCTION(strncat);  // NOLINT
@@ -1580,8 +1523,13 @@
   INTERCEPT_STRTO(wcstoul);
   INTERCEPT_STRTO(wcstoll);
   INTERCEPT_STRTO(wcstoull);
+#ifdef SANITIZER_NLDBL_VERSION
+  INTERCEPT_FUNCTION_VER(vswprintf, SANITIZER_NLDBL_VERSION);
+  INTERCEPT_FUNCTION_VER(swprintf, SANITIZER_NLDBL_VERSION);
+#else
   INTERCEPT_FUNCTION(vswprintf);
   INTERCEPT_FUNCTION(swprintf);
+#endif
   INTERCEPT_FUNCTION(strxfrm);
   INTERCEPT_FUNCTION(strxfrm_l);
   INTERCEPT_FUNCTION(strftime);
@@ -1603,12 +1551,8 @@
   INTERCEPT_FUNCTION(fcvt);
   MSAN_MAYBE_INTERCEPT___FXSTAT;
   MSAN_INTERCEPT_FSTATAT;
-  MSAN_INTERCEPT_STAT;
-  MSAN_MAYBE_INTERCEPT___LXSTAT;
   MSAN_MAYBE_INTERCEPT___FXSTAT64;
   MSAN_MAYBE_INTERCEPT___FXSTATAT64;
-  MSAN_MAYBE_INTERCEPT___XSTAT64;
-  MSAN_MAYBE_INTERCEPT___LXSTAT64;
   INTERCEPT_FUNCTION(pipe);
   INTERCEPT_FUNCTION(pipe2);
   INTERCEPT_FUNCTION(socketpair);
@@ -1616,19 +1560,23 @@
   MSAN_MAYBE_INTERCEPT_FGETS_UNLOCKED;
   INTERCEPT_FUNCTION(getrlimit);
   MSAN_MAYBE_INTERCEPT_GETRLIMIT64;
+  MSAN_MAYBE_INTERCEPT_PRLIMIT;
+  MSAN_MAYBE_INTERCEPT_PRLIMIT64;
   MSAN_INTERCEPT_UNAME;
   INTERCEPT_FUNCTION(gethostname);
   MSAN_MAYBE_INTERCEPT_EPOLL_WAIT;
   MSAN_MAYBE_INTERCEPT_EPOLL_PWAIT;
-  INTERCEPT_FUNCTION(recv);
-  INTERCEPT_FUNCTION(recvfrom);
   INTERCEPT_FUNCTION(dladdr);
   INTERCEPT_FUNCTION(dlerror);
   INTERCEPT_FUNCTION(dl_iterate_phdr);
   INTERCEPT_FUNCTION(getrusage);
   INTERCEPT_FUNCTION(sigaction);
   INTERCEPT_FUNCTION(signal);
+#if defined(__mips__)
+  INTERCEPT_FUNCTION_VER(pthread_create, "GLIBC_2.2");
+#else
   INTERCEPT_FUNCTION(pthread_create);
+#endif
   INTERCEPT_FUNCTION(pthread_key_create);
   INTERCEPT_FUNCTION(pthread_join);
   INTERCEPT_FUNCTION(tzset);
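
The prlimit/prlimit64 interceptors added above follow the usual MSan shape: require that application-supplied input memory is initialized, call the real function, and unpoison whatever the callee wrote on success. A generic sketch of that control flow; the helpers and real_prlimit below are trivial stand-ins, not the real CHECK_UNPOISONED/__msan_unpoison/REAL() machinery.

  #include <cstddef>

  static void check_initialized(const void *, size_t) {}  // would assert "fully initialized"
  static void unpoison(const void *, size_t) {}           // would mark shadow as initialized
  static int real_prlimit(int, int, const void *, void *) { return 0; }  // stand-in for REAL(prlimit)

  static int intercepted_prlimit(int pid, int resource, const void *new_limit,
                                 void *old_limit, size_t limit_size) {
    if (new_limit) check_initialized(new_limit, limit_size);   // app-supplied input must be initialized
    int rc = real_prlimit(pid, resource, new_limit, old_limit);
    if (rc == 0 && old_limit) unpoison(old_limit, limit_size); // the kernel wrote the output struct
    return rc;
  }
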
diff --git a/lib/msan/msan_linux.cc b/lib/msan/msan_linux.cc
index ab3be91..d6a9588 100644
--- a/lib/msan/msan_linux.cc
+++ b/lib/msan/msan_linux.cc
@@ -55,14 +55,14 @@
 
 static bool ProtectMemoryRange(uptr beg, uptr size, const char *name) {
   if (size > 0) {
-    void *addr = MmapNoAccess(beg, size, name);
+    void *addr = MmapFixedNoAccess(beg, size, name);
     if (beg == 0 && addr) {
       // Depending on the kernel configuration, we may not be able to protect
       // the page at address zero.
       uptr gap = 16 * GetPageSizeCached();
       beg += gap;
       size -= gap;
-      addr = MmapNoAccess(beg, size, name);
+      addr = MmapFixedNoAccess(beg, size, name);
     }
     if ((uptr)addr != beg) {
       uptr end = beg + size - 1;
diff --git a/lib/msan/msan_report.cc b/lib/msan/msan_report.cc
index ddb8070..9a35c9c 100644
--- a/lib/msan/msan_report.cc
+++ b/lib/msan/msan_report.cc
@@ -221,11 +221,7 @@
     } else {
       unsigned char v = *(unsigned char *)s;
       if (v) last_quad_poisoned = true;
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-      Printf("%x%x", v & 0xf, v >> 4);
-#else
       Printf("%x%x", v >> 4, v & 0xf);
-#endif
     }
     // Group end.
     if (pos % 4 == 3 && with_origins) {
diff --git a/lib/msan/tests/msan_test.cc b/lib/msan/tests/msan_test.cc
index b7162b3..e4076b5 100644
--- a/lib/msan/tests/msan_test.cc
+++ b/lib/msan/tests/msan_test.cc
@@ -115,7 +115,10 @@
 # define SUPERUSER_GROUP "root"
 #endif
 
-const size_t kPageSize = 4096;
+static uintptr_t GetPageSize() {
+  return sysconf(_SC_PAGESIZE);
+}
+
 const size_t kMaxPathLength = 4096;
 
 typedef unsigned char      U1;
@@ -1117,8 +1120,8 @@
   struct hostent he;
   struct hostent *result;
   int err;
-  int res = gethostbyname_r("localhost", &he, buf, sizeof(buf), &result, &err);
-  ASSERT_EQ(ERANGE, res);
+  gethostbyname_r("localhost", &he, buf, sizeof(buf), &result, &err);
+  ASSERT_EQ(ERANGE, errno);
   EXPECT_NOT_POISONED(err);
 }
 
@@ -1214,17 +1217,21 @@
 }
 
 TEST(MemorySanitizer, shmat) {
-  void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
-                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-  ASSERT_NE(MAP_FAILED, p);
+  const int kShmSize = 4096;
+  void *mapping_start = mmap(NULL, kShmSize + SHMLBA, PROT_READ | PROT_WRITE,
+                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  ASSERT_NE(MAP_FAILED, mapping_start);
+
+  void *p = (void *)(((unsigned long)mapping_start + SHMLBA - 1) / SHMLBA * SHMLBA);
+  // p is now SHMLBA-aligned.
 
   ((char *)p)[10] = *GetPoisoned<U1>();
-  ((char *)p)[4095] = *GetPoisoned<U1>();
+  ((char *)p)[kShmSize - 1] = *GetPoisoned<U1>();
 
-  int res = munmap(p, 4096);
+  int res = munmap(mapping_start, kShmSize + SHMLBA);
   ASSERT_EQ(0, res);
 
-  int id = shmget(IPC_PRIVATE, 4096, 0644 | IPC_CREAT);
+  int id = shmget(IPC_PRIVATE, kShmSize, 0644 | IPC_CREAT);
   ASSERT_GT(id, -1);
 
   void *q = shmat(id, p, 0);
@@ -1232,7 +1239,7 @@
 
   EXPECT_NOT_POISONED(((char *)q)[0]);
   EXPECT_NOT_POISONED(((char *)q)[10]);
-  EXPECT_NOT_POISONED(((char *)q)[4095]);
+  EXPECT_NOT_POISONED(((char *)q)[kShmSize - 1]);
 
   res = shmdt(q);
   ASSERT_EQ(0, res);
@@ -2389,13 +2396,19 @@
 
 TEST(MemorySanitizer, ptrtoint) {
   // Test that shadow is propagated through pointer-to-integer conversion.
-  void* p = (void*)0xABCD;
-  __msan_poison(((char*)&p) + 1, sizeof(p));
-  EXPECT_NOT_POISONED((((uintptr_t)p) & 0xFF) == 0);
+  unsigned char c = 0;
+  __msan_poison(&c, 1);
+  uintptr_t u = (uintptr_t)c << 8;
+  EXPECT_NOT_POISONED(u & 0xFF00FF);
+  EXPECT_POISONED(u & 0xFF00);
 
-  void* q = (void*)0xABCD;
-  __msan_poison(&q, sizeof(q) - 1);
-  EXPECT_POISONED((((uintptr_t)q) & 0xFF) == 0);
+  break_optimization(&u);
+  void* p = (void*)u;
+
+  break_optimization(&p);
+  EXPECT_POISONED(p);
+  EXPECT_NOT_POISONED(((uintptr_t)p) & 0xFF00FF);
+  EXPECT_POISONED(((uintptr_t)p) & 0xFF00);
 }
 
 static void vaargsfn2(int guard, ...) {
@@ -2449,6 +2462,20 @@
   vaargsfn_many(1, 2, *x, 3, 4, 5, 6, 7, 8, 9, *y);
 }
 
+static void vaargsfn_manyfix(int g1, int g2, int g3, int g4, int g5, int g6, int g7, int g8, int g9, ...) {
+  va_list vl;
+  va_start(vl, g9);
+  EXPECT_NOT_POISONED(va_arg(vl, int));
+  EXPECT_POISONED(va_arg(vl, int));
+  va_end(vl);
+}
+
+TEST(MemorySanitizer, VAArgManyFixTest) {
+  int* x = GetPoisoned<int>();
+  int* y = GetPoisoned<int>();
+  vaargsfn_manyfix(1, *x, 3, 4, 5, 6, 7, 8, 9, 10, *y);
+}
+
 static void vaargsfn_pass2(va_list vl) {
   EXPECT_NOT_POISONED(va_arg(vl, int));
   EXPECT_NOT_POISONED(va_arg(vl, int));
@@ -2805,6 +2832,22 @@
   ASSERT_EQ(result, 0);
   EXPECT_NOT_POISONED(limit.rlim_cur);
   EXPECT_NOT_POISONED(limit.rlim_max);
+
+  struct rlimit limit2;
+  __msan_poison(&limit2, sizeof(limit2));
+  result = prlimit(getpid(), RLIMIT_DATA, &limit, &limit2);
+  ASSERT_EQ(result, 0);
+  EXPECT_NOT_POISONED(limit2.rlim_cur);
+  EXPECT_NOT_POISONED(limit2.rlim_max);
+
+  __msan_poison(&limit, sizeof(limit));
+  result = prlimit(getpid(), RLIMIT_DATA, nullptr, &limit);
+  ASSERT_EQ(result, 0);
+  EXPECT_NOT_POISONED(limit.rlim_cur);
+  EXPECT_NOT_POISONED(limit.rlim_max);
+
+  result = prlimit(getpid(), RLIMIT_DATA, &limit, nullptr);
+  ASSERT_EQ(result, 0);
 }
 
 TEST(MemorySanitizer, getrusage) {
@@ -2888,6 +2931,10 @@
   static const char basename[] = "libmsan_loadable.mips64el.so";
 #elif defined(__aarch64__)
   static const char basename[] = "libmsan_loadable.aarch64.so";
+#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  static const char basename[] = "libmsan_loadable.powerpc64.so";
+#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  static const char basename[] = "libmsan_loadable.powerpc64le.so";
 #endif
   int res = snprintf(buf, sz, "%.*s/%s",
                      (int)dir_len, program_path, basename);
@@ -3195,28 +3242,30 @@
 #if !defined(__FreeBSD__)
 TEST(MemorySanitizer, memalign) {
   void *p = memalign(4096, 13);
-  EXPECT_EQ(0U, (uintptr_t)p % kPageSize);
+  EXPECT_EQ(0U, (uintptr_t)p % 4096);
   free(p);
 }
 #endif
 
 TEST(MemorySanitizer, valloc) {
   void *a = valloc(100);
-  EXPECT_EQ(0U, (uintptr_t)a % kPageSize);
+  uintptr_t PageSize = GetPageSize();
+  EXPECT_EQ(0U, (uintptr_t)a % PageSize);
   free(a);
 }
 
 // There's no pvalloc() on FreeBSD.
 #if !defined(__FreeBSD__)
 TEST(MemorySanitizer, pvalloc) {
-  void *p = pvalloc(kPageSize + 100);
-  EXPECT_EQ(0U, (uintptr_t)p % kPageSize);
-  EXPECT_EQ(2 * kPageSize, __sanitizer_get_allocated_size(p));
+  uintptr_t PageSize = GetPageSize();
+  void *p = pvalloc(PageSize + 100);
+  EXPECT_EQ(0U, (uintptr_t)p % PageSize);
+  EXPECT_EQ(2 * PageSize, __sanitizer_get_allocated_size(p));
   free(p);
 
   p = pvalloc(0);  // pvalloc(0) should allocate at least one page.
-  EXPECT_EQ(0U, (uintptr_t)p % kPageSize);
-  EXPECT_EQ(kPageSize, __sanitizer_get_allocated_size(p));
+  EXPECT_EQ(0U, (uintptr_t)p % PageSize);
+  EXPECT_EQ(PageSize, __sanitizer_get_allocated_size(p));
   free(p);
 }
 #endif
@@ -3920,7 +3969,48 @@
 
   EXPECT_EQ((unsigned)(2 * 102 + 3 * 103), c[1]);
 }
-#endif  // defined(__clang__)
+
+TEST(VectorCmpTest, mm_cmpneq_ps) {
+  V4x32 c;
+  c = _mm_cmpneq_ps(V4x32{Poisoned<U4>(), 1, 2, 3}, V4x32{4, 5, Poisoned<U4>(), 6});
+  EXPECT_POISONED(c[0]);
+  EXPECT_NOT_POISONED(c[1]);
+  EXPECT_POISONED(c[2]);
+  EXPECT_NOT_POISONED(c[3]);
+
+  c = _mm_cmpneq_ps(V4x32{0, 1, 2, 3}, V4x32{4, 5, 6, 7});
+  EXPECT_NOT_POISONED(c);
+}
+
+TEST(VectorCmpTest, mm_cmpneq_sd) {
+  V2x64 c;
+  c = _mm_cmpneq_sd(V2x64{Poisoned<U8>(), 1}, V2x64{2, 3});
+  EXPECT_POISONED(c[0]);
+  c = _mm_cmpneq_sd(V2x64{1, 2}, V2x64{Poisoned<U8>(), 3});
+  EXPECT_POISONED(c[0]);
+  c = _mm_cmpneq_sd(V2x64{1, 2}, V2x64{3, 4});
+  EXPECT_NOT_POISONED(c[0]);
+  c = _mm_cmpneq_sd(V2x64{1, Poisoned<U8>()}, V2x64{2, Poisoned<U8>()});
+  EXPECT_NOT_POISONED(c[0]);
+  c = _mm_cmpneq_sd(V2x64{1, Poisoned<U8>()}, V2x64{1, Poisoned<U8>()});
+  EXPECT_NOT_POISONED(c[0]);
+}
+
+TEST(VectorCmpTest, builtin_ia32_ucomisdlt) {
+  U4 c;
+  c = __builtin_ia32_ucomisdlt(V2x64{Poisoned<U8>(), 1}, V2x64{2, 3});
+  EXPECT_POISONED(c);
+  c = __builtin_ia32_ucomisdlt(V2x64{1, 2}, V2x64{Poisoned<U8>(), 3});
+  EXPECT_POISONED(c);
+  c = __builtin_ia32_ucomisdlt(V2x64{1, 2}, V2x64{3, 4});
+  EXPECT_NOT_POISONED(c);
+  c = __builtin_ia32_ucomisdlt(V2x64{1, Poisoned<U8>()}, V2x64{2, Poisoned<U8>()});
+  EXPECT_NOT_POISONED(c);
+  c = __builtin_ia32_ucomisdlt(V2x64{1, Poisoned<U8>()}, V2x64{1, Poisoned<U8>()});
+  EXPECT_NOT_POISONED(c);
+}
+
+#endif // defined(__x86_64__) && defined(__clang__)
 
 TEST(MemorySanitizerOrigins, SetGet) {
   EXPECT_EQ(TrackingOrigins(), !!__msan_get_track_origins());
@@ -4173,7 +4263,7 @@
   U4 origin = __LINE__;
   __msan_set_origin(&x, sizeof(x), origin);
   __msan_poison(&x, sizeof(x));
-  __builtin_ia32_storeups((float*)&y, x);
+  _mm_storeu_ps((float*)&y, x);
   EXPECT_POISONED_O(y, origin);
 }
 #endif
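
The shmat test above over-allocates by SHMLBA and rounds the mapping start up so the address handed to shmat is SHMLBA-aligned; the divide-and-multiply form works whether or not SHMLBA is a power of two. The rounding idiom in isolation:

  #include <cassert>
  #include <cstdint>

  // Round value up to the next multiple of alignment.
  static uintptr_t RoundUpTo(uintptr_t value, uintptr_t alignment) {
    return (value + alignment - 1) / alignment * alignment;
  }

  int main() {
    assert(RoundUpTo(0x1001, 0x1000) == 0x2000);
    assert(RoundUpTo(0x2000, 0x1000) == 0x2000);
    return 0;
  }
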
diff --git a/lib/profile/CMakeLists.txt b/lib/profile/CMakeLists.txt
index 17eb48a..ccf79d7 100644
--- a/lib/profile/CMakeLists.txt
+++ b/lib/profile/CMakeLists.txt
@@ -22,7 +22,24 @@
       }
 " COMPILER_RT_TARGET_HAS_ATOMICS)
 
+CHECK_CXX_SOURCE_COMPILES("
+#if defined(__linux__)
+#include <unistd.h>
+#endif
+#include <fcntl.h>
+int fd;
+int main() {
+ struct flock s_flock;
+
+ s_flock.l_type = F_WRLCK;
+ fcntl(fd, F_SETLKW, &s_flock);
+ return 0;
+}
+
+" COMPILER_RT_TARGET_HAS_FCNTL_LCK)
+
 add_custom_target(profile)
+set_target_properties(profile PROPERTIES FOLDER "Compiler-RT Misc")
 
 set(PROFILE_SOURCES
   GCDAProfiling.c
@@ -30,6 +47,8 @@
   InstrProfilingValue.c
   InstrProfilingBuffer.c
   InstrProfilingFile.c
+  InstrProfilingMerge.c
+  InstrProfilingMergeFile.c
   InstrProfilingWriter.c
   InstrProfilingPlatformDarwin.c
   InstrProfilingPlatformLinux.c
@@ -53,11 +72,23 @@
      -DCOMPILER_RT_HAS_ATOMICS=1)
 endif() 
 
+if(COMPILER_RT_TARGET_HAS_FCNTL_LCK)
+ set(EXTRA_FLAGS
+     ${EXTRA_FLAGS}
+     -DCOMPILER_RT_HAS_FCNTL_LCK=1)
+endif()
+
+# This is an MSVC C-only warning about taking the address of a local in an
+# aggregate initializer; every other compiler accepts the construct.
+# nonstandard extension used : 'identifier' : cannot be initialized using address of automatic variable
+append_list_if(COMPILER_RT_HAS_WD4221_FLAG /wd4221 EXTRA_FLAGS)
+
 if(APPLE)
   add_compiler_rt_runtime(clang_rt.profile
     STATIC
     OS ${PROFILE_SUPPORTED_OS}
     ARCHS ${PROFILE_SUPPORTED_ARCH}
+    CFLAGS ${EXTRA_FLAGS}
     SOURCES ${PROFILE_SOURCES}
     PARENT_TARGET profile)
 else()
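
The new CHECK_CXX_SOURCE_COMPILES probe only defines COMPILER_RT_HAS_FCNTL_LCK when fcntl(F_SETLKW) compiles for the target, so the profile runtime can guard its use of POSIX advisory locks. A hedged sketch of the kind of guarded code this enables; the helper name and the no-locking fallback are illustrative, not the runtime's actual implementation.

  #include <fcntl.h>
  #include <string.h>

  static int lockProfileFd(int fd) {
  #if defined(COMPILER_RT_HAS_FCNTL_LCK)
    struct flock lk;
    memset(&lk, 0, sizeof(lk));
    lk.l_type = F_WRLCK;              // exclusive (write) lock
    lk.l_whence = SEEK_SET;
    lk.l_start = 0;                   // lock the whole file
    lk.l_len = 0;
    return fcntl(fd, F_SETLKW, &lk);  // block until the lock is granted
  #else
    (void)fd;
    return 0;                         // locking not available on this target
  #endif
  }
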
diff --git a/lib/profile/GCDAProfiling.c b/lib/profile/GCDAProfiling.c
index 84fc7ef..138af6e 100644
--- a/lib/profile/GCDAProfiling.c
+++ b/lib/profile/GCDAProfiling.c
@@ -20,6 +20,7 @@
 |*
 \*===----------------------------------------------------------------------===*/
 
+#include "InstrProfilingPort.h"
 #include "InstrProfilingUtil.h"
 
 #include <errno.h>
@@ -33,6 +34,9 @@
 #else
 #include <sys/mman.h>
 #include <sys/file.h>
+#ifndef MAP_FILE
+#define MAP_FILE 0
+#endif
 #endif
 
 #if defined(__FreeBSD__) && defined(__i386__)
@@ -170,44 +174,16 @@
 
 static char *mangle_filename(const char *orig_filename) {
   char *new_filename;
-  size_t filename_len, prefix_len;
+  size_t prefix_len;
   int prefix_strip;
-  int level = 0;
-  const char *fname, *ptr;
-  const char *prefix = getenv("GCOV_PREFIX");
-  const char *prefix_strip_str = getenv("GCOV_PREFIX_STRIP");
+  const char *prefix = lprofGetPathPrefix(&prefix_strip, &prefix_len);
 
-  if (prefix == NULL || prefix[0] == '\0')
+  if (prefix == NULL)
     return strdup(orig_filename);
 
-  if (prefix_strip_str) {
-    prefix_strip = atoi(prefix_strip_str);
-
-    /* Negative GCOV_PREFIX_STRIP values are ignored */
-    if (prefix_strip < 0)
-      prefix_strip = 0;
-  } else {
-    prefix_strip = 0;
-  }
-
-  fname = orig_filename;
-  for (level = 0, ptr = fname + 1; level < prefix_strip; ++ptr) {
-    if (*ptr == '\0')
-      break;
-    if (*ptr != '/')
-      continue;
-    fname = ptr;
-    ++level;
-  }
-
-  filename_len = strlen(fname);
-  prefix_len = strlen(prefix);
-  new_filename = malloc(prefix_len + 1 + filename_len + 1);
-  memcpy(new_filename, prefix, prefix_len);
-
-  if (prefix[prefix_len - 1] != '/')
-    new_filename[prefix_len++] = '/';
-  memcpy(new_filename + prefix_len, fname, filename_len + 1);
+  new_filename = malloc(prefix_len + 1 + strlen(orig_filename) + 1);
+  lprofApplyPathPrefix(new_filename, orig_filename, prefix, prefix_len,
+                       prefix_strip);
 
   return new_filename;
 }
@@ -511,7 +487,7 @@
   }
 }
 
-void llvm_writeout_files() {
+void llvm_writeout_files(void) {
   struct writeout_fn_node *curr = writeout_fn_head;
 
   while (curr) {
@@ -520,7 +496,7 @@
   }
 }
 
-void llvm_delete_writeout_function_list() {
+void llvm_delete_writeout_function_list(void) {
   while (writeout_fn_head) {
     struct writeout_fn_node *node = writeout_fn_head;
     writeout_fn_head = writeout_fn_head->next;
@@ -552,7 +528,7 @@
   }
 }
 
-void llvm_delete_flush_function_list() {
+void llvm_delete_flush_function_list(void) {
   while (flush_fn_head) {
     struct flush_fn_node *node = flush_fn_head;
     flush_fn_head = flush_fn_head->next;
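
mangle_filename above now delegates to lprofGetPathPrefix/lprofApplyPathPrefix, but the intended behaviour is unchanged: prepend GCOV_PREFIX to the .gcda path and drop its first GCOV_PREFIX_STRIP leading directory components. A sketch of that transformation; the helper name is invented, and the real code works on C strings rather than std::string. Example: prefix "/tmp/prof" with strip 2 turns "/home/me/obj/foo.gcda" into "/tmp/prof/obj/foo.gcda".

  #include <string>

  static std::string ApplyGcovPrefix(std::string path, const std::string &prefix,
                                     int strip) {
    // Drop the first `strip` leading directory components of an absolute path.
    for (int i = 0; i < strip && !path.empty() && path[0] == '/'; ++i) {
      std::string::size_type next = path.find('/', 1);
      if (next == std::string::npos) break;  // nothing left to strip
      path.erase(0, next);
    }
    std::string out = prefix;
    if (out.empty() || out.back() != '/') out += '/';
    if (!path.empty() && path[0] == '/') path.erase(0, 1);  // avoid "//"
    return out + path;
  }
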
diff --git a/lib/profile/InstrProfData.inc b/lib/profile/InstrProfData.inc
index 4b8eead..f7c22d1 100644
--- a/lib/profile/InstrProfData.inc
+++ b/lib/profile/InstrProfData.inc
@@ -57,6 +57,12 @@
  *
 \*===----------------------------------------------------------------------===*/
 
+/* Functions marked with INSTR_PROF_VISIBILITY must have hidden visibility in
+ * the compiler runtime. */
+#ifndef INSTR_PROF_VISIBILITY
+#define INSTR_PROF_VISIBILITY
+#endif
+
 /* INSTR_PROF_DATA start. */
 /* Definition of member fields of the per-function control structure. */
 #ifndef INSTR_PROF_DATA
@@ -66,17 +72,21 @@
 #endif
 INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), NameRef, \
                 ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \
-		IndexedInstrProf::ComputeHash(getPGOFuncNameVarInitializer(Inc->getName()))))
+                IndexedInstrProf::ComputeHash(getPGOFuncNameVarInitializer(Inc->getName()))))
 INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \
                 ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \
                 Inc->getHash()->getZExtValue()))
 INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt64PtrTy(Ctx), CounterPtr, \
                 ConstantExpr::getBitCast(CounterPtr, \
                 llvm::Type::getInt64PtrTy(Ctx)))
+/* This is used to map function pointers for the indirect call targets to
+ * function name hashes during the conversion from raw to merged profile
+ * data.
+ */
 INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), FunctionPointer, \
                 FunctionAddr)
 INSTR_PROF_DATA(IntPtrT, llvm::Type::getInt8PtrTy(Ctx), Values, \
-                ConstantPointerNull::get(Int8PtrTy))
+                ValuesPtrExpr)
 INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumCounters, \
                 ConstantInt::get(llvm::Type::getInt32Ty(Ctx), NumCounters))
 INSTR_PROF_DATA(const uint16_t, Int16ArrayTy, NumValueSites[IPVK_Last+1], \
@@ -84,6 +94,33 @@
 #undef INSTR_PROF_DATA
 /* INSTR_PROF_DATA end. */
 
+
+/* This is an internal data structure used by the value profiler. It is
+ * defined here so that the serialization code can be shared with LLVM
+ * and exercised in unit tests.
+ *
+ * typedef struct ValueProfNode {
+ *   // InstrProfValueData VData;
+ *   uint64_t Value;
+ *   uint64_t Count;
+ *   struct ValueProfNode *Next;
+ * } ValueProfNode;
+ */
+/* INSTR_PROF_VALUE_NODE start. */
+#ifndef INSTR_PROF_VALUE_NODE
+#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Initializer)
+#else
+#define INSTR_PROF_DATA_DEFINED
+#endif
+INSTR_PROF_VALUE_NODE(uint64_t, llvm::Type::getInt64Ty(Ctx), Value, \
+                      ConstantInt::get(llvm::Type::getInt64Ty(Ctx), 0))
+INSTR_PROF_VALUE_NODE(uint64_t, llvm::Type::getInt64Ty(Ctx), Count, \
+                      ConstantInt::get(llvm::Type::getInt64Ty(Ctx), 0))
+INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::Type::getInt8PtrTy(Ctx), Next, \
+                      ConstantInt::get(llvm::Type::getInt8PtrTy(Ctx), 0))
+#undef INSTR_PROF_VALUE_NODE
+/* INSTR_PROF_VALUE_NODE end. */
+
 /* INSTR_PROF_RAW_HEADER  start */
 /* Definition of member fields of the raw profile header data structure. */
 #ifndef INSTR_PROF_RAW_HEADER
@@ -99,8 +136,6 @@
 INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
 INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
 INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
-INSTR_PROF_RAW_HEADER(uint64_t, ValueDataSize, ValueDataSize)
-INSTR_PROF_RAW_HEADER(uint64_t, ValueDataDelta, (uintptr_t)ValueDataBegin)
 #undef INSTR_PROF_RAW_HEADER
 /* INSTR_PROF_RAW_HEADER  end */
 
@@ -169,7 +204,7 @@
 #else
 COVMAP_FUNC_RECORD(const int64_t, llvm::Type::getInt64Ty(Ctx), NameRef, \
                    llvm::ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \
-	           llvm::IndexedInstrProf::ComputeHash(NameValue)))
+                   llvm::IndexedInstrProf::ComputeHash(NameValue)))
 #endif
 COVMAP_FUNC_RECORD(const uint32_t, llvm::Type::getInt32Ty(Ctx), DataSize, \
                    llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx),\
@@ -293,16 +328,15 @@
   static std::unique_ptr<ValueProfData>
   serializeFrom(const InstrProfRecord &Record);
   /*!
-   * Check the integrity of the record. Return the error code when
-   * an error is detected, otherwise return instrprof_error::success.
+   * Check the integrity of the record.
    */
-  instrprof_error checkIntegrity();
+  Error checkIntegrity();
   /*!
    * Return a pointer to \c ValueProfileData instance ready to be read.
    * All data in the instance are properly byte swapped. The input
    * data is assumed to be in little endian order.
    */
-  static ErrorOr<std::unique_ptr<ValueProfData>>
+  static Expected<std::unique_ptr<ValueProfData>>
   getValueProfData(const unsigned char *SrcBuffer,
                    const unsigned char *const SrcBufferEnd,
                    support::endianness SrcDataEndianness);
@@ -359,42 +393,14 @@
   ValueProfData *(*AllocValueProfData)(size_t TotalSizeInBytes);
 } ValueProfRecordClosure;
 
-/*
- * A wrapper struct that represents value profile runtime data.
- * Like InstrProfRecord class which is used by profiling host tools,
- * ValueProfRuntimeRecord also implements the abstract intefaces defined in
- * ValueProfRecordClosure so that the runtime data can be serialized using
- * shared C implementation. In this structure, NumValueSites and Nodes
- * members are the primary fields while other fields hold the derived
- * information for fast implementation of closure interfaces.
- */
-typedef struct ValueProfRuntimeRecord {
-  /* Number of sites for each value profile kind.  */
-  const uint16_t *NumValueSites;
-  /* An array of linked-list headers. The size of of the array is the
-   * total number of value profile sites : sum(NumValueSites[*])). Each
-   * linked-list stores the values profiled for a value profile site. */
-  ValueProfNode **Nodes;
-
-  /* Total number of value profile kinds which have at least one
-   *  value profile sites. */
-  uint32_t NumValueKinds;
-  /* An array recording the number of values tracked at each site.
-   * The size of the array is TotalNumValueSites. */
-  uint8_t *SiteCountArray[IPVK_Last + 1];
-  ValueProfNode **NodesKind[IPVK_Last + 1];
-} ValueProfRuntimeRecord;
-
-/* Forward declarations of C interfaces.  */
-int initializeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord,
-                                     const uint16_t *NumValueSites,
-                                     ValueProfNode **Nodes);
-void finalizeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord);
-uint32_t getValueProfDataSizeRT(const ValueProfRuntimeRecord *Record);
-ValueProfData *
-serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record,
-                             ValueProfData *Dst);
-uint32_t getNumValueKindsRT(const void *R);
+INSTR_PROF_VISIBILITY ValueProfRecord *
+getFirstValueProfRecord(ValueProfData *VPD);
+INSTR_PROF_VISIBILITY ValueProfRecord *
+getValueProfRecordNext(ValueProfRecord *VPR);
+INSTR_PROF_VISIBILITY InstrProfValueData *
+getValueProfRecordValueData(ValueProfRecord *VPR);
+INSTR_PROF_VISIBILITY uint32_t
+getValueProfRecordHeaderSize(uint32_t NumValueSites);
 
 #undef INSTR_PROF_VALUE_PROF_DATA
 #endif  /* INSTR_PROF_VALUE_PROF_DATA */
@@ -404,8 +410,10 @@
 #define INSTR_PROF_DATA_DEFINED
 #ifdef __cplusplus
 #define INSTR_PROF_INLINE inline
+#define INSTR_PROF_NULLPTR nullptr
 #else
 #define INSTR_PROF_INLINE
+#define INSTR_PROF_NULLPTR NULL
 #endif
 
 #ifndef offsetof
@@ -416,7 +424,7 @@
  * \brief Return the \c ValueProfRecord header size including the
  * padding bytes.
  */
-INSTR_PROF_INLINE
+INSTR_PROF_VISIBILITY INSTR_PROF_INLINE
 uint32_t getValueProfRecordHeaderSize(uint32_t NumValueSites) {
   uint32_t Size = offsetof(ValueProfRecord, SiteCountArray) +
                   sizeof(uint8_t) * NumValueSites;
@@ -429,7 +437,7 @@
  * \brief Return the total size of the value profile record including the
  * header and the value data.
  */
-INSTR_PROF_INLINE
+INSTR_PROF_VISIBILITY INSTR_PROF_INLINE
 uint32_t getValueProfRecordSize(uint32_t NumValueSites,
                                 uint32_t NumValueData) {
   return getValueProfRecordHeaderSize(NumValueSites) +
@@ -439,7 +447,7 @@
 /*!
  * \brief Return the pointer to the start of value data array.
  */
-INSTR_PROF_INLINE
+INSTR_PROF_VISIBILITY INSTR_PROF_INLINE
 InstrProfValueData *getValueProfRecordValueData(ValueProfRecord *This) {
   return (InstrProfValueData *)((char *)This + getValueProfRecordHeaderSize(
                                                    This->NumValueSites));
@@ -448,7 +456,7 @@
 /*!
  * \brief Return the total number of value data for \c This record.
  */
-INSTR_PROF_INLINE
+INSTR_PROF_VISIBILITY INSTR_PROF_INLINE
 uint32_t getValueProfRecordNumValueData(ValueProfRecord *This) {
   uint32_t NumValueData = 0;
   uint32_t I;
@@ -460,7 +468,7 @@
 /*!
  * \brief Use this method to advance to the next \c This \c ValueProfRecord.
  */
-INSTR_PROF_INLINE
+INSTR_PROF_VISIBILITY INSTR_PROF_INLINE
 ValueProfRecord *getValueProfRecordNext(ValueProfRecord *This) {
   uint32_t NumValueData = getValueProfRecordNumValueData(This);
   return (ValueProfRecord *)((char *)This +
@@ -471,7 +479,7 @@
 /*!
  * \brief Return the first \c ValueProfRecord instance.
  */
-INSTR_PROF_INLINE
+INSTR_PROF_VISIBILITY INSTR_PROF_INLINE
 ValueProfRecord *getFirstValueProfRecord(ValueProfData *This) {
   return (ValueProfRecord *)((char *)This + sizeof(ValueProfData));
 }
@@ -482,13 +490,11 @@
  * Return the total size in bytes of the on-disk value profile data
  * given the data stored in Record.
  */
-uint32_t getValueProfDataSize(ValueProfRecordClosure *Closure) {
+INSTR_PROF_VISIBILITY uint32_t
+getValueProfDataSize(ValueProfRecordClosure *Closure) {
   uint32_t Kind;
   uint32_t TotalSize = sizeof(ValueProfData);
   const void *Record = Closure->Record;
-  uint32_t NumValueKinds = Closure->GetNumValueKinds(Record);
-  if (NumValueKinds == 0)
-    return TotalSize;
 
   for (Kind = IPVK_First; Kind <= IPVK_Last; Kind++) {
     uint32_t NumValueSites = Closure->GetNumValueSites(Record, Kind);
@@ -504,9 +510,10 @@
  * Extract value profile data of a function for the profile kind \c ValueKind
  * from the \c Closure and serialize the data into \c This record instance.
  */
-void serializeValueProfRecordFrom(ValueProfRecord *This,
-                                  ValueProfRecordClosure *Closure,
-                                  uint32_t ValueKind, uint32_t NumValueSites) {
+INSTR_PROF_VISIBILITY void
+serializeValueProfRecordFrom(ValueProfRecord *This,
+                             ValueProfRecordClosure *Closure,
+                             uint32_t ValueKind, uint32_t NumValueSites) {
   uint32_t S;
   const void *Record = Closure->Record;
   This->Kind = ValueKind;
@@ -524,12 +531,16 @@
 /*!
  * Extract value profile data of a function  from the \c Closure
  * and serialize the data into \c DstData if it is not NULL or heap
- * memory allocated by the \c Closure's allocator method.
+ * memory allocated by the \c Closure's allocator method. If \c
+ * DstData is not null, the caller is expected to set the TotalSize
+ * in DstData.
  */
-ValueProfData *serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
-                                          ValueProfData *DstData) {
+INSTR_PROF_VISIBILITY ValueProfData *
+serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
+                           ValueProfData *DstData) {
   uint32_t Kind;
-  uint32_t TotalSize = getValueProfDataSize(Closure);
+  uint32_t TotalSize =
+      DstData ? DstData->TotalSize : getValueProfDataSize(Closure);
 
   ValueProfData *VPD =
       DstData ? DstData : Closure->AllocValueProfData(TotalSize);
@@ -547,144 +558,15 @@
   return VPD;
 }
 
-/*
- * The value profiler runtime library stores the value profile data
- * for a given function in \c NumValueSites and \c Nodes structures.
- * \c ValueProfRuntimeRecord class is used to encapsulate the runtime
- * profile data and provides fast interfaces to retrieve the profile
- * information. This interface is used to initialize the runtime record
- * and pre-compute the information needed for efficient implementation
- * of callbacks required by ValueProfRecordClosure class.
- */
-int initializeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord,
-                                     const uint16_t *NumValueSites,
-                                     ValueProfNode **Nodes) {
-  unsigned I, J, S = 0, NumValueKinds = 0;
-  RuntimeRecord->NumValueSites = NumValueSites;
-  RuntimeRecord->Nodes = Nodes;
-  for (I = 0; I <= IPVK_Last; I++) {
-    uint16_t N = NumValueSites[I];
-    if (!N) {
-      RuntimeRecord->SiteCountArray[I] = 0;
-      continue;
-    }
-    NumValueKinds++;
-    RuntimeRecord->SiteCountArray[I] = (uint8_t *)calloc(N, 1);
-    if (!RuntimeRecord->SiteCountArray[I])
-      return 1;
-    RuntimeRecord->NodesKind[I] = Nodes ? &Nodes[S] : NULL;
-    for (J = 0; J < N; J++) {
-      /* Compute value count for each site. */
-      uint32_t C = 0;
-      ValueProfNode *Site = Nodes ? RuntimeRecord->NodesKind[I][J] : NULL;
-      while (Site) {
-        C++;
-        Site = Site->Next;
-      }
-      if (C > UCHAR_MAX)
-        C = UCHAR_MAX;
-      RuntimeRecord->SiteCountArray[I][J] = C;
-    }
-    S += N;
-  }
-  RuntimeRecord->NumValueKinds = NumValueKinds;
-  return 0;
-}
-
-void finalizeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord) {
-  unsigned I;
-  for (I = 0; I <= IPVK_Last; I++) {
-    if (RuntimeRecord->SiteCountArray[I])
-      free(RuntimeRecord->SiteCountArray[I]);
-  }
-}
-
-/* ValueProfRecordClosure Interface implementation for
- * ValueProfDataRuntimeRecord.  */
-uint32_t getNumValueKindsRT(const void *R) {
-  return ((const ValueProfRuntimeRecord *)R)->NumValueKinds;
-}
-
-uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
-  return ((const ValueProfRuntimeRecord *)R)->NumValueSites[VK];
-}
-
-uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK, uint32_t S) {
-  const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
-  return Record->SiteCountArray[VK][S];
-}
-
-uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
-  unsigned I, S = 0;
-  const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
-  if (Record->SiteCountArray[VK] == 0)
-    return 0;
-  for (I = 0; I < Record->NumValueSites[VK]; I++)
-    S += Record->SiteCountArray[VK][I];
-  return S;
-}
-
-void getValueForSiteRT(const void *R, InstrProfValueData *Dst, uint32_t VK,
-                       uint32_t S) {
-  unsigned I, N = 0;
-  const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
-  N = getNumValueDataForSiteRT(R, VK, S);
-  if (N == 0)
-    return;
-  ValueProfNode *VNode = Record->NodesKind[VK][S];
-  for (I = 0; I < N; I++) {
-    Dst[I] = VNode->VData;
-    VNode = VNode->Next;
-  }
-}
-
-ValueProfData *allocValueProfDataRT(size_t TotalSizeInBytes) {
-  return (ValueProfData *)calloc(TotalSizeInBytes, 1);
-}
-
-static ValueProfRecordClosure RTRecordClosure = {0,
-                                                 getNumValueKindsRT,
-                                                 getNumValueSitesRT,
-                                                 getNumValueDataRT,
-                                                 getNumValueDataForSiteRT,
-                                                 0,
-                                                 getValueForSiteRT,
-                                                 allocValueProfDataRT};
-
-/*
- * Return the size of ValueProfData structure to store data
- * recorded in the runtime record.
- */
-uint32_t getValueProfDataSizeRT(const ValueProfRuntimeRecord *Record) {
-  RTRecordClosure.Record = Record;
-  return getValueProfDataSize(&RTRecordClosure);
-}
-
-/*
- * Return a ValueProfData instance that stores the data collected
- * from runtime. If \c DstData is provided by the caller, the value
- * profile data will be store in *DstData and DstData is returned,
- * otherwise the method will allocate space for the value data and
- * return pointer to the newly allocated space.
- */
-ValueProfData *
-serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record,
-                             ValueProfData *DstData) {
-  RTRecordClosure.Record = Record;
-  return serializeValueProfDataFrom(&RTRecordClosure, DstData);
-}
-
-
 #undef INSTR_PROF_COMMON_API_IMPL
 #endif /* INSTR_PROF_COMMON_API_IMPL */
 
 /*============================================================================*/
 
-
 #ifndef INSTR_PROF_DATA_DEFINED
 
-#ifndef INSTR_PROF_DATA_INC_
-#define INSTR_PROF_DATA_INC_
+#ifndef INSTR_PROF_DATA_INC
+#define INSTR_PROF_DATA_INC
 
 /* Helper macros.  */
 #define INSTR_PROF_SIMPLE_QUOTE(x) #x
@@ -707,7 +589,7 @@
         (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129
 
 /* Raw profile format version (start from 1). */
-#define INSTR_PROF_RAW_VERSION 3
+#define INSTR_PROF_RAW_VERSION 4
 /* Indexed profile format version (start from 1). */
 #define INSTR_PROF_INDEX_VERSION 4
 /* Coverage mapping format version (start from 0). */
@@ -717,14 +599,27 @@
  * version for other variants of profile. We set the lowest bit of the upper 8
  * bits (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentation
  * generated profile, and 0 if this is a Clang FE generated profile.
-*/
+ */
 #define VARIANT_MASKS_ALL 0xff00000000000000ULL
 #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
+#define VARIANT_MASK_IR_PROF (0x1ULL << 56)
+#define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version
+#define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime
+
+/* The variable that holds the name of the profile data
+ * specified via command line. */
+#define INSTR_PROF_PROFILE_NAME_VAR __llvm_profile_filename
 
 /* Runtime section names and name strings.  */
 #define INSTR_PROF_DATA_SECT_NAME __llvm_prf_data
 #define INSTR_PROF_NAME_SECT_NAME __llvm_prf_names
 #define INSTR_PROF_CNTS_SECT_NAME __llvm_prf_cnts
+/* Array of pointers. Each pointer points to a list
+ * of value nodes associated with one value site.
+ */
+#define INSTR_PROF_VALS_SECT_NAME __llvm_prf_vals
+/* Value profile nodes section. */
+#define INSTR_PROF_VNODES_SECT_NAME __llvm_prf_vnds
 #define INSTR_PROF_COVMAP_SECT_NAME __llvm_covmap
 
 #define INSTR_PROF_DATA_SECT_NAME_STR                                          \
@@ -735,6 +630,10 @@
   INSTR_PROF_QUOTE(INSTR_PROF_CNTS_SECT_NAME)
 #define INSTR_PROF_COVMAP_SECT_NAME_STR                                        \
   INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_SECT_NAME)
+#define INSTR_PROF_VALS_SECT_NAME_STR                                          \
+  INSTR_PROF_QUOTE(INSTR_PROF_VALS_SECT_NAME)
+#define INSTR_PROF_VNODES_SECT_NAME_STR                                        \
+  INSTR_PROF_QUOTE(INSTR_PROF_VNODES_SECT_NAME)
 
 /* Macros to define start/stop section symbol for a given
  * section on Linux. For instance
@@ -764,16 +663,7 @@
   uint64_t Count;
 } InstrProfValueData;
 
-/* This is an internal data structure used by value profiler. It
- * is defined here to allow serialization code sharing by LLVM
- * to be used in unit test.
- */
-typedef struct ValueProfNode {
-  InstrProfValueData VData;
-  struct ValueProfNode *Next;
-} ValueProfNode;
-
-#endif /* INSTR_PROF_DATA_INC_ */
+#endif /* INSTR_PROF_DATA_INC */
 
 #else
 #undef INSTR_PROF_DATA_DEFINED
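
For context on the traversal helpers declared above: getFirstValueProfRecord, getValueProfRecordNext, and getValueProfRecordValueData are meant to be chained to walk every record in a serialized ValueProfData blob. The sketch below is illustrative only and not part of this patch; the helper name countAllValueData is made up here, while the field names (NumValueKinds, NumValueSites, SiteCountArray) are the ones produced by the INSTR_PROF_VALUE_PROF_DATA macros in this file.

  static uint32_t countAllValueData(ValueProfData *VPD) {
    uint32_t I, S, Total = 0;
    ValueProfRecord *VR = getFirstValueProfRecord(VPD);
    /* One ValueProfRecord is emitted per value kind that has data. */
    for (I = 0; I < VPD->NumValueKinds; I++) {
      for (S = 0; S < VR->NumValueSites; S++)
        Total += VR->SiteCountArray[S];
      VR = getValueProfRecordNext(VR);
    }
    return Total;
  }
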
diff --git a/lib/profile/InstrProfiling.c b/lib/profile/InstrProfiling.c
index 24820ec..1dcd05b 100644
--- a/lib/profile/InstrProfiling.c
+++ b/lib/profile/InstrProfiling.c
@@ -16,15 +16,27 @@
 #define INSTR_PROF_VALUE_PROF_DATA
 #include "InstrProfData.inc"
 
-char *(*GetEnvHook)(const char *) = 0;
+COMPILER_RT_VISIBILITY char *(*GetEnvHook)(const char *) = 0;
 
-COMPILER_RT_WEAK uint64_t __llvm_profile_raw_version = INSTR_PROF_RAW_VERSION;
+COMPILER_RT_WEAK uint64_t INSTR_PROF_RAW_VERSION_VAR = INSTR_PROF_RAW_VERSION;
+
+COMPILER_RT_WEAK char INSTR_PROF_PROFILE_NAME_VAR[1] = {0};
 
 COMPILER_RT_VISIBILITY uint64_t __llvm_profile_get_magic(void) {
   return sizeof(void *) == sizeof(uint64_t) ? (INSTR_PROF_RAW_MAGIC_64)
                                             : (INSTR_PROF_RAW_MAGIC_32);
 }
 
+static unsigned ProfileDumped = 0;
+
+COMPILER_RT_VISIBILITY unsigned lprofProfileDumped() {
+  return ProfileDumped;
+}
+
+COMPILER_RT_VISIBILITY void lprofSetProfileDumped() {
+  ProfileDumped = 1;
+}
+
 /* Return the number of bytes needed to add to SizeInBytes to make it
  *   the result a multiple of 8.
  */
@@ -61,9 +73,10 @@
       ValueProfNode *CurrentVNode = ValueCounters[i];
 
       while (CurrentVNode) {
-        CurrentVNode->VData.Count = 0;
+        CurrentVNode->Count = 0;
         CurrentVNode = CurrentVNode->Next;
       }
     }
   }
+  ProfileDumped = 0;
 }
diff --git a/lib/profile/InstrProfiling.h b/lib/profile/InstrProfiling.h
index fdb8a70..945f1c4 100644
--- a/lib/profile/InstrProfiling.h
+++ b/lib/profile/InstrProfiling.h
@@ -11,6 +11,8 @@
 #define PROFILE_INSTRPROFILING_H_
 
 #include "InstrProfilingPort.h"
+
+#define INSTR_PROF_VISIBILITY COMPILER_RT_VISIBILITY
 #include "InstrProfData.inc"
 
 enum ValueKind {
@@ -30,6 +32,12 @@
 #include "InstrProfData.inc"
 } __llvm_profile_header;
 
+typedef struct ValueProfNode * PtrToNodeT;
+typedef struct ValueProfNode {
+#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Initializer) Type Name;
+#include "InstrProfData.inc"
+} ValueProfNode;
+
 /*!
  * \brief Get number of bytes necessary to pad the argument to eight
  * byte boundary.
@@ -55,6 +63,8 @@
 const char *__llvm_profile_end_names(void);
 uint64_t *__llvm_profile_begin_counters(void);
 uint64_t *__llvm_profile_end_counters(void);
+ValueProfNode *__llvm_profile_begin_vnodes();
+ValueProfNode *__llvm_profile_end_vnodes();
 
 /*!
  * \brief Clear profile counters to zero.
@@ -63,6 +73,27 @@
 void __llvm_profile_reset_counters(void);
 
 /*!
+ * \brief Merge profile data from buffer.
+ *
+ * Read profile data from buffer \p Profile and merge with
+ * in-process profile counters. The client is expected to
+ * have checked, or to already know, that the profile data in the
+ * buffer matches the in-process counter structure before
+ * calling this function.
+ */
+void __llvm_profile_merge_from_buffer(const char *Profile, uint64_t Size);
+
+/*! \brief Check if the profile in the buffer matches the current binary.
+ *
+ *  Returns 0 (success) if the profile data in buffer \p Profile with size
+ *  \p Size was generated by the same binary and therefore structurally
+ *  matches the in-process counters. If the profile data in the buffer is
+ *  not compatible, the interface returns 1 (failure).
+ */
+int __llvm_profile_check_compatibility(const char *Profile,
+                                       uint64_t Size);
+
+/*!
  * \brief Counts the number of times a target value is seen.
  *
  * Records the target value for the CounterIndex if not seen before. Otherwise,
@@ -73,15 +104,7 @@
 void INSTR_PROF_VALUE_PROF_FUNC(
 #define VALUE_PROF_FUNC_PARAM(ArgType, ArgName, ArgLLVMType) ArgType ArgName
 #include "InstrProfData.inc"
-);
-
-/*!
- * \brief Prepares the value profiling data for output.
- *
- * Returns an array of pointers to value profile data.
- */
-struct ValueProfData;
-struct ValueProfData **__llvm_profile_gather_value_data(uint64_t *Size);
+    );
 
 /*!
  * \brief Write instrumentation data to the current file.
@@ -89,13 +112,34 @@
  * Writes to the file with the last name given to \a *
  * __llvm_profile_set_filename(),
  * or if it hasn't been called, the \c LLVM_PROFILE_FILE environment variable,
- * or if that's not set, the last name given to
- * \a __llvm_profile_override_default_filename(), or if that's not set,
- * \c "default.profraw".
+ * or if that's not set, the last name set to INSTR_PROF_PROFILE_NAME_VAR,
+ * or if that's not set, \c "default.profraw".
  */
 int __llvm_profile_write_file(void);
 
 /*!
+ * \brief This is a wrapper interface to \c __llvm_profile_write_file.
+ * After this interface is invoked, an already-dumped flag will be set
+ * so that the profile won't be dumped again during program exit.
+ * Invoking __llvm_profile_reset_counters will clear
+ * the flag. This interface is designed to be used to collect profile
+ * data from user-selected hot regions. The use model is
+ *      __llvm_profile_reset_counters();
+ *      ... hot region 1
+ *      __llvm_profile_dump();
+ *      ... some other code
+ *      __llvm_profile_reset_counters();
+ *      ... hot region 2
+ *      __llvm_profile_dump();
+ *
+ *  It is expected that on-line profile merging is enabled, with the \c %m
+ *  specifier used in the profile filename. If merging is not turned on, the
+ *  user is expected to invoke __llvm_profile_set_filename to specify
+ *  different profile names for different regions before dumping, to avoid
+ *  clobbering previously written profile data.
+ */
+int __llvm_profile_dump(void);
+
+/*!
  * \brief Set the filename for writing instrumentation data.
  *
  * Sets the filename to be used for subsequent calls to
@@ -106,25 +150,22 @@
  */
 void __llvm_profile_set_filename(const char *Name);
 
-/*!
- * \brief Set the filename for writing instrumentation data, unless the
- * \c LLVM_PROFILE_FILE environment variable was set.
- *
- * Unless overridden, sets the filename to be used for subsequent calls to
- * \a __llvm_profile_write_file().
- *
- * \c Name is not copied, so it must remain valid.  Passing NULL resets the
- * filename logic to the default behaviour (unless the \c LLVM_PROFILE_FILE
- * was set in which case it has no effect).
- */
-void __llvm_profile_override_default_filename(const char *Name);
-
 /*! \brief Register to write instrumentation data to file at exit. */
 int __llvm_profile_register_write_file_atexit(void);
 
 /*! \brief Initialize file handling. */
 void __llvm_profile_initialize_file(void);
 
+/*!
+ * \brief Return the path prefix (excluding the base filename) of the profile data.
+ * This is useful for users of \c -fprofile-generate=./path_prefix who do
+ * not care about the default raw profile name. It is also useful for collecting
+ * more than one profile data file dumped into the same directory (online
+ * merge mode is turned on for instrumented programs with shared libs).
+ * Side effect: this API call may allocate memory via malloc.
+ */
+const char *__llvm_profile_get_path_prefix();
+
 /*! \brief Get the magic token for the file format. */
 uint64_t __llvm_profile_get_magic(void);
 
@@ -135,4 +176,34 @@
 uint64_t __llvm_profile_get_data_size(const __llvm_profile_data *Begin,
                                       const __llvm_profile_data *End);
 
+/*!
+ * This variable is defined in InstrProfilingRuntime.cc as a hidden
+ * symbol. Its main purpose is to enable a profile runtime user to
+ * bypass runtime initialization code -- if the client code explicitly
+ * defines this variable, then InstrProfilingRuntime.o won't be linked in.
+ * Note that this variable's visibility needs to be hidden so that the
+ * definition of this variable in an instrumented shared library won't
+ * affect the runtime initialization decision of the main program.
+ * The variable name is \c __llvm_profile_runtime. */
+COMPILER_RT_VISIBILITY extern int INSTR_PROF_PROFILE_RUNTIME_VAR;
+
+/*!
+ * This variable is defined in InstrProfiling.c. Its main purpose is to
+ * encode the raw profile version value and other format related information
+ * such as whether the profile is from IR based instrumentation. The variable
+ * is defined as weak so that the compiler can emit an overriding definition
+ * depending on user options. Since we don't support mixing FE and IR based
+ * data in the same raw profile data file (in other words, shared libs and
+ * main program are expected to be instrumented in the same way), there is
+ * no need for this variable to be hidden.
+ */
+extern uint64_t INSTR_PROF_RAW_VERSION_VAR; /* __llvm_profile_raw_version */
+
+/*!
+ * This variable is a weak symbol defined in InstrProfiling.c. It allows
+ * compiler instrumentation to provide an overriding definition with a value
+ * from the compiler command line. This variable has default visibility.
+ */
+extern char INSTR_PROF_PROFILE_NAME_VAR[1]; /* __llvm_profile_filename. */
+
 #endif /* PROFILE_INSTRPROFILING_H_ */
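
The __llvm_profile_dump comment above describes the hot-region use model. The following is a minimal client-side sketch of that model, not part of this patch: hotLoop1/hotLoop2 and the filename pattern are hypothetical, and the prototypes simply restate declarations from this header.

  void __llvm_profile_reset_counters(void);
  void __llvm_profile_set_filename(const char *Name);
  int __llvm_profile_dump(void);

  void hotLoop1(void);
  void hotLoop2(void);

  int main(void) {
    /* %m turns on on-line merging so repeated dumps do not clobber each other. */
    __llvm_profile_set_filename("hot-regions-%m.profraw");

    __llvm_profile_reset_counters();
    hotLoop1();
    __llvm_profile_dump();

    __llvm_profile_reset_counters();
    hotLoop2();
    __llvm_profile_dump();
    return 0;
  }
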
diff --git a/lib/profile/InstrProfilingBuffer.c b/lib/profile/InstrProfilingBuffer.c
index 1d95534..ac259e8 100644
--- a/lib/profile/InstrProfilingBuffer.c
+++ b/lib/profile/InstrProfilingBuffer.c
@@ -46,14 +46,14 @@
 }
 
 COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer(char *Buffer) {
-  return llvmWriteProfData(llvmBufferWriter, Buffer, 0, 0);
+  return lprofWriteData(lprofBufferWriter, Buffer, 0);
 }
 
 COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer_internal(
     char *Buffer, const __llvm_profile_data *DataBegin,
     const __llvm_profile_data *DataEnd, const uint64_t *CountersBegin,
     const uint64_t *CountersEnd, const char *NamesBegin, const char *NamesEnd) {
-  return llvmWriteProfDataImpl(llvmBufferWriter, Buffer, DataBegin, DataEnd,
-                               CountersBegin, CountersEnd, 0, 0, NamesBegin,
-                               NamesEnd);
+  return lprofWriteDataImpl(lprofBufferWriter, Buffer, DataBegin, DataEnd,
+                            CountersBegin, CountersEnd, 0, NamesBegin,
+                            NamesEnd);
 }
diff --git a/lib/profile/InstrProfilingFile.c b/lib/profile/InstrProfilingFile.c
index ad4124b..fab479d 100644
--- a/lib/profile/InstrProfilingFile.c
+++ b/lib/profile/InstrProfilingFile.c
@@ -14,12 +14,80 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-
-#define UNCONST(ptr) ((void *)(uintptr_t)(ptr))
-
 #ifdef _MSC_VER
-#define snprintf _snprintf
+/* For _alloca. */
+#include <malloc.h>
 #endif
+#if defined(_WIN32)
+#include "WindowsMMap.h"
+/* For _chsize_s */
+#include <io.h>
+#else
+#include <sys/file.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#if defined(__linux__)
+#include <sys/types.h>
+#endif
+#endif
+
+/* Specifies where the profile name comes from.
+ * The order of the enumerators defines their
+ * precedence; reordering them may change
+ * runtime behavior. */
+typedef enum ProfileNameSpecifier {
+  PNS_unknown = 0,
+  PNS_default,
+  PNS_command_line,
+  PNS_environment,
+  PNS_runtime_api
+} ProfileNameSpecifier;
+
+static const char *getPNSStr(ProfileNameSpecifier PNS) {
+  switch (PNS) {
+  case PNS_default:
+    return "default setting";
+  case PNS_command_line:
+    return "command line";
+  case PNS_environment:
+    return "environment variable";
+  case PNS_runtime_api:
+    return "runtime API";
+  default:
+    return "Unknown";
+  }
+}
+
+#define MAX_PID_SIZE 16
+/* Data structure holding the result of parsed filename pattern. */
+typedef struct lprofFilename {
+  /* File name string possibly with %p or %h specifiers. */
+  const char *FilenamePat;
+  /* A flag indicating if FilenamePat's memory is allocated
+   * by runtime. */
+  unsigned OwnsFilenamePat;
+  const char *ProfilePathPrefix;
+  char PidChars[MAX_PID_SIZE];
+  char Hostname[COMPILER_RT_MAX_HOSTLEN];
+  unsigned NumPids;
+  unsigned NumHosts;
+  /* When in-process merging is enabled, this parameter specifies
+   * the total number of profile data files shared by all the processes
+   * spawned from the same binary. By default the value is 1. If merging
+   * is not enabled, its value should be 0. This parameter is specified
+   * by the %[0-9]m specifier. For instance %2m enables merging using
+   * 2 profile data files. %1m is equivalent to %m. Also, the %m specifier
+   * can only appear once, at the end of the name pattern. */
+  unsigned MergePoolSize;
+  ProfileNameSpecifier PNS;
+} lprofFilename;
+
+lprofFilename lprofCurFilename = {0, 0, 0, {0}, {0}, 0, 0, 0, PNS_unknown};
+
+int getpid(void);
+static int getCurFilenameLength();
+static const char *getCurFilename(char *FilenameBuf);
+static unsigned doMerging() { return lprofCurFilename.MergePoolSize; }
 
 /* Return 1 if there is an error, otherwise return  0.  */
 static uint32_t fileWriter(ProfDataIOVec *IOVecs, uint32_t NumIOVecs,
@@ -35,61 +103,147 @@
 }
 
 COMPILER_RT_VISIBILITY ProfBufferIO *
-llvmCreateBufferIOInternal(void *File, uint32_t BufferSz) {
-  CallocHook = calloc;
-  FreeHook = free;
-  return llvmCreateBufferIO(fileWriter, File, BufferSz);
-}
-
-static int writeFile(FILE *File) {
-  const char *BufferSzStr = 0;
-  uint64_t ValueDataSize = 0;
-  struct ValueProfData **ValueDataArray =
-      __llvm_profile_gather_value_data(&ValueDataSize);
+lprofCreateBufferIOInternal(void *File, uint32_t BufferSz) {
   FreeHook = &free;
-  CallocHook = &calloc;
-  BufferSzStr = getenv("LLVM_VP_BUFFER_SIZE");
-  if (BufferSzStr && BufferSzStr[0])
-    VPBufferSize = atoi(BufferSzStr);
-  return llvmWriteProfData(fileWriter, File, ValueDataArray, ValueDataSize);
+  DynamicBufferIOBuffer = (uint8_t *)calloc(BufferSz, 1);
+  VPBufferSize = BufferSz;
+  return lprofCreateBufferIO(fileWriter, File);
 }
 
-static int writeFileWithName(const char *OutputName) {
+static void setupIOBuffer() {
+  const char *BufferSzStr = 0;
+  BufferSzStr = getenv("LLVM_VP_BUFFER_SIZE");
+  if (BufferSzStr && BufferSzStr[0]) {
+    VPBufferSize = atoi(BufferSzStr);
+    DynamicBufferIOBuffer = (uint8_t *)calloc(VPBufferSize, 1);
+  }
+}
+
+/* Read profile data in \c ProfileFile and merge with in-memory
+   profile counters. Returns -1 if there is a fatal error, otherwise
+   returns 0.
+*/
+static int doProfileMerging(FILE *ProfileFile) {
+  uint64_t ProfileFileSize;
+  char *ProfileBuffer;
+
+  if (fseek(ProfileFile, 0L, SEEK_END) == -1) {
+    PROF_ERR("Unable to merge profile data, unable to get size: %s\n",
+             strerror(errno));
+    return -1;
+  }
+  ProfileFileSize = ftell(ProfileFile);
+
+  /* Restore file offset.  */
+  if (fseek(ProfileFile, 0L, SEEK_SET) == -1) {
+    PROF_ERR("Unable to merge profile data, unable to rewind: %s\n",
+             strerror(errno));
+    return -1;
+  }
+
+  /* Nothing to merge.  */
+  if (ProfileFileSize < sizeof(__llvm_profile_header)) {
+    if (ProfileFileSize)
+      PROF_WARN("Unable to merge profile data: %s\n",
+                "source profile file is too small.");
+    return 0;
+  }
+
+  ProfileBuffer = mmap(NULL, ProfileFileSize, PROT_READ, MAP_SHARED | MAP_FILE,
+                       fileno(ProfileFile), 0);
+  if (ProfileBuffer == MAP_FAILED) {
+    PROF_ERR("Unable to merge profile data, mmap failed: %s\n",
+             strerror(errno));
+    return -1;
+  }
+
+  if (__llvm_profile_check_compatibility(ProfileBuffer, ProfileFileSize)) {
+    (void)munmap(ProfileBuffer, ProfileFileSize);
+    PROF_WARN("Unable to merge profile data: %s\n",
+              "source profile file is not compatible.");
+    return 0;
+  }
+
+  /* Now start merging */
+  __llvm_profile_merge_from_buffer(ProfileBuffer, ProfileFileSize);
+  (void)munmap(ProfileBuffer, ProfileFileSize);
+
+  return 0;
+}
+
+/* Open the profile data file for merging. It opens the file in r+b mode with
+ * file locking. If the file has content that is compatible with the
+ * current process, it also reads in the profile data in the file and merges
+ * it with the in-memory counters. After the profile data is merged in memory,
+ * the original profile data is truncated so it is ready for the profile
+ * dumper. With profile merging enabled, each executable as well as any of
+ * its instrumented shared libraries dumps profile data into its own data file.
+*/
+static FILE *openFileForMerging(const char *ProfileFileName) {
+  FILE *ProfileFile;
+  int rc;
+
+  ProfileFile = lprofOpenFileEx(ProfileFileName);
+  if (!ProfileFile)
+    return NULL;
+
+  rc = doProfileMerging(ProfileFile);
+  if (rc || COMPILER_RT_FTRUNCATE(ProfileFile, 0L) ||
+      fseek(ProfileFile, 0L, SEEK_SET) == -1) {
+    PROF_ERR("Profile Merging of file %s failed: %s\n", ProfileFileName,
+             strerror(errno));
+    fclose(ProfileFile);
+    return NULL;
+  }
+  fseek(ProfileFile, 0L, SEEK_SET);
+  return ProfileFile;
+}
+
+/* Write profile data to file \c OutputName.  */
+static int writeFile(const char *OutputName) {
   int RetVal;
   FILE *OutputFile;
-  if (!OutputName || !OutputName[0])
-    return -1;
 
-  /* Append to the file to support profiling multiple shared objects. */
-  OutputFile = fopen(OutputName, "ab");
+  if (!doMerging())
+    OutputFile = fopen(OutputName, "ab");
+  else
+    OutputFile = openFileForMerging(OutputName);
+
   if (!OutputFile)
     return -1;
 
-  RetVal = writeFile(OutputFile);
+  FreeHook = &free;
+  setupIOBuffer();
+  RetVal = lprofWriteData(fileWriter, OutputFile, lprofGetVPDataReader());
 
   fclose(OutputFile);
   return RetVal;
 }
 
-COMPILER_RT_WEAK int __llvm_profile_OwnsFilename = 0;
-COMPILER_RT_WEAK const char *__llvm_profile_CurrentFilename = NULL;
-
 static void truncateCurrentFile(void) {
   const char *Filename;
+  char *FilenameBuf;
   FILE *File;
+  int Length;
 
-  Filename = __llvm_profile_CurrentFilename;
-  if (!Filename || !Filename[0])
+  Length = getCurFilenameLength();
+  FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
+  Filename = getCurFilename(FilenameBuf);
+  if (!Filename)
     return;
 
   /* Create the directory holding the file, if needed. */
-  if (strchr(Filename, '/') || strchr(Filename, '\\')) {
-    char *Copy = malloc(strlen(Filename) + 1);
-    strcpy(Copy, Filename);
+  if (lprofFindFirstDirSeparator(Filename)) {
+    char *Copy = (char *)COMPILER_RT_ALLOCA(Length + 1);
+    strncpy(Copy, Filename, Length + 1);
     __llvm_profile_recursive_mkdir(Copy);
-    free(Copy);
   }
 
+  /* Bypass file truncation to allow online raw profile
+   * merging. */
+  if (lprofCurFilename.MergePoolSize)
+    return;
+
   /* Truncate the file.  Later we'll reopen and append. */
   File = fopen(Filename, "w");
   if (!File)
@@ -97,137 +251,318 @@
   fclose(File);
 }
 
-static void setFilename(const char *Filename, int OwnsFilename) {
-  /* Check if this is a new filename and therefore needs truncation. */
-  int NewFile = !__llvm_profile_CurrentFilename ||
-      (Filename && strcmp(Filename, __llvm_profile_CurrentFilename));
-  if (__llvm_profile_OwnsFilename)
-    free(UNCONST(__llvm_profile_CurrentFilename));
-
-  __llvm_profile_CurrentFilename = Filename;
-  __llvm_profile_OwnsFilename = OwnsFilename;
-
-  /* If not a new file, append to support profiling multiple shared objects. */
-  if (NewFile)
-    truncateCurrentFile();
+static const char *DefaultProfileName = "default.profraw";
+static void resetFilenameToDefault(void) {
+  if (lprofCurFilename.FilenamePat && lprofCurFilename.OwnsFilenamePat) {
+    free((void *)lprofCurFilename.FilenamePat);
+  }
+  memset(&lprofCurFilename, 0, sizeof(lprofCurFilename));
+  lprofCurFilename.FilenamePat = DefaultProfileName;
+  lprofCurFilename.PNS = PNS_default;
 }
 
-static void resetFilenameToDefault(void) { setFilename("default.profraw", 0); }
+static int containsMergeSpecifier(const char *FilenamePat, int I) {
+  return (FilenamePat[I] == 'm' ||
+          (FilenamePat[I] >= '1' && FilenamePat[I] <= '9' &&
+           /* If FilenamePat[I] is not '\0', the next byte is guaranteed
+            * to be in-bound as the string is null terminated. */
+           FilenamePat[I + 1] == 'm'));
+}
 
-int getpid(void);
-static int setFilenamePossiblyWithPid(const char *Filename) {
-#define MAX_PID_SIZE 16
-  char PidChars[MAX_PID_SIZE] = {0};
-  int NumPids = 0, PidLength = 0;
-  char *Allocated;
-  int I, J;
+/* Parses the pattern string \p FilenamePat and stores the result in the
+ * lprofCurFilename structure. */
+static int parseFilenamePattern(const char *FilenamePat,
+                                unsigned CopyFilenamePat) {
+  int NumPids = 0, NumHosts = 0, I;
+  char *PidChars = &lprofCurFilename.PidChars[0];
+  char *Hostname = &lprofCurFilename.Hostname[0];
+  int MergingEnabled = 0;
 
-  /* Reset filename on NULL, except with env var which is checked by caller. */
-  if (!Filename) {
-    resetFilenameToDefault();
-    return 0;
+  /* Clean up the cached prefix.  */
+  if (lprofCurFilename.ProfilePathPrefix)
+    free((void *)lprofCurFilename.ProfilePathPrefix);
+
+  /* Free a previously owned filename pattern before resetting the state;
+   * otherwise the memset below would leak it. */
+  if (lprofCurFilename.FilenamePat && lprofCurFilename.OwnsFilenamePat) {
+    free((void *)lprofCurFilename.FilenamePat);
   }
+  memset(&lprofCurFilename, 0, sizeof(lprofCurFilename));
 
+  if (!CopyFilenamePat)
+    lprofCurFilename.FilenamePat = FilenamePat;
+  else {
+    lprofCurFilename.FilenamePat = strdup(FilenamePat);
+    lprofCurFilename.OwnsFilenamePat = 1;
+  }
   /* Check the filename for "%p", which indicates a pid-substitution. */
-  for (I = 0; Filename[I]; ++I)
-    if (Filename[I] == '%' && Filename[++I] == 'p')
-      if (!NumPids++) {
-        PidLength = snprintf(PidChars, MAX_PID_SIZE, "%d", getpid());
-        if (PidLength <= 0)
+  for (I = 0; FilenamePat[I]; ++I)
+    if (FilenamePat[I] == '%') {
+      if (FilenamePat[++I] == 'p') {
+        if (!NumPids++) {
+          if (snprintf(PidChars, MAX_PID_SIZE, "%d", getpid()) <= 0) {
+            PROF_WARN(
+                "Unable to parse filename pattern %s. Using the default name.",
+                FilenamePat);
+            return -1;
+          }
+        }
+      } else if (FilenamePat[I] == 'h') {
+        if (!NumHosts++)
+          if (COMPILER_RT_GETHOSTNAME(Hostname, COMPILER_RT_MAX_HOSTLEN)) {
+            PROF_WARN(
+                "Unable to parse filename pattern %s. Using the default name.",
+                FilenamePat);
+            return -1;
+          }
+      } else if (containsMergeSpecifier(FilenamePat, I)) {
+        if (MergingEnabled) {
+          PROF_WARN("%%m specifier can only be specified once in %s.\n",
+                    FilenamePat);
           return -1;
+        }
+        MergingEnabled = 1;
+        if (FilenamePat[I] == 'm')
+          lprofCurFilename.MergePoolSize = 1;
+        else {
+          lprofCurFilename.MergePoolSize = FilenamePat[I] - '0';
+          I++; /* advance to 'm' */
+        }
       }
-  if (!NumPids) {
-    setFilename(Filename, 0);
-    return 0;
-  }
+    }
 
-  /* Allocate enough space for the substituted filename. */
-  Allocated = malloc(I + NumPids*(PidLength - 2) + 1);
-  if (!Allocated)
-    return -1;
-
-  /* Construct the new filename. */
-  for (I = 0, J = 0; Filename[I]; ++I)
-    if (Filename[I] == '%') {
-      if (Filename[++I] == 'p') {
-        memcpy(Allocated + J, PidChars, PidLength);
-        J += PidLength;
-      }
-      /* Drop any unknown substitutions. */
-    } else
-      Allocated[J++] = Filename[I];
-  Allocated[J] = 0;
-
-  /* Use the computed name. */
-  setFilename(Allocated, 1);
+  lprofCurFilename.NumPids = NumPids;
+  lprofCurFilename.NumHosts = NumHosts;
   return 0;
 }
 
-static int setFilenameFromEnvironment(void) {
-  const char *Filename = getenv("LLVM_PROFILE_FILE");
+static void parseAndSetFilename(const char *FilenamePat,
+                                ProfileNameSpecifier PNS,
+                                unsigned CopyFilenamePat) {
 
-  if (!Filename || !Filename[0])
-    return -1;
+  const char *OldFilenamePat = lprofCurFilename.FilenamePat;
+  ProfileNameSpecifier OldPNS = lprofCurFilename.PNS;
 
-  return setFilenamePossiblyWithPid(Filename);
-}
-
-static void setFilenameAutomatically(void) {
-  if (!setFilenameFromEnvironment())
+  if (PNS < OldPNS)
     return;
 
-  resetFilenameToDefault();
+  if (!FilenamePat)
+    FilenamePat = DefaultProfileName;
+
+  if (OldFilenamePat && !strcmp(OldFilenamePat, FilenamePat)) {
+    lprofCurFilename.PNS = PNS;
+    return;
+  }
+
+  /* When PNS >= OldPNS, the last one wins. */
+  if (!FilenamePat || parseFilenamePattern(FilenamePat, CopyFilenamePat))
+    resetFilenameToDefault();
+  lprofCurFilename.PNS = PNS;
+
+  if (!OldFilenamePat) {
+    PROF_NOTE("Set profile file path to \"%s\" via %s.\n",
+              lprofCurFilename.FilenamePat, getPNSStr(PNS));
+  } else {
+    PROF_NOTE("Override old profile path \"%s\" via %s to \"%s\" via %s.\n",
+              OldFilenamePat, getPNSStr(OldPNS), lprofCurFilename.FilenamePat,
+              getPNSStr(PNS));
+  }
+
+  truncateCurrentFile();
 }
 
+/* Return buffer length that is required to store the current profile
+ * filename with PID and hostname substitutions. */
+/* The length to hold uint64_t followed by 2 digit pool id including '_' */
+#define SIGLEN 24
+static int getCurFilenameLength() {
+  int Len;
+  if (!lprofCurFilename.FilenamePat || !lprofCurFilename.FilenamePat[0])
+    return 0;
+
+  if (!(lprofCurFilename.NumPids || lprofCurFilename.NumHosts ||
+        lprofCurFilename.MergePoolSize))
+    return strlen(lprofCurFilename.FilenamePat);
+
+  Len = strlen(lprofCurFilename.FilenamePat) +
+        lprofCurFilename.NumPids * (strlen(lprofCurFilename.PidChars) - 2) +
+        lprofCurFilename.NumHosts * (strlen(lprofCurFilename.Hostname) - 2);
+  if (lprofCurFilename.MergePoolSize)
+    Len += SIGLEN;
+  return Len;
+}
+
+/* Return the pointer to the current profile file name (after substituting
+ * PIDs and hostnames in the filename pattern). \p FilenameBuf is the buffer
+ * to store the resulting filename. If no substitution is needed, the
+ * current filename pattern string is directly returned. */
+static const char *getCurFilename(char *FilenameBuf) {
+  int I, J, PidLength, HostNameLength;
+  const char *FilenamePat = lprofCurFilename.FilenamePat;
+
+  if (!lprofCurFilename.FilenamePat || !lprofCurFilename.FilenamePat[0])
+    return 0;
+
+  if (!(lprofCurFilename.NumPids || lprofCurFilename.NumHosts ||
+        lprofCurFilename.MergePoolSize))
+    return lprofCurFilename.FilenamePat;
+
+  PidLength = strlen(lprofCurFilename.PidChars);
+  HostNameLength = strlen(lprofCurFilename.Hostname);
+  /* Construct the new filename. */
+  for (I = 0, J = 0; FilenamePat[I]; ++I)
+    if (FilenamePat[I] == '%') {
+      if (FilenamePat[++I] == 'p') {
+        memcpy(FilenameBuf + J, lprofCurFilename.PidChars, PidLength);
+        J += PidLength;
+      } else if (FilenamePat[I] == 'h') {
+        memcpy(FilenameBuf + J, lprofCurFilename.Hostname, HostNameLength);
+        J += HostNameLength;
+      } else if (containsMergeSpecifier(FilenamePat, I)) {
+        char LoadModuleSignature[SIGLEN];
+        int S;
+        int ProfilePoolId = getpid() % lprofCurFilename.MergePoolSize;
+        S = snprintf(LoadModuleSignature, SIGLEN, "%" PRIu64 "_%d",
+                     lprofGetLoadModuleSignature(), ProfilePoolId);
+        if (S == -1 || S > SIGLEN)
+          S = SIGLEN;
+        memcpy(FilenameBuf + J, LoadModuleSignature, S);
+        J += S;
+        if (FilenamePat[I] != 'm')
+          I++;
+      }
+      /* Drop any unknown substitutions. */
+    } else
+      FilenameBuf[J++] = FilenamePat[I];
+  FilenameBuf[J] = 0;
+
+  return FilenameBuf;
+}
+
+/* Returns the pointer to the environment variable
+ * string. Returns null if the env var is not set. */
+static const char *getFilenamePatFromEnv(void) {
+  const char *Filename = getenv("LLVM_PROFILE_FILE");
+  if (!Filename || !Filename[0])
+    return 0;
+  return Filename;
+}
+
+COMPILER_RT_VISIBILITY
+const char *__llvm_profile_get_path_prefix(void) {
+  int Length;
+  char *FilenameBuf, *Prefix;
+  const char *Filename, *PrefixEnd;
+
+  if (lprofCurFilename.ProfilePathPrefix)
+    return lprofCurFilename.ProfilePathPrefix;
+
+  Length = getCurFilenameLength();
+  FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
+  Filename = getCurFilename(FilenameBuf);
+  if (!Filename)
+    return "\0";
+
+  PrefixEnd = lprofFindLastDirSeparator(Filename);
+  if (!PrefixEnd)
+    return "\0";
+
+  Length = PrefixEnd - Filename + 1;
+  Prefix = (char *)malloc(Length + 1);
+  if (!Prefix) {
+    PROF_ERR("Failed to %s\n", "allocate memory.");
+    return "\0";
+  }
+  memcpy(Prefix, Filename, Length);
+  Prefix[Length] = '\0';
+  lprofCurFilename.ProfilePathPrefix = Prefix;
+  return Prefix;
+}
+
+/* This method is invoked by the runtime initialization hook
+ * InstrProfilingRuntime.o if it is linked in. Both the user-specified
+ * profile path (via -fprofile-instr-generate=) and the LLVM_PROFILE_FILE
+ * environment variable can override the default value. */
 COMPILER_RT_VISIBILITY
 void __llvm_profile_initialize_file(void) {
-  /* Check if the filename has been initialized. */
-  if (__llvm_profile_CurrentFilename)
-    return;
+  const char *EnvFilenamePat;
+  const char *SelectedPat = NULL;
+  ProfileNameSpecifier PNS = PNS_unknown;
+  int hasCommandLineOverrider = (INSTR_PROF_PROFILE_NAME_VAR[0] != 0);
 
-  /* Detect the filename and truncate. */
-  setFilenameAutomatically();
+  EnvFilenamePat = getFilenamePatFromEnv();
+  if (EnvFilenamePat) {
+    SelectedPat = EnvFilenamePat;
+    PNS = PNS_environment;
+  } else if (hasCommandLineOverrider) {
+    SelectedPat = INSTR_PROF_PROFILE_NAME_VAR;
+    PNS = PNS_command_line;
+  } else {
+    SelectedPat = NULL;
+    PNS = PNS_default;
+  }
+
+  parseAndSetFilename(SelectedPat, PNS, 0);
 }
 
+/* This API is directly called by the user application code. It has the
+ * highest precedence, above the LLVM_PROFILE_FILE environment variable
+ * and the command line option -fprofile-instr-generate=<profile_name>.
+ */
 COMPILER_RT_VISIBILITY
-void __llvm_profile_set_filename(const char *Filename) {
-  setFilenamePossiblyWithPid(Filename);
+void __llvm_profile_set_filename(const char *FilenamePat) {
+  parseAndSetFilename(FilenamePat, PNS_runtime_api, 1);
 }
 
-COMPILER_RT_VISIBILITY
-void __llvm_profile_override_default_filename(const char *Filename) {
-  /* If the env var is set, skip setting filename from argument. */
-  const char *Env_Filename = getenv("LLVM_PROFILE_FILE");
-  if (Env_Filename && Env_Filename[0])
-    return;
-  setFilenamePossiblyWithPid(Filename);
-}
-
+/* The public API for writing profile data into the file whose name was
+ * set by previous calls to __llvm_profile_set_filename or
+ * __llvm_profile_initialize_file. */
 COMPILER_RT_VISIBILITY
 int __llvm_profile_write_file(void) {
-  int rc;
+  int rc, Length;
+  const char *Filename;
+  char *FilenameBuf;
 
-  GetEnvHook = &getenv;
+  if (lprofProfileDumped()) {
+    PROF_NOTE("Profile data not written to file: %s.\n", 
+              "already written");
+    return 0;
+  }
+
+  Length = getCurFilenameLength();
+  FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
+  Filename = getCurFilename(FilenameBuf);
+
   /* Check the filename. */
-  if (!__llvm_profile_CurrentFilename) {
-    PROF_ERR("LLVM Profile: Failed to write file : %s\n", "Filename not set");
+  if (!Filename) {
+    PROF_ERR("Failed to write file : %s\n", "Filename not set");
     return -1;
   }
 
   /* Check if there is llvm/runtime version mismatch.  */
   if (GET_VERSION(__llvm_profile_get_version()) != INSTR_PROF_RAW_VERSION) {
-    PROF_ERR("LLVM Profile: runtime and instrumentation version mismatch : "
+    PROF_ERR("Runtime and instrumentation version mismatch : "
              "expected %d, but get %d\n",
              INSTR_PROF_RAW_VERSION,
              (int)GET_VERSION(__llvm_profile_get_version()));
     return -1;
   }
 
-  /* Write the file. */
-  rc = writeFileWithName(__llvm_profile_CurrentFilename);
+  /* Write profile data to the file. */
+  rc = writeFile(Filename);
   if (rc)
-    PROF_ERR("LLVM Profile: Failed to write file \"%s\": %s\n",
-            __llvm_profile_CurrentFilename, strerror(errno));
+    PROF_ERR("Failed to write file \"%s\": %s\n", Filename, strerror(errno));
+  return rc;
+}
+
+COMPILER_RT_VISIBILITY
+int __llvm_profile_dump(void) {
+  if (!doMerging())
+    PROF_WARN("Later invocation of __llvm_profile_dump can lead to clobbering "
+              " of previously dumped profile data : %s. Either use \%m "
+              "in profile name or change profile name before dumping.\n",
+              "online profile merging is not on");
+  int rc = __llvm_profile_write_file();
+  lprofSetProfileDumped();
   return rc;
 }
 
@@ -240,6 +575,8 @@
   if (HasBeenRegistered)
     return 0;
 
+  lprofSetupValueProfiler();
+
   HasBeenRegistered = 1;
   return atexit(writeFileWithoutReturn);
 }
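
Putting the pattern handling above together: %p expands to the process id, %h to the host name, and %[1-9]m (or plain %m) to the load-module signature plus a merge-pool id, which also enables in-process merging. A hypothetical call, not part of this patch, that exercises all three specifiers:

  void __llvm_profile_set_filename(const char *Name);

  void chooseProfileName(void) {
    /* With pid 1234, host "box", and a 2-way merge pool, this expands to
     * something like "cov.1234.box.<signature>_0.profraw"; the pool id is
     * getpid() % 2 and the signature value depends on the binary. */
    __llvm_profile_set_filename("cov.%p.%h.%2m.profraw");
  }
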
diff --git a/lib/profile/InstrProfilingInternal.h b/lib/profile/InstrProfilingInternal.h
index 4aab78e..21590d5 100644
--- a/lib/profile/InstrProfilingInternal.h
+++ b/lib/profile/InstrProfilingInternal.h
@@ -68,51 +68,116 @@
 } ProfBufferIO;
 
 /* The creator interface used by testing.  */
-ProfBufferIO *llvmCreateBufferIOInternal(void *File, uint32_t DefaultBufferSz);
+ProfBufferIO *lprofCreateBufferIOInternal(void *File, uint32_t BufferSz);
+
 /*!
  * This is the interface to create a handle for buffered IO.
  */
-ProfBufferIO *llvmCreateBufferIO(WriterCallback FileWriter, void *File,
-                                 uint32_t DefaultBufferSz);
+ProfBufferIO *lprofCreateBufferIO(WriterCallback FileWriter, void *File);
+
 /*!
  * The interface to destroy the bufferIO handle and reclaim
  * the memory.
  */
-void llvmDeleteBufferIO(ProfBufferIO *BufferIO);
+void lprofDeleteBufferIO(ProfBufferIO *BufferIO);
 
 /*!
  * This is the interface to write \c Data of \c Size bytes through
  * \c BufferIO. Returns 0 if successful, otherwise return -1.
  */
-int llvmBufferIOWrite(ProfBufferIO *BufferIO, const uint8_t *Data,
-                      uint32_t Size);
+int lprofBufferIOWrite(ProfBufferIO *BufferIO, const uint8_t *Data,
+                       uint32_t Size);
 /*!
  * The interface to flush the remaining data in the buffer.
  * through the low level writer callback.
  */
-int llvmBufferIOFlush(ProfBufferIO *BufferIO);
+int lprofBufferIOFlush(ProfBufferIO *BufferIO);
 
 /* The low level interface to write data into a buffer. It is used as the
  * callback by other high level writer methods such as buffered IO writer
  * and profile data writer.  */
-uint32_t llvmBufferWriter(ProfDataIOVec *IOVecs, uint32_t NumIOVecs,
-                          void **WriterCtx);
+uint32_t lprofBufferWriter(ProfDataIOVec *IOVecs, uint32_t NumIOVecs,
+                           void **WriterCtx);
 
-int llvmWriteProfData(WriterCallback Writer, void *WriterCtx,
-                      struct ValueProfData **ValueDataArray,
-                      const uint64_t ValueDataSize);
-int llvmWriteProfDataImpl(WriterCallback Writer, void *WriterCtx,
-                          const __llvm_profile_data *DataBegin,
-                          const __llvm_profile_data *DataEnd,
-                          const uint64_t *CountersBegin,
-                          const uint64_t *CountersEnd,
-                          struct ValueProfData **ValueDataBeginArray,
-                          const uint64_t ValueDataSize, const char *NamesBegin,
-                          const char *NamesEnd);
+struct ValueProfData;
+struct ValueProfRecord;
+struct InstrProfValueData;
+struct ValueProfNode;
 
-extern char *(*GetEnvHook)(const char *);
-extern void (*FreeHook)(void *);
-extern void* (*CallocHook)(size_t, size_t);
-extern uint32_t VPBufferSize;
+/*!
+ * The class that defines a set of methods to read value profile
+ * data for streaming/serialization from the instrumentation runtime.
+ */
+typedef struct VPDataReaderType {
+  uint32_t (*InitRTRecord)(const __llvm_profile_data *Data,
+                           uint8_t *SiteCountArray[]);
+  /* Function pointer to the getValueProfRecordHeaderSize method. */
+  uint32_t (*GetValueProfRecordHeaderSize)(uint32_t NumSites);
+  /* Function pointer to the getFirstValueProfRecord method. */
+  struct ValueProfRecord *(*GetFirstValueProfRecord)(struct ValueProfData *);
+  /* Return the number of value data for site \p Site.  */
+  uint32_t (*GetNumValueDataForSite)(uint32_t VK, uint32_t Site);
+  /* Return the total size of the value profile data of the 
+   * current function.  */
+  uint32_t (*GetValueProfDataSize)(void);
+  /*! 
+   * Read the next \p N value data for site \p Site and store the data
+   * in \p Dst. \p StartNode is the first value node to start with if
+   * it is not null. The function returns the pointer to the value
+   * node pointer to be used as the \p StartNode of the next batch reading.
+   * If there is nothing left, it returns NULL.
+   */
+  struct ValueProfNode *(*GetValueData)(uint32_t ValueKind, uint32_t Site,
+                                        struct InstrProfValueData *Dst,
+                                        struct ValueProfNode *StartNode,
+                                        uint32_t N);
+} VPDataReaderType;
+
+int lprofWriteData(WriterCallback Writer, void *WriterCtx,
+                   VPDataReaderType *VPDataReader);
+int lprofWriteDataImpl(WriterCallback Writer, void *WriterCtx,
+                       const __llvm_profile_data *DataBegin,
+                       const __llvm_profile_data *DataEnd,
+                       const uint64_t *CountersBegin,
+                       const uint64_t *CountersEnd,
+                       VPDataReaderType *VPDataReader, const char *NamesBegin,
+                       const char *NamesEnd);
+
+/* Merge value profile data pointed to by SrcValueProfData into
+ * in-memory profile counters pointed to by DstData.  */
+void lprofMergeValueProfData(struct ValueProfData *SrcValueProfData,
+                             __llvm_profile_data *DstData);
+
+VPDataReaderType *lprofGetVPDataReader();
+
+/* Internal interface used by tests to reset the maximum number of
+ * tracked values per value site to \p MaxVals.
+ */
+void lprofSetMaxValsPerSite(uint32_t MaxVals);
+void lprofSetupValueProfiler();
+
+/* Return the profile header 'signature' value associated with the current
+ * executable or shared library. The signature value can be used to form
+ * a profile name that is unique to this load module so that it does not
+ * collide with profiles from other binaries. It also allows shared libraries
+ * to dump merged profile data into their own profile files. */
+uint64_t lprofGetLoadModuleSignature();
+
+/* 
+ * Return a non-zero value if the profile data has already been
+ * dumped to the file.
+ */
+unsigned lprofProfileDumped();
+void lprofSetProfileDumped();
+
+COMPILER_RT_VISIBILITY extern char *(*GetEnvHook)(const char *);
+COMPILER_RT_VISIBILITY extern void (*FreeHook)(void *);
+COMPILER_RT_VISIBILITY extern uint8_t *DynamicBufferIOBuffer;
+COMPILER_RT_VISIBILITY extern uint32_t VPBufferSize;
+COMPILER_RT_VISIBILITY extern uint32_t VPMaxNumValsPerSite;
+/* Pointer to the start of static value counters to be allocated. */
+COMPILER_RT_VISIBILITY extern ValueProfNode *CurrentVNode;
+COMPILER_RT_VISIBILITY extern ValueProfNode *EndVNode;
+extern void (*VPMergeHook)(struct ValueProfData *, __llvm_profile_data *);
 
 #endif
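
The VPDataReaderType callbacks above are designed to be driven in batches by the profile writer. The sketch below is illustrative only; readOneSite and the batch size of 8 are choices made here, not part of the patch, and it assumes the InstrProfiling.h and INSTR_PROF_VALUE_PROF_DATA definitions are in scope, that Reader was obtained from lprofGetVPDataReader(), and that InitRTRecord has already been called for the function being written.

  static void readOneSite(VPDataReaderType *Reader, uint32_t Kind,
                          uint32_t Site) {
    InstrProfValueData Buf[8];
    uint32_t Left = Reader->GetNumValueDataForSite(Kind, Site);
    ValueProfNode *Next = 0; /* Null: start from the site's first node. */
    while (Left > 0) {
      uint32_t N = Left < 8 ? Left : 8;
      Next = Reader->GetValueData(Kind, Site, Buf, Next, N);
      /* ... consume Buf[0 .. N-1], e.g. hand it to the buffered writer ... */
      Left -= N;
    }
  }
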
diff --git a/lib/profile/InstrProfilingMerge.c b/lib/profile/InstrProfilingMerge.c
new file mode 100644
index 0000000..a202115
--- /dev/null
+++ b/lib/profile/InstrProfilingMerge.c
@@ -0,0 +1,132 @@
+/*===- InstrProfilingMerge.c - Profile in-process Merging  ---------------===*\
+|*
+|*                     The LLVM Compiler Infrastructure
+|*
+|* This file is distributed under the University of Illinois Open Source
+|* License. See LICENSE.TXT for details.
+|*
+|*===----------------------------------------------------------------------===*
+|* This file defines the API needed for in-process merging of profile data
+|* stored in memory buffer.
+\*===---------------------------------------------------------------------===*/
+
+#include "InstrProfiling.h"
+#include "InstrProfilingInternal.h"
+#include "InstrProfilingUtil.h"
+
+#define INSTR_PROF_VALUE_PROF_DATA
+#include "InstrProfData.inc"
+
+COMPILER_RT_WEAK void (*VPMergeHook)(ValueProfData *,
+                                     __llvm_profile_data *) = NULL;
+COMPILER_RT_VISIBILITY
+uint64_t lprofGetLoadModuleSignature() {
+  /* A very fast way to compute a module signature.  */
+  uint64_t CounterSize = (uint64_t)(__llvm_profile_end_counters() -
+                                    __llvm_profile_begin_counters());
+  uint64_t DataSize = __llvm_profile_get_data_size(__llvm_profile_begin_data(),
+                                                   __llvm_profile_end_data());
+  uint64_t NamesSize =
+      (uint64_t)(__llvm_profile_end_names() - __llvm_profile_begin_names());
+  uint64_t NumVnodes =
+      (uint64_t)(__llvm_profile_end_vnodes() - __llvm_profile_begin_vnodes());
+  const __llvm_profile_data *FirstD = __llvm_profile_begin_data();
+
+  return (NamesSize << 40) + (CounterSize << 30) + (DataSize << 20) +
+         (NumVnodes << 10) + (DataSize > 0 ? FirstD->NameRef : 0);
+}
+
+/* Returns 1 if profile is not structurally compatible.  */
+COMPILER_RT_VISIBILITY
+int __llvm_profile_check_compatibility(const char *ProfileData,
+                                       uint64_t ProfileSize) {
+  /* Check profile header only for now  */
+  __llvm_profile_header *Header = (__llvm_profile_header *)ProfileData;
+  __llvm_profile_data *SrcDataStart, *SrcDataEnd, *SrcData, *DstData;
+  SrcDataStart =
+      (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header));
+  SrcDataEnd = SrcDataStart + Header->DataSize;
+
+  if (ProfileSize < sizeof(__llvm_profile_header))
+    return 1;
+
+  /* Check the header first.  */
+  if (Header->Magic != __llvm_profile_get_magic() ||
+      Header->Version != __llvm_profile_get_version() ||
+      Header->DataSize !=
+          __llvm_profile_get_data_size(__llvm_profile_begin_data(),
+                                       __llvm_profile_end_data()) ||
+      Header->CountersSize != (uint64_t)(__llvm_profile_end_counters() -
+                                         __llvm_profile_begin_counters()) ||
+      Header->NamesSize != (uint64_t)(__llvm_profile_end_names() -
+                                      __llvm_profile_begin_names()) ||
+      Header->ValueKindLast != IPVK_Last)
+    return 1;
+
+  if (ProfileSize < sizeof(__llvm_profile_header) +
+                        Header->DataSize * sizeof(__llvm_profile_data) +
+                        Header->NamesSize + Header->CountersSize)
+    return 1;
+
+  for (SrcData = SrcDataStart,
+       DstData = (__llvm_profile_data *)__llvm_profile_begin_data();
+       SrcData < SrcDataEnd; ++SrcData, ++DstData) {
+    if (SrcData->NameRef != DstData->NameRef ||
+        SrcData->FuncHash != DstData->FuncHash ||
+        SrcData->NumCounters != DstData->NumCounters)
+      return 1;
+  }
+
+  /* Matched! */
+  return 0;
+}
+
+COMPILER_RT_VISIBILITY
+void __llvm_profile_merge_from_buffer(const char *ProfileData,
+                                      uint64_t ProfileSize) {
+  __llvm_profile_data *SrcDataStart, *SrcDataEnd, *SrcData, *DstData;
+  __llvm_profile_header *Header = (__llvm_profile_header *)ProfileData;
+  uint64_t *SrcCountersStart;
+  const char *SrcNameStart;
+  ValueProfData *SrcValueProfDataStart, *SrcValueProfData;
+
+  SrcDataStart =
+      (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header));
+  SrcDataEnd = SrcDataStart + Header->DataSize;
+  SrcCountersStart = (uint64_t *)SrcDataEnd;
+  SrcNameStart = (const char *)(SrcCountersStart + Header->CountersSize);
+  SrcValueProfDataStart =
+      (ValueProfData *)(SrcNameStart + Header->NamesSize +
+                        __llvm_profile_get_num_padding_bytes(
+                            Header->NamesSize));
+
+  for (SrcData = SrcDataStart,
+      DstData = (__llvm_profile_data *)__llvm_profile_begin_data(),
+      SrcValueProfData = SrcValueProfDataStart;
+       SrcData < SrcDataEnd; ++SrcData, ++DstData) {
+    uint64_t *SrcCounters;
+    uint64_t *DstCounters = (uint64_t *)DstData->CounterPtr;
+    unsigned I, NC, NVK = 0;
+
+    NC = SrcData->NumCounters;
+    SrcCounters = SrcCountersStart +
+                  ((size_t)SrcData->CounterPtr - Header->CountersDelta) /
+                      sizeof(uint64_t);
+    for (I = 0; I < NC; I++)
+      DstCounters[I] += SrcCounters[I];
+
+    /* Now merge value profile data.  */
+    if (!VPMergeHook)
+      continue;
+
+    for (I = 0; I <= IPVK_Last; I++)
+      NVK += (SrcData->NumValueSites[I] != 0);
+
+    if (!NVK)
+      continue;
+
+    VPMergeHook(SrcValueProfData, DstData);
+    SrcValueProfData = (ValueProfData *)((char *)SrcValueProfData +
+                                         SrcValueProfData->TotalSize);
+  }
+}
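
Taken together, the two entry points above give an embedder a simple recipe: load a previously written raw profile into memory, reject it if it is structurally incompatible, and only then fold its counts into the live counters. The sketch below is not part of this patch; the file loading is simplified and it assumes the declarations from InstrProfiling.h are visible.

    /* Hypothetical caller (illustration only): merge an on-disk raw profile
     * into the in-process counters. */
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    static int mergeSavedProfile(const char *Path) {
      FILE *F = fopen(Path, "rb");
      long Size;
      char *Buf;
      int Rc = -1;
      if (!F)
        return -1;
      fseek(F, 0, SEEK_END);
      Size = ftell(F);
      fseek(F, 0, SEEK_SET);
      Buf = (char *)malloc(Size);
      if (Buf && fread(Buf, 1, Size, F) == (size_t)Size) {
        /* Reject profiles produced by a different build of the program. */
        Rc = __llvm_profile_check_compatibility(Buf, (uint64_t)Size);
        if (!Rc)
          __llvm_profile_merge_from_buffer(Buf, (uint64_t)Size);
      }
      fclose(F);
      free(Buf);
      return Rc;
    }
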
diff --git a/lib/profile/InstrProfilingMergeFile.c b/lib/profile/InstrProfilingMergeFile.c
new file mode 100644
index 0000000..ac5ee9f
--- /dev/null
+++ b/lib/profile/InstrProfilingMergeFile.c
@@ -0,0 +1,41 @@
+/*===- InstrProfilingMergeFile.c - Profile in-process Merging  ------------===*\
+|*
+|*                     The LLVM Compiler Infrastructure
+|*
+|* This file is distributed under the University of Illinois Open Source
+|* License. See LICENSE.TXT for details.
+|*
+|*===----------------------------------------------------------------------===
+|* This file defines APIs needed to support in-process merging for profile data
+|* stored in files.
+\*===----------------------------------------------------------------------===*/
+
+#include "InstrProfiling.h"
+#include "InstrProfilingInternal.h"
+#include "InstrProfilingUtil.h"
+
+#define INSTR_PROF_VALUE_PROF_DATA
+#include "InstrProfData.inc"
+
+void (*VPMergeHook)(ValueProfData *,
+                    __llvm_profile_data *) = &lprofMergeValueProfData;
+
+/* Merge value profile data pointed to by SrcValueProfData into the
+ * in-memory profile counters pointed to by DstData.  */
+void lprofMergeValueProfData(ValueProfData *SrcValueProfData,
+                             __llvm_profile_data *DstData) {
+  unsigned I, S, V, C;
+  InstrProfValueData *VData;
+  ValueProfRecord *VR = getFirstValueProfRecord(SrcValueProfData);
+  for (I = 0; I < SrcValueProfData->NumValueKinds; I++) {
+    VData = getValueProfRecordValueData(VR);
+    for (S = 0; S < VR->NumValueSites; S++) {
+      uint8_t NV = VR->SiteCountArray[S];
+      for (V = 0; V < NV; V++) {
+        for (C = 0; C < VData[V].Count; C++)
+          __llvm_profile_instrument_target(VData[V].Value, DstData, S);
+      }
+    }
+    VR = getValueProfRecordNext(VR);
+  }
+}
diff --git a/lib/profile/InstrProfilingPlatformDarwin.c b/lib/profile/InstrProfilingPlatformDarwin.c
index 30ddbd2..8931aba 100644
--- a/lib/profile/InstrProfilingPlatformDarwin.c
+++ b/lib/profile/InstrProfilingPlatformDarwin.c
@@ -30,6 +30,13 @@
     CountersEnd __asm("section$end$__DATA$" INSTR_PROF_CNTS_SECT_NAME_STR);
 
 COMPILER_RT_VISIBILITY
+extern ValueProfNode
+    VNodesStart __asm("section$start$__DATA$" INSTR_PROF_VNODES_SECT_NAME_STR);
+COMPILER_RT_VISIBILITY
+extern ValueProfNode
+    VNodesEnd __asm("section$end$__DATA$" INSTR_PROF_VNODES_SECT_NAME_STR);
+
+COMPILER_RT_VISIBILITY
 const __llvm_profile_data *__llvm_profile_begin_data(void) {
   return &DataStart;
 }
@@ -43,4 +50,14 @@
 uint64_t *__llvm_profile_begin_counters(void) { return &CountersStart; }
 COMPILER_RT_VISIBILITY
 uint64_t *__llvm_profile_end_counters(void) { return &CountersEnd; }
+
+COMPILER_RT_VISIBILITY
+ValueProfNode *__llvm_profile_begin_vnodes(void) {
+  return &VNodesStart;
+}
+COMPILER_RT_VISIBILITY
+ValueProfNode *__llvm_profile_end_vnodes(void) { return &VNodesEnd; }
+
+COMPILER_RT_VISIBILITY ValueProfNode *CurrentVNode = &VNodesStart;
+COMPILER_RT_VISIBILITY ValueProfNode *EndVNode = &VNodesEnd;
 #endif
diff --git a/lib/profile/InstrProfilingPlatformLinux.c b/lib/profile/InstrProfilingPlatformLinux.c
index 7843f47..b6c780f 100644
--- a/lib/profile/InstrProfilingPlatformLinux.c
+++ b/lib/profile/InstrProfilingPlatformLinux.c
@@ -18,6 +18,8 @@
 #define PROF_NAME_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_NAME_SECT_NAME)
 #define PROF_CNTS_START INSTR_PROF_SECT_START(INSTR_PROF_CNTS_SECT_NAME)
 #define PROF_CNTS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_CNTS_SECT_NAME)
+#define PROF_VNODES_START INSTR_PROF_SECT_START(INSTR_PROF_VNODES_SECT_NAME)
+#define PROF_VNODES_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VNODES_SECT_NAME)
 
 /* Declare section start and stop symbols for various sections
  * generated by compiler instrumentation.
@@ -28,6 +30,8 @@
 extern uint64_t PROF_CNTS_STOP COMPILER_RT_VISIBILITY;
 extern char PROF_NAME_START COMPILER_RT_VISIBILITY;
 extern char PROF_NAME_STOP COMPILER_RT_VISIBILITY;
+extern ValueProfNode PROF_VNODES_START COMPILER_RT_VISIBILITY;
+extern ValueProfNode PROF_VNODES_STOP COMPILER_RT_VISIBILITY;
 
 /* Add dummy data to ensure the section is always created. */
 __llvm_profile_data
@@ -35,6 +39,7 @@
 uint64_t
     __prof_cnts_sect_data[0] COMPILER_RT_SECTION(INSTR_PROF_CNTS_SECT_NAME_STR);
 char __prof_nms_sect_data[0] COMPILER_RT_SECTION(INSTR_PROF_NAME_SECT_NAME_STR);
+ValueProfNode __prof_vnodes_sect_data[0] COMPILER_RT_SECTION(INSTR_PROF_VNODES_SECT_NAME_STR);
 
 COMPILER_RT_VISIBILITY const __llvm_profile_data *
 __llvm_profile_begin_data(void) {
@@ -56,4 +61,15 @@
 COMPILER_RT_VISIBILITY uint64_t *__llvm_profile_end_counters(void) {
   return &PROF_CNTS_STOP;
 }
+
+COMPILER_RT_VISIBILITY ValueProfNode *
+__llvm_profile_begin_vnodes(void) {
+  return &PROF_VNODES_START;
+}
+COMPILER_RT_VISIBILITY ValueProfNode *__llvm_profile_end_vnodes(void) {
+  return &PROF_VNODES_STOP;
+}
+COMPILER_RT_VISIBILITY ValueProfNode *CurrentVNode = &PROF_VNODES_START;
+COMPILER_RT_VISIBILITY ValueProfNode *EndVNode = &PROF_VNODES_STOP;
+
 #endif
diff --git a/lib/profile/InstrProfilingPlatformOther.c b/lib/profile/InstrProfilingPlatformOther.c
index 24ef5bb..b259664 100644
--- a/lib/profile/InstrProfilingPlatformOther.c
+++ b/lib/profile/InstrProfilingPlatformOther.c
@@ -80,4 +80,15 @@
 uint64_t *__llvm_profile_begin_counters(void) { return CountersFirst; }
 COMPILER_RT_VISIBILITY
 uint64_t *__llvm_profile_end_counters(void) { return CountersLast; }
+
+COMPILER_RT_VISIBILITY
+ValueProfNode *__llvm_profile_begin_vnodes(void) {
+  return 0;
+}
+COMPILER_RT_VISIBILITY
+ValueProfNode *__llvm_profile_end_vnodes(void) { return 0; }
+
+COMPILER_RT_VISIBILITY ValueProfNode *CurrentVNode = 0;
+COMPILER_RT_VISIBILITY ValueProfNode *EndVNode = 0;
+
 #endif
diff --git a/lib/profile/InstrProfilingPort.h b/lib/profile/InstrProfilingPort.h
index e07f598..c947153 100644
--- a/lib/profile/InstrProfilingPort.h
+++ b/lib/profile/InstrProfilingPort.h
@@ -13,39 +13,99 @@
 #ifdef _MSC_VER
 #define COMPILER_RT_ALIGNAS(x) __declspec(align(x))
 #define COMPILER_RT_VISIBILITY
+/* FIXME: selectany does not have the same semantics as weak. */
 #define COMPILER_RT_WEAK __declspec(selectany)
+/* Need to include <windows.h> */
+#define COMPILER_RT_ALLOCA _alloca
+/* Need to include <stdio.h> and <io.h> */
+#define COMPILER_RT_FTRUNCATE(f,l) _chsize(_fileno(f),l)
 #elif __GNUC__
 #define COMPILER_RT_ALIGNAS(x) __attribute__((aligned(x)))
 #define COMPILER_RT_VISIBILITY __attribute__((visibility("hidden")))
 #define COMPILER_RT_WEAK __attribute__((weak))
+#define COMPILER_RT_ALLOCA __builtin_alloca
+#define COMPILER_RT_FTRUNCATE(f,l) ftruncate(fileno(f),l)
 #endif
 
+#if defined(__APPLE__)
+#define COMPILER_RT_SEG "__DATA,"
+#else
+#define COMPILER_RT_SEG ""
+#endif
+
+#ifdef _MSC_VER
+#define COMPILER_RT_SECTION(Sect) __declspec(allocate(Sect))
+#else
 #define COMPILER_RT_SECTION(Sect) __attribute__((section(Sect)))
+#endif
+
+#define COMPILER_RT_MAX_HOSTLEN 128
+#ifdef _MSC_VER
+#define COMPILER_RT_GETHOSTNAME(Name, Len) gethostname(Name, Len)
+#elif defined(__ORBIS__)
+#define COMPILER_RT_GETHOSTNAME(Name, Len) ((void)(Name), (void)(Len), (-1))
+#else
+#define COMPILER_RT_GETHOSTNAME(Name, Len) lprofGetHostName(Name, Len)
+#define COMPILER_RT_HAS_UNAME 1
+#endif
 
 #if COMPILER_RT_HAS_ATOMICS == 1
 #ifdef _MSC_VER
 #include <windows.h>
+#if _MSC_VER < 1900
+#define snprintf _snprintf
+#endif
 #if defined(_WIN64)
 #define COMPILER_RT_BOOL_CMPXCHG(Ptr, OldV, NewV)                              \
   (InterlockedCompareExchange64((LONGLONG volatile *)Ptr, (LONGLONG)NewV,      \
                                 (LONGLONG)OldV) == (LONGLONG)OldV)
+#define COMPILER_RT_PTR_FETCH_ADD(DomType, PtrVar, PtrIncr)                    \
+  (DomType *)InterlockedExchangeAdd64((LONGLONG volatile *)&PtrVar,            \
+                                      (LONGLONG)sizeof(DomType) * PtrIncr)
 #else /* !defined(_WIN64) */
 #define COMPILER_RT_BOOL_CMPXCHG(Ptr, OldV, NewV)                              \
   (InterlockedCompareExchange((LONG volatile *)Ptr, (LONG)NewV, (LONG)OldV) == \
    (LONG)OldV)
+#define COMPILER_RT_PTR_FETCH_ADD(DomType, PtrVar, PtrIncr)                    \
+  (DomType *)InterlockedExchangeAdd((LONG volatile *)&PtrVar,                  \
+                                    (LONG)sizeof(DomType) * PtrIncr)
 #endif
 #else /* !defined(_MSC_VER) */
 #define COMPILER_RT_BOOL_CMPXCHG(Ptr, OldV, NewV)                              \
   __sync_bool_compare_and_swap(Ptr, OldV, NewV)
+#define COMPILER_RT_PTR_FETCH_ADD(DomType, PtrVar, PtrIncr)                    \
+  (DomType *)__sync_fetch_and_add((long *)&PtrVar, sizeof(DomType) * PtrIncr)
 #endif
 #else /* COMPILER_RT_HAS_ATOMICS != 1 */
+#include "InstrProfilingUtil.h"
 #define COMPILER_RT_BOOL_CMPXCHG(Ptr, OldV, NewV)                              \
-  BoolCmpXchg((void **)Ptr, OldV, NewV)
+  lprofBoolCmpXchg((void **)Ptr, OldV, NewV)
+#define COMPILER_RT_PTR_FETCH_ADD(DomType, PtrVar, PtrIncr)                    \
+  (DomType *)lprofPtrFetchAdd((void **)&PtrVar, sizeof(DomType) * PtrIncr)
 #endif
 
+#if defined(_WIN32)
+#define DIR_SEPARATOR '\\'
+#define DIR_SEPARATOR_2 '/'
+#else
+#define DIR_SEPARATOR '/'
+#endif
+
+#ifndef DIR_SEPARATOR_2
+#define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR)
+#else /* DIR_SEPARATOR_2 */
+#define IS_DIR_SEPARATOR(ch)                                                   \
+  (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2))
+#endif /* DIR_SEPARATOR_2 */
+
 #define PROF_ERR(Format, ...)                                                  \
-  if (GetEnvHook && GetEnvHook("LLVM_PROFILE_VERBOSE_ERRORS"))                 \
-    fprintf(stderr, Format, __VA_ARGS__);
+  fprintf(stderr, "LLVM Profile Error: " Format, __VA_ARGS__);
+
+#define PROF_WARN(Format, ...)                                                 \
+  fprintf(stderr, "LLVM Profile Warning: " Format, __VA_ARGS__);
+
+#define PROF_NOTE(Format, ...)                                                 \
+  fprintf(stderr, "LLVM Profile Note: " Format, __VA_ARGS__);
 
 #if defined(__FreeBSD__)
 
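
The new portability macros follow the same call shape on every platform: COMPILER_RT_PTR_FETCH_ADD hands out consecutive elements of a typed pool, while COMPILER_RT_BOOL_CMPXCHG publishes a pointer exactly once. A minimal usage sketch follows; the pool and both helper functions are hypothetical and only illustrate the intended call pattern.

    /* Hypothetical fixed pool carved up with the macros defined above. */
    typedef struct Node { struct Node *Next; } Node;
    static Node Pool[64];
    static Node *Cursor = &Pool[0];

    static Node *takeNode(void) {
      /* Atomically bump the cursor by one element and return the old slot. */
      Node *N = COMPILER_RT_PTR_FETCH_ADD(Node, Cursor, 1);
      return (N >= Pool + 64) ? 0 : N; /* Past the end: pool exhausted. */
    }

    static int publishHead(Node **Head, Node *N) {
      /* Succeeds for exactly one thread while *Head is still NULL. */
      return COMPILER_RT_BOOL_CMPXCHG(Head, 0, N);
    }
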
diff --git a/lib/profile/InstrProfilingRuntime.cc b/lib/profile/InstrProfilingRuntime.cc
index 12ad9f1..eb83074 100644
--- a/lib/profile/InstrProfilingRuntime.cc
+++ b/lib/profile/InstrProfilingRuntime.cc
@@ -11,7 +11,8 @@
 
 #include "InstrProfiling.h"
 
-COMPILER_RT_VISIBILITY int __llvm_profile_runtime;
+/* int __llvm_profile_runtime  */
+COMPILER_RT_VISIBILITY int INSTR_PROF_PROFILE_RUNTIME_VAR;
 }
 
 namespace {
diff --git a/lib/profile/InstrProfilingUtil.c b/lib/profile/InstrProfilingUtil.c
index fb14005..ead537d 100644
--- a/lib/profile/InstrProfilingUtil.c
+++ b/lib/profile/InstrProfilingUtil.c
@@ -12,27 +12,198 @@
 
 #ifdef _WIN32
 #include <direct.h>
-#elif I386_FREEBSD
-int mkdir(const char*, unsigned short);
+#include <io.h>
+#include <windows.h>
 #else
 #include <sys/stat.h>
 #include <sys/types.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
 #endif
 
+#ifdef COMPILER_RT_HAS_UNAME
+#include <sys/utsname.h>
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
 COMPILER_RT_VISIBILITY
 void __llvm_profile_recursive_mkdir(char *path) {
   int i;
 
   for (i = 1; path[i] != '\0'; ++i) {
     char save = path[i];
-    if (!(path[i] == '/' || path[i] == '\\'))
+    if (!IS_DIR_SEPARATOR(path[i]))
       continue;
     path[i] = '\0';
 #ifdef _WIN32
     _mkdir(path);
 #else
-    mkdir(path, 0755);  /* Some of these will fail, ignore it. */
+    mkdir(path, 0755); /* Some of these will fail, ignore it. */
 #endif
     path[i] = save;
   }
 }
+
+#if COMPILER_RT_HAS_ATOMICS != 1
+COMPILER_RT_VISIBILITY
+uint32_t lprofBoolCmpXchg(void **Ptr, void *OldV, void *NewV) {
+  void *R = *Ptr;
+  if (R == OldV) {
+    *Ptr = NewV;
+    return 1;
+  }
+  return 0;
+}
+COMPILER_RT_VISIBILITY
+void *lprofPtrFetchAdd(void **Mem, long ByteIncr) {
+  void *Old = *Mem;
+  *((char **)Mem) += ByteIncr;
+  return Old;
+}
+
+#endif
+
+#ifdef COMPILER_RT_HAS_UNAME
+COMPILER_RT_VISIBILITY int lprofGetHostName(char *Name, int Len) {
+  struct utsname N;
+  int R;
+  if (!(R = uname(&N)))
+    strncpy(Name, N.nodename, Len);
+  return R;
+}
+#endif
+
+COMPILER_RT_VISIBILITY FILE *lprofOpenFileEx(const char *ProfileName) {
+  FILE *f;
+  int fd;
+#ifdef COMPILER_RT_HAS_FCNTL_LCK
+  struct flock s_flock;
+
+  s_flock.l_whence = SEEK_SET;
+  s_flock.l_start = 0;
+  s_flock.l_len = 0; /* Until EOF.  */
+  s_flock.l_pid = getpid();
+
+  s_flock.l_type = F_WRLCK;
+  fd = open(ProfileName, O_RDWR | O_CREAT, 0666);
+  if (fd < 0)
+    return NULL;
+
+  while (fcntl(fd, F_SETLKW, &s_flock) == -1) {
+    if (errno != EINTR) {
+      if (errno == ENOLCK) {
+        PROF_WARN("Data may be corrupted during profile merging: %s\n",
+                  "Failed to obtain file lock due to system limit.");
+      }
+      break;
+    }
+  }
+
+  f = fdopen(fd, "r+b");
+#elif defined(_WIN32)
+  // FIXME: Use the wide variants to handle Unicode filenames.
+  HANDLE h = CreateFileA(ProfileName, GENERIC_READ | GENERIC_WRITE, 0, 0,
+                         OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0);
+  if (h == INVALID_HANDLE_VALUE)
+    return NULL;
+
+  fd = _open_osfhandle((intptr_t)h, 0);
+  if (fd == -1) {
+    CloseHandle(h);
+    return NULL;
+  }
+
+  f = _fdopen(fd, "r+b");
+  if (f == 0) {
+    CloseHandle(h);
+    return NULL;
+  }
+#else
+  /* Worst case: no locking applied.  */
+  PROF_WARN("Concurrent file access is not supported: %s\n",
+            "lack of file locking");
+  fd = open(ProfileName, O_RDWR | O_CREAT, 0666);
+  if (fd < 0)
+    return NULL;
+  f = fdopen(fd, "r+b");
+#endif
+
+  return f;
+}
+
+COMPILER_RT_VISIBILITY const char *lprofGetPathPrefix(int *PrefixStrip,
+                                                      size_t *PrefixLen) {
+  const char *Prefix = getenv("GCOV_PREFIX");
+  const char *PrefixStripStr = getenv("GCOV_PREFIX_STRIP");
+
+  *PrefixLen = 0;
+  *PrefixStrip = 0;
+  if (Prefix == NULL || Prefix[0] == '\0')
+    return NULL;
+
+  if (PrefixStripStr) {
+    *PrefixStrip = atoi(PrefixStripStr);
+
+    /* Negative GCOV_PREFIX_STRIP values are ignored */
+    if (*PrefixStrip < 0)
+      *PrefixStrip = 0;
+  } else {
+    *PrefixStrip = 0;
+  }
+  *PrefixLen = strlen(Prefix);
+
+  return Prefix;
+}
+
+COMPILER_RT_VISIBILITY void
+lprofApplyPathPrefix(char *Dest, const char *PathStr, const char *Prefix,
+                     size_t PrefixLen, int PrefixStrip) {
+
+  const char *Ptr;
+  int Level;
+  const char *StrippedPathStr = PathStr;
+
+  for (Level = 0, Ptr = PathStr + 1; Level < PrefixStrip; ++Ptr) {
+    if (*Ptr == '\0')
+      break;
+
+    if (!IS_DIR_SEPARATOR(*Ptr))
+      continue;
+
+    StrippedPathStr = Ptr;
+    ++Level;
+  }
+
+  memcpy(Dest, Prefix, PrefixLen);
+
+  if (!IS_DIR_SEPARATOR(Prefix[PrefixLen - 1]))
+    Dest[PrefixLen++] = DIR_SEPARATOR;
+
+  memcpy(Dest + PrefixLen, StrippedPathStr, strlen(StrippedPathStr) + 1);
+}
+
+COMPILER_RT_VISIBILITY const char *
+lprofFindFirstDirSeparator(const char *Path) {
+  const char *Sep;
+  Sep = strchr(Path, DIR_SEPARATOR);
+  if (Sep)
+    return Sep;
+#if defined(DIR_SEPARATOR_2)
+  Sep = strchr(Path, DIR_SEPARATOR_2);
+#endif
+  return Sep;
+}
+
+COMPILER_RT_VISIBILITY const char *lprofFindLastDirSeparator(const char *Path) {
+  const char *Sep;
+  Sep = strrchr(Path, DIR_SEPARATOR);
+  if (Sep)
+    return Sep;
+#if defined(DIR_SEPARATOR_2)
+  Sep = strrchr(Path, DIR_SEPARATOR_2);
+#endif
+  return Sep;
+}
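
lprofGetPathPrefix and lprofApplyPathPrefix implement the GCOV_PREFIX / GCOV_PREFIX_STRIP convention: strip a number of leading path components from the original profile path, then graft the remainder onto the prefix. A short sketch of how a caller might combine them, assuming the declarations from InstrProfilingUtil.h; the demo function and its buffer sizing are simplified and not part of this change.

    /* Hypothetical helper: print where a raw profile path would be
     * redirected under GCOV_PREFIX / GCOV_PREFIX_STRIP. */
    #include <stdio.h>

    static void printRedirectedPath(const char *RawPath) {
      int Strip;
      size_t PrefixLen;
      char Dest[4096];
      const char *Prefix = lprofGetPathPrefix(&Strip, &PrefixLen);
      if (!Prefix) {         /* GCOV_PREFIX not set: keep the path as-is. */
        printf("%s\n", RawPath);
        return;
      }
      lprofApplyPathPrefix(Dest, RawPath, Prefix, PrefixLen, Strip);
      printf("%s\n", Dest);
    }
    /* With GCOV_PREFIX=/data/profiles and GCOV_PREFIX_STRIP=1, a path such as
     * "/home/user/default.profraw" is redirected under /data/profiles with its
     * first component dropped (a doubled separator is harmless on POSIX). */
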
diff --git a/lib/profile/InstrProfilingUtil.h b/lib/profile/InstrProfilingUtil.h
index 756b18e..a80fde7 100644
--- a/lib/profile/InstrProfilingUtil.h
+++ b/lib/profile/InstrProfilingUtil.h
@@ -10,7 +10,45 @@
 #ifndef PROFILE_INSTRPROFILINGUTIL_H
 #define PROFILE_INSTRPROFILINGUTIL_H
 
+#include <stddef.h>
+#include <stdio.h>
+
 /*! \brief Create a directory tree. */
 void __llvm_profile_recursive_mkdir(char *Pathname);
 
-#endif  /* PROFILE_INSTRPROFILINGUTIL_H */
+/*! Open file \c Filename for read+write with write
+ * lock for exclusive access. The caller will block
+ * if the lock is already held by another process. */
+FILE *lprofOpenFileEx(const char *Filename);
+/* PS4 doesn't have getenv. Define a shim. */
+#if __ORBIS__
+static inline char *getenv(const char *name) { return NULL; }
+#endif /* #if __ORBIS__ */
+
+/* GCOV_PREFIX and GCOV_PREFIX_STRIP support */
+/* Return the path prefix specified by GCOV_PREFIX environment variable.
+ * If GCOV_PREFIX_STRIP is also specified, the strip level (integer value)
+ * is returned via \c *PrefixStrip. The prefix length is stored in *PrefixLen.
+ */
+const char *lprofGetPathPrefix(int *PrefixStrip, size_t *PrefixLen);
+/* Apply the path prefix specified in \c Prefix to the path string in
+ * \c PathStr, and store the result in the buffer pointed to by \c Dest.
+ * If \c PrefixStrip is not zero, path components are stripped from
+ * \c PathStr (the level of stripping is specified by \c PrefixStrip)
+ * before \c Prefix is added.
+ */
+void lprofApplyPathPrefix(char *Dest, const char *PathStr, const char *Prefix,
+                          size_t PrefixLen, int PrefixStrip);
+
+/* Returns a pointer to the first occurrence of \c DIR_SEPARATOR char in
+ * the string \c Path, or NULL if the char is not found. */
+const char *lprofFindFirstDirSeparator(const char *Path);
+/* Returns a pointer to the last occurrence of \c DIR_SEPARATOR char in
+ * the string \c Path, or NULL if the char is not found. */
+const char *lprofFindLastDirSeparator(const char *Path);
+
+int lprofGetHostName(char *Name, int Len);
+
+unsigned lprofBoolCmpXchg(void **Ptr, void *OldV, void *NewV);
+void *lprofPtrFetchAdd(void **Mem, long ByteIncr);
+
+#endif /* PROFILE_INSTRPROFILINGUTIL_H */
diff --git a/lib/profile/InstrProfilingValue.c b/lib/profile/InstrProfilingValue.c
index 7b36f9f..93957e3 100644
--- a/lib/profile/InstrProfilingValue.c
+++ b/lib/profile/InstrProfilingValue.c
@@ -9,6 +9,7 @@
 
 #include "InstrProfiling.h"
 #include "InstrProfilingInternal.h"
+#include "InstrProfilingUtil.h" /* For PS4 getenv shim. */
 #include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -17,26 +18,40 @@
 #define INSTR_PROF_COMMON_API_IMPL
 #include "InstrProfData.inc"
 
-#define PROF_OOM(Msg) PROF_ERR(Msg ":%s\n", "Out of memory");
-#define PROF_OOM_RETURN(Msg)                                                   \
-  {                                                                            \
-    PROF_OOM(Msg)                                                              \
-    free(ValueDataArray);                                                      \
-    return NULL;                                                               \
-  }
+static int hasStaticCounters = 1;
+static int OutOfNodesWarnings = 0;
+static int hasNonDefaultValsPerSite = 0;
+#define INSTR_PROF_MAX_VP_WARNS 10
+#define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 8
+#define INSTR_PROF_VNODE_POOL_SIZE 1024
 
-#if COMPILER_RT_HAS_ATOMICS != 1
-COMPILER_RT_VISIBILITY
-uint32_t BoolCmpXchg(void **Ptr, void *OldV, void *NewV) {
-  void *R = *Ptr;
-  if (R == OldV) {
-    *Ptr = NewV;
-    return 1;
-  }
-  return 0;
-}
+#ifndef _MSC_VER
+/* A shared static pool in addition to the vnodes statically
+ * allocated by the compiler.  */
+COMPILER_RT_VISIBILITY ValueProfNode
+    lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION(
+       COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME_STR);
 #endif
 
+COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =
+    INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;
+
+COMPILER_RT_VISIBILITY void lprofSetupValueProfiler() {
+  const char *Str = 0;
+  Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE");
+  if (Str && Str[0]) {
+    VPMaxNumValsPerSite = atoi(Str);
+    hasNonDefaultValsPerSite = 1;
+  }
+  if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE)
+    VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
+}
+
+COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
+  VPMaxNumValsPerSite = MaxVals;
+  hasNonDefaultValsPerSite = 1;
+}
+
 /* This method is only used in value profiler mock testing.  */
 COMPILER_RT_VISIBILITY void
 __llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
@@ -65,6 +80,15 @@
 static int allocateValueProfileCounters(__llvm_profile_data *Data) {
   uint64_t NumVSites = 0;
   uint32_t VKI;
+
+  /* This function will never be called when value site array is allocated
+     statically at compile time.  */
+  hasStaticCounters = 0;
+  /* When dynamic allocation is enabled, allow tracking the max number of
+   * values allowed.  */
+  if (!hasNonDefaultValsPerSite)
+    VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
+
   for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
     NumVSites += Data->NumValueSites[VKI];
 
@@ -79,10 +103,36 @@
   return 1;
 }
 
+static ValueProfNode *allocateOneNode(__llvm_profile_data *Data, uint32_t Index,
+                                      uint64_t Value) {
+  ValueProfNode *Node;
+
+  if (!hasStaticCounters)
+    return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));
+
+  /* Early check to avoid the pointer wrapping around.  */
+  if (CurrentVNode + 1 > EndVNode) {
+    if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) {
+      PROF_WARN("Unable to track new values: %s. "
+                "Consider using option -mllvm -vp-counters-per-site=<n> to "
+                "allocate more value profile counters at compile time.\n",
+                "Running out of static counters");
+    }
+    return 0;
+  }
+  Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);
+  /* Due to section padding, the space right before EndVNode may hold only
+   * a partial VNode, so we need to skip the last incomplete node. */
+  if (Node + 1 > EndVNode)
+    return 0;
+
+  return Node;
+}
+
 COMPILER_RT_VISIBILITY void
 __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
                                  uint32_t CounterIndex) {
-
   __llvm_profile_data *PData = (__llvm_profile_data *)Data;
   if (!PData)
     return;
@@ -94,87 +144,184 @@
 
   ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;
   ValueProfNode *PrevVNode = NULL;
-  ValueProfNode *CurrentVNode = ValueCounters[CounterIndex];
+  ValueProfNode *MinCountVNode = NULL;
+  ValueProfNode *CurVNode = ValueCounters[CounterIndex];
+  uint64_t MinCount = UINT64_MAX;
 
   uint8_t VDataCount = 0;
-  while (CurrentVNode) {
-    if (TargetValue == CurrentVNode->VData.Value) {
-      CurrentVNode->VData.Count++;
+  while (CurVNode) {
+    if (TargetValue == CurVNode->Value) {
+      CurVNode->Count++;
       return;
     }
-    PrevVNode = CurrentVNode;
-    CurrentVNode = CurrentVNode->Next;
+    if (CurVNode->Count < MinCount) {
+      MinCount = CurVNode->Count;
+      MinCountVNode = CurVNode;
+    }
+    PrevVNode = CurVNode;
+    CurVNode = CurVNode->Next;
     ++VDataCount;
   }
 
-  if (VDataCount >= INSTR_PROF_MAX_NUM_VAL_PER_SITE)
+  if (VDataCount >= VPMaxNumValsPerSite) {
+    /* Bump down the min count node's count. If it reaches 0,
+     * evict it. This eviction/replacement policy makes hot
+     * targets more sticky while cold targets less so. In other
+     * words, it makes it less likely for the hot targets to be
+     * prematurely evicted during the warmup/establishment period,
+     * when their counts are still low. In the special case when
+     * the number of values tracked is reduced to only one, this
+     * policy will guarantee that the dominating target with >50%
+     * total count will survive in the end. Note that this scheme
+     * allows the runtime to track the min count node in an adaptive
+     * manner. It can correct previous mistakes and eventually
+     * lock on a cold target that is already in a stable state.
+     *
+     * In very rare cases, this replacement scheme may still lead
+     * to target loss. For instance, out of \c N value slots, \c N-1
+     * slots are occupied by lukewarm targets during the warmup
+     * period and the remaining slot is contended for by two or more
+     * very hot targets. If those hot targets occur in an interleaved
+     * way, none of them will survive (gain enough weight to throw out
+     * other established entries) due to the ping-pong effect.
+     * To handle this situation, the user can choose to increase the max
+     * number of tracked values per value site. Alternatively, a more
+     * expensive eviction mechanism can be implemented. It requires
+     * the runtime to track the total number of evictions per site.
+     * When the total number of evictions reaches a certain threshold,
+     * the runtime can wipe out more than one lowest-count entry
+     * to make room for hot targets.
+     */
+    if (!(--MinCountVNode->Count)) {
+      CurVNode = MinCountVNode;
+      CurVNode->Value = TargetValue;
+      CurVNode->Count++;
+    }
     return;
+  }
 
-  CurrentVNode = (ValueProfNode *)calloc(1, sizeof(ValueProfNode));
-  if (!CurrentVNode)
+  CurVNode = allocateOneNode(PData, CounterIndex, TargetValue);
+  if (!CurVNode)
     return;
-
-  CurrentVNode->VData.Value = TargetValue;
-  CurrentVNode->VData.Count++;
+  CurVNode->Value = TargetValue;
+  CurVNode->Count++;
 
   uint32_t Success = 0;
   if (!ValueCounters[CounterIndex])
     Success =
-        COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurrentVNode);
+        COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode);
   else if (PrevVNode && !PrevVNode->Next)
-    Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurrentVNode);
+    Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode);
 
-  if (!Success) {
-    free(CurrentVNode);
+  if (!Success && !hasStaticCounters) {
+    free(CurVNode);
     return;
   }
 }
 
-COMPILER_RT_VISIBILITY ValueProfData **
-__llvm_profile_gather_value_data(uint64_t *ValueDataSize) {
-  size_t S = 0;
-  __llvm_profile_data *I;
-  ValueProfData **ValueDataArray;
+/*
+ * A wrapper struct that represents value profile runtime data.
+ * Like the InstrProfRecord class used by the profiling host tools,
+ * ValueProfRuntimeRecord implements the abstract interfaces defined in
+ * ValueProfRecordClosure so that the runtime data can be serialized using
+ * the shared C implementation.
+ */
+typedef struct ValueProfRuntimeRecord {
+  const __llvm_profile_data *Data;
+  ValueProfNode **NodesKind[IPVK_Last + 1];
+  uint8_t **SiteCountArray;
+} ValueProfRuntimeRecord;
 
-  const __llvm_profile_data *DataEnd = __llvm_profile_end_data();
-  const __llvm_profile_data *DataBegin = __llvm_profile_begin_data();
+/* ValueProfRecordClosure Interface implementation. */
 
-  if (!ValueDataSize)
-    return NULL;
+static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
+  return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK];
+}
 
-  ValueDataArray = (ValueProfData **)calloc(
-      __llvm_profile_get_data_size(DataBegin, DataEnd), sizeof(void *));
-  if (!ValueDataArray)
-    PROF_OOM_RETURN("Failed to write value profile data ");
+static uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
+  uint32_t S = 0, I;
+  const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
+  if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR)
+    return 0;
+  for (I = 0; I < Record->Data->NumValueSites[VK]; I++)
+    S += Record->SiteCountArray[VK][I];
+  return S;
+}
 
-  /*
-   * Compute the total Size of the buffer to hold ValueProfData
-   * structures for functions with value profile data.
-   */
-  for (I = (__llvm_profile_data *)DataBegin; I < DataEnd; ++I) {
-    ValueProfRuntimeRecord R;
-    if (initializeValueProfRuntimeRecord(&R, I->NumValueSites, I->Values))
-      PROF_OOM_RETURN("Failed to write value profile data ");
+static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK,
+                                         uint32_t S) {
+  const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
+  return Record->SiteCountArray[VK][S];
+}
 
-    /* Compute the size of ValueProfData from this runtime record.  */
-    if (getNumValueKindsRT(&R) != 0) {
-      ValueProfData *VD = NULL;
-      uint32_t VS = getValueProfDataSizeRT(&R);
-      VD = (ValueProfData *)calloc(VS, sizeof(uint8_t));
-      if (!VD)
-        PROF_OOM_RETURN("Failed to write value profile data ");
-      serializeValueProfDataFromRT(&R, VD);
-      ValueDataArray[I - DataBegin] = VD;
-      S += VS;
+static ValueProfRuntimeRecord RTRecord;
+static ValueProfRecordClosure RTRecordClosure = {
+    &RTRecord,          INSTR_PROF_NULLPTR, /* GetNumValueKinds */
+    getNumValueSitesRT, getNumValueDataRT,  getNumValueDataForSiteRT,
+    INSTR_PROF_NULLPTR, /* RemapValueData */
+    INSTR_PROF_NULLPTR, /* GetValueForSite, */
+    INSTR_PROF_NULLPTR  /* AllocValueProfData */
+};
+
+static uint32_t
+initializeValueProfRuntimeRecord(const __llvm_profile_data *Data,
+                                 uint8_t *SiteCountArray[]) {
+  unsigned I, J, S = 0, NumValueKinds = 0;
+  ValueProfNode **Nodes = (ValueProfNode **)Data->Values;
+  RTRecord.Data = Data;
+  RTRecord.SiteCountArray = SiteCountArray;
+  for (I = 0; I <= IPVK_Last; I++) {
+    uint16_t N = Data->NumValueSites[I];
+    if (!N)
+      continue;
+
+    NumValueKinds++;
+
+    RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR;
+    for (J = 0; J < N; J++) {
+      /* Compute value count for each site. */
+      uint32_t C = 0;
+      ValueProfNode *Site =
+          Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR;
+      while (Site) {
+        C++;
+        Site = Site->Next;
+      }
+      if (C > UCHAR_MAX)
+        C = UCHAR_MAX;
+      RTRecord.SiteCountArray[I][J] = C;
     }
-    finalizeValueProfRuntimeRecord(&R);
+    S += N;
   }
+  return NumValueKinds;
+}
 
-  if (!S) {
-    free(ValueDataArray);
-    ValueDataArray = NULL;
+static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site,
+                                        InstrProfValueData *Dst,
+                                        ValueProfNode *StartNode, uint32_t N) {
+  unsigned I;
+  ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site];
+  for (I = 0; I < N; I++) {
+    Dst[I].Value = VNode->Value;
+    Dst[I].Count = VNode->Count;
+    VNode = VNode->Next;
   }
+  return VNode;
+}
 
-  *ValueDataSize = S;
-  return ValueDataArray;
+static uint32_t getValueProfDataSizeWrapper(void) {
+  return getValueProfDataSize(&RTRecordClosure);
+}
+
+static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) {
+  return getNumValueDataForSiteRT(&RTRecord, VK, S);
+}
+
+static VPDataReaderType TheVPDataReader = {
+    initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,
+    getFirstValueProfRecord,          getNumValueDataForSiteWrapper,
+    getValueProfDataSizeWrapper,      getNextNValueData};
+
+COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader() {
+  return &TheVPDataReader;
 }
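
With a single tracked slot, the decrement-and-evict policy described above reduces to a majority-vote scheme: a target that accounts for more than half of all occurrences ends up owning the slot. The toy model below is illustration only; the input stream is made up and none of this code is part of the runtime.

    /* Single-slot model of the decrement-and-evict policy. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      uint64_t SlotValue = 0, SlotCount = 0;
      /* Value 7 accounts for more than half of the samples. */
      const uint64_t Stream[] = {7, 3, 7, 5, 7, 7, 9, 7, 7, 2, 7};
      unsigned I;
      for (I = 0; I < sizeof(Stream) / sizeof(Stream[0]); ++I) {
        uint64_t V = Stream[I];
        if (SlotCount && SlotValue == V)
          ++SlotCount;                      /* Hit: bump the tracked target. */
        else if (SlotCount == 0 || --SlotCount == 0) {
          SlotValue = V;                    /* Empty slot or count hit zero:  */
          SlotCount = 1;                    /* the new value takes the slot.  */
        }
      }
      /* Prints: surviving target: 7 (count 3) */
      printf("surviving target: %llu (count %llu)\n",
             (unsigned long long)SlotValue, (unsigned long long)SlotCount);
      return 0;
    }
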
diff --git a/lib/profile/InstrProfilingWriter.c b/lib/profile/InstrProfilingWriter.c
index 93879bb..95f37e8 100644
--- a/lib/profile/InstrProfilingWriter.c
+++ b/lib/profile/InstrProfilingWriter.c
@@ -9,20 +9,31 @@
 
 #include "InstrProfiling.h"
 #include "InstrProfilingInternal.h"
+#ifdef _MSC_VER
+/* For _alloca */
+#include <malloc.h>
+#endif
 #include <string.h>
 
 #define INSTR_PROF_VALUE_PROF_DATA
 #include "InstrProfData.inc"
-void (*FreeHook)(void *) = NULL;
-void* (*CallocHook)(size_t, size_t) = NULL;
-uint32_t VPBufferSize = 0;
+
+COMPILER_RT_VISIBILITY void (*FreeHook)(void *) = NULL;
+static ProfBufferIO TheBufferIO;
+#define VP_BUFFER_SIZE 8 * 1024
+static uint8_t BufferIOBuffer[VP_BUFFER_SIZE];
+static InstrProfValueData VPDataArray[16];
+static uint32_t VPDataArraySize = sizeof(VPDataArray) / sizeof(*VPDataArray);
+
+COMPILER_RT_VISIBILITY uint8_t *DynamicBufferIOBuffer = 0;
+COMPILER_RT_VISIBILITY uint32_t VPBufferSize = 0;
 
 /* The buffer writer is responsible for keeping the writer state
  * across calls.
  */
-COMPILER_RT_VISIBILITY uint32_t llvmBufferWriter(ProfDataIOVec *IOVecs,
-                                                 uint32_t NumIOVecs,
-                                                 void **WriterCtx) {
+COMPILER_RT_VISIBILITY uint32_t lprofBufferWriter(ProfDataIOVec *IOVecs,
+                                                  uint32_t NumIOVecs,
+                                                  void **WriterCtx) {
   uint32_t I;
   char **Buffer = (char **)WriterCtx;
   for (I = 0; I < NumIOVecs; I++) {
@@ -43,28 +54,31 @@
 }
 
 COMPILER_RT_VISIBILITY ProfBufferIO *
-llvmCreateBufferIO(WriterCallback FileWriter, void *File, uint32_t BufferSz) {
-  ProfBufferIO *BufferIO = (ProfBufferIO *)CallocHook(1, sizeof(ProfBufferIO));
-  uint8_t *Buffer = (uint8_t *)CallocHook(1, BufferSz);
+lprofCreateBufferIO(WriterCallback FileWriter, void *File) {
+  uint8_t *Buffer = DynamicBufferIOBuffer;
+  uint32_t BufferSize = VPBufferSize;
   if (!Buffer) {
-    FreeHook(BufferIO);
-    return 0;
+    Buffer = &BufferIOBuffer[0];
+    BufferSize = sizeof(BufferIOBuffer);
   }
-  llvmInitBufferIO(BufferIO, FileWriter, File, Buffer, BufferSz);
-  return BufferIO;
+  llvmInitBufferIO(&TheBufferIO, FileWriter, File, Buffer, BufferSize);
+  return &TheBufferIO;
 }
 
-COMPILER_RT_VISIBILITY void llvmDeleteBufferIO(ProfBufferIO *BufferIO) {
-  FreeHook(BufferIO->BufferStart);
-  FreeHook(BufferIO);
+COMPILER_RT_VISIBILITY void lprofDeleteBufferIO(ProfBufferIO *BufferIO) {
+  if (DynamicBufferIOBuffer) {
+    FreeHook(DynamicBufferIOBuffer);
+    DynamicBufferIOBuffer = 0;
+    VPBufferSize = 0;
+  }
 }
 
 COMPILER_RT_VISIBILITY int
-llvmBufferIOWrite(ProfBufferIO *BufferIO, const uint8_t *Data, uint32_t Size) {
+lprofBufferIOWrite(ProfBufferIO *BufferIO, const uint8_t *Data, uint32_t Size) {
   /* Buffer is not large enough, it is time to flush.  */
   if (Size + BufferIO->CurOffset > BufferIO->BufferSz) {
-     if (llvmBufferIOFlush(BufferIO) != 0)
-       return -1;
+    if (lprofBufferIOFlush(BufferIO) != 0)
+      return -1;
   }
   /* Special case, bypass the buffer completely. */
   ProfDataIOVec IO[] = {{Data, sizeof(uint8_t), Size}};
@@ -74,13 +88,13 @@
   } else {
     /* Write the data to buffer */
     uint8_t *Buffer = BufferIO->BufferStart + BufferIO->CurOffset;
-    llvmBufferWriter(IO, 1, (void **)&Buffer);
+    lprofBufferWriter(IO, 1, (void **)&Buffer);
     BufferIO->CurOffset = Buffer - BufferIO->BufferStart;
   }
   return 0;
 }
 
-COMPILER_RT_VISIBILITY int llvmBufferIOFlush(ProfBufferIO *BufferIO) {
+COMPILER_RT_VISIBILITY int lprofBufferIOFlush(ProfBufferIO *BufferIO) {
   if (BufferIO->CurOffset) {
     ProfDataIOVec IO[] = {
         {BufferIO->BufferStart, sizeof(uint8_t), BufferIO->CurOffset}};
@@ -91,10 +105,129 @@
   return 0;
 }
 
-COMPILER_RT_VISIBILITY int llvmWriteProfData(WriterCallback Writer,
-                                             void *WriterCtx,
-                                             ValueProfData **ValueDataArray,
-                                             const uint64_t ValueDataSize) {
+/* Write out the value profile data for the function specified by \c Data.
+ * The implementation does not use the method \c serializeValueProfData,
+ * which depends on dynamic memory allocation. In this implementation,
+ * value profile data is written out to \c BufferIO piecemeal.
+ */
+static int writeOneValueProfData(ProfBufferIO *BufferIO,
+                                 VPDataReaderType *VPDataReader,
+                                 const __llvm_profile_data *Data) {
+  unsigned I, NumValueKinds = 0;
+  ValueProfData VPHeader;
+  uint8_t *SiteCountArray[IPVK_Last + 1];
+
+  for (I = 0; I <= IPVK_Last; I++) {
+    if (!Data->NumValueSites[I])
+      SiteCountArray[I] = 0;
+    else {
+      uint32_t Sz =
+          VPDataReader->GetValueProfRecordHeaderSize(Data->NumValueSites[I]) -
+          offsetof(ValueProfRecord, SiteCountArray);
+      /* Only use alloca for this small byte array to avoid excessive
+       * stack growth.  */
+      SiteCountArray[I] = (uint8_t *)COMPILER_RT_ALLOCA(Sz);
+      memset(SiteCountArray[I], 0, Sz);
+    }
+  }
+
+  /* If the returned NumValueKinds is 0, there is nothing to write; report
+     success and return. This should match the raw profile reader's behavior. */
+  if (!(NumValueKinds = VPDataReader->InitRTRecord(Data, SiteCountArray)))
+    return 0;
+
+  /* First write the header structure. */
+  VPHeader.TotalSize = VPDataReader->GetValueProfDataSize();
+  VPHeader.NumValueKinds = NumValueKinds;
+  if (lprofBufferIOWrite(BufferIO, (const uint8_t *)&VPHeader,
+                         sizeof(ValueProfData)))
+    return -1;
+
+  /* Make sure nothing else needs to be written before value profile
+   * records. */
+  if ((void *)VPDataReader->GetFirstValueProfRecord(&VPHeader) !=
+      (void *)(&VPHeader + 1))
+    return -1;
+
+  /* Write out the value profile record for each value kind
+   * one by one. */
+  for (I = 0; I <= IPVK_Last; I++) {
+    uint32_t J;
+    ValueProfRecord RecordHeader;
+    /* The size of the value prof record header without counting the
+     * site count array.  */
+    uint32_t RecordHeaderSize = offsetof(ValueProfRecord, SiteCountArray);
+    uint32_t SiteCountArraySize;
+
+    if (!Data->NumValueSites[I])
+      continue;
+
+    /* Write out the record header.  */
+    RecordHeader.Kind = I;
+    RecordHeader.NumValueSites = Data->NumValueSites[I];
+    if (lprofBufferIOWrite(BufferIO, (const uint8_t *)&RecordHeader,
+                           RecordHeaderSize))
+      return -1;
+
+    /* Write out the site value count array including padding space. */
+    SiteCountArraySize =
+        VPDataReader->GetValueProfRecordHeaderSize(Data->NumValueSites[I]) -
+        RecordHeaderSize;
+    if (lprofBufferIOWrite(BufferIO, SiteCountArray[I], SiteCountArraySize))
+      return -1;
+
+    /* Write out the value profile data for each value site.  */
+    for (J = 0; J < Data->NumValueSites[I]; J++) {
+      uint32_t NRead, NRemain;
+      ValueProfNode *NextStartNode = 0;
+      NRemain = VPDataReader->GetNumValueDataForSite(I, J);
+      if (!NRemain)
+        continue;
+      /* Read and write out value data in small chunks till it is done. */
+      do {
+        NRead = (NRemain > VPDataArraySize ? VPDataArraySize : NRemain);
+        NextStartNode =
+            VPDataReader->GetValueData(I, /* ValueKind */
+                                       J, /* Site */
+                                       &VPDataArray[0], NextStartNode, NRead);
+        if (lprofBufferIOWrite(BufferIO, (const uint8_t *)&VPDataArray[0],
+                               NRead * sizeof(InstrProfValueData)))
+          return -1;
+        NRemain -= NRead;
+      } while (NRemain != 0);
+    }
+  }
+  /* All done, report success.  */
+  return 0;
+}
+
+static int writeValueProfData(WriterCallback Writer, void *WriterCtx,
+                              VPDataReaderType *VPDataReader,
+                              const __llvm_profile_data *DataBegin,
+                              const __llvm_profile_data *DataEnd) {
+  ProfBufferIO *BufferIO;
+  const __llvm_profile_data *DI = 0;
+
+  if (!VPDataReader)
+    return 0;
+
+  BufferIO = lprofCreateBufferIO(Writer, WriterCtx);
+
+  for (DI = DataBegin; DI < DataEnd; DI++) {
+    if (writeOneValueProfData(BufferIO, VPDataReader, DI))
+      return -1;
+  }
+
+  if (lprofBufferIOFlush(BufferIO) != 0)
+    return -1;
+  lprofDeleteBufferIO(BufferIO);
+
+  return 0;
+}
+
+COMPILER_RT_VISIBILITY int lprofWriteData(WriterCallback Writer,
+                                          void *WriterCtx,
+                                          VPDataReaderType *VPDataReader) {
   /* Match logic in __llvm_profile_write_buffer(). */
   const __llvm_profile_data *DataBegin = __llvm_profile_begin_data();
   const __llvm_profile_data *DataEnd = __llvm_profile_end_data();
@@ -102,46 +235,18 @@
   const uint64_t *CountersEnd = __llvm_profile_end_counters();
   const char *NamesBegin = __llvm_profile_begin_names();
   const char *NamesEnd = __llvm_profile_end_names();
-  return llvmWriteProfDataImpl(Writer, WriterCtx, DataBegin, DataEnd,
-                               CountersBegin, CountersEnd, ValueDataArray,
-                               ValueDataSize, NamesBegin, NamesEnd);
+  return lprofWriteDataImpl(Writer, WriterCtx, DataBegin, DataEnd,
+                            CountersBegin, CountersEnd, VPDataReader,
+                            NamesBegin, NamesEnd);
 }
 
-#define VP_BUFFER_SIZE 8 * 1024
-static int writeValueProfData(WriterCallback Writer, void *WriterCtx,
-                              ValueProfData **ValueDataBegin,
-                              uint64_t NumVData) {
-  ProfBufferIO *BufferIO;
-  uint32_t I = 0, BufferSz;
-
-  if (!ValueDataBegin)
-    return 0;
-
-  BufferSz = VPBufferSize ? VPBufferSize : VP_BUFFER_SIZE;
-  BufferIO = llvmCreateBufferIO(Writer, WriterCtx, BufferSz);
-
-  for (I = 0; I < NumVData; I++) {
-    ValueProfData *CurVData = ValueDataBegin[I];
-    if (!CurVData)
-      continue;
-    if (llvmBufferIOWrite(BufferIO, (const uint8_t *)CurVData,
-                          CurVData->TotalSize) != 0)
-      return -1;
-  }
-
-  if (llvmBufferIOFlush(BufferIO) != 0)
-    return -1;
-  llvmDeleteBufferIO(BufferIO);
-
-  return 0;
-}
-
-COMPILER_RT_VISIBILITY int llvmWriteProfDataImpl(
-    WriterCallback Writer, void *WriterCtx,
-    const __llvm_profile_data *DataBegin, const __llvm_profile_data *DataEnd,
-    const uint64_t *CountersBegin, const uint64_t *CountersEnd,
-    ValueProfData **ValueDataBegin, const uint64_t ValueDataSize,
-    const char *NamesBegin, const char *NamesEnd) {
+COMPILER_RT_VISIBILITY int
+lprofWriteDataImpl(WriterCallback Writer, void *WriterCtx,
+                   const __llvm_profile_data *DataBegin,
+                   const __llvm_profile_data *DataEnd,
+                   const uint64_t *CountersBegin, const uint64_t *CountersEnd,
+                   VPDataReaderType *VPDataReader, const char *NamesBegin,
+                   const char *NamesEnd) {
 
   /* Calculate size of sections. */
   const uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd);
@@ -158,7 +263,7 @@
   if (!DataSize)
     return 0;
 
-  /* Initialize header struture.  */
+/* Initialize header structure.  */
 #define INSTR_PROF_RAW_HEADER(Type, Name, Init) Header.Name = Init;
 #include "InstrProfData.inc"
 
@@ -171,5 +276,6 @@
   if (Writer(IOVec, sizeof(IOVec) / sizeof(*IOVec), &WriterCtx))
     return -1;
 
-  return writeValueProfData(Writer, WriterCtx, ValueDataBegin, DataSize);
+  return writeValueProfData(Writer, WriterCtx, VPDataReader, DataBegin,
+                            DataEnd);
 }
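
ProfBufferIO now drives all value-profile output through a fixed (or optionally dynamic) staging buffer instead of per-record heap allocations. The flow a writer goes through is: create the buffered stream around a WriterCallback, push chunks (which are staged, or flushed straight through when too large), flush the tail, and release any dynamic buffer. A minimal sketch, assuming the declarations in InstrProfilingInternal.h; MyWriter is a hypothetical fwrite-style callback, not part of this patch.

    /* Hypothetical buffered emission through the ProfBufferIO helpers. */
    extern uint32_t MyWriter(ProfDataIOVec *IOVecs, uint32_t NumIOVecs,
                             void **WriterCtx);

    static int emitChunks(void *Ctx, const uint8_t *Chunk, uint32_t ChunkSize,
                          uint32_t NumChunks) {
      uint32_t I;
      ProfBufferIO *IO = lprofCreateBufferIO(MyWriter, Ctx);
      for (I = 0; I < NumChunks; I++)
        if (lprofBufferIOWrite(IO, Chunk, ChunkSize)) /* stage or flush */
          return -1;
      if (lprofBufferIOFlush(IO))                     /* push out the tail */
        return -1;
      lprofDeleteBufferIO(IO);                        /* drop dynamic buffer */
      return 0;
    }
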
diff --git a/lib/profile/Makefile.mk b/lib/profile/Makefile.mk
deleted file mode 100644
index dd3a36f..0000000
--- a/lib/profile/Makefile.mk
+++ /dev/null
@@ -1,18 +0,0 @@
-#===- lib/profile/Makefile.mk ------------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := profile
-SubDirs :=
-
-Sources := $(foreach file,$(wildcard $(Dir)/*.c $(Dir)/*.cc),$(notdir $(file)))
-ObjNames := $(patsubst %.c,%.o,$(patsubst %.cc,%.o,$(Sources)))
-Implementation := Generic
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard $(Dir)/*.h)
diff --git a/lib/profile/WindowsMMap.h b/lib/profile/WindowsMMap.h
index 7b94eb2..271619a 100644
--- a/lib/profile/WindowsMMap.h
+++ b/lib/profile/WindowsMMap.h
@@ -21,13 +21,7 @@
  */
 #define PROT_READ     0x1
 #define PROT_WRITE    0x2
-/* This flag is only available in WinXP+ */
-#ifdef FILE_MAP_EXECUTE
-#define PROT_EXEC     0x4
-#else
-#define PROT_EXEC        0x0
-#define FILE_MAP_EXECUTE 0
-#endif
+#define PROT_EXEC     0x0
 
 #define MAP_FILE      0x00
 #define MAP_SHARED    0x01
diff --git a/lib/safestack/CMakeLists.txt b/lib/safestack/CMakeLists.txt
index 9c11bb6..a3870ab 100644
--- a/lib/safestack/CMakeLists.txt
+++ b/lib/safestack/CMakeLists.txt
@@ -1,4 +1,6 @@
 add_custom_target(safestack)
+set_target_properties(safestack PROPERTIES
+  FOLDER "Compiler-RT Misc")
 
 set(SAFESTACK_SOURCES safestack.cc)
 
diff --git a/lib/sanitizer_common/.clang-tidy b/lib/sanitizer_common/.clang-tidy
new file mode 100644
index 0000000..aa695cc
--- /dev/null
+++ b/lib/sanitizer_common/.clang-tidy
@@ -0,0 +1,12 @@
+Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,readability-identifier-naming'
+CheckOptions:
+  - key:             readability-identifier-naming.ClassCase
+    value:           CamelCase
+  - key:             readability-identifier-naming.EnumCase
+    value:           CamelCase
+  - key:             readability-identifier-naming.FunctionCase
+    value:           CamelCase
+  - key:             readability-identifier-naming.UnionCase
+    value:           CamelCase
+  - key:             readability-identifier-naming.VariableCase
+    value:           lower_case
diff --git a/lib/sanitizer_common/CMakeLists.txt b/lib/sanitizer_common/CMakeLists.txt
index 6a20f02..59a6b35 100644
--- a/lib/sanitizer_common/CMakeLists.txt
+++ b/lib/sanitizer_common/CMakeLists.txt
@@ -1,7 +1,7 @@
 # Build system for the common Sanitizer runtime support library components.
 # These components are shared between AddressSanitizer and ThreadSanitizer.
 
-set(SANITIZER_SOURCES
+set(SANITIZER_SOURCES_NOTERMINATION
   sanitizer_allocator.cc
   sanitizer_common.cc
   sanitizer_deadlock_detector1.cc
@@ -11,6 +11,7 @@
   sanitizer_libc.cc
   sanitizer_libignore.cc
   sanitizer_linux.cc
+  sanitizer_linux_s390.cc
   sanitizer_mac.cc
   sanitizer_persistent_allocator.cc
   sanitizer_platform_limits_linux.cc
@@ -33,6 +34,14 @@
   sanitizer_thread_registry.cc
   sanitizer_win.cc)
 
+if(UNIX AND NOT APPLE)
+  list(APPEND SANITIZER_SOURCES_NOTERMINATION
+    sanitizer_linux_x86_64.S)
+endif()
+
+set(SANITIZER_SOURCES
+  ${SANITIZER_SOURCES_NOTERMINATION} sanitizer_termination.cc)
+
 # Libc functions stubs. These sources should be linked instead of
 # SANITIZER_LIBCDEP_SOURCES when sanitizer_common library must not depend on
 # libc.
@@ -57,8 +66,16 @@
 set(SANITIZER_HEADERS
   sanitizer_addrhashmap.h
   sanitizer_allocator.h
+  sanitizer_allocator_bytemap.h
+  sanitizer_allocator_combined.h
   sanitizer_allocator_interface.h
   sanitizer_allocator_internal.h
+  sanitizer_allocator_local_cache.h
+  sanitizer_allocator_primary32.h
+  sanitizer_allocator_primary64.h
+  sanitizer_allocator_secondary.h
+  sanitizer_allocator_size_class_map.h
+  sanitizer_allocator_stats.h
   sanitizer_atomic.h
   sanitizer_atomic_clang.h
   sanitizer_atomic_msvc.h
@@ -109,26 +126,29 @@
 
 set(SANITIZER_COMMON_DEFINITIONS)
 
-if(MSVC)
-  list(APPEND SANITIZER_COMMON_DEFINITIONS
-    SANITIZER_NEEDS_SEGV=0)
-else()
-  list(APPEND SANITIZER_COMMON_DEFINITIONS
-    SANITIZER_NEEDS_SEGV=1)
-endif()
-
 include(CheckIncludeFile)
 append_have_file_definition(rpc/xdr.h HAVE_RPC_XDR_H SANITIZER_COMMON_DEFINITIONS)
 append_have_file_definition(tirpc/rpc/xdr.h HAVE_TIRPC_RPC_XDR_H SANITIZER_COMMON_DEFINITIONS)
 
 set(SANITIZER_CFLAGS ${SANITIZER_COMMON_CFLAGS})
-append_no_rtti_flag(SANITIZER_CFLAGS)
+append_rtti_flag(OFF SANITIZER_CFLAGS)
 
 append_list_if(SANITIZER_LIMIT_FRAME_SIZE -Wframe-larger-than=570
                SANITIZER_CFLAGS)
 append_list_if(COMPILER_RT_HAS_WGLOBAL_CONSTRUCTORS_FLAG -Wglobal-constructors
                SANITIZER_CFLAGS)
 
+if (LLVM_ENABLE_PEDANTIC AND UNIX AND NOT APPLE)
+  # With -pedantic, our .S files raise warnings about empty macro arguments
+  # from __USER_LABEL_PREFIX__ being an empty arg to GLUE().  Unfortunately,
+  # there is no simple way to test for an empty define, nor to disable just
+  # that warning or to disable -pedantic.  There is also no simple way to
+  # remove -pedantic from just this file (we'd have to remove from
+  # CMAKE_C*_FLAGS and re-add as a source property to all the non-.S files).
+  set_source_files_properties(sanitizer_linux_x86_64.S
+    PROPERTIES COMPILE_FLAGS "-w")
+endif ()
+
 if(APPLE)
   set(OS_OPTION OS ${SANITIZER_COMMON_SUPPORTED_OS})
 endif()
@@ -139,6 +159,12 @@
   SOURCES ${SANITIZER_SOURCES}
   CFLAGS ${SANITIZER_CFLAGS}
   DEFS ${SANITIZER_COMMON_DEFINITIONS})
+add_compiler_rt_object_libraries(RTSanitizerCommonNoTermination
+  ${OS_OPTION}
+  ARCHS ${SANITIZER_COMMON_SUPPORTED_ARCH}
+  SOURCES ${SANITIZER_SOURCES_NOTERMINATION}
+  CFLAGS ${SANITIZER_CFLAGS}
+  DEFS ${SANITIZER_COMMON_DEFINITIONS})
 add_compiler_rt_object_libraries(RTSanitizerCommonNoLibc
   ${OS_OPTION}
   ARCHS ${SANITIZER_COMMON_SUPPORTED_ARCH}
diff --git a/lib/sanitizer_common/Makefile.mk b/lib/sanitizer_common/Makefile.mk
deleted file mode 100644
index 5bb20d0..0000000
--- a/lib/sanitizer_common/Makefile.mk
+++ /dev/null
@@ -1,24 +0,0 @@
-#===- lib/sanitizer_common/Makefile.mk ---------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := sanitizer_common
-SubDirs :=
-
-Sources := $(foreach file,$(wildcard $(Dir)/*.cc),$(notdir $(file)))
-NolibcSources := $(foreach file,$(wildcard $(Dir)/*_nolibc.cc),$(notdir $(file)))
-Sources := $(filter-out $(NolibcSources),$(Sources))
-ObjNames := $(Sources:%.cc=%.o)
-
-Implementation := Generic
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard $(Dir)/*.h)
-
-# Define a convenience variable for all the sanitizer_common functions.
-SanitizerCommonFunctions := $(Sources:%.cc=%)
diff --git a/lib/sanitizer_common/sanitizer_allocator.cc b/lib/sanitizer_common/sanitizer_allocator.cc
index df298c6..2b5d192 100644
--- a/lib/sanitizer_common/sanitizer_allocator.cc
+++ b/lib/sanitizer_common/sanitizer_allocator.cc
@@ -22,18 +22,22 @@
 #if defined(SANITIZER_GO) || defined(SANITIZER_USE_MALLOC)
 # if SANITIZER_LINUX && !SANITIZER_ANDROID
 extern "C" void *__libc_malloc(uptr size);
+#  ifndef SANITIZER_GO
 extern "C" void *__libc_memalign(uptr alignment, uptr size);
+#  endif
 extern "C" void *__libc_realloc(void *ptr, uptr size);
 extern "C" void __libc_free(void *ptr);
 # else
 #  include <stdlib.h>
 #  define __libc_malloc malloc
+#  ifndef SANITIZER_GO
 static void *__libc_memalign(uptr alignment, uptr size) {
   void *p;
   uptr error = posix_memalign(&p, alignment, size);
   if (error) return nullptr;
   return p;
 }
+#  endif
 #  define __libc_realloc realloc
 #  define __libc_free free
 # endif
@@ -41,10 +45,20 @@
 static void *RawInternalAlloc(uptr size, InternalAllocatorCache *cache,
                               uptr alignment) {
   (void)cache;
+#ifndef SANITIZER_GO
   if (alignment == 0)
     return __libc_malloc(size);
   else
     return __libc_memalign(alignment, size);
+#else
+  // Windows does not provide __libc_memalign/posix_memalign. It provides
+  // _aligned_malloc, but blocks allocated with it can't be passed to free;
+  // they need to be passed to _aligned_free. The InternalAlloc interface does
+  // not account for such a requirement. Alignment does not seem to be used
+  // anywhere in the runtime, so just call __libc_malloc for now.
+  DCHECK_EQ(alignment, 0);
+  return __libc_malloc(size);
+#endif
 }
 
 static void *RawInternalRealloc(void *ptr, uptr size,
diff --git a/lib/sanitizer_common/sanitizer_allocator.h b/lib/sanitizer_common/sanitizer_allocator.h
index 10679cc..da74350 100644
--- a/lib/sanitizer_common/sanitizer_allocator.h
+++ b/lib/sanitizer_common/sanitizer_allocator.h
@@ -20,272 +20,12 @@
 #include "sanitizer_list.h"
 #include "sanitizer_mutex.h"
 #include "sanitizer_lfstack.h"
+#include "sanitizer_procmaps.h"
 
 namespace __sanitizer {
 
 // Prints error message and kills the program.
 void NORETURN ReportAllocatorCannotReturnNull();
-
-// SizeClassMap maps allocation sizes into size classes and back.
-// Class 0 corresponds to size 0.
-// Classes 1 - 16 correspond to sizes 16 to 256 (size = class_id * 16).
-// Next 4 classes: 256 + i * 64  (i = 1 to 4).
-// Next 4 classes: 512 + i * 128 (i = 1 to 4).
-// ...
-// Next 4 classes: 2^k + i * 2^(k-2) (i = 1 to 4).
-// Last class corresponds to kMaxSize = 1 << kMaxSizeLog.
-//
-// This structure of the size class map gives us:
-//   - Efficient table-free class-to-size and size-to-class functions.
-//   - Difference between two consequent size classes is betweed 14% and 25%
-//
-// This class also gives a hint to a thread-caching allocator about the amount
-// of chunks that need to be cached per-thread:
-//  - kMaxNumCached is the maximal number of chunks per size class.
-//  - (1 << kMaxBytesCachedLog) is the maximal number of bytes per size class.
-//
-// Part of output of SizeClassMap::Print():
-// c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0
-// c01 => s: 16 diff: +16 00% l 4 cached: 256 4096; id 1
-// c02 => s: 32 diff: +16 100% l 5 cached: 256 8192; id 2
-// c03 => s: 48 diff: +16 50% l 5 cached: 256 12288; id 3
-// c04 => s: 64 diff: +16 33% l 6 cached: 256 16384; id 4
-// c05 => s: 80 diff: +16 25% l 6 cached: 256 20480; id 5
-// c06 => s: 96 diff: +16 20% l 6 cached: 256 24576; id 6
-// c07 => s: 112 diff: +16 16% l 6 cached: 256 28672; id 7
-//
-// c08 => s: 128 diff: +16 14% l 7 cached: 256 32768; id 8
-// c09 => s: 144 diff: +16 12% l 7 cached: 256 36864; id 9
-// c10 => s: 160 diff: +16 11% l 7 cached: 256 40960; id 10
-// c11 => s: 176 diff: +16 10% l 7 cached: 256 45056; id 11
-// c12 => s: 192 diff: +16 09% l 7 cached: 256 49152; id 12
-// c13 => s: 208 diff: +16 08% l 7 cached: 256 53248; id 13
-// c14 => s: 224 diff: +16 07% l 7 cached: 256 57344; id 14
-// c15 => s: 240 diff: +16 07% l 7 cached: 256 61440; id 15
-//
-// c16 => s: 256 diff: +16 06% l 8 cached: 256 65536; id 16
-// c17 => s: 320 diff: +64 25% l 8 cached: 204 65280; id 17
-// c18 => s: 384 diff: +64 20% l 8 cached: 170 65280; id 18
-// c19 => s: 448 diff: +64 16% l 8 cached: 146 65408; id 19
-//
-// c20 => s: 512 diff: +64 14% l 9 cached: 128 65536; id 20
-// c21 => s: 640 diff: +128 25% l 9 cached: 102 65280; id 21
-// c22 => s: 768 diff: +128 20% l 9 cached: 85 65280; id 22
-// c23 => s: 896 diff: +128 16% l 9 cached: 73 65408; id 23
-//
-// c24 => s: 1024 diff: +128 14% l 10 cached: 64 65536; id 24
-// c25 => s: 1280 diff: +256 25% l 10 cached: 51 65280; id 25
-// c26 => s: 1536 diff: +256 20% l 10 cached: 42 64512; id 26
-// c27 => s: 1792 diff: +256 16% l 10 cached: 36 64512; id 27
-//
-// ...
-//
-// c48 => s: 65536 diff: +8192 14% l 16 cached: 1 65536; id 48
-// c49 => s: 81920 diff: +16384 25% l 16 cached: 1 81920; id 49
-// c50 => s: 98304 diff: +16384 20% l 16 cached: 1 98304; id 50
-// c51 => s: 114688 diff: +16384 16% l 16 cached: 1 114688; id 51
-//
-// c52 => s: 131072 diff: +16384 14% l 17 cached: 1 131072; id 52
-
-template <uptr kMaxSizeLog, uptr kMaxNumCachedT, uptr kMaxBytesCachedLog>
-class SizeClassMap {
-  static const uptr kMinSizeLog = 4;
-  static const uptr kMidSizeLog = kMinSizeLog + 4;
-  static const uptr kMinSize = 1 << kMinSizeLog;
-  static const uptr kMidSize = 1 << kMidSizeLog;
-  static const uptr kMidClass = kMidSize / kMinSize;
-  static const uptr S = 2;
-  static const uptr M = (1 << S) - 1;
-
- public:
-  static const uptr kMaxNumCached = kMaxNumCachedT;
-  // We transfer chunks between central and thread-local free lists in batches.
-  // For small size classes we allocate batches separately.
-  // For large size classes we use one of the chunks to store the batch.
-  struct TransferBatch {
-    TransferBatch *next;
-    uptr count;
-    void *batch[kMaxNumCached];
-  };
-
-  static const uptr kMaxSize = 1UL << kMaxSizeLog;
-  static const uptr kNumClasses =
-      kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1;
-  COMPILER_CHECK(kNumClasses >= 32 && kNumClasses <= 256);
-  static const uptr kNumClassesRounded =
-      kNumClasses == 32  ? 32 :
-      kNumClasses <= 64  ? 64 :
-      kNumClasses <= 128 ? 128 : 256;
-
-  static uptr Size(uptr class_id) {
-    if (class_id <= kMidClass)
-      return kMinSize * class_id;
-    class_id -= kMidClass;
-    uptr t = kMidSize << (class_id >> S);
-    return t + (t >> S) * (class_id & M);
-  }
-
-  static uptr ClassID(uptr size) {
-    if (size <= kMidSize)
-      return (size + kMinSize - 1) >> kMinSizeLog;
-    if (size > kMaxSize) return 0;
-    uptr l = MostSignificantSetBitIndex(size);
-    uptr hbits = (size >> (l - S)) & M;
-    uptr lbits = size & ((1 << (l - S)) - 1);
-    uptr l1 = l - kMidSizeLog;
-    return kMidClass + (l1 << S) + hbits + (lbits > 0);
-  }
-
-  static uptr MaxCached(uptr class_id) {
-    if (class_id == 0) return 0;
-    uptr n = (1UL << kMaxBytesCachedLog) / Size(class_id);
-    return Max<uptr>(1, Min(kMaxNumCached, n));
-  }
-
-  static void Print() {
-    uptr prev_s = 0;
-    uptr total_cached = 0;
-    for (uptr i = 0; i < kNumClasses; i++) {
-      uptr s = Size(i);
-      if (s >= kMidSize / 2 && (s & (s - 1)) == 0)
-        Printf("\n");
-      uptr d = s - prev_s;
-      uptr p = prev_s ? (d * 100 / prev_s) : 0;
-      uptr l = s ? MostSignificantSetBitIndex(s) : 0;
-      uptr cached = MaxCached(i) * s;
-      Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
-             "cached: %zd %zd; id %zd\n",
-             i, Size(i), d, p, l, MaxCached(i), cached, ClassID(s));
-      total_cached += cached;
-      prev_s = s;
-    }
-    Printf("Total cached: %zd\n", total_cached);
-  }
-
-  static bool SizeClassRequiresSeparateTransferBatch(uptr class_id) {
-    return Size(class_id) < sizeof(TransferBatch) -
-        sizeof(uptr) * (kMaxNumCached - MaxCached(class_id));
-  }
-
-  static void Validate() {
-    for (uptr c = 1; c < kNumClasses; c++) {
-      // Printf("Validate: c%zd\n", c);
-      uptr s = Size(c);
-      CHECK_NE(s, 0U);
-      CHECK_EQ(ClassID(s), c);
-      if (c != kNumClasses - 1)
-        CHECK_EQ(ClassID(s + 1), c + 1);
-      CHECK_EQ(ClassID(s - 1), c);
-      if (c)
-        CHECK_GT(Size(c), Size(c-1));
-    }
-    CHECK_EQ(ClassID(kMaxSize + 1), 0);
-
-    for (uptr s = 1; s <= kMaxSize; s++) {
-      uptr c = ClassID(s);
-      // Printf("s%zd => c%zd\n", s, c);
-      CHECK_LT(c, kNumClasses);
-      CHECK_GE(Size(c), s);
-      if (c > 0)
-        CHECK_LT(Size(c-1), s);
-    }
-  }
-};
-
-typedef SizeClassMap<17, 128, 16> DefaultSizeClassMap;
-typedef SizeClassMap<17, 64,  14> CompactSizeClassMap;
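The table above and the Size()/ClassID() pair encode the same rule: sizes up to kMidSize round up to the next multiple of 16, while larger sizes round up to one of four geometric sub-steps per power of two. Below is a standalone sketch of that arithmetic for DefaultSizeClassMap (kMinSizeLog = 4, kMidSizeLog = 8, S = 2), written for illustration only and omitting the kMaxSize check; it is not part of the patch.

// Illustration only; mirrors SizeClassMap<17, 128, 16>::ClassID().
#include <cassert>
#include <cstdint>

static uint64_t MsbIndex(uint64_t x) {  // stand-in for MostSignificantSetBitIndex()
  uint64_t i = 0;
  while (x >>= 1) ++i;
  return i;
}

static uint64_t ClassID(uint64_t size) {
  const uint64_t kMinSize = 16, kMidSize = 256, kMidClass = 16, S = 2, M = 3;
  if (size <= kMidSize) return (size + kMinSize - 1) >> 4;  // 16-byte steps
  uint64_t l = MsbIndex(size);
  uint64_t hbits = (size >> (l - S)) & M;                   // two bits below the MSB
  uint64_t lbits = size & ((1ULL << (l - S)) - 1);          // nonzero => round up
  return kMidClass + ((l - 8) << S) + hbits + (lbits > 0);
}

int main() {
  assert(ClassID(17) == 2);    // lands in the 32-byte class (c02 in the table)
  assert(ClassID(400) == 19);  // lands in the 448-byte class (c19 in the table)
  assert(ClassID(448) == 19);  // exact class sizes map to their own class
  return 0;
}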
-template<class SizeClassAllocator> struct SizeClassAllocatorLocalCache;
-
-// Memory allocator statistics
-enum AllocatorStat {
-  AllocatorStatAllocated,
-  AllocatorStatMapped,
-  AllocatorStatCount
-};
-
-typedef uptr AllocatorStatCounters[AllocatorStatCount];
-
-// Per-thread stats, live in per-thread cache.
-class AllocatorStats {
- public:
-  void Init() {
-    internal_memset(this, 0, sizeof(*this));
-  }
-  void InitLinkerInitialized() {}
-
-  void Add(AllocatorStat i, uptr v) {
-    v += atomic_load(&stats_[i], memory_order_relaxed);
-    atomic_store(&stats_[i], v, memory_order_relaxed);
-  }
-
-  void Sub(AllocatorStat i, uptr v) {
-    v = atomic_load(&stats_[i], memory_order_relaxed) - v;
-    atomic_store(&stats_[i], v, memory_order_relaxed);
-  }
-
-  void Set(AllocatorStat i, uptr v) {
-    atomic_store(&stats_[i], v, memory_order_relaxed);
-  }
-
-  uptr Get(AllocatorStat i) const {
-    return atomic_load(&stats_[i], memory_order_relaxed);
-  }
-
- private:
-  friend class AllocatorGlobalStats;
-  AllocatorStats *next_;
-  AllocatorStats *prev_;
-  atomic_uintptr_t stats_[AllocatorStatCount];
-};
-
-// Global stats, used for aggregation and querying.
-class AllocatorGlobalStats : public AllocatorStats {
- public:
-  void InitLinkerInitialized() {
-    next_ = this;
-    prev_ = this;
-  }
-  void Init() {
-    internal_memset(this, 0, sizeof(*this));
-    InitLinkerInitialized();
-  }
-
-  void Register(AllocatorStats *s) {
-    SpinMutexLock l(&mu_);
-    s->next_ = next_;
-    s->prev_ = this;
-    next_->prev_ = s;
-    next_ = s;
-  }
-
-  void Unregister(AllocatorStats *s) {
-    SpinMutexLock l(&mu_);
-    s->prev_->next_ = s->next_;
-    s->next_->prev_ = s->prev_;
-    for (int i = 0; i < AllocatorStatCount; i++)
-      Add(AllocatorStat(i), s->Get(AllocatorStat(i)));
-  }
-
-  void Get(AllocatorStatCounters s) const {
-    internal_memset(s, 0, AllocatorStatCount * sizeof(uptr));
-    SpinMutexLock l(&mu_);
-    const AllocatorStats *stats = this;
-    for (;;) {
-      for (int i = 0; i < AllocatorStatCount; i++)
-        s[i] += stats->Get(AllocatorStat(i));
-      stats = stats->next_;
-      if (stats == this)
-        break;
-    }
-    // All stats must be non-negative.
-    for (int i = 0; i < AllocatorStatCount; i++)
-      s[i] = ((sptr)s[i]) >= 0 ? s[i] : 0;
-  }
-
- private:
-  mutable SpinMutex mu_;
-};
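A hypothetical usage sketch of the two stats classes (the flow, the wrapper functions, and the THREADLOCAL placement are assumptions for illustration, not taken from a specific tool): each thread registers its AllocatorStats with the global object, and Get() aggregates over the circular list.

// Illustration only.
static AllocatorGlobalStats global_stats;        // global_stats.Init() must run once at startup
static THREADLOCAL AllocatorStats thread_stats;  // per-thread; must outlive its registration

void OnThreadStart() {
  thread_stats.Init();
  global_stats.Register(&thread_stats);
}

void OnAllocation(uptr size) {
  thread_stats.Add(AllocatorStatAllocated, size);  // relaxed atomic add
}

void ReportTotals() {
  AllocatorStatCounters totals;  // uptr[AllocatorStatCount]
  global_stats.Get(totals);      // walks the ring of registered threads under mu_
  Printf("allocated: %zd mapped: %zd\n",
         totals[AllocatorStatAllocated], totals[AllocatorStatMapped]);
}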
-
 // Allocators call these callbacks on mmap/munmap.
 struct NoOpMapUnmapCallback {
   void OnMap(uptr p, uptr size) const { }
@@ -295,1164 +35,18 @@
 // Callback type for iterating over chunks.
 typedef void (*ForEachChunkCallback)(uptr chunk, void *arg);
 
-// SizeClassAllocator64 -- allocator for 64-bit address space.
-//
-// Space: a portion of address space of kSpaceSize bytes starting at
-// a fixed address (kSpaceBeg). Both constants are powers of two and
-// kSpaceBeg is kSpaceSize-aligned.
-// At the beginning the entire space is mprotect-ed, then small parts of it
-// are mapped on demand.
-//
-// Region: a part of Space dedicated to a single size class.
-// There are kNumClasses Regions of equal size.
-//
-// UserChunk: a piece of memory returned to user.
-// MetaChunk: kMetadataSize bytes of metadata associated with a UserChunk.
-//
-// A Region looks like this:
-// UserChunk1 ... UserChunkN <gap> MetaChunkN ... MetaChunk1
-template <const uptr kSpaceBeg, const uptr kSpaceSize,
-          const uptr kMetadataSize, class SizeClassMap,
-          class MapUnmapCallback = NoOpMapUnmapCallback>
-class SizeClassAllocator64 {
- public:
-  typedef typename SizeClassMap::TransferBatch Batch;
-  typedef SizeClassAllocator64<kSpaceBeg, kSpaceSize, kMetadataSize,
-      SizeClassMap, MapUnmapCallback> ThisT;
-  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;
-
-  void Init() {
-    CHECK_EQ(kSpaceBeg,
-             reinterpret_cast<uptr>(MmapNoAccess(kSpaceBeg, kSpaceSize)));
-    MapWithCallback(kSpaceEnd, AdditionalSize());
-  }
-
-  void MapWithCallback(uptr beg, uptr size) {
-    CHECK_EQ(beg, reinterpret_cast<uptr>(MmapFixedOrDie(beg, size)));
-    MapUnmapCallback().OnMap(beg, size);
-  }
-
-  void UnmapWithCallback(uptr beg, uptr size) {
-    MapUnmapCallback().OnUnmap(beg, size);
-    UnmapOrDie(reinterpret_cast<void *>(beg), size);
-  }
-
-  static bool CanAllocate(uptr size, uptr alignment) {
-    return size <= SizeClassMap::kMaxSize &&
-      alignment <= SizeClassMap::kMaxSize;
-  }
-
-  NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
-                                uptr class_id) {
-    CHECK_LT(class_id, kNumClasses);
-    RegionInfo *region = GetRegionInfo(class_id);
-    Batch *b = region->free_list.Pop();
-    if (!b)
-      b = PopulateFreeList(stat, c, class_id, region);
-    region->n_allocated += b->count;
-    return b;
-  }
-
-  NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
-    RegionInfo *region = GetRegionInfo(class_id);
-    CHECK_GT(b->count, 0);
-    region->free_list.Push(b);
-    region->n_freed += b->count;
-  }
-
-  static bool PointerIsMine(const void *p) {
-    return reinterpret_cast<uptr>(p) / kSpaceSize == kSpaceBeg / kSpaceSize;
-  }
-
-  static uptr GetSizeClass(const void *p) {
-    return (reinterpret_cast<uptr>(p) / kRegionSize) % kNumClassesRounded;
-  }
-
-  void *GetBlockBegin(const void *p) {
-    uptr class_id = GetSizeClass(p);
-    uptr size = SizeClassMap::Size(class_id);
-    if (!size) return nullptr;
-    uptr chunk_idx = GetChunkIdx((uptr)p, size);
-    uptr reg_beg = (uptr)p & ~(kRegionSize - 1);
-    uptr beg = chunk_idx * size;
-    uptr next_beg = beg + size;
-    if (class_id >= kNumClasses) return nullptr;
-    RegionInfo *region = GetRegionInfo(class_id);
-    if (region->mapped_user >= next_beg)
-      return reinterpret_cast<void*>(reg_beg + beg);
-    return nullptr;
-  }
-
-  static uptr GetActuallyAllocatedSize(void *p) {
-    CHECK(PointerIsMine(p));
-    return SizeClassMap::Size(GetSizeClass(p));
-  }
-
-  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }
-
-  void *GetMetaData(const void *p) {
-    uptr class_id = GetSizeClass(p);
-    uptr size = SizeClassMap::Size(class_id);
-    uptr chunk_idx = GetChunkIdx(reinterpret_cast<uptr>(p), size);
-    return reinterpret_cast<void*>(kSpaceBeg + (kRegionSize * (class_id + 1)) -
-                                   (1 + chunk_idx) * kMetadataSize);
-  }
-
-  uptr TotalMemoryUsed() {
-    uptr res = 0;
-    for (uptr i = 0; i < kNumClasses; i++)
-      res += GetRegionInfo(i)->allocated_user;
-    return res;
-  }
-
-  // Test-only.
-  void TestOnlyUnmap() {
-    UnmapWithCallback(kSpaceBeg, kSpaceSize + AdditionalSize());
-  }
-
-  void PrintStats() {
-    uptr total_mapped = 0;
-    uptr n_allocated = 0;
-    uptr n_freed = 0;
-    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
-      RegionInfo *region = GetRegionInfo(class_id);
-      total_mapped += region->mapped_user;
-      n_allocated += region->n_allocated;
-      n_freed += region->n_freed;
-    }
-    Printf("Stats: SizeClassAllocator64: %zdM mapped in %zd allocations; "
-           "remains %zd\n",
-           total_mapped >> 20, n_allocated, n_allocated - n_freed);
-    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
-      RegionInfo *region = GetRegionInfo(class_id);
-      if (region->mapped_user == 0) continue;
-      Printf("  %02zd (%zd): total: %zd K allocs: %zd remains: %zd\n",
-             class_id,
-             SizeClassMap::Size(class_id),
-             region->mapped_user >> 10,
-             region->n_allocated,
-             region->n_allocated - region->n_freed);
-    }
-  }
-
-  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
-  // introspection API.
-  void ForceLock() {
-    for (uptr i = 0; i < kNumClasses; i++) {
-      GetRegionInfo(i)->mutex.Lock();
-    }
-  }
-
-  void ForceUnlock() {
-    for (int i = (int)kNumClasses - 1; i >= 0; i--) {
-      GetRegionInfo(i)->mutex.Unlock();
-    }
-  }
-
-  // Iterate over all existing chunks.
-  // The allocator must be locked when calling this function.
-  void ForEachChunk(ForEachChunkCallback callback, void *arg) {
-    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
-      RegionInfo *region = GetRegionInfo(class_id);
-      uptr chunk_size = SizeClassMap::Size(class_id);
-      uptr region_beg = kSpaceBeg + class_id * kRegionSize;
-      for (uptr chunk = region_beg;
-           chunk < region_beg + region->allocated_user;
-           chunk += chunk_size) {
-        // Too slow: CHECK_EQ((void *)chunk, GetBlockBegin((void *)chunk));
-        callback(chunk, arg);
-      }
-    }
-  }
-
-  static uptr AdditionalSize() {
-    return RoundUpTo(sizeof(RegionInfo) * kNumClassesRounded,
-                     GetPageSizeCached());
-  }
-
-  typedef SizeClassMap SizeClassMapT;
-  static const uptr kNumClasses = SizeClassMap::kNumClasses;
-  static const uptr kNumClassesRounded = SizeClassMap::kNumClassesRounded;
-
- private:
-  static const uptr kRegionSize = kSpaceSize / kNumClassesRounded;
-  static const uptr kSpaceEnd = kSpaceBeg + kSpaceSize;
-  COMPILER_CHECK(kSpaceBeg % kSpaceSize == 0);
-  // kRegionSize must be >= 2^32.
-  COMPILER_CHECK((kRegionSize) >= (1ULL << (SANITIZER_WORDSIZE / 2)));
-  // Populate the free list with at most this number of bytes at once
-  // or with one element if its size is greater.
-  static const uptr kPopulateSize = 1 << 14;
-  // Call mmap for user memory with at least this size.
-  static const uptr kUserMapSize = 1 << 16;
-  // Call mmap for metadata memory with at least this size.
-  static const uptr kMetaMapSize = 1 << 16;
-
-  struct RegionInfo {
-    BlockingMutex mutex;
-    LFStack<Batch> free_list;
-    uptr allocated_user;  // Bytes allocated for user memory.
-    uptr allocated_meta;  // Bytes allocated for metadata.
-    uptr mapped_user;  // Bytes mapped for user memory.
-    uptr mapped_meta;  // Bytes mapped for metadata.
-    uptr n_allocated, n_freed;  // Just stats.
-  };
-  COMPILER_CHECK(sizeof(RegionInfo) >= kCacheLineSize);
-
-  RegionInfo *GetRegionInfo(uptr class_id) {
-    CHECK_LT(class_id, kNumClasses);
-    RegionInfo *regions = reinterpret_cast<RegionInfo*>(kSpaceBeg + kSpaceSize);
-    return &regions[class_id];
-  }
-
-  static uptr GetChunkIdx(uptr chunk, uptr size) {
-    uptr offset = chunk % kRegionSize;
-    // Here we divide by a non-constant. This is costly.
-    // size always fits into 32-bits. If the offset fits too, use 32-bit div.
-    if (offset >> (SANITIZER_WORDSIZE / 2))
-      return offset / size;
-    return (u32)offset / (u32)size;
-  }
-
-  NOINLINE Batch* PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
-                                   uptr class_id, RegionInfo *region) {
-    BlockingMutexLock l(&region->mutex);
-    Batch *b = region->free_list.Pop();
-    if (b)
-      return b;
-    uptr size = SizeClassMap::Size(class_id);
-    uptr count = size < kPopulateSize ? SizeClassMap::MaxCached(class_id) : 1;
-    uptr beg_idx = region->allocated_user;
-    uptr end_idx = beg_idx + count * size;
-    uptr region_beg = kSpaceBeg + kRegionSize * class_id;
-    if (end_idx + size > region->mapped_user) {
-      // Do the mmap for the user memory.
-      uptr map_size = kUserMapSize;
-      while (end_idx + size > region->mapped_user + map_size)
-        map_size += kUserMapSize;
-      CHECK_GE(region->mapped_user + map_size, end_idx);
-      MapWithCallback(region_beg + region->mapped_user, map_size);
-      stat->Add(AllocatorStatMapped, map_size);
-      region->mapped_user += map_size;
-    }
-    uptr total_count = (region->mapped_user - beg_idx - size)
-        / size / count * count;
-    region->allocated_meta += total_count * kMetadataSize;
-    if (region->allocated_meta > region->mapped_meta) {
-      uptr map_size = kMetaMapSize;
-      while (region->allocated_meta > region->mapped_meta + map_size)
-        map_size += kMetaMapSize;
-      // Do the mmap for the metadata.
-      CHECK_GE(region->mapped_meta + map_size, region->allocated_meta);
-      MapWithCallback(region_beg + kRegionSize -
-                      region->mapped_meta - map_size, map_size);
-      region->mapped_meta += map_size;
-    }
-    CHECK_LE(region->allocated_meta, region->mapped_meta);
-    if (region->mapped_user + region->mapped_meta > kRegionSize) {
-      Printf("%s: Out of memory. Dying. ", SanitizerToolName);
-      Printf("The process has exhausted %zuMB for size class %zu.\n",
-          kRegionSize / 1024 / 1024, size);
-      Die();
-    }
-    for (;;) {
-      if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
-        b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
-      else
-        b = (Batch*)(region_beg + beg_idx);
-      b->count = count;
-      for (uptr i = 0; i < count; i++)
-        b->batch[i] = (void*)(region_beg + beg_idx + i * size);
-      region->allocated_user += count * size;
-      CHECK_LE(region->allocated_user, region->mapped_user);
-      beg_idx += count * size;
-      if (beg_idx + count * size + size > region->mapped_user)
-        break;
-      CHECK_GT(b->count, 0);
-      region->free_list.Push(b);
-    }
-    return b;
-  }
-};
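As a rough illustration of the template parameters (the constants below are invented for this sketch and are not taken from any particular sanitizer): a 4 TiB space with 16 bytes of per-chunk metadata and the default size class map. kSpaceBeg must be kSpaceSize-aligned and the resulting kRegionSize (kSpaceSize / kNumClassesRounded) must stay at or above 2^32, as the COMPILER_CHECKs above require.

// Illustration only.
static const uptr kIllustrativeSpaceBeg  = 0x600000000000ULL;  // 24 * 4 TiB, 4 TiB-aligned
static const uptr kIllustrativeSpaceSize = 0x040000000000ULL;  // 4 TiB
typedef SizeClassAllocator64<kIllustrativeSpaceBeg, kIllustrativeSpaceSize,
                             16 /* kMetadataSize */, DefaultSizeClassMap>
    Primary64;
typedef SizeClassAllocatorLocalCache<Primary64> Cache64;

// Primary64 primary; primary.Init();  // reserves the whole space, maps regions lazily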
-
-// Maps integers in range [0, kSize) to u8 values.
-template<u64 kSize>
-class FlatByteMap {
- public:
-  void TestOnlyInit() {
-    internal_memset(map_, 0, sizeof(map_));
-  }
-
-  void set(uptr idx, u8 val) {
-    CHECK_LT(idx, kSize);
-    CHECK_EQ(0U, map_[idx]);
-    map_[idx] = val;
-  }
-  u8 operator[] (uptr idx) {
-    CHECK_LT(idx, kSize);
-    // FIXME: CHECK may be too expensive here.
-    return map_[idx];
-  }
- private:
-  u8 map_[kSize];
-};
-
-// TwoLevelByteMap maps integers in range [0, kSize1*kSize2) to u8 values.
-// It is implemented as a two-dimensional array: array of kSize1 pointers
-// to kSize2-byte arrays. The secondary arrays are mmaped on demand.
-// Each value is initially zero and can be set to something else only once.
-// Setting and getting values from multiple threads is safe w/o extra locking.
-template <u64 kSize1, u64 kSize2, class MapUnmapCallback = NoOpMapUnmapCallback>
-class TwoLevelByteMap {
- public:
-  void TestOnlyInit() {
-    internal_memset(map1_, 0, sizeof(map1_));
-    mu_.Init();
-  }
-
-  void TestOnlyUnmap() {
-    for (uptr i = 0; i < kSize1; i++) {
-      u8 *p = Get(i);
-      if (!p) continue;
-      MapUnmapCallback().OnUnmap(reinterpret_cast<uptr>(p), kSize2);
-      UnmapOrDie(p, kSize2);
-    }
-  }
-
-  uptr size() const { return kSize1 * kSize2; }
-  uptr size1() const { return kSize1; }
-  uptr size2() const { return kSize2; }
-
-  void set(uptr idx, u8 val) {
-    CHECK_LT(idx, kSize1 * kSize2);
-    u8 *map2 = GetOrCreate(idx / kSize2);
-    CHECK_EQ(0U, map2[idx % kSize2]);
-    map2[idx % kSize2] = val;
-  }
-
-  u8 operator[] (uptr idx) const {
-    CHECK_LT(idx, kSize1 * kSize2);
-    u8 *map2 = Get(idx / kSize2);
-    if (!map2) return 0;
-    return map2[idx % kSize2];
-  }
-
- private:
-  u8 *Get(uptr idx) const {
-    CHECK_LT(idx, kSize1);
-    return reinterpret_cast<u8 *>(
-        atomic_load(&map1_[idx], memory_order_acquire));
-  }
-
-  u8 *GetOrCreate(uptr idx) {
-    u8 *res = Get(idx);
-    if (!res) {
-      SpinMutexLock l(&mu_);
-      if (!(res = Get(idx))) {
-        res = (u8*)MmapOrDie(kSize2, "TwoLevelByteMap");
-        MapUnmapCallback().OnMap(reinterpret_cast<uptr>(res), kSize2);
-        atomic_store(&map1_[idx], reinterpret_cast<uptr>(res),
-                     memory_order_release);
-      }
-    }
-    return res;
-  }
-
-  atomic_uintptr_t map1_[kSize1];
-  StaticSpinMutex mu_;
-};
-
-// SizeClassAllocator32 -- allocator for 32-bit address space.
-// This allocator can theoretically be used on 64-bit arch, but there it is less
-// efficient than SizeClassAllocator64.
-//
-// [kSpaceBeg, kSpaceBeg + kSpaceSize) is the range of addresses which can
-// be returned by MmapOrDie().
-//
-// Region:
-//   a result of a single call to MmapAlignedOrDie(kRegionSize, kRegionSize).
-// Since the regions are aligned by kRegionSize, there are exactly
-// kNumPossibleRegions possible regions in the address space and so we keep
-// a ByteMap possible_regions to store the size classes of each Region.
-// 0 size class means the region is not used by the allocator.
-//
-// One Region is used to allocate chunks of a single size class.
-// A Region looks like this:
-// UserChunk1 .. UserChunkN <gap> MetaChunkN .. MetaChunk1
-//
-// In order to avoid false sharing the objects of this class should be
-// cache-line aligned.
-template <const uptr kSpaceBeg, const u64 kSpaceSize,
-          const uptr kMetadataSize, class SizeClassMap,
-          const uptr kRegionSizeLog,
-          class ByteMap,
-          class MapUnmapCallback = NoOpMapUnmapCallback>
-class SizeClassAllocator32 {
- public:
-  typedef typename SizeClassMap::TransferBatch Batch;
-  typedef SizeClassAllocator32<kSpaceBeg, kSpaceSize, kMetadataSize,
-      SizeClassMap, kRegionSizeLog, ByteMap, MapUnmapCallback> ThisT;
-  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;
-
-  void Init() {
-    possible_regions.TestOnlyInit();
-    internal_memset(size_class_info_array, 0, sizeof(size_class_info_array));
-  }
-
-  void *MapWithCallback(uptr size) {
-    size = RoundUpTo(size, GetPageSizeCached());
-    void *res = MmapOrDie(size, "SizeClassAllocator32");
-    MapUnmapCallback().OnMap((uptr)res, size);
-    return res;
-  }
-
-  void UnmapWithCallback(uptr beg, uptr size) {
-    MapUnmapCallback().OnUnmap(beg, size);
-    UnmapOrDie(reinterpret_cast<void *>(beg), size);
-  }
-
-  static bool CanAllocate(uptr size, uptr alignment) {
-    return size <= SizeClassMap::kMaxSize &&
-      alignment <= SizeClassMap::kMaxSize;
-  }
-
-  void *GetMetaData(const void *p) {
-    CHECK(PointerIsMine(p));
-    uptr mem = reinterpret_cast<uptr>(p);
-    uptr beg = ComputeRegionBeg(mem);
-    uptr size = SizeClassMap::Size(GetSizeClass(p));
-    u32 offset = mem - beg;
-    uptr n = offset / (u32)size;  // 32-bit division
-    uptr meta = (beg + kRegionSize) - (n + 1) * kMetadataSize;
-    return reinterpret_cast<void*>(meta);
-  }
-
-  NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
-                                uptr class_id) {
-    CHECK_LT(class_id, kNumClasses);
-    SizeClassInfo *sci = GetSizeClassInfo(class_id);
-    SpinMutexLock l(&sci->mutex);
-    if (sci->free_list.empty())
-      PopulateFreeList(stat, c, sci, class_id);
-    CHECK(!sci->free_list.empty());
-    Batch *b = sci->free_list.front();
-    sci->free_list.pop_front();
-    return b;
-  }
-
-  NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
-    CHECK_LT(class_id, kNumClasses);
-    SizeClassInfo *sci = GetSizeClassInfo(class_id);
-    SpinMutexLock l(&sci->mutex);
-    CHECK_GT(b->count, 0);
-    sci->free_list.push_front(b);
-  }
-
-  bool PointerIsMine(const void *p) {
-    uptr mem = reinterpret_cast<uptr>(p);
-    if (mem < kSpaceBeg || mem >= kSpaceBeg + kSpaceSize)
-      return false;
-    return GetSizeClass(p) != 0;
-  }
-
-  uptr GetSizeClass(const void *p) {
-    return possible_regions[ComputeRegionId(reinterpret_cast<uptr>(p))];
-  }
-
-  void *GetBlockBegin(const void *p) {
-    CHECK(PointerIsMine(p));
-    uptr mem = reinterpret_cast<uptr>(p);
-    uptr beg = ComputeRegionBeg(mem);
-    uptr size = SizeClassMap::Size(GetSizeClass(p));
-    u32 offset = mem - beg;
-    u32 n = offset / (u32)size;  // 32-bit division
-    uptr res = beg + (n * (u32)size);
-    return reinterpret_cast<void*>(res);
-  }
-
-  uptr GetActuallyAllocatedSize(void *p) {
-    CHECK(PointerIsMine(p));
-    return SizeClassMap::Size(GetSizeClass(p));
-  }
-
-  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }
-
-  uptr TotalMemoryUsed() {
-    // No need to lock here.
-    uptr res = 0;
-    for (uptr i = 0; i < kNumPossibleRegions; i++)
-      if (possible_regions[i])
-        res += kRegionSize;
-    return res;
-  }
-
-  void TestOnlyUnmap() {
-    for (uptr i = 0; i < kNumPossibleRegions; i++)
-      if (possible_regions[i])
-        UnmapWithCallback((i * kRegionSize), kRegionSize);
-  }
-
-  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
-  // introspection API.
-  void ForceLock() {
-    for (uptr i = 0; i < kNumClasses; i++) {
-      GetSizeClassInfo(i)->mutex.Lock();
-    }
-  }
-
-  void ForceUnlock() {
-    for (int i = kNumClasses - 1; i >= 0; i--) {
-      GetSizeClassInfo(i)->mutex.Unlock();
-    }
-  }
-
-  // Iterate over all existing chunks.
-  // The allocator must be locked when calling this function.
-  void ForEachChunk(ForEachChunkCallback callback, void *arg) {
-    for (uptr region = 0; region < kNumPossibleRegions; region++)
-      if (possible_regions[region]) {
-        uptr chunk_size = SizeClassMap::Size(possible_regions[region]);
-        uptr max_chunks_in_region = kRegionSize / (chunk_size + kMetadataSize);
-        uptr region_beg = region * kRegionSize;
-        for (uptr chunk = region_beg;
-             chunk < region_beg + max_chunks_in_region * chunk_size;
-             chunk += chunk_size) {
-          // Too slow: CHECK_EQ((void *)chunk, GetBlockBegin((void *)chunk));
-          callback(chunk, arg);
-        }
-      }
-  }
-
-  void PrintStats() {
-  }
-
-  static uptr AdditionalSize() {
-    return 0;
-  }
-
-  typedef SizeClassMap SizeClassMapT;
-  static const uptr kNumClasses = SizeClassMap::kNumClasses;
-
- private:
-  static const uptr kRegionSize = 1 << kRegionSizeLog;
-  static const uptr kNumPossibleRegions = kSpaceSize / kRegionSize;
-
-  struct SizeClassInfo {
-    SpinMutex mutex;
-    IntrusiveList<Batch> free_list;
-    char padding[kCacheLineSize - sizeof(uptr) - sizeof(IntrusiveList<Batch>)];
-  };
-  COMPILER_CHECK(sizeof(SizeClassInfo) == kCacheLineSize);
-
-  uptr ComputeRegionId(uptr mem) {
-    uptr res = mem >> kRegionSizeLog;
-    CHECK_LT(res, kNumPossibleRegions);
-    return res;
-  }
-
-  uptr ComputeRegionBeg(uptr mem) {
-    return mem & ~(kRegionSize - 1);
-  }
-
-  uptr AllocateRegion(AllocatorStats *stat, uptr class_id) {
-    CHECK_LT(class_id, kNumClasses);
-    uptr res = reinterpret_cast<uptr>(MmapAlignedOrDie(kRegionSize, kRegionSize,
-                                      "SizeClassAllocator32"));
-    MapUnmapCallback().OnMap(res, kRegionSize);
-    stat->Add(AllocatorStatMapped, kRegionSize);
-    CHECK_EQ(0U, (res & (kRegionSize - 1)));
-    possible_regions.set(ComputeRegionId(res), static_cast<u8>(class_id));
-    return res;
-  }
-
-  SizeClassInfo *GetSizeClassInfo(uptr class_id) {
-    CHECK_LT(class_id, kNumClasses);
-    return &size_class_info_array[class_id];
-  }
-
-  void PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
-                        SizeClassInfo *sci, uptr class_id) {
-    uptr size = SizeClassMap::Size(class_id);
-    uptr reg = AllocateRegion(stat, class_id);
-    uptr n_chunks = kRegionSize / (size + kMetadataSize);
-    uptr max_count = SizeClassMap::MaxCached(class_id);
-    Batch *b = nullptr;
-    for (uptr i = reg; i < reg + n_chunks * size; i += size) {
-      if (!b) {
-        if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
-          b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
-        else
-          b = (Batch*)i;
-        b->count = 0;
-      }
-      b->batch[b->count++] = (void*)i;
-      if (b->count == max_count) {
-        CHECK_GT(b->count, 0);
-        sci->free_list.push_back(b);
-        b = nullptr;
-      }
-    }
-    if (b) {
-      CHECK_GT(b->count, 0);
-      sci->free_list.push_back(b);
-    }
-  }
-
-  ByteMap possible_regions;
-  SizeClassInfo size_class_info_array[kNumClasses];
-};
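A matching sketch for the 32-bit primary, again with invented constants: 1 MiB regions over the full 4 GiB address space give 4096 possible regions, so a FlatByteMap with one byte per region is enough.

// Illustration only.
static const uptr kIllustrativeRegionSizeLog = 20;                         // 1 MiB regions
typedef FlatByteMap<1ULL << (32 - kIllustrativeRegionSizeLog)> ByteMap32;  // 4096 entries
typedef SizeClassAllocator32<0 /* kSpaceBeg */, 1ULL << 32 /* kSpaceSize */,
                             16 /* kMetadataSize */, CompactSizeClassMap,
                             kIllustrativeRegionSizeLog, ByteMap32>
    Primary32;
typedef SizeClassAllocatorLocalCache<Primary32> Cache32;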
-
-// Objects of this type should be used as local caches for SizeClassAllocator64
-// or SizeClassAllocator32. Since the typical use of this class is to have one
-// object per thread in TLS, it has to be POD.
-template<class SizeClassAllocator>
-struct SizeClassAllocatorLocalCache {
-  typedef SizeClassAllocator Allocator;
-  static const uptr kNumClasses = SizeClassAllocator::kNumClasses;
-
-  void Init(AllocatorGlobalStats *s) {
-    stats_.Init();
-    if (s)
-      s->Register(&stats_);
-  }
-
-  void Destroy(SizeClassAllocator *allocator, AllocatorGlobalStats *s) {
-    Drain(allocator);
-    if (s)
-      s->Unregister(&stats_);
-  }
-
-  void *Allocate(SizeClassAllocator *allocator, uptr class_id) {
-    CHECK_NE(class_id, 0UL);
-    CHECK_LT(class_id, kNumClasses);
-    stats_.Add(AllocatorStatAllocated, SizeClassMap::Size(class_id));
-    PerClass *c = &per_class_[class_id];
-    if (UNLIKELY(c->count == 0))
-      Refill(allocator, class_id);
-    void *res = c->batch[--c->count];
-    PREFETCH(c->batch[c->count - 1]);
-    return res;
-  }
-
-  void Deallocate(SizeClassAllocator *allocator, uptr class_id, void *p) {
-    CHECK_NE(class_id, 0UL);
-    CHECK_LT(class_id, kNumClasses);
-    // If the first allocator call on a new thread is a deallocation, then
-    // max_count will be zero, leading to check failure.
-    InitCache();
-    stats_.Sub(AllocatorStatAllocated, SizeClassMap::Size(class_id));
-    PerClass *c = &per_class_[class_id];
-    CHECK_NE(c->max_count, 0UL);
-    if (UNLIKELY(c->count == c->max_count))
-      Drain(allocator, class_id);
-    c->batch[c->count++] = p;
-  }
-
-  void Drain(SizeClassAllocator *allocator) {
-    for (uptr class_id = 0; class_id < kNumClasses; class_id++) {
-      PerClass *c = &per_class_[class_id];
-      while (c->count > 0)
-        Drain(allocator, class_id);
-    }
-  }
-
-  // private:
-  typedef typename SizeClassAllocator::SizeClassMapT SizeClassMap;
-  typedef typename SizeClassMap::TransferBatch Batch;
-  struct PerClass {
-    uptr count;
-    uptr max_count;
-    void *batch[2 * SizeClassMap::kMaxNumCached];
-  };
-  PerClass per_class_[kNumClasses];
-  AllocatorStats stats_;
-
-  void InitCache() {
-    if (per_class_[1].max_count)
-      return;
-    for (uptr i = 0; i < kNumClasses; i++) {
-      PerClass *c = &per_class_[i];
-      c->max_count = 2 * SizeClassMap::MaxCached(i);
-    }
-  }
-
-  NOINLINE void Refill(SizeClassAllocator *allocator, uptr class_id) {
-    InitCache();
-    PerClass *c = &per_class_[class_id];
-    Batch *b = allocator->AllocateBatch(&stats_, this, class_id);
-    CHECK_GT(b->count, 0);
-    for (uptr i = 0; i < b->count; i++)
-      c->batch[i] = b->batch[i];
-    c->count = b->count;
-    if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
-      Deallocate(allocator, SizeClassMap::ClassID(sizeof(Batch)), b);
-  }
-
-  NOINLINE void Drain(SizeClassAllocator *allocator, uptr class_id) {
-    InitCache();
-    PerClass *c = &per_class_[class_id];
-    Batch *b;
-    if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
-      b = (Batch*)Allocate(allocator, SizeClassMap::ClassID(sizeof(Batch)));
-    else
-      b = (Batch*)c->batch[0];
-    uptr cnt = Min(c->max_count / 2, c->count);
-    for (uptr i = 0; i < cnt; i++) {
-      b->batch[i] = c->batch[i];
-      c->batch[i] = c->batch[i + c->max_count / 2];
-    }
-    b->count = cnt;
-    c->count -= cnt;
-    CHECK_GT(b->count, 0);
-    allocator->DeallocateBatch(&stats_, class_id, b);
-  }
-};
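A hypothetical per-thread flow on top of the cache (Primary64 and Cache64 are the illustrative typedefs from the sketch after SizeClassAllocator64; the wrapper functions are invented): allocation and deallocation stay in TLS and only reach the shared allocator when a per-class batch runs empty or full.

// Illustration only; assumes size <= SizeClassMap::kMaxSize, otherwise the
// combined allocator below routes the request to the secondary allocator.
static Primary64 primary;          // shared; primary.Init() once at startup
static THREADLOCAL Cache64 cache;  // POD, lives in TLS

void RegisterThreadCache(AllocatorGlobalStats *gs) {
  cache.Init(gs);                  // registers this thread's stats
}

void *MyAlloc(uptr size) {
  return cache.Allocate(&primary, primary.ClassID(size));  // Refill()s if empty
}

void MyFree(void *p) {
  cache.Deallocate(&primary, primary.GetSizeClass(p), p);  // Drain()s if full
}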
-
-// This class can (de)allocate only large chunks of memory using mmap/unmap.
-// The main purpose of this allocator is to cover large and rare allocation
-// sizes not covered by more efficient allocators (e.g. SizeClassAllocator64).
-template <class MapUnmapCallback = NoOpMapUnmapCallback>
-class LargeMmapAllocator {
- public:
-  void InitLinkerInitialized(bool may_return_null) {
-    page_size_ = GetPageSizeCached();
-    atomic_store(&may_return_null_, may_return_null, memory_order_relaxed);
-  }
-
-  void Init(bool may_return_null) {
-    internal_memset(this, 0, sizeof(*this));
-    InitLinkerInitialized(may_return_null);
-  }
-
-  void *Allocate(AllocatorStats *stat, uptr size, uptr alignment) {
-    CHECK(IsPowerOfTwo(alignment));
-    uptr map_size = RoundUpMapSize(size);
-    if (alignment > page_size_)
-      map_size += alignment;
-    // Overflow.
-    if (map_size < size)
-      return ReturnNullOrDie();
-    uptr map_beg = reinterpret_cast<uptr>(
-        MmapOrDie(map_size, "LargeMmapAllocator"));
-    CHECK(IsAligned(map_beg, page_size_));
-    MapUnmapCallback().OnMap(map_beg, map_size);
-    uptr map_end = map_beg + map_size;
-    uptr res = map_beg + page_size_;
-    if (res & (alignment - 1))  // Align.
-      res += alignment - (res & (alignment - 1));
-    CHECK(IsAligned(res, alignment));
-    CHECK(IsAligned(res, page_size_));
-    CHECK_GE(res + size, map_beg);
-    CHECK_LE(res + size, map_end);
-    Header *h = GetHeader(res);
-    h->size = size;
-    h->map_beg = map_beg;
-    h->map_size = map_size;
-    uptr size_log = MostSignificantSetBitIndex(map_size);
-    CHECK_LT(size_log, ARRAY_SIZE(stats.by_size_log));
-    {
-      SpinMutexLock l(&mutex_);
-      uptr idx = n_chunks_++;
-      chunks_sorted_ = false;
-      CHECK_LT(idx, kMaxNumChunks);
-      h->chunk_idx = idx;
-      chunks_[idx] = h;
-      stats.n_allocs++;
-      stats.currently_allocated += map_size;
-      stats.max_allocated = Max(stats.max_allocated, stats.currently_allocated);
-      stats.by_size_log[size_log]++;
-      stat->Add(AllocatorStatAllocated, map_size);
-      stat->Add(AllocatorStatMapped, map_size);
-    }
-    return reinterpret_cast<void*>(res);
-  }
-
-  void *ReturnNullOrDie() {
-    if (atomic_load(&may_return_null_, memory_order_acquire))
-      return nullptr;
-    ReportAllocatorCannotReturnNull();
-  }
-
-  void SetMayReturnNull(bool may_return_null) {
-    atomic_store(&may_return_null_, may_return_null, memory_order_release);
-  }
-
-  void Deallocate(AllocatorStats *stat, void *p) {
-    Header *h = GetHeader(p);
-    {
-      SpinMutexLock l(&mutex_);
-      uptr idx = h->chunk_idx;
-      CHECK_EQ(chunks_[idx], h);
-      CHECK_LT(idx, n_chunks_);
-      chunks_[idx] = chunks_[n_chunks_ - 1];
-      chunks_[idx]->chunk_idx = idx;
-      n_chunks_--;
-      chunks_sorted_ = false;
-      stats.n_frees++;
-      stats.currently_allocated -= h->map_size;
-      stat->Sub(AllocatorStatAllocated, h->map_size);
-      stat->Sub(AllocatorStatMapped, h->map_size);
-    }
-    MapUnmapCallback().OnUnmap(h->map_beg, h->map_size);
-    UnmapOrDie(reinterpret_cast<void*>(h->map_beg), h->map_size);
-  }
-
-  uptr TotalMemoryUsed() {
-    SpinMutexLock l(&mutex_);
-    uptr res = 0;
-    for (uptr i = 0; i < n_chunks_; i++) {
-      Header *h = chunks_[i];
-      CHECK_EQ(h->chunk_idx, i);
-      res += RoundUpMapSize(h->size);
-    }
-    return res;
-  }
-
-  bool PointerIsMine(const void *p) {
-    return GetBlockBegin(p) != nullptr;
-  }
-
-  uptr GetActuallyAllocatedSize(void *p) {
-    return RoundUpTo(GetHeader(p)->size, page_size_);
-  }
-
-  // At least page_size_/2 metadata bytes are available.
-  void *GetMetaData(const void *p) {
-    // Too slow: CHECK_EQ(p, GetBlockBegin(p));
-    if (!IsAligned(reinterpret_cast<uptr>(p), page_size_)) {
-      Printf("%s: bad pointer %p\n", SanitizerToolName, p);
-      CHECK(IsAligned(reinterpret_cast<uptr>(p), page_size_));
-    }
-    return GetHeader(p) + 1;
-  }
-
-  void *GetBlockBegin(const void *ptr) {
-    uptr p = reinterpret_cast<uptr>(ptr);
-    SpinMutexLock l(&mutex_);
-    uptr nearest_chunk = 0;
-    // Cache-friendly linear search.
-    for (uptr i = 0; i < n_chunks_; i++) {
-      uptr ch = reinterpret_cast<uptr>(chunks_[i]);
-      if (p < ch) continue;  // p is to the left of this chunk, skip it.
-      if (p - ch < p - nearest_chunk)
-        nearest_chunk = ch;
-    }
-    if (!nearest_chunk)
-      return nullptr;
-    Header *h = reinterpret_cast<Header *>(nearest_chunk);
-    CHECK_GE(nearest_chunk, h->map_beg);
-    CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
-    CHECK_LE(nearest_chunk, p);
-    if (h->map_beg + h->map_size <= p)
-      return nullptr;
-    return GetUser(h);
-  }
-
-  // This function does the same as GetBlockBegin, but is much faster.
-  // Must be called with the allocator locked.
-  void *GetBlockBeginFastLocked(void *ptr) {
-    mutex_.CheckLocked();
-    uptr p = reinterpret_cast<uptr>(ptr);
-    uptr n = n_chunks_;
-    if (!n) return nullptr;
-    if (!chunks_sorted_) {
-      // Do one-time sort. chunks_sorted_ is reset in Allocate/Deallocate.
-      SortArray(reinterpret_cast<uptr*>(chunks_), n);
-      for (uptr i = 0; i < n; i++)
-        chunks_[i]->chunk_idx = i;
-      chunks_sorted_ = true;
-      min_mmap_ = reinterpret_cast<uptr>(chunks_[0]);
-      max_mmap_ = reinterpret_cast<uptr>(chunks_[n - 1]) +
-          chunks_[n - 1]->map_size;
-    }
-    if (p < min_mmap_ || p >= max_mmap_)
-      return nullptr;
-    uptr beg = 0, end = n - 1;
-    // This loop is a log(n) lower_bound. It does not check for the exact match
-    // to avoid expensive cache-thrashing loads.
-    while (end - beg >= 2) {
-      uptr mid = (beg + end) / 2;  // Invariant: mid >= beg + 1
-      if (p < reinterpret_cast<uptr>(chunks_[mid]))
-        end = mid - 1;  // We are not interested in chunks_[mid].
-      else
-        beg = mid;  // chunks_[mid] may still be what we want.
-    }
-
-    if (beg < end) {
-      CHECK_EQ(beg + 1, end);
-      // There are 2 chunks left, choose one.
-      if (p >= reinterpret_cast<uptr>(chunks_[end]))
-        beg = end;
-    }
-
-    Header *h = chunks_[beg];
-    if (h->map_beg + h->map_size <= p || p < h->map_beg)
-      return nullptr;
-    return GetUser(h);
-  }
-
-  void PrintStats() {
-    Printf("Stats: LargeMmapAllocator: allocated %zd times, "
-           "remains %zd (%zd K) max %zd M; by size logs: ",
-           stats.n_allocs, stats.n_allocs - stats.n_frees,
-           stats.currently_allocated >> 10, stats.max_allocated >> 20);
-    for (uptr i = 0; i < ARRAY_SIZE(stats.by_size_log); i++) {
-      uptr c = stats.by_size_log[i];
-      if (!c) continue;
-      Printf("%zd:%zd; ", i, c);
-    }
-    Printf("\n");
-  }
-
-  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
-  // introspection API.
-  void ForceLock() {
-    mutex_.Lock();
-  }
-
-  void ForceUnlock() {
-    mutex_.Unlock();
-  }
-
-  // Iterate over all existing chunks.
-  // The allocator must be locked when calling this function.
-  void ForEachChunk(ForEachChunkCallback callback, void *arg) {
-    for (uptr i = 0; i < n_chunks_; i++)
-      callback(reinterpret_cast<uptr>(GetUser(chunks_[i])), arg);
-  }
-
- private:
-  static const int kMaxNumChunks = 1 << FIRST_32_SECOND_64(15, 18);
-  struct Header {
-    uptr map_beg;
-    uptr map_size;
-    uptr size;
-    uptr chunk_idx;
-  };
-
-  Header *GetHeader(uptr p) {
-    CHECK(IsAligned(p, page_size_));
-    return reinterpret_cast<Header*>(p - page_size_);
-  }
-  Header *GetHeader(const void *p) {
-    return GetHeader(reinterpret_cast<uptr>(p));
-  }
-
-  void *GetUser(Header *h) {
-    CHECK(IsAligned((uptr)h, page_size_));
-    return reinterpret_cast<void*>(reinterpret_cast<uptr>(h) + page_size_);
-  }
-
-  uptr RoundUpMapSize(uptr size) {
-    return RoundUpTo(size, page_size_) + page_size_;
-  }
-
-  uptr page_size_;
-  Header *chunks_[kMaxNumChunks];
-  uptr n_chunks_;
-  uptr min_mmap_, max_mmap_;
-  bool chunks_sorted_;
-  struct Stats {
-    uptr n_allocs, n_frees, currently_allocated, max_allocated, by_size_log[64];
-  } stats;
-  atomic_uint8_t may_return_null_;
-  SpinMutex mutex_;
-};
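The secondary always prepends one page for the Header and rounds the user size up to page granularity. A standalone check of that arithmetic, assuming a 4096-byte page for concreteness:

// Illustration only; mirrors RoundUpMapSize() = RoundUpTo(size, page_size_) + page_size_.
#include <cassert>

int main() {
  const unsigned long kPage = 4096;
  unsigned long size = (1UL << 20) + 1;                        // 1 MiB + 1 byte
  unsigned long map_size = (size + kPage - 1) / kPage * kPage  // user data, page-rounded
                           + kPage;                            // plus one header page
  assert(map_size == (1UL << 20) + 2 * kPage);
  // The returned pointer is map_beg + page_size; GetHeader() steps one page back.
  return 0;
}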
-
-// This class implements a complete memory allocator by using two
-// internal allocators:
-// PrimaryAllocator is efficient, but may not allocate some sizes (alignments).
-//  When allocating 2^x bytes it should return 2^x aligned chunk.
-// PrimaryAllocator is used via a local AllocatorCache.
-// SecondaryAllocator can allocate anything, but is not efficient.
-template <class PrimaryAllocator, class AllocatorCache,
-          class SecondaryAllocator>  // NOLINT
-class CombinedAllocator {
- public:
-  void InitCommon(bool may_return_null) {
-    primary_.Init();
-    atomic_store(&may_return_null_, may_return_null, memory_order_relaxed);
-  }
-
-  void InitLinkerInitialized(bool may_return_null) {
-    secondary_.InitLinkerInitialized(may_return_null);
-    stats_.InitLinkerInitialized();
-    InitCommon(may_return_null);
-  }
-
-  void Init(bool may_return_null) {
-    secondary_.Init(may_return_null);
-    stats_.Init();
-    InitCommon(may_return_null);
-  }
-
-  void *Allocate(AllocatorCache *cache, uptr size, uptr alignment,
-                 bool cleared = false, bool check_rss_limit = false) {
-    // Returning 0 on malloc(0) may break a lot of code.
-    if (size == 0)
-      size = 1;
-    if (size + alignment < size)
-      return ReturnNullOrDie();
-    if (check_rss_limit && RssLimitIsExceeded())
-      return ReturnNullOrDie();
-    if (alignment > 8)
-      size = RoundUpTo(size, alignment);
-    void *res;
-    bool from_primary = primary_.CanAllocate(size, alignment);
-    if (from_primary)
-      res = cache->Allocate(&primary_, primary_.ClassID(size));
-    else
-      res = secondary_.Allocate(&stats_, size, alignment);
-    if (alignment > 8)
-      CHECK_EQ(reinterpret_cast<uptr>(res) & (alignment - 1), 0);
-    if (cleared && res && from_primary)
-      internal_bzero_aligned16(res, RoundUpTo(size, 16));
-    return res;
-  }
-
-  bool MayReturnNull() const {
-    return atomic_load(&may_return_null_, memory_order_acquire);
-  }
-
-  void *ReturnNullOrDie() {
-    if (MayReturnNull())
-      return nullptr;
-    ReportAllocatorCannotReturnNull();
-  }
-
-  void SetMayReturnNull(bool may_return_null) {
-    secondary_.SetMayReturnNull(may_return_null);
-    atomic_store(&may_return_null_, may_return_null, memory_order_release);
-  }
-
-  bool RssLimitIsExceeded() {
-    return atomic_load(&rss_limit_is_exceeded_, memory_order_acquire);
-  }
-
-  void SetRssLimitIsExceeded(bool rss_limit_is_exceeded) {
-    atomic_store(&rss_limit_is_exceeded_, rss_limit_is_exceeded,
-                 memory_order_release);
-  }
-
-  void Deallocate(AllocatorCache *cache, void *p) {
-    if (!p) return;
-    if (primary_.PointerIsMine(p))
-      cache->Deallocate(&primary_, primary_.GetSizeClass(p), p);
-    else
-      secondary_.Deallocate(&stats_, p);
-  }
-
-  void *Reallocate(AllocatorCache *cache, void *p, uptr new_size,
-                   uptr alignment) {
-    if (!p)
-      return Allocate(cache, new_size, alignment);
-    if (!new_size) {
-      Deallocate(cache, p);
-      return nullptr;
-    }
-    CHECK(PointerIsMine(p));
-    uptr old_size = GetActuallyAllocatedSize(p);
-    uptr memcpy_size = Min(new_size, old_size);
-    void *new_p = Allocate(cache, new_size, alignment);
-    if (new_p)
-      internal_memcpy(new_p, p, memcpy_size);
-    Deallocate(cache, p);
-    return new_p;
-  }
-
-  bool PointerIsMine(void *p) {
-    if (primary_.PointerIsMine(p))
-      return true;
-    return secondary_.PointerIsMine(p);
-  }
-
-  bool FromPrimary(void *p) {
-    return primary_.PointerIsMine(p);
-  }
-
-  void *GetMetaData(const void *p) {
-    if (primary_.PointerIsMine(p))
-      return primary_.GetMetaData(p);
-    return secondary_.GetMetaData(p);
-  }
-
-  void *GetBlockBegin(const void *p) {
-    if (primary_.PointerIsMine(p))
-      return primary_.GetBlockBegin(p);
-    return secondary_.GetBlockBegin(p);
-  }
-
-  // This function does the same as GetBlockBegin, but is much faster.
-  // Must be called with the allocator locked.
-  void *GetBlockBeginFastLocked(void *p) {
-    if (primary_.PointerIsMine(p))
-      return primary_.GetBlockBegin(p);
-    return secondary_.GetBlockBeginFastLocked(p);
-  }
-
-  uptr GetActuallyAllocatedSize(void *p) {
-    if (primary_.PointerIsMine(p))
-      return primary_.GetActuallyAllocatedSize(p);
-    return secondary_.GetActuallyAllocatedSize(p);
-  }
-
-  uptr TotalMemoryUsed() {
-    return primary_.TotalMemoryUsed() + secondary_.TotalMemoryUsed();
-  }
-
-  void TestOnlyUnmap() { primary_.TestOnlyUnmap(); }
-
-  void InitCache(AllocatorCache *cache) {
-    cache->Init(&stats_);
-  }
-
-  void DestroyCache(AllocatorCache *cache) {
-    cache->Destroy(&primary_, &stats_);
-  }
-
-  void SwallowCache(AllocatorCache *cache) {
-    cache->Drain(&primary_);
-  }
-
-  void GetStats(AllocatorStatCounters s) const {
-    stats_.Get(s);
-  }
-
-  void PrintStats() {
-    primary_.PrintStats();
-    secondary_.PrintStats();
-  }
-
-  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
-  // introspection API.
-  void ForceLock() {
-    primary_.ForceLock();
-    secondary_.ForceLock();
-  }
-
-  void ForceUnlock() {
-    secondary_.ForceUnlock();
-    primary_.ForceUnlock();
-  }
-
-  // Iterate over all existing chunks.
-  // The allocator must be locked when calling this function.
-  void ForEachChunk(ForEachChunkCallback callback, void *arg) {
-    primary_.ForEachChunk(callback, arg);
-    secondary_.ForEachChunk(callback, arg);
-  }
-
- private:
-  PrimaryAllocator primary_;
-  SecondaryAllocator secondary_;
-  AllocatorGlobalStats stats_;
-  atomic_uint8_t may_return_null_;
-  atomic_uint8_t rss_limit_is_exceeded_;
-};
-
 // Returns true if calloc(size, n) should return 0 due to overflow in size*n.
 bool CallocShouldReturnNullDueToOverflow(uptr size, uptr n);
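The declaration above only promises the check; a minimal sketch of the kind of test it implies (the real definition lives in the .cc file and may differ): calloc(size, n) must refuse requests whose product wraps around uptr.

// Illustration only, not the actual implementation.
bool CallocWouldOverflow(uptr size, uptr n) {
  if (size == 0 || n == 0) return false;
  return size > (uptr)-1 / n;  // true iff size * n > max uptr
}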
 
+#include "sanitizer_allocator_size_class_map.h"
+#include "sanitizer_allocator_stats.h"
+#include "sanitizer_allocator_primary64.h"
+#include "sanitizer_allocator_bytemap.h"
+#include "sanitizer_allocator_primary32.h"
+#include "sanitizer_allocator_local_cache.h"
+#include "sanitizer_allocator_secondary.h"
+#include "sanitizer_allocator_combined.h"
+
 } // namespace __sanitizer
 
 #endif // SANITIZER_ALLOCATOR_H
diff --git a/lib/sanitizer_common/sanitizer_allocator_bytemap.h b/lib/sanitizer_common/sanitizer_allocator_bytemap.h
new file mode 100644
index 0000000..92472cd
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_allocator_bytemap.h
@@ -0,0 +1,103 @@
+//===-- sanitizer_allocator_bytemap.h ---------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Part of the Sanitizer Allocator.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_ALLOCATOR_H
+#error This file must be included inside sanitizer_allocator.h
+#endif
+
+// Maps integers in range [0, kSize) to u8 values.
+template<u64 kSize>
+class FlatByteMap {
+ public:
+  void TestOnlyInit() {
+    internal_memset(map_, 0, sizeof(map_));
+  }
+
+  void set(uptr idx, u8 val) {
+    CHECK_LT(idx, kSize);
+    CHECK_EQ(0U, map_[idx]);
+    map_[idx] = val;
+  }
+  u8 operator[] (uptr idx) {
+    CHECK_LT(idx, kSize);
+    // FIXME: CHECK may be too expensive here.
+    return map_[idx];
+  }
+ private:
+  u8 map_[kSize];
+};
+
+// TwoLevelByteMap maps integers in range [0, kSize1*kSize2) to u8 values.
+// It is implemented as a two-dimensional array: array of kSize1 pointers
+// to kSize2-byte arrays. The secondary arrays are mmaped on demand.
+// Each value is initially zero and can be set to something else only once.
+// Setting and getting values from multiple threads is safe w/o extra locking.
+template <u64 kSize1, u64 kSize2, class MapUnmapCallback = NoOpMapUnmapCallback>
+class TwoLevelByteMap {
+ public:
+  void TestOnlyInit() {
+    internal_memset(map1_, 0, sizeof(map1_));
+    mu_.Init();
+  }
+
+  void TestOnlyUnmap() {
+    for (uptr i = 0; i < kSize1; i++) {
+      u8 *p = Get(i);
+      if (!p) continue;
+      MapUnmapCallback().OnUnmap(reinterpret_cast<uptr>(p), kSize2);
+      UnmapOrDie(p, kSize2);
+    }
+  }
+
+  uptr size() const { return kSize1 * kSize2; }
+  uptr size1() const { return kSize1; }
+  uptr size2() const { return kSize2; }
+
+  void set(uptr idx, u8 val) {
+    CHECK_LT(idx, kSize1 * kSize2);
+    u8 *map2 = GetOrCreate(idx / kSize2);
+    CHECK_EQ(0U, map2[idx % kSize2]);
+    map2[idx % kSize2] = val;
+  }
+
+  u8 operator[] (uptr idx) const {
+    CHECK_LT(idx, kSize1 * kSize2);
+    u8 *map2 = Get(idx / kSize2);
+    if (!map2) return 0;
+    return map2[idx % kSize2];
+  }
+
+ private:
+  u8 *Get(uptr idx) const {
+    CHECK_LT(idx, kSize1);
+    return reinterpret_cast<u8 *>(
+        atomic_load(&map1_[idx], memory_order_acquire));
+  }
+
+  u8 *GetOrCreate(uptr idx) {
+    u8 *res = Get(idx);
+    if (!res) {
+      SpinMutexLock l(&mu_);
+      if (!(res = Get(idx))) {
+        res = (u8*)MmapOrDie(kSize2, "TwoLevelByteMap");
+        MapUnmapCallback().OnMap(reinterpret_cast<uptr>(res), kSize2);
+        atomic_store(&map1_[idx], reinterpret_cast<uptr>(res),
+                     memory_order_release);
+      }
+    }
+    return res;
+  }
+
+  atomic_uintptr_t map1_[kSize1];
+  StaticSpinMutex mu_;
+};
+
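A quick illustration of the two-level map with invented sizes: 2^12 first-level slots times 2^12-byte leaves cover 2^24 region ids, yet only the leaves that are actually touched get mmap-ed.

// Illustration only.
typedef TwoLevelByteMap<1 << 12, 1 << 12> RegionMap;  // covers 2^24 ids

// RegionMap m; m.TestOnlyInit();
// m.set(5 * 4096 + 7, 3);  // maps leaf #5 on demand, records class id 3 (write-once)
// u8 v = m[5 * 4096 + 7];  // lock-free acquire load: v == 3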
diff --git a/lib/sanitizer_common/sanitizer_allocator_combined.h b/lib/sanitizer_common/sanitizer_allocator_combined.h
new file mode 100644
index 0000000..dceb64b
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_allocator_combined.h
@@ -0,0 +1,207 @@
+//===-- sanitizer_allocator_combined.h --------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Part of the Sanitizer Allocator.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_ALLOCATOR_H
+#error This file must be included inside sanitizer_allocator.h
+#endif
+
+// This class implements a complete memory allocator by using two
+// internal allocators:
+// PrimaryAllocator is efficient, but may not allocate some sizes (alignments).
+//  When allocating 2^x bytes it should return 2^x aligned chunk.
+// PrimaryAllocator is used via a local AllocatorCache.
+// SecondaryAllocator can allocate anything, but is not efficient.
+template <class PrimaryAllocator, class AllocatorCache,
+          class SecondaryAllocator>  // NOLINT
+class CombinedAllocator {
+ public:
+  void InitCommon(bool may_return_null) {
+    primary_.Init();
+    atomic_store(&may_return_null_, may_return_null, memory_order_relaxed);
+  }
+
+  void InitLinkerInitialized(bool may_return_null) {
+    secondary_.InitLinkerInitialized(may_return_null);
+    stats_.InitLinkerInitialized();
+    InitCommon(may_return_null);
+  }
+
+  void Init(bool may_return_null) {
+    secondary_.Init(may_return_null);
+    stats_.Init();
+    InitCommon(may_return_null);
+  }
+
+  void *Allocate(AllocatorCache *cache, uptr size, uptr alignment,
+                 bool cleared = false, bool check_rss_limit = false) {
+    // Returning 0 on malloc(0) may break a lot of code.
+    if (size == 0)
+      size = 1;
+    if (size + alignment < size)
+      return ReturnNullOrDie();
+    if (check_rss_limit && RssLimitIsExceeded())
+      return ReturnNullOrDie();
+    if (alignment > 8)
+      size = RoundUpTo(size, alignment);
+    void *res;
+    bool from_primary = primary_.CanAllocate(size, alignment);
+    if (from_primary)
+      res = cache->Allocate(&primary_, primary_.ClassID(size));
+    else
+      res = secondary_.Allocate(&stats_, size, alignment);
+    if (alignment > 8)
+      CHECK_EQ(reinterpret_cast<uptr>(res) & (alignment - 1), 0);
+    if (cleared && res && from_primary)
+      internal_bzero_aligned16(res, RoundUpTo(size, 16));
+    return res;
+  }
+
+  bool MayReturnNull() const {
+    return atomic_load(&may_return_null_, memory_order_acquire);
+  }
+
+  void *ReturnNullOrDie() {
+    if (MayReturnNull())
+      return nullptr;
+    ReportAllocatorCannotReturnNull();
+  }
+
+  void SetMayReturnNull(bool may_return_null) {
+    secondary_.SetMayReturnNull(may_return_null);
+    atomic_store(&may_return_null_, may_return_null, memory_order_release);
+  }
+
+  bool RssLimitIsExceeded() {
+    return atomic_load(&rss_limit_is_exceeded_, memory_order_acquire);
+  }
+
+  void SetRssLimitIsExceeded(bool rss_limit_is_exceeded) {
+    atomic_store(&rss_limit_is_exceeded_, rss_limit_is_exceeded,
+                 memory_order_release);
+  }
+
+  void Deallocate(AllocatorCache *cache, void *p) {
+    if (!p) return;
+    if (primary_.PointerIsMine(p))
+      cache->Deallocate(&primary_, primary_.GetSizeClass(p), p);
+    else
+      secondary_.Deallocate(&stats_, p);
+  }
+
+  void *Reallocate(AllocatorCache *cache, void *p, uptr new_size,
+                   uptr alignment) {
+    if (!p)
+      return Allocate(cache, new_size, alignment);
+    if (!new_size) {
+      Deallocate(cache, p);
+      return nullptr;
+    }
+    CHECK(PointerIsMine(p));
+    uptr old_size = GetActuallyAllocatedSize(p);
+    uptr memcpy_size = Min(new_size, old_size);
+    void *new_p = Allocate(cache, new_size, alignment);
+    if (new_p)
+      internal_memcpy(new_p, p, memcpy_size);
+    Deallocate(cache, p);
+    return new_p;
+  }
+
+  bool PointerIsMine(void *p) {
+    if (primary_.PointerIsMine(p))
+      return true;
+    return secondary_.PointerIsMine(p);
+  }
+
+  bool FromPrimary(void *p) {
+    return primary_.PointerIsMine(p);
+  }
+
+  void *GetMetaData(const void *p) {
+    if (primary_.PointerIsMine(p))
+      return primary_.GetMetaData(p);
+    return secondary_.GetMetaData(p);
+  }
+
+  void *GetBlockBegin(const void *p) {
+    if (primary_.PointerIsMine(p))
+      return primary_.GetBlockBegin(p);
+    return secondary_.GetBlockBegin(p);
+  }
+
+  // This function does the same as GetBlockBegin, but is much faster.
+  // Must be called with the allocator locked.
+  void *GetBlockBeginFastLocked(void *p) {
+    if (primary_.PointerIsMine(p))
+      return primary_.GetBlockBegin(p);
+    return secondary_.GetBlockBeginFastLocked(p);
+  }
+
+  uptr GetActuallyAllocatedSize(void *p) {
+    if (primary_.PointerIsMine(p))
+      return primary_.GetActuallyAllocatedSize(p);
+    return secondary_.GetActuallyAllocatedSize(p);
+  }
+
+  uptr TotalMemoryUsed() {
+    return primary_.TotalMemoryUsed() + secondary_.TotalMemoryUsed();
+  }
+
+  void TestOnlyUnmap() { primary_.TestOnlyUnmap(); }
+
+  void InitCache(AllocatorCache *cache) {
+    cache->Init(&stats_);
+  }
+
+  void DestroyCache(AllocatorCache *cache) {
+    cache->Destroy(&primary_, &stats_);
+  }
+
+  void SwallowCache(AllocatorCache *cache) {
+    cache->Drain(&primary_);
+  }
+
+  void GetStats(AllocatorStatCounters s) const {
+    stats_.Get(s);
+  }
+
+  void PrintStats() {
+    primary_.PrintStats();
+    secondary_.PrintStats();
+  }
+
+  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
+  // introspection API.
+  void ForceLock() {
+    primary_.ForceLock();
+    secondary_.ForceLock();
+  }
+
+  void ForceUnlock() {
+    secondary_.ForceUnlock();
+    primary_.ForceUnlock();
+  }
+
+  // Iterate over all existing chunks.
+  // The allocator must be locked when calling this function.
+  void ForEachChunk(ForEachChunkCallback callback, void *arg) {
+    primary_.ForEachChunk(callback, arg);
+    secondary_.ForEachChunk(callback, arg);
+  }
+
+ private:
+  PrimaryAllocator primary_;
+  SecondaryAllocator secondary_;
+  AllocatorGlobalStats stats_;
+  atomic_uint8_t may_return_null_;
+  atomic_uint8_t rss_limit_is_exceeded_;
+};
+
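Putting the pieces together, as a tool might (Primary64 and Cache64 are the illustrative typedefs from the earlier sketches; everything below is for illustration and is not part of the patch):

// Illustration only.
typedef LargeMmapAllocator<> Secondary;
typedef CombinedAllocator<Primary64, Cache64, Secondary> Allocator;

// Allocator a;    a.Init(/*may_return_null=*/false);
// Cache64 cache;  a.InitCache(&cache);
// void *p = a.Allocate(&cache, 4096, 8);     // small: primary via the TLS cache
// void *q = a.Allocate(&cache, 1 << 20, 8);  // large: secondary mmap path
// a.Deallocate(&cache, p);  a.Deallocate(&cache, q);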
diff --git a/lib/sanitizer_common/sanitizer_allocator_interface.h b/lib/sanitizer_common/sanitizer_allocator_interface.h
index 2cd924c..797c38a 100644
--- a/lib/sanitizer_common/sanitizer_allocator_interface.h
+++ b/lib/sanitizer_common/sanitizer_allocator_interface.h
@@ -29,6 +29,10 @@
 SANITIZER_INTERFACE_ATTRIBUTE uptr __sanitizer_get_free_bytes();
 SANITIZER_INTERFACE_ATTRIBUTE uptr __sanitizer_get_unmapped_bytes();
 
+SANITIZER_INTERFACE_ATTRIBUTE int __sanitizer_install_malloc_and_free_hooks(
+    void (*malloc_hook)(const void *, uptr),
+    void (*free_hook)(const void *));
+
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
     /* OPTIONAL */ void __sanitizer_malloc_hook(void *ptr, uptr size);
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
diff --git a/lib/sanitizer_common/sanitizer_allocator_local_cache.h b/lib/sanitizer_common/sanitizer_allocator_local_cache.h
new file mode 100644
index 0000000..0ad22ba
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_allocator_local_cache.h
@@ -0,0 +1,159 @@
+//===-- sanitizer_allocator_local_cache.h -----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Part of the Sanitizer Allocator.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_ALLOCATOR_H
+#error This file must be included inside sanitizer_allocator.h
+#endif
+
+// Objects of this type should be used as local caches for SizeClassAllocator64
+// or SizeClassAllocator32. Since the typical use of this class is to have one
+// object per thread in TLS, it has to be POD.
+template<class SizeClassAllocator>
+struct SizeClassAllocatorLocalCache {
+  typedef SizeClassAllocator Allocator;
+  typedef typename Allocator::TransferBatch TransferBatch;
+  static const uptr kNumClasses = SizeClassAllocator::kNumClasses;
+
+  void Init(AllocatorGlobalStats *s) {
+    stats_.Init();
+    if (s)
+      s->Register(&stats_);
+  }
+
+  void Destroy(SizeClassAllocator *allocator, AllocatorGlobalStats *s) {
+    Drain(allocator);
+    if (s)
+      s->Unregister(&stats_);
+  }
+
+  void *Allocate(SizeClassAllocator *allocator, uptr class_id) {
+    CHECK_NE(class_id, 0UL);
+    CHECK_LT(class_id, kNumClasses);
+    stats_.Add(AllocatorStatAllocated, Allocator::ClassIdToSize(class_id));
+    PerClass *c = &per_class_[class_id];
+    if (UNLIKELY(c->count == 0))
+      Refill(allocator, class_id);
+    void *res = c->batch[--c->count];
+    PREFETCH(c->batch[c->count - 1]);
+    return res;
+  }
+
+  void Deallocate(SizeClassAllocator *allocator, uptr class_id, void *p) {
+    CHECK_NE(class_id, 0UL);
+    CHECK_LT(class_id, kNumClasses);
+    // If the first allocator call on a new thread is a deallocation, then
+    // max_count will be zero, leading to check failure.
+    InitCache();
+    stats_.Sub(AllocatorStatAllocated, Allocator::ClassIdToSize(class_id));
+    PerClass *c = &per_class_[class_id];
+    CHECK_NE(c->max_count, 0UL);
+    if (UNLIKELY(c->count == c->max_count))
+      Drain(allocator, class_id);
+    c->batch[c->count++] = p;
+  }
+
+  void Drain(SizeClassAllocator *allocator) {
+    for (uptr class_id = 0; class_id < kNumClasses; class_id++) {
+      PerClass *c = &per_class_[class_id];
+      while (c->count > 0)
+        Drain(allocator, class_id);
+    }
+  }
+
+  // private:
+  typedef typename SizeClassAllocator::SizeClassMapT SizeClassMap;
+  struct PerClass {
+    uptr count;
+    uptr max_count;
+    void *batch[2 * TransferBatch::kMaxNumCached];
+  };
+  PerClass per_class_[kNumClasses];
+  AllocatorStats stats_;
+
+  void InitCache() {
+    if (per_class_[1].max_count)
+      return;
+    for (uptr i = 0; i < kNumClasses; i++) {
+      PerClass *c = &per_class_[i];
+      c->max_count = 2 * TransferBatch::MaxCached(i);
+    }
+  }
+
+  // TransferBatch class is declared in SizeClassAllocator.
+  // We transfer chunks between central and thread-local free lists in batches.
+  // For small size classes we allocate batches separately.
+  // For large size classes we may use one of the chunks to store the batch.
+  // sizeof(TransferBatch) must be a power of 2 for more efficient allocation.
+
+  // If kUseSeparateSizeClassForBatch is true,
+  // all TransferBatch objects are allocated from kBatchClassID
+  // size class (except for those that are needed for kBatchClassID itself).
+  // The goal is to have TransferBatches in a totally different region of RAM
+  // to improve security and allow more efficient RAM reclamation.
+  // This is experimental and may currently increase memory usage by up to 3%
+  // in extreme cases.
+  static const bool kUseSeparateSizeClassForBatch = false;
+
+  static uptr SizeClassForTransferBatch(uptr class_id) {
+    if (kUseSeparateSizeClassForBatch)
+      return class_id == SizeClassMap::kBatchClassID
+                 ? 0
+                 : SizeClassMap::kBatchClassID;
+    if (Allocator::ClassIdToSize(class_id) <
+        TransferBatch::AllocationSizeRequiredForNElements(
+            TransferBatch::MaxCached(class_id)))
+      return SizeClassMap::ClassID(sizeof(TransferBatch));
+    return 0;
+  }
+
+  // Returns a TransferBatch suitable for class_id.
+  // For small size classes allocates the batch from the allocator.
+  // For large size classes simply returns b.
+  TransferBatch *CreateBatch(uptr class_id, SizeClassAllocator *allocator,
+                             TransferBatch *b) {
+    if (uptr batch_class_id = SizeClassForTransferBatch(class_id))
+      return (TransferBatch*)Allocate(allocator, batch_class_id);
+    return b;
+  }
+
+  // Destroys TransferBatch b.
+  // For small size classes deallocates b to the allocator.
+  // Does nothing for large size classes.
+  void DestroyBatch(uptr class_id, SizeClassAllocator *allocator,
+                    TransferBatch *b) {
+    if (uptr batch_class_id = SizeClassForTransferBatch(class_id))
+      Deallocate(allocator, batch_class_id, b);
+  }
+
+  NOINLINE void Refill(SizeClassAllocator *allocator, uptr class_id) {
+    InitCache();
+    PerClass *c = &per_class_[class_id];
+    TransferBatch *b = allocator->AllocateBatch(&stats_, this, class_id);
+    CHECK_GT(b->Count(), 0);
+    b->CopyToArray(c->batch);
+    c->count = b->Count();
+    DestroyBatch(class_id, allocator, b);
+  }
+
+  NOINLINE void Drain(SizeClassAllocator *allocator, uptr class_id) {
+    InitCache();
+    PerClass *c = &per_class_[class_id];
+    uptr cnt = Min(c->max_count / 2, c->count);
+    uptr first_idx_to_drain = c->count - cnt;
+    TransferBatch *b = CreateBatch(
+        class_id, allocator, (TransferBatch *)c->batch[first_idx_to_drain]);
+    b->SetFromArray(allocator->GetRegionBeginBySizeClass(class_id),
+                    &c->batch[first_idx_to_drain], cnt);
+    c->count -= cnt;
+    allocator->DeallocateBatch(&stats_, class_id, b);
+  }
+};
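
For orientation, here is a minimal sketch of how a tool-side thread cache typically drives this class; the Allocator64, allocator and global_stats names are hypothetical and not part of this patch. The cache object is meant to live in TLS, which is why the struct has to stay POD.

    typedef SizeClassAllocatorLocalCache<Allocator64> Cache;
    static THREADLOCAL Cache cache;  // POD, so plain TLS storage is enough.

    void ThreadInit() { cache.Init(&global_stats); }
    void ThreadFinish() { cache.Destroy(&allocator, &global_stats); }  // Drains.

    void *ThreadAlloc(uptr size) {
      // Assumes 0 < size <= SizeClassMap::kMaxSize, i.e. a non-zero class id.
      return cache.Allocate(&allocator, Allocator64::SizeClassMapT::ClassID(size));
    }
    void ThreadFree(void *p, uptr size) {
      cache.Deallocate(&allocator, Allocator64::SizeClassMapT::ClassID(size), p);
    }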
diff --git a/lib/sanitizer_common/sanitizer_allocator_primary32.h b/lib/sanitizer_common/sanitizer_allocator_primary32.h
new file mode 100644
index 0000000..e5cf56f
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_allocator_primary32.h
@@ -0,0 +1,302 @@
+//===-- sanitizer_allocator_primary32.h -------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Part of the Sanitizer Allocator.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_ALLOCATOR_H
+#error This file must be included inside sanitizer_allocator.h
+#endif
+
+// SizeClassAllocator32 -- allocator for 32-bit address space.
+// This allocator can theoretically be used on 64-bit arch, but there it is less
+// efficient than SizeClassAllocator64.
+//
+// [kSpaceBeg, kSpaceBeg + kSpaceSize) is the range of addresses which can
+// be returned by MmapOrDie().
+//
+// Region:
+//   a result of a single call to MmapAlignedOrDie(kRegionSize, kRegionSize).
+// Since the regions are aligned by kRegionSize, there are exactly
+// kNumPossibleRegions possible regions in the address space and so we keep
+// a ByteMap possible_regions to store the size classes of each Region.
+// 0 size class means the region is not used by the allocator.
+//
+// One Region is used to allocate chunks of a single size class.
+// A Region looks like this:
+// UserChunk1 .. UserChunkN <gap> MetaChunkN .. MetaChunk1
+//
+// In order to avoid false sharing, the objects of this class should be
+// cache-line aligned.
+template <const uptr kSpaceBeg, const u64 kSpaceSize,
+          const uptr kMetadataSize, class SizeClassMap,
+          const uptr kRegionSizeLog,
+          class ByteMap,
+          class MapUnmapCallback = NoOpMapUnmapCallback>
+class SizeClassAllocator32 {
+ public:
+  struct TransferBatch {
+    static const uptr kMaxNumCached = SizeClassMap::kMaxNumCachedHint - 2;
+    void SetFromArray(uptr region_beg_unused, void *batch[], uptr count) {
+      count_ = count;
+      CHECK_LE(count_, kMaxNumCached);
+      for (uptr i = 0; i < count; i++)
+        batch_[i] = batch[i];
+    }
+    uptr Count() const { return count_; }
+    void Clear() { count_ = 0; }
+    void Add(void *ptr) {
+      batch_[count_++] = ptr;
+      CHECK_LE(count_, kMaxNumCached);
+    }
+    void CopyToArray(void *to_batch[]) {
+      for (uptr i = 0, n = Count(); i < n; i++)
+        to_batch[i] = batch_[i];
+    }
+
+    // How much memory do we need for a batch containing n elements.
+    static uptr AllocationSizeRequiredForNElements(uptr n) {
+      return sizeof(uptr) * 2 + sizeof(void *) * n;
+    }
+    static uptr MaxCached(uptr class_id) {
+      return Min(kMaxNumCached, SizeClassMap::MaxCachedHint(class_id));
+    }
+
+    TransferBatch *next;
+
+   private:
+    uptr count_;
+    void *batch_[kMaxNumCached];
+  };
+
+  static const uptr kBatchSize = sizeof(TransferBatch);
+  COMPILER_CHECK((kBatchSize & (kBatchSize - 1)) == 0);
+  COMPILER_CHECK(sizeof(TransferBatch) ==
+                 SizeClassMap::kMaxNumCachedHint * sizeof(uptr));
+
+  static uptr ClassIdToSize(uptr class_id) {
+    return class_id == SizeClassMap::kBatchClassID
+               ? sizeof(TransferBatch)
+               : SizeClassMap::Size(class_id);
+  }
+
+  typedef SizeClassAllocator32<kSpaceBeg, kSpaceSize, kMetadataSize,
+      SizeClassMap, kRegionSizeLog, ByteMap, MapUnmapCallback> ThisT;
+  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;
+
+  void Init() {
+    possible_regions.TestOnlyInit();
+    internal_memset(size_class_info_array, 0, sizeof(size_class_info_array));
+  }
+
+  void *MapWithCallback(uptr size) {
+    size = RoundUpTo(size, GetPageSizeCached());
+    void *res = MmapOrDie(size, "SizeClassAllocator32");
+    MapUnmapCallback().OnMap((uptr)res, size);
+    return res;
+  }
+
+  void UnmapWithCallback(uptr beg, uptr size) {
+    MapUnmapCallback().OnUnmap(beg, size);
+    UnmapOrDie(reinterpret_cast<void *>(beg), size);
+  }
+
+  static bool CanAllocate(uptr size, uptr alignment) {
+    return size <= SizeClassMap::kMaxSize &&
+      alignment <= SizeClassMap::kMaxSize;
+  }
+
+  void *GetMetaData(const void *p) {
+    CHECK(PointerIsMine(p));
+    uptr mem = reinterpret_cast<uptr>(p);
+    uptr beg = ComputeRegionBeg(mem);
+    uptr size = ClassIdToSize(GetSizeClass(p));
+    u32 offset = mem - beg;
+    uptr n = offset / (u32)size;  // 32-bit division
+    uptr meta = (beg + kRegionSize) - (n + 1) * kMetadataSize;
+    return reinterpret_cast<void*>(meta);
+  }
+
+  NOINLINE TransferBatch *AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
+                                        uptr class_id) {
+    CHECK_LT(class_id, kNumClasses);
+    SizeClassInfo *sci = GetSizeClassInfo(class_id);
+    SpinMutexLock l(&sci->mutex);
+    if (sci->free_list.empty())
+      PopulateFreeList(stat, c, sci, class_id);
+    CHECK(!sci->free_list.empty());
+    TransferBatch *b = sci->free_list.front();
+    sci->free_list.pop_front();
+    return b;
+  }
+
+  NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id,
+                                TransferBatch *b) {
+    CHECK_LT(class_id, kNumClasses);
+    SizeClassInfo *sci = GetSizeClassInfo(class_id);
+    SpinMutexLock l(&sci->mutex);
+    CHECK_GT(b->Count(), 0);
+    sci->free_list.push_front(b);
+  }
+
+  uptr GetRegionBeginBySizeClass(uptr class_id) { return 0; }
+
+  bool PointerIsMine(const void *p) {
+    uptr mem = reinterpret_cast<uptr>(p);
+    if (mem < kSpaceBeg || mem >= kSpaceBeg + kSpaceSize)
+      return false;
+    return GetSizeClass(p) != 0;
+  }
+
+  uptr GetSizeClass(const void *p) {
+    return possible_regions[ComputeRegionId(reinterpret_cast<uptr>(p))];
+  }
+
+  void *GetBlockBegin(const void *p) {
+    CHECK(PointerIsMine(p));
+    uptr mem = reinterpret_cast<uptr>(p);
+    uptr beg = ComputeRegionBeg(mem);
+    uptr size = ClassIdToSize(GetSizeClass(p));
+    u32 offset = mem - beg;
+    u32 n = offset / (u32)size;  // 32-bit division
+    uptr res = beg + (n * (u32)size);
+    return reinterpret_cast<void*>(res);
+  }
+
+  uptr GetActuallyAllocatedSize(void *p) {
+    CHECK(PointerIsMine(p));
+    return ClassIdToSize(GetSizeClass(p));
+  }
+
+  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }
+
+  uptr TotalMemoryUsed() {
+    // No need to lock here.
+    uptr res = 0;
+    for (uptr i = 0; i < kNumPossibleRegions; i++)
+      if (possible_regions[i])
+        res += kRegionSize;
+    return res;
+  }
+
+  void TestOnlyUnmap() {
+    for (uptr i = 0; i < kNumPossibleRegions; i++)
+      if (possible_regions[i])
+        UnmapWithCallback((i * kRegionSize), kRegionSize);
+  }
+
+  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
+  // introspection API.
+  void ForceLock() {
+    for (uptr i = 0; i < kNumClasses; i++) {
+      GetSizeClassInfo(i)->mutex.Lock();
+    }
+  }
+
+  void ForceUnlock() {
+    for (int i = kNumClasses - 1; i >= 0; i--) {
+      GetSizeClassInfo(i)->mutex.Unlock();
+    }
+  }
+
+  // Iterate over all existing chunks.
+  // The allocator must be locked when calling this function.
+  void ForEachChunk(ForEachChunkCallback callback, void *arg) {
+    for (uptr region = 0; region < kNumPossibleRegions; region++)
+      if (possible_regions[region]) {
+        uptr chunk_size = ClassIdToSize(possible_regions[region]);
+        uptr max_chunks_in_region = kRegionSize / (chunk_size + kMetadataSize);
+        uptr region_beg = region * kRegionSize;
+        for (uptr chunk = region_beg;
+             chunk < region_beg + max_chunks_in_region * chunk_size;
+             chunk += chunk_size) {
+          // Too slow: CHECK_EQ((void *)chunk, GetBlockBegin((void *)chunk));
+          callback(chunk, arg);
+        }
+      }
+  }
+
+  void PrintStats() {
+  }
+
+  static uptr AdditionalSize() {
+    return 0;
+  }
+
+  typedef SizeClassMap SizeClassMapT;
+  static const uptr kNumClasses = SizeClassMap::kNumClasses;
+
+ private:
+  static const uptr kRegionSize = 1 << kRegionSizeLog;
+  static const uptr kNumPossibleRegions = kSpaceSize / kRegionSize;
+
+  struct SizeClassInfo {
+    SpinMutex mutex;
+    IntrusiveList<TransferBatch> free_list;
+    char padding[kCacheLineSize - sizeof(uptr) -
+                 sizeof(IntrusiveList<TransferBatch>)];
+  };
+  COMPILER_CHECK(sizeof(SizeClassInfo) == kCacheLineSize);
+
+  uptr ComputeRegionId(uptr mem) {
+    uptr res = mem >> kRegionSizeLog;
+    CHECK_LT(res, kNumPossibleRegions);
+    return res;
+  }
+
+  uptr ComputeRegionBeg(uptr mem) {
+    return mem & ~(kRegionSize - 1);
+  }
+
+  uptr AllocateRegion(AllocatorStats *stat, uptr class_id) {
+    CHECK_LT(class_id, kNumClasses);
+    uptr res = reinterpret_cast<uptr>(MmapAlignedOrDie(kRegionSize, kRegionSize,
+                                      "SizeClassAllocator32"));
+    MapUnmapCallback().OnMap(res, kRegionSize);
+    stat->Add(AllocatorStatMapped, kRegionSize);
+    CHECK_EQ(0U, (res & (kRegionSize - 1)));
+    possible_regions.set(ComputeRegionId(res), static_cast<u8>(class_id));
+    return res;
+  }
+
+  SizeClassInfo *GetSizeClassInfo(uptr class_id) {
+    CHECK_LT(class_id, kNumClasses);
+    return &size_class_info_array[class_id];
+  }
+
+  void PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
+                        SizeClassInfo *sci, uptr class_id) {
+    uptr size = ClassIdToSize(class_id);
+    uptr reg = AllocateRegion(stat, class_id);
+    uptr n_chunks = kRegionSize / (size + kMetadataSize);
+    uptr max_count = TransferBatch::MaxCached(class_id);
+    TransferBatch *b = nullptr;
+    for (uptr i = reg; i < reg + n_chunks * size; i += size) {
+      if (!b) {
+        b = c->CreateBatch(class_id, this, (TransferBatch*)i);
+        b->Clear();
+      }
+      b->Add((void*)i);
+      if (b->Count() == max_count) {
+        CHECK_GT(b->Count(), 0);
+        sci->free_list.push_back(b);
+        b = nullptr;
+      }
+    }
+    if (b) {
+      CHECK_GT(b->Count(), 0);
+      sci->free_list.push_back(b);
+    }
+  }
+
+  ByteMap possible_regions;
+  SizeClassInfo size_class_info_array[kNumClasses];
+};
+
+
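For concreteness, a hypothetical 32-bit instantiation; the numeric values and the FlatByteMap helper are illustrative assumptions rather than something this patch defines. With kRegionSizeLog = 20 each region is 1 MB, so covering a full 4 GB space leaves 1 << 12 possible regions for the byte map to track.

    static const uptr kSpaceBeg32 = 0;
    static const u64 kSpaceSize32 = 1ULL << 32;   // Cover the whole 32-bit space.
    static const uptr kRegionSizeLog32 = 20;      // 1 MB regions.
    typedef FlatByteMap<(kSpaceSize32 >> kRegionSizeLog32)> ByteMap32;  // 4096 entries.
    typedef SizeClassAllocator32<kSpaceBeg32, kSpaceSize32, /*kMetadataSize*/ 16,
                                 CompactSizeClassMap, kRegionSizeLog32,
                                 ByteMap32> Primary32;
    typedef SizeClassAllocatorLocalCache<Primary32> Cache32;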
diff --git a/lib/sanitizer_common/sanitizer_allocator_primary64.h b/lib/sanitizer_common/sanitizer_allocator_primary64.h
new file mode 100644
index 0000000..393a019
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_allocator_primary64.h
@@ -0,0 +1,402 @@
+//===-- sanitizer_allocator_primary64.h -------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Part of the Sanitizer Allocator.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_ALLOCATOR_H
+#error This file must be included inside sanitizer_allocator.h
+#endif
+
+// SizeClassAllocator64 -- allocator for 64-bit address space.
+//
+// Space: a portion of address space of kSpaceSize bytes starting at SpaceBeg.
+// If kSpaceBeg is ~0 then SpaceBeg is chosen dynamically by mmap.
+// Otherwise SpaceBeg=kSpaceBeg (fixed address).
+// kSpaceSize is a power of two.
+// At the beginning the entire space is mprotect-ed, then small parts of it
+// are mapped on demand.
+//
+// Region: a part of Space dedicated to a single size class.
+// There are kNumClasses Regions of equal size.
+//
+// UserChunk: a piece of memory returned to user.
+// MetaChunk: kMetadataSize bytes of metadata associated with a UserChunk.
+//
+// A Region looks like this:
+// UserChunk1 ... UserChunkN <gap> MetaChunkN ... MetaChunk1
+template <const uptr kSpaceBeg, const uptr kSpaceSize,
+          const uptr kMetadataSize, class SizeClassMap,
+          class MapUnmapCallback = NoOpMapUnmapCallback>
+class SizeClassAllocator64 {
+ public:
+  struct TransferBatch {
+    static const uptr kMaxNumCached = SizeClassMap::kMaxNumCachedHint - 4;
+    void SetFromRange(uptr region_beg, uptr beg_offset, uptr step, uptr count) {
+      count_ = count;
+      CHECK_LE(count_, kMaxNumCached);
+      region_beg_ = region_beg;
+      for (uptr i = 0; i < count; i++)
+        batch_[i] = static_cast<u32>((beg_offset + i * step) >> 4);
+    }
+    void SetFromArray(uptr region_beg, void *batch[], uptr count) {
+      count_ = count;
+      CHECK_LE(count_, kMaxNumCached);
+      region_beg_ = region_beg;
+      for (uptr i = 0; i < count; i++)
+        batch_[i] = static_cast<u32>(
+            ((reinterpret_cast<uptr>(batch[i])) - region_beg) >> 4);
+    }
+    void CopyToArray(void *to_batch[]) {
+      for (uptr i = 0, n = Count(); i < n; i++)
+        to_batch[i] = reinterpret_cast<void*>(Get(i));
+    }
+    uptr Count() const { return count_; }
+
+    // How much memory do we need for a batch containing n elements.
+    static uptr AllocationSizeRequiredForNElements(uptr n) {
+      return sizeof(uptr) * 2 + sizeof(u32) * n;
+    }
+    static uptr MaxCached(uptr class_id) {
+      return Min(kMaxNumCached, SizeClassMap::MaxCachedHint(class_id));
+    }
+
+    TransferBatch *next;
+
+   private:
+    uptr Get(uptr i) {
+      return region_beg_ + (static_cast<uptr>(batch_[i]) << 4);
+    }
+    // Instead of storing 64-bit pointers we store 32-bit offsets from the
+    // region start divided by 4. This imposes two limitations:
+    // * all allocations are 16-byte aligned,
+    // * regions are not larger than 2^36.
+    uptr region_beg_ : SANITIZER_WORDSIZE - 10;  // Region-beg is 4096-aligned.
+    uptr count_      : 10;
+    u32 batch_[kMaxNumCached];
+  };
+  static const uptr kBatchSize = sizeof(TransferBatch);
+  COMPILER_CHECK((kBatchSize & (kBatchSize - 1)) == 0);
+  COMPILER_CHECK(sizeof(TransferBatch) ==
+                 SizeClassMap::kMaxNumCachedHint * sizeof(u32));
+  COMPILER_CHECK(TransferBatch::kMaxNumCached < 1024);  // count_ uses 10 bits.
+
+  static uptr ClassIdToSize(uptr class_id) {
+    return class_id == SizeClassMap::kBatchClassID
+               ? sizeof(TransferBatch)
+               : SizeClassMap::Size(class_id);
+  }
+
+  typedef SizeClassAllocator64<kSpaceBeg, kSpaceSize, kMetadataSize,
+      SizeClassMap, MapUnmapCallback> ThisT;
+  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;
+
+  void Init() {
+    uptr TotalSpaceSize = kSpaceSize + AdditionalSize();
+    if (kUsingConstantSpaceBeg) {
+      CHECK_EQ(kSpaceBeg, reinterpret_cast<uptr>(
+                              MmapFixedNoAccess(kSpaceBeg, TotalSpaceSize)));
+    } else {
+      NonConstSpaceBeg =
+          reinterpret_cast<uptr>(MmapNoAccess(TotalSpaceSize));
+      CHECK_NE(NonConstSpaceBeg, ~(uptr)0);
+    }
+    MapWithCallback(SpaceEnd(), AdditionalSize());
+  }
+
+  void MapWithCallback(uptr beg, uptr size) {
+    CHECK_EQ(beg, reinterpret_cast<uptr>(MmapFixedOrDie(beg, size)));
+    MapUnmapCallback().OnMap(beg, size);
+  }
+
+  void UnmapWithCallback(uptr beg, uptr size) {
+    MapUnmapCallback().OnUnmap(beg, size);
+    UnmapOrDie(reinterpret_cast<void *>(beg), size);
+  }
+
+  static bool CanAllocate(uptr size, uptr alignment) {
+    return size <= SizeClassMap::kMaxSize &&
+      alignment <= SizeClassMap::kMaxSize;
+  }
+
+  NOINLINE TransferBatch *AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
+                                        uptr class_id) {
+    CHECK_LT(class_id, kNumClasses);
+    RegionInfo *region = GetRegionInfo(class_id);
+    TransferBatch *b = region->free_list.Pop();
+    if (!b)
+      b = PopulateFreeList(stat, c, class_id, region);
+    region->n_allocated += b->Count();
+    return b;
+  }
+
+  NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id,
+                                TransferBatch *b) {
+    RegionInfo *region = GetRegionInfo(class_id);
+    CHECK_GT(b->Count(), 0);
+    region->free_list.Push(b);
+    region->n_freed += b->Count();
+  }
+
+  bool PointerIsMine(const void *p) {
+    uptr P = reinterpret_cast<uptr>(p);
+    if (kUsingConstantSpaceBeg && (kSpaceBeg % kSpaceSize) == 0)
+      return P / kSpaceSize == kSpaceBeg / kSpaceSize;
+    return P >= SpaceBeg() && P < SpaceEnd();
+  }
+
+  uptr GetRegionBegin(const void *p) {
+    if (kUsingConstantSpaceBeg)
+      return reinterpret_cast<uptr>(p) & ~(kRegionSize - 1);
+    uptr space_beg = SpaceBeg();
+    return ((reinterpret_cast<uptr>(p)  - space_beg) & ~(kRegionSize - 1)) +
+        space_beg;
+  }
+
+  uptr GetRegionBeginBySizeClass(uptr class_id) {
+    return SpaceBeg() + kRegionSize * class_id;
+  }
+
+  uptr GetSizeClass(const void *p) {
+    if (kUsingConstantSpaceBeg && (kSpaceBeg % kSpaceSize) == 0)
+      return ((reinterpret_cast<uptr>(p)) / kRegionSize) % kNumClassesRounded;
+    return ((reinterpret_cast<uptr>(p) - SpaceBeg()) / kRegionSize) %
+           kNumClassesRounded;
+  }
+
+  void *GetBlockBegin(const void *p) {
+    uptr class_id = GetSizeClass(p);
+    uptr size = ClassIdToSize(class_id);
+    if (!size) return nullptr;
+    uptr chunk_idx = GetChunkIdx((uptr)p, size);
+    uptr reg_beg = GetRegionBegin(p);
+    uptr beg = chunk_idx * size;
+    uptr next_beg = beg + size;
+    if (class_id >= kNumClasses) return nullptr;
+    RegionInfo *region = GetRegionInfo(class_id);
+    if (region->mapped_user >= next_beg)
+      return reinterpret_cast<void*>(reg_beg + beg);
+    return nullptr;
+  }
+
+  uptr GetActuallyAllocatedSize(void *p) {
+    CHECK(PointerIsMine(p));
+    return ClassIdToSize(GetSizeClass(p));
+  }
+
+  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }
+
+  void *GetMetaData(const void *p) {
+    uptr class_id = GetSizeClass(p);
+    uptr size = ClassIdToSize(class_id);
+    uptr chunk_idx = GetChunkIdx(reinterpret_cast<uptr>(p), size);
+    return reinterpret_cast<void *>(SpaceBeg() +
+                                    (kRegionSize * (class_id + 1)) -
+                                    (1 + chunk_idx) * kMetadataSize);
+  }
+
+  uptr TotalMemoryUsed() {
+    uptr res = 0;
+    for (uptr i = 0; i < kNumClasses; i++)
+      res += GetRegionInfo(i)->allocated_user;
+    return res;
+  }
+
+  // Test-only.
+  void TestOnlyUnmap() {
+    UnmapWithCallback(SpaceBeg(), kSpaceSize + AdditionalSize());
+  }
+
+  static void FillMemoryProfile(uptr start, uptr rss, bool file, uptr *stats,
+                           uptr stats_size) {
+    for (uptr class_id = 0; class_id < stats_size; class_id++)
+      if (stats[class_id] == start)
+        stats[class_id] = rss;
+  }
+
+  void PrintStats() {
+    uptr total_mapped = 0;
+    uptr n_allocated = 0;
+    uptr n_freed = 0;
+    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
+      RegionInfo *region = GetRegionInfo(class_id);
+      total_mapped += region->mapped_user;
+      n_allocated += region->n_allocated;
+      n_freed += region->n_freed;
+    }
+    Printf("Stats: SizeClassAllocator64: %zdM mapped in %zd allocations; "
+           "remains %zd\n",
+           total_mapped >> 20, n_allocated, n_allocated - n_freed);
+    uptr rss_stats[kNumClasses];
+    for (uptr class_id = 0; class_id < kNumClasses; class_id++)
+      rss_stats[class_id] = SpaceBeg() + kRegionSize * class_id;
+    GetMemoryProfile(FillMemoryProfile, rss_stats, kNumClasses);
+    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
+      RegionInfo *region = GetRegionInfo(class_id);
+      if (region->mapped_user == 0) continue;
+      uptr in_use = region->n_allocated - region->n_freed;
+      uptr avail_chunks = region->allocated_user / ClassIdToSize(class_id);
+      Printf("  %02zd (%zd): mapped: %zdK allocs: %zd frees: %zd inuse: %zd"
+             " avail: %zd rss: %zdK\n",
+             class_id,
+             ClassIdToSize(class_id),
+             region->mapped_user >> 10,
+             region->n_allocated,
+             region->n_freed,
+             in_use, avail_chunks,
+             rss_stats[class_id] >> 10);
+    }
+  }
+
+  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
+  // introspection API.
+  void ForceLock() {
+    for (uptr i = 0; i < kNumClasses; i++) {
+      GetRegionInfo(i)->mutex.Lock();
+    }
+  }
+
+  void ForceUnlock() {
+    for (int i = (int)kNumClasses - 1; i >= 0; i--) {
+      GetRegionInfo(i)->mutex.Unlock();
+    }
+  }
+
+  // Iterate over all existing chunks.
+  // The allocator must be locked when calling this function.
+  void ForEachChunk(ForEachChunkCallback callback, void *arg) {
+    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
+      RegionInfo *region = GetRegionInfo(class_id);
+      uptr chunk_size = ClassIdToSize(class_id);
+      uptr region_beg = SpaceBeg() + class_id * kRegionSize;
+      for (uptr chunk = region_beg;
+           chunk < region_beg + region->allocated_user;
+           chunk += chunk_size) {
+        // Too slow: CHECK_EQ((void *)chunk, GetBlockBegin((void *)chunk));
+        callback(chunk, arg);
+      }
+    }
+  }
+
+  static uptr AdditionalSize() {
+    return RoundUpTo(sizeof(RegionInfo) * kNumClassesRounded,
+                     GetPageSizeCached());
+  }
+
+  typedef SizeClassMap SizeClassMapT;
+  static const uptr kNumClasses = SizeClassMap::kNumClasses;
+  static const uptr kNumClassesRounded = SizeClassMap::kNumClassesRounded;
+
+ private:
+  static const uptr kRegionSize = kSpaceSize / kNumClassesRounded;
+
+  static const bool kUsingConstantSpaceBeg = kSpaceBeg != ~(uptr)0;
+  uptr NonConstSpaceBeg;
+  uptr SpaceBeg() const {
+    return kUsingConstantSpaceBeg ? kSpaceBeg : NonConstSpaceBeg;
+  }
+  uptr SpaceEnd() const { return  SpaceBeg() + kSpaceSize; }
+  // kRegionSize must be >= 2^32.
+  COMPILER_CHECK((kRegionSize) >= (1ULL << (SANITIZER_WORDSIZE / 2)));
+  // kRegionSize must be <= 2^36, see TransferBatch.
+  COMPILER_CHECK((kRegionSize) <= (1ULL << (SANITIZER_WORDSIZE / 2 + 4)));
+  // Call mmap for user memory with at least this size.
+  static const uptr kUserMapSize = 1 << 16;
+  // Call mmap for metadata memory with at least this size.
+  static const uptr kMetaMapSize = 1 << 16;
+
+  struct RegionInfo {
+    BlockingMutex mutex;
+    LFStack<TransferBatch> free_list;
+    uptr allocated_user;  // Bytes allocated for user memory.
+    uptr allocated_meta;  // Bytes allocated for metadata.
+    uptr mapped_user;  // Bytes mapped for user memory.
+    uptr mapped_meta;  // Bytes mapped for metadata.
+    uptr n_allocated, n_freed;  // Just stats.
+  };
+  COMPILER_CHECK(sizeof(RegionInfo) >= kCacheLineSize);
+
+  RegionInfo *GetRegionInfo(uptr class_id) {
+    CHECK_LT(class_id, kNumClasses);
+    RegionInfo *regions =
+        reinterpret_cast<RegionInfo *>(SpaceBeg() + kSpaceSize);
+    return &regions[class_id];
+  }
+
+  uptr GetChunkIdx(uptr chunk, uptr size) {
+    if (!kUsingConstantSpaceBeg)
+      chunk -= SpaceBeg();
+
+    uptr offset = chunk % kRegionSize;
+    // Here we divide by a non-constant. This is costly.
+    // size always fits into 32-bits. If the offset fits too, use 32-bit div.
+    if (offset >> (SANITIZER_WORDSIZE / 2))
+      return offset / size;
+    return (u32)offset / (u32)size;
+  }
+
+  NOINLINE TransferBatch *PopulateFreeList(AllocatorStats *stat,
+                                           AllocatorCache *c, uptr class_id,
+                                           RegionInfo *region) {
+    BlockingMutexLock l(&region->mutex);
+    TransferBatch *b = region->free_list.Pop();
+    if (b)
+      return b;
+    uptr size = ClassIdToSize(class_id);
+    uptr count = TransferBatch::MaxCached(class_id);
+    uptr beg_idx = region->allocated_user;
+    uptr end_idx = beg_idx + count * size;
+    uptr region_beg = SpaceBeg() + kRegionSize * class_id;
+    if (end_idx + size > region->mapped_user) {
+      // Do the mmap for the user memory.
+      uptr map_size = kUserMapSize;
+      while (end_idx + size > region->mapped_user + map_size)
+        map_size += kUserMapSize;
+      CHECK_GE(region->mapped_user + map_size, end_idx);
+      MapWithCallback(region_beg + region->mapped_user, map_size);
+      stat->Add(AllocatorStatMapped, map_size);
+      region->mapped_user += map_size;
+    }
+    uptr total_count = (region->mapped_user - beg_idx - size)
+        / size / count * count;
+    region->allocated_meta += total_count * kMetadataSize;
+    if (region->allocated_meta > region->mapped_meta) {
+      uptr map_size = kMetaMapSize;
+      while (region->allocated_meta > region->mapped_meta + map_size)
+        map_size += kMetaMapSize;
+      // Do the mmap for the metadata.
+      CHECK_GE(region->mapped_meta + map_size, region->allocated_meta);
+      MapWithCallback(region_beg + kRegionSize -
+                      region->mapped_meta - map_size, map_size);
+      region->mapped_meta += map_size;
+    }
+    CHECK_LE(region->allocated_meta, region->mapped_meta);
+    if (region->mapped_user + region->mapped_meta > kRegionSize) {
+      Printf("%s: Out of memory. Dying. ", SanitizerToolName);
+      Printf("The process has exhausted %zuMB for size class %zu.\n",
+          kRegionSize / 1024 / 1024, size);
+      Die();
+    }
+    for (;;) {
+      b = c->CreateBatch(class_id, this,
+                         (TransferBatch *)(region_beg + beg_idx));
+      b->SetFromRange(region_beg, beg_idx, size, count);
+      region->allocated_user += count * size;
+      CHECK_LE(region->allocated_user, region->mapped_user);
+      beg_idx += count * size;
+      if (beg_idx + count * size + size > region->mapped_user)
+        break;
+      CHECK_GT(b->Count(), 0);
+      region->free_list.Push(b);
+    }
+    return b;
+  }
+};
+
+
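For comparison, a hypothetical 64-bit instantiation (the values are illustrative, not taken from this patch). Passing ~0 as kSpaceBeg makes Init() pick the base address dynamically via mmap.

    typedef SizeClassAllocator64</*kSpaceBeg*/ ~(uptr)0,     // Chosen by mmap.
                                 /*kSpaceSize*/ 1ULL << 42,  // 4 TB of address space.
                                 /*kMetadataSize*/ 16,
                                 DefaultSizeClassMap> Primary64;
    // kRegionSize = (1ULL << 42) / kNumClassesRounded (64) = 1ULL << 36 (64 GB),
    // which is exactly the upper bound the TransferBatch offset encoding allows
    // (u32 offsets scaled by 16, i.e. at most 2^36 bytes per region).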
diff --git a/lib/sanitizer_common/sanitizer_allocator_secondary.h b/lib/sanitizer_common/sanitizer_allocator_secondary.h
new file mode 100644
index 0000000..383eccf
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_allocator_secondary.h
@@ -0,0 +1,268 @@
+//===-- sanitizer_allocator_secondary.h -------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Part of the Sanitizer Allocator.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_ALLOCATOR_H
+#error This file must be included inside sanitizer_allocator.h
+#endif
+
+// This class can (de)allocate only large chunks of memory using mmap/unmap.
+// The main purpose of this allocator is to cover large and rare allocation
+// sizes not covered by more efficient allocators (e.g. SizeClassAllocator64).
+template <class MapUnmapCallback = NoOpMapUnmapCallback>
+class LargeMmapAllocator {
+ public:
+  void InitLinkerInitialized(bool may_return_null) {
+    page_size_ = GetPageSizeCached();
+    atomic_store(&may_return_null_, may_return_null, memory_order_relaxed);
+  }
+
+  void Init(bool may_return_null) {
+    internal_memset(this, 0, sizeof(*this));
+    InitLinkerInitialized(may_return_null);
+  }
+
+  void *Allocate(AllocatorStats *stat, uptr size, uptr alignment) {
+    CHECK(IsPowerOfTwo(alignment));
+    uptr map_size = RoundUpMapSize(size);
+    if (alignment > page_size_)
+      map_size += alignment;
+    // Overflow.
+    if (map_size < size)
+      return ReturnNullOrDie();
+    uptr map_beg = reinterpret_cast<uptr>(
+        MmapOrDie(map_size, "LargeMmapAllocator"));
+    CHECK(IsAligned(map_beg, page_size_));
+    MapUnmapCallback().OnMap(map_beg, map_size);
+    uptr map_end = map_beg + map_size;
+    uptr res = map_beg + page_size_;
+    if (res & (alignment - 1))  // Align.
+      res += alignment - (res & (alignment - 1));
+    CHECK(IsAligned(res, alignment));
+    CHECK(IsAligned(res, page_size_));
+    CHECK_GE(res + size, map_beg);
+    CHECK_LE(res + size, map_end);
+    Header *h = GetHeader(res);
+    h->size = size;
+    h->map_beg = map_beg;
+    h->map_size = map_size;
+    uptr size_log = MostSignificantSetBitIndex(map_size);
+    CHECK_LT(size_log, ARRAY_SIZE(stats.by_size_log));
+    {
+      SpinMutexLock l(&mutex_);
+      uptr idx = n_chunks_++;
+      chunks_sorted_ = false;
+      CHECK_LT(idx, kMaxNumChunks);
+      h->chunk_idx = idx;
+      chunks_[idx] = h;
+      stats.n_allocs++;
+      stats.currently_allocated += map_size;
+      stats.max_allocated = Max(stats.max_allocated, stats.currently_allocated);
+      stats.by_size_log[size_log]++;
+      stat->Add(AllocatorStatAllocated, map_size);
+      stat->Add(AllocatorStatMapped, map_size);
+    }
+    return reinterpret_cast<void*>(res);
+  }
+
+  void *ReturnNullOrDie() {
+    if (atomic_load(&may_return_null_, memory_order_acquire))
+      return nullptr;
+    ReportAllocatorCannotReturnNull();
+  }
+
+  void SetMayReturnNull(bool may_return_null) {
+    atomic_store(&may_return_null_, may_return_null, memory_order_release);
+  }
+
+  void Deallocate(AllocatorStats *stat, void *p) {
+    Header *h = GetHeader(p);
+    {
+      SpinMutexLock l(&mutex_);
+      uptr idx = h->chunk_idx;
+      CHECK_EQ(chunks_[idx], h);
+      CHECK_LT(idx, n_chunks_);
+      chunks_[idx] = chunks_[n_chunks_ - 1];
+      chunks_[idx]->chunk_idx = idx;
+      n_chunks_--;
+      chunks_sorted_ = false;
+      stats.n_frees++;
+      stats.currently_allocated -= h->map_size;
+      stat->Sub(AllocatorStatAllocated, h->map_size);
+      stat->Sub(AllocatorStatMapped, h->map_size);
+    }
+    MapUnmapCallback().OnUnmap(h->map_beg, h->map_size);
+    UnmapOrDie(reinterpret_cast<void*>(h->map_beg), h->map_size);
+  }
+
+  uptr TotalMemoryUsed() {
+    SpinMutexLock l(&mutex_);
+    uptr res = 0;
+    for (uptr i = 0; i < n_chunks_; i++) {
+      Header *h = chunks_[i];
+      CHECK_EQ(h->chunk_idx, i);
+      res += RoundUpMapSize(h->size);
+    }
+    return res;
+  }
+
+  bool PointerIsMine(const void *p) {
+    return GetBlockBegin(p) != nullptr;
+  }
+
+  uptr GetActuallyAllocatedSize(void *p) {
+    return RoundUpTo(GetHeader(p)->size, page_size_);
+  }
+
+  // At least page_size_/2 metadata bytes are available.
+  void *GetMetaData(const void *p) {
+    // Too slow: CHECK_EQ(p, GetBlockBegin(p));
+    if (!IsAligned(reinterpret_cast<uptr>(p), page_size_)) {
+      Printf("%s: bad pointer %p\n", SanitizerToolName, p);
+      CHECK(IsAligned(reinterpret_cast<uptr>(p), page_size_));
+    }
+    return GetHeader(p) + 1;
+  }
+
+  void *GetBlockBegin(const void *ptr) {
+    uptr p = reinterpret_cast<uptr>(ptr);
+    SpinMutexLock l(&mutex_);
+    uptr nearest_chunk = 0;
+    // Cache-friendly linear search.
+    for (uptr i = 0; i < n_chunks_; i++) {
+      uptr ch = reinterpret_cast<uptr>(chunks_[i]);
+      if (p < ch) continue;  // p is to the left of this chunk, skip it.
+      if (p - ch < p - nearest_chunk)
+        nearest_chunk = ch;
+    }
+    if (!nearest_chunk)
+      return nullptr;
+    Header *h = reinterpret_cast<Header *>(nearest_chunk);
+    CHECK_GE(nearest_chunk, h->map_beg);
+    CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
+    CHECK_LE(nearest_chunk, p);
+    if (h->map_beg + h->map_size <= p)
+      return nullptr;
+    return GetUser(h);
+  }
+
+  // This function does the same as GetBlockBegin, but is much faster.
+  // Must be called with the allocator locked.
+  void *GetBlockBeginFastLocked(void *ptr) {
+    mutex_.CheckLocked();
+    uptr p = reinterpret_cast<uptr>(ptr);
+    uptr n = n_chunks_;
+    if (!n) return nullptr;
+    if (!chunks_sorted_) {
+      // Do one-time sort. chunks_sorted_ is reset in Allocate/Deallocate.
+      SortArray(reinterpret_cast<uptr*>(chunks_), n);
+      for (uptr i = 0; i < n; i++)
+        chunks_[i]->chunk_idx = i;
+      chunks_sorted_ = true;
+      min_mmap_ = reinterpret_cast<uptr>(chunks_[0]);
+      max_mmap_ = reinterpret_cast<uptr>(chunks_[n - 1]) +
+          chunks_[n - 1]->map_size;
+    }
+    if (p < min_mmap_ || p >= max_mmap_)
+      return nullptr;
+    uptr beg = 0, end = n - 1;
+    // This loop is a log(n) lower_bound. It does not check for the exact match
+    // to avoid expensive cache-thrashing loads.
+    while (end - beg >= 2) {
+      uptr mid = (beg + end) / 2;  // Invariant: mid >= beg + 1
+      if (p < reinterpret_cast<uptr>(chunks_[mid]))
+        end = mid - 1;  // We are not interested in chunks_[mid].
+      else
+        beg = mid;  // chunks_[mid] may still be what we want.
+    }
+
+    if (beg < end) {
+      CHECK_EQ(beg + 1, end);
+      // There are 2 chunks left, choose one.
+      if (p >= reinterpret_cast<uptr>(chunks_[end]))
+        beg = end;
+    }
+
+    Header *h = chunks_[beg];
+    if (h->map_beg + h->map_size <= p || p < h->map_beg)
+      return nullptr;
+    return GetUser(h);
+  }
+
+  void PrintStats() {
+    Printf("Stats: LargeMmapAllocator: allocated %zd times, "
+           "remains %zd (%zd K) max %zd M; by size logs: ",
+           stats.n_allocs, stats.n_allocs - stats.n_frees,
+           stats.currently_allocated >> 10, stats.max_allocated >> 20);
+    for (uptr i = 0; i < ARRAY_SIZE(stats.by_size_log); i++) {
+      uptr c = stats.by_size_log[i];
+      if (!c) continue;
+      Printf("%zd:%zd; ", i, c);
+    }
+    Printf("\n");
+  }
+
+  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
+  // introspection API.
+  void ForceLock() {
+    mutex_.Lock();
+  }
+
+  void ForceUnlock() {
+    mutex_.Unlock();
+  }
+
+  // Iterate over all existing chunks.
+  // The allocator must be locked when calling this function.
+  void ForEachChunk(ForEachChunkCallback callback, void *arg) {
+    for (uptr i = 0; i < n_chunks_; i++)
+      callback(reinterpret_cast<uptr>(GetUser(chunks_[i])), arg);
+  }
+
+ private:
+  static const int kMaxNumChunks = 1 << FIRST_32_SECOND_64(15, 18);
+  struct Header {
+    uptr map_beg;
+    uptr map_size;
+    uptr size;
+    uptr chunk_idx;
+  };
+
+  Header *GetHeader(uptr p) {
+    CHECK(IsAligned(p, page_size_));
+    return reinterpret_cast<Header*>(p - page_size_);
+  }
+  Header *GetHeader(const void *p) {
+    return GetHeader(reinterpret_cast<uptr>(p));
+  }
+
+  void *GetUser(Header *h) {
+    CHECK(IsAligned((uptr)h, page_size_));
+    return reinterpret_cast<void*>(reinterpret_cast<uptr>(h) + page_size_);
+  }
+
+  uptr RoundUpMapSize(uptr size) {
+    return RoundUpTo(size, page_size_) + page_size_;
+  }
+
+  uptr page_size_;
+  Header *chunks_[kMaxNumChunks];
+  uptr n_chunks_;
+  uptr min_mmap_, max_mmap_;
+  bool chunks_sorted_;
+  struct Stats {
+    uptr n_allocs, n_frees, currently_allocated, max_allocated, by_size_log[64];
+  } stats;
+  atomic_uint8_t may_return_null_;
+  SpinMutex mutex_;
+};
+
+
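A small usage sketch of the secondary allocator, assuming stats points to an already initialized AllocatorStats (the function and variable names are illustrative). For a 1 MB request on a 4 KB-page system, Allocate maps RoundUpTo(1 MB, 4 KB) + 4 KB: the extra leading page holds the Header, and the user pointer starts one page after map_beg, which is why GetHeader(p) is simply p - page_size_.

    void SecondaryExample(AllocatorStats *stats) {
      LargeMmapAllocator<> secondary;
      secondary.Init(/*may_return_null=*/false);
      void *p = secondary.Allocate(stats, /*size=*/1 << 20, /*alignment=*/8);
      CHECK_EQ(secondary.GetActuallyAllocatedSize(p), 1 << 20);  // Page-rounded size.
      secondary.Deallocate(stats, p);
    }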
diff --git a/lib/sanitizer_common/sanitizer_allocator_size_class_map.h b/lib/sanitizer_common/sanitizer_allocator_size_class_map.h
new file mode 100644
index 0000000..b8917a4
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_allocator_size_class_map.h
@@ -0,0 +1,191 @@
+//===-- sanitizer_allocator_size_class_map.h --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Part of the Sanitizer Allocator.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_ALLOCATOR_H
+#error This file must be included inside sanitizer_allocator.h
+#endif
+
+// SizeClassMap maps allocation sizes into size classes and back.
+// Class 0 corresponds to size 0.
+// Classes 1 - 16 correspond to sizes 16 to 256 (size = class_id * 16).
+// Next 4 classes: 256 + i * 64  (i = 1 to 4).
+// Next 4 classes: 512 + i * 128 (i = 1 to 4).
+// ...
+// Next 4 classes: 2^k + i * 2^(k-2) (i = 1 to 4).
+// Last class corresponds to kMaxSize = 1 << kMaxSizeLog.
+//
+// This structure of the size class map gives us:
+//   - Efficient table-free class-to-size and size-to-class functions.
+//   - The difference between two consecutive size classes is between 14% and 25%.
+//
+// This class also gives a hint to a thread-caching allocator about the amount
+// of chunks that need to be cached per-thread:
+//  - kMaxNumCachedHint is a hint for maximal number of chunks per size class.
+//    The actual number is computed in TransferBatch.
+//  - (1 << kMaxBytesCachedLog) is the maximal number of bytes per size class.
+//
+// There is one extra size class kBatchClassID that is used for allocating
+// objects of TransferBatch type when kUseSeparateSizeClassForBatch is true.
+//
+// Part of output of SizeClassMap::Print():
+// c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0
+// c01 => s: 16 diff: +16 00% l 4 cached: 256 4096; id 1
+// c02 => s: 32 diff: +16 100% l 5 cached: 256 8192; id 2
+// c03 => s: 48 diff: +16 50% l 5 cached: 256 12288; id 3
+// c04 => s: 64 diff: +16 33% l 6 cached: 256 16384; id 4
+// c05 => s: 80 diff: +16 25% l 6 cached: 256 20480; id 5
+// c06 => s: 96 diff: +16 20% l 6 cached: 256 24576; id 6
+// c07 => s: 112 diff: +16 16% l 6 cached: 256 28672; id 7
+//
+// c08 => s: 128 diff: +16 14% l 7 cached: 256 32768; id 8
+// c09 => s: 144 diff: +16 12% l 7 cached: 256 36864; id 9
+// c10 => s: 160 diff: +16 11% l 7 cached: 256 40960; id 10
+// c11 => s: 176 diff: +16 10% l 7 cached: 256 45056; id 11
+// c12 => s: 192 diff: +16 09% l 7 cached: 256 49152; id 12
+// c13 => s: 208 diff: +16 08% l 7 cached: 256 53248; id 13
+// c14 => s: 224 diff: +16 07% l 7 cached: 256 57344; id 14
+// c15 => s: 240 diff: +16 07% l 7 cached: 256 61440; id 15
+//
+// c16 => s: 256 diff: +16 06% l 8 cached: 256 65536; id 16
+// c17 => s: 320 diff: +64 25% l 8 cached: 204 65280; id 17
+// c18 => s: 384 diff: +64 20% l 8 cached: 170 65280; id 18
+// c19 => s: 448 diff: +64 16% l 8 cached: 146 65408; id 19
+//
+// c20 => s: 512 diff: +64 14% l 9 cached: 128 65536; id 20
+// c21 => s: 640 diff: +128 25% l 9 cached: 102 65280; id 21
+// c22 => s: 768 diff: +128 20% l 9 cached: 85 65280; id 22
+// c23 => s: 896 diff: +128 16% l 9 cached: 73 65408; id 23
+//
+// c24 => s: 1024 diff: +128 14% l 10 cached: 64 65536; id 24
+// c25 => s: 1280 diff: +256 25% l 10 cached: 51 65280; id 25
+// c26 => s: 1536 diff: +256 20% l 10 cached: 42 64512; id 26
+// c27 => s: 1792 diff: +256 16% l 10 cached: 36 64512; id 27
+//
+// ...
+//
+// c48 => s: 65536 diff: +8192 14% l 16 cached: 1 65536; id 48
+// c49 => s: 81920 diff: +16384 25% l 16 cached: 1 81920; id 49
+// c50 => s: 98304 diff: +16384 20% l 16 cached: 1 98304; id 50
+// c51 => s: 114688 diff: +16384 16% l 16 cached: 1 114688; id 51
+//
+// c52 => s: 131072 diff: +16384 14% l 17 cached: 1 131072; id 52
+
+template <uptr kMaxSizeLog, uptr kMaxNumCachedHintT, uptr kMaxBytesCachedLog>
+class SizeClassMap {
+  static const uptr kMinSizeLog = 4;
+  static const uptr kMidSizeLog = kMinSizeLog + 4;
+  static const uptr kMinSize = 1 << kMinSizeLog;
+  static const uptr kMidSize = 1 << kMidSizeLog;
+  static const uptr kMidClass = kMidSize / kMinSize;
+  static const uptr S = 2;
+  static const uptr M = (1 << S) - 1;
+
+ public:
+  // kMaxNumCachedHintT is a power of two. It serves as a hint
+  // for the size of TransferBatch; the actual size could be a bit smaller.
+  static const uptr kMaxNumCachedHint = kMaxNumCachedHintT;
+  COMPILER_CHECK((kMaxNumCachedHint & (kMaxNumCachedHint - 1)) == 0);
+
+  static const uptr kMaxSize = 1UL << kMaxSizeLog;
+  static const uptr kNumClasses =
+      kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1 + 1;
+  static const uptr kBatchClassID = kNumClasses - 1;
+  static const uptr kLargestClassID = kNumClasses - 2;
+  COMPILER_CHECK(kNumClasses >= 32 && kNumClasses <= 256);
+  static const uptr kNumClassesRounded =
+      kNumClasses == 32  ? 32 :
+      kNumClasses <= 64  ? 64 :
+      kNumClasses <= 128 ? 128 : 256;
+
+  static uptr Size(uptr class_id) {
+    if (class_id <= kMidClass)
+      return kMinSize * class_id;
+    // Should not pass kBatchClassID here, but we should avoid a CHECK.
+    if (class_id == kBatchClassID) return 0;
+    class_id -= kMidClass;
+    uptr t = kMidSize << (class_id >> S);
+    return t + (t >> S) * (class_id & M);
+  }
+
+  static uptr ClassID(uptr size) {
+    if (size <= kMidSize)
+      return (size + kMinSize - 1) >> kMinSizeLog;
+    if (size > kMaxSize) return 0;
+    uptr l = MostSignificantSetBitIndex(size);
+    uptr hbits = (size >> (l - S)) & M;
+    uptr lbits = size & ((1 << (l - S)) - 1);
+    uptr l1 = l - kMidSizeLog;
+    return kMidClass + (l1 << S) + hbits + (lbits > 0);
+  }
+
+  static uptr MaxCachedHint(uptr class_id) {
+    if (class_id == 0) return 0;
+    // Estimate the result for kBatchClassID because this class
+    // does not know the exact size of TransferBatch.
+    // Moreover, we need to cache fewer batches than user chunks,
+    // so this number could be small.
+    if (class_id == kBatchClassID) return 8;
+    uptr n = (1UL << kMaxBytesCachedLog) / Size(class_id);
+    return Max<uptr>(1, Min(kMaxNumCachedHint, n));
+  }
+
+  static void Print() {
+    uptr prev_s = 0;
+    uptr total_cached = 0;
+    for (uptr i = 0; i < kNumClasses; i++) {
+      uptr s = Size(i);
+      if (s >= kMidSize / 2 && (s & (s - 1)) == 0)
+        Printf("\n");
+      uptr d = s - prev_s;
+      uptr p = prev_s ? (d * 100 / prev_s) : 0;
+      uptr l = s ? MostSignificantSetBitIndex(s) : 0;
+      uptr cached = MaxCachedHint(i) * s;
+      if (i == kBatchClassID)
+        d = l = p = 0;
+      Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
+             "cached: %zd %zd; id %zd\n",
+             i, Size(i), d, p, l, MaxCachedHint(i), cached, ClassID(s));
+      total_cached += cached;
+      prev_s = s;
+    }
+    Printf("Total cached: %zd\n", total_cached);
+  }
+
+  static void Validate() {
+    for (uptr c = 1; c < kNumClasses; c++) {
+      if (c == kBatchClassID) continue;
+      // Printf("Validate: c%zd\n", c);
+      uptr s = Size(c);
+      CHECK_NE(s, 0U);
+      CHECK_EQ(ClassID(s), c);
+      if (c != kBatchClassID - 1 && c != kNumClasses - 1)
+        CHECK_EQ(ClassID(s + 1), c + 1);
+      CHECK_EQ(ClassID(s - 1), c);
+      if (c)
+        CHECK_GT(Size(c), Size(c-1));
+    }
+    CHECK_EQ(ClassID(kMaxSize + 1), 0);
+
+    for (uptr s = 1; s <= kMaxSize; s++) {
+      uptr c = ClassID(s);
+      // Printf("s%zd => c%zd\n", s, c);
+      CHECK_LT(c, kNumClasses);
+      CHECK_GE(Size(c), s);
+      if (c > 0)
+        CHECK_LT(Size(c-1), s);
+    }
+  }
+};
+
+typedef SizeClassMap<17, 128, 16> DefaultSizeClassMap;
+typedef SizeClassMap<17, 64,  14> CompactSizeClassMap;
+template<class SizeClassAllocator> struct SizeClassAllocatorLocalCache;
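To make the comment block above concrete, here is the arithmetic for DefaultSizeClassMap = SizeClassMap<17, 128, 16>; this only restates what the formulas imply. kMinSize = 16, kMidSize = 256, kMidClass = 16, kMaxSize = 128 KB, and kNumClasses = 16 + (17 - 8) * 4 + 2 = 54, rounded up to 64. For size 320: l = 8, hbits = (320 >> 6) & 3 = 1, lbits = 0, so ClassID(320) = 16 + 0 + 1 + 0 = 17, and Size(17) = 256 + 64 * 1 = 320, matching line c17 of the table above.

    COMPILER_CHECK(DefaultSizeClassMap::kNumClasses == 54);
    COMPILER_CHECK(DefaultSizeClassMap::kNumClassesRounded == 64);
    COMPILER_CHECK(DefaultSizeClassMap::kMaxSize == (1 << 17));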
diff --git a/lib/sanitizer_common/sanitizer_allocator_stats.h b/lib/sanitizer_common/sanitizer_allocator_stats.h
new file mode 100644
index 0000000..38b088b
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_allocator_stats.h
@@ -0,0 +1,107 @@
+//===-- sanitizer_allocator_stats.h -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Part of the Sanitizer Allocator.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_ALLOCATOR_H
+#error This file must be included inside sanitizer_allocator.h
+#endif
+
+// Memory allocator statistics
+enum AllocatorStat {
+  AllocatorStatAllocated,
+  AllocatorStatMapped,
+  AllocatorStatCount
+};
+
+typedef uptr AllocatorStatCounters[AllocatorStatCount];
+
+// Per-thread stats, live in per-thread cache.
+class AllocatorStats {
+ public:
+  void Init() {
+    internal_memset(this, 0, sizeof(*this));
+  }
+  void InitLinkerInitialized() {}
+
+  void Add(AllocatorStat i, uptr v) {
+    v += atomic_load(&stats_[i], memory_order_relaxed);
+    atomic_store(&stats_[i], v, memory_order_relaxed);
+  }
+
+  void Sub(AllocatorStat i, uptr v) {
+    v = atomic_load(&stats_[i], memory_order_relaxed) - v;
+    atomic_store(&stats_[i], v, memory_order_relaxed);
+  }
+
+  void Set(AllocatorStat i, uptr v) {
+    atomic_store(&stats_[i], v, memory_order_relaxed);
+  }
+
+  uptr Get(AllocatorStat i) const {
+    return atomic_load(&stats_[i], memory_order_relaxed);
+  }
+
+ private:
+  friend class AllocatorGlobalStats;
+  AllocatorStats *next_;
+  AllocatorStats *prev_;
+  atomic_uintptr_t stats_[AllocatorStatCount];
+};
+
+// Global stats, used for aggregation and querying.
+class AllocatorGlobalStats : public AllocatorStats {
+ public:
+  void InitLinkerInitialized() {
+    next_ = this;
+    prev_ = this;
+  }
+  void Init() {
+    internal_memset(this, 0, sizeof(*this));
+    InitLinkerInitialized();
+  }
+
+  void Register(AllocatorStats *s) {
+    SpinMutexLock l(&mu_);
+    s->next_ = next_;
+    s->prev_ = this;
+    next_->prev_ = s;
+    next_ = s;
+  }
+
+  void Unregister(AllocatorStats *s) {
+    SpinMutexLock l(&mu_);
+    s->prev_->next_ = s->next_;
+    s->next_->prev_ = s->prev_;
+    for (int i = 0; i < AllocatorStatCount; i++)
+      Add(AllocatorStat(i), s->Get(AllocatorStat(i)));
+  }
+
+  void Get(AllocatorStatCounters s) const {
+    internal_memset(s, 0, AllocatorStatCount * sizeof(uptr));
+    SpinMutexLock l(&mu_);
+    const AllocatorStats *stats = this;
+    for (;;) {
+      for (int i = 0; i < AllocatorStatCount; i++)
+        s[i] += stats->Get(AllocatorStat(i));
+      stats = stats->next_;
+      if (stats == this)
+        break;
+    }
+    // All stats must be non-negative.
+    for (int i = 0; i < AllocatorStatCount; i++)
+      s[i] = ((sptr)s[i]) >= 0 ? s[i] : 0;
+  }
+
+ private:
+  mutable SpinMutex mu_;
+};
+
+
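A brief sketch of how the two classes cooperate (names are illustrative): each per-thread AllocatorStats registers with a shared AllocatorGlobalStats, Get() walks the circular list to aggregate the counters, and Unregister() folds a departing thread's numbers into the global object.

    void StatsExample() {
      AllocatorGlobalStats global_stats;
      AllocatorStats thread_stats;
      global_stats.Init();
      thread_stats.Init();
      global_stats.Register(&thread_stats);
      thread_stats.Add(AllocatorStatAllocated, 4096);
      AllocatorStatCounters totals;
      global_stats.Get(totals);  // totals[AllocatorStatAllocated] includes the 4096.
      global_stats.Unregister(&thread_stats);
    }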
diff --git a/lib/sanitizer_common/sanitizer_atomic_msvc.h b/lib/sanitizer_common/sanitizer_atomic_msvc.h
index 24d6f0f..6d94056 100644
--- a/lib/sanitizer_common/sanitizer_atomic_msvc.h
+++ b/lib/sanitizer_common/sanitizer_atomic_msvc.h
@@ -33,6 +33,10 @@
 extern "C" long _InterlockedExchangeAdd(  // NOLINT
     long volatile * Addend, long Value);  // NOLINT
 #pragma intrinsic(_InterlockedExchangeAdd)
+extern "C" char _InterlockedCompareExchange8(  // NOLINT
+    char volatile *Destination,                // NOLINT
+    char Exchange, char Comparand);            // NOLINT
+#pragma intrinsic(_InterlockedCompareExchange8)
 extern "C" short _InterlockedCompareExchange16(  // NOLINT
     short volatile *Destination,                 // NOLINT
     short Exchange, short Comparand);            // NOLINT
@@ -171,8 +175,6 @@
   return (u32)_InterlockedExchange((volatile long*)&a->val_dont_use, v);
 }
 
-#ifndef _WIN64
-
 INLINE bool atomic_compare_exchange_strong(volatile atomic_uint8_t *a,
                                            u8 *cmp,
                                            u8 xchgv,
@@ -180,6 +182,10 @@
   (void)mo;
   DCHECK(!((uptr)a % sizeof(*a)));
   u8 cmpv = *cmp;
+#ifdef _WIN64
+  u8 prev = (u8)_InterlockedCompareExchange8(
+      (volatile char*)&a->val_dont_use, (char)xchgv, (char)cmpv);
+#else
   u8 prev;
   __asm {
     mov al, cmpv
@@ -188,14 +194,13 @@
     lock cmpxchg [ecx], dl
     mov prev, al
   }
+#endif
   if (prev == cmpv)
     return true;
   *cmp = prev;
   return false;
 }
 
-#endif
-
 INLINE bool atomic_compare_exchange_strong(volatile atomic_uintptr_t *a,
                                            uptr *cmp,
                                            uptr xchg,
diff --git a/lib/sanitizer_common/sanitizer_common.cc b/lib/sanitizer_common/sanitizer_common.cc
index 9e0ad6a..79fcbb1 100644
--- a/lib/sanitizer_common/sanitizer_common.cc
+++ b/lib/sanitizer_common/sanitizer_common.cc
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_common.h"
+#include "sanitizer_allocator_interface.h"
 #include "sanitizer_allocator_internal.h"
 #include "sanitizer_flags.h"
 #include "sanitizer_libc.h"
@@ -99,72 +100,6 @@
 // writing to the same log file.
 uptr stoptheworld_tracer_ppid = 0;
 
-static const int kMaxNumOfInternalDieCallbacks = 5;
-static DieCallbackType InternalDieCallbacks[kMaxNumOfInternalDieCallbacks];
-
-bool AddDieCallback(DieCallbackType callback) {
-  for (int i = 0; i < kMaxNumOfInternalDieCallbacks; i++) {
-    if (InternalDieCallbacks[i] == nullptr) {
-      InternalDieCallbacks[i] = callback;
-      return true;
-    }
-  }
-  return false;
-}
-
-bool RemoveDieCallback(DieCallbackType callback) {
-  for (int i = 0; i < kMaxNumOfInternalDieCallbacks; i++) {
-    if (InternalDieCallbacks[i] == callback) {
-      internal_memmove(&InternalDieCallbacks[i], &InternalDieCallbacks[i + 1],
-                       sizeof(InternalDieCallbacks[0]) *
-                           (kMaxNumOfInternalDieCallbacks - i - 1));
-      InternalDieCallbacks[kMaxNumOfInternalDieCallbacks - 1] = nullptr;
-      return true;
-    }
-  }
-  return false;
-}
-
-static DieCallbackType UserDieCallback;
-void SetUserDieCallback(DieCallbackType callback) {
-  UserDieCallback = callback;
-}
-
-void NORETURN Die() {
-  if (UserDieCallback)
-    UserDieCallback();
-  for (int i = kMaxNumOfInternalDieCallbacks - 1; i >= 0; i--) {
-    if (InternalDieCallbacks[i])
-      InternalDieCallbacks[i]();
-  }
-  if (common_flags()->abort_on_error)
-    Abort();
-  internal__exit(common_flags()->exitcode);
-}
-
-static CheckFailedCallbackType CheckFailedCallback;
-void SetCheckFailedCallback(CheckFailedCallbackType callback) {
-  CheckFailedCallback = callback;
-}
-
-const int kSecondsToSleepWhenRecursiveCheckFailed = 2;
-
-void NORETURN CheckFailed(const char *file, int line, const char *cond,
-                          u64 v1, u64 v2) {
-  static atomic_uint32_t num_calls;
-  if (atomic_fetch_add(&num_calls, 1, memory_order_relaxed) > 10) {
-    SleepForSeconds(kSecondsToSleepWhenRecursiveCheckFailed);
-    Trap();
-  }
-
-  if (CheckFailedCallback) {
-    CheckFailedCallback(file, line, cond, v1, v2);
-  }
-  Report("Sanitizer CHECK failed: %s:%d %s (%lld, %lld)\n", file, line, cond,
-                                                            v1, v2);
-  Die();
-}
-
 void NORETURN ReportMmapFailureAndDie(uptr size, const char *mem_type,
                                       const char *mmap_type, error_t err,
                                       bool raw_report) {
@@ -232,27 +167,6 @@
   InternalSort<uptr*, UptrComparisonFunction>(&array, size, CompareLess);
 }
 
-// We want to map a chunk of address space aligned to 'alignment'.
-// We do it by maping a bit more and then unmaping redundant pieces.
-// We probably can do it with fewer syscalls in some OS-dependent way.
-void *MmapAlignedOrDie(uptr size, uptr alignment, const char *mem_type) {
-// uptr PageSize = GetPageSizeCached();
-  CHECK(IsPowerOfTwo(size));
-  CHECK(IsPowerOfTwo(alignment));
-  uptr map_size = size + alignment;
-  uptr map_res = (uptr)MmapOrDie(map_size, mem_type);
-  uptr map_end = map_res + map_size;
-  uptr res = map_res;
-  if (res & (alignment - 1))  // Not aligned.
-    res = (map_res + alignment) & ~(alignment - 1);
-  uptr end = res + size;
-  if (res != map_res)
-    UnmapOrDie((void*)map_res, res - map_res);
-  if (end != map_end)
-    UnmapOrDie((void*)end, map_end - end);
-  return (void*)res;
-}
-
 const char *StripPathPrefix(const char *filepath,
                             const char *strip_path_prefix) {
   if (!filepath) return nullptr;
@@ -425,6 +339,10 @@
 static const char kPathSeparator = SANITIZER_WINDOWS ? ';' : ':';
 
 char *FindPathToBinary(const char *name) {
+  if (FileExists(name)) {
+    return internal_strdup(name);
+  }
+
   const char *path = GetEnv("PATH");
   if (!path)
     return nullptr;
@@ -498,6 +416,44 @@
   Printf("\n\n");
 }
 
+// Malloc hooks.
+static const int kMaxMallocFreeHooks = 5;
+struct MallocFreeHook {
+  void (*malloc_hook)(const void *, uptr);
+  void (*free_hook)(const void *);
+};
+
+static MallocFreeHook MFHooks[kMaxMallocFreeHooks];
+
+void RunMallocHooks(const void *ptr, uptr size) {
+  for (int i = 0; i < kMaxMallocFreeHooks; i++) {
+    auto hook = MFHooks[i].malloc_hook;
+    if (!hook) return;
+    hook(ptr, size);
+  }
+}
+
+void RunFreeHooks(const void *ptr) {
+  for (int i = 0; i < kMaxMallocFreeHooks; i++) {
+    auto hook = MFHooks[i].free_hook;
+    if (!hook) return;
+    hook(ptr);
+  }
+}
+
+static int InstallMallocFreeHooks(void (*malloc_hook)(const void *, uptr),
+                                  void (*free_hook)(const void *)) {
+  if (!malloc_hook || !free_hook) return 0;
+  for (int i = 0; i < kMaxMallocFreeHooks; i++) {
+    if (MFHooks[i].malloc_hook == nullptr) {
+      MFHooks[i].malloc_hook = malloc_hook;
+      MFHooks[i].free_hook = free_hook;
+      return i + 1;
+    }
+  }
+  return 0;
+}
+
 } // namespace __sanitizer
 
 using namespace __sanitizer;  // NOLINT
@@ -507,6 +463,11 @@
   report_file.SetReportPath(path);
 }
 
+void __sanitizer_set_report_fd(void *fd) {
+  report_file.fd = (fd_t)reinterpret_cast<uptr>(fd);
+  report_file.fd_pid = internal_getpid();
+}
+
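
For code outside the runtime, __sanitizer_set_report_fd is the matching public entry point; below is a minimal hedged sketch of redirecting reports to a caller-owned descriptor. The <sanitizer/common_interface_defs.h> include and the exact fd packing are assumptions that mirror the reinterpret_cast unpacking above.

    // Sketch: send sanitizer reports to a log file we open ourselves.
    #include <sanitizer/common_interface_defs.h>  // assumed declaration site
    #include <fcntl.h>
    #include <stdint.h>

    static void RedirectReportsToLogFile() {
      int fd = open("/tmp/sanitizer.log", O_WRONLY | O_CREAT | O_APPEND, 0644);
      if (fd == -1) return;
      // The interface takes the descriptor smuggled through a void *.
      __sanitizer_set_report_fd(
          reinterpret_cast<void *>(static_cast<intptr_t>(fd)));
    }
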
 void __sanitizer_report_error_summary(const char *error_summary) {
   Printf("%s\n", error_summary);
 }
@@ -515,4 +476,11 @@
 void __sanitizer_set_death_callback(void (*callback)(void)) {
   SetUserDieCallback(callback);
 }
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __sanitizer_install_malloc_and_free_hooks(void (*malloc_hook)(const void *,
+                                                                  uptr),
+                                              void (*free_hook)(const void *)) {
+  return InstallMallocFreeHooks(malloc_hook, free_hook);
+}
 } // extern "C"
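
The hook slots above are exposed to clients through __sanitizer_install_malloc_and_free_hooks. A hedged sketch of a client that counts live allocations follows; the extern "C" prototype mirrors the definition in this patch, and spelling the size parameter as size_t in the public ABI is an assumption.

    #include <atomic>
    #include <cstddef>
    #include <cstdio>

    extern "C" int __sanitizer_install_malloc_and_free_hooks(
        void (*malloc_hook)(const void *, size_t),
        void (*free_hook)(const void *));

    static std::atomic<size_t> g_live_allocations{0};

    static void OnMalloc(const void *, size_t) { ++g_live_allocations; }
    static void OnFree(const void *) { --g_live_allocations; }

    static void InstallCountingHooks() {
      // Returns a non-zero slot id on success; 0 means all five slots are
      // taken or one of the hooks was null (see InstallMallocFreeHooks above).
      if (__sanitizer_install_malloc_and_free_hooks(OnMalloc, OnFree) == 0)
        std::fprintf(stderr, "no free malloc/free hook slot\n");
    }
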
diff --git a/lib/sanitizer_common/sanitizer_common.h b/lib/sanitizer_common/sanitizer_common.h
index 181b049..6c1d6a0 100644
--- a/lib/sanitizer_common/sanitizer_common.h
+++ b/lib/sanitizer_common/sanitizer_common.h
@@ -23,7 +23,7 @@
 #include "sanitizer_list.h"
 #include "sanitizer_mutex.h"
 
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
 extern "C" void _ReadWriteBarrier();
 #pragma intrinsic(_ReadWriteBarrier)
 #endif
@@ -44,9 +44,6 @@
 
 const uptr kMaxPathLength = 4096;
 
-// 16K loaded modules should be enough for everyone.
-static const uptr kMaxNumberOfModules = 1 << 14;
-
 const uptr kMaxThreadStackSize = 1 << 30;  // 1Gb
 
 static const uptr kErrorMessageBufferSize = 1 << 16;
@@ -92,12 +89,14 @@
                          const char *name = nullptr);
 void *MmapNoReserveOrDie(uptr size, const char *mem_type);
 void *MmapFixedOrDie(uptr fixed_addr, uptr size);
-void *MmapNoAccess(uptr fixed_addr, uptr size, const char *name = nullptr);
+void *MmapFixedNoAccess(uptr fixed_addr, uptr size, const char *name = nullptr);
+void *MmapNoAccess(uptr size);
 // Map aligned chunk of address space; size and alignment are powers of two.
 void *MmapAlignedOrDie(uptr size, uptr alignment, const char *mem_type);
-// Disallow access to a memory range.  Use MmapNoAccess to allocate an
+// Disallow access to a memory range.  Use MmapFixedNoAccess to allocate an
 // unaccessible memory.
 bool MprotectNoAccess(uptr addr, uptr size);
+bool MprotectReadOnly(uptr addr, uptr size);
 
 // Used to check if we can map shadow memory to a fixed location.
 bool MemoryRangeIsAvailable(uptr range_start, uptr range_end);
@@ -109,6 +108,8 @@
 void DontDumpShadowMemory(uptr addr, uptr length);
 // Check if the built VMA size matches the runtime one.
 void CheckVMASize();
+void RunMallocHooks(const void *ptr, uptr size);
+void RunFreeHooks(const void *ptr);
 
 // InternalScopedBuffer can be used instead of large stack arrays to
 // keep frame size low.
@@ -284,12 +285,27 @@
 char *FindPathToBinary(const char *name);
 bool IsPathSeparator(const char c);
 bool IsAbsolutePath(const char *path);
+// Starts a subprocess and returns its pid.
+// If *_fd parameters are not kInvalidFd, their corresponding input/output
+// streams will be redirected to the file. The files will always be closed
+// in the parent process, even in case of an error.
+// The child process will close all fds after STDERR_FILENO
+// before passing control to the program.
+pid_t StartSubprocess(const char *filename, const char *const argv[],
+                      fd_t stdin_fd = kInvalidFd, fd_t stdout_fd = kInvalidFd,
+                      fd_t stderr_fd = kInvalidFd);
+// Checks if the specified process is still running.
+bool IsProcessRunning(pid_t pid);
+// Waits for the process to finish and returns its exit code.
+// Returns -1 in case of an error.
+int WaitForProcess(pid_t pid);
 
 u32 GetUid();
 void ReExec();
 char **GetArgv();
 void PrintCmdline();
 bool StackSizeIsUnlimited();
+uptr GetStackSizeLimitInBytes();
 void SetStackSizeLimitInBytes(uptr limit);
 bool AddressSpaceIsUnlimited();
 void SetAddressSpaceUnlimited();
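
The StartSubprocess/WaitForProcess declarations in the hunk above are internal to __sanitizer; the sketch below only illustrates the child-side behavior the comment promises (redirect the given fds, close everything after STDERR_FILENO, then exec) in plain POSIX terms. It is not the actual implementation, and kMaxFdToClose is an illustrative assumption.

    #include <sys/types.h>
    #include <unistd.h>

    static pid_t SpawnLikeStartSubprocess(const char *filename,
                                          char *const argv[], int stdin_fd,
                                          int stdout_fd, int stderr_fd) {
      pid_t pid = fork();
      if (pid != 0) return pid;  // Parent (or fork failure); caller closes fds.
      if (stdin_fd >= 0) dup2(stdin_fd, STDIN_FILENO);
      if (stdout_fd >= 0) dup2(stdout_fd, STDOUT_FILENO);
      if (stderr_fd >= 0) dup2(stderr_fd, STDERR_FILENO);
      const int kMaxFdToClose = 256;  // Assumption; real code queries the OS.
      for (int fd = STDERR_FILENO + 1; fd < kMaxFdToClose; fd++) close(fd);
      execv(filename, argv);
      _exit(127);  // Only reached if exec failed.
    }
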
@@ -357,7 +373,7 @@
 
 // Functions related to signal handling.
 typedef void (*SignalHandlerType)(int, void *, void *);
-bool IsDeadlySignal(int signum);
+bool IsHandledDeadlySignal(int signum);
 void InstallDeadlySignalHandlers(SignalHandlerType handler);
 // Alternative signal stack (POSIX-only).
 void SetAlternateSignalStack();
@@ -503,7 +519,7 @@
       uptr new_capacity = RoundUpToPowerOfTwo(size_ + 1);
       Resize(new_capacity);
     }
-    data_[size_++] = element;
+    internal_memcpy(&data_[size_++], &element, sizeof(T));
   }
   T &back() {
     CHECK_GT(size_, 0);
@@ -656,13 +672,33 @@
   IntrusiveList<AddressRange> ranges_;
 };
 
-// OS-dependent function that fills array with descriptions of at most
-// "max_modules" currently loaded modules. Returns the number of
-// initialized modules. If filter is nonzero, ignores modules for which
-// filter(full_name) is false.
-typedef bool (*string_predicate_t)(const char *);
-uptr GetListOfModules(LoadedModule *modules, uptr max_modules,
-                      string_predicate_t filter);
+// List of LoadedModules. OS-dependent implementation is responsible for
+// filling this information.
+class ListOfModules {
+ public:
+  ListOfModules() : modules_(kInitialCapacity) {}
+  ~ListOfModules() { clear(); }
+  void init();
+  const LoadedModule *begin() const { return modules_.begin(); }
+  LoadedModule *begin() { return modules_.begin(); }
+  const LoadedModule *end() const { return modules_.end(); }
+  LoadedModule *end() { return modules_.end(); }
+  uptr size() const { return modules_.size(); }
+  const LoadedModule &operator[](uptr i) const {
+    CHECK_LT(i, modules_.size());
+    return modules_[i];
+  }
+
+ private:
+  void clear() {
+    for (auto &module : modules_) module.clear();
+    modules_.clear();
+  }
+
+  InternalMmapVector<LoadedModule> modules_;
+  // We rarely have more than 16K loaded modules.
+  static const uptr kInitialCapacity = 1 << 14;
+};
 
 // Callback type for iterating over a set of memory ranges.
 typedef void (*RangeIteratorCallback)(uptr begin, uptr end, void *arg);
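
ListOfModules replaces the old fixed-size GetListOfModules API with a container that owns its LoadedModule entries. A short sketch of the intended call pattern inside the runtime, mirroring how the coverage-mapping hunk later in this patch uses it (assumes the surrounding sanitizer_common.h context):

    void PrintLoadedModules() {
      __sanitizer::ListOfModules modules;
      modules.init();  // OS-dependent: fills the vector of LoadedModule.
      for (const __sanitizer::LoadedModule &module : modules)
        __sanitizer::Printf("%s @ 0x%zx\n", module.full_name(),
                            module.base_address());
      // The destructor clears each LoadedModule and releases the storage.
    }
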
@@ -726,7 +762,7 @@
 // compiler from recognising it and turning it into an actual call to
 // memset/memcpy/etc.
 static inline void SanitizerBreakOptimization(void *arg) {
-#if _MSC_VER && !defined(__clang__)
+#if defined(_MSC_VER) && !defined(__clang__)
   _ReadWriteBarrier();
 #else
   __asm__ __volatile__("" : : "r" (arg) : "memory");
@@ -739,19 +775,58 @@
   uptr pc;
   uptr sp;
   uptr bp;
+  bool is_memory_access;
 
-  SignalContext(void *context, uptr addr, uptr pc, uptr sp, uptr bp) :
-      context(context), addr(addr), pc(pc), sp(sp), bp(bp) {
-  }
+  enum WriteFlag { UNKNOWN, READ, WRITE } write_flag;
+
+  SignalContext(void *context, uptr addr, uptr pc, uptr sp, uptr bp,
+                bool is_memory_access, WriteFlag write_flag)
+      : context(context),
+        addr(addr),
+        pc(pc),
+        sp(sp),
+        bp(bp),
+        is_memory_access(is_memory_access),
+        write_flag(write_flag) {}
 
   // Creates signal context in a platform-specific manner.
   static SignalContext Create(void *siginfo, void *context);
+
+  // Returns the write flag (read/write/unknown) for the access in "context".
+  static WriteFlag GetWriteFlag(void *context);
 };
 
 void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp);
 
 void MaybeReexec();
 
+template <typename Fn>
+class RunOnDestruction {
+ public:
+  explicit RunOnDestruction(Fn fn) : fn_(fn) {}
+  ~RunOnDestruction() { fn_(); }
+
+ private:
+  Fn fn_;
+};
+
+// A simple scope guard. Usage:
+// auto cleanup = at_scope_exit([]{ do_cleanup; });
+template <typename Fn>
+RunOnDestruction<Fn> at_scope_exit(Fn fn) {
+  return RunOnDestruction<Fn>(fn);
+}
+
+// Linux on 64-bit s390 had a nasty bug that crashed the whole machine
+// if a process used virtual memory over 4TB (as many sanitizers like
+// to do).  This function will abort the process if running on a kernel
+// that looks vulnerable.
+#if SANITIZER_LINUX && SANITIZER_S390_64
+void AvoidCVE_2016_2143();
+#else
+INLINE void AvoidCVE_2016_2143() {}
+#endif
+
 }  // namespace __sanitizer
 
 inline void *operator new(__sanitizer::operator_new_size_type size,
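
The at_scope_exit helper added above is a plain RAII scope guard; its own comment shows the intended one-liner usage. A standalone sketch of the same idiom outside the runtime, with a concrete cleanup action:

    #include <cstdio>

    template <typename Fn>
    class RunOnDestruction {
     public:
      explicit RunOnDestruction(Fn fn) : fn_(fn) {}
      ~RunOnDestruction() { fn_(); }

     private:
      Fn fn_;
    };

    template <typename Fn>
    RunOnDestruction<Fn> at_scope_exit(Fn fn) {
      return RunOnDestruction<Fn>(fn);
    }

    void WriteGreeting() {
      std::FILE *f = std::fopen("/tmp/demo.txt", "w");
      if (!f) return;
      auto cleanup = at_scope_exit([&] { std::fclose(f); });  // runs on any return
      std::fputs("hello\n", f);
    }
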
diff --git a/lib/sanitizer_common/sanitizer_common_interceptors.inc b/lib/sanitizer_common/sanitizer_common_interceptors.inc
index a9a6724..f9ccca3 100644
--- a/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -11,7 +11,7 @@
 // ThreadSanitizer, MemorySanitizer, etc.
 //
 // This file should be included into the tool's interceptor file,
-// which has to define it's own macros:
+// which has to define its own macros:
 //   COMMON_INTERCEPTOR_ENTER
 //   COMMON_INTERCEPTOR_ENTER_NOIGNORE
 //   COMMON_INTERCEPTOR_READ_RANGE
@@ -155,6 +155,14 @@
 #define COMMON_INTERCEPTOR_USER_CALLBACK_END() {}
 #endif
 
+#ifdef SANITIZER_NLDBL_VERSION
+#define COMMON_INTERCEPT_FUNCTION_LDBL(fn)                          \
+    COMMON_INTERCEPT_FUNCTION_VER(fn, SANITIZER_NLDBL_VERSION)
+#else
+#define COMMON_INTERCEPT_FUNCTION_LDBL(fn)                          \
+    COMMON_INTERCEPT_FUNCTION(fn)
+#endif
+
 struct FileMetadata {
   // For open_memstream().
   char **addr;
@@ -204,6 +212,40 @@
 }
 #endif  // SI_NOT_WINDOWS
 
+#if SANITIZER_INTERCEPT_STRLEN
+INTERCEPTOR(SIZE_T, strlen, const char *s) {
+  // Sometimes strlen is called prior to InitializeCommonInterceptors,
+  // in which case the REAL(strlen) typically used in
+  // COMMON_INTERCEPTOR_ENTER will fail.  We use internal_strlen here
+  // to handle that.
+  if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
+    return internal_strlen(s);
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, strlen, s);
+  SIZE_T result = REAL(strlen)(s);
+  if (common_flags()->intercept_strlen)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, s, result + 1);
+  return result;
+}
+#define INIT_STRLEN COMMON_INTERCEPT_FUNCTION(strlen)
+#else
+#define INIT_STRLEN
+#endif
+
+#if SANITIZER_INTERCEPT_STRNLEN
+INTERCEPTOR(SIZE_T, strnlen, const char *s, SIZE_T maxlen) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, strnlen, s, maxlen);
+  SIZE_T length = REAL(strnlen)(s, maxlen);
+  if (common_flags()->intercept_strlen)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, s, Min(length + 1, maxlen));
+  return length;
+}
+#define INIT_STRNLEN COMMON_INTERCEPT_FUNCTION(strnlen)
+#else
+#define INIT_STRNLEN
+#endif
+
 #if SANITIZER_INTERCEPT_TEXTDOMAIN
 INTERCEPTOR(char*, textdomain, const char *domainname) {
   void *ctx;
@@ -284,6 +326,9 @@
   return c1_low - c2_low;
 }
 
+DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strcasecmp, uptr called_pc,
+                              const char *s1, const char *s2, int result)
+
 INTERCEPTOR(int, strcasecmp, const char *s1, const char *s2) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, strcasecmp, s1, s2);
@@ -296,9 +341,16 @@
   }
   COMMON_INTERCEPTOR_READ_STRING(ctx, s1, i + 1);
   COMMON_INTERCEPTOR_READ_STRING(ctx, s2, i + 1);
-  return CharCaseCmp(c1, c2);
+  int result = CharCaseCmp(c1, c2);
+  CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strcasecmp, GET_CALLER_PC(),
+                             s1, s2, result);
+  return result;
 }
 
+DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strncasecmp, uptr called_pc,
+                              const char *s1, const char *s2, uptr n,
+                              int result)
+
 INTERCEPTOR(int, strncasecmp, const char *s1, const char *s2, SIZE_T n) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, strncasecmp, s1, s2, n);
@@ -311,7 +363,10 @@
   }
   COMMON_INTERCEPTOR_READ_RANGE(ctx, s1, Min(i + 1, n));
   COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, Min(i + 1, n));
-  return CharCaseCmp(c1, c2);
+  int result = CharCaseCmp(c1, c2);
+  CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strncasecmp, GET_CALLER_PC(),
+                             s1, s2, n, result);
+  return result;
 }
 
 #define INIT_STRCASECMP COMMON_INTERCEPT_FUNCTION(strcasecmp)
@@ -333,6 +388,10 @@
 #endif
 
 #if SANITIZER_INTERCEPT_STRSTR
+
+DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strstr, uptr called_pc,
+                              const char *s1, const char *s2, char *result)
+
 INTERCEPTOR(char*, strstr, const char *s1, const char *s2) {
   if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
     return internal_strstr(s1, s2);
@@ -341,6 +400,8 @@
   char *r = REAL(strstr)(s1, s2);
   if (common_flags()->intercept_strstr)
     StrstrCheck(ctx, r, s1, s2);
+  CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strstr, GET_CALLER_PC(), s1,
+                             s2, r);
   return r;
 }
 
@@ -350,12 +411,18 @@
 #endif
 
 #if SANITIZER_INTERCEPT_STRCASESTR
+
+DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strcasestr, uptr called_pc,
+                              const char *s1, const char *s2, char *result)
+
 INTERCEPTOR(char*, strcasestr, const char *s1, const char *s2) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, strcasestr, s1, s2);
   char *r = REAL(strcasestr)(s1, s2);
   if (common_flags()->intercept_strstr)
     StrstrCheck(ctx, r, s1, s2);
+  CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strcasestr, GET_CALLER_PC(),
+                             s1, s2, r);
   return r;
 }
 
@@ -364,6 +431,79 @@
 #define INIT_STRCASESTR
 #endif
 
+#if SANITIZER_INTERCEPT_MEMMEM
+DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_memmem, uptr called_pc,
+                              const void *s1, SIZE_T len1, const void *s2,
+                              SIZE_T len2, void *result)
+
+INTERCEPTOR(void*, memmem, const void *s1, SIZE_T len1, const void *s2,
+            SIZE_T len2) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, memmem, s1, len1, s2, len2);
+  void *r = REAL(memmem)(s1, len1, s2, len2);
+  if (common_flags()->intercept_memmem) {
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, s1, len1);
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, len2);
+  }
+  CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_memmem, GET_CALLER_PC(),
+                             s1, len1, s2, len2, r);
+  return r;
+}
+
+#define INIT_MEMMEM COMMON_INTERCEPT_FUNCTION(memmem);
+#else
+#define INIT_MEMMEM
+#endif  // SANITIZER_INTERCEPT_MEMMEM
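
The DECLARE_WEAK_INTERCEPTOR_HOOK/CALL_WEAK_INTERCEPTOR_HOOK pairs above publish weak symbols that an external client (for example, a fuzzer) can define to observe comparison and search results. A hedged sketch of such a client-side definition for the memmem hook; using uintptr_t/size_t to stand in for uptr/SIZE_T in the external ABI is an assumption.

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Strong definition overriding the weak default; called after every
    // intercepted memmem() with the value memmem returned.
    extern "C" void __sanitizer_weak_hook_memmem(uintptr_t called_pc,
                                                 const void *s1, size_t len1,
                                                 const void *s2, size_t len2,
                                                 void *result) {
      if (!result)
        std::fprintf(stderr, "memmem miss at pc=0x%zx (needle of %zu bytes)\n",
                     (size_t)called_pc, len2);
    }
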
+
+#if SANITIZER_INTERCEPT_STRCHR
+INTERCEPTOR(char*, strchr, const char *s, int c) {
+  void *ctx;
+  if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
+    return internal_strchr(s, c);
+  COMMON_INTERCEPTOR_ENTER(ctx, strchr, s, c);
+  char *result = REAL(strchr)(s, c);
+  uptr len = internal_strlen(s);
+  uptr n = result ? result - s + 1 : len + 1;
+  if (common_flags()->intercept_strchr)
+    COMMON_INTERCEPTOR_READ_STRING_OF_LEN(ctx, s, len, n);
+  return result;
+}
+#define INIT_STRCHR COMMON_INTERCEPT_FUNCTION(strchr)
+#else
+#define INIT_STRCHR
+#endif
+
+#if SANITIZER_INTERCEPT_STRCHRNUL
+INTERCEPTOR(char*, strchrnul, const char *s, int c) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, strchrnul, s, c);
+  char *result = REAL(strchrnul)(s, c);
+  uptr len = result - s + 1;
+  if (common_flags()->intercept_strchr)
+    COMMON_INTERCEPTOR_READ_STRING(ctx, s, len);
+  return result;
+}
+#define INIT_STRCHRNUL COMMON_INTERCEPT_FUNCTION(strchrnul)
+#else
+#define INIT_STRCHRNUL
+#endif
+
+#if SANITIZER_INTERCEPT_STRRCHR
+INTERCEPTOR(char*, strrchr, const char *s, int c) {
+  void *ctx;
+  if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
+    return internal_strrchr(s, c);
+  COMMON_INTERCEPTOR_ENTER(ctx, strrchr, s, c);
+  uptr len = internal_strlen(s);
+  if (common_flags()->intercept_strchr)
+    COMMON_INTERCEPTOR_READ_STRING_OF_LEN(ctx, s, len, len + 1);
+  return REAL(strrchr)(s, c);
+}
+#define INIT_STRRCHR COMMON_INTERCEPT_FUNCTION(strrchr)
+#else
+#define INIT_STRRCHR
+#endif
+
 #if SANITIZER_INTERCEPT_STRSPN
 INTERCEPTOR(SIZE_T, strspn, const char *s1, const char *s2) {
   void *ctx;
@@ -412,6 +552,64 @@
 #define INIT_STRPBRK
 #endif
 
+#if SANITIZER_INTERCEPT_MEMSET
+INTERCEPTOR(void*, memset, void *dst, int v, uptr size) {
+  if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
+    return internal_memset(dst, v, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, memset, dst, v, size);
+  if (common_flags()->intercept_intrin)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size);
+  return REAL(memset)(dst, v, size);
+}
+
+#define INIT_MEMSET COMMON_INTERCEPT_FUNCTION(memset)
+#else
+#define INIT_MEMSET
+#endif
+
+#if SANITIZER_INTERCEPT_MEMMOVE
+INTERCEPTOR(void*, memmove, void *dst, const void *src, uptr size) {
+  if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
+    return internal_memmove(dst, src, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, memmove, dst, src, size);
+  if (common_flags()->intercept_intrin) {
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size);
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, src, size);
+  }
+  return REAL(memmove)(dst, src, size);
+}
+
+#define INIT_MEMMOVE COMMON_INTERCEPT_FUNCTION(memmove)
+#else
+#define INIT_MEMMOVE
+#endif
+
+#if SANITIZER_INTERCEPT_MEMCPY
+INTERCEPTOR(void*, memcpy, void *dst, const void *src, uptr size) {
+  if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED) {
+    // On OS X, calling internal_memcpy here will cause memory corruptions,
+    // because memcpy and memmove are actually aliases of the same
+    // implementation.  We need to use internal_memmove here.
+    return internal_memmove(dst, src, size);
+  }
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, memcpy, dst, src, size);
+  if (common_flags()->intercept_intrin) {
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size);
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, src, size);
+  }
+  // N.B.: If we switch this to internal_ we'll have to use internal_memmove
+  // due to memcpy being an alias of memmove on OS X.
+  return REAL(memcpy)(dst, src, size);
+}
+
+#define INIT_MEMCPY COMMON_INTERCEPT_FUNCTION(memcpy)
+#else
+#define INIT_MEMCPY
+#endif
+
 #if SANITIZER_INTERCEPT_MEMCMP
 
 DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_memcmp, uptr called_pc,
@@ -465,7 +663,16 @@
     return internal_memchr(s, c, n);
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, memchr, s, c, n);
+#if SANITIZER_WINDOWS
+  void *res;
+  if (REAL(memchr)) {
+    res = REAL(memchr)(s, c, n);
+  } else {
+    res = internal_memchr(s, c, n);
+  }
+#else
   void *res = REAL(memchr)(s, c, n);
+#endif
   uptr len = res ? (char *)res - (const char *)s + 1 : n;
   COMMON_INTERCEPTOR_READ_RANGE(ctx, s, len);
   return res;
@@ -529,7 +736,7 @@
 
 #define INIT_FREXPF_FREXPL           \
   COMMON_INTERCEPT_FUNCTION(frexpf); \
-  COMMON_INTERCEPT_FUNCTION(frexpl)
+  COMMON_INTERCEPT_FUNCTION_LDBL(frexpl)
 #else
 #define INIT_FREXPF_FREXPL
 #endif  // SANITIZER_INTERCEPT_FREXPF_FREXPL
@@ -2308,7 +2515,7 @@
 #define INIT_MODF                   \
   COMMON_INTERCEPT_FUNCTION(modf);  \
   COMMON_INTERCEPT_FUNCTION(modff); \
-  COMMON_INTERCEPT_FUNCTION(modfl);
+  COMMON_INTERCEPT_FUNCTION_LDBL(modfl);
 #else
 #define INIT_MODF
 #endif
@@ -2349,6 +2556,75 @@
 #define INIT_RECVMSG
 #endif
 
+#if SANITIZER_INTERCEPT_SENDMSG
+static void read_msghdr_control(void *ctx, void *control, uptr controllen) {
+  const unsigned kCmsgDataOffset =
+      RoundUpTo(sizeof(__sanitizer_cmsghdr), sizeof(uptr));
+
+  char *p = (char *)control;
+  char *const control_end = p + controllen;
+  while (true) {
+    if (p + sizeof(__sanitizer_cmsghdr) > control_end) break;
+    __sanitizer_cmsghdr *cmsg = (__sanitizer_cmsghdr *)p;
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, &cmsg->cmsg_len, sizeof(cmsg->cmsg_len));
+
+    if (p + RoundUpTo(cmsg->cmsg_len, sizeof(uptr)) > control_end) break;
+
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, &cmsg->cmsg_level,
+                                  sizeof(cmsg->cmsg_level));
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, &cmsg->cmsg_type,
+                                  sizeof(cmsg->cmsg_type));
+
+    if (cmsg->cmsg_len > kCmsgDataOffset) {
+      char *data = p + kCmsgDataOffset;
+      unsigned data_len = cmsg->cmsg_len - kCmsgDataOffset;
+      if (data_len > 0) COMMON_INTERCEPTOR_READ_RANGE(ctx, data, data_len);
+    }
+
+    p += RoundUpTo(cmsg->cmsg_len, sizeof(uptr));
+  }
+}
+
+static void read_msghdr(void *ctx, struct __sanitizer_msghdr *msg,
+                        SSIZE_T maxlen) {
+#define R(f) \
+  COMMON_INTERCEPTOR_READ_RANGE(ctx, &msg->msg_##f, sizeof(msg->msg_##f))
+  R(name);
+  R(namelen);
+  R(iov);
+  R(iovlen);
+  R(control);
+  R(controllen);
+  R(flags);
+#undef R
+  if (msg->msg_name && msg->msg_namelen)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, msg->msg_name, msg->msg_namelen);
+  if (msg->msg_iov && msg->msg_iovlen)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, msg->msg_iov,
+                                  sizeof(*msg->msg_iov) * msg->msg_iovlen);
+  read_iovec(ctx, msg->msg_iov, msg->msg_iovlen, maxlen);
+  if (msg->msg_control && msg->msg_controllen)
+    read_msghdr_control(ctx, msg->msg_control, msg->msg_controllen);
+}
+
+INTERCEPTOR(SSIZE_T, sendmsg, int fd, struct __sanitizer_msghdr *msg,
+            int flags) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, sendmsg, fd, msg, flags);
+  if (fd >= 0) {
+    COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
+    COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
+  }
+  SSIZE_T res = REAL(sendmsg)(fd, msg, flags);
+  if (common_flags()->intercept_send && res >= 0 && msg)
+    read_msghdr(ctx, msg, res);
+  return res;
+}
+#define INIT_SENDMSG COMMON_INTERCEPT_FUNCTION(sendmsg);
+#else
+#define INIT_SENDMSG
+#endif
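
The read_msghdr/read_msghdr_control helpers above walk the iovec array and the cmsg chain of an outgoing message and check the described ranges (addressability or initialization, depending on the tool). For reference, a hedged sketch of a well-formed caller that the interceptor accepts without a report, passing one file descriptor via SCM_RIGHTS; sock and fd_to_pass are assumed to be valid descriptors.

    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <cstring>

    static void SendFdWithPing(int sock, int fd_to_pass) {
      char data[] = "ping";
      struct iovec iov = {data, sizeof(data)};
      union {
        struct cmsghdr hdr;
        char buf[CMSG_SPACE(sizeof(int))];
      } control;
      std::memset(&control, 0, sizeof(control));
      struct msghdr msg = {};
      msg.msg_iov = &iov;
      msg.msg_iovlen = 1;
      msg.msg_control = control.buf;
      msg.msg_controllen = sizeof(control.buf);
      struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
      cmsg->cmsg_level = SOL_SOCKET;
      cmsg->cmsg_type = SCM_RIGHTS;
      cmsg->cmsg_len = CMSG_LEN(sizeof(int));
      std::memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));
      // Every field the interceptor reads is initialized at this point.
      (void)sendmsg(sock, &msg, 0);
    }
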
+
 #if SANITIZER_INTERCEPT_GETPEERNAME
 INTERCEPTOR(int, getpeername, int sockfd, void *addr, unsigned *addrlen) {
   void *ctx;
@@ -3969,7 +4245,7 @@
 #define INIT_SINCOS                   \
   COMMON_INTERCEPT_FUNCTION(sincos);  \
   COMMON_INTERCEPT_FUNCTION(sincosf); \
-  COMMON_INTERCEPT_FUNCTION(sincosl);
+  COMMON_INTERCEPT_FUNCTION_LDBL(sincosl);
 #else
 #define INIT_SINCOS
 #endif
@@ -4008,7 +4284,7 @@
 #define INIT_REMQUO                   \
   COMMON_INTERCEPT_FUNCTION(remquo);  \
   COMMON_INTERCEPT_FUNCTION(remquof); \
-  COMMON_INTERCEPT_FUNCTION(remquol);
+  COMMON_INTERCEPT_FUNCTION_LDBL(remquol);
 #else
 #define INIT_REMQUO
 #endif
@@ -4039,7 +4315,7 @@
 #define INIT_LGAMMA                   \
   COMMON_INTERCEPT_FUNCTION(lgamma);  \
   COMMON_INTERCEPT_FUNCTION(lgammaf); \
-  COMMON_INTERCEPT_FUNCTION(lgammal);
+  COMMON_INTERCEPT_FUNCTION_LDBL(lgammal);
 #else
 #define INIT_LGAMMA
 #endif
@@ -4083,7 +4359,7 @@
   if (signp) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, signp, sizeof(*signp));
   return res;
 }
-#define INIT_LGAMMAL_R COMMON_INTERCEPT_FUNCTION(lgammal_r);
+#define INIT_LGAMMAL_R COMMON_INTERCEPT_FUNCTION_LDBL(lgammal_r);
 #else
 #define INIT_LGAMMAL_R
 #endif
@@ -4224,6 +4500,7 @@
 #endif
 
 #if SANITIZER_INTERCEPT_TLS_GET_ADDR
+#if !SANITIZER_S390
 #define INIT_TLS_GET_ADDR COMMON_INTERCEPT_FUNCTION(__tls_get_addr)
 // If you see any crashes around this functions, there are 2 known issues with
 // it: 1. __tls_get_addr can be called with mis-aligned stack due to:
@@ -4244,6 +4521,67 @@
   }
   return res;
 }
+#if SANITIZER_PPC
+// On PowerPC, we also need to intercept __tls_get_addr_opt, which has
+// mostly the same semantics as __tls_get_addr, but its presence enables
+// some optimizations in the linker (which are safe to ignore here).
+extern "C" __attribute__((alias("__interceptor___tls_get_addr"),
+                          visibility("default")))
+void *__tls_get_addr_opt(void *arg);
+#endif
+#else // SANITIZER_S390
+// On s390, we have to intercept two functions here:
+// - __tls_get_addr_internal, which is a glibc-internal function that is like
+//   the usual __tls_get_addr, but returns a TP-relative offset instead of
+//   a proper pointer.  It is used by dlsym for TLS symbols.
+// - __tls_get_offset, which is like the above, but also takes a GOT-relative
+//   descriptor offset as an argument instead of a pointer.  GOT address
+//   is passed in r12, so it's necessary to write it in assembly.  This is
+//   the function used by the compiler.
+#define INIT_TLS_GET_ADDR COMMON_INTERCEPT_FUNCTION(__tls_get_addr_internal)
+INTERCEPTOR(uptr, __tls_get_addr_internal, void *arg) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, __tls_get_addr_internal, arg);
+  uptr res = REAL(__tls_get_addr_internal)(arg);
+  uptr tp = reinterpret_cast<uptr>(__builtin_thread_pointer());
+  void *ptr = reinterpret_cast<void *>(res + tp);
+  uptr tls_begin, tls_end;
+  COMMON_INTERCEPTOR_GET_TLS_RANGE(&tls_begin, &tls_end);
+  DTLS::DTV *dtv = DTLS_on_tls_get_addr(arg, ptr, tls_begin, tls_end);
+  if (dtv) {
+    // New DTLS block has been allocated.
+    COMMON_INTERCEPTOR_INITIALIZE_RANGE((void *)dtv->beg, dtv->size);
+  }
+  return res;
+}
+// We need a protected symbol aliasing the above, so that we can jump
+// directly to it from the assembly below.
+extern "C" __attribute__((alias("__interceptor___tls_get_addr_internal"),
+                          visibility("protected")))
+uptr __interceptor___tls_get_addr_internal_protected(void *arg);
+// Now carefully intercept __tls_get_offset.
+asm(
+  ".text\n"
+  ".global __tls_get_offset\n"
+  "__tls_get_offset:\n"
+// The __interceptor_ version has to exist, so that gen_dynamic_list.py
+// exports our symbol.
+  ".global __interceptor___tls_get_offset\n"
+  "__interceptor___tls_get_offset:\n"
+#ifdef __s390x__
+  "la %r2, 0(%r2,%r12)\n"
+  "jg __interceptor___tls_get_addr_internal_protected\n"
+#else
+  "basr %r3,0\n"
+  "0: la %r2,0(%r2,%r12)\n"
+  "l %r4,1f-0b(%r3)\n"
+  "b 0(%r4,%r3)\n"
+  "1: .long __interceptor___tls_get_addr_internal_protected - 0b\n"
+#endif
+  ".type __tls_get_offset, @function\n"
+  ".size __tls_get_offset, .-__tls_get_offset\n"
+);
+#endif // SANITIZER_S390
 #else
 #define INIT_TLS_GET_ADDR
 #endif
@@ -5342,22 +5680,215 @@
 #define INIT_CTERMID_R
 #endif
 
+#if SANITIZER_INTERCEPT_RECV_RECVFROM
+INTERCEPTOR(SSIZE_T, recv, int fd, void *buf, SIZE_T len, int flags) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, recv, fd, buf, len, flags);
+  COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
+  SSIZE_T res = REAL(recv)(fd, buf, len, flags);
+  if (res > 0) {
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, Min((SIZE_T)res, len));
+  }
+  if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
+  return res;
+}
+
+INTERCEPTOR(SSIZE_T, recvfrom, int fd, void *buf, SIZE_T len, int flags,
+            void *srcaddr, int *addrlen) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, recvfrom, fd, buf, len, flags, srcaddr,
+                           addrlen);
+  COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
+  SIZE_T srcaddr_sz;
+  if (srcaddr) srcaddr_sz = *addrlen;
+  (void)srcaddr_sz;  // prevent "set but not used" warning
+  SSIZE_T res = REAL(recvfrom)(fd, buf, len, flags, srcaddr, addrlen);
+  if (res > 0) {
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, Min((SIZE_T)res, len));
+    if (srcaddr)
+      COMMON_INTERCEPTOR_INITIALIZE_RANGE(srcaddr,
+                                          Min((SIZE_T)*addrlen, srcaddr_sz));
+  }
+  return res;
+}
+#define INIT_RECV_RECVFROM          \
+  COMMON_INTERCEPT_FUNCTION(recv);  \
+  COMMON_INTERCEPT_FUNCTION(recvfrom);
+#else
+#define INIT_RECV_RECVFROM
+#endif
+
+#if SANITIZER_INTERCEPT_SEND_SENDTO
+INTERCEPTOR(SSIZE_T, send, int fd, void *buf, SIZE_T len, int flags) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, send, fd, buf, len, flags);
+  if (fd >= 0) {
+    COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
+    COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
+  }
+  SSIZE_T res = REAL(send)(fd, buf, len, flags);
+  if (common_flags()->intercept_send && res > 0)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, buf, Min((SIZE_T)res, len));
+  return res;
+}
+
+INTERCEPTOR(SSIZE_T, sendto, int fd, void *buf, SIZE_T len, int flags,
+            void *dstaddr, int addrlen) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, sendto, fd, buf, len, flags, dstaddr, addrlen);
+  if (fd >= 0) {
+    COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
+    COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
+  }
+  // Can't check dstaddr as it may have uninitialized padding at the end.
+  SSIZE_T res = REAL(sendto)(fd, buf, len, flags, dstaddr, addrlen);
+  if (common_flags()->intercept_send && res > 0)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, buf, Min((SIZE_T)res, len));
+  return res;
+}
+#define INIT_SEND_SENDTO           \
+  COMMON_INTERCEPT_FUNCTION(send); \
+  COMMON_INTERCEPT_FUNCTION(sendto);
+#else
+#define INIT_SEND_SENDTO
+#endif
+
+#if SANITIZER_INTERCEPT_EVENTFD_READ_WRITE
+INTERCEPTOR(int, eventfd_read, int fd, u64 *value) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, eventfd_read, fd, value);
+  COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
+  int res = REAL(eventfd_read)(fd, value);
+  if (res == 0) {
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, value, sizeof(*value));
+    if (fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
+  }
+  return res;
+}
+INTERCEPTOR(int, eventfd_write, int fd, u64 value) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, eventfd_write, fd, value);
+  if (fd >= 0) {
+    COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
+    COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
+  }
+  int res = REAL(eventfd_write)(fd, value);
+  return res;
+}
+#define INIT_EVENTFD_READ_WRITE            \
+  COMMON_INTERCEPT_FUNCTION(eventfd_read); \
+  COMMON_INTERCEPT_FUNCTION(eventfd_write)
+#else
+#define INIT_EVENTFD_READ_WRITE
+#endif
+
+#if SANITIZER_INTERCEPT_STAT
+INTERCEPTOR(int, stat, const char *path, void *buf) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, stat, path, buf);
+  if (common_flags()->intercept_stat)
+    COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
+  int res = REAL(stat)(path, buf);
+  if (!res)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat_sz);
+  return res;
+}
+#define INIT_STAT COMMON_INTERCEPT_FUNCTION(stat)
+#else
+#define INIT_STAT
+#endif
+
+#if SANITIZER_INTERCEPT___XSTAT
+INTERCEPTOR(int, __xstat, int version, const char *path, void *buf) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, __xstat, version, path, buf);
+  if (common_flags()->intercept_stat)
+    COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
+  int res = REAL(__xstat)(version, path, buf);
+  if (!res)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat_sz);
+  return res;
+}
+#define INIT___XSTAT COMMON_INTERCEPT_FUNCTION(__xstat)
+#else
+#define INIT___XSTAT
+#endif
+
+#if SANITIZER_INTERCEPT___XSTAT64
+INTERCEPTOR(int, __xstat64, int version, const char *path, void *buf) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, __xstat64, version, path, buf);
+  if (common_flags()->intercept_stat)
+    COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
+  int res = REAL(__xstat64)(version, path, buf);
+  if (!res)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat64_sz);
+  return res;
+}
+#define INIT___XSTAT64 COMMON_INTERCEPT_FUNCTION(__xstat64)
+#else
+#define INIT___XSTAT64
+#endif
+
+#if SANITIZER_INTERCEPT___LXSTAT
+INTERCEPTOR(int, __lxstat, int version, const char *path, void *buf) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, __lxstat, version, path, buf);
+  if (common_flags()->intercept_stat)
+    COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
+  int res = REAL(__lxstat)(version, path, buf);
+  if (!res)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat_sz);
+  return res;
+}
+#define INIT___LXSTAT COMMON_INTERCEPT_FUNCTION(__lxstat)
+#else
+#define INIT___LXSTAT
+#endif
+
+#if SANITIZER_INTERCEPT___LXSTAT64
+INTERCEPTOR(int, __lxstat64, int version, const char *path, void *buf) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, __lxstat64, version, path, buf);
+  if (common_flags()->intercept_stat)
+    COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
+  int res = REAL(__lxstat64)(version, path, buf);
+  if (!res)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat64_sz);
+  return res;
+}
+#define INIT___LXSTAT64 COMMON_INTERCEPT_FUNCTION(__lxstat64)
+#else
+#define INIT___LXSTAT64
+#endif
+
+// FIXME: add other *stat interceptors.
+
 static void InitializeCommonInterceptors() {
   static u64 metadata_mem[sizeof(MetadataHashMap) / sizeof(u64) + 1];
   interceptor_metadata_map = new((void *)&metadata_mem) MetadataHashMap();
 
   INIT_TEXTDOMAIN;
+  INIT_STRLEN;
+  INIT_STRNLEN;
   INIT_STRCMP;
   INIT_STRNCMP;
   INIT_STRCASECMP;
   INIT_STRNCASECMP;
   INIT_STRSTR;
   INIT_STRCASESTR;
+  INIT_STRCHR;
+  INIT_STRCHRNUL;
+  INIT_STRRCHR;
   INIT_STRSPN;
   INIT_STRPBRK;
+  INIT_MEMSET;
+  INIT_MEMMOVE;
+  INIT_MEMCPY;
   INIT_MEMCHR;
   INIT_MEMCMP;
   INIT_MEMRCHR;
+  INIT_MEMMEM;
   INIT_READ;
   INIT_PREAD;
   INIT_PREAD64;
@@ -5407,6 +5938,7 @@
   INIT_ACCEPT4;
   INIT_MODF;
   INIT_RECVMSG;
+  INIT_SENDMSG;
   INIT_GETPEERNAME;
   INIT_IOCTL;
   INIT_INET_ATON;
@@ -5518,4 +6050,13 @@
   INIT_PROCESS_VM_READV;
   INIT_CTERMID;
   INIT_CTERMID_R;
+  INIT_RECV_RECVFROM;
+  INIT_SEND_SENDTO;
+  INIT_STAT;
+  INIT_EVENTFD_READ_WRITE;
+  INIT___XSTAT;
+  INIT___XSTAT64;
+  INIT___LXSTAT;
+  INIT___LXSTAT64;
+  // FIXME: add other *stat interceptors.
 }
diff --git a/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc b/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc
index fcd0a3d..4ed9afe 100644
--- a/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc
+++ b/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc
@@ -53,25 +53,9 @@
   _(FIONBIO, READ, sizeof(int));
   _(FIONCLEX, NONE, 0);
   _(FIOSETOWN, READ, sizeof(int));
-  _(SIOCADDMULTI, READ, struct_ifreq_sz);
   _(SIOCATMARK, WRITE, sizeof(int));
-  _(SIOCDELMULTI, READ, struct_ifreq_sz);
-  _(SIOCGIFADDR, WRITE, struct_ifreq_sz);
-  _(SIOCGIFBRDADDR, WRITE, struct_ifreq_sz);
   _(SIOCGIFCONF, CUSTOM, 0);
-  _(SIOCGIFDSTADDR, WRITE, struct_ifreq_sz);
-  _(SIOCGIFFLAGS, WRITE, struct_ifreq_sz);
-  _(SIOCGIFMETRIC, WRITE, struct_ifreq_sz);
-  _(SIOCGIFMTU, WRITE, struct_ifreq_sz);
-  _(SIOCGIFNETMASK, WRITE, struct_ifreq_sz);
   _(SIOCGPGRP, WRITE, sizeof(int));
-  _(SIOCSIFADDR, READ, struct_ifreq_sz);
-  _(SIOCSIFBRDADDR, READ, struct_ifreq_sz);
-  _(SIOCSIFDSTADDR, READ, struct_ifreq_sz);
-  _(SIOCSIFFLAGS, READ, struct_ifreq_sz);
-  _(SIOCSIFMETRIC, READ, struct_ifreq_sz);
-  _(SIOCSIFMTU, READ, struct_ifreq_sz);
-  _(SIOCSIFNETMASK, READ, struct_ifreq_sz);
   _(SIOCSPGRP, READ, sizeof(int));
   _(TIOCCONS, NONE, 0);
   _(TIOCEXCL, NONE, 0);
@@ -92,6 +76,25 @@
   _(TIOCSTI, READ, sizeof(char));
   _(TIOCSWINSZ, READ, struct_winsize_sz);
 
+#if !SANITIZER_IOS
+  _(SIOCADDMULTI, READ, struct_ifreq_sz);
+  _(SIOCDELMULTI, READ, struct_ifreq_sz);
+  _(SIOCGIFADDR, WRITE, struct_ifreq_sz);
+  _(SIOCGIFBRDADDR, WRITE, struct_ifreq_sz);
+  _(SIOCGIFDSTADDR, WRITE, struct_ifreq_sz);
+  _(SIOCGIFFLAGS, WRITE, struct_ifreq_sz);
+  _(SIOCGIFMETRIC, WRITE, struct_ifreq_sz);
+  _(SIOCGIFMTU, WRITE, struct_ifreq_sz);
+  _(SIOCGIFNETMASK, WRITE, struct_ifreq_sz);
+  _(SIOCSIFADDR, READ, struct_ifreq_sz);
+  _(SIOCSIFBRDADDR, READ, struct_ifreq_sz);
+  _(SIOCSIFDSTADDR, READ, struct_ifreq_sz);
+  _(SIOCSIFFLAGS, READ, struct_ifreq_sz);
+  _(SIOCSIFMETRIC, READ, struct_ifreq_sz);
+  _(SIOCSIFMTU, READ, struct_ifreq_sz);
+  _(SIOCSIFNETMASK, READ, struct_ifreq_sz);
+#endif
+
 #if (SANITIZER_LINUX && !SANITIZER_ANDROID)
   _(SIOCGETSGCNT, WRITE, struct_sioc_sg_req_sz);
   _(SIOCGETVIFCNT, WRITE, struct_sioc_vif_req_sz);
@@ -580,7 +583,8 @@
     return;
   if (request == IOCTL_SIOCGIFCONF) {
     struct __sanitizer_ifconf *ifc = (__sanitizer_ifconf *)arg;
-    COMMON_INTERCEPTOR_READ_RANGE(ctx, &ifc->ifc_len, sizeof(ifc->ifc_len));
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, (char*)&ifc->ifc_len,
+                                  sizeof(ifc->ifc_len));
   }
 }
 
diff --git a/lib/sanitizer_common/sanitizer_common_syscalls.inc b/lib/sanitizer_common/sanitizer_common_syscalls.inc
index 008e577..469c8eb 100644
--- a/lib/sanitizer_common/sanitizer_common_syscalls.inc
+++ b/lib/sanitizer_common/sanitizer_common_syscalls.inc
@@ -1237,17 +1237,15 @@
 PRE_SYSCALL(pipe)(void *fildes) {}
 
 POST_SYSCALL(pipe)(long res, void *fildes) {
-  if (res >= 0) {
-    if (fildes) POST_WRITE(fildes, sizeof(int));
-  }
+  if (res >= 0)
+    if (fildes) POST_WRITE(fildes, sizeof(int) * 2);
 }
 
 PRE_SYSCALL(pipe2)(void *fildes, long flags) {}
 
 POST_SYSCALL(pipe2)(long res, void *fildes, long flags) {
-  if (res >= 0) {
-    if (fildes) POST_WRITE(fildes, sizeof(int));
-  }
+  if (res >= 0)
+    if (fildes) POST_WRITE(fildes, sizeof(int) * 2);
 }
 
 PRE_SYSCALL(dup)(long fildes) {}
@@ -1880,13 +1878,11 @@
 
 POST_SYSCALL(socket)(long res, long arg0, long arg1, long arg2) {}
 
-PRE_SYSCALL(socketpair)(long arg0, long arg1, long arg2, void *arg3) {}
+PRE_SYSCALL(socketpair)(long arg0, long arg1, long arg2, int *sv) {}
 
-POST_SYSCALL(socketpair)(long res, long arg0, long arg1, long arg2,
-                         void *arg3) {
-  if (res >= 0) {
-    if (arg3) POST_WRITE(arg3, sizeof(int));
-  }
+POST_SYSCALL(socketpair)(long res, long arg0, long arg1, long arg2, int *sv) {
+  if (res >= 0)
+    if (sv) POST_WRITE(sv, sizeof(int) * 2);
 }
 
 PRE_SYSCALL(socketcall)(long call, void *args) {}
@@ -2301,7 +2297,7 @@
 PRE_SYSCALL(ptrace)(long request, long pid, long addr, long data) {
 #if !SANITIZER_ANDROID && \
     (defined(__i386) || defined(__x86_64) || defined(__mips64) || \
-     defined(__powerpc64__) || defined(__aarch64__))
+     defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__))
   if (data) {
     if (request == ptrace_setregs) {
       PRE_READ((void *)data, struct_user_regs_struct_sz);
@@ -2322,7 +2318,7 @@
 POST_SYSCALL(ptrace)(long res, long request, long pid, long addr, long data) {
 #if !SANITIZER_ANDROID && \
     (defined(__i386) || defined(__x86_64) || defined(__mips64) || \
-     defined(__powerpc64__) || defined(__aarch64__))
+     defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__))
   if (res >= 0 && data) {
     // Note that this is different from the interceptor in
     // sanitizer_common_interceptors.inc.
@@ -2844,6 +2840,40 @@
 POST_SYSCALL(vfork)(long res) {
   COMMON_SYSCALL_POST_FORK(res);
 }
+
+PRE_SYSCALL(sigaction)(long signum, const __sanitizer_kernel_sigaction_t *act,
+                       __sanitizer_kernel_sigaction_t *oldact) {
+  if (act) {
+    PRE_READ(&act->sigaction, sizeof(act->sigaction));
+    PRE_READ(&act->sa_flags, sizeof(act->sa_flags));
+    PRE_READ(&act->sa_mask, sizeof(act->sa_mask));
+  }
+}
+
+POST_SYSCALL(sigaction)(long res, long signum,
+                        const __sanitizer_kernel_sigaction_t *act,
+                        __sanitizer_kernel_sigaction_t *oldact) {
+  if (res >= 0 && oldact) POST_WRITE(oldact, sizeof(*oldact));
+}
+
+PRE_SYSCALL(rt_sigaction)(long signum,
+                          const __sanitizer_kernel_sigaction_t *act,
+                          __sanitizer_kernel_sigaction_t *oldact, SIZE_T sz) {
+  if (act) {
+    PRE_READ(&act->sigaction, sizeof(act->sigaction));
+    PRE_READ(&act->sa_flags, sizeof(act->sa_flags));
+    PRE_READ(&act->sa_mask, sz);
+  }
+}
+
+POST_SYSCALL(rt_sigaction)(long res, long signum,
+                           const __sanitizer_kernel_sigaction_t *act,
+                           __sanitizer_kernel_sigaction_t *oldact, SIZE_T sz) {
+  if (res >= 0 && oldact) {
+    SIZE_T oldact_sz = ((char *)&oldact->sa_mask) - ((char *)oldact) + sz;
+    POST_WRITE(oldact, oldact_sz);
+  }
+}
 }  // extern "C"
 
 #undef PRE_SYSCALL
diff --git a/lib/sanitizer_common/sanitizer_coverage_libcdep.cc b/lib/sanitizer_common/sanitizer_coverage_libcdep.cc
index eaa1446..2c69788 100644
--- a/lib/sanitizer_common/sanitizer_coverage_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_coverage_libcdep.cc
@@ -49,6 +49,8 @@
 
 static const u64 kMagic64 = 0xC0BFFFFFFFFFFF64ULL;
 static const u64 kMagic32 = 0xC0BFFFFFFFFFFF32ULL;
+static const uptr kNumWordsForMagic = SANITIZER_WORDSIZE == 64 ? 1 : 2;
+static const u64 kMagic = SANITIZER_WORDSIZE == 64 ? kMagic64 : kMagic32;
 
 static atomic_uint32_t dump_once_guard;  // Ensure that CovDump runs only once.
 
@@ -107,18 +109,25 @@
   uptr Update8bitCounterBitsetAndClearCounters(u8 *bitset);
 
   uptr *data();
-  uptr size();
-  uptr *buffer() const { return pc_buffer; }
+  uptr size() const;
+
+  void SetPcBuffer(uptr* data, uptr length);
 
  private:
+  struct NamedPcRange {
+    const char *copied_module_name;
+    uptr beg, end; // elements [beg,end) in pc_array.
+  };
+
   void DirectOpen();
   void UpdateModuleNameVec(uptr caller_pc, uptr range_beg, uptr range_end);
+  void GetRangeOffsets(const NamedPcRange& r, Symbolizer* s,
+      InternalMmapVector<uptr>* offsets) const;
 
   // Maximal size pc array may ever grow.
   // We MmapNoReserve this space to ensure that the array is contiguous.
-  static const uptr kPcArrayMaxSize = FIRST_32_SECOND_64(
-      1 << (SANITIZER_ANDROID ? 24 : (SANITIZER_WINDOWS ? 27 : 26)),
-      1 << 27);
+  static const uptr kPcArrayMaxSize =
+      FIRST_32_SECOND_64(1 << (SANITIZER_ANDROID ? 24 : 26), 1 << 27);
   // The amount file mapping for the pc array is grown by.
   static const uptr kPcArrayMmapSize = 64 * 1024;
 
@@ -135,15 +144,11 @@
   fd_t pc_fd;
 
   uptr *pc_buffer;
+  uptr pc_buffer_len;
 
   // Vector of coverage guard arrays, protected by mu.
   InternalMmapVectorNoCtor<s32*> guard_array_vec;
 
-  struct NamedPcRange {
-    const char *copied_module_name;
-    uptr beg, end; // elements [beg,end) in pc_array.
-  };
-
   // Vector of module and compilation unit pc ranges.
   InternalMmapVectorNoCtor<NamedPcRange> comp_unit_name_vec;
   InternalMmapVectorNoCtor<NamedPcRange> module_name_vec;
@@ -213,9 +218,7 @@
   }
 
   pc_buffer = nullptr;
-  if (common_flags()->coverage_pc_buffer)
-    pc_buffer = reinterpret_cast<uptr *>(MmapNoReserveOrDie(
-        sizeof(uptr) * kPcArrayMaxSize, "CovInit::pc_buffer"));
+  pc_buffer_len = 0;
 
   cc_array = reinterpret_cast<uptr **>(MmapNoReserveOrDie(
       sizeof(uptr *) * kCcArrayMaxSize, "CovInit::cc_array"));
@@ -254,10 +257,6 @@
     UnmapOrDie(cc_array, sizeof(uptr *) * kCcArrayMaxSize);
     cc_array = nullptr;
   }
-  if (pc_buffer) {
-    UnmapOrDie(pc_buffer, sizeof(uptr) * kPcArrayMaxSize);
-    pc_buffer = nullptr;
-  }
   if (tr_event_array) {
     UnmapOrDie(tr_event_array,
                sizeof(tr_event_array[0]) * kTrEventArrayMaxSize +
@@ -426,7 +425,7 @@
            atomic_load(&pc_array_size, memory_order_acquire));
   uptr counter = atomic_fetch_add(&coverage_counter, 1, memory_order_relaxed);
   pc_array[idx] = BundlePcAndCounter(pc, counter);
-  if (pc_buffer) pc_buffer[counter] = pc;
+  if (pc_buffer && counter < pc_buffer_len) pc_buffer[counter] = pc;
 }
 
 // Registers a pair caller=>callee.
@@ -525,7 +524,7 @@
   return pc_array;
 }
 
-uptr CoverageData::size() {
+uptr CoverageData::size() const {
   return atomic_load(&pc_array_index, memory_order_relaxed);
 }
 
@@ -755,41 +754,96 @@
   }
 }
 
+
+void CoverageData::GetRangeOffsets(const NamedPcRange& r, Symbolizer* sym,
+    InternalMmapVector<uptr>* offsets) const {
+  offsets->clear();
+  for (uptr i = 0; i < kNumWordsForMagic; i++)
+    offsets->push_back(0);
+  CHECK(r.copied_module_name);
+  CHECK_LE(r.beg, r.end);
+  CHECK_LE(r.end, size());
+  for (uptr i = r.beg; i < r.end; i++) {
+    uptr pc = UnbundlePc(pc_array[i]);
+    uptr counter = UnbundleCounter(pc_array[i]);
+    if (!pc) continue; // Not visited.
+    uptr offset = 0;
+    sym->GetModuleNameAndOffsetForPC(pc, nullptr, &offset);
+    offsets->push_back(BundlePcAndCounter(offset, counter));
+  }
+
+  CHECK_GE(offsets->size(), kNumWordsForMagic);
+  SortArray(offsets->data(), offsets->size());
+  for (uptr i = 0; i < offsets->size(); i++)
+    (*offsets)[i] = UnbundlePc((*offsets)[i]);
+}
+
+static void GenerateHtmlReport(const InternalMmapVector<char *> &cov_files) {
+  if (!common_flags()->html_cov_report) {
+    return;
+  }
+  char *sancov_path = FindPathToBinary(common_flags()->sancov_path);
+  if (sancov_path == nullptr) {
+    return;
+  }
+
+  InternalMmapVector<char *> sancov_argv(cov_files.size() * 2 + 3);
+  sancov_argv.push_back(sancov_path);
+  sancov_argv.push_back(internal_strdup("-html-report"));
+  auto argv_deleter = at_scope_exit([&] {
+    for (uptr i = 0; i < sancov_argv.size(); ++i) {
+      InternalFree(sancov_argv[i]);
+    }
+  });
+
+  for (const auto &cov_file : cov_files) {
+    sancov_argv.push_back(internal_strdup(cov_file));
+  }
+
+  {
+    ListOfModules modules;
+    modules.init();
+    for (const LoadedModule &module : modules) {
+      sancov_argv.push_back(internal_strdup(module.full_name()));
+    }
+  }
+
+  InternalScopedString report_path(kMaxPathLength);
+  fd_t report_fd =
+      CovOpenFile(&report_path, false /* packed */, GetProcessName(), "html");
+  int pid = StartSubprocess(sancov_argv[0], sancov_argv.data(),
+                            kInvalidFd /* stdin */, report_fd /* std_out */);
+  if (pid > 0) {
+    int result = WaitForProcess(pid);
+    if (result == 0)
+      Printf("coverage report generated to %s\n", report_path.data());
+  }
+}
+
 void CoverageData::DumpOffsets() {
   auto sym = Symbolizer::GetOrInit();
   if (!common_flags()->coverage_pcs) return;
   CHECK_NE(sym, nullptr);
   InternalMmapVector<uptr> offsets(0);
   InternalScopedString path(kMaxPathLength);
-  for (uptr m = 0; m < module_name_vec.size(); m++) {
-    offsets.clear();
-    uptr num_words_for_magic = SANITIZER_WORDSIZE == 64 ? 1 : 2;
-    for (uptr i = 0; i < num_words_for_magic; i++)
-      offsets.push_back(0);
-    auto r = module_name_vec[m];
-    CHECK(r.copied_module_name);
-    CHECK_LE(r.beg, r.end);
-    CHECK_LE(r.end, size());
-    for (uptr i = r.beg; i < r.end; i++) {
-      uptr pc = UnbundlePc(pc_array[i]);
-      uptr counter = UnbundleCounter(pc_array[i]);
-      if (!pc) continue; // Not visited.
-      uptr offset = 0;
-      sym->GetModuleNameAndOffsetForPC(pc, nullptr, &offset);
-      offsets.push_back(BundlePcAndCounter(offset, counter));
+
+  InternalMmapVector<char *> cov_files(module_name_vec.size());
+  auto cov_files_deleter = at_scope_exit([&] {
+    for (uptr i = 0; i < cov_files.size(); ++i) {
+      InternalFree(cov_files[i]);
     }
+  });
 
-    CHECK_GE(offsets.size(), num_words_for_magic);
-    SortArray(offsets.data(), offsets.size());
-    for (uptr i = 0; i < offsets.size(); i++)
-      offsets[i] = UnbundlePc(offsets[i]);
+  for (uptr m = 0; m < module_name_vec.size(); m++) {
+    auto r = module_name_vec[m];
+    GetRangeOffsets(r, sym, &offsets);
 
-    uptr num_offsets = offsets.size() - num_words_for_magic;
+    uptr num_offsets = offsets.size() - kNumWordsForMagic;
     u64 *magic_p = reinterpret_cast<u64*>(offsets.data());
     CHECK_EQ(*magic_p, 0ULL);
     // FIXME: we may want to write 32-bit offsets even in 64-mode
     // if all the offsets are small enough.
-    *magic_p = SANITIZER_WORDSIZE == 64 ? kMagic64 : kMagic32;
+    *magic_p = kMagic;
 
     const char *module_name = StripModuleName(r.copied_module_name);
     if (cov_sandboxed) {
@@ -804,11 +858,14 @@
       if (fd == kInvalidFd) continue;
       WriteToFile(fd, offsets.data(), offsets.size() * sizeof(offsets[0]));
       CloseFile(fd);
+      cov_files.push_back(internal_strdup(path.data()));
       VReport(1, " CovDump: %s: %zd PCs written\n", path.data(), num_offsets);
     }
   }
   if (cov_fd != kInvalidFd)
     CloseFile(cov_fd);
+
+  GenerateHtmlReport(cov_files);
 }
 
 void CoverageData::DumpAll() {
@@ -822,6 +879,11 @@
   DumpCallerCalleePairs();
 }
 
+void CoverageData::SetPcBuffer(uptr* data, uptr length) {
+  pc_buffer = data;
+  pc_buffer_len = length;
+}
+
 void CovPrepareForSandboxing(__sanitizer_sandbox_arguments *args) {
   if (!args) return;
   if (!coverage_enabled) return;
@@ -957,8 +1019,12 @@
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-uptr __sanitizer_get_coverage_pc_buffer(uptr **data) {
-  *data = coverage_data.buffer();
+void __sanitizer_set_coverage_pc_buffer(uptr *data, uptr length) {
+  coverage_data.SetPcBuffer(data, length);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+uptr __sanitizer_get_coverage_pc_buffer_pos() {
   return __sanitizer_get_total_unique_coverage();
 }
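
Together, __sanitizer_set_coverage_pc_buffer and __sanitizer_get_coverage_pc_buffer_pos replace the old runtime-owned coverage_pc_buffer: the client now supplies the storage and polls how far it has been filled. A hedged sketch of that flow; the extern "C" prototypes (with uintptr_t standing in for uptr) and the workload function are assumptions.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    extern "C" void __sanitizer_set_coverage_pc_buffer(uintptr_t *data,
                                                       uintptr_t length);
    extern "C" uintptr_t __sanitizer_get_coverage_pc_buffer_pos();

    void RunWorkloadUnderTest();  // Hypothetical code under test.

    void CollectCoveragePcs() {
      static std::vector<uintptr_t> pcs(1 << 20);  // Caller owns the storage.
      __sanitizer_set_coverage_pc_buffer(pcs.data(), pcs.size());
      RunWorkloadUnderTest();
      uintptr_t pos = __sanitizer_get_coverage_pc_buffer_pos();
      // Entries [0, min(pos, pcs.size())) hold the PCs recorded so far.
      std::fprintf(stderr, "%zu coverage PCs recorded\n", (size_t)pos);
    }
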
 
diff --git a/lib/sanitizer_common/sanitizer_coverage_mapping_libcdep.cc b/lib/sanitizer_common/sanitizer_coverage_mapping_libcdep.cc
index 9a9ec55..3477b06 100644
--- a/lib/sanitizer_common/sanitizer_coverage_mapping_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_coverage_mapping_libcdep.cc
@@ -72,16 +72,13 @@
   InternalScopedString text(kMaxTextSize);
 
   {
-    InternalScopedBuffer<LoadedModule> modules(kMaxNumberOfModules);
-    CHECK(modules.data());
-    int n_modules = GetListOfModules(modules.data(), kMaxNumberOfModules,
-                                     /* filter */ nullptr);
-
     text.append("%d\n", sizeof(uptr) * 8);
-    for (int i = 0; i < n_modules; ++i) {
-      const char *module_name = StripModuleName(modules[i].full_name());
-      uptr base = modules[i].base_address();
-      for (const auto &range : modules[i].ranges()) {
+    ListOfModules modules;
+    modules.init();
+    for (const LoadedModule &module : modules) {
+      const char *module_name = StripModuleName(module.full_name());
+      uptr base = module.base_address();
+      for (const auto &range : module.ranges()) {
         if (range.executable) {
           uptr start = range.beg;
           uptr end = range.end;
@@ -90,7 +87,6 @@
             cached_mapping.SetModuleRange(start, end);
         }
       }
-      modules[i].clear();
     }
   }
 
diff --git a/lib/sanitizer_common/sanitizer_flags.cc b/lib/sanitizer_common/sanitizer_flags.cc
index 84da1f6..913ce3c 100644
--- a/lib/sanitizer_common/sanitizer_flags.cc
+++ b/lib/sanitizer_common/sanitizer_flags.cc
@@ -30,11 +30,6 @@
 
 IntrusiveList<FlagDescription> flag_descriptions;
 
-// If set, the tool will install its own SEGV signal handler by default.
-#ifndef SANITIZER_NEEDS_SEGV
-# define SANITIZER_NEEDS_SEGV 1
-#endif
-
 void CommonFlags::SetDefaults() {
 #define COMMON_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
 #include "sanitizer_flags.inc"
@@ -126,4 +121,10 @@
   RegisterIncludeFlags(parser, cf);
 }
 
+void InitializeCommonFlags(CommonFlags *cf) {
+  // Need to record coverage in order to generate a coverage report.
+  cf->coverage |= cf->html_cov_report;
+  SetVerbosity(cf->verbosity);
+}
+
 }  // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_flags.h b/lib/sanitizer_common/sanitizer_flags.h
index 64f2e21..503126b 100644
--- a/lib/sanitizer_common/sanitizer_flags.h
+++ b/lib/sanitizer_common/sanitizer_flags.h
@@ -52,6 +52,11 @@
 void RegisterCommonFlags(FlagParser *parser,
                          CommonFlags *cf = &common_flags_dont_use);
 void RegisterIncludeFlags(FlagParser *parser, CommonFlags *cf);
+
+// Should be called after parsing all flags. Sets up common flag values
+// and performs initializations common to all sanitizers (e.g. setting
+// verbosity).
+void InitializeCommonFlags(CommonFlags *cf = &common_flags_dont_use);
 }  // namespace __sanitizer
 
 #endif  // SANITIZER_FLAGS_H
diff --git a/lib/sanitizer_common/sanitizer_flags.inc b/lib/sanitizer_common/sanitizer_flags.inc
index f9726fe..3fcfb83 100644
--- a/lib/sanitizer_common/sanitizer_flags.inc
+++ b/lib/sanitizer_common/sanitizer_flags.inc
@@ -75,7 +75,7 @@
             "If false, disable printing error summaries in addition to error "
             "reports.")
 COMMON_FLAG(bool, check_printf, true, "Check printf arguments.")
-COMMON_FLAG(bool, handle_segv, SANITIZER_NEEDS_SEGV,
+COMMON_FLAG(bool, handle_segv, true,
             "If set, registers the tool's custom SIGSEGV/SIGBUS handler.")
 COMMON_FLAG(bool, handle_abort, false,
             "If set, registers the tool's custom SIGABRT handler.")
@@ -144,9 +144,6 @@
 COMMON_FLAG(const char *, coverage_dir, ".",
             "Target directory for coverage dumps. Defaults to the current "
             "directory.")
-COMMON_FLAG(bool, coverage_pc_buffer, true,
-            "If set (and if 'coverage' is set too), the pcs would be collected "
-            "in a buffer.")
 COMMON_FLAG(bool, full_address_space, false,
             "Sanitize complete address space; "
             "by default kernel area on 32-bit platforms will not be sanitized")
@@ -165,6 +162,11 @@
 COMMON_FLAG(bool, symbolize_vs_style, false,
             "Print file locations in Visual Studio style (e.g: "
             " file(10,42): ...")
+COMMON_FLAG(int, dedup_token_length, 0,
+            "If positive, after printing a stack trace also print a short "
+            "string token based on this number of frames that will simplify "
+            "deduplication of the reports. "
+            "Example: 'DEDUP_TOKEN: foo-bar-main'. Default is 0.")
 COMMON_FLAG(const char *, stack_trace_format, "DEFAULT",
             "Format string used to render stack frames. "
             "See sanitizer_stacktrace_printer.h for the format description. "
@@ -182,19 +184,36 @@
 COMMON_FLAG(bool, intercept_strpbrk, true,
             "If set, uses custom wrappers for strpbrk function "
             "to find more errors.")
+COMMON_FLAG(bool, intercept_strlen, true,
+            "If set, uses custom wrappers for strlen and strnlen functions "
+            "to find more errors.")
+COMMON_FLAG(bool, intercept_strchr, true,
+            "If set, uses custom wrappers for strchr, strchrnul, and strrchr "
+            "functions to find more errors.")
 COMMON_FLAG(bool, intercept_memcmp, true,
             "If set, uses custom wrappers for memcmp function "
             "to find more errors.")
 COMMON_FLAG(bool, strict_memcmp, true,
           "If true, assume that memcmp(p1, p2, n) always reads n bytes before "
           "comparing p1 and p2.")
+COMMON_FLAG(bool, intercept_memmem, true,
+            "If set, uses a wrapper for memmem() to find more errors.")
+COMMON_FLAG(bool, intercept_intrin, true,
+            "If set, uses custom wrappers for memset/memcpy/memmove "
+            "intrinsics to find more errors.")
+COMMON_FLAG(bool, intercept_stat, true,
+            "If set, uses custom wrappers for *stat functions "
+            "to find more errors.")
+COMMON_FLAG(bool, intercept_send, true,
+            "If set, uses custom wrappers for send* functions "
+            "to find more errors.")
 COMMON_FLAG(bool, decorate_proc_maps, false, "If set, decorate sanitizer "
                                              "mappings in /proc/self/maps with "
                                              "user-readable names")
 COMMON_FLAG(int, exitcode, 1, "Override the program exit status if the tool "
                               "found an error")
 COMMON_FLAG(
-    bool, abort_on_error, SANITIZER_MAC,
+    bool, abort_on_error, SANITIZER_ANDROID || SANITIZER_MAC,
     "If set, the tool calls abort() instead of _exit() after printing the "
     "error report.")
 COMMON_FLAG(bool, suppress_equal_pcs, true,
@@ -202,3 +221,5 @@
             "halt_on_error=false mode (asan only).")
 COMMON_FLAG(bool, print_cmdline, false, "Print command line on crash "
             "(asan only).")
+COMMON_FLAG(bool, html_cov_report, false, "Generate html coverage report.")
+COMMON_FLAG(const char *, sancov_path, "sancov", "Sancov tool location.")
diff --git a/lib/sanitizer_common/sanitizer_interface_internal.h b/lib/sanitizer_common/sanitizer_interface_internal.h
index b11ae30..7f43c84 100644
--- a/lib/sanitizer_common/sanitizer_interface_internal.h
+++ b/lib/sanitizer_common/sanitizer_interface_internal.h
@@ -25,6 +25,10 @@
   // The special values are "stdout" and "stderr".
   SANITIZER_INTERFACE_ATTRIBUTE
   void __sanitizer_set_report_path(const char *path);
+  // Tell the tools to write their reports to the provided file descriptor
+  // (cast to void *).
+  SANITIZER_INTERFACE_ATTRIBUTE
+  void __sanitizer_set_report_fd(void *fd);
 
   typedef struct {
       int coverage_sandboxed;
diff --git a/lib/sanitizer_common/sanitizer_internal_defs.h b/lib/sanitizer_common/sanitizer_internal_defs.h
index c78069b..bb25cf4 100644
--- a/lib/sanitizer_common/sanitizer_internal_defs.h
+++ b/lib/sanitizer_common/sanitizer_internal_defs.h
@@ -89,6 +89,7 @@
 typedef int fd_t;
 typedef int error_t;
 #endif
+typedef int pid_t;
 
 // WARNING: OFF_T may be different from OS type off_t, depending on the value of
 // _FILE_OFFSET_BITS. This definition of OFF_T matches the ABI of system calls
@@ -105,7 +106,12 @@
 #if (SANITIZER_WORDSIZE == 64) || SANITIZER_MAC
 typedef uptr operator_new_size_type;
 #else
+# if defined(__s390__) && !defined(__s390x__)
+// Special case: 31-bit s390 has unsigned long as size_t.
+typedef unsigned long operator_new_size_type;
+# else
 typedef u32 operator_new_size_type;
+# endif
 #endif
 
 
@@ -132,7 +138,7 @@
 # define THREADLOCAL   __declspec(thread)
 # define LIKELY(x) (x)
 # define UNLIKELY(x) (x)
-# define PREFETCH(x) /* _mm_prefetch(x, _MM_HINT_NTA) */
+# define PREFETCH(x) /* _mm_prefetch(x, _MM_HINT_NTA) */ (void)0
 #else  // _MSC_VER
 # define ALWAYS_INLINE inline __attribute__((always_inline))
 # define ALIAS(x) __attribute__((alias(x)))
@@ -290,12 +296,12 @@
 }
 #else
 extern "C" void* _ReturnAddress(void);
+extern "C" void* _AddressOfReturnAddress(void);
 # pragma intrinsic(_ReturnAddress)
+# pragma intrinsic(_AddressOfReturnAddress)
 # define GET_CALLER_PC() (uptr)_ReturnAddress()
 // CaptureStackBackTrace doesn't need to know BP on Windows.
-// FIXME: This macro is still used when printing error reports though it's not
-// clear if the BP value is needed in the ASan reports on Windows.
-# define GET_CURRENT_FRAME() (uptr)0xDEADBEEF
+# define GET_CURRENT_FRAME() (((uptr)_AddressOfReturnAddress()) + sizeof(uptr))
 
 extern "C" void __ud2(void);
 # pragma intrinsic(__ud2)
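On the MSVC path above, _AddressOfReturnAddress() yields the stack slot holding the current function's return address, so the slot just above it stands in for a frame pointer on x64, where RBP frames are usually omitted. A minimal MSVC-only sketch of the same idea (the function name is hypothetical, not part of the patch):

// MSVC-only sketch: approximate the current frame address without RBP.
#include <intrin.h>
#include <cstdint>
#include <cstdio>

#pragma intrinsic(_AddressOfReturnAddress)

__declspec(noinline) static void ShowFrame() {
  // One word above the return-address slot marks the caller/callee boundary.
  uintptr_t frame = (uintptr_t)_AddressOfReturnAddress() + sizeof(uintptr_t);
  printf("approx. frame base: %p\n", (void *)frame);
}

int main() { ShowFrame(); return 0; }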
diff --git a/lib/sanitizer_common/sanitizer_libc.cc b/lib/sanitizer_common/sanitizer_libc.cc
index cf31e68..d6c8ea2 100644
--- a/lib/sanitizer_common/sanitizer_libc.cc
+++ b/lib/sanitizer_common/sanitizer_libc.cc
@@ -74,7 +74,7 @@
 
 // Semi-fast bzero for 16-aligned data. Still far from peak performance.
 void internal_bzero_aligned16(void *s, uptr n) {
-  struct S16 { u64 a, b; } ALIGNED(16);
+  struct ALIGNED(16) S16 { u64 a, b; };
   CHECK_EQ((reinterpret_cast<uptr>(s) | n) & 15, 0);
   for (S16 *p = reinterpret_cast<S16*>(s), *end = p + n / 16; p < end; p++) {
     p->a = p->b = 0;
@@ -234,6 +234,12 @@
   return nullptr;
 }
 
+uptr internal_wcslen(const wchar_t *s) {
+  uptr i = 0;
+  while (s[i]) i++;
+  return i;
+}
+
 s64 internal_simple_strtoll(const char *nptr, char **endptr, int base) {
   CHECK_EQ(base, 10);
   while (IsSpace(*nptr)) nptr++;
diff --git a/lib/sanitizer_common/sanitizer_libc.h b/lib/sanitizer_common/sanitizer_libc.h
index 71b8917..9c11fb0 100644
--- a/lib/sanitizer_common/sanitizer_libc.h
+++ b/lib/sanitizer_common/sanitizer_libc.h
@@ -51,6 +51,7 @@
 uptr internal_strnlen(const char *s, uptr maxlen);
 char *internal_strrchr(const char *s, int c);
 // This is O(N^2), but we are not using it in hot places.
+uptr internal_wcslen(const wchar_t *s);
 char *internal_strstr(const char *haystack, const char *needle);
 // Works only for base=10 and doesn't set errno.
 s64 internal_simple_strtoll(const char *nptr, char **endptr, int base);
@@ -61,10 +62,12 @@
 bool mem_is_zero(const char *mem, uptr size);
 
 // I/O
-const fd_t kInvalidFd = (fd_t)-1;
-const fd_t kStdinFd = 0;
-const fd_t kStdoutFd = (fd_t)1;
-const fd_t kStderrFd = (fd_t)2;
+// Define these as macros so we can use them in linker-initialized global
+// structs without dynamic initialization.
+#define kInvalidFd ((fd_t)-1)
+#define kStdinFd ((fd_t)0)
+#define kStdoutFd ((fd_t)1)
+#define kStderrFd ((fd_t)2)
 
 uptr internal_ftruncate(fd_t fd, uptr size);
 
diff --git a/lib/sanitizer_common/sanitizer_linux.cc b/lib/sanitizer_common/sanitizer_linux.cc
index 6a62e33..c2fa4c0 100644
--- a/lib/sanitizer_common/sanitizer_linux.cc
+++ b/lib/sanitizer_common/sanitizer_linux.cc
@@ -60,7 +60,10 @@
 #include <unistd.h>
 
 #if SANITIZER_FREEBSD
+#include <sys/exec.h>
 #include <sys/sysctl.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
 #include <machine/atomic.h>
 extern "C" {
 // <sys/umtx.h> must be included after <errno.h> and <sys/types.h> on
@@ -96,6 +99,12 @@
 # define SANITIZER_LINUX_USES_64BIT_SYSCALLS 0
 #endif
 
+#if defined(__x86_64__)
+extern "C" {
+extern void internal_sigreturn();
+}
+#endif
+
 namespace __sanitizer {
 
 #if SANITIZER_LINUX && defined(__x86_64__)
@@ -107,6 +116,7 @@
 #endif
 
 // --------------- sanitizer_libc.h
+#if !SANITIZER_S390
 uptr internal_mmap(void *addr, uptr length, int prot, int flags, int fd,
                    OFF_T offset) {
 #if SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS
@@ -119,6 +129,7 @@
                           offset / 4096);
 #endif
 }
+#endif // !SANITIZER_S390
 
 uptr internal_munmap(void *addr, uptr length) {
   return internal_syscall(SYSCALL(munmap), (uptr)addr, length);
@@ -241,7 +252,15 @@
   return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path,
                          (uptr)buf, AT_SYMLINK_NOFOLLOW);
 #elif SANITIZER_LINUX_USES_64BIT_SYSCALLS
+# if SANITIZER_MIPS64
+  // For mips64, the lstat syscall fills the buffer in the kernel_stat format.
+  struct kernel_stat kbuf;
+  int res = internal_syscall(SYSCALL(lstat), path, &kbuf);
+  kernel_stat_to_stat(&kbuf, (struct stat *)buf);
+  return res;
+# else
   return internal_syscall(SYSCALL(lstat), (uptr)path, (uptr)buf);
+# endif
 #else
   struct stat64 buf64;
   int res = internal_syscall(SYSCALL(lstat64), path, &buf64);
@@ -252,7 +271,15 @@
 
 uptr internal_fstat(fd_t fd, void *buf) {
 #if SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS
+# if SANITIZER_MIPS64
+  // For mips64, the fstat syscall fills the buffer in the kernel_stat format.
+  struct kernel_stat kbuf;
+  int res = internal_syscall(SYSCALL(fstat), fd, &kbuf);
+  kernel_stat_to_stat(&kbuf, (struct stat *)buf);
+  return res;
+# else
   return internal_syscall(SYSCALL(fstat), fd, (uptr)buf);
+# endif
 #else
   struct stat64 buf64;
   int res = internal_syscall(SYSCALL(fstat64), fd, &buf64);
@@ -404,11 +431,13 @@
 #endif
 }
 
+#if !SANITIZER_FREEBSD
 extern "C" {
   SANITIZER_WEAK_ATTRIBUTE extern void *__libc_stack_end;
 }
+#endif
 
-#if !SANITIZER_GO
+#if !SANITIZER_GO && !SANITIZER_FREEBSD
 static void ReadNullSepFileToArray(const char *path, char ***arr,
                                    int arr_size) {
   char *buff;
@@ -434,6 +463,7 @@
 #endif
 
 static void GetArgsAndEnv(char ***argv, char ***envp) {
+#if !SANITIZER_FREEBSD
 #if !SANITIZER_GO
   if (&__libc_stack_end) {
 #endif
@@ -448,6 +478,19 @@
     ReadNullSepFileToArray("/proc/self/environ", envp, kMaxEnvp);
   }
 #endif
+#else
+  // On FreeBSD, retrieving the argument and environment arrays is done via the
+  // kern.ps_strings sysctl, which returns a pointer to a structure containing
+  // this information. See also <sys/exec.h>.
+  ps_strings *pss;
+  size_t sz = sizeof(pss);
+  if (sysctlbyname("kern.ps_strings", &pss, &sz, NULL, 0) == -1) {
+    Printf("sysctl kern.ps_strings failed\n");
+    Die();
+  }
+  *argv = pss->ps_argvstr;
+  *envp = pss->ps_envstr;
+#endif
 }
 
 char **GetArgv() {
@@ -579,7 +622,8 @@
 
 #if SANITIZER_LINUX
 #define SA_RESTORER 0x04000000
-// Doesn't set sa_restorer, use with caution (see below).
+// Doesn't set sa_restorer if the caller did not set it, so use with caution
+// (see below).
 int internal_sigaction_norestorer(int signum, const void *act, void *oldact) {
   __sanitizer_kernel_sigaction_t k_act, k_oldact;
   internal_memset(&k_act, 0, sizeof(__sanitizer_kernel_sigaction_t));
@@ -601,7 +645,9 @@
     // rt_sigaction, so we need to do the same (we'll need to reimplement the
     // restorers; for x86_64 the restorer address can be obtained from
     // oldact->sa_restorer upon a call to sigaction(xxx, NULL, oldact).
+#if !SANITIZER_ANDROID || !SANITIZER_MIPS32
     k_act.sa_restorer = u_act->sa_restorer;
+#endif
   }
 
   uptr result = internal_syscall(SYSCALL(rt_sigaction), (uptr)signum,
@@ -615,10 +661,31 @@
     internal_memcpy(&u_oldact->sa_mask, &k_oldact.sa_mask,
                     sizeof(__sanitizer_kernel_sigset_t));
     u_oldact->sa_flags = k_oldact.sa_flags;
+#if !SANITIZER_ANDROID || !SANITIZER_MIPS32
     u_oldact->sa_restorer = k_oldact.sa_restorer;
+#endif
   }
   return result;
 }
+
+// Invokes sigaction via a raw syscall with a restorer; not all platforms
+// are supported yet.
+// We disable this for Go because it has not yet been added to buildgo.sh.
+#if defined(__x86_64__) && !SANITIZER_GO
+int internal_sigaction_syscall(int signum, const void *act, void *oldact) {
+  if (act == nullptr)
+    return internal_sigaction_norestorer(signum, act, oldact);
+  __sanitizer_sigaction u_adjust;
+  internal_memcpy(&u_adjust, act, sizeof(u_adjust));
+#if !SANITIZER_ANDROID || !SANITIZER_MIPS32
+  if (u_adjust.sa_restorer == nullptr) {
+    u_adjust.sa_restorer = internal_sigreturn;
+  }
+#endif
+  return internal_sigaction_norestorer(signum, (const void *)&u_adjust,
+                                       oldact);
+}
+#endif // defined(__x86_64__) && !SANITIZER_GO
 #endif  // SANITIZER_LINUX
 
 uptr internal_sigprocmask(int how, __sanitizer_sigset_t *set,
@@ -638,6 +705,10 @@
   internal_memset(set, 0xff, sizeof(*set));
 }
 
+void internal_sigemptyset(__sanitizer_sigset_t *set) {
+  internal_memset(set, 0, sizeof(*set));
+}
+
 #if SANITIZER_LINUX
 void internal_sigdelset(__sanitizer_sigset_t *set, int signum) {
   signum -= 1;
@@ -648,6 +719,16 @@
   const uptr bit = signum % (sizeof(k_set->sig[0]) * 8);
   k_set->sig[idx] &= ~(1 << bit);
 }
+
+bool internal_sigismember(__sanitizer_sigset_t *set, int signum) {
+  signum -= 1;
+  CHECK_GE(signum, 0);
+  CHECK_LT(signum, sizeof(*set) * 8);
+  __sanitizer_kernel_sigset_t *k_set = (__sanitizer_kernel_sigset_t *)set;
+  const uptr idx = signum / (sizeof(k_set->sig[0]) * 8);
+  const uptr bit = signum % (sizeof(k_set->sig[0]) * 8);
+  return k_set->sig[idx] & (1 << bit);
+}
 #endif  // SANITIZER_LINUX
 
 // ThreadLister implementation.
@@ -719,7 +800,10 @@
 }
 
 uptr GetPageSize() {
-#if SANITIZER_LINUX && (defined(__x86_64__) || defined(__i386__))
+// Android post-M sysconf(_SC_PAGESIZE) crashes if called from .preinit_array.
+#if SANITIZER_ANDROID
+  return 4096;
+#elif SANITIZER_LINUX && (defined(__x86_64__) || defined(__i386__))
   return EXEC_PAGESIZE;
 #else
   return sysconf(_SC_PAGESIZE);  // EXEC_PAGESIZE may not be trustworthy.
@@ -926,8 +1010,18 @@
                        "bnez $2,1f;\n"
 
                        /* Call "fn(arg)". */
+#if SANITIZER_WORDSIZE == 32
+#ifdef __BIG_ENDIAN__
+                       "lw $25,4($29);\n"
+                       "lw $4,12($29);\n"
+#else
+                       "lw $25,0($29);\n"
+                       "lw $4,8($29);\n"
+#endif
+#else
                        "ld $25,0($29);\n"
                        "ld $4,8($29);\n"
+#endif
                        "jal $25;\n"
 
                        /* Call _exit($v0). */
@@ -1126,7 +1220,7 @@
 
 #endif
 
-bool IsDeadlySignal(int signum) {
+bool IsHandledDeadlySignal(int signum) {
   if (common_flags()->handle_abort && signum == SIGABRT)
     return true;
   if (common_flags()->handle_sigill && signum == SIGILL)
@@ -1162,6 +1256,54 @@
 void internal_join_thread(void *th) {}
 #endif
 
+#if defined(__aarch64__)
+// Android headers in older NDK releases are missing this definition.
+struct __sanitizer_esr_context {
+  struct _aarch64_ctx head;
+  uint64_t esr;
+};
+
+static bool Aarch64GetESR(ucontext_t *ucontext, u64 *esr) {
+  static const u32 kEsrMagic = 0x45535201;
+  u8 *aux = ucontext->uc_mcontext.__reserved;
+  while (true) {
+    _aarch64_ctx *ctx = (_aarch64_ctx *)aux;
+    if (ctx->size == 0) break;
+    if (ctx->magic == kEsrMagic) {
+      *esr = ((__sanitizer_esr_context *)ctx)->esr;
+      return true;
+    }
+    aux += ctx->size;
+  }
+  return false;
+}
+#endif
+
+SignalContext::WriteFlag SignalContext::GetWriteFlag(void *context) {
+  ucontext_t *ucontext = (ucontext_t *)context;
+#if defined(__x86_64__) || defined(__i386__)
+  static const uptr PF_WRITE = 1U << 1;
+#if SANITIZER_FREEBSD
+  uptr err = ucontext->uc_mcontext.mc_err;
+#else
+  uptr err = ucontext->uc_mcontext.gregs[REG_ERR];
+#endif
+  return err & PF_WRITE ? WRITE : READ;
+#elif defined(__arm__)
+  static const uptr FSR_WRITE = 1U << 11;
+  uptr fsr = ucontext->uc_mcontext.error_code;
+  return fsr & FSR_WRITE ? WRITE : READ;
+#elif defined(__aarch64__)
+  static const u64 ESR_ELx_WNR = 1U << 6;
+  u64 esr;
+  if (!Aarch64GetESR(ucontext, &esr)) return UNKNOWN;
+  return esr & ESR_ELx_WNR ? WRITE : READ;
+#else
+  (void)ucontext;
+  return UNKNOWN;  // FIXME: Implement.
+#endif
+}
+
 void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) {
 #if defined(__arm__)
   ucontext_t *ucontext = (ucontext_t*)context;
@@ -1229,6 +1371,15 @@
   *pc = ucontext->uc_mcontext.pc;
   *bp = ucontext->uc_mcontext.gregs[30];
   *sp = ucontext->uc_mcontext.gregs[29];
+#elif defined(__s390__)
+  ucontext_t *ucontext = (ucontext_t*)context;
+# if defined(__s390x__)
+  *pc = ucontext->uc_mcontext.psw.addr;
+# else
+  *pc = ucontext->uc_mcontext.psw.addr & 0x7fffffff;
+# endif
+  *bp = ucontext->uc_mcontext.gregs[11];
+  *sp = ucontext->uc_mcontext.gregs[15];
 #else
 # error "Unsupported arch"
 #endif
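The FreeBSD branch of GetArgsAndEnv() above relies on the kern.ps_strings sysctl; a standalone FreeBSD-only sketch of the same lookup using libc's sysctlbyname() directly rather than the runtime's wrappers (ps_nargvstr is assumed to be the standard ps_strings field, it is not added by this diff):

// FreeBSD-only sketch: read argv via the kern.ps_strings sysctl.
#include <sys/types.h>
#include <sys/exec.h>     // struct ps_strings
#include <sys/sysctl.h>
#include <cstdio>

int main() {
  ps_strings *pss;
  size_t sz = sizeof(pss);
  if (sysctlbyname("kern.ps_strings", &pss, &sz, nullptr, 0) == -1) {
    perror("sysctl kern.ps_strings");
    return 1;
  }
  for (int i = 0; i < pss->ps_nargvstr; i++)
    printf("argv[%d] = %s\n", i, pss->ps_argvstr[i]);
  return 0;
}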
diff --git a/lib/sanitizer_common/sanitizer_linux.h b/lib/sanitizer_common/sanitizer_linux.h
index a61198f..526fa44 100644
--- a/lib/sanitizer_common/sanitizer_linux.h
+++ b/lib/sanitizer_common/sanitizer_linux.h
@@ -42,9 +42,13 @@
 // (like the process-wide error reporting SEGV handler) must use
 // internal_sigaction instead.
 int internal_sigaction_norestorer(int signum, const void *act, void *oldact);
+#if defined(__x86_64__) && !SANITIZER_GO
+// Uses a raw system call to avoid interceptors.
+int internal_sigaction_syscall(int signum, const void *act, void *oldact);
+#endif
 void internal_sigdelset(__sanitizer_sigset_t *set, int signum);
 #if defined(__x86_64__) || defined(__mips__) || defined(__aarch64__) \
-  || defined(__powerpc64__)
+  || defined(__powerpc64__) || defined(__s390__)
 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
                     int *parent_tidptr, void *newtls, int *child_tidptr);
 #endif
diff --git a/lib/sanitizer_common/sanitizer_linux_libcdep.cc b/lib/sanitizer_common/sanitizer_linux_libcdep.cc
index 8cf2c73..a37bdf1 100644
--- a/lib/sanitizer_common/sanitizer_linux_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_linux_libcdep.cc
@@ -158,7 +158,6 @@
 
 #if !SANITIZER_FREEBSD && !SANITIZER_ANDROID && !SANITIZER_GO
 static uptr g_tls_size;
-#endif
 
 #ifdef __i386__
 # define DL_INTERNAL_FUNCTION __attribute__((regparm(3), stdcall))
@@ -166,26 +165,7 @@
 # define DL_INTERNAL_FUNCTION
 #endif
 
-#if defined(__mips__) || defined(__powerpc64__)
-// TlsPreTcbSize includes size of struct pthread_descr and size of tcb
-// head structure. It lies before the static tls blocks.
-static uptr TlsPreTcbSize() {
-# if defined(__mips__)
-  const uptr kTcbHead = 16; // sizeof (tcbhead_t)
-# elif defined(__powerpc64__)
-  const uptr kTcbHead = 88; // sizeof (tcbhead_t)
-# endif
-  const uptr kTlsAlign = 16;
-  const uptr kTlsPreTcbSize =
-    (ThreadDescriptorSize() + kTcbHead + kTlsAlign - 1) & ~(kTlsAlign - 1);
-  InitTlsSize();
-  g_tls_size = (g_tls_size + kTlsPreTcbSize + kTlsAlign -1) & ~(kTlsAlign - 1);
-  return kTlsPreTcbSize;
-}
-#endif
-
 void InitTlsSize() {
-#if !SANITIZER_FREEBSD && !SANITIZER_ANDROID && !SANITIZER_GO
 // all current supported platforms have 16 bytes stack alignment
   const size_t kStackAlign = 16;
   typedef void (*get_tls_func)(size_t*, size_t*) DL_INTERNAL_FUNCTION;
@@ -201,11 +181,13 @@
   if (tls_align < kStackAlign)
     tls_align = kStackAlign;
   g_tls_size = RoundUpTo(tls_size, tls_align);
-#endif  // !SANITIZER_FREEBSD && !SANITIZER_ANDROID && !SANITIZER_GO
 }
+#else
+void InitTlsSize() { }
+#endif  // !SANITIZER_FREEBSD && !SANITIZER_ANDROID && !SANITIZER_GO
 
 #if (defined(__x86_64__) || defined(__i386__) || defined(__mips__) \
-    || defined(__aarch64__) || defined(__powerpc64__)) \
+    || defined(__aarch64__) || defined(__powerpc64__) || defined(__s390__)) \
     && SANITIZER_LINUX && !SANITIZER_ANDROID
 // sizeof(struct pthread) from glibc.
 static atomic_uintptr_t kThreadDescriptorSize;
@@ -222,6 +204,11 @@
     char *end;
     int minor = internal_simple_strtoll(buf + 8, &end, 10);
     if (end != buf + 8 && (*end == '\0' || *end == '.')) {
+      int patch = 0;
+      if (*end == '.')
+        // strtoll will return 0 if no valid conversion could be performed
+        patch = internal_simple_strtoll(end + 1, nullptr, 10);
+
       /* sizeof(struct pthread) values from various glibc versions.  */
       if (SANITIZER_X32)
         val = 1728;  // Assume only one particular version for x32.
@@ -235,9 +222,9 @@
         val = FIRST_32_SECOND_64(1136, 1712);
       else if (minor == 10)
         val = FIRST_32_SECOND_64(1168, 1776);
-      else if (minor <= 12)
+      else if (minor == 11 || (minor == 12 && patch == 1))
         val = FIRST_32_SECOND_64(1168, 2288);
-      else if (minor == 13)
+      else if (minor <= 13)
         val = FIRST_32_SECOND_64(1168, 2304);
       else
         val = FIRST_32_SECOND_64(1216, 2304);
@@ -262,6 +249,9 @@
   val = 1776; // from glibc.ppc64le 2.20-8.fc21
   atomic_store(&kThreadDescriptorSize, val, memory_order_relaxed);
   return val;
+#elif defined(__s390__)
+  val = FIRST_32_SECOND_64(1152, 1776); // valid for glibc 2.22
+  atomic_store(&kThreadDescriptorSize, val, memory_order_relaxed);
 #endif
   return 0;
 }
@@ -273,6 +263,24 @@
   return kThreadSelfOffset;
 }
 
+#if defined(__mips__) || defined(__powerpc64__)
+// TlsPreTcbSize includes size of struct pthread_descr and size of tcb
+// head structure. It lies before the static tls blocks.
+static uptr TlsPreTcbSize() {
+# if defined(__mips__)
+  const uptr kTcbHead = 16; // sizeof (tcbhead_t)
+# elif defined(__powerpc64__)
+  const uptr kTcbHead = 88; // sizeof (tcbhead_t)
+# endif
+  const uptr kTlsAlign = 16;
+  const uptr kTlsPreTcbSize =
+    (ThreadDescriptorSize() + kTcbHead + kTlsAlign - 1) & ~(kTlsAlign - 1);
+  InitTlsSize();
+  g_tls_size = (g_tls_size + kTlsPreTcbSize + kTlsAlign -1) & ~(kTlsAlign - 1);
+  return kTlsPreTcbSize;
+}
+#endif
+
 uptr ThreadSelf() {
   uptr descr_addr;
 # if defined(__i386__)
@@ -291,7 +299,7 @@
                 rdhwr %0,$29;\
                 .set pop" : "=r" (thread_pointer));
   descr_addr = thread_pointer - kTlsTcbOffset - TlsPreTcbSize();
-# elif defined(__aarch64__)
+# elif defined(__aarch64__) || defined(__s390__)
   descr_addr = reinterpret_cast<uptr>(__builtin_thread_pointer());
 # elif defined(__powerpc64__)
   // PPC64LE uses TLS variant I. The thread pointer (in GPR 13)
@@ -332,7 +340,7 @@
 #if !SANITIZER_GO
 static void GetTls(uptr *addr, uptr *size) {
 #if SANITIZER_LINUX && !SANITIZER_ANDROID
-# if defined(__x86_64__) || defined(__i386__)
+# if defined(__x86_64__) || defined(__i386__) || defined(__s390__)
   *addr = ThreadSelf();
   *size = GetTlsSize();
   *addr -= *size;
@@ -412,17 +420,12 @@
 # endif
 
 struct DlIteratePhdrData {
-  LoadedModule *modules;
-  uptr current_n;
+  InternalMmapVector<LoadedModule> *modules;
   bool first;
-  uptr max_n;
-  string_predicate_t filter;
 };
 
 static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) {
   DlIteratePhdrData *data = (DlIteratePhdrData*)arg;
-  if (data->current_n == data->max_n)
-    return 0;
   InternalScopedString module_name(kMaxPathLength);
   if (data->first) {
     data->first = false;
@@ -433,20 +436,18 @@
   }
   if (module_name[0] == '\0')
     return 0;
-  if (data->filter && !data->filter(module_name.data()))
-    return 0;
-  LoadedModule *cur_module = &data->modules[data->current_n];
-  cur_module->set(module_name.data(), info->dlpi_addr);
-  data->current_n++;
+  LoadedModule cur_module;
+  cur_module.set(module_name.data(), info->dlpi_addr);
   for (int i = 0; i < info->dlpi_phnum; i++) {
     const Elf_Phdr *phdr = &info->dlpi_phdr[i];
     if (phdr->p_type == PT_LOAD) {
       uptr cur_beg = info->dlpi_addr + phdr->p_vaddr;
       uptr cur_end = cur_beg + phdr->p_memsz;
       bool executable = phdr->p_flags & PF_X;
-      cur_module->addAddressRange(cur_beg, cur_end, executable);
+      cur_module.addAddressRange(cur_beg, cur_end, executable);
     }
   }
+  data->modules->push_back(cur_module);
   return 0;
 }
 
@@ -455,8 +456,8 @@
     int (*)(struct dl_phdr_info *, size_t, void *), void *);
 #endif
 
-uptr GetListOfModules(LoadedModule *modules, uptr max_modules,
-                      string_predicate_t filter) {
+void ListOfModules::init() {
+  clear();
 #if SANITIZER_ANDROID && __ANDROID_API__ <= 22
   u32 api_level = AndroidGetApiLevel();
   // Fall back to /proc/maps if dl_iterate_phdr is unavailable or broken.
@@ -464,13 +465,12 @@
   // both K and L (and future) Android releases.
   if (api_level <= ANDROID_LOLLIPOP_MR1) { // L or earlier
     MemoryMappingLayout memory_mapping(false);
-    return memory_mapping.DumpListOfModules(modules, max_modules, filter);
+    memory_mapping.DumpListOfModules(&modules_);
+    return;
   }
 #endif
-  CHECK(modules);
-  DlIteratePhdrData data = {modules, 0, true, max_modules, filter};
+  DlIteratePhdrData data = {&modules_, true};
   dl_iterate_phdr(dl_iterate_phdr_cb, &data);
-  return data.current_n;
 }
 
 // getrusage does not give us the current RSS, only the max RSS.
diff --git a/lib/sanitizer_common/sanitizer_linux_s390.cc b/lib/sanitizer_common/sanitizer_linux_s390.cc
new file mode 100644
index 0000000..053fd17
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_linux_s390.cc
@@ -0,0 +1,191 @@
+//===-- sanitizer_linux_s390.cc -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is shared between AddressSanitizer and ThreadSanitizer
+// run-time libraries and implements s390-linux-specific functions from
+// sanitizer_libc.h.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_platform.h"
+
+#if SANITIZER_LINUX && SANITIZER_S390
+
+#include "sanitizer_libc.h"
+#include "sanitizer_linux.h"
+
+#include <errno.h>
+#include <sys/syscall.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+
+namespace __sanitizer {
+
+// --------------- sanitizer_libc.h
+uptr internal_mmap(void *addr, uptr length, int prot, int flags, int fd,
+                   OFF_T offset) {
+  struct s390_mmap_params {
+    unsigned long addr;
+    unsigned long length;
+    unsigned long prot;
+    unsigned long flags;
+    unsigned long fd;
+    unsigned long offset;
+  } params = {
+    (unsigned long)addr,
+    (unsigned long)length,
+    (unsigned long)prot,
+    (unsigned long)flags,
+    (unsigned long)fd,
+# ifdef __s390x__
+    (unsigned long)offset,
+# else
+    (unsigned long)(offset / 4096),
+# endif
+  };
+# ifdef __s390x__
+  return syscall(__NR_mmap, &params);
+# else
+  return syscall(__NR_mmap2, &params);
+# endif
+}
+
+uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
+                    int *parent_tidptr, void *newtls, int *child_tidptr) {
+  if (!fn || !child_stack)
+    return -EINVAL;
+  CHECK_EQ(0, (uptr)child_stack % 16);
+  // Minimum frame size.
+#ifdef __s390x__
+  child_stack = (char *)child_stack - 160;
+#else
+  child_stack = (char *)child_stack - 96;
+#endif
+  // Terminate unwind chain.
+  ((unsigned long *)child_stack)[0] = 0;
+  // And pass parameters.
+  ((unsigned long *)child_stack)[1] = (uptr)fn;
+  ((unsigned long *)child_stack)[2] = (uptr)arg;
+  register long res __asm__("r2");
+  register void *__cstack      __asm__("r2") = child_stack;
+  register int __flags         __asm__("r3") = flags;
+  register int * __ptidptr     __asm__("r4") = parent_tidptr;
+  register int * __ctidptr     __asm__("r5") = child_tidptr;
+  register void * __newtls     __asm__("r6") = newtls;
+
+  __asm__ __volatile__(
+                       /* Clone. */
+                       "svc    %1\n"
+
+                       /* if (%r2 != 0)
+                        *   return;
+                        */
+#ifdef __s390x__
+                       "cghi   %%r2, 0\n"
+#else
+                       "chi    %%r2, 0\n"
+#endif
+                       "jne    1f\n"
+
+                       /* Call "fn(arg)". */
+#ifdef __s390x__
+                       "lmg    %%r1, %%r2, 8(%%r15)\n"
+#else
+                       "lm     %%r1, %%r2, 4(%%r15)\n"
+#endif
+                       "basr   %%r14, %%r1\n"
+
+                       /* Call _exit(%r2). */
+                       "svc %2\n"
+
+                       /* Return to parent. */
+                     "1:\n"
+                       : "=r" (res)
+                       : "i"(__NR_clone), "i"(__NR_exit),
+                         "r"(__cstack),
+                         "r"(__flags),
+                         "r"(__ptidptr),
+                         "r"(__ctidptr),
+                         "r"(__newtls)
+                       : "memory", "cc");
+  return res;
+}
+
+#if SANITIZER_S390_64
+static bool FixedCVE_2016_2143() {
+  // Try to determine if the running kernel has a fix for CVE-2016-2143;
+  // return false if in doubt (better safe than sorry).  Distros may want to
+  // adjust this for their own kernels.
+  struct utsname buf;
+  unsigned int major, minor, patch = 0;
+  // This should never fail, but just in case...
+  if (uname(&buf))
+    return false;
+  char *ptr = buf.release;
+  major = internal_simple_strtoll(ptr, &ptr, 10);
+  // At least first 2 should be matched.
+  if (ptr[0] != '.')
+    return false;
+  minor = internal_simple_strtoll(ptr+1, &ptr, 10);
+  // Third is optional.
+  if (ptr[0] == '.')
+    patch = internal_simple_strtoll(ptr+1, &ptr, 10);
+  if (major < 3) {
+    // <3.0 is bad.
+    return false;
+  } else if (major == 3) {
+    // 3.2.79+ is OK.
+    if (minor == 2 && patch >= 79)
+      return true;
+    // 3.12.58+ is OK.
+    if (minor == 12 && patch >= 58)
+      return true;
+    // Otherwise, bad.
+    return false;
+  } else if (major == 4) {
+    // 4.1.21+ is OK.
+    if (minor == 1 && patch >= 21)
+      return true;
+    // 4.4.6+ is OK.
+    if (minor == 4 && patch >= 6)
+      return true;
+    // Otherwise, OK if 4.5+.
+    return minor >= 5;
+  } else {
+    // Linux 5 and up are fine.
+    return true;
+  }
+}
+
+void AvoidCVE_2016_2143() {
+  // Older kernels are affected by CVE-2016-2143 - they will crash hard
+  // if someone uses 4-level page tables (ie. virtual addresses >= 4TB)
+  // and fork() in the same process.  Unfortunately, sanitizers tend to
+  // require such addresses.  Since this is very likely to crash the whole
+  // machine (sanitizers themselves use fork() for llvm-symbolizer, for one),
+  // abort the process at initialization instead.
+  if (FixedCVE_2016_2143())
+    return;
+  if (GetEnv("SANITIZER_IGNORE_CVE_2016_2143"))
+    return;
+  Report(
+    "ERROR: Your kernel seems to be vulnerable to CVE-2016-2143.  Using ASan,\n"
+    "MSan, TSan, DFSan or LSan with such kernel can and will crash your\n"
+    "machine, or worse.\n"
+    "\n"
+    "If you are certain your kernel is not vulnerable (you have compiled it\n"
+    "yourself, or are using an unrecognized distribution kernel), you can\n"
+    "override this safety check by exporting SANITIZER_IGNORE_CVE_2016_2143\n"
+    "with any value.\n");
+  Die();
+}
+#endif
+
+} // namespace __sanitizer
+
+#endif // SANITIZER_LINUX && SANITIZER_S390
diff --git a/lib/sanitizer_common/sanitizer_linux_x86_64.S b/lib/sanitizer_common/sanitizer_linux_x86_64.S
new file mode 100644
index 0000000..8ff9095
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_linux_x86_64.S
@@ -0,0 +1,25 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+// Avoid being marked as needing an executable stack:
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+// Further contents are x86_64-only:
+#if defined(__linux__) && defined(__x86_64__)
+
+#include "../builtins/assembly.h"
+
+// If the "naked" function attribute were supported for x86 we could
+// do this via inline asm.
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(internal_sigreturn)
+        mov           $0xf,             %eax    // 0xf == SYS_rt_sigreturn
+        mov           %rcx,             %r10
+        syscall
+        ret                                     // Won't normally reach here.
+END_COMPILERRT_FUNCTION(internal_sigreturn)
+
+#endif // defined(__linux__) && defined(__x86_64__)
diff --git a/lib/sanitizer_common/sanitizer_mac.cc b/lib/sanitizer_common/sanitizer_mac.cc
index 23e5af9..41a2068 100644
--- a/lib/sanitizer_common/sanitizer_mac.cc
+++ b/lib/sanitizer_common/sanitizer_mac.cc
@@ -72,6 +72,12 @@
 #include <unistd.h>
 #include <util.h>
 
+// from <crt_externs.h>, but we don't have that file on iOS
+extern "C" {
+  extern char ***_NSGetArgv(void);
+  extern char ***_NSGetEnviron(void);
+}
+
 namespace __sanitizer {
 
 #include "sanitizer_syscall_generic.inc"
@@ -371,13 +377,16 @@
 #endif
 }
 
-uptr GetListOfModules(LoadedModule *modules, uptr max_modules,
-                      string_predicate_t filter) {
+void ListOfModules::init() {
+  clear();
   MemoryMappingLayout memory_mapping(false);
-  return memory_mapping.DumpListOfModules(modules, max_modules, filter);
+  memory_mapping.DumpListOfModules(&modules_);
 }
 
-bool IsDeadlySignal(int signum) {
+bool IsHandledDeadlySignal(int signum) {
+  if ((SANITIZER_WATCHOS || SANITIZER_TVOS) && !(SANITIZER_IOSSIM))
+    // Handling fatal signals on watchOS and tvOS devices is disallowed.
+    return false;
   return (signum == SIGSEGV || signum == SIGBUS) && common_flags()->handle_segv;
 }
 
@@ -449,11 +458,15 @@
 
 void internal_join_thread(void *th) { pthread_join((pthread_t)th, 0); }
 
+#ifndef SANITIZER_GO
 static BlockingMutex syslog_lock(LINKER_INITIALIZED);
+#endif
 
 void WriteOneLineToSyslog(const char *s) {
+#ifndef SANITIZER_GO
   syslog_lock.CheckLocked();
   asl_log(nullptr, nullptr, ASL_LEVEL_ERR, "%s", s);
+#endif
 }
 
 void LogMessageOnPrintf(const char *str) {
@@ -463,6 +476,7 @@
 }
 
 void LogFullErrorReport(const char *buffer) {
+#ifndef SANITIZER_GO
   // Log with os_trace. This will make it into the crash log.
 #if SANITIZER_OS_TRACE
   if (GetMacosVersion() >= MACOS_VERSION_YOSEMITE) {
@@ -496,6 +510,16 @@
     WriteToSyslog(buffer);
 
   // The report is added to CrashLog as part of logging all of Printf output.
+#endif
+}
+
+SignalContext::WriteFlag SignalContext::GetWriteFlag(void *context) {
+#if defined(__x86_64__) || defined(__i386__)
+  ucontext_t *ucontext = static_cast<ucontext_t*>(context);
+  return ucontext->uc_mcontext->__es.__err & 2 /*T_PF_WRITE*/ ? WRITE : READ;
+#else
+  return UNKNOWN;
+#endif
 }
 
 void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) {
@@ -525,6 +549,7 @@
 # endif
 }
 
+#ifndef SANITIZER_GO
 static const char kDyldInsertLibraries[] = "DYLD_INSERT_LIBRARIES";
 LowLevelAllocator allocator_for_env;
 
@@ -562,10 +587,14 @@
   return false;
 }
 
-extern "C" double dyldVersionNumber;
+extern "C" SANITIZER_WEAK_ATTRIBUTE double dyldVersionNumber;
 static const double kMinDyldVersionWithAutoInterposition = 360.0;
 
 bool DyldNeedsEnvVariable() {
+  // Although sanitizer support was added to LLVM on OS X 10.7+, GCC users
+  // may still want to use it on older systems. On older Darwin platforms,
+  // dyld doesn't export the dyldVersionNumber symbol, so we return true.
+  if (!&dyldVersionNumber) return true;
   // If running on OS X 10.11+ or iOS 9.0+, dyld will interpose even if
   // DYLD_INSERT_LIBRARIES is not set. However, checking OS version via
   // GetMacosVersion() doesn't work for the simulator. Let's instead check
@@ -581,7 +610,7 @@
   // wrappers work. If it is not, set DYLD_INSERT_LIBRARIES and re-exec
   // ourselves.
   Dl_info info;
-  CHECK(dladdr((void*)((uptr)&__sanitizer_report_error_summary), &info));
+  RAW_CHECK(dladdr((void*)((uptr)&__sanitizer_report_error_summary), &info));
   char *dyld_insert_libraries =
       const_cast<char*>(GetEnv(kDyldInsertLibraries));
   uptr old_env_len = dyld_insert_libraries ?
@@ -626,7 +655,7 @@
            "environment variable and re-execute itself, but execv() failed, "
            "possibly because of sandbox restrictions. Make sure to launch the "
            "executable with:\n%s=%s\n", kDyldInsertLibraries, new_env);
-    CHECK("execv failed" && 0);
+    RAW_CHECK("execv failed" && 0);
   }
 
   // Verify that interceptors really work.  We'll use dlsym to locate
@@ -634,14 +663,14 @@
   // "wrap_pthread_create" within our own dylib.
   Dl_info info_pthread_create;
   void *dlopen_addr = dlsym(RTLD_DEFAULT, "pthread_create");
-  CHECK(dladdr(dlopen_addr, &info_pthread_create));
+  RAW_CHECK(dladdr(dlopen_addr, &info_pthread_create));
   if (internal_strcmp(info.dli_fname, info_pthread_create.dli_fname) != 0) {
     Report(
         "ERROR: Interceptors are not working. This may be because %s is "
         "loaded too late (e.g. via dlopen). Please launch the executable "
         "with:\n%s=%s\n",
         SanitizerToolName, kDyldInsertLibraries, info.dli_fname);
-    CHECK("interceptors not installed" && 0);
+    RAW_CHECK("interceptors not installed" && 0);
   }
 
   if (!lib_is_in_env)
@@ -656,7 +685,7 @@
   // sign and the '\0' char.
   char *new_env = (char*)allocator_for_env.Allocate(
       old_env_len + 2 + env_name_len);
-  CHECK(new_env);
+  RAW_CHECK(new_env);
   internal_memset(new_env, '\0', old_env_len + 2 + env_name_len);
   internal_strncpy(new_env, kDyldInsertLibraries, env_name_len);
   new_env[env_name_len] = '=';
@@ -705,11 +734,15 @@
   if (new_env_pos == new_env + env_name_len + 1) new_env = NULL;
   LeakyResetEnv(kDyldInsertLibraries, new_env);
 }
+#endif  // SANITIZER_GO
 
 char **GetArgv() {
   return *_NSGetArgv();
 }
 
+// FIXME: implement on this platform.
+void GetMemoryProfile(fill_profile_f cb, uptr *stats, uptr stats_size) { }
+
 }  // namespace __sanitizer
 
 #endif  // SANITIZER_MAC
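DyldNeedsEnvVariable() above probes for dyldVersionNumber through a weak declaration; a minimal sketch of that weak-import presence test in isolation (__attribute__((weak)) is what SANITIZER_WEAK_ATTRIBUTE expands to; a standalone build may need platform-specific link flags):

// Sketch: the weak-symbol presence test used by DyldNeedsEnvVariable().
// With a weak declaration, the address is null when the dynamic linker did
// not provide the symbol, so taking the address is a safe probe.
extern "C" __attribute__((weak)) double dyldVersionNumber;

static bool HaveDyldVersionNumber() {
  return &dyldVersionNumber != nullptr;
}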
diff --git a/lib/sanitizer_common/sanitizer_platform.h b/lib/sanitizer_common/sanitizer_platform.h
index 841cceb..0ce2307 100644
--- a/lib/sanitizer_common/sanitizer_platform.h
+++ b/lib/sanitizer_common/sanitizer_platform.h
@@ -49,12 +49,30 @@
 # define SANITIZER_IOSSIM  0
 #endif
 
+#if defined(__APPLE__) && TARGET_OS_IPHONE && TARGET_OS_WATCH
+# define SANITIZER_WATCHOS 1
+#else
+# define SANITIZER_WATCHOS 0
+#endif
+
+#if defined(__APPLE__) && TARGET_OS_IPHONE && TARGET_OS_TV
+# define SANITIZER_TVOS 1
+#else
+# define SANITIZER_TVOS 0
+#endif
+
 #if defined(_WIN32)
 # define SANITIZER_WINDOWS 1
 #else
 # define SANITIZER_WINDOWS 0
 #endif
 
+#if defined(_WIN64)
+# define SANITIZER_WINDOWS64 1
+#else
+# define SANITIZER_WINDOWS64 0
+#endif
+
 #if defined(__ANDROID__)
 # define SANITIZER_ANDROID 1
 #else
@@ -81,6 +99,69 @@
 # define SANITIZER_X32 0
 #endif
 
+#if defined(__mips__)
+# define SANITIZER_MIPS 1
+# if defined(__mips64)
+#  define SANITIZER_MIPS32 0
+#  define SANITIZER_MIPS64 1
+# else
+#  define SANITIZER_MIPS32 1
+#  define SANITIZER_MIPS64 0
+# endif
+#else
+# define SANITIZER_MIPS 0
+# define SANITIZER_MIPS32 0
+# define SANITIZER_MIPS64 0
+#endif
+
+#if defined(__s390__)
+# define SANITIZER_S390 1
+# if defined(__s390x__)
+#  define SANITIZER_S390_31 0
+#  define SANITIZER_S390_64 1
+# else
+#  define SANITIZER_S390_31 1
+#  define SANITIZER_S390_64 0
+# endif
+#else
+# define SANITIZER_S390 0
+# define SANITIZER_S390_31 0
+# define SANITIZER_S390_64 0
+#endif
+
+#if defined(__powerpc__)
+# define SANITIZER_PPC 1
+# if defined(__powerpc64__)
+#  define SANITIZER_PPC32 0
+#  define SANITIZER_PPC64 1
+// 64-bit PPC has two ABIs (v1 and v2).  The old powerpc64 target is
+// big-endian, and uses v1 ABI (known for its function descriptors),
+// while the new powerpc64le target is little-endian and uses v2.
+// In theory, you could convince gcc to compile for their evil twins
+// (eg. big-endian v2), but you won't find such combinations in the wild
+// (it'd require bootstrapping a whole system, which would be quite painful
+// - there's no target triple for that).  LLVM doesn't support them either.
+#  if _CALL_ELF == 2
+#   define SANITIZER_PPC64V1 0
+#   define SANITIZER_PPC64V2 1
+#  else
+#   define SANITIZER_PPC64V1 1
+#   define SANITIZER_PPC64V2 0
+#  endif
+# else
+#  define SANITIZER_PPC32 1
+#  define SANITIZER_PPC64 0
+#  define SANITIZER_PPC64V1 0
+#  define SANITIZER_PPC64V2 0
+# endif
+#else
+# define SANITIZER_PPC 0
+# define SANITIZER_PPC32 0
+# define SANITIZER_PPC64 0
+# define SANITIZER_PPC64V1 0
+# define SANITIZER_PPC64V2 0
+#endif
+
 // By default we allow to use SizeClassAllocator64 on 64-bit platform.
 // But in some cases (e.g. AArch64's 39-bit address space) SizeClassAllocator64
 // does not work well and we need to fallback to SizeClassAllocator32.
@@ -99,6 +180,8 @@
 // will still work but will consume more memory for TwoLevelByteMap.
 #if defined(__mips__)
 # define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 40)
+#elif defined(__aarch64__)
+# define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 48)
 #else
 # define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 47)
 #endif
@@ -150,4 +233,18 @@
 # define MSC_PREREQ(version) 0
 #endif
 
+#if defined(__arm64__) && SANITIZER_IOS
+# define SANITIZER_NON_UNIQUE_TYPEINFO 1
+#else
+# define SANITIZER_NON_UNIQUE_TYPEINFO 0
+#endif
+
+// On linux, some architectures had an ABI transition from 64-bit long double
+// (ie. same as double) to 128-bit long double.  On those, glibc symbols
+// involving long doubles come in two versions, and we need to pass the
+// correct one to dlvsym when intercepting them.
+#if SANITIZER_LINUX && (SANITIZER_S390 || SANITIZER_PPC32 || SANITIZER_PPC64V1)
+#define SANITIZER_NLDBL_VERSION "GLIBC_2.4"
+#endif
+
 #endif // SANITIZER_PLATFORM_H
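The SANITIZER_NLDBL_VERSION comment above concerns glibc's dual long-double symbol versions; a minimal GNU-only sketch of resolving the old variant with dlvsym(), which is how interceptor code would use this macro (the choice of printf is only illustrative, and the lookup may return null on ABIs that never had the 64-bit long double variant):

// GNU-only sketch: resolve a versioned glibc symbol, as the
// SANITIZER_NLDBL_VERSION comment describes.  Link with -ldl on older glibc.
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <dlfcn.h>
#include <cstdio>

int main() {
  void *old_printf = dlvsym(RTLD_NEXT, "printf", "GLIBC_2.4");
  printf("printf@GLIBC_2.4 resolves to %p\n", old_printf);
  return 0;
}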
diff --git a/lib/sanitizer_common/sanitizer_platform_interceptors.h b/lib/sanitizer_common/sanitizer_platform_interceptors.h
index 2b1cd8a..1e53dd1 100644
--- a/lib/sanitizer_common/sanitizer_platform_interceptors.h
+++ b/lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -29,6 +29,12 @@
 # define SI_LINUX_NOT_ANDROID 0
 #endif
 
+#if SANITIZER_ANDROID
+# define SI_ANDROID 1
+#else
+# define SI_ANDROID 0
+#endif
+
 #if SANITIZER_FREEBSD
 # define SI_FREEBSD 1
 #else
@@ -43,8 +49,10 @@
 
 #if SANITIZER_MAC
 # define SI_MAC 1
+# define SI_NOT_MAC 0
 #else
 # define SI_MAC 0
+# define SI_NOT_MAC 1
 #endif
 
 #if SANITIZER_IOS
@@ -53,14 +61,30 @@
 # define SI_IOS 0
 #endif
 
+#if !SANITIZER_WINDOWS && !SANITIZER_MAC
+# define SI_UNIX_NOT_MAC 1
+#else
+# define SI_UNIX_NOT_MAC 0
+#endif
+
+#define SANITIZER_INTERCEPT_STRLEN 1
+#define SANITIZER_INTERCEPT_STRNLEN SI_NOT_MAC
 #define SANITIZER_INTERCEPT_STRCMP 1
 #define SANITIZER_INTERCEPT_STRSTR 1
 #define SANITIZER_INTERCEPT_STRCASESTR SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_STRCHR 1
+#define SANITIZER_INTERCEPT_STRCHRNUL SI_UNIX_NOT_MAC
+#define SANITIZER_INTERCEPT_STRRCHR 1
 #define SANITIZER_INTERCEPT_STRSPN 1
 #define SANITIZER_INTERCEPT_STRPBRK 1
 #define SANITIZER_INTERCEPT_TEXTDOMAIN SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_STRCASECMP SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_MEMSET 1
+#define SANITIZER_INTERCEPT_MEMMOVE 1
+#define SANITIZER_INTERCEPT_MEMCPY 1
 #define SANITIZER_INTERCEPT_MEMCMP 1
+// FIXME: enable memmem on Windows.
+#define SANITIZER_INTERCEPT_MEMMEM SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_MEMCHR 1
 #define SANITIZER_INTERCEPT_MEMRCHR SI_FREEBSD || SI_LINUX
 
@@ -125,6 +149,7 @@
 #define SANITIZER_INTERCEPT_ACCEPT4 SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_MODF SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_RECVMSG SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_SENDMSG SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_GETPEERNAME SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_IOCTL SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_INET_ATON SI_NOT_WINDOWS
@@ -133,7 +158,8 @@
 #define SANITIZER_INTERCEPT_READDIR64 SI_LINUX_NOT_ANDROID
 #if SI_LINUX_NOT_ANDROID && \
   (defined(__i386) || defined(__x86_64) || defined(__mips64) || \
-    defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__))
+    defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \
+    defined(__s390__))
 #define SANITIZER_INTERCEPT_PTRACE 1
 #else
 #define SANITIZER_INTERCEPT_PTRACE 0
@@ -257,8 +283,12 @@
 #define SANITIZER_INTERCEPT_OBSTACK SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_FFLUSH SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_FCLOSE SI_NOT_WINDOWS
+
+#ifndef SANITIZER_INTERCEPT_DLOPEN_DLCLOSE
 #define SANITIZER_INTERCEPT_DLOPEN_DLCLOSE \
     SI_FREEBSD || SI_LINUX_NOT_ANDROID || SI_MAC
+#endif
+
 #define SANITIZER_INTERCEPT_GETPASS SI_LINUX_NOT_ANDROID || SI_MAC
 #define SANITIZER_INTERCEPT_TIMERFD SI_LINUX_NOT_ANDROID
 
@@ -272,5 +302,13 @@
 #define SANITIZER_INTERCEPT_CTERMID_R SI_MAC || SI_FREEBSD
 
 #define SANITIZER_INTERCEPTOR_HOOKS SI_LINUX
+#define SANITIZER_INTERCEPT_RECV_RECVFROM SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_SEND_SENDTO SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_EVENTFD_READ_WRITE SI_LINUX
 
+#define SANITIZER_INTERCEPT_STAT (SI_FREEBSD || SI_MAC || SI_ANDROID)
+#define SANITIZER_INTERCEPT___XSTAT !SANITIZER_INTERCEPT_STAT && SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT___XSTAT64 SI_LINUX_NOT_ANDROID
+#define SANITIZER_INTERCEPT___LXSTAT SANITIZER_INTERCEPT___XSTAT
+#define SANITIZER_INTERCEPT___LXSTAT64 SI_LINUX_NOT_ANDROID
 #endif  // #ifndef SANITIZER_PLATFORM_INTERCEPTORS_H
diff --git a/lib/sanitizer_common/sanitizer_platform_limits_linux.cc b/lib/sanitizer_common/sanitizer_platform_limits_linux.cc
index 92353e4..ed16f63 100644
--- a/lib/sanitizer_common/sanitizer_platform_limits_linux.cc
+++ b/lib/sanitizer_common/sanitizer_platform_limits_linux.cc
@@ -28,7 +28,7 @@
 // With old kernels (and even new kernels on powerpc) asm/stat.h uses types that
 // are not defined anywhere in userspace headers. Fake them. This seems to work
 // fine with newer headers, too.
-#include <asm/posix_types.h>
+#include <linux/posix_types.h>
 #if defined(__x86_64__) ||  defined(__mips__)
 #include <sys/stat.h>
 #else
@@ -62,7 +62,7 @@
 }  // namespace __sanitizer
 
 #if !defined(__powerpc64__) && !defined(__x86_64__) && !defined(__aarch64__)\
-                            && !defined(__mips__)
+                            && !defined(__mips__) && !defined(__s390__)
 COMPILER_CHECK(struct___old_kernel_stat_sz == sizeof(struct __old_kernel_stat));
 #endif
 
diff --git a/lib/sanitizer_common/sanitizer_platform_limits_posix.cc b/lib/sanitizer_common/sanitizer_platform_limits_posix.cc
index b642cba..137cd9a 100644
--- a/lib/sanitizer_common/sanitizer_platform_limits_posix.cc
+++ b/lib/sanitizer_common/sanitizer_platform_limits_posix.cc
@@ -311,23 +311,28 @@
 
 #if SANITIZER_LINUX && !SANITIZER_ANDROID && \
     (defined(__i386) || defined(__x86_64) || defined(__mips64) || \
-      defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__))
+      defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \
+      defined(__s390__))
 #if defined(__mips64) || defined(__powerpc64__) || defined(__arm__)
   unsigned struct_user_regs_struct_sz = sizeof(struct pt_regs);
   unsigned struct_user_fpregs_struct_sz = sizeof(elf_fpregset_t);
 #elif defined(__aarch64__)
   unsigned struct_user_regs_struct_sz = sizeof(struct user_pt_regs);
   unsigned struct_user_fpregs_struct_sz = sizeof(struct user_fpsimd_state);
+#elif defined(__s390__)
+  unsigned struct_user_regs_struct_sz = sizeof(struct _user_regs_struct);
+  unsigned struct_user_fpregs_struct_sz = sizeof(struct _user_fpregs_struct);
 #else
   unsigned struct_user_regs_struct_sz = sizeof(struct user_regs_struct);
   unsigned struct_user_fpregs_struct_sz = sizeof(struct user_fpregs_struct);
 #endif // __mips64 || __powerpc64__ || __aarch64__
 #if defined(__x86_64) || defined(__mips64) || defined(__powerpc64__) || \
-    defined(__aarch64__) || defined(__arm__)
+    defined(__aarch64__) || defined(__arm__) || defined(__s390__)
   unsigned struct_user_fpxregs_struct_sz = 0;
 #else
   unsigned struct_user_fpxregs_struct_sz = sizeof(struct user_fpxregs_struct);
 #endif // __x86_64 || __mips64 || __powerpc64__ || __aarch64__ || __arm__
+// || __s390__
 #ifdef __arm__
   unsigned struct_user_vfpregs_struct_sz = ARM_VFPREGS_SIZE;
 #else
@@ -1055,8 +1060,15 @@
 // Can't write checks for sa_handler and sa_sigaction due to them being
 // preprocessor macros.
 CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_mask);
+#ifndef __GLIBC_PREREQ
+#define __GLIBC_PREREQ(x, y) 0
+#endif
+#if !defined(__s390x__) || __GLIBC_PREREQ (2, 20)
+// On s390x glibc 2.19 and earlier sa_flags was unsigned long, and sa_resv
+// didn't exist.
 CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_flags);
-#if SANITIZER_LINUX
+#endif
+#if SANITIZER_LINUX && (!SANITIZER_ANDROID || !SANITIZER_MIPS32)
 CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_restorer);
 #endif
 
@@ -1127,9 +1139,6 @@
 CHECK_SIZE_AND_OFFSET(ipc_perm, gid);
 CHECK_SIZE_AND_OFFSET(ipc_perm, cuid);
 CHECK_SIZE_AND_OFFSET(ipc_perm, cgid);
-#ifndef __GLIBC_PREREQ
-#define __GLIBC_PREREQ(x, y) 0
-#endif
 #if !defined(__aarch64__) || !SANITIZER_LINUX || __GLIBC_PREREQ (2, 21)
 /* On aarch64 glibc 2.20 and earlier provided incorrect mode field.  */
 CHECK_SIZE_AND_OFFSET(ipc_perm, mode);
diff --git a/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/lib/sanitizer_common/sanitizer_platform_limits_posix.h
index 2978e7b..14bc750 100644
--- a/lib/sanitizer_common/sanitizer_platform_limits_posix.h
+++ b/lib/sanitizer_common/sanitizer_platform_limits_posix.h
@@ -77,12 +77,16 @@
   const unsigned struct_kernel_stat_sz = 144;
   const unsigned struct_kernel_stat64_sz = 104;
 #elif defined(__mips__)
-  #if SANITIZER_WORDSIZE == 64
-  const unsigned struct_kernel_stat_sz = 216;
-  #else
-  const unsigned struct_kernel_stat_sz = 144;
-  #endif
+  const unsigned struct_kernel_stat_sz =
+                 SANITIZER_ANDROID ? FIRST_32_SECOND_64(104, 128) :
+                                     FIRST_32_SECOND_64(144, 216);
   const unsigned struct_kernel_stat64_sz = 104;
+#elif defined(__s390__) && !defined(__s390x__)
+  const unsigned struct_kernel_stat_sz = 64;
+  const unsigned struct_kernel_stat64_sz = 104;
+#elif defined(__s390x__)
+  const unsigned struct_kernel_stat_sz = 144;
+  const unsigned struct_kernel_stat64_sz = 0;
 #endif
   struct __sanitizer_perf_event_attr {
     unsigned type;
@@ -103,7 +107,7 @@
 
 #if SANITIZER_LINUX || SANITIZER_FREEBSD
 
-#if defined(__powerpc64__)
+#if defined(__powerpc64__) || defined(__s390__)
   const unsigned struct___old_kernel_stat_sz = 0;
 #else
   const unsigned struct___old_kernel_stat_sz = 32;
@@ -190,7 +194,7 @@
     unsigned __seq;
     u64 __unused1;
     u64 __unused2;
-#elif defined(__mips__) || defined(__aarch64__)
+#elif defined(__mips__) || defined(__aarch64__) || defined(__s390x__)
     unsigned int mode;
     unsigned short __seq;
     unsigned short __pad1;
@@ -516,7 +520,11 @@
   };
 
 #if SANITIZER_ANDROID
+# if SANITIZER_MIPS
+  typedef unsigned long __sanitizer_sigset_t[16/sizeof(unsigned long)];
+# else
   typedef unsigned long __sanitizer_sigset_t;
+# endif
 #elif SANITIZER_MAC
   typedef unsigned __sanitizer_sigset_t;
 #elif SANITIZER_LINUX
@@ -542,6 +550,15 @@
     __sanitizer_sigset_t sa_mask;
     void (*sa_restorer)();
   };
+#elif SANITIZER_ANDROID && SANITIZER_MIPS32  // check this before WORDSIZE == 32
+  struct __sanitizer_sigaction {
+    unsigned sa_flags;
+    union {
+      void (*sigaction)(int sig, void *siginfo, void *uctx);
+      void (*handler)(int sig);
+    };
+    __sanitizer_sigset_t sa_mask;
+  };
 #elif SANITIZER_ANDROID && (SANITIZER_WORDSIZE == 32)
   struct __sanitizer_sigaction {
     union {
@@ -565,7 +582,11 @@
     int sa_flags;
     __sanitizer_sigset_t sa_mask;
 #else
+#if defined(__s390x__)
+    int sa_resv;
+#else
     __sanitizer_sigset_t sa_mask;
+#endif
 #ifndef __mips__
     int sa_flags;
 #endif
@@ -576,6 +597,9 @@
 #if defined(__mips__) && (SANITIZER_WORDSIZE == 32)
     int sa_resv[1];
 #endif
+#if defined(__s390x__)
+    __sanitizer_sigset_t sa_mask;
+#endif
   };
 #endif // !SANITIZER_ANDROID
 
@@ -736,7 +760,8 @@
 
 #if SANITIZER_LINUX && !SANITIZER_ANDROID && \
   (defined(__i386) || defined(__x86_64) || defined(__mips64) || \
-    defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__))
+    defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \
+    defined(__s390__))
   extern unsigned struct_user_regs_struct_sz;
   extern unsigned struct_user_fpregs_struct_sz;
   extern unsigned struct_user_fpxregs_struct_sz;
diff --git a/lib/sanitizer_common/sanitizer_posix.cc b/lib/sanitizer_common/sanitizer_posix.cc
index 5ae6866..c70d5a4 100644
--- a/lib/sanitizer_common/sanitizer_posix.cc
+++ b/lib/sanitizer_common/sanitizer_posix.cc
@@ -89,7 +89,11 @@
 
 uptr GetMaxVirtualAddress() {
 #if SANITIZER_WORDSIZE == 64
-# if defined(__powerpc64__) || defined(__aarch64__)
+# if defined(__aarch64__) && SANITIZER_IOS && !SANITIZER_IOSSIM
+  // Ideally, we would derive the upper bound from MACH_VM_MAX_ADDRESS. The
+  // upper bound can change depending on the device.
+  return 0x200000000 - 1;
+# elif defined(__powerpc64__) || defined(__aarch64__)
   // On PowerPC64 we have two different address space layouts: 44- and 46-bit.
   // We somehow need to figure out which one we are using now and choose
   // one of 0x00000fffffffffffUL and 0x00003fffffffffffUL.
@@ -100,15 +104,21 @@
   return (1ULL << (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1)) - 1;
 # elif defined(__mips64)
   return (1ULL << 40) - 1;  // 0x000000ffffffffffUL;
+# elif defined(__s390x__)
+  return (1ULL << 53) - 1;  // 0x001fffffffffffffUL;
 # else
   return (1ULL << 47) - 1;  // 0x00007fffffffffffUL;
 # endif
 #else  // SANITIZER_WORDSIZE == 32
+# if defined(__s390__)
+  return (1ULL << 31) - 1;  // 0x7fffffff;
+# else
   uptr res = (1ULL << 32) - 1;  // 0xffffffff;
   if (!common_flags()->full_address_space)
     res -= GetKernelAreaSize();
   CHECK_LT(reinterpret_cast<uptr>(&res), res);
   return res;
+# endif
 #endif  // SANITIZER_WORDSIZE
 }
 
@@ -135,6 +145,26 @@
   DecreaseTotalMmap(size);
 }
 
+// We want to map a chunk of address space aligned to 'alignment'.
+// We do it by mapping a bit more and then unmapping the redundant pieces.
+// We probably can do it with fewer syscalls in some OS-dependent way.
+void *MmapAlignedOrDie(uptr size, uptr alignment, const char *mem_type) {
+  CHECK(IsPowerOfTwo(size));
+  CHECK(IsPowerOfTwo(alignment));
+  uptr map_size = size + alignment;
+  uptr map_res = (uptr)MmapOrDie(map_size, mem_type);
+  uptr map_end = map_res + map_size;
+  uptr res = map_res;
+  if (res & (alignment - 1))  // Not aligned.
+    res = (map_res + alignment) & ~(alignment - 1);
+  uptr end = res + size;
+  if (res != map_res)
+    UnmapOrDie((void*)map_res, res - map_res);
+  if (end != map_end)
+    UnmapOrDie((void*)end, map_end - end);
+  return (void*)res;
+}
+
 void *MmapNoReserveOrDie(uptr size, const char *mem_type) {
   uptr PageSize = GetPageSizeCached();
   uptr p = internal_mmap(nullptr,
@@ -171,6 +201,10 @@
   return 0 == internal_mprotect((void*)addr, size, PROT_NONE);
 }
 
+bool MprotectReadOnly(uptr addr, uptr size) {
+  return 0 == internal_mprotect((void *)addr, size, PROT_READ);
+}
+
 fd_t OpenFile(const char *filename, FileAccessMode mode, error_t *errno_p) {
   int flags;
   switch (mode) {
@@ -315,10 +349,13 @@
 }
 
 SignalContext SignalContext::Create(void *siginfo, void *context) {
-  uptr addr = (uptr)((siginfo_t*)siginfo)->si_addr;
+  auto si = (siginfo_t *)siginfo;
+  uptr addr = (uptr)si->si_addr;
   uptr pc, sp, bp;
   GetPcSpBp(context, &pc, &sp, &bp);
-  return SignalContext(context, addr, pc, sp, bp);
+  WriteFlag write_flag = GetWriteFlag(context);
+  bool is_memory_access = si->si_signo == SIGSEGV;
+  return SignalContext(context, addr, pc, sp, bp, is_memory_access, write_flag);
 }
 
 } // namespace __sanitizer
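MmapAlignedOrDie() above uses the usual over-map-and-trim trick; a standalone POSIX sketch of the same technique with plain mmap()/munmap(), independent of the runtime's wrappers:

// Sketch: obtain a 'size'-byte mapping aligned to 'alignment' (both powers of
// two) by mapping size + alignment bytes and unmapping the excess head/tail.
#include <sys/mman.h>
#include <cassert>
#include <cstdint>
#include <cstdio>

static void *mmap_aligned(size_t size, size_t alignment) {
  size_t map_size = size + alignment;
  char *map = (char *)mmap(nullptr, map_size, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (map == MAP_FAILED) return nullptr;
  uintptr_t res = (uintptr_t)map;
  if (res & (alignment - 1))  // Not aligned: round up to the next boundary.
    res = (res + alignment) & ~(alignment - 1);
  // Trim the unaligned head and the unused tail.
  if ((char *)res != map) munmap(map, (char *)res - map);
  if (res + size != (uintptr_t)map + map_size)
    munmap((char *)(res + size), (uintptr_t)map + map_size - (res + size));
  return (void *)res;
}

int main() {
  void *p = mmap_aligned(1 << 20, 1 << 20);
  assert(((uintptr_t)p & ((1 << 20) - 1)) == 0);
  printf("aligned mapping at %p\n", p);
  return 0;
}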
diff --git a/lib/sanitizer_common/sanitizer_posix.h b/lib/sanitizer_common/sanitizer_posix.h
index 3247b69..7f862cd 100644
--- a/lib/sanitizer_common/sanitizer_posix.h
+++ b/lib/sanitizer_common/sanitizer_posix.h
@@ -78,9 +78,15 @@
 
 int my_pthread_attr_getstack(void *attr, void **addr, uptr *size);
 
+// A routine named real_sigaction() must be implemented by each sanitizer in
+// order for internal_sigaction() to bypass interceptors.
 int internal_sigaction(int signum, const void *act, void *oldact);
 void internal_sigfillset(__sanitizer_sigset_t *set);
+void internal_sigemptyset(__sanitizer_sigset_t *set);
+bool internal_sigismember(__sanitizer_sigset_t *set, int signum);
 
+uptr internal_execve(const char *filename, char *const argv[],
+                     char *const envp[]);
 }  // namespace __sanitizer
 
 #endif  // SANITIZER_POSIX_H
diff --git a/lib/sanitizer_common/sanitizer_posix_libcdep.cc b/lib/sanitizer_common/sanitizer_posix_libcdep.cc
index c158eed..f1e8b50 100644
--- a/lib/sanitizer_common/sanitizer_posix_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_posix_libcdep.cc
@@ -34,6 +34,7 @@
 #include <sys/stat.h>
 #include <sys/time.h>
 #include <sys/types.h>
+#include <sys/wait.h>
 #include <unistd.h>
 
 #if SANITIZER_FREEBSD
@@ -97,6 +98,10 @@
   return (stack_size == RLIM_INFINITY);
 }
 
+uptr GetStackSizeLimitInBytes() {
+  return (uptr)getlim(RLIMIT_STACK);
+}
+
 void SetStackSizeLimitInBytes(uptr limit) {
   setlim(RLIMIT_STACK, (rlim_t)limit);
   CHECK(!StackSizeIsUnlimited());
@@ -168,7 +173,7 @@
 typedef void (*sa_sigaction_t)(int, siginfo_t *, void *);
 static void MaybeInstallSigaction(int signum,
                                   SignalHandlerType handler) {
-  if (!IsDeadlySignal(signum))
+  if (!IsHandledDeadlySignal(signum))
     return;
   struct sigaction sigact;
   internal_memset(&sigact, 0, sizeof(sigact));
@@ -269,7 +274,7 @@
   return (void *)p;
 }
 
-void *MmapNoAccess(uptr fixed_addr, uptr size, const char *name) {
+void *MmapFixedNoAccess(uptr fixed_addr, uptr size, const char *name) {
   int fd = name ? GetNamedMappingFd(name, size) : -1;
   unsigned flags = MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE;
   if (fd == -1) flags |= MAP_ANON;
@@ -278,6 +283,11 @@
                                0);
 }
 
+void *MmapNoAccess(uptr size) {
+  unsigned flags = MAP_PRIVATE | MAP_ANON | MAP_NORESERVE;
+  return (void *)internal_mmap(nullptr, size, PROT_NONE, flags, -1, 0);
+}
+
 // This function is defined elsewhere if we intercepted pthread_attr_getstack.
 extern "C" {
 SANITIZER_WEAK_ATTRIBUTE int
@@ -320,6 +330,79 @@
 }
 #endif // !SANITIZER_GO
 
+pid_t StartSubprocess(const char *program, const char *const argv[],
+                      fd_t stdin_fd, fd_t stdout_fd, fd_t stderr_fd) {
+  auto file_closer = at_scope_exit([&] {
+    if (stdin_fd != kInvalidFd) {
+      internal_close(stdin_fd);
+    }
+    if (stdout_fd != kInvalidFd) {
+      internal_close(stdout_fd);
+    }
+    if (stderr_fd != kInvalidFd) {
+      internal_close(stderr_fd);
+    }
+  });
+
+  int pid = internal_fork();
+
+  if (pid < 0) {
+    int rverrno;
+    if (internal_iserror(pid, &rverrno)) {
+      Report("WARNING: failed to fork (errno %d)\n", rverrno);
+    }
+    return pid;
+  }
+
+  if (pid == 0) {
+    // Child subprocess
+    if (stdin_fd != kInvalidFd) {
+      internal_close(STDIN_FILENO);
+      internal_dup2(stdin_fd, STDIN_FILENO);
+      internal_close(stdin_fd);
+    }
+    if (stdout_fd != kInvalidFd) {
+      internal_close(STDOUT_FILENO);
+      internal_dup2(stdout_fd, STDOUT_FILENO);
+      internal_close(stdout_fd);
+    }
+    if (stderr_fd != kInvalidFd) {
+      internal_close(STDERR_FILENO);
+      internal_dup2(stderr_fd, STDERR_FILENO);
+      internal_close(stderr_fd);
+    }
+
+    for (int fd = sysconf(_SC_OPEN_MAX); fd > 2; fd--) internal_close(fd);
+
+    execv(program, const_cast<char **>(&argv[0]));
+    internal__exit(1);
+  }
+
+  return pid;
+}
+
+bool IsProcessRunning(pid_t pid) {
+  int process_status;
+  uptr waitpid_status = internal_waitpid(pid, &process_status, WNOHANG);
+  int local_errno;
+  if (internal_iserror(waitpid_status, &local_errno)) {
+    VReport(1, "Waiting on the process failed (errno %d).\n", local_errno);
+    return false;
+  }
+  return waitpid_status == 0;
+}
+
+int WaitForProcess(pid_t pid) {
+  int process_status;
+  uptr waitpid_status = internal_waitpid(pid, &process_status, 0);
+  int local_errno;
+  if (internal_iserror(waitpid_status, &local_errno)) {
+    VReport(1, "Waiting on the process failed (errno %d).\n", local_errno);
+    return -1;
+  }
+  return process_status;
+}
+
 } // namespace __sanitizer
 
 #endif // SANITIZER_POSIX
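StartSubprocess()/WaitForProcess() above follow the classic fork + dup2 + execv + waitpid pattern; a standalone sketch of the same flow with plain libc calls rather than the runtime's internal_* wrappers (the /bin/echo command and log path are only illustrative):

// Sketch: spawn a child with stdout redirected to a file and wait for it,
// mirroring the StartSubprocess()/WaitForProcess() flow above.
#include <fcntl.h>
#include <sys/wait.h>
#include <unistd.h>
#include <cstdio>

int main() {
  int out = open("/tmp/child.log", O_WRONLY | O_CREAT | O_TRUNC, 0644);
  if (out < 0) return 1;
  pid_t pid = fork();
  if (pid < 0) return 1;
  if (pid == 0) {                       // child
    dup2(out, STDOUT_FILENO);           // redirect stdout to the log file
    close(out);
    const char *argv[] = {"/bin/echo", "hello from child", nullptr};
    execv(argv[0], const_cast<char **>(argv));
    _exit(1);                           // only reached if execv() failed
  }
  close(out);                           // parent keeps no copy of the fd
  int status = 0;
  waitpid(pid, &status, 0);             // analogous to WaitForProcess()
  printf("child exited with status %d\n", WEXITSTATUS(status));
  return 0;
}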
diff --git a/lib/sanitizer_common/sanitizer_procmaps.h b/lib/sanitizer_common/sanitizer_procmaps.h
index 94e3871..1fe59ab 100644
--- a/lib/sanitizer_common/sanitizer_procmaps.h
+++ b/lib/sanitizer_common/sanitizer_procmaps.h
@@ -43,9 +43,8 @@
   // instead of aborting.
   static void CacheMemoryMappings();
 
-  // Stores the list of mapped objects into an array.
-  uptr DumpListOfModules(LoadedModule *modules, uptr max_modules,
-                         string_predicate_t filter);
+  // Adds all mapped objects into a vector.
+  void DumpListOfModules(InternalMmapVector<LoadedModule> *modules);
 
   // Memory protection masks.
   static const uptr kProtectionRead = 1;
diff --git a/lib/sanitizer_common/sanitizer_procmaps_common.cc b/lib/sanitizer_common/sanitizer_procmaps_common.cc
index d43432c..fac3fbd 100644
--- a/lib/sanitizer_common/sanitizer_procmaps_common.cc
+++ b/lib/sanitizer_common/sanitizer_procmaps_common.cc
@@ -116,22 +116,17 @@
   }
 }
 
-uptr MemoryMappingLayout::DumpListOfModules(LoadedModule *modules,
-                                            uptr max_modules,
-                                            string_predicate_t filter) {
+void MemoryMappingLayout::DumpListOfModules(
+    InternalMmapVector<LoadedModule> *modules) {
   Reset();
   uptr cur_beg, cur_end, cur_offset, prot;
   InternalScopedString module_name(kMaxPathLength);
-  uptr n_modules = 0;
-  for (uptr i = 0; n_modules < max_modules &&
-                       Next(&cur_beg, &cur_end, &cur_offset, module_name.data(),
-                            module_name.size(), &prot);
+  for (uptr i = 0; Next(&cur_beg, &cur_end, &cur_offset, module_name.data(),
+                        module_name.size(), &prot);
        i++) {
     const char *cur_name = module_name.data();
     if (cur_name[0] == '\0')
       continue;
-    if (filter && !filter(cur_name))
-      continue;
     // Don't subtract 'cur_beg' from the first entry:
     // * If a binary is compiled w/o -pie, then the first entry in
     //   process maps is likely the binary itself (all dynamic libs
@@ -144,12 +139,11 @@
     //   shadow memory of the tool), so the module can't be the
     //   first entry.
     uptr base_address = (i ? cur_beg : 0) - cur_offset;
-    LoadedModule *cur_module = &modules[n_modules];
-    cur_module->set(cur_name, base_address);
-    cur_module->addAddressRange(cur_beg, cur_end, prot & kProtectionExecute);
-    n_modules++;
+    LoadedModule cur_module;
+    cur_module.set(cur_name, base_address);
+    cur_module.addAddressRange(cur_beg, cur_end, prot & kProtectionExecute);
+    modules->push_back(cur_module);
   }
-  return n_modules;
 }
 
 void GetMemoryProfile(fill_profile_f cb, uptr *stats, uptr stats_size) {
diff --git a/lib/sanitizer_common/sanitizer_procmaps_mac.cc b/lib/sanitizer_common/sanitizer_procmaps_mac.cc
index d10881e..417cc90 100644
--- a/lib/sanitizer_common/sanitizer_procmaps_mac.cc
+++ b/lib/sanitizer_common/sanitizer_procmaps_mac.cc
@@ -155,34 +155,28 @@
   return false;
 }
 
-uptr MemoryMappingLayout::DumpListOfModules(LoadedModule *modules,
-                                            uptr max_modules,
-                                            string_predicate_t filter) {
+void MemoryMappingLayout::DumpListOfModules(
+    InternalMmapVector<LoadedModule> *modules) {
   Reset();
   uptr cur_beg, cur_end, prot;
   InternalScopedString module_name(kMaxPathLength);
-  uptr n_modules = 0;
-  for (uptr i = 0; n_modules < max_modules &&
-                       Next(&cur_beg, &cur_end, 0, module_name.data(),
-                            module_name.size(), &prot);
+  for (uptr i = 0; Next(&cur_beg, &cur_end, 0, module_name.data(),
+                        module_name.size(), &prot);
        i++) {
     const char *cur_name = module_name.data();
     if (cur_name[0] == '\0')
       continue;
-    if (filter && !filter(cur_name))
-      continue;
     LoadedModule *cur_module = nullptr;
-    if (n_modules > 0 &&
-        0 == internal_strcmp(cur_name, modules[n_modules - 1].full_name())) {
-      cur_module = &modules[n_modules - 1];
+    if (!modules->empty() &&
+        0 == internal_strcmp(cur_name, modules->back().full_name())) {
+      cur_module = &modules->back();
     } else {
-      cur_module = &modules[n_modules];
+      modules->push_back(LoadedModule());
+      cur_module = &modules->back();
       cur_module->set(cur_name, cur_beg);
-      n_modules++;
     }
     cur_module->addAddressRange(cur_beg, cur_end, prot & kProtectionExecute);
   }
-  return n_modules;
 }
 
 }  // namespace __sanitizer
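
Both DumpListOfModules implementations now collect into a caller-provided growable vector instead of a fixed array plus filter callback, and the Mac version merges consecutive map entries of the same module by extending modules->back(). A small stand-alone illustration of that collect-and-merge pattern, with std::vector and std::string standing in for the internal containers (names here are illustrative, not the sanitizer types):

#include <cstdint>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

struct Module {
  std::string name;
  std::vector<std::pair<uintptr_t, uintptr_t>> ranges;
};

// Append a mapping, folding it into the previous entry when the module name
// repeats (as consecutive segments of one dylib do in the Mac memory map).
static void AddMapping(std::vector<Module> *modules, const std::string &name,
                       uintptr_t beg, uintptr_t end) {
  if (!modules->empty() && modules->back().name == name) {
    modules->back().ranges.push_back({beg, end});
    return;
  }
  Module m;
  m.name = name;
  m.ranges.push_back({beg, end});
  modules->push_back(m);
}

int main() {
  std::vector<Module> modules;
  AddMapping(&modules, "libfoo.dylib", 0x1000, 0x2000);
  AddMapping(&modules, "libfoo.dylib", 0x2000, 0x3000);  // merged into the previous entry
  AddMapping(&modules, "libbar.dylib", 0x8000, 0x9000);
  printf("%zu modules, first one has %zu ranges\n", modules.size(),
         modules[0].ranges.size());  // prints: 2 modules, first one has 2 ranges
}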
diff --git a/lib/sanitizer_common/sanitizer_stacktrace.cc b/lib/sanitizer_common/sanitizer_stacktrace.cc
index 78282ba..7ad1f1f 100644
--- a/lib/sanitizer_common/sanitizer_stacktrace.cc
+++ b/lib/sanitizer_common/sanitizer_stacktrace.cc
@@ -40,11 +40,6 @@
   top_frame_bp = 0;
 }
 
-// Check if given pointer points into allocated stack area.
-static inline bool IsValidFrame(uptr frame, uptr stack_top, uptr stack_bottom) {
-  return frame > stack_bottom && frame < stack_top - 2 * sizeof (uhwptr);
-}
-
 // In GCC on ARM bp points to saved lr, not fp, so we should check the next
 // cell in stack to be a saved frame pointer. GetCanonicFrame returns the
 // pointer to saved frame pointer in any case.
@@ -93,9 +88,14 @@
         !IsAligned((uptr)caller_frame, sizeof(uhwptr)))
       break;
     uhwptr pc1 = caller_frame[2];
+#elif defined(__s390__)
+    uhwptr pc1 = frame[14];
 #else
     uhwptr pc1 = frame[1];
 #endif
+    // Let's assume that any pointer in the 0th page (i.e. <0x1000 on i386 and
+    // x86_64) is invalid and stop unwinding here.  If we're adding support for
+    // a platform where this isn't true, we need to reconsider this check.
     if (pc1 < kPageSize)
       break;
     if (pc1 != pc) {
@@ -121,7 +121,7 @@
 uptr BufferedStackTrace::LocatePcInTrace(uptr pc) {
   // Use threshold to find PC in stack trace, as PC we want to unwind from may
   // slightly differ from return address in the actual unwinded stack trace.
-  const int kPcThreshold = 320;
+  const int kPcThreshold = 350;
   for (uptr i = 0; i < size; ++i) {
     if (MatchPc(pc, trace[i], kPcThreshold))
       return i;
diff --git a/lib/sanitizer_common/sanitizer_stacktrace.h b/lib/sanitizer_common/sanitizer_stacktrace.h
index 969cedb..90142df 100644
--- a/lib/sanitizer_common/sanitizer_stacktrace.h
+++ b/lib/sanitizer_common/sanitizer_stacktrace.h
@@ -110,6 +110,11 @@
   void operator=(const BufferedStackTrace &);
 };
 
+// Check if given pointer points into allocated stack area.
+static inline bool IsValidFrame(uptr frame, uptr stack_top, uptr stack_bottom) {
+  return frame > stack_bottom && frame < stack_top - 2 * sizeof (uhwptr);
+}
+
 }  // namespace __sanitizer
 
 // Use this macro if you want to print stack trace with the caller
diff --git a/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cc b/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cc
index f66fa79..59ca927 100644
--- a/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cc
@@ -25,6 +25,8 @@
     return;
   }
   InternalScopedString frame_desc(GetPageSizeCached() * 2);
+  InternalScopedString dedup_token(GetPageSizeCached());
+  int dedup_frames = common_flags()->dedup_token_length;
   uptr frame_num = 0;
   for (uptr i = 0; i < size && trace[i]; i++) {
     // PCs in stack traces are actually the return addresses, that is,
@@ -38,11 +40,18 @@
                   cur->info, common_flags()->symbolize_vs_style,
                   common_flags()->strip_path_prefix);
       Printf("%s\n", frame_desc.data());
+      if (dedup_frames-- > 0) {
+        if (dedup_token.length())
+          dedup_token.append("--");
+        dedup_token.append(cur->info.function);
+      }
     }
     frames->ClearAll();
   }
   // Always print a trailing empty line after stack trace.
   Printf("\n");
+  if (dedup_token.length())
+    Printf("DEDUP_TOKEN: %s\n", dedup_token.data());
 }
 
 void BufferedStackTrace::Unwind(u32 max_depth, uptr pc, uptr bp, void *context,
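
The dedup_token_length handling added above joins the function names of the first N symbolized frames with "--" and prints them on a DEDUP_TOKEN: line after the trace, giving crash-deduplication tools a stable key. A minimal sketch of just the token construction (the real code builds it while printing SymbolizedStack frames; the frame names below are made up):

#include <cstdio>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> frames = {"crash_here", "caller", "main"};
  int dedup_frames = 2;  // what common_flags()->dedup_token_length would supply
  std::string token;
  for (const std::string &fn : frames) {
    if (dedup_frames-- <= 0) break;
    if (!token.empty()) token += "--";
    token += fn;
  }
  if (!token.empty()) printf("DEDUP_TOKEN: %s\n", token.c_str());
  // Output: DEDUP_TOKEN: crash_here--caller
}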
diff --git a/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc b/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
index 2376ee5..1f8861f 100644
--- a/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
@@ -15,7 +15,8 @@
 #include "sanitizer_platform.h"
 
 #if SANITIZER_LINUX && (defined(__x86_64__) || defined(__mips__) || \
-                        defined(__aarch64__) || defined(__powerpc64__))
+                        defined(__aarch64__) || defined(__powerpc64__) || \
+                        defined(__s390__))
 
 #include "sanitizer_stoptheworld.h"
 
@@ -38,6 +39,9 @@
 #  include <asm/ptrace.h>
 # endif
 # include <sys/user.h>  // for user_regs_struct
+# if SANITIZER_ANDROID && SANITIZER_MIPS
+#   include <asm/reg.h>  // for mips SP register in sys/user.h
+# endif
 #endif
 #include <sys/wait.h> // for signal-related stuff
 
@@ -229,8 +233,8 @@
 // Signal handler to wake up suspended threads when the tracer thread dies.
 static void TracerThreadSignalHandler(int signum, void *siginfo, void *uctx) {
   SignalContext ctx = SignalContext::Create(siginfo, uctx);
-  VPrintf(1, "Tracer caught signal %d: addr=0x%zx pc=0x%zx sp=0x%zx\n",
-      signum, ctx.addr, ctx.pc, ctx.sp);
+  Printf("Tracer caught signal %d: addr=0x%zx pc=0x%zx sp=0x%zx\n", signum,
+         ctx.addr, ctx.pc, ctx.sp);
   ThreadSuspender *inst = thread_suspender_instance;
   if (inst) {
     if (signum == SIGABRT)
@@ -467,13 +471,22 @@
 
 #elif defined(__mips__)
 typedef struct user regs_struct;
-#define REG_SP regs[EF_REG29]
+# if SANITIZER_ANDROID
+#  define REG_SP regs[EF_R29]
+# else
+#  define REG_SP regs[EF_REG29]
+# endif
 
 #elif defined(__aarch64__)
 typedef struct user_pt_regs regs_struct;
 #define REG_SP sp
 #define ARCH_IOVEC_FOR_GETREGSET
 
+#elif defined(__s390__)
+typedef _user_regs_struct regs_struct;
+#define REG_SP gprs[15]
+#define ARCH_IOVEC_FOR_GETREGSET
+
 #else
 #error "Unsupported architecture"
 #endif // SANITIZER_ANDROID && defined(__arm__)
@@ -513,3 +526,4 @@
 
 #endif  // SANITIZER_LINUX && (defined(__x86_64__) || defined(__mips__)
         // || defined(__aarch64__) || defined(__powerpc64__)
+        // || defined(__s390__)
diff --git a/lib/sanitizer_common/sanitizer_symbolizer.cc b/lib/sanitizer_common/sanitizer_symbolizer.cc
index 8b2496a..534e55f 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer.cc
@@ -60,6 +60,7 @@
 
 void DataInfo::Clear() {
   InternalFree(module);
+  InternalFree(file);
   InternalFree(name);
   internal_memset(this, 0, sizeof(DataInfo));
 }
@@ -96,7 +97,7 @@
 }
 
 Symbolizer::Symbolizer(IntrusiveList<SymbolizerTool> tools)
-    : module_names_(&mu_), n_modules_(0), modules_fresh_(false), tools_(tools),
+    : module_names_(&mu_), modules_(), modules_fresh_(false), tools_(tools),
       start_hook_(0), end_hook_(0) {}
 
 Symbolizer::SymbolizerScope::SymbolizerScope(const Symbolizer *sym)
diff --git a/lib/sanitizer_common/sanitizer_symbolizer.h b/lib/sanitizer_common/sanitizer_symbolizer.h
index 8a92964..572f1dd 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer.h
+++ b/lib/sanitizer_common/sanitizer_symbolizer.h
@@ -65,6 +65,8 @@
   // (de)allocated using sanitizer internal allocator.
   char *module;
   uptr module_offset;
+  char *file;
+  uptr line;
   char *name;
   uptr start;
   uptr size;
@@ -80,6 +82,7 @@
   /// Initialize and return platform-specific implementation of symbolizer
   /// (if it wasn't already initialized).
   static Symbolizer *GetOrInit();
+  static void LateInitialize();
   // Returns a list of symbolized frames for a given address (containing
   // all inlined functions, if necessary).
   SymbolizedStack *SymbolizePC(uptr address);
@@ -113,7 +116,7 @@
   void AddHooks(StartSymbolizationHook start_hook,
                 EndSymbolizationHook end_hook);
 
-  LoadedModule *FindModuleForAddress(uptr address);
+  const LoadedModule *FindModuleForAddress(uptr address);
 
  private:
   // GetModuleNameAndOffsetForPC has to return a string to the caller.
@@ -141,8 +144,7 @@
 
   bool FindModuleNameAndOffsetForAddress(uptr address, const char **module_name,
                                          uptr *module_offset);
-  LoadedModule modules_[kMaxNumberOfModules];
-  uptr n_modules_;
+  ListOfModules modules_;
   // If stale, need to reload the modules before looking up addresses.
   bool modules_fresh_;
 
@@ -175,6 +177,10 @@
   };
 };
 
+#ifdef SANITIZER_WINDOWS
+void InitializeDbgHelpIfNeeded();
+#endif
+
 }  // namespace __sanitizer
 
 #endif  // SANITIZER_SYMBOLIZER_H
diff --git a/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc b/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc
index 5a770ce..36b4fa9 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc
@@ -135,27 +135,23 @@
 bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address,
                                                    const char **module_name,
                                                    uptr *module_offset) {
-  LoadedModule *module = FindModuleForAddress(address);
-  if (module == 0)
+  const LoadedModule *module = FindModuleForAddress(address);
+  if (module == nullptr)
     return false;
   *module_name = module->full_name();
   *module_offset = address - module->base_address();
   return true;
 }
 
-LoadedModule *Symbolizer::FindModuleForAddress(uptr address) {
+const LoadedModule *Symbolizer::FindModuleForAddress(uptr address) {
   bool modules_were_reloaded = false;
   if (!modules_fresh_) {
-    for (uptr i = 0; i < n_modules_; i++)
-      modules_[i].clear();
-    n_modules_ =
-        GetListOfModules(modules_, kMaxNumberOfModules, /* filter */ nullptr);
-    CHECK_GT(n_modules_, 0);
-    CHECK_LT(n_modules_, kMaxNumberOfModules);
+    modules_.init();
+    RAW_CHECK(modules_.size() > 0);
     modules_fresh_ = true;
     modules_were_reloaded = true;
   }
-  for (uptr i = 0; i < n_modules_; i++) {
+  for (uptr i = 0; i < modules_.size(); i++) {
     if (modules_[i].containsAddress(address)) {
       return &modules_[i];
     }
@@ -209,10 +205,18 @@
     const char* const kSymbolizerArch = "--default-arch=x86_64";
 #elif defined(__i386__)
     const char* const kSymbolizerArch = "--default-arch=i386";
-#elif defined(__powerpc64__) && defined(__BIG_ENDIAN__)
+#elif defined(__aarch64__)
+    const char* const kSymbolizerArch = "--default-arch=arm64";
+#elif defined(__arm__)
+    const char* const kSymbolizerArch = "--default-arch=arm";
+#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
     const char* const kSymbolizerArch = "--default-arch=powerpc64";
-#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__)
+#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
     const char* const kSymbolizerArch = "--default-arch=powerpc64le";
+#elif defined(__s390x__)
+    const char* const kSymbolizerArch = "--default-arch=s390x";
+#elif defined(__s390__)
+    const char* const kSymbolizerArch = "--default-arch=s390";
 #else
     const char* const kSymbolizerArch = "--default-arch=unknown";
 #endif
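
The endianness test above moves from defined(__BIG_ENDIAN__) to comparing __BYTE_ORDER__ with __ORDER_BIG_ENDIAN__; presumably because the latter pair is predefined by both GCC and Clang on every target, while __BIG_ENDIAN__ is not reliably set outside Apple/Clang toolchains (that rationale is an inference, not stated in the patch). A tiny check using the portable form:

#include <cstdio>

int main() {
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  printf("big-endian target (e.g. powerpc64)\n");
#else
  printf("little-endian or unknown target (e.g. powerpc64le, x86_64)\n");
#endif
}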
diff --git a/lib/sanitizer_common/sanitizer_symbolizer_mac.cc b/lib/sanitizer_common/sanitizer_symbolizer_mac.cc
index 7d443d7..d591abc 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer_mac.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer_mac.cc
@@ -79,23 +79,6 @@
   char pid_str_[16];
 };
 
-static const char *kAtosErrorMessages[] = {
-  "atos cannot examine process",
-  "unable to get permission to examine process",
-  "An admin user name and password is required",
-  "could not load inserted library",
-  "architecture mismatch between analysis process",
-};
-
-static bool IsAtosErrorMessage(const char *str) {
-  for (uptr i = 0; i < ARRAY_SIZE(kAtosErrorMessages); i++) {
-    if (internal_strstr(str, kAtosErrorMessages[i])) {
-      return true;
-    }
-  }
-  return false;
-}
-
 static bool ParseCommandOutput(const char *str, uptr addr, char **out_name,
                                char **out_module, char **out_file, uptr *line,
                                uptr *start_address) {
@@ -112,12 +95,6 @@
   //   0xdeadbeef (in library.dylib)
   //   0xdeadbeef
 
-  if (IsAtosErrorMessage(trim)) {
-    Report("atos returned an error: %s\n", trim);
-    InternalFree(trim);
-    return false;
-  }
-
   const char *rest = trim;
   char *symbol_name;
   rest = ExtractTokenUpToDelimiter(rest, " (in ", &symbol_name);
diff --git a/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc b/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc
index 40197e2..7028da6 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc
@@ -71,19 +71,19 @@
                                    size_t *outputBufferSize, uint32_t flags);
 static swift_demangle_ft swift_demangle_f;
 
-// This must not happen lazily, because dlsym uses thread-local storage, which
-// is not a good thing to do during symbolication.
+// This must not happen lazily at symbolication time, because dlsym uses
+// malloc and thread-local storage, which is not a good thing to do during
+// symbolication.
 static void InitializeSwiftDemangler() {
   swift_demangle_f = (swift_demangle_ft)dlsym(RTLD_DEFAULT, "swift_demangle");
 }
 
-// Attempts to demangle a Swift name. The demangler will return nullptr
-/// if a non-Swift name is passed in.
+// Attempts to demangle a Swift name. The demangler will return nullptr if a
+// non-Swift name is passed in.
 const char *DemangleSwift(const char *name) {
   if (!name) return nullptr;
 
-  // Not to call dlsym every time we demangle, check if we are dealing with
-  // Swift mangled name first.
+  // Check if we are dealing with a Swift mangled name first.
   if (name[0] != '_' || name[1] != 'T') {
     return nullptr;
   }
@@ -114,6 +114,13 @@
   if (use_forkpty_) {
 #if SANITIZER_MAC
     fd_t fd = kInvalidFd;
+
+    // forkpty redirects stdout and stderr into a single stream, so we would
+    // receive error messages as standard replies. To avoid that, let's dup
+    // stderr and restore it in the child.
+    int saved_stderr = dup(STDERR_FILENO);
+    CHECK_GE(saved_stderr, 0);
+
     // Use forkpty to disable buffering in the new terminal.
     pid = internal_forkpty(&fd);
     if (pid == -1) {
@@ -123,6 +130,11 @@
       return false;
     } else if (pid == 0) {
       // Child subprocess.
+
+      // Restore stderr.
+      CHECK_GE(dup2(saved_stderr, STDERR_FILENO), 0);
+      close(saved_stderr);
+
       const char *argv[kArgVMax];
       GetArgV(path_, argv);
       execv(path_, const_cast<char **>(&argv[0]));
@@ -132,6 +144,8 @@
     // Continue execution in parent process.
     input_fd_ = output_fd_ = fd;
 
+    close(saved_stderr);
+
     // Disable echo in the new terminal, disable CR.
     struct termios termflags;
     tcgetattr(fd, &termflags);
@@ -177,47 +191,23 @@
     CHECK(infd);
     CHECK(outfd);
 
-    // Real fork() may call user callbacks registered with pthread_atfork().
-    pid = internal_fork();
-    if (pid == -1) {
-      // Fork() failed.
+    const char *argv[kArgVMax];
+    GetArgV(path_, argv);
+    pid = StartSubprocess(path_, argv, /* stdin */ outfd[0],
+                          /* stdout */ infd[1]);
+    if (pid < 0) {
       internal_close(infd[0]);
-      internal_close(infd[1]);
-      internal_close(outfd[0]);
       internal_close(outfd[1]);
-      Report("WARNING: failed to fork external symbolizer "
-             " (errno: %d)\n", errno);
       return false;
-    } else if (pid == 0) {
-      // Child subprocess.
-      internal_close(STDOUT_FILENO);
-      internal_close(STDIN_FILENO);
-      internal_dup2(outfd[0], STDIN_FILENO);
-      internal_dup2(infd[1], STDOUT_FILENO);
-      internal_close(outfd[0]);
-      internal_close(outfd[1]);
-      internal_close(infd[0]);
-      internal_close(infd[1]);
-      for (int fd = sysconf(_SC_OPEN_MAX); fd > 2; fd--)
-        internal_close(fd);
-      const char *argv[kArgVMax];
-      GetArgV(path_, argv);
-      execv(path_, const_cast<char **>(&argv[0]));
-      internal__exit(1);
     }
 
-    // Continue execution in parent process.
-    internal_close(outfd[0]);
-    internal_close(infd[1]);
     input_fd_ = infd[0];
     output_fd_ = outfd[1];
   }
 
   // Check that symbolizer subprocess started successfully.
-  int pid_status;
   SleepForMillis(kSymbolizerStartupTimeMillis);
-  int exited_pid = waitpid(pid, &pid_status, WNOHANG);
-  if (exited_pid != 0) {
+  if (!IsProcessRunning(pid)) {
     // Either waitpid failed, or child has already exited.
     Report("WARNING: external symbolizer didn't start up correctly!\n");
     return false;
@@ -495,14 +485,17 @@
 }
 
 Symbolizer *Symbolizer::PlatformInit() {
-  InitializeSwiftDemangler();
-
   IntrusiveList<SymbolizerTool> list;
   list.clear();
   ChooseSymbolizerTools(&list, &symbolizer_allocator_);
   return new(symbolizer_allocator_) Symbolizer(list);
 }
 
+void Symbolizer::LateInitialize() {
+  Symbolizer::GetOrInit();
+  InitializeSwiftDemangler();
+}
+
 }  // namespace __sanitizer
 
 #endif  // SANITIZER_POSIX
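
The saved_stderr dance above exists because forkpty() gives the child one pty for both stdout and stderr, so anything the symbolizer wrote to stderr would come back as if it were a reply. The underlying trick is plain dup()/dup2(): keep a second descriptor for the original stream, then put it back where needed. A stand-alone sketch using an ordinary redirection in place of the pty merge:

#include <cstdio>
#include <unistd.h>

int main() {
  // Keep a handle to the original stderr before redirecting it.
  int saved_stderr = dup(STDERR_FILENO);
  if (saved_stderr < 0) return 1;

  // Redirect stderr to /dev/null (stands in for the pty merge done by forkpty).
  if (!freopen("/dev/null", "w", stderr)) return 1;
  fprintf(stderr, "this line is discarded\n");

  // Restore the original stderr via the saved descriptor.
  dup2(saved_stderr, STDERR_FILENO);
  close(saved_stderr);
  fprintf(stderr, "stderr is back\n");
  return 0;
}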
diff --git a/lib/sanitizer_common/sanitizer_symbolizer_win.cc b/lib/sanitizer_common/sanitizer_symbolizer_win.cc
index b1dceeb..3cb7e48 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer_win.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer_win.cc
@@ -42,6 +42,8 @@
   // FIXME: We don't call SymCleanup() on exit yet - should we?
 }
 
+}  // namespace
+
 // Initializes DbgHelp library, if it's not yet initialized. Calls to this
 // function should be synchronized with respect to other calls to DbgHelp API
 // (e.g. from WinSymbolizerTool).
@@ -97,8 +99,6 @@
   }
 }
 
-}  // namespace
-
 bool WinSymbolizerTool::SymbolizePC(uptr addr, SymbolizedStack *frame) {
   InitializeDbgHelpIfNeeded();
 
@@ -279,6 +279,10 @@
   return new(symbolizer_allocator_) Symbolizer(list);
 }
 
+void Symbolizer::LateInitialize() {
+  Symbolizer::GetOrInit();
+}
+
 }  // namespace __sanitizer
 
 #endif  // _WIN32
diff --git a/lib/sanitizer_common/sanitizer_termination.cc b/lib/sanitizer_common/sanitizer_termination.cc
new file mode 100644
index 0000000..8243fc0
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_termination.cc
@@ -0,0 +1,86 @@
+//===-- sanitizer_termination.cc --------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file contains the Sanitizer termination functions CheckFailed and Die,
+/// and the callback functionality associated with them.
+///
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common.h"
+#include "sanitizer_libc.h"
+
+namespace __sanitizer {
+
+static const int kMaxNumOfInternalDieCallbacks = 5;
+static DieCallbackType InternalDieCallbacks[kMaxNumOfInternalDieCallbacks];
+
+bool AddDieCallback(DieCallbackType callback) {
+  for (int i = 0; i < kMaxNumOfInternalDieCallbacks; i++) {
+    if (InternalDieCallbacks[i] == nullptr) {
+      InternalDieCallbacks[i] = callback;
+      return true;
+    }
+  }
+  return false;
+}
+
+bool RemoveDieCallback(DieCallbackType callback) {
+  for (int i = 0; i < kMaxNumOfInternalDieCallbacks; i++) {
+    if (InternalDieCallbacks[i] == callback) {
+      internal_memmove(&InternalDieCallbacks[i], &InternalDieCallbacks[i + 1],
+                       sizeof(InternalDieCallbacks[0]) *
+                           (kMaxNumOfInternalDieCallbacks - i - 1));
+      InternalDieCallbacks[kMaxNumOfInternalDieCallbacks - 1] = nullptr;
+      return true;
+    }
+  }
+  return false;
+}
+
+static DieCallbackType UserDieCallback;
+void SetUserDieCallback(DieCallbackType callback) {
+  UserDieCallback = callback;
+}
+
+void NORETURN Die() {
+  if (UserDieCallback)
+    UserDieCallback();
+  for (int i = kMaxNumOfInternalDieCallbacks - 1; i >= 0; i--) {
+    if (InternalDieCallbacks[i])
+      InternalDieCallbacks[i]();
+  }
+  if (common_flags()->abort_on_error)
+    Abort();
+  internal__exit(common_flags()->exitcode);
+}
+
+static CheckFailedCallbackType CheckFailedCallback;
+void SetCheckFailedCallback(CheckFailedCallbackType callback) {
+  CheckFailedCallback = callback;
+}
+
+const int kSecondsToSleepWhenRecursiveCheckFailed = 2;
+
+void NORETURN CheckFailed(const char *file, int line, const char *cond,
+                          u64 v1, u64 v2) {
+  static atomic_uint32_t num_calls;
+  if (atomic_fetch_add(&num_calls, 1, memory_order_relaxed) > 10) {
+    SleepForSeconds(kSecondsToSleepWhenRecursiveCheckFailed);
+    Trap();
+  }
+
+  if (CheckFailedCallback) {
+    CheckFailedCallback(file, line, cond, v1, v2);
+  }
+  Report("Sanitizer CHECK failed: %s:%d %s (%lld, %lld)\n", file, line, cond,
+                                                            v1, v2);
+  Die();
+}
+
+} // namespace __sanitizer
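
The new file keeps the die/check-failed machinery self-contained: up to five internal callbacks live in a fixed array and run in reverse registration order, after an optional user callback. A compact stand-alone model of that registry (illustrative names, not the sanitizer API):

#include <cstdio>

typedef void (*Callback)();
static const int kMax = 5;
static Callback g_callbacks[kMax];

static bool AddCallback(Callback cb) {
  for (int i = 0; i < kMax; i++)
    if (!g_callbacks[i]) { g_callbacks[i] = cb; return true; }
  return false;  // table full
}

// Run registered callbacks last-in, first-out, mirroring the loop in Die().
static void RunAll() {
  for (int i = kMax - 1; i >= 0; i--)
    if (g_callbacks[i]) g_callbacks[i]();
}

static void First()  { printf("registered first, runs last\n"); }
static void Second() { printf("registered second, runs first\n"); }

int main() {
  AddCallback(First);
  AddCallback(Second);
  RunAll();
}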
diff --git a/lib/sanitizer_common/sanitizer_tls_get_addr.cc b/lib/sanitizer_common/sanitizer_tls_get_addr.cc
index 213aced..77c1947 100644
--- a/lib/sanitizer_common/sanitizer_tls_get_addr.cc
+++ b/lib/sanitizer_common/sanitizer_tls_get_addr.cc
@@ -78,7 +78,7 @@
   DTLS_Deallocate(dtls.dtv, s);
 }
 
-#if defined(__powerpc64__)
+#if defined(__powerpc64__) || defined(__mips__)
 // This is glibc's TLS_DTV_OFFSET:
 // "Dynamic thread vector pointers point 0x8000 past the start of each
 //  TLS block."
diff --git a/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cc b/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cc
index 92fe106..5943125 100644
--- a/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cc
@@ -48,6 +48,11 @@
 
 #if SANITIZER_ANDROID
 void SanitizerInitializeUnwinder() {
+  if (AndroidGetApiLevel() >= ANDROID_LOLLIPOP_MR1) return;
+
+  // Pre-Lollipop Android cannot unwind through signal handler frames with the
+  // libgcc unwinder, but it has a libcorkscrew.so library with the necessary
+  // workarounds.
   void *p = dlopen("libcorkscrew.so", RTLD_LAZY);
   if (!p) {
     VReport(1,
@@ -104,6 +109,9 @@
   CHECK_LT(arg->stack->size, arg->max_depth);
   uptr pc = Unwind_GetIP(ctx);
   const uptr kPageSize = GetPageSizeCached();
+  // Let's assume that any pointer in the 0th page (i.e. <0x1000 on i386 and
+  // x86_64) is invalid and stop unwinding here.  If we're adding support for
+  // a platform where this isn't true, we need to reconsider this check.
   if (pc < kPageSize) return UNWIND_STOP;
   arg->stack->trace_buffer[arg->stack->size++] = pc;
   if (arg->stack->size == arg->max_depth) return UNWIND_STOP;
diff --git a/lib/sanitizer_common/sanitizer_win.cc b/lib/sanitizer_common/sanitizer_win.cc
index b06b81b..cdb2948 100644
--- a/lib/sanitizer_common/sanitizer_win.cc
+++ b/lib/sanitizer_common/sanitizer_win.cc
@@ -27,7 +27,9 @@
 #include "sanitizer_libc.h"
 #include "sanitizer_mutex.h"
 #include "sanitizer_placement_new.h"
+#include "sanitizer_procmaps.h"
 #include "sanitizer_stacktrace.h"
+#include "sanitizer_symbolizer.h"
 
 namespace __sanitizer {
 
@@ -35,13 +37,15 @@
 
 // --------------------- sanitizer_common.h
 uptr GetPageSize() {
-  // FIXME: there is an API for getting the system page size (GetSystemInfo or
-  // GetNativeSystemInfo), but if we use it here we get test failures elsewhere.
-  return 1U << 14;
+  SYSTEM_INFO si;
+  GetSystemInfo(&si);
+  return si.dwPageSize;
 }
 
 uptr GetMmapGranularity() {
-  return 1U << 16;  // FIXME: is this configurable?
+  SYSTEM_INFO si;
+  GetSystemInfo(&si);
+  return si.dwAllocationGranularity;
 }
 
 uptr GetMaxVirtualAddress() {
@@ -95,20 +99,90 @@
   if (!size || !addr)
     return;
 
-  if (VirtualFree(addr, size, MEM_DECOMMIT) == 0) {
-    Report("ERROR: %s failed to "
-           "deallocate 0x%zx (%zd) bytes at address %p (error code: %d)\n",
-           SanitizerToolName, size, size, addr, GetLastError());
-    CHECK("unable to unmap" && 0);
+  MEMORY_BASIC_INFORMATION mbi;
+  CHECK(VirtualQuery(addr, &mbi, sizeof(mbi)));
+
+  // MEM_RELEASE can only be used to unmap whole regions previously mapped with
+  // VirtualAlloc. So we first try MEM_RELEASE since it is better, and if that
+  // fails try MEM_DECOMMIT.
+  if (VirtualFree(addr, 0, MEM_RELEASE) == 0) {
+    if (VirtualFree(addr, size, MEM_DECOMMIT) == 0) {
+      Report("ERROR: %s failed to "
+             "deallocate 0x%zx (%zd) bytes at address %p (error code: %d)\n",
+             SanitizerToolName, size, size, addr, GetLastError());
+      CHECK("unable to unmap" && 0);
+    }
   }
 }
 
+// We want to map a chunk of address space aligned to 'alignment'.
+void *MmapAlignedOrDie(uptr size, uptr alignment, const char *mem_type) {
+  CHECK(IsPowerOfTwo(size));
+  CHECK(IsPowerOfTwo(alignment));
+
+  // Windows will align our allocations to at least 64K.
+  alignment = Max(alignment, GetMmapGranularity());
+
+  uptr mapped_addr =
+      (uptr)VirtualAlloc(0, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+  if (!mapped_addr)
+    ReportMmapFailureAndDie(size, mem_type, "allocate aligned", GetLastError());
+
+  // If we got it right on the first try, return. Otherwise, unmap it and go to
+  // the slow path.
+  if (IsAligned(mapped_addr, alignment))
+    return (void*)mapped_addr;
+  if (VirtualFree((void *)mapped_addr, 0, MEM_RELEASE) == 0)
+    ReportMmapFailureAndDie(size, mem_type, "deallocate", GetLastError());
+
+  // If we didn't get an aligned address, overallocate, find an aligned address,
+  // unmap, and try to allocate at that aligned address.
+  int retries = 0;
+  const int kMaxRetries = 10;
+  for (; retries < kMaxRetries &&
+         (mapped_addr == 0 || !IsAligned(mapped_addr, alignment));
+       retries++) {
+    // Overallocate size + alignment bytes.
+    mapped_addr =
+        (uptr)VirtualAlloc(0, size + alignment, MEM_RESERVE, PAGE_NOACCESS);
+    if (!mapped_addr)
+      ReportMmapFailureAndDie(size, mem_type, "allocate aligned",
+                              GetLastError());
+
+    // Find the aligned address.
+    uptr aligned_addr = RoundUpTo(mapped_addr, alignment);
+
+    // Free the overallocation.
+    if (VirtualFree((void *)mapped_addr, 0, MEM_RELEASE) == 0)
+      ReportMmapFailureAndDie(size, mem_type, "deallocate", GetLastError());
+
+    // Attempt to allocate exactly the number of bytes we need at the aligned
+    // address. This may fail for a number of reasons, in which case we continue
+    // the loop.
+    mapped_addr = (uptr)VirtualAlloc((void *)aligned_addr, size,
+                                     MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+  }
+
+  // Fail if we can't make this work quickly.
+  if (retries == kMaxRetries && mapped_addr == 0)
+    ReportMmapFailureAndDie(size, mem_type, "allocate aligned", GetLastError());
+
+  return (void *)mapped_addr;
+}
+
 void *MmapFixedNoReserve(uptr fixed_addr, uptr size, const char *name) {
   // FIXME: is this really "NoReserve"? On Win32 this does not matter much,
   // but on Win64 it does.
-  (void)name; // unsupported
-  void *p = VirtualAlloc((LPVOID)fixed_addr, size,
-      MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+  (void)name;  // unsupported
+#if !SANITIZER_GO && SANITIZER_WINDOWS64
+  // On ASan/Windows64, using MEM_COMMIT would result in error
+  // 1455: ERROR_COMMITMENT_LIMIT.
+  // ASan uses an exception handler to commit pages on demand.
+  void *p = VirtualAlloc((LPVOID)fixed_addr, size, MEM_RESERVE, PAGE_READWRITE);
+#else
+  void *p = VirtualAlloc((LPVOID)fixed_addr, size, MEM_RESERVE | MEM_COMMIT,
+                         PAGE_READWRITE);
+#endif
   if (p == 0)
     Report("ERROR: %s failed to "
            "allocate %p (%zd) bytes at %p (error code: %d)\n",
@@ -116,8 +190,18 @@
   return p;
 }
 
+// Memory space mapped by 'MmapFixedOrDie' must have been reserved by
+// 'MmapFixedNoAccess'.
 void *MmapFixedOrDie(uptr fixed_addr, uptr size) {
-  return MmapFixedNoReserve(fixed_addr, size);
+  void *p = VirtualAlloc((LPVOID)fixed_addr, size,
+      MEM_COMMIT, PAGE_READWRITE);
+  if (p == 0) {
+    char mem_type[30];
+    internal_snprintf(mem_type, sizeof(mem_type), "memory at address 0x%zx",
+                      fixed_addr);
+    ReportMmapFailureAndDie(size, mem_type, "allocate", GetLastError());
+  }
+  return p;
 }
 
 void *MmapNoReserveOrDie(uptr size, const char *mem_type) {
@@ -125,10 +209,10 @@
   return MmapOrDie(size, mem_type);
 }
 
-void *MmapNoAccess(uptr fixed_addr, uptr size, const char *name) {
+void *MmapFixedNoAccess(uptr fixed_addr, uptr size, const char *name) {
   (void)name; // unsupported
   void *res = VirtualAlloc((LPVOID)fixed_addr, size,
-                           MEM_RESERVE | MEM_COMMIT, PAGE_NOACCESS);
+                           MEM_RESERVE, PAGE_NOACCESS);
   if (res == 0)
     Report("WARNING: %s failed to "
            "mprotect %p (%zd) bytes at %p (error code: %d)\n",
@@ -136,6 +220,15 @@
   return res;
 }
 
+void *MmapNoAccess(uptr size) {
+  void *res = VirtualAlloc(nullptr, size, MEM_RESERVE, PAGE_NOACCESS);
+  if (res == 0)
+    Report("WARNING: %s failed to "
+           "mprotect %p (%zd) bytes (error code: %d)\n",
+           SanitizerToolName, size, size, GetLastError());
+  return res;
+}
+
 bool MprotectNoAccess(uptr addr, uptr size) {
   DWORD old_protection;
   return VirtualProtect((LPVOID)addr, size, PAGE_NOACCESS, &old_protection);
@@ -234,15 +327,15 @@
 #ifndef SANITIZER_GO
 void DumpProcessMap() {
   Report("Dumping process modules:\n");
-  InternalScopedBuffer<LoadedModule> modules(kMaxNumberOfModules);
-  uptr num_modules =
-      GetListOfModules(modules.data(), kMaxNumberOfModules, nullptr);
+  ListOfModules modules;
+  modules.init();
+  uptr num_modules = modules.size();
 
   InternalScopedBuffer<ModuleInfo> module_infos(num_modules);
   for (size_t i = 0; i < num_modules; ++i) {
     module_infos[i].filepath = modules[i].full_name();
-    module_infos[i].base_address = modules[i].base_address();
-    module_infos[i].end_address = modules[i].ranges().front()->end;
+    module_infos[i].base_address = modules[i].ranges().front()->beg;
+    module_infos[i].end_address = modules[i].ranges().back()->end;
   }
   qsort(module_infos.data(), num_modules, sizeof(ModuleInfo),
         CompareModulesBase);
@@ -317,6 +410,7 @@
   internal__exit(3);
 }
 
+#ifndef SANITIZER_GO
 // Read the file to extract the ImageBase field from the PE header. If ASLR is
 // disabled and this virtual address is available, the loader will typically
 // load the image at this address. Therefore, we call it the preferred base. Any
@@ -369,9 +463,8 @@
   return (uptr)pe_header->ImageBase;
 }
 
-#ifndef SANITIZER_GO
-uptr GetListOfModules(LoadedModule *modules, uptr max_modules,
-                      string_predicate_t filter) {
+void ListOfModules::init() {
+  clear();
   HANDLE cur_process = GetCurrentProcess();
 
   // Query the list of modules.  Start by assuming there are no more than 256
@@ -393,10 +486,8 @@
   }
 
   // |num_modules| is the number of modules actually present,
-  // |count| is the number of modules we return.
-  size_t nun_modules = bytes_required / sizeof(HMODULE),
-         count = 0;
-  for (size_t i = 0; i < nun_modules && count < max_modules; ++i) {
+  size_t num_modules = bytes_required / sizeof(HMODULE);
+  for (size_t i = 0; i < num_modules; ++i) {
     HMODULE handle = hmodules[i];
     MODULEINFO mi;
     if (!GetModuleInformation(cur_process, handle, &mi, sizeof(mi)))
@@ -414,9 +505,6 @@
                               &module_name[0], kMaxPathLength, NULL, NULL);
     module_name[module_name_len] = '\0';
 
-    if (filter && !filter(module_name))
-      continue;
-
     uptr base_address = (uptr)mi.lpBaseOfDll;
     uptr end_address = (uptr)mi.lpBaseOfDll + mi.SizeOfImage;
 
@@ -427,15 +515,13 @@
     uptr preferred_base = GetPreferredBase(&module_name[0]);
     uptr adjusted_base = base_address - preferred_base;
 
-    LoadedModule *cur_module = &modules[count];
-    cur_module->set(module_name, adjusted_base);
+    LoadedModule cur_module;
+    cur_module.set(module_name, adjusted_base);
     // We add the whole module as one single address range.
-    cur_module->addAddressRange(base_address, end_address, /*executable*/ true);
-    count++;
+    cur_module.addAddressRange(base_address, end_address, /*executable*/ true);
+    modules_.push_back(cur_module);
   }
   UnmapOrDie(hmodules, modules_buffer_size);
-
-  return count;
 };
 
 // We can't use atexit() directly at __asan_init time as the CRT is not fully
@@ -462,14 +548,15 @@
 
 // ------------------ sanitizer_libc.h
 fd_t OpenFile(const char *filename, FileAccessMode mode, error_t *last_error) {
+  // FIXME: Use the wide variants to handle Unicode filenames.
   fd_t res;
   if (mode == RdOnly) {
-    res = CreateFile(filename, GENERIC_READ,
-                     FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
-                     nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
+    res = CreateFileA(filename, GENERIC_READ,
+                      FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
+                      nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
   } else if (mode == WrOnly) {
-    res = CreateFile(filename, GENERIC_WRITE, 0, nullptr, CREATE_ALWAYS,
-                     FILE_ATTRIBUTE_NORMAL, nullptr);
+    res = CreateFileA(filename, GENERIC_WRITE, 0, nullptr, CREATE_ALWAYS,
+                      FILE_ATTRIBUTE_NORMAL, nullptr);
   } else {
     UNIMPLEMENTED();
   }
@@ -637,7 +724,7 @@
   // FIXME: CaptureStackBackTrace might be too slow for us.
   // FIXME: Compare with StackWalk64.
   // FIXME: Look at LLVMUnhandledExceptionFilter in Signals.inc
-  size = CaptureStackBackTrace(2, Min(max_depth, kStackTraceMax),
+  size = CaptureStackBackTrace(1, Min(max_depth, kStackTraceMax),
                                (void**)trace, 0);
   if (size == 0)
     return;
@@ -652,6 +739,9 @@
   CONTEXT ctx = *(CONTEXT *)context;
   STACKFRAME64 stack_frame;
   memset(&stack_frame, 0, sizeof(stack_frame));
+
+  InitializeDbgHelpIfNeeded();
+
   size = 0;
 #if defined(_WIN64)
   int machine_type = IMAGE_FILE_MACHINE_AMD64;
@@ -700,7 +790,7 @@
   // FIXME: Decide what to do on Windows.
 }
 
-bool IsDeadlySignal(int signum) {
+bool IsHandledDeadlySignal(int signum) {
   // FIXME: Decide what to do on Windows.
   return false;
 }
@@ -731,8 +821,8 @@
 }
 
 SignalContext SignalContext::Create(void *siginfo, void *context) {
-  EXCEPTION_RECORD *exception_record = (EXCEPTION_RECORD*)siginfo;
-  CONTEXT *context_record = (CONTEXT*)context;
+  EXCEPTION_RECORD *exception_record = (EXCEPTION_RECORD *)siginfo;
+  CONTEXT *context_record = (CONTEXT *)context;
 
   uptr pc = (uptr)exception_record->ExceptionAddress;
 #ifdef _WIN64
@@ -744,7 +834,19 @@
 #endif
   uptr access_addr = exception_record->ExceptionInformation[1];
 
-  return SignalContext(context, access_addr, pc, sp, bp);
+  // The contents of this array are documented at
+  // https://msdn.microsoft.com/en-us/library/windows/desktop/aa363082(v=vs.85).aspx
+  // The first element is 0 for a read, 1 for a write, or 8 for a DEP
+  // (execute) violation.  The second element is the faulting address.
+  WriteFlag write_flag = SignalContext::UNKNOWN;
+  switch (exception_record->ExceptionInformation[0]) {
+  case 0: write_flag = SignalContext::READ; break;
+  case 1: write_flag = SignalContext::WRITE; break;
+  case 8: write_flag = SignalContext::UNKNOWN; break;
+  }
+  bool is_memory_access = write_flag != SignalContext::UNKNOWN;
+  return SignalContext(context, access_addr, pc, sp, bp, is_memory_access,
+                       write_flag);
 }
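
SignalContext::Create now classifies the access using ExceptionInformation[0], which the linked MSDN page defines as 0 for a read, 1 for a write, and 8 for a DEP (execute) violation, with the faulting address in ExceptionInformation[1]. A Win32-only sketch of the same decoding inside a vectored exception handler (the fault-triggering access itself is omitted):

#include <windows.h>
#include <cstdio>

static LONG CALLBACK Handler(EXCEPTION_POINTERS *info) {
  EXCEPTION_RECORD *rec = info->ExceptionRecord;
  if (rec->ExceptionCode == EXCEPTION_ACCESS_VIOLATION) {
    ULONG_PTR kind = rec->ExceptionInformation[0];
    ULONG_PTR addr = rec->ExceptionInformation[1];
    const char *what =
        kind == 0 ? "read" : kind == 1 ? "write" : "execute/unknown";
    printf("access violation: %s of %p\n", what, (void *)addr);
  }
  return EXCEPTION_CONTINUE_SEARCH;
}

int main() {
  AddVectoredExceptionHandler(/*First=*/1, Handler);
  // A faulting memory access here would invoke Handler with the decoded info.
  printf("handler installed\n");
}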
 
 uptr ReadBinaryName(/*out*/char *buf, uptr buf_len) {
@@ -771,6 +873,26 @@
   return 0;
 }
 
+pid_t StartSubprocess(const char *program, const char *const argv[],
+                      fd_t stdin_fd, fd_t stdout_fd, fd_t stderr_fd) {
+  // FIXME: implement on this platform.
+  // Should be implemented based on
+  // SymbolizerProcess::StartSymbolizerSubprocess
+  // from lib/sanitizer_common/sanitizer_symbolizer_win.cc.
+  return -1;
+}
+
+bool IsProcessRunning(pid_t pid) {
+  // FIXME: implement on this platform.
+  return false;
+}
+
+int WaitForProcess(pid_t pid) { return -1; }
+
+// FIXME: implement on this platform.
+void GetMemoryProfile(fill_profile_f cb, uptr *stats, uptr stats_size) { }
+
+
 }  // namespace __sanitizer
 
 #endif  // _WIN32
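
The MmapAlignedOrDie added in this file relies on a well-known Win32 trick: VirtualAlloc only guarantees 64K granularity, so for stronger alignment it over-reserves size + alignment, computes an aligned address inside that reservation, releases the whole thing, and immediately tries to claim exactly the aligned range, retrying if another thread races for the hole. A stripped-down sketch of that loop with plain Win32 calls and no sanitizer types:

#include <windows.h>
#include <cstdint>
#include <cstdio>

static void *AllocAligned(size_t size, size_t alignment) {
  for (int retry = 0; retry < 10; ++retry) {
    // Over-reserve so an aligned address must exist inside the reservation.
    char *raw = (char *)VirtualAlloc(nullptr, size + alignment, MEM_RESERVE,
                                     PAGE_NOACCESS);
    if (!raw) return nullptr;
    uintptr_t aligned = ((uintptr_t)raw + alignment - 1) & ~(alignment - 1);
    // Release everything, then re-reserve exactly the aligned range.
    VirtualFree(raw, 0, MEM_RELEASE);
    void *p = VirtualAlloc((void *)aligned, size, MEM_RESERVE | MEM_COMMIT,
                           PAGE_READWRITE);
    if (p) return p;  // Otherwise another thread took the hole; retry.
  }
  return nullptr;
}

int main() {
  void *p = AllocAligned(1 << 20, 1 << 20);  // 1MB block, 1MB-aligned
  printf("aligned block at %p\n", p);
  if (p) VirtualFree(p, 0, MEM_RELEASE);
}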
diff --git a/lib/sanitizer_common/scripts/gen_dynamic_list.py b/lib/sanitizer_common/scripts/gen_dynamic_list.py
index b8b79b5..69f26f4 100755
--- a/lib/sanitizer_common/scripts/gen_dynamic_list.py
+++ b/lib/sanitizer_common/scripts/gen_dynamic_list.py
@@ -42,11 +42,12 @@
 
 def get_global_functions(library):
   functions = []
-  nm_proc = subprocess.Popen(['nm', library], stdout=subprocess.PIPE,
+  nm = os.environ.get('NM', 'nm')
+  nm_proc = subprocess.Popen([nm, library], stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
   nm_out = nm_proc.communicate()[0].decode().split('\n')
   if nm_proc.returncode != 0:
-    raise subprocess.CalledProcessError(nm_proc.returncode, 'nm')
+    raise subprocess.CalledProcessError(nm_proc.returncode, nm)
   func_symbols = ['T', 'W']
   # On PowerPC, nm prints function descriptors from .data section.
   if os.uname()[4] in ["powerpc", "ppc64"]:
diff --git a/lib/sanitizer_common/scripts/sancov.py b/lib/sanitizer_common/scripts/sancov.py
index a5ae957..e19afdb 100755
--- a/lib/sanitizer_common/scripts/sancov.py
+++ b/lib/sanitizer_common/scripts/sancov.py
@@ -30,6 +30,10 @@
   CheckBits(bits)
   return 'L' if bits == 64 else 'I'
 
+def TypeCodeForStruct(bits):
+  CheckBits(bits)
+  return 'Q' if bits == 64 else 'I'
+
 kMagic32SecondHalf = 0xFFFFFF32;
 kMagic64SecondHalf = 0xFFFFFF64;
 kMagicFirstHalf    = 0xC0BFFFFF;
@@ -64,7 +68,7 @@
       raise Exception('File %s is short (< 8 bytes)' % path)
     bits = ReadMagicAndReturnBitness(f, path)
     size -= 8
-    s = array.array(TypeCodeForBits(bits), f.read(size))
+    s = struct.unpack_from(TypeCodeForStruct(bits) * (size * 8 / bits), f.read(size))
   print >>sys.stderr, "%s: read %d %d-bit PCs from %s" % (prog_name, size * 8 / bits, bits, path)
   return s
 
@@ -94,8 +98,8 @@
   if max(s) > 0xFFFFFFFF:
     bits = 64
   array.array('I', MagicForBits(bits)).tofile(sys.stdout)
-  a = array.array(TypeCodeForBits(bits), s)
-  a.tofile(sys.stdout)
+  a = struct.pack(TypeCodeForStruct(bits) * len(s), *s)
+  sys.stdout.write(a)
 
 
 def UnpackOneFile(path):
@@ -148,7 +152,7 @@
     f.seek(0, 2)
     size = f.tell()
     f.seek(0, 0)
-    pcs = array.array(TypeCodeForBits(bits), f.read(size))
+    pcs = struct.unpack_from(TypeCodeForStruct(bits) * (size * 8 / bits), f.read(size))
     mem_map_pcs = [[] for i in range(0, len(mem_map))]
 
     for pc in pcs:
@@ -166,11 +170,12 @@
       assert path.endswith('.sancov.raw')
       dst_path = module_path + '.' + os.path.basename(path)[:-4]
       print >> sys.stderr, "%s: writing %d PCs to %s" % (prog_name, len(pc_list), dst_path)
-      arr = array.array(TypeCodeForBits(bits))
-      arr.fromlist(sorted(pc_list))
-      with open(dst_path, 'ab') as f2:
+      sorted_pc_list = sorted(pc_list)
+      pc_buffer = struct.pack(TypeCodeForStruct(bits) * len(pc_list), *sorted_pc_list)
+      with open(dst_path, 'ab+') as f2:
         array.array('I', MagicForBits(bits)).tofile(f2)
-        arr.tofile(f2)
+        f2.seek(0, 2)
+        f2.write(pc_buffer)
 
 def RawUnpack(files):
   for f in files:
diff --git a/lib/sanitizer_common/tests/CMakeLists.txt b/lib/sanitizer_common/tests/CMakeLists.txt
index 32055c3..0a828dc 100644
--- a/lib/sanitizer_common/tests/CMakeLists.txt
+++ b/lib/sanitizer_common/tests/CMakeLists.txt
@@ -65,6 +65,10 @@
 else()
   list(APPEND SANITIZER_TEST_CFLAGS_COMMON -g)
 endif()
+if(MSVC)
+  list(APPEND SANITIZER_TEST_CFLAGS_COMMON -gcodeview)
+endif()
+list(APPEND SANITIZER_TEST_LINK_FLAGS_COMMON -g)
 
 if(NOT MSVC)
   list(APPEND SANITIZER_TEST_LINK_FLAGS_COMMON --driver-mode=g++)
@@ -74,6 +78,13 @@
   list(APPEND SANITIZER_TEST_LINK_FLAGS_COMMON -pie)
 endif()
 
+# The MSVC linker allocates 1M for the stack by default, which is not
+# enough for the unittests; some unittests require more than 2M.
+# The default stack size for clang is 8M.
+if(MSVC)
+  list(APPEND SANITIZER_TEST_LINK_FLAGS_COMMON -Wl,/STACK:0xC00000)
+endif()
+
 set(SANITIZER_TEST_LINK_LIBS)
 append_list_if(COMPILER_RT_HAS_LIBLOG log SANITIZER_TEST_LINK_LIBS)
 # NDK r10 requires -latomic almost always.
@@ -96,7 +107,8 @@
 macro(add_sanitizer_common_lib library)
   add_library(${library} STATIC ${ARGN})
   set_target_properties(${library} PROPERTIES
-    ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+    ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+    FOLDER "Compiler-RT Runtime tests")
 endmacro()
 
 function(get_sanitizer_common_lib_for_arch arch lib lib_name)
@@ -106,17 +118,21 @@
     set(tgt_name "RTSanitizerCommon.test.${arch}")
   endif()
   set(${lib} "${tgt_name}" PARENT_SCOPE)
-  if(NOT MSVC)
-    set(${lib_name} "lib${tgt_name}.a" PARENT_SCOPE)
+  if(CMAKE_CONFIGURATION_TYPES)
+   set(configuration_path "${CMAKE_CFG_INTDIR}/")
   else()
-    set(${lib_name} "${tgt_name}.lib" PARENT_SCOPE)
+   set(configuration_path "")
+  endif()
+  if(NOT MSVC)
+    set(${lib_name} "${configuration_path}lib${tgt_name}.a" PARENT_SCOPE)
+  else()
+    set(${lib_name} "${configuration_path}${tgt_name}.lib" PARENT_SCOPE)
   endif()
 endfunction()
 
 # Sanitizer_common unit tests testsuite.
 add_custom_target(SanitizerUnitTests)
-set_target_properties(SanitizerUnitTests PROPERTIES
-  FOLDER "Sanitizer unittests")
+set_target_properties(SanitizerUnitTests PROPERTIES FOLDER "Compiler-RT Tests")
 
 # Adds sanitizer tests for architecture.
 macro(add_sanitizer_tests_for_arch arch)
@@ -130,7 +146,11 @@
   set(SANITIZER_TEST_OBJECTS)
   foreach(source ${SANITIZER_TEST_SOURCES})
     get_filename_component(basename ${source} NAME)
-    set(output_obj "${basename}.${arch}.o")
+    if(CMAKE_CONFIGURATION_TYPES)
+      set(output_obj "${CMAKE_CFG_INTDIR}/${basename}.${arch}.o")
+    else()
+      set(output_obj "${basename}.${arch}.o")
+    endif()
     clang_compile(${output_obj} ${source}
                   CFLAGS ${SANITIZER_TEST_CFLAGS_COMMON} ${TARGET_FLAGS}
                   DEPS ${SANITIZER_TEST_COMPILE_DEPS})
diff --git a/lib/sanitizer_common/tests/sanitizer_allocator_test.cc b/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
index 7ba3345..a558f08 100644
--- a/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
@@ -29,12 +29,23 @@
 #if !SANITIZER_DEBUG
 
 #if SANITIZER_CAN_USE_ALLOCATOR64
+#if SANITIZER_WINDOWS
+// On Windows 64-bit there is no easy way to find a large enough fixed address
+// space that is always available. Thus, a dynamically allocated address space
+// is used instead (i.e. ~(uptr)0).
+static const uptr kAllocatorSpace = ~(uptr)0;
+static const uptr kAllocatorSize  =  0x10000000000ULL;  // 1T.
+static const u64 kAddressSpaceSize = 1ULL << 40;
+#else
 static const uptr kAllocatorSpace = 0x700000000000ULL;
 static const uptr kAllocatorSize  = 0x010000000000ULL;  // 1T.
 static const u64 kAddressSpaceSize = 1ULL << 47;
+#endif
 
 typedef SizeClassAllocator64<
   kAllocatorSpace, kAllocatorSize, 16, DefaultSizeClassMap> Allocator64;
+typedef SizeClassAllocator64<
+  ~(uptr)0, kAllocatorSize, 16, DefaultSizeClassMap> Allocator64Dynamic;
 
 typedef SizeClassAllocator64<
   kAllocatorSpace, kAllocatorSize, 16, CompactSizeClassMap> Allocator64Compact;
@@ -42,6 +53,10 @@
 static const u64 kAddressSpaceSize = 1ULL << 40;
 #elif defined(__aarch64__)
 static const u64 kAddressSpaceSize = 1ULL << 39;
+#elif defined(__s390x__)
+static const u64 kAddressSpaceSize = 1ULL << 53;
+#elif defined(__s390__)
+static const u64 kAddressSpaceSize = 1ULL << 31;
 #else
 static const u64 kAddressSpaceSize = 1ULL << 32;
 #endif
@@ -144,14 +159,22 @@
 }
 
 #if SANITIZER_CAN_USE_ALLOCATOR64
+// These tests can fail on Windows if memory is somewhat full and lit happens
+// to run them all at the same time. FIXME: Make them not flaky and reenable.
+#if !SANITIZER_WINDOWS
 TEST(SanitizerCommon, SizeClassAllocator64) {
   TestSizeClassAllocator<Allocator64>();
 }
 
+TEST(SanitizerCommon, SizeClassAllocator64Dynamic) {
+  TestSizeClassAllocator<Allocator64Dynamic>();
+}
+
 TEST(SanitizerCommon, SizeClassAllocator64Compact) {
   TestSizeClassAllocator<Allocator64Compact>();
 }
 #endif
+#endif
 
 TEST(SanitizerCommon, SizeClassAllocator32Compact) {
   TestSizeClassAllocator<Allocator32Compact>();
@@ -188,13 +211,21 @@
 }
 
 #if SANITIZER_CAN_USE_ALLOCATOR64
+// These tests can fail on Windows if memory is somewhat full and lit happens
+// to run them all at the same time. FIXME: Make them not flaky and reenable.
+#if !SANITIZER_WINDOWS
 TEST(SanitizerCommon, SizeClassAllocator64MetadataStress) {
   SizeClassAllocatorMetadataStress<Allocator64>();
 }
 
+TEST(SanitizerCommon, SizeClassAllocator64DynamicMetadataStress) {
+  SizeClassAllocatorMetadataStress<Allocator64Dynamic>();
+}
+
 TEST(SanitizerCommon, SizeClassAllocator64CompactMetadataStress) {
   SizeClassAllocatorMetadataStress<Allocator64Compact>();
 }
+#endif
 #endif  // SANITIZER_CAN_USE_ALLOCATOR64
 TEST(SanitizerCommon, SizeClassAllocator32CompactMetadataStress) {
   SizeClassAllocatorMetadataStress<Allocator32Compact>();
@@ -208,7 +239,7 @@
   memset(&cache, 0, sizeof(cache));
   cache.Init(0);
 
-  uptr max_size_class = Allocator::kNumClasses - 1;
+  uptr max_size_class = Allocator::SizeClassMapT::kLargestClassID;
   uptr size = Allocator::SizeClassMapT::Size(max_size_class);
   u64 G8 = 1ULL << 33;
   // Make sure we correctly compute GetBlockBegin() w/o overflow.
@@ -225,15 +256,22 @@
 }
 
 #if SANITIZER_CAN_USE_ALLOCATOR64
+// These tests can fail on Windows if memory is somewhat full and lit happens
+// to run them all at the same time. FIXME: Make them not flaky and reenable.
+#if !SANITIZER_WINDOWS
 TEST(SanitizerCommon, SizeClassAllocator64GetBlockBegin) {
   SizeClassAllocatorGetBlockBeginStress<Allocator64>();
 }
+TEST(SanitizerCommon, SizeClassAllocator64DynamicGetBlockBegin) {
+  SizeClassAllocatorGetBlockBeginStress<Allocator64Dynamic>();
+}
 TEST(SanitizerCommon, SizeClassAllocator64CompactGetBlockBegin) {
   SizeClassAllocatorGetBlockBeginStress<Allocator64Compact>();
 }
 TEST(SanitizerCommon, SizeClassAllocator32CompactGetBlockBegin) {
   SizeClassAllocatorGetBlockBeginStress<Allocator32Compact>();
 }
+#endif
 #endif  // SANITIZER_CAN_USE_ALLOCATOR64
 
 struct TestMapUnmapCallback {
@@ -245,6 +283,9 @@
 int TestMapUnmapCallback::unmap_count;
 
 #if SANITIZER_CAN_USE_ALLOCATOR64
+// These tests can fail on Windows if memory is somewhat full and lit happens
+// to run them all at the same time. FIXME: Make them not flaky and reenable.
+#if !SANITIZER_WINDOWS
 TEST(SanitizerCommon, SizeClassAllocator64MapUnmapCallback) {
   TestMapUnmapCallback::map_count = 0;
   TestMapUnmapCallback::unmap_count = 0;
@@ -266,6 +307,7 @@
   delete a;
 }
 #endif
+#endif
 
 TEST(SanitizerCommon, SizeClassAllocator32MapUnmapCallback) {
   TestMapUnmapCallback::map_count = 0;
@@ -325,13 +367,14 @@
   a.TestOnlyUnmap();
 }
 
-#if SANITIZER_CAN_USE_ALLOCATOR64
+// Don't test OOM conditions on Win64 because it causes other tests on the same
+// machine to OOM.
+#if SANITIZER_CAN_USE_ALLOCATOR64 && !SANITIZER_WINDOWS64
 TEST(SanitizerCommon, SizeClassAllocator64Overflow) {
   EXPECT_DEATH(FailInAssertionOnOOM<Allocator64>(), "Out of memory");
 }
 #endif
 
-#if !defined(_WIN32)  // FIXME: This currently fails on Windows.
 TEST(SanitizerCommon, LargeMmapAllocator) {
   LargeMmapAllocator<> a;
   a.Init(/* may_return_null */ false);
@@ -381,8 +424,10 @@
   }
   CHECK_EQ(a.TotalMemoryUsed(), 0);
 
-  // Test alignments.
-  uptr max_alignment = SANITIZER_WORDSIZE == 64 ? (1 << 28) : (1 << 24);
+  // Test alignments. Test with 512MB alignment on x64 non-Windows machines.
+  // Windows doesn't overcommit, and many machines do not have 51.2GB of swap.
+  uptr max_alignment =
+      (SANITIZER_WORDSIZE == 64 && !SANITIZER_WINDOWS) ? (1 << 28) : (1 << 24);
   for (uptr alignment = 8; alignment <= max_alignment; alignment *= 2) {
     const uptr kNumAlignedAllocs = 100;
     for (uptr i = 0; i < kNumAlignedAllocs; i++) {
@@ -407,7 +452,6 @@
   CHECK_NE(p, (char *)a.GetBlockBegin(p + page_size));
   a.Deallocate(&stats, p);
 }
-#endif
 
 template
 <class PrimaryAllocator, class SecondaryAllocator, class AllocatorCache>
@@ -472,6 +516,12 @@
       SizeClassAllocatorLocalCache<Allocator64> > ();
 }
 
+TEST(SanitizerCommon, CombinedAllocator64Dynamic) {
+  TestCombinedAllocator<Allocator64Dynamic,
+      LargeMmapAllocator<>,
+      SizeClassAllocatorLocalCache<Allocator64Dynamic> > ();
+}
+
 TEST(SanitizerCommon, CombinedAllocator64Compact) {
   TestCombinedAllocator<Allocator64Compact,
       LargeMmapAllocator<>,
@@ -479,13 +529,11 @@
 }
 #endif
 
-#if !defined(_WIN32)  // FIXME: This currently fails on Windows.
 TEST(SanitizerCommon, CombinedAllocator32Compact) {
   TestCombinedAllocator<Allocator32Compact,
       LargeMmapAllocator<>,
       SizeClassAllocatorLocalCache<Allocator32Compact> > ();
 }
-#endif
 
 template <class AllocatorCache>
 void TestSizeClassAllocatorLocalCache() {
@@ -522,16 +570,25 @@
 }
 
 #if SANITIZER_CAN_USE_ALLOCATOR64
+// These tests can fail on Windows if memory is somewhat full and lit happens
+// to run them all at the same time. FIXME: Make them not flaky and reenable.
+#if !SANITIZER_WINDOWS
 TEST(SanitizerCommon, SizeClassAllocator64LocalCache) {
   TestSizeClassAllocatorLocalCache<
       SizeClassAllocatorLocalCache<Allocator64> >();
 }
 
+TEST(SanitizerCommon, SizeClassAllocator64DynamicLocalCache) {
+  TestSizeClassAllocatorLocalCache<
+      SizeClassAllocatorLocalCache<Allocator64Dynamic> >();
+}
+
 TEST(SanitizerCommon, SizeClassAllocator64CompactLocalCache) {
   TestSizeClassAllocatorLocalCache<
       SizeClassAllocatorLocalCache<Allocator64Compact> >();
 }
 #endif
+#endif
 
 TEST(SanitizerCommon, SizeClassAllocator32CompactLocalCache) {
   TestSizeClassAllocatorLocalCache<
@@ -601,6 +658,8 @@
   pthread_t t;
   PTHREAD_CREATE(&t, 0, DeallocNewThreadWorker, params);
   PTHREAD_JOIN(t, 0);
+
+  allocator.TestOnlyUnmap();
 }
 #endif
 
@@ -695,9 +754,16 @@
 }
 
 #if SANITIZER_CAN_USE_ALLOCATOR64
+// These tests can fail on Windows if memory is somewhat full and lit happens
+// to run them all at the same time. FIXME: Make them not flaky and reenable.
+#if !SANITIZER_WINDOWS
 TEST(SanitizerCommon, SizeClassAllocator64Iteration) {
   TestSizeClassAllocatorIteration<Allocator64>();
 }
+TEST(SanitizerCommon, SizeClassAllocator64DynamicIteration) {
+  TestSizeClassAllocatorIteration<Allocator64Dynamic>();
+}
+#endif
 #endif
 
 TEST(SanitizerCommon, SizeClassAllocator32Iteration) {
@@ -769,7 +835,9 @@
 }
 
 
-#if SANITIZER_CAN_USE_ALLOCATOR64
+// Don't test OOM conditions on Win64 because it causes other tests on the same
+// machine to OOM.
+#if SANITIZER_CAN_USE_ALLOCATOR64 && !SANITIZER_WINDOWS64
 // Regression test for out-of-memory condition in PopulateFreeList().
 TEST(SanitizerCommon, SizeClassAllocator64PopulateFreeListOOM) {
   // In a world where regions are small and chunks are huge...
diff --git a/lib/sanitizer_common/tests/sanitizer_allocator_testlib.cc b/lib/sanitizer_common/tests/sanitizer_allocator_testlib.cc
index 0cc3b9b..54e8773 100644
--- a/lib/sanitizer_common/tests/sanitizer_allocator_testlib.cc
+++ b/lib/sanitizer_common/tests/sanitizer_allocator_testlib.cc
@@ -11,9 +11,11 @@
 // for CombinedAllocator.
 //===----------------------------------------------------------------------===//
 /* Usage:
-clang++ -fno-exceptions  -g -fPIC -I. -I../include -Isanitizer \
+clang++ -std=c++11 -fno-exceptions  -g -fPIC -I. -I../include -Isanitizer \
  sanitizer_common/tests/sanitizer_allocator_testlib.cc \
- sanitizer_common/sanitizer_*.cc -shared -lpthread -o testmalloc.so
+ $(\ls sanitizer_common/sanitizer_*.cc | grep -v sanitizer_common_nolibc.cc) \
+  sanitizer_common/sanitizer_linux_x86_64.S \
+ -shared -lpthread -o testmalloc.so
 LD_PRELOAD=`pwd`/testmalloc.so /your/app
 */
 #include "sanitizer_common/sanitizer_allocator.h"
@@ -36,7 +38,8 @@
 static const uptr kAllocatorSpace = 0x600000000000ULL;
 static const uptr kAllocatorSize  =  0x10000000000ULL;  // 1T.
 
-typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize, 0,
+// typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize, 0,
+typedef SizeClassAllocator64<~(uptr)0, kAllocatorSize, 0,
   CompactSizeClassMap> PrimaryAllocator;
 typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
 typedef LargeMmapAllocator<> SecondaryAllocator;
@@ -57,15 +60,34 @@
   allocator.SwallowCache(&cache);
 }
 
+static size_t GetRss() {
+  if (FILE *f = fopen("/proc/self/statm", "r")) {
+    size_t size = 0, rss = 0;
+    fscanf(f, "%zd %zd", &size, &rss);
+    fclose(f);
+    return rss << 12;  // rss is in pages.
+  }
+  return 0;
+}
+
+struct AtExit {
+  ~AtExit() {
+    allocator.PrintStats();
+    Printf("RSS: %zdM\n", GetRss() >> 20);
+  }
+};
+
+static AtExit at_exit;
+
 static void NOINLINE thread_init() {
   if (!global_inited) {
     global_inited = true;
-    allocator.Init();
+    allocator.Init(false /*may_return_null*/);
     pthread_key_create(&pkey, thread_dtor);
   }
   thread_inited = true;
   pthread_setspecific(pkey, (void*)1);
-  cache.Init();
+  cache.Init(nullptr);
 }
 }  // namespace
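
The new GetRss() helper above parses /proc/self/statm, whose second field is the resident set size in pages, and the AtExit destructor prints it alongside the allocator stats. For illustration, a minimal standalone sketch of the same idea (assuming Linux; the CurrentRssBytes name is hypothetical, and the page size is queried via sysconf instead of the hard-coded 12-bit shift):

#include <cstddef>
#include <cstdio>
#include <unistd.h>

// Reads the resident set size, in bytes, from /proc/self/statm (Linux only).
// The second field of statm is the number of resident pages.
static size_t CurrentRssBytes() {
  FILE *f = std::fopen("/proc/self/statm", "r");
  if (!f) return 0;
  size_t pages_total = 0, pages_resident = 0;
  if (std::fscanf(f, "%zu %zu", &pages_total, &pages_resident) != 2)
    pages_resident = 0;
  std::fclose(f);
  return pages_resident * static_cast<size_t>(sysconf(_SC_PAGESIZE));
}

int main() {
  std::printf("RSS: %zuM\n", CurrentRssBytes() >> 20);
  return 0;
}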
 
diff --git a/lib/sanitizer_common/tests/sanitizer_flags_test.cc b/lib/sanitizer_common/tests/sanitizer_flags_test.cc
index 3e5d838..24a3f3d 100644
--- a/lib/sanitizer_common/tests/sanitizer_flags_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_flags_test.cc
@@ -47,6 +47,9 @@
   parser.ParseString(env);
 
   EXPECT_EQ(0, internal_strcmp(final_value, flag));
+
+  // Reporting unrecognized flags is needed to reset them.
+  ReportUnrecognizedFlags();
 }
 
 TEST(SanitizerCommon, BooleanFlags) {
@@ -97,6 +100,9 @@
 
   EXPECT_EQ(expected_flag1, flag1);
   EXPECT_EQ(0, internal_strcmp(flag2, expected_flag2));
+
+  // Reporting unrecognized flags is needed to reset them.
+  ReportUnrecognizedFlags();
 }
 
 TEST(SanitizerCommon, MultipleFlags) {
diff --git a/lib/sanitizer_common/tests/sanitizer_ioctl_test.cc b/lib/sanitizer_common/tests/sanitizer_ioctl_test.cc
index 22fa522..6e2a20b 100644
--- a/lib/sanitizer_common/tests/sanitizer_ioctl_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_ioctl_test.cc
@@ -78,7 +78,8 @@
 // Test decoding KVM ioctl numbers.
 TEST(SanitizerIoctl, KVM_GET_MP_STATE) {
   ioctl_desc desc;
-  bool res = ioctl_decode(0x8004ae98U, &desc);
+  unsigned int desc_value = SANITIZER_MIPS ? 0x4004ae98U : 0x8004ae98U;
+  bool res = ioctl_decode(desc_value, &desc);
   EXPECT_TRUE(res);
   EXPECT_EQ(ioctl_desc::WRITE, desc.type);
   EXPECT_EQ(4U, desc.size);
@@ -86,7 +87,8 @@
 
 TEST(SanitizerIoctl, KVM_GET_LAPIC) {
   ioctl_desc desc;
-  bool res = ioctl_decode(0x8400ae8eU, &desc);
+  unsigned int desc_value = SANITIZER_MIPS ? 0x4400ae8eU : 0x8400ae8eU;
+  bool res = ioctl_decode(desc_value, &desc);
   EXPECT_TRUE(res);
   EXPECT_EQ(ioctl_desc::WRITE, desc.type);
   EXPECT_EQ(1024U, desc.size);
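
The MIPS-specific request values above exist because MIPS encodes _IOC numbers with a 13-bit size field and the direction bits at bit 29, whereas the usual asm-generic layout uses a 14-bit size field with the direction at bit 30; both KVM requests here are _IOR ioctls (the kernel writes into user memory, hence the expected ioctl_desc::WRITE). A small sketch of that arithmetic, assuming those field layouts (the EncodeIoctl helper is illustrative):

#include <cassert>
#include <cstdint>

// Illustrative helper: builds an ioctl request number from its direction,
// size, type and nr fields, for a given direction-field shift.
static uint32_t EncodeIoctl(uint32_t Dir, uint32_t DirShift, uint32_t Size,
                            uint32_t Type, uint32_t Nr) {
  return (Dir << DirShift) | (Size << 16) | (Type << 8) | Nr;
}

int main() {
  const uint32_t kRead = 2;  // _IOC_READ: the kernel writes into user memory.
  // KVM_GET_MP_STATE: _IOR(0xAE, 0x98, struct kvm_mp_state), payload size 4.
  assert(EncodeIoctl(kRead, 30, 4, 0xAE, 0x98) == 0x8004ae98U);    // asm-generic
  assert(EncodeIoctl(kRead, 29, 4, 0xAE, 0x98) == 0x4004ae98U);    // MIPS
  // KVM_GET_LAPIC: _IOR(0xAE, 0x8E, struct kvm_lapic_state), payload size 1024.
  assert(EncodeIoctl(kRead, 30, 1024, 0xAE, 0x8E) == 0x8400ae8eU); // asm-generic
  assert(EncodeIoctl(kRead, 29, 1024, 0xAE, 0x8E) == 0x4400ae8eU); // MIPS
  return 0;
}

The same pattern explains both test cases: only the direction shift differs between the two ABIs, so the type, nr and size fields are unchanged.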
diff --git a/lib/sanitizer_common/tests/sanitizer_linux_test.cc b/lib/sanitizer_common/tests/sanitizer_linux_test.cc
index eef7101..fb6b109 100644
--- a/lib/sanitizer_common/tests/sanitizer_linux_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_linux_test.cc
@@ -263,6 +263,41 @@
 }
 #endif
 
+TEST(SanitizerCommon, StartSubprocessTest) {
+  int pipe_fds[2];
+  ASSERT_EQ(0, pipe(pipe_fds));
+#if SANITIZER_ANDROID
+  const char *shell = "/system/bin/sh";
+#else
+  const char *shell = "/bin/sh";
+#endif
+  const char *argv[] = {shell, "-c", "echo -n 'hello'", (char *)NULL};
+  int pid = StartSubprocess(shell, argv,
+                            /* stdin */ kInvalidFd, /* stdout */ pipe_fds[1]);
+  ASSERT_GT(pid, 0);
+
+  // wait for process to finish.
+  while (IsProcessRunning(pid)) {
+  }
+  ASSERT_FALSE(IsProcessRunning(pid));
+
+  char buffer[256];
+  {
+    char *ptr = buffer;
+    uptr bytes_read;
+    while (ReadFromFile(pipe_fds[0], ptr, 256, &bytes_read)) {
+      if (!bytes_read) {
+        break;
+      }
+      ptr += bytes_read;
+    }
+    ASSERT_EQ(5, ptr - buffer);
+    *ptr = 0;
+  }
+  ASSERT_EQ(0, strcmp(buffer, "hello")) << "Buffer: " << buffer;
+  internal_close(pipe_fds[0]);
+}
+
 }  // namespace __sanitizer
 
 #endif  // SANITIZER_LINUX
diff --git a/lib/sanitizer_common/tests/sanitizer_posix_test.cc b/lib/sanitizer_common/tests/sanitizer_posix_test.cc
index 03ca449..b7cca83 100644
--- a/lib/sanitizer_common/tests/sanitizer_posix_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_posix_test.cc
@@ -56,6 +56,7 @@
   EXPECT_TRUE(destructor_executed);
   SpawnThread(GetPthreadDestructorIterations() + 1);
   EXPECT_FALSE(destructor_executed);
+  ASSERT_EQ(0, pthread_key_delete(key));
 }
 
 TEST(SanitizerCommon, IsAccessibleMemoryRange) {
diff --git a/lib/sanitizer_common/tests/sanitizer_printf_test.cc b/lib/sanitizer_common/tests/sanitizer_printf_test.cc
index 5e39e0a..5a77b47 100644
--- a/lib/sanitizer_common/tests/sanitizer_printf_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_printf_test.cc
@@ -23,9 +23,9 @@
   char buf[1024];
   uptr len = internal_snprintf(buf, sizeof(buf),
       "a%db%zdc%ue%zuf%xh%zxq%pe%sr",
-      (int)-1, (long)-2, // NOLINT
-      (unsigned)-4, (unsigned long)5, // NOLINT
-      (unsigned)10, (unsigned long)11, // NOLINT
+      (int)-1, (uptr)-2, // NOLINT
+      (unsigned)-4, (uptr)5, // NOLINT
+      (unsigned)10, (uptr)11, // NOLINT
       (void*)0x123, "_string_");
   EXPECT_EQ(len, strlen(buf));
 
diff --git a/lib/sanitizer_common/tests/sanitizer_procmaps_test.cc b/lib/sanitizer_common/tests/sanitizer_procmaps_test.cc
index 12bc9e1..ae7c5d5 100644
--- a/lib/sanitizer_common/tests/sanitizer_procmaps_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_procmaps_test.cc
@@ -37,11 +37,11 @@
   const char *binary_name = last_slash ? last_slash + 1 : argv0;
   MemoryMappingLayout memory_mapping(false);
   const uptr kMaxModules = 100;
-  LoadedModule modules[kMaxModules];
-  uptr n_modules = memory_mapping.DumpListOfModules(modules, kMaxModules, 0);
-  EXPECT_GT(n_modules, 0U);
+  InternalMmapVector<LoadedModule> modules(kMaxModules);
+  memory_mapping.DumpListOfModules(&modules);
+  EXPECT_GT(modules.size(), 0U);
   bool found = false;
-  for (uptr i = 0; i < n_modules; ++i) {
+  for (uptr i = 0; i < modules.size(); ++i) {
     if (modules[i].containsAddress((uptr)&noop)) {
       // Verify that the module name is sane.
       if (strstr(modules[i].full_name(), binary_name) != 0)
diff --git a/lib/sanitizer_common/tests/sanitizer_pthread_wrappers.h b/lib/sanitizer_common/tests/sanitizer_pthread_wrappers.h
index 47b0f97..b7d784c 100644
--- a/lib/sanitizer_common/tests/sanitizer_pthread_wrappers.h
+++ b/lib/sanitizer_common/tests/sanitizer_pthread_wrappers.h
@@ -48,7 +48,9 @@
   data->start_routine = start_routine;
   data->arg = arg;
   *thread = CreateThread(0, 0, PthreadHelperThreadProc, data, 0, 0);
-  ASSERT_NE(nullptr, *thread) << "Failed to create a thread.";
+  DWORD err = GetLastError();
+  ASSERT_NE(nullptr, *thread) << "Failed to create a thread, got error 0x"
+                              << std::hex << err;
 }
 
 inline void PTHREAD_JOIN(pthread_t thread, void **value_ptr) {
diff --git a/lib/sanitizer_common/tests/sanitizer_symbolizer_test.cc b/lib/sanitizer_common/tests/sanitizer_symbolizer_test.cc
index 3d5678a..4c4d2a8 100644
--- a/lib/sanitizer_common/tests/sanitizer_symbolizer_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_symbolizer_test.cc
@@ -62,7 +62,9 @@
   EXPECT_STREQ("_TtSd", DemangleSwiftAndCXX("_TtSd"));
   // Check that the rest demangles properly.
   EXPECT_STREQ("f1(char*, int)", DemangleSwiftAndCXX("_Z2f1Pci"));
+#if !SANITIZER_FREEBSD // QoI issue with libcxxrt on FreeBSD
   EXPECT_STREQ("foo", DemangleSwiftAndCXX("foo"));
+#endif
   EXPECT_STREQ("", DemangleSwiftAndCXX(""));
 }
 #endif
diff --git a/lib/sanitizer_common/tests/sanitizer_thread_registry_test.cc b/lib/sanitizer_common/tests/sanitizer_thread_registry_test.cc
index 58c627a..1132bfd 100644
--- a/lib/sanitizer_common/tests/sanitizer_thread_registry_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_thread_registry_test.cc
@@ -224,6 +224,10 @@
 }
 
 TEST(SanitizerCommon, ThreadRegistryThreadedTest) {
+  memset(&num_created, 0, sizeof(num_created));
+  memset(&num_started, 0, sizeof(num_started));
+  memset(&num_joined, 0, sizeof(num_joined));
+
   ThreadRegistry registry(GetThreadContext<TestThreadContext>,
                           kThreadsPerShard * kNumShards + 1, 10);
   ThreadedTestRegistry(&registry);
diff --git a/lib/scudo/CMakeLists.txt b/lib/scudo/CMakeLists.txt
new file mode 100644
index 0000000..6cbb85f
--- /dev/null
+++ b/lib/scudo/CMakeLists.txt
@@ -0,0 +1,33 @@
+add_custom_target(scudo)
+set_target_properties(scudo PROPERTIES FOLDER "Compiler-RT Misc")
+
+include_directories(..)
+
+set(SCUDO_CFLAGS ${SANITIZER_COMMON_CFLAGS})
+append_rtti_flag(OFF SCUDO_CFLAGS)
+list(APPEND SCUDO_CFLAGS -msse4.2 -mcx16)
+
+set(SCUDO_SOURCES
+  scudo_allocator.cpp
+  scudo_flags.cpp
+  scudo_interceptors.cpp
+  scudo_new_delete.cpp
+  scudo_termination.cpp
+  scudo_utils.cpp)
+
+if(COMPILER_RT_HAS_SCUDO)
+  foreach(arch ${SCUDO_SUPPORTED_ARCH})
+    add_compiler_rt_runtime(clang_rt.scudo
+      STATIC
+      ARCHS ${arch}
+      SOURCES ${SCUDO_SOURCES}
+              $<TARGET_OBJECTS:RTInterception.${arch}>
+              $<TARGET_OBJECTS:RTSanitizerCommonNoTermination.${arch}>
+              $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>
+      CFLAGS ${SCUDO_CFLAGS}
+      PARENT_TARGET scudo)
+  endforeach()
+endif()
+
+add_dependencies(compiler-rt scudo)
+
diff --git a/lib/scudo/scudo_allocator.cpp b/lib/scudo/scudo_allocator.cpp
new file mode 100644
index 0000000..ceb7bbd
--- /dev/null
+++ b/lib/scudo/scudo_allocator.cpp
@@ -0,0 +1,635 @@
+//===-- scudo_allocator.cpp -------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Scudo Hardened Allocator implementation.
+/// It uses the sanitizer_common allocator as a base and aims at mitigating
+/// heap corruption vulnerabilities. It provides a checksum-guarded chunk
+/// header, a delayed free list, and additional sanity checks.
+///
+//===----------------------------------------------------------------------===//
+
+#include "scudo_allocator.h"
+#include "scudo_utils.h"
+
+#include "sanitizer_common/sanitizer_allocator_interface.h"
+#include "sanitizer_common/sanitizer_quarantine.h"
+
+#include <limits.h>
+#include <pthread.h>
+#include <smmintrin.h>
+
+#include <atomic>
+#include <cstring>
+
+namespace __scudo {
+
+const uptr AllocatorSpace = ~0ULL;
+const uptr AllocatorSize  =  0x10000000000ULL;
+const uptr MinAlignmentLog = 4; // 16 bytes for x64
+const uptr MaxAlignmentLog = 24;
+
+typedef DefaultSizeClassMap SizeClassMap;
+typedef SizeClassAllocator64<AllocatorSpace, AllocatorSize, 0, SizeClassMap>
+  PrimaryAllocator;
+typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
+typedef LargeMmapAllocator<> SecondaryAllocator;
+typedef CombinedAllocator<PrimaryAllocator, AllocatorCache, SecondaryAllocator>
+  ScudoAllocator;
+
+static ScudoAllocator &getAllocator();
+
+static thread_local Xorshift128Plus Prng;
+// Global static cookie, initialized at start-up.
+static u64 Cookie;
+
+enum ChunkState : u8 {
+  ChunkAvailable  = 0,
+  ChunkAllocated  = 1,
+  ChunkQuarantine = 2
+};
+
+typedef unsigned __int128 PackedHeader;
+typedef std::atomic<PackedHeader> AtomicPackedHeader;
+
+// Our header requires 128 bits of storage on x64 (the only platform supported
+// as of now), which fits nicely with the alignment requirements.
+// Having the offset saves us from using functions such as GetBlockBegin, which
+// is fairly costly. Our first implementation used the MetaData as well, which
+// offers the advantage of being stored away from the chunk itself, but
+// accessing it was costly as well.
+// The header will be atomically loaded and stored using the 16-byte primitives
+// offered by the platform (likely requires cmpxchg16b support).
+struct UnpackedHeader {
+  // 1st 8 bytes
+  u16 Checksum      : 16;
+  u64 RequestedSize : 40; // Needed for reallocation purposes.
+  u8  State         : 2;  // available, allocated, or quarantined
+  u8  AllocType     : 2;  // malloc, new, new[], or memalign
+  u8  Unused_0_     : 4;
+  // 2nd 8 bytes
+  u64 Offset        : 20; // Offset from the beginning of the backend
+                          // allocation to the beginning of the chunk itself, in
+                          // multiples of MinAlignment. See comment about its
+                          // maximum value and test in init().
+  u64 Unused_1_     : 28;
+  u16 Salt          : 16;
+};
+
+COMPILER_CHECK(sizeof(UnpackedHeader) == sizeof(PackedHeader));
+
+const uptr ChunkHeaderSize = sizeof(PackedHeader);
+
+struct ScudoChunk : UnpackedHeader {
+  // We can't use the offset member of the chunk itself, as we would double
+  // fetch it without any guarantee that it wouldn't have been tampered with. To
+  // prevent this, we work with a local copy of the header.
+  void *AllocBeg(UnpackedHeader *Header) {
+    return reinterpret_cast<void *>(
+        reinterpret_cast<uptr>(this) - (Header->Offset << MinAlignmentLog));
+  }
+
+  // CRC32 checksum of the Chunk pointer and its ChunkHeader.
+  // It currently uses the Intel Nehalem SSE4.2 crc32 64-bit instruction.
+  u16 Checksum(UnpackedHeader *Header) const {
+    u64 HeaderHolder[2];
+    memcpy(HeaderHolder, Header, sizeof(HeaderHolder));
+    u64 Crc = _mm_crc32_u64(Cookie, reinterpret_cast<uptr>(this));
+    // This is somewhat of a shortcut. The checksum is stored in the 16 least
+    // significant bits of the first 8 bytes of the header, hence zero-ing
+    // those bits out. It would be more valid to zero the checksum field of the
+    // UnpackedHeader, but would require holding an additional copy of it.
+    Crc = _mm_crc32_u64(Crc, HeaderHolder[0] & 0xffffffffffff0000ULL);
+    Crc = _mm_crc32_u64(Crc, HeaderHolder[1]);
+    return static_cast<u16>(Crc);
+  }
+
+  // Loads and unpacks the header, verifying the checksum in the process.
+  void loadHeader(UnpackedHeader *NewUnpackedHeader) const {
+    const AtomicPackedHeader *AtomicHeader =
+        reinterpret_cast<const AtomicPackedHeader *>(this);
+    PackedHeader NewPackedHeader =
+        AtomicHeader->load(std::memory_order_relaxed);
+    *NewUnpackedHeader = bit_cast<UnpackedHeader>(NewPackedHeader);
+    if ((NewUnpackedHeader->Unused_0_ != 0) ||
+        (NewUnpackedHeader->Unused_1_ != 0) ||
+        (NewUnpackedHeader->Checksum != Checksum(NewUnpackedHeader))) {
+      dieWithMessage("ERROR: corrupted chunk header at address %p\n", this);
+    }
+  }
+
+  // Packs and stores the header, computing the checksum in the process.
+  void storeHeader(UnpackedHeader *NewUnpackedHeader) {
+    NewUnpackedHeader->Checksum = Checksum(NewUnpackedHeader);
+    PackedHeader NewPackedHeader = bit_cast<PackedHeader>(*NewUnpackedHeader);
+    AtomicPackedHeader *AtomicHeader =
+        reinterpret_cast<AtomicPackedHeader *>(this);
+    AtomicHeader->store(NewPackedHeader, std::memory_order_relaxed);
+  }
+
+  // Packs and stores the header, computing the checksum in the process. We
+  // compare the current header with the expected one provided, to ensure that
+  // we are not being raced by a corruption occurring in another thread.
+  void compareExchangeHeader(UnpackedHeader *NewUnpackedHeader,
+                             UnpackedHeader *OldUnpackedHeader) {
+    NewUnpackedHeader->Checksum = Checksum(NewUnpackedHeader);
+    PackedHeader NewPackedHeader = bit_cast<PackedHeader>(*NewUnpackedHeader);
+    PackedHeader OldPackedHeader = bit_cast<PackedHeader>(*OldUnpackedHeader);
+    AtomicPackedHeader *AtomicHeader =
+        reinterpret_cast<AtomicPackedHeader *>(this);
+    if (!AtomicHeader->compare_exchange_strong(OldPackedHeader,
+                                               NewPackedHeader,
+                                               std::memory_order_relaxed,
+                                               std::memory_order_relaxed)) {
+      dieWithMessage("ERROR: race on chunk header at address %p\n", this);
+    }
+  }
+};
+
+static bool ScudoInitIsRunning = false;
+
+static pthread_once_t GlobalInited = PTHREAD_ONCE_INIT;
+static pthread_key_t pkey;
+
+static thread_local bool ThreadInited = false;
+static thread_local bool ThreadTornDown = false;
+static thread_local AllocatorCache Cache;
+
+static void teardownThread(void *p) {
+  uptr v = reinterpret_cast<uptr>(p);
+  // The glibc POSIX thread-local-storage deallocation routine calls user
+  // provided destructors in a loop of PTHREAD_DESTRUCTOR_ITERATIONS.
+  // We want to be called last since other destructors might call free and the
+  // like, so we wait until PTHREAD_DESTRUCTOR_ITERATIONS before draining the
+  // quarantine and swallowing the cache.
+  if (v < PTHREAD_DESTRUCTOR_ITERATIONS) {
+    pthread_setspecific(pkey, reinterpret_cast<void *>(v + 1));
+    return;
+  }
+  drainQuarantine();
+  getAllocator().DestroyCache(&Cache);
+  ThreadTornDown = true;
+}
+
+static void initInternal() {
+  SanitizerToolName = "Scudo";
+  CHECK(!ScudoInitIsRunning && "Scudo init calls itself!");
+  ScudoInitIsRunning = true;
+
+  initFlags();
+
+  AllocatorOptions Options;
+  Options.setFrom(getFlags(), common_flags());
+  initAllocator(Options);
+
+  ScudoInitIsRunning = false;
+}
+
+static void initGlobal() {
+  pthread_key_create(&pkey, teardownThread);
+  initInternal();
+}
+
+static void NOINLINE initThread() {
+  pthread_once(&GlobalInited, initGlobal);
+  pthread_setspecific(pkey, reinterpret_cast<void *>(1));
+  getAllocator().InitCache(&Cache);
+  ThreadInited = true;
+}
+
+struct QuarantineCallback {
+  explicit QuarantineCallback(AllocatorCache *Cache)
+    : Cache_(Cache) {}
+
+  // Chunk recycling function, returns a quarantined chunk to the backend.
+  void Recycle(ScudoChunk *Chunk) {
+    UnpackedHeader Header;
+    Chunk->loadHeader(&Header);
+    if (Header.State != ChunkQuarantine) {
+      dieWithMessage("ERROR: invalid chunk state when recycling address %p\n",
+                     Chunk);
+    }
+    void *Ptr = Chunk->AllocBeg(&Header);
+    getAllocator().Deallocate(Cache_, Ptr);
+  }
+
+  /// Internal quarantine allocation and deallocation functions.
+  void *Allocate(uptr Size) {
+    // The internal quarantine memory cannot be protected by us. But the only
+    // structures allocated are QuarantineBatch, which are 8KB on x64. So we
+    // will use mmap for those, and given that Deallocate doesn't pass a size
+    // in, we enforce the size of the allocation to be sizeof(QuarantineBatch).
+    // TODO(kostyak): switching to mmap greatly impacts performance; we have
+    //                to find another solution
+    // CHECK_EQ(Size, sizeof(QuarantineBatch));
+    // return MmapOrDie(Size, "QuarantineBatch");
+    return getAllocator().Allocate(Cache_, Size, 1, false);
+  }
+
+  void Deallocate(void *Ptr) {
+    // UnmapOrDie(Ptr, sizeof(QuarantineBatch));
+    getAllocator().Deallocate(Cache_, Ptr);
+  }
+
+  AllocatorCache *Cache_;
+};
+
+typedef Quarantine<QuarantineCallback, ScudoChunk> ScudoQuarantine;
+typedef ScudoQuarantine::Cache QuarantineCache;
+static thread_local QuarantineCache ThreadQuarantineCache;
+
+void AllocatorOptions::setFrom(const Flags *f, const CommonFlags *cf) {
+  MayReturnNull = cf->allocator_may_return_null;
+  QuarantineSizeMb = f->QuarantineSizeMb;
+  ThreadLocalQuarantineSizeKb = f->ThreadLocalQuarantineSizeKb;
+  DeallocationTypeMismatch = f->DeallocationTypeMismatch;
+  DeleteSizeMismatch = f->DeleteSizeMismatch;
+  ZeroContents = f->ZeroContents;
+}
+
+void AllocatorOptions::copyTo(Flags *f, CommonFlags *cf) const {
+  cf->allocator_may_return_null = MayReturnNull;
+  f->QuarantineSizeMb = QuarantineSizeMb;
+  f->ThreadLocalQuarantineSizeKb = ThreadLocalQuarantineSizeKb;
+  f->DeallocationTypeMismatch = DeallocationTypeMismatch;
+  f->DeleteSizeMismatch = DeleteSizeMismatch;
+  f->ZeroContents = ZeroContents;
+}
+
+struct Allocator {
+  static const uptr MaxAllowedMallocSize = 1ULL << 40;
+  static const uptr MinAlignment = 1 << MinAlignmentLog;
+  static const uptr MaxAlignment = 1 << MaxAlignmentLog; // 16 MB
+
+  ScudoAllocator BackendAllocator;
+  ScudoQuarantine AllocatorQuarantine;
+
+  // The fallback caches are used when the thread local caches have been
+  // 'destroyed' on thread tear-down. They are protected by a Mutex as they can
+  // be accessed by different threads.
+  StaticSpinMutex FallbackMutex;
+  AllocatorCache FallbackAllocatorCache;
+  QuarantineCache FallbackQuarantineCache;
+
+  bool DeallocationTypeMismatch;
+  bool ZeroContents;
+  bool DeleteSizeMismatch;
+
+  explicit Allocator(LinkerInitialized)
+    : AllocatorQuarantine(LINKER_INITIALIZED),
+      FallbackQuarantineCache(LINKER_INITIALIZED) {}
+
+  void init(const AllocatorOptions &Options) {
+    // Currently SSE 4.2 support is required. This might change later.
+    CHECK(testCPUFeature(SSE4_2)); // for crc32
+
+    // Verify that the header offset field can hold the maximum offset. In the
+    // worst case scenario, the backend allocation is already aligned on
+    // MaxAlignment, so in order to store the header and still be aligned, we
+    // add an extra MaxAlignment. As a result, the offset from the beginning of
+    // the backend allocation to the chunk will be MaxAlignment -
+    // ChunkHeaderSize.
+    UnpackedHeader Header = {};
+    uptr MaximumOffset = (MaxAlignment - ChunkHeaderSize) >> MinAlignmentLog;
+    Header.Offset = MaximumOffset;
+    if (Header.Offset != MaximumOffset) {
+      dieWithMessage("ERROR: the maximum possible offset doesn't fit in the "
+                     "header\n");
+    }
+
+    DeallocationTypeMismatch = Options.DeallocationTypeMismatch;
+    DeleteSizeMismatch = Options.DeleteSizeMismatch;
+    ZeroContents = Options.ZeroContents;
+    BackendAllocator.Init(Options.MayReturnNull);
+    AllocatorQuarantine.Init(static_cast<uptr>(Options.QuarantineSizeMb) << 20,
+                             static_cast<uptr>(
+                                 Options.ThreadLocalQuarantineSizeKb) << 10);
+    BackendAllocator.InitCache(&FallbackAllocatorCache);
+    Cookie = Prng.Next();
+  }
+
+  // Allocates a chunk.
+  void *allocate(uptr Size, uptr Alignment, AllocType Type) {
+    if (UNLIKELY(!ThreadInited))
+      initThread();
+    if (!IsPowerOfTwo(Alignment)) {
+      dieWithMessage("ERROR: malloc alignment is not a power of 2\n");
+    }
+    if (Alignment > MaxAlignment)
+      return BackendAllocator.ReturnNullOrDie();
+    if (Alignment < MinAlignment)
+      Alignment = MinAlignment;
+    if (Size == 0)
+      Size = 1;
+    if (Size >= MaxAllowedMallocSize)
+      return BackendAllocator.ReturnNullOrDie();
+    uptr RoundedSize = RoundUpTo(Size, MinAlignment);
+    uptr ExtraBytes = ChunkHeaderSize;
+    if (Alignment > MinAlignment)
+      ExtraBytes += Alignment;
+    uptr NeededSize = RoundedSize + ExtraBytes;
+    if (NeededSize >= MaxAllowedMallocSize)
+      return BackendAllocator.ReturnNullOrDie();
+
+    void *Ptr;
+    if (LIKELY(!ThreadTornDown)) {
+      Ptr = BackendAllocator.Allocate(&Cache, NeededSize, MinAlignment);
+    } else {
+      SpinMutexLock l(&FallbackMutex);
+      Ptr = BackendAllocator.Allocate(&FallbackAllocatorCache, NeededSize,
+                               MinAlignment);
+    }
+    if (!Ptr)
+      return BackendAllocator.ReturnNullOrDie();
+
+    // If requested, we will zero out the entire contents of the returned chunk.
+    if (ZeroContents && BackendAllocator.FromPrimary(Ptr))
+       memset(Ptr, 0, BackendAllocator.GetActuallyAllocatedSize(Ptr));
+
+    uptr AllocBeg = reinterpret_cast<uptr>(Ptr);
+    uptr ChunkBeg = AllocBeg + ChunkHeaderSize;
+    if (!IsAligned(ChunkBeg, Alignment))
+      ChunkBeg = RoundUpTo(ChunkBeg, Alignment);
+    CHECK_LE(ChunkBeg + Size, AllocBeg + NeededSize);
+    ScudoChunk *Chunk =
+        reinterpret_cast<ScudoChunk *>(ChunkBeg - ChunkHeaderSize);
+    UnpackedHeader Header = {};
+    Header.State = ChunkAllocated;
+    Header.Offset = (ChunkBeg - ChunkHeaderSize - AllocBeg) >> MinAlignmentLog;
+    Header.AllocType = Type;
+    Header.RequestedSize = Size;
+    Header.Salt = static_cast<u16>(Prng.Next());
+    Chunk->storeHeader(&Header);
+    void *UserPtr = reinterpret_cast<void *>(ChunkBeg);
+    // TODO(kostyak): hooks sound like a terrible idea security wise but might
+    //                be needed for things to work properly?
+    // if (&__sanitizer_malloc_hook) __sanitizer_malloc_hook(UserPtr, Size);
+    return UserPtr;
+  }
+
+  // Deallocates a Chunk, which means adding it to the delayed free list (or
+  // Quarantine).
+  void deallocate(void *UserPtr, uptr DeleteSize, AllocType Type) {
+    if (UNLIKELY(!ThreadInited))
+      initThread();
+    // TODO(kostyak): see hook comment above
+    // if (&__sanitizer_free_hook) __sanitizer_free_hook(UserPtr);
+    if (!UserPtr)
+      return;
+    uptr ChunkBeg = reinterpret_cast<uptr>(UserPtr);
+    if (!IsAligned(ChunkBeg, MinAlignment)) {
+      dieWithMessage("ERROR: attempted to deallocate a chunk not properly "
+                     "aligned at address %p\n", UserPtr);
+    }
+    ScudoChunk *Chunk =
+        reinterpret_cast<ScudoChunk *>(ChunkBeg - ChunkHeaderSize);
+    UnpackedHeader OldHeader;
+    Chunk->loadHeader(&OldHeader);
+    if (OldHeader.State != ChunkAllocated) {
+      dieWithMessage("ERROR: invalid chunk state when deallocating address "
+                     "%p\n", Chunk);
+    }
+    UnpackedHeader NewHeader = OldHeader;
+    NewHeader.State = ChunkQuarantine;
+    Chunk->compareExchangeHeader(&NewHeader, &OldHeader);
+    if (DeallocationTypeMismatch) {
+      // The deallocation type has to match the allocation one.
+      if (NewHeader.AllocType != Type) {
+        // With the exception of memalign'd Chunks, which can still be free'd.
+        if (NewHeader.AllocType != FromMemalign || Type != FromMalloc) {
+          dieWithMessage("ERROR: allocation type mismatch on address %p\n",
+                         Chunk);
+        }
+      }
+    }
+    uptr Size = NewHeader.RequestedSize;
+    if (DeleteSizeMismatch) {
+      if (DeleteSize && DeleteSize != Size) {
+        dieWithMessage("ERROR: invalid sized delete on chunk at address %p\n",
+                       Chunk);
+      }
+    }
+    if (LIKELY(!ThreadTornDown)) {
+      AllocatorQuarantine.Put(&ThreadQuarantineCache,
+                              QuarantineCallback(&Cache), Chunk, Size);
+    } else {
+      SpinMutexLock l(&FallbackMutex);
+      AllocatorQuarantine.Put(&FallbackQuarantineCache,
+                              QuarantineCallback(&FallbackAllocatorCache),
+                              Chunk, Size);
+    }
+  }
+
+  // Returns the actual usable size of a chunk. Since this requires loading the
+  // header, we return the header via the second parameter, as the caller may
+  // need it to perform additional processing.
+  uptr getUsableSize(const void *Ptr, UnpackedHeader *Header) {
+    if (UNLIKELY(!ThreadInited))
+      initThread();
+    if (!Ptr)
+      return 0;
+    uptr ChunkBeg = reinterpret_cast<uptr>(Ptr);
+    ScudoChunk *Chunk =
+        reinterpret_cast<ScudoChunk *>(ChunkBeg - ChunkHeaderSize);
+    Chunk->loadHeader(Header);
+    // Getting the usable size of a chunk only makes sense if it's allocated.
+    if (Header->State != ChunkAllocated) {
+      dieWithMessage("ERROR: attempted to size a non-allocated chunk at "
+                     "address %p\n", Chunk);
+    }
+    uptr Size =
+        BackendAllocator.GetActuallyAllocatedSize(Chunk->AllocBeg(Header));
+    // UsableSize works as malloc_usable_size, which is also what (AFAIU)
+    // tcmalloc's MallocExtension::GetAllocatedSize aims at providing. This
+    // means we will return the size of the chunk from the user beginning to
+    // the end of the 'user' allocation, hence us subtracting the header size
+    // and the offset from the size.
+    if (Size == 0)
+      return Size;
+    return Size - ChunkHeaderSize - (Header->Offset << MinAlignmentLog);
+  }
+
+  // Helper function that doesn't care about the header.
+  uptr getUsableSize(const void *Ptr) {
+    UnpackedHeader Header;
+    return getUsableSize(Ptr, &Header);
+  }
+
+  // Reallocates a chunk. We can save on a new allocation if the new requested
+  // size still fits in the chunk.
+  void *reallocate(void *OldPtr, uptr NewSize) {
+    if (UNLIKELY(!ThreadInited))
+      initThread();
+    UnpackedHeader OldHeader;
+    uptr Size = getUsableSize(OldPtr, &OldHeader);
+    uptr ChunkBeg = reinterpret_cast<uptr>(OldPtr);
+    ScudoChunk *Chunk =
+        reinterpret_cast<ScudoChunk *>(ChunkBeg - ChunkHeaderSize);
+    if (OldHeader.AllocType != FromMalloc) {
+      dieWithMessage("ERROR: invalid chunk type when reallocating address %p\n",
+                     Chunk);
+    }
+    UnpackedHeader NewHeader = OldHeader;
+    // The new size still fits in the current chunk.
+    if (NewSize <= Size) {
+      NewHeader.RequestedSize = NewSize;
+      Chunk->compareExchangeHeader(&NewHeader, &OldHeader);
+      return OldPtr;
+    }
+    // Otherwise, we have to allocate a new chunk and copy the contents of the
+    // old one.
+    void *NewPtr = allocate(NewSize, MinAlignment, FromMalloc);
+    if (NewPtr) {
+      uptr OldSize = OldHeader.RequestedSize;
+      memcpy(NewPtr, OldPtr, Min(NewSize, OldSize));
+      NewHeader.State = ChunkQuarantine;
+      Chunk->compareExchangeHeader(&NewHeader, &OldHeader);
+      if (LIKELY(!ThreadTornDown)) {
+        AllocatorQuarantine.Put(&ThreadQuarantineCache,
+                                QuarantineCallback(&Cache), Chunk, OldSize);
+      } else {
+        SpinMutexLock l(&FallbackMutex);
+        AllocatorQuarantine.Put(&FallbackQuarantineCache,
+                                QuarantineCallback(&FallbackAllocatorCache),
+                                Chunk, OldSize);
+      }
+    }
+    return NewPtr;
+  }
+
+  void *calloc(uptr NMemB, uptr Size) {
+    if (UNLIKELY(!ThreadInited))
+      initThread();
+    uptr Total = NMemB * Size;
+    if (Size != 0 && Total / Size != NMemB) // Overflow check
+      return BackendAllocator.ReturnNullOrDie();
+    void *Ptr = allocate(Total, MinAlignment, FromMalloc);
+    // If ZeroContents, the content of the chunk has already been zero'd out.
+    if (!ZeroContents && Ptr && BackendAllocator.FromPrimary(Ptr))
+      memset(Ptr, 0, getUsableSize(Ptr));
+    return Ptr;
+  }
+
+  void drainQuarantine() {
+    AllocatorQuarantine.Drain(&ThreadQuarantineCache,
+                              QuarantineCallback(&Cache));
+  }
+};
+
+static Allocator Instance(LINKER_INITIALIZED);
+
+static ScudoAllocator &getAllocator() {
+  return Instance.BackendAllocator;
+}
+
+void initAllocator(const AllocatorOptions &Options) {
+  Instance.init(Options);
+}
+
+void drainQuarantine() {
+  Instance.drainQuarantine();
+}
+
+void *scudoMalloc(uptr Size, AllocType Type) {
+  return Instance.allocate(Size, Allocator::MinAlignment, Type);
+}
+
+void scudoFree(void *Ptr, AllocType Type) {
+  Instance.deallocate(Ptr, 0, Type);
+}
+
+void scudoSizedFree(void *Ptr, uptr Size, AllocType Type) {
+  Instance.deallocate(Ptr, Size, Type);
+}
+
+void *scudoRealloc(void *Ptr, uptr Size) {
+  if (!Ptr)
+    return Instance.allocate(Size, Allocator::MinAlignment, FromMalloc);
+  if (Size == 0) {
+    Instance.deallocate(Ptr, 0, FromMalloc);
+    return nullptr;
+  }
+  return Instance.reallocate(Ptr, Size);
+}
+
+void *scudoCalloc(uptr NMemB, uptr Size) {
+  return Instance.calloc(NMemB, Size);
+}
+
+void *scudoValloc(uptr Size) {
+  return Instance.allocate(Size, GetPageSizeCached(), FromMemalign);
+}
+
+void *scudoMemalign(uptr Alignment, uptr Size) {
+  return Instance.allocate(Size, Alignment, FromMemalign);
+}
+
+void *scudoPvalloc(uptr Size) {
+  uptr PageSize = GetPageSizeCached();
+  Size = RoundUpTo(Size, PageSize);
+  if (Size == 0) {
+    // pvalloc(0) should allocate one page.
+    Size = PageSize;
+  }
+  return Instance.allocate(Size, PageSize, FromMemalign);
+}
+
+int scudoPosixMemalign(void **MemPtr, uptr Alignment, uptr Size) {
+  *MemPtr = Instance.allocate(Size, Alignment, FromMemalign);
+  return 0;
+}
+
+void *scudoAlignedAlloc(uptr Alignment, uptr Size) {
+  // size must be a multiple of the alignment. To avoid a division, we first
+  // make sure that alignment is a power of 2.
+  CHECK(IsPowerOfTwo(Alignment));
+  CHECK_EQ((Size & (Alignment - 1)), 0);
+  return Instance.allocate(Size, Alignment, FromMalloc);
+}
+
+uptr scudoMallocUsableSize(void *Ptr) {
+  return Instance.getUsableSize(Ptr);
+}
+
+} // namespace __scudo
+
+using namespace __scudo;
+
+// MallocExtension helper functions
+
+uptr __sanitizer_get_current_allocated_bytes() {
+  uptr stats[AllocatorStatCount];
+  getAllocator().GetStats(stats);
+  return stats[AllocatorStatAllocated];
+}
+
+uptr __sanitizer_get_heap_size() {
+  uptr stats[AllocatorStatCount];
+  getAllocator().GetStats(stats);
+  return stats[AllocatorStatMapped];
+}
+
+uptr __sanitizer_get_free_bytes() {
+  return 1;
+}
+
+uptr __sanitizer_get_unmapped_bytes() {
+  return 1;
+}
+
+uptr __sanitizer_get_estimated_allocated_size(uptr size) {
+  return size;
+}
+
+int __sanitizer_get_ownership(const void *p) {
+  return Instance.getUsableSize(p) != 0;
+}
+
+uptr __sanitizer_get_allocated_size(const void *p) {
+  return Instance.getUsableSize(p);
+}
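
The checksum-guarded header is the central hardening mechanism above: the 16 header bytes are bit_cast to an unsigned __int128, the low 16 bits hold a checksum over the global cookie, the chunk address and the remaining header bits, and every load re-verifies that checksum before any field is trusted. A minimal sketch of that pack/verify cycle, assuming a GCC/Clang __int128 and substituting an FNV-style mix for the SSE4.2 crc32; the HeaderChecksum helper and the Cookie constant are illustrative, and the 16-byte atomic and compare-exchange paths are elided:

#include <cstdint>
#include <cstdio>
#include <cstring>

// 16-byte header; the real allocator loads/stores it through a 16-byte atomic
// (hence the cmpxchg16b requirement), which is left out here for brevity.
typedef unsigned __int128 PackedHeader;

struct UnpackedHeader {
  uint64_t Checksum      : 16;
  uint64_t RequestedSize : 40;
  uint64_t State         : 2;
  uint64_t AllocType     : 2;
  uint64_t Unused0       : 4;
  uint64_t Offset        : 20;
  uint64_t Unused1       : 28;
  uint64_t Salt          : 16;
};
static_assert(sizeof(UnpackedHeader) == sizeof(PackedHeader), "bad header size");

static uint64_t Cookie = 0x5bd1e995badc0ffeULL;  // the real one comes from the PRNG

// Stand-in for the hardware crc32: mixes the cookie, the chunk address and the
// header with its Checksum field zeroed, so any field change invalidates it.
static uint16_t HeaderChecksum(const void *Chunk, UnpackedHeader H) {
  H.Checksum = 0;
  uint64_t Words[2];
  std::memcpy(Words, &H, sizeof(Words));
  uint64_t Crc = 0xcbf29ce484222325ULL ^ Cookie;
  const uint64_t kPrime = 0x100000001b3ULL;
  Crc = (Crc ^ reinterpret_cast<uintptr_t>(Chunk)) * kPrime;
  Crc = (Crc ^ Words[0]) * kPrime;
  Crc = (Crc ^ Words[1]) * kPrime;
  return static_cast<uint16_t>(Crc ^ (Crc >> 16) ^ (Crc >> 32) ^ (Crc >> 48));
}

int main() {
  PackedHeader Slot = 0;  // stands in for the header stored in front of a chunk

  // Store path: fill the header, checksum it, pack it.
  UnpackedHeader H = {};
  H.State = 1;           // allocated
  H.RequestedSize = 128;
  H.Salt = 0x1234;
  H.Checksum = HeaderChecksum(&Slot, H);
  std::memcpy(&Slot, &H, sizeof(Slot));

  // Load path: unpack and verify the checksum before trusting any field.
  UnpackedHeader Loaded;
  std::memcpy(&Loaded, &Slot, sizeof(Loaded));
  std::printf("valid header:    %s\n",
              Loaded.Checksum == HeaderChecksum(&Slot, Loaded) ? "ok" : "corrupted");

  // Flip one bit of the stored header: the checksum no longer matches.
  Slot ^= PackedHeader(1) << 70;
  std::memcpy(&Loaded, &Slot, sizeof(Loaded));
  std::printf("tampered header: %s\n",
              Loaded.Checksum == HeaderChecksum(&Slot, Loaded) ? "ok" : "corrupted");
  return 0;
}

Flipping any stored header bit, as at the end of the sketch, makes the recomputed checksum disagree with the stored one, which is what loadHeader() turns into a hard failure.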
diff --git a/lib/scudo/scudo_allocator.h b/lib/scudo/scudo_allocator.h
new file mode 100644
index 0000000..7e9c788
--- /dev/null
+++ b/lib/scudo/scudo_allocator.h
@@ -0,0 +1,63 @@
+//===-- scudo_allocator.h ---------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Header for scudo_allocator.cpp.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_ALLOCATOR_H_
+#define SCUDO_ALLOCATOR_H_
+
+#ifndef __x86_64__
+# error "The Scudo hardened allocator currently only supports x86_64."
+#endif
+
+#include "scudo_flags.h"
+
+#include "sanitizer_common/sanitizer_allocator.h"
+
+namespace __scudo {
+
+enum AllocType : u8 {
+  FromMalloc    = 0, // Memory block came from malloc, realloc, calloc, etc.
+  FromNew       = 1, // Memory block came from operator new.
+  FromNewArray  = 2, // Memory block came from operator new [].
+  FromMemalign  = 3, // Memory block came from memalign, posix_memalign, etc.
+};
+
+struct AllocatorOptions {
+  u32 QuarantineSizeMb;
+  u32 ThreadLocalQuarantineSizeKb;
+  bool MayReturnNull;
+  bool DeallocationTypeMismatch;
+  bool DeleteSizeMismatch;
+  bool ZeroContents;
+
+  void setFrom(const Flags *f, const CommonFlags *cf);
+  void copyTo(Flags *f, CommonFlags *cf) const;
+};
+
+void initAllocator(const AllocatorOptions &options);
+void drainQuarantine();
+
+void *scudoMalloc(uptr Size, AllocType Type);
+void scudoFree(void *Ptr, AllocType Type);
+void scudoSizedFree(void *Ptr, uptr Size, AllocType Type);
+void *scudoRealloc(void *Ptr, uptr Size);
+void *scudoCalloc(uptr NMemB, uptr Size);
+void *scudoMemalign(uptr Alignment, uptr Size);
+void *scudoValloc(uptr Size);
+void *scudoPvalloc(uptr Size);
+int scudoPosixMemalign(void **MemPtr, uptr Alignment, uptr Size);
+void *scudoAlignedAlloc(uptr Alignment, uptr Size);
+uptr scudoMallocUsableSize(void *Ptr);
+
+} // namespace __scudo
+
+#endif  // SCUDO_ALLOCATOR_H_
diff --git a/lib/scudo/scudo_flags.cpp b/lib/scudo/scudo_flags.cpp
new file mode 100644
index 0000000..f0d2088
--- /dev/null
+++ b/lib/scudo/scudo_flags.cpp
@@ -0,0 +1,93 @@
+//===-- scudo_flags.cpp -----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Hardened Allocator flag parsing logic.
+///
+//===----------------------------------------------------------------------===//
+
+#include "scudo_flags.h"
+#include "scudo_utils.h"
+
+#include "sanitizer_common/sanitizer_flags.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+const char* __scudo_default_options();
+
+namespace __scudo {
+
+Flags ScudoFlags;  // Use via getFlags().
+
+void Flags::setDefaults() {
+#define SCUDO_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
+#include "scudo_flags.inc"
+#undef SCUDO_FLAG
+}
+
+static void RegisterScudoFlags(FlagParser *parser, Flags *f) {
+#define SCUDO_FLAG(Type, Name, DefaultValue, Description) \
+  RegisterFlag(parser, #Name, Description, &f->Name);
+#include "scudo_flags.inc"
+#undef SCUDO_FLAG
+}
+
+static const char *callGetScudoDefaultOptions() {
+  return (&__scudo_default_options) ? __scudo_default_options() : "";
+}
+
+void initFlags() {
+  SetCommonFlagsDefaults();
+  {
+    CommonFlags cf;
+    cf.CopyFrom(*common_flags());
+    cf.exitcode = 1;
+    OverrideCommonFlags(cf);
+  }
+  Flags *f = getFlags();
+  f->setDefaults();
+
+  FlagParser ScudoParser;
+  RegisterScudoFlags(&ScudoParser, f);
+  RegisterCommonFlags(&ScudoParser);
+
+  // Override from user-specified string.
+  const char *ScudoDefaultOptions = callGetScudoDefaultOptions();
+  ScudoParser.ParseString(ScudoDefaultOptions);
+
+  // Override from environment.
+  ScudoParser.ParseString(GetEnv("SCUDO_OPTIONS"));
+
+  InitializeCommonFlags();
+
+  // Sanity checks and default settings for the Quarantine parameters.
+
+  if (f->QuarantineSizeMb < 0) {
+    const int DefaultQuarantineSizeMb = 64;
+    f->QuarantineSizeMb = DefaultQuarantineSizeMb;
+  }
+  // We enforce an upper limit for the quarantine size of 4Gb.
+  if (f->QuarantineSizeMb > (4 * 1024)) {
+    dieWithMessage("ERROR: the quarantine size is too large\n");
+  }
+  if (f->ThreadLocalQuarantineSizeKb < 0) {
+    const int DefaultThreadLocalQuarantineSizeKb = 1024;
+    f->ThreadLocalQuarantineSizeKb = DefaultThreadLocalQuarantineSizeKb;
+  }
+  // And an upper limit of 128Mb for the thread quarantine cache.
+  if (f->ThreadLocalQuarantineSizeKb > (128 * 1024)) {
+    dieWithMessage("ERROR: the per thread quarantine cache size is too "
+                   "large\n");
+  }
+}
+
+Flags *getFlags() {
+  return &ScudoFlags;
+}
+
+}
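
Beyond SCUDO_OPTIONS in the environment, the weak __scudo_default_options() hook parsed above can be defined directly in the program being hardened; the environment string is parsed last and still takes precedence. A short sketch, assuming the program is linked against the Scudo runtime and the usual colon-separated Name=Value sanitizer option format:

// Overrides Scudo defaults from the instrumented binary by defining the weak
// __scudo_default_options() hook declared in scudo_flags.cpp.
extern "C" const char *__scudo_default_options() {
  return "QuarantineSizeMb=256:ThreadLocalQuarantineSizeKb=2048:ZeroContents=1";
}

int main() {
  // SCUDO_OPTIONS is parsed after this string and wins on conflicts,
  // e.g.: SCUDO_OPTIONS="ZeroContents=0" ./a.out
  return 0;
}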
diff --git a/lib/scudo/scudo_flags.h b/lib/scudo/scudo_flags.h
new file mode 100644
index 0000000..c16f635
--- /dev/null
+++ b/lib/scudo/scudo_flags.h
@@ -0,0 +1,33 @@
+//===-- scudo_flags.h -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Header for scudo_flags.cpp.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_FLAGS_H_
+#define SCUDO_FLAGS_H_
+
+namespace __scudo {
+
+struct Flags {
+#define SCUDO_FLAG(Type, Name, DefaultValue, Description) Type Name;
+#include "scudo_flags.inc"
+#undef SCUDO_FLAG
+
+  void setDefaults();
+};
+
+Flags *getFlags();
+
+void initFlags();
+
+} // namespace __scudo
+
+#endif  // SCUDO_FLAGS_H_
diff --git a/lib/scudo/scudo_flags.inc b/lib/scudo/scudo_flags.inc
new file mode 100644
index 0000000..c7a2acf
--- /dev/null
+++ b/lib/scudo/scudo_flags.inc
@@ -0,0 +1,35 @@
+//===-- scudo_flags.inc -----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Hardened Allocator runtime flags.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_FLAG
+# error "Define SCUDO_FLAG prior to including this file!"
+#endif
+
+SCUDO_FLAG(int, QuarantineSizeMb, 64,
+           "Size (in Mb) of quarantine used to delay the actual deallocation "
+           "of chunks. Lower value may reduce memory usage but decrease the "
+           "effectiveness of the mitigation.")
+
+SCUDO_FLAG(int, ThreadLocalQuarantineSizeKb, 1024,
+          "Size (in Kb) of per-thread cache used to offload the global "
+          "quarantine. Lower value may reduce memory usage but might increase "
+          "the contention on the global quarantine.")
+
+SCUDO_FLAG(bool, DeallocationTypeMismatch, true,
+          "Report errors on malloc/delete, new/free, new/delete[], etc.")
+
+SCUDO_FLAG(bool, DeleteSizeMismatch, true,
+           "Report errors on mismatch between size of new and delete.")
+
+SCUDO_FLAG(bool, ZeroContents, false,
+          "Zero chunk contents on allocation and deallocation.")
diff --git a/lib/scudo/scudo_interceptors.cpp b/lib/scudo/scudo_interceptors.cpp
new file mode 100644
index 0000000..9204652
--- /dev/null
+++ b/lib/scudo/scudo_interceptors.cpp
@@ -0,0 +1,75 @@
+//===-- scudo_interceptors.cpp ----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Linux specific malloc interception functions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_LINUX
+
+#include "scudo_allocator.h"
+
+#include "interception/interception.h"
+
+using namespace __scudo;
+
+INTERCEPTOR(void, free, void *ptr) {
+  scudoFree(ptr, FromMalloc);
+}
+
+INTERCEPTOR(void, cfree, void *ptr) {
+  scudoFree(ptr, FromMalloc);
+}
+
+INTERCEPTOR(void*, malloc, uptr size) {
+  return scudoMalloc(size, FromMalloc);
+}
+
+INTERCEPTOR(void*, realloc, void *ptr, uptr size) {
+  return scudoRealloc(ptr, size);
+}
+
+INTERCEPTOR(void*, calloc, uptr nmemb, uptr size) {
+  return scudoCalloc(nmemb, size);
+}
+
+INTERCEPTOR(void*, valloc, uptr size) {
+  return scudoValloc(size);
+}
+
+INTERCEPTOR(void*, memalign, uptr alignment, uptr size) {
+  return scudoMemalign(alignment, size);
+}
+
+INTERCEPTOR(void*, __libc_memalign, uptr alignment, uptr size) {
+  return scudoMemalign(alignment, size);
+}
+
+INTERCEPTOR(void*, pvalloc, uptr size) {
+  return scudoPvalloc(size);
+}
+
+INTERCEPTOR(void*, aligned_alloc, uptr alignment, uptr size) {
+  return scudoAlignedAlloc(alignment, size);
+}
+
+INTERCEPTOR(int, posix_memalign, void **memptr, uptr alignment, uptr size) {
+  return scudoPosixMemalign(memptr, alignment, size);
+}
+
+INTERCEPTOR(uptr, malloc_usable_size, void *ptr) {
+  return scudoMallocUsableSize(ptr);
+}
+
+INTERCEPTOR(int, mallopt, int cmd, int value) {
+  return -1;
+}
+
+#endif // SANITIZER_LINUX
diff --git a/lib/scudo/scudo_new_delete.cpp b/lib/scudo/scudo_new_delete.cpp
new file mode 100644
index 0000000..172f565
--- /dev/null
+++ b/lib/scudo/scudo_new_delete.cpp
@@ -0,0 +1,69 @@
+//===-- scudo_new_delete.cpp ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Interceptors for operators new and delete.
+///
+//===----------------------------------------------------------------------===//
+
+#include "scudo_allocator.h"
+
+#include "interception/interception.h"
+
+#include <cstddef>
+
+using namespace __scudo;
+
+#define CXX_OPERATOR_ATTRIBUTE INTERCEPTOR_ATTRIBUTE
+
+// Fake std::nothrow_t to avoid including <new>.
+namespace std {
+struct nothrow_t {};
+} // namespace std
+
+CXX_OPERATOR_ATTRIBUTE
+void *operator new(size_t size) {
+  return scudoMalloc(size, FromNew);
+}
+CXX_OPERATOR_ATTRIBUTE
+void *operator new[](size_t size) {
+  return scudoMalloc(size, FromNewArray);
+}
+CXX_OPERATOR_ATTRIBUTE
+void *operator new(size_t size, std::nothrow_t const&) {
+  return scudoMalloc(size, FromNew);
+}
+CXX_OPERATOR_ATTRIBUTE
+void *operator new[](size_t size, std::nothrow_t const&) {
+  return scudoMalloc(size, FromNewArray);
+}
+
+CXX_OPERATOR_ATTRIBUTE
+void operator delete(void *ptr) NOEXCEPT {
+  return scudoFree(ptr, FromNew);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete[](void *ptr) NOEXCEPT {
+  return scudoFree(ptr, FromNewArray);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete(void *ptr, std::nothrow_t const&) NOEXCEPT {
+  return scudoFree(ptr, FromNew);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete[](void *ptr, std::nothrow_t const&) NOEXCEPT {
+  return scudoFree(ptr, FromNewArray);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete(void *ptr, size_t size) NOEXCEPT {
+  scudoSizedFree(ptr, size, FromNew);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete[](void *ptr, size_t size) NOEXCEPT {
+  scudoSizedFree(ptr, size, FromNewArray);
+}
diff --git a/lib/scudo/scudo_termination.cpp b/lib/scudo/scudo_termination.cpp
new file mode 100644
index 0000000..a533383
--- /dev/null
+++ b/lib/scudo/scudo_termination.cpp
@@ -0,0 +1,42 @@
+//===-- scudo_termination.cpp -----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file contains bare-bones termination functions to replace the
+/// __sanitizer ones, in order to avoid any potential abuse of the callbacks
+/// functionality.
+///
+//===----------------------------------------------------------------------===//
+
+#include "scudo_utils.h"
+
+#include "sanitizer_common/sanitizer_common.h"
+
+namespace __sanitizer {
+
+bool AddDieCallback(DieCallbackType Callback) { return true; }
+
+bool RemoveDieCallback(DieCallbackType Callback) { return true; }
+
+void SetUserDieCallback(DieCallbackType Callback) {}
+
+void NORETURN Die() {
+  if (common_flags()->abort_on_error)
+    Abort();
+  internal__exit(common_flags()->exitcode);
+}
+
+void SetCheckFailedCallback(CheckFailedCallbackType callback) {}
+
+void NORETURN CheckFailed(const char *File, int Line, const char *Condition,
+                          u64 Value1, u64 Value2) {
+  __scudo::dieWithMessage("Scudo CHECK failed: %s:%d %s (%lld, %lld)\n",
+                          File, Line, Condition, Value1, Value2);
+}
+
+} // namespace __sanitizer
diff --git a/lib/scudo/scudo_utils.cpp b/lib/scudo/scudo_utils.cpp
new file mode 100644
index 0000000..f45569e
--- /dev/null
+++ b/lib/scudo/scudo_utils.cpp
@@ -0,0 +1,133 @@
+//===-- scudo_utils.cpp -----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Platform specific utility functions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "scudo_utils.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <unistd.h>
+
+#include <cstring>
+
+// TODO(kostyak): remove __sanitizer *Printf uses in favor of our own less
+//                complicated string formatting code. The following is a
+//                temporary workaround to be able to use __sanitizer::VSNPrintf.
+namespace __sanitizer {
+
+extern int VSNPrintf(char *buff, int buff_length, const char *format,
+                     va_list args);
+
+} // namespace __sanitizer
+
+namespace __scudo {
+
+FORMAT(1, 2)
+void NORETURN dieWithMessage(const char *Format, ...) {
+  // Our messages are tiny; 128 characters is more than enough.
+  char Message[128];
+  va_list Args;
+  va_start(Args, Format);
+  __sanitizer::VSNPrintf(Message, sizeof(Message), Format, Args);
+  va_end(Args);
+  RawWrite(Message);
+  Die();
+}
+
+typedef struct {
+  u32 Eax;
+  u32 Ebx;
+  u32 Ecx;
+  u32 Edx;
+} CPUIDInfo;
+
+static void getCPUID(CPUIDInfo *info, u32 leaf, u32 subleaf)
+{
+  asm volatile("cpuid"
+      : "=a" (info->Eax), "=b" (info->Ebx), "=c" (info->Ecx), "=d" (info->Edx)
+      : "a" (leaf), "c" (subleaf)
+  );
+}
+
+// Returns true if the CPU is a "GenuineIntel" or "AuthenticAMD".
+static bool isSupportedCPU()
+{
+  CPUIDInfo Info;
+
+  getCPUID(&Info, 0, 0);
+  if (memcmp(reinterpret_cast<char *>(&Info.Ebx), "Genu", 4) == 0 &&
+      memcmp(reinterpret_cast<char *>(&Info.Edx), "ineI", 4) == 0 &&
+      memcmp(reinterpret_cast<char *>(&Info.Ecx), "ntel", 4) == 0) {
+      return true;
+  }
+  if (memcmp(reinterpret_cast<char *>(&Info.Ebx), "Auth", 4) == 0 &&
+      memcmp(reinterpret_cast<char *>(&Info.Edx), "enti", 4) == 0 &&
+      memcmp(reinterpret_cast<char *>(&Info.Ecx), "cAMD", 4) == 0) {
+      return true;
+  }
+  return false;
+}
+
+bool testCPUFeature(CPUFeature feature)
+{
+  static bool InfoInitialized = false;
+  static CPUIDInfo CPUInfo = {};
+
+  if (InfoInitialized == false) {
+    if (isSupportedCPU() == true)
+      getCPUID(&CPUInfo, 1, 0);
+    else
+      UNIMPLEMENTED();
+    InfoInitialized = true;
+  }
+  switch (feature) {
+    case SSE4_2:
+      return ((CPUInfo.Ecx >> 20) & 0x1) != 0;
+    default:
+      break;
+  }
+  return false;
+}
+
+// readRetry attempts to read Count bytes from the given Fd, retrying after
+// EINTR until Count bytes have been read, EOF is reached, or an error occurs.
+static ssize_t readRetry(int Fd, u8 *Buffer, size_t Count) {
+  ssize_t AmountRead = 0;
+  while (static_cast<size_t>(AmountRead) < Count) {
+    ssize_t Result = read(Fd, Buffer + AmountRead, Count - AmountRead);
+    if (Result > 0)
+      AmountRead += Result;
+    else if (!Result)
+      break;
+    else if (errno != EINTR) {
+      AmountRead = -1;
+      break;
+    }
+  }
+  return AmountRead;
+}
+
+// Default constructor for Xorshift128Plus seeds the state with /dev/urandom
+Xorshift128Plus::Xorshift128Plus() {
+  int Fd = open("/dev/urandom", O_RDONLY);
+  bool Success = readRetry(Fd, reinterpret_cast<u8 *>(&State_0_),
+                           sizeof(State_0_)) == sizeof(State_0_);
+  Success &= readRetry(Fd, reinterpret_cast<u8 *>(&State_1_),
+                           sizeof(State_1_)) == sizeof(State_1_);
+  close(Fd);
+  if (!Success) {
+    dieWithMessage("ERROR: failed to read enough data from /dev/urandom.\n");
+  }
+}
+
+} // namespace __scudo
diff --git a/lib/scudo/scudo_utils.h b/lib/scudo/scudo_utils.h
new file mode 100644
index 0000000..c4f0760
--- /dev/null
+++ b/lib/scudo/scudo_utils.h
@@ -0,0 +1,59 @@
+//===-- scudo_utils.h -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Header for scudo_utils.cpp.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_UTILS_H_
+#define SCUDO_UTILS_H_
+
+#include <string.h>
+
+#include "sanitizer_common/sanitizer_common.h"
+
+namespace __scudo {
+
+template <class Dest, class Source>
+inline Dest bit_cast(const Source& source) {
+  static_assert(sizeof(Dest) == sizeof(Source), "Sizes are not equal!");
+  Dest dest;
+  memcpy(&dest, &source, sizeof(dest));
+  return dest;
+}
+
+void NORETURN dieWithMessage(const char *Format, ...);
+
+enum  CPUFeature {
+  SSE4_2 = 0,
+  ENUM_CPUFEATURE_MAX
+};
+bool testCPUFeature(CPUFeature feature);
+
+// Tiny PRNG based on https://en.wikipedia.org/wiki/Xorshift#xorshift.2B
+// The state (128 bits) will be stored in thread local storage.
+struct Xorshift128Plus {
+ public:
+  Xorshift128Plus();
+  u64 Next() {
+    u64 x = State_0_;
+    const u64 y = State_1_;
+    State_0_ = y;
+    x ^= x << 23;
+    State_1_ = x ^ y ^ (x >> 17) ^ (y >> 26);
+    return State_1_ + y;
+  }
+ private:
+  u64 State_0_;
+  u64 State_1_;
+};
+
+} // namespace __scudo
+
+#endif  // SCUDO_UTILS_H_
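
The thread-local Xorshift128Plus PRNG above supplies the global cookie and the per-chunk header salts. A standalone sketch of the same step function, seeded here from std::random_device rather than /dev/urandom:

#include <cstdint>
#include <cstdio>
#include <random>

// Same xorshift128+ step as in scudo_utils.h, with 128 bits of state.
struct Xorshift128Plus {
  uint64_t S0, S1;
  Xorshift128Plus(uint64_t Seed0, uint64_t Seed1) : S0(Seed0), S1(Seed1) {}
  uint64_t Next() {
    uint64_t X = S0;
    const uint64_t Y = S1;
    S0 = Y;
    X ^= X << 23;
    S1 = X ^ Y ^ (X >> 17) ^ (Y >> 26);
    return S1 + Y;
  }
};

int main() {
  std::random_device Rd;
  uint64_t Seed0 = (static_cast<uint64_t>(Rd()) << 32) | Rd();
  uint64_t Seed1 = (static_cast<uint64_t>(Rd()) << 32) | Rd();
  Xorshift128Plus Prng(Seed0, Seed1);
  uint64_t Cookie = Prng.Next();                       // per-process cookie
  uint16_t Salt = static_cast<uint16_t>(Prng.Next());  // per-chunk salt
  std::printf("cookie=%016llx salt=%04x\n",
              static_cast<unsigned long long>(Cookie), static_cast<unsigned>(Salt));
  return 0;
}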
diff --git a/lib/stats/CMakeLists.txt b/lib/stats/CMakeLists.txt
index 086ee09..33ab1ae 100644
--- a/lib/stats/CMakeLists.txt
+++ b/lib/stats/CMakeLists.txt
@@ -1,6 +1,7 @@
 include_directories(..)
 
 add_custom_target(stats)
+set_target_properties(stats PROPERTIES FOLDER "Compiler-RT Misc")
 
 if(APPLE)
   set(STATS_LIB_FLAVOR SHARED)
diff --git a/lib/stats/stats.cc b/lib/stats/stats.cc
index 6b937f1..df9845a 100644
--- a/lib/stats/stats.cc
+++ b/lib/stats/stats.cc
@@ -62,7 +62,7 @@
     return;
   if (!stats_fd)
     OpenStatsFile(path_env);
-  LoadedModule *mod = Symbolizer::GetOrInit()->FindModuleForAddress(
+  const LoadedModule *mod = Symbolizer::GetOrInit()->FindModuleForAddress(
       reinterpret_cast<uptr>(smodp));
   WriteToFile(stats_fd, mod->full_name(),
               internal_strlen(mod->full_name()) + 1);
diff --git a/lib/tsan/CMakeLists.txt b/lib/tsan/CMakeLists.txt
index f72e542..1ce5821 100644
--- a/lib/tsan/CMakeLists.txt
+++ b/lib/tsan/CMakeLists.txt
@@ -6,7 +6,7 @@
 # SANITIZER_COMMON_CFLAGS contains -fPIC, but it's performance-critical for
 # TSan runtime to be built with -fPIE to reduce the number of register spills.
 append_list_if(COMPILER_RT_HAS_FPIE_FLAG -fPIE TSAN_CFLAGS)
-append_no_rtti_flag(TSAN_CFLAGS)
+append_rtti_flag(OFF TSAN_CFLAGS)
 
 if(COMPILER_RT_TSAN_DEBUG_OUTPUT)
   # Add extra debug information to TSan runtime. This configuration is rarely
@@ -25,22 +25,24 @@
 set(TSAN_SOURCES
   rtl/tsan_clock.cc
   rtl/tsan_debugging.cc
-  rtl/tsan_flags.cc
   rtl/tsan_fd.cc
+  rtl/tsan_flags.cc
   rtl/tsan_ignoreset.cc
   rtl/tsan_interceptors.cc
+  rtl/tsan_interface.cc
   rtl/tsan_interface_ann.cc
   rtl/tsan_interface_atomic.cc
-  rtl/tsan_interface.cc
   rtl/tsan_interface_java.cc
   rtl/tsan_malloc_mac.cc
   rtl/tsan_md5.cc
   rtl/tsan_mman.cc
   rtl/tsan_mutex.cc
   rtl/tsan_mutexset.cc
+  rtl/tsan_preinit.cc
   rtl/tsan_report.cc
   rtl/tsan_rtl.cc
   rtl/tsan_rtl_mutex.cc
+  rtl/tsan_rtl_proc.cc
   rtl/tsan_rtl_report.cc
   rtl/tsan_rtl_thread.cc
   rtl/tsan_stack_trace.cc
@@ -95,6 +97,7 @@
 
 set(TSAN_RUNTIME_LIBRARIES)
 add_custom_target(tsan)
+set_target_properties(tsan PROPERTIES FOLDER "Compiler-RT Misc")
 
 if(APPLE)
   set(TSAN_ASM_SOURCES rtl/tsan_rtl_amd64.S)
@@ -116,7 +119,7 @@
                 RTUbsan
     CFLAGS ${TSAN_RTL_CFLAGS}
     PARENT_TARGET tsan)
-  add_compiler_rt_object_libraries(RTTsan_dynamic 
+  add_compiler_rt_object_libraries(RTTsan_dynamic
     OS ${TSAN_SUPPORTED_OS}
     ARCHS ${TSAN_SUPPORTED_ARCH}
     SOURCES ${TSAN_SOURCES} ${TSAN_CXX_SOURCES} ${TSAN_ASM_SOURCES}
@@ -157,6 +160,11 @@
      # Pass ASM file directly to the C++ compiler.
      set_source_files_properties(${TSAN_ASM_SOURCES} PROPERTIES
        LANGUAGE C)
+    elseif(arch MATCHES "mips64|mips64le")
+     set(TSAN_ASM_SOURCES rtl/tsan_rtl_mips64.S)
+     # Pass ASM file directly to the C++ compiler.
+     set_source_files_properties(${TSAN_ASM_SOURCES} PROPERTIES
+       LANGUAGE C)
     else()
       set(TSAN_ASM_SOURCES)
     endif()
@@ -196,7 +204,7 @@
 # FreeBSD does not install a number of Clang-provided headers for the compiler
 # in the base system due to incompatibilities between FreeBSD's and Clang's
 # versions. As a workaround do not use --sysroot=. on FreeBSD until this is
-# addressed.                                                        
+# addressed.
 if(COMPILER_RT_HAS_SYSROOT_FLAG AND NOT CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
   file(GLOB _tsan_generic_sources rtl/tsan*)
   file(GLOB _tsan_platform_sources rtl/tsan*posix* rtl/tsan*mac*
@@ -209,10 +217,17 @@
 # Build libcxx instrumented with TSan.
 if(COMPILER_RT_HAS_LIBCXX_SOURCES AND
    COMPILER_RT_TEST_COMPILER_ID STREQUAL "Clang")
-  set(LIBCXX_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/libcxx_tsan)
-  add_custom_libcxx(libcxx_tsan ${LIBCXX_PREFIX}
-    DEPS ${TSAN_RUNTIME_LIBRARIES}
-    CFLAGS -fsanitize=thread)
+  set(libcxx_tsan_deps)
+  foreach(arch ${TSAN_SUPPORTED_ARCH})
+    get_target_flags_for_arch(${arch} TARGET_CFLAGS)
+    set(LIBCXX_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/libcxx_tsan_${arch})
+    add_custom_libcxx(libcxx_tsan_${arch} ${LIBCXX_PREFIX}
+      DEPS ${TSAN_RUNTIME_LIBRARIES}
+      CFLAGS ${TARGET_CFLAGS} -fsanitize=thread)
+    list(APPEND libcxx_tsan_deps libcxx_tsan_${arch})
+  endforeach()
+
+  add_custom_target(libcxx_tsan DEPENDS ${libcxx_tsan_deps})
 endif()
 
 if(COMPILER_RT_INCLUDE_TESTS)
diff --git a/lib/tsan/check_analyze.sh b/lib/tsan/check_analyze.sh
index 0f6cc06..a5d3632 100755
--- a/lib/tsan/check_analyze.sh
+++ b/lib/tsan/check_analyze.sh
@@ -32,13 +32,7 @@
   check $f pop 2
 done
 
-for f in write2 write4; do
-  check $f rsp 1
-  check $f push 4
-  check $f pop 4
-done
-
-for f in write8; do
+for f in write2 write4 write8; do
   check $f rsp 1
   check $f push 3
   check $f pop 3
diff --git a/lib/tsan/dd/CMakeLists.txt b/lib/tsan/dd/CMakeLists.txt
index 6330bd9..bcff35f 100644
--- a/lib/tsan/dd/CMakeLists.txt
+++ b/lib/tsan/dd/CMakeLists.txt
@@ -3,7 +3,7 @@
 include_directories(../..)
 
 set(DD_CFLAGS ${SANITIZER_COMMON_CFLAGS})
-append_no_rtti_flag(DD_CFLAGS)
+append_rtti_flag(OFF DD_CFLAGS)
 
 set(DD_SOURCES
   dd_rtl.cc
diff --git a/lib/tsan/go/build.bat b/lib/tsan/go/build.bat
index 7d393dc..3ada9ab 100644
--- a/lib/tsan/go/build.bat
+++ b/lib/tsan/go/build.bat
@@ -1,4 +1,4 @@
-type tsan_go.cc ..\rtl\tsan_interface_atomic.cc ..\rtl\tsan_clock.cc ..\rtl\tsan_flags.cc ..\rtl\tsan_md5.cc ..\rtl\tsan_mutex.cc ..\rtl\tsan_report.cc ..\rtl\tsan_rtl.cc ..\rtl\tsan_rtl_mutex.cc ..\rtl\tsan_rtl_report.cc ..\rtl\tsan_rtl_thread.cc ..\rtl\tsan_stat.cc ..\rtl\tsan_suppressions.cc ..\rtl\tsan_sync.cc ..\rtl\tsan_stack_trace.cc ..\..\sanitizer_common\sanitizer_allocator.cc ..\..\sanitizer_common\sanitizer_common.cc ..\..\sanitizer_common\sanitizer_flags.cc ..\..\sanitizer_common\sanitizer_stacktrace.cc ..\..\sanitizer_common\sanitizer_libc.cc ..\..\sanitizer_common\sanitizer_printf.cc ..\..\sanitizer_common\sanitizer_suppressions.cc ..\..\sanitizer_common\sanitizer_thread_registry.cc ..\rtl\tsan_platform_windows.cc ..\..\sanitizer_common\sanitizer_win.cc ..\..\sanitizer_common\sanitizer_deadlock_detector1.cc ..\..\sanitizer_common\sanitizer_stackdepot.cc ..\..\sanitizer_common\sanitizer_persistent_allocator.cc ..\..\sanitizer_common\sanitizer_flag_parser.cc ..\..\sanitizer_common\sanitizer_symbolizer.cc > gotsan.cc
+type tsan_go.cc ..\rtl\tsan_interface_atomic.cc ..\rtl\tsan_clock.cc ..\rtl\tsan_flags.cc ..\rtl\tsan_md5.cc ..\rtl\tsan_mutex.cc ..\rtl\tsan_report.cc ..\rtl\tsan_rtl.cc ..\rtl\tsan_rtl_mutex.cc ..\rtl\tsan_rtl_report.cc ..\rtl\tsan_rtl_thread.cc ..\rtl\tsan_rtl_proc.cc ..\rtl\tsan_stat.cc ..\rtl\tsan_suppressions.cc ..\rtl\tsan_sync.cc ..\rtl\tsan_stack_trace.cc ..\..\sanitizer_common\sanitizer_allocator.cc ..\..\sanitizer_common\sanitizer_common.cc ..\..\sanitizer_common\sanitizer_flags.cc ..\..\sanitizer_common\sanitizer_stacktrace.cc ..\..\sanitizer_common\sanitizer_libc.cc ..\..\sanitizer_common\sanitizer_printf.cc ..\..\sanitizer_common\sanitizer_suppressions.cc ..\..\sanitizer_common\sanitizer_thread_registry.cc ..\rtl\tsan_platform_windows.cc ..\..\sanitizer_common\sanitizer_win.cc ..\..\sanitizer_common\sanitizer_deadlock_detector1.cc ..\..\sanitizer_common\sanitizer_stackdepot.cc ..\..\sanitizer_common\sanitizer_persistent_allocator.cc ..\..\sanitizer_common\sanitizer_flag_parser.cc ..\..\sanitizer_common\sanitizer_symbolizer.cc ..\..\sanitizer_common\sanitizer_termination.cc > gotsan.cc
 
 gcc -c -o race_windows_amd64.syso gotsan.cc -I..\rtl -I..\.. -I..\..\sanitizer_common -I..\..\..\include -m64 -Wall -fno-exceptions -fno-rtti -DSANITIZER_GO -Wno-error=attributes -Wno-attributes -Wno-format -Wno-maybe-uninitialized -DSANITIZER_DEBUG=0 -O3 -fomit-frame-pointer -std=c++11
 
diff --git a/lib/tsan/go/buildgo.sh b/lib/tsan/go/buildgo.sh
index fdbd405..834e325 100755
--- a/lib/tsan/go/buildgo.sh
+++ b/lib/tsan/go/buildgo.sh
@@ -14,6 +14,7 @@
 	../rtl/tsan_rtl_mutex.cc
 	../rtl/tsan_rtl_report.cc
 	../rtl/tsan_rtl_thread.cc
+	../rtl/tsan_rtl_proc.cc
 	../rtl/tsan_stack_trace.cc
 	../rtl/tsan_stat.cc
 	../rtl/tsan_suppressions.cc
@@ -32,6 +33,7 @@
 	../../sanitizer_common/sanitizer_stackdepot.cc
 	../../sanitizer_common/sanitizer_stacktrace.cc
 	../../sanitizer_common/sanitizer_symbolizer.cc
+	../../sanitizer_common/sanitizer_termination.cc
 "
 
 if [ "`uname -a | grep Linux`" != "" ]; then
@@ -50,23 +52,24 @@
 		../../sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
 	"
 elif [ "`uname -a | grep FreeBSD`" != "" ]; then
-        SUFFIX="freebsd_amd64"
-        OSCFLAGS="-fno-strict-aliasing -fPIC -Werror"
-        OSLDFLAGS="-lpthread -fPIC -fpie"
-        SRCS="
-                $SRCS
-                ../rtl/tsan_platform_linux.cc
-                ../../sanitizer_common/sanitizer_posix.cc
-                ../../sanitizer_common/sanitizer_posix_libcdep.cc
-                ../../sanitizer_common/sanitizer_procmaps_common.cc
-                ../../sanitizer_common/sanitizer_procmaps_freebsd.cc
-                ../../sanitizer_common/sanitizer_linux.cc
-                ../../sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
-        "
+	SUFFIX="freebsd_amd64"
+	OSCFLAGS="-fno-strict-aliasing -fPIC -Werror"
+	OSLDFLAGS="-lpthread -fPIC -fpie"
+	SRCS="
+		$SRCS
+		../rtl/tsan_platform_linux.cc
+		../../sanitizer_common/sanitizer_posix.cc
+		../../sanitizer_common/sanitizer_posix_libcdep.cc
+		../../sanitizer_common/sanitizer_procmaps_common.cc
+		../../sanitizer_common/sanitizer_procmaps_freebsd.cc
+		../../sanitizer_common/sanitizer_linux.cc
+		../../sanitizer_common/sanitizer_linux_libcdep.cc
+		../../sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
+	"
 elif [ "`uname -a | grep Darwin`" != "" ]; then
 	SUFFIX="darwin_amd64"
-	OSCFLAGS="-fPIC -Wno-unused-const-variable -Wno-unknown-warning-option"
-	OSLDFLAGS="-lpthread -fPIC -fpie"
+	OSCFLAGS="-fPIC -Wno-unused-const-variable -Wno-unknown-warning-option -mmacosx-version-min=10.7"
+	OSLDFLAGS="-lpthread -fPIC -fpie -mmacosx-version-min=10.7"
 	SRCS="
 		$SRCS
 		../rtl/tsan_platform_mac.cc
@@ -122,7 +125,7 @@
 fi
 $CC $DIR/gotsan.cc -c -o $DIR/race_$SUFFIX.syso $FLAGS $CFLAGS
 
-$CC test.c $DIR/race_$SUFFIX.syso -m64 -o $DIR/test $OSLDFLAGS
+$CC $OSCFLAGS test.c $DIR/race_$SUFFIX.syso -m64 -g -o $DIR/test $OSLDFLAGS
 
 export GORACE="exitcode=0 atexit_sleep_ms=0"
 if [ "$SILENT" != "1" ]; then
diff --git a/lib/tsan/go/test.c b/lib/tsan/go/test.c
index 94433f1..b3e31b1 100644
--- a/lib/tsan/go/test.c
+++ b/lib/tsan/go/test.c
@@ -12,22 +12,37 @@
 //===----------------------------------------------------------------------===//
 
 #include <stdio.h>
+#include <stdlib.h>
 
-void __tsan_init(void **thr, void (*cb)(void*));
+void __tsan_init(void **thr, void **proc, void (*cb)(long, void*));
 void __tsan_fini();
 void __tsan_map_shadow(void *addr, unsigned long size);
 void __tsan_go_start(void *thr, void **chthr, void *pc);
 void __tsan_go_end(void *thr);
+void __tsan_proc_create(void **pproc);
+void __tsan_proc_destroy(void *proc);
+void __tsan_proc_wire(void *proc, void *thr);
+void __tsan_proc_unwire(void *proc, void *thr);
 void __tsan_read(void *thr, void *addr, void *pc);
 void __tsan_write(void *thr, void *addr, void *pc);
 void __tsan_func_enter(void *thr, void *pc);
 void __tsan_func_exit(void *thr);
-void __tsan_malloc(void *p, unsigned long sz);
+void __tsan_malloc(void *thr, void *pc, void *p, unsigned long sz);
+void __tsan_free(void *p, unsigned long sz);
 void __tsan_acquire(void *thr, void *addr);
 void __tsan_release(void *thr, void *addr);
 void __tsan_release_merge(void *thr, void *addr);
 
-void symbolize_cb(void *ctx) {}
+void *current_proc;
+
+void symbolize_cb(long cmd, void *ctx) {
+  switch (cmd) {
+  case 0:
+    if (current_proc == 0)
+      abort();
+    *(void**)ctx = current_proc;
+  }
+}
 
 char buf0[100<<10];
 
@@ -36,18 +51,22 @@
 
 int main(void) {
   void *thr0 = 0;
+  void *proc0 = 0;
+  __tsan_init(&thr0, &proc0, symbolize_cb);
+  current_proc = proc0;
   char *buf = (char*)((unsigned long)buf0 + (64<<10) - 1 & ~((64<<10) - 1));
-  __tsan_malloc(buf, 10);
-  __tsan_init(&thr0, symbolize_cb);
   __tsan_map_shadow(buf, 4096);
+  __tsan_malloc(thr0, (char*)&barfoo + 1, buf, 10);
+  __tsan_free(buf, 10);
   __tsan_func_enter(thr0, (char*)&main + 1);
-  __tsan_malloc(buf, 10);
+  __tsan_malloc(thr0, (char*)&barfoo + 1, buf, 10);
   __tsan_release(thr0, buf);
   __tsan_release_merge(thr0, buf);
   void *thr1 = 0;
   __tsan_go_start(thr0, &thr1, (char*)&barfoo + 1);
   void *thr2 = 0;
   __tsan_go_start(thr0, &thr2, (char*)&barfoo + 1);
+  __tsan_func_exit(thr0);
   __tsan_func_enter(thr1, (char*)&foobar + 1);
   __tsan_func_enter(thr1, (char*)&foobar + 1);
   __tsan_write(thr1, buf, (char*)&barfoo + 1);
@@ -55,11 +74,16 @@
   __tsan_func_exit(thr1);
   __tsan_func_exit(thr1);
   __tsan_go_end(thr1);
+  void *proc1 = 0;
+  __tsan_proc_create(&proc1);
+  current_proc = proc1;
   __tsan_func_enter(thr2, (char*)&foobar + 1);
   __tsan_read(thr2, buf, (char*)&barfoo + 1);
+  __tsan_free(buf, 10);
   __tsan_func_exit(thr2);
   __tsan_go_end(thr2);
-  __tsan_func_exit(thr0);
+  __tsan_proc_destroy(proc1);
+  current_proc = proc0;
   __tsan_fini();
   return 0;
 }
diff --git a/lib/tsan/go/tsan_go.cc b/lib/tsan/go/tsan_go.cc
index ea0beb7..bc0d553 100644
--- a/lib/tsan/go/tsan_go.cc
+++ b/lib/tsan/go/tsan_go.cc
@@ -28,10 +28,6 @@
   return false;
 }
 
-ReportLocation *SymbolizeData(uptr addr) {
-  return 0;
-}
-
 void *internal_alloc(MBlockType typ, uptr sz) {
   return InternalAlloc(sz);
 }
@@ -40,7 +36,16 @@
   InternalFree(p);
 }
 
-struct SymbolizeContext {
+// Callback into Go.
+static void (*go_runtime_cb)(uptr cmd, void *ctx);
+
+enum {
+  CallbackGetProc = 0,
+  CallbackSymbolizeCode = 1,
+  CallbackSymbolizeData = 2,
+};
+
+struct SymbolizeCodeContext {
   uptr pc;
   char *func;
   char *file;
@@ -49,31 +54,83 @@
   uptr res;
 };
 
-// Callback into Go.
-static void (*symbolize_cb)(SymbolizeContext *ctx);
-
 SymbolizedStack *SymbolizeCode(uptr addr) {
   SymbolizedStack *s = SymbolizedStack::New(addr);
-  SymbolizeContext ctx;
-  internal_memset(&ctx, 0, sizeof(ctx));
-  ctx.pc = addr;
-  symbolize_cb(&ctx);
-  if (ctx.res) {
+  SymbolizeCodeContext cbctx;
+  internal_memset(&cbctx, 0, sizeof(cbctx));
+  cbctx.pc = addr;
+  go_runtime_cb(CallbackSymbolizeCode, &cbctx);
+  if (cbctx.res) {
     AddressInfo &info = s->info;
-    info.module_offset = ctx.off;
-    info.function = internal_strdup(ctx.func ? ctx.func : "??");
-    info.file = internal_strdup(ctx.file ? ctx.file : "-");
-    info.line = ctx.line;
+    info.module_offset = cbctx.off;
+    info.function = internal_strdup(cbctx.func ? cbctx.func : "??");
+    info.file = internal_strdup(cbctx.file ? cbctx.file : "-");
+    info.line = cbctx.line;
     info.column = 0;
   }
   return s;
 }
 
-extern "C" {
+struct SymbolizeDataContext {
+  uptr addr;
+  uptr heap;
+  uptr start;
+  uptr size;
+  char *name;
+  char *file;
+  uptr line;
+  uptr res;
+};
+
+ReportLocation *SymbolizeData(uptr addr) {
+  SymbolizeDataContext cbctx;
+  internal_memset(&cbctx, 0, sizeof(cbctx));
+  cbctx.addr = addr;
+  go_runtime_cb(CallbackSymbolizeData, &cbctx);
+  if (!cbctx.res)
+    return 0;
+  if (cbctx.heap) {
+    MBlock *b = ctx->metamap.GetBlock(cbctx.start);
+    if (!b)
+      return 0;
+    ReportLocation *loc = ReportLocation::New(ReportLocationHeap);
+    loc->heap_chunk_start = cbctx.start;
+    loc->heap_chunk_size = b->siz;
+    loc->tid = b->tid;
+    loc->stack = SymbolizeStackId(b->stk);
+    return loc;
+  } else {
+    ReportLocation *loc = ReportLocation::New(ReportLocationGlobal);
+    loc->global.name = internal_strdup(cbctx.name ? cbctx.name : "??");
+    loc->global.file = internal_strdup(cbctx.file ? cbctx.file : "??");
+    loc->global.line = cbctx.line;
+    loc->global.start = cbctx.start;
+    loc->global.size = cbctx.size;
+    return loc;
+  }
+}
 
 static ThreadState *main_thr;
 static bool inited;
 
+static Processor* get_cur_proc() {
+  if (UNLIKELY(!inited)) {
+    // Running Initialize().
+    // We have not yet returned the Processor to Go, so we cannot ask for it back.
+    // Currently, Initialize() does not use the Processor, so return nullptr.
+    return nullptr;
+  }
+  Processor *proc;
+  go_runtime_cb(CallbackGetProc, &proc);
+  return proc;
+}
+
+Processor *ThreadState::proc() {
+  return get_cur_proc();
+}
+
+extern "C" {
+
 static ThreadState *AllocGoroutine() {
   ThreadState *thr = (ThreadState*)internal_alloc(MBlockThreadContex,
       sizeof(ThreadState));
@@ -81,11 +138,13 @@
   return thr;
 }
 
-void __tsan_init(ThreadState **thrp, void (*cb)(SymbolizeContext *cb)) {
-  symbolize_cb = cb;
+void __tsan_init(ThreadState **thrp, Processor **procp,
+                 void (*cb)(uptr cmd, void *cb)) {
+  go_runtime_cb = cb;
   ThreadState *thr = AllocGoroutine();
   main_thr = *thrp = thr;
   Initialize(thr);
+  *procp = thr->proc1;
   inited = true;
 }
 
@@ -140,12 +199,17 @@
   FuncExit(thr);
 }
 
-void __tsan_malloc(void *p, uptr sz) {
-  if (!inited)
-    return;
+void __tsan_malloc(ThreadState *thr, uptr pc, uptr p, uptr sz) {
+  CHECK(inited);
+  if (thr && pc)
+    ctx->metamap.AllocBlock(thr, pc, p, sz);
   MemoryResetRange(0, 0, (uptr)p, sz);
 }
 
+void __tsan_free(uptr p, uptr sz) {
+  ctx->metamap.FreeRange(get_cur_proc(), p, sz);
+}
+
 void __tsan_go_start(ThreadState *parent, ThreadState **pthr, void *pc) {
   ThreadState *thr = AllocGoroutine();
   *pthr = thr;
@@ -158,6 +222,14 @@
   internal_free(thr);
 }
 
+void __tsan_proc_create(Processor **pproc) {
+  *pproc = ProcCreate();
+}
+
+void __tsan_proc_destroy(Processor *proc) {
+  ProcDestroy(proc);
+}
+
 void __tsan_acquire(ThreadState *thr, void *addr) {
   Acquire(thr, 0, (uptr)addr);
 }
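
The tsan_go.cc changes above replace the single symbolizer callback with a general (cmd, ctx) callback into the Go runtime, dispatched on CallbackGetProc, CallbackSymbolizeCode and CallbackSymbolizeData. A minimal host-side handler, sketched below, only needs to service CallbackGetProc; the runtime zeroes the symbolize contexts before the call, so leaving them untouched simply yields unsymbolized frames. The current_proc global is an assumption of this sketch (lib/tsan/go/test.c keeps the same kind of variable).

    // Minimal host-side handler for the (cmd, ctx) callback protocol above.
    // Only CallbackGetProc (cmd == 0) is serviced; cmd == 1/2 are the optional
    // symbolization requests and are left untouched, so their res stays 0.
    static void *current_proc;  // set from __tsan_init / __tsan_proc_create

    static void runtime_cb(unsigned long cmd, void *ctx) {
      if (cmd == 0)                    // CallbackGetProc
        *(void **)ctx = current_proc;  // Processor of the current goroutine
      // cmd == 1 (SymbolizeCode) / cmd == 2 (SymbolizeData): leaving the
      // context unmodified makes the runtime print "??" / skip the location.
    }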
diff --git a/lib/tsan/rtl/tsan_debugging.cc b/lib/tsan/rtl/tsan_debugging.cc
index 3d800ff..ac24c89 100644
--- a/lib/tsan/rtl/tsan_debugging.cc
+++ b/lib/tsan/rtl/tsan_debugging.cc
@@ -57,8 +57,7 @@
 // Meant to be called by the debugger.
 SANITIZER_INTERFACE_ATTRIBUTE
 void *__tsan_get_current_report() {
-  const ReportDesc *rep = cur_thread()->current_report;
-  return (void *)rep;
+  return const_cast<ReportDesc*>(cur_thread()->current_report);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
diff --git a/lib/tsan/rtl/tsan_defs.h b/lib/tsan/rtl/tsan_defs.h
index 9c7b329..cdc23d0 100644
--- a/lib/tsan/rtl/tsan_defs.h
+++ b/lib/tsan/rtl/tsan_defs.h
@@ -29,7 +29,11 @@
 #endif
 
 #ifndef TSAN_CONTAINS_UBSAN
-# define TSAN_CONTAINS_UBSAN (CAN_SANITIZE_UB && !defined(SANITIZER_GO))
+# if CAN_SANITIZE_UB && !defined(SANITIZER_GO)
+#  define TSAN_CONTAINS_UBSAN 1
+# else
+#  define TSAN_CONTAINS_UBSAN 0
+# endif
 #endif
 
 namespace __tsan {
@@ -145,6 +149,7 @@
 
 MD5Hash md5_hash(const void *data, uptr size);
 
+struct Processor;
 struct ThreadState;
 class ThreadContext;
 struct Context;
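
The TSAN_CONTAINS_UBSAN change above makes the macro expand to a plain 0 or 1 instead of an expression that still contains defined(...). Using defined inside a macro replacement that is later evaluated in an #if is undefined behavior in C and C++, so compilers may warn or disagree on the result. The FEATURE_X/FEATURE_Y names below are hypothetical and only illustrate the pattern.

    // Illustration of the pattern fixed above (hypothetical feature macros).
    // Bad: a 'defined' produced by macro expansion inside #if is undefined behavior.
    #define HAVE_FOO_BAD (FEATURE_X && !defined(FEATURE_Y))
    // Good: evaluate the condition once and expand to a plain 0/1.
    #if FEATURE_X && !defined(FEATURE_Y)
    # define HAVE_FOO 1
    #else
    # define HAVE_FOO 0
    #endif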
diff --git a/lib/tsan/rtl/tsan_flags.cc b/lib/tsan/rtl/tsan_flags.cc
index 7615231..93f5986 100644
--- a/lib/tsan/rtl/tsan_flags.cc
+++ b/lib/tsan/rtl/tsan_flags.cc
@@ -71,6 +71,7 @@
     cf.print_suppressions = false;
     cf.stack_trace_format = "    #%n %f %S %M";
     cf.exitcode = 66;
+    cf.intercept_tls_get_addr = true;
     OverrideCommonFlags(cf);
   }
 
@@ -108,7 +109,7 @@
     f->report_signal_unsafe = false;
   }
 
-  SetVerbosity(common_flags()->verbosity);
+  InitializeCommonFlags();
 
   if (Verbosity()) ReportUnrecognizedFlags();
 
diff --git a/lib/tsan/rtl/tsan_interceptors.cc b/lib/tsan/rtl/tsan_interceptors.cc
index 283ec6f..a3a50e1 100644
--- a/lib/tsan/rtl/tsan_interceptors.cc
+++ b/lib/tsan/rtl/tsan_interceptors.cc
@@ -19,6 +19,7 @@
 #include "sanitizer_common/sanitizer_platform_limits_posix.h"
 #include "sanitizer_common/sanitizer_placement_new.h"
 #include "sanitizer_common/sanitizer_stacktrace.h"
+#include "sanitizer_common/sanitizer_tls_get_addr.h"
 #include "interception/interception.h"
 #include "tsan_interceptors.h"
 #include "tsan_interface.h"
@@ -74,11 +75,9 @@
 };
 #endif
 
-#if defined(__x86_64__) || defined(__mips__) \
-  || (defined(__powerpc64__) && defined(__BIG_ENDIAN__))
+#if defined(__x86_64__) || defined(__mips__) || SANITIZER_PPC64V1
 #define PTHREAD_ABI_BASE  "GLIBC_2.3.2"
-#elif defined(__aarch64__) || (defined(__powerpc64__) \
-  && defined(__LITTLE_ENDIAN__))
+#elif defined(__aarch64__) || SANITIZER_PPC64V2
 #define PTHREAD_ABI_BASE  "GLIBC_2.17"
 #endif
 
@@ -89,8 +88,6 @@
 extern "C" int pthread_key_create(unsigned *key, void (*destructor)(void* v));
 extern "C" int pthread_setspecific(unsigned key, const void *v);
 DECLARE_REAL(int, pthread_mutexattr_gettype, void *, void *)
-extern "C" int pthread_sigmask(int how, const __sanitizer_sigset_t *set,
-                               __sanitizer_sigset_t *oldset);
 DECLARE_REAL(int, fflush, __sanitizer_FILE *fp)
 DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr size)
 DECLARE_REAL_AND_INTERCEPTOR(void, free, void *ptr)
@@ -113,7 +110,7 @@
 const int EINVAL = 22;
 const int EBUSY = 16;
 const int EOWNERDEAD = 130;
-#if !SANITIZER_MAC
+#if !SANITIZER_FREEBSD && !SANITIZER_MAC
 const int EPOLL_CTL_ADD = 1;
 #endif
 const int SIGILL = 4;
@@ -122,7 +119,7 @@
 const int SIGSEGV = 11;
 const int SIGPIPE = 13;
 const int SIGTERM = 15;
-#if defined(__mips__) || SANITIZER_MAC
+#if defined(__mips__) || SANITIZER_FREEBSD || SANITIZER_MAC
 const int SIGBUS = 10;
 const int SIGSYS = 12;
 #else
@@ -486,6 +483,8 @@
 #elif defined(SANITIZER_LINUX)
 # ifdef __aarch64__
   uptr mangled_sp = env[13];
+# elif defined(__mips64)
+  uptr mangled_sp = env[1];
 # else
   uptr mangled_sp = env[6];
 # endif
@@ -565,8 +564,11 @@
 #endif  // SANITIZER_MAC
 
 TSAN_INTERCEPTOR(void, longjmp, uptr *env, int val) {
+  // Note: if we call REAL(longjmp) in the context of ScopedInterceptor,
+  // bad things will happen. We will jump over ScopedInterceptor dtor and can
+  // leave thr->in_ignored_lib set.
   {
-    SCOPED_TSAN_INTERCEPTOR(longjmp, env, val);
+    SCOPED_INTERCEPTOR_RAW(longjmp, env, val);
   }
   LongJmp(cur_thread(), env);
   REAL(longjmp)(env, val);
@@ -574,7 +576,7 @@
 
 TSAN_INTERCEPTOR(void, siglongjmp, uptr *env, int val) {
   {
-    SCOPED_TSAN_INTERCEPTOR(siglongjmp, env, val);
+    SCOPED_INTERCEPTOR_RAW(siglongjmp, env, val);
   }
   LongJmp(cur_thread(), env);
   REAL(siglongjmp)(env, val);
@@ -649,69 +651,6 @@
 }
 #endif
 
-TSAN_INTERCEPTOR(uptr, strlen, const char *s) {
-  SCOPED_TSAN_INTERCEPTOR(strlen, s);
-  uptr len = internal_strlen(s);
-  MemoryAccessRange(thr, pc, (uptr)s, len + 1, false);
-  return len;
-}
-
-TSAN_INTERCEPTOR(void*, memset, void *dst, int v, uptr size) {
-  // On FreeBSD we get here from libthr internals on thread initialization.
-  if (!COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED) {
-    SCOPED_TSAN_INTERCEPTOR(memset, dst, v, size);
-    MemoryAccessRange(thr, pc, (uptr)dst, size, true);
-  }
-  return internal_memset(dst, v, size);
-}
-
-TSAN_INTERCEPTOR(void*, memcpy, void *dst, const void *src, uptr size) {
-  // On FreeBSD we get here from libthr internals on thread initialization.
-  if (!COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED) {
-    SCOPED_TSAN_INTERCEPTOR(memcpy, dst, src, size);
-    MemoryAccessRange(thr, pc, (uptr)dst, size, true);
-    MemoryAccessRange(thr, pc, (uptr)src, size, false);
-  }
-  // On OS X, calling internal_memcpy here will cause memory corruptions,
-  // because memcpy and memmove are actually aliases of the same implementation.
-  // We need to use internal_memmove here.
-  return internal_memmove(dst, src, size);
-}
-
-TSAN_INTERCEPTOR(void*, memmove, void *dst, void *src, uptr n) {
-  if (!COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED) {
-    SCOPED_TSAN_INTERCEPTOR(memmove, dst, src, n);
-    MemoryAccessRange(thr, pc, (uptr)dst, n, true);
-    MemoryAccessRange(thr, pc, (uptr)src, n, false);
-  }
-  return REAL(memmove)(dst, src, n);
-}
-
-TSAN_INTERCEPTOR(char*, strchr, char *s, int c) {
-  SCOPED_TSAN_INTERCEPTOR(strchr, s, c);
-  char *res = REAL(strchr)(s, c);
-  uptr len = internal_strlen(s);
-  uptr n = res ? (char*)res - (char*)s + 1 : len + 1;
-  READ_STRING_OF_LEN(thr, pc, s, len, n);
-  return res;
-}
-
-#if !SANITIZER_MAC
-TSAN_INTERCEPTOR(char*, strchrnul, char *s, int c) {
-  SCOPED_TSAN_INTERCEPTOR(strchrnul, s, c);
-  char *res = REAL(strchrnul)(s, c);
-  uptr len = (char*)res - (char*)s + 1;
-  READ_STRING(thr, pc, s, len);
-  return res;
-}
-#endif
-
-TSAN_INTERCEPTOR(char*, strrchr, char *s, int c) {
-  SCOPED_TSAN_INTERCEPTOR(strrchr, s, c);
-  MemoryAccessRange(thr, pc, (uptr)s, internal_strlen(s) + 1, false);
-  return REAL(strrchr)(s, c);
-}
-
 TSAN_INTERCEPTOR(char*, strcpy, char *dst, const char *src) {  // NOLINT
   SCOPED_TSAN_INTERCEPTOR(strcpy, dst, src);  // NOLINT
   uptr srclen = internal_strlen(src);
@@ -794,7 +733,8 @@
   if (sz != 0) {
     // If sz == 0, munmap will return EINVAL and don't unmap any memory.
     DontNeedShadowFor((uptr)addr, sz);
-    ctx->metamap.ResetRange(thr, pc, (uptr)addr, (uptr)sz);
+    ScopedGlobalProcessor sgp;
+    ctx->metamap.ResetRange(thr->proc(), (uptr)addr, (uptr)sz);
   }
   int res = REAL(munmap)(addr, sz);
   return res;
@@ -889,12 +829,16 @@
 namespace __tsan {
 void DestroyThreadState() {
   ThreadState *thr = cur_thread();
+  Processor *proc = thr->proc();
   ThreadFinish(thr);
+  ProcUnwire(proc, thr);
+  ProcDestroy(proc);
   ThreadSignalContext *sctx = thr->signal_ctx;
   if (sctx) {
     thr->signal_ctx = 0;
     UnmapOrDie(sctx, sizeof(*sctx));
   }
+  DTLS_Destroy();
   cur_thread_finalize();
 }
 }  // namespace __tsan
@@ -940,6 +884,8 @@
 #endif
     while ((tid = atomic_load(&p->tid, memory_order_acquire)) == 0)
       internal_sched_yield();
+    Processor *proc = ProcCreate();
+    ProcWire(proc, thr);
     ThreadStart(thr, tid, GetTid());
     atomic_store(&p->tid, 0, memory_order_release);
   }
@@ -1399,96 +1345,6 @@
 }
 
 #if SANITIZER_LINUX && !SANITIZER_ANDROID
-TSAN_INTERCEPTOR(int, __xstat, int version, const char *path, void *buf) {
-  SCOPED_TSAN_INTERCEPTOR(__xstat, version, path, buf);
-  READ_STRING(thr, pc, path, 0);
-  return REAL(__xstat)(version, path, buf);
-}
-#define TSAN_MAYBE_INTERCEPT___XSTAT TSAN_INTERCEPT(__xstat)
-#else
-#define TSAN_MAYBE_INTERCEPT___XSTAT
-#endif
-
-TSAN_INTERCEPTOR(int, stat, const char *path, void *buf) {
-#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_ANDROID
-  SCOPED_TSAN_INTERCEPTOR(stat, path, buf);
-  READ_STRING(thr, pc, path, 0);
-  return REAL(stat)(path, buf);
-#else
-  SCOPED_TSAN_INTERCEPTOR(__xstat, 0, path, buf);
-  READ_STRING(thr, pc, path, 0);
-  return REAL(__xstat)(0, path, buf);
-#endif
-}
-
-#if SANITIZER_LINUX && !SANITIZER_ANDROID
-TSAN_INTERCEPTOR(int, __xstat64, int version, const char *path, void *buf) {
-  SCOPED_TSAN_INTERCEPTOR(__xstat64, version, path, buf);
-  READ_STRING(thr, pc, path, 0);
-  return REAL(__xstat64)(version, path, buf);
-}
-#define TSAN_MAYBE_INTERCEPT___XSTAT64 TSAN_INTERCEPT(__xstat64)
-#else
-#define TSAN_MAYBE_INTERCEPT___XSTAT64
-#endif
-
-#if SANITIZER_LINUX && !SANITIZER_ANDROID
-TSAN_INTERCEPTOR(int, stat64, const char *path, void *buf) {
-  SCOPED_TSAN_INTERCEPTOR(__xstat64, 0, path, buf);
-  READ_STRING(thr, pc, path, 0);
-  return REAL(__xstat64)(0, path, buf);
-}
-#define TSAN_MAYBE_INTERCEPT_STAT64 TSAN_INTERCEPT(stat64)
-#else
-#define TSAN_MAYBE_INTERCEPT_STAT64
-#endif
-
-#if SANITIZER_LINUX && !SANITIZER_ANDROID
-TSAN_INTERCEPTOR(int, __lxstat, int version, const char *path, void *buf) {
-  SCOPED_TSAN_INTERCEPTOR(__lxstat, version, path, buf);
-  READ_STRING(thr, pc, path, 0);
-  return REAL(__lxstat)(version, path, buf);
-}
-#define TSAN_MAYBE_INTERCEPT___LXSTAT TSAN_INTERCEPT(__lxstat)
-#else
-#define TSAN_MAYBE_INTERCEPT___LXSTAT
-#endif
-
-TSAN_INTERCEPTOR(int, lstat, const char *path, void *buf) {
-#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_ANDROID
-  SCOPED_TSAN_INTERCEPTOR(lstat, path, buf);
-  READ_STRING(thr, pc, path, 0);
-  return REAL(lstat)(path, buf);
-#else
-  SCOPED_TSAN_INTERCEPTOR(__lxstat, 0, path, buf);
-  READ_STRING(thr, pc, path, 0);
-  return REAL(__lxstat)(0, path, buf);
-#endif
-}
-
-#if SANITIZER_LINUX && !SANITIZER_ANDROID
-TSAN_INTERCEPTOR(int, __lxstat64, int version, const char *path, void *buf) {
-  SCOPED_TSAN_INTERCEPTOR(__lxstat64, version, path, buf);
-  READ_STRING(thr, pc, path, 0);
-  return REAL(__lxstat64)(version, path, buf);
-}
-#define TSAN_MAYBE_INTERCEPT___LXSTAT64 TSAN_INTERCEPT(__lxstat64)
-#else
-#define TSAN_MAYBE_INTERCEPT___LXSTAT64
-#endif
-
-#if SANITIZER_LINUX && !SANITIZER_ANDROID
-TSAN_INTERCEPTOR(int, lstat64, const char *path, void *buf) {
-  SCOPED_TSAN_INTERCEPTOR(__lxstat64, 0, path, buf);
-  READ_STRING(thr, pc, path, 0);
-  return REAL(__lxstat64)(0, path, buf);
-}
-#define TSAN_MAYBE_INTERCEPT_LSTAT64 TSAN_INTERCEPT(lstat64)
-#else
-#define TSAN_MAYBE_INTERCEPT_LSTAT64
-#endif
-
-#if SANITIZER_LINUX && !SANITIZER_ANDROID
 TSAN_INTERCEPTOR(int, __fxstat, int version, int fd, void *buf) {
   SCOPED_TSAN_INTERCEPTOR(__fxstat, version, fd, buf);
   if (fd > 0)
@@ -1705,32 +1561,6 @@
   return res;
 }
 
-#if SANITIZER_LINUX
-TSAN_INTERCEPTOR(int, epoll_create, int size) {
-  SCOPED_TSAN_INTERCEPTOR(epoll_create, size);
-  int fd = REAL(epoll_create)(size);
-  if (fd >= 0)
-    FdPollCreate(thr, pc, fd);
-  return fd;
-}
-#define TSAN_MAYBE_INTERCEPT_EPOLL_CREATE TSAN_INTERCEPT(epoll_create)
-#else
-#define TSAN_MAYBE_INTERCEPT_EPOLL_CREATE
-#endif
-
-#if SANITIZER_LINUX
-TSAN_INTERCEPTOR(int, epoll_create1, int flags) {
-  SCOPED_TSAN_INTERCEPTOR(epoll_create1, flags);
-  int fd = REAL(epoll_create1)(flags);
-  if (fd >= 0)
-    FdPollCreate(thr, pc, fd);
-  return fd;
-}
-#define TSAN_MAYBE_INTERCEPT_EPOLL_CREATE1 TSAN_INTERCEPT(epoll_create1)
-#else
-#define TSAN_MAYBE_INTERCEPT_EPOLL_CREATE1
-#endif
-
 TSAN_INTERCEPTOR(int, close, int fd) {
   SCOPED_TSAN_INTERCEPTOR(close, fd);
   if (fd >= 0)
@@ -1785,37 +1615,6 @@
 }
 #endif
 
-TSAN_INTERCEPTOR(long_t, send, int fd, void *buf, long_t len, int flags) {
-  SCOPED_TSAN_INTERCEPTOR(send, fd, buf, len, flags);
-  if (fd >= 0) {
-    FdAccess(thr, pc, fd);
-    FdRelease(thr, pc, fd);
-  }
-  int res = REAL(send)(fd, buf, len, flags);
-  return res;
-}
-
-TSAN_INTERCEPTOR(long_t, sendmsg, int fd, void *msg, int flags) {
-  SCOPED_TSAN_INTERCEPTOR(sendmsg, fd, msg, flags);
-  if (fd >= 0) {
-    FdAccess(thr, pc, fd);
-    FdRelease(thr, pc, fd);
-  }
-  int res = REAL(sendmsg)(fd, msg, flags);
-  return res;
-}
-
-TSAN_INTERCEPTOR(long_t, recv, int fd, void *buf, long_t len, int flags) {
-  SCOPED_TSAN_INTERCEPTOR(recv, fd, buf, len, flags);
-  if (fd >= 0)
-    FdAccess(thr, pc, fd);
-  int res = REAL(recv)(fd, buf, len, flags);
-  if (res >= 0 && fd >= 0) {
-    FdAcquire(thr, pc, fd);
-  }
-  return res;
-}
-
 TSAN_INTERCEPTOR(int, unlink, char *path) {
   SCOPED_TSAN_INTERCEPTOR(unlink, path);
   Release(thr, pc, File2addr(path));
@@ -1904,6 +1703,22 @@
 }
 
 #if SANITIZER_LINUX
+TSAN_INTERCEPTOR(int, epoll_create, int size) {
+  SCOPED_TSAN_INTERCEPTOR(epoll_create, size);
+  int fd = REAL(epoll_create)(size);
+  if (fd >= 0)
+    FdPollCreate(thr, pc, fd);
+  return fd;
+}
+
+TSAN_INTERCEPTOR(int, epoll_create1, int flags) {
+  SCOPED_TSAN_INTERCEPTOR(epoll_create1, flags);
+  int fd = REAL(epoll_create1)(flags);
+  if (fd >= 0)
+    FdPollCreate(thr, pc, fd);
+  return fd;
+}
+
 TSAN_INTERCEPTOR(int, epoll_ctl, int epfd, int op, int fd, void *ev) {
   SCOPED_TSAN_INTERCEPTOR(epoll_ctl, epfd, op, fd, ev);
   if (epfd >= 0)
@@ -1915,12 +1730,7 @@
   int res = REAL(epoll_ctl)(epfd, op, fd, ev);
   return res;
 }
-#define TSAN_MAYBE_INTERCEPT_EPOLL_CTL TSAN_INTERCEPT(epoll_ctl)
-#else
-#define TSAN_MAYBE_INTERCEPT_EPOLL_CTL
-#endif
 
-#if SANITIZER_LINUX
 TSAN_INTERCEPTOR(int, epoll_wait, int epfd, void *ev, int cnt, int timeout) {
   SCOPED_TSAN_INTERCEPTOR(epoll_wait, epfd, ev, cnt, timeout);
   if (epfd >= 0)
@@ -1930,17 +1740,72 @@
     FdAcquire(thr, pc, epfd);
   return res;
 }
-#define TSAN_MAYBE_INTERCEPT_EPOLL_WAIT TSAN_INTERCEPT(epoll_wait)
+
+TSAN_INTERCEPTOR(int, epoll_pwait, int epfd, void *ev, int cnt, int timeout,
+                 void *sigmask) {
+  SCOPED_TSAN_INTERCEPTOR(epoll_pwait, epfd, ev, cnt, timeout, sigmask);
+  if (epfd >= 0)
+    FdAccess(thr, pc, epfd);
+  int res = BLOCK_REAL(epoll_pwait)(epfd, ev, cnt, timeout, sigmask);
+  if (res > 0 && epfd >= 0)
+    FdAcquire(thr, pc, epfd);
+  return res;
+}
+
+#define TSAN_MAYBE_INTERCEPT_EPOLL \
+    TSAN_INTERCEPT(epoll_create); \
+    TSAN_INTERCEPT(epoll_create1); \
+    TSAN_INTERCEPT(epoll_ctl); \
+    TSAN_INTERCEPT(epoll_wait); \
+    TSAN_INTERCEPT(epoll_pwait)
 #else
-#define TSAN_MAYBE_INTERCEPT_EPOLL_WAIT
+#define TSAN_MAYBE_INTERCEPT_EPOLL
 #endif
 
+// The following functions are intercepted merely to process pending signals.
+// If the program blocks signal X, we must deliver the signal before the function
+// returns. Similarly, if the program unblocks a signal (or returns from
+// sigsuspend), it's better to deliver the signal straight away.
+TSAN_INTERCEPTOR(int, sigsuspend, const __sanitizer_sigset_t *mask) {
+  SCOPED_TSAN_INTERCEPTOR(sigsuspend, mask);
+  return REAL(sigsuspend)(mask);
+}
+
+TSAN_INTERCEPTOR(int, sigblock, int mask) {
+  SCOPED_TSAN_INTERCEPTOR(sigblock, mask);
+  return REAL(sigblock)(mask);
+}
+
+TSAN_INTERCEPTOR(int, sigsetmask, int mask) {
+  SCOPED_TSAN_INTERCEPTOR(sigsetmask, mask);
+  return REAL(sigsetmask)(mask);
+}
+
+TSAN_INTERCEPTOR(int, pthread_sigmask, int how, const __sanitizer_sigset_t *set,
+    __sanitizer_sigset_t *oldset) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_sigmask, how, set, oldset);
+  return REAL(pthread_sigmask)(how, set, oldset);
+}
+
 namespace __tsan {
 
 static void CallUserSignalHandler(ThreadState *thr, bool sync, bool acquire,
     bool sigact, int sig, my_siginfo_t *info, void *uctx) {
   if (acquire)
     Acquire(thr, 0, (uptr)&sigactions[sig]);
+  // Signals are generally asynchronous, so if we receive a signal when
+  // ignores are enabled, we should disable ignores. This is critical for sync
+  // and interceptors, because otherwise we can miss synchronization and report
+  // false races.
+  int ignore_reads_and_writes = thr->ignore_reads_and_writes;
+  int ignore_interceptors = thr->ignore_interceptors;
+  int ignore_sync = thr->ignore_sync;
+  if (!ctx->after_multithreaded_fork) {
+    thr->ignore_reads_and_writes = 0;
+    thr->fast_state.ClearIgnoreBit();
+    thr->ignore_interceptors = 0;
+    thr->ignore_sync = 0;
+  }
   // Ensure that the handler does not spoil errno.
   const int saved_errno = errno;
   errno = 99;
@@ -1956,6 +1821,13 @@
     else
       ((sighandler_t)pc)(sig);
   }
+  if (!ctx->after_multithreaded_fork) {
+    thr->ignore_reads_and_writes = ignore_reads_and_writes;
+    if (ignore_reads_and_writes)
+      thr->fast_state.SetIgnoreBit();
+    thr->ignore_interceptors = ignore_interceptors;
+    thr->ignore_sync = ignore_sync;
+  }
   // We do not detect errno spoiling for SIGTERM,
   // because some SIGTERM handlers do spoil errno but reraise SIGTERM,
   // tsan reports false positive in such case.
@@ -1986,7 +1858,8 @@
   atomic_store(&sctx->have_pending_signals, 0, memory_order_relaxed);
   atomic_fetch_add(&thr->in_signal_handler, 1, memory_order_relaxed);
   internal_sigfillset(&sctx->emptyset);
-  CHECK_EQ(0, pthread_sigmask(SIG_SETMASK, &sctx->emptyset, &sctx->oldset));
+  int res = REAL(pthread_sigmask)(SIG_SETMASK, &sctx->emptyset, &sctx->oldset);
+  CHECK_EQ(res, 0);
   for (int sig = 0; sig < kSigCount; sig++) {
     SignalDesc *signal = &sctx->pending_signals[sig];
     if (signal->armed) {
@@ -1995,7 +1868,8 @@
           &signal->siginfo, &signal->ctx);
     }
   }
-  CHECK_EQ(0, pthread_sigmask(SIG_SETMASK, &sctx->oldset, 0));
+  res = REAL(pthread_sigmask)(SIG_SETMASK, &sctx->oldset, 0);
+  CHECK_EQ(res, 0);
   atomic_fetch_add(&thr->in_signal_handler, -1, memory_order_relaxed);
 }
 
@@ -2025,13 +1899,8 @@
       (sctx && atomic_load(&sctx->in_blocking_func, memory_order_relaxed))) {
     atomic_fetch_add(&thr->in_signal_handler, 1, memory_order_relaxed);
     if (sctx && atomic_load(&sctx->in_blocking_func, memory_order_relaxed)) {
-      // We ignore interceptors in blocking functions,
-      // temporary enbled them again while we are calling user function.
-      int const i = thr->ignore_interceptors;
-      thr->ignore_interceptors = 0;
       atomic_store(&sctx->in_blocking_func, 0, memory_order_relaxed);
       CallUserSignalHandler(thr, sync, true, sigact, sig, info, ctx);
-      thr->ignore_interceptors = i;
       atomic_store(&sctx->in_blocking_func, 1, memory_order_relaxed);
     } else {
       // Be very conservative with when we do acquire in this case.
@@ -2069,7 +1938,10 @@
 }
 
 TSAN_INTERCEPTOR(int, sigaction, int sig, sigaction_t *act, sigaction_t *old) {
-  SCOPED_TSAN_INTERCEPTOR(sigaction, sig, act, old);
+  // Note: if we call REAL(sigaction) directly for any reason without proxying
+  // the signal handler through rtl_sigaction, very bad things will happen.
+  // The handler will run synchronously and corrupt tsan per-thread state.
+  SCOPED_INTERCEPTOR_RAW(sigaction, sig, act, old);
   if (old)
     internal_memcpy(old, &sigactions[sig], sizeof(*old));
   if (act == 0)
@@ -2104,7 +1976,7 @@
 TSAN_INTERCEPTOR(sighandler_t, signal, int sig, sighandler_t h) {
   sigaction_t act;
   act.sa_handler = h;
-  REAL(memset)(&act.sa_mask, -1, sizeof(act.sa_mask));
+  internal_memset(&act.sa_mask, -1, sizeof(act.sa_mask));
   act.sa_flags = 0;
   sigaction_t old;
   int res = sigaction(sig, &act, &old);
@@ -2113,11 +1985,6 @@
   return old.sa_handler;
 }
 
-TSAN_INTERCEPTOR(int, sigsuspend, const __sanitizer_sigset_t *mask) {
-  SCOPED_TSAN_INTERCEPTOR(sigsuspend, mask);
-  return REAL(sigsuspend)(mask);
-}
-
 TSAN_INTERCEPTOR(int, raise, int sig) {
   SCOPED_TSAN_INTERCEPTOR(raise, sig);
   ThreadSignalContext *sctx = SigCtx(thr);
@@ -2306,18 +2173,15 @@
 #undef SANITIZER_INTERCEPT_FGETPWENT
 #undef SANITIZER_INTERCEPT_GETPWNAM_AND_FRIENDS
 #undef SANITIZER_INTERCEPT_GETPWNAM_R_AND_FRIENDS
-// __tls_get_addr can be called with mis-aligned stack due to:
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
-// There are two potential issues:
-// 1. Sanitizer code contains a MOVDQA spill (it does not seem to be the case
-// right now). or 2. ProcessPendingSignal calls user handler which contains
-// MOVDQA spill (this happens right now).
-// Since the interceptor only initializes memory for msan, the simplest solution
-// is to disable the interceptor in tsan (other sanitizers do not call
-// signal handlers from COMMON_INTERCEPTOR_ENTER).
+// We define our own.
+#if SANITIZER_INTERCEPT_TLS_GET_ADDR
+#define NEED_TLS_GET_ADDR
+#endif
 #undef SANITIZER_INTERCEPT_TLS_GET_ADDR
 
 #define COMMON_INTERCEPT_FUNCTION(name) INTERCEPT_FUNCTION(name)
+#define COMMON_INTERCEPT_FUNCTION_VER(name, ver)                          \
+  INTERCEPT_FUNCTION_VER(name, ver)
 
 #define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size)                    \
   MemoryAccessRange(((TsanInterceptorContext *)ctx)->thr,                 \
@@ -2450,7 +2314,7 @@
   }
 };
 
-#if !SANITIZER_MAC
+#if !SANITIZER_FREEBSD && !SANITIZER_MAC
 static void syscall_access_range(uptr pc, uptr p, uptr s, bool write) {
   TSAN_SYSCALL();
   MemoryAccessRange(thr, pc, p, s, write);
@@ -2544,6 +2408,31 @@
 
 #include "sanitizer_common/sanitizer_common_syscalls.inc"
 
+#ifdef NEED_TLS_GET_ADDR
+// Define our own interceptor instead of sanitizer_common's for three reasons:
+// 1. It must not process pending signals.
+//    Signal handlers may contain a MOVDQA instruction (see below).
+// 2. It must be as simple as possible so that it does not contain MOVDQA itself.
+// 3. The sanitizer_common version uses COMMON_INTERCEPTOR_INITIALIZE_RANGE, which
+//    is empty for tsan (meant only for msan).
+// Note: __tls_get_addr can be called with a mis-aligned stack due to:
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
+// So the interceptor must work with a mis-aligned stack; in particular, it must
+// not execute MOVDQA with stack addresses.
+TSAN_INTERCEPTOR(void *, __tls_get_addr, void *arg) {
+  void *res = REAL(__tls_get_addr)(arg);
+  ThreadState *thr = cur_thread();
+  if (!thr)
+    return res;
+  DTLS::DTV *dtv = DTLS_on_tls_get_addr(arg, res, thr->tls_addr, thr->tls_size);
+  if (!dtv)
+    return res;
+  // New DTLS block has been allocated.
+  MemoryResetRange(thr, 0, dtv->beg, dtv->size);
+  return res;
+}
+#endif
+
 namespace __tsan {
 
 static void finalize(void *arg) {
@@ -2604,13 +2493,6 @@
   TSAN_MAYBE_INTERCEPT_PVALLOC;
   TSAN_INTERCEPT(posix_memalign);
 
-  TSAN_INTERCEPT(strlen);
-  TSAN_INTERCEPT(memset);
-  TSAN_INTERCEPT(memcpy);
-  TSAN_INTERCEPT(memmove);
-  TSAN_INTERCEPT(strchr);
-  TSAN_INTERCEPT(strchrnul);
-  TSAN_INTERCEPT(strrchr);
   TSAN_INTERCEPT(strcpy);  // NOLINT
   TSAN_INTERCEPT(strncpy);
   TSAN_INTERCEPT(strdup);
@@ -2653,14 +2535,6 @@
 
   TSAN_INTERCEPT(pthread_once);
 
-  TSAN_INTERCEPT(stat);
-  TSAN_MAYBE_INTERCEPT___XSTAT;
-  TSAN_MAYBE_INTERCEPT_STAT64;
-  TSAN_MAYBE_INTERCEPT___XSTAT64;
-  TSAN_INTERCEPT(lstat);
-  TSAN_MAYBE_INTERCEPT___LXSTAT;
-  TSAN_MAYBE_INTERCEPT_LSTAT64;
-  TSAN_MAYBE_INTERCEPT___LXSTAT64;
   TSAN_INTERCEPT(fstat);
   TSAN_MAYBE_INTERCEPT___FXSTAT;
   TSAN_MAYBE_INTERCEPT_FSTAT64;
@@ -2681,18 +2555,13 @@
   TSAN_INTERCEPT(connect);
   TSAN_INTERCEPT(bind);
   TSAN_INTERCEPT(listen);
-  TSAN_MAYBE_INTERCEPT_EPOLL_CREATE;
-  TSAN_MAYBE_INTERCEPT_EPOLL_CREATE1;
+  TSAN_MAYBE_INTERCEPT_EPOLL;
   TSAN_INTERCEPT(close);
   TSAN_MAYBE_INTERCEPT___CLOSE;
   TSAN_MAYBE_INTERCEPT___RES_ICLOSE;
   TSAN_INTERCEPT(pipe);
   TSAN_INTERCEPT(pipe2);
 
-  TSAN_INTERCEPT(send);
-  TSAN_INTERCEPT(sendmsg);
-  TSAN_INTERCEPT(recv);
-
   TSAN_INTERCEPT(unlink);
   TSAN_INTERCEPT(tmpfile);
   TSAN_MAYBE_INTERCEPT_TMPFILE64;
@@ -2703,12 +2572,12 @@
   TSAN_INTERCEPT(rmdir);
   TSAN_INTERCEPT(closedir);
 
-  TSAN_MAYBE_INTERCEPT_EPOLL_CTL;
-  TSAN_MAYBE_INTERCEPT_EPOLL_WAIT;
-
   TSAN_INTERCEPT(sigaction);
   TSAN_INTERCEPT(signal);
   TSAN_INTERCEPT(sigsuspend);
+  TSAN_INTERCEPT(sigblock);
+  TSAN_INTERCEPT(sigsetmask);
+  TSAN_INTERCEPT(pthread_sigmask);
   TSAN_INTERCEPT(raise);
   TSAN_INTERCEPT(kill);
   TSAN_INTERCEPT(pthread_kill);
@@ -2727,6 +2596,10 @@
   TSAN_INTERCEPT(__cxa_atexit);
   TSAN_INTERCEPT(_exit);
 
+#ifdef NEED_TLS_GET_ADDR
+  TSAN_INTERCEPT(__tls_get_addr);
+#endif
+
 #if !SANITIZER_MAC && !SANITIZER_ANDROID
   // Need to setup it, because interceptors check that the function is resolved.
   // But atexit is emitted directly into the module, so can't be resolved.
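
The interceptor changes above (the thread start path and DestroyThreadState) pair every OS thread with a Processor, which now owns the allocator caches. Stripped of the surrounding plumbing, the lifetime pattern looks roughly like the sketch below; it is written against the internal __tsan calls visible in this diff and is not a standalone program.

    // Sketch of the Processor pairing around a worker thread, using only the
    // internal calls that appear in this diff. Argument plumbing, error
    // handling and the pthread glue are omitted.
    void worker_lifetime_sketch(ThreadState *thr, int tid, uptr os_id) {
      Processor *proc = ProcCreate();   // allocator caches now live here
      ProcWire(proc, thr);              // attach it to the current thread
      ThreadStart(thr, tid, os_id);

      // ... thread body; allocations go through thr->proc()->alloc_cache ...

      ThreadFinish(thr);                // same order as DestroyThreadState()
      ProcUnwire(proc, thr);
      ProcDestroy(proc);
    }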
diff --git a/lib/tsan/rtl/tsan_interface_java.cc b/lib/tsan/rtl/tsan_interface_java.cc
index 0aea63d..95be859 100644
--- a/lib/tsan/rtl/tsan_interface_java.cc
+++ b/lib/tsan/rtl/tsan_interface_java.cc
@@ -111,7 +111,7 @@
   CHECK_GE(ptr, jctx->heap_begin);
   CHECK_LE(ptr + size, jctx->heap_begin + jctx->heap_size);
 
-  ctx->metamap.FreeRange(thr, pc, ptr, size);
+  ctx->metamap.FreeRange(thr->proc(), ptr, size);
 }
 
 void __tsan_java_move(jptr src, jptr dst, jptr size) {
diff --git a/lib/tsan/rtl/tsan_mman.cc b/lib/tsan/rtl/tsan_mman.cc
index 1d9644e..f99ddb3 100644
--- a/lib/tsan/rtl/tsan_mman.cc
+++ b/lib/tsan/rtl/tsan_mman.cc
@@ -63,18 +63,69 @@
   return reinterpret_cast<Allocator*>(&allocator_placeholder);
 }
 
+struct GlobalProc {
+  Mutex mtx;
+  Processor *proc;
+
+  GlobalProc()
+      : mtx(MutexTypeGlobalProc, StatMtxGlobalProc)
+      , proc(ProcCreate()) {
+  }
+};
+
+static char global_proc_placeholder[sizeof(GlobalProc)] ALIGNED(64);
+GlobalProc *global_proc() {
+  return reinterpret_cast<GlobalProc*>(&global_proc_placeholder);
+}
+
+ScopedGlobalProcessor::ScopedGlobalProcessor() {
+  GlobalProc *gp = global_proc();
+  ThreadState *thr = cur_thread();
+  if (thr->proc())
+    return;
+  // If we don't have a proc, use the global one.
+  // There are currently only two known cases where this path is triggered:
+  //   __interceptor_free
+  //   __nptl_deallocate_tsd
+  //   start_thread
+  //   clone
+  // and:
+  //   ResetRange
+  //   __interceptor_munmap
+  //   __deallocate_stack
+  //   start_thread
+  //   clone
+  // Ideally, we would destroy the thread state (and unwire the proc) when a
+  // thread actually exits (i.e. when we join/wait it), making the global proc unnecessary.
+  gp->mtx.Lock();
+  ProcWire(gp->proc, thr);
+}
+
+ScopedGlobalProcessor::~ScopedGlobalProcessor() {
+  GlobalProc *gp = global_proc();
+  ThreadState *thr = cur_thread();
+  if (thr->proc() != gp->proc)
+    return;
+  ProcUnwire(gp->proc, thr);
+  gp->mtx.Unlock();
+}
+
 void InitializeAllocator() {
   allocator()->Init(common_flags()->allocator_may_return_null);
 }
 
-void AllocatorThreadStart(ThreadState *thr) {
-  allocator()->InitCache(&thr->alloc_cache);
-  internal_allocator()->InitCache(&thr->internal_alloc_cache);
+void InitializeAllocatorLate() {
+  new(global_proc()) GlobalProc();
 }
 
-void AllocatorThreadFinish(ThreadState *thr) {
-  allocator()->DestroyCache(&thr->alloc_cache);
-  internal_allocator()->DestroyCache(&thr->internal_alloc_cache);
+void AllocatorProcStart(Processor *proc) {
+  allocator()->InitCache(&proc->alloc_cache);
+  internal_allocator()->InitCache(&proc->internal_alloc_cache);
+}
+
+void AllocatorProcFinish(Processor *proc) {
+  allocator()->DestroyCache(&proc->alloc_cache);
+  internal_allocator()->DestroyCache(&proc->internal_alloc_cache);
 }
 
 void AllocatorPrintStats() {
@@ -98,7 +149,7 @@
 void *user_alloc(ThreadState *thr, uptr pc, uptr sz, uptr align, bool signal) {
   if ((sz >= (1ull << 40)) || (align >= (1ull << 40)))
     return allocator()->ReturnNullOrDie();
-  void *p = allocator()->Allocate(&thr->alloc_cache, sz, align);
+  void *p = allocator()->Allocate(&thr->proc()->alloc_cache, sz, align);
   if (p == 0)
     return 0;
   if (ctx && ctx->initialized)
@@ -118,9 +169,10 @@
 }
 
 void user_free(ThreadState *thr, uptr pc, void *p, bool signal) {
+  ScopedGlobalProcessor sgp;
   if (ctx && ctx->initialized)
     OnUserFree(thr, pc, (uptr)p, true);
-  allocator()->Deallocate(&thr->alloc_cache, p);
+  allocator()->Deallocate(&thr->proc()->alloc_cache, p);
   if (signal)
     SignalUnsafeCall(thr, pc);
 }
@@ -136,7 +188,7 @@
 
 void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write) {
   CHECK_NE(p, (void*)0);
-  uptr sz = ctx->metamap.FreeBlock(thr, pc, p);
+  uptr sz = ctx->metamap.FreeBlock(thr->proc(), p);
   DPrintf("#%d: free(%p, %zu)\n", thr->tid, p, sz);
   if (write && thr->ignore_reads_and_writes == 0)
     MemoryRangeFreed(thr, pc, (uptr)p, sz);
@@ -172,6 +224,7 @@
   if (ctx == 0 || !ctx->initialized || thr->ignore_interceptors)
     return;
   __sanitizer_malloc_hook(ptr, size);
+  RunMallocHooks(ptr, size);
 }
 
 void invoke_free_hook(void *ptr) {
@@ -179,6 +232,7 @@
   if (ctx == 0 || !ctx->initialized || thr->ignore_interceptors)
     return;
   __sanitizer_free_hook(ptr);
+  RunFreeHooks(ptr);
 }
 
 void *internal_alloc(MBlockType typ, uptr sz) {
@@ -187,7 +241,7 @@
     thr->nomalloc = 0;  // CHECK calls internal_malloc().
     CHECK(0);
   }
-  return InternalAlloc(sz, &thr->internal_alloc_cache);
+  return InternalAlloc(sz, &thr->proc()->internal_alloc_cache);
 }
 
 void internal_free(void *p) {
@@ -196,7 +250,7 @@
     thr->nomalloc = 0;  // CHECK calls internal_malloc().
     CHECK(0);
   }
-  InternalFree(p, &thr->internal_alloc_cache);
+  InternalFree(p, &thr->proc()->internal_alloc_cache);
 }
 
 }  // namespace __tsan
@@ -238,8 +292,8 @@
 
 void __tsan_on_thread_idle() {
   ThreadState *thr = cur_thread();
-  allocator()->SwallowCache(&thr->alloc_cache);
-  internal_allocator()->SwallowCache(&thr->internal_alloc_cache);
-  ctx->metamap.OnThreadIdle(thr);
+  allocator()->SwallowCache(&thr->proc()->alloc_cache);
+  internal_allocator()->SwallowCache(&thr->proc()->internal_alloc_cache);
+  ctx->metamap.OnProcIdle(thr->proc());
 }
 }  // extern "C"
diff --git a/lib/tsan/rtl/tsan_mman.h b/lib/tsan/rtl/tsan_mman.h
index b419b58..8cdeeb3 100644
--- a/lib/tsan/rtl/tsan_mman.h
+++ b/lib/tsan/rtl/tsan_mman.h
@@ -20,9 +20,10 @@
 const uptr kDefaultAlignment = 16;
 
 void InitializeAllocator();
+void InitializeAllocatorLate();
 void ReplaceSystemMalloc();
-void AllocatorThreadStart(ThreadState *thr);
-void AllocatorThreadFinish(ThreadState *thr);
+void AllocatorProcStart(Processor *proc);
+void AllocatorProcFinish(Processor *proc);
 void AllocatorPrintStats();
 
 // For user allocations.
diff --git a/lib/tsan/rtl/tsan_mutex.cc b/lib/tsan/rtl/tsan_mutex.cc
index 9dd2480..22afefc 100644
--- a/lib/tsan/rtl/tsan_mutex.cc
+++ b/lib/tsan/rtl/tsan_mutex.cc
@@ -43,6 +43,7 @@
   /*11 MutexTypeDDetector*/   {},
   /*12 MutexTypeFired*/       {MutexTypeLeaf},
   /*13 MutexTypeRacy*/        {MutexTypeLeaf},
+  /*14 MutexTypeGlobalProc*/  {},
 };
 
 static bool CanLockAdj[MutexTypeCount][MutexTypeCount];
diff --git a/lib/tsan/rtl/tsan_mutex.h b/lib/tsan/rtl/tsan_mutex.h
index 27f5538..22ee2f3 100644
--- a/lib/tsan/rtl/tsan_mutex.h
+++ b/lib/tsan/rtl/tsan_mutex.h
@@ -34,6 +34,7 @@
   MutexTypeDDetector,
   MutexTypeFired,
   MutexTypeRacy,
+  MutexTypeGlobalProc,
 
   // This must be the last.
   MutexTypeCount
diff --git a/lib/tsan/rtl/tsan_platform.h b/lib/tsan/rtl/tsan_platform.h
index c2b4871..213c6b5 100644
--- a/lib/tsan/rtl/tsan_platform.h
+++ b/lib/tsan/rtl/tsan_platform.h
@@ -169,6 +169,27 @@
   static const uptr kVdsoBeg       = 0x37f00000000ull;
 };
 
+struct Mapping48 {
+  static const uptr kLoAppMemBeg   = 0x0000000001000ull;
+  static const uptr kLoAppMemEnd   = 0x0000200000000ull;
+  static const uptr kShadowBeg     = 0x0002000000000ull;
+  static const uptr kShadowEnd     = 0x0004000000000ull;
+  static const uptr kMetaShadowBeg = 0x0005000000000ull;
+  static const uptr kMetaShadowEnd = 0x0006000000000ull;
+  static const uptr kMidAppMemBeg  = 0x0aaaa00000000ull;
+  static const uptr kMidAppMemEnd  = 0x0aaaf00000000ull;
+  static const uptr kMidShadowOff  = 0x0aaa800000000ull;
+  static const uptr kTraceMemBeg   = 0x0f06000000000ull;
+  static const uptr kTraceMemEnd   = 0x0f06200000000ull;
+  static const uptr kHeapMemBeg    = 0x0ffff00000000ull;
+  static const uptr kHeapMemEnd    = 0x0ffff00000000ull;
+  static const uptr kHiAppMemBeg   = 0x0ffff00000000ull;
+  static const uptr kHiAppMemEnd   = 0x1000000000000ull;
+  static const uptr kAppMemMsk     = 0x0fff800000000ull;
+  static const uptr kAppMemXor     = 0x0000800000000ull;
+  static const uptr kVdsoBeg       = 0xffff000000000ull;
+};
+
 // Indicates the runtime will define the memory regions at runtime.
 #define TSAN_RUNTIME_VMA 1
 // Indicates that mapping defines a mid range memory segment.
@@ -297,7 +318,7 @@
   static const uptr kShadowEnd     = 0x050000000000ull;
   static const uptr kAppMemBeg     = 0x000000001000ull;
   static const uptr kAppMemEnd     = 0x00e000000000ull;
-}
+};
 
 #else
 # error "Unknown platform"
@@ -362,11 +383,13 @@
 template<int Type>
 uptr MappingArchImpl(void) {
 #ifdef __aarch64__
-  if (vmaSize == 39)
-    return MappingImpl<Mapping39, Type>();
-  else
-    return MappingImpl<Mapping42, Type>();
+  switch (vmaSize) {
+    case 39: return MappingImpl<Mapping39, Type>();
+    case 42: return MappingImpl<Mapping42, Type>();
+    case 48: return MappingImpl<Mapping48, Type>();
+  }
   DCHECK(0);
+  return 0;
 #elif defined(__powerpc64__)
   if (vmaSize == 44)
     return MappingImpl<Mapping44, Type>();
@@ -513,11 +536,13 @@
 ALWAYS_INLINE
 bool IsAppMem(uptr mem) {
 #ifdef __aarch64__
-  if (vmaSize == 39)
-    return IsAppMemImpl<Mapping39>(mem);
-  else
-    return IsAppMemImpl<Mapping42>(mem);
+  switch (vmaSize) {
+    case 39: return IsAppMemImpl<Mapping39>(mem);
+    case 42: return IsAppMemImpl<Mapping42>(mem);
+    case 48: return IsAppMemImpl<Mapping48>(mem);
+  }
   DCHECK(0);
+  return false;
 #elif defined(__powerpc64__)
   if (vmaSize == 44)
     return IsAppMemImpl<Mapping44>(mem);
@@ -538,11 +563,13 @@
 ALWAYS_INLINE
 bool IsShadowMem(uptr mem) {
 #ifdef __aarch64__
-  if (vmaSize == 39)
-    return IsShadowMemImpl<Mapping39>(mem);
-  else
-    return IsShadowMemImpl<Mapping42>(mem);
+  switch (vmaSize) {
+    case 39: return IsShadowMemImpl<Mapping39>(mem);
+    case 42: return IsShadowMemImpl<Mapping42>(mem);
+    case 48: return IsShadowMemImpl<Mapping48>(mem);
+  }
   DCHECK(0);
+  return false;
 #elif defined(__powerpc64__)
   if (vmaSize == 44)
     return IsShadowMemImpl<Mapping44>(mem);
@@ -563,11 +590,13 @@
 ALWAYS_INLINE
 bool IsMetaMem(uptr mem) {
 #ifdef __aarch64__
-  if (vmaSize == 39)
-    return IsMetaMemImpl<Mapping39>(mem);
-  else
-    return IsMetaMemImpl<Mapping42>(mem);
+  switch (vmaSize) {
+    case 39: return IsMetaMemImpl<Mapping39>(mem);
+    case 42: return IsMetaMemImpl<Mapping42>(mem);
+    case 48: return IsMetaMemImpl<Mapping48>(mem);
+  }
   DCHECK(0);
+  return false;
 #elif defined(__powerpc64__)
   if (vmaSize == 44)
     return IsMetaMemImpl<Mapping44>(mem);
@@ -587,18 +616,24 @@
   return (((x) & ~(Mapping::kAppMemMsk | (kShadowCell - 1)))
       ^ Mapping::kAppMemXor) * kShadowCnt;
 #else
+# ifndef SANITIZER_WINDOWS
   return ((x & ~(kShadowCell - 1)) * kShadowCnt) | Mapping::kShadowBeg;
+# else
+  return ((x & ~(kShadowCell - 1)) * kShadowCnt) + Mapping::kShadowBeg;
+# endif
 #endif
 }
 
 ALWAYS_INLINE
 uptr MemToShadow(uptr x) {
 #ifdef __aarch64__
-  if (vmaSize == 39)
-    return MemToShadowImpl<Mapping39>(x);
-  else
-    return MemToShadowImpl<Mapping42>(x);
+  switch (vmaSize) {
+    case 39: return MemToShadowImpl<Mapping39>(x);
+    case 42: return MemToShadowImpl<Mapping42>(x);
+    case 48: return MemToShadowImpl<Mapping48>(x);
+  }
   DCHECK(0);
+  return 0;
 #elif defined(__powerpc64__)
   if (vmaSize == 44)
     return MemToShadowImpl<Mapping44>(x);
@@ -627,11 +662,13 @@
 ALWAYS_INLINE
 u32 *MemToMeta(uptr x) {
 #ifdef __aarch64__
-  if (vmaSize == 39)
-    return MemToMetaImpl<Mapping39>(x);
-  else
-    return MemToMetaImpl<Mapping42>(x);
+  switch (vmaSize) {
+    case 39: return MemToMetaImpl<Mapping39>(x);
+    case 42: return MemToMetaImpl<Mapping42>(x);
+    case 48: return MemToMetaImpl<Mapping48>(x);
+  }
   DCHECK(0);
+  return 0;
 #elif defined(__powerpc64__)
   if (vmaSize == 44)
     return MemToMetaImpl<Mapping44>(x);
@@ -662,7 +699,6 @@
 # ifndef SANITIZER_WINDOWS
   return (s & ~Mapping::kShadowBeg) / kShadowCnt;
 # else
-  // FIXME(dvyukov): this is most likely wrong as the mapping is not bijection.
   return (s - Mapping::kShadowBeg) / kShadowCnt;
 # endif // SANITIZER_WINDOWS
 #endif
@@ -671,11 +707,13 @@
 ALWAYS_INLINE
 uptr ShadowToMem(uptr s) {
 #ifdef __aarch64__
-  if (vmaSize == 39)
-    return ShadowToMemImpl<Mapping39>(s);
-  else
-    return ShadowToMemImpl<Mapping42>(s);
+  switch (vmaSize) {
+    case 39: return ShadowToMemImpl<Mapping39>(s);
+    case 42: return ShadowToMemImpl<Mapping42>(s);
+    case 48: return ShadowToMemImpl<Mapping48>(s);
+  }
   DCHECK(0);
+  return 0;
 #elif defined(__powerpc64__)
   if (vmaSize == 44)
     return ShadowToMemImpl<Mapping44>(s);
@@ -704,11 +742,13 @@
 ALWAYS_INLINE
 uptr GetThreadTrace(int tid) {
 #ifdef __aarch64__
-  if (vmaSize == 39)
-    return GetThreadTraceImpl<Mapping39>(tid);
-  else
-    return GetThreadTraceImpl<Mapping42>(tid);
+  switch (vmaSize) {
+    case 39: return GetThreadTraceImpl<Mapping39>(tid);
+    case 42: return GetThreadTraceImpl<Mapping42>(tid);
+    case 48: return GetThreadTraceImpl<Mapping48>(tid);
+  }
   DCHECK(0);
+  return 0;
 #elif defined(__powerpc64__)
   if (vmaSize == 44)
     return GetThreadTraceImpl<Mapping44>(tid);
@@ -732,11 +772,13 @@
 ALWAYS_INLINE
 uptr GetThreadTraceHeader(int tid) {
 #ifdef __aarch64__
-  if (vmaSize == 39)
-    return GetThreadTraceHeaderImpl<Mapping39>(tid);
-  else
-    return GetThreadTraceHeaderImpl<Mapping42>(tid);
+  switch (vmaSize) {
+    case 39: return GetThreadTraceHeaderImpl<Mapping39>(tid);
+    case 42: return GetThreadTraceHeaderImpl<Mapping42>(tid);
+    case 48: return GetThreadTraceHeaderImpl<Mapping48>(tid);
+  }
   DCHECK(0);
+  return 0;
 #elif defined(__powerpc64__)
   if (vmaSize == 44)
     return GetThreadTraceHeaderImpl<Mapping44>(tid);
@@ -754,10 +796,6 @@
 void InitializeShadowMemoryPlatform();
 void FlushShadowMemory();
 void WriteMemoryProfile(char *buf, uptr buf_size, uptr nthread, uptr nlive);
-
-// Says whether the addr relates to a global var.
-// Guesses with high probability, may yield both false positives and negatives.
-bool IsGlobalVar(uptr addr);
 int ExtractResolvFDs(void *state, int *fds, int nfd);
 int ExtractRecvmsgFDs(void *msg, int *fds, int nfd);
 
diff --git a/lib/tsan/rtl/tsan_platform_linux.cc b/lib/tsan/rtl/tsan_platform_linux.cc
index 3e962fc..cd80e17 100644
--- a/lib/tsan/rtl/tsan_platform_linux.cc
+++ b/lib/tsan/rtl/tsan_platform_linux.cc
@@ -36,6 +36,10 @@
 #include <string.h>
 #include <stdarg.h>
 #include <sys/mman.h>
+#if SANITIZER_LINUX
+#include <sys/personality.h>
+#include <setjmp.h>
+#endif
 #include <sys/syscall.h>
 #include <sys/socket.h>
 #include <sys/time.h>
@@ -64,10 +68,11 @@
 void *__libc_stack_end = 0;
 #endif
 
-namespace __tsan {
+#if SANITIZER_LINUX && defined(__aarch64__)
+void InitializeGuardPtr() __attribute__((visibility("hidden")));
+#endif
 
-static uptr g_data_start;
-static uptr g_data_end;
+namespace __tsan {
 
 #ifdef TSAN_RUNTIME_VMA
 // Runtime detected VMA size.
@@ -201,46 +206,6 @@
   MapRodata();
 }
 
-static void InitDataSeg() {
-  MemoryMappingLayout proc_maps(true);
-  uptr start, end, offset;
-  char name[128];
-#if SANITIZER_FREEBSD
-  // On FreeBSD BSS is usually the last block allocated within the
-  // low range and heap is the last block allocated within the range
-  // 0x800000000-0x8ffffffff.
-  while (proc_maps.Next(&start, &end, &offset, name, ARRAY_SIZE(name),
-                        /*protection*/ 0)) {
-    DPrintf("%p-%p %p %s\n", start, end, offset, name);
-    if ((start & 0xffff00000000ULL) == 0 && (end & 0xffff00000000ULL) == 0 &&
-        name[0] == '\0') {
-      g_data_start = start;
-      g_data_end = end;
-    }
-  }
-#else
-  bool prev_is_data = false;
-  while (proc_maps.Next(&start, &end, &offset, name, ARRAY_SIZE(name),
-                        /*protection*/ 0)) {
-    DPrintf("%p-%p %p %s\n", start, end, offset, name);
-    bool is_data = offset != 0 && name[0] != 0;
-    // BSS may get merged with [heap] in /proc/self/maps. This is not very
-    // reliable.
-    bool is_bss = offset == 0 &&
-      (name[0] == 0 || internal_strcmp(name, "[heap]") == 0) && prev_is_data;
-    if (g_data_start == 0 && is_data)
-      g_data_start = start;
-    if (is_bss)
-      g_data_end = end;
-    prev_is_data = is_data;
-  }
-#endif
-  DPrintf("guessed data_start=%p data_end=%p\n",  g_data_start, g_data_end);
-  CHECK_LT(g_data_start, g_data_end);
-  CHECK_GE((uptr)&g_data_start, g_data_start);
-  CHECK_LT((uptr)&g_data_start, g_data_end);
-}
-
 #endif  // #ifndef SANITIZER_GO
 
 void InitializePlatformEarly() {
@@ -248,9 +213,9 @@
   vmaSize =
     (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1);
 #if defined(__aarch64__)
-  if (vmaSize != 39 && vmaSize != 42) {
+  if (vmaSize != 39 && vmaSize != 42 && vmaSize != 48) {
     Printf("FATAL: ThreadSanitizer: unsupported VMA range\n");
-    Printf("FATAL: Found %d - Supported 39 and 42\n", vmaSize);
+    Printf("FATAL: Found %d - Supported 39, 42 and 48\n", vmaSize);
     Die();
   }
 #elif defined(__powerpc64__)
@@ -291,6 +256,22 @@
       SetAddressSpaceUnlimited();
       reexec = true;
     }
+#if SANITIZER_LINUX && defined(__aarch64__)
+    // After the patch "arm64: mm: support ARCH_MMAP_RND_BITS." was introduced
+    // in the Linux kernel, the random gap between the stack and the mapped area
+    // grew from 128M to 36G on 39-bit aarch64. As it is almost impossible to
+    // cover such a big range, we disable randomized virtual address space on aarch64.
+    int old_personality = personality(0xffffffff);
+    if (old_personality != -1 && (old_personality & ADDR_NO_RANDOMIZE) == 0) {
+      VReport(1, "WARNING: Program is run with randomized virtual address "
+              "space, which wouldn't work with ThreadSanitizer.\n"
+              "Re-execing with fixed virtual address space.\n");
+      CHECK_NE(personality(old_personality | ADDR_NO_RANDOMIZE), -1);
+      reexec = true;
+    }
+    // Initialize the guard pointer used in {sig}{set,long}jump.
+    InitializeGuardPtr();
+#endif
     if (reexec)
       ReExec();
   }
@@ -298,14 +279,9 @@
 #ifndef SANITIZER_GO
   CheckAndProtect();
   InitTlsSize();
-  InitDataSeg();
 #endif
 }
 
-bool IsGlobalVar(uptr addr) {
-  return g_data_start && addr >= g_data_start && addr < g_data_end;
-}
-
 #ifndef SANITIZER_GO
 // Extract file descriptors passed to glibc internal __res_iclose function.
 // This is required to properly "close" the fds, because we do not see internal
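
The aarch64 block added to InitializePlatform above disables address-space randomization and re-execs, because the enlarged mmap randomization gap cannot be covered by TSan's fixed shadow layout. The personality(2) pattern it relies on can be tried in isolation; the /proc/self/exe re-exec below is an assumption of this sketch (TSan itself goes through its own ReExec() helper).

    // Standalone illustration of the personality()-based ASLR check used above:
    // query the current personality, and if randomization is still enabled,
    // set ADDR_NO_RANDOMIZE and re-exec so the new image gets a fixed layout.
    #include <sys/personality.h>
    #include <unistd.h>
    #include <cstdio>

    int main(int, char **argv) {
      int old = personality(0xffffffff);            // query without changing
      if (old != -1 && (old & ADDR_NO_RANDOMIZE) == 0) {
        fprintf(stderr, "re-execing with ASLR disabled\n");
        if (personality(old | ADDR_NO_RANDOMIZE) == -1)
          return 1;
        execv("/proc/self/exe", argv);              // no return on success
        return 1;
      }
      fprintf(stderr, "running with ADDR_NO_RANDOMIZE set\n");
      return 0;
    }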
diff --git a/lib/tsan/rtl/tsan_platform_mac.cc b/lib/tsan/rtl/tsan_platform_mac.cc
index 53b1d5a..0cc02ab 100644
--- a/lib/tsan/rtl/tsan_platform_mac.cc
+++ b/lib/tsan/rtl/tsan_platform_mac.cc
@@ -133,10 +133,12 @@
   if (event == PTHREAD_INTROSPECTION_THREAD_CREATE) {
     if (thread == pthread_self()) {
       // The current thread is a newly created GCD worker thread.
+      ThreadState *thr = cur_thread();
+      Processor *proc = ProcCreate();
+      ProcWire(proc, thr);
       ThreadState *parent_thread_state = nullptr;  // No parent.
       int tid = ThreadCreate(parent_thread_state, 0, (uptr)thread, true);
       CHECK_NE(tid, 0);
-      ThreadState *thr = cur_thread();
       ThreadStart(thr, tid, GetTid());
     }
   } else if (event == PTHREAD_INTROSPECTION_THREAD_TERMINATE) {
@@ -185,10 +187,6 @@
 }
 #endif
 
-bool IsGlobalVar(uptr addr) {
-  return false;
-}
-
 }  // namespace __tsan
 
 #endif  // SANITIZER_MAC
diff --git a/lib/tsan/rtl/tsan_platform_posix.cc b/lib/tsan/rtl/tsan_platform_posix.cc
index 90476cb..805ce1b 100644
--- a/lib/tsan/rtl/tsan_platform_posix.cc
+++ b/lib/tsan/rtl/tsan_platform_posix.cc
@@ -105,7 +105,7 @@
   CHECK_LE(beg, end);
   if (beg == end)
     return;
-  if (beg != (uptr)MmapNoAccess(beg, end - beg)) {
+  if (beg != (uptr)MmapFixedNoAccess(beg, end - beg)) {
     Printf("FATAL: ThreadSanitizer can not protect [%zx,%zx]\n", beg, end);
     Printf("FATAL: Make sure you are not using unlimited stack\n");
     Die();
diff --git a/lib/tsan/rtl/tsan_preinit.cc b/lib/tsan/rtl/tsan_preinit.cc
new file mode 100644
index 0000000..a96618d
--- /dev/null
+++ b/lib/tsan/rtl/tsan_preinit.cc
@@ -0,0 +1,27 @@
+//===-- tsan_preinit.cc ---------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer.
+//
+// Call __tsan_init at the very early stage of process startup.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "tsan_interface.h"
+
+#if SANITIZER_CAN_USE_PREINIT_ARRAY
+
+// The symbol is called __local_tsan_preinit, because it's not intended to be
+// exported.
+// This code is linked into the main executable when -fsanitize=thread is in
+// the link flags. It can only use exported interface functions.
+__attribute__((section(".preinit_array"), used))
+void (*__local_tsan_preinit)(void) = __tsan_init;
+
+#endif
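
The .preinit_array hook used here is generic ELF/glibc behaviour and only takes effect in the main executable, not in shared objects. A self-contained illustration with invented names (my_early_init, __local_preinit) rather than the real tsan entry point:

  // Sketch: run a hook before static constructors and before main().
  #include <cstdio>

  static void my_early_init() {
    fprintf(stderr, "preinit_array hook running\n");
  }

  __attribute__((section(".preinit_array"), used))
  static void (*__local_preinit)() = my_early_init;

  int main() {
    fprintf(stderr, "main running\n");
    return 0;
  }
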
diff --git a/lib/tsan/rtl/tsan_report.cc b/lib/tsan/rtl/tsan_report.cc
index e206f49..9360494 100644
--- a/lib/tsan/rtl/tsan_report.cc
+++ b/lib/tsan/rtl/tsan_report.cc
@@ -381,9 +381,9 @@
 
 static void PrintMop(const ReportMop *mop, bool first) {
   Printf("\n");
-  Printf("%s by ",
+  Printf("%s at %p by ",
       (first ? (mop->write ? "Write" : "Read")
-             : (mop->write ? "Previous write" : "Previous read")));
+             : (mop->write ? "Previous write" : "Previous read")), mop->addr);
   if (mop->tid == kMainThreadId)
     Printf("main goroutine:\n");
   else
@@ -391,6 +391,31 @@
   PrintStack(mop->stack);
 }
 
+static void PrintLocation(const ReportLocation *loc) {
+  switch (loc->type) {
+  case ReportLocationHeap: {
+    Printf("\n");
+    Printf("Heap block of size %zu at %p allocated by ",
+        loc->heap_chunk_size, loc->heap_chunk_start);
+    if (loc->tid == kMainThreadId)
+      Printf("main goroutine:\n");
+    else
+      Printf("goroutine %d:\n", loc->tid);
+    PrintStack(loc->stack);
+    break;
+  }
+  case ReportLocationGlobal: {
+    Printf("\n");
+    Printf("Global var %s of size %zu at %p declared at %s:%zu\n",
+        loc->global.name, loc->global.size, loc->global.start,
+        loc->global.file, loc->global.line);
+    break;
+  }
+  default:
+    break;
+  }
+}
+
 static void PrintThread(const ReportThread *rt) {
   if (rt->id == kMainThreadId)
     return;
@@ -406,6 +431,8 @@
     Printf("WARNING: DATA RACE");
     for (uptr i = 0; i < rep->mops.Size(); i++)
       PrintMop(rep->mops[i], i == 0);
+    for (uptr i = 0; i < rep->locs.Size(); i++)
+      PrintLocation(rep->locs[i]);
     for (uptr i = 0; i < rep->threads.Size(); i++)
       PrintThread(rep->threads[i]);
   } else if (rep->typ == ReportTypeDeadlock) {
diff --git a/lib/tsan/rtl/tsan_rtl.cc b/lib/tsan/rtl/tsan_rtl.cc
index 4df4db5..bda75d1 100644
--- a/lib/tsan/rtl/tsan_rtl.cc
+++ b/lib/tsan/rtl/tsan_rtl.cc
@@ -287,6 +287,10 @@
 static void CheckShadowMapping() {
   uptr beg, end;
   for (int i = 0; GetUserRegion(i, &beg, &end); i++) {
+    // Skip cases for empty regions (heap definition for architectures that
+    // do not use 64-bit allocator).
+    if (beg == end)
+      continue;
     VPrintf(3, "checking shadow region %p-%p\n", beg, end);
     for (uptr p0 = beg; p0 <= end; p0 += (end - beg) / 4) {
       for (int x = -1; x <= 1; x++) {
@@ -321,6 +325,7 @@
   const char *options = GetEnv(kTsanOptionsEnv);
   CacheBinaryName();
   InitializeFlags(&ctx->flags, options);
+  AvoidCVE_2016_2143();
   InitializePlatformEarly();
 #ifndef SANITIZER_GO
   // Re-exec ourselves if we need to set additional env or command line args.
@@ -329,6 +334,10 @@
   InitializeAllocator();
   ReplaceSystemMalloc();
 #endif
+  if (common_flags()->detect_deadlocks)
+    ctx->dd = DDetector::Create(flags());
+  Processor *proc = ProcCreate();
+  ProcWire(proc, thr);
   InitializeInterceptors();
   CheckShadowMapping();
   InitializePlatform();
@@ -336,6 +345,7 @@
   InitializeDynamicAnnotations();
 #ifndef SANITIZER_GO
   InitializeShadowMemory();
+  InitializeAllocatorLate();
 #endif
   // Setup correct file descriptor for error reports.
   __sanitizer_set_report_path(common_flags()->log_path);
@@ -351,8 +361,6 @@
   SetSandboxingCallback(StopBackgroundThread);
 #endif
 #endif
-  if (common_flags()->detect_deadlocks)
-    ctx->dd = DDetector::Create(flags());
 
   VPrintf(1, "***** Running under ThreadSanitizer v2 (pid %d) *****\n",
           (int)internal_getpid());
@@ -366,6 +374,10 @@
 #endif
   ctx->initialized = true;
 
+#ifndef SANITIZER_GO
+  Symbolizer::LateInitialize();
+#endif
+
   if (flags()->stop_on_start) {
     Printf("ThreadSanitizer is suspended at startup (pid %d)."
            " Call __tsan_resume().\n",
diff --git a/lib/tsan/rtl/tsan_rtl.h b/lib/tsan/rtl/tsan_rtl.h
index 057a3a6..ff69015 100644
--- a/lib/tsan/rtl/tsan_rtl.h
+++ b/lib/tsan/rtl/tsan_rtl.h
@@ -325,6 +325,36 @@
   uptr *shadow_stack_pos;
 };
 
+// A Processor represents a physical thread, or a P for Go.
+// It is used to store internal resources like allocator caches, and does not
+// participate in race-detection logic (it is invisible to the end user).
+// In C++ it is tied to an OS thread just like ThreadState, though ideally
+// it should be tied to a CPU (this way we would have fewer allocator caches).
+// In Go it is tied to a P, so there are significantly fewer Processors than
+// ThreadStates (which are tied to Gs).
+// A ThreadState must be wired with a Processor to handle events.
+struct Processor {
+  ThreadState *thr; // currently wired thread, or nullptr
+#ifndef SANITIZER_GO
+  AllocatorCache alloc_cache;
+  InternalAllocatorCache internal_alloc_cache;
+#endif
+  DenseSlabAllocCache block_cache;
+  DenseSlabAllocCache sync_cache;
+  DenseSlabAllocCache clock_cache;
+  DDPhysicalThread *dd_pt;
+};
+
+#ifndef SANITIZER_GO
+// ScopedGlobalProcessor temporarily sets up a global processor for the
+// current thread if it does not have one. Intended for interceptors that can
+// run at the very end of a thread, when its processor is already destroyed.
+struct ScopedGlobalProcessor {
+  ScopedGlobalProcessor();
+  ~ScopedGlobalProcessor();
+};
+#endif
+
 // This struct is stored in TLS.
 struct ThreadState {
   FastState fast_state;
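
A reduced, self-contained model of the wiring relation described in the Processor comment above; all names here (ToyThread, ToyProcessor, Wire, Unwire, the integer clock_cache stand-in) are invented for illustration and are not the runtime's API:

  #include <cassert>

  struct ToyProcessor;

  struct ToyThread {
    ToyProcessor *proc1 = nullptr;          // currently wired processor, or null
    ToyProcessor *proc() { return proc1; }  // event handlers reach caches via this
  };

  struct ToyProcessor {
    ToyThread *thr = nullptr;               // currently wired thread, or null
    int clock_cache = 0;                    // stand-in for the per-processor caches
  };

  static void Wire(ToyProcessor *proc, ToyThread *thr) {
    assert(thr->proc1 == nullptr && proc->thr == nullptr);
    thr->proc1 = proc;
    proc->thr = thr;
  }

  static void Unwire(ToyProcessor *proc, ToyThread *thr) {
    assert(thr->proc1 == proc && proc->thr == thr);
    thr->proc1 = nullptr;
    proc->thr = nullptr;
  }

  int main() {
    ToyThread t;
    ToyProcessor p;
    Wire(&p, &t);
    t.proc()->clock_cache++;  // a handler reaching its processor's cache
    Unwire(&p, &t);
    return 0;
  }
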
@@ -360,8 +390,6 @@
   MutexSet mset;
   ThreadClock clock;
 #ifndef SANITIZER_GO
-  AllocatorCache alloc_cache;
-  InternalAllocatorCache internal_alloc_cache;
   Vector<JmpBuf> jmp_bufs;
   int ignore_interceptors;
 #endif
@@ -385,16 +413,19 @@
 #if SANITIZER_DEBUG && !SANITIZER_GO
   InternalDeadlockDetector internal_deadlock_detector;
 #endif
-  DDPhysicalThread *dd_pt;
   DDLogicalThread *dd_lt;
 
+  // Current wired Processor, or nullptr. Required to handle any events.
+  Processor *proc1;
+#ifndef SANITIZER_GO
+  Processor *proc() { return proc1; }
+#else
+  Processor *proc();
+#endif
+
   atomic_uintptr_t in_signal_handler;
   ThreadSignalContext *signal_ctx;
 
-  DenseSlabAllocCache block_cache;
-  DenseSlabAllocCache sync_cache;
-  DenseSlabAllocCache clock_cache;
-
 #ifndef SANITIZER_GO
   u32 last_sleep_stack_id;
   ThreadClock last_sleep_clock;
@@ -685,6 +716,11 @@
 int ThreadCount(ThreadState *thr);
 void ProcessPendingSignals(ThreadState *thr);
 
+Processor *ProcCreate();
+void ProcDestroy(Processor *proc);
+void ProcWire(Processor *proc, ThreadState *thr);
+void ProcUnwire(Processor *proc, ThreadState *thr);
+
 void MutexCreate(ThreadState *thr, uptr pc, uptr addr,
                  bool rw, bool recursive, bool linker_init);
 void MutexDestroy(ThreadState *thr, uptr pc, uptr addr);
diff --git a/lib/tsan/rtl/tsan_rtl_aarch64.S b/lib/tsan/rtl/tsan_rtl_aarch64.S
index 9cea3cf..ef06f04 100644
--- a/lib/tsan/rtl/tsan_rtl_aarch64.S
+++ b/lib/tsan/rtl/tsan_rtl_aarch64.S
@@ -1,6 +1,62 @@
 #include "sanitizer_common/sanitizer_asm.h"
+
+.section .bss
+.type	__tsan_pointer_chk_guard, %object
+.size	__tsan_pointer_chk_guard, 8
+__tsan_pointer_chk_guard:
+.zero	8
+
 .section .text
 
+// GLIBC mangles the function pointers in jmp_buf (used in {set,long}*jmp
+// functions) by XORing them with a random guard pointer.  For AArch64 it is a
+// global variable rather than a TCB one (as for x86_64/powerpc) and although
+// its value is exported by the loader, it lies within a private GLIBC
+// namespace (meaning it should only be used by GLIBC itself and the ABI is
+// not stable). So InitializeGuardPtr obtains the pointer guard value by
+// issuing a setjmp and checking the resulting pointer values against the
+// original ones.
+.hidden _Z18InitializeGuardPtrv
+.global _Z18InitializeGuardPtrv
+.type _Z18InitializeGuardPtrv, @function
+_Z18InitializeGuardPtrv:
+  CFI_STARTPROC
+  // Allocates a jmp_buf for the setjmp call.
+  stp	x29, x30, [sp, -336]!
+  CFI_DEF_CFA_OFFSET (336)
+  CFI_OFFSET (29, -336)
+  CFI_OFFSET (30, -328)
+  add	x29, sp, 0
+  CFI_DEF_CFA_REGISTER (29)
+  add	x0, x29, 24
+
+  // Call libc setjmp that mangles the stack pointer value
+  adrp  x1, :got:_ZN14__interception12real__setjmpE
+  ldr   x1, [x1, #:got_lo12:_ZN14__interception12real__setjmpE]
+  ldr   x1, [x1]
+  blr   x1
+
+  // glibc setjmp mangles both the frame pointer (FP, pc+4 on blr) and the
+  // stack pointer (SP). FP will be placed on ((uintptr*)jmp_buf)[11] and
+  // SP at ((uintptr*)jmp_buf)[13].
+  // The mangle operation is just 'value' xor 'pointer guard value' and
+  // if we know the original value (SP) and the expected one, we can derive
+  // the guard pointer value.
+  mov	x0, sp
+
+  // Loads the mangled SP pointer.
+  ldr	x1, [x29, 128]
+  eor	x0, x0, x1
+  adrp	x2, __tsan_pointer_chk_guard
+  str	x0, [x2, #:lo12:__tsan_pointer_chk_guard]
+  ldp	x29, x30, [sp], 336
+  CFI_RESTORE (30)
+  CFI_RESTORE (19)
+  CFI_DEF_CFA (31, 0)
+  ret
+  CFI_ENDPROC
+.size _Z18InitializeGuardPtrv, .-_Z18InitializeGuardPtrv
+
 .hidden __tsan_setjmp
 .comm _ZN14__interception11real_setjmpE,8,8
 .type setjmp, @function
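
The recovery trick described in the comment above can be sketched in C++ on AArch64/glibc roughly as follows. The jmp_buf slot index (13 for the mangled SP) is a glibc-internal layout detail and an assumption of this example, as is the assumption that the SP read just before the setjmp call equals the SP glibc records; on this target the mangling is a plain XOR, which is what makes the derivation possible:

  // Sketch (AArch64 + glibc only): derive the pointer guard as mangled ^ original.
  #include <setjmp.h>
  #include <cstdint>
  #include <cstdio>

  int main() {
    jmp_buf env;
    uintptr_t sp;
    __asm__ volatile("mov %0, sp" : "=r"(sp));        // the SP glibc is about to mangle
    if (setjmp(env) == 0) {
      uintptr_t mangled_sp = ((uintptr_t *)env)[13];  // mangled SP slot (assumed index)
      uintptr_t guard = mangled_sp ^ sp;              // guard = mangled XOR original
      printf("recovered pointer guard: 0x%lx\n", (unsigned long)guard);
    }
    return 0;
  }
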
@@ -23,10 +79,9 @@
   mov     x19, x0
 
   // SP pointer mangling (see glibc setjmp)
-  adrp    x2, :got:__pointer_chk_guard
-  ldr     x2, [x2, #:got_lo12:__pointer_chk_guard]
+  adrp    x2, __tsan_pointer_chk_guard
+  ldr     x2, [x2, #:lo12:__tsan_pointer_chk_guard]
   add     x0, x29, 32
-  ldr     x2, [x2]
   eor     x1, x2, x0
 
   // call tsan interceptor
@@ -71,10 +126,9 @@
   mov     x19, x0
 
   // SP pointer mangling (see glibc setjmp)
-  adrp    x2, :got:__pointer_chk_guard
-  ldr     x2, [x2, #:got_lo12:__pointer_chk_guard]
+  adrp    x2, __tsan_pointer_chk_guard
+  ldr     x2, [x2, #:lo12:__tsan_pointer_chk_guard]
   add     x0, x29, 32
-  ldr     x2, [x2]
   eor     x1, x2, x0
 
   // call tsan interceptor
@@ -121,10 +175,9 @@
   mov     x19, x0
 
   // SP pointer mangling (see glibc setjmp)
-  adrp    x2, :got:__pointer_chk_guard
-  ldr     x2, [x2, #:got_lo12:__pointer_chk_guard]
+  adrp    x2, __tsan_pointer_chk_guard
+  ldr     x2, [x2, #:lo12:__tsan_pointer_chk_guard]
   add     x0, x29, 32
-  ldr     x2, [x2]
   eor     x1, x2, x0
 
   // call tsan interceptor
@@ -173,10 +226,9 @@
   mov     x19, x0
 
   // SP pointer mangling (see glibc setjmp)
-  adrp    x2, :got:__pointer_chk_guard
-  ldr     x2, [x2, #:got_lo12:__pointer_chk_guard]
+  adrp    x2, __tsan_pointer_chk_guard
+  ldr     x2, [x2, #:lo12:__tsan_pointer_chk_guard]
   add     x0, x29, 32
-  ldr     x2, [x2]
   eor     x1, x2, x0
 
   // call tsan interceptor
diff --git a/lib/tsan/rtl/tsan_rtl_mips64.S b/lib/tsan/rtl/tsan_rtl_mips64.S
new file mode 100644
index 0000000..d0f7a3f
--- /dev/null
+++ b/lib/tsan/rtl/tsan_rtl_mips64.S
@@ -0,0 +1,214 @@
+.section .text
+.set noreorder
+
+.hidden __tsan_setjmp
+.comm _ZN14__interception11real_setjmpE,8,8
+.globl setjmp
+.type setjmp, @function
+setjmp:
+
+  // save env parameters
+  daddiu $sp,$sp,-40
+  sd $s0,32($sp)
+  sd $ra,24($sp)
+  sd $fp,16($sp)
+  sd $gp,8($sp)
+
+  // calculate and save pointer to GOT
+  lui $gp,%hi(%neg(%gp_rel(setjmp)))
+  daddu $gp,$gp,$t9
+  daddiu $gp,$gp,%lo(%neg(%gp_rel(setjmp)))
+  move $s0,$gp
+
+  // save jmp_buf
+  sd $a0,0($sp)
+
+  // obtain $sp
+  dadd $a0,$zero,$sp
+
+  // call tsan interceptor
+  jal __tsan_setjmp
+  daddiu $a1,$a0,40
+
+  // restore jmp_buf
+  ld $a0,0($sp)
+
+  // restore gp
+  move $gp,$s0
+
+  // load pointer of libc setjmp to t9
+  dla $t9,(_ZN14__interception11real_setjmpE) 
+
+  // restore env parameters
+  ld $gp,8($sp)
+  ld $fp,16($sp)
+  ld $ra,24($sp)
+  ld $s0,32($sp)
+  daddiu $sp,$sp,40
+
+  // tail jump to libc setjmp
+  ld $t9,0($t9)
+  jr $t9
+  nop
+
+.size setjmp, .-setjmp
+
+.hidden __tsan_setjmp
+.globl _setjmp
+.comm _ZN14__interception12real__setjmpE,8,8
+.type _setjmp, @function
+_setjmp:
+
+  // Save env parameters
+  daddiu $sp,$sp,-40
+  sd $s0,32($sp)
+  sd $ra,24($sp)
+  sd $fp,16($sp)
+  sd $gp,8($sp)
+
+  // calculate and save pointer to GOT
+  lui $gp,%hi(%neg(%gp_rel(_setjmp)))
+  daddu $gp,$gp,$t9
+  daddiu $gp,$gp,%lo(%neg(%gp_rel(_setjmp)))
+  move $s0,$gp
+
+  // save jmp_buf
+  sd $a0,0($sp)
+
+  // obtain $sp
+  dadd $a0,$zero,$sp
+
+  // call tsan interceptor
+  jal __tsan_setjmp
+  daddiu $a1,$a0,40
+
+  // restore jmp_buf
+  ld $a0,0($sp)
+
+  // restore gp
+  move $gp,$s0
+
+  // load pointer of libc _setjmp to t9
+  dla $t9,(_ZN14__interception12real__setjmpE)
+
+  // restore env parameters
+  ld $gp,8($sp)
+  ld $fp,16($sp)
+  ld $ra,24($sp)
+  ld $s0,32($sp)
+  daddiu $sp,$sp,40
+
+  // tail jump to libc _setjmp
+  ld $t9,0($t9)
+  jr $t9
+  nop
+
+.size _setjmp, .-_setjmp
+
+.hidden __tsan_setjmp
+.globl sigsetjmp
+.comm _ZN14__interception14real_sigsetjmpE,8,8
+.type sigsetjmp, @function
+sigsetjmp:
+
+  // Save env parameters
+  daddiu $sp,$sp,-48
+  sd $s0,40($sp)
+  sd $ra,32($sp)
+  sd $fp,24($sp)
+  sd $gp,16($sp)
+
+  // calculate and save pointer to GOT
+  lui $gp,%hi(%neg(%gp_rel(sigsetjmp)))
+  daddu $gp,$gp,$t9
+  daddiu $gp,$gp,%lo(%neg(%gp_rel(sigsetjmp)))
+  move $s0,$gp
+
+  // save jmp_buf and savesig
+  sd $a0,0($sp)
+  sd $a1,8($sp)
+
+  // obtain $sp
+  dadd $a0,$zero,$sp
+
+  // call tsan interceptor
+  jal __tsan_setjmp
+  daddiu $a1,$a0,48
+
+  // restore jmp_buf and savesig
+  ld $a0,0($sp)
+  ld $a1,8($sp)
+
+  // restore gp
+  move $gp,$s0
+
+  // load pointer of libc sigsetjmp to t9
+  dla $t9,(_ZN14__interception14real_sigsetjmpE) 
+
+  // restore env parameters
+  ld $gp,16($sp)
+  ld $fp,24($sp)
+  ld $ra,32($sp)
+  ld $s0,40($sp)
+  daddiu $sp,$sp,48
+
+  // tail jump to libc sigsetjmp
+  ld $t9,0($t9)
+  jr $t9
+  nop
+
+.size sigsetjmp, .-sigsetjmp
+
+.hidden __tsan_setjmp
+.comm _ZN14__interception16real___sigsetjmpE,8,8
+.globl __sigsetjmp
+.type __sigsetjmp, @function
+__sigsetjmp:
+
+  // Save env parameters
+  daddiu $sp,$sp,-48
+  sd $s0,40($sp)
+  sd $ra,32($sp)
+  sd $fp,24($sp)
+  sd $gp,16($sp)
+
+  // calculate and save pointer to GOT
+  lui $gp,%hi(%neg(%gp_rel(__sigsetjmp)))
+  daddu $gp,$gp,$t9
+  daddiu $gp,$gp,%lo(%neg(%gp_rel(__sigsetjmp)))
+  move $s0,$gp
+
+  // save jmp_buf and savesig
+  sd $a0,0($sp)
+  sd $a1,8($sp)
+
+  // obtain $sp
+  dadd $a0,$zero,$sp
+
+  // call tsan interceptor
+  jal __tsan_setjmp
+  daddiu $a1,$a0,48
+
+  // restore jmp_buf and savesig
+  ld $a0,0($sp)
+  ld $a1,8($sp)
+
+  // restore gp
+  move $gp,$s0
+
+  // load pointer to libc __sigsetjmp in t9
+  dla $t9,(_ZN14__interception16real___sigsetjmpE)
+
+  // restore env parameters
+  ld $gp,16($sp)
+  ld $fp,24($sp)
+  ld $ra,32($sp)
+  ld $s0,40($sp)
+  daddiu $sp,$sp,48
+
+  // tail jump to libc __sigsetjmp
+  ld $t9,0($t9)
+  jr $t9
+  nop
+
+.size __sigsetjmp, .-__sigsetjmp
diff --git a/lib/tsan/rtl/tsan_rtl_mutex.cc b/lib/tsan/rtl/tsan_rtl_mutex.cc
index c568287..1806acf 100644
--- a/lib/tsan/rtl/tsan_rtl_mutex.cc
+++ b/lib/tsan/rtl/tsan_rtl_mutex.cc
@@ -32,7 +32,7 @@
   Callback(ThreadState *thr, uptr pc)
       : thr(thr)
       , pc(pc) {
-    DDCallback::pt = thr->dd_pt;
+    DDCallback::pt = thr->proc()->dd_pt;
     DDCallback::lt = thr->dd_lt;
   }
 
@@ -84,21 +84,14 @@
 void MutexDestroy(ThreadState *thr, uptr pc, uptr addr) {
   DPrintf("#%d: MutexDestroy %zx\n", thr->tid, addr);
   StatInc(thr, StatMutexDestroy);
-#ifndef SANITIZER_GO
-  // Global mutexes not marked as LINKER_INITIALIZED
-  // cause tons of not interesting reports, so just ignore it.
-  if (IsGlobalVar(addr))
-    return;
-#endif
-  if (IsAppMem(addr)) {
-    CHECK(!thr->is_freeing);
-    thr->is_freeing = true;
-    MemoryWrite(thr, pc, addr, kSizeLog1);
-    thr->is_freeing = false;
-  }
-  SyncVar *s = ctx->metamap.GetIfExistsAndLock(addr);
+  SyncVar *s = ctx->metamap.GetIfExistsAndLock(addr, true);
   if (s == 0)
     return;
+  if (s->is_linker_init) {
+    // Destroy is no-op for linker-initialized mutexes.
+    s->mtx.Unlock();
+    return;
+  }
   if (common_flags()->detect_deadlocks) {
     Callback cb(thr, pc);
     ctx->dd->MutexDestroy(&cb, &s->dd);
@@ -114,7 +107,7 @@
   u64 mid = s->GetId();
   u32 last_lock = s->last_lock;
   if (!unlock_locked)
-    s->Reset(thr);  // must not reset it before the report is printed
+    s->Reset(thr->proc());  // must not reset it before the report is printed
   s->mtx.Unlock();
   if (unlock_locked) {
     ThreadRegistryLock l(ctx->thread_registry);
@@ -128,15 +121,23 @@
     rep.AddStack(trace, true);
     rep.AddLocation(addr, 1);
     OutputReport(thr, rep);
-  }
-  if (unlock_locked) {
-    SyncVar *s = ctx->metamap.GetIfExistsAndLock(addr);
+
+    SyncVar *s = ctx->metamap.GetIfExistsAndLock(addr, true);
     if (s != 0) {
-      s->Reset(thr);
+      s->Reset(thr->proc());
       s->mtx.Unlock();
     }
   }
   thr->mset.Remove(mid);
+  // Imitate a memory write to catch unlock-destroy races.
+  // Do this outside of sync mutex, because it can report a race which locks
+  // sync mutexes.
+  if (IsAppMem(addr)) {
+    CHECK(!thr->is_freeing);
+    thr->is_freeing = true;
+    MemoryWrite(thr, pc, addr, kSizeLog1);
+    thr->is_freeing = false;
+  }
   // s will be destroyed and freed in MetaMap::FreeBlock.
 }
 
@@ -362,7 +363,9 @@
   DPrintf("#%d: Acquire %zx\n", thr->tid, addr);
   if (thr->ignore_sync)
     return;
-  SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, false);
+  SyncVar *s = ctx->metamap.GetIfExistsAndLock(addr, false);
+  if (!s)
+    return;
   AcquireImpl(thr, pc, &s->clock);
   s->mtx.ReadUnlock();
 }
@@ -434,7 +437,7 @@
   if (thr->ignore_sync)
     return;
   thr->clock.set(thr->fast_state.epoch());
-  thr->clock.acquire(&thr->clock_cache, c);
+  thr->clock.acquire(&thr->proc()->clock_cache, c);
   StatInc(thr, StatSyncAcquire);
 }
 
@@ -443,7 +446,7 @@
     return;
   thr->clock.set(thr->fast_state.epoch());
   thr->fast_synch_epoch = thr->fast_state.epoch();
-  thr->clock.release(&thr->clock_cache, c);
+  thr->clock.release(&thr->proc()->clock_cache, c);
   StatInc(thr, StatSyncRelease);
 }
 
@@ -452,7 +455,7 @@
     return;
   thr->clock.set(thr->fast_state.epoch());
   thr->fast_synch_epoch = thr->fast_state.epoch();
-  thr->clock.ReleaseStore(&thr->clock_cache, c);
+  thr->clock.ReleaseStore(&thr->proc()->clock_cache, c);
   StatInc(thr, StatSyncRelease);
 }
 
@@ -461,7 +464,7 @@
     return;
   thr->clock.set(thr->fast_state.epoch());
   thr->fast_synch_epoch = thr->fast_state.epoch();
-  thr->clock.acq_rel(&thr->clock_cache, c);
+  thr->clock.acq_rel(&thr->proc()->clock_cache, c);
   StatInc(thr, StatSyncAcquire);
   StatInc(thr, StatSyncRelease);
 }
diff --git a/lib/tsan/rtl/tsan_rtl_proc.cc b/lib/tsan/rtl/tsan_rtl_proc.cc
new file mode 100644
index 0000000..0c838a1
--- /dev/null
+++ b/lib/tsan/rtl/tsan_rtl_proc.cc
@@ -0,0 +1,61 @@
+//===-- tsan_rtl_proc.cc ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "tsan_rtl.h"
+#include "tsan_mman.h"
+#include "tsan_flags.h"
+
+namespace __tsan {
+
+Processor *ProcCreate() {
+  void *mem = InternalAlloc(sizeof(Processor));
+  internal_memset(mem, 0, sizeof(Processor));
+  Processor *proc = new(mem) Processor;
+  proc->thr = nullptr;
+#ifndef SANITIZER_GO
+  AllocatorProcStart(proc);
+#endif
+  if (common_flags()->detect_deadlocks)
+    proc->dd_pt = ctx->dd->CreatePhysicalThread();
+  return proc;
+}
+
+void ProcDestroy(Processor *proc) {
+  CHECK_EQ(proc->thr, nullptr);
+#ifndef SANITIZER_GO
+  AllocatorProcFinish(proc);
+#endif
+  ctx->clock_alloc.FlushCache(&proc->clock_cache);
+  ctx->metamap.OnProcIdle(proc);
+  if (common_flags()->detect_deadlocks)
+     ctx->dd->DestroyPhysicalThread(proc->dd_pt);
+  proc->~Processor();
+  InternalFree(proc);
+}
+
+void ProcWire(Processor *proc, ThreadState *thr) {
+  CHECK_EQ(thr->proc1, nullptr);
+  CHECK_EQ(proc->thr, nullptr);
+  thr->proc1 = proc;
+  proc->thr = thr;
+}
+
+void ProcUnwire(Processor *proc, ThreadState *thr) {
+  CHECK_EQ(thr->proc1, proc);
+  CHECK_EQ(proc->thr, thr);
+  thr->proc1 = nullptr;
+  proc->thr = nullptr;
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_rtl_report.cc b/lib/tsan/rtl/tsan_rtl_report.cc
index ad61dfd..810119b 100644
--- a/lib/tsan/rtl/tsan_rtl_report.cc
+++ b/lib/tsan/rtl/tsan_rtl_report.cc
@@ -273,7 +273,7 @@
   u64 uid = 0;
   u64 mid = id;
   uptr addr = SyncVar::SplitId(id, &uid);
-  SyncVar *s = ctx->metamap.GetIfExistsAndLock(addr);
+  SyncVar *s = ctx->metamap.GetIfExistsAndLock(addr, true);
   // Check that the mutex is still alive.
   // Another mutex can be created at the same address,
   // so check uid as well.
@@ -347,12 +347,12 @@
     rep_->locs.PushBack(loc);
     AddThread(tctx);
   }
+#endif
   if (ReportLocation *loc = SymbolizeData(addr)) {
     loc->suppressable = true;
     rep_->locs.PushBack(loc);
     return;
   }
-#endif
 }
 
 #ifndef SANITIZER_GO
@@ -680,6 +680,14 @@
   PrintStack(SymbolizeStack(trace));
 }
 
+// Always inline PrintCurrentStackSlow, because LocatePcInTrace assumes
+// __sanitizer_print_stack_trace exists in the actual unwound stack, but
+// a tail call to PrintCurrentStackSlow breaks this assumption because
+// __sanitizer_print_stack_trace disappears after the tail call.
+// However, this solution is not reliable enough; see dvyukov's comment
+// http://reviews.llvm.org/D19148#406208
+// Also see PR27280 comments 2 and 3 for breaking examples and analysis.
+ALWAYS_INLINE
 void PrintCurrentStackSlow(uptr pc) {
 #ifndef SANITIZER_GO
   BufferedStackTrace *ptrace =
diff --git a/lib/tsan/rtl/tsan_rtl_thread.cc b/lib/tsan/rtl/tsan_rtl_thread.cc
index dcae255..13528ae 100644
--- a/lib/tsan/rtl/tsan_rtl_thread.cc
+++ b/lib/tsan/rtl/tsan_rtl_thread.cc
@@ -42,7 +42,7 @@
 void ThreadContext::OnJoined(void *arg) {
   ThreadState *caller_thr = static_cast<ThreadState *>(arg);
   AcquireImpl(caller_thr, 0, &sync);
-  sync.Reset(&caller_thr->clock_cache);
+  sync.Reset(&caller_thr->proc()->clock_cache);
 }
 
 struct OnCreatedArgs {
@@ -74,7 +74,7 @@
 
 void ThreadContext::OnDetached(void *arg) {
   ThreadState *thr1 = static_cast<ThreadState*>(arg);
-  sync.Reset(&thr1->clock_cache);
+  sync.Reset(&thr1->proc()->clock_cache);
 }
 
 struct OnStartedArgs {
@@ -106,13 +106,8 @@
   thr->shadow_stack_pos = thr->shadow_stack;
   thr->shadow_stack_end = thr->shadow_stack + kInitStackSize;
 #endif
-#ifndef SANITIZER_GO
-  AllocatorThreadStart(thr);
-#endif
-  if (common_flags()->detect_deadlocks) {
-    thr->dd_pt = ctx->dd->CreatePhysicalThread();
+  if (common_flags()->detect_deadlocks)
     thr->dd_lt = ctx->dd->CreateLogicalThread(unique_id);
-  }
   thr->fast_state.SetHistorySize(flags()->history_size);
   // Commit switch to the new part of the trace.
   // TraceAddEvent will reset stack0/mset0 in the new part for us.
@@ -121,7 +116,7 @@
   thr->fast_synch_epoch = epoch0;
   AcquireImpl(thr, 0, &sync);
   StatInc(thr, StatSyncAcquire);
-  sync.Reset(&thr->clock_cache);
+  sync.Reset(&thr->proc()->clock_cache);
   thr->is_inited = true;
   DPrintf("#%d: ThreadStart epoch=%zu stk_addr=%zx stk_size=%zx "
           "tls_addr=%zx tls_size=%zx\n",
@@ -130,6 +125,12 @@
 }
 
 void ThreadContext::OnFinished() {
+#ifdef SANITIZER_GO
+  internal_free(thr->shadow_stack);
+  thr->shadow_stack = nullptr;
+  thr->shadow_stack_pos = nullptr;
+  thr->shadow_stack_end = nullptr;
+#endif
   if (!detached) {
     thr->fast_state.IncrementEpoch();
     // Can't increment epoch w/o writing to the trace as well.
@@ -138,15 +139,8 @@
   }
   epoch1 = thr->fast_state.epoch();
 
-  if (common_flags()->detect_deadlocks) {
-    ctx->dd->DestroyPhysicalThread(thr->dd_pt);
+  if (common_flags()->detect_deadlocks)
     ctx->dd->DestroyLogicalThread(thr->dd_lt);
-  }
-  ctx->clock_alloc.FlushCache(&thr->clock_cache);
-  ctx->metamap.OnThreadIdle(thr);
-#ifndef SANITIZER_GO
-  AllocatorThreadFinish(thr);
-#endif
   thr->~ThreadState();
 #if TSAN_COLLECT_STATS
   StatAggregate(ctx->stat, thr->stat);
diff --git a/lib/tsan/rtl/tsan_stat.cc b/lib/tsan/rtl/tsan_stat.cc
index a5cca96..d1d6ed2 100644
--- a/lib/tsan/rtl/tsan_stat.cc
+++ b/lib/tsan/rtl/tsan_stat.cc
@@ -168,6 +168,7 @@
   name[StatMtxFired]                     = "  FiredSuppressions               ";
   name[StatMtxRacy]                      = "  RacyStacks                      ";
   name[StatMtxFD]                        = "  FD                              ";
+  name[StatMtxGlobalProc]                = "  GlobalProc                      ";
 
   Printf("Statistics:\n");
   for (int i = 0; i < StatCnt; i++)
diff --git a/lib/tsan/rtl/tsan_stat.h b/lib/tsan/rtl/tsan_stat.h
index 8ea3204..8447dd8 100644
--- a/lib/tsan/rtl/tsan_stat.h
+++ b/lib/tsan/rtl/tsan_stat.h
@@ -173,6 +173,7 @@
   StatMtxFired,
   StatMtxRacy,
   StatMtxFD,
+  StatMtxGlobalProc,
 
   // This must be the last.
   StatCnt
diff --git a/lib/tsan/rtl/tsan_suppressions.cc b/lib/tsan/rtl/tsan_suppressions.cc
index 3cd6af0..aea3cb9 100644
--- a/lib/tsan/rtl/tsan_suppressions.cc
+++ b/lib/tsan/rtl/tsan_suppressions.cc
@@ -94,7 +94,7 @@
     return kSuppressionNone;
   else if (typ == ReportTypeDeadlock)
     return kSuppressionDeadlock;
-  Printf("ThreadSanitizer: unknown report type %d\n", typ),
+  Printf("ThreadSanitizer: unknown report type %d\n", typ);
   Die();
 }
 
@@ -161,8 +161,8 @@
   Printf("ThreadSanitizer: Matched %d suppressions (pid=%d):\n", hit_count,
          (int)internal_getpid());
   for (uptr i = 0; i < matched.size(); i++) {
-    Printf("%d %s:%s\n", matched[i]->hit_count, matched[i]->type,
-           matched[i]->templ);
+    Printf("%d %s:%s\n", atomic_load_relaxed(&matched[i]->hit_count),
+           matched[i]->type, matched[i]->templ);
   }
 }
 }  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_sync.cc b/lib/tsan/rtl/tsan_sync.cc
index 4202d30..58b2680 100644
--- a/lib/tsan/rtl/tsan_sync.cc
+++ b/lib/tsan/rtl/tsan_sync.cc
@@ -36,7 +36,7 @@
     DDMutexInit(thr, pc, this);
 }
 
-void SyncVar::Reset(ThreadState *thr) {
+void SyncVar::Reset(Processor *proc) {
   uid = 0;
   creation_stack_id = 0;
   owner_tid = kInvalidTid;
@@ -47,12 +47,12 @@
   is_broken = 0;
   is_linker_init = 0;
 
-  if (thr == 0) {
+  if (proc == 0) {
     CHECK_EQ(clock.size(), 0);
     CHECK_EQ(read_clock.size(), 0);
   } else {
-    clock.Reset(&thr->clock_cache);
-    read_clock.Reset(&thr->clock_cache);
+    clock.Reset(&proc->clock_cache);
+    read_clock.Reset(&proc->clock_cache);
   }
 }
 
@@ -61,7 +61,7 @@
 }
 
 void MetaMap::AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz) {
-  u32 idx = block_alloc_.Alloc(&thr->block_cache);
+  u32 idx = block_alloc_.Alloc(&thr->proc()->block_cache);
   MBlock *b = block_alloc_.Map(idx);
   b->siz = sz;
   b->tid = thr->tid;
@@ -71,16 +71,16 @@
   *meta = idx | kFlagBlock;
 }
 
-uptr MetaMap::FreeBlock(ThreadState *thr, uptr pc, uptr p) {
+uptr MetaMap::FreeBlock(Processor *proc, uptr p) {
   MBlock* b = GetBlock(p);
   if (b == 0)
     return 0;
   uptr sz = RoundUpTo(b->siz, kMetaShadowCell);
-  FreeRange(thr, pc, p, sz);
+  FreeRange(proc, p, sz);
   return sz;
 }
 
-bool MetaMap::FreeRange(ThreadState *thr, uptr pc, uptr p, uptr sz) {
+bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz) {
   bool has_something = false;
   u32 *meta = MemToMeta(p);
   u32 *end = MemToMeta(p + sz);
@@ -96,14 +96,14 @@
     has_something = true;
     while (idx != 0) {
       if (idx & kFlagBlock) {
-        block_alloc_.Free(&thr->block_cache, idx & ~kFlagMask);
+        block_alloc_.Free(&proc->block_cache, idx & ~kFlagMask);
         break;
       } else if (idx & kFlagSync) {
         DCHECK(idx & kFlagSync);
         SyncVar *s = sync_alloc_.Map(idx & ~kFlagMask);
         u32 next = s->next;
-        s->Reset(thr);
-        sync_alloc_.Free(&thr->sync_cache, idx & ~kFlagMask);
+        s->Reset(proc);
+        sync_alloc_.Free(&proc->sync_cache, idx & ~kFlagMask);
         idx = next;
       } else {
         CHECK(0);
@@ -119,24 +119,30 @@
 // which can be huge. The function probes pages one-by-one until it finds a page
 // without meta objects, at which point it stops freeing meta objects. Because
 // thread stacks grow top-down, we do the same starting from the end as well.
-void MetaMap::ResetRange(ThreadState *thr, uptr pc, uptr p, uptr sz) {
+void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz) {
+  if (kGoMode) {
+    // UnmapOrDie/MmapFixedNoReserve do not work on Windows,
+    // so we do the optimization only for C/C++.
+    FreeRange(proc, p, sz);
+    return;
+  }
   const uptr kMetaRatio = kMetaShadowCell / kMetaShadowSize;
   const uptr kPageSize = GetPageSizeCached() * kMetaRatio;
   if (sz <= 4 * kPageSize) {
     // If the range is small, just do the normal free procedure.
-    FreeRange(thr, pc, p, sz);
+    FreeRange(proc, p, sz);
     return;
   }
   // First, round both ends of the range to page size.
   uptr diff = RoundUp(p, kPageSize) - p;
   if (diff != 0) {
-    FreeRange(thr, pc, p, diff);
+    FreeRange(proc, p, diff);
     p += diff;
     sz -= diff;
   }
   diff = p + sz - RoundDown(p + sz, kPageSize);
   if (diff != 0) {
-    FreeRange(thr, pc, p + sz - diff, diff);
+    FreeRange(proc, p + sz - diff, diff);
     sz -= diff;
   }
   // Now we must have a non-empty page-aligned range.
@@ -146,18 +152,21 @@
   const uptr p0 = p;
   const uptr sz0 = sz;
   // Probe start of the range.
-  while (sz > 0) {
-    bool has_something = FreeRange(thr, pc, p, kPageSize);
+  for (uptr checked = 0; sz > 0; checked += kPageSize) {
+    bool has_something = FreeRange(proc, p, kPageSize);
     p += kPageSize;
     sz -= kPageSize;
-    if (!has_something)
+    if (!has_something && checked > (128 << 10))
       break;
   }
   // Probe end of the range.
-  while (sz > 0) {
-    bool has_something = FreeRange(thr, pc, p - kPageSize, kPageSize);
+  for (uptr checked = 0; sz > 0; checked += kPageSize) {
+    bool has_something = FreeRange(proc, p + sz - kPageSize, kPageSize);
     sz -= kPageSize;
-    if (!has_something)
+    // Stacks grow down, so sync objects are most likely at the end of the region
+    // (if it is a stack). The very end of the stack is TLS and tsan increases
+    // TLS by at least 256K, so check at least 512K.
+    if (!has_something && checked > (512 << 10))
       break;
   }
   // Finally, page out the whole range (including the parts that we've just
@@ -189,8 +198,8 @@
   return GetAndLock(thr, pc, addr, write_lock, true);
 }
 
-SyncVar* MetaMap::GetIfExistsAndLock(uptr addr) {
-  return GetAndLock(0, 0, addr, true, false);
+SyncVar* MetaMap::GetIfExistsAndLock(uptr addr, bool write_lock) {
+  return GetAndLock(0, 0, addr, write_lock, false);
 }
 
 SyncVar* MetaMap::GetAndLock(ThreadState *thr, uptr pc,
@@ -210,8 +219,8 @@
       SyncVar * s = sync_alloc_.Map(idx & ~kFlagMask);
       if (s->addr == addr) {
         if (myidx != 0) {
-          mys->Reset(thr);
-          sync_alloc_.Free(&thr->sync_cache, myidx);
+          mys->Reset(thr->proc());
+          sync_alloc_.Free(&thr->proc()->sync_cache, myidx);
         }
         if (write_lock)
           s->mtx.Lock();
@@ -230,7 +239,7 @@
 
     if (myidx == 0) {
       const u64 uid = atomic_fetch_add(&uid_gen_, 1, memory_order_relaxed);
-      myidx = sync_alloc_.Alloc(&thr->sync_cache);
+      myidx = sync_alloc_.Alloc(&thr->proc()->sync_cache);
       mys = sync_alloc_.Map(myidx);
       mys->Init(thr, pc, addr, uid);
     }
@@ -279,9 +288,9 @@
   }
 }
 
-void MetaMap::OnThreadIdle(ThreadState *thr) {
-  block_alloc_.FlushCache(&thr->block_cache);
-  sync_alloc_.FlushCache(&thr->sync_cache);
+void MetaMap::OnProcIdle(Processor *proc) {
+  block_alloc_.FlushCache(&proc->block_cache);
+  sync_alloc_.FlushCache(&proc->sync_cache);
 }
 
 }  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_sync.h b/lib/tsan/rtl/tsan_sync.h
index f07ea3b..86e6bbd 100644
--- a/lib/tsan/rtl/tsan_sync.h
+++ b/lib/tsan/rtl/tsan_sync.h
@@ -47,19 +47,19 @@
   SyncClock clock;
 
   void Init(ThreadState *thr, uptr pc, uptr addr, u64 uid);
-  void Reset(ThreadState *thr);
+  void Reset(Processor *proc);
 
   u64 GetId() const {
-    // 47 lsb is addr, then 14 bits is low part of uid, then 3 zero bits.
-    return GetLsb((u64)addr | (uid << 47), 61);
+    // 48 lsb is addr, then 14 bits is low part of uid, then 2 zero bits.
+    return GetLsb((u64)addr | (uid << 48), 60);
   }
   bool CheckId(u64 uid) const {
     CHECK_EQ(uid, GetLsb(uid, 14));
     return GetLsb(this->uid, 14) == uid;
   }
   static uptr SplitId(u64 id, u64 *uid) {
-    *uid = id >> 47;
-    return (uptr)GetLsb(id, 47);
+    *uid = id >> 48;
+    return (uptr)GetLsb(id, 48);
   }
 };
 
@@ -72,18 +72,18 @@
   MetaMap();
 
   void AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz);
-  uptr FreeBlock(ThreadState *thr, uptr pc, uptr p);
-  bool FreeRange(ThreadState *thr, uptr pc, uptr p, uptr sz);
-  void ResetRange(ThreadState *thr, uptr pc, uptr p, uptr sz);
+  uptr FreeBlock(Processor *proc, uptr p);
+  bool FreeRange(Processor *proc, uptr p, uptr sz);
+  void ResetRange(Processor *proc, uptr p, uptr sz);
   MBlock* GetBlock(uptr p);
 
   SyncVar* GetOrCreateAndLock(ThreadState *thr, uptr pc,
                               uptr addr, bool write_lock);
-  SyncVar* GetIfExistsAndLock(uptr addr);
+  SyncVar* GetIfExistsAndLock(uptr addr, bool write_lock);
 
   void MoveMemory(uptr src, uptr dst, uptr sz);
 
-  void OnThreadIdle(ThreadState *thr);
+  void OnProcIdle(Processor *proc);
 
  private:
   static const u32 kFlagMask  = 3u << 30;
diff --git a/lib/tsan/tests/unit/tsan_sync_test.cc b/lib/tsan/tests/unit/tsan_sync_test.cc
index d3616a1..8016654 100644
--- a/lib/tsan/tests/unit/tsan_sync_test.cc
+++ b/lib/tsan/tests/unit/tsan_sync_test.cc
@@ -25,7 +25,7 @@
   EXPECT_NE(mb, (MBlock*)0);
   EXPECT_EQ(mb->siz, 1 * sizeof(u64));
   EXPECT_EQ(mb->tid, thr->tid);
-  uptr sz = m->FreeBlock(thr, 0, (uptr)&block[0]);
+  uptr sz = m->FreeBlock(thr->proc(), (uptr)&block[0]);
   EXPECT_EQ(sz, 1 * sizeof(u64));
   mb = m->GetBlock((uptr)&block[0]);
   EXPECT_EQ(mb, (MBlock*)0);
@@ -41,7 +41,7 @@
   EXPECT_EQ(mb1->siz, 1 * sizeof(u64));
   MBlock *mb2 = m->GetBlock((uptr)&block[1]);
   EXPECT_EQ(mb2->siz, 3 * sizeof(u64));
-  m->FreeRange(thr, 0, (uptr)&block[0], 4 * sizeof(u64));
+  m->FreeRange(thr->proc(), (uptr)&block[0], 4 * sizeof(u64));
   mb1 = m->GetBlock((uptr)&block[0]);
   EXPECT_EQ(mb1, (MBlock*)0);
   mb2 = m->GetBlock((uptr)&block[1]);
@@ -53,7 +53,7 @@
   MetaMap *m = &ctx->metamap;
   u64 block[4] = {};  // fake malloc block
   m->AllocBlock(thr, 0, (uptr)&block[0], 4 * sizeof(u64));
-  SyncVar *s1 = m->GetIfExistsAndLock((uptr)&block[0]);
+  SyncVar *s1 = m->GetIfExistsAndLock((uptr)&block[0], true);
   EXPECT_EQ(s1, (SyncVar*)0);
   s1 = m->GetOrCreateAndLock(thr, 0, (uptr)&block[0], true);
   EXPECT_NE(s1, (SyncVar*)0);
@@ -63,12 +63,12 @@
   EXPECT_NE(s2, (SyncVar*)0);
   EXPECT_EQ(s2->addr, (uptr)&block[1]);
   s2->mtx.ReadUnlock();
-  m->FreeBlock(thr, 0, (uptr)&block[0]);
-  s1 = m->GetIfExistsAndLock((uptr)&block[0]);
+  m->FreeBlock(thr->proc(), (uptr)&block[0]);
+  s1 = m->GetIfExistsAndLock((uptr)&block[0], true);
   EXPECT_EQ(s1, (SyncVar*)0);
-  s2 = m->GetIfExistsAndLock((uptr)&block[1]);
+  s2 = m->GetIfExistsAndLock((uptr)&block[1], true);
   EXPECT_EQ(s2, (SyncVar*)0);
-  m->OnThreadIdle(thr);
+  m->OnProcIdle(thr->proc());
 }
 
 TEST(MetaMap, MoveMemory) {
@@ -93,19 +93,19 @@
   mb2 = m->GetBlock((uptr)&block2[3]);
   EXPECT_NE(mb2, (MBlock*)0);
   EXPECT_EQ(mb2->siz, 1 * sizeof(u64));
-  s1 = m->GetIfExistsAndLock((uptr)&block1[0]);
+  s1 = m->GetIfExistsAndLock((uptr)&block1[0], true);
   EXPECT_EQ(s1, (SyncVar*)0);
-  s2 = m->GetIfExistsAndLock((uptr)&block1[1]);
+  s2 = m->GetIfExistsAndLock((uptr)&block1[1], true);
   EXPECT_EQ(s2, (SyncVar*)0);
-  s1 = m->GetIfExistsAndLock((uptr)&block2[0]);
+  s1 = m->GetIfExistsAndLock((uptr)&block2[0], true);
   EXPECT_NE(s1, (SyncVar*)0);
   EXPECT_EQ(s1->addr, (uptr)&block2[0]);
   s1->mtx.Unlock();
-  s2 = m->GetIfExistsAndLock((uptr)&block2[1]);
+  s2 = m->GetIfExistsAndLock((uptr)&block2[1], true);
   EXPECT_NE(s2, (SyncVar*)0);
   EXPECT_EQ(s2->addr, (uptr)&block2[1]);
   s2->mtx.Unlock();
-  m->FreeRange(thr, 0, (uptr)&block2[0], 4 * sizeof(u64));
+  m->FreeRange(thr->proc(), (uptr)&block2[0], 4 * sizeof(u64));
 }
 
 TEST(MetaMap, ResetSync) {
@@ -114,9 +114,9 @@
   u64 block[1] = {};  // fake malloc block
   m->AllocBlock(thr, 0, (uptr)&block[0], 1 * sizeof(u64));
   SyncVar *s = m->GetOrCreateAndLock(thr, 0, (uptr)&block[0], true);
-  s->Reset(thr);
+  s->Reset(thr->proc());
   s->mtx.Unlock();
-  uptr sz = m->FreeBlock(thr, 0, (uptr)&block[0]);
+  uptr sz = m->FreeBlock(thr->proc(), (uptr)&block[0]);
   EXPECT_EQ(sz, 1 * sizeof(u64));
 }
 
diff --git a/lib/ubsan/CMakeLists.txt b/lib/ubsan/CMakeLists.txt
index 5ece9a6..901fef2 100644
--- a/lib/ubsan/CMakeLists.txt
+++ b/lib/ubsan/CMakeLists.txt
@@ -22,12 +22,19 @@
 include_directories(..)
 
 set(UBSAN_CFLAGS ${SANITIZER_COMMON_CFLAGS})
-append_no_rtti_flag(UBSAN_CFLAGS)
+append_rtti_flag(OFF UBSAN_CFLAGS)
+append_list_if(SANITIZER_CAN_USE_CXXABI -DUBSAN_CAN_USE_CXXABI UBSAN_CFLAGS)
+
 set(UBSAN_STANDALONE_CFLAGS ${SANITIZER_COMMON_CFLAGS})
-append_no_rtti_flag(UBSAN_STANDALONE_CFLAGS)
+append_rtti_flag(OFF UBSAN_STANDALONE_CFLAGS)
+append_list_if(SANITIZER_CAN_USE_CXXABI -DUBSAN_CAN_USE_CXXABI UBSAN_STANDALONE_CFLAGS)
+
 set(UBSAN_CXXFLAGS ${SANITIZER_COMMON_CFLAGS})
+append_rtti_flag(ON UBSAN_CXXFLAGS)
+append_list_if(SANITIZER_CAN_USE_CXXABI -DUBSAN_CAN_USE_CXXABI UBSAN_CXXFLAGS)
 
 add_custom_target(ubsan)
+set_target_properties(ubsan PROPERTIES FOLDER "Compiler-RT Misc")
 
 if(APPLE)
   set(UBSAN_COMMON_SOURCES ${UBSAN_SOURCES})
diff --git a/lib/ubsan/Makefile.mk b/lib/ubsan/Makefile.mk
deleted file mode 100644
index ec3f5c5..0000000
--- a/lib/ubsan/Makefile.mk
+++ /dev/null
@@ -1,28 +0,0 @@
-#===- lib/ubsan/Makefile.mk ---------------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := ubsan
-SubDirs :=
-
-Sources := $(foreach file,$(wildcard $(Dir)/*.cc),$(notdir $(file)))
-StandaloneSources := ubsan_init_standalone.cc
-CXXSources := ubsan_type_hash.cc ubsan_handlers_cxx.cc
-CSources := $(filter-out $(StandaloneSources),$(filter-out $(CXXSources),$(Sources)))
-ObjNames := $(Sources:%.cc=%.o)
-
-Implementation := Generic
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard $(Dir)/*.h)
-Dependencies += $(wildcard $(Dir)/../sanitizer_common/*.h)
-
-# Define a convenience variable for all the ubsan functions.
-UbsanFunctions := $(CSources:%.cc=%)
-UbsanCXXFunctions := $(CXXSources:%.cc=%)
-UbsanStandaloneFunctions := $(StandaloneSources:%.cc=%)
diff --git a/lib/ubsan/ubsan_diag.cc b/lib/ubsan/ubsan_diag.cc
index 2476947..d842694 100644
--- a/lib/ubsan/ubsan_diag.cc
+++ b/lib/ubsan/ubsan_diag.cc
@@ -124,108 +124,98 @@
 }
 
 /// Hexadecimal printing for numbers too large for Printf to handle directly.
-static void PrintHex(UIntMax Val) {
+static void RenderHex(InternalScopedString *Buffer, UIntMax Val) {
 #if HAVE_INT128_T
-  Printf("0x%08x%08x%08x%08x",
-          (unsigned int)(Val >> 96),
-          (unsigned int)(Val >> 64),
-          (unsigned int)(Val >> 32),
-          (unsigned int)(Val));
+  Buffer->append("0x%08x%08x%08x%08x", (unsigned int)(Val >> 96),
+                 (unsigned int)(Val >> 64), (unsigned int)(Val >> 32),
+                 (unsigned int)(Val));
 #else
   UNREACHABLE("long long smaller than 64 bits?");
 #endif
 }
 
-static void renderLocation(Location Loc) {
-  InternalScopedString LocBuffer(1024);
+static void RenderLocation(InternalScopedString *Buffer, Location Loc) {
   switch (Loc.getKind()) {
   case Location::LK_Source: {
     SourceLocation SLoc = Loc.getSourceLocation();
     if (SLoc.isInvalid())
-      LocBuffer.append("<unknown>");
+      Buffer->append("<unknown>");
     else
-      RenderSourceLocation(&LocBuffer, SLoc.getFilename(), SLoc.getLine(),
+      RenderSourceLocation(Buffer, SLoc.getFilename(), SLoc.getLine(),
                            SLoc.getColumn(), common_flags()->symbolize_vs_style,
                            common_flags()->strip_path_prefix);
-    break;
+    return;
   }
   case Location::LK_Memory:
-    LocBuffer.append("%p", Loc.getMemoryLocation());
-    break;
+    Buffer->append("%p", Loc.getMemoryLocation());
+    return;
   case Location::LK_Symbolized: {
     const AddressInfo &Info = Loc.getSymbolizedStack()->info;
-    if (Info.file) {
-      RenderSourceLocation(&LocBuffer, Info.file, Info.line, Info.column,
+    if (Info.file)
+      RenderSourceLocation(Buffer, Info.file, Info.line, Info.column,
                            common_flags()->symbolize_vs_style,
                            common_flags()->strip_path_prefix);
-    } else if (Info.module) {
-      RenderModuleLocation(&LocBuffer, Info.module, Info.module_offset,
+    else if (Info.module)
+      RenderModuleLocation(Buffer, Info.module, Info.module_offset,
                            common_flags()->strip_path_prefix);
-    } else {
-      LocBuffer.append("%p", Info.address);
-    }
-    break;
+    else
+      Buffer->append("%p", Info.address);
+    return;
   }
   case Location::LK_Null:
-    LocBuffer.append("<unknown>");
-    break;
+    Buffer->append("<unknown>");
+    return;
   }
-  Printf("%s:", LocBuffer.data());
 }
 
-static void renderText(const char *Message, const Diag::Arg *Args) {
+static void RenderText(InternalScopedString *Buffer, const char *Message,
+                       const Diag::Arg *Args) {
   for (const char *Msg = Message; *Msg; ++Msg) {
     if (*Msg != '%') {
-      char Buffer[64];
-      unsigned I;
-      for (I = 0; Msg[I] && Msg[I] != '%' && I != 63; ++I)
-        Buffer[I] = Msg[I];
-      Buffer[I] = '\0';
-      Printf(Buffer);
-      Msg += I - 1;
-    } else {
-      const Diag::Arg &A = Args[*++Msg - '0'];
-      switch (A.Kind) {
-      case Diag::AK_String:
-        Printf("%s", A.String);
-        break;
-      case Diag::AK_TypeName: {
-        if (SANITIZER_WINDOWS)
-          // The Windows implementation demangles names early.
-          Printf("'%s'", A.String);
-        else
-          Printf("'%s'", Symbolizer::GetOrInit()->Demangle(A.String));
-        break;
-      }
-      case Diag::AK_SInt:
-        // 'long long' is guaranteed to be at least 64 bits wide.
-        if (A.SInt >= INT64_MIN && A.SInt <= INT64_MAX)
-          Printf("%lld", (long long)A.SInt);
-        else
-          PrintHex(A.SInt);
-        break;
-      case Diag::AK_UInt:
-        if (A.UInt <= UINT64_MAX)
-          Printf("%llu", (unsigned long long)A.UInt);
-        else
-          PrintHex(A.UInt);
-        break;
-      case Diag::AK_Float: {
-        // FIXME: Support floating-point formatting in sanitizer_common's
-        //        printf, and stop using snprintf here.
-        char Buffer[32];
+      Buffer->append("%c", *Msg);
+      continue;
+    }
+    const Diag::Arg &A = Args[*++Msg - '0'];
+    switch (A.Kind) {
+    case Diag::AK_String:
+      Buffer->append("%s", A.String);
+      break;
+    case Diag::AK_TypeName: {
+      if (SANITIZER_WINDOWS)
+        // The Windows implementation demangles names early.
+        Buffer->append("'%s'", A.String);
+      else
+        Buffer->append("'%s'", Symbolizer::GetOrInit()->Demangle(A.String));
+      break;
+    }
+    case Diag::AK_SInt:
+      // 'long long' is guaranteed to be at least 64 bits wide.
+      if (A.SInt >= INT64_MIN && A.SInt <= INT64_MAX)
+        Buffer->append("%lld", (long long)A.SInt);
+      else
+        RenderHex(Buffer, A.SInt);
+      break;
+    case Diag::AK_UInt:
+      if (A.UInt <= UINT64_MAX)
+        Buffer->append("%llu", (unsigned long long)A.UInt);
+      else
+        RenderHex(Buffer, A.UInt);
+      break;
+    case Diag::AK_Float: {
+      // FIXME: Support floating-point formatting in sanitizer_common's
+      //        printf, and stop using snprintf here.
+      char FloatBuffer[32];
 #if SANITIZER_WINDOWS
-        sprintf_s(Buffer, sizeof(Buffer), "%Lg", (long double)A.Float);
+      sprintf_s(FloatBuffer, sizeof(FloatBuffer), "%Lg", (long double)A.Float);
 #else
-        snprintf(Buffer, sizeof(Buffer), "%Lg", (long double)A.Float);
+      snprintf(FloatBuffer, sizeof(FloatBuffer), "%Lg", (long double)A.Float);
 #endif
-        Printf("%s", Buffer);
-        break;
-      }
-      case Diag::AK_Pointer:
-        Printf("%p", A.Pointer);
-        break;
-      }
+      Buffer->append("%s", FloatBuffer);
+      break;
+    }
+    case Diag::AK_Pointer:
+      Buffer->append("%p", A.Pointer);
+      break;
     }
   }
 }
@@ -253,9 +243,9 @@
 }
 
 /// Render a snippet of the address space near a location.
-static void renderMemorySnippet(const Decorator &Decor, MemoryLocation Loc,
-                                Range *Ranges, unsigned NumRanges,
-                                const Diag::Arg *Args) {
+static void PrintMemorySnippet(const Decorator &Decor, MemoryLocation Loc,
+                               Range *Ranges, unsigned NumRanges,
+                               const Diag::Arg *Args) {
   // Show at least the 8 bytes surrounding Loc.
   const unsigned MinBytesNearLoc = 4;
   MemoryLocation Min = subtractNoOverflow(Loc, MinBytesNearLoc);
@@ -278,14 +268,15 @@
   }
 
   // Emit data.
+  InternalScopedString Buffer(1024);
   for (uptr P = Min; P != Max; ++P) {
     unsigned char C = *reinterpret_cast<const unsigned char*>(P);
-    Printf("%s%02x", (P % 8 == 0) ? "  " : " ", C);
+    Buffer.append("%s%02x", (P % 8 == 0) ? "  " : " ", C);
   }
-  Printf("\n");
+  Buffer.append("\n");
 
   // Emit highlights.
-  Printf(Decor.Highlight());
+  Buffer.append(Decor.Highlight());
   Range *InRange = upperBound(Min, Ranges, NumRanges);
   for (uptr P = Min; P != Max; ++P) {
     char Pad = ' ', Byte = ' ';
@@ -297,10 +288,13 @@
       Pad = '~';
     if (InRange && InRange->getStart().getMemoryLocation() <= P)
       Byte = '~';
-    char Buffer[] = { Pad, Pad, P == Loc ? '^' : Byte, Byte, 0 };
-    Printf((P % 8 == 0) ? Buffer : &Buffer[1]);
+    if (P % 8 == 0)
+      Buffer.append("%c", Pad);
+    Buffer.append("%c", Pad);
+    Buffer.append("%c", P == Loc ? '^' : Byte);
+    Buffer.append("%c", Byte);
   }
-  Printf("%s\n", Decor.EndHighlight());
+  Buffer.append("%s\n", Decor.EndHighlight());
 
   // Go over the line again, and print names for the ranges.
   InRange = 0;
@@ -315,9 +309,9 @@
 
     if (InRange && InRange->getStart().getMemoryLocation() == P) {
       while (Spaces--)
-        Printf(" ");
-      renderText(InRange->getText(), Args);
-      Printf("\n");
+        Buffer.append(" ");
+      RenderText(&Buffer, InRange->getText(), Args);
+      Buffer.append("\n");
       // FIXME: We only support naming one range for now!
       break;
     }
@@ -325,6 +319,7 @@
     Spaces += 2;
   }
 
+  Printf("%s", Buffer.data());
   // FIXME: Print names for anything we can identify within the line:
   //
   //  * If we can identify the memory itself as belonging to a particular
@@ -341,28 +336,30 @@
   // All diagnostics should be printed under report mutex.
   CommonSanitizerReportMutex.CheckLocked();
   Decorator Decor;
-  Printf(Decor.Bold());
+  InternalScopedString Buffer(1024);
 
-  renderLocation(Loc);
+  Buffer.append(Decor.Bold());
+  RenderLocation(&Buffer, Loc);
+  Buffer.append(":");
 
   switch (Level) {
   case DL_Error:
-    Printf("%s runtime error: %s%s",
-           Decor.Warning(), Decor.EndWarning(), Decor.Bold());
+    Buffer.append("%s runtime error: %s%s", Decor.Warning(), Decor.EndWarning(),
+                  Decor.Bold());
     break;
 
   case DL_Note:
-    Printf("%s note: %s", Decor.Note(), Decor.EndNote());
+    Buffer.append("%s note: %s", Decor.Note(), Decor.EndNote());
     break;
   }
 
-  renderText(Message, Args);
+  RenderText(&Buffer, Message, Args);
 
-  Printf("%s\n", Decor.Default());
+  Buffer.append("%s\n", Decor.Default());
+  Printf("%s", Buffer.data());
 
   if (Loc.isMemoryLocation())
-    renderMemorySnippet(Decor, Loc.getMemoryLocation(), Ranges,
-                        NumRanges, Args);
+    PrintMemorySnippet(Decor, Loc.getMemoryLocation(), Ranges, NumRanges, Args);
 }
 
 ScopedReport::ScopedReport(ReportOptions Opts, Location SummaryLoc,
diff --git a/lib/ubsan/ubsan_flags.cc b/lib/ubsan/ubsan_flags.cc
index 20087b9..e77ba55 100644
--- a/lib/ubsan/ubsan_flags.cc
+++ b/lib/ubsan/ubsan_flags.cc
@@ -59,7 +59,7 @@
   parser.ParseString(MaybeCallUbsanDefaultOptions());
   // Override from environment variable.
   parser.ParseString(GetEnv("UBSAN_OPTIONS"));
-  SetVerbosity(common_flags()->verbosity);
+  InitializeCommonFlags();
   if (Verbosity()) ReportUnrecognizedFlags();
 
   if (common_flags()->help) parser.PrintFlagDescriptions();
diff --git a/lib/ubsan/ubsan_handlers.cc b/lib/ubsan/ubsan_handlers.cc
index 5d82e9a..4ede388 100644
--- a/lib/ubsan/ubsan_handlers.cc
+++ b/lib/ubsan/ubsan_handlers.cc
@@ -523,8 +523,11 @@
   Die();
 }
 
-static void handleCFIBadIcall(CFIBadIcallData *Data, ValueHandle Function,
+static void handleCFIBadIcall(CFICheckFailData *Data, ValueHandle Function,
                               ReportOptions Opts) {
+  if (Data->CheckKind != CFITCK_ICall)
+    Die();
+
   SourceLocation Loc = Data->Loc.acquire();
   ErrorType ET = ErrorType::CFIBadType;
 
@@ -544,16 +547,37 @@
   Diag(FLoc, DL_Note, "%0 defined here") << FName;
 }
 
-void __ubsan::__ubsan_handle_cfi_bad_icall(CFIBadIcallData *Data,
-                                           ValueHandle Function) {
+namespace __ubsan {
+#ifdef UBSAN_CAN_USE_CXXABI
+SANITIZER_WEAK_ATTRIBUTE
+void HandleCFIBadType(CFICheckFailData *Data, ValueHandle Vtable,
+                      bool ValidVtable, ReportOptions Opts);
+#else
+static void HandleCFIBadType(CFICheckFailData *Data, ValueHandle Vtable,
+                             bool ValidVtable, ReportOptions Opts) {
+  Die();
+}
+#endif
+}  // namespace __ubsan
+
+void __ubsan::__ubsan_handle_cfi_check_fail(CFICheckFailData *Data,
+                                            ValueHandle Value,
+                                            uptr ValidVtable) {
   GET_REPORT_OPTIONS(false);
-  handleCFIBadIcall(Data, Function, Opts);
+  if (Data->CheckKind == CFITCK_ICall)
+    handleCFIBadIcall(Data, Value, Opts);
+  else
+    HandleCFIBadType(Data, Value, ValidVtable, Opts);
 }
 
-void __ubsan::__ubsan_handle_cfi_bad_icall_abort(CFIBadIcallData *Data,
-                                                 ValueHandle Function) {
+void __ubsan::__ubsan_handle_cfi_check_fail_abort(CFICheckFailData *Data,
+                                                  ValueHandle Value,
+                                                  uptr ValidVtable) {
   GET_REPORT_OPTIONS(true);
-  handleCFIBadIcall(Data, Function, Opts);
+  if (Data->CheckKind == CFITCK_ICall)
+    handleCFIBadIcall(Data, Value, Opts);
+  else
+    HandleCFIBadType(Data, Value, ValidVtable, Opts);
   Die();
 }
 
diff --git a/lib/ubsan/ubsan_handlers.h b/lib/ubsan/ubsan_handlers.h
index 6f309cf..e0cfd5b 100644
--- a/lib/ubsan/ubsan_handlers.h
+++ b/lib/ubsan/ubsan_handlers.h
@@ -148,14 +148,25 @@
 /// \brief Handle passing null pointer to function with nonnull attribute.
 RECOVERABLE(nonnull_arg, NonNullArgData *Data)
 
-struct CFIBadIcallData {
+/// \brief Known CFI check kinds.
+/// Keep in sync with the enum of the same name in CodeGenFunction.h
+enum CFITypeCheckKind : unsigned char {
+  CFITCK_VCall,
+  CFITCK_NVCall,
+  CFITCK_DerivedCast,
+  CFITCK_UnrelatedCast,
+  CFITCK_ICall,
+};
+
+struct CFICheckFailData {
+  CFITypeCheckKind CheckKind;
   SourceLocation Loc;
   const TypeDescriptor &Type;
 };
 
-/// \brief Handle control flow integrity failure for indirect function calls.
-RECOVERABLE(cfi_bad_icall, CFIBadIcallData *Data, ValueHandle Function)
-
+/// \brief Handle control flow integrity failures.
+RECOVERABLE(cfi_check_fail, CFICheckFailData *Data, ValueHandle Function,
+            uptr VtableIsValid)
 }
 
 #endif // UBSAN_HANDLERS_H
diff --git a/lib/ubsan/ubsan_handlers_cxx.cc b/lib/ubsan/ubsan_handlers_cxx.cc
index 3e81be6..d97ec48 100644
--- a/lib/ubsan/ubsan_handlers_cxx.cc
+++ b/lib/ubsan/ubsan_handlers_cxx.cc
@@ -15,6 +15,7 @@
 
 #include "ubsan_platform.h"
 #if CAN_SANITIZE_UB
+#include "ubsan_handlers.h"
 #include "ubsan_handlers_cxx.h"
 #include "ubsan_diag.h"
 #include "ubsan_type_hash.h"
@@ -54,11 +55,17 @@
     << TypeCheckKinds[Data->TypeCheckKind] << (void*)Pointer << Data->Type;
 
   // If possible, say what type it actually points to.
-  if (!DTI.isValid())
-    Diag(Pointer, DL_Note, "object has invalid vptr")
-        << TypeName(DTI.getMostDerivedTypeName())
-        << Range(Pointer, Pointer + sizeof(uptr), "invalid vptr");
-  else if (!DTI.getOffset())
+  if (!DTI.isValid()) {
+    if (DTI.getOffset() < -VptrMaxOffsetToTop || DTI.getOffset() > VptrMaxOffsetToTop) {
+      Diag(Pointer, DL_Note, "object has a possibly invalid vptr: abs(offset to top) too big")
+          << TypeName(DTI.getMostDerivedTypeName())
+          << Range(Pointer, Pointer + sizeof(uptr), "possibly invalid vptr");
+    } else {
+      Diag(Pointer, DL_Note, "object has invalid vptr")
+          << TypeName(DTI.getMostDerivedTypeName())
+          << Range(Pointer, Pointer + sizeof(uptr), "invalid vptr");
+    }
+  } else if (!DTI.getOffset())
     Diag(Pointer, DL_Note, "object is of type %0")
         << TypeName(DTI.getMostDerivedTypeName())
         << Range(Pointer, Pointer + sizeof(uptr), "vptr for %0");
@@ -87,8 +94,9 @@
     Die();
 }
 
-static void HandleCFIBadType(CFIBadTypeData *Data, ValueHandle Vtable,
-                             ReportOptions Opts) {
+namespace __ubsan {
+void HandleCFIBadType(CFICheckFailData *Data, ValueHandle Vtable,
+                      bool ValidVtable, ReportOptions Opts) {
   SourceLocation Loc = Data->Loc.acquire();
   ErrorType ET = ErrorType::CFIBadType;
 
@@ -96,38 +104,44 @@
     return;
 
   ScopedReport R(Opts, Loc, ET);
-  DynamicTypeInfo DTI = getDynamicTypeInfoFromVtable((void*)Vtable);
+  DynamicTypeInfo DTI = ValidVtable
+                            ? getDynamicTypeInfoFromVtable((void *)Vtable)
+                            : DynamicTypeInfo(0, 0, 0);
 
-  static const char *TypeCheckKinds[] = {
-    "virtual call",
-    "non-virtual call",
-    "base-to-derived cast",
-    "cast to unrelated type",
-  };
+  const char *CheckKindStr;
+  switch (Data->CheckKind) {
+  case CFITCK_VCall:
+    CheckKindStr = "virtual call";
+    break;
+  case CFITCK_NVCall:
+    CheckKindStr = "non-virtual call";
+    break;
+  case CFITCK_DerivedCast:
+    CheckKindStr = "base-to-derived cast";
+    break;
+  case CFITCK_UnrelatedCast:
+    CheckKindStr = "cast to unrelated type";
+    break;
+  case CFITCK_ICall:
+    Die();
+  }
 
   Diag(Loc, DL_Error, "control flow integrity check for type %0 failed during "
                       "%1 (vtable address %2)")
-      << Data->Type << TypeCheckKinds[Data->TypeCheckKind] << (void *)Vtable;
+      << Data->Type << CheckKindStr << (void *)Vtable;
 
   // If possible, say what type it actually points to.
-  if (!DTI.isValid())
-    Diag(Vtable, DL_Note, "invalid vtable");
-  else
+  if (!DTI.isValid()) {
+    const char *module = Symbolizer::GetOrInit()->GetModuleNameForPc(Vtable);
+    if (module)
+      Diag(Vtable, DL_Note, "invalid vtable in module %0") << module;
+    else
+      Diag(Vtable, DL_Note, "invalid vtable");
+  } else {
     Diag(Vtable, DL_Note, "vtable is of type %0")
         << TypeName(DTI.getMostDerivedTypeName());
+  }
 }
+}  // namespace __ubsan
 
-void __ubsan::__ubsan_handle_cfi_bad_type(CFIBadTypeData *Data,
-                                          ValueHandle Vtable) {
-  GET_REPORT_OPTIONS(false);
-  HandleCFIBadType(Data, Vtable, Opts);
-}
-
-void __ubsan::__ubsan_handle_cfi_bad_type_abort(CFIBadTypeData *Data,
-                                                ValueHandle Vtable) {
-  GET_REPORT_OPTIONS(true);
-  HandleCFIBadType(Data, Vtable, Opts);
-  Die();
-}
-
-#endif  // CAN_SANITIZE_UB
+#endif // CAN_SANITIZE_UB
diff --git a/lib/ubsan/ubsan_handlers_cxx.h b/lib/ubsan/ubsan_handlers_cxx.h
index 92050d9..2ff014e 100644
--- a/lib/ubsan/ubsan_handlers_cxx.h
+++ b/lib/ubsan/ubsan_handlers_cxx.h
@@ -25,12 +25,6 @@
   unsigned char TypeCheckKind;
 };
 
-struct CFIBadTypeData {
-  SourceLocation Loc;
-  const TypeDescriptor &Type;
-  unsigned char TypeCheckKind;
-};
-
 /// \brief Handle a runtime type check failure, caused by an incorrect vptr.
 /// When this handler is called, all we know is that the type was not in the
 /// cache; this does not necessarily imply the existence of a bug.
@@ -40,14 +34,6 @@
 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
 void __ubsan_handle_dynamic_type_cache_miss_abort(
   DynamicTypeCacheMissData *Data, ValueHandle Pointer, ValueHandle Hash);
-
-/// \brief Handle a control flow integrity check failure by printing a
-/// diagnostic.
-extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
-__ubsan_handle_cfi_bad_type(CFIBadTypeData *Data, ValueHandle Vtable);
-extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
-__ubsan_handle_cfi_bad_type_abort(CFIBadTypeData *Data, ValueHandle Vtable);
-
 }
 
 #endif // UBSAN_HANDLERS_H
diff --git a/lib/ubsan/ubsan_init.cc b/lib/ubsan/ubsan_init.cc
index 73398ce..b4f42c4 100644
--- a/lib/ubsan/ubsan_init.cc
+++ b/lib/ubsan/ubsan_init.cc
@@ -39,6 +39,7 @@
   InitializeFlags();
   CacheBinaryName();
   __sanitizer_set_report_path(common_flags()->log_path);
+  AndroidLogInit();
   InitializeCoverage(common_flags()->coverage, common_flags()->coverage_dir);
   CommonInit();
   ubsan_mode = UBSAN_MODE_STANDALONE;
diff --git a/lib/ubsan/ubsan_platform.h b/lib/ubsan/ubsan_platform.h
index 002ecf3..1a3bfd6 100644
--- a/lib/ubsan/ubsan_platform.h
+++ b/lib/ubsan/ubsan_platform.h
@@ -16,7 +16,8 @@
 // Other platforms should be easy to add, and probably work as-is.
 #if (defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__)) && \
     (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || \
-     defined(__aarch64__) || defined(__mips__) || defined(__powerpc64__))
+     defined(__aarch64__) || defined(__mips__) || defined(__powerpc64__) || \
+     defined(__s390__))
 # define CAN_SANITIZE_UB 1
 #elif defined(_WIN32)
 # define CAN_SANITIZE_UB 1
diff --git a/lib/ubsan/ubsan_type_hash.h b/lib/ubsan/ubsan_type_hash.h
index 695fed9..aa63871 100644
--- a/lib/ubsan/ubsan_type_hash.h
+++ b/lib/ubsan/ubsan_type_hash.h
@@ -53,6 +53,10 @@
 
 const unsigned VptrTypeCacheSize = 128;
 
+/// A sanity check for Vtable. Offsets to top must be reasonably small
+/// numbers (by absolute value). It's a weak check for Vtable corruption.
+const int VptrMaxOffsetToTop = 1<<20;
+
 /// \brief A cache of the results of checkDynamicType. \c checkDynamicType would
 /// return \c true (modulo hash collisions) if
 /// \code
diff --git a/lib/ubsan/ubsan_type_hash_itanium.cc b/lib/ubsan/ubsan_type_hash_itanium.cc
index b84e88d..26272e3 100644
--- a/lib/ubsan/ubsan_type_hash_itanium.cc
+++ b/lib/ubsan/ubsan_type_hash_itanium.cc
@@ -115,7 +115,9 @@
 static bool isDerivedFromAtOffset(const abi::__class_type_info *Derived,
                                   const abi::__class_type_info *Base,
                                   sptr Offset) {
-  if (Derived->__type_name == Base->__type_name)
+  if (Derived->__type_name == Base->__type_name ||
+      (SANITIZER_NON_UNIQUE_TYPEINFO &&
+       !internal_strcmp(Derived->__type_name, Base->__type_name)))
     return Offset == 0;
 
   if (const abi::__si_class_type_info *SI =
@@ -219,6 +221,10 @@
   VtablePrefix *Vtable = getVtablePrefix(VtablePtr);
   if (!Vtable)
     return false;
+  if (Vtable->Offset < -VptrMaxOffsetToTop || Vtable->Offset > VptrMaxOffsetToTop) {
+    // Too large or too small offsets are signs of Vtable corruption.
+    return false;
+  }
 
   // Check that this is actually a type_info object for a class type.
   abi::__class_type_info *Derived =
@@ -241,6 +247,8 @@
   VtablePrefix *Vtable = getVtablePrefix(VtablePtr);
   if (!Vtable)
     return DynamicTypeInfo(0, 0, 0);
+  if (Vtable->Offset < -VptrMaxOffsetToTop || Vtable->Offset > VptrMaxOffsetToTop)
+    return DynamicTypeInfo(0, Vtable->Offset, 0);
   const abi::__class_type_info *ObjectType = findBaseAtOffset(
     static_cast<const abi::__class_type_info*>(Vtable->TypeInfo),
     -Vtable->Offset);
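
VptrMaxOffsetToTop (1 << 20, i.e. offsets within about ±1 MiB) bounds the offset-to-top slot read from a candidate vtable; values outside that window are treated as corruption instead of being followed. A standalone sketch of the check applied in the two hunks above (the helper name is hypothetical):

    // Sketch only: the bounds check applied to Vtable->Offset above.
    static bool OffsetToTopLooksSane(sptr Offset) {
      return Offset >= -VptrMaxOffsetToTop && Offset <= VptrMaxOffsetToTop;
    }
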
diff --git a/lib/ubsan/ubsan_value.cc b/lib/ubsan/ubsan_value.cc
index 79dc4c8..466834c 100644
--- a/lib/ubsan/ubsan_value.cc
+++ b/lib/ubsan/ubsan_value.cc
@@ -83,12 +83,12 @@
 #endif
       case 32: {
         float Value;
-#if defined(__BIG_ENDIAN__)
+#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        // For big endian the float value is in the last 4 bytes.
        // On some targets we may only have 4 bytes so we count backwards from
        // the end of Val to account for both the 32-bit and 64-bit cases.
        internal_memcpy(&Value, ((const char*)(&Val + 1)) - 4, 4);
-#else 
+#else
        internal_memcpy(&Value, &Val, 4);
 #endif
         return Value;
diff --git a/lib/xray/CMakeLists.txt b/lib/xray/CMakeLists.txt
new file mode 100644
index 0000000..2013bbf
--- /dev/null
+++ b/lib/xray/CMakeLists.txt
@@ -0,0 +1,43 @@
+# Build for the XRay runtime support library.
+
+set(XRAY_SOURCES
+  xray_init.cc
+  xray_interface.cc
+  xray_flags.cc
+)
+
+set(x86_64_SOURCES
+  xray_trampoline_x86.S
+  ${XRAY_SOURCES})
+
+include_directories(..)
+include_directories(../../include)
+
+set(XRAY_CFLAGS ${SANITIZER_COMMON_CFLAGS})
+
+set(XRAY_COMMON_DEFINITIONS XRAY_HAS_EXCEPTIONS=1)
+
+add_compiler_rt_object_libraries(RTXray
+  ARCHS ${XRAY_SUPPORTED_ARCH}
+  SOURCES ${XRAY_SOURCES} CFLAGS ${XRAY_CFLAGS}
+  DEFS ${XRAY_COMMON_DEFINITIONS})
+
+add_custom_target(xray)
+set(XRAY_COMMON_RUNTIME_OBJECT_LIBS
+  RTSanitizerCommon
+  RTSanitizerCommonLibc)
+
+foreach (arch ${XRAY_SUPPORTED_ARCH})
+  if (CAN_TARGET_${arch})
+    add_compiler_rt_runtime(clang_rt.xray
+      STATIC
+      ARCHS ${arch}
+      SOURCES ${${arch}_SOURCES}
+      CFLAGS ${XRAY_CFLAGS}
+      DEFS ${XRAY_COMMON_DEFINITIONS}
+      OBJECT_LIBS ${XRAY_COMMON_RUNTIME_OBJECT_LIBS}
+      PARENT_TARGET xray)
+  endif()
+endforeach()
+
+add_dependencies(compiler-rt xray)
diff --git a/lib/xray/xray_flags.cc b/lib/xray/xray_flags.cc
new file mode 100644
index 0000000..6f82912
--- /dev/null
+++ b/lib/xray/xray_flags.cc
@@ -0,0 +1,61 @@
+//===-- xray_flags.cc -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay flag parsing logic.
+//===----------------------------------------------------------------------===//
+
+#include "xray_flags.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_libc.h"
+
+using namespace __sanitizer;
+
+namespace __xray {
+
+Flags xray_flags_dont_use_directly; // use via flags().
+
+void Flags::SetDefaults() {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
+#include "xray_flags.inc"
+#undef XRAY_FLAG
+}
+
+static void RegisterXRayFlags(FlagParser *P, Flags *F) {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description)                       \
+  RegisterFlag(P, #Name, Description, &F->Name);
+#include "xray_flags.inc"
+#undef XRAY_FLAG
+}
+
+void InitializeFlags() {
+  SetCommonFlagsDefaults();
+  auto *F = flags();
+  F->SetDefaults();
+
+  FlagParser XRayParser;
+  RegisterXRayFlags(&XRayParser, F);
+  RegisterCommonFlags(&XRayParser);
+
+  // Override from command line.
+  XRayParser.ParseString(GetEnv("XRAY_OPTIONS"));
+
+  InitializeCommonFlags();
+
+  if (Verbosity())
+    ReportUnrecognizedFlags();
+
+  if (common_flags()->help) {
+    XRayParser.PrintFlagDescriptions();
+  }
+}
+
+} // namespace __xray
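
The XRAY_FLAG X-macro in xray_flags.inc is expanded three times: as a struct member in xray_flags.h, as an assignment in SetDefaults(), and as a RegisterFlag() call in RegisterXRayFlags(). For the single flag defined in this patch the expansion is roughly the following sketch (not literal preprocessor output):

    // Approximate expansion of XRAY_FLAG(bool, patch_premain, true, "...").
    struct Flags {
      bool patch_premain;                    // member declaration (xray_flags.h)
      void SetDefaults();
    };
    void Flags::SetDefaults() {
      patch_premain = true;                  // default value (xray_flags.cc)
    }
    static void RegisterXRayFlags(FlagParser *P, Flags *F) {
      RegisterFlag(P, "patch_premain",
                   "Whether to patch instrumentation points before main.",
                   &F->patch_premain);       // registration (xray_flags.cc)
    }

At run time the flags can then be overridden through the XRAY_OPTIONS environment variable, which InitializeFlags() reads via GetEnv() and parses before the common flags are finalized.
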
diff --git a/lib/xray/xray_flags.h b/lib/xray/xray_flags.h
new file mode 100644
index 0000000..2ecf5fb
--- /dev/null
+++ b/lib/xray/xray_flags.h
@@ -0,0 +1,37 @@
+//===-- xray_flags.h -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay runtime flags.
+//===----------------------------------------------------------------------===//
+
+#ifndef XRAY_FLAGS_H
+#define XRAY_FLAGS_H
+
+#include "sanitizer_common/sanitizer_flag_parser.h"
+
+namespace __xray {
+
+struct Flags {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) Type Name;
+#include "xray_flags.inc"
+#undef XRAY_FLAG
+
+  void SetDefaults();
+};
+
+extern Flags xray_flags_dont_use_directly;
+inline Flags *flags() { return &xray_flags_dont_use_directly; }
+
+void InitializeFlags();
+
+} // namespace __xray
+
+#endif // XRAY_FLAGS_H
diff --git a/lib/xray/xray_flags.inc b/lib/xray/xray_flags.inc
new file mode 100644
index 0000000..bc2f45e
--- /dev/null
+++ b/lib/xray/xray_flags.inc
@@ -0,0 +1,18 @@
+//===-- xray_flags.inc ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// XRay runtime flags.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_FLAG
+#error "Define XRAY_FLAG prior to including this file!"
+#endif
+
+XRAY_FLAG(bool, patch_premain, true,
+          "Whether to patch instrumentation points before main.")
diff --git a/lib/xray/xray_init.cc b/lib/xray/xray_init.cc
new file mode 100644
index 0000000..e5dc0d9
--- /dev/null
+++ b/lib/xray/xray_init.cc
@@ -0,0 +1,68 @@
+//===-- xray_init.cc --------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay initialisation logic.
+//===----------------------------------------------------------------------===//
+
+#include <atomic>
+#include <fcntl.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_flags.h"
+#include "xray_interface_internal.h"
+
+extern "C" {
+void __xray_init();
+extern const XRaySledEntry __start_xray_instr_map[] __attribute__((weak));
+extern const XRaySledEntry __stop_xray_instr_map[] __attribute__((weak));
+}
+
+using namespace __xray;
+
+// When set to 'true' this means the XRay runtime has been initialised. We use
+// the weak symbols defined above (__start_xray_instr_map and
+// __stop_xray_instr_map) to initialise the instrumentation map that XRay uses
+// for runtime patching/unpatching of instrumentation points.
+//
+// FIXME: Support DSO instrumentation maps too. The current solution only works
+// for statically linked executables.
+std::atomic<bool> XRayInitialized{false};
+
+// This should always be updated before XRayInitialized is updated.
+std::atomic<__xray::XRaySledMap> XRayInstrMap{};
+
+// __xray_init() will do the actual loading of the current process' memory map
+// and then proceed to look for the .xray_instr_map section/segment.
+void __xray_init() {
+  InitializeFlags();
+  if (__start_xray_instr_map == nullptr) {
+    Report("XRay instrumentation map missing. Not initializing XRay.\n");
+    return;
+  }
+
+  // Now initialize the XRayInstrMap global struct with the address of the
+  // entries, reinterpreted as an array of XRaySledEntry objects. We use the
+  // virtual pointer we have from the section to provide us the correct
+  // information.
+  __xray::XRaySledMap SledMap{};
+  SledMap.Sleds = __start_xray_instr_map;
+  SledMap.Entries = __stop_xray_instr_map - __start_xray_instr_map;
+  XRayInstrMap.store(SledMap, std::memory_order_release);
+  XRayInitialized.store(true, std::memory_order_release);
+
+  if (flags()->patch_premain)
+    __xray_patch();
+}
+
+__attribute__((section(".preinit_array"),
+               used)) void (*__local_xray_preinit)(void) = __xray_init;
diff --git a/lib/xray/xray_interface.cc b/lib/xray/xray_interface.cc
new file mode 100644
index 0000000..5ef3fc7
--- /dev/null
+++ b/lib/xray/xray_interface.cc
@@ -0,0 +1,270 @@
+//===-- xray_interface.cc ---------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of the API functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "xray_interface_internal.h"
+
+#include <atomic>
+#include <cstdint>
+#include <cstdio>
+#include <errno.h>
+#include <limits>
+#include <sys/mman.h>
+
+#include "sanitizer_common/sanitizer_common.h"
+
+namespace __xray {
+
+// This is the function to call when we encounter the entry or exit sleds.
+std::atomic<void (*)(int32_t, XRayEntryType)> XRayPatchedFunction{nullptr};
+
+// MProtectHelper is an RAII wrapper for calls to mprotect(...) that will undo
+// any successful mprotect(...) changes. It is used to make a page writeable
+// and executable; on destruction, if the change succeeded, it restores the
+// page to read-and-execute-only protection.
+//
+// This is only used specifically for runtime-patching of the XRay
+// instrumentation points. This assumes that the executable pages are originally
+// read-and-execute only.
+class MProtectHelper {
+  void *PageAlignedAddr;
+  std::size_t MProtectLen;
+  bool MustCleanup;
+
+public:
+  explicit MProtectHelper(void *PageAlignedAddr, std::size_t MProtectLen)
+      : PageAlignedAddr(PageAlignedAddr), MProtectLen(MProtectLen),
+        MustCleanup(false) {}
+
+  int MakeWriteable() {
+    auto R = mprotect(PageAlignedAddr, MProtectLen,
+                      PROT_READ | PROT_WRITE | PROT_EXEC);
+    if (R != -1)
+      MustCleanup = true;
+    return R;
+  }
+
+  ~MProtectHelper() {
+    if (MustCleanup) {
+      mprotect(PageAlignedAddr, MProtectLen, PROT_READ | PROT_EXEC);
+    }
+  }
+};
+
+} // namespace __xray
+
+extern "C" {
+// The following functions have to be defined in assembler, on a per-platform
+// basis. See xray_trampoline_*.s files for implementations.
+extern void __xray_FunctionEntry();
+extern void __xray_FunctionExit();
+}
+
+extern std::atomic<bool> XRayInitialized;
+extern std::atomic<__xray::XRaySledMap> XRayInstrMap;
+
+int __xray_set_handler(void (*entry)(int32_t, XRayEntryType)) {
+  if (XRayInitialized.load(std::memory_order_acquire)) {
+    __xray::XRayPatchedFunction.store(entry, std::memory_order_release);
+    return 1;
+  }
+  return 0;
+}
+
+int __xray_remove_handler() { return __xray_set_handler(nullptr); }
+
+std::atomic<bool> XRayPatching{false};
+
+using namespace __xray;
+
+// FIXME: Figure out whether we can move this class to sanitizer_common instead
+// as a generic "scope guard".
+template <class Function> class CleanupInvoker {
+  Function Fn;
+
+public:
+  explicit CleanupInvoker(Function Fn) : Fn(Fn) {}
+  CleanupInvoker(const CleanupInvoker &) = default;
+  CleanupInvoker(CleanupInvoker &&) = default;
+  CleanupInvoker &operator=(const CleanupInvoker &) = delete;
+  CleanupInvoker &operator=(CleanupInvoker &&) = delete;
+  ~CleanupInvoker() { Fn(); }
+};
+
+template <class Function> CleanupInvoker<Function> ScopeCleanup(Function Fn) {
+  return CleanupInvoker<Function>{Fn};
+}
+
+// ControlPatching implements the common internals of the patching/unpatching
+// implementation. |Enable| defines whether we're enabling or disabling the
+// runtime XRay instrumentation.
+XRayPatchingStatus ControlPatching(bool Enable) {
+  if (!XRayInitialized.load(std::memory_order_acquire))
+    return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
+
+  static bool NotPatching = false;
+  if (!XRayPatching.compare_exchange_strong(NotPatching, true,
+                                            std::memory_order_acq_rel,
+                                            std::memory_order_acquire)) {
+    return XRayPatchingStatus::ONGOING; // Already patching.
+  }
+
+  bool PatchingSuccess = false;
+  auto XRayPatchingStatusResetter = ScopeCleanup([&PatchingSuccess] {
+    if (!PatchingSuccess) {
+      XRayPatching.store(false, std::memory_order_release);
+    }
+  });
+
+  // Step 1: Compute the function id, as a unique identifier per function in the
+  // instrumentation map.
+  XRaySledMap InstrMap = XRayInstrMap.load(std::memory_order_acquire);
+  if (InstrMap.Entries == 0)
+    return XRayPatchingStatus::NOT_INITIALIZED;
+
+  int32_t FuncId = 1;
+  static constexpr uint8_t CallOpCode = 0xe8;
+  static constexpr uint16_t MovR10Seq = 0xba41;
+  static constexpr uint16_t Jmp9Seq = 0x09eb;
+  static constexpr uint8_t JmpOpCode = 0xe9;
+  static constexpr uint8_t RetOpCode = 0xc3;
+  uint64_t CurFun = 0;
+  for (std::size_t I = 0; I < InstrMap.Entries; I++) {
+    auto Sled = InstrMap.Sleds[I];
+    auto F = Sled.Function;
+    if (CurFun == 0)
+      CurFun = F;
+    if (F != CurFun) {
+      ++FuncId;
+      CurFun = F;
+    }
+
+    // While we're here, we should patch the nop sled. To do that we mprotect
+    // the page containing the function to be writeable.
+    void *PageAlignedAddr =
+        reinterpret_cast<void *>(Sled.Address & ~((2 << 16) - 1));
+    std::size_t MProtectLen =
+        (Sled.Address + 12) - reinterpret_cast<uint64_t>(PageAlignedAddr);
+    MProtectHelper Protector(PageAlignedAddr, MProtectLen);
+    if (Protector.MakeWriteable() == -1) {
+      printf("Failed mprotect: %d\n", errno);
+      return XRayPatchingStatus::FAILED;
+    }
+
+    static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
+    static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
+    if (Sled.Kind == XRayEntryType::ENTRY) {
+      // FIXME: Implement this in a more extensible manner, per-platform.
+      // Here we do the dance of replacing the following sled:
+      //
+      // xray_sled_n:
+      //   jmp +9
+      //   <9 byte nop>
+      //
+      // With the following:
+      //
+      //   mov r10d, <function id>
+      //   call <relative 32bit offset to entry trampoline>
+      //
+      // We need to do this in the following order:
+      //
+      // 1. Put the function id first, 2 bytes from the start of the sled (just
+      // after the 2-byte jmp instruction).
+      // 2. Put the call opcode 6 bytes from the start of the sled.
+      // 3. Put the relative offset 7 bytes from the start of the sled.
+      // 4. Do an atomic write over the jmp instruction for the "mov r10d"
+      // opcode and first operand.
+      //
+      // Prerequisite is to compute the relative offset to the
+      // __xray_FunctionEntry function's address.
+      int64_t TrampolineOffset =
+          reinterpret_cast<int64_t>(__xray_FunctionEntry) -
+          (static_cast<int64_t>(Sled.Address) + 11);
+      if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+        Report("XRay Entry trampoline (%p) too far from sled (%p); distance = "
+               "%ld\n",
+               __xray_FunctionEntry, reinterpret_cast<void *>(Sled.Address),
+               TrampolineOffset);
+        continue;
+      }
+      if (Enable) {
+        *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+        *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
+        *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+        std::atomic_store_explicit(
+            reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+            std::memory_order_release);
+      } else {
+        std::atomic_store_explicit(
+            reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
+            std::memory_order_release);
+        // FIXME: Write out the nops still?
+      }
+    }
+
+    if (Sled.Kind == XRayEntryType::EXIT) {
+      // FIXME: Implement this in a more extensible manner, per-platform.
+      // Here we do the dance of replacing the following sled:
+      //
+      // xray_sled_n:
+      //   ret
+      //   <10 byte nop>
+      //
+      // With the following:
+      //
+      //   mov r10d, <function id>
+      //   jmp <relative 32bit offset to exit trampoline>
+      //
+      // 1. Put the function id first, 2 bytes from the start of the sled (just
+      // after the 1-byte ret instruction).
+      // 2. Put the jmp opcode 6 bytes from the start of the sled.
+      // 3. Put the relative offset 7 bytes from the start of the sled.
+      // 4. Do an atomic write over the jmp instruction for the "mov r10d"
+      // opcode and first operand.
+      //
+      // Prerequisite is to compute the relative offset of the
+      // __xray_FunctionExit function's address.
+      int64_t TrampolineOffset =
+          reinterpret_cast<int64_t>(__xray_FunctionExit) -
+          (static_cast<int64_t>(Sled.Address) + 11);
+      if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+        Report("XRay Exit trampoline (%p) too far from sled (%p); distance = "
+               "%ld\n",
+               __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address),
+               TrampolineOffset);
+        continue;
+      }
+      if (Enable) {
+        *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+        *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
+        *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+        std::atomic_store_explicit(
+            reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+            std::memory_order_release);
+      } else {
+        std::atomic_store_explicit(
+            reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode,
+            std::memory_order_release);
+        // FIXME: Write out the nops still?
+      }
+    }
+  }
+  XRayPatching.store(false, std::memory_order_release);
+  PatchingSuccess = true;
+  return XRayPatchingStatus::SUCCESS;
+}
+
+XRayPatchingStatus __xray_patch() { return ControlPatching(true); }
+
+XRayPatchingStatus __xray_unpatch() { return ControlPatching(false); }
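
The sled-patching comments above describe the rewrite in prose; the byte layout implied by the constants (Jmp9Seq, MovR10Seq, CallOpCode) works out to the following sketch, with immediates stored little-endian and offsets measured from Sled.Address:

    // Entry sled, 11 bytes, before patching:
    //   [0]  eb 09            jmp +9           (Jmp9Seq == 0x09eb)
    //   [2]  <9 bytes of nop>
    // Entry sled after patching:
    //   [0]  41 ba            mov r10d, imm32  (MovR10Seq == 0xba41)
    //   [2]  <FuncId as imm32>
    //   [6]  e8               call rel32       (CallOpCode == 0xe8)
    //   [7]  <rel32 to __xray_FunctionEntry, relative to Sled.Address + 11>
    // The imm32, call opcode and rel32 are written first; the final two-byte
    // atomic store of MovR10Seq over the jmp publishes the patched sequence.

The exit sled follows the same pattern, restoring RetOpCode when unpatching and using JmpOpCode plus the offset to __xray_FunctionExit when patching.
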
diff --git a/lib/xray/xray_interface_internal.h b/lib/xray/xray_interface_internal.h
new file mode 100644
index 0000000..6208c11
--- /dev/null
+++ b/lib/xray/xray_interface_internal.h
@@ -0,0 +1,42 @@
+//===-- xray_interface_internal.h -------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of the API functions. See also include/xray/xray_interface.h.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_INTERFACE_INTERNAL_H
+#define XRAY_INTERFACE_INTERNAL_H
+
+#include "xray/xray_interface.h"
+#include <cstddef>
+#include <cstdint>
+
+extern "C" {
+
+struct XRaySledEntry {
+  uint64_t Address;
+  uint64_t Function;
+  unsigned char Kind;
+  unsigned char AlwaysInstrument;
+  unsigned char Padding[14]; // Need 32 bytes
+};
+}
+
+namespace __xray {
+
+struct XRaySledMap {
+  const XRaySledEntry *Sleds;
+  size_t Entries;
+};
+
+} // namespace __xray
+
+#endif
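
The Padding comment pins XRaySledEntry to 32 bytes so that the linker-collected .xray_instr_map section can be walked as a plain array (Entries = __stop - __start in xray_init.cc). A compile-time check along these lines, not part of the patch, would make the assumption explicit:

    // 8 (Address) + 8 (Function) + 1 (Kind) + 1 (AlwaysInstrument) + 14 (Padding) == 32.
    static_assert(sizeof(XRaySledEntry) == 32,
                  "XRaySledEntry must be exactly 32 bytes");
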
diff --git a/lib/xray/xray_trampoline_x86.S b/lib/xray/xray_trampoline_x86.S
new file mode 100644
index 0000000..8580396
--- /dev/null
+++ b/lib/xray/xray_trampoline_x86.S
@@ -0,0 +1,112 @@
+//===-- xray_trampoline_x86.s -----------------------------------*- ASM -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the X86-specific assembler for the trampolines.
+//
+//===----------------------------------------------------------------------===//
+
+	.text
+	.file "xray_trampoline_x86.S"
+	.globl __xray_FunctionEntry
+	.align 16, 0x90
+	.type __xray_FunctionEntry,@function
+
+__xray_FunctionEntry:
+  .cfi_startproc
+  // Save caller provided registers before doing any actual work.
+	pushq %rbp
+	.cfi_def_cfa_offset 16
+	subq $200, %rsp
+	movupd	%xmm0, 184(%rsp)
+	movupd	%xmm1, 168(%rsp)
+	movupd	%xmm2, 152(%rsp)
+	movupd	%xmm3, 136(%rsp)
+	movupd	%xmm4, 120(%rsp)
+	movupd	%xmm5, 104(%rsp)
+	movupd	%xmm6, 88(%rsp)
+	movupd	%xmm7, 72(%rsp)
+	movq	%rdi, 64(%rsp)
+	movq  %rax, 56(%rsp)
+	movq  %rdx, 48(%rsp)
+	movq	%rsi, 40(%rsp)
+	movq	%rcx, 32(%rsp)
+	movq	%r8, 24(%rsp)
+	movq	%r9, 16(%rsp)
+
+	// Demangled, this is __xray::XRayPatchedFunction. We do an acquire load,
+	// which on x86 is a plain mov instruction.
+	movq	_ZN6__xray19XRayPatchedFunctionE(%rip), %rax
+	testq	%rax, %rax
+	je	.Ltmp0
+
+	// assume that %r10d has the function id.
+	movl	%r10d, %edi
+	xor	%esi,%esi
+	callq	*%rax
+.Ltmp0:
+  // restore the registers
+	movupd	184(%rsp), %xmm0
+	movupd	168(%rsp), %xmm1
+	movupd	152(%rsp), %xmm2
+	movupd	136(%rsp), %xmm3
+	movupd	120(%rsp), %xmm4
+	movupd	104(%rsp), %xmm5
+	movupd	88(%rsp) , %xmm6
+	movupd	72(%rsp) , %xmm7
+	movq	64(%rsp), %rdi
+	movq  56(%rsp), %rax
+	movq  48(%rsp), %rdx
+	movq	40(%rsp), %rsi
+	movq	32(%rsp), %rcx
+	movq	24(%rsp), %r8
+	movq	16(%rsp), %r9
+	addq	$200, %rsp
+	popq	%rbp
+	retq
+.Ltmp1:
+	.size __xray_FunctionEntry, .Ltmp1-__xray_FunctionEntry
+	.cfi_endproc
+
+	.globl __xray_FunctionExit
+	.align 16, 0x90
+	.type __xray_FunctionExit,@function
+__xray_FunctionExit:
+	.cfi_startproc
+	// Save the important registers first. Since we're assuming that this
+	// function is only jumped into, we only preserve the registers for
+	// returning.
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	subq	$56, %rsp
+	.cfi_def_cfa_offset 32
+	movupd	%xmm0, 40(%rsp)
+	movupd	%xmm1, 24(%rsp)
+	movq	%rax, 16(%rsp)
+	movq	%rdx, 8(%rsp)
+	movq	_ZN6__xray19XRayPatchedFunctionE(%rip), %rax
+	testq %rax,%rax
+	je	.Ltmp2
+
+	movl	%r10d, %edi
+	movl	$1, %esi
+	callq	*%rax
+.Ltmp2:
+  // Restore the important registers.
+	movupd	40(%rsp), %xmm0
+	movupd	24(%rsp), %xmm1
+	movq	16(%rsp), %rax
+	movq	8(%rsp), %rdx
+	addq	$56, %rsp
+	popq	%rbp
+	retq
+.Ltmp3:
+	.size __xray_FunctionExit, .Ltmp3-__xray_FunctionExit
+	.cfi_endproc
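
Both trampolines forward to whatever handler was installed via __xray_set_handler, passing the function id in %edi and an entry/exit indicator in %esi (0 in __xray_FunctionEntry, 1 in __xray_FunctionExit). A hypothetical consumer, using only the public API exercised by this patch (XRayEntryType and the __xray_* declarations come from include/xray/xray_interface.h, which is outside this diff), might look like:

    #include "xray/xray_interface.h"
    #include <atomic>
    #include <cstdint>
    #include <cstdio>

    static std::atomic<std::uint64_t> Entries{0}, Exits{0};

    static void CountingHandler(int32_t FuncId, XRayEntryType Type) {
      // Handlers run on every instrumented entry/exit, so keep them cheap.
      (void)FuncId;
      if (Type == XRayEntryType::ENTRY)
        Entries.fetch_add(1, std::memory_order_relaxed);
      else
        Exits.fetch_add(1, std::memory_order_relaxed);
    }

    int main() {
      __xray_set_handler(CountingHandler); // Succeeds once the runtime is initialized.
      __xray_patch();                      // Rewrite the nop sleds to call the trampolines.
      // ... instrumented workload ...
      __xray_unpatch();
      std::printf("entries=%llu exits=%llu\n",
                  static_cast<unsigned long long>(Entries.load()),
                  static_cast<unsigned long long>(Exits.load()));
      return 0;
    }
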
diff --git a/make/platform/clang_darwin.mk b/make/platform/clang_darwin.mk
index 9944481..ccbee8b 100644
--- a/make/platform/clang_darwin.mk
+++ b/make/platform/clang_darwin.mk
@@ -101,26 +101,6 @@
 Configs += cc_kext_ios
 UniversalArchs.cc_kext_ios += $(call CheckArches,armv7,cc_kext_ios,$(IOS_SDK))
 
-# Configurations which define the profiling support functions.
-Configs += profile_osx
-UniversalArchs.profile_osx := $(call CheckArches,i386 x86_64 x86_64h,profile_osx,$(OSX_SDK))
-Configs += profile_ios
-UniversalArchs.profile_ios := $(call CheckArches,i386 x86_64,profile_ios,$(IOSSIM_SDK))
-UniversalArchs.profile_ios += $(call CheckArches,armv7 arm64,profile_ios,$(IOS_SDK))
-
-# Configurations which define the ASAN support functions.
-Configs += asan_osx_dynamic
-UniversalArchs.asan_osx_dynamic := $(call CheckArches,i386 x86_64 x86_64h,asan_osx_dynamic,$(OSX_SDK))
-
-Configs += asan_iossim_dynamic
-UniversalArchs.asan_iossim_dynamic := $(call CheckArches,i386 x86_64,asan_iossim_dynamic,$(IOSSIM_SDK))
-
-Configs += ubsan_osx_dynamic
-UniversalArchs.ubsan_osx_dynamic := $(call CheckArches,i386 x86_64 x86_64h,ubsan_osx_dynamic,$(OSX_SDK))
-
-Configs += ubsan_iossim_dynamic
-UniversalArchs.ubsan_iossim_dynamic := $(call CheckArches,i386 x86_64,ubsan_iossim_dynamic,$(IOSSIM_SDK))
-
 # Darwin 10.6 has a bug in cctools that makes it unable to use ranlib on our ARM
 # object files. If we are on that platform, strip out all ARM archs. We still
 # build the libraries themselves so that Clang can find them where it expects
@@ -128,7 +108,6 @@
 ifneq ($(shell test -x /usr/bin/sw_vers && sw_vers -productVersion | grep 10.6),)
 UniversalArchs.ios := $(filter-out armv7, $(UniversalArchs.ios))
 UniversalArchs.cc_kext_ios := $(filter-out armv7, $(UniversalArchs.cc_kext_ios))
-UniversalArchs.profile_ios := $(filter-out armv7, $(UniversalArchs.profile_ios))
 endif
 
 # If RC_SUPPORTED_ARCHS is defined, treat it as a list of the architectures we
@@ -175,26 +154,6 @@
   -isysroot $(IOSSIM_SDK)
 SANITIZER_CFLAGS := -fno-builtin -gline-tables-only -stdlib=libc++
 
-CFLAGS.asan_osx_dynamic := \
-	$(CFLAGS) $(SANITIZER_MACOSX_DEPLOYMENT_ARGS) \
-	$(SANITIZER_CFLAGS) \
-	-DMAC_INTERPOSE_FUNCTIONS=1 \
-	-DASAN_DYNAMIC=1
-
-CFLAGS.asan_iossim_dynamic := \
-	$(CFLAGS) $(SANITIZER_IOSSIM_DEPLOYMENT_ARGS) \
-	$(SANITIZER_CFLAGS) \
-	-DMAC_INTERPOSE_FUNCTIONS=1 \
-	-DASAN_DYNAMIC=1
-
-CFLAGS.ubsan_osx_dynamic := \
-	$(CFLAGS) $(SANITIZER_MACOSX_DEPLOYMENT_ARGS) \
-	$(SANITIZER_CFLAGS)
-
-CFLAGS.ubsan_iossim_dynamic := \
-	$(CFLAGS) $(SANITIZER_IOSSIM_DEPLOYMENT_ARGS) \
-	$(SANITIZER_CFLAGS)
-
 
 CFLAGS.ios.i386		:= $(CFLAGS) $(IOSSIM_DEPLOYMENT_ARGS)
 CFLAGS.ios.x86_64	:= $(CFLAGS) $(IOSSIM_DEPLOYMENT_ARGS)
@@ -212,41 +171,9 @@
 CFLAGS.cc_kext_ios.armv7k	:= $(CFLAGS) $(IOS6_DEPLOYMENT_ARGS)
 CFLAGS.cc_kext_ios.armv7s	:= $(CFLAGS) $(IOS6_DEPLOYMENT_ARGS)
 CFLAGS.cc_kext_ios.arm64	:= $(CFLAGS) $(IOS6_DEPLOYMENT_ARGS)
-CFLAGS.profile_osx.i386    := $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
-CFLAGS.profile_osx.x86_64  := $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
-CFLAGS.profile_osx.x86_64h := $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
-CFLAGS.profile_ios.i386    := $(CFLAGS) $(IOSSIM_DEPLOYMENT_ARGS)
-CFLAGS.profile_ios.x86_64  := $(CFLAGS) $(IOSSIM_DEPLOYMENT_ARGS)
-CFLAGS.profile_ios.armv7  := $(CFLAGS) $(IOS_DEPLOYMENT_ARGS)
-CFLAGS.profile_ios.armv7k := $(CFLAGS) $(IOS_DEPLOYMENT_ARGS)
-CFLAGS.profile_ios.armv7s := $(CFLAGS) $(IOS_DEPLOYMENT_ARGS)
-CFLAGS.profile_ios.arm64  := $(CFLAGS) $(IOS6_DEPLOYMENT_ARGS)
 
 SANITIZER_LDFLAGS := -stdlib=libc++ -lc++ -lc++abi
 
-SHARED_LIBRARY.asan_osx_dynamic := 1
-LDFLAGS.asan_osx_dynamic := $(SANITIZER_LDFLAGS) -install_name @rpath/libclang_rt.asan_osx_dynamic.dylib \
-  $(SANITIZER_MACOSX_DEPLOYMENT_ARGS)
-
-SHARED_LIBRARY.asan_iossim_dynamic := 1
-LDFLAGS.asan_iossim_dynamic := $(SANITIZER_LDFLAGS) -install_name @rpath/libclang_rt.asan_iossim_dynamic.dylib \
-  -Wl,-ios_simulator_version_min,7.0.0 $(SANITIZER_IOSSIM_DEPLOYMENT_ARGS)
-
-SHARED_LIBRARY.ubsan_osx_dynamic := 1
-LDFLAGS.ubsan_osx_dynamic := $(SANITIZER_LDFLAGS) -install_name @rpath/libclang_rt.ubsan_osx_dynamic.dylib \
-  $(SANITIZER_MACOSX_DEPLOYMENT_ARGS)
-
-SHARED_LIBRARY.ubsan_iossim_dynamic := 1
-LDFLAGS.ubsan_iossim_dynamic := $(SANITIZER_LDFLAGS) -install_name @rpath/libclang_rt.ubsan_iossim_dynamic.dylib \
-  -Wl,-ios_simulator_version_min,7.0.0 $(SANITIZER_IOSSIM_DEPLOYMENT_ARGS)
-
-ifneq ($(OSX_SDK),)
-CFLAGS.asan_osx_dynamic += -isysroot $(OSX_SDK)
-LDFLAGS.asan_osx_dynamic += -isysroot $(OSX_SDK)
-CFLAGS.ubsan_osx_dynamic += -isysroot $(OSX_SDK)
-LDFLAGS.ubsan_osx_dynamic += -isysroot $(OSX_SDK)
-endif
-
 ATOMIC_FUNCTIONS := \
 	atomic_flag_clear \
 	atomic_flag_clear_explicit \
@@ -274,32 +201,6 @@
 
 FUNCTIONS.osx	:= mulosi4 mulodi4 muloti4 $(ATOMIC_FUNCTIONS) $(FP16_FUNCTIONS)
 
-FUNCTIONS.profile_osx := GCDAProfiling InstrProfiling InstrProfilingBuffer \
-                         InstrProfilingFile InstrProfilingPlatformDarwin \
-                         InstrProfilingRuntime InstrProfilingUtil \
-                         InstrProfilingWriter InstrProfilingValue
-FUNCTIONS.profile_ios := $(FUNCTIONS.profile_osx)
-
-FUNCTIONS.asan_osx_dynamic := $(AsanFunctions) $(AsanCXXFunctions) \
-                              $(InterceptionFunctions) \
-                              $(SanitizerCommonFunctions) \
-                              $(AsanDynamicFunctions) \
-                              $(UbsanFunctions) $(UbsanCXXFunctions)
-
-FUNCTIONS.asan_iossim_dynamic := $(AsanFunctions) $(AsanCXXFunctions) \
-                                 $(InterceptionFunctions) \
-                                 $(SanitizerCommonFunctions) \
-                                 $(AsanDynamicFunctions) \
-                                 $(UbsanFunctions) $(UbsanCXXFunctions)
-
-FUNCTIONS.ubsan_osx_dynamic := $(UbsanFunctions) $(UbsanCXXFunctions) \
-                               $(SanitizerCommonFunctions) \
-                               $(UbsanStandaloneFunctions)
-
-FUNCTIONS.ubsan_iossim_dynamic := $(UbsanFunctions) $(UbsanCXXFunctions) \
-                                  $(SanitizerCommonFunctions) \
-                                  $(UbsanStandaloneFunctions)
-
 CCKEXT_PROFILE_FUNCTIONS := \
 	InstrProfiling \
 	InstrProfilingBuffer \
diff --git a/make/platform/clang_linux.mk b/make/platform/clang_linux.mk
index bf5ee4a..870209f 100644
--- a/make/platform/clang_linux.mk
+++ b/make/platform/clang_linux.mk
@@ -79,7 +79,8 @@
 FUNCTIONS.profile-i386 := GCDAProfiling InstrProfiling InstrProfilingBuffer \
                           InstrProfilingFile InstrProfilingPlatformOther \
                           InstrProfilingRuntime InstrProfilingUtil \
-                          InstrProfilingWriter InstrProfilingValue
+                          InstrProfilingWriter InstrProfilingValue \
+                          InstrProfilingMerge InstrProfilingMergeFile
 FUNCTIONS.profile-x86_64 := $(FUNCTIONS.profile-i386)
 
 # Always use optimized variants.
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index e5c51c8..32851b8 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,3 +1,6 @@
+# Needed for lit support
+include(AddLLVM)
+
 configure_lit_site_cfg(
   ${CMAKE_CURRENT_SOURCE_DIR}/lit.common.configured.in
   ${CMAKE_CURRENT_BINARY_DIR}/lit.common.configured)
@@ -20,7 +23,7 @@
     # Use LLVM utils and Clang from the same build tree.
     list(APPEND SANITIZER_COMMON_LIT_TEST_DEPS
       clang clang-headers FileCheck count not llvm-config llvm-nm llvm-objdump
-      llvm-symbolizer compiler-rt-headers)
+      llvm-symbolizer compiler-rt-headers sancov)
     if (COMPILER_RT_HAS_PROFILE)
       list(APPEND SANITIZER_COMMON_LIT_TEST_DEPS profile)
     endif()
@@ -28,7 +31,7 @@
       list(APPEND SANITIZER_COMMON_LIT_TEST_DEPS KillTheDoctor)
     endif()
   endif()
-  if(UNIX)
+  if(CMAKE_HOST_UNIX)
     list(APPEND SANITIZER_COMMON_LIT_TEST_DEPS SanitizerLintCheck)
   endif()
 endif()
@@ -42,6 +45,9 @@
   if(COMPILER_RT_HAS_DFSAN)
     add_subdirectory(dfsan)
   endif()
+  if (COMPILER_RT_HAS_INTERCEPTION)
+    add_subdirectory(interception)
+  endif()
   if(COMPILER_RT_HAS_LSAN)
     add_subdirectory(lsan)
   endif()
@@ -67,6 +73,15 @@
   if(COMPILER_RT_HAS_SAFESTACK)
     add_subdirectory(safestack)
   endif()
+  if(COMPILER_RT_HAS_ESAN)
+    add_subdirectory(esan)
+  endif()
+  if(COMPILER_RT_HAS_SCUDO)
+    add_subdirectory(scudo)
+  endif()
+  if(COMPILER_RT_HAS_XRAY)
+    add_subdirectory(xray)
+  endif()
 endif()
 
 if(COMPILER_RT_STANDALONE_BUILD)
@@ -78,4 +93,5 @@
     "Running all regression tests"
     ${LLVM_LIT_TESTSUITES}
     DEPENDS ${LLVM_LIT_DEPENDS})
+  add_custom_target(compiler-rt-test-depends DEPENDS ${LLVM_LIT_DEPENDS})
 endif()
diff --git a/test/asan/CMakeLists.txt b/test/asan/CMakeLists.txt
index b2be957..82ec8ff 100644
--- a/test/asan/CMakeLists.txt
+++ b/test/asan/CMakeLists.txt
@@ -3,10 +3,16 @@
 set(ASAN_TESTSUITES)
 set(ASAN_DYNAMIC_TESTSUITES)
 
+# TODO(wwchrome): Re-enable Win64 asan tests when ready.
+# Disable tests for asan Win64 temporarily.
+if(OS_NAME MATCHES "Windows" AND CMAKE_SIZEOF_VOID_P EQUAL 8)
+  set(EXCLUDE_FROM_ALL TRUE)
+endif()
+
 macro(get_bits_for_arch arch bits)
   if (${arch} MATCHES "i386|i686|arm|mips|mipsel")
     set(${bits} 32)
-  elseif (${arch} MATCHES "x86_64|powerpc64|powerpc64le|aarch64|mips64|mips64el")
+  elseif (${arch} MATCHES "x86_64|powerpc64|powerpc64le|aarch64|mips64|mips64el|s390x")
     set(${bits} 64)
   else()
     message(FATAL_ERROR "Unknown target architecture: ${arch}")
@@ -94,14 +100,21 @@
   endif()
 endif()
 
+set(LIT_ARGS)
+if(ANDROID)
+  set(LIT_ARGS -j5)
+endif()
+
 add_lit_testsuite(check-asan "Running the AddressSanitizer tests"
   ${ASAN_TESTSUITES}
-  DEPENDS ${ASAN_TEST_DEPS})
-set_target_properties(check-asan PROPERTIES FOLDER "ASan tests")
+  DEPENDS ${ASAN_TEST_DEPS}
+  ARGS ${LIT_ARGS})
+set_target_properties(check-asan PROPERTIES FOLDER "Compiler-RT Misc")
 
 if(COMPILER_RT_ASAN_HAS_STATIC_RUNTIME)
   # Add check-dynamic-asan target. It is a part of check-all only on Windows,
   # where we want to always test both dynamic and static runtime.
+
   if(NOT OS_NAME MATCHES "Windows")
     set(EXCLUDE_FROM_ALL TRUE)
   endif()
@@ -110,8 +123,13 @@
                     ${ASAN_DYNAMIC_TESTSUITES}
                     DEPENDS ${ASAN_DYNAMIC_TEST_DEPS})
   set_target_properties(check-asan-dynamic
-                        PROPERTIES FOLDER "ASan dynamic tests")
+                        PROPERTIES FOLDER "Compiler-RT Misc")
   if(NOT OS_NAME MATCHES "Windows")
     set(EXCLUDE_FROM_ALL FALSE)
   endif()
 endif()
+
+# TODO(wwchrome): Re-enable the tests for asan Win64 when ready.
+if(OS_NAME MATCHES "Windows" AND CMAKE_SIZEOF_VOID_P EQUAL 8)
+  set(EXCLUDE_FROM_ALL FALSE)
+endif()
diff --git a/test/asan/TestCases/Android/coverage-android.cc b/test/asan/TestCases/Android/coverage-android.cc
index 16a6e1f..cf4f33e 100644
--- a/test/asan/TestCases/Android/coverage-android.cc
+++ b/test/asan/TestCases/Android/coverage-android.cc
@@ -101,6 +101,10 @@
 // RUN: %sancov rawunpack *.sancov.raw
 // RUN: %sancov print *.sancov |& FileCheck --check-prefix=CHECK3 %s
 
+// PC counts in CHECK lines are platform dependent and match arm32 at the moment.
+// The sancov tool does not support Android well enough to match function names.
+// REQUIRES: arm
+
 #include <assert.h>
 #include <dlfcn.h>
 #include <stdio.h>
@@ -139,5 +143,5 @@
 #endif
 
 // CHECK1: 2 PCs total
-// CHECK2: 7 PCs total
-// CHECK3: 8 PCs total
+// CHECK2: 4 PCs total
+// CHECK3: 5 PCs total
diff --git a/test/asan/TestCases/Darwin/abort_on_error.cc b/test/asan/TestCases/Darwin/abort_on_error.cc
index f09718b..295afb8 100644
--- a/test/asan/TestCases/Darwin/abort_on_error.cc
+++ b/test/asan/TestCases/Darwin/abort_on_error.cc
@@ -4,7 +4,7 @@
 // RUN: %clangxx_asan %s -o %t
 
 // Intentionally don't inherit the default ASAN_OPTIONS.
-// RUN: ASAN_OPTIONS="" not --crash %run %t 2>&1 | FileCheck %s
+// RUN: env ASAN_OPTIONS="" not --crash %run %t 2>&1 | FileCheck %s
 // When we use lit's default ASAN_OPTIONS, we shouldn't crash.
 // RUN: not %run %t 2>&1 | FileCheck %s
 
diff --git a/test/asan/TestCases/Darwin/address-range-limit.mm b/test/asan/TestCases/Darwin/address-range-limit.mm
index a690676..ba9175a 100644
--- a/test/asan/TestCases/Darwin/address-range-limit.mm
+++ b/test/asan/TestCases/Darwin/address-range-limit.mm
@@ -1,7 +1,7 @@
 // Regression test for https://code.google.com/p/address-sanitizer/issues/detail?id=368.
 
-// RUN: %clang_asan %s -Wno-deprecated-declarations -flat_namespace -bundle -undefined suppress -o %t.bundle
-// RUN: %clang_asan %s -Wno-deprecated-declarations -o %t -framework Foundation && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan %s -Wno-deprecated-declarations -flat_namespace -bundle -undefined suppress -o %t.bundle
+// RUN: %clangxx_asan %s -Wno-deprecated-declarations -o %t -framework Foundation && not %run %t 2>&1 | FileCheck %s
 
 #import <Foundation/Foundation.h>
 #import <mach-o/dyld.h>
diff --git a/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cc b/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cc
index 4595fb5..d2facd6 100644
--- a/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cc
+++ b/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cc
@@ -14,8 +14,8 @@
   int res = x[argc];
   free(x);
   free(x + argc - 1);  // BOOM
-  // CHECK: AddressSanitizer: attempting double-free{{.*}}in thread T0
   // CHECK: Using atos at user-specified path:
+  // CHECK: AddressSanitizer: attempting double-free{{.*}}in thread T0
   // CHECK: #0 0x{{.*}} in {{.*}}free
   // CHECK: #1 0x{{.*}} in main {{.*}}atos-symbolizer-dyld-root-path.cc:[[@LINE-4]]
   // CHECK: freed by thread T0 here:
diff --git a/test/asan/TestCases/Darwin/atos-symbolizer.cc b/test/asan/TestCases/Darwin/atos-symbolizer.cc
index 2a9ffbc..b4a868e 100644
--- a/test/asan/TestCases/Darwin/atos-symbolizer.cc
+++ b/test/asan/TestCases/Darwin/atos-symbolizer.cc
@@ -11,8 +11,8 @@
   int res = x[argc];
   free(x);
   free(x + argc - 1);  // BOOM
-  // CHECK: AddressSanitizer: attempting double-free{{.*}}in thread T0
   // CHECK: Using atos at user-specified path:
+  // CHECK: AddressSanitizer: attempting double-free{{.*}}in thread T0
   // CHECK: #0 0x{{.*}} in {{.*}}free
   // CHECK: #1 0x{{.*}} in main {{.*}}atos-symbolizer.cc:[[@LINE-4]]
   // CHECK: freed by thread T0 here:
diff --git a/test/asan/TestCases/Darwin/dladdr-demangling.cc b/test/asan/TestCases/Darwin/dladdr-demangling.cc
index d773659..6f52b93 100644
--- a/test/asan/TestCases/Darwin/dladdr-demangling.cc
+++ b/test/asan/TestCases/Darwin/dladdr-demangling.cc
@@ -13,10 +13,10 @@
     char *x = (char*)malloc(n * sizeof(char));
     free(x);
     return x[5];
+    // CHECK-DLADDR: Using dladdr symbolizer
     // CHECK: {{.*ERROR: AddressSanitizer: heap-use-after-free on address}}
     // CHECK: {{READ of size 1 at 0x.* thread T0}}
-    // CHECK-DLADDR: Using dladdr symbolizer
-    // CHECK-DLADDR: failed to fork external symbolizer
+    // CHECK-DLADDR: failed to fork
     // CHECK: {{    #0 0x.* in MyClass::my_function\(int\)}}
     // CHECK: {{freed by thread T0 here:}}
     // CHECK: {{    #0 0x.* in wrap_free}}
diff --git a/test/asan/TestCases/Darwin/segv_read_write.c b/test/asan/TestCases/Darwin/segv_read_write.c
new file mode 100644
index 0000000..d8e2d21
--- /dev/null
+++ b/test/asan/TestCases/Darwin/segv_read_write.c
@@ -0,0 +1,26 @@
+// RUN: %clangxx_asan -std=c++11 -O0 %s -o %t
+// RUN: not %run %t       2>&1 | FileCheck %s --check-prefix=READ
+// RUN: not %run %t write 2>&1 | FileCheck %s --check-prefix=WRITE
+// REQUIRES: x86-target-arch
+
+#include <sys/mman.h>
+
+static volatile int sink;
+__attribute__((noinline)) void Read(int *ptr) { sink = *ptr; }
+__attribute__((noinline)) void Write(int *ptr) { *ptr = 0; }
+int main(int argc, char **argv) {
+  // Writes to shadow are detected as reads from shadow gap (because of how the
+  // shadow mapping works). This is kinda hard to fix. Test a random address in
+  // the application part of the address space.
+  void *volatile p =
+      mmap(nullptr, 4096, PROT_READ, MAP_PRIVATE | MAP_ANON, 0, 0);
+  munmap(p, 4096);
+  if (argc == 1)
+    Read((int *)p);
+  else
+    Write((int *)p);
+}
+// READ: AddressSanitizer: SEGV on unknown address
+// READ: The signal is caused by a READ memory access.
+// WRITE: AddressSanitizer: SEGV on unknown address
+// WRITE: The signal is caused by a WRITE memory access.
diff --git a/test/asan/TestCases/Linux/abort_on_error.cc b/test/asan/TestCases/Linux/abort_on_error.cc
index 406d98b..67fa9b8 100644
--- a/test/asan/TestCases/Linux/abort_on_error.cc
+++ b/test/asan/TestCases/Linux/abort_on_error.cc
@@ -4,7 +4,7 @@
 // RUN: %clangxx_asan %s -o %t
 
 // Intentionally don't inherit the default ASAN_OPTIONS.
-// RUN: ASAN_OPTIONS="" not %run %t 2>&1 | FileCheck %s
+// RUN: env ASAN_OPTIONS="" not %run %t 2>&1 | FileCheck %s
 // When we use lit's default ASAN_OPTIONS, we shouldn't crash either. On Linux
 // lit doesn't set ASAN_OPTIONS anyway.
 // RUN: not %run %t 2>&1 | FileCheck %s
diff --git a/test/asan/TestCases/Linux/asan-asm-stacktrace-test.cc b/test/asan/TestCases/Linux/asan-asm-stacktrace-test.cc
index 5332c99..cbc900d 100644
--- a/test/asan/TestCases/Linux/asan-asm-stacktrace-test.cc
+++ b/test/asan/TestCases/Linux/asan-asm-stacktrace-test.cc
@@ -1,7 +1,7 @@
 // Check that a stack unwinding algorithm works correctly even with the assembly
 // instrumentation.
 
-// REQUIRES: x86_64-supported-target
+// REQUIRES: x86_64-target-arch
 // RUN: %clangxx_asan -g -O1 %s -fno-inline-functions -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -mllvm -asan-instrument-assembly -o %t && not %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx_asan -g -O1 %s -fno-inline-functions -fomit-frame-pointer -momit-leaf-frame-pointer -mllvm -asan-instrument-assembly -o %t && not %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx_asan -g0 -O1 %s -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-exceptions -fno-inline-functions -fomit-frame-pointer -momit-leaf-frame-pointer -mllvm -asan-instrument-assembly -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-nounwind
diff --git a/test/asan/TestCases/Linux/asan_prelink_test.cc b/test/asan/TestCases/Linux/asan_prelink_test.cc
index d67d945..a5808ba 100644
--- a/test/asan/TestCases/Linux/asan_prelink_test.cc
+++ b/test/asan/TestCases/Linux/asan_prelink_test.cc
@@ -10,7 +10,7 @@
 // RUN: %env_asan_opts=verbosity=1 %run %t 2>&1 | FileCheck %s
 
 // GNU driver doesn't handle .so files properly.
-// REQUIRES: x86_64-supported-target, asan-64-bits, Clang
+// REQUIRES: x86_64-target-arch, Clang
 #if BUILD_SO
 int G;
 int *getG() {
diff --git a/test/asan/TestCases/Linux/clang_gcc_abi.cc b/test/asan/TestCases/Linux/clang_gcc_abi.cc
index 669d152..845f412 100644
--- a/test/asan/TestCases/Linux/clang_gcc_abi.cc
+++ b/test/asan/TestCases/Linux/clang_gcc_abi.cc
@@ -3,7 +3,7 @@
 // RUN: %clangxx_asan -O2 -x c %s -o %t && not %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx_asan -O3 -x c %s -o %t && not %run %t 2>&1 | FileCheck %s
 
-// REQUIRES: arm-supported-target
+// REQUIRES: arm-target-arch
 // XFAIL: armv7l-unknown-linux-gnueabihf
 
 #include <stdlib.h>
diff --git a/test/asan/TestCases/Linux/clone_test.cc b/test/asan/TestCases/Linux/clone_test.cc
index e9c1f16..f6eb261 100644
--- a/test/asan/TestCases/Linux/clone_test.cc
+++ b/test/asan/TestCases/Linux/clone_test.cc
@@ -22,7 +22,7 @@
 
 int main(int argc, char **argv) {
   const int kStackSize = 1 << 20;
-  char child_stack[kStackSize + 1];
+  char __attribute__((aligned(16))) child_stack[kStackSize + 1];
   char *sp = child_stack + kStackSize;  // Stack grows down.
   printf("Parent: %p\n", sp);
   pid_t clone_pid = clone(Child, sp, CLONE_FILES | CLONE_VM, NULL);
diff --git a/test/asan/TestCases/Linux/coverage-missing.cc b/test/asan/TestCases/Linux/coverage-missing.cc
index 6cd3201..49487d3 100644
--- a/test/asan/TestCases/Linux/coverage-missing.cc
+++ b/test/asan/TestCases/Linux/coverage-missing.cc
@@ -43,7 +43,7 @@
 // RUN: %sancov missing %dynamiclib < foo.txt > foo-missing.txt
 // RUN: ( diff bar.txt foo-missing.txt || true ) | not grep "^<"
 
-// REQUIRES: x86_64-supported-target, i386-supported-target
+// REQUIRES: x86-target-arch
 // XFAIL: android
 
 #include <stdio.h>
diff --git a/test/asan/TestCases/Linux/coverage_html_report.cc b/test/asan/TestCases/Linux/coverage_html_report.cc
new file mode 100644
index 0000000..78fbfb3
--- /dev/null
+++ b/test/asan/TestCases/Linux/coverage_html_report.cc
@@ -0,0 +1,24 @@
+// REQUIRES: has_sancovcc, x86_64-linux, asan-dynamic-runtime
+// RUN: %clangxx_asan_static -fsanitize-coverage=func %s -o %t
+// RUN: rm -rf %T/coverage_html_report
+// RUN: mkdir -p %T/coverage_html_report
+// RUN: cd %T/coverage_html_report
+// RUN: %env_asan_opts=coverage=1:verbosity=1:html_cov_report=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-main
+// RUN: ls *.html | FileCheck %s --check-prefix=CHECK-ls
+// RUN: rm -r %T/coverage_html_report
+
+#include <stdio.h>
+#include <unistd.h>
+
+void bar() { printf("bar\n"); }
+
+int main(int argc, char **argv) {
+  fprintf(stderr, "PID: %d\n", getpid());
+  bar();
+  return 0;
+}
+
+// CHECK-main: PID: [[PID:[0-9]+]]
+// CHECK-main: [[PID]].sancov: 2 PCs written
+// CHECK-main: coverage report generated to ./coverage_html_report.cc.tmp.[[PID]].html
+// CHECK-ls: coverage_html_report.cc.tmp.{{[0-9]+}}.html
diff --git a/test/asan/TestCases/Linux/interface_symbols_linux.c b/test/asan/TestCases/Linux/interface_symbols_linux.c
index 971feb5..2e64857 100644
--- a/test/asan/TestCases/Linux/interface_symbols_linux.c
+++ b/test/asan/TestCases/Linux/interface_symbols_linux.c
@@ -56,6 +56,6 @@
 
 // FIXME: nm -D on powerpc somewhy shows ASan interface symbols residing
 // in "initialized data section".
-// REQUIRES: x86_64-supported-target,i386-supported-target,asan-static-runtime
+// REQUIRES: x86-target-arch,asan-static-runtime
 
 int main() { return 0; }
diff --git a/test/asan/TestCases/Linux/kernel-area.cc b/test/asan/TestCases/Linux/kernel-area.cc
index c0f1727..d7a544f 100644
--- a/test/asan/TestCases/Linux/kernel-area.cc
+++ b/test/asan/TestCases/Linux/kernel-area.cc
@@ -16,7 +16,7 @@
 // CHECK-kernel-64-bits: || `[0x28{{0+}}, 0x3{{f+}}]` || HighShadow ||
 // CHECK-kernel-64-bits: || `[0x24{{0+}}, 0x27{{f+}}]` || ShadowGap  ||
 //
-// REQUIRES: asan-32-bits,i386-supported-target
+// REQUIRES: i386-target-arch
 
 int main() {
   return 0;
diff --git a/test/asan/TestCases/Linux/leak_check_segv.cc b/test/asan/TestCases/Linux/leak_check_segv.cc
index 8160d5f..2a2010f 100644
--- a/test/asan/TestCases/Linux/leak_check_segv.cc
+++ b/test/asan/TestCases/Linux/leak_check_segv.cc
@@ -1,5 +1,5 @@
 // Test that SIGSEGV during leak checking does not crash the process.
-// RUN: %clangxx_asan -O1 %s -o %t && LSAN_OPTIONS="verbosity=1" not %run %t 2>&1
+// RUN: %clangxx_asan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
 // REQUIRES: leak-detection
 #include <stdlib.h>
 #include <stdio.h>
@@ -11,7 +11,7 @@
 int main() {
   void *p = malloc(10 * 1024 * 1024);
   // surprise-surprise!
-  mprotect((void*)(((unsigned long)p + 4095) & ~4095), 16 * 1024, PROT_NONE); 
+  mprotect((void*)(((unsigned long)p + 4095) & ~4095), 16 * 1024, PROT_NONE);
   mprotect((void*)(((unsigned long)data + 4095) & ~4095), 16 * 1024, PROT_NONE);
   __lsan_do_leak_check();
   fprintf(stderr, "DONE\n");
@@ -19,5 +19,5 @@
 
 // CHECK: Tracer caught signal 11
 // CHECK: LeakSanitizer has encountered a fatal error
+// CHECK: HINT: For debugging, try setting {{.*}} LSAN_OPTIONS
 // CHECK-NOT: DONE
-
diff --git a/test/asan/TestCases/Linux/local_alias.cc b/test/asan/TestCases/Linux/local_alias.cc
new file mode 100644
index 0000000..8c80f87
--- /dev/null
+++ b/test/asan/TestCases/Linux/local_alias.cc
@@ -0,0 +1,43 @@
+// Test that mixing instrumented and non-instrumented code doesn't lead to crash.
+// Build two modules (one instrumented, the other not) that have globals with
+// the same names. Check that ASan neither crashes with a CHECK failure nor
+// reports a false positive global-buffer-overflow when the sanitized library
+// poisons globals from the non-sanitized one.
+//
+// FIXME: https://github.com/google/sanitizers/issues/316
+// XFAIL: android
+//
+// This test requires the integrated assembler to be the default.
+// XFAIL: target-is-mips64
+// XFAIL: target-is-mips64el
+//
+// RUN: %clangxx_asan -DBUILD_INSTRUMENTED_DSO=1 -fPIC -shared -mllvm -asan-use-private-alias %s -o %t-INSTRUMENTED-SO.so
+// RUN: %clangxx -DBUILD_UNINSTRUMENTED_DSO=1 -fPIC -shared %s -o %t-UNINSTRUMENTED-SO.so
+// RUN: %clangxx %s -c -mllvm -asan-use-private-alias -o %t.o
+// RUN: %clangxx_asan %t.o %t-UNINSTRUMENTED-SO.so %t-INSTRUMENTED-SO.so -o %t-EXE
+// RUN: %env_asan_opts=use_odr_indicator=true %run %t-EXE
+
+#if defined (BUILD_INSTRUMENTED_DSO)
+long h = 15;
+long f = 4;
+long foo(long *p) {
+  return *p;
+}
+#elif defined (BUILD_UNINSTRUMENTED_DSO)
+long foo(long *);
+long h = 12;
+long i = 13;
+long f = 5;
+
+int bar() {
+  if (foo(&f) != 5 || foo(&h) != 12 || foo(&i) != 13)
+    return 1;
+  return 0;
+}
+#else
+extern int bar();
+
+int main() {
+  return bar();
+}
+#endif
diff --git a/test/asan/TestCases/Linux/malloc-in-qsort.cc b/test/asan/TestCases/Linux/malloc-in-qsort.cc
index e8c9b74..ea23924 100644
--- a/test/asan/TestCases/Linux/malloc-in-qsort.cc
+++ b/test/asan/TestCases/Linux/malloc-in-qsort.cc
@@ -7,7 +7,7 @@
 // https://code.google.com/p/address-sanitizer/issues/detail?id=137
 
 // Fast unwinder is only available on x86_64 and i386.
-// REQUIRES: x86_64-supported-target
+// REQUIRES: x86-target-arch
 
 // REQUIRES: compiler-rt-optimized
 
diff --git a/test/asan/TestCases/Linux/memmem_test.cc b/test/asan/TestCases/Linux/memmem_test.cc
new file mode 100644
index 0000000..661381c
--- /dev/null
+++ b/test/asan/TestCases/Linux/memmem_test.cc
@@ -0,0 +1,24 @@
+// RUN: %clangxx_asan  %s -o %t
+// RUN: not %run %t   2>&1 | FileCheck %s --check-prefix=A1
+// RUN: not %run %t 1 2>&1 | FileCheck %s --check-prefix=A2
+// RUN: %env_asan_opts=intercept_memmem=0 %run %t
+
+#include <string.h>
+int main(int argc, char **argv) {
+  char a1[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  char a2[] = {3, 4, 5};
+  void *res;
+  if (argc == 1)
+    res = memmem(a1, sizeof(a1) + 1, a2, sizeof(a2));  // BOOM
+  else
+    res = memmem(a1, sizeof(a1), a2, sizeof(a2) + 1);  // BOOM
+  // A1: AddressSanitizer: stack-buffer-overflow
+  // A1: {{#0.*memmem}}
+  // A1-NEXT: {{#1.*main}}
+  // A1: 'a1' <== Memory access at offset
+  //
+  // A2: AddressSanitizer: stack-buffer-overflow
+  // A2: {{#0.*memmem}}
+  // A2: 'a2' <== Memory access at offset
+  return res == NULL;
+}
diff --git a/test/asan/TestCases/Linux/new_delete_mismatch.cc b/test/asan/TestCases/Linux/new_delete_mismatch.cc
new file mode 100644
index 0000000..1cfc0ef
--- /dev/null
+++ b/test/asan/TestCases/Linux/new_delete_mismatch.cc
@@ -0,0 +1,16 @@
+// Check that we report new[] vs delete as alloc-dealloc-mismatch and not as
+// new-delete-type-mismatch when -fsized-deallocation is enabled.
+
+// RUN: %clangxx_asan -g %s -o %t && not %run %t |& FileCheck %s
+// RUN: %clangxx_asan -fsized-deallocation -g %s -o %t && not %run %t |& FileCheck %s
+
+#include <stdlib.h>
+
+static volatile char *x;
+
+int main() {
+  x = new char[10];
+  delete x;
+}
+
+// CHECK: AddressSanitizer: alloc-dealloc-mismatch (operator new [] vs operator delete) on 0x
diff --git a/test/asan/TestCases/Linux/nohugepage_test.cc b/test/asan/TestCases/Linux/nohugepage_test.cc
index 2758f0a..ce8f17e 100644
--- a/test/asan/TestCases/Linux/nohugepage_test.cc
+++ b/test/asan/TestCases/Linux/nohugepage_test.cc
@@ -9,7 +9,7 @@
 // Would be great to run the test with no_huge_pages_for_shadow=0, but
 // the result will depend on the OS version and settings...
 //
-// REQUIRES: x86_64-supported-target, asan-64-bits
+// REQUIRES: x86_64-target-arch
 //
 // WARNING: this test is very subtle and may not work on some systems.
 // If this is the case we'll need to further improve it or disable it.
diff --git a/test/asan/TestCases/Linux/odr-violation.cc b/test/asan/TestCases/Linux/odr-violation.cc
index bc76116..d909143 100644
--- a/test/asan/TestCases/Linux/odr-violation.cc
+++ b/test/asan/TestCases/Linux/odr-violation.cc
@@ -1,6 +1,10 @@
 // FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
 // XFAIL: android
 //
+// This test requires the integrated assembler to be the default.
+// XFAIL: target-is-mips64
+// XFAIL: target-is-mips64el
+//
 // We use fast_unwind_on_malloc=0 to have full unwinding even w/o frame
 // pointers. This setting is not on by default because it's too expensive.
 //
@@ -22,6 +26,13 @@
 // RUN: echo "odr_violation:foo::G" > %t.supp
 // RUN: %env_asan_opts=fast_unwind_on_malloc=0:detect_odr_violation=2:suppressions=%t.supp      %run %t-ODR-EXE 2>&1 | FileCheck %s --check-prefix=DISABLED
 // RUN: rm -f %t.supp
+//
+// Use private aliases for global variables; use an indicator symbol to detect ODR violations.
+// RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared -mllvm -asan-use-private-alias %s -o %t-ODR-SO.so -DSZ=100
+// RUN: %clangxx_asan -mllvm -asan-use-private-alias %s %t-ODR-SO.so -Wl,-R. -o %t-ODR-EXE
+// RUN: %env_asan_opts=fast_unwind_on_malloc=0                              %run %t-ODR-EXE 2>&1 | FileCheck %s --check-prefix=DISABLED
+// RUN: %env_asan_opts=fast_unwind_on_malloc=0:use_odr_indicator=false      %run %t-ODR-EXE 2>&1 | FileCheck %s --check-prefix=DISABLED
+// RUN: %env_asan_opts=fast_unwind_on_malloc=0:use_odr_indicator=true   not %run %t-ODR-EXE 2>&1 | FileCheck %s
 
 // GNU driver doesn't handle .so files properly.
 // REQUIRES: Clang
diff --git a/test/asan/TestCases/Linux/odr_c_test.c b/test/asan/TestCases/Linux/odr_c_test.c
new file mode 100644
index 0000000..b1d2349
--- /dev/null
+++ b/test/asan/TestCases/Linux/odr_c_test.c
@@ -0,0 +1,28 @@
+// Test that we can properly report an ODR violation
+// between an instrumented global and a non-instrumented global.
+
+// RUN: %clang_asan %s -fPIC -shared -o %t-1.so  -DFILE1
+// RUN: %clang_asan %s -fPIC -shared -o %t-2.so  -DFILE2
+// RUN: %clang_asan %s -fPIE %t-1.so %t-2.so -Wl,-R`pwd` -o %t
+// RUN: not %run %t 2>&1 | FileCheck %s
+//
+// REQUIRES: x86_64-target-arch
+//
+// CHECK: The following global variable is not properly aligned.
+// CHECK: ERROR: AddressSanitizer: odr-violation
+#if defined(FILE1)
+__attribute__((aligned(8))) int x;
+__attribute__((aligned(1))) char y;
+// The gold linker puts ZZZ at the start of bss (where it is aligned)
+// unless we have a large alternative like Displace:
+__attribute__((aligned(1))) char Displace[105];
+__attribute__((aligned(1))) char ZZZ[100];
+#elif defined(FILE2)
+int ZZZ = 1;
+#else
+extern int ZZZ;
+int main() {
+  return ZZZ;
+}
+#endif
+
diff --git a/test/asan/TestCases/Linux/overflow-in-qsort.cc b/test/asan/TestCases/Linux/overflow-in-qsort.cc
index dc3918e..6990518 100644
--- a/test/asan/TestCases/Linux/overflow-in-qsort.cc
+++ b/test/asan/TestCases/Linux/overflow-in-qsort.cc
@@ -7,7 +7,7 @@
 // https://code.google.com/p/address-sanitizer/issues/detail?id=137
 
 // Fast unwinder is only available on x86_64 and i386.
-// REQUIRES: x86_64-supported-target
+// REQUIRES: x86-target-arch
 
 #include <stdlib.h>
 #include <stdio.h>
diff --git a/test/asan/TestCases/Linux/print_memory_profile_test.cc b/test/asan/TestCases/Linux/print_memory_profile_test.cc
new file mode 100644
index 0000000..d30dbea
--- /dev/null
+++ b/test/asan/TestCases/Linux/print_memory_profile_test.cc
@@ -0,0 +1,29 @@
+// Printing the memory profile only works in configurations where we can
+// detect leaks.
+// REQUIRES: leak-detection
+//
+// RUN: %clangxx_asan %s -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+#include <sanitizer/common_interface_defs.h>
+
+#include <stdio.h>
+
+char *sink[1000];
+
+int main() {
+  int idx = 0;
+  for (int i = 0; i < 17; i++)
+    sink[idx++] = new char[131000];
+  for (int i = 0; i < 28; i++)
+    sink[idx++] = new char[24000];
+
+  __sanitizer_print_memory_profile(100);
+  __sanitizer_print_memory_profile(50);
+}
+
+// CHECK: Live Heap Allocations: {{.*}}; showing top 100%
+// CHECK: 2227000 byte(s) ({{.*}}%) in 17 allocation(s)
+// CHECK: 672000 byte(s) ({{.*}}%) in 28 allocation(s)
+// CHECK: Live Heap Allocations: {{.*}}; showing top 50%
+// CHECK: 2227000 byte(s) ({{.*}}%) in 17 allocation(s)
+// CHECK-NOT: 1008 byte
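
For reference, the totals in the CHECK lines above are just allocation count times request size; a standalone compile-time restatement of that arithmetic:

// 17 allocations of 131000 bytes and 28 allocations of 24000 bytes.
static_assert(17 * 131000 == 2227000, "first bucket total");
static_assert(28 * 24000 == 672000, "second bucket total");
int main() { return 0; }
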
diff --git a/test/asan/TestCases/Linux/ptrace.cc b/test/asan/TestCases/Linux/ptrace.cc
index d87d90b..bd3d2d2 100644
--- a/test/asan/TestCases/Linux/ptrace.cc
+++ b/test/asan/TestCases/Linux/ptrace.cc
@@ -59,6 +59,13 @@
 #define PRINT_REG_PC(__regs)    printf ("%x\n", (unsigned) (__regs.ARM_pc))
 #define PRINT_REG_FP(__fpregs)  printf ("%x\n", (unsigned) (__fpregs + 32 * 8))
 #define __PTRACE_FPREQUEST PTRACE_GETVFPREGS
+
+#elif defined(__s390__)
+typedef _user_regs_struct   regs_struct;
+typedef _user_fpregs_struct fpregs_struct;
+#define PRINT_REG_PC(__regs)    printf ("%lx\n", (unsigned long) (__regs.psw.addr))
+#define PRINT_REG_FP(__fpregs)  printf ("%lx\n", (unsigned long) (__fpregs.fpc))
+#define ARCH_IOVEC_FOR_GETREGSET
 #endif
 
 
diff --git a/test/asan/TestCases/Linux/recvfrom.cc b/test/asan/TestCases/Linux/recvfrom.cc
new file mode 100644
index 0000000..9c6eec3
--- /dev/null
+++ b/test/asan/TestCases/Linux/recvfrom.cc
@@ -0,0 +1,81 @@
+// Test that ASan detects buffer overflow on read from socket via recvfrom.
+//
+// RUN: %clangxx_asan %s -DRECVFROM -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-RECVFROM
+// RUN: %clangxx_asan %s -DSENDTO -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-SENDTO
+// RUN: %clangxx_asan %s -DSENDTO -o %t && %env_asan_opts=intercept_send=0 %run %t 2>&1
+//
+// UNSUPPORTED: android
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <netdb.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <pthread.h>
+
+#define CHECK_ERROR(p, m)                                                      \
+  do {                                                                         \
+    if (p) {                                                                   \
+      fprintf(stderr, "ERROR " m "\n");                                        \
+      exit(1);                                                                 \
+    }                                                                          \
+  } while (0)
+
+const int kBufSize = 10;
+int sockfd;
+
+static void *client_thread_udp(void *data) {
+#ifdef SENDTO
+  const char buf[kBufSize / 2] = {0, };
+#else
+  const char buf[kBufSize] = {0, };
+#endif
+  struct sockaddr_in serveraddr;
+  socklen_t addrlen = sizeof(serveraddr);
+
+  int succeeded = getsockname(sockfd, (struct sockaddr *)&serveraddr, &addrlen);
+  CHECK_ERROR(succeeded < 0, "in getsockname");
+
+  succeeded = sendto(sockfd, buf, kBufSize, 0, (struct sockaddr *)&serveraddr,
+                     sizeof(serveraddr));
+  // CHECK-SENDTO: {{READ of size 10 at 0x.* thread T1}}
+  // CHECK-SENDTO: {{    #1 0x.* in client_thread_udp.*recvfrom.cc:}}[[@LINE-3]]
+  CHECK_ERROR(succeeded < 0, "in sending message");
+  return NULL;
+}
+
+int main() {
+#ifdef RECVFROM
+  char buf[kBufSize / 2];
+#else
+  char buf[kBufSize];
+#endif
+  pthread_t client_thread;
+  struct sockaddr_in serveraddr;
+
+  sockfd = socket(AF_INET, SOCK_DGRAM, 0);
+  CHECK_ERROR(sockfd < 0, "opening socket");
+
+  memset(&serveraddr, 0, sizeof(serveraddr));
+  serveraddr.sin_family = AF_INET;
+  serveraddr.sin_addr.s_addr = htonl(INADDR_ANY);
+  serveraddr.sin_port = 0;
+
+  int bound = bind(sockfd, (struct sockaddr *)&serveraddr, sizeof(serveraddr));
+  CHECK_ERROR(bound < 0, "on binding");
+
+  int succeeded =
+      pthread_create(&client_thread, NULL, client_thread_udp, &serveraddr);
+  CHECK_ERROR(succeeded, "creating thread");
+
+  recvfrom(sockfd, buf, kBufSize, 0, NULL, NULL); // BOOM
+  // CHECK-RECVFROM: {{WRITE of size 10 at 0x.* thread T0}}
+  // CHECK-RECVFROM: {{    #1 0x.* in main.*recvfrom.cc:}}[[@LINE-2]]
+  // CHECK-RECVFROM: {{Address 0x.* is located in stack of thread T0 at offset}}
+  // CHECK-RECVFROM-NEXT: in{{.*}}main{{.*}}recvfrom.cc
+  succeeded = pthread_join(client_thread, NULL);
+  CHECK_ERROR(succeeded, "joining thread");
+  return 0;
+}
diff --git a/test/asan/TestCases/Linux/scariness_score_test.cc b/test/asan/TestCases/Linux/scariness_score_test.cc
new file mode 100644
index 0000000..2485413
--- /dev/null
+++ b/test/asan/TestCases/Linux/scariness_score_test.cc
@@ -0,0 +1,192 @@
+// Test how we produce the scariness score.
+
+// RUN: %clangxx_asan -O0 %s -o %t
+// RUN: export %env_asan_opts=detect_stack_use_after_return=1:handle_abort=1:print_scariness=1
+// Make sure the stack is limited (may not be the default under GNU make)
+// RUN: ulimit -s 4096
+// RUN: not %run %t  1 2>&1 | FileCheck %s --check-prefix=CHECK1
+// RUN: not %run %t  2 2>&1 | FileCheck %s --check-prefix=CHECK2
+// RUN: not %run %t  3 2>&1 | FileCheck %s --check-prefix=CHECK3
+// RUN: not %run %t  4 2>&1 | FileCheck %s --check-prefix=CHECK4
+// RUN: not %run %t  5 2>&1 | FileCheck %s --check-prefix=CHECK5
+// RUN: not %run %t  6 2>&1 | FileCheck %s --check-prefix=CHECK6
+// RUN: not %run %t  7 2>&1 | FileCheck %s --check-prefix=CHECK7
+// RUN: not %run %t  8 2>&1 | FileCheck %s --check-prefix=CHECK8
+// RUN: not %run %t  9 2>&1 | FileCheck %s --check-prefix=CHECK9
+// RUN: not %run %t 10 2>&1 | FileCheck %s --check-prefix=CHECK10
+// RUN: not %run %t 11 2>&1 | FileCheck %s --check-prefix=CHECK11
+// RUN: not %run %t 12 2>&1 | FileCheck %s --check-prefix=CHECK12
+// RUN: not %run %t 13 2>&1 | FileCheck %s --check-prefix=CHECK13
+// RUN: not %run %t 14 2>&1 | FileCheck %s --check-prefix=CHECK14
+// RUN: not %run %t 15 2>&1 | FileCheck %s --check-prefix=CHECK15
+// RUN: not %run %t 16 2>&1 | FileCheck %s --check-prefix=CHECK16
+// RUN: not %run %t 17 2>&1 | FileCheck %s --check-prefix=CHECK17
+// RUN: not %run %t 18 2>&1 | FileCheck %s --check-prefix=CHECK18
+// RUN: not %run %t 19 2>&1 | FileCheck %s --check-prefix=CHECK19
+// RUN: not %run %t 20 2>&1 | FileCheck %s --check-prefix=CHECK20
+// RUN: not %run %t 21 2>&1 | FileCheck %s --check-prefix=CHECK21
+// RUN: not %run %t 22 2>&1 | FileCheck %s --check-prefix=CHECK22
+// RUN: not %run %t 23 2>&1 | FileCheck %s --check-prefix=CHECK23
+// RUN: not %run %t 24 2>&1 | FileCheck %s --check-prefix=CHECK24
+// RUN: not %run %t 25 2>&1 | FileCheck %s --check-prefix=CHECK25
+// RUN: not %run %t 26 2>&1 | FileCheck %s --check-prefix=CHECK26
+// RUN: not %run %t 27 2>&1 | FileCheck %s --check-prefix=CHECK27
+// Parts of the test are too platform-specific:
+// REQUIRES: x86_64-target-arch
+// REQUIRES: shell
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <sanitizer/asan_interface.h>
+
+enum ReadOrWrite { Read = 0, Write = 1 };
+
+struct S32 {
+  char x[32];
+};
+
+template<class T>
+void HeapBuferOverflow(int Idx, ReadOrWrite w) {
+  T *t = new T[100];
+  static T sink;
+  if (w)
+    t[100 + Idx] = T();
+  else
+    sink = t[100 + Idx];
+  delete [] t;
+}
+
+template<class T>
+void HeapUseAfterFree(int Idx, ReadOrWrite w) {
+  T *t = new T[100];
+  static T sink;
+  sink = t[0];
+  delete [] t;
+  if (w)
+    t[Idx] = T();
+  else
+    sink = t[Idx];
+}
+
+template<class T>
+void StackBufferOverflow(int Idx, ReadOrWrite w) {
+  T t[100];
+  static T sink;
+  sink = t[Idx];
+  if (w)
+    t[100 + Idx] = T();
+  else
+    sink = t[100 + Idx];
+}
+
+template<class T>
+T *LeakStack() {
+  T t[100];
+  static volatile T *x;
+  x = &t[0];
+  return (T*)x;
+}
+
+template<class T>
+void StackUseAfterReturn(int Idx, ReadOrWrite w) {
+  static T sink;
+  T *t = LeakStack<T>();
+  if (w)
+    t[100 + Idx] = T();
+  else
+    sink = t[100 + Idx];
+}
+
+char    g1[100];
+short   g2[100];
+int     g4[100];
+int64_t g8[100];
+S32     gm[100];
+
+void DoubleFree() {
+  int *x = new int;
+  static volatile int two = 2;
+  for (int i = 0; i < two; i++)
+    delete x;
+}
+
+void StackOverflow(int Idx) {
+  int some_stack[10000];
+  static volatile int *x;
+  x = &some_stack[0];
+  if (Idx > 0)
+    StackOverflow(Idx - 1);
+}
+
+void UseAfterPoison() {
+  int buf[100];
+  __asan_poison_memory_region(buf, sizeof(buf));
+  static volatile int sink;
+  sink = buf[42];
+}
+
+int main(int argc, char **argv) {
+  char arr[100];
+  static volatile int zero = 0;
+  static volatile int *zero_ptr = 0;
+  static volatile int *wild_addr = (int*)0x10000000; // System-dependent.
+  if (argc != 2) return 1;
+  int kind = atoi(argv[1]);
+  switch (kind) {
+    case 1: HeapBuferOverflow<char>(0, Read); break;
+    case 2: HeapBuferOverflow<int>(0, Read); break;
+    case 3: HeapBuferOverflow<short>(0, Write); break;
+    case 4: HeapBuferOverflow<int64_t>(2, Write); break;
+    case 5: HeapBuferOverflow<S32>(4, Write); break;
+    case 6: HeapUseAfterFree<char>(0, Read); break;
+    case 7: HeapUseAfterFree<int>(0, Write); break;
+    case 8: HeapUseAfterFree<int64_t>(0, Read); break;
+    case 9: HeapUseAfterFree<S32>(0, Write); break;
+    case 10: StackBufferOverflow<char>(0, Write); break;
+    case 11: StackBufferOverflow<int64_t>(0, Read); break;
+    case 12: StackBufferOverflow<int>(4, Write); break;
+    case 13: StackUseAfterReturn<char>(0, Read); break;
+    case 14: StackUseAfterReturn<S32>(0, Write); break;
+    case 15: g1[zero + 100] = 0; break;
+    case 16: gm[0] = gm[zero + 100 + 1]; break;
+    case 17: DoubleFree(); break;
+    case 18: StackOverflow(1000000); break;
+    case 19: *zero_ptr = 0; break;
+    case 20: *wild_addr = 0; break;
+    case 21: zero = *wild_addr; break;
+    case 22: abort(); break;
+    case 23: ((void (*)(void))wild_addr)(); break;
+    case 24: delete (new int[10]); break;
+    case 25: free((char*)malloc(100) + 10); break;
+    case 26: memcpy(arr, arr+10, 20);  break;
+    case 27: UseAfterPoison(); break;
+    // CHECK1: SCARINESS: 12 (1-byte-read-heap-buffer-overflow)
+    // CHECK2: SCARINESS: 17 (4-byte-read-heap-buffer-overflow)
+    // CHECK3: SCARINESS: 33 (2-byte-write-heap-buffer-overflow)
+    // CHECK4: SCARINESS: 52 (8-byte-write-heap-buffer-overflow-far-from-bounds)
+    // CHECK5: SCARINESS: 55 (multi-byte-write-heap-buffer-overflow-far-from-bounds)
+    // CHECK6: SCARINESS: 40 (1-byte-read-heap-use-after-free)
+    // CHECK7: SCARINESS: 46 (4-byte-write-heap-use-after-free)
+    // CHECK8: SCARINESS: 51 (8-byte-read-heap-use-after-free)
+    // CHECK9: SCARINESS: 55 (multi-byte-write-heap-use-after-free)
+    // CHECK10: SCARINESS: 46 (1-byte-write-stack-buffer-overflow)
+    // CHECK11: SCARINESS: 38 (8-byte-read-stack-buffer-overflow)
+    // CHECK12: SCARINESS: 61 (4-byte-write-stack-buffer-overflow-far-from-bounds)
+    // CHECK13: SCARINESS: 50 (1-byte-read-stack-use-after-return)
+    // CHECK14: SCARINESS: 65 (multi-byte-write-stack-use-after-return)
+    // CHECK15: SCARINESS: 31 (1-byte-write-global-buffer-overflow)
+    // CHECK16: SCARINESS: 36 (multi-byte-read-global-buffer-overflow-far-from-bounds)
+    // CHECK17: SCARINESS: 42 (double-free)
+    // CHECK18: SCARINESS: 10 (stack-overflow)
+    // CHECK19: SCARINESS: 10 (null-deref)
+    // CHECK20: SCARINESS: 30 (wild-addr-write)
+    // CHECK21: SCARINESS: 20 (wild-addr-read)
+    // CHECK22: SCARINESS: 10 (signal)
+    // CHECK23: SCARINESS: 60 (wild-jump)
+    // CHECK24: SCARINESS: 10 (alloc-dealloc-mismatch)
+    // CHECK25: SCARINESS: 40 (bad-free)
+    // CHECK26: SCARINESS: 10 (memcpy-param-overlap)
+    // CHECK27: SCARINESS: 27 (4-byte-read-use-after-poison)
+  }
+}
diff --git a/test/asan/TestCases/Linux/segv_read_write.c b/test/asan/TestCases/Linux/segv_read_write.c
new file mode 100644
index 0000000..b137970
--- /dev/null
+++ b/test/asan/TestCases/Linux/segv_read_write.c
@@ -0,0 +1,26 @@
+// RUN: %clangxx_asan -std=c++11 -O0 %s -o %t
+// RUN: not %run %t       2>&1 | FileCheck %s --check-prefix=READ
+// RUN: not %run %t write 2>&1 | FileCheck %s --check-prefix=WRITE
+// UNSUPPORTED: powerpc64,mips,s390
+
+#include <sys/mman.h>
+
+static volatile int sink;
+__attribute__((noinline)) void Read(int *ptr) { sink = *ptr; }
+__attribute__((noinline)) void Write(int *ptr) { *ptr = 0; }
+int main(int argc, char **argv) {
+  // Writes to shadow are detected as reads from shadow gap (because of how the
+  // shadow mapping works). This is kinda hard to fix. Test a random address in
+  // the application part of the address space.
+  void *volatile p =
+      mmap(nullptr, 4096, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+  munmap(p, 4096);
+  if (argc == 1)
+    Read((int *)p);
+  else
+    Write((int *)p);
+}
+// READ: AddressSanitizer: SEGV on unknown address
+// READ: The signal is caused by a READ memory access.
+// WRITE: AddressSanitizer: SEGV on unknown address
+// WRITE: The signal is caused by a WRITE memory access.
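
The comment in main() above leans on how the ASan shadow mapping works: application addresses are translated to shadow addresses with a fixed shift-and-offset, and the ranges the shadow itself occupies are protected as a "shadow gap". The shadow of a shadow address lands in that gap, so the instrumentation's own shadow load faults first and a stray write to shadow is reported as a READ. A minimal sketch of the usual 1:8 mapping; the offset below is the common x86_64 Linux value and other platforms use different constants:

#include <cstdint>
#include <cstdio>

constexpr unsigned kShadowScale = 3;                // 8 application bytes per shadow byte
constexpr uintptr_t kShadowOffset = 0x7fff8000ULL;  // typical x86_64 Linux offset

uintptr_t MemToShadow(uintptr_t addr) {
  return (addr >> kShadowScale) + kShadowOffset;
}

int main() {
  uintptr_t app = 0x603000000010ULL;  // arbitrary application address, for illustration
  std::printf("shadow(0x%llx) = 0x%llx\n", (unsigned long long)app,
              (unsigned long long)MemToShadow(app));
  return 0;
}
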
diff --git a/test/asan/TestCases/Linux/stack-overflow-recovery-mode.cc b/test/asan/TestCases/Linux/stack-overflow-recovery-mode.cc
new file mode 100644
index 0000000..e996659
--- /dev/null
+++ b/test/asan/TestCases/Linux/stack-overflow-recovery-mode.cc
@@ -0,0 +1,36 @@
+// Test that ASan doesn't hang on stack overflow in recovery mode.
+//
+// RUN: %clang_asan -O0 -fsanitize-recover=address %s -o %t
+// RUN: %env_asan_opts=halt_on_error=false not %run %t 2>&1 | FileCheck %s
+
+#include <assert.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+
+static volatile int *recurse(volatile int n, volatile int *p) {
+  // CHECK: {{stack-overflow on address 0x.* \(pc 0x.* bp 0x.* sp 0x.* T.*\)}}
+  if (n >= 0) *recurse(n + 1, p) += n;
+  return p;
+}
+
+
+void LimitStackAndReexec(int argc, char **argv) {
+  struct rlimit rlim;
+  int res = getrlimit(RLIMIT_STACK, &rlim);
+  assert(res == 0);
+  if (rlim.rlim_cur == RLIM_INFINITY) {
+    rlim.rlim_cur = 256 * 1024;
+    res = setrlimit(RLIMIT_STACK, &rlim);
+    assert(res == 0);
+
+    execv(argv[0], argv);
+    assert(0 && "unreachable");
+  }
+}
+
+int main(int argc, char **argv) {
+  LimitStackAndReexec(argc, argv);
+  volatile int res;
+  return *recurse(argc + 1, &res);
+}
diff --git a/test/asan/TestCases/Linux/static_tls.cc b/test/asan/TestCases/Linux/static_tls.cc
index 11bb1a4..5e569dd 100644
--- a/test/asan/TestCases/Linux/static_tls.cc
+++ b/test/asan/TestCases/Linux/static_tls.cc
@@ -10,6 +10,8 @@
 // CHECK: after
 
 // XFAIL: aarch64
+// binutils 2.26 has a change that causes this test to fail on powerpc64.
+// UNSUPPORTED: powerpc64 
 
 #ifndef SHARED
 #include <stdio.h>
diff --git a/test/asan/TestCases/Linux/swapcontext_annotation.cc b/test/asan/TestCases/Linux/swapcontext_annotation.cc
new file mode 100644
index 0000000..90aabae
--- /dev/null
+++ b/test/asan/TestCases/Linux/swapcontext_annotation.cc
@@ -0,0 +1,178 @@
+// Check that ASan plays well with annotated makecontext/swapcontext.
+
+// RUN: %clangxx_asan -lpthread -O0 %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -lpthread -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -lpthread -O2 %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -lpthread -O3 %s -o %t && %run %t 2>&1 | FileCheck %s
+//
+// This test is too subtle to try on non-x86 arch for now.
+// REQUIRES: x86_64-supported-target,i386-supported-target
+
+#include <pthread.h>
+#include <setjmp.h>
+#include <stdio.h>
+#include <sys/time.h>
+#include <ucontext.h>
+#include <unistd.h>
+
+#include <sanitizer/common_interface_defs.h>
+
+ucontext_t orig_context;
+ucontext_t child_context;
+ucontext_t next_child_context;
+
+char *next_child_stack;
+
+const int kStackSize = 1 << 20;
+
+void *main_thread_stack;
+size_t main_thread_stacksize;
+
+__attribute__((noinline, noreturn)) void LongJump(jmp_buf env) {
+  longjmp(env, 1);
+  _exit(1);
+}
+
+// Simulate __asan_handle_no_return().
+__attribute__((noinline)) void CallNoReturn() {
+  jmp_buf env;
+  if (setjmp(env) != 0) return;
+
+  LongJump(env);
+  _exit(1);
+}
+
+void NextChild() {
+  CallNoReturn();
+  __sanitizer_finish_switch_fiber();
+
+  char x[32] = {0};  // Stack gets poisoned.
+  printf("NextChild: %p\n", x);
+
+  CallNoReturn();
+
+  __sanitizer_start_switch_fiber(main_thread_stack, main_thread_stacksize);
+  CallNoReturn();
+  if (swapcontext(&next_child_context, &orig_context) < 0) {
+    perror("swapcontext");
+    _exit(1);
+  }
+}
+
+void Child(int mode) {
+  CallNoReturn();
+  __sanitizer_finish_switch_fiber();
+  char x[32] = {0};  // Stack gets poisoned.
+  printf("Child: %p\n", x);
+  CallNoReturn();
+  // (a) Do nothing, just return to parent function.
+  // (b) Jump into the original function. Stack remains poisoned unless we do
+  //     something.
+  // (c) Jump to another function which will then jump back to the main function
+  if (mode == 0) {
+    __sanitizer_start_switch_fiber(main_thread_stack, main_thread_stacksize);
+    CallNoReturn();
+  } else if (mode == 1) {
+    __sanitizer_start_switch_fiber(main_thread_stack, main_thread_stacksize);
+    CallNoReturn();
+    if (swapcontext(&child_context, &orig_context) < 0) {
+      perror("swapcontext");
+      _exit(1);
+    }
+  } else if (mode == 2) {
+    getcontext(&next_child_context);
+    next_child_context.uc_stack.ss_sp = next_child_stack;
+    next_child_context.uc_stack.ss_size = kStackSize / 2;
+    makecontext(&next_child_context, (void (*)())NextChild, 0);
+    __sanitizer_start_switch_fiber(next_child_context.uc_stack.ss_sp,
+                                   next_child_context.uc_stack.ss_size);
+    CallNoReturn();
+    if (swapcontext(&child_context, &next_child_context) < 0) {
+      perror("swapcontext");
+      _exit(1);
+    }
+  }
+}
+
+int Run(int arg, int mode, char *child_stack) {
+  printf("Child stack: %p\n", child_stack);
+  // Setup child context.
+  getcontext(&child_context);
+  child_context.uc_stack.ss_sp = child_stack;
+  child_context.uc_stack.ss_size = kStackSize / 2;
+  if (mode == 0) {
+    child_context.uc_link = &orig_context;
+  }
+  makecontext(&child_context, (void (*)())Child, 1, mode);
+  CallNoReturn();
+  __sanitizer_start_switch_fiber(child_context.uc_stack.ss_sp,
+                                 child_context.uc_stack.ss_size);
+  CallNoReturn();
+  if (swapcontext(&orig_context, &child_context) < 0) {
+    perror("swapcontext");
+    _exit(1);
+  }
+  CallNoReturn();
+  __sanitizer_finish_switch_fiber();
+  CallNoReturn();
+
+  // Touch the child's stack to make sure it's unpoisoned.
+  for (int i = 0; i < kStackSize; i++) {
+    child_stack[i] = i;
+  }
+  return child_stack[arg];
+}
+
+void handler(int sig) { CallNoReturn(); }
+
+void InitStackBounds() {
+  pthread_attr_t attr;
+  pthread_attr_init(&attr);
+  pthread_getattr_np(pthread_self(), &attr);
+  pthread_attr_getstack(&attr, &main_thread_stack, &main_thread_stacksize);
+  pthread_attr_destroy(&attr);
+}
+
+int main(int argc, char **argv) {
+  InitStackBounds();
+
+  // Set up a signal that will spam and trigger __asan_handle_no_return at
+  // tricky moments.
+  struct sigaction act = {};
+  act.sa_handler = &handler;
+  if (sigaction(SIGPROF, &act, 0)) {
+    perror("sigaction");
+    _exit(1);
+  }
+
+  itimerval t;
+  t.it_interval.tv_sec = 0;
+  t.it_interval.tv_usec = 10;
+  t.it_value = t.it_interval;
+  if (setitimer(ITIMER_PROF, &t, 0)) {
+    perror("setitimer");
+    _exit(1);
+  }
+
+  char *heap = new char[kStackSize + 1];
+  next_child_stack = new char[kStackSize + 1];
+  char stack[kStackSize + 1];
+  // CHECK: WARNING: ASan doesn't fully support makecontext/swapcontext
+  int ret = 0;
+  // CHECK-NOT: ASan is ignoring requested __asan_handle_no_return
+  for (unsigned int i = 0; i < 30; ++i) {
+    ret += Run(argc - 1, 0, stack);
+    ret += Run(argc - 1, 1, stack);
+    ret += Run(argc - 1, 2, stack);
+    ret += Run(argc - 1, 0, heap);
+    ret += Run(argc - 1, 1, heap);
+    ret += Run(argc - 1, 2, heap);
+  }
+  // CHECK: Test passed
+  printf("Test passed\n");
+
+  delete[] heap;
+  delete[] next_child_stack;
+
+  return ret;
+}
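
Distilled from the test above, the annotation pattern is: call __sanitizer_start_switch_fiber with the stack you are about to run on immediately before swapcontext, and call __sanitizer_finish_switch_fiber as the first thing after control lands on that stack. A minimal sketch using the same two-argument form of the interface exercised here (later sanitizer versions changed these signatures):

#include <pthread.h>
#include <stdio.h>
#include <ucontext.h>

#include <sanitizer/common_interface_defs.h>

static ucontext_t main_ctx, fiber_ctx;
static char fiber_stack[1 << 20];
static void *main_stack;
static size_t main_stacksize;

static void FiberFn() {
  __sanitizer_finish_switch_fiber();  // now running on fiber_stack
  char local[32] = {0};
  printf("on fiber: %p\n", (void *)local);
  // Announce the switch back onto the main thread's stack before leaving.
  __sanitizer_start_switch_fiber(main_stack, main_stacksize);
  swapcontext(&fiber_ctx, &main_ctx);
}

int main() {
  // Query the main thread's stack bounds, as the test above does.
  pthread_attr_t attr;
  pthread_attr_init(&attr);
  pthread_getattr_np(pthread_self(), &attr);
  pthread_attr_getstack(&attr, &main_stack, &main_stacksize);
  pthread_attr_destroy(&attr);

  getcontext(&fiber_ctx);
  fiber_ctx.uc_stack.ss_sp = fiber_stack;
  fiber_ctx.uc_stack.ss_size = sizeof(fiber_stack);
  fiber_ctx.uc_link = &main_ctx;
  makecontext(&fiber_ctx, FiberFn, 0);

  // Announce the switch onto the fiber's stack, then switch.
  __sanitizer_start_switch_fiber(fiber_stack, sizeof(fiber_stack));
  swapcontext(&main_ctx, &fiber_ctx);
  __sanitizer_finish_switch_fiber();  // back on the main stack
  printf("back on main\n");
  return 0;
}
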
diff --git a/test/asan/TestCases/Linux/swapcontext_test.cc b/test/asan/TestCases/Linux/swapcontext_test.cc
index 86ed593..210a667 100644
--- a/test/asan/TestCases/Linux/swapcontext_test.cc
+++ b/test/asan/TestCases/Linux/swapcontext_test.cc
@@ -6,7 +6,7 @@
 // RUN: %clangxx_asan -O3 %s -o %t && %run %t 2>&1 | FileCheck %s
 //
 // This test is too subtle to try on non-x86 arch for now.
-// REQUIRES: x86_64-supported-target,i386-supported-target
+// REQUIRES: x86-target-arch
 
 #include <stdio.h>
 #include <ucontext.h>
diff --git a/test/asan/TestCases/Linux/unpoison_tls.cc b/test/asan/TestCases/Linux/unpoison_tls.cc
index 9c1d74b..19ebec4 100644
--- a/test/asan/TestCases/Linux/unpoison_tls.cc
+++ b/test/asan/TestCases/Linux/unpoison_tls.cc
@@ -1,5 +1,5 @@
 // Test that TLS is unpoisoned on thread death.
-// REQUIRES: x86_64-supported-target,i386-supported-target
+// REQUIRES: x86-target-arch
 
 // RUN: %clangxx_asan -O1 %s -pthread -o %t && %run %t 2>&1
 
diff --git a/test/asan/TestCases/Posix/closed-fds.cc b/test/asan/TestCases/Posix/closed-fds.cc
index 3bbe3d8..b7bca26 100644
--- a/test/asan/TestCases/Posix/closed-fds.cc
+++ b/test/asan/TestCases/Posix/closed-fds.cc
@@ -2,7 +2,7 @@
 // symbolizer still works.
 
 // RUN: rm -f %t.log.*
-// RUN: %clangxx_asan -O0 %s -o %t 2>&1 && %env_asan_opts=log_path=%t.log:verbosity=2 not %run %t 2>&1
+// RUN: %clangxx_asan -O0 %s -o %t 2>&1 && %env_asan_opts=log_path='"%t.log"':verbosity=2 not %run %t 2>&1
 // RUN: FileCheck %s --check-prefix=CHECK-FILE < %t.log.*
 
 // FIXME: copy %t.log back from the device and re-enable on Android.
diff --git a/test/asan/TestCases/Posix/coverage-sandboxing.cc b/test/asan/TestCases/Posix/coverage-sandboxing.cc
index f6fc526..c4e6bc7 100644
--- a/test/asan/TestCases/Posix/coverage-sandboxing.cc
+++ b/test/asan/TestCases/Posix/coverage-sandboxing.cc
@@ -79,8 +79,8 @@
 #endif
 
 // CHECK-vanilla: PID: [[PID:[0-9]+]]
-// CHECK-vanilla: .so.[[PID]].sancov: 258 PCs written
+// CHECK-vanilla: .so.[[PID]].sancov: 257 PCs written
 // CHECK-vanilla: [[PID]].sancov: 1 PCs written
 
 // CHECK-sandbox: PID: [[PID:[0-9]+]]
-// CHECK-sandbox: 258 PCs written to packed file
+// CHECK-sandbox: 257 PCs written to packed file
diff --git a/test/asan/TestCases/Posix/dlclose-test.cc b/test/asan/TestCases/Posix/dlclose-test.cc
index 369abd3..0aafa3e 100644
--- a/test/asan/TestCases/Posix/dlclose-test.cc
+++ b/test/asan/TestCases/Posix/dlclose-test.cc
@@ -11,8 +11,8 @@
 
 // This subtle test assumes that after a foo.so is dlclose-d
 // we can mmap the region of memory that has been occupied by the library.
-// It works on i368/x86_64 Linux, but not necessary anywhere else.
-// REQUIRES: x86_64-supported-target,i386-supported-target
+// It works on x86 Linux, but not necessarily anywhere else.
+// REQUIRES: x86-target-arch
 
 // RUN: %clangxx_asan -O0 -DSHARED_LIB %s -fPIC -shared -o %t-so.so
 // RUN: %clangxx_asan -O0 %s %libdl -o %t && %run %t 2>&1 | FileCheck %s
diff --git a/test/asan/TestCases/dump_instruction_bytes.cc b/test/asan/TestCases/Posix/dump_instruction_bytes.cc
similarity index 91%
rename from test/asan/TestCases/dump_instruction_bytes.cc
rename to test/asan/TestCases/Posix/dump_instruction_bytes.cc
index da86a0f..b5b38ff 100644
--- a/test/asan/TestCases/dump_instruction_bytes.cc
+++ b/test/asan/TestCases/Posix/dump_instruction_bytes.cc
@@ -4,7 +4,7 @@
 // RUN: %env_asan_opts=dump_instruction_bytes=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-DUMP
 // RUN: not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-NODUMP
 //
-// REQUIRES: x86_64-supported-target,i386-supported-target
+// REQUIRES: x86-target-arch
 
 int main() {
 #if defined(__x86_64__)
diff --git a/test/asan/TestCases/Posix/halt_on_error-signals.c b/test/asan/TestCases/Posix/halt_on_error-signals.c
index 60916f6..6bdf30b 100644
--- a/test/asan/TestCases/Posix/halt_on_error-signals.c
+++ b/test/asan/TestCases/Posix/halt_on_error-signals.c
@@ -3,7 +3,7 @@
 // RUN: %clang_asan -fsanitize-recover=address -pthread %s -o %t
 //
 // RUN: rm -f %t.log
-// RUN: %env_asan_opts=halt_on_error=false:suppress_equal_pcs=false %run %t 100 >%t.log 2>&1 || true
+// RUN: %env_asan_opts=halt_on_error=false:suppress_equal_pcs=false %run %t 100 >>%t.log 2>&1 || true
 // Collision will almost always get triggered but we still need to check the unlikely case:
 // RUN: FileCheck --check-prefix=CHECK-COLLISION %s < %t.log || FileCheck --check-prefix=CHECK-NO-COLLISION %s < %t.log
 
diff --git a/test/asan/TestCases/Posix/halt_on_error-torture.cc b/test/asan/TestCases/Posix/halt_on_error-torture.cc
index 019f7d1..5d7eff0 100644
--- a/test/asan/TestCases/Posix/halt_on_error-torture.cc
+++ b/test/asan/TestCases/Posix/halt_on_error-torture.cc
@@ -2,22 +2,21 @@
 //
 // RUN: %clangxx_asan -fsanitize-recover=address -pthread %s -o %t
 //
-// RUN: %env_asan_opts=halt_on_error=false:suppress_equal_pcs=false %run %t 1 10 >1.txt 2>&1
+// RUN: rm -f 1.txt
+// RUN: %env_asan_opts=halt_on_error=false:suppress_equal_pcs=false %run %t 1 10 >>1.txt 2>&1
 // RUN: FileCheck %s < 1.txt
 // RUN: [ $(grep -c 'ERROR: AddressSanitizer: use-after-poison' 1.txt) -eq 10 ]
 // RUN: FileCheck --check-prefix=CHECK-NO-COLLISION %s < 1.txt
 //
 // Collisions are unlikely but still possible so we need the ||.
-// RUN: %env_asan_opts=halt_on_error=false:suppress_equal_pcs=false %run %t 10 20 >10.txt 2>&1 || true
-// This one is racy although _very_ unlikely to fail:
-// RUN: FileCheck %s < 10.txt
-// RUN: FileCheck --check-prefix=CHECK-COLLISION %s < 1.txt || FileCheck --check-prefix=CHECK-NO-COLLISION %s < 1.txt
+// RUN: rm -f 10.txt
+// RUN: %env_asan_opts=halt_on_error=false:suppress_equal_pcs=false %run %t 10 20 >>10.txt 2>&1 || true
+// RUN: FileCheck --check-prefix=CHECK-COLLISION %s < 10.txt || FileCheck --check-prefix=CHECK-NO-COLLISION %s < 10.txt
 //
 // Collisions are unlikely but still possible so we need the ||.
-// RUN: %env_asan_opts=halt_on_error=false %run %t 10 20 >10.txt 2>&1 || true
-// This one is racy although _very_ unlikely to fail:
-// RUN: FileCheck %s < 10.txt
-// RUN: FileCheck --check-prefix=CHECK-COLLISION %s < 1.txt || FileCheck --check-prefix=CHECK-NO-COLLISION %s < 1.txt
+// RUN: rm -f 20.txt
+// RUN: %env_asan_opts=halt_on_error=false %run %t 10 20 >>20.txt 2>&1 || true
+// RUN: FileCheck --check-prefix=CHECK-COLLISION %s < 20.txt || FileCheck --check-prefix=CHECK-NO-COLLISION %s < 20.txt
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/asan/TestCases/Posix/halt_on_error_suppress_equal_pcs.cc b/test/asan/TestCases/Posix/halt_on_error_suppress_equal_pcs.cc
index 98b0348..98ef851 100644
--- a/test/asan/TestCases/Posix/halt_on_error_suppress_equal_pcs.cc
+++ b/test/asan/TestCases/Posix/halt_on_error_suppress_equal_pcs.cc
@@ -6,14 +6,16 @@
 // RUN: %env_asan_opts=halt_on_error=false %run %t 2>&1 | FileCheck %s
 //
 // Check that we die after reaching different reports number threshold.
-// RUN: %env_asan_opts=halt_on_error=false not %run %t 1 > %t1.log 2>&1
+// RUN: rm -f %t1.log
+// RUN: %env_asan_opts=halt_on_error=false not %run %t 1 >> %t1.log 2>&1
 // RUN: [ $(grep -c 'ERROR: AddressSanitizer: stack-buffer-overflow' %t1.log) -eq 25 ]
 //
 // Check suppress_equal_pcs=true behavior is equal to default one.
 // RUN: %env_asan_opts=halt_on_error=false:suppress_equal_pcs=true %run %t 2>&1 | FileCheck %s
 //
 // Check suppress_equal_pcs=false behavior isn't equal to default one.
-// RUN: %env_asan_opts=halt_on_error=false:suppress_equal_pcs=false %run %t > %t2.log 2>&1
+// RUN: rm -f %t2.log
+// RUN: %env_asan_opts=halt_on_error=false:suppress_equal_pcs=false %run %t >> %t2.log 2>&1
 // RUN: [ $(grep -c 'ERROR: AddressSanitizer: stack-buffer-overflow' %t2.log) -eq 30 ]
 
 #define ACCESS_ARRAY_FIVE_ELEMENTS(array, i)     \
diff --git a/test/asan/TestCases/mmap_limit_mb.cc b/test/asan/TestCases/Posix/mmap_limit_mb.cc
similarity index 100%
rename from test/asan/TestCases/mmap_limit_mb.cc
rename to test/asan/TestCases/Posix/mmap_limit_mb.cc
diff --git a/test/asan/TestCases/no_asan_gen_globals.c b/test/asan/TestCases/Posix/no_asan_gen_globals.c
similarity index 98%
rename from test/asan/TestCases/no_asan_gen_globals.c
rename to test/asan/TestCases/Posix/no_asan_gen_globals.c
index 2b13dea..c686f83 100644
--- a/test/asan/TestCases/no_asan_gen_globals.c
+++ b/test/asan/TestCases/Posix/no_asan_gen_globals.c
@@ -2,7 +2,6 @@
 // XFAIL: android
 // FIXME: http://llvm.org/bugs/show_bug.cgi?id=22682
 // REQUIRES: asan-64-bits
-//
 // Make sure __asan_gen_* strings do not end up in the symbol table.
 
 // RUN: %clang_asan %s -o %t.exe
diff --git a/test/asan/TestCases/Posix/start-deactivated.cc b/test/asan/TestCases/Posix/start-deactivated.cc
index b301415..187ee5e 100644
--- a/test/asan/TestCases/Posix/start-deactivated.cc
+++ b/test/asan/TestCases/Posix/start-deactivated.cc
@@ -6,7 +6,7 @@
 // RUN: %clangxx -O0 %s -c -o %t.o
 // RUN: %clangxx_asan -O0 %t.o %libdl -o %t
 // RUN: %env_asan_opts=start_deactivated=1,allocator_may_return_null=0 \
-// RUN:   ASAN_ACTIVATION_OPTIONS=allocator_may_return_null=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN:   ASAN_ACTIVATION_OPTIONS=allocator_may_return_null=1 not %run %t 2>&1 | FileCheck %s
 // RUN: %env_asan_opts=start_deactivated=1 \
 // RUN:   ASAN_ACTIVATION_OPTIONS=help=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-HELP
 // RUN: %env_asan_opts=start_deactivated=1,verbosity=1 \
diff --git a/test/asan/TestCases/Windows/bind_io_completion_callback.cc b/test/asan/TestCases/Windows/bind_io_completion_callback.cc
index c062a79..ef7e458 100644
--- a/test/asan/TestCases/Windows/bind_io_completion_callback.cc
+++ b/test/asan/TestCases/Windows/bind_io_completion_callback.cc
@@ -1,13 +1,7 @@
 // Make sure we can throw exceptions from work items executed via
 // BindIoCompletionCallback.
 //
-// Clang doesn't support exceptions on Windows yet, so for the time being we
-// build this program in two parts: the code with exceptions is built with CL,
-// the rest is built with Clang.  This represents the typical scenario when we
-// build a large project using "clang-cl -fallback -fsanitize=address".
-//
-// RUN: cl -c %s -Fo%t.obj
-// RUN: %clangxx_asan -o %t.exe %s %t.obj
+// RUN: %clangxx_asan %s -o %t.exe
 // RUN: %run %t.exe 2>&1 | FileCheck %s
 
 #include <windows.h>
@@ -15,7 +9,6 @@
 
 void ThrowAndCatch();
 
-#if !defined(__clang__)
 __declspec(noinline)
 void Throw() {
   fprintf(stderr, "Throw\n");
@@ -32,7 +25,6 @@
 // CHECK: Catch
   }
 }
-#else
 
 char buffer[65536];
 HANDLE done;
@@ -62,9 +54,8 @@
       GetLastError() != ERROR_IO_PENDING)
     return 4;
 
-  if (WAIT_OBJECT_0 != WaitForSingleObject(done, INFINITE))
+  if (WAIT_OBJECT_0 != WaitForSingleObject(done, 10 * 1000))
     return 5;
   fprintf(stderr, "Done!\n");
 // CHECK: Done!
 }
-#endif
diff --git a/test/asan/TestCases/Windows/coverage-basic.cc b/test/asan/TestCases/Windows/coverage-basic.cc
index 0ff105d..918872f 100644
--- a/test/asan/TestCases/Windows/coverage-basic.cc
+++ b/test/asan/TestCases/Windows/coverage-basic.cc
@@ -6,8 +6,8 @@
 // RUN: %sancov print *.sancov | FileCheck %s
 #include <stdio.h>
 
-void foo() { fprintf(stderr, "FOO\n"); }
-void bar() { fprintf(stderr, "BAR\n"); }
+void foo() { fputs("FOO", stderr); }
+void bar() { fputs("BAR", stderr); }
 
 int main(int argc, char **argv) {
   if (argc == 2) {
diff --git a/test/asan/TestCases/Windows/crash_read_write.cc b/test/asan/TestCases/Windows/crash_read_write.cc
new file mode 100644
index 0000000..74200cc
--- /dev/null
+++ b/test/asan/TestCases/Windows/crash_read_write.cc
@@ -0,0 +1,29 @@
+// RUN: %clangxx_asan -std=c++11 -O0 %s -o %t
+// RUN: not %run %t       2>&1 | FileCheck %s --check-prefix=READ
+// RUN: not %run %t write 2>&1 | FileCheck %s --check-prefix=WRITE
+
+#include <windows.h>
+#include <stdio.h>
+
+static volatile int sink;
+__attribute__((noinline)) void Read(int *ptr) { sink = *ptr; }
+__attribute__((noinline)) void Write(int *ptr) { *ptr = 0; }
+int main(int argc, char **argv) {
+  // Writes to shadow are detected as reads from shadow gap (because of how the
+  // shadow mapping works). This is kinda hard to fix. Test a random address in
+  // the application part of the address space.
+  void *volatile p = VirtualAlloc(0, 4096, MEM_COMMIT, PAGE_READONLY);
+  bool ok = VirtualFree(p, 0, MEM_RELEASE);
+  if (!ok) {
+    printf("VirtualFree failed\n");
+    return 0;
+  }
+  if (argc == 1)
+    Read((int *)p);
+  else
+    Write((int *)p);
+}
+// READ: AddressSanitizer: access-violation on unknown address
+// READ: The signal is caused by a READ memory access.
+// WRITE: AddressSanitizer: access-violation on unknown address
+// WRITE: The signal is caused by a WRITE memory access.
diff --git a/test/asan/TestCases/Windows/dll_host.cc b/test/asan/TestCases/Windows/dll_host.cc
index 71721fe..6a029c9 100644
--- a/test/asan/TestCases/Windows/dll_host.cc
+++ b/test/asan/TestCases/Windows/dll_host.cc
@@ -5,20 +5,41 @@
 // RUN: %clang_cl_asan -O0 %s -Fe%t
 //
 // Get the list of ASan wrappers exported by the main module RTL:
-// RUN: dumpbin /EXPORTS %t | grep -o "__asan_wrap[^ ]*" | grep -v @ | sort | uniq > %t.exported_wrappers
+// Note: the mangling decoration (e.g. @4) is removed because the calling
+//       conventions differ between 32-bit and 64-bit.
+// RUN: dumpbin /EXPORTS %t | grep -o "__asan_wrap[^ ]*" | sed -e s/@.*// > %t.exported_wrappers1
 // FIXME: we should really check the other __asan exports too.
-// RUN: dumpbin /EXPORTS %t | grep -o "__sanitizer_[^ ]*" | grep -v @ | sort | uniq >> %t.exported_wrappers
+// RUN: dumpbin /EXPORTS %t | grep -o "__sanitizer_[^ ]*" | sed -e s/@.*// > %t.exported_wrappers2
 //
 // Get the list of ASan wrappers imported by the DLL RTL:
 // [BEWARE: be really careful with the sed commands, as this test can be run
 //  from different environments with different shells and seds]
-// RUN: grep INTERCEPT_LIBRARY_FUNCTION %p/../../../../lib/asan/asan_win_dll_thunk.cc | grep -v define | sed -e s/.*(/__asan_wrap_/ | sed -e s/).*// | sort | uniq > %t.dll_imports
-// RUN: grep "^INTERFACE_FUNCTION.*sanitizer" %p/../../../../lib/asan/asan_win_dll_thunk.cc | grep -v define | sed -e s/.*(// | sed -e s/).*// | sort | uniq >> %t.dll_imports
+// RUN: grep INTERCEPT_LIBRARY_FUNCTION %p/../../../../lib/asan/asan_win_dll_thunk.cc | grep -v define | sed -e s/.*(/__asan_wrap_/ | sed -e s/).*// > %t.dll_imports1
+// RUN: grep "^INTERFACE_FUNCTION.*sanitizer" %p/../../../../lib/asan/asan_win_dll_thunk.cc | grep -v define | sed -e s/.*(// | sed -e s/).*// > %t.dll_imports2
+//
+// Add functions intercepted in asan_malloc.win.cc and asan_win.cc.
+// RUN: grep '[I]MPORT:' %s | sed -e 's/.*[I]MPORT: //' > %t.dll_imports3
+// IMPORT: __asan_wrap_HeapAlloc
+// IMPORT: __asan_wrap_HeapFree
+// IMPORT: __asan_wrap_HeapReAlloc
+// IMPORT: __asan_wrap_HeapSize
+// IMPORT: __asan_wrap_CreateThread
+// IMPORT: __asan_wrap_RaiseException
+// IMPORT: __asan_wrap_RtlRaiseException
+//
+// The exception handlers differ in 32-bit and 64-bit, so we ignore them:
+// RUN: grep '[E]XPORT:' %s | sed -e 's/.*[E]XPORT: //' > %t.exported_wrappers3
+// EXPORT: __asan_wrap__except_handler3
+// EXPORT: __asan_wrap__except_handler4
+// EXPORT: __asan_wrap___C_specific_handler
+//
+// RUN: cat %t.dll_imports1 %t.dll_imports2 %t.dll_imports3 | sort | uniq > %t.dll_imports-sorted
+// RUN: cat %t.exported_wrappers1 %t.exported_wrappers2 %t.exported_wrappers3 | sort | uniq > %t.exported_wrappers-sorted
 //
 // Now make sure the DLL thunk imports everything:
 // RUN: echo
 // RUN: echo "=== NOTE === If you see a mismatch below, please update asan_win_dll_thunk.cc"
-// RUN: diff %t.dll_imports %t.exported_wrappers
+// RUN: diff %t.dll_imports-sorted %t.exported_wrappers-sorted
 // REQUIRES: asan-static-runtime
 
 #include <stdio.h>
diff --git a/test/asan/TestCases/Windows/dll_operator_array_new_with_dtor_left_oob.cc b/test/asan/TestCases/Windows/dll_operator_array_new_with_dtor_left_oob.cc
index 8306a73..b514c99 100644
--- a/test/asan/TestCases/Windows/dll_operator_array_new_with_dtor_left_oob.cc
+++ b/test/asan/TestCases/Windows/dll_operator_array_new_with_dtor_left_oob.cc
@@ -10,7 +10,7 @@
 extern "C" __declspec(dllexport)
 int test_function() {
   C *buffer = new C[42];
-  buffer[-2].x = 42;
+  buffer[-(1 + sizeof(void*) / 4)].x = 42;
 // CHECK: AddressSanitizer: heap-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
 // CHECK: WRITE of size 4 at [[ADDR]] thread T0
 // CHECK-NEXT: test_function {{.*}}dll_operator_array_new_with_dtor_left_oob.cc:[[@LINE-3]]
@@ -19,7 +19,7 @@
 // FIXME: Currently it says "4 bytes ... left of 172-byte region",
 //        should be "8 bytes ... left of 168-byte region", see
 //        https://code.google.com/p/address-sanitizer/issues/detail?id=314
-// CHECK: [[ADDR]] is located {{.*}} bytes to the left of 172-byte region
+// CHECK: [[ADDR]] is located {{.*}} bytes to the left of {{(172|176)}}-byte region
 // FIXME: Should get rid of the malloc/free frames called from the inside of
 // operator new/delete in DLLs when using -MT CRT.
 // FIXME: The operator new frame should have [].
diff --git a/test/asan/TestCases/Windows/dll_seh.cc b/test/asan/TestCases/Windows/dll_seh.cc
index 6e4c724..0962138 100644
--- a/test/asan/TestCases/Windows/dll_seh.cc
+++ b/test/asan/TestCases/Windows/dll_seh.cc
@@ -1,17 +1,10 @@
-// Clang doesn't support SEH on Windows yet, so for the time being we
-// build this program in two parts: the code with SEH is built with CL,
-// the rest is built with Clang.  This represents the typical scenario when we
-// build a large project using "clang-cl -fallback -fsanitize=address".
-//
 // RUN: %clang_cl_asan -O0 %p/dll_host.cc -Fe%t
 //
 // Check both -GS and -GS- builds:
-// RUN: cl -LD -c %s -Fo%t.obj
-// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll %t.obj
+// RUN: %clang_cl_asan -GS  -LD -O0 %s -Fe%t.dll
 // RUN: %run %t %t.dll
 //
-// RUN: cl -LD -GS- -c %s -Fo%t.obj
-// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll %t.obj
+// RUN: %clang_cl_asan -GS- -LD -O0 %s -Fe%t.dll
 // RUN: %run %t %t.dll
 
 #include <windows.h>
@@ -24,7 +17,6 @@
 
 void ThrowAndCatch();
 
-#if !defined(__clang__)
 __declspec(noinline)
 void Throw() {
   int local, zero = 0;
@@ -41,7 +33,6 @@
     fprintf(stderr, "__except:  %p\n", &local);
   }
 }
-#else
 
 extern "C" __declspec(dllexport)
 int test_function() {
@@ -57,4 +48,3 @@
   assert(!__asan_address_is_poisoned(x + 32));
   return 0;
 }
-#endif
diff --git a/test/asan/TestCases/Windows/intercept_memcpy.cc b/test/asan/TestCases/Windows/intercept_memcpy.cc
index 9ee984b..6e45e7f 100644
--- a/test/asan/TestCases/Windows/intercept_memcpy.cc
+++ b/test/asan/TestCases/Windows/intercept_memcpy.cc
@@ -22,8 +22,8 @@
   call_memcpy(&memcpy, buff2, buff1, 6);
 // CHECK: AddressSanitizer: stack-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
 // CHECK: WRITE of size 6 at [[ADDR]] thread T0
-// CHECK-NEXT:  __asan_{{.*}}memcpy
-// CHECK-NEXT:  call_memcpy
+// CHECK-NEXT:  __asan_{{.*}}mem{{.*}}
+// CHECK-NEXT:  call_mem{{.*}}
 // CHECK-NEXT:  main {{.*}}intercept_memcpy.cc:[[@LINE-5]]
 // CHECK: Address [[ADDR]] is located in stack of thread T0 at offset {{.*}} in frame
 // CHECK-NEXT:   #0 {{.*}} main
diff --git a/test/asan/TestCases/Windows/intercept_strdup.cc b/test/asan/TestCases/Windows/intercept_strdup.cc
index 3710534..95b659f 100644
--- a/test/asan/TestCases/Windows/intercept_strdup.cc
+++ b/test/asan/TestCases/Windows/intercept_strdup.cc
@@ -20,9 +20,13 @@
 // CHECK:   {{#0 .* main .*}}intercept_strdup.cc:[[@LINE-3]]
 // CHECK: [[ADDR]] is located 1 bytes to the left of 6-byte region
 // CHECK: allocated by thread T0 here:
-// CHECK:   {{#0 .* malloc }}
-// FIXME: llvm-symbolizer can't find strdup in the CRT.
-// CHECKX:   {{#1 .*strdup}}
-// CHECK:   {{#2 .* main .*}}intercept_strdup.cc:[[@LINE-17]]
+//
+// The first frame is our wrapper normally but will be malloc in the dynamic
+// config.
+// CHECK:   #0 {{.*}} in {{malloc|__asan_wrap_strdup}}
+//
+// The local call to _strdup above may be the second or third frame depending
+// on whether we're using the dynamic config.
+// CHECK:   #{{[12]}} {{.*}} in main {{.*}}intercept_strdup.cc:[[@LINE-21]]
   free(ptr);
 }
diff --git a/test/asan/TestCases/Windows/oom.cc b/test/asan/TestCases/Windows/oom.cc
index b24cddf..59cc7ed 100644
--- a/test/asan/TestCases/Windows/oom.cc
+++ b/test/asan/TestCases/Windows/oom.cc
@@ -1,12 +1,12 @@
 // RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
+// REQUIRES: asan-32-bits
 
 #include <malloc.h>
 
 int main() {
   while (true) {
     void *ptr = malloc(200 * 1024 * 1024);  // 200MB
-    free(ptr);
   }
 // CHECK: failed to allocate
 }
diff --git a/test/asan/TestCases/Windows/operator_array_new_with_dtor_left_oob.cc b/test/asan/TestCases/Windows/operator_array_new_with_dtor_left_oob.cc
index 63f2929..aae9d5e 100644
--- a/test/asan/TestCases/Windows/operator_array_new_with_dtor_left_oob.cc
+++ b/test/asan/TestCases/Windows/operator_array_new_with_dtor_left_oob.cc
@@ -8,7 +8,7 @@
 
 int main() {
   C *buffer = new C[42];
-  buffer[-2].x = 42;
+  buffer[-(1 + sizeof(void*) / 4)].x = 42;
 // CHECK: AddressSanitizer: heap-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
 // CHECK: WRITE of size 4 at [[ADDR]] thread T0
 // CHECK-NEXT: {{#0 .* main .*operator_array_new_with_dtor_left_oob.cc}}:[[@LINE-3]]
@@ -16,7 +16,7 @@
 // FIXME: Currently it says "4 bytes ... left of 172-byte region",
 //        should be "8 bytes ... left of 168-byte region", see
 //        https://code.google.com/p/address-sanitizer/issues/detail?id=314
-// CHECK: [[ADDR]] is located {{.*}} bytes to the left of 172-byte region
+// CHECK: [[ADDR]] is located {{.*}} bytes to the left of {{(172|176)}}-byte region
 // CHECK-LABEL: allocated by thread T0 here:
 // FIXME: The 'operator new' frame should have [].
 // CHECK-NEXT: {{#0 .* operator new}}
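
The index change in the two operator_array_new tests in this patch is easier to follow as arithmetic: new C[42] allocates 42 * 4 = 168 bytes of elements plus a pointer-sized array cookie (4 bytes on 32-bit, 8 on 64-bit), which is where the 172- vs 176-byte region in the report comes from, and -(1 + sizeof(void*) / 4) steps just past the cookie to land immediately left of the allocation on either pointer size. A compile-time restatement, assuming a 4-byte int member as the "WRITE of size 4" CHECK lines imply:

// Element bytes: 42 ints of 4 bytes each.
static_assert(42 * sizeof(int) == 168, "element bytes");
// Region size = elements + pointer-sized cookie: 172 on 32-bit, 176 on 64-bit.
static_assert(168 + sizeof(void *) == 172 || 168 + sizeof(void *) == 176, "region size");
// Index -(1 + sizeof(void*)/4) is -2 with 4-byte pointers and -3 with 8-byte pointers.
static_assert(1 + sizeof(void *) / 4 == 2 || 1 + sizeof(void *) / 4 == 3, "negative index");
int main() { return 0; }
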
diff --git a/test/asan/TestCases/Windows/queue_user_work_item.cc b/test/asan/TestCases/Windows/queue_user_work_item.cc
index d99ea6f..2a8beb8 100644
--- a/test/asan/TestCases/Windows/queue_user_work_item.cc
+++ b/test/asan/TestCases/Windows/queue_user_work_item.cc
@@ -1,13 +1,7 @@
 // Make sure we can throw exceptions from work items executed via
 // QueueUserWorkItem.
 //
-// Clang doesn't support exceptions on Windows yet, so for the time being we
-// build this program in two parts: the code with exceptions is built with CL,
-// the rest is built with Clang.  This represents the typical scenario when we
-// build a large project using "clang-cl -fallback -fsanitize=address".
-//
-// RUN: cl -c %s -Fo%t.obj
-// RUN: %clangxx_asan -o %t.exe %s %t.obj
+// RUN: %clangxx_asan %s -o %t.exe
 // RUN: %run %t.exe 2>&1 | FileCheck %s
 
 #include <windows.h>
@@ -15,7 +9,6 @@
 
 void ThrowAndCatch();
 
-#if !defined(__clang__)
 __declspec(noinline)
 void Throw() {
   fprintf(stderr, "Throw\n");
@@ -32,7 +25,6 @@
 // CHECK: Catch
   }
 }
-#else
 
 HANDLE done;
 
@@ -47,9 +39,13 @@
   if (!done)
     return 1;
   QueueUserWorkItem(&work_item, nullptr, 0);
-  if (WAIT_OBJECT_0 != WaitForSingleObject(done, INFINITE))
+  unsigned wait_result = WaitForSingleObject(done, 10 * 1000);
+  if (wait_result == WAIT_ABANDONED)
+    fprintf(stderr, "Timed out\n");
+  if (wait_result != WAIT_OBJECT_0) {
+    fprintf(stderr, "Wait for work item failed\n");
     return 2;
+  }
   fprintf(stderr, "Done!\n");
 // CHECK: Done!
 }
-#endif
diff --git a/test/asan/TestCases/Windows/queue_user_work_item_report.cc b/test/asan/TestCases/Windows/queue_user_work_item_report.cc
index f0d3d3e..26bd5e0 100644
--- a/test/asan/TestCases/Windows/queue_user_work_item_report.cc
+++ b/test/asan/TestCases/Windows/queue_user_work_item_report.cc
@@ -12,8 +12,6 @@
 // CHECK: AddressSanitizer: stack-buffer-underflow on address [[ADDR:0x[0-9a-f]+]]
 // CHECK: WRITE of size 1 at [[ADDR]] thread T1
 // CHECK:   {{#0 .* work_item.*queue_user_work_item_report.cc}}:[[@LINE-3]]
-// CHECK: Address [[ADDR]] is located in stack of thread T1 at offset {{.*}} in frame
-// CHECK:   work_item
   SetEvent(done);
   return 0;
 }
@@ -24,6 +22,6 @@
     return 1;
 // CHECK-NOT: Thread T1 created
   QueueUserWorkItem(&work_item, nullptr, 0);
-  if (WAIT_OBJECT_0 != WaitForSingleObject(done, INFINITE))
+  if (WAIT_OBJECT_0 != WaitForSingleObject(done, 10 * 1000))
     return 2;
 }
diff --git a/test/asan/TestCases/Windows/report_after_syminitialize.cc b/test/asan/TestCases/Windows/report_after_syminitialize.cc
index d83d7dc..c5ac602 100644
--- a/test/asan/TestCases/Windows/report_after_syminitialize.cc
+++ b/test/asan/TestCases/Windows/report_after_syminitialize.cc
@@ -14,8 +14,10 @@
 
   *(volatile int*)0 = 42;
   // CHECK: ERROR: AddressSanitizer: access-violation on unknown address
-  // CHECK-NEXT: {{WARNING: Failed to use and restart external symbolizer}}
-  // CHECK-NEXT: {{WARNING: .*DbgHelp}}
-  // CHECK: {{#0 0x.* in main.*report_after_syminitialize.cc:}}[[@LINE-4]]
+  // CHECK: The signal is caused by a WRITE memory access.
+  // CHECK: Hint: address points to the zero page.
+  // CHECK: {{WARNING: .*DbgHelp}}
+  // CHECK: {{WARNING: Failed to use and restart external symbolizer}}
+  // CHECK: {{#0 0x.* in main.*report_after_syminitialize.cc:}}[[@LINE-6]]
   // CHECK: AddressSanitizer can not provide additional info.
 }
diff --git a/test/asan/TestCases/Windows/shadow_conflict_32.cc b/test/asan/TestCases/Windows/shadow_conflict_32.cc
new file mode 100644
index 0000000..7c6d94b
--- /dev/null
+++ b/test/asan/TestCases/Windows/shadow_conflict_32.cc
@@ -0,0 +1,29 @@
+// Load this DLL at the default 32-bit ASan shadow base, and test how we dump
+// the process memory layout.
+// REQUIRES: asan-32-bits
+//
+// RUN: %clang_cl_asan -DBUILD_DLL -LD %s -Fe%t_dll.dll -link -base:0x30000000 -fixed -dynamicbase:no
+// RUN: %clang_cl_asan %s -Fe%t.exe -link %t_dll.lib
+// RUN: not %run %t.exe 2>&1 | FileCheck %s
+
+#ifndef BUILD_DLL
+#include <stdio.h>
+
+extern "C" __declspec(dllimport) int test_function();
+
+int main() {
+  fprintf(stderr, "should have failed to initialize, DLL got loaded near 0x%p\n",
+          (void *)&test_function);
+}
+
+#else
+extern "C" __declspec(dllexport) int test_function() { return 0; }
+#endif
+
+// CHECK: =={{[0-9]+}}==Shadow memory range interleaves with an existing memory mapping. ASan cannot proceed correctly. ABORTING.
+// CHECK: =={{[0-9]+}}==ASan shadow was supposed to be located in the [0x2fff0000-0x3fffffff] range.
+// CHECK: =={{[0-9]+}}==Dumping process modules
+
+// CHECK-DAG: {{0x30000000-0x300.....}} {{.*}}\shadow_conflict_32.cc.tmp_dll.dll
+// CHECK-DAG: {{0x........-0x........}} {{.*}}\shadow_conflict_32.cc.tmp.exe
+// CHECK-DAG: {{0x........-0x........}} {{.*}}\ntdll.dll
diff --git a/test/asan/TestCases/Windows/shadow_mapping_failure.cc b/test/asan/TestCases/Windows/shadow_mapping_failure.cc
index 9b83947..510f169 100644
--- a/test/asan/TestCases/Windows/shadow_mapping_failure.cc
+++ b/test/asan/TestCases/Windows/shadow_mapping_failure.cc
@@ -1,5 +1,6 @@
 // RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
+// REQUIRES: asan-32-bits
 
 #include <stdio.h>
 
diff --git a/test/asan/TestCases/Windows/throw_catch.cc b/test/asan/TestCases/Windows/throw_catch.cc
deleted file mode 100644
index 5313d25..0000000
--- a/test/asan/TestCases/Windows/throw_catch.cc
+++ /dev/null
@@ -1,73 +0,0 @@
-// Clang doesn't support exceptions on Windows yet, so for the time being we
-// build this program in two parts: the code with exceptions is built with CL,
-// the rest is built with Clang.  This represents the typical scenario when we
-// build a large project using "clang-cl -fallback -fsanitize=address".
-//
-// RUN: cl -c %s -Fo%t.obj
-// RUN: %clangxx_asan -o %t.exe %s %t.obj
-// RUN: %run %t.exe
-
-#include <assert.h>
-#include <stdio.h>
-
-// Should just "#include <sanitizer/asan_interface.h>" when C++ exceptions are
-// supported and we don't need to use CL.
-extern "C" bool __asan_address_is_poisoned(void *p);
-
-void ThrowAndCatch();
-void TestThrowInline();
-
-#if !defined(__clang__)
-__declspec(noinline)
-void Throw() {
-  int local;
-  fprintf(stderr, "Throw:  %p\n", &local);
-  throw 1;
-}
-
-__declspec(noinline)
-void ThrowAndCatch() {
-  int local;
-  try {
-    Throw();
-  } catch(...) {
-    fprintf(stderr, "Catch:  %p\n", &local);
-  }
-}
-
-void TestThrowInline() {
-  char x[32];
-  fprintf(stderr, "Before: %p poisoned: %d\n", &x,
-          __asan_address_is_poisoned(x + 32));
-  try {
-    Throw();
-  } catch(...) {
-    fprintf(stderr, "Catch\n");
-  }
-  fprintf(stderr, "After:  %p poisoned: %d\n",  &x,
-          __asan_address_is_poisoned(x + 32));
-  // FIXME: Invert this assertion once we fix
-  // https://code.google.com/p/address-sanitizer/issues/detail?id=258
-  assert(!__asan_address_is_poisoned(x + 32));
-}
-
-#else
-
-void TestThrow() {
-  char x[32];
-  fprintf(stderr, "Before: %p poisoned: %d\n", &x,
-          __asan_address_is_poisoned(x + 32));
-  assert(__asan_address_is_poisoned(x + 32));
-  ThrowAndCatch();
-  fprintf(stderr, "After:  %p poisoned: %d\n",  &x,
-          __asan_address_is_poisoned(x + 32));
-  // FIXME: Invert this assertion once we fix
-  // https://code.google.com/p/address-sanitizer/issues/detail?id=258
-  assert(!__asan_address_is_poisoned(x + 32));
-}
-
-int main(int argc, char **argv) {
-  TestThrowInline();
-  TestThrow();
-}
-#endif
diff --git a/test/asan/TestCases/Windows/unsymbolized.cc b/test/asan/TestCases/Windows/unsymbolized.cc
index e44b4bb..5854dc5 100644
--- a/test/asan/TestCases/Windows/unsymbolized.cc
+++ b/test/asan/TestCases/Windows/unsymbolized.cc
@@ -20,6 +20,6 @@
   free(x);
   return x[5];
   // CHECK: AddressSanitizer: heap-use-after-free
-  // CHECK: #0 {{0x[a-f0-9]+ \(.*[\\/]unsymbolized.cc.*.exe\+0x40[a-f0-9]{4}\)}}
-  // CHECK: #1 {{0x[a-f0-9]+ \(.*[\\/]unsymbolized.cc.*.exe\+0x40[a-f0-9]{4}\)}}
+  // CHECK: #0 {{0x[a-f0-9]+ \(.*[\\/]unsymbolized.cc.*.exe\+(0x40|0x14000)[a-f0-9]{4}\)}}
+  // CHECK: #1 {{0x[a-f0-9]+ \(.*[\\/]unsymbolized.cc.*.exe\+(0x40|0x14000)[a-f0-9]{4}\)}}
 }
diff --git a/test/asan/TestCases/alloca_constant_size.cc b/test/asan/TestCases/alloca_constant_size.cc
index 43144b4..a766ae7 100644
--- a/test/asan/TestCases/alloca_constant_size.cc
+++ b/test/asan/TestCases/alloca_constant_size.cc
@@ -4,10 +4,19 @@
 // RUN: %run %t 1 2>&1 | FileCheck %s
 // RUN: %run %t 2 2>&1 | FileCheck %s
 
-#include <alloca.h>
 #include <stdio.h>
 #include <string.h>
 
+// MSVC provides _alloca instead of alloca.
+#if defined(_MSC_VER) && !defined(alloca)
+# define alloca _alloca
+#elif defined(__FreeBSD__)
+#include <stdlib.h>
+#else
+#include <alloca.h>
+#endif
+
+
 void f1_alloca() {
   char *dynamic_buffer = (char *)alloca(200);
   fprintf(stderr, "dynamic_buffer = %p\n", dynamic_buffer);
diff --git a/test/asan/TestCases/asan_and_llvm_coverage_test.cc b/test/asan/TestCases/asan_and_llvm_coverage_test.cc
index 4748481..d53deb4 100644
--- a/test/asan/TestCases/asan_and_llvm_coverage_test.cc
+++ b/test/asan/TestCases/asan_and_llvm_coverage_test.cc
@@ -1,6 +1,8 @@
 // RUN: %clangxx_asan -coverage -O0 %s -o %t
 // RUN: %env_asan_opts=check_initialization_order=1 %run %t 2>&1 | FileCheck %s
-// XFAIL: android,win32
+// XFAIL: android
+// We don't really support running tests using profile runtime on Windows.
+// UNSUPPORTED: win32
 #include <stdio.h>
 int foo() { return 1; }
 int XXX = foo();
diff --git a/test/asan/TestCases/coverage-levels.cc b/test/asan/TestCases/coverage-levels.cc
index 612bbd8..83f7cf6 100644
--- a/test/asan/TestCases/coverage-levels.cc
+++ b/test/asan/TestCases/coverage-levels.cc
@@ -25,10 +25,10 @@
 
 // CHECK1: CovDump: bitset of 1 bits written for '{{.*}}', 1 bits are set
 // CHECK1:  1 PCs written
-// CHECK2: CovDump: bitset of 3 bits written for '{{.*}}', 2 bits are set
-// CHECK2:  2 PCs written
-// CHECK3: CovDump: bitset of 4 bits written for '{{.*}}', 3 bits are set
-// CHECK3:  3 PCs written
+// CHECK2: CovDump: bitset of 2 bits written for '{{.*}}', 1 bits are set
+// CHECK2:  1 PCs written
+// CHECK3: CovDump: bitset of 3 bits written for '{{.*}}', 2 bits are set
+// CHECK3:  2 PCs written
 // CHECK3_NOBITSET-NOT: bitset of
 // CHECK3_NOPCS-NOT: PCs written
-// CHECK_COUNTERS: CovDump: 4 counters written for
+// CHECK_COUNTERS: CovDump: 3 counters written for
diff --git a/test/asan/TestCases/coverage-order-pcs.cc b/test/asan/TestCases/coverage-order-pcs.cc
index dcab694..e81c910 100644
--- a/test/asan/TestCases/coverage-order-pcs.cc
+++ b/test/asan/TestCases/coverage-order-pcs.cc
@@ -19,7 +19,7 @@
 //
 // RUN: rm -rf $DIR
 // Ordering works only in 64-bit mode for now.
-// REQUIRES: asan-64-bits
+// REQUIRES: asan-64-bits, shell
 // UNSUPPORTED: android
 #include <stdio.h>
 
diff --git a/test/asan/TestCases/coverage-pc-buffer.cc b/test/asan/TestCases/coverage-pc-buffer.cc
index 67b6935..dd9beaf 100644
--- a/test/asan/TestCases/coverage-pc-buffer.cc
+++ b/test/asan/TestCases/coverage-pc-buffer.cc
@@ -1,15 +1,16 @@
 // Test __sanitizer_coverage_pc_buffer().
 
-// RUN: %clangxx_asan -fsanitize-coverage=edge %s -o %t && %run %t
+// RUN: %clangxx_asan -fsanitize-coverage=edge %stdcxx11 %s -O3 -o %t && %run %t
 
 // UNSUPPORTED: android
 
 #include <assert.h>
+#include <memory>
 #include <sanitizer/coverage_interface.h>
+#include <stdint.h>
 #include <stdio.h>
 
 static volatile int sink;
-__attribute__((noinline)) void bar() { sink = 2; }
 __attribute__((noinline)) void foo() { sink = 1; }
 
 void assertNotZeroPcs(uintptr_t *buf, uintptr_t size) {
@@ -19,30 +20,30 @@
 }
 
 int main() {
-  uintptr_t *buf = NULL;
-  uintptr_t sz = __sanitizer_get_coverage_pc_buffer(&buf);
-  assertNotZeroPcs(buf, sz);
-  assert(sz);
+  uintptr_t buf_size = 1 << 20;
+  std::unique_ptr<uintptr_t[]> buf(new uintptr_t[buf_size]);
+  __sanitizer_set_coverage_pc_buffer(buf.get(), buf_size);
 
-  foo();
-  bar();
-  uintptr_t *buf1 = NULL;
-  uintptr_t sz1 = __sanitizer_get_coverage_pc_buffer(&buf1);
-  assertNotZeroPcs(buf1, sz1);
-  assert(buf1 == buf);
-  assert(sz1 > sz);
+  {
+    uintptr_t sz = __sanitizer_get_coverage_pc_buffer_pos();
+    assertNotZeroPcs(buf.get(), sz);
+    assert(sz);
+  }
 
-  bar();
-  uintptr_t *buf2 = NULL;
-  uintptr_t sz2 = __sanitizer_get_coverage_pc_buffer(&buf2);
-  assertNotZeroPcs(buf2, sz2);
-  assert(buf2 == buf);
-  assert(sz2 > sz1);
+  {
+    uintptr_t sz = __sanitizer_get_coverage_pc_buffer_pos();
+    foo();
+    uintptr_t sz1 = __sanitizer_get_coverage_pc_buffer_pos();
+    assertNotZeroPcs(buf.get(), sz1);
+    assert(sz1 > sz);
+  }
 
-  __sanitizer_reset_coverage();
-  uintptr_t *buf3 = NULL;
-  uintptr_t sz3 = __sanitizer_get_coverage_pc_buffer(&buf3);
-  assertNotZeroPcs(buf3, sz3);
-  assert(buf3 == buf);
-  assert(sz3 < sz2);
+  {
+    uintptr_t sz = __sanitizer_get_coverage_pc_buffer_pos();
+    // reset coverage to 0.
+    __sanitizer_reset_coverage();
+    uintptr_t sz1 = __sanitizer_get_coverage_pc_buffer_pos();
+    assertNotZeroPcs(buf.get(), sz1);
+    assert(sz1 < sz);
+  }
 }
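Note: the rewritten test above moves from the old runtime-owned PC buffer to a caller-owned one. A minimal sketch of that interface as the updated test uses it (buffer size and sanitizer entry points mirror the test; the helper `visit` and the build line in the comment are illustrative only):

#include <sanitizer/coverage_interface.h>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <memory>

static volatile int sink;
__attribute__((noinline)) void visit() { sink = 1; }  // stand-in for "new code executed"

int main() {
  // Build roughly as the test does: clang++ -fsanitize=address -fsanitize-coverage=edge
  // The caller owns the buffer; the runtime appends covered PCs into it.
  const uintptr_t kBufSize = 1 << 20;
  std::unique_ptr<uintptr_t[]> pcs(new uintptr_t[kBufSize]);
  __sanitizer_set_coverage_pc_buffer(pcs.get(), kBufSize);

  uintptr_t before = __sanitizer_get_coverage_pc_buffer_pos();
  visit();  // running not-yet-covered code advances the buffer position
  uintptr_t after = __sanitizer_get_coverage_pc_buffer_pos();
  std::printf("new PCs recorded: %zu\n", (size_t)(after - before));

  __sanitizer_reset_coverage();  // rewinds the recorded position, as the last block of the test checks
}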
diff --git a/test/asan/TestCases/coverage-reset.cc b/test/asan/TestCases/coverage-reset.cc
index eb8da8c..11c5ef6 100644
--- a/test/asan/TestCases/coverage-reset.cc
+++ b/test/asan/TestCases/coverage-reset.cc
@@ -13,6 +13,13 @@
 __attribute__((noinline)) void bar() { sink = 2; }
 __attribute__((noinline)) void foo() { sink = 1; }
 
+// In MSVC 2015, printf is an inline function, which causes this test to fail as
+// it introduces an extra coverage point. Define away printf on that platform to
+// avoid the issue.
+#if _MSC_VER >= 1900
+# define printf(arg, ...)
+#endif
+
 #define GET_AND_PRINT_COVERAGE()                                       \
   bitset = 0;                                                  \
   for (size_t i = 0; i < n_guards; i++)                        \
diff --git a/test/asan/TestCases/coverage-trace-pc.cc b/test/asan/TestCases/coverage-trace-pc.cc
new file mode 100644
index 0000000..c03a6f0
--- /dev/null
+++ b/test/asan/TestCases/coverage-trace-pc.cc
@@ -0,0 +1,31 @@
+// Test -fsanitize-coverage=edge,indirect-call,trace-pc
+// RUN: %clangxx_asan -O0 -DTRACE_RT %s -o %t-rt.o -c
+// RUN: %clangxx_asan -O0 -fsanitize-coverage=edge,trace-pc,indirect-calls %s -o %t %t-rt.o
+// RUN: %run %t
+#ifdef TRACE_RT
+int pc_count;
+void *last_callee;
+extern "C" void __sanitizer_cov_trace_pc() {
+  pc_count++;
+}
+extern "C" void __sanitizer_cov_trace_pc_indir(void *callee) {
+  last_callee = callee;
+}
+#else
+#include <stdio.h>
+#include <assert.h>
+extern int pc_count;
+extern void *last_callee;
+
+__attribute__((noinline)) void foo() { printf("foo\n"); }
+__attribute__((noinline)) void bar() { printf("bar\n"); }
+
+int main(int argc, char **argv) {
+  void (*f)(void) = argc ? foo : bar;
+  int c1 = pc_count;
+  f();
+  int c2 = pc_count;
+  assert(c1 < c2);
+  assert(last_callee == foo);
+}
+#endif
diff --git a/test/asan/TestCases/coverage-tracing.cc b/test/asan/TestCases/coverage-tracing.cc
index b7755f8..278cfb1 100644
--- a/test/asan/TestCases/coverage-tracing.cc
+++ b/test/asan/TestCases/coverage-tracing.cc
@@ -24,7 +24,7 @@
 // RUN: not diff b.points bf.points
 // RUN: rm -rf   %T/coverage-tracing
 //
-// REQUIRES: asan-64-bits
+// REQUIRES: asan-64-bits, shell
 // UNSUPPORTED: android
 
 #include <stdlib.h>
diff --git a/test/asan/TestCases/debug_mapping.cc b/test/asan/TestCases/debug_mapping.cc
index bd05f6a..40083f0 100644
--- a/test/asan/TestCases/debug_mapping.cc
+++ b/test/asan/TestCases/debug_mapping.cc
@@ -6,6 +6,12 @@
 #include <stdio.h>
 #include <stdlib.h>
 
+#if _WIN64
+#define PTR "%llx"
+#else
+#define PTR "%lx"
+#endif
+
 // printed because of verbosity=1
 // CHECK: SHADOW_SCALE: [[SCALE:[0-9]+]]
 // CHECK: SHADOW_OFFSET: [[OFFSET:0x[0-9a-f]+]]
@@ -15,7 +21,7 @@
   __asan_get_shadow_mapping(&scale, &offset);
 
   fprintf(stderr, "scale: %d\n", (int)scale);
-  fprintf(stderr, "offset: 0x%lx\n", offset);
+  fprintf(stderr, "offset: 0x" PTR "\n", (void*)offset);
 
   // CHECK: scale: [[SCALE]]
   // CHECK: offset: [[OFFSET]]
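Aside: the ad-hoc PTR width macro introduced above works, but the same effect is available from the standard <inttypes.h> format macros; a small sketch, not part of the patch, with an illustrative value:

#include <inttypes.h>
#include <stdio.h>

int main() {
  uintptr_t offset = 0x7fff8000;  // illustrative value, not the real shadow offset
  // PRIxPTR expands to the correct conversion for uintptr_t on every target,
  // so no _WIN64-specific macro is needed.
  fprintf(stderr, "offset: 0x%" PRIxPTR "\n", offset);
  return 0;
}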
diff --git a/test/asan/TestCases/debug_ppc64_mapping.cc b/test/asan/TestCases/debug_ppc64_mapping.cc
index 753a636..43e1183 100644
--- a/test/asan/TestCases/debug_ppc64_mapping.cc
+++ b/test/asan/TestCases/debug_ppc64_mapping.cc
@@ -1,7 +1,7 @@
 // RUN: %clang_asan -O0 %s -o %t
 // RUN: %env_asan_opts=verbosity=0 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-PPC64-V0
 // RUN: %env_asan_opts=verbosity=2 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-PPC64
-// REQUIRES: powerpc64-supported-target
+// REQUIRES: powerpc64-target-arch
 
 #include <stdio.h>
 
diff --git a/test/asan/TestCases/debug_stacks.cc b/test/asan/TestCases/debug_stacks.cc
index 857e905..7c320bf 100644
--- a/test/asan/TestCases/debug_stacks.cc
+++ b/test/asan/TestCases/debug_stacks.cc
@@ -5,6 +5,12 @@
 // FIXME: Figure out why allocation/free stack traces may be too short on ARM.
 // REQUIRES: stable-runtime
 
+#if _WIN64
+#define PTR "%llx"
+#else
+#define PTR "%lx"
+#endif
+
 #include <sanitizer/asan_interface.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -35,9 +41,9 @@
   // CHECK: alloc stack retval ok
   fprintf(stderr, "thread id = %d\n", thread_id);
   // CHECK: thread id = 0
-  fprintf(stderr, "0x%lx\n", trace[0]);
+  fprintf(stderr, "0x" PTR "\n", trace[0]);
   // CHECK: [[ALLOC_FRAME_0:0x[0-9a-f]+]]
-  fprintf(stderr, "0x%lx\n", trace[1]);
+  fprintf(stderr, "0x" PTR "\n", trace[1]);
   // CHECK: [[ALLOC_FRAME_1:0x[0-9a-f]+]]
 
   num_frames = 100;
@@ -48,9 +54,9 @@
   // CHECK: free stack retval ok
   fprintf(stderr, "thread id = %d\n", thread_id);
   // CHECK: thread id = 0
-  fprintf(stderr, "0x%lx\n", trace[0]);
+  fprintf(stderr, "0x" PTR "\n", trace[0]);
   // CHECK: [[FREE_FRAME_0:0x[0-9a-f]+]]
-  fprintf(stderr, "0x%lx\n", trace[1]);
+  fprintf(stderr, "0x" PTR "\n", trace[1]);
   // CHECK: [[FREE_FRAME_1:0x[0-9a-f]+]]
 
   mem[0] = 'A'; // BOOM
diff --git a/test/asan/TestCases/double-free.cc b/test/asan/TestCases/double-free.cc
index 3297b43..9bd418f 100644
--- a/test/asan/TestCases/double-free.cc
+++ b/test/asan/TestCases/double-free.cc
@@ -4,6 +4,10 @@
 // Also works if no malloc context is available.
 // RUN: %env_asan_opts=malloc_context_size=0:fast_unwind_on_malloc=0 not %run %t 2>&1 | FileCheck %s
 // RUN: %env_asan_opts=malloc_context_size=0:fast_unwind_on_malloc=1 not %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx_asan -O0 -fsanitize-recover=address %s -o %t 2>&1
+// RUN: %env_asan_opts=halt_on_error=false %run %t 2>&1 | FileCheck %s --check-prefix CHECK-RECOVER
+
 // XFAIL: arm-linux-gnueabi
 // XFAIL: armv7l-unknown-linux-gnueabihf
 
@@ -23,5 +27,7 @@
   // MALLOC-CTX: #1 0x{{.*}} in main {{.*}}double-free.cc:[[@LINE-7]]
   // CHECK: allocated by thread T0 here:
   // MALLOC-CTX: double-free.cc:[[@LINE-12]]
+  // CHECK-RECOVER: AddressSanitizer: attempting double-free{{.*}}in thread T0
+  // CHECK-RECOVER-NOT: AddressSanitizer CHECK failed:
   return res;
 }
diff --git a/test/asan/TestCases/ill.cc b/test/asan/TestCases/ill.cc
new file mode 100644
index 0000000..887f506
--- /dev/null
+++ b/test/asan/TestCases/ill.cc
@@ -0,0 +1,32 @@
+// Test the handle_sigill option.
+//
+// RUN: %clangxx_asan %s -o %t && %env_asan_opts=handle_sigill=0 not --crash %run %t 2>&1 | FileCheck %s --check-prefix=CHECK0
+// RUN: %clangxx_asan %s -o %t && %env_asan_opts=handle_sigill=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK1
+// REQUIRES: x86-target-arch
+// UNSUPPORTED: darwin
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+int main(int argc, char **argv) {
+#ifdef _WIN32
+  // Sometimes on Windows this test generates a WER fault dialog. Suppress that.
+  UINT new_flags = SEM_FAILCRITICALERRORS |
+                   SEM_NOGPFAULTERRORBOX |
+                   SEM_NOOPENFILEERRORBOX;
+  // Preserve existing error mode, as discussed at
+  // http://blogs.msdn.com/oldnewthing/archive/2004/07/27/198410.aspx
+  UINT existing_flags = SetErrorMode(new_flags);
+  SetErrorMode(existing_flags | new_flags);
+#endif
+
+  if (argc)
+    __builtin_trap();
+  // Unreachable code to avoid confusing the Windows unwinder.
+#ifdef _WIN32
+  SetErrorMode(0);
+#endif
+}
+// CHECK0-NOT: ERROR: AddressSanitizer
+// CHECK1: ERROR: AddressSanitizer: {{ILL|illegal-instruction}} on unknown address {{0x0*}}
diff --git a/test/asan/TestCases/intra-object-overflow.cc b/test/asan/TestCases/intra-object-overflow.cc
index e48a261..4032cc1 100644
--- a/test/asan/TestCases/intra-object-overflow.cc
+++ b/test/asan/TestCases/intra-object-overflow.cc
@@ -4,6 +4,8 @@
 //
 // FIXME: fix 32-bits.
 // REQUIRES: asan-64-bits
+// FIXME: Implement ASan intra-object padding in Clang's MS record layout
+// UNSUPPORTED: win32
 #include <stdio.h>
 #include <stdlib.h>
 class Foo {
diff --git a/test/asan/TestCases/invalid-pointer-pairs.cc b/test/asan/TestCases/invalid-pointer-pairs.cc
new file mode 100644
index 0000000..b36e6cd
--- /dev/null
+++ b/test/asan/TestCases/invalid-pointer-pairs.cc
@@ -0,0 +1,44 @@
+// RUN: %clangxx_asan -O0 %s -o %t -mllvm -asan-detect-invalid-pointer-pair
+
+// RUN: %env_asan_opts=detect_invalid_pointer_pairs=1 %run %t k 2>&1 | FileCheck %s -check-prefix=OK -allow-empty
+// RUN: %env_asan_opts=detect_invalid_pointer_pairs=1 not %run %t g 2>&1 | FileCheck %s -check-prefix=CMP -check-prefix=ALL-ERRORS
+// RUN: %env_asan_opts=detect_invalid_pointer_pairs=1 not %run %t s 2>&1 | FileCheck %s -check-prefix=SUB -check-prefix=ALL-ERRORS
+// RUN: %env_asan_opts=detect_invalid_pointer_pairs=1 not %run %t f 2>&1 | FileCheck %s -check-prefix=FREE -check-prefix=ALL-ERRORS
+
+#include <assert.h>
+#include <stdlib.h>
+
+int f(char c, char *p, char *q) {
+  // ALL-ERRORS: ERROR: AddressSanitizer: invalid-pointer-pair
+  // [[PTR1:0x[0-9a-f]+]] [[PTR2:0x[0-9a-f]+]]
+  switch (c) {
+  case 'g':
+    // CMP: #{{[0-9]+ .*}} in f({{char, char\*, char\*|char,char \*,char \*}}) {{.*}}invalid-pointer-pairs.cc:[[@LINE+1]]:14
+    return p > q;
+  case 's':
+    // SUB: #{{[0-9]+ .*}} in f({{char, char\*, char\*|char,char \*,char \*}}) {{.*}}invalid-pointer-pairs.cc:[[@LINE+1]]:14
+    return p - q;
+  case 'k': {
+    // OK-NOT: ERROR
+    char *p2 = p + 20;
+    return p > p2;
+  }
+  case 'f': {
+    char *p3 = p + 20;
+    free(p);
+    // FREE: #{{[0-9]+ .*}} in f({{char, char\*, char\*|char,char \*,char \*}}) {{.*}}invalid-pointer-pairs.cc:[[@LINE+2]]:14
+    // FREE: freed by thread
+    return p < p3;
+  }
+  }
+  assert(0);
+}
+
+int main(int argc, char **argv) {
+  char *p = (char *)malloc(42);
+  char *q = (char *)malloc(42);
+  assert(argc >= 2);
+  f(argv[1][0], p, q);
+  free(p);
+  free(q);
+}
diff --git a/test/asan/TestCases/large_func_test.cc b/test/asan/TestCases/large_func_test.cc
index 6b592f8..8d9afae 100644
--- a/test/asan/TestCases/large_func_test.cc
+++ b/test/asan/TestCases/large_func_test.cc
@@ -49,5 +49,5 @@
   // CHECK-Linux: {{    #0 0x.* in operator new.*}}
   // CHECK-Darwin: {{    #0 0x.* in .*_Zna.*}}
   // CHECK: {{    #1 0x.* in main .*large_func_test.cc:}}[[@LINE-7]]
-  delete x;
+  delete[] x;
 }
diff --git a/test/asan/TestCases/printf-2.c b/test/asan/TestCases/printf-2.c
index 4b5ae13..0544847 100644
--- a/test/asan/TestCases/printf-2.c
+++ b/test/asan/TestCases/printf-2.c
@@ -1,9 +1,9 @@
 // RUN: %clang_asan -O2 %s -o %t
-// We need replace_str=0 and replace_intrin=0 to avoid reporting errors in
-// strlen() and memcpy() called by printf().
-// RUN: %env_asan_opts=replace_str=0:replace_intrin=0:check_printf=1 not %run %t 2>&1 | FileCheck --check-prefix=CHECK-ON %s
-// RUN: %env_asan_opts=replace_str=0:replace_intrin=0:check_printf=0 %run %t 2>&1 | FileCheck --check-prefix=CHECK-OFF %s
-// RUN: %env_asan_opts=replace_str=0:replace_intrin=0 not %run %t 2>&1 | FileCheck --check-prefix=CHECK-ON %s
+// We need replace_str=0, intercept_strlen=0 and replace_intrin=0 to avoid
+// reporting errors in strlen() and memcpy() called by printf().
+// RUN: %env_asan_opts=replace_str=0:intercept_strlen=0:replace_intrin=0:check_printf=1 not %run %t 2>&1 | FileCheck --check-prefix=CHECK-ON %s
+// RUN: %env_asan_opts=replace_str=0:intercept_strlen=0:replace_intrin=0:check_printf=0 %run %t 2>&1 | FileCheck --check-prefix=CHECK-OFF %s
+// RUN: %env_asan_opts=replace_str=0:intercept_strlen=0:replace_intrin=0 not %run %t 2>&1 | FileCheck --check-prefix=CHECK-ON %s
 
 // FIXME: printf is not intercepted on Windows yet.
 // XFAIL: win32
diff --git a/test/asan/TestCases/printf-4.c b/test/asan/TestCases/printf-4.c
index 13bfc87..5a883fe 100644
--- a/test/asan/TestCases/printf-4.c
+++ b/test/asan/TestCases/printf-4.c
@@ -1,10 +1,8 @@
 // RUN: %clang_asan -O2 %s -o %t
-// We need replace_str=0 and replace_intrin=0 to avoid reporting errors in
-// strlen() and memcpy() called by puts().
-// RUN: %env_asan_opts=replace_str=0:replace_intrin=0:check_printf=1 not %run %t 2>&1 | FileCheck --check-prefix=CHECK-ON %s
-// RUN: %env_asan_opts=replace_str=0:replace_intrin=0 not %run %t 2>&1 | FileCheck --check-prefix=CHECK-ON %s
+// RUN: %env_asan_opts=check_printf=1 not %run %t 2>&1 | FileCheck --check-prefix=CHECK-ON %s
+// RUN: not %run %t 2>&1 | FileCheck --check-prefix=CHECK-ON %s
 
-// FIXME: printf is not intercepted on Windows yet.
+// FIXME: sprintf is not intercepted on Windows yet.
 // XFAIL: win32
 
 #include <stdio.h>
@@ -14,10 +12,14 @@
   volatile float f = 1.239;
   volatile char s[] = "34";
   volatile char buf[2];
+  fputs("before sprintf\n", stderr);
   sprintf((char *)buf, "%c %d %.3f %s\n", c, x, f, s);
-  puts((const char *)buf);
+  fputs("after sprintf", stderr);
+  fputs((const char *)buf, stderr);
   return 0;
   // Check that size of output buffer is sanitized.
+  // CHECK-ON: before sprintf
+  // CHECK-ON-NOT: after sprintf
   // CHECK-ON: stack-buffer-overflow
   // CHECK-ON-NOT: 0 12 1.239 34
 }
diff --git a/test/asan/TestCases/stack-oob-frames.cc b/test/asan/TestCases/stack-oob-frames.cc
index 00db4b3..3b5d511 100644
--- a/test/asan/TestCases/stack-oob-frames.cc
+++ b/test/asan/TestCases/stack-oob-frames.cc
@@ -4,9 +4,6 @@
 // RUN: not %run %t 2 2>&1 | FileCheck %s --check-prefix=CHECK2
 // RUN: not %run %t 3 2>&1 | FileCheck %s --check-prefix=CHECK3
 
-// FIXME: Symbolization problems.
-// XFAIL: win32
-
 #define NOINLINE __attribute__((noinline))
 inline void break_optimization(void *arg) {
   __asm__ __volatile__("" : : "r" (arg) : "memory");
diff --git a/test/asan/TestCases/strcasestr-2.c b/test/asan/TestCases/strcasestr-2.c
index cca6d20..47fd692 100644
--- a/test/asan/TestCases/strcasestr-2.c
+++ b/test/asan/TestCases/strcasestr-2.c
@@ -3,7 +3,7 @@
 
 // Test intercept_strstr asan option
 // Disable other interceptors because strlen may be called inside strcasestr
-// RUN: %env_asan_opts=intercept_strstr=false:replace_str=false %run %t 2>&1
+// RUN: %env_asan_opts=intercept_strstr=false:replace_str=false:intercept_strlen=false %run %t 2>&1
 
 // There's no interceptor for strcasestr on Windows
 // XFAIL: win32
diff --git a/test/asan/TestCases/strdup_oob_test.cc b/test/asan/TestCases/strdup_oob_test.cc
index a039568..492555a 100644
--- a/test/asan/TestCases/strdup_oob_test.cc
+++ b/test/asan/TestCases/strdup_oob_test.cc
@@ -3,6 +3,12 @@
 // RUN: %clangxx_asan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx_asan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s
 
+// When built as C on Linux, strdup is transformed to __strdup.
+// RUN: %clangxx_asan -O3 -xc %s -o %t && not %run %t 2>&1 | FileCheck %s
+
+// Unwind problem on arm: "main" is missing from the allocation stack trace.
+// UNSUPPORTED: armv7l-unknown-linux-gnueabihf
+
 #include <string.h>
 
 char kString[] = "foo";
@@ -14,7 +20,8 @@
   // CHECK: #0 {{.*}}main {{.*}}strdup_oob_test.cc:[[@LINE-2]]
   // CHECK-LABEL: allocated by thread T{{.*}} here:
   // CHECK: #{{[01]}} {{.*}}strdup
+  // CHECK: #{{.*}}main {{.*}}strdup_oob_test.cc:[[@LINE-6]]
   // CHECK-LABEL: SUMMARY
-  // CHECK: strdup_oob_test.cc:[[@LINE-6]]
+  // CHECK: strdup_oob_test.cc:[[@LINE-7]]
   return x;
 }
diff --git a/test/asan/TestCases/strstr-2.c b/test/asan/TestCases/strstr-2.c
index edb7008..8bc6e99 100644
--- a/test/asan/TestCases/strstr-2.c
+++ b/test/asan/TestCases/strstr-2.c
@@ -3,7 +3,7 @@
 
 // Test intercept_strstr asan option
 // Disable other interceptors because strlen may be called inside strstr
-// RUN: %env_asan_opts=intercept_strstr=false:replace_str=false %run %t 2>&1
+// RUN: %env_asan_opts=intercept_strstr=false:replace_str=false:intercept_strlen=false %run %t 2>&1
 
 #include <assert.h>
 #include <string.h>
diff --git a/test/asan/TestCases/throw_call_test.cc b/test/asan/TestCases/throw_call_test.cc
index 4b3910d..5a8204a 100644
--- a/test/asan/TestCases/throw_call_test.cc
+++ b/test/asan/TestCases/throw_call_test.cc
@@ -5,9 +5,6 @@
 // Android builds with static libstdc++ by default.
 // XFAIL: android
 
-// Clang doesn't support exceptions on Windows yet.
-// XFAIL: win32
-
 #include <stdio.h>
 static volatile int zero = 0;
 inline void pretend_to_do_something(void *x) {
diff --git a/test/asan/TestCases/throw_catch.cc b/test/asan/TestCases/throw_catch.cc
index bce4819..0108351 100644
--- a/test/asan/TestCases/throw_catch.cc
+++ b/test/asan/TestCases/throw_catch.cc
@@ -1,8 +1,5 @@
 // RUN: %clangxx_asan -O %s -o %t && %run %t
 
-// Clang doesn't support exceptions on Windows yet.
-// XFAIL: win32
-
 #include <assert.h>
 #include <stdio.h>
 #include <sanitizer/asan_interface.h>
diff --git a/test/asan/TestCases/throw_invoke_test.cc b/test/asan/TestCases/throw_invoke_test.cc
index ec48fc7..e6e91d1 100644
--- a/test/asan/TestCases/throw_invoke_test.cc
+++ b/test/asan/TestCases/throw_invoke_test.cc
@@ -1,8 +1,5 @@
 // RUN: %clangxx_asan %s -o %t && %run %t
-// RUN: %clangxx_asan %s -o %t -static-libstdc++ && %run %t
-
-// Clang doesn't support exceptions on Windows yet.
-// XFAIL: win32
+// RUN: %clangxx_asan %s -o %t -stdlib=libstdc++ -static-libstdc++ && %run %t
 
 #include <stdio.h>
 static volatile int zero = 0;
diff --git a/test/asan/TestCases/uar_and_exceptions.cc b/test/asan/TestCases/uar_and_exceptions.cc
index 324e8a5..2357ae8 100644
--- a/test/asan/TestCases/uar_and_exceptions.cc
+++ b/test/asan/TestCases/uar_and_exceptions.cc
@@ -2,9 +2,6 @@
 // RUN: %clangxx_asan -O0 %s -o %t
 // RUN: %env_asan_opts=detect_stack_use_after_return=1 %run %t
 
-// Clang doesn't support exceptions on Windows yet.
-// XFAIL: win32
-
 #include <stdio.h>
 
 volatile char *g;
diff --git a/test/asan/TestCases/use-after-scope-capture.cc b/test/asan/TestCases/use-after-scope-capture.cc
new file mode 100644
index 0000000..113ce35
--- /dev/null
+++ b/test/asan/TestCases/use-after-scope-capture.cc
@@ -0,0 +1,17 @@
+// RUN: %clangxx_asan %stdcxx11 -O1 -fsanitize-address-use-after-scope %s -o %t && \
+// RUN:     not %run %t 2>&1 | FileCheck %s
+
+#include <functional>
+
+int main() {
+  std::function<int()> f;
+  {
+    int x = 0;
+    f = [&x]() {
+      return x;  // BOOM
+      // CHECK: ERROR: AddressSanitizer: stack-use-after-scope
+      // CHECK: #0 0x{{.*}} in {{.*}}use-after-scope-capture.cc:[[@LINE-2]]
+    };
+  }
+  return f();  // BOOM
+}
diff --git a/test/asan/TestCases/use-after-scope-dtor-order.cc b/test/asan/TestCases/use-after-scope-dtor-order.cc
index 7896dd3..8cdfa6a 100644
--- a/test/asan/TestCases/use-after-scope-dtor-order.cc
+++ b/test/asan/TestCases/use-after-scope-dtor-order.cc
@@ -1,6 +1,6 @@
-// RUN: %clangxx_asan -O0 -fsanitize=use-after-scope %s -o %t && \
+// RUN: %clangxx_asan -O1 -fsanitize-address-use-after-scope %s -o %t && \
 // RUN:     not %run %t 2>&1 | FileCheck %s
-// XFAIL: *
+
 #include <stdio.h>
 
 struct IntHolder {
@@ -8,7 +8,7 @@
   ~IntHolder() {
     printf("Value: %d\n", *val_);  // BOOM
     // CHECK: ERROR: AddressSanitizer: stack-use-after-scope
-    // CHECK:  #0 0x{{.*}} in IntHolder::~IntHolder{{.*}}use-after-scope-dtor-order.cc:[[@LINE-2]]
+    // CHECK:  #0 0x{{.*}} in IntHolder::~IntHolder{{.*}}.cc:[[@LINE-2]]
   }
   void set(int *val) { val_ = val; }
   int *get() { return val_; }
diff --git a/test/asan/TestCases/use-after-scope-if.cc b/test/asan/TestCases/use-after-scope-if.cc
new file mode 100644
index 0000000..8180077
--- /dev/null
+++ b/test/asan/TestCases/use-after-scope-if.cc
@@ -0,0 +1,15 @@
+// RUN: %clangxx_asan -O1 -fsanitize-address-use-after-scope %s -o %t && \
+// RUN:     not %run %t 2>&1 | FileCheck %s
+
+int *p;
+bool b = true;
+
+int main() {
+  if (b) {
+    int x[5];
+    p = x+1;
+  }
+  return *p;  // BOOM
+  // CHECK: ERROR: AddressSanitizer: stack-use-after-scope
+  // CHECK:  #0 0x{{.*}} in main {{.*}}.cc:[[@LINE-2]]
+}
diff --git a/test/asan/TestCases/use-after-scope-inlined.cc b/test/asan/TestCases/use-after-scope-inlined.cc
index a0a0d94..fc8c7f7 100644
--- a/test/asan/TestCases/use-after-scope-inlined.cc
+++ b/test/asan/TestCases/use-after-scope-inlined.cc
@@ -2,8 +2,8 @@
 // happens. "always_inline" is not enough, as Clang doesn't emit
 // llvm.lifetime intrinsics at -O0.
 //
-// RUN: %clangxx_asan -O2 -fsanitize=use-after-scope %s -o %t && not %run %t 2>&1 | FileCheck %s
-// XFAIL: *
+// RUN: %clangxx_asan -O2 -fsanitize-address-use-after-scope %s -o %t && \
+// RUN:     not %run %t 2>&1 | FileCheck %s
 
 int *arr;
 
diff --git a/test/asan/TestCases/use-after-scope-loop-bug.cc b/test/asan/TestCases/use-after-scope-loop-bug.cc
new file mode 100644
index 0000000..4f483f2
--- /dev/null
+++ b/test/asan/TestCases/use-after-scope-loop-bug.cc
@@ -0,0 +1,17 @@
+// RUN: %clangxx_asan -O1 -fsanitize-address-use-after-scope %s -o %t && \
+// RUN:     not %run %t 2>&1 | FileCheck %s
+
+int *p;
+
+int main() {
+  // Variable goes in and out of scope.
+  for (int i = 0; i < 3; ++i) {
+    int x[3] = {i, i, i};
+    p = x + i;
+  }
+  return *p;  // BOOM
+  // CHECK: ERROR: AddressSanitizer: stack-use-after-scope
+  // CHECK:  #0 0x{{.*}} in main {{.*}}use-after-scope-loop-bug.cc:[[@LINE-2]]
+  // CHECK: Address 0x{{.*}} is located in stack of thread T{{.*}} at offset [[OFFSET:[^ ]+]] in frame
+  // {{\[}}[[OFFSET]], {{[0-9]+}}) 'x'
+}
diff --git a/test/asan/TestCases/use-after-scope-loop-removed.cc b/test/asan/TestCases/use-after-scope-loop-removed.cc
new file mode 100644
index 0000000..730bf3a
--- /dev/null
+++ b/test/asan/TestCases/use-after-scope-loop-removed.cc
@@ -0,0 +1,18 @@
+// RUN: %clangxx_asan -O1 -fsanitize-address-use-after-scope %s -o %t && \
+// RUN:     not %run %t 2>&1 | FileCheck %s
+
+#include <stdlib.h>
+
+int *p;
+
+int main() {
+  for (int i = 0; i < 3; i++) {
+    int x;
+    p = &x;
+  }
+  return *p;  // BOOM
+  // CHECK: ERROR: AddressSanitizer: stack-use-after-scope
+  // CHECK:  #0 0x{{.*}} in main {{.*}}use-after-scope-loop-removed.cc:[[@LINE-2]]
+  // CHECK: Address 0x{{.*}} is located in stack of thread T{{.*}} at offset [[OFFSET:[^ ]+]] in frame
+  // {{\[}}[[OFFSET]], {{[0-9]+}}) 'x'
+}
diff --git a/test/asan/TestCases/use-after-scope-loop.cc b/test/asan/TestCases/use-after-scope-loop.cc
new file mode 100644
index 0000000..d99761b
--- /dev/null
+++ b/test/asan/TestCases/use-after-scope-loop.cc
@@ -0,0 +1,14 @@
+// RUN: %clangxx_asan -O1 -fsanitize-address-use-after-scope %s -o %t && \
+// RUN:     not %run %t 2>&1 | FileCheck %s
+
+int *p[3];
+
+int main() {
+  for (int i = 0; i < 3; i++) {
+    int x;
+    p[i] = &x;
+  }
+  return **p;  // BOOM
+  // CHECK: ERROR: AddressSanitizer: stack-use-after-scope
+  // CHECK: #0 0x{{.*}} in main {{.*}}.cc:[[@LINE-2]]
+}
diff --git a/test/asan/TestCases/use-after-scope-nobug.cc b/test/asan/TestCases/use-after-scope-nobug.cc
index 21b085c..cf471dc 100644
--- a/test/asan/TestCases/use-after-scope-nobug.cc
+++ b/test/asan/TestCases/use-after-scope-nobug.cc
@@ -1,14 +1,15 @@
-// RUN: %clangxx_asan -O0 -fsanitize=use-after-scope %s -o %t && %run %t
-// XFAIL: *
+// RUN: %clangxx_asan -O1 -fsanitize-address-use-after-scope %s -o %t && %run %t
 
 #include <stdio.h>
+#include <stdlib.h>
+
+int *p[3];
 
 int main() {
-  int *p = 0;
   // Variable goes in and out of scope.
   for (int i = 0; i < 3; i++) {
-    int x = 0;
-    p = &x;
+    int x;
+    p[i] = &x;
   }
   printf("PASSED\n");
   return 0;
diff --git a/test/asan/TestCases/use-after-scope-temp.cc b/test/asan/TestCases/use-after-scope-temp.cc
index f9bd779..4dcef59 100644
--- a/test/asan/TestCases/use-after-scope-temp.cc
+++ b/test/asan/TestCases/use-after-scope-temp.cc
@@ -1,15 +1,8 @@
-// RUN: %clangxx_asan -O0 -fsanitize=use-after-scope %s -o %t && \
-// RUN:     %run %t 2>&1 | FileCheck %s
-//
-// Lifetime for temporaries is not emitted yet.
-// XFAIL: *
+// RUN: %clangxx_asan %stdcxx11 -O1 -fsanitize-address-use-after-scope %s -o %t && \
+// RUN:     not %run %t 2>&1 | FileCheck %s
 
-#include <stdio.h>
 
 struct IntHolder {
-  explicit IntHolder(int val) : val(val) {
-    printf("IntHolder: %d\n", val);
-  }
   int val;
 };
 
@@ -20,10 +13,9 @@
 }
 
 int main(int argc, char *argv[]) {
-  save(IntHolder(10));
+  save({argc});
   int x = saved->val;  // BOOM
   // CHECK: ERROR: AddressSanitizer: stack-use-after-scope
   // CHECK:  #0 0x{{.*}} in main {{.*}}use-after-scope-temp.cc:[[@LINE-2]]
-  printf("saved value: %d\n", x);
-  return 0;
+  return x;
 }
diff --git a/test/asan/TestCases/use-after-scope-types.cc b/test/asan/TestCases/use-after-scope-types.cc
new file mode 100644
index 0000000..b213681
--- /dev/null
+++ b/test/asan/TestCases/use-after-scope-types.cc
@@ -0,0 +1,76 @@
+// RUN: %clangxx_asan %stdcxx11 -O0 -fsanitize-address-use-after-scope %s -o %t
+// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 0 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 1 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 2 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 3 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 4 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 5 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 6 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 7 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 8 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 9 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 10 2>&1 | FileCheck %s
+
+// RUN: %env_asan_opts=detect_stack_use_after_scope=0 %run %t 11
+
+#include <stdlib.h>
+#include <string>
+#include <vector>
+
+template <class T> struct Ptr {
+  void Store(T *ptr) { t = ptr; }
+
+  void Access() { *t = {}; }
+
+  T *t;
+};
+
+template <class T, size_t N> struct Ptr<T[N]> {
+  using Type = T[N];
+  void Store(Type *ptr) { t = *ptr; }
+
+  void Access() { *t = {}; }
+
+  T *t;
+};
+
+template <class T> void test() {
+  Ptr<T> ptr;
+  {
+    T x;
+    ptr.Store(&x);
+  }
+
+  ptr.Access();
+  // CHECK: ERROR: AddressSanitizer: stack-use-after-scope
+  // CHECK:  #{{[0-9]+}} 0x{{.*}} in {{(void )?test.*\((void)?\) .*}}use-after-scope-types.cc
+  // CHECK: Address 0x{{.*}} is located in stack of thread T{{.*}} at offset [[OFFSET:[^ ]+]] in frame
+  // {{\[}}[[OFFSET]], {{[0-9]+}}) 'x'
+}
+
+int main(int argc, char **argv) {
+  using Tests = void (*)();
+  Tests tests[] = {
+    &test<bool>,
+    &test<char>,
+    &test<int>,
+    &test<double>,
+    &test<float>,
+    &test<void*>,
+    &test<std::vector<std::string>>,
+    &test<int[3]>,
+    &test<int[1000]>,
+    &test<char[3]>,
+    &test<char[1000]>,
+  };
+
+  int n = atoi(argv[1]);
+  if (n == sizeof(tests) / sizeof(tests[0])) {
+    for (auto te : tests)
+      te();
+  } else {
+    tests[n]();
+  }
+
+  return 0;
+}
diff --git a/test/asan/TestCases/use-after-scope.cc b/test/asan/TestCases/use-after-scope.cc
index 59a0e0c..f669bf9 100644
--- a/test/asan/TestCases/use-after-scope.cc
+++ b/test/asan/TestCases/use-after-scope.cc
@@ -1,17 +1,19 @@
-// RUN: %clangxx_asan -O0 -fsanitize=use-after-scope %s -o %t && \
-// RUN: not %run %t 2>&1 | FileCheck %s
-// RUN: %env_asan_opts=detect_stack_use_after_return=1 not %run %t 2>&1 | FileCheck %s
-// XFAIL: *
+// RUN: %clangxx_asan -O1 -fsanitize-address-use-after-scope %s -o %t && \
+// RUN:     %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 2>&1 | FileCheck %s
+
+// RUN: %env_asan_opts=detect_stack_use_after_scope=0 %run %t
+
+volatile int *p = 0;
 
 int main() {
-  int *p = 0;
   {
     int x = 0;
     p = &x;
   }
-  return *p;  // BOOM
+  *p = 5;  // BOOM
   // CHECK: ERROR: AddressSanitizer: stack-use-after-scope
   // CHECK:  #0 0x{{.*}} in main {{.*}}use-after-scope.cc:[[@LINE-2]]
   // CHECK: Address 0x{{.*}} is located in stack of thread T{{.*}} at offset [[OFFSET:[^ ]+]] in frame
   // {{\[}}[[OFFSET]], {{[0-9]+}}) 'x'
+  return 0;
 }
diff --git a/test/asan/Unit/lit.site.cfg.in b/test/asan/Unit/lit.site.cfg.in
index b599102..55631a6 100644
--- a/test/asan/Unit/lit.site.cfg.in
+++ b/test/asan/Unit/lit.site.cfg.in
@@ -1,5 +1,4 @@
-## Autogenerated by LLVM/Clang configuration.
-# Do not edit!
+@LIT_SITE_CFG_IN_HEADER@
 
 import os
 
diff --git a/test/asan/android_commands/android_common.py b/test/asan/android_commands/android_common.py
index 43ac7b4..1a295b7 100644
--- a/test/asan/android_commands/android_common.py
+++ b/test/asan/android_commands/android_common.py
@@ -8,15 +8,30 @@
 if os.environ.get('ANDROID_RUN_VERBOSE') == '1':
     verbose = True
 
-def adb(args):
+def adb(args, attempts = 1):
     if verbose:
         print args
-    devnull = open(os.devnull, 'w')
-    return subprocess.call([ADB] + args, stdout=devnull, stderr=subprocess.STDOUT)
+    tmpname = tempfile.mktemp()
+    out = open(tmpname, 'w')
+    ret = 255
+    while attempts > 0 and ret != 0:
+      attempts -= 1
+      ret = subprocess.call([ADB] + args, stdout=out, stderr=subprocess.STDOUT)
+    if ret != 0:
+      print "adb command failed", args
+      print tmpname
+      out.close()
+      out = open(tmpname, 'r')
+      print out.read()
+    out.close()
+    os.unlink(tmpname)
+    return ret
 
 def pull_from_device(path):
     tmp = tempfile.mktemp()
-    adb(['pull', path, tmp])
+    adb(['pull', path, tmp], 5)
     text = open(tmp, 'r').read()
     os.unlink(tmp)
     return text
@@ -25,5 +40,5 @@
     # Workaround for https://code.google.com/p/android/issues/detail?id=65857
     dst_path = os.path.join(ANDROID_TMPDIR, os.path.basename(path))
     tmp_path = dst_path + '.push'
-    adb(['push', path, tmp_path])
-    adb(['shell', 'cp "%s" "%s" 2>&1' % (tmp_path, dst_path)])
+    adb(['push', path, tmp_path], 5)
+    adb(['shell', 'cp "%s" "%s" 2>&1' % (tmp_path, dst_path)], 5)
diff --git a/test/asan/lit.cfg b/test/asan/lit.cfg
index 8355470..f2c0c87 100644
--- a/test/asan/lit.cfg
+++ b/test/asan/lit.cfg
@@ -73,6 +73,8 @@
                             "-fno-omit-frame-pointer",
                             "-fno-optimize-sibling-calls"] +
                             config.debug_info_flags + target_cflags)
+if config.target_arch == 's390x':
+  clang_asan_static_cflags.append("-mbackchain")
 clang_asan_static_cxxflags = config.cxx_mode_flags + clang_asan_static_cflags
 
 if config.asan_dynamic:
@@ -122,6 +124,14 @@
   config.substitutions.append( ("%asan_cxx_lib", base_lib % "_cxx") )
   config.substitutions.append( ("%asan_dll_thunk", base_lib % "_dll_thunk") )
 
+if platform.system() == 'Windows':
+  # Don't use -std=c++11 on Windows, as the driver will detect the appropriate
+  # default needed for use with the STL.
+  config.substitutions.append(("%stdcxx11 ", ""))
+else:
+  # Some tests use C++11 features such as lambdas and need to pass -std=c++11.
+  config.substitutions.append(("%stdcxx11 ", "-std=c++11 "))
+
 # FIXME: De-hardcode this path.
 asan_source_dir = os.path.join(
   get_required_attr(config, "compiler_rt_src_root"), "lib", "asan")
@@ -138,7 +148,7 @@
 if not os.path.exists(sancov):
   lit_config.fatal("Can't find script on path %r" % sancov)
 python_exec = get_required_attr(config, "python_executable")
-config.substitutions.append( ("%sancov", python_exec + " " + sancov + " ") )
+config.substitutions.append( ("%sancov ", python_exec + " " + sancov + " ") )
 
 # Determine kernel bitness
 if config.host_arch.find('64') != -1 and config.android != "1":
diff --git a/test/asan/lit.site.cfg.in b/test/asan/lit.site.cfg.in
index 1e83c74..1b6fed2 100644
--- a/test/asan/lit.site.cfg.in
+++ b/test/asan/lit.site.cfg.in
@@ -1,5 +1,4 @@
-## Autogenerated by LLVM/Clang configuration.
-# Do not edit!
+@LIT_SITE_CFG_IN_HEADER@
 
 # Tool-specific config options.
 config.name_suffix = "@ASAN_TEST_CONFIG_SUFFIX@"
diff --git a/test/builtins/Unit/cpu_model_test.c b/test/builtins/Unit/cpu_model_test.c
new file mode 100644
index 0000000..5a918bd
--- /dev/null
+++ b/test/builtins/Unit/cpu_model_test.c
@@ -0,0 +1,19 @@
+//===-- cpu_model_test.c - Test __builtin_cpu_supports -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tests __builtin_cpu_supports for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+int main (void) {
+  if(__builtin_cpu_supports("avx2"))
+    return 4;
+  else
+    return 3;
+}
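For context, __builtin_cpu_supports is backed by the __cpu_model support that compiler-rt's builtins library provides on x86, which is what this new unit test exercises. Its usual role is runtime dispatch; a small illustrative sketch (the feature strings and the pick_impl helper are examples, not taken from the test):

#include <stdio.h>

// Pick an implementation name based on CPU features detected at runtime.
static const char *pick_impl(void) {
  __builtin_cpu_init();  // ensure the CPU model data is initialized; harmless if already done
  if (__builtin_cpu_supports("avx2"))
    return "avx2";
  if (__builtin_cpu_supports("sse4.2"))
    return "sse4.2";
  return "generic";
}

int main(void) {
  printf("selected implementation: %s\n", pick_impl());
  return 0;
}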
diff --git a/test/cfi/CMakeLists.txt b/test/cfi/CMakeLists.txt
index cbd9a3c..4c4deba 100644
--- a/test/cfi/CMakeLists.txt
+++ b/test/cfi/CMakeLists.txt
@@ -1,6 +1,13 @@
+set(CFI_LIT_TEST_MODE Standalone)
 configure_lit_site_cfg(
   ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
-  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
+  ${CMAKE_CURRENT_BINARY_DIR}/Standalone/lit.site.cfg
+  )
+
+set(CFI_LIT_TEST_MODE Devirt)
+configure_lit_site_cfg(
+  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+  ${CMAKE_CURRENT_BINARY_DIR}/Devirt/lit.site.cfg
   )
 
 set(CFI_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
@@ -32,12 +39,15 @@
 endif()
 
 add_lit_testsuite(check-cfi "Running the cfi regression tests"
-  ${CMAKE_CURRENT_BINARY_DIR}
+  ${CMAKE_CURRENT_BINARY_DIR}/Standalone
+  ${CMAKE_CURRENT_BINARY_DIR}/Devirt
   DEPENDS ${CFI_TEST_DEPS})
 
 add_lit_target(check-cfi-and-supported "Running the cfi regression tests"
-  ${CMAKE_CURRENT_BINARY_DIR}
+  ${CMAKE_CURRENT_BINARY_DIR}/Standalone
+  ${CMAKE_CURRENT_BINARY_DIR}/Devirt
   PARAMS check_supported=1
   DEPENDS ${CFI_TEST_DEPS})
 
-set_target_properties(check-cfi PROPERTIES FOLDER "Tests")
+set_target_properties(check-cfi PROPERTIES FOLDER "Compiler-RT Misc")
+set_target_properties(check-cfi-and-supported PROPERTIES FOLDER "Compiler-RT Misc")
diff --git a/test/cfi/create-derivers.test b/test/cfi/create-derivers.test
index 79521e4..a67562b 100644
--- a/test/cfi/create-derivers.test
+++ b/test/cfi/create-derivers.test
@@ -1,20 +1,20 @@
 REQUIRES: asserts
 
 RUN: %clangxx_cfi -c -o %t1.o %S/simple-fail.cpp
-RUN: opt -lowerbitsets -debug-only=lowerbitsets -o /dev/null %t1.o 2>&1 | FileCheck --check-prefix=B0 %s
+RUN: opt -lowertypetests -debug-only=lowertypetests -o /dev/null %t1.o 2>&1 | FileCheck --check-prefix=B0 %s
 B0: {{1B|B@@}}: {{.*}} size 1
 
 RUN: %clangxx_cfi -DB32 -c -o %t2.o %S/simple-fail.cpp
-RUN: opt -lowerbitsets -debug-only=lowerbitsets -o /dev/null %t2.o 2>&1 | FileCheck --check-prefix=B32 %s
+RUN: opt -lowertypetests -debug-only=lowertypetests -o /dev/null %t2.o 2>&1 | FileCheck --check-prefix=B32 %s
 B32: {{1B|B@@}}: {{.*}} size 24
 B32-NOT: all-ones
 
 RUN: %clangxx_cfi -DB64 -c -o %t3.o %S/simple-fail.cpp
-RUN: opt -lowerbitsets -debug-only=lowerbitsets -o /dev/null %t3.o 2>&1 | FileCheck --check-prefix=B64 %s
+RUN: opt -lowertypetests -debug-only=lowertypetests -o /dev/null %t3.o 2>&1 | FileCheck --check-prefix=B64 %s
 B64: {{1B|B@@}}: {{.*}} size 54
 B64-NOT: all-ones
 
 RUN: %clangxx_cfi -DBM -c -o %t4.o %S/simple-fail.cpp
-RUN: opt -lowerbitsets -debug-only=lowerbitsets -o /dev/null %t4.o 2>&1 | FileCheck --check-prefix=BM %s
+RUN: opt -lowertypetests -debug-only=lowertypetests -o /dev/null %t4.o 2>&1 | FileCheck --check-prefix=BM %s
 BM: {{1B|B@@}}: {{.*}} size 84
 BM-NOT: all-ones
diff --git a/test/cfi/cross-dso/dlopen.cpp b/test/cfi/cross-dso/dlopen.cpp
new file mode 100644
index 0000000..ee4dae2
--- /dev/null
+++ b/test/cfi/cross-dso/dlopen.cpp
@@ -0,0 +1,147 @@
+// RUN: %clangxx_cfi_dso -DSHARED_LIB %s -fPIC -shared -o %t1-so.so
+// RUN: %clangxx_cfi_dso %s -o %t1
+// RUN: %expect_crash %t1 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %t1 cast 2>&1 | FileCheck --check-prefix=CFI-CAST %s
+// RUN: %expect_crash %t1 dlclose 2>&1 | FileCheck --check-prefix=CFI %s
+
+// RUN: %clangxx_cfi_dso -DB32 -DSHARED_LIB %s -fPIC -shared -o %t2-so.so
+// RUN: %clangxx_cfi_dso -DB32 %s -o %t2
+// RUN: %expect_crash %t2 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %t2 cast 2>&1 | FileCheck --check-prefix=CFI-CAST %s
+// RUN: %expect_crash %t2 dlclose 2>&1 | FileCheck --check-prefix=CFI %s
+
+// RUN: %clangxx_cfi_dso -DB64 -DSHARED_LIB %s -fPIC -shared -o %t3-so.so
+// RUN: %clangxx_cfi_dso -DB64 %s -o %t3
+// RUN: %expect_crash %t3 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %t3 cast 2>&1 | FileCheck --check-prefix=CFI-CAST %s
+// RUN: %expect_crash %t3 dlclose 2>&1 | FileCheck --check-prefix=CFI %s
+
+// RUN: %clangxx_cfi_dso -DBM -DSHARED_LIB %s -fPIC -shared -o %t4-so.so
+// RUN: %clangxx_cfi_dso -DBM %s -o %t4
+// RUN: %expect_crash %t4 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %t4 cast 2>&1 | FileCheck --check-prefix=CFI-CAST %s
+// RUN: %expect_crash %t4 dlclose 2>&1 | FileCheck --check-prefix=CFI %s
+
+// RUN: %clangxx -g -DBM -DSHARED_LIB -DNOCFI %s -fPIC -shared -o %t5-so.so
+// RUN: %clangxx -g -DBM -DNOCFI %s -ldl -o %t5
+// RUN: %t5 2>&1 | FileCheck --check-prefix=NCFI %s
+// RUN: %t5 cast 2>&1 | FileCheck --check-prefix=NCFI %s
+// RUN: %t5 dlclose 2>&1 | FileCheck --check-prefix=NCFI %s
+
+// Test that calls to an uninstrumented library are unchecked.
+// RUN: %clangxx -DBM -DSHARED_LIB %s -fPIC -shared -o %t6-so.so
+// RUN: %clangxx_cfi_dso -DBM %s -o %t6
+// RUN: %t6 2>&1 | FileCheck --check-prefix=NCFI %s
+// RUN: %t6 cast 2>&1 | FileCheck --check-prefix=NCFI %s
+
+// Call-after-dlclose is checked on the caller side.
+// RUN: %expect_crash %t6 dlclose 2>&1 | FileCheck --check-prefix=CFI %s
+
+// Tests calls into dlopen-ed library.
+// REQUIRES: cxxabi
+
+#include <assert.h>
+#include <dlfcn.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include <string>
+
+struct A {
+  virtual void f();
+};
+
+#ifdef SHARED_LIB
+
+#include "../utils.h"
+struct B {
+  virtual void f();
+};
+void B::f() {}
+
+extern "C" void *create_B() {
+  create_derivers<B>();
+  return (void *)(new B());
+}
+
+extern "C" __attribute__((aligned(4096))) void do_nothing() {}
+
+#else
+
+void A::f() {}
+
+static const int kCodeAlign = 4096;
+static const int kCodeSize = 4096;
+static char saved_code[kCodeSize];
+static char *real_start;
+
+static void save_code(char *p) {
+  real_start = (char *)(((uintptr_t)p) & ~(kCodeAlign - 1));
+  memcpy(saved_code, real_start, kCodeSize);
+}
+
+static void restore_code() {
+  char *code = (char *)mmap(real_start, kCodeSize, PROT_WRITE | PROT_EXEC,
+                            MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, 0, 0);
+  assert(code == real_start);
+  memcpy(code, saved_code, kCodeSize);
+}
+
+int main(int argc, char *argv[]) {
+  const bool test_cast = argc > 1 && strcmp(argv[1], "cast") == 0;
+  const bool test_dlclose = argc > 1 && strcmp(argv[1], "dlclose") == 0;
+
+  std::string name = std::string(argv[0]) + "-so.so";
+  void *handle = dlopen(name.c_str(), RTLD_NOW);
+  assert(handle);
+  void *(*create_B)() = (void *(*)())dlsym(handle, "create_B");
+  assert(create_B);
+
+  void *p = create_B();
+  A *a;
+
+  // CFI: =0=
+  // CFI-CAST: =0=
+  // NCFI: =0=
+  fprintf(stderr, "=0=\n");
+
+  if (test_cast) {
+    // Test cast. BOOM.
+    a = (A*)p;
+  } else {
+    // Invisible to CFI. Test virtual call later.
+    memcpy(&a, &p, sizeof(a));
+  }
+
+  // CFI: =1=
+  // CFI-CAST-NOT: =1=
+  // NCFI: =1=
+  fprintf(stderr, "=1=\n");
+
+  if (test_dlclose) {
+    // Imitate an attacker sneaking in an executable page where a dlclose()d
+    // library was loaded. This needs to pass w/o CFI, so for testing
+    // purposes, we just copy the bytes of a "void f() {}" function back and
+    // forth.
+    void (*do_nothing)() = (void (*)())dlsym(handle, "do_nothing");
+    assert(do_nothing);
+    save_code((char *)do_nothing);
+
+    int res = dlclose(handle);
+    assert(res == 0);
+
+    restore_code();
+
+    do_nothing(); // UB here
+  } else {
+    a->f(); // UB here
+  }
+
+  // CFI-NOT: =2=
+  // CFI-CAST-NOT: =2=
+  // NCFI: =2=
+  fprintf(stderr, "=2=\n");
+}
+#endif
diff --git a/test/cfi/cross-dso/icall/diag.cpp b/test/cfi/cross-dso/icall/diag.cpp
new file mode 100644
index 0000000..c9ca28c
--- /dev/null
+++ b/test/cfi/cross-dso/icall/diag.cpp
@@ -0,0 +1,159 @@
+// Cross-DSO diagnostics.
+// The rules are:
+// * If the library needs diagnostics, the main executable must request at
+//   least some diagnostics as well (to link the diagnostic runtime).
+// * -fsanitize-trap on the caller side overrides everything.
+// * otherwise, the callee decides between trap/recover/norecover.
+
+// Full-recover.
+// RUN: %clangxx_cfi_dso_diag -g -DSHARED_LIB %s -fPIC -shared -o %t-so.so
+// RUN: %clangxx_cfi_dso_diag -g %s -o %t %t-so.so
+
+// RUN: %t icv 2>&1 | FileCheck %s --check-prefix=ICALL-DIAG --check-prefix=CAST-DIAG \
+// RUN:                            --check-prefix=VCALL-DIAG --check-prefix=ALL-RECOVER
+
+// RUN: %t i_v 2>&1 | FileCheck %s --check-prefix=ICALL-DIAG --check-prefix=CAST-NODIAG \
+// RUN:                            --check-prefix=VCALL-DIAG --check-prefix=ALL-RECOVER
+
+// RUN: %t _cv 2>&1 | FileCheck %s --check-prefix=ICALL-NODIAG --check-prefix=CAST-DIAG \
+// RUN:                            --check-prefix=VCALL-DIAG --check-prefix=ALL-RECOVER
+
+// RUN: %t ic_ 2>&1 | FileCheck %s --check-prefix=ICALL-DIAG --check-prefix=CAST-DIAG \
+// RUN:                            --check-prefix=VCALL-NODIAG --check-prefix=ALL-RECOVER
+
+// Trap on icall, no-recover on cast.
+// RUN: %clangxx_cfi_dso_diag -fsanitize-trap=cfi-icall -fno-sanitize-recover=cfi-unrelated-cast \
+// RUN:     -g -DSHARED_LIB %s -fPIC -shared -o %t-so.so
+// RUN: %clangxx_cfi_dso_diag -fsanitize-trap=cfi-icall -fno-sanitize-recover=cfi-unrelated-cast \
+// RUN:     -g %s -o %t %t-so.so
+
+// RUN: %expect_crash %t icv 2>&1 | FileCheck %s --check-prefix=ICALL-NODIAG --check-prefix=CAST-NODIAG \
+// RUN:                                          --check-prefix=VCALL-NODIAG --check-prefix=ICALL-FATAL
+
+// RUN: not %t _cv 2>&1 | FileCheck %s --check-prefix=ICALL-NODIAG --check-prefix=CAST-DIAG \
+// RUN:                                --check-prefix=VCALL-NODIAG --check-prefix=CAST-FATAL
+
+// RUN: %t __v 2>&1 | FileCheck %s --check-prefix=ICALL-NODIAG --check-prefix=CAST-NODIAG \
+// RUN:                            --check-prefix=VCALL-DIAG
+
+// Callee: trap on icall, no-recover on cast.
+// Caller: recover on everything.
+// The same as in the previous case, behaviour is decided by the callee.
+// RUN: %clangxx_cfi_dso_diag -fsanitize-trap=cfi-icall -fno-sanitize-recover=cfi-unrelated-cast \
+// RUN:     -g -DSHARED_LIB %s -fPIC -shared -o %t-so.so
+// RUN: %clangxx_cfi_dso_diag \
+// RUN:     -g %s -o %t %t-so.so
+
+// RUN: %expect_crash %t icv 2>&1 | FileCheck %s --check-prefix=ICALL-NODIAG --check-prefix=CAST-NODIAG \
+// RUN:                                          --check-prefix=VCALL-NODIAG --check-prefix=ICALL-FATAL
+
+// RUN: not %t _cv 2>&1 | FileCheck %s --check-prefix=ICALL-NODIAG --check-prefix=CAST-DIAG \
+// RUN:                                --check-prefix=VCALL-NODIAG --check-prefix=CAST-FATAL
+
+// RUN: %t __v 2>&1 | FileCheck %s --check-prefix=ICALL-NODIAG --check-prefix=CAST-NODIAG \
+// RUN:                            --check-prefix=VCALL-DIAG
+
+// Caller in trapping mode, callee with full diagnostic+recover.
+// Caller wins.
+// cfi-nvcall is non-trapping in the main executable to link the diagnostic runtime library.
+// RUN: %clangxx_cfi_dso_diag \
+// RUN:     -g -DSHARED_LIB %s -fPIC -shared -o %t-so.so
+// RUN: %clangxx_cfi_dso -fno-sanitize-trap=cfi-nvcall \
+// RUN:     -g %s -o %t %t-so.so
+
+// RUN: %expect_crash %t icv 2>&1 | FileCheck %s --check-prefix=ICALL-NODIAG --check-prefix=CAST-NODIAG \
+// RUN:                                          --check-prefix=VCALL-NODIAG --check-prefix=ICALL-FATAL
+
+// RUN: %expect_crash %t _cv 2>&1 | FileCheck %s --check-prefix=ICALL-NODIAG --check-prefix=CAST-NODIAG \
+// RUN:                                          --check-prefix=VCALL-NODIAG --check-prefix=CAST-FATAL
+
+// RUN: %expect_crash %t __v 2>&1 | FileCheck %s --check-prefix=ICALL-NODIAG --check-prefix=CAST-NODIAG \
+// RUN:                                          --check-prefix=VCALL-NODIAG --check-prefix=VCALL-FATAL
+
+// REQUIRES: cxxabi
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+struct A {
+  virtual void f();
+};
+
+void *create_B();
+
+#ifdef SHARED_LIB
+
+#include "../../utils.h"
+struct B {
+  virtual void f();
+};
+void B::f() {}
+
+void *create_B() {
+  create_derivers<B>();
+  return (void *)(new B());
+}
+
+#else
+
+void A::f() {}
+
+int main(int argc, char *argv[]) {
+  assert(argc == 2);
+  assert(strlen(argv[1]) == 3);
+
+  // ICALL-FATAL: =0=
+  // CAST-FATAL:  =0=
+  // VCALL-FATAL: =0=
+  // ALL-RECOVER: =0=
+  fprintf(stderr, "=0=\n");
+
+  void *p;
+  if (argv[1][0] == 'i') {
+    // ICALL-DIAG: runtime error: control flow integrity check for type 'void *(int)' failed during indirect function call
+    // ICALL-DIAG-NEXT: note: create_B() defined here
+    // ICALL-NODIAG-NOT: runtime error: control flow integrity check {{.*}} during indirect function call
+    p = ((void *(*)(int))create_B)(42);
+  } else {
+    p = create_B();
+  }
+
+  // ICALL-FATAL-NOT: =1=
+  // CAST-FATAL:      =1=
+  // VCALL-FATAL:     =1=
+  // ALL-RECOVER:     =1=
+  fprintf(stderr, "=1=\n");
+
+  A *a;
+  if (argv[1][1] == 'c') {
+    // CAST-DIAG: runtime error: control flow integrity check for type 'A' failed during cast to unrelated type
+    // CAST-DIAG-NEXT: note: vtable is of type '{{(struct )?}}B'
+    // CAST-NODIAG-NOT: runtime error: control flow integrity check {{.*}} during cast to unrelated type
+    a = (A*)p;
+  } else {
+    // Invisible to CFI.
+    memcpy(&a, &p, sizeof(a));
+  }
+
+  // ICALL-FATAL-NOT: =2=
+  // CAST-FATAL-NOT:  =2=
+  // VCALL-FATAL:     =2=
+  // ALL-RECOVER:     =2=
+  fprintf(stderr, "=2=\n");
+
+  // VCALL-DIAG: runtime error: control flow integrity check for type 'A' failed during virtual call
+  // VCALL-DIAG-NEXT: note: vtable is of type '{{(struct )?}}B'
+  // VCALL-NODIAG-NOT: runtime error: control flow integrity check {{.*}} during virtual call
+  if (argv[1][2] == 'v') {
+    a->f(); // UB here
+  }
+
+  // ICALL-FATAL-NOT: =3=
+  // CAST-FATAL-NOT:  =3=
+  // VCALL-FATAL-NOT: =3=
+  // ALL-RECOVER: =3=
+  fprintf(stderr, "=3=\n");
+
+}
+#endif
diff --git a/test/cfi/cross-dso/icall/icall-from-dso.cpp b/test/cfi/cross-dso/icall/icall-from-dso.cpp
index 1995f05..93cf4f6 100644
--- a/test/cfi/cross-dso/icall/icall-from-dso.cpp
+++ b/test/cfi/cross-dso/icall/icall-from-dso.cpp
@@ -1,17 +1,25 @@
 // RUN: %clangxx_cfi_dso -DSHARED_LIB %s -fPIC -shared -o %t-so.so
 // RUN: %clangxx_cfi_dso %s -o %t %t-so.so && %expect_crash %t 2>&1 | FileCheck %s
 
+// RUN: %clangxx_cfi_dso_diag -g -DSHARED_LIB %s -fPIC -shared -o %t2-so.so
+// RUN: %clangxx_cfi_dso_diag -g %s -o %t2 %t2-so.so && %t2 2>&1 | FileCheck %s --check-prefix=CFI-DIAG
+
 #include <stdio.h>
 
 #ifdef SHARED_LIB
 void g();
 void f() {
+  // CFI-DIAG: =1=
   // CHECK: =1=
   fprintf(stderr, "=1=\n");
   ((void (*)(void))g)();
+  // CFI-DIAG: =2=
   // CHECK: =2=
   fprintf(stderr, "=2=\n");
+  // CFI-DIAG: runtime error: control flow integrity check for type 'void (int)' failed during indirect function call
+  // CFI-DIAG-NEXT: note: g() defined here
   ((void (*)(int))g)(42); // UB here
+  // CFI-DIAG: =3=
   // CHECK-NOT: =3=
   fprintf(stderr, "=3=\n");
 }
diff --git a/test/cfi/cross-dso/icall/icall.cpp b/test/cfi/cross-dso/icall/icall.cpp
index d7cc2f9..6017b80 100644
--- a/test/cfi/cross-dso/icall/icall.cpp
+++ b/test/cfi/cross-dso/icall/icall.cpp
@@ -1,6 +1,9 @@
 // RUN: %clangxx_cfi_dso -DSHARED_LIB %s -fPIC -shared -o %t-so.so
 // RUN: %clangxx_cfi_dso %s -o %t %t-so.so && %expect_crash %t 2>&1 | FileCheck %s
 
+// RUN: %clangxx_cfi_dso_diag -g -DSHARED_LIB %s -fPIC -shared -o %t2-so.so
+// RUN: %clangxx_cfi_dso_diag -g %s -o %t2 %t2-so.so && %t2 2>&1 | FileCheck %s --check-prefix=CFI-DIAG
+
 #include <stdio.h>
 
 #ifdef SHARED_LIB
@@ -9,12 +12,17 @@
 #else
 void f();
 int main() {
+  // CFI-DIAG: =1=
   // CHECK: =1=
   fprintf(stderr, "=1=\n");
   ((void (*)(void))f)();
+  // CFI-DIAG: =2=
   // CHECK: =2=
   fprintf(stderr, "=2=\n");
+  // CFI-DIAG: runtime error: control flow integrity check for type 'void (int)' failed during indirect function call
+  // CFI-DIAG-NEXT: note: f() defined here
   ((void (*)(int))f)(42); // UB here
+  // CFI-DIAG: =3=
   // CHECK-NOT: =3=
   fprintf(stderr, "=3=\n");
 }
diff --git a/test/cfi/cross-dso/shadow_is_read_only.cpp b/test/cfi/cross-dso/shadow_is_read_only.cpp
new file mode 100644
index 0000000..65aec82
--- /dev/null
+++ b/test/cfi/cross-dso/shadow_is_read_only.cpp
@@ -0,0 +1,85 @@
+// RUN: %clangxx_cfi_dso -std=c++11 -g -DSHARED_LIB %s -fPIC -shared -o %t-cfi-so.so
+// RUN: %clangxx -std=c++11 -g -DSHARED_LIB %s -fPIC -shared -o %t-nocfi-so.so
+// RUN: %clangxx_cfi_dso -std=c++11 -g %s -o %t
+
+// RUN: %expect_crash %t start 2>&1 | FileCheck %s
+// RUN: %expect_crash %t mmap 2>&1 | FileCheck %s
+// RUN: %expect_crash %t dlopen %t-cfi-so.so 2>&1 | FileCheck %s
+// RUN: %expect_crash %t dlclose %t-cfi-so.so 2>&1 | FileCheck %s
+// RUN: %expect_crash %t dlopen %t-nocfi-so.so 2>&1 | FileCheck %s
+// RUN: %expect_crash %t dlclose %t-nocfi-so.so 2>&1 | FileCheck %s
+
+// Tests that shadow is read-only most of the time.
+// REQUIRES: cxxabi
+
+#include <assert.h>
+#include <dlfcn.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+
+struct A {
+  virtual void f();
+};
+
+#ifdef SHARED_LIB
+
+void A::f() {}
+
+extern "C" A *create_A() { return new A(); }
+
+#else
+
+constexpr unsigned kShadowGranularity = 12;
+
+namespace __cfi {
+uintptr_t GetShadow();
+}
+
+void write_shadow(void *ptr) {
+  uintptr_t base = __cfi::GetShadow();
+  uint16_t *s =
+      (uint16_t *)(base + (((uintptr_t)ptr >> kShadowGranularity) << 1));
+  fprintf(stderr, "going to crash\n");
+  // CHECK: going to crash
+  *s = 42;
+  fprintf(stderr, "did not crash\n");
+  // CHECK-NOT: did not crash
+  exit(1);
+}
+
+int main(int argc, char *argv[]) {
+  assert(argc > 1);
+  const bool test_mmap = strcmp(argv[1], "mmap") == 0;
+  const bool test_start = strcmp(argv[1], "start") == 0;
+  const bool test_dlopen = strcmp(argv[1], "dlopen") == 0;
+  const bool test_dlclose = strcmp(argv[1], "dlclose") == 0;
+  const char *lib = argc > 2 ? argv[2] : nullptr;
+
+  if (test_start)
+    write_shadow((void *)&main);
+
+  if (test_mmap) {
+    void *p = mmap(nullptr, 1 << 20, PROT_READ | PROT_WRITE | PROT_EXEC,
+                   MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+    assert(p != MAP_FAILED);
+    write_shadow((char *)p + 100);
+  } else {
+    void *handle = dlopen(lib, RTLD_NOW);
+    assert(handle);
+    void *create_A = dlsym(handle, "create_A");
+    assert(create_A);
+
+    if (test_dlopen)
+      write_shadow(create_A);
+
+    int res = dlclose(handle);
+    assert(res == 0);
+
+    if (test_dlclose)
+      write_shadow(create_A);
+  }
+}
+#endif
diff --git a/test/cfi/cross-dso/simple-fail.cpp b/test/cfi/cross-dso/simple-fail.cpp
index 64db288..276b67d 100644
--- a/test/cfi/cross-dso/simple-fail.cpp
+++ b/test/cfi/cross-dso/simple-fail.cpp
@@ -28,6 +28,11 @@
 // RUN: %t6 2>&1 | FileCheck --check-prefix=NCFI %s
 // RUN: %t6 x 2>&1 | FileCheck --check-prefix=NCFI %s
 
+// RUN: %clangxx_cfi_dso_diag -DSHARED_LIB %s -fPIC -shared -o %t7-so.so
+// RUN: %clangxx_cfi_dso_diag %s -o %t7 %t7-so.so
+// RUN: %t7 2>&1 | FileCheck --check-prefix=CFI-DIAG-CALL %s
+// RUN: %t7 x 2>&1 | FileCheck --check-prefix=CFI-DIAG-CALL --check-prefix=CFI-DIAG-CAST %s
+
 // Tests that the CFI mechanism crashes the program when making a virtual call
 // to an object of the wrong class but with a compatible vtable, by casting a
 // pointer to such an object and attempting to make a call through it.
@@ -71,6 +76,8 @@
 
   if (argc > 1 && argv[1][0] == 'x') {
     // Test cast. BOOM.
+    // CFI-DIAG-CAST: runtime error: control flow integrity check for type 'A' failed during cast to unrelated type
+    // CFI-DIAG-CAST-NEXT: note: vtable is of type '{{(struct )?}}B'
     a = (A*)p;
   } else {
     // Invisible to CFI. Test virtual call later.
@@ -82,6 +89,8 @@
   // NCFI: =1=
   fprintf(stderr, "=1=\n");
 
+  // CFI-DIAG-CALL: runtime error: control flow integrity check for type 'A' failed during virtual call
+  // CFI-DIAG-CALL-NEXT: note: vtable is of type '{{(struct )?}}B'
   a->f(); // UB here
 
   // CFI-NOT: =2=
diff --git a/test/cfi/cross-dso/target_out_of_bounds.cpp b/test/cfi/cross-dso/target_out_of_bounds.cpp
new file mode 100644
index 0000000..6353f03
--- /dev/null
+++ b/test/cfi/cross-dso/target_out_of_bounds.cpp
@@ -0,0 +1,64 @@
+// RUN: %clangxx_cfi_dso_diag -std=c++11 %s -o %t
+// RUN: %t zero 2>&1 | FileCheck --check-prefix=CHECK-ZERO %s
+// RUN: %t unaddressable 2>&1 | FileCheck --check-prefix=CHECK-UNADDR %s
+// RUN: %t 2>&1 | FileCheck --check-prefix=CHECK-TYPEINFO %s
+
+// RUN: %clangxx_cfi_diag -std=c++11 %s -o %t2
+// RUN: %t2 zero 2>&1 | FileCheck --check-prefix=CHECK-ZERO %s
+// RUN: %t2 unaddressable 2>&1 | FileCheck --check-prefix=CHECK-UNADDR %s
+// RUN: %t2 2>&1 | FileCheck --check-prefix=CHECK-TYPEINFO %s
+
+// REQUIRES: cxxabi
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+
+struct A {
+  virtual void f();
+};
+
+void A::f() {}
+
+int main(int argc, char *argv[]) {
+  char *volatile p = reinterpret_cast<char *>(new A());
+  if (argc > 1 && strcmp(argv[1], "unaddressable") == 0) {
+    void *vtable = mmap(nullptr, 4096, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+    // Create an object with a vtable in an unaddressable memory region.
+    *(uintptr_t *)p = (uintptr_t)vtable + 64;
+    // CHECK-UNADDR: runtime error: control flow integrity check for type 'A' failed during cast
+    // CHECK-UNADDR: note: invalid vtable
+    // CHECK-UNADDR: <memory cannot be printed>
+    // CHECK-UNADDR: runtime error: control flow integrity check for type 'A' failed during cast
+    // CHECK-UNADDR: note: invalid vtable
+    // CHECK-UNADDR: <memory cannot be printed>
+  } else if (argc > 1 && strcmp(argv[1], "zero") == 0) {
+    // Create an object with a vtable outside of any known DSO, but still in an
+    // addressable area.
+    void *vtable = calloc(1, 128);
+    *(uintptr_t *)p = (uintptr_t)vtable + 64;
+    // CHECK-ZERO: runtime error: control flow integrity check for type 'A' failed during cast
+    // CHECK-ZERO: note: invalid vtable
+    // CHECK-ZERO: 00 00 00 00 00 00 00 00
+    // CHECK-ZERO: runtime error: control flow integrity check for type 'A' failed during cast
+    // CHECK-ZERO: note: invalid vtable
+    // CHECK-ZERO: 00 00 00 00 00 00 00 00
+  } else {
+    // Create an object with a seemingly fine vtable, but with an unaddressable
+    // typeinfo pointer.
+    void *vtable = calloc(1, 128);
+    memset(vtable, 0xFE, 128);
+    *(uintptr_t *)p = (uintptr_t)vtable + 64;
+    // CHECK-TYPEINFO: runtime error: control flow integrity check for type 'A' failed during cast
+    // CHECK-TYPEINFO: note: invalid vtable
+    // CHECK-TYPEINFO: fe fe fe fe fe fe fe fe
+    // CHECK-TYPEINFO: runtime error: control flow integrity check for type 'A' failed during cast
+    // CHECK-TYPEINFO: note: invalid vtable
+    // CHECK-TYPEINFO: fe fe fe fe fe fe fe fe
+  }
+
+  A *volatile pa = reinterpret_cast<A *>(p);
+  pa = reinterpret_cast<A *>(p);
+}
diff --git a/test/cfi/icall/bad-signature.c b/test/cfi/icall/bad-signature.c
index 43de117..183e627 100644
--- a/test/cfi/icall/bad-signature.c
+++ b/test/cfi/icall/bad-signature.c
@@ -1,10 +1,10 @@
-// RUN: %clangxx -o %t1 %s
+// RUN: %clang -o %t1 %s
 // RUN: %t1 2>&1 | FileCheck --check-prefix=NCFI %s
 
-// RUN: %clangxx_cfi -o %t2 %s
+// RUN: %clang_cfi -o %t2 %s
 // RUN: %expect_crash %t2 2>&1 | FileCheck --check-prefix=CFI %s
 
-// RUN: %clangxx_cfi_diag -g -o %t3 %s
+// RUN: %clang_cfi_diag -g -o %t3 %s
 // RUN: %t3 2>&1 | FileCheck --check-prefix=CFI-DIAG %s
 
 #include <stdio.h>
@@ -18,7 +18,7 @@
   fprintf(stderr, "1\n");
 
   // CFI-DIAG: runtime error: control flow integrity check for type 'void (int)' failed during indirect function call
-  // CFI-DIAG: f() defined here
+  // CFI-DIAG: f defined here
   ((void (*)(int))f)(42); // UB here
 
   // CFI-NOT: 2
diff --git a/test/cfi/icall/external-call.c b/test/cfi/icall/external-call.c
index 43fc252..e90c7e0 100644
--- a/test/cfi/icall/external-call.c
+++ b/test/cfi/icall/external-call.c
@@ -1,4 +1,4 @@
-// RUN: %clangxx_cfi -o %t1 %s
+// RUN: %clang_cfi -lm -o %t1 %s
 // RUN: %t1 c 1 2>&1 | FileCheck --check-prefix=CFI %s
 // RUN: %t1 s 2 2>&1 | FileCheck --check-prefix=CFI %s
 
diff --git a/test/cfi/lit.cfg b/test/cfi/lit.cfg
index 687c80f..3c02506 100644
--- a/test/cfi/lit.cfg
+++ b/test/cfi/lit.cfg
@@ -7,14 +7,28 @@
 
 clangxx = ' '.join([config.clang] + config.cxx_mode_flags)
 
+config.substitutions.append((r"%clang ", ' '.join([config.clang]) + ' '))
 config.substitutions.append((r"%clangxx ", clangxx + ' '))
 if config.lto_supported:
-  clangxx_cfi = ' '.join(config.lto_launch + [clangxx] + config.lto_flags + ['-flto -fsanitize=cfi '])
-  clangxx_cfi_diag = clangxx_cfi + '-fno-sanitize-trap=cfi -fsanitize-recover=cfi '
-  config.substitutions.append((r"%clangxx_cfi ", clangxx_cfi))
-  config.substitutions.append((r"%clangxx_cfi_diag ", clangxx_cfi_diag))
-  config.substitutions.append((r"%clangxx_cfi_dso ", clangxx_cfi + '-fsanitize-cfi-cross-dso '))
-  config.substitutions.append((r"%clangxx_cfi_dso_diag ", clangxx_cfi_diag + '-fsanitize-cfi-cross-dso '))
+  clang_cfi = ' '.join(config.lto_launch + [config.clang] + config.lto_flags + ['-flto -fsanitize=cfi '])
+
+  if config.cfi_lit_test_mode == "Devirt":
+    config.available_features.add('devirt')
+    clang_cfi += '-fwhole-program-vtables '
+    config.substitutions.append((r"%expect_crash_unless_devirt ", ""))
+  else:
+    config.substitutions.append((r"%expect_crash_unless_devirt ", config.expect_crash))
+
+  cxx = ' '.join(config.cxx_mode_flags) + ' '
+  diag = '-fno-sanitize-trap=cfi -fsanitize-recover=cfi '
+  non_dso = '-fvisibility=hidden '
+  dso = '-fsanitize-cfi-cross-dso -fvisibility=default '
+  config.substitutions.append((r"%clang_cfi ", clang_cfi + non_dso))
+  config.substitutions.append((r"%clangxx_cfi ", clang_cfi + cxx + non_dso))
+  config.substitutions.append((r"%clang_cfi_diag ", clang_cfi + non_dso + diag))
+  config.substitutions.append((r"%clangxx_cfi_diag ", clang_cfi + cxx + non_dso + diag))
+  config.substitutions.append((r"%clangxx_cfi_dso ", clang_cfi + cxx + dso))
+  config.substitutions.append((r"%clangxx_cfi_dso_diag ", clang_cfi + cxx + dso + diag))
 else:
   config.unsupported = True
 
diff --git a/test/cfi/lit.site.cfg.in b/test/cfi/lit.site.cfg.in
index 76897e7..87e5b51 100644
--- a/test/cfi/lit.site.cfg.in
+++ b/test/cfi/lit.site.cfg.in
@@ -1,2 +1,6 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+config.cfi_lit_test_mode = "@CFI_LIT_TEST_MODE@"
+
 lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
 lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg")
diff --git a/test/cfi/overwrite.cpp b/test/cfi/overwrite.cpp
index 90f995d..48c0a89 100644
--- a/test/cfi/overwrite.cpp
+++ b/test/cfi/overwrite.cpp
@@ -1,5 +1,5 @@
 // RUN: %clangxx_cfi -o %t1 %s
-// RUN: %expect_crash %t1 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash_unless_devirt %t1 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DB32 -o %t2 %s
 // RUN: %expect_crash %t2 2>&1 | FileCheck --check-prefix=CFI %s
@@ -55,7 +55,10 @@
   // CFI-DIAG-NEXT: note: invalid vtable
   a->f();
 
-  // CFI-NOT: {{^2$}}
+  // We don't check for the absence of a 2 here because under devirtualization
+  // the virtual call may be turned into a direct call, in which case execution
+  // proceeds rather than crashing.
+
   // NCFI: {{^2$}}
   fprintf(stderr, "2\n");
 }
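A minimal C++ sketch of the devirtualization effect described in the comment above (illustrative only; the helper names are hypothetical and this is not the compiler's actual transformation). When whole-program analysis proves that A::f is the only possible target, the indirect call through the vtable can become a direct call, so a corrupted vtable pointer is never read and execution continues; this is why %expect_crash_unless_devirt is used instead of %expect_crash.

    #include <cstdio>

    struct A {
      virtual void f();
    };
    void A::f() { std::puts("A::f"); }

    // A virtual call: loads the vtable pointer and dispatches through it. This
    // is where CFI checks the vtable and where a corrupted vtable would trap.
    void call_virtual(A *a) { a->f(); }

    // What devirtualization can turn it into when A::f is the only possible
    // target: a direct, qualified call that never touches the vtable, so a
    // corrupted vtable pointer goes unnoticed and the program keeps running.
    void call_direct(A *a) { a->A::f(); }

    int main() {
      A a;
      call_virtual(&a);
      call_direct(&a);
      return 0;
    }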
diff --git a/test/cfi/simple-fail.cpp b/test/cfi/simple-fail.cpp
index 92b1322..595ca16 100644
--- a/test/cfi/simple-fail.cpp
+++ b/test/cfi/simple-fail.cpp
@@ -52,6 +52,9 @@
 // RUN: %clangxx -o %t18 %s
 // RUN: %t18 2>&1 | FileCheck --check-prefix=NCFI %s
 
+// RUN: %clangxx_cfi -DCHECK_NO_SANITIZE_CFI -o %t19 %s
+// RUN: %t19 2>&1 | FileCheck --check-prefix=NCFI %s
+
 // Tests that the CFI mechanism crashes the program when making a virtual call
 // to an object of the wrong class but with a compatible vtable, by casting a
 // pointer to such an object and attempting to make a call through it.
@@ -73,6 +76,9 @@
 
 void B::f() {}
 
+#if defined(CHECK_NO_SANITIZE_CFI)
+__attribute__((no_sanitize("cfi")))
+#endif
 int main() {
   create_derivers<B>();
 
diff --git a/test/cfi/stats.cpp b/test/cfi/stats.cpp
index eda11e1..566fcfb 100644
--- a/test/cfi/stats.cpp
+++ b/test/cfi/stats.cpp
@@ -2,6 +2,9 @@
 // RUN: env SANITIZER_STATS_PATH=%t.stats %t
 // RUN: sanstats %t.stats | FileCheck %s
 
+// FIXME: We currently emit the wrong debug info under devirtualization.
+// UNSUPPORTED: devirt
+
 struct ABase {};
 
 struct A : ABase {
diff --git a/test/cfi/target_uninstrumented.cpp b/test/cfi/target_uninstrumented.cpp
new file mode 100644
index 0000000..2ec2b5b
--- /dev/null
+++ b/test/cfi/target_uninstrumented.cpp
@@ -0,0 +1,44 @@
+// RUN: %clangxx -g -DSHARED_LIB %s -fPIC -shared -o %T/target_uninstrumented-so.so
+// RUN: %clangxx_cfi_diag -g %s -o %t %T/target_uninstrumented-so.so
+// RUN: %t 2>&1 | FileCheck %s
+
+// REQUIRES: cxxabi
+
+#include <stdio.h>
+#include <string.h>
+
+struct A {
+  virtual void f();
+};
+
+void *create_B();
+
+#ifdef SHARED_LIB
+
+struct B {
+  virtual void f();
+};
+void B::f() {}
+
+void *create_B() {
+  return (void *)(new B());
+}
+
+#else
+
+void A::f() {}
+
+int main(int argc, char *argv[]) {
+  void *p = create_B();
+  // CHECK: runtime error: control flow integrity check for type 'A' failed during cast to unrelated type
+  // CHECK: invalid vtable in module {{.*}}target_uninstrumented-so.so
+  A *a = (A *)p;
+  memset(p, 0, sizeof(A));
+  // CHECK: runtime error: control flow integrity check for type 'A' failed during cast to unrelated type
+  // CHECK-NOT: invalid vtable in module
+  // CHECK: invalid vtable
+  a = (A *)p;
+  // CHECK: done
+  fprintf(stderr, "done %p\n", a);
+}
+#endif
diff --git a/test/cfi/two-vcalls.cpp b/test/cfi/two-vcalls.cpp
new file mode 100644
index 0000000..854b3e0
--- /dev/null
+++ b/test/cfi/two-vcalls.cpp
@@ -0,0 +1,60 @@
+// RUN: %clangxx_cfi_diag -o %t %s
+// RUN: %t 2>&1 | FileCheck %s
+
+// This test checks that we don't generate two type checks
+// if two virtual calls are in the same function.
+
+// UNSUPPORTED: win32
+// REQUIRES: cxxabi
+
+// TODO(krasin): implement the optimization to not emit two type checks.
+// XFAIL: *
+#include <stdio.h>
+
+class Base {
+ public:
+  virtual void Foo() {
+    fprintf(stderr, "Base::Foo\n");
+  }
+
+  virtual void Bar() {
+    fprintf(stderr, "Base::Bar\n");
+  }
+};
+
+class Derived : public Base {
+ public:
+  void Foo() override {
+    fprintf(stderr, "Derived::Foo\n");
+  }
+
+  void Bar() override {
+    printf("Derived::Bar\n");
+  }
+};
+
+__attribute__((noinline)) void print(Base* ptr) {
+  ptr->Foo();
+  // Corrupt the vtable pointer. We expect that the optimization will
+  // check the vtable before the first vcall, store it in a local
+  // variable, and reuse it for the second vcall. Without the optimization,
+  // CFI will complain about the virtual table being corrupted.
+  *reinterpret_cast<void**>(ptr) = 0;
+  ptr->Bar();
+}
+
+
+int main() {
+  Base b;
+  Derived d;
+  // CHECK: Base::Foo
+  // CHECK: Base::Bar
+  print(&b);
+
+  // CHECK: Derived::Foo
+  // CHECK-NOT: runtime error
+  // CHECK: Derived::Bar
+  print(&d);
+
+  return 0;
+}
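The TODO above describes the intended optimization: perform the type check and the vtable load once, then let both virtual calls reuse the result. A hedged C++ sketch of that shape (the cfi_check_vtable_for_Base helper is made up and only models "validate once"; the vtable peek is the same ABI-dependent trick the test itself uses):

    #include <cassert>
    #include <cstdio>

    struct Base {
      virtual void Foo() { std::puts("Base::Foo"); }
      virtual void Bar() { std::puts("Base::Bar"); }
    };

    // Stand-in for the real CFI check; it only models the idea of checking once.
    static bool cfi_check_vtable_for_Base(void *vtable) {
      return vtable != nullptr;
    }

    void print_checked_once(Base *ptr) {
      // Load the vtable pointer a single time and validate it once...
      void *vptr = *reinterpret_cast<void **>(ptr);
      assert(cfi_check_vtable_for_Base(vptr));
      // ...then both virtual calls proceed without a second check. In the
      // unoptimized form this test currently exercises, a second check runs
      // before ptr->Bar() and flags the corrupted vtable.
      ptr->Foo();
      ptr->Bar();
    }

    int main() {
      Base b;
      print_checked_once(&b);
      return 0;
    }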
diff --git a/test/dfsan/CMakeLists.txt b/test/dfsan/CMakeLists.txt
index 3fa1af2..c2baf93 100644
--- a/test/dfsan/CMakeLists.txt
+++ b/test/dfsan/CMakeLists.txt
@@ -1,8 +1,33 @@
 set(DFSAN_LIT_TESTS_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 
-configure_lit_site_cfg(
-  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
-  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg)
+set(DFSAN_TESTSUITES)
+ 
+set(DFSAN_TEST_ARCH ${DFSAN_SUPPORTED_ARCH})
+if(APPLE)
+  darwin_filter_host_archs(DFSAN_SUPPORTED_ARCH DFSAN_TEST_ARCH)
+endif()
+
+foreach(arch ${DFSAN_TEST_ARCH})
+  set(DFSAN_TEST_TARGET_ARCH ${arch})
+  string(TOLOWER "-${arch}" DFSAN_TEST_CONFIG_SUFFIX)
+  if(ANDROID OR ${arch} MATCHES "arm|aarch64")
+    # This is only true if we are cross-compiling.
+    # Build all tests with host compiler and use host tools.
+    set(DFSAN_TEST_TARGET_CC ${COMPILER_RT_TEST_COMPILER})
+    set(DFSAN_TEST_TARGET_CFLAGS ${COMPILER_RT_TEST_COMPILER_CFLAGS})
+  else()
+    get_target_flags_for_arch(${arch} DFSAN_TEST_TARGET_CFLAGS)
+    string(REPLACE ";" " " DFSAN_TEST_TARGET_CFLAGS "${DFSAN_TEST_TARGET_CFLAGS}")
+  endif()
+
+  string(TOUPPER ${arch} ARCH_UPPER_CASE)
+  set(CONFIG_NAME ${ARCH_UPPER_CASE}Config)
+
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg)
+  list(APPEND DFSAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME})
+endforeach()
 
 set(DFSAN_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
 if(NOT COMPILER_RT_STANDALONE_BUILD)
@@ -10,6 +35,6 @@
 endif()
 
 add_lit_testsuite(check-dfsan "Running the DataFlowSanitizer tests"
-  ${CMAKE_CURRENT_BINARY_DIR}
+  ${DFSAN_TESTSUITES}
   DEPENDS ${DFSAN_TEST_DEPS})
-set_target_properties(check-dfsan PROPERTIES FOLDER "DFSan tests")
+set_target_properties(check-dfsan PROPERTIES FOLDER "Compiler-RT Misc")
diff --git a/test/dfsan/custom.cc b/test/dfsan/custom.cc
index 057b060..c96d940 100644
--- a/test/dfsan/custom.cc
+++ b/test/dfsan/custom.cc
@@ -3,6 +3,8 @@
 // RUN: %clang_dfsan -DSTRICT_DATA_DEPENDENCIES %s -o %t && %run %t
 // RUN: %clang_dfsan -DSTRICT_DATA_DEPENDENCIES -mllvm -dfsan-args-abi %s -o %t && %run %t
 
+// XFAIL: target-is-mips64el
+
 // Tests custom implementations of various glibc functions.
 
 #include <sanitizer/dfsan_interface.h>
@@ -536,7 +538,7 @@
   int ret4 = inet_pton(AF_INET, addr4, &in4);
   assert(ret4 == 1);
   ASSERT_READ_LABEL(&in4, sizeof(in4), i_label);
-  assert(in4.s_addr == 0x0100007f);
+  assert(in4.s_addr == htonl(0x7f000001));
 
   char addr6[] = "::1";
   dfsan_set_label(j_label, addr6 + 3, 1);
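The change to the inet_pton assertion above swaps a hard-coded little-endian constant for htonl, making the check byte-order independent: inet_pton stores the address in network byte order, and that value only reads back as 0x0100007f on little-endian hosts. A standalone illustration (my own example, not part of the test):

    #include <arpa/inet.h>
    #include <cassert>

    int main() {
      struct in_addr in4;
      int ok = inet_pton(AF_INET, "127.0.0.1", &in4);
      assert(ok == 1);
      // s_addr holds the address in network byte order, so compare against
      // htonl(0x7f000001); comparing against a raw 0x0100007f only works on
      // little-endian machines.
      assert(in4.s_addr == htonl(0x7f000001));
      return 0;
    }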
diff --git a/test/dfsan/lit.cfg b/test/dfsan/lit.cfg
index e4d4e8f..6dc0f9c 100644
--- a/test/dfsan/lit.cfg
+++ b/test/dfsan/lit.cfg
@@ -3,13 +3,13 @@
 import os
 
 # Setup config name.
-config.name = 'DataFlowSanitizer'
+config.name = 'DataFlowSanitizer' + config.name_suffix
 
 # Setup source root.
 config.test_source_root = os.path.dirname(__file__)
 
 # Setup default compiler flags used with -fsanitize=dataflow option.
-clang_dfsan_cflags = ["-fsanitize=dataflow", "-m64"]
+clang_dfsan_cflags = ["-fsanitize=dataflow", config.target_cflags]
 clang_dfsan_cxxflags = config.cxx_mode_flags + clang_dfsan_cflags
 
 def build_invocation(compile_flags):
diff --git a/test/dfsan/lit.site.cfg.in b/test/dfsan/lit.site.cfg.in
index 859284e..927ba11 100644
--- a/test/dfsan/lit.site.cfg.in
+++ b/test/dfsan/lit.site.cfg.in
@@ -1,3 +1,10 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+# Tool-specific config options.
+config.name_suffix = "@DFSAN_TEST_CONFIG_SUFFIX@"
+config.target_cflags = "@DFSAN_TEST_TARGET_CFLAGS@"
+config.target_arch = "@DFSAN_TEST_TARGET_ARCH@"
+
 # Load common config for all compiler-rt lit tests.
 lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
 
diff --git a/test/esan/CMakeLists.txt b/test/esan/CMakeLists.txt
new file mode 100644
index 0000000..bbdcd51
--- /dev/null
+++ b/test/esan/CMakeLists.txt
@@ -0,0 +1,32 @@
+set(ESAN_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
+if(NOT COMPILER_RT_STANDALONE_BUILD)
+  list(APPEND ESAN_TEST_DEPS esan)
+endif()
+
+set(ESAN_TESTSUITES)
+
+set(ESAN_TEST_ARCH ${ESAN_SUPPORTED_ARCH})
+
+set(ESAN_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+
+foreach(arch ${ESAN_TEST_ARCH})
+  set(ESAN_TEST_TARGET_ARCH ${arch})
+  string(TOLOWER "-${arch}" ESAN_TEST_CONFIG_SUFFIX)
+  get_target_flags_for_arch(${arch} ESAN_TEST_TARGET_CFLAGS)
+  string(REPLACE ";" " " ESAN_TEST_TARGET_CFLAGS "${ESAN_TEST_TARGET_CFLAGS}")
+
+  string(TOUPPER ${arch} ARCH_UPPER_CASE)
+  set(CONFIG_NAME ${ARCH_UPPER_CASE}Config)
+
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg)
+  list(APPEND ESAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME})
+endforeach()
+
+# TODO(bruening): add Unit/ tests as well
+
+add_lit_testsuite(check-esan "Running EfficiencySanitizer tests"
+  ${ESAN_TESTSUITES}
+  DEPENDS ${ESAN_TEST_DEPS})
+set_target_properties(check-esan PROPERTIES FOLDER "Compiler-RT Misc")
diff --git a/test/esan/TestCases/large-stack-linux.c b/test/esan/TestCases/large-stack-linux.c
new file mode 100644
index 0000000..3e024fc
--- /dev/null
+++ b/test/esan/TestCases/large-stack-linux.c
@@ -0,0 +1,74 @@
+// RUN: %clang_esan_wset -O0 %s -o %t 2>&1
+// RUN: %env_esan_opts="verbosity=1 record_snapshots=0" %run %t %t 2>&1 | FileCheck %s
+
+#include <assert.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static void testChildStackLimit(rlim_t StackLimit, char *ToRun) {
+  int Res;
+  struct rlimit Limit;
+  Limit.rlim_cur = RLIM_INFINITY;
+  Limit.rlim_max = RLIM_INFINITY;
+  Res = setrlimit(RLIMIT_STACK, &Limit);
+  if (Res != 0) {
+    // Probably our environment had a large limit and we ourselves got
+    // re-execed and can no longer raise our limit.
+    // We have to bail and emulate the regular test.
+    // We'd prefer to have branches in our FileCheck output to ensure the
+    // initial program was re-execed but this is the best we can do for now.
+    fprintf(stderr, "in esan::initializeLibrary\n");
+    fprintf(stderr, "==1234==The stack size limit is beyond the maximum supported.\n");
+    fprintf(stderr, "Re-execing with a stack size below 1TB.\n");
+    fprintf(stderr, "in esan::initializeLibrary\n");
+    fprintf(stderr, "done\n");
+    fprintf(stderr, "in esan::finalizeLibrary\n");
+    return;
+  }
+
+  pid_t Child = fork();
+  assert(Child >= 0);
+  if (Child > 0) {
+    pid_t WaitRes = waitpid(Child, NULL, 0);
+    assert(WaitRes == Child);
+  } else {
+    char *Args[2];
+    Args[0] = ToRun;
+    Args[1] = NULL;
+    Res = execv(ToRun, Args);
+    assert(0); // Should not be reached.
+  }
+}
+
+int main(int argc, char *argv[]) {
+  // The path to the program to exec must be passed in the first time.
+  if (argc == 2) {
+    fprintf(stderr, "Testing child with infinite stack\n");
+    testChildStackLimit(RLIM_INFINITY, argv[1]);
+    fprintf(stderr, "Testing child with 1TB stack\n");
+    testChildStackLimit(1ULL << 40, argv[1]);
+  }
+  fprintf(stderr, "done\n");
+  // CHECK:      in esan::initializeLibrary
+  // CHECK:      Testing child with infinite stack
+  // CHECK-NEXT: in esan::initializeLibrary
+  // CHECK-NEXT: =={{[0-9]+}}==The stack size limit is beyond the maximum supported.
+  // CHECK-NEXT: Re-execing with a stack size below 1TB.
+  // CHECK-NEXT: in esan::initializeLibrary
+  // CHECK:      done
+  // CHECK:      in esan::finalizeLibrary
+  // CHECK:      Testing child with 1TB stack
+  // CHECK-NEXT: in esan::initializeLibrary
+  // CHECK-NEXT: =={{[0-9]+}}==The stack size limit is beyond the maximum supported.
+  // CHECK-NEXT: Re-execing with a stack size below 1TB.
+  // CHECK-NEXT: in esan::initializeLibrary
+  // CHECK:      done
+  // CHECK-NEXT: in esan::finalizeLibrary
+  // CHECK:      done
+  // CHECK-NEXT: in esan::finalizeLibrary
+  return 0;
+}
diff --git a/test/esan/TestCases/libc-intercept.c b/test/esan/TestCases/libc-intercept.c
new file mode 100644
index 0000000..8d8d81f
--- /dev/null
+++ b/test/esan/TestCases/libc-intercept.c
@@ -0,0 +1,20 @@
+// RUN: %clang_esan_frag -O0 %s -o %t 2>&1
+// RUN: %env_esan_opts=verbosity=3 %run %t 2>&1 | FileCheck %s
+
+#include <string.h>
+
+int main(int argc, char **argv) {
+  char Buf[2048];
+  const char Str[] = "TestStringOfParticularLength"; // 29 chars.
+  strcpy(Buf, Str);
+  strncpy(Buf, Str, 17);
+  return strncmp(Buf, Str, 17);
+  // CHECK:      in esan::initializeLibrary
+  // CHECK:      in esan::processRangeAccess {{.*}} 29
+  // CHECK:      in esan::processRangeAccess {{.*}} 29
+  // CHECK:      in esan::processRangeAccess {{.*}} 17
+  // CHECK:      in esan::processRangeAccess {{.*}} 17
+  // CHECK:      in esan::processRangeAccess {{.*}} 17
+  // CHECK:      in esan::processRangeAccess {{.*}} 17
+  // CHECK:      in esan::finalizeLibrary
+}
diff --git a/test/esan/TestCases/mmap-shadow-conflict.c b/test/esan/TestCases/mmap-shadow-conflict.c
new file mode 100644
index 0000000..4b3c58b
--- /dev/null
+++ b/test/esan/TestCases/mmap-shadow-conflict.c
@@ -0,0 +1,30 @@
+// RUN: %clang_esan_frag -O0 %s -o %t 2>&1
+// RUN: %env_esan_opts=verbosity=1 %run %t 2>&1 | FileCheck %s
+
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdio.h>
+
+int main(int argc, char **argv) {
+  void *Map = mmap((void *)0x0000016000000000ULL, 0x1000, PROT_READ,
+                   MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
+  if (Map == (void *)-1)
+    fprintf(stderr, "map failed\n");
+  else
+    fprintf(stderr, "mapped %p\n", Map);
+  Map = mmap((void *)0x0000016000000000ULL, 0x1000, PROT_READ,
+                   MAP_ANON|MAP_PRIVATE, -1, 0);
+  fprintf(stderr, "mapped %p\n", Map);
+  // CHECK:      in esan::initializeLibrary
+  // (There can be a re-exec for stack limit here.)
+  // CHECK:      Shadow scale=2 offset=0x440000000000
+  // CHECK-NEXT: Shadow #0: [110000000000-114000000000) (256GB)
+  // CHECK-NEXT: Shadow #1: [124000000000-12c000000000) (512GB)
+  // CHECK-NEXT: Shadow #2: [148000000000-150000000000) (512GB)
+  // CHECK-NEXT: mmap conflict: {{.*}}
+  // CHECK-NEXT: map failed
+  // CHECK-NEXT: mmap conflict: {{.*}}
+  // CHECK-NEXT: mapped {{.*}}
+  // CHECK-NEXT: in esan::finalizeLibrary
+  return 0;
+}
diff --git a/test/esan/TestCases/struct-simple.cpp b/test/esan/TestCases/struct-simple.cpp
new file mode 100644
index 0000000..c52154e
--- /dev/null
+++ b/test/esan/TestCases/struct-simple.cpp
@@ -0,0 +1,204 @@
+// RUN: %clang_esan_frag -O0 %s -DPART1 -mllvm -esan-aux-field-info=0 -c -o %t-part1.o 2>&1
+// RUN: %clang_esan_frag -O0 %s -DPART2 -c -o %t-part2.o 2>&1
+// RUN: %clang_esan_frag -O0 %s -DMAIN -c -o %t-main.o 2>&1
+// RUN: %clang_esan_frag -O0 %t-part1.o %t-part2.o %t-main.o -o %t 2>&1
+// RUN: %env_esan_opts=verbosity=2 %run %t 2>&1 | FileCheck %s
+
+// We generate two different object files from this file with different
+// macros, and then link them together. We do this to test how we handle
+// separate compilation with multiple compilation units.
+
+#include <stdio.h>
+
+extern "C" {
+  void part1();
+  void part2();
+}
+
+//===-- compilation unit part1 without main function ----------------------===//
+
+#ifdef PART1
+struct A {
+  int x;
+  int y;
+};
+
+struct B {
+  float m;
+  double n;
+};
+
+union U {
+  float f;
+  double d;
+};
+
+// Same struct in both main and part1.
+struct S {
+  int s1;
+  int s2;
+};
+
+// Different structs with the same name in main and part1.
+struct D {
+  int d1;
+  int d2;
+  struct {
+    int x;
+    int y;
+    int z;
+  } ds[10];
+};
+
+void part1()
+{
+  struct A a;
+  struct B b;
+  union  U u;
+  struct S s;
+  struct D d;
+  for (int i = 0; i < (1 << 11); i++)
+    a.x = 0;
+  a.y = 1;
+  b.m = 2.0;
+  for (int i = 0; i < (1 << 21); i++) {
+    b.n = 3.0;
+    d.ds[3].y = 0;
+  }
+  u.f = 0.0;
+  u.d = 1.0;
+  s.s1 = 0;
+  d.d1 = 0;
+}
+#endif // PART1
+
+//===-- compilation unit part2 without main function ----------------------===//
+#ifdef PART2
+// No struct in this part.
+void part2()
+{
+  // do nothing
+}
+#endif // PART2
+
+//===-- compilation unit with main function -------------------------------===//
+
+#ifdef MAIN
+class C {
+public:
+  struct {
+    int x;
+    int y;
+  } cs;
+  union {
+    float f;
+    double d;
+  } cu;
+  char c[10];
+};
+
+// Same struct in both main and part1.
+struct S {
+  int s1;
+  int s2;
+};
+
+// Different structs with the same name in main and part1.
+struct D {
+  int d1;
+  int d2;
+  int d3;
+};
+
+int main(int argc, char **argv) {
+  // CHECK:      in esan::initializeLibrary
+  // CHECK:      in esan::initializeCacheFrag
+  // CHECK-NEXT: in esan::processCompilationUnitInit
+  // CHECK-NEXT: in esan::processCacheFragCompilationUnitInit: {{.*}}struct-simple.cpp with 6 class(es)/struct(s)
+  // CHECK-NEXT:  Register struct.A#2#11#11: 2 fields
+  // CHECK-NEXT:  Register struct.B#2#3#2:   2 fields
+  // CHECK-NEXT:  Register union.U#1#3:      1 fields
+  // CHECK-NEXT:  Register struct.S#2#11#11: 2 fields
+  // CHECK-NEXT:  Register struct.D#3#14#11#11: 3 fields
+  // CHECK-NEXT:  Register struct.anon#3#11#11#11: 3 fields
+  // CHECK-NEXT: in esan::processCompilationUnitInit
+  // CHECK-NEXT: in esan::processCacheFragCompilationUnitInit: {{.*}}struct-simple.cpp with 0 class(es)/struct(s)
+  // CHECK-NEXT: in esan::processCompilationUnitInit
+  // CHECK-NEXT: in esan::processCacheFragCompilationUnitInit: {{.*}}struct-simple.cpp with 5 class(es)/struct(s)
+  // CHECK-NEXT:  Register class.C#3#14#13#13:  3 fields
+  // CHECK-NEXT:  Register struct.anon#2#11#11: 2 fields
+  // CHECK-NEXT:  Register union.anon#1#3:      1 fields
+  // CHECK-NEXT:  Duplicated struct.S#2#11#11:  2 fields
+  // CHECK-NEXT:  Register struct.D#3#11#11#11: 3 fields
+  struct C c[2];
+  struct S s;
+  struct D d;
+  c[0].cs.x = 0;
+  c[1].cs.y = 1;
+  c[0].cu.f = 0.0;
+  c[1].cu.d = 1.0;
+  c[0].c[2] = 0;
+  s.s1 = 0;
+  d.d1 = 0;
+  d.d2 = 0;
+  part1();
+  part2();
+  return 0;
+  // CHECK:      in esan::finalizeLibrary
+  // CHECK-NEXT: in esan::finalizeCacheFrag
+  // CHECK-NEXT: in esan::processCompilationUnitExit
+  // CHECK-NEXT: in esan::processCacheFragCompilationUnitExit: {{.*}}struct-simple.cpp with 5 class(es)/struct(s)
+  // CHECK-NEXT:  Unregister class.C#3#14#13#13:  3 fields
+  // CHECK-NEXT:   {{.*}} class C
+  // CHECK-NEXT:   {{.*}}  size = 32, count = 5, ratio = 3, array access = 5
+  // CHECK-NEXT:   {{.*}}  # 0: offset = 0,  size = 8,  count = 2, type = %struct.anon = type { i32, i32 }
+  // CHECK-NEXT:   {{.*}}  # 1: offset = 8,  size = 8,  count = 2, type = %union.anon = type { double }
+  // CHECK-NEXT:   {{.*}}  # 2: offset = 16, size = 10, count = 1, type = [10 x i8]
+  // CHECK-NEXT:  Unregister struct.anon#2#11#11: 2 fields
+  // CHECK-NEXT:   {{.*}} struct anon
+  // CHECK-NEXT:   {{.*}}  size = 8, count = 2, ratio = 1, array access = 0
+  // CHECK-NEXT:   {{.*}}  # 0: offset = 0, size = 4, count = 1, type = i32
+  // CHECK-NEXT:   {{.*}}  # 1: offset = 4, size = 4, count = 1, type = i32
+  // CHECK-NEXT:  Unregister union.anon#1#3:      1 fields
+  // CHECK-NEXT:  Unregister struct.S#2#11#11:    2 fields
+  // CHECK-NEXT:   {{.*}} struct S
+  // CHECK-NEXT:   {{.*}}  size = 8, count = 2, ratio = 2, array access = 0
+  // CHECK-NEXT:   {{.*}}  # 0: count = 2
+  // CHECK-NEXT:   {{.*}}  # 1: count = 0
+  // CHECK-NEXT:  Unregister struct.D#3#11#11#11: 3 fields
+  // CHECK-NEXT:   {{.*}} struct D
+  // CHECK-NEXT:   {{.*}}  size = 12, count = 2, ratio = 2, array access = 0
+  // CHECK-NEXT:   {{.*}}  # 0: offset = 0, size = 4, count = 1, type = i32
+  // CHECK-NEXT:   {{.*}}  # 1: offset = 4, size = 4, count = 1, type = i32
+  // CHECK-NEXT:   {{.*}}  # 2: offset = 8, size = 4, count = 0, type = i32
+  // CHECK-NEXT: in esan::processCompilationUnitExit
+  // CHECK-NEXT: in esan::processCacheFragCompilationUnitExit: {{.*}}struct-simple.cpp with 0 class(es)/struct(s)
+  // CHECK-NEXT: in esan::processCompilationUnitExit
+  // CHECK-NEXT: in esan::processCacheFragCompilationUnitExit: {{.*}}struct-simple.cpp with 6 class(es)/struct(s)
+  // CHECK-NEXT:  Unregister struct.A#2#11#11:    2 fields
+  // CHECK-NEXT:   {{.*}} struct A
+  // CHECK-NEXT:   {{.*}}  size = 8, count = 2049, ratio = 2048, array access = 0
+  // CHECK-NEXT:   {{.*}}  # 0: count = 2048
+  // CHECK-NEXT:   {{.*}}  # 1: count = 1
+  // CHECK-NEXT:  Unregister struct.B#2#3#2:      2 fields
+  // CHECK-NEXT:   {{.*}} struct B
+  // CHECK-NEXT:   {{.*}}  size = 16, count = 2097153, ratio = 2097152, array access = 0
+  // CHECK-NEXT:   {{.*}}  # 0: count = 1
+  // CHECK-NEXT:   {{.*}}  # 1: count = 2097152
+  // CHECK-NEXT:  Unregister union.U#1#3:         1 fields
+  // CHECK-NEXT:  Duplicated struct.S#2#11#11:    2 fields
+  // CHECK-NEXT:  Unregister struct.D#3#14#11#11: 3 fields
+  // CHECK-NEXT:  {{.*}} struct D
+  // CHECK-NEXT:  {{.*}}  size = 128, count = 2097153, ratio = 2097153, array access = 0
+  // CHECK-NEXT:  {{.*}}  # 0: count = 1
+  // CHECK-NEXT:  {{.*}}  # 1: count = 0
+  // CHECK-NEXT:  {{.*}}  # 2: count = 2097152
+  // CHECK-NEXT:  Unregister struct.anon#3#11#11#11: 3 fields
+  // CHECK-NEXT:  {{.*}} struct anon
+  // CHECK-NEXT:  {{.*}}  size = 12, count = 2097152, ratio = 4194304, array access = 2097152
+  // CHECK-NEXT:  {{.*}}  # 0: count = 0
+  // CHECK-NEXT:  {{.*}}  # 1: count = 2097152
+  // CHECK-NEXT:  {{.*}}  # 2: count = 0
+  // CHECK-NEXT: {{.*}}EfficiencySanitizer: total struct field access count = 6293518
+}
+#endif // MAIN
diff --git a/test/esan/TestCases/verbose-simple.c b/test/esan/TestCases/verbose-simple.c
new file mode 100644
index 0000000..0d867bf
--- /dev/null
+++ b/test/esan/TestCases/verbose-simple.c
@@ -0,0 +1,14 @@
+// RUN: %clang_esan_frag -O0 %s -o %t 2>&1
+// RUN: %env_esan_opts="verbosity=1 log_exe_name=1" %run %t 2>&1 | FileCheck %s
+
+int main(int argc, char **argv) {
+  // CHECK:      in esan::initializeLibrary
+  // (There can be a re-exec for stack limit here.)
+  // CHECK:      Shadow scale=2 offset=0x440000000000
+  // CHECK-NEXT: Shadow #0: [110000000000-114000000000) (256GB)
+  // CHECK-NEXT: Shadow #1: [124000000000-12c000000000) (512GB)
+  // CHECK-NEXT: Shadow #2: [148000000000-150000000000) (512GB)
+  // CHECK-NEXT: in esan::finalizeLibrary
+  // CHECK-NEXT: ==verbose-simple{{.*}}EfficiencySanitizer: total struct field access count = 0
+  return 0;
+}
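A quick sanity check of the sizes in the verbose output above (my own arithmetic from the printed ranges, not a description of the internal esan mapping): with Shadow scale=2, four application bytes map to one shadow byte, so a 256 GB shadow region covers 1 TB of application address space and a 512 GB region covers 2 TB.

    // Compile-time restatement of that arithmetic (illustrative only).
    static_assert(0x114000000000ULL - 0x110000000000ULL == (1ULL << 38),
                  "Shadow #0 spans 2^38 bytes, i.e. 256 GB");
    static_assert((1ULL << 38) * 4 == (1ULL << 40),
                  "at scale 2 that shadows 2^40 bytes, i.e. 1 TB of app memory");

    int main() { return 0; }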
diff --git a/test/esan/TestCases/workingset-early-fault.c b/test/esan/TestCases/workingset-early-fault.c
new file mode 100644
index 0000000..1c420c3
--- /dev/null
+++ b/test/esan/TestCases/workingset-early-fault.c
@@ -0,0 +1,33 @@
+// Test shadow faults during esan initialization as well as
+// faults during dlsym's calloc during interceptor init.
+//
+// RUN: %clang_esan_wset %s -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Our goal is to emulate an instrumented allocator, whose calloc
+// invoked from dlsym will trigger shadow faults, to test an
+// early shadow fault during esan interceptor init.
+// We do this by replacing calloc:
+void *calloc(size_t size, size_t n) {
+  // Unfortunately we can't print anything here to let the test
+  // verify that we were called, because the sanitizer interceptors
+  // can't handle that during interceptor init.
+
+  // Ensure we trigger a shadow write fault:
+  int x[16];
+  x[0] = size;
+  // Now just emulate calloc.
+  void *res = malloc(size*n);
+  memset(res, 0, size*n);
+  return res;
+}
+
+int main(int argc, char **argv) {
+  printf("all done\n");
+  return 0;
+}
+// CHECK: all done
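For background on the dlsym remark above: a replacement calloc cannot simply look up the real calloc with dlsym(RTLD_NEXT, ...), because glibc's dlsym machinery may itself call calloc and recurse. This test sidesteps the problem by never forwarding, but a forwarding interceptor typically serves that one early request from a static buffer. A hedged sketch of that common pattern (names and sizes are mine; bounds and overflow checks are deliberately omitted):

    #ifndef _GNU_SOURCE
    #define _GNU_SOURCE 1  // for RTLD_NEXT
    #endif
    #include <dlfcn.h>     // link with -ldl on older glibc
    #include <stddef.h>
    #include <string.h>

    static char early_buf[4096];  // serves dlsym's own early calloc call
    static size_t early_used = 0;

    extern "C" void *calloc(size_t n, size_t size) {
      static void *(*real_calloc)(size_t, size_t) = nullptr;
      static bool resolving = false;
      if (!real_calloc) {
        if (resolving) {
          // Re-entered from dlsym itself: hand out zeroed static memory.
          void *p = early_buf + early_used;
          early_used += n * size;
          return memset(p, 0, n * size);
        }
        resolving = true;
        real_calloc = reinterpret_cast<void *(*)(size_t, size_t)>(
            dlsym(RTLD_NEXT, "calloc"));
        resolving = false;
      }
      return real_calloc(n, size);
    }

    int main() { return 0; }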
diff --git a/test/esan/TestCases/workingset-memset.cpp b/test/esan/TestCases/workingset-memset.cpp
new file mode 100644
index 0000000..9c972ec
--- /dev/null
+++ b/test/esan/TestCases/workingset-memset.cpp
@@ -0,0 +1,20 @@
+// RUN: %clang_esan_wset -O0 %s -o %t 2>&1
+// RUN: %run %t 2>&1 | FileCheck %s
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <assert.h>
+#include <string.h>
+
+int main(int argc, char **argv) {
+  const int size = 128*1024*1024;
+  char *p = (char *)mmap(0, size, PROT_READ | PROT_WRITE,
+                         MAP_ANON | MAP_PRIVATE, -1, 0);
+  // Test the slowpath at different cache line boundaries.
+  for (int i = 0; i < 630; i++)
+    memset((char *)p + 63*i, i, 63*i);
+  munmap(p, size);
+  return 0;
+  // CHECK: {{.*}} EfficiencySanitizer: the total working set size: 77 KB (12{{[0-9]+}} cache lines)
+}
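A rough check of why the CHECK line above expects "77 KB" and a line count starting with 12 (my own arithmetic, not derived from the runtime): the largest iteration writes 63*629 bytes starting at offset 63*629, so the touched region spans about 2*63*629 = 79254 bytes, which is roughly 77 KB, or about 1238 of the 64-byte cache lines the tool counts.

    // Compile-time restatement of the estimate (illustrative only).
    static_assert(2 * 63 * 629 == 79254, "bytes spanned by the largest memset");
    static_assert(79254 / 1024 == 77, "which truncates to 77 KB");
    static_assert(79254 / 64 == 1238, "and to roughly 1238 cache lines (12xx)");

    int main() { return 0; }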
diff --git a/test/esan/TestCases/workingset-midreport.cpp b/test/esan/TestCases/workingset-midreport.cpp
new file mode 100644
index 0000000..2c29cf4
--- /dev/null
+++ b/test/esan/TestCases/workingset-midreport.cpp
@@ -0,0 +1,71 @@
+// RUN: %clang_esan_wset -O0 %s -o %t 2>&1
+// RUN: %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-ESAN
+
+// RUN: %clang -O0 %s -o %t 2>&1
+// RUN: %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ESAN
+
+#include <sanitizer/esan_interface.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+
+const int size = 0x1 << 25; // 524288 cache lines
+const int iters = 6;
+
+int main(int argc, char **argv) {
+  char *buf = (char *)mmap(0, size, PROT_READ | PROT_WRITE,
+                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  // To avoid flakiness stemming from whether the sideline thread
+  // is scheduled enough on a loaded test machine, we coordinate
+  // with esan itself:
+  if (__esan_get_sample_count) {
+    while (__esan_get_sample_count() < 4) {
+      for (int i = 0; i < size; ++i)
+        buf[i] = i;
+      sched_yield();
+    }
+  }
+  // Ensure a non-esan build works without ifdefs:
+  if (__esan_report) {
+    // We should get 2 roughly identical reports:
+    __esan_report();
+  }
+  munmap(buf, size);
+  fprintf(stderr, "all done\n");
+  // CHECK-NO-ESAN:   all done
+  // We only check for a few samples here to reduce the chance of flakiness:
+  // CHECK-ESAN:      =={{[0-9]+}}== Total number of samples: {{[0-9]+}}
+  // CHECK-ESAN-NEXT: =={{[0-9]+}}== Samples array #0 at period 20 ms
+  // CHECK-ESAN-NEXT: =={{[0-9]+}}==#   0: {{[ 0-9]+}} {{KB|MB|Bytes}} ({{[ 0-9]+}} cache lines)
+  // CHECK-ESAN-NEXT: =={{[0-9]+}}==#   1: {{[ 0-9]+}} {{KB|MB|Bytes}} ({{[ 0-9]+}} cache lines)
+  // CHECK-ESAN-NEXT: =={{[0-9]+}}==#   2: {{[ 0-9]+}} {{KB|MB|Bytes}} ({{[ 0-9]+}} cache lines)
+  // CHECK-ESAN-NEXT: =={{[0-9]+}}==#   3: {{[ 0-9]+}} {{KB|MB|Bytes}} ({{[ 0-9]+}} cache lines)
+  // CHECK-ESAN:      =={{[0-9]+}}== Samples array #1 at period 80 ms
+  // CHECK-ESAN-NEXT: =={{[0-9]+}}==#   0: {{[ 0-9]+}} {{KB|MB|Bytes}} ({{[ 0-9]+}} cache lines)
+  // CHECK-ESAN:      =={{[0-9]+}}== Samples array #2 at period 320 ms
+  // CHECK-ESAN:      =={{[0-9]+}}== Samples array #3 at period 1280 ms
+  // CHECK-ESAN:      =={{[0-9]+}}== Samples array #4 at period 5120 ms
+  // CHECK-ESAN:      =={{[0-9]+}}== Samples array #5 at period 20 sec
+  // CHECK-ESAN:      =={{[0-9]+}}== Samples array #6 at period 81 sec
+  // CHECK-ESAN:      =={{[0-9]+}}== Samples array #7 at period 327 sec
+  // CHECK-ESAN: {{.*}} EfficiencySanitizer: the total working set size: 32 MB (5242{{[0-9][0-9]}} cache lines)
+  // CHECK-ESAN-NEXT: all done
+  // CHECK-ESAN-NEXT: =={{[0-9]+}}== Total number of samples: {{[0-9]+}}
+  // CHECK-ESAN-NEXT: =={{[0-9]+}}== Samples array #0 at period 20 ms
+  // CHECK-ESAN-NEXT: =={{[0-9]+}}==#   0: {{[ 0-9]+}} {{KB|MB|Bytes}} ({{[ 0-9]+}} cache lines)
+  // CHECK-ESAN-NEXT: =={{[0-9]+}}==#   1: {{[ 0-9]+}} {{KB|MB|Bytes}} ({{[ 0-9]+}} cache lines)
+  // CHECK-ESAN-NEXT: =={{[0-9]+}}==#   2: {{[ 0-9]+}} {{KB|MB|Bytes}} ({{[ 0-9]+}} cache lines)
+  // CHECK-ESAN-NEXT: =={{[0-9]+}}==#   3: {{[ 0-9]+}} {{KB|MB|Bytes}} ({{[ 0-9]+}} cache lines)
+  // CHECK-ESAN:      =={{[0-9]+}}== Samples array #1 at period 80 ms
+  // CHECK-ESAN-NEXT: =={{[0-9]+}}==#   0: {{[ 0-9]+}} {{KB|MB|Bytes}} ({{[ 0-9]+}} cache lines)
+  // CHECK-ESAN:      =={{[0-9]+}}== Samples array #2 at period 320 ms
+  // CHECK-ESAN:      =={{[0-9]+}}== Samples array #3 at period 1280 ms
+  // CHECK-ESAN:      =={{[0-9]+}}== Samples array #4 at period 5120 ms
+  // CHECK-ESAN:      =={{[0-9]+}}== Samples array #5 at period 20 sec
+  // CHECK-ESAN:      =={{[0-9]+}}== Samples array #6 at period 81 sec
+  // CHECK-ESAN:      =={{[0-9]+}}== Samples array #7 at period 327 sec
+  // CHECK-ESAN: {{.*}} EfficiencySanitizer: the total working set size: 32 MB (5242{{[0-9][0-9]}} cache lines)
+  return 0;
+}
diff --git a/test/esan/TestCases/workingset-samples.cpp b/test/esan/TestCases/workingset-samples.cpp
new file mode 100644
index 0000000..cf198d2
--- /dev/null
+++ b/test/esan/TestCases/workingset-samples.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_esan_wset -O0 %s -o %t 2>&1
+// RUN: %run %t 2>&1 | FileCheck %s
+
+#include <sanitizer/esan_interface.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+
+const int size = 0x1 << 25; // 524288 cache lines
+const int iters = 6;
+
+int main(int argc, char **argv) {
+  char *buf = (char *)mmap(0, size, PROT_READ | PROT_WRITE,
+                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  // To avoid flakiness stemming from whether the sideline thread
+  // is scheduled enough on a loaded test machine, we coordinate
+  // with esan itself:
+  if (__esan_get_sample_count) {
+    while (__esan_get_sample_count() < 4) {
+      for (int i = 0; i < size; ++i)
+        buf[i] = i;
+      sched_yield();
+    }
+  }
+  munmap(buf, size);
+  // We only check for a few samples here to reduce the chance of flakiness.
+  // CHECK:      =={{[0-9]+}}== Total number of samples: {{[0-9]+}}
+  // CHECK-NEXT: =={{[0-9]+}}== Samples array #0 at period 20 ms
+  // CHECK-NEXT: =={{[0-9]+}}==#   0: {{[ 0-9]+}} {{KB|MB|Bytes}} ({{[ 0-9]+}} cache lines)
+  // CHECK-NEXT: =={{[0-9]+}}==#   1: {{[ 0-9]+}} {{KB|MB|Bytes}} ({{[ 0-9]+}} cache lines)
+  // CHECK-NEXT: =={{[0-9]+}}==#   2: {{[ 0-9]+}} {{KB|MB|Bytes}} ({{[ 0-9]+}} cache lines)
+  // CHECK-NEXT: =={{[0-9]+}}==#   3: {{[ 0-9]+}} {{KB|MB|Bytes}} ({{[ 0-9]+}} cache lines)
+  // CHECK:      =={{[0-9]+}}== Samples array #1 at period 80 ms
+  // CHECK-NEXT: =={{[0-9]+}}==#   0: {{[ 0-9]+}} {{KB|MB|Bytes}} ({{[ 0-9]+}} cache lines)
+  // CHECK:      =={{[0-9]+}}== Samples array #2 at period 320 ms
+  // CHECK:      =={{[0-9]+}}== Samples array #3 at period 1280 ms
+  // CHECK:      =={{[0-9]+}}== Samples array #4 at period 5120 ms
+  // CHECK:      =={{[0-9]+}}== Samples array #5 at period 20 sec
+  // CHECK:      =={{[0-9]+}}== Samples array #6 at period 81 sec
+  // CHECK:      =={{[0-9]+}}== Samples array #7 at period 327 sec
+  // CHECK: {{.*}} EfficiencySanitizer: the total working set size: 32 MB (5242{{[0-9][0-9]}} cache lines)
+  return 0;
+}
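The periods in the CHECK lines above follow a simple pattern (my reading of the expected output, not a claim about the runtime's configuration): each samples array covers a period four times longer than the previous one, starting at 20 ms, so array #5 is 20480 ms (printed as 20 sec), #6 is 81920 ms (81 sec) and #7 is 327680 ms (327 sec).

    // Compile-time restatement of the period arithmetic (illustrative only).
    static_assert(20 * (1 << (2 * 5)) == 20480, "array #5: about 20 sec");
    static_assert(20 * (1 << (2 * 6)) == 81920, "array #6: about 81 sec");
    static_assert(20 * (1 << (2 * 7)) == 327680, "array #7: about 327 sec");

    int main() { return 0; }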
diff --git a/test/esan/TestCases/workingset-signal-posix.cpp b/test/esan/TestCases/workingset-signal-posix.cpp
new file mode 100644
index 0000000..ba776fc
--- /dev/null
+++ b/test/esan/TestCases/workingset-signal-posix.cpp
@@ -0,0 +1,75 @@
+// RUN: %clang_esan_wset -O0 %s -o %t 2>&1
+// RUN: %run %t 2>&1 | FileCheck %s
+
+#include <assert.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+sigjmp_buf mark;
+
+static void SignalHandler(int Sig) {
+  if (Sig == SIGSEGV) {
+    fprintf(stderr, "Handling SIGSEGV for signal\n");
+    siglongjmp(mark, 1);
+  }
+  exit(1);
+}
+
+static void SigactionHandler(int Sig, siginfo_t *Info, void *Ctx) {
+  if (Sig == SIGSEGV) {
+    fprintf(stderr, "Handling SIGSEGV for sigaction\n");
+    siglongjmp(mark, 1);
+  }
+  exit(1);
+}
+
+int main(int argc, char **argv) {
+  __sighandler_t Prior = signal(SIGSEGV, SignalHandler);
+  assert(Prior == SIG_DFL);
+  if (sigsetjmp(mark, 1) == 0)
+    *((volatile int *)(ssize_t)argc) = 42; // Raise SIGSEGV
+  fprintf(stderr, "Past longjmp for signal\n");
+
+  Prior = signal(SIGSEGV, SIG_DFL);
+  assert(Prior == SignalHandler);
+
+  struct sigaction SigAct;
+  SigAct.sa_sigaction = SigactionHandler;
+  int Res = sigfillset(&SigAct.sa_mask);
+  assert(Res == 0);
+  SigAct.sa_flags = SA_SIGINFO;
+  Res = sigaction(SIGSEGV, &SigAct, NULL);
+  assert(Res == 0);
+
+  if (sigsetjmp(mark, 1) == 0)
+    *((volatile int *)(ssize_t)argc) = 42; // Raise SIGSEGV
+  fprintf(stderr, "Past longjmp for sigaction\n");
+
+  Res = sigaction(SIGSEGV, NULL, &SigAct);
+  assert(Res == 0);
+  assert(SigAct.sa_sigaction == SigactionHandler);
+
+  // Test blocking SIGSEGV and raising a shadow fault.
+  sigset_t Set;
+  sigemptyset(&Set);
+  sigaddset(&Set, SIGSEGV);
+  Res = sigprocmask(SIG_BLOCK, &Set, NULL);
+  // Make a large enough mapping that its start point will be before any
+  // prior library-region shadow access.
+  char *buf = (char *)mmap(0, 640*1024, PROT_READ | PROT_WRITE,
+                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  buf[0] = 4;
+  munmap(buf, 640*1024);
+  fprintf(stderr, "Past blocked-SIGSEGV shadow fault\n");
+
+  return 0;
+}
+// CHECK:      Handling SIGSEGV for signal
+// CHECK-NEXT: Past longjmp for signal
+// CHECK-NEXT: Handling SIGSEGV for sigaction
+// CHECK-NEXT: Past longjmp for sigaction
+// CHECK-NEXT: Past blocked-SIGSEGV shadow fault
+// CHECK:      {{.*}} EfficiencySanitizer: the total working set size: {{[0-9]+}} Bytes ({{[0-9][0-9]}} cache lines)
diff --git a/test/esan/TestCases/workingset-simple.cpp b/test/esan/TestCases/workingset-simple.cpp
new file mode 100644
index 0000000..c8a2d52
--- /dev/null
+++ b/test/esan/TestCases/workingset-simple.cpp
@@ -0,0 +1,30 @@
+// RUN: %clang_esan_wset -O0 %s -o %t 2>&1
+// RUN: %run %t 2>&1 | FileCheck %s
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <assert.h>
+
+const int size = 0x1 << 25; // 524288 cache lines
+const int line_size = 64;
+
+int main(int argc, char **argv) {
+  char *bufA = (char *)malloc(sizeof(char) * line_size);
+  char bufB[64];
+  char *bufC = (char *)mmap(0, size, PROT_READ | PROT_WRITE,
+                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  bufA[0] = 0;
+  // This additional access to the same line should not increase the line
+  // count, but it's difficult to make a non-flaky test that measures the
+  // lines down to the ones digit, so right now we're not really testing that.
+  // If we add a heap-only mode we may be able to be more precise.
+  bufA[1] = 0;
+  bufB[33] = 1;
+  for (int i = 0; i < size; i += line_size)
+    bufC[i] = 0;
+  free(bufA);
+  munmap(bufC, 0x4000);
+  // CHECK: {{.*}} EfficiencySanitizer: the total working set size: 32 MB (524{{[0-9][0-9][0-9]}} cache lines)
+  return 0;
+}
diff --git a/test/esan/Unit/circular_buffer.cpp b/test/esan/Unit/circular_buffer.cpp
new file mode 100644
index 0000000..00999a2
--- /dev/null
+++ b/test/esan/Unit/circular_buffer.cpp
@@ -0,0 +1,61 @@
+// RUN: %clangxx_unit -O0 %s -o %t 2>&1
+// RUN: %env_esan_opts="record_snapshots=0" %run %t 2>&1 | FileCheck %s
+
+#include "esan/esan_circular_buffer.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include <assert.h>
+#include <stdio.h>
+
+static const int TestBufCapacity = 4;
+
+// The buffer should have a capacity of TestBufCapacity.
+void testBuffer(__esan::CircularBuffer<int> *Buf) {
+  assert(Buf->size() == 0);
+  assert(Buf->empty());
+
+  Buf->push_back(1);
+  assert(Buf->back() == 1);
+  assert((*Buf)[0] == 1);
+  assert(Buf->size() == 1);
+  assert(!Buf->empty());
+
+  Buf->push_back(2);
+  Buf->push_back(3);
+  Buf->push_back(4);
+  Buf->push_back(5);
+  assert((*Buf)[0] == 2);
+  assert(Buf->size() == 4);
+
+  Buf->pop_back();
+  assert((*Buf)[0] == 2);
+  assert(Buf->size() == 3);
+
+  Buf->pop_back();
+  Buf->pop_back();
+  assert((*Buf)[0] == 2);
+  assert(Buf->size() == 1);
+  assert(!Buf->empty());
+
+  Buf->pop_back();
+  assert(Buf->empty());
+}
+
+int main()
+{
+  // Test initialize/free.
+  __esan::CircularBuffer<int> GlobalBuf;
+  GlobalBuf.initialize(TestBufCapacity);
+  testBuffer(&GlobalBuf);
+  GlobalBuf.free();
+
+  // Test constructor/free.
+  __esan::CircularBuffer<int> *LocalBuf;
+  static char placeholder[sizeof(*LocalBuf)];
+  LocalBuf = new(placeholder) __esan::CircularBuffer<int>(TestBufCapacity);
+  testBuffer(LocalBuf);
+  LocalBuf->free();
+
+  fprintf(stderr, "All checks passed.\n");
+  // CHECK: All checks passed.
+  return 0;
+}
diff --git a/test/esan/Unit/hashtable.cpp b/test/esan/Unit/hashtable.cpp
new file mode 100644
index 0000000..390a427
--- /dev/null
+++ b/test/esan/Unit/hashtable.cpp
@@ -0,0 +1,179 @@
+// RUN: %clangxx_unit -esan-instrument-loads-and-stores=0 -O0 %s -o %t 2>&1
+// RUN: %env_esan_opts="record_snapshots=0" %run %t 2>&1 | FileCheck %s
+
+#include "esan/esan_hashtable.h"
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+class MyData {
+ public:
+  MyData(const char *Str) : RefCount(0) { Buf = strdup(Str); }
+  ~MyData() {
+    fprintf(stderr, "  Destructor: %s.\n", Buf);
+    free(Buf);
+  }
+  bool operator==(MyData &Cmp) { return strcmp(Buf, Cmp.Buf) == 0; }
+  operator size_t() const {
+    size_t Res = 0;
+    for (int i = 0; i < strlen(Buf); ++i)
+      Res ^= Buf[i];
+    return Res;
+  }
+  char *Buf;
+  int RefCount;
+};
+
+// We use a smart pointer wrapper to free the payload on hashtable removal.
+struct MyDataPayload {
+  MyDataPayload() : Data(nullptr) {}
+  explicit MyDataPayload(MyData *Data) : Data(Data) { ++Data->RefCount; }
+  ~MyDataPayload() {
+    if (Data && --Data->RefCount == 0) {
+      fprintf(stderr, "Deleting %s.\n", Data->Buf);
+      delete Data;
+    }
+  }
+  MyDataPayload(const MyDataPayload &Copy) {
+    Data = Copy.Data;
+    ++Data->RefCount;
+  }
+  MyDataPayload & operator=(const MyDataPayload &Copy) {
+    if (this != &Copy) {
+      this->~MyDataPayload();
+      Data = Copy.Data;
+      ++Data->RefCount;
+    }
+    return *this;
+  }
+  bool operator==(MyDataPayload &Cmp) { return *Data == *Cmp.Data; }
+  operator size_t() const { return (size_t)*Data; }
+  MyData *Data;
+};
+
+int main()
+{
+  __esan::HashTable<int, int> IntTable;
+  assert(IntTable.size() == 0);
+
+  // Test iteration on an empty table.
+  int Count = 0;
+  for (auto Iter = IntTable.begin(); Iter != IntTable.end();
+       ++Iter, ++Count) {
+    // Empty.
+  }
+  assert(Count == 0);
+
+  bool Added = IntTable.add(4, 42);
+  assert(Added);
+  assert(!IntTable.add(4, 42));
+  assert(IntTable.size() == 1);
+  int Value;
+  bool Found = IntTable.lookup(4, Value);
+  assert(Found && Value == 42);
+
+  // Test iterator.
+  IntTable.lock();
+  for (auto Iter = IntTable.begin(); Iter != IntTable.end();
+       ++Iter, ++Count) {
+    assert((*Iter).Key == 4);
+    assert((*Iter).Data == 42);
+  }
+  IntTable.unlock();
+  assert(Count == 1);
+  assert(Count == IntTable.size());
+  assert(!IntTable.remove(5));
+  assert(IntTable.remove(4));
+
+  // Test a more complex payload.
+  __esan::HashTable<int, MyDataPayload> DataTable(4);
+  MyDataPayload NewData(new MyData("mystring"));
+  Added = DataTable.add(4, NewData);
+  assert(Added);
+  MyDataPayload FoundData;
+  Found = DataTable.lookup(4, FoundData);
+  assert(Found && strcmp(FoundData.Data->Buf, "mystring") == 0);
+  assert(!DataTable.remove(5));
+  assert(DataTable.remove(4));
+  // Test resize.
+  for (int i = 0; i < 4; ++i) {
+    MyDataPayload MoreData(new MyData("delete-at-end"));
+    Added = DataTable.add(i+1, MoreData);
+    assert(Added);
+    assert(!DataTable.add(i+1, MoreData));
+  }
+  for (int i = 0; i < 4; ++i) {
+    Found = DataTable.lookup(i+1, FoundData);
+    assert(Found && strcmp(FoundData.Data->Buf, "delete-at-end") == 0);
+  }
+  DataTable.lock();
+  Count = 0;
+  for (auto Iter = DataTable.begin(); Iter != DataTable.end();
+       ++Iter, ++Count) {
+    int Key = (*Iter).Key;
+    FoundData = (*Iter).Data;
+    assert(Key >= 1 && Key <= 4);
+    assert(strcmp(FoundData.Data->Buf, "delete-at-end") == 0);
+  }
+  DataTable.unlock();
+  assert(Count == 4);
+  assert(Count == DataTable.size());
+
+  // Ensure the iterator supports a range-based for loop.
+  DataTable.lock();
+  Count = 0;
+  for (auto Pair : DataTable) {
+    assert(Pair.Key >= 1 && Pair.Key <= 4);
+    assert(strcmp(Pair.Data.Data->Buf, "delete-at-end") == 0);
+    ++Count;
+  }
+  DataTable.unlock();
+  assert(Count == 4);
+  assert(Count == DataTable.size());
+
+  // Test payload freeing via smart pointer wrapper.
+  __esan::HashTable<MyDataPayload, MyDataPayload, true> DataKeyTable;
+  MyDataPayload DataA(new MyData("string AB"));
+  DataKeyTable.lock();
+  Added = DataKeyTable.add(DataA, DataA);
+  assert(Added);
+  Found = DataKeyTable.lookup(DataA, FoundData);
+  assert(Found && strcmp(FoundData.Data->Buf, "string AB") == 0);
+  MyDataPayload DataB(new MyData("string AB"));
+  Added = DataKeyTable.add(DataB, DataB);
+  assert(!Added);
+  DataKeyTable.remove(DataB); // Should free the DataA payload.
+  DataKeyTable.unlock();
+
+  // Test custom functors.
+  struct CustomHash {
+    size_t operator()(int Key) const { return Key % 4; }
+  };
+  struct CustomEqual {
+    bool operator()(int Key1, int Key2) const { return Key1 %4 == Key2 % 4; }
+  };
+  __esan::HashTable<int, int, false, CustomHash, CustomEqual> ModTable;
+  Added = ModTable.add(2, 42);
+  assert(Added);
+  Added = ModTable.add(6, 42);
+  assert(!Added);
+
+  fprintf(stderr, "All checks passed.\n");
+  return 0;
+}
+// CHECK:      Deleting mystring.
+// CHECK-NEXT:   Destructor: mystring.
+// CHECK-NEXT: All checks passed.
+// CHECK-NEXT: Deleting string AB.
+// CHECK-NEXT:   Destructor: string AB.
+// CHECK-NEXT: Deleting string AB.
+// CHECK-NEXT:   Destructor: string AB.
+// CHECK-NEXT: Deleting delete-at-end.
+// CHECK-NEXT:   Destructor: delete-at-end.
+// CHECK-NEXT: Deleting delete-at-end.
+// CHECK-NEXT:   Destructor: delete-at-end.
+// CHECK-NEXT: Deleting delete-at-end.
+// CHECK-NEXT:   Destructor: delete-at-end.
+// CHECK-NEXT: Deleting delete-at-end.
+// CHECK-NEXT:   Destructor: delete-at-end.
diff --git a/test/esan/lit.cfg b/test/esan/lit.cfg
new file mode 100644
index 0000000..cf16a6b
--- /dev/null
+++ b/test/esan/lit.cfg
@@ -0,0 +1,44 @@
+# -*- Python -*-
+
+import os
+
+# Setup config name.
+config.name = 'EfficiencySanitizer' + config.name_suffix
+
+# Setup source root.
+config.test_source_root = os.path.dirname(__file__)
+
+# Setup default compiler flags used with -fsanitize=efficiency option.
+base_cflags = ([config.target_cflags] + config.debug_info_flags)
+base_cxxflags = config.cxx_mode_flags + base_cflags
+
+frag_cflags = (["-fsanitize=efficiency-cache-frag"] + base_cflags)
+wset_cflags = (["-fsanitize=efficiency-working-set"] + base_cflags)
+esan_incdir = config.test_source_root + "/../../lib"
+unit_cxxflags = (["-I%s" % esan_incdir, "-std=c++11",
+                  # We need to link with the esan runtime.
+                  # Tests should pass %env_esan_opts="record_snapshots=0".
+                  "-fsanitize=efficiency-working-set"] + base_cxxflags)
+
+def build_invocation(compile_flags):
+  return " " + " ".join([config.clang] + compile_flags) + " "
+
+config.substitutions.append( ("%clang ",
+                              build_invocation(base_cflags)) )
+config.substitutions.append( ("%clang_esan_frag ",
+                              build_invocation(frag_cflags)) )
+config.substitutions.append( ("%clang_esan_wset ",
+                              build_invocation(wset_cflags)) )
+config.substitutions.append( ("%clangxx_unit",
+                              build_invocation(unit_cxxflags)) )
+
+default_esan_opts = ''
+config.substitutions.append(('%env_esan_opts=',
+                             'env ESAN_OPTIONS=' + default_esan_opts))
+
+# Default test suffixes.
+config.suffixes = ['.c', '.cpp']
+
+# EfficiencySanitizer tests are currently supported on Linux x86-64 only.
+if config.host_os not in ['Linux'] or config.target_arch != 'x86_64':
+  config.unsupported = True
diff --git a/test/esan/lit.site.cfg.in b/test/esan/lit.site.cfg.in
new file mode 100644
index 0000000..b631ce4
--- /dev/null
+++ b/test/esan/lit.site.cfg.in
@@ -0,0 +1,14 @@
+## Autogenerated by LLVM/Clang configuration.
+# Do not edit!
+
+# Tool-specific config options.
+config.name_suffix = "@ESAN_TEST_CONFIG_SUFFIX@"
+config.esan_lit_source_dir = "@ESAN_LIT_SOURCE_DIR@"
+config.target_cflags = "@ESAN_TEST_TARGET_CFLAGS@"
+config.target_arch = "@ESAN_TEST_TARGET_ARCH@"
+
+# Load common config for all compiler-rt lit tests.
+lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
+
+# Load tool-specific config that would do the real work.
+lit_config.load_config(config, "@ESAN_LIT_SOURCE_DIR@/lit.cfg")
diff --git a/test/interception/CMakeLists.txt b/test/interception/CMakeLists.txt
new file mode 100644
index 0000000..ff9e4b0
--- /dev/null
+++ b/test/interception/CMakeLists.txt
@@ -0,0 +1,17 @@
+set(INTERCEPTION_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
+set(INTERCEPTION_TESTSUITES)
+
+# Unit tests. There are currently no unit tests capable of running on Apple or
+# Android targets.
+if(COMPILER_RT_INCLUDE_TESTS AND NOT ANDROID AND NOT APPLE)
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg)
+  list(APPEND INTERCEPTION_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/Unit)
+  list(APPEND INTERCEPTION_TEST_DEPS InterceptionUnitTests)
+endif()
+
+add_lit_testsuite(check-interception "Running the Interception tests"
+  ${INTERCEPTION_TESTSUITES}
+  DEPENDS ${INTERCEPTION_TEST_DEPS})
+set_target_properties(check-interception PROPERTIES FOLDER "Compiler-RT Misc")
diff --git a/test/interception/Unit/lit.site.cfg.in b/test/interception/Unit/lit.site.cfg.in
new file mode 100644
index 0000000..5d3e2f9
--- /dev/null
+++ b/test/interception/Unit/lit.site.cfg.in
@@ -0,0 +1,14 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+# Load common config for all compiler-rt unit tests.
+lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/unittests/lit.common.unit.configured")
+
+# Setup config name.
+config.name = 'Interception-Unit'
+
+# Setup test source and exec root. For unit tests, we define
+# it as the build directory with the interception tests.
+# FIXME: De-hardcode this path.
+config.test_exec_root = os.path.join("@COMPILER_RT_BINARY_DIR@", "lib",
+                                     "interception", "tests")
+config.test_source_root = config.test_exec_root
diff --git a/test/lit.common.cfg b/test/lit.common.cfg
index 828ff15..64ad426 100644
--- a/test/lit.common.cfg
+++ b/test/lit.common.cfg
@@ -72,6 +72,9 @@
 if platform.system() == 'Windows' and '-win' in config.target_triple:
   config.environment['LIB'] = os.environ['LIB']
 
+if re.match(r'^x86_64.*-linux', config.target_triple):
+      config.available_features.add("x86_64-linux")
+
 if lit.util.isMacOSTriple(config.target_triple):
    config.available_features.add('darwin')
 
@@ -92,15 +95,17 @@
   # does not crash but exits with a non-zero exit code. We ought to merge
   # KillTheDoctor and not --crash to make the latter more useful and remove the
   # need for this substitution.
-  config.substitutions.append( ("%expect_crash ", "not KillTheDoctor ") )
+  config.expect_crash = "not KillTheDoctor "
 else:
-  config.substitutions.append( ("%expect_crash ", "not --crash ") )
+  config.expect_crash = "not --crash "
 
-# Add supported compiler_rt architectures to a list of available features.
-compiler_rt_arch = getattr(config, 'compiler_rt_arch', None)
-if compiler_rt_arch:
-  for arch in compiler_rt_arch.split(";"):
-    config.available_features.add(arch + "-supported-target")
+config.substitutions.append( ("%expect_crash ", config.expect_crash) )
+
+target_arch = getattr(config, 'target_arch', None)
+if target_arch:
+  config.available_features.add(target_arch + '-target-arch')
+  if target_arch in ['x86_64', 'i386', 'i686']:
+    config.available_features.add('x86-target-arch')
 
 compiler_rt_debug = getattr(config, 'compiler_rt_debug', False)
 if not compiler_rt_debug:
@@ -110,10 +115,12 @@
 if sanitizer_can_use_cxxabi:
   config.available_features.add('cxxabi')
 
-# Test lld if it is available.
 if config.has_lld:
   config.available_features.add('lld')
 
+if config.can_symbolize:
+  config.available_features.add('can-symbolize')
+
 lit.util.usePlatformSdkOnDarwin(config, lit_config)
 
 if config.host_os == 'Darwin':
@@ -132,6 +139,11 @@
   except:
     pass
 
+sancovcc_path = os.path.join(llvm_tools_dir, "sancov") 
+if os.path.exists(sancovcc_path):
+  config.available_features.add("has_sancovcc")
+  config.substitutions.append( ("%sancovcc ", sancovcc_path) )
+
 def is_darwin_lto_supported():
   return os.path.exists(os.path.join(config.llvm_shlib_dir, 'libLTO.dylib'))
 
diff --git a/test/lit.common.configured.in b/test/lit.common.configured.in
index 0543540..4472f59 100644
--- a/test/lit.common.configured.in
+++ b/test/lit.common.configured.in
@@ -1,5 +1,4 @@
-## Autogenerated by LLVM/Clang configuration.
-# Do not edit!
+@LIT_SITE_CFG_IN_HEADER@
 
 # Set attribute value if it is unset.
 def set_default(attr, value):
@@ -13,7 +12,7 @@
 set_default("target_arch", "@COMPILER_RT_DEFAULT_TARGET_ARCH@")
 set_default("host_os", "@HOST_OS@")
 set_default("llvm_build_mode", "@LLVM_BUILD_MODE@")
-set_default("llvm_src_root", "@LLVM_SOURCE_DIR@")
+set_default("llvm_src_root", "@LLVM_MAIN_SRC_DIR@")
 set_default("llvm_obj_root", "@LLVM_BINARY_DIR@")
 set_default("compiler_rt_src_root", "@COMPILER_RT_SOURCE_DIR@")
 set_default("compiler_rt_obj_root", "@COMPILER_RT_BINARY_DIR@")
@@ -22,13 +21,14 @@
 set_default("gold_executable", "@GOLD_EXECUTABLE@")
 set_default("clang", "@COMPILER_RT_TEST_COMPILER@")
 set_default("compiler_id", "@COMPILER_RT_TEST_COMPILER_ID@")
-set_default("compiler_rt_arch", "@COMPILER_RT_SUPPORTED_ARCH@")
 set_default("python_executable", "@PYTHON_EXECUTABLE@")
 set_default("compiler_rt_debug", @COMPILER_RT_DEBUG_PYBOOL@)
 set_default("compiler_rt_libdir", "@COMPILER_RT_LIBRARY_OUTPUT_DIR@")
 set_default("emulator", "@COMPILER_RT_EMULATOR@")
 set_default("sanitizer_can_use_cxxabi", @SANITIZER_CAN_USE_CXXABI_PYBOOL@)
 set_default("has_lld", @COMPILER_RT_HAS_LLD_SOURCES_PYBOOL@)
+set_default("can_symbolize", @CAN_SYMBOLIZE@)
+config.available_features.add('target-is-%s' % config.target_arch)
 
 # LLVM tools dir can be passed in lit parameters, so try to
 # apply substitution.
diff --git a/test/lsan/CMakeLists.txt b/test/lsan/CMakeLists.txt
index 6cca00a..e3d363a 100644
--- a/test/lsan/CMakeLists.txt
+++ b/test/lsan/CMakeLists.txt
@@ -1,21 +1,48 @@
 set(LSAN_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 
-set(LSAN_LIT_TEST_MODE "Standalone")
-configure_lit_site_cfg(
-  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
-  ${CMAKE_CURRENT_BINARY_DIR}/LsanConfig/lit.site.cfg)
+set(LSAN_TESTSUITES)
+ 
+set(LSAN_TEST_ARCH ${LSAN_SUPPORTED_ARCH})
+if(APPLE)
+  darwin_filter_host_archs(LSAN_SUPPORTED_ARCH LSAN_TEST_ARCH)
+endif()
 
-set(LSAN_LIT_TEST_MODE "AddressSanitizer")
-configure_lit_site_cfg(
-  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
-  ${CMAKE_CURRENT_BINARY_DIR}/AsanConfig/lit.site.cfg)
+foreach(arch ${LSAN_TEST_ARCH})
+  set(LSAN_TEST_TARGET_ARCH ${arch})
+  string(TOLOWER "-${arch}" LSAN_TEST_CONFIG_SUFFIX)
+  if(ANDROID OR ${arch} MATCHES "arm|aarch64")
+    # This is only true if we are cross-compiling.
+    # Build all tests with host compiler and use host tools.
+    set(LSAN_TEST_TARGET_CC ${COMPILER_RT_TEST_COMPILER})
+    set(LSAN_TEST_TARGET_CFLAGS ${COMPILER_RT_TEST_COMPILER_CFLAGS})
+  else()
+    get_target_flags_for_arch(${arch} LSAN_TEST_TARGET_CFLAGS)
+    string(REPLACE ";" " " LSAN_TEST_TARGET_CFLAGS "${LSAN_TEST_TARGET_CFLAGS}")
+  endif()
+
+  string(TOUPPER ${arch} ARCH_UPPER_CASE)
+  set(LSAN_LIT_TEST_MODE "Standalone")
+  set(CONFIG_NAME ${ARCH_UPPER_CASE}LsanConfig)
+
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg)
+  list(APPEND LSAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME})
+
+  set(CONFIG_NAME ${ARCH_UPPER_CASE}AsanConfig)
+  set(LSAN_LIT_TEST_MODE "AddressSanitizer")
+
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg)
+  list(APPEND LSAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME})
+endforeach()
 
 set(LSAN_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
 if(NOT COMPILER_RT_STANDALONE_BUILD)
   list(APPEND LSAN_TEST_DEPS lsan asan)
 endif()
 add_lit_testsuite(check-lsan "Running the LeakSanitizer tests"
-  ${CMAKE_CURRENT_BINARY_DIR}/LsanConfig
-  ${CMAKE_CURRENT_BINARY_DIR}/AsanConfig
+  ${LSAN_TESTSUITES}
   DEPENDS ${LSAN_TEST_DEPS})
-set_target_properties(check-lsan PROPERTIES FOLDER "LSan tests")
+set_target_properties(check-lsan PROPERTIES FOLDER "Compiler-RT Misc")
diff --git a/test/lsan/TestCases/guard-page.c b/test/lsan/TestCases/guard-page.c
new file mode 100644
index 0000000..5c70a9f
--- /dev/null
+++ b/test/lsan/TestCases/guard-page.c
@@ -0,0 +1,60 @@
+// Check that if LSan finds that SP doesn't point into the thread stack (e.g.
+// when swapcontext is used), LSan does not hit the guard page.
+// RUN: %clang_lsan %s -o %t && %run %t
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include <ucontext.h>
+
+pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+int ctxfunc_started = 0;
+
+static void die(const char* msg, int err) {
+  if (err == 0)
+    err = errno;
+  fprintf(stderr, "%s: %s\n", msg, strerror(err));
+  exit(EXIT_FAILURE);
+}
+
+static void ctxfunc() {
+  pthread_mutex_lock(&mutex);
+  ctxfunc_started = 1;
+  pthread_cond_signal(&cond);
+  pthread_mutex_unlock(&mutex);
+  // Leave this context alive when the program exits.
+  for (;;);
+}
+
+static void* thread(void* arg) {
+  (void)arg;
+  ucontext_t ctx;
+  void* stack;
+
+  if (getcontext(&ctx) < 0)
+    die("getcontext", 0);
+  stack = malloc(1 << 10);
+  if (stack == NULL)
+    die("malloc", 0);
+  ctx.uc_stack.ss_sp = stack;
+  ctx.uc_stack.ss_size = 1 << 10;
+  makecontext(&ctx, ctxfunc, 0);
+  setcontext(&ctx);
+  die("setcontext", 0);
+  return NULL;
+}
+
+int main() {
+  pthread_t tid;
+  int i;
+
+  pthread_mutex_lock(&mutex);
+  i = pthread_create(&tid, NULL, thread, NULL);
+  if (i != 0)
+    die("pthread_create", i);
+  while (!ctxfunc_started) pthread_cond_wait(&cond, &mutex);
+  pthread_mutex_unlock(&mutex);
+  return 0;
+}
diff --git a/test/lsan/TestCases/leak_check_before_thread_started.cc b/test/lsan/TestCases/leak_check_before_thread_started.cc
index 0bd4837..ca818e1 100644
--- a/test/lsan/TestCases/leak_check_before_thread_started.cc
+++ b/test/lsan/TestCases/leak_check_before_thread_started.cc
@@ -4,12 +4,19 @@
 // RUN: LSAN_OPTIONS="log_pointers=1:log_threads=1" %run %t
 #include <assert.h>
 #include <pthread.h>
+#include <stdio.h>
 #include <stdlib.h>
-#include <unistd.h>
+
+pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+bool flag = false;
 
 void *func(void *arg) {
-  sleep(1);
+  fprintf(stderr, "entered func()\n");
+  // This lock can never be acquired: the main thread never unlocks the mutex.
+  pthread_mutex_lock(&mutex);
   free(arg);
+  pthread_mutex_unlock(&mutex);
   return 0;
 }
 
@@ -22,6 +29,8 @@
 
   void *arg = malloc(1337);
   assert(arg);
+  // This mutex is never unlocked by the main thread.
+  pthread_mutex_lock(&mutex);
   int res = pthread_create(&thread_id, &attr, func, arg);
   assert(res == 0);
 }
diff --git a/test/lsan/TestCases/use_registers.cc b/test/lsan/TestCases/use_registers.cc
index ce11c3f..74301a2 100644
--- a/test/lsan/TestCases/use_registers.cc
+++ b/test/lsan/TestCases/use_registers.cc
@@ -27,6 +27,11 @@
       :
       : "r" (p)
       );
+#elif defined(__mips__)
+  asm ( "move $16, %0"
+      :
+      : "r" (p)
+      );
 #else
 #error "Test is not supported on this architecture."
 #endif
diff --git a/test/lsan/lit.common.cfg b/test/lsan/lit.common.cfg
index ba9c283..a04c113 100644
--- a/test/lsan/lit.common.cfg
+++ b/test/lsan/lit.common.cfg
@@ -27,8 +27,9 @@
   config.available_features.add('asan')
 else:
   lit_config.fatal("Unknown LSan test mode: %r" % lsan_lit_test_mode)
+config.name += config.name_suffix
 
-clang_cflags = ["-O0", "-m64"] + config.debug_info_flags
+clang_cflags = ["-O0", config.target_cflags] + config.debug_info_flags
 clang_cxxflags = config.cxx_mode_flags + clang_cflags
 clang_lsan_cflags = clang_cflags + lsan_cflags
 clang_lsan_cxxflags = clang_cxxflags + lsan_cflags
diff --git a/test/lsan/lit.site.cfg.in b/test/lsan/lit.site.cfg.in
index 7d2877b..de89347 100644
--- a/test/lsan/lit.site.cfg.in
+++ b/test/lsan/lit.site.cfg.in
@@ -1,8 +1,13 @@
-# Load common config for all compiler-rt lit tests.
-lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
+@LIT_SITE_CFG_IN_HEADER@
 
 # Tool-specific config options.
+config.name_suffix = "@LSAN_TEST_CONFIG_SUFFIX@"
+config.target_cflags = "@LSAN_TEST_TARGET_CFLAGS@"
 config.lsan_lit_test_mode = "@LSAN_LIT_TEST_MODE@"
+config.target_arch = "@LSAN_TEST_TARGET_ARCH@"
+
+# Load common config for all compiler-rt lit tests.
+lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
 
 # Load tool-specific config that would do the real work.
 lit_config.load_config(config, "@LSAN_LIT_SOURCE_DIR@/lit.common.cfg")
diff --git a/test/msan/CMakeLists.txt b/test/msan/CMakeLists.txt
index 08786ee..176fb4a 100644
--- a/test/msan/CMakeLists.txt
+++ b/test/msan/CMakeLists.txt
@@ -1,8 +1,33 @@
 set(MSAN_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 
-configure_lit_site_cfg(
-  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
-  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg)
+set(MSAN_TESTSUITES)
+
+set(MSAN_TEST_ARCH ${MSAN_SUPPORTED_ARCH})
+if(APPLE)
+  darwin_filter_host_archs(MSAN_SUPPORTED_ARCH MSAN_TEST_ARCH)
+endif()
+
+foreach(arch ${MSAN_TEST_ARCH})
+  set(MSAN_TEST_TARGET_ARCH ${arch})
+  string(TOLOWER "-${arch}" MSAN_TEST_CONFIG_SUFFIX)
+  if(ANDROID OR ${arch} MATCHES "arm|aarch64")
+    # This is only true if we are cross-compiling.
+    # Build all tests with host compiler and use host tools.
+    set(MSAN_TEST_TARGET_CC ${COMPILER_RT_TEST_COMPILER})
+    set(MSAN_TEST_TARGET_CFLAGS ${COMPILER_RT_TEST_COMPILER_CFLAGS})
+  else()
+    get_target_flags_for_arch(${arch} MSAN_TEST_TARGET_CFLAGS)
+    string(REPLACE ";" " " MSAN_TEST_TARGET_CFLAGS "${MSAN_TEST_TARGET_CFLAGS}")
+  endif()
+
+  string(TOUPPER ${arch} ARCH_UPPER_CASE)
+  set(CONFIG_NAME ${ARCH_UPPER_CASE}Config)
+
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg)
+  list(APPEND MSAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME})
+endforeach()
 
 set(MSAN_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
 if(NOT COMPILER_RT_STANDALONE_BUILD)
@@ -14,10 +39,11 @@
     ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in
     ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg)
   list(APPEND MSAN_TEST_DEPS MsanUnitTests)
+  list(APPEND MSAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/Unit)
 endif()
 
 add_lit_testsuite(check-msan "Running the MemorySanitizer tests"
-  ${CMAKE_CURRENT_BINARY_DIR}
+  ${MSAN_TESTSUITES}
   DEPENDS ${MSAN_TEST_DEPS}
   )
-set_target_properties(check-msan PROPERTIES FOLDER "MSan tests")
+set_target_properties(check-msan PROPERTIES FOLDER "Compiler-RT Misc")
diff --git a/test/msan/Linux/cmsghdr.cc b/test/msan/Linux/cmsghdr.cc
new file mode 100644
index 0000000..d18415a
--- /dev/null
+++ b/test/msan/Linux/cmsghdr.cc
@@ -0,0 +1,103 @@
+// RUN: %clangxx_msan %s -std=c++11 -DSENDMSG -DPOISONFD -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=SENDMSG
+// RUN: %clangxx_msan %s -std=c++11 -DSENDMSG -DPOISONCRED -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=SENDMSG
+// RUN: %clangxx_msan %s -std=c++11 -DSENDMSG -DPOISONLEN -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=SENDMSG
+// RUN: %clangxx_msan %s -std=c++11 -DSENDMSG -DPOISONLEVEL -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=SENDMSG
+// RUN: %clangxx_msan %s -std=c++11 -DSENDMSG -DPOISONTYPE -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=SENDMSG
+// RUN: %clangxx_msan %s -std=c++11 -DSENDMSG -DPOISONLEN2 -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=SENDMSG
+// RUN: %clangxx_msan %s -std=c++11 -DSENDMSG -DPOISONLEVEL2 -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=SENDMSG
+// RUN: %clangxx_msan %s -std=c++11 -DSENDMSG -DPOISONTYPE2 -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=SENDMSG
+// RUN: %clangxx_msan %s -std=c++11 -DSENDMSG -o %t && %run %t 2>&1 | FileCheck %s --check-prefix=NEGATIVE
+
+// UNSUPPORTED: android
+
+// XFAIL: target-is-mips64el
+
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <netdb.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sanitizer/msan_interface.h>
+
+const int kBufSize = 10;
+
+int main() {
+  int ret;
+  char buf[kBufSize] = {0};
+  pthread_t client_thread;
+  struct sockaddr_un serveraddr;
+
+  int sock[2];
+  ret = socketpair(AF_UNIX, SOCK_STREAM, 0, sock);
+  assert(ret == 0);
+
+  int sockfd = sock[0];
+
+  struct iovec iov[] = {{buf, 10}};
+  struct msghdr msg = {0};
+  msg.msg_iov = iov;
+  msg.msg_iovlen = 1;
+  msg.msg_flags = 0;
+
+  static const int kNumFds = 3;
+  char controlbuf[CMSG_SPACE(kNumFds * sizeof(int)) +
+                  CMSG_SPACE(sizeof(struct ucred))];
+  msg.msg_control = &controlbuf;
+  msg.msg_controllen = sizeof(controlbuf);
+
+  struct cmsghdr *cmsg = (struct cmsghdr *)&controlbuf;
+  assert(cmsg);
+  int myfds[kNumFds];
+  for (int &fd : myfds)
+    fd = sockfd;
+#ifdef POISONFD
+  __msan_poison(&myfds[1], sizeof(int));
+#endif
+  cmsg->cmsg_level = SOL_SOCKET;
+  cmsg->cmsg_type = SCM_RIGHTS;
+  cmsg->cmsg_len = CMSG_LEN(kNumFds * sizeof(int));
+  memcpy(CMSG_DATA(cmsg), myfds, kNumFds * sizeof(int));
+#ifdef POISONLEVEL
+  __msan_poison(&cmsg->cmsg_level, sizeof(cmsg->cmsg_level));
+#endif
+#ifdef POISONTYPE
+  __msan_poison(&cmsg->cmsg_type, sizeof(cmsg->cmsg_type));
+#endif
+#ifdef POISONLEN
+  __msan_poison(&cmsg->cmsg_len, sizeof(cmsg->cmsg_len));
+#endif
+
+  cmsg = (struct cmsghdr *)(&controlbuf[CMSG_SPACE(kNumFds * sizeof(int))]);
+  assert(cmsg);
+  struct ucred cred = {getpid(), getuid(), getgid()};
+#ifdef POISONCRED
+  __msan_poison(&cred.uid, sizeof(cred.uid));
+#endif
+  cmsg->cmsg_level = SOL_SOCKET;
+  cmsg->cmsg_type = SCM_CREDENTIALS;
+  cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
+  memcpy(CMSG_DATA(cmsg), &cred, sizeof(struct ucred));
+#ifdef POISONLEVEL2
+  __msan_poison(&cmsg->cmsg_level, sizeof(cmsg->cmsg_level));
+#endif
+#ifdef POISONTYPE2
+  __msan_poison(&cmsg->cmsg_type, sizeof(cmsg->cmsg_type));
+#endif
+#ifdef POISONLEN2
+  __msan_poison(&cmsg->cmsg_len, sizeof(cmsg->cmsg_len));
+#endif
+
+  ret = sendmsg(sockfd, &msg, 0);
+  // SENDMSG: MemorySanitizer: use-of-uninitialized-value
+  if (ret == -1) printf("%d: %s\n", errno, strerror(errno));
+  assert(ret > 0);
+
+  fprintf(stderr, "== done\n");
+  // NEGATIVE: == done
+  return 0;
+}
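As a side note (not part of the patch), the control-message buffer above is laid out by casting into the raw array; the same layout written with the standard cmsg macros looks roughly like this minimal sketch:

/* Sketch: building and walking one SCM_RIGHTS control message. */
#include <assert.h>
#include <string.h>
#include <sys/socket.h>

int main(void) {
  char control[CMSG_SPACE(sizeof(int))];
  struct msghdr msg;
  memset(&msg, 0, sizeof(msg));
  msg.msg_control = control;
  msg.msg_controllen = sizeof(control);

  struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
  int fd = 0;
  cmsg->cmsg_level = SOL_SOCKET;
  cmsg->cmsg_type = SCM_RIGHTS;
  cmsg->cmsg_len = CMSG_LEN(sizeof(int));
  memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));

  /* Walk the buffer the way a receiver would. */
  int n = 0;
  for (struct cmsghdr *c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c))
    ++n;
  assert(n == 1);
  return 0;
}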
diff --git a/test/msan/Linux/eventfd.cc b/test/msan/Linux/eventfd.cc
new file mode 100644
index 0000000..62e19b2
--- /dev/null
+++ b/test/msan/Linux/eventfd.cc
@@ -0,0 +1,20 @@
+// RUN: %clangxx_msan -O0 %s -o %t && %run %t 2>&1
+
+// XFAIL: target-is-mips64el
+
+#include <assert.h>
+#include <sys/eventfd.h>
+
+#include <sanitizer/msan_interface.h>
+
+int main(int argc, char *argv[]) {
+  int efd = eventfd(42, 0);
+  assert(efd >= 0);
+
+  eventfd_t v;
+  int ret = eventfd_read(efd, &v);
+  assert(ret == 0);
+  __msan_check_mem_is_initialized(&v, sizeof(v));
+
+  assert(v == 42);
+}
diff --git a/test/msan/Linux/fopencookie.cc b/test/msan/Linux/fopencookie.cc
index e5b8f93..551e891 100644
--- a/test/msan/Linux/fopencookie.cc
+++ b/test/msan/Linux/fopencookie.cc
@@ -2,6 +2,8 @@
 // RUN: %clangxx_msan -std=c++11 -O0 %s -o %t && %run %t
 // RUN: %clangxx_msan -std=c++11 -fsanitize-memory-track-origins -O0 %s -o %t && %run %t
 
+// XFAIL: target-is-mips64el
+
 #include <assert.h>
 #include <pthread.h>
 #include <stdint.h>
diff --git a/test/msan/Linux/forkpty.cc b/test/msan/Linux/forkpty.cc
index ae5c7d9..7a80945 100644
--- a/test/msan/Linux/forkpty.cc
+++ b/test/msan/Linux/forkpty.cc
@@ -1,4 +1,7 @@
 // RUN: %clangxx_msan -O0 -g %s -lutil -o %t && %run %t
+
+// XFAIL: target-is-mips64el
+
 #include <assert.h>
 #include <pty.h>
 
diff --git a/test/msan/Linux/getresid.cc b/test/msan/Linux/getresid.cc
index f3c0914..06e1374 100644
--- a/test/msan/Linux/getresid.cc
+++ b/test/msan/Linux/getresid.cc
@@ -2,6 +2,8 @@
 // RUN: %clangxx_msan -O0 -D_FILE_OFFSET_BITS=64 %s -o %t && %run %t %p 2>&1
 // RUN: %clangxx_msan -O3 %s -o %t && %run %t %p 2>&1
 
+// XFAIL: target-is-mips64el
+
 #include <assert.h>
 #include <unistd.h>
 
diff --git a/test/msan/Linux/glob.cc b/test/msan/Linux/glob.cc
index 1481861..50096c0 100644
--- a/test/msan/Linux/glob.cc
+++ b/test/msan/Linux/glob.cc
@@ -2,6 +2,8 @@
 // RUN: %clangxx_msan -O0 -D_FILE_OFFSET_BITS=64 %s -o %t && %run %t %p 2>&1 | FileCheck %s
 // RUN: %clangxx_msan -O3 %s -o %t && %run %t %p 2>&1 | FileCheck %s
 
+// XFAIL: target-is-mips64el
+
 #include <assert.h>
 #include <glob.h>
 #include <stdio.h>
diff --git a/test/msan/Linux/glob_altdirfunc.cc b/test/msan/Linux/glob_altdirfunc.cc
index cb7fe09..d0cb4bc 100644
--- a/test/msan/Linux/glob_altdirfunc.cc
+++ b/test/msan/Linux/glob_altdirfunc.cc
@@ -2,6 +2,8 @@
 // RUN: %clangxx_msan -O0 -D_FILE_OFFSET_BITS=64 %s -o %t && %run %t %p 2>&1 | FileCheck %s
 // RUN: %clangxx_msan -O3 %s -o %t && %run %t %p 2>&1 | FileCheck %s
 
+// XFAIL: target-is-mips64el
+
 #include <assert.h>
 #include <glob.h>
 #include <stdio.h>
diff --git a/test/msan/Linux/glob_nomatch.cc b/test/msan/Linux/glob_nomatch.cc
index fa132c8..5845513 100644
--- a/test/msan/Linux/glob_nomatch.cc
+++ b/test/msan/Linux/glob_nomatch.cc
@@ -1,6 +1,8 @@
 // RUN: %clangxx_msan -O0 %s -o %t && %run %t %p
 // RUN: %clangxx_msan -O3 %s -o %t && %run %t %p
 
+// XFAIL: target-is-mips64el
+
 #include <assert.h>
 #include <glob.h>
 #include <stdio.h>
diff --git a/test/msan/Linux/ioctl_sound.cc b/test/msan/Linux/ioctl_sound.cc
index fb36c52..d7b38fa 100644
--- a/test/msan/Linux/ioctl_sound.cc
+++ b/test/msan/Linux/ioctl_sound.cc
@@ -1,6 +1,8 @@
 // RUN: %clangxx_msan -O0 -g %s -o %t && %run %t
 // RUN: %clangxx_msan -O3 -g %s -o %t && %run %t
 
+// XFAIL: target-is-mips64el
+
 #include <assert.h>
 #include <fcntl.h>
 #include <sound/asound.h>
diff --git a/test/msan/Linux/mallinfo.cc b/test/msan/Linux/mallinfo.cc
index 545ae93..577a193 100644
--- a/test/msan/Linux/mallinfo.cc
+++ b/test/msan/Linux/mallinfo.cc
@@ -1,6 +1,8 @@
 // RUN: %clangxx_msan -O0 -g %s -o %t && %run %t
 // REQUIRES: stable-runtime
 
+// XFAIL: target-is-mips64el
+
 #include <assert.h>
 #include <malloc.h>
 
diff --git a/test/msan/Linux/mincore.cc b/test/msan/Linux/mincore.cc
index 35f5713..a229d4b 100644
--- a/test/msan/Linux/mincore.cc
+++ b/test/msan/Linux/mincore.cc
@@ -1,5 +1,7 @@
 // RUN: %clangxx_msan -std=c++11 -O0 %s -o %t && %run %t
 
+// XFAIL: target-is-mips64el
+
 #include <assert.h>
 #include <unistd.h>
 #include <sys/mman.h>
diff --git a/test/msan/Linux/obstack.cc b/test/msan/Linux/obstack.cc
index f1f53be..f5b4fc3 100644
--- a/test/msan/Linux/obstack.cc
+++ b/test/msan/Linux/obstack.cc
@@ -1,6 +1,8 @@
 // RUN: %clangxx_msan -O0 -g %s -o %t && %run %t
 // RUN: %clangxx_msan -O0 -g -DPOSITIVE %s -o %t && not %run %t |& FileCheck %s
 
+// XFAIL: target-is-mips64el
+
 #include <obstack.h>
 #include <sanitizer/msan_interface.h>
 #include <stdlib.h>
@@ -30,7 +32,7 @@
       __msan_check_mem_is_initialized(p, sizeof(data) + 1);
     }
     // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
-    // CHECK: #0 0x{{.*}} in main{{.*}}obstack.cc:[[@LINE-30]]
+    // CHECK: #0 0x{{.*}} in main{{.*}}obstack.cc:[[@LINE-3]]
 #endif
   }
   obstack_free(&obs, 0);
diff --git a/test/msan/Linux/process_vm_readv.cc b/test/msan/Linux/process_vm_readv.cc
index 601c0d2..4c7cafa 100644
--- a/test/msan/Linux/process_vm_readv.cc
+++ b/test/msan/Linux/process_vm_readv.cc
@@ -1,6 +1,8 @@
 // RUN: %clangxx_msan -std=c++11 -O0 %s -o %t && %run %t
 // RUN: %clangxx_msan -std=c++11 -O0 %s -o %t -DPOSITIVE && not %run %t |& FileCheck %s
 
+// XFAIL: target-is-mips64el
+
 #include <assert.h>
 #include <dlfcn.h>
 #include <sanitizer/msan_interface.h>
@@ -9,26 +11,31 @@
 #include <sys/types.h>
 #include <sys/uio.h>
 #include <unistd.h>
+#include <errno.h>
 
 typedef ssize_t (*process_vm_readwritev_fn)(pid_t, const iovec *, unsigned long,
                                             const iovec *, unsigned long,
                                             unsigned long);
 
-int main(void) {
-  // This requires glibc 2.15.
-  process_vm_readwritev_fn libc_process_vm_readv =
-      (process_vm_readwritev_fn)dlsym(RTLD_NEXT, "process_vm_readv");
-  if (!libc_process_vm_readv) {
 // Exit with success, emulating the expected output.
+int exit_dummy()
+{
 #ifdef POSITIVE
-    printf("process_vm_readv not found!\n");
+    printf("process_vm_readv not found or not implemented!\n");
     printf(
         "WARNING: MemorySanitizer: use-of-uninitialized-value (not really)\n");
     return 1;
 #else
     return 0;
 #endif
-  }
+}
+
+int main(void) {
+  // This requires glibc 2.15.
+  process_vm_readwritev_fn libc_process_vm_readv =
+      (process_vm_readwritev_fn)dlsym(RTLD_NEXT, "process_vm_readv");
+  if (!libc_process_vm_readv)
+    return exit_dummy();
 
   process_vm_readwritev_fn process_vm_readv =
       (process_vm_readwritev_fn)dlsym(RTLD_DEFAULT, "process_vm_readv");
@@ -44,6 +51,9 @@
 
   __msan_poison(&b, sizeof(b));
   ssize_t res = process_vm_readv(getpid(), iov_b, 2, iov_a, 2, 0);
+  if (errno == ENOSYS) // Function not implemented
+    return exit_dummy();
+
   assert(res == 30);
   __msan_check_mem_is_initialized(b + 10, 10);
   __msan_check_mem_is_initialized(b + 30, 20);
diff --git a/test/msan/Linux/sendmsg.cc b/test/msan/Linux/sendmsg.cc
new file mode 100644
index 0000000..e04559c
--- /dev/null
+++ b/test/msan/Linux/sendmsg.cc
@@ -0,0 +1,85 @@
+// RUN: %clangxx_msan %s -DSEND -DPOISON -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=SEND
+// RUN: %clangxx_msan %s -DSENDTO -DPOISON -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=SENDTO
+// RUN: %clangxx_msan %s -DSENDMSG -DPOISON -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=SENDMSG
+
+// RUN: %clangxx_msan %s -DSEND -o %t && %run %t 2>&1 | FileCheck %s --check-prefix=NEGATIVE
+// RUN: %clangxx_msan %s -DSENDTO -o %t && %run %t 2>&1 | FileCheck %s --check-prefix=NEGATIVE
+// RUN: %clangxx_msan %s -DSENDMSG -o %t && %run %t 2>&1 | FileCheck %s --check-prefix=NEGATIVE
+
+// RUN: %clangxx_msan %s -DSEND -DPOISON -o %t && \
+// RUN:   MSAN_OPTIONS=intercept_send=0 %run %t 2>&1 | FileCheck %s --check-prefix=NEGATIVE
+// RUN: %clangxx_msan %s -DSENDTO -DPOISON -o %t && \
+// RUN:   MSAN_OPTIONS=intercept_send=0 %run %t 2>&1 | FileCheck %s --check-prefix=NEGATIVE
+// RUN: %clangxx_msan %s -DSENDMSG -DPOISON -o %t && \
+// RUN:   MSAN_OPTIONS=intercept_send=0 %run %t 2>&1 | FileCheck %s --check-prefix=NEGATIVE
+
+// UNSUPPORTED: android
+
+// XFAIL: target-is-mips64el
+
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <netdb.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sanitizer/msan_interface.h>
+
+const int kBufSize = 10;
+int sockfd;
+
+int main() {
+  int ret;
+  char buf[kBufSize] = {0};
+  pthread_t client_thread;
+  struct sockaddr_in serveraddr;
+
+  sockfd = socket(AF_INET, SOCK_DGRAM, 0);
+
+  memset(&serveraddr, 0, sizeof(serveraddr));
+  serveraddr.sin_family = AF_INET;
+  serveraddr.sin_addr.s_addr = htonl(INADDR_ANY);
+  serveraddr.sin_port = 0;
+
+  bind(sockfd, (struct sockaddr *)&serveraddr, sizeof(serveraddr));
+  socklen_t addrlen = sizeof(serveraddr);
+  getsockname(sockfd, (struct sockaddr *)&serveraddr, &addrlen);
+
+#if defined(POISON)
+  __msan_poison(buf + 7, 1);
+#endif
+
+#if defined(SENDMSG)
+  struct iovec iov[2] = {{buf, 5}, {buf + 5, 5}};
+  struct msghdr msg;
+  msg.msg_name = &serveraddr;
+  msg.msg_namelen = addrlen;
+  msg.msg_iov = iov;
+  msg.msg_iovlen = 2;
+  msg.msg_control = 0;
+  msg.msg_controllen = 0;
+  msg.msg_flags = 0;
+#endif
+
+#if defined(SEND)
+  ret = connect(sockfd, (struct sockaddr *)&serveraddr, addrlen);
+  assert(ret == 0);
+  ret = send(sockfd, buf, kBufSize, 0);
+  // SEND: Uninitialized bytes in __interceptor_send at offset 7 inside [{{.*}}, 10)
+  assert(ret > 0);
+#elif defined(SENDTO)
+  ret =
+      sendto(sockfd, buf, kBufSize, 0, (struct sockaddr *)&serveraddr, addrlen);
+  // SENDTO: Uninitialized bytes in __interceptor_sendto at offset 7 inside [{{.*}}, 10)
+  assert(ret > 0);
+#elif defined(SENDMSG)
+  ret = sendmsg(sockfd, &msg, 0);
+  // SENDMSG: Uninitialized bytes in {{.*}} at offset 2 inside [{{.*}}, 5)
+  assert(ret > 0);
+#endif
+  fprintf(stderr, "== done\n");
+  // NEGATIVE: == done
+  return 0;
+}
diff --git a/test/msan/Linux/sunrpc.cc b/test/msan/Linux/sunrpc.cc
index c92ad63..8acb155 100644
--- a/test/msan/Linux/sunrpc.cc
+++ b/test/msan/Linux/sunrpc.cc
@@ -11,6 +11,8 @@
 // RUN: %clangxx_msan -g -O0 -DTYPE=u_quad_t -DFN=xdr_u_longlong_t -DUNINIT=1 %s -o %t && \
 // RUN:     not %run %t 2>&1 | FileCheck %s
 
+// XFAIL: target-is-mips64el
+
 #include <assert.h>
 #include <rpc/xdr.h>
 
diff --git a/test/msan/Linux/sunrpc_bytes.cc b/test/msan/Linux/sunrpc_bytes.cc
index 477637a..4d46d55 100644
--- a/test/msan/Linux/sunrpc_bytes.cc
+++ b/test/msan/Linux/sunrpc_bytes.cc
@@ -3,6 +3,8 @@
 // RUN: %clangxx_msan -g -O0 -DUNINIT=1 %s -o %t && \
 // RUN:     not %run %t 2>&1 | FileCheck %s
 
+// XFAIL: target-is-mips64el
+
 #include <assert.h>
 #include <string.h>
 #include <rpc/xdr.h>
diff --git a/test/msan/Linux/sunrpc_string.cc b/test/msan/Linux/sunrpc_string.cc
index 350222f..53bea26 100644
--- a/test/msan/Linux/sunrpc_string.cc
+++ b/test/msan/Linux/sunrpc_string.cc
@@ -3,6 +3,8 @@
 // RUN: %clangxx_msan -g -O0 -DUNINIT=1 %s -o %t && \
 // RUN:     not %run %t 2>&1 | FileCheck %s
 
+// XFAIL: target-is-mips64el
+
 #include <assert.h>
 #include <string.h>
 #include <rpc/xdr.h>
diff --git a/test/msan/Linux/syscalls.cc b/test/msan/Linux/syscalls.cc
index 78dba36..1287486 100644
--- a/test/msan/Linux/syscalls.cc
+++ b/test/msan/Linux/syscalls.cc
@@ -1,6 +1,8 @@
 // RUN: %clangxx_msan -O0 %s -o %t && %run %t 2>&1
 // RUN: %clangxx_msan -O3 %s -o %t && %run %t 2>&1
 
+// XFAIL: target-is-mips64el
+
 #include <assert.h>
 #include <errno.h>
 #include <glob.h>
@@ -19,7 +21,7 @@
    sanity of their behaviour. */
 
 int main(int argc, char *argv[]) {
-  char buf[1000];
+  char buf[1000] __attribute__((aligned(8)));
   const int kTen = 10;
   const int kFortyTwo = 42;
   memset(buf, 0, sizeof(buf));
@@ -111,5 +113,17 @@
   assert(__msan_test_shadow(&p, sizeof(p)) == -1);
   assert(__msan_test_shadow(buf, sizeof(buf)) >= 32);
 
+  __msan_poison(buf, sizeof(buf));
+  __sanitizer_syscall_post_pipe(0, (int *)buf);
+  assert(__msan_test_shadow(buf, sizeof(buf)) == 2 * sizeof(int));
+
+  __msan_poison(buf, sizeof(buf));
+  __sanitizer_syscall_post_pipe2(0, (int *)buf, 0);
+  assert(__msan_test_shadow(buf, sizeof(buf)) == 2 * sizeof(int));
+
+  __msan_poison(buf, sizeof(buf));
+  __sanitizer_syscall_post_socketpair(0, 0, 0, 0, (int *)buf);
+  assert(__msan_test_shadow(buf, sizeof(buf)) == 2 * sizeof(int));
+
   return 0;
 }
diff --git a/test/msan/Linux/syscalls_sigaction.cc b/test/msan/Linux/syscalls_sigaction.cc
new file mode 100644
index 0000000..84e010e
--- /dev/null
+++ b/test/msan/Linux/syscalls_sigaction.cc
@@ -0,0 +1,42 @@
+// RUN: %clangxx_msan -DPRE1 -O0 %s -o %t && not %run %t 2>&1
+// RUN: %clangxx_msan -DPRE2 -O0 %s -o %t && not %run %t 2>&1
+// RUN: %clangxx_msan -DPRE3 -O0 %s -o %t && not %run %t 2>&1
+// RUN: %clangxx_msan -O0 %s -o %t && %run %t 2>&1
+
+// XFAIL: target-is-mips64el
+
+#include <assert.h>
+#include <signal.h>
+#include <string.h>
+
+#include <sanitizer/linux_syscall_hooks.h>
+#include <sanitizer/msan_interface.h>
+
+struct my_kernel_sigaction {
+  long handler, flags, restorer;
+  uint64_t mask[20]; // larger than any known platform
+};
+
+int main() {
+  my_kernel_sigaction act = {}, oldact = {};
+
+#if defined(PRE1)
+  __msan_poison(&act.handler, sizeof(act.handler));
+  __sanitizer_syscall_pre_rt_sigaction(SIGUSR1, &act, &oldact, 20 * 8);
+#elif defined(PRE2)
+  __msan_poison(&act.flags, sizeof(act.flags));
+  __sanitizer_syscall_pre_rt_sigaction(SIGUSR1, &act, &oldact, 20 * 8);
+#elif defined(PRE3)
+  __msan_poison(&act.mask, 1);
+  __sanitizer_syscall_pre_rt_sigaction(SIGUSR1, &act, &oldact, 20 * 8);
+#else
+  // Uninit past the end of the mask is ignored.
+  __msan_poison(((char *)&act.mask) + 5, 1);
+  __sanitizer_syscall_pre_rt_sigaction(SIGUSR1, &act, &oldact, 5);
+
+  memset(&act, 0, sizeof(act));
+  __msan_poison(&oldact, sizeof(oldact));
+  __sanitizer_syscall_post_rt_sigaction(0, SIGUSR1, &act, &oldact, 5);
+  assert(__msan_test_shadow(&oldact, sizeof(oldact)) == sizeof(long)*3 + 5);
+#endif
+}
diff --git a/test/msan/Linux/tcgetattr.cc b/test/msan/Linux/tcgetattr.cc
index 454b7fd..7b6adbc 100644
--- a/test/msan/Linux/tcgetattr.cc
+++ b/test/msan/Linux/tcgetattr.cc
@@ -1,5 +1,7 @@
 // RUN: %clangxx_msan -O0 %s -o %t && %run %t %p
 
+// XFAIL: target-is-mips64el
+
 #include <assert.h>
 #include <glob.h>
 #include <stdio.h>
diff --git a/test/msan/Linux/xattr.cc b/test/msan/Linux/xattr.cc
index 86cc2cd..bead651 100644
--- a/test/msan/Linux/xattr.cc
+++ b/test/msan/Linux/xattr.cc
@@ -2,6 +2,8 @@
 // RUN: %clangxx_msan -O0 -D_FILE_OFFSET_BITS=64 %s -o %t && %run %t %p 2>&1
 // RUN: %clangxx_msan -O3 %s -o %t && %run %t %p 2>&1
 
+// XFAIL: target-is-mips64el
+
 #include <argz.h>
 #include <assert.h>
 #include <sys/types.h>
diff --git a/test/msan/Unit/lit.site.cfg.in b/test/msan/Unit/lit.site.cfg.in
index dc0e961..083a25b 100644
--- a/test/msan/Unit/lit.site.cfg.in
+++ b/test/msan/Unit/lit.site.cfg.in
@@ -1,5 +1,4 @@
-## Autogenerated by LLVM/Clang configuration.
-# Do not edit!
+@LIT_SITE_CFG_IN_HEADER@
 
 # Load common config for all compiler-rt unit tests.
 lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/unittests/lit.common.unit.configured")
diff --git a/test/msan/allocator_mapping.cc b/test/msan/allocator_mapping.cc
index f47d9a6..533128f 100644
--- a/test/msan/allocator_mapping.cc
+++ b/test/msan/allocator_mapping.cc
@@ -8,7 +8,7 @@
 // This test only makes sense for the 64-bit allocator. The 32-bit allocator
 // does not have a fixed mapping. Exclude platforms that use the 32-bit
 // allocator.
-// UNSUPPORTED: mips64,aarch64
+// UNSUPPORTED: target-is-mips64,target-is-mips64el,aarch64
 
 #include <assert.h>
 #include <stdio.h>
diff --git a/test/msan/allocator_returns_null.cc b/test/msan/allocator_returns_null.cc
index f4ea51d..c47dc2e 100644
--- a/test/msan/allocator_returns_null.cc
+++ b/test/msan/allocator_returns_null.cc
@@ -15,6 +15,8 @@
 // RUN: MSAN_OPTIONS=allocator_may_return_null=0 not %run %t realloc-after-malloc 2>&1 | FileCheck %s --check-prefix=CHECK-mrCRASH
 // RUN: MSAN_OPTIONS=allocator_may_return_null=1     %run %t realloc-after-malloc 2>&1 | FileCheck %s --check-prefix=CHECK-mrNULL
 
+// XFAIL: target-is-mips64el
+
 #include <limits.h>
 #include <stdlib.h>
 #include <string.h>
diff --git a/test/msan/backtrace.cc b/test/msan/backtrace.cc
index 9cb883c..a4dd69b 100644
--- a/test/msan/backtrace.cc
+++ b/test/msan/backtrace.cc
@@ -1,5 +1,7 @@
 // RUN: %clangxx_msan -O0 %s -o %t && %run %t
 
+// XFAIL: target-is-mips64el
+
 #include <assert.h>
 #include <execinfo.h>
 #include <stdio.h>
diff --git a/test/msan/c-strdup.c b/test/msan/c-strdup.c
index b1e02b9..4a121cb 100644
--- a/test/msan/c-strdup.c
+++ b/test/msan/c-strdup.c
@@ -3,6 +3,8 @@
 // RUN: %clang_msan -O2 %s -o %t && %run %t >%t.out 2>&1
 // RUN: %clang_msan -O3 %s -o %t && %run %t >%t.out 2>&1
 
+// XFAIL: target-is-mips64el
+
 // Test that strdup in C programs is intercepted.
 // GLibC headers translate strdup to __strdup at -O1 and higher.
 
diff --git a/test/msan/chained_origin.cc b/test/msan/chained_origin.cc
index ae72c10..9b30c74 100644
--- a/test/msan/chained_origin.cc
+++ b/test/msan/chained_origin.cc
@@ -15,6 +15,7 @@
 // RUN:     not %run %t >%t.out 2>&1
 // RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-HEAP < %t.out
 
+// XFAIL: target-is-mips64el
 
 #include <stdio.h>
 
@@ -48,19 +49,19 @@
 }
 
 // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
-// CHECK: {{#0 .* in main.*chained_origin.cc:47}}
+// CHECK: {{#0 .* in main.*chained_origin.cc:}}[[@LINE-4]]
 
 // CHECK: Uninitialized value was stored to memory at
-// CHECK: {{#0 .* in fn_h.*chained_origin.cc:35}}
-// CHECK: {{#1 .* in main.*chained_origin.cc:46}}
+// CHECK: {{#0 .* in fn_h.*chained_origin.cc:}}[[@LINE-19]]
+// CHECK: {{#1 .* in main.*chained_origin.cc:}}[[@LINE-9]]
 
 // CHECK: Uninitialized value was stored to memory at
-// CHECK: {{#0 .* in fn_g.*chained_origin.cc:25}}
-// CHECK: {{#1 .* in fn_f.*chained_origin.cc:30}}
-// CHECK: {{#2 .* in main.*chained_origin.cc:45}}
+// CHECK: {{#0 .* in fn_g.*chained_origin.cc:}}[[@LINE-33]]
+// CHECK: {{#1 .* in fn_f.*chained_origin.cc:}}[[@LINE-29]]
+// CHECK: {{#2 .* in main.*chained_origin.cc:}}[[@LINE-15]]
 
 // CHECK-STACK: Uninitialized value was created by an allocation of 'z' in the stack frame of function 'main'
-// CHECK-STACK: {{#0 .* in main.*chained_origin.cc:38}}
+// CHECK-STACK: {{#0 .* in main.*chained_origin.cc:}}[[@LINE-25]]
 
 // CHECK-HEAP: Uninitialized value was created by a heap allocation
-// CHECK-HEAP: {{#1 .* in main.*chained_origin.cc:40}}
+// CHECK-HEAP: {{#1 .* in main.*chained_origin.cc:}}[[@LINE-26]]
diff --git a/test/msan/chained_origin_empty_stack.cc b/test/msan/chained_origin_empty_stack.cc
index f1ed66b..0a5a9c3 100644
--- a/test/msan/chained_origin_empty_stack.cc
+++ b/test/msan/chained_origin_empty_stack.cc
@@ -1,6 +1,8 @@
 // RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -O3 %s -o %t && \
 // RUN:     MSAN_OPTIONS=store_context_size=1 not %run %t 2>&1 | FileCheck %s
 
+// XFAIL: target-is-mips64el
+
 // Test that stack trace for the intermediate store is not empty.
 
 // CHECK: MemorySanitizer: use-of-uninitialized-value
diff --git a/test/msan/chained_origin_limits.cc b/test/msan/chained_origin_limits.cc
index 90fd09a..0f97c11 100644
--- a/test/msan/chained_origin_limits.cc
+++ b/test/msan/chained_origin_limits.cc
@@ -62,6 +62,8 @@
 // RUN: MSAN_OPTIONS=origin_history_size=7,origin_history_per_stack_limit=0 not %run %t >%t.out 2>&1
 // RUN: FileCheck %s --check-prefix=CHECK7 < %t.out
 
+// XFAIL: target-is-mips64el
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
diff --git a/test/msan/chained_origin_memcpy.cc b/test/msan/chained_origin_memcpy.cc
index 3fe0b77..07474fb 100644
--- a/test/msan/chained_origin_memcpy.cc
+++ b/test/msan/chained_origin_memcpy.cc
@@ -15,6 +15,7 @@
 // RUN:     not %run %t >%t.out 2>&1
 // RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-Z2 < %t.out
 
+// XFAIL: target-is-mips64el
 
 #include <stdio.h>
 #include <string.h>
@@ -47,15 +48,15 @@
 }
 
 // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
-// CHECK: {{#0 .* in main .*chained_origin_memcpy.cc:46}}
+// CHECK: {{#0 .* in main .*chained_origin_memcpy.cc:}}[[@LINE-4]]
 
 // CHECK: Uninitialized value was stored to memory at
-// CHECK: {{#1 .* in fn_h.*chained_origin_memcpy.cc:38}}
+// CHECK: {{#1 .* in fn_h.*chained_origin_memcpy.cc:}}[[@LINE-15]]
 
 // CHECK: Uninitialized value was stored to memory at
-// CHECK: {{#0 .* in fn_g.*chained_origin_memcpy.cc:28}}
-// CHECK: {{#1 .* in fn_f.*chained_origin_memcpy.cc:33}}
+// CHECK: {{#0 .* in fn_g.*chained_origin_memcpy.cc:}}[[@LINE-28]]
+// CHECK: {{#1 .* in fn_f.*chained_origin_memcpy.cc:}}[[@LINE-24]]
 
 // CHECK-Z1: Uninitialized value was created by an allocation of 'z1' in the stack frame of function 'main'
 // CHECK-Z2: Uninitialized value was created by an allocation of 'z2' in the stack frame of function 'main'
-// CHECK: {{#0 .* in main.*chained_origin_memcpy.cc:41}}
+// CHECK: {{#0 .* in main.*chained_origin_memcpy.cc:}}[[@LINE-20]]
diff --git a/test/msan/chained_origin_with_signals.cc b/test/msan/chained_origin_with_signals.cc
index 43dbdcc..e371982 100644
--- a/test/msan/chained_origin_with_signals.cc
+++ b/test/msan/chained_origin_with_signals.cc
@@ -10,6 +10,8 @@
 // RUN:     not %run %t >%t.out 2>&1
 // RUN: FileCheck %s < %t.out
 
+// XFAIL: target-is-mips64el
+
 #include <signal.h>
 #include <stdio.h>
 #include <sys/types.h>
diff --git a/test/msan/check_mem_is_initialized.cc b/test/msan/check_mem_is_initialized.cc
index e1d3b11..461ce19 100644
--- a/test/msan/check_mem_is_initialized.cc
+++ b/test/msan/check_mem_is_initialized.cc
@@ -16,6 +16,8 @@
 // RUN: %clangxx_msan -fsanitize-memory-track-origins -O3 %s -o %t && not %run %t >%t.out 2>&1
 // RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-ORIGINS < %t.out
 
+// XFAIL: target-is-mips64el
+
 #include <sanitizer/msan_interface.h>
 #include <stdlib.h>
 
diff --git a/test/msan/coverage-levels.cc b/test/msan/coverage-levels.cc
index d71bfec..43b03e3 100644
--- a/test/msan/coverage-levels.cc
+++ b/test/msan/coverage-levels.cc
@@ -9,7 +9,9 @@
 // RUN: MSAN_OPTIONS=coverage=1:verbosity=1:coverage_dir=%T/coverage-levels not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK2 --check-prefix=CHECK_WARN
 // RUN: %clangxx_msan -O1 -fsanitize-coverage=edge  %s -o %t
 // RUN: MSAN_OPTIONS=coverage=1:verbosity=1:coverage_dir=%T/coverage-levels not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK3 --check-prefix=CHECK_WARN
-//
+
+// XFAIL: target-is-mips64el
+
 volatile int sink;
 int main(int argc, char **argv) {
   int var;
@@ -24,5 +26,5 @@
 // CHECK_WARN: WARNING: MemorySanitizer: use-of-uninitialized-value
 // CHECK_NOWARN-NOT: ERROR
 // CHECK1:  1 PCs written
-// CHECK2:  2 PCs written
-// CHECK3:  3 PCs written
+// CHECK2:  1 PCs written
+// CHECK3:  2 PCs written
diff --git a/test/msan/ctermid.cc b/test/msan/ctermid.cc
index a2818e6..e91ea71 100644
--- a/test/msan/ctermid.cc
+++ b/test/msan/ctermid.cc
@@ -1,5 +1,7 @@
 // RUN: %clangxx_msan -std=c++11 -O0 %s -o %t && %run %t
 
+// XFAIL: target-is-mips64el
+
 #include <sanitizer/msan_interface.h>
 #include <stdio.h>
 #include <string.h>
diff --git a/test/msan/cxa_atexit.cc b/test/msan/cxa_atexit.cc
index 70384b9..8210436 100644
--- a/test/msan/cxa_atexit.cc
+++ b/test/msan/cxa_atexit.cc
@@ -1,5 +1,7 @@
 // RUN: %clangxx_msan -O0 %s -o %t && %run %t %p
 
+// XFAIL: target-is-mips64el
+
 // PR17377: C++ module destructors get stale argument shadow.
 
 #include <stdio.h>
diff --git a/test/msan/death-callback.cc b/test/msan/death-callback.cc
index 08cf291..9aeac5b 100644
--- a/test/msan/death-callback.cc
+++ b/test/msan/death-callback.cc
@@ -7,6 +7,8 @@
 // RUN: %clangxx_msan -DMSANCB_SET %s -o %t && %run %t 2>&1 | \
 // RUN:     FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOCB
 
+// XFAIL: target-is-mips64el
+
 #include <sanitizer/msan_interface.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/msan/dlerror.cc b/test/msan/dlerror.cc
index 0ad5b35..d0335d9 100644
--- a/test/msan/dlerror.cc
+++ b/test/msan/dlerror.cc
@@ -1,8 +1,6 @@
 // RUN: %clangxx_msan -O0 %s -o %t && %run %t
-//
-// AArch64 shows fails with uninitialized bytes in __interceptor_strcmp from
-// dlfcn/dlerror.c:107 (glibc).
-// XFAIL: aarch64
+
+// XFAIL: target-is-mips64el
 
 #include <assert.h>
 #include <dlfcn.h>
diff --git a/test/msan/dtls_test.c b/test/msan/dtls_test.c
index 4036f71..49d95c4 100644
--- a/test/msan/dtls_test.c
+++ b/test/msan/dtls_test.c
@@ -4,7 +4,7 @@
 
    Regression test for a bug in msan/glibc integration,
    see https://sourceware.org/bugzilla/show_bug.cgi?id=16291
-   and https://code.google.com/p/memory-sanitizer/issues/detail?id=44
+   and https://github.com/google/sanitizers/issues/547
 */
 
 #ifndef BUILD_SO
diff --git a/test/msan/lit.cfg b/test/msan/lit.cfg
index 011ccd2..d23ff31 100644
--- a/test/msan/lit.cfg
+++ b/test/msan/lit.cfg
@@ -3,17 +3,18 @@
 import os
 
 # Setup config name.
-config.name = 'MemorySanitizer'
+config.name = 'MemorySanitizer' + getattr(config, 'name_suffix', 'default')
 
 # Setup source root.
 config.test_source_root = os.path.dirname(__file__)
 
 # Setup default compiler flags used with -fsanitize=memory option.
-clang_msan_cflags = ["-fsanitize=memory",
-                     "-mno-omit-leaf-frame-pointer",
-                     "-fno-omit-frame-pointer",
-                     "-fno-optimize-sibling-calls",
-                     "-m64"] + config.debug_info_flags
+clang_msan_cflags = (["-fsanitize=memory",
+                      "-mno-omit-leaf-frame-pointer",
+                      "-fno-omit-frame-pointer",
+                      "-fno-optimize-sibling-calls"] +
+                      [config.target_cflags] +
+                      config.debug_info_flags)
 # Some Msan tests leverage backtrace() which requires libexecinfo on FreeBSD.
 if config.host_os == 'FreeBSD':
   clang_msan_cflags += ["-lexecinfo"]
@@ -31,3 +32,6 @@
 # MemorySanitizer tests are currently supported on Linux only.
 if config.host_os not in ['Linux']:
   config.unsupported = True
+
+if config.target_arch != 'aarch64':
+  config.available_features.add('stable-runtime')
diff --git a/test/msan/lit.site.cfg.in b/test/msan/lit.site.cfg.in
index fb22a57..a9656f2 100644
--- a/test/msan/lit.site.cfg.in
+++ b/test/msan/lit.site.cfg.in
@@ -1,3 +1,10 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+# Tool-specific config options.
+config.name_suffix = "@MSAN_TEST_CONFIG_SUFFIX@"
+config.target_cflags = "@MSAN_TEST_TARGET_CFLAGS@"
+config.target_arch = "@MSAN_TEST_TARGET_ARCH@"
+
 # Load common config for all compiler-rt lit tests.
 lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
 
diff --git a/test/msan/memcmp_test.cc b/test/msan/memcmp_test.cc
index 95228eb..5ade58a 100644
--- a/test/msan/memcmp_test.cc
+++ b/test/msan/memcmp_test.cc
@@ -3,13 +3,16 @@
 // RUN: MSAN_OPTIONS=intercept_memcmp=0 %run %t
 
 #include <string.h>
+#include <stdio.h>
 int main(int argc, char **argv) {
   char a1[4];
   char a2[4];
   for (int i = 0; i < argc * 3; i++)
     a2[i] = a1[i] = i;
   int res = memcmp(a1, a2, 4);
-  return res;
+  if (!res)
+    printf("equals");
+  return 0;
   // CHECK: Uninitialized bytes in __interceptor_memcmp at offset 3
   // CHECK: MemorySanitizer: use-of-uninitialized-value
 }
diff --git a/test/msan/mmap.cc b/test/msan/mmap.cc
index 27a8bb2..01c1772 100644
--- a/test/msan/mmap.cc
+++ b/test/msan/mmap.cc
@@ -19,7 +19,9 @@
          (addr >= 0x510000000000ULL && addr < 0x600000000000ULL) ||
          (addr >= 0x700000000000ULL && addr < 0x800000000000ULL);
 #elif defined(__mips64)
-  return addr >= 0x00e000000000ULL;
+  return (addr >= 0x0000000000ULL && addr <= 0x0200000000ULL) ||
+         (addr >= 0xa200000000ULL && addr <= 0xc000000000ULL) ||
+         addr >= 0xe200000000ULL;
 #elif defined(__powerpc64__)
   return addr < 0x000100000000ULL || addr >= 0x300000000000ULL;
 #elif defined(__aarch64__)
diff --git a/test/msan/msan_print_shadow3.cc b/test/msan/msan_print_shadow3.cc
index b29f322..4783152 100644
--- a/test/msan/msan_print_shadow3.cc
+++ b/test/msan/msan_print_shadow3.cc
@@ -6,7 +6,7 @@
 
 int main(void) {
   unsigned long long x = 0; // For 8-byte alignment.
-  uint32_t x_s = 0x12345678U;
+  char x_s[4] = {0x87, 0x65, 0x43, 0x21};
   __msan_partial_poison(&x, &x_s, sizeof(x_s));
   __msan_print_shadow(&x, sizeof(x_s));
   return 0;
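As background (not part of the patch), __msan_partial_poison applies the bytes of its second argument as the shadow for the first, so spelling the shadow out as an explicit byte array, as done above, gives the same pattern regardless of host endianness; that rationale is an inference rather than something stated in this diff. A minimal sketch:

#include <sanitizer/msan_interface.h>

int main(void) {
  unsigned long long x = 0;    /* 8-byte aligned storage, as in the test */
  char x_s[4] = {1, 0, 1, 0};  /* nonzero shadow bits mark the corresponding bits as poisoned */
  __msan_partial_poison(&x, &x_s, sizeof(x_s));
  __msan_print_shadow(&x, sizeof(x_s));
  return 0;
}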
diff --git a/test/msan/param_tls_limit.cc b/test/msan/param_tls_limit.cc
index 1c504da..d34376a 100644
--- a/test/msan/param_tls_limit.cc
+++ b/test/msan/param_tls_limit.cc
@@ -20,6 +20,17 @@
 // In case of no overflow, it is still poisoned.
 #define NO_OVERFLOW(x) assert(__msan_test_shadow(&x, sizeof(x)) == 0)
 
+#if defined(__x86_64__)
+// On x86_64, if an argument lies partially outside the TLS, it is considered
+// completely unpoisoned.
+#define PARTIAL_OVERFLOW(x) OVERFLOW(x)
+#else
+// On other archs, larger arguments are split into multiple IR arguments,
+// so they are considered poisoned up to the TLS limit. Check the last byte:
+#define PARTIAL_OVERFLOW(x) assert(__msan_test_shadow((char *)(&(x) + 1) - 1, 1) == -1)
+#endif
+
+
 template<int N>
 struct S {
   char x[N];
@@ -34,17 +45,17 @@
 }
 
 void f801(S<801> s) {
-  OVERFLOW(s);
+  PARTIAL_OVERFLOW(s);
 }
 
 void f1000(S<1000> s) {
-  OVERFLOW(s);
+  PARTIAL_OVERFLOW(s);
 }
 
 void f_many(int a, double b, S<800> s, int c, double d) {
   NO_OVERFLOW(a);
   NO_OVERFLOW(b);
-  OVERFLOW(s);
+  PARTIAL_OVERFLOW(s);
   OVERFLOW(c);
   OVERFLOW(d);
 }
@@ -54,7 +65,7 @@
 void f_many2(int a, S<800 - 8 - 2> s, int c, double d) {
   NO_OVERFLOW(a);
   NO_OVERFLOW(s);
-  OVERFLOW(c);
+  PARTIAL_OVERFLOW(c);
   OVERFLOW(d);
 }
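For reference (not part of the patch), the PARTIAL_OVERFLOW check above relies on the __msan_test_shadow convention also used elsewhere in this diff: the call returns -1 when the whole range is initialized, and otherwise the offset of the first poisoned byte. A minimal standalone sketch:

#include <assert.h>
#include <sanitizer/msan_interface.h>

int main(void) {
  char buf[8];
  __msan_unpoison(buf, sizeof(buf));
  assert(__msan_test_shadow(buf, sizeof(buf)) == -1); /* fully initialized */
  __msan_poison(buf + 5, 3);
  assert(__msan_test_shadow(buf, sizeof(buf)) == 5);  /* first poisoned byte */
  return 0;
}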
 
diff --git a/test/msan/strlen_of_shadow.cc b/test/msan/strlen_of_shadow.cc
index 3066dd5..b9cf5f0 100644
--- a/test/msan/strlen_of_shadow.cc
+++ b/test/msan/strlen_of_shadow.cc
@@ -14,7 +14,7 @@
 #if defined(__x86_64__)
   return (char *)((uintptr_t)p ^ 0x500000000000ULL);
 #elif defined (__mips64)
-  return (char *)((uintptr_t)p & ~0x4000000000ULL);
+  return (char *)((uintptr_t)p ^ 0x8000000000ULL);
 #elif defined(__powerpc64__)
 #define LINEARIZE_MEM(mem) \
   (((uintptr_t)(mem) & ~0x200000000000ULL) ^ 0x100000000000ULL)
diff --git a/test/msan/vector_cvt.cc b/test/msan/vector_cvt.cc
index 633a8b1..5541436 100644
--- a/test/msan/vector_cvt.cc
+++ b/test/msan/vector_cvt.cc
@@ -1,6 +1,6 @@
 // RUN: %clangxx_msan -O0 %s -o %t && %run %t
 // RUN: %clangxx_msan -DPOSITIVE -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
-// REQUIRES: x86_64-supported-target
+// REQUIRES: x86_64-target-arch
 
 #include <emmintrin.h>
 
diff --git a/test/profile/CMakeLists.txt b/test/profile/CMakeLists.txt
index 0cf9048..0eb2b89 100644
--- a/test/profile/CMakeLists.txt
+++ b/test/profile/CMakeLists.txt
@@ -32,4 +32,4 @@
 add_lit_testsuite(check-profile "Running the profile tests"
   ${PROFILE_TESTSUITES}
   DEPENDS ${PROFILE_TEST_DEPS})
-set_target_properties(check-profile PROPERTIES FOLDER "Profile tests")
+set_target_properties(check-profile PROPERTIES FOLDER "Compiler-RT Misc")
diff --git a/test/profile/Inputs/comdat_rename.h b/test/profile/Inputs/comdat_rename.h
new file mode 100644
index 0000000..53e1007
--- /dev/null
+++ b/test/profile/Inputs/comdat_rename.h
@@ -0,0 +1,13 @@
+struct FOO {
+  FOO() : a(0), b(0) {}
+  int callee();
+  __attribute__((noinline)) void caller(int n) {
+      int r = callee();
+      if (r == 0) {
+        a += n;
+        b += 1;
+      }
+  }
+  int a;
+  int b;
+};
diff --git a/test/profile/Inputs/comdat_rename_1.cc b/test/profile/Inputs/comdat_rename_1.cc
new file mode 100644
index 0000000..688e305
--- /dev/null
+++ b/test/profile/Inputs/comdat_rename_1.cc
@@ -0,0 +1,33 @@
+#include "comdat_rename.h"
+// The out-of-line instance of 'callee' gets its profile data from the
+// external calls made to it from comdat_rename_2.cc.
+// Its inlined copy's profile data is different and is collected in the
+// context of 'caller'.
+int FOO::callee() {
+  // CHECK-LABEL: define {{.*}}callee{{.*}}
+  // CHECK-NOT: br i1 {{.*}}
+  // CHECK: br {{.*}}label{{.*}}, label %[[BB1:.*]], !prof ![[PD1:[0-9]+]]
+  // CHECK: {{.*}}[[BB1]]: 
+  if (b != 0)
+    return a / b;
+  if (a != 0)
+    return 10 / a;
+  return 0;
+}
+
+// This is the 'caller''s comdat copy (after renaming) in this module.
+// The profile counters include a copy of counters from 'callee':
+//
+// CHECK-LABEL: define {{.*}}caller{{.*}}
+// CHECK-NOT: br i1 {{.*}}
+// CHECK: br {{.*}}label{{.*}}, label %[[BB2:.*]], !prof ![[PD2:[0-9]+]]
+// CHECK: {{.*}}[[BB2]]: 
+// CHECK: br {{.*}}label{{.*}}, label %{{.*}}, !prof !{{.*}}
+// CHECK: br {{.*}}label %[[BB3:.*]], label %{{.*}} !prof ![[PD3:[0-9]+]]
+// CHECK: {{.*}}[[BB3]]: 
+//
+// CHECK:![[PD1]] = !{!"branch_weights", i32 0, i32 1}
+// CHECK:![[PD2]] = !{!"branch_weights", i32 1, i32 0}
+// CHECK:![[PD3]] = !{!"branch_weights", i32 {{.*}}, i32 0}
+
+void test(FOO *foo) { foo->caller(10); }
diff --git a/test/profile/Inputs/comdat_rename_2.cc b/test/profile/Inputs/comdat_rename_2.cc
new file mode 100644
index 0000000..5cad79c
--- /dev/null
+++ b/test/profile/Inputs/comdat_rename_2.cc
@@ -0,0 +1,18 @@
+#include "comdat_rename.h"
+extern void test(FOO *);
+FOO foo;
+int main() {
+  test(&foo);
+  foo.caller(20);
+  return 0;
+}
+
+// The copy of 'caller' defined in this module -- it still has the
+// call to 'callee'.
+//
+// CHECK-LABEL: define {{.*}}caller{{.*}}
+// CHECK: {{.*}} call {{.*}}
+// CHECK-NOT: br i1 {{.*}}
+// CHECK: br {{.*}}label %[[BB1:.*]], label{{.*}}!prof ![[PD1:[0-9]+]]
+// CHECK: {{.*}}[[BB1]]: 
+// CHECK:![[PD1]] = !{!"branch_weights", i32 0, i32 1}
diff --git a/test/profile/Inputs/extern_template.cpp b/test/profile/Inputs/extern_template.cpp
new file mode 100644
index 0000000..98c6c16
--- /dev/null
+++ b/test/profile/Inputs/extern_template.cpp
@@ -0,0 +1,14 @@
+#define DEF
+#include "extern_template.h"
+#undef DEF
+extern int bar();
+extern int foo();
+extern Test<int> TO;
+int main() {
+  foo();
+  int R = bar();
+
+  if (R != 10)
+    return 1;
+  return 0;
+}
diff --git a/test/profile/Inputs/extern_template.h b/test/profile/Inputs/extern_template.h
new file mode 100644
index 0000000..aa59f6c
--- /dev/null
+++ b/test/profile/Inputs/extern_template.h
@@ -0,0 +1,17 @@
+template <typename T> struct Test {
+  Test() : M(10) {}
+  void doIt(int N) { // CHECK: [[@LINE]]| 2|  void doIt
+    if (N > 10) {    // CHECK: [[@LINE]]| 2|    if (N > 10) {
+      M += 2;        // CHECK: [[@LINE]]| 1|      M += 2;
+    } else           // CHECK: [[@LINE]]| 1|    } else
+      M -= 2;        // CHECK: [[@LINE]]| 1|      M -= 2;
+  }
+  T M;
+};
+
+#ifdef USE
+extern template struct Test<int>;
+#endif
+#ifdef DEF
+template struct Test<int>;
+#endif
diff --git a/test/profile/Inputs/extern_template1.cpp b/test/profile/Inputs/extern_template1.cpp
new file mode 100644
index 0000000..372ffd2
--- /dev/null
+++ b/test/profile/Inputs/extern_template1.cpp
@@ -0,0 +1,9 @@
+#define USE
+#include "extern_template.h"
+#undef USE
+
+Test<int> TO;
+int foo() {
+  TO.doIt(20);
+  return TO.M;
+}
diff --git a/test/profile/Inputs/extern_template2.cpp b/test/profile/Inputs/extern_template2.cpp
new file mode 100644
index 0000000..ac2f858
--- /dev/null
+++ b/test/profile/Inputs/extern_template2.cpp
@@ -0,0 +1,9 @@
+#define USE
+#include "extern_template.h"
+#undef USE
+
+extern Test<int> TO;
+int bar() {
+  TO.doIt(5);
+  return TO.M;
+}
diff --git a/test/profile/Inputs/instrprof-alloc.c b/test/profile/Inputs/instrprof-alloc.c
new file mode 100644
index 0000000..0894237
--- /dev/null
+++ b/test/profile/Inputs/instrprof-alloc.c
@@ -0,0 +1,41 @@
+/* This test case checks that, when static allocation for the value
+ * profiler is enabled, no malloc/calloc calls are made by the
+ * profile runtime library. */
+#include <stdlib.h>
+__attribute__((noinline)) void foo() {}
+__attribute__((noinline)) void foo2() {}
+void (*FP)();
+int MainEntered = 0;
+int CallocCalled = 0;
+int MallocCalled = 0;
+
+extern void *__real_calloc(size_t s, size_t n);
+extern void *__real_malloc(size_t s);
+
+void *__wrap_calloc(size_t s, size_t n) {
+  if (MainEntered)
+    CallocCalled = 1;
+  return __real_calloc(s, n);
+}
+void *__wrap_malloc(size_t s) {
+  if (MainEntered)
+    MallocCalled = 1;
+  return __real_malloc(s);
+}
+
+void getFP(int i) {
+  if (i % 2)
+    FP = foo;
+  else
+    FP = foo2;
+}
+
+int main() {
+  int i;
+  MainEntered = 1;
+  for (i = 0; i < 100; i++) {
+    getFP(i);
+    FP();
+  }
+  return CallocCalled + MallocCalled;
+}
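As background (not part of the patch), the __wrap_/__real_ pair above relies on the GNU ld --wrap feature; the link options used to drive this input file are not shown in this diff, so the flag below is an assumption. A minimal standalone sketch:

/* Link with: -Wl,--wrap=malloc */
#include <stdio.h>
#include <stdlib.h>

void *__real_malloc(size_t s);   /* resolves to the real allocator */

void *__wrap_malloc(size_t s) {  /* every malloc call is redirected here */
  fprintf(stderr, "malloc(%zu)\n", s);
  return __real_malloc(s);
}

int main(void) {
  free(malloc(16));              /* goes through __wrap_malloc */
  return 0;
}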
diff --git a/test/profile/Inputs/instrprof-comdat.h b/test/profile/Inputs/instrprof-comdat.h
index db1a5ba..61e283c 100644
--- a/test/profile/Inputs/instrprof-comdat.h
+++ b/test/profile/Inputs/instrprof-comdat.h
@@ -12,12 +12,12 @@
   T t;
 };
 
-template <class T> T FOO<T>::DoIt(T ti) { // HEADER:  2| [[@LINE]]|template
-  for (T I = 0; I < ti; I++) {            // HEADER: 22| [[@LINE]]|  for (T
-    t += I;                               // HEADER: 20| [[@LINE]]|    t += I;
-    if (I > ti / 2)                       // HEADER: 20| [[@LINE]]|    if (I > ti 
-      t -= 1;                             // HEADER:  8| [[@LINE]]|      t -= 1;
-  }                                       // HEADER: 10| [[@LINE]]|  }
-                                          // HEADER:  1| [[@LINE]]|
-  return t;                               // HEADER:  1| [[@LINE]]|  return t;
+template <class T> T FOO<T>::DoIt(T ti) { // HEADER: [[@LINE]]|  2|template
+  for (T I = 0; I < ti; I++) {            // HEADER: [[@LINE]]| 22|  for (T
+    t += I;                               // HEADER: [[@LINE]]| 20|    t += I;
+    if (I > ti / 2)                       // HEADER: [[@LINE]]| 20|    if (I > ti
+      t -= 1;                             // HEADER: [[@LINE]]|  8|      t -= 1;
+  }                                       // HEADER: [[@LINE]]| 10|  }
+                                          // HEADER: [[@LINE]]|  1|
+  return t;                               // HEADER: [[@LINE]]|  1|  return t;
 }
diff --git a/test/profile/Inputs/instrprof-dynamic-a.cpp b/test/profile/Inputs/instrprof-dynamic-a.cpp
index 5faa9c2..7468cd4 100644
--- a/test/profile/Inputs/instrprof-dynamic-a.cpp
+++ b/test/profile/Inputs/instrprof-dynamic-a.cpp
@@ -1,7 +1,7 @@
 #include "instrprof-dynamic-header.h"
-void a() {                             // COV: 1| [[@LINE]]|void a
-  if (true) {                          // COV: 1| [[@LINE]]|  if
-    bar<void>(1);                      // COV: 1| [[@LINE]]|    bar
-    bar<char>(1);                      // COV: 1| [[@LINE]]|    bar
-  }                                    // COV: 1| [[@LINE]]|  }
+void a() {                             // COV: [[@LINE]]| 1|void a
+  if (true) {                          // COV: [[@LINE]]| 1|  if
+    bar<void>(1);                      // COV: [[@LINE]]| 1|    bar
+    bar<char>(1);                      // COV: [[@LINE]]| 1|    bar
+  }                                    // COV: [[@LINE]]| 1|  }
 }
diff --git a/test/profile/Inputs/instrprof-file_ex.c b/test/profile/Inputs/instrprof-file_ex.c
new file mode 100644
index 0000000..106e589
--- /dev/null
+++ b/test/profile/Inputs/instrprof-file_ex.c
@@ -0,0 +1,59 @@
+/* This is a test case where the parent process forks 10
+ * children which contend to write to the same file. With
+ * file locking support, the data from each child should not
+ * be lost.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+extern FILE *lprofOpenFileEx(const char *);
+int main(int argc, char *argv[]) {
+  pid_t tid;
+  FILE *F;
+  const char *FN;
+  int child[10];
+  int c;
+  int i;
+
+  if (argc < 2) {
+    fprintf(stderr, "Requires one argument \n");
+    exit(1);
+  }
+  FN = argv[1];
+  truncate(FN, 0);
+
+  for (i = 0; i < 10; i++) {
+    c = fork();
+    // In the child:
+    if (c == 0) {
+      FILE *F = lprofOpenFileEx(FN);
+      if (!F) {
+        fprintf(stderr, "Can not open file %s from child\n", FN);
+        exit(1);
+      }
+      fseek(F, 0, SEEK_END);
+      fprintf(F, "Dump from Child %d\n", i);
+      fclose(F);
+      exit(0);
+    } else {
+      child[i] = c;
+    }
+  }
+
+  // In parent
+  for (i = 0; i < 10; i++) {
+    int child_status;
+    if ((tid = waitpid(child[i], &child_status, 0)) == -1)
+      break;
+  }
+  F = lprofOpenFileEx(FN);
+  if (!F) {
+    fprintf(stderr, "Can not open file %s from parent\n", FN);
+    exit(1);
+  }
+  fseek(F, 0, SEEK_END);
+  fprintf(F, "Dump from parent %d\n", i);
+  return 0;
+}
diff --git a/test/profile/Inputs/instrprof-icall-promo.h b/test/profile/Inputs/instrprof-icall-promo.h
new file mode 100644
index 0000000..531e8ac
--- /dev/null
+++ b/test/profile/Inputs/instrprof-icall-promo.h
@@ -0,0 +1,4 @@
+struct A {
+  virtual int foo() { return 1; };
+  virtual int bar();
+};
diff --git a/test/profile/Inputs/instrprof-icall-promo_1.cc b/test/profile/Inputs/instrprof-icall-promo_1.cc
new file mode 100644
index 0000000..e0a5e06
--- /dev/null
+++ b/test/profile/Inputs/instrprof-icall-promo_1.cc
@@ -0,0 +1,7 @@
+#include "instrprof-icall-promo.h"
+
+A a;
+
+A* ap = &a;
+
+int ref(A* ap) { return ap->A::foo(); }
diff --git a/test/profile/Inputs/instrprof-icall-promo_2.cc b/test/profile/Inputs/instrprof-icall-promo_2.cc
new file mode 100644
index 0000000..658ab0b
--- /dev/null
+++ b/test/profile/Inputs/instrprof-icall-promo_2.cc
@@ -0,0 +1,15 @@
+#include "instrprof-icall-promo.h"
+extern int ref(A *);
+
+int A::bar() { return 2; }
+
+extern A *ap;
+int test() {
+  for (int i = 0; i < 10000; i++) ap->foo();
+  return ref(ap);
+}
+
+int main() {
+  test();
+  return 0;
+}
diff --git a/test/profile/Inputs/instrprof-merge-match-lib.c b/test/profile/Inputs/instrprof-merge-match-lib.c
new file mode 100644
index 0000000..afe559e
--- /dev/null
+++ b/test/profile/Inputs/instrprof-merge-match-lib.c
@@ -0,0 +1,39 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int __llvm_profile_runtime = 0;
+uint64_t __llvm_profile_get_size_for_buffer(void);
+int __llvm_profile_write_buffer(char *);
+void __llvm_profile_reset_counters(void);
+int __llvm_profile_check_compatibility(const char *, uint64_t);
+
+int gg = 0;
+void bar(char c) {
+  if (c == '1')
+    gg++;
+  else
+    gg--;
+}
+
+/* Returns 0 (size) when an error occurs. */
+uint64_t libEntry(char *Buffer, uint64_t MaxSize) {
+
+  uint64_t Size = __llvm_profile_get_size_for_buffer();
+  if (Size > MaxSize)
+    return 0;
+
+  __llvm_profile_reset_counters();
+
+  bar('1');
+
+  if (__llvm_profile_write_buffer(Buffer))
+    return 0;
+
+  /* Now check compatibility. Should return 0.  */
+  if (__llvm_profile_check_compatibility(Buffer, Size))
+    return 0;
+
+  return Size;
+}
+
diff --git a/test/profile/Inputs/instrprof-merge-match.c b/test/profile/Inputs/instrprof-merge-match.c
new file mode 100644
index 0000000..6e29e4a
--- /dev/null
+++ b/test/profile/Inputs/instrprof-merge-match.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+int __llvm_profile_runtime = 0;
+uint64_t __llvm_profile_get_size_for_buffer(void);
+int __llvm_profile_write_buffer(char *);
+void __llvm_profile_reset_counters(void);
+int  __llvm_profile_check_compatibility(const char *, uint64_t);
+
+int g = 0;
+void foo(char c) {
+  if (c == '1')
+    g++;
+  else
+    g--;
+}
+
+extern uint64_t libEntry(char *Buffer, uint64_t MaxSize);
+
+int main(int argc, const char *argv[]) {
+  const uint64_t MaxSize = 10000;
+  static char Buffer[MaxSize];
+
+  uint64_t Size = __llvm_profile_get_size_for_buffer();
+  if (Size > MaxSize)
+    return 1;
+
+  __llvm_profile_reset_counters();
+  foo('0');
+
+  if (__llvm_profile_write_buffer(Buffer))
+    return 1;
+
+  /* Now check compatibility. Should return 0.  */
+  if (__llvm_profile_check_compatibility(Buffer, Size))
+    return 1;
+
+  /* Clear the buffer. */
+  memset(Buffer, 0, MaxSize);
+
+  /* Collect profile from shared library.  */
+  Size = libEntry(Buffer, MaxSize);
+
+  if (!Size)
+    return 1;
+
+  /* Shared library's profile should not match main executable's. */
+  if (!__llvm_profile_check_compatibility(Buffer, Size))
+    return 1;
+
+  return 0;
+}
+
diff --git a/test/profile/Inputs/instrprof-value-prof-evict.c b/test/profile/Inputs/instrprof-value-prof-evict.c
new file mode 100644
index 0000000..3b72e6e
--- /dev/null
+++ b/test/profile/Inputs/instrprof-value-prof-evict.c
@@ -0,0 +1,141 @@
+void callee_0() {}
+void callee_1() {}
+void callee_2() {}
+void callee_3() {}
+
+void *CalleeAddrs[] = {callee_0, callee_1, callee_2, callee_3};
+extern void lprofSetMaxValsPerSite(unsigned);
+
+// Sequences of callee ids.
+
+// In the following sequences,
+// there are two targets; the dominating target is
+// target 0.
+int CallSeqTwoTarget_1[] = {0, 0, 0, 0, 0, 1, 1};
+int CallSeqTwoTarget_2[] = {1, 1, 0, 0, 0, 0, 0};
+int CallSeqTwoTarget_3[] = {1, 0, 0, 1, 0, 0, 0};
+int CallSeqTwoTarget_4[] = {0, 0, 0, 1, 0, 1, 0};
+
+// In the following sequences, there are three targets.
+// The dominating target is 0 and has > 50% of total
+// counts.
+int CallSeqThreeTarget_1[] = {0, 0, 0, 0, 0, 0, 1, 2, 1};
+int CallSeqThreeTarget_2[] = {1, 2, 1, 0, 0, 0, 0, 0, 0};
+int CallSeqThreeTarget_3[] = {1, 0, 0, 2, 0, 0, 0, 1, 0};
+int CallSeqThreeTarget_4[] = {0, 0, 0, 1, 0, 1, 0, 0, 2};
+
+// Four target sequence --
+// There are two cold targets which occupy the value counters
+// early. There is also a very hot target and a medium hot target
+// which are invoked in an interleaved fashion -- the length of each
+// hot period in the sequence is shorter than the cold targets' count.
+//  1. If only two values are tracked, the Hot and Medium hot targets
+//     should survive in the end.
+//  2. If only three values are tracked, the top three targets should
+//     survive in the end.
+int CallSeqFourTarget_1[] = {1, 1, 1, 2, 2, 2, 2, 0, 0, 3, 0, 0, 3, 0, 0, 3,
+                             0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 3};
+
+// Same as above, but the cold entries are invoked later.
+int CallSeqFourTarget_2[] = {0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 3, 0,
+                             0, 3, 0, 0, 3, 0, 0, 3, 1, 1, 1, 2, 2, 2, 2};
+
+// Same as above, but all the targets are interleaved.
+int CallSeqFourTarget_3[] = {0, 3, 0, 0, 1, 3, 0, 0, 0, 2, 0, 0, 3, 3, 0, 3,
+                             2, 2, 0, 3, 3, 1, 0, 0, 1, 0, 0, 3, 0, 2, 0};
+
+typedef void (*FPT)(void);
+
+
+// Testing value profiling eviction algorithm.
+FPT getCalleeFunc(int I) { return CalleeAddrs[I]; }
+
+int main() {
+  int I;
+
+#define INDIRECT_CALLSITE(Sequence, NumValsTracked)                            \
+  lprofSetMaxValsPerSite(NumValsTracked);                                      \
+  for (I = 0; I < sizeof(Sequence) / sizeof(*Sequence); I++) {                 \
+    FPT FP = getCalleeFunc(Sequence[I]);                                       \
+    FP();                                                                      \
+  }
+
+  // check site, target patterns
+  // CHECK: 0, callee_0
+  INDIRECT_CALLSITE(CallSeqTwoTarget_1, 1);
+
+  // CHECK-NEXT: 1, callee_0
+  INDIRECT_CALLSITE(CallSeqTwoTarget_2, 1);
+
+  // CHECK-NEXT: 2, callee_0
+  INDIRECT_CALLSITE(CallSeqTwoTarget_3, 1);
+
+  // CHECK-NEXT: 3, callee_0
+  INDIRECT_CALLSITE(CallSeqTwoTarget_4, 1);
+
+  // CHECK-NEXT: 4, callee_0
+  INDIRECT_CALLSITE(CallSeqThreeTarget_1, 1);
+
+  // CHECK-NEXT: 5, callee_0
+  INDIRECT_CALLSITE(CallSeqThreeTarget_2, 1);
+
+  // CHECK-NEXT: 6, callee_0
+  INDIRECT_CALLSITE(CallSeqThreeTarget_3, 1);
+
+  // CHECK-NEXT: 7, callee_0
+  INDIRECT_CALLSITE(CallSeqThreeTarget_4, 1);
+
+  // CHECK-NEXT: 8, callee_0
+  // CHECK-NEXT: 8, callee_1
+  INDIRECT_CALLSITE(CallSeqThreeTarget_1, 2);
+
+  // CHECK-NEXT: 9, callee_0
+  // CHECK-NEXT: 9, callee_1
+  INDIRECT_CALLSITE(CallSeqThreeTarget_2, 2);
+
+  // CHECK-NEXT: 10, callee_0
+  // CHECK-NEXT: 10, callee_1
+  INDIRECT_CALLSITE(CallSeqThreeTarget_3, 2);
+
+  // CHECK-NEXT: 11, callee_0
+  // CHECK-NEXT: 11, callee_1
+  INDIRECT_CALLSITE(CallSeqThreeTarget_4, 2);
+
+  // CHECK-NEXT: 12, callee_0
+  INDIRECT_CALLSITE(CallSeqFourTarget_1, 1);
+
+  // CHECK-NEXT: 13, callee_0
+  INDIRECT_CALLSITE(CallSeqFourTarget_2, 1);
+
+  // CHECK-NEXT: 14, callee_0
+  INDIRECT_CALLSITE(CallSeqFourTarget_3, 1);
+
+  // CHECK-NEXT: 15, callee_0
+  // CHECK-NEXT: 15, callee_3
+  INDIRECT_CALLSITE(CallSeqFourTarget_1, 2);
+
+  // CHECK-NEXT: 16, callee_0
+  // CHECK-NEXT: 16, callee_3
+  INDIRECT_CALLSITE(CallSeqFourTarget_2, 2);
+
+  // CHECK-NEXT: 17, callee_0
+  // CHECK-NEXT: 17, callee_3
+  INDIRECT_CALLSITE(CallSeqFourTarget_3, 2);
+
+  // CHECK-NEXT: 18, callee_0
+  // CHECK-NEXT: 18, callee_3
+  // CHECK-NEXT: 18, callee_2
+  INDIRECT_CALLSITE(CallSeqFourTarget_1, 3);
+
+  // CHECK-NEXT: 19, callee_0
+  // CHECK-NEXT: 19, callee_3
+  // CHECK-NEXT: 19, callee_2
+  INDIRECT_CALLSITE(CallSeqFourTarget_2, 3);
+
+  // CHECK-NEXT: 20, callee_0
+  // CHECK-NEXT: 20, callee_3
+  // CHECK-NEXT: 20, callee_2
+  INDIRECT_CALLSITE(CallSeqFourTarget_3, 3);
+
+  return 0;
+}
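
The CHECK patterns in instrprof-value-prof-evict.c encode which callee ids dominate each sequence once the runtime is limited to one, two, or three tracked values per site. For cross-checking those expectations, the short program below simply tallies one of the sequences exactly; it is a reference count only, not the runtime's bounded-counter eviction algorithm, and the array is copied verbatim from the test:

    #include <stdio.h>

    /* Copied from instrprof-value-prof-evict.c: the interleaved
       four-target sequence used for sites 12, 15 and 18. */
    static int CallSeqFourTarget_1[] = {1, 1, 1, 2, 2, 2, 2, 0, 0, 3, 0, 0, 3, 0, 0, 3,
                                        0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 3};

    int main(void) {
      int Counts[4] = {0, 0, 0, 0};
      unsigned I, N = sizeof(CallSeqFourTarget_1) / sizeof(*CallSeqFourTarget_1);
      for (I = 0; I < N; I++)
        Counts[CallSeqFourTarget_1[I]]++;
      /* Prints 16, 3, 4, 8: callee_0 is hot, callee_3 medium-hot, callee_1
         and callee_2 cold, matching the callee_0 / callee_3 / callee_2
         ordering the CHECK lines expect for this sequence. */
      for (I = 0; I < 4; I++)
        printf("callee_%u: %d\n", I, Counts[I]);
      return 0;
    }
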
diff --git a/test/profile/Inputs/instrprof-value-prof-real.c b/test/profile/Inputs/instrprof-value-prof-real.c
new file mode 100644
index 0000000..65e5799
--- /dev/null
+++ b/test/profile/Inputs/instrprof-value-prof-real.c
@@ -0,0 +1,1096 @@
+#define DEF_FUNC(x)                                                            \
+  void x() {}
+#define DEF_2_FUNCS(x) DEF_FUNC(x##_1) DEF_FUNC(x##_2)
+#define DEF_4_FUNCS(x) DEF_2_FUNCS(x##_1) DEF_2_FUNCS(x##_2)
+#define DEF_8_FUNCS(x) DEF_4_FUNCS(x##_1) DEF_4_FUNCS(x##_2)
+#define DEF_16_FUNCS(x) DEF_8_FUNCS(x##_1) DEF_8_FUNCS(x##_2)
+#define DEF_32_FUNCS(x) DEF_16_FUNCS(x##_1) DEF_16_FUNCS(x##_2)
+#define DEF_64_FUNCS(x) DEF_32_FUNCS(x##_1) DEF_32_FUNCS(x##_2)
+#define DEF_128_FUNCS(x) DEF_64_FUNCS(x##_1) DEF_64_FUNCS(x##_2)
+#define DEF_256_FUNCS(x) DEF_128_FUNCS(x##_1) DEF_128_FUNCS(x##_2)
+#define DEF_512_FUNCS(x) DEF_256_FUNCS(x##_1) DEF_256_FUNCS(x##_2)
+
+#define FUNC_ADDR(x) &x,
+#define FUNC_2_ADDRS(x) FUNC_ADDR(x##_1) FUNC_ADDR(x##_2)
+#define FUNC_4_ADDRS(x) FUNC_2_ADDRS(x##_1) FUNC_2_ADDRS(x##_2)
+#define FUNC_8_ADDRS(x) FUNC_4_ADDRS(x##_1) FUNC_4_ADDRS(x##_2)
+#define FUNC_16_ADDRS(x) FUNC_8_ADDRS(x##_1) FUNC_8_ADDRS(x##_2)
+#define FUNC_32_ADDRS(x) FUNC_16_ADDRS(x##_1) FUNC_16_ADDRS(x##_2)
+#define FUNC_64_ADDRS(x) FUNC_32_ADDRS(x##_1) FUNC_32_ADDRS(x##_2)
+#define FUNC_128_ADDRS(x) FUNC_64_ADDRS(x##_1) FUNC_64_ADDRS(x##_2)
+#define FUNC_256_ADDRS(x) FUNC_128_ADDRS(x##_1) FUNC_128_ADDRS(x##_2)
+#define FUNC_512_ADDRS(x) FUNC_256_ADDRS(x##_1) FUNC_256_ADDRS(x##_2)
+
+DEF_512_FUNCS(foo)
+void *CalleeAddrs[] = {FUNC_512_ADDRS(foo)};
+
+typedef void (*FPT)(void);
+
+FPT getFunc(int I) { return CalleeAddrs[I]; }
+
+#ifdef SHARED_LIB
+int shared_entry() {
+#else
+#ifdef CALL_SHARED
+extern int shared_entry();
+#endif
+int main() {
+#endif
+  int I;
+  for (I = 0; I < 512; I++) {
+    FPT Fp = getFunc(I);
+    int J;
+    for (J = 0; J < 1000 - I; J++)
+      Fp();
+
+    Fp = getFunc(511 - I);
+    for (J = 0; J < 2000 - I; J++)
+      Fp();
+#ifdef STRESS
+    Fp = getFunc(I);
+    for (J = 0; J < 2000 - I; J++)
+      Fp();
+
+    Fp = getFunc(I);
+    for (J = 0; J < 2000 - I; J++)
+      Fp();
+
+    Fp = getFunc(I);
+    for (J = 0; J < 2000 - I; J++)
+      Fp();
+
+    Fp = getFunc(I);
+    for (J = 0; J < 2000 - I; J++)
+      Fp();
+#endif
+  }
+#ifdef CALL_SHARED
+  shared_entry();
+#endif
+  return 0;
+}
+
+// IR: :ir
+// CHECK-LABEL:  main:
+// CHECK:	[ 0, foo_1_1_1_1_1_1_1_1_1, 1000 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_1_1_1_1_2, 999 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_1_1_1_2_1, 998 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_1_1_1_2_2, 997 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_1_1_2_1_1, 996 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_1_1_2_1_2, 995 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_1_1_2_2_1, 994 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_1_1_2_2_2, 993 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_1_2_1_1_1, 992 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_1_2_1_1_2, 991 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_1_2_1_2_1, 990 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_1_2_1_2_2, 989 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_1_2_2_1_1, 988 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_1_2_2_1_2, 987 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_1_2_2_2_1, 986 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_1_2_2_2_2, 985 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_2_1_1_1_1, 984 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_2_1_1_1_2, 983 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_2_1_1_2_1, 982 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_2_1_1_2_2, 981 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_2_1_2_1_1, 980 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_2_1_2_1_2, 979 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_2_1_2_2_1, 978 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_2_1_2_2_2, 977 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_2_2_1_1_1, 976 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_2_2_1_1_2, 975 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_2_2_1_2_1, 974 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_2_2_1_2_2, 973 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_2_2_2_1_1, 972 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_2_2_2_1_2, 971 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_2_2_2_2_1, 970 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_1_2_2_2_2_2, 969 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_1_1_1_1_1, 968 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_1_1_1_1_2, 967 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_1_1_1_2_1, 966 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_1_1_1_2_2, 965 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_1_1_2_1_1, 964 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_1_1_2_1_2, 963 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_1_1_2_2_1, 962 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_1_1_2_2_2, 961 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_1_2_1_1_1, 960 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_1_2_1_1_2, 959 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_1_2_1_2_1, 958 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_1_2_1_2_2, 957 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_1_2_2_1_1, 956 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_1_2_2_1_2, 955 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_1_2_2_2_1, 954 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_1_2_2_2_2, 953 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_2_1_1_1_1, 952 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_2_1_1_1_2, 951 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_2_1_1_2_1, 950 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_2_1_1_2_2, 949 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_2_1_2_1_1, 948 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_2_1_2_1_2, 947 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_2_1_2_2_1, 946 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_2_1_2_2_2, 945 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_2_2_1_1_1, 944 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_2_2_1_1_2, 943 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_2_2_1_2_1, 942 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_2_2_1_2_2, 941 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_2_2_2_1_1, 940 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_2_2_2_1_2, 939 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_2_2_2_2_1, 938 ]
+// CHECK-NEXT:	[ 0, foo_1_1_1_2_2_2_2_2_2, 937 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_1_1_1_1_1, 936 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_1_1_1_1_2, 935 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_1_1_1_2_1, 934 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_1_1_1_2_2, 933 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_1_1_2_1_1, 932 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_1_1_2_1_2, 931 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_1_1_2_2_1, 930 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_1_1_2_2_2, 929 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_1_2_1_1_1, 928 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_1_2_1_1_2, 927 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_1_2_1_2_1, 926 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_1_2_1_2_2, 925 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_1_2_2_1_1, 924 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_1_2_2_1_2, 923 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_1_2_2_2_1, 922 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_1_2_2_2_2, 921 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_2_1_1_1_1, 920 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_2_1_1_1_2, 919 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_2_1_1_2_1, 918 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_2_1_1_2_2, 917 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_2_1_2_1_1, 916 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_2_1_2_1_2, 915 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_2_1_2_2_1, 914 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_2_1_2_2_2, 913 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_2_2_1_1_1, 912 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_2_2_1_1_2, 911 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_2_2_1_2_1, 910 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_2_2_1_2_2, 909 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_2_2_2_1_1, 908 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_2_2_2_1_2, 907 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_2_2_2_2_1, 906 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_1_2_2_2_2_2, 905 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_1_1_1_1_1, 904 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_1_1_1_1_2, 903 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_1_1_1_2_1, 902 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_1_1_1_2_2, 901 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_1_1_2_1_1, 900 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_1_1_2_1_2, 899 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_1_1_2_2_1, 898 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_1_1_2_2_2, 897 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_1_2_1_1_1, 896 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_1_2_1_1_2, 895 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_1_2_1_2_1, 894 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_1_2_1_2_2, 893 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_1_2_2_1_1, 892 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_1_2_2_1_2, 891 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_1_2_2_2_1, 890 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_1_2_2_2_2, 889 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_2_1_1_1_1, 888 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_2_1_1_1_2, 887 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_2_1_1_2_1, 886 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_2_1_1_2_2, 885 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_2_1_2_1_1, 884 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_2_1_2_1_2, 883 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_2_1_2_2_1, 882 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_2_1_2_2_2, 881 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_2_2_1_1_1, 880 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_2_2_1_1_2, 879 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_2_2_1_2_1, 878 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_2_2_1_2_2, 877 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_2_2_2_1_1, 876 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_2_2_2_1_2, 875 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_2_2_2_2_1, 874 ]
+// CHECK-NEXT:	[ 0, foo_1_1_2_2_2_2_2_2_2, 873 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_1_1_1_1_1, 872 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_1_1_1_1_2, 871 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_1_1_1_2_1, 870 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_1_1_1_2_2, 869 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_1_1_2_1_1, 868 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_1_1_2_1_2, 867 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_1_1_2_2_1, 866 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_1_1_2_2_2, 865 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_1_2_1_1_1, 864 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_1_2_1_1_2, 863 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_1_2_1_2_1, 862 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_1_2_1_2_2, 861 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_1_2_2_1_1, 860 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_1_2_2_1_2, 859 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_1_2_2_2_1, 858 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_1_2_2_2_2, 857 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_2_1_1_1_1, 856 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_2_1_1_1_2, 855 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_2_1_1_2_1, 854 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_2_1_1_2_2, 853 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_2_1_2_1_1, 852 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_2_1_2_1_2, 851 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_2_1_2_2_1, 850 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_2_1_2_2_2, 849 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_2_2_1_1_1, 848 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_2_2_1_1_2, 847 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_2_2_1_2_1, 846 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_2_2_1_2_2, 845 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_2_2_2_1_1, 844 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_2_2_2_1_2, 843 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_2_2_2_2_1, 842 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_1_2_2_2_2_2, 841 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_1_1_1_1_1, 840 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_1_1_1_1_2, 839 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_1_1_1_2_1, 838 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_1_1_1_2_2, 837 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_1_1_2_1_1, 836 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_1_1_2_1_2, 835 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_1_1_2_2_1, 834 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_1_1_2_2_2, 833 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_1_2_1_1_1, 832 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_1_2_1_1_2, 831 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_1_2_1_2_1, 830 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_1_2_1_2_2, 829 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_1_2_2_1_1, 828 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_1_2_2_1_2, 827 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_1_2_2_2_1, 826 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_1_2_2_2_2, 825 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_2_1_1_1_1, 824 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_2_1_1_1_2, 823 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_2_1_1_2_1, 822 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_2_1_1_2_2, 821 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_2_1_2_1_1, 820 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_2_1_2_1_2, 819 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_2_1_2_2_1, 818 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_2_1_2_2_2, 817 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_2_2_1_1_1, 816 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_2_2_1_1_2, 815 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_2_2_1_2_1, 814 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_2_2_1_2_2, 813 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_2_2_2_1_1, 812 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_2_2_2_1_2, 811 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_2_2_2_2_1, 810 ]
+// CHECK-NEXT:	[ 0, foo_1_2_1_2_2_2_2_2_2, 809 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_1_1_1_1_1, 808 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_1_1_1_1_2, 807 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_1_1_1_2_1, 806 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_1_1_1_2_2, 805 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_1_1_2_1_1, 804 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_1_1_2_1_2, 803 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_1_1_2_2_1, 802 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_1_1_2_2_2, 801 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_1_2_1_1_1, 800 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_1_2_1_1_2, 799 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_1_2_1_2_1, 798 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_1_2_1_2_2, 797 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_1_2_2_1_1, 796 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_1_2_2_1_2, 795 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_1_2_2_2_1, 794 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_1_2_2_2_2, 793 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_2_1_1_1_1, 792 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_2_1_1_1_2, 791 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_2_1_1_2_1, 790 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_2_1_1_2_2, 789 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_2_1_2_1_1, 788 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_2_1_2_1_2, 787 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_2_1_2_2_1, 786 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_2_1_2_2_2, 785 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_2_2_1_1_1, 784 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_2_2_1_1_2, 783 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_2_2_1_2_1, 782 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_2_2_1_2_2, 781 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_2_2_2_1_1, 780 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_2_2_2_1_2, 779 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_2_2_2_2_1, 778 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_1_2_2_2_2_2, 777 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_1_1_1_1_1, 776 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_1_1_1_1_2, 775 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_1_1_1_2_1, 774 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_1_1_1_2_2, 773 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_1_1_2_1_1, 772 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_1_1_2_1_2, 771 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_1_1_2_2_1, 770 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_1_1_2_2_2, 769 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_1_2_1_1_1, 768 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_1_2_1_1_2, 767 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_1_2_1_2_1, 766 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_1_2_1_2_2, 765 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_1_2_2_1_1, 764 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_1_2_2_1_2, 763 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_1_2_2_2_1, 762 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_1_2_2_2_2, 761 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_2_1_1_1_1, 760 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_2_1_1_1_2, 759 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_2_1_1_2_1, 758 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_2_1_1_2_2, 757 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_2_1_2_1_1, 756 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_2_1_2_1_2, 755 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_2_1_2_2_1, 754 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_2_1_2_2_2, 753 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_2_2_1_1_1, 752 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_2_2_1_1_2, 751 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_2_2_1_2_1, 750 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_2_2_1_2_2, 749 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_2_2_2_1_1, 748 ]
+// CHECK-NEXT:	[ 0, foo_1_2_2_2_2_2_2_1_2, 747 ]
+// CHECK-NEXT:	[ 0, foo
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_2_2_2_2_2, 2000 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_2_2_2_2_1, 1999 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_2_2_2_1_2, 1998 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_2_2_2_1_1, 1997 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_2_2_1_2_2, 1996 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_2_2_1_2_1, 1995 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_2_2_1_1_2, 1994 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_2_2_1_1_1, 1993 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_2_1_2_2_2, 1992 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_2_1_2_2_1, 1991 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_2_1_2_1_2, 1990 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_2_1_2_1_1, 1989 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_2_1_1_2_2, 1988 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_2_1_1_2_1, 1987 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_2_1_1_1_2, 1986 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_2_1_1_1_1, 1985 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_1_2_2_2_2, 1984 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_1_2_2_2_1, 1983 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_1_2_2_1_2, 1982 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_1_2_2_1_1, 1981 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_1_2_1_2_2, 1980 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_1_2_1_2_1, 1979 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_1_2_1_1_2, 1978 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_1_2_1_1_1, 1977 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_1_1_2_2_2, 1976 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_1_1_2_2_1, 1975 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_1_1_2_1_2, 1974 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_1_1_2_1_1, 1973 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_1_1_1_2_2, 1972 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_1_1_1_2_1, 1971 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_1_1_1_1_2, 1970 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_2_1_1_1_1_1, 1969 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_2_2_2_2_2, 1968 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_2_2_2_2_1, 1967 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_2_2_2_1_2, 1966 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_2_2_2_1_1, 1965 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_2_2_1_2_2, 1964 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_2_2_1_2_1, 1963 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_2_2_1_1_2, 1962 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_2_2_1_1_1, 1961 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_2_1_2_2_2, 1960 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_2_1_2_2_1, 1959 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_2_1_2_1_2, 1958 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_2_1_2_1_1, 1957 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_2_1_1_2_2, 1956 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_2_1_1_2_1, 1955 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_2_1_1_1_2, 1954 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_2_1_1_1_1, 1953 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_1_2_2_2_2, 1952 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_1_2_2_2_1, 1951 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_1_2_2_1_2, 1950 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_1_2_2_1_1, 1949 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_1_2_1_2_2, 1948 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_1_2_1_2_1, 1947 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_1_2_1_1_2, 1946 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_1_2_1_1_1, 1945 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_1_1_2_2_2, 1944 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_1_1_2_2_1, 1943 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_1_1_2_1_2, 1942 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_1_1_2_1_1, 1941 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_1_1_1_2_2, 1940 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_1_1_1_2_1, 1939 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_1_1_1_1_2, 1938 ]
+// CHECK-NEXT:	[ 1, foo_2_2_2_1_1_1_1_1_1, 1937 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_2_2_2_2_2, 1936 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_2_2_2_2_1, 1935 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_2_2_2_1_2, 1934 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_2_2_2_1_1, 1933 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_2_2_1_2_2, 1932 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_2_2_1_2_1, 1931 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_2_2_1_1_2, 1930 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_2_2_1_1_1, 1929 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_2_1_2_2_2, 1928 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_2_1_2_2_1, 1927 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_2_1_2_1_2, 1926 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_2_1_2_1_1, 1925 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_2_1_1_2_2, 1924 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_2_1_1_2_1, 1923 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_2_1_1_1_2, 1922 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_2_1_1_1_1, 1921 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_1_2_2_2_2, 1920 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_1_2_2_2_1, 1919 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_1_2_2_1_2, 1918 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_1_2_2_1_1, 1917 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_1_2_1_2_2, 1916 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_1_2_1_2_1, 1915 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_1_2_1_1_2, 1914 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_1_2_1_1_1, 1913 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_1_1_2_2_2, 1912 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_1_1_2_2_1, 1911 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_1_1_2_1_2, 1910 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_1_1_2_1_1, 1909 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_1_1_1_2_2, 1908 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_1_1_1_2_1, 1907 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_1_1_1_1_2, 1906 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_2_1_1_1_1_1, 1905 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_2_2_2_2_2, 1904 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_2_2_2_2_1, 1903 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_2_2_2_1_2, 1902 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_2_2_2_1_1, 1901 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_2_2_1_2_2, 1900 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_2_2_1_2_1, 1899 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_2_2_1_1_2, 1898 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_2_2_1_1_1, 1897 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_2_1_2_2_2, 1896 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_2_1_2_2_1, 1895 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_2_1_2_1_2, 1894 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_2_1_2_1_1, 1893 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_2_1_1_2_2, 1892 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_2_1_1_2_1, 1891 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_2_1_1_1_2, 1890 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_2_1_1_1_1, 1889 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_1_2_2_2_2, 1888 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_1_2_2_2_1, 1887 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_1_2_2_1_2, 1886 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_1_2_2_1_1, 1885 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_1_2_1_2_2, 1884 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_1_2_1_2_1, 1883 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_1_2_1_1_2, 1882 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_1_2_1_1_1, 1881 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_1_1_2_2_2, 1880 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_1_1_2_2_1, 1879 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_1_1_2_1_2, 1878 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_1_1_2_1_1, 1877 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_1_1_1_2_2, 1876 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_1_1_1_2_1, 1875 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_1_1_1_1_2, 1874 ]
+// CHECK-NEXT:	[ 1, foo_2_2_1_1_1_1_1_1_1, 1873 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_2_2_2_2_2, 1872 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_2_2_2_2_1, 1871 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_2_2_2_1_2, 1870 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_2_2_2_1_1, 1869 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_2_2_1_2_2, 1868 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_2_2_1_2_1, 1867 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_2_2_1_1_2, 1866 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_2_2_1_1_1, 1865 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_2_1_2_2_2, 1864 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_2_1_2_2_1, 1863 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_2_1_2_1_2, 1862 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_2_1_2_1_1, 1861 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_2_1_1_2_2, 1860 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_2_1_1_2_1, 1859 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_2_1_1_1_2, 1858 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_2_1_1_1_1, 1857 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_1_2_2_2_2, 1856 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_1_2_2_2_1, 1855 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_1_2_2_1_2, 1854 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_1_2_2_1_1, 1853 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_1_2_1_2_2, 1852 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_1_2_1_2_1, 1851 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_1_2_1_1_2, 1850 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_1_2_1_1_1, 1849 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_1_1_2_2_2, 1848 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_1_1_2_2_1, 1847 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_1_1_2_1_2, 1846 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_1_1_2_1_1, 1845 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_1_1_1_2_2, 1844 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_1_1_1_2_1, 1843 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_1_1_1_1_2, 1842 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_2_1_1_1_1_1, 1841 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_2_2_2_2_2, 1840 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_2_2_2_2_1, 1839 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_2_2_2_1_2, 1838 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_2_2_2_1_1, 1837 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_2_2_1_2_2, 1836 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_2_2_1_2_1, 1835 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_2_2_1_1_2, 1834 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_2_2_1_1_1, 1833 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_2_1_2_2_2, 1832 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_2_1_2_2_1, 1831 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_2_1_2_1_2, 1830 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_2_1_2_1_1, 1829 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_2_1_1_2_2, 1828 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_2_1_1_2_1, 1827 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_2_1_1_1_2, 1826 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_2_1_1_1_1, 1825 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_1_2_2_2_2, 1824 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_1_2_2_2_1, 1823 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_1_2_2_1_2, 1822 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_1_2_2_1_1, 1821 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_1_2_1_2_2, 1820 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_1_2_1_2_1, 1819 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_1_2_1_1_2, 1818 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_1_2_1_1_1, 1817 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_1_1_2_2_2, 1816 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_1_1_2_2_1, 1815 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_1_1_2_1_2, 1814 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_1_1_2_1_1, 1813 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_1_1_1_2_2, 1812 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_1_1_1_2_1, 1811 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_1_1_1_1_2, 1810 ]
+// CHECK-NEXT:	[ 1, foo_2_1_2_1_1_1_1_1_1, 1809 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_2_2_2_2_2, 1808 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_2_2_2_2_1, 1807 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_2_2_2_1_2, 1806 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_2_2_2_1_1, 1805 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_2_2_1_2_2, 1804 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_2_2_1_2_1, 1803 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_2_2_1_1_2, 1802 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_2_2_1_1_1, 1801 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_2_1_2_2_2, 1800 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_2_1_2_2_1, 1799 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_2_1_2_1_2, 1798 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_2_1_2_1_1, 1797 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_2_1_1_2_2, 1796 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_2_1_1_2_1, 1795 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_2_1_1_1_2, 1794 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_2_1_1_1_1, 1793 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_1_2_2_2_2, 1792 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_1_2_2_2_1, 1791 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_1_2_2_1_2, 1790 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_1_2_2_1_1, 1789 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_1_2_1_2_2, 1788 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_1_2_1_2_1, 1787 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_1_2_1_1_2, 1786 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_1_2_1_1_1, 1785 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_1_1_2_2_2, 1784 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_1_1_2_2_1, 1783 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_1_1_2_1_2, 1782 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_1_1_2_1_1, 1781 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_1_1_1_2_2, 1780 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_1_1_1_2_1, 1779 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_1_1_1_1_2, 1778 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_2_1_1_1_1_1, 1777 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_2_2_2_2_2, 1776 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_2_2_2_2_1, 1775 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_2_2_2_1_2, 1774 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_2_2_2_1_1, 1773 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_2_2_1_2_2, 1772 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_2_2_1_2_1, 1771 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_2_2_1_1_2, 1770 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_2_2_1_1_1, 1769 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_2_1_2_2_2, 1768 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_2_1_2_2_1, 1767 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_2_1_2_1_2, 1766 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_2_1_2_1_1, 1765 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_2_1_1_2_2, 1764 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_2_1_1_2_1, 1763 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_2_1_1_1_2, 1762 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_2_1_1_1_1, 1761 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_1_2_2_2_2, 1760 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_1_2_2_2_1, 1759 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_1_2_2_1_2, 1758 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_1_2_2_1_1, 1757 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_1_2_1_2_2, 1756 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_1_2_1_2_1, 1755 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_1_2_1_1_2, 1754 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_1_2_1_1_1, 1753 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_1_1_2_2_2, 1752 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_1_1_2_2_1, 1751 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_1_1_2_1_2, 1750 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_1_1_2_1_1, 1749 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_1_1_1_2_2, 1748 ]
+// CHECK-NEXT:	[ 1, foo_2_1_1_1_1_1_1_2_1, 1747 ]
+// CHECK-NEXT:	[ 1, foo
+
+// SHARED-LABEL:  shared_entry:
+// SHARED:	[ 0, foo_1_1_1_1_1_1_1_1_1, 1000 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_1_1_1_1_2, 999 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_1_1_1_2_1, 998 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_1_1_1_2_2, 997 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_1_1_2_1_1, 996 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_1_1_2_1_2, 995 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_1_1_2_2_1, 994 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_1_1_2_2_2, 993 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_1_2_1_1_1, 992 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_1_2_1_1_2, 991 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_1_2_1_2_1, 990 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_1_2_1_2_2, 989 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_1_2_2_1_1, 988 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_1_2_2_1_2, 987 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_1_2_2_2_1, 986 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_1_2_2_2_2, 985 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_2_1_1_1_1, 984 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_2_1_1_1_2, 983 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_2_1_1_2_1, 982 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_2_1_1_2_2, 981 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_2_1_2_1_1, 980 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_2_1_2_1_2, 979 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_2_1_2_2_1, 978 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_2_1_2_2_2, 977 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_2_2_1_1_1, 976 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_2_2_1_1_2, 975 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_2_2_1_2_1, 974 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_2_2_1_2_2, 973 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_2_2_2_1_1, 972 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_2_2_2_1_2, 971 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_2_2_2_2_1, 970 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_1_2_2_2_2_2, 969 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_1_1_1_1_1, 968 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_1_1_1_1_2, 967 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_1_1_1_2_1, 966 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_1_1_1_2_2, 965 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_1_1_2_1_1, 964 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_1_1_2_1_2, 963 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_1_1_2_2_1, 962 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_1_1_2_2_2, 961 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_1_2_1_1_1, 960 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_1_2_1_1_2, 959 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_1_2_1_2_1, 958 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_1_2_1_2_2, 957 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_1_2_2_1_1, 956 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_1_2_2_1_2, 955 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_1_2_2_2_1, 954 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_1_2_2_2_2, 953 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_2_1_1_1_1, 952 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_2_1_1_1_2, 951 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_2_1_1_2_1, 950 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_2_1_1_2_2, 949 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_2_1_2_1_1, 948 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_2_1_2_1_2, 947 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_2_1_2_2_1, 946 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_2_1_2_2_2, 945 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_2_2_1_1_1, 944 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_2_2_1_1_2, 943 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_2_2_1_2_1, 942 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_2_2_1_2_2, 941 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_2_2_2_1_1, 940 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_2_2_2_1_2, 939 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_2_2_2_2_1, 938 ]
+// SHARED-NEXT:	[ 0, foo_1_1_1_2_2_2_2_2_2, 937 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_1_1_1_1_1, 936 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_1_1_1_1_2, 935 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_1_1_1_2_1, 934 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_1_1_1_2_2, 933 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_1_1_2_1_1, 932 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_1_1_2_1_2, 931 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_1_1_2_2_1, 930 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_1_1_2_2_2, 929 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_1_2_1_1_1, 928 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_1_2_1_1_2, 927 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_1_2_1_2_1, 926 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_1_2_1_2_2, 925 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_1_2_2_1_1, 924 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_1_2_2_1_2, 923 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_1_2_2_2_1, 922 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_1_2_2_2_2, 921 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_2_1_1_1_1, 920 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_2_1_1_1_2, 919 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_2_1_1_2_1, 918 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_2_1_1_2_2, 917 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_2_1_2_1_1, 916 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_2_1_2_1_2, 915 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_2_1_2_2_1, 914 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_2_1_2_2_2, 913 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_2_2_1_1_1, 912 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_2_2_1_1_2, 911 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_2_2_1_2_1, 910 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_2_2_1_2_2, 909 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_2_2_2_1_1, 908 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_2_2_2_1_2, 907 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_2_2_2_2_1, 906 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_1_2_2_2_2_2, 905 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_1_1_1_1_1, 904 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_1_1_1_1_2, 903 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_1_1_1_2_1, 902 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_1_1_1_2_2, 901 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_1_1_2_1_1, 900 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_1_1_2_1_2, 899 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_1_1_2_2_1, 898 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_1_1_2_2_2, 897 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_1_2_1_1_1, 896 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_1_2_1_1_2, 895 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_1_2_1_2_1, 894 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_1_2_1_2_2, 893 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_1_2_2_1_1, 892 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_1_2_2_1_2, 891 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_1_2_2_2_1, 890 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_1_2_2_2_2, 889 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_2_1_1_1_1, 888 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_2_1_1_1_2, 887 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_2_1_1_2_1, 886 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_2_1_1_2_2, 885 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_2_1_2_1_1, 884 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_2_1_2_1_2, 883 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_2_1_2_2_1, 882 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_2_1_2_2_2, 881 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_2_2_1_1_1, 880 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_2_2_1_1_2, 879 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_2_2_1_2_1, 878 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_2_2_1_2_2, 877 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_2_2_2_1_1, 876 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_2_2_2_1_2, 875 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_2_2_2_2_1, 874 ]
+// SHARED-NEXT:	[ 0, foo_1_1_2_2_2_2_2_2_2, 873 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_1_1_1_1_1, 872 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_1_1_1_1_2, 871 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_1_1_1_2_1, 870 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_1_1_1_2_2, 869 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_1_1_2_1_1, 868 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_1_1_2_1_2, 867 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_1_1_2_2_1, 866 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_1_1_2_2_2, 865 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_1_2_1_1_1, 864 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_1_2_1_1_2, 863 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_1_2_1_2_1, 862 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_1_2_1_2_2, 861 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_1_2_2_1_1, 860 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_1_2_2_1_2, 859 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_1_2_2_2_1, 858 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_1_2_2_2_2, 857 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_2_1_1_1_1, 856 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_2_1_1_1_2, 855 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_2_1_1_2_1, 854 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_2_1_1_2_2, 853 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_2_1_2_1_1, 852 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_2_1_2_1_2, 851 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_2_1_2_2_1, 850 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_2_1_2_2_2, 849 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_2_2_1_1_1, 848 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_2_2_1_1_2, 847 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_2_2_1_2_1, 846 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_2_2_1_2_2, 845 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_2_2_2_1_1, 844 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_2_2_2_1_2, 843 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_2_2_2_2_1, 842 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_1_2_2_2_2_2, 841 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_1_1_1_1_1, 840 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_1_1_1_1_2, 839 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_1_1_1_2_1, 838 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_1_1_1_2_2, 837 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_1_1_2_1_1, 836 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_1_1_2_1_2, 835 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_1_1_2_2_1, 834 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_1_1_2_2_2, 833 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_1_2_1_1_1, 832 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_1_2_1_1_2, 831 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_1_2_1_2_1, 830 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_1_2_1_2_2, 829 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_1_2_2_1_1, 828 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_1_2_2_1_2, 827 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_1_2_2_2_1, 826 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_1_2_2_2_2, 825 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_2_1_1_1_1, 824 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_2_1_1_1_2, 823 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_2_1_1_2_1, 822 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_2_1_1_2_2, 821 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_2_1_2_1_1, 820 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_2_1_2_1_2, 819 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_2_1_2_2_1, 818 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_2_1_2_2_2, 817 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_2_2_1_1_1, 816 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_2_2_1_1_2, 815 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_2_2_1_2_1, 814 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_2_2_1_2_2, 813 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_2_2_2_1_1, 812 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_2_2_2_1_2, 811 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_2_2_2_2_1, 810 ]
+// SHARED-NEXT:	[ 0, foo_1_2_1_2_2_2_2_2_2, 809 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_1_1_1_1_1, 808 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_1_1_1_1_2, 807 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_1_1_1_2_1, 806 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_1_1_1_2_2, 805 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_1_1_2_1_1, 804 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_1_1_2_1_2, 803 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_1_1_2_2_1, 802 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_1_1_2_2_2, 801 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_1_2_1_1_1, 800 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_1_2_1_1_2, 799 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_1_2_1_2_1, 798 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_1_2_1_2_2, 797 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_1_2_2_1_1, 796 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_1_2_2_1_2, 795 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_1_2_2_2_1, 794 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_1_2_2_2_2, 793 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_2_1_1_1_1, 792 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_2_1_1_1_2, 791 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_2_1_1_2_1, 790 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_2_1_1_2_2, 789 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_2_1_2_1_1, 788 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_2_1_2_1_2, 787 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_2_1_2_2_1, 786 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_2_1_2_2_2, 785 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_2_2_1_1_1, 784 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_2_2_1_1_2, 783 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_2_2_1_2_1, 782 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_2_2_1_2_2, 781 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_2_2_2_1_1, 780 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_2_2_2_1_2, 779 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_2_2_2_2_1, 778 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_1_2_2_2_2_2, 777 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_1_1_1_1_1, 776 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_1_1_1_1_2, 775 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_1_1_1_2_1, 774 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_1_1_1_2_2, 773 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_1_1_2_1_1, 772 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_1_1_2_1_2, 771 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_1_1_2_2_1, 770 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_1_1_2_2_2, 769 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_1_2_1_1_1, 768 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_1_2_1_1_2, 767 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_1_2_1_2_1, 766 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_1_2_1_2_2, 765 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_1_2_2_1_1, 764 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_1_2_2_1_2, 763 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_1_2_2_2_1, 762 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_1_2_2_2_2, 761 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_2_1_1_1_1, 760 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_2_1_1_1_2, 759 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_2_1_1_2_1, 758 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_2_1_1_2_2, 757 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_2_1_2_1_1, 756 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_2_1_2_1_2, 755 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_2_1_2_2_1, 754 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_2_1_2_2_2, 753 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_2_2_1_1_1, 752 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_2_2_1_1_2, 751 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_2_2_1_2_1, 750 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_2_2_1_2_2, 749 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_2_2_2_1_1, 748 ]
+// SHARED-NEXT:	[ 0, foo_1_2_2_2_2_2_2_1_2, 747 ]
+// SHARED-NEXT:	[ 0, foo
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_2_2_2_2_2, 2000 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_2_2_2_2_1, 1999 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_2_2_2_1_2, 1998 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_2_2_2_1_1, 1997 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_2_2_1_2_2, 1996 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_2_2_1_2_1, 1995 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_2_2_1_1_2, 1994 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_2_2_1_1_1, 1993 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_2_1_2_2_2, 1992 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_2_1_2_2_1, 1991 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_2_1_2_1_2, 1990 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_2_1_2_1_1, 1989 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_2_1_1_2_2, 1988 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_2_1_1_2_1, 1987 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_2_1_1_1_2, 1986 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_2_1_1_1_1, 1985 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_1_2_2_2_2, 1984 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_1_2_2_2_1, 1983 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_1_2_2_1_2, 1982 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_1_2_2_1_1, 1981 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_1_2_1_2_2, 1980 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_1_2_1_2_1, 1979 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_1_2_1_1_2, 1978 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_1_2_1_1_1, 1977 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_1_1_2_2_2, 1976 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_1_1_2_2_1, 1975 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_1_1_2_1_2, 1974 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_1_1_2_1_1, 1973 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_1_1_1_2_2, 1972 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_1_1_1_2_1, 1971 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_1_1_1_1_2, 1970 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_2_1_1_1_1_1, 1969 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_2_2_2_2_2, 1968 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_2_2_2_2_1, 1967 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_2_2_2_1_2, 1966 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_2_2_2_1_1, 1965 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_2_2_1_2_2, 1964 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_2_2_1_2_1, 1963 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_2_2_1_1_2, 1962 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_2_2_1_1_1, 1961 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_2_1_2_2_2, 1960 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_2_1_2_2_1, 1959 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_2_1_2_1_2, 1958 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_2_1_2_1_1, 1957 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_2_1_1_2_2, 1956 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_2_1_1_2_1, 1955 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_2_1_1_1_2, 1954 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_2_1_1_1_1, 1953 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_1_2_2_2_2, 1952 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_1_2_2_2_1, 1951 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_1_2_2_1_2, 1950 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_1_2_2_1_1, 1949 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_1_2_1_2_2, 1948 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_1_2_1_2_1, 1947 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_1_2_1_1_2, 1946 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_1_2_1_1_1, 1945 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_1_1_2_2_2, 1944 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_1_1_2_2_1, 1943 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_1_1_2_1_2, 1942 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_1_1_2_1_1, 1941 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_1_1_1_2_2, 1940 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_1_1_1_2_1, 1939 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_1_1_1_1_2, 1938 ]
+// SHARED-NEXT:	[ 1, foo_2_2_2_1_1_1_1_1_1, 1937 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_2_2_2_2_2, 1936 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_2_2_2_2_1, 1935 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_2_2_2_1_2, 1934 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_2_2_2_1_1, 1933 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_2_2_1_2_2, 1932 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_2_2_1_2_1, 1931 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_2_2_1_1_2, 1930 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_2_2_1_1_1, 1929 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_2_1_2_2_2, 1928 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_2_1_2_2_1, 1927 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_2_1_2_1_2, 1926 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_2_1_2_1_1, 1925 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_2_1_1_2_2, 1924 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_2_1_1_2_1, 1923 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_2_1_1_1_2, 1922 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_2_1_1_1_1, 1921 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_1_2_2_2_2, 1920 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_1_2_2_2_1, 1919 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_1_2_2_1_2, 1918 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_1_2_2_1_1, 1917 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_1_2_1_2_2, 1916 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_1_2_1_2_1, 1915 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_1_2_1_1_2, 1914 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_1_2_1_1_1, 1913 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_1_1_2_2_2, 1912 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_1_1_2_2_1, 1911 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_1_1_2_1_2, 1910 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_1_1_2_1_1, 1909 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_1_1_1_2_2, 1908 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_1_1_1_2_1, 1907 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_1_1_1_1_2, 1906 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_2_1_1_1_1_1, 1905 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_2_2_2_2_2, 1904 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_2_2_2_2_1, 1903 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_2_2_2_1_2, 1902 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_2_2_2_1_1, 1901 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_2_2_1_2_2, 1900 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_2_2_1_2_1, 1899 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_2_2_1_1_2, 1898 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_2_2_1_1_1, 1897 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_2_1_2_2_2, 1896 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_2_1_2_2_1, 1895 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_2_1_2_1_2, 1894 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_2_1_2_1_1, 1893 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_2_1_1_2_2, 1892 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_2_1_1_2_1, 1891 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_2_1_1_1_2, 1890 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_2_1_1_1_1, 1889 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_1_2_2_2_2, 1888 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_1_2_2_2_1, 1887 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_1_2_2_1_2, 1886 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_1_2_2_1_1, 1885 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_1_2_1_2_2, 1884 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_1_2_1_2_1, 1883 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_1_2_1_1_2, 1882 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_1_2_1_1_1, 1881 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_1_1_2_2_2, 1880 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_1_1_2_2_1, 1879 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_1_1_2_1_2, 1878 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_1_1_2_1_1, 1877 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_1_1_1_2_2, 1876 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_1_1_1_2_1, 1875 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_1_1_1_1_2, 1874 ]
+// SHARED-NEXT:	[ 1, foo_2_2_1_1_1_1_1_1_1, 1873 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_2_2_2_2_2, 1872 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_2_2_2_2_1, 1871 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_2_2_2_1_2, 1870 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_2_2_2_1_1, 1869 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_2_2_1_2_2, 1868 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_2_2_1_2_1, 1867 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_2_2_1_1_2, 1866 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_2_2_1_1_1, 1865 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_2_1_2_2_2, 1864 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_2_1_2_2_1, 1863 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_2_1_2_1_2, 1862 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_2_1_2_1_1, 1861 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_2_1_1_2_2, 1860 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_2_1_1_2_1, 1859 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_2_1_1_1_2, 1858 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_2_1_1_1_1, 1857 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_1_2_2_2_2, 1856 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_1_2_2_2_1, 1855 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_1_2_2_1_2, 1854 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_1_2_2_1_1, 1853 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_1_2_1_2_2, 1852 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_1_2_1_2_1, 1851 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_1_2_1_1_2, 1850 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_1_2_1_1_1, 1849 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_1_1_2_2_2, 1848 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_1_1_2_2_1, 1847 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_1_1_2_1_2, 1846 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_1_1_2_1_1, 1845 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_1_1_1_2_2, 1844 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_1_1_1_2_1, 1843 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_1_1_1_1_2, 1842 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_2_1_1_1_1_1, 1841 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_2_2_2_2_2, 1840 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_2_2_2_2_1, 1839 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_2_2_2_1_2, 1838 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_2_2_2_1_1, 1837 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_2_2_1_2_2, 1836 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_2_2_1_2_1, 1835 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_2_2_1_1_2, 1834 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_2_2_1_1_1, 1833 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_2_1_2_2_2, 1832 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_2_1_2_2_1, 1831 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_2_1_2_1_2, 1830 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_2_1_2_1_1, 1829 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_2_1_1_2_2, 1828 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_2_1_1_2_1, 1827 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_2_1_1_1_2, 1826 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_2_1_1_1_1, 1825 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_1_2_2_2_2, 1824 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_1_2_2_2_1, 1823 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_1_2_2_1_2, 1822 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_1_2_2_1_1, 1821 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_1_2_1_2_2, 1820 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_1_2_1_2_1, 1819 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_1_2_1_1_2, 1818 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_1_2_1_1_1, 1817 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_1_1_2_2_2, 1816 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_1_1_2_2_1, 1815 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_1_1_2_1_2, 1814 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_1_1_2_1_1, 1813 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_1_1_1_2_2, 1812 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_1_1_1_2_1, 1811 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_1_1_1_1_2, 1810 ]
+// SHARED-NEXT:	[ 1, foo_2_1_2_1_1_1_1_1_1, 1809 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_2_2_2_2_2, 1808 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_2_2_2_2_1, 1807 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_2_2_2_1_2, 1806 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_2_2_2_1_1, 1805 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_2_2_1_2_2, 1804 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_2_2_1_2_1, 1803 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_2_2_1_1_2, 1802 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_2_2_1_1_1, 1801 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_2_1_2_2_2, 1800 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_2_1_2_2_1, 1799 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_2_1_2_1_2, 1798 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_2_1_2_1_1, 1797 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_2_1_1_2_2, 1796 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_2_1_1_2_1, 1795 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_2_1_1_1_2, 1794 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_2_1_1_1_1, 1793 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_1_2_2_2_2, 1792 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_1_2_2_2_1, 1791 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_1_2_2_1_2, 1790 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_1_2_2_1_1, 1789 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_1_2_1_2_2, 1788 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_1_2_1_2_1, 1787 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_1_2_1_1_2, 1786 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_1_2_1_1_1, 1785 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_1_1_2_2_2, 1784 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_1_1_2_2_1, 1783 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_1_1_2_1_2, 1782 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_1_1_2_1_1, 1781 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_1_1_1_2_2, 1780 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_1_1_1_2_1, 1779 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_1_1_1_1_2, 1778 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_2_1_1_1_1_1, 1777 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_2_2_2_2_2, 1776 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_2_2_2_2_1, 1775 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_2_2_2_1_2, 1774 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_2_2_2_1_1, 1773 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_2_2_1_2_2, 1772 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_2_2_1_2_1, 1771 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_2_2_1_1_2, 1770 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_2_2_1_1_1, 1769 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_2_1_2_2_2, 1768 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_2_1_2_2_1, 1767 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_2_1_2_1_2, 1766 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_2_1_2_1_1, 1765 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_2_1_1_2_2, 1764 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_2_1_1_2_1, 1763 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_2_1_1_1_2, 1762 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_2_1_1_1_1, 1761 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_1_2_2_2_2, 1760 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_1_2_2_2_1, 1759 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_1_2_2_1_2, 1758 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_1_2_2_1_1, 1757 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_1_2_1_2_2, 1756 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_1_2_1_2_1, 1755 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_1_2_1_1_2, 1754 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_1_2_1_1_1, 1753 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_1_1_2_2_2, 1752 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_1_1_2_2_1, 1751 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_1_1_2_1_2, 1750 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_1_1_2_1_1, 1749 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_1_1_1_2_2, 1748 ]
+// SHARED-NEXT:	[ 1, foo_2_1_1_1_1_1_1_2_1, 1747 ]
+// SHARED-NEXT:	[ 1, foo
diff --git a/test/profile/Linux/comdat_rename.test b/test/profile/Linux/comdat_rename.test
new file mode 100644
index 0000000..116b5dc
--- /dev/null
+++ b/test/profile/Linux/comdat_rename.test
@@ -0,0 +1,6 @@
+// RUN: rm -fr %t.prof
+// RUN: %clangxx_pgogen=%t.prof/ -o %t.gen -O2 %S/../Inputs/comdat_rename_1.cc %S/../Inputs/comdat_rename_2.cc
+// RUN: %t.gen
+// RUN: llvm-profdata merge -o %t.profdata %t.prof/
+// RUN: %clangxx_profuse=%t.profdata  -O2 -emit-llvm -S %S/../Inputs/comdat_rename_1.cc -o - | FileCheck %S/../Inputs/comdat_rename_1.cc
+// RUN: %clangxx_profuse=%t.profdata  -O2 -emit-llvm -S %S/../Inputs/comdat_rename_2.cc -o - | FileCheck %S/../Inputs/comdat_rename_2.cc
diff --git a/test/profile/Linux/coverage_ctors.cpp b/test/profile/Linux/coverage_ctors.cpp
new file mode 100644
index 0000000..021d9df
--- /dev/null
+++ b/test/profile/Linux/coverage_ctors.cpp
@@ -0,0 +1,32 @@
+// RUN: %clangxx_profgen -std=c++11 -fuse-ld=gold -fcoverage-mapping -o %t %s
+// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
+// RUN: llvm-profdata merge -o %t.profdata %t.profraw
+// RUN: llvm-cov show %t -instr-profile %t.profdata -filename-equivalence 2>&1 | FileCheck %s
+
+struct Base {
+  int B;
+  Base() : B(0) {}
+  Base(const Base &b2) {
+    B = b2.B + 5;
+  }
+  Base(Base &&b2) {
+    B = b2.B + 10;
+  }
+};
+
+struct Derived : public Base {
+  Derived(const Derived &) = default; // CHECK:  [[@LINE]]| 2|  Derived(const Derived &) = default;
+  Derived(Derived &&) = default;      // CHECK:  [[@LINE]]| 1| Derived(Derived &&) = default;
+  Derived() = default;                // CHECK:  [[@LINE]]| 1| Derived() = default
+};
+
+Derived dd;
+int main() {
+  Derived dd2(dd);
+  Derived dd3(dd2);
+  Derived dd4(static_cast<Derived &&>(dd3));
+
+  if (dd.B != 0 || dd2.B != 5 || dd3.B != 10 || dd4.B != 20)
+    return 1;                         // CHECK: [[@LINE]]| 0|     return 1;
+  return 0;
+}
diff --git a/test/profile/Linux/coverage_dtor.cpp b/test/profile/Linux/coverage_dtor.cpp
new file mode 100644
index 0000000..1641512
--- /dev/null
+++ b/test/profile/Linux/coverage_dtor.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_profgen -x c++ -fno-exceptions  -std=c++11 -fuse-ld=gold -fcoverage-mapping -o %t %s
+// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
+// RUN: llvm-profdata merge -o %t.profdata %t.profraw
+// RUN: llvm-cov show %t -instr-profile %t.profdata -filename-equivalence 2>&1 | FileCheck %s
+
+int g = 100;
+struct Base {
+  int B;
+  Base(int B_) : B(B_) {}
+  ~Base() { g -= B; }
+};
+
+struct Derived : public Base {
+  Derived(int K) : Base(K) {}
+  ~Derived() = default; // CHECK:  [[@LINE]]| 2|  ~Derived() = default;
+};
+
+int main() {
+  {
+    Derived dd(10);
+    Derived dd2(90);
+  }
+  if (g != 0)
+    return 1;          // CHECK:  [[@LINE]]|  0|   return 1;
+  return 0;
+}
diff --git a/test/profile/Linux/coverage_test.cpp b/test/profile/Linux/coverage_test.cpp
index 1443279..db9a14e 100644
--- a/test/profile/Linux/coverage_test.cpp
+++ b/test/profile/Linux/coverage_test.cpp
@@ -1,28 +1,35 @@
-// RUN: %clang_profgen -fuse-ld=gold -O2 -fdata-sections -ffunction-sections -fprofile-instr-generate -fcoverage-mapping -Wl,--gc-sections -o %t %s
+// RUN: %clang_profgen -fuse-ld=gold -O2 -fdata-sections -ffunction-sections -fcoverage-mapping -Wl,--gc-sections -o %t %s
 // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
 // RUN: llvm-profdata merge -o %t.profdata %t.profraw
 // RUN: llvm-cov show %t -instr-profile %t.profdata -filename-equivalence 2>&1 | FileCheck %s
-//
-// RUN: %clang_profgen -fuse-ld=gold -O2 -fdata-sections -ffunction-sections -fPIE -pie -fprofile-instr-generate -fcoverage-mapping -Wl,--gc-sections -o %t.pie %s
+// BFD linkers older than 2.26 have a bug where per-function profile data is wrongly garbage collected when GC is turned on. We only do the end-to-end test here without GC:
+// RUN: %clang_profgen -O2  -fcoverage-mapping  -o %t.2 %s
+// RUN: env LLVM_PROFILE_FILE=%t.2.profraw %run %t.2
+// RUN: llvm-profdata merge -o %t.2.profdata %t.2.profraw
+// RUN: llvm-cov show %t.2 -instr-profile %t.2.profdata -filename-equivalence 2>&1 | FileCheck %s
+// Check that covmap is not garbage collected when GC is turned on with the BFD linker. Due to the bug mentioned above, we can only
+// do the check with objdump:
+// RUN: %clang_profgen -O2  -fcoverage-mapping -Wl,--gc-sections -o %t.3 %s
+// RUN: llvm-objdump -h %t.3 | FileCheck --check-prefix COVMAP %s
+// Check PIE option
+// RUN: %clang_profgen -fuse-ld=gold -O2 -fdata-sections -ffunction-sections -fPIE -pie -fcoverage-mapping -Wl,--gc-sections -o %t.pie %s
 // RUN: env LLVM_PROFILE_FILE=%t.pie.profraw %run %t.pie
 // RUN: llvm-profdata merge -o %t.pie.profdata %t.pie.profraw
 // RUN: llvm-cov show %t.pie -instr-profile %t.pie.profdata -filename-equivalence 2>&1 | FileCheck %s
 
-void foo(bool cond) { // CHECK:  1| [[@LINE]]|void foo(
-  if (cond) {         // CHECK:  1| [[@LINE]]|  if (cond) {
-  }                   // CHECK:  0| [[@LINE]]|  }
-}                     // CHECK:  1| [[@LINE]]|}
-void bar() {          // CHECK:  1| [[@LINE]]|void bar() {
-}                     // CHECK:  1| [[@LINE]]|}
-void func() {         // CHECK:  0| [[@LINE]]|void func(
-}                     // CHECK:  0| [[@LINE]]|}
-int main() {          // CHECK:  1| [[@LINE]]|int main(
-  foo(false);         // CHECK:  1| [[@LINE]]|  foo(
-  bar();              // CHECK:  1| [[@LINE]]|  bar(
-  return 0;           // CHECK:  1| [[@LINE]]|  return
-}                     // CHECK:  1| [[@LINE]]|}
+void foo(bool cond) { // CHECK:  [[@LINE]]| 1|void foo(
+  if (cond) {         // CHECK:  [[@LINE]]| 1| if (cond) {
+  }                   // CHECK:  [[@LINE]]| 0|  }
+}                     // CHECK:  [[@LINE]]| 1|}
+void bar() {          // CHECK:  [[@LINE]]| 1|void bar() {
+}                     // CHECK:  [[@LINE]]| 1|}
+void func() {         // CHECK:  [[@LINE]]| 0|void func(
+}                     // CHECK:  [[@LINE]]| 0|}
+int main() {          // CHECK:  [[@LINE]]| 1|int main(
+  foo(false);         // CHECK:  [[@LINE]]| 1| foo(
+  bar();              // CHECK:  [[@LINE]]| 1|  bar(
+  return 0;           // CHECK:  [[@LINE]]| 1| return
+}                     // CHECK:  [[@LINE]]| 1|}
 
-
-
-
+// COVMAP: __llvm_covmap {{.*}}
 
diff --git a/test/profile/Linux/extern_template.test b/test/profile/Linux/extern_template.test
new file mode 100644
index 0000000..ada4d23
--- /dev/null
+++ b/test/profile/Linux/extern_template.test
@@ -0,0 +1,29 @@
+// RUN: %clang -O2  -c -o %t.0.o %S/../Inputs/extern_template.cpp
+// RUN: %clang_profgen -O2  -c -o %t.o %S/../Inputs/extern_template.cpp
+// RUN: %clang_profgen -O2 -fcoverage-mapping %S/../Inputs/extern_template1.cpp %S/../Inputs/extern_template2.cpp %t.o -o %t
+// RUN: env LLVM_PROFILE_FILE=%t.profraw %t
+// RUN: llvm-profdata show --all-functions %t.profraw | FileCheck %s
+// RUN: llvm-profdata merge -o %t.profdata %t.profraw
+// RUN: llvm-cov show -instr-profile=%t.profdata %t | FileCheck %S/../Inputs/extern_template.h
+// RUN: %clang_profgen -O2 -fcoverage-mapping %S/../Inputs/extern_template1.cpp %S/../Inputs/extern_template2.cpp %t.0.o -o %t.0
+// RUN: env LLVM_PROFILE_FILE=%t.0.profraw %t.0
+// RUN: llvm-profdata show --all-functions %t.0.profraw | FileCheck %s
+// RUN: llvm-profdata merge -o %t.0.profdata %t.0.profraw
+// RUN: llvm-cov show -instr-profile=%t.0.profdata %t.0 | FileCheck %S/../Inputs/extern_template.h
+#define DEF
+#include "extern_template.h"
+#undef DEF
+extern int bar();
+extern int foo();
+extern Test<int> TO;
+int main() {
+  foo();
+  int R = bar();
+
+  if (R != 10)
+    return 1;
+  return 0;
+}
+// No duplicate entries
+// CHECK: _ZN4TestIiE4doItEi:
+// CHECK-NOT: _ZN4TestIiE4doItEi:
diff --git a/test/profile/Linux/instrprof-alloc.test b/test/profile/Linux/instrprof-alloc.test
new file mode 100644
index 0000000..752b108
--- /dev/null
+++ b/test/profile/Linux/instrprof-alloc.test
@@ -0,0 +1,6 @@
+// RUN: %clang_profgen -Xclang -fprofile-instrument=llvm  -fuse-ld=gold -Wl,-wrap,malloc -Wl,-wrap,calloc -o %t -O3 %S/../Inputs/instrprof-alloc.c
+// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
+
+// RUN: %clang_profgen  -Xclang -fprofile-instrument=llvm -mllvm -vp-static-alloc=false -fuse-ld=gold -Wl,-wrap,malloc -Wl,-wrap,calloc -o %t.dyn -O3 %S/../Inputs/instrprof-alloc.c
+// RUN: env LLVM_PROFILE_FILE=%t.profraw not %run %t.dyn
+
diff --git a/test/profile/Linux/instrprof-comdat.test b/test/profile/Linux/instrprof-comdat.test
index 689b8b0..b933e96 100644
--- a/test/profile/Linux/instrprof-comdat.test
+++ b/test/profile/Linux/instrprof-comdat.test
@@ -1,5 +1,5 @@
 RUN: mkdir -p %t.d
-RUN: %clang_profgen -xc++ -o %t.d/comdat -fcoverage-mapping -fuse-ld=gold %S/../Inputs/instrprof-comdat-1.cpp %S/../Inputs/instrprof-comdat-2.cpp
+RUN: %clangxx_profgen -o %t.d/comdat -fcoverage-mapping -fuse-ld=gold %S/../Inputs/instrprof-comdat-1.cpp %S/../Inputs/instrprof-comdat-2.cpp
 RUN: LLVM_PROFILE_FILE=%t-comdat.profraw %t.d/comdat
 RUN: llvm-profdata merge -o %t.d/comdat.prof %t-comdat.profraw 
 RUN: llvm-cov show --filename-equivalence --instr-profile=%t.d/comdat.prof %t.d/comdat | FileCheck --check-prefix=HEADER %S/../Inputs/instrprof-comdat.h
diff --git a/test/profile/Linux/instrprof-cs.c b/test/profile/Linux/instrprof-cs.c
new file mode 100644
index 0000000..3be4359
--- /dev/null
+++ b/test/profile/Linux/instrprof-cs.c
@@ -0,0 +1,35 @@
+// RUN: rm -fr %t.prof
+// RUN: %clang_pgogen=%t.prof/ -o %t.gen.cs -O2 %s
+// RUN: %t.gen.cs
+// RUN: llvm-profdata merge -o %t.cs.profdata %t.prof/
+// Check context sensitive profile
+// RUN: %clang_profuse=%t.cs.profdata  -O2 -emit-llvm -S %s -o - | FileCheck %s --check-prefix=CS
+//
+// RUN: %clang_profgen=%t.profraw -o %t.gen.cis -O2 %s
+// RUN: %t.gen.cis
+// RUN: llvm-profdata merge -o %t.cis.profdata %t.profraw
+// Check context insensitive profile
+// RUN: %clang_profuse=%t.cis.profdata  -O2 -emit-llvm -S %s -o - | FileCheck %s --check-prefix=CIS
+int g1 = 1;
+int g2 = 2;
+static void toggle(int t) {
+  if (t & 1)
+    g1 *= t;
+  else
+    g2 *= t;
+}
+
+int main() {
+  int i;
+  // CS: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD1:[0-9]+]]
+  // CIS: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD:[0-9]+]]
+  toggle(g1);
+  // CS: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD2:[0-9]+]]
+  // CIS: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD:[0-9]+]]
+  toggle(g2);
+  return 0;
+}
+
+// CS: ![[PD1]] = !{!"branch_weights", i32 0, i32 1}
+// CS: ![[PD2]] = !{!"branch_weights", i32 1, i32 0}
+// CIS: ![[PD]] = !{!"branch_weights", i32 2, i32 2}
diff --git a/test/profile/Linux/instrprof-file_ex.test b/test/profile/Linux/instrprof-file_ex.test
new file mode 100644
index 0000000..be89966
--- /dev/null
+++ b/test/profile/Linux/instrprof-file_ex.test
@@ -0,0 +1,17 @@
+RUN: mkdir -p %t.d
+RUN: %clang_profgen -fprofile-instr-generate %S/../Inputs/instrprof-file_ex.c -o %t
+RUN: rm -f %t.d/run.dump
+RUN: %run %t %t.d/run.dump
+RUN: sort %t.d/run.dump | FileCheck %s
+
+CHECK: Dump from Child 0
+CHECK-NEXT: Dump from Child 1
+CHECK-NEXT: Dump from Child 2
+CHECK-NEXT: Dump from Child 3
+CHECK-NEXT: Dump from Child 4
+CHECK-NEXT: Dump from Child 5
+CHECK-NEXT: Dump from Child 6
+CHECK-NEXT: Dump from Child 7 
+CHECK-NEXT: Dump from Child 8
+CHECK-NEXT: Dump from Child 9
+CHECK-NEXT: Dump from parent 10
diff --git a/test/profile/Linux/instrprof-merge-vp.c b/test/profile/Linux/instrprof-merge-vp.c
new file mode 100644
index 0000000..8daed33
--- /dev/null
+++ b/test/profile/Linux/instrprof-merge-vp.c
@@ -0,0 +1,113 @@
+// RUN: %clang_profgen -mllvm --enable-value-profiling=true -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=3 -O2 -o %t %s
+// RUN: %run %t %t.profraw
+// RUN: llvm-profdata merge -o %t.profdata %t.profraw
+// RUN: llvm-profdata show --all-functions --counts --ic-targets %t.profdata > %t.profdump
+// RUN: FileCheck --input-file %t.profdump  %s --check-prefix=FOO
+// RUN: FileCheck --input-file %t.profdump  %s --check-prefix=BAR
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+int __llvm_profile_runtime = 0;
+int __llvm_profile_write_file();
+void __llvm_profile_reset_counters(void);
+void __llvm_profile_merge_from_buffer(const char *, uint64_t);
+void __llvm_profile_set_filename(const char *);
+struct __llvm_profile_data;
+struct ValueProfData;
+void lprofMergeValueProfData(struct ValueProfData *, struct __llvm_profile_data *);
+/* Force the vp merger module to be linked in.  */
+void *Dummy = &lprofMergeValueProfData;
+
+void callee1() {}
+void callee2() {}
+void callee3() {}
+
+typedef void (*FP)(void);
+FP Fps[3] = {callee1, callee2, callee3};
+
+void foo(int N) {
+  int I, J;
+  for (I = 0; I < 3; I++)
+    for (J = 0; J < I * 2 + 1; J++)
+      Fps[I]();
+
+  if (N < 2)
+    return;
+
+  for (I = 0; I < 3; I++)
+    for (J = 0; J < I * 2 + 1; J++)
+      Fps[2 - I]();
+}
+
+/* This function is not profiled */
+void bar(void) {
+  int I;
+  for (I = 0; I < 20; I++)
+    Fps[I % 3]();
+}
+
+int main(int argc, const char *argv[]) {
+  int i;
+  if (argc < 2)
+    return 1;
+
+  const char *FileN = argv[1];
+  __llvm_profile_set_filename(FileN);
+  /* Start profiling. */
+  __llvm_profile_reset_counters();
+  foo(1);
+  /* End profiling by freezing counters and
+   * dump them to the file. */
+  if (__llvm_profile_write_file())
+    return 1;
+
+  /* Read profile data into buffer. */
+  FILE *File = fopen(FileN, "r");
+  if (!File)
+    return 1;
+  fseek(File, 0, SEEK_END);
+  uint64_t Size = ftell(File);
+  fseek(File, 0, SEEK_SET);
+  char *Buffer = (char *)malloc(Size);
+  if (Size != fread(Buffer, 1, Size, File))
+    return 1;
+  fclose(File);
+
+  /* Its profile will be discarded. */
+  for (i = 0; i < 10; i++)
+    bar();
+
+  /* Start profiling again and merge in previously
+     saved counters in buffer. */
+  __llvm_profile_reset_counters();
+  __llvm_profile_merge_from_buffer(Buffer, Size);
+  foo(2);
+  /* End profiling. */
+  truncate(FileN, 0);
+  if (__llvm_profile_write_file())
+    return 1;
+
+  /* Its profile will be discarded. */
+  bar();
+
+  return 0;
+}
+
+// FOO-LABEL:  foo:
+// FOO:    Indirect Target Results:
+// FOO-NEXT:	[ 0, callee3, 10 ]
+// FOO-NEXT:	[ 0, callee2, 6 ]
+// FOO-NEXT:	[ 0, callee1, 2 ]
+// FOO-NEXT:	[ 1, callee1, 5 ]
+// FOO-NEXT:	[ 1, callee2, 3 ]
+// FOO-NEXT:	[ 1, callee3, 1 ]
+
+// BAR-LABEL: bar:
+// BAR:         [ 0, callee1, 0 ]
+// BAR-NEXT:    [ 0, callee2, 0 ]
+// BAR-NEXT:    [ 0, callee3, 0 ]
+
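The test above drives the runtime's file-based round trip directly: it writes the raw profile with __llvm_profile_write_file, reads the file back into a buffer, and folds it into freshly reset counters with __llvm_profile_merge_from_buffer before profiling the next region. Below is a minimal sketch of just that reload step, assuming the file is compiled with clang profile instrumentation so the __llvm_profile_* entry points declared in the test are available; the helper name reload_and_merge is hypothetical.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

void __llvm_profile_reset_counters(void);
void __llvm_profile_merge_from_buffer(const char *, uint64_t);

static int reload_and_merge(const char *FileN) {
  FILE *File = fopen(FileN, "r");
  if (!File)
    return 1;
  fseek(File, 0, SEEK_END);
  uint64_t Size = ftell(File);                    /* size of the raw profile on disk */
  fseek(File, 0, SEEK_SET);
  char *Buffer = (char *)malloc(Size);
  if (!Buffer || fread(Buffer, 1, Size, File) != Size) {
    free(Buffer);
    fclose(File);
    return 1;
  }
  fclose(File);
  __llvm_profile_reset_counters();                /* drop counts made since the dump  */
  __llvm_profile_merge_from_buffer(Buffer, Size); /* fold the saved counts back in    */
  free(Buffer);
  return 0;
}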
diff --git a/test/profile/Linux/instrprof-set-filename-shared.test b/test/profile/Linux/instrprof-set-filename-shared.test
new file mode 100644
index 0000000..29e6713
--- /dev/null
+++ b/test/profile/Linux/instrprof-set-filename-shared.test
@@ -0,0 +1,8 @@
+# Test that __llvm_profile_set_filename is honored by shared library too.
+RUN: mkdir -p %t.d
+RUN: %clang_profgen=%t.shared.profraw -fPIC -shared -o %t.d/t.shared %S/../Inputs/instrprof-dlopen-func.c
+RUN: %clang_profgen -DCALL_SHARED -o %t.m -O3 -rpath %t.d %t.d/t.shared %S/../instrprof-set-filename.c 
+RUN: %run %t.m %t.main.profraw
+RUN: llvm-profdata show %t.main.profraw | FileCheck --check-prefix=SHARED %s
+
+# SHARED: Total functions: 2
diff --git a/test/profile/Linux/instrprof-value-prof-warn.test b/test/profile/Linux/instrprof-value-prof-warn.test
new file mode 100644
index 0000000..26502cc
--- /dev/null
+++ b/test/profile/Linux/instrprof-value-prof-warn.test
@@ -0,0 +1,8 @@
+RUN: %clang_profgen -O2 -mllvm -disable-vp=false -Xclang -fprofile-instrument=llvm -mllvm -vp-static-alloc=true -DSTRESS=1 -o %t.ir.warn  %S/../Inputs/instrprof-value-prof-real.c
+RUN: env LLVM_PROFILE_FILE=%t.ir.profraw LLVM_VP_MAX_NUM_VALS_PER_SITE=255  %run %t.ir.warn 2>&1 |FileCheck --check-prefix=WARNING %s
+#  Test that enough static counters have been allocated
+RUN: env LLVM_PROFILE_FILE=%t.ir.profraw LLVM_VP_MAX_NUM_VALS_PER_SITE=150  %run %t.ir.warn 2>&1 |FileCheck --check-prefix=NOWARNING --allow-empty %s
+
+# WARNING: LLVM Profile Warning:
+# NOWARNING-NOT: LLVM Profile Warning:
+
diff --git a/test/profile/gcc-flag-compatibility.test b/test/profile/gcc-flag-compatibility.test
index b108761..5b05e76 100644
--- a/test/profile/gcc-flag-compatibility.test
+++ b/test/profile/gcc-flag-compatibility.test
@@ -2,16 +2,16 @@
 RUN: mkdir -p %t.d
 RUN: %clang_profgen_gcc=%t.d/d1/d2 -o %t.d/code %S/Inputs/gcc-flag-compatibility.c
 
-# Test that the instrumented code writes to %t.d/d1/d2/default.profraw
+# Test that the instrumented code writes to %t.d/d1/d2/
 RUN: %run %t.d/code
-RUN: llvm-profdata merge -o %t.profdata %t.d/d1/d2/default.profraw
+RUN: llvm-profdata merge -o %t.profdata %t.d/d1/d2/
 
 # Test that we can override the directory and file name with LLVM_PROFILE_FILE.
 RUN: env LLVM_PROFILE_FILE=%t.d/x1/prof.raw %run %t.d/code
-RUN: llvm-profdata merge -o %t.profdata %t.d/x1/prof.raw
+RUN: llvm-profdata merge -o %t.profdata %t.d/x1/
 
 # Test that we can specify a directory with -fprofile-use.
-RUN: llvm-profdata merge -o %t.d/default.profdata %t.d/x1/prof.raw
+RUN: llvm-profdata merge -o %t.d/default.profdata %t.d/x1/
 RUN: %clang_profuse_gcc=%t.d -o %t.d/code %S/Inputs/gcc-flag-compatibility.c
 
 # Test that we can specify a file with -fprofile-use.
diff --git a/test/profile/instrprof-basic.c b/test/profile/instrprof-basic.c
index 995525b..dd8f3fc 100644
--- a/test/profile/instrprof-basic.c
+++ b/test/profile/instrprof-basic.c
@@ -1,17 +1,56 @@
 // RUN: %clang_profgen -o %t -O3 %s
 // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
 // RUN: llvm-profdata merge -o %t.profdata %t.profraw
-// RUN: %clang_profuse=%t.profdata -o - -S -emit-llvm %s | FileCheck %s
+// RUN: %clang_profuse=%t.profdata -o - -S -emit-llvm %s | FileCheck %s --check-prefix=COMMON --check-prefix=ORIG
+//
+// RUN: rm -fr %t.dir1
+// RUN: mkdir -p %t.dir1
+// RUN: env LLVM_PROFILE_FILE=%t.dir1/profraw_e_%1m %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.dir1/profraw_e_%1m %run %t
+// RUN: llvm-profdata merge -o %t.em.profdata %t.dir1
+// RUN: %clang_profuse=%t.em.profdata -o - -S -emit-llvm %s | FileCheck %s --check-prefix=COMMON --check-prefix=MERGE
+//
+// RUN: rm -fr %t.dir2
+// RUN: mkdir -p %t.dir2
+// RUN: %clang_profgen=%t.dir2/%m.profraw -o %t.merge -O3 %s
+// RUN: %run %t.merge
+// RUN: %run %t.merge
+// RUN: llvm-profdata merge -o %t.m.profdata %t.dir2/
+// RUN: %clang_profuse=%t.m.profdata -o - -S -emit-llvm %s | FileCheck %s --check-prefix=COMMON --check-prefix=MERGE
+//
+// Test that merging is enabled by default with -fprofile-generate=
+// RUN: rm -fr %t.dir3
+// RUN: mkdir -p %t.dir3
+// RUN: %clang_pgogen=%t.dir3/ -o %t.merge3 -O0 %s
+// RUN: %run %t.merge3
+// RUN: %run %t.merge3
+// RUN: %run %t.merge3
+// RUN: %run %t.merge3
+// RUN: llvm-profdata merge -o %t.m3.profdata %t.dir3/
+// RUN: %clang_profuse=%t.m3.profdata -O0 -o - -S -emit-llvm %s | FileCheck %s --check-prefix=COMMON --check-prefix=PGOMERGE
+//
+// Test that merging is enabled by default with -fprofile-generate
+// RUN: rm -fr %t.dir4
+// RUN: mkdir -p %t.dir4
+// RUN: %clang_pgogen -o %t.dir4/merge4 -O0 %s
+// RUN: cd %t.dir4
+// RUN: %run %t.dir4/merge4
+// RUN: %run %t.dir4/merge4
+// RUN: %run %t.dir4/merge4
+// RUN: %run %t.dir4/merge4
+// RUN: rm -f %t.dir4/merge4
+// RUN: llvm-profdata merge -o %t.m4.profdata ./
+// RUN: %clang_profuse=%t.m4.profdata -O0 -o - -S -emit-llvm %s | FileCheck %s --check-prefix=COMMON  --check-prefix=PGOMERGE
 
 int begin(int i) {
-  // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD1:[0-9]+]]
+  // COMMON: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD1:[0-9]+]]
   if (i)
     return 0;
   return 1;
 }
 
 int end(int i) {
-  // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD2:[0-9]+]]
+  // COMMON: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD2:[0-9]+]]
   if (i)
     return 0;
   return 1;
@@ -21,11 +60,15 @@
   begin(0);
   end(1);
 
-  // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD2:[0-9]+]]
+  // COMMON: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD2:[0-9]+]]
   if (argc)
     return 0;
   return 1;
 }
 
-// CHECK: ![[PD1]] = !{!"branch_weights", i32 1, i32 2}
-// CHECK: ![[PD2]] = !{!"branch_weights", i32 2, i32 1}
+// ORIG: ![[PD1]] = !{!"branch_weights", i32 1, i32 2}
+// ORIG: ![[PD2]] = !{!"branch_weights", i32 2, i32 1}
+// MERGE: ![[PD1]] = !{!"branch_weights", i32 1, i32 3}
+// MERGE: ![[PD2]] = !{!"branch_weights", i32 3, i32 1}
+// PGOMERGE: ![[PD1]] = !{!"branch_weights", i32 0, i32 4}
+// PGOMERGE: ![[PD2]] = !{!"branch_weights", i32 4, i32 0}
diff --git a/test/profile/instrprof-bufferio.c b/test/profile/instrprof-bufferio.c
index eed548f..5584254 100644
--- a/test/profile/instrprof-bufferio.c
+++ b/test/profile/instrprof-bufferio.c
@@ -11,11 +11,11 @@
 #include <string.h>
 
 typedef struct ProfBufferIO ProfBufferIO;
-ProfBufferIO *llvmCreateBufferIOInternal(FILE *File, uint32_t DefaultBufferSz);
-void llvmDeleteBufferIO(ProfBufferIO *BufferIO);
+ProfBufferIO *lprofCreateBufferIOInternal(void *File, uint32_t BufferSz);
+void lprofDeleteBufferIO(ProfBufferIO *BufferIO);
 
-int llvmBufferIOWrite(ProfBufferIO *BufferIO, const char *Data, uint32_t Size);
-int llvmBufferIOFlush(ProfBufferIO *BufferIO);
+int lprofBufferIOWrite(ProfBufferIO *BufferIO, const char *Data, uint32_t Size);
+int lprofBufferIOFlush(ProfBufferIO *BufferIO);
 
 int __llvm_profile_runtime = 0;
 
@@ -42,34 +42,35 @@
     if (!File[J])
       return 1;
 
-    BufferIO = llvmCreateBufferIOInternal(File[J], IOBufferSize[J]);
+    BufferIO = lprofCreateBufferIOInternal(File[J], IOBufferSize[J]);
 
-    llvmBufferIOWrite(BufferIO, "Short Strings:\n", strlen("Short Strings:\n"));
+    lprofBufferIOWrite(BufferIO, "Short Strings:\n",
+                       strlen("Short Strings:\n"));
     for (I = 0; I < 1024; I++) {
-      llvmBufferIOWrite(BufferIO, SmallData, strlen(SmallData));
+      lprofBufferIOWrite(BufferIO, SmallData, strlen(SmallData));
     }
-    llvmBufferIOWrite(BufferIO, "Long Strings:\n", strlen("Long Strings:\n"));
+    lprofBufferIOWrite(BufferIO, "Long Strings:\n", strlen("Long Strings:\n"));
     for (I = 0; I < 1024; I++) {
-      llvmBufferIOWrite(BufferIO, MediumData, strlen(MediumData));
+      lprofBufferIOWrite(BufferIO, MediumData, strlen(MediumData));
     }
-    llvmBufferIOWrite(BufferIO, "Extra Long Strings:\n",
+    lprofBufferIOWrite(BufferIO, "Extra Long Strings:\n",
                       strlen("Extra Long Strings:\n"));
     for (I = 0; I < 10; I++) {
-      llvmBufferIOWrite(BufferIO, LargeData, strlen(LargeData));
+      lprofBufferIOWrite(BufferIO, LargeData, strlen(LargeData));
     }
-    llvmBufferIOWrite(BufferIO, "Mixed Strings:\n", strlen("Mixed Strings:\n"));
+    lprofBufferIOWrite(BufferIO, "Mixed Strings:\n", strlen("Mixed Strings:\n"));
     for (I = 0; I < 1024; I++) {
-      llvmBufferIOWrite(BufferIO, MediumData, strlen(MediumData));
-      llvmBufferIOWrite(BufferIO, SmallData, strlen(SmallData));
+      lprofBufferIOWrite(BufferIO, MediumData, strlen(MediumData));
+      lprofBufferIOWrite(BufferIO, SmallData, strlen(SmallData));
     }
-    llvmBufferIOWrite(BufferIO, "Endings:\n", strlen("Endings:\n"));
-    llvmBufferIOWrite(BufferIO, "END\n", strlen("END\n"));
-    llvmBufferIOWrite(BufferIO, "ENDEND\n", strlen("ENDEND\n"));
-    llvmBufferIOWrite(BufferIO, "ENDENDEND\n", strlen("ENDENDEND\n"));
-    llvmBufferIOWrite(BufferIO, "ENDENDENDEND\n", strlen("ENDENDENDEND\n"));
-    llvmBufferIOFlush(BufferIO);
+    lprofBufferIOWrite(BufferIO, "Endings:\n", strlen("Endings:\n"));
+    lprofBufferIOWrite(BufferIO, "END\n", strlen("END\n"));
+    lprofBufferIOWrite(BufferIO, "ENDEND\n", strlen("ENDEND\n"));
+    lprofBufferIOWrite(BufferIO, "ENDENDEND\n", strlen("ENDENDEND\n"));
+    lprofBufferIOWrite(BufferIO, "ENDENDENDEND\n", strlen("ENDENDENDEND\n"));
+    lprofBufferIOFlush(BufferIO);
 
-    llvmDeleteBufferIO(BufferIO);
+    lprofDeleteBufferIO(BufferIO);
 
     fclose(File[J]);
   }
diff --git a/test/profile/instrprof-dump.c b/test/profile/instrprof-dump.c
new file mode 100644
index 0000000..93c3c46
--- /dev/null
+++ b/test/profile/instrprof-dump.c
@@ -0,0 +1,62 @@
+/*
+RUN: rm -fr %t.profdir
+RUN: %clang_profgen=%t.profdir/default_%m.profraw -o %t -O2 %s
+RUN: %run %t  2>&1 | FileCheck %s --check-prefix=NO_EXIT_WRITE
+RUN: llvm-profdata merge -o %t.profdata %t.profdir
+RUN: %clang_profuse=%t.profdata -o - -S -emit-llvm %s | FileCheck %s  --check-prefix=PROF
+
+NO_EXIT_WRITE: Profile data not written to file: already written
+*/
+
+int __llvm_profile_dump(void);
+void __llvm_profile_reset_counters(void);
+int foo(int);
+int bar(int);
+int skip(int);
+
+int main(int argc, const char *argv[]) {
+  int Ret = foo(0); /* region 1 */
+  __llvm_profile_dump();
+
+  /* not profiled -- cleared later. */
+  skip(0);   /* skipped region */
+  
+  __llvm_profile_reset_counters();
+  Ret += bar(0);  /* region 2 */
+  __llvm_profile_dump();
+
+  skip(1);
+
+  __llvm_profile_reset_counters();
+  /* foo's profile will be merged.  */
+  foo(1);  /* region 3 */
+  __llvm_profile_dump();
+
+  return Ret;
+}
+
+__attribute__((noinline)) int foo(int X) {
+  /* PROF: define {{.*}} @foo({{.*}}!prof ![[ENT:[0-9]+]]
+     PROF: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD1:[0-9]+]]
+  */
+  return X <= 0 ? -X : X;
+}
+
+__attribute__((noinline)) int skip(int X) {
+  /* PROF: define {{.*}} @skip(
+     PROF: br i1 %{{.*}}, label %{{.*}}, label %{{[^,]+$}}
+  */
+  return X <= 0 ? -X : X;
+}
+
+__attribute__((noinline)) int bar(int X) {
+  /* PROF-LABEL: define {{.*}} @bar(
+     PROF: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD2:[0-9]+]]
+  */
+  return X <= 0 ? -X : X;
+}
+
+/*
+PROF: ![[ENT]] = !{!"function_entry_count", i64 2}  
+PROF: ![[PD1]] = !{!"branch_weights", i32 2, i32 2}
+*/
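instrprof-dump.c above pairs __llvm_profile_reset_counters with __llvm_profile_dump to attribute counts to explicit regions, and the NO_EXIT_WRITE check confirms the runtime skips the at-exit write once a dump has happened. Here is a minimal sketch of that pattern, under the same assumption that the program is built with an instrumented clang providing these entry points; the helper name profile_region is hypothetical.

int __llvm_profile_dump(void);
void __llvm_profile_reset_counters(void);

static void profile_region(void (*Work)(void)) {
  __llvm_profile_reset_counters(); /* forget counts from code outside the region */
  Work();                          /* only this work is attributed to the region */
  __llvm_profile_dump();           /* write the raw profile now; per the check
                                      above, the runtime then skips the exit-time
                                      write and reports "already written"        */
}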
diff --git a/test/profile/instrprof-error.c b/test/profile/instrprof-error.c
index bba9d0a..3297c9d 100644
--- a/test/profile/instrprof-error.c
+++ b/test/profile/instrprof-error.c
@@ -1,9 +1,9 @@
 // RUN: %clang_profgen -o %t -O3 %s
-// RUN: env LLVM_PROFILE_FILE=%t/ LLVM_PROFILE_VERBOSE_ERRORS=1 %run %t 1 2>&1 | FileCheck %s
+// RUN: env LLVM_PROFILE_FILE=%t/  %run %t 1 2>&1 | FileCheck %s
 
 int main(int argc, const char *argv[]) {
   if (argc < 2)
     return 1;
   return 0;
 }
-// CHECK: LLVM Profile: Failed to write file 
+// CHECK: LLVM Profile Error: Failed to write file 
diff --git a/test/profile/instrprof-hostname.c b/test/profile/instrprof-hostname.c
new file mode 100644
index 0000000..b77cf8d
--- /dev/null
+++ b/test/profile/instrprof-hostname.c
@@ -0,0 +1,14 @@
+// RUN: %clang_profgen -o %t -O3 %s
+// RUN: env LLVM_PROFILE_FILE=%h.%t-%h.profraw_%h %run %t
+// RUN: %run uname -n > %t.n
+// RUN: llvm-profdata merge -o %t.profdata `cat %t.n`.%t-`cat %t.n`.profraw_`cat %t.n`
+// RUN: %clang_profuse=%t.profdata -o - -S -emit-llvm %s | FileCheck %s
+// REQUIRES: shell
+
+int main(int argc, const char *argv[]) {
+  // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD1:[0-9]+]]
+  if (argc > 2)
+    return 1;
+  return 0;
+}
+// CHECK: ![[PD1]] = !{!"branch_weights", i32 1, i32 2}
diff --git a/test/profile/instrprof-icall-promo.test b/test/profile/instrprof-icall-promo.test
new file mode 100644
index 0000000..d9b16f6
--- /dev/null
+++ b/test/profile/instrprof-icall-promo.test
@@ -0,0 +1,17 @@
+# IR based instrumentation
+RUN: %clangxx_pgogen -O2  -c -o %t.1.o  %S/Inputs/instrprof-icall-promo_1.cc 
+RUN: %clangxx_pgogen -O2 -c -o %t.2.o  %S/Inputs/instrprof-icall-promo_2.cc 
+
+RUN: %clangxx_pgogen -O2 %t.2.o %t.1.o -o %t.gen.1
+RUN: env LLVM_PROFILE_FILE=%t-icall.profraw %run %t.gen.1
+RUN: llvm-profdata merge -o %t-icall.profdata %t-icall.profraw
+RUN: %clangxx_profuse=%t-icall.profdata -O2 -Rpass=pgo-icall-prom  -c -o %t.2.use.o  %S/Inputs/instrprof-icall-promo_2.cc  2>&1 | FileCheck %s
+
+RUN: %clangxx_pgogen -O2 %t.1.o %t.2.o -o %t.gen.2
+RUN: env LLVM_PROFILE_FILE=%t-icall2.profraw %run %t.gen.2
+RUN: llvm-profdata merge -o %t-icall2.profdata %t-icall2.profraw
+RUN: %clangxx_profuse=%t-icall2.profdata -O2 -Rpass=pgo-icall-prom  -c -o %t.2.use.o  %S/Inputs/instrprof-icall-promo_2.cc  2>&1 | FileCheck %s
+
+
+# CHECK: Promote indirect call to
+
diff --git a/test/profile/instrprof-merge-match.test b/test/profile/instrprof-merge-match.test
new file mode 100644
index 0000000..8345620
--- /dev/null
+++ b/test/profile/instrprof-merge-match.test
@@ -0,0 +1,5 @@
+// RUN: mkdir -p %t.d
+// RUN: %clang_profgen  -o %t.d/libt.so -fPIC -shared %S/Inputs/instrprof-merge-match-lib.c
+// RUN: %clang_profgen  -o %t -L %t.d -rpath %t.d  %S/Inputs/instrprof-merge-match.c -lt
+// RUN: %run %t
+
diff --git a/test/profile/instrprof-merge.c b/test/profile/instrprof-merge.c
new file mode 100644
index 0000000..ef24c83
--- /dev/null
+++ b/test/profile/instrprof-merge.c
@@ -0,0 +1,96 @@
+// RUN: %clang_profgen -O2 -o %t %s
+// RUN: %run %t %t.profraw 1 1
+// RUN: llvm-profdata show --all-functions --counts %t.profraw  | FileCheck %s
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int __llvm_profile_runtime = 0;
+uint64_t __llvm_profile_get_size_for_buffer(void);
+int __llvm_profile_write_buffer(char *);
+void __llvm_profile_reset_counters(void);
+void __llvm_profile_merge_from_buffer(const char *, uint64_t);
+
+int dumpBuffer(const char *FileN, const char *Buffer, uint64_t Size) {
+  FILE *File = fopen(FileN, "w");
+  if (!File)
+    return 1;
+  if (fwrite(Buffer, 1, Size, File) != Size)
+    return 1;
+  return fclose(File);
+}
+
+int g = 0;
+void foo(char c) {
+  if (c == '1')
+    g++;
+  else
+    g--;
+}
+
+/* This function is not profiled */
+void bar(int M) { g += M; }
+
+int main(int argc, const char *argv[]) {
+  int i;
+  if (argc < 4)
+    return 1;
+
+  const uint64_t MaxSize = 10000;
+  static char Buffer[MaxSize];
+
+  uint64_t Size = __llvm_profile_get_size_for_buffer();
+  if (Size > MaxSize)
+    return 1;
+
+  /* Start profiling. */
+  __llvm_profile_reset_counters();
+  foo(argv[2][0]);
+  /* End profiling by freezing counters. */
+  if (__llvm_profile_write_buffer(Buffer))
+    return 1;
+
+  /* Its profile will be discarded. */
+  for (i = 0; i < 10; i++)
+    bar(1);
+
+  /* Start profiling again and merge in previously
+     saved counters in buffer. */
+  __llvm_profile_reset_counters();
+  __llvm_profile_merge_from_buffer(Buffer, Size);
+  foo(argv[3][0]);
+  /* End profiling */
+  if (__llvm_profile_write_buffer(Buffer))
+    return 1;
+
+  /* Its profile will be discarded. */
+  bar(2);
+
+  /* Now it is time to dump the profile to file.  */
+  return dumpBuffer(argv[1], Buffer, Size);
+}
+
+// Not profiled
+// CHECK-LABEL: dumpBuffer:
+// CHECK:        Counters: 3
+// CHECK-NEXT:   Function count: 0
+// CHECK-NEXT:   Block counts: [0, 0]
+
+// Profiled with entry count == 2
+// CHECK-LABEL:  foo:
+// CHECK:         Counters: 2
+// CHECK-NEXT:    Function count: 2
+// CHECK-NEXT:    Block counts: [2]
+
+// Not profiled
+// CHECK-LABEL:  bar:
+// CHECK:         Counters: 1
+// CHECK-NEXT:   Function count: 0
+// CHECK-NEXT:   Block counts: []
+
+// Not profiled
+// CHECK-LABEL:  main:
+// CHECK:         Counters: 6
+// CHECK-NEXT:    Function count: 0
+// CHECK-NEXT:    Block counts: [0, 0, 0, 0, 0]
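instrprof-merge.c above shows the in-memory variant of the same merging idea: freeze one region's counts into a buffer with __llvm_profile_write_buffer, let uninteresting work run, then reset and re-apply the saved counts with __llvm_profile_merge_from_buffer so the next region accumulates on top. A condensed sketch under the same assumptions follows; Region1, Skipped, and Region2 are hypothetical callbacks, and the caller-supplied buffer is checked against __llvm_profile_get_size_for_buffer.

#include <stdint.h>

uint64_t __llvm_profile_get_size_for_buffer(void);
int __llvm_profile_write_buffer(char *);
void __llvm_profile_reset_counters(void);
void __llvm_profile_merge_from_buffer(const char *, uint64_t);

static int run_two_regions(void (*Region1)(void), void (*Skipped)(void),
                           void (*Region2)(void), char *Buffer,
                           uint64_t BufferSize) {
  uint64_t Size = __llvm_profile_get_size_for_buffer();
  if (Size > BufferSize)
    return 1;                                     /* buffer too small            */
  __llvm_profile_reset_counters();                /* start counting region 1     */
  Region1();
  if (__llvm_profile_write_buffer(Buffer))        /* freeze region-1 counts      */
    return 1;
  Skipped();                                      /* these counts get discarded  */
  __llvm_profile_reset_counters();                /* drop the skipped counts     */
  __llvm_profile_merge_from_buffer(Buffer, Size); /* restore region-1 counts     */
  Region2();                                      /* region 2 adds on top        */
  return 0;
}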
diff --git a/test/profile/instrprof-override-filename-then-reset-default.c b/test/profile/instrprof-override-filename-then-reset-default.c
index 137a3b2..3438227 100644
--- a/test/profile/instrprof-override-filename-then-reset-default.c
+++ b/test/profile/instrprof-override-filename-then-reset-default.c
@@ -7,13 +7,13 @@
 // RUN: %clang_profuse=%t.d/default.profdata -o - -S -emit-llvm %s | FileCheck %s
 
 
-void __llvm_profile_override_default_filename(const char *);
+void __llvm_profile_set_filename(const char *);
 int main(int argc, const char *argv[]) {
   // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD1:[0-9]+]]
   if (argc < 2)
     return 1;
-  __llvm_profile_override_default_filename(argv[1]);
-  __llvm_profile_override_default_filename(0);
+  __llvm_profile_set_filename(argv[1]);
+  __llvm_profile_set_filename(0);
   return 0;
 }
 // CHECK: ![[PD1]] = !{!"branch_weights", i32 1, i32 2}
diff --git a/test/profile/instrprof-override-filename-with-env.c b/test/profile/instrprof-override-filename-with-env.c
index cce8389..3f4e5c8 100644
--- a/test/profile/instrprof-override-filename-with-env.c
+++ b/test/profile/instrprof-override-filename-with-env.c
@@ -1,14 +1,14 @@
-// RUN: %clang_profgen -o %t -O3 %s
+// RUN: %clang_profgen=%t.bad.profraw -o %t -O3 %s
 // RUN: env LLVM_PROFILE_FILE=%t.good.profraw %run %t %t.bad.profraw
 // RUN: llvm-profdata merge -o %t.profdata %t.good.profraw
 // RUN: %clang_profuse=%t.profdata -o - -S -emit-llvm %s | FileCheck %s
 
-void __llvm_profile_override_default_filename(const char *);
+void bar () {}
 int main(int argc, const char *argv[]) {
   // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD1:[0-9]+]]
   if (argc < 2)
     return 1;
-  __llvm_profile_override_default_filename(argv[1]);
+  bar();
   return 0;
 }
 // CHECK: ![[PD1]] = !{!"branch_weights", i32 1, i32 2}
diff --git a/test/profile/instrprof-override-filename.c b/test/profile/instrprof-override-filename.c
index 59dea29..a67c707 100644
--- a/test/profile/instrprof-override-filename.c
+++ b/test/profile/instrprof-override-filename.c
@@ -1,14 +1,14 @@
-// RUN: %clang_profgen -o %t -O3 %s
+// RUN: %clang_profgen=%t.profraw -o %t -O3 %s
 // RUN: %run %t %t.profraw
 // RUN: llvm-profdata merge -o %t.profdata %t.profraw
 // RUN: %clang_profuse=%t.profdata -o - -S -emit-llvm %s | FileCheck %s
 
-void __llvm_profile_override_default_filename(const char *);
+void bar() {}
 int main(int argc, const char *argv[]) {
   // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD1:[0-9]+]]
   if (argc < 2)
     return 1;
-  __llvm_profile_override_default_filename(argv[1]);
+  bar();
   return 0;
 }
 // CHECK: ![[PD1]] = !{!"branch_weights", i32 1, i32 2}
diff --git a/test/profile/instrprof-path.c b/test/profile/instrprof-path.c
new file mode 100644
index 0000000..28ee8ad
--- /dev/null
+++ b/test/profile/instrprof-path.c
@@ -0,0 +1,39 @@
+// RUN: %clang_pgogen -O2 -o %t.0 %s
+// RUN: %clang_pgogen=%t.d1 -O2 -o %t.1 %s
+// RUN: %clang_pgogen=%t.d1/%t.d2 -O2 -o %t.2 %s
+//
+// RUN: %run %t.0  ""
+// RUN: env LLVM_PROFILE_FILE=%t.d1/default.profraw %run %t.0  %t.d1/
+// RUN: env LLVM_PROFILE_FILE=%t.d1/%t.d2/default.profraw %run %t.0 %t.d1/%t.d2/
+// RUN: %run %t.1 %t.d1/
+// RUN: %run %t.2 %t.d1/%t.d2/
+// RUN: %run %t.2 %t.d1/%t.d2/ %t.d1/%t.d2/%t.d3/blah.profraw %t.d1/%t.d2/%t.d3/
+
+#include <string.h>
+
+const char *__llvm_profile_get_path_prefix();
+void __llvm_profile_set_filename(const char*);
+
+int main(int argc, const char *argv[]) {
+  int i;
+  const char *expected;
+  const char *prefix;
+  if (argc < 2)
+    return 1;
+
+  expected = argv[1];
+  prefix = __llvm_profile_get_path_prefix();
+
+  if (strcmp(prefix, expected))
+    return 1;
+
+  if (argc == 4) {
+    __llvm_profile_set_filename(argv[2]);
+    prefix = __llvm_profile_get_path_prefix();
+    expected = argv[3];
+    if (strcmp(prefix, expected))
+      return 1;
+  }
+
+  return 0;
+}
diff --git a/test/profile/instrprof-set-filename.c b/test/profile/instrprof-set-filename.c
index 51aa423..7635360 100644
--- a/test/profile/instrprof-set-filename.c
+++ b/test/profile/instrprof-set-filename.c
@@ -1,14 +1,57 @@
+// 1. Test that __llvm_profile_set_filename has higher precedence than
+//    the default path.
 // RUN: %clang_profgen -o %t -O3 %s
 // RUN: %run %t %t.profraw
 // RUN: llvm-profdata merge -o %t.profdata %t.profraw
 // RUN: %clang_profuse=%t.profdata -o - -S -emit-llvm %s | FileCheck %s
+// RUN: rm %t.profraw
+// RUN: rm %t.profdata
+// 2. Test that __llvm_profile_set_filename has higher precedence than
+//    environment variable
+// RUN: env LLVM_PROFILE_FILE=%t.env.profraw %run %t %t.profraw
+// RUN: llvm-profdata merge -o %t.profdata %t.profraw
+// RUN: %clang_profuse=%t.profdata -o - -S -emit-llvm %s | FileCheck %s
+// RUN: rm %t.profraw
+// RUN: rm %t.profdata
+// 3. Test that __llvm_profile_set_filename has higher precedence than
+//    the command line.
+// RUN: %clang_profgen=%t.cmd.profraw -o %t.cmd -O3 %s
+// RUN: %run %t.cmd %t.profraw
+// RUN: llvm-profdata merge -o %t.profdata %t.profraw
+// RUN: %clang_profuse=%t.profdata -o - -S -emit-llvm %s | FileCheck %s
+// RUN: rm %t.profraw
+// RUN: rm %t.profdata
+// 4. Test that the command line has higher precedence than the default path.
+// RUN: %clang_profgen=%t.cmd.profraw -DNO_API -o %t.cmd -O3 %s
+// RUN: %run %t.cmd %t.profraw
+// RUN: llvm-profdata merge -o %t.cmd.profdata %t.cmd.profraw
+// RUN: %clang_profuse=%t.cmd.profdata -o - -S -emit-llvm %s | FileCheck %s
+// RUN: rm %t.cmd.profraw
+// RUN: rm %t.cmd.profdata
+// 5. Test that the environment variable has higher precedence than
+//    the command line.
+// RUN: env LLVM_PROFILE_FILE=%t.env.profraw %run %t.cmd %t.profraw
+// RUN: llvm-profdata merge -o %t.env.profdata %t.env.profraw
+// RUN: %clang_profuse=%t.env.profdata -o - -S -emit-llvm %s | FileCheck %s
+// RUN: rm %t.env.profraw
+// RUN: rm %t.env.profdata
 
+#ifdef CALL_SHARED
+extern void func(int);
+#endif
 void __llvm_profile_set_filename(const char *);
 int main(int argc, const char *argv[]) {
   // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD1:[0-9]+]]
   if (argc < 2)
     return 1;
+#ifndef NO_API
   __llvm_profile_set_filename(argv[1]);
+#endif
+
+#ifdef CALL_SHARED
+  func(1);
+#endif
   return 0;
 }
 // CHECK: ![[PD1]] = !{!"branch_weights", i32 1, i32 2}
+// SHARED: Total functions: 2
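The RUN lines above pin down the precedence order: a __llvm_profile_set_filename call wins over the LLVM_PROFILE_FILE environment variable, the command-line profile path, and the built-in default, while the environment variable in turn wins over the command-line path. A minimal sketch of the API side of that, assuming an instrumentation-enabled build; the literal path is only an example.

void __llvm_profile_set_filename(const char *);

int main(void) {
  /* From this point on the raw profile is written to the path below,
     regardless of LLVM_PROFILE_FILE or any profile path given at compile time. */
  __llvm_profile_set_filename("explicit.profraw");
  return 0;
}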
diff --git a/test/profile/instrprof-value-prof-2.c b/test/profile/instrprof-value-prof-2.c
index 989353e..a5939fe 100644
--- a/test/profile/instrprof-value-prof-2.c
+++ b/test/profile/instrprof-value-prof-2.c
@@ -1,7 +1,13 @@
 // RUN: %clang_profgen -O2 -o %t %s
 // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
 // RUN: llvm-profdata merge -o %t.profdata %t.profraw
-// RUN: llvm-profdata show --all-functions -ic-targets  %t.profdata |  FileCheck  %s 
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.profdata > %t.out
+// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-1 < %t.out
+// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-2 < %t.out
+// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-3 < %t.out
+// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-4 < %t.out
+// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-5 < %t.out
+// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-6 < %t.out
 
 #include <stdint.h>
 #include <stdio.h>
@@ -27,10 +33,19 @@
 void caller_without_value_site2() {}
 void caller_with_vp2() {}
 
+void (*callee1Ptr)();
+void (*callee2Ptr)();
+
+void __attribute__ ((noinline)) setFunctionPointers () {
+  callee1Ptr = callee1;
+  callee2Ptr = callee2;
+}
+
 int main(int argc, const char *argv[]) {
   unsigned S, NS = 10, V;
   const __llvm_profile_data *Data, *DataEnd;
 
+  setFunctionPointers();
   Data = __llvm_profile_begin_data();
   DataEnd = __llvm_profile_end_data();
   for (; Data < DataEnd; Data = __llvm_profile_iterate_data(Data)) {
@@ -49,87 +64,87 @@
     for (S = 0; S < NS; S++) {
       unsigned C;
       for (C = 0; C < S + 1; C++) {
-        __llvm_profile_instrument_target((uint64_t)&callee1, (void *)Data, S);
+        __llvm_profile_instrument_target((uint64_t)callee1Ptr, (void *)Data, S);
         if (C % 2 == 0)
-          __llvm_profile_instrument_target((uint64_t)&callee2, (void *)Data, S);
+          __llvm_profile_instrument_target((uint64_t)callee2Ptr, (void *)Data, S);
       }
     }
   }
 }
 
-// CHECK-LABEL:   caller_with_value_site_never_called2:
-// CHECK-NEXT:    Hash: 0x0000000000000000
-// CHECK-NEXT:    Counters:
-// CHECK-NEXT:    Function count
-// CHECK-NEXT:    Indirect Call Site Count: 10
-// CHECK-NEXT:    Indirect Target Results: 
-// CHECK-LABEL:   caller_with_vp2:
-// CHECK-NEXT:    Hash: 0x0000000000000000
-// CHECK-NEXT:    Counters:
-// CHECK-NEXT:    Function count:
-// CHECK-NEXT:    Indirect Call Site Count: 10
-// CHECK-NEXT:    Indirect Target Results: 
-// CHECK-NEXT:	[ 0, callee1, 1 ]
-// CHECK-NEXT:	[ 0, callee2, 1 ]
-// CHECK-NEXT:	[ 1, callee1, 2 ]
-// CHECK-NEXT:	[ 1, callee2, 1 ]
-// CHECK-NEXT:	[ 2, callee1, 3 ]
-// CHECK-NEXT:	[ 2, callee2, 2 ]
-// CHECK-NEXT:	[ 3, callee1, 4 ]
-// CHECK-NEXT:	[ 3, callee2, 2 ]
-// CHECK-NEXT:	[ 4, callee1, 5 ]
-// CHECK-NEXT:	[ 4, callee2, 3 ]
-// CHECK-NEXT:	[ 5, callee1, 6 ]
-// CHECK-NEXT:	[ 5, callee2, 3 ]
-// CHECK-NEXT:	[ 6, callee1, 7 ]
-// CHECK-NEXT:	[ 6, callee2, 4 ]
-// CHECK-NEXT:	[ 7, callee1, 8 ]
-// CHECK-NEXT:	[ 7, callee2, 4 ]
-// CHECK-NEXT:	[ 8, callee1, 9 ]
-// CHECK-NEXT:	[ 8, callee2, 5 ]
-// CHECK-NEXT:	[ 9, callee1, 10 ]
-// CHECK-NEXT:	[ 9, callee2, 5 ]
-// CHECK-LABEL:   caller_with_vp1:
-// CHECK-NEXT:    Hash: 0x0000000000000000
-// CHECK-NEXT:    Counters:
-// CHECK-NEXT:    Function count
-// CHECK-NEXT:    Indirect Call Site Count: 10
-// CHECK-NEXT:    Indirect Target Results: 
-// CHECK-NEXT:	[ 0, callee1, 1 ]
-// CHECK-NEXT:	[ 0, callee2, 1 ]
-// CHECK-NEXT:	[ 1, callee1, 2 ]
-// CHECK-NEXT:	[ 1, callee2, 1 ]
-// CHECK-NEXT:	[ 2, callee1, 3 ]
-// CHECK-NEXT:	[ 2, callee2, 2 ]
-// CHECK-NEXT:	[ 3, callee1, 4 ]
-// CHECK-NEXT:	[ 3, callee2, 2 ]
-// CHECK-NEXT:	[ 4, callee1, 5 ]
-// CHECK-NEXT:	[ 4, callee2, 3 ]
-// CHECK-NEXT:	[ 5, callee1, 6 ]
-// CHECK-NEXT:	[ 5, callee2, 3 ]
-// CHECK-NEXT:	[ 6, callee1, 7 ]
-// CHECK-NEXT:	[ 6, callee2, 4 ]
-// CHECK-NEXT:	[ 7, callee1, 8 ]
-// CHECK-NEXT:	[ 7, callee2, 4 ]
-// CHECK-NEXT:	[ 8, callee1, 9 ]
-// CHECK-NEXT:	[ 8, callee2, 5 ]
-// CHECK-NEXT:	[ 9, callee1, 10 ]
-// CHECK-NEXT:	[ 9, callee2, 5 ]
-// CHECK-LABEL:   caller_with_value_site_never_called1:
-// CHECK-NEXT:    Hash: 0x0000000000000000
-// CHECK-NEXT:    Counters:
-// CHECK-NEXT:    Function count:
-// CHECK-NEXT:    Indirect Call Site Count: 10
-// CHECK-NEXT:    Indirect Target Results: 
-// CHECK-LABEL:   caller_without_value_site2:
-// CHECK-NEXT:    Hash: 0x0000000000000000
-// CHECK-NEXT:    Counters:
-// CHECK-NEXT:    Function count:
-// CHECK-NEXT:    Indirect Call Site Count: 0
-// CHECK-NEXT:    Indirect Target Results: 
-// CHECK-LABEL:   caller_without_value_site1:
-// CHECK-NEXT:    Hash: 0x0000000000000000
-// CHECK-NEXT:    Counters:
-// CHECK-NEXT:    Function count:
-// CHECK-NEXT:    Indirect Call Site Count: 0
-// CHECK-NEXT:    Indirect Target Results: 
+// CHECK-1-LABEL:   caller_with_value_site_never_called2:
+// CHECK-1-NEXT:    Hash: 0x0000000000000000
+// CHECK-1-NEXT:    Counters:
+// CHECK-1-NEXT:    Function count
+// CHECK-1-NEXT:    Indirect Call Site Count: 10
+// CHECK-1-NEXT:    Indirect Target Results: 
+// CHECK-2-LABEL:   caller_with_vp2:
+// CHECK-2-NEXT:    Hash: 0x0000000000000000
+// CHECK-2-NEXT:    Counters:
+// CHECK-2-NEXT:    Function count:
+// CHECK-2-NEXT:    Indirect Call Site Count: 10
+// CHECK-2-NEXT:    Indirect Target Results: 
+// CHECK-2-NEXT:	[ 0, callee1, 1 ]
+// CHECK-2-NEXT:	[ 0, callee2, 1 ]
+// CHECK-2-NEXT:	[ 1, callee1, 2 ]
+// CHECK-2-NEXT:	[ 1, callee2, 1 ]
+// CHECK-2-NEXT:	[ 2, callee1, 3 ]
+// CHECK-2-NEXT:	[ 2, callee2, 2 ]
+// CHECK-2-NEXT:	[ 3, callee1, 4 ]
+// CHECK-2-NEXT:	[ 3, callee2, 2 ]
+// CHECK-2-NEXT:	[ 4, callee1, 5 ]
+// CHECK-2-NEXT:	[ 4, callee2, 3 ]
+// CHECK-2-NEXT:	[ 5, callee1, 6 ]
+// CHECK-2-NEXT:	[ 5, callee2, 3 ]
+// CHECK-2-NEXT:	[ 6, callee1, 7 ]
+// CHECK-2-NEXT:	[ 6, callee2, 4 ]
+// CHECK-2-NEXT:	[ 7, callee1, 8 ]
+// CHECK-2-NEXT:	[ 7, callee2, 4 ]
+// CHECK-2-NEXT:	[ 8, callee1, 9 ]
+// CHECK-2-NEXT:	[ 8, callee2, 5 ]
+// CHECK-2-NEXT:	[ 9, callee1, 10 ]
+// CHECK-2-NEXT:	[ 9, callee2, 5 ]
+// CHECK-3-LABEL:   caller_with_vp1:
+// CHECK-3-NEXT:    Hash: 0x0000000000000000
+// CHECK-3-NEXT:    Counters:
+// CHECK-3-NEXT:    Function count
+// CHECK-3-NEXT:    Indirect Call Site Count: 10
+// CHECK-3-NEXT:    Indirect Target Results: 
+// CHECK-3-NEXT:	[ 0, callee1, 1 ]
+// CHECK-3-NEXT:	[ 0, callee2, 1 ]
+// CHECK-3-NEXT:	[ 1, callee1, 2 ]
+// CHECK-3-NEXT:	[ 1, callee2, 1 ]
+// CHECK-3-NEXT:	[ 2, callee1, 3 ]
+// CHECK-3-NEXT:	[ 2, callee2, 2 ]
+// CHECK-3-NEXT:	[ 3, callee1, 4 ]
+// CHECK-3-NEXT:	[ 3, callee2, 2 ]
+// CHECK-3-NEXT:	[ 4, callee1, 5 ]
+// CHECK-3-NEXT:	[ 4, callee2, 3 ]
+// CHECK-3-NEXT:	[ 5, callee1, 6 ]
+// CHECK-3-NEXT:	[ 5, callee2, 3 ]
+// CHECK-3-NEXT:	[ 6, callee1, 7 ]
+// CHECK-3-NEXT:	[ 6, callee2, 4 ]
+// CHECK-3-NEXT:	[ 7, callee1, 8 ]
+// CHECK-3-NEXT:	[ 7, callee2, 4 ]
+// CHECK-3-NEXT:	[ 8, callee1, 9 ]
+// CHECK-3-NEXT:	[ 8, callee2, 5 ]
+// CHECK-3-NEXT:	[ 9, callee1, 10 ]
+// CHECK-3-NEXT:	[ 9, callee2, 5 ]
+// CHECK-4-LABEL:   caller_with_value_site_never_called1:
+// CHECK-4-NEXT:    Hash: 0x0000000000000000
+// CHECK-4-NEXT:    Counters:
+// CHECK-4-NEXT:    Function count:
+// CHECK-4-NEXT:    Indirect Call Site Count: 10
+// CHECK-4-NEXT:    Indirect Target Results: 
+// CHECK-5-LABEL:   caller_without_value_site2:
+// CHECK-5-NEXT:    Hash: 0x0000000000000000
+// CHECK-5-NEXT:    Counters:
+// CHECK-5-NEXT:    Function count:
+// CHECK-5-NEXT:    Indirect Call Site Count: 0
+// CHECK-5-NEXT:    Indirect Target Results: 
+// CHECK-6-LABEL:   caller_without_value_site1:
+// CHECK-6-NEXT:    Hash: 0x0000000000000000
+// CHECK-6-NEXT:    Counters:
+// CHECK-6-NEXT:    Function count:
+// CHECK-6-NEXT:    Indirect Call Site Count: 0
+// CHECK-6-NEXT:    Indirect Target Results: 
diff --git a/test/profile/instrprof-value-prof-evict.test b/test/profile/instrprof-value-prof-evict.test
new file mode 100644
index 0000000..8b054fb
--- /dev/null
+++ b/test/profile/instrprof-value-prof-evict.test
@@ -0,0 +1,16 @@
+// RUN: %clang_profgen -O2 -mllvm -enable-value-profiling=true -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=10 -o %t %S/Inputs/instrprof-value-prof-evict.c
+// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
+// RUN: llvm-profdata merge -o %t.profdata %t.profraw
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.profdata | FileCheck  %S/Inputs/instrprof-value-prof-evict.c
+
+// IR level instrumentation
+// RUN: %clang_pgogen -O2 -mllvm -disable-vp=false -mllvm -vp-static-alloc=true  -mllvm -vp-counters-per-site=10 -o %t.ir  %S/Inputs/instrprof-value-prof-evict.c
+// RUN: env LLVM_PROFILE_FILE=%t.ir.profraw %run %t.ir
+// RUN: llvm-profdata merge -o %t.ir.profdata %t.ir.profraw
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.ir.profdata | FileCheck  %S/Inputs/instrprof-value-prof-evict.c
+
+// IR level instrumentation, dynamic allocation
+// RUN: %clang_pgogen -O2 -mllvm -disable-vp=false -mllvm -vp-static-alloc=false -o %t.ir.dyn  %S/Inputs/instrprof-value-prof-evict.c
+// RUN: env LLVM_PROFILE_FILE=%t.ir.dyn.profraw %run %t.ir.dyn
+// RUN: llvm-profdata merge -o %t.ir.dyn.profdata %t.ir.dyn.profraw
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.ir.dyn.profdata | FileCheck  %S/Inputs/instrprof-value-prof-evict.c
diff --git a/test/profile/instrprof-value-prof-shared.test b/test/profile/instrprof-value-prof-shared.test
new file mode 100644
index 0000000..a45b0d5
--- /dev/null
+++ b/test/profile/instrprof-value-prof-shared.test
@@ -0,0 +1,52 @@
+// RUN: mkdir -p %t.d 
+// RUN: %clang_profgen -O2 -mllvm -enable-value-profiling=true -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=256 -fPIC -shared -o %t.d/t.shared -DSHARED_LIB %S/Inputs/instrprof-value-prof-real.c
+// RUN: %clang_profgen -O2 -mllvm -enable-value-profiling=true -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=256 -o %t -rpath %t.d %t.d/t.shared -DCALL_SHARED %S/Inputs/instrprof-value-prof-real.c
+// RUN: env LLVM_PROFILE_FILE=%t.profraw LLVM_VP_MAX_NUM_VALS_PER_SITE=255  %run %t
+// RUN: llvm-profdata merge -o %t.profdata %t.profraw
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.profdata | FileCheck  %S/Inputs/instrprof-value-prof-real.c
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.profdata | FileCheck  %S/Inputs/instrprof-value-prof-real.c --check-prefix=SHARED
+
+// IR level instrumentation
+// RUN: %clang_pgogen -O2 -mllvm -disable-vp=false -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=256 -fPIC -shared -o %t.d/t.ir.shared -DSHARED_LIB  %S/Inputs/instrprof-value-prof-real.c
+// RUN: %clang_pgogen -O2 -mllvm -disable-vp=false -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=256 -rpath %t.d -o %t.ir %t.d/t.ir.shared -DCALL_SHARED  %S/Inputs/instrprof-value-prof-real.c
+// Profile data from shared library will be concatenated to the same raw file.
+// RUN: env LLVM_PROFILE_FILE=%t.ir.profraw  LLVM_VP_MAX_NUM_VALS_PER_SITE=255 %run %t.ir
+// RUN: llvm-profdata merge -o %t.ir.profdata %t.ir.profraw
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.ir.profdata | FileCheck  %S/Inputs/instrprof-value-prof-real.c
+// RUN: llvm-profdata merge -text  %t.ir.profdata -o %t.ir.proftxt 
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.ir.profdata | FileCheck  %S/Inputs/instrprof-value-prof-real.c --check-prefix=SHARED
+// RUN: FileCheck  %S/Inputs/instrprof-value-prof-real.c --check-prefix=IR < %t.ir.proftxt
+
+// Same as above but with profile online merging enabled.
+// RUN: rm -fr %t.prof/
+// RUN: mkdir -p %t.prof/
+// RUN: %clang_pgogen=%t.prof -O2 -mllvm -disable-vp=false -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=256 -fPIC -shared -o %t.d/t.ir.m.shared -DSHARED_LIB  %S/Inputs/instrprof-value-prof-real.c
+// RUN: %clang_pgogen=%t.prof -O2 -mllvm -disable-vp=false -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=256 -rpath %t.d -o %t.ir.m %t.d/t.ir.m.shared -DCALL_SHARED  %S/Inputs/instrprof-value-prof-real.c
+// RUN: env LLVM_VP_MAX_NUM_VALS_PER_SITE=255 %run %t.ir.m
+// RUN: llvm-profdata merge -o %t.ir.m.profdata -dump-input-file-list %t.prof/ | count 2
+// RUN: llvm-profdata merge -o %t.ir.m.profdata  %t.prof/
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.ir.m.profdata | FileCheck  %S/Inputs/instrprof-value-prof-real.c
+// RUN: llvm-profdata merge -text  %t.ir.m.profdata -o %t.ir.m.proftxt 
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.ir.m.profdata | FileCheck  %S/Inputs/instrprof-value-prof-real.c --check-prefix=SHARED
+// RUN: FileCheck  %S/Inputs/instrprof-value-prof-real.c --check-prefix=IR < %t.ir.m.proftxt
+
+
+// IR level instrumentation: dynamic memory allocation
+// RUN: %clang_pgogen -O2 -mllvm -disable-vp=false -mllvm -vp-static-alloc=false -mllvm -vp-counters-per-site=256 -fPIC -shared -o %t.d/t.ir.dyn.shared -DSHARED_LIB  %S/Inputs/instrprof-value-prof-real.c
+// RUN: %clang_pgogen -O2 -mllvm -disable-vp=false -mllvm -vp-static-alloc=false -mllvm -vp-counters-per-site=256 -rpath %t.d -o %t.ir.dyn %t.d/t.ir.dyn.shared -DCALL_SHARED  %S/Inputs/instrprof-value-prof-real.c
+// RUN: env LLVM_PROFILE_FILE=%t.ir.dyn.profraw %run %t.ir.dyn
+// RUN: llvm-profdata merge -o %t.ir.dyn.profdata %t.ir.dyn.profraw
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.ir.dyn.profdata | FileCheck  %S/Inputs/instrprof-value-prof-real.c
+// RUN: llvm-profdata merge -text  %t.ir.dyn.profdata -o %t.ir.dyn.proftxt 
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.ir.dyn.profdata | FileCheck  %S/Inputs/instrprof-value-prof-real.c --check-prefix=SHARED
+// RUN: FileCheck  %S/Inputs/instrprof-value-prof-real.c --check-prefix=IR < %t.ir.dyn.proftxt
+
+// IR level instrumentation: main program uses static counter, shared library uses dynamic memory alloc.
+// RUN: %clang_pgogen -O2 -mllvm -disable-vp=false -mllvm -vp-static-alloc=false -mllvm -vp-counters-per-site=256 -fPIC -shared -o %t.d/t.ir.dyn.shared -DSHARED_LIB  %S/Inputs/instrprof-value-prof-real.c
+// RUN: %clang_pgogen -O2 -mllvm -disable-vp=false -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=256 -rpath %t.d -o %t.ir.mixed %t.d/t.ir.dyn.shared -DCALL_SHARED  %S/Inputs/instrprof-value-prof-real.c
+// RUN: env LLVM_PROFILE_FILE=%t.ir.mixed.profraw  LLVM_VP_MAX_NUM_VALS_PER_SITE=255 %run %t.ir.mixed
+// RUN: llvm-profdata merge -o %t.ir.mixed.profdata %t.ir.mixed.profraw
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.ir.mixed.profdata | FileCheck  %S/Inputs/instrprof-value-prof-real.c
+// RUN: llvm-profdata merge -text  %t.ir.mixed.profdata -o %t.ir.mixed.proftxt 
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.ir.mixed.profdata | FileCheck  %S/Inputs/instrprof-value-prof-real.c --check-prefix=SHARED
+// RUN: FileCheck  %S/Inputs/instrprof-value-prof-real.c --check-prefix=IR < %t.ir.mixed.proftxt
diff --git a/test/profile/instrprof-value-prof.c b/test/profile/instrprof-value-prof.c
index f09e1ac..3a5bdbd 100644
--- a/test/profile/instrprof-value-prof.c
+++ b/test/profile/instrprof-value-prof.c
@@ -1,6 +1,6 @@
-// RUN: %clang_profgen -O2 -o %t %s
-// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t 1
-// RUN: env LLVM_PROFILE_FILE=%t-2.profraw %run %t
+// RUN: %clang_profgen -mllvm -vp-static-alloc=false  -O2 -o %t %s
+// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t-2.profraw %run %t DO_NOT_INSTRUMENT
 // RUN: llvm-profdata merge -o %t.profdata %t.profraw
 // RUN: llvm-profdata merge -o %t-2.profdata %t-2.profraw
 // RUN: llvm-profdata merge -o %t-merged.profdata %t.profraw %t-2.profdata
@@ -8,11 +8,11 @@
 // RUN: llvm-profdata show --all-functions -ic-targets  %t.profdata | FileCheck  %s
 // RUN: llvm-profdata show --all-functions -ic-targets  %t-merged.profdata | FileCheck  %s
 //
-// RUN: env LLVM_PROFILE_FILE=%t-3.profraw LLVM_VP_BUFFER_SIZE=1 %run %t 1
-// RUN: env LLVM_PROFILE_FILE=%t-4.profraw LLVM_VP_BUFFER_SIZE=8 %run %t 1
-// RUN: env LLVM_PROFILE_FILE=%t-5.profraw LLVM_VP_BUFFER_SIZE=128 %run %t 1
-// RUN: env LLVM_PROFILE_FILE=%t-6.profraw LLVM_VP_BUFFER_SIZE=1024 %run %t 1
-// RUN: env LLVM_PROFILE_FILE=%t-7.profraw LLVM_VP_BUFFER_SIZE=102400 %run %t 1
+// RUN: env LLVM_PROFILE_FILE=%t-3.profraw LLVM_VP_BUFFER_SIZE=1 %run %t
+// RUN: env LLVM_PROFILE_FILE=%t-4.profraw LLVM_VP_BUFFER_SIZE=8 %run %t
+// RUN: env LLVM_PROFILE_FILE=%t-5.profraw LLVM_VP_BUFFER_SIZE=128 %run %t
+// RUN: env LLVM_PROFILE_FILE=%t-6.profraw LLVM_VP_BUFFER_SIZE=1024 %run %t
+// RUN: env LLVM_PROFILE_FILE=%t-7.profraw LLVM_VP_BUFFER_SIZE=102400 %run %t
 // RUN: llvm-profdata merge -o %t-3.profdata %t-3.profraw
 // RUN: llvm-profdata merge -o %t-4.profdata %t-4.profraw
 // RUN: llvm-profdata merge -o %t-5.profdata %t-5.profraw
@@ -80,7 +80,7 @@
   unsigned S, NS = 0, I, V, doInstrument = 1;
   const __llvm_profile_data *Data, *DataEnd;
 
-  if (argc < 2)
+  if (argc >= 2 && !strcmp(argv[1], "DO_NOT_INSTRUMENT"))
     doInstrument = 0;
 
   for (I = 0; I < 128; I++) {
@@ -90,9 +90,12 @@
   qsort(CallerInfos, sizeof(CallerInfos) / sizeof(CallerInfo), sizeof(CallerInfo),
         cmpaddr);
 
-  /* We will synthesis value profile data for 128 callers functions.
-   * The number of * value sites. The number values for each value site
-   * ranges from 0 to 8.  */
+  /* We will synthesize value profile data for the 128 caller functions
+   * declared. The number of value sites for each caller function is recorded
+   * in the NS field of the CallerInfo object. For each value site, the number
+   * of callee values is determined by the site index (modulo 8). The frequency
+   * of each synthesized callee target is equal to V + 1, where V is the
+   * index of the target value for the call site. */
 
   Data = __llvm_profile_begin_data();
   DataEnd = __llvm_profile_end_data();
diff --git a/test/profile/instrprof-value-prof.test b/test/profile/instrprof-value-prof.test
new file mode 100644
index 0000000..0203899
--- /dev/null
+++ b/test/profile/instrprof-value-prof.test
@@ -0,0 +1,21 @@
+// RUN: %clang_profgen -O2 -mllvm -enable-value-profiling=true -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=256 -o %t %S/Inputs/instrprof-value-prof-real.c
+// RUN: env LLVM_PROFILE_FILE=%t.profraw LLVM_VP_MAX_NUM_VALS_PER_SITE=255 %run %t
+// RUN: llvm-profdata merge -o %t.profdata %t.profraw
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.profdata | FileCheck  %S/Inputs/instrprof-value-prof-real.c
+
+// IR level instrumentation
+// RUN: %clang_pgogen -O2 -mllvm -disable-vp=false -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=256 -o %t.ir  %S/Inputs/instrprof-value-prof-real.c
+// RUN: env LLVM_PROFILE_FILE=%t.ir.profraw LLVM_VP_MAX_NUM_VALS_PER_SITE=255 %run %t.ir
+// RUN: llvm-profdata merge -o %t.ir.profdata %t.ir.profraw
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.ir.profdata | FileCheck  %S/Inputs/instrprof-value-prof-real.c
+// RUN: llvm-profdata merge -text  %t.ir.profdata -o %t.ir.proftxt 
+// RUN: FileCheck  %S/Inputs/instrprof-value-prof-real.c --check-prefix=IR < %t.ir.proftxt
+
+// IR level instrumentation with dynamic memory allocation
+// RUN: %clang_pgogen -O2 -mllvm -disable-vp=false -mllvm -vp-static-alloc=false -mllvm -vp-counters-per-site=256 -o %t.ir.dyn  %S/Inputs/instrprof-value-prof-real.c
+// RUN: env LLVM_PROFILE_FILE=%t.ir.dyn.profraw %run %t.ir.dyn
+// RUN: llvm-profdata merge -o %t.ir.dyn.profdata %t.ir.dyn.profraw
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.ir.dyn.profdata | FileCheck  %S/Inputs/instrprof-value-prof-real.c
+// RUN: llvm-profdata merge -text  %t.ir.dyn.profdata -o %t.ir.dyn.proftxt 
+// RUN: FileCheck  %S/Inputs/instrprof-value-prof-real.c --check-prefix=IR < %t.ir.dyn.proftxt
+
diff --git a/test/profile/instrprof-version-mismatch.c b/test/profile/instrprof-version-mismatch.c
index 633fe9f..81ae521 100644
--- a/test/profile/instrprof-version-mismatch.c
+++ b/test/profile/instrprof-version-mismatch.c
@@ -1,5 +1,5 @@
 // RUN: %clang_profgen -o %t -O3 %s
-// RUN: env LLVM_PROFILE_VERBOSE_ERRORS=1 %run %t 1 2>&1 | FileCheck %s
+// RUN: %run %t 1 2>&1 | FileCheck %s
 
 // override the version variable with a bogus version:
 unsigned long long __llvm_profile_raw_version = 10000;
@@ -8,4 +8,4 @@
     return 1;
   return 0;
 }
-// CHECK: LLVM Profile: runtime and instrumentation version mismatch
+// CHECK: LLVM Profile Error: Runtime and instrumentation version mismatch
diff --git a/test/profile/instrprof-visibility.cpp b/test/profile/instrprof-visibility.cpp
index 08b8865..6fbba9d 100644
--- a/test/profile/instrprof-visibility.cpp
+++ b/test/profile/instrprof-visibility.cpp
@@ -56,34 +56,34 @@
 
 // --- Check coverage for functions in the anonymous namespace.
 // COV-DAG: instrprof-visibility.cpp:_ZN12_GLOBAL__N_14callEv
-// COV-DAG: 1|{{.*}}|void call() {
-// COV-DAG: 1|{{.*}}|  f1();
-// COV-DAG: 1|{{.*}}|#ifndef NO_WEAK
-// COV-DAG:  |{{.*}}|  f2();
-// COV-DAG:  |{{.*}}|#endif
-// COV-DAG: 1|{{.*}}|  f3();
-// COV-DAG: 1|{{.*}}|#ifndef NO_EXTERN
-// COV-DAG:  |{{.*}}|  f4();
-// COV-DAG:  |{{.*}}|#endif
-// COV-DAG: 1|{{.*}}|  f5();
-// COV-DAG: 1|{{.*}}|  f6();
-// COV-DAG: 1|{{.*}}|  f7();
-// COV-DAG: 1|{{.*}}|}
+// COV-DAG: [[CALL:[0-9]+]]|{{ *}}1|void call() {
+// COV-DAG: {{.*}}|{{ *}}1|  f1();
+// COV-DAG: {{.*}}|{{ *}}1|#ifndef NO_WEAK
+// COV-DAG: {{.*}}|{{ *}} |  f2();
+// COV-DAG: {{.*}}|{{ *}} |#endif
+// COV-DAG: {{.*}}|{{ *}}1|  f3();
+// COV-DAG: {{.*}}|{{ *}}1|#ifndef NO_EXTERN
+// COV-DAG: {{.*}}|{{ *}} |  f4();
+// COV-DAG: {{.*}}|{{ *}} |#endif
+// COV-DAG: {{.*}}|{{ *}}1|  f5();
+// COV-DAG: {{.*}}|{{ *}}1|  f6();
+// COV-DAG: {{.*}}|{{ *}}1|  f7();
+// COV-DAG: {{.*}}|{{ *}}1|}
 
 // --- Check coverage for functions in namespace N1.
 // COV-DAG: _ZN2N14callEv
-// COV-DAG: 1|{{.*}}|void call() {
-// COV-DAG: 1|{{.*}}|  f1();
-// COV-DAG: 1|{{.*}}|#ifndef NO_WEAK
-// COV-DAG: 1|{{.*}}|  f2();
-// COV-DAG: 1|{{.*}}|#endif
-// COV-DAG: 1|{{.*}}|  f3();
-// COV-DAG: 1|{{.*}}|#ifndef NO_EXTERN
-// COV-DAG: 1|{{.*}}|  f4();
-// COV-DAG: 1|{{.*}}|#endif
-// COV-DAG: 1|{{.*}}|  f5();
-// COV-DAG: 1|{{.*}}|  f6();
-// COV-DAG: 1|{{.*}}|  f7();
-// COV-DAG: 1|{{.*}}|}
+// COV-DAG: {{ *}}[[CALL]]|{{ *}}1|void call() {
+// COV-DAG: {{.*}}|{{ *}}1|  f1();
+// COV-DAG: {{.*}}|{{ *}}1|#ifndef NO_WEAK
+// COV-DAG: {{.*}}|{{ *}}1|  f2();
+// COV-DAG: {{.*}}|{{ *}}1|#endif
+// COV-DAG: {{.*}}|{{ *}}1|  f3();
+// COV-DAG: {{.*}}|{{ *}}1|#ifndef NO_EXTERN
+// COV-DAG: {{.*}}|{{ *}}1|  f4();
+// COV-DAG: {{.*}}|{{ *}}1|#endif
+// COV-DAG: {{.*}}|{{ *}}1|  f5();
+// COV-DAG: {{.*}}|{{ *}}1|  f6();
+// COV-DAG: {{.*}}|{{ *}}1|  f7();
+// COV-DAG: {{.*}}|{{ *}}1|}
 
 // COV-DAG: instrprof-visibility.cpp
diff --git a/test/profile/instrprof-without-libc.c b/test/profile/instrprof-without-libc.c
index 5290efd..0708833 100644
--- a/test/profile/instrprof-without-libc.c
+++ b/test/profile/instrprof-without-libc.c
@@ -15,6 +15,8 @@
 int __llvm_profile_runtime = 0;
 uint64_t __llvm_profile_get_size_for_buffer(void);
 int __llvm_profile_write_buffer(char *);
+void __llvm_profile_merge_from_buffer(const char *, uint64_t Size);
+
 int write_buffer(uint64_t, const char *);
 int main(int argc, const char *argv[]) {
   // CHECK-LABEL: define {{.*}} @main(
@@ -29,12 +31,14 @@
   if (Size > MaxSize)
     return 1;
   int Write = __llvm_profile_write_buffer(Buffer);
-  if (__llvm_profile_write_buffer(Buffer))
+  if (Write)
     return Write;
 
 #ifdef CHECK_SYMBOLS
   // Don't write it out.  Since we're checking the symbols, we don't have libc
   // available.
+  // Call merge function to make sure it does not bring in libc deps:
+  __llvm_profile_merge_from_buffer(Buffer, Size);
   return 0;
 #else
   // Actually write it out so we can FileCheck the output.
diff --git a/test/profile/lit.cfg b/test/profile/lit.cfg
index 4e654fa..b8968c4 100644
--- a/test/profile/lit.cfg
+++ b/test/profile/lit.cfg
@@ -56,12 +56,21 @@
 config.substitutions.append( ("%clang ", build_invocation(clang_cflags)) )
 config.substitutions.append( ("%clangxx ", build_invocation(clang_cxxflags)) )
 config.substitutions.append( ("%clang_profgen ", build_invocation(clang_cflags) + " -fprofile-instr-generate ") )
-config.substitutions.append( ("%clang_profuse=", build_invocation(clang_cflags) + " -fprofile-instr-use=") )
+config.substitutions.append( ("%clang_profgen=", build_invocation(clang_cflags) + " -fprofile-instr-generate=") )
+config.substitutions.append( ("%clang_pgogen ", build_invocation(clang_cflags) + " -fprofile-generate ") )
+config.substitutions.append( ("%clang_pgogen=", build_invocation(clang_cflags) + " -fprofile-generate=") )
+
 config.substitutions.append( ("%clangxx_profgen ", build_invocation(clang_cxxflags) + " -fprofile-instr-generate ") )
-config.substitutions.append( ("%clangxx_profuse=", build_invocation(clang_cxxflags) + " -fprofile-instr-use=") )
+config.substitutions.append( ("%clangxx_profgen=", build_invocation(clang_cxxflags) + " -fprofile-instr-generate=") )
+config.substitutions.append( ("%clangxx_pgogen ", build_invocation(clang_cxxflags) + " -fprofile-generate ") )
+config.substitutions.append( ("%clangxx_pgogen=", build_invocation(clang_cxxflags) + " -fprofile-generate=") )
+
 config.substitutions.append( ("%clang_profgen_gcc=", build_invocation(clang_cflags) + " -fprofile-generate=") )
 config.substitutions.append( ("%clang_profuse_gcc=", build_invocation(clang_cflags) + " -fprofile-use=") )
 
+config.substitutions.append( ("%clang_profuse=", build_invocation(clang_cflags) + " -fprofile-instr-use=") )
+config.substitutions.append( ("%clangxx_profuse=", build_invocation(clang_cxxflags) + " -fprofile-instr-use=") )
+
 if config.host_os not in ['Darwin', 'FreeBSD', 'Linux']:
   config.unsupported = True
 
diff --git a/test/profile/lit.site.cfg.in b/test/profile/lit.site.cfg.in
index 168caf9..1cb61b5 100644
--- a/test/profile/lit.site.cfg.in
+++ b/test/profile/lit.site.cfg.in
@@ -1,5 +1,4 @@
-## Autogenerated by LLVM/Clang configuration.
-# Do not edit!
+@LIT_SITE_CFG_IN_HEADER@
 
 # Tool-specific config options.
 config.profile_lit_binary_dir = "@PROFILE_LIT_BINARY_DIR@"
diff --git a/test/safestack/CMakeLists.txt b/test/safestack/CMakeLists.txt
index 6f5c2f9..c56e81a 100644
--- a/test/safestack/CMakeLists.txt
+++ b/test/safestack/CMakeLists.txt
@@ -26,4 +26,4 @@
 add_lit_testsuite(check-safestack "Running the SafeStack tests"
   ${CMAKE_CURRENT_BINARY_DIR}
   DEPENDS ${SAFESTACK_TEST_DEPS})
-set_target_properties(check-safestack PROPERTIES FOLDER "SafeStack tests")
+set_target_properties(check-safestack PROPERTIES FOLDER "Compiler-RT Misc")
diff --git a/test/safestack/canary.c b/test/safestack/canary.c
new file mode 100644
index 0000000..c6b81f2
--- /dev/null
+++ b/test/safestack/canary.c
@@ -0,0 +1,37 @@
+// RUN: %clang_safestack -fno-stack-protector -D_FORTIFY_SOURCE=0 -g %s -o %t.nossp
+// RUN: %run %t.nossp 2>&1 | FileCheck --check-prefix=NOSSP %s
+
+// RUN: %clang_safestack -fstack-protector-all -D_FORTIFY_SOURCE=0 -g %s -o %t.ssp
+// RUN: not --crash %run %t.ssp 2>&1 | FileCheck -check-prefix=SSP %s
+
+// Test stack canaries on the unsafe stack.
+
+// REQUIRES: stable-runtime
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+__attribute__((noinline)) void f(unsigned *y) {
+  char x;
+  char *volatile p = &x;
+  char *volatile q = (char *)y;
+  assert(p < q);
+  assert(q - p < 1024); // sanity
+  // This has technically undefined behavior, but we know the actual layout of
+  // the unsafe stack and this should not touch anything important.
+  memset(&x, 0xab, q - p + sizeof(*y));
+}
+
+int main(int argc, char **argv)
+{
+  unsigned y;
+  // NOSSP: main 1
+  // SSP: main 1
+  fprintf(stderr, "main 1\n");
+  f(&y);
+  // NOSSP: main 2
+  // SSP-NOT: main 2
+  fprintf(stderr, "main 2\n");
+  return 0;
+}
diff --git a/test/safestack/lit.site.cfg.in b/test/safestack/lit.site.cfg.in
index cb1e729..6864f39 100644
--- a/test/safestack/lit.site.cfg.in
+++ b/test/safestack/lit.site.cfg.in
@@ -1,5 +1,4 @@
-## Autogenerated by LLVM/Clang configuration.
-# Do not edit!
+@LIT_SITE_CFG_IN_HEADER@
 
 # Load common config for all compiler-rt lit tests.
 lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
diff --git a/test/sanitizer_common/CMakeLists.txt b/test/sanitizer_common/CMakeLists.txt
index 54b9135..218fbf0 100644
--- a/test/sanitizer_common/CMakeLists.txt
+++ b/test/sanitizer_common/CMakeLists.txt
@@ -53,12 +53,10 @@
   list(APPEND SANITIZER_COMMON_TEST_DEPS SanitizerUnitTests)
 endif()
 
-# FIXME: Re-enable on 64-bit Windows.
-if(SANITIZER_COMMON_TESTSUITES AND
-    (NOT OS_NAME MATCHES "Windows" OR CMAKE_SIZEOF_VOID_P EQUAL 4))
+if(SANITIZER_COMMON_TESTSUITES)
   add_lit_testsuite(check-sanitizer "Running sanitizer_common tests"
     ${SANITIZER_COMMON_TESTSUITES}
     DEPENDS ${SANITIZER_COMMON_TEST_DEPS})
   set_target_properties(check-sanitizer PROPERTIES FOLDER
-                        "sanitizer_common tests")
+                        "Compiler-RT Misc")
 endif()
diff --git a/test/sanitizer_common/TestCases/Darwin/abort_on_error.cc b/test/sanitizer_common/TestCases/Darwin/abort_on_error.cc
index dbab525..e73f669 100644
--- a/test/sanitizer_common/TestCases/Darwin/abort_on_error.cc
+++ b/test/sanitizer_common/TestCases/Darwin/abort_on_error.cc
@@ -4,7 +4,7 @@
 // RUN: %clangxx %s -o %t
 
 // Intentionally don't inherit the default options.
-// RUN: %tool_options='' not --crash %run %t 2>&1
+// RUN: env %tool_options='' not --crash %run %t 2>&1
 
 // When we use lit's default options, we shouldn't crash.
 // RUN: not %run %t 2>&1
diff --git a/test/sanitizer_common/TestCases/Linux/abort_on_error.cc b/test/sanitizer_common/TestCases/Linux/abort_on_error.cc
index 7e444c2..a5ef665 100644
--- a/test/sanitizer_common/TestCases/Linux/abort_on_error.cc
+++ b/test/sanitizer_common/TestCases/Linux/abort_on_error.cc
@@ -4,7 +4,7 @@
 // RUN: %clangxx %s -o %t
 
 // Intentionally don't inherit the default options.
-// RUN: %tool_options='' not %run %t 2>&1
+// RUN: env %tool_options='' not %run %t 2>&1
 
 // When we use lit's default options, we shouldn't crash either. On Linux
 // lit doesn't set options anyway.
diff --git a/test/sanitizer_common/TestCases/Posix/decorate_proc_maps.cc b/test/sanitizer_common/TestCases/Linux/decorate_proc_maps.cc
similarity index 100%
rename from test/sanitizer_common/TestCases/Posix/decorate_proc_maps.cc
rename to test/sanitizer_common/TestCases/Linux/decorate_proc_maps.cc
diff --git a/test/sanitizer_common/TestCases/Linux/fpe.cc b/test/sanitizer_common/TestCases/Linux/fpe.cc
index b4be500..9a6f808 100644
--- a/test/sanitizer_common/TestCases/Linux/fpe.cc
+++ b/test/sanitizer_common/TestCases/Linux/fpe.cc
@@ -9,7 +9,7 @@
 // XFAIL: tsan
 //
 // FIXME: seems to fail on ARM
-// REQUIRES: x86_64-supported-target
+// REQUIRES: x86_64-target-arch
 #include <assert.h>
 #include <stdio.h>
 #include <sanitizer/asan_interface.h>
diff --git a/test/sanitizer_common/TestCases/Linux/ill.cc b/test/sanitizer_common/TestCases/Linux/ill.cc
index 1edad48..2c69618 100644
--- a/test/sanitizer_common/TestCases/Linux/ill.cc
+++ b/test/sanitizer_common/TestCases/Linux/ill.cc
@@ -9,7 +9,7 @@
 // XFAIL: tsan
 //
 // FIXME: seems to fail on ARM
-// REQUIRES: x86_64-supported-target
+// REQUIRES: x86_64-target-arch
 #include <assert.h>
 #include <stdio.h>
 #include <sanitizer/asan_interface.h>
diff --git a/test/sanitizer_common/TestCases/Linux/open_memstream.cc b/test/sanitizer_common/TestCases/Linux/open_memstream.cc
index 3bce030..cf31f44 100644
--- a/test/sanitizer_common/TestCases/Linux/open_memstream.cc
+++ b/test/sanitizer_common/TestCases/Linux/open_memstream.cc
@@ -1,6 +1,6 @@
 // RUN: %clangxx -m64 -O0 -g -xc++ %s -o %t && %run %t
 // RUN: %clangxx -m64 -O3 -g -xc++ %s -o %t && %run %t
-// REQUIRES: x86_64-supported-target
+// REQUIRES: x86_64-target-arch
 
 #include <assert.h>
 #include <stdio.h>
diff --git a/test/sanitizer_common/TestCases/Linux/ptrace.cc b/test/sanitizer_common/TestCases/Linux/ptrace.cc
index 67b6474..b10aecd 100644
--- a/test/sanitizer_common/TestCases/Linux/ptrace.cc
+++ b/test/sanitizer_common/TestCases/Linux/ptrace.cc
@@ -92,6 +92,26 @@
       printf("%x\n", fpregs.fpsr);
 #endif // (__aarch64__)
 
+#if (__s390__)
+    struct iovec regset_io;
+
+    struct _user_regs_struct regs;
+    regset_io.iov_base = &regs;
+    regset_io.iov_len = sizeof(regs);
+    res = ptrace(PTRACE_GETREGSET, pid, (void*)NT_PRSTATUS, (void*)&regset_io);
+    assert(!res);
+    if (regs.psw.addr)
+      printf("%lx\n", regs.psw.addr);
+
+    struct _user_fpregs_struct fpregs;
+    regset_io.iov_base = &fpregs;
+    regset_io.iov_len = sizeof(fpregs);
+    res = ptrace(PTRACE_GETREGSET, pid, (void*)NT_FPREGSET, (void*)&regset_io);
+    assert(!res);
+    if (fpregs.fpc)
+      printf("%x\n", fpregs.fpc);
+#endif // (__s390__)
+
     siginfo_t siginfo;
     res = ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo);
     assert(!res);
diff --git a/test/sanitizer_common/TestCases/Linux/recv_msg_trunc.cc b/test/sanitizer_common/TestCases/Linux/recv_msg_trunc.cc
new file mode 100644
index 0000000..a806ce0
--- /dev/null
+++ b/test/sanitizer_common/TestCases/Linux/recv_msg_trunc.cc
@@ -0,0 +1,36 @@
+// Test that ASan doesn't raise a false alarm when MSG_TRUNC is present.
+//
+// RUN: %clangxx %s -o %t && %run %t 2>&1
+//
+// UNSUPPORTED: android
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/ip.h>
+#include <assert.h>
+
+int main() {
+  int fd_0 = socket(AF_INET, SOCK_DGRAM, 0);
+  int fd_1 = socket(AF_INET, SOCK_DGRAM, 0);
+  struct sockaddr_in sin;
+  socklen_t len = sizeof(sin);
+  char *buf = (char *)malloc(1);
+
+  sin.sin_family = AF_INET;
+  // Choose a random port to bind.
+  sin.sin_port = 0;
+  sin.sin_addr.s_addr = INADDR_ANY;
+
+  assert(bind(fd_1, (struct sockaddr *)&sin, sizeof(sin)) == 0);
+  // Get the bound address and port.
+  assert(getsockname(fd_1, (struct sockaddr *)&sin, &len) == 0);
+  assert(sendto(fd_0, "hello", strlen("hello"), MSG_DONTWAIT,
+                (struct sockaddr *)&sin, sizeof(sin)) != -1);
+  assert(recv(fd_1, buf, 1, MSG_TRUNC) != -1);
+  free(buf);
+
+  return 0;
+}
+
diff --git a/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cc b/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cc
index f17453b..193b33d 100644
--- a/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cc
+++ b/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cc
@@ -2,24 +2,35 @@
 // This test depends on the glibc layout of struct sem_t and checks that we
 // don't leave sem_t::private uninitialized.
 // UNSUPPORTED: android
+#include <features.h>
 #include <assert.h>
 #include <semaphore.h>
 #include <string.h>
+#include <stdint.h>
 
-void my_sem_init(bool priv, int value, unsigned *a, unsigned char *b) {
+// This condition needs to correspond to the __HAVE_64B_ATOMICS macro in glibc.
+#if (defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \
+     defined(__s390x__) || defined(__sparc64__) || defined(__alpha__) || \
+     defined(__ia64__) || defined(__m68k__)) && __GLIBC_PREREQ(2, 21)
+typedef uint64_t semval_t;
+#else
+typedef unsigned semval_t;
+#endif
+
+void my_sem_init(bool priv, int value, semval_t *a, unsigned char *b) {
   sem_t sem;
   memset(&sem, 0xAB, sizeof(sem));
   sem_init(&sem, priv, value);
 
   char *p = (char *)&sem;
-  memcpy(a, p, sizeof(unsigned));
-  memcpy(b, p + sizeof(unsigned), sizeof(char));
+  memcpy(a, p, sizeof(semval_t));
+  memcpy(b, p + sizeof(semval_t), sizeof(char));
 
   sem_destroy(&sem);
 }
 
 int main() {
-  unsigned a;
+  semval_t a;
   unsigned char b;
 
   my_sem_init(false, 42, &a, &b);
diff --git a/test/sanitizer_common/TestCases/Linux/weak_hook_test.cc b/test/sanitizer_common/TestCases/Linux/weak_hook_test.cc
new file mode 100644
index 0000000..d566764
--- /dev/null
+++ b/test/sanitizer_common/TestCases/Linux/weak_hook_test.cc
@@ -0,0 +1,82 @@
+// Test the weak hooks.
+// RUN: %clangxx %s -o %t
+// RUN: %run %t
+
+// Hooks are not implemented for lsan.
+// XFAIL: lsan
+
+#include <string.h>
+#include <assert.h>
+
+bool seen_memcmp, seen_strncmp, seen_strncasecmp, seen_strcmp, seen_strcasecmp,
+    seen_strstr, seen_strcasestr, seen_memmem;
+
+extern "C" {
+void __sanitizer_weak_hook_memcmp(void *called_pc, const void *s1,
+                                  const void *s2, size_t n, int result) {
+  seen_memcmp = true;
+}
+void __sanitizer_weak_hook_strncmp(void *called_pc, const char *s1,
+                                   const char *s2, size_t n, int result) {
+  seen_strncmp = true;
+}
+void __sanitizer_weak_hook_strncasecmp(void *called_pc, const char *s1,
+                                       const char *s2, size_t n, int result){
+  seen_strncasecmp = true;
+}
+void __sanitizer_weak_hook_strcmp(void *called_pc, const char *s1,
+                                  const char *s2, int result){
+  seen_strcmp = true;
+}
+void __sanitizer_weak_hook_strcasecmp(void *called_pc, const char *s1,
+                                      const char *s2, int result){
+  seen_strcasecmp = true;
+}
+void __sanitizer_weak_hook_strstr(void *called_pc, const char *s1,
+                                  const char *s2, char *result){
+  seen_strstr = true;
+}
+void __sanitizer_weak_hook_strcasestr(void *called_pc, const char *s1,
+                                      const char *s2, char *result){
+  seen_strcasestr = true;
+}
+void __sanitizer_weak_hook_memmem(void *called_pc, const void *s1, size_t len1,
+                                  const void *s2, size_t len2, void *result){
+  seen_memmem = true;
+}
+} // extern "C"
+
+char s1[] = "ABCDEF";
+char s2[] = "CDE";
+
+static volatile int int_sink;
+static volatile void *ptr_sink;
+
+int main() {
+  assert(sizeof(s2) < sizeof(s1));
+
+  int_sink = memcmp(s1, s2, sizeof(s2));
+  assert(seen_memcmp);
+
+  int_sink = strncmp(s1, s2, sizeof(s2));
+  assert(seen_strncmp);
+
+  int_sink = strncasecmp(s1, s2, sizeof(s2));
+  assert(seen_strncasecmp);
+
+  int_sink = strcmp(s1, s2);
+  assert(seen_strcmp);
+
+  int_sink = strcasecmp(s1, s2);
+  assert(seen_strcasecmp);
+
+  ptr_sink = strstr(s1, s2);
+  assert(seen_strstr);
+
+  ptr_sink = strcasestr(s1, s2);
+  assert(seen_strcasestr);
+
+  ptr_sink = memmem(s1, sizeof(s1), s2, sizeof(s2));
+  assert(seen_memmem);
+  return 0;
+}
diff --git a/test/sanitizer_common/TestCases/Posix/dedup_token_length_test.cc b/test/sanitizer_common/TestCases/Posix/dedup_token_length_test.cc
new file mode 100644
index 0000000..88d41b6
--- /dev/null
+++ b/test/sanitizer_common/TestCases/Posix/dedup_token_length_test.cc
@@ -0,0 +1,40 @@
+// Test dedup_token_length
+// RUN: %clangxx -O0 %s -o %t
+// RUN: env %tool_options='abort_on_error=0'                    not %run %t 2>&1   | FileCheck %s --check-prefix=CHECK0
+// RUN: env %tool_options='abort_on_error=0, dedup_token_length=0' not %run %t 2>&1   | FileCheck %s --check-prefix=CHECK0
+// RUN: env %tool_options='abort_on_error=0, dedup_token_length=1' not %run %t 2>&1   | FileCheck %s --check-prefix=CHECK1
+// RUN: env %tool_options='abort_on_error=0, dedup_token_length=2' not %run %t 2>&1   | FileCheck %s --check-prefix=CHECK2
+// RUN: env %tool_options='abort_on_error=0, dedup_token_length=3' not %run %t 2>&1   | FileCheck %s --check-prefix=CHECK3
+
+// REQUIRES: stable-runtime
+// FIXME: implement SEGV handler in other sanitizers, not just asan.
+// XFAIL: msan
+// XFAIL: lsan
+// XFAIL: tsan
+
+volatile int *null = 0;
+
+namespace Xyz {
+  template<class A, class B> void Abc() {
+    *null = 0;
+  }
+}
+
+extern "C" void bar() {
+  Xyz::Abc<int, int>();
+}
+
+void FOO() {
+  bar();
+}
+
+int main(int argc, char **argv) {
+  FOO();
+}
+
+// CHECK0-NOT: DEDUP_TOKEN:
+// CHECK1: DEDUP_TOKEN: void Xyz::Abc<int, int>()
+// CHECK1-NOT: bar
+// CHECK2: DEDUP_TOKEN: void Xyz::Abc<int, int>()--bar
+// CHECK2-NOT: FOO
+// CHECK3: DEDUP_TOKEN: void Xyz::Abc<int, int>()--bar--FOO()
diff --git a/test/sanitizer_common/TestCases/Linux/getpass.cc b/test/sanitizer_common/TestCases/Posix/getpass.cc
similarity index 94%
rename from test/sanitizer_common/TestCases/Linux/getpass.cc
rename to test/sanitizer_common/TestCases/Posix/getpass.cc
index 902c9cb..251f911 100644
--- a/test/sanitizer_common/TestCases/Linux/getpass.cc
+++ b/test/sanitizer_common/TestCases/Posix/getpass.cc
@@ -4,7 +4,11 @@
 #include <stdio.h>
 #include <unistd.h>
 #include <string.h>
+#if __linux__
 #include <pty.h>
+#else
+#include <util.h>
+#endif
 
 int
 main (int argc, char** argv)
diff --git a/test/sanitizer_common/TestCases/Posix/lit.local.cfg b/test/sanitizer_common/TestCases/Posix/lit.local.cfg
index a6d96d3..60a9460 100644
--- a/test/sanitizer_common/TestCases/Posix/lit.local.cfg
+++ b/test/sanitizer_common/TestCases/Posix/lit.local.cfg
@@ -5,5 +5,5 @@
 
 root = getRoot(config)
 
-if root.host_os in ['Windows', 'Darwin']:
+if root.host_os in ['Windows']:
   config.unsupported = True
diff --git a/test/sanitizer_common/TestCases/Linux/sanitizer_set_death_callback_test.cc b/test/sanitizer_common/TestCases/Posix/sanitizer_set_death_callback_test.cc
similarity index 100%
rename from test/sanitizer_common/TestCases/Linux/sanitizer_set_death_callback_test.cc
rename to test/sanitizer_common/TestCases/Posix/sanitizer_set_death_callback_test.cc
diff --git a/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_fd_test.cc b/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_fd_test.cc
new file mode 100644
index 0000000..af7eea1
--- /dev/null
+++ b/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_fd_test.cc
@@ -0,0 +1,37 @@
+// Test __sanitizer_set_report_fd:
+// RUN: %clangxx -O2 %s -o %t
+// RUN: not %run %t 2>&1   | FileCheck %s
+// RUN: not %run %t stdout | FileCheck %s
+// RUN: not %run %t %t-out && FileCheck < %t-out %s
+
+// REQUIRES: stable-runtime
+// FIXME: implement SEGV handler in other sanitizers, not just asan.
+// XFAIL: msan
+// XFAIL: lsan
+// XFAIL: tsan
+
+#include <sanitizer/common_interface_defs.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <assert.h>
+
+volatile int *null = 0;
+
+int main(int argc, char **argv) {
+  if (argc == 2) {
+    if (!strcmp(argv[1], "stdout")) {
+      __sanitizer_set_report_fd(reinterpret_cast<void*>(1));
+    } else {
+      int fd = open(argv[1], O_CREAT | O_WRONLY | O_TRUNC, S_IRWXU);
+      assert(fd > 0);
+      __sanitizer_set_report_fd(reinterpret_cast<void*>(fd));
+    }
+  }
+  *null = 0;
+}
+
+// CHECK: ERROR: {{.*}} SEGV on unknown address
diff --git a/test/sanitizer_common/TestCases/malloc_hook.cc b/test/sanitizer_common/TestCases/malloc_hook.cc
index 9702249..59cd620 100644
--- a/test/sanitizer_common/TestCases/malloc_hook.cc
+++ b/test/sanitizer_common/TestCases/malloc_hook.cc
@@ -10,23 +10,43 @@
 extern "C" {
 const volatile void *global_ptr;
 
+#define WRITE(s) write(1, s, sizeof(s))
+
 // Note: avoid calling functions that allocate memory in malloc/free
 // to avoid infinite recursion.
 void __sanitizer_malloc_hook(const volatile void *ptr, size_t sz) {
-  if (__sanitizer_get_ownership(ptr)) {
-    write(1, "MallocHook\n", sizeof("MallocHook\n"));
+  if (__sanitizer_get_ownership(ptr) && sz == 4) {
+    WRITE("MallocHook\n");
     global_ptr = ptr;
   }
 }
 void __sanitizer_free_hook(const volatile void *ptr) {
   if (__sanitizer_get_ownership(ptr) && ptr == global_ptr)
-    write(1, "FreeHook\n", sizeof("FreeHook\n"));
+    WRITE("FreeHook\n");
 }
 }  // extern "C"
 
+volatile int *x;
+
+void MallocHook1(const volatile void *ptr, size_t sz) { WRITE("MH1\n"); }
+void MallocHook2(const volatile void *ptr, size_t sz) { WRITE("MH2\n"); }
+void FreeHook1(const volatile void *ptr) { WRITE("FH1\n"); }
+void FreeHook2(const volatile void *ptr) { WRITE("FH2\n"); }
+// Call this function with uninitialized arguments to poison the
+// TLS shadow for function parameters before calling operator
+// new and, eventually, the user-provided hook.
+__attribute__((noinline)) void allocate(int *unused1, int *unused2) {
+  x = new int;
+}
+
 int main() {
-  volatile int *x = new int;
+  __sanitizer_install_malloc_and_free_hooks(MallocHook1, FreeHook1);
+  __sanitizer_install_malloc_and_free_hooks(MallocHook2, FreeHook2);
+  int *undef1, *undef2;
+  allocate(undef1, undef2);
   // CHECK: MallocHook
+  // CHECK: MH1
+  // CHECK: MH2
   // Check that malloc hook was called with correct argument.
   if (global_ptr != (void*)x) {
     _exit(1);
@@ -34,5 +54,7 @@
   *x = 0;
   delete x;
   // CHECK: FreeHook
+  // CHECK: FH1
+  // CHECK: FH2
   return 0;
 }
diff --git a/test/sanitizer_common/TestCases/options-include.cc b/test/sanitizer_common/TestCases/options-include.cc
index 1528b15..5b0b6d5 100644
--- a/test/sanitizer_common/TestCases/options-include.cc
+++ b/test/sanitizer_common/TestCases/options-include.cc
@@ -1,9 +1,10 @@
 // RUN: %clangxx -O0 %s -o %t
 
 // Recursive include: options1 includes options2
-// RUN: echo -e "symbolize=1\ninclude='%t.options2.txt'" >%t.options1.txt
-// RUN: echo -e "help=1\n" >%t.options2.txt
-// RUN: echo -e "help=1\n" >%t.options.options-include.cc.tmp
+// RUN: echo "symbolize=1" > %t.options1.txt
+// RUN: echo "include='%t.options2.txt'" >>%t.options1.txt
+// RUN: echo "help=1" >%t.options2.txt
+// RUN: echo "help=1" >%t.options.options-include.cc.tmp
 // RUN: cat %t.options1.txt
 // RUN: cat %t.options2.txt
 
diff --git a/test/sanitizer_common/TestCases/print-stack-trace.cc b/test/sanitizer_common/TestCases/print-stack-trace.cc
index 9134a88..0055b27 100644
--- a/test/sanitizer_common/TestCases/print-stack-trace.cc
+++ b/test/sanitizer_common/TestCases/print-stack-trace.cc
@@ -14,11 +14,11 @@
   return 0;
 }
 // CHECK: {{    #0 0x.* in __sanitizer_print_stack_trace}}
-// CHECK: {{    #1 0x.* in FooBarBaz(\(\))? .*print-stack-trace.cc:9}}
-// CHECK: {{    #2 0x.* in main.*print-stack-trace.cc:13}}
+// CHECK: {{    #1 0x.* in FooBarBaz(\(\))? .*}}print-stack-trace.cc:[[@LINE-8]]
+// CHECK: {{    #2 0x.* in main.*}}print-stack-trace.cc:[[@LINE-5]]
 
-// CUSTOM: frame:1 lineno:9
-// CUSTOM: frame:2 lineno:13
+// CUSTOM: frame:1 lineno:[[@LINE-11]]
+// CUSTOM: frame:2 lineno:[[@LINE-8]]
 
 // NOINLINE: #0 0x{{.*}} in __sanitizer_print_stack_trace
-// NOINLINE: #1 0x{{.*}} in main{{.*}}print-stack-trace.cc:9
+// NOINLINE: #1 0x{{.*}} in main{{.*}}print-stack-trace.cc:[[@LINE-15]]
diff --git a/test/sanitizer_common/TestCases/strnlen.c b/test/sanitizer_common/TestCases/strnlen.c
new file mode 100644
index 0000000..8ab8ec9
--- /dev/null
+++ b/test/sanitizer_common/TestCases/strnlen.c
@@ -0,0 +1,12 @@
+// RUN: %clang %s -o %t && %run %t 2>&1
+
+#include <assert.h>
+#include <string.h>
+int main(int argc, char **argv) {
+  const char *s = "mytest";
+  assert(strnlen(s, 0) == 0UL);
+  assert(strnlen(s, 1) == 1UL);
+  assert(strnlen(s, 6) == strlen(s));
+  assert(strnlen(s, 7) == strlen(s));
+  return 0;
+}
diff --git a/test/sanitizer_common/Unit/lit.site.cfg.in b/test/sanitizer_common/Unit/lit.site.cfg.in
index 2600585..c62e23c 100644
--- a/test/sanitizer_common/Unit/lit.site.cfg.in
+++ b/test/sanitizer_common/Unit/lit.site.cfg.in
@@ -1,5 +1,4 @@
-## Autogenerated by LLVM/Clang configuration.
-# Do not edit!
+@LIT_SITE_CFG_IN_HEADER@
 
 # Load common config for all compiler-rt unit tests.
 lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/unittests/lit.common.unit.configured")
diff --git a/test/sanitizer_common/lit.common.cfg b/test/sanitizer_common/lit.common.cfg
index 7abbfc2..b32fb1b 100644
--- a/test/sanitizer_common/lit.common.cfg
+++ b/test/sanitizer_common/lit.common.cfg
@@ -3,7 +3,7 @@
 # Setup source root.
 config.test_source_root = os.path.join(os.path.dirname(__file__), "TestCases")
 
-config.name = "SanitizerCommon-" + config.tool_name
+config.name = "SanitizerCommon-" + config.name_suffix
 
 default_tool_options = []
 if config.tool_name == "asan":
@@ -23,6 +23,9 @@
 
 config.available_features.add(config.tool_name)
 
+if config.target_arch not in ['arm', 'armhf', 'aarch64']:
+  config.available_features.add('stable-runtime')
+
 if config.host_os == 'Darwin':
   # On Darwin, we default to `abort_on_error=1`, which would make tests run
   # much slower. Let's override this and run lit tests with 'abort_on_error=0'.
diff --git a/test/sanitizer_common/lit.site.cfg.in b/test/sanitizer_common/lit.site.cfg.in
index 64a3edf..414eaba 100644
--- a/test/sanitizer_common/lit.site.cfg.in
+++ b/test/sanitizer_common/lit.site.cfg.in
@@ -1,11 +1,14 @@
-# Load common config for all compiler-rt lit tests.
-lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
+@LIT_SITE_CFG_IN_HEADER@
 
 # Tool-specific config options.
+config.name_suffix = "@CONFIG_NAME@"
 config.tool_name = "@SANITIZER_COMMON_LIT_TEST_MODE@"
 config.target_cflags = "@SANITIZER_COMMON_TEST_TARGET_CFLAGS@"
 config.target_arch = "@SANITIZER_COMMON_TEST_TARGET_ARCH@"
 
+# Load common config for all compiler-rt lit tests.
+lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
+
 # Load tool-specific config that would do the real work.
 lit_config.load_config(config, "@SANITIZER_COMMON_LIT_SOURCE_DIR@/lit.common.cfg")
 
diff --git a/test/scudo/CMakeLists.txt b/test/scudo/CMakeLists.txt
new file mode 100644
index 0000000..b6cb2fd
--- /dev/null
+++ b/test/scudo/CMakeLists.txt
@@ -0,0 +1,28 @@
+set(SCUDO_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+set(SCUDO_LIT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
+
+
+set(SCUDO_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
+if(NOT COMPILER_RT_STANDALONE_BUILD)
+  list(APPEND SCUDO_TEST_DEPS scudo)
+endif()
+
+configure_lit_site_cfg(
+  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
+  )
+
+if(CMAKE_SYSTEM_NAME MATCHES "Linux")
+   EXEC_PROGRAM(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO)
+   STRING(REGEX REPLACE "^.*(sse4_2).*$" "\\1" SSE_THERE ${CPUINFO})
+   STRING(COMPARE EQUAL "sse4_2" "${SSE_THERE}" SSE42_TRUE)
+endif(CMAKE_SYSTEM_NAME MATCHES "Linux")
+
+if (SSE42_TRUE AND CMAKE_SIZEOF_VOID_P EQUAL 8)
+  add_lit_testsuite(check-scudo
+    "Running the Scudo Hardened Allocator tests"
+    ${CMAKE_CURRENT_BINARY_DIR}
+    DEPENDS ${SCUDO_TEST_DEPS})
+  set_target_properties(check-scudo PROPERTIES FOLDER
+    "Compiler-RT Misc")
+endif(SSE42_TRUE AND CMAKE_SIZEOF_VOID_P EQUAL 8)
diff --git a/test/scudo/alignment.cpp b/test/scudo/alignment.cpp
new file mode 100644
index 0000000..c5e57d1
--- /dev/null
+++ b/test/scudo/alignment.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang_scudo %s -o %t
+// RUN: not %run %t pointers 2>&1 | FileCheck %s
+
+// Tests that a non-16-byte aligned pointer will trigger the associated error
+// on deallocation.
+
+#include <assert.h>
+#include <malloc.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+int main(int argc, char **argv)
+{
+  assert(argc == 2);
+  if (!strcmp(argv[1], "pointers")) {
+    void *p = malloc(1U << 16);
+    if (!p)
+      return 1;
+    free(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(p) | 8));
+  }
+  return 0;
+}
+
+// CHECK: ERROR: attempted to deallocate a chunk not properly aligned
diff --git a/test/scudo/double-free.cpp b/test/scudo/double-free.cpp
new file mode 100644
index 0000000..4f5bf0c
--- /dev/null
+++ b/test/scudo/double-free.cpp
@@ -0,0 +1,49 @@
+// RUN: %clang_scudo %s -o %t
+// RUN: not %run %t malloc   2>&1 | FileCheck %s
+// RUN: not %run %t new      2>&1 | FileCheck %s
+// RUN: not %run %t newarray 2>&1 | FileCheck %s
+// RUN: not %run %t memalign 2>&1 | FileCheck %s
+
+// Tests double-free error on pointers allocated with different allocation
+// functions.
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+int main(int argc, char **argv)
+{
+  assert(argc == 2);
+  if (!strcmp(argv[1], "malloc")) {
+    void *p = malloc(sizeof(int));
+    if (!p)
+      return 1;
+    free(p);
+    free(p);
+  }
+  if (!strcmp(argv[1], "new")) {
+    int *p = new int;
+    if (!p)
+      return 1;
+    delete p;
+    delete p;
+  }
+  if (!strcmp(argv[1], "newarray")) {
+    int *p = new int[8];
+    if (!p)
+      return 1;
+    delete[] p;
+    delete[] p;
+  }
+  if (!strcmp(argv[1], "memalign")) {
+    void *p = nullptr;
+    posix_memalign(&p, 0x100, sizeof(int));
+    if (!p)
+      return 1;
+    free(p);
+    free(p);
+  }
+  return 0;
+}
+
+// CHECK: ERROR: invalid chunk state when deallocating address
diff --git a/test/scudo/lit.cfg b/test/scudo/lit.cfg
new file mode 100644
index 0000000..e2a4997
--- /dev/null
+++ b/test/scudo/lit.cfg
@@ -0,0 +1,39 @@
+# -*- Python -*-
+
+import os
+
+# Setup config name.
+config.name = 'Scudo'
+
+# Setup source root.
+config.test_source_root = os.path.dirname(__file__)
+
+# Path to the static library
+base_lib = os.path.join(config.compiler_rt_libdir,
+                        "libclang_rt.scudo-%s.a" % config.target_arch)
+whole_archive = "-Wl,-whole-archive %s -Wl,-no-whole-archive " % base_lib
+
+# Test suffixes.
+config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.ll', '.test']
+
+# C flags.
+c_flags = ["-std=c++11",
+           "-lstdc++",
+           "-ldl",
+           "-lrt",
+           "-pthread",
+           "-latomic",
+           "-fPIE",
+           "-pie",
+           "-O0"]
+
+def build_invocation(compile_flags):                                            
+  return " " + " ".join([config.clang] + compile_flags) + " "                   
+
+# Add clang substitutions.
+config.substitutions.append( ("%clang_scudo ",
+                              build_invocation(c_flags) + whole_archive) )
+
+# Hardened Allocator tests are currently supported on Linux only.
+if config.host_os not in ['Linux']:
+   config.unsupported = True
diff --git a/test/scudo/lit.site.cfg.in b/test/scudo/lit.site.cfg.in
new file mode 100644
index 0000000..64e2fb3
--- /dev/null
+++ b/test/scudo/lit.site.cfg.in
@@ -0,0 +1,7 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+# Load common config for all compiler-rt lit tests.
+lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
+
+# Load tool-specific config that would do the real work.
+lit_config.load_config(config, "@SCUDO_LIT_SOURCE_DIR@/lit.cfg")
diff --git a/test/scudo/malloc.cpp b/test/scudo/malloc.cpp
new file mode 100644
index 0000000..4507a52
--- /dev/null
+++ b/test/scudo/malloc.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_scudo %s -o %t
+// RUN: %run %t 2>&1
+
+// Tests that a regular workflow of allocation, memory fill and free works as
+// intended. Also tests that a zero-sized allocation succeeds.
+
+#include <malloc.h>
+#include <stdlib.h>
+#include <string.h>
+
+int main(int argc, char **argv)
+{
+  void *p;
+  size_t size = 1U << 8;
+
+  p = malloc(size);
+  if (!p)
+    return 1;
+  memset(p, 'A', size);
+  free(p);
+  p = malloc(0);
+  if (!p)
+    return 1;
+  free(p);
+
+  return 0;
+}
diff --git a/test/scudo/memalign.cpp b/test/scudo/memalign.cpp
new file mode 100644
index 0000000..951d1aa
--- /dev/null
+++ b/test/scudo/memalign.cpp
@@ -0,0 +1,45 @@
+// RUN: %clang_scudo %s -o %t
+// RUN:     %run %t valid   2>&1
+// RUN: not %run %t invalid 2>&1 | FileCheck %s
+
+// Tests that the various aligned allocation functions work as intended. Also
+// tests for the condition where the alignment is not a power of 2.
+
+#include <assert.h>
+#include <malloc.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Sometimes the headers may not have this...
+extern "C" void *aligned_alloc (size_t alignment, size_t size);
+
+int main(int argc, char **argv)
+{
+  void *p;
+  size_t alignment = 1U << 12;
+  size_t size = alignment;
+
+  assert(argc == 2);
+  if (!strcmp(argv[1], "valid")) {
+    p = memalign(alignment, size);
+    if (!p)
+      return 1;
+    free(p);
+    p = nullptr;
+    posix_memalign(&p, alignment, size);
+    if (!p)
+      return 1;
+    free(p);
+    p = aligned_alloc(alignment, size);
+    if (!p)
+      return 1;
+    free(p);
+  }
+  if (!strcmp(argv[1], "invalid")) {
+    p = memalign(alignment - 1, size);
+    free(p);
+  }
+  return 0;
+}
+
+// CHECK: ERROR: malloc alignment is not a power of 2
diff --git a/test/scudo/mismatch.cpp b/test/scudo/mismatch.cpp
new file mode 100644
index 0000000..2d3d198
--- /dev/null
+++ b/test/scudo/mismatch.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_scudo %s -o %t
+// RUN: SCUDO_OPTIONS=DeallocationTypeMismatch=1 not %run %t mallocdel   2>&1 | FileCheck %s
+// RUN: SCUDO_OPTIONS=DeallocationTypeMismatch=0     %run %t mallocdel   2>&1
+// RUN: SCUDO_OPTIONS=DeallocationTypeMismatch=1 not %run %t newfree     2>&1 | FileCheck %s
+// RUN: SCUDO_OPTIONS=DeallocationTypeMismatch=0     %run %t newfree     2>&1
+// RUN: SCUDO_OPTIONS=DeallocationTypeMismatch=1 not %run %t memaligndel 2>&1 | FileCheck %s
+// RUN: SCUDO_OPTIONS=DeallocationTypeMismatch=0     %run %t memaligndel 2>&1
+
+// Tests that type mismatches between allocation and deallocation functions are
+// caught when the related option is set.
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <malloc.h>
+
+int main(int argc, char **argv)
+{
+  assert(argc == 2);
+  if (!strcmp(argv[1], "mallocdel")) {
+    int *p = (int *)malloc(16);
+    if (!p)
+      return 1;
+    delete p;
+  }
+  if (!strcmp(argv[1], "newfree")) {
+    int *p = new int;
+    if (!p)
+      return 1;
+    free((void *)p);
+  }
+  if (!strcmp(argv[1], "memaligndel")) {
+    int *p = (int *)memalign(0x10, 0x10);
+    if (!p)
+      return 1;
+    delete p;
+  }
+  return 0;
+}
+
+// CHECK: ERROR: allocation type mismatch on address
diff --git a/test/scudo/options.cpp b/test/scudo/options.cpp
new file mode 100644
index 0000000..bccf7c8
--- /dev/null
+++ b/test/scudo/options.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang_scudo %s -o %t
+// RUN:                                              %run %t 2>&1
+// RUN: SCUDO_OPTIONS=DeallocationTypeMismatch=0     %run %t 2>&1
+// RUN: SCUDO_OPTIONS=DeallocationTypeMismatch=1 not %run %t 2>&1 | FileCheck %s
+
+// Tests that options can be provided via __scudo_default_options, and that
+// the ones set in the environment take precedence over them.
+
+#include <stdlib.h>
+#include <malloc.h>
+
+extern "C" const char* __scudo_default_options() {
+  return "DeallocationTypeMismatch=0";  // Defaults to true in scudo_flags.inc.
+}
+
+int main(int argc, char **argv)
+{
+  int *p = (int *)malloc(16);
+  if (!p)
+    return 1;
+  delete p;
+  return 0;
+}
+
+// CHECK: ERROR: allocation type mismatch on address
diff --git a/test/scudo/overflow.cpp b/test/scudo/overflow.cpp
new file mode 100644
index 0000000..5b2cb75
--- /dev/null
+++ b/test/scudo/overflow.cpp
@@ -0,0 +1,38 @@
+// RUN: %clang_scudo %s -o %t
+// RUN:                                  not %run %t malloc     2>&1 | FileCheck %s
+// RUN: SCUDO_OPTIONS=QuarantineSizeMb=1 not %run %t quarantine 2>&1 | FileCheck %s
+
+// Tests that header corruption of an allocated or quarantined chunk is caught.
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+int main(int argc, char **argv)
+{
+  assert(argc == 2);
+  if (!strcmp(argv[1], "malloc")) {
+    // Simulate a header corruption of an allocated chunk (1-bit)
+    void *p = malloc(1U << 4);
+    if (!p)
+      return 1;
+    ((char *)p)[-1] ^= 1;
+    free(p);
+  }
+  if (!strcmp(argv[1], "quarantine")) {
+    void *p = malloc(1U << 4);
+    if (!p)
+      return 1;
+    free(p);
+    // Simulate a header corruption of a quarantined chunk
+    ((char *)p)[-2] ^= 1;
+    // Trigger the quarantine recycle
+    for (int i = 0; i < 0x100; i++) {
+      p = malloc(1U << 16);
+      free(p);
+    }
+  }
+  return 0;
+}
+
+// CHECK: ERROR: corrupted chunk header at address
diff --git a/test/scudo/preinit.cpp b/test/scudo/preinit.cpp
new file mode 100644
index 0000000..a280ae1
--- /dev/null
+++ b/test/scudo/preinit.cpp
@@ -0,0 +1,38 @@
+// RUN: %clang_scudo %s -o %t
+// RUN: %run %t 2>&1
+
+// Verifies that calling malloc in a preinit_array function succeeds, and that
+// the resulting pointer can be freed at program termination.
+
+#include <malloc.h>
+#include <stdlib.h>
+#include <string.h>
+
+static void *global_p = nullptr;
+
+void __init(void) {
+  global_p = malloc(1);
+  if (!global_p)
+    exit(1);
+}
+
+void __fini(void) {
+  if (global_p)
+    free(global_p);
+}
+
+int main(int argc, char **argv)
+{
+  void *p = malloc(1);
+  if (!p)
+    return 1;
+  free(p);
+
+  return 0;
+}
+
+__attribute__((section(".preinit_array"), used))
+  void (*__local_preinit)(void) = __init;
+__attribute__((section(".fini_array"), used))
+  void (*__local_fini)(void) = __fini;
+
diff --git a/test/scudo/quarantine.cpp b/test/scudo/quarantine.cpp
new file mode 100644
index 0000000..4ce0197
--- /dev/null
+++ b/test/scudo/quarantine.cpp
@@ -0,0 +1,43 @@
+// RUN: %clang_scudo %s -o %t
+// RUN: SCUDO_OPTIONS=QuarantineSizeMb=1 %run %t 2>&1
+
+// Tests that the quarantine prevents a chunk from being reused right away.
+// Also tests that a chunk will eventually become available again for
+// allocation once the recycling criteria have been met.
+
+#include <malloc.h>
+#include <stdlib.h>
+#include <string.h>
+
+int main(int argc, char **argv)
+{
+  void *p, *old_p;
+  size_t size = 1U << 16;
+
+  // The delayed freelist will prevent a chunk from being available right away
+  p = malloc(size);
+  if (!p)
+    return 1;
+  old_p = p;
+  free(p);
+  p = malloc(size);
+  if (!p)
+    return 1;
+  if (old_p == p)
+    return 1;
+  free(p);
+
+  // Eventually the chunk should become available again
+  bool found = false;
+  for (int i = 0; i < 0x100 && found == false; i++) {
+    p = malloc(size);
+    if (!p)
+      return 1;
+    found = (p == old_p);
+    free(p);
+  }
+  if (found == false)
+    return 1;
+
+  return 0;
+}
diff --git a/test/scudo/realloc.cpp b/test/scudo/realloc.cpp
new file mode 100644
index 0000000..2a7d5b6
--- /dev/null
+++ b/test/scudo/realloc.cpp
@@ -0,0 +1,69 @@
+// RUN: %clang_scudo %s -o %t
+// RUN:     %run %t pointers 2>&1
+// RUN:     %run %t contents 2>&1
+// RUN: not %run %t memalign 2>&1 | FileCheck %s
+
+// Tests that our reallocation function returns the same pointer when the
+// requested size can fit into the previously allocated chunk. Also tests that
+// a new chunk is returned if the size is greater, and that the contents of the
+// chunk are left unchanged.
+// As a final test, make sure that a chunk allocated by memalign cannot be
+// reallocated.
+
+#include <assert.h>
+#include <malloc.h>
+#include <string.h>
+
+int main(int argc, char **argv)
+{
+  void *p, *old_p;
+  size_t size = 32;
+
+  assert(argc == 2);
+  if (!strcmp(argv[1], "pointers")) {
+    old_p = p = realloc(nullptr, size);
+    if (!p)
+      return 1;
+    size = malloc_usable_size(p);
+    // Our realloc implementation will return the same pointer if the requested
+    // size is less than or equal to the usable size of the associated chunk.
+    p = realloc(p, size - 1);
+    if (p != old_p)
+      return 1;
+    p = realloc(p, size);
+    if (p != old_p)
+      return 1;
+    // And a new one if the size is greater.
+    p = realloc(p, size + 1);
+    if (p == old_p)
+      return 1;
+    // A size of 0 will free the chunk and return nullptr.
+    p = realloc(p, 0);
+    if (p)
+      return 1;
+    old_p = nullptr;
+  }
+  if (!strcmp(argv[1], "contents")) {
+    p = realloc(nullptr, size);
+    if (!p)
+      return 1;
+    for (int i = 0; i < size; i++)
+      reinterpret_cast<char *>(p)[i] = 'A';
+    p = realloc(p, size + 1);
+    // The contents of the reallocated chunk must match the original one.
+    for (int i = 0; i < size; i++)
+      if (reinterpret_cast<char *>(p)[i] != 'A')
+        return 1;
+  }
+  if (!strcmp(argv[1], "memalign")) {
+    // A chunk coming from memalign cannot be reallocated.
+    p = memalign(16, size);
+    if (!p)
+      return 1;
+    p = realloc(p, size);
+    free(p);
+  }
+  return 0;
+}
+
+// CHECK: ERROR: invalid chunk type when reallocating address
diff --git a/test/scudo/sized-delete.cpp b/test/scudo/sized-delete.cpp
new file mode 100644
index 0000000..5b1bf5f
--- /dev/null
+++ b/test/scudo/sized-delete.cpp
@@ -0,0 +1,40 @@
+// RUN: %clang_scudo -fsized-deallocation %s -o %t
+// RUN: SCUDO_OPTIONS=DeleteSizeMismatch=1     %run %t gooddel    2>&1
+// RUN: SCUDO_OPTIONS=DeleteSizeMismatch=1 not %run %t baddel     2>&1 | FileCheck %s
+// RUN: SCUDO_OPTIONS=DeleteSizeMismatch=0     %run %t baddel     2>&1
+// RUN: SCUDO_OPTIONS=DeleteSizeMismatch=1     %run %t gooddelarr 2>&1
+// RUN: SCUDO_OPTIONS=DeleteSizeMismatch=1 not %run %t baddelarr  2>&1 | FileCheck %s
+// RUN: SCUDO_OPTIONS=DeleteSizeMismatch=0     %run %t baddelarr  2>&1
+
+// Ensures that the sized delete operator errors out when the appropriate
+// option is passed and the sizes do not match between allocation and
+// deallocation functions.
+
+#include <new>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+int main(int argc, char **argv)
+{
+  assert(argc == 2);
+  if (!strcmp(argv[1], "gooddel")) {
+    long long *p = new long long;
+    operator delete(p, sizeof(long long));
+  }
+  if (!strcmp(argv[1], "baddel")) {
+    long long *p = new long long;
+    operator delete(p, 2);
+  }
+  if (!strcmp(argv[1], "gooddelarr")) {
+    char *p = new char[64];
+    operator delete[](p, 64);
+  }
+  if (!strcmp(argv[1], "baddelarr")) {
+    char *p = new char[63];
+    operator delete[](p, 64);
+  }
+  return 0;
+}
+
+// CHECK: ERROR: invalid sized delete on chunk at address
diff --git a/test/scudo/sizes.cpp b/test/scudo/sizes.cpp
new file mode 100644
index 0000000..7190cb6
--- /dev/null
+++ b/test/scudo/sizes.cpp
@@ -0,0 +1,61 @@
+// RUN: %clang_scudo %s -o %t
+// RUN: SCUDO_OPTIONS=allocator_may_return_null=0 not %run %t malloc 2>&1 | FileCheck %s
+// RUN: SCUDO_OPTIONS=allocator_may_return_null=1     %run %t malloc 2>&1
+// RUN: SCUDO_OPTIONS=allocator_may_return_null=0 not %run %t calloc 2>&1 | FileCheck %s
+// RUN: SCUDO_OPTIONS=allocator_may_return_null=1     %run %t calloc 2>&1
+// RUN:                                               %run %t usable 2>&1
+
+// Tests for various edge cases related to sizes, notably the maximum size the
+// allocator can allocate. Tests that an integer overflow in the parameters of
+// calloc is caught.
+
+#include <assert.h>
+#include <malloc.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <limits>
+
+int main(int argc, char **argv)
+{
+  assert(argc == 2);
+  if (!strcmp(argv[1], "malloc")) {
+    // Currently the maximum size the allocator can allocate is 1ULL<<40 bytes.
+    size_t size = std::numeric_limits<size_t>::max();
+    void *p = malloc(size);
+    if (p)
+      return 1;
+    size = (1ULL << 40) - 16;
+    p = malloc(size);
+    if (p)
+      return 1;
+  }
+  if (!strcmp(argv[1], "calloc")) {
+    // Trigger an overflow in calloc.
+    size_t size = std::numeric_limits<size_t>::max();
+    void *p = calloc((size / 0x1000) + 1, 0x1000);
+    if (p)
+      return 1;
+  }
+  if (!strcmp(argv[1], "usable")) {
+    // Playing with the actual usable size of a chunk.
+    void *p = malloc(1007);
+    if (!p)
+      return 1;
+    size_t size = malloc_usable_size(p);
+    if (size < 1007)
+      return 1;
+    memset(p, 'A', size);
+    p = realloc(p, 2014);
+    if (!p)
+      return 1;
+    size = malloc_usable_size(p);
+    if (size < 2014)
+      return 1;
+    memset(p, 'B', size);
+    free(p);
+  }
+  return 0;
+}
+
+// CHECK: allocator is terminating the process
diff --git a/test/tsan/CMakeLists.txt b/test/tsan/CMakeLists.txt
index 01e8038..e05b100 100644
--- a/test/tsan/CMakeLists.txt
+++ b/test/tsan/CMakeLists.txt
@@ -14,18 +14,44 @@
   set(TSAN_HAS_LIBCXX False)
 endif()
 
-configure_lit_site_cfg(
-  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
-  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg)
+set(TSAN_TESTSUITES)
+
+set(TSAN_TEST_ARCH ${TSAN_SUPPORTED_ARCH})
+if(APPLE)
+  darwin_filter_host_archs(TSAN_SUPPORTED_ARCH TSAN_TEST_ARCH)
+endif()
+
+foreach(arch ${TSAN_TEST_ARCH})
+  set(TSAN_TEST_TARGET_ARCH ${arch})
+  string(TOLOWER "-${arch}" TSAN_TEST_CONFIG_SUFFIX)
+  if(ANDROID OR ${arch} MATCHES "arm|aarch64")
+    # This is only true if we are cross-compiling.
+    # Build all tests with host compiler and use host tools.
+    set(TSAN_TEST_TARGET_CC ${COMPILER_RT_TEST_COMPILER})
+    set(TSAN_TEST_TARGET_CFLAGS ${COMPILER_RT_TEST_COMPILER_CFLAGS})
+  else()
+    get_target_flags_for_arch(${arch} TSAN_TEST_TARGET_CFLAGS)
+    string(REPLACE ";" " " TSAN_TEST_TARGET_CFLAGS "${TSAN_TEST_TARGET_CFLAGS}")
+  endif()
+
+  string(TOUPPER ${arch} ARCH_UPPER_CASE)
+  set(CONFIG_NAME ${ARCH_UPPER_CASE}Config)
+
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg)
+  list(APPEND TSAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME})
+endforeach()
 
 if(COMPILER_RT_INCLUDE_TESTS)
   configure_lit_site_cfg(
     ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in
     ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg)
   list(APPEND TSAN_TEST_DEPS TsanUnitTests)
+  list(APPEND TSAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/Unit)
 endif()
 
 add_lit_testsuite(check-tsan "Running ThreadSanitizer tests"
-  ${CMAKE_CURRENT_BINARY_DIR}
+  ${TSAN_TESTSUITES}
   DEPENDS ${TSAN_TEST_DEPS})
-set_target_properties(check-tsan PROPERTIES FOLDER "TSan tests")
+set_target_properties(check-tsan PROPERTIES FOLDER "Compiler-RT Tests")
diff --git a/test/tsan/Darwin/gcd-blocks.mm b/test/tsan/Darwin/gcd-blocks.mm
index 0dbff27..e008260 100644
--- a/test/tsan/Darwin/gcd-blocks.mm
+++ b/test/tsan/Darwin/gcd-blocks.mm
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan %s -o %t -framework Foundation
+// RUN: %clangxx_tsan %s -o %t -framework Foundation
 // RUN: %env_tsan_opts=ignore_interceptors_accesses=1 %run %t 2>&1 | FileCheck %s
 
 #import <Foundation/Foundation.h>
diff --git a/test/tsan/Darwin/gcd-groups-destructor.mm b/test/tsan/Darwin/gcd-groups-destructor.mm
index 641be00..19c2c9b 100644
--- a/test/tsan/Darwin/gcd-groups-destructor.mm
+++ b/test/tsan/Darwin/gcd-groups-destructor.mm
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan %s -o %t -framework Foundation
+// RUN: %clangxx_tsan %s -o %t -framework Foundation
 // RUN: %env_tsan_opts=ignore_interceptors_accesses=1 %run %t 2>&1 | FileCheck %s
 
 #import <Foundation/Foundation.h>
diff --git a/test/tsan/Darwin/ignored-interceptors.mm b/test/tsan/Darwin/ignored-interceptors.mm
index 82b9515..d513142 100644
--- a/test/tsan/Darwin/ignored-interceptors.mm
+++ b/test/tsan/Darwin/ignored-interceptors.mm
@@ -6,10 +6,10 @@
 // RUN: %clang_tsan %s -o %t -framework Foundation
 
 // Check that without the flag, there are false positives.
-// RUN: %deflake %run %t 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-RACE
+// RUN: %deflake %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-RACE
 
 // With ignore_interceptors_accesses=1, no races are reported.
-// RUN: %env_tsan_opts=ignore_interceptors_accesses=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: %env_tsan_opts=ignore_interceptors_accesses=1 %run %t 2>&1 | FileCheck %s
 
 // With ignore_interceptors_accesses=1, races in user's code are still reported.
 // RUN: %env_tsan_opts=ignore_interceptors_accesses=1 %deflake %run %t race 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-RACE
diff --git a/test/tsan/Darwin/libcxx-shared-ptr-recursive.mm b/test/tsan/Darwin/libcxx-shared-ptr-recursive.mm
index fc5482a..eea02dc 100644
--- a/test/tsan/Darwin/libcxx-shared-ptr-recursive.mm
+++ b/test/tsan/Darwin/libcxx-shared-ptr-recursive.mm
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan %s -o %t -framework Foundation
+// RUN: %clangxx_tsan %s -o %t -framework Foundation
 // RUN: %env_tsan_opts=ignore_interceptors_accesses=1 %run %t 2>&1 | FileCheck %s
 
 #import <Foundation/Foundation.h>
diff --git a/test/tsan/Darwin/libcxx-shared-ptr-stress.mm b/test/tsan/Darwin/libcxx-shared-ptr-stress.mm
index 2aa5582..7c36729 100644
--- a/test/tsan/Darwin/libcxx-shared-ptr-stress.mm
+++ b/test/tsan/Darwin/libcxx-shared-ptr-stress.mm
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan %s -o %t -framework Foundation
+// RUN: %clangxx_tsan %s -o %t -framework Foundation
 // RUN: %env_tsan_opts=ignore_interceptors_accesses=1 %run %t 2>&1 | FileCheck %s
 
 #import <Foundation/Foundation.h>
diff --git a/test/tsan/Darwin/libcxx-shared-ptr.mm b/test/tsan/Darwin/libcxx-shared-ptr.mm
index e1ea226..6187c43 100644
--- a/test/tsan/Darwin/libcxx-shared-ptr.mm
+++ b/test/tsan/Darwin/libcxx-shared-ptr.mm
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan %s -o %t -framework Foundation
+// RUN: %clangxx_tsan %s -o %t -framework Foundation
 // RUN: %env_tsan_opts=ignore_interceptors_accesses=1 %run %t 2>&1 | FileCheck %s
 
 #import <Foundation/Foundation.h>
diff --git a/test/tsan/Darwin/objc-simple.mm b/test/tsan/Darwin/objc-simple.mm
index b62d0eb..a8fc355 100644
--- a/test/tsan/Darwin/objc-simple.mm
+++ b/test/tsan/Darwin/objc-simple.mm
@@ -1,7 +1,7 @@
 // Test that a simple Obj-C program runs and exits without any warnings.
 
 // RUN: %clang_tsan %s -o %t -framework Foundation
-// RUN: %run %t 2>&1 | FileCheck %s
+// RUN: %env_tsan_opts=ignore_interceptors_accesses=1 %run %t 2>&1 | FileCheck %s
 
 #import <Foundation/Foundation.h>
 
diff --git a/test/tsan/Darwin/osatomics-add.mm b/test/tsan/Darwin/osatomics-add.mm
index 65be24d..087958e 100644
--- a/test/tsan/Darwin/osatomics-add.mm
+++ b/test/tsan/Darwin/osatomics-add.mm
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan %s -o %t -framework Foundation -std=c++11
+// RUN: %clangxx_tsan %s -o %t -framework Foundation -std=c++11
 // RUN: %run %t 2>&1 | FileCheck %s
 
 #import <Foundation/Foundation.h>
diff --git a/test/tsan/Darwin/osatomics-list.mm b/test/tsan/Darwin/osatomics-list.mm
index 2543111..6c2fbe7 100644
--- a/test/tsan/Darwin/osatomics-list.mm
+++ b/test/tsan/Darwin/osatomics-list.mm
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan %s -o %t -framework Foundation -std=c++11
+// RUN: %clangxx_tsan %s -o %t -framework Foundation -std=c++11
 // RUN: %run %t 2>&1 | FileCheck %s
 
 #import <Foundation/Foundation.h>
diff --git a/test/tsan/Darwin/realloc-zero.cc b/test/tsan/Darwin/realloc-zero.cc
index 9826246..c5053c3 100644
--- a/test/tsan/Darwin/realloc-zero.cc
+++ b/test/tsan/Darwin/realloc-zero.cc
@@ -9,7 +9,7 @@
 #include <sys/mman.h>
 
 int main() {
-  void *p = realloc(NULL, 0);
+  void *p = realloc(nullptr, 0);
   if (!p) {
     abort();
   }
diff --git a/test/tsan/Linux/check_preinit.cc b/test/tsan/Linux/check_preinit.cc
new file mode 100644
index 0000000..8f5bf40
--- /dev/null
+++ b/test/tsan/Linux/check_preinit.cc
@@ -0,0 +1,60 @@
+// RUN: %clang_tsan -fno-sanitize=thread -shared -fPIC -O1 -DBUILD_SO=1 %s -o \
+// RUN:  %t.so && \
+// RUN:   %clang_tsan -O1 %s %t.so -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: llvm-objdump -t %t | FileCheck %s --check-prefix=CHECK-DUMP
+// CHECK-DUMP:  {{[.]preinit_array.*__local_tsan_preinit}}
+
+// SANITIZER_CAN_USE_PREINIT_ARRAY is undefined on Android.
+// UNSUPPORTED: android
+
+// This test checks that __tsan_init is called from .preinit_array.
+// Without initialization from .preinit_array, __tsan_init would be called from
+// the binary's constructors, which run after the constructors of the shared
+// library.
+
+#include <stdio.h>
+
+#if BUILD_SO
+
+// "volatile" is needed to avoid compiler optimize-out constructors.
+volatile int counter = 0;
+volatile int lib_constructor_call = 0;
+volatile int tsan_init_call = 0;
+
+__attribute__ ((constructor))
+void LibConstructor() {
+  lib_constructor_call = ++counter;
+};
+
+#else  // BUILD_SO
+
+extern int counter;
+extern int lib_constructor_call;
+extern int tsan_init_call;
+
+volatile int bin_constructor_call = 0;
+
+__attribute__ ((constructor))
+void BinConstructor() {
+  bin_constructor_call = ++counter;
+};
+
+namespace __tsan {
+
+void OnInitialize() {
+  tsan_init_call = ++counter;
+}
+
+}
+
+int main() {
+  // CHECK: TSAN_INIT 1
+  // CHECK: LIB_CONSTRUCTOR 2
+  // CHECK: BIN_CONSTRUCTOR 3
+  printf("TSAN_INIT %d\n", tsan_init_call);
+  printf("LIB_CONSTRUCTOR %d\n", lib_constructor_call);
+  printf("BIN_CONSTRUCTOR %d\n", bin_constructor_call);
+  return 0;
+}
+
+#endif  // BUILD_SO
diff --git a/test/tsan/Linux/user_malloc.cc b/test/tsan/Linux/user_malloc.cc
index c671bfc..9c3ce68 100644
--- a/test/tsan/Linux/user_malloc.cc
+++ b/test/tsan/Linux/user_malloc.cc
@@ -8,7 +8,7 @@
 extern "C" void *malloc(unsigned long size) {
   static int first = 0;
   if (__sync_lock_test_and_set(&first, 1) == 0)
-    printf("user malloc\n");
+    fprintf(stderr, "user malloc\n");
   return __interceptor_malloc(size);
 }
 
diff --git a/test/tsan/Unit/lit.site.cfg.in b/test/tsan/Unit/lit.site.cfg.in
index 9498105..23894a8 100644
--- a/test/tsan/Unit/lit.site.cfg.in
+++ b/test/tsan/Unit/lit.site.cfg.in
@@ -1,5 +1,4 @@
-## Autogenerated by LLVM/Clang configuration.
-# Do not edit!
+@LIT_SITE_CFG_IN_HEADER@
 
 # Load common config for all compiler-rt unit tests.
 lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/unittests/lit.common.unit.configured")
diff --git a/test/tsan/aligned_vs_unaligned_race.cc b/test/tsan/aligned_vs_unaligned_race.cc
index 5c1189f..fb299da 100644
--- a/test/tsan/aligned_vs_unaligned_race.cc
+++ b/test/tsan/aligned_vs_unaligned_race.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
 // Race between an aligned access and an unaligned access, which
 // touches the same memory region.
 #include "test.h"
@@ -28,7 +28,7 @@
   pthread_create(&t[1], NULL, Thread2, NULL);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
-  printf("Pass\n");
+  fprintf(stderr, "Pass\n");
   // CHECK: ThreadSanitizer: data race
   // CHECK: Pass
   return 0;
diff --git a/test/tsan/benign_race.cc b/test/tsan/benign_race.cc
index 2f72fe1..90722aa 100644
--- a/test/tsan/benign_race.cc
+++ b/test/tsan/benign_race.cc
@@ -33,7 +33,7 @@
   Global = 43;
   WTFGlobal = 143;
   pthread_join(t, 0);
-  printf("OK\n");
+  fprintf(stderr, "OK\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/blacklist.cc b/test/tsan/blacklist.cc
index d6ca383..c1bcca6 100644
--- a/test/tsan/blacklist.cc
+++ b/test/tsan/blacklist.cc
@@ -23,7 +23,7 @@
   pthread_create(&t[1], NULL, Blacklisted_Thread2, NULL);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
-  printf("PASS\n");
+  fprintf(stderr, "PASS\n");
   return 0;
 }
 
diff --git a/test/tsan/blacklist2.cc b/test/tsan/blacklist2.cc
index 629b588..bf6c4eb 100644
--- a/test/tsan/blacklist2.cc
+++ b/test/tsan/blacklist2.cc
@@ -44,6 +44,6 @@
   pthread_create(&t[1], NULL, Blacklisted_Thread2, NULL);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
-  printf("PASS\n");
+  fprintf(stderr, "PASS\n");
   return 0;
 }
diff --git a/test/tsan/dl_iterate_phdr.cc b/test/tsan/dl_iterate_phdr.cc
index b9ce615..3c9821b 100644
--- a/test/tsan/dl_iterate_phdr.cc
+++ b/test/tsan/dl_iterate_phdr.cc
@@ -47,7 +47,7 @@
     dlclose(lib);
   }
   pthread_join(th, 0);
-  printf("DONE\n");
+  fprintf(stderr, "DONE\n");
   return 0;
 }
 
diff --git a/test/tsan/dtls.c b/test/tsan/dtls.c
new file mode 100644
index 0000000..5169756
--- /dev/null
+++ b/test/tsan/dtls.c
@@ -0,0 +1,62 @@
+// RUN: %clang_tsan %s -o %t
+// RUN: %clang_tsan %s -DBUILD_SO -fPIC -o %t-so.so -shared
+// RUN: %run %t 2>&1 | FileCheck %s
+
+// Test that tsan cleans up dynamic TLS memory before it is reused.
+
+#include "test.h"
+
+#ifndef BUILD_SO
+#include <assert.h>
+#include <dlfcn.h>
+
+typedef volatile long *(* get_t)();
+get_t GetTls;
+
+void *Thread1(void *arg) {
+  pthread_detach(pthread_self());
+  volatile long *x = GetTls();
+  *x = 42;
+  fprintf(stderr, "stack: %p dtls: %p\n", &x, x);
+  barrier_wait(&barrier);
+  return 0;
+}
+
+void *Thread2(void *arg) {
+  volatile long *x = GetTls();
+  *x = 42;
+  fprintf(stderr, "stack: %p dtls: %p\n", &x, x);
+  return 0;
+}
+
+int main(int argc, char *argv[]) {
+  char path[4096];
+  snprintf(path, sizeof(path), "%s-so.so", argv[0]);
+
+  void *handle = dlopen(path, RTLD_LAZY);
+  if (!handle) fprintf(stderr, "%s\n", dlerror());
+  assert(handle != 0);
+  GetTls = (get_t)dlsym(handle, "GetTls");
+  assert(dlerror() == 0);
+
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], 0, Thread1, 0);
+  barrier_wait(&barrier);
+  // Wait for actual thread termination without using pthread_join,
+  // which would synchronize threads.
+  sleep(1);
+  pthread_create(&t[1], 0, Thread2, 0);
+  pthread_join(t[1], 0);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+#else  // BUILD_SO
+__thread long huge_thread_local_array[1 << 17];
+long *GetTls() {
+  return &huge_thread_local_array[0];
+}
+#endif
+
+// CHECK-NOT: ThreadSanitizer: data race
+// CHECK: DONE
diff --git a/test/tsan/fd_close_norace.cc b/test/tsan/fd_close_norace.cc
index 1b52c20..7d9d491 100644
--- a/test/tsan/fd_close_norace.cc
+++ b/test/tsan/fd_close_norace.cc
@@ -25,7 +25,7 @@
   pthread_create(&t[1], NULL, Thread2, NULL);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
-  printf("OK\n");
+  fprintf(stderr, "OK\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/fd_close_norace2.cc b/test/tsan/fd_close_norace2.cc
index bf94fd5..382ae5f 100644
--- a/test/tsan/fd_close_norace2.cc
+++ b/test/tsan/fd_close_norace2.cc
@@ -23,7 +23,7 @@
   while (write(pipes[1], &t, 1) != 1) {
   }
   pthread_join(t, 0);
-  printf("OK\n");
+  fprintf(stderr, "OK\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/fd_dup_norace.cc b/test/tsan/fd_dup_norace.cc
index 5045325..e599517 100644
--- a/test/tsan/fd_dup_norace.cc
+++ b/test/tsan/fd_dup_norace.cc
@@ -28,7 +28,7 @@
   pthread_create(&t[1], NULL, Thread2, NULL);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
-  printf("OK\n");
+  fprintf(stderr, "OK\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/fd_dup_norace2.cc b/test/tsan/fd_dup_norace2.cc
index 662c686..31aaed9 100644
--- a/test/tsan/fd_dup_norace2.cc
+++ b/test/tsan/fd_dup_norace2.cc
@@ -53,7 +53,7 @@
     exit(printf("close failed\n"));
   if (close(fd2) == -1)
     exit(printf("close failed\n"));
-  printf("DONE\n");
+  fprintf(stderr, "DONE\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/fd_dup_race.cc b/test/tsan/fd_dup_race.cc
index a1aee55..d665eeb 100644
--- a/test/tsan/fd_dup_race.cc
+++ b/test/tsan/fd_dup_race.cc
@@ -27,7 +27,7 @@
     exit(printf("dup2 failed\n"));
   barrier_wait(&barrier);
   pthread_join(th, 0);
-  printf("DONE\n");
+  fprintf(stderr, "DONE\n");
 }
 
 // CHECK: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/fd_pipe_norace.cc b/test/tsan/fd_pipe_norace.cc
index b434703..01c4490 100644
--- a/test/tsan/fd_pipe_norace.cc
+++ b/test/tsan/fd_pipe_norace.cc
@@ -27,7 +27,7 @@
   pthread_create(&t[1], NULL, Thread2, NULL);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
-  printf("OK\n");
+  fprintf(stderr, "OK\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/fd_socket_connect_norace.cc b/test/tsan/fd_socket_connect_norace.cc
index ab2a950..b9fb434 100644
--- a/test/tsan/fd_socket_connect_norace.cc
+++ b/test/tsan/fd_socket_connect_norace.cc
@@ -38,7 +38,7 @@
   pthread_join(t, 0);
   close(c);
   close(s);
-  printf("OK\n");
+  fprintf(stderr, "OK\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/fd_socket_norace.cc b/test/tsan/fd_socket_norace.cc
index 0f41c43..07b0cb3 100644
--- a/test/tsan/fd_socket_norace.cc
+++ b/test/tsan/fd_socket_norace.cc
@@ -45,7 +45,7 @@
   close(c);
   close(s);
   pthread_join(t, 0);
-  printf("OK\n");
+  fprintf(stderr, "OK\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/fd_socketpair_norace.cc b/test/tsan/fd_socketpair_norace.cc
index a455d44..bee030d 100644
--- a/test/tsan/fd_socketpair_norace.cc
+++ b/test/tsan/fd_socketpair_norace.cc
@@ -31,7 +31,7 @@
   pthread_create(&t[1], NULL, Thread2, NULL);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
-  printf("OK\n");
+  fprintf(stderr, "OK\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/ignore_lib0.cc b/test/tsan/ignore_lib0.cc
index c72aa49..d6ae72f 100644
--- a/test/tsan/ignore_lib0.cc
+++ b/test/tsan/ignore_lib0.cc
@@ -1,9 +1,9 @@
 // RUN: %clangxx_tsan -O1 %s -DLIB -fPIC -fno-sanitize=thread -shared -o %T/libignore_lib0.so
 // RUN: %clangxx_tsan -O1 %s -L%T -lignore_lib0 -o %t
 // RUN: echo running w/o suppressions:
-// RUN: LD_LIBRARY_PATH=%T${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} %deflake %run %t | FileCheck %s --check-prefix=CHECK-NOSUPP
+// RUN: env LD_LIBRARY_PATH=%T${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} %deflake %run %t | FileCheck %s --check-prefix=CHECK-NOSUPP
 // RUN: echo running with suppressions:
-// RUN: LD_LIBRARY_PATH=%T${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} %env_tsan_opts=suppressions='%s.supp' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP
+// RUN: env LD_LIBRARY_PATH=%T${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} %env_tsan_opts=suppressions='%s.supp' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP
 
 // Tests that interceptors coming from a library specified in called_from_lib
 // suppression are ignored.
diff --git a/test/tsan/ignore_lib4.cc b/test/tsan/ignore_lib4.cc
new file mode 100644
index 0000000..84d8b27
--- /dev/null
+++ b/test/tsan/ignore_lib4.cc
@@ -0,0 +1,46 @@
+// RUN: %clangxx_tsan -O1 %s -DLIB -fPIC -shared -o %T/libignore_lib4.so
+// RUN: %clangxx_tsan -O1 %s -o %t
+// RUN: echo "called_from_lib:libignore_lib4.so" > %t.supp
+// RUN: %env_tsan_opts=suppressions='%t.supp' %run %t 2>&1 | FileCheck %s
+
+// powerpc64 big endian bots failed with "FileCheck error: '-' is empty" due
+// to a segmentation fault.
+// UNSUPPORTED: powerpc64-unknown-linux-gnu
+// aarch64 bots failed with "called_from_lib suppression 'libignore_lib4.so'
+//                           is matched against 2 libraries".
+// UNSUPPORTED: aarch64
+
+// Test longjmp in an ignored library.
+// It used to crash because we jumped out of the ScopedInterceptor scope.
+
+#include "test.h"
+#include <setjmp.h>
+#include <string.h>
+#include <errno.h>
+#include <libgen.h>
+#include <string>
+
+#ifdef LIB
+
+extern "C" void myfunc() {
+  for (int i = 0; i < (1 << 20); i++) {
+    jmp_buf env;
+    if (!setjmp(env))
+      longjmp(env, 1);
+  }
+}
+
+#else
+
+int main(int argc, char **argv) {
+  std::string lib = std::string(dirname(argv[0])) + "/libignore_lib4.so";
+  void *h = dlopen(lib.c_str(), RTLD_GLOBAL | RTLD_NOW);
+  void (*func)() = (void(*)())dlsym(h, "myfunc");
+  func();
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+#endif
+
+// CHECK: DONE
diff --git a/test/tsan/ignore_race.cc b/test/tsan/ignore_race.cc
index cc33b66..e410006 100644
--- a/test/tsan/ignore_race.cc
+++ b/test/tsan/ignore_race.cc
@@ -25,7 +25,7 @@
   barrier_wait(&barrier);
   Global = 43;
   pthread_join(t, 0);
-  printf("OK\n");
+  fprintf(stderr, "OK\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/inlined_memcpy_race.cc b/test/tsan/inlined_memcpy_race.cc
index 720f2bf..4d08589 100644
--- a/test/tsan/inlined_memcpy_race.cc
+++ b/test/tsan/inlined_memcpy_race.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
 #include "test.h"
 #include <string.h>
 
@@ -24,7 +24,7 @@
   pthread_create(&t[1], NULL, MemSetThread, x);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
-  printf("PASS\n");
+  fprintf(stderr, "PASS\n");
   return 0;
 }
 
diff --git a/test/tsan/inlined_memcpy_race2.cc b/test/tsan/inlined_memcpy_race2.cc
index 37414ba..906a52b 100644
--- a/test/tsan/inlined_memcpy_race2.cc
+++ b/test/tsan/inlined_memcpy_race2.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
 #include "test.h"
 #include <string.h>
 
@@ -25,7 +25,7 @@
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
 
-  printf("PASS\n");
+  fprintf(stderr, "PASS\n");
   return 0;
 }
 
@@ -33,5 +33,5 @@
 // CHECK:   #0 memset
 // CHECK:   #1 MemSetThread
 // CHECK:  Previous write
-// CHECK:   #0 memmove
+// CHECK:   #0 {{(memcpy|memmove)}}
 // CHECK:   #1 MemMoveThread
diff --git a/test/tsan/interface_atomic_test.c b/test/tsan/interface_atomic_test.c
index 18d860e..b7dfc86 100644
--- a/test/tsan/interface_atomic_test.c
+++ b/test/tsan/interface_atomic_test.c
@@ -1,5 +1,5 @@
 // Test that we can include header with TSan atomic interface.
-// RUN: %clang_tsan %s -o %t && %run %t | FileCheck %s
+// RUN: %clang_tsan %s -o %t && %run %t 2>&1 | FileCheck %s
 #include <sanitizer/tsan_interface_atomic.h>
 #include <stdio.h>
 
@@ -9,7 +9,7 @@
   int res = __tsan_atomic32_load(&a, __tsan_memory_order_acquire);
   if (res == 100) {
     // CHECK: PASS
-    printf("PASS\n");
+    fprintf(stderr, "PASS\n");
     return 0;
   }
   return 1;
diff --git a/test/tsan/java_alloc.cc b/test/tsan/java_alloc.cc
index 4a606f7..94919a4 100644
--- a/test/tsan/java_alloc.cc
+++ b/test/tsan/java_alloc.cc
@@ -26,10 +26,10 @@
   stress(jheap);
   pthread_join(th, 0);
   if (__tsan_java_fini() != 0) {
-    printf("FAILED\n");
+    fprintf(stderr, "FAILED\n");
     return 1;
   }
-  printf("DONE\n");
+  fprintf(stderr, "DONE\n");
   return 0;
 }
 
diff --git a/test/tsan/java_heap_init.cc b/test/tsan/java_heap_init.cc
index bb7357c..47ec5db 100644
--- a/test/tsan/java_heap_init.cc
+++ b/test/tsan/java_heap_init.cc
@@ -20,7 +20,7 @@
     return printf("second mmap failed with %d\n", errno);
   __tsan_java_init(jheap, kHeapSize);
   __tsan_java_move(jheap + 16, jheap, 16);
-  printf("DONE\n");
+  fprintf(stderr, "DONE\n");
   return __tsan_java_fini();
 }
 
diff --git a/test/tsan/java_lock_move.cc b/test/tsan/java_lock_move.cc
index fe5491d..66599f8 100644
--- a/test/tsan/java_lock_move.cc
+++ b/test/tsan/java_lock_move.cc
@@ -35,7 +35,7 @@
   barrier_wait(&barrier);
   pthread_join(th, 0);
   __tsan_java_free(varaddr2, kBlockSize);
-  printf("DONE\n");
+  fprintf(stderr, "DONE\n");
   return __tsan_java_fini();
 }
 
diff --git a/test/tsan/java_lock_rec.cc b/test/tsan/java_lock_rec.cc
index f0bf401..aa8de97 100644
--- a/test/tsan/java_lock_rec.cc
+++ b/test/tsan/java_lock_rec.cc
@@ -10,14 +10,14 @@
   *(int*)varaddr = 42;
   int rec = __tsan_java_mutex_unlock_rec(lockaddr);
   if (rec != 2) {
-    printf("FAILED 0 rec=%d\n", rec);
+    fprintf(stderr, "FAILED 0 rec=%d\n", rec);
     exit(1);
   }
   barrier_wait(&barrier);
   barrier_wait(&barrier);
   __tsan_java_mutex_lock_rec(lockaddr, rec);
   if (*(int*)varaddr != 43) {
-    printf("FAILED 3 var=%d\n", *(int*)varaddr);
+    fprintf(stderr, "FAILED 3 var=%d\n", *(int*)varaddr);
     exit(1);
   }
   __tsan_java_mutex_unlock(lockaddr);
@@ -40,7 +40,7 @@
   barrier_wait(&barrier);
   __tsan_java_mutex_lock(lockaddr);
   if (*(int*)varaddr != 42) {
-    printf("FAILED 1 var=%d\n", *(int*)varaddr);
+    fprintf(stderr, "FAILED 1 var=%d\n", *(int*)varaddr);
     exit(1);
   }
   *(int*)varaddr = 43;
@@ -48,7 +48,7 @@
   barrier_wait(&barrier);
   pthread_join(th, 0);
   __tsan_java_free(jheap, kBlockSize);
-  printf("DONE\n");
+  fprintf(stderr, "DONE\n");
   return __tsan_java_fini();
 }
 
diff --git a/test/tsan/java_lock_rec_race.cc b/test/tsan/java_lock_rec_race.cc
index 3da8ad0..bf56eef 100644
--- a/test/tsan/java_lock_rec_race.cc
+++ b/test/tsan/java_lock_rec_race.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
 #include "java.h"
 
 jptr varaddr;
@@ -10,7 +10,7 @@
   __tsan_java_mutex_lock(lockaddr);
   int rec = __tsan_java_mutex_unlock_rec(lockaddr);
   if (rec != 3) {
-    printf("FAILED 0 rec=%d\n", rec);
+    fprintf(stderr, "FAILED 0 rec=%d\n", rec);
     exit(1);
   }
   *(int*)varaddr = 42;
@@ -42,7 +42,7 @@
   barrier_wait(&barrier);
   pthread_join(th, 0);
   __tsan_java_free(jheap, kBlockSize);
-  printf("DONE\n");
+  fprintf(stderr, "DONE\n");
   return __tsan_java_fini();
 }
 
diff --git a/test/tsan/java_move_overlap.cc b/test/tsan/java_move_overlap.cc
index 7ed98ef..bf8d1e1 100644
--- a/test/tsan/java_move_overlap.cc
+++ b/test/tsan/java_move_overlap.cc
@@ -66,7 +66,7 @@
   barrier_wait(&barrier);
   pthread_join(th, 0);
   __tsan_java_free(varaddr1_new, kBlockSize);
-  printf("DONE\n");
+  fprintf(stderr, "DONE\n");
   return __tsan_java_fini();
 }
 
diff --git a/test/tsan/java_move_overlap_race.cc b/test/tsan/java_move_overlap_race.cc
index 874b90b..fbbcf2c 100644
--- a/test/tsan/java_move_overlap_race.cc
+++ b/test/tsan/java_move_overlap_race.cc
@@ -1,6 +1,6 @@
 // RUN: %clangxx_tsan -O1 %s -o %t
-// RUN: %deflake %run %t | FileCheck %s
-// RUN: %deflake %run %t arg | FileCheck %s
+// RUN: %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %deflake %run %t arg 2>&1 | FileCheck %s
 #include "java.h"
 
 jptr varaddr1_old;
@@ -46,7 +46,7 @@
   barrier_wait(&barrier);
   pthread_join(th, 0);
   __tsan_java_free(varaddr1_new, kBlockSize);
-  printf("DONE\n");
+  fprintf(stderr, "DONE\n");
   return __tsan_java_fini();
 }
 
diff --git a/test/tsan/java_race_pc.cc b/test/tsan/java_race_pc.cc
index 0745ade..be1c5f2 100644
--- a/test/tsan/java_race_pc.cc
+++ b/test/tsan/java_race_pc.cc
@@ -1,8 +1,8 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-// This test fails on powerpc64 on both VMA (44 and 46).
+// This test fails on powerpc64 big endian.
 // The Tsan report is returning wrong information about
 // the location of the race.
-// XFAIL: powerpc64
+// XFAIL: powerpc64-unknown-linux-gnu
 #include "java.h"
 
 void foobar() {
@@ -13,7 +13,7 @@
 
 void *Thread(void *p) {
   barrier_wait(&barrier);
-  __tsan_read1_pc((jptr)p, (jptr)foobar + 1);
+  __tsan_read1_pc((jptr)p, (jptr)foobar + kPCInc);
   return 0;
 }
 
@@ -26,7 +26,7 @@
   __tsan_java_alloc(jheap, kBlockSize);
   pthread_t th;
   pthread_create(&th, 0, Thread, (void*)jheap);
-  __tsan_write1_pc((jptr)jheap, (jptr)barbaz + 1);
+  __tsan_write1_pc((jptr)jheap, (jptr)barbaz + kPCInc);
   barrier_wait(&barrier);
   pthread_join(th, 0);
   __tsan_java_free(jheap, kBlockSize);
diff --git a/test/tsan/java_rwlock.cc b/test/tsan/java_rwlock.cc
index a4cc92a..aa77273 100644
--- a/test/tsan/java_rwlock.cc
+++ b/test/tsan/java_rwlock.cc
@@ -29,7 +29,7 @@
   barrier_wait(&barrier);
   pthread_join(th, 0);
   __tsan_java_free(jheap, kBlockSize);
-  printf("DONE\n");
+  fprintf(stderr, "DONE\n");
   return __tsan_java_fini();
 }
 
diff --git a/test/tsan/lit.cfg b/test/tsan/lit.cfg
index 2be10da..1fc1ecc 100644
--- a/test/tsan/lit.cfg
+++ b/test/tsan/lit.cfg
@@ -12,7 +12,7 @@
   return attr_value
 
 # Setup config name.
-config.name = 'ThreadSanitizer'
+config.name = 'ThreadSanitizer' + config.name_suffix
 
 # Setup source root.
 config.test_source_root = os.path.dirname(__file__)
@@ -39,20 +39,23 @@
   extra_cflags = []
 
 # Setup default compiler flags used with -fsanitize=thread option.
-clang_tsan_cflags = ["-fsanitize=thread",
-                     "-Wall",
-                     "-m64"] + config.debug_info_flags + extra_cflags
+clang_tsan_cflags = (["-fsanitize=thread",
+                      "-Wall"] +
+                      [config.target_cflags] +
+                      config.debug_info_flags +
+                      extra_cflags)
 clang_tsan_cxxflags = config.cxx_mode_flags + clang_tsan_cflags
 # Add additional flags if we're using instrumented libc++.
 # Instrumented libcxx currently not supported on Darwin.
 if config.has_libcxx and config.host_os != 'Darwin':
   # FIXME: Dehardcode this path somehow.
   libcxx_path = os.path.join(config.compiler_rt_obj_root, "lib",
-                             "tsan", "libcxx_tsan")
+                             "tsan", "libcxx_tsan_" + config.target_arch)
   libcxx_incdir = os.path.join(libcxx_path, "include", "c++", "v1")
   libcxx_libdir = os.path.join(libcxx_path, "lib")
   libcxx_so = os.path.join(libcxx_libdir, "libc++.so")
   clang_tsan_cxxflags += ["-std=c++11",
+                          "-nostdinc++",
                           "-I%s" % libcxx_incdir,
                           libcxx_so,
                           "-Wl,-rpath=%s" % libcxx_libdir]
diff --git a/test/tsan/lit.site.cfg.in b/test/tsan/lit.site.cfg.in
index 5190b21..a87e8d2 100644
--- a/test/tsan/lit.site.cfg.in
+++ b/test/tsan/lit.site.cfg.in
@@ -1,7 +1,9 @@
-## Autogenerated by LLVM/Clang configuration.
-# Do not edit!
+@LIT_SITE_CFG_IN_HEADER@
 
+config.name_suffix = "@TSAN_TEST_CONFIG_SUFFIX@"
 config.has_libcxx = @TSAN_HAS_LIBCXX@
+config.target_cflags = "@TSAN_TEST_TARGET_CFLAGS@"
+config.target_arch = "@TSAN_TEST_TARGET_ARCH@"
 
 # Load common config for all compiler-rt lit tests.
 lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
diff --git a/test/tsan/longjmp.cc b/test/tsan/longjmp.cc
index d642067..a8abca6 100644
--- a/test/tsan/longjmp.cc
+++ b/test/tsan/longjmp.cc
@@ -1,8 +1,5 @@
 // RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
 
-// Longjmp assembly has not been implemented for mips64 yet
-// XFAIL: mips64
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <setjmp.h>
@@ -14,11 +11,11 @@
 int main() {
   jmp_buf env;
   if (setjmp(env) == 42) {
-    printf("JUMPED\n");
+    fprintf(stderr, "JUMPED\n");
     return 0;
   }
   foo(env);
-  printf("FAILED\n");
+  fprintf(stderr, "FAILED\n");
   return 0;
 }
 
diff --git a/test/tsan/longjmp2.cc b/test/tsan/longjmp2.cc
index eee423d..d396f3f 100644
--- a/test/tsan/longjmp2.cc
+++ b/test/tsan/longjmp2.cc
@@ -1,8 +1,5 @@
 // RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
 
-// Longjmp assembly has not been implemented for mips64 yet
-// XFAIL: mips64
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <setjmp.h>
@@ -16,11 +13,11 @@
   sigjmp_buf env;
   printf("env=%p\n", env);
   if (sigsetjmp(env, 1) == 42) {
-    printf("JUMPED\n");
+    fprintf(stderr, "JUMPED\n");
     return 0;
   }
   foo(env);
-  printf("FAILED\n");
+  fprintf(stderr, "FAILED\n");
   return 0;
 }
 
diff --git a/test/tsan/longjmp3.cc b/test/tsan/longjmp3.cc
index 79965c4..842cf26 100644
--- a/test/tsan/longjmp3.cc
+++ b/test/tsan/longjmp3.cc
@@ -1,7 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-
-// Longjmp assembly has not been implemented for mips64 yet
-// XFAIL: mips64
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
 
 #include <pthread.h>
 #include <stdio.h>
@@ -34,7 +31,7 @@
     return;
   }
   foo(env);
-  printf("FAILED\n");
+  fprintf(stderr, "FAILED\n");
 }
 
 int main() {
diff --git a/test/tsan/longjmp4.cc b/test/tsan/longjmp4.cc
index c858399..4c2fbf0 100644
--- a/test/tsan/longjmp4.cc
+++ b/test/tsan/longjmp4.cc
@@ -1,7 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-
-// Longjmp assembly has not been implemented for mips64 yet
-// XFAIL: mips64
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
 
 #include <pthread.h>
 #include <stdio.h>
@@ -37,7 +34,7 @@
     return;
   }
   foo(env);
-  printf("FAILED\n");
+  fprintf(stderr, "FAILED\n");
 }
 
 int main() {
diff --git a/test/tsan/lots_of_threads.c b/test/tsan/lots_of_threads.c
new file mode 100644
index 0000000..eef9b1c
--- /dev/null
+++ b/test/tsan/lots_of_threads.c
@@ -0,0 +1,30 @@
+// RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+void *thr(void *arg) {
+  // Create a sync object on the stack, so there is something to free at thread exit.
+  volatile int x;
+  __atomic_fetch_add(&x, 1, __ATOMIC_SEQ_CST);
+  barrier_wait(&barrier);
+  return 0;
+}
+
+int main() {
+  const int kThreads = 10;
+  barrier_init(&barrier, kThreads + 1);
+  pthread_t t[kThreads];
+  pthread_attr_t attr;
+  pthread_attr_init(&attr);
+  pthread_attr_setstacksize(&attr, 16 << 20);
+  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+  for (int i = 0; i < kThreads; i++)
+    pthread_create(&t[i], &attr, thr, 0);
+  pthread_attr_destroy(&attr);
+  barrier_wait(&barrier);
+  sleep(1);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK: DONE
+
diff --git a/test/tsan/malloc_overflow.cc b/test/tsan/malloc_overflow.cc
index b2f9b0f..3db4129 100644
--- a/test/tsan/malloc_overflow.cc
+++ b/test/tsan/malloc_overflow.cc
@@ -6,17 +6,17 @@
 int main() {
   void *p = malloc((size_t)-1);
   if (p != 0)
-    printf("FAIL malloc(-1) = %p\n", p);
+    fprintf(stderr, "FAIL malloc(-1) = %p\n", p);
   p = malloc((size_t)-1 / 2);
   if (p != 0)
-    printf("FAIL malloc(-1/2) = %p\n", p);
+    fprintf(stderr, "FAIL malloc(-1/2) = %p\n", p);
   p = calloc((size_t)-1, (size_t)-1);
   if (p != 0)
-    printf("FAIL calloc(-1, -1) = %p\n", p);
+    fprintf(stderr, "FAIL calloc(-1, -1) = %p\n", p);
   p = calloc((size_t)-1 / 2, (size_t)-1 / 2);
   if (p != 0)
-    printf("FAIL calloc(-1/2, -1/2) = %p\n", p);
-  printf("OK\n");
+    fprintf(stderr, "FAIL calloc(-1/2, -1/2) = %p\n", p);
+  fprintf(stderr, "OK\n");
 }
 
 // CHECK-NOT: FAIL
diff --git a/test/tsan/map32bit.cc b/test/tsan/map32bit.cc
index 0411f29..cec91a4 100644
--- a/test/tsan/map32bit.cc
+++ b/test/tsan/map32bit.cc
@@ -8,7 +8,7 @@
 // https://github.com/google/sanitizers/issues/412
 
 // MAP_32BIT flag for mmap is supported only for x86_64.
-// XFAIL: mips64
+// XFAIL: target-is-mips64
 // XFAIL: aarch64
 // XFAIL: powerpc64
 
diff --git a/test/tsan/mmap_stress.cc b/test/tsan/mmap_stress.cc
index e01e7e9..f272779 100644
--- a/test/tsan/mmap_stress.cc
+++ b/test/tsan/mmap_stress.cc
@@ -47,6 +47,10 @@
 }
 
 int main() {
+  // This test is flaky on several builders:
+  // https://groups.google.com/d/msg/llvm-dev/KUFPdLhBN3Q/L75rwW9xBgAJ
+  // The cause is unknown (lit hides test output on failures).
+#if 0
   pthread_t th[4];
   for (int i = 0; i < 4; i++) {
     if (pthread_create(&th[i], 0, Worker, 0))
@@ -56,6 +60,7 @@
     if (pthread_join(th[i], 0))
       exit(printf("pthread_join failed: %d\n", errno));
   }
+#endif
   fprintf(stderr, "DONE\n");
 }
 
diff --git a/test/tsan/mutex_annotations.cc b/test/tsan/mutex_annotations.cc
new file mode 100644
index 0000000..59fa452
--- /dev/null
+++ b/test/tsan/mutex_annotations.cc
@@ -0,0 +1,49 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+// Test that a linker-initialized mutex can be created/destroyed while in use.
+
+// Stub for testing, just invokes annotations.
+// Meant to be synchronized externally with test barrier.
+class Mutex {
+ public:
+  void Create(bool linker_initialized = false) {
+    if (linker_initialized)
+      ANNOTATE_RWLOCK_CREATE_STATIC(&state_);
+    else
+      ANNOTATE_RWLOCK_CREATE(&state_);
+  }
+
+  void Destroy() {
+    ANNOTATE_RWLOCK_DESTROY(&state_);
+  }
+
+  void Lock() {
+    ANNOTATE_RWLOCK_ACQUIRED(&state_, true);
+  }
+
+  void Unlock() {
+    ANNOTATE_RWLOCK_RELEASED(&state_, true);
+  }
+
+ private:
+  long long state_;
+};
+
+int main() {
+  Mutex m;
+
+  m.Lock();
+  m.Create(true);
+  m.Unlock();
+
+  m.Lock();
+  m.Destroy();
+  m.Unlock();
+
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer:
+// CHECK: DONE
diff --git a/test/tsan/printf-1.c b/test/tsan/printf-1.c
index 9116c95..c8414f8 100644
--- a/test/tsan/printf-1.c
+++ b/test/tsan/printf-1.c
@@ -1,6 +1,6 @@
 // RUN: %clang_tsan -O2 %s -o %t
-// RUN: ASAN_OPTIONS=check_printf=1 %run %t 2>&1 | FileCheck %s
-// RUN: ASAN_OPTIONS=check_printf=0 %run %t 2>&1 | FileCheck %s
+// RUN: %env_tsan_opts=check_printf=1 %run %t 2>&1 | FileCheck %s
+// RUN: %env_tsan_opts=check_printf=0 %run %t 2>&1 | FileCheck %s
 // RUN: %run %t 2>&1 | FileCheck %s
 
 #include <stdio.h>
diff --git a/test/tsan/pthread_key.cc b/test/tsan/pthread_key.cc
new file mode 100644
index 0000000..798caa4
--- /dev/null
+++ b/test/tsan/pthread_key.cc
@@ -0,0 +1,39 @@
+// RUN: %clangxx_tsan -O1 %s -DBUILD_SO -fPIC -shared -o %t-so.so
+// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+
+// Extracted from:
+// https://bugs.chromium.org/p/v8/issues/detail?id=4995
+
+#include "test.h"
+
+void* thr(void* arg) {
+  const int N = 32;
+  pthread_key_t keys_[N];
+  for (size_t i = 0; i < N; ++i) {
+    int err = pthread_key_create(&keys_[i], 0);
+    if (err) {
+      fprintf(stderr, "pthread_key_create failed with %d\n", err);
+      exit(1);
+    }
+  }
+  for (size_t i = 0; i < N; i++)
+    pthread_setspecific(keys_[i], (void*)(long)i);
+  for (size_t i = 0; i < N; i++)
+    pthread_key_delete(keys_[i]);
+  return 0;
+}
+
+int main() {
+  for (int i = 0; i < 10; i++) {
+    pthread_t th;
+    pthread_create(&th, 0, thr, 0);
+    pthread_join(th, 0);
+  }
+  pthread_t th[2];
+  pthread_create(&th[0], 0, thr, 0);
+  pthread_create(&th[1], 0, thr, 0);
+  pthread_join(th[0], 0);
+  pthread_join(th[1], 0);
+  fprintf(stderr, "DONE\n");
+  // CHECK: DONE
+}
diff --git a/test/tsan/race_on_mutex.c b/test/tsan/race_on_mutex.c
index d998fdc..c7f5e06 100644
--- a/test/tsan/race_on_mutex.c
+++ b/test/tsan/race_on_mutex.c
@@ -1,13 +1,25 @@
 // RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
-// This test fails on powerpc64 (VMA=46).
-// The size of the write reported by Tsan for T1 is 8 instead of 1.
-// XFAIL: powerpc64-unknown-linux-gnu
 #include "test.h"
 
 pthread_mutex_t Mtx;
 int Global;
 
+void *Thread2(void *x) {
+  barrier_wait(&barrier);
+// CHECK:      WARNING: ThreadSanitizer: data race
+// CHECK-NEXT:   Atomic read of size 1 at {{.*}} by thread T2:
+// CHECK-NEXT:     #0 pthread_mutex_lock
+// CHECK-NEXT:     #1 Thread2{{.*}} {{.*}}race_on_mutex.c:[[@LINE+1]]{{(:3)?}} ({{.*}})
+  pthread_mutex_lock(&Mtx);
+  Global = 43;
+  pthread_mutex_unlock(&Mtx);
+  return NULL;
+}
+
 void *Thread1(void *x) {
+// CHECK:        Previous write of size {{[0-9]+}} at {{.*}} by thread T1:
+// CHECK:          #{{[0-9]+}} {{.*}}pthread_mutex_init {{.*}} ({{.*}})
+// CHECK-NEXT:     #{{[0-9]+}} Thread1{{.*}} {{.*}}race_on_mutex.c:[[@LINE+1]]{{(:3)?}} ({{.*}})
   pthread_mutex_init(&Mtx, 0);
   pthread_mutex_lock(&Mtx);
   Global = 42;
@@ -16,14 +28,6 @@
   return NULL;
 }
 
-void *Thread2(void *x) {
-  barrier_wait(&barrier);
-  pthread_mutex_lock(&Mtx);
-  Global = 43;
-  pthread_mutex_unlock(&Mtx);
-  return NULL;
-}
-
 int main() {
   barrier_init(&barrier, 2);
   pthread_t t[2];
@@ -34,11 +38,3 @@
   pthread_mutex_destroy(&Mtx);
   return 0;
 }
-
-// CHECK:      WARNING: ThreadSanitizer: data race
-// CHECK-NEXT:   Atomic read of size 1 at {{.*}} by thread T2:
-// CHECK-NEXT:     #0 pthread_mutex_lock
-// CHECK-NEXT:     #1 Thread2{{.*}} {{.*}}race_on_mutex.c:21{{(:3)?}} ({{.*}})
-// CHECK:        Previous write of size 1 at {{.*}} by thread T1:
-// CHECK-NEXT:     #0 pthread_mutex_init {{.*}} ({{.*}})
-// CHECK-NEXT:     #1 Thread1{{.*}} {{.*}}race_on_mutex.c:11{{(:3)?}} ({{.*}})
diff --git a/test/tsan/race_on_speculative_load.cc b/test/tsan/race_on_speculative_load.cc
index dd40dae..5a9d698 100644
--- a/test/tsan/race_on_speculative_load.cc
+++ b/test/tsan/race_on_speculative_load.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && %run %t | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
 // Regtest for https://github.com/google/sanitizers/issues/447
 // This is a correct program and tsan should not report a race.
 #include "test.h"
@@ -24,7 +24,7 @@
   g = 1;
   barrier_wait(&barrier);
   pthread_join(t, 0);
-  printf("PASS\n");
+  fprintf(stderr, "PASS\n");
   // CHECK-NOT: ThreadSanitizer: data race
   // CHECK: PASS
 }
diff --git a/test/tsan/setuid.c b/test/tsan/setuid.c
index bc9c8ca..2d6b7c8 100644
--- a/test/tsan/setuid.c
+++ b/test/tsan/setuid.c
@@ -1,4 +1,10 @@
 // RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+//
+// setuid(0) hangs on powerpc64 big endian.  When this is fixed, remove
+// the UNSUPPORTED annotation.
+// https://llvm.org/bugs/show_bug.cgi?id=25799
+//
+// UNSUPPORTED: powerpc64-unknown-linux-gnu
 #include "test.h"
 #include <sys/types.h>
 #include <unistd.h>
diff --git a/test/tsan/setuid2.c b/test/tsan/setuid2.c
index 9dbb657..3ea8978 100644
--- a/test/tsan/setuid2.c
+++ b/test/tsan/setuid2.c
@@ -1,4 +1,10 @@
 // RUN: %clang_tsan -O1 %s -o %t && %env_tsan_opts=flush_memory_ms=1:memory_limit_mb=1 %run %t 2>&1 | FileCheck %s
+//
+// setuid(0) hangs on powerpc64 big endian.  When this is fixed, remove
+// the UNSUPPORTED annotation.
+// https://llvm.org/bugs/show_bug.cgi?id=25799
+//
+// UNSUPPORTED: powerpc64-unknown-linux-gnu
 #include "test.h"
 #include <sys/types.h>
 #include <unistd.h>
diff --git a/test/tsan/signal_block.cc b/test/tsan/signal_block.cc
new file mode 100644
index 0000000..dfd4259
--- /dev/null
+++ b/test/tsan/signal_block.cc
@@ -0,0 +1,60 @@
+// RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+
+// Test that a signal is not delivered when it is blocked.
+
+#include "test.h"
+#include <semaphore.h>
+#include <signal.h>
+#include <errno.h>
+
+int stop;
+sig_atomic_t signal_blocked;
+
+void handler(int signum) {
+  if (signal_blocked) {
+    fprintf(stderr, "signal arrived when blocked\n");
+    exit(1);
+  }
+}
+
+void *thread(void *arg) {
+  sigset_t myset;
+  sigemptyset(&myset);
+  sigaddset(&myset, SIGUSR1);
+  while (!__atomic_load_n(&stop, __ATOMIC_RELAXED)) {
+    usleep(1);
+    if (pthread_sigmask(SIG_BLOCK, &myset, 0)) {
+      fprintf(stderr, "pthread_sigmask failed %d\n", errno);
+      exit(1);
+    }
+    signal_blocked = 1;
+    usleep(1);
+    signal_blocked = 0;
+    if (pthread_sigmask(SIG_UNBLOCK, &myset, 0)) {
+      fprintf(stderr, "pthread_sigmask failed %d\n", errno);
+      exit(1);
+    }
+  }
+  return 0;
+}
+
+int main(int argc, char** argv) {
+  struct sigaction act = {};
+  act.sa_handler = &handler;
+  if (sigaction(SIGUSR1, &act, 0)) {
+    fprintf(stderr, "sigaction failed %d\n", errno);
+    return 1;
+  }
+  pthread_t th;
+  pthread_create(&th, 0, thread, 0);
+  for (int i = 0; i < 100000; i++)
+    pthread_kill(th, SIGUSR1);
+  __atomic_store_n(&stop, 1, __ATOMIC_RELAXED);
+  pthread_join(th, 0);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: ThreadSanitizer CHECK
+// CHECK-NOT: WARNING: ThreadSanitizer:
+// CHECK: DONE
diff --git a/test/tsan/signal_longjmp.cc b/test/tsan/signal_longjmp.cc
index 45e2462..f9fa4f5 100644
--- a/test/tsan/signal_longjmp.cc
+++ b/test/tsan/signal_longjmp.cc
@@ -3,8 +3,6 @@
 // Test case for longjumping out of signal handler:
 // https://github.com/google/sanitizers/issues/482
 
-// Longjmp assembly has not been implemented for mips64 yet
-// XFAIL: mips64
 // This test fails on powerpc64 BE (VMA=44), a segmentation fault
 // error happens at the second assignment
 // "((volatile int *volatile)mem)[1] = 1".
diff --git a/test/tsan/signal_sync2.cc b/test/tsan/signal_sync2.cc
new file mode 100644
index 0000000..163f206
--- /dev/null
+++ b/test/tsan/signal_sync2.cc
@@ -0,0 +1,77 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+// UNSUPPORTED: darwin
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <errno.h>
+
+// Test synchronization in a signal handler that runs within an IgnoreSync region.
+
+extern "C" void AnnotateIgnoreSyncBegin(const char *f, int l);
+extern "C" void AnnotateIgnoreSyncEnd(const char *f, int l);
+
+const int kSignalCount = 500;
+
+__thread int process_signals;
+int signals_handled;
+int done;
+int ready[kSignalCount];
+long long data[kSignalCount];
+
+static void handler(int sig) {
+  if (!__atomic_load_n(&process_signals, __ATOMIC_RELAXED))
+    return;
+  int pos = signals_handled++;
+  if (pos >= kSignalCount)
+    return;
+  data[pos] = pos;
+  __atomic_store_n(&ready[pos], 1, __ATOMIC_RELEASE);
+}
+
+static void* thr(void *p) {
+  AnnotateIgnoreSyncBegin(__FILE__, __LINE__);
+  __atomic_store_n(&process_signals, 1, __ATOMIC_RELAXED);
+  while (!__atomic_load_n(&done, __ATOMIC_RELAXED)) {
+  }
+  AnnotateIgnoreSyncEnd(__FILE__, __LINE__);
+  return 0;
+}
+
+int main() {
+  struct sigaction act = {};
+  act.sa_handler = handler;
+  if (sigaction(SIGPROF, &act, 0)) {
+    perror("sigaction");
+    exit(1);
+  }
+  itimerval t;
+  t.it_value.tv_sec = 0;
+  t.it_value.tv_usec = 10;
+  t.it_interval = t.it_value;
+  if (setitimer(ITIMER_PROF, &t, 0)) {
+    perror("setitimer");
+    exit(1);
+  }
+
+  pthread_t th;
+  pthread_create(&th, 0, thr, 0);
+  for (int pos = 0; pos < kSignalCount; pos++) {
+    while (__atomic_load_n(&ready[pos], __ATOMIC_ACQUIRE) == 0) {
+    }
+    if (data[pos] != pos) {
+      printf("at pos %d, expect %d, got %lld\n", pos, pos, data[pos]);
+      exit(1);
+    }
+  }
+  __atomic_store_n(&done, 1, __ATOMIC_RELAXED);
+  pthread_join(th, 0);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer:
+// CHECK: DONE
diff --git a/test/tsan/static_init1.cc b/test/tsan/static_init1.cc
index 3e5fb14..3e6e4f9 100644
--- a/test/tsan/static_init1.cc
+++ b/test/tsan/static_init1.cc
@@ -21,7 +21,7 @@
   pthread_create(&t[1], 0, Thread, 0);
   pthread_join(t[0], 0);
   pthread_join(t[1], 0);
-  printf("PASS\n");
+  fprintf(stderr, "PASS\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/static_init2.cc b/test/tsan/static_init2.cc
index 667aed1..ca2300a 100644
--- a/test/tsan/static_init2.cc
+++ b/test/tsan/static_init2.cc
@@ -27,7 +27,7 @@
   pthread_create(&t[1], 0, Thread, 0);
   pthread_join(t[0], 0);
   pthread_join(t[1], 0);
-  printf("PASS\n");
+  fprintf(stderr, "PASS\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/static_init4.cc b/test/tsan/static_init4.cc
index 85835a2..c8da783 100644
--- a/test/tsan/static_init4.cc
+++ b/test/tsan/static_init4.cc
@@ -31,7 +31,7 @@
   pthread_create(&t[1], 0, Thread1, 0);
   pthread_join(t[0], 0);
   pthread_join(t[1], 0);
-  printf("PASS\n");
+  fprintf(stderr, "PASS\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/static_init5.cc b/test/tsan/static_init5.cc
index 961e3a3..b334981 100644
--- a/test/tsan/static_init5.cc
+++ b/test/tsan/static_init5.cc
@@ -36,7 +36,7 @@
   pthread_create(&t[1], 0, Thread1, 0);
   pthread_join(t[0], 0);
   pthread_join(t[1], 0);
-  printf("PASS\n");
+  fprintf(stderr, "PASS\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/static_init6.cc b/test/tsan/static_init6.cc
index 77253ea..fd22e0a 100644
--- a/test/tsan/static_init6.cc
+++ b/test/tsan/static_init6.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -static-libstdc++ -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -stdlib=libstdc++ -static-libstdc++ -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
 #include <pthread.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -36,7 +36,7 @@
   pthread_create(&t[1], 0, Thread1, 0);
   pthread_join(t[0], 0);
   pthread_join(t[1], 0);
-  printf("PASS\n");
+  fprintf(stderr, "PASS\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/sunrpc.cc b/test/tsan/sunrpc.cc
index 579816d..5cfb534 100644
--- a/test/tsan/sunrpc.cc
+++ b/test/tsan/sunrpc.cc
@@ -19,7 +19,7 @@
   pthread_create(&th[1], 0, thr, 0);
   pthread_join(th[0], 0);
   pthread_join(th[1], 0);
-  printf("DONE\n");
+  fprintf(stderr, "DONE\n");
   // CHECK: DONE
   return 0;
 }
diff --git a/test/tsan/suppressions_global.cc b/test/tsan/suppressions_global.cc
index 8928162..282d261 100644
--- a/test/tsan/suppressions_global.cc
+++ b/test/tsan/suppressions_global.cc
@@ -20,7 +20,7 @@
   pthread_create(&t[1], NULL, Thread2, NULL);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
-  printf("OK\n");
+  fprintf(stderr, "OK\n");
   return 0;
 }
 
diff --git a/test/tsan/suppressions_race.cc b/test/tsan/suppressions_race.cc
index 7a88434..d0aeeda 100644
--- a/test/tsan/suppressions_race.cc
+++ b/test/tsan/suppressions_race.cc
@@ -22,7 +22,7 @@
   pthread_create(&t[1], NULL, Thread2, NULL);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
-  printf("OK\n");
+  fprintf(stderr, "OK\n");
   return 0;
 }
 
diff --git a/test/tsan/suppressions_race2.cc b/test/tsan/suppressions_race2.cc
index b6566a8..6f8ca73 100644
--- a/test/tsan/suppressions_race2.cc
+++ b/test/tsan/suppressions_race2.cc
@@ -22,7 +22,7 @@
   pthread_create(&t[1], NULL, Thread2, NULL);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
-  printf("OK\n");
+  fprintf(stderr, "OK\n");
   return 0;
 }
 
diff --git a/test/tsan/test.h b/test/tsan/test.h
index a681daa..e3affdc 100644
--- a/test/tsan/test.h
+++ b/test/tsan/test.h
@@ -67,3 +67,38 @@
   return (unsigned long long)t.tv_sec * 1000000000ull + t.tv_nsec;
 }
 #endif
+
+// The constant kPCInc must be kept in sync with StackTrace::GetPreviousInstructionPc.
+#if defined(__powerpc64__)
+// PCs are always 4 byte aligned.
+const int kPCInc = 4;
+#elif defined(__sparc__) || defined(__mips__)
+const int kPCInc = 8;
+#else
+const int kPCInc = 1;
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void AnnotateRWLockCreate(const char *f, int l, void *m);
+void AnnotateRWLockCreateStatic(const char *f, int l, void *m);
+void AnnotateRWLockDestroy(const char *f, int l, void *m);
+void AnnotateRWLockAcquired(const char *f, int l, void *m, long is_w);
+void AnnotateRWLockReleased(const char *f, int l, void *m, long is_w);
+
+#ifdef __cplusplus
+}
+#endif
+
+#define ANNOTATE_RWLOCK_CREATE(m) \
+    AnnotateRWLockCreate(__FILE__, __LINE__, m)
+#define ANNOTATE_RWLOCK_CREATE_STATIC(m) \
+    AnnotateRWLockCreateStatic(__FILE__, __LINE__, m)
+#define ANNOTATE_RWLOCK_DESTROY(m) \
+    AnnotateRWLockDestroy(__FILE__, __LINE__, m)
+#define ANNOTATE_RWLOCK_ACQUIRED(m, is_w) \
+    AnnotateRWLockAcquired(__FILE__, __LINE__, m, is_w)
+#define ANNOTATE_RWLOCK_RELEASED(m, is_w) \
+    AnnotateRWLockReleased(__FILE__, __LINE__, m, is_w)
diff --git a/test/tsan/thread_detach.c b/test/tsan/thread_detach.c
index 802d8de..2a95742 100644
--- a/test/tsan/thread_detach.c
+++ b/test/tsan/thread_detach.c
@@ -12,7 +12,7 @@
   pthread_create(&t, 0, Thread, 0);
   barrier_wait(&barrier);
   pthread_detach(t);
-  printf("PASS\n");
+  fprintf(stderr, "PASS\n");
   return 0;
 }
 
diff --git a/test/tsan/thread_detach2.c b/test/tsan/thread_detach2.c
index 8133980..5ee94e9 100644
--- a/test/tsan/thread_detach2.c
+++ b/test/tsan/thread_detach2.c
@@ -20,7 +20,7 @@
   pthread_create(&t, 0, Thread, 0);
   pthread_detach(t);
   barrier_wait(&barrier);
-  printf("PASS\n");
+  fprintf(stderr, "PASS\n");
   return 0;
 }
 
diff --git a/test/tsan/thread_leak.c b/test/tsan/thread_leak.c
index 9b850dd..9b42b16 100644
--- a/test/tsan/thread_leak.c
+++ b/test/tsan/thread_leak.c
@@ -10,7 +10,7 @@
   pthread_t t;
   pthread_create(&t, 0, Thread, 0);
   pthread_join(t, 0);
-  printf("PASS\n");
+  fprintf(stderr, "PASS\n");
   return 0;
 }
 
diff --git a/test/tsan/thread_leak2.c b/test/tsan/thread_leak2.c
index fc2942b..c3cac7a 100644
--- a/test/tsan/thread_leak2.c
+++ b/test/tsan/thread_leak2.c
@@ -10,7 +10,7 @@
   pthread_t t;
   pthread_create(&t, 0, Thread, 0);
   pthread_detach(t);
-  printf("PASS\n");
+  fprintf(stderr, "PASS\n");
   return 0;
 }
 
diff --git a/test/tsan/thread_leak4.c b/test/tsan/thread_leak4.c
index 1ebca58..1d0636f 100644
--- a/test/tsan/thread_leak4.c
+++ b/test/tsan/thread_leak4.c
@@ -9,7 +9,7 @@
 int main() {
   pthread_t t;
   pthread_create(&t, 0, Thread, 0);
-  printf("DONE\n");
+  fprintf(stderr, "DONE\n");
   return 0;
 }
 
diff --git a/test/tsan/tsan-vs-gvn.cc b/test/tsan/tsan-vs-gvn.cc
index 950f5d3..efd81ef 100644
--- a/test/tsan/tsan-vs-gvn.cc
+++ b/test/tsan/tsan-vs-gvn.cc
@@ -31,7 +31,7 @@
   pthread_create(&t[1], NULL, Thread2, NULL);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
-  printf("PASS\n");
+  fprintf(stderr, "PASS\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/unaligned_norace.cc b/test/tsan/unaligned_norace.cc
index 94df1cf..7e360cf 100644
--- a/test/tsan/unaligned_norace.cc
+++ b/test/tsan/unaligned_norace.cc
@@ -77,7 +77,7 @@
   pthread_create(&th, 0, Thread, 0);
   Test(true);
   pthread_join(th, 0);
-  printf("OK\n");
+  fprintf(stderr, "OK\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer:
diff --git a/test/tsan/vfork.cc b/test/tsan/vfork.cc
index 5ae1dd1..2d669b3 100644
--- a/test/tsan/vfork.cc
+++ b/test/tsan/vfork.cc
@@ -26,7 +26,7 @@
   pipe(fds);
   int pid = vfork();
   if (pid < 0) {
-    printf("FAIL to vfork\n");
+    fprintf(stderr, "FAIL to vfork\n");
     exit(1);
   }
   if (pid == 0) {  // child
@@ -43,7 +43,7 @@
   pthread_create(&t[1], NULL, Thread2, NULL);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
-  printf("DONE\n");
+  fprintf(stderr, "DONE\n");
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/virtual_inheritance_compile_bug.cc b/test/tsan/virtual_inheritance_compile_bug.cc
index 7da581d..3b1e08b 100644
--- a/test/tsan/virtual_inheritance_compile_bug.cc
+++ b/test/tsan/virtual_inheritance_compile_bug.cc
@@ -10,6 +10,6 @@
 DDD::DDD()  { }
 int main() {
   DDD d;
-  printf("OK\n");
+  fprintf(stderr, "OK\n");
 }
 // CHECK: OK
diff --git a/test/ubsan/CMakeLists.txt b/test/ubsan/CMakeLists.txt
index 0938ea2..7b14a70 100644
--- a/test/ubsan/CMakeLists.txt
+++ b/test/ubsan/CMakeLists.txt
@@ -32,7 +32,11 @@
   add_ubsan_testsuite("Standalone" ubsan ${arch})
 
   if(COMPILER_RT_HAS_ASAN AND ";${ASAN_SUPPORTED_ARCH};" MATCHES ";${arch};")
-    add_ubsan_testsuite("AddressSanitizer" asan ${arch})
+    # TODO(wwchrome): Re-enable ubsan for asan win 64-bit when ready.
+    # Disable ubsan with AddressSanitizer tests for Windows 64-bit.
+    if(NOT OS_NAME MATCHES "Windows" OR CMAKE_SIZEOF_VOID_P EQUAL 4)
+      add_ubsan_testsuite("AddressSanitizer" asan ${arch})
+    endif()
   endif()
   if(COMPILER_RT_HAS_MSAN AND ";${MSAN_SUPPORTED_ARCH};" MATCHES ";${arch};")
     add_ubsan_testsuite("MemorySanitizer" msan ${arch})
@@ -45,4 +49,4 @@
 add_lit_testsuite(check-ubsan "Running UndefinedBehaviorSanitizer tests"
   ${UBSAN_TESTSUITES}
   DEPENDS ${UBSAN_TEST_DEPS})
-set_target_properties(check-ubsan PROPERTIES FOLDER "UBSan tests")
+set_target_properties(check-ubsan PROPERTIES FOLDER "Compiler-RT Misc")
diff --git a/test/ubsan/TestCases/Float/cast-overflow.cpp b/test/ubsan/TestCases/Float/cast-overflow.cpp
index 1551bf5..5f51553 100644
--- a/test/ubsan/TestCases/Float/cast-overflow.cpp
+++ b/test/ubsan/TestCases/Float/cast-overflow.cpp
@@ -61,18 +61,18 @@
 
   // Build a '+Inf'.
 #if BYTE_ORDER == LITTLE_ENDIAN
-  char InfVal[] = { 0x00, 0x00, 0x80, 0x7f };
+  unsigned char InfVal[] = { 0x00, 0x00, 0x80, 0x7f };
 #else
-  char InfVal[] = { 0x7f, 0x80, 0x00, 0x00 };
+  unsigned char InfVal[] = { 0x7f, 0x80, 0x00, 0x00 };
 #endif
   float Inf;
   memcpy(&Inf, InfVal, 4);
 
   // Build a 'NaN'.
 #if BYTE_ORDER == LITTLE_ENDIAN
-  char NaNVal[] = { 0x01, 0x00, 0x80, 0x7f };
+  unsigned char NaNVal[] = { 0x01, 0x00, 0x80, 0x7f };
 #else
-  char NaNVal[] = { 0x7f, 0x80, 0x00, 0x01 };
+  unsigned char NaNVal[] = { 0x7f, 0x80, 0x00, 0x01 };
 #endif
   float NaN;
   memcpy(&NaN, NaNVal, 4);
diff --git a/test/ubsan/TestCases/Integer/suppressions.cpp b/test/ubsan/TestCases/Integer/suppressions.cpp
index 5203efd..a9e6601 100644
--- a/test/ubsan/TestCases/Integer/suppressions.cpp
+++ b/test/ubsan/TestCases/Integer/suppressions.cpp
@@ -1,11 +1,9 @@
-// XFAIL: win32
-// This test fails on Windows if the environment was set up by SetEnv.cmd from
-// the Windows SDK.  If it's set up via vcvarsall.bat, it passes.
-// FIXME: Figure out how to make this reliably pass on Windows.
-// test/asan/TestCases/suppressions-interceptor.cc will need the same fix.
-
 // RUN: %clangxx -fsanitize=integer -g0 %s -o %t
 
+// Suppression by symbol name (unsigned-integer-overflow:do_overflow below)
+// requires the compiler-rt runtime to be able to symbolize stack addresses.
+// REQUIRES: can-symbolize
+
 // Fails without any suppression.
 // RUN: %env_ubsan_opts=halt_on_error=1 not %run %t 2>&1 | FileCheck %s
 
@@ -36,4 +34,3 @@
   do_overflow();
   return 0;
 }
-
diff --git a/test/ubsan/TestCases/Misc/coverage-levels.cc b/test/ubsan/TestCases/Misc/coverage-levels.cc
index 046d886..f96b487 100644
--- a/test/ubsan/TestCases/Misc/coverage-levels.cc
+++ b/test/ubsan/TestCases/Misc/coverage-levels.cc
@@ -3,18 +3,22 @@
 // FIXME: Port the environment variable logic below for the lit shell.
 // REQUIRES: shell
 //
-// RUN: mkdir -p %T/coverage-levels
+// RUN: rm -rf %T/coverage-levels && mkdir %T/coverage-levels
 // RUN: %clangxx -fsanitize=shift                        -DGOOD_SHIFT=1 -O1 -fsanitize-coverage=func  %s -o %t
-// RUN: %env_ubsan_opts=coverage=1:verbosity=1:coverage_dir=%T/coverage-levels %run %t 2>&1 | FileCheck %s --check-prefix=CHECK1 --check-prefix=CHECK_NOWARN
+// RUN: %env_ubsan_opts=coverage=1:verbosity=1:coverage_dir='"%T/coverage-levels"' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK1 --check-prefix=CHECK_NOWARN
 // RUN: %clangxx -fsanitize=undefined                    -DGOOD_SHIFT=1 -O1 -fsanitize-coverage=func  %s -o %t
-// RUN: %env_ubsan_opts=coverage=1:verbosity=1:coverage_dir=%T/coverage-levels %run %t 2>&1 | FileCheck %s --check-prefix=CHECK1 --check-prefix=CHECK_NOWARN
+// RUN: %env_ubsan_opts=coverage=1:verbosity=1:coverage_dir='"%T/coverage-levels"' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK1 --check-prefix=CHECK_NOWARN
+
+// Also works without any sanitizer.
+// RUN: %clangxx                                         -DGOOD_SHIFT=1 -O1 -fsanitize-coverage=func  %s -o %t
+// RUN: %env_ubsan_opts=coverage=1:verbosity=1:coverage_dir='"%T/coverage-levels"' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK1 --check-prefix=CHECK_NOWARN
 
 // RUN: %clangxx -fsanitize=shift -O1 -fsanitize-coverage=func  %s -o %t
-// RUN: %env_ubsan_opts=coverage=1:verbosity=1:coverage_dir=%T/coverage-levels %run %t 2>&1 | FileCheck %s --check-prefix=CHECK1 --check-prefix=CHECK_WARN
+// RUN: %env_ubsan_opts=coverage=1:verbosity=1:coverage_dir='"%T/coverage-levels"' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK1 --check-prefix=CHECK_WARN
 // RUN: %clangxx -fsanitize=shift -O1 -fsanitize-coverage=bb  %s -o %t
-// RUN: %env_ubsan_opts=coverage=1:verbosity=1:coverage_dir=%T/coverage-levels %run %t 2>&1 | FileCheck %s --check-prefix=CHECK2 --check-prefix=CHECK_WARN
+// RUN: %env_ubsan_opts=coverage=1:verbosity=1:coverage_dir='"%T/coverage-levels"' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK2 --check-prefix=CHECK_WARN
 // RUN: %clangxx -fsanitize=shift -O1 -fsanitize-coverage=edge  %s -o %t
-// RUN: %env_ubsan_opts=coverage=1:verbosity=1:coverage_dir=%T/coverage-levels %run %t 2>&1 | FileCheck %s --check-prefix=CHECK3 --check-prefix=CHECK_WARN
+// RUN: %env_ubsan_opts=coverage=1:verbosity=1:coverage_dir='"%T/coverage-levels"' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK3 --check-prefix=CHECK_WARN
 
 // Coverage is not yet implemented in TSan.
 // XFAIL: ubsan-tsan
@@ -37,5 +41,5 @@
 // FIXME: Currently, ubsan with -fno-sanitize-recover and w/o asan will fail
 // to dump coverage.
 // CHECK1:  1 PCs written
-// CHECK2:  3 PCs written
-// CHECK3:  3 PCs written
+// CHECK2:  2 PCs written
+// CHECK3:  2 PCs written
diff --git a/test/ubsan/TestCases/TypeCheck/misaligned.cpp b/test/ubsan/TestCases/TypeCheck/misaligned.cpp
index 4307167..35b1ec3 100644
--- a/test/ubsan/TestCases/TypeCheck/misaligned.cpp
+++ b/test/ubsan/TestCases/TypeCheck/misaligned.cpp
@@ -77,7 +77,7 @@
     return s->k && 0;
 
   case 'f':
-    // CHECK-MEMFUN: misaligned.cpp:[[@LINE+4]]{{(:12)?}}: runtime error: member call on misaligned address [[PTR:0x[0-9a-f]*]] for type 'S', which requires 4 byte alignment
+    // CHECK-MEMFUN: misaligned.cpp:[[@LINE+4]]{{(:15)?}}: runtime error: member call on misaligned address [[PTR:0x[0-9a-f]*]] for type 'S', which requires 4 byte alignment
     // CHECK-MEMFUN-NEXT: [[PTR]]: note: pointer points here
     // CHECK-MEMFUN-NEXT: {{^ 00 00 00 01 02 03 04  05}}
     // CHECK-MEMFUN-NEXT: {{^             \^}}
diff --git a/test/ubsan/TestCases/TypeCheck/null.cpp b/test/ubsan/TestCases/TypeCheck/null.cpp
index 1e17955..b1cba83 100644
--- a/test/ubsan/TestCases/TypeCheck/null.cpp
+++ b/test/ubsan/TestCases/TypeCheck/null.cpp
@@ -32,7 +32,7 @@
     // CHECK-MEMBER: null.cpp:[[@LINE+1]]:15: runtime error: member access within null pointer of type 'S'
     return s->k;
   case 'f':
-    // CHECK-MEMFUN: null.cpp:[[@LINE+1]]:12: runtime error: member call on null pointer of type 'S'
+    // CHECK-MEMFUN: null.cpp:[[@LINE+1]]:15: runtime error: member call on null pointer of type 'S'
     return s->f();
   }
 }
diff --git a/test/ubsan/TestCases/TypeCheck/vptr-corrupted-vtable-itanium.cpp b/test/ubsan/TestCases/TypeCheck/vptr-corrupted-vtable-itanium.cpp
new file mode 100644
index 0000000..37ffe5b
--- /dev/null
+++ b/test/ubsan/TestCases/TypeCheck/vptr-corrupted-vtable-itanium.cpp
@@ -0,0 +1,41 @@
+// RUN: %clangxx -frtti -fsanitize=vptr -fno-sanitize-recover=vptr -g %s -O3 -o %t
+// RUN: not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-CORRUPTED-VTABLE --strict-whitespace
+
+// UNSUPPORTED: win32
+// REQUIRES: stable-runtime, cxxabi
+#include <cstddef>
+
+#include <typeinfo>
+
+struct S {
+  S() {}
+  ~S() {}
+  virtual int v() { return 0; }
+};
+
+// See the proper definition in ubsan_type_hash_itanium.cc
+struct VtablePrefix {
+  signed long Offset;
+  std::type_info *TypeInfo;
+};
+
+int main(int argc, char **argv) {
+  // Test that we don't crash on a corrupted vtable when the
+  // offset is too large or too small.
+  S Obj;
+  void *Ptr = &Obj;
+  VtablePrefix* RealPrefix = reinterpret_cast<VtablePrefix*>(
+      *reinterpret_cast<void**>(Ptr)) - 1;
+
+  VtablePrefix Prefix[2];
+  Prefix[0].Offset = 1<<21; // Greater than VptrMaxOffset
+  Prefix[0].TypeInfo = RealPrefix->TypeInfo;
+
+  // Hack Vtable ptr for Obj.
+  *reinterpret_cast<void**>(Ptr) = static_cast<void*>(&Prefix[1]);
+
+  // CHECK-CORRUPTED-VTABLE: vptr-corrupted-vtable-itanium.cpp:[[@LINE+3]]:16: runtime error: member call on address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'S'
+  // CHECK-CORRUPTED-VTABLE-NEXT: [[PTR]]: note: object has a possibly invalid vptr: abs(offset to top) too big
+  S* Ptr2 = reinterpret_cast<S*>(Ptr);
+  return Ptr2->v();
+}
diff --git a/test/ubsan/TestCases/TypeCheck/vptr-non-unique-typeinfo.cpp b/test/ubsan/TestCases/TypeCheck/vptr-non-unique-typeinfo.cpp
new file mode 100644
index 0000000..8ab7bfc
--- /dev/null
+++ b/test/ubsan/TestCases/TypeCheck/vptr-non-unique-typeinfo.cpp
@@ -0,0 +1,25 @@
+// RUN: %clangxx -frtti -fsanitize=vptr -fno-sanitize-recover=vptr -I%p/Helpers -g %s -fPIC -shared -o %t-lib.so -DBUILD_SO
+// RUN: %clangxx -frtti -fsanitize=vptr -fno-sanitize-recover=vptr -I%p/Helpers -g %s -O3 -o %t %t-lib.so
+// RUN: %run %t
+//
+// REQUIRES: cxxabi
+
+struct X {
+  virtual ~X() {}
+};
+X *libCall();
+
+#ifdef BUILD_SO
+
+X *libCall() {
+  return new X;
+}
+
+#else
+
+int main() {
+  X *px = libCall();
+  delete px;
+}
+
+#endif
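
vptr-non-unique-typeinfo.cpp covers the case where X is emitted in both the main binary and the shared object, so the same type can be described by two distinct std::type_info objects and vtables. The vptr check therefore must not treat type_info pointer inequality as a type mismatch; on platforms without unique typeinfo the runtime falls back to comparing type names. A hypothetical helper showing that comparison strategy, not the runtime's actual code:

    #include <cstring>
    #include <typeinfo>

    // Pointer identity is not guaranteed across DSOs for the same type, so a
    // robust comparison falls back to the mangled name.
    static bool sameDynamicType(const std::type_info &A, const std::type_info &B) {
      if (&A == &B)
        return true;                                // unique-typeinfo fast path
      return std::strcmp(A.name(), B.name()) == 0;  // cross-DSO slow path
    }

    struct X {
      virtual ~X() {}
    };

    int main() {
      X A, B;
      // Within one binary this takes the fast path; when one X comes from a
      // shared object (as in the test) only the name comparison keeps the
      // check from firing spuriously.
      return sameDynamicType(typeid(A), typeid(B)) ? 0 : 1;
    }
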
diff --git a/test/ubsan/TestCases/TypeCheck/vptr.cpp b/test/ubsan/TestCases/TypeCheck/vptr.cpp
index 4a1fa8d..86b646d 100644
--- a/test/ubsan/TestCases/TypeCheck/vptr.cpp
+++ b/test/ubsan/TestCases/TypeCheck/vptr.cpp
@@ -109,7 +109,7 @@
     for (int i = 0; i < 2; i++) {
       // Check that the first iteration ("S") succeeds, while the second ("V") fails.
       p = reinterpret_cast<T*>((i == 0) ? new S : new V);
-      // CHECK-LOC-SUPPRESS: vptr.cpp:[[@LINE+5]]:7: runtime error: member call on address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'T'
+      // CHECK-LOC-SUPPRESS: vptr.cpp:[[@LINE+5]]:10: runtime error: member call on address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'T'
       // CHECK-LOC-SUPPRESS-NEXT: [[PTR]]: note: object is of type 'V'
       // CHECK-LOC-SUPPRESS-NEXT: {{^ .. .. .. ..  .. .. .. .. .. .. .. ..  }}
       // CHECK-LOC-SUPPRESS-NEXT: {{^              \^~~~~~~~~~~(~~~~~~~~~~~~)? *$}}
@@ -135,7 +135,7 @@
     // CHECK-Linux-NULL-MEMBER: #0 {{.*}}access_p{{.*}}vptr.cpp:[[@LINE-7]]
 
   case 'f':
-    // CHECK-MEMFUN: vptr.cpp:[[@LINE+6]]:12: runtime error: member call on address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'T'
+    // CHECK-MEMFUN: vptr.cpp:[[@LINE+6]]:15: runtime error: member call on address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'T'
     // CHECK-MEMFUN-NEXT: [[PTR]]: note: object is of type [[DYN_TYPE:'S'|'U']]
     // CHECK-MEMFUN-NEXT: {{^ .. .. .. ..  .. .. .. .. .. .. .. ..  }}
     // CHECK-MEMFUN-NEXT: {{^              \^~~~~~~~~~~(~~~~~~~~~~~~)? *$}}
@@ -144,7 +144,7 @@
     return p->g();
 
   case 'o':
-    // CHECK-OFFSET: vptr.cpp:[[@LINE+6]]:12: runtime error: member call on address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'U'
+    // CHECK-OFFSET: vptr.cpp:[[@LINE+6]]:37: runtime error: member call on address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'U'
     // CHECK-OFFSET-NEXT: 0x{{[0-9a-f]*}}: note: object is base class subobject at offset {{8|16}} within object of type [[DYN_TYPE:'U']]
     // CHECK-OFFSET-NEXT: {{^ .. .. .. ..  .. .. .. .. .. .. .. ..  .. .. .. .. .. .. .. ..  .. .. .. .. .. .. .. ..  }}
     // CHECK-OFFSET-NEXT: {{^              \^                        (                         ~~~~~~~~~~~~)?~~~~~~~~~~~ *$}}
diff --git a/test/ubsan/lit.common.cfg b/test/ubsan/lit.common.cfg
index e508629..cd6d209 100644
--- a/test/ubsan/lit.common.cfg
+++ b/test/ubsan/lit.common.cfg
@@ -69,11 +69,6 @@
 if config.host_os not in ['Linux', 'Darwin', 'FreeBSD', 'Windows']:
   config.unsupported = True
 
-if config.host_os == 'Windows':
-  # We do not currently support enough of the Microsoft ABI for UBSan to work on
-  # Windows.
-  config.available_features.remove('cxxabi')
-
 # Allow tests to use REQUIRES=stable-runtime.  For use when you cannot use XFAIL
 # because the test hangs or fails on one configuration and not the other.
 if config.target_arch.startswith('arm') == False and config.target_arch != 'aarch64':
diff --git a/test/ubsan/lit.site.cfg.in b/test/ubsan/lit.site.cfg.in
index 1b06881..d824216 100644
--- a/test/ubsan/lit.site.cfg.in
+++ b/test/ubsan/lit.site.cfg.in
@@ -1,10 +1,12 @@
-# Load common config for all compiler-rt lit tests.
-lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
+@LIT_SITE_CFG_IN_HEADER@
 
 # Tool-specific config options.
 config.ubsan_lit_test_mode = "@UBSAN_LIT_TEST_MODE@"
 config.target_cflags = "@UBSAN_TEST_TARGET_CFLAGS@"
 config.target_arch = "@UBSAN_TEST_TARGET_ARCH@"
 
+# Load common config for all compiler-rt lit tests.
+lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
+
 # Load tool-specific config that would do the real work.
 lit_config.load_config(config, "@UBSAN_LIT_TESTS_DIR@/lit.common.cfg")
diff --git a/test/xray/CMakeLists.txt b/test/xray/CMakeLists.txt
new file mode 100644
index 0000000..49ceafe
--- /dev/null
+++ b/test/xray/CMakeLists.txt
@@ -0,0 +1,39 @@
+set(XRAY_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+
+set(XRAY_TESTSUITES)
+
+set(XRAY_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
+
+if(NOT COMPILER_RT_STANDALONE_BUILD AND COMPILER_RT_BUILD_XRAY AND
+   COMPILER_RT_HAS_XRAY)
+  list(APPEND XRAY_TEST_DEPS xray)
+endif()
+
+set(XRAY_TEST_ARCH ${XRAY_SUPPORTED_ARCH})
+foreach(arch ${XRAY_TEST_ARCH})
+  set(XRAY_TEST_TARGET_ARCH ${arch})
+  string(TOLOWER "-${arch}-${OS_NAME}" XRAY_TEST_CONFIG_SUFFIX)
+
+  if(ANDROID OR ${arch} MATCHES "arm|aarch64")
+    # This is only true if we are cross-compiling.
+    # Build all tests with host compiler and use host tools.
+    set(XRAY_TEST_TARGET_CC ${COMPILER_RT_TEST_COMPILER})
+    set(XRAY_TEST_TARGET_CFLAGS ${COMPILER_RT_TEST_COMPILER_CFLAGS})
+  else()
+    get_target_flags_for_arch(${arch} XRAY_TEST_TARGET_CFLAGS)
+    string(REPLACE ";" " " XRAY_TEST_TARGET_CFLAGS "${XRAY_TEST_TARGET_CFLAGS}")
+  endif()
+
+  string(TOUPPER ${arch} ARCH_UPPER_CASE)
+  set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}Config)
+
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg)
+  list(APPEND XRAY_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME})
+endforeach()
+
+add_lit_testsuite(check-xray "Running the XRay tests"
+  ${XRAY_TESTSUITES}
+  DEPENDS ${XRAY_TEST_DEPS})
+set_target_properties(check-xray PROPERTIES FOLDER "Compiler-RT Misc")
diff --git a/test/xray/TestCases/Linux/patching-unpatching.cc b/test/xray/TestCases/Linux/patching-unpatching.cc
new file mode 100644
index 0000000..05478a4
--- /dev/null
+++ b/test/xray/TestCases/Linux/patching-unpatching.cc
@@ -0,0 +1,47 @@
+// Check that we can patch and un-patch on demand, and that logging gets invoked
+// appropriately.
+//
+// RUN: %clangxx_xray -fxray-instrument -std=c++11 %s -o %t
+// RUN: XRAY_OPTIONS="patch_premain=false" %run %t 2>&1 | FileCheck %s
+
+#include "xray/xray_interface.h"
+
+#include <cstdio>
+
+bool called = false;
+
+void test_handler(int32_t fid, XRayEntryType type) {
+  printf("called: %d, type=%d\n", fid, static_cast<int32_t>(type));
+  called = true;
+}
+
+[[clang::xray_always_instrument]] void always_instrument() {
+  printf("always instrumented called\n");
+}
+
+int main() {
+  __xray_set_handler(test_handler);
+  always_instrument();
+  // CHECK: always instrumented called
+  auto status = __xray_patch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+  always_instrument();
+  // CHECK-NEXT: called: {{.*}}, type=0
+  // CHECK-NEXT: always instrumented called
+  // CHECK-NEXT: called: {{.*}}, type=1
+  status = __xray_unpatch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+  always_instrument();
+  // CHECK-NEXT: always instrumented called
+  status = __xray_patch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+  __xray_remove_handler();
+  always_instrument();
+  // CHECK-NEXT: always instrumented called
+  status = __xray_unpatch();
+  printf("patching status: %d\n", static_cast<int32_t>(status));
+  // CHECK-NEXT: patching status: 1
+}
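
The CHECK lines pin down the contract this test relies on: after __xray_patch() returns success, each instrumented function delivers an entry event (type=0) and an exit event (type=1) to the installed handler, and __xray_unpatch() restores the unpatched path. A small sketch of a handler that uses those two event kinds to time functions by id, compiled the same way as the test (clang++ -fxray-instrument -std=c++11) and using only the interface calls the test already exercises; the per-id timing array is illustrative, not how the shipped XRay logging modes work:

    #include "xray/xray_interface.h"

    #include <chrono>
    #include <cstdint>
    #include <cstdio>

    // Entry timestamps keyed by (truncated) function id; demo-sized only.
    static std::uint64_t entry_ns[1 << 16];

    static std::uint64_t now_ns() {
      return std::uint64_t(std::chrono::duration_cast<std::chrono::nanoseconds>(
          std::chrono::steady_clock::now().time_since_epoch()).count());
    }

    // The test's CHECK lines establish the convention: type=0 is function
    // entry, type=1 is function exit.
    static void timing_handler(int32_t fid, XRayEntryType type) {
      std::uint32_t slot = std::uint32_t(fid) & 0xffff;
      if (static_cast<int32_t>(type) == 0) {
        entry_ns[slot] = now_ns();
      } else if (static_cast<int32_t>(type) == 1) {
        std::printf("fid %d: %llu ns\n", fid,
                    static_cast<unsigned long long>(now_ns() - entry_ns[slot]));
      }
    }

    [[clang::xray_always_instrument]] void work() {
      for (volatile int i = 0; i < 1000000; ++i) {
      }
    }

    int main() {
      __xray_set_handler(timing_handler);  // install before patching
      __xray_patch();                      // rewrite sleds to call the handler
      work();
      __xray_unpatch();                    // back to the unpatched fast path
      __xray_remove_handler();
    }
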
diff --git a/test/xray/lit.cfg b/test/xray/lit.cfg
new file mode 100644
index 0000000..04e21f1
--- /dev/null
+++ b/test/xray/lit.cfg
@@ -0,0 +1,34 @@
+# -*- Python -*-
+
+import os
+
+# Setup config name.
+config.name = 'XRay' + config.name_suffix
+
+# Setup source root.
+config.test_source_root = os.path.dirname(__file__)
+
+# Setup default compiler flags used with the -fxray-instrument option.
+clang_xray_cflags = ['-fxray-instrument', config.target_cflags]
+clang_xray_cxxflags = config.cxx_mode_flags + clang_xray_cflags
+
+
+def build_invocation(compile_flags):
+  return ' ' + ' '.join([config.clang] + compile_flags) + ' '
+
+# Setup substitutions.
+config.substitutions.append(
+    ('%clang ', build_invocation([config.target_cflags])))
+config.substitutions.append(
+    ('%clangxx ',
+     build_invocation(config.cxx_mode_flags + [config.target_cflags])))
+config.substitutions.append(
+    ('%clang_xray ', build_invocation(clang_xray_cflags)))
+config.substitutions.append(
+    ('%clangxx_xray', build_invocation(clang_xray_cxxflags)))
+
+# Default test suffixes.
+config.suffixes = ['.c', '.cc', '.cpp']
+
+if config.host_os not in ['Linux'] or config.host_arch.find('64') == -1:
+  config.unsupported = True
diff --git a/test/xray/lit.site.cfg.in b/test/xray/lit.site.cfg.in
new file mode 100644
index 0000000..ee0ffca
--- /dev/null
+++ b/test/xray/lit.site.cfg.in
@@ -0,0 +1,13 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+# Tool-specific config options.
+config.name_suffix = "@XRAY_TEST_CONFIG_SUFFIX@"
+config.xray_lit_source_dir = "@XRAY_LIT_SOURCE_DIR@"
+config.target_cflags = "@XRAY_TEST_TARGET_CFLAGS@"
+config.target_arch = "@XRAY_TEST_TARGET_ARCH@"
+
+# Load common config for all compiler-rt lit tests.
+lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
+
+# Load tool-specific config that would do the real work.
+lit_config.load_config(config, "@XRAY_LIT_SOURCE_DIR@/lit.cfg")
diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt
index fe2c397..e5059bd 100644
--- a/unittests/CMakeLists.txt
+++ b/unittests/CMakeLists.txt
@@ -1,3 +1,6 @@
+# Needed for lit support
+include(AddLLVM)
+
 configure_lit_site_cfg(
   ${CMAKE_CURRENT_SOURCE_DIR}/lit.common.unit.configured.in
   ${CMAKE_CURRENT_BINARY_DIR}/lit.common.unit.configured)
diff --git a/unittests/lit.common.unit.configured.in b/unittests/lit.common.unit.configured.in
index 85bb91d..dd36985 100644
--- a/unittests/lit.common.unit.configured.in
+++ b/unittests/lit.common.unit.configured.in
@@ -1,9 +1,8 @@
-## Autogenerated by LLVM/Clang configuration.
-# Do not edit!
+@LIT_SITE_CFG_IN_HEADER@
 
 # Generic config options for all compiler-rt unit tests.
 config.target_triple = "@TARGET_TRIPLE@"
-config.llvm_src_root = "@LLVM_SOURCE_DIR@"
+config.llvm_src_root = "@LLVM_MAIN_SRC_DIR@"
 config.llvm_obj_root = "@LLVM_BINARY_DIR@"
 config.llvm_tools_dir = "@LLVM_TOOLS_BINARY_DIR@"
 config.compiler_rt_src_root = "@COMPILER_RT_SOURCE_DIR@"