Reset stable branch to track swift-4.0-branch.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index fc1381c..e8326f0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,14 +1,14 @@
 # CMake build for CompilerRT.
 #
 # This build assumes that CompilerRT is checked out into the
-# 'projects/compiler-rt' inside of an LLVM tree.
+# 'projects/compiler-rt' or 'runtimes/compiler-rt' inside of an LLVM tree.
 # Standalone build system for CompilerRT is not yet ready.
 #
 # An important constraint of the build is that it only produces libraries
 # based on the ability of the host toolchain to target various platforms.
 
 # Check if compiler-rt is built as a standalone project.
-if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
+if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR COMPILER_RT_STANDALONE_BUILD)
   project(CompilerRT C CXX ASM)
   set(COMPILER_RT_STANDALONE_BUILD TRUE)
 endif()
@@ -85,12 +85,25 @@
 # COMPILER_RT_DEBUG_PYBOOL is used by lit.common.configured.in.
 pythonize_bool(COMPILER_RT_DEBUG)
 
+include(config-ix)
+
+if(APPLE AND SANITIZER_MIN_OSX_VERSION AND SANITIZER_MIN_OSX_VERSION VERSION_LESS "10.9")
+  # Mac OS X prior to 10.9 had problems with exporting symbols from libc++/
+  # libc++abi. An unset minimum version must not count as "older than 10.9".
+  set(use_cxxabi_default OFF)
+elseif(MSVC)
+  set(use_cxxabi_default OFF)
+else()
+  set(use_cxxabi_default ON)
+endif()
+
+option(SANITIZER_CAN_USE_CXXABI "Sanitizers can use cxxabi" ${use_cxxabi_default})  # Platform default; a user-supplied cache value wins.
+pythonize_bool(SANITIZER_CAN_USE_CXXABI)
+
 #================================
 # Setup Compiler Flags
 #================================
 
-include(config-ix)
-
 if(MSVC)
   # Override any existing /W flags with /W4. This is what LLVM does.  Failing to
   # remove other /W[0-4] flags will result in a warning about overriding a
@@ -116,7 +129,9 @@
 endif()
 
 # Provide some common commmandline flags for Sanitizer runtimes.
-append_list_if(COMPILER_RT_HAS_FPIC_FLAG -fPIC SANITIZER_COMMON_CFLAGS)
+if(NOT WIN32)
+  append_list_if(COMPILER_RT_HAS_FPIC_FLAG -fPIC SANITIZER_COMMON_CFLAGS)
+endif()
 append_list_if(COMPILER_RT_HAS_FNO_BUILTIN_FLAG -fno-builtin SANITIZER_COMMON_CFLAGS)
 append_list_if(COMPILER_RT_HAS_FNO_EXCEPTIONS_FLAG -fno-exceptions SANITIZER_COMMON_CFLAGS)
 if(NOT COMPILER_RT_DEBUG)
@@ -210,16 +225,16 @@
 # Warnings to turn off for all libraries, not just sanitizers.
 append_string_if(COMPILER_RT_HAS_WUNUSED_PARAMETER_FLAG -Wno-unused-parameter CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
 
-if(APPLE AND SANITIZER_MIN_OSX_VERSION AND SANITIZER_MIN_OSX_VERSION VERSION_LESS "10.9")
-  # Mac OS X prior to 10.9 had problems with exporting symbols from
-  # libc++/libc++abi.
-  set(SANITIZER_CAN_USE_CXXABI FALSE)
-elseif(MSVC)
-  set(SANITIZER_CAN_USE_CXXABI FALSE)
-else()
-  set(SANITIZER_CAN_USE_CXXABI TRUE)
+if (CMAKE_LINKER MATCHES "link.exe$")
+  # Silence MSVC linker warnings caused by empty object files. The
+  # sanitizer libraries intentionally use ifdefs that result in empty
+  # files, rather than skipping these files in the build system.
+  # Ideally, we would pass this flag only for the libraries that need
+  # it, but CMake doesn't seem to have a way to set linker flags for
+  # individual static libraries, so we enable the suppression flag for
+  # the whole compiler-rt project.
+  append("/IGNORE:4221" CMAKE_STATIC_LINKER_FLAGS)
 endif()
-pythonize_bool(SANITIZER_CAN_USE_CXXABI)
 
 add_subdirectory(include)
 
diff --git a/Makefile b/Makefile
deleted file mode 100644
index ac3daac..0000000
--- a/Makefile
+++ /dev/null
@@ -1,275 +0,0 @@
-SubDirs := lib
-
-# Set default rule before anything else.
-all: help
-
-include make/config.mk
-include make/util.mk
-# If SRCROOT is defined, assume we are doing an Apple style build. We should be
-# able to use RC_XBS for this but that is unused during "make installsrc".
-ifdef SRCROOT
-  include make/AppleBI.mk
-endif
-
-# Make sure we don't build with a missing ProjObjRoot.
-ifeq ($(ProjObjRoot),)
-$(error Refusing to build with empty ProjObjRoot variable)
-endif
-
-##############
-
-###
-# Rules
-
-###
-# Top level targets
-
-# FIXME: Document the available subtargets.
-help:
-	@echo "usage: make [{VARIABLE=VALUE}*] target"
-	@echo
-	@echo "User variables:"
-	@echo "  VERBOSE=1: Use to show all commands [default=0]"
-	@echo
-	@echo "Available targets:"
-	@echo "  <platform name>: build the libraries for 'platform'"
-	@echo "  clean:           clean all configurations"
-	@echo "  test:            run unit tests"
-	@echo
-	@echo "  info-platforms:  list available platforms"
-	@echo "  help-devel:      print additional help for developers"
-	@echo
-
-help-devel: help
-	@echo "Development targets:"
-	@echo "  <platform name>-<config name>:"
-	@echo "    build the libraries for a single platform config"
-	@echo "  <platform name>-<config name>-<arch name>:"
-	@echo "    build the libraries for a single config and arch"
-	@echo "  info-functions: list available compiler-rt functions"
-	@echo "  help-hidden: print help for Makefile debugging"
-	@echo
-
-help-hidden: help-devel
-	@echo "Debugging variables:"
-	@echo "  DEBUGMAKE=1: enable some Makefile logging [default=]"
-	@echo "           =2: enable more Makefile logging"
-	@echo
-	@echo "Debugging targets:"
-	@echo "  make-print-FOO: print information on the variable 'FOO'"
-	@echo
-
-info-functions:
-	@echo "compiler-rt Available Functions"
-	@echo
-	@echo "All Functions: $(AvailableFunctions)"
-	@$(foreach fn,$(AvailableFunctions),\
-	  printf "  %-20s - available in (%s)\n" $(fn)\
-	    "$(foreach key,$(AvailableIn.$(fn)),$($(key).Dir))";)
-
-info-platforms:
-	@echo "compiler-rt Available Platforms"
-	@echo
-	@echo "Platforms:"
-	@$(foreach key,$(PlatformKeys),\
-	  printf "  %s - from '%s'\n" $($(key).Name) $($(key).Path);\
-	  printf "    %s\n" "$($(key).Description)";\
-	  printf "    Configurations: %s\n\n" "$($(key).Configs)";)
-
-# Provide default clean target which is extended by other templates.
-.PHONY: clean
-clean::
-
-# Test
-.PHONY: test
-test:
-	cd test/Unit && ./test
-
-###
-# Directory handling magic.
-
-# Create directories as needed, and timestamp their creation.
-%/.dir:
-	$(Summary) "  MKDIR:     $*"
-	$(Verb) $(MKDIR) $* > /dev/null
-	$(Verb) echo 'Created.' > $@
-
-# Remove directories
-%/.remove:
-	$(Verb) $(RM) -r $*
-
-###
-# Include child makefile fragments
-
-Dir := .
-include make/subdir.mk
-include make/lib_info.mk
-include make/lib_util.mk
-include make/lib_platforms.mk
-
-###
-# Define Platform Rules
-
-define PerPlatform_template
-$(call Set,Tmp.Key,$(1))
-$(call Set,Tmp.Name,$($(Tmp.Key).Name))
-$(call Set,Tmp.Configs,$($(Tmp.Key).Configs))
-$(call Set,Tmp.ObjPath,$(ProjObjRoot)/$(Tmp.Name))
-
-# Top-Level Platform Target
-$(Tmp.Name):: $(Tmp.Configs:%=$(Tmp.Name)-%)
-.PHONY: $(Tmp.Name)
-
-clean::
-	$(Verb) rm -rf $(Tmp.ObjPath)
-
-# Per-Config Libraries
-$(foreach config,$(Tmp.Configs),\
-  $(call PerPlatformConfig_template,$(config)))
-endef
-
-define PerPlatformConfig_template
-$(call Set,Tmp.Config,$(1))
-$(call Set,Tmp.ObjPath,$(ProjObjRoot)/$(Tmp.Name)/$(Tmp.Config))
-$(call Set,Tmp.SHARED_LIBRARY,$(strip \
-  $(call GetCNAVar,SHARED_LIBRARY,$(Tmp.Key),$(Tmp.Config),$(Tmp.Arch))))
-$(call Set,Tmp.SHARED_LIBRARY_SUFFIX,$(strip \
-  $(call GetCNAVar,SHARED_LIBRARY_SUFFIX,$(Tmp.Key),$(Tmp.Config),$(Tmp.Arch))))
-
-# Compute the library suffix.
-$(if $(call streq,1,$(Tmp.SHARED_LIBRARY)),
-  $(call Set,Tmp.LibrarySuffix,$(Tmp.SHARED_LIBRARY_SUFFIX)),
-  $(call Set,Tmp.LibrarySuffix,a))
-
-# Compute the archs to build, depending on whether this is a universal build or
-# not.
-$(call Set,Tmp.ArchsToBuild,\
-  $(if $(call IsDefined,$(Tmp.Key).UniversalArchs),\
-       $(strip \
-         $(or $($(Tmp.Key).UniversalArchs.$(Tmp.Config)),\
-              $($(Tmp.Key).UniversalArchs))),\
-       $(call VarOrDefault,$(Tmp.Key).Arch.$(Tmp.Config),$($(Tmp.Key).Arch))))
-
-# Copy or lipo to create the per-config library.
-$(call Set,Tmp.Inputs,$(Tmp.ArchsToBuild:%=$(Tmp.ObjPath)/%/libcompiler_rt.$(Tmp.LibrarySuffix)))
-$(Tmp.ObjPath)/libcompiler_rt.$(Tmp.LibrarySuffix): $(Tmp.Inputs) $(Tmp.ObjPath)/.dir
-	$(Summary) "  FINAL-ARCHIVE: $(Tmp.Name)/$(Tmp.Config): $$@"
-	-$(Verb) $(RM) $$@
-	$(if $(call streq,1,$(words $(Tmp.ArchsToBuild))), \
-	  $(Verb) $(CP) $(Tmp.Inputs) $$@, \
-	  $(Verb) $(LIPO) -create -output $$@ $(Tmp.Inputs))
-.PRECIOUS: $(Tmp.ObjPath)/.dir
-
-# Per-Config Targets
-$(Tmp.Name)-$(Tmp.Config):: $(Tmp.ObjPath)/libcompiler_rt.$(Tmp.LibrarySuffix)
-.PHONY: $(Tmp.Name)-$(Tmp.Config)
-
-# Per-Config-Arch Libraries
-$(foreach arch,$(Tmp.ArchsToBuild),\
-  $(call PerPlatformConfigArch_template,$(arch)))
-endef
-
-define PerPlatformConfigArch_template
-$(call Set,Tmp.Arch,$(1))
-$(call Set,Tmp.ObjPath,$(ProjObjRoot)/$(Tmp.Name)/$(Tmp.Config)/$(Tmp.Arch))
-$(call Set,Tmp.Functions,$(strip \
-  $(AlwaysRequiredModules) \
-  $(call GetCNAVar,FUNCTIONS,$(Tmp.Key),$(Tmp.Config),$(Tmp.Arch))))
-$(call Set,Tmp.Optimized,$(strip \
-  $(call GetCNAVar,OPTIMIZED,$(Tmp.Key),$(Tmp.Config),$(Tmp.Arch))))
-$(call Set,Tmp.AR,$(strip \
-  $(call GetCNAVar,AR,$(Tmp.Key),$(Tmp.Config),$(Tmp.Arch))))
-$(call Set,Tmp.ARFLAGS,$(strip \
-  $(call GetCNAVar,ARFLAGS,$(Tmp.Key),$(Tmp.Config),$(Tmp.Arch))))
-$(call Set,Tmp.CC,$(strip \
-  $(call GetCNAVar,CC,$(Tmp.Key),$(Tmp.Config),$(Tmp.Arch))))
-$(call Set,Tmp.LDFLAGS,$(strip \
-  $(call GetCNAVar,LDFLAGS,$(Tmp.Key),$(Tmp.Config),$(Tmp.Arch))))
-$(call Set,Tmp.RANLIB,$(strip \
-  $(call GetCNAVar,RANLIB,$(Tmp.Key),$(Tmp.Config),$(Tmp.Arch))))
-$(call Set,Tmp.RANLIBFLAGS,$(strip \
-  $(call GetCNAVar,RANLIBFLAGS,$(Tmp.Key),$(Tmp.Config),$(Tmp.Arch))))
-$(call Set,Tmp.SHARED_LIBRARY,$(strip \
-  $(call GetCNAVar,SHARED_LIBRARY,$(Tmp.Key),$(Tmp.Config),$(Tmp.Arch))))
-
-# Compute the library suffix.
-$(if $(call streq,1,$(Tmp.SHARED_LIBRARY)),
-  $(call Set,Tmp.LibrarySuffix,$(Tmp.SHARED_LIBRARY_SUFFIX)),
-  $(call Set,Tmp.LibrarySuffix,a))
-
-# Compute the object inputs for this library.
-$(call Set,Tmp.Inputs,\
-  $(foreach fn,$(sort $(Tmp.Functions)),\
-    $(call Set,Tmp.FnDir,\
-      $(call SelectFunctionDir,$(Tmp.Config),$(Tmp.Arch),$(fn),$(Tmp.Optimized)))\
-    $(Tmp.ObjPath)/$(Tmp.FnDir)/$(fn).o))
-$(Tmp.ObjPath)/libcompiler_rt.a: $(Tmp.Inputs) $(Tmp.ObjPath)/.dir
-	$(Summary) "  ARCHIVE:   $(Tmp.Name)/$(Tmp.Config)/$(Tmp.Arch): $$@"
-	-$(Verb) $(RM) $$@
-	$(Verb) $(Tmp.AR) $(Tmp.ARFLAGS) $$@ $(Tmp.Inputs)
-	$(Verb) $(Tmp.RANLIB) $(Tmp.RANLIBFLAGS) $$@
-$(Tmp.ObjPath)/libcompiler_rt.dylib: $(Tmp.Inputs) $(Tmp.ObjPath)/.dir
-	$(Summary) "  DYLIB:   $(Tmp.Name)/$(Tmp.Config)/$(Tmp.Arch): $$@"
-	$(Verb) $(Tmp.CC) -arch $(Tmp.Arch) -dynamiclib -o $$@ \
-	  $(Tmp.Inputs) $(Tmp.LDFLAGS)
-$(Tmp.ObjPath)/libcompiler_rt.so: $(Tmp.Inputs) $(Tmp.ObjPath)/.dir
-	$(Summary) "  SO:   $(Tmp.Name)/$(Tmp.Config)/$(Tmp.Arch): $$@"
-	$(Verb) $(Tmp.CC) -shared -o $$@ \
-	  $(Tmp.Inputs) $(Tmp.LDFLAGS)
-.PRECIOUS: $(Tmp.ObjPath)/.dir
-
-# Per-Config-Arch Targets
-$(Tmp.Name)-$(Tmp.Config)-$(Tmp.Arch):: $(Tmp.ObjPath)/libcompiler_rt.$(Tmp.LibrarySuffix)
-.PHONY: $(Tmp.Name)-$(Tmp.Config)-$(Tmp.Arch)
-
-# Per-Config-Arch-SubDir Objects
-$(foreach key,$(SubDirKeys),\
-  $(call PerPlatformConfigArchSubDir_template,$(key)))
-endef
-
-define PerPlatformConfigArchSubDir_template
-$(call Set,Tmp.SubDirKey,$(1))
-$(call Set,Tmp.SubDir,$($(Tmp.SubDirKey).Dir))
-$(call Set,Tmp.SrcPath,$(ProjSrcRoot)/$(Tmp.SubDir))
-$(call Set,Tmp.ObjPath,$(ProjObjRoot)/$(Tmp.Name)/$(Tmp.Config)/$(Tmp.Arch)/$(Tmp.SubDirKey))
-$(call Set,Tmp.Dependencies,$($(Tmp.SubDirKey).Dependencies))
-$(call Set,Tmp.CC,$(strip \
-  $(call GetCNAVar,CC,$(Tmp.Key),$(Tmp.Config),$(Tmp.Arch))))
-$(call Set,Tmp.KERNEL_USE,$(strip \
-  $(call GetCNAVar,KERNEL_USE,$(Tmp.Key),$(Tmp.Config),$(Tmp.Arch))))
-$(call Set,Tmp.VISIBILITY_HIDDEN,$(strip \
-  $(call GetCNAVar,VISIBILITY_HIDDEN,$(Tmp.Key),$(Tmp.Config),$(Tmp.Arch))))
-$(call Set,Tmp.CFLAGS,$(strip \
-  $(if $(call IsDefined,$(Tmp.Key).UniversalArchs),-arch $(Tmp.Arch),)\
-  $(if $(call streq,$(Tmp.VISIBILITY_HIDDEN),1),\
-       -fvisibility=hidden -DVISIBILITY_HIDDEN,)\
-  $(if $(call streq,$(Tmp.KERNEL_USE),1),\
-       -mkernel -DKERNEL_USE,)\
-  $(call GetCNAVar,CFLAGS,$(Tmp.Key),$(Tmp.Config),$(Tmp.Arch))))
-
-$(Tmp.ObjPath)/%.o: $(Tmp.SrcPath)/%.s $(Tmp.Dependencies) $(Tmp.ObjPath)/.dir
-	$(Summary) "  ASSEMBLE:  $(Tmp.Name)/$(Tmp.Config)/$(Tmp.Arch): $$<"
-	$(Verb) $(Tmp.CC) $(COMMON_ASMFLAGS) $(Tmp.CFLAGS)  -c -o $$@ $$<
-$(Tmp.ObjPath)/%.o: $(Tmp.SrcPath)/%.S $(Tmp.Dependencies) $(Tmp.ObjPath)/.dir
-	$(Summary) "  ASSEMBLE:  $(Tmp.Name)/$(Tmp.Config)/$(Tmp.Arch): $$<"
-	$(Verb) $(Tmp.CC) $(COMMON_ASMFLAGS) $(Tmp.CFLAGS) -c -o $$@ $$<
-$(Tmp.ObjPath)/%.o: $(Tmp.SrcPath)/%.c $(Tmp.Dependencies) $(Tmp.ObjPath)/.dir
-	$(Summary) "  COMPILE:   $(Tmp.Name)/$(Tmp.Config)/$(Tmp.Arch): $$<"
-	$(Verb) $(Tmp.CC) $(COMMON_CFLAGS) $(Tmp.CFLAGS) -c -o $$@ $$<
-$(Tmp.ObjPath)/%.o: $(Tmp.SrcPath)/%.cc $(Tmp.Dependencies) $(Tmp.ObjPath)/.dir
-	$(Summary) "  COMPILE:   $(Tmp.Name)/$(Tmp.Config)/$(Tmp.Arch): $$<"
-	$(Verb) $(Tmp.CC) $(COMMON_CXXFLAGS) $(Tmp.CFLAGS) -c -o $$@ $$<
-.PRECIOUS: $(Tmp.ObjPath)/.dir
-
-endef
-
-# Run templates.
-$(foreach key,$(PlatformKeys),\
-  $(eval $(call PerPlatform_template,$(key))))
-
-###
-
-ifneq ($(DEBUGMAKE),)
-  $(info MAKE: Done processing Makefile)
-  $(info  )
-endif
diff --git a/cmake/Modules/AddCompilerRT.cmake b/cmake/Modules/AddCompilerRT.cmake
index fa972c2..3c1e1c1 100644
--- a/cmake/Modules/AddCompilerRT.cmake
+++ b/cmake/Modules/AddCompilerRT.cmake
@@ -94,7 +94,7 @@
 #                         OS <os list>
 #                         SOURCES <source files>
 #                         CFLAGS <compile flags>
-#                         LINKFLAGS <linker flags>
+#                         LINK_FLAGS <linker flags>
 #                         DEFS <compile definitions>
 #                         LINK_LIBS <linked libraries> (only for shared library)
 #                         OBJECT_LIBS <object libraries to use as sources>
@@ -107,7 +107,7 @@
   cmake_parse_arguments(LIB
     ""
     "PARENT_TARGET"
-    "OS;ARCHS;SOURCES;CFLAGS;LINKFLAGS;DEFS;LINK_LIBS;OBJECT_LIBS"
+    "OS;ARCHS;SOURCES;CFLAGS;LINK_FLAGS;DEFS;LINK_LIBS;OBJECT_LIBS"
     ${ARGN})
   set(libnames)
   if(APPLE)
@@ -116,7 +116,7 @@
         set(libname "${name}_${os}")
       else()
         set(libname "${name}_${os}_dynamic")
-        set(extra_linkflags_${libname} ${DARWIN_${os}_LINKFLAGS} ${LIB_LINKFLAGS})
+        set(extra_link_flags_${libname} ${DARWIN_${os}_LINK_FLAGS} ${LIB_LINK_FLAGS})
       endif()
       list_intersect(LIB_ARCHS_${libname} DARWIN_${os}_ARCHS LIB_ARCHS)
       if(LIB_ARCHS_${libname})
@@ -139,7 +139,7 @@
       else()
         set(libname "${name}-dynamic-${arch}")
         set(extra_cflags_${libname} ${TARGET_${arch}_CFLAGS} ${LIB_CFLAGS})
-        set(extra_linkflags_${libname} ${TARGET_${arch}_LINKFLAGS} ${LIB_LINKFLAGS})
+        set(extra_link_flags_${libname} ${TARGET_${arch}_LINK_FLAGS} ${LIB_LINK_FLAGS})
         if(WIN32)
           set(output_name_${libname} ${name}_dynamic-${arch}${COMPILER_RT_OS_SUFFIX})
         else()
@@ -173,6 +173,7 @@
                                 -P "${CMAKE_BINARY_DIR}/cmake_install.cmake")
       set_target_properties(install-${LIB_PARENT_TARGET} PROPERTIES
                             FOLDER "Compiler-RT Misc")
+      add_dependencies(install-compiler-rt install-${LIB_PARENT_TARGET})
     endif()
   endif()
 
@@ -187,15 +188,21 @@
 
     add_library(${libname} ${type} ${sources_${libname}})
     set_target_compile_flags(${libname} ${extra_cflags_${libname}})
-    set_target_link_flags(${libname} ${extra_linkflags_${libname}})
+    set_target_link_flags(${libname} ${extra_link_flags_${libname}})
     set_property(TARGET ${libname} APPEND PROPERTY
                 COMPILE_DEFINITIONS ${LIB_DEFS})
     set_target_output_directories(${libname} ${COMPILER_RT_LIBRARY_OUTPUT_DIR})
     set_target_properties(${libname} PROPERTIES
         OUTPUT_NAME ${output_name_${libname}})
     set_target_properties(${libname} PROPERTIES FOLDER "Compiler-RT Runtime")
-    if(LIB_LINK_LIBS AND ${type} STREQUAL "SHARED")
-      target_link_libraries(${libname} ${LIB_LINK_LIBS})
+    if(${type} STREQUAL "SHARED")
+      if(LIB_LINK_LIBS)
+        target_link_libraries(${libname} ${LIB_LINK_LIBS})
+      endif()
+      if(WIN32 AND NOT CYGWIN AND NOT MINGW)
+        set_target_properties(${libname} PROPERTIES IMPORT_PREFIX "")
+        set_target_properties(${libname} PROPERTIES IMPORT_SUFFIX ".lib")
+      endif()
     endif()
     install(TARGETS ${libname}
       ARCHIVE DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR}
@@ -236,7 +243,7 @@
 # when cross compiling, COMPILER_RT_TEST_COMPILER_CFLAGS help
 # in compilation and linking of unittests.
 string(REPLACE " " ";" COMPILER_RT_UNITTEST_CFLAGS "${COMPILER_RT_TEST_COMPILER_CFLAGS}")
-set(COMPILER_RT_UNITTEST_LINKFLAGS ${COMPILER_RT_UNITTEST_CFLAGS})
+set(COMPILER_RT_UNITTEST_LINK_FLAGS ${COMPILER_RT_UNITTEST_CFLAGS})
 
 # Unittests support.
 set(COMPILER_RT_GTEST_PATH ${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest)
@@ -249,6 +256,7 @@
 )
 
 append_list_if(COMPILER_RT_DEBUG -DSANITIZER_DEBUG=1 COMPILER_RT_UNITTEST_CFLAGS)
+append_list_if(COMPILER_RT_HAS_WCOVERED_SWITCH_DEFAULT_FLAG -Wno-covered-switch-default COMPILER_RT_UNITTEST_CFLAGS)
 
 if(MSVC)
   # clang doesn't support exceptions on Windows yet.
@@ -262,12 +270,6 @@
 
   # gtest use a lot of stuff marked as deprecated on Windows.
   list(APPEND COMPILER_RT_GTEST_CFLAGS -Wno-deprecated-declarations)
-
-  # Visual Studio 2012 only supports up to 8 template parameters in
-  # std::tr1::tuple by default, but gtest requires 10
-  if(MSVC_VERSION EQUAL 1700)
-    list(APPEND COMPILER_RT_GTEST_CFLAGS -D_VARIADIC_MAX=10)
-  endif()
 endif()
 
 # Link objects into a single executable with COMPILER_RT_TEST_COMPILER,
diff --git a/cmake/Modules/BuiltinTests.cmake b/cmake/Modules/BuiltinTests.cmake
index 6bbf449..a6bf864 100644
--- a/cmake/Modules/BuiltinTests.cmake
+++ b/cmake/Modules/BuiltinTests.cmake
@@ -11,7 +11,13 @@
   file(WRITE ${SIMPLE_C} "${ARG_SOURCE}\n")
   string(REGEX MATCHALL "<[A-Za-z0-9_]*>" substitutions
          ${CMAKE_C_COMPILE_OBJECT})
-  string(REPLACE ";" " " extra_flags "${ARG_FLAGS}")
+
+  set(TRY_COMPILE_FLAGS "${ARG_FLAGS}")
+  if(CMAKE_C_COMPILER_ID MATCHES Clang AND CMAKE_C_COMPILER_TARGET)
+    list(APPEND TRY_COMPILE_FLAGS "-target ${CMAKE_C_COMPILER_TARGET}")
+  endif()
+
+  string(REPLACE ";" " " extra_flags "${TRY_COMPILE_FLAGS}")
 
   set(test_compile_command "${CMAKE_C_COMPILE_OBJECT}")
   foreach(substitution ${substitutions})
@@ -44,12 +50,13 @@
   )
 
   CHECK_COMPILER_FLAG_COMMON_PATTERNS(_CheckCCompilerFlag_COMMON_PATTERNS)
+  set(ERRORS_FOUND OFF)
   foreach(var ${_CheckCCompilerFlag_COMMON_PATTERNS})
     if("${var}" STREQUAL "FAIL_REGEX")
       continue()
     endif()
-    if("${var}" MATCHES "${_CheckCCompilerFlag_COMMON_PATTERNS}")
-      set(ERRORS_FOUND True)
+    if("${TEST_ERROR}" MATCHES "${var}" OR "${TEST_OUTPUT}" MATCHES "${var}")
+      set(ERRORS_FOUND ON)
     endif()
   endforeach()
 
diff --git a/cmake/Modules/CompilerRTDarwinUtils.cmake b/cmake/Modules/CompilerRTDarwinUtils.cmake
index fd19ff9..3c89381 100644
--- a/cmake/Modules/CompilerRTDarwinUtils.cmake
+++ b/cmake/Modules/CompilerRTDarwinUtils.cmake
@@ -7,13 +7,15 @@
   # Let's first try the internal SDK, otherwise use the public SDK.
   execute_process(
     COMMAND xcodebuild -version -sdk ${sdk_name}.internal Path
+    RESULT_VARIABLE result_process
     OUTPUT_VARIABLE var_internal
     OUTPUT_STRIP_TRAILING_WHITESPACE
     ERROR_FILE /dev/null
   )
-  if("" STREQUAL "${var_internal}")
+  if((NOT result_process EQUAL 0) OR "" STREQUAL "${var_internal}")
     execute_process(
       COMMAND xcodebuild -version -sdk ${sdk_name} Path
+      RESULT_VARIABLE result_process
       OUTPUT_VARIABLE var_internal
       OUTPUT_STRIP_TRAILING_WHITESPACE
       ERROR_FILE /dev/null
@@ -21,7 +23,9 @@
   else()
     set(${var}_INTERNAL ${var_internal} PARENT_SCOPE)
   endif()
-  set(${var} ${var_internal} PARENT_SCOPE)
+  if(result_process EQUAL 0)
+    set(${var} ${var_internal} PARENT_SCOPE)
+  endif()
 endfunction()
 
 # There isn't a clear mapping of what architectures are supported with a given
@@ -62,7 +66,7 @@
     file(WRITE ${SIMPLE_C} "#include <stdio.h>\nint main() { printf(__FILE__); return 0; }\n")
   
     set(os_linker_flags)
-    foreach(flag ${DARWIN_${os}_LINKFLAGS})
+    foreach(flag ${DARWIN_${os}_LINK_FLAGS})
       set(os_linker_flags "${os_linker_flags} ${flag}")
     endforeach()
   endif()
@@ -256,30 +260,6 @@
   set(${output_var} ${intermediate} PARENT_SCOPE)
 endfunction()
 
-function(darwin_add_eprintf_library)
-  cmake_parse_arguments(LIB
-    ""
-    ""
-    "CFLAGS"
-    ${ARGN})
-
-  add_library(clang_rt.eprintf STATIC eprintf.c)
-  set_target_compile_flags(clang_rt.eprintf
-    -isysroot ${DARWIN_osx_SYSROOT}
-    ${DARWIN_osx_BUILTIN_MIN_VER_FLAG}
-    -arch i386
-    ${LIB_CFLAGS})
-  set_target_properties(clang_rt.eprintf PROPERTIES
-      OUTPUT_NAME clang_rt.eprintf${COMPILER_RT_OS_SUFFIX})
-  set_target_properties(clang_rt.eprintf PROPERTIES
-    OSX_ARCHITECTURES i386)
-  add_dependencies(builtins clang_rt.eprintf)
-  set_target_properties(clang_rt.eprintf PROPERTIES
-        ARCHIVE_OUTPUT_DIRECTORY ${COMPILER_RT_LIBRARY_OUTPUT_DIR})
-  install(TARGETS clang_rt.eprintf
-      ARCHIVE DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR})
-endfunction()
-
 # Generates builtin libraries for all operating systems specified in ARGN. Each
 # OS library is constructed by lipo-ing together single-architecture libraries.
 macro(darwin_add_builtin_libraries)
@@ -350,8 +330,6 @@
     endif()
   endforeach()
 
-  darwin_add_eprintf_library(CFLAGS ${CFLAGS})
-
   # We put the x86 sim slices into the archives for their base OS
   foreach (os ${ARGN})
     if(NOT ${os} MATCHES ".*sim$")
diff --git a/cmake/Modules/CompilerRTLink.cmake b/cmake/Modules/CompilerRTLink.cmake
index bb96869..05c535f 100644
--- a/cmake/Modules/CompilerRTLink.cmake
+++ b/cmake/Modules/CompilerRTLink.cmake
@@ -1,16 +1,16 @@
 # Link a shared library with COMPILER_RT_TEST_COMPILER.
 # clang_link_shared(<output.so>
 #                   OBJECTS <list of input objects>
-#                   LINKFLAGS <list of link flags>
+#                   LINK_FLAGS <list of link flags>
 #                   DEPS <list of dependencies>)
 macro(clang_link_shared so_file)
-  cmake_parse_arguments(SOURCE "" "" "OBJECTS;LINKFLAGS;DEPS" ${ARGN})
+  cmake_parse_arguments(SOURCE "" "" "OBJECTS;LINK_FLAGS;DEPS" ${ARGN})
   if(NOT COMPILER_RT_STANDALONE_BUILD)
     list(APPEND SOURCE_DEPS clang)
   endif()
   add_custom_command(
     OUTPUT ${so_file}
     COMMAND ${COMPILER_RT_TEST_COMPILER} -o "${so_file}" -shared
-            ${SOURCE_LINKFLAGS} ${SOURCE_OBJECTS}
+            ${SOURCE_LINK_FLAGS} ${SOURCE_OBJECTS}
     DEPENDS ${SOURCE_DEPS})
 endmacro()
diff --git a/cmake/Modules/CompilerRTUtils.cmake b/cmake/Modules/CompilerRTUtils.cmake
index 2569297..c337523 100644
--- a/cmake/Modules/CompilerRTUtils.cmake
+++ b/cmake/Modules/CompilerRTUtils.cmake
@@ -49,7 +49,7 @@
 endmacro()
 
 macro(append_rtti_flag polarity list)
-  if(polarity)
+  if(${polarity})
     append_list_if(COMPILER_RT_HAS_FRTTI_FLAG -frtti ${list})
     append_list_if(COMPILER_RT_HAS_GR_FLAG /GR ${list})
   else()
@@ -76,6 +76,18 @@
   endforeach()
 endmacro()
 
+function(list_replace input_list old new)  # Replace each item equal to <old> with <new> in the list named <input_list>.
+  set(replaced_list)
+  foreach(item ${${input_list}})
+    if("${item}" STREQUAL "${old}")  # Quoted: compare as text, and an empty <old> stays a valid if() operand.
+      list(APPEND replaced_list ${new})
+    else()
+      list(APPEND replaced_list ${item})
+    endif()
+  endforeach()
+  set(${input_list} "${replaced_list}" PARENT_SCOPE)  # Write the result back to the caller's variable.
+endfunction()
+
 # Takes ${ARGN} and puts only supported architectures in @out_var list.
 function(filter_available_targets out_var)
   set(archs ${${out_var}})
@@ -88,6 +100,13 @@
   set(${out_var} ${archs} PARENT_SCOPE)
 endfunction()
 
+# Register ${arch} as a supported target that needs no extra compile flags.
+macro(add_default_target_arch arch)
+  list(APPEND COMPILER_RT_SUPPORTED_ARCH ${arch})
+  set(CAN_TARGET_${arch} 1)
+  set(TARGET_${arch}_CFLAGS "")
+endmacro()
+
 function(check_compile_definition def argstring out_var)
   if("${def}" STREQUAL "")
     set(${out_var} TRUE PARENT_SCOPE)
@@ -107,7 +126,7 @@
 # If successful, saves target flags for this architecture.
 macro(test_target_arch arch def)
   set(TARGET_${arch}_CFLAGS ${ARGN})
-  set(TARGET_${arch}_LINKFLAGS ${ARGN})
+  set(TARGET_${arch}_LINK_FLAGS ${ARGN})
   set(argstring "")
   foreach(arg ${ARGN})
     set(argstring "${argstring} ${arg}")
@@ -120,8 +139,12 @@
       try_compile_only(CAN_TARGET_${arch} ${TARGET_${arch}_CFLAGS})
     else()
       set(argstring "${CMAKE_EXE_LINKER_FLAGS} ${argstring}")
+      set(FLAG_NO_EXCEPTIONS "")
+      if(COMPILER_RT_HAS_FNO_EXCEPTIONS_FLAG)
+        set(FLAG_NO_EXCEPTIONS " -fno-exceptions ")
+      endif()
       try_compile(CAN_TARGET_${arch} ${CMAKE_BINARY_DIR} ${SIMPLE_SOURCE}
-                  COMPILE_DEFINITIONS "${TARGET_${arch}_CFLAGS}"
+                  COMPILE_DEFINITIONS "${TARGET_${arch}_CFLAGS} ${FLAG_NO_EXCEPTIONS}"
                   OUTPUT_VARIABLE TARGET_${arch}_OUTPUT
                   CMAKE_FLAGS "-DCMAKE_EXE_LINKER_FLAGS:STRING=${argstring}")
     endif()
@@ -185,25 +208,48 @@
     message(FATAL_ERROR "llvm-config failed with status ${HAD_ERROR}")
   endif()
   string(REGEX REPLACE "[ \t]*[\r\n]+[ \t]*" ";" CONFIG_OUTPUT ${CONFIG_OUTPUT})
-  list(GET CONFIG_OUTPUT 0 LLVM_BINARY_DIR)
-  list(GET CONFIG_OUTPUT 1 LLVM_TOOLS_BINARY_DIR)
-  list(GET CONFIG_OUTPUT 2 LLVM_LIBRARY_DIR)
-  list(GET CONFIG_OUTPUT 3 LLVM_MAIN_SRC_DIR)
+  list(GET CONFIG_OUTPUT 0 BINARY_DIR)
+  list(GET CONFIG_OUTPUT 1 TOOLS_BINARY_DIR)
+  list(GET CONFIG_OUTPUT 2 LIBRARY_DIR)
+  list(GET CONFIG_OUTPUT 3 MAIN_SRC_DIR)
+
+  set(LLVM_BINARY_DIR ${BINARY_DIR} CACHE PATH "Path to LLVM build tree")
+  set(LLVM_TOOLS_BINARY_DIR ${TOOLS_BINARY_DIR} CACHE PATH "Path to llvm/bin")
+  set(LLVM_LIBRARY_DIR ${LIBRARY_DIR} CACHE PATH "Path to llvm/lib")
+  set(LLVM_MAIN_SRC_DIR ${MAIN_SRC_DIR} CACHE PATH "Path to LLVM source tree")
 
   # Make use of LLVM CMake modules.
-  file(TO_CMAKE_PATH ${LLVM_BINARY_DIR} LLVM_BINARY_DIR_CMAKE_STYLE)
-  set(LLVM_CMAKE_PATH "${LLVM_BINARY_DIR_CMAKE_STYLE}/lib${LLVM_LIBDIR_SUFFIX}/cmake/llvm")
+  # --cmakedir is supported since llvm r291218 (4.0 release)
+  execute_process(
+    COMMAND ${LLVM_CONFIG_PATH} --cmakedir
+    RESULT_VARIABLE HAD_ERROR
+    OUTPUT_VARIABLE CONFIG_OUTPUT)
+  if(NOT HAD_ERROR)
+    string(STRIP "${CONFIG_OUTPUT}" LLVM_CMAKE_PATH)
+  else()
+    file(TO_CMAKE_PATH ${LLVM_BINARY_DIR} LLVM_BINARY_DIR_CMAKE_STYLE)
+    set(LLVM_CMAKE_PATH "${LLVM_BINARY_DIR_CMAKE_STYLE}/lib${LLVM_LIBDIR_SUFFIX}/cmake/llvm")
+  endif()
+
   list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_PATH}")
   # Get some LLVM variables from LLVMConfig.
   include("${LLVM_CMAKE_PATH}/LLVMConfig.cmake")
 
   set(LLVM_LIBRARY_OUTPUT_INTDIR
-    ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${LLVM_LIBDIR_SUFFIX})
+    ${LLVM_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${LLVM_LIBDIR_SUFFIX})
 endmacro()
 
 macro(construct_compiler_rt_default_triple)
-  set(COMPILER_RT_DEFAULT_TARGET_TRIPLE ${TARGET_TRIPLE} CACHE STRING
-      "Default triple for which compiler-rt runtimes will be built.")
+  if(COMPILER_RT_DEFAULT_TARGET_ONLY)
+    if(DEFINED COMPILER_RT_DEFAULT_TARGET_TRIPLE)
+      message(FATAL_ERROR "COMPILER_RT_DEFAULT_TARGET_TRIPLE isn't supported when building for default target only")
+    endif()
+    set(COMPILER_RT_DEFAULT_TARGET_TRIPLE ${CMAKE_C_COMPILER_TARGET})
+  else()
+    set(COMPILER_RT_DEFAULT_TARGET_TRIPLE ${TARGET_TRIPLE} CACHE STRING
+          "Default triple for which compiler-rt runtimes will be built.")
+  endif()
+
   if(DEFINED COMPILER_RT_TEST_TARGET_TRIPLE)
     # Backwards compatibility: this variable used to be called
     # COMPILER_RT_TEST_TARGET_TRIPLE.
@@ -213,7 +259,10 @@
   string(REPLACE "-" ";" TARGET_TRIPLE_LIST ${COMPILER_RT_DEFAULT_TARGET_TRIPLE})
   list(GET TARGET_TRIPLE_LIST 0 COMPILER_RT_DEFAULT_TARGET_ARCH)
   list(GET TARGET_TRIPLE_LIST 1 COMPILER_RT_DEFAULT_TARGET_OS)
-  list(GET TARGET_TRIPLE_LIST 2 COMPILER_RT_DEFAULT_TARGET_ABI)
+  list(LENGTH TARGET_TRIPLE_LIST TARGET_TRIPLE_LIST_LENGTH)
+  if(TARGET_TRIPLE_LIST_LENGTH GREATER 2)
+    list(GET TARGET_TRIPLE_LIST 2 COMPILER_RT_DEFAULT_TARGET_ABI)
+  endif()
   # Determine if test target triple is specified explicitly, and doesn't match the
   # default.
   if(NOT COMPILER_RT_DEFAULT_TARGET_TRIPLE STREQUAL TARGET_TRIPLE)
diff --git a/cmake/Modules/SanitizerUtils.cmake b/cmake/Modules/SanitizerUtils.cmake
index c66083c..c80fc3b 100644
--- a/cmake/Modules/SanitizerUtils.cmake
+++ b/cmake/Modules/SanitizerUtils.cmake
@@ -46,6 +46,17 @@
   endforeach()
 endmacro()
 
+# Darwin-only helper: undefined symbols have to be named on the linker command
+# line there, so one -Wl,-U flag is appended per line of weak_symbols.txt.
+function(add_weak_symbols libname link_flags)
+  file(STRINGS "${COMPILER_RT_SOURCE_DIR}/lib/${libname}/weak_symbols.txt" WEAK_SYMBOLS)
+  set(local_link_flags ${${link_flags}})
+  foreach(weak_symbol ${WEAK_SYMBOLS})
+    list(APPEND local_link_flags -Wl,-U,${weak_symbol})
+  endforeach()
+  set(${link_flags} ${local_link_flags} PARENT_SCOPE)
+endfunction()
+
 macro(add_sanitizer_rt_version_list name)
   set(vers ${CMAKE_CURRENT_BINARY_DIR}/${name}.vers)
   cmake_parse_arguments(ARG "" "" "LIBS;EXTRA" ${ARGN})
diff --git a/cmake/base-config-ix.cmake b/cmake/base-config-ix.cmake
index 4f3976d..71d1499 100644
--- a/cmake/base-config-ix.cmake
+++ b/cmake/base-config-ix.cmake
@@ -8,6 +8,7 @@
 
 # Top level target used to build all compiler-rt libraries.
 add_custom_target(compiler-rt ALL)
+add_custom_target(install-compiler-rt)
 set_target_properties(compiler-rt PROPERTIES FOLDER "Compiler-RT Misc")
 
 # Setting these variables from an LLVM build is sufficient that compiler-rt can
@@ -80,9 +81,20 @@
     set(OSX_SYSROOT_FLAG "-isysroot${OSX_SYSROOT}")
   endif()
 
-  option(COMPILER_RT_ENABLE_IOS "Enable building for iOS" Off)
+  option(COMPILER_RT_ENABLE_IOS "Enable building for iOS" On)
   option(COMPILER_RT_ENABLE_WATCHOS "Enable building for watchOS - Experimental" Off)
   option(COMPILER_RT_ENABLE_TVOS "Enable building for tvOS - Experimental" Off)
+else()
+  option(COMPILER_RT_DEFAULT_TARGET_ONLY "Build builtins only for the default target" Off)
+endif()
+
+if(WIN32 AND NOT MINGW AND NOT CYGWIN)  # Windows builds other than MinGW and Cygwin.
+  set(CMAKE_SHARED_LIBRARY_PREFIX_C "")  # Shared libraries: empty file-name prefix (C and C++).
+  set(CMAKE_SHARED_LIBRARY_PREFIX_CXX "")
+  set(CMAKE_STATIC_LIBRARY_PREFIX_C "")  # Static libraries: empty file-name prefix as well,
+  set(CMAKE_STATIC_LIBRARY_PREFIX_CXX "")
+  set(CMAKE_STATIC_LIBRARY_SUFFIX_C ".lib")  # and a ".lib" suffix.
+  set(CMAKE_STATIC_LIBRARY_SUFFIX_CXX ".lib")
+endif()
 
 macro(test_targets)
@@ -117,7 +129,9 @@
     detect_target_arch()
     set(COMPILER_RT_OS_SUFFIX "-android")
   elseif(NOT APPLE) # Supported archs for Apple platforms are generated later
-    if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "i[2-6]86|x86|amd64")
+    if(COMPILER_RT_DEFAULT_TARGET_ONLY)
+      add_default_target_arch(${COMPILER_RT_DEFAULT_TARGET_ARCH})
+    elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "i[2-6]86|x86|amd64")
       if(NOT MSVC)
         test_target_arch(x86_64 "" "-m64")
         # FIXME: We build runtimes for both i686 and i386, as "clang -m32" may
diff --git a/cmake/builtin-config-ix.cmake b/cmake/builtin-config-ix.cmake
index da055b5..8cb4ca1 100644
--- a/cmake/builtin-config-ix.cmake
+++ b/cmake/builtin-config-ix.cmake
@@ -4,16 +4,15 @@
 # Make all the tests only check the compiler
 set(TEST_COMPILE_ONLY On)
 
+# Check host compiler support for certain flags
 builtin_check_c_compiler_flag(-fPIC                 COMPILER_RT_HAS_FPIC_FLAG)
 builtin_check_c_compiler_flag(-fPIE                 COMPILER_RT_HAS_FPIE_FLAG)
 builtin_check_c_compiler_flag(-fno-builtin          COMPILER_RT_HAS_FNO_BUILTIN_FLAG)
-builtin_check_c_compiler_flag(-std=c99              COMPILER_RT_HAS_STD_C99_FLAG)
+builtin_check_c_compiler_flag(-std=c11              COMPILER_RT_HAS_STD_C11_FLAG)
 builtin_check_c_compiler_flag(-fvisibility=hidden   COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG)
 builtin_check_c_compiler_flag(-fomit-frame-pointer  COMPILER_RT_HAS_OMIT_FRAME_POINTER_FLAG)
 builtin_check_c_compiler_flag(-ffreestanding        COMPILER_RT_HAS_FREESTANDING_FLAG)
-builtin_check_c_compiler_flag(-mfloat-abi=soft      COMPILER_RT_HAS_FLOAT_ABI_SOFT_FLAG)
-builtin_check_c_compiler_flag(-mfloat-abi=hard      COMPILER_RT_HAS_FLOAT_ABI_HARD_FLAG)
-builtin_check_c_compiler_flag(-static               COMPILER_RT_HAS_STATIC_FLAG)
+builtin_check_c_compiler_flag(-fxray-instrument     COMPILER_RT_HAS_XRAY_COMPILER_FLAG)
 
 builtin_check_c_compiler_source(COMPILER_RT_HAS_ATOMIC_KEYWORD
 "
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index 59c64e2..e0e4355 100644
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -29,6 +29,7 @@
 check_cxx_compiler_flag(-ftls-model=initial-exec COMPILER_RT_HAS_FTLS_MODEL_INITIAL_EXEC)
 check_cxx_compiler_flag(-fno-lto             COMPILER_RT_HAS_FNO_LTO_FLAG)
 check_cxx_compiler_flag("-Werror -msse3" COMPILER_RT_HAS_MSSE3_FLAG)
+check_cxx_compiler_flag("-Werror -msse4.2"   COMPILER_RT_HAS_MSSE4_2_FLAG)
 check_cxx_compiler_flag(--sysroot=.          COMPILER_RT_HAS_SYSROOT_FLAG)
 
 if(NOT WIN32 AND NOT CYGWIN)
@@ -56,6 +57,7 @@
 check_cxx_compiler_flag("-Werror -Wnon-virtual-dtor"   COMPILER_RT_HAS_WNON_VIRTUAL_DTOR_FLAG)
 check_cxx_compiler_flag("-Werror -Wvariadic-macros"    COMPILER_RT_HAS_WVARIADIC_MACROS_FLAG)
 check_cxx_compiler_flag("-Werror -Wunused-parameter"   COMPILER_RT_HAS_WUNUSED_PARAMETER_FLAG)
+check_cxx_compiler_flag("-Werror -Wcovered-switch-default" COMPILER_RT_HAS_WCOVERED_SWITCH_DEFAULT_FLAG)
 
 check_cxx_compiler_flag(/W4 COMPILER_RT_HAS_W4_FLAG)
 check_cxx_compiler_flag(/WX COMPILER_RT_HAS_WX_FLAG)
@@ -92,16 +94,9 @@
 # platform. We use the results of these tests to build only the various target
 # runtime libraries supported by our current compilers cross-compiling
 # abilities.
-set(SIMPLE_SOURCE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/simple.c)
+set(SIMPLE_SOURCE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/simple.cc)
 file(WRITE ${SIMPLE_SOURCE} "#include <stdlib.h>\n#include <stdio.h>\nint main() { printf(\"hello, world\"); }\n")
 
-# Add $arch as supported with no additional flags.
-macro(add_default_target_arch arch)
-  set(TARGET_${arch}_CFLAGS "")
-  set(CAN_TARGET_${arch} 1)
-  list(APPEND COMPILER_RT_SUPPORTED_ARCH ${arch})
-endmacro()
-
 # Detect whether the current target platform is 32-bit or 64-bit, and setup
 # the correct commandline flags needed to attempt to target 32-bit and 64-bit.
 if (NOT CMAKE_SIZEOF_VOID_P EQUAL 4 AND
@@ -166,20 +161,20 @@
 set(ALL_SANITIZER_COMMON_SUPPORTED_ARCH ${X86} ${X86_64} ${PPC64}
     ${ARM32} ${ARM64} ${MIPS32} ${MIPS64} ${S390X})
 set(ALL_ASAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
-    ${MIPS32} ${MIPS64} ${PPC64})
+    ${MIPS32} ${MIPS64} ${PPC64} ${S390X})
 set(ALL_DFSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64})
 set(ALL_LSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64})
 set(ALL_MSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64})
 set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC64}
-    ${MIPS32} ${MIPS64})
+    ${MIPS32} ${MIPS64} ${S390X})
 set(ALL_TSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64})
 set(ALL_UBSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
     ${MIPS32} ${MIPS64} ${PPC64} ${S390X})
 set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64})
 set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64})
-set(ALL_ESAN_SUPPORTED_ARCH ${X86_64})
-set(ALL_SCUDO_SUPPORTED_ARCH ${X86_64})
-set(ALL_XRAY_SUPPORTED_ARCH ${X86_64})
+set(ALL_ESAN_SUPPORTED_ARCH ${X86_64} ${MIPS64})
+set(ALL_SCUDO_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32})
+set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64})
 
 if(APPLE)
   include(CompilerRTDarwinUtils)
@@ -192,6 +187,14 @@
   find_darwin_sdk_dir(DARWIN_tvossim_SYSROOT appletvsimulator)
   find_darwin_sdk_dir(DARWIN_tvos_SYSROOT appletvos)
 
+  if(NOT DARWIN_osx_SYSROOT)  # no OS X sysroot is set at this point
+    if(EXISTS /usr/include)  # headers exist under /, so use / as the sysroot
+      set(DARWIN_osx_SYSROOT /)
+    else()  # nothing usable: abort configuration rather than continue without a sysroot
+      message(FATAL_ERROR "Could not detect OS X Sysroot. Either install Xcode or the Apple Command Line Tools")
+    endif()
+  endif()
+
   if(COMPILER_RT_ENABLE_IOS)
     list(APPEND DARWIN_EMBEDDED_PLATFORMS ios)
     set(DARWIN_ios_MIN_VER_FLAG -miphoneos-version-min)
@@ -239,26 +242,26 @@
   set(CMAKE_OSX_DEPLOYMENT_TARGET "")
   
   set(DARWIN_COMMON_CFLAGS -stdlib=libc++)
-  set(DARWIN_COMMON_LINKFLAGS
+  set(DARWIN_COMMON_LINK_FLAGS
     -stdlib=libc++
     -lc++
     -lc++abi)
   
   check_linker_flag("-fapplication-extension" COMPILER_RT_HAS_APP_EXTENSION)
   if(COMPILER_RT_HAS_APP_EXTENSION)
-    list(APPEND DARWIN_COMMON_LINKFLAGS "-fapplication-extension")
+    list(APPEND DARWIN_COMMON_LINK_FLAGS "-fapplication-extension")
   endif()
 
   set(DARWIN_osx_CFLAGS
     ${DARWIN_COMMON_CFLAGS}
     -mmacosx-version-min=${SANITIZER_MIN_OSX_VERSION})
-  set(DARWIN_osx_LINKFLAGS
-    ${DARWIN_COMMON_LINKFLAGS}
+  set(DARWIN_osx_LINK_FLAGS
+    ${DARWIN_COMMON_LINK_FLAGS}
     -mmacosx-version-min=${SANITIZER_MIN_OSX_VERSION})
 
   if(DARWIN_osx_SYSROOT)
     list(APPEND DARWIN_osx_CFLAGS -isysroot ${DARWIN_osx_SYSROOT})
-    list(APPEND DARWIN_osx_LINKFLAGS -isysroot ${DARWIN_osx_SYSROOT})
+    list(APPEND DARWIN_osx_LINK_FLAGS -isysroot ${DARWIN_osx_SYSROOT})
   endif()
 
   # Figure out which arches to use for each OS
@@ -281,8 +284,8 @@
           ${DARWIN_COMMON_CFLAGS}
           ${DARWIN_${platform}_SANITIZER_MIN_VER_FLAG}
           -isysroot ${DARWIN_${platform}sim_SYSROOT})
-        set(DARWIN_${platform}sim_LINKFLAGS
-          ${DARWIN_COMMON_LINKFLAGS}
+        set(DARWIN_${platform}sim_LINK_FLAGS
+          ${DARWIN_COMMON_LINK_FLAGS}
           ${DARWIN_${platform}_SANITIZER_MIN_VER_FLAG}
           -isysroot ${DARWIN_${platform}sim_SYSROOT})
 
@@ -309,8 +312,8 @@
           ${DARWIN_COMMON_CFLAGS}
           ${DARWIN_${platform}_SANITIZER_MIN_VER_FLAG}
           -isysroot ${DARWIN_${platform}_SYSROOT})
-        set(DARWIN_${platform}_LINKFLAGS
-          ${DARWIN_COMMON_LINKFLAGS}
+        set(DARWIN_${platform}_LINK_FLAGS
+          ${DARWIN_COMMON_LINK_FLAGS}
           ${DARWIN_${platform}_SANITIZER_MIN_VER_FLAG}
           -isysroot ${DARWIN_${platform}_SYSROOT})
 
@@ -397,8 +400,7 @@
     ${ALL_SAFESTACK_SUPPORTED_ARCH})
   filter_available_targets(CFI_SUPPORTED_ARCH ${ALL_CFI_SUPPORTED_ARCH})
   filter_available_targets(ESAN_SUPPORTED_ARCH ${ALL_ESAN_SUPPORTED_ARCH})
-  filter_available_targets(SCUDO_SUPPORTED_ARCH
-    ${ALL_SCUDO_SUPPORTED_ARCH})
+  filter_available_targets(SCUDO_SUPPORTED_ARCH ${ALL_SCUDO_SUPPORTED_ARCH})
   filter_available_targets(XRAY_SUPPORTED_ARCH ${ALL_XRAY_SUPPORTED_ARCH})
 endif()
 
@@ -414,6 +416,11 @@
   set(CAN_SYMBOLIZE 1)
 endif()
 
+find_program(GOLD_EXECUTABLE NAMES ${LLVM_DEFAULT_TARGET_TRIPLE}-ld.gold ld.gold ${LLVM_DEFAULT_TARGET_TRIPLE}-ld ld DOC "The gold linker")
+
+if(COMPILER_RT_SUPPORTED_ARCH)
+  list(REMOVE_DUPLICATES COMPILER_RT_SUPPORTED_ARCH)
+endif()
 message(STATUS "Compiler-RT supported architectures: ${COMPILER_RT_SUPPORTED_ARCH}")
 
 if(ANDROID)
@@ -422,9 +429,14 @@
   set(OS_NAME "${CMAKE_SYSTEM_NAME}")
 endif()
 
+set(ALL_SANITIZERS asan;dfsan;msan;tsan;safestack;cfi;esan;scudo)
+set(COMPILER_RT_SANITIZERS_TO_BUILD ${ALL_SANITIZERS} CACHE STRING
+    "sanitizers to build if supported on the target (all;${ALL_SANITIZERS})")
+list_replace(COMPILER_RT_SANITIZERS_TO_BUILD all "${ALL_SANITIZERS}")
+
 if (SANITIZER_COMMON_SUPPORTED_ARCH AND NOT LLVM_USE_SANITIZER AND
     (OS_NAME MATCHES "Android|Darwin|Linux|FreeBSD" OR
-    (OS_NAME MATCHES "Windows" AND MSVC)))
+    (OS_NAME MATCHES "Windows" AND (NOT MINGW AND NOT CYGWIN))))
   set(COMPILER_RT_HAS_SANITIZER_COMMON TRUE)
 else()
   set(COMPILER_RT_HAS_SANITIZER_COMMON FALSE)
@@ -471,7 +483,7 @@
   set(COMPILER_RT_HAS_MSAN FALSE)
 endif()
 
-if (PROFILE_SUPPORTED_ARCH AND
+if (PROFILE_SUPPORTED_ARCH AND NOT LLVM_USE_SANITIZER AND
     OS_NAME MATCHES "Darwin|Linux|FreeBSD|Windows")
   set(COMPILER_RT_HAS_PROFILE TRUE)
 else()
diff --git a/include/sanitizer/common_interface_defs.h b/include/sanitizer/common_interface_defs.h
index d8ff272..f9f9302 100644
--- a/include/sanitizer/common_interface_defs.h
+++ b/include/sanitizer/common_interface_defs.h
@@ -179,7 +179,16 @@
   // use-after-return detection.
   void __sanitizer_start_switch_fiber(void **fake_stack_save,
                                       const void *bottom, size_t size);
-  void __sanitizer_finish_switch_fiber(void *fake_stack_save);
+  void __sanitizer_finish_switch_fiber(void *fake_stack_save,
+                                       const void **bottom_old,
+                                       size_t *size_old);
+
+  // Get full module name and calculate pc offset within it.
+  // Returns 1 if pc belongs to some module, 0 if module was not found.
+  int __sanitizer_get_module_and_offset_for_pc(void *pc, char *module_path,
+                                               size_t module_path_len,
+                                               void **pc_offset);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/include/sanitizer/coverage_interface.h b/include/sanitizer/coverage_interface.h
index 72ac843..b44c5ac 100644
--- a/include/sanitizer/coverage_interface.h
+++ b/include/sanitizer/coverage_interface.h
@@ -23,6 +23,11 @@
   void __sanitizer_cov_init();
   // Record and dump coverage info.
   void __sanitizer_cov_dump();
+
+  //  Dump collected coverage info. Sorts pcs by module into individual
+  //  .sancov files.
+  void __sanitizer_dump_coverage(const uintptr_t *pcs, uintptr_t len);
+
   // Open <name>.sancov.packed in the coverage directory and return the file
   // descriptor. Returns -1 on failure, or if coverage dumping is disabled.
   // This is intended for use by sandboxing code.
@@ -59,14 +64,6 @@
   uintptr_t
   __sanitizer_update_counter_bitset_and_clear_counters(uint8_t *bitset);
 
-  // EXPERIMENTAL API
-  // Set allocated buffer to record new coverage PCs as they are executed.
-  // Buffer length is specified in uptrs.
-  void __sanitizer_set_coverage_pc_buffer(uintptr_t *buffer, uintptr_t length);
-  // Number of pcs recorded in the buffer.
-  // Reset by __sanitizer_reset_coverage();
-  uintptr_t __sanitizer_get_coverage_pc_buffer_pos();
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/include/xray/xray_interface.h b/include/xray/xray_interface.h
index 22f137d..9e712b1 100644
--- a/include/xray/xray_interface.h
+++ b/include/xray/xray_interface.h
@@ -18,7 +18,7 @@
 
 extern "C" {
 
-enum XRayEntryType { ENTRY = 0, EXIT = 1 };
+enum XRayEntryType { ENTRY = 0, EXIT = 1, TAIL = 2 };
 
 // Provide a function to invoke for when instrumentation points are hit. This is
 // a user-visible control surface that overrides the default implementation. The
@@ -32,6 +32,13 @@
 //                 (function entry, function exit, etc.). See the enum
 //                 XRayEntryType for more details.
 //
+// The user handler must correctly handle spurious calls after this handler is
+// removed or replaced with another handler, because it would be too costly for
+// XRay runtime to avoid spurious calls.
+// To prevent circular calling, the handler function itself and all its
+// direct&indirect callees must not be instrumented with XRay, which can be
+// achieved by marking them all with: __attribute__((xray_never_instrument))
+//
 // Returns 1 on success, 0 on error.
 extern int __xray_set_handler(void (*entry)(int32_t, XRayEntryType));
 
diff --git a/include/xray/xray_records.h b/include/xray/xray_records.h
new file mode 100644
index 0000000..34c236b
--- /dev/null
+++ b/include/xray/xray_records.h
@@ -0,0 +1,80 @@
+//===-- xray_records.h ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This header exposes some record types useful for the XRay in-memory logging
+// implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XRAY_XRAY_RECORDS_H
+#define XRAY_XRAY_RECORDS_H
+
+namespace __xray {
+
+enum FileTypes {
+  NAIVE_LOG = 0,
+};
+
+// This data structure is used to describe the contents of the file. We use this
+// for versioning the supported XRay file formats.
+struct alignas(32) XRayFileHeader {
+  uint16_t Version = 0;
+
+  // The type of file we're writing out. See the FileTypes enum for more
+  // information. This allows different implementations of the XRay logging to
+  // have different files for different information being stored.
+  uint16_t Type = 0;
+
+  // What follows are a set of flags that indicate useful things for when
+  // reading the data in the file.
+  bool ConstantTSC : 1;
+  bool NonstopTSC : 1;
+
+  // The rate at which the TSC increases per second.
+  alignas(8) uint64_t CycleFrequency = 0;
+} __attribute__((packed));
+
+static_assert(sizeof(XRayFileHeader) == 32, "XRayFileHeader != 32 bytes");
+
+enum RecordTypes {
+  NORMAL = 0,
+};
+
+struct alignas(32) XRayRecord {
+  // This is the type of the record being written. We use 16 bits to allow us to
+  // treat this as a discriminant, and so that the first 4 bytes get packed
+  // properly. See RecordTypes for more supported types.
+  uint16_t RecordType = 0;
+
+  // The CPU where the thread is running. We assume number of CPUs <= 256.
+  uint8_t CPU = 0;
+
+  // The type of the event. Usually either ENTRY = 0 or EXIT = 1.
+  uint8_t Type = 0;
+
+  // The function ID for the record.
+  int32_t FuncId = 0;
+
+  // Get the full 8 bytes of the TSC when we get the log record.
+  uint64_t TSC = 0;
+
+  // The thread ID for the currently running thread.
+  uint32_t TId = 0;
+
+  // Use some bytes at the end of the record for buffers.
+  char Buffer[4] = {};
+} __attribute__((packed));
+
+static_assert(sizeof(XRayRecord) == 32, "XRayRecord != 32 bytes");
+
+} // namespace __xray
+
+#endif // XRAY_XRAY_RECORDS_H
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index ce96fe4..4ab1e93 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -17,10 +17,27 @@
   add_subdirectory(builtins)
 endif()
 
-if(COMPILER_RT_BUILD_SANITIZERS)
-  if(COMPILER_RT_HAS_INTERCEPTION)
-    add_subdirectory(interception)
+function(compiler_rt_build_runtime runtime)
+  string(TOUPPER ${runtime} runtime_uppercase)
+  if(COMPILER_RT_HAS_${runtime_uppercase})
+    add_subdirectory(${runtime})
+    foreach(directory ${ARGN})
+      add_subdirectory(${directory})
+    endforeach()
   endif()
+endfunction()
+
+function(compiler_rt_build_sanitizer sanitizer)
+  string(TOUPPER ${sanitizer} sanitizer_uppercase)
+  string(TOLOWER ${sanitizer} sanitizer_lowercase)
+  list(FIND COMPILER_RT_SANITIZERS_TO_BUILD ${sanitizer_lowercase} result)
+  if(NOT ${result} EQUAL -1)
+    compiler_rt_build_runtime(${sanitizer} ${ARGN})
+  endif()
+endfunction()
+
+if(COMPILER_RT_BUILD_SANITIZERS)
+  compiler_rt_build_runtime(interception)
 
   if(COMPILER_RT_HAS_SANITIZER_COMMON)
     add_subdirectory(stats)
@@ -28,44 +45,18 @@
     add_subdirectory(ubsan)
   endif()
 
-  if(COMPILER_RT_HAS_ASAN)
-    add_subdirectory(asan)
-  endif()
+  compiler_rt_build_sanitizer(asan)
+  compiler_rt_build_sanitizer(dfsan)
+  compiler_rt_build_sanitizer(msan)
+  compiler_rt_build_sanitizer(tsan tsan/dd)
+  compiler_rt_build_sanitizer(safestack)
+  compiler_rt_build_sanitizer(cfi)
+  compiler_rt_build_sanitizer(esan)
+  compiler_rt_build_sanitizer(scudo)
 
-  if(COMPILER_RT_HAS_DFSAN)
-    add_subdirectory(dfsan)
-  endif()
-
-  if(COMPILER_RT_HAS_MSAN)
-    add_subdirectory(msan)
-  endif()
-
-  if(COMPILER_RT_HAS_PROFILE)
-    add_subdirectory(profile)
-  endif()
-
-  if(COMPILER_RT_HAS_TSAN)
-    add_subdirectory(tsan)
-    add_subdirectory(tsan/dd)
-  endif()
-
-  if(COMPILER_RT_HAS_SAFESTACK)
-    add_subdirectory(safestack)
-  endif()
-
-  if(COMPILER_RT_HAS_CFI)
-    add_subdirectory(cfi)
-  endif()
-
-  if(COMPILER_RT_HAS_ESAN)
-    add_subdirectory(esan)
-  endif()
-
-  if(COMPILER_RT_HAS_SCUDO)
-    add_subdirectory(scudo)
-  endif()
+  compiler_rt_build_runtime(profile)
 endif()
 
-if(COMPILER_RT_BUILD_XRAY AND COMPILER_RT_HAS_XRAY)
-  add_subdirectory(xray)
+if(COMPILER_RT_BUILD_XRAY)
+  compiler_rt_build_runtime(xray)
 endif()
diff --git a/lib/Makefile.mk b/lib/Makefile.mk
deleted file mode 100644
index b1540bd..0000000
--- a/lib/Makefile.mk
+++ /dev/null
@@ -1,13 +0,0 @@
-#===- lib/Makefile.mk --------------------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-SubDirs :=
-
-# Add submodules.
-SubDirs += builtins
diff --git a/lib/asan/CMakeLists.txt b/lib/asan/CMakeLists.txt
index e37705e..5ac5708 100644
--- a/lib/asan/CMakeLists.txt
+++ b/lib/asan/CMakeLists.txt
@@ -9,6 +9,7 @@
   asan_fake_stack.cc
   asan_flags.cc
   asan_globals.cc
+  asan_globals_win.cc
   asan_interceptors.cc
   asan_linux.cc
   asan_mac.cc
@@ -37,14 +38,9 @@
 set(ASAN_CFLAGS ${SANITIZER_COMMON_CFLAGS})
 append_rtti_flag(OFF ASAN_CFLAGS)
 
-set(ASAN_COMMON_DEFINITIONS
-  ASAN_HAS_EXCEPTIONS=1)
-
 set(ASAN_DYNAMIC_LINK_FLAGS)
 
 if(ANDROID)
-  list(APPEND ASAN_COMMON_DEFINITIONS
-    ASAN_LOW_MEMORY=1)
 # On Android, -z global does not do what it is documented to do.
 # On Android, -z global moves the library ahead in the lookup order,
 # placing it right after the LD_PRELOADs. This is used to compensate for the fact
@@ -110,6 +106,10 @@
 add_compiler_rt_component(asan)
 
 if(APPLE)
+  add_weak_symbols("asan" WEAK_SYMBOL_LINK_FLAGS)
+  add_weak_symbols("ubsan" WEAK_SYMBOL_LINK_FLAGS)
+  add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
+
   add_compiler_rt_runtime(clang_rt.asan
     SHARED
     OS ${SANITIZER_COMMON_SUPPORTED_OS}
@@ -121,6 +121,7 @@
                 RTLSanCommon
                 RTUbsan
     CFLAGS ${ASAN_DYNAMIC_CFLAGS}
+    LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS}
     DEFS ${ASAN_DYNAMIC_DEFINITIONS}
     PARENT_TARGET asan)
 else()
@@ -187,7 +188,7 @@
               RTAsan_dynamic_version_script_dummy
               RTUbsan_cxx
       CFLAGS ${ASAN_DYNAMIC_CFLAGS}
-      LINKFLAGS ${ASAN_DYNAMIC_LINK_FLAGS}
+      LINK_FLAGS ${ASAN_DYNAMIC_LINK_FLAGS}
                 ${VERSION_SCRIPT_FLAG}
       LINK_LIBS ${ASAN_DYNAMIC_LIBS}
       DEFS ${ASAN_DYNAMIC_DEFINITIONS}
@@ -208,15 +209,25 @@
         STATIC
         ARCHS ${arch}
         SOURCES asan_win_dll_thunk.cc
+                asan_globals_win.cc
                 $<TARGET_OBJECTS:RTInterception.${arch}>
         CFLAGS ${ASAN_CFLAGS} -DASAN_DLL_THUNK
         DEFS ${ASAN_COMMON_DEFINITIONS}
         PARENT_TARGET asan)
+
+      set(DYNAMIC_RUNTIME_THUNK_CFLAGS "-DASAN_DYNAMIC_RUNTIME_THUNK")
+      if(MSVC)
+        list(APPEND DYNAMIC_RUNTIME_THUNK_CFLAGS "-Zl")
+      elseif(CMAKE_C_COMPILER_ID MATCHES Clang)
+        list(APPEND DYNAMIC_RUNTIME_THUNK_CFLAGS "-nodefaultlibs")
+      endif()
+
       add_compiler_rt_runtime(clang_rt.asan_dynamic_runtime_thunk
         STATIC
         ARCHS ${arch}
         SOURCES asan_win_dynamic_runtime_thunk.cc
-        CFLAGS ${ASAN_CFLAGS} -DASAN_DYNAMIC_RUNTIME_THUNK -Zl
+                asan_globals_win.cc
+        CFLAGS ${ASAN_CFLAGS} ${DYNAMIC_RUNTIME_THUNK_CFLAGS}
         DEFS ${ASAN_COMMON_DEFINITIONS}
         PARENT_TARGET asan)
     endif()
diff --git a/lib/asan/asan_activation.cc b/lib/asan/asan_activation.cc
index a5ace85..7e4e604 100644
--- a/lib/asan/asan_activation.cc
+++ b/lib/asan/asan_activation.cc
@@ -77,13 +77,16 @@
 
   void Print() {
     Report(
-        "quarantine_size_mb %d, max_redzone %d, poison_heap %d, "
-        "malloc_context_size %d, alloc_dealloc_mismatch %d, "
-        "allocator_may_return_null %d, coverage %d, coverage_dir %s\n",
-        allocator_options.quarantine_size_mb, allocator_options.max_redzone,
-        poison_heap, malloc_context_size,
+        "quarantine_size_mb %d, thread_local_quarantine_size_kb %d, "
+        "max_redzone %d, poison_heap %d, malloc_context_size %d, "
+        "alloc_dealloc_mismatch %d, allocator_may_return_null %d, coverage %d, "
+        "coverage_dir %s, allocator_release_to_os_interval_ms %d\n",
+        allocator_options.quarantine_size_mb,
+        allocator_options.thread_local_quarantine_size_kb,
+        allocator_options.max_redzone, poison_heap, malloc_context_size,
         allocator_options.alloc_dealloc_mismatch,
-        allocator_options.may_return_null, coverage, coverage_dir);
+        allocator_options.may_return_null, coverage, coverage_dir,
+        allocator_options.release_to_os_interval_ms);
   }
 } asan_deactivated_flags;
 
@@ -107,6 +110,7 @@
 
   AllocatorOptions disabled = asan_deactivated_flags.allocator_options;
   disabled.quarantine_size_mb = 0;
+  disabled.thread_local_quarantine_size_kb = 0;
   disabled.min_redzone = 16;  // Redzone must be at least 16 bytes long.
   disabled.max_redzone = 16;
   disabled.alloc_dealloc_mismatch = false;
diff --git a/lib/asan/asan_activation_flags.inc b/lib/asan/asan_activation_flags.inc
index d4c089e..1c66e5b 100644
--- a/lib/asan/asan_activation_flags.inc
+++ b/lib/asan/asan_activation_flags.inc
@@ -24,6 +24,7 @@
 ASAN_ACTIVATION_FLAG(int, redzone)
 ASAN_ACTIVATION_FLAG(int, max_redzone)
 ASAN_ACTIVATION_FLAG(int, quarantine_size_mb)
+ASAN_ACTIVATION_FLAG(int, thread_local_quarantine_size_kb)
 ASAN_ACTIVATION_FLAG(bool, alloc_dealloc_mismatch)
 ASAN_ACTIVATION_FLAG(bool, poison_heap)
 
@@ -33,3 +34,4 @@
 COMMON_ACTIVATION_FLAG(const char *, coverage_dir)
 COMMON_ACTIVATION_FLAG(int, verbosity)
 COMMON_ACTIVATION_FLAG(bool, help)
+COMMON_ACTIVATION_FLAG(s32, allocator_release_to_os_interval_ms)
diff --git a/lib/asan/asan_allocator.cc b/lib/asan/asan_allocator.cc
index ebd67b6..ee9b1a6 100644
--- a/lib/asan/asan_allocator.cc
+++ b/lib/asan/asan_allocator.cc
@@ -207,25 +207,27 @@
 
 void AllocatorOptions::SetFrom(const Flags *f, const CommonFlags *cf) {
   quarantine_size_mb = f->quarantine_size_mb;
+  thread_local_quarantine_size_kb = f->thread_local_quarantine_size_kb;
   min_redzone = f->redzone;
   max_redzone = f->max_redzone;
   may_return_null = cf->allocator_may_return_null;
   alloc_dealloc_mismatch = f->alloc_dealloc_mismatch;
+  release_to_os_interval_ms = cf->allocator_release_to_os_interval_ms;
 }
 
 void AllocatorOptions::CopyTo(Flags *f, CommonFlags *cf) {
   f->quarantine_size_mb = quarantine_size_mb;
+  f->thread_local_quarantine_size_kb = thread_local_quarantine_size_kb;
   f->redzone = min_redzone;
   f->max_redzone = max_redzone;
   cf->allocator_may_return_null = may_return_null;
   f->alloc_dealloc_mismatch = alloc_dealloc_mismatch;
+  cf->allocator_release_to_os_interval_ms = release_to_os_interval_ms;
 }
 
 struct Allocator {
   static const uptr kMaxAllowedMallocSize =
       FIRST_32_SECOND_64(3UL << 30, 1ULL << 40);
-  static const uptr kMaxThreadLocalQuarantine =
-      FIRST_32_SECOND_64(1 << 18, 1 << 20);
 
   AsanAllocator allocator;
   AsanQuarantine quarantine;
@@ -254,7 +256,7 @@
   void SharedInitCode(const AllocatorOptions &options) {
     CheckOptions(options);
     quarantine.Init((uptr)options.quarantine_size_mb << 20,
-                    kMaxThreadLocalQuarantine);
+                    (uptr)options.thread_local_quarantine_size_kb << 10);
     atomic_store(&alloc_dealloc_mismatch, options.alloc_dealloc_mismatch,
                  memory_order_release);
     atomic_store(&min_redzone, options.min_redzone, memory_order_release);
@@ -262,22 +264,59 @@
   }
 
   void Initialize(const AllocatorOptions &options) {
-    allocator.Init(options.may_return_null);
+    allocator.Init(options.may_return_null, options.release_to_os_interval_ms);
     SharedInitCode(options);
   }
 
+  void RePoisonChunk(uptr chunk) {
+    // This could be a user-facing chunk (with redzones), or some internal
+    // housekeeping chunk, like TransferBatch. Start by assuming the former.
+    AsanChunk *ac = GetAsanChunk((void *)chunk);
+    uptr allocated_size = allocator.GetActuallyAllocatedSize((void *)ac);
+    uptr beg = ac->Beg();
+    uptr end = ac->Beg() + ac->UsedSize(true);
+    uptr chunk_end = chunk + allocated_size;
+    if (chunk < beg && beg < end && end <= chunk_end &&
+        ac->chunk_state == CHUNK_ALLOCATED) {
+      // Looks like a valid AsanChunk in use, poison redzones only.
+      PoisonShadow(chunk, beg - chunk, kAsanHeapLeftRedzoneMagic);
+      uptr end_aligned_down = RoundDownTo(end, SHADOW_GRANULARITY);
+      FastPoisonShadowPartialRightRedzone(
+          end_aligned_down, end - end_aligned_down,
+          chunk_end - end_aligned_down, kAsanHeapLeftRedzoneMagic);
+    } else {
+      // This is either not an AsanChunk, or a freed or quarantined AsanChunk.
+      // In either case, poison everything.
+      PoisonShadow(chunk, allocated_size, kAsanHeapLeftRedzoneMagic);
+    }
+  }
+
   void ReInitialize(const AllocatorOptions &options) {
     allocator.SetMayReturnNull(options.may_return_null);
+    allocator.SetReleaseToOSIntervalMs(options.release_to_os_interval_ms);
     SharedInitCode(options);
+
+    // Poison the redzones of all existing allocations.
+    if (CanPoisonMemory()) {
+      allocator.ForceLock();
+      allocator.ForEachChunk(
+          [](uptr chunk, void *alloc) {
+            ((Allocator *)alloc)->RePoisonChunk(chunk);
+          },
+          this);
+      allocator.ForceUnlock();
+    }
   }
 
   void GetOptions(AllocatorOptions *options) const {
     options->quarantine_size_mb = quarantine.GetSize() >> 20;
+    options->thread_local_quarantine_size_kb = quarantine.GetCacheSize() >> 10;
     options->min_redzone = atomic_load(&min_redzone, memory_order_acquire);
     options->max_redzone = atomic_load(&max_redzone, memory_order_acquire);
     options->may_return_null = allocator.MayReturnNull();
     options->alloc_dealloc_mismatch =
         atomic_load(&alloc_dealloc_mismatch, memory_order_acquire);
+    options->release_to_os_interval_ms = allocator.ReleaseToOSIntervalMs();
   }
 
   // -------------------- Helper methods. -------------------------
@@ -356,7 +395,7 @@
     if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize) {
       Report("WARNING: AddressSanitizer failed to allocate 0x%zx bytes\n",
              (void*)size);
-      return allocator.ReturnNullOrDie();
+      return allocator.ReturnNullOrDieOnBadRequest();
     }
 
     AsanThread *t = GetCurrentThread();
@@ -373,8 +412,7 @@
           allocator.Allocate(cache, needed_size, 8, false, check_rss_limit);
     }
 
-    if (!allocated)
-      return allocator.ReturnNullOrDie();
+    if (!allocated) return allocator.ReturnNullOrDieOnOOM();
 
     if (*(u8 *)MEM_TO_SHADOW((uptr)allocated) == 0 && CanPoisonMemory()) {
       // Heap poisoning is enabled, but the allocator provides an unpoisoned
@@ -563,7 +601,7 @@
 
   void *Calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
     if (CallocShouldReturnNullDueToOverflow(size, nmemb))
-      return allocator.ReturnNullOrDie();
+      return allocator.ReturnNullOrDieOnBadRequest();
     void *ptr = Allocate(nmemb * size, 8, stack, FROM_MALLOC, false);
     // If the memory comes from the secondary allocator no need to clear it
     // as it comes directly from mmap.
@@ -643,6 +681,7 @@
 
   void PrintStats() {
     allocator.PrintStats();
+    quarantine.PrintStats();
   }
 
   void ForceLock() {
@@ -662,19 +701,22 @@
   return instance.allocator;
 }
 
-bool AsanChunkView::IsValid() {
+bool AsanChunkView::IsValid() const {
   return chunk_ && chunk_->chunk_state != CHUNK_AVAILABLE;
 }
-bool AsanChunkView::IsAllocated() {
+bool AsanChunkView::IsAllocated() const {
   return chunk_ && chunk_->chunk_state == CHUNK_ALLOCATED;
 }
-uptr AsanChunkView::Beg() { return chunk_->Beg(); }
-uptr AsanChunkView::End() { return Beg() + UsedSize(); }
-uptr AsanChunkView::UsedSize() { return chunk_->UsedSize(); }
-uptr AsanChunkView::AllocTid() { return chunk_->alloc_tid; }
-uptr AsanChunkView::FreeTid() { return chunk_->free_tid; }
-AllocType AsanChunkView::AllocType() {
-  return (enum AllocType)chunk_->alloc_type;
+bool AsanChunkView::IsQuarantined() const {
+  return chunk_ && chunk_->chunk_state == CHUNK_QUARANTINE;
+}
+uptr AsanChunkView::Beg() const { return chunk_->Beg(); }
+uptr AsanChunkView::End() const { return Beg() + UsedSize(); }
+uptr AsanChunkView::UsedSize() const { return chunk_->UsedSize(); }
+uptr AsanChunkView::AllocTid() const { return chunk_->alloc_tid; }
+uptr AsanChunkView::FreeTid() const { return chunk_->free_tid; }
+AllocType AsanChunkView::GetAllocType() const {
+  return (AllocType)chunk_->alloc_type;
 }
 
 static StackTrace GetStackTraceFromId(u32 id) {
@@ -684,14 +726,14 @@
   return res;
 }
 
-u32 AsanChunkView::GetAllocStackId() { return chunk_->alloc_context_id; }
-u32 AsanChunkView::GetFreeStackId() { return chunk_->free_context_id; }
+u32 AsanChunkView::GetAllocStackId() const { return chunk_->alloc_context_id; }
+u32 AsanChunkView::GetFreeStackId() const { return chunk_->free_context_id; }
 
-StackTrace AsanChunkView::GetAllocStack() {
+StackTrace AsanChunkView::GetAllocStack() const {
   return GetStackTraceFromId(GetAllocStackId());
 }
 
-StackTrace AsanChunkView::GetFreeStack() {
+StackTrace AsanChunkView::GetFreeStack() const {
   return GetStackTraceFromId(GetFreeStackId());
 }
 
@@ -710,6 +752,9 @@
 AsanChunkView FindHeapChunkByAddress(uptr addr) {
   return instance.FindHeapChunkByAddress(addr);
 }
+AsanChunkView FindHeapChunkByAllocBeg(uptr addr) {
+  return AsanChunkView(instance.GetAsanChunk(reinterpret_cast<void*>(addr)));
+}
 
 void AsanThreadLocalMallocStorage::CommitBack() {
   instance.CommitBack(this);
diff --git a/lib/asan/asan_allocator.h b/lib/asan/asan_allocator.h
index 55bbbb5..ee28ecf 100644
--- a/lib/asan/asan_allocator.h
+++ b/lib/asan/asan_allocator.h
@@ -33,10 +33,12 @@
 
 struct AllocatorOptions {
   u32 quarantine_size_mb;
+  u32 thread_local_quarantine_size_kb;
   u16 min_redzone;
   u16 max_redzone;
   u8 may_return_null;
   u8 alloc_dealloc_mismatch;
+  s32 release_to_os_interval_ms;
 
   void SetFrom(const Flags *f, const CommonFlags *cf);
   void CopyTo(Flags *f, CommonFlags *cf);
@@ -49,28 +51,29 @@
 class AsanChunkView {
  public:
   explicit AsanChunkView(AsanChunk *chunk) : chunk_(chunk) {}
-  bool IsValid();        // Checks if AsanChunkView points to a valid allocated
-                         // or quarantined chunk.
-  bool IsAllocated();    // Checks if the memory is currently allocated.
-  uptr Beg();            // First byte of user memory.
-  uptr End();            // Last byte of user memory.
-  uptr UsedSize();       // Size requested by the user.
-  uptr AllocTid();
-  uptr FreeTid();
+  bool IsValid() const;        // Checks if AsanChunkView points to a valid
+                               // allocated or quarantined chunk.
+  bool IsAllocated() const;    // Checks if the memory is currently allocated.
+  bool IsQuarantined() const;  // Checks if the memory is currently quarantined.
+  uptr Beg() const;            // First byte of user memory.
+  uptr End() const;            // First byte past the end of user memory.
+  uptr UsedSize() const;       // Size requested by the user.
+  uptr AllocTid() const;
+  uptr FreeTid() const;
   bool Eq(const AsanChunkView &c) const { return chunk_ == c.chunk_; }
-  u32 GetAllocStackId();
-  u32 GetFreeStackId();
-  StackTrace GetAllocStack();
-  StackTrace GetFreeStack();
-  AllocType AllocType();
-  bool AddrIsInside(uptr addr, uptr access_size, sptr *offset) {
+  u32 GetAllocStackId() const;
+  u32 GetFreeStackId() const;
+  StackTrace GetAllocStack() const;
+  StackTrace GetFreeStack() const;
+  AllocType GetAllocType() const;
+  bool AddrIsInside(uptr addr, uptr access_size, sptr *offset) const {
     if (addr >= Beg() && (addr + access_size) <= End()) {
       *offset = addr - Beg();
       return true;
     }
     return false;
   }
-  bool AddrIsAtLeft(uptr addr, uptr access_size, sptr *offset) {
+  bool AddrIsAtLeft(uptr addr, uptr access_size, sptr *offset) const {
     (void)access_size;
     if (addr < Beg()) {
       *offset = Beg() - addr;
@@ -78,7 +81,7 @@
     }
     return false;
   }
-  bool AddrIsAtRight(uptr addr, uptr access_size, sptr *offset) {
+  bool AddrIsAtRight(uptr addr, uptr access_size, sptr *offset) const {
     if (addr + access_size > End()) {
       *offset = addr - End();
       return true;
@@ -91,6 +94,7 @@
 };
 
 AsanChunkView FindHeapChunkByAddress(uptr address);
+AsanChunkView FindHeapChunkByAllocBeg(uptr address);
 
 // List of AsanChunks with total size.
 class AsanChunkFifoList: public IntrusiveList<AsanChunk> {
@@ -118,18 +122,36 @@
 # if defined(__powerpc64__)
 const uptr kAllocatorSpace =  0xa0000000000ULL;
 const uptr kAllocatorSize  =  0x20000000000ULL;  // 2T.
+typedef DefaultSizeClassMap SizeClassMap;
+# elif defined(__aarch64__) && SANITIZER_ANDROID
+const uptr kAllocatorSpace =  0x3000000000ULL;
+const uptr kAllocatorSize  =  0x2000000000ULL;  // 128G.
+typedef VeryCompactSizeClassMap SizeClassMap;
 # elif defined(__aarch64__)
-// AArch64/SANITIZIER_CAN_USER_ALLOCATOR64 is only for 42-bit VMA
+// AArch64/SANITIZER_CAN_USE_ALLOCATOR64 is only for 42-bit VMA
 // so no need to different values for different VMA.
 const uptr kAllocatorSpace =  0x10000000000ULL;
 const uptr kAllocatorSize  =  0x10000000000ULL;  // 3T.
+typedef DefaultSizeClassMap SizeClassMap;
+# elif SANITIZER_WINDOWS
+const uptr kAllocatorSpace = ~(uptr)0;
+const uptr kAllocatorSize  =  0x8000000000ULL;  // 512G.
+typedef DefaultSizeClassMap SizeClassMap;
 # else
 const uptr kAllocatorSpace = 0x600000000000ULL;
 const uptr kAllocatorSize  =  0x40000000000ULL;  // 4T.
-# endif
 typedef DefaultSizeClassMap SizeClassMap;
-typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize, 0 /*metadata*/,
-    SizeClassMap, AsanMapUnmapCallback> PrimaryAllocator;
+# endif
+struct AP64 {  // Allocator64 parameters. Deliberately using a short name.
+  static const uptr kSpaceBeg = kAllocatorSpace;
+  static const uptr kSpaceSize = kAllocatorSize;
+  static const uptr kMetadataSize = 0;
+  typedef __asan::SizeClassMap SizeClassMap;
+  typedef AsanMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+};
+
+typedef SizeClassAllocator64<AP64> PrimaryAllocator;
 #else  // Fallback to SizeClassAllocator32.
 static const uptr kRegionSizeLog = 20;
 static const uptr kNumRegions = SANITIZER_MMAP_RANGE_SIZE >> kRegionSizeLog;
diff --git a/lib/asan/asan_descriptions.cc b/lib/asan/asan_descriptions.cc
index 7744cbf..0ecbe09 100644
--- a/lib/asan/asan_descriptions.cc
+++ b/lib/asan/asan_descriptions.cc
@@ -122,7 +122,7 @@
   }
   descr->chunk_begin = chunk.Beg();
   descr->chunk_size = chunk.UsedSize();
-  descr->alloc_type = chunk.AllocType();
+  descr->alloc_type = chunk.GetAllocType();
 }
 
 static void PrintHeapChunkAccess(uptr addr, const ChunkAccess &descr) {
diff --git a/lib/asan/asan_flags.cc b/lib/asan/asan_flags.cc
index 345a35c..ad5bbff 100644
--- a/lib/asan/asan_flags.cc
+++ b/lib/asan/asan_flags.cc
@@ -156,9 +156,24 @@
     f->quarantine_size_mb = f->quarantine_size >> 20;
   if (f->quarantine_size_mb < 0) {
     const int kDefaultQuarantineSizeMb =
-        (ASAN_LOW_MEMORY) ? 1UL << 6 : 1UL << 8;
+        (ASAN_LOW_MEMORY) ? 1UL << 4 : 1UL << 8;
     f->quarantine_size_mb = kDefaultQuarantineSizeMb;
   }
+  if (f->thread_local_quarantine_size_kb < 0) {
+    const u32 kDefaultThreadLocalQuarantineSizeKb =
+        // It is not advised to go lower than 64Kb, otherwise quarantine batches
+        // pushed from thread local quarantine to global one will create too
+        // much overhead. One quarantine batch size is 8Kb and it holds up to
+        // 1021 chunks, which amounts to 1/8 memory overhead per batch when
+        // thread local quarantine is set to 64Kb.
+        (ASAN_LOW_MEMORY) ? 1 << 6 : FIRST_32_SECOND_64(1 << 8, 1 << 10);
+    f->thread_local_quarantine_size_kb = kDefaultThreadLocalQuarantineSizeKb;
+  }
+  if (f->thread_local_quarantine_size_kb == 0 && f->quarantine_size_mb > 0) {
+    Report("%s: thread_local_quarantine_size_kb can be set to 0 only when "
+           "quarantine_size_mb is set to 0\n", SanitizerToolName);
+    Die();
+  }
   if (!f->replace_str && common_flags()->intercept_strlen) {
     Report("WARNING: strlen interceptor is enabled even though replace_str=0. "
            "Use intercept_strlen=0 to disable it.");
diff --git a/lib/asan/asan_flags.inc b/lib/asan/asan_flags.inc
index 4b8d8ff..4712efb 100644
--- a/lib/asan/asan_flags.inc
+++ b/lib/asan/asan_flags.inc
@@ -23,6 +23,12 @@
           "Size (in Mb) of quarantine used to detect use-after-free "
           "errors. Lower value may reduce memory usage but increase the "
           "chance of false negatives.")
+ASAN_FLAG(int, thread_local_quarantine_size_kb, -1,
+          "Size (in Kb) of thread local quarantine used to detect "
+          "use-after-free errors. Lower value may reduce memory usage but "
+          "increase the chance of false negatives. It is not advised to go "
+          "lower than 64Kb, otherwise frequent transfers to global quarantine "
+          "might affect performance.")
 ASAN_FLAG(int, redzone, 16,
           "Minimal size (in bytes) of redzones around heap objects. "
           "Requirement: redzone >= 16, is a power of two.")
@@ -46,8 +52,6 @@
           "If set, uses custom wrappers for memset/memcpy/memmove intrinsics.")
 ASAN_FLAG(bool, detect_stack_use_after_return, false,
           "Enables stack-use-after-return checking at run-time.")
-ASAN_FLAG(bool, detect_stack_use_after_scope, true,
-          "Enables stack-use-after-scope checking at run-time.")
 ASAN_FLAG(int, min_uar_stack_size_log, 16, // We can't do smaller anyway.
           "Minimum fake stack size log.")
 ASAN_FLAG(int, max_uar_stack_size_log,
@@ -104,7 +108,7 @@
 // https://github.com/google/sanitizers/issues/309
 // TODO(glider,timurrrr): Fix known issues and enable this back.
 ASAN_FLAG(bool, alloc_dealloc_mismatch,
-          (SANITIZER_MAC == 0) && (SANITIZER_WINDOWS == 0),
+          !SANITIZER_MAC && !SANITIZER_WINDOWS && !SANITIZER_ANDROID,
           "Report errors on malloc/delete, new/free, new/delete[], etc.")
 
 ASAN_FLAG(bool, new_delete_type_mismatch, true,
diff --git a/lib/asan/asan_globals.cc b/lib/asan/asan_globals.cc
index 93dae29..b723306 100644
--- a/lib/asan/asan_globals.cc
+++ b/lib/asan/asan_globals.cc
@@ -348,6 +348,20 @@
     Printf("=== ID %d; %p %p\n", stack_id, &globals[0], &globals[n - 1]);
   }
   for (uptr i = 0; i < n; i++) {
+    if (SANITIZER_WINDOWS && globals[i].beg == 0) {
+      // The MSVC incremental linker may pad globals out to 256 bytes. As long
+      // as __asan_global is less than 256 bytes large and its size is a power
+      // of two, we can skip over the padding.
+      static_assert(
+          sizeof(__asan_global) < 256 &&
+              (sizeof(__asan_global) & (sizeof(__asan_global) - 1)) == 0,
+          "sizeof(__asan_global) incompatible with incremental linker padding");
+      // If these are padding bytes, the rest of the global should be zero.
+      CHECK(globals[i].size == 0 && globals[i].size_with_redzone == 0 &&
+            globals[i].name == nullptr && globals[i].module_name == nullptr &&
+            globals[i].odr_indicator == 0);
+      continue;
+    }
     RegisterGlobal(&globals[i]);
   }
 }
@@ -358,6 +372,11 @@
   if (!flags()->report_globals) return;
   BlockingMutexLock lock(&mu_for_globals);
   for (uptr i = 0; i < n; i++) {
+    if (SANITIZER_WINDOWS && globals[i].beg == 0) {
+      // Skip globals that look like padding from the MSVC incremental linker.
+      // See comment in __asan_register_globals.
+      continue;
+    }
     UnregisterGlobal(&globals[i]);
   }
 }
@@ -368,10 +387,10 @@
 // initializer can only touch global variables in the same TU.
 void __asan_before_dynamic_init(const char *module_name) {
   if (!flags()->check_initialization_order ||
-      !CanPoisonMemory())
+      !CanPoisonMemory() ||
+      !dynamic_init_globals)
     return;
   bool strict_init_order = flags()->strict_init_order;
-  CHECK(dynamic_init_globals);
   CHECK(module_name);
   CHECK(asan_inited);
   BlockingMutexLock lock(&mu_for_globals);
@@ -394,7 +413,8 @@
 // TU are poisoned.  It simply unpoisons all dynamically initialized globals.
 void __asan_after_dynamic_init() {
   if (!flags()->check_initialization_order ||
-      !CanPoisonMemory())
+      !CanPoisonMemory() ||
+      !dynamic_init_globals)
     return;
   CHECK(asan_inited);
   BlockingMutexLock lock(&mu_for_globals);
diff --git a/lib/asan/asan_globals_win.cc b/lib/asan/asan_globals_win.cc
new file mode 100644
index 0000000..56c0d1a
--- /dev/null
+++ b/lib/asan/asan_globals_win.cc
@@ -0,0 +1,62 @@
+//===-- asan_globals_win.cc -----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Global registration code that is linked into every Windows DLL and EXE.
+//
+//===----------------------------------------------------------------------===//
+
+#include "asan_interface_internal.h"
+#if SANITIZER_WINDOWS
+
+namespace __asan {
+
+#pragma section(".ASAN$GA", read, write)  // NOLINT
+#pragma section(".ASAN$GZ", read, write)  // NOLINT
+extern "C" __declspec(allocate(".ASAN$GA"))
+__asan_global __asan_globals_start = {};
+extern "C" __declspec(allocate(".ASAN$GZ"))
+__asan_global __asan_globals_end = {};
+#pragma comment(linker, "/merge:.ASAN=.data")
+
+// Runs hook over the globals between __asan_globals_{start,end}.
+static void call_on_globals(void (*hook)(__asan_global *, uptr)) {
+  __asan_global *start = &__asan_globals_start + 1;
+  __asan_global *end = &__asan_globals_end;
+  uptr bytediff = (uptr)end - (uptr)start;
+  if (bytediff % sizeof(__asan_global) != 0) {
+#ifdef ASAN_DLL_THUNK
+    __debugbreak();
+#else
+    CHECK("corrupt asan global array" && 0);  // A bare literal is always true.
+#endif
+  }
+  // We know end >= start because the linker sorts the portion after the dollar
+  // sign alphabetically.
+  hook(start, end - start);
+}
+
+static void register_dso_globals() {
+  call_on_globals(&__asan_register_globals);
+}
+
+static void unregister_dso_globals() {
+  call_on_globals(&__asan_unregister_globals);
+}
+
+// Register globals
+#pragma section(".CRT$XCU", long, read)  // NOLINT
+#pragma section(".CRT$XTX", long, read)  // NOLINT
+extern "C" __declspec(allocate(".CRT$XCU"))
+void (*const __asan_dso_reg_hook)() = &register_dso_globals;
+extern "C" __declspec(allocate(".CRT$XTX"))
+void (*const __asan_dso_unreg_hook)() = &unregister_dso_globals;
+
+} // namespace __asan
+
+#endif  // SANITIZER_WINDOWS
diff --git a/lib/asan/asan_globals_win.h b/lib/asan/asan_globals_win.h
new file mode 100644
index 0000000..d4ed9c1
--- /dev/null
+++ b/lib/asan/asan_globals_win.h
@@ -0,0 +1,34 @@
+//===-- asan_globals_win.h --------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface to the Windows-specific global management code. Separated into a
+// standalone header to allow inclusion from asan_win_dynamic_runtime_thunk,
+// which defines symbols that clash with other sanitizer headers.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ASAN_GLOBALS_WIN_H
+#define ASAN_GLOBALS_WIN_H
+
+#if !defined(_MSC_VER)
+#error "this file is Windows-only, and uses MSVC pragmas"
+#endif
+
+#if defined(_WIN64)
+#define SANITIZER_SYM_PREFIX
+#else
+#define SANITIZER_SYM_PREFIX "_"
+#endif
+
+// Use this macro to force linking asan_globals_win.cc into the DSO.
+#define ASAN_LINK_GLOBALS_WIN() \
+  __pragma(                     \
+      comment(linker, "/include:" SANITIZER_SYM_PREFIX "__asan_dso_reg_hook"))
+
+#endif // ASAN_GLOBALS_WIN_H
diff --git a/lib/asan/asan_interceptors.cc b/lib/asan/asan_interceptors.cc
index 8f38587..606016d 100644
--- a/lib/asan/asan_interceptors.cc
+++ b/lib/asan/asan_interceptors.cc
@@ -81,6 +81,51 @@
     }                                                                   \
   } while (0)
 
+// memcpy is called during __asan_init() from the internals of printf(...).
+// We do not treat memcpy with to==from as a bug.
+// See http://llvm.org/bugs/show_bug.cgi?id=11763.
+#define ASAN_MEMCPY_IMPL(ctx, to, from, size)                           \
+  do {                                                                  \
+    if (UNLIKELY(!asan_inited)) return internal_memcpy(to, from, size); \
+    if (asan_init_is_running) {                                         \
+      return REAL(memcpy)(to, from, size);                              \
+    }                                                                   \
+    ENSURE_ASAN_INITED();                                               \
+    if (flags()->replace_intrin) {                                      \
+      if (to != from) {                                                 \
+        CHECK_RANGES_OVERLAP("memcpy", to, size, from, size);           \
+      }                                                                 \
+      ASAN_READ_RANGE(ctx, from, size);                                 \
+      ASAN_WRITE_RANGE(ctx, to, size);                                  \
+    }                                                                   \
+    return REAL(memcpy)(to, from, size);                                \
+  } while (0)
+
+// memset is called inside Printf.
+#define ASAN_MEMSET_IMPL(ctx, block, c, size)                           \
+  do {                                                                  \
+    if (UNLIKELY(!asan_inited)) return internal_memset(block, c, size); \
+    if (asan_init_is_running) {                                         \
+      return REAL(memset)(block, c, size);                              \
+    }                                                                   \
+    ENSURE_ASAN_INITED();                                               \
+    if (flags()->replace_intrin) {                                      \
+      ASAN_WRITE_RANGE(ctx, block, size);                               \
+    }                                                                   \
+    return REAL(memset)(block, c, size);                                \
+  } while (0)
+
+#define ASAN_MEMMOVE_IMPL(ctx, to, from, size)                           \
+  do {                                                                   \
+    if (UNLIKELY(!asan_inited)) return internal_memmove(to, from, size); \
+    ENSURE_ASAN_INITED();                                                \
+    if (flags()->replace_intrin) {                                       \
+      ASAN_READ_RANGE(ctx, from, size);                                  \
+      ASAN_WRITE_RANGE(ctx, to, size);                                   \
+    }                                                                    \
+    return internal_memmove(to, from, size);                             \
+  } while (0)
+
 #define ASAN_READ_RANGE(ctx, offset, size) \
   ACCESS_MEMORY_RANGE(ctx, offset, size, false)
 #define ASAN_WRITE_RANGE(ctx, offset, size) \
@@ -198,10 +243,25 @@
   } else {                                                                     \
     *begin = *end = 0;                                                         \
   }
-// Asan needs custom handling of these:
-#undef SANITIZER_INTERCEPT_MEMSET
-#undef SANITIZER_INTERCEPT_MEMMOVE
-#undef SANITIZER_INTERCEPT_MEMCPY
+
+#define COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size) \
+  do {                                                       \
+    ASAN_INTERCEPTOR_ENTER(ctx, memmove);                    \
+    ASAN_MEMMOVE_IMPL(ctx, to, from, size);                  \
+  } while (false)
+
+#define COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size) \
+  do {                                                      \
+    ASAN_INTERCEPTOR_ENTER(ctx, memcpy);                    \
+    ASAN_MEMCPY_IMPL(ctx, to, from, size);                  \
+  } while (false)
+
+#define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size) \
+  do {                                                      \
+    ASAN_INTERCEPTOR_ENTER(ctx, memset);                    \
+    ASAN_MEMSET_IMPL(ctx, block, c, size);                  \
+  } while (false)
+
 #include "sanitizer_common/sanitizer_common_interceptors.inc"
 
 // Syscall interceptors don't have contexts, we don't support suppressions
@@ -389,90 +449,18 @@
 }
 #endif
 
-// memcpy is called during __asan_init() from the internals of printf(...).
-// We do not treat memcpy with to==from as a bug.
-// See http://llvm.org/bugs/show_bug.cgi?id=11763.
-#define ASAN_MEMCPY_IMPL(ctx, to, from, size) do {                             \
-    if (UNLIKELY(!asan_inited)) return internal_memcpy(to, from, size);        \
-    if (asan_init_is_running) {                                                \
-      return REAL(memcpy)(to, from, size);                                     \
-    }                                                                          \
-    ENSURE_ASAN_INITED();                                                      \
-    if (flags()->replace_intrin) {                                             \
-      if (to != from) {                                                        \
-        CHECK_RANGES_OVERLAP("memcpy", to, size, from, size);                  \
-      }                                                                        \
-      ASAN_READ_RANGE(ctx, from, size);                                        \
-      ASAN_WRITE_RANGE(ctx, to, size);                                         \
-    }                                                                          \
-    return REAL(memcpy)(to, from, size);                                       \
-  } while (0)
-
-
 void *__asan_memcpy(void *to, const void *from, uptr size) {
   ASAN_MEMCPY_IMPL(nullptr, to, from, size);
 }
 
-// memset is called inside Printf.
-#define ASAN_MEMSET_IMPL(ctx, block, c, size) do {                             \
-    if (UNLIKELY(!asan_inited)) return internal_memset(block, c, size);        \
-    if (asan_init_is_running) {                                                \
-      return REAL(memset)(block, c, size);                                     \
-    }                                                                          \
-    ENSURE_ASAN_INITED();                                                      \
-    if (flags()->replace_intrin) {                                             \
-      ASAN_WRITE_RANGE(ctx, block, size);                                      \
-    }                                                                          \
-    return REAL(memset)(block, c, size);                                       \
-  } while (0)
-
 void *__asan_memset(void *block, int c, uptr size) {
   ASAN_MEMSET_IMPL(nullptr, block, c, size);
 }
 
-#define ASAN_MEMMOVE_IMPL(ctx, to, from, size) do {                            \
-    if (UNLIKELY(!asan_inited))                                                \
-      return internal_memmove(to, from, size);                                 \
-    ENSURE_ASAN_INITED();                                                      \
-    if (flags()->replace_intrin) {                                             \
-      ASAN_READ_RANGE(ctx, from, size);                                        \
-      ASAN_WRITE_RANGE(ctx, to, size);                                         \
-    }                                                                          \
-    return internal_memmove(to, from, size);                                   \
-  } while (0)
-
 void *__asan_memmove(void *to, const void *from, uptr size) {
   ASAN_MEMMOVE_IMPL(nullptr, to, from, size);
 }
 
-INTERCEPTOR(void*, memmove, void *to, const void *from, uptr size) {
-  void *ctx;
-  ASAN_INTERCEPTOR_ENTER(ctx, memmove);
-  ASAN_MEMMOVE_IMPL(ctx, to, from, size);
-}
-
-INTERCEPTOR(void*, memcpy, void *to, const void *from, uptr size) {
-  void *ctx;
-  ASAN_INTERCEPTOR_ENTER(ctx, memcpy);
-#if !SANITIZER_MAC
-  ASAN_MEMCPY_IMPL(ctx, to, from, size);
-#else
-  // At least on 10.7 and 10.8 both memcpy() and memmove() are being replaced
-  // with WRAP(memcpy). As a result, false positives are reported for memmove()
-  // calls. If we just disable error reporting with
-  // ASAN_OPTIONS=replace_intrin=0, memmove() is still replaced with
-  // internal_memcpy(), which may lead to crashes, see
-  // http://llvm.org/bugs/show_bug.cgi?id=16362.
-  ASAN_MEMMOVE_IMPL(ctx, to, from, size);
-#endif  // !SANITIZER_MAC
-}
-
-INTERCEPTOR(void*, memset, void *block, int c, uptr size) {
-  void *ctx;
-  ASAN_INTERCEPTOR_ENTER(ctx, memset);
-  ASAN_MEMSET_IMPL(ctx, block, c, size);
-}
-
 #if ASAN_INTERCEPT_INDEX
 # if ASAN_USE_ALIAS_ATTRIBUTE_FOR_INDEX
 INTERCEPTOR(char*, index, const char *string, int c)
@@ -724,17 +712,6 @@
   was_called_once = true;
   InitializeCommonInterceptors();
 
-  // Intercept mem* functions.
-  ASAN_INTERCEPT_FUNC(memmove);
-  ASAN_INTERCEPT_FUNC(memset);
-  if (PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE) {
-    // In asan, REAL(memmove) is not used, but it is used in msan.
-    ASAN_INTERCEPT_FUNC(memcpy);
-  } else {
-    ASSIGN_REAL(memcpy, memmove);
-  }
-  CHECK(REAL(memcpy));
-
   // Intercept str* functions.
   ASAN_INTERCEPT_FUNC(strcat);  // NOLINT
   ASAN_INTERCEPT_FUNC(strcpy);  // NOLINT
diff --git a/lib/asan/asan_interface_internal.h b/lib/asan/asan_interface_internal.h
index 9e674ef..8cd424c 100644
--- a/lib/asan/asan_interface_internal.h
+++ b/lib/asan/asan_interface_internal.h
@@ -81,6 +81,20 @@
   SANITIZER_INTERFACE_ATTRIBUTE
   void __asan_after_dynamic_init();
 
+  // Sets bytes of the given range of the shadow memory into specific value.
+  SANITIZER_INTERFACE_ATTRIBUTE
+  void __asan_set_shadow_00(uptr addr, uptr size);
+  SANITIZER_INTERFACE_ATTRIBUTE
+  void __asan_set_shadow_f1(uptr addr, uptr size);
+  SANITIZER_INTERFACE_ATTRIBUTE
+  void __asan_set_shadow_f2(uptr addr, uptr size);
+  SANITIZER_INTERFACE_ATTRIBUTE
+  void __asan_set_shadow_f3(uptr addr, uptr size);
+  SANITIZER_INTERFACE_ATTRIBUTE
+  void __asan_set_shadow_f5(uptr addr, uptr size);
+  SANITIZER_INTERFACE_ATTRIBUTE
+  void __asan_set_shadow_f8(uptr addr, uptr size);
+
   // These two functions are used by instrumented code in the
   // use-after-scope mode. They mark memory for local variables as
   // unaddressable when they leave scope and addressable before the
@@ -165,10 +179,6 @@
   SANITIZER_INTERFACE_ATTRIBUTE
   extern int __asan_option_detect_stack_use_after_return;
 
-// Global flag, copy of ASAN_OPTIONS=detect_stack_use_after_scope
-  SANITIZER_INTERFACE_ATTRIBUTE
-  extern int __asan_option_detect_stack_use_after_scope;
-
   SANITIZER_INTERFACE_ATTRIBUTE
   extern uptr *__asan_test_only_reported_buggy_pointer;
 
diff --git a/lib/asan/asan_internal.h b/lib/asan/asan_internal.h
index 84d7f08..1dc678c 100644
--- a/lib/asan/asan_internal.h
+++ b/lib/asan/asan_internal.h
@@ -36,7 +36,7 @@
 // If set, values like allocator chunk size, as well as defaults for some flags
 // will be changed towards less memory overhead.
 #ifndef ASAN_LOW_MEMORY
-# if SANITIZER_IOS || (SANITIZER_WORDSIZE == 32)
+# if SANITIZER_IOS || SANITIZER_ANDROID
 #  define ASAN_LOW_MEMORY 1
 # else
 #  define ASAN_LOW_MEMORY 0
@@ -103,17 +103,6 @@
 
 void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name);
 
-// Platform-specific options.
-#if SANITIZER_MAC
-bool PlatformHasDifferentMemcpyAndMemmove();
-# define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE \
-    (PlatformHasDifferentMemcpyAndMemmove())
-#elif SANITIZER_WINDOWS64
-# define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE false
-#else
-# define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE true
-#endif  // SANITIZER_MAC
-
 // Add convenient macro for interface functions that may be represented as
 // weak hooks.
 #define ASAN_MALLOC_HOOK(ptr, size)                                   \
diff --git a/lib/asan/asan_mac.cc b/lib/asan/asan_mac.cc
index 525864f..baf533a 100644
--- a/lib/asan/asan_mac.cc
+++ b/lib/asan/asan_mac.cc
@@ -49,15 +49,6 @@
 void InitializePlatformInterceptors() {}
 void InitializePlatformExceptionHandlers() {}
 
-bool PlatformHasDifferentMemcpyAndMemmove() {
-  // On OS X 10.7 memcpy() and memmove() are both resolved
-  // into memmove$VARIANT$sse42.
-  // See also https://github.com/google/sanitizers/issues/34.
-  // TODO(glider): need to check dynamically that memcpy() and memmove() are
-  // actually the same function.
-  return GetMacosVersion() == MACOS_VERSION_SNOW_LEOPARD;
-}
-
 // No-op. Mac does not support static linkage anyway.
 void *AsanDoesNotSupportStaticLinkage() {
   return 0;
diff --git a/lib/asan/asan_malloc_linux.cc b/lib/asan/asan_malloc_linux.cc
index 162abd2..a78767c 100644
--- a/lib/asan/asan_malloc_linux.cc
+++ b/lib/asan/asan_malloc_linux.cc
@@ -78,7 +78,13 @@
   if (UNLIKELY(IsInDlsymAllocPool(ptr))) {
     uptr offset = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
     uptr copy_size = Min(size, kDlsymAllocPoolSize - offset);
-    void *new_ptr = asan_malloc(size, &stack);
+    void *new_ptr;
+    if (UNLIKELY(!asan_inited)) {
+      new_ptr = AllocateFromLocalPool(size);
+    } else {
+      copy_size = size;
+      new_ptr = asan_malloc(copy_size, &stack);
+    }
     internal_memcpy(new_ptr, ptr, copy_size);
     return new_ptr;
   }
diff --git a/lib/asan/asan_malloc_win.cc b/lib/asan/asan_malloc_win.cc
index 4a233df..05148d5 100644
--- a/lib/asan/asan_malloc_win.cc
+++ b/lib/asan/asan_malloc_win.cc
@@ -125,6 +125,11 @@
 }
 
 ALLOCATION_FUNCTION_ATTRIBUTE
+void *_recalloc_base(void *p, size_t n, size_t elem_size) {
+  return _recalloc(p, n, elem_size);
+}
+
+ALLOCATION_FUNCTION_ATTRIBUTE
 size_t _msize(const void *ptr) {
   GET_CURRENT_PC_BP_SP;
   (void)sp;
@@ -223,6 +228,7 @@
   TryToOverrideFunction("_realloc_base", (uptr)realloc);
   TryToOverrideFunction("_realloc_crt", (uptr)realloc);
   TryToOverrideFunction("_recalloc", (uptr)_recalloc);
+  TryToOverrideFunction("_recalloc_base", (uptr)_recalloc);
   TryToOverrideFunction("_recalloc_crt", (uptr)_recalloc);
   TryToOverrideFunction("_msize", (uptr)_msize);
   TryToOverrideFunction("_expand", (uptr)_expand);
diff --git a/lib/asan/asan_mapping.h b/lib/asan/asan_mapping.h
index 5cbdd34..d8e60a4 100644
--- a/lib/asan/asan_mapping.h
+++ b/lib/asan/asan_mapping.h
@@ -269,9 +269,25 @@
   return kMidMemBeg && a >= kMidMemBeg && a <= kMidMemEnd;
 }
 
+static inline bool AddrIsInShadowGap(uptr a) {
+  PROFILE_ASAN_MAPPING();
+  if (kMidMemBeg) {
+    if (a <= kShadowGapEnd)
+      return SHADOW_OFFSET == 0 || a >= kShadowGapBeg;
+    return (a >= kShadowGap2Beg && a <= kShadowGap2End) ||
+           (a >= kShadowGap3Beg && a <= kShadowGap3End);
+  }
+  // In zero-based shadow mode we treat addresses near zero as addresses
+  // in shadow gap as well.
+  if (SHADOW_OFFSET == 0)
+    return a <= kShadowGapEnd;
+  return a >= kShadowGapBeg && a <= kShadowGapEnd;
+}
+
 static inline bool AddrIsInMem(uptr a) {
   PROFILE_ASAN_MAPPING();
-  return AddrIsInLowMem(a) || AddrIsInMidMem(a) || AddrIsInHighMem(a);
+  return AddrIsInLowMem(a) || AddrIsInMidMem(a) || AddrIsInHighMem(a) ||
+      (flags()->protect_shadow_gap == 0 && AddrIsInShadowGap(a));
 }
 
 static inline uptr MemToShadow(uptr p) {
@@ -295,21 +311,6 @@
   return AddrIsInLowShadow(a) || AddrIsInMidShadow(a) || AddrIsInHighShadow(a);
 }
 
-static inline bool AddrIsInShadowGap(uptr a) {
-  PROFILE_ASAN_MAPPING();
-  if (kMidMemBeg) {
-    if (a <= kShadowGapEnd)
-      return SHADOW_OFFSET == 0 || a >= kShadowGapBeg;
-    return (a >= kShadowGap2Beg && a <= kShadowGap2End) ||
-           (a >= kShadowGap3Beg && a <= kShadowGap3End);
-  }
-  // In zero-based shadow mode we treat addresses near zero as addresses
-  // in shadow gap as well.
-  if (SHADOW_OFFSET == 0)
-    return a <= kShadowGapEnd;
-  return a >= kShadowGapBeg && a <= kShadowGapEnd;
-}
-
 static inline bool AddrIsAlignedByGranularity(uptr a) {
   PROFILE_ASAN_MAPPING();
   return (a & (SHADOW_GRANULARITY - 1)) == 0;
diff --git a/lib/asan/asan_memory_profile.cc b/lib/asan/asan_memory_profile.cc
index ba00516..c2678b9 100644
--- a/lib/asan/asan_memory_profile.cc
+++ b/lib/asan/asan_memory_profile.cc
@@ -32,9 +32,56 @@
 class HeapProfile {
  public:
   HeapProfile() : allocations_(1024) {}
+
+  void ProcessChunk(const AsanChunkView& cv) {
+    if (cv.IsAllocated()) {
+      total_allocated_user_size_ += cv.UsedSize();
+      total_allocated_count_++;
+      u32 id = cv.GetAllocStackId();
+      if (id)
+        Insert(id, cv.UsedSize());
+    } else if (cv.IsQuarantined()) {
+      total_quarantined_user_size_ += cv.UsedSize();
+      total_quarantined_count_++;
+    } else {
+      total_other_count_++;
+    }
+  }
+
+  void Print(uptr top_percent) {
+    InternalSort(&allocations_, allocations_.size(),
+                 [](const AllocationSite &a, const AllocationSite &b) {
+                   return a.total_size > b.total_size;
+                 });
+    CHECK(total_allocated_user_size_);
+    uptr total_shown = 0;
+    Printf("Live Heap Allocations: %zd bytes in %zd chunks; quarantined: "
+           "%zd bytes in %zd chunks; %zd other chunks; total chunks: %zd; "
+           "showing top %zd%%\n",
+           total_allocated_user_size_, total_allocated_count_,
+           total_quarantined_user_size_, total_quarantined_count_,
+           total_other_count_, total_allocated_count_ +
+           total_quarantined_count_ + total_other_count_, top_percent);
+    for (uptr i = 0; i < allocations_.size(); i++) {
+      auto &a = allocations_[i];
+      Printf("%zd byte(s) (%zd%%) in %zd allocation(s)\n", a.total_size,
+             a.total_size * 100 / total_allocated_user_size_, a.count);
+      StackDepotGet(a.id).Print();
+      total_shown += a.total_size;
+      if (total_shown * 100 / total_allocated_user_size_ > top_percent)
+        break;
+    }
+  }
+
+ private:
+  uptr total_allocated_user_size_ = 0;
+  uptr total_allocated_count_ = 0;
+  uptr total_quarantined_user_size_ = 0;
+  uptr total_quarantined_count_ = 0;
+  uptr total_other_count_ = 0;
+  InternalMmapVector<AllocationSite> allocations_;
+
   void Insert(u32 id, uptr size) {
-    total_allocated_ += size;
-    total_count_++;
     // Linear lookup will be good enough for most cases (although not all).
     for (uptr i = 0; i < allocations_.size(); i++) {
       if (allocations_[i].id == id) {
@@ -45,40 +92,11 @@
     }
     allocations_.push_back({id, size, 1});
   }
-
-  void Print(uptr top_percent) {
-    InternalSort(&allocations_, allocations_.size(),
-                 [](const AllocationSite &a, const AllocationSite &b) {
-                   return a.total_size > b.total_size;
-                 });
-    CHECK(total_allocated_);
-    uptr total_shown = 0;
-    Printf("Live Heap Allocations: %zd bytes from %zd allocations; "
-           "showing top %zd%%\n", total_allocated_, total_count_, top_percent);
-    for (uptr i = 0; i < allocations_.size(); i++) {
-      auto &a = allocations_[i];
-      Printf("%zd byte(s) (%zd%%) in %zd allocation(s)\n", a.total_size,
-             a.total_size * 100 / total_allocated_, a.count);
-      StackDepotGet(a.id).Print();
-      total_shown += a.total_size;
-      if (total_shown * 100 / total_allocated_ > top_percent)
-        break;
-    }
-  }
-
- private:
-  uptr total_allocated_ = 0;
-  uptr total_count_ = 0;
-  InternalMmapVector<AllocationSite> allocations_;
 };
 
 static void ChunkCallback(uptr chunk, void *arg) {
-  HeapProfile *hp = reinterpret_cast<HeapProfile*>(arg);
-  AsanChunkView cv = FindHeapChunkByAddress(chunk);
-  if (!cv.IsAllocated()) return;
-  u32 id = cv.GetAllocStackId();
-  if (!id) return;
-  hp->Insert(id, cv.UsedSize());
+  reinterpret_cast<HeapProfile*>(arg)->ProcessChunk(
+      FindHeapChunkByAllocBeg(chunk));
 }
 
 static void MemoryProfileCB(const SuspendedThreadsList &suspended_threads_list,
diff --git a/lib/asan/asan_new_delete.cc b/lib/asan/asan_new_delete.cc
index fef6604..3283fb3 100644
--- a/lib/asan/asan_new_delete.cc
+++ b/lib/asan/asan_new_delete.cc
@@ -45,26 +45,6 @@
 
 using namespace __asan;  // NOLINT
 
-// This code has issues on OSX.
-// See https://github.com/google/sanitizers/issues/131.
-
-// Fake std::nothrow_t to avoid including <new>.
-namespace std {
-struct nothrow_t {};
-}  // namespace std
-
-#define OPERATOR_NEW_BODY(type) \
-  GET_STACK_TRACE_MALLOC;\
-  return asan_memalign(0, size, &stack, type);
-
-// On OS X it's not enough to just provide our own 'operator new' and
-// 'operator delete' implementations, because they're going to be in the
-// runtime dylib, and the main executable will depend on both the runtime
-// dylib and libstdc++, each of those'll have its implementation of new and
-// delete.
-// To make sure that C++ allocation/deallocation operators are overridden on
-// OS X we need to intercept them using their mangled names.
-#if !SANITIZER_MAC
 // FreeBSD prior v9.2 have wrong definition of 'size_t'.
 // http://svnweb.freebsd.org/base?view=revision&revision=232261
 #if SANITIZER_FREEBSD && SANITIZER_WORDSIZE == 32
@@ -74,6 +54,30 @@
 #endif  // __FreeBSD_version
 #endif  // SANITIZER_FREEBSD && SANITIZER_WORDSIZE == 32
 
+// This code has issues on OSX.
+// See https://github.com/google/sanitizers/issues/131.
+
+// Fake std::nothrow_t and std::align_val_t to avoid including <new>.
+namespace std {
+struct nothrow_t {};
+enum class align_val_t: size_t {};
+}  // namespace std
+
+#define OPERATOR_NEW_BODY(type) \
+  GET_STACK_TRACE_MALLOC;\
+  return asan_memalign(0, size, &stack, type);
+#define OPERATOR_NEW_BODY_ALIGN(type) \
+  GET_STACK_TRACE_MALLOC;\
+  return asan_memalign((uptr)align, size, &stack, type);
+
+// On OS X it's not enough to just provide our own 'operator new' and
+// 'operator delete' implementations, because they're going to be in the
+// runtime dylib, and the main executable will depend on both the runtime
+// dylib and libstdc++, each of those'll have its implementation of new and
+// delete.
+// To make sure that C++ allocation/deallocation operators are overridden on
+// OS X we need to intercept them using their mangled names.
+#if !SANITIZER_MAC
 CXX_OPERATOR_ATTRIBUTE
 void *operator new(size_t size) { OPERATOR_NEW_BODY(FROM_NEW); }
 CXX_OPERATOR_ATTRIBUTE
@@ -84,6 +88,18 @@
 CXX_OPERATOR_ATTRIBUTE
 void *operator new[](size_t size, std::nothrow_t const&)
 { OPERATOR_NEW_BODY(FROM_NEW_BR); }
+CXX_OPERATOR_ATTRIBUTE
+void *operator new(size_t size, std::align_val_t align)
+{ OPERATOR_NEW_BODY_ALIGN(FROM_NEW); }
+CXX_OPERATOR_ATTRIBUTE
+void *operator new[](size_t size, std::align_val_t align)
+{ OPERATOR_NEW_BODY_ALIGN(FROM_NEW_BR); }
+CXX_OPERATOR_ATTRIBUTE
+void *operator new(size_t size, std::align_val_t align, std::nothrow_t const&)
+{ OPERATOR_NEW_BODY_ALIGN(FROM_NEW); }
+CXX_OPERATOR_ATTRIBUTE
+void *operator new[](size_t size, std::align_val_t align, std::nothrow_t const&)
+{ OPERATOR_NEW_BODY_ALIGN(FROM_NEW_BR); }
 
 #else  // SANITIZER_MAC
 INTERCEPTOR(void *, _Znwm, size_t size) {
@@ -131,6 +147,32 @@
   GET_STACK_TRACE_FREE;
   asan_sized_free(ptr, size, &stack, FROM_NEW_BR);
 }
+CXX_OPERATOR_ATTRIBUTE
+void operator delete(void *ptr, std::align_val_t) NOEXCEPT {
+  OPERATOR_DELETE_BODY(FROM_NEW);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete[](void *ptr, std::align_val_t) NOEXCEPT {
+  OPERATOR_DELETE_BODY(FROM_NEW_BR);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete(void *ptr, std::align_val_t, std::nothrow_t const&) {
+  OPERATOR_DELETE_BODY(FROM_NEW);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete[](void *ptr, std::align_val_t, std::nothrow_t const&) {
+  OPERATOR_DELETE_BODY(FROM_NEW_BR);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete(void *ptr, size_t size, std::align_val_t) NOEXCEPT {
+  GET_STACK_TRACE_FREE;
+  asan_sized_free(ptr, size, &stack, FROM_NEW);
+}
+CXX_OPERATOR_ATTRIBUTE
+void operator delete[](void *ptr, size_t size, std::align_val_t) NOEXCEPT {
+  GET_STACK_TRACE_FREE;
+  asan_sized_free(ptr, size, &stack, FROM_NEW_BR);
+}
 
 #else  // SANITIZER_MAC
 INTERCEPTOR(void, _ZdlPv, void *ptr) {
diff --git a/lib/asan/asan_poisoning.cc b/lib/asan/asan_poisoning.cc
index 65c4401..abb75ab 100644
--- a/lib/asan/asan_poisoning.cc
+++ b/lib/asan/asan_poisoning.cc
@@ -64,12 +64,9 @@
 };
 
 void FlushUnneededASanShadowMemory(uptr p, uptr size) {
-    // Since asan's mapping is compacting, the shadow chunk may be
-    // not page-aligned, so we only flush the page-aligned portion.
-    uptr page_size = GetPageSizeCached();
-    uptr shadow_beg = RoundUpTo(MemToShadow(p), page_size);
-    uptr shadow_end = RoundDownTo(MemToShadow(p + size), page_size);
-    FlushUnneededShadowMemory(shadow_beg, shadow_end - shadow_beg);
+  // Since asan's mapping is compacting, the shadow chunk may not be
+  // page-aligned; any page rounding is left to ReleaseMemoryPagesToOS().
+  ReleaseMemoryPagesToOS(MemToShadow(p), MemToShadow(p + size));
 }
 
 void AsanPoisonOrUnpoisonIntraObjectRedzone(uptr ptr, uptr size, bool poison) {
@@ -314,14 +311,36 @@
   }
 }
 
+void __asan_set_shadow_00(uptr addr, uptr size) {
+  REAL(memset)((void *)addr, 0, size);
+}
+
+void __asan_set_shadow_f1(uptr addr, uptr size) {
+  REAL(memset)((void *)addr, 0xf1, size);
+}
+
+void __asan_set_shadow_f2(uptr addr, uptr size) {
+  REAL(memset)((void *)addr, 0xf2, size);
+}
+
+void __asan_set_shadow_f3(uptr addr, uptr size) {
+  REAL(memset)((void *)addr, 0xf3, size);
+}
+
+void __asan_set_shadow_f5(uptr addr, uptr size) {
+  REAL(memset)((void *)addr, 0xf5, size);
+}
+
+void __asan_set_shadow_f8(uptr addr, uptr size) {
+  REAL(memset)((void *)addr, 0xf8, size);
+}
+
 void __asan_poison_stack_memory(uptr addr, uptr size) {
-  if (!__asan_option_detect_stack_use_after_scope) return;
   VReport(1, "poisoning: %p %zx\n", (void *)addr, size);
   PoisonAlignedStackMemory(addr, size, true);
 }
 
 void __asan_unpoison_stack_memory(uptr addr, uptr size) {
-  if (!__asan_option_detect_stack_use_after_scope) return;
   VReport(1, "unpoisoning: %p %zx\n", (void *)addr, size);
   PoisonAlignedStackMemory(addr, size, false);
 }
@@ -390,7 +409,7 @@
   // ending with end.
   uptr kMaxRangeToCheck = 32;
   uptr r1_beg = beg;
-  uptr r1_end = Min(end + kMaxRangeToCheck, mid);
+  uptr r1_end = Min(beg + kMaxRangeToCheck, mid);
   uptr r2_beg = Max(beg, mid - kMaxRangeToCheck);
   uptr r2_end = Min(end, mid + kMaxRangeToCheck);
   uptr r3_beg = Max(end - kMaxRangeToCheck, mid);
diff --git a/lib/asan/asan_poisoning.h b/lib/asan/asan_poisoning.h
index 6344225..cc3281e 100644
--- a/lib/asan/asan_poisoning.h
+++ b/lib/asan/asan_poisoning.h
@@ -86,8 +86,8 @@
   }
 }
 
-// Calls __sanitizer::FlushUnneededShadowMemory() on
-// [MemToShadow(p), MemToShadow(p+size)] with proper rounding.
+// Calls __sanitizer::ReleaseMemoryPagesToOS() on
+// [MemToShadow(p), MemToShadow(p+size)].
 void FlushUnneededASanShadowMemory(uptr p, uptr size);
 
 }  // namespace __asan
diff --git a/lib/asan/asan_rtl.cc b/lib/asan/asan_rtl.cc
index eafa629..d9d7d7e 100644
--- a/lib/asan/asan_rtl.cc
+++ b/lib/asan/asan_rtl.cc
@@ -34,7 +34,6 @@
 
 uptr __asan_shadow_memory_dynamic_address;  // Global interface symbol.
 int __asan_option_detect_stack_use_after_return;  // Global interface symbol.
-int __asan_option_detect_stack_use_after_scope;  // Global interface symbol.
 uptr *__asan_test_only_reported_buggy_pointer;  // Used only for testing asan.
 
 namespace __asan {
@@ -266,6 +265,7 @@
   volatile int fake_condition = 0;  // prevent dead condition elimination.
   // __asan_report_* functions are noreturn, so we need a switch to prevent
   // the compiler from removing any of them.
+  // clang-format off
   switch (fake_condition) {
     case 1: __asan_report_load1(0); break;
     case 2: __asan_report_load2(0); break;
@@ -305,7 +305,14 @@
     case 37: __asan_unpoison_stack_memory(0, 0); break;
     case 38: __asan_region_is_poisoned(0, 0); break;
     case 39: __asan_describe_address(0); break;
+    case 40: __asan_set_shadow_00(0, 0); break;
+    case 41: __asan_set_shadow_f1(0, 0); break;
+    case 42: __asan_set_shadow_f2(0, 0); break;
+    case 43: __asan_set_shadow_f3(0, 0); break;
+    case 44: __asan_set_shadow_f5(0, 0); break;
+    case 45: __asan_set_shadow_f8(0, 0); break;
   }
+  // clang-format on
 }
 
 static void asan_atexit() {
@@ -329,8 +336,21 @@
 }
 
 static void ProtectGap(uptr addr, uptr size) {
-  if (!flags()->protect_shadow_gap)
+  if (!flags()->protect_shadow_gap) {
+    // The shadow gap is unprotected, so there is a chance that someone
+    // is actually using this memory, which means it needs a shadow.
+    uptr GapShadowBeg = RoundDownTo(MEM_TO_SHADOW(addr), GetPageSizeCached());
+    uptr GapShadowEnd =
+        RoundUpTo(MEM_TO_SHADOW(addr + size), GetPageSizeCached()) - 1;
+    if (Verbosity())
+      Printf("protect_shadow_gap=0:"
+             " not protecting shadow gap, allocating gap's shadow\n"
+             "|| `[%p, %p]` || ShadowGap's shadow ||\n", GapShadowBeg,
+             GapShadowEnd);
+    ReserveShadowMemoryRange(GapShadowBeg, GapShadowEnd,
+                             "unprotected gap shadow");
     return;
+  }
   void *res = MmapFixedNoAccess(addr, size, "shadow gap");
   if (addr == (uptr)res)
     return;
@@ -391,6 +411,8 @@
   Printf("redzone=%zu\n", (uptr)flags()->redzone);
   Printf("max_redzone=%zu\n", (uptr)flags()->max_redzone);
   Printf("quarantine_size_mb=%zuM\n", (uptr)flags()->quarantine_size_mb);
+  Printf("thread_local_quarantine_size_kb=%zuK\n",
+         (uptr)flags()->thread_local_quarantine_size_kb);
   Printf("malloc_context_size=%zu\n",
          (uptr)common_flags()->malloc_context_size);
 
@@ -404,60 +426,7 @@
           kHighShadowBeg > kMidMemEnd);
 }
 
-static void AsanInitInternal() {
-  if (LIKELY(asan_inited)) return;
-  SanitizerToolName = "AddressSanitizer";
-  CHECK(!asan_init_is_running && "ASan init calls itself!");
-  asan_init_is_running = true;
-
-  CacheBinaryName();
-
-  // Initialize flags. This must be done early, because most of the
-  // initialization steps look at flags().
-  InitializeFlags();
-
-  AsanCheckIncompatibleRT();
-  AsanCheckDynamicRTPrereqs();
-  AvoidCVE_2016_2143();
-
-  SetCanPoisonMemory(flags()->poison_heap);
-  SetMallocContextSize(common_flags()->malloc_context_size);
-
-  InitializePlatformExceptionHandlers();
-
-  InitializeHighMemEnd();
-
-  // Make sure we are not statically linked.
-  AsanDoesNotSupportStaticLinkage();
-
-  // Install tool-specific callbacks in sanitizer_common.
-  AddDieCallback(AsanDie);
-  SetCheckFailedCallback(AsanCheckFailed);
-  SetPrintfAndReportCallback(AppendToErrorMessageBuffer);
-
-  __sanitizer_set_report_path(common_flags()->log_path);
-
-  __asan_option_detect_stack_use_after_return =
-      flags()->detect_stack_use_after_return;
-
-  __asan_option_detect_stack_use_after_scope =
-      flags()->detect_stack_use_after_scope;
-
-  // Re-exec ourselves if we need to set additional env or command line args.
-  MaybeReexec();
-
-  // Setup internal allocator callback.
-  SetLowLevelAllocateCallback(OnLowLevelAllocate);
-
-  InitializeAsanInterceptors();
-
-  // Enable system log ("adb logcat") on Android.
-  // Doing this before interceptors are initialized crashes in:
-  // AsanInitInternal -> android_log_write -> __interceptor_strcmp
-  AndroidLogInit();
-
-  ReplaceSystemMalloc();
-
+static void InitializeShadowMemory() {
   // Set the shadow memory address to uninitialized.
   __asan_shadow_memory_dynamic_address = kDefaultShadowSentinel;
 
@@ -497,8 +466,6 @@
 
   if (Verbosity()) PrintAddressSpaceLayout();
 
-  DisableCoreDumperIfNecessary();
-
   if (full_shadow_is_available) {
     // mmap the low shadow plus at least one page at the left.
     if (kLowShadowBeg)
@@ -530,6 +497,62 @@
     DumpProcessMap();
     Die();
   }
+}
+
+static void AsanInitInternal() {
+  if (LIKELY(asan_inited)) return;
+  SanitizerToolName = "AddressSanitizer";
+  CHECK(!asan_init_is_running && "ASan init calls itself!");
+  asan_init_is_running = true;
+
+  CacheBinaryName();
+
+  // Initialize flags. This must be done early, because most of the
+  // initialization steps look at flags().
+  InitializeFlags();
+
+  AsanCheckIncompatibleRT();
+  AsanCheckDynamicRTPrereqs();
+  AvoidCVE_2016_2143();
+
+  SetCanPoisonMemory(flags()->poison_heap);
+  SetMallocContextSize(common_flags()->malloc_context_size);
+
+  InitializePlatformExceptionHandlers();
+
+  InitializeHighMemEnd();
+
+  // Make sure we are not statically linked.
+  AsanDoesNotSupportStaticLinkage();
+
+  // Install tool-specific callbacks in sanitizer_common.
+  AddDieCallback(AsanDie);
+  SetCheckFailedCallback(AsanCheckFailed);
+  SetPrintfAndReportCallback(AppendToErrorMessageBuffer);
+
+  __sanitizer_set_report_path(common_flags()->log_path);
+
+  __asan_option_detect_stack_use_after_return =
+      flags()->detect_stack_use_after_return;
+
+  // Re-exec ourselves if we need to set additional env or command line args.
+  MaybeReexec();
+
+  // Setup internal allocator callback.
+  SetLowLevelAllocateCallback(OnLowLevelAllocate);
+
+  InitializeAsanInterceptors();
+
+  // Enable system log ("adb logcat") on Android.
+  // Doing this before interceptors are initialized crashes in:
+  // AsanInitInternal -> android_log_write -> __interceptor_strcmp
+  AndroidLogInit();
+
+  ReplaceSystemMalloc();
+
+  DisableCoreDumperIfNecessary();
+
+  InitializeShadowMemory();
 
   AsanTSDInit(PlatformTSDDtor);
   InstallDeadlySignalHandlers(AsanOnDeadlySignal);
@@ -621,6 +644,9 @@
 using namespace __asan;  // NOLINT
 
 void NOINLINE __asan_handle_no_return() {
+  if (asan_init_is_running)
+    return;
+
   int local_stack;
   AsanThread *curr_thread = GetCurrentThread();
   uptr PageSize = GetPageSizeCached();
diff --git a/lib/asan/asan_thread.cc b/lib/asan/asan_thread.cc
index d7e2cca..537b53d 100644
--- a/lib/asan/asan_thread.cc
+++ b/lib/asan/asan_thread.cc
@@ -141,7 +141,9 @@
     current_fake_stack->Destroy(this->tid());
 }
 
-void AsanThread::FinishSwitchFiber(FakeStack *fake_stack_save) {
+void AsanThread::FinishSwitchFiber(FakeStack *fake_stack_save,
+                                   uptr *bottom_old,
+                                   uptr *size_old) {
   if (!atomic_load(&stack_switching_, memory_order_relaxed)) {
     Report("ERROR: finishing a fiber switch that has not started\n");
     Die();
@@ -152,6 +154,10 @@
     fake_stack_ = fake_stack_save;
   }
 
+  if (bottom_old)
+    *bottom_old = stack_bottom_;
+  if (size_old)
+    *size_old = stack_top_ - stack_bottom_;
   stack_bottom_ = next_stack_bottom_;
   stack_top_ = next_stack_top_;
   atomic_store(&stack_switching_, 0, memory_order_release);
@@ -345,7 +351,7 @@
       // limits, so only do this magic on Android, and only if the found thread
       // is the main thread.
       AsanThreadContext *tctx = GetThreadContextByTidLocked(0);
-      if (ThreadStackContainsAddress(tctx, &context)) {
+      if (tctx && ThreadStackContainsAddress(tctx, &context)) {
         SetCurrentThread(tctx->thread);
         return tctx->thread;
       }
@@ -447,12 +453,16 @@
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-void __sanitizer_finish_switch_fiber(void* fakestack) {
+void __sanitizer_finish_switch_fiber(void* fakestack,
+                                     const void **bottom_old,
+                                     uptr *size_old) {
   AsanThread *t = GetCurrentThread();
   if (!t) {
     VReport(1, "__asan_finish_switch_fiber called from unknown thread\n");
     return;
   }
-  t->FinishSwitchFiber((FakeStack*)fakestack);
+  t->FinishSwitchFiber((FakeStack*)fakestack,
+                       (uptr*)bottom_old,
+                       (uptr*)size_old);
 }
 }
diff --git a/lib/asan/asan_thread.h b/lib/asan/asan_thread.h
index 92a92a2..f53dfb7 100644
--- a/lib/asan/asan_thread.h
+++ b/lib/asan/asan_thread.h
@@ -94,7 +94,8 @@
   }
 
   void StartSwitchFiber(FakeStack **fake_stack_save, uptr bottom, uptr size);
-  void FinishSwitchFiber(FakeStack *fake_stack_save);
+  void FinishSwitchFiber(FakeStack *fake_stack_save, uptr *bottom_old,
+                         uptr *size_old);
 
   bool has_fake_stack() {
     return !atomic_load(&stack_switching_, memory_order_relaxed) &&
diff --git a/lib/asan/asan_win.cc b/lib/asan/asan_win.cc
index 4881f83..78268d8 100644
--- a/lib/asan/asan_win.cc
+++ b/lib/asan/asan_win.cc
@@ -19,6 +19,7 @@
 
 #include <stdlib.h>
 
+#include "asan_globals_win.h"
 #include "asan_interceptors.h"
 #include "asan_internal.h"
 #include "asan_report.h"
@@ -38,12 +39,6 @@
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-int __asan_should_detect_stack_use_after_scope() {
-  __asan_init();
-  return __asan_option_detect_stack_use_after_scope;
-}
-
-SANITIZER_INTERFACE_ATTRIBUTE
 uptr __asan_get_shadow_memory_dynamic_address() {
   __asan_init();
   return __asan_shadow_memory_dynamic_address;
@@ -299,17 +294,25 @@
   return nullptr;
 }
 
-static long WINAPI SEHHandler(EXCEPTION_POINTERS *info) {
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE
+long __asan_unhandled_exception_filter(EXCEPTION_POINTERS *info) {
   EXCEPTION_RECORD *exception_record = info->ExceptionRecord;
   CONTEXT *context = info->ContextRecord;
 
-  if (ShouldReportDeadlyException(exception_record->ExceptionCode)) {
-    SignalContext sig = SignalContext::Create(exception_record, context);
-    ReportDeadlySignal(exception_record->ExceptionCode, sig);
-  }
-
+  // Continue the search if the signal wasn't deadly.
+  if (!ShouldReportDeadlyException(exception_record->ExceptionCode))
+    return EXCEPTION_CONTINUE_SEARCH;
   // FIXME: Handle EXCEPTION_STACK_OVERFLOW here.
 
+  SignalContext sig = SignalContext::Create(exception_record, context);
+  ReportDeadlySignal(exception_record->ExceptionCode, sig);
+  UNREACHABLE("returned from reporting deadly signal");
+}
+
+static long WINAPI SEHHandler(EXCEPTION_POINTERS *info) {
+  __asan_unhandled_exception_filter(info);
+
+  // Bubble out to the default exception filter.
   return default_seh_handler(info);
 }
 
@@ -349,10 +352,25 @@
 // immediately after the CRT runs. This way, our exception filter is called
 // first and we can delegate to their filter if appropriate.
 #pragma section(".CRT$XCAB", long, read)  // NOLINT
-__declspec(allocate(".CRT$XCAB"))
-    int (*__intercept_seh)() = __asan_set_seh_filter;
+__declspec(allocate(".CRT$XCAB")) int (*__intercept_seh)() =
+    __asan_set_seh_filter;
+
+// Piggyback on the TLS initialization callback directory to initialize asan as
+// early as possible. Initializers in .CRT$XL* are called directly by ntdll,
+// and they run before the CRT. Users also add code to .CRT$XLC, so it's
+// important to run our initializers first.
+static void NTAPI asan_thread_init(void *module, DWORD reason, void *reserved) {
+  if (reason == DLL_PROCESS_ATTACH) __asan_init();
+}
+
+#pragma section(".CRT$XLAB", long, read)  // NOLINT
+__declspec(allocate(".CRT$XLAB")) void (NTAPI *__asan_tls_init)(void *,
+    unsigned long, void *) = asan_thread_init;
 #endif
+
+ASAN_LINK_GLOBALS_WIN()
+
 // }}}
 }  // namespace __asan
 
-#endif  // _WIN32
+#endif  // SANITIZER_WINDOWS
diff --git a/lib/asan/asan_win_dll_thunk.cc b/lib/asan/asan_win_dll_thunk.cc
index 1a5ce12..4764fd0 100644
--- a/lib/asan/asan_win_dll_thunk.cc
+++ b/lib/asan/asan_win_dll_thunk.cc
@@ -20,13 +20,20 @@
 // simplifies the build procedure.
 #ifdef ASAN_DLL_THUNK
 #include "asan_init_version.h"
+#include "asan_globals_win.h"
 #include "interception/interception.h"
 #include "sanitizer_common/sanitizer_platform_interceptors.h"
 
+#ifdef _M_IX86
+#define WINAPI __stdcall
+#else
+#define WINAPI
+#endif
+
 // ---------- Function interception helper functions and macros ----------- {{{1
 extern "C" {
-void *__stdcall GetModuleHandleA(const char *module_name);
-void *__stdcall GetProcAddress(void *module, const char *proc_name);
+void *WINAPI GetModuleHandleA(const char *module_name);
+void *WINAPI GetProcAddress(void *module, const char *proc_name);
 void abort();
 }
 
@@ -107,7 +114,7 @@
 // ---------- Function wrapping helpers ----------------------------------- {{{1
 #define WRAP_V_V(name)                                                         \
   extern "C" void name() {                                                     \
-    typedef void (*fntype)();                                                  \
+    typedef decltype(name) *fntype;                                            \
     static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
     fn();                                                                      \
   }                                                                            \
@@ -115,7 +122,7 @@
 
 #define WRAP_V_W(name)                                                         \
   extern "C" void name(void *arg) {                                            \
-    typedef void (*fntype)(void *arg);                                         \
+    typedef decltype(name) *fntype;                                            \
     static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
     fn(arg);                                                                   \
   }                                                                            \
@@ -123,7 +130,7 @@
 
 #define WRAP_V_WW(name)                                                        \
   extern "C" void name(void *arg1, void *arg2) {                               \
-    typedef void (*fntype)(void *, void *);                                    \
+    typedef decltype(name) *fntype;                                            \
     static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
     fn(arg1, arg2);                                                            \
   }                                                                            \
@@ -131,7 +138,7 @@
 
 #define WRAP_V_WWW(name)                                                       \
   extern "C" void name(void *arg1, void *arg2, void *arg3) {                   \
-    typedef void *(*fntype)(void *, void *, void *);                           \
+    typedef decltype(name) *fntype;                                            \
     static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
     fn(arg1, arg2, arg3);                                                      \
   }                                                                            \
@@ -139,7 +146,7 @@
 
 #define WRAP_W_V(name)                                                         \
   extern "C" void *name() {                                                    \
-    typedef void *(*fntype)();                                                 \
+    typedef decltype(name) *fntype;                                            \
     static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
     return fn();                                                               \
   }                                                                            \
@@ -147,7 +154,7 @@
 
 #define WRAP_W_W(name)                                                         \
   extern "C" void *name(void *arg) {                                           \
-    typedef void *(*fntype)(void *arg);                                        \
+    typedef decltype(name) *fntype;                                            \
     static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
     return fn(arg);                                                            \
   }                                                                            \
@@ -155,7 +162,7 @@
 
 #define WRAP_W_WW(name)                                                        \
   extern "C" void *name(void *arg1, void *arg2) {                              \
-    typedef void *(*fntype)(void *, void *);                                   \
+    typedef decltype(name) *fntype;                                            \
     static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
     return fn(arg1, arg2);                                                     \
   }                                                                            \
@@ -163,7 +170,7 @@
 
 #define WRAP_W_WWW(name)                                                       \
   extern "C" void *name(void *arg1, void *arg2, void *arg3) {                  \
-    typedef void *(*fntype)(void *, void *, void *);                           \
+    typedef decltype(name) *fntype;                                            \
     static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
     return fn(arg1, arg2, arg3);                                               \
   }                                                                            \
@@ -171,7 +178,7 @@
 
 #define WRAP_W_WWWW(name)                                                      \
   extern "C" void *name(void *arg1, void *arg2, void *arg3, void *arg4) {      \
-    typedef void *(*fntype)(void *, void *, void *, void *);                   \
+    typedef decltype(name) *fntype;                                            \
     static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
     return fn(arg1, arg2, arg3, arg4);                                         \
   }                                                                            \
@@ -180,7 +187,7 @@
 #define WRAP_W_WWWWW(name)                                                     \
   extern "C" void *name(void *arg1, void *arg2, void *arg3, void *arg4,        \
                         void *arg5) {                                          \
-    typedef void *(*fntype)(void *, void *, void *, void *, void *);           \
+    typedef decltype(name) *fntype;                                            \
     static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
     return fn(arg1, arg2, arg3, arg4, arg5);                                   \
   }                                                                            \
@@ -189,7 +196,7 @@
 #define WRAP_W_WWWWWW(name)                                                    \
   extern "C" void *name(void *arg1, void *arg2, void *arg3, void *arg4,        \
                         void *arg5, void *arg6) {                              \
-    typedef void *(*fntype)(void *, void *, void *, void *, void *, void *);   \
+    typedef decltype(name) *fntype;                                            \
     static fntype fn = (fntype)getRealProcAddressOrDie(#name);                 \
     return fn(arg1, arg2, arg3, arg4, arg5, arg6);                             \
   }                                                                            \
@@ -200,12 +207,10 @@
 // Don't use the INTERFACE_FUNCTION machinery for this function as we actually
 // want to call it in the __asan_init interceptor.
 WRAP_W_V(__asan_should_detect_stack_use_after_return)
-WRAP_W_V(__asan_should_detect_stack_use_after_scope)
 WRAP_W_V(__asan_get_shadow_memory_dynamic_address)
 
 extern "C" {
   int __asan_option_detect_stack_use_after_return;
-  int __asan_option_detect_stack_use_after_scope;
   uptr __asan_shadow_memory_dynamic_address;
 
   // Manually wrap __asan_init as we need to initialize
@@ -220,11 +225,8 @@
     fn();
     __asan_option_detect_stack_use_after_return =
         (__asan_should_detect_stack_use_after_return() != 0);
-    __asan_option_detect_stack_use_after_scope =
-        (__asan_should_detect_stack_use_after_scope() != 0);
     __asan_shadow_memory_dynamic_address =
         (uptr)__asan_get_shadow_memory_dynamic_address();
-
     InterceptHooks();
   }
 }
@@ -234,6 +236,7 @@
 }
 
 INTERFACE_FUNCTION(__asan_handle_no_return)
+INTERFACE_FUNCTION(__asan_unhandled_exception_filter)
 
 INTERFACE_FUNCTION(__asan_report_store1)
 INTERFACE_FUNCTION(__asan_report_store2)
@@ -267,6 +270,13 @@
 INTERFACE_FUNCTION(__asan_memset);
 INTERFACE_FUNCTION(__asan_memmove);
 
+INTERFACE_FUNCTION(__asan_set_shadow_00);
+INTERFACE_FUNCTION(__asan_set_shadow_f1);
+INTERFACE_FUNCTION(__asan_set_shadow_f2);
+INTERFACE_FUNCTION(__asan_set_shadow_f3);
+INTERFACE_FUNCTION(__asan_set_shadow_f5);
+INTERFACE_FUNCTION(__asan_set_shadow_f8);
+
 INTERFACE_FUNCTION(__asan_alloca_poison);
 INTERFACE_FUNCTION(__asan_allocas_unpoison);
 
@@ -316,17 +326,18 @@
 INTERFACE_FUNCTION(__sanitizer_contiguous_container_find_bad_address)
 INTERFACE_FUNCTION(__sanitizer_cov)
 INTERFACE_FUNCTION(__sanitizer_cov_dump)
+INTERFACE_FUNCTION(__sanitizer_dump_coverage)
+INTERFACE_FUNCTION(__sanitizer_dump_trace_pc_guard_coverage)
 INTERFACE_FUNCTION(__sanitizer_cov_indir_call16)
 INTERFACE_FUNCTION(__sanitizer_cov_init)
 INTERFACE_FUNCTION(__sanitizer_cov_module_init)
 INTERFACE_FUNCTION(__sanitizer_cov_trace_basic_block)
 INTERFACE_FUNCTION(__sanitizer_cov_trace_func_enter)
-INTERFACE_FUNCTION(__sanitizer_cov_trace_cmp)
-INTERFACE_FUNCTION(__sanitizer_cov_trace_switch)
+INTERFACE_FUNCTION(__sanitizer_cov_trace_pc_guard)
+INTERFACE_FUNCTION(__sanitizer_cov_trace_pc_guard_init)
 INTERFACE_FUNCTION(__sanitizer_cov_with_check)
 INTERFACE_FUNCTION(__sanitizer_get_allocated_size)
 INTERFACE_FUNCTION(__sanitizer_get_coverage_guards)
-INTERFACE_FUNCTION(__sanitizer_get_coverage_pc_buffer_pos)
 INTERFACE_FUNCTION(__sanitizer_get_current_allocated_bytes)
 INTERFACE_FUNCTION(__sanitizer_get_estimated_allocated_size)
 INTERFACE_FUNCTION(__sanitizer_get_free_bytes)
@@ -346,7 +357,6 @@
 INTERFACE_FUNCTION(__sanitizer_get_number_of_counters)
 INTERFACE_FUNCTION(__sanitizer_update_counter_bitset_and_clear_counters)
 INTERFACE_FUNCTION(__sanitizer_sandbox_on_notify)
-INTERFACE_FUNCTION(__sanitizer_set_coverage_pc_buffer)
 INTERFACE_FUNCTION(__sanitizer_set_death_callback)
 INTERFACE_FUNCTION(__sanitizer_set_report_path)
 INTERFACE_FUNCTION(__sanitizer_set_report_fd)
@@ -360,6 +370,7 @@
 INTERFACE_FUNCTION(__sanitizer_install_malloc_and_free_hooks)
 INTERFACE_FUNCTION(__sanitizer_start_switch_fiber)
 INTERFACE_FUNCTION(__sanitizer_finish_switch_fiber)
+INTERFACE_FUNCTION(__sanitizer_get_module_and_offset_for_pc)
 
 // TODO(timurrrr): Add more interface functions on the as-needed basis.
 
@@ -381,6 +392,7 @@
 WRAP_W_WW(_realloc_base)
 WRAP_W_WWW(_realloc_dbg)
 WRAP_W_WWW(_recalloc)
+WRAP_W_WWW(_recalloc_base)
 
 WRAP_W_W(_msize)
 WRAP_W_W(_expand)
@@ -457,4 +469,15 @@
 #pragma section(".CRT$XIB", long, read)  // NOLINT
 __declspec(allocate(".CRT$XIB")) int (*__asan_preinit)() = call_asan_init;
 
+static void WINAPI asan_thread_init(void *mod, unsigned long reason,
+                                   void *reserved) {
+  if (reason == /*DLL_PROCESS_ATTACH=*/1) __asan_init();
+}
+
+#pragma section(".CRT$XLAB", long, read)  // NOLINT
+__declspec(allocate(".CRT$XLAB")) void (WINAPI *__asan_tls_init)(void *,
+    unsigned long, void *) = asan_thread_init;
+
+ASAN_LINK_GLOBALS_WIN()
+
 #endif // ASAN_DLL_THUNK
diff --git a/lib/asan/asan_win_dynamic_runtime_thunk.cc b/lib/asan/asan_win_dynamic_runtime_thunk.cc
index 75d5d28..8e42f03 100644
--- a/lib/asan/asan_win_dynamic_runtime_thunk.cc
+++ b/lib/asan/asan_win_dynamic_runtime_thunk.cc
@@ -15,7 +15,6 @@
 //
 // This includes:
 //  - forwarding the detect_stack_use_after_return runtime option
-//  - forwarding the detect_stack_use_after_scope runtime option
 //  - working around deficiencies of the MD runtime
 //  - installing a custom SEH handler
 //
@@ -25,14 +24,17 @@
 // Using #ifdef rather than relying on Makefiles etc.
 // simplifies the build procedure.
 #ifdef ASAN_DYNAMIC_RUNTIME_THUNK
+#include "asan_globals_win.h"
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 
 // First, declare CRT sections we'll be using in this file
+#pragma section(".CRT$XIB", long, read)  // NOLINT
 #pragma section(".CRT$XID", long, read)  // NOLINT
 #pragma section(".CRT$XCAB", long, read)  // NOLINT
 #pragma section(".CRT$XTW", long, read)  // NOLINT
 #pragma section(".CRT$XTY", long, read)  // NOLINT
+#pragma section(".CRT$XLAB", long, read)  // NOLINT
 
 ////////////////////////////////////////////////////////////////////////////////
 // Define a copy of __asan_option_detect_stack_use_after_return that should be
@@ -47,31 +49,33 @@
 // after initialization anyways.
 extern "C" {
 __declspec(dllimport) int __asan_should_detect_stack_use_after_return();
-int __asan_option_detect_stack_use_after_return =
-    __asan_should_detect_stack_use_after_return();
+int __asan_option_detect_stack_use_after_return;
 
 __declspec(dllimport) void* __asan_get_shadow_memory_dynamic_address();
-void* __asan_shadow_memory_dynamic_address =
-    __asan_get_shadow_memory_dynamic_address();
+void* __asan_shadow_memory_dynamic_address;
 }
 
-////////////////////////////////////////////////////////////////////////////////
-// Define a copy of __asan_option_detect_stack_use_after_scope that should be
-// used when linking an MD runtime with a set of object files on Windows.
-//
-// The ASan MD runtime dllexports '__asan_option_detect_stack_use_after_scope',
-// so normally we would just dllimport it.  Unfortunately, the dllimport
-// attribute adds __imp_ prefix to the symbol name of a variable.
-// Since in general we don't know if a given TU is going to be used
-// with a MT or MD runtime and we don't want to use ugly __imp_ names on Windows
-// just to work around this issue, let's clone the variable that is constant
-// after initialization anyways.
-extern "C" {
-__declspec(dllimport) int __asan_should_detect_stack_use_after_scope();
-int __asan_option_detect_stack_use_after_scope =
-    __asan_should_detect_stack_use_after_scope();
+static int InitializeClonedVariables() {
+  __asan_option_detect_stack_use_after_return =
+    __asan_should_detect_stack_use_after_return();
+  __asan_shadow_memory_dynamic_address =
+    __asan_get_shadow_memory_dynamic_address();
+  return 0;
 }
 
+static void NTAPI asan_thread_init(void *mod, unsigned long reason,
+    void *reserved) {
+  if (reason == DLL_PROCESS_ATTACH) InitializeClonedVariables();
+}
+
+// Our cloned variables must be initialized before C/C++ constructors.  If TLS
+// is used, our .CRT$XLAB initializer will run first. If not, our .CRT$XIB
+// initializer is needed as a backup.
+__declspec(allocate(".CRT$XIB")) int (*__asan_initialize_cloned_variables)() =
+    InitializeClonedVariables;
+__declspec(allocate(".CRT$XLAB")) void (NTAPI *__asan_tls_init)(void *,
+    unsigned long, void *) = asan_thread_init;
+
 ////////////////////////////////////////////////////////////////////////////////
 // For some reason, the MD CRT doesn't call the C/C++ terminators during on DLL
 // unload or on exit.  ASan relies on LLVM global_dtors to call
@@ -95,6 +99,7 @@
 int ScheduleUnregisterGlobals() {
   return atexit(UnregisterGlobals);
 }
+}  // namespace
 
 // We need to call 'atexit(UnregisterGlobals);' as early as possible, but after
 // atexit() is initialized (.CRT$XIC).  As this is executed before C++
@@ -103,8 +108,6 @@
 __declspec(allocate(".CRT$XID"))
 int (*__asan_schedule_unregister_globals)() = ScheduleUnregisterGlobals;
 
-}  // namespace
-
 ////////////////////////////////////////////////////////////////////////////////
 // ASan SEH handling.
 // We need to set the ASan-specific SEH handler at the end of CRT initialization
@@ -119,4 +122,6 @@
     SetSEHFilter;
 }
 
+ASAN_LINK_GLOBALS_WIN()
+
 #endif // ASAN_DYNAMIC_RUNTIME_THUNK
diff --git a/lib/asan/scripts/asan_device_setup b/lib/asan/scripts/asan_device_setup
index 52794b1..fdfc46f 100755
--- a/lib/asan/scripts/asan_device_setup
+++ b/lib/asan/scripts/asan_device_setup
@@ -300,20 +300,22 @@
   cp "$ASAN_RT_PATH/$ASAN_RT64" "$TMPDIR/"
 fi
 
-# FIXME: alloc_dealloc_mismatch=0 prevents a failure in libdvm startup,
-# which may or may not be a real bug (probably not).
-ASAN_OPTIONS=start_deactivated=1,alloc_dealloc_mismatch=0,malloc_context_size=0
+ASAN_OPTIONS=start_deactivated=1,malloc_context_size=0
 
-function generate_zygote_wrapper { # from, to, asan_rt
+# The name of a symlink to libclang_rt.asan-$ARCH-android.so used in LD_PRELOAD.
+# The idea is to have the same name in lib and lib64 to keep it from falling
+# apart when a 64-bit process spawns a 32-bit one, inheriting the environment.
+ASAN_RT_SYMLINK=symlink-to-libclang_rt.asan
+
+function generate_zygote_wrapper { # from, to
   local _from=$1
   local _to=$2
-  local _asan_rt=$3
   if [[ PRE_L -eq 0 ]]; then
     # LD_PRELOAD parsing is broken in N if it starts with ":". Luckily, it is
     # unset in the system environment since L.
-    local _ld_preload=$_asan_rt
+    local _ld_preload=$ASAN_RT_SYMLINK
   else
-    local _ld_preload=\$LD_PRELOAD:$_asan_rt
+    local _ld_preload=\$LD_PRELOAD:$ASAN_RT_SYMLINK
   fi
   cat <<EOF >"$TMPDIR/$_from"
 #!/system/bin/sh-from-zygote
@@ -342,18 +344,18 @@
     mv "$TMPDIR/app_process32" "$TMPDIR/app_process32.real"
     mv "$TMPDIR/app_process64" "$TMPDIR/app_process64.real"
   fi
-  generate_zygote_wrapper "app_process32" "/system/bin/app_process32.real" "$ASAN_RT"
-  generate_zygote_wrapper "app_process64" "/system/bin/app_process64.real" "$ASAN_RT64"
+  generate_zygote_wrapper "app_process32" "/system/bin/app_process32.real"
+  generate_zygote_wrapper "app_process64" "/system/bin/app_process64.real"
 else
   # A 32-bit device.
-  generate_zygote_wrapper "app_process.wrap" "/system/bin/app_process32" "$ASAN_RT"
+  generate_zygote_wrapper "app_process.wrap" "/system/bin/app_process32"
 fi
 
 # General command-line tool wrapper (use for anything that's not started as
 # zygote).
 cat <<EOF >"$TMPDIR/asanwrapper"
 #!/system/bin/sh
-LD_PRELOAD=$ASAN_RT \\
+LD_PRELOAD=$ASAN_RT_SYMLINK \\
 exec \$@
 
 EOF
@@ -361,7 +363,7 @@
 if [[ -n "$ASAN_RT64" ]]; then
   cat <<EOF >"$TMPDIR/asanwrapper64"
 #!/system/bin/sh
-LD_PRELOAD=$ASAN_RT64 \\
+LD_PRELOAD=$ASAN_RT_SYMLINK \\
 exec \$@
 
 EOF
@@ -412,12 +414,17 @@
       install "$TMPDIR/app_process64.real" /system/bin 755 $CTX
       install "$TMPDIR/asanwrapper" /system/bin 755
       install "$TMPDIR/asanwrapper64" /system/bin 755
+
+      adb_shell ln -s $ASAN_RT /system/lib/$ASAN_RT_SYMLINK
+      adb_shell ln -s $ASAN_RT64 /system/lib64/$ASAN_RT_SYMLINK
     else
       install "$TMPDIR/$ASAN_RT" /system/lib 644
       install "$TMPDIR/app_process32" /system/bin 755 $CTX
       install "$TMPDIR/app_process.wrap" /system/bin 755 $CTX
       install "$TMPDIR/asanwrapper" /system/bin 755 $CTX
 
+      adb_shell ln -s $ASAN_RT /system/lib/$ASAN_RT_SYMLINK
+
       adb_shell rm /system/bin/app_process
       adb_shell ln -s /system/bin/app_process.wrap /system/bin/app_process
     fi
diff --git a/lib/asan/tests/CMakeLists.txt b/lib/asan/tests/CMakeLists.txt
index e67d0fb..8089d51 100644
--- a/lib/asan/tests/CMakeLists.txt
+++ b/lib/asan/tests/CMakeLists.txt
@@ -36,8 +36,8 @@
 
 # This will ensure the target linker is used
 # during cross compilation
-set(ASAN_UNITTEST_COMMON_LINKFLAGS
-  ${COMPILER_RT_UNITTEST_LINKFLAGS})
+set(ASAN_UNITTEST_COMMON_LINK_FLAGS
+  ${COMPILER_RT_UNITTEST_LINK_FLAGS})
 
 # -gline-tables-only must be enough for ASan, so use it if possible.
 if(COMPILER_RT_TEST_COMPILER_ID MATCHES "Clang")
@@ -48,7 +48,7 @@
 if(MSVC)
   list(APPEND ASAN_UNITTEST_COMMON_CFLAGS -gcodeview)
 endif()
-list(APPEND ASAN_UNITTEST_COMMON_LINKFLAGS -g)
+list(APPEND ASAN_UNITTEST_COMMON_LINK_FLAGS -g)
 
 # Use -D instead of definitions to please custom compile command.
 list(APPEND ASAN_UNITTEST_COMMON_CFLAGS
@@ -58,7 +58,12 @@
 
 if(APPLE)
   list(APPEND ASAN_UNITTEST_COMMON_CFLAGS ${DARWIN_osx_CFLAGS})
-  list(APPEND ASAN_UNITTEST_COMMON_LINKFLAGS ${DARWIN_osx_LINKFLAGS})
+  list(APPEND ASAN_UNITTEST_COMMON_LINK_FLAGS ${DARWIN_osx_LINK_FLAGS})
+
+  add_weak_symbols("asan" WEAK_SYMBOL_LINK_FLAGS)
+  add_weak_symbols("ubsan" WEAK_SYMBOL_LINK_FLAGS)
+  add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
+  list(APPEND ASAN_UNITTEST_COMMON_LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS})
 endif()
 
 if(MSVC)
@@ -77,41 +82,42 @@
 endif()
 
 if(NOT MSVC)
-  list(APPEND ASAN_UNITTEST_COMMON_LINKFLAGS --driver-mode=g++)
+  list(APPEND ASAN_UNITTEST_COMMON_LINK_FLAGS --driver-mode=g++)
 endif()
 
 # x86_64 FreeBSD 9.2 additionally requires libc++ to build the tests.
 if(CMAKE_SYSTEM MATCHES "FreeBSD-9.2-RELEASE")
-  list(APPEND ASAN_UNITTEST_COMMON_LINKFLAGS "-lc++")
+  list(APPEND ASAN_UNITTEST_COMMON_LINK_FLAGS "-lc++")
 endif()
 
 # Unit tests on Mac depend on Foundation.
 if(APPLE)
-  list(APPEND ASAN_UNITTEST_COMMON_LINKFLAGS -framework Foundation)
+  list(APPEND ASAN_UNITTEST_COMMON_LINK_FLAGS -framework Foundation)
 endif()
 if(ANDROID)
-  list(APPEND ASAN_UNITTEST_COMMON_LINKFLAGS -pie)
+  list(APPEND ASAN_UNITTEST_COMMON_LINK_FLAGS -pie)
 endif()
 
-set(ASAN_UNITTEST_INSTRUMENTED_LINKFLAGS
-  ${ASAN_UNITTEST_COMMON_LINKFLAGS})
-list(APPEND ASAN_UNITTEST_INSTRUMENTED_LINKFLAGS -fsanitize=address)
+set(ASAN_UNITTEST_INSTRUMENTED_LINK_FLAGS
+  ${ASAN_UNITTEST_COMMON_LINK_FLAGS})
+list(APPEND ASAN_UNITTEST_INSTRUMENTED_LINK_FLAGS -fsanitize=address)
 
-set(ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINKFLAGS
-  ${ASAN_UNITTEST_INSTRUMENTED_LINKFLAGS}
+set(ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS
+  ${ASAN_UNITTEST_INSTRUMENTED_LINK_FLAGS}
   -shared-libasan)
 
 set(ASAN_UNITTEST_INSTRUMENTED_LIBS)
 # NDK r10 requires -latomic almost always.
 append_list_if(ANDROID atomic ASAN_UNITTEST_INSTRUMENTED_LIBS)
 
-set(ASAN_UNITTEST_NOINST_LINKFLAGS ${ASAN_UNITTEST_COMMON_LINKFLAGS})
-append_list_if(COMPILER_RT_HAS_LIBM -lm ASAN_UNITTEST_NOINST_LINKFLAGS)
-append_list_if(COMPILER_RT_HAS_LIBDL -ldl ASAN_UNITTEST_NOINST_LINKFLAGS)
-append_list_if(COMPILER_RT_HAS_LIBRT -lrt ASAN_UNITTEST_NOINST_LINKFLAGS)
-append_list_if(COMPILER_RT_HAS_LIBPTHREAD -pthread ASAN_UNITTEST_NOINST_LINKFLAGS)
-append_list_if(COMPILER_RT_HAS_LIBPTHREAD -pthread
-          ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINKFLAGS)
+set(ASAN_UNITTEST_NOINST_LINK_FLAGS ${ASAN_UNITTEST_COMMON_LINK_FLAGS})
+if(NOT APPLE)
+  append_list_if(COMPILER_RT_HAS_LIBM -lm ASAN_UNITTEST_NOINST_LINK_FLAGS)
+  append_list_if(COMPILER_RT_HAS_LIBDL -ldl ASAN_UNITTEST_NOINST_LINK_FLAGS)
+  append_list_if(COMPILER_RT_HAS_LIBRT -lrt ASAN_UNITTEST_NOINST_LINK_FLAGS)
+  append_list_if(COMPILER_RT_HAS_LIBPTHREAD -pthread ASAN_UNITTEST_NOINST_LINK_FLAGS)
+  append_list_if(COMPILER_RT_HAS_LIBPTHREAD -pthread ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS)
+endif()
 
 # TODO(eugenis): move all -l flags above to _LIBS?
 set(ASAN_UNITTEST_NOINST_LIBS)
@@ -142,7 +148,7 @@
 # Link ASan unit test for a given architecture from a set
 # of objects in with given linker flags.
 macro(add_asan_test test_suite test_name arch kind)
-  cmake_parse_arguments(TEST "WITH_TEST_RUNTIME" "" "OBJECTS;LINKFLAGS;SUBDIR" ${ARGN})
+  cmake_parse_arguments(TEST "WITH_TEST_RUNTIME" "" "OBJECTS;LINK_FLAGS;SUBDIR" ${ARGN})
   get_target_flags_for_arch(${arch} TARGET_LINK_FLAGS)
   set(TEST_DEPS ${TEST_OBJECTS})
   if(NOT COMPILER_RT_STANDALONE_BUILD)
@@ -166,7 +172,7 @@
                        SUBDIR ${TEST_SUBDIR}
                        OBJECTS ${TEST_OBJECTS}
                        DEPS ${TEST_DEPS}
-                       LINK_FLAGS ${TEST_LINKFLAGS}
+                       LINK_FLAGS ${TEST_LINK_FLAGS}
                                   ${TARGET_LINK_FLAGS})
 endmacro()
 
@@ -193,6 +199,7 @@
   asan_asm_test.cc
   asan_globals_test.cc
   asan_interface_test.cc
+  asan_internal_interface_test.cc
   asan_test.cc
   asan_oob_test.cc
   asan_mem_test.cc
@@ -220,6 +227,23 @@
                  ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS} -ObjC ${ARGN})
   endif()
 
+  if (MSVC)
+    # With the MSVC CRT, the choice between static and dynamic CRT is made at
+    # compile time with a macro. Simulate the effect of passing /MD to clang-cl.
+    set(ASAN_INST_DYNAMIC_TEST_OBJECTS)
+    foreach(src ${ASAN_INST_TEST_SOURCES})
+      asan_compile(ASAN_INST_DYNAMIC_TEST_OBJECTS ${src} ${arch} ${kind}
+        ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS} -D_MT -D_DLL ${ARGN})
+    endforeach()
+    # Clang links the static CRT by default. Override that to use the dynamic
+    # CRT.
+    set(ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS
+      ${ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS}
+      -Wl,-nodefaultlib:libcmt,-defaultlib:msvcrt,-defaultlib:oldnames)
+  else()
+    set(ASAN_INST_DYNAMIC_TEST_OBJECTS ${ASAN_INST_TEST_OBJECTS})
+  endif()
+
   # Create the 'default' folder where ASAN tests are produced.
   if(CMAKE_CONFIGURATION_TYPES)
     foreach(build_mode ${CMAKE_CONFIGURATION_TYPES})
@@ -232,7 +256,7 @@
   add_asan_test(AsanUnitTests "Asan-${arch}${kind}-Test"
                 ${arch} ${kind} SUBDIR "default"
                 OBJECTS ${ASAN_INST_TEST_OBJECTS}
-                LINKFLAGS ${ASAN_UNITTEST_INSTRUMENTED_LINKFLAGS})
+                LINK_FLAGS ${ASAN_UNITTEST_INSTRUMENTED_LINK_FLAGS})
   if(COMPILER_RT_ASAN_HAS_STATIC_RUNTIME)
     # Create the 'dynamic' folder where ASAN tests are produced.
     if(CMAKE_CONFIGURATION_TYPES)
@@ -245,8 +269,8 @@
 
     add_asan_test(AsanDynamicUnitTests "Asan-${arch}${kind}-Dynamic-Test"
                   ${arch} ${kind} SUBDIR "dynamic"
-                  OBJECTS ${ASAN_INST_TEST_OBJECTS}
-                  LINKFLAGS ${ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINKFLAGS})
+                  OBJECTS ${ASAN_INST_DYNAMIC_TEST_OBJECTS}
+                  LINK_FLAGS ${ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS})
   endif()
 
   # Add static ASan runtime that will be linked with uninstrumented tests.
@@ -283,7 +307,7 @@
   add_asan_test(AsanUnitTests "Asan-${arch}${kind}-Noinst-Test"
                 ${arch} ${kind} SUBDIR "default"
                 OBJECTS ${ASAN_NOINST_TEST_OBJECTS}
-                LINKFLAGS ${ASAN_UNITTEST_NOINST_LINKFLAGS}
+                LINK_FLAGS ${ASAN_UNITTEST_NOINST_LINK_FLAGS}
                 WITH_TEST_RUNTIME)
 
   # Benchmarks.
@@ -295,7 +319,7 @@
   add_asan_test(AsanBenchmarks "Asan-${arch}${kind}-Benchmark"
                 ${arch} ${kind} SUBDIR "default"
                 OBJECTS ${ASAN_BENCHMARKS_OBJECTS}
-                LINKFLAGS ${ASAN_UNITTEST_INSTRUMENTED_LINKFLAGS})
+                LINK_FLAGS ${ASAN_UNITTEST_INSTRUMENTED_LINK_FLAGS})
 endmacro()
 
 if(COMPILER_RT_CAN_EXECUTE_TESTS AND NOT ANDROID)
@@ -323,7 +347,7 @@
       ${COMPILER_RT_GTEST_SOURCE}
       ${ASAN_NOINST_TEST_SOURCES})
     set_target_compile_flags(AsanNoinstTest ${ASAN_UNITTEST_COMMON_CFLAGS})
-    set_target_link_flags(AsanNoinstTest ${ASAN_UNITTEST_NOINST_LINKFLAGS})
+    set_target_link_flags(AsanNoinstTest ${ASAN_UNITTEST_NOINST_LINK_FLAGS})
     target_link_libraries(AsanNoinstTest ${ASAN_UNITTEST_NOINST_LIBS})
 
     # Test with ASan instrumentation. Link with ASan dynamic runtime.
@@ -331,7 +355,7 @@
       ${COMPILER_RT_GTEST_SOURCE}
       ${ASAN_INST_TEST_SOURCES})
     set_target_compile_flags(AsanTest ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS})
-    set_target_link_flags(AsanTest ${ASAN_UNITTEST_INSTRUMENTED_LINKFLAGS})
+    set_target_link_flags(AsanTest ${ASAN_UNITTEST_INSTRUMENTED_LINK_FLAGS})
     target_link_libraries(AsanTest ${ASAN_UNITTEST_INSTRUMENTED_LIBS})
 
     # Setup correct output directory and link flags.
diff --git a/lib/asan/tests/asan_asm_test.cc b/lib/asan/tests/asan_asm_test.cc
index 09af5c3..2bb3794 100644
--- a/lib/asan/tests/asan_asm_test.cc
+++ b/lib/asan/tests/asan_asm_test.cc
@@ -57,12 +57,13 @@
   return res;                                      \
 }
 
-#define DECLARE_ASM_REP_MOVS(Type, Movs)                                       \
-  template <> void asm_rep_movs<Type>(Type * dst, Type * src, size_t size) {   \
-    __asm__("rep " Movs " \n\t"                                                \
-            :                                                                  \
-            : "D"(dst), "S"(src), "c"(size)                                    \
-            : "rsi", "rdi", "rcx", "memory");                                  \
+#define DECLARE_ASM_REP_MOVS(Type, Movs)                         \
+  template <>                                                    \
+  void asm_rep_movs<Type>(Type * dst, Type * src, size_t size) { \
+    __asm__("rep " Movs " \n\t"                                  \
+            : "+D"(dst), "+S"(src), "+c"(size)                   \
+            :                                                    \
+            : "memory");                                         \
   }
 
 DECLARE_ASM_WRITE(U8, "8", "movq", "r");
@@ -99,12 +100,13 @@
   return res;                                      \
 }
 
-#define DECLARE_ASM_REP_MOVS(Type, Movs)                                       \
-  template <> void asm_rep_movs<Type>(Type * dst, Type * src, size_t size) {   \
-    __asm__("rep " Movs " \n\t"                                                \
-            :                                                                  \
-            : "D"(dst), "S"(src), "c"(size)                                    \
-            : "esi", "edi", "ecx", "memory");                                  \
+#define DECLARE_ASM_REP_MOVS(Type, Movs)                         \
+  template <>                                                    \
+  void asm_rep_movs<Type>(Type * dst, Type * src, size_t size) { \
+    __asm__("rep " Movs " \n\t"                                  \
+            : "+D"(dst), "+S"(src), "+c"(size)                   \
+            :                                                    \
+            : "memory");                                         \
   }
 
 } // End of anonymous namespace
diff --git a/lib/asan/tests/asan_interface_test.cc b/lib/asan/tests/asan_interface_test.cc
index f5bfb80..e4e9524 100644
--- a/lib/asan/tests/asan_interface_test.cc
+++ b/lib/asan/tests/asan_interface_test.cc
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "asan_test_utils.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
 #include <sanitizer/allocator_interface.h>
 #include <sanitizer/asan_interface.h>
 
@@ -100,6 +101,9 @@
   }
 }
 
+#ifndef __powerpc64__
+// FIXME: This has not reliably worked on powerpc since r279664.  Re-enable
+// this once the problem is tracked down and fixed.
 static const size_t kManyThreadsMallocSizes[] = {5, 1UL<<10, 1UL<<14, 357};
 static const size_t kManyThreadsIterations = 250;
 static const size_t kManyThreadsNumThreads =
@@ -133,6 +137,7 @@
   // so we can't check for equality here.
   EXPECT_LT(after_test, before_test + (1UL<<20));
 }
+#endif
 
 static void DoDoubleFree() {
   int *x = Ident(new int);
@@ -395,7 +400,7 @@
 
 TEST(AddressSanitizerInterface, SetErrorReportCallbackTest) {
   __asan_set_error_report_callback(ErrorReportCallbackOneToZ);
-  EXPECT_DEATH(__asan_report_error(0, 0, 0, 0, true, 1),
+  EXPECT_DEATH(__asan_report_error((void *)GET_CALLER_PC(), 0, 0, 0, true, 1),
                ASAN_PCRE_DOTALL "ABCDEF.*AddressSanitizer.*WRITE.*ABCDEF");
   __asan_set_error_report_callback(NULL);
 }
diff --git a/lib/asan/tests/asan_internal_interface_test.cc b/lib/asan/tests/asan_internal_interface_test.cc
new file mode 100644
index 0000000..ae47594
--- /dev/null
+++ b/lib/asan/tests/asan_internal_interface_test.cc
@@ -0,0 +1,36 @@
+//===-- asan_internal_interface_test.cc -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+//===----------------------------------------------------------------------===//
+#include "asan_interface_internal.h"
+#include "asan_test_utils.h"
+
+TEST(AddressSanitizerInternalInterface, SetShadow) {
+  std::vector<char> buffer(17, 0xff);
+
+  __asan_set_shadow_00((uptr)buffer.data(), buffer.size());
+  EXPECT_EQ(std::vector<char>(buffer.size(), 0x00), buffer);
+
+  __asan_set_shadow_f1((uptr)buffer.data(), buffer.size());
+  EXPECT_EQ(std::vector<char>(buffer.size(), 0xf1), buffer);
+
+  __asan_set_shadow_f2((uptr)buffer.data(), buffer.size());
+  EXPECT_EQ(std::vector<char>(buffer.size(), 0xf2), buffer);
+
+  __asan_set_shadow_f3((uptr)buffer.data(), buffer.size());
+  EXPECT_EQ(std::vector<char>(buffer.size(), 0xf3), buffer);
+
+  __asan_set_shadow_f5((uptr)buffer.data(), buffer.size());
+  EXPECT_EQ(std::vector<char>(buffer.size(), 0xf5), buffer);
+
+  __asan_set_shadow_f8((uptr)buffer.data(), buffer.size());
+  EXPECT_EQ(std::vector<char>(buffer.size(), 0xf8), buffer);
+}
diff --git a/lib/asan/tests/asan_noinst_test.cc b/lib/asan/tests/asan_noinst_test.cc
index 90c6f40..65acb28 100644
--- a/lib/asan/tests/asan_noinst_test.cc
+++ b/lib/asan/tests/asan_noinst_test.cc
@@ -170,6 +170,12 @@
 // Check that the thread local allocators are flushed when threads are
 // destroyed.
 TEST(AddressSanitizer, ThreadedQuarantineTest) {
+  // Run the routine once to warm up ASAN internal structures to get more
+  // predictable incremental memory changes.
+  pthread_t t;
+  PTHREAD_CREATE(&t, NULL, ThreadedQuarantineTestWorker, 0);
+  PTHREAD_JOIN(t, 0);
+
   const int n_threads = 3000;
   size_t mmaped1 = __sanitizer_get_heap_size();
   for (int i = 0; i < n_threads; i++) {
@@ -177,6 +183,7 @@
     PTHREAD_CREATE(&t, NULL, ThreadedQuarantineTestWorker, 0);
     PTHREAD_JOIN(t, 0);
     size_t mmaped2 = __sanitizer_get_heap_size();
+    // FIXME: Figure out why this much memory is required.
     EXPECT_LT(mmaped2 - mmaped1, 320U * (1 << 20));
   }
 }
diff --git a/lib/asan/tests/asan_str_test.cc b/lib/asan/tests/asan_str_test.cc
index 0b86702..c790088 100644
--- a/lib/asan/tests/asan_str_test.cc
+++ b/lib/asan/tests/asan_str_test.cc
@@ -127,7 +127,15 @@
 }
 #endif  // SANITIZER_TEST_HAS_STRNLEN
 
-TEST(AddressSanitizer, StrDupOOBTest) {
+// This test fails with the WinASan dynamic runtime because we fail to intercept
+// strdup.
+#if defined(_MSC_VER) && defined(_DLL)
+#define MAYBE_StrDupOOBTest DISABLED_StrDupOOBTest
+#else
+#define MAYBE_StrDupOOBTest StrDupOOBTest
+#endif
+
+TEST(AddressSanitizer, MAYBE_StrDupOOBTest) {
   size_t size = Ident(42);
   char *str = MallocAndMemsetString(size);
   char *new_str;
diff --git a/lib/asan/tests/asan_test.cc b/lib/asan/tests/asan_test.cc
index 6a95c3f..424a79e 100644
--- a/lib/asan/tests/asan_test.cc
+++ b/lib/asan/tests/asan_test.cc
@@ -692,7 +692,7 @@
   PTHREAD_JOIN(t, 0);
 }
 
-#if defined(__i686__) || defined(__x86_64__)
+#if defined(__SSE2__)
 #include <emmintrin.h>
 TEST(AddressSanitizer, Store128Test) {
   char *a = Ident((char*)malloc(Ident(12)));
diff --git a/lib/asan/tests/asan_test_utils.h b/lib/asan/tests/asan_test_utils.h
index 03d17cf..f16d939 100644
--- a/lib/asan/tests/asan_test_utils.h
+++ b/lib/asan/tests/asan_test_utils.h
@@ -62,7 +62,9 @@
 
 static const int kPageSize = 4096;
 
-const size_t kLargeMalloc = 1 << 24;
+// Big enough to be handled by secondary allocator and small enough to fit into
+// quarantine for all configurations.
+const size_t kLargeMalloc = 1 << 22;
 
 extern void free_aaa(void *p);
 extern void *malloc_aaa(size_t size);
diff --git a/lib/asan/weak_symbols.txt b/lib/asan/weak_symbols.txt
new file mode 100644
index 0000000..ba7b027
--- /dev/null
+++ b/lib/asan/weak_symbols.txt
@@ -0,0 +1,3 @@
+___asan_default_options
+___asan_default_suppressions
+___asan_on_error
diff --git a/lib/builtins/CMakeLists.txt b/lib/builtins/CMakeLists.txt
index 9d2154b..3cf7861 100644
--- a/lib/builtins/CMakeLists.txt
+++ b/lib/builtins/CMakeLists.txt
@@ -164,7 +164,11 @@
   umodsi3.c
   umodti3.c)
 
-if(COMPILER_RT_HAS_ATOMIC_KEYWORD)
+option(COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN
+  "Skip the atomic builtin (this may be needed if system headers are unavailable)"
+  Off)
+
+if(COMPILER_RT_HAS_ATOMIC_KEYWORD AND NOT COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN)
   set(GENERIC_SOURCES
     ${GENERIC_SOURCES}
     atomic.c)
@@ -367,7 +371,31 @@
   ${arm_Thumb1_VFPv2_SOURCES}
   ${arm_Thumb1_icache_SOURCES})
 
-if(NOT WIN32)
+if(MINGW)
+  set(arm_SOURCES
+      arm/aeabi_idivmod.S
+      arm/aeabi_ldivmod.S
+      arm/aeabi_uidivmod.S
+      arm/aeabi_uldivmod.S
+      divmoddi4.c
+      divmodsi4.c
+      divdi3.c
+      divsi3.c
+      fixdfdi.c
+      fixsfdi.c
+      fixunsdfdi.c
+      fixunssfdi.c
+      floatdidf.c
+      floatdisf.c
+      floatundidf.c
+      floatundisf.c
+      mingw_fixfloat.c
+      moddi3.c
+      udivmoddi4.c
+      udivmodsi4.c
+      udivsi3.c
+      umoddi3.c)
+elseif(NOT WIN32)
   # TODO the EABI sources should only be added to EABI targets
   set(arm_SOURCES
     ${arm_SOURCES}
@@ -387,8 +415,10 @@
   fixunstfti.c
   floatditf.c
   floatsitf.c
+  floattitf.c
   floatunditf.c
   floatunsitf.c
+  floatuntitf.c
   multc3.c
   trunctfdf2.c
   trunctfsf2.c
@@ -421,7 +451,24 @@
   add_subdirectory(macho_embedded)
   darwin_add_builtin_libraries(${BUILTIN_SUPPORTED_OS})
 else ()
-  append_string_if(COMPILER_RT_HAS_STD_C99_FLAG -std=gnu99 maybe_stdc99)
+  set(BUILTIN_CFLAGS "")
+
+  append_list_if(COMPILER_RT_HAS_STD_C11_FLAG -std=c11 BUILTIN_CFLAGS)
+
+  # These flags would normally be added to CMAKE_C_FLAGS by the llvm
+  # cmake step. Add them manually if this is a standalone build.
+  if(COMPILER_RT_STANDALONE_BUILD)
+    append_list_if(COMPILER_RT_HAS_FPIC_FLAG -fPIC BUILTIN_CFLAGS)
+    append_list_if(COMPILER_RT_HAS_FNO_BUILTIN_FLAG -fno-builtin BUILTIN_CFLAGS)
+    append_list_if(COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG -fvisibility=hidden BUILTIN_CFLAGS)
+    if(NOT COMPILER_RT_DEBUG)
+      append_list_if(COMPILER_RT_HAS_OMIT_FRAME_POINTER_FLAG -fomit-frame-pointer BUILTIN_CFLAGS)
+    endif()
+  endif()
+
+  set(BUILTIN_DEFS "")
+
+  append_list_if(COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG VISIBILITY_HIDDEN BUILTIN_DEFS)
 
   foreach (arch ${BUILTIN_SUPPORTED_ARCH})
     if (CAN_TARGET_${arch})
@@ -436,11 +483,18 @@
         endif ()
       endforeach ()
 
+      set(BUILTIN_CFLAGS_${arch} ${BUILTIN_CFLAGS})  # per-arch copy: additions below must not leak into later archs
+      # Needed for clear_cache on debug mode, due to r7's usage in inline asm.
+      # Release mode already sets it via -O2/3, Debug mode doesn't.
+      if (${arch} STREQUAL "armhf")
+        list(APPEND BUILTIN_CFLAGS_${arch} -fomit-frame-pointer)
+      endif()
       add_compiler_rt_runtime(clang_rt.builtins
                               STATIC
                               ARCHS ${arch}
                               SOURCES ${${arch}_SOURCES}
-                              CFLAGS ${maybe_stdc99}
+                              DEFS ${BUILTIN_DEFS}
+                              CFLAGS ${BUILTIN_CFLAGS_${arch}}
                               PARENT_TARGET builtins)
     endif ()
   endforeach ()
diff --git a/lib/builtins/Makefile.mk b/lib/builtins/Makefile.mk
deleted file mode 100644
index 00e2f53..0000000
--- a/lib/builtins/Makefile.mk
+++ /dev/null
@@ -1,25 +0,0 @@
-#===- lib/builtins/Makefile.mk -----------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs :=
-
-# Add arch specific optimized implementations.
-SubDirs += i386 ppc x86_64 arm armv6m
-
-# Add ARM64 dir.
-SubDirs += arm64
-
-# Define the variables for this specific directory.
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o)
-Implementation := Generic
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard $(Dir)/*.h)
diff --git a/lib/builtins/arm/Makefile.mk b/lib/builtins/arm/Makefile.mk
deleted file mode 100644
index ed2e832..0000000
--- a/lib/builtins/arm/Makefile.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#===- lib/builtins/arm/Makefile.mk -------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs := 
-OnlyArchs := armv5 armv6 armv7 armv7k armv7m armv7em armv7s
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/lib/builtins/arm/adddf3vfp.S b/lib/builtins/arm/adddf3vfp.S
index f4c00a0..8e476ca 100644
--- a/lib/builtins/arm/adddf3vfp.S
+++ b/lib/builtins/arm/adddf3vfp.S
@@ -18,10 +18,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__adddf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vadd.f64 d0, d0, d1
+#else
 	vmov	d6, r0, r1		// move first param from r0/r1 pair into d6
 	vmov	d7, r2, r3		// move second param from r2/r3 pair into d7
 	vadd.f64 d6, d6, d7		
 	vmov	r0, r1, d6		// move result back to r0/r1 pair
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__adddf3vfp)
 
diff --git a/lib/builtins/arm/addsf3vfp.S b/lib/builtins/arm/addsf3vfp.S
index af40c1c..8871efd 100644
--- a/lib/builtins/arm/addsf3vfp.S
+++ b/lib/builtins/arm/addsf3vfp.S
@@ -18,10 +18,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__addsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vadd.f32 s0, s0, s1
+#else
 	vmov	s14, r0		// move first param from r0 into float register
 	vmov	s15, r1		// move second param from r1 into float register
 	vadd.f32 s14, s14, s15
 	vmov	r0, s14		// move result back to r0
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__addsf3vfp)
 
diff --git a/lib/builtins/arm/aeabi_fcmp.S b/lib/builtins/arm/aeabi_fcmp.S
index 0a1d92a..8e7774b 100644
--- a/lib/builtins/arm/aeabi_fcmp.S
+++ b/lib/builtins/arm/aeabi_fcmp.S
@@ -26,10 +26,10 @@
         bl        SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \
         cmp       r0, #0                         SEPARATOR \
         b ## cond 1f                             SEPARATOR \
-        mov       r0, #0                         SEPARATOR \
+        movs      r0, #0                         SEPARATOR \
         pop       { r4, pc }                     SEPARATOR \
 1:                                               SEPARATOR \
-        mov       r0, #1                         SEPARATOR \
+        movs      r0, #1                         SEPARATOR \
         pop       { r4, pc }                     SEPARATOR \
 END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)
 
diff --git a/lib/builtins/arm/aeabi_idivmod.S b/lib/builtins/arm/aeabi_idivmod.S
index 2fcad86..b43ea69 100644
--- a/lib/builtins/arm/aeabi_idivmod.S
+++ b/lib/builtins/arm/aeabi_idivmod.S
@@ -15,16 +15,34 @@
 //   return {quot, rem};
 // }
 
+#if defined(__MINGW32__)
+#define __aeabi_idivmod __rt_sdiv
+#endif
+
         .syntax unified
         .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
+#if __ARM_ARCH_ISA_THUMB == 1
+        push    {r0, r1, lr} // keep num, denom and the return address across the call
+        bl      SYMBOL_NAME(__divsi3)
+        pop     {r1, r2, r3} // now r0 = quot, r1 = num, r2 = denom, r3 = saved lr
+        muls    r2, r2, r0   // r2 = quot * denom
+        subs    r1, r1, r2   // r1 = num - quot * denom, i.e. the remainder
+        JMP     (r3)         // return through the saved lr held in r3
+#else
         push    { lr }
         sub     sp, sp, #4
         mov     r2, sp
+#if defined(__MINGW32__)
+        mov     r3, r0       // exchange r0 and r1 through r3 before the call;
+        mov     r0, r1       // presumably __rt_sdiv receives its operands in the
+        mov     r1, r3       // opposite order -- TODO confirm against the Windows ABI
+#endif
         bl      SYMBOL_NAME(__divmodsi4)
         ldr     r1, [sp]
         add     sp, sp, #4
         pop     { pc }
+#endif // __ARM_ARCH_ISA_THUMB == 1
 END_COMPILERRT_FUNCTION(__aeabi_idivmod)
 
 NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/builtins/arm/aeabi_ldivmod.S b/lib/builtins/arm/aeabi_ldivmod.S
index 9f161f3..3dae14e 100644
--- a/lib/builtins/arm/aeabi_ldivmod.S
+++ b/lib/builtins/arm/aeabi_ldivmod.S
@@ -16,6 +16,10 @@
 //   return {quot, rem};
 // }
 
+#if defined(__MINGW32__)
+#define __aeabi_ldivmod __rt_sdiv64
+#endif
+
         .syntax unified
         .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod)
@@ -23,6 +27,14 @@
         sub     sp, sp, #16
         add     r12, sp, #8
         str     r12, [sp]
+#if defined(__MINGW32__)
+        mov     r12, r0     // exchange the r0:r1 and r2:r3 pairs, using r12 as scratch
+        mov     r0, r2
+        mov     r2, r12     // r0 <-> r2 done
+        mov     r12, r1
+        mov     r1, r3
+        mov     r3, r12     // r1 <-> r3 done; presumably __rt_sdiv64 takes its operands in the opposite order -- TODO confirm
+#endif
         bl      SYMBOL_NAME(__divmoddi4)
         ldr     r2, [sp, #8]
         ldr     r3, [sp, #12]
diff --git a/lib/builtins/arm/aeabi_uidivmod.S b/lib/builtins/arm/aeabi_uidivmod.S
index e1e12d9..7098bc6 100644
--- a/lib/builtins/arm/aeabi_uidivmod.S
+++ b/lib/builtins/arm/aeabi_uidivmod.S
@@ -16,16 +16,40 @@
 //   return {quot, rem};
 // }
 
+#if defined(__MINGW32__)
+#define __aeabi_uidivmod __rt_udiv
+#endif
+
         .syntax unified
         .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+#if __ARM_ARCH_ISA_THUMB == 1
+        cmp     r0, r1
+        bcc     LOCAL_LABEL(case_denom_larger) // num < denom (unsigned): skip the division
+        push    {r0, r1, lr} // keep num, denom and the return address across the call
+        bl      SYMBOL_NAME(__aeabi_uidiv)
+        pop     {r1, r2, r3} // r1 = num, r2 = denom, r3 = saved lr
+        muls    r2, r2, r0 // r2 = quot * denom
+        subs    r1, r1, r2 // r1 = num - quot * denom, i.e. the remainder
+        JMP     (r3)       // return through the saved lr held in r3
+LOCAL_LABEL(case_denom_larger):
+        movs    r1, r0     // remainder = num
+        movs    r0, #0     // quotient = 0
+        JMP     (lr)
+#else
         push    { lr }
         sub     sp, sp, #4
         mov     r2, sp
+#if defined(__MINGW32__)
+        mov     r3, r0     // exchange r0 and r1 through r3 before the call;
+        mov     r0, r1     // presumably __rt_udiv receives its operands in the
+        mov     r1, r3     // opposite order -- TODO confirm against the Windows ABI
+#endif
         bl      SYMBOL_NAME(__udivmodsi4)
         ldr     r1, [sp]
         add     sp, sp, #4
         pop     { pc }
+#endif
 END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
 
 NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/builtins/arm/aeabi_uldivmod.S b/lib/builtins/arm/aeabi_uldivmod.S
index e8aaef2..bc26e56 100644
--- a/lib/builtins/arm/aeabi_uldivmod.S
+++ b/lib/builtins/arm/aeabi_uldivmod.S
@@ -16,6 +16,10 @@
 //   return {quot, rem};
 // }
 
+#if defined(__MINGW32__)
+#define __aeabi_uldivmod __rt_udiv64
+#endif
+
         .syntax unified
         .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod)
@@ -23,6 +27,14 @@
         sub	sp, sp, #16
         add	r12, sp, #8
         str	r12, [sp]
+#if defined(__MINGW32__)
+        mov     r12, r0     // exchange the r0:r1 and r2:r3 pairs, using r12 as scratch
+        mov     r0, r2
+        mov     r2, r12     // r0 <-> r2 done
+        mov     r12, r1
+        mov     r1, r3
+        mov     r3, r12     // r1 <-> r3 done; presumably __rt_udiv64 takes its operands in the opposite order -- TODO confirm
+#endif
         bl	SYMBOL_NAME(__udivmoddi4)
         ldr	r2, [sp, #8]
         ldr	r3, [sp, #12]
diff --git a/lib/builtins/arm/comparesf2.S b/lib/builtins/arm/comparesf2.S
index 52597b6..e809565 100644
--- a/lib/builtins/arm/comparesf2.S
+++ b/lib/builtins/arm/comparesf2.S
@@ -39,32 +39,64 @@
 
 #include "../assembly.h"
 .syntax unified
+#if __ARM_ARCH_ISA_THUMB == 2
+.thumb
+#endif
 
-.p2align 2
+@ int __eqsf2(float a, float b)
+
+    .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__eqsf2)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+    vmov r0, s0
+    vmov r1, s1
+#endif
     // Make copies of a and b with the sign bit shifted off the top.  These will
     // be used to detect zeros and NaNs.
+#if __ARM_ARCH_ISA_THUMB == 1
+    push    {r6, lr}
+    lsls    r2,         r0, #1
+    lsls    r3,         r1, #1
+#else
     mov     r2,         r0, lsl #1
     mov     r3,         r1, lsl #1
+#endif
 
     // We do the comparison in three stages (ignoring NaN values for the time
     // being).  First, we orr the absolute values of a and b; this sets the Z
     // flag if both a and b are zero (of either sign).  The shift of r3 doesn't
     // effect this at all, but it *does* make sure that the C flag is clear for
     // the subsequent operations.
+#if __ARM_ARCH_ISA_THUMB == 1
+    lsrs    r6,     r3, #1
+    orrs    r6,     r2, r6
+#else
     orrs    r12,    r2, r3, lsr #1
-
+#endif
     // Next, we check if a and b have the same or different signs.  If they have
     // opposite signs, this eor will set the N flag.
+#if __ARM_ARCH_ISA_THUMB == 1
+    beq     1f
+    movs    r6,     r0
+    eors    r6,     r1
+1:
+#else
     it ne
     eorsne  r12,    r0, r1
+#endif
 
     // If a and b are equal (either both zeros or bit identical; again, we're
     // ignoring NaNs for now), this subtract will zero out r0.  If they have the
     // same sign, the flags are updated as they would be for a comparison of the
     // absolute values of a and b.
+#if __ARM_ARCH_ISA_THUMB == 1
+    bmi     1f
+    subs    r0,     r2, r3
+1:
+#else
     it pl
     subspl  r0,     r2, r3
+#endif
 
     // If a is smaller in magnitude than b and both have the same sign, place
     // the negation of the sign of b in r0.  Thus, if both are negative and
@@ -76,41 +108,126 @@
     // still clear from the shift argument in orrs; if a is positive and b
     // negative, this places 0 in r0; if a is negative and b positive, -1 is
     // placed in r0.
+#if __ARM_ARCH_ISA_THUMB == 1
+    bhs     1f
+    // Here if a and b have the same sign and absA < absB, the result is thus
+    // b < 0 ? 1 : -1. Same if a and b have the opposite sign (ignoring NaN).
+    movs    r0,         #1
+    lsrs    r1,         #31
+    bne     LOCAL_LABEL(CHECK_NAN)
+    negs    r0,         r0
+    b       LOCAL_LABEL(CHECK_NAN)
+1:
+#else
     it lo
     mvnlo   r0,         r1, asr #31
+#endif
 
     // If a is greater in magnitude than b and both have the same sign, place
     // the sign of b in r0.  Thus, if both are negative and a < b, -1 is placed
     // in r0, which is the desired result.  Conversely, if both are positive
     // and a > b, zero is placed in r0.
+#if __ARM_ARCH_ISA_THUMB == 1
+    bls     1f
+    // Here both have the same sign and absA > absB.
+    movs    r0,         #1
+    lsrs    r1,         #31
+    beq     LOCAL_LABEL(CHECK_NAN)
+    negs    r0, r0
+1:
+#else
     it hi
     movhi   r0,         r1, asr #31
+#endif
 
     // If you've been keeping track, at this point r0 contains -1 if a < b and
     // 0 if a >= b.  All that remains to be done is to set it to 1 if a > b.
     // If a == b, then the Z flag is set, so we can get the correct final value
     // into r0 by simply or'ing with 1 if Z is clear.
+    // For Thumb-1, r0 contains -1 if a < b, 1 if a > b and 0 if a == b.
+#if __ARM_ARCH_ISA_THUMB != 1
     it ne
     orrne   r0,     r0, #1
+#endif
 
     // Finally, we need to deal with NaNs.  If either argument is NaN, replace
     // the value in r0 with 1.
+#if __ARM_ARCH_ISA_THUMB == 1
+LOCAL_LABEL(CHECK_NAN):
+    movs    r6,         #0xff
+    lsls    r6,         #24
+    cmp     r2,         r6
+    bhi     1f
+    cmp     r3,         r6
+1:
+    bls     2f
+    movs    r0,         #1
+2:
+    pop     {r6, pc}
+#else
     cmp     r2,         #0xff000000
     ite ls
     cmpls   r3,         #0xff000000
     movhi   r0,         #1
     JMP(lr)
+#endif
 END_COMPILERRT_FUNCTION(__eqsf2)
+
 DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2)
 DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2)
 DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2)
 
-.p2align 2
+@ int __gtsf2(float a, float b)
+
+    .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__gtsf2)
     // Identical to the preceding except in that we return -1 for NaN values.
-    // Given that the two paths share so much code, one might be tempted to 
+    // Given that the two paths share so much code, one might be tempted to
     // unify them; however, the extra code needed to do so makes the code size
     // to performance tradeoff very hard to justify for such small functions.
+#if defined(COMPILER_RT_ARMHF_TARGET)
+    vmov r0, s0
+    vmov r1, s1
+#endif
+#if __ARM_ARCH_ISA_THUMB == 1
+    push    {r6, lr}
+    lsls    r2,        r0, #1
+    lsls    r3,        r1, #1
+    lsrs    r6,        r3, #1
+    orrs    r6,        r2, r6
+    beq     1f
+    movs    r6,        r0
+    eors    r6,        r1
+1:
+    bmi     2f
+    subs    r0,        r2, r3
+2:
+    bhs     3f
+    movs    r0,        #1
+    lsrs    r1,        #31
+    bne     LOCAL_LABEL(CHECK_NAN_2)
+    negs    r0, r0
+    b       LOCAL_LABEL(CHECK_NAN_2)
+3:
+    bls     4f
+    movs    r0,         #1
+    lsrs    r1,         #31
+    beq     LOCAL_LABEL(CHECK_NAN_2)
+    negs    r0, r0
+4:
+LOCAL_LABEL(CHECK_NAN_2):
+    movs    r6,         #0xff
+    lsls    r6,         #24
+    cmp     r2,         r6
+    bhi     5f
+    cmp     r3,         r6
+5:
+    bls     6f
+    movs    r0,         #1
+    negs    r0,         r0
+6:
+    pop     {r6, pc}
+#else
     mov     r2,         r0, lsl #1
     mov     r3,         r1, lsl #1
     orrs    r12,    r2, r3, lsr #1
@@ -129,23 +246,51 @@
     cmpls   r3,         #0xff000000
     movhi   r0,         #-1
     JMP(lr)
+#endif
 END_COMPILERRT_FUNCTION(__gtsf2)
+
 DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2)
 
-.p2align 2
+@ int __unordsf2(float a, float b)
+
+    .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__unordsf2)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+    vmov    r0,         s0
+    vmov    r1,         s1
+#endif
     // Return 1 for NaN values, 0 otherwise.
-    mov     r2,         r0, lsl #1
-    mov     r3,         r1, lsl #1
-    mov     r0,         #0
+    lsls    r2,         r0, #1
+    lsls    r3,         r1, #1
+    movs    r0,         #0
+#if __ARM_ARCH_ISA_THUMB == 1
+    movs    r1,         #0xff   // build 0xff000000 in r1 (0xff << 24); b was already copied to r3
+    lsls    r1,         #24
+    cmp     r2,         r1      // same threshold test as the #else branch, on a << 1
+    bhi     1f                  // a << 1 is above the threshold: keep these flags
+    cmp     r3,         r1      // otherwise test b << 1 instead
+1:
+    bls     2f                  // last compare was not "higher": leave r0 = 0
+    movs    r0,         #1
+2:
+#else
     cmp     r2,         #0xff000000
     ite ls
     cmpls   r3,         #0xff000000
     movhi   r0,         #1
+#endif
     JMP(lr)
 END_COMPILERRT_FUNCTION(__unordsf2)
 
+#if defined(COMPILER_RT_ARMHF_TARGET)
+DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpun)
+	vmov s0, r0	// operands are taken from r0/r1; __unordsf2 reads s0/s1 on this target
+	vmov s1, r1
+	b SYMBOL_NAME(__unordsf2)	// plain branch (no link): __unordsf2 returns to our caller
+END_COMPILERRT_FUNCTION(__aeabi_fcmpun)
+#else
 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2)
+#endif
 
 NO_EXEC_STACK_DIRECTIVE
 
diff --git a/lib/builtins/arm/divdf3vfp.S b/lib/builtins/arm/divdf3vfp.S
index 928f538..776ba4f 100644
--- a/lib/builtins/arm/divdf3vfp.S
+++ b/lib/builtins/arm/divdf3vfp.S
@@ -18,10 +18,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__divdf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vdiv.f64 d0, d0, d1
+#else
 	vmov	d6, r0, r1		// move first param from r0/r1 pair into d6
 	vmov	d7, r2, r3		// move second param from r2/r3 pair into d7
-	vdiv.f64 d5, d6, d7		
+	vdiv.f64 d5, d6, d7
 	vmov	r0, r1, d5		// move result back to r0/r1 pair
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__divdf3vfp)
 
diff --git a/lib/builtins/arm/divsf3vfp.S b/lib/builtins/arm/divsf3vfp.S
index a2e297f..130318f 100644
--- a/lib/builtins/arm/divsf3vfp.S
+++ b/lib/builtins/arm/divsf3vfp.S
@@ -18,10 +18,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__divsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vdiv.f32 s0, s0, s1
+#else
 	vmov	s14, r0		// move first param from r0 into float register
 	vmov	s15, r1		// move second param from r1 into float register
 	vdiv.f32 s13, s14, s15
 	vmov	r0, s13		// move result back to r0
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__divsf3vfp)
 
diff --git a/lib/builtins/arm/divsi3.S b/lib/builtins/arm/divsi3.S
index 7e23ba4..f066f60 100644
--- a/lib/builtins/arm/divsi3.S
+++ b/lib/builtins/arm/divsi3.S
@@ -49,17 +49,37 @@
 #else
 ESTABLISH_FRAME
 //  Set aside the sign of the quotient.
+#  if __ARM_ARCH_ISA_THUMB == 1
+    movs    r4,     r0
+    eors    r4,     r1
+#  else
     eor     r4,     r0, r1
+#  endif
 //  Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+#  if   __ARM_ARCH_ISA_THUMB == 1
+    asrs    r2,     r0, #31
+    asrs    r3,     r1, #31
+    eors    r0,     r2
+    eors    r1,     r3
+    subs    r0,     r0, r2
+    subs    r1,     r1, r3
+#  else
     eor     r2,     r0, r0, asr #31
     eor     r3,     r1, r1, asr #31
     sub     r0,     r2, r0, asr #31
     sub     r1,     r3, r1, asr #31
+#  endif
 //  abs(a) / abs(b)
     bl      SYMBOL_NAME(__udivsi3)
 //  Apply sign of quotient to result and return.
+#  if __ARM_ARCH_ISA_THUMB == 1
+    asrs    r4,     #31
+    eors    r0,     r4
+    subs    r0,     r0, r4
+#  else
     eor     r0,     r0, r4, asr #31
     sub     r0,     r0, r4, asr #31
+#  endif
     CLEAR_FRAME_AND_RETURN
 #endif
 END_COMPILERRT_FUNCTION(__divsi3)
diff --git a/lib/builtins/arm/eqdf2vfp.S b/lib/builtins/arm/eqdf2vfp.S
index 95e6bb3..8fa0b2d 100644
--- a/lib/builtins/arm/eqdf2vfp.S
+++ b/lib/builtins/arm/eqdf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov	d6, r0, r1	// load r0/r1 pair in double register
 	vmov	d7, r2, r3	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7		
+#endif
 	vmrs	apsr_nzcv, fpscr
 	moveq	r0, #1		// set result register to 1 if equal
 	movne	r0, #0
diff --git a/lib/builtins/arm/eqsf2vfp.S b/lib/builtins/arm/eqsf2vfp.S
index fbac139..3776bf4 100644
--- a/lib/builtins/arm/eqsf2vfp.S
+++ b/lib/builtins/arm/eqsf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0     // move from GPR 0 to float register
 	vmov	s15, r1	    // move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
 	moveq	r0, #1      // set result register to 1 if equal
 	movne	r0, #0
diff --git a/lib/builtins/arm/extendsfdf2vfp.S b/lib/builtins/arm/extendsfdf2vfp.S
index 563bf92..1079f97 100644
--- a/lib/builtins/arm/extendsfdf2vfp.S
+++ b/lib/builtins/arm/extendsfdf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.f64.f32 d0, s0
+#else
 	vmov	s15, r0      // load float register from R0
 	vcvt.f64.f32 d7, s15 // convert single to double
 	vmov	r0, r1, d7   // return result in r0/r1 pair
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__extendsfdf2vfp)
 
diff --git a/lib/builtins/arm/fixdfsivfp.S b/lib/builtins/arm/fixdfsivfp.S
index 8263ff9..5d7b0f8 100644
--- a/lib/builtins/arm/fixdfsivfp.S
+++ b/lib/builtins/arm/fixdfsivfp.S
@@ -19,9 +19,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.s32.f64 s0, d0
+	vmov r0, s0
+#else
 	vmov	d7, r0, r1    // load double register from R0/R1
 	vcvt.s32.f64 s15, d7  // convert double to 32-bit int into s15
 	vmov	r0, s15	      // move s15 to result register
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__fixdfsivfp)
 
diff --git a/lib/builtins/arm/fixsfsivfp.S b/lib/builtins/arm/fixsfsivfp.S
index c7c3b81..805a277 100644
--- a/lib/builtins/arm/fixsfsivfp.S
+++ b/lib/builtins/arm/fixsfsivfp.S
@@ -19,9 +19,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.s32.f32 s0, s0
+	vmov r0, s0
+#else
 	vmov	s15, r0        // load float register from R0
 	vcvt.s32.f32 s15, s15  // convert single to 32-bit int into s15
 	vmov	r0, s15	       // move s15 to result register
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__fixsfsivfp)
 
diff --git a/lib/builtins/arm/fixunsdfsivfp.S b/lib/builtins/arm/fixunsdfsivfp.S
index 9cc1e62..4f1b2c8 100644
--- a/lib/builtins/arm/fixunsdfsivfp.S
+++ b/lib/builtins/arm/fixunsdfsivfp.S
@@ -20,9 +20,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.u32.f64 s0, d0
+	vmov r0, s0
+#else
 	vmov	d7, r0, r1    // load double register from R0/R1
 	vcvt.u32.f64 s15, d7  // convert double to 32-bit int into s15
 	vmov	r0, s15	      // move s15 to result register
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__fixunsdfsivfp)
 
diff --git a/lib/builtins/arm/fixunssfsivfp.S b/lib/builtins/arm/fixunssfsivfp.S
index 79d7082..e5d7782 100644
--- a/lib/builtins/arm/fixunssfsivfp.S
+++ b/lib/builtins/arm/fixunssfsivfp.S
@@ -20,9 +20,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.u32.f32 s0, s0
+	vmov r0, s0
+#else
 	vmov	s15, r0        // load float register from R0
 	vcvt.u32.f32 s15, s15  // convert single to 32-bit unsigned into s15
 	vmov	r0, s15	       // move s15 to result register
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__fixunssfsivfp)
 
diff --git a/lib/builtins/arm/floatsidfvfp.S b/lib/builtins/arm/floatsidfvfp.S
index 7623f26..3297ad4 100644
--- a/lib/builtins/arm/floatsidfvfp.S
+++ b/lib/builtins/arm/floatsidfvfp.S
@@ -19,9 +19,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmov s0, r0
+	vcvt.f64.s32 d0, s0
+#else
 	vmov	s15, r0        // move int to float register s15
 	vcvt.f64.s32 d7, s15   // convert 32-bit int in s15 to double in d7
 	vmov	r0, r1, d7     // move d7 to result register pair r0/r1
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__floatsidfvfp)
 
diff --git a/lib/builtins/arm/floatsisfvfp.S b/lib/builtins/arm/floatsisfvfp.S
index c73dfac..65408b5 100644
--- a/lib/builtins/arm/floatsisfvfp.S
+++ b/lib/builtins/arm/floatsisfvfp.S
@@ -19,9 +19,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmov s0, r0
+	vcvt.f32.s32 s0, s0
+#else
 	vmov	s15, r0	       // move int to float register s15
 	vcvt.f32.s32 s15, s15  // convert 32-bit int in s15 to float in s15
 	vmov	r0, s15        // move s15 to result register
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__floatsisfvfp)
 
diff --git a/lib/builtins/arm/floatunssidfvfp.S b/lib/builtins/arm/floatunssidfvfp.S
index 2a59fdb..d7a7024 100644
--- a/lib/builtins/arm/floatunssidfvfp.S
+++ b/lib/builtins/arm/floatunssidfvfp.S
@@ -19,9 +19,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmov s0, r0
+	vcvt.f64.u32 d0, s0
+#else
 	vmov	s15, r0        // move int to float register s15
 	vcvt.f64.u32 d7, s15   // convert 32-bit int in s15 to double in d7
 	vmov	r0, r1, d7     // move d7 to result register pair r0/r1
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__floatunssidfvfp)
 
diff --git a/lib/builtins/arm/floatunssisfvfp.S b/lib/builtins/arm/floatunssisfvfp.S
index c096263..1ca8565 100644
--- a/lib/builtins/arm/floatunssisfvfp.S
+++ b/lib/builtins/arm/floatunssisfvfp.S
@@ -19,9 +19,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmov s0, r0
+	vcvt.f32.u32 s0, s0
+#else
 	vmov	s15, r0	       // move int to float register s15
 	vcvt.f32.u32 s15, s15  // convert 32-bit int in s15 to float in s15
 	vmov	r0, s15        // move s15 to result register
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__floatunssisfvfp)
 
diff --git a/lib/builtins/arm/gedf2vfp.S b/lib/builtins/arm/gedf2vfp.S
index 72f13ef..14899f0 100644
--- a/lib/builtins/arm/gedf2vfp.S
+++ b/lib/builtins/arm/gedf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__gedf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov 	d6, r0, r1	// load r0/r1 pair in double register
 	vmov 	d7, r2, r3	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movge	r0, #1      // set result register to 1 if greater than or equal
 	movlt	r0, #0
diff --git a/lib/builtins/arm/gesf2vfp.S b/lib/builtins/arm/gesf2vfp.S
index c9ee52c..b49d04d 100644
--- a/lib/builtins/arm/gesf2vfp.S
+++ b/lib/builtins/arm/gesf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__gesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0	    // move from GPR 0 to float register
 	vmov	s15, r1	    // move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movge	r0, #1      // set result register to 1 if greater than or equal
 	movlt	r0, #0
diff --git a/lib/builtins/arm/gtdf2vfp.S b/lib/builtins/arm/gtdf2vfp.S
index c7f2775..8166305 100644
--- a/lib/builtins/arm/gtdf2vfp.S
+++ b/lib/builtins/arm/gtdf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov 	d6, r0, r1	// load r0/r1 pair in double register
 	vmov 	d7, r2, r3	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movgt	r0, #1		// set result register to 1 if equal
 	movle	r0, #0
diff --git a/lib/builtins/arm/gtsf2vfp.S b/lib/builtins/arm/gtsf2vfp.S
index 7d49e45..d2d8a23 100644
--- a/lib/builtins/arm/gtsf2vfp.S
+++ b/lib/builtins/arm/gtsf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0		// move from GPR 0 to float register
 	vmov	s15, r1		// move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movgt	r0, #1		// set result register to 1 if equal
 	movle	r0, #0
diff --git a/lib/builtins/arm/ledf2vfp.S b/lib/builtins/arm/ledf2vfp.S
index ca5b553..a9dab77 100644
--- a/lib/builtins/arm/ledf2vfp.S
+++ b/lib/builtins/arm/ledf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__ledf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov 	d6, r0, r1	// load r0/r1 pair in double register
 	vmov 	d7, r2, r3	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movls	r0, #1		// set result register to 1 if equal
 	movhi	r0, #0
diff --git a/lib/builtins/arm/lesf2vfp.S b/lib/builtins/arm/lesf2vfp.S
index f25422e..7e127f4 100644
--- a/lib/builtins/arm/lesf2vfp.S
+++ b/lib/builtins/arm/lesf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__lesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0     // move from GPR 0 to float register
 	vmov	s15, r1     // move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movls	r0, #1      // set result register to 1 if equal
 	movhi	r0, #0
diff --git a/lib/builtins/arm/ltdf2vfp.S b/lib/builtins/arm/ltdf2vfp.S
index 6e2c099..8b6f8e4 100644
--- a/lib/builtins/arm/ltdf2vfp.S
+++ b/lib/builtins/arm/ltdf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov 	d6, r0, r1	// load r0/r1 pair in double register
 	vmov 	d7, r2, r3	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movmi	r0, #1		// set result register to 1 if equal
 	movpl	r0, #0
diff --git a/lib/builtins/arm/ltsf2vfp.S b/lib/builtins/arm/ltsf2vfp.S
index 95febb6..c4ff812 100644
--- a/lib/builtins/arm/ltsf2vfp.S
+++ b/lib/builtins/arm/ltsf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0     // move from GPR 0 to float register
 	vmov	s15, r1     // move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movmi	r0, #1      // set result register to 1 if equal
 	movpl	r0, #0
diff --git a/lib/builtins/arm/muldf3vfp.S b/lib/builtins/arm/muldf3vfp.S
index f638de1..aa7b234 100644
--- a/lib/builtins/arm/muldf3vfp.S
+++ b/lib/builtins/arm/muldf3vfp.S
@@ -18,10 +18,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__muldf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmul.f64 d0, d0, d1
+#else
 	vmov 	d6, r0, r1         // move first param from r0/r1 pair into d6
 	vmov 	d7, r2, r3         // move second param from r2/r3 pair into d7
-	vmul.f64 d6, d6, d7		
+	vmul.f64 d6, d6, d7
 	vmov 	r0, r1, d6         // move result back to r0/r1 pair
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__muldf3vfp)
 
diff --git a/lib/builtins/arm/mulsf3vfp.S b/lib/builtins/arm/mulsf3vfp.S
index bef58d3..a1da789 100644
--- a/lib/builtins/arm/mulsf3vfp.S
+++ b/lib/builtins/arm/mulsf3vfp.S
@@ -18,9 +18,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmul.f32 s0, s0, s1	// ARMHF target: operands read from s0/s1, product left in s0
+#else
 	vmov	s14, r0		// move first param from r0 into float register
 	vmov	s15, r1		// move second param from r1 into float register
 	vmul.f32 s13, s14, s15
 	vmov	r0, s13		// move result back to r0
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__mulsf3vfp)
diff --git a/lib/builtins/arm/nedf2vfp.S b/lib/builtins/arm/nedf2vfp.S
index 78cf529..7d884e0 100644
--- a/lib/builtins/arm/nedf2vfp.S
+++ b/lib/builtins/arm/nedf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__nedf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov 	d6, r0, r1	// load r0/r1 pair in double register
 	vmov 	d7, r2, r3	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7		
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movne	r0, #1		// set result register to 0 if unequal
 	moveq	r0, #0
diff --git a/lib/builtins/arm/negdf2vfp.S b/lib/builtins/arm/negdf2vfp.S
index 01c8ba6..81f0ab8 100644
--- a/lib/builtins/arm/negdf2vfp.S
+++ b/lib/builtins/arm/negdf2vfp.S
@@ -18,7 +18,11 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__negdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vneg.f64 d0, d0
+#else
 	eor	r1, r1, #-2147483648	// flip sign bit on double in r0/r1 pair
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__negdf2vfp)
 
diff --git a/lib/builtins/arm/negsf2vfp.S b/lib/builtins/arm/negsf2vfp.S
index 797abb3..46ab4a9 100644
--- a/lib/builtins/arm/negsf2vfp.S
+++ b/lib/builtins/arm/negsf2vfp.S
@@ -18,7 +18,11 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__negsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vneg.f32 s0, s0
+#else
 	eor	r0, r0, #-2147483648	// flip sign bit on float in r0
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__negsf2vfp)
 
diff --git a/lib/builtins/arm/nesf2vfp.S b/lib/builtins/arm/nesf2vfp.S
index 554d3e4..97c764f 100644
--- a/lib/builtins/arm/nesf2vfp.S
+++ b/lib/builtins/arm/nesf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__nesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0	    // move from GPR 0 to float register
 	vmov	s15, r1	    // move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movne	r0, #1      // set result register to 1 if unequal
 	moveq	r0, #0
diff --git a/lib/builtins/arm/subdf3vfp.S b/lib/builtins/arm/subdf3vfp.S
index 1fc7d18..2b6f2bd 100644
--- a/lib/builtins/arm/subdf3vfp.S
+++ b/lib/builtins/arm/subdf3vfp.S
@@ -18,10 +18,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__subdf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vsub.f64 d0, d0, d1
+#else
 	vmov 	d6, r0, r1         // move first param from r0/r1 pair into d6
 	vmov 	d7, r2, r3         // move second param from r2/r3 pair into d7
 	vsub.f64 d6, d6, d7		
 	vmov 	r0, r1, d6         // move result back to r0/r1 pair
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__subdf3vfp)
 
diff --git a/lib/builtins/arm/subsf3vfp.S b/lib/builtins/arm/subsf3vfp.S
index 11fe386..a9f3ba9 100644
--- a/lib/builtins/arm/subsf3vfp.S
+++ b/lib/builtins/arm/subsf3vfp.S
@@ -12,17 +12,21 @@
 //
 // extern float __subsf3vfp(float a, float b);
 //
-// Returns the difference between two single precision floating point numbers 
+// Returns the difference between two single precision floating point numbers
 // using the Darwin calling convention where single arguments are passsed
 // like 32-bit ints.
 //
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__subsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vsub.f32 s0, s0, s1	// ARMHF target: operands read from s0/s1, difference left in s0
+#else
 	vmov	s14, r0		// move first param from r0 into float register
 	vmov	s15, r1		// move second param from r1 into float register
 	vsub.f32 s14, s14, s15
 	vmov	r0, s14		// move result back to r0
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__subsf3vfp)
 
diff --git a/lib/builtins/arm/truncdfsf2vfp.S b/lib/builtins/arm/truncdfsf2vfp.S
index 04287ad..682e54d 100644
--- a/lib/builtins/arm/truncdfsf2vfp.S
+++ b/lib/builtins/arm/truncdfsf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.f32.f64 s0, d0
+#else
 	vmov 	d7, r0, r1   // load double from r0/r1 pair
 	vcvt.f32.f64 s15, d7 // convert double to single (trucate precision)
 	vmov 	r0, s15      // return result in r0
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__truncdfsf2vfp)
 
diff --git a/lib/builtins/arm/udivsi3.S b/lib/builtins/arm/udivsi3.S
index 085f8fb..fcc472b 100644
--- a/lib/builtins/arm/udivsi3.S
+++ b/lib/builtins/arm/udivsi3.S
@@ -40,12 +40,26 @@
 #else
 	cmp	r1, #1
 	bcc	LOCAL_LABEL(divby0)
+#if __ARM_ARCH_ISA_THUMB == 1
+	bne LOCAL_LABEL(num_neq_denom)
+	JMP(lr)
+LOCAL_LABEL(num_neq_denom):
+#else
 	IT(eq)
 	JMPc(lr, eq)
+#endif
 	cmp	r0, r1
+#if __ARM_ARCH_ISA_THUMB == 1
+	bhs LOCAL_LABEL(num_ge_denom)
+	movs r0, #0
+	JMP(lr)
+LOCAL_LABEL(num_ge_denom):
+#else
 	ITT(cc)
 	movcc	r0, #0
 	JMPc(lr, cc)
+#endif
+
 	/*
 	 * Implement division using binary long division algorithm.
 	 *
@@ -62,7 +76,7 @@
 	 * that (r0 << shift) < 2 * r1. The quotient is stored in r3.
 	 */
 
-#  ifdef __ARM_FEATURE_CLZ
+#  if defined(__ARM_FEATURE_CLZ)
 	clz	ip, r0
 	clz	r3, r1
 	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
@@ -77,49 +91,128 @@
 	sub	ip, ip, r3, lsl #3
 	mov	r3, #0
 	bx	ip
-#  else
+#  else /* No CLZ Feature */
 #    if __ARM_ARCH_ISA_THUMB == 2
 #    error THUMB mode requires CLZ or UDIV
 #    endif
+#    if __ARM_ARCH_ISA_THUMB == 1
+#      define BLOCK_SIZE 10
+#    else
+#      define BLOCK_SIZE 12
+#    endif
+
 	mov	r2, r0
+#    if __ARM_ARCH_ISA_THUMB == 1
+	mov ip, r0
+	adr r0, LOCAL_LABEL(div0block)
+	adds r0, #1
+#    else
 	adr	ip, LOCAL_LABEL(div0block)
-
-	lsr	r3, r2, #16
+#    endif
+	lsrs	r3, r2, #16
 	cmp	r3, r1
+#    if __ARM_ARCH_ISA_THUMB == 1
+	blo LOCAL_LABEL(skip_16)
+	movs r2, r3
+	subs r0, r0, #(16 * BLOCK_SIZE)
+LOCAL_LABEL(skip_16):
+#    else
 	movhs	r2, r3
-	subhs	ip, ip, #(16 * 12)
+	subhs	ip, ip, #(16 * BLOCK_SIZE)
+#    endif
 
-	lsr	r3, r2, #8
+	lsrs	r3, r2, #8
 	cmp	r3, r1
+#    if __ARM_ARCH_ISA_THUMB == 1
+	blo LOCAL_LABEL(skip_8)
+	movs r2, r3
+	subs r0, r0, #(8 * BLOCK_SIZE)
+LOCAL_LABEL(skip_8):
+#    else
 	movhs	r2, r3
-	subhs	ip, ip, #(8 * 12)
+	subhs	ip, ip, #(8 * BLOCK_SIZE)
+#    endif
 
-	lsr	r3, r2, #4
+	lsrs	r3, r2, #4
 	cmp	r3, r1
+#    if __ARM_ARCH_ISA_THUMB == 1
+	blo LOCAL_LABEL(skip_4)
+	movs r2, r3
+	subs r0, r0, #(4 * BLOCK_SIZE)
+LOCAL_LABEL(skip_4):
+#    else
 	movhs	r2, r3
-	subhs	ip, #(4 * 12)
+	subhs	ip, #(4 * BLOCK_SIZE)
+#    endif
 
-	lsr	r3, r2, #2
+	lsrs	r3, r2, #2
 	cmp	r3, r1
+#    if __ARM_ARCH_ISA_THUMB == 1
+	blo LOCAL_LABEL(skip_2)
+	movs r2, r3
+	subs r0, r0, #(2 * BLOCK_SIZE)
+LOCAL_LABEL(skip_2):
+#    else
 	movhs	r2, r3
-	subhs	ip, ip, #(2 * 12)
+	subhs	ip, ip, #(2 * BLOCK_SIZE)
+#    endif
 
 	/* Last block, no need to update r2 or r3. */
-	cmp	r1, r2, lsr #1
-	subls	ip, ip, #(1 * 12)
+#    if __ARM_ARCH_ISA_THUMB == 1
+	lsrs r3, r2, #1
+	cmp r3, r1
+	blo LOCAL_LABEL(skip_1)
+	subs r0, r0, #(1 * BLOCK_SIZE)
+LOCAL_LABEL(skip_1):
+	movs r2, r0
+	mov r0, ip
+	movs r3, #0
+	JMP (r2)
 
-	mov	r3, #0
+#    else
+	cmp	r1, r2, lsr #1
+	subls	ip, ip, #(1 * BLOCK_SIZE)
+
+	movs	r3, #0
 
 	JMP(ip)
-#  endif
+#    endif
+#  endif /* __ARM_FEATURE_CLZ */
+
 
 #define	IMM	#
+	/* due to the range limit of branch in Thumb1, we have to place the
+		 block closer */
+LOCAL_LABEL(divby0):
+	movs	r0, #0
+#      if defined(__ARM_EABI__)
+	bl	__aeabi_idiv0 // due to relocation limit, can't use b.
+#      endif
+	JMP(lr)
 
+
+#if __ARM_ARCH_ISA_THUMB == 1
+#define block(shift)                                                           \
+	lsls r2, r1, IMM shift;                                                      \
+	cmp r0, r2;                                                                  \
+	blo LOCAL_LABEL(block_skip_##shift);                                         \
+	subs r0, r0, r2;                                                             \
+	LOCAL_LABEL(block_skip_##shift) :;                                           \
+	adcs r3, r3 /* same as ((r3 << 1) | Carry). Carry is set if r0 >= r2. */
+
+	/* TODO: if current location counter is not word aligned, we don't
+		 need the .p2align and nop */
+	/* Label div0block must be word-aligned. First align block 31 */
+	.p2align 2
+	nop /* Padding to align div0block as 31 blocks = 310 bytes */
+
+#else
 #define block(shift)                                                           \
 	cmp	r0, r1, lsl IMM shift;                                         \
 	ITT(hs);                                                               \
 	WIDE(addhs)	r3, r3, IMM (1 << shift);                              \
 	WIDE(subhs)	r0, r0, r1, lsl IMM shift
+#endif
 
 	block(31)
 	block(30)
@@ -159,12 +252,14 @@
 	JMP(lr)
 #endif /* __ARM_ARCH_EXT_IDIV__ */
 
+#if __ARM_ARCH_EXT_IDIV__
 LOCAL_LABEL(divby0):
-	mov	r0, #0
-#ifdef __ARM_EABI__
-	b	__aeabi_idiv0
-#else
-	JMP(lr)
+        mov     r0, #0
+#  ifdef __ARM_EABI__
+        b       __aeabi_idiv0
+#  else
+        JMP(lr)
+#  endif
 #endif
 
 END_COMPILERRT_FUNCTION(__udivsi3)
diff --git a/lib/builtins/arm/unorddf2vfp.S b/lib/builtins/arm/unorddf2vfp.S
index 022dd7a..8556375 100644
--- a/lib/builtins/arm/unorddf2vfp.S
+++ b/lib/builtins/arm/unorddf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov 	d6, r0, r1	// load r0/r1 pair in double register
 	vmov 	d7, r2, r3	// load r2/r3 pair in double register
-	vcmp.f64 d6, d7		
+	vcmp.f64 d6, d7
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movvs	r0, #1      // set result register to 1 if "overflow" (any NaNs)
 	movvc	r0, #0
diff --git a/lib/builtins/arm/unordsf2vfp.S b/lib/builtins/arm/unordsf2vfp.S
index 5ebdd3d..2b16b49 100644
--- a/lib/builtins/arm/unordsf2vfp.S
+++ b/lib/builtins/arm/unordsf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0     // move from GPR 0 to float register
 	vmov	s15, r1	    // move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
 	movvs	r0, #1      // set result register to 1 if "overflow" (any NaNs)
 	movvc	r0, #0
diff --git a/lib/builtins/arm64/Makefile.mk b/lib/builtins/arm64/Makefile.mk
deleted file mode 100644
index 7f7e386..0000000
--- a/lib/builtins/arm64/Makefile.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#===- lib/builtins/arm64/Makefile.mk -----------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs := 
-OnlyArchs := arm64 
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/lib/builtins/armv6m/Makefile.mk b/lib/builtins/armv6m/Makefile.mk
deleted file mode 100644
index f3c1807..0000000
--- a/lib/builtins/armv6m/Makefile.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#===- lib/builtins/arm/Makefile.mk -------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs := 
-OnlyArchs := armv6m
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/lib/builtins/assembly.h b/lib/builtins/assembly.h
index 5fc74f6..29d9f88 100644
--- a/lib/builtins/assembly.h
+++ b/lib/builtins/assembly.h
@@ -70,7 +70,7 @@
 #if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5
 #define ARM_HAS_BX
 #endif
-#if !defined(__ARM_FEATURE_CLZ) &&                                             \
+#if !defined(__ARM_FEATURE_CLZ) && __ARM_ARCH_ISA_THUMB != 1 &&                \
     (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__)))
 #define __ARM_FEATURE_CLZ
 #endif
@@ -149,6 +149,7 @@
 #define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target)                         \
   .globl SYMBOL_NAME(name) SEPARATOR                                           \
   SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR                                  \
+  DECLARE_SYMBOL_VISIBILITY(SYMBOL_NAME(name)) SEPARATOR                       \
   .set SYMBOL_NAME(name), SYMBOL_NAME(target) SEPARATOR
 
 #if defined(__ARM_EABI__)
diff --git a/lib/builtins/atomic.c b/lib/builtins/atomic.c
index f1ddc3e..ee35e34 100644
--- a/lib/builtins/atomic.c
+++ b/lib/builtins/atomic.c
@@ -229,13 +229,20 @@
 // Where the size is known at compile time, the compiler may emit calls to
 // specialised versions of the above functions.
 ////////////////////////////////////////////////////////////////////////////////
+#ifdef __SIZEOF_INT128__
 #define OPTIMISED_CASES\
   OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\
   OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\
   OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\
   OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)\
-  /* FIXME: __uint128_t isn't available on 32 bit platforms.
-  OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t)*/\
+  OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t)
+#else
+#define OPTIMISED_CASES\
+  OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\
+  OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\
+  OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\
+  OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)
+#endif
 
 #define OPTIMISED_CASE(n, lockfree, type)\
 type __atomic_load_##n(type *src, int model) {\
diff --git a/lib/builtins/clear_cache.c b/lib/builtins/clear_cache.c
index 55bbdd3..4c2ac3b 100644
--- a/lib/builtins/clear_cache.c
+++ b/lib/builtins/clear_cache.c
@@ -110,10 +110,12 @@
     #elif defined(__linux__)
          register int start_reg __asm("r0") = (int) (intptr_t) start;
          const register int end_reg __asm("r1") = (int) (intptr_t) end;
+         const register int flags __asm("r2") = 0;
          const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush;
          __asm __volatile("svc 0x0"
                           : "=r"(start_reg)
-                          : "r"(syscall_nr), "r"(start_reg), "r"(end_reg));
+                          : "r"(syscall_nr), "r"(start_reg), "r"(end_reg),
+                            "r"(flags));
          if (start_reg != 0) {
              compilerrt_abort();
          }
diff --git a/lib/builtins/floattitf.c b/lib/builtins/floattitf.c
new file mode 100644
index 0000000..994fded
--- /dev/null
+++ b/lib/builtins/floattitf.c
@@ -0,0 +1,82 @@
+//===-- lib/floattitf.c - int128 -> quad-precision conversion -----*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ti_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+
+/* Returns: convert a ti_int to a fp_t, rounding toward even. */
+
+/* Assumption: fp_t is a IEEE 128 bit floating point type
+ *             ti_int is a 128 bit integral type
+ */
+
+/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm |
+ * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t
+__floattitf(ti_int a) {
+    if (a == 0)
+        return 0.0;
+    const unsigned N = sizeof(ti_int) * CHAR_BIT;
+    const ti_int s = a >> (N-1);
+    a = (a ^ s) - s;
+    int sd = N - __clzti2(a);  /* number of significant digits */
+    int e = sd - 1;            /* exponent */
+    if (sd > LDBL_MANT_DIG) {
+        /*  start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+         *  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+         *                                                12345678901234567890123456
+         *  1 = msb 1 bit
+         *  P = bit LDBL_MANT_DIG-1 bits to the right of 1
+         *  Q = bit LDBL_MANT_DIG bits to the right of 1
+         *  R = "or" of all bits to the right of Q
+         */
+        switch (sd) {
+        case LDBL_MANT_DIG + 1:
+            a <<= 1;
+            break;
+        case LDBL_MANT_DIG + 2:
+            break;
+        default:
+            a = ((tu_int)a >> (sd - (LDBL_MANT_DIG+2))) |
+                ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0);
+        };
+        /* finish: */
+        a |= (a & 4) != 0;  /* Or P into R */
+        ++a;  /* round - this step may add a significant bit */
+        a >>= 2;  /* dump Q and R */
+        /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */
+        if (a & ((tu_int)1 << LDBL_MANT_DIG)) {
+            a >>= 1;
+            ++e;
+        }
+        /* a is now rounded to LDBL_MANT_DIG bits */
+    } else {
+        a <<= (LDBL_MANT_DIG - sd);
+        /* a is now rounded to LDBL_MANT_DIG bits */
+    }
+
+    long_double_bits fb;
+    fb.u.high.all = (s & 0x8000000000000000LL)           /* sign */
+                  | (du_int)(e + 16383) << 48            /* exponent */
+                  | ((a >> 64) & 0x0000ffffffffffffLL);  /* significand */
+    fb.u.low.all = (du_int)(a);
+    return fb.f;
+}
+
+#endif
diff --git a/lib/builtins/floatuntitf.c b/lib/builtins/floatuntitf.c
new file mode 100644
index 0000000..e2518c9
--- /dev/null
+++ b/lib/builtins/floatuntitf.c
@@ -0,0 +1,79 @@
+//===-- lib/floatuntitf.c - uint128 -> quad-precision conversion --*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements tu_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+
+/* Returns: convert a tu_int to a fp_t, rounding toward even. */
+
+/* Assumption: fp_t is a IEEE 128 bit floating point type
+ *             tu_int is a 128 bit integral type
+ */
+
+/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm |
+ * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t
+__floatuntitf(tu_int a) {
+    if (a == 0)
+        return 0.0;
+    const unsigned N = sizeof(tu_int) * CHAR_BIT;
+    int sd = N - __clzti2(a);  /* number of significant digits */
+    int e = sd - 1;            /* exponent */
+    if (sd > LDBL_MANT_DIG) {
+        /*  start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+         *  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+         *                                                12345678901234567890123456
+         *  1 = msb 1 bit
+         *  P = bit LDBL_MANT_DIG-1 bits to the right of 1
+         *  Q = bit LDBL_MANT_DIG bits to the right of 1
+         *  R = "or" of all bits to the right of Q
+         */
+        switch (sd) {
+        case LDBL_MANT_DIG + 1:
+            a <<= 1;
+            break;
+        case LDBL_MANT_DIG + 2:
+            break;
+        default:
+            a = (a >> (sd - (LDBL_MANT_DIG+2))) |
+                ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0);
+        };
+        /* finish: */
+        a |= (a & 4) != 0;  /* Or P into R */
+        ++a;  /* round - this step may add a significant bit */
+        a >>= 2;  /* dump Q and R */
+        /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */
+        if (a & ((tu_int)1 << LDBL_MANT_DIG)) {
+            a >>= 1;
+            ++e;
+        }
+        /* a is now rounded to LDBL_MANT_DIG bits */
+    } else {
+        a <<= (LDBL_MANT_DIG - sd);
+        /* a is now rounded to LDBL_MANT_DIG bits */
+    }
+
+    long_double_bits fb;
+    fb.u.high.all = (du_int)(e + 16383) << 48            /* exponent */
+                  | ((a >> 64) & 0x0000ffffffffffffLL);  /* significand */
+    fb.u.low.all = (du_int)(a);
+    return fb.f;
+}
+
+#endif
diff --git a/lib/builtins/gcc_personality_v0.c b/lib/builtins/gcc_personality_v0.c
index 29e5be3..0bc7656 100644
--- a/lib/builtins/gcc_personality_v0.c
+++ b/lib/builtins/gcc_personality_v0.c
@@ -12,6 +12,17 @@
 #include "int_lib.h"
 
 #include <unwind.h>
+#if defined(__arm__) && !defined(__ARM_DWARF_EH__) && !defined(__USING_SJLJ_EXCEPTIONS__)
+/*
+ * When building with older compilers (e.g. clang <3.9), it is possible that we
+ * have a version of unwind.h which does not provide the EHABI declarations
+ * which are required for the C personality to conform to the specification.  In
+ * order to provide forward compatibility for such compilers, we re-declare the
+ * necessary interfaces in the helper to permit a standalone compilation of the
+ * builtins (which contains the C unwinding personality for historical reasons).
+ */
+#include "unwind-ehabi-helpers.h"
+#endif
 
 /*
  * Pointer encodings documented at:
diff --git a/lib/builtins/i386/Makefile.mk b/lib/builtins/i386/Makefile.mk
deleted file mode 100644
index f3776a0..0000000
--- a/lib/builtins/i386/Makefile.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#===- lib/builtins/i386/Makefile.mk ------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs := 
-OnlyArchs := i386
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/lib/builtins/mingw_fixfloat.c b/lib/builtins/mingw_fixfloat.c
new file mode 100644
index 0000000..c462e0d
--- /dev/null
+++ b/lib/builtins/mingw_fixfloat.c
@@ -0,0 +1,36 @@
+/* ===-- mingw_fixfloat.c - Wrap int/float conversions for arm/windows -----===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI di_int __fixdfdi(double a);
+COMPILER_RT_ABI di_int __fixsfdi(float a);
+COMPILER_RT_ABI du_int __fixunsdfdi(double a);
+COMPILER_RT_ABI du_int __fixunssfdi(float a);
+COMPILER_RT_ABI double __floatdidf(di_int a);
+COMPILER_RT_ABI float __floatdisf(di_int a);
+COMPILER_RT_ABI double __floatundidf(du_int a);
+COMPILER_RT_ABI float __floatundisf(du_int a);
+
+COMPILER_RT_ABI di_int __dtoi64(double a) { return __fixdfdi(a); }
+
+COMPILER_RT_ABI di_int __stoi64(float a) { return __fixsfdi(a); }
+
+COMPILER_RT_ABI du_int __dtou64(double a) { return __fixunsdfdi(a); }
+
+COMPILER_RT_ABI du_int __stou64(float a) { return __fixunssfdi(a); }
+
+COMPILER_RT_ABI double __i64tod(di_int a) { return __floatdidf(a); }
+
+COMPILER_RT_ABI float __i64tos(di_int a) { return __floatdisf(a); }
+
+COMPILER_RT_ABI double __u64tod(du_int a) { return __floatundidf(a); }
+
+COMPILER_RT_ABI float __u64tos(du_int a) { return __floatundisf(a); }
diff --git a/lib/builtins/ppc/Makefile.mk b/lib/builtins/ppc/Makefile.mk
deleted file mode 100644
index 0adc623..0000000
--- a/lib/builtins/ppc/Makefile.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#===- lib/builtins/ppc/Makefile.mk -------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs := 
-OnlyArchs := ppc
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/lib/builtins/unwind-ehabi-helpers.h b/lib/builtins/unwind-ehabi-helpers.h
new file mode 100644
index 0000000..ccb0765
--- /dev/null
+++ b/lib/builtins/unwind-ehabi-helpers.h
@@ -0,0 +1,55 @@
+/* ===-- unwind-ehabi-helpers.h - Supplementary ARM EHABI declarations -----===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===--------------------------------------------------------------------=== */
+
+#ifndef UNWIND_EHABI_HELPERS_H
+#define UNWIND_EHABI_HELPERS_H
+
+#include <stdint.h>
+/* NOTE: see reasoning for this inclusion below */
+#include <unwind.h>
+
+#if !defined(__ARM_EABI_UNWINDER__)
+
+/*
+ * NOTE: _URC_OK, _URC_FAILURE must be present as preprocessor tokens.  This
+ * allows for a substitution of a constant which can be cast into the
+ * appropriate enumerated type.  This header is expected to always be included
+ * AFTER unwind.h (which is why it is forcefully included above).  This ensures
+ * that we do not overwrite the token for the enumeration.  Subsequent uses of
+ * the token would be clean to rewrite with constant values.
+ *
+ * The typedef redeclaration should be safe.  Due to the protection granted to
+ * us by the `__ARM_EABI_UNWINDER__` above, we are guaranteed that we are in a
+ * header not vended by gcc.  The HP unwinder (being an itanium unwinder) does
+ * not support EHABI, and the GNU unwinder, derived from the HP unwinder, also
+ * does not support EHABI as of the introduction of this header.  As such, we
+ * are fairly certain that we are in the LLVM case.  Here, _Unwind_State is a
+ * typedef, and so we can get away with a redeclaration.
+ *
+ * Guarded redefinitions of the needed unwind state prevent the redefinition of
+ * those states.
+ */
+
+#define _URC_OK       0
+#define _URC_FAILURE  9
+
+typedef uint32_t _Unwind_State;
+
+#if !defined(_US_UNWIND_FRAME_STARTING)
+#define _US_UNWIND_FRAME_STARTING ((_Unwind_State)1)
+#endif
+
+#if !defined(_US_ACTION_MASK)
+#define _US_ACTION_MASK ((_Unwind_State)3)
+#endif
+
+#endif
+
+#endif
+
diff --git a/lib/builtins/x86_64/Makefile.mk b/lib/builtins/x86_64/Makefile.mk
deleted file mode 100644
index 83848dd..0000000
--- a/lib/builtins/x86_64/Makefile.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#===- lib/builtins/x86_64/Makefile.mk ----------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs := 
-OnlyArchs := x86_64 x86_64h
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/lib/dfsan/dfsan.cc b/lib/dfsan/dfsan.cc
index 4156000..3aa99b7 100644
--- a/lib/dfsan/dfsan.cc
+++ b/lib/dfsan/dfsan.cc
@@ -114,6 +114,26 @@
 // | reserved by kernel |
 // +--------------------+ 0x0000000000
 
+// On Linux/AArch64 (48-bit VMA), memory is laid out as follow:
+//
+// +--------------------+ 0x1000000000000 (top of memory)
+// | application memory |
+// +--------------------+ 0xffff00008000 (kAppAddr)
+// |       unused       |
+// +--------------------+ 0xaaaab0000000 (top of PIE address)
+// | application PIE    |
+// +--------------------+ 0xaaaaa0000000 (bottom of PIE address)
+// |                    |
+// |       unused       |
+// |                    |
+// +--------------------+ 0x1200000000 (kUnusedAddr)
+// |    union table     |
+// +--------------------+ 0x8000000000 (kUnionTableAddr)
+// |   shadow memory    |
+// +--------------------+ 0x0000010000 (kShadowAddr)
+// | reserved by kernel |
+// +--------------------+ 0x0000000000
+
 typedef atomic_dfsan_label dfsan_union_table_t[kNumLabels][kNumLabels];
 
 #ifdef DFSAN_RUNTIME_VMA
@@ -372,11 +392,12 @@
 #ifdef DFSAN_RUNTIME_VMA
   __dfsan::vmaSize =
     (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1);
-  if (__dfsan::vmaSize == 39 || __dfsan::vmaSize == 42) {
+  if (__dfsan::vmaSize == 39 || __dfsan::vmaSize == 42 ||
+      __dfsan::vmaSize == 48) {
     __dfsan_shadow_ptr_mask = ShadowMask();
   } else {
     Printf("FATAL: DataFlowSanitizer: unsupported VMA range\n");
-    Printf("FATAL: Found %d - Supported 39 and 42\n", __dfsan::vmaSize);
+    Printf("FATAL: Found %d - Supported 39, 42, and 48\n", __dfsan::vmaSize);
     Die();
   }
 #endif
diff --git a/lib/dfsan/dfsan_platform.h b/lib/dfsan/dfsan_platform.h
index f1d9f10..98284ba 100644
--- a/lib/dfsan/dfsan_platform.h
+++ b/lib/dfsan/dfsan_platform.h
@@ -46,6 +46,13 @@
   static const uptr kShadowMask = ~0x3c000000000;
 };
 
+struct Mapping48 {
+  static const uptr kShadowAddr = 0x10000;
+  static const uptr kUnionTableAddr = 0x8000000000;
+  static const uptr kAppAddr = 0xffff00008000;
+  static const uptr kShadowMask = ~0xfffff0000000;
+};
+
 extern int vmaSize;
 # define DFSAN_RUNTIME_VMA 1
 #else
@@ -72,11 +79,13 @@
 template<int Type>
 uptr MappingArchImpl(void) {
 #ifdef __aarch64__
-  if (vmaSize == 39)
-    return MappingImpl<Mapping39, Type>();
-  else
-    return MappingImpl<Mapping42, Type>();
+  switch (vmaSize) {
+    case 39: return MappingImpl<Mapping39, Type>();
+    case 42: return MappingImpl<Mapping42, Type>();
+    case 48: return MappingImpl<Mapping48, Type>();
+  }
   DCHECK(0);
+  return 0;
 #else
   return MappingImpl<Mapping, Type>();
 #endif
diff --git a/lib/dfsan/done_abilist.txt b/lib/dfsan/done_abilist.txt
index 7ca8aeb..a00dc54 100644
--- a/lib/dfsan/done_abilist.txt
+++ b/lib/dfsan/done_abilist.txt
@@ -266,6 +266,14 @@
 # Replaces __sanitizer_cov_trace_cmp with __dfsw___sanitizer_cov_trace_cmp
 fun:__sanitizer_cov_trace_cmp=custom
 fun:__sanitizer_cov_trace_cmp=uninstrumented
+fun:__sanitizer_cov_trace_cmp1=custom
+fun:__sanitizer_cov_trace_cmp1=uninstrumented
+fun:__sanitizer_cov_trace_cmp2=custom
+fun:__sanitizer_cov_trace_cmp2=uninstrumented
+fun:__sanitizer_cov_trace_cmp4=custom
+fun:__sanitizer_cov_trace_cmp4=uninstrumented
+fun:__sanitizer_cov_trace_cmp8=custom
+fun:__sanitizer_cov_trace_cmp8=uninstrumented
 # Similar for __sanitizer_cov_trace_switch
 fun:__sanitizer_cov_trace_switch=custom
 fun:__sanitizer_cov_trace_switch=uninstrumented
diff --git a/lib/esan/cache_frag.cpp b/lib/esan/cache_frag.cpp
index a3e612d..5fa5c7d 100644
--- a/lib/esan/cache_frag.cpp
+++ b/lib/esan/cache_frag.cpp
@@ -94,8 +94,8 @@
     type = "struct";
     start = &Struct->StructName[7];
   }
-  // Remove the suffixes with '#' during print.
-  end = strchr(start, '#');
+  // Remove the suffixes with '$' during print.
+  end = strchr(start, '$');
   CHECK(end != nullptr);
   Report("  %s %.*s\n", type, end - start, start);
   Report("   size = %u, count = %llu, ratio = %llu, array access = %llu\n",
diff --git a/lib/esan/esan.cpp b/lib/esan/esan.cpp
index 2fb7789..09b530b 100644
--- a/lib/esan/esan.cpp
+++ b/lib/esan/esan.cpp
@@ -141,9 +141,17 @@
 }
 #endif
 
+uptr VmaSize;
+
 static void initializeShadow() {
   verifyAddressSpace();
 
+  // This is based on the assumption that the initial stack is always allocated
+  // in the topmost segment of the user address space and the assumption
+  // holds true on all the platforms currently supported.
+  VmaSize =
+    (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1);
+
   DCHECK(verifyShadowScheme());
 
   Mapping.initialize(ShadowScale[__esan_which_tool]);
diff --git a/lib/esan/esan.h b/lib/esan/esan.h
index 5a0dde6..e73b21e 100644
--- a/lib/esan/esan.h
+++ b/lib/esan/esan.h
@@ -34,6 +34,7 @@
 
 extern bool EsanIsInitialized;
 extern bool EsanDuringInit;
+extern uptr VmaSize;
 
 void initializeLibrary(ToolType Tool);
 int finalizeLibrary();
diff --git a/lib/esan/esan_interceptors.cpp b/lib/esan/esan_interceptors.cpp
index 647f010..9ae5482 100644
--- a/lib/esan/esan_interceptors.cpp
+++ b/lib/esan/esan_interceptors.cpp
@@ -461,28 +461,35 @@
 // Malloc interceptors
 //===----------------------------------------------------------------------===//
 
-static char early_alloc_buf[128];
-static bool used_early_alloc_buf;
+static const uptr early_alloc_buf_size = 4096;
+static uptr allocated_bytes;
+static char early_alloc_buf[early_alloc_buf_size];
+
+static bool isInEarlyAllocBuf(const void *ptr) {
+  return ((uptr)ptr >= (uptr)early_alloc_buf &&
+          ((uptr)ptr - (uptr)early_alloc_buf) < sizeof(early_alloc_buf));
+}
 
 static void *handleEarlyAlloc(uptr size) {
   // If esan is initialized during an interceptor (which happens with some
   // tcmalloc implementations that call pthread_mutex_lock), the call from
-  // dlsym to calloc will deadlock.  There is only one such calloc (dlsym
-  // allocates a single pthread key), so we work around it by using a
-  // static buffer for the calloc request.  The loader currently needs
-  // 32 bytes but we size at 128 to allow for future changes.
+  // dlsym to calloc will deadlock.
+  // dlsym may also call malloc before REAL(malloc) is retrieved from dlsym.
+  // We work around it by using a static buffer for the early malloc/calloc
+  // requests.
   // This solution will also allow us to deliberately intercept malloc & family
   // in the future (to perform tool actions on each allocation, without
   // replacing the allocator), as it also solves the problem of intercepting
   // calloc when it will itself be called before its REAL pointer is
   // initialized.
-  CHECK(!used_early_alloc_buf && size < sizeof(early_alloc_buf));
   // We do not handle multiple threads here.  This only happens at process init
   // time, and while it's possible for a shared library to create early threads
   // that race here, we consider that to be a corner case extreme enough that
   // it's not worth the effort to handle.
-  used_early_alloc_buf = true;
-  return (void *)early_alloc_buf;
+  void *mem = (void *)&early_alloc_buf[allocated_bytes];
+  allocated_bytes += size;
+  CHECK_LT(allocated_bytes, early_alloc_buf_size);
+  return mem;
 }
 
 INTERCEPTOR(void*, calloc, uptr size, uptr n) {
@@ -496,14 +503,20 @@
   return res;
 }
 
+INTERCEPTOR(void*, malloc, uptr size) {
+  if (EsanDuringInit && REAL(malloc) == nullptr)
+    return handleEarlyAlloc(size);
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, malloc, size);
+  return REAL(malloc)(size);
+}
+
 INTERCEPTOR(void, free, void *p) {
   void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, free, p);
-  if (p == (void *)early_alloc_buf) {
-    // We expect just a singleton use but we clear this for cleanliness.
-    used_early_alloc_buf = false;
+  // There are only a few early allocation requests, so we simply skip the free.
+  if (isInEarlyAllocBuf(p))
     return;
-  }
+  COMMON_INTERCEPTOR_ENTER(ctx, free, p);
   REAL(free)(p);
 }
 
@@ -534,6 +547,7 @@
   ESAN_MAYBE_INTERCEPT_PTHREAD_SIGMASK;
 
   INTERCEPT_FUNCTION(calloc);
+  INTERCEPT_FUNCTION(malloc);
   INTERCEPT_FUNCTION(free);
 
   // TODO(bruening): intercept routines that other sanitizers intercept that
diff --git a/lib/esan/esan_linux.cpp b/lib/esan/esan_linux.cpp
index aa961b6..014205c 100644
--- a/lib/esan/esan_linux.cpp
+++ b/lib/esan/esan_linux.cpp
@@ -25,7 +25,7 @@
 namespace __esan {
 
 void verifyAddressSpace() {
-#if SANITIZER_LINUX && defined(__x86_64__)
+#if SANITIZER_LINUX && (defined(__x86_64__) || SANITIZER_MIPS64)
   // The kernel determines its mmap base from the stack size limit.
   // Our Linux 64-bit shadow mapping assumes the stack limit is less than a
   // terabyte, which keeps the mmap region above 0x7e00'.
diff --git a/lib/esan/esan_shadow.h b/lib/esan/esan_shadow.h
index f8f154e..72a919a 100644
--- a/lib/esan/esan_shadow.h
+++ b/lib/esan/esan_shadow.h
@@ -15,6 +15,7 @@
 #ifndef ESAN_SHADOW_H
 #define ESAN_SHADOW_H
 
+#include "esan.h"
 #include <sanitizer_common/sanitizer_platform.h>
 
 #if SANITIZER_WORDSIZE != 64
@@ -23,6 +24,12 @@
 
 namespace __esan {
 
+struct ApplicationRegion {
+  uptr Start;
+  uptr End;
+  bool ShadowMergedWithPrev;
+};
+
 #if SANITIZER_LINUX && defined(__x86_64__)
 // Linux x86_64
 //
@@ -89,12 +96,6 @@
 // [0x000015ff'ff601000, 0x00001600'00000000]
 // [0x000015ff'ff600000, 0x000015ff'ff601000]
 
-struct ApplicationRegion {
-  uptr Start;
-  uptr End;
-  bool ShadowMergedWithPrev;
-};
-
 static const struct ApplicationRegion AppRegions[] = {
   {0x0000000000000000ull, 0x0000010000000000u, false},
   {0x0000550000000000u,   0x0000570000000000u, false},
@@ -105,6 +106,52 @@
   {0x00007fffff601000u,   0x0000800000000000u, true},
   {0xffffffffff600000u,   0xffffffffff601000u, true},
 };
+
+#elif SANITIZER_LINUX && SANITIZER_MIPS64
+
+// Application memory falls into these 3 regions
+//
+// [0x00000001'00000000, 0x00000002'00000000) non-PIE + heap
+// [0x000000aa'00000000, 0x000000ab'00000000) PIE
+// [0x000000ff'00000000, 0x000000ff'ffffffff) libraries + stack
+//
+// This formula translates from application memory to shadow memory:
+//
+//   shadow(app) = ((app & 0x00000f'ffffffff) + offset) >> scale
+//
+// Where the offset for 1:1 is 0x000013'00000000.  For other scales, the
+// offset is shifted left by the scale, except for scales of 1 and 2 where
+// it must be tweaked in order to pass the double-shadow test
+// (see the "shadow(shadow)" comments below):
+//   scale == 0: 0x000013'00000000
+//   scale == 1: 0x000022'00000000
+//   scale == 2: 0x000044'00000000
+//   scale >= 3: (0x000013'00000000 << scale)
+//
+// The resulting shadow memory regions for a 0 scaling are:
+//
+// [0x00000014'00000000, 0x00000015'00000000)
+// [0x0000001d'00000000, 0x0000001e'00000000)
+// [0x00000022'00000000, 0x00000022'ffffffff)
+//
+// We also want to ensure that a wild access by the application into the shadow
+// regions will not corrupt our own shadow memory. shadow(shadow) ends up
+// disjoint from shadow(app):
+//
+// [0x00000017'00000000, 0x00000018'00000000)
+// [0x00000020'00000000, 0x00000021'00000000)
+// [0x00000015'00000000, 0x00000015'ffffffff]
+
+static const struct ApplicationRegion AppRegions[] = {
+  {0x0100000000u, 0x0200000000u, false},
+  {0xaa00000000u, 0xab00000000u, false},
+  {0xff00000000u, 0xffffffffffu, false},
+};
+
+#else
+#error Platform not supported
+#endif
+
 static const u32 NumAppRegions = sizeof(AppRegions)/sizeof(AppRegions[0]);
 
 // See the comment above: we do not currently support a stack size rlimit
@@ -113,29 +160,59 @@
 
 class ShadowMapping {
 public:
-  static const uptr Mask = 0x00000fffffffffffu;
+
   // The scale and offset vary by tool.
   uptr Scale;
   uptr Offset;
+
+  // TODO(sagar.thakur): Try to hardcode the mask as done in the compiler
+  // instrumentation to reduce the runtime cost of appToShadow.
+  struct ShadowMemoryMask40 {
+    static const uptr Mask = 0x0000000fffffffffu;
+  };
+
+  struct ShadowMemoryMask47 {
+    static const uptr Mask = 0x00000fffffffffffu;
+  };
+
   void initialize(uptr ShadowScale) {
-    static const uptr OffsetArray[3] = {
-        0x0000130000000000u,
-        0x0000220000000000u,
-        0x0000440000000000u,
+
+    const uptr OffsetArray40[3] = {
+      0x0000001300000000u,
+      0x0000002200000000u,
+      0x0000004400000000u,
     };
+
+    const uptr OffsetArray47[3] = {
+      0x0000130000000000u,
+      0x0000220000000000u,
+      0x0000440000000000u,
+    };
+
     Scale = ShadowScale;
-    if (Scale <= 2)
-      Offset = OffsetArray[Scale];
-    else
-      Offset = OffsetArray[0] << Scale;
+    switch (VmaSize) {
+      case 40: {
+        if (Scale <= 2)
+          Offset = OffsetArray40[Scale];
+        else
+          Offset = OffsetArray40[0] << Scale;
+      }
+      break;
+      case 47: {
+        if (Scale <= 2)
+          Offset = OffsetArray47[Scale];
+        else
+          Offset = OffsetArray47[0] << Scale;
+      }
+      break;
+      default: {
+        Printf("ERROR: %d-bit virtual memory address size not supported\n", VmaSize);
+        Die();
+      }
+    }
   }
 };
 extern ShadowMapping Mapping;
-#else
-// We'll want to use templatized functions over the ShadowMapping once
-// we support more platforms.
-#error Platform not supported
-#endif
 
 static inline bool getAppRegion(u32 i, uptr *Start, uptr *End) {
   if (i >= NumAppRegions)
@@ -154,9 +231,21 @@
   return false;
 }
 
+template<typename Params>
+uptr appToShadowImpl(uptr App) {
+  return (((App & Params::Mask) + Mapping.Offset) >> Mapping.Scale);
+}
+
 ALWAYS_INLINE
 uptr appToShadow(uptr App) {
-  return (((App & ShadowMapping::Mask) + Mapping.Offset) >> Mapping.Scale);
+  switch (VmaSize) {
+    case 40: return appToShadowImpl<ShadowMapping::ShadowMemoryMask40>(App);
+    case 47: return appToShadowImpl<ShadowMapping::ShadowMemoryMask47>(App);
+    default: {
+      Printf("ERROR: %d-bit virtual memory address size not supported\n", VmaSize);
+      Die();
+    }
+  }
 }
 
 static inline bool getShadowRegion(u32 i, uptr *Start, uptr *End) {
diff --git a/lib/interception/interception_win.cc b/lib/interception/interception_win.cc
index c8d67b9..91abecf 100644
--- a/lib/interception/interception_win.cc
+++ b/lib/interception/interception_win.cc
@@ -148,10 +148,16 @@
 }
 
 static bool DistanceIsWithin2Gig(uptr from, uptr target) {
+#if SANITIZER_WINDOWS64
   if (from < target)
     return target - from <= (uptr)0x7FFFFFFFU;
   else
     return from - target <= (uptr)0x80000000U;
+#else
+  // In a 32-bit address space, the address calculation will wrap, so this check
+  // is unnecessary.
+  return true;
+#endif
 }
 
 static uptr GetMmapGranularity() {
@@ -498,6 +504,7 @@
     case 0xd9f748:    // 48 f7 d9 : neg rcx
     case 0xd12b48:    // 48 2b d1 : sub rdx, rcx
     case 0x07c1f6:    // f6 c1 07 : test cl, 0x7
+    case 0xc98548:    // 48 85 C9 : test rcx, rcx
     case 0xc0854d:    // 4d 85 c0 : test r8, r8
     case 0xc2b60f:    // 0f b6 c2 : movzx eax, dl
     case 0xc03345:    // 45 33 c0 : xor r8d, r8d
@@ -915,19 +922,18 @@
   return 0;
 }
 
-static bool GetFunctionAddressInDLLs(const char *func_name, uptr *func_addr) {
-  *func_addr = 0;
+bool OverrideFunction(
+    const char *func_name, uptr new_func, uptr *orig_old_func) {
+  bool hooked = false;
   void **DLLs = InterestingDLLsAvailable();
-  for (size_t i = 0; *func_addr == 0 && DLLs[i]; ++i)
-    *func_addr = InternalGetProcAddress(DLLs[i], func_name);
-  return (*func_addr != 0);
-}
-
-bool OverrideFunction(const char *name, uptr new_func, uptr *orig_old_func) {
-  uptr orig_func;
-  if (!GetFunctionAddressInDLLs(name, &orig_func))
-    return false;
-  return OverrideFunction(orig_func, new_func, orig_old_func);
+  for (size_t i = 0; DLLs[i]; ++i) {
+    uptr func_addr = InternalGetProcAddress(DLLs[i], func_name);
+    if (func_addr &&
+        OverrideFunction(func_addr, new_func, orig_old_func)) {
+      hooked = true;
+    }
+  }
+  return hooked;
 }
 
 bool OverrideImportedFunction(const char *module_to_patch,
diff --git a/lib/interception/tests/CMakeLists.txt b/lib/interception/tests/CMakeLists.txt
index bfe41fe..5ea943f 100644
--- a/lib/interception/tests/CMakeLists.txt
+++ b/lib/interception/tests/CMakeLists.txt
@@ -29,6 +29,7 @@
 endif()
 if(MSVC)
   list(APPEND INTERCEPTION_TEST_CFLAGS_COMMON -gcodeview)
+  list(APPEND INTERCEPTION_TEST_LINK_FLAGS_COMMON -Wl,-largeaddressaware)
 endif()
 list(APPEND INTERCEPTION_TEST_LINK_FLAGS_COMMON -g)
 
diff --git a/lib/interception/tests/interception_linux_test.cc b/lib/interception/tests/interception_linux_test.cc
index 08619d8..cc09aa0 100644
--- a/lib/interception/tests/interception_linux_test.cc
+++ b/lib/interception/tests/interception_linux_test.cc
@@ -11,6 +11,10 @@
 // Tests for interception_linux.h.
 //
 //===----------------------------------------------------------------------===//
+
+// Do not declare isdigit in ctype.h.
+#define __NO_CTYPE
+
 #include "interception/interception.h"
 
 #include "gtest/gtest.h"
diff --git a/lib/interception/tests/interception_win_test.cc b/lib/interception/tests/interception_win_test.cc
index 67b40f7..684ee03 100644
--- a/lib/interception/tests/interception_win_test.cc
+++ b/lib/interception/tests/interception_win_test.cc
@@ -204,7 +204,29 @@
 
 // A buffer holding the dynamically generated code under test.
 u8* ActiveCode;
-size_t ActiveCodeLength = 4096;
+const size_t ActiveCodeLength = 4096;
+
+int InterceptorFunction(int x);
+
+/// Allocate code memory more than 2GB away from Base.
+u8 *AllocateCode2GBAway(u8 *Base) {
+  // Find a 64K aligned location after Base plus 2GB.
+  size_t TwoGB = 0x80000000;
+  size_t AllocGranularity = 0x10000;
+  Base = (u8 *)((((uptr)Base + TwoGB + AllocGranularity)) & ~(AllocGranularity - 1));
+
+  // Check if that location is free, and if not, loop over regions until we find
+  // one that is.
+  MEMORY_BASIC_INFORMATION mbi = {};
+  while (sizeof(mbi) == VirtualQuery(Base, &mbi, sizeof(mbi))) {
+    if (mbi.State & MEM_FREE) break;
+    Base += mbi.RegionSize;
+  }
+
+  // Allocate one RWX page at the free location.
+  return (u8 *)::VirtualAlloc(Base, ActiveCodeLength, MEM_COMMIT | MEM_RESERVE,
+                              PAGE_EXECUTE_READWRITE);
+}
 
 template<class T>
 static void LoadActiveCode(
@@ -212,11 +234,8 @@
     uptr *entry_point,
     FunctionPrefixKind prefix_kind = FunctionPrefixNone) {
   if (ActiveCode == nullptr) {
-    ActiveCode =
-        (u8*)::VirtualAlloc(nullptr, ActiveCodeLength,
-                            MEM_COMMIT | MEM_RESERVE,
-                            PAGE_EXECUTE_READWRITE);
-    ASSERT_NE(ActiveCode, nullptr);
+    ActiveCode = AllocateCode2GBAway((u8*)&InterceptorFunction);
+    ASSERT_NE(ActiveCode, nullptr) << "failed to allocate RWX memory 2GB away";
   }
 
   size_t position = 0;
diff --git a/lib/lsan/CMakeLists.txt b/lib/lsan/CMakeLists.txt
index 73e475d..b782f24 100644
--- a/lib/lsan/CMakeLists.txt
+++ b/lib/lsan/CMakeLists.txt
@@ -23,9 +23,8 @@
     CFLAGS ${LSAN_CFLAGS})
 
 if(COMPILER_RT_HAS_LSAN)
+  add_compiler_rt_component(lsan)
   foreach(arch ${LSAN_SUPPORTED_ARCH})
-    add_compiler_rt_component(lsan)
-    
     add_compiler_rt_runtime(clang_rt.lsan
       STATIC
       ARCHS ${arch}
diff --git a/lib/lsan/lsan_allocator.cc b/lib/lsan/lsan_allocator.cc
index a5220f1..c805a39 100644
--- a/lib/lsan/lsan_allocator.cc
+++ b/lib/lsan/lsan_allocator.cc
@@ -43,10 +43,17 @@
     PrimaryAllocator;
 #else
 static const uptr kMaxAllowedMallocSize = 8UL << 30;
-static const uptr kAllocatorSpace = 0x600000000000ULL;
-static const uptr kAllocatorSize = 0x40000000000ULL; // 4T.
-typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize,
-        sizeof(ChunkMetadata), DefaultSizeClassMap> PrimaryAllocator;
+
+struct AP64 {  // Allocator64 parameters. Deliberately using a short name.
+  static const uptr kSpaceBeg = 0x600000000000ULL;
+  static const uptr kSpaceSize =  0x40000000000ULL; // 4T.
+  static const uptr kMetadataSize = sizeof(ChunkMetadata);
+  typedef DefaultSizeClassMap SizeClassMap;
+  typedef NoOpMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+};
+
+typedef SizeClassAllocator64<AP64> PrimaryAllocator;
 #endif
 typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
 typedef LargeMmapAllocator<> SecondaryAllocator;
@@ -57,7 +64,9 @@
 static THREADLOCAL AllocatorCache cache;
 
 void InitializeAllocator() {
-  allocator.InitLinkerInitialized(common_flags()->allocator_may_return_null);
+  allocator.InitLinkerInitialized(
+      common_flags()->allocator_may_return_null,
+      common_flags()->allocator_release_to_os_interval_ms);
 }
 
 void AllocatorThreadFinish() {
@@ -249,4 +258,17 @@
 uptr __sanitizer_get_allocated_size(const void *p) {
   return GetMallocUsableSize(p);
 }
+
+#if !SANITIZER_SUPPORTS_WEAK_HOOKS
+// Provide default (no-op) implementation of malloc hooks.
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+void __sanitizer_malloc_hook(void *ptr, uptr size) {
+  (void)ptr;
+  (void)size;
+}
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+void __sanitizer_free_hook(void *ptr) {
+  (void)ptr;
+}
+#endif
 } // extern "C"
diff --git a/lib/lsan/lsan_common.cc b/lib/lsan/lsan_common.cc
index 888a25b..f055452 100644
--- a/lib/lsan/lsan_common.cc
+++ b/lib/lsan/lsan_common.cc
@@ -32,6 +32,7 @@
 // also to protect the global list of root regions.
 BlockingMutex global_mutex(LINKER_INITIALIZED);
 
+__attribute__((tls_model("initial-exec")))
 THREADLOCAL int disable_counter;
 bool DisabledInThisThread() { return disable_counter > 0; }
 void DisableInThisThread() { disable_counter++; }
@@ -449,6 +450,8 @@
     Report(
         "HINT: For debugging, try setting environment variable "
         "LSAN_OPTIONS=verbosity=1:log_threads=1\n");
+    Report(
+        "HINT: LeakSanitizer does not work under ptrace (strace, gdb, etc)\n");
     Die();
   }
   param.leak_report.ApplySuppressions();
@@ -755,5 +758,10 @@
 int __lsan_is_turned_off() {
   return 0;
 }
+
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+const char *__lsan_default_suppressions() {
+  return "";
+}
 #endif
 } // extern "C"
diff --git a/lib/lsan/lsan_common_linux.cc b/lib/lsan/lsan_common_linux.cc
index 1f54303..f6154d8 100644
--- a/lib/lsan/lsan_common_linux.cc
+++ b/lib/lsan/lsan_common_linux.cc
@@ -71,7 +71,7 @@
     GetAllocatorGlobalRange(&allocator_begin, &allocator_end);
     if (begin <= allocator_begin && allocator_begin < end) {
       CHECK_LE(allocator_begin, allocator_end);
-      CHECK_LT(allocator_end, end);
+      CHECK_LE(allocator_end, end);
       if (begin < allocator_begin)
         ScanRangeForPointers(begin, allocator_begin, frontier, "GLOBAL",
                              kReachable);
diff --git a/lib/lsan/lsan_thread.cc b/lib/lsan/lsan_thread.cc
index 8bd6d90..5dff4f7 100644
--- a/lib/lsan/lsan_thread.cc
+++ b/lib/lsan/lsan_thread.cc
@@ -36,7 +36,7 @@
 static const uptr kThreadQuarantineSize = 64;
 
 void InitializeThreadRegistry() {
-  static char thread_registry_placeholder[sizeof(ThreadRegistry)] ALIGNED(64);
+  static ALIGNED(64) char thread_registry_placeholder[sizeof(ThreadRegistry)];
   thread_registry = new(thread_registry_placeholder)
     ThreadRegistry(CreateThreadContext, kMaxThreads, kThreadQuarantineSize);
 }
diff --git a/lib/msan/msan.h b/lib/msan/msan.h
index c714bff..0709260 100644
--- a/lib/msan/msan.h
+++ b/lib/msan/msan.h
@@ -66,15 +66,19 @@
 
 #elif SANITIZER_LINUX && defined(__aarch64__)
 
-// The mapping describes both 39-bits and 42-bits.  AArch64 maps:
-// - 0x00000000000-0x00010000000: 39/42-bits program own segments
-// - 0x05500000000-0x05600000000: 39-bits PIE program segments
-// - 0x07f80000000-0x07fffffffff: 39-bits libraries segments
-// - 0x2aa00000000-0x2ab00000000: 42-bits PIE program segments
-// - 0x3ff00000000-0x3ffffffffff: 42-bits libraries segments
+// The mapping describes the 39-bits, 42-bits, and 48-bits VMA.  AArch64
+// maps:
+// - 0x0000000000000-0x0000010000000: 39/42/48-bits program own segments
+// - 0x0005500000000-0x0005600000000: 39-bits PIE program segments
+// - 0x0007f80000000-0x0007fffffffff: 39-bits libraries segments
+// - 0x002aa00000000-0x002ab00000000: 42-bits PIE program segments
+// - 0x003ff00000000-0x003ffffffffff: 42-bits libraries segments
+// - 0x0aaaaa0000000-0x0aaab00000000: 48-bits PIE program segments
+// - 0x0ffff00000000-0x1000000000000: 48-bits libraries segments
 // It is fragmented in multiples segments to increase the memory available
 // on 42-bits (12.21% of total VMA available for 42-bits and 13.28 for
-// 39 bits).
+// 39 bits). The 48-bits segments only cover the usual PIE/default segments
+// plus some more segments (262144GB total, 0.39% total VMA).
 const MappingDesc kMemoryLayout[] = {
     {0x00000000000ULL, 0x01000000000ULL, MappingDesc::INVALID, "invalid"},
     {0x01000000000ULL, 0x02000000000ULL, MappingDesc::SHADOW, "shadow-2"},
@@ -115,6 +119,42 @@
     {0x3D000000000ULL, 0x3E000000000ULL, MappingDesc::SHADOW, "shadow-8"},
     {0x3E000000000ULL, 0x3F000000000ULL, MappingDesc::ORIGIN, "origin-8"},
     {0x3F000000000ULL, 0x40000000000ULL, MappingDesc::APP, "app-9"},
+    // The mappings below are used only for 48-bits VMA.
+    // TODO(unknown): 48-bit mapping only covers the usual PIE, non-PIE
+    // segments and some more segments totaling 262144GB of VMA (which cover
+    // only 0.32% of all 48-bit VMA). Memory availability can be increased by
+    // adding multiple application segments like 39 and 42 mapping.
+    {0x0040000000000ULL, 0x0041000000000ULL, MappingDesc::INVALID, "invalid"},
+    {0x0041000000000ULL, 0x0042000000000ULL, MappingDesc::APP, "app-10"},
+    {0x0042000000000ULL, 0x0047000000000ULL, MappingDesc::INVALID, "invalid"},
+    {0x0047000000000ULL, 0x0048000000000ULL, MappingDesc::SHADOW, "shadow-10"},
+    {0x0048000000000ULL, 0x0049000000000ULL, MappingDesc::ORIGIN, "origin-10"},
+    {0x0049000000000ULL, 0x0050000000000ULL, MappingDesc::INVALID, "invalid"},
+    {0x0050000000000ULL, 0x0051000000000ULL, MappingDesc::APP, "app-11"},
+    {0x0051000000000ULL, 0x0056000000000ULL, MappingDesc::INVALID, "invalid"},
+    {0x0056000000000ULL, 0x0057000000000ULL, MappingDesc::SHADOW, "shadow-11"},
+    {0x0057000000000ULL, 0x0058000000000ULL, MappingDesc::ORIGIN, "origin-11"},
+    {0x0058000000000ULL, 0x0059000000000ULL, MappingDesc::APP, "app-12"},
+    {0x0059000000000ULL, 0x005E000000000ULL, MappingDesc::INVALID, "invalid"},
+    {0x005E000000000ULL, 0x005F000000000ULL, MappingDesc::SHADOW, "shadow-12"},
+    {0x005F000000000ULL, 0x0060000000000ULL, MappingDesc::ORIGIN, "origin-12"},
+    {0x0060000000000ULL, 0x0061000000000ULL, MappingDesc::INVALID, "invalid"},
+    {0x0061000000000ULL, 0x0062000000000ULL, MappingDesc::APP, "app-13"},
+    {0x0062000000000ULL, 0x0067000000000ULL, MappingDesc::INVALID, "invalid"},
+    {0x0067000000000ULL, 0x0068000000000ULL, MappingDesc::SHADOW, "shadow-13"},
+    {0x0068000000000ULL, 0x0069000000000ULL, MappingDesc::ORIGIN, "origin-13"},
+    {0x0069000000000ULL, 0x0AAAAA0000000ULL, MappingDesc::INVALID, "invalid"},
+    {0x0AAAAA0000000ULL, 0x0AAAB00000000ULL, MappingDesc::APP, "app-14"},
+    {0x0AAAB00000000ULL, 0x0AACAA0000000ULL, MappingDesc::INVALID, "invalid"},
+    {0x0AACAA0000000ULL, 0x0AACB00000000ULL, MappingDesc::SHADOW, "shadow-14"},
+    {0x0AACB00000000ULL, 0x0AADAA0000000ULL, MappingDesc::INVALID, "invalid"},
+    {0x0AADAA0000000ULL, 0x0AADB00000000ULL, MappingDesc::ORIGIN, "origin-14"},
+    {0x0AADB00000000ULL, 0x0FF9F00000000ULL, MappingDesc::INVALID, "invalid"},
+    {0x0FF9F00000000ULL, 0x0FFA000000000ULL, MappingDesc::SHADOW, "shadow-15"},
+    {0x0FFA000000000ULL, 0x0FFAF00000000ULL, MappingDesc::INVALID, "invalid"},
+    {0x0FFAF00000000ULL, 0x0FFB000000000ULL, MappingDesc::ORIGIN, "origin-15"},
+    {0x0FFB000000000ULL, 0x0FFFF00000000ULL, MappingDesc::INVALID, "invalid"},
+    {0x0FFFF00000000ULL, 0x1000000000000ULL, MappingDesc::APP, "app-15"},
 };
 # define MEM_TO_SHADOW(mem) ((uptr)mem ^ 0x6000000000ULL)
 # define SHADOW_TO_ORIGIN(shadow) (((uptr)(shadow)) + 0x1000000000ULL)
@@ -289,11 +329,20 @@
                 StackTrace::GetCurrentPc(), GET_CURRENT_FRAME(),               \
                 common_flags()->fast_unwind_on_malloc)
 
+// For platforms which support slow unwinder only, we restrict the store context
+// size to 1, basically only storing the current pc. We do this because the slow
+// unwinder which is based on libunwind is not async signal safe and causes
+// random freezes in forking applications as well as in signal handlers.
 #define GET_STORE_STACK_TRACE_PC_BP(pc, bp)                                    \
   BufferedStackTrace stack;                                                    \
-  if (__msan_get_track_origins() > 1 && msan_inited)                           \
-  GetStackTrace(&stack, flags()->store_context_size, pc, bp,                   \
-                common_flags()->fast_unwind_on_malloc)
+  if (__msan_get_track_origins() > 1 && msan_inited) {                         \
+    if (!SANITIZER_CAN_FAST_UNWIND)                                            \
+      GetStackTrace(&stack, Min(1, flags()->store_context_size), pc, bp,       \
+                    false);                                                    \
+    else                                                                       \
+      GetStackTrace(&stack, flags()->store_context_size, pc, bp,               \
+                    common_flags()->fast_unwind_on_malloc);                    \
+  }
 
 #define GET_FATAL_STACK_TRACE_PC_BP(pc, bp)                                    \
   BufferedStackTrace stack;                                                    \
diff --git a/lib/msan/msan_allocator.cc b/lib/msan/msan_allocator.cc
index fdde4b4..6c389f0 100644
--- a/lib/msan/msan_allocator.cc
+++ b/lib/msan/msan_allocator.cc
@@ -33,9 +33,12 @@
 
     // We are about to unmap a chunk of user memory.
     // Mark the corresponding shadow memory as not needed.
-    FlushUnneededShadowMemory(MEM_TO_SHADOW(p), size);
-    if (__msan_get_track_origins())
-      FlushUnneededShadowMemory(MEM_TO_ORIGIN(p), size);
+    uptr shadow_p = MEM_TO_SHADOW(p);
+    ReleaseMemoryPagesToOS(shadow_p, shadow_p + size);
+    if (__msan_get_track_origins()) {
+      uptr origin_p = MEM_TO_ORIGIN(p);
+      ReleaseMemoryPagesToOS(origin_p, origin_p + size);
+    }
   }
 };
 
@@ -56,23 +59,32 @@
 #else
   static const uptr kAllocatorSpace = 0x600000000000ULL;
 #endif
-  static const uptr kAllocatorSize = 0x40000000000; // 4T.
-  static const uptr kMetadataSize  = sizeof(Metadata);
   static const uptr kMaxAllowedMallocSize = 8UL << 30;
 
-  typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize, kMetadataSize,
-                             DefaultSizeClassMap,
-                             MsanMapUnmapCallback> PrimaryAllocator;
+  struct AP64 {  // Allocator64 parameters. Deliberately using a short name.
+    static const uptr kSpaceBeg = kAllocatorSpace;
+    static const uptr kSpaceSize = 0x40000000000; // 4T.
+    static const uptr kMetadataSize = sizeof(Metadata);
+    typedef DefaultSizeClassMap SizeClassMap;
+    typedef MsanMapUnmapCallback MapUnmapCallback;
+    static const uptr kFlags = 0;
+  };
+
+  typedef SizeClassAllocator64<AP64> PrimaryAllocator;
 
 #elif defined(__powerpc64__)
-  static const uptr kAllocatorSpace = 0x300000000000;
-  static const uptr kAllocatorSize  = 0x020000000000;  // 2T
-  static const uptr kMetadataSize  = sizeof(Metadata);
   static const uptr kMaxAllowedMallocSize = 2UL << 30;  // 2G
 
-  typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize, kMetadataSize,
-                             DefaultSizeClassMap,
-                             MsanMapUnmapCallback> PrimaryAllocator;
+  struct AP64 {  // Allocator64 parameters. Deliberately using a short name.
+    static const uptr kSpaceBeg = 0x300000000000;
+    static const uptr kSpaceSize = 0x020000000000; // 2T.
+    static const uptr kMetadataSize = sizeof(Metadata);
+    typedef DefaultSizeClassMap SizeClassMap;
+    typedef MsanMapUnmapCallback MapUnmapCallback;
+    static const uptr kFlags = 0;
+  };
+
+  typedef SizeClassAllocator64<AP64> PrimaryAllocator;
 #elif defined(__aarch64__)
   static const uptr kMaxAllowedMallocSize = 2UL << 30;  // 2G
   static const uptr kRegionSizeLog = 20;
@@ -94,7 +106,9 @@
 static SpinMutex fallback_mutex;
 
 void MsanAllocatorInit() {
-  allocator.Init(common_flags()->allocator_may_return_null);
+  allocator.Init(
+      common_flags()->allocator_may_return_null,
+      common_flags()->allocator_release_to_os_interval_ms);
 }
 
 AllocatorCache *GetAllocatorCache(MsanThreadLocalMallocStorage *ms) {
@@ -112,7 +126,7 @@
   if (size > kMaxAllowedMallocSize) {
     Report("WARNING: MemorySanitizer failed to allocate %p bytes\n",
            (void *)size);
-    return allocator.ReturnNullOrDie();
+    return allocator.ReturnNullOrDieOnBadRequest();
   }
   MsanThread *t = GetCurrentThread();
   void *allocated;
@@ -170,7 +184,7 @@
 
 void *MsanCalloc(StackTrace *stack, uptr nmemb, uptr size) {
   if (CallocShouldReturnNullDueToOverflow(size, nmemb))
-    return allocator.ReturnNullOrDie();
+    return allocator.ReturnNullOrDieOnBadRequest();
   return MsanReallocate(stack, nullptr, nmemb * size, sizeof(u64), true);
 }
 
diff --git a/lib/msan/msan_interceptors.cc b/lib/msan/msan_interceptors.cc
index f23d3ee..6447bb1 100644
--- a/lib/msan/msan_interceptors.cc
+++ b/lib/msan/msan_interceptors.cc
@@ -45,6 +45,8 @@
 
 DECLARE_REAL(SIZE_T, strlen, const char *s)
 DECLARE_REAL(SIZE_T, strnlen, const char *s, SIZE_T maxlen)
+DECLARE_REAL(void *, memcpy, void *dest, const void *src, uptr n)
+DECLARE_REAL(void *, memset, void *dest, int c, uptr n)
 
 #if SANITIZER_FREEBSD
 #define __errno_location __error
@@ -64,6 +66,23 @@
   return in_interceptor_scope;
 }
 
+static uptr allocated_for_dlsym;
+static const uptr kDlsymAllocPoolSize = 1024;
+static uptr alloc_memory_for_dlsym[kDlsymAllocPoolSize];
+
+static bool IsInDlsymAllocPool(const void *ptr) {
+  uptr off = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
+  return off < sizeof(alloc_memory_for_dlsym);
+}
+
+static void *AllocateFromLocalPool(uptr size_in_bytes) {
+  uptr size_in_words = RoundUpTo(size_in_bytes, kWordSize) / kWordSize;
+  void *mem = (void *)&alloc_memory_for_dlsym[allocated_for_dlsym];
+  allocated_for_dlsym += size_in_words;
+  CHECK_LT(allocated_for_dlsym, kDlsymAllocPoolSize);
+  return mem;
+}
+
 #define ENSURE_MSAN_INITED() do { \
   CHECK(!msan_init_is_running); \
   if (!msan_inited) { \
@@ -135,10 +154,6 @@
   return res;
 }
 
-INTERCEPTOR(void *, memcpy, void *dest, const void *src, SIZE_T n) {
-  return __msan_memcpy(dest, src, n);
-}
-
 INTERCEPTOR(void *, mempcpy, void *dest, const void *src, SIZE_T n) {
   return (char *)__msan_memcpy(dest, src, n) + n;
 }
@@ -153,14 +168,6 @@
   return res;
 }
 
-INTERCEPTOR(void *, memmove, void *dest, const void *src, SIZE_T n) {
-  return __msan_memmove(dest, src, n);
-}
-
-INTERCEPTOR(void *, memset, void *s, int c, SIZE_T n) {
-  return __msan_memset(s, c, n);
-}
-
 INTERCEPTOR(void *, bcopy, const void *src, void *dest, SIZE_T n) {
   return __msan_memmove(dest, src, n);
 }
@@ -227,14 +234,14 @@
 
 INTERCEPTOR(void, free, void *ptr) {
   GET_MALLOC_STACK_TRACE;
-  if (!ptr) return;
+  if (!ptr || UNLIKELY(IsInDlsymAllocPool(ptr))) return;
   MsanDeallocate(&stack, ptr);
 }
 
 #if !SANITIZER_FREEBSD
 INTERCEPTOR(void, cfree, void *ptr) {
   GET_MALLOC_STACK_TRACE;
-  if (!ptr) return;
+  if (!ptr || UNLIKELY(IsInDlsymAllocPool(ptr))) return;
   MsanDeallocate(&stack, ptr);
 }
 #define MSAN_MAYBE_INTERCEPT_CFREE INTERCEPT_FUNCTION(cfree)
@@ -907,27 +914,35 @@
 
 INTERCEPTOR(void *, calloc, SIZE_T nmemb, SIZE_T size) {
   GET_MALLOC_STACK_TRACE;
-  if (UNLIKELY(!msan_inited)) {
+  if (UNLIKELY(!msan_inited))
     // Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym.
-    const SIZE_T kCallocPoolSize = 1024;
-    static uptr calloc_memory_for_dlsym[kCallocPoolSize];
-    static SIZE_T allocated;
-    SIZE_T size_in_words = ((nmemb * size) + kWordSize - 1) / kWordSize;
-    void *mem = (void*)&calloc_memory_for_dlsym[allocated];
-    allocated += size_in_words;
-    CHECK(allocated < kCallocPoolSize);
-    return mem;
-  }
+    return AllocateFromLocalPool(nmemb * size);
   return MsanCalloc(&stack, nmemb, size);
 }
 
 INTERCEPTOR(void *, realloc, void *ptr, SIZE_T size) {
   GET_MALLOC_STACK_TRACE;
+  if (UNLIKELY(IsInDlsymAllocPool(ptr))) {
+    uptr offset = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
+    uptr copy_size = Min(size, (uptr)sizeof(alloc_memory_for_dlsym) - offset);
+    void *new_ptr;
+    if (UNLIKELY(!msan_inited)) {
+      new_ptr = AllocateFromLocalPool(size);
+    } else {
+      // Allocate the full request; copy only the bytes that lie in the pool.
+      new_ptr = MsanReallocate(&stack, nullptr, size, sizeof(u64), false);
+    }
+    internal_memcpy(new_ptr, ptr, copy_size);
+    return new_ptr;
+  }
   return MsanReallocate(&stack, ptr, size, sizeof(u64), false);
 }
 
 INTERCEPTOR(void *, malloc, SIZE_T size) {
   GET_MALLOC_STACK_TRACE;
+  if (UNLIKELY(!msan_inited))
+    // Hack: dlsym calls malloc before REAL(malloc) is retrieved from dlsym.
+    return AllocateFromLocalPool(size);
   return MsanReallocate(&stack, nullptr, size, sizeof(u64), false);
 }
 
@@ -1329,11 +1344,23 @@
     *begin = *end = 0;                                                         \
   }
 
+#define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size) \
+  {                                                         \
+    (void)ctx;                                              \
+    return __msan_memset(block, c, size);                   \
+  }
+#define COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size) \
+  {                                                          \
+    (void)ctx;                                               \
+    return __msan_memmove(to, from, size);                   \
+  }
+#define COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size) \
+  {                                                         \
+    (void)ctx;                                              \
+    return __msan_memcpy(to, from, size);                   \
+  }
+
 #include "sanitizer_common/sanitizer_platform_interceptors.h"
-// Msan needs custom handling of these:
-#undef SANITIZER_INTERCEPT_MEMSET
-#undef SANITIZER_INTERCEPT_MEMMOVE
-#undef SANITIZER_INTERCEPT_MEMCPY
 #include "sanitizer_common/sanitizer_common_interceptors.inc"
 
 #define COMMON_SYSCALL_PRE_READ_RANGE(p, s) CHECK_UNPOISONED(p, s)
@@ -1489,11 +1516,8 @@
   INTERCEPT_FUNCTION(fread);
   MSAN_MAYBE_INTERCEPT_FREAD_UNLOCKED;
   INTERCEPT_FUNCTION(readlink);
-  INTERCEPT_FUNCTION(memcpy);
   INTERCEPT_FUNCTION(memccpy);
   INTERCEPT_FUNCTION(mempcpy);
-  INTERCEPT_FUNCTION(memset);
-  INTERCEPT_FUNCTION(memmove);
   INTERCEPT_FUNCTION(bcopy);
   INTERCEPT_FUNCTION(wmemset);
   INTERCEPT_FUNCTION(wmemcpy);
diff --git a/lib/msan/msan_linux.cc b/lib/msan/msan_linux.cc
index d6a9588..0a687f6 100644
--- a/lib/msan/msan_linux.cc
+++ b/lib/msan/msan_linux.cc
@@ -66,7 +66,8 @@
     }
     if ((uptr)addr != beg) {
       uptr end = beg + size - 1;
-      Printf("FATAL: Cannot protect memory range %p - %p.\n", beg, end);
+      Printf("FATAL: Cannot protect memory range %p - %p (%s).\n", beg, end,
+             name);
       return false;
     }
   }
diff --git a/lib/msan/tests/CMakeLists.txt b/lib/msan/tests/CMakeLists.txt
index 130a872..8e911dc 100644
--- a/lib/msan/tests/CMakeLists.txt
+++ b/lib/msan/tests/CMakeLists.txt
@@ -69,15 +69,15 @@
 endmacro()
 
 macro(msan_link_shared so_list so_name arch kind)
-  cmake_parse_arguments(SOURCE "" "" "OBJECTS;LINKFLAGS;DEPS" ${ARGN})
+  cmake_parse_arguments(SOURCE "" "" "OBJECTS;LINK_FLAGS;DEPS" ${ARGN})
   set(output_so "${CMAKE_CURRENT_BINARY_DIR}/${so_name}.${arch}${kind}.so")
-  get_target_flags_for_arch(${arch} TARGET_LINKFLAGS)
+  get_target_flags_for_arch(${arch} TARGET_LINK_FLAGS)
   if(NOT COMPILER_RT_STANDALONE_BUILD)
     list(APPEND SOURCE_DEPS msan)
   endif()
   clang_link_shared(${output_so}
                 OBJECTS ${SOURCE_OBJECTS}
-                LINKFLAGS ${TARGET_LINKFLAGS} ${SOURCE_LINKFLAGS}
+                LINK_FLAGS ${TARGET_LINK_FLAGS} ${SOURCE_LINK_FLAGS}
                 DEPS ${SOURCE_DEPS})
   list(APPEND ${so_list} ${output_so})
 endmacro()
diff --git a/lib/msan/tests/msan_test.cc b/lib/msan/tests/msan_test.cc
index e4076b5..9ec1e28 100644
--- a/lib/msan/tests/msan_test.cc
+++ b/lib/msan/tests/msan_test.cc
@@ -2825,6 +2825,12 @@
   EXPECT_POISONED(s2.a8);
 }
 
+#ifdef __GLIBC__
+#define MSAN_TEST_PRLIMIT __GLIBC_PREREQ(2, 13)
+#else
+#define MSAN_TEST_PRLIMIT 1
+#endif
+
 TEST(MemorySanitizer, getrlimit) {
   struct rlimit limit;
   __msan_poison(&limit, sizeof(limit));
@@ -2833,6 +2839,7 @@
   EXPECT_NOT_POISONED(limit.rlim_cur);
   EXPECT_NOT_POISONED(limit.rlim_max);
 
+#if MSAN_TEST_PRLIMIT
   struct rlimit limit2;
   __msan_poison(&limit2, sizeof(limit2));
   result = prlimit(getpid(), RLIMIT_DATA, &limit, &limit2);
@@ -2848,6 +2855,7 @@
 
   result = prlimit(getpid(), RLIMIT_DATA, &limit, nullptr);
   ASSERT_EQ(result, 0);
+#endif
 }
 
 TEST(MemorySanitizer, getrusage) {
diff --git a/lib/profile/InstrProfilingFile.c b/lib/profile/InstrProfilingFile.c
index a9769c9..9a11a10 100644
--- a/lib/profile/InstrProfilingFile.c
+++ b/lib/profile/InstrProfilingFile.c
@@ -336,18 +336,18 @@
       if (FilenamePat[++I] == 'p') {
         if (!NumPids++) {
           if (snprintf(PidChars, MAX_PID_SIZE, "%d", getpid()) <= 0) {
-            PROF_WARN(
-                "Unable to parse filename pattern %s. Using the default name.",
-                FilenamePat);
+            PROF_WARN("Unable to get pid for filename pattern %s. Using the "
+                      "default name.",
+                      FilenamePat);
             return -1;
           }
         }
       } else if (FilenamePat[I] == 'h') {
         if (!NumHosts++)
           if (COMPILER_RT_GETHOSTNAME(Hostname, COMPILER_RT_MAX_HOSTLEN)) {
-            PROF_WARN(
-                "Unable to parse filename pattern %s. Using the default name.",
-                FilenamePat);
+            PROF_WARN("Unable to get hostname for filename pattern %s. Using "
+                      "the default name.",
+                      FilenamePat);
             return -1;
           }
       } else if (containsMergeSpecifier(FilenamePat, I)) {
diff --git a/lib/profile/InstrProfilingPort.h b/lib/profile/InstrProfilingPort.h
index c947153..5789351 100644
--- a/lib/profile/InstrProfilingPort.h
+++ b/lib/profile/InstrProfilingPort.h
@@ -40,14 +40,14 @@
 #endif
 
 #define COMPILER_RT_MAX_HOSTLEN 128
-#ifdef _MSC_VER
-#define COMPILER_RT_GETHOSTNAME(Name, Len) gethostname(Name, Len)
-#elif defined(__ORBIS__)
+#ifdef __ORBIS__
 #define COMPILER_RT_GETHOSTNAME(Name, Len) ((void)(Name), (void)(Len), (-1))
 #else
 #define COMPILER_RT_GETHOSTNAME(Name, Len) lprofGetHostName(Name, Len)
+#ifndef _MSC_VER
 #define COMPILER_RT_HAS_UNAME 1
 #endif
+#endif
 
 #if COMPILER_RT_HAS_ATOMICS == 1
 #ifdef _MSC_VER
diff --git a/lib/profile/InstrProfilingUtil.c b/lib/profile/InstrProfilingUtil.c
index ead537d..321c719 100644
--- a/lib/profile/InstrProfilingUtil.c
+++ b/lib/profile/InstrProfilingUtil.c
@@ -66,7 +66,26 @@
 
 #endif
 
-#ifdef COMPILER_RT_HAS_UNAME
+#ifdef _MSC_VER
+/*
+ * Copies the fully qualified DNS name of the local computer into Name as a
+ * NUL-terminated UTF-8 string of at most Len bytes. Returns 0 on success and
+ * -1 when the name cannot be queried or converted.
+ */
+COMPILER_RT_VISIBILITY int lprofGetHostName(char *Name, int Len) {
+  WCHAR Buffer[COMPILER_RT_MAX_HOSTLEN];
+  /* GetComputerNameExW takes the buffer size in characters, not in bytes. */
+  DWORD BufferSize = sizeof(Buffer) / sizeof(Buffer[0]);
+  BOOL Result =
+      GetComputerNameExW(ComputerNameDnsFullyQualified, Buffer, &BufferSize);
+  if (!Result)
+    return -1;
+  /* A source length of -1 makes the conversion include the terminator. */
+  if (WideCharToMultiByte(CP_UTF8, 0, Buffer, -1, Name, Len, NULL, NULL) == 0)
+    return -1;
+  return 0;
+}
+#elif defined(COMPILER_RT_HAS_UNAME)
 COMPILER_RT_VISIBILITY int lprofGetHostName(char *Name, int Len) {
   struct utsname N;
   int R;
diff --git a/lib/profile/InstrProfilingValue.c b/lib/profile/InstrProfilingValue.c
index 93957e3..6648f89 100644
--- a/lib/profile/InstrProfilingValue.c
+++ b/lib/profile/InstrProfilingValue.c
@@ -192,7 +192,7 @@
      * the runtime can wipe out more than one lowest count entries
      * to give space for hot targets.
      */
-    if (!(--MinCountVNode->Count)) {
+    if (!MinCountVNode->Count || !(--MinCountVNode->Count)) {
       CurVNode = MinCountVNode;
       CurVNode->Value = TargetValue;
       CurVNode->Count++;
diff --git a/lib/sanitizer_common/.clang-tidy b/lib/sanitizer_common/.clang-tidy
index aa695cc..6c71abf 100644
--- a/lib/sanitizer_common/.clang-tidy
+++ b/lib/sanitizer_common/.clang-tidy
@@ -8,5 +8,9 @@
     value:           CamelCase
   - key:             readability-identifier-naming.UnionCase
     value:           CamelCase
+  - key:             readability-identifier-naming.GlobalConstantCase
+    value:           CamelCase
+  - key:             readability-identifier-naming.GlobalConstantPrefix
+    value:           "k"
   - key:             readability-identifier-naming.VariableCase
     value:           lower_case
diff --git a/lib/sanitizer_common/CMakeLists.txt b/lib/sanitizer_common/CMakeLists.txt
index 59a6b35..c70b8be 100644
--- a/lib/sanitizer_common/CMakeLists.txt
+++ b/lib/sanitizer_common/CMakeLists.txt
@@ -37,6 +37,8 @@
 if(UNIX AND NOT APPLE)
   list(APPEND SANITIZER_SOURCES_NOTERMINATION
     sanitizer_linux_x86_64.S)
+  list(APPEND SANITIZER_SOURCES_NOTERMINATION
+    sanitizer_linux_mips64.S)
 endif()
 
 set(SANITIZER_SOURCES
@@ -50,7 +52,9 @@
 
 set(SANITIZER_LIBCDEP_SOURCES
   sanitizer_common_libcdep.cc
+  sancov_flags.cc
   sanitizer_coverage_libcdep.cc
+  sanitizer_coverage_libcdep_new.cc
   sanitizer_coverage_mapping_libcdep.cc
   sanitizer_linux_libcdep.cc
   sanitizer_posix_libcdep.cc
@@ -147,6 +151,8 @@
   # CMAKE_C*_FLAGS and re-add as a source property to all the non-.S files).
   set_source_files_properties(sanitizer_linux_x86_64.S
     PROPERTIES COMPILE_FLAGS "-w")
+  set_source_files_properties(sanitizer_linux_mips64.S
+    PROPERTIES COMPILE_FLAGS "-w")
 endif ()
 
 if(APPLE)
diff --git a/lib/sanitizer_common/sancov_flags.cc b/lib/sanitizer_common/sancov_flags.cc
new file mode 100644
index 0000000..08fd2a4
--- /dev/null
+++ b/lib/sanitizer_common/sancov_flags.cc
@@ -0,0 +1,60 @@
+//===-- sancov_flags.cc -----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Sanitizer Coverage runtime flags.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sancov_flags.h"
+#include "sanitizer_flag_parser.h"
+#include "sanitizer_platform.h"
+
+#if !SANITIZER_LINUX
+// other platforms do not have weak symbols out of the box.
+extern "C" const char* __sancov_default_options() { return ""; }
+#endif
+
+using namespace __sanitizer;
+
+namespace __sancov {
+
+SancovFlags sancov_flags_dont_use_directly;  // use via sancov_flags();
+
+void SancovFlags::SetDefaults() {
+#define SANCOV_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
+#include "sancov_flags.inc"
+#undef SANCOV_FLAG
+}
+
+static void RegisterSancovFlags(FlagParser *parser, SancovFlags *f) {
+#define SANCOV_FLAG(Type, Name, DefaultValue, Description) \
+  RegisterFlag(parser, #Name, Description, &f->Name);
+#include "sancov_flags.inc"
+#undef SANCOV_FLAG
+}
+
+static const char *MaybeCallSancovDefaultOptions() {
+  return (&__sancov_default_options) ? __sancov_default_options() : "";
+}
+
+void InitializeSancovFlags() {
+  SancovFlags *f = sancov_flags();
+  f->SetDefaults();
+
+  FlagParser parser;
+  RegisterSancovFlags(&parser, f);
+
+  parser.ParseString(MaybeCallSancovDefaultOptions());
+  parser.ParseString(GetEnv("SANCOV_OPTIONS"));
+
+  ReportUnrecognizedFlags();
+  if (f->help) parser.PrintFlagDescriptions();
+}
+
+}  // namespace __sancov
diff --git a/lib/sanitizer_common/sancov_flags.h b/lib/sanitizer_common/sancov_flags.h
new file mode 100644
index 0000000..5fbd7ad
--- /dev/null
+++ b/lib/sanitizer_common/sancov_flags.h
@@ -0,0 +1,40 @@
+//===-- sancov_flags.h ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Sanitizer Coverage runtime flags.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SANCOV_FLAGS_H
+#define SANCOV_FLAGS_H
+
+#include "sanitizer_flag_parser.h"
+#include "sanitizer_internal_defs.h"
+
+namespace __sancov {
+
+struct SancovFlags {
+#define SANCOV_FLAG(Type, Name, DefaultValue, Description) Type Name;
+#include "sancov_flags.inc"
+#undef SANCOV_FLAG
+
+  void SetDefaults();
+};
+
+extern SancovFlags sancov_flags_dont_use_directly;
+
+inline SancovFlags* sancov_flags() { return &sancov_flags_dont_use_directly; }
+
+void InitializeSancovFlags();
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE const char*
+__sancov_default_options();
+
+}  // namespace __sancov
+
+#endif
diff --git a/lib/sanitizer_common/sancov_flags.inc b/lib/sanitizer_common/sancov_flags.inc
new file mode 100644
index 0000000..63a1f0c
--- /dev/null
+++ b/lib/sanitizer_common/sancov_flags.inc
@@ -0,0 +1,24 @@
+//===-- sancov_flags.inc ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Sanitizer Coverage runtime flags.
+//
+// Each entry is SANCOV_FLAG(Type, Name, DefaultValue, Description); the
+// including file defines SANCOV_FLAG to expand the entries as it needs.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SANCOV_FLAG
+#error "Define SANCOV_FLAG prior to including this file!"
+#endif
+
+SANCOV_FLAG(bool, symbolize, true,
+            "If set, coverage information will be symbolized by sancov tool "
+            "after dumping.")
+
+SANCOV_FLAG(bool, help, false, "Print flags help.")
diff --git a/lib/sanitizer_common/sanitizer_addrhashmap.h b/lib/sanitizer_common/sanitizer_addrhashmap.h
index e55fc4f..2ca3c40 100644
--- a/lib/sanitizer_common/sanitizer_addrhashmap.h
+++ b/lib/sanitizer_common/sanitizer_addrhashmap.h
@@ -73,6 +73,8 @@
 
     ~Handle();
     T *operator->();
+    T &operator*();
+    const T &operator*() const;
     bool created() const;
     bool exists() const;
 
@@ -136,6 +138,16 @@
   return &cell_->val;
 }
 
+template <typename T, uptr kSize>
+const T &AddrHashMap<T, kSize>::Handle::operator*() const {
+  return cell_->val;
+}
+
+template <typename T, uptr kSize>
+T &AddrHashMap<T, kSize>::Handle::operator*() {
+  return cell_->val;
+}
+
 template<typename T, uptr kSize>
 bool AddrHashMap<T, kSize>::Handle::created() const {
   return created_;
diff --git a/lib/sanitizer_common/sanitizer_allocator.cc b/lib/sanitizer_common/sanitizer_allocator.cc
index 2b5d192..d47b5b4 100644
--- a/lib/sanitizer_common/sanitizer_allocator.cc
+++ b/lib/sanitizer_common/sanitizer_allocator.cc
@@ -13,16 +13,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_allocator.h"
+
 #include "sanitizer_allocator_internal.h"
+#include "sanitizer_atomic.h"
 #include "sanitizer_common.h"
 
 namespace __sanitizer {
 
 // ThreadSanitizer for Go uses libc malloc/free.
-#if defined(SANITIZER_GO) || defined(SANITIZER_USE_MALLOC)
+#if SANITIZER_GO || defined(SANITIZER_USE_MALLOC)
 # if SANITIZER_LINUX && !SANITIZER_ANDROID
 extern "C" void *__libc_malloc(uptr size);
-#  ifndef SANITIZER_GO
+#  if !SANITIZER_GO
 extern "C" void *__libc_memalign(uptr alignment, uptr size);
 #  endif
 extern "C" void *__libc_realloc(void *ptr, uptr size);
@@ -30,7 +32,7 @@
 # else
 #  include <stdlib.h>
 #  define __libc_malloc malloc
-#  ifndef SANITIZER_GO
+#  if !SANITIZER_GO
 static void *__libc_memalign(uptr alignment, uptr size) {
   void *p;
   uptr error = posix_memalign(&p, alignment, size);
@@ -45,7 +47,7 @@
 static void *RawInternalAlloc(uptr size, InternalAllocatorCache *cache,
                               uptr alignment) {
   (void)cache;
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   if (alignment == 0)
     return __libc_malloc(size);
   else
@@ -76,7 +78,7 @@
   return 0;
 }
 
-#else  // defined(SANITIZER_GO) || defined(SANITIZER_USE_MALLOC)
+#else  // SANITIZER_GO || defined(SANITIZER_USE_MALLOC)
 
 static ALIGNED(64) char internal_alloc_placeholder[sizeof(InternalAllocator)];
 static atomic_uint8_t internal_allocator_initialized;
@@ -92,7 +94,8 @@
     SpinMutexLock l(&internal_alloc_init_mu);
     if (atomic_load(&internal_allocator_initialized, memory_order_relaxed) ==
         0) {
-      internal_allocator_instance->Init(/* may_return_null*/ false);
+      internal_allocator_instance->Init(
+          /* may_return_null */ false, kReleaseToOSIntervalNever);
       atomic_store(&internal_allocator_initialized, 1, memory_order_release);
     }
   }
@@ -129,7 +132,7 @@
   internal_allocator()->Deallocate(cache, ptr);
 }
 
-#endif  // defined(SANITIZER_GO) || defined(SANITIZER_USE_MALLOC)
+#endif  // SANITIZER_GO || defined(SANITIZER_USE_MALLOC)
 
 const u64 kBlockMagic = 0x6A6CB03ABCEBC041ull;
 
@@ -159,7 +162,7 @@
 
 void *InternalCalloc(uptr count, uptr size, InternalAllocatorCache *cache) {
   if (CallocShouldReturnNullDueToOverflow(count, size))
-    return internal_allocator()->ReturnNullOrDie();
+    return internal_allocator()->ReturnNullOrDieOnBadRequest();
   void *p = InternalAlloc(count * size, cache);
   if (p) internal_memset(p, 0, count * size);
   return p;
@@ -206,7 +209,12 @@
   return (max / size) < n;
 }
 
-void NORETURN ReportAllocatorCannotReturnNull() {
+static atomic_uint8_t reporting_out_of_memory = {0};
+
+bool IsReportingOOM() { return atomic_load_relaxed(&reporting_out_of_memory); }
+
+void NORETURN ReportAllocatorCannotReturnNull(bool out_of_memory) {
+  if (out_of_memory) atomic_store_relaxed(&reporting_out_of_memory, 1);
   Report("%s's allocator is terminating the process instead of returning 0\n",
          SanitizerToolName);
   Report("If you don't like this behavior set allocator_may_return_null=1\n");
diff --git a/lib/sanitizer_common/sanitizer_allocator.h b/lib/sanitizer_common/sanitizer_allocator.h
index da74350..9a37a2f 100644
--- a/lib/sanitizer_common/sanitizer_allocator.h
+++ b/lib/sanitizer_common/sanitizer_allocator.h
@@ -24,8 +24,13 @@
 
 namespace __sanitizer {
 
+// Returns true if ReportAllocatorCannotReturnNull(true) was called.
+// Can be used to avoid memory hungry operations.
+bool IsReportingOOM();
+
 // Prints error message and kills the program.
-void NORETURN ReportAllocatorCannotReturnNull();
+void NORETURN ReportAllocatorCannotReturnNull(bool out_of_memory);
+
 // Allocators call these callbacks on mmap/munmap.
 struct NoOpMapUnmapCallback {
   void OnMap(uptr p, uptr size) const { }
diff --git a/lib/sanitizer_common/sanitizer_allocator_combined.h b/lib/sanitizer_common/sanitizer_allocator_combined.h
index dceb64b..19e1ae9 100644
--- a/lib/sanitizer_common/sanitizer_allocator_combined.h
+++ b/lib/sanitizer_common/sanitizer_allocator_combined.h
@@ -24,21 +24,22 @@
           class SecondaryAllocator>  // NOLINT
 class CombinedAllocator {
  public:
-  void InitCommon(bool may_return_null) {
-    primary_.Init();
+  void InitCommon(bool may_return_null, s32 release_to_os_interval_ms) {
+    primary_.Init(release_to_os_interval_ms);
     atomic_store(&may_return_null_, may_return_null, memory_order_relaxed);
   }
 
-  void InitLinkerInitialized(bool may_return_null) {
+  void InitLinkerInitialized(
+      bool may_return_null, s32 release_to_os_interval_ms) {
     secondary_.InitLinkerInitialized(may_return_null);
     stats_.InitLinkerInitialized();
-    InitCommon(may_return_null);
+    InitCommon(may_return_null, release_to_os_interval_ms);
   }
 
-  void Init(bool may_return_null) {
+  void Init(bool may_return_null, s32 release_to_os_interval_ms) {
     secondary_.Init(may_return_null);
     stats_.Init();
-    InitCommon(may_return_null);
+    InitCommon(may_return_null, release_to_os_interval_ms);
   }
 
   void *Allocate(AllocatorCache *cache, uptr size, uptr alignment,
@@ -46,20 +47,32 @@
     // Returning 0 on malloc(0) may break a lot of code.
     if (size == 0)
       size = 1;
-    if (size + alignment < size)
-      return ReturnNullOrDie();
-    if (check_rss_limit && RssLimitIsExceeded())
-      return ReturnNullOrDie();
+    if (size + alignment < size) return ReturnNullOrDieOnBadRequest();
+    if (check_rss_limit && RssLimitIsExceeded()) return ReturnNullOrDieOnOOM();
+    uptr original_size = size;
+    // If alignment requirements are to be fulfilled by the frontend allocator
+    // rather than by the primary or secondary, passing an alignment lower than
+    // or equal to 8 will prevent any further rounding up, as well as the later
+    // alignment check.
     if (alignment > 8)
       size = RoundUpTo(size, alignment);
     void *res;
     bool from_primary = primary_.CanAllocate(size, alignment);
+    // The primary allocator should return a 2^x aligned allocation when
+    // requested 2^x bytes, hence using the rounded up 'size' when being
+    // serviced by the primary (this is no longer true when the primary is
+    // using a non-fixed base address). The secondary takes care of the
+    // alignment without such requirement, and allocating 'size' would use
+    // extraneous memory, so we employ 'original_size'.
     if (from_primary)
       res = cache->Allocate(&primary_, primary_.ClassID(size));
     else
-      res = secondary_.Allocate(&stats_, size, alignment);
+      res = secondary_.Allocate(&stats_, original_size, alignment);
     if (alignment > 8)
       CHECK_EQ(reinterpret_cast<uptr>(res) & (alignment - 1), 0);
+    // When serviced by the secondary, the chunk comes from a mmap allocation
+    // and will be zero'd out anyway. We only need to clear out the chunk if
+    // it was serviced by the primary, hence using the rounded up 'size'.
     if (cleared && res && from_primary)
       internal_bzero_aligned16(res, RoundUpTo(size, 16));
     return res;
@@ -69,10 +82,15 @@
     return atomic_load(&may_return_null_, memory_order_acquire);
   }
 
-  void *ReturnNullOrDie() {
+  void *ReturnNullOrDieOnBadRequest() {
     if (MayReturnNull())
       return nullptr;
-    ReportAllocatorCannotReturnNull();
+    ReportAllocatorCannotReturnNull(false);
+  }
+
+  void *ReturnNullOrDieOnOOM() {
+    if (MayReturnNull()) return nullptr;
+    ReportAllocatorCannotReturnNull(true);
   }
 
   void SetMayReturnNull(bool may_return_null) {
@@ -80,6 +98,14 @@
     atomic_store(&may_return_null_, may_return_null, memory_order_release);
   }
 
+  s32 ReleaseToOSIntervalMs() const {
+    return primary_.ReleaseToOSIntervalMs();
+  }
+
+  void SetReleaseToOSIntervalMs(s32 release_to_os_interval_ms) {
+    primary_.SetReleaseToOSIntervalMs(release_to_os_interval_ms);
+  }
+
   bool RssLimitIsExceeded() {
     return atomic_load(&rss_limit_is_exceeded_, memory_order_acquire);
   }
diff --git a/lib/sanitizer_common/sanitizer_allocator_interface.h b/lib/sanitizer_common/sanitizer_allocator_interface.h
index 797c38a..5ff6edb 100644
--- a/lib/sanitizer_common/sanitizer_allocator_interface.h
+++ b/lib/sanitizer_common/sanitizer_allocator_interface.h
@@ -37,6 +37,10 @@
     /* OPTIONAL */ void __sanitizer_malloc_hook(void *ptr, uptr size);
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
     /* OPTIONAL */ void __sanitizer_free_hook(void *ptr);
+
+
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+    void __sanitizer_print_memory_profile(int top_percent);
 }  // extern "C"
 
 #endif  // SANITIZER_ALLOCATOR_INTERFACE_H
diff --git a/lib/sanitizer_common/sanitizer_allocator_local_cache.h b/lib/sanitizer_common/sanitizer_allocator_local_cache.h
index 0ad22ba..e1172e0 100644
--- a/lib/sanitizer_common/sanitizer_allocator_local_cache.h
+++ b/lib/sanitizer_common/sanitizer_allocator_local_cache.h
@@ -18,7 +18,110 @@
 // or SizeClassAllocator32. Since the typical use of this class is to have one
 // object per thread in TLS, is has to be POD.
 template<class SizeClassAllocator>
-struct SizeClassAllocatorLocalCache {
+struct SizeClassAllocatorLocalCache
+    : SizeClassAllocator::AllocatorCache {
+};
+
+// Cache used by SizeClassAllocator64.
+template <class SizeClassAllocator>
+struct SizeClassAllocator64LocalCache {
+  typedef SizeClassAllocator Allocator;
+  static const uptr kNumClasses = SizeClassAllocator::kNumClasses;
+  typedef typename Allocator::SizeClassMapT SizeClassMap;
+  typedef typename Allocator::CompactPtrT CompactPtrT;
+
+  void Init(AllocatorGlobalStats *s) {
+    stats_.Init();
+    if (s)
+      s->Register(&stats_);
+  }
+
+  void Destroy(SizeClassAllocator *allocator, AllocatorGlobalStats *s) {
+    Drain(allocator);
+    if (s)
+      s->Unregister(&stats_);
+  }
+
+  void *Allocate(SizeClassAllocator *allocator, uptr class_id) {
+    CHECK_NE(class_id, 0UL);
+    CHECK_LT(class_id, kNumClasses);
+    stats_.Add(AllocatorStatAllocated, Allocator::ClassIdToSize(class_id));
+    PerClass *c = &per_class_[class_id];
+    if (UNLIKELY(c->count == 0))
+      Refill(c, allocator, class_id);
+    CHECK_GT(c->count, 0);
+    CompactPtrT chunk = c->chunks[--c->count];
+    void *res = reinterpret_cast<void *>(allocator->CompactPtrToPointer(
+        allocator->GetRegionBeginBySizeClass(class_id), chunk));
+    return res;
+  }
+
+  void Deallocate(SizeClassAllocator *allocator, uptr class_id, void *p) {
+    CHECK_NE(class_id, 0UL);
+    CHECK_LT(class_id, kNumClasses);
+    // If the first allocator call on a new thread is a deallocation, then
+    // max_count will be zero, leading to check failure.
+    InitCache();
+    stats_.Sub(AllocatorStatAllocated, Allocator::ClassIdToSize(class_id));
+    PerClass *c = &per_class_[class_id];
+    CHECK_NE(c->max_count, 0UL);
+    if (UNLIKELY(c->count == c->max_count))
+      Drain(c, allocator, class_id, c->max_count / 2);
+    CompactPtrT chunk = allocator->PointerToCompactPtr(
+        allocator->GetRegionBeginBySizeClass(class_id),
+        reinterpret_cast<uptr>(p));
+    c->chunks[c->count++] = chunk;
+  }
+
+  void Drain(SizeClassAllocator *allocator) {
+    for (uptr class_id = 0; class_id < kNumClasses; class_id++) {
+      PerClass *c = &per_class_[class_id];
+      while (c->count > 0)
+        Drain(c, allocator, class_id, c->count);
+    }
+  }
+
+  // private:
+  struct PerClass {
+    u32 count;
+    u32 max_count;
+    CompactPtrT chunks[2 * SizeClassMap::kMaxNumCachedHint];
+  };
+  PerClass per_class_[kNumClasses];
+  AllocatorStats stats_;
+
+  void InitCache() {
+    if (per_class_[1].max_count)
+      return;
+    for (uptr i = 0; i < kNumClasses; i++) {
+      PerClass *c = &per_class_[i];
+      c->max_count = 2 * SizeClassMap::MaxCachedHint(i);
+    }
+  }
+
+  NOINLINE void Refill(PerClass *c, SizeClassAllocator *allocator,
+                       uptr class_id) {
+    InitCache();
+    uptr num_requested_chunks = SizeClassMap::MaxCachedHint(class_id);
+    allocator->GetFromAllocator(&stats_, class_id, c->chunks,
+                                num_requested_chunks);
+    c->count = num_requested_chunks;
+  }
+
+  NOINLINE void Drain(PerClass *c, SizeClassAllocator *allocator, uptr class_id,
+                      uptr count) {
+    InitCache();
+    CHECK_GE(c->count, count);
+    uptr first_idx_to_drain = c->count - count;
+    c->count -= count;
+    allocator->ReturnToAllocator(&stats_, class_id,
+                                 &c->chunks[first_idx_to_drain], count);
+  }
+};
+
+// Cache used by SizeClassAllocator32.
+template <class SizeClassAllocator>
+struct SizeClassAllocator32LocalCache {
   typedef SizeClassAllocator Allocator;
   typedef typename Allocator::TransferBatch TransferBatch;
   static const uptr kNumClasses = SizeClassAllocator::kNumClasses;
@@ -93,21 +196,7 @@
   // For small size classes we allocate batches separately.
   // For large size classes we may use one of the chunks to store the batch.
   // sizeof(TransferBatch) must be a power of 2 for more efficient allocation.
-
-  // If kUseSeparateSizeClassForBatch is true,
-  // all TransferBatch objects are allocated from kBatchClassID
-  // size class (except for those that are needed for kBatchClassID itself).
-  // The goal is to have TransferBatches in a totally different region of RAM
-  // to improve security and allow more efficient RAM reclamation.
-  // This is experimental and may currently increase memory usage by up to 3%
-  // in extreme cases.
-  static const bool kUseSeparateSizeClassForBatch = false;
-
   static uptr SizeClassForTransferBatch(uptr class_id) {
-    if (kUseSeparateSizeClassForBatch)
-      return class_id == SizeClassMap::kBatchClassID
-                 ? 0
-                 : SizeClassMap::kBatchClassID;
     if (Allocator::ClassIdToSize(class_id) <
         TransferBatch::AllocationSizeRequiredForNElements(
             TransferBatch::MaxCached(class_id)))
@@ -157,3 +246,4 @@
     allocator->DeallocateBatch(&stats_, class_id, b);
   }
 };
+
diff --git a/lib/sanitizer_common/sanitizer_allocator_primary32.h b/lib/sanitizer_common/sanitizer_allocator_primary32.h
index e5cf56f..2882afd 100644
--- a/lib/sanitizer_common/sanitizer_allocator_primary32.h
+++ b/lib/sanitizer_common/sanitizer_allocator_primary32.h
@@ -14,6 +14,8 @@
 #error This file must be included inside sanitizer_allocator.h
 #endif
 
+template<class SizeClassAllocator> struct SizeClassAllocator32LocalCache;
+
 // SizeClassAllocator32 -- allocator for 32-bit address space.
 // This allocator can theoretically be used on 64-bit arch, but there it is less
 // efficient than SizeClassAllocator64.
@@ -81,20 +83,26 @@
                  SizeClassMap::kMaxNumCachedHint * sizeof(uptr));
 
   static uptr ClassIdToSize(uptr class_id) {
-    return class_id == SizeClassMap::kBatchClassID
-               ? sizeof(TransferBatch)
-               : SizeClassMap::Size(class_id);
+    return SizeClassMap::Size(class_id);
   }
 
   typedef SizeClassAllocator32<kSpaceBeg, kSpaceSize, kMetadataSize,
       SizeClassMap, kRegionSizeLog, ByteMap, MapUnmapCallback> ThisT;
-  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;
+  typedef SizeClassAllocator32LocalCache<ThisT> AllocatorCache;
 
-  void Init() {
+  void Init(s32 release_to_os_interval_ms) {
     possible_regions.TestOnlyInit();
     internal_memset(size_class_info_array, 0, sizeof(size_class_info_array));
   }
 
+  s32 ReleaseToOSIntervalMs() const {
+    return kReleaseToOSIntervalNever;
+  }
+
+  void SetReleaseToOSIntervalMs(s32 release_to_os_interval_ms) {
+    // This is empty here. Currently only implemented in 64-bit allocator.
+  }
+
   void *MapWithCallback(uptr size) {
     size = RoundUpTo(size, GetPageSizeCached());
     void *res = MmapOrDie(size, "SizeClassAllocator32");
diff --git a/lib/sanitizer_common/sanitizer_allocator_primary64.h b/lib/sanitizer_common/sanitizer_allocator_primary64.h
index 393a019..035d92b 100644
--- a/lib/sanitizer_common/sanitizer_allocator_primary64.h
+++ b/lib/sanitizer_common/sanitizer_allocator_primary64.h
@@ -14,7 +14,10 @@
 #error This file must be included inside sanitizer_allocator.h
 #endif
 
+template<class SizeClassAllocator> struct SizeClassAllocator64LocalCache;
+
 // SizeClassAllocator64 -- allocator for 64-bit address space.
+// The template parameter Params is a class containing the actual parameters.
 //
 // Space: a portion of address space of kSpaceSize bytes starting at SpaceBeg.
 // If kSpaceBeg is ~0 then SpaceBeg is chosen dynamically my mmap.
@@ -28,76 +31,45 @@
 //
 // UserChunk: a piece of memory returned to user.
 // MetaChunk: kMetadataSize bytes of metadata associated with a UserChunk.
+
+// FreeArray is an array of free-d chunks (stored as 4-byte offsets)
 //
 // A Region looks like this:
-// UserChunk1 ... UserChunkN <gap> MetaChunkN ... MetaChunk1
-template <const uptr kSpaceBeg, const uptr kSpaceSize,
-          const uptr kMetadataSize, class SizeClassMap,
-          class MapUnmapCallback = NoOpMapUnmapCallback>
+// UserChunk1 ... UserChunkN <gap> MetaChunkN ... MetaChunk1 FreeArray
+
+struct SizeClassAllocator64FlagMasks {  //  Bit masks.
+  enum {
+    kRandomShuffleChunks = 1,
+  };
+};
+
+template <class Params>
 class SizeClassAllocator64 {
  public:
-  struct TransferBatch {
-    static const uptr kMaxNumCached = SizeClassMap::kMaxNumCachedHint - 4;
-    void SetFromRange(uptr region_beg, uptr beg_offset, uptr step, uptr count) {
-      count_ = count;
-      CHECK_LE(count_, kMaxNumCached);
-      region_beg_ = region_beg;
-      for (uptr i = 0; i < count; i++)
-        batch_[i] = static_cast<u32>((beg_offset + i * step) >> 4);
-    }
-    void SetFromArray(uptr region_beg, void *batch[], uptr count) {
-      count_ = count;
-      CHECK_LE(count_, kMaxNumCached);
-      region_beg_ = region_beg;
-      for (uptr i = 0; i < count; i++)
-        batch_[i] = static_cast<u32>(
-            ((reinterpret_cast<uptr>(batch[i])) - region_beg) >> 4);
-    }
-    void CopyToArray(void *to_batch[]) {
-      for (uptr i = 0, n = Count(); i < n; i++)
-        to_batch[i] = reinterpret_cast<void*>(Get(i));
-    }
-    uptr Count() const { return count_; }
+  static const uptr kSpaceBeg = Params::kSpaceBeg;
+  static const uptr kSpaceSize = Params::kSpaceSize;
+  static const uptr kMetadataSize = Params::kMetadataSize;
+  typedef typename Params::SizeClassMap SizeClassMap;
+  typedef typename Params::MapUnmapCallback MapUnmapCallback;
 
-    // How much memory do we need for a batch containing n elements.
-    static uptr AllocationSizeRequiredForNElements(uptr n) {
-      return sizeof(uptr) * 2 + sizeof(u32) * n;
-    }
-    static uptr MaxCached(uptr class_id) {
-      return Min(kMaxNumCached, SizeClassMap::MaxCachedHint(class_id));
-    }
+  static const bool kRandomShuffleChunks =
+      Params::kFlags & SizeClassAllocator64FlagMasks::kRandomShuffleChunks;
 
-    TransferBatch *next;
+  typedef SizeClassAllocator64<Params> ThisT;
+  typedef SizeClassAllocator64LocalCache<ThisT> AllocatorCache;
 
-   private:
-    uptr Get(uptr i) {
-      return region_beg_ + (static_cast<uptr>(batch_[i]) << 4);
-    }
-    // Instead of storing 64-bit pointers we store 32-bit offsets from the
-    // region start divided by 4. This imposes two limitations:
-    // * all allocations are 16-aligned,
-    // * regions are not larger than 2^36.
-    uptr region_beg_ : SANITIZER_WORDSIZE - 10;  // Region-beg is 4096-aligned.
-    uptr count_      : 10;
-    u32 batch_[kMaxNumCached];
-  };
-  static const uptr kBatchSize = sizeof(TransferBatch);
-  COMPILER_CHECK((kBatchSize & (kBatchSize - 1)) == 0);
-  COMPILER_CHECK(sizeof(TransferBatch) ==
-                 SizeClassMap::kMaxNumCachedHint * sizeof(u32));
-  COMPILER_CHECK(TransferBatch::kMaxNumCached < 1024);  // count_ uses 10 bits.
-
-  static uptr ClassIdToSize(uptr class_id) {
-    return class_id == SizeClassMap::kBatchClassID
-               ? sizeof(TransferBatch)
-               : SizeClassMap::Size(class_id);
+  // When we know the size class (the region base) we can represent a pointer
+  // as a 4-byte integer (offset from the region start shifted right by 4).
+  typedef u32 CompactPtrT;
+  static const uptr kCompactPtrScale = 4;
+  CompactPtrT PointerToCompactPtr(uptr base, uptr ptr) {
+    return static_cast<CompactPtrT>((ptr - base) >> kCompactPtrScale);
+  }
+  uptr CompactPtrToPointer(uptr base, CompactPtrT ptr32) {
+    return base + (static_cast<uptr>(ptr32) << kCompactPtrScale);
   }
 
-  typedef SizeClassAllocator64<kSpaceBeg, kSpaceSize, kMetadataSize,
-      SizeClassMap, MapUnmapCallback> ThisT;
-  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;
-
-  void Init() {
+  void Init(s32 release_to_os_interval_ms) {
     uptr TotalSpaceSize = kSpaceSize + AdditionalSize();
     if (kUsingConstantSpaceBeg) {
       CHECK_EQ(kSpaceBeg, reinterpret_cast<uptr>(
@@ -107,9 +79,19 @@
           reinterpret_cast<uptr>(MmapNoAccess(TotalSpaceSize));
       CHECK_NE(NonConstSpaceBeg, ~(uptr)0);
     }
+    SetReleaseToOSIntervalMs(release_to_os_interval_ms);
     MapWithCallback(SpaceEnd(), AdditionalSize());
   }
 
+  s32 ReleaseToOSIntervalMs() const {
+    return atomic_load(&release_to_os_interval_ms_, memory_order_relaxed);
+  }
+
+  void SetReleaseToOSIntervalMs(s32 release_to_os_interval_ms) {
+    atomic_store(&release_to_os_interval_ms_, release_to_os_interval_ms,
+                 memory_order_relaxed);
+  }
+
   void MapWithCallback(uptr beg, uptr size) {
     CHECK_EQ(beg, reinterpret_cast<uptr>(MmapFixedOrDie(beg, size)));
     MapUnmapCallback().OnMap(beg, size);
@@ -125,25 +107,44 @@
       alignment <= SizeClassMap::kMaxSize;
   }
 
-  NOINLINE TransferBatch *AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
-                                        uptr class_id) {
-    CHECK_LT(class_id, kNumClasses);
+  NOINLINE void ReturnToAllocator(AllocatorStats *stat, uptr class_id,
+                                  const CompactPtrT *chunks, uptr n_chunks) {
     RegionInfo *region = GetRegionInfo(class_id);
-    TransferBatch *b = region->free_list.Pop();
-    if (!b)
-      b = PopulateFreeList(stat, c, class_id, region);
-    region->n_allocated += b->Count();
-    return b;
+    uptr region_beg = GetRegionBeginBySizeClass(class_id);
+    CompactPtrT *free_array = GetFreeArray(region_beg);
+
+    BlockingMutexLock l(&region->mutex);
+    uptr old_num_chunks = region->num_freed_chunks;
+    uptr new_num_freed_chunks = old_num_chunks + n_chunks;
+    EnsureFreeArraySpace(region, region_beg, new_num_freed_chunks);
+    for (uptr i = 0; i < n_chunks; i++)
+      free_array[old_num_chunks + i] = chunks[i];
+    region->num_freed_chunks = new_num_freed_chunks;
+    region->n_freed += n_chunks;
+
+    MaybeReleaseToOS(class_id);
   }
 
-  NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id,
-                                TransferBatch *b) {
+  NOINLINE void GetFromAllocator(AllocatorStats *stat, uptr class_id,
+                                 CompactPtrT *chunks, uptr n_chunks) {
     RegionInfo *region = GetRegionInfo(class_id);
-    CHECK_GT(b->Count(), 0);
-    region->free_list.Push(b);
-    region->n_freed += b->Count();
+    uptr region_beg = GetRegionBeginBySizeClass(class_id);
+    CompactPtrT *free_array = GetFreeArray(region_beg);
+
+    BlockingMutexLock l(&region->mutex);
+    if (UNLIKELY(region->num_freed_chunks < n_chunks)) {
+      PopulateFreeArray(stat, class_id, region,
+                        n_chunks - region->num_freed_chunks);
+      CHECK_GE(region->num_freed_chunks, n_chunks);
+    }
+    region->num_freed_chunks -= n_chunks;
+    uptr base_idx = region->num_freed_chunks;
+    for (uptr i = 0; i < n_chunks; i++)
+      chunks[i] = free_array[base_idx + i];
+    region->n_allocated += n_chunks;
   }
 
+
   bool PointerIsMine(const void *p) {
     uptr P = reinterpret_cast<uptr>(p);
     if (kUsingConstantSpaceBeg && (kSpaceBeg % kSpaceSize) == 0)
@@ -196,8 +197,8 @@
     uptr class_id = GetSizeClass(p);
     uptr size = ClassIdToSize(class_id);
     uptr chunk_idx = GetChunkIdx(reinterpret_cast<uptr>(p), size);
-    return reinterpret_cast<void *>(SpaceBeg() +
-                                    (kRegionSize * (class_id + 1)) -
+    uptr region_beg = GetRegionBeginBySizeClass(class_id);
+    return reinterpret_cast<void *>(GetMetadataEnd(region_beg) -
                                     (1 + chunk_idx) * kMetadataSize);
   }
 
@@ -220,6 +221,20 @@
         stats[class_id] = rss;
   }
 
+  void PrintStats(uptr class_id, uptr rss) {
+    RegionInfo *region = GetRegionInfo(class_id);
+    if (region->mapped_user == 0) return;
+    uptr in_use = region->n_allocated - region->n_freed;
+    uptr avail_chunks = region->allocated_user / ClassIdToSize(class_id);
+    Printf(
+        "  %02zd (%6zd): mapped: %6zdK allocs: %7zd frees: %7zd inuse: %6zd "
+        "num_freed_chunks %7zd avail: %6zd rss: %6zdK releases: %6zd\n",
+        class_id, ClassIdToSize(class_id), region->mapped_user >> 10,
+        region->n_allocated, region->n_freed, in_use,
+        region->num_freed_chunks, avail_chunks, rss >> 10,
+        region->rtoi.num_releases);
+  }
+
   void PrintStats() {
     uptr total_mapped = 0;
     uptr n_allocated = 0;
@@ -237,21 +252,8 @@
     for (uptr class_id = 0; class_id < kNumClasses; class_id++)
       rss_stats[class_id] = SpaceBeg() + kRegionSize * class_id;
     GetMemoryProfile(FillMemoryProfile, rss_stats, kNumClasses);
-    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
-      RegionInfo *region = GetRegionInfo(class_id);
-      if (region->mapped_user == 0) continue;
-      uptr in_use = region->n_allocated - region->n_freed;
-      uptr avail_chunks = region->allocated_user / ClassIdToSize(class_id);
-      Printf("  %02zd (%zd): mapped: %zdK allocs: %zd frees: %zd inuse: %zd"
-             " avail: %zd rss: %zdK\n",
-             class_id,
-             ClassIdToSize(class_id),
-             region->mapped_user >> 10,
-             region->n_allocated,
-             region->n_freed,
-             in_use, avail_chunks,
-             rss_stats[class_id] >> 10);
-    }
+    for (uptr class_id = 1; class_id < kNumClasses; class_id++)
+      PrintStats(class_id, rss_stats[class_id]);
   }
 
   // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
@@ -284,6 +286,10 @@
     }
   }
 
+  static uptr ClassIdToSize(uptr class_id) {
+    return SizeClassMap::Size(class_id);
+  }
+
   static uptr AdditionalSize() {
     return RoundUpTo(sizeof(RegionInfo) * kNumClassesRounded,
                      GetPageSizeCached());
@@ -295,6 +301,11 @@
 
  private:
   static const uptr kRegionSize = kSpaceSize / kNumClassesRounded;
+  // FreeArray is the array of free-d chunks (stored as 4-byte offsets).
+  // In the worst case it may require kRegionSize/SizeClassMap::kMinSize
+  // elements, but in reality this will not happen. For simplicity we
+  // dedicate 1/8 of the region's virtual space to FreeArray.
+  static const uptr kFreeArraySize = kRegionSize / 8;
 
   static const bool kUsingConstantSpaceBeg = kSpaceBeg != ~(uptr)0;
   uptr NonConstSpaceBeg;
@@ -304,24 +315,49 @@
   uptr SpaceEnd() const { return  SpaceBeg() + kSpaceSize; }
   // kRegionSize must be >= 2^32.
   COMPILER_CHECK((kRegionSize) >= (1ULL << (SANITIZER_WORDSIZE / 2)));
-  // kRegionSize must be <= 2^36, see TransferBatch.
+  // kRegionSize must be <= 2^36, see CompactPtrT.
   COMPILER_CHECK((kRegionSize) <= (1ULL << (SANITIZER_WORDSIZE / 2 + 4)));
   // Call mmap for user memory with at least this size.
   static const uptr kUserMapSize = 1 << 16;
   // Call mmap for metadata memory with at least this size.
   static const uptr kMetaMapSize = 1 << 16;
+  // Call mmap for free array memory with at least this size.
+  static const uptr kFreeArrayMapSize = 1 << 16;
+
+  atomic_sint32_t release_to_os_interval_ms_;
+
+  struct ReleaseToOsInfo {
+    uptr n_freed_at_last_release;
+    uptr num_releases;
+    u64 last_release_at_ns;
+  };
 
   struct RegionInfo {
     BlockingMutex mutex;
-    LFStack<TransferBatch> free_list;
+    uptr num_freed_chunks;  // Number of elements in the freearray.
+    uptr mapped_free_array;  // Bytes mapped for freearray.
     uptr allocated_user;  // Bytes allocated for user memory.
     uptr allocated_meta;  // Bytes allocated for metadata.
     uptr mapped_user;  // Bytes mapped for user memory.
     uptr mapped_meta;  // Bytes mapped for metadata.
+    u32 rand_state; // Seed for random shuffle, used if kRandomShuffleChunks.
     uptr n_allocated, n_freed;  // Just stats.
+    ReleaseToOsInfo rtoi;
   };
   COMPILER_CHECK(sizeof(RegionInfo) >= kCacheLineSize);
 
+  u32 Rand(u32 *state) {  // ANSI C linear congruential PRNG.
+    return (*state = *state * 1103515245 + 12345) >> 16;
+  }
+
+  u32 RandN(u32 *state, u32 n) { return Rand(state) % n; }  // [0, n)
+
+  void RandomShuffle(u32 *a, u32 n, u32 *rand_state) {
+    if (n <= 1) return;
+    for (u32 i = n - 1; i > 0; i--)
+      Swap(a[i], a[RandN(rand_state, i + 1)]);
+  }
+
   RegionInfo *GetRegionInfo(uptr class_id) {
     CHECK_LT(class_id, kNumClasses);
     RegionInfo *regions =
@@ -329,6 +365,10 @@
     return &regions[class_id];
   }
 
+  uptr GetMetadataEnd(uptr region_beg) {
+    return region_beg + kRegionSize - kFreeArraySize;
+  }
+
   uptr GetChunkIdx(uptr chunk, uptr size) {
     if (!kUsingConstantSpaceBeg)
       chunk -= SpaceBeg();
@@ -341,30 +381,61 @@
     return (u32)offset / (u32)size;
   }
 
-  NOINLINE TransferBatch *PopulateFreeList(AllocatorStats *stat,
-                                           AllocatorCache *c, uptr class_id,
-                                           RegionInfo *region) {
-    BlockingMutexLock l(&region->mutex);
-    TransferBatch *b = region->free_list.Pop();
-    if (b)
-      return b;
+  CompactPtrT *GetFreeArray(uptr region_beg) {
+    return reinterpret_cast<CompactPtrT *>(region_beg + kRegionSize -
+                                           kFreeArraySize);
+  }
+
+  void EnsureFreeArraySpace(RegionInfo *region, uptr region_beg,
+                            uptr num_freed_chunks) {
+    uptr needed_space = num_freed_chunks * sizeof(CompactPtrT);
+    if (region->mapped_free_array < needed_space) {
+      CHECK_LE(needed_space, kFreeArraySize);
+      uptr new_mapped_free_array = RoundUpTo(needed_space, kFreeArrayMapSize);
+      uptr current_map_end = reinterpret_cast<uptr>(GetFreeArray(region_beg)) +
+                             region->mapped_free_array;
+      uptr new_map_size = new_mapped_free_array - region->mapped_free_array;
+      MapWithCallback(current_map_end, new_map_size);
+      region->mapped_free_array = new_mapped_free_array;
+    }
+  }
+
+
+  NOINLINE void PopulateFreeArray(AllocatorStats *stat, uptr class_id,
+                                  RegionInfo *region, uptr requested_count) {
+    // region->mutex is held.
     uptr size = ClassIdToSize(class_id);
-    uptr count = TransferBatch::MaxCached(class_id);
     uptr beg_idx = region->allocated_user;
-    uptr end_idx = beg_idx + count * size;
-    uptr region_beg = SpaceBeg() + kRegionSize * class_id;
-    if (end_idx + size > region->mapped_user) {
+    uptr end_idx = beg_idx + requested_count * size;
+    uptr region_beg = GetRegionBeginBySizeClass(class_id);
+    if (end_idx > region->mapped_user) {
+      if (!kUsingConstantSpaceBeg && region->mapped_user == 0)
+        region->rand_state = static_cast<u32>(region_beg >> 12);  // From ASLR.
       // Do the mmap for the user memory.
       uptr map_size = kUserMapSize;
-      while (end_idx + size > region->mapped_user + map_size)
+      while (end_idx > region->mapped_user + map_size)
         map_size += kUserMapSize;
       CHECK_GE(region->mapped_user + map_size, end_idx);
       MapWithCallback(region_beg + region->mapped_user, map_size);
       stat->Add(AllocatorStatMapped, map_size);
       region->mapped_user += map_size;
     }
-    uptr total_count = (region->mapped_user - beg_idx - size)
-        / size / count * count;
+    CompactPtrT *free_array = GetFreeArray(region_beg);
+    uptr total_count = (region->mapped_user - beg_idx) / size;
+    uptr num_freed_chunks = region->num_freed_chunks;
+    EnsureFreeArraySpace(region, region_beg, num_freed_chunks + total_count);
+    for (uptr i = 0; i < total_count; i++) {
+      uptr chunk = beg_idx + i * size;
+      free_array[num_freed_chunks + total_count - 1 - i] =
+          PointerToCompactPtr(0, chunk);
+    }
+    if (kRandomShuffleChunks)
+      RandomShuffle(&free_array[num_freed_chunks], total_count,
+                    &region->rand_state);
+    region->num_freed_chunks += total_count;
+    region->allocated_user += total_count * size;
+    CHECK_LE(region->allocated_user, region->mapped_user);
+
     region->allocated_meta += total_count * kMetadataSize;
     if (region->allocated_meta > region->mapped_meta) {
       uptr map_size = kMetaMapSize;
@@ -372,30 +443,78 @@
         map_size += kMetaMapSize;
       // Do the mmap for the metadata.
       CHECK_GE(region->mapped_meta + map_size, region->allocated_meta);
-      MapWithCallback(region_beg + kRegionSize -
+      MapWithCallback(GetMetadataEnd(region_beg) -
                       region->mapped_meta - map_size, map_size);
       region->mapped_meta += map_size;
     }
     CHECK_LE(region->allocated_meta, region->mapped_meta);
-    if (region->mapped_user + region->mapped_meta > kRegionSize) {
+    if (region->mapped_user + region->mapped_meta >
+        kRegionSize - kFreeArraySize) {
       Printf("%s: Out of memory. Dying. ", SanitizerToolName);
       Printf("The process has exhausted %zuMB for size class %zu.\n",
           kRegionSize / 1024 / 1024, size);
       Die();
     }
-    for (;;) {
-      b = c->CreateBatch(class_id, this,
-                         (TransferBatch *)(region_beg + beg_idx));
-      b->SetFromRange(region_beg, beg_idx, size, count);
-      region->allocated_user += count * size;
-      CHECK_LE(region->allocated_user, region->mapped_user);
-      beg_idx += count * size;
-      if (beg_idx + count * size + size > region->mapped_user)
-        break;
-      CHECK_GT(b->Count(), 0);
-      region->free_list.Push(b);
+  }
+
+  void MaybeReleaseChunkRange(uptr region_beg, uptr chunk_size,
+                              CompactPtrT first, CompactPtrT last) {
+    uptr beg_ptr = CompactPtrToPointer(region_beg, first);
+    uptr end_ptr = CompactPtrToPointer(region_beg, last) + chunk_size;
+    ReleaseMemoryPagesToOS(beg_ptr, end_ptr);
+  }
+
+  // Attempts to release some RAM back to OS. The region is expected to be
+  // locked.
+  // Algorithm:
+  // * Sort the chunks.
+  // * Find ranges fully covered by free-d chunks (the last range is skipped).
+  // * Release them to OS with madvise.
+  void MaybeReleaseToOS(uptr class_id) {
+    RegionInfo *region = GetRegionInfo(class_id);
+    const uptr chunk_size = ClassIdToSize(class_id);
+    const uptr page_size = GetPageSizeCached();
+
+    uptr n = region->num_freed_chunks;
+    if (n * chunk_size < page_size)
+      return;  // No chance to release anything.
+    if ((region->n_freed - region->rtoi.n_freed_at_last_release) * chunk_size <
+        page_size) {
+      return;  // Nothing new to release.
     }
-    return b;
+
+    s32 interval_ms = ReleaseToOSIntervalMs();
+    if (interval_ms < 0)
+      return;
+
+    u64 now_ns = NanoTime();
+    if (region->rtoi.last_release_at_ns + interval_ms * 1000000ULL > now_ns)
+      return;  // Memory was returned recently.
+    region->rtoi.last_release_at_ns = now_ns;
+
+    uptr region_beg = GetRegionBeginBySizeClass(class_id);
+    CompactPtrT *free_array = GetFreeArray(region_beg);
+    SortArray(free_array, n);
+
+    const uptr scaled_chunk_size = chunk_size >> kCompactPtrScale;
+    const uptr kScaledGranularity = page_size >> kCompactPtrScale;
+
+    uptr range_beg = free_array[0];
+    uptr prev = free_array[0];
+    for (uptr i = 1; i < n; i++) {
+      uptr chunk = free_array[i];
+      CHECK_GT(chunk, prev);
+      if (chunk - prev != scaled_chunk_size) {
+        CHECK_GT(chunk - prev, scaled_chunk_size);
+        if (prev + scaled_chunk_size - range_beg >= kScaledGranularity) {
+          MaybeReleaseChunkRange(region_beg, chunk_size, range_beg, prev);
+          region->rtoi.n_freed_at_last_release = region->n_freed;
+          region->rtoi.num_releases++;
+        }
+        range_beg = chunk;
+      }
+      prev = chunk;
+    }
   }
 };
 
diff --git a/lib/sanitizer_common/sanitizer_allocator_secondary.h b/lib/sanitizer_common/sanitizer_allocator_secondary.h
index 383eccf..2e98e59 100644
--- a/lib/sanitizer_common/sanitizer_allocator_secondary.h
+++ b/lib/sanitizer_common/sanitizer_allocator_secondary.h
@@ -36,8 +36,7 @@
     if (alignment > page_size_)
       map_size += alignment;
     // Overflow.
-    if (map_size < size)
-      return ReturnNullOrDie();
+    if (map_size < size) return ReturnNullOrDieOnBadRequest();
     uptr map_beg = reinterpret_cast<uptr>(
         MmapOrDie(map_size, "LargeMmapAllocator"));
     CHECK(IsAligned(map_beg, page_size_));
@@ -73,10 +72,18 @@
     return reinterpret_cast<void*>(res);
   }
 
-  void *ReturnNullOrDie() {
-    if (atomic_load(&may_return_null_, memory_order_acquire))
-      return nullptr;
-    ReportAllocatorCannotReturnNull();
+  bool MayReturnNull() const {
+    return atomic_load(&may_return_null_, memory_order_acquire);
+  }
+
+  void *ReturnNullOrDieOnBadRequest() {
+    if (MayReturnNull()) return nullptr;
+    ReportAllocatorCannotReturnNull(false);
+  }
+
+  void *ReturnNullOrDieOnOOM() {
+    if (MayReturnNull()) return nullptr;
+    ReportAllocatorCannotReturnNull(true);
   }
 
   void SetMayReturnNull(bool may_return_null) {
@@ -154,6 +161,14 @@
     return GetUser(h);
   }
 
+  void EnsureSortedChunks() {
+    if (chunks_sorted_) return;
+    SortArray(reinterpret_cast<uptr*>(chunks_), n_chunks_);
+    for (uptr i = 0; i < n_chunks_; i++)
+      chunks_[i]->chunk_idx = i;
+    chunks_sorted_ = true;
+  }
+
   // This function does the same as GetBlockBegin, but is much faster.
   // Must be called with the allocator locked.
   void *GetBlockBeginFastLocked(void *ptr) {
@@ -161,16 +176,10 @@
     uptr p = reinterpret_cast<uptr>(ptr);
     uptr n = n_chunks_;
     if (!n) return nullptr;
-    if (!chunks_sorted_) {
-      // Do one-time sort. chunks_sorted_ is reset in Allocate/Deallocate.
-      SortArray(reinterpret_cast<uptr*>(chunks_), n);
-      for (uptr i = 0; i < n; i++)
-        chunks_[i]->chunk_idx = i;
-      chunks_sorted_ = true;
-      min_mmap_ = reinterpret_cast<uptr>(chunks_[0]);
-      max_mmap_ = reinterpret_cast<uptr>(chunks_[n - 1]) +
-          chunks_[n - 1]->map_size;
-    }
+    EnsureSortedChunks();
+    auto min_mmap_ = reinterpret_cast<uptr>(chunks_[0]);
+    auto max_mmap_ =
+        reinterpret_cast<uptr>(chunks_[n - 1]) + chunks_[n - 1]->map_size;
     if (p < min_mmap_ || p >= max_mmap_)
       return nullptr;
     uptr beg = 0, end = n - 1;
@@ -223,8 +232,14 @@
   // Iterate over all existing chunks.
   // The allocator must be locked when calling this function.
   void ForEachChunk(ForEachChunkCallback callback, void *arg) {
-    for (uptr i = 0; i < n_chunks_; i++)
+    EnsureSortedChunks();  // Avoid doing the sort while iterating.
+    for (uptr i = 0; i < n_chunks_; i++) {
+      auto t = chunks_[i];
       callback(reinterpret_cast<uptr>(GetUser(chunks_[i])), arg);
+      // Consistency check: verify that the array did not change.
+      CHECK_EQ(chunks_[i], t);
+      CHECK_EQ(chunks_[i]->chunk_idx, i);
+    }
   }
 
  private:
@@ -256,7 +271,6 @@
   uptr page_size_;
   Header *chunks_[kMaxNumChunks];
   uptr n_chunks_;
-  uptr min_mmap_, max_mmap_;
   bool chunks_sorted_;
   struct Stats {
     uptr n_allocs, n_frees, currently_allocated, max_allocated, by_size_log[64];
diff --git a/lib/sanitizer_common/sanitizer_allocator_size_class_map.h b/lib/sanitizer_common/sanitizer_allocator_size_class_map.h
index b8917a4..7151a46 100644
--- a/lib/sanitizer_common/sanitizer_allocator_size_class_map.h
+++ b/lib/sanitizer_common/sanitizer_allocator_size_class_map.h
@@ -15,7 +15,18 @@
 #endif
 
 // SizeClassMap maps allocation sizes into size classes and back.
-// Class 0 corresponds to size 0.
+// Class 0 always corresponds to size 0.
+// The other sizes are controlled by the template parameters:
+//   kMinSizeLog: defines the class 1    as 2^kMinSizeLog.
+//   kMaxSizeLog: defines the last class as 2^kMaxSizeLog.
+//   kMidSizeLog: the classes starting from 1 increase with step
+//                2^kMinSizeLog until 2^kMidSizeLog.
+//   kNumBits: the number of non-zero bits in sizes after 2^kMidSizeLog.
+//             E.g. with kNumBits==3 all size classes after 2^kMidSizeLog
+//             look like 0b1xx0..0, where x is either 0 or 1.
+//
+// Example: kNumBits=3, kMinSizeLog=4, kMidSizeLog=8, kMaxSizeLog=17:
+//
 // Classes 1 - 16 correspond to sizes 16 to 256 (size = class_id * 16).
 // Next 4 classes: 256 + i * 64  (i = 1 to 4).
 // Next 4 classes: 512 + i * 128 (i = 1 to 4).
@@ -25,7 +36,7 @@
 //
 // This structure of the size class map gives us:
 //   - Efficient table-free class-to-size and size-to-class functions.
-//   - Difference between two consequent size classes is betweed 14% and 25%
+//   - Difference between two consequent size classes is between 14% and 25%
 //
 // This class also gives a hint to a thread-caching allocator about the amount
 // of chunks that need to be cached per-thread:
@@ -33,9 +44,6 @@
 //    The actual number is computed in TransferBatch.
 //  - (1 << kMaxBytesCachedLog) is the maximal number of bytes per size class.
 //
-// There is one extra size class kBatchClassID that is used for allocating
-// objects of TransferBatch type when kUseSeparateSizeClassForBatch is true.
-//
 // Part of output of SizeClassMap::Print():
 // c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0
 // c01 => s: 16 diff: +16 00% l 4 cached: 256 4096; id 1
@@ -78,15 +86,44 @@
 // c51 => s: 114688 diff: +16384 16% l 16 cached: 1 114688; id 51
 //
 // c52 => s: 131072 diff: +16384 14% l 17 cached: 1 131072; id 52
+//
+//
+// Another example (kNumBits=2):
+// c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0
+// c01 => s: 32 diff: +32 00% l 5 cached: 64 2048; id 1
+// c02 => s: 64 diff: +32 100% l 6 cached: 64 4096; id 2
+// c03 => s: 96 diff: +32 50% l 6 cached: 64 6144; id 3
+// c04 => s: 128 diff: +32 33% l 7 cached: 64 8192; id 4
+// c05 => s: 160 diff: +32 25% l 7 cached: 64 10240; id 5
+// c06 => s: 192 diff: +32 20% l 7 cached: 64 12288; id 6
+// c07 => s: 224 diff: +32 16% l 7 cached: 64 14336; id 7
+// c08 => s: 256 diff: +32 14% l 8 cached: 64 16384; id 8
+// c09 => s: 384 diff: +128 50% l 8 cached: 42 16128; id 9
+// c10 => s: 512 diff: +128 33% l 9 cached: 32 16384; id 10
+// c11 => s: 768 diff: +256 50% l 9 cached: 21 16128; id 11
+// c12 => s: 1024 diff: +256 33% l 10 cached: 16 16384; id 12
+// c13 => s: 1536 diff: +512 50% l 10 cached: 10 15360; id 13
+// c14 => s: 2048 diff: +512 33% l 11 cached: 8 16384; id 14
+// c15 => s: 3072 diff: +1024 50% l 11 cached: 5 15360; id 15
+// c16 => s: 4096 diff: +1024 33% l 12 cached: 4 16384; id 16
+// c17 => s: 6144 diff: +2048 50% l 12 cached: 2 12288; id 17
+// c18 => s: 8192 diff: +2048 33% l 13 cached: 2 16384; id 18
+// c19 => s: 12288 diff: +4096 50% l 13 cached: 1 12288; id 19
+// c20 => s: 16384 diff: +4096 33% l 14 cached: 1 16384; id 20
+// c21 => s: 24576 diff: +8192 50% l 14 cached: 1 24576; id 21
+// c22 => s: 32768 diff: +8192 33% l 15 cached: 1 32768; id 22
+// c23 => s: 49152 diff: +16384 50% l 15 cached: 1 49152; id 23
+// c24 => s: 65536 diff: +16384 33% l 16 cached: 1 65536; id 24
+// c25 => s: 98304 diff: +32768 50% l 16 cached: 1 98304; id 25
+// c26 => s: 131072 diff: +32768 33% l 17 cached: 1 131072; id 26
 
-template <uptr kMaxSizeLog, uptr kMaxNumCachedHintT, uptr kMaxBytesCachedLog>
+template <uptr kNumBits, uptr kMinSizeLog, uptr kMidSizeLog, uptr kMaxSizeLog,
+          uptr kMaxNumCachedHintT, uptr kMaxBytesCachedLog>
 class SizeClassMap {
-  static const uptr kMinSizeLog = 4;
-  static const uptr kMidSizeLog = kMinSizeLog + 4;
   static const uptr kMinSize = 1 << kMinSizeLog;
   static const uptr kMidSize = 1 << kMidSizeLog;
   static const uptr kMidClass = kMidSize / kMinSize;
-  static const uptr S = 2;
+  static const uptr S = kNumBits - 1;
   static const uptr M = (1 << S) - 1;
 
  public:
@@ -97,20 +134,17 @@
 
   static const uptr kMaxSize = 1UL << kMaxSizeLog;
   static const uptr kNumClasses =
-      kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1 + 1;
-  static const uptr kBatchClassID = kNumClasses - 1;
+      kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1;
   static const uptr kLargestClassID = kNumClasses - 2;
-  COMPILER_CHECK(kNumClasses >= 32 && kNumClasses <= 256);
+  COMPILER_CHECK(kNumClasses >= 16 && kNumClasses <= 256);
   static const uptr kNumClassesRounded =
-      kNumClasses == 32  ? 32 :
+      kNumClasses <= 32  ? 32 :
       kNumClasses <= 64  ? 64 :
       kNumClasses <= 128 ? 128 : 256;
 
   static uptr Size(uptr class_id) {
     if (class_id <= kMidClass)
       return kMinSize * class_id;
-    // Should not pass kBatchClassID here, but we should avoid a CHECK.
-    if (class_id == kBatchClassID) return 0;
     class_id -= kMidClass;
     uptr t = kMidSize << (class_id >> S);
     return t + (t >> S) * (class_id & M);
@@ -129,11 +163,6 @@
 
   static uptr MaxCachedHint(uptr class_id) {
     if (class_id == 0) return 0;
-    // Estimate the result for kBatchClassID because this class
-    // does not know the exact size of TransferBatch.
-    // Moreover, we need to cache fewer batches than user chunks,
-    // so this number could be small.
-    if (class_id == kBatchClassID) return 8;
     uptr n = (1UL << kMaxBytesCachedLog) / Size(class_id);
     return Max<uptr>(1, Min(kMaxNumCachedHint, n));
   }
@@ -149,8 +178,6 @@
       uptr p = prev_s ? (d * 100 / prev_s) : 0;
       uptr l = s ? MostSignificantSetBitIndex(s) : 0;
       uptr cached = MaxCachedHint(i) * s;
-      if (i == kBatchClassID)
-        d = l = p = 0;
       Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
              "cached: %zd %zd; id %zd\n",
              i, Size(i), d, p, l, MaxCachedHint(i), cached, ClassID(s));
@@ -162,12 +189,11 @@
 
   static void Validate() {
     for (uptr c = 1; c < kNumClasses; c++) {
-      if (c == kBatchClassID) continue;
       // Printf("Validate: c%zd\n", c);
       uptr s = Size(c);
       CHECK_NE(s, 0U);
       CHECK_EQ(ClassID(s), c);
-      if (c != kBatchClassID - 1 && c != kNumClasses - 1)
+      if (c != kNumClasses - 1)
         CHECK_EQ(ClassID(s + 1), c + 1);
       CHECK_EQ(ClassID(s - 1), c);
       if (c)
@@ -186,6 +212,6 @@
   }
 };
 
-typedef SizeClassMap<17, 128, 16> DefaultSizeClassMap;
-typedef SizeClassMap<17, 64,  14> CompactSizeClassMap;
-template<class SizeClassAllocator> struct SizeClassAllocatorLocalCache;
+typedef SizeClassMap<3, 4, 8, 17, 128, 16> DefaultSizeClassMap;
+typedef SizeClassMap<3, 4, 8, 17, 64, 14> CompactSizeClassMap;
+typedef SizeClassMap<2, 5, 9, 16, 64, 14> VeryCompactSizeClassMap;
diff --git a/lib/sanitizer_common/sanitizer_atomic.h b/lib/sanitizer_common/sanitizer_atomic.h
index b26693e..8f400ac 100644
--- a/lib/sanitizer_common/sanitizer_atomic.h
+++ b/lib/sanitizer_common/sanitizer_atomic.h
@@ -37,6 +37,11 @@
   volatile Type val_dont_use;
 };
 
+struct atomic_sint32_t {
+  typedef s32 Type;
+  volatile Type val_dont_use;
+};
+
 struct atomic_uint32_t {
   typedef u32 Type;
   volatile Type val_dont_use;
diff --git a/lib/sanitizer_common/sanitizer_common.cc b/lib/sanitizer_common/sanitizer_common.cc
index 0867d5a..9824a519 100644
--- a/lib/sanitizer_common/sanitizer_common.cc
+++ b/lib/sanitizer_common/sanitizer_common.cc
@@ -114,7 +114,7 @@
   Report("ERROR: %s failed to "
          "%s 0x%zx (%zd) bytes of %s (error code: %d)\n",
          SanitizerToolName, mmap_type, size, size, mem_type, err);
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   DumpProcessMap();
 #endif
   UNREACHABLE("unable to mmap");
@@ -157,6 +157,7 @@
 }
 
 typedef bool UptrComparisonFunction(const uptr &a, const uptr &b);
+typedef bool U32ComparisonFunction(const u32 &a, const u32 &b);
 
 template<class T>
 static inline bool CompareLess(const T &a, const T &b) {
@@ -167,6 +168,10 @@
   InternalSort<uptr*, UptrComparisonFunction>(&array, size, CompareLess);
 }
 
+void SortArray(u32 *array, uptr size) {
+  InternalSort<u32*, U32ComparisonFunction>(&array, size, CompareLess);
+}
+
 const char *StripPathPrefix(const char *filepath,
                             const char *strip_path_prefix) {
   if (!filepath) return nullptr;
@@ -202,7 +207,7 @@
   __sanitizer_report_error_summary(buff.data());
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 void ReportErrorSummary(const char *error_type, const AddressInfo &info) {
   if (!common_flags()->print_summary)
     return;
@@ -499,4 +504,11 @@
                                               void (*free_hook)(const void *)) {
   return InstallMallocFreeHooks(malloc_hook, free_hook);
 }
+
+#if !SANITIZER_GO && !SANITIZER_SUPPORTS_WEAK_HOOKS
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+void __sanitizer_print_memory_profile(int top_percent) {
+  (void)top_percent;
+}
+#endif
 } // extern "C"
diff --git a/lib/sanitizer_common/sanitizer_common.h b/lib/sanitizer_common/sanitizer_common.h
index 4a4c54e..2dabb50 100644
--- a/lib/sanitizer_common/sanitizer_common.h
+++ b/lib/sanitizer_common/sanitizer_common.h
@@ -103,7 +103,9 @@
 
 // Used to check if we can map shadow memory to a fixed location.
 bool MemoryRangeIsAvailable(uptr range_start, uptr range_end);
-void FlushUnneededShadowMemory(uptr addr, uptr size);
+// Releases memory pages entirely within the [beg, end] address range. Noop if
+// the provided range does not contain at least one entire page.
+void ReleaseMemoryPagesToOS(uptr beg, uptr end);
 void IncreaseTotalMmap(uptr size);
 void DecreaseTotalMmap(uptr size);
 uptr GetRSS();
@@ -118,16 +120,14 @@
 // keep frame size low.
 // FIXME: use InternalAlloc instead of MmapOrDie once
 // InternalAlloc is made libc-free.
-template<typename T>
+template <typename T>
 class InternalScopedBuffer {
  public:
   explicit InternalScopedBuffer(uptr cnt) {
     cnt_ = cnt;
-    ptr_ = (T*)MmapOrDie(cnt * sizeof(T), "InternalScopedBuffer");
+    ptr_ = (T *)MmapOrDie(cnt * sizeof(T), "InternalScopedBuffer");
   }
-  ~InternalScopedBuffer() {
-    UnmapOrDie(ptr_, cnt_ * sizeof(T));
-  }
+  ~InternalScopedBuffer() { UnmapOrDie(ptr_, cnt_ * sizeof(T)); }
   T &operator[](uptr i) { return ptr_[i]; }
   T *data() { return ptr_; }
   uptr size() { return cnt_ * sizeof(T); }
@@ -135,9 +135,11 @@
  private:
   T *ptr_;
   uptr cnt_;
-  // Disallow evil constructors.
-  InternalScopedBuffer(const InternalScopedBuffer&);
-  void operator=(const InternalScopedBuffer&);
+  // Disallow copies and moves.
+  InternalScopedBuffer(const InternalScopedBuffer &) = delete;
+  InternalScopedBuffer &operator=(const InternalScopedBuffer &) = delete;
+  InternalScopedBuffer(InternalScopedBuffer &&) = delete;
+  InternalScopedBuffer &operator=(InternalScopedBuffer &&) = delete;
 };
 
 class InternalScopedString : public InternalScopedBuffer<char> {
@@ -334,6 +336,7 @@
 u64 NanoTime();
 int Atexit(void (*function)(void));
 void SortArray(uptr *array, uptr size);
+void SortArray(u32 *array, uptr size);
 bool TemplateMatch(const char *templ, const char *str);
 
 // Exit
@@ -450,8 +453,8 @@
   if (IsPowerOfTwo(size)) return size;
 
   uptr up = MostSignificantSetBitIndex(size);
-  CHECK(size < (1ULL << (up + 1)));
-  CHECK(size > (1ULL << up));
+  CHECK_LT(size, (1ULL << (up + 1)));
+  CHECK_GT(size, (1ULL << up));
   return 1ULL << (up + 1);
 }
 
@@ -545,6 +548,13 @@
   uptr capacity() const {
     return capacity_;
   }
+  void resize(uptr new_size) {
+    Resize(new_size);
+    if (new_size > size_) {
+      internal_memset(&data_[size_], 0, sizeof(T) * (new_size - size_));
+    }
+    size_ = new_size;
+  }
 
   void clear() { size_ = 0; }
   bool empty() const { return size() == 0; }
@@ -629,20 +639,19 @@
   }
 }
 
-template<class Container, class Value, class Compare>
-uptr InternalBinarySearch(const Container &v, uptr first, uptr last,
-                          const Value &val, Compare comp) {
-  uptr not_found = last + 1;
-  while (last >= first) {
+// Works like std::lower_bound: finds the first element that is not less
+// than the val.
+template <class Container, class Value, class Compare>
+uptr InternalLowerBound(const Container &v, uptr first, uptr last,
+                        const Value &val, Compare comp) {
+  while (last > first) {
     uptr mid = (first + last) / 2;
     if (comp(v[mid], val))
       first = mid + 1;
-    else if (comp(val, v[mid]))
-      last = mid - 1;
     else
-      return mid;
+      last = mid;
   }
-  return not_found;
+  return first;
 }
 
 enum ModuleArch {
@@ -681,6 +690,7 @@
       return "arm64";
   }
   CHECK(0 && "Invalid module arch");
+  return "";
 }
 
 const uptr kModuleUUIDSize = 16;
@@ -896,6 +906,10 @@
   uptr allocated;
 };
 
+// The default value for allocator_release_to_os_interval_ms common flag to
+// indicate that sanitizer allocator should not attempt to release memory to OS.
+const s32 kReleaseToOSIntervalNever = -1;
+
 }  // namespace __sanitizer
 
 inline void *operator new(__sanitizer::operator_new_size_type size,
diff --git a/lib/sanitizer_common/sanitizer_common_interceptors.inc b/lib/sanitizer_common/sanitizer_common_interceptors.inc
index f9ccca3..ca571d1 100644
--- a/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -30,6 +30,9 @@
 //   COMMON_INTERCEPTOR_SET_PTHREAD_NAME
 //   COMMON_INTERCEPTOR_HANDLE_RECVMSG
 //   COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED
+//   COMMON_INTERCEPTOR_MEMSET_IMPL
+//   COMMON_INTERCEPTOR_MEMMOVE_IMPL
+//   COMMON_INTERCEPTOR_MEMCPY_IMPL
 //===----------------------------------------------------------------------===//
 
 #include "interception/interception.h"
@@ -67,6 +70,19 @@
 #define iconv __bsd_iconv
 #endif
 
+// Platform-specific options.
+#if SANITIZER_MAC
+namespace __sanitizer {
+bool PlatformHasDifferentMemcpyAndMemmove();
+}
+#define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE \
+  (__sanitizer::PlatformHasDifferentMemcpyAndMemmove())
+#elif SANITIZER_WINDOWS64
+#define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE false
+#else
+#define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE true
+#endif  // SANITIZER_MAC
+
 #ifndef COMMON_INTERCEPTOR_INITIALIZE_RANGE
 #define COMMON_INTERCEPTOR_INITIALIZE_RANGE(p, size) {}
 #endif
@@ -163,6 +179,47 @@
     COMMON_INTERCEPT_FUNCTION(fn)
 #endif
 
+#ifndef COMMON_INTERCEPTOR_MEMSET_IMPL
+#define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, dst, v, size) \
+  {                                                       \
+    if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)        \
+      return internal_memset(dst, v, size);               \
+    COMMON_INTERCEPTOR_ENTER(ctx, memset, dst, v, size);  \
+    if (common_flags()->intercept_intrin)                 \
+      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size);     \
+    return REAL(memset)(dst, v, size);                    \
+  }
+#endif
+
+#ifndef COMMON_INTERCEPTOR_MEMMOVE_IMPL
+#define COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, dst, src, size) \
+  {                                                          \
+    if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)           \
+      return internal_memmove(dst, src, size);               \
+    COMMON_INTERCEPTOR_ENTER(ctx, memmove, dst, src, size);  \
+    if (common_flags()->intercept_intrin) {                  \
+      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size);        \
+      COMMON_INTERCEPTOR_READ_RANGE(ctx, src, size);         \
+    }                                                        \
+    return REAL(memmove)(dst, src, size);                    \
+  }
+#endif
+
+#ifndef COMMON_INTERCEPTOR_MEMCPY_IMPL
+#define COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, dst, src, size) \
+  {                                                         \
+    if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED) {        \
+      return internal_memmove(dst, src, size);              \
+    }                                                       \
+    COMMON_INTERCEPTOR_ENTER(ctx, memcpy, dst, src, size);  \
+    if (common_flags()->intercept_intrin) {                 \
+      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size);       \
+      COMMON_INTERCEPTOR_READ_RANGE(ctx, src, size);        \
+    }                                                       \
+    return REAL(memcpy)(dst, src, size);                    \
+  }
+#endif
+
 struct FileMetadata {
   // For open_memstream().
   char **addr;
@@ -304,8 +361,14 @@
     c2 = (unsigned char)s2[i];
     if (c1 != c2 || c1 == '\0') break;
   }
-  COMMON_INTERCEPTOR_READ_RANGE(ctx, s1, Min(i + 1, size));
-  COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, Min(i + 1, size));
+  uptr i1 = i;
+  uptr i2 = i;
+  if (common_flags()->strict_string_checks) {
+    for (; i1 < size && s1[i1]; i1++) {}
+    for (; i2 < size && s2[i2]; i2++) {}
+  }
+  COMMON_INTERCEPTOR_READ_RANGE((ctx), (s1), Min(i1 + 1, size));
+  COMMON_INTERCEPTOR_READ_RANGE((ctx), (s2), Min(i2 + 1, size));
   int result = CharCmpX(c1, c2);
   CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strncmp, GET_CALLER_PC(), s1,
                              s2, size, result);
@@ -348,24 +411,30 @@
 }
 
 DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strncasecmp, uptr called_pc,
-                              const char *s1, const char *s2, uptr n,
+                              const char *s1, const char *s2, uptr size,
                               int result)
 
-INTERCEPTOR(int, strncasecmp, const char *s1, const char *s2, SIZE_T n) {
+INTERCEPTOR(int, strncasecmp, const char *s1, const char *s2, SIZE_T size) {
   void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, strncasecmp, s1, s2, n);
+  COMMON_INTERCEPTOR_ENTER(ctx, strncasecmp, s1, s2, size);
   unsigned char c1 = 0, c2 = 0;
   uptr i;
-  for (i = 0; i < n; i++) {
+  for (i = 0; i < size; i++) {
     c1 = (unsigned char)s1[i];
     c2 = (unsigned char)s2[i];
     if (CharCaseCmp(c1, c2) != 0 || c1 == '\0') break;
   }
-  COMMON_INTERCEPTOR_READ_RANGE(ctx, s1, Min(i + 1, n));
-  COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, Min(i + 1, n));
+  uptr i1 = i;
+  uptr i2 = i;
+  if (common_flags()->strict_string_checks) {
+    for (; i1 < size && s1[i1]; i1++) {}
+    for (; i2 < size && s2[i2]; i2++) {}
+  }
+  COMMON_INTERCEPTOR_READ_RANGE((ctx), (s1), Min(i1 + 1, size));
+  COMMON_INTERCEPTOR_READ_RANGE((ctx), (s2), Min(i2 + 1, size));
   int result = CharCaseCmp(c1, c2);
   CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strncasecmp, GET_CALLER_PC(),
-                             s1, s2, n, result);
+                             s1, s2, size, result);
   return result;
 }
 
@@ -553,14 +622,9 @@
 #endif
 
 #if SANITIZER_INTERCEPT_MEMSET
-INTERCEPTOR(void*, memset, void *dst, int v, uptr size) {
-  if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
-    return internal_memset(dst, v, size);
+INTERCEPTOR(void *, memset, void *dst, int v, uptr size) {
   void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, memset, dst, v, size);
-  if (common_flags()->intercept_intrin)
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size);
-  return REAL(memset)(dst, v, size);
+  COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, dst, v, size);
 }
 
 #define INIT_MEMSET COMMON_INTERCEPT_FUNCTION(memset)
@@ -569,16 +633,9 @@
 #endif
 
 #if SANITIZER_INTERCEPT_MEMMOVE
-INTERCEPTOR(void*, memmove, void *dst, const void *src, uptr size) {
-  if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
-    return internal_memmove(dst, src, size);
+INTERCEPTOR(void *, memmove, void *dst, const void *src, uptr size) {
   void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, memmove, dst, src, size);
-  if (common_flags()->intercept_intrin) {
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size);
-    COMMON_INTERCEPTOR_READ_RANGE(ctx, src, size);
-  }
-  return REAL(memmove)(dst, src, size);
+  COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, dst, src, size);
 }
 
 #define INIT_MEMMOVE COMMON_INTERCEPT_FUNCTION(memmove)
@@ -587,25 +644,30 @@
 #endif
 
 #if SANITIZER_INTERCEPT_MEMCPY
-INTERCEPTOR(void*, memcpy, void *dst, const void *src, uptr size) {
-  if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED) {
-    // On OS X, calling internal_memcpy here will cause memory corruptions,
-    // because memcpy and memmove are actually aliases of the same
-    // implementation.  We need to use internal_memmove here.
-    return internal_memmove(dst, src, size);
-  }
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, memcpy, dst, src, size);
-  if (common_flags()->intercept_intrin) {
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size);
-    COMMON_INTERCEPTOR_READ_RANGE(ctx, src, size);
-  }
+INTERCEPTOR(void *, memcpy, void *dst, const void *src, uptr size) {
+  // On OS X, calling internal_memcpy here will cause memory corruptions,
+  // because memcpy and memmove are actually aliases of the same
+  // implementation.  We need to use internal_memmove here.
   // N.B.: If we switch this to internal_ we'll have to use internal_memmove
   // due to memcpy being an alias of memmove on OS X.
-  return REAL(memcpy)(dst, src, size);
+  void *ctx;
+  if (PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE) {
+    COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, dst, src, size);
+  } else {
+    COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, dst, src, size);
+  }
 }
 
-#define INIT_MEMCPY COMMON_INTERCEPT_FUNCTION(memcpy)
+#define INIT_MEMCPY                                  \
+  do {                                               \
+    if (PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE) { \
+      COMMON_INTERCEPT_FUNCTION(memcpy);             \
+    } else {                                         \
+      ASSIGN_REAL(memcpy, memmove);                  \
+    }                                                \
+    CHECK(REAL(memcpy));                             \
+  } while (false)
+
 #else
 #define INIT_MEMCPY
 #endif
@@ -1227,12 +1289,12 @@
 
 #if SANITIZER_INTERCEPT_SCANF
 #define INIT_SCANF                    \
-  COMMON_INTERCEPT_FUNCTION(scanf);   \
-  COMMON_INTERCEPT_FUNCTION(sscanf);  \
-  COMMON_INTERCEPT_FUNCTION(fscanf);  \
-  COMMON_INTERCEPT_FUNCTION(vscanf);  \
-  COMMON_INTERCEPT_FUNCTION(vsscanf); \
-  COMMON_INTERCEPT_FUNCTION(vfscanf);
+  COMMON_INTERCEPT_FUNCTION_LDBL(scanf);   \
+  COMMON_INTERCEPT_FUNCTION_LDBL(sscanf);  \
+  COMMON_INTERCEPT_FUNCTION_LDBL(fscanf);  \
+  COMMON_INTERCEPT_FUNCTION_LDBL(vscanf);  \
+  COMMON_INTERCEPT_FUNCTION_LDBL(vsscanf); \
+  COMMON_INTERCEPT_FUNCTION_LDBL(vfscanf);
 #else
 #define INIT_SCANF
 #endif
@@ -1405,16 +1467,16 @@
 
 #if SANITIZER_INTERCEPT_PRINTF
 #define INIT_PRINTF                     \
-  COMMON_INTERCEPT_FUNCTION(printf);    \
-  COMMON_INTERCEPT_FUNCTION(sprintf);   \
-  COMMON_INTERCEPT_FUNCTION(snprintf);  \
-  COMMON_INTERCEPT_FUNCTION(asprintf);  \
-  COMMON_INTERCEPT_FUNCTION(fprintf);   \
-  COMMON_INTERCEPT_FUNCTION(vprintf);   \
-  COMMON_INTERCEPT_FUNCTION(vsprintf);  \
-  COMMON_INTERCEPT_FUNCTION(vsnprintf); \
-  COMMON_INTERCEPT_FUNCTION(vasprintf); \
-  COMMON_INTERCEPT_FUNCTION(vfprintf);
+  COMMON_INTERCEPT_FUNCTION_LDBL(printf);    \
+  COMMON_INTERCEPT_FUNCTION_LDBL(sprintf);   \
+  COMMON_INTERCEPT_FUNCTION_LDBL(snprintf);  \
+  COMMON_INTERCEPT_FUNCTION_LDBL(asprintf);  \
+  COMMON_INTERCEPT_FUNCTION_LDBL(fprintf);   \
+  COMMON_INTERCEPT_FUNCTION_LDBL(vprintf);   \
+  COMMON_INTERCEPT_FUNCTION_LDBL(vsprintf);  \
+  COMMON_INTERCEPT_FUNCTION_LDBL(vsnprintf); \
+  COMMON_INTERCEPT_FUNCTION_LDBL(vasprintf); \
+  COMMON_INTERCEPT_FUNCTION_LDBL(vfprintf);
 #else
 #define INIT_PRINTF
 #endif
@@ -4183,6 +4245,20 @@
 #define INIT_TMPNAM_R
 #endif
 
+#if SANITIZER_INTERCEPT_TTYNAME_R
+INTERCEPTOR(int, ttyname_r, int fd, char *name, SIZE_T namesize) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, ttyname_r, fd, name, namesize);
+  int res = REAL(ttyname_r)(fd, name, namesize);
+  if (res == 0)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, REAL(strlen)(name) + 1);
+  return res;
+}
+#define INIT_TTYNAME_R COMMON_INTERCEPT_FUNCTION(ttyname_r);
+#else
+#define INIT_TTYNAME_R
+#endif
+
 #if SANITIZER_INTERCEPT_TEMPNAM
 INTERCEPTOR(char *, tempnam, char *dir, char *pfx) {
   void *ctx;
@@ -4811,47 +4887,67 @@
 #endif
 
 #if SANITIZER_INTERCEPT_AEABI_MEM
-DECLARE_REAL_AND_INTERCEPTOR(void *, memmove, void *, const void *, uptr)
-DECLARE_REAL_AND_INTERCEPTOR(void *, memcpy, void *, const void *, uptr)
-DECLARE_REAL_AND_INTERCEPTOR(void *, memset, void *, int, uptr)
-
 INTERCEPTOR(void *, __aeabi_memmove, void *to, const void *from, uptr size) {
-  return WRAP(memmove)(to, from, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memmove4, void *to, const void *from, uptr size) {
-  return WRAP(memmove)(to, from, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memmove8, void *to, const void *from, uptr size) {
-  return WRAP(memmove)(to, from, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memcpy, void *to, const void *from, uptr size) {
-  return WRAP(memcpy)(to, from, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memcpy4, void *to, const void *from, uptr size) {
-  return WRAP(memcpy)(to, from, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memcpy8, void *to, const void *from, uptr size) {
-  return WRAP(memcpy)(to, from, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size);
 }
+
 // Note the argument order.
 INTERCEPTOR(void *, __aeabi_memset, void *block, uptr size, int c) {
-  return WRAP(memset)(block, c, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memset4, void *block, uptr size, int c) {
-  return WRAP(memset)(block, c, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memset8, void *block, uptr size, int c) {
-  return WRAP(memset)(block, c, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memclr, void *block, uptr size) {
-  return WRAP(memset)(block, 0, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memclr4, void *block, uptr size) {
-  return WRAP(memset)(block, 0, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
 }
+
 INTERCEPTOR(void *, __aeabi_memclr8, void *block, uptr size) {
-  return WRAP(memset)(block, 0, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
 }
+
 #define INIT_AEABI_MEM                         \
   COMMON_INTERCEPT_FUNCTION(__aeabi_memmove);  \
   COMMON_INTERCEPT_FUNCTION(__aeabi_memmove4); \
@@ -4870,11 +4966,11 @@
 #endif  // SANITIZER_INTERCEPT_AEABI_MEM
 
 #if SANITIZER_INTERCEPT___BZERO
-DECLARE_REAL_AND_INTERCEPTOR(void *, memset, void *, int, uptr);
-
 INTERCEPTOR(void *, __bzero, void *block, uptr size) {
-  return WRAP(memset)(block, 0, size);
+  void *ctx;
+  COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
 }
+
 #define INIT___BZERO COMMON_INTERCEPT_FUNCTION(__bzero);
 #else
 #define INIT___BZERO
@@ -5864,6 +5960,72 @@
 
 // FIXME: add other *stat interceptor
 
+#if SANITIZER_INTERCEPT_UTMP
+INTERCEPTOR(void *, getutent, int dummy) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, getutent, dummy);
+  void *res = REAL(getutent)(dummy);
+  if (res)
+    COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmp_sz);
+  return res;
+}
+INTERCEPTOR(void *, getutid, void *ut) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, getutid, ut);
+  void *res = REAL(getutid)(ut);
+  if (res)
+    COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmp_sz);
+  return res;
+}
+INTERCEPTOR(void *, getutline, void *ut) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, getutline, ut);
+  void *res = REAL(getutline)(ut);
+  if (res)
+    COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmp_sz);
+  return res;
+}
+#define INIT_UTMP                      \
+  COMMON_INTERCEPT_FUNCTION(getutent); \
+  COMMON_INTERCEPT_FUNCTION(getutid);  \
+  COMMON_INTERCEPT_FUNCTION(getutline);
+#else
+#define INIT_UTMP
+#endif
+
+#if SANITIZER_INTERCEPT_UTMPX
+INTERCEPTOR(void *, getutxent, int dummy) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, getutxent, dummy);
+  void *res = REAL(getutxent)(dummy);
+  if (res)
+    COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmpx_sz);
+  return res;
+}
+INTERCEPTOR(void *, getutxid, void *ut) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, getutxid, ut);
+  void *res = REAL(getutxid)(ut);
+  if (res)
+    COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmpx_sz);
+  return res;
+}
+INTERCEPTOR(void *, getutxline, void *ut) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, getutxline, ut);
+  void *res = REAL(getutxline)(ut);
+  if (res)
+    COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmpx_sz);
+  return res;
+}
+#define INIT_UTMPX                      \
+  COMMON_INTERCEPT_FUNCTION(getutxent); \
+  COMMON_INTERCEPT_FUNCTION(getutxid);  \
+  COMMON_INTERCEPT_FUNCTION(getutxline);
+#else
+#define INIT_UTMPX
+#endif
+
 static void InitializeCommonInterceptors() {
   static u64 metadata_mem[sizeof(MetadataHashMap) / sizeof(u64) + 1];
   interceptor_metadata_map = new((void *)&metadata_mem) MetadataHashMap();
@@ -6008,6 +6170,7 @@
   INIT_PTHREAD_BARRIERATTR_GETPSHARED;
   INIT_TMPNAM;
   INIT_TMPNAM_R;
+  INIT_TTYNAME_R;
   INIT_TEMPNAM;
   INIT_PTHREAD_SETNAME_NP;
   INIT_SINCOS;
@@ -6059,4 +6222,6 @@
   INIT___LXSTAT;
   INIT___LXSTAT64;
   // FIXME: add other *stat interceptors.
+  INIT_UTMP;
+  INIT_UTMPX;
 }
diff --git a/lib/sanitizer_common/sanitizer_common_interceptors_format.inc b/lib/sanitizer_common/sanitizer_common_interceptors_format.inc
index 92318cd..1256349 100644
--- a/lib/sanitizer_common/sanitizer_common_interceptors_format.inc
+++ b/lib/sanitizer_common/sanitizer_common_interceptors_format.inc
@@ -435,10 +435,6 @@
 }
 
 static int printf_get_value_size(PrintfDirective *dir) {
-  if (dir->convSpecifier == 'm') {
-    return sizeof(char *);
-  }
-
   if (char_is_one_of(dir->convSpecifier, "cCsS")) {
     unsigned charSize =
         format_get_char_size(dir->convSpecifier, dir->lengthModifier);
@@ -519,6 +515,9 @@
       // Dynamic precision
       SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
     }
+    // %m does not require an argument: it prints strerror(errno).
+    if (dir.convSpecifier == 'm')
+      continue;
     int size = printf_get_value_size(&dir);
     if (size == FSS_INVALID) {
       Report("WARNING: unexpected format specifier in printf "
diff --git a/lib/sanitizer_common/sanitizer_common_libcdep.cc b/lib/sanitizer_common/sanitizer_common_libcdep.cc
index 27876f4..49ca961 100644
--- a/lib/sanitizer_common/sanitizer_common_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_common_libcdep.cc
@@ -13,6 +13,7 @@
 
 #include "sanitizer_common.h"
 
+#include "sanitizer_allocator_interface.h"
 #include "sanitizer_flags.h"
 #include "sanitizer_stackdepot.h"
 #include "sanitizer_stacktrace.h"
@@ -69,12 +70,15 @@
   SoftRssLimitExceededCallback = Callback;
 }
 
+#if SANITIZER_LINUX && !SANITIZER_GO
 void BackgroundThread(void *arg) {
   uptr hard_rss_limit_mb = common_flags()->hard_rss_limit_mb;
   uptr soft_rss_limit_mb = common_flags()->soft_rss_limit_mb;
+  bool heap_profile = common_flags()->heap_profile;
   uptr prev_reported_rss = 0;
   uptr prev_reported_stack_depot_size = 0;
   bool reached_soft_rss_limit = false;
+  uptr rss_during_last_reported_profile = 0;
   while (true) {
     SleepForMillis(100);
     uptr current_rss_mb = GetRSS() >> 20;
@@ -116,8 +120,15 @@
           SoftRssLimitExceededCallback(false);
       }
     }
+    if (heap_profile &&
+        current_rss_mb > rss_during_last_reported_profile * 1.1) {
+      Printf("\n\nHEAP PROFILE at RSS %zdMb\n", current_rss_mb);
+      __sanitizer_print_memory_profile(90);
+      rss_during_last_reported_profile = current_rss_mb;
+    }
   }
 }
+#endif
 
 void WriteToSyslog(const char *msg) {
   InternalScopedString msg_copy(kErrorMessageBufferSize);
@@ -142,7 +153,8 @@
     !SANITIZER_GO  // Need to implement/test on other platforms.
   // Start the background thread if one of the rss limits is given.
   if (!common_flags()->hard_rss_limit_mb &&
-      !common_flags()->soft_rss_limit_mb) return;
+      !common_flags()->soft_rss_limit_mb &&
+      !common_flags()->heap_profile) return;
   if (!&real_pthread_create) return;  // Can't spawn the thread anyway.
   internal_start_thread(BackgroundThread, nullptr);
 #endif
diff --git a/lib/sanitizer_common/sanitizer_common_nolibc.cc b/lib/sanitizer_common/sanitizer_common_nolibc.cc
index e24cf99..ba54c73 100644
--- a/lib/sanitizer_common/sanitizer_common_nolibc.cc
+++ b/lib/sanitizer_common/sanitizer_common_nolibc.cc
@@ -17,6 +17,9 @@
 
 namespace __sanitizer {
 
+// The Windows implementations of these functions use the win32 API directly,
+// bypassing libc.
+#if !SANITIZER_WINDOWS
 #if SANITIZER_LINUX
 bool ShouldLogAfterPrintf() { return false; }
 void LogMessageOnPrintf(const char *str) {}
@@ -24,5 +27,10 @@
 void WriteToSyslog(const char *buffer) {}
 void Abort() { internal__exit(1); }
 void SleepForSeconds(int seconds) { internal_sleep(seconds); }
+#endif // !SANITIZER_WINDOWS
+
+#if !SANITIZER_WINDOWS && !SANITIZER_MAC
+void ListOfModules::init() {}
+#endif
 
 }  // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_coverage_libcdep.cc b/lib/sanitizer_common/sanitizer_coverage_libcdep.cc
index 5135fc8..5945ebb 100644
--- a/lib/sanitizer_common/sanitizer_coverage_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_coverage_libcdep.cc
@@ -113,8 +113,6 @@
   uptr *data();
   uptr size() const;
 
-  void SetPcBuffer(uptr* data, uptr length);
-
  private:
   struct NamedPcRange {
     const char *copied_module_name;
@@ -145,9 +143,6 @@
   // Descriptor of the file mapped pc array.
   fd_t pc_fd;
 
-  uptr *pc_buffer;
-  uptr pc_buffer_len;
-
   // Vector of coverage guard arrays, protected by mu.
   InternalMmapVectorNoCtor<s32*> guard_array_vec;
 
@@ -219,9 +214,6 @@
     atomic_store(&pc_array_size, kPcArrayMaxSize, memory_order_relaxed);
   }
 
-  pc_buffer = nullptr;
-  pc_buffer_len = 0;
-
   cc_array = reinterpret_cast<uptr **>(MmapNoReserveOrDie(
       sizeof(uptr *) * kCcArrayMaxSize, "CovInit::cc_array"));
   atomic_store(&cc_array_size, kCcArrayMaxSize, memory_order_relaxed);
@@ -427,7 +419,6 @@
            atomic_load(&pc_array_size, memory_order_acquire));
   uptr counter = atomic_fetch_add(&coverage_counter, 1, memory_order_relaxed);
   pc_array[idx] = BundlePcAndCounter(pc, counter);
-  if (pc_buffer && counter < pc_buffer_len) pc_buffer[counter] = pc;
 }
 
 // Registers a pair caller=>callee.
@@ -881,11 +872,6 @@
   DumpCallerCalleePairs();
 }
 
-void CoverageData::SetPcBuffer(uptr* data, uptr length) {
-  pc_buffer = data;
-  pc_buffer_len = length;
-}
-
 void CovPrepareForSandboxing(__sanitizer_sandbox_arguments *args) {
   if (!args) return;
   if (!coverage_enabled) return;
@@ -968,6 +954,9 @@
 }
 SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_dump() {
   coverage_data.DumpAll();
+#if SANITIZER_LINUX
+  __sanitizer_dump_trace_pc_guard_coverage();
+#endif
 }
 SANITIZER_INTERFACE_ATTRIBUTE void
 __sanitizer_cov_module_init(s32 *guards, uptr npcs, u8 *counters,
@@ -1021,16 +1010,6 @@
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-void __sanitizer_set_coverage_pc_buffer(uptr *data, uptr length) {
-  coverage_data.SetPcBuffer(data, length);
-}
-
-SANITIZER_INTERFACE_ATTRIBUTE
-uptr __sanitizer_get_coverage_pc_buffer_pos() {
-  return __sanitizer_get_total_unique_coverage();
-}
-
-SANITIZER_INTERFACE_ATTRIBUTE
 uptr __sanitizer_get_number_of_counters() {
   return coverage_data.GetNumberOf8bitCounters();
 }
@@ -1040,8 +1019,26 @@
   return coverage_data.Update8bitCounterBitsetAndClearCounters(bitset);
 }
 // Default empty implementations (weak). Users should redefine them.
+#if !SANITIZER_WINDOWS  // weak does not work on Windows.
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
 void __sanitizer_cov_trace_cmp() {}
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+void __sanitizer_cov_trace_cmp1() {}
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+void __sanitizer_cov_trace_cmp2() {}
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+void __sanitizer_cov_trace_cmp4() {}
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+void __sanitizer_cov_trace_cmp8() {}
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
 void __sanitizer_cov_trace_switch() {}
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+void __sanitizer_cov_trace_div4() {}
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+void __sanitizer_cov_trace_div8() {}
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+void __sanitizer_cov_trace_gep() {}
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+void __sanitizer_cov_trace_pc_indir() {}
+#endif  // !SANITIZER_WINDOWS
 } // extern "C"
diff --git a/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cc b/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cc
new file mode 100644
index 0000000..df6d10f
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cc
@@ -0,0 +1,174 @@
+//===-- sanitizer_coverage_libcdep_new.cc ---------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Sanitizer Coverage Controller for Trace PC Guard.
+
+#include "sancov_flags.h"
+#include "sanitizer_allocator_internal.h"
+#include "sanitizer_atomic.h"
+#include "sanitizer_common.h"
+#include "sanitizer_symbolizer.h"
+
+using namespace __sanitizer;
+
+using AddressRange = LoadedModule::AddressRange;
+
+namespace __sancov {
+namespace {
+
+static const u64 Magic64 = 0xC0BFFFFFFFFFFF64ULL;
+static const u64 Magic32 = 0xC0BFFFFFFFFFFF32ULL;
+static const u64 Magic = SANITIZER_WORDSIZE == 64 ? Magic64 : Magic32;
+
+static fd_t OpenFile(const char* path) {
+  error_t err;
+  fd_t fd = OpenFile(path, WrOnly, &err);
+  if (fd == kInvalidFd)
+    Report("SanitizerCoverage: failed to open %s for writing (reason: %d)\n",
+           path, err);
+  return fd;
+}
+
+static void GetCoverageFilename(char* path, const char* name,
+                                const char* extension) {
+  CHECK(name);
+  internal_snprintf(path, kMaxPathLength, "%s/%s.%zd.%s",
+                    common_flags()->coverage_dir, name, internal_getpid(),
+                    extension);
+}
+
+static void WriteModuleCoverage(char* file_path, const char* module_name,
+                                const uptr* pcs, uptr len) {
+  GetCoverageFilename(file_path, StripModuleName(module_name), "sancov");
+  fd_t fd = OpenFile(file_path);  // NOTE(review): kInvalidFd is not handled.
+  WriteToFile(fd, &Magic, sizeof(Magic));    // File starts with the magic.
+  WriteToFile(fd, pcs, len * sizeof(*pcs));  // Then the raw array of len PCs.
+  CloseFile(fd);
+  Printf("SanitizerCoverage: %s %zd PCs written\n", file_path, len);
+}
+
+static void SanitizerDumpCoverage(const uptr* unsorted_pcs, uptr len) {
+  if (!len) return;
+  // Groups the PCs by module and writes one "sancov" file per module.
+  char* file_path = static_cast<char*>(InternalAlloc(kMaxPathLength));
+  char* module_name = static_cast<char*>(InternalAlloc(kMaxPathLength));
+  uptr* pcs = static_cast<uptr*>(InternalAlloc(len * sizeof(uptr)));
+  // Sort a private copy so that the caller's array is left untouched.
+  internal_memcpy(pcs, unsorted_pcs, len * sizeof(uptr));
+  SortArray(pcs, len);
+
+  bool module_found = false;
+  uptr last_base = 0;
+  uptr module_start_idx = 0;
+
+  for (uptr i = 0; i < len; ++i) {
+    const uptr pc = pcs[i];
+    if (!pc) continue;
+    // pcs[i] receives the module-relative offset (see module_base below).
+    if (!__sanitizer_get_module_and_offset_for_pc(pc, nullptr, 0, &pcs[i])) {
+      Printf("ERROR: bad pc %zx\n", pc);  // %zx: pc is a uptr, not an int.
+      continue;
+    }
+    uptr module_base = pc - pcs[i];
+    // A different base starts a new module: flush the previous one first.
+    if (module_base != last_base || !module_found) {
+      if (module_found) {
+        WriteModuleCoverage(file_path, module_name, &pcs[module_start_idx],
+                            i - module_start_idx);
+      }
+
+      last_base = module_base;
+      module_start_idx = i;
+      module_found = true;
+      __sanitizer_get_module_and_offset_for_pc(pc, module_name, kMaxPathLength,
+                                               &pcs[i]);
+    }
+  }
+
+  if (module_found) {
+    WriteModuleCoverage(file_path, module_name, &pcs[module_start_idx],
+                        len - module_start_idx);
+  }
+
+  InternalFree(file_path);
+  InternalFree(module_name);
+  InternalFree(pcs);
+
+  if (sancov_flags()->symbolize) {
+    Printf("TODO(aizatsky): call sancov to symbolize\n");
+  }
+}
+
+// Collects trace-pc guard coverage.
+// This class relies on zero-initialization.
+class TracePcGuardController {
+ public:
+  void Initialize() {
+    CHECK(!initialized);
+
+    initialized = true;
+    InitializeSancovFlags();
+
+    pc_vector.Initialize(0);
+  }
+
+  void InitTracePcGuard(u32* start, u32* end) {
+    if (!initialized) Initialize();
+    CHECK_NE(start, end);  // Reject an empty range before reading *start.
+    CHECK(!*start);        // The first guard must not be numbered yet.
+    // Number the guards consecutively, continuing after the current size.
+    u32 i = pc_vector.size();
+    for (u32* p = start; p < end; p++) *p = ++i;
+    pc_vector.resize(i);
+  }
+
+  void TracePcGuard(u32* guard, uptr pc) {
+    atomic_uint32_t* guard_ptr = reinterpret_cast<atomic_uint32_t*>(guard);
+    u32 idx = atomic_exchange(guard_ptr, 0, memory_order_relaxed);
+    if (!idx) return;  // Guard was already zero: nothing to record.
+    // Guard indices start from 1, so slot idx - 1 holds this guard's pc.
+    pc_vector[idx - 1] = pc;
+  }
+
+  void Dump() {
+    if (!initialized || !common_flags()->coverage) return;
+    __sanitizer_dump_coverage(pc_vector.data(), pc_vector.size());
+  }
+
+ private:
+  bool initialized;
+  InternalMmapVectorNoCtor<uptr> pc_vector;
+};
+
+static TracePcGuardController pc_guard_controller;
+
+}  // namespace
+}  // namespace __sancov
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_dump_coverage(  // NOLINT
+    const uptr* pcs, uptr len) {
+  return __sancov::SanitizerDumpCoverage(pcs, len);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void
+__sanitizer_cov_trace_pc_guard(u32* guard) {
+  if (!*guard) return;
+  __sancov::pc_guard_controller.TracePcGuard(guard, GET_CALLER_PC() - 1);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void
+__sanitizer_cov_trace_pc_guard_init(u32* start, u32* end) {
+  if (start == end || *start) return;
+  __sancov::pc_guard_controller.InitTracePcGuard(start, end);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_dump_trace_pc_guard_coverage() {
+  __sancov::pc_guard_controller.Dump();
+}
+}  // extern "C"
diff --git a/lib/sanitizer_common/sanitizer_dbghelp.h b/lib/sanitizer_common/sanitizer_dbghelp.h
new file mode 100644
index 0000000..1689edb
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_dbghelp.h
@@ -0,0 +1,42 @@
+//===-- sanitizer_dbghelp.h ------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Wrappers for lazy loaded dbghelp.dll. Provides function pointers and a
+// callback to initialize them.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_DBGHELP_H
+#define SANITIZER_DBGHELP_H
+
+#if !SANITIZER_WINDOWS
+#error "sanitizer_dbghelp.h is a Windows-only header"
+#endif
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <dbghelp.h>
+
+namespace __sanitizer {
+
+extern decltype(::StackWalk64) *StackWalk64;
+extern decltype(::SymCleanup) *SymCleanup;
+extern decltype(::SymFromAddr) *SymFromAddr;
+extern decltype(::SymFunctionTableAccess64) *SymFunctionTableAccess64;
+extern decltype(::SymGetLineFromAddr64) *SymGetLineFromAddr64;
+extern decltype(::SymGetModuleBase64) *SymGetModuleBase64;
+extern decltype(::SymGetSearchPathW) *SymGetSearchPathW;
+extern decltype(::SymInitialize) *SymInitialize;
+extern decltype(::SymSetOptions) *SymSetOptions;
+extern decltype(::SymSetSearchPathW) *SymSetSearchPathW;
+extern decltype(::UnDecorateSymbolName) *UnDecorateSymbolName;
+
+}  // namespace __sanitizer
+
+#endif  // SANITIZER_DBGHELP_H
diff --git a/lib/sanitizer_common/sanitizer_flags.inc b/lib/sanitizer_common/sanitizer_flags.inc
index 4671425..d7fa34a 100644
--- a/lib/sanitizer_common/sanitizer_flags.inc
+++ b/lib/sanitizer_common/sanitizer_flags.inc
@@ -121,6 +121,12 @@
             " until the RSS goes below the soft limit."
             " This limit does not affect memory allocations other than"
             " malloc/new.")
+COMMON_FLAG(bool, heap_profile, false, "Experimental heap profiler, asan-only")
+COMMON_FLAG(s32, allocator_release_to_os_interval_ms, kReleaseToOSIntervalNever,
+            "Experimental. Only affects a 64-bit allocator. If set, tries to "
+            "release unused memory to the OS, but not more often than this "
+            "interval (in milliseconds). Negative values mean do not attempt "
+            "to release memory to the OS.\n")
 COMMON_FLAG(bool, can_use_proc_maps_statm, true,
             "If false, do not attempt to read /proc/maps/statm."
             " Mostly useful for testing sanitizers.")
@@ -153,10 +159,10 @@
 COMMON_FLAG(bool, print_suppressions, true,
             "Print matched suppressions at exit.")
 COMMON_FLAG(
-    bool, disable_coredump, (SANITIZER_WORDSIZE == 64),
-    "Disable core dumping. By default, disable_core=1 on 64-bit to avoid "
-    "dumping a 16T+ core file. Ignored on OSes that don't dump core by"
-    "default and for sanitizers that don't reserve lots of virtual memory.")
+    bool, disable_coredump, (SANITIZER_WORDSIZE == 64) && !SANITIZER_GO,
+    "Disable core dumping. By default, disable_coredump=1 on 64-bit to avoid"
+    " dumping a 16T+ core file. Ignored on OSes that don't dump core by"
+    " default and for sanitizers that don't reserve lots of virtual memory.")
 COMMON_FLAG(bool, use_madv_dontdump, true,
           "If set, instructs kernel to not store the (huge) shadow "
           "in core file.")
diff --git a/lib/sanitizer_common/sanitizer_interface_internal.h b/lib/sanitizer_common/sanitizer_interface_internal.h
index 7f43c84..174d5e9 100644
--- a/lib/sanitizer_common/sanitizer_interface_internal.h
+++ b/lib/sanitizer_common/sanitizer_interface_internal.h
@@ -46,8 +46,12 @@
   SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
   void __sanitizer_report_error_summary(const char *error_summary);
 
-  SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_dump();
   SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_init();
+  SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_dump();
+  SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_dump_coverage(
+      const __sanitizer::uptr *pcs, const __sanitizer::uptr len);
+  SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_dump_trace_pc_guard_coverage();
+
   SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov(__sanitizer::u32 *guard);
   SANITIZER_INTERFACE_ATTRIBUTE
   void __sanitizer_annotate_contiguous_container(const void *beg,
@@ -60,6 +64,11 @@
   SANITIZER_INTERFACE_ATTRIBUTE
   const void *__sanitizer_contiguous_container_find_bad_address(
       const void *beg, const void *mid, const void *end);
+
+  SANITIZER_INTERFACE_ATTRIBUTE
+  int __sanitizer_get_module_and_offset_for_pc(
+      __sanitizer::uptr pc, char *module_path,
+      __sanitizer::uptr module_path_len, __sanitizer::uptr *pc_offset);
   } // extern "C"
 
 #endif  // SANITIZER_INTERFACE_INTERNAL_H
diff --git a/lib/sanitizer_common/sanitizer_internal_defs.h b/lib/sanitizer_common/sanitizer_internal_defs.h
index a36ee6d..5338f79 100644
--- a/lib/sanitizer_common/sanitizer_internal_defs.h
+++ b/lib/sanitizer_common/sanitizer_internal_defs.h
@@ -24,7 +24,7 @@
 # define SANITIZER_INTERFACE_ATTRIBUTE __declspec(dllexport)
 // FIXME find out what we need on Windows, if anything.
 # define SANITIZER_WEAK_ATTRIBUTE
-#elif defined(SANITIZER_GO)
+#elif SANITIZER_GO
 # define SANITIZER_INTERFACE_ATTRIBUTE
 # define SANITIZER_WEAK_ATTRIBUTE
 #else
@@ -32,7 +32,7 @@
 # define SANITIZER_WEAK_ATTRIBUTE  __attribute__((weak))
 #endif
 
-#if (SANITIZER_LINUX || SANITIZER_WINDOWS) && !defined(SANITIZER_GO)
+#if (SANITIZER_LINUX || SANITIZER_MAC || SANITIZER_WINDOWS) && !SANITIZER_GO
 # define SANITIZER_SUPPORTS_WEAK_HOOKS 1
 #else
 # define SANITIZER_SUPPORTS_WEAK_HOOKS 0
@@ -289,8 +289,8 @@
 enum LinkerInitialized { LINKER_INITIALIZED = 0 };
 
 #if !defined(_MSC_VER) || defined(__clang__)
-# define GET_CALLER_PC() (uptr)__builtin_return_address(0)
-# define GET_CURRENT_FRAME() (uptr)__builtin_frame_address(0)
+#define GET_CALLER_PC() (__sanitizer::uptr) __builtin_return_address(0)
+#define GET_CURRENT_FRAME() (__sanitizer::uptr) __builtin_frame_address(0)
 inline void Trap() {
   __builtin_trap();
 }
@@ -299,9 +299,10 @@
 extern "C" void* _AddressOfReturnAddress(void);
 # pragma intrinsic(_ReturnAddress)
 # pragma intrinsic(_AddressOfReturnAddress)
-# define GET_CALLER_PC() (uptr)_ReturnAddress()
+#define GET_CALLER_PC() (__sanitizer::uptr) _ReturnAddress()
 // CaptureStackBackTrace doesn't need to know BP on Windows.
-# define GET_CURRENT_FRAME() (((uptr)_AddressOfReturnAddress()) + sizeof(uptr))
+#define GET_CURRENT_FRAME() \
+  (((__sanitizer::uptr)_AddressOfReturnAddress()) + sizeof(__sanitizer::uptr))
 
 extern "C" void __ud2(void);
 # pragma intrinsic(__ud2)
@@ -319,11 +320,11 @@
   }
 
 // Forces the compiler to generate a frame pointer in the function.
-#define ENABLE_FRAME_POINTER                                       \
-  do {                                                             \
-    volatile uptr enable_fp;                                       \
-    enable_fp = GET_CURRENT_FRAME();                               \
-    (void)enable_fp;                                               \
+#define ENABLE_FRAME_POINTER              \
+  do {                                    \
+    volatile __sanitizer::uptr enable_fp; \
+    enable_fp = GET_CURRENT_FRAME();      \
+    (void)enable_fp;                      \
   } while (0)
 
 }  // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_libignore.cc b/lib/sanitizer_common/sanitizer_libignore.cc
index 5453939..aa4fa88 100644
--- a/lib/sanitizer_common/sanitizer_libignore.cc
+++ b/lib/sanitizer_common/sanitizer_libignore.cc
@@ -50,23 +50,23 @@
   }
 
   // Scan suppressions list and find newly loaded and unloaded libraries.
-  MemoryMappingLayout proc_maps(/*cache_enabled*/false);
-  InternalScopedString module(kMaxPathLength);
+  ListOfModules modules;
+  modules.init();
   for (uptr i = 0; i < count_; i++) {
     Lib *lib = &libs_[i];
     bool loaded = false;
-    proc_maps.Reset();
-    uptr b, e, off, prot;
-    while (proc_maps.Next(&b, &e, &off, module.data(), module.size(), &prot)) {
-      if ((prot & MemoryMappingLayout::kProtectionExecute) == 0)
-        continue;
-      if (TemplateMatch(lib->templ, module.data()) ||
-          (lib->real_name &&
-          internal_strcmp(lib->real_name, module.data()) == 0)) {
+    for (const auto &mod : modules) {
+      for (const auto &range : mod.ranges()) {
+        if (!range.executable)
+          continue;
+        if (!TemplateMatch(lib->templ, mod.full_name()) &&
+            !(lib->real_name &&
+            internal_strcmp(lib->real_name, mod.full_name()) == 0))
+          continue;
         if (loaded) {
           Report("%s: called_from_lib suppression '%s' is matched against"
                  " 2 libraries: '%s' and '%s'\n",
-                 SanitizerToolName, lib->templ, lib->name, module.data());
+                 SanitizerToolName, lib->templ, lib->name, mod.full_name());
           Die();
         }
         loaded = true;
@@ -75,13 +75,16 @@
         VReport(1,
                 "Matched called_from_lib suppression '%s' against library"
                 " '%s'\n",
-                lib->templ, module.data());
+                lib->templ, mod.full_name());
         lib->loaded = true;
-        lib->name = internal_strdup(module.data());
-        const uptr idx = atomic_load(&loaded_count_, memory_order_relaxed);
-        code_ranges_[idx].begin = b;
-        code_ranges_[idx].end = e;
-        atomic_store(&loaded_count_, idx + 1, memory_order_release);
+        lib->name = internal_strdup(mod.full_name());
+        const uptr idx =
+            atomic_load(&ignored_ranges_count_, memory_order_relaxed);
+        CHECK_LT(idx, kMaxLibs);
+        ignored_code_ranges_[idx].begin = range.beg;
+        ignored_code_ranges_[idx].end = range.end;
+        atomic_store(&ignored_ranges_count_, idx + 1, memory_order_release);
+        break;
       }
     }
     if (lib->loaded && !loaded) {
@@ -91,6 +94,29 @@
       Die();
     }
   }
+
+  // Track instrumented ranges.
+  if (track_instrumented_libs_) {
+    for (const auto &mod : modules) {
+      if (!mod.instrumented())
+        continue;
+      for (const auto &range : mod.ranges()) {
+        if (!range.executable)
+          continue;
+        if (IsPcInstrumented(range.beg) && IsPcInstrumented(range.end - 1))
+          continue;
+        VReport(1, "Adding instrumented range %p-%p from library '%s'\n",
+                range.beg, range.end, mod.full_name());
+        const uptr idx =
+            atomic_load(&instrumented_ranges_count_, memory_order_relaxed);
+        CHECK_LT(idx, kMaxLibs);
+        instrumented_code_ranges_[idx].begin = range.beg;
+        instrumented_code_ranges_[idx].end = range.end;
+        atomic_store(&instrumented_ranges_count_, idx + 1,
+                     memory_order_release);
+      }
+    }
+  }
 }
 
 void LibIgnore::OnLibraryUnloaded() {
diff --git a/lib/sanitizer_common/sanitizer_libignore.h b/lib/sanitizer_common/sanitizer_libignore.h
index cd56c36..17b0f56 100644
--- a/lib/sanitizer_common/sanitizer_libignore.h
+++ b/lib/sanitizer_common/sanitizer_libignore.h
@@ -30,6 +30,9 @@
 
   // Must be called during initialization.
   void AddIgnoredLibrary(const char *name_templ);
+  void IgnoreNoninstrumentedModules(bool enable) {
+    track_instrumented_libs_ = enable;
+  }
 
   // Must be called after a new dynamic library is loaded.
   void OnLibraryLoaded(const char *name);
@@ -37,8 +40,14 @@
   // Must be called after a dynamic library is unloaded.
   void OnLibraryUnloaded();
 
-  // Checks whether the provided PC belongs to one of the ignored libraries.
-  bool IsIgnored(uptr pc) const;
+  // Checks whether the provided PC belongs to one of the ignored libraries or
+  // the PC should be ignored because it belongs to a non-instrumented module
+  // (when ignore_noninstrumented_modules=1). Also returns true via
+  // "pc_in_ignored_lib" if the PC is in an ignored library, false otherwise.
+  bool IsIgnored(uptr pc, bool *pc_in_ignored_lib) const;
+
+  // Checks whether the provided PC belongs to an instrumented module.
+  bool IsPcInstrumented(uptr pc) const;
 
  private:
   struct Lib {
@@ -53,26 +62,48 @@
     uptr end;
   };
 
+  inline bool IsInRange(uptr pc, const LibCodeRange &range) const {
+    return (pc >= range.begin && pc < range.end);
+  }
+
   static const uptr kMaxLibs = 128;
 
   // Hot part:
-  atomic_uintptr_t loaded_count_;
-  LibCodeRange code_ranges_[kMaxLibs];
+  atomic_uintptr_t ignored_ranges_count_;
+  LibCodeRange ignored_code_ranges_[kMaxLibs];
+
+  atomic_uintptr_t instrumented_ranges_count_;
+  LibCodeRange instrumented_code_ranges_[kMaxLibs];
 
   // Cold part:
   BlockingMutex mutex_;
   uptr count_;
   Lib libs_[kMaxLibs];
+  bool track_instrumented_libs_;
 
   // Disallow copying of LibIgnore objects.
   LibIgnore(const LibIgnore&);  // not implemented
   void operator = (const LibIgnore&);  // not implemented
 };
 
-inline bool LibIgnore::IsIgnored(uptr pc) const {
-  const uptr n = atomic_load(&loaded_count_, memory_order_acquire);
+inline bool LibIgnore::IsIgnored(uptr pc, bool *pc_in_ignored_lib) const {
+  const uptr n = atomic_load(&ignored_ranges_count_, memory_order_acquire);
   for (uptr i = 0; i < n; i++) {
-    if (pc >= code_ranges_[i].begin && pc < code_ranges_[i].end)
+    if (IsInRange(pc, ignored_code_ranges_[i])) {
+      *pc_in_ignored_lib = true;
+      return true;
+    }
+  }
+  *pc_in_ignored_lib = false;
+  if (track_instrumented_libs_ && !IsPcInstrumented(pc))
+    return true;
+  return false;
+}
+
+inline bool LibIgnore::IsPcInstrumented(uptr pc) const {
+  const uptr n = atomic_load(&instrumented_ranges_count_, memory_order_acquire);
+  for (uptr i = 0; i < n; i++) {
+    if (IsInRange(pc, instrumented_code_ranges_[i]))
       return true;
   }
   return false;
diff --git a/lib/sanitizer_common/sanitizer_linux.cc b/lib/sanitizer_common/sanitizer_linux.cc
index 64be59d..7328a5c 100644
--- a/lib/sanitizer_common/sanitizer_linux.cc
+++ b/lib/sanitizer_common/sanitizer_linux.cc
@@ -99,7 +99,7 @@
 # define SANITIZER_LINUX_USES_64BIT_SYSCALLS 0
 #endif
 
-#if defined(__x86_64__)
+#if defined(__x86_64__) || SANITIZER_MIPS64
 extern "C" {
 extern void internal_sigreturn();
 }
@@ -671,7 +671,7 @@
 // Invokes sigaction via a raw syscall with a restorer, but does not support
 // all platforms yet.
 // We disable for Go simply because we have not yet added to buildgo.sh.
-#if defined(__x86_64__) && !SANITIZER_GO
+#if (defined(__x86_64__) || SANITIZER_MIPS64) && !SANITIZER_GO
 int internal_sigaction_syscall(int signum, const void *act, void *oldact) {
   if (act == nullptr)
     return internal_sigaction_norestorer(signum, act, oldact);
@@ -1230,7 +1230,7 @@
   return (signum == SIGSEGV || signum == SIGBUS) && common_flags()->handle_segv;
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 void *internal_start_thread(void(*func)(void *arg), void *arg) {
   // Start the thread with signals blocked, otherwise it can steal user signals.
   __sanitizer_sigset_t set, old;
diff --git a/lib/sanitizer_common/sanitizer_linux.h b/lib/sanitizer_common/sanitizer_linux.h
index 526fa44..d4d0f47 100644
--- a/lib/sanitizer_common/sanitizer_linux.h
+++ b/lib/sanitizer_common/sanitizer_linux.h
@@ -42,7 +42,7 @@
 // (like the process-wide error reporting SEGV handler) must use
 // internal_sigaction instead.
 int internal_sigaction_norestorer(int signum, const void *act, void *oldact);
-#if defined(__x86_64__) && !SANITIZER_GO
+#if (defined(__x86_64__) || SANITIZER_MIPS64) && !SANITIZER_GO
 // Uses a raw system call to avoid interceptors.
 int internal_sigaction_syscall(int signum, const void *act, void *oldact);
 #endif
diff --git a/lib/sanitizer_common/sanitizer_linux_libcdep.cc b/lib/sanitizer_common/sanitizer_linux_libcdep.cc
index a37bdf1..f99f0b5 100644
--- a/lib/sanitizer_common/sanitizer_linux_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_linux_libcdep.cc
@@ -26,14 +26,12 @@
 #include "sanitizer_procmaps.h"
 #include "sanitizer_stacktrace.h"
 
-#if SANITIZER_ANDROID || SANITIZER_FREEBSD
 #include <dlfcn.h>  // for dlsym()
-#endif
-
 #include <link.h>
 #include <pthread.h>
 #include <signal.h>
 #include <sys/resource.h>
+#include <syslog.h>
 
 #if SANITIZER_FREEBSD
 #include <pthread_np.h>
@@ -51,8 +49,6 @@
 
 #if SANITIZER_ANDROID && __ANDROID_API__ < 21
 #include <android/log.h>
-#else
-#include <syslog.h>
 #endif
 
 #if !SANITIZER_ANDROID
@@ -299,7 +295,10 @@
                 rdhwr %0,$29;\
                 .set pop" : "=r" (thread_pointer));
   descr_addr = thread_pointer - kTlsTcbOffset - TlsPreTcbSize();
-# elif defined(__aarch64__) || defined(__s390__)
+# elif defined(__aarch64__)
+  descr_addr = reinterpret_cast<uptr>(__builtin_thread_pointer()) -
+                                      ThreadDescriptorSize();
+# elif defined(__s390__)
   descr_addr = reinterpret_cast<uptr>(__builtin_thread_pointer());
 # elif defined(__powerpc64__)
   // PPC64LE uses TLS variant I. The thread pointer (in GPR 13)
@@ -521,6 +520,7 @@
 static atomic_uint8_t android_log_initialized;
 
 void AndroidLogInit() {
+  openlog(GetProcessName(), 0, LOG_USER);
   atomic_store(&android_log_initialized, 1, memory_order_release);
 }
 
diff --git a/lib/sanitizer_common/sanitizer_linux_mips64.S b/lib/sanitizer_common/sanitizer_linux_mips64.S
new file mode 100644
index 0000000..8729642
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_linux_mips64.S
@@ -0,0 +1,23 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+// Avoid being marked as needing an executable stack:
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+// Further contents are mips64 only:
+#if defined(__linux__) && defined(__mips64)
+
+.section .text
+.set noreorder
+.globl internal_sigreturn
+.type internal_sigreturn, @function
+internal_sigreturn:
+
+        li $v0,5211 // #5211 is for SYS_rt_sigreturn
+        syscall
+
+.size internal_sigreturn, .-internal_sigreturn
+
+#endif // defined(__linux__) && defined(__mips64)
diff --git a/lib/sanitizer_common/sanitizer_mac.cc b/lib/sanitizer_common/sanitizer_mac.cc
index c7b9645..b4f8ab5 100644
--- a/lib/sanitizer_common/sanitizer_mac.cc
+++ b/lib/sanitizer_common/sanitizer_mac.cc
@@ -373,7 +373,7 @@
 
 void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size,
                           uptr *tls_addr, uptr *tls_size) {
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   uptr stack_top, stack_bottom;
   GetThreadStackTopAndBottom(main, &stack_top, &stack_bottom);
   *stk_addr = stack_bottom;
@@ -448,6 +448,15 @@
   return result;
 }
 
+bool PlatformHasDifferentMemcpyAndMemmove() {
+  // On OS X 10.7 memcpy() and memmove() are both resolved
+  // into memmove$VARIANT$sse42.
+  // See also https://github.com/google/sanitizers/issues/34.
+  // TODO(glider): need to check dynamically that memcpy() and memmove() are
+  // actually the same function.
+  return GetMacosVersion() == MACOS_VERSION_SNOW_LEOPARD;
+}
+
 uptr GetRSS() {
   struct task_basic_info info;
   unsigned count = TASK_BASIC_INFO_COUNT;
@@ -473,12 +482,12 @@
 
 void internal_join_thread(void *th) { pthread_join((pthread_t)th, 0); }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 static BlockingMutex syslog_lock(LINKER_INITIALIZED);
 #endif
 
 void WriteOneLineToSyslog(const char *s) {
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   syslog_lock.CheckLocked();
   asl_log(nullptr, nullptr, ASL_LEVEL_ERR, "%s", s);
 #endif
@@ -491,7 +500,7 @@
 }
 
 void LogFullErrorReport(const char *buffer) {
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   // Log with os_trace. This will make it into the crash log.
 #if SANITIZER_OS_TRACE
   if (GetMacosVersion() >= MACOS_VERSION_YOSEMITE) {
@@ -564,7 +573,7 @@
 # endif
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 static const char kDyldInsertLibraries[] = "DYLD_INSERT_LIBRARIES";
 LowLevelAllocator allocator_for_env;
 
diff --git a/lib/sanitizer_common/sanitizer_platform.h b/lib/sanitizer_common/sanitizer_platform.h
index 0ce2307..d9a8e8d 100644
--- a/lib/sanitizer_common/sanitizer_platform.h
+++ b/lib/sanitizer_common/sanitizer_platform.h
@@ -168,7 +168,9 @@
 // For such platforms build this code with -DSANITIZER_CAN_USE_ALLOCATOR64=0 or
 // change the definition of SANITIZER_CAN_USE_ALLOCATOR64 here.
 #ifndef SANITIZER_CAN_USE_ALLOCATOR64
-# if defined(__mips64) || defined(__aarch64__)
+# if SANITIZER_ANDROID && defined(__aarch64__)
+#  define SANITIZER_CAN_USE_ALLOCATOR64 1
+# elif defined(__mips64) || defined(__aarch64__)
 #  define SANITIZER_CAN_USE_ALLOCATOR64 0
 # else
 #  define SANITIZER_CAN_USE_ALLOCATOR64 (SANITIZER_WORDSIZE == 64)
@@ -247,4 +249,10 @@
 #define SANITIZER_NLDBL_VERSION "GLIBC_2.4"
 #endif
 
+// Give SANITIZER_GO a definite value when the build did not set it, so it can
+// be tested with plain `#if SANITIZER_GO` / `#if !SANITIZER_GO`.
+#ifndef SANITIZER_GO
+# define SANITIZER_GO 0
+#endif
+
 #endif // SANITIZER_PLATFORM_H
diff --git a/lib/sanitizer_common/sanitizer_platform_interceptors.h b/lib/sanitizer_common/sanitizer_platform_interceptors.h
index 1e53dd1..c4f90ae 100644
--- a/lib/sanitizer_common/sanitizer_platform_interceptors.h
+++ b/lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -235,6 +235,7 @@
 #define SANITIZER_INTERCEPT_PTHREAD_BARRIERATTR_GETPSHARED SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_TMPNAM SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_TMPNAM_R SI_LINUX_NOT_ANDROID
+#define SANITIZER_INTERCEPT_TTYNAME_R SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_TEMPNAM SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_SINCOS SI_LINUX
 #define SANITIZER_INTERCEPT_REMQUO SI_NOT_WINDOWS
@@ -311,4 +312,11 @@
 #define SANITIZER_INTERCEPT___XSTAT64 SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT___LXSTAT SANITIZER_INTERCEPT___XSTAT
 #define SANITIZER_INTERCEPT___LXSTAT64 SI_LINUX_NOT_ANDROID
+
+// The compound conditions are parenthesized so each macro stays a single
+// operand when it is negated or combined with other operators at a use site
+// (e.g. `#if SANITIZER_INTERCEPT_UTMPX && ...`).
+#define SANITIZER_INTERCEPT_UTMP (SI_NOT_WINDOWS && !SI_MAC && !SI_FREEBSD)
+#define SANITIZER_INTERCEPT_UTMPX (SI_LINUX_NOT_ANDROID || SI_MAC || SI_FREEBSD)
+
 #endif  // #ifndef SANITIZER_PLATFORM_INTERCEPTORS_H
diff --git a/lib/sanitizer_common/sanitizer_platform_limits_linux.cc b/lib/sanitizer_common/sanitizer_platform_limits_linux.cc
index 64dbb99..46e3b18 100644
--- a/lib/sanitizer_common/sanitizer_platform_limits_linux.cc
+++ b/lib/sanitizer_common/sanitizer_platform_limits_linux.cc
@@ -38,6 +38,7 @@
 #define uid_t __kernel_uid_t
 #define gid_t __kernel_gid_t
 #define off_t __kernel_off_t
+#define time_t __kernel_time_t
 // This header seems to contain the definitions of _kernel_ stat* structs.
 #include <asm/stat.h>
 #undef ino_t
@@ -64,7 +65,8 @@
 }  // namespace __sanitizer
 
 #if !defined(__powerpc64__) && !defined(__x86_64__) && !defined(__aarch64__)\
-                            && !defined(__mips__) && !defined(__s390__)
+                            && !defined(__mips__) && !defined(__s390__)\
+                            && !defined(__sparc__)
 COMPILER_CHECK(struct___old_kernel_stat_sz == sizeof(struct __old_kernel_stat));
 #endif
 
diff --git a/lib/sanitizer_common/sanitizer_platform_limits_posix.cc b/lib/sanitizer_common/sanitizer_platform_limits_posix.cc
index 10c6321..fbde5e1 100644
--- a/lib/sanitizer_common/sanitizer_platform_limits_posix.cc
+++ b/lib/sanitizer_common/sanitizer_platform_limits_posix.cc
@@ -51,6 +51,9 @@
 #include <termios.h>
 #include <time.h>
 #include <wchar.h>
+#if !SANITIZER_MAC && !SANITIZER_FREEBSD
+#include <utmp.h>
+#endif
 
 #if !SANITIZER_IOS
 #include <net/route.h>
@@ -59,6 +62,7 @@
 #if !SANITIZER_ANDROID
 #include <sys/mount.h>
 #include <sys/timeb.h>
+#include <utmpx.h>
 #endif
 
 #if SANITIZER_LINUX
@@ -284,6 +288,13 @@
   int shmctl_shm_stat = (int)SHM_STAT;
 #endif
 
+#if !SANITIZER_MAC && !SANITIZER_FREEBSD
+  unsigned struct_utmp_sz = sizeof(struct utmp);
+#endif
+#if !SANITIZER_ANDROID
+  unsigned struct_utmpx_sz = sizeof(struct utmpx);
+#endif
+
   int map_fixed = MAP_FIXED;
 
   int af_inet = (int)AF_INET;
diff --git a/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/lib/sanitizer_common/sanitizer_platform_limits_posix.h
index 14bc750..c2d9f2c 100644
--- a/lib/sanitizer_common/sanitizer_platform_limits_posix.h
+++ b/lib/sanitizer_common/sanitizer_platform_limits_posix.h
@@ -20,13 +20,17 @@
 
 #if SANITIZER_FREEBSD
 // FreeBSD's dlopen() returns a pointer to an Obj_Entry structure that
-// incroporates the map structure.
+// incorporates the map structure.
 # define GET_LINK_MAP_BY_DLOPEN_HANDLE(handle) \
     ((link_map*)((handle) == nullptr ? nullptr : ((char*)(handle) + 544)))
 #else
 # define GET_LINK_MAP_BY_DLOPEN_HANDLE(handle) ((link_map*)(handle))
 #endif  // !SANITIZER_FREEBSD
 
+#ifndef __GLIBC_PREREQ
+#define __GLIBC_PREREQ(x, y) 0
+#endif
+
 namespace __sanitizer {
   extern unsigned struct_utsname_sz;
   extern unsigned struct_stat_sz;
@@ -87,6 +91,14 @@
 #elif defined(__s390x__)
   const unsigned struct_kernel_stat_sz = 144;
   const unsigned struct_kernel_stat64_sz = 0;
+#elif defined(__sparc__) && defined(__arch64__)
+  const unsigned struct___old_kernel_stat_sz = 0;
+  const unsigned struct_kernel_stat_sz = 104;
+  const unsigned struct_kernel_stat64_sz = 144;
+#elif defined(__sparc__) && !defined(__arch64__)
+  const unsigned struct___old_kernel_stat_sz = 0;
+  const unsigned struct_kernel_stat_sz = 64;
+  const unsigned struct_kernel_stat64_sz = 104;
 #endif
   struct __sanitizer_perf_event_attr {
     unsigned type;
@@ -109,7 +121,7 @@
 
 #if defined(__powerpc64__) || defined(__s390__)
   const unsigned struct___old_kernel_stat_sz = 0;
-#else
+#elif !defined(__sparc__)
   const unsigned struct___old_kernel_stat_sz = 32;
 #endif
 
@@ -194,6 +206,18 @@
     unsigned __seq;
     u64 __unused1;
     u64 __unused2;
+#elif defined(__sparc__)
+#if defined(__arch64__)
+    unsigned mode;
+    unsigned short __pad1;
+#else
+    unsigned short __pad1;
+    unsigned short mode;
+    unsigned short __pad2;
+#endif
+    unsigned short __seq;
+    unsigned long long __unused1;
+    unsigned long long __unused2;
 #elif defined(__mips__) || defined(__aarch64__) || defined(__s390x__)
     unsigned int mode;
     unsigned short __seq;
@@ -217,6 +241,26 @@
 
   struct __sanitizer_shmid_ds {
     __sanitizer_ipc_perm shm_perm;
+  #if defined(__sparc__)
+  #if !defined(__arch64__)
+    u32 __pad1;
+  #endif
+    long shm_atime;
+  #if !defined(__arch64__)
+    u32 __pad2;
+  #endif
+    long shm_dtime;
+  #if !defined(__arch64__)
+    u32 __pad3;
+  #endif
+    long shm_ctime;
+    uptr shm_segsz;
+    int shm_cpid;
+    int shm_lpid;
+    unsigned long shm_nattch;
+    unsigned long __glibc_reserved1;
+    unsigned long __glibc_reserved2;
+  #else
   #ifndef __powerpc__
     uptr shm_segsz;
   #elif !defined(__powerpc64__)
@@ -254,6 +298,7 @@
     uptr __unused4;
     uptr __unused5;
   #endif
+#endif
   };
 #elif SANITIZER_FREEBSD
   struct __sanitizer_ipc_perm {
@@ -588,7 +633,21 @@
     __sanitizer_sigset_t sa_mask;
 #endif
 #ifndef __mips__
+#if defined(__sparc__)
+#if __GLIBC_PREREQ (2, 20)
+    // On sparc glibc 2.19 and earlier sa_flags was unsigned long.
+#if defined(__arch64__)
+    // To maintain ABI compatibility on sparc64 when switching to an int,
+    // __glibc_reserved0 was added.
+    int __glibc_reserved0;
+#endif
     int sa_flags;
+#else
+    unsigned long sa_flags;
+#endif
+#else
+    int sa_flags;
+#endif
 #endif
 #endif
 #if SANITIZER_LINUX
@@ -607,7 +666,7 @@
   typedef __sanitizer_sigset_t __sanitizer_kernel_sigset_t;
 #elif defined(__mips__)
   struct __sanitizer_kernel_sigset_t {
-    u8 sig[16];
+    uptr sig[2];
   };
 #else
   struct __sanitizer_kernel_sigset_t {
@@ -616,6 +675,17 @@
 #endif
 
   // Linux system headers define the 'sa_handler' and 'sa_sigaction' macros.
+#if SANITIZER_MIPS
+  struct __sanitizer_kernel_sigaction_t {
+    unsigned int sa_flags;
+    union {
+      void (*handler)(int signo);
+      void (*sigaction)(int signo, void *info, void *ctx);
+    };
+    __sanitizer_kernel_sigset_t sa_mask;
+    void (*sa_restorer)(void);
+  };
+#else
   struct __sanitizer_kernel_sigaction_t {
     union {
       void (*handler)(int signo);
@@ -625,6 +695,7 @@
     void (*sa_restorer)(void);
     __sanitizer_kernel_sigset_t sa_mask;
   };
+#endif
 
   extern uptr sig_ign;
   extern uptr sig_dfl;
@@ -794,6 +865,13 @@
   extern int shmctl_shm_stat;
 #endif
 
+#if !SANITIZER_MAC && !SANITIZER_FREEBSD
+  extern unsigned struct_utmp_sz;
+#endif
+#if !SANITIZER_ANDROID
+  extern unsigned struct_utmpx_sz;
+#endif
+
   extern int map_fixed;
 
   // ioctl arguments
@@ -839,7 +917,8 @@
 
 #define IOC_NRBITS 8
 #define IOC_TYPEBITS 8
-#if defined(__powerpc__) || defined(__powerpc64__) || defined(__mips__)
+#if defined(__powerpc__) || defined(__powerpc64__) || defined(__mips__) || \
+    defined(__sparc__)
 #define IOC_SIZEBITS 13
 #define IOC_DIRBITS 3
 #define IOC_NONE 1U
@@ -869,7 +948,16 @@
 #define IOC_DIR(nr) (((nr) >> IOC_DIRSHIFT) & IOC_DIRMASK)
 #define IOC_TYPE(nr) (((nr) >> IOC_TYPESHIFT) & IOC_TYPEMASK)
 #define IOC_NR(nr) (((nr) >> IOC_NRSHIFT) & IOC_NRMASK)
+
+#if defined(__sparc__)
+// In sparc the 14 bits SIZE field overlaps with the
+// least significant bit of DIR, so either IOC_READ or
+// IOC_WRITE shall be 1 in order to get a non-zero SIZE.
+#define IOC_SIZE(nr) \
+  ((((((nr) >> 29) & 0x7) & (4U | 2U)) == 0) ? 0 : (((nr) >> 16) & 0x3fff))
+#else
 #define IOC_SIZE(nr) (((nr) >> IOC_SIZESHIFT) & IOC_SIZEMASK)
+#endif
 
   extern unsigned struct_ifreq_sz;
   extern unsigned struct_termios_sz;
diff --git a/lib/sanitizer_common/sanitizer_posix_libcdep.cc b/lib/sanitizer_common/sanitizer_posix_libcdep.cc
index e6c5fcc..dd62140 100644
--- a/lib/sanitizer_common/sanitizer_posix_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_posix_libcdep.cc
@@ -56,8 +56,12 @@
   return (uptr)pthread_self();
 }
 
-void FlushUnneededShadowMemory(uptr addr, uptr size) {
-  madvise((void*)addr, size, MADV_DONTNEED);
+void ReleaseMemoryPagesToOS(uptr beg, uptr end) {
+  uptr page_size = GetPageSizeCached();
+  uptr beg_aligned = RoundUpTo(beg, page_size);
+  uptr end_aligned = RoundDownTo(end, page_size);
+  if (beg_aligned < end_aligned)
+    madvise((void*)beg_aligned, end_aligned - beg_aligned, MADV_DONTNEED);
 }
 
 void NoHugePagesInRegion(uptr addr, uptr size) {
@@ -128,6 +132,7 @@
 }
 
 void Abort() {
+#if !SANITIZER_GO
   // If we are handling SIGABRT, unhandle it first.
   if (IsHandledDeadlySignal(SIGABRT)) {
     struct sigaction sigact;
@@ -135,12 +140,13 @@
     sigact.sa_sigaction = (sa_sigaction_t)SIG_DFL;
     internal_sigaction(SIGABRT, &sigact, nullptr);
   }
+#endif
 
   abort();
 }
 
 int Atexit(void (*function)(void)) {
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   return atexit(function);
 #else
   return 0;
@@ -151,7 +157,7 @@
   return isatty(fd) != 0;
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 // TODO(glider): different tools may require different altstack size.
 static const uptr kAltStackSize = SIGSTKSZ * 4;  // SIGSTKSZ is not enough.
 
diff --git a/lib/sanitizer_common/sanitizer_printf.cc b/lib/sanitizer_common/sanitizer_printf.cc
index d44f709..c8317be 100644
--- a/lib/sanitizer_common/sanitizer_printf.cc
+++ b/lib/sanitizer_common/sanitizer_printf.cc
@@ -219,7 +219,7 @@
 void OnPrint(const char *str) {
   (void)str;
 }
-#elif defined(SANITIZER_GO) && defined(TSAN_EXTERNAL_HOOKS)
+#elif SANITIZER_GO && defined(TSAN_EXTERNAL_HOOKS)
 void OnPrint(const char *str);
 #else
 void OnPrint(const char *str) {
diff --git a/lib/sanitizer_common/sanitizer_procmaps_mac.cc b/lib/sanitizer_common/sanitizer_procmaps_mac.cc
index 638f543..2831f28 100644
--- a/lib/sanitizer_common/sanitizer_procmaps_mac.cc
+++ b/lib/sanitizer_common/sanitizer_procmaps_mac.cc
@@ -19,6 +19,20 @@
 #include <mach-o/dyld.h>
 #include <mach-o/loader.h>
 
+// These are not available in older macOS SDKs.
+#ifndef CPU_SUBTYPE_X86_64_H
+#define CPU_SUBTYPE_X86_64_H  ((cpu_subtype_t)8)   /* Haswell */
+#endif
+#ifndef CPU_SUBTYPE_ARM_V7S
+#define CPU_SUBTYPE_ARM_V7S   ((cpu_subtype_t)11)  /* Swift */
+#endif
+#ifndef CPU_SUBTYPE_ARM_V7K
+#define CPU_SUBTYPE_ARM_V7K   ((cpu_subtype_t)12)
+#endif
+#ifndef CPU_TYPE_ARM64
+#define CPU_TYPE_ARM64        (CPU_TYPE_ARM | CPU_ARCH_ABI64)
+#endif
+
 namespace __sanitizer {
 
 MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) {
diff --git a/lib/sanitizer_common/sanitizer_quarantine.h b/lib/sanitizer_common/sanitizer_quarantine.h
index ccc22bf..1a0d954 100644
--- a/lib/sanitizer_common/sanitizer_quarantine.h
+++ b/lib/sanitizer_common/sanitizer_quarantine.h
@@ -49,17 +49,31 @@
   }
 
   void Init(uptr size, uptr cache_size) {
-    atomic_store(&max_size_, size, memory_order_release);
+    // Thread local quarantine size can be zero only when global quarantine size
+    // is zero (it allows us to perform just one atomic read per Put() call).
+    CHECK((size == 0 && cache_size == 0) || cache_size != 0);
+
+    atomic_store(&max_size_, size, memory_order_relaxed);
     atomic_store(&min_size_, size / 10 * 9,
-                 memory_order_release); // 90% of max size.
-    max_cache_size_ = cache_size;
+                 memory_order_relaxed);  // 90% of max size.
+    atomic_store(&max_cache_size_, cache_size, memory_order_relaxed);
   }
 
-  uptr GetSize() const { return atomic_load(&max_size_, memory_order_acquire); }
+  uptr GetSize() const { return atomic_load(&max_size_, memory_order_relaxed); }
+  uptr GetCacheSize() const {
+    return atomic_load(&max_cache_size_, memory_order_relaxed);
+  }
 
   void Put(Cache *c, Callback cb, Node *ptr, uptr size) {
-    c->Enqueue(cb, ptr, size);
-    if (c->Size() > max_cache_size_)
+    uptr cache_size = GetCacheSize();
+    if (cache_size) {
+      c->Enqueue(cb, ptr, size);
+    } else {
+      // cache_size == 0 only when size == 0 (see Init).
+      cb.Recycle(ptr);
+    }
+    // Check cache size anyway to accommodate for runtime cache_size change.
+    if (c->Size() > cache_size)
       Drain(c, cb);
   }
 
@@ -72,12 +86,17 @@
       Recycle(cb);
   }
 
+  void PrintStats() const {
+    // It assumes that the world is stopped, just as the allocator's PrintStats.
+    cache_.PrintStats();
+  }
+
  private:
   // Read-only data.
   char pad0_[kCacheLineSize];
   atomic_uintptr_t max_size_;
   atomic_uintptr_t min_size_;
-  uptr max_cache_size_;
+  atomic_uintptr_t max_cache_size_;
   char pad1_[kCacheLineSize];
   SpinMutex cache_mutex_;
   SpinMutex recycle_mutex_;
@@ -86,7 +105,7 @@
 
   void NOINLINE Recycle(Callback cb) {
     Cache tmp;
-    uptr min_size = atomic_load(&min_size_, memory_order_acquire);
+    uptr min_size = atomic_load(&min_size_, memory_order_relaxed);
     {
       SpinMutexLock l(&cache_mutex_);
       while (cache_.Size() > min_size) {
@@ -162,8 +181,25 @@
     return b;
   }
 
+  void PrintStats() const {
+    uptr batch_count = 0;
+    uptr total_quarantine_bytes = 0;
+    uptr total_quarantine_chunks = 0;
+    for (List::ConstIterator it = list_.begin(); it != list_.end(); ++it) {
+      batch_count++;
+      total_quarantine_bytes += (*it).size;
+      total_quarantine_chunks += (*it).count;
+    }
+    Printf("Global quarantine stats: batches: %zd; bytes: %zd; chunks: %zd "
+           "(capacity: %zd chunks)\n",
+           batch_count, total_quarantine_bytes, total_quarantine_chunks,
+           batch_count * QuarantineBatch::kSize);
+  }
+
  private:
-  IntrusiveList<QuarantineBatch> list_;
+  typedef IntrusiveList<QuarantineBatch> List;
+
+  List list_;
   atomic_uintptr_t size_;
 
   void SizeAdd(uptr add) {
@@ -182,6 +218,7 @@
     return b;
   }
 };
+
 } // namespace __sanitizer
 
 #endif // SANITIZER_QUARANTINE_H
diff --git a/lib/sanitizer_common/sanitizer_stackdepot.cc b/lib/sanitizer_common/sanitizer_stackdepot.cc
index 985193d..214dda5 100644
--- a/lib/sanitizer_common/sanitizer_stackdepot.cc
+++ b/lib/sanitizer_common/sanitizer_stackdepot.cc
@@ -153,9 +153,9 @@
   if (!map_.size())
     return StackTrace();
   IdDescPair pair = {id, nullptr};
-  uptr idx = InternalBinarySearch(map_, 0, map_.size(), pair,
-                                  IdDescPair::IdComparator);
-  if (idx > map_.size())
+  uptr idx =
+      InternalLowerBound(map_, 0, map_.size(), pair, IdDescPair::IdComparator);
+  if (idx >= map_.size() || map_[idx].id != id)  // idx == size(): not found
     return StackTrace();
   return map_[idx].desc->load();
 }
diff --git a/lib/sanitizer_common/sanitizer_stacktrace.cc b/lib/sanitizer_common/sanitizer_stacktrace.cc
index 7ad1f1f..2741dde 100644
--- a/lib/sanitizer_common/sanitizer_stacktrace.cc
+++ b/lib/sanitizer_common/sanitizer_stacktrace.cc
@@ -106,10 +106,6 @@
   }
 }
 
-static bool MatchPc(uptr cur_pc, uptr trace_pc, uptr threshold) {
-  return cur_pc - trace_pc <= threshold || trace_pc - cur_pc <= threshold;
-}
-
 void BufferedStackTrace::PopStackFrames(uptr count) {
   CHECK_LT(count, size);
   size -= count;
@@ -118,15 +114,14 @@
   }
 }
 
+static uptr Distance(uptr a, uptr b) { return a < b ? b - a : a - b; }
+
 uptr BufferedStackTrace::LocatePcInTrace(uptr pc) {
-  // Use threshold to find PC in stack trace, as PC we want to unwind from may
-  // slightly differ from return address in the actual unwinded stack trace.
-  const int kPcThreshold = 350;
-  for (uptr i = 0; i < size; ++i) {
-    if (MatchPc(pc, trace[i], kPcThreshold))
-      return i;
+  uptr best = 0;
+  for (uptr i = 1; i < size; ++i) {
+    if (Distance(trace[i], pc) < Distance(trace[best], pc)) best = i;
   }
-  return 0;
+  return best;
 }
 
 }  // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cc b/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cc
index ddf537a..36c98d0 100644
--- a/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cc
@@ -82,6 +82,21 @@
   }
 }
 
+static int GetModuleAndOffsetForPc(uptr pc, char *module_name,
+                                   uptr module_name_len, uptr *pc_offset) {
+  const char *found_module_name = nullptr;
+  bool ok = Symbolizer::GetOrInit()->GetModuleNameAndOffsetForPC(
+      pc, &found_module_name, pc_offset);
+
+  if (!ok) return false;
+
+  if (module_name && module_name_len) {
+    internal_strncpy(module_name, found_module_name, module_name_len);
+    module_name[module_name_len - 1] = '\x00';
+  }
+  return true;
+}
+
 }  // namespace __sanitizer
 using namespace __sanitizer;
 
@@ -117,4 +132,11 @@
   internal_strncpy(out_buf, data_desc.data(), out_buf_size);
   out_buf[out_buf_size - 1] = 0;
 }
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __sanitizer_get_module_and_offset_for_pc( // NOLINT
+    uptr pc, char *module_name, uptr module_name_len, uptr *pc_offset) {
+  return __sanitizer::GetModuleAndOffsetForPc(pc, module_name, module_name_len,
+                                              pc_offset);
+}
 }  // extern "C"
diff --git a/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc b/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
index 1ce232b..eb4c403 100644
--- a/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
@@ -190,6 +190,7 @@
 bool ThreadSuspender::SuspendAllThreads() {
   ThreadLister thread_lister(pid_);
   bool added_threads;
+  bool first_iteration = true;
   do {
     // Run through the directory entries once.
     added_threads = false;
@@ -199,12 +200,13 @@
         added_threads = true;
       tid = thread_lister.GetNextTID();
     }
-    if (thread_lister.error()) {
+    if (thread_lister.error() || (first_iteration && !added_threads)) {
       // Detach threads and fail.
       ResumeAllThreads();
       return false;
     }
     thread_lister.Reset();
+    first_iteration = false;
   } while (added_threads);
   return true;
 }
diff --git a/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc b/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc
index 150bc1c..7c377a7 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc
@@ -252,25 +252,21 @@
   char *file_line_info = 0;
   str = ExtractToken(str, "\n", &file_line_info);
   CHECK(file_line_info);
-  // Parse the last :<int>, which must be there.
-  char *last_colon = internal_strrchr(file_line_info, ':');
-  CHECK(last_colon);
-  int line_or_column = internal_atoll(last_colon + 1);
-  // Truncate the string at the last colon and find the next-to-last colon.
-  *last_colon = '\0';
-  last_colon = internal_strrchr(file_line_info, ':');
-  if (last_colon && IsDigit(last_colon[1])) {
-    // If the second-to-last colon is followed by a digit, it must be the line
-    // number, and the previous parsed number was a column.
-    info->line = internal_atoll(last_colon + 1);
-    info->column = line_or_column;
-    *last_colon = '\0';
-  } else {
-    // Otherwise, we have line info but no column info.
-    info->line = line_or_column;
-    info->column = 0;
+
+  if (uptr size = internal_strlen(file_line_info)) {
+    char *back = file_line_info + size - 1;
+    for (int i = 0; i < 2; ++i) {
+      while (back > file_line_info && IsDigit(*back)) --back;
+      if (*back != ':' || !IsDigit(back[1])) break;
+      info->column = info->line;
+      info->line = internal_atoll(back + 1);
+      // Truncate the string at the colon to keep only filename.
+      *back = '\0';
+      --back;
+    }
+    ExtractToken(file_line_info, "", &info->file);
   }
-  ExtractToken(file_line_info, "", &info->file);
+
   InternalFree(file_line_info);
   return str;
 }
diff --git a/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc b/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc
index 2d58b34..f50d8b1 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc
@@ -55,7 +55,7 @@
   // own demangler (libc++abi's implementation could be adapted so that
   // it does not allocate). For now, we just call it anyway, and we leak
   // the returned value.
-  if (__cxxabiv1::__cxa_demangle)
+  if (&__cxxabiv1::__cxa_demangle)
     if (const char *demangled_name =
           __cxxabiv1::__cxa_demangle(name, 0, 0, 0))
       return demangled_name;
@@ -496,7 +496,9 @@
     VReport(2, "Symbolizer is disabled.\n");
     return;
   }
-  if (SymbolizerTool *tool = InternalSymbolizer::get(allocator)) {
+  if (IsReportingOOM()) {
+    VReport(2, "Cannot use internal symbolizer: out of memory\n");
+  } else if (SymbolizerTool *tool = InternalSymbolizer::get(allocator)) {
     VReport(2, "Using internal symbolizer.\n");
     list->push_back(tool);
     return;
diff --git a/lib/sanitizer_common/sanitizer_symbolizer_win.cc b/lib/sanitizer_common/sanitizer_symbolizer_win.cc
index 3cb7e48..135823b 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer_win.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer_win.cc
@@ -14,15 +14,24 @@
 
 #include "sanitizer_platform.h"
 #if SANITIZER_WINDOWS
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#include <dbghelp.h>
-#pragma comment(lib, "dbghelp.lib")
 
+#include "sanitizer_dbghelp.h"
 #include "sanitizer_symbolizer_internal.h"
 
 namespace __sanitizer {
 
+decltype(::StackWalk64) *StackWalk64;
+decltype(::SymCleanup) *SymCleanup;
+decltype(::SymFromAddr) *SymFromAddr;
+decltype(::SymFunctionTableAccess64) *SymFunctionTableAccess64;
+decltype(::SymGetLineFromAddr64) *SymGetLineFromAddr64;
+decltype(::SymGetModuleBase64) *SymGetModuleBase64;
+decltype(::SymGetSearchPathW) *SymGetSearchPathW;
+decltype(::SymInitialize) *SymInitialize;
+decltype(::SymSetOptions) *SymSetOptions;
+decltype(::SymSetSearchPathW) *SymSetSearchPathW;
+decltype(::UnDecorateSymbolName) *UnDecorateSymbolName;
+
 namespace {
 
 class WinSymbolizerTool : public SymbolizerTool {
@@ -50,6 +59,29 @@
 void InitializeDbgHelpIfNeeded() {
   if (is_dbghelp_initialized)
     return;
+
+  HMODULE dbghelp = LoadLibraryA("dbghelp.dll");
+  CHECK(dbghelp && "failed to load dbghelp.dll");
+
+#define DBGHELP_IMPORT(name)                                                  \
+  do {                                                                        \
+    name =                                                                    \
+        reinterpret_cast<decltype(::name) *>(GetProcAddress(dbghelp, #name)); \
+    CHECK(name != nullptr);                                                   \
+  } while (0)
+  DBGHELP_IMPORT(StackWalk64);
+  DBGHELP_IMPORT(SymCleanup);
+  DBGHELP_IMPORT(SymFromAddr);
+  DBGHELP_IMPORT(SymFunctionTableAccess64);
+  DBGHELP_IMPORT(SymGetLineFromAddr64);
+  DBGHELP_IMPORT(SymGetModuleBase64);
+  DBGHELP_IMPORT(SymGetSearchPathW);
+  DBGHELP_IMPORT(SymInitialize);
+  DBGHELP_IMPORT(SymSetOptions);
+  DBGHELP_IMPORT(SymSetSearchPathW);
+  DBGHELP_IMPORT(UnDecorateSymbolName);
+#undef DBGHELP_IMPORT
+
   if (!TrySymInitialize()) {
     // OK, maybe the client app has called SymInitialize already.
     // That's a bit unfortunate for us as all the DbgHelp functions are
diff --git a/lib/sanitizer_common/sanitizer_thread_registry.cc b/lib/sanitizer_common/sanitizer_thread_registry.cc
index 6e7ddfa..c2b75e6 100644
--- a/lib/sanitizer_common/sanitizer_thread_registry.cc
+++ b/lib/sanitizer_common/sanitizer_thread_registry.cc
@@ -131,7 +131,7 @@
     tctx = context_factory_(tid);
     threads_[tid] = tctx;
   } else {
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
     Report("%s: Thread limit (%u threads) exceeded. Dying.\n",
            SanitizerToolName, max_threads_);
 #else
diff --git a/lib/sanitizer_common/sanitizer_win.cc b/lib/sanitizer_common/sanitizer_win.cc
index e804044..9682d29 100644
--- a/lib/sanitizer_common/sanitizer_win.cc
+++ b/lib/sanitizer_common/sanitizer_win.cc
@@ -18,12 +18,12 @@
 #define WIN32_LEAN_AND_MEAN
 #define NOGDI
 #include <windows.h>
-#include <dbghelp.h>
 #include <io.h>
 #include <psapi.h>
 #include <stdlib.h>
 
 #include "sanitizer_common.h"
+#include "sanitizer_dbghelp.h"
 #include "sanitizer_libc.h"
 #include "sanitizer_mutex.h"
 #include "sanitizer_placement_new.h"
@@ -31,6 +31,21 @@
 #include "sanitizer_stacktrace.h"
 #include "sanitizer_symbolizer.h"
 
+// A macro to tell the compiler that this part of the code cannot be reached,
+// if the compiler supports this feature. Since we're using this in
+// code that is called when terminating the process, the expansion of the
+// macro should not terminate the process to avoid infinite recursion.
+#if defined(__clang__)
+# define BUILTIN_UNREACHABLE() __builtin_unreachable()
+#elif defined(__GNUC__) && \
+    (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))
+# define BUILTIN_UNREACHABLE() __builtin_unreachable()
+#elif defined(_MSC_VER)
+# define BUILTIN_UNREACHABLE() __assume(0)
+#else
+# define BUILTIN_UNREACHABLE()
+#endif
+
 namespace __sanitizer {
 
 #include "sanitizer_syscall_generic.inc"
@@ -234,14 +249,13 @@
   return VirtualProtect((LPVOID)addr, size, PAGE_NOACCESS, &old_protection);
 }
 
-
-void FlushUnneededShadowMemory(uptr addr, uptr size) {
+void ReleaseMemoryPagesToOS(uptr beg, uptr end) {
   // This is almost useless on 32-bits.
   // FIXME: add madvise-analog when we move to 64-bits.
 }
 
 void NoHugePagesInRegion(uptr addr, uptr size) {
-  // FIXME: probably similar to FlushUnneededShadowMemory.
+  // FIXME: probably similar to ReleaseMemoryPagesToOS.
 }
 
 void DontDumpShadowMemory(uptr addr, uptr length) {
@@ -334,7 +348,7 @@
   uptr end_address;
 };
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 int CompareModulesBase(const void *pl, const void *pr) {
   const ModuleInfo *l = (ModuleInfo *)pl, *r = (ModuleInfo *)pr;
   if (l->base_address < r->base_address)
@@ -344,7 +358,7 @@
 #endif
 }  // namespace
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 void DumpProcessMap() {
   Report("Dumping process modules:\n");
   ListOfModules modules;
@@ -427,12 +441,10 @@
 }
 
 void Abort() {
-  if (::IsDebuggerPresent())
-    __debugbreak();
   internal__exit(3);
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 // Read the file to extract the ImageBase field from the PE header. If ASLR is
 // disabled and this virtual address is available, the loader will typically
 // load the image at this address. Therefore, we call it the preferred base. Any
@@ -658,7 +670,13 @@
 }
 
 void internal__exit(int exitcode) {
-  ExitProcess(exitcode);
+  // ExitProcess runs some finalizers, so use TerminateProcess to avoid that.
+  // The debugger doesn't stop on TerminateProcess like it does on ExitProcess,
+  // so add our own breakpoint here.
+  if (::IsDebuggerPresent())
+    __debugbreak();
+  TerminateProcess(GetCurrentProcess(), exitcode);
+  BUILTIN_UNREACHABLE();
 }
 
 uptr internal_ftruncate(fd_t fd, uptr size) {
@@ -725,7 +743,7 @@
 
 void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size,
                           uptr *tls_addr, uptr *tls_size) {
-#ifdef SANITIZER_GO
+#if SANITIZER_GO
   *stk_addr = 0;
   *stk_size = 0;
   *tls_addr = 0;
@@ -780,8 +798,8 @@
   stack_frame.AddrFrame.Mode = AddrModeFlat;
   stack_frame.AddrStack.Mode = AddrModeFlat;
   while (StackWalk64(machine_type, GetCurrentProcess(), GetCurrentThread(),
-                     &stack_frame, &ctx, NULL, &SymFunctionTableAccess64,
-                     &SymGetModuleBase64, NULL) &&
+                     &stack_frame, &ctx, NULL, SymFunctionTableAccess64,
+                     SymGetModuleBase64, NULL) &&
          size < Min(max_depth, kStackTraceMax)) {
     trace_buffer[size++] = (uptr)stack_frame.AddrPC.Offset;
   }
@@ -921,4 +939,18 @@
 
 }  // namespace __sanitizer
 
+#if !SANITIZER_GO
+// Workaround to implement weak hooks on Windows. COFF doesn't directly support
+// weak symbols, but it does support /alternatename, which is similar. If the
+// user does not override the hook, we will use this default definition instead
+// of null.
+extern "C" void __sanitizer_print_memory_profile(int top_percent) {}
+
+#ifdef _WIN64
+#pragma comment(linker, "/alternatename:__sanitizer_print_memory_profile=__sanitizer_default_print_memory_profile") // NOLINT
+#else
+#pragma comment(linker, "/alternatename:___sanitizer_print_memory_profile=___sanitizer_default_print_memory_profile") // NOLINT
+#endif
+#endif
+
 #endif  // _WIN32
diff --git a/lib/sanitizer_common/scripts/gen_dynamic_list.py b/lib/sanitizer_common/scripts/gen_dynamic_list.py
index 69f26f4..1d42306 100755
--- a/lib/sanitizer_common/scripts/gen_dynamic_list.py
+++ b/lib/sanitizer_common/scripts/gen_dynamic_list.py
@@ -19,6 +19,7 @@
 import re
 import subprocess
 import sys
+import platform
 
 new_delete = set([
                   '_Znam', '_ZnamRKSt9nothrow_t',    # operator new[](unsigned long)
@@ -50,7 +51,7 @@
     raise subprocess.CalledProcessError(nm_proc.returncode, nm)
   func_symbols = ['T', 'W']
   # On PowerPC, nm prints function descriptors from .data section.
-  if os.uname()[4] in ["powerpc", "ppc64"]:
+  if platform.uname()[4] in ["powerpc", "ppc64"]:
     func_symbols += ['D']
   for line in nm_out:
     cols = line.split(' ')
diff --git a/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cc b/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cc
new file mode 100644
index 0000000..bd315a0
--- /dev/null
+++ b/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cc
@@ -0,0 +1,72 @@
+//===-- sanitizer_symbolize.cc ----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of weak hooks from sanitizer_symbolizer_posix_libcdep.cc.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdio.h>
+#include <string>
+
+#include "llvm/DebugInfo/Symbolize/DIPrinter.h"
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
+
+static llvm::symbolize::LLVMSymbolizer *getDefaultSymbolizer() {
+  static llvm::symbolize::LLVMSymbolizer DefaultSymbolizer;
+  return &DefaultSymbolizer;
+}
+
+namespace __sanitizer {
+int internal_snprintf(char *buffer, unsigned long length, const char *format,
+                      ...);
+}  // namespace __sanitizer
+
+extern "C" {
+
+typedef uint64_t u64;
+
+bool __sanitizer_symbolize_code(const char *ModuleName, uint64_t ModuleOffset,
+                                char *Buffer, int MaxLength) {
+  std::string Result;
+  {
+    llvm::raw_string_ostream OS(Result);
+    llvm::symbolize::DIPrinter Printer(OS);
+    auto ResOrErr =
+        getDefaultSymbolizer()->symbolizeInlinedCode(ModuleName, ModuleOffset);
+    Printer << (ResOrErr ? ResOrErr.get() : llvm::DIInliningInfo());
+  }
+  __sanitizer::internal_snprintf(Buffer, MaxLength, "%s", Result.c_str());
+  return true;
+}
+
+bool __sanitizer_symbolize_data(const char *ModuleName, uint64_t ModuleOffset,
+                                char *Buffer, int MaxLength) {
+  std::string Result;
+  {
+    llvm::raw_string_ostream OS(Result);
+    llvm::symbolize::DIPrinter Printer(OS);
+    auto ResOrErr =
+        getDefaultSymbolizer()->symbolizeData(ModuleName, ModuleOffset);
+    Printer << (ResOrErr ? ResOrErr.get() : llvm::DIGlobal());
+  }
+  __sanitizer::internal_snprintf(Buffer, MaxLength, "%s", Result.c_str());
+  return true;
+}
+
+void __sanitizer_symbolize_flush() { getDefaultSymbolizer()->flush(); }
+
+int __sanitizer_symbolize_demangle(const char *Name, char *Buffer,
+                                   int MaxLength) {
+  std::string Result =
+      llvm::symbolize::LLVMSymbolizer::DemangleName(Name, nullptr);
+  __sanitizer::internal_snprintf(Buffer, MaxLength, "%s", Result.c_str());
+  return static_cast<int>(Result.size() + 1);
+}
+
+}  // extern "C"
diff --git a/lib/sanitizer_common/symbolizer/sanitizer_wrappers.cc b/lib/sanitizer_common/symbolizer/sanitizer_wrappers.cc
new file mode 100644
index 0000000..0a796d9
--- /dev/null
+++ b/lib/sanitizer_common/symbolizer/sanitizer_wrappers.cc
@@ -0,0 +1,175 @@
+//===-- sanitizer_wrappers.cc -----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Redirect some functions to sanitizer interceptors.
+//
+//===----------------------------------------------------------------------===//
+
+#include <dlfcn.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <tuple>
+
+// Need to match ../sanitizer_common/sanitizer_internal_defs.h
+#if defined(ARCH_PPC)
+#define OFF_T unsigned long
+#else
+#define OFF_T unsigned long long
+#endif
+
+namespace __sanitizer {
+unsigned long internal_open(const char *filename, int flags);
+unsigned long internal_open(const char *filename, int flags, unsigned mode);
+unsigned long internal_close(int fd);
+unsigned long internal_stat(const char *path, void *buf);
+unsigned long internal_lstat(const char *path, void *buf);
+unsigned long internal_fstat(int fd, void *buf);
+size_t internal_strlen(const char *s);
+unsigned long internal_mmap(void *addr, unsigned long length, int prot,
+                            int flags, int fd, OFF_T offset);
+void *internal_memcpy(void *dest, const void *src, unsigned long n);
+// Used to propagate errno.
+bool internal_iserror(unsigned long retval, int *rverrno = 0);
+}  // namespace __sanitizer
+
+namespace {
+
+template <typename T>
+struct GetTypes;
+
+template <typename R, typename... Args>
+struct GetTypes<R(Args...)> {
+  using Result = R;
+  template <size_t i>
+  struct Arg {
+    using Type = typename std::tuple_element<i, std::tuple<Args...>>::type;
+  };
+};
+
+#define LLVM_SYMBOLIZER_GET_FUNC(Function) \
+  ((__interceptor_##Function)              \
+       ? (__interceptor_##Function)        \
+       : reinterpret_cast<decltype(&Function)>(dlsym(RTLD_NEXT, #Function)))
+
+#define LLVM_SYMBOLIZER_INTERCEPTOR1(Function, ...)               \
+  GetTypes<__VA_ARGS__>::Result __interceptor_##Function(         \
+      GetTypes<__VA_ARGS__>::Arg<0>::Type) __attribute__((weak)); \
+  GetTypes<__VA_ARGS__>::Result Function(                         \
+      GetTypes<__VA_ARGS__>::Arg<0>::Type arg0) {                 \
+    return LLVM_SYMBOLIZER_GET_FUNC(Function)(arg0);              \
+  }
+
+#define LLVM_SYMBOLIZER_INTERCEPTOR2(Function, ...)               \
+  GetTypes<__VA_ARGS__>::Result __interceptor_##Function(         \
+      GetTypes<__VA_ARGS__>::Arg<0>::Type,                        \
+      GetTypes<__VA_ARGS__>::Arg<1>::Type) __attribute__((weak)); \
+  GetTypes<__VA_ARGS__>::Result Function(                         \
+      GetTypes<__VA_ARGS__>::Arg<0>::Type arg0,                   \
+      GetTypes<__VA_ARGS__>::Arg<1>::Type arg1) {                 \
+    return LLVM_SYMBOLIZER_GET_FUNC(Function)(arg0, arg1);        \
+  }
+
+#define LLVM_SYMBOLIZER_INTERCEPTOR3(Function, ...)               \
+  GetTypes<__VA_ARGS__>::Result __interceptor_##Function(         \
+      GetTypes<__VA_ARGS__>::Arg<0>::Type,                        \
+      GetTypes<__VA_ARGS__>::Arg<1>::Type,                        \
+      GetTypes<__VA_ARGS__>::Arg<2>::Type) __attribute__((weak)); \
+  GetTypes<__VA_ARGS__>::Result Function(                         \
+      GetTypes<__VA_ARGS__>::Arg<0>::Type arg0,                   \
+      GetTypes<__VA_ARGS__>::Arg<1>::Type arg1,                   \
+      GetTypes<__VA_ARGS__>::Arg<2>::Type arg2) {                 \
+    return LLVM_SYMBOLIZER_GET_FUNC(Function)(arg0, arg1, arg2);  \
+  }
+
+#define LLVM_SYMBOLIZER_INTERCEPTOR4(Function, ...)                    \
+  GetTypes<__VA_ARGS__>::Result __interceptor_##Function(              \
+      GetTypes<__VA_ARGS__>::Arg<0>::Type,                             \
+      GetTypes<__VA_ARGS__>::Arg<1>::Type,                             \
+      GetTypes<__VA_ARGS__>::Arg<2>::Type,                             \
+      GetTypes<__VA_ARGS__>::Arg<3>::Type) __attribute__((weak));      \
+  GetTypes<__VA_ARGS__>::Result Function(                              \
+      GetTypes<__VA_ARGS__>::Arg<0>::Type arg0,                        \
+      GetTypes<__VA_ARGS__>::Arg<1>::Type arg1,                        \
+      GetTypes<__VA_ARGS__>::Arg<2>::Type arg2,                        \
+      GetTypes<__VA_ARGS__>::Arg<3>::Type arg3) {                      \
+    return LLVM_SYMBOLIZER_GET_FUNC(Function)(arg0, arg1, arg2, arg3); \
+  }
+
+}  // namespace
+
+// C-style interface around internal sanitizer libc functions.
+extern "C" {
+
+#define RETURN_OR_SET_ERRNO(T, res)                   \
+  int rverrno;                                        \
+  if (__sanitizer::internal_iserror(res, &rverrno)) { \
+    errno = rverrno;                                  \
+    return (T)-1;                                     \
+  }                                                   \
+  return (T)res;
+
+int open(const char *filename, int flags, ...) {
+  unsigned long res;
+  if (flags & O_CREAT) {  // the mode vararg is only read when O_CREAT is set
+    va_list va;
+    va_start(va, flags);
+    unsigned mode = va_arg(va, unsigned);
+    va_end(va);
+    res = __sanitizer::internal_open(filename, flags, mode);
+  } else {
+    res = __sanitizer::internal_open(filename, flags);
+  }
+  RETURN_OR_SET_ERRNO(int, res);
+}
+
+int close(int fd) {
+  unsigned long res = __sanitizer::internal_close(fd);
+  RETURN_OR_SET_ERRNO(int, res);
+}
+
+#define STAT(func, arg, buf)                                  \
+  unsigned long res = __sanitizer::internal_##func(arg, buf); \
+  RETURN_OR_SET_ERRNO(int, res);
+
+int stat(const char *path, struct stat *buf) { STAT(stat, path, buf); }
+
+int lstat(const char *path, struct stat *buf) { STAT(lstat, path, buf); }
+
+int fstat(int fd, struct stat *buf) { STAT(fstat, fd, buf); }
+
+// Redirect versioned stat functions to the __sanitizer::internal() as well.
+int __xstat(int version, const char *path, struct stat *buf) {
+  STAT(stat, path, buf);
+}
+
+int __lxstat(int version, const char *path, struct stat *buf) {
+  STAT(lstat, path, buf);
+}
+
+int __fxstat(int version, int fd, struct stat *buf) { STAT(fstat, fd, buf); }
+
+size_t strlen(const char *s) { return __sanitizer::internal_strlen(s); }
+
+void *mmap(void *addr, size_t length, int prot, int flags, int fd,
+           off_t offset) {
+  unsigned long res = __sanitizer::internal_mmap(
+      addr, (unsigned long)length, prot, flags, fd, (unsigned long long)offset);
+  RETURN_OR_SET_ERRNO(void *, res);
+}
+
+LLVM_SYMBOLIZER_INTERCEPTOR3(read, ssize_t(int, void *, size_t))
+LLVM_SYMBOLIZER_INTERCEPTOR4(pread, ssize_t(int, void *, size_t, off_t))
+LLVM_SYMBOLIZER_INTERCEPTOR4(pread64, ssize_t(int, void *, size_t, off64_t))
+LLVM_SYMBOLIZER_INTERCEPTOR2(realpath, char *(const char *, char *))
+
+}  // extern "C"
diff --git a/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh b/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
new file mode 100755
index 0000000..07239eb
--- /dev/null
+++ b/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
@@ -0,0 +1,187 @@
+#!/bin/bash -eu
+#
+# Run as: CLANG=bin/clang ZLIB_SRC=src/zlib \
+#             build_symbolizer.sh runtime_build/lib/clang/4.0.0/lib/linux/
+# zlib can be downloaded from http://www.zlib.net.
+#
+# Script compiles self-contained object file with symbolization code and injects
+# it into the given set of runtime libraries. Script updates only libraries
+# which have unresolved __sanitizer_symbolize_* symbols and match architecture.
+# Object file is compiled from LLVM sources with dependencies like libc++ and
+# zlib. Then it internalizes symbols in the file, so that it can be linked
+# into arbitrary programs, avoiding conflicts with the program own symbols and
+# avoiding dependencies on any program symbols. The only acceptable dependencies
+# are libc and __sanitizer::internal_* from sanitizer runtime.
+#
+# Symbols exported by the object file will be used by Sanitizer runtime
+# libraries to symbolize code/data in-process.
+#
+# The script will modify the output directory which is given as the first
+# argument to the script.
+#
+# FIXME: We should really be using a simpler approach to building this object
+# file, and it should be available as a regular cmake rule. Conceptually, we
+# want to be doing "ld -r" followed by "objcopy -G" to create a relocatable
+# object file with only our entry points exposed. However, this does not work at
+# present, see PR30750.
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+SRC_DIR=$(readlink -f $SCRIPT_DIR/..)
+TARGE_DIR=$(readlink -f $1)
+
+LLVM_SRC="${LLVM_SRC:-$SCRIPT_DIR/../../../../../..}"
+LLVM_SRC=$(readlink -f $LLVM_SRC)
+
+if [[ ! -d "${LLVM_SRC}/projects/libcxxabi" ||
+      ! -d "${LLVM_SRC}/projects/libcxx" ]]; then
+  echo "Missing or incomplete LLVM_SRC"
+  exit 1
+fi
+
+if [[ "${ZLIB_SRC:-}" == ""  ||
+      ! -x "${ZLIB_SRC}/configure" ||
+      ! -f "${ZLIB_SRC}/zlib.h" ]]; then  # ':-' keeps 'set -u' from aborting
+  echo "Missing or incomplete ZLIB_SRC"
+  exit 1
+fi
+ZLIB_SRC=$(readlink -f "$ZLIB_SRC")
+
+J="${J:-50}"
+
+CLANG="${CLANG:-`which clang`}"
+CLANG_DIR=$(readlink -f $(dirname "$CLANG"))
+
+BUILD_DIR=$(readlink -f ./symbolizer)
+mkdir -p $BUILD_DIR
+cd $BUILD_DIR
+
+CC=$CLANG_DIR/clang
+CXX=$CLANG_DIR/clang++
+TBLGEN=$CLANG_DIR/llvm-tblgen
+LINK=$CLANG_DIR/llvm-link
+OPT=$CLANG_DIR/opt
+AR=$CLANG_DIR/llvm-ar
+
+for F in $CC $CXX $TBLGEN $LINK $OPT $AR; do
+  if [[ ! -x "$F" ]]; then
+    echo "Missing $F"
+     exit 1
+  fi
+done
+
+ZLIB_BUILD=${BUILD_DIR}/zlib
+LIBCXX_BUILD=${BUILD_DIR}/libcxx
+LLVM_BUILD=${BUILD_DIR}/llvm
+SYMBOLIZER_BUILD=${BUILD_DIR}/symbolizer
+
+FLAGS=${FLAGS:-}
+FLAGS="$FLAGS -fPIC -flto -Os -g0 -DNDEBUG"
+
+# Build zlib.
+mkdir -p ${ZLIB_BUILD}
+cd ${ZLIB_BUILD}
+cp -r ${ZLIB_SRC}/* .
+CC=$CC CFLAGS="$FLAGS" RANLIB=/bin/true ./configure --static
+make -j${J} libz.a
+
+# Build and install libcxxabi and libcxx.
+if [[ ! -d ${LIBCXX_BUILD} ]]; then
+  mkdir -p ${LIBCXX_BUILD}
+  cd ${LIBCXX_BUILD}
+  LIBCXX_FLAGS="${FLAGS} -Wno-macro-redefined -I${LLVM_SRC}/projects/libcxxabi/include"
+  cmake -GNinja \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_C_COMPILER=$CC \
+    -DCMAKE_CXX_COMPILER=$CXX \
+    -DCMAKE_C_FLAGS_RELEASE="${LIBCXX_FLAGS}" \
+    -DCMAKE_CXX_FLAGS_RELEASE="${LIBCXX_FLAGS}" \
+    -DLIBCXXABI_ENABLE_ASSERTIONS=OFF \
+    -DLIBCXXABI_ENABLE_EXCEPTIONS=OFF \
+    -DLIBCXXABI_ENABLE_SHARED=OFF \
+    -DLIBCXXABI_ENABLE_THREADS=OFF \
+    -DLIBCXX_ENABLE_ASSERTIONS=OFF \
+    -DLIBCXX_ENABLE_EXCEPTIONS=OFF \
+    -DLIBCXX_ENABLE_RTTI=OFF \
+    -DLIBCXX_ENABLE_SHARED=OFF \
+    -DLIBCXX_ENABLE_THREADS=OFF \
+  $LLVM_SRC
+fi
+cd ${LIBCXX_BUILD}
+ninja cxx cxxabi
+
+FLAGS="${FLAGS} -fno-rtti -fno-exceptions"
+
+# Build LLVM.
+if [[ ! -d ${LLVM_BUILD} ]]; then
+  mkdir -p ${LLVM_BUILD}
+  cd ${LLVM_BUILD}
+  LLVM_FLAGS="${FLAGS} -I${ZLIB_BUILD} -I${LIBCXX_BUILD}/include/c++/v1"
+  cmake -GNinja \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_C_COMPILER=$CC \
+    -DCMAKE_CXX_COMPILER=$CXX \
+    -DCMAKE_C_FLAGS_RELEASE="${LLVM_FLAGS}" \
+    -DCMAKE_CXX_FLAGS_RELEASE="${LLVM_FLAGS}" \
+    -DLLVM_TABLEGEN=$TBLGEN \
+    -DLLVM_ENABLE_ZLIB=ON \
+    -DLLVM_ENABLE_TERMINFO=OFF \
+    -DLLVM_ENABLE_THREADS=OFF \
+  $LLVM_SRC
+fi
+cd ${LLVM_BUILD}
+ninja LLVMSymbolize LLVMObject LLVMDebugInfoDWARF LLVMSupport LLVMDebugInfoPDB LLVMMC
+
+cd ${BUILD_DIR}
+rm -rf ${SYMBOLIZER_BUILD}
+mkdir ${SYMBOLIZER_BUILD}
+cd ${SYMBOLIZER_BUILD}
+
+for A in $LIBCXX_BUILD/lib/libc++.a \
+         $LIBCXX_BUILD/lib/libc++abi.a \
+         $LLVM_BUILD/lib/libLLVMSymbolize.a \
+         $LLVM_BUILD/lib/libLLVMObject.a \
+         $LLVM_BUILD/lib/libLLVMDebugInfoDWARF.a \
+         $LLVM_BUILD/lib/libLLVMSupport.a \
+         $LLVM_BUILD/lib/libLLVMDebugInfoPDB.a \
+         $LLVM_BUILD/lib/libLLVMMC.a \
+         $ZLIB_BUILD/libz.a ; do
+  for O in $($AR t $A); do
+    $AR x $A $O
+    mv -f $O "$(basename $A).$O" # Rename to avoid collisions between libs.
+  done
+done
+
+echo "Compiling..."
+SYMBOLIZER_FLAGS="$FLAGS -std=c++11 -I${LLVM_SRC}/include -I${LLVM_BUILD}/include -I${LIBCXX_BUILD}/include/c++/v1"
+$CXX $SYMBOLIZER_FLAGS ${SRC_DIR}/sanitizer_symbolize.cc ${SRC_DIR}/sanitizer_wrappers.cc -c
+
+SYMBOLIZER_API_LIST=__sanitizer_symbolize_code,__sanitizer_symbolize_data,__sanitizer_symbolize_flush,__sanitizer_symbolize_demangle
+
+# Merge all the object files together and copy the resulting library back.
+$LINK *.o -o all.bc
+echo "Optimizing..."
+$OPT -internalize -internalize-public-api-list=${SYMBOLIZER_API_LIST} all.bc -o opt.bc
+$CC $FLAGS -fno-lto -c opt.bc -o symbolizer.o
+
+echo "Checking undefined symbols..."
+nm -f posix -g symbolizer.o | cut -f 1,2 -d \  | LC_COLLATE=C sort -u > undefined.new
+(diff -u $SCRIPT_DIR/global_symbols.txt undefined.new | grep -E "^\+[^+]") && \
+  (echo "Failed: unexpected symbols"; exit 1)
+
+arch() {
+  objdump -f $1 | grep -m1 -Po "(?<=file format ).*$"
+}
+
+SYMBOLIZER_FORMAT=$(arch symbolizer.o)
+echo "Injecting $SYMBOLIZER_FORMAT symbolizer..."
+for A in $TARGE_DIR/libclang_rt.*san*.a; do
+  A_FORMAT=$(arch $A)
+  if [[ "$A_FORMAT" != "$SYMBOLIZER_FORMAT" ]] ; then
+    continue
+  fi
+  (nm -u $A 2>/dev/null | grep -E "__sanitizer_symbolize_code" >/dev/null) || continue
+  echo "$A"
+  $AR rcs $A symbolizer.o
+done
+
+echo "Success!"
diff --git a/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt b/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
new file mode 100644
index 0000000..033acf7
--- /dev/null
+++ b/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
@@ -0,0 +1,137 @@
+_GLOBAL_OFFSET_TABLE_ U
+_ZN11__sanitizer13internal_mmapEPvmiiiy U
+_ZN11__sanitizer13internal_openEPKcij U
+_ZN11__sanitizer13internal_statEPKcPv U
+_ZN11__sanitizer14internal_closeEi U
+_ZN11__sanitizer14internal_fstatEiPv U
+_ZN11__sanitizer14internal_lstatEPKcPv U
+_ZN11__sanitizer15internal_strlenEPKc U
+_ZN11__sanitizer16internal_iserrorEmPi U
+_ZN11__sanitizer17internal_snprintfEPcmPKcz U
+__ctype_b_loc U
+__ctype_get_mb_cur_max U
+__cxa_atexit U
+__divdi3 U
+__dso_handle U
+__errno_location U
+__interceptor_pread w
+__interceptor_read w
+__interceptor_realpath w
+__moddi3 U
+__sanitizer_symbolize_code T
+__sanitizer_symbolize_data T
+__sanitizer_symbolize_demangle T
+__sanitizer_symbolize_flush T
+__strdup U
+__udivdi3 U
+__umoddi3 U
+_exit U
+abort U
+access U
+calloc U
+catclose U
+catgets U
+catopen U
+ceil U
+cfgetospeed U
+clock_gettime U
+dl_iterate_phdr U
+dlsym U
+dup2 U
+environ U
+execv U
+exit U
+fclose U
+fflush U
+fileno U
+fopen U
+fork U
+fprintf U
+fputc U
+free U
+freelocale U
+fwrite U
+getc U
+getcwd U
+getenv U
+getpagesize U
+getpid U
+gettimeofday U
+ioctl U
+isatty U
+isprint U
+isupper U
+isxdigit U
+log10 U
+lseek U
+lseek64 U
+malloc U
+mbrlen U
+mbrtowc U
+mbsnrtowcs U
+mbsrtowcs U
+mbtowc U
+memchr U
+memcmp U
+memcpy U
+memmove U
+memset U
+mkdir U
+munmap U
+newlocale U
+perror U
+posix_spawn U
+posix_spawn_file_actions_adddup2 U
+posix_spawn_file_actions_addopen U
+posix_spawn_file_actions_destroy U
+posix_spawn_file_actions_init U
+qsort U
+rand U
+readlink U
+realloc U
+remove U
+setvbuf U
+sigfillset U
+sigprocmask U
+snprintf U
+sprintf U
+srand U
+sscanf U
+stderr U
+stdin U
+stdout U
+strcat U
+strchr U
+strcmp U
+strcpy U
+strdup U
+strerror U
+strerror_r U
+strftime_l U
+strncmp U
+strncpy U
+strrchr U
+strsep U
+strtod_l U
+strtof_l U
+strtol U
+strtold_l U
+strtoll_l U
+strtoull_l U
+tcgetattr U
+uname U
+ungetc U
+unlink U
+uselocale U
+vasprintf U
+vfprintf U
+vsnprintf U
+vsscanf U
+waitpid U
+wcrtomb U
+wcslen U
+wcsnrtombs U
+wmemcpy U
+wmemmove U
+wmemset U
+write U
diff --git a/lib/sanitizer_common/tests/CMakeLists.txt b/lib/sanitizer_common/tests/CMakeLists.txt
index b66f756..20698b9 100644
--- a/lib/sanitizer_common/tests/CMakeLists.txt
+++ b/lib/sanitizer_common/tests/CMakeLists.txt
@@ -80,7 +80,10 @@
 
 if(APPLE)
   list(APPEND SANITIZER_TEST_CFLAGS_COMMON ${DARWIN_osx_CFLAGS})
-  list(APPEND SANITIZER_TEST_LINK_FLAGS_COMMON ${DARWIN_osx_LINKFLAGS})
+  list(APPEND SANITIZER_TEST_LINK_FLAGS_COMMON ${DARWIN_osx_LINK_FLAGS})
+
+  add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
+  list(APPEND SANITIZER_TEST_LINK_FLAGS_COMMON ${WEAK_SYMBOL_LINK_FLAGS})
 endif()
 
 # MSVC linker is allocating 1M for the stack by default, which is not
diff --git a/lib/sanitizer_common/tests/malloc_stress_transfer_test.cc b/lib/sanitizer_common/tests/malloc_stress_transfer_test.cc
new file mode 100644
index 0000000..3e03c4b
--- /dev/null
+++ b/lib/sanitizer_common/tests/malloc_stress_transfer_test.cc
@@ -0,0 +1,37 @@
+#include <thread>
+#include <iostream>
+
+const size_t kAllocSize = 16;
+const size_t kInitialNumAllocs = 1 << 10;
+const size_t kPeriodicNumAllocs = 1 << 10;
+const size_t kNumIterations = 1 << 7;
+const size_t kNumThreads = 16;
+
+void Thread() {
+  // Debug aid, left disabled:
+  //   int sp; std::cerr << "Thread starting, sp = " << &sp << std::endl;
+  char *Kept[kInitialNumAllocs];    // Allocated once, freed at the very end.
+  char *Fresh[kPeriodicNumAllocs];  // Refilled and released on every round.
+  for (size_t k = 0; k < kInitialNumAllocs; k++) Kept[k] = new char[kAllocSize];
+  for (size_t Iter = 0; Iter < kNumIterations; Iter++) {
+    for (size_t Round = 0; Round < kPeriodicNumAllocs; Round++) {
+      for (size_t k = 0; k < kPeriodicNumAllocs; k++) {
+        Fresh[k] = new char[kAllocSize];
+        Fresh[k][0] = 0;  // Write to the first byte of the new block.
+      }
+      for (size_t k = 0; k < kPeriodicNumAllocs; k++) delete [] Fresh[k];
+    }
+  }
+  for (size_t k = 0; k < kInitialNumAllocs; k++) delete [] Kept[k];
+}
+
+int main() {
+  // Single-threaded variant, left disabled:
+  //   Thread(); return 0;
+  std::thread *Pool[kNumThreads];
+  for (size_t i = 0; i < kNumThreads; i++) Pool[i] = new std::thread(&Thread);
+  for (size_t i = 0; i < kNumThreads; i++) {
+    Pool[i]->join();  // Wait for the worker before releasing its handle.
+    delete Pool[i];
+  }
+}
diff --git a/lib/sanitizer_common/tests/sanitizer_allocator_test.cc b/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
index 46ce88a..8df5efd 100644
--- a/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
@@ -36,21 +36,62 @@
 // space that is always available. Thus, a dynamically allocated address space
 // is used instead (i.e. ~(uptr)0).
 static const uptr kAllocatorSpace = ~(uptr)0;
-static const uptr kAllocatorSize  =  0x10000000000ULL;  // 1T.
-static const u64 kAddressSpaceSize = 1ULL << 40;
+static const uptr kAllocatorSize  =  0x8000000000ULL;  // 512G.
+static const u64 kAddressSpaceSize = 1ULL << 47;
+typedef DefaultSizeClassMap SizeClassMap;
+#elif SANITIZER_ANDROID && defined(__aarch64__)
+static const uptr kAllocatorSpace = 0x3000000000ULL;
+static const uptr kAllocatorSize  = 0x2000000000ULL;
+static const u64 kAddressSpaceSize = 1ULL << 39;
+typedef VeryCompactSizeClassMap SizeClassMap;
 #else
 static const uptr kAllocatorSpace = 0x700000000000ULL;
 static const uptr kAllocatorSize  = 0x010000000000ULL;  // 1T.
 static const u64 kAddressSpaceSize = 1ULL << 47;
+typedef DefaultSizeClassMap SizeClassMap;
 #endif
 
-typedef SizeClassAllocator64<
-  kAllocatorSpace, kAllocatorSize, 16, DefaultSizeClassMap> Allocator64;
-typedef SizeClassAllocator64<
-  ~(uptr)0, kAllocatorSize, 16, DefaultSizeClassMap> Allocator64Dynamic;
+struct AP64 {  // Allocator Params. Short name for shorter demangled names.
+  static const uptr kSpaceBeg = kAllocatorSpace;
+  static const uptr kSpaceSize = kAllocatorSize;
+  static const uptr kMetadataSize = 16;
+  typedef ::SizeClassMap SizeClassMap;
+  typedef NoOpMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+};
 
-typedef SizeClassAllocator64<
-  kAllocatorSpace, kAllocatorSize, 16, CompactSizeClassMap> Allocator64Compact;
+struct AP64Dyn {
+  static const uptr kSpaceBeg = ~(uptr)0;
+  static const uptr kSpaceSize = kAllocatorSize;
+  static const uptr kMetadataSize = 16;
+  typedef ::SizeClassMap SizeClassMap;
+  typedef NoOpMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+};
+
+struct AP64Compact {
+  static const uptr kSpaceBeg = ~(uptr)0;
+  static const uptr kSpaceSize = kAllocatorSize;
+  static const uptr kMetadataSize = 16;
+  typedef CompactSizeClassMap SizeClassMap;
+  typedef NoOpMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+};
+
+struct AP64VeryCompact {
+  static const uptr kSpaceBeg = ~(uptr)0;
+  static const uptr kSpaceSize = 1ULL << 37;
+  static const uptr kMetadataSize = 16;
+  typedef VeryCompactSizeClassMap SizeClassMap;
+  typedef NoOpMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+};
+
+
+typedef SizeClassAllocator64<AP64> Allocator64;
+typedef SizeClassAllocator64<AP64Dyn> Allocator64Dynamic;
+typedef SizeClassAllocator64<AP64Compact> Allocator64Compact;
+typedef SizeClassAllocator64<AP64VeryCompact> Allocator64VeryCompact;
 #elif defined(__mips64)
 static const u64 kAddressSpaceSize = 1ULL << 40;
 #elif defined(__aarch64__)
@@ -77,7 +118,7 @@
 template <class SizeClassMap>
 void TestSizeClassMap() {
   typedef SizeClassMap SCMap;
-  // SCMap::Print();
+  SCMap::Print();
   SCMap::Validate();
 }
 
@@ -89,6 +130,10 @@
   TestSizeClassMap<CompactSizeClassMap>();
 }
 
+TEST(SanitizerCommon, VeryCompactSizeClassMap) {
+  TestSizeClassMap<VeryCompactSizeClassMap>();
+}
+
 TEST(SanitizerCommon, InternalSizeClassMap) {
   TestSizeClassMap<InternalSizeClassMap>();
 }
@@ -96,13 +141,15 @@
 template <class Allocator>
 void TestSizeClassAllocator() {
   Allocator *a = new Allocator;
-  a->Init();
+  a->Init(kReleaseToOSIntervalNever);
   SizeClassAllocatorLocalCache<Allocator> cache;
   memset(&cache, 0, sizeof(cache));
   cache.Init(0);
 
-  static const uptr sizes[] = {1, 16, 30, 40, 100, 1000, 10000,
-    50000, 60000, 100000, 120000, 300000, 500000, 1000000, 2000000};
+  static const uptr sizes[] = {
+    1, 16,  30, 40, 100, 1000, 10000,
+    50000, 60000, 100000, 120000, 300000, 500000, 1000000, 2000000
+  };
 
   std::vector<void *> allocated;
 
@@ -172,10 +219,16 @@
   TestSizeClassAllocator<Allocator64Dynamic>();
 }
 
+#if !SANITIZER_ANDROID
 TEST(SanitizerCommon, SizeClassAllocator64Compact) {
   TestSizeClassAllocator<Allocator64Compact>();
 }
 #endif
+
+TEST(SanitizerCommon, SizeClassAllocator64VeryCompact) {
+  TestSizeClassAllocator<Allocator64VeryCompact>();
+}
+#endif
 #endif
 
 TEST(SanitizerCommon, SizeClassAllocator32Compact) {
@@ -185,7 +238,7 @@
 template <class Allocator>
 void SizeClassAllocatorMetadataStress() {
   Allocator *a = new Allocator;
-  a->Init();
+  a->Init(kReleaseToOSIntervalNever);
   SizeClassAllocatorLocalCache<Allocator> cache;
   memset(&cache, 0, sizeof(cache));
   cache.Init(0);
@@ -194,7 +247,7 @@
   void *allocated[kNumAllocs];
   void *meta[kNumAllocs];
   for (uptr i = 0; i < kNumAllocs; i++) {
-    void *x = cache.Allocate(a, 1 + i % 50);
+    void *x = cache.Allocate(a, 1 + i % (Allocator::kNumClasses - 1));
     allocated[i] = x;
     meta[i] = a->GetMetaData(x);
   }
@@ -205,7 +258,7 @@
     EXPECT_EQ(m, meta[idx]);
   }
   for (uptr i = 0; i < kNumAllocs; i++) {
-    cache.Deallocate(a, 1 + i % 50, allocated[i]);
+    cache.Deallocate(a, 1 + i % (Allocator::kNumClasses - 1), allocated[i]);
   }
 
   a->TestOnlyUnmap();
@@ -224,28 +277,30 @@
   SizeClassAllocatorMetadataStress<Allocator64Dynamic>();
 }
 
+#if !SANITIZER_ANDROID
 TEST(SanitizerCommon, SizeClassAllocator64CompactMetadataStress) {
   SizeClassAllocatorMetadataStress<Allocator64Compact>();
 }
 #endif
+
+#endif
 #endif  // SANITIZER_CAN_USE_ALLOCATOR64
 TEST(SanitizerCommon, SizeClassAllocator32CompactMetadataStress) {
   SizeClassAllocatorMetadataStress<Allocator32Compact>();
 }
 
 template <class Allocator>
-void SizeClassAllocatorGetBlockBeginStress() {
+void SizeClassAllocatorGetBlockBeginStress(u64 TotalSize) {
   Allocator *a = new Allocator;
-  a->Init();
+  a->Init(kReleaseToOSIntervalNever);
   SizeClassAllocatorLocalCache<Allocator> cache;
   memset(&cache, 0, sizeof(cache));
   cache.Init(0);
 
   uptr max_size_class = Allocator::SizeClassMapT::kLargestClassID;
   uptr size = Allocator::SizeClassMapT::Size(max_size_class);
-  u64 G8 = 1ULL << 33;
   // Make sure we correctly compute GetBlockBegin() w/o overflow.
-  for (size_t i = 0; i <= G8 / size; i++) {
+  for (size_t i = 0; i <= TotalSize / size; i++) {
     void *x = cache.Allocate(a, max_size_class);
     void *beg = a->GetBlockBegin(x);
     // if ((i & (i - 1)) == 0)
@@ -262,16 +317,24 @@
 // to run them all at the same time. FIXME: Make them not flaky and reenable.
 #if !SANITIZER_WINDOWS
 TEST(SanitizerCommon, SizeClassAllocator64GetBlockBegin) {
-  SizeClassAllocatorGetBlockBeginStress<Allocator64>();
+  SizeClassAllocatorGetBlockBeginStress<Allocator64>(
+      1ULL << (SANITIZER_ANDROID ? 31 : 33));
 }
 TEST(SanitizerCommon, SizeClassAllocator64DynamicGetBlockBegin) {
-  SizeClassAllocatorGetBlockBeginStress<Allocator64Dynamic>();
+  SizeClassAllocatorGetBlockBeginStress<Allocator64Dynamic>(
+      1ULL << (SANITIZER_ANDROID ? 31 : 33));
 }
+#if !SANITIZER_ANDROID
 TEST(SanitizerCommon, SizeClassAllocator64CompactGetBlockBegin) {
-  SizeClassAllocatorGetBlockBeginStress<Allocator64Compact>();
+  SizeClassAllocatorGetBlockBeginStress<Allocator64Compact>(1ULL << 33);
+}
+#endif
+TEST(SanitizerCommon, SizeClassAllocator64VeryCompactGetBlockBegin) {
+  // Does not have > 4Gb for each class.
+  SizeClassAllocatorGetBlockBeginStress<Allocator64VeryCompact>(1ULL << 31);
 }
 TEST(SanitizerCommon, SizeClassAllocator32CompactGetBlockBegin) {
-  SizeClassAllocatorGetBlockBeginStress<Allocator32Compact>();
+  SizeClassAllocatorGetBlockBeginStress<Allocator32Compact>(1ULL << 33);
 }
 #endif
 #endif  // SANITIZER_CAN_USE_ALLOCATOR64
@@ -288,22 +351,33 @@
 // These tests can fail on Windows if memory is somewhat full and lit happens
 // to run them all at the same time. FIXME: Make them not flaky and reenable.
 #if !SANITIZER_WINDOWS
+
+struct AP64WithCallback {
+  static const uptr kSpaceBeg = kAllocatorSpace;
+  static const uptr kSpaceSize = kAllocatorSize;
+  static const uptr kMetadataSize = 16;
+  typedef ::SizeClassMap SizeClassMap;
+  typedef TestMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+};
+
 TEST(SanitizerCommon, SizeClassAllocator64MapUnmapCallback) {
   TestMapUnmapCallback::map_count = 0;
   TestMapUnmapCallback::unmap_count = 0;
-  typedef SizeClassAllocator64<
-      kAllocatorSpace, kAllocatorSize, 16, DefaultSizeClassMap,
-      TestMapUnmapCallback> Allocator64WithCallBack;
+  typedef SizeClassAllocator64<AP64WithCallback> Allocator64WithCallBack;
   Allocator64WithCallBack *a = new Allocator64WithCallBack;
-  a->Init();
+  a->Init(kReleaseToOSIntervalNever);
   EXPECT_EQ(TestMapUnmapCallback::map_count, 1);  // Allocator state.
   SizeClassAllocatorLocalCache<Allocator64WithCallBack> cache;
   memset(&cache, 0, sizeof(cache));
   cache.Init(0);
   AllocatorStats stats;
   stats.Init();
-  a->AllocateBatch(&stats, &cache, 32);
-  EXPECT_EQ(TestMapUnmapCallback::map_count, 3);  // State + alloc + metadata.
+  const size_t kNumChunks = 128;
+  uint32_t chunks[kNumChunks];
+  a->GetFromAllocator(&stats, 30, chunks, kNumChunks);
+  // State + alloc + metadata + freearray.
+  EXPECT_EQ(TestMapUnmapCallback::map_count, 4);
   a->TestOnlyUnmap();
   EXPECT_EQ(TestMapUnmapCallback::unmap_count, 1);  // The whole thing.
   delete a;
@@ -323,7 +397,7 @@
       TestMapUnmapCallback>
     Allocator32WithCallBack;
   Allocator32WithCallBack *a = new Allocator32WithCallBack;
-  a->Init();
+  a->Init(kReleaseToOSIntervalNever);
   EXPECT_EQ(TestMapUnmapCallback::map_count, 0);
   SizeClassAllocatorLocalCache<Allocator32WithCallBack>  cache;
   memset(&cache, 0, sizeof(cache));
@@ -356,14 +430,16 @@
 template<class Allocator>
 void FailInAssertionOnOOM() {
   Allocator a;
-  a.Init();
+  a.Init(kReleaseToOSIntervalNever);
   SizeClassAllocatorLocalCache<Allocator> cache;
   memset(&cache, 0, sizeof(cache));
   cache.Init(0);
   AllocatorStats stats;
   stats.Init();
+  const size_t kNumChunks = 128;
+  uint32_t chunks[kNumChunks];
   for (int i = 0; i < 1000000; i++) {
-    a.AllocateBatch(&stats, &cache, 52);
+    a.GetFromAllocator(&stats, 52, chunks, kNumChunks);
   }
 
   a.TestOnlyUnmap();
@@ -371,7 +447,7 @@
 
 // Don't test OOM conditions on Win64 because it causes other tests on the same
 // machine to OOM.
-#if SANITIZER_CAN_USE_ALLOCATOR64 && !SANITIZER_WINDOWS64
+#if SANITIZER_CAN_USE_ALLOCATOR64 && !SANITIZER_WINDOWS64 && !SANITIZER_ANDROID
 TEST(SanitizerCommon, SizeClassAllocator64Overflow) {
   EXPECT_DEATH(FailInAssertionOnOOM<Allocator64>(), "Out of memory");
 }
@@ -462,7 +538,7 @@
       CombinedAllocator<PrimaryAllocator, AllocatorCache, SecondaryAllocator>
       Allocator;
   Allocator *a = new Allocator;
-  a->Init(/* may_return_null */ true);
+  a->Init(/* may_return_null */ true, kReleaseToOSIntervalNever);
 
   AllocatorCache cache;
   memset(&cache, 0, sizeof(cache));
@@ -524,6 +600,7 @@
       SizeClassAllocatorLocalCache<Allocator64Dynamic> > ();
 }
 
+#if !SANITIZER_ANDROID
 TEST(SanitizerCommon, CombinedAllocator64Compact) {
   TestCombinedAllocator<Allocator64Compact,
       LargeMmapAllocator<>,
@@ -531,6 +608,13 @@
 }
 #endif
 
+TEST(SanitizerCommon, CombinedAllocator64VeryCompact) {
+  TestCombinedAllocator<Allocator64VeryCompact,
+      LargeMmapAllocator<>,
+      SizeClassAllocatorLocalCache<Allocator64VeryCompact> > ();
+}
+#endif
+
 TEST(SanitizerCommon, CombinedAllocator32Compact) {
   TestCombinedAllocator<Allocator32Compact,
       LargeMmapAllocator<>,
@@ -543,7 +627,7 @@
   typedef typename AllocatorCache::Allocator Allocator;
   Allocator *a = new Allocator();
 
-  a->Init();
+  a->Init(kReleaseToOSIntervalNever);
   memset(&cache, 0, sizeof(cache));
   cache.Init(0);
 
@@ -585,11 +669,17 @@
       SizeClassAllocatorLocalCache<Allocator64Dynamic> >();
 }
 
+#if !SANITIZER_ANDROID
 TEST(SanitizerCommon, SizeClassAllocator64CompactLocalCache) {
   TestSizeClassAllocatorLocalCache<
       SizeClassAllocatorLocalCache<Allocator64Compact> >();
 }
 #endif
+TEST(SanitizerCommon, SizeClassAllocator64VeryCompactLocalCache) {
+  TestSizeClassAllocatorLocalCache<
+      SizeClassAllocatorLocalCache<Allocator64VeryCompact> >();
+}
+#endif
 #endif
 
 TEST(SanitizerCommon, SizeClassAllocator32CompactLocalCache) {
@@ -612,7 +702,7 @@
 TEST(SanitizerCommon, AllocatorLeakTest) {
   typedef AllocatorCache::Allocator Allocator;
   Allocator a;
-  a.Init();
+  a.Init(kReleaseToOSIntervalNever);
   uptr total_used_memory = 0;
   for (int i = 0; i < 100; i++) {
     pthread_t t;
@@ -645,7 +735,7 @@
 // able to call Deallocate on a zeroed cache, and it will self-initialize.
 TEST(Allocator, AllocatorCacheDeallocNewThread) {
   AllocatorCache::Allocator allocator;
-  allocator.Init();
+  allocator.Init(kReleaseToOSIntervalNever);
   AllocatorCache main_cache;
   AllocatorCache child_cache;
   memset(&main_cache, 0, sizeof(main_cache));
@@ -716,7 +806,7 @@
 template <class Allocator>
 void TestSizeClassAllocatorIteration() {
   Allocator *a = new Allocator;
-  a->Init();
+  a->Init(kReleaseToOSIntervalNever);
   SizeClassAllocatorLocalCache<Allocator> cache;
   memset(&cache, 0, sizeof(cache));
   cache.Init(0);
@@ -839,33 +929,58 @@
 
 // Don't test OOM conditions on Win64 because it causes other tests on the same
 // machine to OOM.
-#if SANITIZER_CAN_USE_ALLOCATOR64 && !SANITIZER_WINDOWS64
+#if SANITIZER_CAN_USE_ALLOCATOR64 && !SANITIZER_WINDOWS64 && !SANITIZER_ANDROID
+typedef SizeClassMap<3, 4, 8, 63, 128, 16> SpecialSizeClassMap;
+struct AP64_SpecialSizeClassMap {
+  static const uptr kSpaceBeg = kAllocatorSpace;
+  static const uptr kSpaceSize = kAllocatorSize;
+  static const uptr kMetadataSize = 0;
+  typedef SpecialSizeClassMap SizeClassMap;
+  typedef NoOpMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+};
+
 // Regression test for out-of-memory condition in PopulateFreeList().
 TEST(SanitizerCommon, SizeClassAllocator64PopulateFreeListOOM) {
   // In a world where regions are small and chunks are huge...
-  typedef SizeClassMap<63, 128, 16> SpecialSizeClassMap;
-  typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize, 0,
-                               SpecialSizeClassMap> SpecialAllocator64;
+  typedef SizeClassAllocator64<AP64_SpecialSizeClassMap> SpecialAllocator64;
   const uptr kRegionSize =
       kAllocatorSize / SpecialSizeClassMap::kNumClassesRounded;
   SpecialAllocator64 *a = new SpecialAllocator64;
-  a->Init();
+  a->Init(kReleaseToOSIntervalNever);
   SizeClassAllocatorLocalCache<SpecialAllocator64> cache;
   memset(&cache, 0, sizeof(cache));
   cache.Init(0);
 
   // ...one man is on a mission to overflow a region with a series of
   // successive allocations.
+
   const uptr kClassID = 107;
-  const uptr kAllocationSize = DefaultSizeClassMap::Size(kClassID);
+  const uptr kAllocationSize = SpecialSizeClassMap::Size(kClassID);
   ASSERT_LT(2 * kAllocationSize, kRegionSize);
   ASSERT_GT(3 * kAllocationSize, kRegionSize);
   cache.Allocate(a, kClassID);
   EXPECT_DEATH(cache.Allocate(a, kClassID) && cache.Allocate(a, kClassID),
                "The process has exhausted");
+
+  const uptr Class2 = 100;
+  const uptr Size2 = SpecialSizeClassMap::Size(Class2);
+  ASSERT_EQ(Size2 * 8, kRegionSize);
+  char *p[7];
+  for (int i = 0; i < 7; i++) {
+    p[i] = (char*)cache.Allocate(a, Class2);
+    fprintf(stderr, "p[%d] %p s = %lx\n", i, (void*)p[i], Size2);
+    p[i][Size2 - 1] = 42;
+    if (i) ASSERT_LT(p[i - 1], p[i]);
+  }
+  EXPECT_DEATH(cache.Allocate(a, Class2), "The process has exhausted");
+  cache.Deallocate(a, Class2, p[0]);
+  cache.Drain(a);
+  ASSERT_EQ(p[6][Size2 - 1], 42);
   a->TestOnlyUnmap();
   delete a;
 }
+
 #endif
 
 TEST(SanitizerCommon, TwoLevelByteMap) {
diff --git a/lib/sanitizer_common/tests/sanitizer_allocator_testlib.cc b/lib/sanitizer_common/tests/sanitizer_allocator_testlib.cc
index 54e8773..c6dd3c4 100644
--- a/lib/sanitizer_common/tests/sanitizer_allocator_testlib.cc
+++ b/lib/sanitizer_common/tests/sanitizer_allocator_testlib.cc
@@ -34,13 +34,22 @@
 # define SANITIZER_FREE_HOOK(p)
 #endif
 
-namespace {
 static const uptr kAllocatorSpace = 0x600000000000ULL;
 static const uptr kAllocatorSize  =  0x10000000000ULL;  // 1T.
 
-// typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize, 0,
-typedef SizeClassAllocator64<~(uptr)0, kAllocatorSize, 0,
-  CompactSizeClassMap> PrimaryAllocator;
+struct __AP64 {
+  static const uptr kSpaceBeg = ~(uptr)0;
+  static const uptr kSpaceSize = kAllocatorSize;
+  static const uptr kMetadataSize = 0;
+  typedef CompactSizeClassMap SizeClassMap;
+  typedef NoOpMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags =
+      SizeClassAllocator64FlagMasks::kRandomShuffleChunks;
+};
+
+namespace {
+
+typedef SizeClassAllocator64<__AP64> PrimaryAllocator;
 typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
 typedef LargeMmapAllocator<> SecondaryAllocator;
 typedef CombinedAllocator<PrimaryAllocator, AllocatorCache,
diff --git a/lib/sanitizer_common/tests/sanitizer_common_test.cc b/lib/sanitizer_common/tests/sanitizer_common_test.cc
index 6fc308a..ebc885d 100644
--- a/lib/sanitizer_common/tests/sanitizer_common_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_common_test.cc
@@ -10,6 +10,8 @@
 // This file is a part of ThreadSanitizer/AddressSanitizer runtime.
 //
 //===----------------------------------------------------------------------===//
+#include <algorithm>
+
 #include "sanitizer_common/sanitizer_allocator_internal.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_flags.h"
@@ -170,15 +172,54 @@
   return a < b;
 }
 
-TEST(SanitizerCommon, InternalBinarySearch) {
+TEST(SanitizerCommon, InternalLowerBound) {
   static const uptr kSize = 5;
-  uptr arr[kSize];
-  for (uptr i = 0; i < kSize; i++) arr[i] = i * i;
+  int arr[kSize];
+  arr[0] = 1;
+  arr[1] = 3;
+  arr[2] = 5;
+  arr[3] = 7;
+  arr[4] = 11;
 
-  for (uptr i = 0; i < kSize; i++)
-    ASSERT_EQ(InternalBinarySearch(arr, 0, kSize, i * i, UptrLess), i);
+  EXPECT_EQ(0u, InternalLowerBound(arr, 0, kSize, 0, UptrLess));
+  EXPECT_EQ(0u, InternalLowerBound(arr, 0, kSize, 1, UptrLess));
+  EXPECT_EQ(1u, InternalLowerBound(arr, 0, kSize, 2, UptrLess));
+  EXPECT_EQ(1u, InternalLowerBound(arr, 0, kSize, 3, UptrLess));
+  EXPECT_EQ(2u, InternalLowerBound(arr, 0, kSize, 4, UptrLess));
+  EXPECT_EQ(2u, InternalLowerBound(arr, 0, kSize, 5, UptrLess));
+  EXPECT_EQ(3u, InternalLowerBound(arr, 0, kSize, 6, UptrLess));
+  EXPECT_EQ(3u, InternalLowerBound(arr, 0, kSize, 7, UptrLess));
+  EXPECT_EQ(4u, InternalLowerBound(arr, 0, kSize, 8, UptrLess));
+  EXPECT_EQ(4u, InternalLowerBound(arr, 0, kSize, 9, UptrLess));
+  EXPECT_EQ(4u, InternalLowerBound(arr, 0, kSize, 10, UptrLess));
+  EXPECT_EQ(4u, InternalLowerBound(arr, 0, kSize, 11, UptrLess));
+  EXPECT_EQ(5u, InternalLowerBound(arr, 0, kSize, 12, UptrLess));
+}
 
-  ASSERT_EQ(InternalBinarySearch(arr, 0, kSize, 7, UptrLess), kSize + 1);
+TEST(SanitizerCommon, InternalLowerBoundVsStdLowerBound) {
+  std::vector<int> data;
+  auto create_item = [] (size_t i, size_t j) {
+    auto v = i * 10000 + j;
+    return ((v << 6) + (v >> 6) + 0x9e3779b9) % 100;
+  };
+  for (size_t i = 0; i < 1000; ++i) {
+    data.resize(i);
+    for (size_t j = 0; j < i; ++j) {
+      data[j] = create_item(i, j);
+    }
+
+    std::sort(data.begin(), data.end());
+
+    for (size_t j = 0; j < i; ++j) {
+      int val = create_item(i, j);
+      for (auto to_find : {val - 1, val, val + 1}) {
+        uptr expected =
+            std::lower_bound(data.begin(), data.end(), to_find) - data.begin();
+        EXPECT_EQ(expected, InternalLowerBound(data.data(), 0, data.size(),
+                                               to_find, std::less<int>()));
+      }
+    }
+  }
 }
 
 #if SANITIZER_LINUX && !SANITIZER_ANDROID
diff --git a/lib/sanitizer_common/tests/sanitizer_format_interceptor_test.cc b/lib/sanitizer_common/tests/sanitizer_format_interceptor_test.cc
index 13918af..2f0494f 100644
--- a/lib/sanitizer_common/tests/sanitizer_format_interceptor_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_format_interceptor_test.cc
@@ -256,4 +256,8 @@
 
   // Checks for wide-character strings are not implemented yet.
   testPrintf("%ls", 1, 0);
+
+  testPrintf("%m", 0);
+  testPrintf("%m%s", 1, test_buf_size);
+  testPrintf("%s%m%s", 2, test_buf_size, test_buf_size);
 }
diff --git a/lib/sanitizer_common/weak_symbols.txt b/lib/sanitizer_common/weak_symbols.txt
new file mode 100644
index 0000000..8a1e32b
--- /dev/null
+++ b/lib/sanitizer_common/weak_symbols.txt
@@ -0,0 +1,6 @@
+___sanitizer_free_hook
+___sanitizer_malloc_hook
+___sanitizer_symbolize_code
+___sanitizer_symbolize_data
+___sanitizer_symbolize_demangle
+___sanitizer_symbolize_flush
diff --git a/lib/scudo/CMakeLists.txt b/lib/scudo/CMakeLists.txt
index 6f8f7d7..4f1acec 100644
--- a/lib/scudo/CMakeLists.txt
+++ b/lib/scudo/CMakeLists.txt
@@ -3,17 +3,23 @@
 include_directories(..)
 
 set(SCUDO_CFLAGS ${SANITIZER_COMMON_CFLAGS})
+# SANITIZER_COMMON_CFLAGS include -fno-builtin, but we actually want builtins!
+list(APPEND SCUDO_CFLAGS -fbuiltin)
 append_rtti_flag(OFF SCUDO_CFLAGS)
-list(APPEND SCUDO_CFLAGS -msse4.2 -mcx16)
 
 set(SCUDO_SOURCES
   scudo_allocator.cpp
   scudo_flags.cpp
+  scudo_crc32.cpp
   scudo_interceptors.cpp
   scudo_new_delete.cpp
   scudo_termination.cpp
   scudo_utils.cpp)
 
+if (COMPILER_RT_HAS_MSSE4_2_FLAG)
+  set_source_files_properties(scudo_crc32.cpp PROPERTIES COMPILE_FLAGS -msse4.2)
+endif()
+
 if(COMPILER_RT_HAS_SCUDO)
   foreach(arch ${SCUDO_SUPPORTED_ARCH})
     add_compiler_rt_runtime(clang_rt.scudo
diff --git a/lib/scudo/scudo_allocator.cpp b/lib/scudo/scudo_allocator.cpp
index ceb7bbd..d1121b0 100644
--- a/lib/scudo/scudo_allocator.cpp
+++ b/lib/scudo/scudo_allocator.cpp
@@ -15,6 +15,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "scudo_allocator.h"
+#include "scudo_crc32.h"
 #include "scudo_utils.h"
 
 #include "sanitizer_common/sanitizer_allocator_interface.h"
@@ -22,23 +23,43 @@
 
 #include <limits.h>
 #include <pthread.h>
-#include <smmintrin.h>
 
-#include <atomic>
 #include <cstring>
 
 namespace __scudo {
 
+#if SANITIZER_CAN_USE_ALLOCATOR64
 const uptr AllocatorSpace = ~0ULL;
-const uptr AllocatorSize  =  0x10000000000ULL;
-const uptr MinAlignmentLog = 4; // 16 bytes for x64
-const uptr MaxAlignmentLog = 24;
-
+const uptr AllocatorSize = 0x40000000000ULL;
 typedef DefaultSizeClassMap SizeClassMap;
-typedef SizeClassAllocator64<AllocatorSpace, AllocatorSize, 0, SizeClassMap>
-  PrimaryAllocator;
+struct AP {
+  static const uptr kSpaceBeg = AllocatorSpace;
+  static const uptr kSpaceSize = AllocatorSize;
+  static const uptr kMetadataSize = 0;
+  typedef __scudo::SizeClassMap SizeClassMap;
+  typedef NoOpMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags =
+      SizeClassAllocator64FlagMasks::kRandomShuffleChunks;
+};
+typedef SizeClassAllocator64<AP> PrimaryAllocator;
+#else
+// Currently, the 32-bit Sanitizer allocator has not yet benefited from all the
+// security improvements brought to the 64-bit one. This makes the 32-bit
+// version of Scudo slightly less toughened.
+static const uptr RegionSizeLog = 20;
+static const uptr NumRegions = SANITIZER_MMAP_RANGE_SIZE >> RegionSizeLog;
+# if SANITIZER_WORDSIZE == 32
+typedef FlatByteMap<NumRegions> ByteMap;
+# elif SANITIZER_WORDSIZE == 64
+typedef TwoLevelByteMap<(NumRegions >> 12), 1 << 12> ByteMap;
+# endif  // SANITIZER_WORDSIZE
+typedef DefaultSizeClassMap SizeClassMap;
+typedef SizeClassAllocator32<0, SANITIZER_MMAP_RANGE_SIZE, 0, SizeClassMap,
+    RegionSizeLog, ByteMap> PrimaryAllocator;
+#endif  // SANITIZER_CAN_USE_ALLOCATOR64
+
 typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
-typedef LargeMmapAllocator<> SecondaryAllocator;
+typedef ScudoLargeMmapAllocator SecondaryAllocator;
 typedef CombinedAllocator<PrimaryAllocator, AllocatorCache, SecondaryAllocator>
   ScudoAllocator;
 
@@ -46,67 +67,63 @@
 
 static thread_local Xorshift128Plus Prng;
 // Global static cookie, initialized at start-up.
-static u64 Cookie;
+static uptr Cookie;
 
-enum ChunkState : u8 {
-  ChunkAvailable  = 0,
-  ChunkAllocated  = 1,
-  ChunkQuarantine = 2
-};
+// We default to software CRC32 if the alternatives are not supported, either
+// at compilation or at runtime.
+static atomic_uint8_t HashAlgorithm = { CRC32Software };
 
-typedef unsigned __int128 PackedHeader;
-typedef std::atomic<PackedHeader> AtomicPackedHeader;
-
-// Our header requires 128-bit of storage on x64 (the only platform supported
-// as of now), which fits nicely with the alignment requirements.
-// Having the offset saves us from using functions such as GetBlockBegin, that
-// is fairly costly. Our first implementation used the MetaData as well, which
-// offers the advantage of being stored away from the chunk itself, but
-// accessing it was costly as well.
-// The header will be atomically loaded and stored using the 16-byte primitives
-// offered by the platform (likely requires cmpxchg16b support).
-struct UnpackedHeader {
-  // 1st 8 bytes
-  u16 Checksum      : 16;
-  u64 RequestedSize : 40; // Needed for reallocation purposes.
-  u8  State         : 2;  // available, allocated, or quarantined
-  u8  AllocType     : 2;  // malloc, new, new[], or memalign
-  u8  Unused_0_     : 4;
-  // 2nd 8 bytes
-  u64 Offset        : 20; // Offset from the beginning of the backend
-                          // allocation to the beginning chunk itself, in
-                          // multiples of MinAlignment. See comment about its
-                          // maximum value and test in init().
-  u64 Unused_1_     : 28;
-  u16 Salt          : 16;
-};
-
-COMPILER_CHECK(sizeof(UnpackedHeader) == sizeof(PackedHeader));
-
-const uptr ChunkHeaderSize = sizeof(PackedHeader);
+// Helper function that computes the chunk checksum from values passed in as
+// uptrs: the CRC is seeded with the global Cookie, then Pointer and each
+// Array element are folded in. HashType is handed through to computeCRC32.
+INLINE u32 hashUptrs(uptr Pointer, uptr *Array, uptr ArraySize, u8 HashType) {
+  u32 Crc = computeCRC32(Cookie, Pointer, HashType);
+  uptr *const End = Array + ArraySize;
+  for (uptr *Word = Array; Word != End; ++Word)
+    Crc = computeCRC32(Crc, *Word, HashType);
+  return Crc;
+}
 
 struct ScudoChunk : UnpackedHeader {
   // We can't use the offset member of the chunk itself, as we would double
   // fetch it without any warranty that it wouldn't have been tampered. To
   // prevent this, we work with a local copy of the header.
-  void *AllocBeg(UnpackedHeader *Header) {
+  void *getAllocBeg(UnpackedHeader *Header) {
     return reinterpret_cast<void *>(
         reinterpret_cast<uptr>(this) - (Header->Offset << MinAlignmentLog));
   }
 
-  // CRC32 checksum of the Chunk pointer and its ChunkHeader.
-  // It currently uses the Intel Nehalem SSE4.2 crc32 64-bit instruction.
-  u16 Checksum(UnpackedHeader *Header) const {
-    u64 HeaderHolder[2];
-    memcpy(HeaderHolder, Header, sizeof(HeaderHolder));
-    u64 Crc = _mm_crc32_u64(Cookie, reinterpret_cast<uptr>(this));
-    // This is somewhat of a shortcut. The checksum is stored in the 16 least
-    // significant bits of the first 8 bytes of the header, hence zero-ing
-    // those bits out. It would be more valid to zero the checksum field of the
-    // UnpackedHeader, but would require holding an additional copy of it.
-    Crc = _mm_crc32_u64(Crc, HeaderHolder[0] & 0xffffffffffff0000ULL);
-    Crc = _mm_crc32_u64(Crc, HeaderHolder[1]);
-    return static_cast<u16>(Crc);
+  // Returns the usable size for a chunk, meaning the amount of bytes from the
+  // beginning of the user data to the end of the backend allocated chunk.
+  uptr getUsableSize(UnpackedHeader *Header) {
+    uptr Size = getAllocator().GetActuallyAllocatedSize(getAllocBeg(Header));
+    if (Size == 0)
+      return Size;
+    return Size - AlignedChunkHeaderSize - (Header->Offset << MinAlignmentLog);
+  }
+
+  // Compute the checksum of the Chunk pointer and its ChunkHeader.
+  u16 computeChecksum(UnpackedHeader *Header) const {
+    UnpackedHeader ZeroChecksumHeader = *Header;
+    ZeroChecksumHeader.Checksum = 0;
+    uptr HeaderHolder[sizeof(UnpackedHeader) / sizeof(uptr)];
+    memcpy(&HeaderHolder, &ZeroChecksumHeader, sizeof(HeaderHolder));
+    u32 Hash = hashUptrs(reinterpret_cast<uptr>(this),
+                         HeaderHolder,
+                         ARRAY_SIZE(HeaderHolder),
+                         atomic_load_relaxed(&HashAlgorithm));
+    return static_cast<u16>(Hash);
+  }
+
+  // Checks the validity of a chunk by verifying its checksum.
+  bool isValid() {
+    UnpackedHeader NewUnpackedHeader;
+    const AtomicPackedHeader *AtomicHeader =
+        reinterpret_cast<const AtomicPackedHeader *>(this);
+    PackedHeader NewPackedHeader =
+        AtomicHeader->load(std::memory_order_relaxed);
+    NewUnpackedHeader = bit_cast<UnpackedHeader>(NewPackedHeader);
+    return (NewUnpackedHeader.Checksum == computeChecksum(&NewUnpackedHeader));
   }
 
   // Loads and unpacks the header, verifying the checksum in the process.
@@ -116,16 +133,14 @@
     PackedHeader NewPackedHeader =
         AtomicHeader->load(std::memory_order_relaxed);
     *NewUnpackedHeader = bit_cast<UnpackedHeader>(NewPackedHeader);
-    if ((NewUnpackedHeader->Unused_0_ != 0) ||
-        (NewUnpackedHeader->Unused_1_ != 0) ||
-        (NewUnpackedHeader->Checksum != Checksum(NewUnpackedHeader))) {
+    if (NewUnpackedHeader->Checksum != computeChecksum(NewUnpackedHeader)) {
       dieWithMessage("ERROR: corrupted chunk header at address %p\n", this);
     }
   }
 
   // Packs and stores the header, computing the checksum in the process.
   void storeHeader(UnpackedHeader *NewUnpackedHeader) {
-    NewUnpackedHeader->Checksum = Checksum(NewUnpackedHeader);
+    NewUnpackedHeader->Checksum = computeChecksum(NewUnpackedHeader);
     PackedHeader NewPackedHeader = bit_cast<PackedHeader>(*NewUnpackedHeader);
     AtomicPackedHeader *AtomicHeader =
         reinterpret_cast<AtomicPackedHeader *>(this);
@@ -137,7 +152,7 @@
   // we are not being raced by a corruption occurring in another thread.
   void compareExchangeHeader(UnpackedHeader *NewUnpackedHeader,
                              UnpackedHeader *OldUnpackedHeader) {
-    NewUnpackedHeader->Checksum = Checksum(NewUnpackedHeader);
+    NewUnpackedHeader->Checksum = computeChecksum(NewUnpackedHeader);
     PackedHeader NewPackedHeader = bit_cast<PackedHeader>(*NewUnpackedHeader);
     PackedHeader OldPackedHeader = bit_cast<PackedHeader>(*OldUnpackedHeader);
     AtomicPackedHeader *AtomicHeader =
@@ -154,7 +169,7 @@
 static bool ScudoInitIsRunning = false;
 
 static pthread_once_t GlobalInited = PTHREAD_ONCE_INIT;
-static pthread_key_t pkey;
+static pthread_key_t PThreadKey;
 
 static thread_local bool ThreadInited = false;
 static thread_local bool ThreadTornDown = false;
@@ -168,7 +183,7 @@
   // like, so we wait until PTHREAD_DESTRUCTOR_ITERATIONS before draining the
   // quarantine and swallowing the cache.
   if (v < PTHREAD_DESTRUCTOR_ITERATIONS) {
-    pthread_setspecific(pkey, reinterpret_cast<void *>(v + 1));
+    pthread_setspecific(PThreadKey, reinterpret_cast<void *>(v + 1));
     return;
   }
   drainQuarantine();
@@ -181,23 +196,30 @@
   CHECK(!ScudoInitIsRunning && "Scudo init calls itself!");
   ScudoInitIsRunning = true;
 
+  // Check if SSE4.2 is supported, if so, opt for the CRC32 hardware version.
+  if (testCPUFeature(CRC32CPUFeature)) {
+    atomic_store_relaxed(&HashAlgorithm, CRC32Hardware);
+  }
+
   initFlags();
 
   AllocatorOptions Options;
   Options.setFrom(getFlags(), common_flags());
   initAllocator(Options);
 
+  MaybeStartBackgroudThread();
+
   ScudoInitIsRunning = false;
 }
 
 static void initGlobal() {
-  pthread_key_create(&pkey, teardownThread);
+  pthread_key_create(&PThreadKey, teardownThread);
   initInternal();
 }
 
 static void NOINLINE initThread() {
   pthread_once(&GlobalInited, initGlobal);
-  pthread_setspecific(pkey, reinterpret_cast<void *>(1));
+  pthread_setspecific(PThreadKey, reinterpret_cast<void *>(1));
   getAllocator().InitCache(&Cache);
   ThreadInited = true;
 }
@@ -214,7 +236,7 @@
       dieWithMessage("ERROR: invalid chunk state when recycling address %p\n",
                      Chunk);
     }
-    void *Ptr = Chunk->AllocBeg(&Header);
+    void *Ptr = Chunk->getAllocBeg(&Header);
     getAllocator().Deallocate(Cache_, Ptr);
   }
 
@@ -245,6 +267,7 @@
 
 void AllocatorOptions::setFrom(const Flags *f, const CommonFlags *cf) {
   MayReturnNull = cf->allocator_may_return_null;
+  ReleaseToOSIntervalMs = cf->allocator_release_to_os_interval_ms;
   QuarantineSizeMb = f->QuarantineSizeMb;
   ThreadLocalQuarantineSizeKb = f->ThreadLocalQuarantineSizeKb;
   DeallocationTypeMismatch = f->DeallocationTypeMismatch;
@@ -254,6 +277,7 @@
 
 void AllocatorOptions::copyTo(Flags *f, CommonFlags *cf) const {
   cf->allocator_may_return_null = MayReturnNull;
+  cf->allocator_release_to_os_interval_ms = ReleaseToOSIntervalMs;
   f->QuarantineSizeMb = QuarantineSizeMb;
   f->ThreadLocalQuarantineSizeKb = ThreadLocalQuarantineSizeKb;
   f->DeallocationTypeMismatch = DeallocationTypeMismatch;
@@ -262,9 +286,8 @@
 }
 
 struct Allocator {
-  static const uptr MaxAllowedMallocSize = 1ULL << 40;
-  static const uptr MinAlignment = 1 << MinAlignmentLog;
-  static const uptr MaxAlignment = 1 << MaxAlignmentLog; // 16 MB
+  static const uptr MaxAllowedMallocSize =
+      FIRST_32_SECOND_64(2UL << 30, 1ULL << 40);
 
   ScudoAllocator BackendAllocator;
   ScudoQuarantine AllocatorQuarantine;
@@ -285,85 +308,129 @@
       FallbackQuarantineCache(LINKER_INITIALIZED) {}
 
   void init(const AllocatorOptions &Options) {
-    // Currently SSE 4.2 support is required. This might change later.
-    CHECK(testCPUFeature(SSE4_2)); // for crc32
-
     // Verify that the header offset field can hold the maximum offset. In the
-    // worst case scenario, the backend allocation is already aligned on
-    // MaxAlignment, so in order to store the header and still be aligned, we
-    // add an extra MaxAlignment. As a result, the offset from the beginning of
-    // the backend allocation to the chunk will be MaxAlignment -
-    // ChunkHeaderSize.
+    // case of the Secondary allocator, it takes care of alignment and the
+    // offset will always be 0. In the case of the Primary, the worst case
+    // scenario happens in the last size class, when the backend allocation
+    // would already be aligned on the requested alignment, which would happen
+    // to be the maximum alignment that would fit in that size class. As a
+    // result, the maximum offset will be at most the maximum alignment for the
+    // last size class minus the header size, in multiples of MinAlignment.
     UnpackedHeader Header = {};
-    uptr MaximumOffset = (MaxAlignment - ChunkHeaderSize) >> MinAlignmentLog;
-    Header.Offset = MaximumOffset;
-    if (Header.Offset != MaximumOffset) {
+    uptr MaxPrimaryAlignment = 1 << MostSignificantSetBitIndex(
+        SizeClassMap::kMaxSize - MinAlignment);
+    uptr MaxOffset = (MaxPrimaryAlignment - AlignedChunkHeaderSize) >>
+        MinAlignmentLog;
+    Header.Offset = MaxOffset;
+    if (Header.Offset != MaxOffset) {
       dieWithMessage("ERROR: the maximum possible offset doesn't fit in the "
                      "header\n");
     }
+    // Verify that we can fit the maximum amount of unused bytes in the header.
+    // Given that the Secondary fits the allocation to a page, the worst case
+    // scenario happens in the Primary. It will depend on the second to last
+    // and last class sizes, as well as the dynamic base for the Primary. The
+    // following is an over-approximation that works for our needs.
+    uptr MaxUnusedBytes = SizeClassMap::kMaxSize - 1 - AlignedChunkHeaderSize;
+    Header.UnusedBytes = MaxUnusedBytes;
+    if (Header.UnusedBytes != MaxUnusedBytes) {
+      dieWithMessage("ERROR: the maximum possible unused bytes doesn't fit in "
+                     "the header\n");
+    }
 
     DeallocationTypeMismatch = Options.DeallocationTypeMismatch;
     DeleteSizeMismatch = Options.DeleteSizeMismatch;
     ZeroContents = Options.ZeroContents;
-    BackendAllocator.Init(Options.MayReturnNull);
-    AllocatorQuarantine.Init(static_cast<uptr>(Options.QuarantineSizeMb) << 20,
-                             static_cast<uptr>(
-                                 Options.ThreadLocalQuarantineSizeKb) << 10);
+    BackendAllocator.Init(Options.MayReturnNull, Options.ReleaseToOSIntervalMs);
+    AllocatorQuarantine.Init(
+        static_cast<uptr>(Options.QuarantineSizeMb) << 20,
+        static_cast<uptr>(Options.ThreadLocalQuarantineSizeKb) << 10);
     BackendAllocator.InitCache(&FallbackAllocatorCache);
     Cookie = Prng.Next();
   }
 
+  // Helper function that checks for a valid Scudo chunk.
+  bool isValidPointer(const void *UserPtr) {
+    uptr ChunkBeg = reinterpret_cast<uptr>(UserPtr);
+    if (!IsAligned(ChunkBeg, MinAlignment)) {
+      return false;
+    }
+    ScudoChunk *Chunk =
+        reinterpret_cast<ScudoChunk *>(ChunkBeg - AlignedChunkHeaderSize);
+    return Chunk->isValid();
+  }
+
   // Allocates a chunk.
   void *allocate(uptr Size, uptr Alignment, AllocType Type) {
     if (UNLIKELY(!ThreadInited))
       initThread();
     if (!IsPowerOfTwo(Alignment)) {
-      dieWithMessage("ERROR: malloc alignment is not a power of 2\n");
+      dieWithMessage("ERROR: alignment is not a power of 2\n");
     }
     if (Alignment > MaxAlignment)
-      return BackendAllocator.ReturnNullOrDie();
+      return BackendAllocator.ReturnNullOrDieOnBadRequest();
     if (Alignment < MinAlignment)
       Alignment = MinAlignment;
     if (Size == 0)
       Size = 1;
     if (Size >= MaxAllowedMallocSize)
-      return BackendAllocator.ReturnNullOrDie();
-    uptr RoundedSize = RoundUpTo(Size, MinAlignment);
-    uptr ExtraBytes = ChunkHeaderSize;
+      return BackendAllocator.ReturnNullOrDieOnBadRequest();
+
+    uptr NeededSize = RoundUpTo(Size, MinAlignment) + AlignedChunkHeaderSize;
     if (Alignment > MinAlignment)
-      ExtraBytes += Alignment;
-    uptr NeededSize = RoundedSize + ExtraBytes;
+      NeededSize += Alignment;
     if (NeededSize >= MaxAllowedMallocSize)
-      return BackendAllocator.ReturnNullOrDie();
+      return BackendAllocator.ReturnNullOrDieOnBadRequest();
+
+    // Primary backed and Secondary backed allocations have a different
+    // treatment. We deal with alignment requirements of Primary serviced
+    // allocations here, but the Secondary will take care of its own alignment
+    // needs, which means we also have to work around some limitations of the
+    // combined allocator to accommodate the situation.
+    bool FromPrimary = PrimaryAllocator::CanAllocate(NeededSize, MinAlignment);
 
     void *Ptr;
     if (LIKELY(!ThreadTornDown)) {
-      Ptr = BackendAllocator.Allocate(&Cache, NeededSize, MinAlignment);
+      Ptr = BackendAllocator.Allocate(&Cache, NeededSize,
+                                      FromPrimary ? MinAlignment : Alignment);
     } else {
       SpinMutexLock l(&FallbackMutex);
       Ptr = BackendAllocator.Allocate(&FallbackAllocatorCache, NeededSize,
-                               MinAlignment);
+                                      FromPrimary ? MinAlignment : Alignment);
     }
     if (!Ptr)
-      return BackendAllocator.ReturnNullOrDie();
-
-    // If requested, we will zero out the entire contents of the returned chunk.
-    if (ZeroContents && BackendAllocator.FromPrimary(Ptr))
-       memset(Ptr, 0, BackendAllocator.GetActuallyAllocatedSize(Ptr));
+      return BackendAllocator.ReturnNullOrDieOnOOM();
 
     uptr AllocBeg = reinterpret_cast<uptr>(Ptr);
-    uptr ChunkBeg = AllocBeg + ChunkHeaderSize;
+    // If the allocation was serviced by the secondary, the returned pointer
+    // accounts for AlignedChunkHeaderSize to pass the alignment check of the
+    // combined allocator. Adjust it here.
+    if (!FromPrimary) {
+      AllocBeg -= AlignedChunkHeaderSize;
+      if (Alignment > MinAlignment)
+        NeededSize -= Alignment;
+    }
+
+    uptr ActuallyAllocatedSize = BackendAllocator.GetActuallyAllocatedSize(
+        reinterpret_cast<void *>(AllocBeg));
+    // If requested, we will zero out the entire contents of the returned chunk.
+    if (ZeroContents && FromPrimary)
+       memset(Ptr, 0, ActuallyAllocatedSize);
+
+    uptr ChunkBeg = AllocBeg + AlignedChunkHeaderSize;
     if (!IsAligned(ChunkBeg, Alignment))
       ChunkBeg = RoundUpTo(ChunkBeg, Alignment);
     CHECK_LE(ChunkBeg + Size, AllocBeg + NeededSize);
     ScudoChunk *Chunk =
-        reinterpret_cast<ScudoChunk *>(ChunkBeg - ChunkHeaderSize);
+        reinterpret_cast<ScudoChunk *>(ChunkBeg - AlignedChunkHeaderSize);
     UnpackedHeader Header = {};
     Header.State = ChunkAllocated;
-    Header.Offset = (ChunkBeg - ChunkHeaderSize - AllocBeg) >> MinAlignmentLog;
+    uptr Offset = ChunkBeg - AlignedChunkHeaderSize - AllocBeg;
+    Header.Offset = Offset >> MinAlignmentLog;
     Header.AllocType = Type;
-    Header.RequestedSize = Size;
-    Header.Salt = static_cast<u16>(Prng.Next());
+    Header.UnusedBytes = ActuallyAllocatedSize - Offset -
+        AlignedChunkHeaderSize - Size;
+    Header.Salt = static_cast<u8>(Prng.Next());
     Chunk->storeHeader(&Header);
     void *UserPtr = reinterpret_cast<void *>(ChunkBeg);
     // TODO(kostyak): hooks sound like a terrible idea security wise but might
@@ -387,13 +454,14 @@
                      "aligned at address %p\n", UserPtr);
     }
     ScudoChunk *Chunk =
-        reinterpret_cast<ScudoChunk *>(ChunkBeg - ChunkHeaderSize);
+        reinterpret_cast<ScudoChunk *>(ChunkBeg - AlignedChunkHeaderSize);
     UnpackedHeader OldHeader;
     Chunk->loadHeader(&OldHeader);
     if (OldHeader.State != ChunkAllocated) {
       dieWithMessage("ERROR: invalid chunk state when deallocating address "
-                     "%p\n", Chunk);
+                     "%p\n", UserPtr);
     }
+    uptr UsableSize = Chunk->getUsableSize(&OldHeader);
     UnpackedHeader NewHeader = OldHeader;
     NewHeader.State = ChunkQuarantine;
     Chunk->compareExchangeHeader(&NewHeader, &OldHeader);
@@ -407,69 +475,40 @@
         }
       }
     }
-    uptr Size = NewHeader.RequestedSize;
+    uptr Size = UsableSize - OldHeader.UnusedBytes;
     if (DeleteSizeMismatch) {
       if (DeleteSize && DeleteSize != Size) {
         dieWithMessage("ERROR: invalid sized delete on chunk at address %p\n",
                        Chunk);
       }
     }
+
     if (LIKELY(!ThreadTornDown)) {
       AllocatorQuarantine.Put(&ThreadQuarantineCache,
-                              QuarantineCallback(&Cache), Chunk, Size);
+                              QuarantineCallback(&Cache), Chunk, UsableSize);
     } else {
       SpinMutexLock l(&FallbackMutex);
       AllocatorQuarantine.Put(&FallbackQuarantineCache,
                               QuarantineCallback(&FallbackAllocatorCache),
-                              Chunk, Size);
+                              Chunk, UsableSize);
     }
   }
 
-  // Returns the actual usable size of a chunk. Since this requires loading the
-  // header, we will return it in the second parameter, as it can be required
-  // by the caller to perform additional processing.
-  uptr getUsableSize(const void *Ptr, UnpackedHeader *Header) {
-    if (UNLIKELY(!ThreadInited))
-      initThread();
-    if (!Ptr)
-      return 0;
-    uptr ChunkBeg = reinterpret_cast<uptr>(Ptr);
-    ScudoChunk *Chunk =
-        reinterpret_cast<ScudoChunk *>(ChunkBeg - ChunkHeaderSize);
-    Chunk->loadHeader(Header);
-    // Getting the usable size of a chunk only makes sense if it's allocated.
-    if (Header->State != ChunkAllocated) {
-      dieWithMessage("ERROR: attempted to size a non-allocated chunk at "
-                     "address %p\n", Chunk);
-    }
-    uptr Size =
-        BackendAllocator.GetActuallyAllocatedSize(Chunk->AllocBeg(Header));
-    // UsableSize works as malloc_usable_size, which is also what (AFAIU)
-    // tcmalloc's MallocExtension::GetAllocatedSize aims at providing. This
-    // means we will return the size of the chunk from the user beginning to
-    // the end of the 'user' allocation, hence us subtracting the header size
-    // and the offset from the size.
-    if (Size == 0)
-      return Size;
-    return Size - ChunkHeaderSize - (Header->Offset << MinAlignmentLog);
-  }
-
-  // Helper function that doesn't care about the header.
-  uptr getUsableSize(const void *Ptr) {
-    UnpackedHeader Header;
-    return getUsableSize(Ptr, &Header);
-  }
-
   // Reallocates a chunk. We can save on a new allocation if the new requested
   // size still fits in the chunk.
   void *reallocate(void *OldPtr, uptr NewSize) {
     if (UNLIKELY(!ThreadInited))
       initThread();
-    UnpackedHeader OldHeader;
-    uptr Size = getUsableSize(OldPtr, &OldHeader);
     uptr ChunkBeg = reinterpret_cast<uptr>(OldPtr);
     ScudoChunk *Chunk =
-        reinterpret_cast<ScudoChunk *>(ChunkBeg - ChunkHeaderSize);
+        reinterpret_cast<ScudoChunk *>(ChunkBeg - AlignedChunkHeaderSize);
+    UnpackedHeader OldHeader;
+    Chunk->loadHeader(&OldHeader);
+    if (OldHeader.State != ChunkAllocated) {
+      dieWithMessage("ERROR: invalid chunk state when reallocating address "
+                     "%p\n", OldPtr);
+    }
+    uptr Size = Chunk->getUsableSize(&OldHeader);
     if (OldHeader.AllocType != FromMalloc) {
       dieWithMessage("ERROR: invalid chunk type when reallocating address %p\n",
                      Chunk);
@@ -477,7 +516,7 @@
     UnpackedHeader NewHeader = OldHeader;
     // The new size still fits in the current chunk.
     if (NewSize <= Size) {
-      NewHeader.RequestedSize = NewSize;
+      NewHeader.UnusedBytes = Size - NewSize;
       Chunk->compareExchangeHeader(&NewHeader, &OldHeader);
       return OldPtr;
     }
@@ -485,29 +524,48 @@
     // old one.
     void *NewPtr = allocate(NewSize, MinAlignment, FromMalloc);
     if (NewPtr) {
-      uptr OldSize = OldHeader.RequestedSize;
+      uptr OldSize = Size - OldHeader.UnusedBytes;
       memcpy(NewPtr, OldPtr, Min(NewSize, OldSize));
       NewHeader.State = ChunkQuarantine;
       Chunk->compareExchangeHeader(&NewHeader, &OldHeader);
       if (LIKELY(!ThreadTornDown)) {
         AllocatorQuarantine.Put(&ThreadQuarantineCache,
-                                QuarantineCallback(&Cache), Chunk, OldSize);
+                                QuarantineCallback(&Cache), Chunk, Size);
       } else {
         SpinMutexLock l(&FallbackMutex);
         AllocatorQuarantine.Put(&FallbackQuarantineCache,
                                 QuarantineCallback(&FallbackAllocatorCache),
-                                Chunk, OldSize);
+                                Chunk, Size);
       }
     }
     return NewPtr;
   }
 
+  // Helper function that returns the actual usable size of a chunk.
+  uptr getUsableSize(const void *Ptr) {
+    if (UNLIKELY(!ThreadInited))
+      initThread();
+    if (!Ptr)
+      return 0;
+    uptr ChunkBeg = reinterpret_cast<uptr>(Ptr);
+    ScudoChunk *Chunk =
+        reinterpret_cast<ScudoChunk *>(ChunkBeg - AlignedChunkHeaderSize);
+    UnpackedHeader Header;
+    Chunk->loadHeader(&Header);
+    // Getting the usable size of a chunk only makes sense if it's allocated.
+    if (Header.State != ChunkAllocated) {
+      dieWithMessage("ERROR: invalid chunk state when sizing address %p\n",
+                     Ptr);
+    }
+    return Chunk->getUsableSize(&Header);
+  }
+
   void *calloc(uptr NMemB, uptr Size) {
     if (UNLIKELY(!ThreadInited))
       initThread();
     uptr Total = NMemB * Size;
     if (Size != 0 && Total / Size != NMemB) // Overflow check
-      return BackendAllocator.ReturnNullOrDie();
+      return BackendAllocator.ReturnNullOrDieOnBadRequest();
     void *Ptr = allocate(Total, MinAlignment, FromMalloc);
     // If ZeroContents, the content of the chunk has already been zero'd out.
     if (!ZeroContents && Ptr && BackendAllocator.FromPrimary(Ptr))
@@ -536,7 +594,7 @@
 }
 
 void *scudoMalloc(uptr Size, AllocType Type) {
-  return Instance.allocate(Size, Allocator::MinAlignment, Type);
+  return Instance.allocate(Size, MinAlignment, Type);
 }
 
 void scudoFree(void *Ptr, AllocType Type) {
@@ -549,7 +607,7 @@
 
 void *scudoRealloc(void *Ptr, uptr Size) {
   if (!Ptr)
-    return Instance.allocate(Size, Allocator::MinAlignment, FromMalloc);
+    return Instance.allocate(Size, MinAlignment, FromMalloc);
   if (Size == 0) {
     Instance.deallocate(Ptr, 0, FromMalloc);
     return nullptr;
@@ -596,7 +654,7 @@
   return Instance.getUsableSize(Ptr);
 }
 
-} // namespace __scudo
+}  // namespace __scudo
 
 using namespace __scudo;
 
@@ -626,10 +684,10 @@
   return size;
 }
 
-int __sanitizer_get_ownership(const void *p) {
-  return Instance.getUsableSize(p) != 0;
+int __sanitizer_get_ownership(const void *Ptr) {
+  return Instance.isValidPointer(Ptr);
 }
 
-uptr __sanitizer_get_allocated_size(const void *p) {
-  return Instance.getUsableSize(p);
+uptr __sanitizer_get_allocated_size(const void *Ptr) {
+  return Instance.getUsableSize(Ptr);
 }
diff --git a/lib/scudo/scudo_allocator.h b/lib/scudo/scudo_allocator.h
index 7e9c788..6431a2a 100644
--- a/lib/scudo/scudo_allocator.h
+++ b/lib/scudo/scudo_allocator.h
@@ -14,14 +14,12 @@
 #ifndef SCUDO_ALLOCATOR_H_
 #define SCUDO_ALLOCATOR_H_
 
-#ifndef __x86_64__
-# error "The Scudo hardened allocator currently only supports x86_64."
-#endif
-
 #include "scudo_flags.h"
 
 #include "sanitizer_common/sanitizer_allocator.h"
 
+#include <atomic>
+
 namespace __scudo {
 
 enum AllocType : u8 {
@@ -31,10 +29,49 @@
   FromMemalign  = 3, // Memory block came from memalign, posix_memalign, etc.
 };
 
+enum ChunkState : u8 {
+  ChunkAvailable  = 0,
+  ChunkAllocated  = 1,
+  ChunkQuarantine = 2
+};
+
+// Our header requires 64 bits of storage. Having the offset saves us from
+// using functions such as GetBlockBegin, that is fairly costly. Our first
+// implementation used the MetaData as well, which offers the advantage of
+// being stored away from the chunk itself, but accessing it was costly as
+// well. The header will be atomically loaded and stored as a single 64-bit
+// value, using the 8-byte atomic primitives offered by the platform.
+typedef u64 PackedHeader;
+struct UnpackedHeader {
+  u64 Checksum    : 16;
+  u64 UnusedBytes : 20; // Needed for reallocation purposes.
+  u64 State       : 2;  // available, allocated, or quarantined
+  u64 AllocType   : 2;  // malloc, new, new[], or memalign
+  u64 Offset      : 16; // Offset from the beginning of the backend
+                        // allocation to the beginning of the chunk itself,
+                        // in multiples of MinAlignment. See comment about
+                        // its maximum value and test in init().
+  u64 Salt        : 8;
+};
+
+typedef std::atomic<PackedHeader> AtomicPackedHeader;
+COMPILER_CHECK(sizeof(UnpackedHeader) == sizeof(PackedHeader));
+
+// Minimum alignment of 8 bytes for 32-bit, 16 for 64-bit
+const uptr MinAlignmentLog = FIRST_32_SECOND_64(3, 4);
+const uptr MaxAlignmentLog = 24; // 16 MB
+const uptr MinAlignment = 1 << MinAlignmentLog;
+const uptr MaxAlignment = 1 << MaxAlignmentLog;
+
+const uptr ChunkHeaderSize = sizeof(PackedHeader);
+const uptr AlignedChunkHeaderSize =
+    (ChunkHeaderSize + MinAlignment - 1) & ~(MinAlignment - 1);
+
 struct AllocatorOptions {
   u32 QuarantineSizeMb;
   u32 ThreadLocalQuarantineSizeKb;
   bool MayReturnNull;
+  s32 ReleaseToOSIntervalMs;
   bool DeallocationTypeMismatch;
   bool DeleteSizeMismatch;
   bool ZeroContents;
@@ -58,6 +95,8 @@
 void *scudoAlignedAlloc(uptr Alignment, uptr Size);
 uptr scudoMallocUsableSize(void *Ptr);
 
-} // namespace __scudo
+#include "scudo_allocator_secondary.h"
+
+}  // namespace __scudo
 
 #endif  // SCUDO_ALLOCATOR_H_
diff --git a/lib/scudo/scudo_allocator_secondary.h b/lib/scudo/scudo_allocator_secondary.h
new file mode 100644
index 0000000..b984f0d
--- /dev/null
+++ b/lib/scudo/scudo_allocator_secondary.h
@@ -0,0 +1,188 @@
+//===-- scudo_allocator_secondary.h -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Scudo Secondary Allocator.
+/// This services allocations that are too large to be serviced by the Primary
+/// Allocator. It is directly backed by the memory mapping functions of the
+/// operating system.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_ALLOCATOR_SECONDARY_H_
+#define SCUDO_ALLOCATOR_SECONDARY_H_
+
+#ifndef SCUDO_ALLOCATOR_H_
+# error "This file must be included inside scudo_allocator.h."
+#endif
+
+class ScudoLargeMmapAllocator {
+ public:
+
+  void Init(bool AllocatorMayReturnNull) {
+    PageSize = GetPageSizeCached();
+    atomic_store(&MayReturnNull, AllocatorMayReturnNull, memory_order_relaxed);
+  }
+
+  void *Allocate(AllocatorStats *Stats, uptr Size, uptr Alignment) {
+    // The Scudo frontend prevents us from allocating more than
+    // MaxAllowedMallocSize, so integer overflow checks would be superfluous.
+    uptr MapSize = Size + SecondaryHeaderSize;
+    MapSize = RoundUpTo(MapSize, PageSize);
+    // Account for 2 guard pages, one before and one after the chunk.
+    MapSize += 2 * PageSize;
+    // The size passed to the Secondary comprises the alignment, if large
+    // enough. Subtract it here to get the requested size, including header.
+    if (Alignment > MinAlignment)
+      Size -= Alignment;
+
+    uptr MapBeg = reinterpret_cast<uptr>(MmapNoAccess(MapSize));
+    if (MapBeg == ~static_cast<uptr>(0))
+      return ReturnNullOrDieOnOOM();
+    // A page-aligned pointer is assumed after that, so check it now.
+    CHECK(IsAligned(MapBeg, PageSize));
+    uptr MapEnd = MapBeg + MapSize;
+    // The beginning of the user area for that allocation comes after the
+    // initial guard page, and both headers. This is the pointer that has to
+    // abide by alignment requirements.
+    uptr UserBeg = MapBeg + PageSize + HeadersSize;
+
+    // In the rare event of larger alignments, we will attempt to fit the mmap
+    // area better and unmap extraneous memory. This will also ensure that the
+    // offset and unused bytes field of the header stay small.
+    if (Alignment > MinAlignment) {
+      if (UserBeg & (Alignment - 1))
+        UserBeg += Alignment - (UserBeg & (Alignment - 1));
+      CHECK_GE(UserBeg, MapBeg);
+      uptr NewMapBeg = RoundDownTo(UserBeg - HeadersSize, PageSize) - PageSize;
+      CHECK_GE(NewMapBeg, MapBeg);
+      uptr NewMapEnd = RoundUpTo(UserBeg + (Size - AlignedChunkHeaderSize),
+                                 PageSize) + PageSize;
+      CHECK_LE(NewMapEnd, MapEnd);
+      // Unmap the extra memory if it's large enough, on both sides.
+      uptr Diff = NewMapBeg - MapBeg;
+      if (Diff > PageSize)
+        UnmapOrDie(reinterpret_cast<void *>(MapBeg), Diff);
+      Diff = MapEnd - NewMapEnd;
+      if (Diff > PageSize)
+        UnmapOrDie(reinterpret_cast<void *>(NewMapEnd), Diff);
+      MapBeg = NewMapBeg;
+      MapEnd = NewMapEnd;
+      MapSize = NewMapEnd - NewMapBeg;
+    }
+
+    uptr UserEnd = UserBeg + (Size - AlignedChunkHeaderSize);
+    CHECK_LE(UserEnd, MapEnd - PageSize);
+    // Actually mmap the memory, preserving the guard pages on either side.
+    CHECK_EQ(MapBeg + PageSize, reinterpret_cast<uptr>(
+        MmapFixedOrDie(MapBeg + PageSize, MapSize - 2 * PageSize)));
+    uptr Ptr = UserBeg - AlignedChunkHeaderSize;
+    SecondaryHeader *Header = getHeader(Ptr);
+    Header->MapBeg = MapBeg;
+    Header->MapSize = MapSize;
+    // The primary adds the whole class size to the stats when allocating a
+    // chunk, so we will do something similar here. But we will not account for
+    // the guard pages.
+    Stats->Add(AllocatorStatAllocated, MapSize - 2 * PageSize);
+    Stats->Add(AllocatorStatMapped, MapSize - 2 * PageSize);
+
+    return reinterpret_cast<void *>(UserBeg);
+  }
+
+  void *ReturnNullOrDieOnBadRequest() {
+    if (atomic_load(&MayReturnNull, memory_order_acquire))
+      return nullptr;
+    ReportAllocatorCannotReturnNull(false);
+  }
+
+  void *ReturnNullOrDieOnOOM() {
+    if (atomic_load(&MayReturnNull, memory_order_acquire))
+      return nullptr;
+    ReportAllocatorCannotReturnNull(true);
+  }
+
+  void SetMayReturnNull(bool AllocatorMayReturnNull) {
+    atomic_store(&MayReturnNull, AllocatorMayReturnNull, memory_order_release);
+  }
+
+  void Deallocate(AllocatorStats *Stats, void *Ptr) {
+    SecondaryHeader *Header = getHeader(Ptr);
+    Stats->Sub(AllocatorStatAllocated, Header->MapSize - 2 * PageSize);
+    Stats->Sub(AllocatorStatMapped, Header->MapSize - 2 * PageSize);
+    UnmapOrDie(reinterpret_cast<void *>(Header->MapBeg), Header->MapSize);
+  }
+
+  uptr TotalMemoryUsed() {
+    UNIMPLEMENTED();
+  }
+
+  bool PointerIsMine(const void *Ptr) {
+    UNIMPLEMENTED();
+  }
+
+  uptr GetActuallyAllocatedSize(void *Ptr) {
+    SecondaryHeader *Header = getHeader(Ptr);
+    // Deduct PageSize as MapEnd includes the trailing guard page.
+    uptr MapEnd = Header->MapBeg + Header->MapSize - PageSize;
+    return MapEnd - reinterpret_cast<uptr>(Ptr);
+  }
+
+  void *GetMetaData(const void *Ptr) {
+    UNIMPLEMENTED();
+  }
+
+  void *GetBlockBegin(const void *Ptr) {
+    UNIMPLEMENTED();
+  }
+
+  void *GetBlockBeginFastLocked(void *Ptr) {
+    UNIMPLEMENTED();
+  }
+
+  void PrintStats() {
+    UNIMPLEMENTED();
+  }
+
+  void ForceLock() {
+    UNIMPLEMENTED();
+  }
+
+  void ForceUnlock() {
+    UNIMPLEMENTED();
+  }
+
+  void ForEachChunk(ForEachChunkCallback Callback, void *Arg) {
+    UNIMPLEMENTED();
+  }
+
+ private:
+  // A Secondary allocated chunk header contains the base of the mapping and
+  // its size. Currently, the base is always a page before the header, but
+  // we might want to extend that number in the future based on the size of
+  // the allocation.
+  struct SecondaryHeader {
+    uptr MapBeg;
+    uptr MapSize;
+  };
+  // Check that sizeof(SecondaryHeader) is a multiple of MinAlignment.
+  COMPILER_CHECK((sizeof(SecondaryHeader) & (MinAlignment - 1)) == 0);
+
+  SecondaryHeader *getHeader(uptr Ptr) {
+    return reinterpret_cast<SecondaryHeader*>(Ptr - sizeof(SecondaryHeader));
+  }
+  SecondaryHeader *getHeader(const void *Ptr) {
+    return getHeader(reinterpret_cast<uptr>(Ptr));
+  }
+
+  const uptr SecondaryHeaderSize = sizeof(SecondaryHeader);
+  const uptr HeadersSize = SecondaryHeaderSize + AlignedChunkHeaderSize;
+  uptr PageSize;
+  atomic_uint8_t MayReturnNull;
+};
+
+#endif  // SCUDO_ALLOCATOR_SECONDARY_H_
diff --git a/lib/scudo/scudo_crc32.cpp b/lib/scudo/scudo_crc32.cpp
new file mode 100644
index 0000000..94c8c24
--- /dev/null
+++ b/lib/scudo/scudo_crc32.cpp
@@ -0,0 +1,53 @@
+//===-- scudo_crc32.cpp -----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// CRC32 function leveraging hardware specific instructions. This has to be
+/// kept separated to restrict the use of compiler specific flags to this file.
+///
+//===----------------------------------------------------------------------===//
+
+// Hardware CRC32 is supported at compilation via the following:
+// - for i386 & x86_64: -msse4.2
+// - for ARM & AArch64: -march=armv8-a+crc or -mcrc
+// An additional check must be performed at runtime as well to make sure the
+// emitted instructions are valid on the target host.
+#include "scudo_crc32.h"
+#include "scudo_utils.h"
+
+#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
+# ifdef __SSE4_2__
+#  include <smmintrin.h>
+#  define CRC32_INTRINSIC FIRST_32_SECOND_64(_mm_crc32_u32, _mm_crc32_u64)
+# endif
+# ifdef __ARM_FEATURE_CRC32
+#  include <arm_acle.h>
+#  define CRC32_INTRINSIC FIRST_32_SECOND_64(__crc32cw, __crc32cd)
+# endif
+#endif  // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
+
+namespace __scudo {
+
+#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
+INLINE u32 computeHardwareCRC32(u32 Crc, uptr Data) {
+  return CRC32_INTRINSIC(Crc, Data);
+}
+
+u32 computeCRC32(u32 Crc, uptr Data, u8 HashType) {
+  if (HashType == CRC32Hardware) {
+    return computeHardwareCRC32(Crc, Data);
+  }
+  return computeSoftwareCRC32(Crc, Data);
+}
+#else
+u32 computeCRC32(u32 Crc, uptr Data, u8 HashType) {
+  return computeSoftwareCRC32(Crc, Data);
+}
+#endif  // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
+
+}  // namespace __scudo
diff --git a/lib/scudo/scudo_crc32.h b/lib/scudo/scudo_crc32.h
new file mode 100644
index 0000000..6635cc7
--- /dev/null
+++ b/lib/scudo/scudo_crc32.h
@@ -0,0 +1,30 @@
+//===-- scudo_crc32.h -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Header for scudo_crc32.cpp.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_CRC32_H_
+#define SCUDO_CRC32_H_
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+namespace __scudo {
+
+enum : u8 {
+  CRC32Software = 0,
+  CRC32Hardware = 1,
+};
+
+u32 computeCRC32(u32 Crc, uptr Data, u8 HashType);
+
+}  // namespace __scudo
+
+#endif  // SCUDO_CRC32_H_
diff --git a/lib/scudo/scudo_flags.cpp b/lib/scudo/scudo_flags.cpp
index f0d2088..b9c8381 100644
--- a/lib/scudo/scudo_flags.cpp
+++ b/lib/scudo/scudo_flags.cpp
@@ -90,4 +90,4 @@
   return &ScudoFlags;
 }
 
-}
+}  // namespace __scudo
diff --git a/lib/scudo/scudo_flags.h b/lib/scudo/scudo_flags.h
index c16f635..d4ae310 100644
--- a/lib/scudo/scudo_flags.h
+++ b/lib/scudo/scudo_flags.h
@@ -28,6 +28,6 @@
 
 void initFlags();
 
-} // namespace __scudo
+}  // namespace __scudo
 
 #endif  // SCUDO_FLAGS_H_
diff --git a/lib/scudo/scudo_interceptors.cpp b/lib/scudo/scudo_interceptors.cpp
index 9204652..735a131 100644
--- a/lib/scudo/scudo_interceptors.cpp
+++ b/lib/scudo/scudo_interceptors.cpp
@@ -72,4 +72,4 @@
   return -1;
 }
 
-#endif // SANITIZER_LINUX
+#endif  // SANITIZER_LINUX
diff --git a/lib/scudo/scudo_new_delete.cpp b/lib/scudo/scudo_new_delete.cpp
index 172f565..c022bd0 100644
--- a/lib/scudo/scudo_new_delete.cpp
+++ b/lib/scudo/scudo_new_delete.cpp
@@ -24,7 +24,7 @@
 // Fake std::nothrow_t to avoid including <new>.
 namespace std {
 struct nothrow_t {};
-} // namespace std
+}  // namespace std
 
 CXX_OPERATOR_ATTRIBUTE
 void *operator new(size_t size) {
diff --git a/lib/scudo/scudo_termination.cpp b/lib/scudo/scudo_termination.cpp
index a533383..c441ff3 100644
--- a/lib/scudo/scudo_termination.cpp
+++ b/lib/scudo/scudo_termination.cpp
@@ -39,4 +39,4 @@
                           File, Line, Condition, Value1, Value2);
 }
 
-} // namespace __sanitizer
+}  // namespace __sanitizer
diff --git a/lib/scudo/scudo_utils.cpp b/lib/scudo/scudo_utils.cpp
index f45569e..ffa65b2 100644
--- a/lib/scudo/scudo_utils.cpp
+++ b/lib/scudo/scudo_utils.cpp
@@ -17,6 +17,9 @@
 #include <fcntl.h>
 #include <stdarg.h>
 #include <unistd.h>
+#if defined(__x86_64__) || defined(__i386__)
+# include <cpuid.h>
+#endif
 
 #include <cstring>
 
@@ -28,14 +31,14 @@
 extern int VSNPrintf(char *buff, int buff_length, const char *format,
                      va_list args);
 
-} // namespace __sanitizer
+}  // namespace __sanitizer
 
 namespace __scudo {
 
 FORMAT(1, 2)
 void NORETURN dieWithMessage(const char *Format, ...) {
-  // Our messages are tiny, 128 characters is more than enough.
-  char Message[128];
+  // Our messages are tiny, 256 characters is more than enough.
+  char Message[256];
   va_list Args;
   va_start(Args, Format);
   __sanitizer::VSNPrintf(Message, sizeof(Message), Format, Args);
@@ -44,60 +47,61 @@
   Die();
 }
 
+#if defined(__x86_64__) || defined(__i386__)
+// i386 and x86_64 specific code to detect CRC32 hardware support via CPUID.
+// CRC32 requires the SSE 4.2 instruction set.
 typedef struct {
   u32 Eax;
   u32 Ebx;
   u32 Ecx;
   u32 Edx;
-} CPUIDInfo;
+} CPUIDRegs;
 
-static void getCPUID(CPUIDInfo *info, u32 leaf, u32 subleaf)
+static void getCPUID(CPUIDRegs *Regs, u32 Level)
 {
-  asm volatile("cpuid"
-      : "=a" (info->Eax), "=b" (info->Ebx), "=c" (info->Ecx), "=d" (info->Edx)
-      : "a" (leaf), "c" (subleaf)
-  );
+  __get_cpuid(Level, &Regs->Eax, &Regs->Ebx, &Regs->Ecx, &Regs->Edx);
 }
 
-// Returns true is the CPU is a "GenuineIntel" or "AuthenticAMD"
-static bool isSupportedCPU()
-{
-  CPUIDInfo Info;
-
-  getCPUID(&Info, 0, 0);
-  if (memcmp(reinterpret_cast<char *>(&Info.Ebx), "Genu", 4) == 0 &&
-      memcmp(reinterpret_cast<char *>(&Info.Edx), "ineI", 4) == 0 &&
-      memcmp(reinterpret_cast<char *>(&Info.Ecx), "ntel", 4) == 0) {
-      return true;
+CPUIDRegs getCPUFeatures() {
+  CPUIDRegs VendorRegs = {};
+  getCPUID(&VendorRegs, 0);
+  bool IsIntel =
+      (VendorRegs.Ebx == signature_INTEL_ebx) &&
+      (VendorRegs.Edx == signature_INTEL_edx) &&
+      (VendorRegs.Ecx == signature_INTEL_ecx);
+  bool IsAMD =
+      (VendorRegs.Ebx == signature_AMD_ebx) &&
+      (VendorRegs.Edx == signature_AMD_edx) &&
+      (VendorRegs.Ecx == signature_AMD_ecx);
+  // Default to an empty feature set if not on a supported CPU.
+  CPUIDRegs FeaturesRegs = {};
+  if (IsIntel || IsAMD) {
+    getCPUID(&FeaturesRegs, 1);
   }
-  if (memcmp(reinterpret_cast<char *>(&Info.Ebx), "Auth", 4) == 0 &&
-      memcmp(reinterpret_cast<char *>(&Info.Edx), "enti", 4) == 0 &&
-      memcmp(reinterpret_cast<char *>(&Info.Ecx), "cAMD", 4) == 0) {
-      return true;
-  }
-  return false;
+  return FeaturesRegs;
 }
 
-bool testCPUFeature(CPUFeature feature)
-{
-  static bool InfoInitialized = false;
-  static CPUIDInfo CPUInfo = {};
+#ifndef bit_SSE4_2
+#define bit_SSE4_2 bit_SSE42  // clang and gcc have different defines.
+#endif
 
-  if (InfoInitialized == false) {
-    if (isSupportedCPU() == true)
-      getCPUID(&CPUInfo, 1, 0);
-    else
-      UNIMPLEMENTED();
-    InfoInitialized = true;
-  }
-  switch (feature) {
-    case SSE4_2:
-      return ((CPUInfo.Ecx >> 20) & 0x1) != 0;
+bool testCPUFeature(CPUFeature Feature)
+{
+  static CPUIDRegs FeaturesRegs = getCPUFeatures();
+
+  switch (Feature) {
+    case CRC32CPUFeature:  // CRC32 is provided by SSE 4.2.
+      return !!(FeaturesRegs.Ecx & bit_SSE4_2);
     default:
       break;
   }
   return false;
 }
+#else
+bool testCPUFeature(CPUFeature Feature) {
+  return false;
+}
+#endif  // defined(__x86_64__) || defined(__i386__)
 
 // readRetry will attempt to read Count bytes from the Fd specified, and if
 // interrupted will retry to read additional bytes to reach Count.
@@ -117,17 +121,76 @@
   return AmountRead;
 }
 
-// Default constructor for Xorshift128Plus seeds the state with /dev/urandom
-Xorshift128Plus::Xorshift128Plus() {
+static void fillRandom(u8 *Data, ssize_t Size) {
   int Fd = open("/dev/urandom", O_RDONLY);
-  bool Success = readRetry(Fd, reinterpret_cast<u8 *>(&State_0_),
-                           sizeof(State_0_)) == sizeof(State_0_);
-  Success &= readRetry(Fd, reinterpret_cast<u8 *>(&State_1_),
-                           sizeof(State_1_)) == sizeof(State_1_);
+  if (Fd < 0) {
+    dieWithMessage("ERROR: failed to open /dev/urandom.\n");
+  }
+  bool Success = readRetry(Fd, Data, Size) == Size;
   close(Fd);
   if (!Success) {
     dieWithMessage("ERROR: failed to read enough data from /dev/urandom.\n");
   }
 }
 
-} // namespace __scudo
+// Default constructor for Xorshift128Plus seeds the state with /dev/urandom.
+// TODO(kostyak): investigate using getrandom() if available.
+Xorshift128Plus::Xorshift128Plus() {
+  fillRandom(reinterpret_cast<u8 *>(State), sizeof(State));
+}
+
+const static u32 CRC32Table[] = {
+  0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
+  0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+  0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
+  0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+  0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
+  0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+  0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
+  0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+  0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
+  0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+  0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
+  0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+  0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
+  0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+  0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+  0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+  0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
+  0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+  0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
+  0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+  0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
+  0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+  0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
+  0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+  0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+  0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+  0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
+  0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+  0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
+  0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+  0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
+  0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+  0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
+  0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+  0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+  0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+  0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
+  0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+  0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
+  0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+  0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
+  0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+  0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
+};
+
+// Table-driven CRC32 of Data's bytes, least significant byte first.
+u32 computeSoftwareCRC32(u32 Crc, uptr Data) {
+  for (uptr Left = sizeof(Data); Left != 0; Left--, Data >>= 8) {
+    Crc = CRC32Table[(Crc ^ Data) & 0xff] ^ (Crc >> 8);
+  }
+  return Crc;
+}
+
+}  // namespace __scudo
diff --git a/lib/scudo/scudo_utils.h b/lib/scudo/scudo_utils.h
index c4f0760..ef2a609 100644
--- a/lib/scudo/scudo_utils.h
+++ b/lib/scudo/scudo_utils.h
@@ -30,9 +30,9 @@
 
 void NORETURN dieWithMessage(const char *Format, ...);
 
-enum  CPUFeature {
-  SSE4_2 = 0,
-  ENUM_CPUFEATURE_MAX
+enum CPUFeature {
+  CRC32CPUFeature = 0,
+  MaxCPUFeature,
 };
 bool testCPUFeature(CPUFeature feature);
 
@@ -42,18 +42,20 @@
  public:
   Xorshift128Plus();
   u64 Next() {
-    u64 x = State_0_;
-    const u64 y = State_1_;
-    State_0_ = y;
+    u64 x = State[0];
+    const u64 y = State[1];
+    State[0] = y;
     x ^= x << 23;
-    State_1_ = x ^ y ^ (x >> 17) ^ (y >> 26);
-    return State_1_ + y;
+    State[1] = x ^ y ^ (x >> 17) ^ (y >> 26);
+    return State[1] + y;
   }
  private:
-  u64 State_0_;
-  u64 State_1_;
+  u64 State[2];
 };
 
-} // namespace __scudo
+// Software CRC32 functions, to be used when hardware support is not detected.
+u32 computeSoftwareCRC32(u32 Crc, uptr Data);
+
+}  // namespace __scudo
 
 #endif  // SCUDO_UTILS_H_
diff --git a/lib/stats/CMakeLists.txt b/lib/stats/CMakeLists.txt
index 33ab1ae..2b3d647 100644
--- a/lib/stats/CMakeLists.txt
+++ b/lib/stats/CMakeLists.txt
@@ -5,8 +5,14 @@
 
 if(APPLE)
   set(STATS_LIB_FLAVOR SHARED)
+
+  add_weak_symbols("asan" WEAK_SYMBOL_LINK_FLAGS)
+  add_weak_symbols("ubsan" WEAK_SYMBOL_LINK_FLAGS)
+  add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
 else()
   set(STATS_LIB_FLAVOR STATIC)
+
+  set(WEAK_SYMBOL_LINK_FLAGS)
 endif()
 
 add_compiler_rt_runtime(clang_rt.stats
@@ -17,6 +23,7 @@
   OBJECT_LIBS RTSanitizerCommon
               RTSanitizerCommonLibc
   CFLAGS ${SANITIZER_COMMON_CFLAGS}
+  LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS}
   PARENT_TARGET stats)
 
 add_compiler_rt_runtime(clang_rt.stats_client
@@ -25,4 +32,5 @@
   OS ${SANITIZER_COMMON_SUPPORTED_OS}
   SOURCES stats_client.cc
   CFLAGS ${SANITIZER_COMMON_CFLAGS}
+  LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS}
   PARENT_TARGET stats)
diff --git a/lib/stats/stats_client.cc b/lib/stats/stats_client.cc
index fa4b2d9..5caf097 100644
--- a/lib/stats/stats_client.cc
+++ b/lib/stats/stats_client.cc
@@ -16,6 +16,7 @@
 //===----------------------------------------------------------------------===//
 
 #ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 #else
 #include <dlfcn.h>
diff --git a/lib/tsan/CMakeLists.txt b/lib/tsan/CMakeLists.txt
index 8e65b37..d519545 100644
--- a/lib/tsan/CMakeLists.txt
+++ b/lib/tsan/CMakeLists.txt
@@ -17,7 +17,7 @@
 
 set(TSAN_RTL_CFLAGS ${TSAN_CFLAGS})
 append_list_if(COMPILER_RT_HAS_MSSE3_FLAG -msse3 TSAN_RTL_CFLAGS)
-append_list_if(SANITIZER_LIMIT_FRAME_SIZE -Wframe-larger-than=512
+append_list_if(SANITIZER_LIMIT_FRAME_SIZE -Wframe-larger-than=530
                TSAN_RTL_CFLAGS)
 append_list_if(COMPILER_RT_HAS_WGLOBAL_CONSTRUCTORS_FLAG -Wglobal-constructors
                TSAN_RTL_CFLAGS)
@@ -107,6 +107,10 @@
     # Pass ASM file directly to the C++ compiler.
     set_source_files_properties(${TSAN_ASM_SOURCES} PROPERTIES LANGUAGE C)
   endif()
+
+  add_weak_symbols("ubsan" WEAK_SYMBOL_LINK_FLAGS)
+  add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
+
   add_compiler_rt_runtime(clang_rt.tsan
     SHARED
     OS ${TSAN_SUPPORTED_OS}
@@ -117,6 +121,7 @@
                 RTSanitizerCommonLibc
                 RTUbsan
     CFLAGS ${TSAN_RTL_CFLAGS}
+    LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS}
     PARENT_TARGET tsan)
   add_compiler_rt_object_libraries(RTTsan_dynamic
     OS ${TSAN_SUPPORTED_OS}
diff --git a/lib/tsan/go/build.bat b/lib/tsan/go/build.bat
index 3ada9ab..3a64a24 100644
--- a/lib/tsan/go/build.bat
+++ b/lib/tsan/go/build.bat
@@ -1,4 +1,4 @@
 type tsan_go.cc ..\rtl\tsan_interface_atomic.cc ..\rtl\tsan_clock.cc ..\rtl\tsan_flags.cc ..\rtl\tsan_md5.cc ..\rtl\tsan_mutex.cc ..\rtl\tsan_report.cc ..\rtl\tsan_rtl.cc ..\rtl\tsan_rtl_mutex.cc ..\rtl\tsan_rtl_report.cc ..\rtl\tsan_rtl_thread.cc ..\rtl\tsan_rtl_proc.cc ..\rtl\tsan_stat.cc ..\rtl\tsan_suppressions.cc ..\rtl\tsan_sync.cc ..\rtl\tsan_stack_trace.cc ..\..\sanitizer_common\sanitizer_allocator.cc ..\..\sanitizer_common\sanitizer_common.cc ..\..\sanitizer_common\sanitizer_flags.cc ..\..\sanitizer_common\sanitizer_stacktrace.cc ..\..\sanitizer_common\sanitizer_libc.cc ..\..\sanitizer_common\sanitizer_printf.cc ..\..\sanitizer_common\sanitizer_suppressions.cc ..\..\sanitizer_common\sanitizer_thread_registry.cc ..\rtl\tsan_platform_windows.cc ..\..\sanitizer_common\sanitizer_win.cc ..\..\sanitizer_common\sanitizer_deadlock_detector1.cc ..\..\sanitizer_common\sanitizer_stackdepot.cc ..\..\sanitizer_common\sanitizer_persistent_allocator.cc ..\..\sanitizer_common\sanitizer_flag_parser.cc ..\..\sanitizer_common\sanitizer_symbolizer.cc ..\..\sanitizer_common\sanitizer_termination.cc > gotsan.cc
 
-gcc -c -o race_windows_amd64.syso gotsan.cc -I..\rtl -I..\.. -I..\..\sanitizer_common -I..\..\..\include -m64 -Wall -fno-exceptions -fno-rtti -DSANITIZER_GO -Wno-error=attributes -Wno-attributes -Wno-format -Wno-maybe-uninitialized -DSANITIZER_DEBUG=0 -O3 -fomit-frame-pointer -std=c++11
+gcc -c -o race_windows_amd64.syso gotsan.cc -I..\rtl -I..\.. -I..\..\sanitizer_common -I..\..\..\include -m64 -Wall -fno-exceptions -fno-rtti -DSANITIZER_GO=1 -Wno-error=attributes -Wno-attributes -Wno-format -Wno-maybe-uninitialized -DSANITIZER_DEBUG=0 -O3 -fomit-frame-pointer -std=c++11
 
diff --git a/lib/tsan/go/buildgo.sh b/lib/tsan/go/buildgo.sh
index 834e325..42d4790 100755
--- a/lib/tsan/go/buildgo.sh
+++ b/lib/tsan/go/buildgo.sh
@@ -113,7 +113,7 @@
 	cat $F >> $DIR/gotsan.cc
 done
 
-FLAGS=" -I../rtl -I../.. -I../../sanitizer_common -I../../../include -std=c++11 -m64 -Wall -fno-exceptions -fno-rtti -DSANITIZER_GO -DSANITIZER_DEADLOCK_DETECTOR_VERSION=2 $OSCFLAGS"
+FLAGS=" -I../rtl -I../.. -I../../sanitizer_common -I../../../include -std=c++11 -m64 -Wall -fno-exceptions -fno-rtti -DSANITIZER_GO=1 -DSANITIZER_DEADLOCK_DETECTOR_VERSION=2 $OSCFLAGS"
 if [ "$DEBUG" = "" ]; then
 	FLAGS="$FLAGS -DSANITIZER_DEBUG=0 -O3 -msse3 -fomit-frame-pointer"
 else
diff --git a/lib/tsan/go/tsan_go.cc b/lib/tsan/go/tsan_go.cc
index bc0d553..34625c8 100644
--- a/lib/tsan/go/tsan_go.cc
+++ b/lib/tsan/go/tsan_go.cc
@@ -271,6 +271,11 @@
   ThreadIgnoreSyncEnd(thr, 0);
 }
 
+void __tsan_report_count(u64 *pn) {
+  Lock lock(&ctx->report_mtx);
+  *pn = ctx->nreported;
+}
+
 }  // extern "C"
 }  // namespace __tsan
 
diff --git a/lib/tsan/rtl/tsan.syms.extra b/lib/tsan/rtl/tsan.syms.extra
index 1bc1d93..22dfde9 100644
--- a/lib/tsan/rtl/tsan.syms.extra
+++ b/lib/tsan/rtl/tsan.syms.extra
@@ -1,4 +1,5 @@
 __tsan_init
+__tsan_flush_memory
 __tsan_read*
 __tsan_write*
 __tsan_vptr*
diff --git a/lib/tsan/rtl/tsan_clock.cc b/lib/tsan/rtl/tsan_clock.cc
index 1e2050d..32435ad 100644
--- a/lib/tsan/rtl/tsan_clock.cc
+++ b/lib/tsan/rtl/tsan_clock.cc
@@ -82,7 +82,7 @@
 
 // We don't have ThreadState in these methods, so this is an ugly hack that
 // works only in C++.
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 # define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
 #else
 # define CPP_STAT_INC(typ) (void)0
diff --git a/lib/tsan/rtl/tsan_debugging.cc b/lib/tsan/rtl/tsan_debugging.cc
index ac24c89..d9fb686 100644
--- a/lib/tsan/rtl/tsan_debugging.cc
+++ b/lib/tsan/rtl/tsan_debugging.cc
@@ -15,6 +15,8 @@
 #include "tsan_report.h"
 #include "tsan_rtl.h"
 
+#include "sanitizer_common/sanitizer_stackdepot.h"
+
 using namespace __tsan;
 
 static const char *ReportTypeDescription(ReportType typ) {
@@ -160,3 +162,78 @@
   *tid = rep->unique_tids[idx];
   return 1;
 }
+
+SANITIZER_INTERFACE_ATTRIBUTE
+const char *__tsan_locate_address(uptr addr, char *name, uptr name_size,
+                                  uptr *region_address_ptr,
+                                  uptr *region_size_ptr) {
+  uptr region_address = 0;
+  uptr region_size = 0;
+  const char *region_kind = nullptr;
+  if (name && name_size > 0) name[0] = 0;
+  if (IsMetaMem(addr)) {
+    region_kind = "meta shadow";
+  } else if (IsShadowMem(addr)) {
+    region_kind = "shadow";
+  } else {
+    bool is_stack = false;
+    MBlock *b = 0;
+    Allocator *a = allocator();
+    if (a->PointerIsMine((void *)addr)) {
+      void *block_begin = a->GetBlockBegin((void *)addr);
+      if (block_begin) b = ctx->metamap.GetBlock((uptr)block_begin);
+    }
+    if (b != 0) {
+      region_address = (uptr)allocator()->GetBlockBegin((void *)addr);
+      region_size = b->siz;
+      region_kind = "heap";
+    } else {
+      // TODO(kuba.brecka): We should not lock. This is supposed to be called
+      // from within the debugger when other threads are stopped.
+      ctx->thread_registry->Lock();
+      ThreadContext *tctx = IsThreadStackOrTls(addr, &is_stack);
+      ctx->thread_registry->Unlock();
+      if (tctx) {
+        region_kind = is_stack ? "stack" : "tls";
+      } else {
+        region_kind = "global";
+        DataInfo info;
+        if (Symbolizer::GetOrInit()->SymbolizeData(addr, &info)) {
+          // name is optional (it is null-checked above), so guard the copy.
+          if (name && name_size > 0)
+            internal_strncpy(name, info.name, name_size);
+          region_address = info.start;
+          region_size = info.size;
+        }
+      }
+    }
+  }
+
+  CHECK(region_kind);
+  if (region_address_ptr) *region_address_ptr = region_address;
+  if (region_size_ptr) *region_size_ptr = region_size;
+  return region_kind;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_alloc_stack(uptr addr, uptr *trace, uptr size, int *thread_id,
+                           uptr *os_id) {
+  MBlock *b = 0;
+  Allocator *a = allocator();
+  if (a->PointerIsMine((void *)addr)) {
+    void *block_begin = a->GetBlockBegin((void *)addr);
+    if (block_begin) b = ctx->metamap.GetBlock((uptr)block_begin);
+  }
+  if (b == 0) return 0;
+
+  *thread_id = b->tid;
+  // No locking.  This is supposed to be called from within the debugger when
+  // other threads are stopped.
+  ThreadContextBase *tctx = ctx->thread_registry->GetThreadLocked(b->tid);
+  *os_id = tctx->os_id;
+
+  StackTrace stack = StackDepotGet(b->stk);
+  size = Min(size, (uptr)stack.size);
+  for (uptr i = 0; i < size; i++) trace[i] = stack.trace[stack.size - i - 1];
+  return size;
+}
diff --git a/lib/tsan/rtl/tsan_defs.h b/lib/tsan/rtl/tsan_defs.h
index cdc23d0..55580a5 100644
--- a/lib/tsan/rtl/tsan_defs.h
+++ b/lib/tsan/rtl/tsan_defs.h
@@ -29,7 +29,7 @@
 #endif
 
 #ifndef TSAN_CONTAINS_UBSAN
-# if CAN_SANITIZE_UB && !defined(SANITIZER_GO)
+# if CAN_SANITIZE_UB && !SANITIZER_GO
 #  define TSAN_CONTAINS_UBSAN 1
 # else
 #  define TSAN_CONTAINS_UBSAN 0
@@ -38,19 +38,9 @@
 
 namespace __tsan {
 
-#ifdef SANITIZER_GO
-const bool kGoMode = true;
-const bool kCppMode = false;
-const char *const kTsanOptionsEnv = "GORACE";
-#else
-const bool kGoMode = false;
-const bool kCppMode = true;
-const char *const kTsanOptionsEnv = "TSAN_OPTIONS";
-#endif
-
 const int kTidBits = 13;
 const unsigned kMaxTid = 1 << kTidBits;
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 const unsigned kMaxTidInClock = kMaxTid * 2;  // This includes msb 'freed' bit.
 #else
 const unsigned kMaxTidInClock = kMaxTid;  // Go does not track freed memory.
diff --git a/lib/tsan/rtl/tsan_flags.cc b/lib/tsan/rtl/tsan_flags.cc
index 93f5986..d8d4746 100644
--- a/lib/tsan/rtl/tsan_flags.cc
+++ b/lib/tsan/rtl/tsan_flags.cc
@@ -61,7 +61,7 @@
     CommonFlags cf;
     cf.CopyFrom(*common_flags());
     cf.allow_addr2line = true;
-    if (kGoMode) {
+    if (SANITIZER_GO) {
       // Does not work as expected for Go: runtime handles SIGABRT and crashes.
       cf.abort_on_error = false;
       // Go does not have mutexes.
diff --git a/lib/tsan/rtl/tsan_flags.inc b/lib/tsan/rtl/tsan_flags.inc
index 4fb4436..a48545c 100644
--- a/lib/tsan/rtl/tsan_flags.inc
+++ b/lib/tsan/rtl/tsan_flags.inc
@@ -61,8 +61,9 @@
           "Stops on start until __tsan_resume() is called (for debugging).")
 TSAN_FLAG(bool, running_on_valgrind, false,
           "Controls whether RunningOnValgrind() returns true or false.")
+// There are a lot of goroutines in Go, so we use a smaller history.
 TSAN_FLAG(
-    int, history_size, kGoMode ? 1 : 3, // There are a lot of goroutines in Go.
+    int, history_size, SANITIZER_GO ? 1 : 3,
     "Per-thread history size, controls how many previous memory accesses "
     "are remembered per thread.  Possible values are [0..7]. "
     "history_size=0 amounts to 32K memory accesses.  Each next value doubles "
@@ -78,5 +79,8 @@
 TSAN_FLAG(const char *, suppressions, "", "Suppressions file name.")
 TSAN_FLAG(bool, ignore_interceptors_accesses, false,
           "Ignore reads and writes from all interceptors.")
+TSAN_FLAG(bool, ignore_noninstrumented_modules, false,
+          "Interceptors should only detect races when called from instrumented "
+          "modules.")
 TSAN_FLAG(bool, shared_ptr_interceptor, true,
           "Track atomic reference counting in libc++ shared_ptr and weak_ptr.")
diff --git a/lib/tsan/rtl/tsan_interceptors.cc b/lib/tsan/rtl/tsan_interceptors.cc
index a3a50e1..898f32d 100644
--- a/lib/tsan/rtl/tsan_interceptors.cc
+++ b/lib/tsan/rtl/tsan_interceptors.cc
@@ -231,6 +231,8 @@
     if (0 == internal_strcmp(s->type, kSuppressionLib))
       libignore()->AddIgnoredLibrary(s->templ);
   }
+  if (flags()->ignore_noninstrumented_modules)
+    libignore()->IgnoreNoninstrumentedModules(true);
   libignore()->OnLibraryLoaded(0);
 }
 
@@ -252,31 +254,20 @@
 
 ScopedInterceptor::ScopedInterceptor(ThreadState *thr, const char *fname,
                                      uptr pc)
-    : thr_(thr)
-    , pc_(pc)
-    , in_ignored_lib_(false) {
+    : thr_(thr), pc_(pc), in_ignored_lib_(false), ignoring_(false) {
   Initialize(thr);
-  if (!thr_->is_inited)
-    return;
-  if (!thr_->ignore_interceptors)
-    FuncEntry(thr, pc);
+  if (!thr_->is_inited) return;
+  if (!thr_->ignore_interceptors) FuncEntry(thr, pc);
   DPrintf("#%d: intercept %s()\n", thr_->tid, fname);
-  if (!thr_->in_ignored_lib && libignore()->IsIgnored(pc)) {
-    in_ignored_lib_ = true;
-    thr_->in_ignored_lib = true;
-    ThreadIgnoreBegin(thr_, pc_);
-  }
-  if (flags()->ignore_interceptors_accesses) ThreadIgnoreBegin(thr_, pc_);
+  ignoring_ =
+      !thr_->in_ignored_lib && (flags()->ignore_interceptors_accesses ||
+                                libignore()->IsIgnored(pc, &in_ignored_lib_));
+  EnableIgnores();
 }
 
 ScopedInterceptor::~ScopedInterceptor() {
-  if (!thr_->is_inited)
-    return;
-  if (flags()->ignore_interceptors_accesses) ThreadIgnoreEnd(thr_, pc_);
-  if (in_ignored_lib_) {
-    thr_->in_ignored_lib = false;
-    ThreadIgnoreEnd(thr_, pc_);
-  }
+  if (!thr_->is_inited) return;
+  DisableIgnores();
   if (!thr_->ignore_interceptors) {
     ProcessPendingSignals(thr_);
     FuncExit(thr_);
@@ -284,20 +275,24 @@
   }
 }
 
-void ScopedInterceptor::UserCallbackStart() {
-  if (flags()->ignore_interceptors_accesses) ThreadIgnoreEnd(thr_, pc_);
-  if (in_ignored_lib_) {
-    thr_->in_ignored_lib = false;
-    ThreadIgnoreEnd(thr_, pc_);
+void ScopedInterceptor::EnableIgnores() {
+  if (ignoring_) {
+    ThreadIgnoreBegin(thr_, pc_);
+    if (in_ignored_lib_) {
+      DCHECK(!thr_->in_ignored_lib);
+      thr_->in_ignored_lib = true;
+    }
   }
 }
 
-void ScopedInterceptor::UserCallbackEnd() {
-  if (in_ignored_lib_) {
-    thr_->in_ignored_lib = true;
-    ThreadIgnoreBegin(thr_, pc_);
+void ScopedInterceptor::DisableIgnores() {
+  if (ignoring_) {
+    ThreadIgnoreEnd(thr_, pc_);
+    if (in_ignored_lib_) {
+      DCHECK(thr_->in_ignored_lib);
+      thr_->in_ignored_lib = false;
+    }
   }
-  if (flags()->ignore_interceptors_accesses) ThreadIgnoreBegin(thr_, pc_);
 }
 
 #define TSAN_INTERCEPT(func) INTERCEPT_FUNCTION(func)
diff --git a/lib/tsan/rtl/tsan_interceptors.h b/lib/tsan/rtl/tsan_interceptors.h
index a0f9a07..72534f4 100644
--- a/lib/tsan/rtl/tsan_interceptors.h
+++ b/lib/tsan/rtl/tsan_interceptors.h
@@ -10,12 +10,13 @@
  public:
   ScopedInterceptor(ThreadState *thr, const char *fname, uptr pc);
   ~ScopedInterceptor();
-  void UserCallbackStart();
-  void UserCallbackEnd();
+  void DisableIgnores();
+  void EnableIgnores();
  private:
   ThreadState *const thr_;
   const uptr pc_;
   bool in_ignored_lib_;
+  bool ignoring_;
 };
 
 }  // namespace __tsan
@@ -39,10 +40,10 @@
 /**/
 
 #define SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START() \
-    si.UserCallbackStart();
+    si.DisableIgnores();
 
 #define SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END() \
-    si.UserCallbackEnd();
+    si.EnableIgnores();
 
 #define TSAN_INTERCEPTOR(ret, func, ...) INTERCEPTOR(ret, func, __VA_ARGS__)
 
diff --git a/lib/tsan/rtl/tsan_interface.cc b/lib/tsan/rtl/tsan_interface.cc
index 809d2ab..ad9b1fe 100644
--- a/lib/tsan/rtl/tsan_interface.cc
+++ b/lib/tsan/rtl/tsan_interface.cc
@@ -28,6 +28,10 @@
   Initialize(cur_thread());
 }
 
+void __tsan_flush_memory() {
+  FlushShadowMemory();
+}
+
 void __tsan_read16(void *addr) {
   MemoryRead(cur_thread(), CALLERPC, (uptr)addr, kSizeLog8);
   MemoryRead(cur_thread(), CALLERPC, (uptr)addr + 8, kSizeLog8);
diff --git a/lib/tsan/rtl/tsan_interface.h b/lib/tsan/rtl/tsan_interface.h
index f34b341..4e342a5 100644
--- a/lib/tsan/rtl/tsan_interface.h
+++ b/lib/tsan/rtl/tsan_interface.h
@@ -26,12 +26,14 @@
 extern "C" {
 #endif
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 
 // This function should be called at the very beginning of the process,
 // before any instrumented code is executed and before any call to malloc.
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_init();
 
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_flush_memory();
+
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read1(void *addr);
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read2(void *addr);
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read4(void *addr);
@@ -136,6 +138,17 @@
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_get_report_unique_tid(void *report, uptr idx, int *tid);
 
+// Returns the type of the pointer (heap, stack, global, ...) and if possible
+// also the starting address (e.g. of a heap allocation) and size.
+SANITIZER_INTERFACE_ATTRIBUTE
+const char *__tsan_locate_address(uptr addr, char *name, uptr name_size,
+                                  uptr *region_address, uptr *region_size);
+
+// Returns the allocation stack for a heap pointer.
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_alloc_stack(uptr addr, uptr *trace, uptr size, int *thread_id,
+                           uptr *os_id);
+
 #endif  // SANITIZER_GO
 
 #ifdef __cplusplus
@@ -149,7 +162,7 @@
 typedef unsigned short     a16;  // NOLINT
 typedef unsigned int       a32;
 typedef unsigned long long a64;  // NOLINT
-#if !defined(SANITIZER_GO) && (defined(__SIZEOF_INT128__) \
+#if !SANITIZER_GO && (defined(__SIZEOF_INT128__) \
     || (__clang_major__ * 100 + __clang_minor__ >= 302)) && !defined(__mips64)
 __extension__ typedef __int128 a128;
 # define __TSAN_HAS_INT128 1
diff --git a/lib/tsan/rtl/tsan_interface_atomic.cc b/lib/tsan/rtl/tsan_interface_atomic.cc
index dc0873f..5238b66 100644
--- a/lib/tsan/rtl/tsan_interface_atomic.cc
+++ b/lib/tsan/rtl/tsan_interface_atomic.cc
@@ -28,7 +28,7 @@
 
 using namespace __tsan;  // NOLINT
 
-#if !defined(SANITIZER_GO) && __TSAN_HAS_INT128
+#if !SANITIZER_GO && __TSAN_HAS_INT128
 // Protects emulation of 128-bit atomic operations.
 static StaticSpinMutex mutex128;
 #endif
@@ -102,7 +102,7 @@
 // Atomic ops are executed under tsan internal mutex,
 // here we assume that the atomic variables are not accessed
 // from non-instrumented code.
-#if !defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16) && !defined(SANITIZER_GO) \
+#if !defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16) && !SANITIZER_GO \
     && __TSAN_HAS_INT128
 a128 func_xchg(volatile a128 *v, a128 op) {
   SpinMutexLock lock(&mutex128);
@@ -176,7 +176,7 @@
   // this leads to false negatives only in very obscure cases.
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 static atomic_uint8_t *to_atomic(const volatile a8 *a) {
   return reinterpret_cast<atomic_uint8_t *>(const_cast<a8 *>(a));
 }
@@ -212,7 +212,7 @@
   return atomic_load(to_atomic(a), to_mo(mo));
 }
 
-#if __TSAN_HAS_INT128 && !defined(SANITIZER_GO)
+#if __TSAN_HAS_INT128 && !SANITIZER_GO
 static a128 NoTsanAtomicLoad(const volatile a128 *a, morder mo) {
   SpinMutexLock lock(&mutex128);
   return *a;
@@ -242,7 +242,7 @@
   atomic_store(to_atomic(a), v, to_mo(mo));
 }
 
-#if __TSAN_HAS_INT128 && !defined(SANITIZER_GO)
+#if __TSAN_HAS_INT128 && !SANITIZER_GO
 static void NoTsanAtomicStore(volatile a128 *a, a128 v, morder mo) {
   SpinMutexLock lock(&mutex128);
   *a = v;
@@ -267,7 +267,7 @@
   thr->fast_state.IncrementEpoch();
   // Can't increment epoch w/o writing to the trace as well.
   TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
-  ReleaseImpl(thr, pc, &s->clock);
+  ReleaseStoreImpl(thr, pc, &s->clock);
   NoTsanAtomicStore(a, v, mo);
   s->mtx.Unlock();
 }
@@ -434,7 +434,7 @@
   return c;
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 static void NoTsanAtomicFence(morder mo) {
   __sync_synchronize();
 }
@@ -446,7 +446,7 @@
 #endif
 
 // Interface functions follow.
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 
 // C/C++
 
@@ -845,7 +845,7 @@
 }
 }  // extern "C"
 
-#else  // #ifndef SANITIZER_GO
+#else  // #if !SANITIZER_GO
 
 // Go
 
@@ -928,4 +928,4 @@
   *(bool*)(a+24) = (cur == cmp);
 }
 }  // extern "C"
-#endif  // #ifndef SANITIZER_GO
+#endif  // #if !SANITIZER_GO
diff --git a/lib/tsan/rtl/tsan_interface_java.cc b/lib/tsan/rtl/tsan_interface_java.cc
index 95be859..5bdc04f 100644
--- a/lib/tsan/rtl/tsan_interface_java.cc
+++ b/lib/tsan/rtl/tsan_interface_java.cc
@@ -150,6 +150,23 @@
   }
 }
 
+jptr __tsan_java_find(jptr *from_ptr, jptr to) {
+  SCOPED_JAVA_FUNC(__tsan_java_find);
+  DPrintf("#%d: java_find(&%p, %p)\n", thr->tid, *from_ptr, to);
+  CHECK_EQ((*from_ptr) % kHeapAlignment, 0);
+  CHECK_EQ(to % kHeapAlignment, 0);
+  CHECK_GE(*from_ptr, jctx->heap_begin);
+  CHECK_LE(to, jctx->heap_begin + jctx->heap_size);
+  for (uptr from = *from_ptr; from < to; from += kHeapAlignment) {
+    MBlock *b = ctx->metamap.GetBlock(from);
+    if (b) {  // First block in range: report its address and size.
+      *from_ptr = from;
+      return b->siz;
+    }
+  }
+  return 0;  // No allocated block in [*from_ptr, to).
+}
+
 void __tsan_java_finalize() {
   SCOPED_JAVA_FUNC(__tsan_java_finalize);
   DPrintf("#%d: java_mutex_finalize()\n", thr->tid);
diff --git a/lib/tsan/rtl/tsan_interface_java.h b/lib/tsan/rtl/tsan_interface_java.h
index 30153a1..0bd49ac 100644
--- a/lib/tsan/rtl/tsan_interface_java.h
+++ b/lib/tsan/rtl/tsan_interface_java.h
@@ -57,6 +57,10 @@
 // It ensures necessary synchronization between
 // java object creation and finalization.
 void __tsan_java_finalize() INTERFACE_ATTRIBUTE;
+// Finds the first allocated memory block in the [*from_ptr, to) range, saves
+// its address in *from_ptr and returns its size. Returns 0 if there are no
+// allocated memory blocks in the range.
+jptr __tsan_java_find(jptr *from_ptr, jptr to) INTERFACE_ATTRIBUTE;
 
 // Mutex lock.
 // Addr is any unique address associated with the mutex.
diff --git a/lib/tsan/rtl/tsan_mman.cc b/lib/tsan/rtl/tsan_mman.cc
index f99ddb3..2dea249 100644
--- a/lib/tsan/rtl/tsan_mman.cc
+++ b/lib/tsan/rtl/tsan_mman.cc
@@ -54,7 +54,8 @@
     diff = p + size - RoundDown(p + size, kPageSize);
     if (diff != 0)
       size -= diff;
-    FlushUnneededShadowMemory((uptr)MemToMeta(p), size / kMetaRatio);
+    uptr p_meta = (uptr)MemToMeta(p);
+    ReleaseMemoryPagesToOS(p_meta, p_meta + size / kMetaRatio);
   }
 };
 
@@ -111,7 +112,9 @@
 }
 
 void InitializeAllocator() {
-  allocator()->Init(common_flags()->allocator_may_return_null);
+  allocator()->Init(
+      common_flags()->allocator_may_return_null,
+      common_flags()->allocator_release_to_os_interval_ms);
 }
 
 void InitializeAllocatorLate() {
@@ -148,7 +151,7 @@
 
 void *user_alloc(ThreadState *thr, uptr pc, uptr sz, uptr align, bool signal) {
   if ((sz >= (1ull << 40)) || (align >= (1ull << 40)))
-    return allocator()->ReturnNullOrDie();
+    return allocator()->ReturnNullOrDieOnBadRequest();
   void *p = allocator()->Allocate(&thr->proc()->alloc_cache, sz, align);
   if (p == 0)
     return 0;
@@ -161,7 +164,7 @@
 
 void *user_calloc(ThreadState *thr, uptr pc, uptr size, uptr n) {
   if (CallocShouldReturnNullDueToOverflow(size, n))
-    return allocator()->ReturnNullOrDie();
+    return allocator()->ReturnNullOrDieOnBadRequest();
   void *p = user_alloc(thr, pc, n * size);
   if (p)
     internal_memset(p, 0, n * size);
diff --git a/lib/tsan/rtl/tsan_mutexset.h b/lib/tsan/rtl/tsan_mutexset.h
index 68f0ec2..605c21a 100644
--- a/lib/tsan/rtl/tsan_mutexset.h
+++ b/lib/tsan/rtl/tsan_mutexset.h
@@ -43,7 +43,7 @@
   }
 
  private:
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   uptr size_;
   Desc descs_[kMaxSize];
 #endif
@@ -55,7 +55,7 @@
 // Go does not have mutexes, so do not spend memory and time.
 // (Go sync.Mutex is actually a semaphore -- can be unlocked
 // in different goroutine).
-#ifdef SANITIZER_GO
+#if SANITIZER_GO
 MutexSet::MutexSet() {}
 void MutexSet::Add(u64 id, bool write, u64 epoch) {}
 void MutexSet::Del(u64 id, bool write) {}
diff --git a/lib/tsan/rtl/tsan_platform.h b/lib/tsan/rtl/tsan_platform.h
index 213c6b5..1dd9d91 100644
--- a/lib/tsan/rtl/tsan_platform.h
+++ b/lib/tsan/rtl/tsan_platform.h
@@ -24,40 +24,46 @@
 
 namespace __tsan {
 
-#if !defined(SANITIZER_GO)
+#if !SANITIZER_GO
 
 #if defined(__x86_64__)
 /*
 C/C++ on linux/x86_64 and freebsd/x86_64
-0000 0000 1000 - 0100 0000 0000: main binary and/or MAP_32BIT mappings
-0100 0000 0000 - 0200 0000 0000: -
-0200 0000 0000 - 1000 0000 0000: shadow
-1000 0000 0000 - 3000 0000 0000: -
+0000 0000 1000 - 0080 0000 0000: main binary and/or MAP_32BIT mappings (512GB)
+0080 0000 0000 - 0100 0000 0000: -
+0100 0000 0000 - 2000 0000 0000: shadow
+2000 0000 0000 - 3000 0000 0000: -
 3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects)
-4000 0000 0000 - 6000 0000 0000: -
+4000 0000 0000 - 5500 0000 0000: -
+5500 0000 0000 - 5680 0000 0000: pie binaries without ASLR or on 4.1+ kernels
+5680 0000 0000 - 6000 0000 0000: -
 6000 0000 0000 - 6200 0000 0000: traces
 6200 0000 0000 - 7d00 0000 0000: -
-7d00 0000 0000 - 7e00 0000 0000: heap
-7e00 0000 0000 - 7e80 0000 0000: -
+7b00 0000 0000 - 7c00 0000 0000: heap
+7c00 0000 0000 - 7e80 0000 0000: -
 7e80 0000 0000 - 8000 0000 0000: modules and main thread stack
 */
 struct Mapping {
   static const uptr kMetaShadowBeg = 0x300000000000ull;
-  static const uptr kMetaShadowEnd = 0x400000000000ull;
+  static const uptr kMetaShadowEnd = 0x340000000000ull;
   static const uptr kTraceMemBeg   = 0x600000000000ull;
   static const uptr kTraceMemEnd   = 0x620000000000ull;
-  static const uptr kShadowBeg     = 0x020000000000ull;
-  static const uptr kShadowEnd     = 0x100000000000ull;
-  static const uptr kHeapMemBeg    = 0x7d0000000000ull;
-  static const uptr kHeapMemEnd    = 0x7e0000000000ull;
+  static const uptr kShadowBeg     = 0x010000000000ull;
+  static const uptr kShadowEnd     = 0x200000000000ull;
+  static const uptr kHeapMemBeg    = 0x7b0000000000ull;
+  static const uptr kHeapMemEnd    = 0x7c0000000000ull;
   static const uptr kLoAppMemBeg   = 0x000000001000ull;
-  static const uptr kLoAppMemEnd   = 0x010000000000ull;
+  static const uptr kLoAppMemEnd   = 0x008000000000ull;
+  static const uptr kMidAppMemBeg  = 0x550000000000ull;
+  static const uptr kMidAppMemEnd  = 0x568000000000ull;
   static const uptr kHiAppMemBeg   = 0x7e8000000000ull;
   static const uptr kHiAppMemEnd   = 0x800000000000ull;
-  static const uptr kAppMemMsk     = 0x7c0000000000ull;
-  static const uptr kAppMemXor     = 0x020000000000ull;
+  static const uptr kAppMemMsk     = 0x780000000000ull;
+  static const uptr kAppMemXor     = 0x040000000000ull;
   static const uptr kVdsoBeg       = 0xf000000000000000ull;
 };
+
+#define TSAN_MID_APP_RANGE 1
 #elif defined(__mips64)
 /*
 C/C++ on linux/mips64
@@ -74,22 +80,26 @@
 ff80 0000 00 - ffff ffff ff: modules and main thread stack
 */
 struct Mapping {
-  static const uptr kMetaShadowBeg = 0x3000000000ull;
-  static const uptr kMetaShadowEnd = 0x4000000000ull;
-  static const uptr kTraceMemBeg   = 0x6000000000ull;
-  static const uptr kTraceMemEnd   = 0x6200000000ull;
-  static const uptr kShadowBeg     = 0x1400000000ull;
-  static const uptr kShadowEnd     = 0x2400000000ull;
+  static const uptr kMetaShadowBeg = 0x4000000000ull;
+  static const uptr kMetaShadowEnd = 0x5000000000ull;
+  static const uptr kTraceMemBeg   = 0xb000000000ull;
+  static const uptr kTraceMemEnd   = 0xb200000000ull;
+  static const uptr kShadowBeg     = 0x2400000000ull;
+  static const uptr kShadowEnd     = 0x4000000000ull;
   static const uptr kHeapMemBeg    = 0xfe00000000ull;
   static const uptr kHeapMemEnd    = 0xff00000000ull;
   static const uptr kLoAppMemBeg   = 0x0100000000ull;
   static const uptr kLoAppMemEnd   = 0x0200000000ull;
+  static const uptr kMidAppMemBeg  = 0xaa00000000ull;
+  static const uptr kMidAppMemEnd  = 0xab00000000ull;
   static const uptr kHiAppMemBeg   = 0xff80000000ull;
   static const uptr kHiAppMemEnd   = 0xffffffffffull;
-  static const uptr kAppMemMsk     = 0xfc00000000ull;
-  static const uptr kAppMemXor     = 0x0400000000ull;
+  static const uptr kAppMemMsk     = 0xf800000000ull;
+  static const uptr kAppMemXor     = 0x0800000000ull;
   static const uptr kVdsoBeg       = 0xfffff00000ull;
 };
+
+#define TSAN_MID_APP_RANGE 1
 #elif defined(__aarch64__)
 // AArch64 supports multiple VMA which leads to multiple address transformation
 // functions.  To support these multiple VMAS transformations and mappings TSAN
@@ -121,7 +131,6 @@
   static const uptr kMetaShadowEnd = 0x3400000000ull;
   static const uptr kMidAppMemBeg  = 0x5500000000ull;
   static const uptr kMidAppMemEnd  = 0x5600000000ull;
-  static const uptr kMidShadowOff  = 0x5000000000ull;
   static const uptr kTraceMemBeg   = 0x6000000000ull;
   static const uptr kTraceMemEnd   = 0x6200000000ull;
   static const uptr kHeapMemBeg    = 0x7c00000000ull;
@@ -157,7 +166,6 @@
   static const uptr kMetaShadowEnd = 0x28000000000ull;
   static const uptr kMidAppMemBeg  = 0x2aa00000000ull;
   static const uptr kMidAppMemEnd  = 0x2ab00000000ull;
-  static const uptr kMidShadowOff  = 0x28000000000ull;
   static const uptr kTraceMemBeg   = 0x36200000000ull;
   static const uptr kTraceMemEnd   = 0x36400000000ull;
   static const uptr kHeapMemBeg    = 0x3e000000000ull;
@@ -178,7 +186,6 @@
   static const uptr kMetaShadowEnd = 0x0006000000000ull;
   static const uptr kMidAppMemBeg  = 0x0aaaa00000000ull;
   static const uptr kMidAppMemEnd  = 0x0aaaf00000000ull;
-  static const uptr kMidShadowOff  = 0x0aaa800000000ull;
   static const uptr kTraceMemBeg   = 0x0f06000000000ull;
   static const uptr kTraceMemEnd   = 0x0f06200000000ull;
   static const uptr kHeapMemBeg    = 0x0ffff00000000ull;
@@ -269,7 +276,7 @@
 #define TSAN_RUNTIME_VMA 1
 #endif
 
-#elif defined(SANITIZER_GO) && !SANITIZER_WINDOWS
+#elif SANITIZER_GO && !SANITIZER_WINDOWS
 
 /* Go on linux, darwin and freebsd
 0000 0000 1000 - 0000 1000 0000: executable
@@ -295,7 +302,7 @@
   static const uptr kAppMemEnd     = 0x00e000000000ull;
 };
 
-#elif defined(SANITIZER_GO) && SANITIZER_WINDOWS
+#elif SANITIZER_GO && SANITIZER_WINDOWS
 
 /* Go on windows
 0000 0000 1000 - 0000 1000 0000: executable
@@ -355,7 +362,7 @@
 template<typename Mapping, int Type>
 uptr MappingImpl(void) {
   switch (Type) {
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
     case MAPPING_LO_APP_BEG: return Mapping::kLoAppMemBeg;
     case MAPPING_LO_APP_END: return Mapping::kLoAppMemEnd;
 # ifdef TSAN_MID_APP_RANGE
@@ -401,7 +408,7 @@
 #endif
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 ALWAYS_INLINE
 uptr LoAppMemBeg(void) {
   return MappingArchImpl<MAPPING_LO_APP_BEG>();
@@ -463,7 +470,7 @@
   switch (i) {
   default:
     return false;
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   case 0:
     *start = LoAppMemBeg();
     *end = LoAppMemEnd();
@@ -521,7 +528,7 @@
 
 template<typename Mapping>
 bool IsAppMemImpl(uptr mem) {
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   return (mem >= Mapping::kHeapMemBeg && mem < Mapping::kHeapMemEnd) ||
 # ifdef TSAN_MID_APP_RANGE
          (mem >= Mapping::kMidAppMemBeg && mem < Mapping::kMidAppMemEnd) ||
@@ -612,7 +619,7 @@
 template<typename Mapping>
 uptr MemToShadowImpl(uptr x) {
   DCHECK(IsAppMem(x));
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   return (((x) & ~(Mapping::kAppMemMsk | (kShadowCell - 1)))
       ^ Mapping::kAppMemXor) * kShadowCnt;
 #else
@@ -649,13 +656,17 @@
 template<typename Mapping>
 u32 *MemToMetaImpl(uptr x) {
   DCHECK(IsAppMem(x));
-#ifndef SANITIZER_GO
-  return (u32*)(((((x) & ~(Mapping::kAppMemMsk | (kMetaShadowCell - 1)))
-        ^ Mapping::kAppMemXor) / kMetaShadowCell * kMetaShadowSize)
-          | Mapping::kMetaShadowBeg);
+#if !SANITIZER_GO
+  return (u32*)(((((x) & ~(Mapping::kAppMemMsk | (kMetaShadowCell - 1)))) /
+      kMetaShadowCell * kMetaShadowSize) | Mapping::kMetaShadowBeg);
 #else
+# if !SANITIZER_WINDOWS
   return (u32*)(((x & ~(kMetaShadowCell - 1)) / \
       kMetaShadowCell * kMetaShadowSize) | Mapping::kMetaShadowBeg);
+# else  // SANITIZER_WINDOWS
+  return (u32*)(((x & ~(kMetaShadowCell - 1)) / \
+      kMetaShadowCell * kMetaShadowSize) + Mapping::kMetaShadowBeg);
+# endif  // !SANITIZER_WINDOWS
 #endif
 }
 
@@ -684,18 +695,25 @@
 template<typename Mapping>
 uptr ShadowToMemImpl(uptr s) {
   DCHECK(IsShadowMem(s));
-#ifndef SANITIZER_GO
-  if (s >= MemToShadow(Mapping::kLoAppMemBeg)
-      && s <= MemToShadow(Mapping::kLoAppMemEnd - 1))
-    return (s / kShadowCnt) ^ Mapping::kAppMemXor;
+#if !SANITIZER_GO
+  // The shadow mapping is non-linear and we've lost some bits, so we don't have
+  // an easy way to restore the original app address. But the mapping is a
+  // bijection, so we try to restore the address as belonging to low/mid/high
+  // range consecutively and see if shadow->app->shadow mapping gives us the
+  // same address.
+  uptr p = (s / kShadowCnt) ^ Mapping::kAppMemXor;
+  if (p >= Mapping::kLoAppMemBeg && p < Mapping::kLoAppMemEnd &&
+      MemToShadow(p) == s)
+    return p;
 # ifdef TSAN_MID_APP_RANGE
-  if (s >= MemToShadow(Mapping::kMidAppMemBeg)
-      && s <= MemToShadow(Mapping::kMidAppMemEnd - 1))
-    return ((s / kShadowCnt) ^ Mapping::kAppMemXor) + Mapping::kMidShadowOff;
+  p = ((s / kShadowCnt) ^ Mapping::kAppMemXor) +
+      (Mapping::kMidAppMemBeg & Mapping::kAppMemMsk);
+  if (p >= Mapping::kMidAppMemBeg && p < Mapping::kMidAppMemEnd &&
+      MemToShadow(p) == s)
+    return p;
 # endif
-  else
-    return ((s / kShadowCnt) ^ Mapping::kAppMemXor) | Mapping::kAppMemMsk;
-#else
+  return ((s / kShadowCnt) ^ Mapping::kAppMemXor) | Mapping::kAppMemMsk;
+#else  // #if !SANITIZER_GO
 # ifndef SANITIZER_WINDOWS
   return (s & ~Mapping::kShadowBeg) / kShadowCnt;
 # else
diff --git a/lib/tsan/rtl/tsan_platform_linux.cc b/lib/tsan/rtl/tsan_platform_linux.cc
index cd80e17..3313288 100644
--- a/lib/tsan/rtl/tsan_platform_linux.cc
+++ b/lib/tsan/rtl/tsan_platform_linux.cc
@@ -98,7 +98,7 @@
     mem[MemShadow] += rss;
   else if (p >= MetaShadowBeg() && p < MetaShadowEnd())
     mem[MemMeta] += rss;
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   else if (p >= HeapMemBeg() && p < HeapMemEnd())
     mem[MemHeap] += rss;
   else if (p >= LoAppMemBeg() && p < LoAppMemEnd())
@@ -134,7 +134,7 @@
 void FlushShadowMemoryCallback(
     const SuspendedThreadsList &suspended_threads_list,
     void *argument) {
-  FlushUnneededShadowMemory(ShadowBeg(), ShadowEnd() - ShadowBeg());
+  ReleaseMemoryPagesToOS(ShadowBeg(), ShadowEnd());
 }
 #endif
 
@@ -144,7 +144,7 @@
 #endif
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 // Mark shadow for .rodata sections with the special kShadowRodata marker.
 // Accesses to .rodata can't race, so this saves time, memory and trace space.
 static void MapRodata() {
@@ -206,7 +206,7 @@
   MapRodata();
 }
 
-#endif  // #ifndef SANITIZER_GO
+#endif  // #if !SANITIZER_GO
 
 void InitializePlatformEarly() {
 #ifdef TSAN_RUNTIME_VMA
@@ -234,7 +234,7 @@
   // Go maps shadow memory lazily and works fine with limited address space.
   // Unlimited stack is not a problem as well, because the executable
   // is not compiled with -pie.
-  if (kCppMode) {
+  if (!SANITIZER_GO) {
     bool reexec = false;
     // TSan doesn't play well with unlimited stack size (as stack
     // overlaps with shadow memory). If we detect unlimited stack size,
@@ -276,13 +276,13 @@
       ReExec();
   }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   CheckAndProtect();
   InitTlsSize();
 #endif
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 // Extract file descriptors passed to glibc internal __res_iclose function.
 // This is required to properly "close" the fds, because we do not see internal
 // closes within glibc. The code is a pure hack.
@@ -335,11 +335,11 @@
 }
 #endif
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 void ReplaceSystemMalloc() { }
 #endif
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 #if SANITIZER_ANDROID
 
 #if defined(__aarch64__)
@@ -400,7 +400,7 @@
   CHECK_EQ(0, internal_sigprocmask(SIG_SETMASK, &oldset, nullptr));
 }
 #endif  // SANITIZER_ANDROID
-#endif  // ifndef SANITIZER_GO
+#endif  // if !SANITIZER_GO
 
 }  // namespace __tsan
 
diff --git a/lib/tsan/rtl/tsan_platform_mac.cc b/lib/tsan/rtl/tsan_platform_mac.cc
index 3107cf6..25dd241 100644
--- a/lib/tsan/rtl/tsan_platform_mac.cc
+++ b/lib/tsan/rtl/tsan_platform_mac.cc
@@ -44,7 +44,7 @@
 
 namespace __tsan {
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 static void *SignalSafeGetOrAllocate(uptr *dst, uptr size) {
   atomic_uintptr_t *a = (atomic_uintptr_t *)dst;
   void *val = (void *)atomic_load_relaxed(a);
@@ -178,7 +178,7 @@
     nthread, nlive);
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 void InitializeShadowMemoryPlatform() { }
 
 // On OS X, GCD worker threads are created without a call to pthread_create. We
@@ -228,7 +228,7 @@
 
 void InitializePlatform() {
   DisableCoreDumperIfNecessary();
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   CheckAndProtect();
 
   CHECK_EQ(main_thread_identity, 0);
@@ -239,7 +239,7 @@
 #endif
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 // Note: this function runs with async signals enabled,
 // so it must not touch any tsan state.
 int call_pthread_cancel_with_cleanup(int(*fn)(void *c, void *m,
diff --git a/lib/tsan/rtl/tsan_platform_posix.cc b/lib/tsan/rtl/tsan_platform_posix.cc
index 805ce1b..0732c83 100644
--- a/lib/tsan/rtl/tsan_platform_posix.cc
+++ b/lib/tsan/rtl/tsan_platform_posix.cc
@@ -23,7 +23,7 @@
 
 namespace __tsan {
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 void InitializeShadowMemory() {
   // Map memory shadow.
   uptr shadow =
diff --git a/lib/tsan/rtl/tsan_report.cc b/lib/tsan/rtl/tsan_report.cc
index 91f157d..07fd412 100644
--- a/lib/tsan/rtl/tsan_report.cc
+++ b/lib/tsan/rtl/tsan_report.cc
@@ -71,7 +71,7 @@
   // FIXME(dvyukov): it must be leaking a lot of memory.
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 
 const int kThreadBufSize = 32;
 const char *thread_name(char *buf, int tid) {
@@ -363,7 +363,7 @@
   Printf("==================\n");
 }
 
-#else  // #ifndef SANITIZER_GO
+#else  // #if !SANITIZER_GO
 
 const int kMainThreadId = 1;
 
diff --git a/lib/tsan/rtl/tsan_rtl.cc b/lib/tsan/rtl/tsan_rtl.cc
index 6531296..bfb8358 100644
--- a/lib/tsan/rtl/tsan_rtl.cc
+++ b/lib/tsan/rtl/tsan_rtl.cc
@@ -44,7 +44,8 @@
 
 namespace __tsan {
 
-#if !defined(SANITIZER_GO) && !SANITIZER_MAC
+#if !SANITIZER_GO && !SANITIZER_MAC
+__attribute__((tls_model("initial-exec")))
 THREADLOCAL char cur_thread_placeholder[sizeof(ThreadState)] ALIGNED(64);
 #endif
 static char ctx_placeholder[sizeof(Context)] ALIGNED(64);
@@ -86,7 +87,7 @@
   return new(mem) ThreadContext(tid);
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 static const u32 kThreadQuarantineSize = 16;
 #else
 static const u32 kThreadQuarantineSize = 64;
@@ -117,7 +118,7 @@
   // , ignore_reads_and_writes()
   // , ignore_interceptors()
   , clock(tid, reuse_count)
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   , jmp_bufs(MBlockJmpBuf)
 #endif
   , tid(tid)
@@ -126,13 +127,13 @@
   , stk_size(stk_size)
   , tls_addr(tls_addr)
   , tls_size(tls_size)
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   , last_sleep_clock(tid)
 #endif
 {
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 static void MemoryProfiler(Context *ctx, fd_t fd, int i) {
   uptr n_threads;
   uptr n_running_threads;
@@ -233,16 +234,17 @@
 #endif
 
 void DontNeedShadowFor(uptr addr, uptr size) {
-  uptr shadow_beg = MemToShadow(addr);
-  uptr shadow_end = MemToShadow(addr + size);
-  FlushUnneededShadowMemory(shadow_beg, shadow_end - shadow_beg);
+  ReleaseMemoryPagesToOS(MemToShadow(addr), MemToShadow(addr + size));
 }
 
 void MapShadow(uptr addr, uptr size) {
   // Global data is not 64K aligned, but there are no adjacent mappings,
   // so we can get away with unaligned mapping.
   // CHECK_EQ(addr, addr & ~((64 << 10) - 1));  // windows wants 64K alignment
-  MmapFixedNoReserve(MemToShadow(addr), size * kShadowMultiplier, "shadow");
+  const uptr kPageSize = GetPageSizeCached();
+  uptr shadow_begin = RoundDownTo((uptr)MemToShadow(addr), kPageSize);
+  uptr shadow_end = RoundUpTo((uptr)MemToShadow(addr + size), kPageSize);
+  MmapFixedNoReserve(shadow_begin, shadow_end - shadow_begin, "shadow");
 
   // Meta shadow is 2:1, so tread carefully.
   static bool data_mapped = false;
@@ -289,12 +291,13 @@
   for (int i = 0; GetUserRegion(i, &beg, &end); i++) {
     // Skip cases for empty regions (heap definition for architectures that
     // do not use 64-bit allocator).
-    if (beg ==end)
+    if (beg == end)
       continue;
     VPrintf(3, "checking shadow region %p-%p\n", beg, end);
+    uptr prev = 0;
     for (uptr p0 = beg; p0 <= end; p0 += (end - beg) / 4) {
-      for (int x = -1; x <= 1; x++) {
-        const uptr p = p0 + x;
+      for (int x = -(int)kShadowCell; x <= (int)kShadowCell; x += kShadowCell) {
+        const uptr p = RoundDown(p0 + x, kShadowCell);
         if (p < beg || p >= end)
           continue;
         const uptr s = MemToShadow(p);
@@ -302,8 +305,18 @@
         VPrintf(3, "  checking pointer %p: shadow=%p meta=%p\n", p, s, m);
         CHECK(IsAppMem(p));
         CHECK(IsShadowMem(s));
-        CHECK_EQ(p & ~(kShadowCell - 1), ShadowToMem(s));
+        CHECK_EQ(p, ShadowToMem(s));
         CHECK(IsMetaMem(m));
+        if (prev) {
+          // Ensure that shadow and meta mappings are linear within a single
+          // user range. Lots of code that processes memory ranges assumes it.
+          const uptr prev_s = MemToShadow(prev);
+          const uptr prev_m = (uptr)MemToMeta(prev);
+          CHECK_EQ(s - prev_s, (p - prev) * kShadowMultiplier);
+          CHECK_EQ((m - prev_m) / kMetaShadowSize,
+                   (p - prev) / kMetaShadowCell);
+        }
+        prev = p;
       }
     }
   }
@@ -322,12 +335,12 @@
   SetCheckFailedCallback(TsanCheckFailed);
 
   ctx = new(ctx_placeholder) Context;
-  const char *options = GetEnv(kTsanOptionsEnv);
+  const char *options = GetEnv(SANITIZER_GO ? "GORACE" : "TSAN_OPTIONS");
   CacheBinaryName();
   InitializeFlags(&ctx->flags, options);
   AvoidCVE_2016_2143();
   InitializePlatformEarly();
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   // Re-exec ourselves if we need to set additional env or command line args.
   MaybeReexec();
 
@@ -343,14 +356,14 @@
   InitializePlatform();
   InitializeMutex();
   InitializeDynamicAnnotations();
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   InitializeShadowMemory();
   InitializeAllocatorLate();
 #endif
   // Setup correct file descriptor for error reports.
   __sanitizer_set_report_path(common_flags()->log_path);
   InitializeSuppressions();
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   InitializeLibIgnore();
   Symbolizer::GetOrInit()->AddHooks(EnterSymbolizer, ExitSymbolizer);
   // On MIPS, TSan initialization is run before
@@ -374,7 +387,7 @@
 #endif
   ctx->initialized = true;
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   Symbolizer::LateInitialize();
 #endif
 
@@ -402,7 +415,7 @@
   CommonSanitizerReportMutex.Unlock();
   ctx->report_mtx.Unlock();
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   if (Verbosity()) AllocatorPrintStats();
 #endif
 
@@ -410,7 +423,7 @@
 
   if (ctx->nreported) {
     failed = true;
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
     Printf("ThreadSanitizer: reported %d warnings\n", ctx->nreported);
 #else
     Printf("Found %d data race(s)\n", ctx->nreported);
@@ -425,7 +438,7 @@
 
   if (common_flags()->print_suppressions)
     PrintMatchedSuppressions();
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   if (flags()->print_benign)
     PrintMatchedBenignRaces();
 #endif
@@ -440,7 +453,7 @@
   return failed ? common_flags()->exitcode : 0;
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 void ForkBefore(ThreadState *thr, uptr pc) {
   ctx->thread_registry->Lock();
   ctx->report_mtx.Lock();
@@ -473,7 +486,7 @@
 }
 #endif
 
-#ifdef SANITIZER_GO
+#if SANITIZER_GO
 NOINLINE
 void GrowShadowStack(ThreadState *thr) {
   const int sz = thr->shadow_stack_end - thr->shadow_stack;
@@ -492,7 +505,7 @@
   if (!thr->is_inited)  // May happen during bootstrap.
     return 0;
   if (pc != 0) {
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
     DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
 #else
     if (thr->shadow_stack_pos == thr->shadow_stack_end)
@@ -538,7 +551,7 @@
   return TraceSize() / kTracePartSize;
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 extern "C" void __tsan_trace_switch() {
   TraceSwitch(cur_thread());
 }
@@ -571,7 +584,7 @@
   thr->racy_state[0] = cur.raw();
   thr->racy_state[1] = old.raw();
   thr->racy_shadow_addr = shadow_mem;
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   HACKY_CALL(__tsan_report_race);
 #else
   ReportRace(thr);
@@ -768,7 +781,7 @@
   }
 #endif
 
-  if (kCppMode && *shadow_mem == kShadowRodata) {
+  if (!SANITIZER_GO && *shadow_mem == kShadowRodata) {
     // Access to .rodata section, no races here.
     // Measurements show that it can be 10-20% of all memory accesses.
     StatInc(thr, StatMop);
@@ -855,7 +868,7 @@
   size = (size + (kShadowCell - 1)) & ~(kShadowCell - 1);
   // UnmapOrDie/MmapFixedNoReserve does not work on Windows,
   // so we do it only for C/C++.
-  if (kGoMode || size < common_flags()->clear_shadow_mmap_threshold) {
+  if (SANITIZER_GO || size < common_flags()->clear_shadow_mmap_threshold) {
     u64 *p = (u64*)MemToShadow(addr);
     CHECK(IsShadowMem((uptr)p));
     CHECK(IsShadowMem((uptr)(p + size * kShadowCnt / kShadowCell - 1)));
@@ -941,7 +954,7 @@
   // Shadow stack maintenance can be replaced with
   // stack unwinding during trace switch (which presumably must be faster).
   DCHECK_GE(thr->shadow_stack_pos, thr->shadow_stack);
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
 #else
   if (thr->shadow_stack_pos == thr->shadow_stack_end)
@@ -961,7 +974,7 @@
   }
 
   DCHECK_GT(thr->shadow_stack_pos, thr->shadow_stack);
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
 #endif
   thr->shadow_stack_pos--;
@@ -972,7 +985,7 @@
   thr->ignore_reads_and_writes++;
   CHECK_GT(thr->ignore_reads_and_writes, 0);
   thr->fast_state.SetIgnoreBit();
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   if (!ctx->after_multithreaded_fork)
     thr->mop_ignore_set.Add(CurrentStackId(thr, pc));
 #endif
@@ -984,7 +997,7 @@
   CHECK_GE(thr->ignore_reads_and_writes, 0);
   if (thr->ignore_reads_and_writes == 0) {
     thr->fast_state.ClearIgnoreBit();
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
     thr->mop_ignore_set.Reset();
 #endif
   }
@@ -1002,7 +1015,7 @@
   DPrintf("#%d: ThreadIgnoreSyncBegin\n", thr->tid);
   thr->ignore_sync++;
   CHECK_GT(thr->ignore_sync, 0);
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   if (!ctx->after_multithreaded_fork)
     thr->sync_ignore_set.Add(CurrentStackId(thr, pc));
 #endif
@@ -1012,7 +1025,7 @@
   DPrintf("#%d: ThreadIgnoreSyncEnd\n", thr->tid);
   thr->ignore_sync--;
   CHECK_GE(thr->ignore_sync, 0);
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   if (thr->ignore_sync == 0)
     thr->sync_ignore_set.Reset();
 #endif
@@ -1036,7 +1049,7 @@
 
 }  // namespace __tsan
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 // Must be included in this file to make sure everything is inlined.
 #include "tsan_interface_inl.h"
 #endif
diff --git a/lib/tsan/rtl/tsan_rtl.h b/lib/tsan/rtl/tsan_rtl.h
index ff69015..7fcb9d4 100644
--- a/lib/tsan/rtl/tsan_rtl.h
+++ b/lib/tsan/rtl/tsan_rtl.h
@@ -52,7 +52,7 @@
 
 namespace __tsan {
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 struct MapUnmapCallback;
 #if defined(__mips64) || defined(__aarch64__) || defined(__powerpc__)
 static const uptr kAllocatorSpace = 0;
@@ -66,9 +66,15 @@
     CompactSizeClassMap, kAllocatorRegionSizeLog, ByteMap,
     MapUnmapCallback> PrimaryAllocator;
 #else
-typedef SizeClassAllocator64<Mapping::kHeapMemBeg,
-    Mapping::kHeapMemEnd - Mapping::kHeapMemBeg, 0,
-    DefaultSizeClassMap, MapUnmapCallback> PrimaryAllocator;
+struct AP64 {  // Allocator64 parameters. Deliberately using a short name.
+  static const uptr kSpaceBeg = Mapping::kHeapMemBeg;
+  static const uptr kSpaceSize = Mapping::kHeapMemEnd - Mapping::kHeapMemBeg;
+  static const uptr kMetadataSize = 0;
+  typedef DefaultSizeClassMap SizeClassMap;
+  typedef __tsan::MapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+};
+typedef SizeClassAllocator64<AP64> PrimaryAllocator;
 #endif
 typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
 typedef LargeMmapAllocator<MapUnmapCallback> SecondaryAllocator;
@@ -335,7 +341,7 @@
 // A ThreadState must be wired with a Processor to handle events.
 struct Processor {
   ThreadState *thr; // currently wired thread, or nullptr
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   AllocatorCache alloc_cache;
   InternalAllocatorCache internal_alloc_cache;
 #endif
@@ -345,7 +351,7 @@
   DDPhysicalThread *dd_pt;
 };
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 // ScopedGlobalProcessor temporary setups a global processor for the current
 // thread, if it does not have one. Intended for interceptors that can run
 // at the very thread end, when we already destroyed the thread processor.
@@ -376,7 +382,7 @@
   int ignore_reads_and_writes;
   int ignore_sync;
   // Go does not support ignores.
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   IgnoreSet mop_ignore_set;
   IgnoreSet sync_ignore_set;
 #endif
@@ -389,7 +395,7 @@
   u64 racy_state[2];
   MutexSet mset;
   ThreadClock clock;
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   Vector<JmpBuf> jmp_bufs;
   int ignore_interceptors;
 #endif
@@ -417,7 +423,7 @@
 
   // Current wired Processor, or nullptr. Required to handle any events.
   Processor *proc1;
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   Processor *proc() { return proc1; }
 #else
   Processor *proc();
@@ -426,7 +432,7 @@
   atomic_uintptr_t in_signal_handler;
   ThreadSignalContext *signal_ctx;
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   u32 last_sleep_stack_id;
   ThreadClock last_sleep_clock;
 #endif
@@ -443,7 +449,7 @@
                        uptr tls_addr, uptr tls_size);
 };
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 #if SANITIZER_MAC || SANITIZER_ANDROID
 ThreadState *cur_thread();
 void cur_thread_finalize();
@@ -541,13 +547,13 @@
 
 struct ScopedIgnoreInterceptors {
   ScopedIgnoreInterceptors() {
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
     cur_thread()->ignore_interceptors++;
 #endif
   }
 
   ~ScopedIgnoreInterceptors() {
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
     cur_thread()->ignore_interceptors--;
 #endif
   }
@@ -584,6 +590,7 @@
   void operator = (const ScopedReport&);
 };
 
+ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack);
 void RestoreStack(int tid, const u64 epoch, VarSizeStackTrace *stk,
                   MutexSet *mset);
 
@@ -787,7 +794,7 @@
   StatInc(thr, StatEvents);
   u64 pos = fs.GetTracePos();
   if (UNLIKELY((pos % kTracePartSize) == 0)) {
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
     HACKY_CALL(__tsan_trace_switch);
 #else
     TraceSwitch(thr);
@@ -799,7 +806,7 @@
   *evp = ev;
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 uptr ALWAYS_INLINE HeapEnd() {
   return HeapMemEnd() + PrimaryAllocator::AdditionalSize();
 }
diff --git a/lib/tsan/rtl/tsan_rtl_mutex.cc b/lib/tsan/rtl/tsan_rtl_mutex.cc
index 1806acf..f3b51c3 100644
--- a/lib/tsan/rtl/tsan_rtl_mutex.cc
+++ b/lib/tsan/rtl/tsan_rtl_mutex.cc
@@ -50,7 +50,7 @@
     uptr addr, u64 mid) {
   // In Go, these misuses are either impossible, or detected by std lib,
   // or false positives (e.g. unlock in a different thread).
-  if (kGoMode)
+  if (SANITIZER_GO)
     return;
   ThreadRegistryLock l(ctx->thread_registry);
   ScopedReport rep(typ);
@@ -76,7 +76,7 @@
   s->is_rw = rw;
   s->is_recursive = recursive;
   s->is_linker_init = linker_init;
-  if (kCppMode && s->creation_stack_id == 0)
+  if (!SANITIZER_GO && s->creation_stack_id == 0)
     s->creation_stack_id = CurrentStackId(thr, pc);
   s->mtx.Unlock();
 }
@@ -195,7 +195,7 @@
   TraceAddEvent(thr, thr->fast_state, EventTypeUnlock, s->GetId());
   int rec = 0;
   bool report_bad_unlock = false;
-  if (kCppMode && (s->recursion == 0 || s->owner_tid != thr->tid)) {
+  if (!SANITIZER_GO && (s->recursion == 0 || s->owner_tid != thr->tid)) {
     if (flags()->report_mutex_bugs && !s->is_broken) {
       s->is_broken = true;
       report_bad_unlock = true;
@@ -412,7 +412,7 @@
   s->mtx.Unlock();
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 static void UpdateSleepClockCallback(ThreadContextBase *tctx_base, void *arg) {
   ThreadState *thr = reinterpret_cast<ThreadState*>(arg);
   ThreadContext *tctx = static_cast<ThreadContext*>(tctx_base);
diff --git a/lib/tsan/rtl/tsan_rtl_proc.cc b/lib/tsan/rtl/tsan_rtl_proc.cc
index 0c838a1..efccdb5 100644
--- a/lib/tsan/rtl/tsan_rtl_proc.cc
+++ b/lib/tsan/rtl/tsan_rtl_proc.cc
@@ -23,7 +23,7 @@
   internal_memset(mem, 0, sizeof(Processor));
   Processor *proc = new(mem) Processor;
   proc->thr = nullptr;
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   AllocatorProcStart(proc);
 #endif
   if (common_flags()->detect_deadlocks)
@@ -33,7 +33,7 @@
 
 void ProcDestroy(Processor *proc) {
   CHECK_EQ(proc->thr, nullptr);
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   AllocatorProcFinish(proc);
 #endif
   ctx->clock_alloc.FlushCache(&proc->clock_cache);
diff --git a/lib/tsan/rtl/tsan_rtl_report.cc b/lib/tsan/rtl/tsan_rtl_report.cc
index 810119b..bc8944f 100644
--- a/lib/tsan/rtl/tsan_rtl_report.cc
+++ b/lib/tsan/rtl/tsan_rtl_report.cc
@@ -38,6 +38,10 @@
   // on the other hand there is no sense in processing interceptors
   // since we are going to die soon.
   ScopedIgnoreInterceptors ignore;
+#if !SANITIZER_GO
+  cur_thread()->ignore_sync++;
+  cur_thread()->ignore_reads_and_writes++;
+#endif
   Printf("FATAL: ThreadSanitizer CHECK failed: "
          "%s:%d \"%s\" (0x%zx, 0x%zx)\n",
          file, line, cond, (uptr)v1, (uptr)v2);
@@ -71,7 +75,7 @@
 
   if (last_frame2 == 0)
     return;
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   const char *last = last_frame->info.function;
   const char *last2 = last_frame2->info.function;
   // Strip frame above 'main'
@@ -204,7 +208,7 @@
     rt->stack->suppressable = suppressable;
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 static bool FindThreadByUidLockedCallback(ThreadContextBase *tctx, void *arg) {
   int unique_id = *(int *)arg;
   return tctx->unique_id == (u32)unique_id;
@@ -249,7 +253,7 @@
 #endif
 
 void ScopedReport::AddThread(int unique_tid, bool suppressable) {
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   if (const ThreadContext *tctx = FindThreadByUidLocked(unique_tid))
     AddThread(tctx, suppressable);
 #endif
@@ -305,7 +309,7 @@
 void ScopedReport::AddLocation(uptr addr, uptr size) {
   if (addr == 0)
     return;
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   int fd = -1;
   int creat_tid = -1;
   u32 creat_stack = 0;
@@ -355,7 +359,7 @@
   }
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 void ScopedReport::AddSleep(u32 stack_id) {
   rep_->sleep = SymbolizeStackId(stack_id);
 }
@@ -660,7 +664,7 @@
 
   rep.AddLocation(addr_min, addr_max - addr_min);
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   {  // NOLINT
     Shadow s(thr->racy_state[1]);
     if (s.epoch() <= thr->last_sleep_clock.get(s.tid()))
@@ -689,7 +693,7 @@
 // Also see PR27280 comment 2 and 3 for breaking examples and analysis.
 ALWAYS_INLINE
 void PrintCurrentStackSlow(uptr pc) {
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   BufferedStackTrace *ptrace =
       new(internal_alloc(MBlockStackTrace, sizeof(BufferedStackTrace)))
           BufferedStackTrace();
diff --git a/lib/tsan/rtl/tsan_rtl_thread.cc b/lib/tsan/rtl/tsan_rtl_thread.cc
index 13528ae..5b17dc6 100644
--- a/lib/tsan/rtl/tsan_rtl_thread.cc
+++ b/lib/tsan/rtl/tsan_rtl_thread.cc
@@ -30,7 +30,7 @@
   , epoch1() {
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 ThreadContext::~ThreadContext() {
 }
 #endif
@@ -68,8 +68,9 @@
 
 void ThreadContext::OnReset() {
   CHECK_EQ(sync.size(), 0);
-  FlushUnneededShadowMemory(GetThreadTrace(tid), TraceSize() * sizeof(Event));
-  //!!! FlushUnneededShadowMemory(GetThreadTraceHeader(tid), sizeof(Trace));
+  uptr trace_p = GetThreadTrace(tid);
+  ReleaseMemoryPagesToOS(trace_p, trace_p + TraceSize() * sizeof(Event));
+  //!!! ReleaseMemoryToOS(GetThreadTraceHeader(tid), sizeof(Trace));
 }
 
 void ThreadContext::OnDetached(void *arg) {
@@ -94,7 +95,7 @@
   epoch1 = (u64)-1;
   new(thr) ThreadState(ctx, tid, unique_id, epoch0, reuse_count,
       args->stk_addr, args->stk_size, args->tls_addr, args->tls_size);
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   thr->shadow_stack = &ThreadTrace(thr->tid)->shadow_stack[0];
   thr->shadow_stack_pos = thr->shadow_stack;
   thr->shadow_stack_end = thr->shadow_stack + kShadowStackSize;
@@ -125,7 +126,7 @@
 }
 
 void ThreadContext::OnFinished() {
-#ifdef SANITIZER_GO
+#if SANITIZER_GO
   internal_free(thr->shadow_stack);
   thr->shadow_stack = nullptr;
   thr->shadow_stack_pos = nullptr;
@@ -148,7 +149,7 @@
   thr = 0;
 }
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 struct ThreadLeak {
   ThreadContext *tctx;
   int count;
@@ -170,7 +171,7 @@
 }
 #endif
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 static void ReportIgnoresEnabled(ThreadContext *tctx, IgnoreSet *set) {
   if (tctx->tid == 0) {
     Printf("ThreadSanitizer: main thread finished with ignores enabled\n");
@@ -202,7 +203,7 @@
 
 void ThreadFinalize(ThreadState *thr) {
   ThreadCheckIgnore(thr);
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   if (!flags()->report_thread_leaks)
     return;
   ThreadRegistryLock l(ctx->thread_registry);
@@ -240,7 +241,7 @@
   uptr stk_size = 0;
   uptr tls_addr = 0;
   uptr tls_size = 0;
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   GetThreadStackAndTls(tid == 0, &stk_addr, &stk_size, &tls_addr, &tls_size);
 
   if (tid) {
@@ -271,7 +272,7 @@
   thr->tctx = (ThreadContext*)tr->GetThreadLocked(tid);
   tr->Unlock();
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   if (ctx->after_multithreaded_fork) {
     thr->ignore_interceptors++;
     ThreadIgnoreBegin(thr, 0);
diff --git a/lib/tsan/rtl/tsan_suppressions.cc b/lib/tsan/rtl/tsan_suppressions.cc
index aea3cb9..bfb64e0 100644
--- a/lib/tsan/rtl/tsan_suppressions.cc
+++ b/lib/tsan/rtl/tsan_suppressions.cc
@@ -21,7 +21,7 @@
 #include "tsan_mman.h"
 #include "tsan_platform.h"
 
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
 // Suppressions for true/false positives in standard libraries.
 static const char *const std_suppressions =
 // Libstdc++ 4.4 has data races in std::string.
@@ -54,7 +54,7 @@
   suppression_ctx = new (suppression_placeholder) // NOLINT
       SuppressionContext(kSuppressionTypes, ARRAY_SIZE(kSuppressionTypes));
   suppression_ctx->ParseFromFile(flags()->suppressions);
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   suppression_ctx->Parse(__tsan_default_suppressions());
   suppression_ctx->Parse(std_suppressions);
 #endif
diff --git a/lib/tsan/rtl/tsan_sync.cc b/lib/tsan/rtl/tsan_sync.cc
index 58b2680..44c6a26 100644
--- a/lib/tsan/rtl/tsan_sync.cc
+++ b/lib/tsan/rtl/tsan_sync.cc
@@ -30,7 +30,7 @@
   this->next = 0;
 
   creation_stack_id = 0;
-  if (kCppMode)  // Go does not use them
+  if (!SANITIZER_GO)  // Go does not use them
     creation_stack_id = CurrentStackId(thr, pc);
   if (common_flags()->detect_deadlocks)
     DDMutexInit(thr, pc, this);
@@ -120,7 +120,7 @@
 // without meta objects, at this point it stops freeing meta objects. Because
 // thread stacks grow top-down, we do the same starting from end as well.
 void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz) {
-  if (kGoMode) {
+  if (SANITIZER_GO) {
     // UnmapOrDie/MmapFixedNoReserve does not work on Windows,
     // so we do the optimization only for C/C++.
     FreeRange(proc, p, sz);
diff --git a/lib/tsan/rtl/tsan_trace.h b/lib/tsan/rtl/tsan_trace.h
index 2569c7e..96a18ac 100644
--- a/lib/tsan/rtl/tsan_trace.h
+++ b/lib/tsan/rtl/tsan_trace.h
@@ -42,7 +42,7 @@
 typedef u64 Event;
 
 struct TraceHeader {
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   BufferedStackTrace stack0;  // Start stack for the trace.
 #else
   VarSizeStackTrace stack0;
@@ -55,7 +55,7 @@
 
 struct Trace {
   Mutex mtx;
-#ifndef SANITIZER_GO
+#if !SANITIZER_GO
   // Must be last to catch overflow as paging fault.
   // Go shadow stack is dynamically allocated.
   uptr shadow_stack[kShadowStackSize];
diff --git a/lib/tsan/tests/CMakeLists.txt b/lib/tsan/tests/CMakeLists.txt
index 4587e47..87e1417 100644
--- a/lib/tsan/tests/CMakeLists.txt
+++ b/lib/tsan/tests/CMakeLists.txt
@@ -76,14 +76,18 @@
           ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
         list(APPEND TEST_OBJECTS lib${TSAN_TEST_RUNTIME}.a)
         list(APPEND TEST_DEPS ${TSAN_TEST_RUNTIME})
+
+        add_weak_symbols("ubsan" WEAK_SYMBOL_LINK_FLAGS)
+        add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
+
         # Intentionally do *not* link with `-fsanitize=thread`. We already link
         # against a static version of the runtime, and we don't want the dynamic
         # one.
         add_compiler_rt_test(TsanUnitTests "${testname}-${arch}-Test"
                 OBJECTS ${TEST_OBJECTS}
                 DEPS ${TEST_DEPS}
-                LINK_FLAGS ${TARGET_LINK_FLAGS} ${DARWIN_osx_LINKFLAGS}
-                           -lc++)
+                LINK_FLAGS ${TARGET_LINK_FLAGS} ${DARWIN_osx_LINK_FLAGS}
+                           ${WEAK_SYMBOL_LINK_FLAGS} -lc++)
       endif()
     endforeach()
   endif()
diff --git a/lib/tsan/tests/rtl/tsan_test_util_posix.cc b/lib/tsan/tests/rtl/tsan_test_util_posix.cc
index 01e3f7c..834a271 100644
--- a/lib/tsan/tests/rtl/tsan_test_util_posix.cc
+++ b/lib/tsan/tests/rtl/tsan_test_util_posix.cc
@@ -60,11 +60,11 @@
     if (rep->typ != expect_report_type) {
       printf("Expected report of type %d, got type %d\n",
              (int)expect_report_type, (int)rep->typ);
-      EXPECT_FALSE("Wrong report type");
+      EXPECT_TRUE(false) << "Wrong report type";
       return false;
     }
   } else {
-    EXPECT_FALSE("Unexpected report");
+    EXPECT_TRUE(false) << "Unexpected report";
     return false;
   }
   expect_report_reported = true;
@@ -323,7 +323,7 @@
   }
   if (expect_report && !expect_report_reported) {
     printf("Missed expected report of type %d\n", (int)ev->report_type);
-    EXPECT_FALSE("Missed expected race");
+    EXPECT_TRUE(false) << "Missed expected race";
   }
   expect_report = false;
 }
diff --git a/lib/ubsan/CMakeLists.txt b/lib/ubsan/CMakeLists.txt
index 049b976..9bb36ed 100644
--- a/lib/ubsan/CMakeLists.txt
+++ b/lib/ubsan/CMakeLists.txt
@@ -12,7 +12,7 @@
   ubsan_init_standalone.cc
   )
 
-set(UBSAN_CXX_SOURCES
+set(UBSAN_CXXABI_SOURCES
   ubsan_handlers_cxx.cc
   ubsan_type_hash.cc
   ubsan_type_hash_itanium.cc
@@ -30,7 +30,7 @@
 append_list_if(SANITIZER_CAN_USE_CXXABI -DUBSAN_CAN_USE_CXXABI UBSAN_STANDALONE_CFLAGS)
 
 set(UBSAN_CXXFLAGS ${SANITIZER_COMMON_CFLAGS})
-append_rtti_flag(ON UBSAN_STANDALONE_CXXFLAGS)
+append_rtti_flag(ON UBSAN_CXXFLAGS)
 append_list_if(SANITIZER_CAN_USE_CXXABI -DUBSAN_CAN_USE_CXXABI UBSAN_CXXFLAGS)
 
 add_compiler_rt_component(ubsan)
@@ -38,7 +38,7 @@
 if(APPLE)
   set(UBSAN_COMMON_SOURCES ${UBSAN_SOURCES})
   if(SANITIZER_CAN_USE_CXXABI)
-    list(APPEND UBSAN_COMMON_SOURCES ${UBSAN_CXX_SOURCES})
+    list(APPEND UBSAN_COMMON_SOURCES ${UBSAN_CXXABI_SOURCES})
   endif()
 
   # Common parts of UBSan runtime.
@@ -56,6 +56,9 @@
       SOURCES ${UBSAN_STANDALONE_SOURCES}
       CFLAGS ${UBSAN_STANDALONE_CFLAGS})
 
+    add_weak_symbols("ubsan" WEAK_SYMBOL_LINK_FLAGS)
+    add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
+
     add_compiler_rt_runtime(clang_rt.ubsan
       SHARED
       OS ${SANITIZER_COMMON_SUPPORTED_OS}
@@ -64,6 +67,7 @@
                   RTUbsan_standalone
                   RTSanitizerCommon
                   RTSanitizerCommonLibc
+      LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS}
       PARENT_TARGET ubsan)
   endif()
 
@@ -72,7 +76,16 @@
   add_compiler_rt_object_libraries(RTUbsan
     ARCHS ${UBSAN_COMMON_SUPPORTED_ARCH}
     SOURCES ${UBSAN_SOURCES} CFLAGS ${UBSAN_CFLAGS})
-  # C++-specific parts of UBSan runtime. Requires a C++ ABI library.
+
+  if(SANITIZER_CAN_USE_CXXABI)
+    # C++-specific parts of UBSan runtime. Requires a C++ ABI library.
+    set(UBSAN_CXX_SOURCES ${UBSAN_CXXABI_SOURCES})
+  else()
+    # Dummy target if we don't have C++ ABI library.
+    file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/cxx_dummy.cc "")
+    set(UBSAN_CXX_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/cxx_dummy.cc)
+  endif()
+
   add_compiler_rt_object_libraries(RTUbsan_cxx
     ARCHS ${UBSAN_COMMON_SUPPORTED_ARCH}
     SOURCES ${UBSAN_CXX_SOURCES} CFLAGS ${UBSAN_CXXFLAGS})
diff --git a/lib/ubsan/ubsan_handlers.cc b/lib/ubsan/ubsan_handlers.cc
index 4ede388..6ffffae 100644
--- a/lib/ubsan/ubsan_handlers.cc
+++ b/lib/ubsan/ubsan_handlers.cc
@@ -45,10 +45,11 @@
                                    ReportOptions Opts) {
   Location Loc = Data->Loc.acquire();
 
+  uptr Alignment = (uptr)1 << Data->LogAlignment;
   ErrorType ET;
   if (!Pointer)
     ET = ErrorType::NullPointerUse;
-  else if (Data->Alignment && (Pointer & (Data->Alignment - 1)))
+  else if (Pointer & (Alignment - 1))
     ET = ErrorType::MisalignedPointerUse;
   else
     ET = ErrorType::InsufficientObjectSize;
@@ -74,8 +75,8 @@
   case ErrorType::MisalignedPointerUse:
     Diag(Loc, DL_Error, "%0 misaligned address %1 for type %3, "
                         "which requires %2 byte alignment")
-        << TypeCheckKinds[Data->TypeCheckKind] << (void *)Pointer
-        << Data->Alignment << Data->Type;
+        << TypeCheckKinds[Data->TypeCheckKind] << (void *)Pointer << Alignment
+        << Data->Type;
     break;
   case ErrorType::InsufficientObjectSize:
     Diag(Loc, DL_Error, "%0 address %1 with insufficient space "
@@ -90,13 +91,13 @@
     Diag(Pointer, DL_Note, "pointer points here");
 }
 
-void __ubsan::__ubsan_handle_type_mismatch(TypeMismatchData *Data,
-                                           ValueHandle Pointer) {
+void __ubsan::__ubsan_handle_type_mismatch_v1(TypeMismatchData *Data,
+                                              ValueHandle Pointer) {
   GET_REPORT_OPTIONS(false);
   handleTypeMismatchImpl(Data, Pointer, Opts);
 }
-void __ubsan::__ubsan_handle_type_mismatch_abort(TypeMismatchData *Data,
-                                                 ValueHandle Pointer) {
+void __ubsan::__ubsan_handle_type_mismatch_v1_abort(TypeMismatchData *Data,
+                                                    ValueHandle Pointer) {
   GET_REPORT_OPTIONS(true);
   handleTypeMismatchImpl(Data, Pointer, Opts);
   Die();
diff --git a/lib/ubsan/ubsan_handlers.h b/lib/ubsan/ubsan_handlers.h
index e0cfd5b..350eb91 100644
--- a/lib/ubsan/ubsan_handlers.h
+++ b/lib/ubsan/ubsan_handlers.h
@@ -20,7 +20,7 @@
 struct TypeMismatchData {
   SourceLocation Loc;
   const TypeDescriptor &Type;
-  uptr Alignment;
+  unsigned char LogAlignment;
   unsigned char TypeCheckKind;
 };
 
@@ -37,7 +37,7 @@
 /// \brief Handle a runtime type check failure, caused by either a misaligned
 /// pointer, a null pointer, or a pointer to insufficient storage for the
 /// type.
-RECOVERABLE(type_mismatch, TypeMismatchData *Data, ValueHandle Pointer)
+RECOVERABLE(type_mismatch_v1, TypeMismatchData *Data, ValueHandle Pointer)
 
 struct OverflowData {
   SourceLocation Loc;
diff --git a/lib/ubsan/ubsan_type_hash_itanium.cc b/lib/ubsan/ubsan_type_hash_itanium.cc
index 24f7ad4..5ae5ae0 100644
--- a/lib/ubsan/ubsan_type_hash_itanium.cc
+++ b/lib/ubsan/ubsan_type_hash_itanium.cc
@@ -167,7 +167,7 @@
     dynamic_cast<const abi::__vmi_class_type_info*>(Derived);
   if (!VTI)
     // No base class subobjects.
-    return 0;
+    return nullptr;
 
   for (unsigned int base = 0; base != VTI->base_count; ++base) {
     sptr OffsetHere = VTI->base_info[base].__offset_flags >>
@@ -182,7 +182,7 @@
       return Base;
   }
 
-  return 0;
+  return nullptr;
 }
 
 namespace {
@@ -198,11 +198,11 @@
 VtablePrefix *getVtablePrefix(void *Vtable) {
   VtablePrefix *Vptr = reinterpret_cast<VtablePrefix*>(Vtable);
   if (!Vptr)
-    return 0;
+    return nullptr;
   VtablePrefix *Prefix = Vptr - 1;
   if (!Prefix->TypeInfo)
     // This can't possibly be a valid vtable.
-    return 0;
+    return nullptr;
   return Prefix;
 }
 
@@ -248,9 +248,9 @@
 __ubsan::getDynamicTypeInfoFromVtable(void *VtablePtr) {
   VtablePrefix *Vtable = getVtablePrefix(VtablePtr);
   if (!Vtable)
-    return DynamicTypeInfo(0, 0, 0);
+    return DynamicTypeInfo(nullptr, 0, nullptr);
   if (Vtable->Offset < -VptrMaxOffsetToTop || Vtable->Offset > VptrMaxOffsetToTop)
-    return DynamicTypeInfo(0, Vtable->Offset, 0);
+    return DynamicTypeInfo(nullptr, Vtable->Offset, nullptr);
   const abi::__class_type_info *ObjectType = findBaseAtOffset(
     static_cast<const abi::__class_type_info*>(Vtable->TypeInfo),
     -Vtable->Offset);
diff --git a/lib/ubsan/weak_symbols.txt b/lib/ubsan/weak_symbols.txt
new file mode 100644
index 0000000..69e1bc1
--- /dev/null
+++ b/lib/ubsan/weak_symbols.txt
@@ -0,0 +1 @@
+___ubsan_default_options
diff --git a/lib/xray/CMakeLists.txt b/lib/xray/CMakeLists.txt
index c70c4fe..9c7cf6c 100644
--- a/lib/xray/CMakeLists.txt
+++ b/lib/xray/CMakeLists.txt
@@ -1,42 +1,79 @@
 # Build for the XRay runtime support library.
 
+# Core XRay runtime library implementation files.
 set(XRAY_SOURCES
   xray_init.cc
-	xray_interface.cc
-	xray_flags.cc
-)
+  xray_interface.cc
+  xray_flags.cc
+  xray_inmemory_log.cc)
+
+# XRay flight data recorder (FDR) implementation files.
+set(XRAY_FDR_SOURCES
+  xray_buffer_queue.cc)
 
 set(x86_64_SOURCES
-		xray_trampoline_x86.S
-		${XRAY_SOURCES})
+    xray_x86_64.cc
+    xray_trampoline_x86_64.S
+    ${XRAY_SOURCES})
+
+set(arm_SOURCES
+    xray_arm.cc
+    xray_trampoline_arm.S
+    ${XRAY_SOURCES})
+
+set(armhf_SOURCES ${arm_SOURCES})
+
+set(aarch64_SOURCES
+        xray_AArch64.cc
+        xray_trampoline_AArch64.S
+        ${XRAY_SOURCES})
 
 include_directories(..)
 include_directories(../../include)
 
 set(XRAY_CFLAGS ${SANITIZER_COMMON_CFLAGS})
-
 set(XRAY_COMMON_DEFINITIONS XRAY_HAS_EXCEPTIONS=1)
+append_list_if(
+  COMPILER_RT_HAS_XRAY_COMPILER_FLAG XRAY_SUPPORTED=1 XRAY_COMMON_DEFINITIONS)
 
 add_compiler_rt_object_libraries(RTXray
-		ARCHS ${XRAY_SUPPORTED_ARCH}
-		SOURCES ${XRAY_SOURCES} CFLAGS ${XRAY_CFLAGS}
-		DEFS ${XRAY_COMMON_DEFINITIONS})
+  ARCHS ${XRAY_SUPPORTED_ARCH}
+  SOURCES ${XRAY_SOURCES} CFLAGS ${XRAY_CFLAGS}
+  DEFS ${XRAY_COMMON_DEFINITIONS})
+
+add_compiler_rt_object_libraries(RTXrayFDR
+  ARCHS ${XRAY_SUPPORTED_ARCH}
+  SOURCES ${XRAY_FDR_SOURCES} CFLAGS ${XRAY_CFLAGS}
+  DEFS ${XRAY_COMMON_DEFINITIONS})
 
 add_compiler_rt_component(xray)
+add_compiler_rt_component(xray-fdr)
 
 set(XRAY_COMMON_RUNTIME_OBJECT_LIBS
-		RTSanitizerCommon
-		RTSanitizerCommonLibc)
+    RTSanitizerCommon
+    RTSanitizerCommonLibc)
 
-foreach (arch ${XRAY_SUPPORTED_ARCH})
-		if (CAN_TARGET_${arch})
-				add_compiler_rt_runtime(clang_rt.xray
-						STATIC
-						ARCHS ${arch}
-						SOURCES ${${arch}_SOURCES}
-						CFLAGS ${XRAY_CFLAGS}
-						DEFS ${XRAY_COMMON_DEFINITIONS}
-						OBJECT_LIBS ${XRAY_COMMON_RUNTIME_OBJECT_LIBS}
-						PARENT_TARGET xray)
-		endif ()
+foreach(arch ${XRAY_SUPPORTED_ARCH})
+  if(CAN_TARGET_${arch})
+    add_compiler_rt_runtime(clang_rt.xray
+     STATIC
+     ARCHS ${arch}
+     SOURCES ${${arch}_SOURCES}
+     CFLAGS ${XRAY_CFLAGS}
+     DEFS ${XRAY_COMMON_DEFINITIONS}
+     OBJECT_LIBS ${XRAY_COMMON_RUNTIME_OBJECT_LIBS}
+     PARENT_TARGET xray)
+    add_compiler_rt_runtime(clang_rt.xray-fdr
+     STATIC
+     ARCHS ${arch}
+     SOURCES ${XRAY_FDR_SOURCES}
+     CFLAGS ${XRAY_CFLAGS}
+     DEFS ${XRAY_COMMON_DEFINITIONS}
+     OBJECT_LIBS ${XRAY_COMMON_RUNTIME_OBJECT_LIBS}
+     PARENT_TARGET xray-fdr)
+  endif()
 endforeach()
+
+if(COMPILER_RT_INCLUDE_TESTS)
+  add_subdirectory(tests)
+endif()
diff --git a/lib/xray/tests/CMakeLists.txt b/lib/xray/tests/CMakeLists.txt
new file mode 100644
index 0000000..6cb1793
--- /dev/null
+++ b/lib/xray/tests/CMakeLists.txt
@@ -0,0 +1,59 @@
+include_directories(..)
+
+add_custom_target(XRayUnitTests)
+set_target_properties(XRayUnitTests PROPERTIES FOLDER "XRay unittests")
+
+set(XRAY_UNITTEST_CFLAGS
+  ${XRAY_CFLAGS}
+  ${COMPILER_RT_UNITTEST_CFLAGS}
+  ${COMPILER_RT_GTEST_CFLAGS}
+  -I${COMPILER_RT_SOURCE_DIR}/include
+  -I${COMPILER_RT_SOURCE_DIR}/lib/xray)
+
+macro(xray_compile obj_list source arch)
+  get_filename_component(basename ${source} NAME)
+  set(output_obj "${basename}.${arch}.o")
+  get_target_flags_for_arch(${arch} TARGET_CFLAGS)
+  if(NOT COMPILER_RT_STANDALONE_BUILD)
+    list(APPEND COMPILE_DEPS gtest_main xray-fdr)
+  endif()
+  clang_compile(${output_obj} ${source}
+    CFLAGS ${XRAY_UNITTEST_CFLAGS} ${TARGET_CFLAGS}
+    DEPS ${COMPILE_DEPS})
+  list(APPEND ${obj_list} ${output_obj})
+endmacro()
+
+macro(add_xray_unittest testname)
+  set(XRAY_TEST_ARCH ${XRAY_SUPPORTED_ARCH})
+  if (APPLE)
+    darwin_filter_host_archs(XRAY_SUPPORTED_ARCH XRAY_TEST_ARCH)
+  endif()
+  if(UNIX)
+    foreach(arch ${XRAY_TEST_ARCH})
+      cmake_parse_arguments(TEST "" "" "SOURCES;HEADERS" ${ARGN})
+      set(TEST_OBJECTS)
+      foreach(SOURCE ${TEST_SOURCES} ${COMPILER_RT_GTEST_SOURCE})
+        xray_compile(TEST_OBJECTS ${SOURCE} ${arch} ${TEST_HEADERS})
+      endforeach()
+      get_target_flags_for_arch(${arch} TARGET_LINK_FLAGS)
+      set(TEST_DEPS ${TEST_OBJECTS})
+      if(NOT COMPILER_RT_STANDALONE_BUILD)
+        list(APPEND TEST_DEPS gtest_main xray-fdr)
+      endif()
+      if(NOT APPLE)
+        add_compiler_rt_test(XRayUnitTests ${testname}
+          OBJECTS ${TEST_OBJECTS}
+          DEPS ${TEST_DEPS}
+          LINK_FLAGS ${TARGET_LINK_FLAGS}
+          -lstdc++ -lm ${CMAKE_THREAD_LIBS_INIT}
+          -lpthread
+          -L${COMPILER_RT_LIBRARY_OUTPUT_DIR} -lclang_rt.xray-fdr-${arch})
+      endif()
+      # FIXME: Figure out how to run even just the unit tests on APPLE.
+    endforeach()
+  endif()
+endmacro()
+
+if(COMPILER_RT_CAN_EXECUTE_TESTS)
+  add_subdirectory(unit)
+endif()
diff --git a/lib/xray/tests/unit/CMakeLists.txt b/lib/xray/tests/unit/CMakeLists.txt
new file mode 100644
index 0000000..3e5412d
--- /dev/null
+++ b/lib/xray/tests/unit/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_xray_unittest(XRayBufferQueueTest SOURCES
+  buffer_queue_test.cc xray_unit_test_main.cc)
diff --git a/lib/xray/tests/unit/buffer_queue_test.cc b/lib/xray/tests/unit/buffer_queue_test.cc
new file mode 100644
index 0000000..d46f194
--- /dev/null
+++ b/lib/xray/tests/unit/buffer_queue_test.cc
@@ -0,0 +1,81 @@
+//===-- buffer_queue_test.cc ----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+#include "xray_buffer_queue.h"
+#include "gtest/gtest.h"
+
+#include <future>
+#include <system_error>
+#include <unistd.h>
+
+namespace __xray {
+
+static constexpr size_t kSize = 4096;
+
+TEST(BufferQueueTest, API) { BufferQueue Buffers(kSize, 1); }
+
+TEST(BufferQueueTest, GetAndRelease) {
+  BufferQueue Buffers(kSize, 1);
+  BufferQueue::Buffer Buf;
+  ASSERT_EQ(Buffers.getBuffer(Buf), std::error_code());
+  ASSERT_NE(nullptr, Buf.Buffer);
+  ASSERT_EQ(Buffers.releaseBuffer(Buf), std::error_code());
+  ASSERT_EQ(nullptr, Buf.Buffer);
+}
+
+TEST(BufferQueueTest, GetUntilFailed) {
+  BufferQueue Buffers(kSize, 1);
+  BufferQueue::Buffer Buf0;
+  EXPECT_EQ(Buffers.getBuffer(Buf0), std::error_code());
+  BufferQueue::Buffer Buf1;
+  EXPECT_EQ(std::errc::not_enough_memory, Buffers.getBuffer(Buf1));
+  EXPECT_EQ(Buffers.releaseBuffer(Buf0), std::error_code());
+}
+
+TEST(BufferQueueTest, ReleaseUnknown) {
+  BufferQueue Buffers(kSize, 1);
+  BufferQueue::Buffer Buf;
+  Buf.Buffer = reinterpret_cast<void *>(0xdeadbeef);
+  Buf.Size = kSize;
+  EXPECT_EQ(std::errc::argument_out_of_domain, Buffers.releaseBuffer(Buf));
+}
+
+TEST(BufferQueueTest, ErrorsWhenFinalising) {
+  BufferQueue Buffers(kSize, 2);
+  BufferQueue::Buffer Buf;
+  ASSERT_EQ(Buffers.getBuffer(Buf), std::error_code());
+  ASSERT_NE(nullptr, Buf.Buffer);
+  ASSERT_EQ(Buffers.finalize(), std::error_code());
+  BufferQueue::Buffer OtherBuf;
+  ASSERT_EQ(std::errc::state_not_recoverable, Buffers.getBuffer(OtherBuf));
+  ASSERT_EQ(std::errc::state_not_recoverable, Buffers.finalize());
+  ASSERT_EQ(Buffers.releaseBuffer(Buf), std::error_code());
+}
+
+TEST(BufferQueueTest, MultiThreaded) {
+  BufferQueue Buffers(kSize, 100);
+  auto F = [&] {
+    BufferQueue::Buffer B;
+    while (!Buffers.getBuffer(B)) {
+      Buffers.releaseBuffer(B);
+    }
+  };
+  auto T0 = std::async(std::launch::async, F);
+  auto T1 = std::async(std::launch::async, F);
+  auto T2 = std::async(std::launch::async, [&] {
+    while (!Buffers.finalize())
+      ;
+  });
+  F();
+}
+
+} // namespace __xray
diff --git a/lib/xray/tests/unit/xray_unit_test_main.cc b/lib/xray/tests/unit/xray_unit_test_main.cc
new file mode 100644
index 0000000..27d1752
--- /dev/null
+++ b/lib/xray/tests/unit/xray_unit_test_main.cc
@@ -0,0 +1,18 @@
+//===-- xray_unit_test_main.cc --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+#include "gtest/gtest.h"
+
+int main(int argc, char **argv) {
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/lib/xray/xray_AArch64.cc b/lib/xray/xray_AArch64.cc
new file mode 100644
index 0000000..0c1df22
--- /dev/null
+++ b/lib/xray/xray_AArch64.cc
@@ -0,0 +1,125 @@
+//===-- xray_AArch64.cc -----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of AArch64-specific routines (64-bit).
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_emulate_tsc.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+#include <cassert>
+
+
+extern "C" void __clear_cache(void* start, void* end);
+
+namespace __xray {
+
+uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT {
+  // There is no instruction like RDTSCP in user mode on ARM.  ARM's CP15 does
+  //   not have a constant frequency like TSC on x86[_64]; it may go faster or
+  //   slower depending on CPU's turbo or power saving modes.  Furthermore, to
+  //   read from CP15 on ARM a kernel modification or a driver is needed.
+  //   We can not require this from users of compiler-rt.
+  // So on ARM we use clock_gettime(2) which gives the result in nanoseconds.
+  //   To get the measurements per second, we scale this by the number of
+  //   nanoseconds per second, pretending that the TSC frequency is 1GHz and
+  //   one TSC tick is 1 nanosecond.
+  return NanosecondsPerSecond;
+}
+
+// The machine codes for some instructions used in runtime patching.
+enum class PatchOpcodes : uint32_t {
+  PO_StpX0X30SP_m16e = 0xA9BF7BE0, // STP X0, X30, [SP, #-16]!
+  PO_LdrW0_12 = 0x18000060,        // LDR W0, #12
+  PO_LdrX16_12 = 0x58000070,       // LDR X16, #12
+  PO_BlrX16 = 0xD63F0200,          // BLR X16
+  PO_LdpX0X30SP_16 = 0xA8C17BE0,   // LDP X0, X30, [SP], #16
+  PO_B32 = 0x14000008              // B #32
+};
+
+inline static bool patchSled(const bool Enable, const uint32_t FuncId,
+                             const XRaySledEntry &Sled,
+                             void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
+  // When |Enable| == true,
+  // We replace the following compile-time stub (sled):
+  //
+  // xray_sled_n:
+  //   B #32
+  //   7 NOPs (28 bytes)
+  //
+  // With the following runtime patch:
+  //
+  // xray_sled_n:
+  //   STP X0, X30, [SP, #-16]! ; PUSH {r0, lr}
+  //   LDR W0, #12 ; W0 := function ID
+  //   LDR X16,#12 ; X16 := address of the trampoline
+  //   BLR X16
+  //   ;DATA: 32 bits of function ID
+  //   ;DATA: lower 32 bits of the address of the trampoline
+  //   ;DATA: higher 32 bits of the address of the trampoline
+  //   LDP X0, X30, [SP], #16 ; POP {r0, lr}
+  //
+  // Replacement of the first 4-byte instruction should be the last and atomic
+  // operation, so that the user code which reaches the sled concurrently
+  // either jumps over the whole sled, or executes the whole sled when the
+  // latter is ready.
+  //
+  // When |Enable|==false, we set back the first instruction in the sled to be
+  //   B #32
+
+  uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address);
+  uint32_t *CurAddress = FirstAddress + 1;
+  if (Enable) {
+    *CurAddress = uint32_t(PatchOpcodes::PO_LdrW0_12);
+    CurAddress++;
+    *CurAddress = uint32_t(PatchOpcodes::PO_LdrX16_12);
+    CurAddress++;
+    *CurAddress = uint32_t(PatchOpcodes::PO_BlrX16);
+    CurAddress++;
+    *CurAddress = FuncId;
+    CurAddress++;
+    *reinterpret_cast<void (**)()>(CurAddress) = TracingHook;
+    CurAddress += 2;
+    *CurAddress = uint32_t(PatchOpcodes::PO_LdpX0X30SP_16);
+    CurAddress++;
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
+        uint32_t(PatchOpcodes::PO_StpX0X30SP_m16e), std::memory_order_release);
+  } else {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
+        uint32_t(PatchOpcodes::PO_B32), std::memory_order_release);
+  }
+  __clear_cache(reinterpret_cast<char*>(FirstAddress),
+      reinterpret_cast<char*>(CurAddress));
+  return true;
+}
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+                        const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  return patchSled(Enable, FuncId, Sled, __xray_FunctionEntry);
+}
+
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // FIXME: In the future we'd need to distinguish between non-tail exits and
+  // tail exits for better information preservation.
+  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+} // namespace __xray
diff --git a/lib/xray/xray_arm.cc b/lib/xray/xray_arm.cc
new file mode 100644
index 0000000..d89322e
--- /dev/null
+++ b/lib/xray/xray_arm.cc
@@ -0,0 +1,156 @@
+//===-- xray_arm.cc ---------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of ARM-specific routines (32-bit).
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_emulate_tsc.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+#include <cassert>
+
+namespace __xray {
+
+uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT {
+  // There is no instruction like RDTSCP in user mode on ARM.  ARM's CP15 does
+  //   not have a constant frequency like TSC on x86[_64]; it may go faster or
+  //   slower depending on CPU's turbo or power saving modes.  Furthermore, to
+  //   read from CP15 on ARM a kernel modification or a driver is needed.
+  //   We can not require this from users of compiler-rt.
+  // So on ARM we use clock_gettime(2) which gives the result in nanoseconds.
+  //   To get the measurements per second, we scale this by the number of
+  //   nanoseconds per second, pretending that the TSC frequency is 1GHz and
+  //   one TSC tick is 1 nanosecond.
+  return NanosecondsPerSecond;
+}
+
+// The machine codes for some instructions used in runtime patching.
+enum class PatchOpcodes : uint32_t {
+  PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr}
+  PO_BlxIp = 0xE12FFF3C,    // BLX ip
+  PO_PopR0Lr = 0xE8BD4001,  // POP {r0, lr}
+  PO_B20 = 0xEA000005       // B #20
+};
+
+// 0xUUUUWXYZ -> 0x000W0XYZ
+inline static uint32_t getMovwMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT {
+  return (Value & 0xfff) | ((Value & 0xf000) << 4);
+}
+
+// 0xWXYZUUUU -> 0x000W0XYZ
+inline static uint32_t getMovtMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT {
+  return getMovwMask(Value >> 16);
+}
+
+// Writes the following instructions:
+//   MOVW R<regNo>, #<lower 16 bits of the |Value|>
+//   MOVT R<regNo>, #<higher 16 bits of the |Value|>
+inline static uint32_t *
+write32bitLoadReg(uint8_t regNo, uint32_t *Address,
+                  const uint32_t Value) XRAY_NEVER_INSTRUMENT {
+  // This is a fatal error: we cannot just report it and continue execution.
+  assert(regNo <= 15 && "Register number must be 0 to 15.");
+  // MOVW R, #0xWXYZ in machine code is 0xE30WRXYZ
+  *Address = (0xE3000000 | (uint32_t(regNo) << 12) | getMovwMask(Value));
+  Address++;
+  // MOVT R, #0xWXYZ in machine code is 0xE34WRXYZ
+  *Address = (0xE3400000 | (uint32_t(regNo) << 12) | getMovtMask(Value));
+  return Address + 1;
+}
+
+// Writes the following instructions:
+//   MOVW r0, #<lower 16 bits of the |Value|>
+//   MOVT r0, #<higher 16 bits of the |Value|>
+inline static uint32_t *
+Write32bitLoadR0(uint32_t *Address,
+                 const uint32_t Value) XRAY_NEVER_INSTRUMENT {
+  return write32bitLoadReg(0, Address, Value);
+}
+
+// Writes the following instructions:
+//   MOVW ip, #<lower 16 bits of the |Value|>
+//   MOVT ip, #<higher 16 bits of the |Value|>
+inline static uint32_t *
+Write32bitLoadIP(uint32_t *Address,
+                 const uint32_t Value) XRAY_NEVER_INSTRUMENT {
+  return write32bitLoadReg(12, Address, Value);
+}
+
+inline static bool patchSled(const bool Enable, const uint32_t FuncId,
+                             const XRaySledEntry &Sled,
+                             void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
+  // When |Enable| == true,
+  // We replace the following compile-time stub (sled):
+  //
+  // xray_sled_n:
+  //   B #20
+  //   6 NOPs (24 bytes)
+  //
+  // With the following runtime patch:
+  //
+  // xray_sled_n:
+  //   PUSH {r0, lr}
+  //   MOVW r0, #<lower 16 bits of function ID>
+  //   MOVT r0, #<higher 16 bits of function ID>
+  //   MOVW ip, #<lower 16 bits of address of TracingHook>
+  //   MOVT ip, #<higher 16 bits of address of TracingHook>
+  //   BLX ip
+  //   POP {r0, lr}
+  //
+  // Replacement of the first 4-byte instruction should be the last and atomic
+  // operation, so that the user code which reaches the sled concurrently
+  // either jumps over the whole sled, or executes the whole sled when the
+  // latter is ready.
+  //
+  // When |Enable|==false, we set back the first instruction in the sled to be
+  //   B #20
+
+  uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address);
+  uint32_t *CurAddress = FirstAddress + 1;
+  if (Enable) {
+    CurAddress = Write32bitLoadR0(CurAddress, FuncId);
+    CurAddress =
+        Write32bitLoadIP(CurAddress, reinterpret_cast<uint32_t>(TracingHook));
+    *CurAddress++ = uint32_t(PatchOpcodes::PO_BlxIp);
+    *CurAddress++ = uint32_t(PatchOpcodes::PO_PopR0Lr);
+  }
+  // The first word is written last, atomically, so concurrent callers see
+  // either the untouched jump or the complete patched sled.
+  std::atomic_store_explicit(
+      reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
+      uint32_t(Enable ? PatchOpcodes::PO_PushR0Lr : PatchOpcodes::PO_B20),
+      std::memory_order_release);
+  // Flush the instruction cache over every word that may have been rewritten.
+  __clear_cache(reinterpret_cast<char *>(FirstAddress),
+                reinterpret_cast<char *>(CurAddress));
+  return true;
+}
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+                        const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  return patchSled(Enable, FuncId, Sled, __xray_FunctionEntry);
+}
+
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // FIXME: In the future we'd need to distinguish between non-tail exits and
+  // tail exits for better information preservation.
+  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+} // namespace __xray
diff --git a/lib/xray/xray_buffer_queue.cc b/lib/xray/xray_buffer_queue.cc
new file mode 100644
index 0000000..7e5462f
--- /dev/null
+++ b/lib/xray/xray_buffer_queue.cc
@@ -0,0 +1,65 @@
+//===-- xray_buffer_queue.cc -----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Defines the interface for a buffer queue implementation.
+//
+//===----------------------------------------------------------------------===//
+#include "xray_buffer_queue.h"
+#include <cassert>
+#include <cstdlib>
+
+using namespace __xray;
+
+BufferQueue::BufferQueue(std::size_t B, std::size_t N)
+    : BufferSize(B), Buffers(N), Mutex(), OwnedBuffers(), Finalizing(false) {
+  for (auto &Buf : Buffers) {
+    void *Tmp = malloc(BufferSize);
+    Buf.Buffer = Tmp;
+    Buf.Size = B;
+    if (Tmp != 0)
+      OwnedBuffers.insert(Tmp);
+  }
+}
+
+std::error_code BufferQueue::getBuffer(Buffer &Buf) {
+  if (Finalizing.load(std::memory_order_acquire))
+    return std::make_error_code(std::errc::state_not_recoverable);
+  std::lock_guard<std::mutex> Guard(Mutex);
+  if (Buffers.empty())
+    return std::make_error_code(std::errc::not_enough_memory);
+  Buf = Buffers.front();
+  Buffers.pop_front();
+  return {};
+}
+
+std::error_code BufferQueue::releaseBuffer(Buffer &Buf) {
+  if (OwnedBuffers.count(Buf.Buffer) == 0)
+    return std::make_error_code(std::errc::argument_out_of_domain);
+  std::lock_guard<std::mutex> Guard(Mutex);
+  Buffers.push_back(Buf);
+  // Reset the caller's handle as documented: null pointer, size 0.
+  Buf = Buffer{};
+  return {};
+}
+
+std::error_code BufferQueue::finalize() {
+  if (Finalizing.exchange(true, std::memory_order_acq_rel))
+    return std::make_error_code(std::errc::state_not_recoverable);
+  return {};
+}
+
+BufferQueue::~BufferQueue() {
+  // Free every allocation the constructor made exactly once, including
+  // buffers that are still checked out or were released more than once.
+  for (void *Owned : OwnedBuffers)
+    free(Owned);
+  Buffers.clear();
+}
diff --git a/lib/xray/xray_buffer_queue.h b/lib/xray/xray_buffer_queue.h
new file mode 100644
index 0000000..bf0b7af
--- /dev/null
+++ b/lib/xray/xray_buffer_queue.h
@@ -0,0 +1,86 @@
+//===-- xray_buffer_queue.h ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Defines the interface for a buffer queue implementation.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_BUFFER_QUEUE_H
+#define XRAY_BUFFER_QUEUE_H
+
+#include <atomic>
+#include <cstdint>
+#include <deque>
+#include <mutex>
+#include <system_error>
+#include <unordered_set>
+
+namespace __xray {
+
+/// BufferQueue implements a circular queue of fixed sized buffers (much like a
+/// freelist) but is concerned mostly with making it really quick to initialise,
+/// finalise, and get/return buffers to the queue. This is one key component of
+/// the "flight data recorder" (FDR) mode to support ongoing XRay function call
+/// trace collection.
+class BufferQueue {
+public:
+  struct Buffer {
+    void *Buffer = nullptr;
+    std::size_t Size = 0;
+  };
+
+private:
+  std::size_t BufferSize;
+  std::deque<Buffer> Buffers;
+  std::mutex Mutex;
+  std::unordered_set<void *> OwnedBuffers;
+  std::atomic<bool> Finalizing;
+
+public:
+  /// Initialise a queue of |N| buffers, each of size |B| bytes.
+  BufferQueue(std::size_t B, std::size_t N);
+
+  /// Updates |Buf| to contain the pointer to an available buffer, removing it
+  /// from the queue. Returns an error when the queue is finalising or when
+  /// no buffer is currently available.
+  ///
+  /// Requirements:
+  ///   - BufferQueue is not finalising.
+  ///
+  /// Returns:
+  ///   - std::errc::not_enough_memory when the queue has no free buffers.
+  ///   - no error when we find a Buffer.
+  ///   - std::errc::state_not_recoverable on finalising BufferQueue.
+  std::error_code getBuffer(Buffer &Buf);
+
+  /// Returns |Buf| to the queue and clears |Buf|'s pointer to nullptr.
+  /// Returns:
+  ///   - std::errc::argument_out_of_domain if |Buf| is not from this queue.
+  ///   - no error otherwise.
+  std::error_code releaseBuffer(Buffer &Buf);
+
+  bool finalizing() const { return Finalizing.load(std::memory_order_acquire); }
+
+  // Sets the state of the BufferQueue to finalizing, which ensures that:
+  //
+  //   - All subsequent attempts to retrieve a Buffer will fail.
+  //   - releaseBuffer keeps working; it does not check the finalizing flag.
+  //
+  // After a call to finalize succeeds, all subsequent calls to finalize will
+  // fail with std::errc::state_not_recoverable.
+  std::error_code finalize();
+
+  // Cleans up allocated buffers.
+  ~BufferQueue();
+};
+
+} // namespace __xray
+
+#endif // XRAY_BUFFER_QUEUE_H
diff --git a/lib/xray/xray_defs.h b/lib/xray/xray_defs.h
new file mode 100644
index 0000000..e5c37c0
--- /dev/null
+++ b/lib/xray/xray_defs.h
@@ -0,0 +1,22 @@
+//===-- xray_defs.h ---------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Common definitions useful for XRay sources.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_DEFS_H
+#define XRAY_XRAY_DEFS_H
+
+#if XRAY_SUPPORTED
+#define XRAY_NEVER_INSTRUMENT __attribute__((xray_never_instrument))
+#else
+#define XRAY_NEVER_INSTRUMENT
+#endif
+
+#endif  // XRAY_XRAY_DEFS_H
diff --git a/lib/xray/xray_emulate_tsc.h b/lib/xray/xray_emulate_tsc.h
new file mode 100644
index 0000000..a3e8b1c
--- /dev/null
+++ b/lib/xray/xray_emulate_tsc.h
@@ -0,0 +1,40 @@
+//===-- xray_emulate_tsc.h --------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_EMULATE_TSC_H
+#define XRAY_EMULATE_TSC_H
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "xray_defs.h"
+#include <cerrno>
+#include <cstdint>
+#include <time.h>
+
+namespace __xray {
+
+static constexpr uint64_t NanosecondsPerSecond = 1000ULL * 1000 * 1000;
+
+ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+  timespec TS;
+  int result = clock_gettime(CLOCK_REALTIME, &TS);
+  if (result != 0) {
+    Report("clock_gettime(2) returned %d, errno=%d.\n", result, int(errno));
+    // Fall back to a zero timestamp rather than reading uninitialised data.
+    TS = {};
+  }
+  CPU = 0;
+  return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
+}
+}
+
+#endif // XRAY_EMULATE_TSC_H
diff --git a/lib/xray/xray_flags.cc b/lib/xray/xray_flags.cc
index 6f82912..338c237 100644
--- a/lib/xray/xray_flags.cc
+++ b/lib/xray/xray_flags.cc
@@ -16,6 +16,7 @@
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_flag_parser.h"
 #include "sanitizer_common/sanitizer_libc.h"
+#include "xray_defs.h"
 
 using namespace __sanitizer;
 
@@ -23,20 +24,20 @@
 
 Flags xray_flags_dont_use_directly; // use via flags().
 
-void Flags::SetDefaults() {
+void Flags::SetDefaults() XRAY_NEVER_INSTRUMENT {
 #define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
 #include "xray_flags.inc"
 #undef XRAY_FLAG
 }
 
-static void RegisterXRayFlags(FlagParser *P, Flags *F) {
+static void RegisterXRayFlags(FlagParser *P, Flags *F) XRAY_NEVER_INSTRUMENT {
 #define XRAY_FLAG(Type, Name, DefaultValue, Description)                       \
   RegisterFlag(P, #Name, Description, &F->Name);
 #include "xray_flags.inc"
 #undef XRAY_FLAG
 }
 
-void InitializeFlags() {
+void InitializeFlags() XRAY_NEVER_INSTRUMENT {
   SetCommonFlagsDefaults();
   auto *F = flags();
   F->SetDefaults();
diff --git a/lib/xray/xray_flags.inc b/lib/xray/xray_flags.inc
index bc2f45e..0f6ced8 100644
--- a/lib/xray/xray_flags.inc
+++ b/lib/xray/xray_flags.inc
@@ -16,3 +16,7 @@
 
 XRAY_FLAG(bool, patch_premain, true,
           "Whether to patch instrumentation points before main.")
+XRAY_FLAG(bool, xray_naive_log, true,
+          "Whether to install the naive log implementation.")
+XRAY_FLAG(const char *, xray_logfile_base, "xray-log.",
+          "Filename base for the xray logfile.")
diff --git a/lib/xray/xray_init.cc b/lib/xray/xray_init.cc
index f999030..eb86182 100644
--- a/lib/xray/xray_init.cc
+++ b/lib/xray/xray_init.cc
@@ -18,6 +18,7 @@
 #include <unistd.h>
 
 #include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
 #include "xray_flags.h"
 #include "xray_interface_internal.h"
 
@@ -44,7 +45,7 @@
 
 // __xray_init() will do the actual loading of the current process' memory map
 // and then proceed to look for the .xray_instr_map section/segment.
-void __xray_init() {
+void __xray_init() XRAY_NEVER_INSTRUMENT {
   InitializeFlags();
   if (__start_xray_instr_map == nullptr) {
     Report("XRay instrumentation map missing. Not initializing XRay.\n");
diff --git a/lib/xray/xray_inmemory_log.cc b/lib/xray/xray_inmemory_log.cc
new file mode 100644
index 0000000..adcb216
--- /dev/null
+++ b/lib/xray/xray_inmemory_log.cc
@@ -0,0 +1,194 @@
+//===-- xray_inmemory_log.cc ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of a simple in-memory log of XRay events. This defines a
+// logging function that's compatible with the XRay handler interface, and
+// routines for exporting data to files.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+#include <fcntl.h>
+#include <mutex>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <thread>
+#include <unistd.h>
+
+#if defined(__x86_64__)
+#include "xray_x86_64.h"
+#elif defined(__arm__) || defined(__aarch64__)
+#include "xray_emulate_tsc.h"
+#else
+#error "Unsupported CPU Architecture"
+#endif /* Architecture-specific inline intrinsics */
+
+#include "sanitizer_common/sanitizer_libc.h"
+#include "xray/xray_records.h"
+#include "xray_defs.h"
+#include "xray_flags.h"
+#include "xray_interface_internal.h"
+
+// __xray_InMemoryRawLog will use a thread-local aligned buffer capped to a
+// certain size (32kb by default) and use it as if it were a circular buffer for
+// events. We store simple fixed-sized entries in the log for external analysis.
+
+extern "C" {
+void __xray_InMemoryRawLog(int32_t FuncId,
+                           XRayEntryType Type) XRAY_NEVER_INSTRUMENT;
+}
+
+namespace __xray {
+
+std::mutex LogMutex;
+
+static void retryingWriteAll(int Fd, char *Begin,
+                             char *End) XRAY_NEVER_INSTRUMENT {
+  if (Begin == End)
+    return;
+  auto TotalBytes = std::distance(Begin, End);
+  while (auto Written = write(Fd, Begin, TotalBytes)) {
+    if (Written < 0) {
+      if (errno == EINTR)
+        continue; // Try again.
+      Report("Failed to write; errno = %d\n", errno);
+      return;
+    }
+    TotalBytes -= Written;
+    if (TotalBytes == 0)
+      break;
+    Begin += Written;
+  }
+}
+
+class ThreadExitFlusher {
+  int Fd;
+  XRayRecord *Start;
+  size_t &Offset;
+
+public:
+  explicit ThreadExitFlusher(int Fd, XRayRecord *Start,
+                             size_t &Offset) XRAY_NEVER_INSTRUMENT
+      : Fd(Fd),
+        Start(Start),
+        Offset(Offset) {}
+
+  ~ThreadExitFlusher() XRAY_NEVER_INSTRUMENT {
+    std::lock_guard<std::mutex> L(LogMutex);
+    if (Fd > 0 && Start != nullptr) {
+      retryingWriteAll(Fd, reinterpret_cast<char *>(Start),
+                       reinterpret_cast<char *>(Start + Offset));
+      // Because this thread's exit could be the last one trying to write to the
+      // file and that we're not able to close out the file properly, we sync
+      // instead and hope that the pending writes are flushed as the thread
+      // exits.
+      fsync(Fd);
+    }
+  }
+};
+
+} // namespace __xray
+
+using namespace __xray;
+
+void PrintToStdErr(const char *Buffer) XRAY_NEVER_INSTRUMENT {
+  fprintf(stderr, "%s", Buffer);
+}
+
+static int __xray_OpenLogFile() XRAY_NEVER_INSTRUMENT {
+  // FIXME: Figure out how to make this less stderr-dependent.
+  SetPrintfAndReportCallback(PrintToStdErr);
+  // Open a temporary file once for the log.
+  static char TmpFilename[256] = {};
+  static char TmpWildcardPattern[] = "XXXXXX";
+  auto Argv = GetArgv();
+  const char *Progname = Argv[0] == nullptr ? "(unknown)" : Argv[0];
+  const char *LastSlash = internal_strrchr(Progname, '/');
+
+  if (LastSlash != nullptr)
+    Progname = LastSlash + 1;
+
+  const int HalfLength = sizeof(TmpFilename) / 2 - sizeof(TmpWildcardPattern);
+  int NeededLength = internal_snprintf(TmpFilename, sizeof(TmpFilename),
+                                       "%.*s%.*s.%s",
+                                       HalfLength, flags()->xray_logfile_base,
+                                       HalfLength, Progname,
+                                       TmpWildcardPattern);
+  if (NeededLength > int(sizeof(TmpFilename))) {
+    Report("XRay log file name too long (%d): %s\n", NeededLength, TmpFilename);
+    return -1;
+  }
+  int Fd = mkstemp(TmpFilename);
+  if (Fd == -1) {
+    Report("XRay: Failed opening temporary file '%s'; not logging events.\n",
+           TmpFilename);
+    return -1;
+  }
+  if (Verbosity())
+    fprintf(stderr, "XRay: Log file in '%s'\n", TmpFilename);
+
+  // Since we're here, we get to write the header. We set it up so that the
+  // header will only be written once, at the start, and let the threads
+  // logging do writes which just append.
+  XRayFileHeader Header;
+  Header.Version = 1;
+  Header.Type = FileTypes::NAIVE_LOG;
+  Header.CycleFrequency = __xray::cycleFrequency();
+
+  // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
+  // before setting the values in the header.
+  Header.ConstantTSC = 1;
+  Header.NonstopTSC = 1;
+  retryingWriteAll(Fd, reinterpret_cast<char *>(&Header),
+                   reinterpret_cast<char *>(&Header) + sizeof(Header));
+  return Fd;
+}
+
+void __xray_InMemoryRawLog(int32_t FuncId,
+                           XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
+  using Buffer =
+      std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
+  static constexpr size_t BuffLen = 1024;
+  thread_local static Buffer InMemoryBuffer[BuffLen] = {};
+  thread_local static size_t Offset = 0;
+  static int Fd = __xray_OpenLogFile();
+  if (Fd == -1)
+    return;
+  thread_local __xray::ThreadExitFlusher Flusher(
+      Fd, reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer), Offset);
+  thread_local pid_t TId = syscall(SYS_gettid);
+
+  // First we get the useful data, and stuff it into the already aligned buffer
+  // through a pointer offset.
+  auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset];
+  R.RecordType = RecordTypes::NORMAL;
+  R.TSC = __xray::readTSC(R.CPU);
+  R.TId = TId;
+  R.Type = Type;
+  R.FuncId = FuncId;
+  ++Offset;
+  if (Offset == BuffLen) {
+    std::lock_guard<std::mutex> L(LogMutex);
+    auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer);
+    retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer),
+                     reinterpret_cast<char *>(RecordBuffer + Offset));
+    Offset = 0;
+  }
+}
+
+static auto Unused = [] {
+  if (flags()->xray_naive_log)
+    __xray_set_handler(__xray_InMemoryRawLog);
+  return true;
+}();
diff --git a/lib/xray/xray_interface.cc b/lib/xray/xray_interface.cc
index 5ef3fc7..20a2b66 100644
--- a/lib/xray/xray_interface.cc
+++ b/lib/xray/xray_interface.cc
@@ -23,9 +23,22 @@
 #include <sys/mman.h>
 
 #include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
 
 namespace __xray {
 
+#if defined(__x86_64__)
+// FIXME: The actual length is 11 bytes. Why was length 12 passed to mprotect()
+// ?
+static const int16_t cSledLength = 12;
+#elif defined(__aarch64__)
+static const int16_t cSledLength = 32;
+#elif defined(__arm__)
+static const int16_t cSledLength = 28;
+#else
+#error "Unsupported CPU Architecture"
+#endif /* CPU architecture */
+
 // This is the function to call when we encounter the entry or exit sleds.
 std::atomic<void (*)(int32_t, XRayEntryType)> XRayPatchedFunction{nullptr};
 
@@ -43,11 +56,13 @@
   bool MustCleanup;
 
 public:
-  explicit MProtectHelper(void *PageAlignedAddr, std::size_t MProtectLen)
-      : PageAlignedAddr(PageAlignedAddr), MProtectLen(MProtectLen),
+  explicit MProtectHelper(void *PageAlignedAddr,
+                          std::size_t MProtectLen) XRAY_NEVER_INSTRUMENT
+      : PageAlignedAddr(PageAlignedAddr),
+        MProtectLen(MProtectLen),
         MustCleanup(false) {}
 
-  int MakeWriteable() {
+  int MakeWriteable() XRAY_NEVER_INSTRUMENT {
     auto R = mprotect(PageAlignedAddr, MProtectLen,
                       PROT_READ | PROT_WRITE | PROT_EXEC);
     if (R != -1)
@@ -55,7 +70,7 @@
     return R;
   }
 
-  ~MProtectHelper() {
+  ~MProtectHelper() XRAY_NEVER_INSTRUMENT {
     if (MustCleanup) {
       mprotect(PageAlignedAddr, MProtectLen, PROT_READ | PROT_EXEC);
     }
@@ -64,17 +79,11 @@
 
 } // namespace __xray
 
-extern "C" {
-// The following functions have to be defined in assembler, on a per-platform
-// basis. See xray_trampoline_*.s files for implementations.
-extern void __xray_FunctionEntry();
-extern void __xray_FunctionExit();
-}
-
 extern std::atomic<bool> XRayInitialized;
 extern std::atomic<__xray::XRaySledMap> XRayInstrMap;
 
-int __xray_set_handler(void (*entry)(int32_t, XRayEntryType)) {
+int __xray_set_handler(void (*entry)(int32_t,
+                                     XRayEntryType)) XRAY_NEVER_INSTRUMENT {
   if (XRayInitialized.load(std::memory_order_acquire)) {
     __xray::XRayPatchedFunction.store(entry, std::memory_order_release);
     return 1;
@@ -82,7 +91,9 @@
   return 0;
 }
 
-int __xray_remove_handler() { return __xray_set_handler(nullptr); }
+int __xray_remove_handler() XRAY_NEVER_INSTRUMENT {
+  return __xray_set_handler(nullptr);
+}
 
 std::atomic<bool> XRayPatching{false};
 
@@ -94,22 +105,24 @@
   Function Fn;
 
 public:
-  explicit CleanupInvoker(Function Fn) : Fn(Fn) {}
-  CleanupInvoker(const CleanupInvoker &) = default;
-  CleanupInvoker(CleanupInvoker &&) = default;
-  CleanupInvoker &operator=(const CleanupInvoker &) = delete;
-  CleanupInvoker &operator=(CleanupInvoker &&) = delete;
-  ~CleanupInvoker() { Fn(); }
+  explicit CleanupInvoker(Function Fn) XRAY_NEVER_INSTRUMENT : Fn(Fn) {}
+  CleanupInvoker(const CleanupInvoker &) XRAY_NEVER_INSTRUMENT = default;
+  CleanupInvoker(CleanupInvoker &&) XRAY_NEVER_INSTRUMENT = default;
+  CleanupInvoker &
+  operator=(const CleanupInvoker &) XRAY_NEVER_INSTRUMENT = delete;
+  CleanupInvoker &operator=(CleanupInvoker &&) XRAY_NEVER_INSTRUMENT = delete;
+  ~CleanupInvoker() XRAY_NEVER_INSTRUMENT { Fn(); }
 };
 
-template <class Function> CleanupInvoker<Function> ScopeCleanup(Function Fn) {
+template <class Function>
+CleanupInvoker<Function> ScopeCleanup(Function Fn) XRAY_NEVER_INSTRUMENT {
   return CleanupInvoker<Function>{Fn};
 }
 
 // ControlPatching implements the common internals of the patching/unpatching
 // implementation. |Enable| defines whether we're enabling or disabling the
 // runtime XRay instrumentation.
-XRayPatchingStatus ControlPatching(bool Enable) {
+XRayPatchingStatus ControlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
   if (!XRayInitialized.load(std::memory_order_acquire))
     return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
 
@@ -133,12 +146,13 @@
   if (InstrMap.Entries == 0)
     return XRayPatchingStatus::NOT_INITIALIZED;
 
-  int32_t FuncId = 1;
-  static constexpr uint8_t CallOpCode = 0xe8;
-  static constexpr uint16_t MovR10Seq = 0xba41;
-  static constexpr uint16_t Jmp9Seq = 0x09eb;
-  static constexpr uint8_t JmpOpCode = 0xe9;
-  static constexpr uint8_t RetOpCode = 0xc3;
+  const uint64_t PageSize = GetPageSizeCached();
+  if ((PageSize == 0) || ((PageSize & (PageSize - 1)) != 0)) {
+    Report("System page size is not a power of two: %lld\n", PageSize);
+    return XRayPatchingStatus::FAILED;
+  }
+
+  uint32_t FuncId = 1;
   uint64_t CurFun = 0;
   for (std::size_t I = 0; I < InstrMap.Entries; I++) {
     auto Sled = InstrMap.Sleds[I];
@@ -153,118 +167,41 @@
     // While we're here, we should patch the nop sled. To do that we mprotect
     // the page containing the function to be writeable.
     void *PageAlignedAddr =
-        reinterpret_cast<void *>(Sled.Address & ~((2 << 16) - 1));
-    std::size_t MProtectLen =
-        (Sled.Address + 12) - reinterpret_cast<uint64_t>(PageAlignedAddr);
+        reinterpret_cast<void *>(Sled.Address & ~(PageSize - 1));
+    std::size_t MProtectLen = (Sled.Address + cSledLength) -
+                              reinterpret_cast<uint64_t>(PageAlignedAddr);
     MProtectHelper Protector(PageAlignedAddr, MProtectLen);
     if (Protector.MakeWriteable() == -1) {
       printf("Failed mprotect: %d\n", errno);
       return XRayPatchingStatus::FAILED;
     }
 
-    static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
-    static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
-    if (Sled.Kind == XRayEntryType::ENTRY) {
-      // FIXME: Implement this in a more extensible manner, per-platform.
-      // Here we do the dance of replacing the following sled:
-      //
-      // xray_sled_n:
-      //   jmp +9
-      //   <9 byte nop>
-      //
-      // With the following:
-      //
-      //   mov r10d, <function id>
-      //   call <relative 32bit offset to entry trampoline>
-      //
-      // We need to do this in the following order:
-      //
-      // 1. Put the function id first, 2 bytes from the start of the sled (just
-      // after the 2-byte jmp instruction).
-      // 2. Put the call opcode 6 bytes from the start of the sled.
-      // 3. Put the relative offset 7 bytes from the start of the sled.
-      // 4. Do an atomic write over the jmp instruction for the "mov r10d"
-      // opcode and first operand.
-      //
-      // Prerequisite is to compute the relative offset to the
-      // __xray_FunctionEntry function's address.
-      int64_t TrampolineOffset =
-          reinterpret_cast<int64_t>(__xray_FunctionEntry) -
-          (static_cast<int64_t>(Sled.Address) + 11);
-      if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
-        Report("XRay Entry trampoline (%p) too far from sled (%p); distance = "
-               "%ld\n",
-               __xray_FunctionEntry, reinterpret_cast<void *>(Sled.Address),
-               TrampolineOffset);
-        continue;
-      }
-      if (Enable) {
-        *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
-        *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
-        *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
-        std::atomic_store_explicit(
-            reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
-            std::memory_order_release);
-      } else {
-        std::atomic_store_explicit(
-            reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
-            std::memory_order_release);
-        // FIXME: Write out the nops still?
-      }
+    bool Success = false;
+    switch (Sled.Kind) {
+    case XRayEntryType::ENTRY:
+      Success = patchFunctionEntry(Enable, FuncId, Sled);
+      break;
+    case XRayEntryType::EXIT:
+      Success = patchFunctionExit(Enable, FuncId, Sled);
+      break;
+    case XRayEntryType::TAIL:
+      Success = patchFunctionTailExit(Enable, FuncId, Sled);
+      break;
+    default:
+      Report("Unsupported sled kind: %d\n", int(Sled.Kind));
+      continue;
     }
-
-    if (Sled.Kind == XRayEntryType::EXIT) {
-      // FIXME: Implement this in a more extensible manner, per-platform.
-      // Here we do the dance of replacing the following sled:
-      //
-      // xray_sled_n:
-      //   ret
-      //   <10 byte nop>
-      //
-      // With the following:
-      //
-      //   mov r10d, <function id>
-      //   jmp <relative 32bit offset to exit trampoline>
-      //
-      // 1. Put the function id first, 2 bytes from the start of the sled (just
-      // after the 1-byte ret instruction).
-      // 2. Put the jmp opcode 6 bytes from the start of the sled.
-      // 3. Put the relative offset 7 bytes from the start of the sled.
-      // 4. Do an atomic write over the jmp instruction for the "mov r10d"
-      // opcode and first operand.
-      //
-      // Prerequisite is to compute the relative offset fo the
-      // __xray_FunctionExit function's address.
-      int64_t TrampolineOffset =
-          reinterpret_cast<int64_t>(__xray_FunctionExit) -
-          (static_cast<int64_t>(Sled.Address) + 11);
-      if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
-        Report("XRay Exit trampoline (%p) too far from sled (%p); distance = "
-               "%ld\n",
-               __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address),
-               TrampolineOffset);
-        continue;
-      }
-      if (Enable) {
-        *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
-        *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
-        *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
-        std::atomic_store_explicit(
-            reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
-            std::memory_order_release);
-      } else {
-        std::atomic_store_explicit(
-            reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode,
-            std::memory_order_release);
-        // FIXME: Write out the nops still?
-      }
-    }
+    (void)Success;
   }
   XRayPatching.store(false, std::memory_order_release);
   PatchingSuccess = true;
   return XRayPatchingStatus::SUCCESS;
 }
 
-XRayPatchingStatus __xray_patch() { return ControlPatching(true); }
+XRayPatchingStatus __xray_patch() XRAY_NEVER_INSTRUMENT {
+  return ControlPatching(true);
+}
 
-XRayPatchingStatus __xray_unpatch() { return ControlPatching(false); }
+XRayPatchingStatus __xray_unpatch() XRAY_NEVER_INSTRUMENT {
+  return ControlPatching(false);
+}
diff --git a/lib/xray/xray_interface_internal.h b/lib/xray/xray_interface_internal.h
index 6208c11..a8434a6 100644
--- a/lib/xray/xray_interface_internal.h
+++ b/lib/xray/xray_interface_internal.h
@@ -15,6 +15,7 @@
 #ifndef XRAY_INTERFACE_INTERNAL_H
 #define XRAY_INTERFACE_INTERNAL_H
 
+#include "sanitizer_common/sanitizer_platform.h"
 #include "xray/xray_interface.h"
 #include <cstddef>
 #include <cstdint>
@@ -22,11 +23,21 @@
 extern "C" {
 
 struct XRaySledEntry {
+#if SANITIZER_WORDSIZE == 64
   uint64_t Address;
   uint64_t Function;
   unsigned char Kind;
   unsigned char AlwaysInstrument;
   unsigned char Padding[14]; // Need 32 bytes
+#elif SANITIZER_WORDSIZE == 32
+  uint32_t Address;
+  uint32_t Function;
+  unsigned char Kind;
+  unsigned char AlwaysInstrument;
+  unsigned char Padding[6]; // Need 16 bytes
+#else
+#error "Unsupported word size."
+#endif
 };
 }
 
@@ -37,6 +48,22 @@
   size_t Entries;
 };
 
+uint64_t cycleFrequency();
+
+bool patchFunctionEntry(bool Enable, uint32_t FuncId,
+                        const XRaySledEntry &Sled);
+bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
+bool patchFunctionTailExit(bool Enable, uint32_t FuncId,
+                           const XRaySledEntry &Sled);
+
 } // namespace __xray
 
+extern "C" {
+// The following functions have to be defined in assembler, on a per-platform
+// basis. See xray_trampoline_*.S files for implementations.
+extern void __xray_FunctionEntry();
+extern void __xray_FunctionExit();
+extern void __xray_FunctionTailExit();
+}
+
 #endif
diff --git a/lib/xray/xray_trampoline_AArch64.S b/lib/xray/xray_trampoline_AArch64.S
new file mode 100644
index 0000000..f1a471c
--- /dev/null
+++ b/lib/xray/xray_trampoline_AArch64.S
@@ -0,0 +1,89 @@
+    .text
+    /* The variable containing the handler function pointer */
+    .global _ZN6__xray19XRayPatchedFunctionE
+    /* Word-aligned function entry point */
+    .p2align 2
+    /* Let C/C++ see the symbol */
+    .global __xray_FunctionEntry
+    .type __xray_FunctionEntry, %function
+    /* In C++ it is void extern "C" __xray_FunctionEntry(uint32_t FuncId) with
+         FuncId passed in W0 register. */
+__xray_FunctionEntry:
+    /* Move the return address beyond the end of sled data. The 12 bytes of
+         data are inserted in the code of the runtime patch, between the call
+         instruction and the instruction returned into. The data contains 32
+         bits of instrumented function ID and 64 bits of the address of
+         the current trampoline. */
+    ADD X30, X30, #12
+    /* Push the registers which may be modified by the handler function */
+    STP X1, X2, [SP, #-16]!
+    STP X3, X4, [SP, #-16]!
+    STP X5, X6, [SP, #-16]!
+    STP X7, X30, [SP, #-16]!
+    STP Q0, Q1, [SP, #-32]!
+    STP Q2, Q3, [SP, #-32]!
+    STP Q4, Q5, [SP, #-32]!
+    STP Q6, Q7, [SP, #-32]!
+    /* Load the address of _ZN6__xray19XRayPatchedFunctionE into X1 */
+    LDR X1, =_ZN6__xray19XRayPatchedFunctionE
+    /* Load the handler function pointer into X2 */
+    LDR X2, [X1]
+    /* Handler address is nullptr if handler is not set */
+    CMP X2, #0
+    BEQ FunctionEntry_restore
+    /* Function ID is already in W0 (the first parameter).
+         X1=0 means that we are tracing an entry event */
+    MOV X1, #0
+    /* Call the handler with 2 parameters in W0 and X1 */
+    BLR X2
+FunctionEntry_restore:
+    /* Pop the saved registers */
+    LDP Q6, Q7, [SP], #32
+    LDP Q4, Q5, [SP], #32
+    LDP Q2, Q3, [SP], #32
+    LDP Q0, Q1, [SP], #32
+    LDP X7, X30, [SP], #16
+    LDP X5, X6, [SP], #16
+    LDP X3, X4, [SP], #16
+    LDP X1, X2, [SP], #16
+    RET
+
+    /* Word-aligned function entry point */
+    .p2align 2
+    /* Let C/C++ see the symbol */
+    .global __xray_FunctionExit
+    .type __xray_FunctionExit, %function
+    /* In C++ it is void extern "C" __xray_FunctionExit(uint32_t FuncId) with
+         FuncId passed in W0 register. */
+__xray_FunctionExit:
+    /* Move the return address beyond the end of sled data. The 12 bytes of
+         data are inserted in the code of the runtime patch, between the call
+         instruction and the instruction returned into. The data contains 32
+         bits of instrumented function ID and 64 bits of the address of
+         the current trampoline. */
+    ADD X30, X30, #12
+    /* Push the registers which may be modified by the handler function */
+    STP X1, X2, [SP, #-16]!
+    STP X3, X4, [SP, #-16]!
+    STP X5, X6, [SP, #-16]!
+    STP X7, X30, [SP, #-16]!
+    STR Q0, [SP, #-16]!
+    /* Load the address of _ZN6__xray19XRayPatchedFunctionE into X1 */
+    LDR X1, =_ZN6__xray19XRayPatchedFunctionE
+    /* Load the handler function pointer into X2 */
+    LDR X2, [X1]
+    /* Handler address is nullptr if handler is not set */
+    CMP X2, #0
+    BEQ FunctionExit_restore
+    /* Function ID is already in W0 (the first parameter).
+         X1=1 means that we are tracing an exit event */
+    MOV X1, #1
+    /* Call the handler with 2 parameters in W0 and X1 */
+    BLR X2
+FunctionExit_restore:
+    LDR Q0, [SP], #16
+    LDP X7, X30, [SP], #16
+    LDP X5, X6, [SP], #16
+    LDP X3, X4, [SP], #16
+    LDP X1, X2, [SP], #16
+    RET
diff --git a/lib/xray/xray_trampoline_arm.S b/lib/xray/xray_trampoline_arm.S
new file mode 100644
index 0000000..5d87c97
--- /dev/null
+++ b/lib/xray/xray_trampoline_arm.S
@@ -0,0 +1,65 @@
+    .syntax unified
+    .arch armv6t2
+    .fpu vfpv2
+    .code 32
+    .global _ZN6__xray19XRayPatchedFunctionE
+    @ Word-aligned function entry point
+    .p2align 2
+    @ Let C/C++ see the symbol
+    .global __xray_FunctionEntry
+    @ It preserves all registers except r0, r12(ip), r14(lr) and r15(pc)
+    @ Assume that "q" part of the floating-point registers is not used
+    @   for passing parameters to C/C++ functions.
+    .type __xray_FunctionEntry, %function
+    @ In C++ it is void extern "C" __xray_FunctionEntry(uint32_t FuncId) with
+    @   FuncId passed in r0 register.
+__xray_FunctionEntry:
+    PUSH {r1-r3,lr}
+    @ Save floating-point parameters of the instrumented function
+    VPUSH {d0-d7}
+    MOVW r1,#:lower16:_ZN6__xray19XRayPatchedFunctionE
+    MOVT r1,#:upper16:_ZN6__xray19XRayPatchedFunctionE
+    LDR r2, [r1]
+    @ Handler address is nullptr if handler is not set
+    CMP r2, #0
+    BEQ FunctionEntry_restore
+    @ Function ID is already in r0 (the first parameter).
+    @ r1=0 means that we are tracing an entry event
+    MOV r1, #0
+    @ Call the handler with 2 parameters in r0 and r1
+    BLX r2
+FunctionEntry_restore:
+    @ Restore floating-point parameters of the instrumented function
+    VPOP {d0-d7}
+    POP {r1-r3,pc}
+
+    @ Word-aligned function entry point
+    .p2align 2
+    @ Let C/C++ see the symbol
+	.global __xray_FunctionExit
+	@ Assume that d1-d7 are not used for the return value.
+    @ Assume that "q" part of the floating-point registers is not used for the
+    @   return value in C/C++.
+	.type __xray_FunctionExit, %function
+	@ In C++ it is extern "C" void __xray_FunctionExit(uint32_t FuncId) with
+    @   FuncId passed in r0 register.
+__xray_FunctionExit:
+    PUSH {r1-r3,lr}
+    @ Save the floating-point return value of the instrumented function
+    VPUSH {d0}
+    @ Load the handler address
+    MOVW r1,#:lower16:_ZN6__xray19XRayPatchedFunctionE
+    MOVT r1,#:upper16:_ZN6__xray19XRayPatchedFunctionE
+    LDR r2, [r1]
+    @ Handler address is nullptr if handler is not set
+    CMP r2, #0
+    BEQ FunctionExit_restore
+    @ Function ID is already in r0 (the first parameter).
+    @ 1 means that we are tracing an exit event
+    MOV r1, #1
+    @ Call the handler with 2 parameters in r0 and r1
+    BLX r2
+FunctionExit_restore:
+    @ Restore the floating-point return value of the instrumented function
+    VPOP {d0}
+    POP {r1-r3,pc}
diff --git a/lib/xray/xray_trampoline_x86.S b/lib/xray/xray_trampoline_x86_64.S
similarity index 66%
rename from lib/xray/xray_trampoline_x86.S
rename to lib/xray/xray_trampoline_x86_64.S
index 8580396..d90c30c 100644
--- a/lib/xray/xray_trampoline_x86.S
+++ b/lib/xray/xray_trampoline_x86_64.S
@@ -13,17 +13,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-	.text
-	.file "xray_trampoline_x86.S"
-	.globl __xray_FunctionEntry
-	.align 16, 0x90
-	.type __xray_FunctionEntry,@function
-
-__xray_FunctionEntry:
-  .cfi_startproc
-  // Save caller provided registers before doing any actual work.
-	pushq %rbp
-	.cfi_def_cfa_offset 16
+.macro SAVE_REGISTERS
 	subq $200, %rsp
 	movupd	%xmm0, 184(%rsp)
 	movupd	%xmm1, 168(%rsp)
@@ -34,25 +24,15 @@
 	movupd	%xmm6, 88(%rsp)
 	movupd	%xmm7, 72(%rsp)
 	movq	%rdi, 64(%rsp)
-	movq  %rax, 56(%rsp)
-	movq  %rdx, 48(%rsp)
+	movq	%rax, 56(%rsp)
+	movq	%rdx, 48(%rsp)
 	movq	%rsi, 40(%rsp)
 	movq	%rcx, 32(%rsp)
 	movq	%r8, 24(%rsp)
 	movq	%r9, 16(%rsp)
+.endm
 
-	// de-mangled, that's __xray::XRayPatchedFunction, and we're doing an acquire
-	// load (on x86 is a normal mov instruction).
-	movq	_ZN6__xray19XRayPatchedFunctionE(%rip), %rax
-	testq	%rax, %rax
-	je	.Ltmp0
-
-	// assume that %r10d has the function id.
-	movl	%r10d, %edi
-	xor	%esi,%esi
-	callq	*%rax
-.Ltmp0:
-  // restore the registers
+.macro RESTORE_REGISTERS
 	movupd	184(%rsp), %xmm0
 	movupd	168(%rsp), %xmm1
 	movupd	152(%rsp), %xmm2
@@ -62,13 +42,39 @@
 	movupd	88(%rsp) , %xmm6
 	movupd	72(%rsp) , %xmm7
 	movq	64(%rsp), %rdi
-	movq  56(%rsp), %rax
-	movq  48(%rsp), %rdx
+	movq	56(%rsp), %rax
+	movq	48(%rsp), %rdx
 	movq	40(%rsp), %rsi
 	movq	32(%rsp), %rcx
 	movq	24(%rsp), %r8
 	movq	16(%rsp), %r9
 	addq	$200, %rsp
+.endm
+
+	.text
+	.file "xray_trampoline_x86.S"
+	.globl __xray_FunctionEntry
+	.align 16, 0x90
+	.type __xray_FunctionEntry,@function
+
+__xray_FunctionEntry:
+	.cfi_startproc
+	pushq %rbp
+	.cfi_def_cfa_offset 16
+	SAVE_REGISTERS
+
+	// This load has to be atomic, it's concurrent with __xray_patch().
+	// On x86/amd64, a simple (type-aligned) MOV instruction is enough.
+	movq	_ZN6__xray19XRayPatchedFunctionE(%rip), %rax
+	testq	%rax, %rax
+	je	.Ltmp0
+
+	// The patched function prolog puts its xray_instr_map index into %r10d.
+	movl	%r10d, %edi
+	xor	%esi,%esi
+	callq	*%rax
+.Ltmp0:
+	RESTORE_REGISTERS
 	popq	%rbp
 	retq
 .Ltmp1:
@@ -99,7 +105,7 @@
 	movl	$1, %esi
 	callq	*%rax
 .Ltmp2:
-  // Restore the important registers.
+	// Restore the important registers.
 	movupd	40(%rsp), %xmm0
 	movupd	24(%rsp), %xmm1
 	movq	16(%rsp), %rax
@@ -110,3 +116,32 @@
 .Ltmp3:
 	.size __xray_FunctionExit, .Ltmp3-__xray_FunctionExit
 	.cfi_endproc
+
+	.global __xray_FunctionTailExit
+	.align 16, 0x90
+	.type __xray_FunctionTailExit,@function
+__xray_FunctionTailExit:
+	.cfi_startproc
+	// Save the important registers as in the entry trampoline, but indicate that
+	// this is an exit. In the future, we will introduce a new entry type that
+	// differentiates between a normal exit and a tail exit, but we'd have to do
+	// this and increment the version number for the header.
+	pushq %rbp
+	.cfi_def_cfa_offset 16
+	SAVE_REGISTERS
+
+	movq	_ZN6__xray19XRayPatchedFunctionE(%rip), %rax
+	testq %rax,%rax
+	je	.Ltmp4
+
+	movl	%r10d, %edi
+	movl	$1, %esi
+	callq	*%rax
+
+.Ltmp4:
+	RESTORE_REGISTERS
+	popq	%rbp
+	retq
+.Ltmp5:
+	.size __xray_FunctionTailExit, .Ltmp5-__xray_FunctionTailExit
+	.cfi_endproc
diff --git a/lib/xray/xray_x86_64.cc b/lib/xray/xray_x86_64.cc
new file mode 100644
index 0000000..3ee9189
--- /dev/null
+++ b/lib/xray/xray_x86_64.cc
@@ -0,0 +1,202 @@
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+#include <cstdint>
+#include <errno.h>
+#include <fcntl.h>
+#include <iterator>
+#include <limits>
+#include <tuple>
+#include <unistd.h>
+
+namespace __xray {
+
+static std::pair<ssize_t, bool>
+retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
+  auto BytesToRead = std::distance(Begin, End);
+  ssize_t BytesRead;
+  ssize_t TotalBytesRead = 0;
+  while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
+    if (BytesRead == -1) {
+      if (errno == EINTR)
+        continue;
+      Report("Read error; errno = %d\n", errno);
+      return std::make_pair(TotalBytesRead, false);
+    }
+
+    TotalBytesRead += BytesRead;
+    BytesToRead -= BytesRead;
+    Begin += BytesRead;
+  }
+  return std::make_pair(TotalBytesRead, true);
+}
+
+// Stores the base-10 integer at the start of Filename in *Value. Returns false
+// on open/read errors, empty data, or if text other than '\n' follows it.
+static bool readValueFromFile(const char *Filename,
+                              long long *Value) XRAY_NEVER_INSTRUMENT {
+  int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
+  if (Fd == -1)
+    return false;
+  static constexpr size_t BufSize = 256;
+  char Line[BufSize + 1] = {}; // +1 keeps a NUL terminator after a full read.
+  ssize_t BytesRead;
+  bool Success;
+  std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
+  close(Fd); // Always release the descriptor, even on read failure.
+  if (!Success)
+    return false;
+  char *End = nullptr;
+  long long Tmp = internal_simple_strtoll(Line, &End, 10);
+  if (Line[0] == '\0' || (*End != '\n' && *End != '\0'))
+    return false;
+  *Value = Tmp;
+  return true;
+}
+
+uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT {
+  long long CPUFrequency = -1;
+  if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
+                        &CPUFrequency)) {
+    CPUFrequency *= 1000;
+  } else if (readValueFromFile(
+      "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
+      &CPUFrequency)) {
+    CPUFrequency *= 1000;
+  } else {
+    Report("Unable to determine CPU frequency for TSC accounting.\n");
+  }
+  return CPUFrequency == -1 ? 0 : static_cast<uint64_t>(CPUFrequency);
+}
+
+static constexpr uint8_t CallOpCode = 0xe8;
+static constexpr uint16_t MovR10Seq = 0xba41;
+static constexpr uint16_t Jmp9Seq = 0x09eb;
+static constexpr uint8_t JmpOpCode = 0xe9;
+static constexpr uint8_t RetOpCode = 0xc3;
+
+static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
+static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+                        const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // Here we do the dance of replacing the following sled:
+  //
+  // xray_sled_n:
+  //   jmp +9
+  //   <9 byte nop>
+  //
+  // With the following:
+  //
+  //   mov r10d, <function id>
+  //   call <relative 32bit offset to entry trampoline>
+  //
+  // We need to do this in the following order:
+  //
+  // 1. Put the function id first, 2 bytes from the start of the sled (just
+  // after the 2-byte jmp instruction).
+  // 2. Put the call opcode 6 bytes from the start of the sled.
+  // 3. Put the relative offset 7 bytes from the start of the sled.
+  // 4. Do an atomic write over the jmp instruction for the "mov r10d"
+  // opcode and first operand.
+  //
+  // Prerequisite is to compute the relative offset to the
+  // __xray_FunctionEntry function's address.
+  int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionEntry) -
+                             (static_cast<int64_t>(Sled.Address) + 11);
+  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+    Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
+           __xray_FunctionEntry, reinterpret_cast<void *>(Sled.Address));
+    return false;
+  }
+  if (Enable) {
+    *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+    *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
+    *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+        std::memory_order_release);
+  } else {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
+        std::memory_order_release);
+    // FIXME: Write out the nops still?
+  }
+  return true;
+}
+
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // Here we do the dance of replacing the following sled:
+  //
+  // xray_sled_n:
+  //   ret
+  //   <10 byte nop>
+  //
+  // With the following:
+  //
+  //   mov r10d, <function id>
+  //   jmp <relative 32bit offset to exit trampoline>
+  //
+  // 1. Put the function id first, 2 bytes from the start of the sled (after
+  // the 1-byte ret instruction and the first byte of the nop).
+  // 2. Put the jmp opcode 6 bytes from the start of the sled.
+  // 3. Put the relative offset 7 bytes from the start of the sled.
+  // 4. Do an atomic write over the start of the sled (the ret and the first
+  // nop byte) with the 2-byte "mov r10d" opcode sequence.
+  //
+  // Prerequisite is to compute the relative offset to the
+  // __xray_FunctionExit function's address.
+  int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
+                             (static_cast<int64_t>(Sled.Address) + 11);
+  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+    Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
+           __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address));
+    return false;
+  }
+  if (Enable) {
+    *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+    *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
+    *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+        std::memory_order_release);
+  } else {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode,
+        std::memory_order_release);
+    // FIXME: Write out the nops still?
+  }
+  return true;
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // Patch the tail call sled the same way as an entry sled (mov r10d + call),
+  // except that the call targets the tail exit trampoline.
+  int64_t TrampolineOffset =
+      reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
+      (static_cast<int64_t>(Sled.Address) + 11);
+  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+    Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
+           __xray_FunctionTailExit, reinterpret_cast<void *>(Sled.Address));
+    return false;
+  }
+  if (Enable) {
+    *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+    *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
+    *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+        std::memory_order_release);
+  } else {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
+        std::memory_order_release);
+    // FIXME: Write out the nops still?
+  }
+  return true;
+}
+
+} // namespace __xray
diff --git a/lib/xray/xray_x86_64.h b/lib/xray/xray_x86_64.h
new file mode 100644
index 0000000..52d2dea
--- /dev/null
+++ b/lib/xray/xray_x86_64.h
@@ -0,0 +1,32 @@
+//===-- xray_x86_64.h -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_X86_64_H
+#define XRAY_X86_64_H
+
+#include <cstdint>
+#include <x86intrin.h>
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "xray_defs.h"
+
+namespace __xray {
+
+ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+  unsigned LongCPU;
+  uint64_t TSC = __rdtscp(&LongCPU);
+  CPU = LongCPU;
+  return TSC;
+}
+}
+
+#endif // XRAY_X86_64_H
diff --git a/make/AppleBI.mk b/make/AppleBI.mk
deleted file mode 100644
index b236152..0000000
--- a/make/AppleBI.mk
+++ /dev/null
@@ -1,149 +0,0 @@
-
-#
-# Make rules to build compiler_rt in Apple B&I infrastructure
-#
-
-# set ProjSrcRoot appropriately
-ProjSrcRoot := $(SRCROOT)
-# set ProjObjRoot appropriately
-ifdef OBJROOT
-  ProjObjRoot := $(OBJROOT)
-else
-  ProjObjRoot := $(ProjSrcRoot)
-endif
-
-ifeq (,$(RC_PURPLE))
-	INSTALL_TARGET = install-MacOSX
-else
-  ifeq (,$(RC_INDIGO))
-    INSTALL_TARGET = install-iOS
-  else
-    INSTALL_TARGET = install-iOS-Simulator
-  endif
-endif
-
-
-
-# Log full compile lines in B&I logs and omit summary lines.
-Verb :=
-Summary := @true
-
-# List of functions needed for each architecture.
-
-# Copies any public headers to DSTROOT.
-installhdrs:
-
-
-# Copies source code to SRCROOT.
-installsrc:
-	cp -r . $(SRCROOT)
-
-
-install:  $(INSTALL_TARGET)
-
-# Copy results to DSTROOT.
-install-MacOSX : $(SYMROOT)/libcompiler_rt.dylib \
-                 $(SYMROOT)/libcompiler_rt-dyld.a 
-	mkdir -p $(DSTROOT)/usr/local/lib/dyld
-	cp $(SYMROOT)/libcompiler_rt-dyld.a  \
-				    $(DSTROOT)/usr/local/lib/dyld/libcompiler_rt.a
-	mkdir -p $(DSTROOT)/usr/lib/system
-	$(call GetCNAVar,STRIP,Platform.darwin_bni,Release,) -S $(SYMROOT)/libcompiler_rt.dylib \
-	    -o $(DSTROOT)/usr/lib/system/libcompiler_rt.dylib
-	cd $(DSTROOT)/usr/lib/system; \
-	    ln -s libcompiler_rt.dylib libcompiler_rt_profile.dylib; \
-	    ln -s libcompiler_rt.dylib libcompiler_rt_debug.dylib
-
-# Rule to make each dylib slice
-$(OBJROOT)/libcompiler_rt-%.dylib : $(OBJROOT)/darwin_bni/Release/%/libcompiler_rt.a
-	echo "const char vers[] = \"@(#) $(RC_ProjectName)-$(RC_ProjectSourceVersion)\"; " > $(OBJROOT)/version.c
-	$(call GetCNAVar,CC,Platform.darwin_bni,Release,$*) \
-	   $(OBJROOT)/version.c -arch $* -dynamiclib \
-	   -install_name /usr/lib/system/libcompiler_rt.dylib \
-	   -compatibility_version 1 -current_version $(RC_ProjectSourceVersion) \
-	   -nodefaultlibs -umbrella System -dead_strip \
-	   -Wl,-upward-lunwind \
-	   -Wl,-upward-lsystem_m \
-	   -Wl,-upward-lsystem_c \
-	   -Wl,-upward-lsystem_kernel \
-	   -Wl,-upward-lsystem_platform \
-	   -Wl,-ldyld \
-	   -L$(SDKROOT)/usr/lib/system \
-	   $(DYLIB_FLAGS) -Wl,-force_load,$^ -o $@ 
-
-# Rule to make fat dylib
-$(SYMROOT)/libcompiler_rt.dylib: $(foreach arch,$(filter-out armv4t,$(RC_ARCHS)), \
-                                        $(OBJROOT)/libcompiler_rt-$(arch).dylib)
-	$(call GetCNAVar,LIPO,Platform.darwin_bni,Release,) -create $^ -o  $@
-	$(call GetCNAVar,DSYMUTIL,Platform.darwin_bni,Release,) $@
-
-
-# Copy results to DSTROOT.
-install-iOS: $(SYMROOT)/libcompiler_rt-static.a \
-             $(SYMROOT)/libcompiler_rt-dyld.a \
-             $(SYMROOT)/libcompiler_rt.dylib
-	mkdir -p $(DSTROOT)/usr/local/lib
-	cp $(SYMROOT)/libcompiler_rt-static.a  \
-				    $(DSTROOT)/usr/local/lib/libcompiler_rt-static.a
-	mkdir -p $(DSTROOT)/usr/local/lib/dyld
-	cp $(SYMROOT)/libcompiler_rt-dyld.a  \
-				    $(DSTROOT)/usr/local/lib/dyld/libcompiler_rt.a
-	mkdir -p $(DSTROOT)/usr/lib/system
-	$(call GetCNAVar,STRIP,Platform.darwin_bni,Release,) -S $(SYMROOT)/libcompiler_rt.dylib \
-	    -o $(DSTROOT)/usr/lib/system/libcompiler_rt.dylib
-
-# Rule to make fat archive
-$(SYMROOT)/libcompiler_rt-static.a : $(foreach arch,$(RC_ARCHS), \
-                         $(OBJROOT)/darwin_bni/Static/$(arch)/libcompiler_rt.a)
-	$(call GetCNAVar,LIPO,Platform.darwin_bni,Release,) -create $^ -o  $@
-
-# rule to make each archive slice for dyld (which removes a few archive members)
-$(OBJROOT)/libcompiler_rt-dyld-%.a : $(OBJROOT)/darwin_bni/Release/%/libcompiler_rt.a
-	cp $^ $@
-	DEL_LIST=`$(AR)  -t $@ | egrep 'apple_versioning|gcc_personality_v0|eprintf' | xargs echo` ; \
-	if [ -n "$${DEL_LIST}" ] ; \
-	then  \
-		$(call GetCNAVar,AR,Platform.darwin_bni,Release,) -d $@ $${DEL_LIST}; \
-		$(call GetCNAVar,RANLIB,Platform.darwin_bni,Release,) $@ ; \
-	fi
-
-# rule to make make archive for dyld
-$(SYMROOT)/libcompiler_rt-dyld.a : $(foreach arch,$(RC_ARCHS), \
-                         $(OBJROOT)/libcompiler_rt-dyld-$(arch).a)
-	$(call GetCNAVar,LIPO,Platform.darwin_bni,Release,) -create $^ -o  $@
-
-
-
-# Copy results to DSTROOT.
-install-iOS-Simulator: $(SYMROOT)/libcompiler_rt_sim.dylib \
-                       $(SYMROOT)/libcompiler_rt-dyld.a
-	mkdir -p $(DSTROOT)/$(SDKROOT)/usr/lib/system
-	$(call GetCNAVar,STRIP,Platform.darwin_bni,Release,) -S $(SYMROOT)/libcompiler_rt_sim.dylib \
-	    -o $(DSTROOT)/$(SDKROOT)/usr/lib/system/libcompiler_rt_sim.dylib
-	mkdir -p $(DSTROOT)/$(SDKROOT)/usr/local/lib/dyld
-	cp $(SYMROOT)/libcompiler_rt-dyld.a  \
-				    $(DSTROOT)/$(SDKROOT)/usr/local/lib/dyld/libcompiler_rt.a
-  
-# Rule to make fat dylib
-$(SYMROOT)/libcompiler_rt_sim.dylib: $(foreach arch,$(RC_ARCHS), \
-                                        $(OBJROOT)/libcompiler_rt_sim-$(arch).dylib)
-	$(call GetCNAVar,LIPO,Platform.darwin_bni,Release,) -create $^ -o  $@
-	$(call GetCNAVar,DSYMUTIL,Platform.darwin_bni,Release,) $@
-
-# Rule to make each dylib slice
-$(OBJROOT)/libcompiler_rt_sim-%.dylib : $(OBJROOT)/darwin_bni/Release/%/libcompiler_rt.a
-	echo "const char vers[] = \"@(#) $(RC_ProjectName)-$(RC_ProjectSourceVersion)\"; " > $(OBJROOT)/version.c
-	$(call GetCNAVar,CC,Platform.darwin_bni,Release,$*) \
-	   $(OBJROOT)/version.c -arch $* -dynamiclib \
-	   -install_name /usr/lib/system/libcompiler_rt_sim.dylib \
-	   -compatibility_version 1 -current_version $(RC_ProjectSourceVersion) \
-     -Wl,-unexported_symbol,___enable_execute_stack \
-	   -nostdlib \
-	   -Wl,-upward-lunwind_sim \
-	   -Wl,-upward-lsystem_sim_m \
-	   -Wl,-upward-lsystem_sim_c \
-	   -ldyld_sim \
-	   -Wl,-upward-lSystem \
-	   -umbrella System -Wl,-no_implicit_dylibs -L$(SDKROOT)/usr/lib/system -dead_strip \
-	   $(DYLIB_FLAGS) -Wl,-force_load,$^ -o $@ 
-
diff --git a/make/config.mk b/make/config.mk
deleted file mode 100644
index 094fd16..0000000
--- a/make/config.mk
+++ /dev/null
@@ -1,49 +0,0 @@
-###
-# Configuration variables.
-
-OS := $(shell uname)
-
-# Assume make is always run from top-level of source directory. Note than an
-# Apple style build overrides these variables later in the makefile.
-ProjSrcRoot := $(shell pwd)
-ProjObjRoot := $(ProjSrcRoot)
-
-# The list of modules which are required to be built into every library. This
-# should only be used for internal utilities which could be used in any other
-# module. Any other cases the platform should be allowed to opt-in to.
-AlwaysRequiredModules := int_util
-
-###
-# Tool configuration variables.
-
-# FIXME: LLVM uses autoconf/mkinstalldirs ?
-MKDIR := mkdir -p
-DATE := date
-LIPO := lipo
-CP := cp
-DSYMUTIL := dsymutil
-
-VERBOSE := 0
-DEBUGMAKE :=
-
-###
-# Automatic and derived variables.
-
-# Adjust settings for verbose mode
-ifneq ($(VERBOSE),1)
-  Verb := @
-else
-  Verb :=
-endif
-
-Echo := @echo
-ifndef Summary
-  Summary = $(Echo)
-endif
-
-###
-# Common compiler options
-COMMON_INCLUDES=-I${ProjSrcRoot}/lib -I${ProjSrcRoot}/include
-COMMON_CXXFLAGS=-std=c++11 -fno-exceptions -fPIC -funwind-tables $(COMMON_INCLUDES)
-COMMON_CFLAGS=-fPIC $(COMMON_INCLUDES)
-COMMON_ASMFLAGS=$(COMMON_INCLUDES)
diff --git a/make/filter-inputs b/make/filter-inputs
deleted file mode 100755
index 8a6bbe2..0000000
--- a/make/filter-inputs
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/env python
-
-#===- make/filter-inputs ---------------------------------------------------===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-# Given a list of files, return a new list of files taking only the
-# first file for any particular filename.
-def main():
-    import os,sys
-    
-    seen = set()
-    for file in sys.argv[1:]:
-        base = os.path.basename(file)
-        if base not in seen:
-            seen.add(base)
-            print file
-
-if __name__ == '__main__':
-    main()
diff --git a/make/lib_info.mk b/make/lib_info.mk
deleted file mode 100644
index 31850f7..0000000
--- a/make/lib_info.mk
+++ /dev/null
@@ -1,59 +0,0 @@
-# compiler-rt Library Info
-#
-# This should be included once the subdirectory information has been loaded, and
-# uses the utilities in 'util.mk'.
-#
-# This defines the following variables describing compiler-rt:
-#   AvailableFunctions   - The entire list of function names (unmangled) the
-#                          library can provide.
-#   CommonFunctions      - The list of generic functions available.
-#   ArchFunctions.<arch> - The list of functions commonly available for
-#                          'arch'. This does not include any config specific
-#                          functions.
-#
-#   AvailableIn.<function> - The list of subdir keys where 'function' is
-#                            defined.
-
-# Determine the set of available modules.
-AvailableModules := $(sort $(foreach key,$(SubDirKeys),\
-	$($(key).ModuleName)))
-
-# Build a per-module map of subdir keys.
-$(foreach key,$(SubDirKeys),\
-	$(call Append,ModuleSubDirKeys.$($(key).ModuleName),$(key)))
-
-AvailableArchs := $(sort $(foreach key,$(SubDirKeys),\
-	$($(key).OnlyArchs)))
-
-AvailableFunctions := $(sort $(foreach key,$(SubDirKeys),\
-	$(basename $($(key).ObjNames))))
-
-CommonFunctions := $(sort\
-  $(foreach key,$(ModuleSubDirKeys.builtins),\
-    $(if $(call strneq,,$(strip $($(key).OnlyArchs) $($(key).OnlyConfigs))),,\
-         $(basename $($(key).ObjNames)))))
-
-# Compute common arch functions.
-$(foreach key,$(ModuleSubDirKeys.builtins),\
-  $(if $(call strneq,,$($(key).OnlyConfigs)),,\
-    $(foreach arch,$($(key).OnlyArchs),\
-      $(call Append,ArchFunctions.$(arch),$(sort \
-        $(basename $($(key).ObjNames)))))))
-
-# Compute arch only functions.
-$(foreach arch,$(AvailableArchs),\
-  $(call Set,ArchFunctions.$(arch),$(sort $(ArchFunctions.$(arch))))\
-  $(call Set,ArchOnlyFunctions.$(arch),\
-    $(call set_difference,$(ArchFunctions.$(arch)),$(CommonFunctions))))
-
-# Compute lists of where each function is available.
-$(foreach key,$(SubDirKeys),\
-  $(foreach fn,$(basename $($(key).ObjNames)),\
-    $(call Append,AvailableIn.$(fn),$(key))))
-
-# The names of all the available options.
-AvailableOptions := AR ARFLAGS \
-                    CC CFLAGS LDFLAGS FUNCTIONS OPTIMIZED \
-                    RANLIB RANLIBFLAGS \
-                    VISIBILITY_HIDDEN KERNEL_USE \
-                    SHARED_LIBRARY SHARED_LIBRARY_SUFFIX STRIP LIPO DSYMUTIL
diff --git a/make/lib_platforms.mk b/make/lib_platforms.mk
deleted file mode 100644
index 9cf9704..0000000
--- a/make/lib_platforms.mk
+++ /dev/null
@@ -1,82 +0,0 @@
-# compiler-rt Configuration Support
-#
-# This should be included following 'lib_util.mk'.
-
-# The simple variables configurations can define.
-PlainConfigVariables := Configs Description
-PerConfigVariables := UniversalArchs Arch $(AvailableOptions)
-RequiredConfigVariables := Configs Description
-
-###
-# Load Platforms
-
-# Template: subdir_traverse_template subdir
-define load_platform_template
-$(call Set,PlatformName,$(basename $(notdir $(1))))
-ifneq ($(DEBUGMAKE),)
-  $$(info MAKE: $(PlatformName): Loading platform)
-endif
-
-# Construct the variable key for this directory.
-$(call Set,PlatformKey,Platform.$(PlatformName))
-$(call Append,PlatformKeys,$(PlatformKey))
-$(call Set,$(PlatformKey).Name,$(PlatformName))
-$(call Set,$(PlatformKey).Path,$(1))
-
-# Reset platform specific variables to sentinel value.
-$$(foreach var,$(PlainConfigVariables) $(PerConfigVariables),\
-  $$(call Set,$$(var),UNDEFINED))
-$$(foreach var,$(PerConfigVariables),\
-  $$(foreach config,$$(Configs),\
-    $$(call Set,$$(var).$$(config),UNDEFINED)))
-$$(foreach var,$(PerConfigVariables),\
-  $$(foreach arch,$(AvailableArchs),\
-    $$(call Set,$$(var).$$(arch),UNDEFINED)))
-
-# Get the platform variables.
-include make/options.mk
-include $(1)
-
-# Check for undefined required variables.
-$$(foreach var,$(RequiredConfigVariables),\
-  $$(if $$(call strneq,UNDEFINED,$$($$(var))),, \
-	$$(error $(Dir): variable '$$(var)' was not undefined)))
-
-# Check that exactly one of UniversalArchs or Arch was defined.
-$$(if $$(and $$(call strneq,UNDEFINED,$$(UniversalArchs)),\
-             $$(call strneq,UNDEFINED,$$(Arch))),\
-    $$(error '$(1)': cannot define both 'UniversalArchs' and 'Arch'))
-$$(if $$(or $$(call strneq,UNDEFINED,$$(UniversalArchs)),\
-            $$(call strneq,UNDEFINED,$$(Arch))),,\
-    $$(error '$(1)': must define one of 'UniversalArchs' and 'Arch'))
-
-# Collect all the platform variables for subsequent use.
-$$(foreach var,$(PlainConfigVariables) $(PerConfigVariables),\
-  $$(if $$(call strneq,UNDEFINED,$$($$(var))),\
-    $$(call CopyVariable,$$(var),$(PlatformKey).$$(var))))
-$$(foreach var,$(PerConfigVariables),\
-  $$(foreach config,$$(Configs),\
-    $$(if $$(call strneq,UNDEFINED,$$($$(var).$$(config))),\
-      $$(call CopyVariable,$$(var).$$(config),$(PlatformKey).$$(var).$$(config))))\
-  $$(foreach arch,$(AvailableArchs),\
-    $$(if $$(call strneq,UNDEFINED,$$($$(var).$$(arch))),\
-      $$(call CopyVariable,$$(var).$$(arch),$(PlatformKey).$$(var).$$(arch))))\
-  $$(foreach config,$$(Configs),\
-    $$(foreach arch,$(AvailableArchs),\
-      $$(if $$(call strneq,UNDEFINED,$$($$(var).$$(config).$$(arch))),\
-        $$(call CopyVariable,$$(var).$$(config).$$(arch),\
-                $(PlatformKey).$$(var).$$(config).$$(arch))))))
-
-ifneq ($(DEBUGMAKE),)
-  $$(info MAKE: $(PlatformName): Done loading platform)
-endif
-endef
-
-# Evaluate this now so we do not have to worry about order of evaluation.
-PlatformFiles := $(wildcard make/platform/*.mk)
-ifneq ($(DEBUGMAKE),)
- $(info MAKE: Loading platforms: $(PlatformFiles))
-endif
-
-$(foreach file,$(PlatformFiles),\
-  $(eval $(call load_platform_template,$(file))))
diff --git a/make/lib_util.mk b/make/lib_util.mk
deleted file mode 100644
index 089a0e2..0000000
--- a/make/lib_util.mk
+++ /dev/null
@@ -1,65 +0,0 @@
-# Library Utility Functions
-#
-# This should be included following 'lib_info.mk'.
-
-# Function: GetCNAVar variable-name platform-key config arch
-#
-# Get a per-config-and-arch variable value.
-GetCNAVar = $(strip \
-  $(or $($(2).$(1).$(3).$(4)), \
-       $($(2).$(1).$(3)), \
-       $($(2).$(1).$(4)), \
-       $($(2).$(1))))
-
-# Function: SelectFunctionDir config arch function-name optimized
-#
-# Choose the appropriate implementation directory to use for 'function-name' in
-# the configuration 'config' and on given arch.
-SelectFunctionDir = $(strip \
-  $(call Set,Tmp.SelectFunctionDir,$(call SelectFunctionDirs,$(1),$(2),$(3),$(4)))\
-  $(if $(call streq,1,$(words $(Tmp.SelectFunctionDir))),\
-       $(Tmp.SelectFunctionDir),\
-       $(error SelectFunctionDir: invalid function name "$(3)" ($(strip\
-               $(if $(call streq,0,$(words $(Tmp.SelectFunctionDir))),\
-                    no such function,\
-                    function implemented in multiple directories!!!))))))
-
-# Helper functions that select the entire list of subdirs where a function is
-# defined with a certain specificity.
-SelectFunctionDirs_Opt_ConfigAndArch = $(strip \
-  $(foreach key,$(AvailableIn.$(3)),\
-    $(if $(and $(call streq,Optimized,$($(key).Implementation)),\
-               $(call contains,$($(key).OnlyConfigs),$(1)),\
-               $(call contains,$($(key).OnlyArchs),$(2))),$(key),)))
-SelectFunctionDirs_Opt_Config = $(strip \
-  $(foreach key,$(AvailableIn.$(3)),\
-    $(if $(and $(call streq,Optimized,$($(key).Implementation)),\
-               $(call contains,$($(key).OnlyConfigs),$(1))),$(key),)))
-SelectFunctionDirs_Opt_Arch = $(strip \
-  $(foreach key,$(AvailableIn.$(3)),\
-    $(if $(and $(call streq,Optimized,$($(key).Implementation)),\
-               $(call contains,$($(key).OnlyArchs),$(2))),$(key),)))
-SelectFunctionDirs_Gen = $(strip \
-  $(foreach key,$(AvailableIn.$(3)),\
-    $(if $(call streq,Generic,$($(key).Implementation)),$(key))))
-
-# Helper function to select the right set of dirs in generic priority order.
-SelectFunctions_Gen = \
-  $(or $(call SelectFunctionDirs_Gen,$(1),$(2),$(3)),\
-       $(call SelectFunctionDirs_Opt_ConfigAndArch,$(1),$(2),$(3)), \
-       $(call SelectFunctionDirs_Opt_Config,$(1),$(2),$(3)), \
-       $(call SelectFunctionDirs_Opt_Arch,$(1),$(2),$(3)))
-
-# Helper function to select the right set of dirs in optimized priority order.
-SelectFunctions_Opt = \
-  $(or $(call SelectFunctionDirs_Opt_ConfigAndArch,$(1),$(2),$(3)), \
-       $(call SelectFunctionDirs_Opt_Config,$(1),$(2),$(3)), \
-       $(call SelectFunctionDirs_Opt_Arch,$(1),$(2),$(3)), \
-       $(call SelectFunctionDirs_Gen,$(1),$(2),$(3)))
-
-# Helper function to select the right set of dirs (which should be exactly one)
-# for a function.
-SelectFunctionDirs = \
-  $(if $(call streq,1,$(4)),\
-       $(call SelectFunctions_Opt,$(1),$(2),$(3)),\
-       $(call SelectFunctions_Gen,$(1),$(2),$(3)))
diff --git a/make/options.mk b/make/options.mk
deleted file mode 100644
index 67197de..0000000
--- a/make/options.mk
+++ /dev/null
@@ -1,48 +0,0 @@
-# Options which may be overriden for platforms, etc.
-#
-# This list of such variables should be kept up to date with AvailableOptions in
-# 'make/lib_info.mk'.
-
-# The compiler to use.
-CC := gcc
-
-# The compiler flags to use.
-CFLAGS := -Wall -Werror
-
-# The list of functions to include in the library.
-FUNCTIONS :=
-
-# Whether optimized function implementations should be used.
-OPTIMIZED := 1
-
-# Whether function definitions should use hidden visibility. This adds the
-# -fvisibility=hidden compiler option and uses .private_extern annotations in
-# assembly files.
-#
-# FIXME: Make this more portable. When that is done, it should probably be the
-# default.
-VISIBILITY_HIDDEN := 0
-
-# Whether the library is being built for kernel use.
-KERNEL_USE := 0
-
-# Whether the library should be built as a shared object.
-SHARED_LIBRARY := 0
-
-# Miscellaneous tools.
-
-AR := ar
-# FIXME: Remove these pipes once ranlib errors are fixed.
-ARFLAGS := cru 2> /dev/null
-
-LDFLAGS :=
-
-RANLIB := ranlib
-# FIXME: Remove these pipes once ranlib errors are fixed.
-RANLIBFLAGS := 2> /dev/null
-
-STRIP := strip
-LIPO := lipo
-DSYMUTIL := dsymutil
-
-SHARED_LIBRARY_SUFFIX := so
diff --git a/make/platform/clang_darwin.mk b/make/platform/clang_darwin.mk
deleted file mode 100644
index ccbee8b..0000000
--- a/make/platform/clang_darwin.mk
+++ /dev/null
@@ -1,468 +0,0 @@
-# These are the functions which clang needs when it is targeting a previous
-# version of the OS. The issue is that the backend may use functions which were
-# not present in the libgcc that shipped on the platform. In such cases, we link
-# with a version of the library which contains private_extern definitions of all
-# the extra functions which might be referenced.
-
-Description := Static runtime libraries for clang/Darwin.
-
-# A function that ensures we don't try to build for architectures and SDKs
-# that we don't have working toolchains for. Arguments:
-# (1): List of architectures
-# (2): Library name
-# (3): SDK path
-# The result is a possibly empty subset of the architectures from argument 1.
-CheckArches = \
-  $(shell \
-    result=""; \
-    if [ "X$(3)" != X ]; then \
-      for arch in $(1); do \
-        if $(LD) -v 2>&1 | grep "configured to support" \
-             | tr ' ' '\n' | grep "^$$arch$$" >/dev/null 2>/dev/null; then \
-          if $(CC) -arch $$arch \
-            -integrated-as \
-            $(ProjSrcRoot)/make/platform/clang_darwin_test_input.c \
-            -isysroot $(3) \
-            -o /dev/null > /dev/null 2> /dev/null; then \
-              result="$$result$$arch "; \
-          else \
-            printf 1>&2 \
-             "warning: clang_darwin.mk: dropping arch '$$arch' from lib '$(2)'"; \
-            printf 1>&2 " (clang or system libraries do not support it)\n"; \
-          fi; \
-        else \
-          printf 1>&2 \
-            "warning: clang_darwin.mk: dropping arch '$$arch' from lib '$(2)'";\
-          printf 1>&2 " (ld does not support it)\n"; \
-        fi; \
-      done; \
-    fi; \
-    echo $$result)
-
-XCRun = \
-  $(shell \
-    result=`xcrun -find $(1) 2> /dev/null`; \
-    if [ "$$?" != "0" ]; then result=$(1); fi; \
-    echo $$result)
-# Prefer building with the internal SDKs.
-XCRunSdkPath = \
-  $(shell \
-    result=`xcrun --sdk $(1).internal --show-sdk-path 2> /dev/null`; \
-    if [ "$$?" != "0" ]; then \
-      result=`xcrun --sdk $(1) --show-sdk-path 2> /dev/null`; \
-      if [ "$$?" != "0" ]; then result=""; fi; \
-    fi; \
-    echo $$result)
-###
-
-CC       := $(call XCRun,clang)
-LD       := $(shell $(CC) -print-prog-name=ld)
-AR       := $(call XCRun,ar)
-RANLIB   := $(call XCRun,ranlib)
-STRIP    := $(call XCRun,strip)
-LIPO     := $(call XCRun,lipo)
-DSYMUTIL := $(call XCRun,dsymutil)
-
-OSX_SDK := $(call XCRunSdkPath,macosx)
-IOS_SDK := $(call XCRunSdkPath,iphoneos)
-IOSSIM_SDK := $(call XCRunSdkPath,iphonesimulator)
-
-Configs :=
-UniversalArchs :=
-
-# Configuration solely for providing access to an eprintf symbol, which may
-# still be referenced from Darwin system headers. This symbol is only ever
-# needed on i386.
-Configs += eprintf
-UniversalArchs.eprintf := $(call CheckArches,i386,eprintf,$(OSX_SDK))
-
-# Configuration for targeting 10.4. We need a few functions missing from
-# libgcc_s.10.4.dylib. We only build x86 slices since clang doesn't really
-# support targeting PowerPC.
-Configs += 10.4
-UniversalArchs.10.4 := $(call CheckArches,i386 x86_64,10.4,$(OSX_SDK))
-
-# Configuration for targeting iOS for a couple of functions that didn't
-# make it into libSystem.
-Configs += ios
-UniversalArchs.ios := $(call CheckArches,i386 x86_64,ios,$(IOSSIM_SDK))
-UniversalArchs.ios += $(call CheckArches,armv7 arm64,ios,$(IOS_SDK))
-
-# Configuration for targeting OSX. These functions may not be in libSystem
-# so we should provide our own.
-Configs += osx
-UniversalArchs.osx := $(call CheckArches,i386 x86_64 x86_64h,osx,$(OSX_SDK))
-
-# Configuration for use with kernel/kexts.
-Configs += cc_kext
-UniversalArchs.cc_kext := $(call CheckArches,i386 x86_64 x86_64h,cc_kext,$(OSX_SDK))
-
-# Configuration for use with iOS kernel/kexts
-Configs += cc_kext_ios
-UniversalArchs.cc_kext_ios += $(call CheckArches,armv7,cc_kext_ios,$(IOS_SDK))
-
-# Darwin 10.6 has a bug in cctools that makes it unable to use ranlib on our ARM
-# object files. If we are on that platform, strip out all ARM archs. We still
-# build the libraries themselves so that Clang can find them where it expects
-# them, even though they might not have an expected slice.
-ifneq ($(shell test -x /usr/bin/sw_vers && sw_vers -productVersion | grep 10.6),)
-UniversalArchs.ios := $(filter-out armv7, $(UniversalArchs.ios))
-UniversalArchs.cc_kext_ios := $(filter-out armv7, $(UniversalArchs.cc_kext_ios))
-endif
-
-# If RC_SUPPORTED_ARCHS is defined, treat it as a list of the architectures we
-# are intended to support and limit what we try to build to that.
-ifneq ($(RC_SUPPORTED_ARCHS),)
-$(foreach config,$(Configs),\
-  $(call Set,UniversalArchs.$(config),\
-	$(filter $(RC_SUPPORTED_ARCHS),$(UniversalArchs.$(config)))))
-endif
-
-# Remove empty configs if we end up dropping all the requested
-# archs for a particular config.
-$(foreach config,$(Configs),\
-  $(if $(strip $(UniversalArchs.$(config))),,\
-	$(call Set,Configs,$(filter-out $(config),$(Configs)))))
-
-###
-
-# Forcibly strip off any -arch, as that totally breaks our universal support.
-override CC := $(subst -arch ,-arch_,$(CC))
-override CC := $(patsubst -arch_%,,$(CC))
-
-CFLAGS := -Wall -Werror -O3 -fomit-frame-pointer
-
-# Always set deployment target arguments for every build, these libraries should
-# never depend on the environmental overrides. We simply set them to minimum
-# supported deployment target -- nothing in the compiler-rt libraries should
-# actually depend on the deployment target.
-OSX_DEPLOYMENT_ARGS := -mmacosx-version-min=10.4
-IOS_DEPLOYMENT_ARGS := -mios-version-min=1.0
-IOS6_DEPLOYMENT_ARGS := -mios-version-min=6.0
-IOSSIM_DEPLOYMENT_ARGS := -mios-simulator-version-min=1.0
-
-OSX_DEPLOYMENT_ARGS += -isysroot $(OSX_SDK)
-IOS_DEPLOYMENT_ARGS += -isysroot $(IOS_SDK)
-IOS6_DEPLOYMENT_ARGS += -isysroot $(IOS_SDK)
-IOSSIM_DEPLOYMENT_ARGS += -isysroot $(IOSSIM_SDK)
-
-CFLAGS.eprintf		:= $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
-CFLAGS.10.4		:= $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
-
-SANITIZER_MACOSX_DEPLOYMENT_ARGS := -mmacosx-version-min=10.7
-SANITIZER_IOSSIM_DEPLOYMENT_ARGS := -mios-simulator-version-min=7.0 \
-  -isysroot $(IOSSIM_SDK)
-SANITIZER_CFLAGS := -fno-builtin -gline-tables-only -stdlib=libc++
-
-
-CFLAGS.ios.i386		:= $(CFLAGS) $(IOSSIM_DEPLOYMENT_ARGS)
-CFLAGS.ios.x86_64	:= $(CFLAGS) $(IOSSIM_DEPLOYMENT_ARGS)
-CFLAGS.ios.armv7	:= $(CFLAGS) $(IOS_DEPLOYMENT_ARGS)
-CFLAGS.ios.armv7k	:= $(CFLAGS) $(IOS_DEPLOYMENT_ARGS)
-CFLAGS.ios.armv7s	:= $(CFLAGS) $(IOS_DEPLOYMENT_ARGS)
-CFLAGS.ios.arm64	:= $(CFLAGS) $(IOS6_DEPLOYMENT_ARGS)
-CFLAGS.osx.i386		:= $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
-CFLAGS.osx.x86_64	:= $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
-CFLAGS.osx.x86_64h	:= $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
-CFLAGS.cc_kext.i386	:= $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
-CFLAGS.cc_kext.x86_64	:= $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
-CFLAGS.cc_kext.x86_64h	:= $(CFLAGS) $(OSX_DEPLOYMENT_ARGS)
-CFLAGS.cc_kext_ios.armv7	:= $(CFLAGS) $(IOS6_DEPLOYMENT_ARGS)
-CFLAGS.cc_kext_ios.armv7k	:= $(CFLAGS) $(IOS6_DEPLOYMENT_ARGS)
-CFLAGS.cc_kext_ios.armv7s	:= $(CFLAGS) $(IOS6_DEPLOYMENT_ARGS)
-CFLAGS.cc_kext_ios.arm64	:= $(CFLAGS) $(IOS6_DEPLOYMENT_ARGS)
-
-SANITIZER_LDFLAGS := -stdlib=libc++ -lc++ -lc++abi
-
-ATOMIC_FUNCTIONS := \
-	atomic_flag_clear \
-	atomic_flag_clear_explicit \
-	atomic_flag_test_and_set \
-	atomic_flag_test_and_set_explicit \
-	atomic_signal_fence \
-	atomic_thread_fence
-
-FP16_FUNCTIONS := \
-	extendhfsf2 \
-	truncdfhf2 \
-	truncsfhf2
-
-FUNCTIONS.eprintf := eprintf
-FUNCTIONS.10.4 := eprintf floatundidf floatundisf floatundixf
-
-FUNCTIONS.ios	    := divmodsi4 udivmodsi4 mulosi4 mulodi4 muloti4 \
-                       $(ATOMIC_FUNCTIONS) $(FP16_FUNCTIONS)
-# On x86, the divmod functions reference divsi.
-FUNCTIONS.ios.i386    := $(FUNCTIONS.ios) \
-                         divsi3 udivsi3
-FUNCTIONS.ios.x86_64  := $(FUNCTIONS.ios.i386)
-FUNCTIONS.ios.arm64   := mulsc3 muldc3 divsc3 divdc3 udivti3 umodti3 \
-                         $(ATOMIC_FUNCTIONS)
-
-FUNCTIONS.osx	:= mulosi4 mulodi4 muloti4 $(ATOMIC_FUNCTIONS) $(FP16_FUNCTIONS)
-
-CCKEXT_PROFILE_FUNCTIONS := \
-	InstrProfiling \
-	InstrProfilingBuffer \
-	InstrProfilingPlatformDarwin
-
-CCKEXT_COMMON_FUNCTIONS := \
-	$(CCKEXT_PROFILE_FUNCTIONS) \
-	absvdi2 \
-	absvsi2 \
-	addvdi3 \
-	addvsi3 \
-	ashldi3 \
-	ashrdi3 \
-	bswapdi2 \
-	bswapsi2 \
-	clzdi2 \
-	clzsi2 \
-	cmpdi2 \
-	ctzdi2 \
-	ctzsi2 \
-	divdc3 \
-	divdi3 \
-	divsc3 \
-	divmodsi4 \
-	udivmodsi4 \
-	do_global_dtors \
-	eprintf \
-	extendhfsf2 \
-	ffsdi2 \
-	fixdfdi \
-	fixsfdi \
-	fixunsdfdi \
-	fixunsdfsi \
-	fixunssfdi \
-	fixunssfsi \
-	floatdidf \
-	floatdisf \
-	floatundidf \
-	floatundisf \
-	gcc_bcmp \
-	lshrdi3 \
-	moddi3 \
-	muldc3 \
-	muldi3 \
-	mulsc3 \
-	mulvdi3 \
-	mulvsi3 \
-	negdi2 \
-	negvdi2 \
-	negvsi2 \
-	paritydi2 \
-	paritysi2 \
-	popcountdi2 \
-	popcountsi2 \
-	powidf2 \
-	powisf2 \
-	subvdi3 \
-	subvsi3 \
-	truncdfhf2 \
-	truncsfhf2 \
-	ucmpdi2 \
-	udiv_w_sdiv \
-	udivdi3 \
-	udivmoddi4 \
-	umoddi3
-
-CCKEXT_ARM_FUNCTIONS := $(CCKEXT_COMMON_FUNCTIONS) \
-	adddf3 \
-	addsf3 \
-	aeabi_cdcmpeq \
-	aeabi_cdrcmple \
-	aeabi_cfcmpeq \
-	aeabi_cfrcmple \
-	aeabi_dcmpeq \
-	aeabi_dcmpge \
-	aeabi_dcmpgt \
-	aeabi_dcmple \
-	aeabi_dcmplt \
-	aeabi_drsub \
-	aeabi_fcmpeq \
-	aeabi_fcmpge \
-	aeabi_fcmpgt \
-	aeabi_fcmple \
-	aeabi_fcmplt \
-	aeabi_frsub \
-	aeabi_idivmod \
-	aeabi_uidivmod \
-	cmpdf2 \
-	cmpsf2 \
-	div0 \
-	divdf3 \
-	divsf3 \
-	divsi3 \
-	extendsfdf2 \
-	ffssi2 \
-	fixdfsi \
-	fixsfsi \
-	floatsidf \
-	floatsisf \
-	floatunsidf \
-	floatunsisf \
-	comparedf2 \
-	comparesf2 \
-	modsi3 \
-	muldf3 \
-	mulsf3 \
-	mulodi4 \
-	negdf2 \
-	negsf2 \
-	subdf3 \
-	subsf3 \
-	switch16 \
-	switch32 \
-	switch8 \
-	switchu8 \
-	truncdfsf2 \
-	udivsi3 \
-	umodsi3 \
-	unorddf2 \
-	unordsf2
-
-CCKEXT_ARMVFP_FUNCTIONS := $(CCKEXT_ARM_FUNCTIONS) \
-	adddf3vfp \
-	addsf3vfp \
-	divdf3vfp \
-	divsf3vfp \
-	eqdf2vfp \
-	eqsf2vfp \
-	extendsfdf2vfp \
-	fixdfsivfp \
-	fixsfsivfp \
-	fixunsdfsivfp \
-	fixunssfsivfp \
-	floatsidfvfp \
-	floatsisfvfp \
-	floatunssidfvfp \
-	floatunssisfvfp \
-	gedf2vfp \
-	gesf2vfp \
-	gtdf2vfp \
-	gtsf2vfp \
-	ledf2vfp \
-	lesf2vfp \
-	ltdf2vfp \
-	ltsf2vfp \
-	muldf3vfp \
-	mulsf3vfp \
-	nedf2vfp \
-	nesf2vfp \
-	subdf3vfp \
-	subsf3vfp \
-	truncdfsf2vfp \
-	unorddf2vfp \
-	unordsf2vfp
-
-CCKEXT_ARM64_FUNCTIONS := \
-	$(CCKEXT_PROFILE_FUNCTIONS) \
-	divdc3 \
-	divsc3 \
-	muldc3 \
-	mulsc3 \
-	udivti3 \
-	umodti3
-
-FUNCTIONS.cc_kext_ios.armv7 := $(CCKEXT_ARMVFP_FUNCTIONS)
-FUNCTIONS.cc_kext_ios.armv7k := $(CCKEXT_ARMVFP_FUNCTIONS)
-FUNCTIONS.cc_kext_ios.armv7s := $(CCKEXT_ARMVFP_FUNCTIONS)
-FUNCTIONS.cc_kext_ios.arm64 := $(CCKEXT_ARM64_FUNCTIONS)
-
-CCKEXT_X86_FUNCTIONS := $(CCKEXT_COMMON_FUNCTIONS) \
-	divxc3 \
-	fixunsxfdi \
-	fixunsxfsi \
-	fixxfdi \
-	floatdixf \
-	floatundixf \
-	mulxc3 \
-	powixf2
-
-FUNCTIONS.cc_kext.i386 := $(CCKEXT_X86_FUNCTIONS) \
-	ffssi2 \
-	i686.get_pc_thunk.eax \
-	i686.get_pc_thunk.ebp \
-	i686.get_pc_thunk.ebx \
-	i686.get_pc_thunk.ecx \
-	i686.get_pc_thunk.edi \
-	i686.get_pc_thunk.edx \
-	i686.get_pc_thunk.esi
-
-FUNCTIONS.cc_kext.x86_64 := $(CCKEXT_X86_FUNCTIONS) \
-	absvti2 \
-	addvti3 \
-	ashlti3 \
-	ashrti3 \
-	clzti2 \
-	cmpti2 \
-	ctzti2 \
-	divti3 \
-	ffsti2 \
-	fixdfti \
-	fixsfti \
-	fixunsdfti \
-	fixunssfti \
-	fixunsxfti \
-	fixxfti \
-	floattidf \
-	floattisf \
-	floattixf \
-	floatuntidf \
-	floatuntisf \
-	floatuntixf \
-	lshrti3 \
-	modti3 \
-	multi3 \
-	mulvti3 \
-	negti2 \
-	negvti2 \
-	parityti2 \
-	popcountti2 \
-	subvti3 \
-	ucmpti2 \
-	udivmodti4 \
-	udivti3 \
-	umodti3
-
-FUNCTIONS.cc_kext.x86_64h := $(FUNCTIONS.cc_kext.x86_64)
-
-# FIXME: Currently, compiler-rt is missing implementations for a number of the
-# functions that need to go into libcc_kext.a. Filter them out for now.
-CCKEXT_MISSING_FUNCTIONS := \
-	cmpdf2 cmpsf2 div0 \
-	ffssi2 \
-	udiv_w_sdiv unorddf2 unordsf2 bswapdi2 \
-	bswapsi2 \
-	gcc_bcmp \
-	do_global_dtors \
-	i686.get_pc_thunk.eax i686.get_pc_thunk.ebp i686.get_pc_thunk.ebx \
-	i686.get_pc_thunk.ecx i686.get_pc_thunk.edi i686.get_pc_thunk.edx \
-	i686.get_pc_thunk.esi \
-	aeabi_cdcmpeq aeabi_cdrcmple aeabi_cfcmpeq aeabi_cfrcmple aeabi_dcmpeq \
-	aeabi_dcmpge aeabi_dcmpgt aeabi_dcmple aeabi_dcmplt aeabi_drsub aeabi_fcmpeq \
-	aeabi_fcmpge aeabi_fcmpgt aeabi_fcmple aeabi_fcmplt aeabi_frsub aeabi_idivmod \
-	aeabi_uidivmod
-
-FUNCTIONS.cc_kext_ios.armv7 := \
-	$(filter-out $(CCKEXT_MISSING_FUNCTIONS),$(FUNCTIONS.cc_kext_ios.armv7))
-FUNCTIONS.cc_kext_ios.armv7k := \
-	$(filter-out $(CCKEXT_MISSING_FUNCTIONS),$(FUNCTIONS.cc_kext_ios.armv7k))
-FUNCTIONS.cc_kext_ios.armv7s := \
-	$(filter-out $(CCKEXT_MISSING_FUNCTIONS),$(FUNCTIONS.cc_kext_ios.armv7s))
-FUNCTIONS.cc_kext_ios.arm64 := \
-	$(filter-out $(CCKEXT_MISSING_FUNCTIONS),$(FUNCTIONS.cc_kext_ios.arm64))
-FUNCTIONS.cc_kext.i386 := \
-	$(filter-out $(CCKEXT_MISSING_FUNCTIONS),$(FUNCTIONS.cc_kext.i386))
-FUNCTIONS.cc_kext.x86_64 := \
-	$(filter-out $(CCKEXT_MISSING_FUNCTIONS),$(FUNCTIONS.cc_kext.x86_64))
-FUNCTIONS.cc_kext.x86_64h := \
-	$(filter-out $(CCKEXT_MISSING_FUNCTIONS),$(FUNCTIONS.cc_kext.x86_64h))
-
-KERNEL_USE.cc_kext := 1
-KERNEL_USE.cc_kext_ios := 1
-
-VISIBILITY_HIDDEN := 1
-
-SHARED_LIBRARY_SUFFIX := dylib
diff --git a/make/platform/clang_darwin_test_input.c b/make/platform/clang_darwin_test_input.c
deleted file mode 100644
index b406a28..0000000
--- a/make/platform/clang_darwin_test_input.c
+++ /dev/null
@@ -1,15 +0,0 @@
-/* Include the headers we use in int_lib.h, to verify that they work. */
-
-#include <limits.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-// Force us to link at least one symbol in a system library
-// to detect systems where we don't have those for a given
-// architecture.
-int main(int argc, const char **argv) {
-    int x;
-    memcpy(&x,&argc,sizeof(int));
-}
diff --git a/make/platform/clang_linux.mk b/make/platform/clang_linux.mk
deleted file mode 100644
index 870209f..0000000
--- a/make/platform/clang_linux.mk
+++ /dev/null
@@ -1,92 +0,0 @@
-Description := Static runtime libraries for clang/Linux.
-
-###
-
-CC := clang
-Arch := unknown
-Configs :=
-
-# We don't currently have any general purpose way to target architectures other
-# than the compiler defaults (because there is no generalized way to invoke
-# cross compilers). For now, we just find the target architecture of the
-# compiler and only define configurations we know that compiler can generate.
-CompilerTargetTriple := $(shell \
-	LANG=C $(CC) -v 2>&1 | grep 'Target:' | cut -d' ' -f2)
-ifeq ($(CompilerTargetTriple),)
-$(error "unable to infer compiler target triple for $(CC)")
-endif
-
-# Only define configs if we detected a linux target.
-ifneq ($(findstring -linux-,$(CompilerTargetTriple)),)
-
-# Define configs only if arch in triple is i386 or x86_64
-CompilerTargetArch := $(firstword $(subst -, ,$(CompilerTargetTriple)))
-ifeq ($(call contains,i386 x86_64,$(CompilerTargetArch)),true)
-
-# TryCompile compiler source flags
-# Returns exit code of running a compiler invocation.
-TryCompile = \
-  $(shell \
-    cflags=""; \
-    for flag in $(3); do \
-      cflags="$$cflags $$flag"; \
-    done; \
-    $(1) $$cflags $(2) -o /dev/null > /dev/null 2> /dev/null ; \
-    echo $$?)
-
-test_source = $(ProjSrcRoot)/make/platform/clang_linux_test_input.c
-ifeq ($(CompilerTargetArch),i386)
-  SupportedArches := i386
-  ifeq ($(call TryCompile,$(CC),$(test_source),-m64),0)
-    SupportedArches += x86_64
-  endif
-else
-  SupportedArches := x86_64
-  ifeq ($(call TryCompile,$(CC),$(test_source),-m32),0)
-    SupportedArches += i386
-  endif
-endif
-
-# Build runtime libraries for i386.
-ifeq ($(call contains,$(SupportedArches),i386),true)
-Configs += builtins-i386 profile-i386
-Arch.builtins-i386 := i386
-Arch.profile-i386 := i386
-endif
-
-# Build runtime libraries for x86_64.
-ifeq ($(call contains,$(SupportedArches),x86_64),true)
-Configs += builtins-x86_64 profile-x86_64
-Arch.builtins-x86_64 := x86_64
-Arch.profile-x86_64 := x86_64
-endif
-
-endif
-
-endif
-
-###
-
-CFLAGS := -Wall -Werror -O3 -fomit-frame-pointer
-
-CFLAGS.builtins-i386 := $(CFLAGS) -m32
-CFLAGS.builtins-x86_64 := $(CFLAGS) -m64
-CFLAGS.profile-i386 := $(CFLAGS) -m32
-CFLAGS.profile-x86_64 := $(CFLAGS) -m64
-
-FUNCTIONS.builtins-i386 := $(CommonFunctions) $(ArchFunctions.i386)
-FUNCTIONS.builtins-x86_64 := $(CommonFunctions) $(ArchFunctions.x86_64)
-FUNCTIONS.profile-i386 := GCDAProfiling InstrProfiling InstrProfilingBuffer \
-                          InstrProfilingFile InstrProfilingPlatformOther \
-                          InstrProfilingRuntime InstrProfilingUtil \
-                          InstrProfilingWriter InstrProfilingValue \
-                          InstrProfilingMerge InstrProfilingMergeFile
-FUNCTIONS.profile-x86_64 := $(FUNCTIONS.profile-i386)
-
-# Always use optimized variants.
-OPTIMIZED := 1
-
-# We don't need to use visibility hidden on Linux.
-VISIBILITY_HIDDEN := 0
-
-SHARED_LIBRARY_SUFFIX := so
diff --git a/make/platform/clang_linux_test_input.c b/make/platform/clang_linux_test_input.c
deleted file mode 100644
index e65ce98..0000000
--- a/make/platform/clang_linux_test_input.c
+++ /dev/null
@@ -1,4 +0,0 @@
-// This file is used to check if we can produce working executables
-// for i386 and x86_64 archs on Linux.
-#include <stdlib.h>
-int main(){}
diff --git a/make/platform/clang_macho_embedded.mk b/make/platform/clang_macho_embedded.mk
deleted file mode 100644
index d7870d4..0000000
--- a/make/platform/clang_macho_embedded.mk
+++ /dev/null
@@ -1,297 +0,0 @@
-# These are the functions which clang needs when it is targeting a previous
-# version of the OS. The issue is that the backend may use functions which were
-# not present in the libgcc that shipped on the platform. In such cases, we link
-# with a version of the library which contains private_extern definitions of all
-# the extra functions which might be referenced.
-
-Description := Static runtime libraries for embedded clang/Darwin
-
-# A function that ensures we don't try to build for architectures that we
-# don't have working toolchains for.
-CheckArches = \
-  $(shell \
-    result=""; \
-    for arch in $(1); do \
-      if $(CC) -arch $$arch -c \
-	  -integrated-as \
-	  $(ProjSrcRoot)/make/platform/clang_macho_embedded_test_input.c \
-	  -o /dev/null > /dev/null 2> /dev/null; then \
-        result="$$result$$arch "; \
-      else \
-	printf 1>&2 \
-	  "warning: clang_macho_embedded.mk: dropping arch '$$arch' from lib '$(2)'\n"; \
-      fi; \
-    done; \
-    echo $$result)
-
-XCRun = \
-  $(shell \
-    result=`xcrun -find $(1) 2> /dev/null`; \
-    if [ "$$?" != "0" ]; then result=$(1); fi; \
-    echo $$result)
-
-###
-
-CC       := $(call XCRun,clang)
-AR       := $(call XCRun,ar)
-RANLIB   := $(call XCRun,ranlib)
-STRIP    := $(call XCRun,strip)
-LIPO     := $(call XCRun,lipo)
-DSYMUTIL := $(call XCRun,dsymutil)
-
-Configs :=
-UniversalArchs :=
-
-# Soft-float version of the runtime. No floating-point instructions will be used
-# and the ABI (out of necessity) passes floating values in normal registers:
-# non-VFP variant of the AAPCS.
-UniversalArchs.soft_static := $(call CheckArches,armv6m armv7m armv7em armv7,soft_static)
-Configs += $(if $(UniversalArchs.soft_static),soft_static)
-
-# Hard-float version of the runtime. On ARM VFP instructions and registers are
-# allowed, and floating point values get passed in them. VFP variant of the
-# AAPCS.
-UniversalArchs.hard_static := $(call CheckArches,armv7em armv7 i386 x86_64,hard_static)
-Configs += $(if $(UniversalArchs.hard_static),hard_static)
-
-UniversalArchs.soft_pic := $(call CheckArches,armv6m armv7m armv7em armv7,soft_pic)
-Configs += $(if $(UniversalArchs.soft_pic),soft_pic)
-
-UniversalArchs.hard_pic := $(call CheckArches,armv7em armv7 i386 x86_64,hard_pic)
-Configs += $(if $(UniversalArchs.hard_pic),hard_pic)
-
-CFLAGS := -Wall -Werror -Oz -fomit-frame-pointer -ffreestanding
-
-PIC_CFLAGS := -fPIC
-STATIC_CFLAGS := -static
-
-CFLAGS_SOFT := -mfloat-abi=soft
-CFLAGS_HARD := -mfloat-abi=hard
-
-CFLAGS_ARMV7 := -target thumbv7-apple-darwin-eabi
-CFLAGS_I386  := -march=pentium
-
-CFLAGS.soft_static := $(CFLAGS) $(STATIC_CFLAGS) $(CFLAGS_SOFT)
-CFLAGS.hard_static := $(CFLAGS) $(STATIC_CFLAGS) $(CFLAGS_HARD)
-CFLAGS.soft_pic    := $(CFLAGS) $(PIC_CFLAGS) $(CFLAGS_SOFT)
-CFLAGS.hard_pic    := $(CFLAGS) $(PIC_CFLAGS) $(CFLAGS_HARD)
-
-CFLAGS.soft_static.armv7 := $(CFLAGS.soft_static) $(CFLAGS_ARMV7)
-CFLAGS.hard_static.armv7 := $(CFLAGS.hard_static) $(CFLAGS_ARMV7)
-CFLAGS.soft_pic.armv7    := $(CFLAGS.soft_pic) $(CFLAGS_ARMV7)
-CFLAGS.hard_pic.armv7    := $(CFLAGS.hard_pic) $(CFLAGS_ARMV7)
-
-# x86 platforms ignore -mfloat-abi options and complain about doing so. Despite
-# this they're hard-float.
-CFLAGS.hard_static.i386   := $(CFLAGS) $(STATIC_CFLAGS) $(CFLAGS_I386)
-CFLAGS.hard_pic.i386      := $(CFLAGS) $(PIC_CFLAGS) $(CFLAGS_I386)
-CFLAGS.hard_static.x86_64 := $(CFLAGS) $(STATIC_CFLAGS)
-CFLAGS.hard_pic.x86_64    := $(CFLAGS) $(PIC_CFLAGS)
-
-# Functions not wanted:
-#   + eprintf is obsolete anyway
-#   + *vfp: designed for Thumb1 CPUs with VFPv2
-
-COMMON_FUNCTIONS := \
-	absvdi2 \
-	absvsi2 \
-	addvdi3 \
-	addvsi3 \
-	ashldi3 \
-	ashrdi3 \
-	bswapdi2 \
-	bswapsi2 \
-	clzdi2 \
-	clzsi2 \
-	cmpdi2 \
-	ctzdi2 \
-	ctzsi2 \
-	divdc3 \
-	divdi3 \
-	divsc3 \
-	divmodsi4 \
-	udivmodsi4 \
-	do_global_dtors \
-	ffsdi2 \
-	fixdfdi \
-	fixsfdi \
-	fixunsdfdi \
-	fixunsdfsi \
-	fixunssfdi \
-	fixunssfsi \
-	floatdidf \
-	floatdisf \
-	floatundidf \
-	floatundisf \
-	gcc_bcmp \
-	lshrdi3 \
-	moddi3 \
-	muldc3 \
-	muldi3 \
-	mulsc3 \
-	mulvdi3 \
-	mulvsi3 \
-	negdi2 \
-	negvdi2 \
-	negvsi2 \
-	paritydi2 \
-	paritysi2 \
-	popcountdi2 \
-	popcountsi2 \
-	powidf2 \
-	powisf2 \
-	subvdi3 \
-	subvsi3 \
-	ucmpdi2 \
-	udiv_w_sdiv \
-	udivdi3 \
-	udivmoddi4 \
-	umoddi3 \
-	adddf3 \
-	addsf3 \
-	cmpdf2 \
-	cmpsf2 \
-	div0 \
-	divdf3 \
-	divsf3 \
-	divsi3 \
-	extendsfdf2 \
-	extendhfsf2 \
-	ffssi2 \
-	fixdfsi \
-	fixsfsi \
-	floatsidf \
-	floatsisf \
-	floatunsidf \
-	floatunsisf \
-	comparedf2 \
-	comparesf2 \
-	modsi3 \
-	muldf3 \
-	mulsf3 \
-	negdf2 \
-	negsf2 \
-	subdf3 \
-	subsf3 \
-	truncdfhf2 \
-	truncdfsf2 \
-	truncsfhf2 \
-	udivsi3 \
-	umodsi3 \
-	unorddf2 \
-	unordsf2 \
-	atomic_flag_clear \
-	atomic_flag_clear_explicit \
-	atomic_flag_test_and_set \
-	atomic_flag_test_and_set_explicit \
-	atomic_signal_fence \
-	atomic_thread_fence
-
-ARM_FUNCTIONS := \
-	aeabi_cdcmpeq \
-	aeabi_cdrcmple \
-	aeabi_cfcmpeq \
-	aeabi_cfrcmple \
-	aeabi_dcmpeq \
-	aeabi_dcmpge \
-	aeabi_dcmpgt \
-	aeabi_dcmple \
-	aeabi_dcmplt \
-	aeabi_drsub \
-	aeabi_fcmpeq \
-	aeabi_fcmpge \
-	aeabi_fcmpgt \
-	aeabi_fcmple \
-	aeabi_fcmplt \
-	aeabi_frsub \
-	aeabi_idivmod \
-	aeabi_uidivmod \
-
-# ARM Assembly implementation which requires Thumb2 (i.e. won't work on v6M).
-THUMB2_FUNCTIONS := \
-	switch16 \
-	switch32 \
-	switch8 \
-	switchu8 \
-	sync_fetch_and_add_4 \
-	sync_fetch_and_sub_4 \
-	sync_fetch_and_and_4 \
-	sync_fetch_and_or_4 \
-	sync_fetch_and_xor_4 \
-	sync_fetch_and_nand_4 \
-	sync_fetch_and_max_4 \
-	sync_fetch_and_umax_4 \
-	sync_fetch_and_min_4 \
-	sync_fetch_and_umin_4 \
-	sync_fetch_and_add_8 \
-	sync_fetch_and_sub_8 \
-	sync_fetch_and_and_8 \
-	sync_fetch_and_or_8 \
-	sync_fetch_and_xor_8 \
-	sync_fetch_and_nand_8 \
-	sync_fetch_and_max_8 \
-	sync_fetch_and_umax_8 \
-	sync_fetch_and_min_8 \
-	sync_fetch_and_umin_8
-
-I386_FUNCTIONS :=  \
-	i686.get_pc_thunk.eax \
-	i686.get_pc_thunk.ebp \
-	i686.get_pc_thunk.ebx \
-	i686.get_pc_thunk.ecx \
-	i686.get_pc_thunk.edi \
-	i686.get_pc_thunk.edx \
-	i686.get_pc_thunk.esi
-
-# FIXME: Currently, compiler-rt is missing implementations for a number of the
-# functions. Filter them out for now.
-MISSING_FUNCTIONS := \
-	cmpdf2 cmpsf2 div0 \
-	ffssi2 \
-	udiv_w_sdiv unorddf2 unordsf2 bswapdi2 \
-	bswapsi2 \
-	gcc_bcmp \
-	do_global_dtors \
-	i686.get_pc_thunk.eax i686.get_pc_thunk.ebp i686.get_pc_thunk.ebx \
-	i686.get_pc_thunk.ecx i686.get_pc_thunk.edi i686.get_pc_thunk.edx \
-	i686.get_pc_thunk.esi \
-	aeabi_cdcmpeq aeabi_cdrcmple aeabi_cfcmpeq aeabi_cfrcmple aeabi_dcmpeq \
-	aeabi_dcmpge aeabi_dcmpgt aeabi_dcmple aeabi_dcmplt aeabi_drsub \
-	aeabi_fcmpeq \ aeabi_fcmpge aeabi_fcmpgt aeabi_fcmple aeabi_fcmplt \
-	aeabi_frsub aeabi_idivmod aeabi_uidivmod
-
-FUNCTIONS_ARMV6M  := $(COMMON_FUNCTIONS) $(ARM_FUNCTIONS)
-FUNCTIONS_ARM_ALL := $(COMMON_FUNCTIONS) $(ARM_FUNCTIONS) $(THUMB2_FUNCTIONS)
-FUNCTIONS_I386    := $(COMMON_FUNCTIONS) $(I386_FUNCTIONS)
-FUNCTIONS_X86_64  := $(COMMON_FUNCTIONS)
-
-FUNCTIONS_ARMV6M := \
-	$(filter-out $(MISSING_FUNCTIONS),$(FUNCTIONS_ARMV6M))
-FUNCTIONS_ARM_ALL := \
-	$(filter-out $(MISSING_FUNCTIONS),$(FUNCTIONS_ARM_ALL))
-FUNCTIONS_I386 := \
-	$(filter-out $(MISSING_FUNCTIONS),$(FUNCTIONS_I386))
-FUNCTIONS_X86_64 := \
-	$(filter-out $(MISSING_FUNCTIONS),$(FUNCTIONS_X86_64))
-
-FUNCTIONS.soft_static.armv6m := $(FUNCTIONS_ARMV6M)
-FUNCTIONS.soft_pic.armv6m    := $(FUNCTIONS_ARMV6M)
-
-FUNCTIONS.soft_static.armv7m := $(FUNCTIONS_ARM_ALL)
-FUNCTIONS.soft_pic.armv7m    := $(FUNCTIONS_ARM_ALL)
-
-FUNCTIONS.soft_static.armv7em := $(FUNCTIONS_ARM_ALL)
-FUNCTIONS.hard_static.armv7em := $(FUNCTIONS_ARM_ALL)
-FUNCTIONS.soft_pic.armv7em    := $(FUNCTIONS_ARM_ALL)
-FUNCTIONS.hard_pic.armv7em    := $(FUNCTIONS_ARM_ALL)
-
-FUNCTIONS.soft_static.armv7 := $(FUNCTIONS_ARM_ALL)
-FUNCTIONS.hard_static.armv7 := $(FUNCTIONS_ARM_ALL)
-FUNCTIONS.soft_pic.armv7    := $(FUNCTIONS_ARM_ALL)
-FUNCTIONS.hard_pic.armv7    := $(FUNCTIONS_ARM_ALL)
-
-FUNCTIONS.hard_static.i386 := $(FUNCTIONS_I386)
-FUNCTIONS.hard_pic.i386    := $(FUNCTIONS_I386)
-
-FUNCTIONS.hard_static.x86_64 := $(FUNCTIONS_X86_64)
-FUNCTIONS.hard_pic.x86_64    := $(FUNCTIONS_X86_64)
diff --git a/make/platform/clang_macho_embedded_test_input.c b/make/platform/clang_macho_embedded_test_input.c
deleted file mode 100644
index e69de29..0000000
--- a/make/platform/clang_macho_embedded_test_input.c
+++ /dev/null
diff --git a/make/platform/clang_mingw.mk b/make/platform/clang_mingw.mk
deleted file mode 100644
index 2aedbc3..0000000
--- a/make/platform/clang_mingw.mk
+++ /dev/null
@@ -1,30 +0,0 @@
-Description := Static runtime libraries for mingw-w64
-
-###
-
-CC ?= cc
-AR ?= ar
-
-Arch := unknown
-Configs :=
-
-SupportedArches := x86_64 i386 arm
-
-Configs += builtins-x86_64 builtins-i386 builtins-arm
-Arch.builtins-x86_64 := x86_64
-Arch.builtins-i386 := i386
-Arch.builtins-arm := arm
-
-###
-
-CFLAGS := -Wall -O3 -fomit-frame-pointer
-CFLAGS.builtins-x86_64 := -target x86_64-windows-gnu $(CFLAGS)
-CFLAGS.builtins-i386 := -target i686-windows-gnu $(CFLAGS)
-CFLAGS.builtins-arm := -target armv7-windows-gnu $(CFLAGS)
-
-FUNCTIONS.builtins-x86_64 := $(CommonFunctions) $(ArchFunctions.x86_64)
-FUNCTIONS.builtins-i386 := $(CommonFunctions) $(ArchFunctions.i386)
-FUNCTIONS.builtins-arm := $(CommonFunctions) $(ArchFunctions.arm)
-
-# Always use optimized variants.
-OPTIMIZED := 1
diff --git a/make/platform/darwin_bni.mk b/make/platform/darwin_bni.mk
deleted file mode 100644
index 8e066e8..0000000
--- a/make/platform/darwin_bni.mk
+++ /dev/null
@@ -1,135 +0,0 @@
-
-Description := Target for Darwin using an Apple-style build.
-
-Configs := Debug Release Profile Static
-
-# We override this with RC_ARCHS because B&I may want to build on an ARCH we
-# haven't explicitly defined support for. If all goes well, this will just work
-# and the resulting lib will just have generic versions for anything unknown.
-UniversalArchs := $(RC_ARCHS)
-
-ifneq (,$(SDKROOT))
-	override CC := $(shell xcrun -sdk $(SDKROOT) -find clang || echo "false") 
-	AR := $(shell xcrun -sdk $(SDKROOT) -find ar || echo "false") 
-	RANLIB := $(shell xcrun -sdk $(SDKROOT) -find ranlib || echo "false") 
-	STRIP := $(shell xcrun -sdk $(SDKROOT) -find strip || echo "false") 
-	LIPO := $(shell xcrun -sdk $(SDKROOT) -find lipo || echo "false")
-	DSYMUTIL := $(shell xcrun -sdk $(SDKROOT) -find dsymutil || echo "false")
-endif
-
-ifneq ($(IPHONEOS_DEPLOYMENT_TARGET),)
-	DEPLOYMENT_FLAGS := -miphoneos-version-min=$(IPHONEOS_DEPLOYMENT_TARGET) 
-else
-	ifneq ($(MACOSX_DEPLOYMENT_TARGET),)
-		DEPLOYMENT_FLAGS := -mmacosx-version-min=$(MACOSX_DEPLOYMENT_TARGET) 
-	endif
-endif
-
-ifneq (,$(SDKROOT))
-	DEPLOYMENT_FLAGS += -isysroot $(SDKROOT)
-endif
-
-CFLAGS := -Wall -Os -fomit-frame-pointer -g $(DEPLOYMENT_FLAGS)
-CFLAGS.Static := $(CFLAGS) -static  
-DYLIB_FLAGS := $(DEPLOYMENT_FLAGS) \
-		-Xarch_arm -Wl,-alias_list,$(SRCROOT)/lib/builtins/arm/softfloat-alias.list
-
-VISIBILITY_HIDDEN := 0
-VISIBILITY_HIDDEN.Static  := 1
-
-
-FUNCTIONS := absvdi2 absvsi2 addvdi3 addvsi3 ashldi3 ashrdi3 \
-             clzdi2 clzsi2 cmpdi2 ctzdi2 ctzsi2 \
-             divdc3 divdi3 divsc3 ffsdi2 \
-             fixdfdi fixsfdi fixunsdfdi fixunsdfsi fixunssfdi \
-             fixunssfsi floatdidf floatdisf floatundidf floatundisf \
-             gcc_personality_v0 lshrdi3 moddi3 muldc3 muldi3 mulosi4 \
-             mulodi4 muloti4 mulsc3 mulvdi3 mulvsi3 negdi2 negvdi2 negvsi2 \
-             paritydi2 paritysi2 popcountdi2 popcountsi2 powidf2 \
-             powisf2 subvdi3 subvsi3 ucmpdi2 udivdi3 \
-             udivmoddi4 umoddi3 apple_versioning eprintf atomic \
-             atomic_flag_clear atomic_flag_clear_explicit \
-             atomic_flag_test_and_set atomic_flag_test_and_set_explicit \
-             atomic_signal_fence atomic_thread_fence \
-             extendhfsf2 truncdfhf2 truncsfhf2 
-
-FUNCTIONS.i386 := $(FUNCTIONS) \
-                divxc3 fixunsxfdi fixunsxfsi fixxfdi floatdixf \
-                floatundixf mulxc3 powixf2 clear_cache \
-                enable_execute_stack
-FUNCTIONS.ppc := $(FUNCTIONS) \
-                divtc3 fixtfdi fixunstfdi floatditf floatunditf \
-                gcc_qadd gcc_qdiv gcc_qmul gcc_qsub multc3 \
-                powitf2 restFP saveFP trampoline_setup \
-                clear_cache enable_execute_stack
-FUNCTIONS.x86_64 := $(FUNCTIONS) \
-                absvti2 addvti3 ashlti3 ashrti3 clzti2 cmpti2 \
-                ctzti2 divti3 divxc3 ffsti2 fixdfti fixsfti \
-                fixunsdfti fixunssfti fixunsxfdi fixunsxfsi \
-                fixunsxfti fixxfdi fixxfti floatdixf floattidf \
-                floattisf floattixf floatundixf floatuntidf \
-                floatuntisf floatuntixf lshrti3 modti3 multi3 \
-                muloti4 mulvti3 mulxc3 negti2 negvti2 parityti2 \
-                popcountti2 powixf2 subvti3 ucmpti2 udivmodti4 \
-                udivti3 umodti3 clear_cache enable_execute_stack
-
-FUNCTIONS.armv4t := $(FUNCTIONS) 
-
-FUNCTIONS.armv5 := $(FUNCTIONS) \
-                adddf3 addsf3 bswapdi2 bswapsi2  \
-                comparedf2 comparesf2 extendsfdf2 \
-                divdf3 divsf3 \
-                fixdfsi fixsfsi fixunsdfsi fixunssfsi \
-                floatsidf floatsisf floatunsidf floatunsisf \
-                muldf3 mulsf3 \
-                negdf2 negsf2 \
-                truncdfsf2  \
-                modsi3 umodsi3 udivsi3 divsi3 udivmodsi4 divmodsi4 \
-                switch8 switchu8 switch16 switch32 \
-                sync_synchronize
-
-FUNCTIONS.armv6 := $(FUNCTIONS) \
-				comparedf2 comparesf2 \
-                adddf3vfp addsf3vfp bswapdi2 bswapsi2 divdf3vfp \
-                divsf3vfp eqdf2vfp eqsf2vfp extendsfdf2vfp \
-                fixdfsivfp fixsfsivfp fixunsdfsivfp fixunssfsivfp \
-                floatsidfvfp floatsisfvfp floatunssidfvfp floatunssisfvfp \
-                gedf2vfp gesf2vfp gtdf2vfp gtsf2vfp \
-                ledf2vfp lesf2vfp ltdf2vfp ltsf2vfp \
-                muldf3vfp mulsf3vfp \
-                nedf2vfp nesf2vfp \
-                subdf3vfp subsf3vfp truncdfsf2vfp unorddf2vfp unordsf2vfp \
-                modsi3 umodsi3 udivsi3 divsi3 udivmodsi4 divmodsi4 \
-                switch8 switchu8 switch16 switch32 \
-                restore_vfp_d8_d15_regs save_vfp_d8_d15_regs \
-                sync_synchronize
-
-FUNCTIONS.armv7 := $(FUNCTIONS) \
-				comparedf2 comparesf2 \
-                adddf3vfp addsf3vfp bswapdi2 bswapsi2 divdf3vfp \
-                divsf3vfp eqdf2vfp eqsf2vfp extendsfdf2vfp \
-                fixdfsivfp fixsfsivfp fixunsdfsivfp fixunssfsivfp \
-                floatsidfvfp floatsisfvfp floatunssidfvfp floatunssisfvfp \
-                gedf2vfp gesf2vfp gtdf2vfp gtsf2vfp \
-                ledf2vfp lesf2vfp ltdf2vfp ltsf2vfp \
-                muldf3vfp mulsf3vfp \
-                nedf2vfp nesf2vfp \
-                subdf3vfp subsf3vfp truncdfsf2vfp unorddf2vfp unordsf2vfp \
-                modsi3 umodsi3 udivsi3 divsi3 udivmodsi4 divmodsi4
-
-FUNCTIONS.armv7s := $(FUNCTIONS.armv7)
-
-FUNCTIONS.arm64 :=  divti3 modti3 \
-					udivmodti4 \
-					udivti3 umodti3 \
-					mulsc3 muldc3 \
-					powisf2 powidf2 \
-					clzti2 \
-					fixdfti fixsfti \
-					fixunsdfti fixunssfti fixunssfti \
-					floattidf floattisf floatuntidf floatuntisf \
-					gcc_personality_v0 atomic \
-					atomic_flag_clear atomic_flag_clear_explicit \
-					atomic_flag_test_and_set \
-					atomic_flag_test_and_set_explicit \
-					atomic_signal_fence atomic_thread_fence
diff --git a/make/platform/multi_arch.mk b/make/platform/multi_arch.mk
deleted file mode 100644
index fe6ac4b..0000000
--- a/make/platform/multi_arch.mk
+++ /dev/null
@@ -1,15 +0,0 @@
-Description := Example configuration for build two libraries for separate \
-architectures.
-
-Configs := m32 m64
-Arch := i386
-Arch.m64 := x86_64
-
-CC := clang
-
-CFLAGS := -Wall -Werror
-CFLAGS.m32 := $(CFLAGS) -m32 -O3
-CFLAGS.m64 := $(CFLAGS) -m64 -O3
-
-FUNCTIONS := moddi3 floatundixf udivdi3
-FUNCTIONS.m64 := $(FUNCTIONS) lshrdi3
diff --git a/make/subdir.mk b/make/subdir.mk
deleted file mode 100644
index f81c25f..0000000
--- a/make/subdir.mk
+++ /dev/null
@@ -1,92 +0,0 @@
-# This file is intended to be included from each subdirectory makefile.
-#
-# Subdirectory makefiles must define:
-#   SubDirs - The subdirectories to traverse.
-#
-# Subdirectory makefiles may define:
-#   ModuleName - The library name for objects in that directory.
-#   ObjNames - The objects available in that directory.
-#   Implementation - The library configuration the objects should go in (Generic
-#                    or Optimized)
-#   Dependencies - Any dependences for the object files.
-#   OnlyArchs - Only build the objects for the listed archs.
-#   OnlyConfigs - Only build the objects for the listed configurations.
-
-ifeq ($(Dir),)
-  $(error "No Dir variable defined.")
-endif
-
-###
-# Include child makefile fragments
-
-# The list of variables which are intended to be overridden in a subdirectory
-# makefile.
-RequiredSubdirVariables := SubDirs 
-OptionalSubdirVariables := ModuleName OnlyArchs OnlyConfigs \
-	ObjNames Implementation Dependencies
-
-# Template: subdir_traverse_template subdir
-define subdir_traverse_template
-$(call Set,Dir,$(1))
-ifneq ($(DEBUGMAKE),)
-  $$(info MAKE: $(Dir): Processing subdirectory)
-endif
-
-# Construct the variable key for this directory.
-$(call Set,DirKey,SubDir.$(subst .,,$(subst /,__,$(1))))
-$(call Append,SubDirKeys,$(DirKey))
-$(call Set,$(DirKey).Dir,$(Dir))
-
-# Reset subdirectory specific variables to sentinel value.
-$$(foreach var,$$(RequiredSubdirVariables) $$(OptionalSubdirVariables),\
-  $$(call Set,$$(var),UNDEFINED))
-
-# Get the subdirectory variables.
-include $(1)/Makefile.mk
-
-ifeq ($(DEBUGMAKE),2)
-$$(foreach var,$(RequiredSubdirVariables) $(OptionalSubdirVariables),\
-  $$(if $$(call strneq,UNDEFINED,$$($$(var))), \
-	$$(info MAKE: $(Dir): $$(var) is defined), \
-	$$(info MAKE: $(Dir): $$(var) is undefined)))
-endif
-
-# Check for undefined required variables, and unset sentinel value from optional
-# variables.
-$$(foreach var,$(RequiredSubdirVariables),\
-  $$(if $$(call strneq,UNDEFINED,$$($$(var))),, \
-	$$(error $(Dir): variable '$$(var)' was not undefined)))
-$$(foreach var,$(OptionalSubdirVariables),\
-  $$(if $$(call strneq,UNDEFINED,$$($$(var))),, \
-	$$(call Set,$$(var),)))
-
-# Collect all subdirectory variables for subsequent use.
-$$(foreach var,$(RequiredSubdirVariables) $(OptionalSubdirVariables),\
-  $$(call Set,$(DirKey).$$(var),$$($$(var))))
-
-# Recurse.
-include make/subdir.mk
-
-# Restore directory variable, for cleanliness.
-$$(call Set,Dir,$(1))
-
-ifneq ($(DEBUGMAKE),)
-  $$(info MAKE: $$(Dir): Done processing subdirectory)
-endif
-endef
-
-# Evaluate this now so we do not have to worry about order of evaluation.
-
-SubDirsList := $(strip \
-  $(if $(call streq,.,$(Dir)),\
-       $(SubDirs),\
-       $(SubDirs:%=$(Dir)/%)))
-ifeq ($(SubDirsList),)
-else
-  ifneq ($(DEBUGMAKE),)
-    $(info MAKE: Descending into subdirs: $(SubDirsList))
-  endif
-
-  $(foreach subdir,$(SubDirsList),\
-	$(eval $(call subdir_traverse_template,$(subdir))))
-endif
diff --git a/make/test/test-util.mk b/make/test/test-util.mk
deleted file mode 100644
index c80c28d..0000000
--- a/make/test/test-util.mk
+++ /dev/null
@@ -1,66 +0,0 @@
-include make/util.mk
-
-streq_t0 = $(call streq,,)
-$(call AssertEqual,streq_t0,true)
-streq_t1 = $(call streq,b,)
-$(call AssertEqual,streq_t1,)
-streq_t2 = $(call streq,,b)
-$(call AssertEqual,streq_t2,)
-streq_t3 = $(call streq,b,b)
-$(call AssertEqual,streq_t3,true)
-streq_t4 = $(call streq,bb,b)
-$(call AssertEqual,streq_t4,)
-streq_t5 = $(call streq,b,bb)
-$(call AssertEqual,streq_t5,)
-streq_t6 = $(call streq,bb,bb)
-$(call AssertEqual,streq_t6,true)
-
-strneq_t7 = $(call strneq,,)
-$(call AssertEqual,strneq_t7,)
-strneq_t8 = $(call strneq,b,)
-$(call AssertEqual,strneq_t8,true)
-strneq_t9 = $(call strneq,,b)
-$(call AssertEqual,strneq_t9,true)
-strneq_t10 = $(call strneq,b,b)
-$(call AssertEqual,strneq_t10,)
-strneq_t11 = $(call strneq,bb,b)
-$(call AssertEqual,strneq_t11,true)
-strneq_t12 = $(call strneq,b,bb)
-$(call AssertEqual,strneq_t12,true)
-strneq_t13 = $(call strneq,bb,bb)
-$(call AssertEqual,strneq_t13,)
-
-contains_t0 = $(call contains,a b b c,a)
-$(call AssertEqual,contains_t0,true)
-contains_t1 = $(call contains,a b b c,b)
-$(call AssertEqual,contains_t1,true)
-contains_t2 = $(call contains,a b b c,c)
-$(call AssertEqual,contains_t2,true)
-contains_t3 = $(call contains,a b b c,d)
-$(call AssertEqual,contains_t3,)
-
-isdefined_t0_defined_var := 0
-isdefined_t0 = $(call IsDefined,isdefined_t0_defined_var)
-$(call AssertEqual,isdefined_t0,true)
-isdefined_t1 = $(call IsDefined,isdefined_t1_never_defined_var)
-$(call AssertEqual,isdefined_t1,)
-
-varordefault_t0_var := 1
-varordefault_t0 = $(call VarOrDefault,varordefault_t0_var.opt,$(varordefault_t0_var))
-$(call AssertEqual,varordefault_t0,1)
-varordefault_t1_var := 1
-varordefault_t1_var.opt := 2
-varordefault_t1 = $(call VarOrDefault,varordefault_t1_var.opt,$(varordefault_t1_var))
-$(call AssertEqual,varordefault_t1,2)
-
-$(call CopyVariable,copyvariable_t0_src,copyvariable_t0_dst)
-copyvariable_t0 = $(call IsUndefined,copyvariable_t0_dst)
-$(call AssertEqual,copyvariable_t0,true)
-copyvariable_t1_src = 1
-$(call CopyVariable,copyvariable_t1_src,copyvariable_t1)
-$(call AssertEqual,copyvariable_t1,1)
-
-all:
-	@true
-.PHONY: all
-
diff --git a/make/util.mk b/make/util.mk
deleted file mode 100644
index 0687755..0000000
--- a/make/util.mk
+++ /dev/null
@@ -1,114 +0,0 @@
-# Generic Makefile Utilities
-
-###
-# Utility functions
-
-# Function: streq LHS RHS
-#
-# Return "true" if LHS == RHS, otherwise "".
-#
-# LHS == RHS <=> (LHS subst RHS is empty) and (RHS subst LHS is empty)
-streq = $(if $(1),$(if $(subst $(1),,$(2))$(subst $(2),,$(1)),,true),$(if $(2),,true))
-
-# Function: strneq LHS RHS
-#
-# Return "true" if LHS != RHS, otherwise "".
-strneq = $(if $(call streq,$(1),$(2)),,true)
-
-# Function: contains list item
-#
-# Return "true" if 'list' contains the value 'item'.
-contains = $(if $(strip $(foreach i,$(1),$(if $(call streq,$(2),$(i)),T,))),true,)
-
-# Function: is_subset a b
-# Return "true" if 'a' is a subset of 'b'.
-is_subset = $(if $(strip $(set_difference $(1),$(2))),,true)
-
-# Function: set_difference a b
-# Return a - b.
-set_difference = $(foreach i,$(1),$(if $(call contains,$(2),$(i)),,$(i)))
-
-# Function: Set variable value
-#
-# Set the given make variable to the given value.
-Set = $(eval $(1) := $(2))
-
-# Function: Append variable value
-#
-# Append the given value to the given make variable.
-Append = $(eval $(1) += $(2))
-
-# Function: IsDefined variable
-#
-# Check whether the given variable is defined.
-IsDefined = $(call strneq,undefined,$(flavor $(1)))
-
-# Function: IsUndefined variable
-#
-# Check whether the given variable is undefined.
-IsUndefined = $(call streq,undefined,$(flavor $(1)))
-
-# Function: VarOrDefault variable default-value
-#
-# Get the value of the given make variable, or the default-value if the variable
-# is undefined.
-VarOrDefault = $(if $(call IsDefined,$(1)),$($(1)),$(2))
-
-# Function: CheckValue variable
-#
-# Print the name, definition, and value of a variable, for testing make
-# utilities.
-#
-# Example:
-#   foo = $(call streq,a,a)
-#   $(call CheckValue,foo)
-# Example Output:
-#   CHECKVALUE: foo: $(call streq,,) - true
-CheckValue = $(info CHECKVALUE: $(1): $(value $(1)) - $($(1)))
-
-# Function: CopyVariable src dst
-#
-# Copy the value of the variable 'src' to 'dst', taking care to not define 'dst'
-# if 'src' is undefined. The destination variable must be undefined.
-CopyVariable = \
-  $(call AssertValue,$(call IsUndefined,$(2)),destination is already defined)\
-  $(if $(call IsUndefined,$(1)),,\
-       $(call Set,$(2),$($(1))))
-
-# Function: Assert value message
-#
-# Check that a value is true, or give an error including the given message
-Assert = $(if $(1),,\
-           $(error Assertion failed: $(2)))
-
-# Function: AssertEqual variable expected-value
-#
-# Check that the value of a variable is 'expected-value'.
-AssertEqual = \
-  $(if $(call streq,$($(1)),$(2)),,\
-       $(error Assertion failed: $(1): $(value $(1)) - $($(1)) != $(2)))
-
-# Function: CheckCommandLineOverrides list
-#
-# Check that all command line variables are in the given list. This routine is
-# useful for validating that users aren't trying to override something which
-# will not work.
-CheckCommandLineOverrides = \
-  $(foreach arg,$(MAKEOVERRIDES),\
-    $(call Set,varname,$(firstword $(subst =, ,$(arg)))) \
-    $(if $(call contains,$(1),$(varname)),,\
-      $(error "Invalid command line override: $(1) $(varname) (not supported)")))
-
-###
-# Clean up make behavior
-
-# Cancel all suffix rules. We don't want no stinking suffix rules.
-.SUFFIXES:
-
-###
-# Debugging
-
-# General debugging rule, use 'make print-XXX' to print the definition, value
-# and origin of XXX.
-make-print-%:
-	$(error PRINT: $(value $*) = "$($*)" (from $(origin $*)))
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 32851b8..9b9c515 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -23,7 +23,7 @@
     # Use LLVM utils and Clang from the same build tree.
     list(APPEND SANITIZER_COMMON_LIT_TEST_DEPS
       clang clang-headers FileCheck count not llvm-config llvm-nm llvm-objdump
-      llvm-symbolizer compiler-rt-headers sancov)
+      llvm-readobj llvm-symbolizer compiler-rt-headers sancov)
     if (COMPILER_RT_HAS_PROFILE)
       list(APPEND SANITIZER_COMMON_LIT_TEST_DEPS profile)
     endif()
@@ -89,9 +89,13 @@
   # introduce a rule to run to run all of them.
   get_property(LLVM_LIT_TESTSUITES GLOBAL PROPERTY LLVM_LIT_TESTSUITES)
   get_property(LLVM_LIT_DEPENDS GLOBAL PROPERTY LLVM_LIT_DEPENDS)
-  add_lit_target(check-all
+  add_lit_target(check-compiler-rt
     "Running all regression tests"
     ${LLVM_LIT_TESTSUITES}
     DEPENDS ${LLVM_LIT_DEPENDS})
+  if(NOT TARGET check-all)
+    add_custom_target(check-all)
+  endif()
   add_custom_target(compiler-rt-test-depends DEPENDS ${LLVM_LIT_DEPENDS})
+  add_dependencies(check-all check-compiler-rt)
 endif()
diff --git a/test/asan/CMakeLists.txt b/test/asan/CMakeLists.txt
index 8b64159..637c5b8 100644
--- a/test/asan/CMakeLists.txt
+++ b/test/asan/CMakeLists.txt
@@ -92,15 +92,9 @@
   endif()
 endif()
 
-set(LIT_ARGS)
-if(ANDROID)
-  set(LIT_ARGS -j5)
-endif()
-
 add_lit_testsuite(check-asan "Running the AddressSanitizer tests"
   ${ASAN_TESTSUITES}
-  DEPENDS ${ASAN_TEST_DEPS}
-  ARGS ${LIT_ARGS})
+  DEPENDS ${ASAN_TEST_DEPS})
 set_target_properties(check-asan PROPERTIES FOLDER "Compiler-RT Misc")
 
 if(COMPILER_RT_ASAN_HAS_STATIC_RUNTIME)
diff --git a/test/asan/TestCases/Linux/abort_on_error.cc b/test/asan/TestCases/Linux/abort_on_error.cc
index 67fa9b8..3f70613 100644
--- a/test/asan/TestCases/Linux/abort_on_error.cc
+++ b/test/asan/TestCases/Linux/abort_on_error.cc
@@ -9,6 +9,8 @@
 // lit doesn't set ASAN_OPTIONS anyway.
 // RUN: not %run %t 2>&1 | FileCheck %s
 
+// UNSUPPORTED: android
+
 #include <stdlib.h>
 int main() {
   char *x = (char*)malloc(10 * sizeof(char));
diff --git a/test/asan/TestCases/Linux/auto_memory_profile_test.cc b/test/asan/TestCases/Linux/auto_memory_profile_test.cc
new file mode 100644
index 0000000..3f8ad46
--- /dev/null
+++ b/test/asan/TestCases/Linux/auto_memory_profile_test.cc
@@ -0,0 +1,32 @@
+// Tests heap_profile=1.
+// Printing memory profiling only works in the configuration where we can
+// detect leaks.
+// REQUIRES: leak-detection
+//
+// RUN: %clangxx_asan %s -o %t
+// RUN: %env_asan_opts=heap_profile=1 %run %t 2>&1 | FileCheck %s
+#include <sanitizer/common_interface_defs.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+char *sink[1000];
+
+int main() {
+
+  for (int i = 0; i < 3; i++) {
+    const size_t  kSize = 13000000;
+    char *x = new char[kSize];
+    memset(x, 0, kSize);
+    sink[i] = x;
+    sleep(1);
+  }
+}
+
+// CHECK: HEAP PROFILE at RSS
+// CHECK: 13000000 byte(s)
+// CHECK: HEAP PROFILE at RSS
+// CHECK: 26000000 byte(s)
+// CHECK: HEAP PROFILE at RSS
+// CHECK: 39000000 byte(s)
diff --git a/test/asan/TestCases/Linux/coverage_html_report.cc b/test/asan/TestCases/Linux/coverage_html_report.cc
deleted file mode 100644
index 78fbfb3..0000000
--- a/test/asan/TestCases/Linux/coverage_html_report.cc
+++ /dev/null
@@ -1,24 +0,0 @@
-// REQUIRES: has_sancovcc, x86_64-linux, asan-dynamic-runtime
-// RUN: %clangxx_asan_static -fsanitize-coverage=func %s -o %t
-// RUN: rm -rf %T/coverage_html_report
-// RUN: mkdir -p %T/coverage_html_report
-// RUN: cd %T/coverage_html_report
-// RUN: %env_asan_opts=coverage=1:verbosity=1:html_cov_report=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-main
-// RUN: ls *.html | FileCheck %s --check-prefix=CHECK-ls
-// RUN: rm -r %T/coverage_html_report
-
-#include <stdio.h>
-#include <unistd.h>
-
-void bar() { printf("bar\n"); }
-
-int main(int argc, char **argv) {
-  fprintf(stderr, "PID: %d\n", getpid());
-  bar();
-  return 0;
-}
-
-// CHECK-main: PID: [[PID:[0-9]+]]
-// CHECK-main: [[PID]].sancov: 2 PCs written
-// CHECK-main: coverage report generated to ./coverage_html_report.cc.tmp.[[PID]].html
-// CHECK-ls: coverage_html_report.cc.tmp.{{[0-9]+}}.html
diff --git a/test/asan/TestCases/Linux/cuda_test.cc b/test/asan/TestCases/Linux/cuda_test.cc
new file mode 100644
index 0000000..e87f56b
--- /dev/null
+++ b/test/asan/TestCases/Linux/cuda_test.cc
@@ -0,0 +1,37 @@
+// Emulate the behavior of the NVIDIA CUDA driver
+// that mmaps memory inside the asan's shadow gap.
+//
+// REQUIRES: x86_64-target-arch
+//
+// RUN: %clangxx_asan %s -o %t
+// RUN: not %env_asan_opts=protect_shadow_gap=1 %t 2>&1 | FileCheck %s  --check-prefix=CHECK-PROTECT1
+// RUN: not                                     %t 2>&1 | FileCheck %s  --check-prefix=CHECK-PROTECT1
+// RUN: not %env_asan_opts=protect_shadow_gap=0 %t 2>&1 | FileCheck %s  --check-prefix=CHECK-PROTECT0
+#include <assert.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdint.h>
+
+#include "sanitizer/asan_interface.h"
+
+int main(void) {
+  uintptr_t Base = 0x200000000;
+  uintptr_t Size = 0x1100000000;
+  void *addr =
+      mmap((void *)Base, Size, PROT_READ | PROT_WRITE,
+           MAP_NORESERVE | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+  assert(addr == (void*)Base);
+  // Make sure we can access memory in shadow gap.
+  // Unless protect_shadow_gap=0 is set, the stores below are expected to SEGV.
+  for (uintptr_t Addr = Base; Addr < Base + Size; Addr += Size / 100)
+    *(char*)Addr = 1;
+  // CHECK-PROTECT1: AddressSanitizer: SEGV on unknown address 0x0000bfff8000
+
+  // Poison a part of gap's shadow:
+  __asan_poison_memory_region((void*)Base, 4096);
+  // Now we should fail with use-after-poison.
+  *(char*)(Base + 1234) = 1;
+  // CHECK-PROTECT0: AddressSanitizer: use-after-poison on address 0x0002000004d2
+}
+
+
diff --git a/test/asan/TestCases/Linux/malloc_delete_mismatch.cc b/test/asan/TestCases/Linux/malloc_delete_mismatch.cc
index 66eed33..50d920e 100644
--- a/test/asan/TestCases/Linux/malloc_delete_mismatch.cc
+++ b/test/asan/TestCases/Linux/malloc_delete_mismatch.cc
@@ -12,8 +12,7 @@
 // Also works if no malloc context is available.
 // RUN: %env_asan_opts=alloc_dealloc_mismatch=1:malloc_context_size=0:fast_unwind_on_malloc=0 not %run %t 2>&1 | FileCheck %s
 // RUN: %env_asan_opts=alloc_dealloc_mismatch=1:malloc_context_size=0:fast_unwind_on_malloc=1 not %run %t 2>&1 | FileCheck %s
-// XFAIL: arm-linux-gnueabi
-// XFAIL: armv7l-unknown-linux-gnueabihf
+// REQUIRES: stable-runtime
 #include <stdlib.h>
 
 static volatile char *x;
diff --git a/test/asan/TestCases/Linux/new_delete_mismatch.cc b/test/asan/TestCases/Linux/new_delete_mismatch.cc
index 1cfc0ef..3a71862 100644
--- a/test/asan/TestCases/Linux/new_delete_mismatch.cc
+++ b/test/asan/TestCases/Linux/new_delete_mismatch.cc
@@ -1,8 +1,8 @@
 // Check that we report new[] vs delete as alloc-dealloc-mismatch and not as
 // new-delete-type-mismatch when -fsized-deallocation is enabled.
 
-// RUN: %clangxx_asan -g %s -o %t && not %run %t |& FileCheck %s
-// RUN: %clangxx_asan -fsized-deallocation -g %s -o %t && not %run %t |& FileCheck %s
+// RUN: %clangxx_asan -g %s -o %t && %env_asan_opts=alloc_dealloc_mismatch=1 not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -fsized-deallocation -g %s -o %t && %env_asan_opts=alloc_dealloc_mismatch=1 not %run %t 2>&1 | FileCheck %s
 
 #include <stdlib.h>
 
diff --git a/test/asan/TestCases/Linux/print_memory_profile_test.cc b/test/asan/TestCases/Linux/print_memory_profile_test.cc
index d30dbea..8909cca 100644
--- a/test/asan/TestCases/Linux/print_memory_profile_test.cc
+++ b/test/asan/TestCases/Linux/print_memory_profile_test.cc
@@ -3,27 +3,31 @@
 // REQUIRES: leak-detection
 //
 // RUN: %clangxx_asan %s -o %t
-// RUN: %run %t 2>&1 | FileCheck %s
+// RUN: %run %t 100 2>&1 | FileCheck %s --check-prefix=CHECK-100
+// RUN: %run %t 50 2>&1 | FileCheck %s --check-prefix=CHECK-50
 #include <sanitizer/common_interface_defs.h>
 
 #include <stdio.h>
+#include <stdlib.h>
 
 char *sink[1000];
 
-int main() {
+int main(int argc, char **argv) {
+  if (argc < 2)
+    return 1;
+
   int idx = 0;
   for (int i = 0; i < 17; i++)
     sink[idx++] = new char[131000];
   for (int i = 0; i < 28; i++)
     sink[idx++] = new char[24000];
 
-  __sanitizer_print_memory_profile(100);
-  __sanitizer_print_memory_profile(50);
+  __sanitizer_print_memory_profile(atoi(argv[1]));
 }
 
-// CHECK: Live Heap Allocations: {{.*}}; showing top 100%
-// CHECK: 2227000 byte(s) ({{.*}}%) in 17 allocation(s)
-// CHECK: 672000 byte(s) ({{.*}}%) in 28 allocation(s)
-// CHECK: Live Heap Allocations: {{.*}}; showing top 50%
-// CHECK: 2227000 byte(s) ({{.*}}%) in 17 allocation(s)
-// CHECK-NOT: 1008 byte
+// CHECK-100: Live Heap Allocations: {{.*}}; showing top 100%
+// CHECK-100: 2227000 byte(s) ({{.*}}%) in 17 allocation(s)
+// CHECK-100: 672000 byte(s) ({{.*}}%) in 28 allocation(s)
+// CHECK-50: Live Heap Allocations: {{.*}}; showing top 50%
+// CHECK-50: 2227000 byte(s) ({{.*}}%) in 17 allocation(s)
+// CHECK-50-NOT: allocation
diff --git a/test/asan/TestCases/Linux/pthread_create_from_constructor.cc b/test/asan/TestCases/Linux/pthread_create_from_constructor.cc
new file mode 100644
index 0000000..8f9b0b4
--- /dev/null
+++ b/test/asan/TestCases/Linux/pthread_create_from_constructor.cc
@@ -0,0 +1,49 @@
+// Test that ASan doesn't deadlock in __interceptor_pthread_create called
+// from dlopened shared library constructor. The deadlock happens only in shared
+// ASan runtime with recent Glibc (2.23 fits) when __interceptor_pthread_create
+// grabs global Glibc's GL(dl_load_lock) and waits for tls_get_addr_tail that
+// also tries to acquire it.
+//
+// RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared %s -o %t-so.so
+// RUN: %clangxx_asan %s -o %t
+// RUN: %run %t 2>&1
+
+// dlopen() can not be intercepted on Android
+// UNSUPPORTED: android
+// REQUIRES: x86-target-arch
+
+#ifdef BUILD_SO
+
+#include <stdio.h>
+#include <pthread.h>
+#include <unistd.h>
+
+void *threadFn(void *) {
+  fprintf(stderr, "thread started\n");
+  while (true) {
+    usleep(100000);
+  }
+  return 0;
+}
+
+void __attribute__((constructor)) startPolling() {
+  fprintf(stderr, "initializing library\n");
+  pthread_t t;
+  pthread_create(&t, 0, &threadFn, 0);
+  fprintf(stderr, "done\n");
+}
+
+#else
+
+#include <dlfcn.h>
+#include <stdlib.h>
+#include <string>
+
+int main(int argc, char **argv) {
+  std::string path = std::string(argv[0]) + "-so.so";
+  void *handle = dlopen(path.c_str(), RTLD_LAZY);
+  if (!handle)
+    abort();
+  return 0;
+}
+#endif
diff --git a/test/asan/TestCases/Linux/quarantine_size_mb.cc b/test/asan/TestCases/Linux/quarantine_size_mb.cc
index cbacec2..239eeab 100644
--- a/test/asan/TestCases/Linux/quarantine_size_mb.cc
+++ b/test/asan/TestCases/Linux/quarantine_size_mb.cc
@@ -4,7 +4,7 @@
 // RUN: %env_asan_opts=quarantine_size_mb=10:verbosity=1:hard_rss_limit_mb=50    %run %t  2>&1 | FileCheck %s  --check-prefix=Q10
 // RUN: %env_asan_opts=quarantine_size_mb=10:quarantine_size=20:verbosity=1  not %run %t  2>&1 | FileCheck %s  --check-prefix=BOTH
 // RUN: %env_asan_opts=quarantine_size_mb=1000:hard_rss_limit_mb=50 not  %run %t          2>&1 | FileCheck %s  --check-prefix=RSS_LIMIT
-// RUN: %env_asan_opts=hard_rss_limit_mb=50                         not  %run %t          2>&1 | FileCheck %s  --check-prefix=RSS_LIMIT
+// RUN: %env_asan_opts=hard_rss_limit_mb=20                         not  %run %t          2>&1 | FileCheck %s  --check-prefix=RSS_LIMIT
 #include <string.h>
 char *g;
 
diff --git a/test/asan/TestCases/Linux/release_to_os_test.cc b/test/asan/TestCases/Linux/release_to_os_test.cc
new file mode 100644
index 0000000..2640216
--- /dev/null
+++ b/test/asan/TestCases/Linux/release_to_os_test.cc
@@ -0,0 +1,46 @@
+// Tests ASAN_OPTIONS=allocator_release_to_os_interval_ms (0 and -1).
+//
+
+// RUN: %clangxx_asan -std=c++11 %s -o %t
+// RUN: %env_asan_opts=allocator_release_to_os_interval_ms=0 %run %t 2>&1 | FileCheck %s --check-prefix=RELEASE
+// RUN: %env_asan_opts=allocator_release_to_os_interval_ms=-1 %run %t 2>&1 | FileCheck %s --check-prefix=NO_RELEASE
+//
+// REQUIRES: x86_64-target-arch
+#include <stdlib.h>
+#include <stdio.h>
+#include <algorithm>
+#include <stdint.h>
+#include <assert.h>
+
+#include <sanitizer/asan_interface.h>
+
+void MallocReleaseStress() {
+  const size_t kNumChunks = 10000;
+  const size_t kAllocSize = 100;
+  const size_t kNumIter = 100;
+  uintptr_t *chunks[kNumChunks] = {0};
+
+  for (size_t iter = 0; iter < kNumIter; iter++) {
+    std::random_shuffle(chunks, chunks + kNumChunks);
+    size_t to_replace = rand() % kNumChunks;
+    for (size_t i = 0; i < kNumChunks; i++) {
+      if (chunks[i])
+        assert(chunks[i][0] == (uintptr_t)chunks[i]);
+      if (i < to_replace) {
+        delete [] chunks[i];
+        chunks[i] = new uintptr_t[kAllocSize];
+        chunks[i][0] = (uintptr_t)chunks[i];
+      }
+    }
+  }
+  for (auto p : chunks)
+    delete[] p;
+}
+
+int main() {
+  MallocReleaseStress();
+  __asan_print_accumulated_stats();
+}
+
+// RELEASE: mapped:{{.*}}releases: {{[1-9]}}
+// NO_RELEASE: mapped:{{.*}}releases: 0
diff --git a/test/asan/TestCases/Linux/stack-trace-dlclose.cc b/test/asan/TestCases/Linux/stack-trace-dlclose.cc
index 49c2089..e604f1e 100644
--- a/test/asan/TestCases/Linux/stack-trace-dlclose.cc
+++ b/test/asan/TestCases/Linux/stack-trace-dlclose.cc
@@ -4,8 +4,7 @@
 // RUN: %clangxx_asan -DSHARED %s -shared -o %T/stack_trace_dlclose.so -fPIC
 // RUN: %clangxx_asan -DSO_DIR=\"%T\" %s %libdl -o %t
 // RUN: %env_asan_opts=exitcode=0 %run %t 2>&1 | FileCheck %s
-// XFAIL: arm-linux-gnueabi
-// XFAIL: armv7l-unknown-linux-gnueabihf
+// REQUIRES: stable-runtime
 
 #include <assert.h>
 #include <dlfcn.h>
diff --git a/test/asan/TestCases/Linux/swapcontext_annotation.cc b/test/asan/TestCases/Linux/swapcontext_annotation.cc
index 90aabae..56e8119 100644
--- a/test/asan/TestCases/Linux/swapcontext_annotation.cc
+++ b/test/asan/TestCases/Linux/swapcontext_annotation.cc
@@ -1,12 +1,17 @@
 // Check that ASan plays well with annotated makecontext/swapcontext.
 
-// RUN: %clangxx_asan -lpthread -O0 %s -o %t && %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan -lpthread -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan -lpthread -O2 %s -o %t && %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan -lpthread -O3 %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -std=c++11 -lpthread -O0 %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -std=c++11 -lpthread -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -std=c++11 -lpthread -O2 %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -std=c++11 -lpthread -O3 %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -std=c++11 -lpthread -O0 %s -o %t && %run %t 2>&1 | FileCheck <( seq 60 | xargs -i -- grep LOOPCHECK %s ) --check-prefix LOOPCHECK
+// RUN: %clangxx_asan -std=c++11 -lpthread -O1 %s -o %t && %run %t 2>&1 | FileCheck <( seq 60 | xargs -i -- grep LOOPCHECK %s ) --check-prefix LOOPCHECK
+// RUN: %clangxx_asan -std=c++11 -lpthread -O2 %s -o %t && %run %t 2>&1 | FileCheck <( seq 60 | xargs -i -- grep LOOPCHECK %s ) --check-prefix LOOPCHECK
+// RUN: %clangxx_asan -std=c++11 -lpthread -O3 %s -o %t && %run %t 2>&1 | FileCheck <( seq 60 | xargs -i -- grep LOOPCHECK %s ) --check-prefix LOOPCHECK
+
 //
 // This test is too subtle to try on non-x86 arch for now.
-// REQUIRES: x86_64-supported-target,i386-supported-target
+// REQUIRES: x86-target-arch
 
 #include <pthread.h>
 #include <setjmp.h>
@@ -25,9 +30,12 @@
 
 const int kStackSize = 1 << 20;
 
-void *main_thread_stack;
+const void *main_thread_stack;
 size_t main_thread_stacksize;
 
+const void *from_stack;
+size_t from_stacksize;
+
 __attribute__((noinline, noreturn)) void LongJump(jmp_buf env) {
   longjmp(env, 1);
   _exit(1);
@@ -44,14 +52,18 @@
 
 void NextChild() {
   CallNoReturn();
-  __sanitizer_finish_switch_fiber();
+  __sanitizer_finish_switch_fiber(nullptr, &from_stack, &from_stacksize);
+
+  printf("NextChild from: %p %zu\n", from_stack, from_stacksize);
 
   char x[32] = {0};  // Stack gets poisoned.
   printf("NextChild: %p\n", x);
 
   CallNoReturn();
 
-  __sanitizer_start_switch_fiber(main_thread_stack, main_thread_stacksize);
+  __sanitizer_start_switch_fiber(nullptr,
+                                 main_thread_stack,
+                                 main_thread_stacksize);
   CallNoReturn();
   if (swapcontext(&next_child_context, &orig_context) < 0) {
     perror("swapcontext");
@@ -61,7 +73,9 @@
 
 void Child(int mode) {
   CallNoReturn();
-  __sanitizer_finish_switch_fiber();
+  __sanitizer_finish_switch_fiber(nullptr,
+                                  &main_thread_stack,
+                                  &main_thread_stacksize);
   char x[32] = {0};  // Stack gets poisoned.
   printf("Child: %p\n", x);
   CallNoReturn();
@@ -70,21 +84,28 @@
   //     something.
   // (c) Jump to another function which will then jump back to the main function
   if (mode == 0) {
-    __sanitizer_start_switch_fiber(main_thread_stack, main_thread_stacksize);
+    __sanitizer_start_switch_fiber(nullptr,
+                                   main_thread_stack,
+                                   main_thread_stacksize);
     CallNoReturn();
   } else if (mode == 1) {
-    __sanitizer_start_switch_fiber(main_thread_stack, main_thread_stacksize);
+    __sanitizer_start_switch_fiber(nullptr,
+                                   main_thread_stack,
+                                   main_thread_stacksize);
     CallNoReturn();
     if (swapcontext(&child_context, &orig_context) < 0) {
       perror("swapcontext");
       _exit(1);
     }
   } else if (mode == 2) {
+    printf("NextChild stack: %p\n", next_child_stack);
+
     getcontext(&next_child_context);
     next_child_context.uc_stack.ss_sp = next_child_stack;
     next_child_context.uc_stack.ss_size = kStackSize / 2;
     makecontext(&next_child_context, (void (*)())NextChild, 0);
-    __sanitizer_start_switch_fiber(next_child_context.uc_stack.ss_sp,
+    __sanitizer_start_switch_fiber(nullptr,
+                                   next_child_context.uc_stack.ss_sp,
                                    next_child_context.uc_stack.ss_size);
     CallNoReturn();
     if (swapcontext(&child_context, &next_child_context) < 0) {
@@ -105,7 +126,9 @@
   }
   makecontext(&child_context, (void (*)())Child, 1, mode);
   CallNoReturn();
-  __sanitizer_start_switch_fiber(child_context.uc_stack.ss_sp,
+  void* fake_stack_save;
+  __sanitizer_start_switch_fiber(&fake_stack_save,
+                                 child_context.uc_stack.ss_sp,
                                  child_context.uc_stack.ss_size);
   CallNoReturn();
   if (swapcontext(&orig_context, &child_context) < 0) {
@@ -113,8 +136,11 @@
     _exit(1);
   }
   CallNoReturn();
-  __sanitizer_finish_switch_fiber();
+  __sanitizer_finish_switch_fiber(fake_stack_save,
+                                  &from_stack,
+                                  &from_stacksize);
   CallNoReturn();
+  printf("Main context from: %p %zu\n", from_stack, from_stacksize);
 
   // Touch childs's stack to make sure it's unpoisoned.
   for (int i = 0; i < kStackSize; i++) {
@@ -125,17 +151,7 @@
 
 void handler(int sig) { CallNoReturn(); }
 
-void InitStackBounds() {
-  pthread_attr_t attr;
-  pthread_attr_init(&attr);
-  pthread_getattr_np(pthread_self(), &attr);
-  pthread_attr_getstack(&attr, &main_thread_stack, &main_thread_stacksize);
-  pthread_attr_destroy(&attr);
-}
-
 int main(int argc, char **argv) {
-  InitStackBounds();
-
   // set up a signal that will spam and trigger __asan_handle_no_return at
   // tricky moments
   struct sigaction act = {};
@@ -162,12 +178,22 @@
   // CHECK-NOT: ASan is ignoring requested __asan_handle_no_return
   for (unsigned int i = 0; i < 30; ++i) {
     ret += Run(argc - 1, 0, stack);
+    // LOOPCHECK: Child stack: [[CHILD_STACK:0x[0-9a-f]*]]
+    // LOOPCHECK: Main context from: [[CHILD_STACK]] 524288
     ret += Run(argc - 1, 1, stack);
+    // LOOPCHECK: Child stack: [[CHILD_STACK:0x[0-9a-f]*]]
+    // LOOPCHECK: Main context from: [[CHILD_STACK]] 524288
     ret += Run(argc - 1, 2, stack);
+    // LOOPCHECK: Child stack: [[CHILD_STACK:0x[0-9a-f]*]]
+    // LOOPCHECK: NextChild stack: [[NEXT_CHILD_STACK:0x[0-9a-f]*]]
+    // LOOPCHECK: NextChild from: [[CHILD_STACK]] 524288
+    // LOOPCHECK: Main context from: [[NEXT_CHILD_STACK]] 524288
     ret += Run(argc - 1, 0, heap);
     ret += Run(argc - 1, 1, heap);
     ret += Run(argc - 1, 2, heap);
+    printf("Iteration %d passed\n", i);
   }
+
   // CHECK: Test passed
   printf("Test passed\n");
 
diff --git a/test/asan/TestCases/Linux/thread_local_quarantine_size_kb.cc b/test/asan/TestCases/Linux/thread_local_quarantine_size_kb.cc
new file mode 100644
index 0000000..24022a1
--- /dev/null
+++ b/test/asan/TestCases/Linux/thread_local_quarantine_size_kb.cc
@@ -0,0 +1,43 @@
+// Test thread_local_quarantine_size_kb
+
+// RUN: %clangxx_asan  %s -o %t
+// RUN: %env_asan_opts=thread_local_quarantine_size_kb=256:verbosity=1 %run %t 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=CHECK-VALUE
+// RUN: %env_asan_opts=thread_local_quarantine_size_kb=64:quarantine_size_mb=64 %run %t 2>&1 | \
+// RUN:   FileCheck %s --allow-empty --check-prefix=CHECK-SMALL-LOCAL-CACHE-SMALL-OVERHEAD
+// RUN: %env_asan_opts=thread_local_quarantine_size_kb=0:quarantine_size_mb=0 %run %t 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=CHECK-QUARANTINE-DISABLED
+// RUN: %env_asan_opts=thread_local_quarantine_size_kb=0:quarantine_size_mb=64 not %run %t 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=CHECK-FOR-PARAMETER-ERROR
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sanitizer/allocator_interface.h>
+
+// The idea is to allocate a lot of small blocks, totaling about 5 MB of user
+// memory, and verify that quarantine does not incur too much memory overhead.
+// There's always an overhead for red zones, shadow memory and such, but
+// quarantine accounting should not significantly contribute to that.
+static const int kNumAllocs = 20000;
+static const int kAllocSize = 256;
+static const size_t kHeapSizeLimit = 12 << 20;
+
+int main() {
+  size_t old_heap_size = __sanitizer_get_heap_size();
+  for (int i = 0; i < kNumAllocs; i++) {
+    char *g = new char[kAllocSize];
+    memset(g, -1, kAllocSize);
+    delete [] (g);
+  }
+  size_t new_heap_size = __sanitizer_get_heap_size();
+  fprintf(stderr, "heap size: new: %zd old: %zd\n", new_heap_size,
+          old_heap_size);
+  if (new_heap_size - old_heap_size > kHeapSizeLimit)
+    fprintf(stderr, "Heap size limit exceeded");
+}
+
+// CHECK-VALUE: thread_local_quarantine_size_kb=256K
+// CHECK-SMALL-LOCAL-CACHE-SMALL-OVERHEAD-NOT: Heap size limit exceeded
+// CHECK-QUARANTINE-DISABLED-NOT: Heap size limit exceeded
+// CHECK-FOR-PARAMETER-ERROR: thread_local_quarantine_size_kb can be set to 0 only when quarantine_size_mb is set to 0
diff --git a/test/asan/TestCases/Posix/coverage-fork.cc b/test/asan/TestCases/Posix/coverage-fork.cc
index 799d716..40ce72e 100644
--- a/test/asan/TestCases/Posix/coverage-fork.cc
+++ b/test/asan/TestCases/Posix/coverage-fork.cc
@@ -3,7 +3,7 @@
 // RUN: mkdir -p %T/coverage-fork && cd %T/coverage-fork
 // RUN: %env_asan_opts=coverage=1:coverage_direct=0:verbosity=1 %run %t 2>&1 | FileCheck %s
 //
-// XFAIL: android
+// UNSUPPORTED: android
 
 #include <stdio.h>
 #include <string.h>
diff --git a/test/asan/TestCases/Posix/start-deactivated.cc b/test/asan/TestCases/Posix/start-deactivated.cc
index 187ee5e..b223f04 100644
--- a/test/asan/TestCases/Posix/start-deactivated.cc
+++ b/test/asan/TestCases/Posix/start-deactivated.cc
@@ -2,8 +2,8 @@
 // Main executable is uninstrumented, but linked to ASan runtime. The shared
 // library is instrumented. Memory errors before dlopen are not detected.
 
-// RUN: %clangxx_asan -O0 -DSHARED_LIB %s -fPIC -shared -o %t-so.so
-// RUN: %clangxx -O0 %s -c -o %t.o
+// RUN: %clangxx_asan -O0 -DSHARED_LIB %s -std=c++11 -fPIC -shared -o %t-so.so
+// RUN: %clangxx -O0 %s -std=c++11 -c -o %t.o
 // RUN: %clangxx_asan -O0 %t.o %libdl -o %t
 // RUN: %env_asan_opts=start_deactivated=1,allocator_may_return_null=0 \
 // RUN:   ASAN_ACTIVATION_OPTIONS=allocator_may_return_null=1 not %run %t 2>&1 | FileCheck %s
@@ -21,6 +21,7 @@
 // XFAIL: arm-linux-gnueabi
 
 #if !defined(SHARED_LIB)
+
 #include <assert.h>
 #include <dlfcn.h>
 #include <stdio.h>
@@ -32,18 +33,42 @@
 
 #include "sanitizer/asan_interface.h"
 
-void test_malloc_shadow() {
-  char *p = (char *)malloc(100);
-  char *q = (char *)__asan_region_is_poisoned(p + 95, 8);
-  fprintf(stderr, "=%zd=\n", q ? q - (p + 95) : -1);
-  free(p);
+void test_malloc_shadow(char *p, size_t sz, bool expect_redzones) {
+  // Last byte of the left redzone, if present.
+  assert((char *)__asan_region_is_poisoned(p - 1, sz + 1) ==
+         (expect_redzones ? p - 1 : nullptr));
+  // The user memory.
+  assert((char *)__asan_region_is_poisoned(p, sz) == nullptr);
+  // First byte of the right redzone, if present.
+  assert((char *)__asan_region_is_poisoned(p, sz + 1) ==
+         (expect_redzones ? p + sz : nullptr));
 }
 
 typedef void (*Fn)();
 
 int main(int argc, char *argv[]) {
-  test_malloc_shadow();
-  // CHECK: =-1=
+  constexpr unsigned nPtrs = 200;
+  char *ptrs[nPtrs];
+
+  // Before activation: no redzones.
+  for (size_t sz = 1; sz < nPtrs; ++sz) {
+    ptrs[sz] = (char *)malloc(sz);
+    test_malloc_shadow(ptrs[sz], sz, false);
+  }
+
+  // Create a honey pot for the future, instrumented, allocations. Since the
+  // quarantine is disabled, chunks are going to be recycled right away and
+  // reused for the new allocations. New allocations must get the proper
+  // redzones anyway, whether it's a fresh or reused allocation.
+  constexpr size_t HoneyPotBlockSize = 4096;
+  constexpr int HoneyPotSize = 200;
+  char *honeyPot[HoneyPotSize];
+  for (int i = 1; i < HoneyPotSize; ++i) {
+    honeyPot[i] = (char *)malloc(HoneyPotBlockSize);
+    test_malloc_shadow(honeyPot[i], HoneyPotBlockSize, false);
+  }
+  for (int i = 1; i < HoneyPotSize; ++i)
+    free(honeyPot[i]);
 
   std::string path = std::string(argv[0]) + "-so.so";
   void *dso = dlopen(path.c_str(), RTLD_NOW);
@@ -52,9 +77,6 @@
     return 1;
   }
 
-  test_malloc_shadow();
-  // CHECK: =5=
-
   // After this line ASan is activated and starts detecting errors.
   void *fn = dlsym(dso, "do_another_bad_thing");
   if (!fn) {
@@ -62,6 +84,25 @@
     return 1;
   }
 
+  // After activation: redzones.
+  for (int i = 1; i < HoneyPotSize; ++i) {
+    honeyPot[i] = (char *)malloc(HoneyPotBlockSize);
+    test_malloc_shadow(honeyPot[i], HoneyPotBlockSize, true);
+  }
+  {
+    char *p = (char *)malloc(HoneyPotBlockSize);
+    test_malloc_shadow(p, HoneyPotBlockSize, true);
+    free(p);
+  }
+  for (int i = 1; i < HoneyPotSize; ++i)
+    free(honeyPot[i]);
+
+  // Pre-existing allocations got redzones, too.
+  for (size_t sz = 1; sz < nPtrs; ++sz) {
+    test_malloc_shadow(ptrs[sz], sz, true);
+    free(ptrs[sz]);
+  }
+
   // Test that ASAN_ACTIVATION_OPTIONS=allocator_may_return_null=1 has effect.
   void *p = malloc((unsigned long)-2);
   assert(!p);
@@ -76,7 +117,9 @@
 
   return 0;
 }
+
 #else  // SHARED_LIB
+
 #include <stdio.h>
 #include <stdlib.h>
 
@@ -84,6 +127,7 @@
   char *volatile p = (char *)malloc(100);
   printf("%hhx\n", p[105]);
 }
+
 #endif  // SHARED_LIB
 
 // help=1 in activation flags lists only flags are are supported at activation
diff --git a/test/asan/TestCases/Windows/coverage-dll-stdio.cc b/test/asan/TestCases/Windows/coverage-dll-stdio.cc
new file mode 100644
index 0000000..5e12e38
--- /dev/null
+++ b/test/asan/TestCases/Windows/coverage-dll-stdio.cc
@@ -0,0 +1,16 @@
+// Test that coverage and MSVC CRT stdio work from a DLL. This ensures that the
+// __local_stdio_printf_options function isn't instrumented for coverage.
+
+// RUN: rm -rf %t && mkdir %t && cd %t
+// RUN: %clang_cl_asan -fsanitize-coverage=func -O0 %p/dll_host.cc -Fet.exe
+// RUN: %clang_cl_asan -fsanitize-coverage=func -LD -O0 %s -Fet.dll
+// RUN: %run ./t.exe t.dll 2>&1 | FileCheck %s
+
+#include <stdio.h>
+
+extern "C" __declspec(dllexport)
+int test_function() {
+  printf("hello world\n");
+  // CHECK: hello world
+  return 0;
+}
diff --git a/test/asan/TestCases/Windows/default_options.cc b/test/asan/TestCases/Windows/default_options.cc
deleted file mode 100644
index 6e0a28f..0000000
--- a/test/asan/TestCases/Windows/default_options.cc
+++ /dev/null
@@ -1,18 +0,0 @@
-// RUN: %clangxx_asan -O2 %s -o %t
-// RUN: %run %t 2>&1 | FileCheck %s
-
-// FIXME: merge this with the common default_options test when we can run common
-// tests on Windows.
-
-const char *kAsanDefaultOptions="verbosity=1 help=1";
-
-extern "C"
-__attribute__((no_sanitize_address))
-const char *__asan_default_options() {
-  // CHECK: Available flags for AddressSanitizer:
-  return kAsanDefaultOptions;
-}
-
-int main() {
-  return 0;
-}
diff --git a/test/asan/TestCases/Windows/delay_dbghelp.cc b/test/asan/TestCases/Windows/delay_dbghelp.cc
new file mode 100644
index 0000000..81cd2d3
--- /dev/null
+++ b/test/asan/TestCases/Windows/delay_dbghelp.cc
@@ -0,0 +1,18 @@
+// Build an executable with ASan, then extract the DLLs that it depends on.
+// RUN: %clang_cl_asan %s -Fe%t.exe
+// RUN: llvm-readobj -coff-imports %t.exe | grep Name: | sed -e 's/ *Name: *//' > %t
+//
+// Make sure the binary doesn't depend on dbghelp directly.
+// RUN: not grep dbghelp.dll %t
+//
+// Make sure any clang_rt DLLs it depends on don't depend on dbghelp. In the
+// static build, there won't be any clang_rt DLLs.
+// RUN: not grep cl""ang_rt %t || \
+// RUN:   grep cl""ang_rt %t | xargs which | \
+// RUN:   xargs llvm-readobj -coff-imports | not grep dbghelp.dll %t
+
+extern "C" int puts(const char *);
+
+int main() {
+  puts("main");
+}
diff --git a/test/asan/TestCases/Windows/dll_global_dead_strip.c b/test/asan/TestCases/Windows/dll_global_dead_strip.c
new file mode 100644
index 0000000..2664f5b
--- /dev/null
+++ b/test/asan/TestCases/Windows/dll_global_dead_strip.c
@@ -0,0 +1,28 @@
+// RUN: %clang_cl_asan -O0 %p/dll_host.cc -Fe%t
+//
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll
+// RUN: %env_asan_opts=report_globals=2 %run %t %t.dll 2>&1 | FileCheck %s --check-prefix=NOSTRIP
+// RUN: %clang_cl_asan -LD -O2 %s -Fe%t.dll -link -opt:ref
+// RUN: %env_asan_opts=report_globals=2 %run %t %t.dll 2>&1 | FileCheck %s --check-prefix=STRIP
+
+#include <stdio.h>
+
+int dead_global = 42;
+int live_global = 0;
+
+__declspec(dllexport)
+int test_function() {
+  puts("main");
+  return live_global;
+}
+
+// Check that our global registration scheme works with MSVC's linker dead
+// stripping (/OPT:REF).
+
+// NOSTRIP: Added Global{{.*}}name=dead_global
+// NOSTRIP: Added Global{{.*}}name=live_global
+// NOSTRIP: main
+
+// STRIP-NOT: Added Global{{.*}}name=dead_global
+// STRIP: Added Global{{.*}}name=live_global
+// STRIP: main
diff --git a/test/asan/TestCases/Windows/dll_intercept_memchr.cc b/test/asan/TestCases/Windows/dll_intercept_memchr.cc
index 1435bdc..4f794a2 100644
--- a/test/asan/TestCases/Windows/dll_intercept_memchr.cc
+++ b/test/asan/TestCases/Windows/dll_intercept_memchr.cc
@@ -2,6 +2,12 @@
 // RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll
 // RUN: not %run %t %t.dll 2>&1 | FileCheck %s
 
+// On windows 64-bit, the memchr function is written in assembly and is not
+// hookable with the interception library. There is not enough padding before
+// the function and there is a short jump on the second instruction which
+// does not allow enough space to encode a 64-bit indirect jump.
+// UNSUPPORTED: x86_64-windows
+
 #include <string.h>
 
 extern "C" __declspec(dllexport)
diff --git a/test/asan/TestCases/Windows/dll_intercept_memcpy_indirect.cc b/test/asan/TestCases/Windows/dll_intercept_memcpy_indirect.cc
index c5f44df..4e28905 100644
--- a/test/asan/TestCases/Windows/dll_intercept_memcpy_indirect.cc
+++ b/test/asan/TestCases/Windows/dll_intercept_memcpy_indirect.cc
@@ -24,7 +24,7 @@
   call_memcpy(&memcpy, buff2, buff1, 6);
 // CHECK: AddressSanitizer: stack-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
 // CHECK: WRITE of size 6 at [[ADDR]] thread T0
-// CHECK-NEXT:  __asan_{{.*}}memcpy
+// CHECK-NEXT:  __asan_{{.*}}mem{{.*}}
 // CHECK-NEXT:  call_memcpy
 // CHECK-NEXT:  test_function {{.*}}dll_intercept_memcpy_indirect.cc:[[@LINE-5]]
 // CHECK: Address [[ADDR]] is located in stack of thread T0 at offset {{.*}} in frame
diff --git a/test/asan/TestCases/Windows/free_hook_realloc.cc b/test/asan/TestCases/Windows/free_hook_realloc.cc
index 297218b..11e8c99 100644
--- a/test/asan/TestCases/Windows/free_hook_realloc.cc
+++ b/test/asan/TestCases/Windows/free_hook_realloc.cc
@@ -5,6 +5,9 @@
 // FIXME: merge this with the common free_hook_realloc test when we can run
 // common tests on Windows.
 
+// FIXME: Doesn't work with DLLs
+// XFAIL: win32-dynamic-asan
+
 #include <stdlib.h>
 #include <io.h>
 #include <sanitizer/allocator_interface.h>
diff --git a/test/asan/TestCases/Windows/global_dead_strip.c b/test/asan/TestCases/Windows/global_dead_strip.c
new file mode 100644
index 0000000..e685490
--- /dev/null
+++ b/test/asan/TestCases/Windows/global_dead_strip.c
@@ -0,0 +1,23 @@
+// RUN: %clang_cl_asan /O0 %s /Fe%t.exe
+// RUN: %env_asan_opts=report_globals=2 %t.exe 2>&1 | FileCheck %s --check-prefix=NOSTRIP
+// RUN: %clang_cl_asan /O2 %s /Fe%t.exe -link -opt:ref
+// RUN: %env_asan_opts=report_globals=2 %t.exe 2>&1 | FileCheck %s --check-prefix=STRIP
+
+#include <stdio.h>
+int dead_global = 42;
+int live_global = 0;
+int main() {
+  puts("main");
+  return live_global;
+}
+
+// Check that our global registration scheme works with MSVC's linker dead
+// stripping (/OPT:REF).
+
+// NOSTRIP: Added Global{{.*}}name=dead_global
+// NOSTRIP: Added Global{{.*}}name=live_global
+// NOSTRIP: main
+
+// STRIP-NOT: Added Global{{.*}}name=dead_global
+// STRIP: Added Global{{.*}}name=live_global
+// STRIP: main
diff --git a/test/asan/TestCases/Windows/on_error_callback.cc b/test/asan/TestCases/Windows/on_error_callback.cc
deleted file mode 100644
index 9e690a3..0000000
--- a/test/asan/TestCases/Windows/on_error_callback.cc
+++ /dev/null
@@ -1,20 +0,0 @@
-// RUN: %clangxx_asan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s
-
-// FIXME: merge this with the common on_error_callback test when we can run
-// common tests on Windows.
-
-#include <stdio.h>
-#include <stdlib.h>
-
-extern "C"
-void __asan_on_error() {
-  fprintf(stderr, "__asan_on_error called");
-  fflush(0);
-}
-
-int main() {
-  char *x = (char*)malloc(10 * sizeof(char));
-  free(x);
-  return x[5];
-  // CHECK: __asan_on_error called
-}
diff --git a/test/asan/TestCases/Windows/report_after_syminitialize.cc b/test/asan/TestCases/Windows/report_after_syminitialize.cc
index c5ac602..eec5029 100644
--- a/test/asan/TestCases/Windows/report_after_syminitialize.cc
+++ b/test/asan/TestCases/Windows/report_after_syminitialize.cc
@@ -4,6 +4,8 @@
 #include <windows.h>
 #include <dbghelp.h>
 
+#pragma comment(lib, "dbghelp")
+
 int main() {
   // Make sure the RTL recovers from "no options enabled" dbghelp setup.
   SymSetOptions(0);
diff --git a/test/asan/TestCases/Windows/tls_init.cc b/test/asan/TestCases/Windows/tls_init.cc
new file mode 100644
index 0000000..c29c4a3
--- /dev/null
+++ b/test/asan/TestCases/Windows/tls_init.cc
@@ -0,0 +1,51 @@
+// RUN: %clang_cl_asan %s -Fe%t.exe
+// RUN: %run %t.exe | FileCheck %s
+
+// CHECK: my_thread_callback
+// CHECK: ran_before_main: 1
+
+#include <windows.h>
+#include <stdio.h>
+#include <string.h>
+
+#pragma comment (lib, "dbghelp")
+
+static bool ran_before_main = false;
+
+extern "C" void __asan_init(void);
+
+static void NTAPI /*__attribute__((no_sanitize_address))*/
+my_thread_callback(PVOID module, DWORD reason, PVOID reserved) {
+  ran_before_main = true;
+  static const char str[] = "my_thread_callback\n";
+
+  // Fail the test if we aren't called for the expected reason or we can't write
+  // to stdout.
+  if (reason != DLL_PROCESS_ATTACH)
+    return;
+  HANDLE out = GetStdHandle(STD_OUTPUT_HANDLE);
+  if (!out || out == INVALID_HANDLE_VALUE)
+    return;
+
+  DWORD written = 0;
+  WriteFile(out, &str[0], sizeof(str), &written, NULL);
+}
+
+extern "C" {
+#pragma const_seg(".CRT$XLC")
+extern const PIMAGE_TLS_CALLBACK p_thread_callback;
+const PIMAGE_TLS_CALLBACK p_thread_callback = my_thread_callback;
+#pragma const_seg()
+}
+
+#ifdef _WIN64
+#pragma comment(linker, "/INCLUDE:_tls_used")
+#pragma comment(linker, "/INCLUDE:p_thread_callback")
+#else
+#pragma comment(linker, "/INCLUDE:__tls_used")
+#pragma comment(linker, "/INCLUDE:_p_thread_callback")
+#endif
+
+int main() {
+  printf("ran_before_main: %d\n", ran_before_main);
+}
diff --git a/test/asan/TestCases/atexit_stats.cc b/test/asan/TestCases/atexit_stats.cc
index 42a3fbf..f0b5830 100644
--- a/test/asan/TestCases/atexit_stats.cc
+++ b/test/asan/TestCases/atexit_stats.cc
@@ -2,9 +2,9 @@
 // RUN: %clangxx_asan -O3 %s -o %t
 // RUN: %env_asan_opts=atexit=1:print_stats=1 %run %t 2>&1 | FileCheck %s
 //
-// No atexit output on Android due to
+// No atexit output in older versions of Android due to
 // https://code.google.com/p/address-sanitizer/issues/detail?id=263
-// XFAIL: android
+// UNSUPPORTED: android
 
 #include <stdlib.h>
 #if !defined(__APPLE__) && !defined(__FreeBSD__)
diff --git a/test/asan/TestCases/coverage-pc-buffer.cc b/test/asan/TestCases/coverage-pc-buffer.cc
deleted file mode 100644
index dd9beaf..0000000
--- a/test/asan/TestCases/coverage-pc-buffer.cc
+++ /dev/null
@@ -1,49 +0,0 @@
-// Test __sanitizer_coverage_pc_buffer().
-
-// RUN: %clangxx_asan -fsanitize-coverage=edge %stdcxx11 %s -O3 -o %t && %run %t
-
-// UNSUPPORTED: android
-
-#include <assert.h>
-#include <memory>
-#include <sanitizer/coverage_interface.h>
-#include <stdint.h>
-#include <stdio.h>
-
-static volatile int sink;
-__attribute__((noinline)) void foo() { sink = 1; }
-
-void assertNotZeroPcs(uintptr_t *buf, uintptr_t size) {
-  assert(buf);
-  for (uintptr_t i = 0; i < size; ++i)
-    assert(buf[i]);
-}
-
-int main() {
-  uintptr_t buf_size = 1 << 20;
-  std::unique_ptr<uintptr_t[]> buf(new uintptr_t[buf_size]);
-  __sanitizer_set_coverage_pc_buffer(buf.get(), buf_size);
-
-  {
-    uintptr_t sz = __sanitizer_get_coverage_pc_buffer_pos();
-    assertNotZeroPcs(buf.get(), sz);
-    assert(sz);
-  }
-
-  {
-    uintptr_t sz = __sanitizer_get_coverage_pc_buffer_pos();
-    foo();
-    uintptr_t sz1 = __sanitizer_get_coverage_pc_buffer_pos();
-    assertNotZeroPcs(buf.get(), sz1);
-    assert(sz1 > sz);
-  }
-
-  {
-    uintptr_t sz = __sanitizer_get_coverage_pc_buffer_pos();
-    // reset coverage to 0.
-    __sanitizer_reset_coverage();
-    uintptr_t sz1 = __sanitizer_get_coverage_pc_buffer_pos();
-    assertNotZeroPcs(buf.get(), sz1);
-    assert(sz1 < sz);
-  }
-}
diff --git a/test/asan/TestCases/debug_report.cc b/test/asan/TestCases/debug_report.cc
index 124ae5d..34bc06e 100644
--- a/test/asan/TestCases/debug_report.cc
+++ b/test/asan/TestCases/debug_report.cc
@@ -6,6 +6,9 @@
 #include <stdio.h>
 #include <stdlib.h>
 
+// FIXME: Doesn't work with DLLs
+// XFAIL: win32-dynamic-asan
+
 int main() {
   // Disable stderr buffering. Needed on Windows.
   setvbuf(stderr, NULL, _IONBF, 0);
diff --git a/test/asan/TestCases/deep_stack_uaf.cc b/test/asan/TestCases/deep_stack_uaf.cc
index 95032f2..bdf0dbd 100644
--- a/test/asan/TestCases/deep_stack_uaf.cc
+++ b/test/asan/TestCases/deep_stack_uaf.cc
@@ -2,8 +2,7 @@
 
 // RUN: %clangxx_asan -O0 %s -o %t 2>&1
 // RUN: %env_asan_opts=malloc_context_size=120:redzone=512 not %run %t 2>&1 | FileCheck %s
-// XFAIL: arm-linux-gnueabi
-// XFAIL: armv7l-unknown-linux-gnueabihf
+// REQUIRES: stable-runtime
 #include <stdlib.h>
 #include <stdio.h>
 
diff --git a/test/asan/TestCases/default_options.cc b/test/asan/TestCases/default_options.cc
index a3aa663..27af76d 100644
--- a/test/asan/TestCases/default_options.cc
+++ b/test/asan/TestCases/default_options.cc
@@ -1,6 +1,9 @@
 // RUN: %clangxx_asan -O2 %s -o %t
 // RUN: %run %t 2>&1 | FileCheck %s
 
+// FIXME: Doesn't work with DLLs
+// XFAIL: win32-dynamic-asan
+
 const char *kAsanDefaultOptions="verbosity=1 help=1";
 
 extern "C"
diff --git a/test/asan/TestCases/double-free.cc b/test/asan/TestCases/double-free.cc
index 9bd418f..2a26b23 100644
--- a/test/asan/TestCases/double-free.cc
+++ b/test/asan/TestCases/double-free.cc
@@ -7,9 +7,7 @@
 
 // RUN: %clangxx_asan -O0 -fsanitize-recover=address %s -o %t 2>&1
 // RUN: %env_asan_opts=halt_on_error=false %run %t 2>&1 | FileCheck %s --check-prefix CHECK-RECOVER
-
-// XFAIL: arm-linux-gnueabi
-// XFAIL: armv7l-unknown-linux-gnueabihf
+// REQUIRES: stable-runtime
 
 #include <stdlib.h>
 #include <string.h>
diff --git a/test/asan/TestCases/exitcode.cc b/test/asan/TestCases/exitcode.cc
new file mode 100644
index 0000000..cb10540
--- /dev/null
+++ b/test/asan/TestCases/exitcode.cc
@@ -0,0 +1,130 @@
+// RUN: %clangxx_asan -g %stdcxx11 -Wno-deprecated-declarations %s -o %t
+// RUN: %env_asan_opts=exitcode=42 %run %t | FileCheck %s
+
+// Android doesn't have spawn.h or posix_spawn.
+// UNSUPPORTED: android
+
+// CHECK: got expected 42 exit code
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#ifdef _WIN32
+#include <windows.h>
+
+int spawn_child(char **argv) {
+  // Set an environment variable to tell the child process to interrupt
+  // itself.
+  if (!SetEnvironmentVariableW(L"CRASH_FOR_TEST", L"1")) {
+    printf("SetEnvironmentVariableW failed (0x%8lx).\n", GetLastError());
+    fflush(stdout);
+    exit(1);
+  }
+
+  STARTUPINFOW si;
+  memset(&si, 0, sizeof(si));
+  si.cb = sizeof(si);
+
+  PROCESS_INFORMATION pi;
+  memset(&pi, 0, sizeof(pi));
+
+  if (!CreateProcessW(nullptr,           // No module name (use command line)
+                      GetCommandLineW(), // Command line
+                      nullptr,           // Process handle not inheritable
+                      nullptr,           // Thread handle not inheritable
+                      TRUE,              // Set handle inheritance to TRUE
+                      0,                 // No flags
+                      nullptr,           // Use parent's environment block
+                      nullptr,           // Use parent's starting directory
+                      &si, &pi)) {
+    printf("CreateProcess failed (0x%08lx).\n", GetLastError());
+    fflush(stdout);
+    exit(1);
+  }
+
+  WaitForSingleObject(pi.hProcess, INFINITE);
+
+  DWORD exit_code;
+  if (!GetExitCodeProcess(pi.hProcess, &exit_code)) {
+    printf("GetExitCodeProcess failed (0x%08lx).\n", GetLastError());
+    fflush(stdout);
+    exit(1);
+  }
+
+  CloseHandle(pi.hProcess);
+  CloseHandle(pi.hThread);
+
+  return exit_code;
+}
+#else
+#include <spawn.h>
+#include <errno.h>
+#include <sys/wait.h>
+
+#if defined(__APPLE__)
+#include <TargetConditionals.h>
+#endif
+
+#if defined(__APPLE__) && !(defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE)
+#define USE_NSGETENVIRON 1
+#else
+#define USE_NSGETENVIRON 0
+#endif
+
+#if !USE_NSGETENVIRON
+extern char **environ;
+#else
+#include <crt_externs.h> // _NSGetEnviron
+#endif
+
+int spawn_child(char **argv) {
+  setenv("CRASH_FOR_TEST", "1", 1);
+
+#if !USE_NSGETENVIRON
+  char **envp = environ;
+#else
+  char **envp = *_NSGetEnviron();
+#endif
+
+  pid_t pid;
+  int err = posix_spawn(&pid, argv[0], nullptr, nullptr, argv, envp);
+  if (err) {
+    printf("posix_spawn failed: %d\n", err);
+    fflush(stdout);
+    exit(1);
+  }
+
+  // Wait until the child exits.
+  int status;
+  pid_t wait_result_pid;
+  do {
+    wait_result_pid = waitpid(pid, &status, 0);
+  } while (wait_result_pid == -1 && errno == EINTR);
+
+  if (wait_result_pid != pid || !WIFEXITED(status)) {
+    printf("error in waitpid\n");
+    fflush(stdout);
+    exit(1);
+  }
+
+  // Return the exit status.
+  return WEXITSTATUS(status);
+}
+#endif
+
+int main(int argc, char **argv) {
+  int r = 0;
+  if (getenv("CRASH_FOR_TEST")) {
+    // Generate an asan report to test ASAN_OPTIONS=exitcode=42
+    int *p = new int;
+    delete p;
+    r = *p;
+  } else {
+    int exit_code = spawn_child(argv);
+    if (exit_code == 42) {
+      printf("got expected 42 exit code\n");
+      fflush(stdout);
+    }
+  }
+  return r;
+}
diff --git a/test/asan/TestCases/interception_failure_test.cc b/test/asan/TestCases/interception_failure_test.cc
index 63d8746..d85500b 100644
--- a/test/asan/TestCases/interception_failure_test.cc
+++ b/test/asan/TestCases/interception_failure_test.cc
@@ -5,13 +5,20 @@
 // RUN: %clangxx_asan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx_asan -O2 %s -o %t && %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx_asan -O3 %s -o %t && %run %t 2>&1 | FileCheck %s
-// On Windows, defining strtoll results in linker errors.
-// XFAIL: freebsd,win32
+// XFAIL: freebsd
+
+// On Windows, defining strtoll in a static build results in linker errors, but
+// it works with the dynamic runtime.
+// XFAIL: win32-static-asan
+
 #include <stdlib.h>
 #include <stdio.h>
+#include <string.h>
 
 extern "C" long strtol(const char *nptr, char **endptr, int base) {
   fprintf(stderr, "my_strtol_interceptor\n");
+  if (endptr)
+    *endptr = (char*)nptr + strlen(nptr);
   return 0;
 }
 
diff --git a/test/asan/TestCases/invalid-free.cc b/test/asan/TestCases/invalid-free.cc
index dd59f5a..be45e43 100644
--- a/test/asan/TestCases/invalid-free.cc
+++ b/test/asan/TestCases/invalid-free.cc
@@ -4,8 +4,7 @@
 // Also works if no malloc context is available.
 // RUN: %env_asan_opts=malloc_context_size=0:fast_unwind_on_malloc=0 not %run %t 2>&1 | FileCheck %s
 // RUN: %env_asan_opts=malloc_context_size=0:fast_unwind_on_malloc=1 not %run %t 2>&1 | FileCheck %s
-// XFAIL: arm-linux-gnueabi
-// XFAIL: armv7l-unknown-linux-gnueabihf
+// REQUIRES: stable-runtime
 
 #include <stdlib.h>
 #include <string.h>
diff --git a/test/asan/TestCases/large_func_test.cc b/test/asan/TestCases/large_func_test.cc
index 8d9afae..1f5f7cc 100644
--- a/test/asan/TestCases/large_func_test.cc
+++ b/test/asan/TestCases/large_func_test.cc
@@ -2,8 +2,7 @@
 // RUN: %clangxx_asan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
 // RUN: %clangxx_asan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
 // RUN: %clangxx_asan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
-// XFAIL: arm-linux-gnueabi
-// XFAIL: armv7l-unknown-linux-gnueabihf
+// REQUIRES: stable-runtime
 
 #include <stdlib.h>
 __attribute__((noinline))
diff --git a/test/asan/TestCases/on_error_callback.cc b/test/asan/TestCases/on_error_callback.cc
index 88a4d2d..f37d1eb 100644
--- a/test/asan/TestCases/on_error_callback.cc
+++ b/test/asan/TestCases/on_error_callback.cc
@@ -1,5 +1,8 @@
 // RUN: %clangxx_asan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s
 
+// FIXME: Doesn't work with DLLs
+// XFAIL: win32-dynamic-asan
+
 #include <stdio.h>
 #include <stdlib.h>
 
diff --git a/test/asan/TestCases/printf-m.c b/test/asan/TestCases/printf-m.c
new file mode 100644
index 0000000..9cd5ae1
--- /dev/null
+++ b/test/asan/TestCases/printf-m.c
@@ -0,0 +1,14 @@
+// RUN: %clang_asan -O2 %s -o %t && %run %t
+
+// FIXME: printf is not intercepted on Windows yet.
+// UNSUPPORTED: win32
+
+#include <stdio.h>
+
+int main() {
+  char s[5] = {'w', 'o', 'r', 'l', 'd'};
+  // Test that %m does not consume an argument. If it does, %s would apply to
+  // the 5-character buffer, resulting in a stack-buffer-overflow report.
+  printf("%m %s, %.5s\n", "hello", s);
+  return 0;
+}
diff --git a/test/asan/TestCases/sanity_check_pure_c.c b/test/asan/TestCases/sanity_check_pure_c.c
index c3a43c8..9d74996 100644
--- a/test/asan/TestCases/sanity_check_pure_c.c
+++ b/test/asan/TestCases/sanity_check_pure_c.c
@@ -3,10 +3,9 @@
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 // Sanity checking a test in pure C with -pie.
-// RUN: %clang_asan -O2 %s -pie -fPIE -o %t
+// RUN: %clang_asan -O2 %s %pie %fPIE -o %t
 // RUN: not %run %t 2>&1 | FileCheck %s
-// XFAIL: arm-linux-gnueabi
-// XFAIL: armv7l-unknown-linux-gnueabihf
+// REQUIRES: stable-runtime
 
 #include <stdlib.h>
 int main() {
diff --git a/test/asan/TestCases/set_shadow_test.c b/test/asan/TestCases/set_shadow_test.c
new file mode 100644
index 0000000..daa79a6
--- /dev/null
+++ b/test/asan/TestCases/set_shadow_test.c
@@ -0,0 +1,69 @@
+// RUN: %clang_asan -O0 %s -o %t
+// RUN: %run %t 0x00 2>&1 | FileCheck %s -check-prefix=X00
+// RUN: not %run %t 0xf1 2>&1 | FileCheck %s -check-prefix=XF1
+// RUN: not %run %t 0xf2 2>&1 | FileCheck %s -check-prefix=XF2
+// RUN: not %run %t 0xf3 2>&1 | FileCheck %s -check-prefix=XF3
+// RUN: not %run %t 0xf5 2>&1 | FileCheck %s -check-prefix=XF5
+// RUN: not %run %t 0xf8 2>&1 | FileCheck %s -check-prefix=XF8
+
+// XFAIL: win32
+
+#include <assert.h>
+#include <sanitizer/asan_interface.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+void __asan_set_shadow_00(size_t addr, size_t size);
+void __asan_set_shadow_f1(size_t addr, size_t size);
+void __asan_set_shadow_f2(size_t addr, size_t size);
+void __asan_set_shadow_f3(size_t addr, size_t size);
+void __asan_set_shadow_f5(size_t addr, size_t size);
+void __asan_set_shadow_f8(size_t addr, size_t size);
+
+char a __attribute__((aligned(8)));
+
+void f(long arg) {
+  size_t shadow_offset;
+  size_t shadow_scale;
+  __asan_get_shadow_mapping(&shadow_scale, &shadow_offset);
+  size_t addr = (((size_t)&a) >> shadow_scale) + shadow_offset;
+
+  switch (arg) {
+  // X00-NOT: AddressSanitizer
+  // X00: PASS
+  case 0x00:
+    return __asan_set_shadow_00(addr, 1);
+  // XF1: AddressSanitizer: stack-buffer-underflow
+  // XF1: [f1]
+  case 0xf1:
+    return __asan_set_shadow_f1(addr, 1);
+  // XF2: AddressSanitizer: stack-buffer-overflow
+  // XF2: [f2]
+  case 0xf2:
+    return __asan_set_shadow_f2(addr, 1);
+  // XF3: AddressSanitizer: stack-buffer-overflow
+  // XF3: [f3]
+  case 0xf3:
+    return __asan_set_shadow_f3(addr, 1);
+  // XF5: AddressSanitizer: stack-use-after-return
+  // XF5: [f5]
+  case 0xf5:
+    return __asan_set_shadow_f5(addr, 1);
+  // XF8: AddressSanitizer: stack-use-after-scope
+  // XF8: [f8]
+  case 0xf8:
+    return __asan_set_shadow_f8(addr, 1);
+  }
+  assert(0);
+}
+
+int main(int argc, char **argv) {
+  assert(argc > 1);
+
+  long arg = strtol(argv[1], 0, 16);
+  f(arg);
+  a = 1;
+  printf("PASS\n");
+  return 0;
+}
diff --git a/test/asan/TestCases/speculative_load.cc b/test/asan/TestCases/speculative_load.cc
index 2409d7a..fdf70eb 100644
--- a/test/asan/TestCases/speculative_load.cc
+++ b/test/asan/TestCases/speculative_load.cc
@@ -27,6 +27,10 @@
     __asan_poison_memory_region(_data._s._ch, 23);
   }
 
+  ~S() {
+    __asan_unpoison_memory_region(_data._s._ch, 23);
+  }
+
   bool is_long() const {
     return _data._s._size & 1;
   }
diff --git a/test/asan/TestCases/strdup_oob_test.cc b/test/asan/TestCases/strdup_oob_test.cc
index 492555a..60c5ef1 100644
--- a/test/asan/TestCases/strdup_oob_test.cc
+++ b/test/asan/TestCases/strdup_oob_test.cc
@@ -9,6 +9,10 @@
 // Unwind problem on arm: "main" is missing from the allocation stack trace.
 // UNSUPPORTED: armv7l-unknown-linux-gnueabihf
 
+// FIXME: We fail to intercept strdup with the dynamic WinASan RTL, so it's not
+// in the stack trace.
+// XFAIL: win32-dynamic-asan
+
 #include <string.h>
 
 char kString[] = "foo";
diff --git a/test/asan/TestCases/strncasecmp_strict.c b/test/asan/TestCases/strncasecmp_strict.c
new file mode 100644
index 0000000..aa65840
--- /dev/null
+++ b/test/asan/TestCases/strncasecmp_strict.c
@@ -0,0 +1,67 @@
+// Test strict_string_checks option in strncasecmp function
+// RUN: %clang_asan %s -o %t
+
+// RUN: %env_asan_opts=strict_string_checks=false %run %t a 2>&1
+// RUN: %env_asan_opts=strict_string_checks=true %run %t a 2>&1
+// RUN: not %run %t b 2>&1 | FileCheck %s
+// RUN: not %run %t c 2>&1 | FileCheck %s
+// RUN: not %run %t d 2>&1 | FileCheck %s
+// RUN: not %run %t e 2>&1 | FileCheck %s
+// RUN: not %run %t f 2>&1 | FileCheck %s
+// RUN: not %run %t g 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=strict_string_checks=false %run %t h 2>&1
+// RUN: %env_asan_opts=strict_string_checks=true not %run %t h 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=strict_string_checks=false %run %t i 2>&1
+// RUN: %env_asan_opts=strict_string_checks=true not %run %t i 2>&1 | FileCheck %s
+
+// XFAIL: win32
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+int main(int argc, char **argv) {
+  assert(argc >= 2);
+  const size_t size = 100;
+  char fill = 'o';
+  char s1[size];
+  char s2[size];
+  memset(s1, fill, size);
+  memset(s2, fill, size);
+
+  switch (argv[1][0]) {
+    case 'a':
+      s1[size - 1] = 'z';
+      s2[size - 1] = 'x';
+      for (int i = 0; i <= size; ++i)
+        assert((strncasecmp(s1, s2, i) == 0) == (i < size));
+      s1[size - 1] = '\0';
+      s2[size - 1] = '\0';
+      assert(strncasecmp(s1, s2, 2*size) == 0);
+      break;
+    case 'b':
+      return strncasecmp(s1-1, s2, 1);
+    case 'c':
+      return strncasecmp(s1, s2-1, 1);
+    case 'd':
+      return strncasecmp(s1+size, s2, 1);
+    case 'e':
+      return strncasecmp(s1, s2+size, 1);
+    case 'f':
+      return strncasecmp(s1+1, s2, size);
+    case 'g':
+      return strncasecmp(s1, s2+1, size);
+    case 'h':
+      s1[size - 1] = '\0';
+      assert(strncasecmp(s1, s2, 2*size) != 0);
+      break;
+    case 'i':
+      s2[size - 1] = '\0';
+      assert(strncasecmp(s1, s2, 2*size) != 0);
+      break;
+    // CHECK: {{.*}}ERROR: AddressSanitizer: stack-buffer-{{ov|und}}erflow on address
+  }
+  return 0;
+}
diff --git a/test/asan/TestCases/strncmp_strict.c b/test/asan/TestCases/strncmp_strict.c
new file mode 100644
index 0000000..5b54290
--- /dev/null
+++ b/test/asan/TestCases/strncmp_strict.c
@@ -0,0 +1,65 @@
+// Test strict_string_checks option in strncmp function
+// RUN: %clang_asan %s -o %t
+
+// RUN: %env_asan_opts=strict_string_checks=false %run %t a 2>&1
+// RUN: %env_asan_opts=strict_string_checks=true %run %t a 2>&1
+// RUN: not %run %t b 2>&1 | FileCheck %s
+// RUN: not %run %t c 2>&1 | FileCheck %s
+// RUN: not %run %t d 2>&1 | FileCheck %s
+// RUN: not %run %t e 2>&1 | FileCheck %s
+// RUN: not %run %t f 2>&1 | FileCheck %s
+// RUN: not %run %t g 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=strict_string_checks=false %run %t h 2>&1
+// RUN: %env_asan_opts=strict_string_checks=true not %run %t h 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=strict_string_checks=false %run %t i 2>&1
+// RUN: %env_asan_opts=strict_string_checks=true not %run %t i 2>&1 | FileCheck %s
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+int main(int argc, char **argv) {
+  assert(argc >= 2);
+  const size_t size = 100;
+  char fill = 'o';
+  char s1[size];
+  char s2[size];
+  memset(s1, fill, size);
+  memset(s2, fill, size);
+
+  switch (argv[1][0]) {
+    case 'a':
+      s1[size - 1] = 'z';
+      s2[size - 1] = 'x';
+      for (int i = 0; i <= size; ++i)
+        assert((strncmp(s1, s2, i) == 0) == (i < size));
+      s1[size - 1] = '\0';
+      s2[size - 1] = '\0';
+      assert(strncmp(s1, s2, 2*size) == 0);
+      break;
+    case 'b':
+      return strncmp(s1-1, s2, 1);
+    case 'c':
+      return strncmp(s1, s2-1, 1);
+    case 'd':
+      return strncmp(s1+size, s2, 1);
+    case 'e':
+      return strncmp(s1, s2+size, 1);
+    case 'f':
+      return strncmp(s1+1, s2, size);
+    case 'g':
+      return strncmp(s1, s2+1, size);
+    case 'h':
+      s1[size - 1] = '\0';
+      assert(strncmp(s1, s2, 2*size) != 0);
+      break;
+    case 'i':
+      s2[size - 1] = '\0';
+      assert(strncmp(s1, s2, 2*size) != 0);
+      break;
+    // CHECK: {{.*}}ERROR: AddressSanitizer: stack-buffer-{{ov|und}}erflow on address
+  }
+  return 0;
+}
diff --git a/test/asan/TestCases/strncpy-overflow.cc b/test/asan/TestCases/strncpy-overflow.cc
index 651ae22..7da9a8f 100644
--- a/test/asan/TestCases/strncpy-overflow.cc
+++ b/test/asan/TestCases/strncpy-overflow.cc
@@ -4,8 +4,7 @@
 // RUN: %clangxx_asan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
 
 // REQUIRES: compiler-rt-optimized
-// XFAIL: arm-linux-gnueabi
-// XFAIL: armv7l-unknown-linux-gnueabihf
+// REQUIRES: stable-runtime
 
 #include <string.h>
 #include <stdlib.h>
diff --git a/test/asan/TestCases/strstr-1.c b/test/asan/TestCases/strstr-1.c
index d0fa25b..06a8a8a 100644
--- a/test/asan/TestCases/strstr-1.c
+++ b/test/asan/TestCases/strstr-1.c
@@ -15,7 +15,7 @@
   char s1[4] = "acb";
   __asan_poison_memory_region ((char *)&s1[2], 2);
   r = strstr(s1, s2);
-  // CHECK:'s1' <== Memory access at offset {{[0-9]+}} partially overflows this variable
+  // CHECK:'s1' <== Memory access at offset {{[0-9]+}} {{partially overflows this variable|is inside this variable}}
   assert(r == s1 + 1);
   return 0;
 }
diff --git a/test/asan/TestCases/strstr_strict.c b/test/asan/TestCases/strstr_strict.c
index 35ad93c..63e6b25 100644
--- a/test/asan/TestCases/strstr_strict.c
+++ b/test/asan/TestCases/strstr_strict.c
@@ -17,7 +17,7 @@
   s2[size - 1]='\0';
   char* r = strstr(s1, s2);
   // CHECK: {{.*ERROR: AddressSanitizer: heap-buffer-overflow on address}}
-  // CHECK: READ of size 101
+  // CHECK: READ of size {{101|100}}
   assert(r == s1);
   free(s1);
   free(s2);
diff --git a/test/asan/TestCases/suppressions-library.cc b/test/asan/TestCases/suppressions-library.cc
index ad6e092..e95d339 100644
--- a/test/asan/TestCases/suppressions-library.cc
+++ b/test/asan/TestCases/suppressions-library.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
+// RUN: %clangxx_asan -O0 -DSHARED_LIB %s %fPIC -shared -o %dynamiclib %ld_flags_rpath_so
 // RUN: %clangxx_asan -O0 %s -o %t %ld_flags_rpath_exe
 
 // Check that without suppressions, we catch the issue.
diff --git a/test/asan/TestCases/use-after-delete.cc b/test/asan/TestCases/use-after-delete.cc
index 8fdec8d..1cc8c2f 100644
--- a/test/asan/TestCases/use-after-delete.cc
+++ b/test/asan/TestCases/use-after-delete.cc
@@ -2,8 +2,7 @@
 // RUN: %clangxx_asan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
 // RUN: %clangxx_asan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
 // RUN: %clangxx_asan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
-// XFAIL: arm-linux-gnueabi
-// XFAIL: armv7l-unknown-linux-gnueabihf
+// REQUIRES: stable-runtime
 
 #include <stdlib.h>
 int main() {
diff --git a/test/asan/TestCases/use-after-free-right.cc b/test/asan/TestCases/use-after-free-right.cc
index f714b44..d72370e 100644
--- a/test/asan/TestCases/use-after-free-right.cc
+++ b/test/asan/TestCases/use-after-free-right.cc
@@ -2,8 +2,7 @@
 // RUN: %clangxx_asan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
 // RUN: %clangxx_asan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
 // RUN: %clangxx_asan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
-// XFAIL: arm-linux-gnueabi
-// XFAIL: armv7l-unknown-linux-gnueabihf
+// REQUIRES: stable-runtime
 
 // Test use-after-free report in the case when access is at the right border of
 // the allocation.
diff --git a/test/asan/TestCases/use-after-free.cc b/test/asan/TestCases/use-after-free.cc
index 7bc225b..c96d7f2 100644
--- a/test/asan/TestCases/use-after-free.cc
+++ b/test/asan/TestCases/use-after-free.cc
@@ -2,8 +2,7 @@
 // RUN: %clangxx_asan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
 // RUN: %clangxx_asan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
 // RUN: %clangxx_asan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
-// XFAIL: arm-linux-gnueabi
-// XFAIL: armv7l-unknown-linux-gnueabihf
+// REQUIRES: stable-runtime
 
 #include <stdlib.h>
 int main() {
diff --git a/test/asan/TestCases/use-after-scope-goto.cc b/test/asan/TestCases/use-after-scope-goto.cc
new file mode 100644
index 0000000..351cbe9
--- /dev/null
+++ b/test/asan/TestCases/use-after-scope-goto.cc
@@ -0,0 +1,73 @@
+// RUN: %clangxx_asan -O0 -fsanitize-address-use-after-scope %s -o %t && %run %t
+
+// Functions jump over variable initialization, making lifetime analysis
+// ambiguous. ASan should ignore such variables and the program must not fail.
+
+#include <stdlib.h>
+
+int *ptr;
+
+void f1(int cond) {
+  if (cond)
+    goto label;
+  int tmp;
+
+ label:
+  ptr = &tmp;
+  *ptr = 5;
+}
+
+void f2(int cond) {
+  switch (cond) {
+  case 1: {
+    ++cond;
+    int tmp;
+    ptr = &tmp;
+    exit(0);
+  case 2:
+    ptr = &tmp;
+    *ptr = 5;
+    exit(0);
+  }
+  }
+}
+
+void f3(int cond) {
+  {
+    int tmp;
+    goto l2;
+  l1:
+    ptr = &tmp;
+    *ptr = 5;
+
+    exit(0);
+  }
+ l2:
+  goto l1;
+}
+
+void use(int *x) {
+  static int c = 10;
+  if (--c == 0)
+    exit(0);
+  (*x)++;
+}
+
+void f4() {
+  {
+    int x;
+ l2:
+    use(&x);
+    goto l1;
+  }
+ l1:
+  goto l2;
+}
+
+int main() {
+  f1(1);
+  f2(1);
+  f3(1);
+  f4();
+  return 0;
+}
diff --git a/test/asan/TestCases/use-after-scope-inlined.cc b/test/asan/TestCases/use-after-scope-inlined.cc
index fc8c7f7..98a455c 100644
--- a/test/asan/TestCases/use-after-scope-inlined.cc
+++ b/test/asan/TestCases/use-after-scope-inlined.cc
@@ -24,5 +24,5 @@
   // CHECK: Address 0x{{.*}} is located in stack of thread T0 at offset
   // CHECK:      [[OFFSET:[^ ]*]] in frame
   // CHECK: main
-  // CHECK:   {{\[}}[[OFFSET]], {{.*}}) 'x.i'
+  // CHECK:   {{\[}}[[OFFSET]], {{.*}}) 'x.i:[[@LINE-15]]'
 }
diff --git a/test/asan/TestCases/use-after-scope-loop-bug.cc b/test/asan/TestCases/use-after-scope-loop-bug.cc
index 4f483f2..d4b12ec 100644
--- a/test/asan/TestCases/use-after-scope-loop-bug.cc
+++ b/test/asan/TestCases/use-after-scope-loop-bug.cc
@@ -1,7 +1,7 @@
 // RUN: %clangxx_asan -O1 -fsanitize-address-use-after-scope %s -o %t && \
 // RUN:     not %run %t 2>&1 | FileCheck %s
 
-int *p;
+volatile int *p;
 
 int main() {
   // Variable goes in and out of scope.
diff --git a/test/asan/TestCases/use-after-scope-temp2.cc b/test/asan/TestCases/use-after-scope-temp2.cc
new file mode 100644
index 0000000..1ecd3cb
--- /dev/null
+++ b/test/asan/TestCases/use-after-scope-temp2.cc
@@ -0,0 +1,20 @@
+// RUN: %clangxx_asan %stdcxx11 -O1 -fsanitize-address-use-after-scope %s -o %t && \
+// RUN:     not %run %t 2>&1 | FileCheck %s
+
+
+struct IntHolder {
+  const IntHolder& Self() const {
+    return *this;
+  }
+  int val = 3;
+};
+
+const IntHolder *saved;
+
+int main(int argc, char *argv[]) {
+  saved = &IntHolder().Self();
+  int x = saved->val;  // BOOM
+  // CHECK: ERROR: AddressSanitizer: stack-use-after-scope
+  // CHECK:  #0 0x{{.*}} in main {{.*}}use-after-scope-temp2.cc:[[@LINE-2]]
+  return x;
+}
diff --git a/test/asan/TestCases/use-after-scope-types.cc b/test/asan/TestCases/use-after-scope-types.cc
index b213681..ec6742b 100644
--- a/test/asan/TestCases/use-after-scope-types.cc
+++ b/test/asan/TestCases/use-after-scope-types.cc
@@ -1,17 +1,15 @@
 // RUN: %clangxx_asan %stdcxx11 -O0 -fsanitize-address-use-after-scope %s -o %t
-// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 0 2>&1 | FileCheck %s
-// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 1 2>&1 | FileCheck %s
-// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 2 2>&1 | FileCheck %s
-// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 3 2>&1 | FileCheck %s
-// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 4 2>&1 | FileCheck %s
-// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 5 2>&1 | FileCheck %s
-// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 6 2>&1 | FileCheck %s
-// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 7 2>&1 | FileCheck %s
-// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 8 2>&1 | FileCheck %s
-// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 9 2>&1 | FileCheck %s
-// RUN: %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 10 2>&1 | FileCheck %s
-
-// RUN: %env_asan_opts=detect_stack_use_after_scope=0 %run %t 11
+// RUN: not %run %t 0 2>&1 | FileCheck %s
+// RUN: not %run %t 1 2>&1 | FileCheck %s
+// RUN: not %run %t 2 2>&1 | FileCheck %s
+// RUN: not %run %t 3 2>&1 | FileCheck %s
+// RUN: not %run %t 4 2>&1 | FileCheck %s
+// RUN: not %run %t 5 2>&1 | FileCheck %s
+// RUN: not %run %t 6 2>&1 | FileCheck %s
+// RUN: not %run %t 7 2>&1 | FileCheck %s
+// RUN: not %run %t 8 2>&1 | FileCheck %s
+// RUN: not %run %t 9 2>&1 | FileCheck %s
+// RUN: not %run %t 10 2>&1 | FileCheck %s
 
 #include <stdlib.h>
 #include <string>
@@ -34,7 +32,7 @@
   T *t;
 };
 
-template <class T> void test() {
+template <class T> __attribute__((noinline)) void test() {
   Ptr<T> ptr;
   {
     T x;
diff --git a/test/asan/TestCases/use-after-scope.cc b/test/asan/TestCases/use-after-scope.cc
index f669bf9..d92dae6 100644
--- a/test/asan/TestCases/use-after-scope.cc
+++ b/test/asan/TestCases/use-after-scope.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_asan -O1 -fsanitize-address-use-after-scope %s -o %t && \
-// RUN:     %env_asan_opts=detect_stack_use_after_scope=1 not %run %t 2>&1 | FileCheck %s
-
-// RUN: %env_asan_opts=detect_stack_use_after_scope=0 %run %t
+// RUN:     not %run %t 2>&1 | FileCheck %s
 
 volatile int *p = 0;
 
diff --git a/test/asan/android_commands/android_run.py b/test/asan/android_commands/android_run.py
index 272d211..f4ea52b 100755
--- a/test/asan/android_commands/android_run.py
+++ b/test/asan/android_commands/android_run.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 
-import os, sys, subprocess, tempfile
+import os, signal, sys, subprocess, tempfile
 from android_common import *
 
 ANDROID_TMPDIR = '/data/local/tmp/Output'
@@ -11,8 +11,7 @@
 def build_env():
     args = []
     # Android linker ignores RPATH. Set LD_LIBRARY_PATH to Output dir.
-    args.append('LD_LIBRARY_PATH=%s:%s' %
-                (ANDROID_TMPDIR, os.environ.get('LD_LIBRARY_PATH', '')))
+    args.append('LD_LIBRARY_PATH=%s' % (ANDROID_TMPDIR,))
     for (key, value) in os.environ.items():
         if key in ['ASAN_OPTIONS', 'ASAN_ACTIVATION_OPTIONS']:
             args.append('%s="%s"' % (key, value))
@@ -34,4 +33,9 @@
 
 sys.stdout.write(pull_from_device(device_stdout))
 sys.stderr.write(pull_from_device(device_stderr))
-sys.exit(int(pull_from_device(device_exitcode)))
+retcode = int(pull_from_device(device_exitcode))
+# If the device process died with a signal, do abort().
+# Not exactly the same, but good enough to fool "not --crash".
+if retcode > 128:
+  os.kill(os.getpid(), signal.SIGABRT)
+sys.exit(retcode)
diff --git a/test/asan/lit.cfg b/test/asan/lit.cfg
index f2c0c87..7703f5a 100644
--- a/test/asan/lit.cfg
+++ b/test/asan/lit.cfg
@@ -37,6 +37,12 @@
   # Also, make sure we do not overwhelm the syslog while testing.
   default_asan_opts = 'abort_on_error=0'
   default_asan_opts += ':log_to_syslog=0'
+elif config.android:
+  # The same as on Darwin, we default to "abort_on_error=1" which slows down
+  # testing. Also, all existing tests are using "not" instead of "not --crash"
+  # which does not work for abort()-terminated programs.
+  default_asan_opts = 'abort_on_error=0'
+
 if default_asan_opts:
   config.environment['ASAN_OPTIONS'] = default_asan_opts
   default_asan_opts += ':'
@@ -55,18 +61,18 @@
 # GCC-ASan doesn't link in all the necessary libraries automatically, so
 # we have to do it ourselves.
 if config.compiler_id == 'GNU':
-  extra_linkflags = ["-pthread", "-lstdc++", libdl_flag]
+  extra_link_flags = ["-pthread", "-lstdc++", libdl_flag]
 else:
-  extra_linkflags = []
+  extra_link_flags = []
 
 # BFD linker in 64-bit android toolchains fails to find libm.so, which is a
 # transitive shared library dependency (via asan runtime).
 if config.android:
-  extra_linkflags += ["-lm"]
+  extra_link_flags += ["-lm"]
 
 # Setup default compiler flags used with -fsanitize=address option.
 # FIXME: Review the set of required flags and check if it can be reduced.
-target_cflags = [get_required_attr(config, "target_cflags")] + extra_linkflags
+target_cflags = [get_required_attr(config, "target_cflags")] + extra_link_flags
 target_cxxflags = config.cxx_mode_flags + target_cflags
 clang_asan_static_cflags = (["-fsanitize=address",
                             "-mno-omit-leaf-frame-pointer",
@@ -77,14 +83,26 @@
   clang_asan_static_cflags.append("-mbackchain")
 clang_asan_static_cxxflags = config.cxx_mode_flags + clang_asan_static_cflags
 
+asan_dynamic_flags = []
 if config.asan_dynamic:
-  clang_asan_cflags = clang_asan_static_cflags + ['-shared-libasan']
-  clang_asan_cxxflags = clang_asan_static_cxxflags + ['-shared-libasan']
+  asan_dynamic_flags = ["-shared-libasan"]
+  # On Windows, we need to simulate "clang-cl /MD" on the clang driver side.
+  if platform.system() == 'Windows':
+    asan_dynamic_flags += ["-D_MT", "-D_DLL", "-Wl,-nodefaultlib:libcmt,-defaultlib:msvcrt,-defaultlib:oldnames"]
   config.available_features.add("asan-dynamic-runtime")
 else:
-  clang_asan_cflags = clang_asan_static_cflags
-  clang_asan_cxxflags = clang_asan_static_cxxflags
   config.available_features.add("asan-static-runtime")
+clang_asan_cflags = clang_asan_static_cflags + asan_dynamic_flags
+clang_asan_cxxflags = clang_asan_static_cxxflags + asan_dynamic_flags
+
+# Add win32-(static|dynamic)-asan features to mark tests as passing or failing
+# in those modes. lit doesn't support logical feature test combinations.
+if platform.system() == 'Windows':
+  if config.asan_dynamic:
+    win_runtime_feature = "win32-dynamic-asan"
+  else:
+    win_runtime_feature = "win32-static-asan"
+  config.available_features.add(win_runtime_feature)
 
 asan_lit_source_dir = get_required_attr(config, "asan_lit_source_dir")
 if config.android == "1":
@@ -98,11 +116,16 @@
 def build_invocation(compile_flags):
   return " " + " ".join([clang_wrapper, config.clang] + compile_flags) + " "
 
+# The Clang driver maps the 'x86' (i686) architecture to 'i386'.
+target_arch = config.target_arch
+if (target_arch == "i686"):
+  target_arch = "i386"
+
 config.substitutions.append( ("%clang ", build_invocation(target_cflags)) )
 config.substitutions.append( ("%clangxx ", build_invocation(target_cxxflags)) )
 config.substitutions.append( ("%clang_asan ", build_invocation(clang_asan_cflags)) )
 config.substitutions.append( ("%clangxx_asan ", build_invocation(clang_asan_cxxflags)) )
-config.substitutions.append( ("%shared_libasan", "libclang_rt.asan-%s.so" % config.target_arch))
+config.substitutions.append( ("%shared_libasan", "libclang_rt.asan-%s.so" % target_arch))
 if config.asan_dynamic:
   config.substitutions.append( ("%clang_asan_static ", build_invocation(clang_asan_static_cflags)) )
   config.substitutions.append( ("%clangxx_asan_static ", build_invocation(clang_asan_static_cxxflags)) )
@@ -194,12 +217,27 @@
   libasan_dir = os.path.join(gcc_dir, "..", "lib" + config.bits)
   push_dynamic_library_lookup_path(config, libasan_dir)
 
+# Add the RT libdir to PATH directly so that we can successfully run the gtest
+# binary to list its tests.
+if config.host_os == 'Windows' and config.asan_dynamic:
+  os.environ['PATH'] = os.path.pathsep.join([config.compiler_rt_libdir,
+                                             os.environ.get('PATH', '')])
+
 # Default test suffixes.
 config.suffixes = ['.c', '.cc', '.cpp']
 
 if config.host_os == 'Darwin':
   config.suffixes.append('.mm')
 
+if config.host_os == 'Windows':
+  config.substitutions.append(('%fPIC', ''))
+  config.substitutions.append(('%fPIE', ''))
+  config.substitutions.append(('%pie', ''))
+else:
+  config.substitutions.append(('%fPIC', '-fPIC'))
+  config.substitutions.append(('%fPIE', '-fPIE'))
+  config.substitutions.append(('%pie', '-pie'))
+
 # Only run the tests on supported OSs.
 if config.host_os not in ['Linux', 'Darwin', 'FreeBSD', 'Windows']:
   config.unsupported = True
diff --git a/test/builtins/Unit/cpu_model_test.c b/test/builtins/Unit/cpu_model_test.c
index 5a918bd..6a17037 100644
--- a/test/builtins/Unit/cpu_model_test.c
+++ b/test/builtins/Unit/cpu_model_test.c
@@ -11,6 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+// REQUIRES: x86-target-arch
+
 int main (void) {
   if(__builtin_cpu_supports("avx2"))
     return 4;
diff --git a/test/builtins/Unit/floattitf_test.c b/test/builtins/Unit/floattitf_test.c
new file mode 100644
index 0000000..928b2e8
--- /dev/null
+++ b/test/builtins/Unit/floattitf_test.c
@@ -0,0 +1,213 @@
+//===-- floattitf.c - Test __floattitf ------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tests __floattitf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+#include <float.h>
+#include <stdio.h>
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+
+/* Returns: convert a ti_int to a fp_t, rounding toward even. */
+
+/* Assumption: fp_t is an IEEE 128 bit floating point type
+ *             ti_int is a 128 bit integral type
+ */
+
+/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm |
+ * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+COMPILER_RT_ABI fp_t __floattitf(ti_int a);
+
+int test__floattitf(ti_int a, fp_t expected) {
+    fp_t x = __floattitf(a);
+    if (x != expected) {
+        twords at;
+        at.all = a;
+        printf("error in __floattitf(0x%.16llX%.16llX) = %LA, expected %LA\n",
+               at.s.high, at.s.low, x, expected);
+    }
+    return x != expected;
+}
+
+char assumption_1[sizeof(ti_int) == 2*sizeof(di_int)] = {0};
+char assumption_2[sizeof(ti_int)*CHAR_BIT == 128] = {0};
+char assumption_3[sizeof(fp_t)*CHAR_BIT == 128] = {0};
+
+#endif
+
+int main() {
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+    if (test__floattitf(0, 0.0))
+        return 1;
+
+    if (test__floattitf(1, 1.0))
+        return 1;
+    if (test__floattitf(2, 2.0))
+        return 1;
+    if (test__floattitf(20, 20.0))
+        return 1;
+    if (test__floattitf(-1, -1.0))
+        return 1;
+    if (test__floattitf(-2, -2.0))
+        return 1;
+    if (test__floattitf(-20, -20.0))
+        return 1;
+
+    if (test__floattitf(0x7FFFFF8000000000LL, 0x1.FFFFFEp+62))
+        return 1;
+    if (test__floattitf(0x7FFFFFFFFFFFF800LL, 0x1.FFFFFFFFFFFFEp+62))
+        return 1;
+    if (test__floattitf(0x7FFFFF0000000000LL, 0x1.FFFFFCp+62))
+        return 1;
+    if (test__floattitf(0x7FFFFFFFFFFFF000LL, 0x1.FFFFFFFFFFFFCp+62))
+        return 1;
+
+    if (test__floattitf(make_ti(0x8000008000000000LL, 0), -0x1.FFFFFEp+126))
+        return 1;
+    if (test__floattitf(make_ti(0x8000000000000800LL, 0), -0x1.FFFFFFFFFFFFEp+126))
+        return 1;
+    if (test__floattitf(make_ti(0x8000010000000000LL, 0), -0x1.FFFFFCp+126))
+        return 1;
+    if (test__floattitf(make_ti(0x8000000000001000LL, 0), -0x1.FFFFFFFFFFFFCp+126))
+        return 1;
+
+    if (test__floattitf(make_ti(0x8000000000000000LL, 0), -0x1.000000p+127))
+        return 1;
+    if (test__floattitf(make_ti(0x8000000000000001LL, 0), -0x1.FFFFFFFFFFFFFFFCp+126L))
+        return 1;
+
+    if (test__floattitf(0x0007FB72E8000000LL, 0x1.FEDCBAp+50))
+        return 1;
+
+    if (test__floattitf(0x0007FB72EA000000LL, 0x1.FEDCBA8p+50))
+        return 1;
+    if (test__floattitf(0x0007FB72EB000000LL, 0x1.FEDCBACp+50))
+        return 1;
+    if (test__floattitf(0x0007FB72EBFFFFFFLL, 0x1.FEDCBAFFFFFFCp+50))
+        return 1;
+    if (test__floattitf(0x0007FB72EC000000LL, 0x1.FEDCBBp+50))
+        return 1;
+    if (test__floattitf(0x0007FB72E8000001LL, 0x1.FEDCBA0000004p+50))
+        return 1;
+
+    if (test__floattitf(0x0007FB72E6000000LL, 0x1.FEDCB98p+50))
+        return 1;
+    if (test__floattitf(0x0007FB72E7000000LL, 0x1.FEDCB9Cp+50))
+        return 1;
+    if (test__floattitf(0x0007FB72E7FFFFFFLL, 0x1.FEDCB9FFFFFFCp+50))
+        return 1;
+    if (test__floattitf(0x0007FB72E4000001LL, 0x1.FEDCB90000004p+50))
+        return 1;
+    if (test__floattitf(0x0007FB72E4000000LL, 0x1.FEDCB9p+50))
+        return 1;
+
+    if (test__floattitf(0x023479FD0E092DC0LL, 0x1.1A3CFE870496Ep+57))
+        return 1;
+    if (test__floattitf(0x023479FD0E092DA1LL, 0x1.1A3CFE870496D08p+57L))
+        return 1;
+    if (test__floattitf(0x023479FD0E092DB0LL, 0x1.1A3CFE870496D8p+57L))
+        return 1;
+    if (test__floattitf(0x023479FD0E092DB8LL, 0x1.1A3CFE870496DCp+57L))
+        return 1;
+    if (test__floattitf(0x023479FD0E092DB6LL, 0x1.1A3CFE870496DBp+57L))
+        return 1;
+    if (test__floattitf(0x023479FD0E092DBFLL, 0x1.1A3CFE870496DF8p+57L))
+        return 1;
+    if (test__floattitf(0x023479FD0E092DC1LL, 0x1.1A3CFE870496E08p+57L))
+        return 1;
+    if (test__floattitf(0x023479FD0E092DC7LL, 0x1.1A3CFE870496E38p+57L))
+        return 1;
+    if (test__floattitf(0x023479FD0E092DC8LL, 0x1.1A3CFE870496E4p+57L))
+        return 1;
+    if (test__floattitf(0x023479FD0E092DCFLL, 0x1.1A3CFE870496E78p+57L))
+        return 1;
+    if (test__floattitf(0x023479FD0E092DD0LL, 0x1.1A3CFE870496E8p+57L))
+        return 1;
+    if (test__floattitf(0x023479FD0E092DD1LL, 0x1.1A3CFE870496E88p+57L))
+        return 1;
+    if (test__floattitf(0x023479FD0E092DD8LL, 0x1.1A3CFE870496ECp+57L))
+        return 1;
+    if (test__floattitf(0x023479FD0E092DDFLL, 0x1.1A3CFE870496EF8p+57L))
+        return 1;
+    if (test__floattitf(0x023479FD0E092DE0LL, 0x1.1A3CFE870496Fp+57))
+        return 1;
+
+    if (test__floattitf(make_ti(0x023479FD0E092DC0LL, 0), 0x1.1A3CFE870496Ep+121L))
+        return 1;
+    if (test__floattitf(make_ti(0x023479FD0E092DA1LL, 1), 0x1.1A3CFE870496D08p+121L))
+        return 1;
+    if (test__floattitf(make_ti(0x023479FD0E092DB0LL, 2), 0x1.1A3CFE870496D8p+121L))
+        return 1;
+    if (test__floattitf(make_ti(0x023479FD0E092DB8LL, 3), 0x1.1A3CFE870496DCp+121L))
+        return 1;
+    if (test__floattitf(make_ti(0x023479FD0E092DB6LL, 4), 0x1.1A3CFE870496DBp+121L))
+        return 1;
+    if (test__floattitf(make_ti(0x023479FD0E092DBFLL, 5), 0x1.1A3CFE870496DF8p+121L))
+        return 1;
+    if (test__floattitf(make_ti(0x023479FD0E092DC1LL, 6), 0x1.1A3CFE870496E08p+121L))
+        return 1;
+    if (test__floattitf(make_ti(0x023479FD0E092DC7LL, 7), 0x1.1A3CFE870496E38p+121L))
+        return 1;
+    if (test__floattitf(make_ti(0x023479FD0E092DC8LL, 8), 0x1.1A3CFE870496E4p+121L))
+        return 1;
+    if (test__floattitf(make_ti(0x023479FD0E092DCFLL, 9), 0x1.1A3CFE870496E78p+121L))
+        return 1;
+    if (test__floattitf(make_ti(0x023479FD0E092DD0LL, 0), 0x1.1A3CFE870496E8p+121L))
+        return 1;
+    if (test__floattitf(make_ti(0x023479FD0E092DD1LL, 11), 0x1.1A3CFE870496E88p+121L))
+        return 1;
+    if (test__floattitf(make_ti(0x023479FD0E092DD8LL, 12), 0x1.1A3CFE870496ECp+121L))
+        return 1;
+    if (test__floattitf(make_ti(0x023479FD0E092DDFLL, 13), 0x1.1A3CFE870496EF8p+121L))
+        return 1;
+    if (test__floattitf(make_ti(0x023479FD0E092DE0LL, 14), 0x1.1A3CFE870496Fp+121L))
+        return 1;
+
+    if (test__floattitf(make_ti(0, 0xFFFFFFFFFFFFFFFFLL), 0x1.FFFFFFFFFFFFFFFEp+63L))
+        return 1;
+
+    if (test__floattitf(make_ti(0x123456789ABCDEF0LL, 0x123456789ABC2801LL),
+                        0x1.23456789ABCDEF0123456789ABC3p+124L))
+        return 1;
+    if (test__floattitf(make_ti(0x123456789ABCDEF0LL, 0x123456789ABC3000LL),
+                        0x1.23456789ABCDEF0123456789ABC3p+124L))
+        return 1;
+    if (test__floattitf(make_ti(0x123456789ABCDEF0LL, 0x123456789ABC37FFLL),
+                        0x1.23456789ABCDEF0123456789ABC3p+124L))
+        return 1;
+    if (test__floattitf(make_ti(0x123456789ABCDEF0LL, 0x123456789ABC3800LL),
+                        0x1.23456789ABCDEF0123456789ABC4p+124L))
+        return 1;
+    if (test__floattitf(make_ti(0x123456789ABCDEF0LL, 0x123456789ABC4000LL),
+                        0x1.23456789ABCDEF0123456789ABC4p+124L))
+        return 1;
+    if (test__floattitf(make_ti(0x123456789ABCDEF0LL, 0x123456789ABC47FFLL),
+                        0x1.23456789ABCDEF0123456789ABC4p+124L))
+        return 1;
+    if (test__floattitf(make_ti(0x123456789ABCDEF0LL, 0x123456789ABC4800LL),
+                        0x1.23456789ABCDEF0123456789ABC4p+124L))
+        return 1;
+    if (test__floattitf(make_ti(0x123456789ABCDEF0LL, 0x123456789ABC4801LL),
+                        0x1.23456789ABCDEF0123456789ABC5p+124L))
+        return 1;
+    if (test__floattitf(make_ti(0x123456789ABCDEF0LL, 0x123456789ABC57FFLL),
+                        0x1.23456789ABCDEF0123456789ABC5p+124L))
+        return 1;
+#else
+    printf("skipped\n");
+#endif
+   return 0;
+}
diff --git a/test/builtins/Unit/floatuntitf_test.c b/test/builtins/Unit/floatuntitf_test.c
new file mode 100644
index 0000000..495adcf
--- /dev/null
+++ b/test/builtins/Unit/floatuntitf_test.c
@@ -0,0 +1,220 @@
+//===-- floatuntitf_test.c - Test __floatuntitf ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tests __floatuntitf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+#include <float.h>
+#include <stdio.h>
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+
+/* Returns: convert a tu_int to a fp_t, rounding to nearest, ties to even. */
+
+/* Assumption: fp_t is an IEEE 128-bit floating point type
+ *             tu_int is a 128-bit integral type
+ */
+
+/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm |
+ * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+COMPILER_RT_ABI fp_t __floatuntitf(tu_int a);
+
+int test__floatuntitf(tu_int a, fp_t expected) {
+    /* Returns 0 on a match; otherwise prints both values and returns 1. */
+    const fp_t actual = __floatuntitf(a);
+    if (actual == expected)
+        return 0;
+    const utwords words = { .all = a };
+    printf("error in __floatuntitf(0x%.16llX%.16llX) = %LA, expected %LA\n",
+           words.s.high, words.s.low, actual, expected);
+    return 1;
+}
+
+char assumption_1[sizeof(tu_int) == 2*sizeof(du_int)] = {0};
+char assumption_2[sizeof(tu_int)*CHAR_BIT == 128] = {0};
+char assumption_3[sizeof(fp_t)*CHAR_BIT == 128] = {0};
+
+#endif
+
+int main() {
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+    if (test__floatuntitf(0, 0.0))
+        return 1;
+
+    if (test__floatuntitf(1, 1.0))
+        return 1;
+    if (test__floatuntitf(2, 2.0))
+        return 1;
+    if (test__floatuntitf(20, 20.0))
+        return 1;
+
+    if (test__floatuntitf(0x7FFFFF8000000000ULL, 0x1.FFFFFEp+62))
+        return 1;
+    if (test__floatuntitf(0x7FFFFFFFFFFFF800ULL, 0x1.FFFFFFFFFFFFEp+62))
+        return 1;
+    if (test__floatuntitf(0x7FFFFF0000000000ULL, 0x1.FFFFFCp+62))
+        return 1;
+    if (test__floatuntitf(0x7FFFFFFFFFFFF000ULL, 0x1.FFFFFFFFFFFFCp+62))
+        return 1;
+    if (test__floatuntitf(0x7FFFFFFFFFFFFFFFULL, 0xF.FFFFFFFFFFFFFFEp+59L))
+        return 1;
+    if (test__floatuntitf(0xFFFFFFFFFFFFFFFEULL, 0xF.FFFFFFFFFFFFFFEp+60L))
+        return 1;
+    if (test__floatuntitf(0xFFFFFFFFFFFFFFFFULL, 0xF.FFFFFFFFFFFFFFFp+60L))
+        return 1;
+
+    if (test__floatuntitf(0x8000008000000000ULL, 0x8.000008p+60))
+        return 1;
+    if (test__floatuntitf(0x8000000000000800ULL, 0x8.0000000000008p+60))
+        return 1;
+    if (test__floatuntitf(0x8000010000000000ULL, 0x8.00001p+60))
+        return 1;
+    if (test__floatuntitf(0x8000000000001000ULL, 0x8.000000000001p+60))
+        return 1;
+
+    if (test__floatuntitf(0x8000000000000000ULL, 0x8p+60))
+        return 1;
+    if (test__floatuntitf(0x8000000000000001ULL, 0x8.000000000000001p+60L))
+        return 1;
+
+    if (test__floatuntitf(0x0007FB72E8000000LL, 0x1.FEDCBAp+50))
+        return 1;
+
+    if (test__floatuntitf(0x0007FB72EA000000LL, 0x1.FEDCBA8p+50))
+        return 1;
+    if (test__floatuntitf(0x0007FB72EB000000LL, 0x1.FEDCBACp+50))
+        return 1;
+    if (test__floatuntitf(0x0007FB72EBFFFFFFLL, 0x1.FEDCBAFFFFFFCp+50))
+        return 1;
+    if (test__floatuntitf(0x0007FB72EC000000LL, 0x1.FEDCBBp+50))
+        return 1;
+    if (test__floatuntitf(0x0007FB72E8000001LL, 0x1.FEDCBA0000004p+50))
+        return 1;
+
+    if (test__floatuntitf(0x0007FB72E6000000LL, 0x1.FEDCB98p+50))
+        return 1;
+    if (test__floatuntitf(0x0007FB72E7000000LL, 0x1.FEDCB9Cp+50))
+        return 1;
+    if (test__floatuntitf(0x0007FB72E7FFFFFFLL, 0x1.FEDCB9FFFFFFCp+50))
+        return 1;
+    if (test__floatuntitf(0x0007FB72E4000001LL, 0x1.FEDCB90000004p+50))
+        return 1;
+    if (test__floatuntitf(0x0007FB72E4000000LL, 0x1.FEDCB9p+50))
+        return 1;
+
+    if (test__floatuntitf(0x023479FD0E092DC0LL, 0x1.1A3CFE870496Ep+57))
+        return 1;
+    if (test__floatuntitf(0x023479FD0E092DA1LL, 0x1.1A3CFE870496D08p+57L))
+        return 1;
+    if (test__floatuntitf(0x023479FD0E092DB0LL, 0x1.1A3CFE870496D8p+57L))
+        return 1;
+    if (test__floatuntitf(0x023479FD0E092DB8LL, 0x1.1A3CFE870496DCp+57L))
+        return 1;
+    if (test__floatuntitf(0x023479FD0E092DB6LL, 0x1.1A3CFE870496DBp+57L))
+        return 1;
+    if (test__floatuntitf(0x023479FD0E092DBFLL, 0x1.1A3CFE870496DF8p+57L))
+        return 1;
+    if (test__floatuntitf(0x023479FD0E092DC1LL, 0x1.1A3CFE870496E08p+57L))
+        return 1;
+    if (test__floatuntitf(0x023479FD0E092DC7LL, 0x1.1A3CFE870496E38p+57L))
+        return 1;
+    if (test__floatuntitf(0x023479FD0E092DC8LL, 0x1.1A3CFE870496E4p+57L))
+        return 1;
+    if (test__floatuntitf(0x023479FD0E092DCFLL, 0x1.1A3CFE870496E78p+57L))
+        return 1;
+    if (test__floatuntitf(0x023479FD0E092DD0LL, 0x1.1A3CFE870496E8p+57L))
+        return 1;
+    if (test__floatuntitf(0x023479FD0E092DD1LL, 0x1.1A3CFE870496E88p+57L))
+        return 1;
+    if (test__floatuntitf(0x023479FD0E092DD8LL, 0x1.1A3CFE870496ECp+57L))
+        return 1;
+    if (test__floatuntitf(0x023479FD0E092DDFLL, 0x1.1A3CFE870496EF8p+57L))
+        return 1;
+    if (test__floatuntitf(0x023479FD0E092DE0LL, 0x1.1A3CFE870496Fp+57))
+        return 1;
+    /* Same high words moved up 64 bits; the small low words round away. */
+    if (test__floatuntitf(make_ti(0x023479FD0E092DC0LL, 0), 0x1.1A3CFE870496Ep+121L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x023479FD0E092DA1LL, 1), 0x1.1A3CFE870496D08p+121L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x023479FD0E092DB0LL, 2), 0x1.1A3CFE870496D8p+121L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x023479FD0E092DB8LL, 3), 0x1.1A3CFE870496DCp+121L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x023479FD0E092DB6LL, 4), 0x1.1A3CFE870496DBp+121L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x023479FD0E092DBFLL, 5), 0x1.1A3CFE870496DF8p+121L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x023479FD0E092DC1LL, 6), 0x1.1A3CFE870496E08p+121L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x023479FD0E092DC7LL, 7), 0x1.1A3CFE870496E38p+121L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x023479FD0E092DC8LL, 8), 0x1.1A3CFE870496E4p+121L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x023479FD0E092DCFLL, 9), 0x1.1A3CFE870496E78p+121L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x023479FD0E092DD0LL, 10), 0x1.1A3CFE870496E8p+121L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x023479FD0E092DD1LL, 11), 0x1.1A3CFE870496E88p+121L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x023479FD0E092DD8LL, 12), 0x1.1A3CFE870496ECp+121L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x023479FD0E092DDFLL, 13), 0x1.1A3CFE870496EF8p+121L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x023479FD0E092DE0LL, 14), 0x1.1A3CFE870496Fp+121L))
+        return 1;
+
+    if (test__floatuntitf(make_ti(0, 0xFFFFFFFFFFFFFFFFLL), 0x1.FFFFFFFFFFFFFFFEp+63L))
+        return 1;
+    /* Top of the unsigned 128-bit range: the all-ones input rounds up to 2^128. */
+    if (test__floatuntitf(make_ti(0xFFFFFFFFFFFFFFFFLL, 0x0000000000000000LL),
+                          0x1.FFFFFFFFFFFFFFFEp+127L))
+        return 1;
+    if (test__floatuntitf(make_ti(0xFFFFFFFFFFFFFFFFLL, 0xFFFFFFFFFFFFFFFFLL),
+                          0x1.0000000000000000p+128L))
+        return 1;
+    /* Rounding at the last significand digit: exact ties go to the even value. */
+    if (test__floatuntitf(make_ti(0x123456789ABCDEF0LL, 0x123456789ABC2801LL),
+                          0x1.23456789ABCDEF0123456789ABC3p+124L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x123456789ABCDEF0LL, 0x123456789ABC3000LL),
+                          0x1.23456789ABCDEF0123456789ABC3p+124L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x123456789ABCDEF0LL, 0x123456789ABC37FFLL),
+                          0x1.23456789ABCDEF0123456789ABC3p+124L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x123456789ABCDEF0LL, 0x123456789ABC3800LL),
+                          0x1.23456789ABCDEF0123456789ABC4p+124L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x123456789ABCDEF0LL, 0x123456789ABC4000LL),
+                          0x1.23456789ABCDEF0123456789ABC4p+124L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x123456789ABCDEF0LL, 0x123456789ABC47FFLL),
+                          0x1.23456789ABCDEF0123456789ABC4p+124L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x123456789ABCDEF0LL, 0x123456789ABC4800LL),
+                          0x1.23456789ABCDEF0123456789ABC4p+124L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x123456789ABCDEF0LL, 0x123456789ABC4801LL),
+                          0x1.23456789ABCDEF0123456789ABC5p+124L))
+        return 1;
+    if (test__floatuntitf(make_ti(0x123456789ABCDEF0LL, 0x123456789ABC57FFLL),
+                          0x1.23456789ABCDEF0123456789ABC5p+124L))
+        return 1;
+#else
+    printf("skipped\n");
+#endif
+    return 0;
+}
diff --git a/test/builtins/Unit/negdf2vfp_test.c b/test/builtins/Unit/negdf2vfp_test.c
index f0e6677..f673b92 100644
--- a/test/builtins/Unit/negdf2vfp_test.c
+++ b/test/builtins/Unit/negdf2vfp_test.c
@@ -11,14 +11,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "int_lib.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 
 
+#if __arm__
 extern COMPILER_RT_ABI double __negdf2vfp(double a);
 
-#if __arm__
 int test__negdf2vfp(double a)
 {
     double actual = __negdf2vfp(a);
diff --git a/test/builtins/Unit/subdf3vfp_test.c b/test/builtins/Unit/subdf3vfp_test.c
index 5d5d711..6e8d5a1 100644
--- a/test/builtins/Unit/subdf3vfp_test.c
+++ b/test/builtins/Unit/subdf3vfp_test.c
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "int_lib.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
diff --git a/test/cfi/CMakeLists.txt b/test/cfi/CMakeLists.txt
index 4c4deba..bd51eac 100644
--- a/test/cfi/CMakeLists.txt
+++ b/test/cfi/CMakeLists.txt
@@ -11,16 +11,19 @@
   )
 
 set(CFI_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
+list(APPEND CFI_TEST_DEPS
+  ubsan
+  stats
+)
+if(COMPILER_RT_HAS_CFI)
+  list(APPEND CFI_TEST_DEPS cfi)
+endif()
+
 if(NOT COMPILER_RT_STANDALONE_BUILD)
   list(APPEND CFI_TEST_DEPS
     opt
-    ubsan
-    stats
     sanstats
   )
-  if(COMPILER_RT_HAS_CFI)
-    list(APPEND CFI_TEST_DEPS cfi)
-  endif()
   if(LLVM_ENABLE_PIC AND LLVM_BINUTILS_INCDIR)
     list(APPEND CFI_TEST_DEPS
       LLVMgold
diff --git a/test/cfi/cross-dso/stats.cpp b/test/cfi/cross-dso/stats.cpp
index 3d25d77..6566ea2 100644
--- a/test/cfi/cross-dso/stats.cpp
+++ b/test/cfi/cross-dso/stats.cpp
@@ -16,24 +16,24 @@
 #ifdef SHARED_LIB
 
 extern "C" __attribute__((noinline)) void vcall(A *a) {
-  // CHECK: stats.cpp:[[@LINE+1]] vcall cfi-vcall 37
+  // CHECK: stats.cpp:[[@LINE+1]] vcall.cfi cfi-vcall 37
   a->vf();
 }
 
 extern "C" __attribute__((noinline)) void nvcall(A *a) {
-  // CHECK: stats.cpp:[[@LINE+1]] nvcall cfi-nvcall 51
+  // CHECK: stats.cpp:[[@LINE+1]] nvcall.cfi cfi-nvcall 51
   a->nvf();
 }
 
 #else
 
 extern "C" __attribute__((noinline)) A *dcast(A *a) {
-  // CHECK: stats.cpp:[[@LINE+1]] dcast cfi-derived-cast 24
+  // CHECK: stats.cpp:[[@LINE+1]] dcast.cfi cfi-derived-cast 24
   return (A *)(ABase *)a;
 }
 
 extern "C" __attribute__((noinline)) A *ucast(A *a) {
-  // CHECK: stats.cpp:[[@LINE+1]] ucast cfi-unrelated-cast 81
+  // CHECK: stats.cpp:[[@LINE+1]] ucast.cfi cfi-unrelated-cast 81
   return (A *)(char *)a;
 }
 
diff --git a/test/cfi/icall/weak.c b/test/cfi/icall/weak.c
new file mode 100644
index 0000000..4073991
--- /dev/null
+++ b/test/cfi/icall/weak.c
@@ -0,0 +1,15 @@
+// Test that an undefined weak function still compares equal to null under CFI.
+// RUN: %clang_cfi -lm -o %t1 %s && %t1
+// XFAIL: darwin
+
+__attribute__((weak)) void does_not_exist(void);
+
+__attribute__((noinline))
+void foo(void (*p)(void)) {
+  p();
+}
+
+int main(int argc, char **argv) {
+  if (does_not_exist)
+    foo(does_not_exist);
+}
diff --git a/test/dfsan/write_callback.c b/test/dfsan/write_callback.c
index 3ba027a..31470ef 100644
--- a/test/dfsan/write_callback.c
+++ b/test/dfsan/write_callback.c
@@ -3,6 +3,7 @@
 
 // Tests that the custom implementation of write() does writes with or without
 // a callback set using dfsan_set_write_callback().
+// REQUIRES: stable-runtime
 
 #include <sanitizer/dfsan_interface.h>
 
diff --git a/test/esan/TestCases/mmap-shadow-conflict.c b/test/esan/TestCases/mmap-shadow-conflict.c
index 4b3c58b..8e86bba 100644
--- a/test/esan/TestCases/mmap-shadow-conflict.c
+++ b/test/esan/TestCases/mmap-shadow-conflict.c
@@ -1,26 +1,40 @@
 // RUN: %clang_esan_frag -O0 %s -o %t 2>&1
-// RUN: %env_esan_opts=verbosity=1 %run %t 2>&1 | FileCheck %s
+// RUN: %env_esan_opts=verbosity=1 %run %t 2>&1 | FileCheck --check-prefix=%arch --check-prefix=CHECK %s
 
 #include <unistd.h>
 #include <sys/mman.h>
 #include <stdio.h>
 
 int main(int argc, char **argv) {
+#if defined(__mips64)
+  void *Map = mmap((void *)0x0000001600000000ULL, 0x1000, PROT_READ,
+                   MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
+#else
   void *Map = mmap((void *)0x0000016000000000ULL, 0x1000, PROT_READ,
                    MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
+#endif
   if (Map == (void *)-1)
     fprintf(stderr, "map failed\n");
   else
     fprintf(stderr, "mapped %p\n", Map);
+#if defined(__mips64)
+  Map = mmap((void *)0x0000001600000000ULL, 0x1000, PROT_READ,
+                   MAP_ANON|MAP_PRIVATE, -1, 0);
+#else
   Map = mmap((void *)0x0000016000000000ULL, 0x1000, PROT_READ,
                    MAP_ANON|MAP_PRIVATE, -1, 0);
+#endif
   fprintf(stderr, "mapped %p\n", Map);
   // CHECK:      in esan::initializeLibrary
   // (There can be a re-exec for stack limit here.)
-  // CHECK:      Shadow scale=2 offset=0x440000000000
-  // CHECK-NEXT: Shadow #0: [110000000000-114000000000) (256GB)
-  // CHECK-NEXT: Shadow #1: [124000000000-12c000000000) (512GB)
-  // CHECK-NEXT: Shadow #2: [148000000000-150000000000) (512GB)
+  // x86_64:      Shadow scale=2 offset=0x440000000000
+  // x86_64-NEXT: Shadow #0: [110000000000-114000000000) (256GB)
+  // x86_64-NEXT: Shadow #1: [124000000000-12c000000000) (512GB)
+  // x86_64-NEXT: Shadow #2: [148000000000-150000000000) (512GB)
+  // mips64:      Shadow scale=2 offset=0x4400000000
+  // mips64-NEXT: Shadow #0: [1140000000-1180000000) (1GB)
+  // mips64-NEXT: Shadow #1: [1380000000-13c0000000) (1GB)
+  // mips64-NEXT: Shadow #2: [14c0000000-1500000000) (1GB)
   // CHECK-NEXT: mmap conflict: {{.*}}
   // CHECK-NEXT: map failed
   // CHECK-NEXT: mmap conflict: {{.*}}
diff --git a/test/esan/TestCases/struct-simple.cpp b/test/esan/TestCases/struct-simple.cpp
index c52154e..7ec9761 100644
--- a/test/esan/TestCases/struct-simple.cpp
+++ b/test/esan/TestCases/struct-simple.cpp
@@ -115,21 +115,21 @@
   // CHECK:      in esan::initializeCacheFrag
   // CHECK-NEXT: in esan::processCompilationUnitInit
   // CHECK-NEXT: in esan::processCacheFragCompilationUnitInit: {{.*}}struct-simple.cpp with 6 class(es)/struct(s)
-  // CHECK-NEXT:  Register struct.A#2#11#11: 2 fields
-  // CHECK-NEXT:  Register struct.B#2#3#2:   2 fields
-  // CHECK-NEXT:  Register union.U#1#3:      1 fields
-  // CHECK-NEXT:  Register struct.S#2#11#11: 2 fields
-  // CHECK-NEXT:  Register struct.D#3#14#11#11: 3 fields
-  // CHECK-NEXT:  Register struct.anon#3#11#11#11: 3 fields
+  // CHECK-NEXT:  Register struct.A$2$11$11: 2 fields
+  // CHECK-NEXT:  Register struct.B$2$3$2:   2 fields
+  // CHECK-NEXT:  Register union.U$1$3:      1 fields
+  // CHECK-NEXT:  Register struct.S$2$11$11: 2 fields
+  // CHECK-NEXT:  Register struct.D$3$14$11$11: 3 fields
+  // CHECK-NEXT:  Register struct.anon$3$11$11$11: 3 fields
   // CHECK-NEXT: in esan::processCompilationUnitInit
   // CHECK-NEXT: in esan::processCacheFragCompilationUnitInit: {{.*}}struct-simple.cpp with 0 class(es)/struct(s)
   // CHECK-NEXT: in esan::processCompilationUnitInit
   // CHECK-NEXT: in esan::processCacheFragCompilationUnitInit: {{.*}}struct-simple.cpp with 5 class(es)/struct(s)
-  // CHECK-NEXT:  Register class.C#3#14#13#13:  3 fields
-  // CHECK-NEXT:  Register struct.anon#2#11#11: 2 fields
-  // CHECK-NEXT:  Register union.anon#1#3:      1 fields
-  // CHECK-NEXT:  Duplicated struct.S#2#11#11:  2 fields
-  // CHECK-NEXT:  Register struct.D#3#11#11#11: 3 fields
+  // CHECK-NEXT:  Register class.C$3$14$13$13:  3 fields
+  // CHECK-NEXT:  Register struct.anon$2$11$11: 2 fields
+  // CHECK-NEXT:  Register union.anon$1$3:      1 fields
+  // CHECK-NEXT:  Duplicated struct.S$2$11$11:  2 fields
+  // CHECK-NEXT:  Register struct.D$3$11$11$11: 3 fields
   struct C c[2];
   struct S s;
   struct D d;
@@ -148,24 +148,24 @@
   // CHECK-NEXT: in esan::finalizeCacheFrag
   // CHECK-NEXT: in esan::processCompilationUnitExit
   // CHECK-NEXT: in esan::processCacheFragCompilationUnitExit: {{.*}}struct-simple.cpp with 5 class(es)/struct(s)
-  // CHECK-NEXT:  Unregister class.C#3#14#13#13:  3 fields
+  // CHECK-NEXT:  Unregister class.C$3$14$13$13:  3 fields
   // CHECK-NEXT:   {{.*}} class C
   // CHECK-NEXT:   {{.*}}  size = 32, count = 5, ratio = 3, array access = 5
   // CHECK-NEXT:   {{.*}}  # 0: offset = 0,  size = 8,  count = 2, type = %struct.anon = type { i32, i32 }
   // CHECK-NEXT:   {{.*}}  # 1: offset = 8,  size = 8,  count = 2, type = %union.anon = type { double }
   // CHECK-NEXT:   {{.*}}  # 2: offset = 16, size = 10, count = 1, type = [10 x i8]
-  // CHECK-NEXT:  Unregister struct.anon#2#11#11: 2 fields
+  // CHECK-NEXT:  Unregister struct.anon$2$11$11: 2 fields
   // CHECK-NEXT:   {{.*}} struct anon
   // CHECK-NEXT:   {{.*}}  size = 8, count = 2, ratio = 1, array access = 0
   // CHECK-NEXT:   {{.*}}  # 0: offset = 0, size = 4, count = 1, type = i32
   // CHECK-NEXT:   {{.*}}  # 1: offset = 4, size = 4, count = 1, type = i32
-  // CHECK-NEXT:  Unregister union.anon#1#3:      1 fields
-  // CHECK-NEXT:  Unregister struct.S#2#11#11:    2 fields
+  // CHECK-NEXT:  Unregister union.anon$1$3:      1 fields
+  // CHECK-NEXT:  Unregister struct.S$2$11$11:    2 fields
   // CHECK-NEXT:   {{.*}} struct S
   // CHECK-NEXT:   {{.*}}  size = 8, count = 2, ratio = 2, array access = 0
   // CHECK-NEXT:   {{.*}}  # 0: count = 2
   // CHECK-NEXT:   {{.*}}  # 1: count = 0
-  // CHECK-NEXT:  Unregister struct.D#3#11#11#11: 3 fields
+  // CHECK-NEXT:  Unregister struct.D$3$11$11$11: 3 fields
   // CHECK-NEXT:   {{.*}} struct D
   // CHECK-NEXT:   {{.*}}  size = 12, count = 2, ratio = 2, array access = 0
   // CHECK-NEXT:   {{.*}}  # 0: offset = 0, size = 4, count = 1, type = i32
@@ -175,25 +175,25 @@
   // CHECK-NEXT: in esan::processCacheFragCompilationUnitExit: {{.*}}struct-simple.cpp with 0 class(es)/struct(s)
   // CHECK-NEXT: in esan::processCompilationUnitExit
   // CHECK-NEXT: in esan::processCacheFragCompilationUnitExit: {{.*}}struct-simple.cpp with 6 class(es)/struct(s)
-  // CHECK-NEXT:  Unregister struct.A#2#11#11:    2 fields
+  // CHECK-NEXT:  Unregister struct.A$2$11$11:    2 fields
   // CHECK-NEXT:   {{.*}} struct A
   // CHECK-NEXT:   {{.*}}  size = 8, count = 2049, ratio = 2048, array access = 0
   // CHECK-NEXT:   {{.*}}  # 0: count = 2048
   // CHECK-NEXT:   {{.*}}  # 1: count = 1
-  // CHECK-NEXT:  Unregister struct.B#2#3#2:      2 fields
+  // CHECK-NEXT:  Unregister struct.B$2$3$2:      2 fields
   // CHECK-NEXT:   {{.*}} struct B
   // CHECK-NEXT:   {{.*}}  size = 16, count = 2097153, ratio = 2097152, array access = 0
   // CHECK-NEXT:   {{.*}}  # 0: count = 1
   // CHECK-NEXT:   {{.*}}  # 1: count = 2097152
-  // CHECK-NEXT:  Unregister union.U#1#3:         1 fields
-  // CHECK-NEXT:  Duplicated struct.S#2#11#11:    2 fields
-  // CHECK-NEXT:  Unregister struct.D#3#14#11#11: 3 fields
+  // CHECK-NEXT:  Unregister union.U$1$3:         1 fields
+  // CHECK-NEXT:  Duplicated struct.S$2$11$11:    2 fields
+  // CHECK-NEXT:  Unregister struct.D$3$14$11$11: 3 fields
   // CHECK-NEXT:  {{.*}} struct D
   // CHECK-NEXT:  {{.*}}  size = 128, count = 2097153, ratio = 2097153, array access = 0
   // CHECK-NEXT:  {{.*}}  # 0: count = 1
   // CHECK-NEXT:  {{.*}}  # 1: count = 0
   // CHECK-NEXT:  {{.*}}  # 2: count = 2097152
-  // CHECK-NEXT:  Unregister struct.anon#3#11#11#11: 3 fields
+  // CHECK-NEXT:  Unregister struct.anon$3$11$11$11: 3 fields
   // CHECK-NEXT:  {{.*}} struct anon
   // CHECK-NEXT:  {{.*}}  size = 12, count = 2097152, ratio = 4194304, array access = 2097152
   // CHECK-NEXT:  {{.*}}  # 0: count = 0
diff --git a/test/esan/TestCases/verbose-simple.c b/test/esan/TestCases/verbose-simple.c
index 0d867bf..5ac37e1 100644
--- a/test/esan/TestCases/verbose-simple.c
+++ b/test/esan/TestCases/verbose-simple.c
@@ -1,14 +1,18 @@
 // RUN: %clang_esan_frag -O0 %s -o %t 2>&1
-// RUN: %env_esan_opts="verbosity=1 log_exe_name=1" %run %t 2>&1 | FileCheck %s
+// RUN: %env_esan_opts="verbosity=1 log_exe_name=1" %run %t 2>&1 | FileCheck --check-prefix=%arch --check-prefix=CHECK %s
 
 int main(int argc, char **argv) {
   // CHECK:      in esan::initializeLibrary
   // (There can be a re-exec for stack limit here.)
-  // CHECK:      Shadow scale=2 offset=0x440000000000
-  // CHECK-NEXT: Shadow #0: [110000000000-114000000000) (256GB)
-  // CHECK-NEXT: Shadow #1: [124000000000-12c000000000) (512GB)
-  // CHECK-NEXT: Shadow #2: [148000000000-150000000000) (512GB)
-  // CHECK-NEXT: in esan::finalizeLibrary
-  // CHECK-NEXT: ==verbose-simple{{.*}}EfficiencySanitizer: total struct field access count = 0
+  // x86_64:      Shadow scale=2 offset=0x440000000000
+  // x86_64-NEXT: Shadow #0: [110000000000-114000000000) (256GB)
+  // x86_64-NEXT: Shadow #1: [124000000000-12c000000000) (512GB)
+  // x86_64-NEXT: Shadow #2: [148000000000-150000000000) (512GB)
+  // mips64:      Shadow scale=2 offset=0x4400000000
+  // mips64-NEXT: Shadow #0: [1140000000-1180000000) (1GB)
+  // mips64-NEXT: Shadow #1: [1380000000-13c0000000) (1GB)
+  // mips64-NEXT: Shadow #2: [14c0000000-1500000000) (1GB)
+  // CHECK: in esan::finalizeLibrary
+  // CHECK: ==verbose-simple{{.*}}EfficiencySanitizer: total struct field access count = 0
   return 0;
 }
diff --git a/test/esan/lit.cfg b/test/esan/lit.cfg
index cf16a6b..8b8457d 100644
--- a/test/esan/lit.cfg
+++ b/test/esan/lit.cfg
@@ -40,5 +40,5 @@
 config.suffixes = ['.c', '.cpp']
 
 # EfficiencySanitizer tests are currently supported on Linux x86-64 only.
-if config.host_os not in ['Linux'] or config.target_arch != 'x86_64':
+if config.host_os not in ['Linux'] or config.target_arch not in ['x86_64', 'mips64'] :
   config.unsupported = True
diff --git a/test/lit.common.cfg b/test/lit.common.cfg
index f85deab..8ea3e7d 100644
--- a/test/lit.common.cfg
+++ b/test/lit.common.cfg
@@ -52,7 +52,8 @@
                                'LIBCLANG_LOGGING', 'LIBCLANG_BGPRIO_INDEX',
                                'LIBCLANG_BGPRIO_EDIT', 'LIBCLANG_NOTHREADS',
                                'LIBCLANG_RESOURCE_USAGE',
-                               'LIBCLANG_CODE_COMPLETION_LOGGING']
+                               'LIBCLANG_CODE_COMPLETION_LOGGING',
+                               'XRAY_OPTIONS']
 # Clang/Win32 may refer to %INCLUDE%. vsvarsall.bat sets it.
 if platform.system() != 'Windows':
     possibly_dangerous_env_vars.append('INCLUDE')
@@ -90,6 +91,9 @@
 # Define CHECK-%os to check for OS-dependent output.
 config.substitutions.append( ('CHECK-%os', ("CHECK-" + config.host_os)))
 
+# Define %arch to check for architecture-dependent output.
+config.substitutions.append( ('%arch', (config.host_arch)))
+
 if config.host_os == 'Windows':
   # FIXME: This isn't quite right. Specifically, it will succeed if the program
   # does not crash but exits with a non-zero exit code. We ought to merge
@@ -106,6 +110,7 @@
   config.available_features.add(target_arch + '-target-arch')
   if target_arch in ['x86_64', 'i386', 'i686']:
     config.available_features.add('x86-target-arch')
+  config.available_features.add(target_arch + '-' + config.host_os.lower())
 
 compiler_rt_debug = getattr(config, 'compiler_rt_debug', False)
 if not compiler_rt_debug:
@@ -166,7 +171,7 @@
   if not os.path.exists(os.path.join(config.llvm_shlib_dir, 'LLVMgold.so')):
     return False
 
-  ld_cmd = subprocess.Popen([config.gold_executable, '--help'], stdout = subprocess.PIPE)
+  ld_cmd = subprocess.Popen([config.gold_executable, '--help'], stdout = subprocess.PIPE, env={'LANG': 'C'})
   ld_out = ld_cmd.stdout.read().decode()
   ld_cmd.wait()
 
diff --git a/test/lit.common.configured.in b/test/lit.common.configured.in
index 4472f59..862d06b 100644
--- a/test/lit.common.configured.in
+++ b/test/lit.common.configured.in
@@ -17,7 +17,7 @@
 set_default("compiler_rt_src_root", "@COMPILER_RT_SOURCE_DIR@")
 set_default("compiler_rt_obj_root", "@COMPILER_RT_BINARY_DIR@")
 set_default("llvm_tools_dir", "@LLVM_TOOLS_BINARY_DIR@")
-set_default("llvm_shlib_dir", "@SHLIBDIR@")
+set_default("llvm_shlib_dir", "@LLVM_LIBRARY_OUTPUT_INTDIR@")
 set_default("gold_executable", "@GOLD_EXECUTABLE@")
 set_default("clang", "@COMPILER_RT_TEST_COMPILER@")
 set_default("compiler_id", "@COMPILER_RT_TEST_COMPILER_ID@")
diff --git a/test/lsan/TestCases/cleanup_in_tsd_destructor.c b/test/lsan/TestCases/cleanup_in_tsd_destructor.c
index debf05c..6da7595 100644
--- a/test/lsan/TestCases/cleanup_in_tsd_destructor.c
+++ b/test/lsan/TestCases/cleanup_in_tsd_destructor.c
@@ -14,6 +14,7 @@
 #include <stdlib.h>
 
 #include "sanitizer/lsan_interface.h"
+#include "sanitizer_common/print_address.h"
 
 pthread_key_t key;
 __thread void *p;
@@ -25,7 +26,7 @@
 
 void *thread_func(void *arg) {
   p = malloc(1337);
-  fprintf(stderr, "Test alloc: %p.\n", p);
+  print_address("Test alloc: ", 1, p);
   int res = pthread_setspecific(key, (void*)1);
   assert(res == 0);
   return 0;
@@ -41,5 +42,5 @@
   assert(res == 0);
   return 0;
 }
-// CHECK: Test alloc: [[ADDR:.*]].
+// CHECK: Test alloc: [[ADDR:0x[0-9,a-f]+]]
 // CHECK: [[ADDR]] (1337 bytes)
diff --git a/test/lsan/TestCases/guard-page.c b/test/lsan/TestCases/guard-page.c
index 5c70a9f..25d63e2 100644
--- a/test/lsan/TestCases/guard-page.c
+++ b/test/lsan/TestCases/guard-page.c
@@ -22,6 +22,7 @@
 static void ctxfunc() {
   pthread_mutex_lock(&mutex);
   ctxfunc_started = 1;
+  // printf("ctxfunc\n");
   pthread_cond_signal(&cond);
   pthread_mutex_unlock(&mutex);
   // Leave this context alive when the program exits.
@@ -35,11 +36,11 @@
 
   if (getcontext(&ctx) < 0)
     die("getcontext", 0);
-  stack = malloc(1 << 10);
+  stack = malloc(1 << 11);
   if (stack == NULL)
     die("malloc", 0);
   ctx.uc_stack.ss_sp = stack;
-  ctx.uc_stack.ss_size = 1 << 10;
+  ctx.uc_stack.ss_size = 1 << 11;
   makecontext(&ctx, ctxfunc, 0);
   setcontext(&ctx);
   die("setcontext", 0);
diff --git a/test/lsan/TestCases/large_allocation_leak.cc b/test/lsan/TestCases/large_allocation_leak.cc
index f41143a..9d5698c 100644
--- a/test/lsan/TestCases/large_allocation_leak.cc
+++ b/test/lsan/TestCases/large_allocation_leak.cc
@@ -5,14 +5,15 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include "sanitizer_common/print_address.h"
 
 int main() {
   // maxsize in primary allocator is always less than this (1 << 25).
   void *large_alloc = malloc(33554432);
-  fprintf(stderr, "Test alloc: %p.\n", large_alloc);
+  print_address("Test alloc: ", 1, large_alloc);
   return 0;
 }
-// CHECK: Test alloc: [[ADDR:.*]].
+// CHECK: Test alloc: [[ADDR:0x[0-9,a-f]+]]
 // CHECK: LeakSanitizer: detected memory leaks
 // CHECK: [[ADDR]] (33554432 bytes)
 // CHECK: SUMMARY: {{(Leak|Address)}}Sanitizer:
diff --git a/test/lsan/TestCases/pointer_to_self.cc b/test/lsan/TestCases/pointer_to_self.cc
index 63bde2c..40c1228 100644
--- a/test/lsan/TestCases/pointer_to_self.cc
+++ b/test/lsan/TestCases/pointer_to_self.cc
@@ -6,13 +6,14 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include "sanitizer_common/print_address.h"
 
 int main() {
   void *p = malloc(1337);
   *reinterpret_cast<void **>(p) = p;
-  fprintf(stderr, "Test alloc: %p.\n", p);
+  print_address("Test alloc: ", 1, p);
 }
-// CHECK: Test alloc: [[ADDR:.*]].
+// CHECK: Test alloc: [[ADDR:0x[0-9,a-f]+]]
 // CHECK: LeakSanitizer: detected memory leaks
 // CHECK: [[ADDR]] (1337 bytes)
 // CHECK: SUMMARY: {{(Leak|Address)}}Sanitizer:
diff --git a/test/lsan/TestCases/stale_stack_leak.cc b/test/lsan/TestCases/stale_stack_leak.cc
index 4b8a54e..770096b 100644
--- a/test/lsan/TestCases/stale_stack_leak.cc
+++ b/test/lsan/TestCases/stale_stack_leak.cc
@@ -6,6 +6,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include "sanitizer_common/print_address.h"
 
 void **pp;
 
@@ -18,7 +19,7 @@
   void *locals[2048];
   locals[0] = p;
   pp = &locals[0];
-  fprintf(stderr, "Test alloc: %p.\n", locals[0]);
+  print_address("Test alloc: ", 1, locals[0]);
   return 0;
 }
 
@@ -33,11 +34,11 @@
 __attribute__((destructor))
 __attribute__((no_sanitize_address))
 void ConfirmPointerHasSurvived() {
-  fprintf(stderr, "Value after LSan: %p.\n", *pp);
+  print_address("Value after LSan: ", 1, *pp);
 }
-// CHECK: Test alloc: [[ADDR:.*]].
-// CHECK-sanity: Test alloc: [[ADDR:.*]].
+// CHECK: Test alloc: [[ADDR:0x[0-9,a-f]+]]
+// CHECK-sanity: Test alloc: [[ADDR:0x[0-9,a-f]+]]
 // CHECK: LeakSanitizer: detected memory leaks
 // CHECK: [[ADDR]] (1337 bytes)
 // CHECK: SUMMARY: {{(Leak|Address)}}Sanitizer:
-// CHECK-sanity: Value after LSan: [[ADDR]].
+// CHECK-sanity: Value after LSan: [[ADDR]]
diff --git a/test/lsan/TestCases/strace_test.cc b/test/lsan/TestCases/strace_test.cc
new file mode 100644
index 0000000..b3568d0
--- /dev/null
+++ b/test/lsan/TestCases/strace_test.cc
@@ -0,0 +1,14 @@
+// Test that lsan reports a proper error when running under strace.
+// RUN: %clangxx_lsan %s -o %t
+// RUN: not strace -o /dev/null %run %t 2>&1 | FileCheck %s
+
+#include <stdio.h>
+#include <stdlib.h>
+
+static volatile void *sink;
+
+int main() {
+  sink = malloc(42);
+}
+// CHECK: LeakSanitizer has encountered a fatal error
+// CHECK: HINT: LeakSanitizer does not work under ptrace (strace, gdb, etc)
diff --git a/test/lsan/TestCases/use_after_return.cc b/test/lsan/TestCases/use_after_return.cc
index eb917c0..ed9cc78 100644
--- a/test/lsan/TestCases/use_after_return.cc
+++ b/test/lsan/TestCases/use_after_return.cc
@@ -8,16 +8,17 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include "sanitizer_common/print_address.h"
 
 int main() {
   void *stack_var = malloc(1337);
-  fprintf(stderr, "Test alloc: %p.\n", stack_var);
+  print_address("Test alloc: ", 1, stack_var);
   // Take pointer to variable, to ensure it's not optimized into a register.
-  fprintf(stderr, "Stack var at: %p.\n", &stack_var);
+  print_address("Stack var at: ", 1, &stack_var);
   // Do not return from main to prevent the pointer from going out of scope.
   exit(0);
 }
-// CHECK: Test alloc: [[ADDR:.*]].
+// CHECK: Test alloc: [[ADDR:0x[0-9,a-f]+]]
 // CHECK: LeakSanitizer: detected memory leaks
 // CHECK: [[ADDR]] (1337 bytes)
 // CHECK: SUMMARY: {{(Leak|Address)}}Sanitizer:
diff --git a/test/lsan/TestCases/use_globals_initialized.cc b/test/lsan/TestCases/use_globals_initialized.cc
index 172d22a..45c12dc 100644
--- a/test/lsan/TestCases/use_globals_initialized.cc
+++ b/test/lsan/TestCases/use_globals_initialized.cc
@@ -7,15 +7,16 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include "sanitizer_common/print_address.h"
 
 void *data_var = (void *)1;
 
 int main() {
   data_var = malloc(1337);
-  fprintf(stderr, "Test alloc: %p.\n", data_var);
+  print_address("Test alloc: ", 1, data_var);
   return 0;
 }
-// CHECK: Test alloc: [[ADDR:.*]].
+// CHECK: Test alloc: [[ADDR:0x[0-9,a-f]+]]
 // CHECK: LeakSanitizer: detected memory leaks
 // CHECK: [[ADDR]] (1337 bytes)
 // CHECK: SUMMARY: {{(Leak|Address)}}Sanitizer:
diff --git a/test/lsan/TestCases/use_globals_uninitialized.cc b/test/lsan/TestCases/use_globals_uninitialized.cc
index 2daa661..c198fcc 100644
--- a/test/lsan/TestCases/use_globals_uninitialized.cc
+++ b/test/lsan/TestCases/use_globals_uninitialized.cc
@@ -7,15 +7,16 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include "sanitizer_common/print_address.h"
 
 void *bss_var;
 
 int main() {
   bss_var = malloc(1337);
-  fprintf(stderr, "Test alloc: %p.\n", bss_var);
+  print_address("Test alloc: ", 1, bss_var);
   return 0;
 }
-// CHECK: Test alloc: [[ADDR:.*]].
+// CHECK: Test alloc: [[ADDR:0x[0-9,a-f]+]]
 // CHECK: LeakSanitizer: detected memory leaks
 // CHECK: [[ADDR]] (1337 bytes)
 // CHECK: SUMMARY: {{(Leak|Address)}}Sanitizer:
diff --git a/test/lsan/TestCases/use_poisoned_asan.cc b/test/lsan/TestCases/use_poisoned_asan.cc
index a1c544c..5acceeb 100644
--- a/test/lsan/TestCases/use_poisoned_asan.cc
+++ b/test/lsan/TestCases/use_poisoned_asan.cc
@@ -9,17 +9,18 @@
 #include <stdlib.h>
 #include <sanitizer/asan_interface.h>
 #include <assert.h>
+#include "sanitizer_common/print_address.h"
 
 void **p;
 
 int main() {
   p = new void *;
   *p = malloc(1337);
-  fprintf(stderr, "Test alloc: %p.\n", *p);
+  print_address("Test alloc: ", 1, *p);
   __asan_poison_memory_region(p, sizeof(*p));
   return 0;
 }
-// CHECK: Test alloc: [[ADDR:.*]].
+// CHECK: Test alloc: [[ADDR:0x[0-9,a-f]+]]
 // CHECK: LeakSanitizer: detected memory leaks
 // CHECK: [[ADDR]] (1337 bytes)
 // CHECK: SUMMARY: AddressSanitizer:
diff --git a/test/lsan/TestCases/use_registers.cc b/test/lsan/TestCases/use_registers.cc
index 74301a2..7647679 100644
--- a/test/lsan/TestCases/use_registers.cc
+++ b/test/lsan/TestCases/use_registers.cc
@@ -10,6 +10,7 @@
 #include <sched.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include "sanitizer_common/print_address.h"
 
 extern "C"
 void *registers_thread_func(void *arg) {
@@ -35,7 +36,7 @@
 #else
 #error "Test is not supported on this architecture."
 #endif
-  fprintf(stderr, "Test alloc: %p.\n", p);
+  print_address("Test alloc: ", 1, p);
   fflush(stderr);
   __sync_fetch_and_xor(sync, 1);
   while (true)
@@ -51,7 +52,7 @@
     sched_yield();
   return 0;
 }
-// CHECK: Test alloc: [[ADDR:.*]].
+// CHECK: Test alloc: [[ADDR:0x[0-9,a-f]+]]
 // CHECK: LeakSanitizer: detected memory leaks
 // CHECK: [[ADDR]] (1337 bytes)
 // CHECK: SUMMARY: {{(Leak|Address)}}Sanitizer:
diff --git a/test/lsan/TestCases/use_stacks.cc b/test/lsan/TestCases/use_stacks.cc
index 7afcde1..c32af68 100644
--- a/test/lsan/TestCases/use_stacks.cc
+++ b/test/lsan/TestCases/use_stacks.cc
@@ -7,14 +7,15 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include "sanitizer_common/print_address.h"
 
 int main() {
   void *stack_var = malloc(1337);
-  fprintf(stderr, "Test alloc: %p.\n", stack_var);
+  print_address("Test alloc: ", 1, stack_var);
   // Do not return from main to prevent the pointer from going out of scope.
   exit(0);
 }
-// CHECK: Test alloc: [[ADDR:.*]].
+// CHECK: Test alloc: [[ADDR:0x[0-9,a-f]+]]
 // CHECK: LeakSanitizer: detected memory leaks
 // CHECK: [[ADDR]] (1337 bytes)
 // CHECK: SUMMARY: {{(Leak|Address)}}Sanitizer:
diff --git a/test/lsan/TestCases/use_stacks_threaded.cc b/test/lsan/TestCases/use_stacks_threaded.cc
index a1d4383..ac1fb46 100644
--- a/test/lsan/TestCases/use_stacks_threaded.cc
+++ b/test/lsan/TestCases/use_stacks_threaded.cc
@@ -10,12 +10,13 @@
 #include <sched.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include "sanitizer_common/print_address.h"
 
 extern "C"
 void *stacks_thread_func(void *arg) {
   int *sync = reinterpret_cast<int *>(arg);
   void *p = malloc(1337);
-  fprintf(stderr, "Test alloc: %p.\n", p);
+  print_address("Test alloc: ", 1, p);
   fflush(stderr);
   __sync_fetch_and_xor(sync, 1);
   while (true)
@@ -31,7 +32,7 @@
     sched_yield();
   return 0;
 }
-// CHECK: Test alloc: [[ADDR:.*]].
+// CHECK: Test alloc: [[ADDR:0x[0-9,a-f]+]]
 // CHECK: LeakSanitizer: detected memory leaks
 // CHECK: [[ADDR]] (1337 bytes)
 // CHECK: SUMMARY: {{(Leak|Address)}}Sanitizer:
diff --git a/test/lsan/TestCases/use_tls_dynamic.cc b/test/lsan/TestCases/use_tls_dynamic.cc
index 207894b..927c5c4 100644
--- a/test/lsan/TestCases/use_tls_dynamic.cc
+++ b/test/lsan/TestCases/use_tls_dynamic.cc
@@ -12,6 +12,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string>
+#include "sanitizer_common/print_address.h"
 
 int main(int argc, char *argv[]) {
   std::string path = std::string(argv[0]) + "-so.so";
@@ -26,10 +27,10 @@
   // If we don't  know about dynamic TLS, we will return a false leak above.
   void **p_in_tls = StoreToTLS(p);
   assert(*p_in_tls == p);
-  fprintf(stderr, "Test alloc: %p.\n", p);
+  print_address("Test alloc: ", 1, p);
   return 0;
 }
-// CHECK: Test alloc: [[ADDR:.*]].
+// CHECK: Test alloc: [[ADDR:0x[0-9,a-f]+]]
 // CHECK: LeakSanitizer: detected memory leaks
 // CHECK: [[ADDR]] (1337 bytes)
 // CHECK: SUMMARY: {{(Leak|Address)}}Sanitizer:
diff --git a/test/lsan/TestCases/use_tls_pthread_specific_dynamic.cc b/test/lsan/TestCases/use_tls_pthread_specific_dynamic.cc
index 1488371..9ab4e1c 100644
--- a/test/lsan/TestCases/use_tls_pthread_specific_dynamic.cc
+++ b/test/lsan/TestCases/use_tls_pthread_specific_dynamic.cc
@@ -9,6 +9,7 @@
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include "sanitizer_common/print_address.h"
 
 // From glibc: this many keys are stored in the thread descriptor directly.
 const unsigned PTHREAD_KEY_2NDLEVEL_SIZE = 32;
@@ -28,10 +29,10 @@
   void *p  = malloc(1337);
   res = pthread_setspecific(key, p);
   assert(res == 0);
-  fprintf(stderr, "Test alloc: %p.\n", p);
+  print_address("Test alloc: ", 1, p);
   return 0;
 }
-// CHECK: Test alloc: [[ADDR:.*]].
+// CHECK: Test alloc: [[ADDR:0x[0-9,a-f]+]]
 // CHECK: LeakSanitizer: detected memory leaks
 // CHECK: [[ADDR]] (1337 bytes)
 // CHECK: SUMMARY: {{(Leak|Address)}}Sanitizer:
diff --git a/test/lsan/TestCases/use_tls_pthread_specific_static.cc b/test/lsan/TestCases/use_tls_pthread_specific_static.cc
index 1fd5681..be0bcf6 100644
--- a/test/lsan/TestCases/use_tls_pthread_specific_static.cc
+++ b/test/lsan/TestCases/use_tls_pthread_specific_static.cc
@@ -9,6 +9,7 @@
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include "sanitizer_common/print_address.h"
 
 // From glibc: this many keys are stored in the thread descriptor directly.
 const unsigned PTHREAD_KEY_2NDLEVEL_SIZE = 32;
@@ -22,10 +23,10 @@
   void *p = malloc(1337);
   res = pthread_setspecific(key, p);
   assert(res == 0);
-  fprintf(stderr, "Test alloc: %p.\n", p);
+  print_address("Test alloc: ", 1, p);
   return 0;
 }
-// CHECK: Test alloc: [[ADDR:.*]].
+// CHECK: Test alloc: [[ADDR:0x[0-9,a-f]+]]
 // CHECK: LeakSanitizer: detected memory leaks
 // CHECK: [[ADDR]] (1337 bytes)
 // CHECK: SUMMARY: {{(Leak|Address)}}Sanitizer:
diff --git a/test/lsan/TestCases/use_tls_static.cc b/test/lsan/TestCases/use_tls_static.cc
index 50db23a..5ffaf16 100644
--- a/test/lsan/TestCases/use_tls_static.cc
+++ b/test/lsan/TestCases/use_tls_static.cc
@@ -7,15 +7,16 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include "sanitizer_common/print_address.h"
 
 __thread void *tls_var;
 
 int main() {
   tls_var = malloc(1337);
-  fprintf(stderr, "Test alloc: %p.\n", tls_var);
+  print_address("Test alloc: ", 1, tls_var);
   return 0;
 }
-// CHECK: Test alloc: [[ADDR:.*]].
+// CHECK: Test alloc: [[ADDR:0x[0-9,a-f]+]]
 // CHECK: LeakSanitizer: detected memory leaks
 // CHECK: [[ADDR]] (1337 bytes)
 // CHECK: SUMMARY: {{(Leak|Address)}}Sanitizer:
diff --git a/test/lsan/TestCases/use_unaligned.cc b/test/lsan/TestCases/use_unaligned.cc
index 3e43ed4..86c3ed5 100644
--- a/test/lsan/TestCases/use_unaligned.cc
+++ b/test/lsan/TestCases/use_unaligned.cc
@@ -7,17 +7,18 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include "sanitizer_common/print_address.h"
 
 void *arr[2];
 
 int main() {
   void *p = malloc(1337);
-  fprintf(stderr, "Test alloc: %p.\n", p);
+  print_address("Test alloc: ", 1, p);
   char *char_arr = (char *)arr;
   memcpy(char_arr + 1, &p, sizeof(p));
   return 0;
 }
-// CHECK: Test alloc: [[ADDR:.*]].
+// CHECK: Test alloc: [[ADDR:0x[0-9,a-f]+]]
 // CHECK: LeakSanitizer: detected memory leaks
 // CHECK: [[ADDR]] (1337 bytes)
 // CHECK: SUMMARY: {{(Leak|Address)}}Sanitizer:
diff --git a/test/lsan/lit.common.cfg b/test/lsan/lit.common.cfg
index a04c113..6002e2d 100644
--- a/test/lsan/lit.common.cfg
+++ b/test/lsan/lit.common.cfg
@@ -31,8 +31,9 @@
 
 clang_cflags = ["-O0", config.target_cflags] + config.debug_info_flags
 clang_cxxflags = config.cxx_mode_flags + clang_cflags
-clang_lsan_cflags = clang_cflags + lsan_cflags
-clang_lsan_cxxflags = clang_cxxflags + lsan_cflags
+lsan_incdir = config.test_source_root + "/../"
+clang_lsan_cflags = clang_cflags + lsan_cflags + ["-I%s" % lsan_incdir]
+clang_lsan_cxxflags = clang_cxxflags + lsan_cflags + ["-I%s" % lsan_incdir]
 
 config.clang_cflags = clang_cflags
 config.clang_cxxflags = clang_cxxflags
diff --git a/test/msan/Linux/cmsghdr.cc b/test/msan/Linux/cmsghdr.cc
index d18415a..daed1ba 100644
--- a/test/msan/Linux/cmsghdr.cc
+++ b/test/msan/Linux/cmsghdr.cc
@@ -10,8 +10,6 @@
 
 // UNSUPPORTED: android
 
-// XFAIL: target-is-mips64el
-
 #include <assert.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/msan/Linux/eventfd.cc b/test/msan/Linux/eventfd.cc
index 62e19b2..4399211 100644
--- a/test/msan/Linux/eventfd.cc
+++ b/test/msan/Linux/eventfd.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_msan -O0 %s -o %t && %run %t 2>&1
 
-// XFAIL: target-is-mips64el
-
 #include <assert.h>
 #include <sys/eventfd.h>
 
diff --git a/test/msan/Linux/fopencookie.cc b/test/msan/Linux/fopencookie.cc
index 551e891..e5b8f93 100644
--- a/test/msan/Linux/fopencookie.cc
+++ b/test/msan/Linux/fopencookie.cc
@@ -2,8 +2,6 @@
 // RUN: %clangxx_msan -std=c++11 -O0 %s -o %t && %run %t
 // RUN: %clangxx_msan -std=c++11 -fsanitize-memory-track-origins -O0 %s -o %t && %run %t
 
-// XFAIL: target-is-mips64el
-
 #include <assert.h>
 #include <pthread.h>
 #include <stdint.h>
diff --git a/test/msan/Linux/forkpty.cc b/test/msan/Linux/forkpty.cc
index 7a80945..c9f0437 100644
--- a/test/msan/Linux/forkpty.cc
+++ b/test/msan/Linux/forkpty.cc
@@ -1,9 +1,9 @@
 // RUN: %clangxx_msan -O0 -g %s -lutil -o %t && %run %t
 
-// XFAIL: target-is-mips64el
-
 #include <assert.h>
 #include <pty.h>
+#include <unistd.h>
+#include <cstring>
 
 #include <sanitizer/msan_interface.h>
 
@@ -15,6 +15,10 @@
   assert(__msan_test_shadow(&master, sizeof(master)) == -1);
   assert(__msan_test_shadow(&slave, sizeof(slave)) == -1);
 
+  char ttyname[255];
+  ttyname_r(master, ttyname, sizeof(ttyname));
+  assert(__msan_test_shadow(ttyname, strlen(ttyname) + 1) == -1);
+
   int master2;
   forkpty(&master2, NULL, NULL, NULL);
   assert(__msan_test_shadow(&master2, sizeof(master2)) == -1);
diff --git a/test/msan/Linux/getresid.cc b/test/msan/Linux/getresid.cc
index 06e1374..f3c0914 100644
--- a/test/msan/Linux/getresid.cc
+++ b/test/msan/Linux/getresid.cc
@@ -2,8 +2,6 @@
 // RUN: %clangxx_msan -O0 -D_FILE_OFFSET_BITS=64 %s -o %t && %run %t %p 2>&1
 // RUN: %clangxx_msan -O3 %s -o %t && %run %t %p 2>&1
 
-// XFAIL: target-is-mips64el
-
 #include <assert.h>
 #include <unistd.h>
 
diff --git a/test/msan/Linux/glob.cc b/test/msan/Linux/glob.cc
index 50096c0..1481861 100644
--- a/test/msan/Linux/glob.cc
+++ b/test/msan/Linux/glob.cc
@@ -2,8 +2,6 @@
 // RUN: %clangxx_msan -O0 -D_FILE_OFFSET_BITS=64 %s -o %t && %run %t %p 2>&1 | FileCheck %s
 // RUN: %clangxx_msan -O3 %s -o %t && %run %t %p 2>&1 | FileCheck %s
 
-// XFAIL: target-is-mips64el
-
 #include <assert.h>
 #include <glob.h>
 #include <stdio.h>
diff --git a/test/msan/Linux/glob_altdirfunc.cc b/test/msan/Linux/glob_altdirfunc.cc
index d0cb4bc..cb7fe09 100644
--- a/test/msan/Linux/glob_altdirfunc.cc
+++ b/test/msan/Linux/glob_altdirfunc.cc
@@ -2,8 +2,6 @@
 // RUN: %clangxx_msan -O0 -D_FILE_OFFSET_BITS=64 %s -o %t && %run %t %p 2>&1 | FileCheck %s
 // RUN: %clangxx_msan -O3 %s -o %t && %run %t %p 2>&1 | FileCheck %s
 
-// XFAIL: target-is-mips64el
-
 #include <assert.h>
 #include <glob.h>
 #include <stdio.h>
diff --git a/test/msan/Linux/glob_nomatch.cc b/test/msan/Linux/glob_nomatch.cc
index 5845513..fa132c8 100644
--- a/test/msan/Linux/glob_nomatch.cc
+++ b/test/msan/Linux/glob_nomatch.cc
@@ -1,8 +1,6 @@
 // RUN: %clangxx_msan -O0 %s -o %t && %run %t %p
 // RUN: %clangxx_msan -O3 %s -o %t && %run %t %p
 
-// XFAIL: target-is-mips64el
-
 #include <assert.h>
 #include <glob.h>
 #include <stdio.h>
diff --git a/test/msan/Linux/ioctl_sound.cc b/test/msan/Linux/ioctl_sound.cc
index d7b38fa..fb36c52 100644
--- a/test/msan/Linux/ioctl_sound.cc
+++ b/test/msan/Linux/ioctl_sound.cc
@@ -1,8 +1,6 @@
 // RUN: %clangxx_msan -O0 -g %s -o %t && %run %t
 // RUN: %clangxx_msan -O3 -g %s -o %t && %run %t
 
-// XFAIL: target-is-mips64el
-
 #include <assert.h>
 #include <fcntl.h>
 #include <sound/asound.h>
diff --git a/test/msan/Linux/mallinfo.cc b/test/msan/Linux/mallinfo.cc
index 577a193..545ae93 100644
--- a/test/msan/Linux/mallinfo.cc
+++ b/test/msan/Linux/mallinfo.cc
@@ -1,8 +1,6 @@
 // RUN: %clangxx_msan -O0 -g %s -o %t && %run %t
 // REQUIRES: stable-runtime
 
-// XFAIL: target-is-mips64el
-
 #include <assert.h>
 #include <malloc.h>
 
diff --git a/test/msan/Linux/mincore.cc b/test/msan/Linux/mincore.cc
index a229d4b..35f5713 100644
--- a/test/msan/Linux/mincore.cc
+++ b/test/msan/Linux/mincore.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_msan -std=c++11 -O0 %s -o %t && %run %t
 
-// XFAIL: target-is-mips64el
-
 #include <assert.h>
 #include <unistd.h>
 #include <sys/mman.h>
diff --git a/test/msan/Linux/obstack.cc b/test/msan/Linux/obstack.cc
index f5b4fc3..0a81d87 100644
--- a/test/msan/Linux/obstack.cc
+++ b/test/msan/Linux/obstack.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_msan -O0 -g %s -o %t && %run %t
-// RUN: %clangxx_msan -O0 -g -DPOSITIVE %s -o %t && not %run %t |& FileCheck %s
-
-// XFAIL: target-is-mips64el
+// RUN: %clangxx_msan -O0 -g -DPOSITIVE %s -o %t && not %run %t 2>&1 | FileCheck %s
 
 #include <obstack.h>
 #include <sanitizer/msan_interface.h>
diff --git a/test/msan/Linux/process_vm_readv.cc b/test/msan/Linux/process_vm_readv.cc
index 4c7cafa..0a0e027 100644
--- a/test/msan/Linux/process_vm_readv.cc
+++ b/test/msan/Linux/process_vm_readv.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_msan -std=c++11 -O0 %s -o %t && %run %t
-// RUN: %clangxx_msan -std=c++11 -O0 %s -o %t -DPOSITIVE && not %run %t |& FileCheck %s
-
-// XFAIL: target-is-mips64el
+// RUN: %clangxx_msan -std=c++11 -O0 %s -o %t -DPOSITIVE && not %run %t 2>&1 | FileCheck %s
 
 #include <assert.h>
 #include <dlfcn.h>
diff --git a/test/msan/Linux/sendmsg.cc b/test/msan/Linux/sendmsg.cc
index e04559c..6a8ef83 100644
--- a/test/msan/Linux/sendmsg.cc
+++ b/test/msan/Linux/sendmsg.cc
@@ -15,8 +15,6 @@
 
 // UNSUPPORTED: android
 
-// XFAIL: target-is-mips64el
-
 #include <assert.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/msan/Linux/sunrpc.cc b/test/msan/Linux/sunrpc.cc
index 8acb155..c92ad63 100644
--- a/test/msan/Linux/sunrpc.cc
+++ b/test/msan/Linux/sunrpc.cc
@@ -11,8 +11,6 @@
 // RUN: %clangxx_msan -g -O0 -DTYPE=u_quad_t -DFN=xdr_u_longlong_t -DUNINIT=1 %s -o %t && \
 // RUN:     not %run %t 2>&1 | FileCheck %s
 
-// XFAIL: target-is-mips64el
-
 #include <assert.h>
 #include <rpc/xdr.h>
 
diff --git a/test/msan/Linux/sunrpc_bytes.cc b/test/msan/Linux/sunrpc_bytes.cc
index 4d46d55..477637a 100644
--- a/test/msan/Linux/sunrpc_bytes.cc
+++ b/test/msan/Linux/sunrpc_bytes.cc
@@ -3,8 +3,6 @@
 // RUN: %clangxx_msan -g -O0 -DUNINIT=1 %s -o %t && \
 // RUN:     not %run %t 2>&1 | FileCheck %s
 
-// XFAIL: target-is-mips64el
-
 #include <assert.h>
 #include <string.h>
 #include <rpc/xdr.h>
diff --git a/test/msan/Linux/sunrpc_string.cc b/test/msan/Linux/sunrpc_string.cc
index 53bea26..350222f 100644
--- a/test/msan/Linux/sunrpc_string.cc
+++ b/test/msan/Linux/sunrpc_string.cc
@@ -3,8 +3,6 @@
 // RUN: %clangxx_msan -g -O0 -DUNINIT=1 %s -o %t && \
 // RUN:     not %run %t 2>&1 | FileCheck %s
 
-// XFAIL: target-is-mips64el
-
 #include <assert.h>
 #include <string.h>
 #include <rpc/xdr.h>
diff --git a/test/msan/Linux/syscalls.cc b/test/msan/Linux/syscalls.cc
index 1287486..c5ac3e2 100644
--- a/test/msan/Linux/syscalls.cc
+++ b/test/msan/Linux/syscalls.cc
@@ -1,8 +1,6 @@
 // RUN: %clangxx_msan -O0 %s -o %t && %run %t 2>&1
 // RUN: %clangxx_msan -O3 %s -o %t && %run %t 2>&1
 
-// XFAIL: target-is-mips64el
-
 #include <assert.h>
 #include <errno.h>
 #include <glob.h>
diff --git a/test/msan/Linux/syscalls_sigaction.cc b/test/msan/Linux/syscalls_sigaction.cc
index 84e010e..975ca2b 100644
--- a/test/msan/Linux/syscalls_sigaction.cc
+++ b/test/msan/Linux/syscalls_sigaction.cc
@@ -3,8 +3,6 @@
 // RUN: %clangxx_msan -DPRE3 -O0 %s -o %t && not %run %t 2>&1
 // RUN: %clangxx_msan -O0 %s -o %t && %run %t 2>&1
 
-// XFAIL: target-is-mips64el                                                      
-
 #include <assert.h>
 #include <signal.h>
 #include <string.h>
@@ -13,7 +11,11 @@
 #include <sanitizer/msan_interface.h>
 
 struct my_kernel_sigaction {
+#if defined(__mips__)
+  long flags, handler;
+#else
   long handler, flags, restorer;
+#endif
   uint64_t mask[20]; // larger than any known platform
 };
 
@@ -37,6 +39,10 @@
   memset(&act, 0, sizeof(act));
   __msan_poison(&oldact, sizeof(oldact));
   __sanitizer_syscall_post_rt_sigaction(0, SIGUSR1, &act, &oldact, 5);
+#if defined(__mips__)
+  assert(__msan_test_shadow(&oldact, sizeof(oldact)) == sizeof(long)*2 + 5);
+#else
   assert(__msan_test_shadow(&oldact, sizeof(oldact)) == sizeof(long)*3 + 5);
 #endif
+#endif
 }
diff --git a/test/msan/Linux/tcgetattr.cc b/test/msan/Linux/tcgetattr.cc
index 7b6adbc..454b7fd 100644
--- a/test/msan/Linux/tcgetattr.cc
+++ b/test/msan/Linux/tcgetattr.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_msan -O0 %s -o %t && %run %t %p
 
-// XFAIL: target-is-mips64el                                                      
-
 #include <assert.h>
 #include <glob.h>
 #include <stdio.h>
diff --git a/test/msan/Linux/xattr.cc b/test/msan/Linux/xattr.cc
index bead651..86cc2cd 100644
--- a/test/msan/Linux/xattr.cc
+++ b/test/msan/Linux/xattr.cc
@@ -2,8 +2,6 @@
 // RUN: %clangxx_msan -O0 -D_FILE_OFFSET_BITS=64 %s -o %t && %run %t %p 2>&1
 // RUN: %clangxx_msan -O3 %s -o %t && %run %t %p 2>&1
 
-// XFAIL: target-is-mips64el                                                      
-
 #include <argz.h>
 #include <assert.h>
 #include <sys/types.h>
diff --git a/test/msan/allocator_returns_null.cc b/test/msan/allocator_returns_null.cc
index c47dc2e..f4ea51d 100644
--- a/test/msan/allocator_returns_null.cc
+++ b/test/msan/allocator_returns_null.cc
@@ -15,8 +15,6 @@
 // RUN: MSAN_OPTIONS=allocator_may_return_null=0 not %run %t realloc-after-malloc 2>&1 | FileCheck %s --check-prefix=CHECK-mrCRASH
 // RUN: MSAN_OPTIONS=allocator_may_return_null=1     %run %t realloc-after-malloc 2>&1 | FileCheck %s --check-prefix=CHECK-mrNULL
 
-// XFAIL: target-is-mips64el
-
 #include <limits.h>
 #include <stdlib.h>
 #include <string.h>
diff --git a/test/msan/backtrace.cc b/test/msan/backtrace.cc
index a4dd69b..cde4e8f 100644
--- a/test/msan/backtrace.cc
+++ b/test/msan/backtrace.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_msan -O0 %s -o %t && %run %t
 
-// XFAIL: target-is-mips64el
-
 #include <assert.h>
 #include <execinfo.h>
 #include <stdio.h>
@@ -17,7 +15,7 @@
     if (!buf[i])
       exit(1);
   char **s = backtrace_symbols(buf, sz);
-  assert(s > 0);
+  assert(s != 0);
   for (int i = 0; i < sz; ++i)
     printf("%d\n", (int)strlen(s[i]));
 }
diff --git a/test/msan/c-strdup.c b/test/msan/c-strdup.c
index 4a121cb..b1e02b9 100644
--- a/test/msan/c-strdup.c
+++ b/test/msan/c-strdup.c
@@ -3,8 +3,6 @@
 // RUN: %clang_msan -O2 %s -o %t && %run %t >%t.out 2>&1
 // RUN: %clang_msan -O3 %s -o %t && %run %t >%t.out 2>&1
 
-// XFAIL: target-is-mips64el
-
 // Test that strdup in C programs is intercepted.
 // GLibC headers translate strdup to __strdup at -O1 and higher.
 
diff --git a/test/msan/chained_origin.cc b/test/msan/chained_origin.cc
index 9b30c74..7cab152 100644
--- a/test/msan/chained_origin.cc
+++ b/test/msan/chained_origin.cc
@@ -1,22 +1,20 @@
 // RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -O3 %s -o %t && \
 // RUN:     not %run %t >%t.out 2>&1
-// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-STACK < %t.out
+// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-%short-stack --check-prefix=CHECK-STACK < %t.out
 
 // RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -DHEAP=1 -O3 %s -o %t && \
 // RUN:     not %run %t >%t.out 2>&1
-// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-HEAP < %t.out
+// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-%short-stack --check-prefix=CHECK-HEAP < %t.out
 
 
 // RUN: %clangxx_msan -mllvm -msan-instrumentation-with-call-threshold=0 -fsanitize-memory-track-origins=2 -O3 %s -o %t && \
 // RUN:     not %run %t >%t.out 2>&1
-// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-STACK < %t.out
+// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-%short-stack --check-prefix=CHECK-STACK < %t.out
 
 // RUN: %clangxx_msan -mllvm -msan-instrumentation-with-call-threshold=0 -fsanitize-memory-track-origins=2 -DHEAP=1 -O3 %s -o %t && \
 // RUN:     not %run %t >%t.out 2>&1
 // RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-HEAP < %t.out
 
-// XFAIL: target-is-mips64el
-
 #include <stdio.h>
 
 volatile int x, y;
@@ -52,16 +50,18 @@
 // CHECK: {{#0 .* in main.*chained_origin.cc:}}[[@LINE-4]]
 
 // CHECK: Uninitialized value was stored to memory at
-// CHECK: {{#0 .* in fn_h.*chained_origin.cc:}}[[@LINE-19]]
-// CHECK: {{#1 .* in main.*chained_origin.cc:}}[[@LINE-9]]
+// CHECK-FULL-STACK: {{#0 .* in fn_h.*chained_origin.cc:}}[[@LINE-19]]
+// CHECK-FULL-STACK: {{#1 .* in main.*chained_origin.cc:}}[[@LINE-9]]
+// CHECK-SHORT-STACK: {{#0 .* in fn_h.*chained_origin.cc:}}[[@LINE-21]]
 
 // CHECK: Uninitialized value was stored to memory at
-// CHECK: {{#0 .* in fn_g.*chained_origin.cc:}}[[@LINE-33]]
-// CHECK: {{#1 .* in fn_f.*chained_origin.cc:}}[[@LINE-29]]
-// CHECK: {{#2 .* in main.*chained_origin.cc:}}[[@LINE-15]]
+// CHECK-FULL-STACK: {{#0 .* in fn_g.*chained_origin.cc:}}[[@LINE-34]]
+// CHECK-FULL-STACK: {{#1 .* in fn_f.*chained_origin.cc:}}[[@LINE-30]]
+// CHECK-FULL-STACK: {{#2 .* in main.*chained_origin.cc:}}[[@LINE-16]]
+// CHECK-SHORT-STACK: {{#0 .* in fn_g.*chained_origin.cc:}}[[@LINE-37]]
 
 // CHECK-STACK: Uninitialized value was created by an allocation of 'z' in the stack frame of function 'main'
-// CHECK-STACK: {{#0 .* in main.*chained_origin.cc:}}[[@LINE-25]]
+// CHECK-STACK: {{#0 .* in main.*chained_origin.cc:}}[[@LINE-27]]
 
 // CHECK-HEAP: Uninitialized value was created by a heap allocation
-// CHECK-HEAP: {{#1 .* in main.*chained_origin.cc:}}[[@LINE-26]]
+// CHECK-HEAP: {{#1 .* in main.*chained_origin.cc:}}[[@LINE-28]]
diff --git a/test/msan/chained_origin_empty_stack.cc b/test/msan/chained_origin_empty_stack.cc
index 0a5a9c3..f1ed66b 100644
--- a/test/msan/chained_origin_empty_stack.cc
+++ b/test/msan/chained_origin_empty_stack.cc
@@ -1,8 +1,6 @@
 // RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -O3 %s -o %t && \
 // RUN:     MSAN_OPTIONS=store_context_size=1 not %run %t 2>&1 | FileCheck %s
 
-// XFAIL: target-is-mips64el
-
 // Test that stack trace for the intermediate store is not empty.
 
 // CHECK: MemorySanitizer: use-of-uninitialized-value
diff --git a/test/msan/chained_origin_limits.cc b/test/msan/chained_origin_limits.cc
index 0f97c11..9585889 100644
--- a/test/msan/chained_origin_limits.cc
+++ b/test/msan/chained_origin_limits.cc
@@ -10,7 +10,7 @@
 // RUN: FileCheck %s --check-prefix=CHECK2 < %t.out
 
 // RUN: MSAN_OPTIONS=origin_history_per_stack_limit=1 not %run %t >%t.out 2>&1
-// RUN: FileCheck %s --check-prefix=CHECK-PER-STACK < %t.out
+// RUN: FileCheck %s --check-prefix=CHECK-PER-STACK --check-prefix=CHECK-%short-stack < %t.out
 
 // RUN: MSAN_OPTIONS=origin_history_size=7,origin_history_per_stack_limit=0 not %run %t >%t.out 2>&1
 // RUN: FileCheck %s --check-prefix=CHECK7 < %t.out
@@ -25,7 +25,7 @@
 // RUN: FileCheck %s --check-prefix=CHECK2 < %t.out
 
 // RUN: MSAN_OPTIONS=origin_history_per_stack_limit=1 not %run %t >%t.out 2>&1
-// RUN: FileCheck %s --check-prefix=CHECK-PER-STACK < %t.out
+// RUN: FileCheck %s --check-prefix=CHECK-PER-STACK --check-prefix=CHECK-%short-stack < %t.out
 
 // RUN: MSAN_OPTIONS=origin_history_size=7,origin_history_per_stack_limit=0 not %run %t >%t.out 2>&1
 // RUN: FileCheck %s --check-prefix=CHECK7 < %t.out
@@ -41,7 +41,7 @@
 // RUN: FileCheck %s --check-prefix=CHECK2 < %t.out
 
 // RUN: MSAN_OPTIONS=origin_history_per_stack_limit=1 not %run %t >%t.out 2>&1
-// RUN: FileCheck %s --check-prefix=CHECK-PER-STACK < %t.out
+// RUN: FileCheck %s --check-prefix=CHECK-PER-STACK --check-prefix=CHECK-%short-stack < %t.out
 
 // RUN: MSAN_OPTIONS=origin_history_size=7,origin_history_per_stack_limit=0 not %run %t >%t.out 2>&1
 // RUN: FileCheck %s --check-prefix=CHECK7 < %t.out
@@ -57,13 +57,11 @@
 // RUN: FileCheck %s --check-prefix=CHECK2 < %t.out
 
 // RUN: MSAN_OPTIONS=origin_history_per_stack_limit=1 not %run %t >%t.out 2>&1
-// RUN: FileCheck %s --check-prefix=CHECK-PER-STACK < %t.out
+// RUN: FileCheck %s --check-prefix=CHECK-PER-STACK --check-prefix=CHECK-%short-stack < %t.out
 
 // RUN: MSAN_OPTIONS=origin_history_size=7,origin_history_per_stack_limit=0 not %run %t >%t.out 2>&1
 // RUN: FileCheck %s --check-prefix=CHECK7 < %t.out
 
-// XFAIL: target-is-mips64el
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -149,13 +147,21 @@
 // CHECK2-NOT: Uninitialized value was stored to memory at
 // CHECK2: Uninitialized value was created
 
+// For architectures with short stack all the stacks in the chain are the same
+// because the stack trace does not contain frames up to the functions fn1, fn2,
+// fn3 from where the uninitialized stores actually originate. Since we report
+// uninitialized value store once for each stack frame
+// (origin_history_per_stack_limit = 1) we expect only one instance of
+// "Uninitialized value was stored to memory at".
+
 // CHECK-PER-STACK: WARNING: MemorySanitizer: use-of-uninitialized-value
 // CHECK-PER-STACK: Uninitialized value was stored to memory at
-// CHECK-PER-STACK: in fn3
-// CHECK-PER-STACK: Uninitialized value was stored to memory at
-// CHECK-PER-STACK: in fn2
-// CHECK-PER-STACK: Uninitialized value was stored to memory at
-// CHECK-PER-STACK: in fn1
+// CHECK-SHORT-STACK: in __msan_memmove
+// CHECK-FULL-STACK: in fn3
+// CHECK-FULL-STACK: Uninitialized value was stored to memory at
+// CHECK-FULL-STACK: in fn2
+// CHECK-FULL-STACK: Uninitialized value was stored to memory at
+// CHECK-FULL-STACK: in fn1
 // CHECK-PER-STACK: Uninitialized value was created
 
 // CHECK-UNLIMITED: WARNING: MemorySanitizer: use-of-uninitialized-value
diff --git a/test/msan/chained_origin_memcpy.cc b/test/msan/chained_origin_memcpy.cc
index 07474fb..bfe50df 100644
--- a/test/msan/chained_origin_memcpy.cc
+++ b/test/msan/chained_origin_memcpy.cc
@@ -1,21 +1,19 @@
 // RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -DOFFSET=0 -O3 %s -o %t && \
 // RUN:     not %run %t >%t.out 2>&1
-// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-Z1 < %t.out
+// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-Z1 --check-prefix=CHECK-%short-stack < %t.out
 
 // RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -DOFFSET=10 -O3 %s -o %t && \
 // RUN:     not %run %t >%t.out 2>&1
-// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-Z2 < %t.out
+// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-Z2 --check-prefix=CHECK-%short-stack < %t.out
 
 
 // RUN: %clangxx_msan -mllvm -msan-instrumentation-with-call-threshold=0 -fsanitize-memory-track-origins=2 -DOFFSET=0 -O3 %s -o %t && \
 // RUN:     not %run %t >%t.out 2>&1
-// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-Z1 < %t.out
+// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-Z1 --check-prefix=CHECK-%short-stack < %t.out
 
 // RUN: %clangxx_msan -mllvm -msan-instrumentation-with-call-threshold=0 -fsanitize-memory-track-origins=2 -DOFFSET=10 -O3 %s -o %t && \
 // RUN:     not %run %t >%t.out 2>&1
-// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-Z2 < %t.out
-
-// XFAIL: target-is-mips64el
+// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-Z2 --check-prefix=CHECK-%short-stack < %t.out
 
 #include <stdio.h>
 #include <string.h>
@@ -51,12 +49,14 @@
 // CHECK: {{#0 .* in main .*chained_origin_memcpy.cc:}}[[@LINE-4]]
 
 // CHECK: Uninitialized value was stored to memory at
-// CHECK: {{#1 .* in fn_h.*chained_origin_memcpy.cc:}}[[@LINE-15]]
+// CHECK-FULL-STACK: {{#1 .* in fn_h.*chained_origin_memcpy.cc:}}[[@LINE-15]]
+// CHECK-SHORT-STACK: {{#0 .* in __msan_memcpy .*msan_interceptors.cc:}}
 
 // CHECK: Uninitialized value was stored to memory at
-// CHECK: {{#0 .* in fn_g.*chained_origin_memcpy.cc:}}[[@LINE-28]]
-// CHECK: {{#1 .* in fn_f.*chained_origin_memcpy.cc:}}[[@LINE-24]]
+// CHECK-FULL-STACK: {{#0 .* in fn_g.*chained_origin_memcpy.cc:}}[[@LINE-29]]
+// CHECK-FULL-STACK: {{#1 .* in fn_f.*chained_origin_memcpy.cc:}}[[@LINE-25]]
+// CHECK-SHORT-STACK: {{#0 .* in fn_g.*chained_origin_memcpy.cc:}}[[@LINE-31]]
 
 // CHECK-Z1: Uninitialized value was created by an allocation of 'z1' in the stack frame of function 'main'
 // CHECK-Z2: Uninitialized value was created by an allocation of 'z2' in the stack frame of function 'main'
-// CHECK: {{#0 .* in main.*chained_origin_memcpy.cc:}}[[@LINE-20]]
+// CHECK: {{#0 .* in main.*chained_origin_memcpy.cc:}}[[@LINE-22]]
diff --git a/test/msan/chained_origin_with_signals.cc b/test/msan/chained_origin_with_signals.cc
index e371982..43dbdcc 100644
--- a/test/msan/chained_origin_with_signals.cc
+++ b/test/msan/chained_origin_with_signals.cc
@@ -10,8 +10,6 @@
 // RUN:     not %run %t >%t.out 2>&1
 // RUN: FileCheck %s < %t.out
 
-// XFAIL: target-is-mips64el
-
 #include <signal.h>
 #include <stdio.h>
 #include <sys/types.h>
diff --git a/test/msan/check_mem_is_initialized.cc b/test/msan/check_mem_is_initialized.cc
index 461ce19..e1d3b11 100644
--- a/test/msan/check_mem_is_initialized.cc
+++ b/test/msan/check_mem_is_initialized.cc
@@ -16,8 +16,6 @@
 // RUN: %clangxx_msan -fsanitize-memory-track-origins -O3 %s -o %t && not %run %t >%t.out 2>&1
 // RUN: FileCheck %s < %t.out && FileCheck %s --check-prefix=CHECK-ORIGINS < %t.out
 
-// XFAIL: target-is-mips64el
-
 #include <sanitizer/msan_interface.h>
 #include <stdlib.h>
 
diff --git a/test/msan/coverage-levels.cc b/test/msan/coverage-levels.cc
index 43b03e3..b881cec 100644
--- a/test/msan/coverage-levels.cc
+++ b/test/msan/coverage-levels.cc
@@ -10,8 +10,6 @@
 // RUN: %clangxx_msan -O1 -fsanitize-coverage=edge  %s -o %t
 // RUN: MSAN_OPTIONS=coverage=1:verbosity=1:coverage_dir=%T/coverage-levels not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK3 --check-prefix=CHECK_WARN
 
-// XFAIL: target-is-mips64el
-
 volatile int sink;
 int main(int argc, char **argv) {
   int var;
diff --git a/test/msan/ctermid.cc b/test/msan/ctermid.cc
index e91ea71..a2818e6 100644
--- a/test/msan/ctermid.cc
+++ b/test/msan/ctermid.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_msan -std=c++11 -O0 %s -o %t && %run %t
 
-// XFAIL: target-is-mips64el
-
 #include <sanitizer/msan_interface.h>
 #include <stdio.h>
 #include <string.h>
diff --git a/test/msan/cxa_atexit.cc b/test/msan/cxa_atexit.cc
index 8210436..70384b9 100644
--- a/test/msan/cxa_atexit.cc
+++ b/test/msan/cxa_atexit.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_msan -O0 %s -o %t && %run %t %p
 
-// XFAIL: target-is-mips64el
-
 // PR17377: C++ module destructors get stale argument shadow.
 
 #include <stdio.h>
diff --git a/test/msan/death-callback.cc b/test/msan/death-callback.cc
index 9aeac5b..08cf291 100644
--- a/test/msan/death-callback.cc
+++ b/test/msan/death-callback.cc
@@ -7,8 +7,6 @@
 // RUN: %clangxx_msan -DMSANCB_SET %s -o %t && %run %t 2>&1 | \
 // RUN:     FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOCB
 
-// XFAIL: target-is-mips64el
-
 #include <sanitizer/msan_interface.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/msan/dlerror.cc b/test/msan/dlerror.cc
index d0335d9..d5510b6 100644
--- a/test/msan/dlerror.cc
+++ b/test/msan/dlerror.cc
@@ -1,7 +1,5 @@
 // RUN: %clangxx_msan -O0 %s -o %t && %run %t
 
-// XFAIL: target-is-mips64el
-
 #include <assert.h>
 #include <dlfcn.h>
 #include <stdio.h>
diff --git a/test/msan/fork.cc b/test/msan/fork.cc
index 78a62d5..e4dc549 100644
--- a/test/msan/fork.cc
+++ b/test/msan/fork.cc
@@ -3,13 +3,16 @@
 // and verify that origin reads do not deadlock in the child process.
 
 // RUN: %clangxx_msan -std=c++11 -fsanitize-memory-track-origins=2 -g -O3 %s -o %t
-// RUN: MSAN_OPTIONS=store_context_size=1000,origin_history_size=0,origin_history_per_stack_limit=0 %run %t |& FileCheck %s
+// RUN: MSAN_OPTIONS=store_context_size=1000,origin_history_size=0,origin_history_per_stack_limit=0 %run %t 2>&1 | FileCheck %s
 
 // Fun fact: if test output is redirected to a file (as opposed to
 // being piped directly to FileCheck), we may lose some "done"s due to
 // a kernel bug:
 // https://lkml.org/lkml/2014/2/17/324
 
+// Flaky on PPC64.
+// UNSUPPORTED: powerpc64-target-arch
+// UNSUPPORTED: powerpc64le-target-arch
 
 #include <pthread.h>
 #include <unistd.h>
@@ -89,7 +92,7 @@
       exit(0);
     }
   }
-  
+
   for (int i = 0; i < kChildren; ++i) {
     pid_t p;
     while ((p = wait(NULL)) == -1) {  }
diff --git a/test/msan/getutent.cc b/test/msan/getutent.cc
new file mode 100644
index 0000000..36f9e1f
--- /dev/null
+++ b/test/msan/getutent.cc
@@ -0,0 +1,17 @@
+// RUN: %clangxx_msan -O0 -g %s -o %t && %run %t
+
+#include <utmp.h>
+#include <utmpx.h>
+#include <sanitizer/msan_interface.h>
+
+int main(void) {
+  setutent();
+  while (struct utmp *ut = getutent())
+    __msan_check_mem_is_initialized(ut, sizeof(*ut));
+  endutent();
+
+  setutxent();
+  while (struct utmpx *utx = getutxent())
+    __msan_check_mem_is_initialized(utx, sizeof(*utx));
+  endutxent();
+}
diff --git a/test/msan/iconv.cc b/test/msan/iconv.cc
index c2da938..e5fbbf9 100644
--- a/test/msan/iconv.cc
+++ b/test/msan/iconv.cc
@@ -1,5 +1,5 @@
 // RUN: %clangxx_msan -O0 -g %s -o %t && %run %t
-// RUN: %clangxx_msan -O0 -g -DPOSITIVE %s -o %t && not %run %t |& FileCheck %s
+// RUN: %clangxx_msan -O0 -g -DPOSITIVE %s -o %t && not %run %t 2>&1 | FileCheck %s
 
 #include <assert.h>
 #include <iconv.h>
diff --git a/test/msan/keep-going-dso.cc b/test/msan/keep-going-dso.cc
deleted file mode 100644
index f32a513..0000000
--- a/test/msan/keep-going-dso.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-// RUN: %clangxx_msan -O0 %s -o %t && not %run %t >%t.out 2>&1
-// FileCheck --check-prefix=CHECK-KEEP-GOING %s <%t.out
-// RUN: %clangxx_msan -O0 %s -o %t && MSAN_OPTIONS=keep_going=0 not %run %t >%t.out 2>&1
-// FileCheck %s <%t.out
-// RUN: %clangxx_msan -O0 %s -o %t && MSAN_OPTIONS=keep_going=1 not %run %t >%t.out 2>&1
-// FileCheck --check-prefix=CHECK-KEEP-GOING %s <%t.out
-
-// RUN: %clangxx_msan -mllvm -msan-keep-going=1 -O0 %s -o %t && not %run %t >%t.out 2>&1
-// FileCheck --check-prefix=CHECK-KEEP-GOING %s <%t.out
-// RUN: %clangxx_msan -mllvm -msan-keep-going=1 -O0 %s -o %t && MSAN_OPTIONS=keep_going=0 not %run %t >%t.out 2>&1
-// FileCheck %s <%t.out
-// RUN: %clangxx_msan -mllvm -msan-keep-going=1 -O0 %s -o %t && MSAN_OPTIONS=keep_going=1 not %run %t >%t.out 2>&1
-// FileCheck --check-prefix=CHECK-KEEP-GOING %s <%t.out
-
-// Test how -mllvm -msan-keep-going and MSAN_OPTIONS=keep_going affect reports
-// from interceptors.
-// -mllvm -msan-keep-going provides the default value of keep_going flag, but is
-// always overwritten by MSAN_OPTIONS
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-int main(int argc, char **argv) {
-  char *volatile x = (char*)malloc(5 * sizeof(char));
-  x[4] = 0;
-  if (strlen(x) < 3)
-    exit(0);
-  fprintf(stderr, "Done\n");
-  // CHECK-NOT: Done
-  // CHECK-KEEP-GOING: Done
-  return 0;
-}
diff --git a/test/msan/keep-going.cc b/test/msan/keep-going.cc
deleted file mode 100644
index 5772975..0000000
--- a/test/msan/keep-going.cc
+++ /dev/null
@@ -1,34 +0,0 @@
-// RUN: %clangxx_msan -O0 %s -o %t && not %run %t >%t.out 2>&1
-// FileCheck %s <%t.out
-// RUN: %clangxx_msan -O0 %s -o %t && MSAN_OPTIONS=keep_going=0 not %run %t >%t.out 2>&1
-// FileCheck %s <%t.out
-// RUN: %clangxx_msan -O0 %s -o %t && MSAN_OPTIONS=keep_going=1 not %run %t >%t.out 2>&1
-// FileCheck %s <%t.out
-
-// RUN: %clangxx_msan -mllvm -msan-keep-going=1 -O0 %s -o %t && not %run %t >%t.out 2>&1
-// FileCheck --check-prefix=CHECK-KEEP-GOING %s <%t.out
-// RUN: %clangxx_msan -mllvm -msan-keep-going=1 -O0 %s -o %t && MSAN_OPTIONS=keep_going=0 not %run %t >%t.out 2>&1
-// FileCheck %s <%t.out
-// RUN: %clangxx_msan -mllvm -msan-keep-going=1 -O0 %s -o %t && MSAN_OPTIONS=keep_going=1 not %run %t >%t.out 2>&1
-// FileCheck --check-prefix=CHECK-KEEP-GOING %s <%t.out
-// RUN: %clangxx_msan -mllvm -msan-keep-going=1 -O0 %s -o %t && MSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
-// FileCheck %s <%t.out
-// RUN: %clangxx_msan -mllvm -msan-keep-going=1 -O0 %s -o %t && MSAN_OPTIONS=halt_on_error=0 not %run %t >%t.out 2>&1
-// FileCheck --check-prefix=CHECK-KEEP-GOING %s <%t.out
-
-// Test behaviour of -mllvm -msan-keep-going and MSAN_OPTIONS=keep_going.
-// -mllvm -msan-keep-going provides the default value of keep_going flag; value
-// of 1 can be overwritten by MSAN_OPTIONS, value of 0 can not.
-
-#include <stdio.h>
-#include <stdlib.h>
-
-int main(int argc, char **argv) {
-  char *volatile x = (char*)malloc(5 * sizeof(char));
-  if (x[0])
-    exit(0);
-  fprintf(stderr, "Done\n");
-  // CHECK-NOT: Done
-  // CHECK-KEEP-GOING: Done
-  return 0;
-}
diff --git a/test/msan/lit.cfg b/test/msan/lit.cfg
index d23ff31..eb0ed43 100644
--- a/test/msan/lit.cfg
+++ b/test/msan/lit.cfg
@@ -35,3 +35,11 @@
 
 if config.target_arch != 'aarch64':
   config.available_features.add('stable-runtime')
+
+# For mips64 and mips64el, store_context_size is forced to 1 because these
+# archs use the slow unwinder, which is not async-signal-safe. Therefore we
+# only check the first frame, since store_context_size is 1.
+if config.host_arch in ['mips64', 'mips64el']:
+  config.substitutions.append( ('CHECK-%short-stack', 'CHECK-SHORT-STACK'))
+else:
+  config.substitutions.append( ('CHECK-%short-stack', 'CHECK-FULL-STACK'))
diff --git a/test/msan/mmap.cc b/test/msan/mmap.cc
index 01c1772..65d8bee 100644
--- a/test/msan/mmap.cc
+++ b/test/msan/mmap.cc
@@ -39,6 +39,12 @@
     {0x2E000000000ULL, 0x2F000000000ULL},
     {0x3B000000000ULL, 0x3C000000000ULL},
     {0x3F000000000ULL, 0x40000000000ULL},
+    {0x0041000000000ULL, 0x0042000000000ULL},
+    {0x0050000000000ULL, 0x0051000000000ULL},
+    {0x0058000000000ULL, 0x0059000000000ULL},
+    {0x0061000000000ULL, 0x0062000000000ULL},
+    {0x0AAAAA0000000ULL, 0x0AAAB00000000ULL},
+    {0x0FFFF00000000ULL, 0x1000000000000ULL},
   };
   const size_t mappingsSize = sizeof (mappings) / sizeof (mappings[0]);
 
diff --git a/test/msan/msan_check_mem_is_initialized.cc b/test/msan/msan_check_mem_is_initialized.cc
index 599cf2d..2991501 100644
--- a/test/msan/msan_check_mem_is_initialized.cc
+++ b/test/msan/msan_check_mem_is_initialized.cc
@@ -1,5 +1,5 @@
 // RUN: %clangxx_msan -O0 -g -DPOSITIVE %s -o %t
-// RUN: not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: not %run %t 2>&1 | FileCheck %s
 // RUN: MSAN_OPTIONS=verbosity=1 not %run %t 2>&1 | \
 // RUN:     FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-VERBOSE
 
diff --git a/test/msan/msan_copy_shadow.cc b/test/msan/msan_copy_shadow.cc
index a1c6347..19da738 100644
--- a/test/msan/msan_copy_shadow.cc
+++ b/test/msan/msan_copy_shadow.cc
@@ -1,7 +1,7 @@
 // Test that __msan_copy_shadow copies shadow, updates origin and does not touch
 // the application memory.
 // RUN: %clangxx_msan -fsanitize-memory-track-origins=0 -O0 %s -o %t && not %run %t 2>&1
-// RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -O0 %s -o %t && not %run %t 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-%short-stack %s
 
 #include <assert.h>
 #include <string.h>
@@ -28,7 +28,8 @@
   // CHECK: use-of-uninitialized-value
   // CHECK:   {{in main.*msan_copy_shadow.cc:}}[[@LINE-2]]
   // CHECK: Uninitialized value was stored to memory at
-  // CHECK:   {{in main.*msan_copy_shadow.cc:}}[[@LINE-8]]
+  // CHECK-FULL-STACK:   {{in main.*msan_copy_shadow.cc:}}[[@LINE-8]]
+  // CHECK-SHORT-STACK:   {{in __msan_copy_shadow .*msan_interceptors.cc:}}
   // CHECK: Uninitialized value was created by a heap allocation
-  // CHECK:   {{in main.*msan_copy_shadow.cc:}}[[@LINE-22]]
+  // CHECK:   {{in main.*msan_copy_shadow.cc:}}[[@LINE-23]]
 }
diff --git a/test/msan/print_stats.cc b/test/msan/print_stats.cc
index 39af504..5b46d4e 100644
--- a/test/msan/print_stats.cc
+++ b/test/msan/print_stats.cc
@@ -1,22 +1,22 @@
 // RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -g %s -o %t 
 // RUN: %run %t 2>&1 | \
-// RUN:   FileCheck --check-prefix=CHECK --check-prefix=CHECK-NOSTATS %s
+// RUN:   FileCheck --check-prefixes=CHECK,CHECK-NOSTATS %s
 // RUN: MSAN_OPTIONS=print_stats=1 %run %t 2>&1 | \
-// RUN:   FileCheck --check-prefix=CHECK --check-prefix=CHECK-NOSTATS %s
+// RUN:   FileCheck --check-prefixes=CHECK,CHECK-NOSTATS %s
 // RUN: MSAN_OPTIONS=print_stats=1,atexit=1 %run %t 2>&1 | \
-// RUN:   FileCheck --check-prefix=CHECK --check-prefix=CHECK-STATS %s
+// RUN:   FileCheck --check-prefixes=CHECK,CHECK-STATS %s
 
 // RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -g -DPOSITIVE=1 %s -o %t 
 // RUN: not %run %t 2>&1 | \
-// RUN:   FileCheck --check-prefix=CHECK --check-prefix=CHECK-NOSTATS %s
+// RUN:   FileCheck --check-prefixes=CHECK,CHECK-NOSTATS %s
 // RUN: MSAN_OPTIONS=print_stats=1 not %run %t 2>&1 | \
-// RUN:   FileCheck --check-prefix=CHECK --check-prefix=CHECK-STATS %s
+// RUN:   FileCheck --check-prefixes=CHECK,CHECK-STATS %s
 
-// RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -g -DPOSITIVE=1 -mllvm -msan-keep-going=1 %s -o %t 
+// RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -fsanitize-recover=memory -g -DPOSITIVE=1 %s -o %t
 // RUN: not %run %t 2>&1 | \
-// RUN:  FileCheck --check-prefix=CHECK --check-prefix=CHECK-NOSTATS --check-prefix=CHECK-KEEPGOING %s
+// RUN:  FileCheck --check-prefixes=CHECK,CHECK-NOSTATS,CHECK-RECOVER %s
 // RUN: MSAN_OPTIONS=print_stats=1 not %run %t 2>&1 | \
-// RUN:   FileCheck --check-prefix=CHECK --check-prefix=CHECK-STATS --check-prefix=CHECK-KEEPGOING %s
+// RUN:   FileCheck --check-prefixes=CHECK,CHECK-STATS,CHECK-RECOVER %s
 
 #include <stdio.h>
 int main(int argc, char **argv) {
@@ -42,4 +42,4 @@
 // CHECK-NOSTATS-NOT: Unique origin histories:
 // CHECK-NOSTATS-NOT: History depot allocated bytes:
 
-// CHECK-KEEPGOING: MemorySanitizer: 1 warnings reported.
+// CHECK-RECOVER: MemorySanitizer: 1 warnings reported.
diff --git a/test/msan/realloc-large-origin.cc b/test/msan/realloc-large-origin.cc
index ce25ad8..6893c1d 100644
--- a/test/msan/realloc-large-origin.cc
+++ b/test/msan/realloc-large-origin.cc
@@ -1,7 +1,7 @@
 // RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -O0 %s -o %t && not %run %t >%t.out 2>&1
-// RUN: FileCheck %s < %t.out
+// RUN: FileCheck --check-prefix=CHECK --check-prefix=CHECK-%short-stack %s < %t.out
 // RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -O2 %s -o %t && not %run %t >%t.out 2>&1
-// RUN: FileCheck %s < %t.out
+// RUN: FileCheck --check-prefix=CHECK --check-prefix=CHECK-%short-stack %s < %t.out
 
 // This is a regression test: there used to be broken "stored to memory at"
 // stacks with
@@ -21,10 +21,11 @@
 // CHECK:   {{#0 0x.* in main .*realloc-large-origin.cc:}}[[@LINE-3]]
 
 // CHECK:  Uninitialized value was stored to memory at
-// CHECK:   {{#0 0x.* in .*realloc}}
-// CHECK:   {{#1 0x.* in main .*realloc-large-origin.cc:}}[[@LINE-10]]
+// CHECK-FULL-STACK:   {{#0 0x.* in .*realloc}}
+// CHECK-FULL-STACK:   {{#1 0x.* in main .*realloc-large-origin.cc:}}[[@LINE-10]]
+// CHECK-SHORT-STACK:   {{#0 0x.* in .*realloc}}
 
 // CHECK:   Uninitialized value was created by a heap allocation
 // CHECK:   {{#0 0x.* in .*malloc}}
-// CHECK:   {{#1 0x.* in main .*realloc-large-origin.cc:}}[[@LINE-15]]
+// CHECK:   {{#1 0x.* in main .*realloc-large-origin.cc:}}[[@LINE-16]]
 }
diff --git a/test/msan/recover-dso.cc b/test/msan/recover-dso.cc
new file mode 100644
index 0000000..2f42256
--- /dev/null
+++ b/test/msan/recover-dso.cc
@@ -0,0 +1,39 @@
+// RUN: %clangxx_msan -O0 %s -o %t && not %run %t >%t.out 2>&1
+// FileCheck --check-prefix=CHECK-RECOVER %s <%t.out
+// RUN: %clangxx_msan -O0 %s -o %t && MSAN_OPTIONS=keep_going=0 not %run %t >%t.out 2>&1
+// FileCheck %s <%t.out
+// RUN: %clangxx_msan -O0 %s -o %t && MSAN_OPTIONS=keep_going=1 not %run %t >%t.out 2>&1
+// FileCheck --check-prefix=CHECK-RECOVER %s <%t.out
+
+// Test how -fsanitize-recover=memory and MSAN_OPTIONS=keep_going affect reports
+// from interceptors.
+// -fsanitize-recover=memory provides the default value of the keep_going flag,
+// but it is always overridden by MSAN_OPTIONS.
+
+// RUN: %clangxx_msan -fsanitize-recover=memory -O0 %s -o %t && not %run %t >%t.out 2>&1
+// FileCheck --check-prefix=CHECK-RECOVER %s <%t.out
+// RUN: %clangxx_msan -fsanitize-recover=memory -O0 %s -o %t && MSAN_OPTIONS=keep_going=0 not %run %t >%t.out 2>&1
+// FileCheck %s <%t.out
+// RUN: %clangxx_msan -fsanitize-recover=memory -O0 %s -o %t && MSAN_OPTIONS=keep_going=1 not %run %t >%t.out 2>&1
+// FileCheck --check-prefix=CHECK-RECOVER %s <%t.out
+
+// Test how legacy -mllvm -msan-keep-going and MSAN_OPTIONS=keep_going affect
+// reports from interceptors.
+
+// RUN: %clangxx_msan -mllvm -msan-keep-going=1 -O0 %s -o %t && not %run %t >%t.out 2>&1
+// FileCheck --check-prefix=CHECK-RECOVER %s <%t.out
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int main(int argc, char **argv) {
+  char *volatile x = (char*)malloc(5 * sizeof(char));
+  x[4] = 0;
+  if (strlen(x) < 3)
+    exit(0);
+  fprintf(stderr, "Done\n");
+  // CHECK-NOT: Done
+  // CHECK-RECOVER: Done
+  return 0;
+}
diff --git a/test/msan/recover.cc b/test/msan/recover.cc
new file mode 100644
index 0000000..cb9916e
--- /dev/null
+++ b/test/msan/recover.cc
@@ -0,0 +1,41 @@
+// RUN: %clangxx_msan -O0 %s -o %t && not %run %t >%t.out 2>&1
+// FileCheck %s <%t.out
+// RUN: %clangxx_msan -O0 %s -o %t && MSAN_OPTIONS=keep_going=0 not %run %t >%t.out 2>&1
+// FileCheck %s <%t.out
+// RUN: %clangxx_msan -O0 %s -o %t && MSAN_OPTIONS=keep_going=1 not %run %t >%t.out 2>&1
+// FileCheck %s <%t.out
+
+// Test behavior of -fsanitize-recover=memory and MSAN_OPTIONS=keep_going.
+// -fsanitize-recover=memory provides the default value of the keep_going flag;
+// a value of 1 can be overridden by MSAN_OPTIONS, a value of 0 cannot.
+
+// RUN: %clangxx_msan -fsanitize-recover=memory -O0 %s -o %t && not %run %t >%t.out 2>&1
+// FileCheck --check-prefix=CHECK-RECOVER %s <%t.out
+// RUN: %clangxx_msan -fsanitize-recover=memory -O0 %s -o %t && MSAN_OPTIONS=keep_going=0 not %run %t >%t.out 2>&1
+// FileCheck %s <%t.out
+// RUN: %clangxx_msan -fsanitize-recover=memory -O0 %s -o %t && MSAN_OPTIONS=keep_going=1 not %run %t >%t.out 2>&1
+// FileCheck --check-prefix=CHECK-RECOVER %s <%t.out
+// RUN: %clangxx_msan -fsanitize-recover=memory -O0 %s -o %t && MSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// FileCheck %s <%t.out
+// RUN: %clangxx_msan -fsanitize-recover=memory -O0 %s -o %t && MSAN_OPTIONS=halt_on_error=0 not %run %t >%t.out 2>&1
+// FileCheck --check-prefix=CHECK-RECOVER %s <%t.out
+
+// Basic test of legacy -mllvm -msan-keep-going and MSAN_OPTIONS=keep_going.
+
+// RUN: %clangxx_msan -mllvm -msan-keep-going=1 -O0 %s -o %t && not %run %t >%t.out 2>&1
+// FileCheck --check-prefix=CHECK-RECOVER %s <%t.out
+// RUN: %clangxx_msan -mllvm -msan-keep-going=1 -O0 %s -o %t && MSAN_OPTIONS=keep_going=0 not %run %t >%t.out 2>&1
+// FileCheck %s <%t.out
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char **argv) {
+  char *volatile x = (char*)malloc(5 * sizeof(char));
+  if (x[0])
+    exit(0);
+  fprintf(stderr, "Done\n");
+  // CHECK-NOT: Done
+  // CHECK-RECOVER: Done
+  return 0;
+}
diff --git a/test/profile/Inputs/comdat_rename.h b/test/profile/Inputs/comdat_rename.h
index 53e1007..d30628f 100644
--- a/test/profile/Inputs/comdat_rename.h
+++ b/test/profile/Inputs/comdat_rename.h
@@ -9,5 +9,5 @@
       }
   }
   int a;
-  int b;
+  int volatile b;
 };
diff --git a/test/profile/Linux/comdat_rename.test b/test/profile/Linux/comdat_rename.test
index 116b5dc..b323352 100644
--- a/test/profile/Linux/comdat_rename.test
+++ b/test/profile/Linux/comdat_rename.test
@@ -1,6 +1,6 @@
 // RUN: rm -fr %t.prof
-// RUN: %clangxx_pgogen=%t.prof/ -o %t.gen -O2 %S/../Inputs/comdat_rename_1.cc %S/../Inputs/comdat_rename_2.cc
-// RUN: %t.gen
+// RUN: %clangxx_pgogen=%t.prof/ -o %t.gen -mllvm -do-comdat-renaming=true -O2 %S/../Inputs/comdat_rename_1.cc %S/../Inputs/comdat_rename_2.cc
+// RUN: %run %t.gen
 // RUN: llvm-profdata merge -o %t.profdata %t.prof/
-// RUN: %clangxx_profuse=%t.profdata  -O2 -emit-llvm -S %S/../Inputs/comdat_rename_1.cc -o - | FileCheck %S/../Inputs/comdat_rename_1.cc
-// RUN: %clangxx_profuse=%t.profdata  -O2 -emit-llvm -S %S/../Inputs/comdat_rename_2.cc -o - | FileCheck %S/../Inputs/comdat_rename_2.cc
+// RUN: %clangxx_profuse=%t.profdata  -O2 -mllvm -do-comdat-renaming=true -emit-llvm -S %S/../Inputs/comdat_rename_1.cc -o - | FileCheck %S/../Inputs/comdat_rename_1.cc
+// RUN: %clangxx_profuse=%t.profdata  -O2 -mllvm -do-comdat-renaming=true -emit-llvm -S %S/../Inputs/comdat_rename_2.cc -o - | FileCheck %S/../Inputs/comdat_rename_2.cc
diff --git a/test/profile/Linux/extern_template.test b/test/profile/Linux/extern_template.test
index ada4d23..3ce3627 100644
--- a/test/profile/Linux/extern_template.test
+++ b/test/profile/Linux/extern_template.test
@@ -1,12 +1,12 @@
 // RUN: %clang -O2  -c -o %t.0.o %S/../Inputs/extern_template.cpp
 // RUN: %clang_profgen -O2  -c -o %t.o %S/../Inputs/extern_template.cpp
 // RUN: %clang_profgen -O2 -fcoverage-mapping %S/../Inputs/extern_template1.cpp %S/../Inputs/extern_template2.cpp %t.o -o %t
-// RUN: env LLVM_PROFILE_FILE=%t.profraw %t
+// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
 // RUN: llvm-profdata show --all-functions %t.profraw | FileCheck %s
 // RUN: llvm-profdata merge -o %t.profdata %t.profraw
 // RUN: llvm-cov show -instr-profile=%t.profdata %t | FileCheck %S/../Inputs/extern_template.h
 // RUN: %clang_profgen -O2 -fcoverage-mapping %S/../Inputs/extern_template1.cpp %S/../Inputs/extern_template2.cpp %t.0.o -o %t.0
-// RUN: env LLVM_PROFILE_FILE=%t.0.profraw %t.0
+// RUN: env LLVM_PROFILE_FILE=%t.0.profraw %run %t.0
 // RUN: llvm-profdata show --all-functions %t.0.profraw | FileCheck %s
 // RUN: llvm-profdata merge -o %t.0.profdata %t.0.profraw
 // RUN: llvm-cov show -instr-profile=%t.0.profdata %t.0 | FileCheck %S/../Inputs/extern_template.h
diff --git a/test/profile/Linux/instrprof-comdat.test b/test/profile/Linux/instrprof-comdat.test
index b933e96..5a11a24 100644
--- a/test/profile/Linux/instrprof-comdat.test
+++ b/test/profile/Linux/instrprof-comdat.test
@@ -1,6 +1,6 @@
 RUN: mkdir -p %t.d
 RUN: %clangxx_profgen -o %t.d/comdat -fcoverage-mapping -fuse-ld=gold %S/../Inputs/instrprof-comdat-1.cpp %S/../Inputs/instrprof-comdat-2.cpp
-RUN: LLVM_PROFILE_FILE=%t-comdat.profraw %t.d/comdat
+RUN: LLVM_PROFILE_FILE=%t-comdat.profraw %run %t.d/comdat
 RUN: llvm-profdata merge -o %t.d/comdat.prof %t-comdat.profraw 
 RUN: llvm-cov show --filename-equivalence --instr-profile=%t.d/comdat.prof %t.d/comdat | FileCheck --check-prefix=HEADER %S/../Inputs/instrprof-comdat.h
 
diff --git a/test/profile/Linux/instrprof-cs.c b/test/profile/Linux/instrprof-cs.c
index 3be4359..d825525 100644
--- a/test/profile/Linux/instrprof-cs.c
+++ b/test/profile/Linux/instrprof-cs.c
@@ -1,17 +1,17 @@
 // RUN: rm -fr %t.prof
 // RUN: %clang_pgogen=%t.prof/ -o %t.gen.cs -O2 %s
-// RUN: %t.gen.cs
+// RUN: %run %t.gen.cs
 // RUN: llvm-profdata merge -o %t.cs.profdata %t.prof/
 // Check context sensitive profile
 // RUN: %clang_profuse=%t.cs.profdata  -O2 -emit-llvm -S %s -o - | FileCheck %s --check-prefix=CS
 //
 // RUN: %clang_profgen=%t.profraw -o %t.gen.cis -O2 %s
-// RUN: %t.gen.cis
+// RUN: %run %t.gen.cis
 // RUN: llvm-profdata merge -o %t.cis.profdata %t.profraw
 // Check context insenstive profile
 // RUN: %clang_profuse=%t.cis.profdata  -O2 -emit-llvm -S %s -o - | FileCheck %s --check-prefix=CIS
 int g1 = 1;
-int g2 = 2;
+int volatile g2 = 2;
 static void toggle(int t) {
   if (t & 1)
     g1 *= t;
diff --git a/test/profile/Linux/lit.local.cfg b/test/profile/Linux/lit.local.cfg
index c8c79fc..410ffd8 100644
--- a/test/profile/Linux/lit.local.cfg
+++ b/test/profile/Linux/lit.local.cfg
@@ -21,6 +21,7 @@
     return False
 
   clang_cmd = subprocess.Popen([config.clang, '-fuse-ld=gold', '-xc', '-'],
+                               universal_newlines = True,
                                stdin = subprocess.PIPE,
                                stdout = subprocess.PIPE,
                                stderr = subprocess.PIPE)
diff --git a/test/profile/instrprof-value-prof-reset.c b/test/profile/instrprof-value-prof-reset.c
new file mode 100644
index 0000000..b3744f5
--- /dev/null
+++ b/test/profile/instrprof-value-prof-reset.c
@@ -0,0 +1,47 @@
+// RUN: %clang_profgen -O2 -mllvm -enable-value-profiling=true -mllvm -vp-static-alloc=true -mllvm -vp-counters-per-site=3 -o %t %s
+// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
+// RUN: llvm-profdata merge -o %t.profdata %t.profraw
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.profdata | FileCheck %s
+
+// IR level instrumentation
+// RUN: %clang_pgogen -O2 -mllvm -disable-vp=false -mllvm -vp-static-alloc=true  -mllvm -vp-counters-per-site=3 -o %t.ir  %s
+// RUN: env LLVM_PROFILE_FILE=%t.ir.profraw %run %t.ir
+// RUN: llvm-profdata merge -o %t.ir.profdata %t.ir.profraw
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.ir.profdata | FileCheck  %s
+
+// IR level instrumentation, dynamic allocation
+// RUN: %clang_pgogen -O2 -mllvm -disable-vp=false -mllvm -vp-static-alloc=false -o %t.ir.dyn  %s
+// RUN: env LLVM_PROFILE_FILE=%t.ir.dyn.profraw %run %t.ir.dyn
+// RUN: llvm-profdata merge -o %t.ir.dyn.profdata %t.ir.dyn.profraw
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.ir.dyn.profdata | FileCheck  %s
+void callee_0() {}
+void callee_1() {}
+void callee_2() {}
+
+void *CalleeAddrs[] = {callee_0, callee_1, callee_2, callee_2, callee_2};
+extern void lprofSetMaxValsPerSite(unsigned);
+extern void __llvm_profile_reset_counters();
+
+typedef void (*FPT)(void);
+
+
+// Testing value profiling eviction algorithm.
+FPT getCalleeFunc(int I) { return CalleeAddrs[I]; }
+
+int main() {
+  int I;
+
+  // First fill up two value profile entries with two targets
+  lprofSetMaxValsPerSite(2);
+
+  for (I = 0; I < 5; I++) {
+    if (I == 2) {
+      __llvm_profile_reset_counters();
+    }
+    // CHECK:  callee_2, 3
+    // CHECK-NEXT: callee_1, 0
+    // CHECK-NOT: callee_0,
+    FPT FP = getCalleeFunc(I);
+    FP();
+  }
+}
diff --git a/test/profile/lit.cfg b/test/profile/lit.cfg
index a6e6ef8..9ca3942 100644
--- a/test/profile/lit.cfg
+++ b/test/profile/lit.cfg
@@ -34,9 +34,9 @@
             raise SystemExit
 
 if config.host_os in ['Linux']:
-  extra_linkflags = ["-ldl"]
+  extra_link_flags = ["-ldl"]
 else:
-  extra_linkflags = []
+  extra_link_flags = []
 
 # Test suffixes.
 config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.ll', '.test']
@@ -46,7 +46,7 @@
 
 # Clang flags.
 target_cflags=[get_required_attr(config, "target_cflags")]
-clang_cflags = target_cflags + extra_linkflags
+clang_cflags = target_cflags + extra_link_flags
 clang_cxxflags = config.cxx_mode_flags + clang_cflags
 
 def build_invocation(compile_flags, with_lto = False):
diff --git a/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cc b/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cc
index 193b33d..ff1ddc4 100644
--- a/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cc
+++ b/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cc
@@ -17,6 +17,21 @@
 typedef unsigned semval_t;
 #endif
 
+// glibc 2.21 has introduced some changes in the way the semaphore value is
+// handled for 32-bit platforms, but since these changes are not ABI-breaking
+// they are not versioned. On newer platforms such as ARM, there is only one
+// version of the symbol, so it's enough to check the glibc version. However,
+// for old platforms such as i386, glibc contains two or even three versions of
+// the sem_init symbol, and the sanitizers always pick the oldest one.
+// Therefore, it is not enough to rely on the __GLIBC_PREREQ macro - we should
+// instead check the platform as well to make sure we only expect the new
+// behavior on platforms where the older symbols do not exist.
+#if defined(__arm__) && __GLIBC_PREREQ(2, 21)
+#define GET_SEM_VALUE(V) ((V) >> 1)
+#else
+#define GET_SEM_VALUE(V) (V)
+#endif
+
 void my_sem_init(bool priv, int value, semval_t *a, unsigned char *b) {
   sem_t sem;
   memset(&sem, 0xAB, sizeof(sem));
@@ -34,10 +49,10 @@
   unsigned char b;
 
   my_sem_init(false, 42, &a, &b);
-  assert(a == 42);
+  assert(GET_SEM_VALUE(a) == 42);
   assert(b != 0xAB);
 
   my_sem_init(true, 43, &a, &b);
-  assert(a == 43);
+  assert(GET_SEM_VALUE(a) == 43);
   assert(b != 0xAB);
 }
diff --git a/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cc b/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cc
index d329122..83570a9 100644
--- a/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cc
+++ b/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cc
@@ -8,6 +8,7 @@
 // This run uses getrusage. We can only test getrusage when allocator_may_return_null=0
 // because getrusage gives us max-rss, not current-rss.
 // RUN: %env_tool_opts=soft_rss_limit_mb=220:quarantine_size=1:allocator_may_return_null=0:can_use_proc_maps_statm=0 not %run %t 2>&1 | FileCheck %s -check-prefix=CHECK_MAY_RETURN_0
+// REQUIRES: stable-runtime
 
 // FIXME: make it work for other sanitizers.
 // XFAIL: lsan
diff --git a/test/sanitizer_common/TestCases/Posix/dedup_token_length_test.cc b/test/sanitizer_common/TestCases/Posix/dedup_token_length_test.cc
index 88d41b6..2612957 100644
--- a/test/sanitizer_common/TestCases/Posix/dedup_token_length_test.cc
+++ b/test/sanitizer_common/TestCases/Posix/dedup_token_length_test.cc
@@ -1,10 +1,10 @@
 // Test dedup_token_length
 // RUN: %clangxx -O0 %s -o %t
-// RUN: env %tool_options='abort_on_error=0'                    not %run %t 2>&1   | FileCheck %s --check-prefix=CHECK0
-// RUN: env %tool_options='abort_on_error=0, dedup_token_length=0' not %run %t 2>&1   | FileCheck %s --check-prefix=CHECK0
-// RUN: env %tool_options='abort_on_error=0, dedup_token_length=1' not %run %t 2>&1   | FileCheck %s --check-prefix=CHECK1
-// RUN: env %tool_options='abort_on_error=0, dedup_token_length=2' not %run %t 2>&1   | FileCheck %s --check-prefix=CHECK2
-// RUN: env %tool_options='abort_on_error=0, dedup_token_length=3' not %run %t 2>&1   | FileCheck %s --check-prefix=CHECK3
+// RUN: env %tool_options='abort_on_error=0'                       not %run %t 2>&1   | FileCheck %s --check-prefix=CHECK0 --match-full-lines
+// RUN: env %tool_options='abort_on_error=0, dedup_token_length=0' not %run %t 2>&1   | FileCheck %s --check-prefix=CHECK0 --match-full-lines
+// RUN: env %tool_options='abort_on_error=0, dedup_token_length=1' not %run %t 2>&1   | FileCheck %s --check-prefix=CHECK1 --match-full-lines
+// RUN: env %tool_options='abort_on_error=0, dedup_token_length=2' not %run %t 2>&1   | FileCheck %s --check-prefix=CHECK2 --match-full-lines
+// RUN: env %tool_options='abort_on_error=0, dedup_token_length=3' not %run %t 2>&1   | FileCheck %s --check-prefix=CHECK3 --match-full-lines
 
 // REQUIRES: stable-runtime
 // FIXME: implement SEGV handler in other sanitizers, not just asan.
@@ -34,7 +34,5 @@
 
 // CHECK0-NOT: DEDUP_TOKEN:
 // CHECK1: DEDUP_TOKEN: void Xyz::Abc<int, int>()
-// CHECK1-NOT: bar
 // CHECK2: DEDUP_TOKEN: void Xyz::Abc<int, int>()--bar
-// CHECK2-NOT: FOO
 // CHECK3: DEDUP_TOKEN: void Xyz::Abc<int, int>()--bar--FOO()
diff --git a/test/sanitizer_common/TestCases/get_module_and_offset_for_pc.cc b/test/sanitizer_common/TestCases/get_module_and_offset_for_pc.cc
new file mode 100644
index 0000000..f5a18e6
--- /dev/null
+++ b/test/sanitizer_common/TestCases/get_module_and_offset_for_pc.cc
@@ -0,0 +1,63 @@
+// RUN: %clangxx -DSHARED %s -shared -o %T/get_module_and_offset_for_pc.so -fPIC
+// RUN: %clangxx -DSO_DIR=\"%T\" -O0 %s -ldl -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+// UNSUPPORTED: i386-darwin
+//
+// Tests __sanitizer_get_module_and_offset_for_pc.
+
+#include <assert.h>
+#include <dlfcn.h>
+#include <sanitizer/common_interface_defs.h>
+#include <stdio.h>
+
+#ifdef SHARED
+extern "C" {
+int foo() { return 1; }
+}
+#else
+
+void Test(void *pc, const char *name) {
+  char module_name[1024];
+  void *offset;
+  int ok = __sanitizer_get_module_and_offset_for_pc(
+      pc, module_name, sizeof(module_name), &offset);
+  if (!ok) {
+    printf("NOT FOUND %s: %p\n", name, pc);
+  } else {
+    printf("FOUND %s: %s %p\n", name, module_name, offset);
+  }
+}
+
+void TestCallerPc() { Test(__builtin_return_address(0), "callerpc"); }
+
+void TestDlsym() {
+  void *handle = dlopen(SO_DIR "/get_module_and_offset_for_pc.so", RTLD_LAZY);
+  assert(handle);
+  void *foo = dlsym(handle, "foo");
+  assert(foo);
+  Test(foo, "foo");
+  dlclose(handle);
+}
+
+// Call __sanitizer_get_module_and_offset_for_pc lots of times
+// to make sure it is not too slow.
+void TestLoop() {
+  void *pc = __builtin_return_address(0);
+  char module_name[1024];
+  void *offset;
+  for (int i = 0; i < 1000000; ++i) {
+    __sanitizer_get_module_and_offset_for_pc(pc, module_name,
+                                             sizeof(module_name), &offset);
+  }
+}
+
+int main() {
+  Test(0, "null");
+  TestCallerPc();
+  TestDlsym();
+  TestLoop();
+}
+#endif
+// CHECK: NOT FOUND null: {{.*}}
+// CHECK-NEXT: FOUND callerpc: {{.*}}/get_module_and_offset_for_pc.cc.tmp {{.*}}
+// CHECK-NEXT: FOUND foo: {{.*}}/get_module_and_offset_for_pc.so {{.*}}
diff --git a/test/sanitizer_common/TestCases/printf-ldbl.c b/test/sanitizer_common/TestCases/printf-ldbl.c
new file mode 100644
index 0000000..f6629ab
--- /dev/null
+++ b/test/sanitizer_common/TestCases/printf-ldbl.c
@@ -0,0 +1,13 @@
+// RUN: %clang %s -o %t && %run %t 2>&1
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+int main(int argc, char **argv) {
+  char buf[20];
+  long double ld = 4.0;
+  snprintf(buf, sizeof buf, "%Lf %d", ld, 123);
+  assert(!strcmp(buf, "4.000000 123"));
+  return 0;
+}
diff --git a/test/sanitizer_common/TestCases/sanitizer_coverage_symbolize.cc b/test/sanitizer_common/TestCases/sanitizer_coverage_symbolize.cc
new file mode 100644
index 0000000..48f32a7
--- /dev/null
+++ b/test/sanitizer_common/TestCases/sanitizer_coverage_symbolize.cc
@@ -0,0 +1,34 @@
+// Tests that the trace-pc-guard coverage dump reports "call sancov" unless SANCOV_OPTIONS=symbolize=0.
+//
+// REQUIRES: x86_64-linux
+// XFAIL: tsan
+//
+// RUN: DIR=%t_workdir
+// RUN: rm -rf $DIR
+// RUN: mkdir -p $DIR
+// RUN: cd $DIR
+// RUN: %clangxx -O0 -fsanitize-coverage=trace-pc-guard %s -ldl -o %t
+// RUN: %env_tool_opts=coverage=1 %t 2>&1 | FileCheck %s
+// RUN: %env_tool_opts=coverage=1 SANCOV_OPTIONS=symbolize=0 %t 2>&1 | FileCheck %s --check-prefix=CHECK-NOSYM
+// RUN: rm -rf $DIR
+
+#include <stdio.h>
+
+int foo() {
+  fprintf(stderr, "foo\n");
+  return 1;
+}
+
+int main() {
+  fprintf(stderr, "main\n");
+  foo();
+  foo();
+}
+
+// CHECK: main
+// CHECK: SanitizerCoverage: ./sanitizer_coverage_symbolize.{{.*}}.sancov 2 PCs written
+// CHECK: call sancov
+
+// CHECK-NOSYM: main
+// CHECK-NOSYM: SanitizerCoverage: ./sanitizer_coverage_symbolize.{{.*}}.sancov 2 PCs written
+// CHECK-NOSYM-NOT: call sancov
diff --git a/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard-dso.cc b/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard-dso.cc
new file mode 100644
index 0000000..cf16ec3
--- /dev/null
+++ b/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard-dso.cc
@@ -0,0 +1,72 @@
+// Tests trace pc guard coverage collection.
+//
+// REQUIRES: has_sancovcc,stable-runtime
+// XFAIL: tsan,darwin,powerpc64,s390x
+//
+// RUN: DIR=%t_workdir
+// RUN: CLANG_ARGS="-O0 -fsanitize-coverage=trace-pc-guard"
+// RUN: rm -rf $DIR
+// RUN: mkdir -p $DIR
+// RUN: cd $DIR
+// RUN: %clangxx -DSHARED1 $CLANG_ARGS -shared %s -o %t_1.so -fPIC
+// RUN: %clangxx -DSHARED2 $CLANG_ARGS -shared %s -o %t_2.so -fPIC
+// RUN: %clangxx -DMAIN $CLANG_ARGS %s -o %t %t_1.so %t_2.so
+// RUN: %env_tool_opts=coverage=1 %t 2>&1 | FileCheck %s
+// RUN: %sancovcc  -covered-functions -strip_path_prefix=TestCases/ *.sancov \
+// RUN:            %t %t_1.so %t_2.so 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-SANCOV %s
+// RUN: rm -rf $DIR
+
+#include <stdio.h>
+
+extern "C" {
+  int bar();
+  int baz();
+}
+
+#ifdef MAIN
+
+int foo() {
+  fprintf(stderr, "foo\n");
+  return 1;
+}
+
+int main() {
+  fprintf(stderr, "main\n");
+  foo();
+  bar();
+  baz();
+}
+
+#endif // MAIN
+
+extern "C" {
+
+#ifdef SHARED1
+int bar() {
+  fprintf(stderr, "bar\n");
+  return 1;
+}
+#endif
+
+#ifdef SHARED2
+int baz() {
+  fprintf(stderr, "baz\n");
+  return 1;
+}
+#endif
+
+} // extern "C"
+
+// CHECK: main
+// CHECK-NEXT: foo
+// CHECK-NEXT: bar
+// CHECK-NEXT: baz
+// CHECK-DAG: SanitizerCoverage: ./sanitizer_coverage_trace_pc_guard-dso.{{.*}}.sancov 2 PCs written
+// CHECK-DAG: SanitizerCoverage: ./sanitizer_coverage_trace_pc_guard-dso.{{.*}}_2.so.{{.*}}.sancov 1 PCs written
+// CHECK-DAG: SanitizerCoverage: ./sanitizer_coverage_trace_pc_guard-dso.{{.*}}_1.so.{{.*}}.sancov 1 PCs written
+//
+// CHECK-SANCOV: Ignoring {{.*}}_1.so and its coverage because __sanitizer_cov* functions were not found.
+// CHECK-SANCOV: Ignoring {{.*}}_2.so and its coverage because __sanitizer_cov* functions were not found.
+// CHECK-SANCOV-NEXT: sanitizer_coverage_trace_pc_guard-dso.cc:29 foo
+// CHECK-SANCOV-NEXT: sanitizer_coverage_trace_pc_guard-dso.cc:34 main
diff --git a/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard.cc b/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard.cc
new file mode 100644
index 0000000..1b787f1
--- /dev/null
+++ b/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard.cc
@@ -0,0 +1,41 @@
+// Tests trace pc guard coverage collection.
+//
+// REQUIRES: has_sancovcc,stable-runtime
+// XFAIL: tsan,darwin,powerpc64,s390x
+//
+// RUN: DIR=%t_workdir
+// RUN: rm -rf $DIR
+// RUN: mkdir -p $DIR
+// RUN: cd $DIR
+// RUN: %clangxx -O0 -fsanitize-coverage=trace-pc-guard %s -ldl -o %t
+// RUN: %env_tool_opts=coverage=1 %t 2>&1 | FileCheck %s
+// RUN: %sancovcc  -covered-functions -strip_path_prefix=TestCases/ *.sancov %t 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK-SANCOV %s
+// RUN: %env_tool_opts=coverage=0 %t 2>&1 | FileCheck --check-prefix=CHECK-NOCOV %s
+// RUN: rm -rf $DIR
+// Make some room to stabilize line numbers
+//
+//
+//
+#include <stdio.h>
+
+int foo() {
+  fprintf(stderr, "foo\n");
+  return 1;
+}
+
+int main() {
+  fprintf(stderr, "main\n");
+  foo();
+  foo();
+}
+
+// CHECK: main
+// CHECK-NEXT: foo
+// CHECK-NEXT: foo
+// CHECK-NEXT: SanitizerCoverage: ./sanitizer_coverage_trace_pc_guard.{{.*}}.sancov 2 PCs written
+//
+// CHECK-SANCOV: sanitizer_coverage_trace_pc_guard.cc:22 foo
+// CHECK-SANCOV-NEXT: sanitizer_coverage_trace_pc_guard.cc:27 main
+//
+// CHECK-NOCOV-NOT: SanitizerCoverage
diff --git a/test/sanitizer_common/TestCases/scanf-ldbl.c b/test/sanitizer_common/TestCases/scanf-ldbl.c
new file mode 100644
index 0000000..9ca30f4
--- /dev/null
+++ b/test/sanitizer_common/TestCases/scanf-ldbl.c
@@ -0,0 +1,13 @@
+// RUN: %clang %s -o %t && %run %t 2>&1
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+int main(int argc, char **argv) {
+  long double ld;
+  memset(&ld, 255, sizeof ld);
+  sscanf("4.0", "%Lf", &ld);
+  assert(ld == 4.0);
+  return 0;
+}
diff --git a/test/sanitizer_common/TestCases/symbolize_pc.cc b/test/sanitizer_common/TestCases/symbolize_pc.cc
index 68a6733..0cc81e1 100644
--- a/test/sanitizer_common/TestCases/symbolize_pc.cc
+++ b/test/sanitizer_common/TestCases/symbolize_pc.cc
@@ -1,5 +1,6 @@
 // RUN: %clangxx -O0 %s -o %t
 // RUN: %env_tool_opts=strip_path_prefix=/TestCases/ %run %t 2>&1 | FileCheck %s
+// UNSUPPORTED: i386-darwin
 //
 // Tests __sanitizer_symbolize_pc.
 #include <stdio.h>
diff --git a/test/sanitizer_common/print_address.h b/test/sanitizer_common/print_address.h
new file mode 100644
index 0000000..018db61
--- /dev/null
+++ b/test/sanitizer_common/print_address.h
@@ -0,0 +1,19 @@
+#include <stdio.h>
+#include <stdarg.h>
+
+void print_address(const char *str, int n, ...) {
+  fprintf(stderr, "%s", str);
+  va_list ap;
+  va_start(ap, n);
+  while (n--) {
+    void *p = va_arg(ap, void *);
+#if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__)
+    // On FreeBSD, the %p conversion specifier works as 0x%x and thus does not
+    // match the format used in the diagnostic message.
+    fprintf(stderr, "0x%012lx ", (unsigned long) p);
+#elif defined(__mips64)
+    fprintf(stderr, "0x%010lx ", (unsigned long) p);
+#endif
+  }
+  fprintf(stderr, "\n");
+}
diff --git a/test/scudo/CMakeLists.txt b/test/scudo/CMakeLists.txt
index b6cb2fd..a899099 100644
--- a/test/scudo/CMakeLists.txt
+++ b/test/scudo/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(SCUDO_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 set(SCUDO_LIT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
 
+set(SCUDO_TESTSUITES)
 
 set(SCUDO_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
 if(NOT COMPILER_RT_STANDALONE_BUILD)
@@ -12,17 +13,21 @@
   ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
   )
 
-if(CMAKE_SYSTEM_NAME MATCHES "Linux")
-   EXEC_PROGRAM(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO)
-   STRING(REGEX REPLACE "^.*(sse4_2).*$" "\\1" SSE_THERE ${CPUINFO})
-   STRING(COMPARE EQUAL "sse4_2" "${SSE_THERE}" SSE42_TRUE)
-endif(CMAKE_SYSTEM_NAME MATCHES "Linux")
+set(SCUDO_TEST_ARCH ${SCUDO_SUPPORTED_ARCH})
+foreach(arch ${SCUDO_TEST_ARCH})
+  set(SCUDO_TEST_TARGET_ARCH ${arch})
+  string(TOLOWER "-${arch}" SCUDO_TEST_CONFIG_SUFFIX)
+  get_test_cc_for_arch(${arch} SCUDO_TEST_TARGET_CC SCUDO_TEST_TARGET_CFLAGS)
+  string(TOUPPER ${arch} ARCH_UPPER_CASE)
+  set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}Config)
 
-if (SSE42_TRUE AND CMAKE_SIZEOF_VOID_P EQUAL 8)
-  add_lit_testsuite(check-scudo
-    "Running the Scudo Hardened Allocator tests"
-    ${CMAKE_CURRENT_BINARY_DIR}
-    DEPENDS ${SCUDO_TEST_DEPS})
-  set_target_properties(check-scudo PROPERTIES FOLDER
-    "Compiler-RT Misc")
-endif(SSE42_TRUE AND CMAKE_SIZEOF_VOID_P EQUAL 8)
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg)
+  list(APPEND SCUDO_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME})
+endforeach()
+
+add_lit_testsuite(check-scudo "Running the Scudo Hardened Allocator tests"
+  ${SCUDO_TESTSUITES}
+  DEPENDS ${SCUDO_TEST_DEPS})
+set_target_properties(check-scudo PROPERTIES FOLDER "Compiler-RT Misc")
diff --git a/test/scudo/alignment.cpp b/test/scudo/alignment.cpp
index c5e57d1..a6eca87 100644
--- a/test/scudo/alignment.cpp
+++ b/test/scudo/alignment.cpp
@@ -1,11 +1,10 @@
 // RUN: %clang_scudo %s -o %t
 // RUN: not %run %t pointers 2>&1 | FileCheck %s
 
-// Tests that a non-16-byte aligned pointer will trigger the associated error
-// on deallocation.
+// Tests that a non MinAlignment aligned pointer will trigger the associated
+// error on deallocation.
 
 #include <assert.h>
-#include <malloc.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
@@ -17,7 +16,7 @@
     void *p = malloc(1U << 16);
     if (!p)
       return 1;
-    free(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(p) | 8));
+    free(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(p) | 1));
   }
   return 0;
 }
diff --git a/test/scudo/double-free.cpp b/test/scudo/double-free.cpp
index 4f5bf0c..75919f0 100644
--- a/test/scudo/double-free.cpp
+++ b/test/scudo/double-free.cpp
@@ -46,4 +46,4 @@
   return 0;
 }
 
-// CHECK: ERROR: invalid chunk state when deallocating address
+// CHECK: ERROR: invalid chunk state
diff --git a/test/scudo/interface.cpp b/test/scudo/interface.cpp
new file mode 100644
index 0000000..f935306
--- /dev/null
+++ b/test/scudo/interface.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_scudo %s -o %t
+// RUN: %run %t 2>&1
+
+// Tests that the sanitizer interface functions behave appropriately.
+
+#include <stdlib.h>
+
+#include <vector>
+
+#include <sanitizer/allocator_interface.h>
+
+int main(int argc, char **argv)
+{
+  void *p;
+  std::vector<ssize_t> sizes{1, 8, 16, 32, 1024, 32768,
+    1 << 16, 1 << 17, 1 << 20, 1 << 24};
+  for (size_t size : sizes) {
+    p = malloc(size);
+    if (!p)
+      return 1;
+    if (!__sanitizer_get_ownership(p))
+      return 1;
+    if (__sanitizer_get_allocated_size(p) < size)
+      return 1;
+    free(p);
+  }
+  return 0;
+}
diff --git a/test/scudo/lit.cfg b/test/scudo/lit.cfg
index e2a4997..4eff2ce 100644
--- a/test/scudo/lit.cfg
+++ b/test/scudo/lit.cfg
@@ -3,7 +3,7 @@
 import os
 
 # Setup config name.
-config.name = 'Scudo'
+config.name = 'Scudo' + config.name_suffix
 
 # Setup source root.
 config.test_source_root = os.path.dirname(__file__)
@@ -14,18 +14,19 @@
 whole_archive = "-Wl,-whole-archive %s -Wl,-no-whole-archive " % base_lib
 
 # Test suffixes.
-config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.ll', '.test']
+config.suffixes = ['.c', '.cc', '.cpp']
 
 # C flags.
-c_flags = ["-std=c++11",
+c_flags = ([config.target_cflags] +
+           ["-std=c++11",
            "-lstdc++",
-           "-ldl",
            "-lrt",
-           "-pthread",
            "-latomic",
+           "-ldl",
+           "-pthread",
            "-fPIE",
            "-pie",
-           "-O0"]
+           "-O0"])
 
 def build_invocation(compile_flags):                                            
   return " " + " ".join([config.clang] + compile_flags) + " "                   
diff --git a/test/scudo/lit.site.cfg.in b/test/scudo/lit.site.cfg.in
index 64e2fb3..4299518 100644
--- a/test/scudo/lit.site.cfg.in
+++ b/test/scudo/lit.site.cfg.in
@@ -1,5 +1,9 @@
 @LIT_SITE_CFG_IN_HEADER@
 
+config.name_suffix = "@SCUDO_TEST_CONFIG_SUFFIX@"
+config.target_arch = "@SCUDO_TEST_TARGET_ARCH@"
+config.target_cflags = "@SCUDO_TEST_TARGET_CFLAGS@"
+
 # Load common config for all compiler-rt lit tests.
 lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
 
diff --git a/test/scudo/malloc.cpp b/test/scudo/malloc.cpp
index 4507a52..cafc744 100644
--- a/test/scudo/malloc.cpp
+++ b/test/scudo/malloc.cpp
@@ -2,26 +2,37 @@
 // RUN: %run %t 2>&1
 
 // Tests that a regular workflow of allocation, memory fill and free works as
-// intended. Also tests that a zero-sized allocation succeeds.
+// intended. Tests various sizes serviced by the primary and secondary
+// allocators.
 
-#include <malloc.h>
 #include <stdlib.h>
 #include <string.h>
 
+#include <vector>
+
 int main(int argc, char **argv)
 {
   void *p;
-  size_t size = 1U << 8;
+  std::vector<ssize_t> sizes{1, 8, 16, 32, 1024, 32768,
+    1 << 16, 1 << 17, 1 << 20, 1 << 24};
+  std::vector<int> offsets{1, 0, -1, -7, -8, -15, -16, -31, -32};
 
-  p = malloc(size);
-  if (!p)
-    return 1;
-  memset(p, 'A', size);
-  free(p);
   p = malloc(0);
   if (!p)
     return 1;
   free(p);
+  for (ssize_t size : sizes) {
+    for (int offset: offsets) {
+      ssize_t actual_size = size + offset;
+      if (actual_size <= 0)
+        continue;
+      p = malloc(actual_size);
+      if (!p)
+        return 1;
+      memset(p, 0xff, actual_size);
+      free(p);
+    }
+  }
 
   return 0;
 }
diff --git a/test/scudo/memalign.cpp b/test/scudo/memalign.cpp
index 951d1aa..b407ec5 100644
--- a/test/scudo/memalign.cpp
+++ b/test/scudo/memalign.cpp
@@ -10,22 +10,24 @@
 #include <stdlib.h>
 #include <string.h>
 
+// Reduce the size of the quarantine, or the test can run out of aligned memory
+// on 32-bit for the larger alignments.
+extern "C" const char *__scudo_default_options() {
+  return "QuarantineSizeMb=1";
+}
+
 // Sometimes the headers may not have this...
 extern "C" void *aligned_alloc (size_t alignment, size_t size);
 
 int main(int argc, char **argv)
 {
-  void *p;
+  void *p = nullptr;
   size_t alignment = 1U << 12;
-  size_t size = alignment;
+  size_t size = 1U << 12;
 
   assert(argc == 2);
+
   if (!strcmp(argv[1], "valid")) {
-    p = memalign(alignment, size);
-    if (!p)
-      return 1;
-    free(p);
-    p = nullptr;
     posix_memalign(&p, alignment, size);
     if (!p)
       return 1;
@@ -34,6 +36,29 @@
     if (!p)
       return 1;
     free(p);
+    // Tests various combinations of alignment and sizes
+    for (int i = (sizeof(void *) == 4) ? 3 : 4; i < 19; i++) {
+      alignment = 1U << i;
+      for (int j = 1; j < 33; j++) {
+        size = 0x800 * j;
+        for (int k = 0; k < 3; k++) {
+          p = memalign(alignment, size - (2 * sizeof(void *) * k));
+          if (!p)
+            return 1;
+          free(p);
+        }
+      }
+    }
+    // For larger alignments (2^19 up to 2^24), reduce the number of allocations
+    // to avoid running out of potential addresses (on 32-bit).
+    for (int i = 19; i <= 24; i++) {
+      for (int k = 0; k < 3; k++) {
+        p = memalign(1U << i, 0x1000 - (2 * sizeof(void *) * k));
+        if (!p)
+          return 1;
+        free(p);
+      }
+    }
   }
   if (!strcmp(argv[1], "invalid")) {
     p = memalign(alignment - 1, size);
@@ -42,4 +67,4 @@
   return 0;
 }
 
-// CHECK: ERROR: malloc alignment is not a power of 2
+// CHECK: ERROR: alignment is not a power of 2
diff --git a/test/scudo/mismatch.cpp b/test/scudo/mismatch.cpp
index 2d3d198..54cdafc 100644
--- a/test/scudo/mismatch.cpp
+++ b/test/scudo/mismatch.cpp
@@ -30,7 +30,7 @@
     free((void *)p);
   }
   if (!strcmp(argv[1], "memaligndel")) {
-    int *p = (int *)memalign(0x10, 0x10);
+    int *p = (int *)memalign(16, 16);
     if (!p)
       return 1;
     delete p;
diff --git a/test/scudo/overflow.cpp b/test/scudo/overflow.cpp
index 5b2cb75..c93a544 100644
--- a/test/scudo/overflow.cpp
+++ b/test/scudo/overflow.cpp
@@ -11,12 +11,13 @@
 int main(int argc, char **argv)
 {
   assert(argc == 2);
+  ssize_t offset = sizeof(void *) == 8 ? 8 : 0;
   if (!strcmp(argv[1], "malloc")) {
     // Simulate a header corruption of an allocated chunk (1-bit)
     void *p = malloc(1U << 4);
     if (!p)
       return 1;
-    ((char *)p)[-1] ^= 1;
+    ((char *)p)[-(offset + 1)] ^= 1;
     free(p);
   }
   if (!strcmp(argv[1], "quarantine")) {
@@ -25,7 +26,7 @@
       return 1;
     free(p);
     // Simulate a header corruption of a quarantined chunk
-    ((char *)p)[-2] ^= 1;
+    ((char *)p)[-(offset + 2)] ^= 1;
     // Trigger the quarantine recycle
     for (int i = 0; i < 0x100; i++) {
       p = malloc(1U << 16);
diff --git a/test/scudo/preinit.cpp b/test/scudo/preinit.cpp
index a280ae1..34f61c9 100644
--- a/test/scudo/preinit.cpp
+++ b/test/scudo/preinit.cpp
@@ -4,7 +4,6 @@
 // Verifies that calling malloc in a preinit_array function succeeds, and that
 // the resulting pointer can be freed at program termination.
 
-#include <malloc.h>
 #include <stdlib.h>
 #include <string.h>
 
diff --git a/test/scudo/random_shuffle.cpp b/test/scudo/random_shuffle.cpp
new file mode 100644
index 0000000..fce522d
--- /dev/null
+++ b/test/scudo/random_shuffle.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_scudo %s -o %t
+// RUN: rm -rf %T/random_shuffle_tmp_dir
+// RUN: mkdir %T/random_shuffle_tmp_dir
+// RUN: %run %t 100 > %T/random_shuffle_tmp_dir/out1
+// RUN: %run %t 100 > %T/random_shuffle_tmp_dir/out2
+// RUN: %run %t 10000 > %T/random_shuffle_tmp_dir/out1
+// RUN: %run %t 10000 > %T/random_shuffle_tmp_dir/out2
+// RUN: not diff %T/random_shuffle_tmp_dir/out?
+// RUN: rm -rf %T/random_shuffle_tmp_dir
+// UNSUPPORTED: i386-linux,i686-linux,arm-linux,armhf-linux
+
+// Tests that the allocator shuffles the chunks before returning to the user.
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main(int argc, char **argv) {
+  int alloc_size = argc == 2 ? atoi(argv[1]) : 100;  // chunk size, default 100
+  char *base = new char[alloc_size];  // reference chunk for the distances
+  for (int i = 0; i < 20; i++) {
+    char *p = new char[alloc_size];  // not freed within this program
+    printf("%td\n", base - p);  // pointer difference is ptrdiff_t: use %td
+  }
+}
diff --git a/test/scudo/realloc.cpp b/test/scudo/realloc.cpp
index 2a7d5b6..cc44595 100644
--- a/test/scudo/realloc.cpp
+++ b/test/scudo/realloc.cpp
@@ -14,54 +14,60 @@
 #include <malloc.h>
 #include <string.h>
 
+#include <vector>
+
 int main(int argc, char **argv)
 {
   void *p, *old_p;
-  size_t size = 32;
+  // Those sizes will exercise both allocators (Primary & Secondary).
+  std::vector<size_t> sizes{1, 16, 1024, 32768, 1 << 16, 1 << 17, 1 << 20};
 
   assert(argc == 2);
-  if (!strcmp(argv[1], "pointers")) {
-    old_p = p = realloc(nullptr, size);
-    if (!p)
-      return 1;
-    size = malloc_usable_size(p);
-    // Our realloc implementation will return the same pointer if the size
-    // requested is lower or equal to the usable size of the associated chunk.
-    p = realloc(p, size - 1);
-    if (p != old_p)
-      return 1;
-    p = realloc(p, size);
-    if (p != old_p)
-      return 1;
-    // And a new one if the size is greater.
-    p = realloc(p, size + 1);
-    if (p == old_p)
-      return 1;
-    // A size of 0 will free the chunk and return nullptr.
-    p = realloc(p, 0);
-    if (p)
-      return 1;
-    old_p = nullptr;
-  }
-  if (!strcmp(argv[1], "contents")) {
-    p = realloc(nullptr, size);
-    if (!p)
-      return 1;
-    for (int i = 0; i < size; i++)
-      reinterpret_cast<char *>(p)[i] = 'A';
-    p = realloc(p, size + 1);
-    // The contents of the reallocated chunk must match the original one.
-    for (int i = 0; i < size; i++)
-      if (reinterpret_cast<char *>(p)[i] != 'A')
+  for (size_t size : sizes) {
+    if (!strcmp(argv[1], "pointers")) {
+      old_p = p = realloc(nullptr, size);
+      if (!p)
         return 1;
-  }
-  if (!strcmp(argv[1], "memalign")) {
-    // A chunk coming from memalign cannot be reallocated.
-    p = memalign(16, size);
-    if (!p)
-      return 1;
-    p = realloc(p, size);
-    free(p);
+      size = malloc_usable_size(p);
+      // Our realloc implementation will return the same pointer if the size
+      // requested is lower than or equal to the usable size of the associated
+      // chunk.
+      p = realloc(p, size - 1);
+      if (p != old_p)
+        return 1;
+      p = realloc(p, size);
+      if (p != old_p)
+        return 1;
+      // And a new one if the size is greater.
+      p = realloc(p, size + 1);
+      if (p == old_p)
+        return 1;
+      // A size of 0 will free the chunk and return nullptr.
+      p = realloc(p, 0);
+      if (p)
+        return 1;
+      old_p = nullptr;
+    }
+    if (!strcmp(argv[1], "contents")) {
+      p = realloc(nullptr, size);
+      if (!p)
+        return 1;
+      for (int i = 0; i < size; i++)
+        reinterpret_cast<char *>(p)[i] = 'A';
+      p = realloc(p, size + 1);
+      // The contents of the reallocated chunk must match the original one.
+      for (int i = 0; i < size; i++)
+        if (reinterpret_cast<char *>(p)[i] != 'A')
+          return 1;
+    }
+    if (!strcmp(argv[1], "memalign")) {
+      // A chunk coming from memalign cannot be reallocated.
+      p = memalign(16, size);
+      if (!p)
+        return 1;
+      p = realloc(p, size);
+      free(p);
+    }
   }
   return 0;
 }
diff --git a/test/scudo/secondary.cpp b/test/scudo/secondary.cpp
new file mode 100644
index 0000000..7a634a8
--- /dev/null
+++ b/test/scudo/secondary.cpp
@@ -0,0 +1,54 @@
+// RUN: %clang_scudo %s -o %t
+// RUN: %run %t after  2>&1 | FileCheck %s
+// RUN: %run %t before 2>&1 | FileCheck %s
+
+// Test that we hit a guard page when writing past the end of a chunk
+// allocated by the Secondary allocator, or writing too far in front of it.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+#include <assert.h>
+
+void handler(int signo, siginfo_t *info, void *uctx) {
+  if (info->si_code == SEGV_ACCERR) {
+    fprintf(stderr, "SCUDO SIGSEGV\n");
+    exit(0);
+  }
+  exit(1);
+}
+
+int main(int argc, char **argv)
+{
+  // The size must be large enough to be serviced by the secondary allocator.
+  long page_size = sysconf(_SC_PAGESIZE);
+  size_t size = (1U << 17) + page_size;
+  struct sigaction a;
+
+  assert(argc == 2);
+  memset(&a, 0, sizeof(a));
+  a.sa_sigaction = handler;
+  a.sa_flags = SA_SIGINFO;
+
+  char *p = (char *)malloc(size);
+  if (!p)
+    return 1;
+  memset(p, 'A', size); // This should not trigger anything.
+  // Set up the SIGSEGV handler now, as the rest should trigger an AV.
+  sigaction(SIGSEGV, &a, nullptr);
+  if (!strcmp(argv[1], "after")) {
+    for (int i = 0; i < page_size; i++)
+      p[size + i] = 'A';
+  }
+  if (!strcmp(argv[1], "before")) {
+    for (int i = 1; i < page_size; i++)
+      p[-i] = 'A';
+  }
+  free(p);
+
+  return 1; // A successful test means we shouldn't reach this.
+}
+
+// CHECK: SCUDO SIGSEGV
diff --git a/test/tsan/Darwin/ignore-noninstrumented.mm b/test/tsan/Darwin/ignore-noninstrumented.mm
new file mode 100644
index 0000000..5e44531
--- /dev/null
+++ b/test/tsan/Darwin/ignore-noninstrumented.mm
@@ -0,0 +1,53 @@
+// Check that ignore_noninstrumented_modules=1 suppresses races from system libraries on OS X.
+
+// RUN: %clang_tsan %s -o %t -framework Foundation
+
+// Check that without the flag, there are false positives.
+// RUN: %deflake %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-RACE
+
+// With ignore_noninstrumented_modules=1, no races are reported.
+// RUN: %env_tsan_opts=ignore_noninstrumented_modules=1 %run %t 2>&1 | FileCheck %s
+
+// With ignore_noninstrumented_modules=1, races in user's code are still reported.
+// RUN: %env_tsan_opts=ignore_noninstrumented_modules=1 %deflake %run %t race 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-RACE
+
+#import <Foundation/Foundation.h>
+
+#import "../test.h"
+
+char global_buf[64];
+
+void *Thread1(void *x) {
+  barrier_wait(&barrier);
+  strcpy(global_buf, "hello world");
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  strcpy(global_buf, "world hello");
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+int main(int argc, char *argv[]) {
+  fprintf(stderr, "Hello world.\n");
+  
+  // NSUserDefaults uses XPC which triggers the false positive.
+  NSDictionary *d = [[NSUserDefaults standardUserDefaults] dictionaryRepresentation];
+  fprintf(stderr, "d = %p\n", d);
+
+  if (argc > 1 && strcmp(argv[1], "race") == 0) {
+    barrier_init(&barrier, 2);
+    pthread_t t[2];
+    pthread_create(&t[0], NULL, Thread1, NULL);
+    pthread_create(&t[1], NULL, Thread2, NULL);
+    pthread_join(t[0], NULL);
+    pthread_join(t[1], NULL);
+  }
+
+  fprintf(stderr, "Done.\n");
+}
+
+// CHECK: Hello world.
+// CHECK-RACE: SUMMARY: ThreadSanitizer: data race
+// CHECK: Done.
diff --git a/test/tsan/Darwin/libcxx-call-once.mm b/test/tsan/Darwin/libcxx-call-once.mm
index 525e6d9..5388e49 100644
--- a/test/tsan/Darwin/libcxx-call-once.mm
+++ b/test/tsan/Darwin/libcxx-call-once.mm
@@ -1,10 +1,6 @@
 // RUN: %clangxx_tsan %s -o %t -framework Foundation -std=c++11
 // RUN: %env_tsan_opts=ignore_interceptors_accesses=1 %run %t 2>&1 | FileCheck %s
 
-// A lot of bots are configured not to checkout libcxx and run against the
-// (buggy) system-installed version.
-// REQUIRES: disabled
-
 #import <Foundation/Foundation.h>
 
 #import <iostream>
diff --git a/test/tsan/atomic_free.cc b/test/tsan/atomic_free.cc
index a0d8e42..446949d 100644
--- a/test/tsan/atomic_free.cc
+++ b/test/tsan/atomic_free.cc
@@ -1,4 +1,13 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+
+// Also check that atomics instrumentation can be configured by either driver or
+// legacy flags:
+
+// RUN: %clangxx_tsan -O1 %s -o %t -fno-sanitize-thread-atomics && not %deflake %run %t 2>&1 \
+// RUN:   | FileCheck --allow-empty --check-prefix=CHECK-NO-ATOMICS %s
+// RUN: %clangxx_tsan -O1 %s -o %t -mllvm -tsan-instrument-atomics=0 && not %deflake %run %t 2>&1 \
+// RUN:   | FileCheck --allow-empty --check-prefix=CHECK-NO-ATOMICS %s
+
 #include "test.h"
 
 void *Thread(void *a) {
@@ -18,3 +27,5 @@
 }
 
 // CHECK: WARNING: ThreadSanitizer: data race
+
+// CHECK-NO-ATOMICS-NOT: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/atomic_store.cc b/test/tsan/atomic_store.cc
new file mode 100644
index 0000000..7ff4879
--- /dev/null
+++ b/test/tsan/atomic_store.cc
@@ -0,0 +1,49 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+long long Data;
+long long Sync;
+
+void *Thread1(void *x) {
+  Data++;
+  __atomic_store_n(&Sync, 1, __ATOMIC_RELEASE);
+  barrier_wait(&barrier);
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void *Thread2(void *x) {
+  barrier_wait(&barrier);
+  if (__atomic_load_n(&Sync, __ATOMIC_RELAXED) != 1)
+    exit(0);
+  // This store must terminate release sequence of the store in Thread1,
+  // thus tsan must detect race between Thread1 and main on Data.
+  __atomic_store_n(&Sync, 2, __ATOMIC_RELEASE);
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+int main() {
+  barrier_init(&barrier, 3);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  barrier_wait(&barrier);
+  barrier_wait(&barrier);
+  if (__atomic_load_n(&Sync, __ATOMIC_ACQUIRE) != 2)
+    exit(0);
+  if (Data != 1)
+    exit(0);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Read
+// CHECK:     #0 main
+// CHECK:   Previous write
+// CHECK:     #0 Thread1
+// CHECK:   Location is global 'Data'
+// CHECK: DONE
diff --git a/test/tsan/debug_alloc_stack.cc b/test/tsan/debug_alloc_stack.cc
new file mode 100644
index 0000000..303c103
--- /dev/null
+++ b/test/tsan/debug_alloc_stack.cc
@@ -0,0 +1,84 @@
+// RUN: %clangxx_tsan -O0 %s -o %t
+// RUN: env %env_tsan_opts=stack_trace_format=DEFAULT %deflake %run %t 2>&1 | FileCheck %s
+
+// Until I figure out how to make this test work on Linux
+// REQUIRES: system-darwin
+
+#include "test.h"
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef __APPLE__
+#include <sys/types.h>
+#endif
+
+extern "C" int __tsan_get_alloc_stack(void *addr, void **trace, size_t size,
+                                      int *thread_id, void *os_id);
+
+char *mem;
+void alloc_func() { mem = (char *)malloc(10); }
+
+void *AllocThread(void *context) {
+  uint64_t tid;
+#ifdef __APPLE__
+  pthread_threadid_np(NULL, &tid);
+#else
+  tid = gettid();
+#endif
+  fprintf(stderr, "alloc stack thread os id = 0x%llx\n", tid);
+  // CHECK: alloc stack thread os id = [[THREAD_OS_ID:0x[0-9a-f]+]]
+  alloc_func();
+  return NULL;
+}
+
+void *RaceThread(void *context) {
+  *mem = 'a';
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+int main() {
+  pthread_t t;
+  barrier_init(&barrier, 2);
+
+  pthread_create(&t, NULL, AllocThread, NULL);
+  pthread_join(t, NULL);
+
+  void *trace[100];
+  size_t num_frames = 100;
+  int thread_id;
+  void *thread_os_id;
+  num_frames =
+      __tsan_get_alloc_stack(mem, trace, num_frames, &thread_id, &thread_os_id);
+
+  fprintf(stderr, "alloc stack retval %s\n",
+          (num_frames > 0 && num_frames < 10) ? "ok" : "");
+  // CHECK: alloc stack retval ok
+  fprintf(stderr, "thread id = %d\n", thread_id);
+  // CHECK: thread id = 1
+  fprintf(stderr, "thread os id = 0x%llx\n", (uint64_t)thread_os_id);
+  // CHECK: thread os id = [[THREAD_OS_ID]]
+  fprintf(stderr, "%p\n", trace[0]);
+  // CHECK: [[ALLOC_FRAME_0:0x[0-9a-f]+]]
+  fprintf(stderr, "%p\n", trace[1]);
+  // CHECK: [[ALLOC_FRAME_1:0x[0-9a-f]+]]
+  fprintf(stderr, "%p\n", trace[2]);
+  // CHECK: [[ALLOC_FRAME_2:0x[0-9a-f]+]]
+
+  pthread_create(&t, NULL, RaceThread, NULL);
+  barrier_wait(&barrier);
+  mem[0] = 'b';
+  pthread_join(t, NULL);
+
+  free(mem);
+
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: Location is heap block of size 10 at {{.*}} allocated by thread T1
+// CHECK: #0 [[ALLOC_FRAME_0]]
+// CHECK: #1 [[ALLOC_FRAME_1]] in alloc_func
+// CHECK: #2 [[ALLOC_FRAME_2]] in AllocThread
diff --git a/test/tsan/debug_locate.cc b/test/tsan/debug_locate.cc
new file mode 100644
index 0000000..01b0960
--- /dev/null
+++ b/test/tsan/debug_locate.cc
@@ -0,0 +1,43 @@
+// RUN: %clangxx_tsan -O0 %s -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+
+#include <stdio.h>
+#include <stdlib.h>
+
+extern "C" const char *
+__tsan_locate_address(void *addr, char *name, size_t name_size,
+                      void **region_address_ptr, size_t *region_size_ptr);
+
+long global_var;
+
+int main() {
+  long stack_var;
+  void *heap_var = malloc(10);
+
+  fprintf(stderr, "stack_var = %p\n", &stack_var);
+  fprintf(stderr, "global_var = %p\n", &global_var);
+  fprintf(stderr, "heap_var = %p\n", heap_var);
+  // CHECK: stack_var = [[STACK_VAR:0x[0-9a-f]+]]
+  // CHECK: global_var = [[GLOBAL_VAR:0x[0-9a-f]+]]
+  // CHECK: heap_var = [[HEAP_VAR:0x[0-9a-f]+]]
+
+  const char *type;
+  char name[128];
+  void *start;
+  size_t size;
+  type = __tsan_locate_address(&stack_var, name, 128, &start, &size);
+  fprintf(stderr, "type: %s\n", type);
+  // CHECK: type: stack
+
+  type = __tsan_locate_address(&global_var, name, 128, &start, &size);
+  fprintf(stderr, "type: %s, name = %s, start = %p, size = %zu\n", type, name,
+          start, size);
+  // CHECK: type: global, name = global_var, start = [[GLOBAL_VAR]], size = {{8|0}}
+
+  type = __tsan_locate_address(heap_var, name, 128, &start, &size);
+  fprintf(stderr, "type: %s, start = %p, size = %zu\n", type, start, size);
+  // CHECK: type: heap, start = [[HEAP_VAR]], size = 10
+
+  free(heap_var);
+  return 0;
+}
diff --git a/test/tsan/fork_atexit.cc b/test/tsan/fork_atexit.cc
index 15cf0a2..6e3a2f5 100644
--- a/test/tsan/fork_atexit.cc
+++ b/test/tsan/fork_atexit.cc
@@ -7,7 +7,7 @@
 #include <sys/wait.h>
 
 void foo() {
-  printf("CHILD ATEXIT\n");
+  fprintf(stderr, "CHILD ATEXIT\n");
 }
 
 void *worker(void *unused) {
diff --git a/test/tsan/global_race.cc b/test/tsan/global_race.cc
index a352996..ec26b06 100644
--- a/test/tsan/global_race.cc
+++ b/test/tsan/global_race.cc
@@ -1,4 +1,14 @@
-// RUN: %clangxx_tsan -O1 %s -o %T/global_race.cc.exe && %deflake %run %T/global_race.cc.exe | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %T/global_race.cc.exe && %deflake %run %T/global_race.cc.exe 2>&1 \
+// RUN:   | FileCheck %s
+
+// Also check that memory access instrumentation can be configured by either
+// driver or legacy flags:
+
+// RUN: %clangxx_tsan -O1 %s -o %T/global_race.cc.exe -fno-sanitize-thread-memory-access && not %deflake %run %T/global_race.cc.exe 2>&1 \
+// RUN:   | FileCheck --allow-empty --check-prefix=CHECK-MEMORY-ACCESS-OFF %s
+// RUN: %clangxx_tsan -O1 %s -o %T/global_race.cc.exe -mllvm -tsan-instrument-memory-accesses=0 && not %deflake %run %T/global_race.cc.exe 2>&1 \
+// RUN:   | FileCheck --allow-empty --check-prefix=CHECK-MEMORY-ACCESS-OFF %s
+
 #include "test.h"
 
 int GlobalData[10];
@@ -23,3 +33,4 @@
 // CHECK: WARNING: ThreadSanitizer: data race
 // CHECK: Location is global 'GlobalData' {{(of size 40 )?}}at [[ADDR]] (global_race.cc.exe+0x{{[0-9,a-f]+}})
 
+// CHECK-MEMORY-ACCESS-OFF-NOT: WARNING: ThreadSanitizer: data race
diff --git a/test/tsan/ignore_lib5.cc b/test/tsan/ignore_lib5.cc
new file mode 100644
index 0000000..d7cd285
--- /dev/null
+++ b/test/tsan/ignore_lib5.cc
@@ -0,0 +1,75 @@
+// RUN: %clangxx_tsan -O1 %s -DLIB -fPIC -fno-sanitize=thread -shared -o %T/libignore_lib1.so
+// RUN: %clangxx_tsan -O1 %s -o %t
+// RUN: echo running w/o suppressions:
+// RUN: %deflake %run %t | FileCheck %s --check-prefix=CHECK-NOSUPP
+// RUN: echo running with suppressions:
+// RUN: %env_tsan_opts=suppressions='%s.supp' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP
+
+// REQUIRES: stable-runtime
+
+// Previously the test episodically failed with:
+//   ThreadSanitizer: called_from_lib suppression '/libignore_lib1.so$' is
+//   matched against 2 libraries: '/libignore_lib1.so' and '/libignore_lib1.so'
+// This was caused by non-atomicity of reading of /proc/self/maps.
+
+#ifndef LIB
+
+#include <dlfcn.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <libgen.h>
+#include <string>
+#include "test.h"
+
+#ifndef MAP_32BIT
+# define MAP_32BIT 0
+#endif
+
+#ifdef __APPLE__
+# define TSAN_MAP_ANON MAP_ANON
+#else
+# define TSAN_MAP_ANON MAP_ANONYMOUS
+#endif
+
+void *thr(void *arg) {
+  // Helper thread (arg is unused): creates lots of separate mappings in
+  // /proc/self/maps before the ignored library; two mmap calls per iteration.
+  for (int i = 0; i < 10000; i++) {
+    if (i == 5000)
+      barrier_wait(&barrier);  // halfway point: rendezvous with main()
+    mmap(0, 4096, PROT_READ, TSAN_MAP_ANON | MAP_PRIVATE | MAP_32BIT, -1 , 0);  // anonymous read-only mapping
+    mmap(0, 4096, PROT_WRITE, TSAN_MAP_ANON | MAP_PRIVATE | MAP_32BIT, -1 , 0);  // anonymous write-only mapping
+  }
+  return 0;
+}
+
+int main(int argc, char **argv) {
+  barrier_init(&barrier, 2);  // presumably two participants: main and thr
+  pthread_t th;
+  pthread_create(&th, 0, thr, 0);
+  barrier_wait(&barrier);  // returns once thr has reached the middle of its mmap loop
+  std::string lib = std::string(dirname(argv[0])) + "/libignore_lib1.so";  // library is looked up in the executable's directory
+  void *h = dlopen(lib.c_str(), RTLD_GLOBAL | RTLD_NOW);  // load it while thr keeps adding mappings
+  if (h == 0)
+    exit(printf("failed to load the library (%d)\n", errno));  // printf's return value becomes the exit status
+  void (*f)() = (void(*)())dlsym(h, "libfunc");
+  if (f == 0)
+    exit(printf("failed to find the func (%d)\n", errno));
+  pthread_join(th, 0);  // make sure thr has finished before calling into the library
+  f();  // libfunc is presumably provided by ignore_lib_lib.h in the -DLIB build
+}
+
+#else  // #ifdef LIB
+
+#include "ignore_lib_lib.h"
+
+#endif  // #ifdef LIB
+
+// CHECK-NOSUPP: WARNING: ThreadSanitizer: data race
+// CHECK-NOSUPP: OK
+
+// CHECK-WITHSUPP-NOT: WARNING: ThreadSanitizer: data race
+// CHECK-WITHSUPP: OK
+
diff --git a/test/tsan/ignore_lib5.cc.supp b/test/tsan/ignore_lib5.cc.supp
new file mode 100644
index 0000000..9f4119e
--- /dev/null
+++ b/test/tsan/ignore_lib5.cc.supp
@@ -0,0 +1,2 @@
+called_from_lib:/libignore_lib1.so$
+
diff --git a/test/tsan/java.h b/test/tsan/java.h
index 565a7a7..e9aa4ee 100644
--- a/test/tsan/java.h
+++ b/test/tsan/java.h
@@ -7,6 +7,7 @@
 int  __tsan_java_fini();
 void __tsan_java_alloc(jptr ptr, jptr size);
 void __tsan_java_free(jptr ptr, jptr size);
+jptr __tsan_java_find(jptr *from_ptr, jptr to);
 void __tsan_java_move(jptr src, jptr dst, jptr size);
 void __tsan_java_finalize();
 void __tsan_java_mutex_lock(jptr addr);
diff --git a/test/tsan/libcxx/lit.local.cfg b/test/tsan/libcxx/lit.local.cfg
index 202b44e..3ee7057 100644
--- a/test/tsan/libcxx/lit.local.cfg
+++ b/test/tsan/libcxx/lit.local.cfg
@@ -5,6 +5,8 @@
 
 root = getRoot(config)
 
-if not root.has_libcxx:
+# Only run if we have an instrumented libcxx.  On Darwin, run always (we have
+# interceptors to support the system-provided libcxx).
+if not root.has_libcxx and root.host_os != 'Darwin':
   config.unsupported = True
 
diff --git a/test/tsan/lit.cfg b/test/tsan/lit.cfg
index e0f93c7..5d82cc9 100644
--- a/test/tsan/lit.cfg
+++ b/test/tsan/lit.cfg
@@ -38,13 +38,15 @@
 else:
   extra_cflags = []
 
+tsan_incdir = config.test_source_root + "/../"
 # Setup default compiler flags used with -fsanitize=thread option.
 clang_tsan_cflags = (["-fsanitize=thread",
                       "-Wall"] +
                       [config.target_cflags] +
                       config.debug_info_flags +
-                      extra_cflags)
-clang_tsan_cxxflags = config.cxx_mode_flags + clang_tsan_cflags + ["-std=c++11"]
+                      extra_cflags +
+                      ["-I%s" % tsan_incdir])
+clang_tsan_cxxflags = config.cxx_mode_flags + clang_tsan_cflags + ["-std=c++11"] + ["-I%s" % tsan_incdir]
 # Add additional flags if we're using instrumented libc++.
 # Instrumented libcxx currently not supported on Darwin.
 if config.has_libcxx and config.host_os != 'Darwin':
@@ -68,7 +70,7 @@
 # Define CHECK-%os to check for OS-dependent output.
 config.substitutions.append( ('CHECK-%os', ("CHECK-" + config.host_os)))
 
-config.substitutions.append( ("%deflake ", os.path.join(os.path.dirname(__file__), "deflake.bash")) )
+config.substitutions.append( ("%deflake ", os.path.join(os.path.dirname(__file__), "deflake.bash") + " "))
 
 # Default test suffixes.
 config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm']
diff --git a/test/tsan/map32bit.cc b/test/tsan/map32bit.cc
index cec91a4..3b4f899 100644
--- a/test/tsan/map32bit.cc
+++ b/test/tsan/map32bit.cc
@@ -8,7 +8,7 @@
 // https://github.com/google/sanitizers/issues/412
 
 // MAP_32BIT flag for mmap is supported only for x86_64.
-// XFAIL: target-is-mips64
+// XFAIL: mips
 // XFAIL: aarch64
 // XFAIL: powerpc64
 
diff --git a/test/tsan/pie_test.cc b/test/tsan/pie_test.cc
index 8635f9c..93d31da 100644
--- a/test/tsan/pie_test.cc
+++ b/test/tsan/pie_test.cc
@@ -1,12 +1,6 @@
 // Check if tsan work with PIE binaries.
 // RUN: %clang_tsan %s -pie -fpic -o %t && %run %t
 
-// Some kernels might map PIE segments outside the current segment
-// mapping defined for x86 [1].
-// [1] https://git.kernel.org/linus/d1fd836dcf00d2028c700c7e44d2c23404062c90
-
-// UNSUPPORTED: x86
-
 int main(void) {
   return 0;
 }
diff --git a/test/tsan/signal_cond.cc b/test/tsan/signal_cond.cc
index beb2e02..6c20dd8 100644
--- a/test/tsan/signal_cond.cc
+++ b/test/tsan/signal_cond.cc
@@ -14,7 +14,7 @@
 
 void sig_handler(int sig) {
   (void)sig;
-  write(1, "SIGNAL\n", sizeof("SIGNAL\n") - 1);
+  write(2, "SIGNAL\n", sizeof("SIGNAL\n") - 1);
   barrier_wait(&barrier);
 }
 
diff --git a/test/tsan/simple_stack.c b/test/tsan/simple_stack.c
index 6ef92fb..71a3911 100644
--- a/test/tsan/simple_stack.c
+++ b/test/tsan/simple_stack.c
@@ -1,4 +1,3 @@
-// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include "test.h"
 
 int Global;
@@ -47,20 +46,40 @@
   return 0;
 }
 
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+
+// Also check that function entry/exit instrumentation can be configured by
+// either driver or legacy flags:
+
+// RUN: %clangxx_tsan -O1 %s -o %t -fno-sanitize-thread-func-entry-exit && %deflake %run %t 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-FUNC-ENTRY-EXIT-OFF %s
+// RUN: %clangxx_tsan -O1 %s -o %t -mllvm -tsan-instrument-func-entry-exit=0 && %deflake %run %t 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-FUNC-ENTRY-EXIT-OFF %s
+
 // CHECK:      WARNING: ThreadSanitizer: data race
 // CHECK-NEXT:   Write of size 4 at {{.*}} by thread T1:
-// CHECK-NEXT:     #0 foo1{{.*}} {{.*}}simple_stack.c:7{{(:10)?}} ({{.*}})
-// CHECK-NEXT:     #1 bar1{{.*}} {{.*}}simple_stack.c:12{{(:3)?}} ({{.*}})
-// CHECK-NEXT:     #2 Thread1{{.*}} {{.*}}simple_stack.c:26{{(:3)?}} ({{.*}})
+// CHECK-NEXT:     #0 foo1{{.*}} {{.*}}simple_stack.c:6{{(:10)?}} ({{.*}})
+// CHECK-NEXT:     #1 bar1{{.*}} {{.*}}simple_stack.c:11{{(:3)?}} ({{.*}})
+// CHECK-NEXT:     #2 Thread1{{.*}} {{.*}}simple_stack.c:25{{(:3)?}} ({{.*}})
 // CHECK:        Previous read of size 4 at {{.*}} by thread T2:
-// CHECK-NEXT:     #0 foo2{{.*}} {{.*}}simple_stack.c:16{{(:20)?}} ({{.*}})
-// CHECK-NEXT:     #1 bar2{{.*}} {{.*}}simple_stack.c:21{{(:3)?}} ({{.*}})
-// CHECK-NEXT:     #2 Thread2{{.*}} {{.*}}simple_stack.c:31{{(:3)?}} ({{.*}})
+// CHECK-NEXT:     #0 foo2{{.*}} {{.*}}simple_stack.c:15{{(:20)?}} ({{.*}})
+// CHECK-NEXT:     #1 bar2{{.*}} {{.*}}simple_stack.c:20{{(:3)?}} ({{.*}})
+// CHECK-NEXT:     #2 Thread2{{.*}} {{.*}}simple_stack.c:30{{(:3)?}} ({{.*}})
 // CHECK:        Thread T1 (tid={{.*}}, running) created by main thread at:
 // CHECK-NEXT:     #0 pthread_create {{.*}} ({{.*}})
-// CHECK-NEXT:     #1 StartThread{{.*}} {{.*}}simple_stack.c:37{{(:3)?}} ({{.*}})
-// CHECK-NEXT:     #2 main{{.*}} {{.*}}simple_stack.c:43{{(:3)?}} ({{.*}})
+// CHECK-NEXT:     #1 StartThread{{.*}} {{.*}}simple_stack.c:36{{(:3)?}} ({{.*}})
+// CHECK-NEXT:     #2 main{{.*}} {{.*}}simple_stack.c:42{{(:3)?}} ({{.*}})
 // CHECK:        Thread T2 ({{.*}}) created by main thread at:
 // CHECK-NEXT:     #0 pthread_create {{.*}} ({{.*}})
-// CHECK-NEXT:     #1 StartThread{{.*}} {{.*}}simple_stack.c:37{{(:3)?}} ({{.*}})
-// CHECK-NEXT:     #2 main{{.*}} {{.*}}simple_stack.c:44{{(:3)?}} ({{.*}})
+// CHECK-NEXT:     #1 StartThread{{.*}} {{.*}}simple_stack.c:36{{(:3)?}} ({{.*}})
+// CHECK-NEXT:     #2 main{{.*}} {{.*}}simple_stack.c:43{{(:3)?}} ({{.*}})
+
+// CHECK-FUNC-ENTRY-EXIT-OFF:      WARNING: ThreadSanitizer: data race
+// CHECK-FUNC-ENTRY-EXIT-OFF-NEXT:   Write of size 4 at {{.*}} by thread T1:
+// CHECK-FUNC-ENTRY-EXIT-OFF-NEXT:     #0 foo1{{.*}} {{.*}}simple_stack.c:6{{(:10)?}} ({{.*}})
+// CHECK-FUNC-ENTRY-EXIT-OFF:        Previous read of size 4 at {{.*}} by thread T2:
+// CHECK-FUNC-ENTRY-EXIT-OFF-NEXT:     #0 foo2{{.*}} {{.*}}simple_stack.c:15{{(:20)?}} ({{.*}})
+// CHECK-FUNC-ENTRY-EXIT-OFF:        Thread T1 (tid={{.*}}, running) created by main thread at:
+// CHECK-FUNC-ENTRY-EXIT-OFF-NEXT:     #0 pthread_create {{.*}} ({{.*}})
+// CHECK-FUNC-ENTRY-EXIT-OFF:        Thread T2 ({{.*}}) created by main thread at:
+// CHECK-FUNC-ENTRY-EXIT-OFF-NEXT:     #0 pthread_create {{.*}} ({{.*}})
diff --git a/test/tsan/test.h b/test/tsan/test.h
index 85fffec..6b981c0 100644
--- a/test/tsan/test.h
+++ b/test/tsan/test.h
@@ -6,6 +6,7 @@
 #include <stddef.h>
 #include <sched.h>
 #include <stdarg.h>
+#include "sanitizer_common/print_address.h"
 
 #ifdef __APPLE__
 #include <mach/mach_time.h>
@@ -38,23 +39,6 @@
 // Default instance of the barrier, but a test can declare more manually.
 invisible_barrier_t barrier;
 
-void print_address(const char *str, int n, ...) {
-  fprintf(stderr, "%s", str);
-  va_list ap;
-  va_start(ap, n);
-  while (n--) {
-    void *p = va_arg(ap, void *);
-#if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__)
-    // On FreeBSD, the %p conversion specifier works as 0x%x and thus does not
-    // match to the format used in the diagnotic message.
-    fprintf(stderr, "0x%012lx ", (unsigned long) p);
-#elif defined(__mips64)
-    fprintf(stderr, "0x%010lx ", (unsigned long) p);
-#endif
-  }
-  fprintf(stderr, "\n");
-}
-
 #ifdef __APPLE__
 unsigned long long monotonic_clock_ns() {
   static mach_timebase_info_data_t timebase_info;
diff --git a/test/ubsan/TestCases/TypeCheck/null.cpp b/test/ubsan/TestCases/TypeCheck/null.cpp
index b1cba83..636fab8 100644
--- a/test/ubsan/TestCases/TypeCheck/null.cpp
+++ b/test/ubsan/TestCases/TypeCheck/null.cpp
@@ -1,20 +1,34 @@
-// RUN: %clangxx -fsanitize=null %s -O3 -o %t
-// RUN: %run %t l 2>&1 | FileCheck %s --check-prefix=CHECK-LOAD
-// RUN: %expect_crash %run %t s 2>&1 | FileCheck %s --check-prefix=CHECK-STORE
-// RUN: %run %t r 2>&1 | FileCheck %s --check-prefix=CHECK-REFERENCE
-// RUN: %run %t m 2>&1 | FileCheck %s --check-prefix=CHECK-MEMBER
-// RUN: %run %t f 2>&1 | FileCheck %s --check-prefix=CHECK-MEMFUN
+// RUN: %clangxx -fsanitize=null -fno-sanitize-recover=null %s -O3 -o %t
+// RUN: not %run %t l 2>&1 | FileCheck %s --check-prefix=CHECK-LOAD
+// RUN: not %run %t s 2>&1 | FileCheck %s --check-prefix=CHECK-STORE
+// RUN: not %run %t r 2>&1 | FileCheck %s --check-prefix=CHECK-REFERENCE
+// RUN: not %run %t m 2>&1 | FileCheck %s --check-prefix=CHECK-MEMBER
+// RUN: not %run %t f 2>&1 | FileCheck %s --check-prefix=CHECK-MEMFUN
+// RUN: not %run %t t 2>&1 | FileCheck %s --check-prefix=CHECK-VCALL
+// RUN: not %run %t u 2>&1 | FileCheck %s --check-prefix=CHECK-VCALL2
 
 struct S {
   int f() { return 0; }
   int k;
 };
 
+struct T {
+  virtual int v() { return 1; }
+};
+
+struct U : T {
+  virtual int v() { return 2; }
+};
+
 int main(int, char **argv) {
   int *p = 0;
   S *s = 0;
+  T *t = 0;
+  U *u = 0;
 
   (void)*p; // ok!
+  (void)*t; // ok!
+  (void)*u; // ok!
 
   switch (argv[1][0]) {
   case 'l':
@@ -34,5 +48,11 @@
   case 'f':
     // CHECK-MEMFUN: null.cpp:[[@LINE+1]]:15: runtime error: member call on null pointer of type 'S'
     return s->f();
+  case 't':
+    // CHECK-VCALL: null.cpp:[[@LINE+1]]:15: runtime error: member call on null pointer of type 'T'
+    return t->v();
+  case 'u':
+    // CHECK-VCALL2: null.cpp:[[@LINE+1]]:15: runtime error: member call on null pointer of type 'U'
+    return u->v();
   }
 }
diff --git a/test/ubsan/TestCases/TypeCheck/vptr.cpp b/test/ubsan/TestCases/TypeCheck/vptr.cpp
index 86b646d..53a79c9 100644
--- a/test/ubsan/TestCases/TypeCheck/vptr.cpp
+++ b/test/ubsan/TestCases/TypeCheck/vptr.cpp
@@ -1,4 +1,4 @@
-// RUN: %clangxx -frtti -fsanitize=vptr -fno-sanitize-recover=vptr -g %s -O3 -o %t
+// RUN: %clangxx -frtti -fsanitize=vptr -fno-sanitize-recover=vptr -g %s -O3 -o %t -mllvm -enable-tail-merge=false
 // RUN: %run %t rT && %run %t mT && %run %t fT && %run %t cT
 // RUN: %run %t rU && %run %t mU && %run %t fU && %run %t cU
 // RUN: %run %t rS && %run %t rV && %run %t oV
@@ -50,6 +50,8 @@
 // Make p global so that lsan does not complain.
 T *p = 0;
 
+volatile void *sink1, *sink2;
+
 int access_p(T *p, char type);
 
 int main(int argc, char **argv) {
@@ -74,6 +76,11 @@
 
   char Buffer[sizeof(U)] = {};
   char TStorage[sizeof(T)];
+  // Allocate two dummy objects so that the real object
+  // is not on the boundary of mapped memory. Otherwise ubsan
+  // will not be able to describe the vptr in detail.
+  sink1 = new T;
+  sink2 = new U;
   switch (argv[1][1]) {
   case '0':
     p = reinterpret_cast<T*>(Buffer);
diff --git a/test/xray/CMakeLists.txt b/test/xray/CMakeLists.txt
index 806de42..50f6926 100644
--- a/test/xray/CMakeLists.txt
+++ b/test/xray/CMakeLists.txt
@@ -1,29 +1,48 @@
 set(XRAY_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 
 set(XRAY_TESTSUITES)
+set(XRAY_FDR_TESTSUITES)
 
 set(XRAY_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
+set(XRAY_FDR_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
 
 if(NOT COMPILER_RT_STANDALONE_BUILD AND COMPILER_RT_BUILD_XRAY AND
    COMPILER_RT_HAS_XRAY)
   list(APPEND XRAY_TEST_DEPS xray)
+  list(APPEND XRAY_FDR_TEST_DEPS xray-fdr)
 endif()
 
 set(XRAY_TEST_ARCH ${XRAY_SUPPORTED_ARCH})
-foreach(arch ${XRAY_TEST_ARCH})
-  set(XRAY_TEST_TARGET_ARCH ${arch})
-  string(TOLOWER "-${arch}-${OS_NAME}" XRAY_TEST_CONFIG_SUFFIX)
-  get_test_cc_for_arch(${arch} XRAY_TEST_TARGET_CC XRAY_TEST_TARGET_CFLAGS)
-  string(TOUPPER ${arch} ARCH_UPPER_CASE)
-  set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}Config)
+if (COMPILER_RT_BUILD_XRAY AND COMPILER_RT_HAS_XRAY)
+  foreach(arch ${XRAY_TEST_ARCH})
+    set(XRAY_TEST_TARGET_ARCH ${arch})
+    string(TOLOWER "-${arch}-${OS_NAME}" XRAY_TEST_CONFIG_SUFFIX)
+    get_test_cc_for_arch(${arch} XRAY_TEST_TARGET_CC XRAY_TEST_TARGET_CFLAGS)
+    string(TOUPPER ${arch} ARCH_UPPER_CASE)
+    set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}Config)
 
-  configure_lit_site_cfg(
-    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
-    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg)
-  list(APPEND XRAY_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME})
-endforeach()
+    configure_lit_site_cfg(
+      ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+      ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg)
+    list(APPEND XRAY_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME})
+  endforeach()
+
+  # Add unit tests.
+  if(COMPILER_RT_INCLUDE_TESTS)
+    configure_lit_site_cfg(
+      ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in
+      ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg)
+    list(APPEND XRAY_TEST_DEPS XRayUnitTests)
+    list(APPEND XRAY_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/Unit)
+  endif()
+endif()
 
 add_lit_testsuite(check-xray "Running the XRay tests"
   ${XRAY_TESTSUITES}
-	DEPENDS ${XRAY_TEST_DEPS})
+  DEPENDS ${XRAY_TEST_DEPS})
 set_target_properties(check-xray PROPERTIES FOLDER "Compiler-RT Misc")
+
+add_lit_testsuite(check-xray-fdr "Running the XRay flight data recorder tests"
+  ${XRAY_FDR_TESTSUITES}
+  DEPENDS ${XRAY_FDR_TEST_DEPS})
+set_target_properties(check-xray-fdr PROPERTIES FOLDER "Compiler-RT Misc")
diff --git a/test/xray/TestCases/Linux/argv0-log-file-name.cc b/test/xray/TestCases/Linux/argv0-log-file-name.cc
new file mode 100644
index 0000000..1765ce9
--- /dev/null
+++ b/test/xray/TestCases/Linux/argv0-log-file-name.cc
@@ -0,0 +1,14 @@
+// Check to make sure argv[0] is contained within the (randomised) XRay log file
+// name.
+
+// RUN: %clangxx_xray -std=c++11 %s -o %t
+// RUN: %run %t > xray.log.file.name 2>&1
+// RUN: ls | FileCheck xray.log.file.name
+// RUN: rm xray-log.* xray.log.file.name
+
+#include <cstdio>
+#include <libgen.h>
+
+[[clang::xray_always_instrument]] int main(int argc, char *argv[]) {
+  printf("// CHECK: xray-log.%s.{{.*}}\n", basename(argv[0]));
+}
diff --git a/test/xray/TestCases/Linux/fixedsize-logging.cc b/test/xray/TestCases/Linux/fixedsize-logging.cc
new file mode 100644
index 0000000..90e7668
--- /dev/null
+++ b/test/xray/TestCases/Linux/fixedsize-logging.cc
@@ -0,0 +1,20 @@
+// Check to make sure that we have a log file with a fixed size.
+
+// RUN: %clangxx_xray -std=c++11 %s -o %t
+// RUN: XRAY_OPTIONS="verbosity=1 xray_logfile_base=fixedsize-logging-" %run %t 2>&1 | FileCheck %s
+//
+// After all that, clean up the output xray log.
+//
+// RUN: rm fixedsize-logging-*
+
+#include <cstdio>
+
+[[clang::xray_always_instrument]] void foo() {
+  printf("foo() is always instrumented!");
+}
+
+int main() {
+  // CHECK: XRay: Log file in 'fixedsize-logging-{{.*}}'
+  foo();
+  // CHECK: foo() is always instrumented!
+}
diff --git a/test/xray/TestCases/Linux/optional-inmemory-log.cc b/test/xray/TestCases/Linux/optional-inmemory-log.cc
new file mode 100644
index 0000000..ef2c43f
--- /dev/null
+++ b/test/xray/TestCases/Linux/optional-inmemory-log.cc
@@ -0,0 +1,21 @@
+// Make sure that we don't get the inmemory logging implementation enabled when
+// we turn it off via options.
+
+// RUN: %clangxx_xray -std=c++11 %s -o %t
+// RUN: XRAY_OPTIONS="verbosity=1 xray_naive_log=false xray_logfile_base=optional-inmemory-log.xray-" %run %t 2>&1 | FileCheck %s
+//
+// Make sure we clean out the logs in case there was a bug.
+//
+// RUN: rm -f optional-inmemory-log.xray-*
+
+#include <cstdio>
+
+[[clang::xray_always_instrument]] void foo() {
+  printf("foo() is always instrumented!");
+}
+
+int main() {
+  // CHECK-NOT: XRay: Log file in 'optional-inmemory-log.xray-{{.*}}'
+  foo();
+  // CHECK: foo() is always instrumented!
+}
diff --git a/test/xray/Unit/lit.site.cfg.in b/test/xray/Unit/lit.site.cfg.in
new file mode 100644
index 0000000..1ebc7b9
--- /dev/null
+++ b/test/xray/Unit/lit.site.cfg.in
@@ -0,0 +1,12 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+import os
+
+# Load common config for all compiler-rt unit tests.
+lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/unittests/lit.common.unit.configured")
+
+# Setup config name.
+config.name = 'XRay-Unit'
+
+config.test_exec_root = "@COMPILER_RT_BINARY_DIR@/lib/xray/tests"
+config.test_source_root = config.test_exec_root
diff --git a/test/xray/lit.cfg b/test/xray/lit.cfg
index 04e21f1..5d030e1 100644
--- a/test/xray/lit.cfg
+++ b/test/xray/lit.cfg
@@ -32,3 +32,8 @@
 
 if config.host_os not in ['Linux'] or config.host_arch.find('64') == -1:
   config.unsupported = True
+
+# Allow tests to use 'REQUIRES: stable-runtime'.  Use it when XFAIL is not an
+# option, e.g. because the test sometimes passes and sometimes fails.
+if config.target_arch != 'aarch64':
+  config.available_features.add('stable-runtime')