Merge remote-tracking branch 'origin/apple/stable/20190619' into stable
diff --git a/include/llvm/Transforms/IPO/HotColdSplitting.h b/include/llvm/Transforms/IPO/HotColdSplitting.h
index 7366884..8c3049f 100644
--- a/include/llvm/Transforms/IPO/HotColdSplitting.h
+++ b/include/llvm/Transforms/IPO/HotColdSplitting.h
@@ -17,6 +17,45 @@
 namespace llvm {
 
 class Module;
+class ProfileSummaryInfo;
+class BlockFrequencyInfo;
+class TargetTransformInfo;
+class OptimizationRemarkEmitter;
+class AssumptionCache;
+class DominatorTree;
+class CodeExtractorAnalysisCache;
+
+/// A sequence of basic blocks.
+///
+/// A 0-sized SmallVector is slightly cheaper to move than a std::vector.
+using BlockSequence = SmallVector<BasicBlock *, 0>;
+
+class HotColdSplitting {
+public:
+  HotColdSplitting(ProfileSummaryInfo *ProfSI,
+                   function_ref<BlockFrequencyInfo *(Function &)> GBFI,
+                   function_ref<TargetTransformInfo &(Function &)> GTTI,
+                   std::function<OptimizationRemarkEmitter &(Function &)> *GORE,
+                   function_ref<AssumptionCache *(Function &)> LAC)
+      : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE), LookupAC(LAC) {}
+  bool run(Module &M);
+
+private:
+  bool isFunctionCold(const Function &F) const;
+  bool shouldOutlineFrom(const Function &F) const;
+  bool outlineColdRegions(Function &F, bool HasProfileSummary);
+  Function *extractColdRegion(const BlockSequence &Region,
+                              const CodeExtractorAnalysisCache &CEAC,
+                              DominatorTree &DT, BlockFrequencyInfo *BFI,
+                              TargetTransformInfo &TTI,
+                              OptimizationRemarkEmitter &ORE,
+                              AssumptionCache *AC, unsigned Count);
+  ProfileSummaryInfo *PSI;
+  function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
+  function_ref<TargetTransformInfo &(Function &)> GetTTI;
+  std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
+  function_ref<AssumptionCache *(Function &)> LookupAC;
+};
 
 /// Pass to outline cold regions.
 class HotColdSplittingPass : public PassInfoMixin<HotColdSplittingPass> {
diff --git a/include/llvm/Transforms/Utils/CodeExtractor.h b/include/llvm/Transforms/Utils/CodeExtractor.h
index becbf0e..8a1ab79 100644
--- a/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -22,6 +22,7 @@
 
 namespace llvm {
 
+class AllocaInst;
 class BasicBlock;
 class BlockFrequency;
 class BlockFrequencyInfo;
@@ -36,6 +37,38 @@
 class Type;
 class Value;
 
+/// A cache for the CodeExtractor analysis. The operation \ref
+/// CodeExtractor::extractCodeRegion is guaranteed not to invalidate this
+/// object. This object should conservatively be considered invalid if any
+/// other mutating operations on the IR occur.
+///
+/// Constructing this object is O(n) in the size of the function.
+class CodeExtractorAnalysisCache {
+  /// The allocas in the function.
+  SmallVector<AllocaInst *, 16> Allocas;
+
+  /// Base memory addresses of load/store instructions, grouped by block.
+  DenseMap<BasicBlock *, DenseSet<Value *>> BaseMemAddrs;
+
+  /// Blocks which contain instructions which may have unknown side-effects
+  /// on memory.
+  DenseSet<BasicBlock *> SideEffectingBlocks;
+
+  void findSideEffectInfoForBlock(BasicBlock &BB);
+
+public:
+  CodeExtractorAnalysisCache(Function &F);
+
+  /// Get the allocas in the function at the time the analysis was created.
+  /// Note that some of these allocas may no longer be present in the function,
+  /// due to \ref CodeExtractor::extractCodeRegion.
+  ArrayRef<AllocaInst *> getAllocas() const { return Allocas; }
+
+  /// Check whether \p BB contains an instruction thought to load from, store
+  /// to, or otherwise clobber the alloca \p Addr.
+  bool doesBlockContainClobberOfAddr(BasicBlock &BB, AllocaInst *Addr) const;
+};
+
   /// Utility class for extracting code into a new function.
   ///
   /// This utility provides a simple interface for extracting some sequence of
@@ -104,13 +137,21 @@
     ///
     /// Returns zero when called on a CodeExtractor instance where isEligible
     /// returns false.
-    Function *extractCodeRegion();
+    Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC);
+
+    /// Verify that assumption cache isn't stale after a region is extracted.
+    /// Returns false when verifier finds errors. AssumptionCache is passed as
+    /// parameter to make this function stateless.
+    static bool verifyAssumptionCache(const Function& F, AssumptionCache *AC);
 
     /// Test whether this code extractor is eligible.
     ///
     /// Based on the blocks used when constructing the code extractor,
     /// determine whether it is eligible for extraction.
-    bool isEligible() const { return !Blocks.empty(); }
+    /// 
+    /// Checks that varargs handling (with vastart and vaend) is only done in
+    /// the outlined blocks.
+    bool isEligible() const;
 
     /// Compute the set of input values and output values for the code.
     ///
@@ -127,7 +168,9 @@
     /// region.
     ///
     /// Returns true if it is safe to do the code motion.
-    bool isLegalToShrinkwrapLifetimeMarkers(Instruction *AllocaAddr) const;
+    bool
+    isLegalToShrinkwrapLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC,
+                                       Instruction *AllocaAddr) const;
 
     /// Find the set of allocas whose life ranges are contained within the
     /// outlined region.
@@ -137,7 +180,8 @@
     /// are used by the lifetime markers are also candidates for shrink-
     /// wrapping. The instructions that need to be sunk are collected in
     /// 'Allocas'.
-    void findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
+    void findAllocas(const CodeExtractorAnalysisCache &CEAC,
+                     ValueSet &SinkCands, ValueSet &HoistCands,
                      BasicBlock *&ExitBlock) const;
 
     /// Find or create a block within the outline region for placing hoisted
@@ -151,6 +195,17 @@
     BasicBlock *findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock);
 
   private:
+    struct LifetimeMarkerInfo {
+      bool SinkLifeStart = false;
+      bool HoistLifeEnd = false;
+      Instruction *LifeStart = nullptr;
+      Instruction *LifeEnd = nullptr;
+    };
+
+    LifetimeMarkerInfo
+    getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC,
+                       Instruction *Addr, BasicBlock *ExitBlock) const;
+
     void severSplitPHINodesOfEntry(BasicBlock *&Header);
     void severSplitPHINodesOfExits(const SmallPtrSetImpl<BasicBlock *> &Exits);
     void splitReturnBlocks();
diff --git a/lib/Remarks/YAMLRemarkSerializer.cpp b/lib/Remarks/YAMLRemarkSerializer.cpp
index 14de44f..63c72d8 100644
--- a/lib/Remarks/YAMLRemarkSerializer.cpp
+++ b/lib/Remarks/YAMLRemarkSerializer.cpp
@@ -100,7 +100,7 @@
 /// newlines in strings.
 struct StringBlockVal {
   StringRef Value;
-  StringBlockVal(const std::string &Value) : Value(Value) {}
+  StringBlockVal(StringRef R) : Value(R) {}
 };
 
 template <> struct BlockScalarTraits<StringBlockVal> {
diff --git a/lib/Transforms/IPO/BlockExtractor.cpp b/lib/Transforms/IPO/BlockExtractor.cpp
index 6c365f3..94ced29 100644
--- a/lib/Transforms/IPO/BlockExtractor.cpp
+++ b/lib/Transforms/IPO/BlockExtractor.cpp
@@ -204,7 +204,8 @@
       ++NumExtracted;
       Changed = true;
     }
-    Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion();
+    CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent());
+    Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC);
     if (F)
       LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName()
                         << "' in: " << F->getName() << '\n');
diff --git a/lib/Transforms/IPO/HotColdSplitting.cpp b/lib/Transforms/IPO/HotColdSplitting.cpp
index 1ba4ea9..83bc390 100644
--- a/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -90,12 +90,6 @@
     cl::desc("Maximum number of parameters for a split function"));
 
 namespace {
-
-/// A sequence of basic blocks.
-///
-/// A 0-sized SmallVector is slightly cheaper to move than a std::vector.
-using BlockSequence = SmallVector<BasicBlock *, 0>;
-
 // Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify
 // this function unless you modify the MBB version as well.
 //
@@ -174,31 +168,6 @@
   return Changed;
 }
 
-class HotColdSplitting {
-public:
-  HotColdSplitting(ProfileSummaryInfo *ProfSI,
-                   function_ref<BlockFrequencyInfo *(Function &)> GBFI,
-                   function_ref<TargetTransformInfo &(Function &)> GTTI,
-                   std::function<OptimizationRemarkEmitter &(Function &)> *GORE,
-                   function_ref<AssumptionCache *(Function &)> LAC)
-      : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE), LookupAC(LAC) {}
-  bool run(Module &M);
-
-private:
-  bool isFunctionCold(const Function &F) const;
-  bool shouldOutlineFrom(const Function &F) const;
-  bool outlineColdRegions(Function &F, bool HasProfileSummary);
-  Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT,
-                              BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
-                              OptimizationRemarkEmitter &ORE,
-                              AssumptionCache *AC, unsigned Count);
-  ProfileSummaryInfo *PSI;
-  function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
-  function_ref<TargetTransformInfo &(Function &)> GetTTI;
-  std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
-  function_ref<AssumptionCache *(Function &)> LookupAC;
-};
-
 class HotColdSplittingLegacyPass : public ModulePass {
 public:
   static char ID;
@@ -356,13 +325,10 @@
   return Penalty;
 }
 
-Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
-                                              DominatorTree &DT,
-                                              BlockFrequencyInfo *BFI,
-                                              TargetTransformInfo &TTI,
-                                              OptimizationRemarkEmitter &ORE,
-                                              AssumptionCache *AC,
-                                              unsigned Count) {
+Function *HotColdSplitting::extractColdRegion(
+    const BlockSequence &Region, const CodeExtractorAnalysisCache &CEAC,
+    DominatorTree &DT, BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
+    OptimizationRemarkEmitter &ORE, AssumptionCache *AC, unsigned Count) {
   assert(!Region.empty());
 
   // TODO: Pass BFI and BPI to update profile information.
@@ -384,7 +350,7 @@
     return nullptr;
 
   Function *OrigF = Region[0]->getParent();
-  if (Function *OutF = CE.extractCodeRegion()) {
+  if (Function *OutF = CE.extractCodeRegion(CEAC)) {
     User *U = *OutF->user_begin();
     CallInst *CI = cast<CallInst>(U);
     CallSite CS(CI);
@@ -672,9 +638,14 @@
     }
   }
 
+  if (OutliningWorklist.empty())
+    return Changed;
+
   // Outline single-entry cold regions, splitting up larger regions as needed.
   unsigned OutlinedFunctionID = 1;
-  while (!OutliningWorklist.empty()) {
+  // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
+  CodeExtractorAnalysisCache CEAC(F);
+  do {
     OutliningRegion Region = OutliningWorklist.pop_back_val();
     assert(!Region.empty() && "Empty outlining region in worklist");
     do {
@@ -685,14 +656,14 @@
           BB->dump();
       });
 
-      Function *Outlined = extractColdRegion(SubRegion, *DT, BFI, TTI, ORE, AC,
-                                             OutlinedFunctionID);
+      Function *Outlined = extractColdRegion(SubRegion, CEAC, *DT, BFI, TTI,
+                                             ORE, AC, OutlinedFunctionID);
       if (Outlined) {
         ++OutlinedFunctionID;
         Changed = true;
       }
     } while (!Region.empty());
-  }
+  } while (!OutliningWorklist.empty());
 
   return Changed;
 }
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 91c7b5f..add2ae0 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -141,10 +141,12 @@
     if (NumLoops == 0) return Changed;
     --NumLoops;
     AssumptionCache *AC = nullptr;
+    Function &Func = *L->getHeader()->getParent();
     if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>())
-      AC = ACT->lookupAssumptionCache(*L->getHeader()->getParent());
+      AC = ACT->lookupAssumptionCache(Func);
+    CodeExtractorAnalysisCache CEAC(Func);
     CodeExtractor Extractor(DT, *L, false, nullptr, nullptr, AC);
-    if (Extractor.extractCodeRegion() != nullptr) {
+    if (Extractor.extractCodeRegion(CEAC) != nullptr) {
       Changed = true;
       // After extraction, the loop is replaced by a function call, so
       // we shouldn't try to run any more loop passes on it.
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 733782e..bcbd0f7 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -1122,6 +1122,9 @@
   BranchProbabilityInfo BPI(*ClonedFunc, LI);
   ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
 
+  // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
+  CodeExtractorAnalysisCache CEAC(*ClonedFunc);
+
   SetVector<Value *> Inputs, Outputs, Sinks;
   for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
        ClonedOMRI->ORI) {
@@ -1148,7 +1151,7 @@
     if (Outputs.size() > 0 && !ForceLiveExit)
       continue;
 
-    Function *OutlinedFunc = CE.extractCodeRegion();
+    Function *OutlinedFunc = CE.extractCodeRegion(CEAC);
 
     if (OutlinedFunc) {
       CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc);
@@ -1210,11 +1213,12 @@
     }
 
   // Extract the body of the if.
+  CodeExtractorAnalysisCache CEAC(*ClonedFunc);
   Function *OutlinedFunc =
       CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
                     ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
                     /* AllowVarargs */ true)
-          .extractCodeRegion();
+          .extractCodeRegion(CEAC);
 
   if (OutlinedFunc) {
     BasicBlock *OutliningCallBB =
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 61c1e93..48beecd 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -293,10 +293,8 @@
         CommonExitBlock = Succ;
         continue;
       }
-      if (CommonExitBlock == Succ)
-        continue;
-
-      return true;
+      if (CommonExitBlock != Succ)
+        return true;
     }
     return false;
   };
@@ -307,52 +305,79 @@
   return CommonExitBlock;
 }
 
+CodeExtractorAnalysisCache::CodeExtractorAnalysisCache(Function &F) {
+  for (BasicBlock &BB : F) {
+    for (Instruction &II : BB.instructionsWithoutDebug())
+      if (auto *AI = dyn_cast<AllocaInst>(&II))
+        Allocas.push_back(AI);
+
+    findSideEffectInfoForBlock(BB);
+  }
+}
+
+void CodeExtractorAnalysisCache::findSideEffectInfoForBlock(BasicBlock &BB) {
+  for (Instruction &II : BB.instructionsWithoutDebug()) {
+    unsigned Opcode = II.getOpcode();
+    Value *MemAddr = nullptr;
+    switch (Opcode) {
+    case Instruction::Store:
+    case Instruction::Load: {
+      if (Opcode == Instruction::Store) {
+        StoreInst *SI = cast<StoreInst>(&II);
+        MemAddr = SI->getPointerOperand();
+      } else {
+        LoadInst *LI = cast<LoadInst>(&II);
+        MemAddr = LI->getPointerOperand();
+      }
+      // Global variable can not be aliased with locals.
+      if (dyn_cast<Constant>(MemAddr))
+        break;
+      Value *Base = MemAddr->stripInBoundsConstantOffsets();
+      if (!isa<AllocaInst>(Base)) {
+        SideEffectingBlocks.insert(&BB);
+        return;
+      }
+      BaseMemAddrs[&BB].insert(Base);
+      break;
+    }
+    default: {
+      IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
+      if (IntrInst) {
+        if (IntrInst->isLifetimeStartOrEnd())
+          break;
+        SideEffectingBlocks.insert(&BB);
+        return;
+      }
+      // Treat all the other cases conservatively if it has side effects.
+      if (II.mayHaveSideEffects()) {
+        SideEffectingBlocks.insert(&BB);
+        return;
+      }
+    }
+    }
+  }
+}
+
+bool CodeExtractorAnalysisCache::doesBlockContainClobberOfAddr(
+    BasicBlock &BB, AllocaInst *Addr) const {
+  if (SideEffectingBlocks.count(&BB))
+    return true;
+  auto It = BaseMemAddrs.find(&BB);
+  if (It != BaseMemAddrs.end())
+    return It->second.count(Addr);
+  return false;
+}
+
 bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
-    Instruction *Addr) const {
+    const CodeExtractorAnalysisCache &CEAC, Instruction *Addr) const {
   AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets());
   Function *Func = (*Blocks.begin())->getParent();
   for (BasicBlock &BB : *Func) {
     if (Blocks.count(&BB))
       continue;
-    for (Instruction &II : BB) {
-      if (isa<DbgInfoIntrinsic>(II))
-        continue;
-
-      unsigned Opcode = II.getOpcode();
-      Value *MemAddr = nullptr;
-      switch (Opcode) {
-      case Instruction::Store:
-      case Instruction::Load: {
-        if (Opcode == Instruction::Store) {
-          StoreInst *SI = cast<StoreInst>(&II);
-          MemAddr = SI->getPointerOperand();
-        } else {
-          LoadInst *LI = cast<LoadInst>(&II);
-          MemAddr = LI->getPointerOperand();
-        }
-        // Global variable can not be aliased with locals.
-        if (dyn_cast<Constant>(MemAddr))
-          break;
-        Value *Base = MemAddr->stripInBoundsConstantOffsets();
-        if (!isa<AllocaInst>(Base) || Base == AI)
-          return false;
-        break;
-      }
-      default: {
-        IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
-        if (IntrInst) {
-          if (IntrInst->isLifetimeStartOrEnd())
-            break;
-          return false;
-        }
-        // Treat all the other cases conservatively if it has side effects.
-        if (II.mayHaveSideEffects())
-          return false;
-      }
-      }
-    }
+    if (CEAC.doesBlockContainClobberOfAddr(BB, AI))
+      return false;
   }
-
   return true;
 }
 
@@ -410,122 +435,174 @@
   return CommonExitBlock;
 }
 
-void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
+// Find the pair of life time markers for address 'Addr' that are either
+// defined inside the outline region or can legally be shrinkwrapped into the
+// outline region. If there are not other untracked uses of the address, return
+// the pair of markers if found; otherwise return a pair of nullptr.
+CodeExtractor::LifetimeMarkerInfo
+CodeExtractor::getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC,
+                                  Instruction *Addr,
+                                  BasicBlock *ExitBlock) const {
+  LifetimeMarkerInfo Info;
+
+  for (User *U : Addr->users()) {
+    IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
+    if (IntrInst) {
+      if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
+        // Do not handle the case where Addr has multiple start markers.
+        if (Info.LifeStart)
+          return {};
+        Info.LifeStart = IntrInst;
+      }
+      if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
+        if (Info.LifeEnd)
+          return {};
+        Info.LifeEnd = IntrInst;
+      }
+      continue;
+    }
+    // Find untracked uses of the address, bail.
+    if (!definedInRegion(Blocks, U))
+      return {};
+  }
+
+  if (!Info.LifeStart || !Info.LifeEnd)
+    return {};
+
+  Info.SinkLifeStart = !definedInRegion(Blocks, Info.LifeStart);
+  Info.HoistLifeEnd = !definedInRegion(Blocks, Info.LifeEnd);
+  // Do legality check.
+  if ((Info.SinkLifeStart || Info.HoistLifeEnd) &&
+      !isLegalToShrinkwrapLifetimeMarkers(CEAC, Addr))
+    return {};
+
+  // Check to see if we have a place to do hoisting, if not, bail.
+  if (Info.HoistLifeEnd && !ExitBlock)
+    return {};
+
+  return Info;
+}
+
+void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC,
+                                ValueSet &SinkCands, ValueSet &HoistCands,
                                 BasicBlock *&ExitBlock) const {
   Function *Func = (*Blocks.begin())->getParent();
   ExitBlock = getCommonExitBlock(Blocks);
 
-  for (BasicBlock &BB : *Func) {
-    if (Blocks.count(&BB))
+  auto moveOrIgnoreLifetimeMarkers =
+      [&](const LifetimeMarkerInfo &LMI) -> bool {
+    if (!LMI.LifeStart)
+      return false;
+    if (LMI.SinkLifeStart) {
+      LLVM_DEBUG(dbgs() << "Sinking lifetime.start: " << *LMI.LifeStart
+                        << "\n");
+      SinkCands.insert(LMI.LifeStart);
+    }
+    if (LMI.HoistLifeEnd) {
+      LLVM_DEBUG(dbgs() << "Hoisting lifetime.end: " << *LMI.LifeEnd << "\n");
+      HoistCands.insert(LMI.LifeEnd);
+    }
+    return true;
+  };
+
+  // Look up allocas in the original function in CodeExtractorAnalysisCache, as
+  // this is much faster than walking all the instructions.
+  for (AllocaInst *AI : CEAC.getAllocas()) {
+    BasicBlock *BB = AI->getParent();
+    if (Blocks.count(BB))
       continue;
-    for (Instruction &II : BB) {
-      auto *AI = dyn_cast<AllocaInst>(&II);
-      if (!AI)
-        continue;
 
-      // Find the pair of life time markers for address 'Addr' that are either
-      // defined inside the outline region or can legally be shrinkwrapped into
-      // the outline region. If there are not other untracked uses of the
-      // address, return the pair of markers if found; otherwise return a pair
-      // of nullptr.
-      auto GetLifeTimeMarkers =
-          [&](Instruction *Addr, bool &SinkLifeStart,
-              bool &HoistLifeEnd) -> std::pair<Instruction *, Instruction *> {
-        Instruction *LifeStart = nullptr, *LifeEnd = nullptr;
+    // As a prior call to extractCodeRegion() may have shrinkwrapped the alloca,
+    // check whether it is actually still in the original function.
+    Function *AIFunc = BB->getParent();
+    if (AIFunc != Func)
+      continue;
 
-        for (User *U : Addr->users()) {
-          IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
-          if (IntrInst) {
-            if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
-              // Do not handle the case where AI has multiple start markers.
-              if (LifeStart)
-                return std::make_pair<Instruction *>(nullptr, nullptr);
-              LifeStart = IntrInst;
-            }
-            if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
-              if (LifeEnd)
-                return std::make_pair<Instruction *>(nullptr, nullptr);
-              LifeEnd = IntrInst;
-            }
-            continue;
-          }
-          // Find untracked uses of the address, bail.
-          if (!definedInRegion(Blocks, U))
-            return std::make_pair<Instruction *>(nullptr, nullptr);
-        }
+    LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(CEAC, AI, ExitBlock);
+    bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo);
+    if (Moved) {
+      LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n");
+      SinkCands.insert(AI);
+      continue;
+    }
 
-        if (!LifeStart || !LifeEnd)
-          return std::make_pair<Instruction *>(nullptr, nullptr);
-
-        SinkLifeStart = !definedInRegion(Blocks, LifeStart);
-        HoistLifeEnd = !definedInRegion(Blocks, LifeEnd);
-        // Do legality Check.
-        if ((SinkLifeStart || HoistLifeEnd) &&
-            !isLegalToShrinkwrapLifetimeMarkers(Addr))
-          return std::make_pair<Instruction *>(nullptr, nullptr);
-
-        // Check to see if we have a place to do hoisting, if not, bail.
-        if (HoistLifeEnd && !ExitBlock)
-          return std::make_pair<Instruction *>(nullptr, nullptr);
-
-        return std::make_pair(LifeStart, LifeEnd);
-      };
-
-      bool SinkLifeStart = false, HoistLifeEnd = false;
-      auto Markers = GetLifeTimeMarkers(AI, SinkLifeStart, HoistLifeEnd);
-
-      if (Markers.first) {
-        if (SinkLifeStart)
-          SinkCands.insert(Markers.first);
-        SinkCands.insert(AI);
-        if (HoistLifeEnd)
-          HoistCands.insert(Markers.second);
-        continue;
-      }
-
-      // Follow the bitcast.
-      Instruction *MarkerAddr = nullptr;
-      for (User *U : AI->users()) {
-        if (U->stripInBoundsConstantOffsets() == AI) {
-          SinkLifeStart = false;
-          HoistLifeEnd = false;
-          Instruction *Bitcast = cast<Instruction>(U);
-          Markers = GetLifeTimeMarkers(Bitcast, SinkLifeStart, HoistLifeEnd);
-          if (Markers.first) {
-            MarkerAddr = Bitcast;
-            continue;
-          }
-        }
-
-        // Found unknown use of AI.
-        if (!definedInRegion(Blocks, U)) {
-          MarkerAddr = nullptr;
-          break;
+    // Follow any bitcasts.
+    SmallVector<Instruction *, 2> Bitcasts;
+    SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
+    for (User *U : AI->users()) {
+      if (U->stripInBoundsConstantOffsets() == AI) {
+        Instruction *Bitcast = cast<Instruction>(U);
+        LifetimeMarkerInfo LMI = getLifetimeMarkers(CEAC, Bitcast, ExitBlock);
+        if (LMI.LifeStart) {
+          Bitcasts.push_back(Bitcast);
+          BitcastLifetimeInfo.push_back(LMI);
+          continue;
         }
       }
 
-      if (MarkerAddr) {
-        if (SinkLifeStart)
-          SinkCands.insert(Markers.first);
-        if (!definedInRegion(Blocks, MarkerAddr))
-          SinkCands.insert(MarkerAddr);
-        SinkCands.insert(AI);
-        if (HoistLifeEnd)
-          HoistCands.insert(Markers.second);
+      // Found unknown use of AI.
+      if (!definedInRegion(Blocks, U)) {
+        Bitcasts.clear();
+        break;
+      }
+    }
+
+    // Either no bitcasts reference the alloca or there are unknown uses.
+    if (Bitcasts.empty())
+      continue;
+
+    LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n");
+    SinkCands.insert(AI);
+    for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) {
+      Instruction *BitcastAddr = Bitcasts[I];
+      const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I];
+      assert(LMI.LifeStart &&
+             "Unsafe to sink bitcast without lifetime markers");
+      moveOrIgnoreLifetimeMarkers(LMI);
+      if (!definedInRegion(Blocks, BitcastAddr)) {
+        LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr
+                          << "\n");
+        SinkCands.insert(BitcastAddr);
       }
     }
   }
 }
 
+bool CodeExtractor::isEligible() const {
+  if (Blocks.empty())
+    return false;
+  BasicBlock *Header = *Blocks.begin();
+  Function *F = Header->getParent();
+
+  // For functions with varargs, check that varargs handling is only done in the
+  // outlined function, i.e vastart and vaend are only used in outlined blocks.
+  if (AllowVarArgs && F->getFunctionType()->isVarArg()) {
+    auto containsVarArgIntrinsic = [](const Instruction &I) {
+      if (const CallInst *CI = dyn_cast<CallInst>(&I))
+        if (const Function *Callee = CI->getCalledFunction())
+          return Callee->getIntrinsicID() == Intrinsic::vastart ||
+                 Callee->getIntrinsicID() == Intrinsic::vaend;
+      return false;
+    };
+
+    for (auto &BB : *F) {
+      if (Blocks.count(&BB))
+        continue;
+      if (llvm::any_of(BB, containsVarArgIntrinsic))
+        return false;
+    }
+  }
+  return true;
+}
+
 void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
                                       const ValueSet &SinkCands) const {
   for (BasicBlock *BB : Blocks) {
     // If a used value is defined outside the region, it's an input.  If an
     // instruction is used outside the region, it's an output.
     for (Instruction &II : *BB) {
-      for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE;
-           ++OI) {
-        Value *V = *OI;
+      for (auto &OI : II.operands()) {
+        Value *V = OI;
         if (!SinkCands.count(V) && definedInCaller(Blocks, V))
           Inputs.insert(V);
       }
@@ -1253,13 +1330,6 @@
 
     // Insert this basic block into the new function
     newBlocks.push_back(Block);
-
-    // Remove @llvm.assume calls that were moved to the new function from the
-    // old function's assumption cache.
-    if (AC)
-      for (auto &I : *Block)
-        if (match(&I, m_Intrinsic<Intrinsic::assume>()))
-          AC->unregisterAssumption(cast<CallInst>(&I));
   }
 }
 
@@ -1308,7 +1378,8 @@
       MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
 }
 
-Function *CodeExtractor::extractCodeRegion() {
+Function *
+CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
   if (!isEligible())
     return nullptr;
 
@@ -1317,27 +1388,6 @@
   BasicBlock *header = *Blocks.begin();
   Function *oldFunction = header->getParent();
 
-  // For functions with varargs, check that varargs handling is only done in the
-  // outlined function, i.e vastart and vaend are only used in outlined blocks.
-  if (AllowVarArgs && oldFunction->getFunctionType()->isVarArg()) {
-    auto containsVarArgIntrinsic = [](Instruction &I) {
-      if (const CallInst *CI = dyn_cast<CallInst>(&I))
-        if (const Function *F = CI->getCalledFunction())
-          return F->getIntrinsicID() == Intrinsic::vastart ||
-                 F->getIntrinsicID() == Intrinsic::vaend;
-      return false;
-    };
-
-    for (auto &BB : *oldFunction) {
-      if (Blocks.count(&BB))
-        continue;
-      if (llvm::any_of(BB, containsVarArgIntrinsic))
-        return nullptr;
-    }
-  }
-  ValueSet inputs, outputs, SinkingCands, HoistingCands;
-  BasicBlock *CommonExit = nullptr;
-
   // Calculate the entry frequency of the new function before we change the root
   //   block.
   BlockFrequency EntryFreq;
@@ -1351,6 +1401,15 @@
     }
   }
 
+  if (AC) {
+    // Remove @llvm.assume calls that were moved to the new function from the
+    // old function's assumption cache.
+    for (BasicBlock *Block : Blocks)
+      for (auto &I : *Block)
+        if (match(&I, m_Intrinsic<Intrinsic::assume>()))
+          AC->unregisterAssumption(cast<CallInst>(&I));
+  }
+
   // If we have any return instructions in the region, split those blocks so
   // that the return is not in the region.
   splitReturnBlocks();
@@ -1404,16 +1463,32 @@
   }
   newFuncRoot->getInstList().push_back(BranchI);
 
-  findAllocas(SinkingCands, HoistingCands, CommonExit);
+  ValueSet inputs, outputs, SinkingCands, HoistingCands;
+  BasicBlock *CommonExit = nullptr;
+  findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
   assert(HoistingCands.empty() || CommonExit);
 
   // Find inputs to, outputs from the code region.
   findInputsOutputs(inputs, outputs, SinkingCands);
 
-  // Now sink all instructions which only have non-phi uses inside the region
-  for (auto *II : SinkingCands)
-    cast<Instruction>(II)->moveBefore(*newFuncRoot,
-                                      newFuncRoot->getFirstInsertionPt());
+  // Now sink all instructions which only have non-phi uses inside the region.
+  // Group the allocas at the start of the block, so that any bitcast uses of
+  // the allocas are well-defined.
+  AllocaInst *FirstSunkAlloca = nullptr;
+  for (auto *II : SinkingCands) {
+    if (auto *AI = dyn_cast<AllocaInst>(II)) {
+      AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt());
+      if (!FirstSunkAlloca)
+        FirstSunkAlloca = AI;
+    }
+  }
+  assert((SinkingCands.empty() || FirstSunkAlloca) &&
+         "Did not expect a sink candidate without any allocas");
+  for (auto *II : SinkingCands) {
+    if (!isa<AllocaInst>(II)) {
+      cast<Instruction>(II)->moveAfter(FirstSunkAlloca);
+    }
+  }
 
   if (!HoistingCands.empty()) {
     auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit);
@@ -1525,5 +1600,17 @@
   });
   LLVM_DEBUG(if (verifyFunction(*oldFunction))
              report_fatal_error("verification of oldFunction failed!"));
+  LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, AC))
+             report_fatal_error("Stale Asumption cache for old Function!"));
   return newFunction;
 }
+
+bool CodeExtractor::verifyAssumptionCache(const Function& F,
+                                          AssumptionCache *AC) {
+  for (auto AssumeVH : AC->assumptions()) {
+    CallInst *I = cast<CallInst>(AssumeVH);
+    if (I->getFunction() != &F)
+      return true;
+  }
+  return false;
+}
diff --git a/test/Transforms/CodeExtractor/live_shrink_multiple.ll b/test/Transforms/CodeExtractor/live_shrink_multiple.ll
index 9350ca2..3f4c947 100644
--- a/test/Transforms/CodeExtractor/live_shrink_multiple.ll
+++ b/test/Transforms/CodeExtractor/live_shrink_multiple.ll
@@ -45,9 +45,9 @@
 ; CHECK-LABEL: define internal void @_Z3foov.1.
 ; CHECK: newFuncRoot:
 ; CHECK-NEXT:  alloca 
+; CHECK-NEXT:  alloca
 ; CHECK-NEXT:  bitcast 
 ; CHECK-NEXT:  call void @llvm.lifetime.start.p0i8
-; CHECK-NEXT:  alloca
 ; CHECK-NEXT:  bitcast 
 ; CHECK-NEXT:  call void @llvm.lifetime.start.p0i8
 ; CHECK:  call void @llvm.lifetime.end.p0i8
diff --git a/test/Transforms/HotColdSplit/sink-multiple-bitcasts-of-allocas-pr42451.ll b/test/Transforms/HotColdSplit/sink-multiple-bitcasts-of-allocas-pr42451.ll
new file mode 100644
index 0000000..d2f8398
--- /dev/null
+++ b/test/Transforms/HotColdSplit/sink-multiple-bitcasts-of-allocas-pr42451.ll
@@ -0,0 +1,74 @@
+; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=-1 -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+@c = common global i32 0, align 4
+@h = common global i32 0, align 4
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #1
+
+declare i32* @m()
+
+; CHECK-LABEL: define void @main()
+; CHECK-NEXT:   %.sroa.4.i = alloca [20 x i8], align 2
+; CHECK-NEXT:   %.sroa.5.i = alloca [6 x i8], align 8
+; CHECK-NEXT:   %1 = bitcast [6 x i8]* %.sroa.5.i to i8*
+
+define void @main() #0 {
+  %.sroa.4.i = alloca [20 x i8], align 2
+  %.sroa.5.i = alloca [6 x i8], align 8
+  %1 = bitcast [6 x i8]* %.sroa.5.i to i8*
+  %2 = load i32, i32* @h, align 4, !tbaa !4
+  %3 = icmp ne i32 %2, 0
+  br i1 %3, label %12, label %4
+
+4:                                                ; preds = %0
+  %5 = call i32* @m() #3
+  %.sroa.4.0..sroa_idx21.i = getelementptr inbounds [20 x i8], [20 x i8]* %.sroa.4.i, i64 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 20, i8* %.sroa.4.0..sroa_idx21.i) #3
+  %.sroa.5.0..sroa_idx16.i = getelementptr inbounds [6 x i8], [6 x i8]* %.sroa.5.i, i64 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 6, i8* %.sroa.5.0..sroa_idx16.i) #3
+  call void @llvm.memset.p0i8.i64(i8* align 2 %.sroa.4.0..sroa_idx21.i, i8 0, i64 20, i1 false) #3
+  call void @llvm.memset.p0i8.i64(i8* align 8 %.sroa.5.0..sroa_idx16.i, i8 0, i64 6, i1 false) #3
+  %6 = load i32, i32* @c, align 4, !tbaa !4
+  %7 = trunc i32 %6 to i16
+  call void @llvm.lifetime.end.p0i8(i64 20, i8* %.sroa.4.0..sroa_idx21.i) #3
+  call void @llvm.lifetime.end.p0i8(i64 6, i8* %.sroa.5.0..sroa_idx16.i) #3
+  call void @llvm.lifetime.start.p0i8(i64 6, i8* %1) #3
+  call void @llvm.memset.p0i8.i64(i8* align 1 %1, i8 3, i64 6, i1 false)
+  br label %8
+
+8:                                                ; preds = %8, %4
+  %.0.i = phi i32 [ 0, %4 ], [ %10, %8 ]
+  %9 = sext i32 %.0.i to i64
+  %10 = add nsw i32 %.0.i, 1
+  %11 = icmp slt i32 %10, 6
+  br i1 %11, label %8, label %l.exit
+
+l.exit:                                           ; preds = %8
+  call void @llvm.lifetime.end.p0i8(i64 6, i8* %1) #3
+  br label %12
+
+12:                                               ; preds = %l.exit, %0
+  %13 = phi i1 [ true, %0 ], [ true, %l.exit ]
+  ret void
+}
+
+attributes #0 = { cold }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 14]}
+!1 = !{i32 1, !"wchar_size", i32 4}
+!2 = !{i32 7, !"PIC Level", i32 2}
+!3 = !{!"Apple clang version 11.0.0 (clang-1100.0.20.17)"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"int", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C/C++ TBAA"}
diff --git a/unittests/Transforms/Utils/CodeExtractorTest.cpp b/unittests/Transforms/Utils/CodeExtractorTest.cpp
index 8b86951..d387763 100644
--- a/unittests/Transforms/Utils/CodeExtractorTest.cpp
+++ b/unittests/Transforms/Utils/CodeExtractorTest.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/Transforms/Utils/CodeExtractor.h"
 #include "llvm/AsmParser/Parser.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
@@ -61,7 +62,8 @@
   CodeExtractor CE(Candidates);
   EXPECT_TRUE(CE.isEligible());
 
-  Function *Outlined = CE.extractCodeRegion();
+  CodeExtractorAnalysisCache CEAC(*Func);
+  Function *Outlined = CE.extractCodeRegion(CEAC);
   EXPECT_TRUE(Outlined);
   BasicBlock *Exit = getBlockByName(Func, "notExtracted");
   BasicBlock *ExitSplit = getBlockByName(Outlined, "notExtracted.split");
@@ -111,7 +113,8 @@
   CodeExtractor CE(ExtractedBlocks);
   EXPECT_TRUE(CE.isEligible());
 
-  Function *Outlined = CE.extractCodeRegion();
+  CodeExtractorAnalysisCache CEAC(*Func);
+  Function *Outlined = CE.extractCodeRegion(CEAC);
   EXPECT_TRUE(Outlined);
   BasicBlock *Exit1 = getBlockByName(Func, "exit1");
   BasicBlock *Exit2 = getBlockByName(Func, "exit2");
@@ -185,7 +188,8 @@
   CodeExtractor CE(ExtractedBlocks);
   EXPECT_TRUE(CE.isEligible());
 
-  Function *Outlined = CE.extractCodeRegion();
+  CodeExtractorAnalysisCache CEAC(*Func);
+  Function *Outlined = CE.extractCodeRegion(CEAC);
   EXPECT_TRUE(Outlined);
   EXPECT_FALSE(verifyFunction(*Outlined, &errs()));
   EXPECT_FALSE(verifyFunction(*Func, &errs()));
@@ -219,10 +223,61 @@
   CodeExtractor CE(Blocks);
   EXPECT_TRUE(CE.isEligible());
 
-  Function *Outlined = CE.extractCodeRegion();
+  CodeExtractorAnalysisCache CEAC(*Func);
+  Function *Outlined = CE.extractCodeRegion(CEAC);
   EXPECT_TRUE(Outlined);
   EXPECT_FALSE(verifyFunction(*Outlined));
   EXPECT_FALSE(verifyFunction(*Func));
 }
 
+TEST(CodeExtractor, ExtractAndInvalidateAssumptionCache) {
+  LLVMContext Ctx;
+  SMDiagnostic Err;
+  std::unique_ptr<Module> M(parseAssemblyString(R"ir(
+        target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+        target triple = "aarch64"
+
+        %b = type { i64 }
+        declare void @g(i8*)
+
+        declare void @llvm.assume(i1)
+
+        define void @test() {
+        entry:
+          br label %label
+
+        label:
+          %0 = load %b*, %b** inttoptr (i64 8 to %b**), align 8
+          %1 = getelementptr inbounds %b, %b* %0, i64 undef, i32 0
+          %2 = load i64, i64* %1, align 8
+          %3 = icmp ugt i64 %2, 1
+          br i1 %3, label %if.then, label %if.else
+
+        if.then:
+          unreachable
+
+        if.else:
+          call void @g(i8* undef)
+          store i64 undef, i64* null, align 536870912
+          %4 = icmp eq i64 %2, 0
+          call void @llvm.assume(i1 %4)
+          unreachable
+        }
+  )ir",
+                                                Err, Ctx));
+
+  assert(M && "Could not parse module?");
+  Function *Func = M->getFunction("test");
+  SmallVector<BasicBlock *, 1> Blocks{ getBlockByName(Func, "if.else") };
+  AssumptionCache AC(*Func);
+  CodeExtractor CE(Blocks, nullptr, false, nullptr, nullptr, &AC);
+  EXPECT_TRUE(CE.isEligible());
+
+  CodeExtractorAnalysisCache CEAC(*Func);
+  Function *Outlined = CE.extractCodeRegion(CEAC);
+  EXPECT_TRUE(Outlined);
+  EXPECT_FALSE(verifyFunction(*Outlined));
+  EXPECT_FALSE(verifyFunction(*Func));
+  EXPECT_FALSE(CE.verifyAssumptionCache(*Func, &AC));
+}
 } // end anonymous namespace