diff --git a/include/swift/SIL/SILBasicBlock.h b/include/swift/SIL/SILBasicBlock.h
index 488d2d2..d0c00ce 100644
--- a/include/swift/SIL/SILBasicBlock.h
+++ b/include/swift/SIL/SILBasicBlock.h
@@ -58,7 +58,7 @@
 
   void operator delete(void *Ptr, size_t) SWIFT_DELETE_OPERATOR_DELETED
 
-  SILBasicBlock(SILFunction *F, SILBasicBlock *afterBB = nullptr);
+  SILBasicBlock(SILFunction *F, SILBasicBlock *relativeToBB, bool after);
 
 public:
   ~SILBasicBlock();
diff --git a/include/swift/SIL/SILFunction.h b/include/swift/SIL/SILFunction.h
index fa30426..7511048 100644
--- a/include/swift/SIL/SILFunction.h
+++ b/include/swift/SIL/SILFunction.h
@@ -774,7 +774,8 @@
   const SILBasicBlock *getEntryBlock() const { return &front(); }
 
   SILBasicBlock *createBasicBlock();
-  SILBasicBlock *createBasicBlock(SILBasicBlock *After);
+  SILBasicBlock *createBasicBlockAfter(SILBasicBlock *afterBB);
+  SILBasicBlock *createBasicBlockBefore(SILBasicBlock *beforeBB);
 
   /// Splice the body of \p F into this function at end.
   void spliceBody(SILFunction *F) {
diff --git a/include/swift/SILOptimizer/Utils/Devirtualize.h b/include/swift/SILOptimizer/Utils/Devirtualize.h
index 9119be7..ee08fd8 100644
--- a/include/swift/SILOptimizer/Utils/Devirtualize.h
+++ b/include/swift/SILOptimizer/Utils/Devirtualize.h
@@ -35,22 +35,6 @@
 class Emitter;
 }
 
-/// A pair representing results of devirtualization.
-///  - The first element is the value representing the result of the
-///    devirtualized call.
-///  - The second element is the new apply/try_apply instruction.
-/// If no devirtualization was possible, the pair:
-/// <nullptr, FullApplySite()> is returned.
-///
-/// Two elements are required, because a result of the new devirtualized
-/// apply/try_apply instruction (second element) eventually needs to be
-/// casted to produce a properly typed value (first element).
-///
-/// *NOTE* The reason why we use a ValueBase here instead of a SILInstruction is
-/// that a devirtualization result may be a BB arg if there was a cast in
-/// between optional types.
-typedef std::pair<ValueBase *, ApplySite> DevirtualizationResult;
-
 /// Compute all subclasses of a given class.
 ///
 /// \p CHA class hierarchy analysis
@@ -64,9 +48,14 @@
                       SILModule &M,
                       ClassHierarchyAnalysis::ClassList &Subs);
 
-DevirtualizationResult tryDevirtualizeApply(ApplySite AI,
-                                            ClassHierarchyAnalysis *CHA,
-                                            OptRemark::Emitter *ORE = nullptr);
+/// Attempt to devirtualize the given apply site.  If this fails,
+/// the returned ApplySite will be null.
+///
+/// If this succeeds, the caller must call deleteDevirtualizedApply on
+/// the original apply site.
+ApplySite tryDevirtualizeApply(ApplySite AI,
+                               ClassHierarchyAnalysis *CHA,
+                               OptRemark::Emitter *ORE = nullptr);
 bool canDevirtualizeApply(FullApplySite AI, ClassHierarchyAnalysis *CHA);
 bool isNominalTypeWithUnboundGenericParameters(SILType Ty, SILModule &M);
 bool canDevirtualizeClassMethod(FullApplySite AI, SILType ClassInstanceType,
@@ -74,15 +63,45 @@
                                 bool isEffectivelyFinalMethod = false);
 SILFunction *getTargetClassMethod(SILModule &M, SILType ClassOrMetatypeType,
                                   MethodInst *MI);
-DevirtualizationResult devirtualizeClassMethod(FullApplySite AI,
-                                               SILValue ClassInstance,
-                                               OptRemark::Emitter *ORE);
-DevirtualizationResult
+
+/// Devirtualize the given apply site, which is known to be devirtualizable.
+///
+/// The caller must call deleteDevirtualizedApply on the original apply site.
+FullApplySite devirtualizeClassMethod(FullApplySite AI,
+                                      SILValue ClassInstance,
+                                      OptRemark::Emitter *ORE);
+
+/// Attempt to devirtualize the given apply site, which is known to be
+/// of a class method.  If this fails, the returned FullApplySite will be null.
+///
+/// If this succeeds, the caller must call deleteDevirtualizedApply on
+/// the original apply site.
+FullApplySite
 tryDevirtualizeClassMethod(FullApplySite AI, SILValue ClassInstance,
                            OptRemark::Emitter *ORE,
                            bool isEffectivelyFinalMethod = false);
-DevirtualizationResult
-tryDevirtualizeWitnessMethod(ApplySite AI, OptRemark::Emitter *ORE);
-}
+
+/// Attempt to devirtualize the given apply site, which is known to be
+/// of a witness method.  If this fails, the returned FullApplySite
+/// will be null.
+///
+/// If this succeeds, the caller must call deleteDevirtualizedApply on
+/// the original apply site.
+ApplySite tryDevirtualizeWitnessMethod(ApplySite AI, OptRemark::Emitter *ORE);
+
+/// Delete a successfully-devirtualized apply site.  This must always be
+/// called after devirtualizing an apply; not only is it not semantically
+/// equivalent to leave the old apply in-place, but the SIL isn't necessary
+/// well-formed.
+///
+/// Devirtualization is responsible for replacing uses of the original
+/// apply site with uses of the new one.  The only thing this does is delete
+/// the instruction and any now-trivially-dead operands; it is separated
+/// from the actual devirtualization step only to apply the caller to log
+/// information about the original apply site.  This is probably not a
+/// good enough reason to complicate the API.
+void deleteDevirtualizedApply(ApplySite AI);
+
+} // end namespace swift
 
 #endif
diff --git a/include/swift/SILOptimizer/Utils/Local.h b/include/swift/SILOptimizer/Utils/Local.h
index c517bae..0037b6a 100644
--- a/include/swift/SILOptimizer/Utils/Local.h
+++ b/include/swift/SILOptimizer/Utils/Local.h
@@ -119,11 +119,6 @@
 /// after \p ABI and returns it.
 ProjectBoxInst *getOrCreateProjectBox(AllocBoxInst *ABI, unsigned Index);
 
-/// Replace an apply with an instruction that produces the same value,
-/// then delete the apply and the instructions that produce its callee
-/// if possible.
-void replaceDeadApply(ApplySite Old, ValueBase *New);
-
 /// \brief Return true if any call inside the given function may bind dynamic
 /// 'Self' to a generic argument of the callee.
 bool mayBindDynamicSelf(SILFunction *F);
@@ -355,7 +350,7 @@
     auto *Fn = BI->getFunction();
     auto *SrcBB = BI->getParent();
     auto *DestBB = BI->getDestBB();
-    auto *EdgeBB = Fn->createBasicBlock(SrcBB);
+    auto *EdgeBB = Fn->createBasicBlockAfter(SrcBB);
 
     // Create block arguments.
     for (unsigned ArgIdx : range(DestBB->getNumArguments())) {
diff --git a/lib/SIL/SILBasicBlock.cpp b/lib/SIL/SILBasicBlock.cpp
index c33d4dd..4ee0bdd 100644
--- a/lib/SIL/SILBasicBlock.cpp
+++ b/lib/SIL/SILBasicBlock.cpp
@@ -29,12 +29,15 @@
 // SILBasicBlock Implementation
 //===----------------------------------------------------------------------===//
 
-SILBasicBlock::SILBasicBlock(SILFunction *parent, SILBasicBlock *afterBB)
+SILBasicBlock::SILBasicBlock(SILFunction *parent, SILBasicBlock *relativeToBB,
+                             bool after)
     : Parent(parent), PredList(nullptr) {
-  if (afterBB) {
-    parent->getBlocks().insertAfter(afterBB->getIterator(), this);
-  } else {
+  if (!relativeToBB) {
     parent->getBlocks().push_back(this);
+  } else if (after) {
+    parent->getBlocks().insertAfter(relativeToBB->getIterator(), this);
+  } else {
+    parent->getBlocks().insert(relativeToBB->getIterator(), this);
   }
 }
 SILBasicBlock::~SILBasicBlock() {
@@ -238,11 +241,8 @@
 /// stay as part of the original basic block. The old basic block is left
 /// without a terminator.
 SILBasicBlock *SILBasicBlock::split(iterator I) {
-  SILBasicBlock *New = new (Parent->getModule()) SILBasicBlock(Parent);
-  SILFunction::iterator Where = std::next(SILFunction::iterator(this));
-  SILFunction::iterator First = SILFunction::iterator(New);
-  if (Where != First)
-    Parent->getBlocks().splice(Where, Parent->getBlocks(), First);
+  SILBasicBlock *New =
+    new (Parent->getModule()) SILBasicBlock(Parent, this, /*after*/true);
   // Move all of the specified instructions from the original basic block into
   // the new basic block.
   New->InstList.splice(New->end(), InstList, I, end());
diff --git a/lib/SIL/SILBuilder.cpp b/lib/SIL/SILBuilder.cpp
index 7563571..b7f7b39 100644
--- a/lib/SIL/SILBuilder.cpp
+++ b/lib/SIL/SILBuilder.cpp
@@ -172,7 +172,7 @@
 SILBasicBlock *SILBuilder::splitBlockForFallthrough() {
   // If we are concatenating, just create and return a new block.
   if (insertingAtEndOfBlock()) {
-    return getFunction().createBasicBlock(BB);
+    return getFunction().createBasicBlockAfter(BB);
   }
 
   // Otherwise we need to split the current block at the insertion point.
diff --git a/lib/SIL/SILFunction.cpp b/lib/SIL/SILFunction.cpp
index 12e025b..9881ea6 100644
--- a/lib/SIL/SILFunction.cpp
+++ b/lib/SIL/SILFunction.cpp
@@ -218,11 +218,17 @@
 }
 
 SILBasicBlock *SILFunction::createBasicBlock() {
-  return new (getModule()) SILBasicBlock(this);
+  return new (getModule()) SILBasicBlock(this, nullptr, false);
 }
 
-SILBasicBlock *SILFunction::createBasicBlock(SILBasicBlock *AfterBlock) {
-  return new (getModule()) SILBasicBlock(this, AfterBlock);
+SILBasicBlock *SILFunction::createBasicBlockAfter(SILBasicBlock *afterBB) {
+  assert(afterBB);
+  return new (getModule()) SILBasicBlock(this, afterBB, /*after*/ true);
+}
+
+SILBasicBlock *SILFunction::createBasicBlockBefore(SILBasicBlock *beforeBB) {
+  assert(beforeBB);
+  return new (getModule()) SILBasicBlock(this, beforeBB, /*after*/ false);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/SIL/SILVerifier.cpp b/lib/SIL/SILVerifier.cpp
index 9918dbc..abcf59a 100644
--- a/lib/SIL/SILVerifier.cpp
+++ b/lib/SIL/SILVerifier.cpp
@@ -4527,6 +4527,7 @@
     SILModule &M = F->getModule();
     for (auto &BB : *F) {
       TermInst *TI = BB.getTerminator();
+      CurInstruction = TI;
 
       // Check for non-cond_br critical edges in canonical SIL.
       if (!isa<CondBranchInst>(TI) && M.getStage() == SILStage::Canonical) {
diff --git a/lib/SILGen/SILGenFunction.h b/lib/SILGen/SILGenFunction.h
index 1b451f7..b9e898a 100644
--- a/lib/SILGen/SILGenFunction.h
+++ b/lib/SILGen/SILGenFunction.h
@@ -636,7 +636,9 @@
   /// first block.  (This is clearly desirable behavior when blocks
   /// are created by different emissions; it's just a little
   /// counter-intuitive within a single emission.)
-  SILBasicBlock *createBasicBlock(SILBasicBlock *afterBB = nullptr);  
+  SILBasicBlock *createBasicBlock();
+  SILBasicBlock *createBasicBlockAfter(SILBasicBlock *afterBB);
+  SILBasicBlock *createBasicBlockBefore(SILBasicBlock *beforeBB);
 
   /// Create a new basic block at the end of the given function
   /// section.
diff --git a/lib/SILGen/SILGenLValue.cpp b/lib/SILGen/SILGenLValue.cpp
index a25a369..281ae64 100644
--- a/lib/SILGen/SILGenLValue.cpp
+++ b/lib/SILGen/SILGenLValue.cpp
@@ -1443,7 +1443,8 @@
       ASTContext &ctx = SGF.getASTContext();
 
       SILBasicBlock *contBB = SGF.createBasicBlock();
-      SILBasicBlock *writebackBB = SGF.createBasicBlock(SGF.B.getInsertionBB());
+      SILBasicBlock *writebackBB =
+        SGF.createBasicBlockAfter(SGF.B.getInsertionBB());
 
       SGF.B.createSwitchEnum(loc, materialized.callback, /*defaultDest*/ nullptr,
                              { { ctx.getOptionalSomeDecl(), writebackBB },
diff --git a/lib/SILGen/SILGenPattern.cpp b/lib/SILGen/SILGenPattern.cpp
index bfd075c..28f20d2 100644
--- a/lib/SILGen/SILGenPattern.cpp
+++ b/lib/SILGen/SILGenPattern.cpp
@@ -1771,7 +1771,7 @@
     if (!insertionResult.second) {
       index = insertionResult.first->second;
     } else {
-      curBB = SGF.createBasicBlock(curBB);
+      curBB = SGF.createBasicBlockAfter(curBB);
       CaseBBs.push_back({formalElt, curBB});
       CaseInfos.push_back(CaseInfo());
       CaseInfos.back().FirstMatcher = row.Pattern;
@@ -1815,7 +1815,7 @@
   }
 
   if (!canAssumeExhaustive)
-    DefaultBB = SGF.createBasicBlock(curBB);
+    DefaultBB = SGF.createBasicBlockAfter(curBB);
 }
 
 /// Perform specialized dispatch for a sequence of EnumElementPattern or an
@@ -2201,7 +2201,7 @@
     } else {
       caseToIndex[isTrue] = index;
     
-      curBB = SGF.createBasicBlock(curBB);
+      curBB = SGF.createBasicBlockAfter(curBB);
       auto *IL = SGF.B.createIntegerLiteral(PatternMatchStmt,
                                     SILType::getBuiltinIntegerType(1, Context),
                                             isTrue ? 1 : 0);
@@ -2223,7 +2223,7 @@
 
   // Check to see if we need a default block.
   if (caseBBs.size() < 2)
-    defaultBB = SGF.createBasicBlock(curBB);
+    defaultBB = SGF.createBasicBlockAfter(curBB);
 
   // Emit the switch_value
   SILLocation loc = PatternMatchStmt;
diff --git a/lib/SILGen/SILGenStmt.cpp b/lib/SILGen/SILGenStmt.cpp
index 92d7fb3..d8db97e 100644
--- a/lib/SILGen/SILGenStmt.cpp
+++ b/lib/SILGen/SILGenStmt.cpp
@@ -33,17 +33,22 @@
                          diag, std::forward<U>(args)...);
 }
 
-SILBasicBlock *SILGenFunction::createBasicBlock(SILBasicBlock *afterBB) {
-  // Honor an explicit placement if given.
-  if (afterBB) {
-    return F.createBasicBlock(afterBB);
+SILBasicBlock *SILGenFunction::createBasicBlockAfter(SILBasicBlock *afterBB) {
+  assert(afterBB);
+  return F.createBasicBlockAfter(afterBB);
+}
 
-    // If we don't have a requested placement, but we do have a current
-    // insertion point, insert there.
-  } else if (B.hasValidInsertionPoint()) {
-    return F.createBasicBlock(B.getInsertionBB());
+SILBasicBlock *SILGenFunction::createBasicBlockBefore(SILBasicBlock *beforeBB) {
+  assert(beforeBB);
+  return F.createBasicBlockBefore(beforeBB);
+}
 
-    // Otherwise, insert at the end of the current section.
+SILBasicBlock *SILGenFunction::createBasicBlock() {
+  // If we have a current insertion point, insert there.
+  if (B.hasValidInsertionPoint()) {
+    return F.createBasicBlockAfter(B.getInsertionBB());
+
+  // Otherwise, insert at the end of the current section.
   } else {
     return createBasicBlock(CurFunctionSection);
   }
@@ -54,16 +59,17 @@
   case FunctionSection::Ordinary: {
     // The end of the ordinary section is just the end of the function
     // unless postmatter blocks exist.
-    SILBasicBlock *afterBB = (StartOfPostmatter != F.end())
-                                 ? &*std::prev(StartOfPostmatter)
-                                 : nullptr;
-    return F.createBasicBlock(afterBB);
+    if (StartOfPostmatter != F.end()) {
+      return F.createBasicBlockBefore(&*StartOfPostmatter);
+    } else {
+      return F.createBasicBlock();
+    }
   }
 
   case FunctionSection::Postmatter: {
     // The end of the postmatter section is always the end of the function.
     // Register the new block as the start of the postmatter if needed.
-    SILBasicBlock *newBB = F.createBasicBlock(nullptr);
+    SILBasicBlock *newBB = F.createBasicBlock();
     if (StartOfPostmatter == F.end())
       StartOfPostmatter = newBB->getIterator();
     return newBB;
diff --git a/lib/SILOptimizer/Mandatory/MandatoryInlining.cpp b/lib/SILOptimizer/Mandatory/MandatoryInlining.cpp
index c24852b..5a00f2e 100644
--- a/lib/SILOptimizer/Mandatory/MandatoryInlining.cpp
+++ b/lib/SILOptimizer/Mandatory/MandatoryInlining.cpp
@@ -434,14 +434,14 @@
 static std::tuple<FullApplySite, SILBasicBlock::iterator>
 tryDevirtualizeApplyHelper(FullApplySite InnerAI, SILBasicBlock::iterator I,
                            ClassHierarchyAnalysis *CHA) {
-  auto NewInstPair = tryDevirtualizeApply(InnerAI, CHA);
-  if (!NewInstPair.second) {
+  auto NewInst = tryDevirtualizeApply(InnerAI, CHA);
+  if (!NewInst) {
     return std::make_tuple(InnerAI, I);
   }
 
-  replaceDeadApply(InnerAI, NewInstPair.first);
+  deleteDevirtualizedApply(InnerAI);
 
-  auto newApplyAI = NewInstPair.second.getInstruction();
+  auto newApplyAI = NewInst.getInstruction();
   assert(newApplyAI && "devirtualized but removed apply site?");
 
   return std::make_tuple(FullApplySite::isa(newApplyAI),
diff --git a/lib/SILOptimizer/Transforms/Devirtualizer.cpp b/lib/SILOptimizer/Transforms/Devirtualizer.cpp
index f2fd84f..36ccbb8 100644
--- a/lib/SILOptimizer/Transforms/Devirtualizer.cpp
+++ b/lib/SILOptimizer/Transforms/Devirtualizer.cpp
@@ -69,14 +69,14 @@
    }
   }
   for (auto Apply : Applies) {
-    auto NewInstPair = tryDevirtualizeApply(Apply, CHA, &ORE);
-    if (!NewInstPair.second)
+    auto NewInst = tryDevirtualizeApply(Apply, CHA, &ORE);
+    if (!NewInst)
       continue;
 
     Changed = true;
 
-    replaceDeadApply(Apply, NewInstPair.first);
-    NewApplies.push_back(NewInstPair.second);
+    deleteDevirtualizedApply(Apply);
+    NewApplies.push_back(NewInst);
   }
 
   // For each new apply, attempt to link in function bodies if we do
diff --git a/lib/SILOptimizer/Transforms/SpeculativeDevirtualizer.cpp b/lib/SILOptimizer/Transforms/SpeculativeDevirtualizer.cpp
index 88fadb2..6205f1b 100644
--- a/lib/SILOptimizer/Transforms/SpeculativeDevirtualizer.cpp
+++ b/lib/SILOptimizer/Transforms/SpeculativeDevirtualizer.cpp
@@ -29,6 +29,7 @@
 #include "swift/SILOptimizer/PassManager/Passes.h"
 #include "swift/SILOptimizer/PassManager/PassManager.h"
 #include "swift/SILOptimizer/PassManager/Transforms.h"
+#include "swift/SILOptimizer/Utils/CFG.h"
 #include "swift/SILOptimizer/Utils/Devirtualize.h"
 #include "swift/SILOptimizer/Utils/SILInliner.h"
 #include "swift/AST/ASTContext.h"
@@ -47,6 +48,38 @@
 
 STATISTIC(NumTargetsPredicted, "Number of monomorphic functions predicted");
 
+/// We want to form a second edge to the given block, but we know
+/// that'll form a critical edge.  Return a basic block to which we can
+/// create an edge essentially like the original edge.
+static SILBasicBlock *cloneEdge(TermInst *TI, unsigned SuccIndex) {
+#ifndef NDEBUG
+  auto origDestBB = TI->getSuccessors()[SuccIndex].getBB();
+#endif
+
+  // Split the edge twice.  The first split will become our cloned
+  // and temporarily-unused edge.  The second split will remain in place
+  // as the original edge.
+  auto clonedEdgeBB = splitEdge(TI, SuccIndex);
+  auto replacementEdgeBB = splitEdge(TI, SuccIndex);
+
+  // Extract the terminators.
+  auto clonedEdgeBranch =
+    cast<BranchInst>(clonedEdgeBB->getTerminator());
+  auto replacementEdgeBranch =
+    cast<BranchInst>(replacementEdgeBB->getTerminator());
+
+  assert(TI->getSuccessors()[SuccIndex].getBB() == replacementEdgeBB);
+  assert(replacementEdgeBranch->getDestBB() == clonedEdgeBB);
+  assert(clonedEdgeBranch->getDestBB() == origDestBB);
+
+  // Change the replacement branch to point to the original destination.
+  // This will leave the cloned edge unused, which is how we wanted it.
+  replacementEdgeBranch->getSuccessors()[0] = clonedEdgeBranch->getDestBB();
+  assert(clonedEdgeBB->pred_empty());
+
+  return clonedEdgeBB;
+}
+
 // A utility function for cloning the apply instruction.
 static FullApplySite CloneApply(FullApplySite AI, SILBuilder &Builder) {
   // Clone the Apply.
@@ -68,18 +101,17 @@
     break;
   case SILInstructionKind::TryApplyInst: {
     auto *TryApplyI = cast<TryApplyInst>(AI.getInstruction());
+    auto NormalBB = cloneEdge(TryApplyI, TryApplyInst::NormalIdx);
+    auto ErrorBB = cloneEdge(TryApplyI, TryApplyInst::ErrorIdx);
     NAI = Builder.createTryApply(AI.getLoc(), AI.getCallee(),
                                  AI.getSubstitutionMap(),
-                                 Ret,
-                                 TryApplyI->getNormalBB(),
-                                 TryApplyI->getErrorBB());
-    }
+                                 Ret, NormalBB, ErrorBB);
     break;
+  }
   default:
     llvm_unreachable("Trying to clone an unsupported apply instruction");
   }
 
-  NAI.getInstruction();
   return NAI;
 }
 
@@ -96,6 +128,10 @@
   if (SubType.getASTType()->hasDynamicSelfType())
     return FullApplySite();
 
+  // Can't speculate begin_apply yet.
+  if (isa<BeginApplyInst>(AI))
+    return FullApplySite();
+
   // Create a diamond shaped control flow and a checked_cast_branch
   // instruction that checks the exact type of the object.
   // This cast selects between two paths: one that calls the slow dynamic
@@ -182,10 +218,10 @@
   NumTargetsPredicted++;
 
   // Devirtualize the apply instruction on the identical path.
-  auto NewInstPair =
+  auto NewInst =
       devirtualizeClassMethod(IdenAI, DownCastedClassInstance, nullptr);
-  assert(NewInstPair.first && "Expected to be able to devirtualize apply!");
-  replaceDeadApply(IdenAI, NewInstPair.first);
+  assert(NewInst && "Expected to be able to devirtualize apply!");
+  deleteDevirtualizedApply(IdenAI);
 
   // Split critical edges resulting from VirtAI.
   if (auto *TAI = dyn_cast<TryApplyInst>(VirtAI)) {
@@ -366,10 +402,10 @@
     // try to devirtualize it completely.
     ClassHierarchyAnalysis::ClassList Subs;
     if (isDefaultCaseKnown(CHA, AI, CD, Subs)) {
-      auto NewInstPair = tryDevirtualizeClassMethod(AI, SubTypeValue, &ORE);
-      if (NewInstPair.first)
-        replaceDeadApply(AI, NewInstPair.first);
-      return NewInstPair.second.getInstruction() != nullptr;
+      auto NewInst = tryDevirtualizeClassMethod(AI, SubTypeValue, &ORE);
+      if (NewInst)
+        deleteDevirtualizedApply(AI);
+      return bool(NewInst);
     }
 
     LLVM_DEBUG(llvm::dbgs() << "Inserting monomorphic speculative call for "
@@ -529,10 +565,10 @@
     ORE.emit(RB);
     return true;
   }
-  auto NewInstPair = tryDevirtualizeClassMethod(AI, SubTypeValue, nullptr);
-  if (NewInstPair.first) {
+  auto NewInst = tryDevirtualizeClassMethod(AI, SubTypeValue, nullptr);
+  if (NewInst) {
     ORE.emit(RB);
-    replaceDeadApply(AI, NewInstPair.first);
+    deleteDevirtualizedApply(AI);
     return true;
   }
 
diff --git a/lib/SILOptimizer/Utils/CFG.cpp b/lib/SILOptimizer/Utils/CFG.cpp
index ec40a28..76cfa15 100644
--- a/lib/SILOptimizer/Utils/CFG.cpp
+++ b/lib/SILOptimizer/Utils/CFG.cpp
@@ -170,19 +170,39 @@
     return DefaultBB;
 }
 
+static bool hasBranchArguments(TermInst *T, unsigned EdgeIdx) {
+  if (auto Br = dyn_cast<BranchInst>(T)) {
+    assert(EdgeIdx == 0);
+    return Br->getNumArgs() != 0;
+  } else if (auto CondBr = dyn_cast<CondBranchInst>(T)) {
+    assert(EdgeIdx <= 1);
+    return EdgeIdx == CondBranchInst::TrueIdx
+             ? !CondBr->getTrueArgs().empty()
+             : !CondBr->getFalseArgs().empty();
+  } else {
+    // No other terminator have branch arguments.
+    return false;
+  }
+}
+
 void swift::changeBranchTarget(TermInst *T, unsigned EdgeIdx,
                                SILBasicBlock *NewDest, bool PreserveArgs) {
+  // In many cases, we can just rewrite the successor in place.
+  if (PreserveArgs || !hasBranchArguments(T, EdgeIdx)) {
+    T->getSuccessors()[EdgeIdx] = NewDest;
+    return;
+  }
+
+  // Otherwise, we have to build a new branch instruction.
   SILBuilderWithScope B(T);
 
   switch (T->getTermKind()) {
   // Only Branch and CondBranch may have arguments.
   case TermKind::BranchInst: {
-    auto Br = dyn_cast<BranchInst>(T);
+    auto Br = cast<BranchInst>(T);
     SmallVector<SILValue, 8> Args;
-    if (PreserveArgs) {
-      for (auto Arg : Br->getArgs())
-        Args.push_back(Arg);
-    }
+    for (auto Arg : Br->getArgs())
+      Args.push_back(Arg);
     B.createBranch(T->getLoc(), NewDest, Args);
     Br->dropAllReferences();
     Br->eraseFromParent();
@@ -190,23 +210,21 @@
   }
 
   case TermKind::CondBranchInst: {
-    auto CondBr = dyn_cast<CondBranchInst>(T);
+    auto CondBr = cast<CondBranchInst>(T);
+    SILBasicBlock *TrueDest = CondBr->getTrueBB();
+    SILBasicBlock *FalseDest = CondBr->getFalseBB();
+
     SmallVector<SILValue, 8> TrueArgs;
-    if (EdgeIdx == CondBranchInst::FalseIdx || PreserveArgs) {
+    SmallVector<SILValue, 8> FalseArgs;
+    if (EdgeIdx == CondBranchInst::FalseIdx) {
+      FalseDest = NewDest;
       for (auto Arg : CondBr->getTrueArgs())
         TrueArgs.push_back(Arg);
-    }
-    SmallVector<SILValue, 8> FalseArgs;
-    if (EdgeIdx == CondBranchInst::TrueIdx || PreserveArgs) {
+    } else {
+      TrueDest = NewDest;
       for (auto Arg : CondBr->getFalseArgs())
         FalseArgs.push_back(Arg);
     }
-    SILBasicBlock *TrueDest = CondBr->getTrueBB();
-    SILBasicBlock *FalseDest = CondBr->getFalseBB();
-    if (EdgeIdx == CondBranchInst::TrueIdx)
-      TrueDest = NewDest;
-    else
-      FalseDest = NewDest;
 
     B.createCondBranch(CondBr->getLoc(), CondBr->getCondition(), TrueDest,
                        TrueArgs, FalseDest, FalseArgs, CondBr->getTrueBBCount(),
@@ -216,120 +234,9 @@
     return;
   }
 
-  case TermKind::SwitchValueInst: {
-    auto SII = dyn_cast<SwitchValueInst>(T);
-    SmallVector<std::pair<SILValue, SILBasicBlock *>, 8> Cases;
-    auto *DefaultBB = replaceSwitchDest(SII, Cases, EdgeIdx, NewDest);
-    B.createSwitchValue(SII->getLoc(), SII->getOperand(), DefaultBB, Cases);
-    SII->eraseFromParent();
-    return;
+  default:
+    llvm_unreachable("only branch and cond_branch have branch arguments");
   }
-
-  case TermKind::SwitchEnumInst: {
-    auto SEI = dyn_cast<SwitchEnumInst>(T);
-    SmallVector<std::pair<EnumElementDecl*, SILBasicBlock*>, 8> Cases;
-    auto *DefaultBB = replaceSwitchDest(SEI, Cases, EdgeIdx, NewDest);
-    B.createSwitchEnum(SEI->getLoc(), SEI->getOperand(), DefaultBB, Cases);
-    SEI->eraseFromParent();
-    return;
-  }
-
-  case TermKind::SwitchEnumAddrInst: {
-    auto SEI = dyn_cast<SwitchEnumAddrInst>(T);
-    SmallVector<std::pair<EnumElementDecl*, SILBasicBlock*>, 8> Cases;
-    auto *DefaultBB = replaceSwitchDest(SEI, Cases, EdgeIdx, NewDest);
-    B.createSwitchEnumAddr(SEI->getLoc(), SEI->getOperand(), DefaultBB, Cases);
-    SEI->eraseFromParent();
-    return;
-  }
-
-  case TermKind::DynamicMethodBranchInst: {
-    auto DMBI = dyn_cast<DynamicMethodBranchInst>(T);
-    assert(EdgeIdx == 0 || EdgeIdx == 1 && "Invalid edge index");
-    auto HasMethodBB = !EdgeIdx ? NewDest : DMBI->getHasMethodBB();
-    auto NoMethodBB = EdgeIdx ? NewDest : DMBI->getNoMethodBB();
-    B.createDynamicMethodBranch(DMBI->getLoc(), DMBI->getOperand(),
-                                DMBI->getMember(), HasMethodBB, NoMethodBB);
-    DMBI->eraseFromParent();
-    return;
-  }
-
-  case TermKind::CheckedCastBranchInst: {
-    auto CBI = dyn_cast<CheckedCastBranchInst>(T);
-    assert(EdgeIdx == 0 || EdgeIdx == 1 && "Invalid edge index");
-    auto SuccessBB = !EdgeIdx ? NewDest : CBI->getSuccessBB();
-    auto FailureBB = EdgeIdx ? NewDest : CBI->getFailureBB();
-    B.createCheckedCastBranch(CBI->getLoc(), CBI->isExact(), CBI->getOperand(),
-                              CBI->getCastType(), SuccessBB, FailureBB,
-                              CBI->getTrueBBCount(), CBI->getFalseBBCount());
-    CBI->eraseFromParent();
-    return;
-  }
-
-  case TermKind::CheckedCastValueBranchInst: {
-    auto CBI = dyn_cast<CheckedCastValueBranchInst>(T);
-    assert(EdgeIdx == 0 || EdgeIdx == 1 && "Invalid edge index");
-    auto SuccessBB = !EdgeIdx ? NewDest : CBI->getSuccessBB();
-    auto FailureBB = EdgeIdx ? NewDest : CBI->getFailureBB();
-    B.createCheckedCastValueBranch(CBI->getLoc(), CBI->getOperand(),
-                                   CBI->getCastType(), SuccessBB, FailureBB);
-    CBI->eraseFromParent();
-    return;
-  }
-
-  case TermKind::CheckedCastAddrBranchInst: {
-    auto CBI = dyn_cast<CheckedCastAddrBranchInst>(T);
-    assert(EdgeIdx == 0 || EdgeIdx == 1 && "Invalid edge index");
-    auto SuccessBB = !EdgeIdx ? NewDest : CBI->getSuccessBB();
-    auto FailureBB = EdgeIdx ? NewDest : CBI->getFailureBB();
-    auto TrueCount = CBI->getTrueBBCount();
-    auto FalseCount = CBI->getFalseBBCount();
-    B.createCheckedCastAddrBranch(CBI->getLoc(), CBI->getConsumptionKind(),
-                                  CBI->getSrc(), CBI->getSourceType(),
-                                  CBI->getDest(), CBI->getTargetType(),
-                                  SuccessBB, FailureBB, TrueCount, FalseCount);
-    CBI->eraseFromParent();
-    return;
-  }
-
-  case TermKind::TryApplyInst: {
-    auto *TAI = dyn_cast<TryApplyInst>(T);
-    assert((EdgeIdx == 0 || EdgeIdx == 1) && "Invalid edge index");
-    auto *NormalBB = !EdgeIdx ? NewDest : TAI->getNormalBB();
-    auto *ErrorBB = EdgeIdx ? NewDest : TAI->getErrorBB();
-    SmallVector<SILValue, 4> Arguments;
-    for (auto &Op : TAI->getArgumentOperands())
-      Arguments.push_back(Op.get());
-
-    B.createTryApply(TAI->getLoc(), TAI->getCallee(), TAI->getSubstitutionMap(),
-                     Arguments, NormalBB, ErrorBB);
-
-    TAI->eraseFromParent();
-    return;
-  }
-
-  case TermKind::YieldInst: {
-    auto *YI = cast<YieldInst>(T);
-    assert((EdgeIdx == 0 || EdgeIdx == 1) && "Invalid edge index");
-    auto *resumeBB = !EdgeIdx ? NewDest : YI->getResumeBB();
-    auto *unwindBB = EdgeIdx ? NewDest : YI->getUnwindBB();
-    SmallVector<SILValue, 4> yieldedValues;
-    for (auto value : YI->getYieldedValues())
-      yieldedValues.push_back(value);
-
-    B.createYield(YI->getLoc(), yieldedValues, resumeBB, unwindBB);
-
-    YI->eraseFromParent();
-    return;
-  }
-
-  case TermKind::ReturnInst:
-  case TermKind::ThrowInst:
-  case TermKind::UnreachableInst:
-  case TermKind::UnwindInst:
-    llvm_unreachable("Branch target cannot be changed for this terminator instruction!");
-  }
-  llvm_unreachable("Not yet implemented!");
 }
 
 
@@ -645,7 +552,7 @@
   SILBasicBlock *DestBB = T->getSuccessors()[EdgeIdx];
 
   // Create a new basic block in the edge, and insert it after the SrcBB.
-  auto *EdgeBB = Fn->createBasicBlock(SrcBB);
+  auto *EdgeBB = Fn->createBasicBlockAfter(SrcBB);
 
   SmallVector<SILValue, 16> Args;
   getEdgeArgs(T, EdgeIdx, EdgeBB, Args);
diff --git a/lib/SILOptimizer/Utils/CastOptimizer.cpp b/lib/SILOptimizer/Utils/CastOptimizer.cpp
index a32a927..860b377 100644
--- a/lib/SILOptimizer/Utils/CastOptimizer.cpp
+++ b/lib/SILOptimizer/Utils/CastOptimizer.cpp
@@ -563,7 +563,7 @@
       if (isConditional) {
         // In case of a conditional cast, we should handle it gracefully.
         auto CondBrSuccessBB =
-            NewAI->getFunction()->createBasicBlock(NewAI->getParent());
+            NewAI->getFunction()->createBasicBlockAfter(NewAI->getParent());
         CondBrSuccessBB->createPHIArgument(DestTy, ValueOwnershipKind::Owned,
                                            nullptr);
         Builder.createCheckedCastBranch(Loc, /* isExact*/ false, NewAI, DestTy,
diff --git a/lib/SILOptimizer/Utils/Devirtualize.cpp b/lib/SILOptimizer/Utils/Devirtualize.cpp
index 4410b59..5683fd5 100644
--- a/lib/SILOptimizer/Utils/Devirtualize.cpp
+++ b/lib/SILOptimizer/Utils/Devirtualize.cpp
@@ -466,6 +466,140 @@
                                              baseCalleeSig);
 }
 
+static ApplyInst *replaceApplyInst(SILBuilder &B, SILLocation Loc,
+                                   ApplyInst *OldAI,
+                                   SILValue NewFn,
+                                   SubstitutionMap NewSubs,
+                                   ArrayRef<SILValue> NewArgs) {
+  auto *NewAI = B.createApply(Loc, NewFn, NewSubs, NewArgs,
+                              OldAI->isNonThrowing());
+
+  // Check if any casting is required for the return value.
+  SILValue ResultValue =
+    castValueToABICompatibleType(&B, Loc, NewAI, NewAI->getType(),
+                                 OldAI->getType());
+
+  OldAI->replaceAllUsesWith(ResultValue);
+  return NewAI;
+}
+
+static TryApplyInst *replaceTryApplyInst(SILBuilder &B, SILLocation Loc,
+                                         TryApplyInst *OldTAI,
+                                         SILValue NewFn,
+                                         SubstitutionMap NewSubs,
+                                         ArrayRef<SILValue> NewArgs,
+                                         SILFunctionConventions Conv) {
+  SILBasicBlock *NormalBB = OldTAI->getNormalBB();
+  SILBasicBlock *ResultBB = nullptr;
+
+  SILType NewResultTy = Conv.getSILResultType();
+
+  // Does the result value need to be casted?
+  auto OldResultTy = NormalBB->getArgument(0)->getType();
+  bool ResultCastRequired = NewResultTy != OldResultTy;
+
+  // Create a new normal BB only if the result of the new apply differs
+  // in type from the argument of the original normal BB.
+  if (!ResultCastRequired) {
+    ResultBB = NormalBB;
+  } else {
+    ResultBB = B.getFunction().createBasicBlockBefore(NormalBB);
+    ResultBB->createPHIArgument(NewResultTy, ValueOwnershipKind::Owned);
+  }
+
+  // We can always just use the original error BB because we'll be
+  // deleting the edge to it from the old TAI.
+  SILBasicBlock *ErrorBB = OldTAI->getErrorBB();
+
+  // Insert a try_apply here.
+  // Note that this makes this block temporarily double-terminated!
+  // We won't fix that until deleteDevirtualizedApply.
+  auto NewTAI = B.createTryApply(Loc, NewFn, NewSubs, NewArgs,
+                                 ResultBB, ErrorBB);
+
+  if (ResultCastRequired) {
+    B.setInsertionPoint(ResultBB);
+
+    SILValue ResultValue = ResultBB->getArgument(0);
+    ResultValue = castValueToABICompatibleType(&B, Loc, ResultValue,
+                                               NewResultTy, OldResultTy);
+
+    B.createBranch(Loc, NormalBB, { ResultValue });
+  }
+
+  B.setInsertionPoint(NormalBB->begin());
+  return NewTAI;
+}
+
+static BeginApplyInst *replaceBeginApplyInst(SILBuilder &B, SILLocation Loc,
+                                             BeginApplyInst *OldBAI,
+                                             SILValue NewFn,
+                                             SubstitutionMap NewSubs,
+                                             ArrayRef<SILValue> NewArgs) {
+  auto NewBAI = B.createBeginApply(Loc, NewFn, NewSubs, NewArgs,
+                                   OldBAI->isNonThrowing());
+
+  // Forward the token.
+  OldBAI->getTokenResult()->replaceAllUsesWith(NewBAI->getTokenResult());
+
+  auto OldYields = OldBAI->getYieldedValues();
+  auto NewYields = NewBAI->getYieldedValues();
+  assert(OldYields.size() == NewYields.size());
+
+  for (auto i : indices(OldYields)) {
+    auto OldYield = OldYields[i];
+    auto NewYield = NewYields[i];
+    NewYield = castValueToABICompatibleType(&B, Loc, NewYield,
+                                            NewYield->getType(),
+                                            OldYield->getType());
+    OldYield->replaceAllUsesWith(NewYield);
+  }
+
+  return NewBAI;
+}
+
+static PartialApplyInst *replacePartialApplyInst(SILBuilder &B, SILLocation Loc,
+                                                 PartialApplyInst *OldPAI,
+                                                 SILValue NewFn,
+                                                 SubstitutionMap NewSubs,
+                                                 ArrayRef<SILValue> NewArgs) {
+  auto Convention =
+    OldPAI->getType().getAs<SILFunctionType>()->getCalleeConvention();
+  auto *NewPAI = B.createPartialApply(Loc, NewFn, NewSubs, NewArgs,
+                                      Convention);
+
+  // Check if any casting is required for the partially-applied function.
+  SILValue ResultValue = castValueToABICompatibleType(
+      &B, Loc, NewPAI, NewPAI->getType(), OldPAI->getType());
+  OldPAI->replaceAllUsesWith(ResultValue);
+
+  return NewPAI;
+}
+
+static ApplySite replaceApplySite(SILBuilder &B, SILLocation Loc,
+                                  ApplySite OldAS,
+                                  SILValue NewFn,
+                                  SubstitutionMap NewSubs,
+                                  ArrayRef<SILValue> NewArgs,
+                                  SILFunctionConventions Conv) {
+  if (auto *OldAI = dyn_cast<ApplyInst>(OldAS)) {
+    return replaceApplyInst(B, Loc, OldAI, NewFn, NewSubs, NewArgs);
+  } else if (auto *OldTAI = dyn_cast<TryApplyInst>(OldAS)) {
+    return replaceTryApplyInst(B, Loc, OldTAI, NewFn, NewSubs, NewArgs, Conv);
+  } else if (auto *OldBAI = dyn_cast<BeginApplyInst>(OldAS)) {
+    return replaceBeginApplyInst(B, Loc, OldBAI, NewFn, NewSubs, NewArgs);
+  } else {
+    auto *OldPAI = cast<PartialApplyInst>(OldAS);
+    return replacePartialApplyInst(B, Loc, OldPAI, NewFn, NewSubs, NewArgs);
+  }
+}
+
+/// Delete an apply site that's been successfully devirtualized.
+void swift::deleteDevirtualizedApply(ApplySite Old) {
+  auto *OldApply = Old.getInstruction();
+  recursivelyDeleteTriviallyDeadInstructions(OldApply, true);
+}
+
 SILFunction *swift::getTargetClassMethod(SILModule &M,
                                          SILType ClassOrMetatypeType,
                                          MethodInst *MI) {
@@ -551,9 +685,9 @@
 /// \p ClassOrMetatype is a class value or metatype value that is the
 ///    self argument of the apply we will devirtualize.
 /// return the result value of the new ApplyInst if created one or null.
-DevirtualizationResult swift::devirtualizeClassMethod(FullApplySite AI,
-                                                      SILValue ClassOrMetatype,
-                                                      OptRemark::Emitter *ORE) {
+FullApplySite swift::devirtualizeClassMethod(FullApplySite AI,
+                                             SILValue ClassOrMetatype,
+                                             OptRemark::Emitter *ORE) {
   LLVM_DEBUG(llvm::dbgs() << "    Trying to devirtualize : "
                           << *AI.getInstruction());
 
@@ -574,7 +708,8 @@
   SILFunctionConventions substConv(SubstCalleeType, Mod);
 
   SILBuilderWithScope B(AI.getInstruction());
-  FunctionRefInst *FRI = B.createFunctionRef(AI.getLoc(), F);
+  SILLocation Loc = AI.getLoc();
+  FunctionRefInst *FRI = B.createFunctionRef(Loc, F);
 
   // Create the argument list for the new apply, casting when needed
   // in order to handle covariant indirect return types and
@@ -584,7 +719,7 @@
   auto IndirectResultArgIter = AI.getIndirectSILResults().begin();
   for (auto ResultTy : substConv.getIndirectSILResultTypes()) {
     NewArgs.push_back(
-        castValueToABICompatibleType(&B, AI.getLoc(), *IndirectResultArgIter,
+        castValueToABICompatibleType(&B, Loc, *IndirectResultArgIter,
                                      IndirectResultArgIter->getType(), ResultTy));
     ++IndirectResultArgIter;
   }
@@ -594,7 +729,7 @@
   for (auto param : substConv.getParameters().drop_back()) {
     auto paramType = substConv.getSILType(param);
     NewArgs.push_back(
-        castValueToABICompatibleType(&B, AI.getLoc(), *ParamArgIter,
+        castValueToABICompatibleType(&B, Loc, *ParamArgIter,
                                      ParamArgIter->getType(), paramType));
     ++ParamArgIter;
   }
@@ -602,80 +737,14 @@
   // Add the self argument, upcasting if required because we're
   // calling a base class's method.
   auto SelfParamTy = substConv.getSILType(SubstCalleeType->getSelfParameter());
-  NewArgs.push_back(castValueToABICompatibleType(&B, AI.getLoc(),
+  NewArgs.push_back(castValueToABICompatibleType(&B, Loc,
                                                  ClassOrMetatype,
                                                  ClassOrMetatypeType,
                                                  SelfParamTy));
 
-  SILType ResultTy = substConv.getSILResultType();
-
-  FullApplySite NewAI;
-
-  SILBasicBlock *ResultBB = nullptr;
-  SILBasicBlock *NormalBB = nullptr;
-  SILValue ResultValue;
-  bool ResultCastRequired = false;
-  SmallVector<Operand *, 4> OriginalResultUses;
-
-  if (!isa<TryApplyInst>(AI)) {
-    auto apply = B.createApply(AI.getLoc(), FRI, Subs, NewArgs,
-                               cast<ApplyInst>(AI)->isNonThrowing());
-    NewAI = apply;
-    ResultValue = apply;
-  } else {
-    auto *TAI = cast<TryApplyInst>(AI);
-    // Create new normal and error BBs only if:
-    // - re-using a BB would create a critical edge
-    // - or, the result of the new apply would be of different
-    //   type than the argument of the original normal BB.
-    if (TAI->getNormalBB()->getSinglePredecessorBlock())
-      ResultBB = TAI->getNormalBB();
-    else {
-      ResultBB = B.getFunction().createBasicBlock();
-      ResultBB->createPHIArgument(ResultTy, ValueOwnershipKind::Owned);
-    }
-
-    NormalBB = TAI->getNormalBB();
-
-    SILBasicBlock *ErrorBB = nullptr;
-    if (TAI->getErrorBB()->getSinglePredecessorBlock())
-      ErrorBB = TAI->getErrorBB();
-    else {
-      ErrorBB = B.getFunction().createBasicBlock();
-      ErrorBB->createPHIArgument(TAI->getErrorBB()->getArgument(0)->getType(),
-                                 ValueOwnershipKind::Owned);
-    }
-
-    NewAI = B.createTryApply(AI.getLoc(), FRI, Subs, NewArgs, ResultBB, ErrorBB);
-    if (ErrorBB != TAI->getErrorBB()) {
-      B.setInsertionPoint(ErrorBB);
-      B.createBranch(TAI->getLoc(), TAI->getErrorBB(),
-                     {ErrorBB->getArgument(0)});
-    }
-
-    // Does the result value need to be casted?
-    ResultCastRequired = ResultTy != NormalBB->getArgument(0)->getType();
-
-    if (ResultBB != NormalBB)
-      B.setInsertionPoint(ResultBB);
-    else if (ResultCastRequired) {
-      B.setInsertionPoint(NormalBB->begin());
-      // Collect all uses, before casting.
-      for (auto *Use : NormalBB->getArgument(0)->getUses()) {
-        OriginalResultUses.push_back(Use);
-      }
-      NormalBB->getArgument(0)->replaceAllUsesWith(
-          SILUndef::get(AI.getType(), Mod));
-      NormalBB->replacePHIArgument(0, ResultTy, ValueOwnershipKind::Owned);
-    }
-
-    // The result value is passed as a parameter to the normal block.
-    ResultValue = ResultBB->getArgument(0);
-  }
-
-  // Check if any casting is required for the return value.
-  ResultValue = castValueToABICompatibleType(&B, NewAI.getLoc(), ResultValue,
-                                             ResultTy, AI.getType());
+  ApplySite NewAS = replaceApplySite(B, Loc, AI, FRI, Subs, NewArgs, substConv);
+  FullApplySite NewAI = FullApplySite::isa(NewAS.getInstruction());
+  assert(NewAI);
 
   LLVM_DEBUG(llvm::dbgs() << "        SUCCESS: " << F->getName() << "\n");
   if (ORE)
@@ -686,34 +755,16 @@
       });
   NumClassDevirt++;
 
-  if (NormalBB) {
-    if (NormalBB != ResultBB) {
-      // If artificial normal BB was introduced, branch
-      // to the original normal BB.
-      B.createBranch(NewAI.getLoc(), NormalBB, { ResultValue });
-    } else if (ResultCastRequired) {
-      // Update all original uses by the new value.
-      for (auto *Use: OriginalResultUses) {
-        Use->set(ResultValue);
-      }
-    }
-  }
-
-  // We need to return a pair of values here:
-  // - the first one is the actual result of the devirtualized call, possibly
-  //   casted into an appropriate type. This SILValue may be a BB arg, if it
-  //   was a cast between optional types.
-  // - the second one is the new apply site.
-  return std::make_pair(ResultValue, NewAI);
+  return NewAI;
 }
 
-DevirtualizationResult
+FullApplySite
 swift::tryDevirtualizeClassMethod(FullApplySite AI, SILValue ClassInstance,
                                   OptRemark::Emitter *ORE,
                                   bool isEffectivelyFinalMethod) {
   if (!canDevirtualizeClassMethod(AI, ClassInstance->getType(), ORE,
                                   isEffectivelyFinalMethod))
-    return std::make_pair(nullptr, FullApplySite());
+    return FullApplySite();
   return devirtualizeClassMethod(AI, ClassInstance, ORE);
 }
 
@@ -855,7 +906,7 @@
 /// Generate a new apply of a function_ref to replace an apply of a
 /// witness_method when we've determined the actual function we'll end
 /// up calling.
-static DevirtualizationResult
+static ApplySite
 devirtualizeWitnessMethod(ApplySite AI, SILFunction *F,
                           ProtocolConformanceRef C, OptRemark::Emitter *ORE) {
   // We know the witness thunk and the corresponding set of substitutions
@@ -897,30 +948,8 @@
   SILLocation Loc = AI.getLoc();
   FunctionRefInst *FRI = Builder.createFunctionRef(Loc, F);
 
-  ApplySite SAI;
-
-  SILValue ResultValue;
-  if (auto *A = dyn_cast<ApplyInst>(AI)) {
-    auto *NewAI = Builder.createApply(Loc, FRI, SubMap, Arguments,
-                                      A->isNonThrowing());
-    // Check if any casting is required for the return value.
-    ResultValue = castValueToABICompatibleType(&Builder, Loc, NewAI,
-                                               NewAI->getType(), AI.getType());
-    SAI = NewAI;
-  }
-  if (auto *TAI = dyn_cast<TryApplyInst>(AI))
-    SAI = Builder.createTryApply(Loc, FRI, SubMap, Arguments,
-                                 TAI->getNormalBB(), TAI->getErrorBB());
-  if (auto *PAI = dyn_cast<PartialApplyInst>(AI)) {
-    auto PartialApplyConvention = PAI->getType().getAs<SILFunctionType>()
-                                      ->getCalleeConvention();
-    auto *NewPAI = Builder.createPartialApply(
-        Loc, FRI, SubMap, Arguments, PartialApplyConvention);
-    // Check if any casting is required for the return value.
-    ResultValue = castValueToABICompatibleType(
-        &Builder, Loc, NewPAI, NewPAI->getType(), PAI->getType());
-    SAI = NewPAI;
-  }
+  ApplySite SAI = replaceApplySite(Builder, Loc, AI, FRI, SubMap, Arguments,
+                                   substConv);
 
   if (ORE)
     ORE->emit([&]() {
@@ -929,7 +958,7 @@
                << "Devirtualized call to " << NV("Method", F);
       });
   NumWitnessDevirt++;
-  return std::make_pair(ResultValue, SAI);
+  return SAI;
 }
 
 static bool canDevirtualizeWitnessMethod(ApplySite AI) {
@@ -958,10 +987,10 @@
 /// In the cases where we can statically determine the function that
 /// we'll call to, replace an apply of a witness_method with an apply
 /// of a function_ref, returning the new apply.
-DevirtualizationResult
+ApplySite
 swift::tryDevirtualizeWitnessMethod(ApplySite AI, OptRemark::Emitter *ORE) {
   if (!canDevirtualizeWitnessMethod(AI))
-    return std::make_pair(nullptr, FullApplySite());
+    return ApplySite();
 
   SILFunction *F;
   SILWitnessTable *WT;
@@ -981,9 +1010,9 @@
 
 /// Attempt to devirtualize the given apply if possible, and return a
 /// new instruction in that case, or nullptr otherwise.
-DevirtualizationResult swift::tryDevirtualizeApply(ApplySite AI,
-                                                   ClassHierarchyAnalysis *CHA,
-                                                   OptRemark::Emitter *ORE) {
+ApplySite swift::tryDevirtualizeApply(ApplySite AI,
+                                      ClassHierarchyAnalysis *CHA,
+                                      OptRemark::Emitter *ORE) {
   LLVM_DEBUG(llvm::dbgs() << "    Trying to devirtualize: "
                           << *AI.getInstruction());
 
@@ -998,7 +1027,7 @@
   // TODO: check if we can also de-virtualize partial applies of class methods.
   FullApplySite FAS = FullApplySite::isa(AI.getInstruction());
   if (!FAS)
-    return std::make_pair(nullptr, ApplySite());
+    return ApplySite();
 
   /// Optimize a class_method and alloc_ref pair into a direct function
   /// reference:
@@ -1053,7 +1082,7 @@
     return tryDevirtualizeClassMethod(FAS, FAS.getArguments().back(), ORE);
   }
 
-  return std::make_pair(nullptr, ApplySite());
+  return ApplySite();
 }
 
 bool swift::canDevirtualizeApply(FullApplySite AI, ClassHierarchyAnalysis *CHA) {
diff --git a/lib/SILOptimizer/Utils/Local.cpp b/lib/SILOptimizer/Utils/Local.cpp
index b01b514..cfda0a9 100644
--- a/lib/SILOptimizer/Utils/Local.cpp
+++ b/lib/SILOptimizer/Utils/Local.cpp
@@ -313,15 +313,6 @@
   return FullApplySite();
 }
 
-// Replace a dead apply with a new instruction that computes the same
-// value, and delete the old apply.
-void swift::replaceDeadApply(ApplySite Old, ValueBase *New) {
-  auto *OldApply = Old.getInstruction();
-  if (!isa<TryApplyInst>(OldApply))
-    cast<SingleValueInstruction>(OldApply)->replaceAllUsesWith(New);
-  recursivelyDeleteTriviallyDeadInstructions(OldApply, true);
-}
-
 bool swift::mayBindDynamicSelf(SILFunction *F) {
   if (!F->hasSelfMetadataParam())
     return false;
diff --git a/lib/SILOptimizer/Utils/LoopUtils.cpp b/lib/SILOptimizer/Utils/LoopUtils.cpp
index 82cdc89..545ba44 100644
--- a/lib/SILOptimizer/Utils/LoopUtils.cpp
+++ b/lib/SILOptimizer/Utils/LoopUtils.cpp
@@ -25,7 +25,7 @@
 
 static SILBasicBlock *createInitialPreheader(SILBasicBlock *Header) {
   auto *Preheader =
-      Header->getParent()->createBasicBlock(&*std::prev(Header->getIterator()));
+      Header->getParent()->createBasicBlockBefore(Header);
 
   // Clone the arguments from header into the pre-header.
   llvm::SmallVector<SILValue, 8> Args;
@@ -117,7 +117,7 @@
   }
 
   // Create and insert the new backedge block...
-  SILBasicBlock *BEBlock = F->createBasicBlock(BackedgeBlocks.back());
+  SILBasicBlock *BEBlock = F->createBasicBlockAfter(BackedgeBlocks.back());
 
   LLVM_DEBUG(llvm::dbgs() << "  Inserting unique backedge block " << *BEBlock
                           << "\n");
diff --git a/lib/Serialization/DeserializeSIL.cpp b/lib/Serialization/DeserializeSIL.cpp
index 09bd326..db579ca 100644
--- a/lib/Serialization/DeserializeSIL.cpp
+++ b/lib/Serialization/DeserializeSIL.cpp
@@ -296,8 +296,14 @@
                                                    unsigned ID) {
   SILBasicBlock *&BB = BlocksByID[ID];
   // If the block has never been named yet, just create it.
-  if (BB == nullptr)
-    return BB = Fn->createBasicBlock(Prev);
+  if (BB == nullptr) {
+    if (Prev) {
+      BB = Fn->createBasicBlockAfter(Prev);      
+    } else {
+      BB = Fn->createBasicBlock();
+    }
+    return BB;
+  }
 
   // If it already exists, it was either a forward reference or a redefinition.
   // The latter should never happen.
