//===- PerformanceInliner.cpp - Basic cost based inlining for performance -===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sil-inliner"
#include "swift/SIL/SILInstruction.h"
#include "swift/SIL/Dominance.h"
#include "swift/SIL/SILModule.h"
#include "swift/SIL/Projection.h"
#include "swift/SILOptimizer/Analysis/BasicCalleeAnalysis.h"
#include "swift/SILOptimizer/Analysis/ColdBlockInfo.h"
#include "swift/SILOptimizer/Analysis/DominanceAnalysis.h"
#include "swift/SILOptimizer/Analysis/FunctionOrder.h"
#include "swift/SILOptimizer/Analysis/LoopAnalysis.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/Local.h"
#include "swift/SILOptimizer/Utils/ConstantFolding.h"
#include "swift/SILOptimizer/Utils/Devirtualize.h"
#include "swift/SILOptimizer/Utils/Generics.h"
#include "swift/SILOptimizer/Utils/SILInliner.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/MapVector.h"
#include <functional>
using namespace swift;
STATISTIC(NumFunctionsInlined, "Number of functions inlined");
namespace {
// Threshold for deterministic testing of the inline heuristic.
// It specifies an instruction cost limit at which a simplified model is used
// for instruction costs: only builtin instructions have a cost, and that
// cost is exactly 1.
llvm::cl::opt<int> TestThreshold("sil-inline-test-threshold",
llvm::cl::init(-1), llvm::cl::Hidden);
llvm::cl::opt<int> TestOpt("sil-inline-test",
llvm::cl::init(0), llvm::cl::Hidden);
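// For testing, these options can be passed on a SIL tool's command line.
// A hypothetical invocation (the pass flag may differ between builds):
//
//   sil-opt -inline -sil-inline-test-threshold=10 input.sil
//
// When the test threshold is active, testCost() below replaces the regular
// cost model.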
// The following constants define the cost model for inlining.
// The base value for every call: it represents the benefit of removing the
// call overhead.
// This value can be overridden with the -sil-inline-threshold option.
const unsigned RemovedCallBenefit = 80;
// The benefit if the condition of a terminator instruction becomes constant
// due to inlining.
const unsigned ConstTerminatorBenefit = 2;
// The benefit if the operand of an apply becomes constant, e.g. if a closure
// is passed to an apply instruction in the callee.
const unsigned ConstCalleeBenefit = 150;
// Additional benefit for each loop level.
const unsigned LoopBenefitFactor = 40;
// Up to approximately this cost level, a function can be inlined without
// increasing the code size.
const unsigned TrivialFunctionThreshold = 20;
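// As a worked example of this model (a sketch based on the defaults above
// and the logic in isProfitableToInline below): a call site at loop depth 2
// starts with a benefit of 80 + 2 * 40 = 160. If, in addition, a closure
// argument becomes a constant function at loop depth 1 inside the callee,
// the benefit grows by 150 + 1 * 40 to 350, i.e. a callee cost of up to 350
// is accepted.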
// Represents a value in integer constant evaluation.
struct IntConst {
IntConst() : isValid(false), isFromCaller(false) { }
IntConst(const APInt &value, bool isFromCaller) :
value(value), isValid(true), isFromCaller(isFromCaller) { }
// The actual value.
APInt value;
// True if the value is valid, i.e. could be evaluated to a constant.
bool isValid;
// True if the value is only valid because a constant is passed to the
// callee. False if constant propagation could do the same job inside the
// callee without inlining it.
bool isFromCaller;
};
// Tracks constants in the caller and callee to get an estimation of which
// values become constant if the callee is inlined.
// This can be seen as a "simulation" of several optimizations: SROA, mem2reg
// and constant propagation.
// Note that this is only a simplified model and not correct in all cases.
// For example, aliasing information is not taken into account.
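// For example (a simplified SIL sketch; names and types are hypothetical):
//
//   // In the caller:
//   %c = integer_literal $Builtin.Int64, 10
//   %r = apply %callee(%c) : $@convention(thin) (Builtin.Int64) -> Builtin.Int64
//
//   // In the callee, where %0 is the argument:
//   %cmp = builtin "cmp_slt_Int64"(%0 : $Builtin.Int64, %limit : $Builtin.Int64) : $Builtin.Int1
//   cond_br %cmp, bb1, bb2
//
// The tracker follows %0 back to the integer_literal in the caller, so it
// can estimate that %cmp becomes constant and that one successor of the
// cond_br becomes dead after inlining.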
class ConstantTracker {
// Links between loaded and stored values.
// The key is a load instruction; the value is the corresponding store
// instruction which stores the loaded value. Both key and value can also
// be copy_addr instructions.
llvm::DenseMap<SILInstruction *, SILInstruction *> links;
// The current stored values at memory addresses.
// The key is the base address of the memory (after skipping address
// projections). The value is a store (or copy_addr) instruction which
// stores the current value.
// This is only an estimation, because, for example, it does not consider
// potential aliasing.
llvm::DenseMap<SILValue, SILInstruction *> memoryContent;
// Cache for evaluated constants.
llvm::SmallDenseMap<BuiltinInst *, IntConst> constCache;
// The caller/callee function which is tracked.
SILFunction *F;
// The constant tracker of the caller function (null if this is the
// tracker of the callee).
ConstantTracker *callerTracker;
// The apply instruction in the caller (null if this is the tracker of the
// callee).
FullApplySite AI;
// Walks through address projections and (optionally) collects them.
// Returns the base address, i.e. the first address which is not a
// projection.
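// For example (a sketch): given
//   %a = struct_element_addr %base : $*S, #S.x
//   %b = struct_element_addr %a : $*T, #T.y
// scanProjections(%b) returns %base and, if requested, collects both
// address projections.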
SILValue scanProjections(SILValue addr,
SmallVectorImpl<Projection> *Result = nullptr);
// Get the stored value for a load. The loadInst can be either a real load
// or a copy_addr.
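// For example (a sketch): given a preceding
//   store %v to %addr : $*S
// and the load
//   %a = struct_element_addr %addr : $*S, #S.x
//   %l = load %a : $*Int
// the stored value %v is returned, and the projection #S.x remains on
// projStack so that getDef can later select the #S.x member of %v.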
SILValue getStoredValue(SILInstruction *loadInst,
ProjectionPath &projStack);
// Gets the parameter in the caller for a function argument.
SILValue getParam(SILValue value) {
if (SILArgument *arg = dyn_cast<SILArgument>(value)) {
if (AI && arg->isFunctionArg() && arg->getFunction() == F) {
// Continue at the caller.
return AI.getArgument(arg->getIndex());
}
}
return SILValue();
}
SILInstruction *getMemoryContent(SILValue addr) {
// The memory content can be stored in this ConstantTracker or in the
// caller's ConstantTracker.
SILInstruction *storeInst = memoryContent[addr];
if (storeInst)
return storeInst;
if (callerTracker)
return callerTracker->getMemoryContent(addr);
return nullptr;
}
// Gets the estimated definition of a value.
SILInstruction *getDef(SILValue val, ProjectionPath &projStack);
// Gets the estimated integer constant result of a builtin.
IntConst getBuiltinConst(BuiltinInst *BI, int depth);
public:
// Constructor for the caller function.
ConstantTracker(SILFunction *function) :
F(function), callerTracker(nullptr), AI()
{ }
// Constructor for the callee function.
ConstantTracker(SILFunction *function, ConstantTracker *caller,
FullApplySite callerApply) :
F(function), callerTracker(caller), AI(callerApply)
{ }
void beginBlock() {
// Currently we don't do any sophisticated dataflow analysis, so we keep
// the memoryContent alive only for a single block.
memoryContent.clear();
}
// Must be called for each instruction visited in dominance order.
void trackInst(SILInstruction *inst);
// Gets the estimated definition of a value.
SILInstruction *getDef(SILValue val) {
ProjectionPath projStack;
return getDef(val, projStack);
}
// Gets the estimated definition of a value if it is in the caller.
SILInstruction *getDefInCaller(SILValue val) {
SILInstruction *def = getDef(val);
if (def && def->getFunction() != F)
return def;
return nullptr;
}
// Gets the estimated integer constant of a value.
IntConst getIntConst(SILValue val, int depth = 0);
};
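// Typical usage of ConstantTracker (a sketch mirroring
// collectAppliesToInline and isProfitableToInline below): visit the blocks
// of a function in dominance order, feed every instruction to the tracker,
// and query constants on demand.
//
//   ConstantTracker tracker(F);
//   DominanceOrder domOrder(&F->front(), DT, F->size());
//   while (SILBasicBlock *block = domOrder.getNext()) {
//     tracker.beginBlock();
//     for (SILInstruction &I : *block) {
//       tracker.trackInst(&I);
//       // ... e.g. query tracker.getIntConst(operand) on operands of
//       // interest ...
//     }
//     domOrder.pushChildren(block);
//   }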
// Controls the decision to inline functions with @_semantics, @effects and
// global_init attributes.
enum class InlineSelection {
Everything,
NoGlobalInit, // and no availability semantics calls
NoSemanticsAndGlobalInit
};
class SILPerformanceInliner {
/// The inline threshold.
const int InlineCostThreshold;
/// Specifies which functions not to inline, based on @_semantics and
/// global_init attributes.
InlineSelection WhatToInline;
/// A set of pairs of function names. This set records successful
/// inlining operations and is used to prevent infinite inlining of
/// recursive functions. The pair (A, B) records inlining of function
/// B into A.
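/// For example: after inlining "bar" into "foo", the set contains the pair
/// ("foo", "bar"), and hasInliningCycle() rejects any further request to
/// inline "bar" into "foo".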
llvm::DenseSet<std::pair<StringRef, StringRef>> InlinedFunctions;
SILFunction *getEligibleFunction(FullApplySite AI);
bool isProfitableToInline(FullApplySite AI, unsigned loopDepthOfAI,
DominanceAnalysis *DA,
SILLoopAnalysis *LA,
ConstantTracker &constTracker);
void visitColdBlocks(SmallVectorImpl<FullApplySite> &AppliesToInline,
SILBasicBlock *root, DominanceInfo *DT);
void collectAppliesToInline(SILFunction *Caller,
SmallVectorImpl<FullApplySite> &Applies,
DominanceAnalysis *DA, SILLoopAnalysis *LA);
/// This method is responsible for breaking recursive cycles
/// in the inliner (some of which are only exposed via devirtualization).
/// \return true if the function \p Callee was previously inlined into
/// \p Caller and the inliner needs to reject this inlining request.
bool hasInliningCycle(SILFunction *Caller, SILFunction *Callee);
FullApplySite devirtualize(FullApplySite Apply,
ClassHierarchyAnalysis *CHA);
bool devirtualizeAndSpecializeApplies(
llvm::SmallVectorImpl<ApplySite> &Applies,
SILModuleTransform *MT,
ClassHierarchyAnalysis *CHA,
llvm::SmallVectorImpl<SILFunction *> &WorkList);
ApplySite specializeGeneric(ApplySite Apply,
llvm::SmallVectorImpl<ApplySite> &NewApplies);
bool
inlineCallsIntoFunction(SILFunction *F, DominanceAnalysis *DA,
SILLoopAnalysis *LA,
llvm::SmallVectorImpl<FullApplySite> &NewApplies);
public:
SILPerformanceInliner(int threshold,
InlineSelection WhatToInline)
: InlineCostThreshold(threshold),
WhatToInline(WhatToInline) {}
void inlineDevirtualizeAndSpecialize(SILFunction *WorkItem,
SILModuleTransform *MT,
DominanceAnalysis *DA,
SILLoopAnalysis *LA,
ClassHierarchyAnalysis *CHA);
};
}
//===----------------------------------------------------------------------===//
// ConstantTracker
//===----------------------------------------------------------------------===//
void ConstantTracker::trackInst(SILInstruction *inst) {
if (auto *LI = dyn_cast<LoadInst>(inst)) {
SILValue baseAddr = scanProjections(LI->getOperand());
if (SILInstruction *loadLink = getMemoryContent(baseAddr))
links[LI] = loadLink;
} else if (StoreInst *SI = dyn_cast<StoreInst>(inst)) {
SILValue baseAddr = scanProjections(SI->getOperand(1));
memoryContent[baseAddr] = SI;
} else if (CopyAddrInst *CAI = dyn_cast<CopyAddrInst>(inst)) {
if (!CAI->isTakeOfSrc()) {
// Treat a copy_addr as a load + store
SILValue loadAddr = scanProjections(CAI->getOperand(0));
if (SILInstruction *loadLink = getMemoryContent(loadAddr)) {
links[CAI] = loadLink;
SILValue storeAddr = scanProjections(CAI->getOperand(1));
memoryContent[storeAddr] = CAI;
}
}
}
}
SILValue ConstantTracker::scanProjections(SILValue addr,
SmallVectorImpl<Projection> *Result) {
for (;;) {
if (Projection::isAddrProjection(addr)) {
SILInstruction *I = cast<SILInstruction>(addr.getDef());
if (Result) {
Optional<Projection> P = Projection::addressProjectionForInstruction(I);
Result->push_back(P.getValue());
}
addr = I->getOperand(0);
continue;
}
if (SILValue param = getParam(addr)) {
// Go to the caller.
addr = param;
continue;
}
// Return the base address, i.e. the first address that is not a projection.
return addr;
}
}
SILValue ConstantTracker::getStoredValue(SILInstruction *loadInst,
ProjectionPath &projStack) {
SILInstruction *store = links[loadInst];
if (!store && callerTracker)
store = callerTracker->links[loadInst];
if (!store) return SILValue();
assert(isa<LoadInst>(loadInst) || isa<CopyAddrInst>(loadInst));
// Push the address projections of the load onto the stack.
SmallVector<Projection, 4> loadProjections;
scanProjections(loadInst->getOperand(0), &loadProjections);
for (const Projection &proj : loadProjections) {
projStack.push_back(proj);
}
// Pop the address projections of the store from the stack.
SmallVector<Projection, 4> storeProjections;
scanProjections(store->getOperand(1), &storeProjections);
for (auto iter = storeProjections.rbegin(); iter != storeProjections.rend();
++iter) {
const Projection &proj = *iter;
// The corresponding load-projection must match the store-projection.
if (projStack.empty() || projStack.back() != proj)
return SILValue();
projStack.pop_back();
}
if (isa<StoreInst>(store))
return store->getOperand(0);
// A copy_addr instruction is both a load and a store, so we follow the
// link again.
assert(isa<CopyAddrInst>(store));
return getStoredValue(store, projStack);
}
// Get the aggregate member based on the top of the projection stack.
static SILValue getMember(SILInstruction *inst, ProjectionPath &projStack) {
if (!projStack.empty()) {
const Projection &proj = projStack.back();
return proj.getOperandForAggregate(inst);
}
return SILValue();
}
SILInstruction *ConstantTracker::getDef(SILValue val,
ProjectionPath &projStack) {
// Track the value up the dominator tree.
for (;;) {
if (SILInstruction *inst = dyn_cast<SILInstruction>(val)) {
if (auto proj = Projection::valueProjectionForInstruction(inst)) {
// Extract a member from a struct/tuple/enum.
projStack.push_back(proj.getValue());
val = inst->getOperand(0);
continue;
} else if (SILValue member = getMember(inst, projStack)) {
// The opposite of a projection instruction: composing a struct/tuple.
projStack.pop_back();
val = member;
continue;
} else if (SILValue loadedVal = getStoredValue(inst, projStack)) {
// A value loaded from memory.
val = loadedVal;
continue;
} else if (isa<ThinToThickFunctionInst>(inst)) {
val = inst->getOperand(0);
continue;
}
return inst;
} else if (SILValue param = getParam(val)) {
// Continue in the caller.
val = param;
continue;
}
return nullptr;
}
}
IntConst ConstantTracker::getBuiltinConst(BuiltinInst *BI, int depth) {
const BuiltinInfo &Builtin = BI->getBuiltinInfo();
OperandValueArrayRef Args = BI->getArguments();
switch (Builtin.ID) {
default: break;
// Fold comparison predicates.
#define BUILTIN(id, name, Attrs)
#define BUILTIN_BINARY_PREDICATE(id, name, attrs, overload) \
case BuiltinValueKind::id:
#include "swift/AST/Builtins.def"
{
IntConst lhs = getIntConst(Args[0], depth);
IntConst rhs = getIntConst(Args[1], depth);
if (lhs.isValid && rhs.isValid) {
return IntConst(constantFoldComparison(lhs.value, rhs.value,
Builtin.ID),
lhs.isFromCaller || rhs.isFromCaller);
}
break;
}
case BuiltinValueKind::SAddOver:
case BuiltinValueKind::UAddOver:
case BuiltinValueKind::SSubOver:
case BuiltinValueKind::USubOver:
case BuiltinValueKind::SMulOver:
case BuiltinValueKind::UMulOver: {
IntConst lhs = getIntConst(Args[0], depth);
IntConst rhs = getIntConst(Args[1], depth);
if (lhs.isValid && rhs.isValid) {
bool IgnoredOverflow;
return IntConst(constantFoldBinaryWithOverflow(lhs.value, rhs.value,
IgnoredOverflow,
getLLVMIntrinsicIDForBuiltinWithOverflow(Builtin.ID)),
lhs.isFromCaller || rhs.isFromCaller);
}
break;
}
case BuiltinValueKind::SDiv:
case BuiltinValueKind::SRem:
case BuiltinValueKind::UDiv:
case BuiltinValueKind::URem: {
IntConst lhs = getIntConst(Args[0], depth);
IntConst rhs = getIntConst(Args[1], depth);
if (lhs.isValid && rhs.isValid && rhs.value != 0) {
bool IgnoredOverflow;
return IntConst(constantFoldDiv(lhs.value, rhs.value,
IgnoredOverflow, Builtin.ID),
lhs.isFromCaller || rhs.isFromCaller);
}
break;
}
case BuiltinValueKind::And:
case BuiltinValueKind::AShr:
case BuiltinValueKind::LShr:
case BuiltinValueKind::Or:
case BuiltinValueKind::Shl:
case BuiltinValueKind::Xor: {
IntConst lhs = getIntConst(Args[0], depth);
IntConst rhs = getIntConst(Args[1], depth);
if (lhs.isValid && rhs.isValid) {
return IntConst(constantFoldBitOperation(lhs.value, rhs.value,
Builtin.ID),
lhs.isFromCaller || rhs.isFromCaller);
}
break;
}
case BuiltinValueKind::Trunc:
case BuiltinValueKind::ZExt:
case BuiltinValueKind::SExt:
case BuiltinValueKind::TruncOrBitCast:
case BuiltinValueKind::ZExtOrBitCast:
case BuiltinValueKind::SExtOrBitCast: {
IntConst val = getIntConst(Args[0], depth);
if (val.isValid) {
return IntConst(constantFoldCast(val.value, Builtin), val.isFromCaller);
}
break;
}
}
return IntConst();
}
// Tries to evaluate the integer constant of a value. The \p depth is used
// to limit the complexity.
IntConst ConstantTracker::getIntConst(SILValue val, int depth) {
// Don't spend too much time with constant evaluation.
if (depth >= 10)
return IntConst();
SILInstruction *I = getDef(val);
if (!I)
return IntConst();
if (auto *IL = dyn_cast<IntegerLiteralInst>(I)) {
return IntConst(IL->getValue(), IL->getFunction() != F);
}
if (auto *BI = dyn_cast<BuiltinInst>(I)) {
if (constCache.count(BI) != 0)
return constCache[BI];
IntConst builtinConst = getBuiltinConst(BI, depth + 1);
constCache[BI] = builtinConst;
return builtinConst;
}
return IntConst();
}
//===----------------------------------------------------------------------===//
// Performance Inliner
//===----------------------------------------------------------------------===//
bool SILPerformanceInliner::hasInliningCycle(SILFunction *Caller,
SILFunction *Callee) {
// Reject simple recursions.
if (Caller == Callee) return true;
StringRef CallerName = Caller->getName();
StringRef CalleeName = Callee->getName();
bool InlinedBefore =
InlinedFunctions.count(std::make_pair(CallerName, CalleeName));
// If the Callee was inlined into the Caller in previous inlining iterations
// then we need to reject this inlining request to prevent a cycle.
return InlinedBefore;
}
// Returns true if the callee makes too many (more than two) self-recursive
// calls; such callees are rejected by getEligibleFunction below.
static bool calleeHasMinimalSelfRecursion(SILFunction *Callee) {
int countSelfRecursiveCalls = 0;
for (auto &BB : *Callee) {
for (auto &I : BB) {
if (auto Apply = FullApplySite::isa(&I)) {
if (Apply.getCalleeFunction() == Callee)
++countSelfRecursiveCalls;
if (countSelfRecursiveCalls > 2)
return true;
}
}
}
return false;
}
// Returns the callee of an apply_inst if it is basically inlinable.
SILFunction *SILPerformanceInliner::getEligibleFunction(FullApplySite AI) {
SILFunction *Callee = AI.getCalleeFunction();
if (!Callee) {
DEBUG(llvm::dbgs() << " FAIL: Cannot find inlineable callee.\n");
return nullptr;
}
// Don't inline functions that are marked with the @_semantics or @effects
// attribute if the inliner is asked not to inline them.
if (Callee->hasSemanticsAttrs() || Callee->hasEffectsKind()) {
if (WhatToInline == InlineSelection::NoSemanticsAndGlobalInit) {
DEBUG(llvm::dbgs() << " FAIL: Function " << Callee->getName()
<< " has special semantics or effects attribute.\n");
return nullptr;
}
// The "availability" semantics attribute is treated like global-init.
if (Callee->hasSemanticsAttrs() &&
WhatToInline != InlineSelection::Everything &&
Callee->hasSemanticsAttrsThatStartsWith("availability")) {
return nullptr;
}
} else if (Callee->isGlobalInit()) {
if (WhatToInline != InlineSelection::Everything) {
DEBUG(llvm::dbgs() << " FAIL: Function " << Callee->getName()
<< " has the global-init attribute.\n");
return nullptr;
}
}
// We can't inline external declarations.
if (Callee->empty() || Callee->isExternalDeclaration()) {
DEBUG(llvm::dbgs() << " FAIL: Cannot inline external " <<
Callee->getName() << ".\n");
return nullptr;
}
// Explicitly disabled inlining.
if (Callee->getInlineStrategy() == NoInline) {
DEBUG(llvm::dbgs() << " FAIL: noinline attribute on " <<
Callee->getName() << ".\n");
return nullptr;
}
if (!Callee->shouldOptimize()) {
DEBUG(llvm::dbgs() << " FAIL: optimizations disabled on " <<
Callee->getName() << ".\n");
return nullptr;
}
// We don't support this yet.
if (AI.hasSubstitutions()) {
DEBUG(llvm::dbgs() << " FAIL: Generic substitutions on " <<
Callee->getName() << ".\n");
return nullptr;
}
// We don't support inlining a function that binds dynamic self because we
// have no mechanism to preserve the original function's local self metadata.
if (computeMayBindDynamicSelf(Callee)) {
DEBUG(llvm::dbgs() << " FAIL: Binding dynamic Self in " <<
Callee->getName() << ".\n");
return nullptr;
}
SILFunction *Caller = AI.getFunction();
// Detect inlining cycles.
if (hasInliningCycle(Caller, Callee)) {
DEBUG(llvm::dbgs() << " FAIL: Detected a recursion inlining " <<
Callee->getName() << ".\n");
return nullptr;
}
// A non-fragile function may not be inlined into a fragile function.
if (Caller->isFragile() && !Callee->isFragile()) {
DEBUG(llvm::dbgs() << " FAIL: Can't inline fragile " <<
Callee->getName() << ".\n");
return nullptr;
}
// Inlining self-recursive functions into other functions can result
// in excessive code duplication since we run the inliner multiple
// times in our pipeline, so we only do it for callees with few
// self-recursive calls.
if (calleeHasMinimalSelfRecursion(Callee)) {
DEBUG(llvm::dbgs() << " FAIL: Callee is self-recursive in "
<< Callee->getName() << ".\n");
return nullptr;
}
DEBUG(llvm::dbgs() << " Eligible callee: " <<
Callee->getName() << "\n");
return Callee;
}
// Gets the cost of an instruction by using the simplified test-model: only
// builtin instructions have a cost, and that cost is exactly 1.
static unsigned testCost(SILInstruction *I) {
switch (I->getKind()) {
case ValueKind::BuiltinInst:
return 1;
default:
return 0;
}
}
// Returns the taken block of a terminator instruction if the condition turns
// out to be constant.
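// For example (a sketch): if the callee ends a block with
//   cond_br %cond, bb1, bb2
// and %cond evaluates to the constant 1 because of an argument passed in
// from the caller, the taken block is bb1, and the instructions in bb2 need
// not be counted towards the inlining cost.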
static SILBasicBlock *getTakenBlock(TermInst *term,
ConstantTracker &constTracker) {
if (CondBranchInst *CBI = dyn_cast<CondBranchInst>(term)) {
IntConst condConst = constTracker.getIntConst(CBI->getCondition());
if (condConst.isFromCaller) {
return condConst.value != 0 ? CBI->getTrueBB() : CBI->getFalseBB();
}
return nullptr;
}
if (SwitchValueInst *SVI = dyn_cast<SwitchValueInst>(term)) {
IntConst switchConst = constTracker.getIntConst(SVI->getOperand());
if (switchConst.isFromCaller) {
for (unsigned Idx = 0; Idx < SVI->getNumCases(); ++Idx) {
auto switchCase = SVI->getCase(Idx);
if (auto *IL = dyn_cast<IntegerLiteralInst>(switchCase.first)) {
if (switchConst.value == IL->getValue())
return switchCase.second;
} else {
return nullptr;
}
}
if (SVI->hasDefault())
return SVI->getDefaultBB();
}
return nullptr;
}
if (SwitchEnumInst *SEI = dyn_cast<SwitchEnumInst>(term)) {
if (SILInstruction *def = constTracker.getDefInCaller(SEI->getOperand())) {
if (EnumInst *EI = dyn_cast<EnumInst>(def)) {
for (unsigned Idx = 0; Idx < SEI->getNumCases(); ++Idx) {
auto enumCase = SEI->getCase(Idx);
if (enumCase.first == EI->getElement())
return enumCase.second;
}
if (SEI->hasDefault())
return SEI->getDefaultBB();
}
}
return nullptr;
}
if (CheckedCastBranchInst *CCB = dyn_cast<CheckedCastBranchInst>(term)) {
if (SILInstruction *def = constTracker.getDefInCaller(CCB->getOperand())) {
if (UpcastInst *UCI = dyn_cast<UpcastInst>(def)) {
SILType castType = UCI->getOperand()->getType(0);
if (CCB->getCastType().isSuperclassOf(castType)) {
return CCB->getSuccessBB();
}
if (!castType.isSuperclassOf(CCB->getCastType())) {
return CCB->getFailureBB();
}
}
}
}
return nullptr;
}
/// Return true if inlining this call site is profitable.
bool SILPerformanceInliner::isProfitableToInline(FullApplySite AI,
unsigned loopDepthOfAI,
DominanceAnalysis *DA,
SILLoopAnalysis *LA,
ConstantTracker &callerTracker) {
SILFunction *Callee = AI.getCalleeFunction();
if (Callee->getInlineStrategy() == AlwaysInline)
return true;
ConstantTracker constTracker(Callee, &callerTracker, AI);
DominanceInfo *DT = DA->get(Callee);
SILLoopInfo *LI = LA->get(Callee);
DominanceOrder domOrder(&Callee->front(), DT, Callee->size());
// Calculate the inlining cost of the callee.
unsigned CalleeCost = 0;
unsigned Benefit = InlineCostThreshold > 0 ? InlineCostThreshold :
RemovedCallBenefit;
Benefit += loopDepthOfAI * LoopBenefitFactor;
int testThreshold = TestThreshold;
while (SILBasicBlock *block = domOrder.getNext()) {
constTracker.beginBlock();
unsigned loopDepth = LI->getLoopDepth(block);
for (SILInstruction &I : *block) {
constTracker.trackInst(&I);
auto ICost = instructionInlineCost(I);
if (testThreshold >= 0) {
// We are in test-mode: use a simplified cost model.
CalleeCost += testCost(&I);
} else {
// Use the regular cost model.
CalleeCost += unsigned(ICost);
}
if (ApplyInst *AI = dyn_cast<ApplyInst>(&I)) {
// Check if the callee is passed as an argument. If so, increase the
// threshold, because inlining will (probably) eliminate the closure.
SILInstruction *def = constTracker.getDefInCaller(AI->getCallee());
if (def && (isa<FunctionRefInst>(def) || isa<PartialApplyInst>(def))) {
DEBUG(llvm::dbgs() << " Boost: apply const function at"
<< *AI);
Benefit += ConstCalleeBenefit + loopDepth * LoopBenefitFactor;
testThreshold *= 2;
}
}
}
// Don't count costs in blocks which are dead after inlining.
SILBasicBlock *takenBlock = getTakenBlock(block->getTerminator(),
constTracker);
if (takenBlock) {
Benefit += ConstTerminatorBenefit + TestOpt;
DEBUG(llvm::dbgs() << " Take bb" << takenBlock->getDebugID() <<
" of" << *block->getTerminator());
domOrder.pushChildrenIf(block, [=] (SILBasicBlock *child) {
return child->getSinglePredecessor() != block || child == takenBlock;
});
} else {
domOrder.pushChildren(block);
}
}
unsigned Threshold = Benefit; // The default.
if (testThreshold >= 0) {
// We are in testing mode.
Threshold = testThreshold;
} else if (AI.getFunction()->isThunk()) {
// Only inline trivial functions into thunks (which will not increase the
// code size).
Threshold = TrivialFunctionThreshold;
}
if (CalleeCost > Threshold) {
DEBUG(llvm::dbgs() << " NO: Function too big to inline, "
"cost: " << CalleeCost << ", threshold: " << Threshold << "\n");
return false;
}
DEBUG(llvm::dbgs() << " YES: ready to inline, "
"cost: " << CalleeCost << ", threshold: " << Threshold << "\n");
return true;
}
/// Return true if inlining this call site into a cold block is profitable.
static bool isProfitableInColdBlock(SILFunction *Callee) {
if (Callee->getInlineStrategy() == AlwaysInline)
return true;
// Testing with the TestThreshold disables inlining into cold blocks.
if (TestThreshold >= 0)
return false;
unsigned CalleeCost = 0;
for (SILBasicBlock &Block : *Callee) {
for (SILInstruction &I : Block) {
auto ICost = instructionInlineCost(I);
CalleeCost += (unsigned)ICost;
if (CalleeCost > TrivialFunctionThreshold)
return false;
}
}
DEBUG(llvm::dbgs() << " YES: ready to inline into cold block, cost:"
<< CalleeCost << "\n");
return true;
}
/// FIXME: Total hack to work around issues exposed while ripping call
/// graph maintenance from the inliner.
static void tryLinkCallee(FullApplySite Apply) {
auto *F = Apply.getCalleeFunction();
if (!F || F->isDefinition()) return;
auto &M = Apply.getFunction()->getModule();
M.linkFunction(F, SILModule::LinkingMode::LinkAll);
}
// Attempt to devirtualize. When successful, replaces the old apply
// with the new one and returns the new one. When unsuccessful returns
// an empty apply site.
FullApplySite SILPerformanceInliner::devirtualize(FullApplySite Apply,
ClassHierarchyAnalysis *CHA) {
auto NewInstPair = tryDevirtualizeApply(Apply, CHA);
if (!NewInstPair.second)
return FullApplySite();
replaceDeadApply(Apply, NewInstPair.first);
auto NewApply = FullApplySite(NewInstPair.second.getInstruction());
// FIXME: This should not be needed. We should instead be linking
// everything in up front, including everything transitively
// referenced through vtables and witness tables.
tryLinkCallee(NewApply);
return FullApplySite(NewInstPair.second.getInstruction());
}
ApplySite SILPerformanceInliner::specializeGeneric(
ApplySite Apply, llvm::SmallVectorImpl<ApplySite> &NewApplies) {
assert(NewApplies.empty() && "Expected out parameter for new applies!");
if (!Apply.hasSubstitutions())
return ApplySite();
auto *Callee = Apply.getCalleeFunction();
if (!Callee || Callee->isExternalDeclaration())
return ApplySite();
auto Filter = [](SILInstruction *I) -> bool {
return ApplySite::isa(I) != ApplySite();
};
CloneCollector Collector(Filter);
SILFunction *SpecializedFunction;
auto Specialized = trySpecializeApplyOfGeneric(Apply,
SpecializedFunction,
Collector);
if (!Specialized)
return ApplySite();
// Track the new applies from the specialization.
for (auto NewCallSite : Collector.getInstructionPairs())
NewApplies.push_back(ApplySite(NewCallSite.first));
auto FullApply = FullApplySite::isa(Apply.getInstruction());
if (!FullApply) {
assert(!FullApplySite::isa(Specialized.getInstruction()) &&
"Unexpected full apply generated!");
// Replace the old apply with the new and delete the old.
replaceDeadApply(Apply, Specialized.getInstruction());
return ApplySite(Specialized);
}
// Replace the old apply with the new and delete the old.
replaceDeadApply(Apply, Specialized.getInstruction());
return Specialized;
}
static void collectAllAppliesInFunction(SILFunction *F,
llvm::SmallVectorImpl<ApplySite> &Applies) {
assert(Applies.empty() && "Expected empty vector to store into!");
for (auto &B : *F)
for (auto &I : B)
if (auto Apply = ApplySite::isa(&I))
Applies.push_back(Apply);
}
// Devirtualize and specialize a group of applies, returning a
// worklist of newly exposed function references that should be
// considered for inlining before continuing with the caller that has
// the passed-in applies.
//
// The returned worklist is stacked such that the last things we want
// to process are earlier on the list.
//
// Returns true if any changes were made.
bool SILPerformanceInliner::devirtualizeAndSpecializeApplies(
llvm::SmallVectorImpl<ApplySite> &Applies,
SILModuleTransform *MT,
ClassHierarchyAnalysis *CHA,
llvm::SmallVectorImpl<SILFunction *> &WorkList) {
assert(WorkList.empty() && "Expected empty worklist for return results!");
bool ChangedAny = false;
// The set of all new function references generated by
// devirtualization and specialization.
llvm::SetVector<SILFunction *> NewRefs;
// Process all applies passed in, plus any new ones that are pushed
// on as a result of specializing the referenced functions.
while (!Applies.empty()) {
auto Apply = Applies.back();
Applies.pop_back();
bool ChangedApply = false;
if (auto FullApply = FullApplySite::isa(Apply.getInstruction())) {
if (auto NewApply = devirtualize(FullApply, CHA)) {
ChangedApply = true;
Apply = ApplySite(NewApply.getInstruction());
}
}
llvm::SmallVector<ApplySite, 4> NewApplies;
if (auto NewApply = specializeGeneric(Apply, NewApplies)) {
ChangedApply = true;
Apply = NewApply;
Applies.insert(Applies.end(), NewApplies.begin(), NewApplies.end());
}
if (ChangedApply) {
ChangedAny = true;
auto *NewCallee = Apply.getCalleeFunction();
assert(NewCallee && "Expected directly referenced function!");
// Track all new references to function definitions.
if (NewCallee->isDefinition())
NewRefs.insert(NewCallee);
// TODO: Do we need to invalidate everything at this point?
// What about side-effects analysis? What about type analysis?
MT->invalidateAnalysis(Apply.getFunction(),
SILAnalysis::InvalidationKind::Everything);
}
}
// Copy out all the new function references gathered.
if (ChangedAny)
WorkList.insert(WorkList.end(), NewRefs.begin(), NewRefs.end());
return ChangedAny;
}
void SILPerformanceInliner::collectAppliesToInline(
SILFunction *Caller, SmallVectorImpl<FullApplySite> &Applies,
DominanceAnalysis *DA, SILLoopAnalysis *LA) {
DominanceInfo *DT = DA->get(Caller);
SILLoopInfo *LI = LA->get(Caller);
ConstantTracker constTracker(Caller);
DominanceOrder domOrder(&Caller->front(), DT, Caller->size());
// Go through all instructions and find candidates for inlining.
// We do this in dominance order for the constTracker.
SmallVector<FullApplySite, 8> InitialCandidates;
while (SILBasicBlock *block = domOrder.getNext()) {
constTracker.beginBlock();
unsigned loopDepth = LI->getLoopDepth(block);
for (auto I = block->begin(), E = block->end(); I != E; ++I) {
constTracker.trackInst(&*I);
if (!FullApplySite::isa(&*I))
continue;
FullApplySite AI = FullApplySite(&*I);
DEBUG(llvm::dbgs() << " Check:" << *I);
auto *Callee = getEligibleFunction(AI);
if (Callee) {
if (isProfitableToInline(AI, loopDepth, DA, LA, constTracker))
InitialCandidates.push_back(AI);
}
}
domOrder.pushChildrenIf(block, [&] (SILBasicBlock *child) {
if (ColdBlockInfo::isSlowPath(block, child)) {
// Handle cold blocks separately.
visitColdBlocks(InitialCandidates, child, DT);
return false;
}
return true;
});
}
// Calculate how many times a callee is called from this caller.
llvm::DenseMap<SILFunction *, unsigned> CalleeCount;
for (auto AI : InitialCandidates) {
SILFunction *Callee = AI.getCalleeFunction();
assert(Callee && "apply_inst does not have a direct callee anymore");
CalleeCount[Callee]++;
}
// Now copy each candidate callee that has a small enough number of
// call sites into the final set of call sites.
for (auto AI : InitialCandidates) {
SILFunction *Callee = AI.getCalleeFunction();
assert(Callee && "apply_inst does not have a direct callee anymore");
const unsigned CallsToCalleeThreshold = 1024;
if (CalleeCount[Callee] <= CallsToCalleeThreshold)
Applies.push_back(AI);
}
}
/// \brief Attempt to inline all calls smaller than our threshold.
/// Returns true if a function was inlined.
bool SILPerformanceInliner::inlineCallsIntoFunction(SILFunction *Caller,
DominanceAnalysis *DA,
SILLoopAnalysis *LA,
llvm::SmallVectorImpl<FullApplySite> &NewApplies) {
// Don't optimize functions that are marked with the opt.never attribute.
if (!Caller->shouldOptimize())
return false;
// Construct a log of all of the names of the functions that we've inlined
// in the current iteration.
SmallVector<StringRef, 16> InlinedFunctionNames;
StringRef CallerName = Caller->getName();
DEBUG(llvm::dbgs() << "Visiting Function: " << CallerName << "\n");
assert(NewApplies.empty() && "Expected empty vector to store results in!");
// First step: collect all the functions we want to inline. We
// don't change anything yet so that the dominator information
// remains valid.
SmallVector<FullApplySite, 8> AppliesToInline;
collectAppliesToInline(Caller, AppliesToInline, DA, LA);
if (AppliesToInline.empty())
return false;
// Second step: do the actual inlining.
for (auto AI : AppliesToInline) {
SILFunction *Callee = AI.getCalleeFunction();
assert(Callee && "apply_inst does not have a direct callee anymore");
DEBUG(llvm::dbgs() << " Inline:" << *AI.getInstruction());
if (!Callee->shouldOptimize()) {
DEBUG(llvm::dbgs() << " Cannot inline function " << Callee->getName()
<< " marked to be excluded from optimizations.\n");
continue;
}
SmallVector<SILValue, 8> Args;
for (const auto &Arg : AI.getArguments())
Args.push_back(Arg);
// As we inline and clone we need to collect new applies.
auto Filter = [](SILInstruction *I) -> bool {
return bool(FullApplySite::isa(I));
};
CloneCollector Collector(Filter);
// Notice that we will skip all of the newly inlined ApplyInsts. That's
// okay because we will visit them in our next invocation of the inliner.
TypeSubstitutionMap ContextSubs;
SILInliner Inliner(*Caller, *Callee,
SILInliner::InlineKind::PerformanceInline,
ContextSubs, AI.getSubstitutions(),
Collector.getCallback());
// Record the name of the inlined function (for cycle detection).
InlinedFunctionNames.push_back(Callee->getName());
auto Success = Inliner.inlineFunction(AI, Args);
(void) Success;
// We've already determined we should be able to inline this, so
// we expect it to have happened.
assert(Success && "Expected inliner to inline this function!");
llvm::SmallVector<FullApplySite, 4> AppliesFromInlinee;
for (auto &P : Collector.getInstructionPairs())
AppliesFromInlinee.push_back(FullApplySite(P.first));
recursivelyDeleteTriviallyDeadInstructions(AI.getInstruction(), true);
NewApplies.insert(NewApplies.end(), AppliesFromInlinee.begin(),
AppliesFromInlinee.end());
DA->invalidate(Caller, SILAnalysis::InvalidationKind::Everything);
NumFunctionsInlined++;
}
// Record the names of the functions that we inlined.
// We'll use this list to detect cycles in future iterations of
// the inliner.
for (auto CalleeName : InlinedFunctionNames) {
InlinedFunctions.insert(std::make_pair(CallerName, CalleeName));
}
DEBUG(llvm::dbgs() << "\n");
return true;
}
void SILPerformanceInliner::inlineDevirtualizeAndSpecialize(
SILFunction *Caller,
SILModuleTransform *MT,
DominanceAnalysis *DA,
SILLoopAnalysis *LA,
ClassHierarchyAnalysis *CHA) {
assert(Caller->isDefinition() && "Expected only functions with bodies!");
llvm::SmallVector<SILFunction *, 4> WorkList;
WorkList.push_back(Caller);
while (!WorkList.empty()) {
llvm::SmallVector<ApplySite, 4> WorkItemApplies;
SILFunction *CurrentCaller = WorkList.back();
if (CurrentCaller->shouldOptimize())
collectAllAppliesInFunction(CurrentCaller, WorkItemApplies);
// Devirtualize and specialize any applies we've collected,
// and collect new functions we should inline into as we do
// so.
llvm::SmallVector<SILFunction *, 4> NewFuncs;
if (devirtualizeAndSpecializeApplies(WorkItemApplies, MT, CHA, NewFuncs)) {
WorkList.insert(WorkList.end(), NewFuncs.begin(), NewFuncs.end());
NewFuncs.clear();
}
assert(WorkItemApplies.empty() && "Expected all applies to be processed!");
// We want to inline into each function on the worklist, starting
// with any new ones that were exposed as a result of
// devirtualization (to ensure we're inlining into callees first).
//
// After inlining, we may have new opportunities for
// devirtualization, e.g. as a result of exposing the dynamic type
// of an object. When those opportunities arise we want to attempt
// devirtualization and then again attempt to inline into the
// newly exposed functions, etc. until we're back to the function
// we began with.
auto *Initial = WorkList.back();
// In practice we rarely exceed 5, but in a perf test we iterate 51 times.
const unsigned MaxLaps = 1500;
unsigned Lap = 0;
while (1) {
auto *WorkItem = WorkList.back();
assert(WorkItem->isDefinition() &&
"Expected function definition on work list!");
// Devirtualization and specialization might have exposed new
// function references. We want to inline within those functions
// before inlining within our original function.
//
// Inlining in turn might result in new applies that we should
// consider for devirtualization and specialization.
llvm::SmallVector<FullApplySite, 4> NewApplies;
bool Inlined = inlineCallsIntoFunction(WorkItem, DA, LA, NewApplies);
if (Inlined) {
MT->invalidateAnalysis(WorkItem,
SILAnalysis::InvalidationKind::FunctionBody);
// FIXME: Update inlineCallsIntoFunction to collect all
// remaining applies after inlining, not just those
// resulting from inlining code.
llvm::SmallVector<ApplySite, 4> WorkItemApplies;
collectAllAppliesInFunction(WorkItem, WorkItemApplies);
bool Modified =
devirtualizeAndSpecializeApplies(WorkItemApplies, MT,
CHA, NewFuncs);
if (Modified) {
WorkList.insert(WorkList.end(), NewFuncs.begin(), NewFuncs.end());
NewFuncs.clear();
assert(WorkItemApplies.empty() &&
"Expected all applies to be processed!");
} else if (WorkItem == Initial) {
// We did not specialize generics or devirtualize calls and we
// did not create new opportunities so we can bail out now.
break;
} else {
WorkList.pop_back();
}
} else if (WorkItem == Initial) {
// We did not inline any calls and did not create new opportunities
// so we can bail out now.
break;
} else {
WorkList.pop_back();
}
Lap++;
// It's possible to construct real code where this will hit, but
// it's more likely that there is an issue tracking recursive
// inlining, in which case we want to know about it in internal
// builds, and not hang on bots or user machines.
assert(Lap <= MaxLaps && "Possible bug tracking recursion!");
// Give up and move along.
if (Lap > MaxLaps) {
while (WorkList.back() != Initial)
WorkList.pop_back();
break;
}
}
assert(WorkList.back() == Initial &&
"Expected to exit with same element on top of stack!" );
WorkList.pop_back();
}
}
// Find calls in cold blocks whose callees are forced to be inlined or are
// trivially small. No other functions are inlined into cold blocks.
void SILPerformanceInliner::visitColdBlocks(
SmallVectorImpl<FullApplySite> &AppliesToInline, SILBasicBlock *Root,
DominanceInfo *DT) {
DominanceOrder domOrder(Root, DT);
while (SILBasicBlock *block = domOrder.getNext()) {
for (SILInstruction &I : *block) {
ApplyInst *AI = dyn_cast<ApplyInst>(&I);
if (!AI)
continue;
auto *Callee = getEligibleFunction(AI);
if (Callee && isProfitableInColdBlock(Callee)) {
DEBUG(llvm::dbgs() << " inline in cold block:" << *AI);
AppliesToInline.push_back(AI);
}
}
domOrder.pushChildren(block);
}
}
//===----------------------------------------------------------------------===//
// Performance Inliner Pass
//===----------------------------------------------------------------------===//
namespace {
class SILPerformanceInlinerPass : public SILModuleTransform {
/// Specifies which functions not to inline, based on @_semantics and
/// global_init attributes.
InlineSelection WhatToInline;
std::string PassName;
public:
SILPerformanceInlinerPass(InlineSelection WhatToInline, StringRef LevelName):
WhatToInline(WhatToInline), PassName(LevelName) {
PassName.append(" Performance Inliner");
}
void run() override {
BasicCalleeAnalysis *BCA = PM->getAnalysis<BasicCalleeAnalysis>();
DominanceAnalysis *DA = PM->getAnalysis<DominanceAnalysis>();
SILLoopAnalysis *LA = PM->getAnalysis<SILLoopAnalysis>();
ClassHierarchyAnalysis *CHA = PM->getAnalysis<ClassHierarchyAnalysis>();
if (getOptions().InlineThreshold == 0) {
DEBUG(llvm::dbgs() << "*** The Performance Inliner is disabled ***\n");
return;
}
SILPerformanceInliner Inliner(getOptions().InlineThreshold,
WhatToInline);
BottomUpFunctionOrder BottomUpOrder(*getModule(), BCA);
auto BottomUpFunctions = BottomUpOrder.getFunctions();
// Copy the bottom-up function list into a worklist.
llvm::SmallVector<SILFunction *, 32> WorkList;
// FIXME: std::reverse_copy would be better, but it crashes.
for (auto I = BottomUpFunctions.rbegin(), E = BottomUpFunctions.rend();
I != E; ++I)
if ((*I)->isDefinition())
WorkList.push_back(*I);
// Inline functions bottom up from the leaves.
while (!WorkList.empty()) {
Inliner.inlineDevirtualizeAndSpecialize(WorkList.back(), this, DA, LA, CHA);
WorkList.pop_back();
}
}
StringRef getName() override { return PassName; }
};
} // end anonymous namespace
/// Create an inliner pass that does not inline functions that are marked with
/// the @_semantics, @effects or global_init attributes.
SILTransform *swift::createEarlyInliner() {
return new SILPerformanceInlinerPass(
InlineSelection::NoSemanticsAndGlobalInit, "Early");
}
/// Create an inliner pass that does not inline functions that are marked with
/// the global_init attribute or have an "availability" semantics attribute.
SILTransform *swift::createPerfInliner() {
return new SILPerformanceInlinerPass(InlineSelection::NoGlobalInit, "Middle");
}
/// Create an inliner pass that inlines all functions that are marked with
/// the @_semantics, @effects or global_init attributes.
SILTransform *swift::createLateInliner() {
return new SILPerformanceInlinerPass(InlineSelection::Everything, "Late");
}