//===- PerformanceInliner.cpp - Basic cost based inlining for performance -===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sil-inliner"
#include "swift/SIL/SILInstruction.h"
#include "swift/SIL/Dominance.h"
#include "swift/SIL/SILModule.h"
#include "swift/SIL/Projection.h"
#include "swift/SILOptimizer/Analysis/BasicCalleeAnalysis.h"
#include "swift/SILOptimizer/Analysis/ColdBlockInfo.h"
#include "swift/SILOptimizer/Analysis/DominanceAnalysis.h"
#include "swift/SILOptimizer/Analysis/FunctionOrder.h"
#include "swift/SILOptimizer/Analysis/LoopAnalysis.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/Local.h"
#include "swift/SILOptimizer/Utils/ConstantFolding.h"
#include "swift/SILOptimizer/Utils/Devirtualize.h"
#include "swift/SILOptimizer/Utils/Generics.h"
#include "swift/SILOptimizer/Utils/SILInliner.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/MapVector.h"
#include <functional>
using namespace swift;
STATISTIC(NumFunctionsInlined, "Number of functions inlined");
namespace {
// Threshold for deterministic testing of the inline heuristic.
// It specifies an instruction cost limit at which a simplified model is used
// for instruction costs: only builtin instructions have a cost, and that
// cost is exactly 1.
llvm::cl::opt<int> TestThreshold("sil-inline-test-threshold",
llvm::cl::init(-1), llvm::cl::Hidden);
llvm::cl::opt<int> TestOpt("sil-inline-test",
llvm::cl::init(0), llvm::cl::Hidden);
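// For testing, these options can be passed on a SIL tool's command line.
// A hypothetical invocation (the pass flag may differ between builds):
//
//   sil-opt -inline -sil-inline-test-threshold=10 input.sil
//
// When the test threshold is active, testCost() below replaces the regular
// cost model.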
// The following constants define the cost model for inlining.
// The base value for every call: it represents the benefit of removing the
// call overhead.
// This value can be overridden with the -sil-inline-threshold option.
const unsigned RemovedCallBenefit = 80;
// The benefit if the condition of a terminator instruction becomes constant
// due to inlining.
const unsigned ConstTerminatorBenefit = 2;
// The benefit if the operand of an apply becomes constant, e.g. if a closure
// is passed to an apply instruction in the callee.
const unsigned ConstCalleeBenefit = 150;
// Additional benefit for each loop level.
const unsigned LoopBenefitFactor = 40;
// Up to approximately this cost level, a function can be inlined without
// increasing the code size.
const unsigned TrivialFunctionThreshold = 20;
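// As a worked example of this model (a sketch based on the defaults above
// and the logic in isProfitableToInline below): a call site at loop depth 2
// starts with a benefit of 80 + 2 * 40 = 160. If, in addition, a closure
// argument becomes a constant function at loop depth 1 inside the callee,
// the benefit grows by 150 + 1 * 40 to 350, i.e. a callee cost of up to 350
// is accepted.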
// Represents a value in integer constant evaluation.
struct IntConst {
IntConst() : isValid(false), isFromCaller(false) { }
IntConst(const APInt &value, bool isFromCaller) :
value(value), isValid(true), isFromCaller(isFromCaller) { }
// The actual value.
APInt value;
// True if the value is valid, i.e. could be evaluated to a constant.
bool isValid;
// True if the value is only valid because a constant is passed to the
// callee. False if constant propagation could do the same job inside the
// callee without inlining it.
bool isFromCaller;
};
// Tracks constants in the caller and callee to get an estimation of which
// values become constant if the callee is inlined.
// This can be seen as a "simulation" of several optimizations: SROA, mem2reg
// and constant propagation.
// Note that this is only a simplified model and not correct in all cases.
// For example, aliasing information is not taken into account.
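// For example (a simplified SIL sketch; names and types are hypothetical):
//
//   // In the caller:
//   %c = integer_literal $Builtin.Int64, 10
//   %r = apply %callee(%c) : $@convention(thin) (Builtin.Int64) -> Builtin.Int64
//
//   // In the callee, where %0 is the argument:
//   %cmp = builtin "cmp_slt_Int64"(%0 : $Builtin.Int64, %limit : $Builtin.Int64) : $Builtin.Int1
//   cond_br %cmp, bb1, bb2
//
// The tracker follows %0 back to the integer_literal in the caller, so it
// can estimate that %cmp becomes constant and that one successor of the
// cond_br becomes dead after inlining.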
class ConstantTracker {
// Links between loaded and stored values.
// The key is a load instruction; the value is the corresponding store
// instruction which stores the loaded value. Both key and value can also
// be copy_addr instructions.
llvm::DenseMap<SILInstruction *, SILInstruction *> links;
// The current stored values at memory addresses.
// The key is the base address of the memory (after skipping address
// projections). The value is a store (or copy_addr) instruction which
// stores the current value.
// This is only an estimation, because, for example, it does not consider
// potential aliasing.
llvm::DenseMap<SILValue, SILInstruction *> memoryContent;
// Cache for evaluated constants.
llvm::SmallDenseMap<BuiltinInst *, IntConst> constCache;
// The caller/callee function which is tracked.
SILFunction *F;
// The constant tracker of the caller function (null if this is the
// tracker of the callee).
ConstantTracker *callerTracker;
// The apply instruction in the caller (null if this is the tracker of the
// callee).
FullApplySite AI;
// Walks through address projections and (optionally) collects them.
// Returns the base address, i.e. the first address which is not a
// projection.
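// For example (a sketch): given
//   %a = struct_element_addr %base : $*S, #S.x
//   %b = struct_element_addr %a : $*T, #T.y
// scanProjections(%b) returns %base and, if requested, collects both
// address projections.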
SILValue scanProjections(SILValue addr,
SmallVectorImpl<Projection> *Result = nullptr);
// Get the stored value for a load. The loadInst can be either a real load
// or a copy_addr.
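// For example (a sketch): given a preceding
//   store %v to %addr : $*S
// and the load
//   %a = struct_element_addr %addr : $*S, #S.x
//   %l = load %a : $*Int
// the stored value %v is returned, and the projection #S.x remains on
// projStack so that getDef can later select the #S.x member of %v.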
SILValue getStoredValue(SILInstruction *loadInst,
ProjectionPath &projStack);
// Gets the parameter in the caller for a function argument.
SILValue getParam(SILValue value) {
if (SILArgument *arg = dyn_cast<SILArgument>(value)) {
if (AI && arg->isFunctionArg() && arg->getFunction() == F) {
// Continue at the caller.
return AI.getArgument(arg->getIndex());
}
}
return SILValue();
}
SILInstruction *getMemoryContent(SILValue addr) {
// The memory content can be stored in this ConstantTracker or in the
// caller's ConstantTracker.
SILInstruction *storeInst = memoryContent[addr];
if (storeInst)
return storeInst;
if (callerTracker)
return callerTracker->getMemoryContent(addr);
return nullptr;
}
// Gets the estimated definition of a value.
SILInstruction *getDef(SILValue val, ProjectionPath &projStack);
// Gets the estimated integer constant result of a builtin.
IntConst getBuiltinConst(BuiltinInst *BI, int depth);
public:
// Constructor for the caller function.
ConstantTracker(SILFunction *function) :
F(function), callerTracker(nullptr), AI()
{ }
// Constructor for the callee function.
ConstantTracker(SILFunction *function, ConstantTracker *caller,
FullApplySite callerApply) :
F(function), callerTracker(caller), AI(callerApply)
{ }
void beginBlock() {
// Currently we don't do any sophisticated dataflow analysis, so we keep
// the memoryContent alive only for a single block.
memoryContent.clear();
}
// Must be called for each instruction visited in dominance order.
void trackInst(SILInstruction *inst);
// Gets the estimated definition of a value.
SILInstruction *getDef(SILValue val) {
ProjectionPath projStack;
return getDef(val, projStack);
}
// Gets the estimated definition of a value if it is in the caller.
SILInstruction *getDefInCaller(SILValue val) {
SILInstruction *def = getDef(val);
if (def && def->getFunction() != F)
return def;
return nullptr;
}
// Gets the estimated integer constant of a value.
IntConst getIntConst(SILValue val, int depth = 0);
};
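// Typical usage of ConstantTracker (a sketch mirroring
// collectAppliesToInline and isProfitableToInline below): visit the blocks
// of a function in dominance order, feed every instruction to the tracker,
// and query constants on demand.
//
//   ConstantTracker tracker(F);
//   DominanceOrder domOrder(&F->front(), DT, F->size());
//   while (SILBasicBlock *block = domOrder.getNext()) {
//     tracker.beginBlock();
//     for (SILInstruction &I : *block) {
//       tracker.trackInst(&I);
//       // ... e.g. query tracker.getIntConst(operand) on operands of
//       // interest ...
//     }
//     domOrder.pushChildren(block);
//   }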
// Controls the decision to inline functions with @_semantics, @effects and
// global_init attributes.
enum class InlineSelection {
Everything,
NoGlobalInit, // and no availability semantics calls
NoSemanticsAndGlobalInit
};
class SILPerformanceInliner {
/// The inline threshold.
const int InlineCostThreshold;
/// Specifies which functions not to inline, based on @_semantics and
/// global_init attributes.
InlineSelection WhatToInline;
/// A set of pairs of function names. This set records successful
/// inlining operations and is used to prevent infinite inlining of
/// recursive functions. The pair (A, B) records inlining of function
/// B into A.
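/// For example: after inlining "bar" into "foo", the set contains the pair
/// ("foo", "bar"), and hasInliningCycle() rejects any further request to
/// inline "bar" into "foo".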
llvm::DenseSet<std::pair<StringRef, StringRef>> InlinedFunctions;
SILFunction *getEligibleFunction(FullApplySite AI);
bool isProfitableToInline(FullApplySite AI, unsigned loopDepthOfAI,
DominanceAnalysis *DA,
SILLoopAnalysis *LA,
ConstantTracker &constTracker);
void visitColdBlocks(SmallVectorImpl<FullApplySite> &AppliesToInline,
SILBasicBlock *root, DominanceInfo *DT);
void collectAppliesToInline(SILFunction *Caller,
SmallVectorImpl<FullApplySite> &Applies,
DominanceAnalysis *DA, SILLoopAnalysis *LA);
/// This method is responsible for breaking recursive cycles
/// in the inliner (some of which are only exposed via devirtualization).
/// \return true if the function \p Callee was previously inlined into
/// \p Caller and the inliner needs to reject this inlining request.
bool hasInliningCycle(SILFunction *Caller, SILFunction *Callee);
FullApplySite devirtualize(FullApplySite Apply,
ClassHierarchyAnalysis *CHA);
bool devirtualizeAndSpecializeApplies(
llvm::SmallVectorImpl<ApplySite> &Applies,
SILModuleTransform *MT,
ClassHierarchyAnalysis *CHA,
llvm::SmallVectorImpl<SILFunction *> &WorkList);
ApplySite specializeGeneric(ApplySite Apply,
llvm::SmallVectorImpl<ApplySite> &NewApplies);
bool
inlineCallsIntoFunction(SILFunction *F, DominanceAnalysis *DA,
SILLoopAnalysis *LA,
llvm::SmallVectorImpl<FullApplySite> &NewApplies);
public:
SILPerformanceInliner(int threshold,
InlineSelection WhatToInline)
: InlineCostThreshold(threshold),
WhatToInline(WhatToInline) {}
void inlineDevirtualizeAndSpecialize(SILFunction *WorkItem,
SILModuleTransform *MT,
DominanceAnalysis *DA,
SILLoopAnalysis *LA,
ClassHierarchyAnalysis *CHA);
};
}
//===----------------------------------------------------------------------===//
// ConstantTracker
//===----------------------------------------------------------------------===//
void ConstantTracker::trackInst(SILInstruction *inst) {
if (auto *LI = dyn_cast<LoadInst>(inst)) {
SILValue baseAddr = scanProjections(LI->getOperand());
if (SILInstruction *loadLink = getMemoryContent(baseAddr))
links[LI] = loadLink;
} else if (StoreInst *SI = dyn_cast<StoreInst>(inst)) {
SILValue baseAddr = scanProjections(SI->getOperand(1));
memoryContent[baseAddr] = SI;
} else if (CopyAddrInst *CAI = dyn_cast<CopyAddrInst>(inst)) {
if (!CAI->isTakeOfSrc()) {
// Treat a copy_addr as a load + store
SILValue loadAddr = scanProjections(CAI->getOperand(0));
if (SILInstruction *loadLink = getMemoryContent(loadAddr)) {
links[CAI] = loadLink;
SILValue storeAddr = scanProjections(CAI->getOperand(1));
memoryContent[storeAddr] = CAI;
}
}
}
}
SILValue ConstantTracker::scanProjections(SILValue addr,
SmallVectorImpl<Projection> *Result) {
for (;;) {
if (Projection::isAddrProjection(addr)) {
SILInstruction *I = cast<SILInstruction>(addr.getDef());
if (Result) {
Optional<Projection> P = Projection::addressProjectionForInstruction(I);
Result->push_back(P.getValue());
}
addr = I->getOperand(0);
continue;
}
if (SILValue param = getParam(addr)) {
// Go to the caller.
addr = param;
continue;
}
// Return the base address, i.e. the first address that is not a projection.
return addr;
}
}
SILValue ConstantTracker::getStoredValue(SILInstruction *loadInst,
ProjectionPath &projStack) {
SILInstruction *store = links[loadInst];
if (!store && callerTracker)
store = callerTracker->links[loadInst];
if (!store) return SILValue();
assert(isa<LoadInst>(loadInst) || isa<CopyAddrInst>(loadInst));
// Push the address projections of the load onto the stack.
SmallVector<Projection, 4> loadProjections;
scanProjections(loadInst->getOperand(0), &loadProjections);
for (const Projection &proj : loadProjections) {
projStack.push_back(proj);
}
// Pop the address projections of the store from the stack.
SmallVector<Projection, 4> storeProjections;
scanProjections(store->getOperand(1), &storeProjections);
for (auto iter = storeProjections.rbegin(); iter != storeProjections.rend();
++iter) {
const Projection &proj = *iter;
// The corresponding load-projection must match the store-projection.
if (projStack.empty() || projStack.back() != proj)
return SILValue();
projStack.pop_back();
}
if (isa<StoreInst>(store))
return store->getOperand(0);
// A copy_addr instruction is both a load and a store, so we follow the
// link again.
assert(isa<CopyAddrInst>(store));
return getStoredValue(store, projStack);
}
// Get the aggregate member based on the top of the projection stack.
static SILValue getMember(SILInstruction *inst, ProjectionPath &projStack) {
if (!projStack.empty()) {
const Projection &proj = projStack.back();
return proj.getOperandForAggregate(inst);
}
return SILValue();
}
SILInstruction *ConstantTracker::getDef(SILValue val,
ProjectionPath &projStack) {
// Track the value up the dominator tree.
for (;;) {
if (SILInstruction *inst = dyn_cast<SILInstruction>(val)) {
if (auto proj = Projection::valueProjectionForInstruction(inst)) {
// Extract a member from a struct/tuple/enum.
projStack.push_back(proj.getValue());
val = inst->getOperand(0);
continue;
} else if (SILValue member = getMember(inst, projStack)) {
// The opposite of a projection instruction: composing a struct/tuple.
projStack.pop_back();
val = member;
continue;
} else if (SILValue loadedVal = getStoredValue(inst, projStack)) {
// A value loaded from memory.
val = loadedVal;
continue;
} else if (isa<ThinToThickFunctionInst>(inst)) {
val = inst->getOperand(0);
continue;
}
return inst;
} else if (SILValue param = getParam(val)) {
// Continue in the caller.
val = param;
continue;
}
return nullptr;
}
}
IntConst ConstantTracker::getBuiltinConst(BuiltinInst *BI, int depth) {
const BuiltinInfo &Builtin = BI->getBuiltinInfo();
OperandValueArrayRef Args = BI->getArguments();
switch (Builtin.ID) {
default: break;
// Fold comparison predicates.
#define BUILTIN(id, name, Attrs)
#define BUILTIN_BINARY_PREDICATE(id, name, attrs, overload) \
case BuiltinValueKind::id:
#include "swift/AST/Builtins.def"
{
IntConst lhs = getIntConst(Args[0], depth);
IntConst rhs = getIntConst(Args[1], depth);
if (lhs.isValid && rhs.isValid) {
return IntConst(constantFoldComparison(lhs.value, rhs.value,
Builtin.ID),
lhs.isFromCaller || rhs.isFromCaller);
}
break;
}
case BuiltinValueKind::SAddOver:
case BuiltinValueKind::UAddOver:
case BuiltinValueKind::SSubOver:
case BuiltinValueKind::USubOver:
case BuiltinValueKind::SMulOver:
case BuiltinValueKind::UMulOver: {
IntConst lhs = getIntConst(Args[0], depth);
IntConst rhs = getIntConst(Args[1], depth);
if (lhs.isValid && rhs.isValid) {
bool IgnoredOverflow;
return IntConst(constantFoldBinaryWithOverflow(lhs.value, rhs.value,
IgnoredOverflow,
getLLVMIntrinsicIDForBuiltinWithOverflow(Builtin.ID)),
lhs.isFromCaller || rhs.isFromCaller);
}
break;
}
case BuiltinValueKind::SDiv:
case BuiltinValueKind::SRem:
case BuiltinValueKind::UDiv:
case BuiltinValueKind::URem: {
IntConst lhs = getIntConst(Args[0], depth);
IntConst rhs = getIntConst(Args[1], depth);
if (lhs.isValid && rhs.isValid && rhs.value != 0) {
bool IgnoredOverflow;
return IntConst(constantFoldDiv(lhs.value, rhs.value,
IgnoredOverflow, Builtin.ID),
lhs.isFromCaller || rhs.isFromCaller);
}
break;
}
case BuiltinValueKind::And:
case BuiltinValueKind::AShr:
case BuiltinValueKind::LShr:
case BuiltinValueKind::Or:
case BuiltinValueKind::Shl:
case BuiltinValueKind::Xor: {
IntConst lhs = getIntConst(Args[0], depth);
IntConst rhs = getIntConst(Args[1], depth);
if (lhs.isValid && rhs.isValid) {
return IntConst(constantFoldBitOperation(lhs.value, rhs.value,
Builtin.ID),
lhs.isFromCaller || rhs.isFromCaller);
}
break;
}
case BuiltinValueKind::Trunc:
case BuiltinValueKind::ZExt:
case BuiltinValueKind::SExt:
case BuiltinValueKind::TruncOrBitCast:
case BuiltinValueKind::ZExtOrBitCast:
case BuiltinValueKind::SExtOrBitCast: {
IntConst val = getIntConst(Args[0], depth);
if (val.isValid) {
return IntConst(constantFoldCast(val.value, Builtin), val.isFromCaller);
}
break;
}
}
return IntConst();
}
// Tries to evaluate the integer constant of a value. The \p depth is used
// to limit the complexity.
IntConst ConstantTracker::getIntConst(SILValue val, int depth) {
// Don't spend too much time with constant evaluation.
if (depth >= 10)
return IntConst();
SILInstruction *I = getDef(val);
if (!I)
return IntConst();
if (auto *IL = dyn_cast<IntegerLiteralInst>(I)) {
return IntConst(IL->getValue(), IL->getFunction() != F);
}
if (auto *BI = dyn_cast<BuiltinInst>(I)) {
if (constCache.count(BI) != 0)
return constCache[BI];
IntConst builtinConst = getBuiltinConst(BI, depth + 1);
constCache[BI] = builtinConst;
return builtinConst;
}
return IntConst();
}
//===----------------------------------------------------------------------===//
// Performance Inliner
//===----------------------------------------------------------------------===//
bool SILPerformanceInliner::hasInliningCycle(SILFunction *Caller,
SILFunction *Callee) {
// Reject simple recursions.
if (Caller == Callee) return true;
StringRef CallerName = Caller->getName();
StringRef CalleeName = Callee->getName();
bool InlinedBefore =
InlinedFunctions.count(std::make_pair(CallerName, CalleeName));
// If the Callee was inlined into the Caller in previous inlining iterations
// then we need to reject this inlining request to prevent a cycle.
return InlinedBefore;
}
// Returns true if the callee makes too many (more than two) self-recursive
// calls; such callees are rejected by getEligibleFunction below.
static bool calleeHasMinimalSelfRecursion(SILFunction *Callee) {
int countSelfRecursiveCalls = 0;
for (auto &BB : *Callee) {
for (auto &I : BB) {
if (auto Apply = FullApplySite::isa(&I)) {
if (Apply.getCalleeFunction() == Callee)
++countSelfRecursiveCalls;
if (countSelfRecursiveCalls > 2)
return true;
}
}
}
return false;
}
// Returns the callee of an apply_inst if it is basically inlinable.
SILFunction *SILPerformanceInliner::getEligibleFunction(FullApplySite AI) {
SILFunction *Callee = AI.getCalleeFunction();
if (!Callee) {
DEBUG(llvm::dbgs() << " FAIL: Cannot find inlineable callee.\n");
return nullptr;
}
// Don't inline functions that are marked with the @_semantics or @effects
// attribute if the inliner is asked not to inline them.
if (Callee->hasSemanticsAttrs() || Callee->hasEffectsKind()) {
if (WhatToInline == InlineSelection::NoSemanticsAndGlobalInit) {
DEBUG(llvm::dbgs() << " FAIL: Function " << Callee->getName()
<< " has special semantics or effects attribute.\n");
return nullptr;
}
// The "availability" semantics attribute is treated like global-init.
if (Callee->hasSemanticsAttrs() &&
WhatToInline != InlineSelection::Everything &&
Callee->hasSemanticsAttrsThatStartsWith("availability")) {
return nullptr;
}
} else if (Callee->isGlobalInit()) {
if (WhatToInline != InlineSelection::Everything) {
DEBUG(llvm::dbgs() << " FAIL: Function " << Callee->getName()
<< " has the global-init attribute.\n");
return nullptr;
}
}
// We can't inline external declarations.
if (Callee->empty() || Callee->isExternalDeclaration()) {
DEBUG(llvm::dbgs() << " FAIL: Cannot inline external " <<
Callee->getName() << ".\n");
return nullptr;
}
// Explicitly disabled inlining.
if (Callee->getInlineStrategy() == NoInline) {
DEBUG(llvm::dbgs() << " FAIL: noinline attribute on " <<
Callee->getName() << ".\n");
return nullptr;
}
if (!Callee->shouldOptimize()) {
DEBUG(llvm::dbgs() << " FAIL: optimizations disabled on " <<
Callee->getName() << ".\n");
return nullptr;
}
// We don't support this yet.
if (AI.hasSubstitutions()) {
DEBUG(llvm::dbgs() << " FAIL: Generic substitutions on " <<
Callee->getName() << ".\n");
return nullptr;
}
// We don't support inlining a function that binds dynamic self because we
// have no mechanism to preserve the original function's local self metadata.
if (computeMayBindDynamicSelf(Callee)) {
DEBUG(llvm::dbgs() << " FAIL: Binding dynamic Self in " <<
Callee->getName() << ".\n");
return nullptr;
}
SILFunction *Caller = AI.getFunction();
// Detect inlining cycles.
if (hasInliningCycle(Caller, Callee)) {
DEBUG(llvm::dbgs() << " FAIL: Detected a recursion inlining " <<
Callee->getName() << ".\n");
return nullptr;
}
// A non-fragile function may not be inlined into a fragile function.
if (Caller->isFragile() && !Callee->isFragile()) {
DEBUG(llvm::dbgs() << " FAIL: Can't inline fragile " <<
Callee->getName() << ".\n");
return nullptr;
}
// Inlining self-recursive functions into other functions can result
// in excessive code duplication since we run the inliner multiple
// times in our pipeline, so we only do it for callees with few
// self-recursive calls.
if (calleeHasMinimalSelfRecursion(Callee)) {
DEBUG(llvm::dbgs() << " FAIL: Callee is self-recursive in "
<< Callee->getName() << ".\n");
return nullptr;
}
DEBUG(llvm::dbgs() << " Eligible callee: " <<
Callee->getName() << "\n");
return Callee;
}
// Gets the cost of an instruction by using the simplified test-model: only
// builtin instructions have a cost, and that cost is exactly 1.
static unsigned testCost(SILInstruction *I) {
switch (I->getKind()) {
case ValueKind::BuiltinInst:
return 1;
default:
return 0;
}
}
// Returns the taken block of a terminator instruction if the condition turns
// out to be constant.
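// For example (a sketch): if the callee ends a block with
//   cond_br %cond, bb1, bb2
// and %cond evaluates to the constant 1 because of an argument passed in
// from the caller, the taken block is bb1, and the instructions in bb2 need
// not be counted towards the inlining cost.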
static SILBasicBlock *getTakenBlock(TermInst *term,
ConstantTracker &constTracker) {
if (CondBranchInst *CBI = dyn_cast<CondBranchInst>(term)) {
IntConst condConst = constTracker.getIntConst(CBI->getCondition());
if (condConst.isFromCaller) {
return condConst.value != 0 ? CBI->getTrueBB() : CBI->getFalseBB();
}
return nullptr;
}
if (SwitchValueInst *SVI = dyn_cast<SwitchValueInst>(term)) {
IntConst switchConst = constTracker.getIntConst(SVI->getOperand());
if (switchConst.isFromCaller) {
for (unsigned Idx = 0; Idx < SVI->getNumCases(); ++Idx) {
auto switchCase = SVI->getCase(Idx);
if (auto *IL = dyn_cast<IntegerLiteralInst>(switchCase.first)) {
if (switchConst.value == IL->getValue())
return switchCase.second;
} else {
return nullptr;
}
}
if (SVI->hasDefault())
return SVI->getDefaultBB();
}
return nullptr;
}
if (SwitchEnumInst *SEI = dyn_cast<SwitchEnumInst>(term)) {
if (SILInstruction *def = constTracker.getDefInCaller(SEI->getOperand())) {
if (EnumInst *EI = dyn_cast<EnumInst>(def)) {
for (unsigned Idx = 0; Idx < SEI->getNumCases(); ++Idx) {
auto enumCase = SEI->getCase(Idx);
if (enumCase.first == EI->getElement())
return enumCase.second;
}
if (SEI->hasDefault())
return SEI->getDefaultBB();
}
}
return nullptr;
}
if (CheckedCastBranchInst *CCB = dyn_cast<CheckedCastBranchInst>(term)) {
if (SILInstruction *def = constTracker.getDefInCaller(CCB->getOperand())) {
if (UpcastInst *UCI = dyn_cast<UpcastInst>(def)) {
SILType castType = UCI->getOperand()->getType(0);
if (CCB->getCastType().isSuperclassOf(castType)) {
return CCB->getSuccessBB();
}
if (!castType.isSuperclassOf(CCB->getCastType())) {
return CCB->getFailureBB();
}
}
}
}
return nullptr;
}
/// Return true if inlining this call site is profitable.
bool SILPerformanceInliner::isProfitableToInline(FullApplySite AI,
unsigned loopDepthOfAI,
DominanceAnalysis *DA,
SILLoopAnalysis *LA,
ConstantTracker &callerTracker) {
SILFunction *Callee = AI.getCalleeFunction();
if (Callee->getInlineStrategy() == AlwaysInline)
return true;
ConstantTracker constTracker(Callee, &callerTracker, AI);
DominanceInfo *DT = DA->get(Callee);
SILLoopInfo *LI = LA->get(Callee);
DominanceOrder domOrder(&Callee->front(), DT, Callee->size());
// Calculate the inlining cost of the callee.
unsigned CalleeCost = 0;
unsigned Benefit = InlineCostThreshold > 0 ? InlineCostThreshold :
RemovedCallBenefit;
Benefit += loopDepthOfAI * LoopBenefitFactor;
int testThreshold = TestThreshold;
while (SILBasicBlock *block = domOrder.getNext()) {
constTracker.beginBlock();
unsigned loopDepth = LI->getLoopDepth(block);
for (SILInstruction &I : *block) {
constTracker.trackInst(&I);
auto ICost = instructionInlineCost(I);
if (testThreshold >= 0) {
// We are in test-mode: use a simplified cost model.
CalleeCost += testCost(&I);
} else {
// Use the regular cost model.
CalleeCost += unsigned(ICost);
}
if (ApplyInst *AI = dyn_cast<ApplyInst>(&I)) {
// Check if the callee is passed as an argument. If so, increase the
// threshold, because inlining will (probably) eliminate the closure.
SILInstruction *def = constTracker.getDefInCaller(AI->getCallee());
if (def && (isa<FunctionRefInst>(def) || isa<PartialApplyInst>(def))) {
DEBUG(llvm::dbgs() << " Boost: apply const function at"
<< *AI);
Benefit += ConstCalleeBenefit + loopDepth * LoopBenefitFactor;
testThreshold *= 2;
}
}
}
// Don't count costs in blocks which are dead after inlining.
SILBasicBlock *takenBlock = getTakenBlock(block->getTerminator(),
constTracker);
if (takenBlock) {
Benefit += ConstTerminatorBenefit + TestOpt;
DEBUG(llvm::dbgs() << " Take bb" << takenBlock->getDebugID() <<
" of" << *block->getTerminator());
domOrder.pushChildrenIf(block, [=] (SILBasicBlock *child) {
return child->getSinglePredecessor() != block || child == takenBlock;
});
} else {
domOrder.pushChildren(block);
}
}
unsigned Threshold = Benefit; // The default.
if (testThreshold >= 0) {
// We are in testing mode.
Threshold = testThreshold;
} else if (AI.getFunction()->isThunk()) {
// Only inline trivial functions into thunks (which will not increase the
// code size).
Threshold = TrivialFunctionThreshold;
}
if (CalleeCost > Threshold) {
DEBUG(llvm::dbgs() << " NO: Function too big to inline, "
"cost: " << CalleeCost << ", threshold: " << Threshold << "\n");
return false;
}
DEBUG(llvm::dbgs() << " YES: ready to inline, "
"cost: " << CalleeCost << ", threshold: " << Threshold << "\n");
return true;
}
/// Return true if inlining this call site into a cold block is profitable.
static bool isProfitableInColdBlock(SILFunction *Callee) {
if (Callee->getInlineStrategy() == AlwaysInline)
return true;
// Testing with the TestThreshold disables inlining into cold blocks.
if (TestThreshold >= 0)
return false;
unsigned CalleeCost = 0;
for (SILBasicBlock &Block : *Callee) {
for (SILInstruction &I : Block) {
auto ICost = instructionInlineCost(I);
CalleeCost += (unsigned)ICost;
if (CalleeCost > TrivialFunctionThreshold)
return false;
}
}
DEBUG(llvm::dbgs() << " YES: ready to inline into cold block, cost:"
<< CalleeCost << "\n");
return true;
}
/// FIXME: Total hack to work around issues exposed while ripping call
/// graph maintenance from the inliner.
static void tryLinkCallee(FullApplySite Apply) {
auto *F = Apply.getCalleeFunction();
if (!F || F->isDefinition()) return;
auto &M = Apply.getFunction()->getModule();
M.linkFunction(F, SILModule::LinkingMode::LinkAll);
}
// Attempt to devirtualize. When successful, replaces the old apply
// with the new one and returns the new one. When unsuccessful returns
// an empty apply site.
FullApplySite SILPerformanceInliner::devirtualize(FullApplySite Apply,
ClassHierarchyAnalysis *CHA) {
auto NewInstPair = tryDevirtualizeApply(Apply, CHA);
if (!NewInstPair.second)
return FullApplySite();
replaceDeadApply(Apply, NewInstPair.first);
auto NewApply = FullApplySite(NewInstPair.second.getInstruction());
// FIXME: This should not be needed. We should instead be linking
// everything in up front, including everything transitively
// referenced through vtables and witness tables.
tryLinkCallee(NewApply);
return FullApplySite(NewInstPair.second.getInstruction());
}
ApplySite SILPerformanceInliner::specializeGeneric(
ApplySite Apply, llvm::SmallVectorImpl<ApplySite> &NewApplies) {
assert(NewApplies.empty() && "Expected out parameter for new applies!");
if (!Apply.hasSubstitutions())
return ApplySite();
auto *Callee = Apply.getCalleeFunction();
if (!Callee || Callee->isExternalDeclaration())
return ApplySite();
auto Filter = [](SILInstruction *I) -> bool {
return ApplySite::isa(I) != ApplySite();
};
CloneCollector Collector(Filter);
SILFunction *SpecializedFunction;
auto Specialized = trySpecializeApplyOfGeneric(Apply,
SpecializedFunction,
Collector);
if (!Specialized)
return ApplySite();
// Track the new applies from the specialization.
for (auto NewCallSite : Collector.getInstructionPairs())
NewApplies.push_back(ApplySite(NewCallSite.first));
auto FullApply = FullApplySite::isa(Apply.getInstruction());
if (!FullApply) {
assert(!FullApplySite::isa(Specialized.getInstruction()) &&
"Unexpected full apply generated!");
// Replace the old apply with the new and delete the old.
replaceDeadApply(Apply, Specialized.getInstruction());
return ApplySite(Specialized);
}
// Replace the old apply with the new and delete the old.
replaceDeadApply(Apply, Specialized.getInstruction());
return Specialized;
}
static void collectAllAppliesInFunction(SILFunction *F,
llvm::SmallVectorImpl<ApplySite> &Applies) {
assert(Applies.empty() && "Expected empty vector to store into!");
for (auto &B : *F)
for (auto &I : B)
if (auto Apply = ApplySite::isa(&I))
Applies.push_back(Apply);
}
// Devirtualize and specialize a group of applies, returning a
// worklist of newly exposed function references that should be
// considered for inlining before continuing with the caller that has
// the passed-in applies.
//
// The returned worklist is stacked such that the last things we want
// to process are earlier on the list.
//
// Returns true if any changes were made.
bool SILPerformanceInliner::devirtualizeAndSpecializeApplies(
llvm::SmallVectorImpl<ApplySite> &Applies,
SILModuleTransform *MT,
ClassHierarchyAnalysis *CHA,
llvm::SmallVectorImpl<SILFunction *> &WorkList) {
assert(WorkList.empty() && "Expected empty worklist for return results!");
bool ChangedAny = false;
// The set of all new function references generated by
// devirtualization and specialization.
llvm::SetVector<SILFunction *> NewRefs;
// Process all applies passed in, plus any new ones that are pushed
// on as a result of specializing the referenced functions.
while (!Applies.empty()) {
auto Apply = Applies.back();
Applies.pop_back();
bool ChangedApply = false;
if (auto FullApply = FullApplySite::isa(Apply.getInstruction())) {
if (auto NewApply = devirtualize(FullApply, CHA)) {
ChangedApply = true;
Apply = ApplySite(NewApply.getInstruction());
}
}
llvm::SmallVector<ApplySite, 4> NewApplies;
if (auto NewApply = specializeGeneric(Apply, NewApplies)) {
ChangedApply = true;
Apply = NewApply;
Applies.insert(Applies.end(), NewApplies.begin(), NewApplies.end());
}
if (ChangedApply) {
ChangedAny = true;
auto *NewCallee = Apply.getCalleeFunction();
assert(NewCallee && "Expected directly referenced function!");
// Track all new references to function definitions.
if (NewCallee->isDefinition())
NewRefs.insert(NewCallee);
// TODO: Do we need to invalidate everything at this point?
// What about side-effects analysis? What about type analysis?
MT->invalidateAnalysis(Apply.getFunction(),
SILAnalysis::InvalidationKind::Everything);
}
}
// Copy out all the new function references gathered.
if (ChangedAny)
WorkList.insert(WorkList.end(), NewRefs.begin(), NewRefs.end());
return ChangedAny;
}
void SILPerformanceInliner::collectAppliesToInline(
SILFunction *Caller, SmallVectorImpl<FullApplySite> &Applies,
DominanceAnalysis *DA, SILLoopAnalysis *LA) {
DominanceInfo *DT = DA->get(Caller);
SILLoopInfo *LI = LA->get(Caller);
ConstantTracker constTracker(Caller);
DominanceOrder domOrder(&Caller->front(), DT, Caller->size());
// Go through all instructions and find candidates for inlining.
// We do this in dominance order for the constTracker.
SmallVector<FullApplySite, 8> InitialCandidates;
while (SILBasicBlock *block = domOrder.getNext()) {
constTracker.beginBlock();
unsigned loopDepth = LI->getLoopDepth(block);
for (auto I = block->begin(), E = block->end(); I != E; ++I) {
constTracker.trackInst(&*I);
if (!FullApplySite::isa(&*I))
continue;
FullApplySite AI = FullApplySite(&*I);
DEBUG(llvm::dbgs() << " Check:" << *I);
auto *Callee = getEligibleFunction(AI);
if (Callee) {
if (isProfitableToInline(AI, loopDepth, DA, LA, constTracker))
InitialCandidates.push_back(AI);
}
}
domOrder.pushChildrenIf(block, [&] (SILBasicBlock *child) {
if (ColdBlockInfo::isSlowPath(block, child)) {
// Handle cold blocks separately.
visitColdBlocks(InitialCandidates, child, DT);
return false;
}
return true;
});
}
// Calculate how many times a callee is called from this caller.
llvm::DenseMap<SILFunction *, unsigned> CalleeCount;
for (auto AI : InitialCandidates) {
SILFunction *Callee = AI.getCalleeFunction();
assert(Callee && "apply_inst does not have a direct callee anymore");
CalleeCount[Callee]++;
}
// Now copy each candidate callee that has a small enough number of
// call sites into the final set of call sites.
for (auto AI : InitialCandidates) {
SILFunction *Callee = AI.getCalleeFunction();
assert(Callee && "apply_inst does not have a direct callee anymore");
const unsigned CallsToCalleeThreshold = 1024;
if (CalleeCount[Callee] <= CallsToCalleeThreshold)
Applies.push_back(AI);
}
}
/// \brief Attempt to inline all calls smaller than our threshold.
/// Returns true if a function was inlined.
bool SILPerformanceInliner::inlineCallsIntoFunction(SILFunction *Caller,
DominanceAnalysis *DA,
SILLoopAnalysis *LA,
llvm::SmallVectorImpl<FullApplySite> &NewApplies) {
// Don't optimize functions that are marked with the opt.never attribute.
if (!Caller->shouldOptimize())
return false;
// Construct a log of all of the names of the functions that we've inlined
// in the current iteration.
SmallVector<StringRef, 16> InlinedFunctionNames;
StringRef CallerName = Caller->getName();
DEBUG(llvm::dbgs() << "Visiting Function: " << CallerName << "\n");
assert(NewApplies.empty() && "Expected empty vector to store results in!");
// First step: collect all the functions we want to inline. We
// don't change anything yet so that the dominator information
// remains valid.
SmallVector<FullApplySite, 8> AppliesToInline;
collectAppliesToInline(Caller, AppliesToInline, DA, LA);
if (AppliesToInline.empty())
return false;
// Second step: do the actual inlining.
for (auto AI : AppliesToInline) {
SILFunction *Callee = AI.getCalleeFunction();
assert(Callee && "apply_inst does not have a direct callee anymore");
DEBUG(llvm::dbgs() << " Inline:" << *AI.getInstruction());
if (!Callee->shouldOptimize()) {
DEBUG(llvm::dbgs() << " Cannot inline function " << Callee->getName()
<< " marked to be excluded from optimizations.\n");
continue;
}
SmallVector<SILValue, 8> Args;
for (const auto &Arg : AI.getArguments())
Args.push_back(Arg);
// As we inline and clone we need to collect new applies.
auto Filter = [](SILInstruction *I) -> bool {
return bool(FullApplySite::isa(I));
};
CloneCollector Collector(Filter);
// Notice that we will skip all of the newly inlined ApplyInsts. That's
// okay because we will visit them in our next invocation of the inliner.
TypeSubstitutionMap ContextSubs;
SILInliner Inliner(*Caller, *Callee,
SILInliner::InlineKind::PerformanceInline,
ContextSubs, AI.getSubstitutions(),
Collector.getCallback());
// Record the name of the inlined function (for cycle detection).
InlinedFunctionNames.push_back(Callee->getName());
auto Success = Inliner.inlineFunction(AI, Args);
(void) Success;
// We've already determined we should be able to inline this, so
// we expect it to have happened.
assert(Success && "Expected inliner to inline this function!");
llvm::SmallVector<FullApplySite, 4> AppliesFromInlinee;
for (auto &P : Collector.getInstructionPairs())
AppliesFromInlinee.push_back(FullApplySite(P.first));
recursivelyDeleteTriviallyDeadInstructions(AI.getInstruction(), true);
NewApplies.insert(NewApplies.end(), AppliesFromInlinee.begin(),
AppliesFromInlinee.end());
DA->invalidate(Caller, SILAnalysis::InvalidationKind::Everything);
NumFunctionsInlined++;
}
// Record the names of the functions that we inlined.
// We'll use this list to detect cycles in future iterations of
// the inliner.
for (auto CalleeName : InlinedFunctionNames) {
InlinedFunctions.insert(std::make_pair(CallerName, CalleeName));
}
DEBUG(llvm::dbgs() << "\n");
return true;
}
void SILPerformanceInliner::inlineDevirtualizeAndSpecialize(
SILFunction *Caller,
SILModuleTransform *MT,
DominanceAnalysis *DA,
SILLoopAnalysis *LA,
ClassHierarchyAnalysis *CHA) {
assert(Caller->isDefinition() && "Expected only functions with bodies!");
llvm::SmallVector<SILFunction *, 4> WorkList;
WorkList.push_back(Caller);
while (!WorkList.empty()) {
llvm::SmallVector<ApplySite, 4> WorkItemApplies;
SILFunction *CurrentCaller = WorkList.back();
if (CurrentCaller->shouldOptimize())
collectAllAppliesInFunction(CurrentCaller, WorkItemApplies);
// Devirtualize and specialize any applies we've collected,
// and collect new functions we should inline into as we do
// so.
llvm::SmallVector<SILFunction *, 4> NewFuncs;
if (devirtualizeAndSpecializeApplies(WorkItemApplies, MT, CHA, NewFuncs)) {
WorkList.insert(WorkList.end(), NewFuncs.begin(), NewFuncs.end());
NewFuncs.clear();
}
assert(WorkItemApplies.empty() && "Expected all applies to be processed!");
// We want to inline into each function on the worklist, starting
// with any new ones that were exposed as a result of
// devirtualization (to ensure we're inlining into callees first).
//
// After inlining, we may have new opportunities for
// devirtualization, e.g. as a result of exposing the dynamic type
// of an object. When those opportunities arise we want to attempt
// devirtualization and then again attempt to inline into the
// newly exposed functions, etc. until we're back to the function
// we began with.
auto *Initial = WorkList.back();
// In practice we rarely exceed 5, but in a perf test we iterate 51 times.
const unsigned MaxLaps = 1500;
unsigned Lap = 0;
while (1) {
auto *WorkItem = WorkList.back();
assert(WorkItem->isDefinition() &&
"Expected function definition on work list!");
// Devirtualization and specialization might have exposed new
// function references. We want to inline within those functions
// before inlining within our original function.
//
// Inlining in turn might result in new applies that we should
// consider for devirtualization and specialization.
llvm::SmallVector<FullApplySite, 4> NewApplies;
bool Inlined = inlineCallsIntoFunction(WorkItem, DA, LA, NewApplies);
if (Inlined) {
MT->invalidateAnalysis(WorkItem,
SILAnalysis::InvalidationKind::FunctionBody);
// FIXME: Update inlineCallsIntoFunction to collect all
// remaining applies after inlining, not just those
// resulting from inlining code.
llvm::SmallVector<ApplySite, 4> WorkItemApplies;
collectAllAppliesInFunction(WorkItem, WorkItemApplies);
bool Modified =
devirtualizeAndSpecializeApplies(WorkItemApplies, MT,
CHA, NewFuncs);
if (Modified) {
WorkList.insert(WorkList.end(), NewFuncs.begin(), NewFuncs.end());
NewFuncs.clear();
assert(WorkItemApplies.empty() &&
"Expected all applies to be processed!");
} else if (WorkItem == Initial) {
// We did not specialize generics or devirtualize calls and we
// did not create new opportunities so we can bail out now.
break;
} else {
WorkList.pop_back();
}
} else if (WorkItem == Initial) {
// We did not inline any calls and did not create new opportunities
// so we can bail out now.
break;
} else {
WorkList.pop_back();
}
Lap++;
// It's possible to construct real code where this will hit, but
// it's more likely that there is an issue tracking recursive
// inlining, in which case we want to know about it in internal
// builds, and not hang on bots or user machines.
assert(Lap <= MaxLaps && "Possible bug tracking recursion!");
// Give up and move along.
if (Lap > MaxLaps) {
while (WorkList.back() != Initial)
WorkList.pop_back();
break;
}
}
assert(WorkList.back() == Initial &&
"Expected to exit with same element on top of stack!" );
WorkList.pop_back();
}
}
// Find calls in cold blocks whose callees are forced to be inlined or are
// trivially small. No other functions are inlined into cold blocks.
void SILPerformanceInliner::visitColdBlocks(
SmallVectorImpl<FullApplySite> &AppliesToInline, SILBasicBlock *Root,
DominanceInfo *DT) {
DominanceOrder domOrder(Root, DT);
while (SILBasicBlock *block = domOrder.getNext()) {
for (SILInstruction &I : *block) {
ApplyInst *AI = dyn_cast<ApplyInst>(&I);
if (!AI)
continue;
auto *Callee = getEligibleFunction(AI);
if (Callee && isProfitableInColdBlock(Callee)) {
DEBUG(llvm::dbgs() << " inline in cold block:" << *AI);
AppliesToInline.push_back(AI);
}
}
domOrder.pushChildren(block);
}
}
//===----------------------------------------------------------------------===//
// Performance Inliner Pass
//===----------------------------------------------------------------------===//
namespace {
class SILPerformanceInlinerPass : public SILModuleTransform {
/// Specifies which functions not to inline, based on @_semantics and
/// global_init attributes.
InlineSelection WhatToInline;
std::string PassName;
public:
SILPerformanceInlinerPass(InlineSelection WhatToInline, StringRef LevelName):
WhatToInline(WhatToInline), PassName(LevelName) {
PassName.append(" Performance Inliner");
}
void run() override {
BasicCalleeAnalysis *BCA = PM->getAnalysis<BasicCalleeAnalysis>();
DominanceAnalysis *DA = PM->getAnalysis<DominanceAnalysis>();
SILLoopAnalysis *LA = PM->getAnalysis<SILLoopAnalysis>();
ClassHierarchyAnalysis *CHA = PM->getAnalysis<ClassHierarchyAnalysis>();
if (getOptions().InlineThreshold == 0) {
DEBUG(llvm::dbgs() << "*** The Performance Inliner is disabled ***\n");
return;
}
SILPerformanceInliner Inliner(getOptions().InlineThreshold,
WhatToInline);
BottomUpFunctionOrder BottomUpOrder(*getModule(), BCA);
auto BottomUpFunctions = BottomUpOrder.getFunctions();
// Copy the bottom-up function list into a worklist.
llvm::SmallVector<SILFunction *, 32> WorkList;
// FIXME: std::reverse_copy would be better, but it crashes.
for (auto I = BottomUpFunctions.rbegin(), E = BottomUpFunctions.rend();
I != E; ++I)
if ((*I)->isDefinition())
WorkList.push_back(*I);
// Inline functions bottom up from the leaves.
while (!WorkList.empty()) {
Inliner.inlineDevirtualizeAndSpecialize(WorkList.back(), this, DA, LA, CHA);
WorkList.pop_back();
}
}
StringRef getName() override { return PassName; }
};
} // end anonymous namespace
/// Create an inliner pass that does not inline functions that are marked with
/// the @_semantics, @effects or global_init attributes.
SILTransform *swift::createEarlyInliner() {
return new SILPerformanceInlinerPass(
InlineSelection::NoSemanticsAndGlobalInit, "Early");
}
/// Create an inliner pass that does not inline functions that are marked with
/// the global_init attribute or have an "availability" semantics attribute.
SILTransform *swift::createPerfInliner() {
return new SILPerformanceInlinerPass(InlineSelection::NoGlobalInit, "Middle");
}
/// Create an inliner pass that inlines all functions that are marked with
/// the @_semantics, @effects or global_init attributes.
SILTransform *swift::createLateInliner() {
return new SILPerformanceInlinerPass(InlineSelection::Everything, "Late");
}