| //===--- MandatoryInlining.cpp - Perform inlining of "transparent" sites --===// |
| // |
| // This source file is part of the Swift.org open source project |
| // |
| // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors |
| // Licensed under Apache License v2.0 with Runtime Library Exception |
| // |
| // See https://swift.org/LICENSE.txt for license information |
| // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #define DEBUG_TYPE "mandatory-inlining" |
| #include "swift/AST/DiagnosticEngine.h" |
| #include "swift/AST/DiagnosticsSIL.h" |
| #include "swift/Basic/BlotSetVector.h" |
| #include "swift/SIL/BasicBlockUtils.h" |
| #include "swift/SIL/InstructionUtils.h" |
| #include "swift/SIL/LinearLifetimeChecker.h" |
| #include "swift/SIL/OwnershipUtils.h" |
| #include "swift/SILOptimizer/PassManager/Passes.h" |
| #include "swift/SILOptimizer/PassManager/Transforms.h" |
| #include "swift/SILOptimizer/Utils/CFGOptUtils.h" |
| #include "swift/SILOptimizer/Utils/Devirtualize.h" |
| #include "swift/SILOptimizer/Utils/InstOptUtils.h" |
| #include "swift/SILOptimizer/Utils/SILInliner.h" |
| #include "swift/SILOptimizer/Utils/SILOptFunctionBuilder.h" |
| #include "swift/SILOptimizer/Utils/StackNesting.h" |
| #include "llvm/ADT/DenseSet.h" |
| #include "llvm/ADT/ImmutableSet.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/Support/Debug.h" |
| |
| using namespace swift; |
| |
| using DenseFunctionSet = llvm::DenseSet<SILFunction *>; |
| using ImmutableFunctionSet = llvm::ImmutableSet<SILFunction *>; |
| |
| STATISTIC(NumMandatoryInlines, |
| "Number of function application sites inlined by the mandatory " |
| "inlining pass"); |
| |
| template<typename...T, typename...U> |
| static void diagnose(ASTContext &Context, SourceLoc loc, Diag<T...> diag, |
| U &&...args) { |
| Context.Diags.diagnose(loc, diag, std::forward<U>(args)...); |
| } |
| |
| static SILValue stripCopiesAndBorrows(SILValue v) { |
| while (isa<CopyValueInst>(v) || isa<BeginBorrowInst>(v)) { |
| v = cast<SingleValueInstruction>(v)->getOperand(0); |
| } |
| return v; |
| } |
| |
| /// Fixup reference counts after inlining a function call (which is a no-op |
| /// unless the function is a thick function). |
| /// |
| /// It is important to note that, we can not assume that the partial apply, the |
| /// apply site, or the callee value are control dependent in any way. This |
| /// requires us to need to be very careful. See inline comments. |
| /// |
| /// Returns true if the stack nesting is invalidated and must be corrected |
| /// afterwards. |
| static bool fixupReferenceCounts( |
| PartialApplyInst *pai, FullApplySite applySite, SILValue calleeValue, |
| ArrayRef<ParameterConvention> captureArgConventions, |
| MutableArrayRef<SILValue> capturedArgs, bool isCalleeGuaranteed) { |
| |
| // We assume that we were passed a slice of our actual argument array. So we |
| // can use this to copy if we need to. |
| assert(captureArgConventions.size() == capturedArgs.size()); |
| |
| SmallPtrSet<SILBasicBlock *, 8> visitedBlocks; |
| // FIXME: Can we cache this in between inlining invocations? |
| DeadEndBlocks deadEndBlocks(pai->getFunction()); |
| SmallVector<SILBasicBlock *, 4> leakingBlocks; |
| bool invalidatedStackNesting = false; |
| |
| // Add a copy of each non-address type capture argument to lifetime extend the |
| // captured argument over at least the inlined function and till the end of a |
| // box if we have an address. This deals with the possibility of the closure |
| // being destroyed by an earlier application and thus cause the captured |
| // argument to be destroyed. |
| auto loc = RegularLocation::getAutoGeneratedLocation(); |
| |
| for (unsigned i : indices(captureArgConventions)) { |
| auto convention = captureArgConventions[i]; |
| SILValue &v = capturedArgs[i]; |
| auto *f = applySite.getFunction(); |
| |
| // See if we have a trivial value. In such a case, just continue. We do not |
| // need to fix up anything. |
| if (v->getType().isTrivial(*f)) |
| continue; |
| |
| bool hasOwnership = f->hasOwnership(); |
| |
| switch (convention) { |
| case ParameterConvention::Indirect_In: |
| llvm_unreachable("Missing indirect copy"); |
| |
| case ParameterConvention::Indirect_In_Constant: |
| case ParameterConvention::Indirect_Inout: |
| case ParameterConvention::Indirect_InoutAliasable: |
| break; |
| |
| case ParameterConvention::Indirect_In_Guaranteed: { |
| // Do the same as for Direct_Guaranteed, just the address version. |
| // (See comment below). |
| SILBuilderWithScope builder(pai); |
| auto *stackLoc = builder.createAllocStack(loc, v->getType().getObjectType()); |
| builder.createCopyAddr(loc, v, stackLoc, IsNotTake, IsInitialization); |
| visitedBlocks.clear(); |
| |
| LinearLifetimeChecker checker(visitedBlocks, deadEndBlocks); |
| bool consumedInLoop = checker.completeConsumingUseSet( |
| pai, applySite.getCalleeOperand(), |
| [&](SILBasicBlock::iterator insertPt) { |
| SILBuilderWithScope builder(insertPt); |
| builder.createDestroyAddr(loc, stackLoc); |
| builder.createDeallocStack(loc, stackLoc); |
| }); |
| |
| if (!consumedInLoop) { |
| applySite.insertAfterInvocation([&](SILBuilder &builder) { |
| builder.createDestroyAddr(loc, stackLoc); |
| builder.createDeallocStack(loc, stackLoc); |
| }); |
| } |
| v = stackLoc; |
| invalidatedStackNesting = true; |
| break; |
| } |
| |
| case ParameterConvention::Direct_Guaranteed: { |
| // If we have a direct_guaranteed value, the value is being taken by the |
| // partial_apply at +1, but we are going to invoke the value at +0. So we |
| // need to copy/borrow the value before the pai and then |
| // end_borrow/destroy_value at the apply site. |
| SILValue copy = SILBuilderWithScope(pai).emitCopyValueOperation(loc, v); |
| SILValue argument = copy; |
| if (hasOwnership) { |
| argument = SILBuilderWithScope(pai).createBeginBorrow(loc, argument); |
| } |
| |
| visitedBlocks.clear(); |
| |
| // If we need to insert compensating destroys, do so. |
| // |
| // NOTE: We use pai here since in non-ossa code emitCopyValueOperation |
| // returns the operand of the strong_retain which may have a ValueBase |
| // that is not in the same block. An example of where this is important is |
| // if we are performing emitCopyValueOperation in non-ossa code on an |
| // argument when the partial_apply is not in the entrance block. In truth, |
| // the linear lifetime checker does not /actually/ care what the value is |
| // (ignoring diagnostic error msgs that we do not care about here), it |
| // just cares about the block the value is in. In a forthcoming commit, I |
| // am going to change this to use a different API on the linear lifetime |
| // checker that makes this clearer. |
| LinearLifetimeChecker checker(visitedBlocks, deadEndBlocks); |
| bool consumedInLoop = checker.completeConsumingUseSet( |
| pai, applySite.getCalleeOperand(), |
| [&](SILBasicBlock::iterator insertPt) { |
| SILBuilderWithScope builder(insertPt); |
| if (hasOwnership) { |
| builder.createEndBorrow(loc, argument); |
| } |
| builder.emitDestroyValueOperation(loc, copy); |
| }); |
| |
| // Since our applySite is in a different loop than our partial apply means |
| // thatour leak code will have lifetime extended the value over the |
| // loop. So we should /not/ insert a destroy after the apply site. In |
| // contrast, if we do not have a loop, we must have been compensating for |
| // uses in the top of a diamond and need to insert a destroy after the |
| // apply since the leak will just cover the other path. |
| if (!consumedInLoop) { |
| applySite.insertAfterInvocation([&](SILBuilder &builder) { |
| if (hasOwnership) { |
| builder.createEndBorrow(loc, argument); |
| } |
| builder.emitDestroyValueOperation(loc, copy); |
| }); |
| } |
| v = argument; |
| break; |
| } |
| |
| // TODO: Do we need to lifetime extend here? |
| case ParameterConvention::Direct_Unowned: { |
| v = SILBuilderWithScope(pai).emitCopyValueOperation(loc, v); |
| visitedBlocks.clear(); |
| |
| // If our consuming partial apply does not post-dominate our |
| // partial_apply, compute the completion of the post dominance set and if |
| // that set is non-empty, insert compensating destroys at those places. |
| // |
| // NOTE: We use pai here since in non-ossa code emitCopyValueOperation |
| // returns the operand of the strong_retain which may have a ValueBase |
| // that is not in the same block. An example of where this is important is |
| // if we are performing emitCopyValueOperation in non-ossa code on an |
| // argument when the partial_apply is not in the entrance block. In truth, |
| // the linear lifetime checker does not /actually/ care what the value is |
| // (ignoring diagnostic error msgs that we do not care about here), it |
| // just cares about the block the value is in. In a forthcoming commit, I |
| // am going to change this to use a different API on the linear lifetime |
| // checker that makes this clearer. |
| LinearLifetimeChecker checker(visitedBlocks, deadEndBlocks); |
| checker.completeConsumingUseSet( |
| pai, applySite.getCalleeOperand(), |
| [&](SILBasicBlock::iterator insertPt) { |
| auto loc = RegularLocation::getAutoGeneratedLocation(); |
| SILBuilderWithScope builder(insertPt); |
| builder.emitDestroyValueOperation(loc, v); |
| }); |
| |
| // Then insert destroys after the apply site since our value is not being |
| // consumed as part of the actual apply. |
| applySite.insertAfterInvocation([&](SILBuilder &builder) { |
| builder.emitDestroyValueOperation(loc, v); |
| }); |
| break; |
| } |
| |
| // If we have an owned value, we insert a copy here for two reasons: |
| // |
| // 1. To balance the consuming argument. |
| // 2. To lifetime extend the value over the call site in case our partial |
| // apply has another use that would destroy our value first. |
| case ParameterConvention::Direct_Owned: { |
| v = SILBuilderWithScope(pai).emitCopyValueOperation(loc, v); |
| visitedBlocks.clear(); |
| |
| // If we need to insert compensating destroys, do so. |
| // |
| // NOTE: We use pai here since in non-ossa code emitCopyValueOperation |
| // returns the operand of the strong_retain which may have a ValueBase |
| // that is not in the same block. An example of where this is important is |
| // if we are performing emitCopyValueOperation in non-ossa code on an |
| // argument when the partial_apply is not in the entrance block. In truth, |
| // the linear lifetime checker does not /actually/ care what the value is |
| // (ignoring diagnostic error msgs that we do not care about here), it |
| // just cares about the block the value is in. In a forthcoming commit, I |
| // am going to change this to use a different API on the linear lifetime |
| // checker that makes this clearer. |
| LinearLifetimeChecker checker(visitedBlocks, deadEndBlocks); |
| checker.completeConsumingUseSet( |
| pai, applySite.getCalleeOperand(), |
| [&](SILBasicBlock::iterator insertPt) { |
| auto loc = RegularLocation::getAutoGeneratedLocation(); |
| SILBuilderWithScope builder(insertPt); |
| builder.emitDestroyValueOperation(loc, v); |
| }); |
| |
| // NOTE: Unlike with the unowned case above, when we are owned we do not |
| // need to insert destroys since the apply will consume the value for us. |
| break; |
| } |
| } |
| } |
| |
| // Destroy the callee as the apply would have done if our function is not |
| // callee guaranteed. |
| if (!isCalleeGuaranteed) { |
| applySite.insertAfterInvocation([&](SILBuilder &builder) { |
| builder.emitDestroyValueOperation(loc, calleeValue); |
| }); |
| } |
| return invalidatedStackNesting; |
| } |
| |
| static SILValue cleanupLoadedCalleeValue(SILValue calleeValue, LoadInst *li) { |
| auto *pbi = dyn_cast<ProjectBoxInst>(li->getOperand()); |
| if (!pbi) |
| return SILValue(); |
| auto *abi = dyn_cast<AllocBoxInst>(pbi->getOperand()); |
| if (!abi) |
| return SILValue(); |
| |
| // The load instruction must have no more uses or a single destroy left to |
| // erase it. |
| if (li->getFunction()->hasOwnership()) { |
| // TODO: What if we have multiple destroy_value? That should be ok as well. |
| auto *dvi = li->getSingleUserOfType<DestroyValueInst>(); |
| if (!dvi) |
| return SILValue(); |
| dvi->eraseFromParent(); |
| } else if (!li->use_empty()) { |
| return SILValue(); |
| } |
| li->eraseFromParent(); |
| |
| // Look through uses of the alloc box the load is loading from to find up to |
| // one store and up to one strong release. |
| PointerUnion<StrongReleaseInst *, DestroyValueInst *> destroy; |
| destroy = nullptr; |
| for (Operand *use : abi->getUses()) { |
| auto *user = use->getUser(); |
| |
| if (destroy.isNull()) { |
| if (auto *sri = dyn_cast<StrongReleaseInst>(user)) { |
| destroy = sri; |
| continue; |
| } |
| |
| if (auto *dvi = dyn_cast<DestroyValueInst>(user)) { |
| destroy = dvi; |
| continue; |
| } |
| } |
| |
| if (user == pbi) |
| continue; |
| |
| return SILValue(); |
| } |
| |
| StoreInst *si = nullptr; |
| for (Operand *use : pbi->getUses()) { |
| if (auto *useSI = dyn_cast_or_null<StoreInst>(use->getUser())) { |
| si = useSI; |
| continue; |
| } |
| return SILValue(); |
| } |
| |
| // If we found a store, record its source and erase it. |
| if (si) { |
| calleeValue = si->getSrc(); |
| si->eraseFromParent(); |
| } else { |
| calleeValue = SILValue(); |
| } |
| |
| // If we found a strong release, replace it with a strong release of the |
| // source of the store and erase it. |
| if (destroy) { |
| if (calleeValue) { |
| if (auto *sri = destroy.dyn_cast<StrongReleaseInst *>()) { |
| SILBuilderWithScope(sri).emitStrongReleaseAndFold(sri->getLoc(), |
| calleeValue); |
| sri->eraseFromParent(); |
| } else { |
| auto *dvi = destroy.get<DestroyValueInst *>(); |
| SILBuilderWithScope(dvi).emitDestroyValueAndFold(dvi->getLoc(), |
| calleeValue); |
| dvi->eraseFromParent(); |
| } |
| } |
| } |
| |
| assert(pbi->use_empty()); |
| pbi->eraseFromParent(); |
| assert(abi->use_empty()); |
| abi->eraseFromParent(); |
| |
| return calleeValue; |
| } |
| |
| /// Removes instructions that create the callee value if they are no |
| /// longer necessary after inlining. |
| static void cleanupCalleeValue(SILValue calleeValue, |
| bool &invalidatedStackNesting) { |
| // Handle the case where the callee of the apply is a load instruction. If we |
| // fail to optimize, return. Otherwise, see if we can look through other |
| // abstractions on our callee. |
| if (auto *li = dyn_cast<LoadInst>(calleeValue)) { |
| calleeValue = cleanupLoadedCalleeValue(calleeValue, li); |
| if (!calleeValue) { |
| return; |
| } |
| } |
| |
| calleeValue = stripCopiesAndBorrows(calleeValue); |
| |
| // Inline constructor |
| auto calleeSource = ([&]() -> SILValue { |
| // Handle partial_apply/thin_to_thick -> convert_function: |
| // tryDeleteDeadClosure must run before deleting a ConvertFunction that uses |
| // the PartialApplyInst or ThinToThickFunctionInst. tryDeleteDeadClosure |
| // will delete any uses of the closure, including a |
| // convert_escape_to_noescape conversion. |
| if (auto *cfi = dyn_cast<ConvertFunctionInst>(calleeValue)) |
| return stripCopiesAndBorrows(cfi->getOperand()); |
| |
| if (auto *cvt = dyn_cast<ConvertEscapeToNoEscapeInst>(calleeValue)) |
| return stripCopiesAndBorrows(cvt->getOperand()); |
| |
| return stripCopiesAndBorrows(calleeValue); |
| })(); |
| |
| if (auto *pai = dyn_cast<PartialApplyInst>(calleeSource)) { |
| SILValue callee = pai->getCallee(); |
| if (!tryDeleteDeadClosure(pai)) |
| return; |
| calleeValue = callee; |
| } else if (auto *tttfi = dyn_cast<ThinToThickFunctionInst>(calleeSource)) { |
| SILValue callee = tttfi->getCallee(); |
| if (!tryDeleteDeadClosure(tttfi)) |
| return; |
| calleeValue = callee; |
| } |
| invalidatedStackNesting = true; |
| |
| calleeValue = stripCopiesAndBorrows(calleeValue); |
| |
| // Handle function_ref -> convert_function -> partial_apply/thin_to_thick. |
| if (auto *cfi = dyn_cast<ConvertFunctionInst>(calleeValue)) { |
| if (isInstructionTriviallyDead(cfi)) { |
| recursivelyDeleteTriviallyDeadInstructions(cfi, true); |
| return; |
| } |
| } |
| |
| if (auto *fri = dyn_cast<FunctionRefInst>(calleeValue)) { |
| if (!fri->use_empty()) |
| return; |
| fri->eraseFromParent(); |
| } |
| } |
| |
| namespace { |
| /// Cleanup dead closures after inlining. |
| class ClosureCleanup { |
| using DeadInstSet = SmallBlotSetVector<SILInstruction *, 4>; |
| |
| /// A helper class to update the set of dead instructions. |
| /// |
| /// Since this is called by the SILModule callback, the instruction may longer |
| /// be well-formed. Do not visit its operands. However, it's position in the |
| /// basic block is still valid. |
| /// |
| /// FIXME: Using the Module's callback mechanism for this is terrible. |
| /// Instead, cleanupCalleeValue could be easily rewritten to use its own |
| /// instruction deletion helper and pass a callback to tryDeleteDeadClosure |
| /// and recursivelyDeleteTriviallyDeadInstructions. |
| class DeleteUpdateHandler : public DeleteNotificationHandler { |
| SILModule &Module; |
| DeadInstSet &DeadInsts; |
| |
| public: |
| DeleteUpdateHandler(SILModule &M, DeadInstSet &DeadInsts) |
| : Module(M), DeadInsts(DeadInsts) { |
| Module.registerDeleteNotificationHandler(this); |
| } |
| |
| ~DeleteUpdateHandler() override { |
| // Unregister the handler. |
| Module.removeDeleteNotificationHandler(this); |
| } |
| |
| // Handling of instruction removal notifications. |
| bool needsNotifications() override { return true; } |
| |
| // Handle notifications about removals of instructions. |
| void handleDeleteNotification(SILNode *node) override { |
| auto deletedI = dyn_cast<SILInstruction>(node); |
| if (!deletedI) |
| return; |
| |
| DeadInsts.erase(deletedI); |
| } |
| }; |
| |
| SmallBlotSetVector<SILInstruction *, 4> deadFunctionVals; |
| |
| public: |
| /// Set to true if some alloc/dealloc_stack instruction are inserted and at |
| /// the end of the run stack nesting needs to be corrected. |
| bool invalidatedStackNesting = false; |
| |
| /// This regular instruction deletion callback checks for any function-type |
| /// values that may be unused after deleting the given instruction. |
| void recordDeadFunction(SILInstruction *deletedInst) { |
| // If it is a debug instruction, return. |
| // In this function, we look at operands of an instruction to be |
| // deleted, and add back the defining instruction of the operands to the |
| // worklist if it has a function type. This works in general when we are |
| // deleting dead instructions recursively. |
| // But we also consider, an instruction with only debug uses as dead. |
| // And with eraseFromParentWithDebugInsts, we will be deleting a dead |
| // instruction with its debug instructions. So when we are deleting a debug |
| // instruction, we may have already deleted its operand's defining |
| // instruction. So it would be incorrect to add back its operand's defining |
| // instruction. |
| if (deletedInst->isDebugInstruction()) |
| return; |
| // If the deleted instruction was already recorded as a function producer, |
| // delete it from the map and record its operands instead. |
| deadFunctionVals.erase(deletedInst); |
| for (auto &operand : deletedInst->getAllOperands()) { |
| SILValue operandVal = operand.get(); |
| if (!operandVal->getType().is<SILFunctionType>()) |
| continue; |
| |
| // Simply record all function-producing instructions used by dead |
| // code. Checking for a single use would not be precise because |
| // `deletedInst` could itself use `deadInst` multiple times. |
| if (auto *deadInst = operandVal->getDefiningInstruction()) |
| deadFunctionVals.insert(deadInst); |
| } |
| } |
| |
| // Note: instructions in the `deadFunctionVals` set may use each other, so the |
| // set needs to continue to be updated (by this handler) when deleting |
| // instructions. This assumes that DeadFunctionValSet::erase() is stable. |
| void cleanupDeadClosures(SILFunction *F) { |
| DeleteUpdateHandler deleteUpdate(F->getModule(), deadFunctionVals); |
| for (Optional<SILInstruction *> I : deadFunctionVals) { |
| if (!I.hasValue()) |
| continue; |
| |
| if (auto *SVI = dyn_cast<SingleValueInstruction>(I.getValue())) |
| cleanupCalleeValue(SVI, invalidatedStackNesting); |
| } |
| } |
| }; |
| |
| } // end of namespace |
| |
| static void collectPartiallyAppliedArguments( |
| PartialApplyInst *PAI, |
| SmallVectorImpl<ParameterConvention> &CapturedArgConventions, |
| SmallVectorImpl<SILValue> &FullArgs) { |
| ApplySite Site(PAI); |
| SILFunctionConventions CalleeConv(Site.getSubstCalleeType(), |
| PAI->getModule()); |
| for (auto &Arg : PAI->getArgumentOperands()) { |
| unsigned CalleeArgumentIndex = Site.getCalleeArgIndex(Arg); |
| assert(CalleeArgumentIndex >= CalleeConv.getSILArgIndexOfFirstParam()); |
| auto ParamInfo = CalleeConv.getParamInfoForSILArg(CalleeArgumentIndex); |
| CapturedArgConventions.push_back(ParamInfo.getConvention()); |
| FullArgs.push_back(Arg.get()); |
| } |
| } |
| |
| static SILValue getLoadedCalleeValue(LoadInst *li) { |
| auto *pbi = dyn_cast<ProjectBoxInst>(li->getOperand()); |
| if (!pbi) |
| return SILValue(); |
| |
| auto *abi = dyn_cast<AllocBoxInst>(pbi->getOperand()); |
| if (!abi) |
| return SILValue(); |
| |
| PointerUnion<StrongReleaseInst *, DestroyValueInst *> destroy = |
| static_cast<StrongReleaseInst *>(nullptr); |
| |
| // Look through uses of the alloc box the load is loading from to find up to |
| // one store and up to one destroy. |
| for (auto *use : abi->getUses()) { |
| auto *user = use->getUser(); |
| |
| // Look for our single destroy. If we find it... continue. |
| if (destroy.isNull()) { |
| if (auto *sri = dyn_cast<StrongReleaseInst>(user)) { |
| destroy = sri; |
| continue; |
| } |
| |
| if (auto *dvi = dyn_cast<DestroyValueInst>(user)) { |
| destroy = dvi; |
| continue; |
| } |
| } |
| |
| // Ignore our pbi if we find one. |
| if (user == pbi) |
| continue; |
| |
| // Otherwise, we have something that we do not understand. Return |
| // SILValue(). |
| // |
| // NOTE: We purposely allow for strong_retain, retain_value, copy_value to |
| // go down this path since we only want to consider simple boxes that have a |
| // single post-dominating destroy. So if we have a strong_retain, |
| // retain_value, or copy_value, we want to bail. |
| return SILValue(); |
| } |
| |
| // Make sure that our project_box has a single store user and our load user. |
| StoreInst *si = nullptr; |
| for (Operand *use : pbi->getUses()) { |
| // If this use is our load... continue. |
| if (use->getUser() == li) |
| continue; |
| |
| // Otherwise, see if we have a store... |
| if (auto *useSI = dyn_cast_or_null<StoreInst>(use->getUser())) { |
| // If we already have a store, we have a value that is initialized |
| // multiple times... bail. |
| if (si) |
| return SILValue(); |
| |
| // If we do not have a store yet, make sure that it is in the same basic |
| // block as box. Otherwise bail. |
| if (useSI->getParent() != abi->getParent()) |
| return SILValue(); |
| |
| // Ok, we found a store in the same block as the box and for which we have |
| // so far only found one. Stash the store. |
| si = useSI; |
| continue; |
| } |
| |
| // Otherwise, we have something we do not support... bail. |
| return SILValue(); |
| } |
| |
| // If we did not find a store, bail. |
| if (!si) |
| return SILValue(); |
| |
| // Otherwise, we have found our callee... the source of our store. |
| return si->getSrc(); |
| } |
| |
| /// Returns the callee SILFunction called at a call site, in the case |
| /// that the call is transparent (as in, both that the call is marked |
| /// with the transparent flag and that callee function is actually transparently |
| /// determinable from the SIL) or nullptr otherwise. This assumes that the SIL |
| /// is already in SSA form. |
| /// |
| /// In the case that a non-null value is returned, FullArgs contains effective |
| /// argument operands for the callee function. |
| static SILFunction * |
| getCalleeFunction(SILFunction *F, FullApplySite AI, bool &IsThick, |
| SmallVectorImpl<ParameterConvention> &CapturedArgConventions, |
| SmallVectorImpl<SILValue> &FullArgs, |
| PartialApplyInst *&PartialApply) { |
| IsThick = false; |
| PartialApply = nullptr; |
| CapturedArgConventions.clear(); |
| FullArgs.clear(); |
| |
| // First grab our basic arguments from our apply. |
| for (SILValue Arg : AI.getArguments()) |
| FullArgs.push_back(Arg); |
| |
| // Then grab a first approximation of our apply by stripping off all copy |
| // operations. |
| SILValue CalleeValue = stripCopiesAndBorrows(AI.getCallee()); |
| |
| // If after stripping off copy_values, we have a load then see if we the |
| // function we want to inline has a simple available value through a simple |
| // alloc_box. Bail otherwise. |
| if (auto *li = dyn_cast<LoadInst>(CalleeValue)) { |
| CalleeValue = getLoadedCalleeValue(li); |
| if (!CalleeValue) |
| return nullptr; |
| CalleeValue = stripCopiesAndBorrows(CalleeValue); |
| } |
| |
| // PartialApply/ThinToThick -> ConvertFunction patterns are generated |
| // by @noescape closures. |
| // |
| // FIXME: We don't currently handle mismatched return types, however, this |
| // would be a good optimization to handle and would be as simple as inserting |
| // a cast. |
| auto skipFuncConvert = [](SILValue CalleeValue) { |
| // Skip any copies that we see. |
| CalleeValue = stripCopiesAndBorrows(CalleeValue); |
| |
| // We can also allow a thin @escape to noescape conversion as such: |
| // %1 = function_ref @thin_closure_impl : $@convention(thin) () -> () |
| // %2 = convert_function %1 : |
| // $@convention(thin) () -> () to $@convention(thin) @noescape () -> () |
| // %3 = thin_to_thick_function %2 : |
| // $@convention(thin) @noescape () -> () to |
| // $@noescape @callee_guaranteed () -> () |
| // %4 = apply %3() : $@noescape @callee_guaranteed () -> () |
| if (auto *ConvertFn = dyn_cast<ConvertFunctionInst>(CalleeValue)) { |
| // If the conversion only changes the substitution level of the function, |
| // we can also look through it. |
| if (ConvertFn->onlyConvertsSubstitutions()) { |
| return stripCopiesAndBorrows(ConvertFn->getOperand()); |
| } |
| |
| auto FromCalleeTy = |
| ConvertFn->getOperand()->getType().castTo<SILFunctionType>(); |
| if (FromCalleeTy->getExtInfo().hasContext()) |
| return CalleeValue; |
| auto ToCalleeTy = ConvertFn->getType().castTo<SILFunctionType>(); |
| auto EscapingCalleeTy = ToCalleeTy->getWithExtInfo( |
| ToCalleeTy->getExtInfo().withNoEscape(false)); |
| if (FromCalleeTy != EscapingCalleeTy) |
| return CalleeValue; |
| return stripCopiesAndBorrows(ConvertFn->getOperand()); |
| } |
| |
| // Ignore mark_dependence users. A partial_apply [stack] uses them to mark |
| // the dependence of the trivial closure context value on the captured |
| // arguments. |
| if (auto *MD = dyn_cast<MarkDependenceInst>(CalleeValue)) { |
| while (MD) { |
| CalleeValue = MD->getValue(); |
| MD = dyn_cast<MarkDependenceInst>(CalleeValue); |
| } |
| return CalleeValue; |
| } |
| |
| auto *CFI = dyn_cast<ConvertEscapeToNoEscapeInst>(CalleeValue); |
| if (!CFI) |
| return stripCopiesAndBorrows(CalleeValue); |
| |
| // TODO: Handle argument conversion. All the code in this file needs to be |
| // cleaned up and generalized. The argument conversion handling in |
| // optimizeApplyOfConvertFunctionInst should apply to any combine |
| // involving an apply, not just a specific pattern. |
| // |
| // For now, just handle conversion that doesn't affect argument types, |
| // return types, or throws. We could trivially handle any other |
| // representation change, but the only one that doesn't affect the ABI and |
| // matters here is @noescape, so just check for that. |
| auto FromCalleeTy = CFI->getOperand()->getType().castTo<SILFunctionType>(); |
| auto ToCalleeTy = CFI->getType().castTo<SILFunctionType>(); |
| auto EscapingCalleeTy = |
| ToCalleeTy->getWithExtInfo(ToCalleeTy->getExtInfo().withNoEscape(false)); |
| if (FromCalleeTy != EscapingCalleeTy) |
| return stripCopiesAndBorrows(CalleeValue); |
| |
| return stripCopiesAndBorrows(CFI->getOperand()); |
| }; |
| |
| // Look through a escape to @noescape conversion. |
| CalleeValue = skipFuncConvert(CalleeValue); |
| |
| // We are allowed to see through exactly one "partial apply" instruction or |
| // one "thin to thick function" instructions, since those are the patterns |
| // generated when using auto closures. |
| if (auto *PAI = dyn_cast<PartialApplyInst>(CalleeValue)) { |
| // Collect the applied arguments and their convention. |
| collectPartiallyAppliedArguments(PAI, CapturedArgConventions, FullArgs); |
| |
| CalleeValue = stripCopiesAndBorrows(PAI->getCallee()); |
| IsThick = true; |
| PartialApply = PAI; |
| } else if (auto *TTTFI = dyn_cast<ThinToThickFunctionInst>(CalleeValue)) { |
| CalleeValue = stripCopiesAndBorrows(TTTFI->getOperand()); |
| IsThick = true; |
| } |
| |
| CalleeValue = skipFuncConvert(CalleeValue); |
| |
| auto *FRI = dyn_cast<FunctionRefInst>(CalleeValue); |
| if (!FRI) |
| return nullptr; |
| |
| SILFunction *CalleeFunction = FRI->getReferencedFunctionOrNull(); |
| |
| switch (CalleeFunction->getRepresentation()) { |
| case SILFunctionTypeRepresentation::Thick: |
| case SILFunctionTypeRepresentation::Thin: |
| case SILFunctionTypeRepresentation::Method: |
| case SILFunctionTypeRepresentation::Closure: |
| case SILFunctionTypeRepresentation::WitnessMethod: |
| break; |
| |
| case SILFunctionTypeRepresentation::CFunctionPointer: |
| case SILFunctionTypeRepresentation::ObjCMethod: |
| case SILFunctionTypeRepresentation::Block: |
| return nullptr; |
| } |
| |
| // If the CalleeFunction is a not-transparent definition, we can not process |
| // it. |
| if (CalleeFunction->isTransparent() == IsNotTransparent) |
| return nullptr; |
| |
| // If CalleeFunction is a declaration, see if we can load it. |
| if (CalleeFunction->empty()) |
| AI.getModule().loadFunction(CalleeFunction); |
| |
| // If we fail to load it, bail. |
| if (CalleeFunction->empty()) |
| return nullptr; |
| |
| if (F->isSerialized() && |
| !CalleeFunction->hasValidLinkageForFragileInline()) { |
| if (!CalleeFunction->hasValidLinkageForFragileRef()) { |
| llvm::errs() << "caller: " << F->getName() << "\n"; |
| llvm::errs() << "callee: " << CalleeFunction->getName() << "\n"; |
| llvm_unreachable("Should never be inlining a resilient function into " |
| "a fragile function"); |
| } |
| return nullptr; |
| } |
| |
| return CalleeFunction; |
| } |
| |
| static SILInstruction *tryDevirtualizeApplyHelper(FullApplySite InnerAI, |
| ClassHierarchyAnalysis *CHA) { |
| auto NewInst = tryDevirtualizeApply(InnerAI, CHA).first; |
| if (!NewInst) |
| return InnerAI.getInstruction(); |
| |
| deleteDevirtualizedApply(InnerAI); |
| |
| // FIXME: Comments at the use of this helper indicate that devirtualization |
| // may return SILArgument. Yet here we assert that it must return an |
| // instruction. |
| auto newApplyAI = NewInst.getInstruction(); |
| assert(newApplyAI && "devirtualized but removed apply site?"); |
| |
| return newApplyAI; |
| } |
| |
| /// Inlines all mandatory inlined functions into the body of a function, |
| /// first recursively inlining all mandatory apply instructions in those |
| /// functions into their bodies if necessary. |
| /// |
| /// \param F the function to be processed |
| /// \param AI nullptr if this is being called from the top level; the relevant |
| /// ApplyInst requiring the recursive call when non-null |
| /// \param FullyInlinedSet the set of all functions already known to be fully |
| /// processed, to avoid processing them over again |
| /// \param SetFactory an instance of ImmutableFunctionSet::Factory |
| /// \param CurrentInliningSet the set of functions currently being inlined in |
| /// the current call stack of recursive calls |
| /// |
| /// \returns true if successful, false if failed due to circular inlining. |
| static bool |
| runOnFunctionRecursively(SILOptFunctionBuilder &FuncBuilder, SILFunction *F, |
| FullApplySite AI, DenseFunctionSet &FullyInlinedSet, |
| ImmutableFunctionSet::Factory &SetFactory, |
| ImmutableFunctionSet CurrentInliningSet, |
| ClassHierarchyAnalysis *CHA, |
| DenseFunctionSet &changedFunctions) { |
| // Avoid reprocessing functions needlessly. |
| if (FullyInlinedSet.count(F)) |
| return true; |
| |
| // Prevent attempt to circularly inline. |
| if (CurrentInliningSet.contains(F)) { |
| // This cannot happen on a top-level call, so AI should be non-null. |
| assert(AI && "Cannot have circular inline without apply"); |
| SILLocation L = AI.getLoc(); |
| assert(L && "Must have location for transparent inline apply"); |
| diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), |
| diag::circular_transparent); |
| return false; |
| } |
| |
| // Add to the current inlining set (immutably, so we only affect the set |
| // during this call and recursive subcalls). |
| CurrentInliningSet = SetFactory.add(CurrentInliningSet, F); |
| |
| SmallVector<ParameterConvention, 16> CapturedArgConventions; |
| SmallVector<SILValue, 32> FullArgs; |
| bool invalidatedStackNesting = false; |
| |
| // Visiting blocks in reverse order avoids revisiting instructions after block |
| // splitting, which would be quadratic. |
| for (auto BI = F->rbegin(), BE = F->rend(), nextBB = BI; BI != BE; |
| BI = nextBB) { |
| // After inlining, the block iterator will be adjusted to point to the last |
| // block containing inlined instructions. This way, the inlined function |
| // body will be reprocessed within the caller's context without revisiting |
| // any original instructions. |
| nextBB = std::next(BI); |
| |
| // While iterating over this block, instructions are inserted and deleted. |
| // To avoid quadratic block splitting, instructions must be processed in |
| // reverse order (block splitting reassigned the parent pointer of all |
| // instructions below the split point). |
| for (auto II = BI->rbegin(); II != BI->rend(); ++II) { |
| FullApplySite InnerAI = FullApplySite::isa(&*II); |
| if (!InnerAI) |
| continue; |
| |
| // *NOTE* If devirtualization succeeds, devirtInst may not be InnerAI, |
| // but a casted result of InnerAI or even a block argument due to |
| // abstraction changes when calling the witness or class method. |
| auto *devirtInst = tryDevirtualizeApplyHelper(InnerAI, CHA); |
| // If devirtualization succeeds, make sure we record that this function |
| // changed. |
| if (devirtInst != InnerAI.getInstruction()) |
| changedFunctions.insert(F); |
| // Restore II to the current apply site. |
| II = devirtInst->getReverseIterator(); |
| // If the devirtualized call result is no longer a invalid FullApplySite, |
| // then it has succeeded, but the result is not immediately inlinable. |
| InnerAI = FullApplySite::isa(devirtInst); |
| if (!InnerAI) |
| continue; |
| |
| SILValue CalleeValue = InnerAI.getCallee(); |
| bool IsThick; |
| PartialApplyInst *PAI; |
| SILFunction *CalleeFunction = getCalleeFunction( |
| F, InnerAI, IsThick, CapturedArgConventions, FullArgs, PAI); |
| |
| if (!CalleeFunction) |
| continue; |
| |
| // Then recursively process it first before trying to inline it. |
| if (!runOnFunctionRecursively( |
| FuncBuilder, CalleeFunction, InnerAI, FullyInlinedSet, SetFactory, |
| CurrentInliningSet, CHA, changedFunctions)) { |
| // If we failed due to circular inlining, then emit some notes to |
| // trace back the failure if we have more information. |
| // FIXME: possibly it could be worth recovering and attempting other |
| // inlines within this same recursive call rather than simply |
| // propagating the failure. |
| if (AI) { |
| SILLocation L = AI.getLoc(); |
| assert(L && "Must have location for transparent inline apply"); |
| diagnose(F->getModule().getASTContext(), L.getStartSourceLoc(), |
| diag::note_while_inlining); |
| } |
| return false; |
| } |
| |
| // Get our list of substitutions. |
| auto Subs = (PAI |
| ? PAI->getSubstitutionMap() |
| : InnerAI.getSubstitutionMap()); |
| |
| SILOpenedArchetypesTracker OpenedArchetypesTracker(F); |
| F->getModule().registerDeleteNotificationHandler( |
| &OpenedArchetypesTracker); |
| // The callee only needs to know about opened archetypes used in |
| // the substitution list. |
| OpenedArchetypesTracker.registerUsedOpenedArchetypes( |
| InnerAI.getInstruction()); |
| if (PAI) { |
| OpenedArchetypesTracker.registerUsedOpenedArchetypes(PAI); |
| } |
| |
| SILInliner Inliner(FuncBuilder, SILInliner::InlineKind::MandatoryInline, |
| Subs, OpenedArchetypesTracker); |
| if (!Inliner.canInlineApplySite(InnerAI)) |
| continue; |
| |
| // Inline function at I, which also changes I to refer to the first |
| // instruction inlined in the case that it succeeds. We purposely |
| // process the inlined body after inlining, because the inlining may |
| // have exposed new inlining opportunities beyond those present in |
| // the inlined function when processed independently. |
| LLVM_DEBUG(llvm::errs() << "Inlining @" << CalleeFunction->getName() |
| << " into @" << InnerAI.getFunction()->getName() |
| << "\n"); |
| |
| // If we intend to inline a partial_apply function that is not on the |
| // stack, then we need to balance the reference counts for correctness. |
| // |
| // NOTE: If our partial apply is on the stack, it only has point uses (and |
| // hopefully eventually guaranteed) uses of the captured arguments. |
| // |
| // NOTE: If we have a thin_to_thick_function, we do not need to worry |
| // about such things since a thin_to_thick_function does not capture any |
| // arguments. |
| if (PAI && PAI->isOnStack() == PartialApplyInst::NotOnStack) { |
| bool IsCalleeGuaranteed = |
| PAI->getType().castTo<SILFunctionType>()->isCalleeGuaranteed(); |
| auto CapturedArgs = MutableArrayRef<SILValue>(FullArgs).take_back( |
| CapturedArgConventions.size()); |
| // We need to insert the copies before the partial_apply since if we can |
| // not remove the partial_apply the captured values will be dead by the |
| // time we hit the call site. |
| invalidatedStackNesting |= fixupReferenceCounts(PAI, InnerAI, |
| CalleeValue, CapturedArgConventions, |
| CapturedArgs, IsCalleeGuaranteed); |
| } |
| |
| // Register a callback to record potentially unused function values after |
| // inlining. |
| ClosureCleanup closureCleanup; |
| Inliner.setDeletionCallback([&closureCleanup](SILInstruction *I) { |
| closureCleanup.recordDeadFunction(I); |
| }); |
| |
| invalidatedStackNesting |= Inliner.invalidatesStackNesting(InnerAI); |
| |
| // Inlining deletes the apply, and can introduce multiple new basic |
| // blocks. After this, CalleeValue and other instructions may be invalid. |
| // nextBB will point to the last inlined block |
| auto firstInlinedInstAndLastBB = |
| Inliner.inlineFunction(CalleeFunction, InnerAI, FullArgs); |
| nextBB = firstInlinedInstAndLastBB.second->getReverseIterator(); |
| ++NumMandatoryInlines; |
| |
| // The IR is now valid, and trivial dead arguments are removed. However, |
| // we may be able to remove dead callee computations (e.g. dead |
| // partial_apply closures). |
| closureCleanup.cleanupDeadClosures(F); |
| invalidatedStackNesting |= closureCleanup.invalidatedStackNesting; |
| |
| // Record that we inlined into this function so that we can invalidate it |
| // later. |
| changedFunctions.insert(F); |
| |
| // Resume inlining within nextBB, which contains only the inlined |
| // instructions and possibly instructions in the original call block that |
| // have not yet been visited. |
| break; |
| } |
| } |
| |
| if (invalidatedStackNesting) { |
| StackNesting().correctStackNesting(F); |
| changedFunctions.insert(F); |
| } |
| |
| // Keep track of full inlined functions so we don't waste time recursively |
| // reprocessing them. |
| FullyInlinedSet.insert(F); |
| return true; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Top Level Driver |
| //===----------------------------------------------------------------------===// |
| |
| namespace { |
| |
| class MandatoryInlining : public SILModuleTransform { |
| /// The entry point to the transformation. |
| void run() override { |
| ClassHierarchyAnalysis *CHA = getAnalysis<ClassHierarchyAnalysis>(); |
| SILModule *M = getModule(); |
| bool SILVerifyAll = getOptions().VerifyAll; |
| DenseFunctionSet FullyInlinedSet; |
| ImmutableFunctionSet::Factory SetFactory; |
| DenseFunctionSet changedFunctions; |
| |
| SILOptFunctionBuilder FuncBuilder(*this); |
| for (auto &F : *M) { |
| // Don't inline into thunks, even transparent callees. |
| if (F.isThunk()) |
| continue; |
| |
| // Skip deserialized functions. |
| if (F.wasDeserializedCanonical()) |
| continue; |
| |
| runOnFunctionRecursively(FuncBuilder, &F, FullApplySite(), |
| FullyInlinedSet, SetFactory, |
| SetFactory.getEmptySet(), CHA, changedFunctions); |
| |
| // The inliner splits blocks at call sites. Re-merge trivial branches |
| // to reestablish a canonical CFG. |
| if (mergeBasicBlocks(&F)) { |
| changedFunctions.insert(&F); |
| } |
| |
| // If we are asked to perform SIL verify all, perform that now so that we |
| // can discover the immediate inlining trigger of the problematic |
| // function. |
| if (SILVerifyAll) { |
| F.verify(); |
| } |
| } |
| |
| if (getOptions().DebugSerialization) |
| return; |
| for (auto *F : changedFunctions) { |
| invalidateAnalysis(F, SILAnalysis::InvalidationKind::Everything); |
| } |
| } |
| |
| }; |
| } // end anonymous namespace |
| |
| SILTransform *swift::createMandatoryInlining() { |
| return new MandatoryInlining(); |
| } |