| //===--- PredictableMemOpt.cpp - Perform predictable memory optzns --------===// |
| // |
| // This source file is part of the Swift.org open source project |
| // |
| // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors |
| // Licensed under Apache License v2.0 with Runtime Library Exception |
| // |
| // See https://swift.org/LICENSE.txt for license information |
| // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #define DEBUG_TYPE "predictable-memopt" |
| |
| #include "PMOMemoryUseCollector.h" |
| #include "swift/Basic/BlotSetVector.h" |
| #include "swift/Basic/FrozenMultiMap.h" |
| #include "swift/Basic/STLExtras.h" |
| #include "swift/SIL/BasicBlockUtils.h" |
| #include "swift/SIL/LinearLifetimeChecker.h" |
| #include "swift/SIL/OwnershipUtils.h" |
| #include "swift/SIL/SILBuilder.h" |
| #include "swift/SILOptimizer/PassManager/Passes.h" |
| #include "swift/SILOptimizer/PassManager/Transforms.h" |
| #include "swift/SILOptimizer/Utils/CFGOptUtils.h" |
| #include "swift/SILOptimizer/Utils/InstOptUtils.h" |
| #include "swift/SILOptimizer/Utils/SILSSAUpdater.h" |
| #include "swift/SILOptimizer/Utils/ValueLifetime.h" |
| #include "llvm/ADT/SmallBitVector.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/Support/Compiler.h" |
| #include "llvm/Support/Debug.h" |
| |
| using namespace swift; |
| |
| STATISTIC(NumLoadPromoted, "Number of loads promoted"); |
| STATISTIC(NumLoadTakePromoted, "Number of load takes promoted"); |
| STATISTIC(NumDestroyAddrPromoted, "Number of destroy_addrs promoted"); |
| STATISTIC(NumAllocRemoved, "Number of allocations completely removed"); |
| |
| //===----------------------------------------------------------------------===// |
| // Subelement Analysis |
| //===----------------------------------------------------------------------===// |
| |
| // We can only analyze components of structs whose storage is fully accessible |
| // from Swift. |
| static StructDecl * |
| getFullyReferenceableStruct(SILType Ty) { |
| auto SD = Ty.getStructOrBoundGenericStruct(); |
| if (!SD || SD->hasUnreferenceableStorage()) |
| return nullptr; |
| return SD; |
| } |
| |
| static unsigned getNumSubElements(SILType T, SILModule &M, |
| TypeExpansionContext context) { |
| |
| if (auto TT = T.getAs<TupleType>()) { |
| unsigned NumElements = 0; |
| for (auto index : indices(TT.getElementTypes())) |
| NumElements += |
| getNumSubElements(T.getTupleElementType(index), M, context); |
| return NumElements; |
| } |
| |
| if (auto *SD = getFullyReferenceableStruct(T)) { |
| unsigned NumElements = 0; |
| for (auto *D : SD->getStoredProperties()) |
| NumElements += |
| getNumSubElements(T.getFieldType(D, M, context), M, context); |
| return NumElements; |
| } |
| |
| // If this isn't a tuple or struct, it is a single element. |
| return 1; |
| } |
| |
| /// getAccessPathRoot - Given an address, dive through any tuple/struct element |
| /// addresses to get the underlying value. |
| static SILValue getAccessPathRoot(SILValue pointer) { |
| while (true) { |
| if (auto *TEAI = dyn_cast<TupleElementAddrInst>(pointer)) { |
| pointer = TEAI->getOperand(); |
| continue; |
| } |
| |
| if (auto *SEAI = dyn_cast<StructElementAddrInst>(pointer)) { |
| pointer = SEAI->getOperand(); |
| continue; |
| } |
| |
| if (auto *BAI = dyn_cast<BeginAccessInst>(pointer)) { |
| pointer = BAI->getSource(); |
| continue; |
| } |
| |
| return pointer; |
| } |
| } |
| |
| /// Compute the subelement number indicated by the specified pointer (which is |
| /// derived from the root by a series of tuple/struct element addresses) by |
| /// treating the type as a linearized namespace with sequential elements. For |
| /// example, given: |
| /// |
| /// root = alloc { a: { c: i64, d: i64 }, b: (i64, i64) } |
| /// tmp1 = struct_element_addr root, 1 |
| /// tmp2 = tuple_element_addr tmp1, 0 |
| /// |
| /// This will return a subelement number of 2. |
| /// |
| /// If this pointer is to within an existential projection, it returns ~0U. |
| static unsigned computeSubelement(SILValue Pointer, |
| SingleValueInstruction *RootInst) { |
| unsigned SubElementNumber = 0; |
| SILModule &M = RootInst->getModule(); |
| |
| while (1) { |
| // If we got to the root, we're done. |
| if (RootInst == Pointer) |
| return SubElementNumber; |
| |
| if (auto *PBI = dyn_cast<ProjectBoxInst>(Pointer)) { |
| Pointer = PBI->getOperand(); |
| continue; |
| } |
| |
| if (auto *BAI = dyn_cast<BeginAccessInst>(Pointer)) { |
| Pointer = BAI->getSource(); |
| continue; |
| } |
| |
| if (auto *TEAI = dyn_cast<TupleElementAddrInst>(Pointer)) { |
| SILType TT = TEAI->getOperand()->getType(); |
| |
| // Keep track of what subelement is being referenced. |
| for (unsigned i = 0, e = TEAI->getFieldIndex(); i != e; ++i) { |
| SubElementNumber += |
| getNumSubElements(TT.getTupleElementType(i), M, |
| TypeExpansionContext(*RootInst->getFunction())); |
| } |
| Pointer = TEAI->getOperand(); |
| continue; |
| } |
| |
| if (auto *SEAI = dyn_cast<StructElementAddrInst>(Pointer)) { |
| SILType ST = SEAI->getOperand()->getType(); |
| |
| // Keep track of what subelement is being referenced. |
| StructDecl *SD = SEAI->getStructDecl(); |
| for (auto *D : SD->getStoredProperties()) { |
| if (D == SEAI->getField()) break; |
| auto context = TypeExpansionContext(*RootInst->getFunction()); |
| SubElementNumber += |
| getNumSubElements(ST.getFieldType(D, M, context), M, context); |
| } |
| |
| Pointer = SEAI->getOperand(); |
| continue; |
| } |
| |
| // This fails when we visit unchecked_take_enum_data_addr. We should just |
| // add support for enums. |
| assert(isa<InitExistentialAddrInst>(Pointer) && |
| "Unknown access path instruction"); |
| // Cannot promote loads and stores from within an existential projection. |
| return ~0U; |
| } |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Available Value |
| //===----------------------------------------------------------------------===// |
| |
| namespace { |
| |
| class AvailableValueAggregator; |
| |
struct AvailableValue {
  friend class AvailableValueAggregator;

  /// The SSA value that is known to be stored at the tracked location.
  SILValue Value;
  /// Which leaf subelement of \p Value this entry represents, using the
  /// linearized numbering produced by computeSubelement/getNumSubElements.
  unsigned SubElementNumber;

  /// The store instructions after which \p Value is available.
  ///
  /// If this gets too expensive in terms of copying, we can use an arena and a
  /// FrozenPtrSet like we do in ARC.
  SmallSetVector<StoreInst *, 1> InsertionPoints;

  /// Just for updating.
  SmallVectorImpl<PMOMemoryUse> *Uses;

public:
  /// Constructs an invalid available value (null \p Value).
  AvailableValue() = default;

  /// Main initializer for available values.
  ///
  /// *NOTE* We assume that all available values start with a singular insertion
  /// point and insertion points are added by merging.
  AvailableValue(SILValue Value, unsigned SubElementNumber,
                 StoreInst *InsertPoint)
      : Value(Value), SubElementNumber(SubElementNumber), InsertionPoints() {
    InsertionPoints.insert(InsertPoint);
  }

  /// Deleted copy constructor. This is a move only type.
  AvailableValue(const AvailableValue &) = delete;

  /// Deleted copy operator. This is a move only type.
  AvailableValue &operator=(const AvailableValue &) = delete;

  /// Move constructor. Leaves \p Other invalid (null value, ~0 subelement).
  AvailableValue(AvailableValue &&Other)
      : Value(nullptr), SubElementNumber(~0), InsertionPoints() {
    std::swap(Value, Other.Value);
    std::swap(SubElementNumber, Other.SubElementNumber);
    std::swap(InsertionPoints, Other.InsertionPoints);
  }

  /// Move operator. Implemented as a swap, so \p Other receives this entry's
  /// previous state.
  AvailableValue &operator=(AvailableValue &&Other) {
    std::swap(Value, Other.Value);
    std::swap(SubElementNumber, Other.SubElementNumber);
    std::swap(InsertionPoints, Other.InsertionPoints);
    return *this;
  }

  /// True when this entry holds a value (default-constructed entries do not).
  operator bool() const { return bool(Value); }

  /// Equality ignores insertion points: two entries are equal when they name
  /// the same subelement of the same value.
  bool operator==(const AvailableValue &Other) const {
    return Value == Other.Value && SubElementNumber == Other.SubElementNumber;
  }

  bool operator!=(const AvailableValue &Other) const {
    return !(*this == Other);
  }

  SILValue getValue() const { return Value; }
  SILType getType() const { return Value->getType(); }
  unsigned getSubElementNumber() const { return SubElementNumber; }
  ArrayRef<StoreInst *> getInsertionPoints() const {
    return InsertionPoints.getArrayRef();
  }

  /// Union \p Other's insertion points into ours. Only valid when both entries
  /// describe the same subelement of the same value (asserted).
  void mergeInsertionPoints(const AvailableValue &Other) & {
    assert(Value == Other.Value && SubElementNumber == Other.SubElementNumber);
    InsertionPoints.set_union(Other.InsertionPoints);
  }

  void addInsertionPoint(StoreInst *si) & { InsertionPoints.insert(si); }

  /// Return a new available value that extracts struct field \p D from this
  /// value, preserving the insertion points.
  AvailableValue emitStructExtract(SILBuilder &B, SILLocation Loc, VarDecl *D,
                                   unsigned SubElementNumber) const {
    SILValue NewValue = B.emitStructExtract(Loc, Value, D);
    return {NewValue, SubElementNumber, InsertionPoints};
  }

  /// Return a new available value that extracts tuple element \p EltNo from
  /// this value, preserving the insertion points.
  AvailableValue emitTupleExtract(SILBuilder &B, SILLocation Loc,
                                  unsigned EltNo,
                                  unsigned SubElementNumber) const {
    SILValue NewValue = B.emitTupleExtract(Loc, Value, EltNo);
    return {NewValue, SubElementNumber, InsertionPoints};
  }

  /// Return this value wrapped in a begin_borrow when one is required, or a
  /// plain copy of this state otherwise. Callers detect whether a borrow was
  /// emitted by comparing the returned value against the original.
  AvailableValue emitBeginBorrow(SILBuilder &b, SILLocation loc) const {
    // If we do not have ownership or already are guaranteed, just return a copy
    // of our state.
    if (!b.hasOwnership() ||
        Value.getOwnershipKind().isCompatibleWith(OwnershipKind::Guaranteed)) {
      return {Value, SubElementNumber, InsertionPoints};
    }

    // Otherwise, return newValue.
    return {b.createBeginBorrow(loc, Value), SubElementNumber, InsertionPoints};
  }

  void dump() const LLVM_ATTRIBUTE_USED;
  void print(llvm::raw_ostream &os) const;

private:
  /// Private constructor. Copies an existing insertion-point set wholesale;
  /// used by the extract/borrow helpers above.
  AvailableValue(SILValue Value, unsigned SubElementNumber,
                 const decltype(InsertionPoints) &InsertPoints)
      : Value(Value), SubElementNumber(SubElementNumber),
        InsertionPoints(InsertPoints) {}
};
| |
| } // end anonymous namespace |
| |
| void AvailableValue::dump() const { print(llvm::dbgs()); } |
| |
| void AvailableValue::print(llvm::raw_ostream &os) const { |
| os << "Available Value Dump. Value: "; |
| if (getValue()) { |
| os << getValue(); |
| } else { |
| os << "NoValue;\n"; |
| } |
| os << "SubElementNumber: " << getSubElementNumber() << "\n"; |
| os << "Insertion Points:\n"; |
| for (auto *I : getInsertionPoints()) { |
| os << *I; |
| } |
| } |
| |
namespace llvm {

/// Allow AvailableValue to be streamed with the usual LLVM `<<` syntax;
/// forwards to AvailableValue::print.
llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const AvailableValue &V) {
  V.print(os);
  return os;
}

} // end llvm namespace
| |
| //===----------------------------------------------------------------------===// |
| // Subelement Extraction |
| //===----------------------------------------------------------------------===// |
| |
| /// Given an aggregate value and an access path, non-destructively extract the |
| /// value indicated by the path. |
static SILValue nonDestructivelyExtractSubElement(const AvailableValue &Val,
                                                  SILBuilder &B,
                                                  SILLocation Loc) {
  SILType ValTy = Val.getType();
  unsigned SubElementNumber = Val.SubElementNumber;

  // Extract tuple elements.
  if (auto TT = ValTy.getAs<TupleType>()) {
    for (unsigned EltNo : indices(TT.getElementTypes())) {
      // Keep track of what subelement is being referenced.
      SILType EltTy = ValTy.getTupleElementType(EltNo);
      unsigned NumSubElt = getNumSubElements(
          EltTy, B.getModule(), TypeExpansionContext(B.getFunction()));
      // The requested subelement lives in this element iff its index is less
      // than the element's leaf count; otherwise skip the whole element.
      if (SubElementNumber < NumSubElt) {
        auto BorrowedVal = Val.emitBeginBorrow(B, Loc);
        auto NewVal =
            BorrowedVal.emitTupleExtract(B, Loc, EltNo, SubElementNumber);
        // Recurse to drill down into the extracted element.
        SILValue result = nonDestructivelyExtractSubElement(NewVal, B, Loc);
        // If our original value wasn't guaranteed and we did actually perform a
        // borrow as a result, insert the end_borrow.
        if (BorrowedVal.getValue() != Val.getValue())
          B.createEndBorrow(Loc, BorrowedVal.getValue());
        return result;
      }

      SubElementNumber -= NumSubElt;
    }

    llvm_unreachable("Didn't find field");
  }

  // Extract struct elements.
  if (auto *SD = getFullyReferenceableStruct(ValTy)) {
    for (auto *D : SD->getStoredProperties()) {
      auto fieldType = ValTy.getFieldType(
          D, B.getModule(), TypeExpansionContext(B.getFunction()));
      unsigned NumSubElt = getNumSubElements(
          fieldType, B.getModule(), TypeExpansionContext(B.getFunction()));

      // Same search as the tuple case: find the field whose leaf range
      // contains the requested subelement, then recurse into it.
      if (SubElementNumber < NumSubElt) {
        auto BorrowedVal = Val.emitBeginBorrow(B, Loc);
        auto NewVal =
            BorrowedVal.emitStructExtract(B, Loc, D, SubElementNumber);
        SILValue result = nonDestructivelyExtractSubElement(NewVal, B, Loc);
        // If our original value wasn't guaranteed and we did actually perform a
        // borrow as a result, insert the end_borrow.
        if (BorrowedVal.getValue() != Val.getValue())
          B.createEndBorrow(Loc, BorrowedVal.getValue());
        return result;
      }

      SubElementNumber -= NumSubElt;

    }
    llvm_unreachable("Didn't find field");
  }

  // Otherwise, we're down to a scalar. If we have ownership enabled,
  // we return a copy. Otherwise, there we can ignore ownership
  // issues. This is ok since in [ossa] we are going to eliminate a
  // load [copy] or a load [trivial], while in non-[ossa] SIL we will
  // be replacing unqualified loads.
  assert(SubElementNumber == 0 && "Miscalculation indexing subelements");
  if (!B.hasOwnership())
    return Val.getValue();
  return B.emitCopyValueOperation(Loc, Val.getValue());
}
| |
| //===----------------------------------------------------------------------===// |
| // Available Value Aggregation |
| //===----------------------------------------------------------------------===// |
| |
| static bool anyMissing(unsigned StartSubElt, unsigned NumSubElts, |
| ArrayRef<AvailableValue> &Values) { |
| while (NumSubElts) { |
| if (!Values[StartSubElt]) |
| return true; |
| ++StartSubElt; |
| --NumSubElts; |
| } |
| return false; |
| } |
| |
| namespace { |
| |
/// The ownership convention that the client of the aggregator expects for the
/// final aggregated value: consume it (Take), borrow it, or receive an
/// independent copy.
enum class AvailableValueExpectedOwnership {
  Take,
  Borrow,
  Copy,
};
| |
| /// A class that aggregates available values, loading them if they are not |
| /// available. |
class AvailableValueAggregator {
  /// Module of the function containing the instruction being promoted.
  SILModule &M;
  /// Builder positioned at the memory instruction we are promoting.
  SILBuilderWithScope B;
  /// Location of the memory instruction we are promoting.
  SILLocation Loc;
  /// One available-value entry per leaf subelement of the loaded type.
  MutableArrayRef<AvailableValue> AvailableValueList;
  /// Collected memory uses; loads emitted for missing values are appended.
  SmallVectorImpl<PMOMemoryUse> &Uses;
  DeadEndBlocks &deadEndBlocks;
  /// Ownership convention (take/borrow/copy) the caller expects of the result.
  AvailableValueExpectedOwnership expectedOwnership;

  /// Keep track of all instructions that we have added. Once we are done
  /// promoting a value, we need to make sure that if we need to balance any
  /// copies (to avoid leaks), we do so. This is not used if we are performing a
  /// take.
  SmallVector<SILInstruction *, 16> insertedInsts;

  /// The list of phi nodes inserted by the SSA updater.
  SmallVector<SILPhiArgument *, 16> insertedPhiNodes;

  /// A set of copy_values whose lifetime we balanced while inserting phi
  /// nodes. This means that these copy_value must be skipped in
  /// addMissingDestroysForCopiedValues.
  SmallPtrSet<CopyValueInst *, 16> copyValueProcessedWithPhiNodes;

public:
  AvailableValueAggregator(SILInstruction *Inst,
                           MutableArrayRef<AvailableValue> AvailableValueList,
                           SmallVectorImpl<PMOMemoryUse> &Uses,
                           DeadEndBlocks &deadEndBlocks,
                           AvailableValueExpectedOwnership expectedOwnership)
      : M(Inst->getModule()), B(Inst), Loc(Inst->getLoc()),
        AvailableValueList(AvailableValueList), Uses(Uses),
        deadEndBlocks(deadEndBlocks), expectedOwnership(expectedOwnership) {}

  // This is intended to be passed by reference only once constructed.
  AvailableValueAggregator(const AvailableValueAggregator &) = delete;
  AvailableValueAggregator(AvailableValueAggregator &&) = delete;
  AvailableValueAggregator &
  operator=(const AvailableValueAggregator &) = delete;
  AvailableValueAggregator &operator=(AvailableValueAggregator &&) = delete;

  /// Build a value of \p LoadTy rooted at \p Address from the available
  /// values starting at subelement \p FirstElt, emitting loads for leaves that
  /// are not available. Returns a null SILValue if a take was requested but is
  /// not possible.
  SILValue aggregateValues(SILType LoadTy, SILValue Address, unsigned FirstElt,
                           bool isTopLevel = true);
  /// True if every leaf of \p loadTy starting at \p firstElt is available, so
  /// the value can be taken without splitting a larger stored value.
  bool canTake(SILType loadTy, unsigned firstElt) const;

  void print(llvm::raw_ostream &os) const;
  void dump() const LLVM_ATTRIBUTE_USED;

  bool isTake() const {
    return expectedOwnership == AvailableValueExpectedOwnership::Take;
  }

  bool isBorrow() const {
    return expectedOwnership == AvailableValueExpectedOwnership::Borrow;
  }

  bool isCopy() const {
    return expectedOwnership == AvailableValueExpectedOwnership::Copy;
  }

  /// Given a load_borrow that we have aggregated a new value for, fixup the
  /// reference counts of the intermediate copies and phis to ensure that all
  /// forwarding operations in the CFG are strongly control equivalent (i.e. run
  /// the same number of times).
  void fixupOwnership(SILInstruction *load, SILValue newVal) {
    assert(isa<LoadBorrowInst>(load) || isa<LoadInst>(load));

    addHandOffCopyDestroysForPhis(load, newVal);
    addMissingDestroysForCopiedValues(load, newVal);
  }

private:
  /// Return the single value covering all leaves of \p loadTy at \p firstElt,
  /// or null if the value is not fully available as one aggregate.
  SILValue aggregateFullyAvailableValue(SILType loadTy, unsigned firstElt);
  /// Aggregate each tuple element, then form the tuple.
  SILValue aggregateTupleSubElts(TupleType *tt, SILType loadTy,
                                 SILValue address, unsigned firstElt);
  /// Aggregate each stored property, then form the struct.
  SILValue aggregateStructSubElts(StructDecl *sd, SILType loadTy,
                                  SILValue address, unsigned firstElt);
  /// Produce a single leaf value: extract it from an available value, or emit
  /// a load when it is unavailable.
  SILValue handlePrimitiveValue(SILType loadTy, SILValue address,
                                unsigned firstElt);
  bool isFullyAvailable(SILType loadTy, unsigned firstElt) const;


  /// If as a result of us copying values, we may have unconsumed destroys, find
  /// the appropriate location and place the values there. Only used when
  /// ownership is enabled.
  void addMissingDestroysForCopiedValues(SILInstruction *load, SILValue newVal);

  /// As a result of us using the SSA updater, insert hand off copy/destroys at
  /// each phi and make sure that intermediate phis do not leak by inserting
  /// destroys along paths that go through the intermediate phi that do not also
  /// go through the promoted load's block. (NOTE(review): the original comment
  /// was truncated after "go through the"; this completion is inferred from
  /// the surrounding destroy-balancing logic — confirm.)
  void addHandOffCopyDestroysForPhis(SILInstruction *load, SILValue newVal);
};
| |
| } // end anonymous namespace |
| |
| void AvailableValueAggregator::dump() const { print(llvm::dbgs()); } |
| |
| void AvailableValueAggregator::print(llvm::raw_ostream &os) const { |
| os << "Available Value List, N = " << AvailableValueList.size() |
| << ". Elts:\n"; |
| for (auto &V : AvailableValueList) { |
| os << V; |
| } |
| } |
| |
| bool AvailableValueAggregator::isFullyAvailable(SILType loadTy, |
| unsigned firstElt) const { |
| if (firstElt >= AvailableValueList.size()) { // #Elements may be zero. |
| return false; |
| } |
| |
| auto &firstVal = AvailableValueList[firstElt]; |
| |
| // Make sure that the first element is available and is the correct type. |
| if (!firstVal || firstVal.getType() != loadTy) |
| return false; |
| |
| return llvm::all_of(range(getNumSubElements( |
| loadTy, M, TypeExpansionContext(B.getFunction()))), |
| [&](unsigned index) -> bool { |
| auto &val = AvailableValueList[firstElt + index]; |
| return val.getValue() == firstVal.getValue() && |
| val.getSubElementNumber() == index; |
| }); |
| } |
| |
| // We can only take if we never have to split a larger value to promote this |
| // address. |
| bool AvailableValueAggregator::canTake(SILType loadTy, |
| unsigned firstElt) const { |
| // If we do not have ownership, we can always take since we do not need to |
| // keep any ownership invariants up to date. In the future, we should be able |
| // to chop up larger values before they are being stored. |
| if (!B.hasOwnership()) |
| return true; |
| |
| // If we are trivially fully available, just return true. |
| if (isFullyAvailable(loadTy, firstElt)) |
| return true; |
| |
| // Otherwise see if we are an aggregate with fully available leaf types. |
| if (TupleType *tt = loadTy.getAs<TupleType>()) { |
| return llvm::all_of(indices(tt->getElements()), [&](unsigned eltNo) { |
| SILType eltTy = loadTy.getTupleElementType(eltNo); |
| unsigned numSubElt = |
| getNumSubElements(eltTy, M, TypeExpansionContext(B.getFunction())); |
| bool success = canTake(eltTy, firstElt); |
| firstElt += numSubElt; |
| return success; |
| }); |
| } |
| |
| if (auto *sd = getFullyReferenceableStruct(loadTy)) { |
| return llvm::all_of(sd->getStoredProperties(), [&](VarDecl *decl) -> bool { |
| auto context = TypeExpansionContext(B.getFunction()); |
| SILType eltTy = loadTy.getFieldType(decl, M, context); |
| unsigned numSubElt = getNumSubElements(eltTy, M, context); |
| bool success = canTake(eltTy, firstElt); |
| firstElt += numSubElt; |
| return success; |
| }); |
| } |
| |
| // Otherwise, fail. The value is not fully available at its leafs. We can not |
| // perform a take. |
| return false; |
| } |
| |
| /// Given a bunch of primitive subelement values, build out the right aggregate |
| /// type (LoadTy) by emitting tuple and struct instructions as necessary. |
| SILValue AvailableValueAggregator::aggregateValues(SILType LoadTy, |
| SILValue Address, |
| unsigned FirstElt, |
| bool isTopLevel) { |
| // If we are performing a take, make sure that we have available values for |
| // /all/ of our values. Otherwise, bail. |
| if (isTopLevel && isTake() && !canTake(LoadTy, FirstElt)) { |
| return SILValue(); |
| } |
| |
| // Check to see if the requested value is fully available, as an aggregate. |
| // This is a super-common case for single-element structs, but is also a |
| // general answer for arbitrary structs and tuples as well. |
| if (SILValue Result = aggregateFullyAvailableValue(LoadTy, FirstElt)) { |
| return Result; |
| } |
| |
| // If we have a tuple type, then aggregate the tuple's elements into a full |
| // tuple value. |
| if (TupleType *tupleType = LoadTy.getAs<TupleType>()) { |
| SILValue result = |
| aggregateTupleSubElts(tupleType, LoadTy, Address, FirstElt); |
| if (isTopLevel && result.getOwnershipKind() == OwnershipKind::Guaranteed) { |
| SILValue borrowedResult = result; |
| SILBuilderWithScope builder(&*B.getInsertionPoint(), &insertedInsts); |
| result = builder.emitCopyValueOperation(Loc, borrowedResult); |
| SmallVector<BorrowedValue, 4> introducers; |
| bool foundIntroducers = |
| getAllBorrowIntroducingValues(borrowedResult, introducers); |
| (void)foundIntroducers; |
| assert(foundIntroducers); |
| for (auto value : introducers) { |
| builder.emitEndBorrowOperation(Loc, value.value); |
| } |
| } |
| return result; |
| } |
| |
| // If we have a struct type, then aggregate the struct's elements into a full |
| // struct value. |
| if (auto *structDecl = getFullyReferenceableStruct(LoadTy)) { |
| SILValue result = |
| aggregateStructSubElts(structDecl, LoadTy, Address, FirstElt); |
| if (isTopLevel && result.getOwnershipKind() == OwnershipKind::Guaranteed) { |
| SILValue borrowedResult = result; |
| SILBuilderWithScope builder(&*B.getInsertionPoint(), &insertedInsts); |
| result = builder.emitCopyValueOperation(Loc, borrowedResult); |
| SmallVector<BorrowedValue, 4> introducers; |
| bool foundIntroducers = |
| getAllBorrowIntroducingValues(borrowedResult, introducers); |
| (void)foundIntroducers; |
| assert(foundIntroducers); |
| for (auto value : introducers) { |
| builder.emitEndBorrowOperation(Loc, value.value); |
| } |
| } |
| return result; |
| } |
| |
| // Otherwise, we have a non-aggregate primitive. Load or extract the value. |
| // |
| // NOTE: We should never call this when taking since when taking we know that |
| // our underlying value is always fully available. |
| assert(!isTake()); |
| return handlePrimitiveValue(LoadTy, Address, FirstElt); |
| } |
| |
| // See if we have this value is fully available. In such a case, return it as an |
| // aggregate. This is a super-common case for single-element structs, but is |
| // also a general answer for arbitrary structs and tuples as well. |
SILValue
AvailableValueAggregator::aggregateFullyAvailableValue(SILType loadTy,
                                                       unsigned firstElt) {
  // Check if our underlying type is fully available. If it isn't, bail.
  if (!isFullyAvailable(loadTy, firstElt))
    return SILValue();

  // Ok, grab out first value. (note: any actually will do).
  auto &firstVal = AvailableValueList[firstElt];

  // Ok, we know that all of our available values are all parts of the same
  // value. Without ownership, we can just return the underlying first value.
  if (!B.hasOwnership())
    return firstVal.getValue();

  // Otherwise, we need to put in a copy. This is b/c we only propagate along +1
  // values and we are eliminating a load [copy].
  ArrayRef<StoreInst *> insertPts = firstVal.getInsertionPoints();
  if (insertPts.size() == 1) {
    // Use the scope and location of the store at the insertion point.
    SILBuilderWithScope builder(insertPts[0], &insertedInsts);
    SILLocation loc = insertPts[0]->getLoc();
    // If we have a take, just return the value.
    if (isTake())
      return firstVal.getValue();
    // Otherwise, return a copy of the value.
    return builder.emitCopyValueOperation(loc, firstVal.getValue());
  }

  // If we have multiple insertion points, put copies at each point and use the
  // SSA updater to get a value. The reason why this is safe is that we can only
  // have multiple insertion points if we are storing exactly the same value
  // implying that we can just copy firstVal at each insertion point.
  SILSSAUpdater updater(&insertedPhiNodes);
  updater.initialize(loadTy);

  Optional<SILValue> singularValue;
  for (auto *insertPt : insertPts) {
    // Use the scope and location of the store at the insertion point.
    SILBuilderWithScope builder(insertPt, &insertedInsts);
    SILLocation loc = insertPt->getLoc();
    SILValue eltVal = firstVal.getValue();

    // If we are not taking, copy the element value.
    if (!isTake()) {
      eltVal = builder.emitCopyValueOperation(loc, eltVal);
    }

    // Track whether every insertion point contributed the same SILValue; if
    // so, SSA construction below can be skipped entirely.
    if (!singularValue.hasValue()) {
      singularValue = eltVal;
    } else if (*singularValue != eltVal) {
      singularValue = SILValue();
    }

    // And then put the value into the SSA updater.
    updater.addAvailableValue(insertPt->getParent(), eltVal);
  }

  // If we only are tracking a singular value, we do not need to construct
  // SSA. Just return that value.
  if (auto val = singularValue.getValueOr(SILValue())) {
    // This assert documents that we are expecting that if we are in ossa, have
    // a non-trivial value, and are not taking, we should never go down this
    // code path. If we did, we would need to insert a copy here. The reason why
    // we know we will never go down this code path is since we have been
    // inserting copy_values implying that our potential singular value would be
    // of the copy_values which are guaranteed to all be different.
    assert((!B.hasOwnership() || isTake() ||
            val->getType().isTrivial(*B.getInsertionBB()->getParent())) &&
           "Should never reach this code path if we are in ossa and have a "
           "non-trivial value");
    return val;
  }

  // Finally, grab the value from the SSA updater.
  SILValue result = updater.getValueInMiddleOfBlock(B.getInsertionBB());
  assert(result.getOwnershipKind().isCompatibleWith(OwnershipKind::Owned));
  if (isTake() || !B.hasOwnership()) {
    return result;
  }

  // Be careful with this value and insert a copy in our load block to prevent
  // any weird control equivalence issues.
  SILBuilderWithScope builder(&*B.getInsertionPoint(), &insertedInsts);
  return builder.emitCopyValueOperation(Loc, result);
}
| |
| SILValue AvailableValueAggregator::aggregateTupleSubElts(TupleType *TT, |
| SILType LoadTy, |
| SILValue Address, |
| unsigned FirstElt) { |
| SmallVector<SILValue, 4> ResultElts; |
| |
| for (unsigned EltNo : indices(TT->getElements())) { |
| SILType EltTy = LoadTy.getTupleElementType(EltNo); |
| unsigned NumSubElt = |
| getNumSubElements(EltTy, M, TypeExpansionContext(B.getFunction())); |
| |
| // If we are missing any of the available values in this struct element, |
| // compute an address to load from. |
| SILValue EltAddr; |
| if (anyMissing(FirstElt, NumSubElt, AvailableValueList)) { |
| assert(!isTake() && "When taking, values should never be missing?!"); |
| EltAddr = |
| B.createTupleElementAddr(Loc, Address, EltNo, EltTy.getAddressType()); |
| } |
| |
| ResultElts.push_back( |
| aggregateValues(EltTy, EltAddr, FirstElt, /*isTopLevel*/ false)); |
| FirstElt += NumSubElt; |
| } |
| |
| // If we are going to use this to promote a borrowed value, insert borrow |
| // operations. Eventually I am going to do this for everything, but this |
| // should make it easier to bring up. |
| if (!isTake()) { |
| for (unsigned i : indices(ResultElts)) { |
| ResultElts[i] = B.emitBeginBorrowOperation(Loc, ResultElts[i]); |
| } |
| } |
| |
| return B.createTuple(Loc, LoadTy, ResultElts); |
| } |
| |
| SILValue AvailableValueAggregator::aggregateStructSubElts(StructDecl *sd, |
| SILType loadTy, |
| SILValue address, |
| unsigned firstElt) { |
| SmallVector<SILValue, 4> resultElts; |
| |
| for (auto *decl : sd->getStoredProperties()) { |
| auto context = TypeExpansionContext(B.getFunction()); |
| SILType eltTy = loadTy.getFieldType(decl, M, context); |
| unsigned numSubElt = getNumSubElements(eltTy, M, context); |
| |
| // If we are missing any of the available values in this struct element, |
| // compute an address to load from. |
| SILValue eltAddr; |
| if (anyMissing(firstElt, numSubElt, AvailableValueList)) { |
| assert(!isTake() && "When taking, values should never be missing?!"); |
| eltAddr = |
| B.createStructElementAddr(Loc, address, decl, eltTy.getAddressType()); |
| } |
| |
| resultElts.push_back( |
| aggregateValues(eltTy, eltAddr, firstElt, /*isTopLevel*/ false)); |
| firstElt += numSubElt; |
| } |
| |
| if (!isTake()) { |
| for (unsigned i : indices(resultElts)) { |
| resultElts[i] = B.emitBeginBorrowOperation(Loc, resultElts[i]); |
| } |
| } |
| |
| return B.createStruct(Loc, loadTy, resultElts); |
| } |
| |
| // We have looked through all of the aggregate values and finally found a value |
| // that is not available without transforming, i.e. a "primitive value". If the |
| // value is available, use it (extracting if we need to), otherwise emit a load |
| // of the value with the appropriate qualifier. |
SILValue AvailableValueAggregator::handlePrimitiveValue(SILType loadTy,
                                                        SILValue address,
                                                        unsigned firstElt) {
  assert(!isTake() && "Should only take fully available values?!");

  // If the value is not available, load the value and update our use list.
  auto &val = AvailableValueList[firstElt];
  if (!val) {
    LoadInst *load = ([&]() {
      if (B.hasOwnership()) {
        // In OSSA, emit a load [copy] (or a trivial load for trivial types)
        // and record it in insertedInsts so that a later pass can insert the
        // compensating destroy_value for the copy.
        SILBuilderWithScope builder(&*B.getInsertionPoint(), &insertedInsts);
        return builder.createTrivialLoadOr(Loc, address,
                                           LoadOwnershipQualifier::Copy);
      }
      return B.createLoad(Loc, address, LoadOwnershipQualifier::Unqualified);
    }());
    Uses.emplace_back(load, PMOUseKind::Load);
    return load;
  }

  // If we have 1 insertion point, just extract the value and return.
  //
  // This saves us from having to spend compile time in the SSA updater in this
  // case.
  ArrayRef<StoreInst *> insertPts = val.getInsertionPoints();
  if (insertPts.size() == 1) {
    // Use the scope and location of the store at the insertion point.
    SILBuilderWithScope builder(insertPts[0], &insertedInsts);
    SILLocation loc = insertPts[0]->getLoc();
    SILValue eltVal = nonDestructivelyExtractSubElement(val, builder, loc);
    assert(!builder.hasOwnership() ||
           eltVal.getOwnershipKind().isCompatibleWith(OwnershipKind::Owned));
    assert(eltVal->getType() == loadTy && "Subelement types mismatch");

    // Without ownership, the extracted value can be used directly.
    if (!builder.hasOwnership()) {
      return eltVal;
    }

    // In OSSA, copy at the original insertion point of the aggregator so the
    // returned value is independently owned.
    SILBuilderWithScope builder2(&*B.getInsertionPoint(), &insertedInsts);
    return builder2.emitCopyValueOperation(Loc, eltVal);
  }

  // If we have an available value, then we want to extract the subelement from
  // the borrowed aggregate before each insertion point. Note that since we have
  // inserted copies at each of these insertion points, we know that we will
  // never have the same value along all paths unless we have a trivial value
  // meaning the SSA updater given a non-trivial value must /always/ be used.
  SILSSAUpdater updater(&insertedPhiNodes);
  updater.initialize(loadTy);

  // Track whether every insertion point produced the same SILValue; if so we
  // can skip the SSA updater's phi placement entirely.
  Optional<SILValue> singularValue;
  for (auto *i : insertPts) {
    // Use the scope and location of the store at the insertion point.
    SILBuilderWithScope builder(i, &insertedInsts);
    SILLocation loc = i->getLoc();
    SILValue eltVal = nonDestructivelyExtractSubElement(val, builder, loc);
    assert(!builder.hasOwnership() ||
           eltVal.getOwnershipKind().isCompatibleWith(OwnershipKind::Owned));

    if (!singularValue.hasValue()) {
      singularValue = eltVal;
    } else if (*singularValue != eltVal) {
      // Two insertion points produced different values: no singular value.
      singularValue = SILValue();
    }

    updater.addAvailableValue(i->getParent(), eltVal);
  }

  SILBasicBlock *insertBlock = B.getInsertionBB();

  // If we are not in ossa and have a singular value or if we are in ossa and
  // have a trivial singular value, just return that value.
  //
  // This can never happen for non-trivial values in ossa since we never should
  // visit this code path if we have a take implying that non-trivial values
  // /will/ have a copy and thus are guaranteed (since each copy yields a
  // different value) to not be singular values.
  if (auto val = singularValue.getValueOr(SILValue())) {
    assert((!B.hasOwnership() ||
            val->getType().isTrivial(*insertBlock->getParent())) &&
           "Should have inserted copies for each insertion point, so shouldn't "
           "have a singular value if non-trivial?!");
    return val;
  }

  // Finally, grab the value from the SSA updater.
  SILValue eltVal = updater.getValueInMiddleOfBlock(insertBlock);
  assert(!B.hasOwnership() ||
         eltVal.getOwnershipKind().isCompatibleWith(OwnershipKind::Owned));
  assert(eltVal->getType() == loadTy && "Subelement types mismatch");
  if (!B.hasOwnership())
    return eltVal;
  // In OSSA, hand back an independently owned copy of the phi web's value.
  SILBuilderWithScope builder(&*B.getInsertionPoint(), &insertedInsts);
  return builder.emitCopyValueOperation(Loc, eltVal);
}
| |
| static SILInstruction * |
| getNonPhiBlockIncomingValueDef(SILValue incomingValue, |
| SingleValueInstruction *phiCopy) { |
| assert(isa<CopyValueInst>(phiCopy)); |
| auto *phiBlock = phiCopy->getParent(); |
| if (phiBlock == incomingValue->getParentBlock()) { |
| return nullptr; |
| } |
| |
| if (auto *cvi = dyn_cast<CopyValueInst>(incomingValue)) { |
| return cvi; |
| } |
| |
| assert(isa<SILPhiArgument>(incomingValue)); |
| |
| // Otherwise, our copy_value may not be post-dominated by our phi. To |
| // work around that, we need to insert destroys along the other |
| // paths. So set base to the first instruction in our argument's block, |
| // so we can insert destroys for our base. |
| return &*incomingValue->getParentBlock()->begin(); |
| } |
| |
| static bool |
| terminatorHasAnyKnownPhis(TermInst *ti, |
| ArrayRef<SILPhiArgument *> insertedPhiNodesSorted) { |
| for (auto succArgList : ti->getSuccessorBlockArgumentLists()) { |
| if (llvm::any_of(succArgList, [&](SILArgument *arg) { |
| return binary_search(insertedPhiNodesSorted, |
| cast<SILPhiArgument>(arg)); |
| })) { |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| namespace { |
| |
class PhiNodeCopyCleanupInserter {
  /// Incoming phi values mapped to the index at which they were first
  /// recorded. The index serves as a deterministic key into copiesToCleanup.
  llvm::SmallMapVector<SILValue, unsigned, 8> incomingValues;

  /// Map from index -> (incomingValueIndex, copy).
  ///
  /// We are going to stable_sort this array using the indices of
  /// incomingValueIndex. This will ensure that we always visit in
  /// insertion order our incoming values (since the indices we are
  /// sorting by are the count of incoming values we have seen so far
  /// when we see the incoming value) and maintain the internal
  /// insertion sort within our range as well. This ensures that we
  /// visit our incoming values in visitation order and that within
  /// their own values, also visit them in visitation order with
  /// respect to each other.
  SmallFrozenMultiMap<unsigned, SingleValueInstruction *, 16> copiesToCleanup;

  /// The lifetime frontier that we use to compute lifetime endpoints
  /// when emitting cleanups.
  ValueLifetimeAnalysis::Frontier lifetimeFrontier;

public:
  PhiNodeCopyCleanupInserter() = default;

  /// Record that \p copy (a copy_value feeding a phi) will eventually need a
  /// compensating destroy of \p incomingValue to be emitted.
  void trackNewCleanup(SILValue incomingValue, CopyValueInst *copy) {
    auto entry = std::make_pair(incomingValue, incomingValues.size());
    auto iter = incomingValues.insert(entry);
    // If we did not succeed (the value was already present), then
    // iter.first->second is the previously recorded index of the incoming
    // value. Otherwise, it will be nextIndex.
    copiesToCleanup.insert(iter.first->second, copy);
  }

  /// Emit all recorded destroy_value cleanups. Consumes the inserter.
  void emit(DeadEndBlocks &deadEndBlocks) &&;
};
| |
| } // end anonymous namespace |
| |
| void PhiNodeCopyCleanupInserter::emit(DeadEndBlocks &deadEndBlocks) && { |
| // READ THIS: We are being very careful here to avoid allowing for |
| // non-determinism to enter here. |
| // |
| // 1. First we create a list of indices of our phi node data. Then we use a |
| // stable sort those indices into the order in which our phi node cleanups |
| // would be in if we compared just using incomingValues. We use a stable |
| // sort here to ensure that within the same "cohort" of values, our order |
| // is insertion order. |
| // |
| // 2. We go through the list of phiNodeCleanupStates in insertion order. We |
| // also maintain a set of already visited base values. When we visit the |
| // first phiNodeCleanupState for a specific phi, we process the phi |
| // then. This ensures that we always process the phis in insertion order as |
| // well. |
| copiesToCleanup.setFrozen(); |
| |
| for (auto keyValue : copiesToCleanup.getRange()) { |
| unsigned incomingValueIndex = keyValue.first; |
| auto copies = keyValue.second; |
| |
| SILValue incomingValue = |
| std::next(incomingValues.begin(), incomingValueIndex)->first; |
| SingleValueInstruction *phiCopy = copies.front(); |
| auto *insertPt = getNonPhiBlockIncomingValueDef(incomingValue, phiCopy); |
| auto loc = RegularLocation::getAutoGeneratedLocation(); |
| |
| // Before we do anything, see if we have a single cleanup state. In such a |
| // case, we could have that we have a phi node as an incoming value and a |
| // copy_value in that same block. In such a case, we want to just insert the |
| // copy and continue. This means that |
| // cleanupState.getNonPhiBlockIncomingValueDef() should always return a |
| // non-null value in the code below. |
| if (copies.size() == 1 && isa<SILArgument>(incomingValue) && !insertPt) { |
| SILBasicBlock *phiBlock = phiCopy->getParent(); |
| SILBuilderWithScope builder(phiBlock->getTerminator()); |
| builder.createDestroyValue(loc, incomingValue); |
| continue; |
| } |
| |
| // Otherwise, we know that we have for this incomingValue, multiple |
| // potential insert pts that we need to handle at the same time with our |
| // lifetime query. Lifetime extend our base over these copy_value uses. |
| assert(lifetimeFrontier.empty()); |
| auto *def = getNonPhiBlockIncomingValueDef(incomingValue, phiCopy); |
| assert(def && "Should never have a nullptr here since we handled all of " |
| "the single block cases earlier"); |
| ValueLifetimeAnalysis analysis(def, copies); |
| bool foundCriticalEdges = !analysis.computeFrontier( |
| lifetimeFrontier, ValueLifetimeAnalysis::DontModifyCFG, &deadEndBlocks); |
| (void)foundCriticalEdges; |
| assert(!foundCriticalEdges); |
| |
| while (!lifetimeFrontier.empty()) { |
| auto *insertPoint = lifetimeFrontier.pop_back_val(); |
| SILBuilderWithScope builder(insertPoint); |
| builder.createDestroyValue(loc, incomingValue); |
| } |
| } |
| } |
| |
/// Fix up ownership for the phi web created by the SSA updater while
/// promoting \p load: insert copy_values on the incoming edges of owned
/// phis and destroy_values along each phi's lifetime frontier.
void AvailableValueAggregator::addHandOffCopyDestroysForPhis(
    SILInstruction *load, SILValue newVal) {
  assert(isa<LoadBorrowInst>(load) || isa<LoadInst>(load));

  SmallPtrSet<SILBasicBlock *, 8> visitedBlocks;
  SmallVector<SILBasicBlock *, 8> leakingBlocks;
  SmallVector<std::pair<SILBasicBlock *, SILValue>, 8> incomingValues;
  auto loc = RegularLocation::getAutoGeneratedLocation();

#ifndef NDEBUG
  LLVM_DEBUG(llvm::dbgs() << "Inserted Phis!\n");
  for (auto *phi : insertedPhiNodes) {
    LLVM_DEBUG(llvm::dbgs() << "Phi: " << *phi);
  }
#endif

  // Before we begin, identify the offset for all phis that are intermediate
  // phis inserted by the SSA updater. We are taking advantage of the fact that
  // the SSA updater just constructs the web without knowledge of ownership. So
  // if a phi node is only used by another phi node that we inserted, then we
  // have an intermediate phi node.
  //
  // TODO: There should be a better way of doing this than doing a copy + sort.
  SmallVector<SILPhiArgument *, 32> insertedPhiNodesSorted;
  llvm::copy(insertedPhiNodes, std::back_inserter(insertedPhiNodesSorted));
  llvm::sort(insertedPhiNodesSorted);

  SmallBitVector intermediatePhiOffsets(insertedPhiNodes.size());
  for (unsigned i : indices(insertedPhiNodes)) {
    if (TermInst *termInst =
            insertedPhiNodes[i]->getSingleUserOfType<TermInst>()) {
      // Only set the value if we find termInst has a successor with a phi node
      // in our insertedPhiNodes.
      if (terminatorHasAnyKnownPhis(termInst, insertedPhiNodesSorted)) {
        intermediatePhiOffsets.set(i);
      }
    }
  }

  // First go through all of our phi nodes doing the following:
  //
  // 1. If any of the phi node have a copy_value as an operand, we know that the
  //    copy_value does not dominate our final definition since otherwise the
  //    SSA updater would not have inserted a phi node here. In such a case
  //    since we may not have that the copy_value is post-dominated by the phi,
  //    we need to insert a copy_value at the phi to allow for post-domination
  //    and then use the ValueLifetimeChecker to determine the rest of the
  //    frontier for the base value.
  //
  // 2. If our phi node is used by another phi node, we run into a similar
  //    problem where we could have that our original phi node does not dominate
  //    our final definition (since the SSA updater would not have inserted the
  //    phi) and may not be strongly control dependent on our phi. To work
  //    around this problem, we insert at the phi a copy_value to allow for the
  //    phi to post_dominate its copy and then extend the lifetime of the phied
  //    value over that copy.
  //
  // As an extra complication to this, when we insert compensating releases for
  // any copy_values from (1), we need to insert the destroy_value on "base
  // values" (either a copy_value or the first instruction of a phi argument's
  // block) /after/ we have found all of the base_values to ensure that if the
  // same base value is used by multiple phis, we do not insert too many
  // destroy_values.
  //
  // NOTE: At first glance one may think that such a problem could not occur
  // with phi nodes as well. Sadly if we allow for double backedge loops, it is
  // possible (there may be more cases).
  PhiNodeCopyCleanupInserter cleanupInserter;

  for (unsigned i : indices(insertedPhiNodes)) {
    auto *phi = insertedPhiNodes[i];

    // If our phi is not owned, continue. No fixes are needed.
    if (phi->getOwnershipKind() != OwnershipKind::Owned)
      continue;

    LLVM_DEBUG(llvm::dbgs() << "Visiting inserted phi: " << *phi);
    // Otherwise, we have a copy_value that may not be strongly control
    // equivalent with our phi node. In such a case, we need to use
    // ValueLifetimeAnalysis to lifetime extend the copy such that we can
    // produce a new copy_value at the phi. We insert destroys along the
    // frontier.
    visitedBlocks.clear();
    leakingBlocks.clear();
    incomingValues.clear();

    phi->getIncomingPhiValues(incomingValues);
    unsigned phiIndex = phi->getIndex();
    for (auto pair : incomingValues) {
      SILValue value = pair.second;

      // If we had a non-trivial type with non-owned ownership, we will not see
      // a copy_value, so skip them here.
      if (value.getOwnershipKind() != OwnershipKind::Owned)
        continue;

      // Otherwise, value should be from a copy_value or a phi node.
      assert(isa<CopyValueInst>(value) || isa<SILPhiArgument>(value));

      // If we have a copy_value, remove it from the inserted insts set so we
      // skip it when we start processing insertedInstrs.
      if (auto *cvi = dyn_cast<CopyValueInst>(value)) {
        copyValueProcessedWithPhiNodes.insert(cvi);

        // Then check if our termInst is in the same block as our copy_value. In
        // such a case, we can just use the copy_value as our phi's value
        // without needing to worry about any issues around control equivalence.
        if (pair.first == cvi->getParent())
          continue;
      } else {
        assert(isa<SILPhiArgument>(value));
      }

      // Otherwise, insert a copy_value instruction right before the phi. We use
      // that for our actual phi.
      auto *termInst = pair.first->getTerminator();
      SILBuilderWithScope builder(termInst);
      CopyValueInst *phiCopy = builder.createCopyValue(loc, value);
      termInst->setOperand(phiIndex, phiCopy);

      // Now that we know our base, phi, phiCopy for this specific incoming
      // value, append it to the phiNodeCleanupState so we can insert
      // destroy_values late after we visit all insertedPhiNodes.
      cleanupInserter.trackNewCleanup(value, phiCopy);
    }

    // Then see if our phi is an intermediate phi. If it is an intermediate phi,
    // we know that this is not the phi node that is post-dominated by the
    // load_borrow and that we will lifetime extend it via the child
    // phi. Instead, we need to just ensure that our phi arg does not leak onto
    // its set of post-dominating paths, subtracting from that set the path
    // through our terminator use.
    if (intermediatePhiOffsets[i]) {
      continue;
    }

    // If we reach this point, then we know that we are a phi node that actually
    // dominates our user so we need to lifetime extend it over the
    // load_borrow. Thus insert copy_value along the incoming edges and then
    // lifetime extend the phi node over the load_borrow.
    //
    // The linear lifetime checker doesn't care if the passed in load is
    // actually a user of our copy_value. What we care about is that the load is
    // guaranteed to be in the block where we have reformed the tuple in a
    // consuming manner. This means if we add it as the consuming use of the
    // copy, we can find the leaking places if any exist.
    //
    // Then perform the linear lifetime check. If we succeed, continue. We have
    // no further work to do.
    auto *loadOperand = &load->getAllOperands()[0];
    LinearLifetimeChecker checker(visitedBlocks, deadEndBlocks);
    bool consumedInLoop = checker.completeConsumingUseSet(
        phi, loadOperand, [&](SILBasicBlock::iterator iter) {
          SILBuilderWithScope builder(iter);
          builder.emitDestroyValueOperation(loc, phi);
        });

    // Ok, we found some leaking blocks and potentially that our load is
    // "consumed" inside a different loop in the loop nest from cvi. If we are
    // consumed in the loop, then our visit should have inserted all of the
    // necessary destroys for us by inserting the destroys on the loop
    // boundaries. So, continue.
    //
    // NOTE: This includes cases where due to an infinite loop, we did not
    // insert /any/ destroys since the loop has no boundary in a certain sense.
    if (consumedInLoop) {
      continue;
    }

    // Otherwise, we need to insert one last destroy after the load for our phi.
    auto next = std::next(load->getIterator());
    SILBuilderWithScope builder(next);
    builder.emitDestroyValueOperation(next->getLoc(), phi);
  }

  // Alright! In summary, we just lifetime extended all of our phis,
  // lifetime extended them to the load block, and inserted phi copies
  // at all of our intermediate phi nodes. Now we need to cleanup and
  // insert all of the compensating destroy_value that we need.
  std::move(cleanupInserter).emit(deadEndBlocks);

  // Clear the phi node array now that we are done.
  insertedPhiNodes.clear();
}
| |
/// Insert the compensating destroy_values for load [copy]s and copy_values
/// that were created while promoting \p load but were not already handled by
/// the phi-node fixup pass.
void AvailableValueAggregator::addMissingDestroysForCopiedValues(
    SILInstruction *load, SILValue newVal) {
  assert(B.hasOwnership() &&
         "We assume this is only called if we have ownership");

  SmallPtrSet<SILBasicBlock *, 8> visitedBlocks;
  SmallVector<SILBasicBlock *, 8> leakingBlocks;
  auto loc = RegularLocation::getAutoGeneratedLocation();

  for (auto *inst : insertedInsts) {
    // Otherwise, see if this is a load [copy]. If it is a load [copy], then we
    // know that the load [copy] must be in the load block meaning we can just
    // put a destroy_value /after/ the load_borrow to ensure that the value
    // lives long enough for us to copy_value it or a derived value for the
    // begin_borrow.
    if (auto *li = dyn_cast<LoadInst>(inst)) {
      if (li->getOwnershipQualifier() == LoadOwnershipQualifier::Copy) {
        assert(li->getParent() == load->getParent());
        auto next = std::next(load->getIterator());
        SILBuilderWithScope builder(next);
        builder.emitDestroyValueOperation(next->getLoc(), li);
        continue;
      }
    }

    // Our copy_value may have been unset above if it was used by a phi
    // (implying it does not dominate our final user).
    auto *cvi = dyn_cast<CopyValueInst>(inst);
    if (!cvi)
      continue;

    // If we already handled this copy_value above when handling phi nodes, just
    // continue.
    if (copyValueProcessedWithPhiNodes.count(cvi))
      continue;

    // Clear our state.
    visitedBlocks.clear();
    leakingBlocks.clear();

    // The linear lifetime checker doesn't care if the passed in load is
    // actually a user of our copy_value. What we care about is that the load is
    // guaranteed to be in the block where we have reformed the tuple in a
    // consuming manner. This means if we add it as the consuming use of the
    // copy, we can find the leaking places if any exist.
    //
    // Then perform the linear lifetime check. If we succeed, continue. We have
    // no further work to do.
    auto *loadOperand = &load->getAllOperands()[0];
    LinearLifetimeChecker checker(visitedBlocks, deadEndBlocks);
    bool consumedInLoop = checker.completeConsumingUseSet(
        cvi, loadOperand, [&](SILBasicBlock::iterator iter) {
          SILBuilderWithScope builder(iter);
          builder.emitDestroyValueOperation(loc, cvi);
        });

    // Ok, we found some leaking blocks and potentially that our load is
    // "consumed" inside a different loop in the loop nest from cvi. If we are
    // consumed in the loop, then our visit should have inserted all of the
    // necessary destroys for us by inserting the destroys on the loop
    // boundaries. So, continue.
    //
    // NOTE: This includes cases where due to an infinite loop, we did not
    // insert /any/ destroys since the loop has no boundary in a certain sense.
    if (consumedInLoop) {
      continue;
    }

    // Otherwise, we need to insert one last destroy after the load for our phi.
    auto next = std::next(load->getIterator());
    SILBuilderWithScope builder(next);
    builder.emitDestroyValueOperation(next->getLoc(), cvi);
  }
}
| |
| //===----------------------------------------------------------------------===// |
| // Available Value Dataflow |
| //===----------------------------------------------------------------------===// |
| |
| namespace { |
| |
| /// Given a piece of memory, the memory's uses, and destroys perform a single |
| /// round of semi-optimistic backwards dataflow for each use. The result is the |
| /// set of available values that reach the specific use of the field in the |
| /// allocated object. |
| /// |
| /// The general form of the algorithm is that in our constructor, we analyze our |
| /// uses and determine available values. Then users call computeAvailableValues |
| /// which looks backwards up the control flow graph for available values that we |
| /// can use. |
| /// |
| /// NOTE: The reason why we say that the algorithm is semi-optimistic is that we |
| /// assume that all incoming elements into a loopheader will be the same. If we |
| /// find a conflict, we record it and fail. |
class AvailableValueDataflowContext {
  /// The base memory we are performing dataflow upon.
  AllocationInst *TheMemory;

  /// The number of sub elements of our memory.
  unsigned NumMemorySubElements;

  /// The set of uses that we are tracking. This is only here so we can update
  /// when exploding copy_addr. It would be great if we did not have to store
  /// this.
  SmallVectorImpl<PMOMemoryUse> &Uses;

  /// The set of blocks with local definitions.
  ///
  /// We use this to determine if we should visit a block or look at a block's
  /// predecessors during dataflow for an available value.
  llvm::SmallPtrSet<SILBasicBlock *, 32> HasLocalDefinition;

  /// The set of blocks that have definitions which specifically "kill" the
  /// given value. If a block is in this set, there must be an instruction in
  /// LoadTakeUse whose parent is the block. This is just used to speed up
  /// computation.
  ///
  /// NOTE: These are not considered escapes.
  llvm::SmallPtrSet<SILBasicBlock *, 32> HasLocalKill;

  /// This is a set of load takes that we are tracking. HasLocalKill is the set
  /// of parent blocks of these instructions.
  llvm::SmallPtrSet<SILInstruction *, 8> LoadTakeUses;

  /// This is a map of uses that are not loads (i.e., they are Stores,
  /// InOutUses, and Escapes), to their entry in Uses.
  llvm::SmallDenseMap<SILInstruction *, unsigned, 16> NonLoadUses;

  /// Does this value escape anywhere in the function. We use this very
  /// conservatively.
  bool HasAnyEscape = false;

public:
  AvailableValueDataflowContext(AllocationInst *TheMemory,
                                unsigned NumMemorySubElements,
                                SmallVectorImpl<PMOMemoryUse> &Uses);

  /// Try to compute available values for "TheMemory" at the instruction \p
  /// StartingFrom. We only compute the values for set bits in \p
  /// RequiredElts. We return the available values in \p Result. If any
  /// available values were found, return true. Otherwise, return false.
  bool computeAvailableValues(SILInstruction *StartingFrom,
                              unsigned FirstEltOffset,
                              unsigned NumLoadSubElements,
                              SmallBitVector &RequiredElts,
                              SmallVectorImpl<AvailableValue> &Result);

  /// Return true if the box has escaped at the specified instruction. We are
  /// not allowed to do load promotion in an escape region.
  bool hasEscapedAt(SILInstruction *I);

  /// Explode a copy_addr, updating the Uses at the same time.
  void explodeCopyAddr(CopyAddrInst *CAI);

private:
  SILModule &getModule() const { return TheMemory->getModule(); }

  void updateAvailableValues(SILInstruction *Inst,
                             SmallBitVector &RequiredElts,
                             SmallVectorImpl<AvailableValue> &Result,
                             SmallBitVector &ConflictingValues);
  void computeAvailableValuesFrom(
      SILBasicBlock::iterator StartingFrom, SILBasicBlock *BB,
      SmallBitVector &RequiredElts,
      SmallVectorImpl<AvailableValue> &Result,
      llvm::SmallDenseMap<SILBasicBlock *, SmallBitVector, 32>
          &VisitedBlocks,
      SmallBitVector &ConflictingValues);
};
| |
| } // end anonymous namespace |
| |
AvailableValueDataflowContext::AvailableValueDataflowContext(
    AllocationInst *InputTheMemory, unsigned NumMemorySubElements,
    SmallVectorImpl<PMOMemoryUse> &InputUses)
    : TheMemory(InputTheMemory), NumMemorySubElements(NumMemorySubElements),
      Uses(InputUses) {
  // The first step of processing an element is to collect information about the
  // element into data structures we use later.
  for (unsigned ui : indices(Uses)) {
    auto &Use = Uses[ui];
    assert(Use.Inst && "No instruction identified?");

    // If we have a load...
    if (Use.Kind == PMOUseKind::Load) {
      // Skip load borrow use and open_existential_addr.
      if (isa<LoadBorrowInst>(Use.Inst) || isa<OpenExistentialAddrInst>(Use.Inst))
        continue;

      // That is not a load take, continue. Otherwise, stash the load [take].
      // load [take]s invalidate the underlying memory, so track both the
      // instruction and its parent block (for the kill-block fast path).
      if (auto *LI = dyn_cast<LoadInst>(Use.Inst)) {
        if (LI->getOwnershipQualifier() == LoadOwnershipQualifier::Take) {
          LoadTakeUses.insert(LI);
          HasLocalKill.insert(LI->getParent());
        }
        continue;
      }

      // If we have a copy_addr as our load, it means we are processing a source
      // of the value. If the copy_addr is taking from the source, we need to
      // treat it like a load take use.
      if (auto *CAI = dyn_cast<CopyAddrInst>(Use.Inst)) {
        if (CAI->isTakeOfSrc() == IsTake) {
          LoadTakeUses.insert(CAI);
          HasLocalKill.insert(CAI->getParent());
        }
        continue;
      }

      llvm_unreachable("Unhandled SILInstructionKind for PMOUseKind::Load?!");
    }

    // Keep track of all the uses that aren't loads.
    NonLoadUses[Use.Inst] = ui;
    HasLocalDefinition.insert(Use.Inst->getParent());

    if (Use.Kind == PMOUseKind::Escape) {
      // Determine which blocks the value can escape from. We aren't allowed to
      // promote loads in blocks reachable from an escape point.
      HasAnyEscape = true;
    }
  }

  // It isn't really a use, but we account for the alloc_box/mark_uninitialized
  // as a use so we see it in our dataflow walks.
  NonLoadUses[TheMemory] = ~0U;
  HasLocalDefinition.insert(TheMemory->getParent());
}
| |
// This function takes in the current (potentially uninitialized) available
// values for theMemory and for the subset of AvailableValues corresponding to
// \p address either:
//
// 1. If uninitialized, optionally initialize the available value with a new
//    SILValue. It is optional since in certain cases, (for instance when
//    invalidating one just wants to skip empty available values).
//
// 2. Given an initialized value, either add the given instruction as an
//    insertion point or state that we have a conflict.
//
// \p defaultFunc produces the value to seed an uninitialized entry with (or
// None to mark it conflicting); \p isSafeFunc decides whether an existing
// entry is compatible with \p inst.
static inline void updateAvailableValuesHelper(
    SingleValueInstruction *theMemory, SILInstruction *inst, SILValue address,
    SmallBitVector &requiredElts, SmallVectorImpl<AvailableValue> &result,
    SmallBitVector &conflictingValues,
    function_ref<Optional<AvailableValue>(unsigned)> defaultFunc,
    function_ref<bool(AvailableValue &, unsigned)> isSafeFunc) {
  auto &mod = theMemory->getModule();
  // Offset of \p address's first subelement within theMemory's flattened
  // subelement numbering.
  unsigned startSubElt = computeSubelement(address, theMemory);

  // TODO: Is this needed now?
  assert(startSubElt != ~0U && "Store within enum projection not handled");
  for (unsigned i : range(getNumSubElements(
           address->getType().getObjectType(), mod,
           TypeExpansionContext(*theMemory->getFunction())))) {
    // If this element is not required, don't fill it in.
    if (!requiredElts[startSubElt + i])
      continue;

    // At this point we know that we will either mark the value as conflicting
    // or give it a value.
    requiredElts[startSubElt + i] = false;

    // First see if we have an entry at all.
    auto &entry = result[startSubElt + i];

    // If we don't...
    if (!entry) {
      // and we are told to initialize it, do so.
      if (auto defaultValue = defaultFunc(i)) {
        entry = std::move(defaultValue.getValue());
      } else {
        // Otherwise, mark this as a conflicting value. There is some available
        // value here, we just do not know what it is at this point. This
        // ensures that if we visit a kill where we do not have an entry yet, we
        // properly invalidate our state.
        conflictingValues[startSubElt + i] = true;
      }
      continue;
    }

    // Check if our caller thinks that the value currently in entry is
    // compatible with \p inst. If not, mark the values as conflicting and
    // continue.
    if (!isSafeFunc(entry, i)) {
      conflictingValues[startSubElt + i] = true;
      continue;
    }

    // Otherwise, we found another insertion point for our available
    // value. Today this will always be a Store.
    entry.addInsertionPoint(cast<StoreInst>(inst));
  }
}
| |
void AvailableValueDataflowContext::updateAvailableValues(
    SILInstruction *Inst, SmallBitVector &RequiredElts,
    SmallVectorImpl<AvailableValue> &Result,
    SmallBitVector &ConflictingValues) {

  // If we are visiting a load [take], it invalidates the underlying available
  // values.
  //
  // NOTE: Since we are always looking back from the instruction to promote,
  // we will never hit this code for the load [take] that we are attempting to
  // promote itself.
  if (auto *LI = dyn_cast<LoadInst>(Inst)) {
    // First see if this is a load inst that we are tracking.
    if (LoadTakeUses.count(LI)) {
      updateAvailableValuesHelper(TheMemory, LI, LI->getOperand(), RequiredElts,
                                  Result, ConflictingValues,
                                  /*default*/
                                  [](unsigned) -> Optional<AvailableValue> {
                                    // We never initialize values. We only
                                    // want to invalidate.
                                    return None;
                                  },
                                  /*isSafe*/
                                  [](AvailableValue &, unsigned) -> bool {
                                    // Always assume values conflict.
                                    return false;
                                  });
      return;
    }
  }

  // Handle store.
  if (auto *SI = dyn_cast<StoreInst>(Inst)) {
    updateAvailableValuesHelper(
        TheMemory, SI, SI->getDest(), RequiredElts, Result, ConflictingValues,
        /*default*/
        [&](unsigned ResultOffset) -> Optional<AvailableValue> {
          Optional<AvailableValue> Result;
          Result.emplace(SI->getSrc(), ResultOffset, SI);
          return Result;
        },
        /*isSafe*/
        [&](AvailableValue &Entry, unsigned ResultOffset) -> bool {
          // TODO: This is /really/, /really/, conservative. This basically
          // means that if we do not have an identical store, we will not
          // promote.
          return Entry.getValue() == SI->getSrc() &&
                 Entry.getSubElementNumber() == ResultOffset;
        });
    return;
  }

  // If we got here from an apply, we must either be initializing the element
  // via an @out parameter or we are trying to model an invalidating load of the
  // value (e.x.: indirect_in, indirect_inout).

  // If we get here with a copy_addr, we must either be storing into the element
  // or tracking some sort of take of the src. First check if we are taking (in
  // which case, we just track invalidation of src) and continue. Otherwise we
  // must be storing into the copy_addr so see which loaded subelements are
  // being used, and if so, explode the copy_addr to its individual pieces.
  if (auto *CAI = dyn_cast<CopyAddrInst>(Inst)) {
    // If we have a load take use, we must be tracking a store of CAI.
    if (LoadTakeUses.count(CAI)) {
      updateAvailableValuesHelper(TheMemory, CAI, CAI->getSrc(), RequiredElts,
                                  Result, ConflictingValues,
                                  /*default*/
                                  [](unsigned) -> Optional<AvailableValue> {
                                    // We never give values default initialized
                                    // values. We only want to invalidate.
                                    return None;
                                  },
                                  /*isSafe*/
                                  [](AvailableValue &, unsigned) -> bool {
                                    // Always assume values conflict.
                                    return false;
                                  });
      return;
    }

    unsigned StartSubElt = computeSubelement(CAI->getDest(), TheMemory);
    assert(StartSubElt != ~0U && "Store within enum projection not handled");
    SILType ValTy = CAI->getDest()->getType();

    // Check whether any subelement covered by the copy_addr's destination is
    // still required; if not, the copy_addr is irrelevant to this query.
    bool AnyRequired = false;
    for (unsigned i : range(getNumSubElements(
             ValTy, getModule(), TypeExpansionContext(*CAI->getFunction())))) {
      // If this element is not required, don't fill it in.
      AnyRequired = RequiredElts[StartSubElt+i];
      if (AnyRequired) break;
    }

    // If this is a copy addr that doesn't intersect the loaded subelements,
    // just continue with an unmodified load mask.
    if (!AnyRequired)
      return;

    // If the copyaddr is of a non-loadable type, we can't promote it. Just
    // consider it to be a clobber.
    if (CAI->getSrc()->getType().isLoadable(*CAI->getFunction())) {
      // Otherwise, some part of the copy_addr's value is demanded by a load, so
      // we need to explode it to its component pieces. This only expands one
      // level of the copyaddr.
      explodeCopyAddr(CAI);

      // The copy_addr doesn't provide any values, but we've arranged for our
      // iterators to visit the newly generated instructions, which do.
      return;
    }
  }

  // TODO: inout apply's should only clobber pieces passed in.

  // Otherwise, this is some unknown instruction, conservatively assume that all
  // values are clobbered.
  RequiredElts.clear();
  ConflictingValues = SmallBitVector(Result.size(), true);
  return;
}
| |
| bool AvailableValueDataflowContext::computeAvailableValues( |
| SILInstruction *StartingFrom, unsigned FirstEltOffset, |
| unsigned NumLoadSubElements, SmallBitVector &RequiredElts, |
| SmallVectorImpl<AvailableValue> &Result) { |
| llvm::SmallDenseMap<SILBasicBlock*, SmallBitVector, 32> VisitedBlocks; |
| SmallBitVector ConflictingValues(Result.size()); |
| |
| computeAvailableValuesFrom(StartingFrom->getIterator(), |
| StartingFrom->getParent(), RequiredElts, Result, |
| VisitedBlocks, ConflictingValues); |
| // If there are no values available at this load point, then we fail to |
| // promote this load and there is nothing to do. |
| SmallBitVector AvailableValueIsPresent(NumMemorySubElements); |
| |
| for (unsigned i : |
| range(FirstEltOffset, FirstEltOffset + NumLoadSubElements)) { |
| AvailableValueIsPresent[i] = Result[i].getValue(); |
| } |
| |
| // If we do not have any values available, bail. |
| if (AvailableValueIsPresent.none()) |
| return false; |
| |
| // Otherwise, if we have any conflicting values, explicitly mask them out of |
| // the result, so we don't pick one arbitrary available value. |
| if (ConflictingValues.none()) { |
| return true; |
| } |
| |
| // At this point, we know that we have /some/ conflicting values and some |
| // available values. |
| if (AvailableValueIsPresent.reset(ConflictingValues).none()) |
| return false; |
| |
| // Otherwise, mask out the available values and return true. We have at least |
| // 1 available value. |
| int NextIter = ConflictingValues.find_first(); |
| while (NextIter != -1) { |
| assert(NextIter >= 0 && "Int can not be represented?!"); |
| unsigned Iter = NextIter; |
| Result[Iter] = {}; |
| NextIter = ConflictingValues.find_next(Iter); |
| } |
| |
| return true; |
| } |
| |
/// Starting just before \p StartingFrom in \p BB, walk backwards over the
/// block (and then recursively over its predecessors) looking for values that
/// make the subelements demanded in \p RequiredElts available, recording them
/// in \p Result. Subelements whose reaching values disagree across paths are
/// recorded in \p ConflictingValues instead of \p Result.
void AvailableValueDataflowContext::computeAvailableValuesFrom(
    SILBasicBlock::iterator StartingFrom, SILBasicBlock *BB,
    SmallBitVector &RequiredElts, SmallVectorImpl<AvailableValue> &Result,
    llvm::SmallDenseMap<SILBasicBlock *, SmallBitVector, 32>
        &VisitedBlocks,
    SmallBitVector &ConflictingValues) {
  assert(!RequiredElts.none() && "Scanning with a goal of finding nothing?");

  // If there is a potential modification in the current block, scan the block
  // to see if the store, escape, or load [take] is before or after the load. If
  // it is before, check to see if it produces the value we are looking for.
  bool shouldCheckBlock =
      HasLocalDefinition.count(BB) || HasLocalKill.count(BB);
  if (shouldCheckBlock) {
    for (SILBasicBlock::iterator BBI = StartingFrom; BBI != BB->begin();) {
      SILInstruction *TheInst = &*std::prev(BBI);
      // If this instruction is unrelated to the element, ignore it.
      if (!NonLoadUses.count(TheInst) && !LoadTakeUses.count(TheInst)) {
        --BBI;
        continue;
      }

      // Given an interesting instruction, incorporate it into the set of
      // results, and filter down the list of demanded subelements that we still
      // need.
      updateAvailableValues(TheInst, RequiredElts, Result, ConflictingValues);

      // If this satisfied all of the demanded values, we're done.
      if (RequiredElts.none())
        return;

      // Otherwise, keep scanning the block. If the instruction we were looking
      // at just got exploded, don't skip the next instruction.
      //
      // NOTE: updateAvailableValues may erase TheInst (e.g. by exploding a
      // copy_addr into loads/stores inserted at the same position). When that
      // happens, std::prev(BBI) no longer points at TheInst, so we
      // deliberately do not decrement and instead revisit the newly inserted
      // instruction on the next iteration.
      if (&*std::prev(BBI) == TheInst)
        --BBI;
    }
  }

  // Otherwise, we need to scan up the CFG looking for available values.
  for (auto PI = BB->pred_begin(), E = BB->pred_end(); PI != E; ++PI) {
    SILBasicBlock *PredBB = *PI;

    // If the predecessor block has already been visited (potentially due to a
    // cycle in the CFG), don't revisit it. We can do this safely because we
    // are optimistically assuming that all incoming elements in a cycle will be
    // the same. If we ever detect a conflicting element, we record it and do
    // not look at the result.
    auto Entry = VisitedBlocks.insert({PredBB, RequiredElts});
    if (!Entry.second) {
      // If we are revisiting a block and asking for different required elements
      // then anything that isn't agreeing is in conflict.
      const auto &PrevRequired = Entry.first->second;
      if (PrevRequired != RequiredElts) {
        // Mark the disagreeing elements as conflicting, stop demanding them,
        // and bail entirely if nothing remains demanded.
        ConflictingValues |= (PrevRequired ^ RequiredElts);

        RequiredElts &= ~ConflictingValues;
        if (RequiredElts.none())
          return;
      }
      continue;
    }

    // Make sure to pass in the same set of required elements for each pred.
    SmallBitVector Elts = RequiredElts;
    computeAvailableValuesFrom(PredBB->end(), PredBB, Elts, Result,
                               VisitedBlocks, ConflictingValues);

    // If we have any conflicting values, don't bother searching for them.
    RequiredElts &= ~ConflictingValues;
    if (RequiredElts.none())
      return;
  }
}
| |
/// Explode a copy_addr instruction of a loadable type into lower level
/// operations like loads, stores, retains, releases, retain_value, etc.
///
/// This also rewrites the dataflow bookkeeping (Uses, NonLoadUses,
/// LoadTakeUses) so the newly emitted instructions take the exploded
/// copy_addr's place, and finally erases the copy_addr itself.
void AvailableValueDataflowContext::explodeCopyAddr(CopyAddrInst *CAI) {
  LLVM_DEBUG(llvm::dbgs() << " -- Exploding copy_addr: " << *CAI << "\n");

  SILType ValTy = CAI->getDest()->getType().getObjectType();

  SILFunction *F = CAI->getFunction();
  auto &TL = F->getTypeLowering(ValTy);

  // Keep track of the new instructions emitted.
  SmallVector<SILInstruction *, 4> NewInsts;
  SILBuilder B(CAI, &NewInsts);
  B.setCurrentDebugScope(CAI->getDebugScope());

  // Use type lowering to lower the copyaddr into a load sequence + store
  // sequence appropriate for the type.
  SILValue StoredValue =
      TL.emitLoadOfCopy(B, CAI->getLoc(), CAI->getSrc(), CAI->isTakeOfSrc());

  TL.emitStoreOfCopy(B, CAI->getLoc(), StoredValue, CAI->getDest(),
                     CAI->isInitializationOfDest());

  // Update our internal state for this being gone.
  NonLoadUses.erase(CAI);
  LoadTakeUses.erase(CAI);
  // NOTE: We do not need to update HasLocalKill since the copy_addr
  // and the loads/stores will have the same parent block.

  // Remove the copy_addr from Uses. A single copy_addr can appear multiple
  // times if the source and dest are to elements within a single aggregate, but
  // we only want to pick up the CopyAddrKind from the store.
  PMOMemoryUse LoadUse, StoreUse;
  for (auto &Use : Uses) {
    if (Use.Inst != CAI)
      continue;

    if (Use.Kind == PMOUseKind::Load) {
      assert(LoadUse.isInvalid());
      LoadUse = Use;
    } else {
      assert(StoreUse.isInvalid());
      StoreUse = Use;
    }

    // Null out the entry rather than erasing it so indices into Uses stay
    // stable (NonLoadUses stores such indices).
    Use.Inst = nullptr;

    // Keep scanning in case the copy_addr appears multiple times.
  }

  assert((LoadUse.isValid() || StoreUse.isValid()) &&
         "we should have a load or a store, possibly both");
  assert(StoreUse.isInvalid() || StoreUse.Kind == Assign ||
         StoreUse.Kind == Initialization || StoreUse.Kind == InitOrAssign);

  // Now that we've emitted a bunch of instructions, including a load and store
  // but also including other stuff, update the internal state of
  // LifetimeChecker to reflect them.

  // Update the instructions that touch the memory. Note that this loop only
  // updates bookkeeping (Uses and the local maps); it does not create any new
  // instructions, so NewInsts cannot grow while we iterate it and a range-for
  // is safe here.
  for (auto *NewInst : NewInsts) {
    switch (NewInst->getKind()) {
    default:
      NewInst->dump();
      llvm_unreachable("Unknown instruction generated by copy_addr lowering");

    case SILInstructionKind::StoreInst:
      // If it is a store to the memory object (as oppose to a store to
      // something else), track it as an access.
      if (StoreUse.isValid()) {
        StoreUse.Inst = NewInst;
        // If our store use by the copy_addr is an assign, then we know that
        // before we store the new value, we loaded the old value implying that
        // our store is technically initializing memory when it occurs. So
        // change the kind to Initialization.
        if (StoreUse.Kind == Assign)
          StoreUse.Kind = Initialization;
        NonLoadUses[NewInst] = Uses.size();
        Uses.push_back(StoreUse);
      }
      continue;

    case SILInstructionKind::LoadInst:
      // If it is a load from the memory object (as oppose to a load from
      // something else), track it as an access. We need to explicitly check to
      // see if the load accesses "TheMemory" because it could either be a load
      // for the copy_addr source, or it could be a load corresponding to the
      // "assign" operation on the destination of the copyaddr.
      if (LoadUse.isValid() &&
          getAccessPathRoot(NewInst->getOperand(0)) == TheMemory) {
        // A load [take] from TheMemory is a kill that later dataflow must
        // observe, so record it in LoadTakeUses/HasLocalKill as well.
        if (auto *LI = dyn_cast<LoadInst>(NewInst)) {
          if (LI->getOwnershipQualifier() == LoadOwnershipQualifier::Take) {
            LoadTakeUses.insert(LI);
            HasLocalKill.insert(LI->getParent());
          }
        }
        LoadUse.Inst = NewInst;
        Uses.push_back(LoadUse);
      }
      continue;

    case SILInstructionKind::RetainValueInst:
    case SILInstructionKind::StrongRetainInst:
    case SILInstructionKind::StrongReleaseInst:
    case SILInstructionKind::ReleaseValueInst: // Destroy overwritten value
      // These are ignored.
      continue;
    }
  }

  // Next, remove the copy_addr itself.
  CAI->eraseFromParent();
}
| |
| bool AvailableValueDataflowContext::hasEscapedAt(SILInstruction *I) { |
| // Return true if the box has escaped at the specified instruction. We are |
| // not allowed to do load promotion in an escape region. |
| |
| // FIXME: This is not an aggressive implementation. :) |
| |
| // TODO: At some point, we should special case closures that just *read* from |
| // the escaped value (by looking at the body of the closure). They should not |
| // prevent load promotion, and will allow promoting values like X in regions |
| // dominated by "... && X != 0". |
| return HasAnyEscape; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Allocation Optimization |
| //===----------------------------------------------------------------------===// |
| |
| static SILType getMemoryType(AllocationInst *memory) { |
| // Compute the type of the memory object. |
| if (auto *abi = dyn_cast<AllocBoxInst>(memory)) { |
| assert(abi->getBoxType()->getLayout()->getFields().size() == 1 && |
| "optimizing multi-field boxes not implemented"); |
| return getSILBoxFieldType(TypeExpansionContext(*abi->getFunction()), |
| abi->getBoxType(), abi->getModule().Types, 0); |
| } |
| |
| assert(isa<AllocStackInst>(memory)); |
| return cast<AllocStackInst>(memory)->getElementType(); |
| } |
| |
| namespace { |
| |
/// This performs load promotion and deletes synthesized allocations if all
/// loads can be removed.
class AllocOptimize {

  // The module containing the allocation being optimized.
  SILModule &Module;

  /// This is either an alloc_box or alloc_stack instruction.
  AllocationInst *TheMemory;

  /// This is the SILType of the memory object.
  SILType MemoryType;

  /// The number of primitive subelements across all elements of this memory
  /// value.
  unsigned NumMemorySubElements;

  /// The collected uses of TheMemory. Shared with (and mutated through) the
  /// dataflow context as instructions are promoted or exploded.
  SmallVectorImpl<PMOMemoryUse> &Uses;

  /// Instructions that release or destroy the memory object.
  SmallVectorImpl<SILInstruction *> &Releases;

  // Dead-end block information for the enclosing function.
  DeadEndBlocks &deadEndBlocks;

  /// A structure that we use to compute our available values.
  AvailableValueDataflowContext DataflowContext;

public:
  AllocOptimize(AllocationInst *memory, SmallVectorImpl<PMOMemoryUse> &uses,
                SmallVectorImpl<SILInstruction *> &releases,
                DeadEndBlocks &deadEndBlocks)
      : Module(memory->getModule()), TheMemory(memory),
        MemoryType(getMemoryType(memory)),
        NumMemorySubElements(getNumSubElements(
            MemoryType, Module, TypeExpansionContext(*memory->getFunction()))),
        Uses(uses), Releases(releases), deadEndBlocks(deadEndBlocks),
        DataflowContext(TheMemory, NumMemorySubElements, uses) {}

  // Attempt to promote the memory accesses of TheMemory (presumably returns
  // true when anything changed — confirm against the definition).
  bool optimizeMemoryAccesses();

  /// If the allocation is an autogenerated allocation that is only stored to
  /// (after load promotion) then remove it completely.
  bool tryToRemoveDeadAllocation();

private:
  // Compute the values available at \p Inst for the access of \p SrcAddr.
  // On success, returns the loaded type and the first demanded subelement
  // index; returns None when promotion is impossible.
  Optional<std::pair<SILType, unsigned>>
  computeAvailableValues(SILValue SrcAddr, SILInstruction *Inst,
                         SmallVectorImpl<AvailableValue> &AvailableValues);
  // Per-access promotion entry points; each returns true if it removed the
  // instruction from the program (see the definitions below).
  bool promoteLoadCopy(LoadInst *li);
  bool promoteLoadBorrow(LoadBorrowInst *lbi);
  bool promoteCopyAddr(CopyAddrInst *cai);
  void promoteLoadTake(LoadInst *Inst, MutableArrayRef<AvailableValue> values);
  void promoteDestroyAddr(DestroyAddrInst *dai,
                          MutableArrayRef<AvailableValue> values);
  // Returns true if the take at \p i can be promoted; on success fills
  // \p availableValues with the values to reconstruct.
  bool canPromoteTake(SILInstruction *i,
                      SmallVectorImpl<AvailableValue> &availableValues);
};
| |
| } // end anonymous namespace |
| |
| Optional<std::pair<SILType, unsigned>> AllocOptimize::computeAvailableValues( |
| SILValue SrcAddr, SILInstruction *Inst, |
| SmallVectorImpl<AvailableValue> &AvailableValues) { |
| // If the box has escaped at this instruction, we can't safely promote the |
| // load. |
| if (DataflowContext.hasEscapedAt(Inst)) |
| return None; |
| |
| SILType LoadTy = SrcAddr->getType().getObjectType(); |
| |
| // If this is a load/copy_addr from a struct field that we want to promote, |
| // compute the access path down to the field so we can determine precise |
| // def/use behavior. |
| unsigned FirstElt = computeSubelement(SrcAddr, TheMemory); |
| |
| // If this is a load from within an enum projection, we can't promote it since |
| // we don't track subelements in a type that could be changing. |
| if (FirstElt == ~0U) |
| return None; |
| |
| unsigned NumLoadSubElements = getNumSubElements( |
| LoadTy, Module, TypeExpansionContext(*TheMemory->getFunction())); |
| |
| // Set up the bitvector of elements being demanded by the load. |
| SmallBitVector RequiredElts(NumMemorySubElements); |
| RequiredElts.set(FirstElt, FirstElt + NumLoadSubElements); |
| |
| AvailableValues.resize(NumMemorySubElements); |
| |
| // Find out if we have any available values. If no bits are demanded, we |
| // trivially succeed. This can happen when there is a load of an empty struct. |
| if (NumLoadSubElements != 0 && |
| !DataflowContext.computeAvailableValues( |
| Inst, FirstElt, NumLoadSubElements, RequiredElts, AvailableValues)) |
| return None; |
| |
| return std::make_pair(LoadTy, FirstElt); |
| } |
| |
| /// If we are able to optimize \p Inst, return the source address that |
| /// instruction is loading from. If we can not optimize \p Inst, then just |
| /// return an empty SILValue. |
| static SILValue tryFindSrcAddrForLoad(SILInstruction *i) { |
| // We can always promote a load_borrow. |
| if (auto *lbi = dyn_cast<LoadBorrowInst>(i)) |
| return lbi->getOperand(); |
| |
| // We only handle load [copy], load [trivial], load and copy_addr right |
| // now. Notably we do not support load [take] when promoting loads. |
| if (auto *li = dyn_cast<LoadInst>(i)) |
| if (li->getOwnershipQualifier() != LoadOwnershipQualifier::Take) |
| return li->getOperand(); |
| |
| // If this is a CopyAddr, verify that the element type is loadable. If not, |
| // we can't explode to a load. |
| auto *cai = dyn_cast<CopyAddrInst>(i); |
| if (!cai || !cai->getSrc()->getType().isLoadable(*cai->getFunction())) |
| return SILValue(); |
| return cai->getSrc(); |
| } |
| |
| /// At this point, we know that this element satisfies the definitive init |
| /// requirements, so we can try to promote loads to enable SSA-based dataflow |
| /// analysis. We know that accesses to this element only access this element, |
| /// cross element accesses have been scalarized. |
| /// |
| /// This returns true if the load has been removed from the program. |
| bool AllocOptimize::promoteLoadCopy(LoadInst *li) { |
| // Note that we intentionally don't support forwarding of weak pointers, |
| // because the underlying value may drop be deallocated at any time. We would |
| // have to prove that something in this function is holding the weak value |
| // live across the promoted region and that isn't desired for a stable |
| // diagnostics pass this like one. |
| |
| // First attempt to find a source addr for our "load" instruction. If we fail |
| // to find a valid value, just return. |
| SILValue srcAddr = tryFindSrcAddrForLoad(li); |
| if (!srcAddr) |
| return false; |
| |
| SmallVector<AvailableValue, 8> availableValues; |
| auto result = computeAvailableValues(srcAddr, li, availableValues); |
| if (!result.hasValue()) |
| return false; |
| |
| SILType loadTy = result->first; |
| unsigned firstElt = result->second; |
| |
| // Aggregate together all of the subelements into something that has the same |
| // type as the load did, and emit smaller loads for any subelements that were |
| // not available. We are "propagating" a +1 available value from the store |
| // points. |
| AvailableValueAggregator agg(li, availableValues, Uses, deadEndBlocks, |
| AvailableValueExpectedOwnership::Copy); |
| SILValue newVal = agg.aggregateValues(loadTy, li->getOperand(), firstElt); |
| |
| LLVM_DEBUG(llvm::dbgs() << " *** Promoting load: " << *li); |
| LLVM_DEBUG(llvm::dbgs() << " To value: " << *newVal); |
| ++NumLoadPromoted; |
| |
| // If we did not have ownership, we did not insert extra copies at our stores, |
| // so we can just RAUW and return. |
| if (!li->getFunction()->hasOwnership()) { |
| li->replaceAllUsesWith(newVal); |
| SILValue addr = li->getOperand(); |
| li->eraseFromParent(); |
| if (auto *addrI = addr->getDefiningInstruction()) |
| eliminateDeadInstruction(addrI); |
| return true; |
| } |
| |
| // If we inserted any copies, we created the copies at our stores. We know |
| // that in our load block, we will reform the aggregate as appropriate at the |
| // load implying that the value /must/ be fully consumed. If we promoted a +0 |
| // value, we created dominating destroys along those paths. Thus any leaking |
| // blocks that we may have can be found by performing a linear lifetime check |
| // over all copies that we found using the load as the "consuming uses" (just |
| // for the purposes of identifying the consuming block). |
| agg.fixupOwnership(li, newVal); |
| |
| // Now that we have fixed up all of our missing destroys, insert the copy |
| // value for our actual load and RAUW. |
| newVal = SILBuilderWithScope(li).emitCopyValueOperation(li->getLoc(), newVal); |
| |
| li->replaceAllUsesWith(newVal); |
| SILValue addr = li->getOperand(); |
| li->eraseFromParent(); |
| if (auto *addrI = addr->getDefiningInstruction()) |
| eliminateDeadInstruction(addrI); |
| return true; |
| } |
| |
| bool AllocOptimize::promoteCopyAddr(CopyAddrInst *cai) { |
| // Note that we intentionally don't support forwarding of weak pointers, |
| // because the underlying value may drop be deallocated at any time. We would |
| // have to prove that something in this function is holding the weak value |
| // live across the promoted region and that isn't desired for a stable |
| // diagnostics pass this like one. |
| |
| // First attempt to find a source addr for our "load" instruction. If we fail |
| // to find a valid value, just return. |
| SILValue srcAddr = tryFindSrcAddrForLoad(cai); |
| if (!srcAddr) |
| return false; |
| |
| SmallVector<AvailableValue, 8> availableValues; |
| auto result = computeAvailableValues(srcAddr, cai, availableValues); |
| if (!result.hasValue()) |
| return false; |
| |
| // Ok, we have some available values. If we have a copy_addr, explode it now, |
| // exposing the load operation within it. Subsequent optimization passes will |
| // see the load and propagate the available values into it. |
| DataflowContext.explodeCopyAddr(cai); |
| |
| // This is removing the copy_addr, but explodeCopyAddr takes care of |
| // removing the instruction from Uses for us, so we return false. |
| return false; |
| } |
| |
/// At this point, we know that this element satisfies the definitive init
/// requirements, so we can try to promote loads to enable SSA-based dataflow
/// analysis. We know that accesses to this element only access this element,
/// cross element accesses have been scalarized.
///
/// This returns true if the load has been removed from the program.
bool AllocOptimize::promoteLoadBorrow(LoadBorrowInst *lbi) {
  // Note that we intentionally don't support forwarding of weak pointers,
  // because the underlying value may be deallocated at any time. We would
  // have to prove that something in this function is holding the weak value
  // live across the promoted region and that isn't desired for a stable
  // diagnostics pass like this one.

  // First attempt to find a source addr for our "load" instruction. If we fail
  // to find a valid value, just return.
  SILValue srcAddr = tryFindSrcAddrForLoad(lbi);
  if (!srcAddr)
    return false;

  // Compute the values available at the load point; without any, there is
  // nothing to promote.
  SmallVector<AvailableValue, 8> availableValues;
  auto result = computeAvailableValues(srcAddr, lbi, availableValues);
  if (!result.hasValue())
    return false;

  ++NumLoadPromoted;

  SILType loadTy = result->first;
  unsigned firstElt = result->second;

  // Aggregate together all of the subelements into something that has the same
  // type as the load did, and emit smaller loads for any subelements that were
  // not available. We are "propagating" a +1 available value from the store
  // points.
  AvailableValueAggregator agg(lbi, availableValues, Uses, deadEndBlocks,
                               AvailableValueExpectedOwnership::Borrow);
  SILValue newVal = agg.aggregateValues(loadTy, lbi->getOperand(), firstElt);

  LLVM_DEBUG(llvm::dbgs() << " *** Promoting load: " << *lbi);
  LLVM_DEBUG(llvm::dbgs() << " To value: " << *newVal);

  // If we inserted any copies, we created the copies at our
  // stores. We know that in our load block, we will reform the
  // aggregate as appropriate, will borrow the value there and give us
  // a whole pristine new value. Now in this routine, we go through
  // all of the copies and phis that we inserted and ensure that:
  //
  // 1. Phis are always strongly control equivalent to the copies that
  // produced their incoming values.
  //
  // 2. All intermediate copies are properly lifetime extended to the
  // load block and all leaking blocks are filled in as appropriate
  // with destroy_values.
  agg.fixupOwnership(lbi, newVal);

  // Now that we have fixed up the lifetimes of all of our incoming copies so
  // that they are alive over the load point, copy, borrow newVal and insert
  // destroy_value after the end_borrow and then RAUW.
  SILBuilderWithScope builder(lbi);
  SILValue copiedVal = builder.emitCopyValueOperation(lbi->getLoc(), newVal);
  newVal = builder.createBeginBorrow(lbi->getLoc(), copiedVal);

  // End the copied value's lifetime wherever the original borrow scope ended:
  // emit a destroy_value immediately after each end_borrow of the load.
  for (auto *ebi : lbi->getUsersOfType<EndBorrowInst>()) {
    auto next = std::next(ebi->getIterator());
    SILBuilderWithScope(next).emitDestroyValueOperation(ebi->getLoc(),
                                                        copiedVal);
  }

  lbi->replaceAllUsesWith(newVal);
  SILValue addr = lbi->getOperand();
  lbi->eraseFromParent();
  // The original address computation may now be dead; clean it up too.
  if (auto *addrI = addr->getDefiningInstruction())
    eliminateDeadInstruction(addrI);
  return true;
}
| |
| /// Return true if we can promote the given destroy. |
| bool AllocOptimize::canPromoteTake( |
| SILInstruction *inst, SmallVectorImpl<AvailableValue> &availableValues) { |
| SILValue address = inst->getOperand(0); |
| |
| // We cannot promote destroys of address-only types, because we can't expose |
| // the load. |
| SILType loadTy = address->getType().getObjectType(); |
| if (loadTy.isAddressOnly(*inst->getFunction())) |
| return false; |
| |
| // If the box has escaped at this instruction, we can't safely promote the |
| // load. |
| if (DataflowContext.hasEscapedAt(inst)) |
| return false; |
| |
| // Compute the access path down to the field so we can determine precise |
| // def/use behavior. |
| unsigned firstElt = computeSubelement(address, TheMemory); |
| assert(firstElt != ~0U && "destroy within enum projection is not valid"); |
| auto expansionContext = TypeExpansionContext(*inst->getFunction()); |
| unsigned numLoadSubElements = |
| getNumSubElements(loadTy, Module, expansionContext); |
| |
| // Find out if we have any available values. If no bits are demanded, we |
| // trivially succeed. This can happen when there is a load of an empty struct. |
| if (numLoadSubElements == 0) |
| return true; |
| |
| // Set up the bitvector of elements being demanded by the load. |
| SmallBitVector requiredElts(NumMemorySubElements); |
| requiredElts.set(firstElt, firstElt + numLoadSubElements); |
| |
| // Compute our available values. If we do not have any available values, |
| // return false. We have nothing further to do. |
| SmallVector<AvailableValue, 8> tmpList; |
| tmpList.resize(NumMemorySubElements); |
| if (!DataflowContext.computeAvailableValues( |
| inst, firstElt, numLoadSubElements, requiredElts, tmpList)) |
| return false; |
| |
| // Now check that we can perform a take upon our available values. This |
| // implies today that our value is fully available. If the value is not fully |
| // available, we would need to split stores to promote this destroy_addr. We |
| // do not support that yet. |
| AvailableValueAggregator agg(inst, tmpList, Uses, deadEndBlocks, |
| AvailableValueExpectedOwnership::Take); |
| if (!agg.canTake(loadTy, firstElt)) |
| return false; |
| |
| // Ok, we can promote this destroy_addr... move the temporary lists contents |
| // into the final AvailableValues list. |
| std::move(tmpList.begin(), tmpList.end(), |
| std::back_inserter(availableValues)); |
| |
| return true; |
| } |
| |
| // DestroyAddr is a composed operation merging load [take] + destroy_value. If |
| // the implicit load's value is available, explode it. |
| // |
| // NOTE: We only do this if we have a fully available value. |
| // |
| // Note that we handle the general case of a destroy_addr of a piece of the |
| // memory object, not just destroy_addrs of the entire thing. |
| void AllocOptimize::promoteDestroyAddr( |
| DestroyAddrInst *dai, MutableArrayRef<AvailableValue> availableValues) { |
| SILValue address = dai->getOperand(); |
| SILType loadTy = address->getType().getObjectType(); |
| |
| // Compute the access path down to the field so we can determine precise |
| // def/use behavior. |
| unsigned firstElt = computeSubelement(address, TheMemory); |
| |
| // Aggregate together all of the subelements into something that has the same |
| // type as the load did, and emit smaller) loads for any subelements that were |
| // not available. |
| AvailableValueAggregator agg(dai, availableValues, Uses, deadEndBlocks, |
| AvailableValueExpectedOwnership::Take); |
| SILValue newVal = agg.aggregateValues(loadTy, address, firstElt); |
| |
| ++NumDestroyAddrPromoted; |
| |
| LLVM_DEBUG(llvm::dbgs() << " *** Promoting destroy_addr: " << *dai); |
| LLVM_DEBUG(llvm::dbgs() << " To value: " << *newVal); |
| |
| SILBuilderWithScope(dai).emitDestroyValueOperation(dai->getLoc(), newVal); |
| dai->eraseFromParent(); |
| } |
| |
| void AllocOptimize::promoteLoadTake( |
| LoadInst *li, MutableArrayRef<AvailableValue> availableValues) { |
| assert(li->getOwnershipQualifier() == LoadOwnershipQualifier::Take && |
| "load [copy], load [trivial], load should be handled by " |
| "promoteLoadCopy"); |
| SILValue address = li->getOperand(); |
| SILType loadTy = address->getType().getObjectType(); |
| |
| // Compute the access path down to the field so we can determine precise |
| // def/use behavior. |
| unsigned firstElt = computeSubelement(address, TheMemory); |
| |
| // Aggregate together all of the subelements into something that has the same |
| // type as the load did, and emit smaller) loads for any subelements that were |
| // not available. |
| AvailableValueAggregator agg(li, availableValues, Uses, deadEndBlocks, |
| AvailableValueExpectedOwnership::Take); |
| SILValue newVal = agg.aggregateValues(loadTy, address, firstElt); |
| |
| ++NumLoadTakePromoted; |
| |
| LLVM_DEBUG(llvm::dbgs() << " *** Promoting load_take: " << *li); |
| LLVM_DEBUG(llvm::dbgs() << " To value: " << *newVal); |
| |
| // Then perform the RAUW. |
| li->replaceAllUsesWith(newVal); |
| li->eraseFromParent(); |
| } |
| |
| namespace { |
| |
| struct TakePromotionState { |
| ArrayRef<SILInstruction *> takeInsts; |
| SmallVector<unsigned, 8> takeInstIndices; |
| SmallVector<AvailableValue, 32> availableValueList; |
| SmallVector<unsigned, 8> availableValueStartOffsets; |
| |
| TakePromotionState(ArrayRef<SILInstruction *> takeInsts) |
| : takeInsts(takeInsts) {} |
| |
| unsigned size() const { return takeInstIndices.size(); } |
| |
| void initializeForTakeInst(unsigned takeInstIndex) { |
| availableValueStartOffsets.push_back(availableValueList.size()); |
| takeInstIndices.push_back(takeInstIndex); |
| } |
| |
| std::pair<SILInstruction *, MutableArrayRef<AvailableValue>> |
| getData(unsigned index) { |
| unsigned takeInstIndex = takeInstIndices[index]; |
| unsigned startOffset = availableValueStartOffsets[index]; |
| unsigned count; |
| |
| if ((availableValueStartOffsets.size() - 1) != index) { |
| count = availableValueStartOffsets[index + 1] - startOffset; |
| } else { |
| count = availableValueList.size() - startOffset; |
| } |
| |
| MutableArrayRef<AvailableValue> values(&availableValueList[startOffset], |
| count); |
| return {takeInsts[takeInstIndex], values}; |
| } |
| }; |
| |
| } // end anonymous namespace |
| |
| // Check if our use list has any non store, non take uses that keep the value |
| // alive. Returns nullptr on success and the user that prevents removal on |
| // failure. |
| // |
| // NOTE: This also gathers up any takes that we need to process. |
| static SILInstruction * |
| checkForNonStoreNonTakeUses(ArrayRef<PMOMemoryUse> uses, |
| SmallVectorImpl<SILInstruction *> &loadTakeList) { |
| for (auto &u : uses) { |
| // Ignore removed instructions. |
| if (u.Inst == nullptr) |
| continue; |
| |
| switch (u.Kind) { |
| case PMOUseKind::Assign: |
| // Until we can promote the value being destroyed by the assign, we can |
| // not remove deallocations with such assigns. |
| return u.Inst; |
| case PMOUseKind::InitOrAssign: |
| continue; // These don't prevent removal. |
| case PMOUseKind::Load: |
| // For now only handle takes from alloc_stack. |
| // |
| // TODO: It should be implementable, but it has not been needed yet. |
| if (auto *li = dyn_cast<LoadInst>(u.Inst)) { |
| if (li->getOwnershipQualifier() == LoadOwnershipQualifier::Take) { |
| loadTakeList.push_back(li); |
| continue; |
| } |
| } |
| return u.Inst; |
| case PMOUseKind::Initialization: |
| if (!isa<ApplyInst>(u.Inst) && |
| // A copy_addr that is not a take affects the retain count |
| // of the source. |
| (!isa<CopyAddrInst>(u.Inst) || |
| cast<CopyAddrInst>(u.Inst)->isTakeOfSrc())) |
| continue; |
| // FALL THROUGH. |
| LLVM_FALLTHROUGH; |
| case PMOUseKind::IndirectIn: |
| case PMOUseKind::InOutUse: |
| case PMOUseKind::Escape: |
| return u.Inst; // These do prevent removal. |
| } |
| } |
| |
| return nullptr; |
| } |
| |
| // We don't want to remove allocations that are required for useful debug |
| // information at -O0. As such, we only remove allocations if: |
| // |
| // 1. They are in a transparent function. |
| // 2. They are in a normal function, but didn't come from a VarDecl, or came |
| // from one that was autogenerated or inlined from a transparent function. |
| static bool isRemovableAutogeneratedAllocation(AllocationInst *TheMemory) { |
| SILLocation loc = TheMemory->getLoc(); |
| return TheMemory->getFunction()->isTransparent() || |
| !loc.getAsASTNode<VarDecl>() || loc.isAutoGenerated() || |
| loc.is<MandatoryInlinedLocation>(); |
| } |
| |
/// Attempt to prove that TheMemory (an alloc_box or alloc_stack) is dead
/// after load promotion, promote any remaining load [take] / destroy_addr
/// uses, and strip all uses of the allocation. Returns true if the uses were
/// removed; the caller is responsible for erasing the allocation instruction
/// itself (to avoid iterator invalidation).
bool AllocOptimize::tryToRemoveDeadAllocation() {
  assert((isa<AllocBoxInst>(TheMemory) || isa<AllocStackInst>(TheMemory)) &&
         "Unhandled allocation case");

  // Preserve allocations that back useful -O0 debug information.
  if (!isRemovableAutogeneratedAllocation(TheMemory))
    return false;

  SmallVector<SILInstruction *, 8> loadTakeList;
  // Check the uses list to see if there are any non-store uses left over after
  // load promotion and other things PMO does.
  if (auto *badUser = checkForNonStoreNonTakeUses(Uses, loadTakeList)) {
    LLVM_DEBUG(llvm::dbgs() << "*** Failed to remove autogenerated alloc: "
                               "kept alive by: "
                            << *badUser);
    return false;
  }

  // If our memory is trivially typed, we can just remove it without needing to
  // consider if the stored value needs to be destroyed. So at this point,
  // delete the memory!
  if (MemoryType.isTrivial(*TheMemory->getFunction())) {
    LLVM_DEBUG(llvm::dbgs() << "*** Removing autogenerated trivial allocation: "
                            << *TheMemory);

    // If it is safe to remove, do it. Recursively remove all instructions
    // hanging off the allocation instruction, then return success. Let the
    // caller remove the allocation itself to avoid iterator invalidation.
    eraseUsesOfInstruction(TheMemory);

    return true;
  }

  // Now make sure we can promote all load [take] and prepare state for each of
  // them. We bail before mutating anything if any take cannot be promoted.
  TakePromotionState loadTakeState(loadTakeList);
  for (auto p : llvm::enumerate(loadTakeList)) {
    loadTakeState.initializeForTakeInst(p.index());
    if (!canPromoteTake(p.value(), loadTakeState.availableValueList))
      return false;
  }

  // Otherwise removing the deallocation will drop any releases. Check that
  // there is nothing preventing removal.
  TakePromotionState destroyAddrState(Releases);
  for (auto p : llvm::enumerate(Releases)) {
    auto *r = p.value();
    // Releases already processed elsewhere are blotted out with nullptr.
    if (r == nullptr)
      continue;

    // We stash all of the destroy_addr that we see.
    if (auto *dai = dyn_cast<DestroyAddrInst>(r)) {
      destroyAddrState.initializeForTakeInst(p.index() /*destroyAddrIndex*/);
      // Make sure we can actually promote this destroy addr. If we can not,
      // then we must bail. In order to not gather available values twice, we
      // gather the available values here that we will use to promote the
      // values.
      if (!canPromoteTake(dai, destroyAddrState.availableValueList))
        return false;
      continue;
    }

    // Any other kind of release keeps the allocation alive.
    LLVM_DEBUG(llvm::dbgs()
               << "*** Failed to remove autogenerated non-trivial alloc: "
                  "kept alive by release: "
               << *r);
    return false;
  }

  // If we reached this point, we can promote all of our destroy_addr and load
  // take. Since our load [take] may be available values for our destroy_addr,
  // we promote the destroy_addr first.
  for (unsigned i : range(destroyAddrState.size())) {
    SILInstruction *dai;
    MutableArrayRef<AvailableValue> values;
    std::tie(dai, values) = destroyAddrState.getData(i);
    promoteDestroyAddr(cast<DestroyAddrInst>(dai), values);
    // We do not need to unset releases, since we are going to exit here.
  }
  for (unsigned i : range(loadTakeState.size())) {
    SILInstruction *li;
    MutableArrayRef<AvailableValue> values;
    std::tie(li, values) = loadTakeState.getData(i);
    promoteLoadTake(cast<LoadInst>(li), values);
  }

  LLVM_DEBUG(llvm::dbgs() << "*** Removing autogenerated non-trivial alloc: "
                          << *TheMemory);

  // If it is safe to remove, do it. Recursively remove all instructions
  // hanging off the allocation instruction, then return success. Let the
  // caller remove the allocation itself to avoid iterator invalidation.
  eraseUsesOfInstruction(TheMemory);

  return true;
}
| |
| bool AllocOptimize::optimizeMemoryAccesses() { |
| bool changed = false; |
| |
| // If we've successfully checked all of the definitive initialization |
| // requirements, try to promote loads. This can explode copy_addrs, so the |
| // use list may change size. |
| for (unsigned i = 0; i != Uses.size(); ++i) { |
| auto &use = Uses[i]; |
| // Ignore entries for instructions that got expanded along the way. |
| if (use.Inst && use.Kind == PMOUseKind::Load) { |
| if (auto *cai = dyn_cast<CopyAddrInst>(use.Inst)) { |
| if (promoteCopyAddr(cai)) { |
| Uses[i].Inst = nullptr; // remove entry if load got deleted. |
| changed = true; |
| } |
| continue; |
| } |
| |
| if (auto *lbi = dyn_cast<LoadBorrowInst>(use.Inst)) { |
| if (promoteLoadBorrow(lbi)) { |
| Uses[i].Inst = nullptr; // remove entry if load got deleted. |
| changed = true; |
| } |
| continue; |
| } |
| |
| if (auto *li = dyn_cast<LoadInst>(use.Inst)) { |
| if (promoteLoadCopy(li)) { |
| Uses[i].Inst = nullptr; // remove entry if load got deleted. |
| changed = true; |
| } |
| continue; |
| } |
| } |
| } |
| |
| return changed; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Top Level Entrypoints |
| //===----------------------------------------------------------------------===// |
| |
| static AllocationInst *getOptimizableAllocation(SILInstruction *i) { |
| if (!isa<AllocBoxInst>(i) && !isa<AllocStackInst>(i)) { |
| return nullptr; |
| } |
| |
| auto *alloc = cast<AllocationInst>(i); |
| |
| // If our aggregate has unreferencable storage, we can't optimize. Return |
| // nullptr. |
| if (getMemoryType(alloc).aggregateHasUnreferenceableStorage()) |
| return nullptr; |
| |
| // Otherwise we are good to go. Lets try to optimize this memory! |
| return alloc; |
| } |
| |
| static bool optimizeMemoryAccesses(SILFunction &fn) { |
| bool changed = false; |
| DeadEndBlocks deadEndBlocks(&fn); |
| |
| for (auto &bb : fn) { |
| auto i = bb.begin(), e = bb.end(); |
| while (i != e) { |
| // First see if i is an allocation that we can optimize. If not, skip it. |
| AllocationInst *alloc = getOptimizableAllocation(&*i); |
| if (!alloc) { |
| ++i; |
| continue; |
| } |
| |
| LLVM_DEBUG(llvm::dbgs() |
| << "*** PMO Optimize Memory Accesses looking at: " << *alloc); |
| PMOMemoryObjectInfo memInfo(alloc); |
| |
| // Set up the datastructure used to collect the uses of the allocation. |
| SmallVector<PMOMemoryUse, 16> uses; |
| SmallVector<SILInstruction *, 4> destroys; |
| |
| // Walk the use list of the pointer, collecting them. If we are not able |
| // to optimize, skip this value. *NOTE* We may still scalarize values |
| // inside the value. |
| if (!collectPMOElementUsesFrom(memInfo, uses, destroys)) { |
| ++i; |
| continue; |
| } |
| |
| AllocOptimize allocOptimize(alloc, uses, destroys, deadEndBlocks); |
| changed |= allocOptimize.optimizeMemoryAccesses(); |
| |
| // Move onto the next instruction. We know this is safe since we do not |
| // eliminate allocations here. |
| ++i; |
| } |
| } |
| |
| return changed; |
| } |
| |
/// Try to remove every dead optimizable allocation in \p fn, erasing the
/// allocation instruction itself when all of its uses were stripped.
/// Returns true if anything changed.
static bool eliminateDeadAllocations(SILFunction &fn) {
  bool changed = false;
  DeadEndBlocks deadEndBlocks(&fn);

  for (auto &bb : fn) {
    auto i = bb.begin(), e = bb.end();
    while (i != e) {
      // First see if i is an allocation that we can optimize. If not, skip it.
      AllocationInst *alloc = getOptimizableAllocation(&*i);
      if (!alloc) {
        ++i;
        continue;
      }

      LLVM_DEBUG(llvm::dbgs()
                 << "*** PMO Dead Allocation Elimination looking at: "
                 << *alloc);
      PMOMemoryObjectInfo memInfo(alloc);

      // Set up the datastructure used to collect the uses of the allocation.
      SmallVector<PMOMemoryUse, 16> uses;
      SmallVector<SILInstruction *, 4> destroys;

      // Walk the use list of the pointer, collecting them. If we are not able
      // to optimize, skip this value. *NOTE* We may still scalarize values
      // inside the value.
      if (!collectPMOElementUsesFrom(memInfo, uses, destroys)) {
        ++i;
        continue;
      }

      AllocOptimize allocOptimize(alloc, uses, destroys, deadEndBlocks);
      changed |= allocOptimize.tryToRemoveDeadAllocation();

      // Advance the iterator past the allocation *before* we potentially
      // erase it below, so the erase cannot invalidate `i`.
      ++i;
      // tryToRemoveDeadAllocation strips the uses but leaves the allocation
      // itself for us to delete here, once it is use-free.
      if (alloc->use_empty()) {
        alloc->eraseFromParent();
        ++NumAllocRemoved;
        changed = true;
      }
    }
  }

  return changed;
}
| |
| namespace { |
| |
| class PredictableMemoryAccessOptimizations : public SILFunctionTransform { |
| /// The entry point to the transformation. |
| /// |
| /// FIXME: This pass should not need to rerun on deserialized |
| /// functions. Nothing should have changed in the upstream pipeline after |
| /// deserialization. However, rerunning does improve some benchmarks. This |
| /// either indicates that this pass missing some opportunities the first time, |
| /// or has a pass order dependency on other early passes. |
| void run() override { |
| // TODO: Can we invalidate here just instructions? |
| if (optimizeMemoryAccesses(*getFunction())) |
| invalidateAnalysis(SILAnalysis::InvalidationKind::FunctionBody); |
| } |
| }; |
| |
| class PredictableDeadAllocationElimination : public SILFunctionTransform { |
| void run() override { |
| if (eliminateDeadAllocations(*getFunction())) |
| invalidateAnalysis(SILAnalysis::InvalidationKind::FunctionBody); |
| } |
| }; |
| |
| } // end anonymous namespace |
| |
/// Factory entry point for the predictable-memaccess-opts pass.
SILTransform *swift::createPredictableMemoryAccessOptimizations() {
  return new PredictableMemoryAccessOptimizations();
}
| |
/// Factory entry point for the predictable-deadalloc-elim pass.
SILTransform *swift::createPredictableDeadAllocationElimination() {
  return new PredictableDeadAllocationElimination();
}