//===--- PredictableMemOpt.cpp - Perform predictable memory optzns --------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "predictable-memopt"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "DIMemoryUseCollector.h"
#include "swift/SIL/SILBuilder.h"
#include "swift/SILOptimizer/Utils/Local.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
using namespace swift;
STATISTIC(NumLoadPromoted, "Number of loads promoted");
STATISTIC(NumDestroyAddrPromoted, "Number of destroy_addrs promoted");
STATISTIC(NumAllocRemoved, "Number of allocations completely removed");
//===----------------------------------------------------------------------===//
// Subelement Analysis Implementation
//===----------------------------------------------------------------------===//
// We can only analyze components of structs whose storage is fully accessible
// from Swift.
static StructDecl *
getFullyReferenceableStruct(SILType Ty) {
auto SD = Ty.getStructOrBoundGenericStruct();
if (!SD || SD->hasUnreferenceableStorage())
return nullptr;
return SD;
}
static unsigned getNumSubElements(SILType T, SILModule &M) {
if (auto TT = T.getAs<TupleType>()) {
unsigned NumElements = 0;
for (auto index : indices(TT.getElementTypes()))
NumElements += getNumSubElements(T.getTupleElementType(index), M);
return NumElements;
}
if (auto *SD = getFullyReferenceableStruct(T)) {
unsigned NumElements = 0;
for (auto *D : SD->getStoredProperties())
NumElements += getNumSubElements(T.getFieldType(D, M), M);
return NumElements;
}
// If this isn't a tuple or struct, it is a single element.
return 1;
}
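// For example (illustrative only), given
//   struct S { var a: (Int64, Int64); var b: Float }
// getNumSubElements returns 3 for $S: the linearized subelements are a.0,
// a.1, and b, numbered 0, 1, and 2.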
/// getAccessPathRoot - Given an address, dive through any tuple/struct element
/// addresses to get the underlying value.
static SILValue getAccessPathRoot(SILValue Pointer) {
while (1) {
if (auto *TEAI = dyn_cast<TupleElementAddrInst>(Pointer))
Pointer = TEAI->getOperand();
else if (auto *SEAI = dyn_cast<StructElementAddrInst>(Pointer))
Pointer = SEAI->getOperand();
else
return Pointer;
}
}
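// For example (illustrative SIL, hypothetical values), given
//   %root = alloc_stack $S
//   %1 = struct_element_addr %root : $*S, #S.a
//   %2 = tuple_element_addr %1 : $*(Int64, Int64), 1
// getAccessPathRoot(%2) returns %root.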
/// Compute the subelement number indicated by the specified pointer (which is
/// derived from the root by a series of tuple/struct element addresses) by
/// treating the type as a linearized namespace with sequential elements. For
/// example, given:
///
/// root = alloc { a: { c: i64, d: i64 }, b: (i64, i64) }
/// tmp1 = struct_element_addr root, 1
/// tmp2 = tuple_element_addr tmp1, 0
///
/// This will return a subelement number of 2.
///
/// If this pointer is to within an existential or enum projection, it
/// returns ~0U.
///
static unsigned computeSubelement(SILValue Pointer, SILInstruction *RootInst) {
unsigned SubEltNumber = 0;
SILModule &M = RootInst->getModule();
while (1) {
// If we got to the root, we're done.
if (RootInst == Pointer)
return SubEltNumber;
auto *Inst = cast<SILInstruction>(Pointer);
if (auto *PBI = dyn_cast<ProjectBoxInst>(Inst)) {
Pointer = PBI->getOperand();
continue;
}
if (auto *TEAI = dyn_cast<TupleElementAddrInst>(Inst)) {
SILType TT = TEAI->getOperand()->getType();
// Keep track of what subelement is being referenced.
for (unsigned i = 0, e = TEAI->getFieldNo(); i != e; ++i) {
SubEltNumber += getNumSubElements(TT.getTupleElementType(i), M);
}
Pointer = TEAI->getOperand();
continue;
}
if (auto *SEAI = dyn_cast<StructElementAddrInst>(Inst)) {
SILType ST = SEAI->getOperand()->getType();
// Keep track of what subelement is being referenced.
StructDecl *SD = SEAI->getStructDecl();
for (auto *D : SD->getStoredProperties()) {
if (D == SEAI->getField()) break;
SubEltNumber += getNumSubElements(ST.getFieldType(D, M), M);
}
Pointer = SEAI->getOperand();
continue;
}
assert((isa<InitExistentialAddrInst>(Inst) || isa<InjectEnumAddrInst>(Inst)) &&
"Unknown access path instruction");
// Cannot promote loads and stores from within an existential or enum
// projection.
return ~0U;
}
}
/// Given an aggregate value and an access path, extract the value indicated by
/// the path.
static SILValue ExtractSubElement(SILValue Val, unsigned SubElementNumber,
SILBuilder &B, SILLocation Loc) {
SILType ValTy = Val->getType();
// Extract tuple elements.
if (auto TT = ValTy.getAs<TupleType>()) {
for (unsigned EltNo : indices(TT.getElementTypes())) {
// Keep track of what subelement is being referenced.
SILType EltTy = ValTy.getTupleElementType(EltNo);
unsigned NumSubElt = getNumSubElements(EltTy, B.getModule());
if (SubElementNumber < NumSubElt) {
Val = B.emitTupleExtract(Loc, Val, EltNo, EltTy);
return ExtractSubElement(Val, SubElementNumber, B, Loc);
}
SubElementNumber -= NumSubElt;
}
llvm_unreachable("Didn't find field");
}
// Extract struct elements.
if (auto *SD = getFullyReferenceableStruct(ValTy)) {
for (auto *D : SD->getStoredProperties()) {
auto fieldType = ValTy.getFieldType(D, B.getModule());
unsigned NumSubElt = getNumSubElements(fieldType, B.getModule());
if (SubElementNumber < NumSubElt) {
Val = B.emitStructExtract(Loc, Val, D);
return ExtractSubElement(Val, SubElementNumber, B, Loc);
}
SubElementNumber -= NumSubElt;
}
llvm_unreachable("Didn't find field");
}
// Otherwise, we're down to a scalar.
assert(SubElementNumber == 0 && "Miscalculation indexing subelements");
return Val;
}
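// For example (illustrative SIL), extracting subelement number 2 from a value
// %v of type $((Int64, Int64), Float) emits a single
//   %f = tuple_extract %v : $((Int64, Int64), Float), 1
// and returns %f; subelements 0 and 1 would instead extract element 0 first
// and then recurse into the inner tuple.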
//===----------------------------------------------------------------------===//
// Allocation Optimization
//===----------------------------------------------------------------------===//
namespace {
/// AllocOptimize - This performs load promotion and deletes synthesized
/// allocations if all loads can be removed.
class AllocOptimize {
SILModule &Module;
/// TheMemory - This is either an alloc_box or alloc_stack instruction.
SILInstruction *TheMemory;
/// This is the SILType of the memory object.
SILType MemoryType;
/// The number of primitive subelements across all elements of this memory
/// value.
unsigned NumMemorySubElements;
SmallVectorImpl<DIMemoryUse> &Uses;
SmallVectorImpl<SILInstruction*> &Releases;
llvm::SmallPtrSet<SILBasicBlock*, 32> HasLocalDefinition;
/// This is a map of uses that are not loads (i.e., they are Stores,
/// InOutUses, and Escapes), to their entry in Uses.
llvm::SmallDenseMap<SILInstruction*, unsigned, 16> NonLoadUses;
/// Does this value escape anywhere in the function.
bool HasAnyEscape = false;
public:
AllocOptimize(SILInstruction *TheMemory,
SmallVectorImpl<DIMemoryUse> &Uses,
SmallVectorImpl<SILInstruction*> &Releases);
bool doIt();
private:
bool promoteLoad(SILInstruction *Inst);
bool promoteDestroyAddr(DestroyAddrInst *DAI);
// Load promotion.
bool hasEscapedAt(SILInstruction *I);
void updateAvailableValues(SILInstruction *Inst,
llvm::SmallBitVector &RequiredElts,
SmallVectorImpl<std::pair<SILValue, unsigned>> &Result,
llvm::SmallBitVector &ConflictingValues);
void computeAvailableValues(SILInstruction *StartingFrom,
llvm::SmallBitVector &RequiredElts,
SmallVectorImpl<std::pair<SILValue, unsigned>> &Result);
void computeAvailableValuesFrom(SILBasicBlock::iterator StartingFrom,
SILBasicBlock *BB,
llvm::SmallBitVector &RequiredElts,
SmallVectorImpl<std::pair<SILValue, unsigned>> &Result,
llvm::SmallDenseMap<SILBasicBlock*, llvm::SmallBitVector, 32> &VisitedBlocks,
llvm::SmallBitVector &ConflictingValues);
void explodeCopyAddr(CopyAddrInst *CAI);
bool tryToRemoveDeadAllocation();
};
} // end anonymous namespace
AllocOptimize::AllocOptimize(SILInstruction *TheMemory,
SmallVectorImpl<DIMemoryUse> &Uses,
SmallVectorImpl<SILInstruction*> &Releases)
: Module(TheMemory->getModule()), TheMemory(TheMemory), Uses(Uses),
Releases(Releases) {
// Compute the type of the memory object.
if (auto *ABI = dyn_cast<AllocBoxInst>(TheMemory)) {
assert(ABI->getBoxType()->getLayout()->getFields().size() == 1
&& "optimizing multi-field boxes not implemented");
MemoryType = ABI->getBoxType()->getFieldType(ABI->getModule(), 0);
} else {
assert(isa<AllocStackInst>(TheMemory));
MemoryType = cast<AllocStackInst>(TheMemory)->getElementType();
}
NumMemorySubElements = getNumSubElements(MemoryType, Module);
// The first step of processing an element is to collect information about the
// element into data structures we use later.
for (unsigned ui = 0, e = Uses.size(); ui != e; ++ui) {
auto &Use = Uses[ui];
assert(Use.Inst && "No instruction identified?");
// Keep track of all the uses that aren't loads.
if (Use.Kind == DIUseKind::Load)
continue;
NonLoadUses[Use.Inst] = ui;
HasLocalDefinition.insert(Use.Inst->getParent());
if (Use.Kind == DIUseKind::Escape) {
// Determine which blocks the value can escape from. We aren't allowed to
// promote loads in blocks reachable from an escape point.
HasAnyEscape = true;
}
}
// It isn't really a use, but we account for the alloc_box/mark_uninitialized
// as a use so we see it in our dataflow walks.
NonLoadUses[TheMemory] = ~0U;
HasLocalDefinition.insert(TheMemory->getParent());
}
/// hasEscapedAt - Return true if the box has escaped at the specified
/// instruction. We are not allowed to do load promotion in an escape region.
bool AllocOptimize::hasEscapedAt(SILInstruction *I) {
// FIXME: This is not an aggressive implementation. :)
// TODO: At some point, we should special case closures that just *read* from
// the escaped value (by looking at the body of the closure). They should not
// prevent load promotion, and will allow promoting values like X in regions
// dominated by "... && X != 0".
return HasAnyEscape;
}
/// The specified instruction is a non-load access of the element being
/// promoted. See if it provides a value or refines the demanded element mask
/// used for load promotion.
void AllocOptimize::
updateAvailableValues(SILInstruction *Inst, llvm::SmallBitVector &RequiredElts,
SmallVectorImpl<std::pair<SILValue, unsigned>> &Result,
llvm::SmallBitVector &ConflictingValues) {
// Handle store and assign.
if (isa<StoreInst>(Inst) || isa<AssignInst>(Inst)) {
unsigned StartSubElt = computeSubelement(Inst->getOperand(1), TheMemory);
assert(StartSubElt != ~0U && "Store within enum projection not handled");
SILType ValTy = Inst->getOperand(0)->getType();
for (unsigned i = 0, e = getNumSubElements(ValTy, Module); i != e; ++i) {
// If this element is not required, don't fill it in.
if (!RequiredElts[StartSubElt+i]) continue;
// If there is no result computed for this subelement, record it. If
// there already is a result, check it for conflict. If there is no
// conflict, then we're ok.
auto &Entry = Result[StartSubElt+i];
if (Entry.first == SILValue())
Entry = { Inst->getOperand(0), i };
else if (Entry.first != Inst->getOperand(0) || Entry.second != i)
ConflictingValues[StartSubElt+i] = true;
// This element is now provided.
RequiredElts[StartSubElt+i] = false;
}
return;
}
// If we get here with a copy_addr, it must be storing into the element. Check
// to see if any loaded subelements are being used, and if so, explode the
// copy_addr to its individual pieces.
if (auto *CAI = dyn_cast<CopyAddrInst>(Inst)) {
unsigned StartSubElt = computeSubelement(Inst->getOperand(1), TheMemory);
assert(StartSubElt != ~0U && "Store within enum projection not handled");
SILType ValTy = Inst->getOperand(1)->getType();
bool AnyRequired = false;
for (unsigned i = 0, e = getNumSubElements(ValTy, Module); i != e; ++i) {
// Check whether any subelement written by the copy_addr is demanded.
AnyRequired = RequiredElts[StartSubElt+i];
if (AnyRequired) break;
}
// If this is a copy addr that doesn't intersect the loaded subelements,
// just continue with an unmodified load mask.
if (!AnyRequired)
return;
// If the copyaddr is of a non-loadable type, we can't promote it. Just
// consider it to be a clobber.
if (CAI->getOperand(0)->getType().isLoadable(Module)) {
// Otherwise, some part of the copy_addr's value is demanded by a load, so
// we need to explode it to its component pieces. This only expands one
// level of the copyaddr.
explodeCopyAddr(CAI);
// The copy_addr doesn't provide any values, but we've arranged for our
// iterators to visit the newly generated instructions, which do.
return;
}
}
// TODO: inout applies should only clobber the pieces passed in.
// Otherwise, this is some unknown instruction, conservatively assume that all
// values are clobbered.
RequiredElts.clear();
ConflictingValues = llvm::SmallBitVector(Result.size(), true);
return;
}
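// For example (illustrative), with memory of type $(Int64, Int64), applying
// updateAvailableValues to
//   store %v to %addr1      // %addr1 projects subelement 1 of the memory
// when RequiredElts = {0, 1} records (%v, 0) as Result[1], clears
// RequiredElts[1], and leaves only subelement 0 still to be found.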
/// Try to find available values of a set of subelements of the current value,
/// starting right before the specified instruction.
///
/// The bitvector indicates which subelements we're interested in, and result
/// captures the available value (plus an indicator of which subelement of that
/// value is needed).
///
void AllocOptimize::
computeAvailableValues(SILInstruction *StartingFrom,
llvm::SmallBitVector &RequiredElts,
SmallVectorImpl<std::pair<SILValue, unsigned>> &Result) {
llvm::SmallDenseMap<SILBasicBlock*, llvm::SmallBitVector, 32> VisitedBlocks;
llvm::SmallBitVector ConflictingValues(Result.size());
computeAvailableValuesFrom(StartingFrom->getIterator(),
StartingFrom->getParent(), RequiredElts, Result,
VisitedBlocks, ConflictingValues);
// If we have any conflicting values, explicitly mask them out of the result,
// so we don't pick one arbitrary available value.
if (!ConflictingValues.none())
for (unsigned i = 0, e = Result.size(); i != e; ++i)
if (ConflictingValues[i])
Result[i] = { SILValue(), 0U };
return;
}
void AllocOptimize::
computeAvailableValuesFrom(SILBasicBlock::iterator StartingFrom,
SILBasicBlock *BB,
llvm::SmallBitVector &RequiredElts,
SmallVectorImpl<std::pair<SILValue, unsigned>> &Result,
llvm::SmallDenseMap<SILBasicBlock*, llvm::SmallBitVector, 32> &VisitedBlocks,
llvm::SmallBitVector &ConflictingValues) {
assert(!RequiredElts.none() && "Scanning with a goal of finding nothing?");
// If there is a potential modification in the current block, scan the block
// to see if the store or escape is before or after the load. If it is
// before, check to see if it produces the value we are looking for.
if (HasLocalDefinition.count(BB)) {
for (SILBasicBlock::iterator BBI = StartingFrom; BBI != BB->begin();) {
SILInstruction *TheInst = &*std::prev(BBI);
// If this instruction is unrelated to the element, ignore it.
if (!NonLoadUses.count(TheInst)) {
--BBI;
continue;
}
// Given an interesting instruction, incorporate it into the set of
// results, and filter down the list of demanded subelements that we still
// need.
updateAvailableValues(TheInst, RequiredElts, Result, ConflictingValues);
// If this satisfied all of the demanded values, we're done.
if (RequiredElts.none())
return;
// Otherwise, keep scanning the block. If the instruction we were looking
// at just got exploded, don't skip the next instruction.
if (&*std::prev(BBI) == TheInst)
--BBI;
}
}
// Otherwise, we need to scan up the CFG looking for available values.
for (auto PI = BB->pred_begin(), E = BB->pred_end(); PI != E; ++PI) {
SILBasicBlock *PredBB = *PI;
// If the predecessor block has already been visited (potentially due to a
// cycle in the CFG), don't revisit it. We can do this safely because we
// are optimistically assuming that all incoming elements in a cycle will be
// the same. If we ever detect a conflicting element, we record it and do
// not look at the result.
auto Entry = VisitedBlocks.insert({PredBB, RequiredElts});
if (!Entry.second) {
// If we are revisiting a block and asking for different required elements,
// then anything that does not agree is in conflict.
const auto &PrevRequired = Entry.first->second;
if (PrevRequired != RequiredElts) {
ConflictingValues |= (PrevRequired ^ RequiredElts);
RequiredElts &= ~ConflictingValues;
if (RequiredElts.none())
return;
}
continue;
}
// Make sure to pass in the same set of required elements for each pred.
llvm::SmallBitVector Elts = RequiredElts;
computeAvailableValuesFrom(PredBB->end(), PredBB, Elts, Result,
VisitedBlocks, ConflictingValues);
// If we have any conflicting values, don't bother searching for them.
RequiredElts &= ~ConflictingValues;
if (RequiredElts.none())
return;
}
}
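// For example (illustrative), in a diamond CFG where both predecessors store
// into the same subelement, each predecessor is visited with the same required
// mask; the first store found fills in Result, and if the other path provides
// a different value, updateAvailableValues flags the subelement as conflicting
// so that computeAvailableValues masks it out of the final result.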
static bool anyMissing(unsigned StartSubElt, unsigned NumSubElts,
ArrayRef<std::pair<SILValue, unsigned>> &Values) {
while (NumSubElts) {
if (!Values[StartSubElt].first) return true;
++StartSubElt;
--NumSubElts;
}
return false;
}
/// AggregateAvailableValues - Given a bunch of primitive subelement values,
/// build out the right aggregate type (LoadTy) by emitting tuple and struct
/// instructions as necessary.
static SILValue
AggregateAvailableValues(SILInstruction *Inst, SILType LoadTy,
SILValue Address,
ArrayRef<std::pair<SILValue, unsigned>> AvailableValues,
unsigned FirstElt) {
assert(LoadTy.isObject());
SILModule &M = Inst->getModule();
// Check to see if the requested value is fully available, as an aggregate.
// This is a super-common case for single-element structs, but is also a
// general answer for arbitrary structs and tuples as well.
if (FirstElt < AvailableValues.size()) { // #Elements may be zero.
SILValue FirstVal = AvailableValues[FirstElt].first;
if (FirstVal && AvailableValues[FirstElt].second == 0 &&
FirstVal->getType() == LoadTy) {
// If the first element of this value is available, check any extra ones
// before declaring success.
bool AllMatch = true;
for (unsigned i = 0, e = getNumSubElements(LoadTy, M); i != e; ++i)
if (AvailableValues[FirstElt+i].first != FirstVal ||
AvailableValues[FirstElt+i].second != i) {
AllMatch = false;
break;
}
if (AllMatch)
return FirstVal;
}
}
SILBuilderWithScope B(Inst);
if (TupleType *TT = LoadTy.getAs<TupleType>()) {
SmallVector<SILValue, 4> ResultElts;
for (unsigned EltNo : indices(TT->getElements())) {
SILType EltTy = LoadTy.getTupleElementType(EltNo);
unsigned NumSubElt = getNumSubElements(EltTy, M);
// If we are missing any of the available values in this tuple element,
// compute an address to load from.
SILValue EltAddr;
if (anyMissing(FirstElt, NumSubElt, AvailableValues))
EltAddr = B.createTupleElementAddr(Inst->getLoc(), Address, EltNo,
EltTy.getAddressType());
ResultElts.push_back(AggregateAvailableValues(Inst, EltTy, EltAddr,
AvailableValues, FirstElt));
FirstElt += NumSubElt;
}
return B.createTuple(Inst->getLoc(), LoadTy, ResultElts);
}
// Extract struct elements from fully referenceable structs.
if (auto *SD = getFullyReferenceableStruct(LoadTy)) {
SmallVector<SILValue, 4> ResultElts;
for (auto *FD : SD->getStoredProperties()) {
SILType EltTy = LoadTy.getFieldType(FD, M);
unsigned NumSubElt = getNumSubElements(EltTy, M);
// If we are missing any of the available values in this struct element,
// compute an address to load from.
SILValue EltAddr;
if (anyMissing(FirstElt, NumSubElt, AvailableValues))
EltAddr = B.createStructElementAddr(Inst->getLoc(), Address, FD,
EltTy.getAddressType());
ResultElts.push_back(AggregateAvailableValues(Inst, EltTy, EltAddr,
AvailableValues, FirstElt));
FirstElt += NumSubElt;
}
return B.createStruct(Inst->getLoc(), LoadTy, ResultElts);
}
// Otherwise, we have a simple primitive. If the value is available, use it,
// otherwise emit a load of the value.
auto Val = AvailableValues[FirstElt];
if (!Val.first)
return B.createLoad(Inst->getLoc(), Address,
LoadOwnershipQualifier::Unqualified);
SILValue EltVal = ExtractSubElement(Val.first, Val.second, B, Inst->getLoc());
// It must be the same type as LoadTy if available.
assert(EltVal->getType() == LoadTy &&
"Subelement types mismatch");
return EltVal;
}
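// For example (illustrative SIL), reassembling a load of $(Int64, Int64) where
// only subelement 1 is available as %v emits roughly:
//   %a0 = tuple_element_addr %addr : $*(Int64, Int64), 0
//   %e0 = load %a0 : $*Int64
//   %r = tuple (%e0 : $Int64, %v : $Int64)
// and returns %r.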
/// At this point, we know that this element satisfies the definitive init
/// requirements, so we can try to promote loads to enable SSA-based dataflow
/// analysis. We know that accesses to this element only access this element,
/// cross element accesses have been scalarized.
///
/// This returns true if the load has been removed from the program.
///
bool AllocOptimize::promoteLoad(SILInstruction *Inst) {
// Note that we intentionally don't support forwarding of weak pointers,
// because the underlying value may be deallocated at any time. We would
// have to prove that something in this function is holding the weak value
// live across the promoted region, and that isn't desirable for a stable
// diagnostics pass like this one.
// We only handle load and copy_addr right now.
if (auto *CAI = dyn_cast<CopyAddrInst>(Inst)) {
// If this is a CopyAddr, verify that the element type is loadable. If not,
// we can't explode to a load.
if (!CAI->getSrc()->getType().isLoadable(Module))
return false;
} else if (!isa<LoadInst>(Inst))
return false;
// If the box has escaped at this instruction, we can't safely promote the
// load.
if (hasEscapedAt(Inst))
return false;
SILType LoadTy = Inst->getOperand(0)->getType().getObjectType();
// If this is a load/copy_addr from a struct field that we want to promote,
// compute the access path down to the field so we can determine precise
// def/use behavior.
unsigned FirstElt = computeSubelement(Inst->getOperand(0), TheMemory);
// If this is a load from within an enum projection, we can't promote it since
// we don't track subelements in a type that could be changing.
if (FirstElt == ~0U)
return false;
unsigned NumLoadSubElements = getNumSubElements(LoadTy, Module);
// Set up the bitvector of elements being demanded by the load.
llvm::SmallBitVector RequiredElts(NumMemorySubElements);
RequiredElts.set(FirstElt, FirstElt+NumLoadSubElements);
SmallVector<std::pair<SILValue, unsigned>, 8> AvailableValues;
AvailableValues.resize(NumMemorySubElements);
// Find out if we have any available values. If no bits are demanded, we
// trivially succeed. This can happen when there is a load of an empty struct.
if (NumLoadSubElements != 0) {
computeAvailableValues(Inst, RequiredElts, AvailableValues);
// If there are no values available at this load point, then we fail to
// promote this load and there is nothing to do.
bool AnyAvailable = false;
for (unsigned i = FirstElt, e = i+NumLoadSubElements; i != e; ++i)
if (AvailableValues[i].first) {
AnyAvailable = true;
break;
}
if (!AnyAvailable)
return false;
}
// Ok, we have some available values. If we have a copy_addr, explode it now,
// exposing the load operation within it. Subsequent optimization passes will
// see the load and propagate the available values into it.
if (auto *CAI = dyn_cast<CopyAddrInst>(Inst)) {
explodeCopyAddr(CAI);
// This is removing the copy_addr, but explodeCopyAddr takes care of
// removing the instruction from Uses for us, so we return false.
return false;
}
// Aggregate together all of the subelements into something that has the same
// type as the load did, and emit (smaller) loads for any subelements that
// were not available.
auto NewVal = AggregateAvailableValues(Inst, LoadTy, Inst->getOperand(0),
AvailableValues, FirstElt);
++NumLoadPromoted;
// Simply replace the load.
assert(isa<LoadInst>(Inst));
DEBUG(llvm::dbgs() << " *** Promoting load: " << *Inst << "\n");
DEBUG(llvm::dbgs() << " To value: " << *NewVal << "\n");
Inst->replaceAllUsesWith(NewVal);
SILValue Addr = Inst->getOperand(0);
Inst->eraseFromParent();
if (auto *AddrI = dyn_cast<SILInstruction>(Addr))
recursivelyDeleteTriviallyDeadInstructions(AddrI);
return true;
}
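// For example (illustrative SIL), promoteLoad rewrites
//   store %v to %mem : $*Int64
//   %x = load %mem : $*Int64
// so that all uses of %x become uses of %v directly, and the load is erased.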
/// promoteDestroyAddr - DestroyAddr is a composed operation merging
/// load+strong_release. If the implicit load's value is available, explode it.
///
/// Note that we handle the general case of a destroy_addr of a piece of the
/// memory object, not just destroy_addrs of the entire thing.
///
bool AllocOptimize::promoteDestroyAddr(DestroyAddrInst *DAI) {
SILValue Address = DAI->getOperand();
// We cannot promote destroys of address-only types, because we can't expose
// the load.
SILType LoadTy = Address->getType().getObjectType();
if (LoadTy.isAddressOnly(Module))
return false;
// If the box has escaped at this instruction, we can't safely promote the
// load.
if (hasEscapedAt(DAI))
return false;
// Compute the access path down to the field so we can determine precise
// def/use behavior.
unsigned FirstElt = computeSubelement(Address, TheMemory);
assert(FirstElt != ~0U && "destroy within enum projection is not valid");
unsigned NumLoadSubElements = getNumSubElements(LoadTy, Module);
// Set up the bitvector of elements being demanded by the load.
llvm::SmallBitVector RequiredElts(NumMemorySubElements);
RequiredElts.set(FirstElt, FirstElt+NumLoadSubElements);
SmallVector<std::pair<SILValue, unsigned>, 8> AvailableValues;
AvailableValues.resize(NumMemorySubElements);
// Find out if we have any available values. If no bits are demanded, we
// trivially succeed. This can happen when there is a load of an empty struct.
if (NumLoadSubElements != 0) {
computeAvailableValues(DAI, RequiredElts, AvailableValues);
// If some value is not available at this load point, then we fail.
for (unsigned i = FirstElt, e = FirstElt+NumLoadSubElements; i != e; ++i)
if (!AvailableValues[i].first)
return false;
}
// Aggregate together all of the subelements into something that has the same
// type as the load did, and emit (smaller) loads for any subelements that
// were not available.
auto NewVal =
AggregateAvailableValues(DAI, LoadTy, Address, AvailableValues, FirstElt);
++NumDestroyAddrPromoted;
DEBUG(llvm::dbgs() << " *** Promoting destroy_addr: " << *DAI << "\n");
DEBUG(llvm::dbgs() << " To value: " << *NewVal << "\n");
SILBuilderWithScope(DAI).emitDestroyValueOperation(DAI->getLoc(), NewVal);
DAI->eraseFromParent();
return true;
}
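// For example (illustrative SIL), when the value stored in %mem is known to be
// %v of class type $C, promoteDestroyAddr replaces
//   destroy_addr %mem : $*C
// with a destroy of the available value, roughly
//   strong_release %v : $C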
/// Explode a copy_addr instruction of a loadable type into lower level
/// operations like loads, stores, retains, releases, retain_value, etc.
void AllocOptimize::explodeCopyAddr(CopyAddrInst *CAI) {
DEBUG(llvm::dbgs() << " -- Exploding copy_addr: " << *CAI << "\n");
SILType ValTy = CAI->getDest()->getType().getObjectType();
auto &TL = Module.getTypeLowering(ValTy);
// Keep track of the new instructions emitted.
SmallVector<SILInstruction*, 4> NewInsts;
SILBuilder B(CAI, &NewInsts);
B.setCurrentDebugScope(CAI->getDebugScope());
// Use type lowering to lower the copyaddr into a load sequence + store
// sequence appropriate for the type.
SILValue StoredValue = TL.emitLoadOfCopy(B, CAI->getLoc(), CAI->getSrc(),
CAI->isTakeOfSrc());
TL.emitStoreOfCopy(B, CAI->getLoc(), StoredValue, CAI->getDest(),
CAI->isInitializationOfDest());
// Update our internal state for this being gone.
NonLoadUses.erase(CAI);
// Remove the copy_addr from Uses. A single copy_addr can appear multiple
// times if the source and dest are to elements within a single aggregate, but
// we only want to pick up the CopyAddrKind from the store.
DIMemoryUse LoadUse, StoreUse;
for (auto &Use : Uses) {
if (Use.Inst != CAI) continue;
if (Use.Kind == DIUseKind::Load) {
assert(LoadUse.isInvalid());
LoadUse = Use;
} else {
assert(StoreUse.isInvalid());
StoreUse = Use;
}
Use.Inst = nullptr;
// Keep scanning in case the copy_addr appears multiple times.
}
assert((LoadUse.isValid() || StoreUse.isValid()) &&
"we should have a load or a store, possibly both");
assert(StoreUse.isInvalid() || StoreUse.Kind == Assign ||
StoreUse.Kind == PartialStore || StoreUse.Kind == Initialization);
// Now that we've emitted a bunch of instructions, including a load and store
// but also including other stuff, update our internal state to reflect them.
// Update the instructions that touch the memory. The SILBuilder above
// recorded each newly emitted instruction in NewInsts.
for (auto *NewInst : NewInsts) {
switch (NewInst->getKind()) {
default:
NewInst->dump();
llvm_unreachable("Unknown instruction generated by copy_addr lowering");
case ValueKind::StoreInst:
// If it is a store to the memory object (as opposed to a store to
// something else), track it as an access.
if (StoreUse.isValid()) {
StoreUse.Inst = NewInst;
NonLoadUses[NewInst] = Uses.size();
Uses.push_back(StoreUse);
}
continue;
case ValueKind::LoadInst:
// If it is a load from the memory object (as opposed to a load from
// something else), track it as an access. We need to explicitly check to
// see if the load accesses "TheMemory" because it could either be a load
// for the copy_addr source, or it could be a load corresponding to the
// "assign" operation on the destination of the copyaddr.
if (LoadUse.isValid() &&
getAccessPathRoot(NewInst->getOperand(0)) == TheMemory) {
LoadUse.Inst = NewInst;
Uses.push_back(LoadUse);
}
continue;
case ValueKind::RetainValueInst:
case ValueKind::StrongRetainInst:
case ValueKind::StrongReleaseInst:
case ValueKind::UnownedRetainInst:
case ValueKind::UnownedReleaseInst:
case ValueKind::ReleaseValueInst: // Destroy overwritten value
// These are ignored.
continue;
}
}
// Next, remove the copy_addr itself.
CAI->eraseFromParent();
}
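// For example (illustrative; the exact sequence depends on type lowering), a
// copy_addr of a class reference type $C that is neither a take nor an
// initialization explodes roughly into:
//   %v = load %src : $*C
//   strong_retain %v : $C
//   %old = load %dest : $*C
//   store %v to %dest : $*C
//   strong_release %old : $C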
/// tryToRemoveDeadAllocation - If the allocation is an autogenerated allocation
/// that is only stored to (after load promotion), then remove it completely.
bool AllocOptimize::tryToRemoveDeadAllocation() {
assert((isa<AllocBoxInst>(TheMemory) || isa<AllocStackInst>(TheMemory)) &&
"Unhandled allocation case");
// We don't want to remove allocations that are required for useful debug
// information at -O0. As such, we only remove allocations if:
//
// 1. They are in a transparent function.
// 2. They are in a normal function, but didn't come from a VarDecl, or came
// from one that was autogenerated or inlined from a transparent function.
SILLocation Loc = TheMemory->getLoc();
if (!TheMemory->getFunction()->isTransparent() &&
Loc.getAsASTNode<VarDecl>() && !Loc.isAutoGenerated() &&
!Loc.is<MandatoryInlinedLocation>())
return false;
// Check the uses list to see if there are any non-store uses left over after
// load promotion and other things DI does.
for (auto &U : Uses) {
// Ignore removed instructions.
if (U.Inst == nullptr) continue;
switch (U.Kind) {
case DIUseKind::SelfInit:
case DIUseKind::SuperInit:
llvm_unreachable("Can't happen on allocations");
case DIUseKind::Assign:
case DIUseKind::PartialStore:
case DIUseKind::InitOrAssign:
break; // These don't prevent removal.
case DIUseKind::Initialization:
if (!isa<ApplyInst>(U.Inst) &&
// A copy_addr that is not a take affects the retain count
// of the source.
(!isa<CopyAddrInst>(U.Inst) ||
cast<CopyAddrInst>(U.Inst)->isTakeOfSrc()))
break;
// FALL THROUGH.
LLVM_FALLTHROUGH;
case DIUseKind::Load:
case DIUseKind::IndirectIn:
case DIUseKind::InOutUse:
case DIUseKind::Escape:
DEBUG(llvm::dbgs() << "*** Failed to remove autogenerated alloc: "
"kept alive by: " << *U.Inst);
return false; // These do prevent removal.
}
}
// If the memory object has non-trivial type, then removing the deallocation
// will drop any releases. Check that there is nothing preventing removal.
if (!MemoryType.isTrivial(Module)) {
for (auto *R : Releases) {
if (R == nullptr || isa<DeallocStackInst>(R) || isa<DeallocBoxInst>(R))
continue;
DEBUG(llvm::dbgs() << "*** Failed to remove autogenerated alloc: "
"kept alive by release: " << *R);
return false;
}
}
DEBUG(llvm::dbgs() << "*** Removing autogenerated alloc_stack: "<<*TheMemory);
// If it is safe to remove, do it. Recursively remove all instructions
// hanging off the allocation instruction, then return success. Let the
// caller remove the allocation itself to avoid iterator invalidation.
eraseUsesOfInstruction(TheMemory);
return true;
}
/// doIt - Returns true if anything was changed.
bool AllocOptimize::doIt() {
bool Changed = false;
// Don't try to optimize incomplete aggregates.
if (MemoryType.aggregateHasUnreferenceableStorage())
return false;
// If we've successfully checked all of the definitive initialization
// requirements, try to promote loads. This can explode copy_addrs, so the
// use list may change size.
for (unsigned i = 0; i != Uses.size(); ++i) {
auto &Use = Uses[i];
// Ignore entries for instructions that got expanded along the way.
if (Use.Inst && Use.Kind == DIUseKind::Load)
if (promoteLoad(Use.Inst)) {
Uses[i].Inst = nullptr; // remove entry if load got deleted.
Changed = true;
}
}
// destroy_addr(p) is strong_release(load(p)), try to promote it too.
for (unsigned i = 0; i != Releases.size(); ++i) {
if (auto *DAI = dyn_cast_or_null<DestroyAddrInst>(Releases[i]))
if (promoteDestroyAddr(DAI)) {
// remove entry if destroy_addr got deleted.
Releases[i] = nullptr;
Changed = true;
}
}
// If this is an allocation, try to remove it completely.
if (!isa<MarkUninitializedInst>(TheMemory)
&& !isa<MarkUninitializedBehaviorInst>(TheMemory))
Changed |= tryToRemoveDeadAllocation();
return Changed;
}
static bool optimizeMemoryAllocations(SILFunction &Fn) {
bool Changed = false;
for (auto &BB : Fn) {
auto I = BB.begin(), E = BB.end();
while (I != E) {
SILInstruction *Inst = &*I;
if (!isa<AllocBoxInst>(Inst) && !isa<AllocStackInst>(Inst)) {
++I;
continue;
}
DEBUG(llvm::dbgs() << "*** DI Optimize looking at: " << *Inst << "\n");
DIMemoryObjectInfo MemInfo(Inst);
// Set up the datastructure used to collect the uses of the allocation.
SmallVector<DIMemoryUse, 16> Uses;
SmallVector<TermInst*, 1> FailableInits;
SmallVector<SILInstruction*, 4> Releases;
// Walk the use list of the pointer, collecting them.
collectDIElementUsesFrom(MemInfo, Uses, FailableInits, Releases, true,
/*TreatAddressToPointerAsInout*/ false);
Changed |= AllocOptimize(Inst, Uses, Releases).doIt();
// Carefully move iterator to avoid invalidation problems.
++I;
if (Inst->use_empty()) {
Inst->eraseFromParent();
++NumAllocRemoved;
Changed = true;
}
}
}
return Changed;
}
namespace {
class PredictableMemoryOptimizations : public SILFunctionTransform {
/// The entry point to the transformation.
void run() override {
if (optimizeMemoryAllocations(*getFunction()))
invalidateAnalysis(SILAnalysis::InvalidationKind::FunctionBody);
}
StringRef getName() override { return "Predictable Memory Opts"; }
};
} // end anonymous namespace
SILTransform *swift::createPredictableMemoryOptimizations() {
return new PredictableMemoryOptimizations();
}