| //===--- RedundantLoadElimination.cpp - SIL Load Forwarding ---------------===// |
| // |
| // This source file is part of the Swift.org open source project |
| // |
| // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors |
| // Licensed under Apache License v2.0 with Runtime Library Exception |
| // |
| // See https://swift.org/LICENSE.txt for license information |
| // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| // |
| //===----------------------------------------------------------------------===// |
| /// |
| /// \file |
| /// |
| /// This pass eliminates redundant loads. |
| /// |
| /// A load can be eliminated if the loaded value is already available at |
| /// that point, i.e. it was produced by a previous load from, or a store of |
| /// a known value to, the same LSLocation. |
| /// |
| /// In such cases, one can replace the load instruction with the previously |
| /// available value. |
| /// |
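| /// For example, in this hypothetical SIL sketch (types and surrounding |
| /// context elided): |
| /// |
| ///   store %v to %addr : $*Int |
| ///   %x = load %addr : $*Int      // redundant, can be replaced by %v |
| ///   %y = load %addr : $*Int      // redundant, can be replaced by %x |
| /// |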
| /// Redundant Load Elimination (RLE) eliminates such loads by: |
| /// |
| /// 1. Introducing the notion of an LSLocation that is used to model object |
| /// fields. (See below for more details). |
| /// |
| /// 2. Introducing the notion of an LSValue that is used to model the value |
| /// that currently resides in the associated LSLocation on the particular |
| /// program path. (See below for more details). |
| /// |
| /// 3. Performing an RPO walk over the control flow graph, tracking any |
| /// LSLocations that are read from or stored into in each basic block. The |
| /// read or stored value, kept in a map between LSLocation and LSValue, |
| /// becomes the available value for the LSLocation. |
| /// |
| /// 4. Performing an optimistic iterative intersection-based dataflow on the |
| /// gen and kill sets until convergence. |
| /// |
| /// At the core of RLE is the LSLocation class. An LSLocation is an |
| /// abstraction of an object field in the program. It consists of a base and |
| /// a projection path to the field accessed. |
| /// |
| /// In SIL, one can access an aggregate as a whole, e.g. store to a struct |
| /// with 2 Int fields. A store like this generates 2 *indivisible* |
| /// LSLocations, 1 for each field. In addition to keeping a list of |
| /// LSLocations, RLE also keeps their available LSValues. We call an |
| /// LSLocation *indivisible* because it cannot be broken down into smaller |
| /// LSLocations. |
| /// |
| /// An LSValue consists of a base (a SILValue from the load or store inst) |
| /// as well as a projection path to the field it represents. So, a store to |
| /// the 2-field struct mentioned above generates 2 LSLocations and |
| /// 2 LSValues. |
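| /// |
| /// For instance, in a hypothetical sketch where S is a struct with two Int |
| /// fields a and b, the store |
| /// |
| ///   store %s to %addr : $*S |
| /// |
| /// generates the LSLocations (%addr, #S.a) and (%addr, #S.b), written |
| /// informally as (Base, ProjectionPath) pairs, with the corresponding |
| /// LSValues (%s, #S.a) and (%s, #S.b). |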
| /// |
| /// Every basic block keeps a map between LSLocation and LSValue. By |
| /// keeping the LSLocation and LSValue in their indivisible form, one |
| /// can easily find which part of the load is redundant and how to compute its |
| /// forwarding value. |
| /// |
| /// In the case where both fields of the struct have available values, RLE |
| /// can find their LSValues (perhaps by a struct_extract from a larger |
| /// value) and then aggregate them. |
| /// |
| /// However, this may introduce a lot of extraction and aggregation which |
| /// may not be necessary, e.g. a store to the struct followed by a load from |
| /// the struct. To solve this problem, when RLE detects that a load |
| /// instruction can be replaced by a forwarded value, it tries to find the |
| /// minimum number of extractions necessary to form the forwarded value. It |
| /// groups the available values by their LSValue base, i.e. LSValues that |
| /// come from the same instruction, and then uses extraction to obtain the |
| /// needed components of the base, as in the sketch below. |
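| /// |
| /// For instance, continuing the hypothetical sketch above: |
| /// |
| ///   store %s to %addr : $*S |
| ///   %x = load %addr : $*S |
| /// |
| /// both available LSValues share the base %s, so instead of extracting and |
| /// re-aggregating the two fields, RLE forwards %s as a whole and replaces |
| /// %x with %s using no extractions at all. |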
| /// |
| //===----------------------------------------------------------------------===// |
| |
| #define DEBUG_TYPE "sil-redundant-load-elim" |
| #include "swift/SIL/Projection.h" |
| #include "swift/SIL/SILArgument.h" |
| #include "swift/SIL/SILBuilder.h" |
| #include "swift/SILOptimizer/Analysis/AliasAnalysis.h" |
| #include "swift/SILOptimizer/Analysis/ARCAnalysis.h" |
| #include "swift/SILOptimizer/Analysis/DominanceAnalysis.h" |
| #include "swift/SILOptimizer/Analysis/PostOrderAnalysis.h" |
| #include "swift/SILOptimizer/Analysis/ValueTracking.h" |
| #include "swift/SILOptimizer/PassManager/Passes.h" |
| #include "swift/SILOptimizer/PassManager/Transforms.h" |
| #include "swift/SILOptimizer/Utils/CFG.h" |
| #include "swift/SILOptimizer/Utils/Local.h" |
| #include "swift/SILOptimizer/Utils/LoadStoreOptUtils.h" |
| #include "swift/SILOptimizer/Utils/SILSSAUpdater.h" |
| #include "llvm/ADT/BitVector.h" |
| #include "llvm/ADT/MapVector.h" |
| #include "llvm/ADT/None.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Debug.h" |
| |
| using namespace swift; |
| |
| STATISTIC(NumForwardedLoads, "Number of loads forwarded"); |
| |
| /// Return the AllocStackInst corresponding to the given instruction if it |
| /// is a DeallocStackInst, otherwise return nullptr. |
| static SILInstruction *findAllocStackInst(SILInstruction *I) { |
| if (DeallocStackInst *DSI = dyn_cast<DeallocStackInst>(I)) |
| return dyn_cast<SILInstruction>(DSI->getOperand()); |
| return nullptr; |
| } |
| |
| /// ComputeAvailSetMax - If we ignore all unknown writes, what is the max |
| /// available set that can reach a certain point in a basic block? This |
| /// helps generate the genset and killset, i.e. if there is no downward |
| /// visible value that can reach the end of a basic block, then we know that |
| /// the genset and killset for the location need not be set. |
| /// |
| /// ComputeAvailGenKillSet - Build the genset and killset of the basic block. |
| /// |
| /// ComputeAvailValue - Compute the available value at the end of the basic |
| /// block. |
| /// |
| /// PerformRLE - Perform the actual redundant load elimination. |
| enum class RLEKind : unsigned { |
| ComputeAvailSetMax = 0, |
| ComputeAvailGenKillSet = 1, |
| ComputeAvailValue = 2, |
| PerformRLE = 3, |
| }; |
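| |
| // The kinds above are applied in the order listed. A sketch of the driver |
| // (see RLEContext::run and RLEContext::runIterativeRLE below): |
| // |
| //   processBasicBlocksForGenKillSet();     // ComputeAvailSetMax and |
| //                                          // ComputeAvailGenKillSet |
| //   processBasicBlocksWithGenKillSet();    // iterate the sets to |
| //                                          // convergence |
| //   processBasicBlocksForAvailValue();     // ComputeAvailValue |
| //   processBasicBlocksForRLE(Optimistic);  // PerformRLE |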
| |
| //===----------------------------------------------------------------------===// |
| // Utility Functions |
| //===----------------------------------------------------------------------===// |
| |
| static bool inline isComputeAvailSetMax(RLEKind Kind) { |
| return Kind == RLEKind::ComputeAvailSetMax; |
| } |
| |
| static bool inline isComputeAvailGenKillSet(RLEKind Kind) { |
| return Kind == RLEKind::ComputeAvailGenKillSet; |
| } |
| |
| static bool inline isComputeAvailValue(RLEKind Kind) { |
| return Kind == RLEKind::ComputeAvailValue; |
| } |
| |
| static bool inline isPerformingRLE(RLEKind Kind) { |
| return Kind == RLEKind::PerformRLE; |
| } |
| |
| /// Returns true if this is an instruction that may have side effects in a |
| /// general sense but is inert from a load store perspective. |
| static bool isRLEInertInstruction(SILInstruction *Inst) { |
| switch (Inst->getKind()) { |
| case ValueKind::StrongRetainInst: |
| case ValueKind::StrongRetainUnownedInst: |
| case ValueKind::UnownedRetainInst: |
| case ValueKind::RetainValueInst: |
| case ValueKind::DeallocStackInst: |
| case ValueKind::CondFailInst: |
| case ValueKind::IsUniqueInst: |
| case ValueKind::IsUniqueOrPinnedInst: |
| case ValueKind::FixLifetimeInst: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Basic Block Location State |
| //===----------------------------------------------------------------------===// |
| namespace { |
| |
| /// If this function has too many basic blocks or too many locations, it may |
| /// take a long time to compute the genset and killset. The number of memory |
| /// behavior or alias queries we need to perform is, in the worst case, |
| /// roughly proportional to the number of basic blocks times the number of |
| /// locations. |
| /// |
| /// We run RLE on functions with up to 128 basic blocks and 128 locations, |
| /// which is a large function. |
| constexpr unsigned MaxLSLocationBBMultiplicationNone = 128*128; |
| |
| /// We run optimistic (multi-iteration) RLE on functions with up to 64 basic |
| /// blocks and 64 locations, which is a sizable function; past that we fall |
| /// back to a single pessimistic iteration. |
| constexpr unsigned MaxLSLocationBBMultiplicationPessimistic = 64*64; |
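| |
| // For scale: the cutoffs above allow at most 64*64 = 4,096 block/location |
| // pairs for the optimistic data flow and 128*128 = 16,384 pairs overall; |
| // past the latter the function is not optimized at all. |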
| |
| /// Forward declaration. |
| class RLEContext; |
| |
| /// Per-basic-block load/store state, which enables forwarding values from |
| /// previous loads and stores to later loads. |
| class BlockState { |
| public: |
| enum class ValueState : unsigned { |
| CoverValues = 0, |
| ConcreteValues = 1, |
| CoverAndConcreteValues = 2, |
| }; |
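| |
| // A hypothetical example of a covering value: if both predecessors of a |
| // block store different concrete values to the same LSLocation, the |
| // location has a value on every incoming path, but no single concrete |
| // SILValue represents it. The location is *covered*; a concrete value can |
| // be materialized on demand via the SSA updater (see |
| // computePredecessorLocationValue), possibly as a SILArgument. |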
| |
| private: |
| /// # of locations in the LocationVault. |
| unsigned LocationNum; |
| |
| /// The basic block that we are optimizing. |
| SILBasicBlock *BB; |
| |
| /// A bit vector for which the ith bit represents the ith LSLocation in |
| /// LocationVault. If the bit is set, then the location currently has a |
| /// downward visible value at the beginning of the basic block. |
| llvm::SmallBitVector ForwardSetIn; |
| |
| /// A bit vector for which the ith bit represents the ith LSLocation in |
| /// LocationVault. If the bit is set, then the location currently has a |
| /// downward visible value at the end of the basic block. |
| llvm::SmallBitVector ForwardSetOut; |
| |
| /// A bit vector for which the ith bit represents the ith LSLocation in |
| /// LocationVault. If we ignore all unknown writes, what is the maximum set |
| /// of available locations at the current position in the basic block. |
| llvm::SmallBitVector ForwardSetMax; |
| |
| /// A bit vector for which the ith bit represents the ith LSLocation in |
| /// LocationVault. If the bit is set, then the basic block generates a |
| /// value for the location. |
| llvm::SmallBitVector BBGenSet; |
| |
| /// A bit vector for which the ith bit represents the ith LSLocation in |
| /// LocationVault. If the bit is set, then the basic block kills the |
| /// value for the location. |
| llvm::SmallBitVector BBKillSet; |
| |
| /// This is a map between LSLocations and their available values at the |
| /// beginning of this basic block. |
| ValueTableMap ForwardValIn; |
| |
| /// This is a map between LSLocations and their available values at the end of |
| /// this basic block. |
| ValueTableMap ForwardValOut; |
| |
| /// Keeps a list of replaceable instructions in the current basic block as |
| /// well as their SILValue replacement. |
| llvm::DenseMap<SILInstruction *, SILValue> RedundantLoads; |
| |
| /// The LSLocation that is read or written has been extracted, expanded and |
| /// mapped to a bit position in the bit vector. Update the ForwardSetIn of |
| /// the current basic block using that bit position. |
| void updateForwardSetForRead(RLEContext &Ctx, unsigned B); |
| void updateForwardSetForWrite(RLEContext &Ctx, unsigned B); |
| |
| /// The LSLocation that is read or written has been extracted, expanded and |
| /// mapped to bit position B in the bit vector. Update the genset and |
| /// killset of the current basic block accordingly. |
| void updateGenKillSetForRead(RLEContext &Ctx, unsigned B); |
| void updateGenKillSetForWrite(RLEContext &Ctx, unsigned B); |
| |
| /// The LSLocation that is read or written has been extracted, expanded and |
| /// mapped to bit position B in the bit vector. Update the ForwardSetMax of |
| /// the current basic block accordingly. |
| void updateMaxAvailSetForRead(RLEContext &Ctx, unsigned B); |
| void updateMaxAvailSetForWrite(RLEContext &Ctx, unsigned B); |
| |
| /// The LSLocation that is read or written has been extracted, expanded and |
| /// mapped to bit position L, with its value mapped to bit position V. |
| /// Update the ForwardSetIn and ForwardValIn of the current basic block. |
| void updateForwardSetAndValForRead(RLEContext &Ctx, unsigned L, unsigned V); |
| void updateForwardSetAndValForWrite(RLEContext &Ctx, unsigned L, unsigned V); |
| |
| /// There is a read to a LSLocation, expand the LSLocation into individual |
| /// fields before processing them. |
| void processRead(RLEContext &Ctx, SILInstruction *I, SILValue Mem, |
| SILValue Val, RLEKind Kind); |
| |
| /// There is a write to a LSLocation, expand the LSLocation into individual |
| /// fields before processing them. |
| void processWrite(RLEContext &Ctx, SILInstruction *I, SILValue Mem, |
| SILValue Val, RLEKind Kind); |
| |
| /// BitVector manipulation functions. |
| void startTrackingLocation(llvm::SmallBitVector &BV, unsigned B); |
| void stopTrackingLocation(llvm::SmallBitVector &BV, unsigned B); |
| bool isTrackingLocation(llvm::SmallBitVector &BV, unsigned B); |
| void startTrackingValue(ValueTableMap &VM, unsigned L, unsigned V); |
| void stopTrackingValue(ValueTableMap &VM, unsigned B); |
| |
| public: |
| BlockState() = default; |
| |
| void init(SILBasicBlock *NewBB, unsigned bitcnt, bool optimistic) { |
| BB = NewBB; |
| LocationNum = bitcnt; |
| // For reachable basic blocks, the initial state of ForwardSetOut should be |
| // all 1's. Otherwise the dataflow solution could be too conservative. |
| // |
| // Consider this case: the forwardable value created by "var a = 10" before |
| // the loop will not be forwarded if ForwardSetOut is initialized to 0. |
| // |
| // var a = 10 |
| // for _ in 0...1024 {} |
| // use(a); |
| // |
| // However, by doing so, we can only do the data forwarding after the |
| // data flow stabilizes. |
| // |
| // We set the initial state of unreachable blocks to 0, as we do not have |
| // a value for the location. |
| // |
| // This is a bit conservative, as we could be missing forwarding |
| // opportunities, e.g. at a join block where one predecessor is an |
| // unreachable block. |
| // |
| // We rely on other passes to clean up unreachable blocks. |
| ForwardSetIn.resize(LocationNum, false); |
| ForwardSetOut.resize(LocationNum, optimistic); |
| |
| // If we are running an optimistic data flow, set forward max to true |
| // initially. |
| ForwardSetMax.resize(LocationNum, optimistic); |
| |
| BBGenSet.resize(LocationNum, false); |
| BBKillSet.resize(LocationNum, false); |
| } |
| |
| /// Initialize the AvailSetMax by intersecting this basic block's |
| /// predecessors' AvailSetMax. |
| void mergePredecessorsAvailSetMax(RLEContext &Ctx); |
| |
| /// Initialize the AvailSet by intersecting this basic block's predecessors' |
| /// AvailSet. |
| void mergePredecessorAvailSet(RLEContext &Ctx); |
| |
| /// Initialize the AvailSet and AvailVal of the current basic block. |
| void mergePredecessorAvailSetAndValue(RLEContext &Ctx); |
| |
| /// Reached the end of the basic block, update the ForwardValOut with the |
| /// ForwardValIn. |
| void updateForwardValOut() { ForwardValOut = ForwardValIn; } |
| |
| /// Check whether the ForwardSetOut has changed. If it has, we need to |
| /// rerun the data flow to reach a fixed point. |
| bool updateForwardSetOut() { |
| if (ForwardSetIn == ForwardSetOut) |
| return false; |
| ForwardSetOut = ForwardSetIn; |
| return true; |
| } |
| |
| /// Returns the current basic block we are processing. |
| SILBasicBlock *getBB() const { return BB; } |
| |
| /// Returns the ForwardValIn for the current basic block. |
| ValueTableMap &getForwardValIn() { return ForwardValIn; } |
| |
| /// Returns the ForwardValOut for the current basic block. |
| ValueTableMap &getForwardValOut() { return ForwardValOut; } |
| |
| /// Returns the redundant loads and their replacements in the current basic |
| /// block. |
| llvm::DenseMap<SILInstruction *, SILValue> &getRL() { return RedundantLoads; } |
| |
| /// Look at the value for the given LSLocation at the end of the basic |
| /// block and return one of the three ValueState kinds. |
| ValueState getValueStateAtEndOfBlock(RLEContext &Ctx, LSLocation &L); |
| |
| /// Wrappers to query the value state of the location in this BlockState. |
| bool isCoverValues(RLEContext &Ctx, LSLocation &L) { |
| return getValueStateAtEndOfBlock(Ctx, L) == ValueState::CoverValues; |
| } |
| bool isConcreteValues(RLEContext &Ctx, LSLocation &L) { |
| return getValueStateAtEndOfBlock(Ctx, L) == ValueState::ConcreteValues; |
| } |
| |
| /// Iterate over the instructions in the basic block in forward order and |
| /// process them w.r.t. the given \p Kind. |
| void processInstructionWithKind(RLEContext &Ctx, SILInstruction *I, |
| RLEKind Kind); |
| void processBasicBlockWithKind(RLEContext &Ctx, RLEKind Kind); |
| |
| /// Process the current basic block with the genset and killset. Return true |
| /// if the ForwardSetOut changes. |
| bool processBasicBlockWithGenKillSet(); |
| |
| /// Set up the value for redundant load elimination. |
| bool setupRLE(RLEContext &Ctx, SILInstruction *I, SILValue Mem); |
| |
| /// Process an instruction which writes to memory in an unknown way. |
| void processUnknownWriteInst(RLEContext &Ctx, SILInstruction *I, |
| RLEKind Kind); |
| void processUnknownWriteInstForGenKillSet(RLEContext &Ctx, SILInstruction *I); |
| void processUnknownWriteInstForRLE(RLEContext &Ctx, SILInstruction *I); |
| |
| |
| void processDeallocStackInst(RLEContext &Ctx, SILInstruction *I, |
| RLEKind Kind); |
| void processDeallocStackInstForGenKillSet(RLEContext &Ctx, SILInstruction *I); |
| void processDeallocStackInstForRLE(RLEContext &Ctx, SILInstruction *I); |
| |
| /// Process LoadInst. Extract LSLocations from LoadInst. |
| void processLoadInst(RLEContext &Ctx, LoadInst *LI, RLEKind Kind); |
| |
| /// Process StoreInst. Extract LSLocations from StoreInst. |
| void processStoreInst(RLEContext &Ctx, StoreInst *SI, RLEKind Kind); |
| |
| /// Returns a *single* forwardable SILValue for the given LSLocation, |
| /// materialized right before the terminator of the basic block. |
| SILValue reduceValuesAtEndOfBlock(RLEContext &Ctx, LSLocation &L); |
| |
| #ifndef NDEBUG |
| void dump(RLEContext &Ctx); |
| #endif |
| }; |
| |
| } // end anonymous namespace |
| |
| //===----------------------------------------------------------------------===// |
| // RLEContext Interface |
| //===----------------------------------------------------------------------===// |
| |
| namespace { |
| |
| using BBValueMap = llvm::DenseMap<SILBasicBlock *, SILValue>; |
| |
| /// This class stores global state that we use when computing redundant |
| /// loads and their replacements in each basic block. |
| class RLEContext { |
| enum class ProcessKind { |
| ProcessMultipleIterations = 0, |
| ProcessOneIteration = 1, |
| ProcessNone = 2, |
| }; |
| private: |
| /// The function we are currently processing. |
| SILFunction *Fn; |
| |
| /// The passmanager we are using. |
| SILPassManager *PM; |
| |
| /// The alias analysis that we will use during all computations. |
| AliasAnalysis *AA; |
| |
| /// The type expansion analysis we will use during all computations. |
| TypeExpansionAnalysis *TE; |
| |
| /// The SSA updater we use to materialize covering values. |
| SILSSAUpdater Updater; |
| |
| /// The range that we use to iterate over the post order and reverse post |
| /// order of the given function. |
| PostOrderFunctionInfo *PO; |
| |
| /// Epilogue release analysis. |
| EpilogueARCFunctionInfo *EAFI; |
| |
| /// Keeps all the locations for the current function. The BitVector in each |
| /// BlockState is then laid on top of it to keep track of which LSLocation |
| /// has a downward available value. |
| std::vector<LSLocation> LocationVault; |
| |
| /// Contains a map from an LSLocation to its index in the LocationVault. |
| /// Used for fast lookup. |
| LSLocationIndexMap LocToBitIndex; |
| |
| /// Keeps a map between the accessed SILValue and the location. |
| LSLocationBaseMap BaseToLocIndex; |
| |
| /// Keeps all the LSValues for the current function. The ValueTableMap in |
| /// each BlockState refers into this vault by index to keep track of which |
| /// value is available for each LSLocation. |
| std::vector<LSValue> LSValueVault; |
| |
| /// Contains a map from an LSValue to its index in the LSValueVault. |
| /// Used for fast lookup. |
| llvm::DenseMap<LSValue, unsigned> ValToBitIndex; |
| |
| /// A map from each BasicBlock to its BlockState. |
| llvm::SmallDenseMap<SILBasicBlock *, BlockState, 16> BBToLocState; |
| |
| /// Keeps a list of basic blocks that have LoadInsts. If a basic block does |
| /// not have a LoadInst, we do not actually perform the last iteration where |
| /// RLE is performed on the basic block. |
| /// |
| /// NOTE: This is never populated for functions which only require 1 data |
| /// flow iteration. For functions that require more than 1 iteration of the |
| /// data flow, this is populated the first time the function is walked, i.e. |
| /// when we generate the genset and killset. |
| llvm::DenseSet<SILBasicBlock *> BBWithLoads; |
| |
| #ifndef NDEBUG |
| SILPrintContext printCtx; |
| #endif |
| |
| public: |
| RLEContext(SILFunction *F, SILPassManager *PM, AliasAnalysis *AA, |
| TypeExpansionAnalysis *TE, PostOrderFunctionInfo *PO, |
| EpilogueARCFunctionInfo *EAFI); |
| |
| RLEContext(const RLEContext &) = delete; |
| RLEContext(RLEContext &&) = default; |
| ~RLEContext() = default; |
| |
| /// Entry point to redundant load elimination. |
| bool run(); |
| |
| SILFunction *getFunction() const { return Fn; } |
| |
| /// Use a set of ad hoc rules to decide whether to skip the function, run a |
| /// pessimistic one-iteration data flow, or run the full iterative data flow |
| /// on the function. |
| ProcessKind getProcessFunctionKind(unsigned LoadCount, unsigned StoreCount); |
| |
| /// Run the iterative data flow until convergence. |
| void runIterativeRLE(); |
| |
| /// Process the basic blocks for the gen and kill set. |
| void processBasicBlocksForGenKillSet(); |
| |
| /// Process the basic blocks with the gen and kill set. |
| void processBasicBlocksWithGenKillSet(); |
| |
| /// Process the basic blocks to compute the available values generated |
| /// within each basic block. |
| void processBasicBlocksForAvailValue(); |
| |
| /// Process basic blocks to perform the redundant load elimination. |
| void processBasicBlocksForRLE(bool Optimistic); |
| |
| /// Returns the alias analysis we will use during all computations. |
| AliasAnalysis *getAA() const { return AA; } |
| |
| /// Returns the type expansion analysis we are using. |
| TypeExpansionAnalysis *getTE() const { return TE; } |
| |
| /// Returns the epilogue ARC function info we are using. |
| EpilogueARCFunctionInfo *getEAFI() const { return EAFI; } |
| |
| /// Returns the map from a base SILValue to its LSLocation. |
| LSLocationBaseMap &getBM() { return BaseToLocIndex; } |
| |
| /// Return the BlockState for the given basic block. |
| BlockState &getBlockState(SILBasicBlock *B) { return BBToLocState[B]; } |
| |
| /// Get the bit representing the LSLocation in the LocationVault. |
| unsigned getLocationBit(const LSLocation &L); |
| |
| /// Given the bit, get the LSLocation from the LocationVault. |
| LSLocation &getLocation(const unsigned index); |
| |
| /// Get the bit representing the LSValue in the LSValueVault. |
| unsigned getValueBit(const LSValue &L); |
| |
| /// Given the bit, get the LSValue from the LSValueVault. |
| LSValue &getValue(const unsigned index); |
| |
| /// Given an LSLocation, try to collect all the LSValues for this LSLocation |
| /// in the given basic block. If some of the expanded locations only have |
| /// covering values, find the real values in the block's predecessors. |
| bool collectLocationValues(SILBasicBlock *BB, LSLocation &L, |
| LSLocationValueMap &Values, ValueTableMap &VM); |
| |
| /// Transitively collect all the values that make up this location and |
| /// create a single SILValue out of them (possibly by introducing |
| /// SILArguments via the SSA updater). |
| SILValue computePredecessorLocationValue(SILBasicBlock *BB, LSLocation &L); |
| }; |
| |
| } // end anonymous namespace |
| |
| void BlockState::startTrackingValue(ValueTableMap &VM, unsigned L, unsigned V) { |
| VM[L] = V; |
| } |
| |
| void BlockState::stopTrackingValue(ValueTableMap &VM, unsigned B) { |
| VM.erase(B); |
| } |
| |
| bool BlockState::isTrackingLocation(llvm::SmallBitVector &BV, unsigned B) { |
| return BV.test(B); |
| } |
| |
| void BlockState::startTrackingLocation(llvm::SmallBitVector &BV, unsigned B) { |
| BV.set(B); |
| } |
| |
| void BlockState::stopTrackingLocation(llvm::SmallBitVector &BV, unsigned B) { |
| BV.reset(B); |
| } |
| |
| void BlockState::mergePredecessorsAvailSetMax(RLEContext &Ctx) { |
| if (BB->pred_empty()) { |
| ForwardSetMax.reset(); |
| return; |
| } |
| |
| auto Iter = BB->pred_begin(); |
| ForwardSetMax = Ctx.getBlockState(*Iter).ForwardSetMax; |
| Iter = std::next(Iter); |
| for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) { |
| ForwardSetMax &= Ctx.getBlockState(*Iter).ForwardSetMax; |
| } |
| } |
| |
| void BlockState::mergePredecessorAvailSet(RLEContext &Ctx) { |
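| // Intersection-based merge: |
| //   ForwardSetIn(BB) = AND of ForwardSetOut(P) over all predecessors P |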
| // Clear the state if the basic block has no predecessor. |
| if (BB->getPredecessorBlocks().begin() == BB->getPredecessorBlocks().end()) { |
| ForwardSetIn.reset(); |
| return; |
| } |
| |
| auto Iter = BB->pred_begin(); |
| ForwardSetIn = Ctx.getBlockState(*Iter).ForwardSetOut; |
| Iter = std::next(Iter); |
| for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) { |
| ForwardSetIn &= Ctx.getBlockState(*Iter).ForwardSetOut; |
| } |
| } |
| |
| void BlockState::mergePredecessorAvailSetAndValue(RLEContext &Ctx) { |
| // Clear the state if the basic block has no predecessor. |
| if (BB->getPredecessorBlocks().begin() == BB->getPredecessorBlocks().end()) { |
| ForwardSetIn.reset(); |
| ForwardValIn.clear(); |
| return; |
| } |
| |
| auto Iter = BB->pred_begin(); |
| ForwardSetIn = Ctx.getBlockState(*Iter).ForwardSetOut; |
| ForwardValIn = Ctx.getBlockState(*Iter).ForwardValOut; |
| Iter = std::next(Iter); |
| for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) { |
| BlockState &OtherState = Ctx.getBlockState(*Iter); |
| ForwardSetIn &= OtherState.ForwardSetOut; |
| |
| // Merge in the predecessor state. |
| for (unsigned i = 0; i < LocationNum; ++i) { |
| if (OtherState.ForwardSetOut[i]) { |
| // There are multiple values from multiple predecessors, set this as |
| // a covering value. We do not need to track the value itself, as we |
| // can always go to the predecessors BlockState to find it. |
| ForwardValIn[i] = Ctx.getValueBit(LSValue(true)); |
| continue; |
| } |
| // If this location does not have an available value in this predecessor, |
| // then clear it. |
| stopTrackingValue(ForwardValIn, i); |
| stopTrackingLocation(ForwardSetIn, i); |
| } |
| } |
| } |
| |
| void BlockState::processBasicBlockWithKind(RLEContext &Ctx, RLEKind Kind) { |
| // Iterate over instructions in forward order. |
| for (auto &II : *BB) { |
| processInstructionWithKind(Ctx, &II, Kind); |
| } |
| } |
| |
| bool BlockState::processBasicBlockWithGenKillSet() { |
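| // Standard forward transfer function over the bit vectors: |
| //   ForwardSetOut = (ForwardSetIn & ~BBKillSet) | BBGenSet |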
| ForwardSetIn.reset(BBKillSet); |
| ForwardSetIn |= BBGenSet; |
| return updateForwardSetOut(); |
| } |
| |
| SILValue BlockState::reduceValuesAtEndOfBlock(RLEContext &Ctx, LSLocation &L) { |
| // First, collect current available locations and their corresponding values |
| // into a map. |
| LSLocationValueMap Values; |
| |
| LSLocationList Locs; |
| LSLocation::expand(L, &BB->getModule(), Locs, Ctx.getTE()); |
| |
| // Find the values that this basic block defines and the locations for |
| // which we do not have a concrete value in the current basic block. |
| ValueTableMap &OTM = getForwardValOut(); |
| for (unsigned i = 0; i < Locs.size(); ++i) { |
| Values[Locs[i]] = Ctx.getValue(OTM[Ctx.getLocationBit(Locs[i])]); |
| } |
| |
| // Second, reduce the available values into a single SILValue we can use to |
| // forward. |
| SILValue TheForwardingValue; |
| TheForwardingValue = LSValue::reduce(L, &BB->getModule(), Values, |
| BB->getTerminator()); |
| // Return the forwarding value. |
| return TheForwardingValue; |
| } |
| |
| bool BlockState::setupRLE(RLEContext &Ctx, SILInstruction *I, SILValue Mem) { |
| // Try to construct a SILValue for the current LSLocation. |
| // |
| // Collect the locations and their corresponding values into a map. |
| LSLocation L; |
| LSLocationBaseMap &BaseToLocIndex = Ctx.getBM(); |
| if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) { |
| L = BaseToLocIndex[Mem]; |
| } else { |
| SILValue UO = getUnderlyingObject(Mem); |
| L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem)); |
| } |
| |
| LSLocationValueMap Values; |
| // Use the ForwardValIn as we are currently processing the basic block. |
| if (!Ctx.collectLocationValues(I->getParent(), L, Values, getForwardValIn())) |
| return false; |
| |
| // Reduce the available values into a single SILValue we can use to forward. |
| SILModule *Mod = &I->getModule(); |
| SILValue TheForwardingValue = LSValue::reduce(L, Mod, Values, I); |
| if (!TheForwardingValue) |
| return false; |
| |
| // Now that we have the forwarding value, record it for forwarding! |
| // |
| // NOTE: we do not perform the RLE right here because doing so could introduce |
| // new LSLocations. |
| // |
| // e.g. |
| // %0 = load %x |
| // %1 = load %x |
| // %2 = struct_extract %1, #a |
| // %3 = load %2 |
| // |
| // If we perform the RLE and replace %1 with %0, we end up with a memory |
| // location we did not have before, i.e. Base == %0, and Path == #a. |
| // |
| // We may be able to add the LSLocation to the vault, but it gets |
| // complicated very quickly, e.g. we need to resize the bit vectors, |
| // etc. |
| // |
| // However, since we already know the instruction to replace and the value to |
| // replace it with, we can record it now and forward it after all the |
| // forwardable values in the function have been recorded. |
| // |
| RedundantLoads[I] = TheForwardingValue; |
| |
| DEBUG(llvm::dbgs() << "FORWARD " << TheForwardingValue << " to" << *I); |
| return true; |
| } |
| |
| void BlockState::updateForwardSetForRead(RLEContext &Ctx, unsigned B) { |
| startTrackingLocation(ForwardSetIn, B); |
| } |
| |
| void BlockState::updateGenKillSetForRead(RLEContext &Ctx, unsigned B) { |
| startTrackingLocation(BBGenSet, B); |
| stopTrackingLocation(BBKillSet, B); |
| } |
| |
| void BlockState::updateForwardSetAndValForRead(RLEContext &Ctx, unsigned L, |
| unsigned V) { |
| // Track the new location and value. |
| startTrackingValue(ForwardValIn, L, V); |
| startTrackingLocation(ForwardSetIn, L); |
| } |
| |
| void BlockState::updateGenKillSetForWrite(RLEContext &Ctx, unsigned B) { |
| // This is a store, invalidate any location that this location may alias, as |
| // their values can no longer be forwarded. |
| LSLocation &R = Ctx.getLocation(B); |
| for (unsigned i = 0; i < LocationNum; ++i) { |
| if (!isTrackingLocation(ForwardSetMax, i)) |
| continue; |
| LSLocation &L = Ctx.getLocation(i); |
| if (!L.isMayAliasLSLocation(R, Ctx.getAA())) |
| continue; |
| // MayAlias, invalidate the location. |
| stopTrackingLocation(BBGenSet, i); |
| startTrackingLocation(BBKillSet, i); |
| } |
| |
| // Start tracking this location. |
| startTrackingLocation(BBGenSet, B); |
| stopTrackingLocation(BBKillSet, B); |
| } |
| |
| void BlockState::updateMaxAvailSetForWrite(RLEContext &Ctx, unsigned B) { |
| startTrackingLocation(ForwardSetMax, B); |
| } |
| |
| void BlockState::updateMaxAvailSetForRead(RLEContext &Ctx, unsigned B) { |
| startTrackingLocation(ForwardSetMax, B); |
| } |
| |
| void BlockState::updateForwardSetForWrite(RLEContext &Ctx, unsigned B) { |
| // This is a store, invalidate any location that this location may alias, as |
| // their values can no longer be forwarded. |
| LSLocation &R = Ctx.getLocation(B); |
| for (unsigned i = 0; i < LocationNum; ++i) { |
| if (!isTrackingLocation(ForwardSetIn, i)) |
| continue; |
| LSLocation &L = Ctx.getLocation(i); |
| if (!L.isMayAliasLSLocation(R, Ctx.getAA())) |
| continue; |
| // MayAlias, invalidate the location. |
| stopTrackingLocation(ForwardSetIn, i); |
| } |
| |
| // Start tracking this location. |
| startTrackingLocation(ForwardSetIn, B); |
| } |
| |
| void BlockState::updateForwardSetAndValForWrite(RLEContext &Ctx, unsigned L, |
| unsigned V) { |
| // This is a store, invalidate any location that this location may alias, as |
| // their values can no longer be forwarded. |
| LSLocation &R = Ctx.getLocation(L); |
| for (unsigned i = 0; i < LocationNum; ++i) { |
| if (!isTrackingLocation(ForwardSetIn, i)) |
| continue; |
| LSLocation &Loc = Ctx.getLocation(i); |
| if (!Loc.isMayAliasLSLocation(R, Ctx.getAA())) |
| continue; |
| // MayAlias, invalidate the location and value. |
| stopTrackingValue(ForwardValIn, i); |
| stopTrackingLocation(ForwardSetIn, i); |
| } |
| |
| // Start tracking this location and value. |
| startTrackingLocation(ForwardSetIn, L); |
| startTrackingValue(ForwardValIn, L, V); |
| } |
| |
| void BlockState::processWrite(RLEContext &Ctx, SILInstruction *I, SILValue Mem, |
| SILValue Val, RLEKind Kind) { |
| // Initialize the LSLocation. |
| LSLocation L; |
| LSLocationBaseMap &BaseToLocIndex = Ctx.getBM(); |
| if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) { |
| L = BaseToLocIndex[Mem]; |
| } else { |
| SILValue UO = getUnderlyingObject(Mem); |
| L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem)); |
| } |
| |
| // If we can't figure out the Base or Projection Path for the write, |
| // process it as an unknown memory instruction. |
| if (!L.isValid()) { |
| // We can ignore unknown store instructions if we are computing the |
| // AvailSetMax. |
| if (!isComputeAvailSetMax(Kind)) { |
| processUnknownWriteInst(Ctx, I, Kind); |
| } |
| return; |
| } |
| |
| // Expand the given location and val into individual fields and process |
| // them as separate writes. |
| LSLocationList Locs; |
| LSLocation::expand(L, &I->getModule(), Locs, Ctx.getTE()); |
| |
| if (isComputeAvailSetMax(Kind)) { |
| for (unsigned i = 0; i < Locs.size(); ++i) { |
| updateMaxAvailSetForWrite(Ctx, Ctx.getLocationBit(Locs[i])); |
| } |
| return; |
| } |
| |
| // Are we computing the genset and killset? |
| if (isComputeAvailGenKillSet(Kind)) { |
| for (unsigned i = 0; i < Locs.size(); ++i) { |
| updateGenKillSetForWrite(Ctx, Ctx.getLocationBit(Locs[i])); |
| } |
| return; |
| } |
| |
| // Are we computing available values or performing RLE? |
| LSValueList Vals; |
| LSValue::expand(Val, &I->getModule(), Vals, Ctx.getTE()); |
| if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) { |
| for (unsigned i = 0; i < Locs.size(); ++i) { |
| updateForwardSetAndValForWrite(Ctx, Ctx.getLocationBit(Locs[i]), |
| Ctx.getValueBit(Vals[i])); |
| } |
| return; |
| } |
| |
| llvm_unreachable("Unknown RLE compute kind"); |
| } |
| |
| void BlockState::processRead(RLEContext &Ctx, SILInstruction *I, SILValue Mem, |
| SILValue Val, RLEKind Kind) { |
| // Initialize the LSLocation. |
| LSLocation L; |
| LSLocationBaseMap &BaseToLocIndex = Ctx.getBM(); |
| if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) { |
| L = BaseToLocIndex[Mem]; |
| } else { |
| SILValue UO = getUnderlyingObject(Mem); |
| L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem)); |
| } |
| |
| // If we can't figure out the Base or Projection Path for the read, simply |
| // ignore it for now. |
| if (!L.isValid()) |
| return; |
| |
| // Expand the given LSLocation and Val into individual fields and process |
| // them as separate reads. |
| LSLocationList Locs; |
| LSLocation::expand(L, &I->getModule(), Locs, Ctx.getTE()); |
| |
| if (isComputeAvailSetMax(Kind)) { |
| for (unsigned i = 0; i < Locs.size(); ++i) { |
| updateMaxAvailSetForRead(Ctx, Ctx.getLocationBit(Locs[i])); |
| } |
| return; |
| } |
| |
| // Are we computing the genset and killset? |
| if (isComputeAvailGenKillSet(Kind)) { |
| for (unsigned i = 0; i < Locs.size(); ++i) { |
| updateGenKillSetForRead(Ctx, Ctx.getLocationBit(Locs[i])); |
| } |
| return; |
| } |
| |
| // Are we computing available values or performing RLE? |
| bool CanForward = true; |
| LSValueList Vals; |
| LSValue::expand(Val, &I->getModule(), Vals, Ctx.getTE()); |
| if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) { |
| for (unsigned i = 0; i < Locs.size(); ++i) { |
| if (isTrackingLocation(ForwardSetIn, Ctx.getLocationBit(Locs[i]))) |
| continue; |
| updateForwardSetAndValForRead(Ctx, Ctx.getLocationBit(Locs[i]), |
| Ctx.getValueBit(Vals[i])); |
| // We cannot perform the forwarding, as we are missing at least some |
| // pieces of the read location. |
| CanForward = false; |
| } |
| } |
| |
| // Simply return if we are not performing RLE or we do not have all the |
| // values available to perform RLE. |
| if (!isPerformingRLE(Kind) || !CanForward) |
| return; |
| |
| // Lastly, forward value to the load. |
| setupRLE(Ctx, I, Mem); |
| } |
| |
| void BlockState::processStoreInst(RLEContext &Ctx, StoreInst *SI, RLEKind Kind) { |
| processWrite(Ctx, SI, SI->getDest(), SI->getSrc(), Kind); |
| } |
| |
| void BlockState::processLoadInst(RLEContext &Ctx, LoadInst *LI, RLEKind Kind) { |
| processRead(Ctx, LI, LI->getOperand(), SILValue(LI), Kind); |
| } |
| |
| void BlockState::processUnknownWriteInstForGenKillSet(RLEContext &Ctx, |
| SILInstruction *I) { |
| auto *AA = Ctx.getAA(); |
| for (unsigned i = 0; i < LocationNum; ++i) { |
| if (!isTrackingLocation(ForwardSetMax, i)) |
| continue; |
| // Invalidate any location this instruction may write to. |
| // |
| // TODO: checking may alias with Base is overly conservative, |
| // we should check may alias with base plus projection path. |
| LSLocation &R = Ctx.getLocation(i); |
| if (!AA->mayWriteToMemory(I, R.getBase())) |
| continue; |
| // MayAlias. |
| stopTrackingLocation(BBGenSet, i); |
| startTrackingLocation(BBKillSet, i); |
| } |
| } |
| |
| void BlockState::processUnknownWriteInstForRLE(RLEContext &Ctx, |
| SILInstruction *I) { |
| auto *AA = Ctx.getAA(); |
| for (unsigned i = 0; i < LocationNum; ++i) { |
| if (!isTrackingLocation(ForwardSetIn, i)) |
| continue; |
| // Invalidate any location this instruction may write to. |
| // |
| // TODO: checking may alias with Base is overly conservative, |
| // we should check may alias with base plus projection path. |
| LSLocation &R = Ctx.getLocation(i); |
| if (!AA->mayWriteToMemory(I, R.getBase())) |
| continue; |
| // MayAlias. |
| stopTrackingLocation(ForwardSetIn, i); |
| stopTrackingValue(ForwardValIn, i); |
| } |
| } |
| |
| void BlockState::processUnknownWriteInst(RLEContext &Ctx, SILInstruction *I, |
| RLEKind Kind) { |
| // If this is a release on a guaranteed parameter, it cannot call a deinit, |
| // which might read or write memory. |
| if (isIntermediateRelease(I, Ctx.getEAFI())) |
| return; |
| |
| // Are we computing the genset and killset? |
| if (isComputeAvailGenKillSet(Kind)) { |
| processUnknownWriteInstForGenKillSet(Ctx, I); |
| return; |
| } |
| |
| // Are we computing the available value or doing RLE? |
| if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) { |
| processUnknownWriteInstForRLE(Ctx, I); |
| return; |
| } |
| |
| llvm_unreachable("Unknown RLE compute kind"); |
| } |
| |
| void BlockState:: |
| processDeallocStackInstForGenKillSet(RLEContext &Ctx, SILInstruction *I) { |
| SILValue ASI = findAllocStackInst(I); |
| for (unsigned i = 0; i < LocationNum; ++i) { |
| LSLocation &R = Ctx.getLocation(i); |
| if (R.getBase() != ASI) |
| continue; |
| // MayAlias. |
| stopTrackingLocation(BBGenSet, i); |
| startTrackingLocation(BBKillSet, i); |
| } |
| } |
| |
| void BlockState:: |
| processDeallocStackInstForRLE(RLEContext &Ctx, SILInstruction *I) { |
| SILValue ASI = findAllocStackInst(I); |
| for (unsigned i = 0; i < LocationNum; ++i) { |
| LSLocation &R = Ctx.getLocation(i); |
| if (R.getBase() != ASI) |
| continue; |
| // MayAlias. |
| stopTrackingLocation(ForwardSetIn, i); |
| stopTrackingValue(ForwardValIn, i); |
| } |
| } |
| |
| void BlockState:: |
| processDeallocStackInst(RLEContext &Ctx, SILInstruction *I, RLEKind Kind) { |
| // Are we computing the genset and killset? |
| if (isComputeAvailGenKillSet(Kind)) { |
| processDeallocStackInstForGenKillSet(Ctx, I); |
| return; |
| } |
| |
| // Are we computing the available value or doing RLE? |
| if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) { |
| processDeallocStackInstForRLE(Ctx, I); |
| return; |
| } |
| |
| llvm_unreachable("Unknown RLE compute kind"); |
| } |
| |
| |
| void BlockState::processInstructionWithKind(RLEContext &Ctx, |
| SILInstruction *Inst, |
| RLEKind Kind) { |
| // If this is a StoreInst, see whether it clobbers any forwarding values. |
| if (auto *SI = dyn_cast<StoreInst>(Inst)) { |
| processStoreInst(Ctx, SI, Kind); |
| return; |
| } |
| |
| // This is a LoadInst. Let's see if we can find a previously loaded or |
| // stored value to use instead of this load. |
| if (auto *LI = dyn_cast<LoadInst>(Inst)) { |
| processLoadInst(Ctx, LI, Kind); |
| return; |
| } |
| |
| if (auto *DSI = dyn_cast<DeallocStackInst>(Inst)) { |
| processDeallocStackInst(Ctx, DSI, Kind); |
| return; |
| } |
| |
| // If this instruction has side effects, but is inert from a load store |
| // perspective, skip it. |
| if (isRLEInertInstruction(Inst)) |
| return; |
| |
| // If this instruction does not read or write memory, we can skip it. |
| if (!Inst->mayReadOrWriteMemory()) |
| return; |
| |
| // If we have an instruction that may write to memory and we cannot prove |
| // that it and its operands cannot alias a load we have visited, |
| // invalidate that load. |
| if (Inst->mayWriteToMemory()) { |
| DEBUG(llvm::dbgs() << "WRITE " << *Inst); |
| processUnknownWriteInst(Ctx, Inst, Kind); |
| return; |
| } |
| DEBUG(llvm::dbgs() << "READ " << *Inst); |
| } |
| |
| RLEContext::ProcessKind |
| RLEContext:: |
| getProcessFunctionKind(unsigned LoadCount, unsigned StoreCount) { |
| // Don't optimize functions that are marked as 'no.optimize'. |
| if (!Fn->shouldOptimize()) |
| return ProcessKind::ProcessNone; |
| |
| // Really no point optimizing here, as there are no forwardable loads. |
| if (LoadCount + StoreCount < 2) |
| return ProcessKind::ProcessNone; |
| |
| bool RunOneIteration = true; |
| unsigned BBCount = 0; |
| unsigned LocationCount = LocationVault.size(); |
| |
| if (LocationCount == 0) |
| return ProcessKind::ProcessNone; |
| |
| // If every basic block has all of its predecessors processed before itself |
| // when the basic blocks in the function are iterated in reverse post order, |
| // then this function can be processed in one iteration, i.e. there is no |
| // need to generate the genset and killset. |
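| // For example, if the CFG is a DAG (no back edges), a reverse post order |
| // visits every block after all of its predecessors, so a single pass |
| // suffices. |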
| auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(Fn); |
| llvm::DenseSet<SILBasicBlock *> HandledBBs; |
| for (SILBasicBlock *B : PO->getReversePostOrder()) { |
| ++BBCount; |
| for (auto X : B->getPredecessorBlocks()) { |
| if (HandledBBs.find(X) == HandledBBs.end()) { |
| RunOneIteration = false; |
| break; |
| } |
| } |
| HandledBBs.insert(B); |
| } |
| |
| // Data flow may take too long to run. |
| if (BBCount * LocationCount > MaxLSLocationBBMultiplicationNone) |
| return ProcessKind::ProcessNone; |
| |
| // This function's data flow would converge in 1 iteration. |
| if (RunOneIteration) |
| return ProcessKind::ProcessOneIteration; |
| |
| // The function is too large for the optimistic data flow; run a single |
| // pessimistic data flow iteration instead. |
| if (BBCount * LocationCount > MaxLSLocationBBMultiplicationPessimistic) |
| return ProcessKind::ProcessOneIteration; |
| |
| return ProcessKind::ProcessMultipleIterations; |
| } |
| |
| #ifndef NDEBUG |
| void BlockState::dump(RLEContext &Ctx) { |
| for (unsigned i = 0; i < LocationNum; ++i) { |
| if (!isTrackingLocation(ForwardSetMax, i)) |
| continue; |
| |
| llvm::dbgs() << "Loc #" << i << ":" << (BBGenSet[i] ? " Gen" : "") |
| << (BBKillSet[i] ? " Kill" : ""); |
| if (!ForwardSetIn.empty() && ForwardSetIn.test(i)) { |
| llvm::dbgs() << " IN "; |
| ValueTableMap::const_iterator inIter = ForwardValIn.find(i); |
| if (inIter != ForwardValIn.end()) { |
| if (SILValue base = Ctx.getValue(inIter->second).getBase()) |
| llvm::dbgs() << base; |
| else |
| llvm::dbgs() << "no base"; |
| } |
| } |
| if (!ForwardSetOut.empty() && ForwardSetOut.test(i)) { |
| llvm::dbgs() << " OUT "; |
| ValueTableMap::const_iterator outIter = ForwardValOut.find(i); |
| if (outIter != ForwardValOut.end()) { |
| if (SILValue base = Ctx.getValue(outIter->second).getBase()) |
| llvm::dbgs() << base; |
| else |
| llvm::dbgs() << "no base"; |
| } |
| } |
| llvm::dbgs() << "\n"; |
| } |
| } |
| #endif |
| |
| //===----------------------------------------------------------------------===// |
| // RLEContext Implementation |
| //===----------------------------------------------------------------------===// |
| |
| RLEContext::RLEContext(SILFunction *F, SILPassManager *PM, AliasAnalysis *AA, |
| TypeExpansionAnalysis *TE, PostOrderFunctionInfo *PO, |
| EpilogueARCFunctionInfo *EAFI) |
| : Fn(F), PM(PM), AA(AA), TE(TE), PO(PO), EAFI(EAFI) |
| #ifndef NDEBUG |
| , |
| printCtx(llvm::dbgs(), /*Verbose=*/false, /*Sorted=*/true) |
| #endif |
| { |
| } |
| |
| LSLocation &RLEContext::getLocation(const unsigned index) { |
| return LocationVault[index]; |
| } |
| |
| unsigned RLEContext::getLocationBit(const LSLocation &Loc) { |
| // Return the bit position of the given Loc in the LocationVault. The bit |
| // position is then used to set/reset the bitvector kept by each BlockState. |
| // |
| // We should have the location populated by enumerateLSLocations at this |
| // point. |
| auto Iter = LocToBitIndex.find(Loc); |
| assert(Iter != LocToBitIndex.end() && "Location should have been enum'ed"); |
| return Iter->second; |
| } |
| |
| LSValue &RLEContext::getValue(const unsigned index) { |
| return LSValueVault[index]; |
| } |
| |
| unsigned RLEContext::getValueBit(const LSValue &Val) { |
| // Return the bit position of the given Val in the LSValueVault. The bit |
| // position is then used to set/reset the bitvector kept by each BlockState. |
| auto Iter = ValToBitIndex.find(Val); |
| |
| // We do not walk over the function to find all the possible LSValues up |
| // front, as some of these values will not be used, e.g. when the LoadInst |
| // that generates a value is itself RLE'ed. |
| // Instead, we create the LSValues when we need them. |
| if (Iter == ValToBitIndex.end()) { |
| ValToBitIndex[Val] = LSValueVault.size(); |
| LSValueVault.push_back(Val); |
| return ValToBitIndex[Val]; |
| } |
| return Iter->second; |
| } |
| |
| BlockState::ValueState BlockState::getValueStateAtEndOfBlock(RLEContext &Ctx, |
| LSLocation &L) { |
| // Count the covering values and concrete values for the locations expanded |
| // from the given location. |
| unsigned CSCount = 0, CTCount = 0; |
| LSLocationList Locs; |
| LSLocation::expand(L, &BB->getModule(), Locs, Ctx.getTE()); |
| |
| ValueTableMap &OTM = getForwardValOut(); |
| for (auto &X : Locs) { |
| LSValue &V = Ctx.getValue(OTM[Ctx.getLocationBit(X)]); |
| if (V.isCoveringValue()) { |
| ++CSCount; |
| continue; |
| } |
| ++CTCount; |
| } |
| |
| if (CSCount == Locs.size()) |
| return ValueState::CoverValues; |
| if (CTCount == Locs.size()) |
| return ValueState::ConcreteValues; |
| return ValueState::CoverAndConcreteValues; |
| } |
| |
| SILValue RLEContext::computePredecessorLocationValue(SILBasicBlock *BB, |
| LSLocation &L) { |
| BBValueMap Values; |
| llvm::DenseSet<SILBasicBlock *> HandledBBs; |
| llvm::SmallVector<SILBasicBlock *, 8> WorkList; |
| |
| // Push in all the predecessors to get started. |
| for (auto Pred : BB->getPredecessorBlocks()) { |
| WorkList.push_back(Pred); |
| } |
| |
| while (!WorkList.empty()) { |
| auto *CurBB = WorkList.pop_back_val(); |
| BlockState &Forwarder = getBlockState(CurBB); |
| |
| // Mark this basic block as processed. |
| HandledBBs.insert(CurBB); |
| |
| // There are 3 cases that can happen here. |
| // |
| // 1. The current basic block contains concrete values for the entire |
| // location. |
| // 2. The current basic block contains covering values for the entire |
| // location. |
| // 3. The current basic block contains concrete value for part of the |
| // location and covering values for the rest. |
| // |
| // This BlockState contains concrete values for all the expanded |
| // locations, collect and reduce them into a single value in the current |
| // basic block. |
| if (Forwarder.isConcreteValues(*this, L)) { |
| Values[CurBB] = Forwarder.reduceValuesAtEndOfBlock(*this, L); |
| continue; |
| } |
| |
| // This BlockState does not contain a concrete value for any of the |
| // expanded locations; collect the values from this block's predecessors. |
| if (Forwarder.isCoverValues(*this, L)) { |
| for (auto Pred : CurBB->getPredecessorBlocks()) { |
| if (HandledBBs.find(Pred) != HandledBBs.end()) |
| continue; |
| WorkList.push_back(Pred); |
| } |
| continue; |
| } |
| |
| // This block contains concrete values for some but not all the expanded |
| // locations, recursively call collectLocationValues to materialize the |
| // value that reaches this basic block. |
| LSLocationValueMap LSValues; |
| if (!collectLocationValues(CurBB, L, LSValues, Forwarder.getForwardValOut())) |
| return SILValue(); |
| |
| // Reduce the available values into a single SILValue we can use to forward |
| SILInstruction *IPt = CurBB->getTerminator(); |
| Values[CurBB] = LSValue::reduce(L, &BB->getModule(), LSValues, IPt); |
| } |
| |
| // Finally, hand all the collected values to the SSA updater and |
| // materialize the value reaching BB (possibly creating SILArguments). |
| Updater.Initialize(L.getType(&BB->getModule()).getObjectType()); |
| for (auto V : Values) { |
| Updater.AddAvailableValue(V.first, V.second); |
| } |
| |
| return Updater.GetValueInMiddleOfBlock(BB); |
| } |
| |
| bool RLEContext::collectLocationValues(SILBasicBlock *BB, LSLocation &L, |
| LSLocationValueMap &Values, |
| ValueTableMap &VM) { |
| LSLocationSet CSLocs; |
| LSLocationList Locs; |
| LSLocation::expand(L, &BB->getModule(), Locs, TE); |
| |
| auto *Mod = &BB->getModule(); |
| // Find the locations that this basic block defines and the locations for |
| // which we do not have a concrete value in the current basic block. |
| for (auto &X : Locs) { |
| Values[X] = getValue(VM[getLocationBit(X)]); |
| if (!Values[X].isCoveringValue()) |
| continue; |
| CSLocs.insert(X); |
| } |
| |
| // For locations for which we do not have concrete values in this basic |
| // block, try to reduce them to the minimum number of locations possible; |
| // this will help us generate as few SILArguments as possible. |
| LSLocation::reduce(L, Mod, CSLocs); |
| |
| // To handle covering values, we need to go to the predecessors and |
| // materialize the values there. |
| for (auto &X : CSLocs) { |
| SILValue V = computePredecessorLocationValue(BB, X); |
| if (!V) |
| return false; |
| |
| // We've constructed a concrete value for the covering value. Expand and |
| // collect the newly created forwardable values. |
| LSLocationList Locs; |
| LSValueList Vals; |
| LSLocation::expand(X, Mod, Locs, TE); |
| LSValue::expand(V, Mod, Vals, TE); |
| |
| for (unsigned i = 0; i < Locs.size(); ++i) { |
| Values[Locs[i]] = Vals[i]; |
| assert(Values[Locs[i]].isValid() && "Invalid load store value"); |
| } |
| } |
| return true; |
| } |
| |
| void RLEContext::processBasicBlocksForGenKillSet() { |
| for (SILBasicBlock *BB : PO->getReversePostOrder()) { |
| DEBUG(llvm::dbgs() << "PROCESS " << printCtx.getID(BB) |
| << " for Gen/Kill:\n"; |
| BB->print(llvm::dbgs(), printCtx)); |
| |
| BlockState &S = getBlockState(BB); |
| |
| // Compute the AvailSetMax at the beginning of the basic block. |
| S.mergePredecessorsAvailSetMax(*this); |
| |
| // Compute the genset and killset. |
| // |
| // To optimize this process, we also compute the AvailSetMax at each |
| // point in the basic block. |
| for (auto I = BB->begin(), E = BB->end(); I != E; ++I) { |
| if (auto *LI = dyn_cast<LoadInst>(&*I)) { |
| if (BBWithLoads.find(BB) == BBWithLoads.end()) |
| BBWithLoads.insert(BB); |
| S.processLoadInst(*this, LI, RLEKind::ComputeAvailSetMax); |
| } |
| if (auto *SI = dyn_cast<StoreInst>(&*I)) { |
| S.processStoreInst(*this, SI, RLEKind::ComputeAvailSetMax); |
| } |
| |
| S.processInstructionWithKind(*this, &*I, RLEKind::ComputeAvailGenKillSet); |
| } |
| DEBUG(S.dump(*this)); |
| } |
| } |
| |
| void RLEContext::processBasicBlocksWithGenKillSet() { |
| // Process each basic block with the gen and kill set. Every time the |
| // ForwardSetOut of a basic block changes, the optimization is rerun on its |
| // successors. |
| llvm::SmallVector<SILBasicBlock *, 16> WorkList; |
| llvm::DenseSet<SILBasicBlock *> HandledBBs; |
| |
| // Push into the worklist in post order so that we can pop from the back and |
| // get reverse post order. |
| for (SILBasicBlock *BB : PO->getPostOrder()) { |
| WorkList.push_back(BB); |
| HandledBBs.insert(BB); |
| } |
| while (!WorkList.empty()) { |
| SILBasicBlock *BB = WorkList.pop_back_val(); |
| DEBUG(llvm::dbgs() << "PROCESS " << printCtx.getID(BB) |
| << " with Gen/Kill.\n"); |
| HandledBBs.erase(BB); |
| |
| // Intersection. |
| BlockState &Forwarder = getBlockState(BB); |
| // Compute the ForwardSetIn at the beginning of the basic block. |
| Forwarder.mergePredecessorAvailSet(*this); |
| |
| if (Forwarder.processBasicBlockWithGenKillSet()) { |
| for (auto &X : BB->getSuccessors()) { |
| // We do not push a basic block into the worklist if it is already |
| // in the worklist. |
| if (HandledBBs.find(X) != HandledBBs.end()) |
| continue; |
| WorkList.push_back(X); |
| HandledBBs.insert(X); |
| } |
| } |
| DEBUG(Forwarder.dump(*this)); |
| } |
| } |
| |
| void RLEContext::processBasicBlocksForAvailValue() { |
| for (SILBasicBlock *BB : PO->getReversePostOrder()) { |
| DEBUG(llvm::dbgs() << "PROCESS " << printCtx.getID(BB) |
| << " for available.\n"); |
| |
| BlockState &Forwarder = getBlockState(BB); |
| |
| // Merge the predecessors. After merging, BlockState now contains |
| // lists of available LSLocations and their values that reach the |
| // beginning of the basic block along all paths. |
| Forwarder.mergePredecessorAvailSetAndValue(*this); |
| |
| // Merge duplicate loads, and forward stores to loads. We also update the |
| // lists of stores and loads to reflect the end of the basic block. |
| Forwarder.processBasicBlockWithKind(*this, RLEKind::ComputeAvailValue); |
| |
| // Update the locations with available values. We do not need to update |
| // the available BitVector here, as it should have been initialized and |
| // stabilized in processBasicBlocksWithGenKillSet. |
| Forwarder.updateForwardValOut(); |
| |
| DEBUG(Forwarder.dump(*this)); |
| } |
| } |
| |
| void RLEContext::processBasicBlocksForRLE(bool Optimistic) { |
| for (SILBasicBlock *BB : PO->getReversePostOrder()) { |
| DEBUG(llvm::dbgs() << "PROCESS " << printCtx.getID(BB) << " for RLE.\n"); |
| |
| // If this is a multi-iteration function, its forward sets have already |
| // been computed and converged. If, in addition, this basic block has no |
| // LoadInsts, there is no point in processing every instruction in the |
| // basic block again, as no load will be eliminated. |
| if (Optimistic && BBWithLoads.find(BB) == BBWithLoads.end()) |
| continue; |
| |
| BlockState &Forwarder = getBlockState(BB); |
| |
| // Merge the predecessors. After merging, BlockState now contains |
| // lists of available LSLocations and their values that reach the |
| // beginning of the basic block along all paths. |
| Forwarder.mergePredecessorAvailSetAndValue(*this); |
| |
| DEBUG(Forwarder.dump(*this)); |
| |
| // Perform the actual redundant load elimination. |
| Forwarder.processBasicBlockWithKind(*this, RLEKind::PerformRLE); |
| |
| // If this is a multi-iteration data flow, the forward sets have already |
| // been computed. |
| if (Optimistic) |
| continue; |
| |
| // Update the available locations and their values at the end of the block. |
| Forwarder.updateForwardSetOut(); |
| Forwarder.updateForwardValOut(); |
| } |
| } |
| |
| void RLEContext::runIterativeRLE() { |
| // Generate the genset and killset for every basic block. |
| processBasicBlocksForGenKillSet(); |
| |
| // Process basic blocks in RPO, iterating until the data flow converges. |
| processBasicBlocksWithGenKillSet(); |
| |
| // We have computed the available value bit, now go through every basic |
| // block and compute the forwarding value locally. This is necessary |
| // because, when we perform the RLE in the last iteration, we must handle |
| // loops, i.e. predecessor blocks which have not yet been processed when a |
| // basic block is processed. |
| processBasicBlocksForAvailValue(); |
| } |
| |
| bool RLEContext::run() { |
| // We perform redundant load elimination in the following phases. |
| // |
| // Phase 1. Compute the genset and killset for every basic block. |
| // |
| // Phase 2. Use an iterative data flow to compute whether there is an |
| // available value at a given point; we do not yet care about what the value |
| // is. |
| // |
| // Phase 3. Compute the real forwardable value at a given point. |
| // |
| // Phase 4. Perform the redundant load elimination. |
| |
| // Walk over the function and find all the locations accessed by |
| // this function. |
| std::pair<int, int> LSCount = std::make_pair(0, 0); |
| LSLocation::enumerateLSLocations(*Fn, LocationVault, |
| LocToBitIndex, |
| BaseToLocIndex, TE, |
| LSCount); |
| |
| // Check how to optimize this function. |
| ProcessKind Kind = getProcessFunctionKind(LSCount.first, LSCount.second); |
| |
| // We do not optimize this function at all. |
| if (Kind == ProcessKind::ProcessNone) |
| return false; |
| |
| // Do we run a multi-iteration data flow? |
| bool Optimistic = Kind == ProcessKind::ProcessMultipleIterations; |
| |
| // These are the basic blocks that we actually process. We do not process |
| // unreachable blocks; instead their ForwardSetOut is initialized to 0. |
| llvm::DenseSet<SILBasicBlock *> BBToProcess; |
| for (auto X : PO->getPostOrder()) |
| BBToProcess.insert(X); |
| |
| // For all basic blocks in the function, initialize a BB state. Since we |
| // know all the locations accessed in this function, we can resize the bit |
| // vector to the appropriate size. |
| for (auto &B : *Fn) { |
| BBToLocState[&B] = BlockState(); |
| BBToLocState[&B].init(&B, LocationVault.size(), Optimistic && |
| BBToProcess.find(&B) != BBToProcess.end()); |
| } |
| |
| DEBUG(for (unsigned i = 0; i < LocationVault.size(); ++i) { |
| llvm::dbgs() << "LSLocation #" << i; |
| getLocation(i).print(llvm::dbgs(), &Fn->getModule()); |
| }); |
| |
| if (Optimistic) |
| runIterativeRLE(); |
| |
| // We have the available value bit computed and the local forwarding value. |
| // Set up the load forwarding. |
| processBasicBlocksForRLE(Optimistic); |
| |
| // Finally, perform the redundant load replacements. |
| llvm::DenseSet<SILInstruction *> InstsToDelete; |
| bool SILChanged = false; |
| for (auto &B : *Fn) { |
| auto &State = BBToLocState[&B]; |
| auto &Loads = State.getRL(); |
| // Nothing to forward. |
| if (Loads.empty()) |
| continue; |
| // We iterate the instructions in the basic block in a deterministic order |
| // and use this order to perform the load forwarding. |
| // |
| // NOTE: we could end up with different SIL depending on the order in which |
| // load forwarding is performed. |
| for (auto I = B.rbegin(), E = B.rend(); I != E; ++I) { |
| auto Iter = Loads.find(&*I); |
| if (Iter == Loads.end()) |
| continue; |
| DEBUG(llvm::dbgs() << "Replacing " << SILValue(Iter->first) << "With " |
| << Iter->second); |
| SILChanged = true; |
| Iter->first->replaceAllUsesWith(Iter->second); |
| InstsToDelete.insert(Iter->first); |
| ++NumForwardedLoads; |
| } |
| } |
| |
| // Erase the instructions recursively; this way, we avoid a pass dependence |
| // on DCE. |
| for (auto &X : InstsToDelete) { |
| // It is possible that the instruction still has uses, because it could be |
| // used as the replacement value for some other RLE pairs. |
| // |
| // TODO: we should fix this, otherwise we are missing RLE opportunities. |
| if (!X->use_empty()) |
| continue; |
| recursivelyDeleteTriviallyDeadInstructions(X, true); |
| } |
| return SILChanged; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Top Level Entry Point |
| //===----------------------------------------------------------------------===// |
| |
| namespace { |
| |
| class RedundantLoadElimination : public SILFunctionTransform { |
| |
| StringRef getName() override { return "SIL Redundant Load Elimination"; } |
| |
| /// The entry point to the transformation. |
| void run() override { |
| SILFunction *F = getFunction(); |
| DEBUG(llvm::dbgs() << "*** RLE on function: " << F->getName() << " ***\n"); |
| |
| auto *AA = PM->getAnalysis<AliasAnalysis>(); |
| auto *TE = PM->getAnalysis<TypeExpansionAnalysis>(); |
| auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(F); |
| auto *EAFI = PM->getAnalysis<EpilogueARCAnalysis>()->get(F); |
| |
| RLEContext RLE(F, PM, AA, TE, PO, EAFI); |
| if (RLE.run()) { |
| invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions); |
| } |
| } |
| }; |
| |
| } // end anonymous namespace |
| |
| SILTransform *swift::createRedundantLoadElimination() { |
| return new RedundantLoadElimination(); |
| } |