//===--- RedundantLoadElimination.cpp - SIL Load Forwarding ---------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
///
/// \file
///
/// This pass eliminates redundant loads.
///
/// A load can be eliminated if its value is already available, i.e. it was
/// produced by a previous load from, or a store of a known value to, the same
/// LSLocation.
///
/// In this case, the load instruction can be replaced with the previously
/// available value.
///
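/// As a minimal illustrative sketch (the value names are hypothetical, not
/// taken from any particular test case):
///
///   store %v to %x : $*Int
///   %l = load %x : $*Int    // redundant; uses of %l can be rewritten to %v
///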
/// Redundant Load Elimination (RLE) eliminates such loads by:
///
/// 1. Introducing a notion of an LSLocation that is used to model object
/// fields. (See below for more details).
///
/// 2. Introducing a notion of an LSValue that is used to model the value
/// that currently resides in the associated LSLocation on the particular
/// program path. (See below for more details).
///
/// 3. Performing an RPO walk over the control flow graph, tracking any
/// LSLocations that are read from or stored into in each basic block. The
/// read or stored value, kept in a map between LSLocation and LSValue,
/// becomes the available value for the LSLocation.
///
/// 4. Performing an optimistic iterative intersection-based dataflow on the
/// gen and kill sets until convergence (a sketch of the merge and transfer
/// functions is given below).
///
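/// A sketch of the merge and transfer functions used by that dataflow (see
/// mergePredecessorAvailSet and processBasicBlockWithGenKillSet below):
///
///   ForwardSetIn(BB)  = intersection of ForwardSetOut(Pred) over all Preds
///   ForwardSetOut(BB) = (ForwardSetIn(BB) - BBKillSet(BB)) | BBGenSet(BB)
///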
/// At the core of RLE, there is the LSLocation class. An LSLocation is an
/// abstraction of an object field in the program. It consists of a base and a
/// projection path to the field accessed.
///
/// In SIL, one can access an aggregate as a whole, e.g. store to a struct with
/// 2 Int fields. A store like this will generate 2 *indivisible* LSLocations,
/// 1 for each field. In addition to keeping a list of LSLocations, RLE also
/// keeps their available LSValues. We call them *indivisible* because they
/// cannot be broken down into smaller LSLocations.
///
/// An LSValue consists of a base - a SILValue from the load or store inst -
/// as well as a projection path to the field it represents. So, a
/// store to the 2-field struct mentioned above will generate 2 LSLocations
/// and 2 LSValues.
///
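/// As an illustrative sketch (the struct type and value names below are
/// hypothetical):
///
///   struct S { var a: Int; var b: Int }
///
///   store %s to %addr : $*S
///
/// generates the LSLocations (%addr, #S.a) and (%addr, #S.b), and the
/// LSValues (%s, #S.a) and (%s, #S.b).
///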
/// Every basic block keeps a map between LSLocation and LSValue. By
/// keeping the LSLocation and LSValue in their indivisible form, one
/// can easily find which part of the load is redundant and how to compute its
/// forwarding value.
///
/// In the case where both fields of the struct have available values,
/// RLE can find their LSValues (possibly by struct_extract from a larger
/// value) and then aggregate them.
///
/// However, this may introduce a lot of extraction and aggregation which may
/// not be necessary, e.g. a store to the struct followed by a load from the
/// struct. To solve this problem, when RLE detects that a load instruction
/// can be replaced by a forwarded value, it will try to find the minimum
/// number of extractions necessary to form the forwarded value. It will group
/// the available values by their LSValue base, i.e. the LSValues that come
/// from the same instruction, and then use extraction to obtain the needed
/// components of the base.
///
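/// Continuing the sketch above: if the whole struct is stored and then the
/// whole struct is loaded,
///
///   store %s to %addr : $*S
///   %l = load %addr : $*S
///
/// both field LSValues share the base %s, so the load can simply be replaced
/// by %s, with no struct_extract and aggregation needed.
///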
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sil-redundant-load-elim"
#include "swift/SIL/Projection.h"
#include "swift/SIL/SILArgument.h"
#include "swift/SIL/SILBuilder.h"
#include "swift/SILOptimizer/Analysis/AliasAnalysis.h"
#include "swift/SILOptimizer/Analysis/ARCAnalysis.h"
#include "swift/SILOptimizer/Analysis/DominanceAnalysis.h"
#include "swift/SILOptimizer/Analysis/PostOrderAnalysis.h"
#include "swift/SILOptimizer/Analysis/ValueTracking.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/CFG.h"
#include "swift/SILOptimizer/Utils/Local.h"
#include "swift/SILOptimizer/Utils/LoadStoreOptUtils.h"
#include "swift/SILOptimizer/Utils/SILSSAUpdater.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
using namespace swift;
STATISTIC(NumForwardedLoads, "Number of loads forwarded");
/// Return the alloc_stack instruction corresponding to the given
/// dealloc_stack instruction, or nullptr if the instruction is not a
/// dealloc_stack.
static SILInstruction *findAllocStackInst(SILInstruction *I) {
if (DeallocStackInst *DSI = dyn_cast<DeallocStackInst>(I))
return dyn_cast<SILInstruction>(DSI->getOperand());
return nullptr;
}
/// ComputeAvailSetMax - If we ignore all unknown writes, what is the max
/// available set that can reach a certain point in a basic block. This
/// helps generate the genset and killset, i.e. if there is no downward visible
/// value that can reach the end of a basic block, then we know that the genset
/// and killset for the location need not be set.
///
/// ComputeAvailGenKillSet - Build the genset and killset of the basic block.
///
/// ComputeAvailValue - Compute the available value at the end of the basic
/// block.
///
/// PerformRLE - Perform the actual redundant load elimination.
enum class RLEKind : unsigned {
ComputeAvailSetMax = 0,
ComputeAvailGenKillSet = 1,
ComputeAvailValue = 2,
PerformRLE = 3,
};
//===----------------------------------------------------------------------===//
// Utility Functions
//===----------------------------------------------------------------------===//
static bool inline isComputeAvailSetMax(RLEKind Kind) {
return Kind == RLEKind::ComputeAvailSetMax;
}
static bool inline isComputeAvailGenKillSet(RLEKind Kind) {
return Kind == RLEKind::ComputeAvailGenKillSet;
}
static bool inline isComputeAvailValue(RLEKind Kind) {
return Kind == RLEKind::ComputeAvailValue;
}
static bool inline isPerformingRLE(RLEKind Kind) {
return Kind == RLEKind::PerformRLE;
}
/// Returns true if this is an instruction that may have side effects in a
/// general sense but is inert from a load store perspective.
static bool isRLEInertInstruction(SILInstruction *Inst) {
switch (Inst->getKind()) {
case ValueKind::StrongRetainInst:
case ValueKind::StrongRetainUnownedInst:
case ValueKind::UnownedRetainInst:
case ValueKind::RetainValueInst:
case ValueKind::DeallocStackInst:
case ValueKind::CondFailInst:
case ValueKind::IsUniqueInst:
case ValueKind::IsUniqueOrPinnedInst:
case ValueKind::FixLifetimeInst:
return true;
default:
return false;
}
}
//===----------------------------------------------------------------------===//
// Basic Block Location State
//===----------------------------------------------------------------------===//
namespace {
/// If this function has too many basic blocks or too many locations, it may
/// take a long time to compute the genset and killset. The number of memory
/// behavior or alias queries we need to do in the worst case is roughly
/// linear in (# of BBs) x (# of locations).
///
/// With this limit, we can run RLE on functions with up to 128 basic blocks
/// and 128 locations, which is a large function.
constexpr unsigned MaxLSLocationBBMultiplicationNone = 128*128;
/// With this limit, we run optimistic RLE only on functions with fewer than
/// 64 basic blocks and 64 locations, which is a sizable function.
constexpr unsigned MaxLSLocationBBMultiplicationPessimistic = 64*64;
/// Forward declaration.
class RLEContext;
/// Per-basic-block state of loads and stores, which allows forwarding from
/// loads and stores to loads.
class BlockState {
public:
enum class ValueState : unsigned {
CoverValues = 0,
ConcreteValues = 1,
CoverAndConcreteValues = 2,
};
private:
/// # of locations in the LocationVault.
unsigned LocationNum;
/// The basic block that we are optimizing.
SILBasicBlock *BB;
/// A bit vector for which the ith bit represents the ith LSLocation in
/// LocationVault. If the bit is set, then the location currently has a
/// downward visible value at the beginning of the basic block.
llvm::SmallBitVector ForwardSetIn;
/// A bit vector for which the ith bit represents the ith LSLocation in
/// LocationVault. If the bit is set, then the location currently has a
/// downward visible value at the end of the basic block.
llvm::SmallBitVector ForwardSetOut;
/// A bit vector for which the ith bit represents the ith LSLocation in
/// LocationVault. If we ignore all unknown writes, what's the maximum set
/// of available locations at the current position in the basic block.
llvm::SmallBitVector ForwardSetMax;
/// A bit vector for which the ith bit represents the ith LSLocation in
/// LocationVault. If the bit is set, then the basic block generates a
/// value for the location.
llvm::SmallBitVector BBGenSet;
/// A bit vector for which the ith bit represents the ith LSLocation in
/// LocationVault. If the bit is set, then the basic block kills the
/// value for the location.
llvm::SmallBitVector BBKillSet;
/// This is a map between LSLocations and their available values at the
/// beginning of this basic block.
ValueTableMap ForwardValIn;
/// This is a map between LSLocations and their available values at the end of
/// this basic block.
ValueTableMap ForwardValOut;
/// Keeps a list of replaceable instructions in the current basic block as
/// well as their SILValue replacement.
llvm::DenseMap<SILInstruction *, SILValue> RedundantLoads;
/// An LSLocation that is read or written has been extracted, expanded and
/// mapped to bit position B in the bitvector. Update it in the ForwardSetIn
/// of the current basic block.
void updateForwardSetForRead(RLEContext &Ctx, unsigned B);
void updateForwardSetForWrite(RLEContext &Ctx, unsigned B);
/// An LSLocation that is read or written has been extracted, expanded and
/// mapped to bit position B in the bitvector. Update it in the genset and
/// killset of the current basic block.
void updateGenKillSetForRead(RLEContext &Ctx, unsigned B);
void updateGenKillSetForWrite(RLEContext &Ctx, unsigned B);
/// An LSLocation that is read or written has been extracted, expanded and
/// mapped to bit position B in the bitvector. Update it in the ForwardSetMax
/// of the current basic block.
void updateMaxAvailSetForRead(RLEContext &Ctx, unsigned B);
void updateMaxAvailSetForWrite(RLEContext &Ctx, unsigned B);
/// An LSLocation that is read or written has been extracted, expanded and
/// mapped to bit position L in the bitvector, with its value at position V.
/// Process it using these bit positions.
void updateForwardSetAndValForRead(RLEContext &Ctx, unsigned L, unsigned V);
void updateForwardSetAndValForWrite(RLEContext &Ctx, unsigned L, unsigned V);
/// There is a read to a LSLocation, expand the LSLocation into individual
/// fields before processing them.
void processRead(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
SILValue Val, RLEKind Kind);
/// There is a write to a LSLocation, expand the LSLocation into individual
/// fields before processing them.
void processWrite(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
SILValue Val, RLEKind Kind);
/// BitVector manipulation functions.
void startTrackingLocation(llvm::SmallBitVector &BV, unsigned B);
void stopTrackingLocation(llvm::SmallBitVector &BV, unsigned B);
bool isTrackingLocation(llvm::SmallBitVector &BV, unsigned B);
void startTrackingValue(ValueTableMap &VM, unsigned L, unsigned V);
void stopTrackingValue(ValueTableMap &VM, unsigned B);
public:
BlockState() = default;
void init(SILBasicBlock *NewBB, unsigned bitcnt, bool optimistic) {
BB = NewBB;
LocationNum = bitcnt;
// For reachable basic blocks, the initial state of ForwardSetOut should be
// all 1's. Otherwise the dataflow solution could be too conservative.
//
// Consider this case: the forwardable value from 'var a = 10' before the loop
// will not be forwarded if the ForwardSetOut is set to 0 initially.
//
// var a = 10
// for _ in 0...1024 {}
// use(a);
//
// However, by doing so, we can only do the data forwarding after the
// data flow stabilizes.
//
// We set the initial state of unreachable blocks to 0, as we do not have
// a value for the location.
//
// This is a bit conservative as we could be missing forwarding
// opportunities, e.g. a join block with 1 predecessor being an
// unreachable block.
//
// We rely on other passes to clean up unreachable blocks.
ForwardSetIn.resize(LocationNum, false);
ForwardSetOut.resize(LocationNum, optimistic);
// If we are running an optimistic data flow, set forward max to true
// initially.
ForwardSetMax.resize(LocationNum, optimistic);
BBGenSet.resize(LocationNum, false);
BBKillSet.resize(LocationNum, false);
}
/// Initialize the AvailSetMax by intersecting this basic block's
/// predecessors' AvailSetMax.
void mergePredecessorsAvailSetMax(RLEContext &Ctx);
/// Initialize the AvailSet by intersecting this basic block's predecessors'
/// AvailSet.
void mergePredecessorAvailSet(RLEContext &Ctx);
/// Initialize the AvailSet and AvailVal of the current basic block.
void mergePredecessorAvailSetAndValue(RLEContext &Ctx);
/// We have reached the end of the basic block; update the ForwardValOut with
/// the ForwardValIn.
void updateForwardValOut() { ForwardValOut = ForwardValIn; }
/// Check whether the ForwardSetOut has changed. If it does, we need to
/// rerun the data flow to reach fixed point.
bool updateForwardSetOut() {
if (ForwardSetIn == ForwardSetOut)
return false;
ForwardSetOut = ForwardSetIn;
return true;
}
/// Returns the current basic block we are processing.
SILBasicBlock *getBB() const { return BB; }
/// Returns the ForwardValIn for the current basic block.
ValueTableMap &getForwardValIn() { return ForwardValIn; }
/// Returns the ForwardValOut for the current basic block.
ValueTableMap &getForwardValOut() { return ForwardValOut; }
/// Returns the redundant loads and their replacement in the current basic
/// block.
llvm::DenseMap<SILInstruction *, SILValue> &getRL() { return RedundantLoads; }
/// Look into the value for the given LSLocation at the end of the basic block
/// and return one of the three ValueState kinds.
ValueState getValueStateAtEndOfBlock(RLEContext &Ctx, LSLocation &L);
/// Wrappers to query the value state of the location in this BlockState.
bool isCoverValues(RLEContext &Ctx, LSLocation &L) {
return getValueStateAtEndOfBlock(Ctx, L) == ValueState::CoverValues;
}
bool isConcreteValues(RLEContext &Ctx, LSLocation &L) {
return getValueStateAtEndOfBlock(Ctx, L) == ValueState::ConcreteValues;
}
/// Iterate over the instructions in the basic block in forward order and
/// process them w.r.t. the given \p Kind.
void processInstructionWithKind(RLEContext &Ctx, SILInstruction *I,
RLEKind Kind);
void processBasicBlockWithKind(RLEContext &Ctx, RLEKind Kind);
/// Process the current basic block with the genset and killset. Return true
/// if the ForwardSetOut changes.
bool processBasicBlockWithGenKillSet();
/// Set up the value for redundant load elimination.
bool setupRLE(RLEContext &Ctx, SILInstruction *I, SILValue Mem);
/// Process Instruction which writes to memory in an unknown way.
void processUnknownWriteInst(RLEContext &Ctx, SILInstruction *I,
RLEKind Kind);
void processUnknownWriteInstForGenKillSet(RLEContext &Ctx, SILInstruction *I);
void processUnknownWriteInstForRLE(RLEContext &Ctx, SILInstruction *I);
void processDeallocStackInst(RLEContext &Ctx, SILInstruction *I,
RLEKind Kind);
void processDeallocStackInstForGenKillSet(RLEContext &Ctx, SILInstruction *I);
void processDeallocStackInstForRLE(RLEContext &Ctx, SILInstruction *I);
/// Process LoadInst. Extract LSLocations from LoadInst.
void processLoadInst(RLEContext &Ctx, LoadInst *LI, RLEKind Kind);
/// Process StoreInst. Extract LSLocations from StoreInst.
void processStoreInst(RLEContext &Ctx, StoreInst *SI, RLEKind Kind);
/// Returns a *single* forwardable SILValue for the given LSLocation at the
/// end of the basic block, i.e. right before its terminator.
SILValue reduceValuesAtEndOfBlock(RLEContext &Ctx, LSLocation &L);
#ifndef NDEBUG
void dump(RLEContext &Ctx);
#endif
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// RLEContext Interface
//===----------------------------------------------------------------------===//
namespace {
using BBValueMap = llvm::DenseMap<SILBasicBlock *, SILValue>;
/// This class stores global state that we use when computing redundant loads
/// and their replacements in each basic block.
class RLEContext {
enum class ProcessKind {
ProcessMultipleIterations = 0,
ProcessOneIteration = 1,
ProcessNone = 2,
};
private:
/// The function currently being processed.
SILFunction *Fn;
/// The passmanager we are using.
SILPassManager *PM;
/// The alias analysis that we will use during all computations.
AliasAnalysis *AA;
/// The type expansion analysis we will use during all computations.
TypeExpansionAnalysis *TE;
/// The SSA updater we use to materialize covering values.
SILSSAUpdater Updater;
/// The range that we use to iterate over the post order and reverse post
/// order of the given function.
PostOrderFunctionInfo *PO;
/// Epilogue release analysis.
EpilogueARCFunctionInfo *EAFI;
/// Keeps all the locations for the current function. The BitVector in each
/// BlockState is then laid on top of it to keep track of which LSLocation
/// has a downward available value.
std::vector<LSLocation> LocationVault;
/// Contains a map from each LSLocation to its index in the LocationVault.
/// Used for fast lookup.
LSLocationIndexMap LocToBitIndex;
/// Keeps a map between the accessed SILValue and the location.
LSLocationBaseMap BaseToLocIndex;
/// Keeps all the LSValues for the current function. The BitVector in
/// each BlockState is then laid on top of it to keep track of which LSLocation
/// has a downward available value.
std::vector<LSValue> LSValueVault;
/// Contains a map from each LSValue to its index in the LSValueVault.
/// Used for fast lookup.
llvm::DenseMap<LSValue, unsigned> ValToBitIndex;
/// A map from each BasicBlock to its BlockState.
llvm::SmallDenseMap<SILBasicBlock *, BlockState, 16> BBToLocState;
/// Keeps a list of basic blocks that have LoadInsts. If a basic block does
/// not have a LoadInst, we do not actually perform the last iteration where
/// RLE is actually performed on the basic block.
///
/// NOTE: This is never populated for functions which only require 1
/// data flow iteration. For functions that require more than 1 iteration of
/// the data flow, this is populated the first time the function is
/// walked, i.e. when we generate the genset and killset.
llvm::DenseSet<SILBasicBlock *> BBWithLoads;
#ifndef NDEBUG
SILPrintContext printCtx;
#endif
public:
RLEContext(SILFunction *F, SILPassManager *PM, AliasAnalysis *AA,
TypeExpansionAnalysis *TE, PostOrderFunctionInfo *PO,
EpilogueARCFunctionInfo *EAFI);
RLEContext(const RLEContext &) = delete;
RLEContext(RLEContext &&) = default;
~RLEContext() = default;
/// Entry point to redundant load elimination.
bool run();
SILFunction *getFunction() const { return Fn; }
/// Use a set of ad hoc rules to tell whether we should run a pessimistic
/// one iteration data flow on the function.
ProcessKind getProcessFunctionKind(unsigned LoadCount, unsigned StoreCount);
/// Run the iterative data flow until convergence.
void runIterativeRLE();
/// Process the basic blocks for the gen and kill set.
void processBasicBlocksForGenKillSet();
/// Process the basic blocks with the gen and kill set.
void processBasicBlocksWithGenKillSet();
/// Process the basic block for values generated in the current basic
/// block.
void processBasicBlocksForAvailValue();
/// Process basic blocks to perform the redundant load elimination.
void processBasicBlocksForRLE(bool Optimistic);
/// Returns the alias analysis we will use during all computations.
AliasAnalysis *getAA() const { return AA; }
/// Returns the current type expansion analysis we are using.
TypeExpansionAnalysis *getTE() const { return TE; }
/// Returns current epilogue release function info we are using.
EpilogueARCFunctionInfo *getEAFI() const { return EAFI; }
/// Returns the map from an accessed SILValue base to its LSLocation.
LSLocationBaseMap &getBM() { return BaseToLocIndex; }
/// Return the BlockState for the given basic block.
BlockState &getBlockState(SILBasicBlock *B) { return BBToLocState[B]; }
/// Get the bit representing the LSLocation in the LocationVault.
unsigned getLocationBit(const LSLocation &L);
/// Given the bit, get the LSLocation from the LocationVault.
LSLocation &getLocation(const unsigned index);
/// Get the bit representing the LSValue in the LSValueVault.
unsigned getValueBit(const LSValue &L);
/// Given the bit, get the LSValue from the LSValueVault.
LSValue &getValue(const unsigned index);
/// Given an LSLocation, try to collect all the LSValues for this LSLocation
/// in the given basic block. If some of the locations have covering values,
/// find the values in the block's predecessors.
bool collectLocationValues(SILBasicBlock *BB, LSLocation &L,
LSLocationValueMap &Values, ValueTableMap &VM);
/// Transitively collect all the values that make up this location and
/// create a SILArgument out of them.
SILValue computePredecessorLocationValue(SILBasicBlock *BB, LSLocation &L);
};
} // end anonymous namespace
void BlockState::startTrackingValue(ValueTableMap &VM, unsigned L, unsigned V) {
VM[L] = V;
}
void BlockState::stopTrackingValue(ValueTableMap &VM, unsigned B) {
VM.erase(B);
}
bool BlockState::isTrackingLocation(llvm::SmallBitVector &BV, unsigned B) {
return BV.test(B);
}
void BlockState::startTrackingLocation(llvm::SmallBitVector &BV, unsigned B) {
BV.set(B);
}
void BlockState::stopTrackingLocation(llvm::SmallBitVector &BV, unsigned B) {
BV.reset(B);
}
void BlockState::mergePredecessorsAvailSetMax(RLEContext &Ctx) {
if (BB->pred_empty()) {
ForwardSetMax.reset();
return;
}
auto Iter = BB->pred_begin();
ForwardSetMax = Ctx.getBlockState(*Iter).ForwardSetMax;
Iter = std::next(Iter);
for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) {
ForwardSetMax &= Ctx.getBlockState(*Iter).ForwardSetMax;
}
}
void BlockState::mergePredecessorAvailSet(RLEContext &Ctx) {
// Clear the state if the basic block has no predecessor.
if (BB->getPredecessorBlocks().begin() == BB->getPredecessorBlocks().end()) {
ForwardSetIn.reset();
return;
}
auto Iter = BB->pred_begin();
ForwardSetIn = Ctx.getBlockState(*Iter).ForwardSetOut;
Iter = std::next(Iter);
for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) {
ForwardSetIn &= Ctx.getBlockState(*Iter).ForwardSetOut;
}
}
void BlockState::mergePredecessorAvailSetAndValue(RLEContext &Ctx) {
// Clear the state if the basic block has no predecessor.
if (BB->getPredecessorBlocks().begin() == BB->getPredecessorBlocks().end()) {
ForwardSetIn.reset();
ForwardValIn.clear();
return;
}
auto Iter = BB->pred_begin();
ForwardSetIn = Ctx.getBlockState(*Iter).ForwardSetOut;
ForwardValIn = Ctx.getBlockState(*Iter).ForwardValOut;
Iter = std::next(Iter);
for (auto EndIter = BB->pred_end(); Iter != EndIter; ++Iter) {
BlockState &OtherState = Ctx.getBlockState(*Iter);
ForwardSetIn &= OtherState.ForwardSetOut;
// Merge in the predecessor state.
for (unsigned i = 0; i < LocationNum; ++i) {
if (OtherState.ForwardSetOut[i]) {
// There are multiple values from multiple predecessors, set this as
// a covering value. We do not need to track the value itself, as we
// can always go to the predecessors BlockState to find it.
ForwardValIn[i] = Ctx.getValueBit(LSValue(true));
continue;
}
// If this location does not have an available value, then clear it.
stopTrackingValue(ForwardValIn, i);
stopTrackingLocation(ForwardSetIn, i);
}
}
}
void BlockState::processBasicBlockWithKind(RLEContext &Ctx, RLEKind Kind) {
// Iterate over instructions in forward order.
for (auto &II : *BB) {
processInstructionWithKind(Ctx, &II, Kind);
}
}
bool BlockState::processBasicBlockWithGenKillSet() {
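// Apply the transfer function: drop the locations killed in this block, add
// the locations it generates, and report whether ForwardSetOut changed
// (which means the dataflow needs to revisit this block's successors).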
ForwardSetIn.reset(BBKillSet);
ForwardSetIn |= BBGenSet;
return updateForwardSetOut();
}
SILValue BlockState::reduceValuesAtEndOfBlock(RLEContext &Ctx, LSLocation &L) {
// First, collect current available locations and their corresponding values
// into a map.
LSLocationValueMap Values;
LSLocationList Locs;
LSLocation::expand(L, &BB->getModule(), Locs, Ctx.getTE());
// Find the available value for each expanded location at the end of the
// current basic block.
ValueTableMap &OTM = getForwardValOut();
for (unsigned i = 0; i < Locs.size(); ++i) {
Values[Locs[i]] = Ctx.getValue(OTM[Ctx.getLocationBit(Locs[i])]);
}
// Second, reduce the available values into a single SILValue we can use to
// forward.
SILValue TheForwardingValue;
TheForwardingValue = LSValue::reduce(L, &BB->getModule(), Values,
BB->getTerminator());
/// Return the forwarding value.
return TheForwardingValue;
}
bool BlockState::setupRLE(RLEContext &Ctx, SILInstruction *I, SILValue Mem) {
// Try to construct a SILValue for the current LSLocation.
//
// Collect the locations and their corresponding values into a map.
LSLocation L;
LSLocationBaseMap &BaseToLocIndex = Ctx.getBM();
if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) {
L = BaseToLocIndex[Mem];
} else {
SILValue UO = getUnderlyingObject(Mem);
L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem));
}
LSLocationValueMap Values;
// Use the ForwardValIn as we are currently processing the basic block.
if (!Ctx.collectLocationValues(I->getParent(), L, Values, getForwardValIn()))
return false;
// Reduce the available values into a single SILValue we can use to forward.
SILModule *Mod = &I->getModule();
SILValue TheForwardingValue = LSValue::reduce(L, Mod, Values, I);
if (!TheForwardingValue)
return false;
// Now that we have the forwarding value, record it for forwarding!
//
// NOTE: we do not perform the RLE right here because doing so could introduce
// new LSLocations.
//
// e.g.
// %0 = load %x
// %1 = load %x
// %2 = extract_struct %1, #a
// %3 = load %2
//
// If we perform the RLE and replace %1 with %0, we end up having a memory
// location we do not have before, i.e. Base == %0, and Path == #a.
//
// We may be able to add the LSLocation to the vault, but it gets
// complicated very quickly, e.g. we need to resize the bit vectors size,
// etc.
//
// However, since we already know the instruction to replace and the value to
// replace it with, we can record it for now and forward it after all the
// forwardable values are recorded in the function.
//
RedundantLoads[I] = TheForwardingValue;
DEBUG(llvm::dbgs() << "FORWARD " << TheForwardingValue << " to" << *I);
return true;
}
void BlockState::updateForwardSetForRead(RLEContext &Ctx, unsigned B) {
startTrackingLocation(ForwardSetIn, B);
}
void BlockState::updateGenKillSetForRead(RLEContext &Ctx, unsigned B) {
startTrackingLocation(BBGenSet, B);
stopTrackingLocation(BBKillSet, B);
}
void BlockState::updateForwardSetAndValForRead(RLEContext &Ctx, unsigned L,
unsigned V) {
// Track the new location and value.
startTrackingValue(ForwardValIn, L, V);
startTrackingLocation(ForwardSetIn, L);
}
void BlockState::updateGenKillSetForWrite(RLEContext &Ctx, unsigned B) {
// This is a store, invalidate any location that this location may alias, as
// their values can no longer be forwarded.
LSLocation &R = Ctx.getLocation(B);
for (unsigned i = 0; i < LocationNum; ++i) {
if (!isTrackingLocation(ForwardSetMax, i))
continue;
LSLocation &L = Ctx.getLocation(i);
if (!L.isMayAliasLSLocation(R, Ctx.getAA()))
continue;
// MayAlias, invalidate the location.
stopTrackingLocation(BBGenSet, i);
startTrackingLocation(BBKillSet, i);
}
// Start tracking this location.
startTrackingLocation(BBGenSet, B);
stopTrackingLocation(BBKillSet, B);
}
void BlockState::updateMaxAvailSetForWrite(RLEContext &Ctx, unsigned B) {
startTrackingLocation(ForwardSetMax, B);
}
void BlockState::updateMaxAvailSetForRead(RLEContext &Ctx, unsigned B) {
startTrackingLocation(ForwardSetMax, B);
}
void BlockState::updateForwardSetForWrite(RLEContext &Ctx, unsigned B) {
// This is a store, invalidate any location that this location may alias, as
// their values can no longer be forwarded.
LSLocation &R = Ctx.getLocation(B);
for (unsigned i = 0; i < LocationNum; ++i) {
if (!isTrackingLocation(ForwardSetIn, i))
continue;
LSLocation &L = Ctx.getLocation(i);
if (!L.isMayAliasLSLocation(R, Ctx.getAA()))
continue;
// MayAlias, invalidate the location.
stopTrackingLocation(ForwardSetIn, i);
}
// Start tracking this location.
startTrackingLocation(ForwardSetIn, B);
}
void BlockState::updateForwardSetAndValForWrite(RLEContext &Ctx, unsigned L,
unsigned V) {
// This is a store, invalidate any location that this location may alias, as
// their values can no longer be forwarded.
LSLocation &R = Ctx.getLocation(L);
for (unsigned i = 0; i < LocationNum; ++i) {
if (!isTrackingLocation(ForwardSetIn, i))
continue;
LSLocation &L = Ctx.getLocation(i);
if (!L.isMayAliasLSLocation(R, Ctx.getAA()))
continue;
// MayAlias, invalidate the location and value.
stopTrackingValue(ForwardValIn, i);
stopTrackingLocation(ForwardSetIn, i);
}
// Start tracking this location and value.
startTrackingLocation(ForwardSetIn, L);
startTrackingValue(ForwardValIn, L, V);
}
void BlockState::processWrite(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
SILValue Val, RLEKind Kind) {
// Initialize the LSLocation.
LSLocation L;
LSLocationBaseMap &BaseToLocIndex = Ctx.getBM();
if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) {
L = BaseToLocIndex[Mem];
} else {
SILValue UO = getUnderlyingObject(Mem);
L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem));
}
// If we can't figure out the Base or Projection Path for the write,
// process it as an unknown memory instruction.
if (!L.isValid()) {
// we can ignore unknown store instructions if we are computing the
// AvailSetMax.
if (!isComputeAvailSetMax(Kind)) {
processUnknownWriteInst(Ctx, I, Kind);
}
return;
}
// Expand the given location and val into individual fields and process
// them as separate writes.
LSLocationList Locs;
LSLocation::expand(L, &I->getModule(), Locs, Ctx.getTE());
if (isComputeAvailSetMax(Kind)) {
for (unsigned i = 0; i < Locs.size(); ++i) {
updateMaxAvailSetForWrite(Ctx, Ctx.getLocationBit(Locs[i]));
}
return;
}
// Are we computing the genset and killset ?
if (isComputeAvailGenKillSet(Kind)) {
for (unsigned i = 0; i < Locs.size(); ++i) {
updateGenKillSetForWrite(Ctx, Ctx.getLocationBit(Locs[i]));
}
return;
}
// Are we computing available value or performing RLE?
LSValueList Vals;
LSValue::expand(Val, &I->getModule(), Vals, Ctx.getTE());
if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
for (unsigned i = 0; i < Locs.size(); ++i) {
updateForwardSetAndValForWrite(Ctx, Ctx.getLocationBit(Locs[i]),
Ctx.getValueBit(Vals[i]));
}
return;
}
llvm_unreachable("Unknown RLE compute kind");
}
void BlockState::processRead(RLEContext &Ctx, SILInstruction *I, SILValue Mem,
SILValue Val, RLEKind Kind) {
// Initialize the LSLocation.
LSLocation L;
LSLocationBaseMap &BaseToLocIndex = Ctx.getBM();
if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) {
L = BaseToLocIndex[Mem];
} else {
SILValue UO = getUnderlyingObject(Mem);
L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem));
}
// If we can't figure out the Base or Projection Path for the read, simply
// ignore it for now.
if (!L.isValid())
return;
// Expand the given LSLocation and Val into individual fields and process
// them as separate reads.
LSLocationList Locs;
LSLocation::expand(L, &I->getModule(), Locs, Ctx.getTE());
if (isComputeAvailSetMax(Kind)) {
for (unsigned i = 0; i < Locs.size(); ++i) {
updateMaxAvailSetForRead(Ctx, Ctx.getLocationBit(Locs[i]));
}
return;
}
// Are we computing the genset and killset.
if (isComputeAvailGenKillSet(Kind)) {
for (unsigned i = 0; i < Locs.size(); ++i) {
updateGenKillSetForRead(Ctx, Ctx.getLocationBit(Locs[i]));
}
return;
}
// Are we computing available values or performing RLE?
bool CanForward = true;
LSValueList Vals;
LSValue::expand(Val, &I->getModule(), Vals, Ctx.getTE());
if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
for (unsigned i = 0; i < Locs.size(); ++i) {
if (isTrackingLocation(ForwardSetIn, Ctx.getLocationBit(Locs[i])))
continue;
updateForwardSetAndValForRead(Ctx, Ctx.getLocationBit(Locs[i]),
Ctx.getValueBit(Vals[i]));
// We can not perform the forwarding as we are at least missing
// some pieces of the read location.
CanForward = false;
}
}
// Simply return if we are not performing RLE or we do not have all the
// values available to perform RLE.
if (!isPerformingRLE(Kind) || !CanForward)
return;
// Lastly, forward value to the load.
setupRLE(Ctx, I, Mem);
}
void BlockState::processStoreInst(RLEContext &Ctx, StoreInst *SI, RLEKind Kind) {
processWrite(Ctx, SI, SI->getDest(), SI->getSrc(), Kind);
}
void BlockState::processLoadInst(RLEContext &Ctx, LoadInst *LI, RLEKind Kind) {
processRead(Ctx, LI, LI->getOperand(), SILValue(LI), Kind);
}
void BlockState::processUnknownWriteInstForGenKillSet(RLEContext &Ctx,
SILInstruction *I) {
auto *AA = Ctx.getAA();
for (unsigned i = 0; i < LocationNum; ++i) {
if (!isTrackingLocation(ForwardSetMax, i))
continue;
// Invalidate any location this instruction may write to.
//
// TODO: checking may alias with Base is overly conservative,
// we should check may alias with base plus projection path.
LSLocation &R = Ctx.getLocation(i);
if (!AA->mayWriteToMemory(I, R.getBase()))
continue;
// MayAlias.
stopTrackingLocation(BBGenSet, i);
startTrackingLocation(BBKillSet, i);
}
}
void BlockState::processUnknownWriteInstForRLE(RLEContext &Ctx,
SILInstruction *I) {
auto *AA = Ctx.getAA();
for (unsigned i = 0; i < LocationNum; ++i) {
if (!isTrackingLocation(ForwardSetIn, i))
continue;
// Invalidate any location this instruction may write to.
//
// TODO: checking may alias with Base is overly conservative,
// we should check may alias with base plus projection path.
LSLocation &R = Ctx.getLocation(i);
if (!AA->mayWriteToMemory(I, R.getBase()))
continue;
// MayAlias.
stopTrackingLocation(ForwardSetIn, i);
stopTrackingValue(ForwardValIn, i);
}
}
void BlockState::processUnknownWriteInst(RLEContext &Ctx, SILInstruction *I,
RLEKind Kind) {
// If this is a release on a guaranteed parameter, it can not call deinit,
// which might read or write memory.
if (isIntermediateRelease(I, Ctx.getEAFI()))
return;
// Are we computing the genset and killset ?
if (isComputeAvailGenKillSet(Kind)) {
processUnknownWriteInstForGenKillSet(Ctx, I);
return;
}
// Are we computing the available value or doing RLE ?
if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
processUnknownWriteInstForRLE(Ctx, I);
return;
}
llvm_unreachable("Unknown RLE compute kind");
}
void BlockState::
processDeallocStackInstForGenKillSet(RLEContext &Ctx, SILInstruction *I) {
SILValue ASI = findAllocStackInst(I);
for (unsigned i = 0; i < LocationNum; ++i) {
LSLocation &R = Ctx.getLocation(i);
if (R.getBase() != ASI)
continue;
// The location is based on the dealloced stack slot; invalidate it.
stopTrackingLocation(BBGenSet, i);
startTrackingLocation(BBKillSet, i);
}
}
void BlockState::
processDeallocStackInstForRLE(RLEContext &Ctx, SILInstruction *I) {
SILValue ASI = findAllocStackInst(I);
for (unsigned i = 0; i < LocationNum; ++i) {
LSLocation &R = Ctx.getLocation(i);
if (R.getBase() != ASI)
continue;
// The location is based on the dealloced stack slot; invalidate it.
stopTrackingLocation(ForwardSetIn, i);
stopTrackingValue(ForwardValIn, i);
}
}
void BlockState::
processDeallocStackInst(RLEContext &Ctx, SILInstruction *I, RLEKind Kind) {
// Are we computing the genset and killset ?
if (isComputeAvailGenKillSet(Kind)) {
processDeallocStackInstForGenKillSet(Ctx, I);
return;
}
// Are we computing the available value or doing RLE ?
if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) {
processDeallocStackInstForRLE(Ctx, I);
return;
}
llvm_unreachable("Unknown RLE compute kind");
}
void BlockState::processInstructionWithKind(RLEContext &Ctx,
SILInstruction *Inst,
RLEKind Kind) {
// This is a StoreInst, try to see whether it clobbers any forwarding value
if (auto *SI = dyn_cast<StoreInst>(Inst)) {
processStoreInst(Ctx, SI, Kind);
return;
}
// This is a LoadInst. Let's see if we can find a previously loaded or stored
// value to use instead of this load.
if (auto *LI = dyn_cast<LoadInst>(Inst)) {
processLoadInst(Ctx, LI, Kind);
return;
}
if (auto *DSI = dyn_cast<DeallocStackInst>(Inst)) {
processDeallocStackInst(Ctx, DSI, Kind);
return;
}
// If this instruction has side effects, but is inert from a load store
// perspective, skip it.
if (isRLEInertInstruction(Inst))
return;
// If this instruction does not read or write memory, we can skip it.
if (!Inst->mayReadOrWriteMemory())
return;
// If we have an instruction that may write to memory and we cannot prove
// that it and its operands cannot alias a load we have visited,
// invalidate that load.
if (Inst->mayWriteToMemory()) {
DEBUG(llvm::dbgs() << "WRITE " << *Inst);
processUnknownWriteInst(Ctx, Inst, Kind);
return;
}
DEBUG(llvm::dbgs() << "READ " << *Inst);
}
RLEContext::ProcessKind
RLEContext::
getProcessFunctionKind(unsigned LoadCount, unsigned StoreCount) {
// Don't optimize functions that are marked as 'no.optimize'.
if (!Fn->shouldOptimize())
return ProcessKind::ProcessNone;
// Really no point optimizing here as there are no forwardable loads.
if (LoadCount + StoreCount < 2)
return ProcessKind::ProcessNone;
bool RunOneIteration = true;
unsigned BBCount = 0;
unsigned LocationCount = LocationVault.size();
if (LocationCount == 0)
return ProcessKind::ProcessNone;
// If all basic blocks have their predecessors processed when the basic
// blocks in the function are iterated in reverse post order, then this
// function can be processed in one iteration, i.e. there is no
// need to generate the genset and killset.
auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(Fn);
llvm::DenseSet<SILBasicBlock *> HandledBBs;
for (SILBasicBlock *B : PO->getReversePostOrder()) {
++BBCount;
for (auto X : B->getPredecessorBlocks()) {
if (HandledBBs.find(X) == HandledBBs.end()) {
RunOneIteration = false;
break;
}
}
HandledBBs.insert(B);
}
// Data flow may take too long to run.
if (BBCount * LocationCount > MaxLSLocationBBMultiplicationNone)
return ProcessKind::ProcessNone;
// This function's data flow would converge in 1 iteration.
if (RunOneIteration)
return ProcessKind::ProcessOneIteration;
// We run one pessimistic data flow iteration to do redundant load
// elimination on the function.
if (BBCount * LocationCount > MaxLSLocationBBMultiplicationPessimistic)
return ProcessKind::ProcessOneIteration;
return ProcessKind::ProcessMultipleIterations;
}
#ifndef NDEBUG
void BlockState::dump(RLEContext &Ctx) {
for (unsigned i = 0; i < LocationNum; ++i) {
if (!isTrackingLocation(ForwardSetMax, i))
continue;
llvm::dbgs() << "Loc #" << i << ":" << (BBGenSet[i] ? " Gen" : "")
<< (BBKillSet[i] ? " Kill" : "");
if (!ForwardSetIn.empty() && ForwardSetIn.test(i)) {
llvm::dbgs() << " IN ";
ValueTableMap::const_iterator inIter = ForwardValIn.find(i);
if (inIter != ForwardValIn.end()) {
if (SILValue base = Ctx.getValue(inIter->second).getBase())
llvm::dbgs() << base;
else
llvm::dbgs() << "no base";
}
}
if (!ForwardSetOut.empty() && ForwardSetOut.test(i)) {
llvm::dbgs() << " OUT ";
ValueTableMap::const_iterator outIter = ForwardValOut.find(i);
if (outIter != ForwardValOut.end()) {
if (SILValue base = Ctx.getValue(outIter->second).getBase())
llvm::dbgs() << base;
else
llvm::dbgs() << "no base";
}
}
llvm::dbgs() << "\n";
}
}
#endif
//===----------------------------------------------------------------------===//
// RLEContext Implementation
//===----------------------------------------------------------------------===//
RLEContext::RLEContext(SILFunction *F, SILPassManager *PM, AliasAnalysis *AA,
TypeExpansionAnalysis *TE, PostOrderFunctionInfo *PO,
EpilogueARCFunctionInfo *EAFI)
: Fn(F), PM(PM), AA(AA), TE(TE), PO(PO), EAFI(EAFI)
#ifndef NDEBUG
,
printCtx(llvm::dbgs(), /*Verbose=*/false, /*Sorted=*/true)
#endif
{
}
LSLocation &RLEContext::getLocation(const unsigned index) {
return LocationVault[index];
}
unsigned RLEContext::getLocationBit(const LSLocation &Loc) {
// Return the bit position of the given Loc in the LocationVault. The bit
// position is then used to set/reset the bitvector kept by each BlockState.
//
// We should have the location populated by the enumerateLSLocation at this
// point.
auto Iter = LocToBitIndex.find(Loc);
assert(Iter != LocToBitIndex.end() && "Location should have been enum'ed");
return Iter->second;
}
LSValue &RLEContext::getValue(const unsigned index) {
return LSValueVault[index];
}
unsigned RLEContext::getValueBit(const LSValue &Val) {
// Return the bit position of the given Val in the LSValueVault. The bit
// position is then used to set/reset the bitvector kept by each BlockState.
auto Iter = ValToBitIndex.find(Val);
// We do not walk over the function and find all the possible LSValues
// in this function, as some of these values will not be used, e.g.
// if the LoadInst that generates this value is actually RLE'ed.
// Instead, we create the LSValues when we need them.
if (Iter == ValToBitIndex.end()) {
ValToBitIndex[Val] = LSValueVault.size();
LSValueVault.push_back(Val);
return ValToBitIndex[Val];
}
return Iter->second;
}
BlockState::ValueState BlockState::getValueStateAtEndOfBlock(RLEContext &Ctx,
LSLocation &L) {
// Find the number of covering values and concrete values for the locations
// expanded from the given location.
unsigned CSCount = 0, CTCount = 0;
LSLocationList Locs;
LSLocation::expand(L, &BB->getModule(), Locs, Ctx.getTE());
ValueTableMap &OTM = getForwardValOut();
for (auto &X : Locs) {
LSValue &V = Ctx.getValue(OTM[Ctx.getLocationBit(X)]);
if (V.isCoveringValue()) {
++CSCount;
continue;
}
++CTCount;
}
if (CSCount == Locs.size())
return ValueState::CoverValues;
if (CTCount == Locs.size())
return ValueState::ConcreteValues;
return ValueState::CoverAndConcreteValues;
}
SILValue RLEContext::computePredecessorLocationValue(SILBasicBlock *BB,
LSLocation &L) {
BBValueMap Values;
llvm::DenseSet<SILBasicBlock *> HandledBBs;
llvm::SmallVector<SILBasicBlock *, 8> WorkList;
// Push in all the predecessors to get started.
for (auto Pred : BB->getPredecessorBlocks()) {
WorkList.push_back(Pred);
}
while (!WorkList.empty()) {
auto *CurBB = WorkList.pop_back_val();
BlockState &Forwarder = getBlockState(CurBB);
// Mark this basic block as processed.
HandledBBs.insert(CurBB);
// There are 3 cases that can happen here.
//
// 1. The current basic block contains concrete values for the entire
// location.
// 2. The current basic block contains covering values for the entire
// location.
// 3. The current basic block contains concrete value for part of the
// location and covering values for the rest.
//
// This BlockState contains concrete values for all the expanded
// locations, collect and reduce them into a single value in the current
// basic block.
if (Forwarder.isConcreteValues(*this, L)) {
Values[CurBB] = Forwarder.reduceValuesAtEndOfBlock(*this, L);
continue;
}
// This BlockState does not contain concrete value for any of the expanded
// locations, collect in this block's predecessors.
if (Forwarder.isCoverValues(*this, L)) {
for (auto Pred : CurBB->getPredecessorBlocks()) {
if (HandledBBs.find(Pred) != HandledBBs.end())
continue;
WorkList.push_back(Pred);
}
continue;
}
// This block contains concrete values for some but not all the expanded
// locations, recursively call collectLocationValues to materialize the
// value that reaches this basic block.
LSLocationValueMap LSValues;
if (!collectLocationValues(CurBB, L, LSValues, Forwarder.getForwardValOut()))
return SILValue();
// Reduce the available values into a single SILValue we can use to forward
SILInstruction *IPt = CurBB->getTerminator();
Values[CurBB] = LSValue::reduce(L, &BB->getModule(), LSValues, IPt);
}
// Finally, collect all the values for the SILArgument and materialize it
// using the SSAUpdater.
Updater.Initialize(L.getType(&BB->getModule()).getObjectType());
for (auto V : Values) {
Updater.AddAvailableValue(V.first, V.second);
}
return Updater.GetValueInMiddleOfBlock(BB);
}
bool RLEContext::collectLocationValues(SILBasicBlock *BB, LSLocation &L,
LSLocationValueMap &Values,
ValueTableMap &VM) {
LSLocationSet CSLocs;
LSLocationList Locs;
LSLocation::expand(L, &BB->getModule(), Locs, TE);
auto *Mod = &BB->getModule();
// Collect the available value of each expanded location and record the
// locations for which we only have a covering value in this basic block.
for (auto &X : Locs) {
Values[X] = getValue(VM[getLocationBit(X)]);
if (!Values[X].isCoveringValue())
continue;
CSLocs.insert(X);
}
// For locations for which we do not have concrete values in this basic
// block, try to reduce them to the minimum # of locations possible; this
// will help us generate as few SILArguments as possible.
LSLocation::reduce(L, Mod, CSLocs);
// To handle covering values, we need to go to the predecessors and
// materialize them there.
for (auto &X : CSLocs) {
SILValue V = computePredecessorLocationValue(BB, X);
if (!V)
return false;
// We've constructed a concrete value for the covering value. Expand and
// collect the newly created forwardable values.
LSLocationList Locs;
LSValueList Vals;
LSLocation::expand(X, Mod, Locs, TE);
LSValue::expand(V, Mod, Vals, TE);
for (unsigned i = 0; i < Locs.size(); ++i) {
Values[Locs[i]] = Vals[i];
assert(Values[Locs[i]].isValid() && "Invalid load store value");
}
}
return true;
}
void RLEContext::processBasicBlocksForGenKillSet() {
for (SILBasicBlock *BB : PO->getReversePostOrder()) {
DEBUG(llvm::dbgs() << "PROCESS " << printCtx.getID(BB)
<< " for Gen/Kill:\n";
BB->print(llvm::dbgs(), printCtx));
BlockState &S = getBlockState(BB);
// Compute the AvailSetMax at the beginning of the basic block.
S.mergePredecessorsAvailSetMax(*this);
// Compute the genset and killset.
//
// To optimize this process, we also compute the AvailSetMax at particular
// point in the basic block.
for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
if (auto *LI = dyn_cast<LoadInst>(&*I)) {
if (BBWithLoads.find(BB) == BBWithLoads.end())
BBWithLoads.insert(BB);
S.processLoadInst(*this, LI, RLEKind::ComputeAvailSetMax);
}
if (auto *SI = dyn_cast<StoreInst>(&*I)) {
S.processStoreInst(*this, SI, RLEKind::ComputeAvailSetMax);
}
S.processInstructionWithKind(*this, &*I, RLEKind::ComputeAvailGenKillSet);
}
DEBUG(S.dump(*this));
}
}
void RLEContext::processBasicBlocksWithGenKillSet() {
// Process each basic block with the gen and kill set. Every time the
// ForwardSetOut of a basic block changes, the optimization is rerun on its
// successors.
llvm::SmallVector<SILBasicBlock *, 16> WorkList;
llvm::DenseSet<SILBasicBlock *> HandledBBs;
// Push into the worklist in post order so that we can pop from the back and
// get reverse post order.
for (SILBasicBlock *BB : PO->getPostOrder()) {
WorkList.push_back(BB);
HandledBBs.insert(BB);
}
while (!WorkList.empty()) {
SILBasicBlock *BB = WorkList.pop_back_val();
DEBUG(llvm::dbgs() << "PROCESS " << printCtx.getID(BB)
<< " with Gen/Kill.\n");
HandledBBs.erase(BB);
// Intersection.
BlockState &Forwarder = getBlockState(BB);
// Compute the ForwardSetIn at the beginning of the basic block.
Forwarder.mergePredecessorAvailSet(*this);
if (Forwarder.processBasicBlockWithGenKillSet()) {
for (auto &X : BB->getSuccessors()) {
// We do not push a basic block onto the worklist if it is already
// in the worklist.
if (HandledBBs.find(X) != HandledBBs.end())
continue;
WorkList.push_back(X);
}
}
DEBUG(Forwarder.dump(*this));
}
}
void RLEContext::processBasicBlocksForAvailValue() {
for (SILBasicBlock *BB : PO->getReversePostOrder()) {
DEBUG(llvm::dbgs() << "PROCESS " << printCtx.getID(BB)
<< " for available.\n");
BlockState &Forwarder = getBlockState(BB);
// Merge the predecessors. After merging, BlockState now contains
// lists of available LSLocations and their values that reach the
// beginning of the basic block along all paths.
Forwarder.mergePredecessorAvailSetAndValue(*this);
// Merge duplicate loads, and forward stores to
// loads. We also update lists of stores|loads to reflect the end
// of the basic block.
Forwarder.processBasicBlockWithKind(*this, RLEKind::ComputeAvailValue);
// Update the locations with available values. We do not need to update
// the available BitVector here as they should have been initialized and
// stabilized in the processBasicBlocksWithGenKillSet.
Forwarder.updateForwardValOut();
DEBUG(Forwarder.dump(*this));
}
}
void RLEContext::processBasicBlocksForRLE(bool Optimistic) {
for (SILBasicBlock *BB : PO->getReversePostOrder()) {
DEBUG(llvm::dbgs() << "PROCESS " << printCtx.getID(BB) << " for RLE.\n");
// If we know this is not a one iteration function, which means its
// forward sets have been computed and converged,
// and this basic block does not even have LoadInsts, there is no point
// in processing every instruction in the basic block again as no load
// will be eliminated.
if (Optimistic && BBWithLoads.find(BB) == BBWithLoads.end())
continue;
BlockState &Forwarder = getBlockState(BB);
// Merge the predecessors. After merging, BlockState now contains
// lists of available LSLocations and their values that reach the
// beginning of the basic block along all paths.
Forwarder.mergePredecessorAvailSetAndValue(*this);
DEBUG(Forwarder.dump(*this));
// Perform the actual redundant load elimination.
Forwarder.processBasicBlockWithKind(*this, RLEKind::PerformRLE);
// If this is not a one iteration data flow, then the forward sets have
// already been computed and converged, so there is no need to update them.
if (Optimistic)
continue;
// Update the locations with available values and their values.
Forwarder.updateForwardSetOut();
Forwarder.updateForwardValOut();
}
}
void RLEContext::runIterativeRLE() {
// Generate the genset and killset for every basic block.
processBasicBlocksForGenKillSet();
// Process basic blocks in RPO. After the data flow converges, run last
// iteration and perform load forwarding.
processBasicBlocksWithGenKillSet();
// We have computed the available value bit, now go through every basic
// block and compute the forwarding value locally. This is necessary as
// when we perform the RLE in the last iteration, we must handle loops, i.e.
// predecessor blocks which have not been processed when a basic block is
// processed.
processBasicBlocksForAvailValue();
}
bool RLEContext::run() {
// We perform redundant load elimination in the following phases.
//
// Phase 1. Compute the genset and killset for every basic block.
//
// Phase 2. Use an iterative data flow to compute whether there is an
// available value at a given point, we do not yet care about what the value
// is.
//
// Phase 3. We compute the real forwardable value at a given point.
//
// Phase 4. We perform the redundant load elimination.
// Walk over the function and find all the locations accessed by
// this function.
std::pair<int, int> LSCount = std::make_pair(0, 0);
LSLocation::enumerateLSLocations(*Fn, LocationVault,
LocToBitIndex,
BaseToLocIndex, TE,
LSCount);
// Check how to optimize this function.
ProcessKind Kind = getProcessFunctionKind(LSCount.first, LSCount.second);
// We do not optimize this function at all.
if (Kind == ProcessKind::ProcessNone)
return false;
// Do we run a multi-iteration data flow ?
bool Optimistic = Kind == ProcessKind::ProcessMultipleIterations ?
true : false;
// This is the set of basic blocks that we actually process.
// We do not process unreachable blocks; their ForwardSetOut is left as 0.
llvm::DenseSet<SILBasicBlock *> BBToProcess;
for (auto X : PO->getPostOrder())
BBToProcess.insert(X);
// For all basic blocks in the function, initialize a BB state. Since we
// know all the locations accessed in this function, we can resize the bit
// vector to the appropriate size.
for (auto &B : *Fn) {
BBToLocState[&B] = BlockState();
BBToLocState[&B].init(&B, LocationVault.size(), Optimistic &&
BBToProcess.find(&B) != BBToProcess.end());
}
DEBUG(for (unsigned i = 0; i < LocationVault.size(); ++i) {
llvm::dbgs() << "LSLocation #" << i;
getLocation(i).print(llvm::dbgs(), &Fn->getModule());
});
if (Optimistic)
runIterativeRLE();
// We have the available value bit computed and the local forwarding value.
// Set up the load forwarding.
processBasicBlocksForRLE(Optimistic);
// Finally, perform the redundant load replacements.
llvm::DenseSet<SILInstruction *> InstsToDelete;
bool SILChanged = false;
for (auto &B : *Fn) {
auto &State = BBToLocState[&B];
auto &Loads = State.getRL();
// Nothing to forward.
if (Loads.empty())
continue;
// We iterate the instructions in the basic block in a deterministic order
// and use this order to perform the load forwarding.
//
// NOTE: we could end up with different SIL depending on the order in which
// load forwarding is performed.
for (auto I = B.rbegin(), E = B.rend(); I != E; ++I) {
auto Iter = Loads.find(&*I);
if (Iter == Loads.end())
continue;
DEBUG(llvm::dbgs() << "Replacing " << SILValue(Iter->first) << "With "
<< Iter->second);
SILChanged = true;
Iter->first->replaceAllUsesWith(Iter->second);
InstsToDelete.insert(Iter->first);
++NumForwardedLoads;
}
}
// Erase the instructions recursively; this way, we get rid of a pass
// dependence on DCE.
for (auto &X : InstsToDelete) {
// It is possible that the instruction still has uses, because it could be
// used as the replacement value for some other RLE pairs.
//
// TODO: we should fix this, otherwise we are missing RLE opportunities.
if (!X->use_empty())
continue;
recursivelyDeleteTriviallyDeadInstructions(X, true);
}
return SILChanged;
}
//===----------------------------------------------------------------------===//
// Top Level Entry Point
//===----------------------------------------------------------------------===//
namespace {
class RedundantLoadElimination : public SILFunctionTransform {
StringRef getName() override { return "SIL Redundant Load Elimination"; }
/// The entry point to the transformation.
void run() override {
SILFunction *F = getFunction();
DEBUG(llvm::dbgs() << "*** RLE on function: " << F->getName() << " ***\n");
auto *AA = PM->getAnalysis<AliasAnalysis>();
auto *TE = PM->getAnalysis<TypeExpansionAnalysis>();
auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(F);
auto *EAFI = PM->getAnalysis<EpilogueARCAnalysis>()->get(F);
RLEContext RLE(F, PM, AA, TE, PO, EAFI);
if (RLE.run()) {
invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions);
}
}
};
} // end anonymous namespace
SILTransform *swift::createRedundantLoadElimination() {
return new RedundantLoadElimination();
}