//===--- ARCCodeMotion.cpp - SIL ARC Code Motion --------------------------===//
// This source file is part of the open source project
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
// See for license information
// See for the list of Swift project authors
/// \file
/// This pass moves retains down and releases up. This, hopefully, will help
/// ARC sequence opt to remove retain and release pairs without worrying too
/// much about control flows.
/// It uses an optimistic iterative data flow to compute where to insert the
/// retains and releases for every reference-counted root. It then removes all
/// the old retain and release instructions and creates the new ones.
/// This pass is more sophisticated than SILCodeMotion: because ARC
/// optimizations can be very beneficial, it uses an optimistic global data
/// flow to achieve optimality.
/// Proof of Correctness:
/// -------------------
/// 1. Retains are blocked by MayDecrements. It's straightforward to prove that
/// retain sinking is correct.
/// If a retain is sunk from Region A to Region B, that means there is no
/// blocking operation between where the retain was in Region A and where it is
/// sunk to in Region B. We only sink retains (we do not move any other
/// instructions), retains themselves are NOT MayDecrement operations, and
/// moving a retain can't turn a non-decrement instruction into a MayDecrement
/// one, so sinking cannot introduce a new blocking operation.
/// 2. Releases are blocked by MayInterfere. If a release is hoisted from
/// Region B to Region A, that means there is no blocking operation from where
/// the release was in Region B and where the release is hoisted to in Region A.
/// The question is whether we can introduce such an operation while we hoist
/// other releases. The answer is NO, because if such releases exist, they
/// would be blocked by the old release (we remove old releases and recreate
/// new ones at the end of the pass) and will not be able to be hoisted beyond
/// the old release.
/// This proof also hinges on the fact that if release A interferes with
/// release B then release B must interfere with release A, i.e. the 2
/// releases must have the symmetric property. Consider the 2 releases as 2
/// function calls, i.e. CallA (release A) and CallB (release B). If CallA
/// interferes with CallB, that means CallA must share some program state
/// (through read or write) with CallB. Then it is not possible for CallB
/// to not share any state with CallA. And if they do share state, then
/// it's not possible for CallB to block CallA and CallA not to block CallB.
/// TODO: Sinking retains can block releases from being hoisted, and hoisting
/// releases can block retains from being sunk. Investigate when to sink
/// retains and when to hoist releases and their ordering in the pass pipeline.
/// TODO: Consider doing retain hoisting and release sinking. This can help
/// to discover disjoint lifetimes and we can try to stitch them together.
/// TODO: There are a lot of code duplications between retain and release code
/// motion in the data flow part. Consider whether we can share them.
/// Essentially, we can implement the release code motion by inverting the
/// retain code motion, but this can also make the code less readable.
#define DEBUG_TYPE "sil-rr-code-motion"
#include "swift/SIL/SILBuilder.h"
#include "swift/SILOptimizer/Analysis/AliasAnalysis.h"
#include "swift/SILOptimizer/Analysis/ARCAnalysis.h"
#include "swift/SILOptimizer/Analysis/EscapeAnalysis.h"
#include "swift/SILOptimizer/Analysis/PostOrderAnalysis.h"
#include "swift/SILOptimizer/Analysis/RCIdentityAnalysis.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/CFG.h"
#include "swift/SILOptimizer/Utils/Local.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
using namespace swift;
STATISTIC(NumRetainsSunk, "Number of retains sunk");
STATISTIC(NumReleasesHoisted, "Number of releases hoisted");
/// Command-line flag "disable-arc-cm"; defaults to false. It is checked at
/// the top of ARCCodeMotion::run().
llvm::cl::opt<bool> DisableARCCodeMotion("disable-arc-cm", llvm::cl::init(false));
/// Disable optimization if we have to break critical edges in the function.
/// Command-line flag "disable-with-critical-edge"; defaults to false. It is
/// checked together with hasCriticalEdges() in ARCCodeMotion::run().
DisableIfWithCriticalEdge("disable-with-critical-edge", llvm::cl::init(false));
// Block State
/// Per-basic-block data flow state. RetainBlockState and ReleaseBlockState
/// derive from it. Every bit vector below is indexed by the position of a
/// refcounted root in RCRootVault.
struct BlockState {
/// A bit vector for which the ith bit represents the ith refcounted root in
/// RCRootVault. State at the beginning of the basic block.
/// NOTE: we could do the data flow with BBSetIn or BBSetOut, but that would
/// require us to create a temporary copy to check whether the BBSet has
/// changed after the genset and killset has been applied.
llvm::SmallBitVector BBSetIn;
/// A bit vector for which the ith bit represents the ith refcounted root in
/// RCRootVault. State at the end of the basic block.
llvm::SmallBitVector BBSetOut;
/// A bit vector for which the ith bit represents the ith refcounted root in
/// RCRootVault. If the bit is set, that means this basic block creates a
/// retain which can be sunk or a release which can be hoisted.
llvm::SmallBitVector BBGenSet;
/// A bit vector for which the ith bit represents the ith refcounted root in
/// RCRootVault. If this bit is set, that means this basic block stops retain
/// or release of the refcounted root to be moved across.
llvm::SmallBitVector BBKillSet;
/// A bit vector for which the ith bit represents the ith refcounted root in
/// RCRootVault. If this bit is set, that means there is potentially a retain
/// or release that can be sunk or hoisted to this point. This is used to
/// optimize the time for computing genset and killset.
/// NOTE: this vector contains an approximation of whether there will be a
/// retain or release to a certain point of a basic block.
llvm::SmallBitVector BBMaxSet;
/// CodeMotionContext - This is the base class which retain code motion and
/// release code motion inherit from. It holds the analyses and bookkeeping
/// shared by both directions and defines, as pure virtual steps, the
/// interface of the code motion procedure.
class CodeMotionContext {
/// Dataflow needs multiple iteration to converge. If this is false, then we
/// do not need to generate the genset or killset, i.e. we can simply do 1
/// pessimistic data flow iteration.
bool MultiIteration;
/// The allocator we are currently using.
llvm::SpecificBumpPtrAllocator<BlockState> &BPA;
/// Current function we are analyzing.
SILFunction *F;
/// Current post-order we are using.
PostOrderFunctionInfo *PO;
/// Current alias analysis we are using.
AliasAnalysis *AA;
/// Current rc-identity we are using.
RCIdentityFunctionInfo *RCFI;
/// All the unique refcount roots retained or released in the function.
llvm::SetVector<SILValue> RCRootVault;
/// Contains a map between RC roots to their index in the RCRootVault.
/// used to facilitate fast RC roots to index lookup.
llvm::DenseMap<SILValue, unsigned> RCRootIndex;
/// All the retains or releases originally in the function. Eventually
/// they will all be removed after all the new ones are generated.
llvm::SmallPtrSet<SILInstruction *, 8> RCInstructions;
/// All the places to place the new retains or releases after code motion.
using InsertPointList = llvm::SmallVector<SILInstruction *, 2>;
llvm::SmallDenseMap<SILValue, InsertPointList> InsertPoints;
/// These are the blocks that have an RC instruction to process or it blocks
/// some RC instructions. If the basic block has neither, we do not need to
/// process the block again in the last iteration. We populate this set when
/// we compute the genset and killset.
llvm::SmallPtrSet<SILBasicBlock *, 8> InterestBlocks;
/// Return the rc-identity root of the SILValue.
SILValue getRCRoot(SILValue R) {
return RCFI->getRCIdentityRoot(R);
/// Return the rc-identity root of the RC instruction, i.e.
/// retain or release. The root is taken from the instruction's operand 0.
SILValue getRCRoot(SILInstruction *I) {
// The disjunction is parenthesized so the message string is attached to the
// whole condition; `&&` binds tighter than `||`, so without the parentheses
// the string would be and-ed with the second predicate only.
assert((isRetainInstruction(I) || isReleaseInstruction(I)) &&
"Extracting RC root from invalid instruction");
return getRCRoot(I->getOperand(0));
/// Constructor. MultiIteration starts out true; the derived contexts
/// overwrite it with the result of requireIteration() in their constructors.
CodeMotionContext(llvm::SpecificBumpPtrAllocator<BlockState> &BPA,
SILFunction *F,
PostOrderFunctionInfo *PO, AliasAnalysis *AA,
RCIdentityFunctionInfo *RCFI)
: MultiIteration(true), BPA(BPA), F(F), PO(PO), AA(AA), RCFI(RCFI) {}
/// virtual destructor.
virtual ~CodeMotionContext() {}
/// Run the data flow to move retains and releases.
bool run();
/// Check whether we need to run an optimistic iteration data flow.
/// or a pessimistic would suffice.
virtual bool requireIteration() = 0;
/// Initialize necessary things to run the iterative data flow.
virtual void initializeCodeMotionDataFlow() = 0;
/// Initialize the basic block maximum refcounted set.
virtual void initializeCodeMotionBBMaxSet() = 0;
/// Compute the genset and killset for every root in every basic block.
virtual void computeCodeMotionGenKillSet() = 0;
/// Run the iterative data flow to converge.
virtual void convergeCodeMotionDataFlow() = 0;
/// Use the data flow results, come up with places to insert the new inst.
virtual void computeCodeMotionInsertPoints() = 0;
/// Remove the old retains and create the new *moved* refcounted instructions
virtual bool performCodeMotion() = 0;
/// Merge the data flow states.
virtual void mergeBBDataFlowStates(SILBasicBlock *BB) = 0;
/// Compute the BBSetIn and BBSetOut for the current basic
/// block with the generated gen and kill set.
virtual bool processBBWithGenKillSet(SILBasicBlock *BB) = 0;
/// Return true if the instruction blocks the Ptr to be moved further.
virtual bool mayBlockCodeMotion(SILInstruction *II, SILValue Ptr) = 0;
/// Drive the code motion procedure for this context. The comments below name
/// the steps in order; the value returned is whatever performCodeMotion()
/// reports.
bool CodeMotionContext::run() {
// Initialize the data flow.
// Converge the BBSetOut with iterative data flow.
// This part is only needed when the data flow runs in multi-iteration mode.
if (MultiIteration) {
// Compute the insertion point where each RC root can be moved to.
// Finally, generate new retains and remove the old retains.
return performCodeMotion();
// Retain Code Motion
/// Block state used by retain code motion, which is a forward data flow.
class RetainBlockState : public BlockState {
/// Check whether the BBSetOut has changed. If it does, we need to rerun
/// the data flow on this block's successors to reach fixed point.
/// Returns false when X equals the current BBSetOut; otherwise stores X into
/// BBSetOut and returns true.
bool updateBBSetOut(llvm::SmallBitVector &X) {
if (BBSetOut == X)
return false;
BBSetOut = X;
return true;
/// constructor.
/// \p IsEntry is true for the function's entry block, \p size is the number
/// of bits in each vector, and \p MultiIteration selects the optimistic
/// initialization.
RetainBlockState(bool IsEntry, unsigned size, bool MultiIteration) {
// Iterative forward data flow.
BBSetIn.resize(size, false);
// Initialize to true if we are running optimistic data flow, i.e.
// MultiIteration is true.
BBSetOut.resize(size, MultiIteration);
// The entry block always starts with an all-zero maximum set.
BBMaxSet.resize(size, !IsEntry && MultiIteration);
// Genset and Killset are initially empty.
BBGenSet.resize(size, false);
BBKillSet.resize(size, false);
/// RetainCodeMotionContext - Context to perform retain code motion.
/// Context that implements the CodeMotionContext interface for retains.
class RetainCodeMotionContext : public CodeMotionContext {
/// All the retain block state for all the basic blocks in the function.
llvm::SmallDenseMap<SILBasicBlock *, RetainBlockState *> BlockStates;
/// Return true if the instruction blocks the Ptr to be moved further.
bool mayBlockCodeMotion(SILInstruction *II, SILValue Ptr) override {
// NOTE: If more checks are to be added, place the most expensive in the
// end, this function is called many times.
// These terminator instructions block.
if (isa<ReturnInst>(II) || isa<ThrowInst>(II) || isa<UnreachableInst>(II))
return true;
// Identical RC root blocks code motion, we will be able to move this retain
// further once we move the blocking retain.
if (isRetainInstruction(II) && getRCRoot(II) == Ptr)
return true;
// Ref count checks do not have side effects, but are barriers for retains.
if (mayCheckRefCount(II))
return true;
// mayDecrement reference count stops code motion.
if (mayDecrementRefCount(II, Ptr, AA))
return true;
// This instruction does not block the retain code motion.
return false;
/// Return the previous instruction if it happens to be a retain with the
/// given RC root, nullptr otherwise. Also returns nullptr when \p I is the
/// first instruction of its block.
SILInstruction *getPrevReusableInst(SILInstruction *I, SILValue Root) {
if (&*I->getParent()->begin() == I)
return nullptr;
auto Prev = &*std::prev(SILBasicBlock::iterator(I));
if (isRetainInstruction(Prev) && getRCRoot(Prev) == Root)
return Prev;
return nullptr;
/// Constructor. Forwards everything to the base class, then replaces the
/// base's default MultiIteration with the result of requireIteration().
RetainCodeMotionContext(llvm::SpecificBumpPtrAllocator<BlockState> &BPA,
SILFunction *F, PostOrderFunctionInfo *PO,
AliasAnalysis *AA, RCIdentityFunctionInfo *RCFI)
: CodeMotionContext(BPA, F, PO, AA, RCFI) {
MultiIteration = requireIteration();
/// virtual destructor.
~RetainCodeMotionContext() override {}
/// Return true if the iterative (optimistic) data flow is required, false
/// if a single iteration suffices.
bool requireIteration() override;
/// Initialize necessary things to run the iterative data flow.
void initializeCodeMotionDataFlow() override;
/// Initialize the basic block maximum refcounted set.
void initializeCodeMotionBBMaxSet() override;
/// Compute the genset and killset for every root in every basic block.
void computeCodeMotionGenKillSet() override;
/// Run the iterative data flow to converge.
void convergeCodeMotionDataFlow() override;
/// Use the data flow results, come up with places to insert the new inst.
void computeCodeMotionInsertPoints() override;
/// Remove the old retains and create the new *moved* refcounted instructions
bool performCodeMotion() override;
/// Compute the BBSetIn and BBSetOut for the current basic block with the
/// generated gen and kill set.
bool processBBWithGenKillSet(SILBasicBlock *BB) override;
/// Merge the data flow states.
void mergeBBDataFlowStates(SILBasicBlock *BB) override;
/// Decide whether retain code motion needs the iterative data flow.
/// Returns true as soon as some block, visited in reverse post order, has a
/// predecessor that is not in PBBs; returns false if no such block exists.
bool RetainCodeMotionContext::requireIteration() {
// If every basic block has all of its predecessors processed before it when
// the blocks of the function are iterated in reverse post order, then this
// function can be processed in one iteration, i.e. no need to generate the
// genset and killset.
llvm::SmallPtrSet<SILBasicBlock *, 4> PBBs;
for (SILBasicBlock *B : PO->getReversePostOrder()) {
for (auto X : B->getPredecessorBlocks()) {
// A predecessor that is not in PBBs forces the iterative data flow.
if (!PBBs.count(X))
return true;
return false;
/// Collect the RC roots of the retains in the function and create a
/// RetainBlockState for every basic block.
void RetainCodeMotionContext::initializeCodeMotionDataFlow() {
// Find all the RC roots in the function.
for (auto &BB : *F) {
for (auto &II : BB) {
// Only retain instructions contribute roots here.
if (!isRetainInstruction(&II))
SILValue Root = getRCRoot(&II);
// Roots that already have an index are not indexed a second time.
if (RCRootIndex.find(Root) != RCRootIndex.end())
// The index of a root is the vault size at the time it is recorded.
RCRootIndex[Root] = RCRootVault.size();
// Initialize all the data flow bit vector for all basic blocks.
for (auto &BB : *F) {
// The state is placement-constructed in memory obtained from BPA. The first
// block of the function is passed as the entry block.
BlockStates[&BB] = new (BPA.Allocate())
RetainBlockState(&BB == &*F->begin(),
RCRootVault.size(), MultiIteration);
/// Compute BBMaxSet for every block, visiting blocks in reverse post order.
void RetainCodeMotionContext::initializeCodeMotionBBMaxSet() {
for (SILBasicBlock *BB : PO->getReversePostOrder()) {
// If basic block has no predecessor, do nothing.
BlockState *State = BlockStates[BB];
if (BB->pred_empty()) {
} else {
// Intersect in all predecessors' BBMaxSet.
for (auto E = BB->pred_end(), I = BB->pred_begin(); I != E; ++I) {
State->BBMaxSet &= BlockStates[*I]->BBMaxSet;
// Process the instructions in the basic block to find what refcounted
// roots are retained. If we know that an RC root can't be retained at a
// basic block, then we know we do not need to consider it for the killset.
// NOTE: this is a conservative approximation, because some retains may be
// blocked before it reaches this block.
for (auto &II : *BB) {
// Only retain instructions are of interest in this walk.
if (!isRetainInstruction(&II))
/// Compute the genset and killset of every block for retain code motion,
/// visiting blocks in reverse post order and instructions front to back.
void RetainCodeMotionContext::computeCodeMotionGenKillSet() {
for (SILBasicBlock *BB : PO->getReversePostOrder()) {
auto *State = BlockStates[BB];
bool InterestBlock = false;
for (auto &I : *BB) {
// Check whether this instruction blocks any RC root code motion.
// Roots whose BBMaxSet bit is clear are skipped before the more expensive
// mayBlockCodeMotion() query is made.
for (unsigned i = 0; i < RCRootVault.size(); ++i) {
if (!State->BBMaxSet.test(i) || !mayBlockCodeMotion(&I, RCRootVault[i]))
// This is a blocking instruction for the rcroot.
InterestBlock = true;
// If this is a retain instruction, it also generates.
if (isRetainInstruction(&I)) {
unsigned idx = RCRootIndex[getRCRoot(&I)];
// A retain blocks its own root (see mayBlockCodeMotion), so the kill bit
// is expected to be set by the loop above.
assert(State->BBKillSet.test(idx) && "Killset computed incorrectly");
InterestBlock = true;
// Is this a block that is interesting to the last iteration of the data
// flow.
if (!InterestBlock)
/// Materialize the retains at the computed insertion points and remove the
/// original retains. Returns the Changed flag.
bool RetainCodeMotionContext::performCodeMotion() {
bool Changed = false;
// Create the new retain instructions.
for (auto RC : RCRootVault) {
auto Iter = InsertPoints.find(RC);
// Roots without an entry in InsertPoints have nothing to insert.
if (Iter == InsertPoints.end())
for (auto IP : Iter->second) {
// we are about to insert a new retain instruction before the insertion
// point. Check if the previous instruction is reusable, reuse it, do
// not insert new instruction and delete old one.
if (auto I = getPrevReusableInst(IP, Iter->first)) {
createIncrementBefore(Iter->first, IP);
Changed = true;
// Remove the old retain instructions.
for (auto R : RCInstructions) {
recursivelyDeleteTriviallyDeadInstructions(R, true);
return Changed;
/// Compute BBSetIn of \p BB as the intersection of the BBSetOut of all of
/// its predecessors.
void RetainCodeMotionContext::mergeBBDataFlowStates(SILBasicBlock *BB) {
BlockState *State = BlockStates[BB];
// If basic block has no predecessor, simply reset and return.
if (BB->pred_empty())
// Intersect in all predecessors' BBSetOuts.
// Seed with the first predecessor, then and-in the remaining ones.
auto Iter = BB->pred_begin();
State->BBSetIn = BlockStates[*Iter]->BBSetOut;
Iter = std::next(Iter);
for (auto E = BB->pred_end(); Iter != E; ++Iter) {
State->BBSetIn &= BlockStates[*Iter]->BBSetOut;
/// Apply the precomputed gen/kill information of \p BB. Returns true when
/// the block's BBSetOut changed as a result.
bool RetainCodeMotionContext::processBBWithGenKillSet(SILBasicBlock *BB) {
RetainBlockState *State = BlockStates[BB];
// Compute the BBSetIn at the beginning of the basic block.
// Then derive the set at the end of the basic block from it; the result is
// accumulated in BBSetIn.
State->BBSetIn |= State->BBGenSet;
// If BBSetOut changes, then keep iterating until reached a fixed point.
return State->updateBBSetOut(State->BBSetIn);
/// Worklist-driven fixed point computation for the retain data flow.
void RetainCodeMotionContext::convergeCodeMotionDataFlow() {
// Process each basic block with the genset and killset. Every time the
// BBSetOut of a basic block changes, the optimization is rerun on its
// successors.
llvm::SmallVector<SILBasicBlock *, 16> WorkList;
// Tracks the blocks that are currently in the worklist.
llvm::SmallPtrSet<SILBasicBlock *, 4> HandledBBs;
// Push into reverse post order so that we can pop from the back and get
// post order.
for (SILBasicBlock *B : PO->getReversePostOrder()) {
while (!WorkList.empty()) {
SILBasicBlock *BB = WorkList.pop_back_val();
// A true result means BBSetOut of BB changed, so its successors need to
// be looked at again.
if (processBBWithGenKillSet(BB)) {
for (auto &X : BB->getSuccessors()) {
// We do not push basic block into the worklist if its already
// in the worklist.
if (HandledBBs.count(X))
/// Use the converged data flow to decide where the new retains go, visiting
/// blocks in reverse post order.
void RetainCodeMotionContext::computeCodeMotionInsertPoints() {
// The BBSetOuts have converged, run last iteration and figure out
// insertion point for each refcounted root.
for (SILBasicBlock *BB : PO->getReversePostOrder()) {
RetainBlockState *S = BlockStates[BB];
// Compute insertion point generated by the edge value transition.
// If there is a transition from 1 to 0, that means we have a partial
// merge, which means the retain can NOT be sunk to the current block,
// so place it at the end of the predecessors.
for (unsigned i = 0; i < RCRootVault.size(); ++i) {
// Only roots whose bit is clear in this block's BBSetIn can be a 1-to-0
// transition.
if (S->BBSetIn[i])
for (auto Pred : BB->getPredecessorBlocks()) {
BlockState *PBB = BlockStates[Pred];
// Predecessors whose BBSetOut bit is clear carry no retain for this root.
if (!PBB->BBSetOut[i])
// Is this block interesting. If we are sure this block does not generate
// retains nor does it block any retains (i.e. no insertion point will be
// created), we can skip it, as the BBSetOut has been converged if this is
// a multi-iteration function.
if (MultiIteration && !InterestBlocks.count(BB))
// Compute insertion point within the basic block. Process instructions in
// the basic block from the first to the last.
for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
for (unsigned i = 0; i < RCRootVault.size(); ++i) {
// Only roots that are live in BBSetIn and blocked by this instruction
// matter here.
if (!S->BBSetIn[i] || !mayBlockCodeMotion(&*I, RCRootVault[i]))
// If this is a retain instruction, it also generates.
if (isRetainInstruction(&*I)) {
// Lastly update the BBSetOut, only necessary when we are running a single
// iteration dataflow.
if (!MultiIteration) {
// Release Code Motion
/// Block state used by release code motion, which is a backward data flow.
class ReleaseBlockState : public BlockState {
/// Check whether the BBSetIn has changed. If it does, we need to rerun
/// the data flow on this block's predecessors to reach fixed point.
/// Returns false when X equals the current BBSetIn; otherwise stores X into
/// BBSetIn and returns true.
bool updateBBSetIn(llvm::SmallBitVector &X) {
if (BBSetIn == X)
return false;
BBSetIn = X;
return true;
/// constructor.
/// \p IsExit is true for exit blocks, \p size is the number of bits in each
/// vector, and \p MultiIteration selects the optimistic initialization.
ReleaseBlockState(bool IsExit, unsigned size, bool MultiIteration) {
// backward data flow.
// Initialize to true if we are running optimistic data flow, i.e.
// MultiIteration is true.
BBSetIn.resize(size, MultiIteration);
BBSetOut.resize(size, false);
// Exit blocks always start with an all-zero maximum set.
BBMaxSet.resize(size, !IsExit && MultiIteration);
// Genset and Killset are initially empty.
BBGenSet.resize(size, false);
BBKillSet.resize(size, false);
/// ReleaseCodeMotionContext - Context to perform release code motion.
/// Context that implements the CodeMotionContext interface for releases.
class ReleaseCodeMotionContext : public CodeMotionContext {
/// All the release block state for all the basic blocks in the function.
llvm::SmallDenseMap<SILBasicBlock *, ReleaseBlockState *> BlockStates;
/// We are not moving epilogue releases.
bool FreezeEpilogueReleases;
/// The epilogue release matcher we are currently using.
ConsumedArgToEpilogueReleaseMatcher &ERM;
/// Return true if the instruction blocks the Ptr to be moved further.
bool mayBlockCodeMotion(SILInstruction *II, SILValue Ptr) override {
// NOTE: If more checks are to be added, place the most expensive in the end.
// This function is called many times.
// We can not move a release above the instruction that defines the
// released value.
if (II == Ptr)
return true;
// Identical RC root blocks code motion, we will be able to move this release
// further once we move the blocking release.
if (isReleaseInstruction(II) && getRCRoot(II) == Ptr)
return true;
// Stop at may interfere.
if (mayHaveSymmetricInterference(II, Ptr, AA))
return true;
// This instruction does not block the release.
return false;
/// Return the previous instruction if it happens to be a release with the
/// given RC root, nullptr otherwise. Also returns nullptr when \p I is the
/// first instruction of its block.
SILInstruction *getPrevReusableInst(SILInstruction *I, SILValue Root) {
if (&*I->getParent()->begin() == I)
return nullptr;
auto Prev = &*std::prev(SILBasicBlock::iterator(I));
if (isReleaseInstruction(Prev) && getRCRoot(Prev) == Root)
return Prev;
return nullptr;
/// Constructor. Forwards the shared arguments to the base class, stores the
/// epilogue release settings, then replaces the base's default
/// MultiIteration with the result of requireIteration().
ReleaseCodeMotionContext(llvm::SpecificBumpPtrAllocator<BlockState> &BPA,
SILFunction *F, PostOrderFunctionInfo *PO,
AliasAnalysis *AA, RCIdentityFunctionInfo *RCFI,
bool FreezeEpilogueReleases,
ConsumedArgToEpilogueReleaseMatcher &ERM)
: CodeMotionContext(BPA, F, PO, AA, RCFI),
FreezeEpilogueReleases(FreezeEpilogueReleases), ERM(ERM) {
MultiIteration = requireIteration();
/// virtual destructor.
~ReleaseCodeMotionContext() override {}
/// Return true if the iterative (optimistic) data flow is required, false
/// if the data flow can converge in 1 iteration.
bool requireIteration() override;
/// Initialize necessary things to run the iterative data flow.
void initializeCodeMotionDataFlow() override;
/// Initialize the basic block maximum refcounted set.
void initializeCodeMotionBBMaxSet() override;
/// Compute the genset and killset for every root in every basic block.
void computeCodeMotionGenKillSet() override;
/// Run the iterative data flow to converge.
void convergeCodeMotionDataFlow() override;
/// Use the data flow results, come up with places to insert the new inst.
void computeCodeMotionInsertPoints() override;
/// Remove the old releases and create the new *moved* refcounted instructions
bool performCodeMotion() override;
/// Compute the BBSetIn and BBSetOut for the current basic
/// block with the generated gen and kill set.
bool processBBWithGenKillSet(SILBasicBlock *BB) override;
/// Merge the data flow states.
void mergeBBDataFlowStates(SILBasicBlock *BB) override;
/// Decide whether release code motion needs the iterative data flow.
/// Returns true as soon as some block, visited in post order, has a
/// successor that is not in PBBs; returns false if no such block exists.
bool ReleaseCodeMotionContext::requireIteration() {
// If every basic block has all of its successors processed before it when
// the blocks of the function are iterated in post order, then this function
// can be processed in one iteration, i.e. no need to generate the genset
// and killset.
llvm::SmallPtrSet<SILBasicBlock *, 4> PBBs;
for (SILBasicBlock *B : PO->getPostOrder()) {
for (auto &X : B->getSuccessors()) {
// A successor that is not in PBBs forces the iterative data flow.
if (!PBBs.count(X))
return true;
return false;
/// Collect the RC roots of the releases in the function and create a
/// ReleaseBlockState for every basic block.
void ReleaseCodeMotionContext::initializeCodeMotionDataFlow() {
// Find all the RC roots in the function.
for (auto &BB : *F) {
for (auto &II : BB) {
// Only release instructions contribute roots here.
if (!isReleaseInstruction(&II))
// Do not try to enumerate if we are not hoisting epilogue releases.
if (FreezeEpilogueReleases && ERM.isEpilogueRelease(&II))
SILValue Root = getRCRoot(&II);
// Roots that already have an index are not indexed a second time.
if (RCRootIndex.find(Root) != RCRootIndex.end())
// The index of a root is the vault size at the time it is recorded.
RCRootIndex[Root] = RCRootVault.size();
// Initialize all the data flow bit vector for all basic blocks.
for (auto &BB : *F) {
// The state is placement-constructed in memory obtained from BPA.
BlockStates[&BB] = new (BPA.Allocate())
RCRootVault.size(), MultiIteration);
/// Compute BBMaxSet for every block, visiting blocks in post order.
void ReleaseCodeMotionContext::initializeCodeMotionBBMaxSet() {
for (SILBasicBlock *BB : PO->getPostOrder()) {
// If basic block has no successor, do nothing.
BlockState *State = BlockStates[BB];
if (BB->succ_empty()) {
} else {
// Intersect in all successors' BBMaxSet.
for (auto E = BB->succ_end(), I = BB->succ_begin(); I != E; ++I) {
State->BBMaxSet &= BlockStates[*I]->BBMaxSet;
// Process the instructions in the basic block to find what refcounted
// roots are released. If we know that an RC root can't be released at a
// basic block, then we know we do not need to consider it for the killset.
// NOTE: this is a conservative approximation, because some releases may be
// blocked before it reaches this block.
// The block is walked from its last instruction to its first.
for (auto II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
// Only release instructions are of interest in this walk.
if (!isReleaseInstruction(&*II))
/// Compute the genset and killset of every block for release code motion,
/// visiting blocks in post order and instructions back to front.
void ReleaseCodeMotionContext::computeCodeMotionGenKillSet() {
for (SILBasicBlock *BB : PO->getPostOrder()) {
auto *State = BlockStates[BB];
bool InterestBlock = false;
for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) {
// Check whether this instruction blocks any RC root code motion.
// Roots whose BBMaxSet bit is clear are skipped before the more expensive
// mayBlockCodeMotion() query is made.
for (unsigned i = 0; i < RCRootVault.size(); ++i) {
if (!State->BBMaxSet.test(i) || !mayBlockCodeMotion(&*I, RCRootVault[i]))
// This instruction blocks this RC root.
InterestBlock = true;
// If this is an epilogue release and we are freezing epilogue release
// simply continue.
if (FreezeEpilogueReleases && ERM.isEpilogueRelease(&*I))
// If this is a release instruction, it also generates.
if (isReleaseInstruction(&*I)) {
unsigned idx = RCRootIndex[getRCRoot(&*I)];
// A release blocks its own root (see mayBlockCodeMotion), so the kill bit
// is expected to be set by the loop above.
assert(State->BBKillSet.test(idx) && "Killset computed incorrectly");
InterestBlock = true;
// Handle SILArgument, SILArgument can invalidate.
// Only roots that are arguments of this very block are relevant.
for (unsigned i = 0; i < RCRootVault.size(); ++i) {
auto *A = dyn_cast<SILArgument>(RCRootVault[i]);
if (!A || A->getParent() != BB)
InterestBlock = true;
// Is this interesting to the last iteration of the data flow.
if (!InterestBlock)
/// Compute BBSetOut of \p BB as the intersection of the BBSetIn of all of
/// its successors.
void ReleaseCodeMotionContext::mergeBBDataFlowStates(SILBasicBlock *BB) {
BlockState *State = BlockStates[BB];
// If basic block has no successor, simply reset and return.
if (BB->succ_empty())
// Intersect in all successors' BBSetIn.
// Seed with the first successor, then and-in the remaining ones.
auto Iter = BB->succ_begin();
State->BBSetOut = BlockStates[*Iter]->BBSetIn;
Iter = std::next(Iter);
for (auto E = BB->succ_end(); Iter != E; ++Iter) {
State->BBSetOut &= BlockStates[*Iter]->BBSetIn;
/// Materialize the releases at the computed insertion points and remove the
/// original releases. Returns the Changed flag.
bool ReleaseCodeMotionContext::performCodeMotion() {
bool Changed = false;
// Create the new releases at each anchor point.
for (auto RC : RCRootVault) {
auto Iter = InsertPoints.find(RC);
// Roots without an entry in InsertPoints have nothing to insert.
if (Iter == InsertPoints.end())
for (auto IP : Iter->second) {
// we are about to insert a new release instruction before the insertion
// point. Check if the previous instruction is reusable, reuse it, do
// not insert new instruction and delete old one.
if (auto I = getPrevReusableInst(IP, Iter->first)) {
createDecrementBefore(Iter->first, IP);
Changed = true;
// Remove the old release instructions.
for (auto R : RCInstructions) {
recursivelyDeleteTriviallyDeadInstructions(R, true);
return Changed;
/// Apply the precomputed gen/kill information of \p BB. Returns true when
/// the block's BBSetIn changed as a result.
bool ReleaseCodeMotionContext::processBBWithGenKillSet(SILBasicBlock *BB) {
ReleaseBlockState *State = BlockStates[BB];
// Compute the BBSetOut at the end of the basic block.
// Then derive the set at the beginning of the basic block from it; the
// result is accumulated in BBSetOut.
State->BBSetOut |= State->BBGenSet;
// If BBSetIn changes, then keep iterating until reached a fixed point.
return State->updateBBSetIn(State->BBSetOut);
/// Worklist-driven fixed point computation for the release data flow.
void ReleaseCodeMotionContext::convergeCodeMotionDataFlow() {
// Process each basic block with the gen and kill set. Every time the
// BBSetIn of a basic block changes, the optimization is rerun on its
// predecessors.
llvm::SmallVector<SILBasicBlock *, 16> WorkList;
// Tracks the blocks that are currently in the worklist.
llvm::SmallPtrSet<SILBasicBlock *, 8> HandledBBs;
// Push in post order so that we can pop from the back and get reverse
// post order.
for (SILBasicBlock *B : PO->getPostOrder()) {
while (!WorkList.empty()) {
SILBasicBlock *BB = WorkList.pop_back_val();
// A true result means BBSetIn of BB changed, so its predecessors need to
// be looked at again.
if (processBBWithGenKillSet(BB)) {
for (auto X : BB->getPredecessorBlocks()) {
// We do not push basic block into the worklist if its already
// in the worklist.
if (HandledBBs.count(X))
/// Use the converged data flow to decide where the new releases go, visiting
/// blocks in post order.
void ReleaseCodeMotionContext::computeCodeMotionInsertPoints() {
// The BBSetIns have converged, run last iteration and figure out insertion
// point for each RC root.
for (SILBasicBlock *BB : PO->getPostOrder()) {
// Intersect in the successor BBSetIns.
ReleaseBlockState *S = BlockStates[BB];
// Compute insertion point generated by the edge value transition.
// If there is a transition from 1 to 0, that means we have a partial
// merge, which means the release can NOT be hoisted to the current block.
// place it at the successors.
for (unsigned i = 0; i < RCRootVault.size(); ++i) {
// Only roots whose bit is clear in this block's BBSetOut can be a 1-to-0
// transition.
if (S->BBSetOut[i])
for (auto &Succ : BB->getSuccessors()) {
BlockState *SBB = BlockStates[Succ];
// Successors whose BBSetIn bit is clear carry no release for this root.
if (!SBB->BBSetIn[i])
// Is this block interesting ?
if (MultiIteration && !InterestBlocks.count(BB))
// Compute insertion point generated by MayUse terminator inst.
// If terminator instruction can block the RC root. We will have no
// choice but to anchor the release instructions in the successor blocks.
for (unsigned i = 0; i < RCRootVault.size(); ++i) {
SILInstruction *Term = BB->getTerminator();
if (!S->BBSetOut[i] || !mayBlockCodeMotion(Term, RCRootVault[i]))
for (auto &Succ : BB->getSuccessors()) {
BlockState *SBB = BlockStates[Succ];
if (!SBB->BBSetIn[i])
// Compute insertion point generated within the basic block. Process
// instructions from the one before the terminator back to the first
// (the terminator was handled above).
for (auto I = std::next(BB->rbegin()), E = BB->rend(); I != E; ++I) {
for (unsigned i = 0; i < RCRootVault.size(); ++i) {
if (!S->BBSetOut[i] || !mayBlockCodeMotion(&*I, RCRootVault[i]))
// The candidate insertion point is the instruction right after the
// blocking instruction.
auto *InsertPt = &*std::next(SILBasicBlock::iterator(&*I));
// If we are freezing this epilogue release. Simply continue.
if (FreezeEpilogueReleases && ERM.isEpilogueRelease(&*I))
// This release generates.
if (isReleaseInstruction(&*I)) {
// Compute insertion point generated by SILArgument. SILArgument blocks if
// it defines the released value.
for (unsigned i = 0; i < RCRootVault.size(); ++i) {
if (!S->BBSetOut[i])
// Only roots that are arguments of this very block are relevant.
auto *A = dyn_cast<SILArgument>(RCRootVault[i]);
if (!A || A->getParent() != BB)
// Lastly update the BBSetIn, only necessary when we are running a single
// iteration dataflow.
if (!MultiIteration) {
// Top Level Entry Point
namespace {
/// Code motion kind. Retain selects retain sinking, Release selects release
/// hoisting.
enum CodeMotionKind : unsigned { Retain = 0, Release = 1};
/// Function transform that runs either the retain or the release code motion
/// context on the current function, depending on Kind.
class ARCCodeMotion : public SILFunctionTransform {
/// Whether to hoist releases or sink retains.
CodeMotionKind Kind;
/// Freeze epilogue release or not.
bool FreezeEpilogueReleases;
/// Constructor. \p H selects the kind of code motion, \p F whether epilogue
/// releases are frozen.
ARCCodeMotion(CodeMotionKind H, bool F) : Kind(H), FreezeEpilogueReleases(F) {}
/// The entry point to the transformation.
void run() override {
// Code motion disabled.
if (DisableARCCodeMotion)
// Respect function no.optimize.
SILFunction *F = getFunction();
if (!F->shouldOptimize())
// Return if there is critical edge and we are disabling critical edge
// splitting.
if (DisableIfWithCriticalEdge && hasCriticalEdges(*F, false))
DEBUG(llvm::dbgs() << "*** ARCCM on function: " << F->getName() << " ***\n");
// Split all critical edges.
// TODO: maybe we can do this lazily or maybe we should disallow SIL passes
// to create critical edges.
bool EdgeChanged = splitAllCriticalEdges(*F, false, nullptr, nullptr);
// Allocator handed to the code motion context for its per-block states.
llvm::SpecificBumpPtrAllocator<BlockState> BPA;
// Analyses are fetched after the critical edge splitting above.
auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(F);
auto *AA = PM->getAnalysis<AliasAnalysis>();
auto *RCFI = PM->getAnalysis<RCIdentityAnalysis>()->get(F);
bool InstChanged = false;
if (Kind == Release) {
// TODO: we should consider Throw block as well, or better we should
// abstract the Return block or Throw block away in the matcher.
SILArgumentConvention Conv[] = {SILArgumentConvention::Direct_Owned};
ConsumedArgToEpilogueReleaseMatcher ERM(RCFI, F,
ReleaseCodeMotionContext RelCM(BPA, F, PO, AA, RCFI,
FreezeEpilogueReleases, ERM);
// Run release hoisting.
InstChanged |=;
} else {
RetainCodeMotionContext RetCM(BPA, F, PO, AA, RCFI);
// Run retain sinking.
InstChanged |=;
if (EdgeChanged) {
// We split critical edges.
if (InstChanged) {
// We moved instructions.
} // end anonymous namespace
/// Sink Retains.
/// Create the pass in retain mode; the epilogue release flag is false.
SILTransform *swift::createRetainSinking() {
return new ARCCodeMotion(CodeMotionKind::Retain, false);
/// Hoist releases, but not epilogue release. ASO relies on epilogue releases
/// to prove knownsafety on enclosed releases.
/// Create the pass in release mode with epilogue releases frozen.
SILTransform *swift::createReleaseHoisting() {
return new ARCCodeMotion(CodeMotionKind::Release, true);
/// Hoist all releases.
/// Create the pass in release mode without freezing epilogue releases.
SILTransform *swift::createLateReleaseHoisting() {
return new ARCCodeMotion(CodeMotionKind::Release, false);