blob: f39998cca56fe5d02385bd957950eef27c72cfae [file] [log] [blame] [edit]
//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Analysis/Analyses/LifetimeSafety.h"
#include "clang/AST/Decl.h"
#include "clang/AST/Expr.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/AST/Type.h"
#include "clang/Analysis/Analyses/PostOrderCFGView.h"
#include "clang/Analysis/AnalysisDeclContext.h"
#include "clang/Analysis/CFG.h"
#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/ImmutableMap.h"
#include "llvm/ADT/ImmutableSet.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TimeProfiler.h"
#include <cstdint>
#include <memory>
namespace clang::lifetimes {
namespace internal {
/// Represents the storage location being borrowed, e.g., a specific stack
/// variable.
/// TODO: Model access paths of other types, e.g., s.field, heap and globals.
struct AccessPath {
const clang::ValueDecl *D;
AccessPath(const clang::ValueDecl *D) : D(D) {}
};
/// Information about a single borrow, or "Loan". A loan is created when a
/// reference or pointer is created.
struct Loan {
/// TODO: Represent opaque loans.
/// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it
/// is represented as empty LoanSet
LoanID ID;
AccessPath Path;
SourceLocation IssueLoc;
Loan(LoanID id, AccessPath path, SourceLocation loc)
: ID(id), Path(path), IssueLoc(loc) {}
};
/// An Origin is a symbolic identifier that represents the set of possible
/// loans a pointer-like object could hold at any given time.
/// TODO: Enhance the origin model to handle complex types, pointer
/// indirection and reborrowing. The plan is to move from a single origin per
/// variable/expression to a "list of origins" governed by the Type.
/// For example, the type 'int**' would have two origins.
/// See discussion:
/// https://github.com/llvm/llvm-project/pull/142313/commits/0cd187b01e61b200d92ca0b640789c1586075142#r2137644238
struct Origin {
OriginID ID;
/// A pointer to the AST node that this origin represents. This union
/// distinguishes between origins from declarations (variables or parameters)
/// and origins from expressions.
llvm::PointerUnion<const clang::ValueDecl *, const clang::Expr *> Ptr;
Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {}
Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {}
const clang::ValueDecl *getDecl() const {
return Ptr.dyn_cast<const clang::ValueDecl *>();
}
const clang::Expr *getExpr() const {
return Ptr.dyn_cast<const clang::Expr *>();
}
};
/// Manages the creation, storage and retrieval of loans.
class LoanManager {
public:
LoanManager() = default;
Loan &addLoan(AccessPath Path, SourceLocation Loc) {
AllLoans.emplace_back(getNextLoanID(), Path, Loc);
return AllLoans.back();
}
const Loan &getLoan(LoanID ID) const {
assert(ID.Value < AllLoans.size());
return AllLoans[ID.Value];
}
llvm::ArrayRef<Loan> getLoans() const { return AllLoans; }
private:
LoanID getNextLoanID() { return NextLoanID++; }
LoanID NextLoanID{0};
/// TODO(opt): Profile and evaluate the usefullness of small buffer
/// optimisation.
llvm::SmallVector<Loan> AllLoans;
};
/// Manages the creation, storage, and retrieval of origins for pointer-like
/// variables and expressions.
class OriginManager {
public:
OriginManager() = default;
Origin &addOrigin(OriginID ID, const clang::ValueDecl &D) {
AllOrigins.emplace_back(ID, &D);
return AllOrigins.back();
}
Origin &addOrigin(OriginID ID, const clang::Expr &E) {
AllOrigins.emplace_back(ID, &E);
return AllOrigins.back();
}
OriginID get(const Expr &E) {
// Origin of DeclRefExpr is that of the declaration it refers to.
if (const auto *DRE = dyn_cast<DeclRefExpr>(&E))
return get(*DRE->getDecl());
auto It = ExprToOriginID.find(&E);
// TODO: This should be an assert(It != ExprToOriginID.end()). The current
// implementation falls back to getOrCreate to avoid crashing on
// yet-unhandled pointer expressions, creating an empty origin for them.
if (It == ExprToOriginID.end())
return getOrCreate(E);
return It->second;
}
OriginID get(const ValueDecl &D) {
auto It = DeclToOriginID.find(&D);
// TODO: This should be an assert(It != DeclToOriginID.end()). The current
// implementation falls back to getOrCreate to avoid crashing on
// yet-unhandled pointer expressions, creating an empty origin for them.
if (It == DeclToOriginID.end())
return getOrCreate(D);
return It->second;
}
OriginID getOrCreate(const Expr &E) {
auto It = ExprToOriginID.find(&E);
if (It != ExprToOriginID.end())
return It->second;
if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) {
// Origin of DeclRefExpr is that of the declaration it refers to.
return getOrCreate(*DRE->getDecl());
}
OriginID NewID = getNextOriginID();
addOrigin(NewID, E);
ExprToOriginID[&E] = NewID;
return NewID;
}
const Origin &getOrigin(OriginID ID) const {
assert(ID.Value < AllOrigins.size());
return AllOrigins[ID.Value];
}
llvm::ArrayRef<Origin> getOrigins() const { return AllOrigins; }
OriginID getOrCreate(const ValueDecl &D) {
auto It = DeclToOriginID.find(&D);
if (It != DeclToOriginID.end())
return It->second;
OriginID NewID = getNextOriginID();
addOrigin(NewID, D);
DeclToOriginID[&D] = NewID;
return NewID;
}
private:
OriginID getNextOriginID() { return NextOriginID++; }
OriginID NextOriginID{0};
/// TODO(opt): Profile and evaluate the usefullness of small buffer
/// optimisation.
llvm::SmallVector<Origin> AllOrigins;
llvm::DenseMap<const clang::ValueDecl *, OriginID> DeclToOriginID;
llvm::DenseMap<const clang::Expr *, OriginID> ExprToOriginID;
};
/// An abstract base class for a single, atomic lifetime-relevant event.
class Fact {
public:
enum class Kind : uint8_t {
/// A new loan is issued from a borrow expression (e.g., &x).
Issue,
/// A loan expires as its underlying storage is freed (e.g., variable goes
/// out of scope).
Expire,
/// An origin is propagated from a source to a destination (e.g., p = q).
AssignOrigin,
/// An origin escapes the function by flowing into the return value.
ReturnOfOrigin,
/// A marker for a specific point in the code, for testing.
TestPoint,
};
private:
Kind K;
protected:
Fact(Kind K) : K(K) {}
public:
virtual ~Fact() = default;
Kind getKind() const { return K; }
template <typename T> const T *getAs() const {
if (T::classof(this))
return static_cast<const T *>(this);
return nullptr;
}
virtual void dump(llvm::raw_ostream &OS) const {
OS << "Fact (Kind: " << static_cast<int>(K) << ")\n";
}
};
class IssueFact : public Fact {
LoanID LID;
OriginID OID;
public:
static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; }
IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {}
LoanID getLoanID() const { return LID; }
OriginID getOriginID() const { return OID; }
void dump(llvm::raw_ostream &OS) const override {
OS << "Issue (LoanID: " << getLoanID() << ", OriginID: " << getOriginID()
<< ")\n";
}
};
class ExpireFact : public Fact {
LoanID LID;
public:
static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; }
ExpireFact(LoanID LID) : Fact(Kind::Expire), LID(LID) {}
LoanID getLoanID() const { return LID; }
void dump(llvm::raw_ostream &OS) const override {
OS << "Expire (LoanID: " << getLoanID() << ")\n";
}
};
class AssignOriginFact : public Fact {
OriginID OIDDest;
OriginID OIDSrc;
public:
static bool classof(const Fact *F) {
return F->getKind() == Kind::AssignOrigin;
}
AssignOriginFact(OriginID OIDDest, OriginID OIDSrc)
: Fact(Kind::AssignOrigin), OIDDest(OIDDest), OIDSrc(OIDSrc) {}
OriginID getDestOriginID() const { return OIDDest; }
OriginID getSrcOriginID() const { return OIDSrc; }
void dump(llvm::raw_ostream &OS) const override {
OS << "AssignOrigin (DestID: " << getDestOriginID()
<< ", SrcID: " << getSrcOriginID() << ")\n";
}
};
class ReturnOfOriginFact : public Fact {
OriginID OID;
public:
static bool classof(const Fact *F) {
return F->getKind() == Kind::ReturnOfOrigin;
}
ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {}
OriginID getReturnedOriginID() const { return OID; }
void dump(llvm::raw_ostream &OS) const override {
OS << "ReturnOfOrigin (OriginID: " << getReturnedOriginID() << ")\n";
}
};
/// A dummy-fact used to mark a specific point in the code for testing.
/// It is generated by recognizing a `void("__lifetime_test_point_...")` cast.
class TestPointFact : public Fact {
StringRef Annotation;
public:
static bool classof(const Fact *F) { return F->getKind() == Kind::TestPoint; }
explicit TestPointFact(StringRef Annotation)
: Fact(Kind::TestPoint), Annotation(Annotation) {}
StringRef getAnnotation() const { return Annotation; }
void dump(llvm::raw_ostream &OS) const override {
OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n";
}
};
class FactManager {
public:
llvm::ArrayRef<const Fact *> getFacts(const CFGBlock *B) const {
auto It = BlockToFactsMap.find(B);
if (It != BlockToFactsMap.end())
return It->second;
return {};
}
void addBlockFacts(const CFGBlock *B, llvm::ArrayRef<Fact *> NewFacts) {
if (!NewFacts.empty())
BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end());
}
template <typename FactType, typename... Args>
FactType *createFact(Args &&...args) {
void *Mem = FactAllocator.Allocate<FactType>();
return new (Mem) FactType(std::forward<Args>(args)...);
}
void dump(const CFG &Cfg, AnalysisDeclContext &AC) const {
llvm::dbgs() << "==========================================\n";
llvm::dbgs() << " Lifetime Analysis Facts:\n";
llvm::dbgs() << "==========================================\n";
if (const Decl *D = AC.getDecl())
if (const auto *ND = dyn_cast<NamedDecl>(D))
llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n";
// Print blocks in the order as they appear in code for a stable ordering.
for (const CFGBlock *B : *AC.getAnalysis<PostOrderCFGView>()) {
llvm::dbgs() << " Block B" << B->getBlockID() << ":\n";
auto It = BlockToFactsMap.find(B);
if (It != BlockToFactsMap.end()) {
for (const Fact *F : It->second) {
llvm::dbgs() << " ";
F->dump(llvm::dbgs());
}
}
llvm::dbgs() << " End of Block\n";
}
}
LoanManager &getLoanMgr() { return LoanMgr; }
OriginManager &getOriginMgr() { return OriginMgr; }
private:
LoanManager LoanMgr;
OriginManager OriginMgr;
llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<const Fact *>>
BlockToFactsMap;
llvm::BumpPtrAllocator FactAllocator;
};
class FactGenerator : public ConstStmtVisitor<FactGenerator> {
using Base = ConstStmtVisitor<FactGenerator>;
public:
FactGenerator(FactManager &FactMgr, AnalysisDeclContext &AC)
: FactMgr(FactMgr), AC(AC) {}
void run() {
llvm::TimeTraceScope TimeProfile("FactGenerator");
// Iterate through the CFG blocks in reverse post-order to ensure that
// initializations and destructions are processed in the correct sequence.
for (const CFGBlock *Block : *AC.getAnalysis<PostOrderCFGView>()) {
CurrentBlockFacts.clear();
for (unsigned I = 0; I < Block->size(); ++I) {
const CFGElement &Element = Block->Elements[I];
if (std::optional<CFGStmt> CS = Element.getAs<CFGStmt>())
Visit(CS->getStmt());
else if (std::optional<CFGAutomaticObjDtor> DtorOpt =
Element.getAs<CFGAutomaticObjDtor>())
handleDestructor(*DtorOpt);
}
FactMgr.addBlockFacts(Block, CurrentBlockFacts);
}
}
void VisitDeclStmt(const DeclStmt *DS) {
for (const Decl *D : DS->decls())
if (const auto *VD = dyn_cast<VarDecl>(D))
if (hasOrigin(VD->getType()))
if (const Expr *InitExpr = VD->getInit())
addAssignOriginFact(*VD, *InitExpr);
}
void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) {
/// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized
/// pointers can use the same type of loan.
FactMgr.getOriginMgr().getOrCreate(*N);
}
void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) {
if (!hasOrigin(ICE->getType()))
return;
Visit(ICE->getSubExpr());
// An ImplicitCastExpr node itself gets an origin, which flows from the
// origin of its sub-expression (after stripping its own parens/casts).
// TODO: Consider if this is actually useful in practice. Alternatively, we
// could directly use the sub-expression's OriginID instead of creating a
// new one.
addAssignOriginFact(*ICE, *ICE->getSubExpr());
}
void VisitUnaryOperator(const UnaryOperator *UO) {
if (UO->getOpcode() == UO_AddrOf) {
const Expr *SubExpr = UO->getSubExpr();
if (const auto *DRE = dyn_cast<DeclRefExpr>(SubExpr)) {
if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
// Check if it's a local variable.
if (VD->hasLocalStorage()) {
OriginID OID = FactMgr.getOriginMgr().getOrCreate(*UO);
AccessPath AddrOfLocalVarPath(VD);
const Loan &L = FactMgr.getLoanMgr().addLoan(AddrOfLocalVarPath,
UO->getOperatorLoc());
CurrentBlockFacts.push_back(
FactMgr.createFact<IssueFact>(L.ID, OID));
}
}
}
}
}
void VisitReturnStmt(const ReturnStmt *RS) {
if (const Expr *RetExpr = RS->getRetValue()) {
if (hasOrigin(RetExpr->getType())) {
OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr);
CurrentBlockFacts.push_back(
FactMgr.createFact<ReturnOfOriginFact>(OID));
}
}
}
void VisitBinaryOperator(const BinaryOperator *BO) {
if (BO->isAssignmentOp()) {
const Expr *LHSExpr = BO->getLHS();
const Expr *RHSExpr = BO->getRHS();
// We are interested in assignments like `ptr1 = ptr2` or `ptr = &var`
// LHS must be a pointer/reference type that can be an origin.
// RHS must also represent an origin (either another pointer/ref or an
// address-of).
if (const auto *DRE_LHS = dyn_cast<DeclRefExpr>(LHSExpr))
if (const auto *VD_LHS =
dyn_cast<ValueDecl>(DRE_LHS->getDecl()->getCanonicalDecl());
VD_LHS && hasOrigin(VD_LHS->getType()))
addAssignOriginFact(*VD_LHS, *RHSExpr);
}
}
void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE) {
// Check if this is a test point marker. If so, we are done with this
// expression.
if (VisitTestPoint(FCE))
return;
// Visit as normal otherwise.
Base::VisitCXXFunctionalCastExpr(FCE);
}
private:
// Check if a type has an origin.
bool hasOrigin(QualType QT) { return QT->isPointerOrReferenceType(); }
template <typename Destination, typename Source>
void addAssignOriginFact(const Destination &D, const Source &S) {
OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D);
OriginID SrcOID = FactMgr.getOriginMgr().get(S);
CurrentBlockFacts.push_back(
FactMgr.createFact<AssignOriginFact>(DestOID, SrcOID));
}
void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) {
/// TODO: Also handle trivial destructors (e.g., for `int`
/// variables) which will never have a CFGAutomaticObjDtor node.
/// TODO: Handle loans to temporaries.
/// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the
/// lifetime ends.
const VarDecl *DestructedVD = DtorOpt.getVarDecl();
if (!DestructedVD)
return;
// Iterate through all loans to see if any expire.
/// TODO(opt): Do better than a linear search to find loans associated with
/// 'DestructedVD'.
for (const Loan &L : FactMgr.getLoanMgr().getLoans()) {
const AccessPath &LoanPath = L.Path;
// Check if the loan is for a stack variable and if that variable
// is the one being destructed.
if (LoanPath.D == DestructedVD)
CurrentBlockFacts.push_back(FactMgr.createFact<ExpireFact>(L.ID));
}
}
/// Checks if the expression is a `void("__lifetime_test_point_...")` cast.
/// If so, creates a `TestPointFact` and returns true.
bool VisitTestPoint(const CXXFunctionalCastExpr *FCE) {
if (!FCE->getType()->isVoidType())
return false;
const auto *SubExpr = FCE->getSubExpr()->IgnoreParenImpCasts();
if (const auto *SL = dyn_cast<StringLiteral>(SubExpr)) {
llvm::StringRef LiteralValue = SL->getString();
const std::string Prefix = "__lifetime_test_point_";
if (LiteralValue.starts_with(Prefix)) {
StringRef Annotation = LiteralValue.drop_front(Prefix.length());
CurrentBlockFacts.push_back(
FactMgr.createFact<TestPointFact>(Annotation));
return true;
}
}
return false;
}
FactManager &FactMgr;
AnalysisDeclContext &AC;
llvm::SmallVector<Fact *> CurrentBlockFacts;
};
// ========================================================================= //
// Generic Dataflow Analysis
// ========================================================================= //
enum class Direction { Forward, Backward };
/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific
/// `Fact`. identified by a lifetime-related event (`Fact`).
///
/// A `ProgramPoint` has "after" semantics: it represents the location
/// immediately after its corresponding `Fact`.
using ProgramPoint = const Fact *;
/// A generic, policy-based driver for dataflow analyses. It combines
/// the dataflow runner and the transferer logic into a single class hierarchy.
///
/// The derived class is expected to provide:
/// - A `Lattice` type.
/// - `StringRef getAnalysisName() const`
/// - `Lattice getInitialState();` The initial state of the analysis.
/// - `Lattice join(Lattice, Lattice);` Merges states from multiple CFG paths.
/// - `Lattice transfer(Lattice, const FactType&);` Defines how a single
/// lifetime-relevant `Fact` transforms the lattice state. Only overloads
/// for facts relevant to the analysis need to be implemented.
///
/// \tparam Derived The CRTP derived class that implements the specific
/// analysis.
/// \tparam LatticeType The dataflow lattice used by the analysis.
/// \tparam Dir The direction of the analysis (Forward or Backward).
/// TODO: Maybe use the dataflow framework! The framework might need changes
/// to support the current comparison done at block-entry.
template <typename Derived, typename LatticeType, Direction Dir>
class DataflowAnalysis {
public:
using Lattice = LatticeType;
using Base = DataflowAnalysis<Derived, Lattice, Dir>;
private:
const CFG &Cfg;
AnalysisDeclContext &AC;
/// The dataflow state before a basic block is processed.
llvm::DenseMap<const CFGBlock *, Lattice> InStates;
/// The dataflow state after a basic block is processed.
llvm::DenseMap<const CFGBlock *, Lattice> OutStates;
/// The dataflow state at a Program Point.
/// In a forward analysis, this is the state after the Fact at that point has
/// been applied, while in a backward analysis, it is the state before.
llvm::DenseMap<ProgramPoint, Lattice> PerPointStates;
static constexpr bool isForward() { return Dir == Direction::Forward; }
protected:
FactManager &AllFacts;
explicit DataflowAnalysis(const CFG &C, AnalysisDeclContext &AC,
FactManager &F)
: Cfg(C), AC(AC), AllFacts(F) {}
public:
void run() {
Derived &D = static_cast<Derived &>(*this);
llvm::TimeTraceScope Time(D.getAnalysisName());
using Worklist =
std::conditional_t<Dir == Direction::Forward, ForwardDataflowWorklist,
BackwardDataflowWorklist>;
Worklist W(Cfg, AC);
const CFGBlock *Start = isForward() ? &Cfg.getEntry() : &Cfg.getExit();
InStates[Start] = D.getInitialState();
W.enqueueBlock(Start);
llvm::SmallBitVector Visited(Cfg.getNumBlockIDs() + 1);
while (const CFGBlock *B = W.dequeue()) {
Lattice StateIn = getInState(B);
Lattice StateOut = transferBlock(B, StateIn);
OutStates[B] = StateOut;
Visited.set(B->getBlockID());
for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) {
if (!AdjacentB)
continue;
Lattice OldInState = getInState(AdjacentB);
Lattice NewInState = D.join(OldInState, StateOut);
// Enqueue the adjacent block if its in-state has changed or if we have
// never visited it.
if (!Visited.test(AdjacentB->getBlockID()) ||
NewInState != OldInState) {
InStates[AdjacentB] = NewInState;
W.enqueueBlock(AdjacentB);
}
}
}
}
Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); }
Lattice getInState(const CFGBlock *B) const { return InStates.lookup(B); }
Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); }
void dump() const {
const Derived *D = static_cast<const Derived *>(this);
llvm::dbgs() << "==========================================\n";
llvm::dbgs() << D->getAnalysisName() << " results:\n";
llvm::dbgs() << "==========================================\n";
const CFGBlock &B = isForward() ? Cfg.getExit() : Cfg.getEntry();
getOutState(&B).dump(llvm::dbgs());
}
private:
/// Computes the state at one end of a block by applying all its facts
/// sequentially to a given state from the other end.
Lattice transferBlock(const CFGBlock *Block, Lattice State) {
auto Facts = AllFacts.getFacts(Block);
if constexpr (isForward()) {
for (const Fact *F : Facts) {
State = transferFact(State, F);
PerPointStates[F] = State;
}
} else {
for (const Fact *F : llvm::reverse(Facts)) {
// In backward analysis, capture the state before applying the fact.
PerPointStates[F] = State;
State = transferFact(State, F);
}
}
return State;
}
Lattice transferFact(Lattice In, const Fact *F) {
assert(F);
Derived *D = static_cast<Derived *>(this);
switch (F->getKind()) {
case Fact::Kind::Issue:
return D->transfer(In, *F->getAs<IssueFact>());
case Fact::Kind::Expire:
return D->transfer(In, *F->getAs<ExpireFact>());
case Fact::Kind::AssignOrigin:
return D->transfer(In, *F->getAs<AssignOriginFact>());
case Fact::Kind::ReturnOfOrigin:
return D->transfer(In, *F->getAs<ReturnOfOriginFact>());
case Fact::Kind::TestPoint:
return D->transfer(In, *F->getAs<TestPointFact>());
}
llvm_unreachable("Unknown fact kind");
}
public:
Lattice transfer(Lattice In, const IssueFact &) { return In; }
Lattice transfer(Lattice In, const ExpireFact &) { return In; }
Lattice transfer(Lattice In, const AssignOriginFact &) { return In; }
Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; }
Lattice transfer(Lattice In, const TestPointFact &) { return In; }
};
namespace utils {
/// Computes the union of two ImmutableSets.
template <typename T>
static llvm::ImmutableSet<T> join(llvm::ImmutableSet<T> A,
llvm::ImmutableSet<T> B,
typename llvm::ImmutableSet<T>::Factory &F) {
if (A.getHeight() < B.getHeight())
std::swap(A, B);
for (const T &E : B)
A = F.add(A, E);
return A;
}
/// Computes the key-wise union of two ImmutableMaps.
// TODO(opt): This key-wise join is a performance bottleneck. A more
// efficient merge could be implemented using a Patricia Trie or HAMT
// instead of the current AVL-tree-based ImmutableMap.
template <typename K, typename V, typename Joiner>
static llvm::ImmutableMap<K, V>
join(llvm::ImmutableMap<K, V> A, llvm::ImmutableMap<K, V> B,
typename llvm::ImmutableMap<K, V>::Factory &F, Joiner joinValues) {
if (A.getHeight() < B.getHeight())
std::swap(A, B);
// For each element in B, join it with the corresponding element in A
// (or with an empty value if it doesn't exist in A).
for (const auto &Entry : B) {
const K &Key = Entry.first;
const V &ValB = Entry.second;
if (const V *ValA = A.lookup(Key))
A = F.add(A, Key, joinValues(*ValA, ValB));
else
A = F.add(A, Key, ValB);
}
return A;
}
} // namespace utils
// ========================================================================= //
// Loan Propagation Analysis
// ========================================================================= //
using OriginLoanMap = llvm::ImmutableMap<OriginID, LoanSet>;
/// An object to hold the factories for immutable collections, ensuring
/// that all created states share the same underlying memory management.
struct LifetimeFactory {
OriginLoanMap::Factory OriginMapFactory;
LoanSet::Factory LoanSetFactory;
/// Creates a singleton set containing only the given loan ID.
LoanSet createLoanSet(LoanID LID) {
return LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID);
}
};
/// Represents the dataflow lattice for loan propagation.
///
/// This lattice tracks which loans each origin may hold at a given program
/// point.The lattice has a finite height: An origin's loan set is bounded by
/// the total number of loans in the function.
/// TODO(opt): To reduce the lattice size, propagate origins of declarations,
/// not expressions, because expressions are not visible across blocks.
struct LoanPropagationLattice {
/// The map from an origin to the set of loans it contains.
OriginLoanMap Origins = OriginLoanMap(nullptr);
explicit LoanPropagationLattice(const OriginLoanMap &S) : Origins(S) {}
LoanPropagationLattice() = default;
bool operator==(const LoanPropagationLattice &Other) const {
return Origins == Other.Origins;
}
bool operator!=(const LoanPropagationLattice &Other) const {
return !(*this == Other);
}
void dump(llvm::raw_ostream &OS) const {
OS << "LoanPropagationLattice State:\n";
if (Origins.isEmpty())
OS << " <empty>\n";
for (const auto &Entry : Origins) {
if (Entry.second.isEmpty())
OS << " Origin " << Entry.first << " contains no loans\n";
for (const LoanID &LID : Entry.second)
OS << " Origin " << Entry.first << " contains Loan " << LID << "\n";
}
}
};
/// The analysis that tracks which loans belong to which origins.
class LoanPropagationAnalysis
: public DataflowAnalysis<LoanPropagationAnalysis, LoanPropagationLattice,
Direction::Forward> {
LifetimeFactory &Factory;
public:
LoanPropagationAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F,
LifetimeFactory &Factory)
: DataflowAnalysis(C, AC, F), Factory(Factory) {}
using Base::transfer;
StringRef getAnalysisName() const { return "LoanPropagation"; }
Lattice getInitialState() { return Lattice{}; }
/// Merges two lattices by taking the union of loans for each origin.
// TODO(opt): Keep the state small by removing origins which become dead.
Lattice join(Lattice A, Lattice B) {
OriginLoanMap JoinedOrigins =
utils::join(A.Origins, B.Origins, Factory.OriginMapFactory,
[this](LoanSet S1, LoanSet S2) {
return utils::join(S1, S2, Factory.LoanSetFactory);
});
return Lattice(JoinedOrigins);
}
/// A new loan is issued to the origin. Old loans are erased.
Lattice transfer(Lattice In, const IssueFact &F) {
OriginID OID = F.getOriginID();
LoanID LID = F.getLoanID();
return LoanPropagationLattice(Factory.OriginMapFactory.add(
In.Origins, OID, Factory.createLoanSet(LID)));
}
/// The destination origin's loan set is replaced by the source's.
/// This implicitly "resets" the old loans of the destination.
Lattice transfer(Lattice In, const AssignOriginFact &F) {
OriginID DestOID = F.getDestOriginID();
OriginID SrcOID = F.getSrcOriginID();
LoanSet SrcLoans = getLoans(In, SrcOID);
return LoanPropagationLattice(
Factory.OriginMapFactory.add(In.Origins, DestOID, SrcLoans));
}
LoanSet getLoans(OriginID OID, ProgramPoint P) {
return getLoans(getState(P), OID);
}
private:
LoanSet getLoans(Lattice L, OriginID OID) {
if (auto *Loans = L.Origins.lookup(OID))
return *Loans;
return Factory.LoanSetFactory.getEmptySet();
}
};
// ========================================================================= //
// Expired Loans Analysis
// ========================================================================= //
/// The dataflow lattice for tracking the set of expired loans.
struct ExpiredLattice {
LoanSet Expired;
ExpiredLattice() : Expired(nullptr) {};
explicit ExpiredLattice(LoanSet S) : Expired(S) {}
bool operator==(const ExpiredLattice &Other) const {
return Expired == Other.Expired;
}
bool operator!=(const ExpiredLattice &Other) const {
return !(*this == Other);
}
void dump(llvm::raw_ostream &OS) const {
OS << "ExpiredLattice State:\n";
if (Expired.isEmpty())
OS << " <empty>\n";
for (const LoanID &LID : Expired)
OS << " Loan " << LID << " is expired\n";
}
};
/// The analysis that tracks which loans have expired.
class ExpiredLoansAnalysis
: public DataflowAnalysis<ExpiredLoansAnalysis, ExpiredLattice,
Direction::Forward> {
LoanSet::Factory &Factory;
public:
ExpiredLoansAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F,
LifetimeFactory &Factory)
: DataflowAnalysis(C, AC, F), Factory(Factory.LoanSetFactory) {}
using Base::transfer;
StringRef getAnalysisName() const { return "ExpiredLoans"; }
Lattice getInitialState() { return Lattice(Factory.getEmptySet()); }
/// Merges two lattices by taking the union of the expired loan sets.
Lattice join(Lattice L1, Lattice L2) const {
return Lattice(utils::join(L1.Expired, L2.Expired, Factory));
}
Lattice transfer(Lattice In, const ExpireFact &F) {
return Lattice(Factory.add(In.Expired, F.getLoanID()));
}
// Removes the loan from the set of expired loans.
//
// When a loan is re-issued (e.g., in a loop), it is no longer considered
// expired. A loan can be in the expired set at the point of issue due to
// the dataflow state from a previous loop iteration being propagated along
// a backedge in the CFG.
//
// Note: This has a subtle false-negative though where a loan from previous
// iteration is not overwritten by a reissue. This needs careful tracking
// of loans "across iterations" which can be considered for future
// enhancements.
//
// void foo(int safe) {
// int* p = &safe;
// int* q = &safe;
// while (condition()) {
// int x = 1;
// p = &x; // A loan to 'x' is issued to 'p' in every iteration.
// if (condition()) {
// q = p;
// }
// (void)*p; // OK — 'p' points to 'x' from new iteration.
// (void)*q; // UaF - 'q' still points to 'x' from previous iteration
// // which is now destroyed.
// }
// }
Lattice transfer(Lattice In, const IssueFact &F) {
return Lattice(Factory.remove(In.Expired, F.getLoanID()));
}
};
// ========================================================================= //
// TODO:
// - Modify loan expiry analysis to answer `bool isExpired(Loan L, Point P)`
// - Modify origin liveness analysis to answer `bool isLive(Origin O, Point P)`
// - Using the above three to perform the final error reporting.
// ========================================================================= //
// ========================================================================= //
// LifetimeSafetyAnalysis Class Implementation
// ========================================================================= //
// We need this here for unique_ptr with forward declared class.
LifetimeSafetyAnalysis::~LifetimeSafetyAnalysis() = default;
LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC)
: AC(AC), Factory(std::make_unique<LifetimeFactory>()),
FactMgr(std::make_unique<FactManager>()) {}
void LifetimeSafetyAnalysis::run() {
llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis");
const CFG &Cfg = *AC.getCFG();
DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(),
/*ShowColors=*/true));
FactGenerator FactGen(*FactMgr, AC);
FactGen.run();
DEBUG_WITH_TYPE("LifetimeFacts", FactMgr->dump(Cfg, AC));
/// TODO(opt): Consider optimizing individual blocks before running the
/// dataflow analysis.
/// 1. Expression Origins: These are assigned once and read at most once,
/// forming simple chains. These chains can be compressed into a single
/// assignment.
/// 2. Block-Local Loans: Origins of expressions are never read by other
/// blocks; only Decls are visible. Therefore, loans in a block that
/// never reach an Origin associated with a Decl can be safely dropped by
/// the analysis.
LoanPropagation =
std::make_unique<LoanPropagationAnalysis>(Cfg, AC, *FactMgr, *Factory);
LoanPropagation->run();
ExpiredLoans =
std::make_unique<ExpiredLoansAnalysis>(Cfg, AC, *FactMgr, *Factory);
ExpiredLoans->run();
}
LoanSet LifetimeSafetyAnalysis::getLoansAtPoint(OriginID OID,
ProgramPoint PP) const {
assert(LoanPropagation && "Analysis has not been run.");
return LoanPropagation->getLoans(OID, PP);
}
LoanSet LifetimeSafetyAnalysis::getExpiredLoansAtPoint(ProgramPoint PP) const {
assert(ExpiredLoans && "ExpiredLoansAnalysis has not been run.");
return ExpiredLoans->getState(PP).Expired;
}
std::optional<OriginID>
LifetimeSafetyAnalysis::getOriginIDForDecl(const ValueDecl *D) const {
assert(FactMgr && "FactManager not initialized");
// This assumes the OriginManager's `get` can find an existing origin.
// We might need a `find` method on OriginManager to avoid `getOrCreate` logic
// in a const-query context if that becomes an issue.
return FactMgr->getOriginMgr().get(*D);
}
std::vector<LoanID>
LifetimeSafetyAnalysis::getLoanIDForVar(const VarDecl *VD) const {
assert(FactMgr && "FactManager not initialized");
std::vector<LoanID> Result;
for (const Loan &L : FactMgr->getLoanMgr().getLoans())
if (L.Path.D == VD)
Result.push_back(L.ID);
return Result;
}
llvm::StringMap<ProgramPoint> LifetimeSafetyAnalysis::getTestPoints() const {
assert(FactMgr && "FactManager not initialized");
llvm::StringMap<ProgramPoint> AnnotationToPointMap;
for (const CFGBlock *Block : *AC.getCFG()) {
for (const Fact *F : FactMgr->getFacts(Block)) {
if (const auto *TPF = F->getAs<TestPointFact>()) {
StringRef PointName = TPF->getAnnotation();
assert(AnnotationToPointMap.find(PointName) ==
AnnotationToPointMap.end() &&
"more than one test points with the same name");
AnnotationToPointMap[PointName] = F;
}
}
}
return AnnotationToPointMap;
}
} // namespace internal
void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC) {
internal::LifetimeSafetyAnalysis Analysis(AC);
Analysis.run();
}
} // namespace clang::lifetimes