| //===- DataFlowFramework.h - A generic framework for data-flow analysis ---===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines a generic framework for writing data-flow analysis in MLIR. |
| // The framework consists of a solver, which runs the fixed-point iteration and |
| // manages analysis dependencies, and a data-flow analysis class used to |
| // implement specific analyses. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef MLIR_ANALYSIS_DATAFLOWFRAMEWORK_H |
| #define MLIR_ANALYSIS_DATAFLOWFRAMEWORK_H |
| |
| #include "mlir/IR/Operation.h" |
| #include "mlir/Support/StorageUniquer.h" |
| #include "llvm/ADT/SetVector.h" |
| #include "llvm/Support/Compiler.h" |
| #include "llvm/Support/TypeName.h" |
| #include <queue> |
| |
| namespace mlir { |
| |
| //===----------------------------------------------------------------------===// |
| // ChangeResult |
| //===----------------------------------------------------------------------===// |
| |
| /// A result type used to indicate if a change happened. Boolean operations on |
| /// ChangeResult behave as though `Change` is truth. |
| enum class [[nodiscard]] ChangeResult { |
| NoChange, |
| Change, |
| }; |
| inline ChangeResult operator|(ChangeResult lhs, ChangeResult rhs) { |
| return lhs == ChangeResult::Change ? lhs : rhs; |
| } |
| inline ChangeResult &operator|=(ChangeResult &lhs, ChangeResult rhs) { |
| lhs = lhs | rhs; |
| return lhs; |
| } |
| inline ChangeResult operator&(ChangeResult lhs, ChangeResult rhs) { |
| return lhs == ChangeResult::NoChange ? lhs : rhs; |
| } |
| |
| /// Forward declare the analysis state class. |
| class AnalysisState; |
| |
| //===----------------------------------------------------------------------===// |
| // GenericProgramPoint |
| //===----------------------------------------------------------------------===// |
| |
| /// Abstract class for generic program points. In classical data-flow analysis, |
| /// programs points represent positions in a program to which lattice elements |
| /// are attached. In sparse data-flow analysis, these can be SSA values, and in |
| /// dense data-flow analysis, these are the program points before and after |
| /// every operation. |
| /// |
| /// In the general MLIR data-flow analysis framework, program points are an |
| /// extensible concept. Program points are uniquely identifiable objects to |
| /// which analysis states can be attached. The semantics of program points are |
| /// defined by the analyses that specify their transfer functions. |
| /// |
| /// Program points are implemented using MLIR's storage uniquer framework and |
| /// type ID system to provide RTTI. |
| class GenericProgramPoint : public StorageUniquer::BaseStorage { |
| public: |
| virtual ~GenericProgramPoint(); |
| |
| /// Get the abstract program point's type identifier. |
| TypeID getTypeID() const { return typeID; } |
| |
| /// Get a derived source location for the program point. |
| virtual Location getLoc() const = 0; |
| |
| /// Print the program point. |
| virtual void print(raw_ostream &os) const = 0; |
| |
| protected: |
| /// Create an abstract program point with type identifier. |
| explicit GenericProgramPoint(TypeID typeID) : typeID(typeID) {} |
| |
| private: |
| /// The type identifier of the program point. |
| TypeID typeID; |
| }; |
| |
| //===----------------------------------------------------------------------===// |
| // GenericProgramPointBase |
| //===----------------------------------------------------------------------===// |
| |
| /// Base class for generic program points based on a concrete program point |
| /// type and a content key. This class defines the common methods required for |
| /// operability with the storage uniquer framework. |
| /// |
| /// The provided key type uniquely identifies the concrete program point |
| /// instance and are the data members of the class. |
| template <typename ConcreteT, typename Value> |
| class GenericProgramPointBase : public GenericProgramPoint { |
| public: |
| /// The concrete key type used by the storage uniquer. This class is uniqued |
| /// by its contents. |
| using KeyTy = Value; |
| /// Alias for the base class. |
| using Base = GenericProgramPointBase<ConcreteT, Value>; |
| |
| /// Construct an instance of the program point using the provided value and |
| /// the type ID of the concrete type. |
| template <typename ValueT> |
| explicit GenericProgramPointBase(ValueT &&value) |
| : GenericProgramPoint(TypeID::get<ConcreteT>()), |
| value(std::forward<ValueT>(value)) {} |
| |
| /// Get a uniqued instance of this program point class with the given |
| /// arguments. |
| template <typename... Args> |
| static ConcreteT *get(StorageUniquer &uniquer, Args &&...args) { |
| return uniquer.get<ConcreteT>(/*initFn=*/{}, std::forward<Args>(args)...); |
| } |
| |
| /// Allocate space for a program point and construct it in-place. |
| template <typename ValueT> |
| static ConcreteT *construct(StorageUniquer::StorageAllocator &alloc, |
| ValueT &&value) { |
| return new (alloc.allocate<ConcreteT>()) |
| ConcreteT(std::forward<ValueT>(value)); |
| } |
| |
| /// Two program points are equal if their values are equal. |
| bool operator==(const Value &value) const { return this->value == value; } |
| |
| /// Provide LLVM-style RTTI using type IDs. |
| static bool classof(const GenericProgramPoint *point) { |
| return point->getTypeID() == TypeID::get<ConcreteT>(); |
| } |
| |
| /// Get the contents of the program point. |
| const Value &getValue() const { return value; } |
| |
| private: |
| /// The program point value. |
| Value value; |
| }; |
| |
| //===----------------------------------------------------------------------===// |
| // ProgramPoint |
| //===----------------------------------------------------------------------===// |
| |
| /// Fundamental IR components are supported as first-class program points. |
| struct ProgramPoint |
| : public PointerUnion<GenericProgramPoint *, Operation *, Value, Block *> { |
| using ParentTy = |
| PointerUnion<GenericProgramPoint *, Operation *, Value, Block *>; |
| /// Inherit constructors. |
| using ParentTy::PointerUnion; |
| /// Allow implicit conversion from the parent type. |
| ProgramPoint(ParentTy point = nullptr) : ParentTy(point) {} |
| /// Allow implicit conversions from operation wrappers. |
| /// TODO: For Windows only. Find a better solution. |
| template <typename OpT, typename = std::enable_if_t< |
| std::is_convertible<OpT, Operation *>::value && |
| !std::is_same<OpT, Operation *>::value>> |
| ProgramPoint(OpT op) : ParentTy(op) {} |
| |
| /// Print the program point. |
| void print(raw_ostream &os) const; |
| |
| /// Get the source location of the program point. |
| Location getLoc() const; |
| }; |
| |
| /// Forward declaration of the data-flow analysis class. |
| class DataFlowAnalysis; |
| |
| //===----------------------------------------------------------------------===// |
| // DataFlowConfig |
| //===----------------------------------------------------------------------===// |
| |
| /// Configuration class for data flow solver and child analyses. Follows the |
| /// fluent API pattern. |
| class DataFlowConfig { |
| public: |
| DataFlowConfig() = default; |
| |
| /// Set whether the solver should operate interpocedurally, i.e. enter the |
| /// callee body when available. Interprocedural analyses may be more precise, |
| /// but also more expensive as more states need to be computed and the |
| /// fixpoint convergence takes longer. |
| DataFlowConfig &setInterprocedural(bool enable) { |
| interprocedural = enable; |
| return *this; |
| } |
| |
| /// Return `true` if the solver operates interprocedurally, `false` otherwise. |
| bool isInterprocedural() const { return interprocedural; } |
| |
| private: |
| bool interprocedural = true; |
| }; |
| |
| //===----------------------------------------------------------------------===// |
| // DataFlowSolver |
| //===----------------------------------------------------------------------===// |
| |
| /// The general data-flow analysis solver. This class is responsible for |
| /// orchestrating child data-flow analyses, running the fixed-point iteration |
| /// algorithm, managing analysis state and program point memory, and tracking |
| /// dependencies between analyses, program points, and analysis states. |
| /// |
| /// Steps to run a data-flow analysis: |
| /// |
| /// 1. Load and initialize children analyses. Children analyses are instantiated |
| /// in the solver and initialized, building their dependency relations. |
| /// 2. Configure and run the analysis. The solver invokes the children analyses |
| /// according to their dependency relations until a fixed point is reached. |
| /// 3. Query analysis state results from the solver. |
| /// |
| /// TODO: Optimize the internal implementation of the solver. |
| class DataFlowSolver { |
| public: |
| explicit DataFlowSolver(const DataFlowConfig &config = DataFlowConfig()) |
| : config(config) {} |
| |
| /// Load an analysis into the solver. Return the analysis instance. |
| template <typename AnalysisT, typename... Args> |
| AnalysisT *load(Args &&...args); |
| |
| /// Initialize the children analyses starting from the provided top-level |
| /// operation and run the analysis until fixpoint. |
| LogicalResult initializeAndRun(Operation *top); |
| |
| /// Lookup an analysis state for the given program point. Returns null if one |
| /// does not exist. |
| template <typename StateT, typename PointT> |
| const StateT *lookupState(PointT point) const { |
| auto it = analysisStates.find({ProgramPoint(point), TypeID::get<StateT>()}); |
| if (it == analysisStates.end()) |
| return nullptr; |
| return static_cast<const StateT *>(it->second.get()); |
| } |
| |
| /// Get a uniqued program point instance. If one is not present, it is |
| /// created with the provided arguments. |
| template <typename PointT, typename... Args> |
| PointT *getProgramPoint(Args &&...args) { |
| return PointT::get(uniquer, std::forward<Args>(args)...); |
| } |
| |
| /// A work item on the solver queue is a program point, child analysis pair. |
| /// Each item is processed by invoking the child analysis at the program |
| /// point. |
| using WorkItem = std::pair<ProgramPoint, DataFlowAnalysis *>; |
| /// Push a work item onto the worklist. |
| void enqueue(WorkItem item) { worklist.push(std::move(item)); } |
| |
| /// Get the state associated with the given program point. If it does not |
| /// exist, create an uninitialized state. |
| template <typename StateT, typename PointT> |
| StateT *getOrCreateState(PointT point); |
| |
| /// Propagate an update to an analysis state if it changed by pushing |
| /// dependent work items to the back of the queue. |
| void propagateIfChanged(AnalysisState *state, ChangeResult changed); |
| |
| /// Get the configuration of the solver. |
| const DataFlowConfig &getConfig() const { return config; } |
| |
| private: |
| /// Configuration of the dataflow solver. |
| DataFlowConfig config; |
| |
| /// The solver's work queue. Work items can be inserted to the front of the |
| /// queue to be processed greedily, speeding up computations that otherwise |
| /// quickly degenerate to quadratic due to propagation of state updates. |
| std::queue<WorkItem> worklist; |
| |
| /// Type-erased instances of the children analyses. |
| SmallVector<std::unique_ptr<DataFlowAnalysis>> childAnalyses; |
| |
| /// The storage uniquer instance that owns the memory of the allocated program |
| /// points. |
| StorageUniquer uniquer; |
| |
| /// A type-erased map of program points to associated analysis states for |
| /// first-class program points. |
| DenseMap<std::pair<ProgramPoint, TypeID>, std::unique_ptr<AnalysisState>> |
| analysisStates; |
| |
| /// Allow the base child analysis class to access the internals of the solver. |
| friend class DataFlowAnalysis; |
| }; |
| |
| //===----------------------------------------------------------------------===// |
| // AnalysisState |
| //===----------------------------------------------------------------------===// |
| |
| /// Base class for generic analysis states. Analysis states contain data-flow |
| /// information that are attached to program points and which evolve as the |
| /// analysis iterates. |
| /// |
| /// This class places no restrictions on the semantics of analysis states beyond |
| /// these requirements. |
| /// |
| /// 1. Querying the state of a program point prior to visiting that point |
| /// results in uninitialized state. Analyses must be aware of unintialized |
| /// states. |
| /// 2. Analysis states can reach fixpoints, where subsequent updates will never |
| /// trigger a change in the state. |
| /// 3. Analysis states that are uninitialized can be forcefully initialized to a |
| /// default value. |
| class AnalysisState { |
| public: |
| virtual ~AnalysisState(); |
| |
| /// Create the analysis state at the given program point. |
| AnalysisState(ProgramPoint point) : point(point) {} |
| |
| /// Returns the program point this state is located at. |
| ProgramPoint getPoint() const { return point; } |
| |
| /// Print the contents of the analysis state. |
| virtual void print(raw_ostream &os) const = 0; |
| LLVM_DUMP_METHOD void dump() const; |
| |
| /// Add a dependency to this analysis state on a program point and an |
| /// analysis. If this state is updated, the analysis will be invoked on the |
| /// given program point again (in onUpdate()). |
| void addDependency(ProgramPoint dependent, DataFlowAnalysis *analysis); |
| |
| protected: |
| /// This function is called by the solver when the analysis state is updated |
| /// to enqueue more work items. For example, if a state tracks dependents |
| /// through the IR (e.g. use-def chains), this function can be implemented to |
| /// push those dependents on the worklist. |
| virtual void onUpdate(DataFlowSolver *solver) const { |
| for (const DataFlowSolver::WorkItem &item : dependents) |
| solver->enqueue(item); |
| } |
| |
| /// The program point to which the state belongs. |
| ProgramPoint point; |
| |
| #if LLVM_ENABLE_ABI_BREAKING_CHECKS |
| /// When compiling with debugging, keep a name for the analysis state. |
| StringRef debugName; |
| #endif // LLVM_ENABLE_ABI_BREAKING_CHECKS |
| |
| private: |
| /// The dependency relations originating from this analysis state. An entry |
| /// `state -> (analysis, point)` is created when `analysis` queries `state` |
| /// when updating `point`. |
| /// |
| /// When this state is updated, all dependent child analysis invocations are |
| /// pushed to the back of the queue. Use a `SetVector` to keep the analysis |
| /// deterministic. |
| /// |
| /// Store the dependents on the analysis state for efficiency. |
| SetVector<DataFlowSolver::WorkItem> dependents; |
| |
| /// Allow the framework to access the dependents. |
| friend class DataFlowSolver; |
| }; |
| |
| //===----------------------------------------------------------------------===// |
| // DataFlowAnalysis |
| //===----------------------------------------------------------------------===// |
| |
| /// Base class for all data-flow analyses. A child analysis is expected to build |
| /// an initial dependency graph (and optionally provide an initial state) when |
| /// initialized and define transfer functions when visiting program points. |
| /// |
| /// In classical data-flow analysis, the dependency graph is fixed and analyses |
| /// define explicit transfer functions between input states and output states. |
| /// In this framework, however, the dependency graph can change during the |
| /// analysis, and transfer functions are opaque such that the solver doesn't |
| /// know what states calling `visit` on an analysis will be updated. This allows |
| /// multiple analyses to plug in and provide values for the same state. |
| /// |
| /// Generally, when an analysis queries an uninitialized state, it is expected |
| /// to "bail out", i.e., not provide any updates. When the value is initialized, |
| /// the solver will re-invoke the analysis. If the solver exhausts its worklist, |
| /// however, and there are still uninitialized states, the solver "nudges" the |
| /// analyses by default-initializing those states. |
| class DataFlowAnalysis { |
| public: |
| virtual ~DataFlowAnalysis(); |
| |
| /// Create an analysis with a reference to the parent solver. |
| explicit DataFlowAnalysis(DataFlowSolver &solver); |
| |
| /// Initialize the analysis from the provided top-level operation by building |
| /// an initial dependency graph between all program points of interest. This |
| /// can be implemented by calling `visit` on all program points of interest |
| /// below the top-level operation. |
| /// |
| /// An analysis can optionally provide initial values to certain analysis |
| /// states to influence the evolution of the analysis. |
| virtual LogicalResult initialize(Operation *top) = 0; |
| |
| /// Visit the given program point. This function is invoked by the solver on |
| /// this analysis with a given program point when a dependent analysis state |
| /// is updated. The function is similar to a transfer function; it queries |
| /// certain analysis states and sets other states. |
| /// |
| /// The function is expected to create dependencies on queried states and |
| /// propagate updates on changed states. A dependency can be created by |
| /// calling `addDependency` between the input state and a program point, |
| /// indicating that, if the state is updated, the solver should invoke `solve` |
| /// on the program point. The dependent point does not have to be the same as |
| /// the provided point. An update to a state is propagated by calling |
| /// `propagateIfChange` on the state. If the state has changed, then all its |
| /// dependents are placed on the worklist. |
| /// |
| /// The dependency graph does not need to be static. Each invocation of |
| /// `visit` can add new dependencies, but these dependencies will not be |
| /// dynamically added to the worklist because the solver doesn't know what |
| /// will provide a value for then. |
| virtual LogicalResult visit(ProgramPoint point) = 0; |
| |
| protected: |
| /// Create a dependency between the given analysis state and program point |
| /// on this analysis. |
| void addDependency(AnalysisState *state, ProgramPoint point); |
| |
| /// Propagate an update to a state if it changed. |
| void propagateIfChanged(AnalysisState *state, ChangeResult changed); |
| |
| /// Register a custom program point class. |
| template <typename PointT> |
| void registerPointKind() { |
| solver.uniquer.registerParametricStorageType<PointT>(); |
| } |
| |
| /// Get or create a custom program point. |
| template <typename PointT, typename... Args> |
| PointT *getProgramPoint(Args &&...args) { |
| return solver.getProgramPoint<PointT>(std::forward<Args>(args)...); |
| } |
| |
| /// Get the analysis state associated with the program point. The returned |
| /// state is expected to be "write-only", and any updates need to be |
| /// propagated by `propagateIfChanged`. |
| template <typename StateT, typename PointT> |
| StateT *getOrCreate(PointT point) { |
| return solver.getOrCreateState<StateT>(point); |
| } |
| |
| /// Get a read-only analysis state for the given point and create a dependency |
| /// on `dependent`. If the return state is updated elsewhere, this analysis is |
| /// re-invoked on the dependent. |
| template <typename StateT, typename PointT> |
| const StateT *getOrCreateFor(ProgramPoint dependent, PointT point) { |
| StateT *state = getOrCreate<StateT>(point); |
| addDependency(state, dependent); |
| return state; |
| } |
| |
| /// Return the configuration of the solver used for this analysis. |
| const DataFlowConfig &getSolverConfig() const { return solver.getConfig(); } |
| |
| #if LLVM_ENABLE_ABI_BREAKING_CHECKS |
| /// When compiling with debugging, keep a name for the analyis. |
| StringRef debugName; |
| #endif // LLVM_ENABLE_ABI_BREAKING_CHECKS |
| |
| private: |
| /// The parent data-flow solver. |
| DataFlowSolver &solver; |
| |
| /// Allow the data-flow solver to access the internals of this class. |
| friend class DataFlowSolver; |
| }; |
| |
| template <typename AnalysisT, typename... Args> |
| AnalysisT *DataFlowSolver::load(Args &&...args) { |
| childAnalyses.emplace_back(new AnalysisT(*this, std::forward<Args>(args)...)); |
| #if LLVM_ENABLE_ABI_BREAKING_CHECKS |
| childAnalyses.back().get()->debugName = llvm::getTypeName<AnalysisT>(); |
| #endif // LLVM_ENABLE_ABI_BREAKING_CHECKS |
| return static_cast<AnalysisT *>(childAnalyses.back().get()); |
| } |
| |
| template <typename StateT, typename PointT> |
| StateT *DataFlowSolver::getOrCreateState(PointT point) { |
| std::unique_ptr<AnalysisState> &state = |
| analysisStates[{ProgramPoint(point), TypeID::get<StateT>()}]; |
| if (!state) { |
| state = std::unique_ptr<StateT>(new StateT(point)); |
| #if LLVM_ENABLE_ABI_BREAKING_CHECKS |
| state->debugName = llvm::getTypeName<StateT>(); |
| #endif // LLVM_ENABLE_ABI_BREAKING_CHECKS |
| } |
| return static_cast<StateT *>(state.get()); |
| } |
| |
| inline raw_ostream &operator<<(raw_ostream &os, const AnalysisState &state) { |
| state.print(os); |
| return os; |
| } |
| |
| inline raw_ostream &operator<<(raw_ostream &os, ProgramPoint point) { |
| point.print(os); |
| return os; |
| } |
| |
| } // end namespace mlir |
| |
| namespace llvm { |
| /// Allow hashing of program points. |
| template <> |
| struct DenseMapInfo<mlir::ProgramPoint> |
| : public DenseMapInfo<mlir::ProgramPoint::ParentTy> {}; |
| |
| // Allow llvm::cast style functions. |
| template <typename To> |
| struct CastInfo<To, mlir::ProgramPoint> |
| : public CastInfo<To, mlir::ProgramPoint::PointerUnion> {}; |
| |
| template <typename To> |
| struct CastInfo<To, const mlir::ProgramPoint> |
| : public CastInfo<To, const mlir::ProgramPoint::PointerUnion> {}; |
| |
| } // end namespace llvm |
| |
| #endif // MLIR_ANALYSIS_DATAFLOWFRAMEWORK_H |