//===- LoopEmitter.h --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_SPARSETENSORLOOPEMITTER_H_
#define MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_SPARSETENSORLOOPEMITTER_H_

#include <vector>

#include "mlir/Dialect/SparseTensor/IR/Enums.h"
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
#include "mlir/Dialect/SparseTensor/Utils/Merger.h"
#include "mlir/IR/PatternMatch.h"

namespace mlir {
namespace sparse_tensor {

// A compressed <tensor id, level> pair.
using TensorLevel = unsigned;
//===----------------------------------------------------------------------===//
// The LoopEmitter class manages sparse tensors and helps generate the loop
// structures needed to (co)iterate over sparse tensors.
//
// An example usage:
// To generate the following loops over T1<?x?> and T2<?x?>
//
//   for i in TENSOR_1_0 {
//     for j in TENSOR_2_0 {
//       for k in TENSOR_1_1 {}
//       for k in TENSOR_2_1 {}
//     }
//   }
//
// one can use
//
//   LoopEmitter loopEmitter({T1, T2});
//   loopEmitter.initializeLoopEmit();
//   loopEmitter.enterLoopOverTensorAtLvl(T1, 0);
//   loopEmitter.enterLoopOverTensorAtLvl(T2, 0);
//   loopEmitter.enterLoopOverTensorAtLvl(T1, 1);
//   loopEmitter.exitCurrentLoop(); // exit k
//   loopEmitter.enterLoopOverTensorAtLvl(T2, 1);
//   loopEmitter.exitCurrentLoop(); // exit k
//   loopEmitter.exitCurrentLoop(); // exit j
//   loopEmitter.exitCurrentLoop(); // exit i
//===----------------------------------------------------------------------===//

class LoopEmitter {
public:
  /// Optional callback function to set up dense output tensors when
  /// initializing the loop emitter (e.g., to fill a dense output with zeros).
  using OutputUpdater = function_ref<Value(OpBuilder &builder, Location loc,
                                           Value memref, Value tensor)>;
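  // For example, a minimal sketch of an updater callback; a real updater
  // would typically zero-fill `memref` before handing it back (all names
  // here are illustrative):
  //
  //   loopEmitter.initializeLoopEmit(
  //       builder, loc,
  //       [](OpBuilder &b, Location l, Value memref, Value tensor) -> Value {
  //         // Zero-fill `memref` here if the output must be initialized.
  //         return memref;
  //       });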

  /// Optional callback function to set the bound for the synthetic tensor,
  /// which is essentially the dense loop bound.
  using SynTensorBoundSetter =
      function_ref<Value(OpBuilder &builder, Location loc, Level lvl)>;
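  // For example, a minimal sketch that derives the bound from dimension
  // `lvl` of a dense tensor `denseSrc` assumed to be in scope (both names
  // are illustrative):
  //
  //   loopEmitter.initializeLoopEmit(
  //       builder, loc, /*updater=*/nullptr,
  //       [&](OpBuilder &b, Location l, Level lvl) -> Value {
  //         return b.create<tensor::DimOp>(l, denseSrc, lvl);
  //       });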

  // Map from [tid, lvl] to a list of dependent [tidlvl, coefficient] pairs
  // for subscript expressions on sparse tensors.
  //
  // E.g., the affine index (2 * d0 + d1) depends on the two tidlvls that
  // define d0 and d1 (for affine expression reduction), with coefficients
  // 2 and 1 on d0 and d1, respectively.
  // If the list is empty, there is no affine expression on the input
  // [tid, lvl].
  //
  // NOTE: The caller is responsible for ensuring that the order of the
  // returned list is consistent with the topological order of the iteration
  // graph; otherwise the loop emitter might reduce the wrong dependent index
  // variable when generating slice-driven loops.
  using DependentLvlGetter =
      function_ref<std::vector<std::pair<TensorLevel, unsigned>>(TensorId,
                                                                 Level)>;
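  // For example, for the affine index (2 * d0 + d1), a conforming getter
  // could be sketched as follows, where `tl0`/`tl1` stand for the
  // TensorLevels that define d0/d1 (illustrative placeholders):
  //
  //   auto getter = [&](TensorId t, Level l)
  //       -> std::vector<std::pair<TensorLevel, unsigned>> {
  //     return {{tl0, 2}, {tl1, 1}};
  //   };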

  LoopEmitter() = default;

  /// Takes an array of input tensors, which the generated loops will
  /// iterate over. Each tensor is given a `TensorId` (numerically equal
  /// to the position of that tensor `Value` in the array). Setting
  /// `isSparseOut` indicates that the sparse output tensor is empty,
  /// so the loop emitter will generate loops over it according to the
  /// level-sizes.
  void initialize(ValueRange tensors, StringAttr loopTag = nullptr,
                  bool hasOutput = false, bool isSparseOut = false,
                  unsigned numLoops = 0, DependentLvlGetter getter = nullptr);

  explicit LoopEmitter(ValueRange tensors, StringAttr loopTag = nullptr,
                       bool hasOutput = false, bool isSparseOut = false,
                       unsigned numLoops = 0,
                       DependentLvlGetter getter = nullptr);

  /// Starts a loop emitting session by generating all the buffers needed
  /// for iterating over the tensors.
  void initializeLoopEmit(OpBuilder &builder, Location loc,
                          OutputUpdater updater = nullptr,
                          SynTensorBoundSetter synSetter = nullptr);

  /// Generates code to compute an affine expression whose variables are
  /// `LoopId`s (i.e., `a.cast<AffineDimExpr>().getPosition()` is a valid
  /// `LoopId`).
  Value genAffine(OpBuilder &builder, Location loc, AffineExpr a);
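  // For example, for the AffineExpr (d0 + 2 * d1), the generated code
  // schematically computes iv0 + 2 * iv1 from the induction variables of
  // loops 0 and 1 (a sketch of the intent, not the exact emitted IR).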

  /// Enters a new loop sequence; the loops within the same sequence start
  /// from the break point of the previous loop instead of starting over
  /// from 0.
  /// e.g.,
  /// {
  ///   // loop sequence start.
  ///   p0 = while(xxx)
  ///   ...
  ///   break p0
  ///
  ///   // Starts the loop from p0.
  ///   for (i = p0; i < end; i++)
  ///   ...
  ///   // loop sequence end.
  /// }
  void enterNewLoopSeq(OpBuilder &builder, Location loc,
                       ArrayRef<TensorLevel> tidLvls);

  /// Exits the current loop sequence; this resets the universal index to 0.
  void exitCurrentLoopSeq(OpBuilder &builder, Location loc);

  /// Enters a loop that tries to locate a coordinate in a sparse level based
  /// on the value evaluated by the provided affine expression.
  /// DEPRECATED: affine index expressions should be handled by index
  /// reduction loops; the filter-loop-based solution is slow.
  Operation *enterFilterLoopOverTensorAtLvl(OpBuilder &builder, Location loc,
                                            TensorId tid, Level lvl,
                                            AffineExpr affine,
                                            MutableArrayRef<Value> reduc = {});

  /// Emits the address for a dense level based on the value evaluated by the
  /// provided affine expression.
  /// DEPRECATED: affine index expressions should be handled by index
  /// reduction loops; the filter-loop-based solution is slow.
  void genDenseAffineAddress(OpBuilder &builder, Location loc,
                             TensorLevel tidLvl, AffineExpr lvlExpr);

  // TODO: Get rid of `lvls` in the argument list? Track the level we
  // are currently at internally. Then it would be enterNextLvlForTensor.
  // Still need a way to specify the lvl for non-annotated tensors though,
  // as those can be accessed out of order.
  //
  /// Emits a co-iteration loop over a set of tensor levels (tensor_tid_lvl);
  /// it assumes that the loops over tensor_tid_[0, lvl - 1] have already
  /// been generated.
  /// The function also performs an in-place update on the `reduc` vector to
  /// return the reduction variables used inside the generated loop.
  Operation *enterCoIterationOverTensorsAtLvls(
      OpBuilder &builder, Location loc, ArrayRef<TensorLevel> tidLvls,
      MutableArrayRef<Value> reduc = {}, bool isParallel = false,
      bool genDedup = false, bool needsUniv = false);

  /// Generates code to exit the current loop (e.g., generates yields,
  /// forwards loop induction variables, etc.).
  void exitCurrentLoop(RewriterBase &rewriter, Location loc,
                       MutableArrayRef<Value> reduc = {});

  /// Gets the range of values for all loop induction variables.
  auto getLoopIVsRange() const {
    return llvm::map_range(loopStack, [](const LoopInfo &li) { return li.iv; });
  }

  /// Returns the loop induction variables for all loops in the current
  /// loop stack.
  SmallVector<Value> getLoopIVs() const {
    return llvm::to_vector(getLoopIVsRange());
  }

  /// Gets the current depth of the loop stack.
  LoopId getCurrentDepth() const { return llvm::range_size(getLoopIVsRange()); }

  /// Gets the loop induction variable for the given loop.
  Value getLoopIV(LoopId n) const {
    if (n >= getCurrentDepth())
      return Value();
    auto it = getLoopIVsRange().begin();
    std::advance(it, n);
    return *it;
  }

  /// Gets the total number of manifest tensors (excluding the synthetic
  /// tensor).
  unsigned getNumManifestTensors() const { return tensors.size(); }

  /// Gets the total number of tensors that the loop emitter is operating on.
  unsigned getNumTensors() const {
    // Manifest tensors with one synthetic tensor at the end.
    return getNumManifestTensors() + 1;
  }

  /// Gets the TensorId for the synthetic tensor.
  TensorId getSynTensorId() const { return tensors.size(); }

  /// Gets the TensorId for the output tensor.
  TensorId getOutTensorId() const {
    assert(hasOutput);
    return getNumManifestTensors() - 1;
  }

  /// Compresses a TensorId and Level into a TensorLevel.
  TensorLevel makeTensorLevel(TensorId t, Level l) const {
    return l * getNumTensors() + t;
  }

  /// De-compresses a TensorLevel back to a pair of TensorId and Level.
  std::pair<TensorId, Level> unpackTensorLevel(TensorLevel tidLvl) const {
    unsigned nt = getNumTensors();
    return std::make_pair(tidLvl % nt, tidLvl / nt);
  }

  /// Converts a range of TensorLevel to a range of std::pair<TensorId, Level>.
  template <class ContainerTy>
  auto unpackTensorLevelRange(ContainerTy &&c) const {
    using EltTy = decltype(*c.begin());
    static_assert(std::is_same_v<llvm::remove_cvref_t<EltTy>, TensorLevel>,
                  "Must be unpacking a TensorLevel range");
    return llvm::map_range(std::forward<ContainerTy>(c), [this](EltTy tl) {
      return this->unpackTensorLevel(tl);
    });
  }

  template <class ContainerTy>
  auto unpackTensorLevelFromCondRange(ContainerTy &&c) const {
    using EltTy = decltype(*c.begin());
    static_assert(std::is_same_v<llvm::remove_cvref_t<EltTy>, TensorLvlCond>,
                  "Must be unpacking a TensorLvlCond range");
    return unpackTensorLevelRange(
        llvm::make_first_range(std::forward<ContainerTy>(c)));
  }

  ///
  /// Getters.
  ///
  const std::vector<std::vector<Value>> &getPosits() const { return posits; }
  const std::vector<std::vector<Value>> &getCoords() const { return coords; }
  const std::vector<std::vector<Value>> &getHighs() const { return highs; }
  const std::vector<std::vector<Value>> &getPositionBuffers() const {
    return positionsBuffers;
  }
  const std::vector<std::vector<Value>> &getCoordinateBuffers() const {
    return coordinatesBuffers;
  }
  const std::vector<Value> &getValBuffer() const { return valBuffer; }

  constexpr static llvm::StringLiteral getLoopEmitterLoopAttrName() {
    return llvm::StringLiteral("Emitted from");
  }

private:
  ///
  /// Structure definitions that hold different kinds of loop information.
  ///

  // A tuple that stores the slice-driven loop information.
  struct SliceLoopInfo final {
    SliceLoopInfo(TensorId tid, Level lvl, bool reduced)
        : tid(tid), lvl(lvl), reduced(reduced) {}
    TensorId tid;
    Level lvl;
    bool reduced;
  };
  // LoopInfo stores information about a loop generated by LoopEmitter, e.g.,
  // the set of tensor levels that the loop is iterating over.
  struct LoopInfo final {
    LoopInfo(ArrayRef<TensorLevel> trivialTidLvls,
             ArrayRef<SliceLoopInfo> sliceDrivenInfo, Operation *loop,
             Block *userBlock, Value iv, StringAttr loopTag)
        : trivialTidLvls(trivialTidLvls), sliceDrivenInfo(sliceDrivenInfo),
          loop(loop), userCodeBlock(userBlock), iv(iv) {
      // Attach a special tag to the loop-emitter-generated loop.
      if (loopTag)
        loop->setAttr(LoopEmitter::getLoopEmitterLoopAttrName(), loopTag);
    }
    // The set of <tensor, lvl> pairs, with *only* trivial index expressions,
    // that are used as the condition for the generated loop. Extra
    // information is required for levels with non-trivial index expressions,
    // which is maintained by the sliceDrivenInfo array below.
    const llvm::SmallVector<TensorLevel> trivialTidLvls;
    // The set of <tensor, lvl> pairs, with *only* non-trivial index
    // expressions, that are used as the condition for the generated loop.
    const llvm::SmallVector<SliceLoopInfo> sliceDrivenInfo;
    const Operation *loop;      // the loop operation
    Block *const userCodeBlock; // the block holding users' generated code.
    const Value iv;             // the induction variable for the loop
  };

  // SliceInfo stores the information of an extracted slice for a
  // slice-driven loop, e.g., the in-scope SSA values for the minimum
  // coordinate and the offset of the slice.
  struct SliceInfo final {
    // Note that we do not need to create an actual sparse tensor slice but
    // instead only need to maintain the metadata of the slice.
    SliceInfo(Value minCrd, Value offset, Value isNonEmpty, Value posTupleNum,
              std::optional<Level> slicedOnLvl, unsigned depth)
        : minCrd(minCrd), offset(offset), isNonEmpty(isNonEmpty),
          posTupleNum(posTupleNum), slicedOnLvl(slicedOnLvl), depth(depth) {
      // TODO: use std::optional<pair<Level, minCrd>>
      assert(!slicedOnLvl || minCrd);
    }

    // Whether this is the tensor that has not yet been sliced.
    bool isInitialTensor() const { return !slicedOnLvl.has_value(); }

    Value minCrd;      // the minimum coordinate of the slice.
    Value offset;      // the *absolute* offset of the current slice.
    Value isNonEmpty;  // whether the slice is non-empty.
    Value posTupleNum; // the number of position tuples used in the slice.
    std::optional<Level> slicedOnLvl; // the level on which the slice is done
    unsigned depth; // the depth (relative to dependentDimMap[tid][lvl]).
  };

  ///
  /// Enums for different kinds of loop conditions.
  ///

  // The bit indicating whether the loop condition is sparse.
  static constexpr uint8_t kSparseCond = 1 << 3;
  // The bit indicating whether the loop iterates over sparse tensor slices
  // (i.e., with non-empty SliceDimAttr).
  static constexpr uint8_t kSliceCond = 1 << 2;
  // The bit indicating whether the loop iterates over tensor levels with
  // non-trivial affine index reduction.
  static constexpr uint8_t kAffineIdxCond = 1 << 1;
  // The bit indicating whether the loop iterates over tensor levels with
  // non-trivial affine index reduction that is not yet fully reduced.
  static constexpr uint8_t kAffineIdxCondUnRed = 1 << 0;

  enum class LoopCondKind : uint8_t {
    // Dense conditions.
    DenseCond = 0,
    DenseSliceCond = kSliceCond,
    DenseAffineCond = kAffineIdxCond,
    DenseAffineUnRedCond = kAffineIdxCond | kAffineIdxCondUnRed,
    // Sparse conditions.
    SparseCond = kSparseCond,
    SparseSliceCond = kSparseCond | kSliceCond,
    SparseAffineCond = kSparseCond | kAffineIdxCond,
    SparseAffineUnRedCond = kSparseCond | kAffineIdxCond | kAffineIdxCondUnRed,
  };
  using TensorLvlCond = std::pair<TensorLevel, LoopCondKind>;

  /// Sparse or dense loop condition.
  static bool isSparseCond(LoopCondKind k) {
    return static_cast<uint8_t>(k) & kSparseCond;
  }
  static bool isDenseCond(LoopCondKind k) { return !isSparseCond(k); }

  /// Whether the loop iterates over sparse tensor slices (as opposed to
  /// whole sparse tensors).
  static bool isSliceCond(LoopCondKind k) {
    return static_cast<uint8_t>(k) & kSliceCond;
  }

  /// Affine or trivial index expression loop condition.
  static bool isAffineIdxCond(LoopCondKind k) {
    return static_cast<uint8_t>(k) & kAffineIdxCond;
  }
  static bool isTrivalIdxCond(LoopCondKind k) { return !isAffineIdxCond(k); }

  /// Whether the affine index expression is still unreduced (UnRed) or
  /// already fully reduced (Red).
  static bool isAffineIdxUnRedCond(LoopCondKind k) {
    return isAffineIdxCond(k) && static_cast<uint8_t>(k) & kAffineIdxCondUnRed;
  }
  static bool isAffineIdxRedCond(LoopCondKind k) {
    return isAffineIdxCond(k) && !isAffineIdxUnRedCond(k);
  }

  // Whether the loop condition kind requires an extra check inside the loop
  // body. E.g., to iterate over a sparse tensor slice, we need to check
  // whether the current coordinate is on the slice (e.g., due to stride)
  // or not.
  static bool isCondWithExtraCheck(LoopCondKind k) {
    return isSparseCond(k) && (isSliceCond(k) || isAffineIdxUnRedCond(k));
  }

  static LoopCondKind makeLoopCondKind(bool isSparse, bool isSlice,
                                       bool isAffine, bool isUnRedu) {
    assert(!isUnRedu || isAffine);
    uint8_t bits = 0;
    bits = isSparse ? bits | kSparseCond : bits;
    bits = isSlice ? bits | kSliceCond : bits;
    bits = isAffine ? bits | kAffineIdxCond : bits;
    bits = isUnRedu ? bits | kAffineIdxCondUnRed : bits;
    LoopCondKind kind = static_cast<LoopCondKind>(bits);

    // Sanity checks.
    assert(isSparse == isSparseCond(kind));
    assert(isSlice == isSliceCond(kind));
    assert(isAffine == isAffineIdxCond(kind));
    assert(isUnRedu == isAffineIdxUnRedCond(kind));
    return kind;
  }

  void categorizeLoopCondition(ArrayRef<TensorLevel> tidLvls,
                               SmallVectorImpl<TensorLvlCond> &dnConds,
                               SmallVectorImpl<TensorLvlCond> &spConds);

  ///
  /// LoopEmitter internal helper functions.
  ///

  using LoopBodyBuilder = llvm::function_ref<void(OpBuilder &, Location, Value,
                                                  MutableArrayRef<Value>)>;

  /// Whether the list of sparse conditions should be iterated over by a
  /// for loop.
  bool shouldIteratedByForLoop(ArrayRef<TensorLvlCond> spConds, bool genDedup);

  /// Linearizes the address for a dense dimension (i.e., p = (i * d0) + j).
  Value genAddress(OpBuilder &builder, Location loc, TensorId tid, Level lvl,
                   Value iv);

  /// Generates the segment high for a non-unique level (to fast-forward
  /// duplicated coordinates). That is, it generates the code:
  ///
  ///   crd = coordinates_tid_lvl[pos]
  ///   while (pos < pHi && coordinates_tid_lvl[pos] == crd)
  ///     pos++;
  ///   <return pos>;
  Value genSegmentHigh(OpBuilder &builder, Location loc, TensorId tid,
                       Level lvl, Value pos, Value pHi);

  /// Generates instructions to compute the coordinate of tensors[tid][lvl]
  /// under the current loop context. The final argument is the
  /// collapsed-output level, whereas this function handles converting
  /// that to the uncollapsed-input level.
  Value genSparseCrd(OpBuilder &builder, Location loc, TensorId tid,
                     Level dstLvl);

  /// Generates a predicate to determine whether the transformed coordinates
  /// are in the given slice.
  /// Returns std::pair<Transformed coordinates, Predicate>.
  std::pair<Value, Value> genSliceLegitPredicate(OpBuilder &builder,
                                                 Location loc, Value crd,
                                                 TensorId tid, Level lvl);

  bool isSynTensor(TensorId tid) const { return tid == getSynTensorId(); }

  bool isOutputTensor(TensorId tid) const {
    return hasOutput && tid == getOutTensorId();
  }

  bool isSparseOutput(TensorId tid) const {
    return isOutputTensor(tid) && isSparseOut;
  }

  bool isValidLevel(TensorId tid, Level lvl) const {
    return tid < lvlTypes.size() && lvl < lvlTypes[tid].size();
  }

  /// Prepares the loop for iterating over `tensor[lvl]`, under the assumption
  /// that `tensor[0...lvl-1]` loops have already been set up.
  void prepareLoopOverTensorAtLvl(OpBuilder &builder, Location loc,
                                  TensorId tid, Level lvl);

  /// Enters dense tensor levels. Since dense tensor conditions could be
  /// optimized away from the loop condition, we need to compute the
  /// positions/coordinates inside the loop body.
  void enterTensorsAtDenseLvls(OpBuilder &builder, Location loc,
                               ArrayRef<TensorLvlCond> dnConds, Value iv,
                               SmallVectorImpl<SliceLoopInfo> &sliceInfo);

  /// Emits a for loop to iterate over a tensor level with the provided
  /// lower bound `lo` and upper bound `hi`. Apart from iterating over a
  /// single tensor level, for loops can also be used for slice-driven loops
  /// on dense levels.
  /// Returns a pair: the loop generated and the value of the induction
  /// variable.
  std::pair<Operation *, Value>
  emitForLoopOverTensorAtLvl(OpBuilder &builder, Location loc, TensorId tid,
                             Level lvl, Value lo, Value hi,
                             MutableArrayRef<Value> reduc, bool isParallel);

  /// Emits a while loop to co-iterate over a list of sparse conditions, or
  /// a (complex) single sparse condition that cannot be handled by a for
  /// loop (e.g., an index reduction loop).
  /// Returns a pair: the loop generated and the value of the induction
  /// variable (which is the minimum coordinate of all the tensors being
  /// iterated over).
  std::pair<Operation *, Value>
  emitWhileLoopOverTensorsAtLvls(OpBuilder &builder, Location loc,
                                 ArrayRef<TensorLvlCond> spConds,
                                 MutableArrayRef<Value> reduc, bool needsUniv);

  /// Generates the while loop condition for the given tensor level condition.
  Value genWhileLoopConditions(OpBuilder &builder, Location loc, ValueRange ivs,
                               TensorLvlCond cond);

  /// Generates the while loop body for the given tensor level condition.
  std::optional<Value> genWhileLoopBody(OpBuilder &builder, Location loc,
                                        ValueRange ivs, TensorLvlCond cond);

  /// Generates the values (to forward the loop) if the extra check fails.
  /// E.g., to iterate over a sparse tensor slice, we need:
  ///
  ///   pos = onSlice(curCrd) ? pos : pos + 1
  ///
  /// to skip invalid coordinates that are not included in the slice.
  ValueRange genCheckedValue(OpBuilder &builder, Location loc, Value pred,
                             ValueRange curArg, TensorLvlCond cond);

  /// Exits a for loop and returns the reduction results. E.g., for
  /// sequential for loops:
  ///   %ret = for () {
  ///     ...
  ///     %val = addi %args, %c
  ///     yield %val
  ///   }
  /// For parallel loops, the following code generated by users:
  ///   %ret = parallel () init(%args) {
  ///     ...
  ///     %val = op %args, %c
  ///   }
  /// will be transformed into:
  ///   %ret = parallel () init(%args) {
  ///     ...
  ///     scf.reduce(%c) bb0(%0, %1) {
  ///       %val = op %0, %1
  ///       scf.reduce.return %val
  ///     }
  ///   }
  /// NOTE: only one instruction will be moved into the reduce block; the
  /// transformation will fail if multiple instructions are used to compute
  /// the reduction value. %ret is returned to the user, while %val is
  /// provided by the user (`reduc`).
  void exitForLoop(RewriterBase &rewriter, Location loc,
                   MutableArrayRef<Value> reduc);

  /// Exits a while loop and returns the reduction results.
  void exitWhileLoop(OpBuilder &builder, Location loc,
                     MutableArrayRef<Value> reduc);

  //
  // Slice-driven loop related methods.
  //

  /// Retrieves the most recent slice on lvl. To reduce an affine expression
  /// like d0 + d1 + d2, we need two slices (one of size d1 + d2, and the
  /// other of size d2). This method returns the latter slice (of size d2).
  const SliceInfo &getMostRecentSliceOnLvl(TensorId tid, Level lvl);

  /// Similar to getMostRecentSliceOnLvl, but asserts that the most recent
  /// slice is the final slice needed to fully reduce the dependencies.
  const SliceInfo &getFinalSliceOnLvl(TensorId tid, Level lvl) {
    const SliceInfo &info = getMostRecentSliceOnLvl(tid, lvl);
    assert(info.depth == dependentLvlMap[tid][lvl].size() - 1);
    return info;
  }

  /// Gets the remaining number of constraints needed to fully *resolve*
  /// dependent levels on tensor[tid].
  unsigned remDepOnLevel(TensorId tid, Level lvl) const;

  /// Whether the tid, lvl is fully *reduced*, i.e., the non-trivial index
  /// expression has been reduced to a trivial one.
  /// E.g., A[i + j] => A[i + 2] (j is reduced).
  bool depFullyReduced(TensorId tid, Level lvl) const {
    return remDepOnLevel(tid, lvl) == 1;
  }

  /// Whether the tid, lvl is fully *resolved*, i.e., we entered the level
  /// already (the index on that level is determined).
  /// E.g., A[i + j] => A[2 + 3] (both i and j become invariants for inner
  /// loops).
  bool lvlFullyResolved(TensorId tid, Level lvl) const {
    return remDepOnLevel(tid, lvl) == 0;
  }

  /// Generates a whileOp to iterate over a subset of coordinates on tid on
  /// lvl using the provided pLo and pHi; the loop breaks on the first
  /// coordinate that exceeds the slice boundary (i.e., coord >=
  /// slice.offset + slice.size).
  std::pair<Operation *, ValueRange>
  genSliceLvlTraverseLoop(OpBuilder &builder, Location loc, Value pLo,
                          Value pHi, Value offset, Value size, TensorId tid,
                          Level lvl, ValueRange userReduc,
                          LoopBodyBuilder bodyBuilder);

  /// Generates a nested loop that iterates over tid on all the coordinates
  /// on lvl.
  ValueRange genUnResolvedSliceTreeTraverse(
      OpBuilder &builder, Location loc, TensorId tid,
      ArrayRef<const SliceInfo *> unResLvls,
      std::optional<std::pair<TensorId, Level>> firstResLvl,
      ValueRange userReduc, LoopBodyBuilder bodyBuilder);

  /// Generates code to get the first non-empty slice of tid on lvl, when all
  /// the previous levels before `lvl` are resolved (or `lvl` is the first
  /// level).
  ///
  /// This is the simple case because the previous levels are resolved into a
  /// single node in the storage tree.
  void genResolvedSliceBegin(OpBuilder &builder, Location loc, TensorId tid,
                             Level lvl);

  /// Generates code to get the first non-empty slice of tid on lvl, when
  /// the previous levels before `lvl` are unresolved.
  ///
  /// This is the complex case because the previous levels correspond to a
  /// range of nodes in the storage tree.
  void genUnResolvedSliceBegin(OpBuilder &builder, Location loc, TensorId tid,
                               Level lvl);

  /// Generates code to get the first non-empty slice of tid on lvl.
  /// Returns true if the slice has already been resolved.
  bool genSliceBegin(OpBuilder &builder, Location loc, TensorId tid, Level lvl);

  /// Generates code to get the next non-empty slice of tid on lvl.
  /// Returns a tuple of values for <NonEmpty, MinCrd, AbsOffset> (see
  /// SliceInfo), respectively.
  std::tuple<Value, Value, Value> genSliceNextInduction(OpBuilder &builder,
                                                        Location loc,
                                                        TensorId tid,
                                                        Level lvl);

  /// An optional string attribute that should be attached to the loops
  /// generated by the loop emitter; it might help subsequent passes to
  /// identify loops that operate on sparse tensors more easily.
  StringAttr loopTag;
  /// Whether the loop emitter needs to treat the last tensor as the output
  /// tensor.
  bool hasOutput;
  bool isSparseOut;

  /// The insertion point to allocate top level local variables.
  Operation *localInsertPos;

  //
  // Fields which have `numTensor` many entries.
  //
  // TODO: switch to an AOS style to avoid any possible mismatches.
  //

  /// Input and (optional) output tensors.
  std::vector<Value> tensors;
  /// Level-types for each `(TensorId, Level)` pair.
  std::vector<std::vector<LevelType>> lvlTypes;
  // Sparse iteration information for each `(TensorId, Level)` pair.
  // These arrays are updated to remain current within the current loop.
  std::vector<std::vector<Value>> posits;
  /// The collection of coordinates for a given element (one such
  /// collection for each tensor).
  std::vector<std::vector<Value>> coords;
  // The segment upper bound for non-unique levels after de-duplication.
  std::vector<std::vector<Value>> segHi;
  std::vector<std::vector<Value>> highs;
  std::vector<std::vector<Value>> lvlSizes;
  std::vector<std::vector<Value>> positionsBuffers;   // to_positions
  std::vector<std::vector<Value>> coordinatesBuffers; // to_coordinates
  std::vector<Value> valBuffer;                       // to_value

  //
  // Slice-driven loop related fields.
  //

  /// Whether the sparse input is a slice.
  std::vector<bool> isSparseSlices;
  /// Values related to slices.
  std::vector<std::vector<Value>> sliceOffsets;
  std::vector<std::vector<Value>> sliceStrides;

  // Map from [tid, level] to a list of dependent [tidlevel, coefficient]
  // pairs. See the comments for `DependentLvlGetter`.
  std::vector<std::vector<std::vector<std::pair<TensorLevel, unsigned>>>>
      dependentLvlMap;

  // The cached position buffers for the slices; they serve the same purpose
  // as the position buffers (positionsBuffers) do for compressed levels, but
  // they always start with the first pidx pointing to a coord > slice.offset,
  // to avoid iterating from the beginning.
  std::vector<std::vector<std::vector<Value>>> slicePosBuffer;
  std::vector<std::vector<Value>> sliceTupleNxStartIdx;
  std::vector<std::vector<Value>> sliceTupleFwdCnt;
  std::vector<std::vector<bool>> trivialSlice;

  // The (size, stride) for each conceptual slice used for index reduction
  // loops.
  std::vector<std::vector<std::vector<std::pair<Value, unsigned>>>> sliceMeta;

  // The number of reduced dependencies on a tensor level so far.
  std::vector<std::vector<unsigned>> levelReducedDep;

  // sliceStack[tid] holds the generated slice stack on tid.
  std::vector<std::vector<SliceInfo>> sliceStack;

  //
  // Fields which have at most `numLoops` many entries.
  //

  /// The loop stack, which stores the information of all the nested loops
  /// that are currently alive.
  std::vector<LoopInfo> loopStack;

  // The loop sequence stack, which stores the universal index for the
  // current loop sequence, together with the list of tensor levels that
  // were sliced for it.
  // TODO: maybe we should have a LoopSeqInfo.
  std::vector<std::pair<Value, std::vector<std::tuple<TensorId, Level, bool>>>>
      loopSeqStack;
};

} // namespace sparse_tensor
} // namespace mlir

#endif // MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_SPARSETENSORLOOPEMITTER_H_