//===- Transforms.h - Linalg transformations as patterns --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H_
#define DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H_

#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/IR/Identifier.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/Bufferize.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallSet.h"

namespace mlir {
class BufferizeTypeConverter;
class FrozenRewritePatternList;

namespace linalg {

struct LinalgFusionOptions;
struct LinalgTilingOptions;

//===----------------------------------------------------------------------===//
// Transformations exposed as function calls.
//===----------------------------------------------------------------------===//
using LinalgLoops = SmallVector<Operation *, 4>;

struct TiledLinalgOp {
  LinalgOp op;
  SmallVector<Operation *, 8> loops;
  SmallVector<Value, 4> tensorResults;
};

/// Populates patterns for vectorization of all ConvN-D ops.
void populateConvVectorizationPatterns(
    MLIRContext *context, SmallVectorImpl<OwningRewritePatternList> &patterns,
    ArrayRef<int64_t> tileSizes);

/// Populates the given list with patterns to bufferize linalg ops.
void populateLinalgBufferizePatterns(MLIRContext *context,
                                     BufferizeTypeConverter &converter,
                                     OwningRewritePatternList &patterns);
/// Performs standalone tiling of a single LinalgOp by `tileSizes` and permutes
/// the loop nest according to `interchangeVector`. The permutation is
/// expressed as a list of integers that specify the new ordering of the loop
/// nest. The length of `interchangeVector` must be equal to the length of
/// `tileSizes`. An empty vector is interpreted as the identity permutation and
/// the transformation returns early.
///
/// Returns a struct containing the tiled loops in the specified order
/// and the cloned op if successful, llvm::None otherwise.
///
/// E.g. the permutation `(i,j,k) -> (j,k,i)` is expressed by
/// `interchangeVector = [1,2,0]`. All values in `interchangeVector` must be
/// integers, in the range 0..`tileSizes.size()` without duplications
/// (i.e. `[1,1,2]` is an invalid permutation).
Optional<TiledLinalgOp> tileLinalgOp(OpBuilder &b, LinalgOp op,
                                     const LinalgTilingOptions &options);
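
// Example (usage sketch, not from the original header): tile a matmul by
// (8, 16, 32) and interchange the tiled loops (i, j, k) -> (j, k, i). The
// OpBuilder `b` and the `matmulOp` handle are assumed to exist in the caller.
//
//   LinalgTilingOptions tilingOptions = LinalgTilingOptions()
//                                           .setTileSizes({8, 16, 32})
//                                           .setInterchange({1, 2, 0});
//   if (Optional<TiledLinalgOp> tiled =
//           tileLinalgOp(b, matmulOp, tilingOptions)) {
//     // tiled->op is the tiled clone; tiled->loops contains the generated
//     // loop ops in the specified order.
//   }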

/// Fuse a sequence of linalg operations (`ops`) using tile-and-fuse. This
/// proceeds as follows:
/// - Find outer parallel loops in these ops that can be fused.
/// - Tile fusable outer parallel loops of the last operation in the sequence.
/// - Fuse the remaining operations with the tiled operation.
///
/// For example, consider the sequence of matmuls below
///
///   linalg.matmul ins(%arg0, %arg1 : memref<256x32xf32>, memref<32x32xf32>)
///                 outs(%arg2 : memref<256x32xf32>)
///   linalg.matmul ins(%arg2, %arg3 : memref<256x32xf32>, memref<32x32xf32>)
///                 outs(%arg4 : memref<256x32xf32>)
///
/// It is legal to fuse the RAW dependence (through %arg2) by only fusing the
/// matmuls row-wise. For example, the fused computation for the above is shown
/// below. The outer `scf.parallel` loop is the "fused" loop obtained by tiling
/// along the rows of the matrix. The entire rows of the first matmul operation
/// need to be computed before they can be used for the second matmul. The
/// second matmul is further tiled (similar to normal tiling).
///
///   #map0 = affine_map<(d0, d1)[s0] -> (d0 * 32 + s0 + d1)>
///   #map1 = affine_map<(d0, d1) -> (d0 * 32 + d1)>
///   scf.parallel (%arg5) = (%c0) to (%c256) step (%c16) {
///     %0 = subview %arg2[%arg5, 0] [16, 32] [1, 1]
///       : memref<256x32xf32> to memref<16x32xf32, #map0>
///     %1 = subview %arg4[%arg5, 0] [16, 32] [1, 1]
///       : memref<256x32xf32> to memref<16x32xf32, #map0>
///     %2 = subview %arg0[%arg5, 0] [16, 32] [1, 1]
///       : memref<256x32xf32> to memref<16x32xf32, #map0>
///     %3 = subview %arg1[0, 0] [32, 32] [1, 1]
///       : memref<32x32xf32> to memref<32x32xf32, #map1>
///     %4 = subview %arg3[0, 0] [32, 32] [1, 1]
///       : memref<32x32xf32> to memref<32x32xf32, #map1>
///     linalg.matmul
///       ins(%2, %3 : memref<16x32xf32, #map0>, memref<32x32xf32, #map1>)
///       outs(%0 : memref<16x32xf32, #map0>)
///     linalg.matmul
///       ins(%0, %4 : memref<16x32xf32, #map0>, memref<32x32xf32, #map1>)
///       outs(%1 : memref<16x32xf32, #map0>)
///   }
///
/// `tilingOptions` is used to tile the operations in `ops`. Based on how
/// tile+fuse is implemented, the fused loops are generated based on the last
/// operation in the sequence; in particular, the tile sizes for the fused
/// loops are obtained from the tiling options of that last operation. The
/// following tiling options are handled differently in tile+fuse (compared to
/// tile only):
/// - Interchange of the tiling loops is not supported right now.
/// - Only the fused loops are distributed.
struct TiledAndFusedLinalgOps {
  /// Operation obtained by tiling the last operation in the sequence of `ops`
  /// passed to `tileAndFuseLinalgOps`.
  LinalgOp op;
  /// The dimensions of the loops that are fused.
  std::set<unsigned> fusedLoopDims;
  /// The generated fused operations (created within the fused loops).
  SmallVector<LinalgOp, 1> fusedProducers;
  /// The fused loops generated.
  SmallVector<Operation *, 4> fusedLoops;
};
Optional<TiledAndFusedLinalgOps>
tileAndFuseLinalgOps(OpBuilder &builder, ArrayRef<LinalgOp> ops,
                     const LinalgDependenceGraph &dependenceGraph,
                     const LinalgTilingOptions &tilingOptions);
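
// Example (usage sketch): tile-and-fuse the two matmuls from the IR above by
// rows (tile size 16 on the outermost parallel loop). The dependence graph
// construction shown here is an assumption about the surrounding code; see
// LinalgDependenceGraph for the actual API.
//
//   SmallVector<LinalgOp, 2> ops = {producerMatmul, consumerMatmul};
//   Aliases aliases;
//   LinalgDependenceGraph dependenceGraph(aliases, ops);
//   Optional<TiledAndFusedLinalgOps> tiledAndFused = tileAndFuseLinalgOps(
//       b, ops, dependenceGraph, LinalgTilingOptions().setTileSizes({16}));
//   if (tiledAndFused) {
//     // tiledAndFused->op is the tiled consumer;
//     // tiledAndFused->fusedProducers are the producers cloned into the
//     // fused loops.
//   }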

/// Interchanges the `iterator_types` and `indexing_maps` dimensions of `op`.
/// This is an in-place transformation controlled by `interchangeVector`.
/// An empty vector is interpreted as the identity permutation and the
/// transformation returns early.
///
/// E.g. the permutation `(i,j,k) -> (j,k,i)` is expressed with
/// `interchangeVector = [1,2,0]`. All values in `interchangeVector` must be
/// integers, in the range 0..`op.rank` without duplications
/// (i.e. `[1,1,2]` is an invalid permutation).
LinalgOp interchange(LinalgOp op, ArrayRef<unsigned> interchangeVector);
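
// Example (usage sketch): apply the permutation (i, j, k) -> (j, k, i) in
// place to a 3-loop op; `genericOp` is an assumed handle to a linalg.generic.
//
//   interchange(genericOp, /*interchangeVector=*/{1, 2, 0});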

/// Callback function type used to perform the allocation for the promoted
/// `subView`. In `boundingSubViewSize` a best attempt is made to find the
/// smallest constant value for the size of the buffer needed for each
/// dimension. If that is not possible, contains the dynamic size of the
/// subview. The callback should return the buffer to use.
using AllocBufferCallbackFn = std::function<Optional<Value>(
    OpBuilder &b, SubViewOp subView, ArrayRef<Value> boundingSubViewSize,
    OperationFolder *folder)>;

/// Callback function type used to deallocate the buffers used to hold the
/// promoted subview.
using DeallocBufferCallbackFn =
    std::function<LogicalResult(OpBuilder &b, Value buffer)>;

/// Callback function type used to insert copy from original subview to
/// subview of the promoted region for the read operands/subview of promoted
/// region to original subview for the results. The copy has to happen from
/// `src` to `dst`.
using CopyCallbackFn =
    std::function<LogicalResult(OpBuilder &b, Value src, Value dst)>;

struct LinalgPromotionOptions {
  /// Indices of subViews to promote. If `None`, try to promote all operands.
  Optional<DenseSet<unsigned>> operandsToPromote = None;
  LinalgPromotionOptions &setOperandsToPromote(ArrayRef<int64_t> operands) {
    operandsToPromote = DenseSet<unsigned>();
    operandsToPromote->insert(operands.begin(), operands.end());
    return *this;
  }
  /// If the ith element of `useFullTiles` is true, the full view should be
  /// used for the promoted buffer of the ith operand in `operandsToPromote`.
  /// Otherwise the partial view will be used. The decision is defaulted to
  /// `useFullTileBuffersDefault` when `useFullTileBuffers` is None and for
  /// operands missing from `useFullTileBuffers`.
  Optional<llvm::SmallBitVector> useFullTileBuffers = None;
  LinalgPromotionOptions &setUseFullTileBuffers(ArrayRef<bool> useFullTiles) {
    unsigned size = useFullTiles.size();
    llvm::SmallBitVector tmp(size, false);
    for (unsigned i = 0; i < size; ++i)
      tmp[i] = useFullTiles[i];
    useFullTileBuffers = tmp;
    return *this;
  }
  /// If true, all operands unspecified by `useFullTileBuffers` will use the
  /// full view, otherwise the partial view.
  bool useFullTileBuffersDefault = false;
  LinalgPromotionOptions &setUseFullTileBuffersByDefault(bool use) {
    useFullTileBuffersDefault = use;
    return *this;
  }
  /// Allow the use of dynamically-sized buffers.
  bool dynamicBuffers = false;
  LinalgPromotionOptions &setDynamicBuffers(unsigned dynamic) {
    dynamicBuffers = dynamic;
    return *this;
  }
  /// Alignment of promoted buffer. If `None` do not specify alignment.
  Optional<unsigned> alignment = None;
  LinalgPromotionOptions &setAlignment(unsigned align) {
    alignment = align;
    return *this;
  }
  /// Use alloca with the default allocation scheme.
  bool useAlloca = false;
  LinalgPromotionOptions &setUseAlloca(bool use) {
    useAlloca = use;
    return *this;
  }
  /// Callback function to do the allocation of the promoted buffer. If None,
  /// then the default allocation scheme of allocating a memref<?xi8> buffer
  /// followed by a view operation is used.
  Optional<AllocBufferCallbackFn> allocationFn = None;
  Optional<DeallocBufferCallbackFn> deallocationFn = None;
  LinalgPromotionOptions &
  setAllocationDeallocationFns(AllocBufferCallbackFn const &allocFn,
                               DeallocBufferCallbackFn const &deallocFn) {
    allocationFn = allocFn;
    deallocationFn = deallocFn;
    return *this;
  }
  /// Callback function to do the copy of data to and from the promoted
  /// subview. If None then a linalg.copy is used.
  Optional<CopyCallbackFn> copyInFn = None;
  Optional<CopyCallbackFn> copyOutFn = None;
  LinalgPromotionOptions &setCopyInOutFns(CopyCallbackFn const &copyIn,
                                          CopyCallbackFn const &copyOut) {
    copyInFn = copyIn;
    copyOutFn = copyOut;
    return *this;
  }
};
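
// Example (usage sketch): promote the two input operands to aligned buffers
// with a custom allocation/deallocation scheme. The callback bodies below are
// placeholders, not a recommended allocation strategy.
//
//   auto alloc = [](OpBuilder &b, SubViewOp subView,
//                   ArrayRef<Value> boundingSubViewSize,
//                   OperationFolder *folder) -> Optional<Value> {
//     // Create and return the promoted buffer here; returning None makes
//     // the promotion fail gracefully.
//     return None;
//   };
//   auto dealloc = [](OpBuilder &b, Value buffer) -> LogicalResult {
//     // Free `buffer` here.
//     return success();
//   };
//   LinalgPromotionOptions promotionOptions =
//       LinalgPromotionOptions()
//           .setOperandsToPromote({0, 1})
//           .setAlignment(16)
//           .setAllocationDeallocationFns(alloc, dealloc);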

/// Creates a new buffer using the `allocationFn` provided. The size of this
/// buffer is the smallest constant bounding size along each dimension that can
/// be computed for the size of the result of `subView`. Returns the allocated
/// buffer as `fullLocalView` and the view that matches the size of the result
/// of subview operation as `partialLocalView`.
struct PromotionInfo {
  Value fullLocalView;
  Value partialLocalView;
};
Optional<PromotionInfo>
promoteSubviewAsNewBuffer(OpBuilder &b, Location loc, SubViewOp subView,
                          AllocBufferCallbackFn allocationFn,
                          OperationFolder *folder = nullptr);

/// Promotes the `subViews` into a new buffer allocated at the insertion point
/// `b`. Promotion occurs in 3 steps:
/// 1. Create a new buffer for a full tile (i.e. not clipped at the boundary).
/// 2. Take a full view on the buffer.
/// 3. Take a partial slice of the full view in step 2. and copy into it.
/// Infers statically sized buffers from subViews unless `dynamicBuffers` is
/// true.
///
/// Returns the modified linalg op (the modification happens in place) as well
/// as all the copy ops created.
Optional<LinalgOp> promoteSubViews(OpBuilder &b, LinalgOp op,
                                   LinalgPromotionOptions options,
                                   OperationFolder *folder = nullptr);

/// Emits a suitable vector form for a Linalg op with fully static shape.
void vectorizeLinalgOp(OpBuilder &builder, Operation *op);

/// Emits a loop nest of `LoopTy` with the proper body for `op`.
template <typename LoopTy>
Optional<LinalgLoops> linalgLowerOpToLoops(OpBuilder &builder, Operation *op);

/// Emits a loop nest of `scf.for` with the proper body for `op`.
LogicalResult linalgOpToLoops(OpBuilder &builder, Operation *op);

/// Emits a loop nest of `scf.parallel` with the proper body for `op`.
LogicalResult linalgOpToParallelLoops(OpBuilder &builder, Operation *op);

/// Emits a loop nest of `affine.for` with the proper body for `op`.
LogicalResult linalgOpToAffineLoops(OpBuilder &builder, Operation *op);
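
// Example (usage sketch): lower a linalg op to an explicit loop nest via the
// template form. The assumption here is that `LoopTy` is one of scf::ForOp,
// AffineForOp or scf::ParallelOp, mirroring the three wrappers above.
//
//   Optional<LinalgLoops> loops =
//       linalgLowerOpToLoops<scf::ParallelOp>(b, op);
//   if (!loops)
//     return failure();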

//===----------------------------------------------------------------------===//
// Preconditions that ensure the corresponding transformation succeeds and can
// be applied as a rewrite pattern.
//===----------------------------------------------------------------------===//
/// Checks whether the `indexing_maps` and `iterator_types` of a `generic` or
/// `indexed_generic` operation can be permuted according to
/// `interchangeVector`.
LogicalResult
interchangeGenericLinalgOpPrecondition(Operation *op,
                                       ArrayRef<unsigned> interchangeVector);

/// Checks whether the std.subviews feeding a linalg operation can be promoted.
LogicalResult promoteSubviewsPrecondition(Operation *op,
                                          LinalgPromotionOptions options);

/// Checks whether a linalg op (e.g. linalg.generic) can be rewritten into a
/// suitable vector form (e.g. a vector.contract op).
LogicalResult vectorizeLinalgOpPrecondition(Operation *op);

//===----------------------------------------------------------------------===//
// Transformations exposed as rewrite patterns.
//===----------------------------------------------------------------------===//
// Marker used as attribute name in generated Linalg rewriting transformations.
struct LinalgTransforms {
  static const StringLiteral kLinalgTransformMarker;
};

/// Helper class to control common attribute matching and setting behavior.
struct LinalgMarker {
  explicit LinalgMarker(ArrayRef<Identifier> matchDisjunction = {},
                        Optional<Identifier> replacement = None);
  LinalgMarker(LinalgMarker &&) = default;
  LinalgMarker(const LinalgMarker &) = default;
  LogicalResult checkAndNotify(PatternRewriter &rewriter, Operation *op) const;
  void replaceLinalgMarker(PatternRewriter &rewriter, Operation *op) const;

private:
  SmallVector<Identifier, 4> matchDisjunction;
  Optional<Identifier> replacement;
};
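
// Example (usage sketch): a marker that matches only ops tagged "promote" and
// re-tags them "vectorize" after a successful rewrite, so a later pattern can
// pick them up. The marker strings are arbitrary names chosen here.
//
//   LinalgMarker marker({Identifier::get("promote", ctx)},
//                       Identifier::get("vectorize", ctx));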

///
/// Linalg tiling patterns.
///
/// Apply the `tileLinalgOp` transformation as a pattern.
/// `marker` controls LinalgTransformMarker matching and update when specified.
/// See `tileLinalgOp` for more details.
enum class LinalgTilingLoopType {
  Loops = 0,
  AffineLoops = 1,
  ParallelLoops = 2,
};

using TileSizeComputationFunction =
    std::function<SmallVector<Value, 4>(OpBuilder &, Operation *)>;

struct LinalgTilingOptions {
  /// Computation function that returns the tile sizes for each operation.
  /// Construction of constant tile sizes should be delayed so that it
  /// interoperates with folding.
  TileSizeComputationFunction tileSizeComputationFunction = nullptr;

  LinalgTilingOptions &
  setTileSizeComputationFunction(TileSizeComputationFunction fun) {
    tileSizeComputationFunction = std::move(fun);
    return *this;
  }
  /// Set the `tileSizeComputationFunction` to return the values `ts`. The
  /// values must not fold away when tiling. Otherwise, use a more robust
  /// `tileSizeComputationFunction`.
  LinalgTilingOptions &setTileSizes(SmallVector<Value, 4> ts) {
    tileSizeComputationFunction = [=](OpBuilder &, Operation *) { return ts; };
    return *this;
  }
  /// Convenience function to set the `tileSizeComputationFunction` to a
  /// function that computes tile sizes at the point they are needed. Allows
  /// proper interaction with folding.
  LinalgTilingOptions &setTileSizes(ArrayRef<int64_t> ts);

  /// The interchange vector to reorder the tiled loops.
  SmallVector<unsigned, 4> interchangeVector = {};

  LinalgTilingOptions &setInterchange(ArrayRef<unsigned> interchange) {
    interchangeVector.assign(interchange.begin(), interchange.end());
    return *this;
  }

  /// The type of tile loops to generate.
  LinalgTilingLoopType loopType = LinalgTilingLoopType::Loops;

  LinalgTilingOptions &setLoopType(LinalgTilingLoopType lt) {
    loopType = lt;
    return *this;
  }

  /// When specified, describes how the generated tile loops are distributed
  /// to processors.
  Optional<LinalgLoopDistributionOptions> distribution = None;

  LinalgTilingOptions &
  setDistributionOptions(LinalgLoopDistributionOptions distributionOptions) {
    distribution = std::move(distributionOptions);
    return *this;
  }
};

/// Canonicalization patterns relevant to apply after tiling patterns. These
/// are applied automatically by the tiling pass but need to be applied
/// manually when tiling is called programmatically.
OwningRewritePatternList
getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx);
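
// Example (usage sketch): after tiling programmatically, run the
// canonicalizations that the tiling pass would otherwise apply. `func` is an
// assumed enclosing FuncOp; the greedy driver lives in mlir/Transforms.
//
//   OwningRewritePatternList canonicalizations =
//       getLinalgTilingCanonicalizationPatterns(ctx);
//   (void)applyPatternsAndFoldGreedily(func, std::move(canonicalizations));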

struct LinalgBaseTilingPattern : public RewritePattern {
  LinalgBaseTilingPattern(StringRef opName, MLIRContext *context,
                          LinalgTilingOptions options,
                          LinalgMarker marker = LinalgMarker(),
                          PatternBenefit benefit = 1);
  LogicalResult
  matchAndRewriteBase(Operation *op, PatternRewriter &rewriter,
                      SmallVectorImpl<Value> &tensorResults) const;

private:
  /// LinalgTransformMarker handles special attribute manipulations.
  LinalgMarker marker;
  /// Options to control tiling.
  LinalgTilingOptions options;
};

template <typename OpTy>
struct LinalgTilingPattern : public LinalgBaseTilingPattern {
  LinalgTilingPattern(MLIRContext *context, LinalgTilingOptions options,
                      LinalgMarker marker = LinalgMarker(),
                      PatternBenefit benefit = 1)
      : LinalgBaseTilingPattern(OpTy::getOperationName(), context, options,
                                marker, benefit) {}
  LogicalResult matchAndRewrite(Operation *op,
                                PatternRewriter &rewriter) const override {
    SmallVector<Value, 4> tensorResults;
    if (failed(LinalgBaseTilingPattern::matchAndRewriteBase(op, rewriter,
                                                            tensorResults)))
      return failure();
    if (tensorResults.empty())
      rewriter.eraseOp(op);
    else
      rewriter.replaceOp(op, tensorResults);
    return success();
  }
};
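
// Example (usage sketch): tile all linalg.matmul ops that carry no transform
// marker, and tag the tiled ops "tiled" so the pattern does not fire again on
// its own output. The marker name is arbitrary.
//
//   OwningRewritePatternList patterns;
//   patterns.insert<LinalgTilingPattern<MatmulOp>>(
//       ctx, LinalgTilingOptions().setTileSizes({8, 16, 32}),
//       LinalgMarker({}, Identifier::get("tiled", ctx)));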

struct LinalgFusionOptions {
  /// List of operand indices to use for fusion.
  llvm::SmallSet<unsigned, 1> indicesToFuse = {};
  LinalgFusionOptions &setIndicesToFuse(ArrayRef<int64_t> operands) {
    indicesToFuse.insert(operands.begin(), operands.end());
    return *this;
  }
};

struct LinalgBaseTileAndFusePattern : public RewritePattern {
  LinalgBaseTileAndFusePattern(StringRef opName, MLIRContext *context,
                               const LinalgDependenceGraph &dependenceGraph,
                               LinalgTilingOptions tilingOptions,
                               LinalgFusionOptions fusionOptions,
                               LinalgMarker marker = LinalgMarker(),
                               LinalgMarker fusedOpMarker = LinalgMarker(),
                               LinalgMarker originalOpMarker = LinalgMarker(),
                               PatternBenefit benefit = 1);
  LogicalResult matchAndRewrite(Operation *op,
                                PatternRewriter &rewriter) const override;

private:
  /// Dependence graph needed for fusion.
  const LinalgDependenceGraph &dependenceGraph;
  /// Options to control tiling.
  LinalgTilingOptions tilingOptions;
  /// Options to control fusion.
  LinalgFusionOptions fusionOptions;
  /// Marker to control application of the pattern.
  LinalgMarker marker;
  /// Marker set on the fused op after tile and fuse.
  LinalgMarker fusedOpMarker;
  /// The dependenceGraph is not modifiable, i.e. if the Linalg operations used
  /// to build the dependence graph change then the dependenceGraph needs to be
  /// recomputed. To avoid invalidating the dependenceGraph while the
  /// transformation happens, the original producer can be tagged with a marker
  /// that can later be used to delete the original operations.
  LinalgMarker originalOpMarker;
};

template <typename OpTy>
struct LinalgTileAndFusePattern : public LinalgBaseTileAndFusePattern {
  LinalgTileAndFusePattern(MLIRContext *context,
                           const LinalgDependenceGraph &dependenceGraph,
                           LinalgTilingOptions tilingOptions,
                           LinalgFusionOptions fusionOptions,
                           LinalgMarker marker = LinalgMarker(),
                           LinalgMarker fusedOpMarker = LinalgMarker(),
                           LinalgMarker originalOpMarker = LinalgMarker(),
                           PatternBenefit benefit = 1)
      : LinalgBaseTileAndFusePattern(
            OpTy::getOperationName(), context, dependenceGraph, tilingOptions,
            fusionOptions, marker, fusedOpMarker, originalOpMarker, benefit) {}
};

///
/// Linalg interchange patterns.
///
/// Apply the `interchange` transformation as a pattern.
/// `marker` controls LinalgTransformMarker matching and update when specified.
/// See `interchange` for more details.
struct LinalgBaseInterchangePattern : public RewritePattern {
  LinalgBaseInterchangePattern(StringRef opName, MLIRContext *context,
                               ArrayRef<unsigned> interchangeVector,
                               LinalgMarker marker = LinalgMarker(),
                               PatternBenefit benefit = 1);
  LogicalResult matchAndRewrite(Operation *op,
                                PatternRewriter &rewriter) const override;

private:
  /// LinalgTransformMarker handles special attribute manipulations.
  LinalgMarker marker;
  /// The interchange vector to reorder the iterators and indexing_maps dims.
  SmallVector<unsigned, 8> interchangeVector;
};

template <typename OpTy>
struct LinalgInterchangePattern : public LinalgBaseInterchangePattern {
  LinalgInterchangePattern(MLIRContext *context,
                           ArrayRef<unsigned> interchangeVector,
                           LinalgMarker marker = LinalgMarker(),
                           PatternBenefit benefit = 1)
      : LinalgBaseInterchangePattern(OpTy::getOperationName(), context,
                                     interchangeVector, marker, benefit) {}
};

///
/// Linalg promotion patterns.
///
/// Apply the `promoteSubViews` transformation as a pattern.
/// `marker` controls LinalgTransformMarker matching and update when specified.
/// See `promoteSubViews` for more details.
struct LinalgBasePromotionPattern : public RewritePattern {
  LinalgBasePromotionPattern(StringRef opName, MLIRContext *context,
                             LinalgPromotionOptions options,
                             LinalgMarker marker = LinalgMarker(),
                             PatternBenefit benefit = 1);
  LogicalResult matchAndRewrite(Operation *op,
                                PatternRewriter &rewriter) const override;

private:
  /// LinalgTransformMarker handles special attribute manipulations.
  LinalgMarker marker;
  /// Promotion options.
  LinalgPromotionOptions options;
};

template <typename OpTy>
struct LinalgPromotionPattern : public LinalgBasePromotionPattern {
  LinalgPromotionPattern(MLIRContext *context, LinalgPromotionOptions options,
                         LinalgMarker marker = LinalgMarker(),
                         PatternBenefit benefit = 1)
      : LinalgBasePromotionPattern(OpTy::getOperationName(), context, options,
                                   marker, benefit) {}
};

///
/// Linalg vectorization patterns.
///
/// Apply the `vectorizeLinalgOp` transformation as a pattern.
/// `marker` controls LinalgTransformMarker matching and update when specified.
/// See `vectorizeLinalgOp` for more details.
struct LinalgBaseVectorizationPattern : public RewritePattern {
  LinalgBaseVectorizationPattern(StringRef opName, MLIRContext *context,
                                 LinalgMarker marker = LinalgMarker(),
                                 PatternBenefit benefit = 1);
  LogicalResult matchAndRewrite(Operation *op,
                                PatternRewriter &rewriter) const override;

private:
  /// LinalgTransformMarker handles special attribute manipulations.
  LinalgMarker marker;
};

template <typename OpTy>
struct LinalgVectorizationPattern : public LinalgBaseVectorizationPattern {
  LinalgVectorizationPattern(MLIRContext *context,
                             LinalgMarker marker = LinalgMarker(),
                             PatternBenefit benefit = 1)
      : LinalgBaseVectorizationPattern(OpTy::getOperationName(), context,
                                       marker, benefit) {}
};

///
/// Linalg lowering patterns.
///
/// Apply the `linalgLowerOpToLoops` transformation as a pattern.
/// `marker` controls LinalgTransformMarker matching and update when specified.
/// See `linalgLowerOpToLoops` for more details.
enum class LinalgLoweringType {
  LibraryCall = 0,
  Loops = 1,
  AffineLoops = 2,
  ParallelLoops = 3
};
template <typename OpTy>
struct LinalgLoweringPattern : public RewritePattern {
  LinalgLoweringPattern(MLIRContext *context, LinalgLoweringType loweringType,
                        LinalgMarker marker = LinalgMarker(),
                        PatternBenefit benefit = 1)
      : RewritePattern(OpTy::getOperationName(), {}, benefit, context),
        marker(marker), loweringType(loweringType) {}
  // TODO: Move implementation to .cpp once named ops are auto-generated.
  LogicalResult matchAndRewrite(Operation *op,
                                PatternRewriter &rewriter) const override {
    LinalgOp linalgOp = dyn_cast<LinalgOp>(op);
    if (!linalgOp)
      return failure();
    if (failed(marker.checkAndNotify(rewriter, linalgOp)))
      return failure();

    if (loweringType == LinalgLoweringType::LibraryCall) {
      // TODO: Move lowering to library calls here.
      return failure();
    } else if (loweringType == LinalgLoweringType::Loops) {
      if (failed(linalgOpToLoops(rewriter, op)))
        return failure();
    } else if (loweringType == LinalgLoweringType::AffineLoops) {
      if (failed(linalgOpToAffineLoops(rewriter, op)))
        return failure();
    } else if (failed(linalgOpToParallelLoops(rewriter, op))) {
      return failure();
    }
    rewriter.eraseOp(op);
    return success();
  }

private:
  /// LinalgTransformMarker handles special attribute manipulations.
  LinalgMarker marker;
  /// Controls whether the pattern lowers to library calls, scf.for, affine.for
  /// or scf.parallel.
  LinalgLoweringType loweringType;
};

/// Linalg generalization patterns.

/// Populates `patterns` with patterns to convert spec-generated named ops to
/// linalg.generic ops.
void populateLinalgNamedOpsGeneralizationPatterns(
    MLIRContext *context, OwningRewritePatternList &patterns,
    LinalgMarker marker = LinalgMarker());

/// Populates `patterns` with patterns to convert linalg.conv ops to
/// linalg.generic ops.
void populateLinalgConvGeneralizationPatterns(
    MLIRContext *context, OwningRewritePatternList &patterns,
    LinalgMarker marker = LinalgMarker());

//===----------------------------------------------------------------------===//
// Op-specific patterns.
//===----------------------------------------------------------------------===//
/// Match and rewrite for the pattern:
/// ```
///    %alloc = ...
///    [optional] %view = std.view %alloc ...
///    %subView = subview %allocOrView ...
///    [optional] linalg.fill(%allocOrView, %cst) ...
///    ...
///    linalg.copy(%in, %subView) ...
///    vector.transfer_read %allocOrView[...], %cst ...
/// ```
/// into
/// ```
///    [unchanged] %alloc = ...
///    [unchanged] [optional] %view = std.view %alloc ...
///    [unchanged] %subView = subview %allocOrView ...
///    ...
///    vector.transfer_read %in[...], %cst ...
/// ```
/// Where there is no interleaved use between linalg.copy and transfer_read as
/// well as no interleaved use between linalg.fill and linalg.copy (if
/// linalg.fill is specified).
/// This is a custom rewrite to forward partial reads (with optional fills) to
/// vector.transfer_read.
struct LinalgCopyVTRForwardingPattern
    : public OpRewritePattern<vector::TransferReadOp> {
  using OpRewritePattern<vector::TransferReadOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(vector::TransferReadOp xferOp,
                                PatternRewriter &rewriter) const override;
};

/// Match and rewrite for the pattern:
/// ```
///    %alloc = ...
///    [optional] %view = std.view %alloc ...
///    %subView = subview %allocOrView...
///    ...
///    vector.transfer_write %..., %allocOrView[...]
///    linalg.copy(%subView, %out)
/// ```
/// into
/// ```
///    [unchanged] %alloc = ...
///    [unchanged] [optional] %view = std.view %alloc ...
///    [unchanged] %subView = subview %allocOrView...
///    ...
///    vector.transfer_write %..., %out[...]
/// ```
/// Where there is no interleaved use between transfer_write and linalg.copy.
/// This is a custom rewrite to forward partial writes to
/// vector.transfer_write.
struct LinalgCopyVTWForwardingPattern
    : public OpRewritePattern<vector::TransferWriteOp> {
  using OpRewritePattern<vector::TransferWriteOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(vector::TransferWriteOp xferOp,
                                PatternRewriter &rewriter) const override;
};

/// Canonicalize AffineMinOp operations in the context of enclosing scf.for and
/// scf.parallel by:
/// 1. building an affine map where uses of the induction variable of a loop
///    are replaced by either the min (i.e. `%lb`) or the max
///    (i.e. `%lb + %step * floordiv(%ub - 1 - %lb, %step)`) expression,
///    depending on whether the induction variable is used with a positive or
///    negative coefficient.
/// 2. checking whether any of the results of this affine map is known to be
///    greater than all other results.
/// 3. replacing the AffineMinOp by the result of (2).
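///
/// For example, in the loop below the maximum value of %i is
/// %lb + %step * floordiv(%ub - 1 - %lb, %step) = 0 + 16 * 5 = 80, so
/// `96 - %i` is always >= 16 and the `affine.min` can be replaced by the
/// constant 16:
///
///   scf.for %i = %c0 to %c96 step %c16 {
///     %0 = affine.min affine_map<(d0) -> (16, 96 - d0)>(%i)
///   }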
// TODO: move to a more appropriate place when it is determined. For now Linalg
// depends both on Affine and SCF but they do not depend on each other.
struct AffineMinSCFCanonicalizationPattern
    : public OpRewritePattern<AffineMinOp> {
  using OpRewritePattern<AffineMinOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(AffineMinOp minOp,
                                PatternRewriter &rewriter) const override;
};

/// Converts a Convolution op into a vector contraction.
///
/// The conversion expects the ConvOp dimensions marked false in the *mask* to
/// be of size 1. This ensures that the ConvOp can be lowered to a vector
/// contraction over the dimensions marked true in the *mask*.
///
/// A good example for vectorization is ConvNHWCOp, a 2-D conv op with channels
/// as the last dimension. Let's vectorize the last 3 dimensions. The initial
/// op definition looks like this:
/// ```
///   linalg.conv_2d_nhwc %arg0, %arg1, %arg2 :
///     (memref<1x3x3x3xf32>, memref<1x3x3x3xf32>, memref<?x?x?x?xf32>)
/// ```
/// This op can be expressed as a dot product between %arg0 (input) and %arg1
/// (kernel), which is written into the first entry of %arg2 (output). This is
/// the ConvOp this pass expects and converts into:
/// ```
///   #map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
///   #map1 = affine_map<(d0, d1, d2) -> ()>
///   .....
///   %0 = vector.transfer_read %arg0[%c0, %c0, %c0, %c0], %c0_f32
///     : memref<1x3x3x3xf32>, vector<3x3x3xf32>
///   %1 = vector.transfer_read %arg1[%c0, %c0, %c0, %c0], %c0_f32
///     : memref<1x3x3x3xf32>, vector<3x3x3xf32>
///   %2 = vector.contract {indexing_maps = [#map0, #map0, #map1],
///     iterator_types = ["reduction", "reduction", "reduction"]} %0, %1,
///     %c0_f32 : vector<3x3x3xf32>, vector<3x3x3xf32> into f32
///   store %2, %arg2[%c0, %c0, %c0, %c0] : memref<?x?x?x?xf32>
/// ```
/// where the first two operations read the input and kernel memory buffers
/// into vectors. Subsequently, they are contracted together and the result is
/// written to the first entry of the output buffer.
template <typename ConvOp, int N>
class ConvOpVectorization : public OpRewritePattern<ConvOp> {
  using OpRewritePattern<ConvOp>::OpRewritePattern;
  SmallVector<bool, 4> mask;

public:
  ConvOpVectorization(MLIRContext *context, SmallVector<bool, 4> msk)
      : OpRewritePattern<ConvOp>(context) {
    assert(msk.size() == N && "Mask size does not match rank");
    this->mask = msk;
  }

  LogicalResult matchAndRewrite(ConvOp convOp,
                                PatternRewriter &rewriter) const override;
};
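
// Example (usage sketch): instantiate the pattern for a 4-D NHWC convolution,
// vectorizing the last three dimensions; the batch dimension (marked false)
// must then be of size 1. The op/rank pairing is an assumption matching the
// example above.
//
//   patterns.insert<ConvOpVectorization<ConvNHWCOp, 4>>(
//       ctx, SmallVector<bool, 4>{false, true, true, true});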

//===----------------------------------------------------------------------===//
// Support for staged pattern application.
//===----------------------------------------------------------------------===//
/// Helper function to allow applying rewrite patterns, interleaved with more
/// global transformations, in a staged fashion:
/// 1. the first stage consists of a list of FrozenRewritePatternList. Each
///    FrozenRewritePatternList in this list is applied once, in order.
/// 2. the second stage consists of a single FrozenRewritePatternList that is
///    applied greedily until convergence.
/// 3. the third stage consists of applying a lambda, generally used for
///    non-local transformation effects. This allows creating custom fused
///    transformations where patterns can be ordered and applied at a finer
///    granularity than a sequence of traditional compiler passes.
LogicalResult applyStagedPatterns(
    Operation *op, ArrayRef<FrozenRewritePatternList> stage1Patterns,
    const FrozenRewritePatternList &stage2Patterns,
    function_ref<LogicalResult(Operation *)> stage3Lambda = nullptr);
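
// Example (usage sketch): run a tiling stage once, then canonicalize greedily.
// The ops and names are placeholders; error handling is elided.
//
//   OwningRewritePatternList tilingStage;
//   tilingStage.insert<LinalgTilingPattern<MatmulOp>>(
//       ctx, LinalgTilingOptions().setTileSizes({8, 16, 32}));
//   SmallVector<FrozenRewritePatternList, 4> stage1Patterns;
//   stage1Patterns.emplace_back(std::move(tilingStage));
//   FrozenRewritePatternList stage2Patterns(
//       getLinalgTilingCanonicalizationPatterns(ctx));
//   (void)applyStagedPatterns(func, stage1Patterns, stage2Patterns);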

//===----------------------------------------------------------------------===//
// Support for sparse tensor code generation.
//
// The sparse compiler part of MLIR lowers a tensor expression formulated as a
// Linalg operation into a sequence of loops depending on what dimensions of
// the tensors are marked dense or sparse. The generated code distinguishes
// between:
// (1) for-loops that iterate over a single dense dimension,
// (2) for-loops that iterate over a single sparse dimension,
// (3) while-loops that co-iterate over several sparse dimensions.
// The for-loops may be subsequently optimized for parallel or vector
// execution.
//
// For more details, see the Dialect/Linalg/Transforms/Sparsification.cpp file.
//===----------------------------------------------------------------------===//

/// Defines a parallelization strategy. Any implicit loop in the Linalg
/// operation that is marked "parallel" (thus not "reduction") is a candidate
/// for parallelization. The loop is made parallel if (1) allowed by the
/// strategy (e.g., AnyStorageOuterLoop considers either a dense or sparse
/// outermost loop only), and (2) the generated code is an actual for-loop
/// (and not a co-iterating while-loop).
enum class SparseParallelizationStrategy {
  kNone,
  kDenseOuterLoop,
  kAnyStorageOuterLoop,
  kDenseAnyLoop,
  kAnyStorageAnyLoop
  // TODO: support reduction parallelization too?
};

/// Defines a vectorization strategy. Any implicit inner loop in the Linalg
/// operation is a candidate (full SIMD for "parallel" loops and horizontal
/// SIMD for "reduction" loops). A loop is actually vectorized if (1) allowed
/// by the strategy, and (2) the emitted code is an actual for-loop (and not
/// a co-iterating while-loop).
enum class SparseVectorizationStrategy {
  kNone,
  kDenseInnerLoop,
  kAnyStorageInnerLoop
};

/// Defines a type for "pointer" and "index" storage in the sparse storage
/// scheme, with a choice between the native platform-dependent index width,
/// 64-bit integers, or 32-bit integers. A narrow width obviously reduces
/// the memory footprint of the sparse storage scheme, but the width should
/// suffice to define the total required range (viz. the maximum number of
/// stored entries per indirection level for the "pointers" and the maximum
/// value of each tensor index over all dimensions for the "indices").
enum class SparseIntType { kNative, kI64, kI32 };

/// Sparsification options.
struct SparsificationOptions {
  SparsificationOptions(SparseParallelizationStrategy p,
                        SparseVectorizationStrategy v, unsigned vl,
                        SparseIntType pt, SparseIntType it)
      : parallelizationStrategy(p), vectorizationStrategy(v), vectorLength(vl),
        ptrType(pt), indType(it) {}
  SparsificationOptions()
      : SparsificationOptions(SparseParallelizationStrategy::kNone,
                              SparseVectorizationStrategy::kNone, 1u,
                              SparseIntType::kNative, SparseIntType::kNative) {}
  SparseParallelizationStrategy parallelizationStrategy;
  SparseVectorizationStrategy vectorizationStrategy;
  unsigned vectorLength;
  SparseIntType ptrType;
  SparseIntType indType;
};

/// Sets up sparsification rewriting rules with the given options.
void populateSparsificationPatterns(
    MLIRContext *context, OwningRewritePatternList &patterns,
    const SparsificationOptions &options = SparsificationOptions());
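
// Example (usage sketch): request outer-loop parallelization over any storage
// and 32-bit pointer/index storage, then populate the rewriting rules.
//
//   SparsificationOptions sparseOptions(
//       SparseParallelizationStrategy::kAnyStorageOuterLoop,
//       SparseVectorizationStrategy::kNone, /*vectorLength=*/1,
//       SparseIntType::kI32, SparseIntType::kI32);
//   OwningRewritePatternList sparsePatterns;
//   populateSparsificationPatterns(ctx, sparsePatterns, sparseOptions);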

} // namespace linalg
} // namespace mlir

#endif // DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H_