// mlir/include/mlir/Transforms/Passes.td - third_party/github.com/llvm/llvm-project - Git at Google

 //===-- Passes.td - Transforms pass definition file --------*- tablegen -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file contains definitions for passes within the Transforms/ directory.
 //
 //===----------------------------------------------------------------------===//

 #ifndef MLIR_TRANSFORMS_PASSES
 #define MLIR_TRANSFORMS_PASSES

 include "mlir/Pass/PassBase.td"

 // Pass record for the `affine-loop-fusion` function pass, constructed through
 // mlir::createLoopFusionPass(). Each Option lists, in order: the C++ member
 // name, the command-line flag, the C++ type, the default value and the help
 // text.
 def AffineLoopFusion : FunctionPass<"affine-loop-fusion"> {
   let summary = "Fuse affine loop nests";
   let constructor = "mlir::createLoopFusionPass()";
   let options = [
     // The option type is `double`, so the default is spelled as a double
     // literal. A float literal (`0.30f`) would be widened from single
     // precision and differ slightly from an explicitly passed 0.30.
     Option<"computeToleranceThreshold", "fusion-compute-tolerance", "double",
            /*default=*/"0.30", "Fractional increase in additional computation "
                                "tolerated while fusing">,
     Option<"fastMemorySpace", "fusion-fast-mem-space", "unsigned",
            /*default=*/"0",
            "Faster memory space number to promote fusion buffers to">,
     Option<"localBufSizeThreshold", "fusion-local-buf-threshold", "uint64_t",
            /*default=*/"0", "Threshold size (KiB) for promoting local buffers "
                             "to fast memory space">,
     Option<"maximalFusion", "fusion-maximal", "bool", /*default=*/"false",
            "Enables maximal loop fusion">,
   ];
 }

 // Pass record for the `affine-pipeline-data-transfer` function pass.
 // Everything between `[{` and `}]` is a verbatim documentation literal (prose
 // plus a before/after IR example); it is data, not TableGen code.
 def AffinePipelineDataTransfer
     : FunctionPass<"affine-pipeline-data-transfer"> {
   // One-line summary; the two adjacent string literals form a single string.
   let summary = "Pipeline non-blocking data transfers between explicitly "
                 "managed levels of the memory hierarchy";
   // Long-form documentation with an input/output example.
   let description = [{
     This pass performs a transformation to overlap non-blocking DMA operations
     in a loop with computations through double buffering. This is achieved by
     advancing dma_start operations with respect to other operations.

     Input

     ```mlir
     func @pipelinedatatransfer() {
       %0 = alloc() : memref<256xf32>
       %1 = alloc() : memref<32xf32, 1>
       %2 = alloc() : memref<1xf32>
       %c0 = constant 0 : index
       %c128 = constant 128 : index
       affine.for %i0 = 0 to 8 {
         affine.dma_start %0[%i0], %1[%i0], %2[%c0], %c128 : memref<256xf32>, memref<32xf32, 1>, memref<1xf32>
         affine.dma_wait %2[%c0], %c128 : memref<1xf32>
         %3 = affine.load %1[%i0] : memref<32xf32, 1>
         %4 = "compute"(%3) : (f32) -> f32
         affine.store %4, %1[%i0] : memref<32xf32, 1>
       }
       return
     }
     ```

     Output

     ```mlir
     module {
       func @pipelinedatatransfer() {
         %c8 = constant 8 : index
         %c0 = constant 0 : index
         %0 = alloc() : memref<256xf32>
         %c0_0 = constant 0 : index
         %c128 = constant 128 : index
         %1 = alloc() : memref<2x32xf32, 1>
         %2 = alloc() : memref<2x1xf32>
         affine.dma_start %0[%c0], %1[%c0 mod 2, %c0], %2[%c0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
         affine.for %arg0 = 1 to 8 {
           affine.dma_start %0[%arg0], %1[%arg0 mod 2, %arg0], %2[%arg0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
           %8 = affine.apply #map3(%arg0)
           %9 = affine.apply #map4(%8)
           %10 = affine.apply #map4(%8)
           affine.dma_wait %2[%8 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
           %11 = affine.load %1[%8 mod 2, %8] : memref<2x32xf32, 1>
           %12 = "compute"(%11) : (f32) -> f32
           affine.store %12, %1[%8 mod 2, %8] : memref<2x32xf32, 1>
         }
         %3 = affine.apply #map3(%c8)
         %4 = affine.apply #map4(%3)
         %5 = affine.apply #map4(%3)
         affine.dma_wait %2[%3 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
         %6 = affine.load %1[%3 mod 2, %3] : memref<2x32xf32, 1>
         %7 = "compute"(%6) : (f32) -> f32
         affine.store %7, %1[%3 mod 2, %3] : memref<2x32xf32, 1>
         dealloc %2 : memref<2x1xf32>
         dealloc %1 : memref<2x32xf32, 1>
         return
       }
     }
     ```
   }];
   // C++ expression recorded as this pass's constructor.
   let constructor = "mlir::createPipelineDataTransferPass()";
 }

 // Pass record for the `buffer-deallocation` function pass.
 // The `[{ ... }]` literal below is verbatim documentation text with a
 // before/after IR example; it is data, not TableGen code.
 def BufferDeallocation : FunctionPass<"buffer-deallocation"> {
   // One-line summary; the two adjacent string literals form a single string.
   let summary = "Adds all required dealloc operations for all allocations in the "
                 "input program";
   // Long-form documentation with an input/output example.
   let description = [{
     This pass implements an algorithm to automatically introduce all required
     deallocation operations for all buffers in the input program. This ensures that
     the resulting program does not have any memory leaks.


     Input

     ```mlir
     #map0 = affine_map<(d0) -> (d0)>
     module {
       func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
         cond_br %arg0, ^bb1, ^bb2
       ^bb1:
         br ^bb3(%arg1 : memref<2xf32>)
       ^bb2:
         %0 = alloc() : memref<2xf32>
         linalg.generic {
           args_in = 1 : i64,
           args_out = 1 : i64,
           indexing_maps = [#map0, #map0],
           iterator_types = ["parallel"]} %arg1, %0 {
         ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
           %tmp1 = exp %gen1_arg0 : f32
           linalg.yield %tmp1 : f32
         }: memref<2xf32>, memref<2xf32>
         br ^bb3(%0 : memref<2xf32>)
       ^bb3(%1: memref<2xf32>):
         "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
         return
       }
     }

     ```

     Output

     ```mlir
     #map0 = affine_map<(d0) -> (d0)>
     module {
       func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
         cond_br %arg0, ^bb1, ^bb2
       ^bb1:  // pred: ^bb0
         %0 = alloc() : memref<2xf32>
         linalg.copy(%arg1, %0) : memref<2xf32>, memref<2xf32>
         br ^bb3(%0 : memref<2xf32>)
       ^bb2:  // pred: ^bb0
         %1 = alloc() : memref<2xf32>
         linalg.generic {
           args_in = 1 : i64,
           args_out = 1 : i64,
           indexing_maps = [#map0, #map0],
           iterator_types = ["parallel"]} %arg1, %1 {
         ^bb0(%arg3: f32, %arg4: f32):  // no predecessors
           %4 = exp %arg3 : f32
           linalg.yield %4 : f32
         }: memref<2xf32>, memref<2xf32>
         %2 = alloc() : memref<2xf32>
         linalg.copy(%1, %2) : memref<2xf32>, memref<2xf32>
         dealloc %1 : memref<2xf32>
         br ^bb3(%2 : memref<2xf32>)
       ^bb3(%3: memref<2xf32>):  // 2 preds: ^bb1, ^bb2
         linalg.copy(%3, %arg2) : memref<2xf32>, memref<2xf32>
         dealloc %3 : memref<2xf32>
         return
       }

     }
     ```

   }];
   // C++ expression recorded as this pass's constructor.
   let constructor = "mlir::createBufferDeallocationPass()";
   // TODO: this pass likely shouldn't depend on Linalg?
   // Dialects listed as dependencies of this pass (currently only Linalg; the
   // output example above contains linalg.copy operations).
   let dependentDialects = ["linalg::LinalgDialect"];
 }

 // `buffer-hoisting` function pass: records its constructor expression, a
 // one-line summary and the long-form description.
 def BufferHoisting : FunctionPass<"buffer-hoisting"> {
   let constructor = "mlir::createBufferHoistingPass()";
   let summary =
       "Optimizes placement of allocation operations by moving them into "
       "common dominators and out of nested regions";
   let description = [{
     This pass implements an approach to aggressively move allocations upwards
     into common dominators and out of nested regions.
   }];
 }

 // `buffer-loop-hoisting` function pass: records its constructor expression, a
 // one-line summary and the long-form description.
 def BufferLoopHoisting : FunctionPass<"buffer-loop-hoisting"> {
   let constructor = "mlir::createBufferLoopHoistingPass()";
   let summary =
       "Optimizes placement of allocation operations by moving them out of "
       "loop nests";
   let description = [{
     This pass implements an approach to aggressively move allocations upwards
     out of loop nests. It does not move allocations into common dominators.
   }];
 }

 // `promote-buffers-to-stack` function pass. Declares a single option,
 // `max-alloc-size-in-bytes` (member maxAllocSizeInBytes, unsigned, default
 // 1024).
 def PromoteBuffersToStack : FunctionPass<"promote-buffers-to-stack"> {
   let constructor = "mlir::createPromoteBuffersToStackPass()";
   let summary =
       "Promotes heap-based allocations to automatically managed stack-based "
       "allocations";
   let options = [
     Option<"maxAllocSizeInBytes",
            "max-alloc-size-in-bytes",
            "unsigned",
            /*default=*/"1024",
            "Define the maximum size in bytes to promote allocations to stack.">,
   ];
   let description = [{
     This pass implements a simple algorithm to convert heap-based memory
     allocations to stack-based ones. It uses a built-in heuristic to decide
     whether it makes sense to convert an allocation.
   }];
 }

 // `canonicalize` pass: records its constructor expression, summary and
 // description.
 def Canonicalizer : Pass<"canonicalize"> {
   let constructor = "mlir::createCanonicalizerPass()";
   let summary = "Canonicalize operations";
   let description = [{
     This pass performs various types of canonicalizations over a set of
     operations. See [Operation Canonicalization](Canonicalization.md) for more
     details.
   }];
 }

 // `copy-removal` function pass: constructor expression and summary only.
 def CopyRemoval : FunctionPass<"copy-removal"> {
   let constructor = "mlir::createCopyRemovalPass()";
   let summary = "Remove the redundant copies from input IR";
 }

 // `cse` pass. Besides summary, description and constructor it declares two
 // statistics: numCSE ("num-cse'd") and numDCE ("num-dce'd").
 def CSE : Pass<"cse"> {
   let constructor = "mlir::createCSEPass()";
   let summary = "Eliminate common sub-expressions";
   let statistics = [
     Statistic<"numCSE",
               "num-cse'd",
               "Number of operations CSE'd">,
     Statistic<"numDCE",
               "num-dce'd",
               "Number of operations DCE'd">
   ];
   let description = [{
     This pass implements a generalized algorithm for common sub-expression
     elimination. This pass relies on information provided by the
     `Memory SideEffect` interface to identify when it is safe to eliminate
     operations. See [Common subexpression elimination](https://en.wikipedia.org/wiki/Common_subexpression_elimination)
     for more general details on this optimization.
   }];
 }

 // `inline` pass. Options:
 //   disable-simplify -> disableCanonicalization (bool, default false)
 //   max-iterations   -> maxInliningIterations   (unsigned, default 4)
 def Inliner : Pass<"inline"> {
   let constructor = "mlir::createInlinerPass()";
   let summary = "Inline function calls";
   let options = [
     Option<"disableCanonicalization",
            "disable-simplify",
            "bool",
            /*default=*/"false",
            "Disable running simplifications during inlining">,
     Option<"maxInliningIterations",
            "max-iterations",
            "unsigned",
            /*default=*/"4",
            "Maximum number of iterations when inlining within an SCC">,
   ];
 }

 // `snapshot-op-locations` pass. Options:
 //   filename -> fileName (std::string, empty default)
 //   tag      -> tag      (std::string, empty default)
 def LocationSnapshot : Pass<"snapshot-op-locations"> {
   let constructor = "mlir::createLocationSnapshotPass()";
   let summary = "Generate new locations from the current IR";
   let options = [
     Option<"fileName",
            "filename",
            "std::string",
            /*default=*/"",
            "The filename to print the generated IR">,
     Option<"tag",
            "tag",
            "std::string",
            /*default=*/"",
            "A tag to use when fusing the new locations with the original. "
            "If unset, the locations are replaced.">,
   ];
   let description = [{
     This pass allows for generating new locations from the IR during any stage
     of compilation, by snapshotting the IR to a file and using that file to
     generate new locations for the operations.

     Depending on the value of the `tag` option, different resulting locations
     may be generated:

     * If unset, the original location of the operation is replaced.

     Example:

     ```mlir
     // old:
     ... loc("original_source.cpp":1:1)

     // new:
     ... loc("snapshot_source.mlir":10:10)
     ```

     * If set, the new location is fused with the original location in the form
     of a [`Name Location`](Diagnostics.md#name-location) with the specified tag.

     Example:

     ```mlir
     // old:
     ... loc("original_source.cpp":1:1)

     // new:
     ... loc(fused["original_source.cpp":1:1, "snapshot"("snapshot_source.mlir":10:10)])
     ```
   }];
 }

 // `loop-coalescing` function pass: constructor expression and summary only.
 def LoopCoalescing : FunctionPass<"loop-coalescing"> {
   let constructor = "mlir::createLoopCoalescingPass()";
   let summary =
       "Coalesce nested loops with independent bounds into a single loop";
 }

 // `loop-invariant-code-motion` pass: constructor expression and summary only.
 def LoopInvariantCodeMotion : Pass<"loop-invariant-code-motion"> {
   let constructor = "mlir::createLoopInvariantCodeMotionPass()";
   let summary = "Hoist loop invariant instructions outside of the loop";
 }

 // Pass record for the `memref-dataflow-opt` function pass.
 // The `[{ ... }]` literal is verbatim documentation text with a before/after
 // IR example; it is data, not TableGen code.
 def MemRefDataFlowOpt : FunctionPass<"memref-dataflow-opt"> {
   // One-line summary.
   let summary = "Perform store/load forwarding for memrefs";
   // Long-form documentation with an input/output example.
   let description = [{
     This pass performs store to load forwarding for memref's to eliminate memory
     accesses and potentially the entire memref if all its accesses are
     forwarded.

     Input

     ```mlir
     func @store_load_affine_apply() -> memref<10x10xf32> {
       %cf7 = constant 7.0 : f32
       %m = alloc() : memref<10x10xf32>
       affine.for %i0 = 0 to 10 {
         affine.for %i1 = 0 to 10 {
           affine.store %cf7, %m[%i0, %i1] : memref<10x10xf32>
           %v0 = affine.load %m[%i0, %i1] : memref<10x10xf32>
           %v1 = addf %v0, %v0 : f32
         }
       }
       return %m : memref<10x10xf32>
     }
     ```

     Output

     ```mlir
     module {
       func @store_load_affine_apply() -> memref<10x10xf32> {
         %cst = constant 7.000000e+00 : f32
         %0 = alloc() : memref<10x10xf32>
         affine.for %arg0 = 0 to 10 {
           affine.for %arg1 = 0 to 10 {
             affine.store %cst, %0[%arg0, %arg1] : memref<10x10xf32>
             %1 = addf %cst, %cst : f32
           }
         }
         return %0 : memref<10x10xf32>
       }
     }
     ```
   }];
   // C++ expression recorded as this pass's constructor.
   let constructor = "mlir::createMemRefDataFlowOptPass()";
 }

 // Pass record for `normalize-memrefs`, declared with "ModuleOp" as the second
 // Pass<> argument. The `[{ ... }]` literal is the user-facing documentation.
 // Fixes relative to the previous text: the duplicated word "with with" is
 // removed; the two markdown links keep `](` on one line so they render as
 // links; the trait is spelled MemRefsNormalizable consistently; the second
 // input example carries the `mlir` fence tag; and the IR examples are
 // indented consistently.
 def NormalizeMemRefs : Pass<"normalize-memrefs", "ModuleOp"> {
   let summary = "Normalize memrefs";
   let description = [{
     This pass transforms memref types with a non-trivial
     [layout map](https://mlir.llvm.org/docs/LangRef/#layout-map) into
     memref types with an identity layout map, e.g. (i, j) -> (i, j). This
     pass is inter-procedural, in the sense that it can modify function
     interfaces and call sites that pass memref types. In order to modify
     memref types while preserving the original behavior, users of those
     memref types are also modified to incorporate the resulting layout map.
     For instance, an
     [AffineLoadOp](https://mlir.llvm.org/docs/Dialects/Affine/#affineload-affineloadop)
     will be updated to compose the layout map with the affine expression
     contained in the op. Operations marked with the
     [MemRefsNormalizable](https://mlir.llvm.org/docs/Traits/#memrefsnormalizable)
     trait are expected to be normalizable. Supported operations include affine
     operations, std.alloc, std.dealloc, and std.return.

     Given an appropriate layout map specified in the code, this transformation
     can express tiled or linearized access to multi-dimensional data
     structures, but will not modify memref types without an explicit layout
     map.

     Currently this pass is limited to only modify
     functions where all memref types can be normalized. If a function
     contains any operations that are not MemRefsNormalizable, then the function
     and any functions that call or call it will not be modified.

     Input

     ```mlir
     #tile = affine_map<(i) -> (i floordiv 4, i mod 4)>
     func @matmul(%A: memref<16xf64, #tile>,
                  %B: index, %C: memref<16xf64>) -> (memref<16xf64, #tile>) {
       affine.for %arg3 = 0 to 16 {
         %a = affine.load %A[%arg3] : memref<16xf64, #tile>
         %p = mulf %a, %a : f64
         affine.store %p, %A[%arg3] : memref<16xf64, #tile>
       }
       %c = alloc() : memref<16xf64, #tile>
       %d = affine.load %c[0] : memref<16xf64, #tile>
       return %A: memref<16xf64, #tile>
     }
     ```

     Output

     ```mlir
     func @matmul(%arg0: memref<4x4xf64>, %arg1: index, %arg2: memref<16xf64>)
       -> memref<4x4xf64> {
       affine.for %arg3 = 0 to 16 {
         %3 = affine.load %arg0[%arg3 floordiv 4, %arg3 mod 4]: memref<4x4xf64>
         %4 = mulf %3, %3 : f64
         affine.store %4, %arg0[%arg3 floordiv 4, %arg3 mod 4]: memref<4x4xf64>
       }
       %0 = alloc() : memref<4x4xf64>
       %1 = affine.apply #map1()
       %2 = affine.load %0[0, 0] : memref<4x4xf64>
       return %arg0 : memref<4x4xf64>
     }
     ```

     Input

     ```mlir
     #linear8 = affine_map<(i, j) -> (i * 8 + j)>
     func @linearize(%arg0: memref<8x8xi32, #linear8>,
                     %arg1: memref<8x8xi32, #linear8>,
                     %arg2: memref<8x8xi32, #linear8>) {
       %c8 = constant 8 : index
       %c0 = constant 0 : index
       %c1 = constant 1 : index
       affine.for %arg3 = %c0 to %c8  {
         affine.for %arg4 = %c0 to %c8  {
           affine.for %arg5 = %c0 to %c8 {
             %0 = affine.load %arg0[%arg3, %arg5] : memref<8x8xi32, #linear8>
             %1 = affine.load %arg1[%arg5, %arg4] : memref<8x8xi32, #linear8>
             %2 = affine.load %arg2[%arg3, %arg4] : memref<8x8xi32, #linear8>
             %3 = muli %0, %1 : i32
             %4 = addi %2, %3 : i32
             affine.store %4, %arg2[%arg3, %arg4] : memref<8x8xi32, #linear8>
           }
         }
       }
       return
     }
     ```

     Output

     ```mlir
     func @linearize(%arg0: memref<64xi32>,
                     %arg1: memref<64xi32>,
                     %arg2: memref<64xi32>) {
       %c8 = constant 8 : index
       %c0 = constant 0 : index
       affine.for %arg3 = %c0 to %c8 {
         affine.for %arg4 = %c0 to %c8 {
           affine.for %arg5 = %c0 to %c8 {
             %0 = affine.load %arg0[%arg3 * 8 + %arg5] : memref<64xi32>
             %1 = affine.load %arg1[%arg5 * 8 + %arg4] : memref<64xi32>
             %2 = affine.load %arg2[%arg3 * 8 + %arg4] : memref<64xi32>
             %3 = muli %0, %1 : i32
             %4 = addi %2, %3 : i32
             affine.store %4, %arg2[%arg3 * 8 + %arg4] : memref<64xi32>
           }
         }
       }
       return
     }
     ```
   }];
   let constructor = "mlir::createNormalizeMemRefsPass()";
 }

 // Pass record for `parallel-loop-collapsing`. It declares three comma-separated
 // list options, `collapsed-indices-0`, `-1` and `-2`, one per resulting loop
 // position. The help text of option 0 was missing words ("combine 0th loop
 // index"); it now follows the same wording as options 1 and 2.
 def ParallelLoopCollapsing : Pass<"parallel-loop-collapsing"> {
   let summary = "Collapse parallel loops to use less induction variables";
   let constructor = "mlir::createParallelLoopCollapsingPass()";
   let options = [
     ListOption<"clCollapsedIndices0", "collapsed-indices-0", "unsigned",
                "Which loop indices to combine into the position 0 loop index",
                "llvm::cl::MiscFlags::CommaSeparated">,
     ListOption<"clCollapsedIndices1", "collapsed-indices-1", "unsigned",
                "Which loop indices to combine into the position 1 loop index",
                "llvm::cl::MiscFlags::CommaSeparated">,
     ListOption<"clCollapsedIndices2", "collapsed-indices-2", "unsigned",
                "Which loop indices to combine into the position 2 loop index",
                "llvm::cl::MiscFlags::CommaSeparated">,
   ];
 }

 // `print-cfg-graph` function pass: constructor expression and summary only.
 def PrintCFG : FunctionPass<"print-cfg-graph"> {
   let constructor = "mlir::createPrintCFGGraphPass()";
   let summary = "Print CFG graph per-Region";
 }

 // `print-op-stats` pass, declared with "ModuleOp" as the second Pass<> argument.
 def PrintOpStats : Pass<"print-op-stats", "ModuleOp"> {
   let constructor = "mlir::createPrintOpStatsPass()";
   let summary = "Print statistics of operations";
 }

 // `print-op-graph` pass, declared with "ModuleOp" as the second Pass<> argument.
 def PrintOp : Pass<"print-op-graph", "ModuleOp"> {
   let constructor = "mlir::createPrintOpGraphPass()";
   let summary = "Print op graph per-Region";
 }

 // `sccp` pass: records its constructor expression, summary and description.
 def SCCP : Pass<"sccp"> {
   let constructor = "mlir::createSCCPPass()";
   let summary = "Sparse Conditional Constant Propagation";
   let description = [{
     This pass implements a general algorithm for sparse conditional constant
     propagation. This algorithm detects values that are known to be constant and
     optimistically propagates this throughout the IR. Any values proven to be
     constant are replaced, and removed if possible.

     This implementation is based on the algorithm described by Wegman and Zadeck
     in [“Constant Propagation with Conditional Branches”](https://dl.acm.org/doi/10.1145/103135.103136) (1991).
   }];
 }

 // `strip-debuginfo` pass: records its constructor expression, summary and
 // description.
 def StripDebugInfo : Pass<"strip-debuginfo"> {
   let constructor = "mlir::createStripDebugInfoPass()";
   let summary = "Strip debug info from all operations";
   let description = [{
     This pass strips the IR of any location information, by replacing all
     operation locations with [`unknown`](Diagnostics.md#unknown-location).
   }];
 }

 // Pass record for the `symbol-dce` pass.
 // The `[{ ... }]` literal is verbatim documentation text with a before/after
 // IR example; it is data, not TableGen code.
 def SymbolDCE : Pass<"symbol-dce"> {
   // One-line summary.
   let summary = "Eliminate dead symbols";
   // Long-form documentation with an input/output example.
   let description = [{
     This pass deletes all symbols that are found to be unreachable. This is done
     by computing the set of operations that are known to be live, propagating
     that liveness to other symbols, and then deleting all symbols that are not
     within this live set. Live symbols are those that have a
     [visibility](SymbolsAndSymbolTables.md#symbol-visibility) that extends
     beyond the IR, e.g. `public`, or those that are referenced by live symbols
     or other non-Symbol operations.

     For example, consider the following input:

     ```mlir
     func @dead_private_function() attributes { sym_visibility = "private" }
     func @live_private_function() attributes { sym_visibility = "private" }

     // Note: The `public` isn't necessary here, as this is the default.
     func @public_function() attributes { sym_visibility = "public" } {
       "foo.return"() {uses = [@live_private_function]} : () -> ()
     }
     ```

     A known live function, `public_function`, contains a reference to an
     otherwise non-live function `live_private_function`. After running
     `symbol-dce`, only these two symbols should remain, as the final symbol
     `dead_private_function` is not visible outside of the current IR and there
     are no links to known-live operations. After running, we get the expected:

     ```mlir
     func @live_private_function() attributes { sym_visibility = "private" }

     func @public_function() attributes { sym_visibility = "public" } {
       "foo.return"() {uses = [@live_private_function]} : () -> ()
     }
     ```

     See [Symbols and SymbolTables](SymbolsAndSymbolTables.md) for more
     information on `Symbols`.
   }];
   // C++ expression recorded as this pass's constructor.
   let constructor = "mlir::createSymbolDCEPass()";
 }
 #endif // MLIR_TRANSFORMS_PASSES
	//===-- Passes.td - Transforms pass definition file --------*- tablegen -*-===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// This file contains definitions for passes within the Transforms/ directory.
	//
	//===----------------------------------------------------------------------===//

	#ifndef MLIR_TRANSFORMS_PASSES
	#define MLIR_TRANSFORMS_PASSES

	include "mlir/Pass/PassBase.td"

	// Pass record for the `affine-loop-fusion` function pass, constructed through
	// mlir::createLoopFusionPass(). Each Option lists, in order: the C++ member
	// name, the command-line flag, the C++ type, the default value and the help
	// text.
	// The `/*default=*/` markers are restored: the pasted `/default=/` is not a
	// comment and does not lex as TableGen.
	def AffineLoopFusion : FunctionPass<"affine-loop-fusion"> {
	  let summary = "Fuse affine loop nests";
	  let constructor = "mlir::createLoopFusionPass()";
	  let options = [
	    // The option type is `double`, so the default is spelled as a double
	    // literal rather than the single-precision `0.30f`.
	    Option<"computeToleranceThreshold", "fusion-compute-tolerance", "double",
	           /*default=*/"0.30", "Fractional increase in additional computation "
	                               "tolerated while fusing">,
	    Option<"fastMemorySpace", "fusion-fast-mem-space", "unsigned",
	           /*default=*/"0",
	           "Faster memory space number to promote fusion buffers to">,
	    Option<"localBufSizeThreshold", "fusion-local-buf-threshold", "uint64_t",
	           /*default=*/"0", "Threshold size (KiB) for promoting local buffers "
	                            "to fast memory space">,
	    Option<"maximalFusion", "fusion-maximal", "bool", /*default=*/"false",
	           "Enables maximal loop fusion">,
	  ];
	}

	// Pass record for the `affine-pipeline-data-transfer` function pass.
	// Everything between `[{` and `}]` is a verbatim documentation literal (prose
	// plus a before/after IR example); it is data, not TableGen code.
	def AffinePipelineDataTransfer
	: FunctionPass<"affine-pipeline-data-transfer"> {
	// One-line summary; the two adjacent string literals form a single string.
	let summary = "Pipeline non-blocking data transfers between explicitly "
	"managed levels of the memory hierarchy";
	// Long-form documentation with an input/output example.
	let description = [{
	This pass performs a transformation to overlap non-blocking DMA operations
	in a loop with computations through double buffering. This is achieved by
	advancing dma_start operations with respect to other operations.

	Input

	```mlir
	func @pipelinedatatransfer() {
	%0 = alloc() : memref<256xf32>
	%1 = alloc() : memref<32xf32, 1>
	%2 = alloc() : memref<1xf32>
	%c0 = constant 0 : index
	%c128 = constant 128 : index
	affine.for %i0 = 0 to 8 {
	affine.dma_start %0[%i0], %1[%i0], %2[%c0], %c128 : memref<256xf32>, memref<32xf32, 1>, memref<1xf32>
	affine.dma_wait %2[%c0], %c128 : memref<1xf32>
	%3 = affine.load %1[%i0] : memref<32xf32, 1>
	%4 = "compute"(%3) : (f32) -> f32
	affine.store %4, %1[%i0] : memref<32xf32, 1>
	}
	return
	}
	```

	Output

	```mlir
	module {
	func @pipelinedatatransfer() {
	%c8 = constant 8 : index
	%c0 = constant 0 : index
	%0 = alloc() : memref<256xf32>
	%c0_0 = constant 0 : index
	%c128 = constant 128 : index
	%1 = alloc() : memref<2x32xf32, 1>
	%2 = alloc() : memref<2x1xf32>
	affine.dma_start %0[%c0], %1[%c0 mod 2, %c0], %2[%c0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
	affine.for %arg0 = 1 to 8 {
	affine.dma_start %0[%arg0], %1[%arg0 mod 2, %arg0], %2[%arg0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
	%8 = affine.apply #map3(%arg0)
	%9 = affine.apply #map4(%8)
	%10 = affine.apply #map4(%8)
	affine.dma_wait %2[%8 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
	%11 = affine.load %1[%8 mod 2, %8] : memref<2x32xf32, 1>
	%12 = "compute"(%11) : (f32) -> f32
	affine.store %12, %1[%8 mod 2, %8] : memref<2x32xf32, 1>
	}
	%3 = affine.apply #map3(%c8)
	%4 = affine.apply #map4(%3)
	%5 = affine.apply #map4(%3)
	affine.dma_wait %2[%3 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
	%6 = affine.load %1[%3 mod 2, %3] : memref<2x32xf32, 1>
	%7 = "compute"(%6) : (f32) -> f32
	affine.store %7, %1[%3 mod 2, %3] : memref<2x32xf32, 1>
	dealloc %2 : memref<2x1xf32>
	dealloc %1 : memref<2x32xf32, 1>
	return
	}
	}
	```
	}];
	// C++ expression recorded as this pass's constructor.
	let constructor = "mlir::createPipelineDataTransferPass()";
	}

	// Pass record for the `buffer-deallocation` function pass.
	// The `[{ ... }]` literal below is verbatim documentation text with a
	// before/after IR example; it is data, not TableGen code.
	def BufferDeallocation : FunctionPass<"buffer-deallocation"> {
	// One-line summary; the two adjacent string literals form a single string.
	let summary = "Adds all required dealloc operations for all allocations in the "
	"input program";
	// Long-form documentation with an input/output example.
	let description = [{
	This pass implements an algorithm to automatically introduce all required
	deallocation operations for all buffers in the input program. This ensures that
	the resulting program does not have any memory leaks.


	Input

	```mlir
	#map0 = affine_map<(d0) -> (d0)>
	module {
	func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
	cond_br %arg0, ^bb1, ^bb2
	^bb1:
	br ^bb3(%arg1 : memref<2xf32>)
	^bb2:
	%0 = alloc() : memref<2xf32>
	linalg.generic {
	args_in = 1 : i64,
	args_out = 1 : i64,
	indexing_maps = [#map0, #map0],
	iterator_types = ["parallel"]} %arg1, %0 {
	^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
	%tmp1 = exp %gen1_arg0 : f32
	linalg.yield %tmp1 : f32
	}: memref<2xf32>, memref<2xf32>
	br ^bb3(%0 : memref<2xf32>)
	^bb3(%1: memref<2xf32>):
	"linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
	return
	}
	}

	```

	Output

	```mlir
	#map0 = affine_map<(d0) -> (d0)>
	module {
	func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
	cond_br %arg0, ^bb1, ^bb2
	^bb1: // pred: ^bb0
	%0 = alloc() : memref<2xf32>
	linalg.copy(%arg1, %0) : memref<2xf32>, memref<2xf32>
	br ^bb3(%0 : memref<2xf32>)
	^bb2: // pred: ^bb0
	%1 = alloc() : memref<2xf32>
	linalg.generic {
	args_in = 1 : i64,
	args_out = 1 : i64,
	indexing_maps = [#map0, #map0],
	iterator_types = ["parallel"]} %arg1, %1 {
	^bb0(%arg3: f32, %arg4: f32): // no predecessors
	%4 = exp %arg3 : f32
	linalg.yield %4 : f32
	}: memref<2xf32>, memref<2xf32>
	%2 = alloc() : memref<2xf32>
	linalg.copy(%1, %2) : memref<2xf32>, memref<2xf32>
	dealloc %1 : memref<2xf32>
	br ^bb3(%2 : memref<2xf32>)
	^bb3(%3: memref<2xf32>): // 2 preds: ^bb1, ^bb2
	linalg.copy(%3, %arg2) : memref<2xf32>, memref<2xf32>
	dealloc %3 : memref<2xf32>
	return
	}

	}
	```

	}];
	// C++ expression recorded as this pass's constructor.
	let constructor = "mlir::createBufferDeallocationPass()";
	// TODO: this pass likely shouldn't depend on Linalg?
	// Dialects listed as dependencies of this pass (currently only Linalg; the
	// output example above contains linalg.copy operations).
	let dependentDialects = ["linalg::LinalgDialect"];
	}

	// `buffer-hoisting` function pass: records its constructor expression, a
	// one-line summary and the long-form description. The description literal is
	// reproduced verbatim.
	def BufferHoisting : FunctionPass<"buffer-hoisting"> {
	  let constructor = "mlir::createBufferHoistingPass()";
	  let summary =
	      "Optimizes placement of allocation operations by moving them into "
	      "common dominators and out of nested regions";
	  let description = [{
	This pass implements an approach to aggressively move allocations upwards
	into common dominators and out of nested regions.
	}];
	}

	// `buffer-loop-hoisting` function pass: records its constructor expression, a
	// one-line summary and the long-form description. The description literal is
	// reproduced verbatim.
	def BufferLoopHoisting : FunctionPass<"buffer-loop-hoisting"> {
	  let constructor = "mlir::createBufferLoopHoistingPass()";
	  let summary =
	      "Optimizes placement of allocation operations by moving them out of "
	      "loop nests";
	  let description = [{
	This pass implements an approach to aggressively move allocations upwards
	out of loop nests. It does not move allocations into common dominators.
	}];
	}

	// `promote-buffers-to-stack` function pass. Declares a single option,
	// `max-alloc-size-in-bytes` (member maxAllocSizeInBytes, unsigned, default
	// 1024). The `/*default=*/` marker is restored: the pasted `/default=/` is
	// not a comment and does not lex as TableGen. The description literal is
	// reproduced verbatim.
	def PromoteBuffersToStack : FunctionPass<"promote-buffers-to-stack"> {
	  let summary = "Promotes heap-based allocations to automatically managed "
	                "stack-based allocations";
	  let description = [{
	This pass implements a simple algorithm to convert heap-based memory
	allocations to stack-based ones. It uses a built-in heuristic to decide
	whether it makes sense to convert an allocation.
	}];
	  let constructor = "mlir::createPromoteBuffersToStackPass()";
	  let options = [
	    Option<"maxAllocSizeInBytes", "max-alloc-size-in-bytes", "unsigned",
	           /*default=*/"1024",
	           "Define the maximum size in bytes to promote allocations to stack.">,
	  ];
	}

	// `canonicalize` pass: records its constructor expression, summary and
	// description. The description literal is reproduced verbatim.
	def Canonicalizer : Pass<"canonicalize"> {
	  let constructor = "mlir::createCanonicalizerPass()";
	  let summary = "Canonicalize operations";
	  let description = [{
	This pass performs various types of canonicalizations over a set of
	operations. See [Operation Canonicalization](Canonicalization.md) for more
	details.
	}];
	}

	// `copy-removal` function pass: constructor expression and summary only.
	def CopyRemoval : FunctionPass<"copy-removal"> {
	  let constructor = "mlir::createCopyRemovalPass()";
	  let summary = "Remove the redundant copies from input IR";
	}

	// `cse` pass. Besides summary, description and constructor it declares two
	// statistics: numCSE ("num-cse'd") and numDCE ("num-dce'd"). The description
	// literal is reproduced verbatim.
	def CSE : Pass<"cse"> {
	  let constructor = "mlir::createCSEPass()";
	  let summary = "Eliminate common sub-expressions";
	  let statistics = [
	    Statistic<"numCSE",
	              "num-cse'd",
	              "Number of operations CSE'd">,
	    Statistic<"numDCE",
	              "num-dce'd",
	              "Number of operations DCE'd">
	  ];
	  let description = [{
	This pass implements a generalized algorithm for common sub-expression
	elimination. This pass relies on information provided by the
	`Memory SideEffect` interface to identify when it is safe to eliminate
	operations. See [Common subexpression elimination](https://en.wikipedia.org/wiki/Common_subexpression_elimination)
	for more general details on this optimization.
	}];
	}

	// `inline` pass. Options:
	//   disable-simplify -> disableCanonicalization (bool, default false)
	//   max-iterations   -> maxInliningIterations   (unsigned, default 4)
	// The `/*default=*/` markers are restored: the pasted `/default=/` is not a
	// comment and does not lex as TableGen.
	def Inliner : Pass<"inline"> {
	  let summary = "Inline function calls";
	  let constructor = "mlir::createInlinerPass()";
	  let options = [
	    Option<"disableCanonicalization", "disable-simplify", "bool",
	           /*default=*/"false",
	           "Disable running simplifications during inlining">,
	    Option<"maxInliningIterations", "max-iterations", "unsigned",
	           /*default=*/"4",
	           "Maximum number of iterations when inlining within an SCC">,
	  ];
	}

	// `snapshot-op-locations` pass. Options:
	//   filename -> fileName (std::string, empty default)
	//   tag      -> tag      (std::string, empty default)
	// The `/*default=*/` markers are restored: the pasted `/default=/` is not a
	// comment and does not lex as TableGen. The description literal is
	// reproduced verbatim.
	def LocationSnapshot : Pass<"snapshot-op-locations"> {
	  let summary = "Generate new locations from the current IR";
	  let description = [{
	This pass allows for generating new locations from the IR during any stage
	of compilation, by snapshotting the IR to a file and using that file to
	generate new locations for the operations.

	Depending on the value of the `tag` option, different resulting locations
	may be generated:

	* If unset, the original location of the operation is replaced.

	Example:

	```mlir
	// old:
	... loc("original_source.cpp":1:1)

	// new:
	... loc("snapshot_source.mlir":10:10)
	```

	* If set, the new location is fused with the original location in the form
	of a [`Name Location`](Diagnostics.md#name-location) with the specified tag.

	Example:

	```mlir
	// old:
	... loc("original_source.cpp":1:1)

	// new:
	... loc(fused["original_source.cpp":1:1, "snapshot"("snapshot_source.mlir":10:10)])
	```
	}];
	  let constructor = "mlir::createLocationSnapshotPass()";
	  let options = [
	    Option<"fileName", "filename", "std::string", /*default=*/"",
	           "The filename to print the generated IR">,
	    Option<"tag", "tag", "std::string", /*default=*/"",
	           "A tag to use when fusing the new locations with the "
	           "original. If unset, the locations are replaced.">,
	  ];
	}

	// Function pass registered under the `loop-coalescing` pass argument.
	def LoopCoalescing : FunctionPass<"loop-coalescing"> {
	  let constructor = "mlir::createLoopCoalescingPass()";
	  let summary =
	      "Coalesce nested loops with independent bounds into a single loop";
	}

	// Pass registered under the `loop-invariant-code-motion` pass argument.
	def LoopInvariantCodeMotion : Pass<"loop-invariant-code-motion"> {
	  let constructor = "mlir::createLoopInvariantCodeMotionPass()";
	  let summary = "Hoist loop invariant instructions outside of the loop";
	}

	// Function pass registered under the `memref-dataflow-opt` pass argument.
	// The description below carries a before/after example of the forwarding.
	def MemRefDataFlowOpt : FunctionPass<"memref-dataflow-opt"> {
	  let constructor = "mlir::createMemRefDataFlowOptPass()";
	  let summary = "Perform store/load forwarding for memrefs";
	  let description = [{
	This pass performs store to load forwarding for memref's to eliminate memory
	accesses and potentially the entire memref if all its accesses are
	forwarded.

	Input

	```mlir
	func @store_load_affine_apply() -> memref<10x10xf32> {
	%cf7 = constant 7.0 : f32
	%m = alloc() : memref<10x10xf32>
	affine.for %i0 = 0 to 10 {
	affine.for %i1 = 0 to 10 {
	affine.store %cf7, %m[%i0, %i1] : memref<10x10xf32>
	%v0 = affine.load %m[%i0, %i1] : memref<10x10xf32>
	%v1 = addf %v0, %v0 : f32
	}
	}
	return %m : memref<10x10xf32>
	}
	```

	Output

	```mlir
	module {
	func @store_load_affine_apply() -> memref<10x10xf32> {
	%cst = constant 7.000000e+00 : f32
	%0 = alloc() : memref<10x10xf32>
	affine.for %arg0 = 0 to 10 {
	affine.for %arg1 = 0 to 10 {
	affine.store %cst, %0[%arg0, %arg1] : memref<10x10xf32>
	%1 = addf %cst, %cst : f32
	}
	}
	return %0 : memref<10x10xf32>
	}
	}
	```
	}];
	}

	// Pass registered under the `normalize-memrefs` pass argument and declared
	// on `ModuleOp`. The description documents the supported operations, the
	// current limitations, and two before/after examples (tiling and
	// linearization layout maps).
	def NormalizeMemRefs : Pass<"normalize-memrefs", "ModuleOp"> {
	  let summary = "Normalize memrefs";
	  let description = [{
	This pass transforms memref types with a non-trivial
	[layout map](https://mlir.llvm.org/docs/LangRef/#layout-map) into
	memref types with an identity layout map, e.g. (i, j) -> (i, j). This
	pass is inter-procedural, in the sense that it can modify function
	interfaces and call sites that pass memref types. In order to modify
	memref types while preserving the original behavior, users of those
	memref types are also modified to incorporate the resulting layout map.
	For instance, an
	[AffineLoadOp](https://mlir.llvm.org/docs/Dialects/Affine/#affineload-affineloadop)
	will be updated to compose the layout map with the affine expression
	contained in the op. Operations marked with the
	[MemRefsNormalizable](https://mlir.llvm.org/docs/Traits/#memrefsnormalizable)
	trait are expected to be normalizable. Supported operations include affine
	operations, std.alloc, std.dealloc, and std.return.

	Given an appropriate layout map specified in the code, this transformation
	can express tiled or linearized access to multi-dimensional data
	structures, but will not modify memref types without an explicit layout
	map.

	Currently this pass is limited to only modify
	functions where all memref types can be normalized. If a function
	contains any operations that are not MemRefsNormalizable, then the function
	and any functions that call it or are called by it will not be modified.

	Input

	```mlir
	#tile = affine_map<(i) -> (i floordiv 4, i mod 4)>
	func @matmul(%A: memref<16xf64, #tile>,
	%B: index, %C: memref<16xf64>) -> (memref<16xf64, #tile>) {
	affine.for %arg3 = 0 to 16 {
	%a = affine.load %A[%arg3] : memref<16xf64, #tile>
	%p = mulf %a, %a : f64
	affine.store %p, %A[%arg3] : memref<16xf64, #tile>
	}
	%c = alloc() : memref<16xf64, #tile>
	%d = affine.load %c[0] : memref<16xf64, #tile>
	return %A: memref<16xf64, #tile>
	}
	```

	Output

	```mlir
	func @matmul(%arg0: memref<4x4xf64>, %arg1: index, %arg2: memref<16xf64>)
	-> memref<4x4xf64> {
	affine.for %arg3 = 0 to 16 {
	%3 = affine.load %arg0[%arg3 floordiv 4, %arg3 mod 4]: memref<4x4xf64>
	%4 = mulf %3, %3 : f64
	affine.store %4, %arg0[%arg3 floordiv 4, %arg3 mod 4]: memref<4x4xf64>
	}
	%0 = alloc() : memref<4x4xf64>
	%1 = affine.apply #map1()
	%2 = affine.load %0[0, 0] : memref<4x4xf64>
	return %arg0 : memref<4x4xf64>
	}
	```

	Input

	```mlir
	#linear8 = affine_map<(i, j) -> (i * 8 + j)>
	func @linearize(%arg0: memref<8x8xi32, #linear8>,
	%arg1: memref<8x8xi32, #linear8>,
	%arg2: memref<8x8xi32, #linear8>) {
	%c8 = constant 8 : index
	%c0 = constant 0 : index
	%c1 = constant 1 : index
	affine.for %arg3 = %c0 to %c8 {
	affine.for %arg4 = %c0 to %c8 {
	affine.for %arg5 = %c0 to %c8 {
	%0 = affine.load %arg0[%arg3, %arg5] : memref<8x8xi32, #linear8>
	%1 = affine.load %arg1[%arg5, %arg4] : memref<8x8xi32, #linear8>
	%2 = affine.load %arg2[%arg3, %arg4] : memref<8x8xi32, #linear8>
	%3 = muli %0, %1 : i32
	%4 = addi %2, %3 : i32
	affine.store %4, %arg2[%arg3, %arg4] : memref<8x8xi32, #linear8>
	}
	}
	}
	return
	}
	```

	Output

	```mlir
	func @linearize(%arg0: memref<64xi32>,
	%arg1: memref<64xi32>,
	%arg2: memref<64xi32>) {
	%c8 = constant 8 : index
	%c0 = constant 0 : index
	affine.for %arg3 = %c0 to %c8 {
	affine.for %arg4 = %c0 to %c8 {
	affine.for %arg5 = %c0 to %c8 {
	%0 = affine.load %arg0[%arg3 * 8 + %arg5] : memref<64xi32>
	%1 = affine.load %arg1[%arg5 * 8 + %arg4] : memref<64xi32>
	%2 = affine.load %arg2[%arg3 * 8 + %arg4] : memref<64xi32>
	%3 = muli %0, %1 : i32
	%4 = addi %2, %3 : i32
	affine.store %4, %arg2[%arg3 * 8 + %arg4] : memref<64xi32>
	}
	}
	}
	return
	}
	```
	}];
	  let constructor = "mlir::createNormalizeMemRefsPass()";
	}

	// Pass registered under the `parallel-loop-collapsing` pass argument.
	// Exposes three comma-separated list options, one per resulting loop index
	// position (0, 1 and 2); their help strings are kept parallel.
	def ParallelLoopCollapsing : Pass<"parallel-loop-collapsing"> {
	  let summary = "Collapse parallel loops to use less induction variables";
	  let constructor = "mlir::createParallelLoopCollapsingPass()";
	  let options = [
	    ListOption<"clCollapsedIndices0", "collapsed-indices-0", "unsigned",
	               "Which loop indices to combine into the position 0 loop index",
	               "llvm::cl::MiscFlags::CommaSeparated">,
	    ListOption<"clCollapsedIndices1", "collapsed-indices-1", "unsigned",
	               "Which loop indices to combine into the position 1 loop index",
	               "llvm::cl::MiscFlags::CommaSeparated">,
	    ListOption<"clCollapsedIndices2", "collapsed-indices-2", "unsigned",
	               "Which loop indices to combine into the position 2 loop index",
	               "llvm::cl::MiscFlags::CommaSeparated">,
	  ];
	}

	// Function pass registered under the `print-cfg-graph` pass argument.
	def PrintCFG : FunctionPass<"print-cfg-graph"> {
	  let constructor = "mlir::createPrintCFGGraphPass()";
	  let summary = "Print CFG graph per-Region";
	}

	// Pass registered under the `print-op-stats` pass argument and declared on
	// `ModuleOp`.
	def PrintOpStats : Pass<"print-op-stats", "ModuleOp"> {
	  let constructor = "mlir::createPrintOpStatsPass()";
	  let summary = "Print statistics of operations";
	}

	// Pass registered under the `print-op-graph` pass argument and declared on
	// `ModuleOp`.
	def PrintOp : Pass<"print-op-graph", "ModuleOp"> {
	  let constructor = "mlir::createPrintOpGraphPass()";
	  let summary = "Print op graph per-Region";
	}

	// Pass registered under the `sccp` pass argument.
	def SCCP : Pass<"sccp"> {
	  let constructor = "mlir::createSCCPPass()";
	  let summary = "Sparse Conditional Constant Propagation";
	  let description = [{
	This pass implements a general algorithm for sparse conditional constant
	propagation. This algorithm detects values that are known to be constant and
	optimistically propagates this throughout the IR. Any values proven to be
	constant are replaced, and removed if possible.

	This implementation is based on the algorithm described by Wegman and Zadeck
	in [“Constant Propagation with Conditional Branches”](https://dl.acm.org/doi/10.1145/103135.103136) (1991).
	}];
	}

	// Pass registered under the `strip-debuginfo` pass argument.
	def StripDebugInfo : Pass<"strip-debuginfo"> {
	  let constructor = "mlir::createStripDebugInfoPass()";
	  let summary = "Strip debug info from all operations";
	  let description = [{
	This pass strips the IR of any location information, by replacing all
	operation locations with [`unknown`](Diagnostics.md#unknown-location).
	}];
	}

	// Pass registered under the `symbol-dce` pass argument. The description
	// below walks through an example with one dead and one live private symbol.
	def SymbolDCE : Pass<"symbol-dce"> {
	  let constructor = "mlir::createSymbolDCEPass()";
	  let summary = "Eliminate dead symbols";
	  let description = [{
	This pass deletes all symbols that are found to be unreachable. This is done
	by computing the set of operations that are known to be live, propagating
	that liveness to other symbols, and then deleting all symbols that are not
	within this live set. Live symbols are those that have a
	[visibility](SymbolsAndSymbolTables.md#symbol-visibility) that extends
	beyond the IR, e.g. `public`, or those that are referenced by live symbols
	or other non-Symbol operations.

	For example, consider the following input:

	```mlir
	func @dead_private_function() attributes { sym_visibility = "private" }
	func @live_private_function() attributes { sym_visibility = "private" }

	// Note: The `public` isn't necessary here, as this is the default.
	func @public_function() attributes { sym_visibility = "public" } {
	"foo.return"() {uses = [@live_private_function]} : () -> ()
	}
	```

	A known live function, `public_function`, contains a reference to an
	otherwise non-live function `live_private_function`. After running
	`symbol-dce`, only these two symbols should remain, as the final symbol
	`dead_private_function` is not visible outside of the current IR and there
	are no links to known-live operations. After running, we get the expected:

	```mlir
	func @live_private_function() attributes { sym_visibility = "private" }

	func @public_function() attributes { sym_visibility = "public" } {
	"foo.return"() {uses = [@live_private_function]} : () -> ()
	}
	```

	See [Symbols and SymbolTables](SymbolsAndSymbolTables.md) for more
	information on `Symbols`.
	}];
	}
	#endif // MLIR_TRANSFORMS_PASSES