// RUN: mlir-opt -linalg-bufferize -buffer-hoisting -buffer-deallocation -split-input-file %s | FileCheck %s
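// This file exercises the conversion of linalg ops on tensors into ops on
// buffers: tensor results become out-parameter memrefs, -buffer-hoisting
// places the allocations, and -buffer-deallocation inserts matching deallocs.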
#map0 = affine_map<(d0) -> (d0)>
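// A generic op with two tensor results bufferizes to two allocations; each
// result is copied into its out-argument and the temporary is deallocated.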
// CHECK-LABEL: func @multiple_results
func @multiple_results(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
  %0, %1 = linalg.generic {
    indexing_maps = [#map0, #map0, #map0],
    iterator_types = ["parallel"]
  } ins(%arg0 : tensor<4xf32>) {
  ^bb0(%gen_arg1: f32):
    %tmp1 = exp %gen_arg1 : f32
    linalg.yield %tmp1, %tmp1 : f32, f32
  } -> tensor<4xf32>, tensor<4xf32>
  return %0, %1 : tensor<4xf32>, tensor<4xf32>
}
// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]], %[[ARG1_RESULT:.*]]: [[TYPE]], %[[ARG2_RESULT:.*]]: [[TYPE]])
// CHECK: %[[FIRST_ALLOC:.*]] = alloc() : [[TYPE]]
// CHECK: %[[SECOND_ALLOC:.*]] = alloc() : [[TYPE]]
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[NEW_ARG0]] : [[TYPE]]
// CHECK-SAME: outs(%[[FIRST_ALLOC]], %[[SECOND_ALLOC]] : [[TYPE]], [[TYPE]]
// CHECK-NEXT: ^{{[a-z0-9_]*}}
// CHECK-SAME: %{{.*}}: f32, %{{.*}}: f32, %{{.*}}: f32
// CHECK-NEXT: %{{.*}} = exp
// CHECK-NEXT: linalg.yield
// CHECK: linalg.copy(%[[FIRST_ALLOC]], %[[ARG1_RESULT]])
// CHECK: dealloc %[[FIRST_ALLOC]]
// CHECK: linalg.copy(%[[SECOND_ALLOC]], %[[ARG2_RESULT]])
// CHECK: dealloc %[[SECOND_ALLOC]]
// CHECK: return
// -----
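// Two chained generic ops: the buffer holding the intermediate result is
// deallocated right after its last use, before the final result is copied out.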
#map0 = affine_map<(d0) -> (d0)>
// CHECK-LABEL: func @chained_operations
func @chained_operations(%arg0: tensor<4xf32>) -> tensor<4xf32> {
  %0 = linalg.generic {
    indexing_maps = [#map0, #map0],
    iterator_types = ["parallel"]
  } ins(%arg0 : tensor<4xf32>) {
  ^bb0(%gen_arg1: f32):
    %tmp1 = exp %gen_arg1 : f32
    linalg.yield %tmp1 : f32
  } -> tensor<4xf32>
  %1 = linalg.generic {
    indexing_maps = [#map0, #map0],
    iterator_types = ["parallel"]
  } ins(%0 : tensor<4xf32>) {
  ^bb0(%gen_arg2: f32):
    %tmp2 = exp %gen_arg2 : f32
    linalg.yield %tmp2 : f32
  } -> tensor<4xf32>
  return %1 : tensor<4xf32>
}
// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]], %[[ARG1_RESULT:.*]]: [[TYPE]])
// CHECK: %[[FIRST_ALLOC:.*]] = alloc() : [[TYPE]]
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[NEW_ARG0]] : [[TYPE]]
// CHECK-SAME: outs(%[[FIRST_ALLOC]] : [[TYPE]]
// CHECK: ^{{[a-z0-9_]*}}
// CHECK-SAME: %{{.*}}: f32, %{{.*}}: f32
// CHECK: %[[SECOND_ALLOC:.*]] = alloc() : [[TYPE]]
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[FIRST_ALLOC]] : [[TYPE]]
// CHECK-SAME: outs(%[[SECOND_ALLOC]] : [[TYPE]]
// CHECK: ^{{[a-z0-9_]*}}
// CHECK-SAME: %{{.*}}: f32, %{{.*}}: f32
// CHECK: dealloc %[[FIRST_ALLOC]]
// CHECK: linalg.copy(%[[SECOND_ALLOC]], %[[ARG1_RESULT]])
// CHECK: dealloc %[[SECOND_ALLOC]]
// CHECK: return
// -----
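// A function containing no linalg ops and only scalar values passes through
// the pipeline unchanged.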
// CHECK-LABEL: func @no_linalg_op
func @no_linalg_op(%arg0: f32) -> (f32, f32) {
  %0 = mulf %arg0, %arg0 : f32
  return %0, %0 : f32, f32
}
// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]]) -> ([[TYPE]], [[TYPE]])
// CHECK: %[[RESULT:.*]] = mulf %[[NEW_ARG0]], %[[NEW_ARG0]] : [[TYPE]]
// CHECK: return %[[RESULT]], %[[RESULT]] : [[TYPE]], [[TYPE]]
// -----
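// Dynamically shaped results: the allocation sizes are recovered via dim ops
// on the input; the transposed indexing map of the second result swaps the
// two dimensions of its buffer.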
#map_2d = affine_map<(d0, d1) -> (d0, d1)>
#map_2d_inv = affine_map<(d0, d1) -> (d1, d0)>
func @dynamic_results(%arg0: tensor<?x?xf32>)
    -> (tensor<?x?xf32>, tensor<?x?xf32>) {
  %0, %1 = linalg.generic {
    indexing_maps = [#map_2d, #map_2d, #map_2d_inv],
    iterator_types = ["parallel", "parallel"]
  } ins(%arg0 : tensor<?x?xf32>) {
  ^bb0(%gen_arg1: f32):
    %tmp1 = exp %gen_arg1 : f32
    linalg.yield %tmp1, %tmp1 : f32, f32
  } -> tensor<?x?xf32>, tensor<?x?xf32>
  return %0, %1 : tensor<?x?xf32>, tensor<?x?xf32>
}
// CHECK: #map0 = affine_map<(d0, d1) -> (d0, d1)>
// CHECK: #map1 = affine_map<(d0, d1) -> (d1, d0)>
// CHECK-LABEL: func @dynamic_results
// CHECK-SAME: (%[[INPUT:.*]]: [[TYPE:.*]], %[[OUT_1:.*]]: [[TYPE]], %[[OUT_2:.*]]: [[TYPE]]) {
// CHECK: %[[C0:.*]] = constant 0 : index
// CHECK: %[[DIM_0:.*]] = dim %[[INPUT]], %[[C0]] : [[TYPE]]
// CHECK: %[[C1:.*]] = constant 1 : index
// CHECK: %[[DIM_1:.*]] = dim %[[INPUT]], %[[C1]] : [[TYPE]]
// CHECK: %[[OUT_BUF_1:.*]] = alloc(%[[DIM_0]], %[[DIM_1]]) : [[TYPE]]
// CHECK: %[[OUT_BUF_2:.*]] = alloc(%[[DIM_1]], %[[DIM_0]]) : [[TYPE]]
// CHECK: linalg.generic {indexing_maps = [#map0, #map0, #map1], {{.*}}}
// CHECK-SAME: ins(%[[INPUT]] : [[TYPE]])
// CHECK-SAME: outs(%[[OUT_BUF_1]], %[[OUT_BUF_2]] : [[TYPE]], [[TYPE]]) {
// CHECK: linalg.copy(%[[OUT_BUF_1]], %[[OUT_1]]) : [[TYPE]], [[TYPE]]
// CHECK: dealloc %[[OUT_BUF_1]] : [[TYPE]]
// CHECK: linalg.copy(%[[OUT_BUF_2]], %[[OUT_2]]) : [[TYPE]], [[TYPE]]
// CHECK: dealloc %[[OUT_BUF_2]] : [[TYPE]]
// CHECK: return
// -----
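// A dense tensor constant is bufferized by storing an equivalent vector
// constant into an allocated memref<vector<6xf32>> and reshaping the
// flattened buffer; the caller passes an output buffer instead of
// receiving a tensor result.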
func @foo() -> tensor<2x3xf32> {
// CHECK-LABEL: func @foo(
// CHECK-SAME: %[[A:[0-9a-z]*]]: memref<2x3xf32>) {
  %0 = constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf32>
// CHECK-NEXT: %[[ALLOC:.*]] = alloc() : memref<vector<6xf32>>
// CHECK-NEXT: %[[CST:.*]] = constant dense<[0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00, 5.000000e+00]> : vector<6xf32>
// CHECK-NEXT: store %[[CST]], %[[ALLOC]][] : memref<vector<6xf32>>
// CHECK-NEXT: %[[FLAT:.*]] = vector.type_cast %[[ALLOC]] : memref<vector<6xf32>> to memref<6xf32>
// CHECK-NEXT: %[[RES:.*]] = linalg.reshape %[[FLAT]] {{.*}} : memref<6xf32> into memref<2x3xf32>
  return %0 : tensor<2x3xf32>
// CHECK-NEXT: linalg.copy(%[[RES]], %[[A]]) : memref<2x3xf32>, memref<2x3xf32>
// CHECK-NEXT: dealloc %[[ALLOC]] : memref<vector<6xf32>>
// CHECK-NEXT: return
}
func @bar() {
// CHECK-LABEL: func @bar() {
  %0 = call @foo() : () -> tensor<2x3xf32>
// CHECK-NEXT: %[[ALLOC:.*]] = alloc() : memref<2x3xf32>
// CHECK-NEXT: call @foo(%[[ALLOC]]) : (memref<2x3xf32>) -> ()
  // Instead of relying on tensor_store, which introduces aliasing, we rely on
  // the conversion of print_memref_f32(tensor<*xf32>) to
  // print_memref_f32(memref<*xf32>).
  // Note that this skips a step: we would need at least some function
  // attribute declaring the conversion valid (e.g. when we statically know
  // that things will play nicely at the C ABI boundary).
  %unranked = tensor_cast %0 : tensor<2x3xf32> to tensor<*xf32>
// CHECK-NEXT: %[[UNRANKED:.*]] = memref_cast %[[ALLOC]] :
// CHECK-SAME: memref<2x3xf32> to memref<*xf32>
  call @print_memref_f32(%unranked) : (tensor<*xf32>) -> ()
// CHECK-NEXT: call @print_memref_f32(%[[UNRANKED]]) : (memref<*xf32>) -> ()
  return
// CHECK-NEXT: dealloc %[[ALLOC]] : memref<2x3xf32>
// CHECK-NEXT: return
// CHECK-NEXT: return
}
// This gets converted to a function operating on memref<*xf32>.
// Note that this skips a step: we would need at least some function attribute
// declaring the conversion valid (e.g. when we statically know that things
// will play nicely at the C ABI boundary).
func @print_memref_f32(%ptr : tensor<*xf32>)
// CHECK-LABEL: func @print_memref_f32(memref<*xf32>)
// -----
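// A single-use init tensor is updated in place: the generic writes into the
// init buffer, which is then copied to the result; no temporary is allocated.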
#accesses = [
  affine_map<(i, j, k) -> (j, i, k)>,
  affine_map<(i, j, k) -> (i, j)>
]
#trait = {
  indexing_maps = #accesses,
  iterator_types = ["parallel", "parallel", "reduction"]
}
func @generic_with_init_tensor(%arg0: tensor<2x3x4xvector<3x4xi4>>,
                               %arg1: tensor<3x2xf32>) -> (tensor<3x2xf32>) {
  %0 = linalg.generic #trait
    ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>)
    init(%arg1 : tensor<3x2xf32>) {
  ^bb(%v0: vector<3x4xi4>, %v1: f32) :
    %f0 = constant 0.0 : f32
    linalg.yield %f0 : f32
  } -> tensor<3x2xf32>
  return %0 : tensor<3x2xf32>
}
// CHECK-LABEL: func @generic_with_init_tensor
// CHECK-SAME: (%[[ARG0:.*]]: memref<2x3x4xvector<3x4xi4>>, %[[ARG1:.*]]: memref<3x2xf32>, %[[RESULT0:.*]]: memref<3x2xf32>) {
// CHECK-NEXT: linalg.generic
// CHECK: linalg.copy(%[[ARG1]], %[[RESULT0]])
// CHECK-NEXT: return
// CHECK-NOT: %
// -----
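// An init tensor with two uses forces a fresh allocation per generic, each
// seeded with a copy of the init operand, so the two ops cannot interfere.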
#accesses = [
  affine_map<(i, j, k) -> (j, i, k)>,
  affine_map<(i, j, k) -> (i, j)>
]
#trait = {
  indexing_maps = #accesses,
  iterator_types = ["parallel", "parallel", "reduction"]
}
func @init_tensor_with_2_uses(%arg0: tensor<2x3x4xvector<3x4xi4>>,
                              %arg1: tensor<3x2xf32>)
    -> (tensor<3x2xf32>, tensor<3x2xf32>) {
  %0 = linalg.generic #trait
    ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>)
    init(%arg1 : tensor<3x2xf32>) {
  ^bb(%v0: vector<3x4xi4>, %v1: f32) :
    %f0 = constant 0.0 : f32
    linalg.yield %f0 : f32
  } -> tensor<3x2xf32>
  %1 = linalg.generic #trait
    ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>)
    init(%arg1 : tensor<3x2xf32>) {
  ^bb(%v0: vector<3x4xi4>, %v1: f32) :
    %f0 = constant 0.0 : f32
    linalg.yield %f0 : f32
  } -> tensor<3x2xf32>
  return %0, %1 : tensor<3x2xf32>, tensor<3x2xf32>
}
// CHECK-LABEL: func @init_tensor_with_2_uses
// CHECK-SAME: (%[[ARG0:.*]]: memref<2x3x4xvector<3x4xi4>>, %[[ARG1:.*]]: memref<3x2xf32>, %[[RESULT0:.*]]: memref<3x2xf32>, %[[RESULT1:.*]]: memref<3x2xf32>) {
// CHECK-NEXT: %[[ALLOC0:.*]] = alloc
// CHECK-NEXT: linalg.copy(%[[ARG1]], %[[ALLOC0]])
// CHECK-NEXT: linalg.generic
// CHECK-SAME: outs(%[[ALLOC0]]
// CHECK-NEXT: ^bb
// CHECK-NEXT: constant
// CHECK-NEXT: yield
// CHECK-NEXT: }
// CHECK-NEXT: %[[ALLOC1:.*]] = alloc
// CHECK-NEXT: linalg.copy(%[[ARG1]], %[[ALLOC1]])
// CHECK-NEXT: linalg.generic
// CHECK-SAME: outs(%[[ALLOC1]]
// CHECK-NEXT: ^bb
// CHECK-NEXT: constant
// CHECK-NEXT: yield
// CHECK-NEXT: }
// CHECK-NEXT: linalg.copy(%[[ALLOC0]], %[[RESULT0]])
// CHECK-NEXT: dealloc
// CHECK-NEXT: linalg.copy(%[[ALLOC1]], %[[RESULT1]])
// CHECK-NEXT: dealloc
// CHECK-NEXT: return
// CHECK-NOT: %
// -----
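// A def-use chain of single-use init tensors needs no allocations at all:
// both generics update their buffers in place and only the final copy-out to
// the result argument remains.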
#accesses = [
  affine_map<(i, j, k) -> (j, i, k)>,
  affine_map<(i, j, k) -> (i, j)>
]
#trait = {
  indexing_maps = #accesses,
  iterator_types = ["parallel", "parallel", "reduction"]
}
func @init_tensor_with_1_use_def_chain(%arg0: tensor<2x3x4xvector<3x4xi4>>,
                                       %arg1: tensor<3x2xf32>)
    -> (tensor<3x2xf32>) {
  %0 = linalg.generic #trait
    ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>)
    init(%arg1 : tensor<3x2xf32>) {
  ^bb(%v0: vector<3x4xi4>, %v1: f32) :
    %f0 = constant 0.0 : f32
    linalg.yield %f0 : f32
  } -> tensor<3x2xf32>
  %1 = linalg.generic #trait
    ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>)
    init(%0 : tensor<3x2xf32>) {
  ^bb(%v0: vector<3x4xi4>, %v1: f32) :
    %f0 = constant 0.0 : f32
    linalg.yield %f0 : f32
  } -> tensor<3x2xf32>
  return %1 : tensor<3x2xf32>
}
// CHECK-LABEL: func @init_tensor_with_1_use_def_chain
// CHECK-SAME: (%[[ARG0:.*]]: memref<2x3x4xvector<3x4xi4>>, %[[ARG1:.*]]: memref<3x2xf32>, %[[RESULT0:.*]]: memref<3x2xf32>) {
// CHECK-NEXT: linalg.generic
// CHECK-NEXT: ^bb
// CHECK-NEXT: constant
// CHECK-NEXT: yield
// CHECK-NEXT: }
// CHECK-NEXT: linalg.generic
// CHECK-NEXT: ^bb
// CHECK-NEXT: constant
// CHECK-NEXT: yield
// CHECK-NEXT: }
// CHECK-NEXT: linalg.copy(%[[ARG1]], %[[RESULT0]])
// CHECK-NEXT: return
// CHECK-NOT: %