Add Eigen based implementation of tosa conv2d op (#231)

An optional, better-performing replacement for the current implementation of the TOSA conv2d op. It can be enabled by defining the preprocessor macro `EMITC_TOSA_USE_EIGEN`.

Co-authored-by: Marius Brehler <marius.brehler@iml.fraunhofer.de>
diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml
index 22026b3..ed7966d 100644
--- a/.github/workflows/build-and-test.yaml
+++ b/.github/workflows/build-and-test.yaml
@@ -109,6 +109,9 @@
     - name: Configure environment
       run: echo "$GITHUB_WORKSPACE/${LLVM}/install/bin" >> $GITHUB_PATH
 
+    - name: Install dependencies
+      run: sudo apt-get update && sudo apt-get install -y libeigen3-dev # refresh the package index first so the install does not fail on a stale index
+
     - name: Checkout EmitC
       uses: actions/checkout@v2
       with:
@@ -143,7 +146,9 @@
           -DLLVM_EXTERNAL_LIT=`pwd`/../../${LLVM}/build/bin/llvm-lit
         cmake --build . --target check-emitc -- -j$(nproc)
         cmake --build . --target MLIREmitCAllTests -- -j$(nproc)
+        cmake --build . --target MLIREmitCEigenTests -- -j$(nproc)
         ./unittests/MLIREmitCAllTests
+        ./unittests/MLIREmitCEigenTests
 
   build-release:
     name: Build and test EmitC (Release)
@@ -153,6 +158,9 @@
     - name: Configure environment
       run: echo "$GITHUB_WORKSPACE/${LLVM}/install/bin" >> $GITHUB_PATH
 
+    - name: Install dependencies
+      run: sudo apt-get update && sudo apt-get install -y libeigen3-dev # refresh the package index first so the install does not fail on a stale index
+
     - name: Checkout EmitC
       uses: actions/checkout@v2
       with:
@@ -187,7 +195,9 @@
           -DLLVM_EXTERNAL_LIT=`pwd`/../../${LLVM}/build/bin/llvm-lit
         cmake --build . --target check-emitc -- -j$(nproc)
         cmake --build . --target MLIREmitCAllTests -- -j$(nproc)
+        cmake --build . --target MLIREmitCEigenTests -- -j$(nproc)
         ./unittests/MLIREmitCAllTests
+        ./unittests/MLIREmitCEigenTests
 
     - name: Cache e2e
       uses: actions/cache@v2
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6d5de32..7612b67 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,6 +12,7 @@
 
 option(EMITC_BUILD_EMBEDDED "Build EmitC as part of another project" OFF)
 option(EMITC_ENABLE_HLO "Enables building MLIR-HLO." ON)
+option(EMITC_TOSA_TEST_EIGEN "Enables testing of Eigen library for some TOSA Ops." ON)
 option(EMITC_INCLUDE_TESTS "Generate build targets for the MLIR EmitC unit tests." ON)
 # TODO: Set to MLIR or LLVM default
 #       ${LLVM_INCLUDE_TESTS})
@@ -72,6 +73,15 @@
   include_directories(${mlir-hlo_BINARY_DIR}/include)
 endif()
 
+# Optional Eigen dependency for some TOSA Ops (the Eigen tests are switched off with a warning if Eigen is missing)
+if(EMITC_TOSA_TEST_EIGEN)
+  find_package(Eigen3 3.3.1 NO_MODULE) # not REQUIRED: a missing Eigen is handled below
+  if(NOT TARGET Eigen3::Eigen)
+    message(WARNING "Should test with Eigen, but Eigen was not found.")
+    set(EMITC_TOSA_TEST_EIGEN OFF) # unittests/CMakeLists.txt checks this before adding the Eigen test target
+  endif()
+endif()
+
 #-------------------------------------------------------------------------------
 # Directory setup
 #-------------------------------------------------------------------------------
diff --git a/include/emitc/emitc_tosa.h b/include/emitc/emitc_tosa.h
index 61a19c9..f2ba90e 100644
--- a/include/emitc/emitc_tosa.h
+++ b/include/emitc/emitc_tosa.h
@@ -20,6 +20,10 @@
 #include "emitc_core_ops.h"
 #include "emitc_std.h"
 
+#ifdef EMITC_TOSA_USE_EIGEN
+#include "emitc_tosa_eigen.h"
+#endif
+
 namespace emitc {
 namespace tosa {
 
@@ -155,6 +159,8 @@
 }
 
 /// Functions for other TOSA ops.
+// Disable Conv2DOp if Eigen implementation is used
+#ifndef EMITC_TOSA_USE_EIGEN
 // Conv2DOp
 template <typename Dest, typename Src, typename Weights>
 Dest conv2d(Src input, Weights weights, Tensor1D<int64_t, 4> padding,
@@ -226,6 +232,7 @@
 
   return output;
 }
+#endif
 
 // DepthwiseConv2DOp
 template <typename Dest, typename Src, typename Weights>
diff --git a/include/emitc/emitc_tosa_eigen.h b/include/emitc/emitc_tosa_eigen.h
new file mode 100644
index 0000000..38257a3
--- /dev/null
+++ b/include/emitc/emitc_tosa_eigen.h
@@ -0,0 +1,119 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file defines alternative implementations for the functions in
+// emitc_tosa.h utilizing Eigen.
+
+#ifndef EMITC_EMITC_TOSA_EIGEN_H
+#define EMITC_EMITC_TOSA_EIGEN_H
+
+#include "emitc_std.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+namespace emitc {
+// Returns a non-owning, row-major Eigen::TensorMap over the storage of `t`;
+// `t` must outlive the view. Lives in a named namespace because an unnamed
+// one in a header gives every translation unit its own distinct function.
+template <typename T, size_t... Shape>
+inline auto as_eigen(Tensor<T, Shape...> &t) {
+  return Eigen::TensorMap<Eigen::Tensor<T, sizeof...(Shape), Eigen::RowMajor>>(
+      &*t.begin(), Shape...);
+}
+} // namespace emitc
+
+namespace emitc {
+namespace tosa {
+
+// Conv2DOp: extracts input patches and contracts them with the weights
+template <typename Dest, typename Src, typename Weights>
+Dest conv2d(Src input, Weights weights, Tensor1D<int64_t, 4> padding,
+            Tensor1D<int64_t, 2> stride, Tensor1D<int64_t, 2> dilation) {
+  // Input is [N,IH,IW,IC], weights are [OC,KH,KW,IC] and output is [N,H,W,OC]
+  static_assert(is_tensor_of_dim<4, Src>::value,
+                "Expected 4 dimensional input");
+  static_assert(is_tensor_of_dim<4, Dest>::value,
+                "Expected 4 dimensional output");
+  static_assert(is_tensor_of_dim<4, Weights>::value,
+                "Expected 4 dimensional weights");
+  // Shapes are read from the tensor types, so mismatches fail to compile
+  constexpr Eigen::Index N = Src::dim(0);
+  constexpr Eigen::Index IC = Src::dim(3);
+  constexpr Eigen::Index KF = Weights::dim(0);
+  constexpr Eigen::Index KH = Weights::dim(1);
+  constexpr Eigen::Index KW = Weights::dim(2);
+  constexpr Eigen::Index KC = Weights::dim(3);
+  constexpr Eigen::Index ON = Dest::dim(0);
+  constexpr Eigen::Index H = Dest::dim(1);
+  constexpr Eigen::Index W = Dest::dim(2);
+  constexpr Eigen::Index OC = Dest::dim(3);
+
+  static_assert(N == ON, "Expected input batch size to match output");
+  static_assert(IC == KC, "Expected input channels to match weights");
+  static_assert(OC == KF, "Expected output channels to match weights");
+  // padding is {pt, pb, pl, pr}: pt/pb pad dim 1 (IH), pl/pr pad dim 2 (IW)
+  const int64_t pt = padding[0];
+  const int64_t pb = padding[1];
+  const int64_t pl = padding[2];
+  const int64_t pr = padding[3];
+  const int64_t SH = stride[0];
+  const int64_t SW = stride[1];
+  const int64_t DH = dilation[0];
+  const int64_t DW = dilation[1];
+
+  Dest output;
+  // view on the input [N,IH,IW,IC]
+  auto e_input = as_eigen(input);
+
+  // weights permuted from [OC,KH,KW,IC] to [KH,KW,IC,OC]
+#if EIGEN_VERSION_AT_LEAST(3, 4, 0)
+  auto e_weight =
+      as_eigen(weights).shuffle(Eigen::array<Eigen::Index, 4>({1, 2, 3, 0}));
+#else // older Eigen: store the shuffled weights in a concrete tensor first
+  Eigen::Tensor<typename Weights::value_type, 4, Eigen::RowMajor> e_weight =
+      as_eigen(weights).shuffle(Eigen::array<Eigen::Index, 4>({1, 2, 3, 0}));
+#endif
+
+  // writable view on the output [N,H,W,OC]
+  auto e_output = as_eigen(output);
+
+  // apply padding to input [N,IH+pt+pb,IW+pl+pr,IC]
+  auto input_pad = e_input.pad(Eigen::array<std::pair<int64_t, int64_t>, 4>{
+      std::make_pair(0, 0), std::make_pair(pt, pb), std::make_pair(pl, pr),
+      std::make_pair(0, 0)});
+  // create tensor containing input patches [N,H*W,KH,KW,IC]; NOTE(review):
+  // W arguments precede H ones, presumably Eigen's row-major order; confirm
+  auto patches = input_pad.extract_image_patches(KW, KH, SW, SH, DW, DH,
+                                                 Eigen::PADDING_VALID);
+
+  // create 2d tensor from patches [N*H*W,KH*KW*IC]
+  auto patches_m =
+      patches.reshape(Eigen::DSizes<Eigen::Index, 2>{N * H * W, KH * KW * IC});
+
+  // create 2d tensor from weights [KH*KW*IC,OC]
+  auto weight_m =
+      e_weight.reshape(Eigen::DSizes<Eigen::Index, 2>{KH * KW * IC, OC});
+
+  // contract patch dim 1 with weight dim 0, giving [N*H*W,OC]
+  auto contr = patches_m.contract(
+      weight_m, Eigen::array<Eigen::IndexPair<Eigen::Index>, 1>{
+                    Eigen::IndexPair<Eigen::Index>(1, 0)});
+
+  // reshape result to [N,H,W,OC]; the assignment writes into output's storage
+  e_output = contr.reshape(Eigen::DSizes<Eigen::Index, 4>{N, H, W, OC});
+
+  return output;
+}
+
+} // namespace tosa
+} // namespace emitc
+
+#endif // EMITC_EMITC_TOSA_EIGEN_H
diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt
index 7c8f784..b7f0afe 100644
--- a/unittests/CMakeLists.txt
+++ b/unittests/CMakeLists.txt
@@ -1,3 +1,13 @@
+set(LLVM_OPTIONAL_SOURCES
+  emitc_mhlo.cpp
+  emitc_arith.cpp
+  emitc_std.cpp
+  emitc_tensor.cpp
+  emitc_tosa_eigen.cpp
+  emitc_tosa.cpp
+  emitc_types.cpp
+)
+
 add_custom_target(MLIREmitCUnitTests)
 set_target_properties(MLIREmitCUnitTests PROPERTIES FOLDER "MLIR EmitC Tests")
 
@@ -9,9 +19,22 @@
   Support
   )
 
-add_mlir_unittest(MLIREmitCAllTests emitc_mhlo.cpp emitc_arith.cpp emitc_std.cpp emitc_tensor.cpp emitc_tosa.cpp emitc_types.cpp)
+add_mlir_unittest(MLIREmitCAllTests emitc_mhlo.cpp emitc_arith.cpp emitc_std.cpp emitc_tensor.cpp emitc_tosa.cpp emitc_tosa_eigen.cpp emitc_types.cpp)
 
 target_include_directories(MLIREmitCAllTests
   PRIVATE ${gtest_SOURCE_DIR}/include
   PRIVATE ${gmock_SOURCE_DIR}/include
 )
+
+if(EMITC_TOSA_TEST_EIGEN)
+  add_mlir_unittest(MLIREmitCEigenTests emitc_tosa_eigen.cpp)
+  # Same test source as in MLIREmitCAllTests, built with the Eigen conv2d selected
+  target_compile_definitions(MLIREmitCEigenTests PRIVATE EMITC_TOSA_USE_EIGEN)
+
+  target_include_directories(MLIREmitCEigenTests
+    PRIVATE ${gtest_SOURCE_DIR}/include
+    PRIVATE ${gmock_SOURCE_DIR}/include
+  )
+
+  target_link_libraries(MLIREmitCEigenTests PRIVATE Eigen3::Eigen)
+endif()
diff --git a/unittests/emitc_tosa.cpp b/unittests/emitc_tosa.cpp
index 32e0dc2..667b7c4 100644
--- a/unittests/emitc_tosa.cpp
+++ b/unittests/emitc_tosa.cpp
@@ -137,53 +137,6 @@
 }
 
 // Other ops
-TEST(tosa, conv2d) {
-  {
-    // strides = 1
-    using InputType = Tensor4D<float, 1, 4, 5, 2>;  // N H W C
-    using WeightType = Tensor4D<float, 1, 3, 2, 2>; // COUT KH KW CIN
-    using ResultType = Tensor4D<float, 1, 4, 5, 1>; // N H W C
-    InputType input{1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14,
-                    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
-                    29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40};
-    WeightType weights{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
-    ResultType expected_result{600,  736,  872,  1008, 476,  1310, 1466,
-                               1622, 1778, 805,  2090, 2246, 2402, 2558,
-                               1135, 1080, 1152, 1224, 1296, 524};
-
-    Tensor1D<int64_t, 4> padding{1, 1, 0, 1}; // {pt, pb, pl, pr}
-    Tensor1D<int64_t, 2> dilation{1, 1};
-    Tensor1D<int64_t, 2> stride{1, 1};
-
-    ResultType result =
-        tosa::conv2d<ResultType>(input, weights, padding, stride, dilation);
-    EXPECT_THAT(result, Pointwise(FloatNear(EPSILON), expected_result));
-  }
-  {
-    // Strided convolution
-    using InputType = Tensor4D<float, 1, 4, 4, 1>;  // N H W C
-    using WeightType = Tensor4D<float, 1, 2, 2, 1>; // COUT KH KW CIN
-    using ResultType = Tensor4D<float, 1, 2, 2, 1>; // N H W C
-    // clang-format off
-    InputType input{1,  2,  3,  4,
-                    5,  6,  7,  8,
-                    9,  10, 11, 12,
-                    13, 14, 15, 16};
-    WeightType weights{1, 2,
-                       3, 4};
-    ResultType expected_result{44,  64,
-                              124, 144};
-    // clang-format on
-    Tensor1D<int64_t, 4> padding{0, 0, 0, 1}; // {pt, pb, pl, pr}
-    Tensor1D<int64_t, 2> dilation{1, 1};
-    Tensor1D<int64_t, 2> stride{2, 2};
-
-    ResultType result =
-        tosa::conv2d<ResultType>(input, weights, padding, stride, dilation);
-    EXPECT_THAT(result, Pointwise(FloatNear(EPSILON), expected_result));
-  }
-}
-
 TEST(tosa, depthwise_conv2d) {
   {
     // test for channel_multiplier=1
diff --git a/unittests/emitc_tosa_eigen.cpp b/unittests/emitc_tosa_eigen.cpp
new file mode 100644
index 0000000..e0e534d
--- /dev/null
+++ b/unittests/emitc_tosa_eigen.cpp
@@ -0,0 +1,81 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gmock/gmock.h"
+
+#ifdef EMITC_TOSA_USE_EIGEN
+#include "emitc/emitc_tosa_eigen.h"
+#else
+#include "emitc/emitc_tosa.h"
+#endif
+
+#include "emitc/emitc_types.h"
+
+namespace {
+
+using namespace emitc;
+using ::testing::Eq;
+using ::testing::FloatEq;
+using ::testing::FloatNear;
+using ::testing::Pointwise;
+
+const float EPSILON = 5e-4;
+
+// Other ops
+TEST(tosa, conv2d) {
+  {
+    // Unit stride with asymmetric padding and two input channels
+    using Ifm = Tensor4D<float, 1, 4, 5, 2>;    // N H W C
+    using Kernel = Tensor4D<float, 1, 3, 2, 2>; // COUT KH KW CIN
+    using Ofm = Tensor4D<float, 1, 4, 5, 1>;    // N H W C
+    Ifm ifm{1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14,
+            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
+            29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40};
+    Kernel kernel{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+    Ofm reference{600,  736,  872,  1008, 476,  1310, 1466,
+                  1622, 1778, 805,  2090, 2246, 2402, 2558,
+                  1135, 1080, 1152, 1224, 1296, 524};
+
+    Tensor1D<int64_t, 2> strides{1, 1};
+    Tensor1D<int64_t, 2> dilations{1, 1};
+    Tensor1D<int64_t, 4> pads{1, 1, 0, 1}; // {pt, pb, pl, pr}
+
+    Ofm actual =
+        tosa::conv2d<Ofm>(ifm, kernel, pads, strides, dilations);
+    EXPECT_THAT(actual, Pointwise(FloatNear(EPSILON), reference));
+  }
+  {
+    // Stride 2 in both spatial dimensions
+    using Ifm = Tensor4D<float, 1, 4, 4, 1>;    // N H W C
+    using Kernel = Tensor4D<float, 1, 2, 2, 1>; // COUT KH KW CIN
+    using Ofm = Tensor4D<float, 1, 2, 2, 1>;    // N H W C
+    // clang-format off
+    Ifm ifm{1,  2,  3,  4,
+            5,  6,  7,  8,
+            9,  10, 11, 12,
+            13, 14, 15, 16};
+    Kernel kernel{1, 2,
+                  3, 4};
+    Ofm reference{44,  64,
+                  124, 144};
+    // clang-format on
+    Tensor1D<int64_t, 2> strides{2, 2};
+    Tensor1D<int64_t, 2> dilations{1, 1};
+    Tensor1D<int64_t, 4> pads{0, 0, 0, 1}; // {pt, pb, pl, pr}
+
+    Ofm actual =
+        tosa::conv2d<Ofm>(ifm, kernel, pads, strides, dilations);
+    EXPECT_THAT(actual, Pointwise(FloatNear(EPSILON), reference));
+  }
+}
+
+} // namespace