Expand `mhlo::select` to handle scalar `pred`

According to
https://www.tensorflow.org/mlir/hlo_ops#mhloselect_mlirmhloselectop the
`pred` operand may be a scalar, in which case it is broadcast. Instead
of broadcasting, we keep `pred` as a 0D tensor and overload the reference
implementation with a function that applies its single value to every
element.

Fixes #280
diff --git a/reference-implementation/include/emitc/mhlo.h b/reference-implementation/include/emitc/mhlo.h
index 9919ede..d737f8b 100644
--- a/reference-implementation/include/emitc/mhlo.h
+++ b/reference-implementation/include/emitc/mhlo.h
@@ -601,6 +601,17 @@
 }
 
 template <typename Src, IsTensor<Src> = true>
+inline Src select(Tensor<bool> pred, Src on_true, Src on_false) {
+  Src z;
+
+  for (size_t i = 0; i < Src::size(); i++) {
+    z[i] = pred[0] ? on_true[i] : on_false[i];
+  }
+
+  return z;
+}
+
+template <typename Src, IsTensor<Src> = true>
 inline Src select(typename replace_element_type<bool, Src>::type pred,
                   Src on_true, Src on_false) {
   Src z;
diff --git a/reference-implementation/unittests/mhlo.cpp b/reference-implementation/unittests/mhlo.cpp
index 98b5441..749796b 100644
--- a/reference-implementation/unittests/mhlo.cpp
+++ b/reference-implementation/unittests/mhlo.cpp
@@ -1901,6 +1901,16 @@
     EXPECT_THAT(result, Pointwise(FloatEq(), expected_result));
   }
   {
+    Tensor1D<float, 2> s{-1.3f, 2.4f};
+    Tensor1D<float, 2> t{0.2f, -3.7f};
+    Tensor0D<bool> p{true};
+
+    Tensor1D<float, 2> expected_result = s;
+    Tensor1D<float, 2> result = mhlo::select<Tensor1D<float, 2>>(p, s, t);
+
+    EXPECT_THAT(result, Pointwise(FloatEq(), expected_result));
+  }
+  {
     Tensor2D<long, 2, 2> s{3, 1, 4, 9};
     Tensor2D<long, 2, 2> t{-2, 8, 6, -10};
     Tensor2D<bool, 2, 2> p{false, true, true, false};
@@ -1910,6 +1920,16 @@
 
     EXPECT_THAT(result, Pointwise(Eq(), expected_result));
   }
+  {
+    Tensor2D<long, 2, 2> s{3, 1, 4, 9};
+    Tensor2D<long, 2, 2> t{-2, 8, 6, -10};
+    Tensor0D<bool> p{false};
+
+    Tensor2D<long, 2, 2> expected_result = t;
+    Tensor2D<long, 2, 2> result = mhlo::select<Tensor2D<long, 2, 2>>(p, s, t);
+
+    EXPECT_THAT(result, Pointwise(Eq(), expected_result));
+  }
 }
 
 } // namespace
diff --git a/test/Conversion/mhlo-to-emitc.mlir b/test/Conversion/mhlo-to-emitc.mlir
index 79ec28f..e88c025 100644
--- a/test/Conversion/mhlo-to-emitc.mlir
+++ b/test/Conversion/mhlo-to-emitc.mlir
@@ -398,6 +398,10 @@
   return %1 : tensor<2xf32>
 }
 
+func @select_scalar_pred(%arg0: tensor<i1>, %arg1: tensor<2x3xi32>, %arg2: tensor<2x3xi32>) -> tensor<2x3xi32> {
+  %0 = "mhlo.select"(%arg0, %arg1, %arg2) : (tensor<i1>, tensor<2x3xi32>, tensor<2x3xi32>) -> tensor<2x3xi32>
+  return %0 : tensor<2x3xi32>
+}
 
 // RNG ops