Extend performMULCombine to handle more result types, including v2i128, and skip the combine for 256-bit vector and v2i128 results when only LSX is available
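
With LASX enabled, performMULCombine can now select the widening multiply
instructions for these wider results instead of scalarizing them. As an
illustration, the before/after LoongArch64 (+lasx) codegen for the
vmulwev_q_d test updated below, which in essence sign-extends the even i64
lanes of two <4 x i64> inputs to i128 and multiplies them (the LA32
expansion was considerably longer), changes from an extract/mul.d/mulh.d
sequence:

    xvld          $xr0, $a1, 0
    xvld          $xr1, $a2, 0
    xvpickve2gr.d $a1, $xr0, 2
    xvpickve2gr.d $a2, $xr0, 0
    xvpickve2gr.d $a3, $xr1, 2
    xvpickve2gr.d $a4, $xr1, 0
    mul.d         $a5, $a2, $a4
    mulh.d        $a2, $a2, $a4
    mul.d         $a4, $a1, $a3
    mulh.d        $a1, $a1, $a3
    ...

to a single widening multiply:

    xvld          $xr0, $a1, 0
    xvld          $xr1, $a2, 0
    xvmulwev.q.d  $xr0, $xr0, $xr1
    xvst          $xr0, $a0, 0
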
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 0320328..9952cfa 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -6681,7 +6681,8 @@
}
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const LoongArchSubtarget &Subtarget) {
if (!DCI.isBeforeLegalize())
return SDValue();
@@ -6690,9 +6691,17 @@
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ // Note: v2i128 is not a supported MVT vector type (see
+ // MVT::getVectorVT() in MachineValueType.h), so use the vector element
+ // count and scalar size in bits to identify it.
+ bool HasLSXOnly = Subtarget.hasExtLSX() && !Subtarget.hasExtLASX();
+ bool Isv2i128 = ResTy.isVector() && ResTy.getVectorNumElements() == 2 &&
+ ResTy.getScalarSizeInBits() == 128;
if (ResTy != MVT::v8i16 && ResTy != MVT::v4i32 && ResTy != MVT::v2i64 &&
- ResTy != MVT::v16i16 && ResTy != MVT::v8i32 && ResTy != MVT::v4i64 &&
- ResTy != MVT::i128)
+ ResTy != MVT::i128 && ResTy != MVT::v16i16 && ResTy != MVT::v8i32 &&
+ ResTy != MVT::v4i64 && !Isv2i128)
+ return SDValue();
+ if (HasLSXOnly && (ResTy.is256BitVector() || Isv2i128))
return SDValue();
// Combine:
@@ -6837,7 +6846,7 @@
case ISD::EXTRACT_VECTOR_ELT:
return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
- return performMULCombine(N, DAG, DCI);
+ return performMULCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll
index 605325f..ed3a31d 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
define void @vmulwev_h_b(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: vmulwev_h_b:
@@ -63,139 +63,13 @@
}
define void @vmulwev_q_d(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_q_d:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -32
-; LA32-NEXT: st.w $fp, $sp, 28 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s0, $sp, 24 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s1, $sp, 20 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s2, $sp, 16 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s3, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s4, $sp, 8 # 4-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 4
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA32-NEXT: xvpickve2gr.w $t3, $xr0, 1
-; LA32-NEXT: xvpickve2gr.w $a5, $xr0, 5
-; LA32-NEXT: xvpickve2gr.w $a3, $xr1, 4
-; LA32-NEXT: xvpickve2gr.w $a4, $xr1, 0
-; LA32-NEXT: xvpickve2gr.w $t4, $xr1, 1
-; LA32-NEXT: xvpickve2gr.w $a7, $xr1, 5
-; LA32-NEXT: srai.w $t1, $a5, 31
-; LA32-NEXT: srai.w $t5, $t3, 31
-; LA32-NEXT: srai.w $t0, $a7, 31
-; LA32-NEXT: srai.w $t6, $t4, 31
-; LA32-NEXT: mulh.wu $a6, $a2, $a4
-; LA32-NEXT: mul.w $t2, $t3, $a4
-; LA32-NEXT: add.w $a6, $t2, $a6
-; LA32-NEXT: sltu $t2, $a6, $t2
-; LA32-NEXT: mulh.wu $t7, $t3, $a4
-; LA32-NEXT: add.w $t7, $t7, $t2
-; LA32-NEXT: mul.w $t2, $a2, $t4
-; LA32-NEXT: add.w $a6, $t2, $a6
-; LA32-NEXT: sltu $t2, $a6, $t2
-; LA32-NEXT: mulh.wu $t8, $a2, $t4
-; LA32-NEXT: add.w $t2, $t8, $t2
-; LA32-NEXT: add.w $t8, $t7, $t2
-; LA32-NEXT: mul.w $fp, $t3, $t4
-; LA32-NEXT: add.w $s0, $fp, $t8
-; LA32-NEXT: mul.w $s1, $a4, $t5
-; LA32-NEXT: mul.w $s2, $t6, $a2
-; LA32-NEXT: add.w $s3, $s2, $s1
-; LA32-NEXT: add.w $t2, $s0, $s3
-; LA32-NEXT: sltu $s4, $t2, $s0
-; LA32-NEXT: sltu $fp, $s0, $fp
-; LA32-NEXT: sltu $t7, $t8, $t7
-; LA32-NEXT: mulh.wu $t8, $t3, $t4
-; LA32-NEXT: add.w $t7, $t8, $t7
-; LA32-NEXT: add.w $t7, $t7, $fp
-; LA32-NEXT: mulh.wu $t8, $a4, $t5
-; LA32-NEXT: add.w $t8, $t8, $s1
-; LA32-NEXT: mul.w $t4, $t4, $t5
-; LA32-NEXT: add.w $t4, $t8, $t4
-; LA32-NEXT: mul.w $t3, $t6, $t3
-; LA32-NEXT: mulh.wu $t5, $t6, $a2
-; LA32-NEXT: add.w $t3, $t5, $t3
-; LA32-NEXT: add.w $t3, $t3, $s2
-; LA32-NEXT: add.w $t3, $t3, $t4
-; LA32-NEXT: sltu $t4, $s3, $s2
-; LA32-NEXT: add.w $t3, $t3, $t4
-; LA32-NEXT: add.w $t3, $t7, $t3
-; LA32-NEXT: add.w $t3, $t3, $s4
-; LA32-NEXT: mulh.wu $t4, $a1, $a3
-; LA32-NEXT: mul.w $t5, $a5, $a3
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: sltu $t5, $t4, $t5
-; LA32-NEXT: mulh.wu $t6, $a5, $a3
-; LA32-NEXT: add.w $t5, $t6, $t5
-; LA32-NEXT: mul.w $t6, $a1, $a7
-; LA32-NEXT: add.w $t4, $t6, $t4
-; LA32-NEXT: sltu $t6, $t4, $t6
-; LA32-NEXT: mulh.wu $t7, $a1, $a7
-; LA32-NEXT: add.w $t6, $t7, $t6
-; LA32-NEXT: add.w $t6, $t5, $t6
-; LA32-NEXT: mul.w $t7, $a5, $a7
-; LA32-NEXT: add.w $t8, $t7, $t6
-; LA32-NEXT: mul.w $fp, $a3, $t1
-; LA32-NEXT: mul.w $s0, $t0, $a1
-; LA32-NEXT: add.w $s1, $s0, $fp
-; LA32-NEXT: add.w $s2, $t8, $s1
-; LA32-NEXT: sltu $s3, $s2, $t8
-; LA32-NEXT: sltu $t7, $t8, $t7
-; LA32-NEXT: sltu $t5, $t6, $t5
-; LA32-NEXT: mulh.wu $t6, $a5, $a7
-; LA32-NEXT: add.w $t5, $t6, $t5
-; LA32-NEXT: add.w $t5, $t5, $t7
-; LA32-NEXT: mulh.wu $t6, $a3, $t1
-; LA32-NEXT: add.w $t6, $t6, $fp
-; LA32-NEXT: mul.w $a7, $a7, $t1
-; LA32-NEXT: add.w $a7, $t6, $a7
-; LA32-NEXT: mul.w $a5, $t0, $a5
-; LA32-NEXT: mulh.wu $t0, $t0, $a1
-; LA32-NEXT: add.w $a5, $t0, $a5
-; LA32-NEXT: add.w $a5, $a5, $s0
-; LA32-NEXT: add.w $a5, $a5, $a7
-; LA32-NEXT: sltu $a7, $s1, $s0
-; LA32-NEXT: add.w $a5, $a5, $a7
-; LA32-NEXT: add.w $a5, $t5, $a5
-; LA32-NEXT: add.w $a5, $a5, $s3
-; LA32-NEXT: mul.w $a2, $a2, $a4
-; LA32-NEXT: mul.w $a1, $a1, $a3
-; LA32-NEXT: st.w $a1, $a0, 16
-; LA32-NEXT: st.w $a2, $a0, 0
-; LA32-NEXT: st.w $t4, $a0, 20
-; LA32-NEXT: st.w $a6, $a0, 4
-; LA32-NEXT: st.w $s2, $a0, 24
-; LA32-NEXT: st.w $t2, $a0, 8
-; LA32-NEXT: st.w $a5, $a0, 28
-; LA32-NEXT: st.w $t3, $a0, 12
-; LA32-NEXT: ld.w $s4, $sp, 8 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $s3, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $s2, $sp, 16 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $s1, $sp, 20 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $s0, $sp, 24 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $fp, $sp, 28 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 32
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_q_d:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 2
-; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 0
-; LA64-NEXT: xvpickve2gr.d $a3, $xr1, 2
-; LA64-NEXT: xvpickve2gr.d $a4, $xr1, 0
-; LA64-NEXT: mul.d $a5, $a2, $a4
-; LA64-NEXT: mulh.d $a2, $a2, $a4
-; LA64-NEXT: mul.d $a4, $a1, $a3
-; LA64-NEXT: mulh.d $a1, $a1, $a3
-; LA64-NEXT: st.d $a1, $a0, 24
-; LA64-NEXT: st.d $a4, $a0, 16
-; LA64-NEXT: st.d $a2, $a0, 8
-; LA64-NEXT: st.d $a5, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_q_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.q.d $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -269,139 +143,13 @@
}
define void @vmulwod_q_d(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_q_d:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -32
-; LA32-NEXT: st.w $fp, $sp, 28 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s0, $sp, 24 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s1, $sp, 20 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s2, $sp, 16 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s3, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s4, $sp, 8 # 4-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 6
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 2
-; LA32-NEXT: xvpickve2gr.w $t3, $xr0, 3
-; LA32-NEXT: xvpickve2gr.w $a5, $xr0, 7
-; LA32-NEXT: xvpickve2gr.w $a3, $xr1, 6
-; LA32-NEXT: xvpickve2gr.w $a4, $xr1, 2
-; LA32-NEXT: xvpickve2gr.w $t4, $xr1, 3
-; LA32-NEXT: xvpickve2gr.w $a7, $xr1, 7
-; LA32-NEXT: srai.w $t1, $a5, 31
-; LA32-NEXT: srai.w $t5, $t3, 31
-; LA32-NEXT: srai.w $t0, $a7, 31
-; LA32-NEXT: srai.w $t6, $t4, 31
-; LA32-NEXT: mulh.wu $a6, $a2, $a4
-; LA32-NEXT: mul.w $t2, $t3, $a4
-; LA32-NEXT: add.w $a6, $t2, $a6
-; LA32-NEXT: sltu $t2, $a6, $t2
-; LA32-NEXT: mulh.wu $t7, $t3, $a4
-; LA32-NEXT: add.w $t7, $t7, $t2
-; LA32-NEXT: mul.w $t2, $a2, $t4
-; LA32-NEXT: add.w $a6, $t2, $a6
-; LA32-NEXT: sltu $t2, $a6, $t2
-; LA32-NEXT: mulh.wu $t8, $a2, $t4
-; LA32-NEXT: add.w $t2, $t8, $t2
-; LA32-NEXT: add.w $t8, $t7, $t2
-; LA32-NEXT: mul.w $fp, $t3, $t4
-; LA32-NEXT: add.w $s0, $fp, $t8
-; LA32-NEXT: mul.w $s1, $a4, $t5
-; LA32-NEXT: mul.w $s2, $t6, $a2
-; LA32-NEXT: add.w $s3, $s2, $s1
-; LA32-NEXT: add.w $t2, $s0, $s3
-; LA32-NEXT: sltu $s4, $t2, $s0
-; LA32-NEXT: sltu $fp, $s0, $fp
-; LA32-NEXT: sltu $t7, $t8, $t7
-; LA32-NEXT: mulh.wu $t8, $t3, $t4
-; LA32-NEXT: add.w $t7, $t8, $t7
-; LA32-NEXT: add.w $t7, $t7, $fp
-; LA32-NEXT: mulh.wu $t8, $a4, $t5
-; LA32-NEXT: add.w $t8, $t8, $s1
-; LA32-NEXT: mul.w $t4, $t4, $t5
-; LA32-NEXT: add.w $t4, $t8, $t4
-; LA32-NEXT: mul.w $t3, $t6, $t3
-; LA32-NEXT: mulh.wu $t5, $t6, $a2
-; LA32-NEXT: add.w $t3, $t5, $t3
-; LA32-NEXT: add.w $t3, $t3, $s2
-; LA32-NEXT: add.w $t3, $t3, $t4
-; LA32-NEXT: sltu $t4, $s3, $s2
-; LA32-NEXT: add.w $t3, $t3, $t4
-; LA32-NEXT: add.w $t3, $t7, $t3
-; LA32-NEXT: add.w $t3, $t3, $s4
-; LA32-NEXT: mulh.wu $t4, $a1, $a3
-; LA32-NEXT: mul.w $t5, $a5, $a3
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: sltu $t5, $t4, $t5
-; LA32-NEXT: mulh.wu $t6, $a5, $a3
-; LA32-NEXT: add.w $t5, $t6, $t5
-; LA32-NEXT: mul.w $t6, $a1, $a7
-; LA32-NEXT: add.w $t4, $t6, $t4
-; LA32-NEXT: sltu $t6, $t4, $t6
-; LA32-NEXT: mulh.wu $t7, $a1, $a7
-; LA32-NEXT: add.w $t6, $t7, $t6
-; LA32-NEXT: add.w $t6, $t5, $t6
-; LA32-NEXT: mul.w $t7, $a5, $a7
-; LA32-NEXT: add.w $t8, $t7, $t6
-; LA32-NEXT: mul.w $fp, $a3, $t1
-; LA32-NEXT: mul.w $s0, $t0, $a1
-; LA32-NEXT: add.w $s1, $s0, $fp
-; LA32-NEXT: add.w $s2, $t8, $s1
-; LA32-NEXT: sltu $s3, $s2, $t8
-; LA32-NEXT: sltu $t7, $t8, $t7
-; LA32-NEXT: sltu $t5, $t6, $t5
-; LA32-NEXT: mulh.wu $t6, $a5, $a7
-; LA32-NEXT: add.w $t5, $t6, $t5
-; LA32-NEXT: add.w $t5, $t5, $t7
-; LA32-NEXT: mulh.wu $t6, $a3, $t1
-; LA32-NEXT: add.w $t6, $t6, $fp
-; LA32-NEXT: mul.w $a7, $a7, $t1
-; LA32-NEXT: add.w $a7, $t6, $a7
-; LA32-NEXT: mul.w $a5, $t0, $a5
-; LA32-NEXT: mulh.wu $t0, $t0, $a1
-; LA32-NEXT: add.w $a5, $t0, $a5
-; LA32-NEXT: add.w $a5, $a5, $s0
-; LA32-NEXT: add.w $a5, $a5, $a7
-; LA32-NEXT: sltu $a7, $s1, $s0
-; LA32-NEXT: add.w $a5, $a5, $a7
-; LA32-NEXT: add.w $a5, $t5, $a5
-; LA32-NEXT: add.w $a5, $a5, $s3
-; LA32-NEXT: mul.w $a2, $a2, $a4
-; LA32-NEXT: mul.w $a1, $a1, $a3
-; LA32-NEXT: st.w $a1, $a0, 16
-; LA32-NEXT: st.w $a2, $a0, 0
-; LA32-NEXT: st.w $t4, $a0, 20
-; LA32-NEXT: st.w $a6, $a0, 4
-; LA32-NEXT: st.w $s2, $a0, 24
-; LA32-NEXT: st.w $t2, $a0, 8
-; LA32-NEXT: st.w $a5, $a0, 28
-; LA32-NEXT: st.w $t3, $a0, 12
-; LA32-NEXT: ld.w $s4, $sp, 8 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $s3, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $s2, $sp, 16 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $s1, $sp, 20 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $s0, $sp, 24 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $fp, $sp, 28 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 32
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_q_d:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 3
-; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 1
-; LA64-NEXT: xvpickve2gr.d $a3, $xr1, 3
-; LA64-NEXT: xvpickve2gr.d $a4, $xr1, 1
-; LA64-NEXT: mul.d $a5, $a2, $a4
-; LA64-NEXT: mulh.d $a2, $a2, $a4
-; LA64-NEXT: mul.d $a4, $a1, $a3
-; LA64-NEXT: mulh.d $a1, $a1, $a3
-; LA64-NEXT: st.d $a1, $a0, 24
-; LA64-NEXT: st.d $a4, $a0, 16
-; LA64-NEXT: st.d $a2, $a0, 8
-; LA64-NEXT: st.d $a5, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_q_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.q.d $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -475,85 +223,13 @@
}
define void @vmulwev_q_du(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_q_du:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 5
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 4
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 1
-; LA32-NEXT: xvpickve2gr.w $a4, $xr0, 0
-; LA32-NEXT: xvpickve2gr.w $a5, $xr1, 5
-; LA32-NEXT: xvpickve2gr.w $a6, $xr1, 4
-; LA32-NEXT: xvpickve2gr.w $a7, $xr1, 1
-; LA32-NEXT: xvpickve2gr.w $t0, $xr1, 0
-; LA32-NEXT: mulh.wu $t1, $a4, $t0
-; LA32-NEXT: mul.w $t2, $a3, $t0
-; LA32-NEXT: add.w $t1, $t2, $t1
-; LA32-NEXT: sltu $t2, $t1, $t2
-; LA32-NEXT: mulh.wu $t3, $a3, $t0
-; LA32-NEXT: add.w $t2, $t3, $t2
-; LA32-NEXT: mul.w $t3, $a4, $a7
-; LA32-NEXT: add.w $t1, $t3, $t1
-; LA32-NEXT: sltu $t3, $t1, $t3
-; LA32-NEXT: mulh.wu $t4, $a4, $a7
-; LA32-NEXT: add.w $t3, $t4, $t3
-; LA32-NEXT: add.w $t3, $t2, $t3
-; LA32-NEXT: mul.w $t4, $a3, $a7
-; LA32-NEXT: add.w $t5, $t4, $t3
-; LA32-NEXT: sltu $t4, $t5, $t4
-; LA32-NEXT: sltu $t2, $t3, $t2
-; LA32-NEXT: mulh.wu $a3, $a3, $a7
-; LA32-NEXT: add.w $a3, $a3, $t2
-; LA32-NEXT: add.w $a3, $a3, $t4
-; LA32-NEXT: mulh.wu $a7, $a2, $a6
-; LA32-NEXT: mul.w $t2, $a1, $a6
-; LA32-NEXT: add.w $a7, $t2, $a7
-; LA32-NEXT: sltu $t2, $a7, $t2
-; LA32-NEXT: mulh.wu $t3, $a1, $a6
-; LA32-NEXT: add.w $t2, $t3, $t2
-; LA32-NEXT: mul.w $t3, $a2, $a5
-; LA32-NEXT: add.w $a7, $t3, $a7
-; LA32-NEXT: sltu $t3, $a7, $t3
-; LA32-NEXT: mulh.wu $t4, $a2, $a5
-; LA32-NEXT: add.w $t3, $t4, $t3
-; LA32-NEXT: add.w $t3, $t2, $t3
-; LA32-NEXT: mul.w $t4, $a1, $a5
-; LA32-NEXT: add.w $t6, $t4, $t3
-; LA32-NEXT: sltu $t4, $t6, $t4
-; LA32-NEXT: sltu $t2, $t3, $t2
-; LA32-NEXT: mulh.wu $a1, $a1, $a5
-; LA32-NEXT: add.w $a1, $a1, $t2
-; LA32-NEXT: add.w $a1, $a1, $t4
-; LA32-NEXT: mul.w $a4, $a4, $t0
-; LA32-NEXT: mul.w $a2, $a2, $a6
-; LA32-NEXT: st.w $a2, $a0, 16
-; LA32-NEXT: st.w $a4, $a0, 0
-; LA32-NEXT: st.w $a7, $a0, 20
-; LA32-NEXT: st.w $t1, $a0, 4
-; LA32-NEXT: st.w $t6, $a0, 24
-; LA32-NEXT: st.w $t5, $a0, 8
-; LA32-NEXT: st.w $a1, $a0, 28
-; LA32-NEXT: st.w $a3, $a0, 12
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_q_du:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 2
-; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 0
-; LA64-NEXT: xvpickve2gr.d $a3, $xr1, 2
-; LA64-NEXT: xvpickve2gr.d $a4, $xr1, 0
-; LA64-NEXT: mul.d $a5, $a2, $a4
-; LA64-NEXT: mulh.du $a2, $a2, $a4
-; LA64-NEXT: mul.d $a4, $a1, $a3
-; LA64-NEXT: mulh.du $a1, $a1, $a3
-; LA64-NEXT: st.d $a1, $a0, 24
-; LA64-NEXT: st.d $a4, $a0, 16
-; LA64-NEXT: st.d $a2, $a0, 8
-; LA64-NEXT: st.d $a5, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_q_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.q.du $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -627,85 +303,13 @@
}
define void @vmulwod_q_du(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_q_du:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 7
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 6
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 3
-; LA32-NEXT: xvpickve2gr.w $a4, $xr0, 2
-; LA32-NEXT: xvpickve2gr.w $a5, $xr1, 7
-; LA32-NEXT: xvpickve2gr.w $a6, $xr1, 6
-; LA32-NEXT: xvpickve2gr.w $a7, $xr1, 3
-; LA32-NEXT: xvpickve2gr.w $t0, $xr1, 2
-; LA32-NEXT: mulh.wu $t1, $a4, $t0
-; LA32-NEXT: mul.w $t2, $a3, $t0
-; LA32-NEXT: add.w $t1, $t2, $t1
-; LA32-NEXT: sltu $t2, $t1, $t2
-; LA32-NEXT: mulh.wu $t3, $a3, $t0
-; LA32-NEXT: add.w $t2, $t3, $t2
-; LA32-NEXT: mul.w $t3, $a4, $a7
-; LA32-NEXT: add.w $t1, $t3, $t1
-; LA32-NEXT: sltu $t3, $t1, $t3
-; LA32-NEXT: mulh.wu $t4, $a4, $a7
-; LA32-NEXT: add.w $t3, $t4, $t3
-; LA32-NEXT: add.w $t3, $t2, $t3
-; LA32-NEXT: mul.w $t4, $a3, $a7
-; LA32-NEXT: add.w $t5, $t4, $t3
-; LA32-NEXT: sltu $t4, $t5, $t4
-; LA32-NEXT: sltu $t2, $t3, $t2
-; LA32-NEXT: mulh.wu $a3, $a3, $a7
-; LA32-NEXT: add.w $a3, $a3, $t2
-; LA32-NEXT: add.w $a3, $a3, $t4
-; LA32-NEXT: mulh.wu $a7, $a2, $a6
-; LA32-NEXT: mul.w $t2, $a1, $a6
-; LA32-NEXT: add.w $a7, $t2, $a7
-; LA32-NEXT: sltu $t2, $a7, $t2
-; LA32-NEXT: mulh.wu $t3, $a1, $a6
-; LA32-NEXT: add.w $t2, $t3, $t2
-; LA32-NEXT: mul.w $t3, $a2, $a5
-; LA32-NEXT: add.w $a7, $t3, $a7
-; LA32-NEXT: sltu $t3, $a7, $t3
-; LA32-NEXT: mulh.wu $t4, $a2, $a5
-; LA32-NEXT: add.w $t3, $t4, $t3
-; LA32-NEXT: add.w $t3, $t2, $t3
-; LA32-NEXT: mul.w $t4, $a1, $a5
-; LA32-NEXT: add.w $t6, $t4, $t3
-; LA32-NEXT: sltu $t4, $t6, $t4
-; LA32-NEXT: sltu $t2, $t3, $t2
-; LA32-NEXT: mulh.wu $a1, $a1, $a5
-; LA32-NEXT: add.w $a1, $a1, $t2
-; LA32-NEXT: add.w $a1, $a1, $t4
-; LA32-NEXT: mul.w $a4, $a4, $t0
-; LA32-NEXT: mul.w $a2, $a2, $a6
-; LA32-NEXT: st.w $a2, $a0, 16
-; LA32-NEXT: st.w $a4, $a0, 0
-; LA32-NEXT: st.w $a7, $a0, 20
-; LA32-NEXT: st.w $t1, $a0, 4
-; LA32-NEXT: st.w $t6, $a0, 24
-; LA32-NEXT: st.w $t5, $a0, 8
-; LA32-NEXT: st.w $a1, $a0, 28
-; LA32-NEXT: st.w $a3, $a0, 12
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_q_du:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 3
-; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 1
-; LA64-NEXT: xvpickve2gr.d $a3, $xr1, 3
-; LA64-NEXT: xvpickve2gr.d $a4, $xr1, 1
-; LA64-NEXT: mul.d $a5, $a2, $a4
-; LA64-NEXT: mulh.du $a2, $a2, $a4
-; LA64-NEXT: mul.d $a4, $a1, $a3
-; LA64-NEXT: mulh.du $a1, $a1, $a3
-; LA64-NEXT: st.d $a1, $a0, 24
-; LA64-NEXT: st.d $a4, $a0, 16
-; LA64-NEXT: st.d $a2, $a0, 8
-; LA64-NEXT: st.d $a5, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_q_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.q.du $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -779,117 +383,13 @@
}
define void @vmulwev_q_du_d(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_q_du_d:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: st.w $fp, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s0, $sp, 8 # 4-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 5
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 4
-; LA32-NEXT: xvpickve2gr.w $a6, $xr0, 1
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA32-NEXT: xvpickve2gr.w $a4, $xr1, 4
-; LA32-NEXT: xvpickve2gr.w $a5, $xr1, 0
-; LA32-NEXT: xvpickve2gr.w $a7, $xr1, 1
-; LA32-NEXT: xvpickve2gr.w $t0, $xr1, 5
-; LA32-NEXT: srai.w $t1, $t0, 31
-; LA32-NEXT: srai.w $t2, $a7, 31
-; LA32-NEXT: mulh.wu $t3, $a2, $a5
-; LA32-NEXT: mul.w $t4, $a6, $a5
-; LA32-NEXT: add.w $t3, $t4, $t3
-; LA32-NEXT: sltu $t4, $t3, $t4
-; LA32-NEXT: mulh.wu $t5, $a6, $a5
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: mul.w $t5, $a2, $a7
-; LA32-NEXT: add.w $t3, $t5, $t3
-; LA32-NEXT: sltu $t5, $t3, $t5
-; LA32-NEXT: mulh.wu $t6, $a2, $a7
-; LA32-NEXT: add.w $t5, $t6, $t5
-; LA32-NEXT: add.w $t5, $t4, $t5
-; LA32-NEXT: mul.w $t6, $a6, $a7
-; LA32-NEXT: add.w $t7, $t6, $t5
-; LA32-NEXT: mul.w $t8, $t2, $a2
-; LA32-NEXT: add.w $fp, $t7, $t8
-; LA32-NEXT: sltu $s0, $fp, $t7
-; LA32-NEXT: sltu $t6, $t7, $t6
-; LA32-NEXT: sltu $t4, $t5, $t4
-; LA32-NEXT: mulh.wu $a7, $a6, $a7
-; LA32-NEXT: add.w $a7, $a7, $t4
-; LA32-NEXT: add.w $a7, $a7, $t6
-; LA32-NEXT: mul.w $a6, $t2, $a6
-; LA32-NEXT: mulh.wu $t2, $t2, $a2
-; LA32-NEXT: add.w $a6, $t2, $a6
-; LA32-NEXT: add.w $a6, $a6, $t8
-; LA32-NEXT: add.w $a6, $a7, $a6
-; LA32-NEXT: add.w $a6, $a6, $s0
-; LA32-NEXT: mulh.wu $a7, $a1, $a4
-; LA32-NEXT: mul.w $t2, $a3, $a4
-; LA32-NEXT: add.w $a7, $t2, $a7
-; LA32-NEXT: sltu $t2, $a7, $t2
-; LA32-NEXT: mulh.wu $t4, $a3, $a4
-; LA32-NEXT: add.w $t2, $t4, $t2
-; LA32-NEXT: mul.w $t4, $a1, $t0
-; LA32-NEXT: add.w $a7, $t4, $a7
-; LA32-NEXT: sltu $t4, $a7, $t4
-; LA32-NEXT: mulh.wu $t5, $a1, $t0
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: add.w $t4, $t2, $t4
-; LA32-NEXT: mul.w $t5, $a3, $t0
-; LA32-NEXT: add.w $t6, $t5, $t4
-; LA32-NEXT: mul.w $t7, $t1, $a1
-; LA32-NEXT: add.w $t8, $t6, $t7
-; LA32-NEXT: sltu $s0, $t8, $t6
-; LA32-NEXT: sltu $t5, $t6, $t5
-; LA32-NEXT: sltu $t2, $t4, $t2
-; LA32-NEXT: mulh.wu $t0, $a3, $t0
-; LA32-NEXT: add.w $t0, $t0, $t2
-; LA32-NEXT: add.w $t0, $t0, $t5
-; LA32-NEXT: mul.w $a3, $t1, $a3
-; LA32-NEXT: mulh.wu $t1, $t1, $a1
-; LA32-NEXT: add.w $a3, $t1, $a3
-; LA32-NEXT: add.w $a3, $a3, $t7
-; LA32-NEXT: add.w $a3, $t0, $a3
-; LA32-NEXT: add.w $a3, $a3, $s0
-; LA32-NEXT: mul.w $a2, $a2, $a5
-; LA32-NEXT: mul.w $a1, $a1, $a4
-; LA32-NEXT: st.w $a1, $a0, 16
-; LA32-NEXT: st.w $a2, $a0, 0
-; LA32-NEXT: st.w $a7, $a0, 20
-; LA32-NEXT: st.w $t3, $a0, 4
-; LA32-NEXT: st.w $t8, $a0, 24
-; LA32-NEXT: st.w $fp, $a0, 8
-; LA32-NEXT: st.w $a3, $a0, 28
-; LA32-NEXT: st.w $a6, $a0, 12
-; LA32-NEXT: ld.w $s0, $sp, 8 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $fp, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 16
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_q_du_d:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 2
-; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 0
-; LA64-NEXT: xvpickve2gr.d $a3, $xr1, 0
-; LA64-NEXT: xvpickve2gr.d $a4, $xr1, 2
-; LA64-NEXT: srai.d $a5, $a4, 63
-; LA64-NEXT: srai.d $a6, $a3, 63
-; LA64-NEXT: mulh.du $a7, $a2, $a3
-; LA64-NEXT: mul.d $a6, $a2, $a6
-; LA64-NEXT: add.d $a6, $a7, $a6
-; LA64-NEXT: mulh.du $a7, $a1, $a4
-; LA64-NEXT: mul.d $a5, $a1, $a5
-; LA64-NEXT: add.d $a5, $a7, $a5
-; LA64-NEXT: mul.d $a2, $a2, $a3
-; LA64-NEXT: mul.d $a1, $a1, $a4
-; LA64-NEXT: st.d $a1, $a0, 16
-; LA64-NEXT: st.d $a2, $a0, 0
-; LA64-NEXT: st.d $a5, $a0, 24
-; LA64-NEXT: st.d $a6, $a0, 8
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_q_du_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.q.du.d $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -963,117 +463,13 @@
}
define void @vmulwod_q_du_d(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_q_du_d:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: st.w $fp, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s0, $sp, 8 # 4-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 7
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 6
-; LA32-NEXT: xvpickve2gr.w $a6, $xr0, 3
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 2
-; LA32-NEXT: xvpickve2gr.w $a4, $xr1, 6
-; LA32-NEXT: xvpickve2gr.w $a5, $xr1, 2
-; LA32-NEXT: xvpickve2gr.w $a7, $xr1, 3
-; LA32-NEXT: xvpickve2gr.w $t0, $xr1, 7
-; LA32-NEXT: srai.w $t1, $t0, 31
-; LA32-NEXT: srai.w $t2, $a7, 31
-; LA32-NEXT: mulh.wu $t3, $a2, $a5
-; LA32-NEXT: mul.w $t4, $a6, $a5
-; LA32-NEXT: add.w $t3, $t4, $t3
-; LA32-NEXT: sltu $t4, $t3, $t4
-; LA32-NEXT: mulh.wu $t5, $a6, $a5
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: mul.w $t5, $a2, $a7
-; LA32-NEXT: add.w $t3, $t5, $t3
-; LA32-NEXT: sltu $t5, $t3, $t5
-; LA32-NEXT: mulh.wu $t6, $a2, $a7
-; LA32-NEXT: add.w $t5, $t6, $t5
-; LA32-NEXT: add.w $t5, $t4, $t5
-; LA32-NEXT: mul.w $t6, $a6, $a7
-; LA32-NEXT: add.w $t7, $t6, $t5
-; LA32-NEXT: mul.w $t8, $t2, $a2
-; LA32-NEXT: add.w $fp, $t7, $t8
-; LA32-NEXT: sltu $s0, $fp, $t7
-; LA32-NEXT: sltu $t6, $t7, $t6
-; LA32-NEXT: sltu $t4, $t5, $t4
-; LA32-NEXT: mulh.wu $a7, $a6, $a7
-; LA32-NEXT: add.w $a7, $a7, $t4
-; LA32-NEXT: add.w $a7, $a7, $t6
-; LA32-NEXT: mul.w $a6, $t2, $a6
-; LA32-NEXT: mulh.wu $t2, $t2, $a2
-; LA32-NEXT: add.w $a6, $t2, $a6
-; LA32-NEXT: add.w $a6, $a6, $t8
-; LA32-NEXT: add.w $a6, $a7, $a6
-; LA32-NEXT: add.w $a6, $a6, $s0
-; LA32-NEXT: mulh.wu $a7, $a1, $a4
-; LA32-NEXT: mul.w $t2, $a3, $a4
-; LA32-NEXT: add.w $a7, $t2, $a7
-; LA32-NEXT: sltu $t2, $a7, $t2
-; LA32-NEXT: mulh.wu $t4, $a3, $a4
-; LA32-NEXT: add.w $t2, $t4, $t2
-; LA32-NEXT: mul.w $t4, $a1, $t0
-; LA32-NEXT: add.w $a7, $t4, $a7
-; LA32-NEXT: sltu $t4, $a7, $t4
-; LA32-NEXT: mulh.wu $t5, $a1, $t0
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: add.w $t4, $t2, $t4
-; LA32-NEXT: mul.w $t5, $a3, $t0
-; LA32-NEXT: add.w $t6, $t5, $t4
-; LA32-NEXT: mul.w $t7, $t1, $a1
-; LA32-NEXT: add.w $t8, $t6, $t7
-; LA32-NEXT: sltu $s0, $t8, $t6
-; LA32-NEXT: sltu $t5, $t6, $t5
-; LA32-NEXT: sltu $t2, $t4, $t2
-; LA32-NEXT: mulh.wu $t0, $a3, $t0
-; LA32-NEXT: add.w $t0, $t0, $t2
-; LA32-NEXT: add.w $t0, $t0, $t5
-; LA32-NEXT: mul.w $a3, $t1, $a3
-; LA32-NEXT: mulh.wu $t1, $t1, $a1
-; LA32-NEXT: add.w $a3, $t1, $a3
-; LA32-NEXT: add.w $a3, $a3, $t7
-; LA32-NEXT: add.w $a3, $t0, $a3
-; LA32-NEXT: add.w $a3, $a3, $s0
-; LA32-NEXT: mul.w $a2, $a2, $a5
-; LA32-NEXT: mul.w $a1, $a1, $a4
-; LA32-NEXT: st.w $a1, $a0, 16
-; LA32-NEXT: st.w $a2, $a0, 0
-; LA32-NEXT: st.w $a7, $a0, 20
-; LA32-NEXT: st.w $t3, $a0, 4
-; LA32-NEXT: st.w $t8, $a0, 24
-; LA32-NEXT: st.w $fp, $a0, 8
-; LA32-NEXT: st.w $a3, $a0, 28
-; LA32-NEXT: st.w $a6, $a0, 12
-; LA32-NEXT: ld.w $s0, $sp, 8 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $fp, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 16
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_q_du_d:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 3
-; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 1
-; LA64-NEXT: xvpickve2gr.d $a3, $xr1, 1
-; LA64-NEXT: xvpickve2gr.d $a4, $xr1, 3
-; LA64-NEXT: srai.d $a5, $a4, 63
-; LA64-NEXT: srai.d $a6, $a3, 63
-; LA64-NEXT: mulh.du $a7, $a2, $a3
-; LA64-NEXT: mul.d $a6, $a2, $a6
-; LA64-NEXT: add.d $a6, $a7, $a6
-; LA64-NEXT: mulh.du $a7, $a1, $a4
-; LA64-NEXT: mul.d $a5, $a1, $a5
-; LA64-NEXT: add.d $a5, $a7, $a5
-; LA64-NEXT: mul.d $a2, $a2, $a3
-; LA64-NEXT: mul.d $a1, $a1, $a4
-; LA64-NEXT: st.d $a1, $a0, 16
-; LA64-NEXT: st.d $a2, $a0, 0
-; LA64-NEXT: st.d $a5, $a0, 24
-; LA64-NEXT: st.d $a6, $a0, 8
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_q_du_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.q.du.d $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -1147,117 +543,13 @@
}
define void @vmulwev_q_du_d_1(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwev_q_du_d_1:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: st.w $fp, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s0, $sp, 8 # 4-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 4
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA32-NEXT: xvpickve2gr.w $a6, $xr0, 1
-; LA32-NEXT: xvpickve2gr.w $a7, $xr0, 5
-; LA32-NEXT: xvpickve2gr.w $a5, $xr1, 5
-; LA32-NEXT: xvpickve2gr.w $a3, $xr1, 4
-; LA32-NEXT: xvpickve2gr.w $t0, $xr1, 1
-; LA32-NEXT: xvpickve2gr.w $a4, $xr1, 0
-; LA32-NEXT: srai.w $t1, $a7, 31
-; LA32-NEXT: srai.w $t2, $a6, 31
-; LA32-NEXT: mulh.wu $t3, $a2, $a4
-; LA32-NEXT: mul.w $t4, $a6, $a4
-; LA32-NEXT: add.w $t3, $t4, $t3
-; LA32-NEXT: sltu $t4, $t3, $t4
-; LA32-NEXT: mulh.wu $t5, $a6, $a4
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: mul.w $t5, $a2, $t0
-; LA32-NEXT: add.w $t3, $t5, $t3
-; LA32-NEXT: sltu $t5, $t3, $t5
-; LA32-NEXT: mulh.wu $t6, $a2, $t0
-; LA32-NEXT: add.w $t5, $t6, $t5
-; LA32-NEXT: add.w $t5, $t4, $t5
-; LA32-NEXT: mul.w $t6, $a6, $t0
-; LA32-NEXT: add.w $t7, $t6, $t5
-; LA32-NEXT: mul.w $t8, $a4, $t2
-; LA32-NEXT: add.w $fp, $t7, $t8
-; LA32-NEXT: sltu $s0, $fp, $t7
-; LA32-NEXT: sltu $t6, $t7, $t6
-; LA32-NEXT: sltu $t4, $t5, $t4
-; LA32-NEXT: mulh.wu $a6, $a6, $t0
-; LA32-NEXT: add.w $a6, $a6, $t4
-; LA32-NEXT: add.w $a6, $a6, $t6
-; LA32-NEXT: mulh.wu $t4, $a4, $t2
-; LA32-NEXT: add.w $t4, $t4, $t8
-; LA32-NEXT: mul.w $t0, $t0, $t2
-; LA32-NEXT: add.w $t0, $t4, $t0
-; LA32-NEXT: add.w $a6, $a6, $t0
-; LA32-NEXT: add.w $a6, $a6, $s0
-; LA32-NEXT: mulh.wu $t0, $a1, $a3
-; LA32-NEXT: mul.w $t2, $a7, $a3
-; LA32-NEXT: add.w $t0, $t2, $t0
-; LA32-NEXT: sltu $t2, $t0, $t2
-; LA32-NEXT: mulh.wu $t4, $a7, $a3
-; LA32-NEXT: add.w $t2, $t4, $t2
-; LA32-NEXT: mul.w $t4, $a1, $a5
-; LA32-NEXT: add.w $t0, $t4, $t0
-; LA32-NEXT: sltu $t4, $t0, $t4
-; LA32-NEXT: mulh.wu $t5, $a1, $a5
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: add.w $t4, $t2, $t4
-; LA32-NEXT: mul.w $t5, $a7, $a5
-; LA32-NEXT: add.w $t6, $t5, $t4
-; LA32-NEXT: mul.w $t7, $a3, $t1
-; LA32-NEXT: add.w $t8, $t6, $t7
-; LA32-NEXT: sltu $s0, $t8, $t6
-; LA32-NEXT: sltu $t5, $t6, $t5
-; LA32-NEXT: sltu $t2, $t4, $t2
-; LA32-NEXT: mulh.wu $a7, $a7, $a5
-; LA32-NEXT: add.w $a7, $a7, $t2
-; LA32-NEXT: add.w $a7, $a7, $t5
-; LA32-NEXT: mulh.wu $t2, $a3, $t1
-; LA32-NEXT: add.w $t2, $t2, $t7
-; LA32-NEXT: mul.w $a5, $a5, $t1
-; LA32-NEXT: add.w $a5, $t2, $a5
-; LA32-NEXT: add.w $a5, $a7, $a5
-; LA32-NEXT: add.w $a5, $a5, $s0
-; LA32-NEXT: mul.w $a2, $a2, $a4
-; LA32-NEXT: mul.w $a1, $a1, $a3
-; LA32-NEXT: st.w $a1, $a0, 16
-; LA32-NEXT: st.w $a2, $a0, 0
-; LA32-NEXT: st.w $t0, $a0, 20
-; LA32-NEXT: st.w $t3, $a0, 4
-; LA32-NEXT: st.w $t8, $a0, 24
-; LA32-NEXT: st.w $fp, $a0, 8
-; LA32-NEXT: st.w $a5, $a0, 28
-; LA32-NEXT: st.w $a6, $a0, 12
-; LA32-NEXT: ld.w $s0, $sp, 8 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $fp, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 16
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwev_q_du_d_1:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 0
-; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 2
-; LA64-NEXT: xvpickve2gr.d $a3, $xr1, 2
-; LA64-NEXT: xvpickve2gr.d $a4, $xr1, 0
-; LA64-NEXT: srai.d $a5, $a2, 63
-; LA64-NEXT: srai.d $a6, $a1, 63
-; LA64-NEXT: mulh.du $a7, $a1, $a4
-; LA64-NEXT: mul.d $a6, $a6, $a4
-; LA64-NEXT: add.d $a6, $a7, $a6
-; LA64-NEXT: mulh.du $a7, $a2, $a3
-; LA64-NEXT: mul.d $a5, $a5, $a3
-; LA64-NEXT: add.d $a5, $a7, $a5
-; LA64-NEXT: mul.d $a1, $a1, $a4
-; LA64-NEXT: mul.d $a2, $a2, $a3
-; LA64-NEXT: st.d $a2, $a0, 16
-; LA64-NEXT: st.d $a1, $a0, 0
-; LA64-NEXT: st.d $a5, $a0, 24
-; LA64-NEXT: st.d $a6, $a0, 8
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwev_q_du_d_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwev.q.du.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -1331,117 +623,13 @@
}
define void @vmulwod_q_du_d_1(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vmulwod_q_du_d_1:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: st.w $fp, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: st.w $s0, $sp, 8 # 4-byte Folded Spill
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 6
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 2
-; LA32-NEXT: xvpickve2gr.w $a6, $xr0, 3
-; LA32-NEXT: xvpickve2gr.w $a7, $xr0, 7
-; LA32-NEXT: xvpickve2gr.w $a5, $xr1, 7
-; LA32-NEXT: xvpickve2gr.w $a3, $xr1, 6
-; LA32-NEXT: xvpickve2gr.w $t0, $xr1, 3
-; LA32-NEXT: xvpickve2gr.w $a4, $xr1, 2
-; LA32-NEXT: srai.w $t1, $a7, 31
-; LA32-NEXT: srai.w $t2, $a6, 31
-; LA32-NEXT: mulh.wu $t3, $a2, $a4
-; LA32-NEXT: mul.w $t4, $a6, $a4
-; LA32-NEXT: add.w $t3, $t4, $t3
-; LA32-NEXT: sltu $t4, $t3, $t4
-; LA32-NEXT: mulh.wu $t5, $a6, $a4
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: mul.w $t5, $a2, $t0
-; LA32-NEXT: add.w $t3, $t5, $t3
-; LA32-NEXT: sltu $t5, $t3, $t5
-; LA32-NEXT: mulh.wu $t6, $a2, $t0
-; LA32-NEXT: add.w $t5, $t6, $t5
-; LA32-NEXT: add.w $t5, $t4, $t5
-; LA32-NEXT: mul.w $t6, $a6, $t0
-; LA32-NEXT: add.w $t7, $t6, $t5
-; LA32-NEXT: mul.w $t8, $a4, $t2
-; LA32-NEXT: add.w $fp, $t7, $t8
-; LA32-NEXT: sltu $s0, $fp, $t7
-; LA32-NEXT: sltu $t6, $t7, $t6
-; LA32-NEXT: sltu $t4, $t5, $t4
-; LA32-NEXT: mulh.wu $a6, $a6, $t0
-; LA32-NEXT: add.w $a6, $a6, $t4
-; LA32-NEXT: add.w $a6, $a6, $t6
-; LA32-NEXT: mulh.wu $t4, $a4, $t2
-; LA32-NEXT: add.w $t4, $t4, $t8
-; LA32-NEXT: mul.w $t0, $t0, $t2
-; LA32-NEXT: add.w $t0, $t4, $t0
-; LA32-NEXT: add.w $a6, $a6, $t0
-; LA32-NEXT: add.w $a6, $a6, $s0
-; LA32-NEXT: mulh.wu $t0, $a1, $a3
-; LA32-NEXT: mul.w $t2, $a7, $a3
-; LA32-NEXT: add.w $t0, $t2, $t0
-; LA32-NEXT: sltu $t2, $t0, $t2
-; LA32-NEXT: mulh.wu $t4, $a7, $a3
-; LA32-NEXT: add.w $t2, $t4, $t2
-; LA32-NEXT: mul.w $t4, $a1, $a5
-; LA32-NEXT: add.w $t0, $t4, $t0
-; LA32-NEXT: sltu $t4, $t0, $t4
-; LA32-NEXT: mulh.wu $t5, $a1, $a5
-; LA32-NEXT: add.w $t4, $t5, $t4
-; LA32-NEXT: add.w $t4, $t2, $t4
-; LA32-NEXT: mul.w $t5, $a7, $a5
-; LA32-NEXT: add.w $t6, $t5, $t4
-; LA32-NEXT: mul.w $t7, $a3, $t1
-; LA32-NEXT: add.w $t8, $t6, $t7
-; LA32-NEXT: sltu $s0, $t8, $t6
-; LA32-NEXT: sltu $t5, $t6, $t5
-; LA32-NEXT: sltu $t2, $t4, $t2
-; LA32-NEXT: mulh.wu $a7, $a7, $a5
-; LA32-NEXT: add.w $a7, $a7, $t2
-; LA32-NEXT: add.w $a7, $a7, $t5
-; LA32-NEXT: mulh.wu $t2, $a3, $t1
-; LA32-NEXT: add.w $t2, $t2, $t7
-; LA32-NEXT: mul.w $a5, $a5, $t1
-; LA32-NEXT: add.w $a5, $t2, $a5
-; LA32-NEXT: add.w $a5, $a7, $a5
-; LA32-NEXT: add.w $a5, $a5, $s0
-; LA32-NEXT: mul.w $a2, $a2, $a4
-; LA32-NEXT: mul.w $a1, $a1, $a3
-; LA32-NEXT: st.w $a1, $a0, 16
-; LA32-NEXT: st.w $a2, $a0, 0
-; LA32-NEXT: st.w $t0, $a0, 20
-; LA32-NEXT: st.w $t3, $a0, 4
-; LA32-NEXT: st.w $t8, $a0, 24
-; LA32-NEXT: st.w $fp, $a0, 8
-; LA32-NEXT: st.w $a5, $a0, 28
-; LA32-NEXT: st.w $a6, $a0, 12
-; LA32-NEXT: ld.w $s0, $sp, 8 # 4-byte Folded Reload
-; LA32-NEXT: ld.w $fp, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 16
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vmulwod_q_du_d_1:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 1
-; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 3
-; LA64-NEXT: xvpickve2gr.d $a3, $xr1, 3
-; LA64-NEXT: xvpickve2gr.d $a4, $xr1, 1
-; LA64-NEXT: srai.d $a5, $a2, 63
-; LA64-NEXT: srai.d $a6, $a1, 63
-; LA64-NEXT: mulh.du $a7, $a1, $a4
-; LA64-NEXT: mul.d $a6, $a6, $a4
-; LA64-NEXT: add.d $a6, $a7, $a6
-; LA64-NEXT: mulh.du $a7, $a2, $a3
-; LA64-NEXT: mul.d $a5, $a5, $a3
-; LA64-NEXT: add.d $a5, $a7, $a5
-; LA64-NEXT: mul.d $a1, $a1, $a4
-; LA64-NEXT: mul.d $a2, $a2, $a3
-; LA64-NEXT: st.d $a2, $a0, 16
-; LA64-NEXT: st.d $a1, $a0, 0
-; LA64-NEXT: st.d $a5, $a0, 24
-; LA64-NEXT: st.d $a6, $a0, 8
-; LA64-NEXT: ret
+; CHECK-LABEL: vmulwod_q_du_d_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvmulwod.q.du.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b