[PowerPC] Implement __int128 vector divide operations

This patch adds the vec_div overloads for vector signed/unsigned __int128
and makes v1i128 sdiv/udiv legal, selecting them to the ISA 3.1
vdivsq/vdivuq instructions.

Differential Revision: https://reviews.llvm.org/D85453
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 22744ad..51fd3d2 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -3368,6 +3368,18 @@
 }
 #endif
 
+#ifdef __POWER10_VECTOR__
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_div(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return __a / __b; /* unsigned 128-bit quotient (IR udiv) */
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_div(vector signed __int128 __a, vector signed __int128 __b) {
+  return __a / __b; /* signed 128-bit quotient (IR sdiv) */
+}
+#endif /* __POWER10_VECTOR__ */
+
 /* vec_dss */
 
 #define vec_dss __builtin_altivec_dss
diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
index ad63d64..12ec3a6 100644
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -17,6 +17,7 @@
 vector unsigned int vuia, vuib, vuic;
 vector signed long long vslla, vsllb;
 vector unsigned long long vulla, vullb, vullc;
+vector signed __int128 vsi128a, vsi128b;
 vector unsigned __int128 vui128a, vui128b, vui128c;
 vector float vfa, vfb;
 vector double vda, vdb;
@@ -69,6 +70,18 @@
   return vec_div(vulla, vullb);
 }
 
+vector unsigned __int128 test_vec_div_u128(void) {
+  // CHECK: udiv <1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_div(vui128a, vui128b); // unsigned __int128 overload
+}
+
+vector signed __int128 test_vec_div_s128(void) {
+  // CHECK: sdiv <1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_div(vsi128a, vsi128b); // signed __int128 overload
+}
+
 vector signed int test_vec_dive_si(void) {
   // CHECK: @llvm.ppc.altivec.vdivesw(<4 x i32> %{{.+}}, <4 x i32> %{{.+}})
   // CHECK-NEXT: ret <4 x i32>
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 66711f6..3b0acfa 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -888,6 +888,8 @@
       setOperationAction(ISD::SREM, MVT::v2i64, Legal);
       setOperationAction(ISD::UREM, MVT::v4i32, Legal);
       setOperationAction(ISD::SREM, MVT::v4i32, Legal);
+      setOperationAction(ISD::UDIV, MVT::v1i128, Legal); // selects vdivuq
+      setOperationAction(ISD::SDIV, MVT::v1i128, Legal); // selects vdivsq
     }
 
     setOperationAction(ISD::MUL, MVT::v8i16, Legal);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index 55872a4..4e95111 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1285,9 +1285,11 @@
                            [(set v1i128:$vD, (int_ppc_altivec_vmsumcud
                                  v2i64:$vA, v2i64:$vB, v1i128:$vC))]>;
   def VDIVSQ : VXForm_1<267, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                        "vdivsq $vD, $vA, $vB", IIC_VecGeneral, []>;
+                        "vdivsq $vD, $vA, $vB", IIC_VecGeneral, // v1i128 sdiv
+                        [(set v1i128:$vD, (sdiv v1i128:$vA, v1i128:$vB))]>;
   def VDIVUQ : VXForm_1<11, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                        "vdivuq $vD, $vA, $vB", IIC_VecGeneral, []>;
+                        "vdivuq $vD, $vA, $vB", IIC_VecGeneral, // v1i128 udiv
+                        [(set v1i128:$vD, (udiv v1i128:$vA, v1i128:$vB))]>;
   def VDIVESQ : VXForm_1<779, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                          "vdivesq $vD, $vA, $vB", IIC_VecGeneral, []>;
   def VDIVEUQ : VXForm_1<523, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll b/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
index dc21b4f..b5f36a7 100644
--- a/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
@@ -76,6 +76,24 @@
   ret <4 x i32> %div
 }
 
+define <1 x i128> @test_vdivsq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vdivsq:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vdivsq v2, v2, v3
+; CHECK-NEXT:    blr
+  %tmp = sdiv <1 x i128> %x, %y ; signed i128 divide -> vdivsq
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vdivuq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vdivuq:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vdivuq v2, v2, v3
+; CHECK-NEXT:    blr
+  %tmp = udiv <1 x i128> %x, %y ; unsigned i128 divide -> vdivuq
+  ret <1 x i128> %tmp
+}
+
 define <2 x i64> @test_vdivesd(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vdivesd:
 ; CHECK:       # %bb.0: # %entry