; Source blob: cdbbabe3e3b05b8b52ef35d9b22f7000e06b6597
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64-- 2>&1 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-- | %ptxas-verify %}
; Signed 128-bit remainder with a variable divisor.
; The expected PTX below has no call instruction: both operands are replaced by
; their negation when their high word is negative, an unsigned quotient is
; produced by the udiv-* blocks (a loop that shifts by one bit per iteration),
; the remainder is formed as |lhs| - quotient * |rhs| with mul/mad, and the
; dividend's sign mask (%rd1) is re-applied with xor/sub before the store.
define i128 @srem_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: srem_i128(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<20>;
; CHECK-NEXT: .reg .b32 %r<12>;
; CHECK-NEXT: .reg .b64 %rd<79>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %_udiv-special-cases
; CHECK-NEXT: ld.param.v2.b64 {%rd8, %rd9}, [srem_i128_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd10, %rd11}, [srem_i128_param_1];
; CHECK-NEXT: shr.s64 %rd1, %rd9, 63;
; CHECK-NEXT: sub.cc.s64 %rd12, 0, %rd8;
; CHECK-NEXT: subc.cc.s64 %rd13, 0, %rd9;
; CHECK-NEXT: setp.lt.s64 %p1, %rd9, 0;
; CHECK-NEXT: selp.b64 %rd3, %rd13, %rd9, %p1;
; CHECK-NEXT: selp.b64 %rd2, %rd12, %rd8, %p1;
; CHECK-NEXT: sub.cc.s64 %rd14, 0, %rd10;
; CHECK-NEXT: subc.cc.s64 %rd15, 0, %rd11;
; CHECK-NEXT: setp.lt.s64 %p2, %rd11, 0;
; CHECK-NEXT: selp.b64 %rd5, %rd15, %rd11, %p2;
; CHECK-NEXT: selp.b64 %rd4, %rd14, %rd10, %p2;
; CHECK-NEXT: or.b64 %rd16, %rd4, %rd5;
; CHECK-NEXT: setp.eq.b64 %p3, %rd16, 0;
; CHECK-NEXT: or.b64 %rd17, %rd2, %rd3;
; CHECK-NEXT: setp.eq.b64 %p4, %rd17, 0;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
; CHECK-NEXT: setp.ne.b64 %p6, %rd5, 0;
; CHECK-NEXT: clz.b64 %r1, %rd5;
; CHECK-NEXT: cvt.u64.u32 %rd18, %r1;
; CHECK-NEXT: clz.b64 %r2, %rd4;
; CHECK-NEXT: cvt.u64.u32 %rd19, %r2;
; CHECK-NEXT: add.s64 %rd20, %rd19, 64;
; CHECK-NEXT: selp.b64 %rd21, %rd18, %rd20, %p6;
; CHECK-NEXT: setp.ne.b64 %p7, %rd3, 0;
; CHECK-NEXT: clz.b64 %r3, %rd3;
; CHECK-NEXT: cvt.u64.u32 %rd22, %r3;
; CHECK-NEXT: clz.b64 %r4, %rd2;
; CHECK-NEXT: cvt.u64.u32 %rd23, %r4;
; CHECK-NEXT: add.s64 %rd24, %rd23, 64;
; CHECK-NEXT: selp.b64 %rd25, %rd22, %rd24, %p7;
; CHECK-NEXT: mov.b64 %rd70, 0;
; CHECK-NEXT: sub.cc.s64 %rd26, %rd21, %rd25;
; CHECK-NEXT: subc.cc.s64 %rd27, %rd70, 0;
; CHECK-NEXT: setp.gt.u64 %p8, %rd26, 127;
; CHECK-NEXT: setp.eq.b64 %p9, %rd27, 0;
; CHECK-NEXT: and.pred %p10, %p9, %p8;
; CHECK-NEXT: setp.ne.b64 %p11, %rd27, 0;
; CHECK-NEXT: or.pred %p12, %p10, %p11;
; CHECK-NEXT: or.pred %p13, %p5, %p12;
; CHECK-NEXT: xor.b64 %rd28, %rd26, 127;
; CHECK-NEXT: or.b64 %rd29, %rd28, %rd27;
; CHECK-NEXT: setp.eq.b64 %p14, %rd29, 0;
; CHECK-NEXT: selp.b64 %rd78, 0, %rd3, %p13;
; CHECK-NEXT: selp.b64 %rd77, 0, %rd2, %p13;
; CHECK-NEXT: or.pred %p15, %p13, %p14;
; CHECK-NEXT: @%p15 bra $L__BB0_5;
; CHECK-NEXT: // %bb.3: // %udiv-bb1
; CHECK-NEXT: add.cc.s64 %rd71, %rd26, 1;
; CHECK-NEXT: addc.cc.s64 %rd72, %rd27, 0;
; CHECK-NEXT: or.b64 %rd30, %rd71, %rd72;
; CHECK-NEXT: setp.eq.b64 %p16, %rd30, 0;
; CHECK-NEXT: cvt.u32.u64 %r5, %rd26;
; CHECK-NEXT: sub.s32 %r6, 127, %r5;
; CHECK-NEXT: shl.b64 %rd31, %rd3, %r6;
; CHECK-NEXT: sub.s32 %r7, 64, %r6;
; CHECK-NEXT: shr.u64 %rd32, %rd2, %r7;
; CHECK-NEXT: or.b64 %rd33, %rd31, %rd32;
; CHECK-NEXT: sub.s32 %r8, 63, %r5;
; CHECK-NEXT: shl.b64 %rd34, %rd2, %r8;
; CHECK-NEXT: setp.gt.s32 %p17, %r6, 63;
; CHECK-NEXT: selp.b64 %rd76, %rd34, %rd33, %p17;
; CHECK-NEXT: shl.b64 %rd75, %rd2, %r6;
; CHECK-NEXT: mov.b64 %rd69, %rd70;
; CHECK-NEXT: @%p16 bra $L__BB0_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
; CHECK-NEXT: cvt.u32.u64 %r9, %rd71;
; CHECK-NEXT: shr.u64 %rd35, %rd2, %r9;
; CHECK-NEXT: sub.s32 %r10, 64, %r9;
; CHECK-NEXT: shl.b64 %rd36, %rd3, %r10;
; CHECK-NEXT: or.b64 %rd37, %rd35, %rd36;
; CHECK-NEXT: add.s32 %r11, %r9, -64;
; CHECK-NEXT: shr.u64 %rd38, %rd3, %r11;
; CHECK-NEXT: setp.gt.s32 %p18, %r9, 63;
; CHECK-NEXT: selp.b64 %rd73, %rd38, %rd37, %p18;
; CHECK-NEXT: shr.u64 %rd74, %rd3, %r9;
; CHECK-NEXT: add.cc.s64 %rd6, %rd4, -1;
; CHECK-NEXT: addc.cc.s64 %rd7, %rd5, -1;
; CHECK-NEXT: mov.b64 %rd69, 0;
; CHECK-NEXT: mov.b64 %rd70, %rd69;
; CHECK-NEXT: $L__BB0_2: // %udiv-do-while
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: shr.u64 %rd39, %rd73, 63;
; CHECK-NEXT: shl.b64 %rd40, %rd74, 1;
; CHECK-NEXT: or.b64 %rd41, %rd40, %rd39;
; CHECK-NEXT: shl.b64 %rd42, %rd73, 1;
; CHECK-NEXT: shr.u64 %rd43, %rd76, 63;
; CHECK-NEXT: or.b64 %rd44, %rd42, %rd43;
; CHECK-NEXT: shr.u64 %rd45, %rd75, 63;
; CHECK-NEXT: shl.b64 %rd46, %rd76, 1;
; CHECK-NEXT: or.b64 %rd47, %rd46, %rd45;
; CHECK-NEXT: shl.b64 %rd48, %rd75, 1;
; CHECK-NEXT: or.b64 %rd75, %rd70, %rd48;
; CHECK-NEXT: or.b64 %rd76, %rd69, %rd47;
; CHECK-NEXT: sub.cc.s64 %rd49, %rd6, %rd44;
; CHECK-NEXT: subc.cc.s64 %rd50, %rd7, %rd41;
; CHECK-NEXT: shr.s64 %rd51, %rd50, 63;
; CHECK-NEXT: and.b64 %rd70, %rd51, 1;
; CHECK-NEXT: and.b64 %rd52, %rd51, %rd4;
; CHECK-NEXT: and.b64 %rd53, %rd51, %rd5;
; CHECK-NEXT: sub.cc.s64 %rd73, %rd44, %rd52;
; CHECK-NEXT: subc.cc.s64 %rd74, %rd41, %rd53;
; CHECK-NEXT: add.cc.s64 %rd71, %rd71, -1;
; CHECK-NEXT: addc.cc.s64 %rd72, %rd72, -1;
; CHECK-NEXT: or.b64 %rd54, %rd71, %rd72;
; CHECK-NEXT: setp.eq.b64 %p19, %rd54, 0;
; CHECK-NEXT: @%p19 bra $L__BB0_4;
; CHECK-NEXT: bra.uni $L__BB0_2;
; CHECK-NEXT: $L__BB0_4: // %udiv-loop-exit
; CHECK-NEXT: shr.u64 %rd55, %rd75, 63;
; CHECK-NEXT: shl.b64 %rd56, %rd76, 1;
; CHECK-NEXT: or.b64 %rd57, %rd56, %rd55;
; CHECK-NEXT: shl.b64 %rd58, %rd75, 1;
; CHECK-NEXT: or.b64 %rd77, %rd70, %rd58;
; CHECK-NEXT: or.b64 %rd78, %rd69, %rd57;
; CHECK-NEXT: $L__BB0_5: // %udiv-end
; CHECK-NEXT: mul.hi.u64 %rd59, %rd4, %rd77;
; CHECK-NEXT: mad.lo.s64 %rd60, %rd4, %rd78, %rd59;
; CHECK-NEXT: mad.lo.s64 %rd61, %rd5, %rd77, %rd60;
; CHECK-NEXT: mul.lo.s64 %rd62, %rd4, %rd77;
; CHECK-NEXT: sub.cc.s64 %rd63, %rd2, %rd62;
; CHECK-NEXT: subc.cc.s64 %rd64, %rd3, %rd61;
; CHECK-NEXT: xor.b64 %rd65, %rd63, %rd1;
; CHECK-NEXT: xor.b64 %rd66, %rd64, %rd1;
; CHECK-NEXT: sub.cc.s64 %rd67, %rd65, %rd1;
; CHECK-NEXT: subc.cc.s64 %rd68, %rd66, %rd1;
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd67, %rd68};
; CHECK-NEXT: ret;
; The value is a remainder, so it is named %rem rather than %div.
%rem = srem i128 %lhs, %rhs
ret i128 %rem
}
; Unsigned 128-bit remainder with a variable divisor.
; The expected PTX below has no call instruction: the udiv-* blocks compute the
; unsigned quotient directly from the loaded operands, and the remainder is
; then formed as lhs - quotient * rhs with mul/mad followed by sub.cc/subc.cc.
define i128 @urem_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: urem_i128(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<18>;
; CHECK-NEXT: .reg .b32 %r<12>;
; CHECK-NEXT: .reg .b64 %rd<66>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %_udiv-special-cases
; CHECK-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [urem_i128_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [urem_i128_param_1];
; CHECK-NEXT: or.b64 %rd7, %rd1, %rd2;
; CHECK-NEXT: setp.eq.b64 %p1, %rd7, 0;
; CHECK-NEXT: or.b64 %rd8, %rd5, %rd6;
; CHECK-NEXT: setp.eq.b64 %p2, %rd8, 0;
; CHECK-NEXT: or.pred %p3, %p1, %p2;
; CHECK-NEXT: setp.ne.b64 %p4, %rd2, 0;
; CHECK-NEXT: clz.b64 %r1, %rd2;
; CHECK-NEXT: cvt.u64.u32 %rd9, %r1;
; CHECK-NEXT: clz.b64 %r2, %rd1;
; CHECK-NEXT: cvt.u64.u32 %rd10, %r2;
; CHECK-NEXT: add.s64 %rd11, %rd10, 64;
; CHECK-NEXT: selp.b64 %rd12, %rd9, %rd11, %p4;
; CHECK-NEXT: setp.ne.b64 %p5, %rd6, 0;
; CHECK-NEXT: clz.b64 %r3, %rd6;
; CHECK-NEXT: cvt.u64.u32 %rd13, %r3;
; CHECK-NEXT: clz.b64 %r4, %rd5;
; CHECK-NEXT: cvt.u64.u32 %rd14, %r4;
; CHECK-NEXT: add.s64 %rd15, %rd14, 64;
; CHECK-NEXT: selp.b64 %rd16, %rd13, %rd15, %p5;
; CHECK-NEXT: mov.b64 %rd57, 0;
; CHECK-NEXT: sub.cc.s64 %rd17, %rd12, %rd16;
; CHECK-NEXT: subc.cc.s64 %rd18, %rd57, 0;
; CHECK-NEXT: setp.gt.u64 %p6, %rd17, 127;
; CHECK-NEXT: setp.eq.b64 %p7, %rd18, 0;
; CHECK-NEXT: and.pred %p8, %p7, %p6;
; CHECK-NEXT: setp.ne.b64 %p9, %rd18, 0;
; CHECK-NEXT: or.pred %p10, %p8, %p9;
; CHECK-NEXT: or.pred %p11, %p3, %p10;
; CHECK-NEXT: xor.b64 %rd19, %rd17, 127;
; CHECK-NEXT: or.b64 %rd20, %rd19, %rd18;
; CHECK-NEXT: setp.eq.b64 %p12, %rd20, 0;
; CHECK-NEXT: selp.b64 %rd65, 0, %rd6, %p11;
; CHECK-NEXT: selp.b64 %rd64, 0, %rd5, %p11;
; CHECK-NEXT: or.pred %p13, %p11, %p12;
; CHECK-NEXT: @%p13 bra $L__BB1_5;
; CHECK-NEXT: // %bb.3: // %udiv-bb1
; CHECK-NEXT: add.cc.s64 %rd58, %rd17, 1;
; CHECK-NEXT: addc.cc.s64 %rd59, %rd18, 0;
; CHECK-NEXT: or.b64 %rd21, %rd58, %rd59;
; CHECK-NEXT: setp.eq.b64 %p14, %rd21, 0;
; CHECK-NEXT: cvt.u32.u64 %r5, %rd17;
; CHECK-NEXT: sub.s32 %r6, 127, %r5;
; CHECK-NEXT: shl.b64 %rd22, %rd6, %r6;
; CHECK-NEXT: sub.s32 %r7, 64, %r6;
; CHECK-NEXT: shr.u64 %rd23, %rd5, %r7;
; CHECK-NEXT: or.b64 %rd24, %rd22, %rd23;
; CHECK-NEXT: sub.s32 %r8, 63, %r5;
; CHECK-NEXT: shl.b64 %rd25, %rd5, %r8;
; CHECK-NEXT: setp.gt.s32 %p15, %r6, 63;
; CHECK-NEXT: selp.b64 %rd63, %rd25, %rd24, %p15;
; CHECK-NEXT: shl.b64 %rd62, %rd5, %r6;
; CHECK-NEXT: mov.b64 %rd56, %rd57;
; CHECK-NEXT: @%p14 bra $L__BB1_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
; CHECK-NEXT: cvt.u32.u64 %r9, %rd58;
; CHECK-NEXT: shr.u64 %rd26, %rd5, %r9;
; CHECK-NEXT: sub.s32 %r10, 64, %r9;
; CHECK-NEXT: shl.b64 %rd27, %rd6, %r10;
; CHECK-NEXT: or.b64 %rd28, %rd26, %rd27;
; CHECK-NEXT: add.s32 %r11, %r9, -64;
; CHECK-NEXT: shr.u64 %rd29, %rd6, %r11;
; CHECK-NEXT: setp.gt.s32 %p16, %r9, 63;
; CHECK-NEXT: selp.b64 %rd60, %rd29, %rd28, %p16;
; CHECK-NEXT: shr.u64 %rd61, %rd6, %r9;
; CHECK-NEXT: add.cc.s64 %rd3, %rd1, -1;
; CHECK-NEXT: addc.cc.s64 %rd4, %rd2, -1;
; CHECK-NEXT: mov.b64 %rd56, 0;
; CHECK-NEXT: mov.b64 %rd57, %rd56;
; CHECK-NEXT: $L__BB1_2: // %udiv-do-while
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: shr.u64 %rd30, %rd60, 63;
; CHECK-NEXT: shl.b64 %rd31, %rd61, 1;
; CHECK-NEXT: or.b64 %rd32, %rd31, %rd30;
; CHECK-NEXT: shl.b64 %rd33, %rd60, 1;
; CHECK-NEXT: shr.u64 %rd34, %rd63, 63;
; CHECK-NEXT: or.b64 %rd35, %rd33, %rd34;
; CHECK-NEXT: shr.u64 %rd36, %rd62, 63;
; CHECK-NEXT: shl.b64 %rd37, %rd63, 1;
; CHECK-NEXT: or.b64 %rd38, %rd37, %rd36;
; CHECK-NEXT: shl.b64 %rd39, %rd62, 1;
; CHECK-NEXT: or.b64 %rd62, %rd57, %rd39;
; CHECK-NEXT: or.b64 %rd63, %rd56, %rd38;
; CHECK-NEXT: sub.cc.s64 %rd40, %rd3, %rd35;
; CHECK-NEXT: subc.cc.s64 %rd41, %rd4, %rd32;
; CHECK-NEXT: shr.s64 %rd42, %rd41, 63;
; CHECK-NEXT: and.b64 %rd57, %rd42, 1;
; CHECK-NEXT: and.b64 %rd43, %rd42, %rd1;
; CHECK-NEXT: and.b64 %rd44, %rd42, %rd2;
; CHECK-NEXT: sub.cc.s64 %rd60, %rd35, %rd43;
; CHECK-NEXT: subc.cc.s64 %rd61, %rd32, %rd44;
; CHECK-NEXT: add.cc.s64 %rd58, %rd58, -1;
; CHECK-NEXT: addc.cc.s64 %rd59, %rd59, -1;
; CHECK-NEXT: or.b64 %rd45, %rd58, %rd59;
; CHECK-NEXT: setp.eq.b64 %p17, %rd45, 0;
; CHECK-NEXT: @%p17 bra $L__BB1_4;
; CHECK-NEXT: bra.uni $L__BB1_2;
; CHECK-NEXT: $L__BB1_4: // %udiv-loop-exit
; CHECK-NEXT: shr.u64 %rd46, %rd62, 63;
; CHECK-NEXT: shl.b64 %rd47, %rd63, 1;
; CHECK-NEXT: or.b64 %rd48, %rd47, %rd46;
; CHECK-NEXT: shl.b64 %rd49, %rd62, 1;
; CHECK-NEXT: or.b64 %rd64, %rd57, %rd49;
; CHECK-NEXT: or.b64 %rd65, %rd56, %rd48;
; CHECK-NEXT: $L__BB1_5: // %udiv-end
; CHECK-NEXT: mul.hi.u64 %rd50, %rd1, %rd64;
; CHECK-NEXT: mad.lo.s64 %rd51, %rd1, %rd65, %rd50;
; CHECK-NEXT: mad.lo.s64 %rd52, %rd2, %rd64, %rd51;
; CHECK-NEXT: mul.lo.s64 %rd53, %rd1, %rd64;
; CHECK-NEXT: sub.cc.s64 %rd54, %rd5, %rd53;
; CHECK-NEXT: subc.cc.s64 %rd55, %rd6, %rd52;
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd54, %rd55};
; CHECK-NEXT: ret;
; The value is a remainder, so it is named %rem rather than %div.
%rem = urem i128 %lhs, %rhs
ret i128 %rem
}
; Signed 128-bit remainder by the constant 8589934592 (2^33).
; The expected PTX is straight-line code with no loop: a bias of 2^33 - 1 is
; derived from the sign of the high word (shr.s64 by 63, then shr.u64 by 31),
; added to the dividend, the low 33 bits of the biased low word are cleared
; with the mask -8589934592, and the result is subtracted from the dividend.
define i128 @srem_i128_pow2k(i128 %lhs) {
; CHECK-LABEL: srem_i128_pow2k(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<10>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [srem_i128_pow2k_param_0];
; CHECK-NEXT: shr.s64 %rd3, %rd2, 63;
; CHECK-NEXT: shr.u64 %rd4, %rd3, 31;
; CHECK-NEXT: add.cc.s64 %rd5, %rd1, %rd4;
; CHECK-NEXT: addc.cc.s64 %rd6, %rd2, 0;
; CHECK-NEXT: and.b64 %rd7, %rd5, -8589934592;
; CHECK-NEXT: sub.cc.s64 %rd8, %rd1, %rd7;
; CHECK-NEXT: subc.cc.s64 %rd9, %rd2, %rd6;
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd8, %rd9};
; CHECK-NEXT: ret;
; The value is a remainder, so it is named %rem rather than %div.
%rem = srem i128 %lhs, 8589934592
ret i128 %rem
}
; Unsigned 128-bit remainder by the constant 8589934592 (2^33).
; The expected PTX reduces to a single mask: the low word is and-ed with
; 8589934591 (2^33 - 1) and the high word of the result is the constant 0.
define i128 @urem_i128_pow2k(i128 %lhs) {
; CHECK-LABEL: urem_i128_pow2k(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [urem_i128_pow2k_param_0];
; CHECK-NEXT: and.b64 %rd3, %rd1, 8589934591;
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, 0};
; CHECK-NEXT: ret;
; The value is a remainder, so it is named %rem rather than %div.
%rem = urem i128 %lhs, 8589934592
ret i128 %rem
}
; Signed 128-bit division with a variable divisor.
; The expected PTX below has no call instruction: both operands are replaced by
; their negation when their high word is negative, the result sign mask (%rd5)
; is taken from the xor of the two high words, the udiv-* blocks compute the
; unsigned quotient (a loop that shifts by one bit per iteration), and the sign
; mask is applied to the quotient with xor/sub before the store.
define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: sdiv_i128(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<20>;
; CHECK-NEXT: .reg .b32 %r<12>;
; CHECK-NEXT: .reg .b64 %rd<74>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %_udiv-special-cases
; CHECK-NEXT: ld.param.v2.b64 {%rd8, %rd9}, [sdiv_i128_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd10, %rd11}, [sdiv_i128_param_1];
; CHECK-NEXT: sub.cc.s64 %rd12, 0, %rd8;
; CHECK-NEXT: subc.cc.s64 %rd13, 0, %rd9;
; CHECK-NEXT: setp.lt.s64 %p1, %rd9, 0;
; CHECK-NEXT: selp.b64 %rd2, %rd13, %rd9, %p1;
; CHECK-NEXT: selp.b64 %rd1, %rd12, %rd8, %p1;
; CHECK-NEXT: sub.cc.s64 %rd14, 0, %rd10;
; CHECK-NEXT: subc.cc.s64 %rd15, 0, %rd11;
; CHECK-NEXT: setp.lt.s64 %p2, %rd11, 0;
; CHECK-NEXT: selp.b64 %rd4, %rd15, %rd11, %p2;
; CHECK-NEXT: selp.b64 %rd3, %rd14, %rd10, %p2;
; CHECK-NEXT: xor.b64 %rd16, %rd11, %rd9;
; CHECK-NEXT: shr.s64 %rd5, %rd16, 63;
; CHECK-NEXT: or.b64 %rd17, %rd3, %rd4;
; CHECK-NEXT: setp.eq.b64 %p3, %rd17, 0;
; CHECK-NEXT: or.b64 %rd18, %rd1, %rd2;
; CHECK-NEXT: setp.eq.b64 %p4, %rd18, 0;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
; CHECK-NEXT: setp.ne.b64 %p6, %rd4, 0;
; CHECK-NEXT: clz.b64 %r1, %rd4;
; CHECK-NEXT: cvt.u64.u32 %rd19, %r1;
; CHECK-NEXT: clz.b64 %r2, %rd3;
; CHECK-NEXT: cvt.u64.u32 %rd20, %r2;
; CHECK-NEXT: add.s64 %rd21, %rd20, 64;
; CHECK-NEXT: selp.b64 %rd22, %rd19, %rd21, %p6;
; CHECK-NEXT: setp.ne.b64 %p7, %rd2, 0;
; CHECK-NEXT: clz.b64 %r3, %rd2;
; CHECK-NEXT: cvt.u64.u32 %rd23, %r3;
; CHECK-NEXT: clz.b64 %r4, %rd1;
; CHECK-NEXT: cvt.u64.u32 %rd24, %r4;
; CHECK-NEXT: add.s64 %rd25, %rd24, 64;
; CHECK-NEXT: selp.b64 %rd26, %rd23, %rd25, %p7;
; CHECK-NEXT: mov.b64 %rd65, 0;
; CHECK-NEXT: sub.cc.s64 %rd27, %rd22, %rd26;
; CHECK-NEXT: subc.cc.s64 %rd28, %rd65, 0;
; CHECK-NEXT: setp.gt.u64 %p8, %rd27, 127;
; CHECK-NEXT: setp.eq.b64 %p9, %rd28, 0;
; CHECK-NEXT: and.pred %p10, %p9, %p8;
; CHECK-NEXT: setp.ne.b64 %p11, %rd28, 0;
; CHECK-NEXT: or.pred %p12, %p10, %p11;
; CHECK-NEXT: or.pred %p13, %p5, %p12;
; CHECK-NEXT: xor.b64 %rd29, %rd27, 127;
; CHECK-NEXT: or.b64 %rd30, %rd29, %rd28;
; CHECK-NEXT: setp.eq.b64 %p14, %rd30, 0;
; CHECK-NEXT: selp.b64 %rd73, 0, %rd2, %p13;
; CHECK-NEXT: selp.b64 %rd72, 0, %rd1, %p13;
; CHECK-NEXT: or.pred %p15, %p13, %p14;
; CHECK-NEXT: @%p15 bra $L__BB4_5;
; CHECK-NEXT: // %bb.3: // %udiv-bb1
; CHECK-NEXT: add.cc.s64 %rd66, %rd27, 1;
; CHECK-NEXT: addc.cc.s64 %rd67, %rd28, 0;
; CHECK-NEXT: or.b64 %rd31, %rd66, %rd67;
; CHECK-NEXT: setp.eq.b64 %p16, %rd31, 0;
; CHECK-NEXT: cvt.u32.u64 %r5, %rd27;
; CHECK-NEXT: sub.s32 %r6, 127, %r5;
; CHECK-NEXT: shl.b64 %rd32, %rd2, %r6;
; CHECK-NEXT: sub.s32 %r7, 64, %r6;
; CHECK-NEXT: shr.u64 %rd33, %rd1, %r7;
; CHECK-NEXT: or.b64 %rd34, %rd32, %rd33;
; CHECK-NEXT: sub.s32 %r8, 63, %r5;
; CHECK-NEXT: shl.b64 %rd35, %rd1, %r8;
; CHECK-NEXT: setp.gt.s32 %p17, %r6, 63;
; CHECK-NEXT: selp.b64 %rd71, %rd35, %rd34, %p17;
; CHECK-NEXT: shl.b64 %rd70, %rd1, %r6;
; CHECK-NEXT: mov.b64 %rd64, %rd65;
; CHECK-NEXT: @%p16 bra $L__BB4_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
; CHECK-NEXT: cvt.u32.u64 %r9, %rd66;
; CHECK-NEXT: shr.u64 %rd36, %rd1, %r9;
; CHECK-NEXT: sub.s32 %r10, 64, %r9;
; CHECK-NEXT: shl.b64 %rd37, %rd2, %r10;
; CHECK-NEXT: or.b64 %rd38, %rd36, %rd37;
; CHECK-NEXT: add.s32 %r11, %r9, -64;
; CHECK-NEXT: shr.u64 %rd39, %rd2, %r11;
; CHECK-NEXT: setp.gt.s32 %p18, %r9, 63;
; CHECK-NEXT: selp.b64 %rd68, %rd39, %rd38, %p18;
; CHECK-NEXT: shr.u64 %rd69, %rd2, %r9;
; CHECK-NEXT: add.cc.s64 %rd6, %rd3, -1;
; CHECK-NEXT: addc.cc.s64 %rd7, %rd4, -1;
; CHECK-NEXT: mov.b64 %rd64, 0;
; CHECK-NEXT: mov.b64 %rd65, %rd64;
; CHECK-NEXT: $L__BB4_2: // %udiv-do-while
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: shr.u64 %rd40, %rd68, 63;
; CHECK-NEXT: shl.b64 %rd41, %rd69, 1;
; CHECK-NEXT: or.b64 %rd42, %rd41, %rd40;
; CHECK-NEXT: shl.b64 %rd43, %rd68, 1;
; CHECK-NEXT: shr.u64 %rd44, %rd71, 63;
; CHECK-NEXT: or.b64 %rd45, %rd43, %rd44;
; CHECK-NEXT: shr.u64 %rd46, %rd70, 63;
; CHECK-NEXT: shl.b64 %rd47, %rd71, 1;
; CHECK-NEXT: or.b64 %rd48, %rd47, %rd46;
; CHECK-NEXT: shl.b64 %rd49, %rd70, 1;
; CHECK-NEXT: or.b64 %rd70, %rd65, %rd49;
; CHECK-NEXT: or.b64 %rd71, %rd64, %rd48;
; CHECK-NEXT: sub.cc.s64 %rd50, %rd6, %rd45;
; CHECK-NEXT: subc.cc.s64 %rd51, %rd7, %rd42;
; CHECK-NEXT: shr.s64 %rd52, %rd51, 63;
; CHECK-NEXT: and.b64 %rd65, %rd52, 1;
; CHECK-NEXT: and.b64 %rd53, %rd52, %rd3;
; CHECK-NEXT: and.b64 %rd54, %rd52, %rd4;
; CHECK-NEXT: sub.cc.s64 %rd68, %rd45, %rd53;
; CHECK-NEXT: subc.cc.s64 %rd69, %rd42, %rd54;
; CHECK-NEXT: add.cc.s64 %rd66, %rd66, -1;
; CHECK-NEXT: addc.cc.s64 %rd67, %rd67, -1;
; CHECK-NEXT: or.b64 %rd55, %rd66, %rd67;
; CHECK-NEXT: setp.eq.b64 %p19, %rd55, 0;
; CHECK-NEXT: @%p19 bra $L__BB4_4;
; CHECK-NEXT: bra.uni $L__BB4_2;
; CHECK-NEXT: $L__BB4_4: // %udiv-loop-exit
; CHECK-NEXT: shr.u64 %rd56, %rd70, 63;
; CHECK-NEXT: shl.b64 %rd57, %rd71, 1;
; CHECK-NEXT: or.b64 %rd58, %rd57, %rd56;
; CHECK-NEXT: shl.b64 %rd59, %rd70, 1;
; CHECK-NEXT: or.b64 %rd72, %rd65, %rd59;
; CHECK-NEXT: or.b64 %rd73, %rd64, %rd58;
; CHECK-NEXT: $L__BB4_5: // %udiv-end
; CHECK-NEXT: xor.b64 %rd60, %rd72, %rd5;
; CHECK-NEXT: xor.b64 %rd61, %rd73, %rd5;
; CHECK-NEXT: sub.cc.s64 %rd62, %rd60, %rd5;
; CHECK-NEXT: subc.cc.s64 %rd63, %rd61, %rd5;
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd62, %rd63};
; CHECK-NEXT: ret;
%div = sdiv i128 %lhs, %rhs
ret i128 %div
}
; Unsigned 128-bit division with a variable divisor.
; The expected PTX below has no call instruction: the udiv-* blocks compute the
; quotient directly from the loaded operands (a loop that shifts by one bit per
; iteration), and the udiv-end block stores it as the return value.
define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: udiv_i128(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<18>;
; CHECK-NEXT: .reg .b32 %r<12>;
; CHECK-NEXT: .reg .b64 %rd<60>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %_udiv-special-cases
; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [udiv_i128_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [udiv_i128_param_1];
; CHECK-NEXT: or.b64 %rd7, %rd5, %rd6;
; CHECK-NEXT: setp.eq.b64 %p1, %rd7, 0;
; CHECK-NEXT: or.b64 %rd8, %rd3, %rd4;
; CHECK-NEXT: setp.eq.b64 %p2, %rd8, 0;
; CHECK-NEXT: or.pred %p3, %p1, %p2;
; CHECK-NEXT: setp.ne.b64 %p4, %rd6, 0;
; CHECK-NEXT: clz.b64 %r1, %rd6;
; CHECK-NEXT: cvt.u64.u32 %rd9, %r1;
; CHECK-NEXT: clz.b64 %r2, %rd5;
; CHECK-NEXT: cvt.u64.u32 %rd10, %r2;
; CHECK-NEXT: add.s64 %rd11, %rd10, 64;
; CHECK-NEXT: selp.b64 %rd12, %rd9, %rd11, %p4;
; CHECK-NEXT: setp.ne.b64 %p5, %rd4, 0;
; CHECK-NEXT: clz.b64 %r3, %rd4;
; CHECK-NEXT: cvt.u64.u32 %rd13, %r3;
; CHECK-NEXT: clz.b64 %r4, %rd3;
; CHECK-NEXT: cvt.u64.u32 %rd14, %r4;
; CHECK-NEXT: add.s64 %rd15, %rd14, 64;
; CHECK-NEXT: selp.b64 %rd16, %rd13, %rd15, %p5;
; CHECK-NEXT: mov.b64 %rd51, 0;
; CHECK-NEXT: sub.cc.s64 %rd17, %rd12, %rd16;
; CHECK-NEXT: subc.cc.s64 %rd18, %rd51, 0;
; CHECK-NEXT: setp.gt.u64 %p6, %rd17, 127;
; CHECK-NEXT: setp.eq.b64 %p7, %rd18, 0;
; CHECK-NEXT: and.pred %p8, %p7, %p6;
; CHECK-NEXT: setp.ne.b64 %p9, %rd18, 0;
; CHECK-NEXT: or.pred %p10, %p8, %p9;
; CHECK-NEXT: or.pred %p11, %p3, %p10;
; CHECK-NEXT: xor.b64 %rd19, %rd17, 127;
; CHECK-NEXT: or.b64 %rd20, %rd19, %rd18;
; CHECK-NEXT: setp.eq.b64 %p12, %rd20, 0;
; CHECK-NEXT: selp.b64 %rd59, 0, %rd4, %p11;
; CHECK-NEXT: selp.b64 %rd58, 0, %rd3, %p11;
; CHECK-NEXT: or.pred %p13, %p11, %p12;
; CHECK-NEXT: @%p13 bra $L__BB5_5;
; CHECK-NEXT: // %bb.3: // %udiv-bb1
; CHECK-NEXT: add.cc.s64 %rd52, %rd17, 1;
; CHECK-NEXT: addc.cc.s64 %rd53, %rd18, 0;
; CHECK-NEXT: or.b64 %rd21, %rd52, %rd53;
; CHECK-NEXT: setp.eq.b64 %p14, %rd21, 0;
; CHECK-NEXT: cvt.u32.u64 %r5, %rd17;
; CHECK-NEXT: sub.s32 %r6, 127, %r5;
; CHECK-NEXT: shl.b64 %rd22, %rd4, %r6;
; CHECK-NEXT: sub.s32 %r7, 64, %r6;
; CHECK-NEXT: shr.u64 %rd23, %rd3, %r7;
; CHECK-NEXT: or.b64 %rd24, %rd22, %rd23;
; CHECK-NEXT: sub.s32 %r8, 63, %r5;
; CHECK-NEXT: shl.b64 %rd25, %rd3, %r8;
; CHECK-NEXT: setp.gt.s32 %p15, %r6, 63;
; CHECK-NEXT: selp.b64 %rd57, %rd25, %rd24, %p15;
; CHECK-NEXT: shl.b64 %rd56, %rd3, %r6;
; CHECK-NEXT: mov.b64 %rd50, %rd51;
; CHECK-NEXT: @%p14 bra $L__BB5_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
; CHECK-NEXT: cvt.u32.u64 %r9, %rd52;
; CHECK-NEXT: shr.u64 %rd26, %rd3, %r9;
; CHECK-NEXT: sub.s32 %r10, 64, %r9;
; CHECK-NEXT: shl.b64 %rd27, %rd4, %r10;
; CHECK-NEXT: or.b64 %rd28, %rd26, %rd27;
; CHECK-NEXT: add.s32 %r11, %r9, -64;
; CHECK-NEXT: shr.u64 %rd29, %rd4, %r11;
; CHECK-NEXT: setp.gt.s32 %p16, %r9, 63;
; CHECK-NEXT: selp.b64 %rd54, %rd29, %rd28, %p16;
; CHECK-NEXT: shr.u64 %rd55, %rd4, %r9;
; CHECK-NEXT: add.cc.s64 %rd1, %rd5, -1;
; CHECK-NEXT: addc.cc.s64 %rd2, %rd6, -1;
; CHECK-NEXT: mov.b64 %rd50, 0;
; CHECK-NEXT: mov.b64 %rd51, %rd50;
; CHECK-NEXT: $L__BB5_2: // %udiv-do-while
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: shr.u64 %rd30, %rd54, 63;
; CHECK-NEXT: shl.b64 %rd31, %rd55, 1;
; CHECK-NEXT: or.b64 %rd32, %rd31, %rd30;
; CHECK-NEXT: shl.b64 %rd33, %rd54, 1;
; CHECK-NEXT: shr.u64 %rd34, %rd57, 63;
; CHECK-NEXT: or.b64 %rd35, %rd33, %rd34;
; CHECK-NEXT: shr.u64 %rd36, %rd56, 63;
; CHECK-NEXT: shl.b64 %rd37, %rd57, 1;
; CHECK-NEXT: or.b64 %rd38, %rd37, %rd36;
; CHECK-NEXT: shl.b64 %rd39, %rd56, 1;
; CHECK-NEXT: or.b64 %rd56, %rd51, %rd39;
; CHECK-NEXT: or.b64 %rd57, %rd50, %rd38;
; CHECK-NEXT: sub.cc.s64 %rd40, %rd1, %rd35;
; CHECK-NEXT: subc.cc.s64 %rd41, %rd2, %rd32;
; CHECK-NEXT: shr.s64 %rd42, %rd41, 63;
; CHECK-NEXT: and.b64 %rd51, %rd42, 1;
; CHECK-NEXT: and.b64 %rd43, %rd42, %rd5;
; CHECK-NEXT: and.b64 %rd44, %rd42, %rd6;
; CHECK-NEXT: sub.cc.s64 %rd54, %rd35, %rd43;
; CHECK-NEXT: subc.cc.s64 %rd55, %rd32, %rd44;
; CHECK-NEXT: add.cc.s64 %rd52, %rd52, -1;
; CHECK-NEXT: addc.cc.s64 %rd53, %rd53, -1;
; CHECK-NEXT: or.b64 %rd45, %rd52, %rd53;
; CHECK-NEXT: setp.eq.b64 %p17, %rd45, 0;
; CHECK-NEXT: @%p17 bra $L__BB5_4;
; CHECK-NEXT: bra.uni $L__BB5_2;
; CHECK-NEXT: $L__BB5_4: // %udiv-loop-exit
; CHECK-NEXT: shr.u64 %rd46, %rd56, 63;
; CHECK-NEXT: shl.b64 %rd47, %rd57, 1;
; CHECK-NEXT: or.b64 %rd48, %rd47, %rd46;
; CHECK-NEXT: shl.b64 %rd49, %rd56, 1;
; CHECK-NEXT: or.b64 %rd58, %rd51, %rd49;
; CHECK-NEXT: or.b64 %rd59, %rd50, %rd48;
; CHECK-NEXT: $L__BB5_5: // %udiv-end
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd58, %rd59};
; CHECK-NEXT: ret;
%div = udiv i128 %lhs, %rhs
ret i128 %div
}
; Signed 128-bit division by the constant 8589934592 (2^33).
; The expected PTX is straight-line code with no loop: a bias of 2^33 - 1 is
; derived from the sign of the high word (shr.s64 by 63, then shr.u64 by 31),
; added to the dividend, and the biased value is shifted right by 33 across the
; two 64-bit halves, with an arithmetic shift (shr.s64) on the high half.
define i128 @sdiv_i128_pow2k(i128 %lhs) {
; CHECK-LABEL: sdiv_i128_pow2k(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<11>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [sdiv_i128_pow2k_param_0];
; CHECK-NEXT: shr.s64 %rd3, %rd2, 63;
; CHECK-NEXT: shr.u64 %rd4, %rd3, 31;
; CHECK-NEXT: add.cc.s64 %rd5, %rd1, %rd4;
; CHECK-NEXT: addc.cc.s64 %rd6, %rd2, 0;
; CHECK-NEXT: shl.b64 %rd7, %rd6, 31;
; CHECK-NEXT: shr.u64 %rd8, %rd5, 33;
; CHECK-NEXT: or.b64 %rd9, %rd8, %rd7;
; CHECK-NEXT: shr.s64 %rd10, %rd6, 33;
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd9, %rd10};
; CHECK-NEXT: ret;
%div = sdiv i128 %lhs, 8589934592
ret i128 %div
}
; Unsigned 128-bit division by the constant 8589934592 (2^33).
; The expected PTX is a logical right shift by 33 across the two 64-bit halves:
; the low result word combines (low >> 33) with (high << 31), and the high
; result word is (high >> 33).
define i128 @udiv_i128_pow2k(i128 %lhs) {
; CHECK-LABEL: udiv_i128_pow2k(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [udiv_i128_pow2k_param_0];
; CHECK-NEXT: shl.b64 %rd3, %rd2, 31;
; CHECK-NEXT: shr.u64 %rd4, %rd1, 33;
; CHECK-NEXT: or.b64 %rd5, %rd4, %rd3;
; CHECK-NEXT: shr.u64 %rd6, %rd2, 33;
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd5, %rd6};
; CHECK-NEXT: ret;
%div = udiv i128 %lhs, 8589934592
ret i128 %div
}
; 128-bit addition.
; The expected PTX loads each operand as two 64-bit halves and adds them with
; an add.cc / addc.cc pair, so the carry from the low halves feeds the high
; halves.
define i128 @add_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: add_i128(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [add_i128_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [add_i128_param_1];
; CHECK-NEXT: add.cc.s64 %rd5, %rd1, %rd3;
; CHECK-NEXT: addc.cc.s64 %rd6, %rd2, %rd4;
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd5, %rd6};
; CHECK-NEXT: ret;
%result = add i128 %lhs, %rhs
ret i128 %result
}