| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -mtriple=nvptx64-- 2>&1 | FileCheck %s |
| ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-- | %ptxas-verify %} |
| |
| ; Signed 128-bit remainder with a variable divisor. The expected PTX below |
| ; shows the operation expanded inline (udiv-* blocks around a do-while loop), |
| ; followed by a multiply/subtract step and a sign fix-up on the result. |
| define i128 @srem_i128(i128 %lhs, i128 %rhs) { |
| ; CHECK-LABEL: srem_i128( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<20>; |
| ; CHECK-NEXT: .reg .b32 %r<12>; |
| ; CHECK-NEXT: .reg .b64 %rd<79>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %_udiv-special-cases |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd8, %rd9}, [srem_i128_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd10, %rd11}, [srem_i128_param_1]; |
| ; CHECK-NEXT: shr.s64 %rd1, %rd9, 63; |
| ; CHECK-NEXT: sub.cc.s64 %rd12, 0, %rd8; |
| ; CHECK-NEXT: subc.cc.s64 %rd13, 0, %rd9; |
| ; CHECK-NEXT: setp.lt.s64 %p1, %rd9, 0; |
| ; CHECK-NEXT: selp.b64 %rd3, %rd13, %rd9, %p1; |
| ; CHECK-NEXT: selp.b64 %rd2, %rd12, %rd8, %p1; |
| ; CHECK-NEXT: sub.cc.s64 %rd14, 0, %rd10; |
| ; CHECK-NEXT: subc.cc.s64 %rd15, 0, %rd11; |
| ; CHECK-NEXT: setp.lt.s64 %p2, %rd11, 0; |
| ; CHECK-NEXT: selp.b64 %rd5, %rd15, %rd11, %p2; |
| ; CHECK-NEXT: selp.b64 %rd4, %rd14, %rd10, %p2; |
| ; CHECK-NEXT: or.b64 %rd16, %rd4, %rd5; |
| ; CHECK-NEXT: setp.eq.b64 %p3, %rd16, 0; |
| ; CHECK-NEXT: or.b64 %rd17, %rd2, %rd3; |
| ; CHECK-NEXT: setp.eq.b64 %p4, %rd17, 0; |
| ; CHECK-NEXT: or.pred %p5, %p3, %p4; |
| ; CHECK-NEXT: setp.ne.b64 %p6, %rd5, 0; |
| ; CHECK-NEXT: clz.b64 %r1, %rd5; |
| ; CHECK-NEXT: cvt.u64.u32 %rd18, %r1; |
| ; CHECK-NEXT: clz.b64 %r2, %rd4; |
| ; CHECK-NEXT: cvt.u64.u32 %rd19, %r2; |
| ; CHECK-NEXT: add.s64 %rd20, %rd19, 64; |
| ; CHECK-NEXT: selp.b64 %rd21, %rd18, %rd20, %p6; |
| ; CHECK-NEXT: setp.ne.b64 %p7, %rd3, 0; |
| ; CHECK-NEXT: clz.b64 %r3, %rd3; |
| ; CHECK-NEXT: cvt.u64.u32 %rd22, %r3; |
| ; CHECK-NEXT: clz.b64 %r4, %rd2; |
| ; CHECK-NEXT: cvt.u64.u32 %rd23, %r4; |
| ; CHECK-NEXT: add.s64 %rd24, %rd23, 64; |
| ; CHECK-NEXT: selp.b64 %rd25, %rd22, %rd24, %p7; |
| ; CHECK-NEXT: mov.b64 %rd70, 0; |
| ; CHECK-NEXT: sub.cc.s64 %rd26, %rd21, %rd25; |
| ; CHECK-NEXT: subc.cc.s64 %rd27, %rd70, 0; |
| ; CHECK-NEXT: setp.gt.u64 %p8, %rd26, 127; |
| ; CHECK-NEXT: setp.eq.b64 %p9, %rd27, 0; |
| ; CHECK-NEXT: and.pred %p10, %p9, %p8; |
| ; CHECK-NEXT: setp.ne.b64 %p11, %rd27, 0; |
| ; CHECK-NEXT: or.pred %p12, %p10, %p11; |
| ; CHECK-NEXT: or.pred %p13, %p5, %p12; |
| ; CHECK-NEXT: xor.b64 %rd28, %rd26, 127; |
| ; CHECK-NEXT: or.b64 %rd29, %rd28, %rd27; |
| ; CHECK-NEXT: setp.eq.b64 %p14, %rd29, 0; |
| ; CHECK-NEXT: selp.b64 %rd78, 0, %rd3, %p13; |
| ; CHECK-NEXT: selp.b64 %rd77, 0, %rd2, %p13; |
| ; CHECK-NEXT: or.pred %p15, %p13, %p14; |
| ; CHECK-NEXT: @%p15 bra $L__BB0_5; |
| ; CHECK-NEXT: // %bb.3: // %udiv-bb1 |
| ; CHECK-NEXT: add.cc.s64 %rd71, %rd26, 1; |
| ; CHECK-NEXT: addc.cc.s64 %rd72, %rd27, 0; |
| ; CHECK-NEXT: or.b64 %rd30, %rd71, %rd72; |
| ; CHECK-NEXT: setp.eq.b64 %p16, %rd30, 0; |
| ; CHECK-NEXT: cvt.u32.u64 %r5, %rd26; |
| ; CHECK-NEXT: sub.s32 %r6, 127, %r5; |
| ; CHECK-NEXT: shl.b64 %rd31, %rd3, %r6; |
| ; CHECK-NEXT: sub.s32 %r7, 64, %r6; |
| ; CHECK-NEXT: shr.u64 %rd32, %rd2, %r7; |
| ; CHECK-NEXT: or.b64 %rd33, %rd31, %rd32; |
| ; CHECK-NEXT: sub.s32 %r8, 63, %r5; |
| ; CHECK-NEXT: shl.b64 %rd34, %rd2, %r8; |
| ; CHECK-NEXT: setp.gt.s32 %p17, %r6, 63; |
| ; CHECK-NEXT: selp.b64 %rd76, %rd34, %rd33, %p17; |
| ; CHECK-NEXT: shl.b64 %rd75, %rd2, %r6; |
| ; CHECK-NEXT: mov.b64 %rd69, %rd70; |
| ; CHECK-NEXT: @%p16 bra $L__BB0_4; |
| ; CHECK-NEXT: // %bb.1: // %udiv-preheader |
| ; CHECK-NEXT: cvt.u32.u64 %r9, %rd71; |
| ; CHECK-NEXT: shr.u64 %rd35, %rd2, %r9; |
| ; CHECK-NEXT: sub.s32 %r10, 64, %r9; |
| ; CHECK-NEXT: shl.b64 %rd36, %rd3, %r10; |
| ; CHECK-NEXT: or.b64 %rd37, %rd35, %rd36; |
| ; CHECK-NEXT: add.s32 %r11, %r9, -64; |
| ; CHECK-NEXT: shr.u64 %rd38, %rd3, %r11; |
| ; CHECK-NEXT: setp.gt.s32 %p18, %r9, 63; |
| ; CHECK-NEXT: selp.b64 %rd73, %rd38, %rd37, %p18; |
| ; CHECK-NEXT: shr.u64 %rd74, %rd3, %r9; |
| ; CHECK-NEXT: add.cc.s64 %rd6, %rd4, -1; |
| ; CHECK-NEXT: addc.cc.s64 %rd7, %rd5, -1; |
| ; CHECK-NEXT: mov.b64 %rd69, 0; |
| ; CHECK-NEXT: mov.b64 %rd70, %rd69; |
| ; CHECK-NEXT: $L__BB0_2: // %udiv-do-while |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: shr.u64 %rd39, %rd73, 63; |
| ; CHECK-NEXT: shl.b64 %rd40, %rd74, 1; |
| ; CHECK-NEXT: or.b64 %rd41, %rd40, %rd39; |
| ; CHECK-NEXT: shl.b64 %rd42, %rd73, 1; |
| ; CHECK-NEXT: shr.u64 %rd43, %rd76, 63; |
| ; CHECK-NEXT: or.b64 %rd44, %rd42, %rd43; |
| ; CHECK-NEXT: shr.u64 %rd45, %rd75, 63; |
| ; CHECK-NEXT: shl.b64 %rd46, %rd76, 1; |
| ; CHECK-NEXT: or.b64 %rd47, %rd46, %rd45; |
| ; CHECK-NEXT: shl.b64 %rd48, %rd75, 1; |
| ; CHECK-NEXT: or.b64 %rd75, %rd70, %rd48; |
| ; CHECK-NEXT: or.b64 %rd76, %rd69, %rd47; |
| ; CHECK-NEXT: sub.cc.s64 %rd49, %rd6, %rd44; |
| ; CHECK-NEXT: subc.cc.s64 %rd50, %rd7, %rd41; |
| ; CHECK-NEXT: shr.s64 %rd51, %rd50, 63; |
| ; CHECK-NEXT: and.b64 %rd70, %rd51, 1; |
| ; CHECK-NEXT: and.b64 %rd52, %rd51, %rd4; |
| ; CHECK-NEXT: and.b64 %rd53, %rd51, %rd5; |
| ; CHECK-NEXT: sub.cc.s64 %rd73, %rd44, %rd52; |
| ; CHECK-NEXT: subc.cc.s64 %rd74, %rd41, %rd53; |
| ; CHECK-NEXT: add.cc.s64 %rd71, %rd71, -1; |
| ; CHECK-NEXT: addc.cc.s64 %rd72, %rd72, -1; |
| ; CHECK-NEXT: or.b64 %rd54, %rd71, %rd72; |
| ; CHECK-NEXT: setp.eq.b64 %p19, %rd54, 0; |
| ; CHECK-NEXT: @%p19 bra $L__BB0_4; |
| ; CHECK-NEXT: bra.uni $L__BB0_2; |
| ; CHECK-NEXT: $L__BB0_4: // %udiv-loop-exit |
| ; CHECK-NEXT: shr.u64 %rd55, %rd75, 63; |
| ; CHECK-NEXT: shl.b64 %rd56, %rd76, 1; |
| ; CHECK-NEXT: or.b64 %rd57, %rd56, %rd55; |
| ; CHECK-NEXT: shl.b64 %rd58, %rd75, 1; |
| ; CHECK-NEXT: or.b64 %rd77, %rd70, %rd58; |
| ; CHECK-NEXT: or.b64 %rd78, %rd69, %rd57; |
| ; CHECK-NEXT: $L__BB0_5: // %udiv-end |
| ; CHECK-NEXT: mul.hi.u64 %rd59, %rd4, %rd77; |
| ; CHECK-NEXT: mad.lo.s64 %rd60, %rd4, %rd78, %rd59; |
| ; CHECK-NEXT: mad.lo.s64 %rd61, %rd5, %rd77, %rd60; |
| ; CHECK-NEXT: mul.lo.s64 %rd62, %rd4, %rd77; |
| ; CHECK-NEXT: sub.cc.s64 %rd63, %rd2, %rd62; |
| ; CHECK-NEXT: subc.cc.s64 %rd64, %rd3, %rd61; |
| ; CHECK-NEXT: xor.b64 %rd65, %rd63, %rd1; |
| ; CHECK-NEXT: xor.b64 %rd66, %rd64, %rd1; |
| ; CHECK-NEXT: sub.cc.s64 %rd67, %rd65, %rd1; |
| ; CHECK-NEXT: subc.cc.s64 %rd68, %rd66, %rd1; |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd67, %rd68}; |
| ; CHECK-NEXT: ret; |
| %rem = srem i128 %lhs, %rhs |
| ret i128 %rem |
| } |
| |
| ; Unsigned 128-bit remainder with a variable divisor. The expected PTX below |
| ; shows the operation expanded inline (udiv-* blocks around a do-while loop), |
| ; followed by a multiply/subtract step that produces the returned value. |
| define i128 @urem_i128(i128 %lhs, i128 %rhs) { |
| ; CHECK-LABEL: urem_i128( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<18>; |
| ; CHECK-NEXT: .reg .b32 %r<12>; |
| ; CHECK-NEXT: .reg .b64 %rd<66>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %_udiv-special-cases |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [urem_i128_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [urem_i128_param_1]; |
| ; CHECK-NEXT: or.b64 %rd7, %rd1, %rd2; |
| ; CHECK-NEXT: setp.eq.b64 %p1, %rd7, 0; |
| ; CHECK-NEXT: or.b64 %rd8, %rd5, %rd6; |
| ; CHECK-NEXT: setp.eq.b64 %p2, %rd8, 0; |
| ; CHECK-NEXT: or.pred %p3, %p1, %p2; |
| ; CHECK-NEXT: setp.ne.b64 %p4, %rd2, 0; |
| ; CHECK-NEXT: clz.b64 %r1, %rd2; |
| ; CHECK-NEXT: cvt.u64.u32 %rd9, %r1; |
| ; CHECK-NEXT: clz.b64 %r2, %rd1; |
| ; CHECK-NEXT: cvt.u64.u32 %rd10, %r2; |
| ; CHECK-NEXT: add.s64 %rd11, %rd10, 64; |
| ; CHECK-NEXT: selp.b64 %rd12, %rd9, %rd11, %p4; |
| ; CHECK-NEXT: setp.ne.b64 %p5, %rd6, 0; |
| ; CHECK-NEXT: clz.b64 %r3, %rd6; |
| ; CHECK-NEXT: cvt.u64.u32 %rd13, %r3; |
| ; CHECK-NEXT: clz.b64 %r4, %rd5; |
| ; CHECK-NEXT: cvt.u64.u32 %rd14, %r4; |
| ; CHECK-NEXT: add.s64 %rd15, %rd14, 64; |
| ; CHECK-NEXT: selp.b64 %rd16, %rd13, %rd15, %p5; |
| ; CHECK-NEXT: mov.b64 %rd57, 0; |
| ; CHECK-NEXT: sub.cc.s64 %rd17, %rd12, %rd16; |
| ; CHECK-NEXT: subc.cc.s64 %rd18, %rd57, 0; |
| ; CHECK-NEXT: setp.gt.u64 %p6, %rd17, 127; |
| ; CHECK-NEXT: setp.eq.b64 %p7, %rd18, 0; |
| ; CHECK-NEXT: and.pred %p8, %p7, %p6; |
| ; CHECK-NEXT: setp.ne.b64 %p9, %rd18, 0; |
| ; CHECK-NEXT: or.pred %p10, %p8, %p9; |
| ; CHECK-NEXT: or.pred %p11, %p3, %p10; |
| ; CHECK-NEXT: xor.b64 %rd19, %rd17, 127; |
| ; CHECK-NEXT: or.b64 %rd20, %rd19, %rd18; |
| ; CHECK-NEXT: setp.eq.b64 %p12, %rd20, 0; |
| ; CHECK-NEXT: selp.b64 %rd65, 0, %rd6, %p11; |
| ; CHECK-NEXT: selp.b64 %rd64, 0, %rd5, %p11; |
| ; CHECK-NEXT: or.pred %p13, %p11, %p12; |
| ; CHECK-NEXT: @%p13 bra $L__BB1_5; |
| ; CHECK-NEXT: // %bb.3: // %udiv-bb1 |
| ; CHECK-NEXT: add.cc.s64 %rd58, %rd17, 1; |
| ; CHECK-NEXT: addc.cc.s64 %rd59, %rd18, 0; |
| ; CHECK-NEXT: or.b64 %rd21, %rd58, %rd59; |
| ; CHECK-NEXT: setp.eq.b64 %p14, %rd21, 0; |
| ; CHECK-NEXT: cvt.u32.u64 %r5, %rd17; |
| ; CHECK-NEXT: sub.s32 %r6, 127, %r5; |
| ; CHECK-NEXT: shl.b64 %rd22, %rd6, %r6; |
| ; CHECK-NEXT: sub.s32 %r7, 64, %r6; |
| ; CHECK-NEXT: shr.u64 %rd23, %rd5, %r7; |
| ; CHECK-NEXT: or.b64 %rd24, %rd22, %rd23; |
| ; CHECK-NEXT: sub.s32 %r8, 63, %r5; |
| ; CHECK-NEXT: shl.b64 %rd25, %rd5, %r8; |
| ; CHECK-NEXT: setp.gt.s32 %p15, %r6, 63; |
| ; CHECK-NEXT: selp.b64 %rd63, %rd25, %rd24, %p15; |
| ; CHECK-NEXT: shl.b64 %rd62, %rd5, %r6; |
| ; CHECK-NEXT: mov.b64 %rd56, %rd57; |
| ; CHECK-NEXT: @%p14 bra $L__BB1_4; |
| ; CHECK-NEXT: // %bb.1: // %udiv-preheader |
| ; CHECK-NEXT: cvt.u32.u64 %r9, %rd58; |
| ; CHECK-NEXT: shr.u64 %rd26, %rd5, %r9; |
| ; CHECK-NEXT: sub.s32 %r10, 64, %r9; |
| ; CHECK-NEXT: shl.b64 %rd27, %rd6, %r10; |
| ; CHECK-NEXT: or.b64 %rd28, %rd26, %rd27; |
| ; CHECK-NEXT: add.s32 %r11, %r9, -64; |
| ; CHECK-NEXT: shr.u64 %rd29, %rd6, %r11; |
| ; CHECK-NEXT: setp.gt.s32 %p16, %r9, 63; |
| ; CHECK-NEXT: selp.b64 %rd60, %rd29, %rd28, %p16; |
| ; CHECK-NEXT: shr.u64 %rd61, %rd6, %r9; |
| ; CHECK-NEXT: add.cc.s64 %rd3, %rd1, -1; |
| ; CHECK-NEXT: addc.cc.s64 %rd4, %rd2, -1; |
| ; CHECK-NEXT: mov.b64 %rd56, 0; |
| ; CHECK-NEXT: mov.b64 %rd57, %rd56; |
| ; CHECK-NEXT: $L__BB1_2: // %udiv-do-while |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: shr.u64 %rd30, %rd60, 63; |
| ; CHECK-NEXT: shl.b64 %rd31, %rd61, 1; |
| ; CHECK-NEXT: or.b64 %rd32, %rd31, %rd30; |
| ; CHECK-NEXT: shl.b64 %rd33, %rd60, 1; |
| ; CHECK-NEXT: shr.u64 %rd34, %rd63, 63; |
| ; CHECK-NEXT: or.b64 %rd35, %rd33, %rd34; |
| ; CHECK-NEXT: shr.u64 %rd36, %rd62, 63; |
| ; CHECK-NEXT: shl.b64 %rd37, %rd63, 1; |
| ; CHECK-NEXT: or.b64 %rd38, %rd37, %rd36; |
| ; CHECK-NEXT: shl.b64 %rd39, %rd62, 1; |
| ; CHECK-NEXT: or.b64 %rd62, %rd57, %rd39; |
| ; CHECK-NEXT: or.b64 %rd63, %rd56, %rd38; |
| ; CHECK-NEXT: sub.cc.s64 %rd40, %rd3, %rd35; |
| ; CHECK-NEXT: subc.cc.s64 %rd41, %rd4, %rd32; |
| ; CHECK-NEXT: shr.s64 %rd42, %rd41, 63; |
| ; CHECK-NEXT: and.b64 %rd57, %rd42, 1; |
| ; CHECK-NEXT: and.b64 %rd43, %rd42, %rd1; |
| ; CHECK-NEXT: and.b64 %rd44, %rd42, %rd2; |
| ; CHECK-NEXT: sub.cc.s64 %rd60, %rd35, %rd43; |
| ; CHECK-NEXT: subc.cc.s64 %rd61, %rd32, %rd44; |
| ; CHECK-NEXT: add.cc.s64 %rd58, %rd58, -1; |
| ; CHECK-NEXT: addc.cc.s64 %rd59, %rd59, -1; |
| ; CHECK-NEXT: or.b64 %rd45, %rd58, %rd59; |
| ; CHECK-NEXT: setp.eq.b64 %p17, %rd45, 0; |
| ; CHECK-NEXT: @%p17 bra $L__BB1_4; |
| ; CHECK-NEXT: bra.uni $L__BB1_2; |
| ; CHECK-NEXT: $L__BB1_4: // %udiv-loop-exit |
| ; CHECK-NEXT: shr.u64 %rd46, %rd62, 63; |
| ; CHECK-NEXT: shl.b64 %rd47, %rd63, 1; |
| ; CHECK-NEXT: or.b64 %rd48, %rd47, %rd46; |
| ; CHECK-NEXT: shl.b64 %rd49, %rd62, 1; |
| ; CHECK-NEXT: or.b64 %rd64, %rd57, %rd49; |
| ; CHECK-NEXT: or.b64 %rd65, %rd56, %rd48; |
| ; CHECK-NEXT: $L__BB1_5: // %udiv-end |
| ; CHECK-NEXT: mul.hi.u64 %rd50, %rd1, %rd64; |
| ; CHECK-NEXT: mad.lo.s64 %rd51, %rd1, %rd65, %rd50; |
| ; CHECK-NEXT: mad.lo.s64 %rd52, %rd2, %rd64, %rd51; |
| ; CHECK-NEXT: mul.lo.s64 %rd53, %rd1, %rd64; |
| ; CHECK-NEXT: sub.cc.s64 %rd54, %rd5, %rd53; |
| ; CHECK-NEXT: subc.cc.s64 %rd55, %rd6, %rd52; |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd54, %rd55}; |
| ; CHECK-NEXT: ret; |
| %rem = urem i128 %lhs, %rhs |
| ret i128 %rem |
| } |
| |
| ; Signed 128-bit remainder by the constant 8589934592 (2^33). The expected |
| ; PTX below is a single straight-line block (shift, add-with-carry, mask, |
| ; subtract-with-carry) with no udiv-* loop blocks. |
| define i128 @srem_i128_pow2k(i128 %lhs) { |
| ; CHECK-LABEL: srem_i128_pow2k( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<10>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [srem_i128_pow2k_param_0]; |
| ; CHECK-NEXT: shr.s64 %rd3, %rd2, 63; |
| ; CHECK-NEXT: shr.u64 %rd4, %rd3, 31; |
| ; CHECK-NEXT: add.cc.s64 %rd5, %rd1, %rd4; |
| ; CHECK-NEXT: addc.cc.s64 %rd6, %rd2, 0; |
| ; CHECK-NEXT: and.b64 %rd7, %rd5, -8589934592; |
| ; CHECK-NEXT: sub.cc.s64 %rd8, %rd1, %rd7; |
| ; CHECK-NEXT: subc.cc.s64 %rd9, %rd2, %rd6; |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd8, %rd9}; |
| ; CHECK-NEXT: ret; |
| %rem = srem i128 %lhs, 8589934592 |
| ret i128 %rem |
| } |
| |
| ; Unsigned 128-bit remainder by the constant 8589934592 (2^33). The expected |
| ; PTX below masks the low 64-bit half with 8589934591 (2^33 - 1) and stores |
| ; zero as the high half. |
| define i128 @urem_i128_pow2k(i128 %lhs) { |
| ; CHECK-LABEL: urem_i128_pow2k( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [urem_i128_pow2k_param_0]; |
| ; CHECK-NEXT: and.b64 %rd3, %rd1, 8589934591; |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, 0}; |
| ; CHECK-NEXT: ret; |
| %rem = urem i128 %lhs, 8589934592 |
| ret i128 %rem |
| } |
| |
| ; Signed 128-bit division with a variable divisor. The expected PTX below |
| ; shows the operation expanded inline (udiv-* blocks around a do-while loop), |
| ; with an xor/subtract sign fix-up applied to the result before it is stored. |
| define i128 @sdiv_i128(i128 %lhs, i128 %rhs) { |
| ; CHECK-LABEL: sdiv_i128( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<20>; |
| ; CHECK-NEXT: .reg .b32 %r<12>; |
| ; CHECK-NEXT: .reg .b64 %rd<74>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %_udiv-special-cases |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd8, %rd9}, [sdiv_i128_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd10, %rd11}, [sdiv_i128_param_1]; |
| ; CHECK-NEXT: sub.cc.s64 %rd12, 0, %rd8; |
| ; CHECK-NEXT: subc.cc.s64 %rd13, 0, %rd9; |
| ; CHECK-NEXT: setp.lt.s64 %p1, %rd9, 0; |
| ; CHECK-NEXT: selp.b64 %rd2, %rd13, %rd9, %p1; |
| ; CHECK-NEXT: selp.b64 %rd1, %rd12, %rd8, %p1; |
| ; CHECK-NEXT: sub.cc.s64 %rd14, 0, %rd10; |
| ; CHECK-NEXT: subc.cc.s64 %rd15, 0, %rd11; |
| ; CHECK-NEXT: setp.lt.s64 %p2, %rd11, 0; |
| ; CHECK-NEXT: selp.b64 %rd4, %rd15, %rd11, %p2; |
| ; CHECK-NEXT: selp.b64 %rd3, %rd14, %rd10, %p2; |
| ; CHECK-NEXT: xor.b64 %rd16, %rd11, %rd9; |
| ; CHECK-NEXT: shr.s64 %rd5, %rd16, 63; |
| ; CHECK-NEXT: or.b64 %rd17, %rd3, %rd4; |
| ; CHECK-NEXT: setp.eq.b64 %p3, %rd17, 0; |
| ; CHECK-NEXT: or.b64 %rd18, %rd1, %rd2; |
| ; CHECK-NEXT: setp.eq.b64 %p4, %rd18, 0; |
| ; CHECK-NEXT: or.pred %p5, %p3, %p4; |
| ; CHECK-NEXT: setp.ne.b64 %p6, %rd4, 0; |
| ; CHECK-NEXT: clz.b64 %r1, %rd4; |
| ; CHECK-NEXT: cvt.u64.u32 %rd19, %r1; |
| ; CHECK-NEXT: clz.b64 %r2, %rd3; |
| ; CHECK-NEXT: cvt.u64.u32 %rd20, %r2; |
| ; CHECK-NEXT: add.s64 %rd21, %rd20, 64; |
| ; CHECK-NEXT: selp.b64 %rd22, %rd19, %rd21, %p6; |
| ; CHECK-NEXT: setp.ne.b64 %p7, %rd2, 0; |
| ; CHECK-NEXT: clz.b64 %r3, %rd2; |
| ; CHECK-NEXT: cvt.u64.u32 %rd23, %r3; |
| ; CHECK-NEXT: clz.b64 %r4, %rd1; |
| ; CHECK-NEXT: cvt.u64.u32 %rd24, %r4; |
| ; CHECK-NEXT: add.s64 %rd25, %rd24, 64; |
| ; CHECK-NEXT: selp.b64 %rd26, %rd23, %rd25, %p7; |
| ; CHECK-NEXT: mov.b64 %rd65, 0; |
| ; CHECK-NEXT: sub.cc.s64 %rd27, %rd22, %rd26; |
| ; CHECK-NEXT: subc.cc.s64 %rd28, %rd65, 0; |
| ; CHECK-NEXT: setp.gt.u64 %p8, %rd27, 127; |
| ; CHECK-NEXT: setp.eq.b64 %p9, %rd28, 0; |
| ; CHECK-NEXT: and.pred %p10, %p9, %p8; |
| ; CHECK-NEXT: setp.ne.b64 %p11, %rd28, 0; |
| ; CHECK-NEXT: or.pred %p12, %p10, %p11; |
| ; CHECK-NEXT: or.pred %p13, %p5, %p12; |
| ; CHECK-NEXT: xor.b64 %rd29, %rd27, 127; |
| ; CHECK-NEXT: or.b64 %rd30, %rd29, %rd28; |
| ; CHECK-NEXT: setp.eq.b64 %p14, %rd30, 0; |
| ; CHECK-NEXT: selp.b64 %rd73, 0, %rd2, %p13; |
| ; CHECK-NEXT: selp.b64 %rd72, 0, %rd1, %p13; |
| ; CHECK-NEXT: or.pred %p15, %p13, %p14; |
| ; CHECK-NEXT: @%p15 bra $L__BB4_5; |
| ; CHECK-NEXT: // %bb.3: // %udiv-bb1 |
| ; CHECK-NEXT: add.cc.s64 %rd66, %rd27, 1; |
| ; CHECK-NEXT: addc.cc.s64 %rd67, %rd28, 0; |
| ; CHECK-NEXT: or.b64 %rd31, %rd66, %rd67; |
| ; CHECK-NEXT: setp.eq.b64 %p16, %rd31, 0; |
| ; CHECK-NEXT: cvt.u32.u64 %r5, %rd27; |
| ; CHECK-NEXT: sub.s32 %r6, 127, %r5; |
| ; CHECK-NEXT: shl.b64 %rd32, %rd2, %r6; |
| ; CHECK-NEXT: sub.s32 %r7, 64, %r6; |
| ; CHECK-NEXT: shr.u64 %rd33, %rd1, %r7; |
| ; CHECK-NEXT: or.b64 %rd34, %rd32, %rd33; |
| ; CHECK-NEXT: sub.s32 %r8, 63, %r5; |
| ; CHECK-NEXT: shl.b64 %rd35, %rd1, %r8; |
| ; CHECK-NEXT: setp.gt.s32 %p17, %r6, 63; |
| ; CHECK-NEXT: selp.b64 %rd71, %rd35, %rd34, %p17; |
| ; CHECK-NEXT: shl.b64 %rd70, %rd1, %r6; |
| ; CHECK-NEXT: mov.b64 %rd64, %rd65; |
| ; CHECK-NEXT: @%p16 bra $L__BB4_4; |
| ; CHECK-NEXT: // %bb.1: // %udiv-preheader |
| ; CHECK-NEXT: cvt.u32.u64 %r9, %rd66; |
| ; CHECK-NEXT: shr.u64 %rd36, %rd1, %r9; |
| ; CHECK-NEXT: sub.s32 %r10, 64, %r9; |
| ; CHECK-NEXT: shl.b64 %rd37, %rd2, %r10; |
| ; CHECK-NEXT: or.b64 %rd38, %rd36, %rd37; |
| ; CHECK-NEXT: add.s32 %r11, %r9, -64; |
| ; CHECK-NEXT: shr.u64 %rd39, %rd2, %r11; |
| ; CHECK-NEXT: setp.gt.s32 %p18, %r9, 63; |
| ; CHECK-NEXT: selp.b64 %rd68, %rd39, %rd38, %p18; |
| ; CHECK-NEXT: shr.u64 %rd69, %rd2, %r9; |
| ; CHECK-NEXT: add.cc.s64 %rd6, %rd3, -1; |
| ; CHECK-NEXT: addc.cc.s64 %rd7, %rd4, -1; |
| ; CHECK-NEXT: mov.b64 %rd64, 0; |
| ; CHECK-NEXT: mov.b64 %rd65, %rd64; |
| ; CHECK-NEXT: $L__BB4_2: // %udiv-do-while |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: shr.u64 %rd40, %rd68, 63; |
| ; CHECK-NEXT: shl.b64 %rd41, %rd69, 1; |
| ; CHECK-NEXT: or.b64 %rd42, %rd41, %rd40; |
| ; CHECK-NEXT: shl.b64 %rd43, %rd68, 1; |
| ; CHECK-NEXT: shr.u64 %rd44, %rd71, 63; |
| ; CHECK-NEXT: or.b64 %rd45, %rd43, %rd44; |
| ; CHECK-NEXT: shr.u64 %rd46, %rd70, 63; |
| ; CHECK-NEXT: shl.b64 %rd47, %rd71, 1; |
| ; CHECK-NEXT: or.b64 %rd48, %rd47, %rd46; |
| ; CHECK-NEXT: shl.b64 %rd49, %rd70, 1; |
| ; CHECK-NEXT: or.b64 %rd70, %rd65, %rd49; |
| ; CHECK-NEXT: or.b64 %rd71, %rd64, %rd48; |
| ; CHECK-NEXT: sub.cc.s64 %rd50, %rd6, %rd45; |
| ; CHECK-NEXT: subc.cc.s64 %rd51, %rd7, %rd42; |
| ; CHECK-NEXT: shr.s64 %rd52, %rd51, 63; |
| ; CHECK-NEXT: and.b64 %rd65, %rd52, 1; |
| ; CHECK-NEXT: and.b64 %rd53, %rd52, %rd3; |
| ; CHECK-NEXT: and.b64 %rd54, %rd52, %rd4; |
| ; CHECK-NEXT: sub.cc.s64 %rd68, %rd45, %rd53; |
| ; CHECK-NEXT: subc.cc.s64 %rd69, %rd42, %rd54; |
| ; CHECK-NEXT: add.cc.s64 %rd66, %rd66, -1; |
| ; CHECK-NEXT: addc.cc.s64 %rd67, %rd67, -1; |
| ; CHECK-NEXT: or.b64 %rd55, %rd66, %rd67; |
| ; CHECK-NEXT: setp.eq.b64 %p19, %rd55, 0; |
| ; CHECK-NEXT: @%p19 bra $L__BB4_4; |
| ; CHECK-NEXT: bra.uni $L__BB4_2; |
| ; CHECK-NEXT: $L__BB4_4: // %udiv-loop-exit |
| ; CHECK-NEXT: shr.u64 %rd56, %rd70, 63; |
| ; CHECK-NEXT: shl.b64 %rd57, %rd71, 1; |
| ; CHECK-NEXT: or.b64 %rd58, %rd57, %rd56; |
| ; CHECK-NEXT: shl.b64 %rd59, %rd70, 1; |
| ; CHECK-NEXT: or.b64 %rd72, %rd65, %rd59; |
| ; CHECK-NEXT: or.b64 %rd73, %rd64, %rd58; |
| ; CHECK-NEXT: $L__BB4_5: // %udiv-end |
| ; CHECK-NEXT: xor.b64 %rd60, %rd72, %rd5; |
| ; CHECK-NEXT: xor.b64 %rd61, %rd73, %rd5; |
| ; CHECK-NEXT: sub.cc.s64 %rd62, %rd60, %rd5; |
| ; CHECK-NEXT: subc.cc.s64 %rd63, %rd61, %rd5; |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd62, %rd63}; |
| ; CHECK-NEXT: ret; |
| %quot = sdiv i128 %lhs, %rhs |
| ret i128 %quot |
| } |
| |
| ; Unsigned 128-bit division with a variable divisor. The expected PTX below |
| ; shows the operation expanded inline (udiv-* blocks around a do-while loop); |
| ; the value reaching the udiv-end block is stored as the return value. |
| define i128 @udiv_i128(i128 %lhs, i128 %rhs) { |
| ; CHECK-LABEL: udiv_i128( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<18>; |
| ; CHECK-NEXT: .reg .b32 %r<12>; |
| ; CHECK-NEXT: .reg .b64 %rd<60>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %_udiv-special-cases |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [udiv_i128_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [udiv_i128_param_1]; |
| ; CHECK-NEXT: or.b64 %rd7, %rd5, %rd6; |
| ; CHECK-NEXT: setp.eq.b64 %p1, %rd7, 0; |
| ; CHECK-NEXT: or.b64 %rd8, %rd3, %rd4; |
| ; CHECK-NEXT: setp.eq.b64 %p2, %rd8, 0; |
| ; CHECK-NEXT: or.pred %p3, %p1, %p2; |
| ; CHECK-NEXT: setp.ne.b64 %p4, %rd6, 0; |
| ; CHECK-NEXT: clz.b64 %r1, %rd6; |
| ; CHECK-NEXT: cvt.u64.u32 %rd9, %r1; |
| ; CHECK-NEXT: clz.b64 %r2, %rd5; |
| ; CHECK-NEXT: cvt.u64.u32 %rd10, %r2; |
| ; CHECK-NEXT: add.s64 %rd11, %rd10, 64; |
| ; CHECK-NEXT: selp.b64 %rd12, %rd9, %rd11, %p4; |
| ; CHECK-NEXT: setp.ne.b64 %p5, %rd4, 0; |
| ; CHECK-NEXT: clz.b64 %r3, %rd4; |
| ; CHECK-NEXT: cvt.u64.u32 %rd13, %r3; |
| ; CHECK-NEXT: clz.b64 %r4, %rd3; |
| ; CHECK-NEXT: cvt.u64.u32 %rd14, %r4; |
| ; CHECK-NEXT: add.s64 %rd15, %rd14, 64; |
| ; CHECK-NEXT: selp.b64 %rd16, %rd13, %rd15, %p5; |
| ; CHECK-NEXT: mov.b64 %rd51, 0; |
| ; CHECK-NEXT: sub.cc.s64 %rd17, %rd12, %rd16; |
| ; CHECK-NEXT: subc.cc.s64 %rd18, %rd51, 0; |
| ; CHECK-NEXT: setp.gt.u64 %p6, %rd17, 127; |
| ; CHECK-NEXT: setp.eq.b64 %p7, %rd18, 0; |
| ; CHECK-NEXT: and.pred %p8, %p7, %p6; |
| ; CHECK-NEXT: setp.ne.b64 %p9, %rd18, 0; |
| ; CHECK-NEXT: or.pred %p10, %p8, %p9; |
| ; CHECK-NEXT: or.pred %p11, %p3, %p10; |
| ; CHECK-NEXT: xor.b64 %rd19, %rd17, 127; |
| ; CHECK-NEXT: or.b64 %rd20, %rd19, %rd18; |
| ; CHECK-NEXT: setp.eq.b64 %p12, %rd20, 0; |
| ; CHECK-NEXT: selp.b64 %rd59, 0, %rd4, %p11; |
| ; CHECK-NEXT: selp.b64 %rd58, 0, %rd3, %p11; |
| ; CHECK-NEXT: or.pred %p13, %p11, %p12; |
| ; CHECK-NEXT: @%p13 bra $L__BB5_5; |
| ; CHECK-NEXT: // %bb.3: // %udiv-bb1 |
| ; CHECK-NEXT: add.cc.s64 %rd52, %rd17, 1; |
| ; CHECK-NEXT: addc.cc.s64 %rd53, %rd18, 0; |
| ; CHECK-NEXT: or.b64 %rd21, %rd52, %rd53; |
| ; CHECK-NEXT: setp.eq.b64 %p14, %rd21, 0; |
| ; CHECK-NEXT: cvt.u32.u64 %r5, %rd17; |
| ; CHECK-NEXT: sub.s32 %r6, 127, %r5; |
| ; CHECK-NEXT: shl.b64 %rd22, %rd4, %r6; |
| ; CHECK-NEXT: sub.s32 %r7, 64, %r6; |
| ; CHECK-NEXT: shr.u64 %rd23, %rd3, %r7; |
| ; CHECK-NEXT: or.b64 %rd24, %rd22, %rd23; |
| ; CHECK-NEXT: sub.s32 %r8, 63, %r5; |
| ; CHECK-NEXT: shl.b64 %rd25, %rd3, %r8; |
| ; CHECK-NEXT: setp.gt.s32 %p15, %r6, 63; |
| ; CHECK-NEXT: selp.b64 %rd57, %rd25, %rd24, %p15; |
| ; CHECK-NEXT: shl.b64 %rd56, %rd3, %r6; |
| ; CHECK-NEXT: mov.b64 %rd50, %rd51; |
| ; CHECK-NEXT: @%p14 bra $L__BB5_4; |
| ; CHECK-NEXT: // %bb.1: // %udiv-preheader |
| ; CHECK-NEXT: cvt.u32.u64 %r9, %rd52; |
| ; CHECK-NEXT: shr.u64 %rd26, %rd3, %r9; |
| ; CHECK-NEXT: sub.s32 %r10, 64, %r9; |
| ; CHECK-NEXT: shl.b64 %rd27, %rd4, %r10; |
| ; CHECK-NEXT: or.b64 %rd28, %rd26, %rd27; |
| ; CHECK-NEXT: add.s32 %r11, %r9, -64; |
| ; CHECK-NEXT: shr.u64 %rd29, %rd4, %r11; |
| ; CHECK-NEXT: setp.gt.s32 %p16, %r9, 63; |
| ; CHECK-NEXT: selp.b64 %rd54, %rd29, %rd28, %p16; |
| ; CHECK-NEXT: shr.u64 %rd55, %rd4, %r9; |
| ; CHECK-NEXT: add.cc.s64 %rd1, %rd5, -1; |
| ; CHECK-NEXT: addc.cc.s64 %rd2, %rd6, -1; |
| ; CHECK-NEXT: mov.b64 %rd50, 0; |
| ; CHECK-NEXT: mov.b64 %rd51, %rd50; |
| ; CHECK-NEXT: $L__BB5_2: // %udiv-do-while |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: shr.u64 %rd30, %rd54, 63; |
| ; CHECK-NEXT: shl.b64 %rd31, %rd55, 1; |
| ; CHECK-NEXT: or.b64 %rd32, %rd31, %rd30; |
| ; CHECK-NEXT: shl.b64 %rd33, %rd54, 1; |
| ; CHECK-NEXT: shr.u64 %rd34, %rd57, 63; |
| ; CHECK-NEXT: or.b64 %rd35, %rd33, %rd34; |
| ; CHECK-NEXT: shr.u64 %rd36, %rd56, 63; |
| ; CHECK-NEXT: shl.b64 %rd37, %rd57, 1; |
| ; CHECK-NEXT: or.b64 %rd38, %rd37, %rd36; |
| ; CHECK-NEXT: shl.b64 %rd39, %rd56, 1; |
| ; CHECK-NEXT: or.b64 %rd56, %rd51, %rd39; |
| ; CHECK-NEXT: or.b64 %rd57, %rd50, %rd38; |
| ; CHECK-NEXT: sub.cc.s64 %rd40, %rd1, %rd35; |
| ; CHECK-NEXT: subc.cc.s64 %rd41, %rd2, %rd32; |
| ; CHECK-NEXT: shr.s64 %rd42, %rd41, 63; |
| ; CHECK-NEXT: and.b64 %rd51, %rd42, 1; |
| ; CHECK-NEXT: and.b64 %rd43, %rd42, %rd5; |
| ; CHECK-NEXT: and.b64 %rd44, %rd42, %rd6; |
| ; CHECK-NEXT: sub.cc.s64 %rd54, %rd35, %rd43; |
| ; CHECK-NEXT: subc.cc.s64 %rd55, %rd32, %rd44; |
| ; CHECK-NEXT: add.cc.s64 %rd52, %rd52, -1; |
| ; CHECK-NEXT: addc.cc.s64 %rd53, %rd53, -1; |
| ; CHECK-NEXT: or.b64 %rd45, %rd52, %rd53; |
| ; CHECK-NEXT: setp.eq.b64 %p17, %rd45, 0; |
| ; CHECK-NEXT: @%p17 bra $L__BB5_4; |
| ; CHECK-NEXT: bra.uni $L__BB5_2; |
| ; CHECK-NEXT: $L__BB5_4: // %udiv-loop-exit |
| ; CHECK-NEXT: shr.u64 %rd46, %rd56, 63; |
| ; CHECK-NEXT: shl.b64 %rd47, %rd57, 1; |
| ; CHECK-NEXT: or.b64 %rd48, %rd47, %rd46; |
| ; CHECK-NEXT: shl.b64 %rd49, %rd56, 1; |
| ; CHECK-NEXT: or.b64 %rd58, %rd51, %rd49; |
| ; CHECK-NEXT: or.b64 %rd59, %rd50, %rd48; |
| ; CHECK-NEXT: $L__BB5_5: // %udiv-end |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd58, %rd59}; |
| ; CHECK-NEXT: ret; |
| %quot = udiv i128 %lhs, %rhs |
| ret i128 %quot |
| } |
| |
| ; Signed 128-bit division by the constant 8589934592 (2^33). The expected |
| ; PTX below is a single straight-line block built from shifts, an |
| ; add-with-carry pair and an or, with no udiv-* loop blocks. |
| define i128 @sdiv_i128_pow2k(i128 %lhs) { |
| ; CHECK-LABEL: sdiv_i128_pow2k( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<11>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [sdiv_i128_pow2k_param_0]; |
| ; CHECK-NEXT: shr.s64 %rd3, %rd2, 63; |
| ; CHECK-NEXT: shr.u64 %rd4, %rd3, 31; |
| ; CHECK-NEXT: add.cc.s64 %rd5, %rd1, %rd4; |
| ; CHECK-NEXT: addc.cc.s64 %rd6, %rd2, 0; |
| ; CHECK-NEXT: shl.b64 %rd7, %rd6, 31; |
| ; CHECK-NEXT: shr.u64 %rd8, %rd5, 33; |
| ; CHECK-NEXT: or.b64 %rd9, %rd8, %rd7; |
| ; CHECK-NEXT: shr.s64 %rd10, %rd6, 33; |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd9, %rd10}; |
| ; CHECK-NEXT: ret; |
| %quot = sdiv i128 %lhs, 8589934592 |
| ret i128 %quot |
| } |
| |
| ; Unsigned 128-bit division by the constant 8589934592 (2^33). The expected |
| ; PTX below is a single straight-line block: 64-bit shifts by 31 and 33 whose |
| ; results are combined with an or to form the low half. |
| define i128 @udiv_i128_pow2k(i128 %lhs) { |
| ; CHECK-LABEL: udiv_i128_pow2k( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<7>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [udiv_i128_pow2k_param_0]; |
| ; CHECK-NEXT: shl.b64 %rd3, %rd2, 31; |
| ; CHECK-NEXT: shr.u64 %rd4, %rd1, 33; |
| ; CHECK-NEXT: or.b64 %rd5, %rd4, %rd3; |
| ; CHECK-NEXT: shr.u64 %rd6, %rd2, 33; |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd5, %rd6}; |
| ; CHECK-NEXT: ret; |
| %quot = udiv i128 %lhs, 8589934592 |
| ret i128 %quot |
| } |
| |
| ; 128-bit addition of two arguments. The expected PTX below loads each |
| ; argument as a pair of 64-bit halves and adds them with an add.cc / addc.cc |
| ; pair before storing the result. |
| define i128 @add_i128(i128 %lhs, i128 %rhs) { |
| ; CHECK-LABEL: add_i128( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<7>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [add_i128_param_0]; |
| ; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [add_i128_param_1]; |
| ; CHECK-NEXT: add.cc.s64 %rd5, %rd1, %rd3; |
| ; CHECK-NEXT: addc.cc.s64 %rd6, %rd2, %rd4; |
| ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd5, %rd6}; |
| ; CHECK-NEXT: ret; |
| %sum = add i128 %lhs, %rhs |
| ret i128 %sum |
| } |