|  | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | 
|  | ; RUN: llc < %s -mcpu=sm_50 | FileCheck %s | 
|  | ; RUN: %if ptxas %{ llc < %s -mcpu=sm_50 | %ptxas-verify %} | 
|  |  | 
|  | target triple = "nvptx64-nvidia-cuda" | 
|  |  | 
|  | ; (x << 7) + (x >> 25): the two shifted values occupy disjoint bits (the shl | 
|  | ; result has its low 7 bits clear, the lshr result only has its low 7 bits), | 
|  | ; so the add behaves like an or. The checks expect a single shf.l.wrap by 7 | 
|  | ; with %x as both inputs, i.e. a rotate left by 7. | 
|  | define i32 @test_simple_rotl(i32 %x) { | 
|  | ; CHECK-LABEL: test_simple_rotl( | 
|  | ; CHECK:       { | 
|  | ; CHECK-NEXT:    .reg .b32 %r<3>; | 
|  | ; CHECK-EMPTY: | 
|  | ; CHECK-NEXT:  // %bb.0: | 
|  | ; CHECK-NEXT:    ld.param.b32 %r1, [test_simple_rotl_param_0]; | 
|  | ; CHECK-NEXT:    shf.l.wrap.b32 %r2, %r1, %r1, 7; | 
|  | ; CHECK-NEXT:    st.param.b32 [func_retval0], %r2; | 
|  | ; CHECK-NEXT:    ret; | 
|  | %shl = shl i32 %x, 7 | 
|  | %shr = lshr i32 %x, 25 | 
|  | %add = add i32 %shl, %shr | 
|  | ret i32 %add | 
|  | } | 
|  |  | 
|  | ; (x >> 7) + (x << 25): a rotate right by the constant 7. The checks expect | 
|  | ; it to be emitted as a left funnel shift by 25 (= 32 - 7) with %x as both | 
|  | ; inputs. | 
|  | define i32 @test_simple_rotr(i32 %x) { | 
|  | ; CHECK-LABEL: test_simple_rotr( | 
|  | ; CHECK:       { | 
|  | ; CHECK-NEXT:    .reg .b32 %r<3>; | 
|  | ; CHECK-EMPTY: | 
|  | ; CHECK-NEXT:  // %bb.0: | 
|  | ; CHECK-NEXT:    ld.param.b32 %r1, [test_simple_rotr_param_0]; | 
|  | ; CHECK-NEXT:    shf.l.wrap.b32 %r2, %r1, %r1, 25; | 
|  | ; CHECK-NEXT:    st.param.b32 [func_retval0], %r2; | 
|  | ; CHECK-NEXT:    ret; | 
|  | %shr = lshr i32 %x, 7 | 
|  | %shl = shl i32 %x, 25 | 
|  | %add = add i32 %shr, %shl | 
|  | ret i32 %add | 
|  | } | 
|  |  | 
|  | ; (x << y) + (x >> (32 - y)) with a variable amount. The checks expect the | 
|  | ; shl/sub/lshr/add sequence to collapse into one shf.l.wrap that takes %x as | 
|  | ; both inputs and %y as the shift amount. | 
|  | define i32 @test_rotl_var(i32 %x, i32 %y) { | 
|  | ; CHECK-LABEL: test_rotl_var( | 
|  | ; CHECK:       { | 
|  | ; CHECK-NEXT:    .reg .b32 %r<4>; | 
|  | ; CHECK-EMPTY: | 
|  | ; CHECK-NEXT:  // %bb.0: | 
|  | ; CHECK-NEXT:    ld.param.b32 %r1, [test_rotl_var_param_0]; | 
|  | ; CHECK-NEXT:    ld.param.b32 %r2, [test_rotl_var_param_1]; | 
|  | ; CHECK-NEXT:    shf.l.wrap.b32 %r3, %r1, %r1, %r2; | 
|  | ; CHECK-NEXT:    st.param.b32 [func_retval0], %r3; | 
|  | ; CHECK-NEXT:    ret; | 
|  | %shl = shl i32 %x, %y | 
|  | %sub = sub i32 32, %y | 
|  | %shr = lshr i32 %x, %sub | 
|  | %add = add i32 %shl, %shr | 
|  | ret i32 %add | 
|  | } | 
|  |  | 
|  | ; (x >> y) + (x << (32 - y)) with a variable amount. Mirror image of the | 
|  | ; variable rotate-left case: the checks expect one shf.r.wrap that takes %x | 
|  | ; as both inputs and %y as the shift amount. | 
|  | define i32 @test_rotr_var(i32 %x, i32 %y) { | 
|  | ; CHECK-LABEL: test_rotr_var( | 
|  | ; CHECK:       { | 
|  | ; CHECK-NEXT:    .reg .b32 %r<4>; | 
|  | ; CHECK-EMPTY: | 
|  | ; CHECK-NEXT:  // %bb.0: | 
|  | ; CHECK-NEXT:    ld.param.b32 %r1, [test_rotr_var_param_0]; | 
|  | ; CHECK-NEXT:    ld.param.b32 %r2, [test_rotr_var_param_1]; | 
|  | ; CHECK-NEXT:    shf.r.wrap.b32 %r3, %r1, %r1, %r2; | 
|  | ; CHECK-NEXT:    st.param.b32 [func_retval0], %r3; | 
|  | ; CHECK-NEXT:    ret; | 
|  | %shr = lshr i32 %x, %y | 
|  | %sub = sub i32 32, %y | 
|  | %shl = shl i32 %x, %sub | 
|  | %add = add i32 %shr, %shl | 
|  | ret i32 %add | 
|  | } | 
|  |  | 
|  | ; Negative test: (x << y) + (x >> (-y & 31)) must NOT become a rotate. | 
|  | ; For y == 0 the masked amount is also 0, so both shifts return x and the | 
|  | ; add yields x + x rather than x; the two halves are not guaranteed to be | 
|  | ; disjoint. The checks therefore expect the separate shl / neg / and / shr / | 
|  | ; add sequence and no shf instruction. | 
|  | define i32 @test_invalid_rotl_var_and(i32 %x, i32 %y) { | 
|  | ; CHECK-LABEL: test_invalid_rotl_var_and( | 
|  | ; CHECK:       { | 
|  | ; CHECK-NEXT:    .reg .b32 %r<8>; | 
|  | ; CHECK-EMPTY: | 
|  | ; CHECK-NEXT:  // %bb.0: | 
|  | ; CHECK-NEXT:    ld.param.b32 %r1, [test_invalid_rotl_var_and_param_0]; | 
|  | ; CHECK-NEXT:    ld.param.b32 %r2, [test_invalid_rotl_var_and_param_1]; | 
|  | ; CHECK-NEXT:    shl.b32 %r3, %r1, %r2; | 
|  | ; CHECK-NEXT:    neg.s32 %r4, %r2; | 
|  | ; CHECK-NEXT:    and.b32 %r5, %r4, 31; | 
|  | ; CHECK-NEXT:    shr.u32 %r6, %r1, %r5; | 
|  | ; CHECK-NEXT:    add.s32 %r7, %r6, %r3; | 
|  | ; CHECK-NEXT:    st.param.b32 [func_retval0], %r7; | 
|  | ; CHECK-NEXT:    ret; | 
|  | %shl = shl i32 %x, %y | 
|  | %sub = sub nsw i32 0, %y | 
|  | %and = and i32 %sub, 31 | 
|  | %shr = lshr i32 %x, %and | 
|  | %add = add i32 %shr, %shl | 
|  | ret i32 %add | 
|  | } | 
|  |  | 
|  | ; Negative test: (x >> y) + (x << (-y & 31)) must NOT become a rotate. | 
|  | ; For y == 0 the masked amount is also 0, so both shifts return x and the | 
|  | ; add yields x + x rather than x. The checks therefore expect the separate | 
|  | ; shr / neg / and / shl / add sequence and no shf instruction. | 
|  | define i32 @test_invalid_rotr_var_and(i32 %x, i32 %y) { | 
|  | ; CHECK-LABEL: test_invalid_rotr_var_and( | 
|  | ; CHECK:       { | 
|  | ; CHECK-NEXT:    .reg .b32 %r<8>; | 
|  | ; CHECK-EMPTY: | 
|  | ; CHECK-NEXT:  // %bb.0: | 
|  | ; CHECK-NEXT:    ld.param.b32 %r1, [test_invalid_rotr_var_and_param_0]; | 
|  | ; CHECK-NEXT:    ld.param.b32 %r2, [test_invalid_rotr_var_and_param_1]; | 
|  | ; CHECK-NEXT:    shr.u32 %r3, %r1, %r2; | 
|  | ; CHECK-NEXT:    neg.s32 %r4, %r2; | 
|  | ; CHECK-NEXT:    and.b32 %r5, %r4, 31; | 
|  | ; CHECK-NEXT:    shl.b32 %r6, %r1, %r5; | 
|  | ; CHECK-NEXT:    add.s32 %r7, %r3, %r6; | 
|  | ; CHECK-NEXT:    st.param.b32 [func_retval0], %r7; | 
|  | ; CHECK-NEXT:    ret; | 
|  | %shr = lshr i32 %x, %y | 
|  | %sub = sub nsw i32 0, %y | 
|  | %and = and i32 %sub, 31 | 
|  | %shl = shl i32 %x, %and | 
|  | %add = add i32 %shr, %shl | 
|  | ret i32 %add | 
|  | } | 
|  |  | 
|  | ; (x0 << y) + ((x1 >> 1) >> (y ^ 31)). For y in [0, 31], y ^ 31 == 31 - y, | 
|  | ; so the right-hand side shifts %x1 right by 32 - y in two steps without | 
|  | ; ever using a shift amount of 32. The checks expect the whole expression | 
|  | ; to fold into one shf.l.wrap over %x1, %x0 and %y. | 
|  | define i32 @test_fshl_special_case(i32 %x0, i32 %x1, i32 %y) { | 
|  | ; CHECK-LABEL: test_fshl_special_case( | 
|  | ; CHECK:       { | 
|  | ; CHECK-NEXT:    .reg .b32 %r<5>; | 
|  | ; CHECK-EMPTY: | 
|  | ; CHECK-NEXT:  // %bb.0: | 
|  | ; CHECK-NEXT:    ld.param.b32 %r1, [test_fshl_special_case_param_0]; | 
|  | ; CHECK-NEXT:    ld.param.b32 %r2, [test_fshl_special_case_param_1]; | 
|  | ; CHECK-NEXT:    ld.param.b32 %r3, [test_fshl_special_case_param_2]; | 
|  | ; CHECK-NEXT:    shf.l.wrap.b32 %r4, %r2, %r1, %r3; | 
|  | ; CHECK-NEXT:    st.param.b32 [func_retval0], %r4; | 
|  | ; CHECK-NEXT:    ret; | 
|  | %shl = shl i32 %x0, %y | 
|  | %srli = lshr i32 %x1, 1 | 
|  | %x = xor i32 %y, 31 | 
|  | %srlo = lshr i32 %srli, %x | 
|  | %o = add i32 %shl, %srlo | 
|  | ret i32 %o | 
|  | } | 
|  |  | 
|  | ; (x1 >> y) + ((x0 << 1) << (y ^ 31)). For y in [0, 31], y ^ 31 == 31 - y, | 
|  | ; so the right-hand side shifts %x0 left by 32 - y in two steps without | 
|  | ; ever using a shift amount of 32. The checks expect the whole expression | 
|  | ; to fold into one shf.r.wrap over %x1, %x0 and %y. | 
|  | define i32 @test_fshr_special_case(i32 %x0, i32 %x1, i32 %y) { | 
|  | ; CHECK-LABEL: test_fshr_special_case( | 
|  | ; CHECK:       { | 
|  | ; CHECK-NEXT:    .reg .b32 %r<5>; | 
|  | ; CHECK-EMPTY: | 
|  | ; CHECK-NEXT:  // %bb.0: | 
|  | ; CHECK-NEXT:    ld.param.b32 %r1, [test_fshr_special_case_param_0]; | 
|  | ; CHECK-NEXT:    ld.param.b32 %r2, [test_fshr_special_case_param_1]; | 
|  | ; CHECK-NEXT:    ld.param.b32 %r3, [test_fshr_special_case_param_2]; | 
|  | ; CHECK-NEXT:    shf.r.wrap.b32 %r4, %r2, %r1, %r3; | 
|  | ; CHECK-NEXT:    st.param.b32 [func_retval0], %r4; | 
|  | ; CHECK-NEXT:    ret; | 
|  | %shr = lshr i32 %x1, %y | 
|  | %shli = shl i32 %x0, 1 | 
|  | %x = xor i32 %y, 31 | 
|  | %shlo = shl i32 %shli, %x | 
|  | %o = add i32 %shr, %shlo | 
|  | ret i32 %o | 
|  | } | 
|  |  | 
|  | ; ((i / 3) << 60) + (i / 48). Since i / 48 == (i / 3) >> 4 for unsigned | 
|  | ; division, this is a 64-bit rotate left by 60 of i / 3. The checks expect | 
|  | ; a single mul.hi + shr for the division by 3, after which the i64 rotate | 
|  | ; is done on the two 32-bit halves with a pair of shf.l.wrap by 28. | 
|  | define i64 @test_rotl_udiv_special_case(i64 %i) { | 
|  | ; CHECK-LABEL: test_rotl_udiv_special_case( | 
|  | ; CHECK:       { | 
|  | ; CHECK-NEXT:    .reg .b32 %r<5>; | 
|  | ; CHECK-NEXT:    .reg .b64 %rd<5>; | 
|  | ; CHECK-EMPTY: | 
|  | ; CHECK-NEXT:  // %bb.0: | 
|  | ; CHECK-NEXT:    ld.param.b64 %rd1, [test_rotl_udiv_special_case_param_0]; | 
|  | ; CHECK-NEXT:    mul.hi.u64 %rd2, %rd1, -6148914691236517205; | 
|  | ; CHECK-NEXT:    shr.u64 %rd3, %rd2, 1; | 
|  | ; CHECK-NEXT:    mov.b64 {%r1, %r2}, %rd3; | 
|  | ; CHECK-NEXT:    shf.l.wrap.b32 %r3, %r2, %r1, 28; | 
|  | ; CHECK-NEXT:    shf.l.wrap.b32 %r4, %r1, %r2, 28; | 
|  | ; CHECK-NEXT:    mov.b64 %rd4, {%r4, %r3}; | 
|  | ; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4; | 
|  | ; CHECK-NEXT:    ret; | 
|  | %lhs_div = udiv i64 %i, 3 | 
|  | %rhs_div = udiv i64 %i, 48 | 
|  | %lhs_shift = shl i64 %lhs_div, 60 | 
|  | %out = add i64 %lhs_shift, %rhs_div | 
|  | ret i64 %out | 
|  | } | 
|  |  | 
|  | ; ((i * 9) >> 25) + (i * 1152). Since 1152 == 9 * 128, i * 1152 equals | 
|  | ; (i * 9) << 7 in i32 arithmetic, making the sum a rotate left by 7 of | 
|  | ; i * 9. The checks expect one multiply by 9 followed by one shf.l.wrap | 
|  | ; by 7. | 
|  | define i32 @test_rotl_mul_special_case(i32 %i) { | 
|  | ; CHECK-LABEL: test_rotl_mul_special_case( | 
|  | ; CHECK:       { | 
|  | ; CHECK-NEXT:    .reg .b32 %r<4>; | 
|  | ; CHECK-EMPTY: | 
|  | ; CHECK-NEXT:  // %bb.0: | 
|  | ; CHECK-NEXT:    ld.param.b32 %r1, [test_rotl_mul_special_case_param_0]; | 
|  | ; CHECK-NEXT:    mul.lo.s32 %r2, %r1, 9; | 
|  | ; CHECK-NEXT:    shf.l.wrap.b32 %r3, %r2, %r2, 7; | 
|  | ; CHECK-NEXT:    st.param.b32 [func_retval0], %r3; | 
|  | ; CHECK-NEXT:    ret; | 
|  | %lhs_mul = mul i32 %i, 9 | 
|  | %rhs_mul = mul i32 %i, 1152 | 
|  | %lhs_shift = lshr i32 %lhs_mul, 25 | 
|  | %out = add i32 %lhs_shift, %rhs_mul | 
|  | ret i32 %out | 
|  | } | 
|  |  | 
|  | ; ((i * 1152) & 160) + ((i * 9) >> 57). As 1152 == 9 * 128, the left side | 
|  | ; is a masked (i * 9) << 7 and the right side is the matching lshr by | 
|  | ; 64 - 7, so the sum is a masked rotate left by 7 of i * 9. The checks | 
|  | ; expect funnel shifts on the 32-bit halves followed by a single and with | 
|  | ; 255 (160 combined with the 7 low bits the lshr side can produce). | 
|  | define i64 @test_rotl_mul_with_mask_special_case(i64 %i) { | 
|  | ; CHECK-LABEL: test_rotl_mul_with_mask_special_case( | 
|  | ; CHECK:       { | 
|  | ; CHECK-NEXT:    .reg .b32 %r<7>; | 
|  | ; CHECK-NEXT:    .reg .b64 %rd<5>; | 
|  | ; CHECK-EMPTY: | 
|  | ; CHECK-NEXT:  // %bb.0: | 
|  | ; CHECK-NEXT:    ld.param.b64 %rd1, [test_rotl_mul_with_mask_special_case_param_0]; | 
|  | ; CHECK-NEXT:    mul.lo.s64 %rd2, %rd1, 9; | 
|  | ; CHECK-NEXT:    mov.b64 {%r1, %r2}, %rd1; | 
|  | ; CHECK-NEXT:    mov.b64 {%r3, %r4}, %rd2; | 
|  | ; CHECK-NEXT:    shf.l.wrap.b32 %r5, %r4, %r1, 7; | 
|  | ; CHECK-NEXT:    shf.l.wrap.b32 %r6, %r1, %r2, 7; | 
|  | ; CHECK-NEXT:    mov.b64 %rd3, {%r5, %r6}; | 
|  | ; CHECK-NEXT:    and.b64 %rd4, %rd3, 255; | 
|  | ; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4; | 
|  | ; CHECK-NEXT:    ret; | 
|  | %lhs_mul = mul i64 %i, 1152 | 
|  | %rhs_mul = mul i64 %i, 9 | 
|  | %lhs_and = and i64 %lhs_mul, 160 | 
|  | %rhs_shift = lshr i64 %rhs_mul, 57 | 
|  | %out = add i64 %lhs_and, %rhs_shift | 
|  | ret i64 %out | 
|  | } | 
|  |  | 
|  | ; ((x | 1) << 5) + ((x >> 27) & 1). The two addends occupy disjoint bits | 
|  | ; (the shl result has its low 5 bits clear, the masked lshr result only | 
|  | ; has bit 0). The checks expect a funnel shift left by 5 over %x and | 
|  | ; (%x | 1), followed by a single and with -31, instead of two shifts and | 
|  | ; an add. | 
|  | define i32 @test_fshl_with_mask_special_case(i32 %x) { | 
|  | ; CHECK-LABEL: test_fshl_with_mask_special_case( | 
|  | ; CHECK:       { | 
|  | ; CHECK-NEXT:    .reg .b32 %r<5>; | 
|  | ; CHECK-EMPTY: | 
|  | ; CHECK-NEXT:  // %bb.0: | 
|  | ; CHECK-NEXT:    ld.param.b32 %r1, [test_fshl_with_mask_special_case_param_0]; | 
|  | ; CHECK-NEXT:    or.b32 %r2, %r1, 1; | 
|  | ; CHECK-NEXT:    shf.l.wrap.b32 %r3, %r1, %r2, 5; | 
|  | ; CHECK-NEXT:    and.b32 %r4, %r3, -31; | 
|  | ; CHECK-NEXT:    st.param.b32 [func_retval0], %r4; | 
|  | ; CHECK-NEXT:    ret; | 
|  | %or1 = or i32 %x, 1 | 
|  | %sh1 = shl i32 %or1, 5 | 
|  | %sh2 = lshr i32 %x, 27 | 
|  | %1 = and i32 %sh2, 1 | 
|  | %r = add i32 %sh1, %1 | 
|  | ret i32 %r | 
|  | } | 