| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s | FileCheck %s |
| |
| target triple = "nvptx64-nvidia-cuda" |
| |
| @global_smem = external addrspace(3) global [0 x i8], align 16 |
| |
| |
| ;; Confirm the mov.b64 of global_smem is CSE'd. We need to make things a bit |
| ;; complex with a loop to make this interesting. |
| ;; |
| ;; The function addresses @global_smem twice: once in %entry (with %offset1) and |
| ;; once in %if2 (with %offset2), with the %if1 self-loop sitting between the two |
| ;; uses. The expected PTX materializes the symbol address a single time (%rd2 in |
| ;; %bb.0) and reuses that register for the second address computation in %bb.3. |
| ;; |
| ;; The assertions inside the function are autogenerated (see the NOTE on the |
| ;; first line of the file); regenerate them with utils/update_llc_test_checks.py |
| ;; instead of editing them by hand. |
| define i32 @test_mov_sym(i32 %offset1, i32 %offset2, i1 %cond) { |
| ; CHECK-LABEL: test_mov_sym( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<4>; |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<6>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.b8 %rs1, [test_mov_sym_param_2]; |
| ; CHECK-NEXT: and.b16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0; |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_mov_sym_param_0]; |
| ; CHECK-NEXT: cvt.s64.s32 %rd1, %r1; |
| ; CHECK-NEXT: mov.b64 %rd2, global_smem; |
| ; CHECK-NEXT: add.s64 %rd3, %rd2, %rd1; |
| ; CHECK-NEXT: ld.shared.b32 %r4, [%rd3]; |
| ; CHECK-NEXT: not.pred %p2, %p1; |
| ; CHECK-NEXT: @%p2 bra $L__BB0_4; |
| ; CHECK-NEXT: // %bb.1: // %if1.preheader |
| ; CHECK-NEXT: ld.param.b32 %r2, [test_mov_sym_param_1]; |
| ; CHECK-NEXT: setp.ne.b32 %p3, %r1, %r2; |
| ; CHECK-NEXT: $L__BB0_2: // %if1 |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: @%p3 bra $L__BB0_2; |
| ; CHECK-NEXT: // %bb.3: // %if2 |
| ; CHECK-NEXT: cvt.s64.s32 %rd4, %r2; |
| ; CHECK-NEXT: add.s64 %rd5, %rd2, %rd4; |
| ; CHECK-NEXT: ld.shared.b32 %r3, [%rd5]; |
| ; CHECK-NEXT: add.s32 %r4, %r4, %r3; |
| ; CHECK-NEXT: $L__BB0_4: // %end |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; |
| ; CHECK-NEXT: ret; |
| ;; First use of @global_smem: load the i32 at byte offset %offset1, then skip |
| ;; straight to %end when %cond is false. |
| entry: |
| %gep = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 %offset1 |
| %val = load i32, ptr addrspace(3) %gep |
| br i1 %cond, label %if1, label %end |
| ;; Self-loop that only exits to %if2 when %offset1 == %offset2; the compare |
| ;; depends solely on the function arguments. |
| if1: |
| %cond2 = icmp eq i32 %offset1, %offset2 |
| br i1 %cond2, label %if2, label %if1 |
| ;; Second use of @global_smem: load the i32 at byte offset %offset2 and add it |
| ;; to the value loaded in %entry. |
| if2: |
| %gep2 = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 %offset2 |
| %val2 = load i32, ptr addrspace(3) %gep2 |
| %add = add i32 %val, %val2 |
| br label %end |
| ;; Return %add when arriving from %if2, or the first load alone when arriving |
| ;; directly from %entry. |
| end: |
| %ret = phi i32 [ %add, %if2 ], [ %val, %entry ] |
| ret i32 %ret |
| } |