|  | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | 
|  | ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s | 
|  |  | 
|  | ;--------------------------------------------------------------------- | 
|  | ; xchg i32 cases | 
|  | ;--------------------------------------------------------------------- | 
|  |  | 
|  | ; Input and result use AGPR | 
define void @global_atomic_xchg_i32_ret_a_a(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i32_ret_a_a:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v2, a0
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap v0, v[0:1], v2, off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_accvgpr_write_b32 a0, v0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ;  use a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=a"()
%result = atomicrmw xchg ptr addrspace(1) %ptr, i32 %data seq_cst
call void asm "; use $0", "a"(i32 %result)
ret void
}
|  |  | 
|  | ; Input is AGPR, result used as VGPR. | 
define void @global_atomic_xchg_i32_ret_a_v(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i32_ret_a_v:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v2, a0
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap v0, v[0:1], v2, off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=a"()
%result = atomicrmw xchg ptr addrspace(1) %ptr, i32 %data seq_cst
call void asm "; use $0", "v"(i32 %result)
ret void
}
|  |  | 
|  | ; Input is VGPR, result used as AGPR | 
define void @global_atomic_xchg_i32_ret_v_a(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i32_ret_v_a:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v2
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap v0, v[0:1], v2, off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_accvgpr_write_b32 a0, v0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=v"()
%result = atomicrmw xchg ptr addrspace(1) %ptr, i32 %data seq_cst
call void asm "; use $0", "a"(i32 %result)
ret void
}
|  |  | 
|  | ; Input is AV, result also used as AV | 
define void @global_atomic_xchg_i32_ret_av_av(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i32_ret_av_av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v2
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap v0, v[0:1], v2, off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=^VA"()
%result = atomicrmw xchg ptr addrspace(1) %ptr, i32 %data seq_cst
call void asm "; use $0", "^VA"(i32 %result)
ret void
}
|  |  | 
|  | ; Input is AV, used as v | 
define void @global_atomic_xchg_i32_ret_av_v(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i32_ret_av_v:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v2
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap v0, v[0:1], v2, off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=^VA"()
%result = atomicrmw xchg ptr addrspace(1) %ptr, i32 %data seq_cst
call void asm "; use $0", "v"(i32 %result)
ret void
}
|  |  | 
|  | ; Input is AV, used as a | 
define void @global_atomic_xchg_i32_ret_av_a(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i32_ret_av_a:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v2
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap v0, v[0:1], v2, off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_accvgpr_write_b32 a0, v0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=^VA"()
%result = atomicrmw xchg ptr addrspace(1) %ptr, i32 %data seq_cst
call void asm "; use $0", "a"(i32 %result)
ret void
}
|  |  | 
|  | ; Input is a, result used as AV | 
define void @global_atomic_xchg_i32_ret_a_av(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i32_ret_a_av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v2, a0
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap v0, v[0:1], v2, off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=a"()
%result = atomicrmw xchg ptr addrspace(1) %ptr, i32 %data seq_cst
call void asm "; use $0", "^VA"(i32 %result)
ret void
}
|  |  | 
|  | ; Input is v, result used as AV | 
define void @global_atomic_xchg_i32_ret_v_av(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i32_ret_v_av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v2
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap v0, v[0:1], v2, off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=v"()
%result = atomicrmw xchg ptr addrspace(1) %ptr, i32 %data seq_cst
call void asm "; use $0", "^VA"(i32 %result)
ret void
}
|  |  | 
define void @global_atomic_xchg_i32_ret_av_av_no_agprs(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i32_ret_av_av_no_agprs:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_accvgpr_write_b32 a0, v0
; CHECK-NEXT:    v_accvgpr_write_b32 a3, v40 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a4, v41 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a5, v42 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a6, v43 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a7, v44 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a8, v45 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a9, v46 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a10, v47 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a11, v56 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a12, v57 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a13, v58 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a14, v59 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a15, v60 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a16, v61 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a17, v62 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a18, v63 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a1, v1
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v[0:31]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; CHECK-NEXT:    s_nop 0
; CHECK-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a2
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_write_b32 a19, v31 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v0, a0
; CHECK-NEXT:    v_accvgpr_read_b32 v1, a1
; CHECK-NEXT:    v_accvgpr_read_b32 v2, a2
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    global_atomic_swap v0, v[0:1], v2, off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_accvgpr_write_b32 a31, v19 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a30, v20 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a29, v21 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a28, v22 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a27, v23 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a26, v24 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a25, v25 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a24, v26 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a23, v27 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a22, v28 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a21, v29 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a20, v30 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_write_b32 a0, v0
; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
; CHECK-NEXT:    v_accvgpr_read_b32 v19, a31 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v20, a30 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v21, a29 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v22, a28 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v23, a27 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v24, a26 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v25, a25 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v26, a24 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v27, a23 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v28, a22 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v29, a21 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v30, a20 ; Reload Reuse
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_accvgpr_read_b32 v31, a19 ; Reload Reuse
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v[0:31]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v63, a18 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v62, a17 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v61, a16 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v60, a15 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v59, a14 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v58, a13 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v57, a12 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v56, a11 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v47, a10 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v46, a9 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v45, a8 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v44, a7 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v43, a6 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v42, a5 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v41, a4 ; Reload Reuse
; CHECK-NEXT:    v_accvgpr_read_b32 v40, a3 ; Reload Reuse
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): the inline asm defs/uses of v[0:31] and v[32:63] occupy the VGPR
; file across the atomic, forcing the AV operands to live in AGPRs/spill slots.
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=^VA"()
%vgpr.def = call { <32 x i32>, <32 x i32> }  asm sideeffect "; def $0", "=${v[0:31]},=${v[32:63]}"()
%vgpr.0 = extractvalue { <32 x i32>, <32 x i32> } %vgpr.def, 0
%vgpr.1 = extractvalue { <32 x i32>, <32 x i32> } %vgpr.def, 1
%result = atomicrmw xchg ptr addrspace(1) %ptr, i32 %data seq_cst
call void asm sideeffect "; use $0", "{v[0:31]},{v[32:63]}"(<32 x i32> %vgpr.0, <32 x i32> %vgpr.1)
call void asm "; use $0", "^VA"(i32 %result)
ret void
}
|  |  | 
define void @global_atomic_xchg_i32_noret_a(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i32_noret_a:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap v[0:1], a0, off
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=a"()
%unused = atomicrmw xchg ptr addrspace(1) %ptr, i32 %data seq_cst
ret void
}
|  |  | 
define void @global_atomic_xchg_i32_noret_av(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i32_noret_av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v2
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap v[0:1], v2, off
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=^VA"()
%unused = atomicrmw xchg ptr addrspace(1) %ptr, i32 %data seq_cst
ret void
}
|  |  | 
|  | ;--------------------------------------------------------------------- | 
|  | ; xchg i64 cases | 
|  | ;--------------------------------------------------------------------- | 
|  |  | 
|  | ; Input and result use AGPR | 
define void @global_atomic_xchg_i64_ret_a_a(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i64_ret_a_a:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v3, a1
; CHECK-NEXT:    v_accvgpr_read_b32 v2, a0
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_accvgpr_write_b32 a0, v0
; CHECK-NEXT:    v_accvgpr_write_b32 a1, v1
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use a[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=a"()
%result = atomicrmw xchg ptr addrspace(1) %ptr, i64 %data seq_cst
call void asm "; use $0", "a"(i64 %result)
ret void
}
|  |  | 
|  | ; Input is AGPR, result used as VGPR. | 
define void @global_atomic_xchg_i64_ret_a_v(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i64_ret_a_v:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v3, a1
; CHECK-NEXT:    v_accvgpr_read_b32 v2, a0
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=a"()
%result = atomicrmw xchg ptr addrspace(1) %ptr, i64 %data seq_cst
call void asm "; use $0", "v"(i64 %result)
ret void
}
|  |  | 
|  | ; Input is VGPR, result used as AGPR | 
define void @global_atomic_xchg_i64_ret_v_a(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i64_ret_v_a:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v[2:3]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_accvgpr_write_b32 a0, v0
; CHECK-NEXT:    v_accvgpr_write_b32 a1, v1
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use a[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=v"()
%result = atomicrmw xchg ptr addrspace(1) %ptr, i64 %data seq_cst
call void asm "; use $0", "a"(i64 %result)
ret void
}
|  |  | 
|  | ; Input is AV, result also used as AV | 
define void @global_atomic_xchg_i64_ret_av_av(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i64_ret_av_av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v[2:3]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=^VA"()
%result = atomicrmw xchg ptr addrspace(1) %ptr, i64 %data seq_cst
call void asm "; use $0", "^VA"(i64 %result)
ret void
}
|  |  | 
|  | ; Input is AV, used as v | 
define void @global_atomic_xchg_i64_ret_av_v(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i64_ret_av_v:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v[2:3]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=^VA"()
%result = atomicrmw xchg ptr addrspace(1) %ptr, i64 %data seq_cst
call void asm "; use $0", "v"(i64 %result)
ret void
}
|  |  | 
|  | ; Input is AV, used as a | 
define void @global_atomic_xchg_i64_ret_av_a(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i64_ret_av_a:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v[2:3]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_accvgpr_write_b32 a0, v0
; CHECK-NEXT:    v_accvgpr_write_b32 a1, v1
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use a[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=^VA"()
%result = atomicrmw xchg ptr addrspace(1) %ptr, i64 %data seq_cst
call void asm "; use $0", "a"(i64 %result)
ret void
}
|  |  | 
|  | ; Input is a, result used as AV | 
define void @global_atomic_xchg_i64_ret_a_av(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i64_ret_a_av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v3, a1
; CHECK-NEXT:    v_accvgpr_read_b32 v2, a0
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=a"()
%result = atomicrmw xchg ptr addrspace(1) %ptr, i64 %data seq_cst
call void asm "; use $0", "^VA"(i64 %result)
ret void
}
|  |  | 
|  | ; Input is v, result used as AV | 
define void @global_atomic_xchg_i64_ret_v_av(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i64_ret_v_av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v[2:3]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap_x2 v[0:1], v[0:1], v[2:3], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=v"()
%result = atomicrmw xchg ptr addrspace(1) %ptr, i64 %data seq_cst
call void asm "; use $0", "^VA"(i64 %result)
ret void
}
|  |  | 
define void @global_atomic_xchg_i64_noret_a(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i64_noret_a:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap_x2 v[0:1], a[0:1], off
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=a"()
%unused = atomicrmw xchg ptr addrspace(1) %ptr, i64 %data seq_cst
ret void
}
|  |  | 
define void @global_atomic_xchg_i64_noret_av(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xchg_i64_noret_av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v[2:3]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_swap_x2 v[0:1], v[2:3], off
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %gep.0 is unused — the atomicrmw addresses %ptr directly.
; Presumably leftover from a gep-addressed variant of this test; confirm intent.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=^VA"()
%unused = atomicrmw xchg ptr addrspace(1) %ptr, i64 %data seq_cst
ret void
}
|  |  | 
|  | ;--------------------------------------------------------------------- | 
|  | ; xor i32 cases with cmpxchg expansion | 
|  | ;--------------------------------------------------------------------- | 
|  |  | 
|  | ; Input and result use AGPR | 
define void @global_atomic_xor_expansion_i32_ret_a_a(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i32_ret_a_a:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dword v3, v[0:1], off
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v4, a0
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:  .LBB21_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_xor_b32_e32 v2, v3, v4
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    v_accvgpr_write_b32 a0, v2
; CHECK-NEXT:    v_mov_b32_e32 v3, v2
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB21_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; i32 xor expanded to a cmpswap loop; data comes from an AGPR ("=a") and is
; copied to a VGPR for the xor, and the loop result is copied back to a0 for
; the "a"-constrained use.
; NOTE(review): %gep.0 is computed but unused — the atomicrmw operates on
; %ptr directly; confirm the gep was intended to feed the atomic.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=a"()
%result = atomicrmw xor ptr addrspace(1) %ptr, i32 %data seq_cst
call void asm "; use $0", "a"(i32 %result)
ret void
}
|  |  | 
|  | ; Input is AGPR, result used as VGPR. | 
define void @global_atomic_xor_expansion_i32_ret_a_v(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i32_ret_a_v:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dword v2, v[0:1], off
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v3, a0
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:  .LBB22_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v5, v2
; CHECK-NEXT:    v_xor_b32_e32 v4, v5, v3
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap v2, v[0:1], v[4:5], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v5
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB22_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v2
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; i32 xor cmpswap-loop expansion; AGPR-defined data ("=a") is copied to a
; VGPR, and the result is used directly from v2 for the "v"-constrained use
; (no copy back to an AGPR).
; NOTE(review): %gep.0 is computed but unused — the atomicrmw operates on
; %ptr directly; confirm the gep was intended to feed the atomic.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=a"()
%result = atomicrmw xor ptr addrspace(1) %ptr, i32 %data seq_cst
call void asm "; use $0", "v"(i32 %result)
ret void
}
|  |  | 
|  | ; Input is VGPR, result used as AGPR | 
define void @global_atomic_xor_expansion_i32_ret_v_a(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i32_ret_v_a:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dword v3, v[0:1], off
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v4
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:  .LBB23_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_xor_b32_e32 v2, v3, v4
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    v_accvgpr_write_b32 a0, v2
; CHECK-NEXT:    v_mov_b32_e32 v3, v2
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB23_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; i32 xor cmpswap-loop expansion; VGPR-defined data ("=v") feeds the loop
; directly, and the result is copied into a0 for the "a"-constrained use.
; NOTE(review): %gep.0 is computed but unused — the atomicrmw operates on
; %ptr directly; confirm the gep was intended to feed the atomic.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=v"()
%result = atomicrmw xor ptr addrspace(1) %ptr, i32 %data seq_cst
call void asm "; use $0", "a"(i32 %result)
ret void
}
|  |  | 
|  | ; Input is AV, result also used as AV | 
define void @global_atomic_xor_expansion_i32_ret_av_av(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i32_ret_av_av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dword v2, v[0:1], off
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v3
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:  .LBB24_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v5, v2
; CHECK-NEXT:    v_xor_b32_e32 v4, v5, v3
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap v2, v[0:1], v[4:5], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v5
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB24_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v2
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; i32 xor cmpswap-loop expansion; both def and use take the AV constraint
; ("^VA"); the checks show the allocator picked VGPRs throughout, so no
; accvgpr copies are emitted.
; NOTE(review): %gep.0 is computed but unused — the atomicrmw operates on
; %ptr directly; confirm the gep was intended to feed the atomic.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=^VA"()
%result = atomicrmw xor ptr addrspace(1) %ptr, i32 %data seq_cst
call void asm "; use $0", "^VA"(i32 %result)
ret void
}
|  |  | 
|  | ; Input is AV, used as v | 
define void @global_atomic_xor_expansion_i32_ret_av_v(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i32_ret_av_v:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dword v2, v[0:1], off
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v3
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:  .LBB25_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v5, v2
; CHECK-NEXT:    v_xor_b32_e32 v4, v5, v3
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap v2, v[0:1], v[4:5], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v5
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB25_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v2
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; i32 xor cmpswap-loop expansion; AV-defined data ("=^VA") with a plain VGPR
; use ("v") — the checks show everything stays in VGPRs.
; NOTE(review): %gep.0 is computed but unused — the atomicrmw operates on
; %ptr directly; confirm the gep was intended to feed the atomic.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=^VA"()
%result = atomicrmw xor ptr addrspace(1) %ptr, i32 %data seq_cst
call void asm "; use $0", "v"(i32 %result)
ret void
}
|  |  | 
|  | ; Input is AV, used as a | 
define void @global_atomic_xor_expansion_i32_ret_av_a(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i32_ret_av_a:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dword v3, v[0:1], off
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v4
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:  .LBB26_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_xor_b32_e32 v2, v3, v4
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    v_accvgpr_write_b32 a0, v2
; CHECK-NEXT:    v_mov_b32_e32 v3, v2
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB26_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; i32 xor cmpswap-loop expansion; AV-defined data ("=^VA") lands in a VGPR,
; and the result is copied to a0 for the "a"-constrained use.
; NOTE(review): %gep.0 is computed but unused — the atomicrmw operates on
; %ptr directly; confirm the gep was intended to feed the atomic.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=^VA"()
%result = atomicrmw xor ptr addrspace(1) %ptr, i32 %data seq_cst
call void asm "; use $0", "a"(i32 %result)
ret void
}
|  |  | 
|  | ; Input is a, result used as AV | 
define void @global_atomic_xor_expansion_i32_ret_a_av(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i32_ret_a_av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dword v2, v[0:1], off
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v3, a0
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:  .LBB27_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v5, v2
; CHECK-NEXT:    v_xor_b32_e32 v4, v5, v3
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap v2, v[0:1], v[4:5], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v5
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB27_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v2
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; i32 xor cmpswap-loop expansion; AGPR-defined data ("=a") is copied to a
; VGPR, and the AV-constrained use ("^VA") is satisfied straight from v2.
; NOTE(review): %gep.0 is computed but unused — the atomicrmw operates on
; %ptr directly; confirm the gep was intended to feed the atomic.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=a"()
%result = atomicrmw xor ptr addrspace(1) %ptr, i32 %data seq_cst
call void asm "; use $0", "^VA"(i32 %result)
ret void
}
|  |  | 
|  | ; Input is v, result used as AV | 
define void @global_atomic_xor_expansion_i32_ret_v_av(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i32_ret_v_av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dword v2, v[0:1], off
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v3
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:  .LBB28_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v5, v2
; CHECK-NEXT:    v_xor_b32_e32 v4, v5, v3
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap v2, v[0:1], v[4:5], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v5
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB28_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v2
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; i32 xor cmpswap-loop expansion; VGPR-defined data ("=v") with an
; AV-constrained use ("^VA") — no accvgpr copies are needed.
; NOTE(review): %gep.0 is computed but unused — the atomicrmw operates on
; %ptr directly; confirm the gep was intended to feed the atomic.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=v"()
%result = atomicrmw xor ptr addrspace(1) %ptr, i32 %data seq_cst
call void asm "; use $0", "^VA"(i32 %result)
ret void
}
|  |  | 
define void @global_atomic_xor_expansion_i32_ret_av_av_no_agprs(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i32_ret_av_av_no_agprs:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v56, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v60, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword a32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword a33, off, s[0:3], s32 ; 4-byte Folded Spill
; CHECK-NEXT:    v_accvgpr_write_b32 a33, v1
; CHECK-NEXT:    v_accvgpr_write_b32 a32, v0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v[0:31]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_write_b32 a0, v0
; CHECK-NEXT:    v_accvgpr_write_b32 a1, v1
; CHECK-NEXT:    v_accvgpr_write_b32 a2, v2
; CHECK-NEXT:    v_accvgpr_write_b32 a3, v3
; CHECK-NEXT:    v_accvgpr_write_b32 a4, v4
; CHECK-NEXT:    v_accvgpr_write_b32 a5, v5
; CHECK-NEXT:    v_accvgpr_write_b32 a6, v6
; CHECK-NEXT:    v_accvgpr_write_b32 a7, v7
; CHECK-NEXT:    v_accvgpr_write_b32 a8, v8
; CHECK-NEXT:    v_accvgpr_write_b32 a9, v9
; CHECK-NEXT:    v_accvgpr_write_b32 a10, v10
; CHECK-NEXT:    v_accvgpr_write_b32 a11, v11
; CHECK-NEXT:    v_accvgpr_write_b32 a12, v12
; CHECK-NEXT:    v_accvgpr_write_b32 a13, v13
; CHECK-NEXT:    v_accvgpr_write_b32 a14, v14
; CHECK-NEXT:    v_accvgpr_write_b32 a15, v15
; CHECK-NEXT:    v_accvgpr_write_b32 a16, v16
; CHECK-NEXT:    v_accvgpr_write_b32 a17, v17
; CHECK-NEXT:    v_accvgpr_write_b32 a18, v18
; CHECK-NEXT:    v_accvgpr_write_b32 a19, v19
; CHECK-NEXT:    v_accvgpr_write_b32 a20, v20
; CHECK-NEXT:    v_accvgpr_write_b32 a21, v21
; CHECK-NEXT:    v_accvgpr_write_b32 a22, v22
; CHECK-NEXT:    v_accvgpr_write_b32 a23, v23
; CHECK-NEXT:    v_accvgpr_write_b32 a24, v24
; CHECK-NEXT:    v_accvgpr_write_b32 a25, v25
; CHECK-NEXT:    v_accvgpr_write_b32 a26, v26
; CHECK-NEXT:    v_accvgpr_write_b32 a27, v27
; CHECK-NEXT:    v_accvgpr_write_b32 a28, v28
; CHECK-NEXT:    v_accvgpr_write_b32 a29, v29
; CHECK-NEXT:    v_accvgpr_write_b32 a30, v30
; CHECK-NEXT:    v_accvgpr_write_b32 a31, v31
; CHECK-NEXT:    v_accvgpr_read_b32 v4, a32
; CHECK-NEXT:    v_accvgpr_read_b32 v5, a33
; CHECK-NEXT:    global_load_dword v1, v[4:5], off
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:  .LBB29_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v3, v1
; CHECK-NEXT:    v_xor_b32_e32 v2, v3, v0
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap v1, v[4:5], v[2:3], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB29_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    v_accvgpr_write_b32 a32, v1
; CHECK-NEXT:    v_accvgpr_read_b32 v0, a0
; CHECK-NEXT:    v_accvgpr_read_b32 v1, a1
; CHECK-NEXT:    v_accvgpr_read_b32 v2, a2
; CHECK-NEXT:    v_accvgpr_read_b32 v3, a3
; CHECK-NEXT:    v_accvgpr_read_b32 v4, a4
; CHECK-NEXT:    v_accvgpr_read_b32 v5, a5
; CHECK-NEXT:    v_accvgpr_read_b32 v6, a6
; CHECK-NEXT:    v_accvgpr_read_b32 v7, a7
; CHECK-NEXT:    v_accvgpr_read_b32 v8, a8
; CHECK-NEXT:    v_accvgpr_read_b32 v9, a9
; CHECK-NEXT:    v_accvgpr_read_b32 v10, a10
; CHECK-NEXT:    v_accvgpr_read_b32 v11, a11
; CHECK-NEXT:    v_accvgpr_read_b32 v12, a12
; CHECK-NEXT:    v_accvgpr_read_b32 v13, a13
; CHECK-NEXT:    v_accvgpr_read_b32 v14, a14
; CHECK-NEXT:    v_accvgpr_read_b32 v15, a15
; CHECK-NEXT:    v_accvgpr_read_b32 v16, a16
; CHECK-NEXT:    v_accvgpr_read_b32 v17, a17
; CHECK-NEXT:    v_accvgpr_read_b32 v18, a18
; CHECK-NEXT:    v_accvgpr_read_b32 v19, a19
; CHECK-NEXT:    v_accvgpr_read_b32 v20, a20
; CHECK-NEXT:    v_accvgpr_read_b32 v21, a21
; CHECK-NEXT:    v_accvgpr_read_b32 v22, a22
; CHECK-NEXT:    v_accvgpr_read_b32 v23, a23
; CHECK-NEXT:    v_accvgpr_read_b32 v24, a24
; CHECK-NEXT:    v_accvgpr_read_b32 v25, a25
; CHECK-NEXT:    v_accvgpr_read_b32 v26, a26
; CHECK-NEXT:    v_accvgpr_read_b32 v27, a27
; CHECK-NEXT:    v_accvgpr_read_b32 v28, a28
; CHECK-NEXT:    v_accvgpr_read_b32 v29, a29
; CHECK-NEXT:    v_accvgpr_read_b32 v30, a30
; CHECK-NEXT:    v_accvgpr_read_b32 v31, a31
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v[0:31]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use a32
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    buffer_load_dword a33, off, s[0:3], s32 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword a32, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v63, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v62, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v61, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v60, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v59, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v58, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v57, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v56, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v47, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; Register-pressure variant: the two 32-wide vector tuples occupy v[0:31] and
; v[32:63], so the cmpswap-loop expansion must shuffle live values through
; AGPRs and stack spills instead of free VGPRs.
; NOTE(review): %gep.0 is computed but unused — the atomicrmw operates on
; %ptr directly; confirm the gep was intended to feed the atomic.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=^VA"()
%vgpr.def = call { <32 x i32>, <32 x i32> }  asm sideeffect "; def $0", "=${v[0:31]},=${v[32:63]}"()
%vgpr.0 = extractvalue { <32 x i32>, <32 x i32> } %vgpr.def, 0
%vgpr.1 = extractvalue { <32 x i32>, <32 x i32> } %vgpr.def, 1
%result = atomicrmw xor ptr addrspace(1) %ptr, i32 %data seq_cst
call void asm sideeffect "; use $0", "{v[0:31]},{v[32:63]}"(<32 x i32> %vgpr.0, <32 x i32> %vgpr.1)
call void asm "; use $0", "^VA"(i32 %result)
ret void
}
|  |  | 
define void @global_atomic_xor_expansion_i32_noret_a(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i32_noret_a:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dword v3, v[0:1], off
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a0
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v4, a0
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:  .LBB30_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_xor_b32_e32 v2, v3, v4
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    v_mov_b32_e32 v3, v2
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB30_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; i32 xor cmpswap-loop expansion with the result unused; AGPR-defined data
; ("=a") is copied once to a VGPR and no copy-back is emitted.
; NOTE(review): %gep.0 is computed but unused — the atomicrmw operates on
; %ptr directly; confirm the gep was intended to feed the atomic.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=a"()
%unused = atomicrmw xor ptr addrspace(1) %ptr, i32 %data seq_cst
ret void
}
|  |  | 
define void @global_atomic_xor_expansion_i32_noret_av(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i32_noret_av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dword v3, v[0:1], off
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v4
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:  .LBB31_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_xor_b32_e32 v2, v3, v4
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    v_mov_b32_e32 v3, v2
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB31_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; i32 xor cmpswap-loop expansion with the result unused; AV-constrained data
; ("=^VA") is allocated straight to a VGPR (v4), avoiding accvgpr copies.
; NOTE(review): %gep.0 is computed but unused — the atomicrmw operates on
; %ptr directly; confirm the gep was intended to feed the atomic.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i32 asm "; def $0", "=^VA"()
%unused = atomicrmw xor ptr addrspace(1) %ptr, i32 %data seq_cst
ret void
}
|  |  | 
|  | ;--------------------------------------------------------------------- | 
|  | ; xor i64 cases with cmpxchg expansion | 
|  | ;--------------------------------------------------------------------- | 
|  |  | 
|  | ; Input and result use AGPR | 
define void @global_atomic_xor_expansion_i64_ret_a_a(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i64_ret_a_a:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[0:1], off
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v7, a1
; CHECK-NEXT:    v_accvgpr_read_b32 v6, a0
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:  .LBB32_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_xor_b32_e32 v3, v5, v7
; CHECK-NEXT:    v_xor_b32_e32 v2, v4, v6
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
; CHECK-NEXT:    v_accvgpr_write_b32 a0, v2
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    v_accvgpr_write_b32 a1, v3
; CHECK-NEXT:    v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB32_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use a[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; i64 xor expanded to a cmpswap_x2 loop; AGPR pair a[0:1] is copied out for
; the per-half xors and the result is written back to a[0:1] for the use.
; NOTE(review): %gep.0 is computed but unused — the atomicrmw operates on
; %ptr directly; confirm the gep was intended to feed the atomic.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=a"()
%result = atomicrmw xor ptr addrspace(1) %ptr, i64 %data seq_cst
call void asm "; use $0", "a"(i64 %result)
ret void
}
|  |  | 
|  | ; Input is AGPR, result used as VGPR. | 
define void @global_atomic_xor_expansion_i64_ret_a_v(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i64_ret_a_v:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[0:1], off
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v3, a1
; CHECK-NEXT:    v_accvgpr_read_b32 v2, a0
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:  .LBB33_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
; CHECK-NEXT:    v_xor_b32_e32 v5, v7, v3
; CHECK-NEXT:    v_xor_b32_e32 v4, v6, v2
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB33_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v[4:5]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; i64 xor cmpswap_x2-loop expansion; AGPR-defined data is copied to a VGPR
; pair and the "v"-constrained use reads v[4:5] directly.
; NOTE(review): %gep.0 is computed but unused — the atomicrmw operates on
; %ptr directly; confirm the gep was intended to feed the atomic.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=a"()
%result = atomicrmw xor ptr addrspace(1) %ptr, i64 %data seq_cst
call void asm "; use $0", "v"(i64 %result)
ret void
}
|  |  | 
|  | ; Input is VGPR, result used as AGPR | 
define void @global_atomic_xor_expansion_i64_ret_v_a(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i64_ret_v_a:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[0:1], off
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v[6:7]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:  .LBB34_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_xor_b32_e32 v3, v5, v7
; CHECK-NEXT:    v_xor_b32_e32 v2, v4, v6
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
; CHECK-NEXT:    v_accvgpr_write_b32 a0, v2
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    v_accvgpr_write_b32 a1, v3
; CHECK-NEXT:    v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB34_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use a[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; i64 xor cmpswap_x2-loop expansion; VGPR-defined data ("=v") feeds the loop
; directly, and the result pair is copied into a[0:1] for the "a" use.
; NOTE(review): %gep.0 is computed but unused — the atomicrmw operates on
; %ptr directly; confirm the gep was intended to feed the atomic.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=v"()
%result = atomicrmw xor ptr addrspace(1) %ptr, i64 %data seq_cst
call void asm "; use $0", "a"(i64 %result)
ret void
}
|  |  | 
|  | ; Input is AV, result also used as AV | 
define void @global_atomic_xor_expansion_i64_ret_av_av(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i64_ret_av_av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[0:1], off
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v[2:3]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:  .LBB35_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
; CHECK-NEXT:    v_xor_b32_e32 v5, v7, v3
; CHECK-NEXT:    v_xor_b32_e32 v4, v6, v2
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB35_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v[4:5]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; Verifies the cmpxchg-loop expansion of atomicrmw xor i64 when both the data
; and result use the AV constraint ("^VA"), which may pick either VGPRs or AGPRs.
; NOTE(review): %gep.0 below is dead -- the atomicrmw operates on %ptr, not
; the offset pointer. Presumably a template leftover; confirm it is intended.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=^VA"()
%result = atomicrmw xor ptr addrspace(1) %ptr, i64 %data seq_cst
call void asm "; use $0", "^VA"(i64 %result)
ret void
}
|  |  | 
; Input is AV, result used as VGPR
define void @global_atomic_xor_expansion_i64_ret_av_v(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i64_ret_av_v:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[0:1], off
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v[2:3]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:  .LBB36_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
; CHECK-NEXT:    v_xor_b32_e32 v5, v7, v3
; CHECK-NEXT:    v_xor_b32_e32 v4, v6, v2
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB36_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v[4:5]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; Verifies the cmpxchg-loop expansion of atomicrmw xor i64 when the data uses
; the AV constraint ("^VA") and the result is consumed via a VGPR ("v").
; NOTE(review): %gep.0 below is dead -- the atomicrmw operates on %ptr, not
; the offset pointer. Presumably a template leftover; confirm it is intended.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=^VA"()
%result = atomicrmw xor ptr addrspace(1) %ptr, i64 %data seq_cst
call void asm "; use $0", "v"(i64 %result)
ret void
}
|  |  | 
; Input is AV, result used as AGPR
define void @global_atomic_xor_expansion_i64_ret_av_a(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i64_ret_av_a:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[0:1], off
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v[6:7]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:  .LBB37_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_xor_b32_e32 v3, v5, v7
; CHECK-NEXT:    v_xor_b32_e32 v2, v4, v6
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
; CHECK-NEXT:    v_accvgpr_write_b32 a0, v2
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    v_accvgpr_write_b32 a1, v3
; CHECK-NEXT:    v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB37_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use a[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; Verifies the cmpxchg-loop expansion of atomicrmw xor i64 when the data uses
; the AV constraint ("^VA") and the result is consumed via an AGPR ("a").
; NOTE(review): %gep.0 below is dead -- the atomicrmw operates on %ptr, not
; the offset pointer. Presumably a template leftover; confirm it is intended.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=^VA"()
%result = atomicrmw xor ptr addrspace(1) %ptr, i64 %data seq_cst
call void asm "; use $0", "a"(i64 %result)
ret void
}
|  |  | 
; Input is AGPR, result used as AV
define void @global_atomic_xor_expansion_i64_ret_a_av(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i64_ret_a_av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[0:1], off
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v3, a1
; CHECK-NEXT:    v_accvgpr_read_b32 v2, a0
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:  .LBB38_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
; CHECK-NEXT:    v_xor_b32_e32 v5, v7, v3
; CHECK-NEXT:    v_xor_b32_e32 v4, v6, v2
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB38_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v[4:5]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; Verifies the cmpxchg-loop expansion of atomicrmw xor i64 when the data is
; defined in an AGPR ("=a") and the result is consumed via the AV constraint.
; NOTE(review): %gep.0 below is dead -- the atomicrmw operates on %ptr, not
; the offset pointer. Presumably a template leftover; confirm it is intended.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=a"()
%result = atomicrmw xor ptr addrspace(1) %ptr, i64 %data seq_cst
call void asm "; use $0", "^VA"(i64 %result)
ret void
}
|  |  | 
; Input is VGPR, result used as AV
define void @global_atomic_xor_expansion_i64_ret_v_av(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i64_ret_v_av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[0:1], off
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v[2:3]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:  .LBB39_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
; CHECK-NEXT:    v_xor_b32_e32 v5, v7, v3
; CHECK-NEXT:    v_xor_b32_e32 v4, v6, v2
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB39_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; use v[4:5]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; Verifies the cmpxchg-loop expansion of atomicrmw xor i64 when the data is
; defined in a VGPR ("=v") and the result is consumed via the AV constraint.
; NOTE(review): %gep.0 below is dead -- the atomicrmw operates on %ptr, not
; the offset pointer. Presumably a template leftover; confirm it is intended.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=v"()
%result = atomicrmw xor ptr addrspace(1) %ptr, i64 %data seq_cst
call void asm "; use $0", "^VA"(i64 %result)
ret void
}
|  |  | 
define void @global_atomic_xor_expansion_i64_noret_a(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i64_noret_a:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[0:1], off
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def a[0:1]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:    v_accvgpr_read_b32 v7, a1
; CHECK-NEXT:    v_accvgpr_read_b32 v6, a0
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:  .LBB40_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_xor_b32_e32 v3, v5, v7
; CHECK-NEXT:    v_xor_b32_e32 v2, v4, v6
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB40_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; No-return variant: data is defined in an AGPR ("=a") and the atomicrmw
; result is unused, so no copy of the loaded value back to AGPRs is expected.
; NOTE(review): %gep.0 below is dead -- the atomicrmw operates on %ptr, not
; the offset pointer. Presumably a template leftover; confirm it is intended.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=a"()
%unused = atomicrmw xor ptr addrspace(1) %ptr, i64 %data seq_cst
ret void
}
|  |  | 
define void @global_atomic_xor_expansion_i64_noret_av(ptr addrspace(1) %ptr) #0 {
; CHECK-LABEL: global_atomic_xor_expansion_i64_noret_av:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[0:1], off
; CHECK-NEXT:    s_mov_b64 s[4:5], 0
; CHECK-NEXT:    ;;#ASMSTART
; CHECK-NEXT:    ; def v[6:7]
; CHECK-NEXT:    ;;#ASMEND
; CHECK-NEXT:  .LBB41_1: ; %atomicrmw.start
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_xor_b32_e32 v3, v5, v7
; CHECK-NEXT:    v_xor_b32_e32 v2, v4, v6
; CHECK-NEXT:    buffer_wbl2
; CHECK-NEXT:    global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_invl2
; CHECK-NEXT:    buffer_wbinvl1_vol
; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT:    v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_cbranch_execnz .LBB41_1
; CHECK-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; No-return variant: data uses the AV constraint ("=^VA") and the atomicrmw
; result is unused.
; NOTE(review): %gep.0 below is dead -- the atomicrmw operates on %ptr, not
; the offset pointer. Presumably a template leftover; confirm it is intended.
%gep.0 = getelementptr inbounds [512 x i64], ptr addrspace(1) %ptr, i64 0, i64 10
%data = call i64 asm "; def $0", "=^VA"()
%unused = atomicrmw xor ptr addrspace(1) %ptr, i64 %data seq_cst
ret void
}
|  |  | 
|  | attributes #0 = { nounwind "amdgpu-waves-per-eu"="10,10" } |