| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -mtriple=amdgcn -amdgpu-sdwa-peephole=0 < %s | FileCheck -check-prefix=GCN %s |
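| ; Test that compares with a constant operand are commuted so the constant lands in |
| ; the src0 position of the VOPC, where it can be encoded as an inline immediate or |
| ; literal instead of being materialized in a register first. |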
| |
| declare i32 @llvm.amdgcn.workitem.id.x() #0 |
| |
| ; -------------------------------------------------------------------------------- |
| ; i32 compares |
| ; -------------------------------------------------------------------------------- |
| |
| define amdgpu_kernel void @commute_eq_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_eq_64_i32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 64, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i32, ptr addrspace(1) %gep.in |
| %cmp = icmp eq i32 %val, 64 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ne_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ne_64_i32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 64, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i32, ptr addrspace(1) %gep.in |
| %cmp = icmp ne i32 %val, 64 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| ; FIXME: Why isn't this constant being folded into the compare? |
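| ; The constant is currently materialized first (s_movk_i32 s4, 0x3039) and the |
| ; compare reads it from the SGPR rather than taking 0x3039 as a literal operand. |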
| define amdgpu_kernel void @commute_ne_litk_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ne_litk_i32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_movk_i32 s4, 0x3039 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, s4, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i32, ptr addrspace(1) %gep.in |
| %cmp = icmp ne i32 %val, 12345 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ugt_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ugt_64_i32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_lt_u32_e32 vcc, 64, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i32, ptr addrspace(1) %gep.in |
| %cmp = icmp ugt i32 %val, 64 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_uge_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_uge_64_i32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_lt_u32_e32 vcc, 63, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i32, ptr addrspace(1) %gep.in |
| %cmp = icmp uge i32 %val, 64 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ult_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ult_64_i32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i32, ptr addrspace(1) %gep.in |
| %cmp = icmp ult i32 %val, 64 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ule_63_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ule_63_i32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i32, ptr addrspace(1) %gep.in |
| %cmp = icmp ule i32 %val, 63 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ule_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ule_64_i32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_movk_i32 s4, 0x41 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_gt_u32_e32 vcc, s4, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i32, ptr addrspace(1) %gep.in |
| %cmp = icmp ule i32 %val, 64 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_sgt_neg1_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_sgt_neg1_i32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_not_b32_e32 v2, v2 |
| ; GCN-NEXT: v_ashrrev_i32_e32 v2, 31, v2 |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i32, ptr addrspace(1) %gep.in |
| %cmp = icmp sgt i32 %val, -1 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_sge_neg2_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_sge_neg2_i32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_lt_i32_e32 vcc, -3, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i32, ptr addrspace(1) %gep.in |
| %cmp = icmp sge i32 %val, -2 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_slt_neg16_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_slt_neg16_i32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_gt_i32_e32 vcc, -16, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i32, ptr addrspace(1) %gep.in |
| %cmp = icmp slt i32 %val, -16 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_sle_5_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_sle_5_i32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 6, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i32, ptr addrspace(1) %gep.in |
| %cmp = icmp sle i32 %val, 5 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------------------- |
| ; i64 compares |
| ; -------------------------------------------------------------------------------- |
| |
| define amdgpu_kernel void @commute_eq_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_eq_64_i64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, 64, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i64, ptr addrspace(1) %gep.in |
| %cmp = icmp eq i64 %val, 64 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ne_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ne_64_i64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_ne_u64_e32 vcc, 64, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i64, ptr addrspace(1) %gep.in |
| %cmp = icmp ne i64 %val, 64 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ugt_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ugt_64_i64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_lt_u64_e32 vcc, 64, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i64, ptr addrspace(1) %gep.in |
| %cmp = icmp ugt i64 %val, 64 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_uge_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_uge_64_i64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i64, ptr addrspace(1) %gep.in |
| %cmp = icmp uge i64 %val, 64 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ult_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ult_64_i64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_gt_u64_e32 vcc, 64, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i64, ptr addrspace(1) %gep.in |
| %cmp = icmp ult i64 %val, 64 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ule_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ule_63_i64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_gt_u64_e32 vcc, 64, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i64, ptr addrspace(1) %gep.in |
| %cmp = icmp ule i64 %val, 63 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| ; FIXME: Undo the canonicalization to gt (x + 1); 65 does not fit in an inline immediate (the inline range only reaches 64). |
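| ; For the i64 case below this currently yields: |
| ;   s_mov_b64 s[4:5], 0x41 |
| ;   v_cmp_gt_u64_e32 vcc, s[4:5], v[3:4] |
| ; whereas a compare against 64 itself could use an inline immediate operand. |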
| |
| define amdgpu_kernel void @commute_ule_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ule_64_i64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[4:5], 0x41 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i64, ptr addrspace(1) %gep.in |
| %cmp = icmp ule i64 %val, 64 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_sgt_neg1_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_sgt_neg1_i64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_lt_i64_e32 vcc, -1, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i64, ptr addrspace(1) %gep.in |
| %cmp = icmp sgt i64 %val, -1 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_sge_neg2_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_sge_neg2_i64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_lt_i64_e32 vcc, -3, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i64, ptr addrspace(1) %gep.in |
| %cmp = icmp sge i64 %val, -2 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_slt_neg16_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_slt_neg16_i64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_gt_i64_e32 vcc, -16, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i64, ptr addrspace(1) %gep.in |
| %cmp = icmp slt i64 %val, -16 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_sle_5_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_sle_5_i64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_gt_i64_e32 vcc, 6, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load i64, ptr addrspace(1) %gep.in |
| %cmp = icmp sle i64 %val, 5 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------------------- |
| ; f32 compares |
| ; -------------------------------------------------------------------------------- |
| |
| define amdgpu_kernel void @commute_oeq_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_oeq_2.0_f32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_eq_f32_e32 vcc, 2.0, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load float, ptr addrspace(1) %gep.in |
| %cmp = fcmp oeq float %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ogt_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ogt_2.0_f32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load float, ptr addrspace(1) %gep.in |
| %cmp = fcmp ogt float %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_oge_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_oge_2.0_f32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_le_f32_e32 vcc, 2.0, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load float, ptr addrspace(1) %gep.in |
| %cmp = fcmp oge float %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_olt_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_olt_2.0_f32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 2.0, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load float, ptr addrspace(1) %gep.in |
| %cmp = fcmp olt float %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ole_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ole_2.0_f32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_ge_f32_e32 vcc, 2.0, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load float, ptr addrspace(1) %gep.in |
| %cmp = fcmp ole float %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_one_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_one_2.0_f32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_lg_f32_e32 vcc, 2.0, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load float, ptr addrspace(1) %gep.in |
| %cmp = fcmp one float %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ord_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ord_2.0_f32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load float, ptr addrspace(1) %gep.in |
| %cmp = fcmp ord float %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ueq_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ueq_2.0_f32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_nlg_f32_e32 vcc, 2.0, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load float, ptr addrspace(1) %gep.in |
| %cmp = fcmp ueq float %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ugt_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ugt_2.0_f32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_nge_f32_e32 vcc, 2.0, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load float, ptr addrspace(1) %gep.in |
| %cmp = fcmp ugt float %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_uge_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_uge_2.0_f32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_ngt_f32_e32 vcc, 2.0, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load float, ptr addrspace(1) %gep.in |
| %cmp = fcmp uge float %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ult_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ult_2.0_f32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_nle_f32_e32 vcc, 2.0, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load float, ptr addrspace(1) %gep.in |
| %cmp = fcmp ult float %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ule_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ule_2.0_f32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, 2.0, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load float, ptr addrspace(1) %gep.in |
| %cmp = fcmp ule float %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_une_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_une_2.0_f32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_neq_f32_e32 vcc, 2.0, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load float, ptr addrspace(1) %gep.in |
| %cmp = fcmp une float %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_uno_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_uno_2.0_f32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v1, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 |
| ; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load float, ptr addrspace(1) %gep.in |
| %cmp = fcmp uno float %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------------------- |
| ; f64 compares |
| ; -------------------------------------------------------------------------------- |
| |
| define amdgpu_kernel void @commute_oeq_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_oeq_2.0_f64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_eq_f64_e32 vcc, 2.0, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load double, ptr addrspace(1) %gep.in |
| %cmp = fcmp oeq double %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ogt_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ogt_2.0_f64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_lt_f64_e32 vcc, 2.0, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load double, ptr addrspace(1) %gep.in |
| %cmp = fcmp ogt double %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_oge_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_oge_2.0_f64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_le_f64_e32 vcc, 2.0, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load double, ptr addrspace(1) %gep.in |
| %cmp = fcmp oge double %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_olt_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_olt_2.0_f64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_gt_f64_e32 vcc, 2.0, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load double, ptr addrspace(1) %gep.in |
| %cmp = fcmp olt double %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ole_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ole_2.0_f64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_ge_f64_e32 vcc, 2.0, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load double, ptr addrspace(1) %gep.in |
| %cmp = fcmp ole double %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_one_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_one_2.0_f64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_lg_f64_e32 vcc, 2.0, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load double, ptr addrspace(1) %gep.in |
| %cmp = fcmp one double %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ord_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ord_2.0_f64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_o_f64_e32 vcc, v[3:4], v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load double, ptr addrspace(1) %gep.in |
| %cmp = fcmp ord double %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ueq_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ueq_2.0_f64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_nlg_f64_e32 vcc, 2.0, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load double, ptr addrspace(1) %gep.in |
| %cmp = fcmp ueq double %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ugt_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ugt_2.0_f64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_nge_f64_e32 vcc, 2.0, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load double, ptr addrspace(1) %gep.in |
| %cmp = fcmp ugt double %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_uge_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_uge_2.0_f64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_ngt_f64_e32 vcc, 2.0, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load double, ptr addrspace(1) %gep.in |
| %cmp = fcmp uge double %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ult_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ult_2.0_f64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_nle_f64_e32 vcc, 2.0, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load double, ptr addrspace(1) %gep.in |
| %cmp = fcmp ult double %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_ule_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_ule_2.0_f64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_nlt_f64_e32 vcc, 2.0, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load double, ptr addrspace(1) %gep.in |
| %cmp = fcmp ule double %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_une_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_une_2.0_f64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_neq_f64_e32 vcc, 2.0, v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load double, ptr addrspace(1) %gep.in |
| %cmp = fcmp une double %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| define amdgpu_kernel void @commute_uno_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { |
| ; GCN-LABEL: commute_uno_2.0_f64: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s7, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s6, 0 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 |
| ; GCN-NEXT: v_mov_b32_e32 v2, 0 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: s_mov_b64 s[4:5], s[2:3] |
| ; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64 |
| ; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
| ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: v_cmp_u_f64_e32 vcc, v[3:4], v[3:4] |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 |
| ; GCN-NEXT: s_endpgm |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 |
| %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid |
| %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid |
| %val = load double, ptr addrspace(1) %gep.in |
| %cmp = fcmp uno double %val, 2.0 |
| %ext = sext i1 %cmp to i32 |
| store i32 %ext, ptr addrspace(1) %gep.out |
| ret void |
| } |
| |
| ; FIXME: Should be able to fold this frameindex |
| ; Without commuting the frame index in the pre-regalloc run of |
| ; SIShrinkInstructions, this was using the VOP3 compare. |
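| ; The frame index (0 here) is still materialized with s_mov_b32 s4, 0 and |
| ; compared via v_cmp_eq_u32_e32 vcc, s4, v0 rather than folded into the compare. |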
| |
| define amdgpu_kernel void @commute_frameindex(ptr addrspace(1) nocapture %out) #0 { |
| ; GCN-LABEL: commute_frameindex: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 |
| ; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 |
| ; GCN-NEXT: s_mov_b32 s14, -1 |
| ; GCN-NEXT: s_mov_b32 s15, 0xe8f000 |
| ; GCN-NEXT: s_add_u32 s12, s12, s11 |
| ; GCN-NEXT: s_addc_u32 s13, s13, 0 |
| ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; GCN-NEXT: s_mov_b32 s3, 0xf000 |
| ; GCN-NEXT: s_mov_b32 s2, -1 |
| ; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: s_mov_b32 s4, 0 |
| ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0 |
| ; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| ; GCN-NEXT: s_waitcnt vmcnt(0) |
| ; GCN-NEXT: s_endpgm |
| entry: |
| %stack0 = alloca i32, addrspace(5) |
| %ptr0 = load volatile ptr addrspace(5), ptr addrspace(1) poison |
| %eq = icmp eq ptr addrspace(5) %ptr0, %stack0 |
| %ext = zext i1 %eq to i32 |
| store volatile i32 %ext, ptr addrspace(1) %out |
| ret void |
| } |
| |
| attributes #0 = { nounwind readnone } |
| attributes #1 = { nounwind } |