| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
| ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s |
| |
| ; FIXME: Merge into imm.ll |
| |
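; f16/i16 immediate stores: the constant is materialized with v_mov_b32 and
; written with buffer_store_short. For bit patterns with the sign bit set, the
; VI checks show the value sign-extended to 32 bits (0xffff8000) while the SI
; checks show it zero-extended (0x8000), presumably because the selected
; 16-bit value is printed as a sign-extended 32-bit immediate on VI.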
| define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(i16 addrspace(1)* %out) { |
| ; VI-LABEL: store_inline_imm_neg_0.0_i16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v0, 0xffff8000 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: store_inline_imm_neg_0.0_i16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0x8000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| store volatile i16 -32768, i16 addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @store_inline_imm_0.0_f16(half addrspace(1)* %out) { |
| ; VI-LABEL: store_inline_imm_0.0_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v0, 0 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: store_inline_imm_0.0_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| store half 0.0, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @store_imm_neg_0.0_f16(half addrspace(1)* %out) { |
| ; VI-LABEL: store_imm_neg_0.0_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v0, 0xffff8000 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: store_imm_neg_0.0_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0x8000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| store half -0.0, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @store_inline_imm_0.5_f16(half addrspace(1)* %out) { |
| ; VI-LABEL: store_inline_imm_0.5_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v0, 0x3800 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: store_inline_imm_0.5_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0x3800 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| store half 0.5, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @store_inline_imm_m_0.5_f16(half addrspace(1)* %out) { |
| ; VI-LABEL: store_inline_imm_m_0.5_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v0, 0xffffb800 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: store_inline_imm_m_0.5_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0xb800 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| store half -0.5, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @store_inline_imm_1.0_f16(half addrspace(1)* %out) { |
| ; VI-LABEL: store_inline_imm_1.0_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v0, 0x3c00 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: store_inline_imm_1.0_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0x3c00 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| store half 1.0, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @store_inline_imm_m_1.0_f16(half addrspace(1)* %out) { |
| ; VI-LABEL: store_inline_imm_m_1.0_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v0, 0xffffbc00 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: store_inline_imm_m_1.0_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0xbc00 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| store half -1.0, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @store_inline_imm_2.0_f16(half addrspace(1)* %out) { |
| ; VI-LABEL: store_inline_imm_2.0_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v0, 0x4000 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: store_inline_imm_2.0_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0x4000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| store half 2.0, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @store_inline_imm_m_2.0_f16(half addrspace(1)* %out) { |
| ; VI-LABEL: store_inline_imm_m_2.0_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v0, 0xffffc000 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: store_inline_imm_m_2.0_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0xc000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| store half -2.0, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @store_inline_imm_4.0_f16(half addrspace(1)* %out) { |
| ; VI-LABEL: store_inline_imm_4.0_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v0, 0x4400 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: store_inline_imm_4.0_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0x4400 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| store half 4.0, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @store_inline_imm_m_4.0_f16(half addrspace(1)* %out) { |
| ; VI-LABEL: store_inline_imm_m_4.0_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v0, 0xffffc400 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: store_inline_imm_m_4.0_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0xc400 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| store half -4.0, half addrspace(1)* %out |
| ret void |
| } |
| |
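; 0xH3118 is the f16 approximation of 1/(2*pi).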
| define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(half addrspace(1)* %out) { |
| ; VI-LABEL: store_inline_imm_inv_2pi_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v0, 0x3118 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: store_inline_imm_inv_2pi_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0x3118 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| store half 0xH3118, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(half addrspace(1)* %out) { |
| ; VI-LABEL: store_inline_imm_m_inv_2pi_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v0, 0xffffb118 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: store_inline_imm_m_inv_2pi_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0xb118 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| store half 0xHB118, half addrspace(1)* %out |
| ret void |
| } |
| |
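; 4096.0 (0xH6C00) is not an f16 inline constant, so a literal is required.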
| define amdgpu_kernel void @store_literal_imm_f16(half addrspace(1)* %out) { |
| ; VI-LABEL: store_literal_imm_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: v_mov_b32_e32 v0, 0x6c00 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: store_literal_imm_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0x6c00 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| store half 4096.0, half addrspace(1)* %out |
| ret void |
| } |
| |
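; fadd with constant operands: VI has v_add_f16 and folds the f16 inline
; constants (0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0) directly into the
; instruction. SI has no f16 arithmetic, so the add is promoted through
; v_cvt_f32_f16 / v_add_f32 / v_cvt_f16_f32 using the equivalent f32 constant.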
| define amdgpu_kernel void @add_inline_imm_0.0_f16(half addrspace(1)* %out, half %x) { |
| ; VI-LABEL: add_inline_imm_0.0_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_load_dword s4, s[4:5], 0x8 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_add_f16_e64 v0, s4, 0 |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: add_inline_imm_0.0_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[0:1], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_add_f32_e32 v0, 0, v0 |
| ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| %y = fadd half %x, 0.0 |
| store half %y, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @add_inline_imm_0.5_f16(half addrspace(1)* %out, half %x) { |
| ; VI-LABEL: add_inline_imm_0.5_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_load_dword s4, s[4:5], 0x8 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_add_f16_e64 v0, s4, 0.5 |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: add_inline_imm_0.5_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[0:1], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_add_f32_e32 v0, 0.5, v0 |
| ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| %y = fadd half %x, 0.5 |
| store half %y, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @add_inline_imm_neg_0.5_f16(half addrspace(1)* %out, half %x) { |
| ; VI-LABEL: add_inline_imm_neg_0.5_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_load_dword s4, s[4:5], 0x8 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_add_f16_e64 v0, s4, -0.5 |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: add_inline_imm_neg_0.5_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[0:1], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_add_f32_e32 v0, -0.5, v0 |
| ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| %y = fadd half %x, -0.5 |
| store half %y, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @add_inline_imm_1.0_f16(half addrspace(1)* %out, half %x) { |
| ; VI-LABEL: add_inline_imm_1.0_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_load_dword s4, s[4:5], 0x8 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_add_f16_e64 v0, s4, 1.0 |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: add_inline_imm_1.0_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[0:1], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_add_f32_e32 v0, 1.0, v0 |
| ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| %y = fadd half %x, 1.0 |
| store half %y, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @add_inline_imm_neg_1.0_f16(half addrspace(1)* %out, half %x) { |
| ; VI-LABEL: add_inline_imm_neg_1.0_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_load_dword s4, s[4:5], 0x8 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_add_f16_e64 v0, s4, -1.0 |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: add_inline_imm_neg_1.0_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[0:1], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_add_f32_e32 v0, -1.0, v0 |
| ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| %y = fadd half %x, -1.0 |
| store half %y, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @add_inline_imm_2.0_f16(half addrspace(1)* %out, half %x) { |
| ; VI-LABEL: add_inline_imm_2.0_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_load_dword s4, s[4:5], 0x8 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_add_f16_e64 v0, s4, 2.0 |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: add_inline_imm_2.0_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[0:1], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_add_f32_e32 v0, 2.0, v0 |
| ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| %y = fadd half %x, 2.0 |
| store half %y, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @add_inline_imm_neg_2.0_f16(half addrspace(1)* %out, half %x) { |
| ; VI-LABEL: add_inline_imm_neg_2.0_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_load_dword s4, s[4:5], 0x8 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_add_f16_e64 v0, s4, -2.0 |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: add_inline_imm_neg_2.0_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[0:1], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_add_f32_e32 v0, -2.0, v0 |
| ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| %y = fadd half %x, -2.0 |
| store half %y, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @add_inline_imm_4.0_f16(half addrspace(1)* %out, half %x) { |
| ; VI-LABEL: add_inline_imm_4.0_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_load_dword s4, s[4:5], 0x8 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_add_f16_e64 v0, s4, 4.0 |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: add_inline_imm_4.0_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[0:1], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_add_f32_e32 v0, 4.0, v0 |
| ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| %y = fadd half %x, 4.0 |
| store half %y, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @add_inline_imm_neg_4.0_f16(half addrspace(1)* %out, half %x) { |
| ; VI-LABEL: add_inline_imm_neg_4.0_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_load_dword s4, s[4:5], 0x8 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_add_f16_e64 v0, s4, -4.0 |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: add_inline_imm_neg_4.0_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[0:1], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_add_f32_e32 v0, -4.0, v0 |
| ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| %y = fadd half %x, -4.0 |
| store half %y, half addrspace(1)* %out |
| ret void |
| } |
| |
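; The commute tests load %x from memory, so the loaded VGPR operand must be
; commuted into src1 to leave src0 (presumably the only VOP2 source that can
; hold a constant) free for the inline or literal immediate.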
| define amdgpu_kernel void @commute_add_inline_imm_0.5_f16(half addrspace(1)* %out, half addrspace(1)* %in) { |
| ; VI-LABEL: commute_add_inline_imm_0.5_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: s_mov_b32 s0, s4 |
| ; VI-NEXT: s_mov_b32 s1, s5 |
| ; VI-NEXT: s_mov_b32 s4, s6 |
| ; VI-NEXT: s_mov_b32 s5, s7 |
| ; VI-NEXT: s_mov_b32 s6, s2 |
| ; VI-NEXT: s_mov_b32 s7, s3 |
| ; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: v_add_f16_e32 v0, 0.5, v0 |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: commute_add_inline_imm_0.5_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: s_mov_b32 s10, s2 |
| ; SI-NEXT: s_mov_b32 s11, s3 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: s_mov_b32 s8, s6 |
| ; SI-NEXT: s_mov_b32 s9, s7 |
| ; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 |
| ; SI-NEXT: s_mov_b32 s0, s4 |
| ; SI-NEXT: s_mov_b32 s1, s5 |
| ; SI-NEXT: s_waitcnt vmcnt(0) |
| ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-NEXT: v_add_f32_e32 v0, 0.5, v0 |
| ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| %x = load half, half addrspace(1)* %in |
| %y = fadd half %x, 0.5 |
| store half %y, half addrspace(1)* %out |
| ret void |
| } |
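; 1024.0 (0xH6400) is not an f16 inline constant: VI folds it as a 32-bit
; literal operand of v_add_f16, and SI uses the f32 literal 0x44800000.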
| |
| define amdgpu_kernel void @commute_add_literal_f16(half addrspace(1)* %out, half addrspace(1)* %in) { |
| ; VI-LABEL: commute_add_literal_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: s_mov_b32 s0, s4 |
| ; VI-NEXT: s_mov_b32 s1, s5 |
| ; VI-NEXT: s_mov_b32 s4, s6 |
| ; VI-NEXT: s_mov_b32 s5, s7 |
| ; VI-NEXT: s_mov_b32 s6, s2 |
| ; VI-NEXT: s_mov_b32 s7, s3 |
| ; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: v_add_f16_e32 v0, 0x6400, v0 |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: commute_add_literal_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: s_mov_b32 s10, s2 |
| ; SI-NEXT: s_mov_b32 s11, s3 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: s_mov_b32 s8, s6 |
| ; SI-NEXT: s_mov_b32 s9, s7 |
| ; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 |
| ; SI-NEXT: s_mov_b32 s0, s4 |
| ; SI-NEXT: s_mov_b32 s1, s5 |
| ; SI-NEXT: s_waitcnt vmcnt(0) |
| ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-NEXT: v_add_f32_e32 v0, 0x44800000, v0 |
| ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| %x = load half, half addrspace(1)* %in |
| %y = fadd half %x, 1024.0 |
| store half %y, half addrspace(1)* %out |
| ret void |
| } |
| |
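; The integer inline constants 1, 2, 16, 63 and 64 correspond to f16 denormal
; bit patterns; VI still folds them as inline operands of v_add_f16, while SI
; converts to f32 and needs a 32-bit literal for the equivalent value.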
| define amdgpu_kernel void @add_inline_imm_1_f16(half addrspace(1)* %out, half %x) { |
| ; VI-LABEL: add_inline_imm_1_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_load_dword s4, s[4:5], 0x8 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_add_f16_e64 v0, s4, 1 |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: add_inline_imm_1_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[0:1], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_add_f32_e32 v0, 0x33800000, v0 |
| ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| %y = fadd half %x, 0xH0001 |
| store half %y, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @add_inline_imm_2_f16(half addrspace(1)* %out, half %x) { |
| ; VI-LABEL: add_inline_imm_2_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_load_dword s4, s[4:5], 0x8 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_add_f16_e64 v0, s4, 2 |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: add_inline_imm_2_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[0:1], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_add_f32_e32 v0, 0x34000000, v0 |
| ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| %y = fadd half %x, 0xH0002 |
| store half %y, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @add_inline_imm_16_f16(half addrspace(1)* %out, half %x) { |
| ; VI-LABEL: add_inline_imm_16_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_load_dword s4, s[4:5], 0x8 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_add_f16_e64 v0, s4, 16 |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: add_inline_imm_16_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[0:1], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_add_f32_e32 v0, 0x35800000, v0 |
| ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| %y = fadd half %x, 0xH0010 |
| store half %y, half addrspace(1)* %out |
| ret void |
| } |
| |
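; The negative integer cases add to an i16 and bitcast the result to half, so
; integer adds are selected (v_add_u16 on VI, v_add_i32 on SI) with the inline
; constants -1, -2 and -16.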
| define amdgpu_kernel void @add_inline_imm_neg_1_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) { |
| ; VI-LABEL: add_inline_imm_neg_1_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: s_mov_b32 s0, s4 |
| ; VI-NEXT: s_mov_b32 s1, s5 |
| ; VI-NEXT: s_mov_b32 s4, s6 |
| ; VI-NEXT: s_mov_b32 s5, s7 |
| ; VI-NEXT: s_mov_b32 s6, s2 |
| ; VI-NEXT: s_mov_b32 s7, s3 |
| ; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: v_add_u16_e32 v0, -1, v0 |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: add_inline_imm_neg_1_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: s_mov_b32 s0, s4 |
| ; SI-NEXT: s_mov_b32 s1, s5 |
| ; SI-NEXT: s_mov_b32 s4, s6 |
| ; SI-NEXT: s_mov_b32 s5, s7 |
| ; SI-NEXT: s_mov_b32 s6, s2 |
| ; SI-NEXT: s_mov_b32 s7, s3 |
| ; SI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 |
| ; SI-NEXT: s_waitcnt vmcnt(0) |
| ; SI-NEXT: v_add_i32_e32 v0, vcc, -1, v0 |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| %x = load i16, i16 addrspace(1)* %in |
| %y = add i16 %x, -1 |
| %ybc = bitcast i16 %y to half |
| store half %ybc, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @add_inline_imm_neg_2_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) { |
| ; VI-LABEL: add_inline_imm_neg_2_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: s_mov_b32 s0, s4 |
| ; VI-NEXT: s_mov_b32 s1, s5 |
| ; VI-NEXT: s_mov_b32 s4, s6 |
| ; VI-NEXT: s_mov_b32 s5, s7 |
| ; VI-NEXT: s_mov_b32 s6, s2 |
| ; VI-NEXT: s_mov_b32 s7, s3 |
| ; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: v_add_u16_e32 v0, -2, v0 |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: add_inline_imm_neg_2_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: s_mov_b32 s0, s4 |
| ; SI-NEXT: s_mov_b32 s1, s5 |
| ; SI-NEXT: s_mov_b32 s4, s6 |
| ; SI-NEXT: s_mov_b32 s5, s7 |
| ; SI-NEXT: s_mov_b32 s6, s2 |
| ; SI-NEXT: s_mov_b32 s7, s3 |
| ; SI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 |
| ; SI-NEXT: s_waitcnt vmcnt(0) |
| ; SI-NEXT: v_add_i32_e32 v0, vcc, -2, v0 |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| %x = load i16, i16 addrspace(1)* %in |
| %y = add i16 %x, -2 |
| %ybc = bitcast i16 %y to half |
| store half %ybc, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @add_inline_imm_neg_16_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) { |
| ; VI-LABEL: add_inline_imm_neg_16_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: s_mov_b32 s0, s4 |
| ; VI-NEXT: s_mov_b32 s1, s5 |
| ; VI-NEXT: s_mov_b32 s4, s6 |
| ; VI-NEXT: s_mov_b32 s5, s7 |
| ; VI-NEXT: s_mov_b32 s6, s2 |
| ; VI-NEXT: s_mov_b32 s7, s3 |
| ; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: v_add_u16_e32 v0, -16, v0 |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: add_inline_imm_neg_16_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: s_mov_b32 s0, s4 |
| ; SI-NEXT: s_mov_b32 s1, s5 |
| ; SI-NEXT: s_mov_b32 s4, s6 |
| ; SI-NEXT: s_mov_b32 s5, s7 |
| ; SI-NEXT: s_mov_b32 s6, s2 |
| ; SI-NEXT: s_mov_b32 s7, s3 |
| ; SI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 |
| ; SI-NEXT: s_waitcnt vmcnt(0) |
| ; SI-NEXT: v_add_i32_e32 v0, vcc, -16, v0 |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| %x = load i16, i16 addrspace(1)* %in |
| %y = add i16 %x, -16 |
| %ybc = bitcast i16 %y to half |
| store half %ybc, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @add_inline_imm_63_f16(half addrspace(1)* %out, half %x) { |
| ; VI-LABEL: add_inline_imm_63_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_load_dword s4, s[4:5], 0x8 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_add_f16_e64 v0, s4, 63 |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: add_inline_imm_63_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[0:1], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_add_f32_e32 v0, 0x367c0000, v0 |
| ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| %y = fadd half %x, 0xH003F |
| store half %y, half addrspace(1)* %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @add_inline_imm_64_f16(half addrspace(1)* %out, half %x) { |
| ; VI-LABEL: add_inline_imm_64_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| ; VI-NEXT: s_load_dword s4, s[4:5], 0x8 |
| ; VI-NEXT: s_mov_b32 s3, 0x1100f000 |
| ; VI-NEXT: s_mov_b32 s2, -1 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_add_f16_e64 v0, s4, 64 |
| ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; SI-LABEL: add_inline_imm_64_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[0:1], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_add_f32_e32 v0, 0x36800000, v0 |
| ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| %y = fadd half %x, 0xH0040 |
| store half %y, half addrspace(1)* %out |
| ret void |
| } |