| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG-REAL16 %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG-FAKE16 %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-REAL16 %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-FAKE16 %s |
| |
| ; Scalar conversions: take an i32 source and a second i32 operand and return |
| ; one half. Every call in this file passes a constant 0-3 as the second |
| ; operand, and the expected output carries a matching byte_sel modifier |
| ; (omitted for 0), so it appears to select which source byte is converted. |
| declare half @llvm.amdgcn.cvt.f16.bf8(i32, i32) |
| declare half @llvm.amdgcn.cvt.f16.fp8(i32, i32) |
| ; Packed conversions: take an i16 source and return a <2 x half>. |
| declare <2 x half> @llvm.amdgcn.cvt.pk.f16.bf8(i16) |
| declare <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16) |
| |
| ; bf8 -> f16 conversion of %a with constant second operand 0; the half result |
| ; is extended to float for the return value. |
| ; All four runs expect the e32 form of the convert with no byte_sel modifier. |
| ; The +real-true16 runs address the 16-bit result as v0.l, while the |
| ; -real-true16 runs use the full v0 register. |
| define amdgpu_ps float @test_cvt_f16_bf8_byte0(i32 %a) { |
| ; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte0: |
| ; GFX1250-SDAG-REAL16: ; %bb.0: |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e32 v0.l, v0 |
| ; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l |
| ; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte0: |
| ; GFX1250-SDAG-FAKE16: ; %bb.0: |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e32 v0, v0 |
| ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte0: |
| ; GFX1250-GISEL-REAL16: ; %bb.0: |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e32 v0.l, v0 |
| ; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l |
| ; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte0: |
| ; GFX1250-GISEL-FAKE16: ; %bb.0: |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e32 v0, v0 |
| ; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog |
| %cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 0) |
| %ret = fpext half %cvt to float |
| ret float %ret |
| } |
| |
| ; bf8 -> f16 conversion of %a with constant second operand 1; the half result |
| ; is extended to float for the return value. |
| ; A non-zero selector is expected to produce the e64 form of the convert |
| ; carrying a byte_sel modifier with the same value (1 here) in all four runs. |
| define amdgpu_ps float @test_cvt_f16_bf8_byte1(i32 %a) { |
| ; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte1: |
| ; GFX1250-SDAG-REAL16: ; %bb.0: |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:1 |
| ; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l |
| ; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte1: |
| ; GFX1250-SDAG-FAKE16: ; %bb.0: |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:1 |
| ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte1: |
| ; GFX1250-GISEL-REAL16: ; %bb.0: |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:1 |
| ; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l |
| ; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte1: |
| ; GFX1250-GISEL-FAKE16: ; %bb.0: |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:1 |
| ; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog |
| %cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 1) |
| %ret = fpext half %cvt to float |
| ret float %ret |
| } |
| |
| ; bf8 -> f16 conversion of %a with constant second operand 2; the half result |
| ; is extended to float for the return value. |
| ; All four runs expect the e64 form of the convert with byte_sel set to 2. |
| define amdgpu_ps float @test_cvt_f16_bf8_byte2(i32 %a) { |
| ; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte2: |
| ; GFX1250-SDAG-REAL16: ; %bb.0: |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:2 |
| ; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l |
| ; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte2: |
| ; GFX1250-SDAG-FAKE16: ; %bb.0: |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:2 |
| ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte2: |
| ; GFX1250-GISEL-REAL16: ; %bb.0: |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:2 |
| ; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l |
| ; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte2: |
| ; GFX1250-GISEL-FAKE16: ; %bb.0: |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:2 |
| ; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog |
| %cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 2) |
| %ret = fpext half %cvt to float |
| ret float %ret |
| } |
| |
| ; bf8 -> f16 conversion of %a with constant second operand 3; the half result |
| ; is extended to float for the return value. |
| ; All four runs expect the e64 form of the convert with byte_sel set to 3. |
| define amdgpu_ps float @test_cvt_f16_bf8_byte3(i32 %a) { |
| ; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte3: |
| ; GFX1250-SDAG-REAL16: ; %bb.0: |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:3 |
| ; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l |
| ; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte3: |
| ; GFX1250-SDAG-FAKE16: ; %bb.0: |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3 |
| ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte3: |
| ; GFX1250-GISEL-REAL16: ; %bb.0: |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:3 |
| ; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l |
| ; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte3: |
| ; GFX1250-GISEL-FAKE16: ; %bb.0: |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3 |
| ; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog |
| %cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 3) |
| %ret = fpext half %cvt to float |
| ret float %ret |
| } |
| |
| ; bf8 -> f16 conversion (constant second operand 3) whose result is placed in |
| ; element 1 of a <2 x half>, with element 0 set to 0.0; the vector is then |
| ; bitcast to float and returned. |
| ; Expected lowering differs per run: SelectionDAG with +real-true16 converts |
| ; straight into v0.h and clears v0.l with v_mov_b16; SelectionDAG with |
| ; -real-true16 packs with v_perm_b32; both GlobalISel runs convert into the |
| ; low half and pack with v_lshl_or_b32 using a shift of 16. |
| define amdgpu_ps float @test_cvt_f16_bf8_byte3_hi(i32 %a) { |
| ; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte3_hi: |
| ; GFX1250-SDAG-REAL16: ; %bb.0: |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.h, v0 byte_sel:3 |
| ; GFX1250-SDAG-REAL16-NEXT: v_mov_b16_e32 v0.l, 0 |
| ; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte3_hi: |
| ; GFX1250-SDAG-FAKE16: ; %bb.0: |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3 |
| ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, 0, 0x5040100 |
| ; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte3_hi: |
| ; GFX1250-GISEL-REAL16: ; %bb.0: |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:3 |
| ; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-REAL16-NEXT: v_lshl_or_b32 v0, v0, 16, 0 |
| ; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte3_hi: |
| ; GFX1250-GISEL-FAKE16: ; %bb.0: |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3 |
| ; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, 0 |
| ; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog |
| %cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 3) |
| ; Both lanes are overwritten by the two inserts below, so the placeholder base |
| ; vector never reaches the result; poison is used instead of deprecated undef. |
| %ins.0 = insertelement <2 x half> poison, half 0.0, i32 0 |
| %ins.1 = insertelement <2 x half> %ins.0, half %cvt, i32 1 |
| %ret = bitcast <2 x half> %ins.1 to float |
| ret float %ret |
| } |
| |
| ; fp8 -> f16 conversion of %a with constant second operand 0; the half result |
| ; is extended to float for the return value. |
| ; All four runs expect the e32 form of the convert with no byte_sel modifier. |
| ; The +real-true16 runs address the 16-bit result as v0.l, while the |
| ; -real-true16 runs use the full v0 register. |
| define amdgpu_ps float @test_cvt_f16_fp8_byte0(i32 %a) { |
| ; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_fp8_byte0: |
| ; GFX1250-SDAG-REAL16: ; %bb.0: |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_fp8_e32 v0.l, v0 |
| ; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l |
| ; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_fp8_byte0: |
| ; GFX1250-SDAG-FAKE16: ; %bb.0: |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_fp8_e32 v0, v0 |
| ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_fp8_byte0: |
| ; GFX1250-GISEL-REAL16: ; %bb.0: |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_fp8_e32 v0.l, v0 |
| ; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l |
| ; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_fp8_byte0: |
| ; GFX1250-GISEL-FAKE16: ; %bb.0: |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_fp8_e32 v0, v0 |
| ; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog |
| %cvt = tail call half @llvm.amdgcn.cvt.f16.fp8(i32 %a, i32 0) |
| %ret = fpext half %cvt to float |
| ret float %ret |
| } |
| |
| ; fp8 -> f16 conversion of %a with constant second operand 1; the half result |
| ; is extended to float for the return value. |
| ; A non-zero selector is expected to produce the e64 form of the convert |
| ; carrying a byte_sel modifier with the same value (1 here) in all four runs. |
| define amdgpu_ps float @test_cvt_f16_fp8_byte1(i32 %a) { |
| ; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_fp8_byte1: |
| ; GFX1250-SDAG-REAL16: ; %bb.0: |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_fp8_e64 v0.l, v0 byte_sel:1 |
| ; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l |
| ; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_fp8_byte1: |
| ; GFX1250-SDAG-FAKE16: ; %bb.0: |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_fp8_e64 v0, v0 byte_sel:1 |
| ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_fp8_byte1: |
| ; GFX1250-GISEL-REAL16: ; %bb.0: |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_fp8_e64 v0.l, v0 byte_sel:1 |
| ; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l |
| ; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_fp8_byte1: |
| ; GFX1250-GISEL-FAKE16: ; %bb.0: |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_fp8_e64 v0, v0 byte_sel:1 |
| ; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog |
| %cvt = tail call half @llvm.amdgcn.cvt.f16.fp8(i32 %a, i32 1) |
| %ret = fpext half %cvt to float |
| ret float %ret |
| } |
| |
| ; fp8 -> f16 conversion of %a with constant second operand 2; the half result |
| ; is extended to float for the return value. |
| ; All four runs expect the e64 form of the convert with byte_sel set to 2. |
| define amdgpu_ps float @test_cvt_f16_fp8_byte2(i32 %a) { |
| ; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_fp8_byte2: |
| ; GFX1250-SDAG-REAL16: ; %bb.0: |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_fp8_e64 v0.l, v0 byte_sel:2 |
| ; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l |
| ; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_fp8_byte2: |
| ; GFX1250-SDAG-FAKE16: ; %bb.0: |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_fp8_e64 v0, v0 byte_sel:2 |
| ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_fp8_byte2: |
| ; GFX1250-GISEL-REAL16: ; %bb.0: |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_fp8_e64 v0.l, v0 byte_sel:2 |
| ; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l |
| ; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_fp8_byte2: |
| ; GFX1250-GISEL-FAKE16: ; %bb.0: |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_fp8_e64 v0, v0 byte_sel:2 |
| ; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog |
| %cvt = tail call half @llvm.amdgcn.cvt.f16.fp8(i32 %a, i32 2) |
| %ret = fpext half %cvt to float |
| ret float %ret |
| } |
| |
| ; fp8 -> f16 conversion of %a with constant second operand 3; the half result |
| ; is extended to float for the return value. |
| ; All four runs expect the e64 form of the convert with byte_sel set to 3. |
| define amdgpu_ps float @test_cvt_f16_fp8_byte3(i32 %a) { |
| ; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_fp8_byte3: |
| ; GFX1250-SDAG-REAL16: ; %bb.0: |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_fp8_e64 v0.l, v0 byte_sel:3 |
| ; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l |
| ; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_fp8_byte3: |
| ; GFX1250-SDAG-FAKE16: ; %bb.0: |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_fp8_e64 v0, v0 byte_sel:3 |
| ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_fp8_byte3: |
| ; GFX1250-GISEL-REAL16: ; %bb.0: |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_fp8_e64 v0.l, v0 byte_sel:3 |
| ; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l |
| ; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_fp8_byte3: |
| ; GFX1250-GISEL-FAKE16: ; %bb.0: |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_fp8_e64 v0, v0 byte_sel:3 |
| ; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog |
| %cvt = tail call half @llvm.amdgcn.cvt.f16.fp8(i32 %a, i32 3) |
| %ret = fpext half %cvt to float |
| ret float %ret |
| } |
| |
| ; fp8 -> f16 conversion (constant second operand 3) whose result is placed in |
| ; element 1 of a <2 x half>, with element 0 set to 0.0; the vector is then |
| ; bitcast to float and returned. |
| ; Expected lowering differs per run: SelectionDAG with +real-true16 converts |
| ; straight into v0.h and clears v0.l with v_mov_b16; SelectionDAG with |
| ; -real-true16 packs with v_perm_b32; both GlobalISel runs convert into the |
| ; low half and pack with v_lshl_or_b32 using a shift of 16. |
| define amdgpu_ps float @test_cvt_f16_fp8_byte3_hi(i32 %a) { |
| ; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_fp8_byte3_hi: |
| ; GFX1250-SDAG-REAL16: ; %bb.0: |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_fp8_e64 v0.h, v0 byte_sel:3 |
| ; GFX1250-SDAG-REAL16-NEXT: v_mov_b16_e32 v0.l, 0 |
| ; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_fp8_byte3_hi: |
| ; GFX1250-SDAG-FAKE16: ; %bb.0: |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_fp8_e64 v0, v0 byte_sel:3 |
| ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, 0, 0x5040100 |
| ; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_fp8_byte3_hi: |
| ; GFX1250-GISEL-REAL16: ; %bb.0: |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_fp8_e64 v0.l, v0 byte_sel:3 |
| ; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-REAL16-NEXT: v_lshl_or_b32 v0, v0, 16, 0 |
| ; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_fp8_byte3_hi: |
| ; GFX1250-GISEL-FAKE16: ; %bb.0: |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_fp8_e64 v0, v0 byte_sel:3 |
| ; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, 0 |
| ; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog |
| %cvt = tail call half @llvm.amdgcn.cvt.f16.fp8(i32 %a, i32 3) |
| ; Both lanes are overwritten by the two inserts below, so the placeholder base |
| ; vector never reaches the result; poison is used instead of deprecated undef. |
| %ins.0 = insertelement <2 x half> poison, half 0.0, i32 0 |
| %ins.1 = insertelement <2 x half> %ins.0, half %cvt, i32 1 |
| %ret = bitcast <2 x half> %ins.1 to float |
| ret float %ret |
| } |
| |
| ; Packed bf8 -> <2 x half> conversion of a non-inreg i16 argument; the vector |
| ; result is bitcast to float and returned. |
| ; Each run expects a single convert instruction reading the argument from v0; |
| ; the +real-true16 runs name the source as v0.l, the -real-true16 runs as v0. |
| define amdgpu_ps float @test_cvt_pk_f16_bf8_v(i16 %a) { |
| ; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_f16_bf8_v: |
| ; GFX1250-SDAG-REAL16: ; %bb.0: |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_f16_bf8 v0, v0.l |
| ; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_f16_bf8_v: |
| ; GFX1250-SDAG-FAKE16: ; %bb.0: |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_f16_bf8 v0, v0 |
| ; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_f16_bf8_v: |
| ; GFX1250-GISEL-REAL16: ; %bb.0: |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_f16_bf8 v0, v0.l |
| ; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_f16_bf8_v: |
| ; GFX1250-GISEL-FAKE16: ; %bb.0: |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_f16_bf8 v0, v0 |
| ; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog |
| %cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.bf8(i16 %a) |
| %ret = bitcast <2 x half> %cvt to float |
| ret float %ret |
| } |
| |
| ; Packed bf8 -> <2 x half> conversion of an inreg i16 argument; the vector |
| ; result is bitcast to float and returned. |
| ; The expected convert reads its source directly from s0, and the assertions |
| ; use the prefix shared by all four run configurations. |
| define amdgpu_ps float @test_cvt_pk_f16_bf8_s(i16 inreg %a) { |
| ; GFX1250-LABEL: test_cvt_pk_f16_bf8_s: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: v_cvt_pk_f16_bf8 v0, s0 |
| ; GFX1250-NEXT: ; return to shader part epilog |
| %cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.bf8(i16 %a) |
| %ret = bitcast <2 x half> %cvt to float |
| ret float %ret |
| } |
| |
| ; Packed fp8 -> <2 x half> conversion of a non-inreg i16 argument; the vector |
| ; result is bitcast to float and returned. |
| ; Each run expects a single convert instruction reading the argument from v0; |
| ; the +real-true16 runs name the source as v0.l, the -real-true16 runs as v0. |
| define amdgpu_ps float @test_cvt_pk_f16_fp8_v(i16 %a) { |
| ; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_f16_fp8_v: |
| ; GFX1250-SDAG-REAL16: ; %bb.0: |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.l |
| ; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_f16_fp8_v: |
| ; GFX1250-SDAG-FAKE16: ; %bb.0: |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0 |
| ; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_f16_fp8_v: |
| ; GFX1250-GISEL-REAL16: ; %bb.0: |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.l |
| ; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_f16_fp8_v: |
| ; GFX1250-GISEL-FAKE16: ; %bb.0: |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0 |
| ; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog |
| %cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16 %a) |
| %ret = bitcast <2 x half> %cvt to float |
| ret float %ret |
| } |
| |
| ; Packed fp8 -> <2 x half> conversion of an inreg i16 argument; the vector |
| ; result is bitcast to float and returned. |
| ; The expected convert reads its source directly from s0, and the assertions |
| ; use the prefix shared by all four run configurations. |
| define amdgpu_ps float @test_cvt_pk_f16_fp8_s(i16 inreg %a) { |
| ; GFX1250-LABEL: test_cvt_pk_f16_fp8_s: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: v_cvt_pk_f16_fp8 v0, s0 |
| ; GFX1250-NEXT: ; return to shader part epilog |
| %cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16 %a) |
| %ret = bitcast <2 x half> %cvt to float |
| ret float %ret |
| } |
| |
| ; Packed fp8 -> <2 x half> conversion whose i16 source is element 1 of a |
| ; <2 x i16> argument; the vector result is bitcast to float and returned. |
| ; SelectionDAG with +real-true16 is expected to read the source directly as |
| ; v0.h. The other three runs are expected to shift v0 right by 16 first and |
| ; then convert from the low half. |
| define amdgpu_ps float @test_cvt_pk_f16_fp8_v_hi(<2 x i16> %a) { |
| ; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_f16_fp8_v_hi: |
| ; GFX1250-SDAG-REAL16: ; %bb.0: |
| ; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.h |
| ; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_f16_fp8_v_hi: |
| ; GFX1250-SDAG-FAKE16: ; %bb.0: |
| ; GFX1250-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| ; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0 |
| ; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_f16_fp8_v_hi: |
| ; GFX1250-GISEL-REAL16: ; %bb.0: |
| ; GFX1250-GISEL-REAL16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| ; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.l |
| ; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog |
| ; |
| ; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_f16_fp8_v_hi: |
| ; GFX1250-GISEL-FAKE16: ; %bb.0: |
| ; GFX1250-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| ; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0 |
| ; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog |
| %a.1 = extractelement <2 x i16> %a, i32 1 |
| %cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16 %a.1) |
| %ret = bitcast <2 x half> %cvt to float |
| ret float %ret |
| } |