| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s |
| ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefix=GFX78 %s |
| ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefix=GFX78 %s |
| |
define double @v_floor_f64_ieee(double %x) {
; GFX6-LABEL: v_floor_f64_ieee:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_ieee:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
; GFX78-NEXT: s_setpc_b64 s[30:31]
; Baseline (default/IEEE mode) f64 floor on a VGPR input: GFX6 expands it as
; x - min(fract(x), 0x3FEFFFFFFFFFFFFF), with an ordered-compare/select so a
; NaN input is passed through unchanged; GFX7/GFX8 select v_floor_f64 directly.
%result = call double @llvm.floor.f64(double %x)
ret double %result
}
| |
define double @v_floor_f64_ieee_nnan(double %x) {
; GFX6-LABEL: v_floor_f64_ieee_nnan:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_ieee_nnan:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
; GFX78-NEXT: s_setpc_b64 s[30:31]
; Same as v_floor_f64_ieee but with the nnan fast-math flag on the call:
; the GFX6 expansion drops the v_cmp_o/v_cndmask NaN-passthrough guard.
%result = call nnan double @llvm.floor.f64(double %x)
ret double %result
}
| |
define double @v_floor_f64_ieee_fneg(double %x) {
; GFX6-LABEL: v_floor_f64_ieee_fneg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1]
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT: v_add_f64 v[0:1], -v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_ieee_fneg:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e64 v[0:1], -v[0:1]
; GFX78-NEXT: s_setpc_b64 s[30:31]
; floor(fneg x): the fneg must fold into VOP source modifiers (-v[0:1]) on
; both the GFX6 expansion and the GFX7/8 v_floor_f64, not emit a separate xor.
%neg.x = fneg double %x
%result = call double @llvm.floor.f64(double %neg.x)
ret double %result
}
| |
define double @v_floor_f64_nonieee(double %x) #1 {
; GFX6-LABEL: v_floor_f64_nonieee:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_nonieee:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
; GFX78-NEXT: s_setpc_b64 s[30:31]
; Same as v_floor_f64_ieee but with "amdgpu-ieee"="false" (attribute #1):
; the checked output is identical, including the GFX6 NaN guard.
%result = call double @llvm.floor.f64(double %x)
ret double %result
}
| |
define double @v_floor_f64_nonieee_nnan(double %x) #1 {
; GFX6-LABEL: v_floor_f64_nonieee_nnan:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_nonieee_nnan:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
; GFX78-NEXT: s_setpc_b64 s[30:31]
; "amdgpu-ieee"="false" plus nnan on the call: as in the IEEE nnan variant,
; the GFX6 expansion omits the NaN compare/select.
%result = call nnan double @llvm.floor.f64(double %x)
ret double %result
}
| |
define double @v_floor_f64_non_ieee_fneg(double %x) #1 {
; GFX6-LABEL: v_floor_f64_non_ieee_fneg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1]
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT: v_add_f64 v[0:1], -v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_non_ieee_fneg:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e64 v[0:1], -v[0:1]
; GFX78-NEXT: s_setpc_b64 s[30:31]
; floor(fneg x) with "amdgpu-ieee"="false": output matches the IEEE fneg
; variant — the fneg still folds into -v[0:1] source modifiers.
%neg.x = fneg double %x
%result = call double @llvm.floor.f64(double %neg.x)
ret double %result
}
| |
define double @v_floor_f64_fabs(double %x) {
; GFX6-LABEL: v_floor_f64_fabs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], |v[0:1]|
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT: v_add_f64 v[0:1], |v[0:1]|, -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_fabs:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e64 v[0:1], |v[0:1]|
; GFX78-NEXT: s_setpc_b64 s[30:31]
; floor(fabs x): the fabs folds into the |v[0:1]| source modifier on both
; the GFX6 expansion and the GFX7/8 v_floor_f64_e64.
%abs.x = call double @llvm.fabs.f64(double %x)
%result = call double @llvm.floor.f64(double %abs.x)
ret double %result
}
| |
define double @v_floor_f64_fneg_fabs(double %x) {
; GFX6-LABEL: v_floor_f64_fneg_fabs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], -|v[0:1]|
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT: v_add_f64 v[0:1], -|v[0:1]|, -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_fneg_fabs:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e64 v[0:1], -|v[0:1]|
; GFX78-NEXT: s_setpc_b64 s[30:31]
; floor(fneg (fabs x)): both modifiers combine into the single -|v[0:1]|
; source-modifier form on all targets.
%abs.x = call double @llvm.fabs.f64(double %x)
%neg.abs.x = fneg double %abs.x
%result = call double @llvm.floor.f64(double %neg.abs.x)
ret double %result
}
| |
define amdgpu_ps <2 x float> @s_floor_f64(double inreg %x) {
; GFX6-LABEL: s_floor_f64:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e32 v[0:1], s[2:3]
; GFX6-NEXT: s_mov_b32 s0, -1
; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_add_f64 v[0:1], s[2:3], -v[0:1]
; GFX6-NEXT: ; return to shader part epilog
;
; GFX78-LABEL: s_floor_f64:
; GFX78: ; %bb.0:
; GFX78-NEXT: v_floor_f64_e32 v[0:1], s[2:3]
; GFX78-NEXT: ; return to shader part epilog
; Uniform (SGPR, inreg) input in an amdgpu_ps shader: the s[2:3] source is
; used directly by the VALU ops, with the result returned in v[0:1] as
; <2 x float> (bitcast of the f64).
%result = call double @llvm.floor.f64(double %x)
%cast = bitcast double %result to <2 x float>
ret <2 x float> %cast
}
| |
define amdgpu_ps <2 x float> @s_floor_f64_fneg(double inreg %x) {
; GFX6-LABEL: s_floor_f64_fneg:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e64 v[0:1], -s[2:3]
; GFX6-NEXT: s_mov_b32 s0, -1
; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_add_f64 v[0:1], -s[2:3], -v[0:1]
; GFX6-NEXT: ; return to shader part epilog
;
; GFX78-LABEL: s_floor_f64_fneg:
; GFX78: ; %bb.0:
; GFX78-NEXT: v_floor_f64_e64 v[0:1], -s[2:3]
; GFX78-NEXT: ; return to shader part epilog
; Uniform input with fneg: the modifier folds onto the SGPR source (-s[2:3])
; in both the GFX6 expansion and the GFX7/8 v_floor_f64_e64.
%neg.x = fneg double %x
%result = call double @llvm.floor.f64(double %neg.x)
%cast = bitcast double %result to <2 x float>
ret <2 x float> %cast
}
| |
define amdgpu_ps <2 x float> @s_floor_f64_fabs(double inreg %x) {
; GFX6-LABEL: s_floor_f64_fabs:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e64 v[0:1], |s[2:3]|
; GFX6-NEXT: s_mov_b32 s0, -1
; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_add_f64 v[0:1], |s[2:3]|, -v[0:1]
; GFX6-NEXT: ; return to shader part epilog
;
; GFX78-LABEL: s_floor_f64_fabs:
; GFX78: ; %bb.0:
; GFX78-NEXT: v_floor_f64_e64 v[0:1], |s[2:3]|
; GFX78-NEXT: ; return to shader part epilog
; Uniform input with fabs: the modifier folds onto the SGPR source (|s[2:3]|)
; in both the GFX6 expansion and the GFX7/8 v_floor_f64_e64.
%abs.x = call double @llvm.fabs.f64(double %x)
%result = call double @llvm.floor.f64(double %abs.x)
%cast = bitcast double %result to <2 x float>
ret <2 x float> %cast
}
| |
define amdgpu_ps <2 x float> @s_floor_f64_fneg_fabs(double inreg %x) {
; GFX6-LABEL: s_floor_f64_fneg_fabs:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e64 v[0:1], -|s[2:3]|
; GFX6-NEXT: s_mov_b32 s0, -1
; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_add_f64 v[0:1], -|s[2:3]|, -v[0:1]
; GFX6-NEXT: ; return to shader part epilog
;
; GFX78-LABEL: s_floor_f64_fneg_fabs:
; GFX78: ; %bb.0:
; GFX78-NEXT: v_floor_f64_e64 v[0:1], -|s[2:3]|
; GFX78-NEXT: ; return to shader part epilog
; Uniform input with fneg(fabs): both modifiers combine into the single
; -|s[2:3]| source-modifier form on all targets.
%abs.x = call double @llvm.fabs.f64(double %x)
%neg.abs.x = fneg double %abs.x
%result = call double @llvm.floor.f64(double %neg.abs.x)
%cast = bitcast double %result to <2 x float>
ret <2 x float> %cast
}
| |
| declare double @llvm.floor.f64(double) #0 |
| declare double @llvm.fabs.f64(double) #0 |
| |
| attributes #0 = { nounwind readnone speculatable willreturn } |
| attributes #1 = { "amdgpu-ieee"="false" } |