# blob: 93cc12f152ccaf84ce70b991a6620d4b7adf4b10 [file] [log] [blame] [edit]
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass si-fold-operands -mattr=+real-true16 -o - %s | FileCheck %s
---
# A 16-bit value read from %0.sub1_lo16 (an sreg_64_xexec) reaches the second
# source of V_CMP_EQ_F16_t16_e64 through an sgpr_lo16 COPY followed by a
# vgpr_16 COPY. The expected output contains neither COPY, and the compare
# reads the .sub1 subregister of the 64-bit definition directly.
name: fold_16bit_subreg_1
tracksRegLiveness: true
registers:
body: |
bb.0.entry:
; CHECK-LABEL: name: fold_16bit_subreg_1
; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
; CHECK-NEXT: [[V_CMP_EQ_F16_t16_e64_:%[0-9]+]]:sreg_32 = nofpexcept V_CMP_EQ_F16_t16_e64 0, killed [[DEF1]], 2, [[DEF]].sub1, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F16_t16_e64_]]
%0:sreg_64_xexec = IMPLICIT_DEF
%1:sgpr_lo16 = COPY %0.sub1_lo16:sreg_64_xexec
%2:vgpr_16 = COPY %1:sgpr_lo16
%3:vgpr_16 = IMPLICIT_DEF
%4:sreg_32 = nofpexcept V_CMP_EQ_F16_t16_e64 0, killed %3:vgpr_16, 2, killed %2:vgpr_16, 0, 0, implicit $mode, implicit $exec
S_ENDPGM 0, implicit %4
...
---
# Same shape as fold_16bit_subreg_1, but the 16-bit value is read from
# %0.lo16 instead of %0.sub1_lo16. The expected output contains neither COPY,
# and the compare reads the .sub0 subregister of the 64-bit definition.
name: fold_16bit_subreg_0
tracksRegLiveness: true
registers:
body: |
bb.0.entry:
; CHECK-LABEL: name: fold_16bit_subreg_0
; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
; CHECK-NEXT: [[V_CMP_EQ_F16_t16_e64_:%[0-9]+]]:sreg_32 = nofpexcept V_CMP_EQ_F16_t16_e64 0, killed [[DEF1]], 2, [[DEF]].sub0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F16_t16_e64_]]
%0:sreg_64_xexec = IMPLICIT_DEF
%1:sgpr_lo16 = COPY %0.lo16:sreg_64_xexec
%2:vgpr_16 = COPY %1:sgpr_lo16
%3:vgpr_16 = IMPLICIT_DEF
%4:sreg_32 = nofpexcept V_CMP_EQ_F16_t16_e64 0, killed %3:vgpr_16, 2, killed %2:vgpr_16, 0, 0, implicit $mode, implicit $exec
S_ENDPGM 0, implicit %4
...
---
# The constant 30 is materialized with S_MOV_B32, narrowed through a .lo16
# COPY into sgpr_lo16, copied again into vgpr_16, and used as the third source
# of V_ALIGNBIT_B32_t16_e64. The expected output contains no S_MOV_B32 and no
# COPY; the instruction takes the immediate 30 in place of the register.
name: sgpr_lo16
tracksRegLiveness: true
registers:
body: |
bb.0.entry:
; CHECK-LABEL: name: sgpr_lo16
; CHECK: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: [[V_ALIGNBIT_B32_t16_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_t16_e64 0, [[DEF]], 0, killed [[DEF1]], 0, 30, 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ALIGNBIT_B32_t16_e64_]]
%0:sreg_32 = IMPLICIT_DEF
%1:sreg_32 = IMPLICIT_DEF
%2:sreg_32 = S_MOV_B32 30
%3:sgpr_lo16 = COPY %2.lo16:sreg_32
%4:vgpr_16 = COPY %3:sgpr_lo16
%5:vgpr_32 = V_ALIGNBIT_B32_t16_e64 0, %0:sreg_32, 0, killed %1:sreg_32, 0, killed %4:vgpr_16, 0, 0, implicit $exec
S_ENDPGM 0, implicit %5
...
---
# The 32-bit result of V_FMA_MIXLO_F16 is copied (without a subregister index)
# into a vgpr_16, which feeds both sources of a V_MAX_F16_t16_e64 whose operand
# after the second source is -1 (presumably the clamp bit, going by the test
# name -- confirm against the instruction definition). In the expected output
# the V_MAX_F16_t16_e64 is gone, the immediate just before the last register
# operand of V_FMA_MIXLO_F16 has changed from 0 to 1, the vgpr_16 COPY is still
# present, and $vgpr0 is copied straight from the V_FMA_MIXLO_F16 result.
name: fold_16bit_madmix_clamp
tracksRegLiveness: true
registers:
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-LABEL: name: fold_16bit_madmix_clamp
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_16 = COPY [[V_FMA_MIXLO_F16_]]
; CHECK-NEXT: $vgpr0 = COPY [[V_FMA_MIXLO_F16_]]
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
%0:vgpr_32 = COPY $vgpr2
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = COPY $vgpr0
%3:sreg_32 = IMPLICIT_DEF
%4:vgpr_32 = COPY %3
%5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
%6:vgpr_16 = COPY %5
%7:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %6, 0, %6, -1, 0, 0, implicit $mode, implicit $exec
$vgpr0 = COPY %7
S_ENDPGM 0, implicit $vgpr0
...
---
# Variant of fold_16bit_madmix_clamp in which V_MAX_F16_t16_e64 reads the
# .lo16 subregister of the V_FMA_MIXLO_F16 result directly, with no
# intermediate COPY. The expected output is the same instruction sequence as
# the input: V_MAX_F16_t16_e64 stays and the V_FMA_MIXLO_F16 operands are
# unchanged.
name: fold_16bit_subreg_1_clamp
tracksRegLiveness: true
registers:
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-LABEL: name: fold_16bit_subreg_1_clamp
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[V_FMA_MIXLO_F16_]].lo16, 0, [[V_FMA_MIXLO_F16_]].lo16, -1, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
%0:vgpr_32 = COPY $vgpr2
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = COPY $vgpr0
%3:sreg_32 = IMPLICIT_DEF
%4:vgpr_32 = COPY %3
%5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
%6:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %5.lo16, 0, %5.lo16, -1, 0, 0, implicit $mode, implicit $exec
$vgpr0 = COPY %6
S_ENDPGM 0, implicit $vgpr0
...
---
# Variant of fold_16bit_madmix_clamp in which the vgpr_16 fed to
# V_MAX_F16_t16_e64 is a COPY of the .lo16 subregister of the V_FMA_MIXLO_F16
# result. In the expected output that COPY is gone and V_MAX_F16_t16_e64 reads
# the .lo16 subregister directly, but V_MAX_F16_t16_e64 itself stays and the
# V_FMA_MIXLO_F16 operands are unchanged.
name: fold_16bit_subreg_2_clamp
tracksRegLiveness: true
registers:
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-LABEL: name: fold_16bit_subreg_2_clamp
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[V_FMA_MIXLO_F16_]].lo16, 0, [[V_FMA_MIXLO_F16_]].lo16, -1, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
%0:vgpr_32 = COPY $vgpr2
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = COPY $vgpr0
%3:sreg_32 = IMPLICIT_DEF
%4:vgpr_32 = COPY %3
%5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
%6:vgpr_16 = COPY %5.lo16
%7:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %6, 0, %6, -1, 0, 0, implicit $mode, implicit $exec
$vgpr0 = COPY %7
S_ENDPGM 0, implicit $vgpr0
...
---
# Variant of fold_16bit_madmix_clamp in which the V_FMA_MIXLO_F16 result is
# copied into the physical register $vgpr10_lo16 and V_MAX_F16_t16_e64 reads
# that physical register for both sources. The expected output is the same
# instruction sequence as the input: nothing is folded.
name: fold_16bit_phyreg_clamp
tracksRegLiveness: true
registers:
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-LABEL: name: fold_16bit_phyreg_clamp
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: $vgpr10_lo16 = COPY [[V_FMA_MIXLO_F16_]]
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
%0:vgpr_32 = COPY $vgpr2
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = COPY $vgpr0
%3:sreg_32 = IMPLICIT_DEF
%4:vgpr_32 = COPY %3
%5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
$vgpr10_lo16 = COPY %5
%6:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
$vgpr0 = COPY %6
S_ENDPGM 0, implicit $vgpr0
...
---
# V_MAX_F16_t16_e64 (with -1 as the operand after its second source) takes
# both sources from a vgpr_16 IMPLICIT_DEF rather than from an arithmetic
# instruction. The expected output is the same instruction sequence as the
# input: the V_MAX_F16_t16_e64 is kept as-is.
name: fold_16bit_undef_clamp
tracksRegLiveness: true
registers:
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-LABEL: name: fold_16bit_undef_clamp
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[DEF]], 0, [[DEF]], -1, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
%0:vgpr_32 = COPY $vgpr2
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = COPY $vgpr0
%3:vgpr_16 = IMPLICIT_DEF
%4:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %3, 0, %3, -1, 0, 0, implicit $mode, implicit $exec
$vgpr0 = COPY %4
S_ENDPGM 0, implicit $vgpr0
...