| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py |
| # RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass si-fold-operands -mattr=+real-true16 -o - %s | FileCheck %s |
| |
| --- |
| # fold_16bit_subreg_1: a 16-bit value is read from %0.sub1_lo16 of a |
| # sreg_64_xexec register and reaches the second source of |
| # V_CMP_EQ_F16_t16_e64 through an sgpr_lo16 -> vgpr_16 COPY chain. |
| # The checks expect both COPYs to be gone and the compare to read |
| # [[DEF]].sub1 directly. |
| name: fold_16bit_subreg_1 |
| tracksRegLiveness: true |
| registers: |
| body: | |
| bb.0.entry: |
| ; CHECK-LABEL: name: fold_16bit_subreg_1 |
| ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[V_CMP_EQ_F16_t16_e64_:%[0-9]+]]:sreg_32 = nofpexcept V_CMP_EQ_F16_t16_e64 0, killed [[DEF1]], 2, [[DEF]].sub1, 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F16_t16_e64_]] |
| %0:sreg_64_xexec = IMPLICIT_DEF |
| %1:sgpr_lo16 = COPY %0.sub1_lo16:sreg_64_xexec |
| %2:vgpr_16 = COPY %1:sgpr_lo16 |
| %3:vgpr_16 = IMPLICIT_DEF |
| %4:sreg_32 = nofpexcept V_CMP_EQ_F16_t16_e64 0, killed %3:vgpr_16, 2, killed %2:vgpr_16, 0, 0, implicit $mode, implicit $exec |
| S_ENDPGM 0, implicit %4 |
| ... |
| |
| --- |
| # fold_16bit_subreg_0: same shape as fold_16bit_subreg_1, but the 16-bit |
| # value is read from %0.lo16 instead of %0.sub1_lo16. The checks expect both |
| # COPYs to be gone and the compare to read [[DEF]].sub0 directly. |
| name: fold_16bit_subreg_0 |
| tracksRegLiveness: true |
| registers: |
| body: | |
| bb.0.entry: |
| ; CHECK-LABEL: name: fold_16bit_subreg_0 |
| ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[V_CMP_EQ_F16_t16_e64_:%[0-9]+]]:sreg_32 = nofpexcept V_CMP_EQ_F16_t16_e64 0, killed [[DEF1]], 2, [[DEF]].sub0, 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F16_t16_e64_]] |
| %0:sreg_64_xexec = IMPLICIT_DEF |
| %1:sgpr_lo16 = COPY %0.lo16:sreg_64_xexec |
| %2:vgpr_16 = COPY %1:sgpr_lo16 |
| %3:vgpr_16 = IMPLICIT_DEF |
| %4:sreg_32 = nofpexcept V_CMP_EQ_F16_t16_e64 0, killed %3:vgpr_16, 2, killed %2:vgpr_16, 0, 0, implicit $mode, implicit $exec |
| S_ENDPGM 0, implicit %4 |
| ... |
| |
| --- |
| # sgpr_lo16: the constant 30 is materialized with S_MOV_B32, its lo16 half is |
| # copied through an sgpr_lo16 -> vgpr_16 COPY chain, and the result is used as |
| # the third source of V_ALIGNBIT_B32_t16_e64. The checks expect the S_MOV_B32 |
| # and both COPYs to be gone, with the immediate 30 placed directly in the |
| # instruction. |
| name: sgpr_lo16 |
| tracksRegLiveness: true |
| registers: |
| body: | |
| bb.0.entry: |
| ; CHECK-LABEL: name: sgpr_lo16 |
| ; CHECK: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[V_ALIGNBIT_B32_t16_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_t16_e64 0, [[DEF]], 0, killed [[DEF1]], 0, 30, 0, 0, implicit $exec |
| ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ALIGNBIT_B32_t16_e64_]] |
| %0:sreg_32 = IMPLICIT_DEF |
| %1:sreg_32 = IMPLICIT_DEF |
| %2:sreg_32 = S_MOV_B32 30 |
| %3:sgpr_lo16 = COPY %2.lo16:sreg_32 |
| %4:vgpr_16 = COPY %3:sgpr_lo16 |
| %5:vgpr_32 = V_ALIGNBIT_B32_t16_e64 0, %0:sreg_32, 0, killed %1:sreg_32, 0, killed %4:vgpr_16, 0, 0, implicit $exec |
| S_ENDPGM 0, implicit %5 |
| ... |
| |
| --- |
| # fold_16bit_madmix_clamp: the V_FMA_MIXLO_F16 result is copied whole (no |
| # subregister index) into a vgpr_16 and used as both sources of a |
| # V_MAX_F16_t16_e64 whose operand after the sources is -1. The checks expect |
| # the V_MAX to be removed, the immediate before [[COPY3]] on V_FMA_MIXLO_F16 |
| # to change from 0 to 1, and $vgpr0 to be copied from the FMA result; the |
| # vgpr_16 COPY itself remains in the output. |
| # NOTE(review): the -1 / 1 operands are presumably the clamp bit, going by the |
| # test name - confirm against the instruction definitions. |
| name: fold_16bit_madmix_clamp |
| tracksRegLiveness: true |
| registers: |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr2 |
| ; CHECK-LABEL: name: fold_16bit_madmix_clamp |
| ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]] |
| ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_16 = COPY [[V_FMA_MIXLO_F16_]] |
| ; CHECK-NEXT: $vgpr0 = COPY [[V_FMA_MIXLO_F16_]] |
| ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0 |
| %0:vgpr_32 = COPY $vgpr2 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = COPY $vgpr0 |
| %3:sreg_32 = IMPLICIT_DEF |
| %4:vgpr_32 = COPY %3 |
| %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec |
| %6:vgpr_16 = COPY %5 |
| %7:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %6, 0, %6, -1, 0, 0, implicit $mode, implicit $exec |
| $vgpr0 = COPY %7 |
| S_ENDPGM 0, implicit $vgpr0 |
| ... |
| |
| --- |
| # fold_16bit_subreg_1_clamp: V_MAX_F16_t16_e64 reads %5.lo16 (a subregister |
| # of the V_FMA_MIXLO_F16 result) directly for both sources. The checks expect |
| # the output to match the input: V_FMA_MIXLO_F16 keeps 0 before [[COPY3]] and |
| # the V_MAX instruction is still present. |
| name: fold_16bit_subreg_1_clamp |
| tracksRegLiveness: true |
| registers: |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr2 |
| ; CHECK-LABEL: name: fold_16bit_subreg_1_clamp |
| ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]] |
| ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[V_FMA_MIXLO_F16_]].lo16, 0, [[V_FMA_MIXLO_F16_]].lo16, -1, 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]] |
| ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0 |
| %0:vgpr_32 = COPY $vgpr2 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = COPY $vgpr0 |
| %3:sreg_32 = IMPLICIT_DEF |
| %4:vgpr_32 = COPY %3 |
| %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec |
| %6:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %5.lo16, 0, %5.lo16, -1, 0, 0, implicit $mode, implicit $exec |
| $vgpr0 = COPY %6 |
| S_ENDPGM 0, implicit $vgpr0 |
| ... |
| |
| --- |
| # fold_16bit_subreg_2_clamp: the V_FMA_MIXLO_F16 result reaches |
| # V_MAX_F16_t16_e64 through a subregister COPY (%6 = COPY %5.lo16). The |
| # checks expect the COPY to be gone and V_MAX to read |
| # [[V_FMA_MIXLO_F16_]].lo16 directly, while the V_MAX instruction itself |
| # stays and V_FMA_MIXLO_F16 keeps 0 before [[COPY3]]. |
| name: fold_16bit_subreg_2_clamp |
| tracksRegLiveness: true |
| registers: |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr2 |
| ; CHECK-LABEL: name: fold_16bit_subreg_2_clamp |
| ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]] |
| ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[V_FMA_MIXLO_F16_]].lo16, 0, [[V_FMA_MIXLO_F16_]].lo16, -1, 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]] |
| ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0 |
| %0:vgpr_32 = COPY $vgpr2 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = COPY $vgpr0 |
| %3:sreg_32 = IMPLICIT_DEF |
| %4:vgpr_32 = COPY %3 |
| %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec |
| %6:vgpr_16 = COPY %5.lo16 |
| %7:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %6, 0, %6, -1, 0, 0, implicit $mode, implicit $exec |
| $vgpr0 = COPY %7 |
| S_ENDPGM 0, implicit $vgpr0 |
| ... |
| |
| --- |
| # fold_16bit_phyreg_clamp: the V_FMA_MIXLO_F16 result is copied into the |
| # physical register $vgpr10_lo16, and V_MAX_F16_t16_e64 reads that physical |
| # register for both sources. The checks expect the output to match the input: |
| # the physical-register COPY and the V_MAX both remain, and V_FMA_MIXLO_F16 |
| # keeps 0 before [[COPY3]]. |
| name: fold_16bit_phyreg_clamp |
| tracksRegLiveness: true |
| registers: |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr2 |
| ; CHECK-LABEL: name: fold_16bit_phyreg_clamp |
| ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]] |
| ; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: $vgpr10_lo16 = COPY [[V_FMA_MIXLO_F16_]] |
| ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]] |
| ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0 |
| %0:vgpr_32 = COPY $vgpr2 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = COPY $vgpr0 |
| %3:sreg_32 = IMPLICIT_DEF |
| %4:vgpr_32 = COPY %3 |
| %5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec |
| $vgpr10_lo16 = COPY %5 |
| %6:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec |
| $vgpr0 = COPY %6 |
| S_ENDPGM 0, implicit $vgpr0 |
| ... |
| |
| --- |
| # fold_16bit_undef_clamp: both sources of V_MAX_F16_t16_e64 are a vgpr_16 |
| # defined by IMPLICIT_DEF rather than by an arithmetic instruction. The checks |
| # expect the output to match the input, with the V_MAX still present. |
| name: fold_16bit_undef_clamp |
| tracksRegLiveness: true |
| registers: |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr2 |
| ; CHECK-LABEL: name: fold_16bit_undef_clamp |
| ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 |
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF |
| ; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[DEF]], 0, [[DEF]], -1, 0, 0, implicit $mode, implicit $exec |
| ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]] |
| ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0 |
| %0:vgpr_32 = COPY $vgpr2 |
| %1:vgpr_32 = COPY $vgpr1 |
| %2:vgpr_32 = COPY $vgpr0 |
| %3:vgpr_16 = IMPLICIT_DEF |
| %4:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %3, 0, %3, -1, 0, 0, implicit $mode, implicit $exec |
| $vgpr0 = COPY %4 |
| S_ENDPGM 0, implicit $vgpr0 |
| ... |