llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll - third_party/github.com/llvm/llvm-project - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=regbankselect -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=regbankselect -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s

 ; Make sure we don't violate the constant bus restriction
 ; FIXME: Make this test isa output when div.fmas works.


 define amdgpu_ps float @fmul_s_s(float inreg %src0, float inreg %src1) {
   ; GFX9-LABEL: name: fmul_s_s
   ; GFX9: bb.1 (%ir-block.0):
   ; GFX9:   liveins: $sgpr2, $sgpr3
   ; GFX9:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX9:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX9:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX9:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX9:   [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY2]], [[COPY3]]
   ; GFX9:   $vgpr0 = COPY [[FMUL]](s32)
   ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ; GFX10-LABEL: name: fmul_s_s
   ; GFX10: bb.1 (%ir-block.0):
   ; GFX10:   liveins: $sgpr2, $sgpr3
   ; GFX10:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX10:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX10:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX10:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX10:   [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY2]], [[COPY3]]
   ; GFX10:   $vgpr0 = COPY [[FMUL]](s32)
   ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %result = fmul float %src0, %src1
   ret float %result
 }

 define amdgpu_ps float @fmul_ss(float inreg %src) {
   ; GFX9-LABEL: name: fmul_ss
   ; GFX9: bb.1 (%ir-block.0):
   ; GFX9:   liveins: $sgpr2
   ; GFX9:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX9:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX9:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX9:   [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY1]], [[COPY2]]
   ; GFX9:   $vgpr0 = COPY [[FMUL]](s32)
   ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ; GFX10-LABEL: name: fmul_ss
   ; GFX10: bb.1 (%ir-block.0):
   ; GFX10:   liveins: $sgpr2
   ; GFX10:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX10:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX10:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX10:   [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY1]], [[COPY2]]
   ; GFX10:   $vgpr0 = COPY [[FMUL]](s32)
   ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %result = fmul float %src, %src
   ret float %result
 }

 ; Ternary operation with 3 different SGPRs
 define amdgpu_ps float @fma_s_s_s(float inreg %src0, float inreg %src1, float inreg %src2) {
   ; GFX9-LABEL: name: fma_s_s_s
   ; GFX9: bb.1 (%ir-block.0):
   ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4
   ; GFX9:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX9:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX9:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
   ; GFX9:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX9:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX9:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
   ; GFX9:   [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY4]], [[COPY5]]
   ; GFX9:   $vgpr0 = COPY [[FMA]](s32)
   ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ; GFX10-LABEL: name: fma_s_s_s
   ; GFX10: bb.1 (%ir-block.0):
   ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4
   ; GFX10:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX10:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX10:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
   ; GFX10:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX10:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX10:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
   ; GFX10:   [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY4]], [[COPY5]]
   ; GFX10:   $vgpr0 = COPY [[FMA]](s32)
   ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %result = call float @llvm.fma.f32(float %src0, float %src1, float %src2)
   ret float %result
 }

 ; Ternary operation with 3 identical SGPRs
 define amdgpu_ps float @fma_sss(float inreg %src) {
   ; GFX9-LABEL: name: fma_sss
   ; GFX9: bb.1 (%ir-block.0):
   ; GFX9:   liveins: $sgpr2
   ; GFX9:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX9:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX9:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX9:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX9:   [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY1]], [[COPY2]], [[COPY3]]
   ; GFX9:   $vgpr0 = COPY [[FMA]](s32)
   ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ; GFX10-LABEL: name: fma_sss
   ; GFX10: bb.1 (%ir-block.0):
   ; GFX10:   liveins: $sgpr2
   ; GFX10:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX10:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX10:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX10:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX10:   [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY1]], [[COPY2]], [[COPY3]]
   ; GFX10:   $vgpr0 = COPY [[FMA]](s32)
   ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %result = call float @llvm.fma.f32(float %src, float %src, float %src)
   ret float %result
 }

 ; src0/1 are same SGPR
 define amdgpu_ps float @fma_ss_s(float inreg %src01, float inreg %src2) {
   ; GFX9-LABEL: name: fma_ss_s
   ; GFX9: bb.1 (%ir-block.0):
   ; GFX9:   liveins: $sgpr2, $sgpr3
   ; GFX9:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX9:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX9:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX9:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX9:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX9:   [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]]
   ; GFX9:   $vgpr0 = COPY [[FMA]](s32)
   ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ; GFX10-LABEL: name: fma_ss_s
   ; GFX10: bb.1 (%ir-block.0):
   ; GFX10:   liveins: $sgpr2, $sgpr3
   ; GFX10:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX10:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX10:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX10:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX10:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX10:   [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]]
   ; GFX10:   $vgpr0 = COPY [[FMA]](s32)
   ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %result = call float @llvm.fma.f32(float %src01, float %src01, float %src2)
   ret float %result
 }

 ; src1/2 are same SGPR
 define amdgpu_ps float @fma_s_ss(float inreg %src0, float inreg %src12) {
   ; GFX9-LABEL: name: fma_s_ss
   ; GFX9: bb.1 (%ir-block.0):
   ; GFX9:   liveins: $sgpr2, $sgpr3
   ; GFX9:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX9:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX9:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX9:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX9:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX9:   [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]]
   ; GFX9:   $vgpr0 = COPY [[FMA]](s32)
   ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ; GFX10-LABEL: name: fma_s_ss
   ; GFX10: bb.1 (%ir-block.0):
   ; GFX10:   liveins: $sgpr2, $sgpr3
   ; GFX10:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX10:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX10:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX10:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX10:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX10:   [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]]
   ; GFX10:   $vgpr0 = COPY [[FMA]](s32)
   ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %result = call float @llvm.fma.f32(float %src0, float %src12, float %src12)
   ret float %result
 }

 ; src0/2 are same SGPR
 define amdgpu_ps float @fma_ss_s_same_outer(float inreg %src02, float inreg %src1) {
   ; GFX9-LABEL: name: fma_ss_s_same_outer
   ; GFX9: bb.1 (%ir-block.0):
   ; GFX9:   liveins: $sgpr2, $sgpr3
   ; GFX9:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX9:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX9:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX9:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX9:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX9:   [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]]
   ; GFX9:   $vgpr0 = COPY [[FMA]](s32)
   ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ; GFX10-LABEL: name: fma_ss_s_same_outer
   ; GFX10: bb.1 (%ir-block.0):
   ; GFX10:   liveins: $sgpr2, $sgpr3
   ; GFX10:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX10:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX10:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX10:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX10:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX10:   [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]]
   ; GFX10:   $vgpr0 = COPY [[FMA]](s32)
   ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %result = call float @llvm.fma.f32(float %src02, float %src1, float %src02)
   ret float %result
 }

 define amdgpu_ps float @fcmp_s_s(float inreg %src0, float inreg %src1) {
   ; GFX9-LABEL: name: fcmp_s_s
   ; GFX9: bb.1 (%ir-block.0):
   ; GFX9:   liveins: $sgpr2, $sgpr3
   ; GFX9:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX9:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX9:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX9:   [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY2]]
   ; GFX9:   [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
   ; GFX9:   [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
   ; GFX9:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
   ; GFX9:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
   ; GFX9:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY3]], [[COPY4]]
   ; GFX9:   $vgpr0 = COPY [[SELECT]](s32)
   ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ; GFX10-LABEL: name: fcmp_s_s
   ; GFX10: bb.1 (%ir-block.0):
   ; GFX10:   liveins: $sgpr2, $sgpr3
   ; GFX10:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX10:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX10:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX10:   [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY2]]
   ; GFX10:   [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
   ; GFX10:   [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
   ; GFX10:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
   ; GFX10:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
   ; GFX10:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY3]], [[COPY4]]
   ; GFX10:   $vgpr0 = COPY [[SELECT]](s32)
   ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %cmp = fcmp oeq float %src0, %src1
   %result = select i1 %cmp, float 1.0, float 0.0
   ret float %result
 }

 define amdgpu_ps float @select_vcc_s_s(float %cmp0, float %cmp1, float inreg %src0, float inreg %src1) {
   ; GFX9-LABEL: name: select_vcc_s_s
   ; GFX9: bb.1 (%ir-block.0):
   ; GFX9:   liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
   ; GFX9:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
   ; GFX9:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
   ; GFX9:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX9:   [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX9:   [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]]
   ; GFX9:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
   ; GFX9:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32)
   ; GFX9:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]]
   ; GFX9:   $vgpr0 = COPY [[SELECT]](s32)
   ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ; GFX10-LABEL: name: select_vcc_s_s
   ; GFX10: bb.1 (%ir-block.0):
   ; GFX10:   liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
   ; GFX10:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
   ; GFX10:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
   ; GFX10:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX10:   [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX10:   [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]]
   ; GFX10:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
   ; GFX10:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32)
   ; GFX10:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]]
   ; GFX10:   $vgpr0 = COPY [[SELECT]](s32)
   ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %cmp = fcmp oeq float %cmp0, %cmp1
   %result = select i1 %cmp, float %src0, float %src1
   ret float %result
 }

 define amdgpu_ps float @select_vcc_fneg_s_s(float %cmp0, float %cmp1, float inreg %src0, float inreg %src1) {
   ; GFX9-LABEL: name: select_vcc_fneg_s_s
   ; GFX9: bb.1 (%ir-block.0):
   ; GFX9:   liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
   ; GFX9:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
   ; GFX9:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
   ; GFX9:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX9:   [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX9:   [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]]
   ; GFX9:   [[FNEG:%[0-9]+]]:sgpr(s32) = G_FNEG [[COPY2]]
   ; GFX9:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[FNEG]](s32)
   ; GFX9:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32)
   ; GFX9:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]]
   ; GFX9:   $vgpr0 = COPY [[SELECT]](s32)
   ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ; GFX10-LABEL: name: select_vcc_fneg_s_s
   ; GFX10: bb.1 (%ir-block.0):
   ; GFX10:   liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
   ; GFX10:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
   ; GFX10:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
   ; GFX10:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX10:   [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX10:   [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]]
   ; GFX10:   [[FNEG:%[0-9]+]]:sgpr(s32) = G_FNEG [[COPY2]]
   ; GFX10:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[FNEG]](s32)
   ; GFX10:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32)
   ; GFX10:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]]
   ; GFX10:   $vgpr0 = COPY [[SELECT]](s32)
   ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %cmp = fcmp oeq float %cmp0, %cmp1
   %neg.src0 = fneg float %src0
   %result = select i1 %cmp, float %neg.src0, float %src1
   ret float %result
 }

 ; Constant bus used by vcc
 define amdgpu_ps float @amdgcn_div_fmas_sss(float inreg %src, float %cmp.src) {
   ; GFX9-LABEL: name: amdgcn_div_fmas_sss
   ; GFX9: bb.1 (%ir-block.0):
   ; GFX9:   liveins: $sgpr2, $vgpr0
   ; GFX9:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX9:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
   ; GFX9:   [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
   ; GFX9:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
   ; GFX9:   [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY1]](s32), [[COPY2]]
   ; GFX9:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX9:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX9:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX9:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[FCMP]](s1)
   ; GFX9:   $vgpr0 = COPY [[INT]](s32)
   ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ; GFX10-LABEL: name: amdgcn_div_fmas_sss
   ; GFX10: bb.1 (%ir-block.0):
   ; GFX10:   liveins: $sgpr2, $vgpr0
   ; GFX10:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX10:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
   ; GFX10:   [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
   ; GFX10:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
   ; GFX10:   [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY1]](s32), [[COPY2]]
   ; GFX10:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX10:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX10:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX10:   [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[FCMP]](s1)
   ; GFX10:   $vgpr0 = COPY [[INT]](s32)
   ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %vcc = fcmp oeq float %cmp.src, 0.0
   %result = call float @llvm.amdgcn.div.fmas.f32(float %src, float %src, float %src, i1 %vcc)
   ret float %result
 }

 define amdgpu_ps float @class_s_s(float inreg %src0, i32 inreg %src1) {
   ; GFX9-LABEL: name: class_s_s
   ; GFX9: bb.1 (%ir-block.0):
   ; GFX9:   liveins: $sgpr2, $sgpr3
   ; GFX9:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX9:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX9:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX9:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX9:   [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s32), [[COPY3]](s32)
   ; GFX9:   [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
   ; GFX9:   [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
   ; GFX9:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
   ; GFX9:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
   ; GFX9:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[INT]](s1), [[COPY4]], [[COPY5]]
   ; GFX9:   $vgpr0 = COPY [[SELECT]](s32)
   ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ; GFX10-LABEL: name: class_s_s
   ; GFX10: bb.1 (%ir-block.0):
   ; GFX10:   liveins: $sgpr2, $sgpr3
   ; GFX10:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX10:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX10:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX10:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX10:   [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s32), [[COPY3]](s32)
   ; GFX10:   [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
   ; GFX10:   [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
   ; GFX10:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
   ; GFX10:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
   ; GFX10:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[INT]](s1), [[COPY4]], [[COPY5]]
   ; GFX10:   $vgpr0 = COPY [[SELECT]](s32)
   ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %class = call i1 @llvm.amdgcn.class.f32(float %src0, i32 %src1)
   %result = select i1 %class, float 1.0, float 0.0
   ret float %result
 }

 define amdgpu_ps float @div_scale_s_s_true(float inreg %src0, float inreg %src1) {
   ; GFX9-LABEL: name: div_scale_s_s_true
   ; GFX9: bb.1 (%ir-block.0):
   ; GFX9:   liveins: $sgpr2, $sgpr3
   ; GFX9:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX9:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX9:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX9:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX9:   [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), -1
   ; GFX9:   $vgpr0 = COPY [[INT]](s32)
   ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ; GFX10-LABEL: name: div_scale_s_s_true
   ; GFX10: bb.1 (%ir-block.0):
   ; GFX10:   liveins: $sgpr2, $sgpr3
   ; GFX10:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX10:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX10:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX10:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX10:   [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), -1
   ; GFX10:   $vgpr0 = COPY [[INT]](s32)
   ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %div.scale = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, float %src1, i1 true)
   %result = extractvalue { float, i1 } %div.scale, 0
   ret float %result
 }

 define amdgpu_ps float @div_scale_s_s_false(float inreg %src0, float inreg %src1) {
   ; GFX9-LABEL: name: div_scale_s_s_false
   ; GFX9: bb.1 (%ir-block.0):
   ; GFX9:   liveins: $sgpr2, $sgpr3
   ; GFX9:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX9:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX9:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX9:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX9:   [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), 0
   ; GFX9:   $vgpr0 = COPY [[INT]](s32)
   ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0
   ; GFX10-LABEL: name: div_scale_s_s_false
   ; GFX10: bb.1 (%ir-block.0):
   ; GFX10:   liveins: $sgpr2, $sgpr3
   ; GFX10:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
   ; GFX10:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
   ; GFX10:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
   ; GFX10:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; GFX10:   [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), 0
   ; GFX10:   $vgpr0 = COPY [[INT]](s32)
   ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %div.scale = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, float %src1, i1 false)
   %result = extractvalue { float, i1 } %div.scale, 0
   ret float %result
 }

 declare float @llvm.fma.f32(float, float, float) #0
 declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) #1
 declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1 immarg) #1
 declare i1 @llvm.amdgcn.class.f32(float, i32) #1

 attributes #0 = { nounwind readnone speculatable willreturn }
 attributes #1 = { nounwind readnone speculatable }
	; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
	; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=regbankselect -verify-machineinstrs < %s \| FileCheck -enable-var-scope -check-prefix=GFX9 %s
	; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=regbankselect -verify-machineinstrs < %s \| FileCheck -enable-var-scope -check-prefix=GFX10 %s

	; Make sure we don't violate the constant bus restriction
	; FIXME: Make this test isa output when div.fmas works.


	define amdgpu_ps float @fmul_s_s(float inreg %src0, float inreg %src1) {
	; GFX9-LABEL: name: fmul_s_s
	; GFX9: bb.1 (%ir-block.0):
	; GFX9: liveins: $sgpr2, $sgpr3
	; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX9: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY2]], [[COPY3]]
	; GFX9: $vgpr0 = COPY [[FMUL]](s32)
	; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
	; GFX10-LABEL: name: fmul_s_s
	; GFX10: bb.1 (%ir-block.0):
	; GFX10: liveins: $sgpr2, $sgpr3
	; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX10: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY2]], [[COPY3]]
	; GFX10: $vgpr0 = COPY [[FMUL]](s32)
	; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
	%result = fmul float %src0, %src1
	ret float %result
	}

	define amdgpu_ps float @fmul_ss(float inreg %src) {
	; GFX9-LABEL: name: fmul_ss
	; GFX9: bb.1 (%ir-block.0):
	; GFX9: liveins: $sgpr2
	; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX9: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY1]], [[COPY2]]
	; GFX9: $vgpr0 = COPY [[FMUL]](s32)
	; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
	; GFX10-LABEL: name: fmul_ss
	; GFX10: bb.1 (%ir-block.0):
	; GFX10: liveins: $sgpr2
	; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX10: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY1]], [[COPY2]]
	; GFX10: $vgpr0 = COPY [[FMUL]](s32)
	; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
	%result = fmul float %src, %src
	ret float %result
	}

	; Ternary operation with 3 different SGPRs
	define amdgpu_ps float @fma_s_s_s(float inreg %src0, float inreg %src1, float inreg %src2) {
	; GFX9-LABEL: name: fma_s_s_s
	; GFX9: bb.1 (%ir-block.0):
	; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4
	; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX9: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
	; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
	; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY4]], [[COPY5]]
	; GFX9: $vgpr0 = COPY [[FMA]](s32)
	; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
	; GFX10-LABEL: name: fma_s_s_s
	; GFX10: bb.1 (%ir-block.0):
	; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4
	; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX10: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
	; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
	; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY4]], [[COPY5]]
	; GFX10: $vgpr0 = COPY [[FMA]](s32)
	; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
	%result = call float @llvm.fma.f32(float %src0, float %src1, float %src2)
	ret float %result
	}

	; Ternary operation with 3 identical SGPRs
	define amdgpu_ps float @fma_sss(float inreg %src) {
	; GFX9-LABEL: name: fma_sss
	; GFX9: bb.1 (%ir-block.0):
	; GFX9: liveins: $sgpr2
	; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY1]], [[COPY2]], [[COPY3]]
	; GFX9: $vgpr0 = COPY [[FMA]](s32)
	; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
	; GFX10-LABEL: name: fma_sss
	; GFX10: bb.1 (%ir-block.0):
	; GFX10: liveins: $sgpr2
	; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY1]], [[COPY2]], [[COPY3]]
	; GFX10: $vgpr0 = COPY [[FMA]](s32)
	; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
	%result = call float @llvm.fma.f32(float %src, float %src, float %src)
	ret float %result
	}

	; src0/1 are same SGPR
	define amdgpu_ps float @fma_ss_s(float inreg %src01, float inreg %src2) {
	; GFX9-LABEL: name: fma_ss_s
	; GFX9: bb.1 (%ir-block.0):
	; GFX9: liveins: $sgpr2, $sgpr3
	; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]]
	; GFX9: $vgpr0 = COPY [[FMA]](s32)
	; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
	; GFX10-LABEL: name: fma_ss_s
	; GFX10: bb.1 (%ir-block.0):
	; GFX10: liveins: $sgpr2, $sgpr3
	; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]]
	; GFX10: $vgpr0 = COPY [[FMA]](s32)
	; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
	%result = call float @llvm.fma.f32(float %src01, float %src01, float %src2)
	ret float %result
	}

	; src1/2 are same SGPR
	define amdgpu_ps float @fma_s_ss(float inreg %src0, float inreg %src12) {
	; GFX9-LABEL: name: fma_s_ss
	; GFX9: bb.1 (%ir-block.0):
	; GFX9: liveins: $sgpr2, $sgpr3
	; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]]
	; GFX9: $vgpr0 = COPY [[FMA]](s32)
	; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
	; GFX10-LABEL: name: fma_s_ss
	; GFX10: bb.1 (%ir-block.0):
	; GFX10: liveins: $sgpr2, $sgpr3
	; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]]
	; GFX10: $vgpr0 = COPY [[FMA]](s32)
	; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
	%result = call float @llvm.fma.f32(float %src0, float %src12, float %src12)
	ret float %result
	}

	; src0/2 are same SGPR
	define amdgpu_ps float @fma_ss_s_same_outer(float inreg %src02, float inreg %src1) {
	; GFX9-LABEL: name: fma_ss_s_same_outer
	; GFX9: bb.1 (%ir-block.0):
	; GFX9: liveins: $sgpr2, $sgpr3
	; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]]
	; GFX9: $vgpr0 = COPY [[FMA]](s32)
	; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
	; GFX10-LABEL: name: fma_ss_s_same_outer
	; GFX10: bb.1 (%ir-block.0):
	; GFX10: liveins: $sgpr2, $sgpr3
	; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]]
	; GFX10: $vgpr0 = COPY [[FMA]](s32)
	; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
	%result = call float @llvm.fma.f32(float %src02, float %src1, float %src02)
	ret float %result
	}

	define amdgpu_ps float @fcmp_s_s(float inreg %src0, float inreg %src1) {
	; GFX9-LABEL: name: fcmp_s_s
	; GFX9: bb.1 (%ir-block.0):
	; GFX9: liveins: $sgpr2, $sgpr3
	; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX9: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY2]]
	; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
	; GFX9: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
	; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
	; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
	; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY3]], [[COPY4]]
	; GFX9: $vgpr0 = COPY [[SELECT]](s32)
	; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
	; GFX10-LABEL: name: fcmp_s_s
	; GFX10: bb.1 (%ir-block.0):
	; GFX10: liveins: $sgpr2, $sgpr3
	; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX10: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY2]]
	; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
	; GFX10: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
	; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
	; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
	; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY3]], [[COPY4]]
	; GFX10: $vgpr0 = COPY [[SELECT]](s32)
	; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
	%cmp = fcmp oeq float %src0, %src1
	%result = select i1 %cmp, float 1.0, float 0.0
	ret float %result
	}

	define amdgpu_ps float @select_vcc_s_s(float %cmp0, float %cmp1, float inreg %src0, float inreg %src1) {
	; GFX9-LABEL: name: select_vcc_s_s
	; GFX9: bb.1 (%ir-block.0):
	; GFX9: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
	; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
	; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
	; GFX9: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX9: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX9: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]]
	; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
	; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32)
	; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]]
	; GFX9: $vgpr0 = COPY [[SELECT]](s32)
	; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
	; GFX10-LABEL: name: select_vcc_s_s
	; GFX10: bb.1 (%ir-block.0):
	; GFX10: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
	; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
	; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
	; GFX10: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX10: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX10: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]]
	; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
	; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32)
	; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]]
	; GFX10: $vgpr0 = COPY [[SELECT]](s32)
	; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
	%cmp = fcmp oeq float %cmp0, %cmp1
	%result = select i1 %cmp, float %src0, float %src1
	ret float %result
	}

	define amdgpu_ps float @select_vcc_fneg_s_s(float %cmp0, float %cmp1, float inreg %src0, float inreg %src1) {
	; GFX9-LABEL: name: select_vcc_fneg_s_s
	; GFX9: bb.1 (%ir-block.0):
	; GFX9: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
	; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
	; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
	; GFX9: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX9: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX9: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]]
	; GFX9: [[FNEG:%[0-9]+]]:sgpr(s32) = G_FNEG [[COPY2]]
	; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[FNEG]](s32)
	; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32)
	; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]]
	; GFX9: $vgpr0 = COPY [[SELECT]](s32)
	; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
	; GFX10-LABEL: name: select_vcc_fneg_s_s
	; GFX10: bb.1 (%ir-block.0):
	; GFX10: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
	; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
	; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
	; GFX10: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX10: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX10: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]]
	; GFX10: [[FNEG:%[0-9]+]]:sgpr(s32) = G_FNEG [[COPY2]]
	; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[FNEG]](s32)
	; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32)
	; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]]
	; GFX10: $vgpr0 = COPY [[SELECT]](s32)
	; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
	%cmp = fcmp oeq float %cmp0, %cmp1
	%neg.src0 = fneg float %src0
	%result = select i1 %cmp, float %neg.src0, float %src1
	ret float %result
	}

	; Constant bus used by vcc
	define amdgpu_ps float @amdgcn_div_fmas_sss(float inreg %src, float %cmp.src) {
	; GFX9-LABEL: name: amdgcn_div_fmas_sss
	; GFX9: bb.1 (%ir-block.0):
	; GFX9: liveins: $sgpr2, $vgpr0
	; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
	; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
	; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
	; GFX9: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY1]](s32), [[COPY2]]
	; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX9: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[FCMP]](s1)
	; GFX9: $vgpr0 = COPY [[INT]](s32)
	; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
	; GFX10-LABEL: name: amdgcn_div_fmas_sss
	; GFX10: bb.1 (%ir-block.0):
	; GFX10: liveins: $sgpr2, $vgpr0
	; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
	; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
	; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
	; GFX10: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY1]](s32), [[COPY2]]
	; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX10: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[FCMP]](s1)
	; GFX10: $vgpr0 = COPY [[INT]](s32)
	; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
	%vcc = fcmp oeq float %cmp.src, 0.0
	%result = call float @llvm.amdgcn.div.fmas.f32(float %src, float %src, float %src, i1 %vcc)
	ret float %result
	}

	define amdgpu_ps float @class_s_s(float inreg %src0, i32 inreg %src1) {
	; GFX9-LABEL: name: class_s_s
	; GFX9: bb.1 (%ir-block.0):
	; GFX9: liveins: $sgpr2, $sgpr3
	; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX9: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s32), [[COPY3]](s32)
	; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
	; GFX9: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
	; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
	; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
	; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[INT]](s1), [[COPY4]], [[COPY5]]
	; GFX9: $vgpr0 = COPY [[SELECT]](s32)
	; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
	; GFX10-LABEL: name: class_s_s
	; GFX10: bb.1 (%ir-block.0):
	; GFX10: liveins: $sgpr2, $sgpr3
	; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX10: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s32), [[COPY3]](s32)
	; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
	; GFX10: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
	; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
	; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
	; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[INT]](s1), [[COPY4]], [[COPY5]]
	; GFX10: $vgpr0 = COPY [[SELECT]](s32)
	; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
	%class = call i1 @llvm.amdgcn.class.f32(float %src0, i32 %src1)
	%result = select i1 %class, float 1.0, float 0.0
	ret float %result
	}

	define amdgpu_ps float @div_scale_s_s_true(float inreg %src0, float inreg %src1) {
	; GFX9-LABEL: name: div_scale_s_s_true
	; GFX9: bb.1 (%ir-block.0):
	; GFX9: liveins: $sgpr2, $sgpr3
	; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX9: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), -1
	; GFX9: $vgpr0 = COPY [[INT]](s32)
	; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
	; GFX10-LABEL: name: div_scale_s_s_true
	; GFX10: bb.1 (%ir-block.0):
	; GFX10: liveins: $sgpr2, $sgpr3
	; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX10: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), -1
	; GFX10: $vgpr0 = COPY [[INT]](s32)
	; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
	%div.scale = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, float %src1, i1 true)
	%result = extractvalue { float, i1 } %div.scale, 0
	ret float %result
	}

	define amdgpu_ps float @div_scale_s_s_false(float inreg %src0, float inreg %src1) {
	; GFX9-LABEL: name: div_scale_s_s_false
	; GFX9: bb.1 (%ir-block.0):
	; GFX9: liveins: $sgpr2, $sgpr3
	; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX9: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), 0
	; GFX9: $vgpr0 = COPY [[INT]](s32)
	; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
	; GFX10-LABEL: name: div_scale_s_s_false
	; GFX10: bb.1 (%ir-block.0):
	; GFX10: liveins: $sgpr2, $sgpr3
	; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
	; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
	; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
	; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
	; GFX10: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), 0
	; GFX10: $vgpr0 = COPY [[INT]](s32)
	; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
	%div.scale = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, float %src1, i1 false)
	%result = extractvalue { float, i1 } %div.scale, 0
	ret float %result
	}

	declare float @llvm.fma.f32(float, float, float) #0
	declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) #1
	declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1 immarg) #1
	declare i1 @llvm.amdgcn.class.f32(float, i32) #1

	attributes #0 = { nounwind readnone speculatable willreturn }
	attributes #1 = { nounwind readnone speculatable }