| //===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements a function pass that inserts VSETVLI instructions where |
| // needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL |
| // instructions. It also defines a second pass, RISCVCoalesceVSETVLI, which |
| // coalesces the inserted VSETVLI instructions within each basic block. |
| // |
| // The insertion pass consists of 3 phases: |
| // |
| // Phase 1 collects how each basic block affects VL/VTYPE. |
| // |
| // Phase 2 uses the information from phase 1 to do a data flow analysis to |
| // propagate the VL/VTYPE changes through the function. This gives us the |
| // VL/VTYPE at the start of each basic block. |
| // |
| // Phase 3 inserts VSETVLI instructions in each basic block. Information from |
| // phase 2 is used to prevent inserting a VSETVLI before the first vector |
| // instruction in the block if possible. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "RISCV.h" |
| #include "RISCVSubtarget.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/CodeGen/LiveDebugVariables.h" |
| #include "llvm/CodeGen/LiveIntervals.h" |
| #include "llvm/CodeGen/LiveStacks.h" |
| #include "llvm/CodeGen/MachineFunctionPass.h" |
| #include <queue> |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "riscv-insert-vsetvli" |
| #define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass" |
| #define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass" |
| |
| STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted"); |
| STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced"); |
| |
| static cl::opt<bool> DisableInsertVSETVLPHIOpt( |
| "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden, |
| cl::desc("Disable looking through phis when inserting vsetvlis.")); |
| |
| static cl::opt<bool> UseStrictAsserts( |
| "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden, |
| cl::desc("Enable strict assertion checking for the dataflow algorithm")); |
| |
| namespace { |
| |
| static unsigned getVLOpNum(const MachineInstr &MI) { |
| return RISCVII::getVLOpNum(MI.getDesc()); |
| } |
| |
| static unsigned getSEWOpNum(const MachineInstr &MI) { |
| return RISCVII::getSEWOpNum(MI.getDesc()); |
| } |
| |
| static bool isVectorConfigInstr(const MachineInstr &MI) { |
| return MI.getOpcode() == RISCV::PseudoVSETVLI || |
| MI.getOpcode() == RISCV::PseudoVSETVLIX0 || |
| MI.getOpcode() == RISCV::PseudoVSETIVLI; |
| } |
| |
| /// Return true if this is 'vsetvli x0, x0, vtype' which preserves |
| /// VL and only sets VTYPE. |
| static bool isVLPreservingConfig(const MachineInstr &MI) { |
| if (MI.getOpcode() != RISCV::PseudoVSETVLIX0) |
| return false; |
| assert(RISCV::X0 == MI.getOperand(1).getReg()); |
| return RISCV::X0 == MI.getOperand(0).getReg(); |
| } |
| |
| static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) { |
| switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { |
| default: |
| return false; |
| case RISCV::VFMV_S_F: |
| case RISCV::VFMV_V_F: |
| return true; |
| } |
| } |
| |
| static bool isScalarExtractInstr(const MachineInstr &MI) { |
| switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { |
| default: |
| return false; |
| case RISCV::VMV_X_S: |
| case RISCV::VFMV_F_S: |
| return true; |
| } |
| } |
| |
| static bool isScalarInsertInstr(const MachineInstr &MI) { |
| switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { |
| default: |
| return false; |
| case RISCV::VMV_S_X: |
| case RISCV::VFMV_S_F: |
| return true; |
| } |
| } |
| |
| static bool isScalarSplatInstr(const MachineInstr &MI) { |
| switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { |
| default: |
| return false; |
| case RISCV::VMV_V_I: |
| case RISCV::VMV_V_X: |
| case RISCV::VFMV_V_F: |
| return true; |
| } |
| } |
| |
| static bool isVSlideInstr(const MachineInstr &MI) { |
| switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { |
| default: |
| return false; |
| case RISCV::VSLIDEDOWN_VX: |
| case RISCV::VSLIDEDOWN_VI: |
| case RISCV::VSLIDEUP_VX: |
| case RISCV::VSLIDEUP_VI: |
| return true; |
| } |
| } |
| |
| /// Get the EEW for a load or store instruction that encodes its EEW in the |
| /// opcode rather than using SEW. Return std::nullopt for any other instruction. |
| static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) { |
| switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { |
| default: |
| return std::nullopt; |
| case RISCV::VLE8_V: |
| case RISCV::VLSE8_V: |
| case RISCV::VSE8_V: |
| case RISCV::VSSE8_V: |
| return 8; |
| case RISCV::VLE16_V: |
| case RISCV::VLSE16_V: |
| case RISCV::VSE16_V: |
| case RISCV::VSSE16_V: |
| return 16; |
| case RISCV::VLE32_V: |
| case RISCV::VLSE32_V: |
| case RISCV::VSE32_V: |
| case RISCV::VSSE32_V: |
| return 32; |
| case RISCV::VLE64_V: |
| case RISCV::VLSE64_V: |
| case RISCV::VSE64_V: |
| case RISCV::VSSE64_V: |
| return 64; |
| } |
| } |
| |
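| // Return true if MI is an "addi rd, x0, imm" (i.e. an "li") with a non-zero |
| // immediate, so the register it defines is known to be non-zero. |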
| static bool isNonZeroLoadImmediate(const MachineInstr &MI) { |
| return MI.getOpcode() == RISCV::ADDI && |
| MI.getOperand(1).isReg() && MI.getOperand(2).isImm() && |
| MI.getOperand(1).getReg() == RISCV::X0 && |
| MI.getOperand(2).getImm() != 0; |
| } |
| |
| /// Return true if this is an operation on mask registers. Note that |
| /// this includes both arithmetic/logical ops and load/store (vlm/vsm). |
| static bool isMaskRegOp(const MachineInstr &MI) { |
| if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags)) |
| return false; |
| const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm(); |
| // A Log2SEW of 0 is an operation on mask registers only. |
| return Log2SEW == 0; |
| } |
| |
| /// Return true if the inactive elements in the result are entirely undefined. |
| /// Note that this is different from "agnostic" as defined by the vector |
| /// specification. Agnostic requires each lane to either be undisturbed, or |
| /// take the value -1; no other value is allowed. |
| static bool hasUndefinedMergeOp(const MachineInstr &MI, |
| const MachineRegisterInfo &MRI) { |
| |
| unsigned UseOpIdx; |
| if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) |
| // If there is no passthrough operand, then the pass through |
| // lanes are undefined. |
| return true; |
| |
| // If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQUENCE whose |
| // operands are solely IMPLICIT_DEFs, then the pass through lanes are |
| // undefined. |
| const MachineOperand &UseMO = MI.getOperand(UseOpIdx); |
| if (UseMO.getReg() == RISCV::NoRegister) |
| return true; |
| |
| if (UseMO.isUndef()) |
| return true; |
| if (UseMO.getReg().isPhysical()) |
| return false; |
| |
| if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) { |
| if (UseMI->isImplicitDef()) |
| return true; |
| |
| if (UseMI->isRegSequence()) { |
| for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) { |
| MachineInstr *SourceMI = MRI.getVRegDef(UseMI->getOperand(i).getReg()); |
| if (!SourceMI || !SourceMI->isImplicitDef()) |
| return false; |
| } |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /// Which subfields of VL or VTYPE have values we need to preserve? |
| struct DemandedFields { |
| // Some unknown property of VL is used. If demanded, must preserve entire |
| // value. |
| bool VLAny = false; |
| // Only zero vs non-zero is used. If demanded, can change non-zero values. |
| bool VLZeroness = false; |
| // What properties of SEW we need to preserve. |
| enum : uint8_t { |
| SEWEqual = 3, // The exact value of SEW needs to be preserved. |
| SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater |
| // than or equal to the original value. |
| SEWGreaterThanOrEqualAndLessThan64 = |
| 1, // SEW can be changed as long as it's greater |
| // than or equal to the original value, but must be less |
| // than 64. |
| SEWNone = 0 // We don't need to preserve SEW at all. |
| } SEW = SEWNone; |
| bool LMUL = false; |
| bool SEWLMULRatio = false; |
| bool TailPolicy = false; |
| bool MaskPolicy = false; |
| |
| // Return true if any part of VTYPE was used |
| bool usedVTYPE() const { |
| return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy; |
| } |
| |
| // Return true if any property of VL was used |
| bool usedVL() { |
| return VLAny || VLZeroness; |
| } |
| |
| // Mark all VTYPE subfields and properties as demanded |
| void demandVTYPE() { |
| SEW = SEWEqual; |
| LMUL = true; |
| SEWLMULRatio = true; |
| TailPolicy = true; |
| MaskPolicy = true; |
| } |
| |
| // Mark all VL properties as demanded |
| void demandVL() { |
| VLAny = true; |
| VLZeroness = true; |
| } |
| |
| // Make this the result of demanding both the fields in this and B. |
| void doUnion(const DemandedFields &B) { |
| VLAny |= B.VLAny; |
| VLZeroness |= B.VLZeroness; |
| SEW = std::max(SEW, B.SEW); |
| LMUL |= B.LMUL; |
| SEWLMULRatio |= B.SEWLMULRatio; |
| TailPolicy |= B.TailPolicy; |
| MaskPolicy |= B.MaskPolicy; |
| } |
| |
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| /// Support for debugging, callable in GDB: V->dump() |
| LLVM_DUMP_METHOD void dump() const { |
| print(dbgs()); |
| dbgs() << "\n"; |
| } |
| |
| /// Implement operator<<. |
| void print(raw_ostream &OS) const { |
| OS << "{"; |
| OS << "VLAny=" << VLAny << ", "; |
| OS << "VLZeroness=" << VLZeroness << ", "; |
| OS << "SEW="; |
| switch (SEW) { |
| case SEWEqual: |
| OS << "SEWEqual"; |
| break; |
| case SEWGreaterThanOrEqual: |
| OS << "SEWGreaterThanOrEqual"; |
| break; |
| case SEWGreaterThanOrEqualAndLessThan64: |
| OS << "SEWGreaterThanOrEqualAndLessThan64"; |
| break; |
| case SEWNone: |
| OS << "SEWNone"; |
| break; |
| } |
| OS << ", "; |
| OS << "LMUL=" << LMUL << ", "; |
| OS << "SEWLMULRatio=" << SEWLMULRatio << ", "; |
| OS << "TailPolicy=" << TailPolicy << ", "; |
| OS << "MaskPolicy=" << MaskPolicy; |
| OS << "}"; |
| } |
| #endif |
| }; |
| |
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| LLVM_ATTRIBUTE_USED |
| inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) { |
| DF.print(OS); |
| return OS; |
| } |
| #endif |
| |
| /// Return true if moving from CurVType to NewVType is |
| /// indistinguishable from the perspective of an instruction (or set |
| /// of instructions) which use only the Used subfields and properties. |
| static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType, |
| const DemandedFields &Used) { |
| switch (Used.SEW) { |
| case DemandedFields::SEWNone: |
| break; |
| case DemandedFields::SEWEqual: |
| if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType)) |
| return false; |
| break; |
| case DemandedFields::SEWGreaterThanOrEqual: |
| if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType)) |
| return false; |
| break; |
| case DemandedFields::SEWGreaterThanOrEqualAndLessThan64: |
| if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) || |
| RISCVVType::getSEW(NewVType) >= 64) |
| return false; |
| break; |
| } |
| |
| if (Used.LMUL && |
| RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType)) |
| return false; |
| |
| if (Used.SEWLMULRatio) { |
| auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType), |
| RISCVVType::getVLMUL(CurVType)); |
| auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType), |
| RISCVVType::getVLMUL(NewVType)); |
| if (Ratio1 != Ratio2) |
| return false; |
| } |
| |
| if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) != |
| RISCVVType::isTailAgnostic(NewVType)) |
| return false; |
| if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) != |
| RISCVVType::isMaskAgnostic(NewVType)) |
| return false; |
| return true; |
| } |
| |
| /// Return the fields and properties demanded by the provided instruction. |
| DemandedFields getDemanded(const MachineInstr &MI, |
| const MachineRegisterInfo *MRI, |
| const RISCVSubtarget *ST) { |
| // Warning: This function has to work on both the lowered (i.e. post |
| // emitVSETVLIs) and pre-lowering forms. The main implication of this is |
| // that it can't use the value of a SEW, VL, or Policy operand as they might |
| // be stale after lowering. |
| |
| // Most instructions don't use any of these subfields. |
| DemandedFields Res; |
| // Start conservative if registers are used |
| if (MI.isCall() || MI.isInlineAsm() || |
| MI.readsRegister(RISCV::VL, /*TRI=*/nullptr)) |
| Res.demandVL(); |
| if (MI.isCall() || MI.isInlineAsm() || |
| MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr)) |
| Res.demandVTYPE(); |
| // Start conservative on the unlowered form too |
| uint64_t TSFlags = MI.getDesc().TSFlags; |
| if (RISCVII::hasSEWOp(TSFlags)) { |
| Res.demandVTYPE(); |
| if (RISCVII::hasVLOp(TSFlags)) |
| Res.demandVL(); |
| |
| // Behavior is independent of mask policy. |
| if (!RISCVII::usesMaskPolicy(TSFlags)) |
| Res.MaskPolicy = false; |
| } |
| |
| // Loads and stores with implicit EEW do not demand SEW or LMUL directly. |
| // They instead demand the ratio of the two which is used in computing |
| // EMUL, but which allows us the flexibility to change SEW and LMUL |
| // provided we don't change the ratio. |
| // Note: We assume that the instruction's initial SEW is the EEW encoded |
| // in the opcode. This is asserted when constructing the VSETVLIInfo. |
| if (getEEWForLoadStore(MI)) { |
| Res.SEW = DemandedFields::SEWNone; |
| Res.LMUL = false; |
| } |
| |
| // Store instructions don't use the policy fields. |
| if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) { |
| Res.TailPolicy = false; |
| Res.MaskPolicy = false; |
| } |
| |
| // If this is a mask reg operation, it only cares about VLMAX. |
| // TODO: Possible extensions to this logic |
| // * Probably ok if available VLMax is larger than demanded |
| // * The policy bits can probably be ignored. |
| if (isMaskRegOp(MI)) { |
| Res.SEW = DemandedFields::SEWNone; |
| Res.LMUL = false; |
| } |
| |
| // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0. |
| if (isScalarInsertInstr(MI)) { |
| Res.LMUL = false; |
| Res.SEWLMULRatio = false; |
| Res.VLAny = false; |
| // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't |
| // need to preserve any other bits and are thus compatible with any larger |
| // etype, and can disregard policy bits. Warning: It's tempting to try doing |
| // this for any tail agnostic operation, but we can't as TA requires |
| // tail lanes to either be the original value or -1. We are writing |
| // unknown bits to the lanes here. |
| if (hasUndefinedMergeOp(MI, *MRI)) { |
| if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64()) |
| Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64; |
| else |
| Res.SEW = DemandedFields::SEWGreaterThanOrEqual; |
| Res.TailPolicy = false; |
| } |
| } |
| |
| // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW. |
| if (isScalarExtractInstr(MI)) { |
| assert(!RISCVII::hasVLOp(TSFlags)); |
| Res.LMUL = false; |
| Res.SEWLMULRatio = false; |
| Res.TailPolicy = false; |
| Res.MaskPolicy = false; |
| } |
| |
| return Res; |
| } |
| |
| /// Defines the abstract state with which the forward dataflow models the |
| /// values of the VL and VTYPE registers after insertion. |
| class VSETVLIInfo { |
| struct AVLDef { |
| const MachineInstr *DefMI; |
| Register DefReg; |
| }; |
| union { |
| AVLDef AVLRegDef; |
| unsigned AVLImm; |
| }; |
| |
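| // How the AVL is tracked: a virtual register (with its defining instruction), |
| // an immediate, the VLMAX sentinel (an x0 AVL), or "ignored" for instructions |
| // such as vmv.x.s whose behavior doesn't depend on VL. Uninitialized means no |
| // information has been computed yet; Unknown means VL/VTYPE could be anything. |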
| enum : uint8_t { |
| Uninitialized, |
| AVLIsReg, |
| AVLIsImm, |
| AVLIsVLMAX, |
| AVLIsIgnored, |
| Unknown, |
| } State = Uninitialized; |
| |
| // Fields from VTYPE. |
| RISCVII::VLMUL VLMul = RISCVII::LMUL_1; |
| uint8_t SEW = 0; |
| uint8_t TailAgnostic : 1; |
| uint8_t MaskAgnostic : 1; |
| uint8_t SEWLMULRatioOnly : 1; |
| |
| public: |
| VSETVLIInfo() |
| : AVLImm(0), TailAgnostic(false), MaskAgnostic(false), |
| SEWLMULRatioOnly(false) {} |
| |
| static VSETVLIInfo getUnknown() { |
| VSETVLIInfo Info; |
| Info.setUnknown(); |
| return Info; |
| } |
| |
| bool isValid() const { return State != Uninitialized; } |
| void setUnknown() { State = Unknown; } |
| bool isUnknown() const { return State == Unknown; } |
| |
| void setAVLRegDef(const MachineInstr *DefMI, Register AVLReg) { |
| assert(DefMI && AVLReg.isVirtual()); |
| AVLRegDef.DefMI = DefMI; |
| AVLRegDef.DefReg = AVLReg; |
| State = AVLIsReg; |
| } |
| |
| void setAVLImm(unsigned Imm) { |
| AVLImm = Imm; |
| State = AVLIsImm; |
| } |
| |
| void setAVLVLMAX() { State = AVLIsVLMAX; } |
| |
| void setAVLIgnored() { State = AVLIsIgnored; } |
| |
| bool hasAVLImm() const { return State == AVLIsImm; } |
| bool hasAVLReg() const { return State == AVLIsReg; } |
| bool hasAVLVLMAX() const { return State == AVLIsVLMAX; } |
| bool hasAVLIgnored() const { return State == AVLIsIgnored; } |
| Register getAVLReg() const { |
| assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual()); |
| return AVLRegDef.DefReg; |
| } |
| unsigned getAVLImm() const { |
| assert(hasAVLImm()); |
| return AVLImm; |
| } |
| const MachineInstr &getAVLDefMI() const { |
| assert(hasAVLReg() && AVLRegDef.DefMI); |
| return *AVLRegDef.DefMI; |
| } |
| |
| void setAVL(VSETVLIInfo Info) { |
| assert(Info.isValid()); |
| if (Info.isUnknown()) |
| setUnknown(); |
| else if (Info.hasAVLReg()) |
| setAVLRegDef(&Info.getAVLDefMI(), Info.getAVLReg()); |
| else if (Info.hasAVLVLMAX()) |
| setAVLVLMAX(); |
| else if (Info.hasAVLIgnored()) |
| setAVLIgnored(); |
| else { |
| assert(Info.hasAVLImm()); |
| setAVLImm(Info.getAVLImm()); |
| } |
| } |
| |
| unsigned getSEW() const { return SEW; } |
| RISCVII::VLMUL getVLMUL() const { return VLMul; } |
| bool getTailAgnostic() const { return TailAgnostic; } |
| bool getMaskAgnostic() const { return MaskAgnostic; } |
| |
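| // Return true if the AVL is known to be non-zero: a non-zero immediate, a |
| // register defined by a non-zero "li" (addi rd, x0, imm), or VLMAX. |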
| bool hasNonZeroAVL() const { |
| if (hasAVLImm()) |
| return getAVLImm() > 0; |
| if (hasAVLReg()) |
| return isNonZeroLoadImmediate(getAVLDefMI()); |
| if (hasAVLVLMAX()) |
| return true; |
| if (hasAVLIgnored()) |
| return false; |
| return false; |
| } |
| |
| bool hasEquallyZeroAVL(const VSETVLIInfo &Other) const { |
| if (hasSameAVL(Other)) |
| return true; |
| return (hasNonZeroAVL() && Other.hasNonZeroAVL()); |
| } |
| |
| bool hasSameAVL(const VSETVLIInfo &Other) const { |
| if (hasAVLReg() && Other.hasAVLReg()) |
| return AVLRegDef.DefMI == Other.AVLRegDef.DefMI && |
| AVLRegDef.DefReg == Other.AVLRegDef.DefReg; |
| |
| if (hasAVLImm() && Other.hasAVLImm()) |
| return getAVLImm() == Other.getAVLImm(); |
| |
| if (hasAVLVLMAX()) |
| return Other.hasAVLVLMAX() && hasSameVLMAX(Other); |
| |
| if (hasAVLIgnored()) |
| return Other.hasAVLIgnored(); |
| |
| return false; |
| } |
| |
| void setVTYPE(unsigned VType) { |
| assert(isValid() && !isUnknown() && |
| "Can't set VTYPE for uninitialized or unknown"); |
| VLMul = RISCVVType::getVLMUL(VType); |
| SEW = RISCVVType::getSEW(VType); |
| TailAgnostic = RISCVVType::isTailAgnostic(VType); |
| MaskAgnostic = RISCVVType::isMaskAgnostic(VType); |
| } |
| void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) { |
| assert(isValid() && !isUnknown() && |
| "Can't set VTYPE for uninitialized or unknown"); |
| VLMul = L; |
| SEW = S; |
| TailAgnostic = TA; |
| MaskAgnostic = MA; |
| } |
| |
| void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; } |
| |
| unsigned encodeVTYPE() const { |
| assert(isValid() && !isUnknown() && !SEWLMULRatioOnly && |
| "Can't encode VTYPE for uninitialized or unknown"); |
| return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic); |
| } |
| |
| bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; } |
| |
| bool hasSameVTYPE(const VSETVLIInfo &Other) const { |
| assert(isValid() && Other.isValid() && |
| "Can't compare invalid VSETVLIInfos"); |
| assert(!isUnknown() && !Other.isUnknown() && |
| "Can't compare VTYPE in unknown state"); |
| assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly && |
| "Can't compare when only LMUL/SEW ratio is valid."); |
| return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) == |
| std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic, |
| Other.MaskAgnostic); |
| } |
| |
| unsigned getSEWLMULRatio() const { |
| assert(isValid() && !isUnknown() && |
| "Can't use VTYPE for uninitialized or unknown"); |
| return RISCVVType::getSEWLMULRatio(SEW, VLMul); |
| } |
| |
| // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX. |
| // Note that having the same VLMAX ensures that both share the same |
| // function from AVL to VL; that is, they must produce the same VL value |
| // for any given AVL value. |
| bool hasSameVLMAX(const VSETVLIInfo &Other) const { |
| assert(isValid() && Other.isValid() && |
| "Can't compare invalid VSETVLIInfos"); |
| assert(!isUnknown() && !Other.isUnknown() && |
| "Can't compare VTYPE in unknown state"); |
| return getSEWLMULRatio() == Other.getSEWLMULRatio(); |
| } |
| |
| bool hasCompatibleVTYPE(const DemandedFields &Used, |
| const VSETVLIInfo &Require) const { |
| return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used); |
| } |
| |
| // Determine whether the vector instruction's requirements, represented by |
| // Require, are compatible with the previous vsetvli instruction represented |
| // by this, given the fields the instruction actually demands (Used). |
| bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require, |
| const MachineRegisterInfo &MRI) const { |
| assert(isValid() && Require.isValid() && |
| "Can't compare invalid VSETVLIInfos"); |
| assert(!Require.SEWLMULRatioOnly && |
| "Expected a valid VTYPE for instruction!"); |
| // Nothing is compatible with Unknown. |
| if (isUnknown() || Require.isUnknown()) |
| return false; |
| |
| // If only our VLMAX ratio is valid, then this isn't compatible. |
| if (SEWLMULRatioOnly) |
| return false; |
| |
| if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require))) |
| return false; |
| |
| if (Used.VLZeroness && !hasEquallyZeroAVL(Require)) |
| return false; |
| |
| return hasCompatibleVTYPE(Used, Require); |
| } |
| |
| bool operator==(const VSETVLIInfo &Other) const { |
| // Uninitialized is only equal to another Uninitialized. |
| if (!isValid()) |
| return !Other.isValid(); |
| if (!Other.isValid()) |
| return !isValid(); |
| |
| // Unknown is only equal to another Unknown. |
| if (isUnknown()) |
| return Other.isUnknown(); |
| if (Other.isUnknown()) |
| return isUnknown(); |
| |
| if (!hasSameAVL(Other)) |
| return false; |
| |
| // If the SEWLMULRatioOnly bits are different, then they aren't equal. |
| if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly) |
| return false; |
| |
| // If only the VLMAX is valid, check that it is the same. |
| if (SEWLMULRatioOnly) |
| return hasSameVLMAX(Other); |
| |
| // If the full VTYPE is valid, check that it is the same. |
| return hasSameVTYPE(Other); |
| } |
| |
| bool operator!=(const VSETVLIInfo &Other) const { |
| return !(*this == Other); |
| } |
| |
| // Calculate the VSETVLIInfo visible to a block assuming this and Other are |
| // both predecessors. |
| VSETVLIInfo intersect(const VSETVLIInfo &Other) const { |
| // If the new value isn't valid, ignore it. |
| if (!Other.isValid()) |
| return *this; |
| |
| // If this value isn't valid, this must be the first predecessor, use it. |
| if (!isValid()) |
| return Other; |
| |
| // If either is unknown, the result is unknown. |
| if (isUnknown() || Other.isUnknown()) |
| return VSETVLIInfo::getUnknown(); |
| |
| // If we have an exact match, return this. |
| if (*this == Other) |
| return *this; |
| |
| // Not an exact match, but maybe the AVL and VLMAX are the same. If so, |
| // return an SEW/LMUL ratio only value. |
| if (hasSameAVL(Other) && hasSameVLMAX(Other)) { |
| VSETVLIInfo MergeInfo = *this; |
| MergeInfo.SEWLMULRatioOnly = true; |
| return MergeInfo; |
| } |
| |
| // Otherwise the result is unknown. |
| return VSETVLIInfo::getUnknown(); |
| } |
| |
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| /// Support for debugging, callable in GDB: V->dump() |
| LLVM_DUMP_METHOD void dump() const { |
| print(dbgs()); |
| dbgs() << "\n"; |
| } |
| |
| /// Implement operator<<. |
| /// @{ |
| void print(raw_ostream &OS) const { |
| OS << "{"; |
| if (!isValid()) |
| OS << "Uninitialized"; |
| if (isUnknown()) |
| OS << "unknown"; |
| if (hasAVLReg()) |
| OS << "AVLReg=" << (unsigned)getAVLReg(); |
| if (hasAVLImm()) |
| OS << "AVLImm=" << (unsigned)AVLImm; |
| if (hasAVLVLMAX()) |
| OS << "AVLVLMAX"; |
| if (hasAVLIgnored()) |
| OS << "AVLIgnored"; |
| OS << ", " |
| << "VLMul=" << (unsigned)VLMul << ", " |
| << "SEW=" << (unsigned)SEW << ", " |
| << "TailAgnostic=" << (bool)TailAgnostic << ", " |
| << "MaskAgnostic=" << (bool)MaskAgnostic << ", " |
| << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}"; |
| } |
| #endif |
| }; |
| |
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| LLVM_ATTRIBUTE_USED |
| inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) { |
| V.print(OS); |
| return OS; |
| } |
| #endif |
| |
| struct BlockData { |
| // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this |
| // block. Calculated in Phase 2. |
| VSETVLIInfo Exit; |
| |
| // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor |
| // blocks. Calculated in Phase 2, and used by Phase 3. |
| VSETVLIInfo Pred; |
| |
| // Keeps track of whether the block is already in the queue. |
| bool InQueue = false; |
| |
| BlockData() = default; |
| }; |
| |
| class RISCVInsertVSETVLI : public MachineFunctionPass { |
| const RISCVSubtarget *ST; |
| const TargetInstrInfo *TII; |
| MachineRegisterInfo *MRI; |
| |
| std::vector<BlockData> BlockInfo; |
| std::queue<const MachineBasicBlock *> WorkList; |
| |
| public: |
| static char ID; |
| |
| RISCVInsertVSETVLI() : MachineFunctionPass(ID) {} |
| bool runOnMachineFunction(MachineFunction &MF) override; |
| |
| void getAnalysisUsage(AnalysisUsage &AU) const override { |
| AU.setPreservesCFG(); |
| MachineFunctionPass::getAnalysisUsage(AU); |
| } |
| |
| StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; } |
| |
| private: |
| bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require, |
| const VSETVLIInfo &CurInfo) const; |
| bool needVSETVLIPHI(const VSETVLIInfo &Require, |
| const MachineBasicBlock &MBB) const; |
| void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI, |
| const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo); |
| void insertVSETVLI(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator InsertPt, DebugLoc DL, |
| const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo); |
| |
| void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const; |
| void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const; |
| bool computeVLVTYPEChanges(const MachineBasicBlock &MBB, |
| VSETVLIInfo &Info) const; |
| void computeIncomingVLVTYPE(const MachineBasicBlock &MBB); |
| void emitVSETVLIs(MachineBasicBlock &MBB); |
| void doPRE(MachineBasicBlock &MBB); |
| void insertReadVL(MachineBasicBlock &MBB); |
| }; |
| |
| class RISCVCoalesceVSETVLI : public MachineFunctionPass { |
| public: |
| static char ID; |
| const RISCVSubtarget *ST; |
| const TargetInstrInfo *TII; |
| MachineRegisterInfo *MRI; |
| LiveIntervals *LIS; |
| |
| RISCVCoalesceVSETVLI() : MachineFunctionPass(ID) {} |
| bool runOnMachineFunction(MachineFunction &MF) override; |
| |
| void getAnalysisUsage(AnalysisUsage &AU) const override { |
| AU.setPreservesCFG(); |
| |
| AU.addRequired<LiveIntervals>(); |
| AU.addPreserved<LiveIntervals>(); |
| AU.addRequired<SlotIndexes>(); |
| AU.addPreserved<SlotIndexes>(); |
| AU.addPreserved<LiveDebugVariables>(); |
| AU.addPreserved<LiveStacks>(); |
| |
| MachineFunctionPass::getAnalysisUsage(AU); |
| } |
| |
| StringRef getPassName() const override { return RISCV_COALESCE_VSETVLI_NAME; } |
| |
| private: |
| bool coalesceVSETVLIs(MachineBasicBlock &MBB); |
| }; |
| |
| } // end anonymous namespace |
| |
| char RISCVInsertVSETVLI::ID = 0; |
| |
| INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME, |
| false, false) |
| |
| char RISCVCoalesceVSETVLI::ID = 0; |
| |
| INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli", |
| RISCV_COALESCE_VSETVLI_NAME, false, false) |
| |
| // Return a VSETVLIInfo representing the changes made by this VSETVLI or |
| // VSETIVLI instruction. |
| static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI, |
| const MachineRegisterInfo &MRI) { |
| VSETVLIInfo NewInfo; |
| if (MI.getOpcode() == RISCV::PseudoVSETIVLI) { |
| NewInfo.setAVLImm(MI.getOperand(1).getImm()); |
| } else { |
| assert(MI.getOpcode() == RISCV::PseudoVSETVLI || |
| MI.getOpcode() == RISCV::PseudoVSETVLIX0); |
| Register AVLReg = MI.getOperand(1).getReg(); |
| assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) && |
| "Can't handle X0, X0 vsetvli yet"); |
| if (AVLReg == RISCV::X0) |
| NewInfo.setAVLVLMAX(); |
| else |
| NewInfo.setAVLRegDef(MRI.getVRegDef(AVLReg), AVLReg); |
| } |
| NewInfo.setVTYPE(MI.getOperand(2).getImm()); |
| |
| return NewInfo; |
| } |
| |
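| // Compute VLMAX = (VLEN / SEW) * LMUL. For example, VLEN=128, SEW=32, LMUL=2 |
| // gives VLMAX=8, and VLEN=128, SEW=8, LMUL=1/4 gives VLMAX=4. |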
| static unsigned computeVLMAX(unsigned VLEN, unsigned SEW, |
| RISCVII::VLMUL VLMul) { |
| auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul); |
| if (Fractional) |
| VLEN = VLEN / LMul; |
| else |
| VLEN = VLEN * LMul; |
| return VLEN/SEW; |
| } |
| |
| static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags, |
| const RISCVSubtarget &ST, |
| const MachineRegisterInfo *MRI) { |
| VSETVLIInfo InstrInfo; |
| |
| bool TailAgnostic = true; |
| bool MaskAgnostic = true; |
| if (!hasUndefinedMergeOp(MI, *MRI)) { |
| // Start with undisturbed. |
| TailAgnostic = false; |
| MaskAgnostic = false; |
| |
| // If there is a policy operand, use it. |
| if (RISCVII::hasVecPolicyOp(TSFlags)) { |
| const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1); |
| uint64_t Policy = Op.getImm(); |
| assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) && |
| "Invalid Policy Value"); |
| TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC; |
| MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC; |
| } |
| |
| // Some pseudo instructions force a tail agnostic policy despite having a |
| // tied def. |
| if (RISCVII::doesForceTailAgnostic(TSFlags)) |
| TailAgnostic = true; |
| |
| if (!RISCVII::usesMaskPolicy(TSFlags)) |
| MaskAgnostic = true; |
| } |
| |
| RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags); |
| |
| unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm(); |
| // A Log2SEW of 0 is an operation on mask registers only. |
| unsigned SEW = Log2SEW ? 1 << Log2SEW : 8; |
| assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); |
| |
| if (RISCVII::hasVLOp(TSFlags)) { |
| const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); |
| if (VLOp.isImm()) { |
| int64_t Imm = VLOp.getImm(); |
| // Convert the VLMax sentinel to the X0 register. |
| if (Imm == RISCV::VLMaxSentinel) { |
| // If we know the exact VLEN, see if we can use the constant encoding |
| // for the VLMAX instead. This reduces register pressure slightly. |
| const unsigned VLMAX = computeVLMAX(ST.getRealMaxVLen(), SEW, VLMul); |
| if (ST.getRealMinVLen() == ST.getRealMaxVLen() && VLMAX <= 31) |
| InstrInfo.setAVLImm(VLMAX); |
| else |
| InstrInfo.setAVLVLMAX(); |
| } else |
| InstrInfo.setAVLImm(Imm); |
| } else { |
| InstrInfo.setAVLRegDef(MRI->getVRegDef(VLOp.getReg()), VLOp.getReg()); |
| } |
| } else { |
| assert(isScalarExtractInstr(MI)); |
| // TODO: If we are more clever about x0,x0 insertion then we should be able |
| // to deduce that the VL is ignored based off of DemandedFields, and remove |
| // the AVLIsIgnored state. Then we can just use an arbitrary immediate AVL. |
| InstrInfo.setAVLIgnored(); |
| } |
| #ifndef NDEBUG |
| if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) { |
| assert(SEW == EEW && "Initial SEW doesn't match expected EEW"); |
| } |
| #endif |
| InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic); |
| |
| // If AVL is defined by a vsetvli with the same VLMAX, we can replace the |
| // AVL operand with the AVL of the defining vsetvli. We avoid general |
| // register AVLs to avoid extending live ranges without being sure we can |
| // kill the original source reg entirely. |
| if (InstrInfo.hasAVLReg()) { |
| const MachineInstr &DefMI = InstrInfo.getAVLDefMI(); |
| if (isVectorConfigInstr(DefMI)) { |
| VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(DefMI, *MRI); |
| if (DefInstrInfo.hasSameVLMAX(InstrInfo) && |
| (DefInstrInfo.hasAVLImm() || DefInstrInfo.hasAVLVLMAX())) |
| InstrInfo.setAVL(DefInstrInfo); |
| } |
| } |
| |
| return InstrInfo; |
| } |
| |
| void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI, |
| const VSETVLIInfo &Info, |
| const VSETVLIInfo &PrevInfo) { |
| DebugLoc DL = MI.getDebugLoc(); |
| insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo); |
| } |
| |
| void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, |
| MachineBasicBlock::iterator InsertPt, DebugLoc DL, |
| const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) { |
| |
| ++NumInsertedVSETVL; |
| if (PrevInfo.isValid() && !PrevInfo.isUnknown()) { |
| // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same |
| // VLMAX. |
| if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) { |
| BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) |
| .addReg(RISCV::X0, RegState::Define | RegState::Dead) |
| .addReg(RISCV::X0, RegState::Kill) |
| .addImm(Info.encodeVTYPE()) |
| .addReg(RISCV::VL, RegState::Implicit); |
| return; |
| } |
| |
| // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If |
| // it has the same VLMAX we want and the last VL/VTYPE we observed is the |
| // same, we can use the X0, X0 form. |
| if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) { |
| const MachineInstr &DefMI = Info.getAVLDefMI(); |
| if (isVectorConfigInstr(DefMI)) { |
| VSETVLIInfo DefInfo = getInfoForVSETVLI(DefMI, *MRI); |
| if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) { |
| BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) |
| .addReg(RISCV::X0, RegState::Define | RegState::Dead) |
| .addReg(RISCV::X0, RegState::Kill) |
| .addImm(Info.encodeVTYPE()) |
| .addReg(RISCV::VL, RegState::Implicit); |
| return; |
| } |
| } |
| } |
| } |
| |
| if (Info.hasAVLImm()) { |
| BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI)) |
| .addReg(RISCV::X0, RegState::Define | RegState::Dead) |
| .addImm(Info.getAVLImm()) |
| .addImm(Info.encodeVTYPE()); |
| return; |
| } |
| |
| if (Info.hasAVLIgnored()) { |
| // We can only use x0, x0 if there's no chance of the vtype change causing |
| // the previous vl to become invalid. |
| if (PrevInfo.isValid() && !PrevInfo.isUnknown() && |
| Info.hasSameVLMAX(PrevInfo)) { |
| BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) |
| .addReg(RISCV::X0, RegState::Define | RegState::Dead) |
| .addReg(RISCV::X0, RegState::Kill) |
| .addImm(Info.encodeVTYPE()) |
| .addReg(RISCV::VL, RegState::Implicit); |
| return; |
| } |
| // Otherwise use an AVL of 1 to avoid depending on previous vl. |
| BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI)) |
| .addReg(RISCV::X0, RegState::Define | RegState::Dead) |
| .addImm(1) |
| .addImm(Info.encodeVTYPE()); |
| return; |
| } |
| |
| if (Info.hasAVLVLMAX()) { |
| Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass); |
| BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) |
| .addReg(DestReg, RegState::Define | RegState::Dead) |
| .addReg(RISCV::X0, RegState::Kill) |
| .addImm(Info.encodeVTYPE()); |
| return; |
| } |
| |
| Register AVLReg = Info.getAVLReg(); |
| MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass); |
| BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI)) |
| .addReg(RISCV::X0, RegState::Define | RegState::Dead) |
| .addReg(AVLReg) |
| .addImm(Info.encodeVTYPE()); |
| } |
| |
| static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) { |
| auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL); |
| return Fractional || LMul == 1; |
| } |
| |
| /// Return true if a VSETVLI is required to transition from CurInfo to Require |
| /// before MI. |
| bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI, |
| const VSETVLIInfo &Require, |
| const VSETVLIInfo &CurInfo) const { |
| assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, MRI)); |
| |
| if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly()) |
| return true; |
| |
| DemandedFields Used = getDemanded(MI, MRI, ST); |
| |
| // A slidedown/slideup with an *undefined* merge op can freely clobber |
| // elements not copied from the source vector (e.g. masked off, tail, or |
| // slideup's prefix). Notes: |
| // * We can't modify SEW here since the slide amount is in units of SEW. |
| // * VL=1 is special only because we have existing support for zero vs |
| // non-zero VL. We could generalize this if we had a VL > C predicate. |
| // * The LMUL1 restriction is for machines whose latency may depend on VL. |
| // * As above, this is only legal for tail "undefined" not "agnostic". |
| if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 && |
| isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) { |
| Used.VLAny = false; |
| Used.VLZeroness = true; |
| Used.LMUL = false; |
| Used.TailPolicy = false; |
| } |
| |
| // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated semantically |
| // the same as vmv.s.x. This is particularly useful since we don't have an |
| // immediate form of vmv.s.x, and thus frequently use vmv.v.i in its place. |
| // Since a splat is non-constant time in LMUL, we do need to be careful to not |
| // increase the number of active vector registers (unlike for vmv.s.x.) |
| if (isScalarSplatInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 && |
| isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) { |
| Used.LMUL = false; |
| Used.SEWLMULRatio = false; |
| Used.VLAny = false; |
| if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64()) |
| Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64; |
| else |
| Used.SEW = DemandedFields::SEWGreaterThanOrEqual; |
| Used.TailPolicy = false; |
| } |
| |
| if (CurInfo.isCompatible(Used, Require, *MRI)) |
| return false; |
| |
| // We didn't find a compatible value. If our AVL is a virtual register, |
| // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need |
| // and the last VL/VTYPE we observed is the same, we don't need a |
| // VSETVLI here. |
| if (Require.hasAVLReg() && CurInfo.hasCompatibleVTYPE(Used, Require)) { |
| const MachineInstr &DefMI = Require.getAVLDefMI(); |
| if (isVectorConfigInstr(DefMI)) { |
| VSETVLIInfo DefInfo = getInfoForVSETVLI(DefMI, *MRI); |
| if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo)) |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| // If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we |
| // maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more |
| // places. |
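| // For example, if the previous state was SEW=32/LMUL=M1 (ratio 32) and the new |
| // instruction only demands SEW=16 (neither LMUL nor the ratio), switch to |
| // LMUL=MF2 so the ratio stays 32 and the VL toggle can potentially be elided. |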
| static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo, |
| DemandedFields &Demanded) { |
| VSETVLIInfo Info = NewInfo; |
| |
| if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() && |
| !PrevInfo.isUnknown()) { |
| if (auto NewVLMul = RISCVVType::getSameRatioLMUL( |
| PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW())) |
| Info.setVLMul(*NewVLMul); |
| Demanded.LMUL = true; |
| } |
| |
| return Info; |
| } |
| |
| // Given an incoming state reaching MI, minimally modifies that state so that it |
| // is compatible with MI. The resulting state is guaranteed to be semantically |
| // legal for MI, but may not be the state requested by MI. |
| void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, |
| const MachineInstr &MI) const { |
| uint64_t TSFlags = MI.getDesc().TSFlags; |
| if (!RISCVII::hasSEWOp(TSFlags)) |
| return; |
| |
| const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, MRI); |
| assert(NewInfo.isValid() && !NewInfo.isUnknown()); |
| if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info)) |
| return; |
| |
| const VSETVLIInfo PrevInfo = Info; |
| if (!Info.isValid() || Info.isUnknown()) |
| Info = NewInfo; |
| |
| DemandedFields Demanded = getDemanded(MI, MRI, ST); |
| const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded); |
| |
| // If MI only demands that VL has the same zeroness, we only need to set the |
| // AVL if the zeroness differs. This removes a vsetvli entirely if the types |
| // match, or allows use of the cheaper AVL-preserving variant if VLMAX doesn't |
| // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype' |
| // variant, so we avoid the transform to prevent extending the live range of an |
| // AVL register operand. |
| // TODO: We can probably relax this for immediates. |
| bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo) && |
| IncomingInfo.hasSameVLMAX(PrevInfo); |
| if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero)) |
| Info.setAVL(IncomingInfo); |
| |
| Info.setVTYPE( |
| ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info) |
| .getVLMUL(), |
| ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(), |
| // Prefer tail/mask agnostic since it can be relaxed to undisturbed later |
| // if needed. |
| (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() || |
| IncomingInfo.getTailAgnostic(), |
| (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() || |
| IncomingInfo.getMaskAgnostic()); |
| |
| // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep |
| // the AVL. |
| if (Info.hasSEWLMULRatioOnly()) { |
| VSETVLIInfo RatiolessInfo = IncomingInfo; |
| RatiolessInfo.setAVL(Info); |
| Info = RatiolessInfo; |
| } |
| } |
| |
| // Given a state with which we evaluated MI (see transferBefore above for why |
| // this might be different than the state MI requested), modify the state to |
| // reflect the changes MI might make. |
| void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, |
| const MachineInstr &MI) const { |
| if (isVectorConfigInstr(MI)) { |
| Info = getInfoForVSETVLI(MI, *MRI); |
| return; |
| } |
| |
| if (RISCV::isFaultFirstLoad(MI)) { |
| // Update AVL to vl-output of the fault first load. |
| Info.setAVLRegDef(MRI->getVRegDef(MI.getOperand(1).getReg()), |
| MI.getOperand(1).getReg()); |
| return; |
| } |
| |
| // If this is something that updates VL/VTYPE that we don't know about, set |
| // the state to unknown. |
| if (MI.isCall() || MI.isInlineAsm() || |
| MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) || |
| MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr)) |
| Info = VSETVLIInfo::getUnknown(); |
| } |
| |
| bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB, |
| VSETVLIInfo &Info) const { |
| bool HadVectorOp = false; |
| |
| Info = BlockInfo[MBB.getNumber()].Pred; |
| for (const MachineInstr &MI : MBB) { |
| transferBefore(Info, MI); |
| |
| if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags)) |
| HadVectorOp = true; |
| |
| transferAfter(Info, MI); |
| } |
| |
| return HadVectorOp; |
| } |
| |
| void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) { |
| |
| BlockData &BBInfo = BlockInfo[MBB.getNumber()]; |
| |
| BBInfo.InQueue = false; |
| |
| // Start with the previous entry so that we keep the most conservative state |
| // we have ever found. |
| VSETVLIInfo InInfo = BBInfo.Pred; |
| if (MBB.pred_empty()) { |
| // There are no predecessors, so use the default starting status. |
| InInfo.setUnknown(); |
| } else { |
| for (MachineBasicBlock *P : MBB.predecessors()) |
| InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit); |
| } |
| |
| // If we don't have any valid predecessor value, wait until we do. |
| if (!InInfo.isValid()) |
| return; |
| |
| // If no change, no need to rerun block |
| if (InInfo == BBInfo.Pred) |
| return; |
| |
| BBInfo.Pred = InInfo; |
| LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB) |
| << " changed to " << BBInfo.Pred << "\n"); |
| |
| // Note: It's tempting to cache the state changes here, but due to the |
| // compatibility checks performed, a block's output state can change based on |
| // the input state. To cache, we'd have to add logic for finding |
| // never-compatible state changes. |
| VSETVLIInfo TmpStatus; |
| computeVLVTYPEChanges(MBB, TmpStatus); |
| |
| // If the new exit value matches the old exit value, we don't need to revisit |
| // any blocks. |
| if (BBInfo.Exit == TmpStatus) |
| return; |
| |
| BBInfo.Exit = TmpStatus; |
| LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB) |
| << " changed to " << BBInfo.Exit << "\n"); |
| |
| // Add the successors to the work list so we can propagate the changed exit |
| // status. |
| for (MachineBasicBlock *S : MBB.successors()) |
| if (!BlockInfo[S->getNumber()].InQueue) { |
| BlockInfo[S->getNumber()].InQueue = true; |
| WorkList.push(S); |
| } |
| } |
| |
| // If we weren't able to prove a vsetvli was directly unneeded, it might still |
| // be unneeded if the AVL is a phi node where all incoming values are VL |
| // outputs from the last VSETVLI in their respective basic blocks. |
| bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, |
| const MachineBasicBlock &MBB) const { |
| if (DisableInsertVSETVLPHIOpt) |
| return true; |
| |
| if (!Require.hasAVLReg()) |
| return true; |
| |
| // We need the AVL to be produced by a PHI node in this basic block. |
| const MachineInstr *PHI = &Require.getAVLDefMI(); |
| if (PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB) |
| return true; |
| |
| for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps; |
| PHIOp += 2) { |
| Register InReg = PHI->getOperand(PHIOp).getReg(); |
| MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB(); |
| const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit; |
| |
| // We need the PHI input to be the output of a VSET(I)VLI. |
| MachineInstr *DefMI = MRI->getVRegDef(InReg); |
| if (!DefMI || !isVectorConfigInstr(*DefMI)) |
| return true; |
| |
| // We found a VSET(I)VLI; make sure it matches the output of the |
| // predecessor block. |
| VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, *MRI); |
| if (DefInfo != PBBExit) |
| return true; |
| |
| // Require has the same VL as PBBExit, so if the exit from the |
| // predecessor has the VTYPE we are looking for we might be able |
| // to avoid a VSETVLI. |
| if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require)) |
| return true; |
| } |
| |
| // If all the incoming values to the PHI checked out, we don't need |
| // to insert a VSETVLI. |
| return false; |
| } |
| |
| void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { |
| VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred; |
| // Track whether the prefix of the block we've scanned is transparent |
| // (meaning has not yet changed the abstract state). |
| bool PrefixTransparent = true; |
| for (MachineInstr &MI : MBB) { |
| const VSETVLIInfo PrevInfo = CurInfo; |
| transferBefore(CurInfo, MI); |
| |
| // If this is an explicit VSETVLI or VSETIVLI, update our state. |
| if (isVectorConfigInstr(MI)) { |
| // Conservatively, mark the VL and VTYPE as live. |
| assert(MI.getOperand(3).getReg() == RISCV::VL && |
| MI.getOperand(4).getReg() == RISCV::VTYPE && |
| "Unexpected operands where VL and VTYPE should be"); |
| MI.getOperand(3).setIsDead(false); |
| MI.getOperand(4).setIsDead(false); |
| PrefixTransparent = false; |
| } |
| |
| uint64_t TSFlags = MI.getDesc().TSFlags; |
| if (RISCVII::hasSEWOp(TSFlags)) { |
| if (PrevInfo != CurInfo) { |
| // If this is the first implicit state change, and the state change |
| // requested can be proven to produce the same register contents, we |
| // can skip emitting the actual state change and continue as if we |
| // had, since we know the GPR result of the implicit state change |
| // wouldn't be used and VL/VTYPE registers are correct. Note that |
| // we *do* need to model the state as if it changed as while the |
| // register contents are unchanged, the abstract model can change. |
| if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB)) |
| insertVSETVLI(MBB, MI, CurInfo, PrevInfo); |
| PrefixTransparent = false; |
| } |
| |
| if (RISCVII::hasVLOp(TSFlags)) { |
| MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); |
| if (VLOp.isReg()) { |
| Register Reg = VLOp.getReg(); |
| MachineInstr *VLOpDef = MRI->getVRegDef(Reg); |
| |
| // Erase the AVL operand from the instruction. |
| VLOp.setReg(RISCV::NoRegister); |
| VLOp.setIsKill(false); |
| |
| // If the AVL was an immediate > 31, then it would have been emitted |
| // as an ADDI. However, the ADDI might not have been used in the |
| // vsetvli, or a vsetvli might not have been emitted, so it may be |
| // dead now. |
| if (VLOpDef && TII->isAddImmediate(*VLOpDef, Reg) && |
| MRI->use_nodbg_empty(Reg)) |
| VLOpDef->eraseFromParent(); |
| } |
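| // The VL for this instruction now comes from the preceding vsetvli; record |
| // that dependency with an implicit use of the VL register. |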
| MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false, |
| /*isImp*/ true)); |
| } |
| MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false, |
| /*isImp*/ true)); |
| } |
| |
| if (MI.isCall() || MI.isInlineAsm() || |
| MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) || |
| MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr)) |
| PrefixTransparent = false; |
| |
| transferAfter(CurInfo, MI); |
| } |
| |
| // If we reach the end of the block and our current info doesn't match the |
| // expected info, insert a vsetvli to correct. |
| if (!UseStrictAsserts) { |
| const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit; |
| if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() && |
| CurInfo != ExitInfo) { |
| // Note there's an implicit assumption here that terminators never use |
| // or modify VL or VTYPE. Also, fallthrough will return end(). |
| auto InsertPt = MBB.getFirstInstrTerminator(); |
| insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo, |
| CurInfo); |
| CurInfo = ExitInfo; |
| } |
| } |
| |
| if (UseStrictAsserts && CurInfo.isValid()) { |
| const auto &Info = BlockInfo[MBB.getNumber()]; |
| if (CurInfo != Info.Exit) { |
| LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n"); |
| LLVM_DEBUG(dbgs() << " begin state: " << Info.Pred << "\n"); |
| LLVM_DEBUG(dbgs() << " expected end state: " << Info.Exit << "\n"); |
| LLVM_DEBUG(dbgs() << " actual end state: " << CurInfo << "\n"); |
| } |
| assert(CurInfo == Info.Exit && |
| "InsertVSETVLI dataflow invariant violated"); |
| } |
| } |
| |
| /// Perform simple partial redundancy elimination of the VSETVLI instructions |
| /// we're about to insert by looking for cases where we can PRE from the |
| /// beginning of one block to the end of one of its predecessors. Specifically, |
| /// this is geared to catch the common case of a fixed length vsetvl in a single |
| /// block loop when it could execute once in the preheader instead. |
| void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { |
| if (!BlockInfo[MBB.getNumber()].Pred.isUnknown()) |
| return; |
| |
| MachineBasicBlock *UnavailablePred = nullptr; |
| VSETVLIInfo AvailableInfo; |
| for (MachineBasicBlock *P : MBB.predecessors()) { |
| const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit; |
| if (PredInfo.isUnknown()) { |
| if (UnavailablePred) |
| return; |
| UnavailablePred = P; |
| } else if (!AvailableInfo.isValid()) { |
| AvailableInfo = PredInfo; |
| } else if (AvailableInfo != PredInfo) { |
| return; |
| } |
| } |
| |
| // Unreachable, single pred, or full redundancy. Note that FRE is handled by |
| // phase 3. |
| if (!UnavailablePred || !AvailableInfo.isValid()) |
| return; |
| |
| // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of |
| // the unavailable pred. |
| if (AvailableInfo.hasSEWLMULRatioOnly()) |
| return; |
| |
| // Critical edge - TODO: consider splitting? |
| if (UnavailablePred->succ_size() != 1) |
| return; |
| |
| // If the AVL value is a register (other than our VLMAX sentinel), |
| // we need to prove the value is available at the point where we're going |
| // to insert the vsetvli. |
| if (AvailableInfo.hasAVLReg()) { |
| const MachineInstr *AVLDefMI = &AvailableInfo.getAVLDefMI(); |
| // This is an inline dominance check which covers the case of |
| // UnavailablePred being the preheader of a loop. |
| if (AVLDefMI->getParent() != UnavailablePred) |
| return; |
| for (auto &TermMI : UnavailablePred->terminators()) |
| if (&TermMI == AVLDefMI) |
| return; |
| } |
| |
| // If the AVL isn't used in its predecessors then bail, since we have no AVL |
| // to insert a vsetvli with. |
| if (AvailableInfo.hasAVLIgnored()) |
| return; |
| |
| // Model the effect of changing the input state of the block MBB to |
| // AvailableInfo. We're looking for two issues here; one legality, |
| // one profitability. |
| // 1) If the block doesn't use some of the fields from VL or VTYPE, we |
| // may hit the end of the block with a different end state. We can |
| // not make this change without reflowing later blocks as well. |
| // 2) If we don't actually remove a transition, inserting a vsetvli |
| // into the predecessor block would be correct, but unprofitable. |
| VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred; |
| VSETVLIInfo CurInfo = AvailableInfo; |
| int TransitionsRemoved = 0; |
| for (const MachineInstr &MI : MBB) { |
| const VSETVLIInfo LastInfo = CurInfo; |
| const VSETVLIInfo LastOldInfo = OldInfo; |
| transferBefore(CurInfo, MI); |
| transferBefore(OldInfo, MI); |
| if (CurInfo == LastInfo) |
| TransitionsRemoved++; |
| if (LastOldInfo == OldInfo) |
| TransitionsRemoved--; |
| transferAfter(CurInfo, MI); |
| transferAfter(OldInfo, MI); |
| if (CurInfo == OldInfo) |
| // Convergence. All transitions after this must match by construction. |
| break; |
| } |
| if (CurInfo != OldInfo || TransitionsRemoved <= 0) |
| // Issues 1 and 2 above |
| return; |
| |
| // Finally, update both data flow state and insert the actual vsetvli. |
| // Doing both keeps the code in sync with the dataflow results, which |
| // is critical for correctness of phase 3. |
| auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit; |
| LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to " |
| << UnavailablePred->getName() << " with state " |
| << AvailableInfo << "\n"); |
| BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo; |
| BlockInfo[MBB.getNumber()].Pred = AvailableInfo; |
| |
| // Note there's an implicit assumption here that terminators never use |
| // or modify VL or VTYPE. Also, fallthrough will return end(). |
| auto InsertPt = UnavailablePred->getFirstInstrTerminator(); |
| insertVSETVLI(*UnavailablePred, InsertPt, |
| UnavailablePred->findDebugLoc(InsertPt), |
| AvailableInfo, OldExit); |
| } |
| |
| // Return true if we can mutate PrevMI to match MI without changing any of the |
| // fields which would be observed. |
| static bool canMutatePriorConfig(const MachineInstr &PrevMI, |
| const MachineInstr &MI, |
| const DemandedFields &Used, |
| const MachineRegisterInfo &MRI) { |
| // If the VL values aren't equal, return false if either a) the former is |
| // demanded, or b) we can't rewrite the former to be the latter for |
| // implementation reasons. |
| if (!isVLPreservingConfig(MI)) { |
| if (Used.VLAny) |
| return false; |
| |
| if (Used.VLZeroness) { |
| if (isVLPreservingConfig(PrevMI)) |
| return false; |
| if (!getInfoForVSETVLI(PrevMI, MRI) |
| .hasEquallyZeroAVL(getInfoForVSETVLI(MI, MRI))) |
| return false; |
| } |
| |
| auto &AVL = MI.getOperand(1); |
| auto &PrevAVL = PrevMI.getOperand(1); |
| |
| // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI. |
| // For now just check that PrevMI uses the same virtual register. |
| if (AVL.isReg() && AVL.getReg() != RISCV::X0 && |
| (!MRI.hasOneDef(AVL.getReg()) || !PrevAVL.isReg() || |
| PrevAVL.getReg() != AVL.getReg())) |
| return false; |
| } |
| |
| assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm()); |
| auto PriorVType = PrevMI.getOperand(2).getImm(); |
| auto VType = MI.getOperand(2).getImm(); |
| return areCompatibleVTYPEs(PriorVType, VType, Used); |
| } |
| |
| bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) { |
| MachineInstr *NextMI = nullptr; |
| // We can have arbitrary code in successors, so VL and VTYPE |
| // must be considered demanded. |
| DemandedFields Used; |
| Used.demandVL(); |
| Used.demandVTYPE(); |
| SmallVector<MachineInstr*> ToDelete; |
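| // Walk the block backwards. NextMI is the most recently visited vsetvli (the |
| // next config instruction in program order); Used accumulates the VL/VTYPE |
| // fields demanded by the instructions between MI and NextMI. |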
| for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { |
| |
| if (!isVectorConfigInstr(MI)) { |
| Used.doUnion(getDemanded(MI, MRI, ST)); |
| if (MI.isCall() || MI.isInlineAsm() || |
| MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) || |
| MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr)) |
| NextMI = nullptr; |
| continue; |
| } |
| |
| Register RegDef = MI.getOperand(0).getReg(); |
| assert(RegDef == RISCV::X0 || RegDef.isVirtual()); |
| if (RegDef != RISCV::X0 && !MRI->use_nodbg_empty(RegDef)) |
| Used.demandVL(); |
| |
| if (NextMI) { |
| if (!Used.usedVL() && !Used.usedVTYPE()) { |
| ToDelete.push_back(&MI); |
| // Leave NextMI unchanged |
| continue; |
| } |
| |
| if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) { |
| if (!isVLPreservingConfig(*NextMI)) { |
| Register DefReg = NextMI->getOperand(0).getReg(); |
| |
| MI.getOperand(0).setReg(DefReg); |
| MI.getOperand(0).setIsDead(false); |
| |
| // The def of DefReg moved to MI, so extend the LiveInterval up to |
| // it. |
| if (DefReg.isVirtual()) { |
| LiveInterval &DefLI = LIS->getInterval(DefReg); |
| SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot(); |
| VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex()); |
| LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI); |
| DefLI.addSegment(S); |
| DefVNI->def = MISlot; |
| // Mark DefLI as spillable if it was previously unspillable |
| DefLI.setWeight(0); |
| |
| // DefReg may have had no uses, in which case we need to shrink |
| // the LiveInterval up to MI. |
| LIS->shrinkToUses(&DefLI); |
| } |
| |
| Register OldVLReg; |
| if (MI.getOperand(1).isReg()) |
| OldVLReg = MI.getOperand(1).getReg(); |
| if (NextMI->getOperand(1).isImm()) |
| MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm()); |
| else |
| MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false); |
| |
| // Clear NextMI's AVL early so we're not counting it as a use. |
| if (NextMI->getOperand(1).isReg()) |
| NextMI->getOperand(1).setReg(RISCV::NoRegister); |
| |
| if (OldVLReg && OldVLReg.isVirtual()) { |
| // NextMI no longer uses OldVLReg so shrink its LiveInterval. |
| LIS->shrinkToUses(&LIS->getInterval(OldVLReg)); |
| |
| MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg); |
| if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) && |
| MRI->use_nodbg_empty(OldVLReg)) { |
| VLOpDef->eraseFromParent(); |
| LIS->removeInterval(OldVLReg); |
| } |
| } |
| MI.setDesc(NextMI->getDesc()); |
| } |
| MI.getOperand(2).setImm(NextMI->getOperand(2).getImm()); |
| ToDelete.push_back(NextMI); |
| // fallthrough |
| } |
| } |
| NextMI = &MI; |
| Used = getDemanded(MI, MRI, ST); |
| } |
| |
| NumCoalescedVSETVL += ToDelete.size(); |
| for (auto *MI : ToDelete) { |
| LIS->RemoveMachineInstrFromMaps(*MI); |
| MI->eraseFromParent(); |
| } |
| |
| return !ToDelete.empty(); |
| } |
| |
| void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) { |
| for (auto I = MBB.begin(), E = MBB.end(); I != E;) { |
| MachineInstr &MI = *I++; |
| if (RISCV::isFaultFirstLoad(MI)) { |
| Register VLOutput = MI.getOperand(1).getReg(); |
| if (!MRI->use_nodbg_empty(VLOutput)) |
| BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL), |
| VLOutput); |
| // We don't use the vl output of the VLEFF/VLSEGFF anymore. |
| MI.getOperand(1).setReg(RISCV::X0); |
| } |
| } |
| } |
| |
| bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { |
| // Skip if the vector extension is not enabled. |
| ST = &MF.getSubtarget<RISCVSubtarget>(); |
| if (!ST->hasVInstructions()) |
| return false; |
| |
| LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n"); |
| |
| TII = ST->getInstrInfo(); |
| MRI = &MF.getRegInfo(); |
| |
| assert(BlockInfo.empty() && "Expect empty block infos"); |
| BlockInfo.resize(MF.getNumBlockIDs()); |
| |
| bool HaveVectorOp = false; |
| |
| // Phase 1 - determine how VL/VTYPE are affected by each block. |
| for (const MachineBasicBlock &MBB : MF) { |
| VSETVLIInfo TmpStatus; |
| HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus); |
| // Initial exit state is whatever change we found in the block. |
| BlockData &BBInfo = BlockInfo[MBB.getNumber()]; |
| BBInfo.Exit = TmpStatus; |
| LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB) |
| << " is " << BBInfo.Exit << "\n"); |
| |
| } |
| |
| // If we didn't find any instructions that need VSETVLI, we're done. |
| if (!HaveVectorOp) { |
| BlockInfo.clear(); |
| return false; |
| } |
| |
| // Phase 2 - determine the exit VL/VTYPE from each block. We add all |
| // blocks to the list here, but will also add any that need to be revisited |
| // during Phase 2 processing. |
| for (const MachineBasicBlock &MBB : MF) { |
| WorkList.push(&MBB); |
| BlockInfo[MBB.getNumber()].InQueue = true; |
| } |
| while (!WorkList.empty()) { |
| const MachineBasicBlock &MBB = *WorkList.front(); |
| WorkList.pop(); |
| computeIncomingVLVTYPE(MBB); |
| } |
| |
| // Perform partial redundancy elimination of vsetvli transitions. |
| for (MachineBasicBlock &MBB : MF) |
| doPRE(MBB); |
| |
| // Phase 3 - add any vsetvli instructions needed in the block. Use the |
| // Phase 2 information to avoid adding vsetvlis before the first vector |
| // instruction in the block if the VL/VTYPE is satisfied by its |
| // predecessors. |
| for (MachineBasicBlock &MBB : MF) |
| emitVSETVLIs(MBB); |
| |
| // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output |
| // of VLEFF/VLSEGFF. |
| for (MachineBasicBlock &MBB : MF) |
| insertReadVL(MBB); |
| |
| BlockInfo.clear(); |
| return HaveVectorOp; |
| } |
| |
| /// Returns an instance of the Insert VSETVLI pass. |
| FunctionPass *llvm::createRISCVInsertVSETVLIPass() { |
| return new RISCVInsertVSETVLI(); |
| } |
| |
| // Now that all vsetvlis are explicit, go through and do block local |
| // DSE and peephole based demanded fields based transforms. Note that |
| // this *must* be done outside the main dataflow so long as we allow |
| // any cross block analysis within the dataflow. We can't have both |
| // demanded fields based mutation and non-local analysis in the |
| // dataflow at the same time without introducing inconsistencies. |
| bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) { |
| // Skip if the vector extension is not enabled. |
| ST = &MF.getSubtarget<RISCVSubtarget>(); |
| if (!ST->hasVInstructions()) |
| return false; |
| TII = ST->getInstrInfo(); |
| MRI = &MF.getRegInfo(); |
| LIS = &getAnalysis<LiveIntervals>(); |
| |
| bool Changed = false; |
| for (MachineBasicBlock &MBB : MF) |
| Changed |= coalesceVSETVLIs(MBB); |
| |
| return Changed; |
| } |
| |
| FunctionPass *llvm::createRISCVCoalesceVSETVLIPass() { |
| return new RISCVCoalesceVSETVLI(); |
| } |