blob: b27e1dd258ebd0be6559147838f0a84f0251b0d0 [file] [log] [blame]
//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//
#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;
#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"
STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");
static cl::opt<bool> DisableInsertVSETVLPHIOpt(
"riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
cl::desc("Disable looking through phis when inserting vsetvlis."));
static cl::opt<bool> UseStrictAsserts(
"riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
cl::desc("Enable strict assertion checking for the dataflow algorithm"));
namespace {
static unsigned getVLOpNum(const MachineInstr &MI) {
return RISCVII::getVLOpNum(MI.getDesc());
}
static unsigned getSEWOpNum(const MachineInstr &MI) {
return RISCVII::getSEWOpNum(MI.getDesc());
}
static bool isVectorConfigInstr(const MachineInstr &MI) {
return MI.getOpcode() == RISCV::PseudoVSETVLI ||
MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
MI.getOpcode() == RISCV::PseudoVSETIVLI;
}
/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
return false;
assert(RISCV::X0 == MI.getOperand(1).getReg());
return RISCV::X0 == MI.getOperand(0).getReg();
}
static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
default:
return false;
case RISCV::VFMV_S_F:
case RISCV::VFMV_V_F:
return true;
}
}
static bool isScalarExtractInstr(const MachineInstr &MI) {
switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
default:
return false;
case RISCV::VMV_X_S:
case RISCV::VFMV_F_S:
return true;
}
}
static bool isScalarInsertInstr(const MachineInstr &MI) {
switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
default:
return false;
case RISCV::VMV_S_X:
case RISCV::VFMV_S_F:
return true;
}
}
static bool isScalarSplatInstr(const MachineInstr &MI) {
switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
default:
return false;
case RISCV::VMV_V_I:
case RISCV::VMV_V_X:
case RISCV::VFMV_V_F:
return true;
}
}
static bool isVSlideInstr(const MachineInstr &MI) {
switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
default:
return false;
case RISCV::VSLIDEDOWN_VX:
case RISCV::VSLIDEDOWN_VI:
case RISCV::VSLIDEUP_VX:
case RISCV::VSLIDEUP_VI:
return true;
}
}
/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
default:
return std::nullopt;
case RISCV::VLE8_V:
case RISCV::VLSE8_V:
case RISCV::VSE8_V:
case RISCV::VSSE8_V:
return 8;
case RISCV::VLE16_V:
case RISCV::VLSE16_V:
case RISCV::VSE16_V:
case RISCV::VSSE16_V:
return 16;
case RISCV::VLE32_V:
case RISCV::VLSE32_V:
case RISCV::VSE32_V:
case RISCV::VSSE32_V:
return 32;
case RISCV::VLE64_V:
case RISCV::VLSE64_V:
case RISCV::VSE64_V:
case RISCV::VSSE64_V:
return 64;
}
}
static bool isNonZeroLoadImmediate(const MachineInstr &MI) {
return MI.getOpcode() == RISCV::ADDI &&
MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
MI.getOperand(1).getReg() == RISCV::X0 &&
MI.getOperand(2).getImm() != 0;
}
/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
return false;
const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
// A Log2SEW of 0 is an operation on mask registers only.
return Log2SEW == 0;
}
/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedMergeOp(const MachineInstr &MI,
const MachineRegisterInfo &MRI) {
unsigned UseOpIdx;
if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
// If there is no passthrough operand, then the pass through
// lanes are undefined.
return true;
// If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQEUENCE whose
// operands are solely IMPLICIT_DEFS, then the pass through lanes are
// undefined.
const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
if (UseMO.getReg() == RISCV::NoRegister)
return true;
if (UseMO.isUndef())
return true;
if (UseMO.getReg().isPhysical())
return false;
if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
if (UseMI->isImplicitDef())
return true;
if (UseMI->isRegSequence()) {
for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) {
MachineInstr *SourceMI = MRI.getVRegDef(UseMI->getOperand(i).getReg());
if (!SourceMI || !SourceMI->isImplicitDef())
return false;
}
return true;
}
}
return false;
}
/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
// Some unknown property of VL is used. If demanded, must preserve entire
// value.
bool VLAny = false;
// Only zero vs non-zero is used. If demanded, can change non-zero values.
bool VLZeroness = false;
// What properties of SEW we need to preserve.
enum : uint8_t {
SEWEqual = 3, // The exact value of SEW needs to be preserved.
SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
// than or equal to the original value.
SEWGreaterThanOrEqualAndLessThan64 =
1, // SEW can be changed as long as it's greater
// than or equal to the original value, but must be less
// than 64.
SEWNone = 0 // We don't need to preserve SEW at all.
} SEW = SEWNone;
bool LMUL = false;
bool SEWLMULRatio = false;
bool TailPolicy = false;
bool MaskPolicy = false;
// Return true if any part of VTYPE was used
bool usedVTYPE() const {
return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
}
// Return true if any property of VL was used
bool usedVL() {
return VLAny || VLZeroness;
}
// Mark all VTYPE subfields and properties as demanded
void demandVTYPE() {
SEW = SEWEqual;
LMUL = true;
SEWLMULRatio = true;
TailPolicy = true;
MaskPolicy = true;
}
// Mark all VL properties as demanded
void demandVL() {
VLAny = true;
VLZeroness = true;
}
// Make this the result of demanding both the fields in this and B.
void doUnion(const DemandedFields &B) {
VLAny |= B.VLAny;
VLZeroness |= B.VLZeroness;
SEW = std::max(SEW, B.SEW);
LMUL |= B.LMUL;
SEWLMULRatio |= B.SEWLMULRatio;
TailPolicy |= B.TailPolicy;
MaskPolicy |= B.MaskPolicy;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Support for debugging, callable in GDB: V->dump()
LLVM_DUMP_METHOD void dump() const {
print(dbgs());
dbgs() << "\n";
}
/// Implement operator<<.
void print(raw_ostream &OS) const {
OS << "{";
OS << "VLAny=" << VLAny << ", ";
OS << "VLZeroness=" << VLZeroness << ", ";
OS << "SEW=";
switch (SEW) {
case SEWEqual:
OS << "SEWEqual";
break;
case SEWGreaterThanOrEqual:
OS << "SEWGreaterThanOrEqual";
break;
case SEWGreaterThanOrEqualAndLessThan64:
OS << "SEWGreaterThanOrEqualAndLessThan64";
break;
case SEWNone:
OS << "SEWNone";
break;
};
OS << ", ";
OS << "LMUL=" << LMUL << ", ";
OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
OS << "TailPolicy=" << TailPolicy << ", ";
OS << "MaskPolicy=" << MaskPolicy;
OS << "}";
}
#endif
};
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
DF.print(OS);
return OS;
}
#endif
/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
const DemandedFields &Used) {
switch (Used.SEW) {
case DemandedFields::SEWNone:
break;
case DemandedFields::SEWEqual:
if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
return false;
break;
case DemandedFields::SEWGreaterThanOrEqual:
if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
return false;
break;
case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
RISCVVType::getSEW(NewVType) >= 64)
return false;
break;
}
if (Used.LMUL &&
RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
return false;
if (Used.SEWLMULRatio) {
auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
RISCVVType::getVLMUL(CurVType));
auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
RISCVVType::getVLMUL(NewVType));
if (Ratio1 != Ratio2)
return false;
}
if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
RISCVVType::isTailAgnostic(NewVType))
return false;
if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
RISCVVType::isMaskAgnostic(NewVType))
return false;
return true;
}
/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI,
const MachineRegisterInfo *MRI,
const RISCVSubtarget *ST) {
// Warning: This function has to work on both the lowered (i.e. post
// emitVSETVLIs) and pre-lowering forms. The main implication of this is
// that it can't use the value of a SEW, VL, or Policy operand as they might
// be stale after lowering.
// Most instructions don't use any of these subfeilds.
DemandedFields Res;
// Start conservative if registers are used
if (MI.isCall() || MI.isInlineAsm() ||
MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
Res.demandVL();
if (MI.isCall() || MI.isInlineAsm() ||
MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
Res.demandVTYPE();
// Start conservative on the unlowered form too
uint64_t TSFlags = MI.getDesc().TSFlags;
if (RISCVII::hasSEWOp(TSFlags)) {
Res.demandVTYPE();
if (RISCVII::hasVLOp(TSFlags))
Res.demandVL();
// Behavior is independent of mask policy.
if (!RISCVII::usesMaskPolicy(TSFlags))
Res.MaskPolicy = false;
}
// Loads and stores with implicit EEW do not demand SEW or LMUL directly.
// They instead demand the ratio of the two which is used in computing
// EMUL, but which allows us the flexibility to change SEW and LMUL
// provided we don't change the ratio.
// Note: We assume that the instructions initial SEW is the EEW encoded
// in the opcode. This is asserted when constructing the VSETVLIInfo.
if (getEEWForLoadStore(MI)) {
Res.SEW = DemandedFields::SEWNone;
Res.LMUL = false;
}
// Store instructions don't use the policy fields.
if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
Res.TailPolicy = false;
Res.MaskPolicy = false;
}
// If this is a mask reg operation, it only cares about VLMAX.
// TODO: Possible extensions to this logic
// * Probably ok if available VLMax is larger than demanded
// * The policy bits can probably be ignored..
if (isMaskRegOp(MI)) {
Res.SEW = DemandedFields::SEWNone;
Res.LMUL = false;
}
// For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
if (isScalarInsertInstr(MI)) {
Res.LMUL = false;
Res.SEWLMULRatio = false;
Res.VLAny = false;
// For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
// need to preserve any other bits and are thus compatible with any larger,
// etype and can disregard policy bits. Warning: It's tempting to try doing
// this for any tail agnostic operation, but we can't as TA requires
// tail lanes to either be the original value or -1. We are writing
// unknown bits to the lanes here.
if (hasUndefinedMergeOp(MI, *MRI)) {
if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
else
Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
Res.TailPolicy = false;
}
}
// vmv.x.s, and vmv.f.s are unconditional and ignore everything except SEW.
if (isScalarExtractInstr(MI)) {
assert(!RISCVII::hasVLOp(TSFlags));
Res.LMUL = false;
Res.SEWLMULRatio = false;
Res.TailPolicy = false;
Res.MaskPolicy = false;
}
return Res;
}
/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
struct AVLDef {
const MachineInstr *DefMI;
Register DefReg;
};
union {
AVLDef AVLRegDef;
unsigned AVLImm;
};
enum : uint8_t {
Uninitialized,
AVLIsReg,
AVLIsImm,
AVLIsVLMAX,
AVLIsIgnored,
Unknown,
} State = Uninitialized;
// Fields from VTYPE.
RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
uint8_t SEW = 0;
uint8_t TailAgnostic : 1;
uint8_t MaskAgnostic : 1;
uint8_t SEWLMULRatioOnly : 1;
public:
VSETVLIInfo()
: AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
SEWLMULRatioOnly(false) {}
static VSETVLIInfo getUnknown() {
VSETVLIInfo Info;
Info.setUnknown();
return Info;
}
bool isValid() const { return State != Uninitialized; }
void setUnknown() { State = Unknown; }
bool isUnknown() const { return State == Unknown; }
void setAVLRegDef(const MachineInstr *DefMI, Register AVLReg) {
assert(DefMI && AVLReg.isVirtual());
AVLRegDef.DefMI = DefMI;
AVLRegDef.DefReg = AVLReg;
State = AVLIsReg;
}
void setAVLImm(unsigned Imm) {
AVLImm = Imm;
State = AVLIsImm;
}
void setAVLVLMAX() { State = AVLIsVLMAX; }
void setAVLIgnored() { State = AVLIsIgnored; }
bool hasAVLImm() const { return State == AVLIsImm; }
bool hasAVLReg() const { return State == AVLIsReg; }
bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
bool hasAVLIgnored() const { return State == AVLIsIgnored; }
Register getAVLReg() const {
assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
return AVLRegDef.DefReg;
}
unsigned getAVLImm() const {
assert(hasAVLImm());
return AVLImm;
}
const MachineInstr &getAVLDefMI() const {
assert(hasAVLReg() && AVLRegDef.DefMI);
return *AVLRegDef.DefMI;
}
void setAVL(VSETVLIInfo Info) {
assert(Info.isValid());
if (Info.isUnknown())
setUnknown();
else if (Info.hasAVLReg())
setAVLRegDef(&Info.getAVLDefMI(), Info.getAVLReg());
else if (Info.hasAVLVLMAX())
setAVLVLMAX();
else if (Info.hasAVLIgnored())
setAVLIgnored();
else {
assert(Info.hasAVLImm());
setAVLImm(Info.getAVLImm());
}
}
unsigned getSEW() const { return SEW; }
RISCVII::VLMUL getVLMUL() const { return VLMul; }
bool getTailAgnostic() const { return TailAgnostic; }
bool getMaskAgnostic() const { return MaskAgnostic; }
bool hasNonZeroAVL() const {
if (hasAVLImm())
return getAVLImm() > 0;
if (hasAVLReg())
return isNonZeroLoadImmediate(getAVLDefMI());
if (hasAVLVLMAX())
return true;
if (hasAVLIgnored())
return false;
return false;
}
bool hasEquallyZeroAVL(const VSETVLIInfo &Other) const {
if (hasSameAVL(Other))
return true;
return (hasNonZeroAVL() && Other.hasNonZeroAVL());
}
bool hasSameAVL(const VSETVLIInfo &Other) const {
if (hasAVLReg() && Other.hasAVLReg())
return AVLRegDef.DefMI == Other.AVLRegDef.DefMI &&
AVLRegDef.DefReg == Other.AVLRegDef.DefReg;
if (hasAVLImm() && Other.hasAVLImm())
return getAVLImm() == Other.getAVLImm();
if (hasAVLVLMAX())
return Other.hasAVLVLMAX() && hasSameVLMAX(Other);
if (hasAVLIgnored())
return Other.hasAVLIgnored();
return false;
}
void setVTYPE(unsigned VType) {
assert(isValid() && !isUnknown() &&
"Can't set VTYPE for uninitialized or unknown");
VLMul = RISCVVType::getVLMUL(VType);
SEW = RISCVVType::getSEW(VType);
TailAgnostic = RISCVVType::isTailAgnostic(VType);
MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
}
void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
assert(isValid() && !isUnknown() &&
"Can't set VTYPE for uninitialized or unknown");
VLMul = L;
SEW = S;
TailAgnostic = TA;
MaskAgnostic = MA;
}
void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }
unsigned encodeVTYPE() const {
assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
"Can't encode VTYPE for uninitialized or unknown");
return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
}
bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
bool hasSameVTYPE(const VSETVLIInfo &Other) const {
assert(isValid() && Other.isValid() &&
"Can't compare invalid VSETVLIInfos");
assert(!isUnknown() && !Other.isUnknown() &&
"Can't compare VTYPE in unknown state");
assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
"Can't compare when only LMUL/SEW ratio is valid.");
return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
Other.MaskAgnostic);
}
unsigned getSEWLMULRatio() const {
assert(isValid() && !isUnknown() &&
"Can't use VTYPE for uninitialized or unknown");
return RISCVVType::getSEWLMULRatio(SEW, VLMul);
}
// Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
// Note that having the same VLMAX ensures that both share the same
// function from AVL to VL; that is, they must produce the same VL value
// for any given AVL value.
bool hasSameVLMAX(const VSETVLIInfo &Other) const {
assert(isValid() && Other.isValid() &&
"Can't compare invalid VSETVLIInfos");
assert(!isUnknown() && !Other.isUnknown() &&
"Can't compare VTYPE in unknown state");
return getSEWLMULRatio() == Other.getSEWLMULRatio();
}
bool hasCompatibleVTYPE(const DemandedFields &Used,
const VSETVLIInfo &Require) const {
return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
}
// Determine whether the vector instructions requirements represented by
// Require are compatible with the previous vsetvli instruction represented
// by this. MI is the instruction whose requirements we're considering.
bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
const MachineRegisterInfo &MRI) const {
assert(isValid() && Require.isValid() &&
"Can't compare invalid VSETVLIInfos");
assert(!Require.SEWLMULRatioOnly &&
"Expected a valid VTYPE for instruction!");
// Nothing is compatible with Unknown.
if (isUnknown() || Require.isUnknown())
return false;
// If only our VLMAX ratio is valid, then this isn't compatible.
if (SEWLMULRatioOnly)
return false;
if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
return false;
if (Used.VLZeroness && !hasEquallyZeroAVL(Require))
return false;
return hasCompatibleVTYPE(Used, Require);
}
bool operator==(const VSETVLIInfo &Other) const {
// Uninitialized is only equal to another Uninitialized.
if (!isValid())
return !Other.isValid();
if (!Other.isValid())
return !isValid();
// Unknown is only equal to another Unknown.
if (isUnknown())
return Other.isUnknown();
if (Other.isUnknown())
return isUnknown();
if (!hasSameAVL(Other))
return false;
// If the SEWLMULRatioOnly bits are different, then they aren't equal.
if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
return false;
// If only the VLMAX is valid, check that it is the same.
if (SEWLMULRatioOnly)
return hasSameVLMAX(Other);
// If the full VTYPE is valid, check that it is the same.
return hasSameVTYPE(Other);
}
bool operator!=(const VSETVLIInfo &Other) const {
return !(*this == Other);
}
// Calculate the VSETVLIInfo visible to a block assuming this and Other are
// both predecessors.
VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
// If the new value isn't valid, ignore it.
if (!Other.isValid())
return *this;
// If this value isn't valid, this must be the first predecessor, use it.
if (!isValid())
return Other;
// If either is unknown, the result is unknown.
if (isUnknown() || Other.isUnknown())
return VSETVLIInfo::getUnknown();
// If we have an exact, match return this.
if (*this == Other)
return *this;
// Not an exact match, but maybe the AVL and VLMAX are the same. If so,
// return an SEW/LMUL ratio only value.
if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
VSETVLIInfo MergeInfo = *this;
MergeInfo.SEWLMULRatioOnly = true;
return MergeInfo;
}
// Otherwise the result is unknown.
return VSETVLIInfo::getUnknown();
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Support for debugging, callable in GDB: V->dump()
LLVM_DUMP_METHOD void dump() const {
print(dbgs());
dbgs() << "\n";
}
/// Implement operator<<.
/// @{
void print(raw_ostream &OS) const {
OS << "{";
if (!isValid())
OS << "Uninitialized";
if (isUnknown())
OS << "unknown";
if (hasAVLReg())
OS << "AVLReg=" << (unsigned)getAVLReg();
if (hasAVLImm())
OS << "AVLImm=" << (unsigned)AVLImm;
if (hasAVLVLMAX())
OS << "AVLVLMAX";
if (hasAVLIgnored())
OS << "AVLIgnored";
OS << ", "
<< "VLMul=" << (unsigned)VLMul << ", "
<< "SEW=" << (unsigned)SEW << ", "
<< "TailAgnostic=" << (bool)TailAgnostic << ", "
<< "MaskAgnostic=" << (bool)MaskAgnostic << ", "
<< "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
}
#endif
};
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
V.print(OS);
return OS;
}
#endif
struct BlockData {
// The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
// block. Calculated in Phase 2.
VSETVLIInfo Exit;
// The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
// blocks. Calculated in Phase 2, and used by Phase 3.
VSETVLIInfo Pred;
// Keeps track of whether the block is already in the queue.
bool InQueue = false;
BlockData() = default;
};
class RISCVInsertVSETVLI : public MachineFunctionPass {
const RISCVSubtarget *ST;
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
std::vector<BlockData> BlockInfo;
std::queue<const MachineBasicBlock *> WorkList;
public:
static char ID;
RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }
private:
bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
const VSETVLIInfo &CurInfo) const;
bool needVSETVLIPHI(const VSETVLIInfo &Require,
const MachineBasicBlock &MBB) const;
void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
void insertVSETVLI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt, DebugLoc DL,
const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
VSETVLIInfo &Info) const;
void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
void emitVSETVLIs(MachineBasicBlock &MBB);
void doPRE(MachineBasicBlock &MBB);
void insertReadVL(MachineBasicBlock &MBB);
};
class RISCVCoalesceVSETVLI : public MachineFunctionPass {
public:
static char ID;
const RISCVSubtarget *ST;
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
LiveIntervals *LIS;
RISCVCoalesceVSETVLI() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
AU.addRequired<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
AU.addPreserved<LiveDebugVariables>();
AU.addPreserved<LiveStacks>();
MachineFunctionPass::getAnalysisUsage(AU);
}
StringRef getPassName() const override { return RISCV_COALESCE_VSETVLI_NAME; }
private:
bool coalesceVSETVLIs(MachineBasicBlock &MBB);
};
} // end anonymous namespace
char RISCVInsertVSETVLI::ID = 0;
INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
false, false)
char RISCVCoalesceVSETVLI::ID = 0;
INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
RISCV_COALESCE_VSETVLI_NAME, false, false)
// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI,
const MachineRegisterInfo &MRI) {
VSETVLIInfo NewInfo;
if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
NewInfo.setAVLImm(MI.getOperand(1).getImm());
} else {
assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
MI.getOpcode() == RISCV::PseudoVSETVLIX0);
Register AVLReg = MI.getOperand(1).getReg();
assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
"Can't handle X0, X0 vsetvli yet");
if (AVLReg == RISCV::X0)
NewInfo.setAVLVLMAX();
else
NewInfo.setAVLRegDef(MRI.getVRegDef(AVLReg), AVLReg);
}
NewInfo.setVTYPE(MI.getOperand(2).getImm());
return NewInfo;
}
static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
RISCVII::VLMUL VLMul) {
auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
if (Fractional)
VLEN = VLEN / LMul;
else
VLEN = VLEN * LMul;
return VLEN/SEW;
}
static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
const RISCVSubtarget &ST,
const MachineRegisterInfo *MRI) {
VSETVLIInfo InstrInfo;
bool TailAgnostic = true;
bool MaskAgnostic = true;
if (!hasUndefinedMergeOp(MI, *MRI)) {
// Start with undisturbed.
TailAgnostic = false;
MaskAgnostic = false;
// If there is a policy operand, use it.
if (RISCVII::hasVecPolicyOp(TSFlags)) {
const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
uint64_t Policy = Op.getImm();
assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
"Invalid Policy Value");
TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
}
// Some pseudo instructions force a tail agnostic policy despite having a
// tied def.
if (RISCVII::doesForceTailAgnostic(TSFlags))
TailAgnostic = true;
if (!RISCVII::usesMaskPolicy(TSFlags))
MaskAgnostic = true;
}
RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
// A Log2SEW of 0 is an operation on mask registers only.
unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
if (RISCVII::hasVLOp(TSFlags)) {
const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
if (VLOp.isImm()) {
int64_t Imm = VLOp.getImm();
// Conver the VLMax sentintel to X0 register.
if (Imm == RISCV::VLMaxSentinel) {
// If we know the exact VLEN, see if we can use the constant encoding
// for the VLMAX instead. This reduces register pressure slightly.
const unsigned VLMAX = computeVLMAX(ST.getRealMaxVLen(), SEW, VLMul);
if (ST.getRealMinVLen() == ST.getRealMaxVLen() && VLMAX <= 31)
InstrInfo.setAVLImm(VLMAX);
else
InstrInfo.setAVLVLMAX();
}
else
InstrInfo.setAVLImm(Imm);
} else {
InstrInfo.setAVLRegDef(MRI->getVRegDef(VLOp.getReg()), VLOp.getReg());
}
} else {
assert(isScalarExtractInstr(MI));
// TODO: If we are more clever about x0,x0 insertion then we should be able
// to deduce that the VL is ignored based off of DemandedFields, and remove
// the AVLIsIgnored state. Then we can just use an arbitrary immediate AVL.
InstrInfo.setAVLIgnored();
}
#ifndef NDEBUG
if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
}
#endif
InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
// If AVL is defined by a vsetvli with the same VLMAX, we can replace the
// AVL operand with the AVL of the defining vsetvli. We avoid general
// register AVLs to avoid extending live ranges without being sure we can
// kill the original source reg entirely.
if (InstrInfo.hasAVLReg()) {
const MachineInstr &DefMI = InstrInfo.getAVLDefMI();
if (isVectorConfigInstr(DefMI)) {
VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(DefMI, *MRI);
if (DefInstrInfo.hasSameVLMAX(InstrInfo) &&
(DefInstrInfo.hasAVLImm() || DefInstrInfo.hasAVLVLMAX()))
InstrInfo.setAVL(DefInstrInfo);
}
}
return InstrInfo;
}
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
const VSETVLIInfo &Info,
const VSETVLIInfo &PrevInfo) {
DebugLoc DL = MI.getDebugLoc();
insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt, DebugLoc DL,
const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {
++NumInsertedVSETVL;
if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
// Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
// VLMAX.
if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addReg(RISCV::X0, RegState::Kill)
.addImm(Info.encodeVTYPE())
.addReg(RISCV::VL, RegState::Implicit);
return;
}
// If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
// it has the same VLMAX we want and the last VL/VTYPE we observed is the
// same, we can use the X0, X0 form.
if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
const MachineInstr &DefMI = Info.getAVLDefMI();
if (isVectorConfigInstr(DefMI)) {
VSETVLIInfo DefInfo = getInfoForVSETVLI(DefMI, *MRI);
if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addReg(RISCV::X0, RegState::Kill)
.addImm(Info.encodeVTYPE())
.addReg(RISCV::VL, RegState::Implicit);
return;
}
}
}
}
if (Info.hasAVLImm()) {
BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addImm(Info.getAVLImm())
.addImm(Info.encodeVTYPE());
return;
}
if (Info.hasAVLIgnored()) {
// We can only use x0, x0 if there's no chance of the vtype change causing
// the previous vl to become invalid.
if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
Info.hasSameVLMAX(PrevInfo)) {
BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addReg(RISCV::X0, RegState::Kill)
.addImm(Info.encodeVTYPE())
.addReg(RISCV::VL, RegState::Implicit);
return;
}
// Otherwise use an AVL of 1 to avoid depending on previous vl.
BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addImm(1)
.addImm(Info.encodeVTYPE());
return;
}
if (Info.hasAVLVLMAX()) {
Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
.addReg(DestReg, RegState::Define | RegState::Dead)
.addReg(RISCV::X0, RegState::Kill)
.addImm(Info.encodeVTYPE());
return;
}
Register AVLReg = Info.getAVLReg();
MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addReg(AVLReg)
.addImm(Info.encodeVTYPE());
}
static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
return Fractional || LMul == 1;
}
/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
const VSETVLIInfo &Require,
const VSETVLIInfo &CurInfo) const {
assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, MRI));
if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
return true;
DemandedFields Used = getDemanded(MI, MRI, ST);
// A slidedown/slideup with an *undefined* merge op can freely clobber
// elements not copied from the source vector (e.g. masked off, tail, or
// slideup's prefix). Notes:
// * We can't modify SEW here since the slide amount is in units of SEW.
// * VL=1 is special only because we have existing support for zero vs
// non-zero VL. We could generalize this if we had a VL > C predicate.
// * The LMUL1 restriction is for machines whose latency may depend on VL.
// * As above, this is only legal for tail "undefined" not "agnostic".
if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
Used.VLAny = false;
Used.VLZeroness = true;
Used.LMUL = false;
Used.TailPolicy = false;
}
// A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated in the same
// semantically as vmv.s.x. This is particularly useful since we don't have an
// immediate form of vmv.s.x, and thus frequently use vmv.v.i in it's place.
// Since a splat is non-constant time in LMUL, we do need to be careful to not
// increase the number of active vector registers (unlike for vmv.s.x.)
if (isScalarSplatInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
Used.LMUL = false;
Used.SEWLMULRatio = false;
Used.VLAny = false;
if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
else
Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
Used.TailPolicy = false;
}
if (CurInfo.isCompatible(Used, Require, *MRI))
return false;
// We didn't find a compatible value. If our AVL is a virtual register,
// it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
// and the last VL/VTYPE we observed is the same, we don't need a
// VSETVLI here.
if (Require.hasAVLReg() && CurInfo.hasCompatibleVTYPE(Used, Require)) {
const MachineInstr &DefMI = Require.getAVLDefMI();
if (isVectorConfigInstr(DefMI)) {
VSETVLIInfo DefInfo = getInfoForVSETVLI(DefMI, *MRI);
if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
return false;
}
}
return true;
}
// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
DemandedFields &Demanded) {
VSETVLIInfo Info = NewInfo;
if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
!PrevInfo.isUnknown()) {
if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
Info.setVLMul(*NewVLMul);
Demanded.LMUL = true;
}
return Info;
}
// Given an incoming state reaching MI, minimally modifies that state so that it
// is compatible with MI. The resulting state is guaranteed to be semantically
// legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
const MachineInstr &MI) const {
uint64_t TSFlags = MI.getDesc().TSFlags;
if (!RISCVII::hasSEWOp(TSFlags))
return;
const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, MRI);
assert(NewInfo.isValid() && !NewInfo.isUnknown());
if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
return;
const VSETVLIInfo PrevInfo = Info;
if (!Info.isValid() || Info.isUnknown())
Info = NewInfo;
DemandedFields Demanded = getDemanded(MI, MRI, ST);
const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);
// If MI only demands that VL has the same zeroness, we only need to set the
// AVL if the zeroness differs. This removes a vsetvli entirely if the types
// match or allows use of cheaper avl preserving variant if VLMAX doesn't
// change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype"
// variant, so we avoid the transform to prevent extending live range of an
// avl register operand.
// TODO: We can probably relax this for immediates.
bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo) &&
IncomingInfo.hasSameVLMAX(PrevInfo);
if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
Info.setAVL(IncomingInfo);
Info.setVTYPE(
((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
.getVLMUL(),
((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
// Prefer tail/mask agnostic since it can be relaxed to undisturbed later
// if needed.
(Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
IncomingInfo.getTailAgnostic(),
(Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
IncomingInfo.getMaskAgnostic());
// If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
// the AVL.
if (Info.hasSEWLMULRatioOnly()) {
VSETVLIInfo RatiolessInfo = IncomingInfo;
RatiolessInfo.setAVL(Info);
Info = RatiolessInfo;
}
}
// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different that the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
const MachineInstr &MI) const {
if (isVectorConfigInstr(MI)) {
Info = getInfoForVSETVLI(MI, *MRI);
return;
}
if (RISCV::isFaultFirstLoad(MI)) {
// Update AVL to vl-output of the fault first load.
Info.setAVLRegDef(MRI->getVRegDef(MI.getOperand(1).getReg()),
MI.getOperand(1).getReg());
return;
}
// If this is something that updates VL/VTYPE that we don't know about, set
// the state to unknown.
if (MI.isCall() || MI.isInlineAsm() ||
MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
Info = VSETVLIInfo::getUnknown();
}
bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
VSETVLIInfo &Info) const {
bool HadVectorOp = false;
Info = BlockInfo[MBB.getNumber()].Pred;
for (const MachineInstr &MI : MBB) {
transferBefore(Info, MI);
if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
HadVectorOp = true;
transferAfter(Info, MI);
}
return HadVectorOp;
}
void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
BlockData &BBInfo = BlockInfo[MBB.getNumber()];
BBInfo.InQueue = false;
// Start with the previous entry so that we keep the most conservative state
// we have ever found.
VSETVLIInfo InInfo = BBInfo.Pred;
if (MBB.pred_empty()) {
// There are no predecessors, so use the default starting status.
InInfo.setUnknown();
} else {
for (MachineBasicBlock *P : MBB.predecessors())
InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
}
// If we don't have any valid predecessor value, wait until we do.
if (!InInfo.isValid())
return;
// If no change, no need to rerun block
if (InInfo == BBInfo.Pred)
return;
BBInfo.Pred = InInfo;
LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
<< " changed to " << BBInfo.Pred << "\n");
// Note: It's tempting to cache the state changes here, but due to the
// compatibility checks performed a blocks output state can change based on
// the input state. To cache, we'd have to add logic for finding
// never-compatible state changes.
VSETVLIInfo TmpStatus;
computeVLVTYPEChanges(MBB, TmpStatus);
// If the new exit value matches the old exit value, we don't need to revisit
// any blocks.
if (BBInfo.Exit == TmpStatus)
return;
BBInfo.Exit = TmpStatus;
LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
<< " changed to " << BBInfo.Exit << "\n");
// Add the successors to the work list so we can propagate the changed exit
// status.
for (MachineBasicBlock *S : MBB.successors())
if (!BlockInfo[S->getNumber()].InQueue) {
BlockInfo[S->getNumber()].InQueue = true;
WorkList.push(S);
}
}
// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
const MachineBasicBlock &MBB) const {
if (DisableInsertVSETVLPHIOpt)
return true;
if (!Require.hasAVLReg())
return true;
// We need the AVL to be produce by a PHI node in this basic block.
const MachineInstr *PHI = &Require.getAVLDefMI();
if (PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
return true;
for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
PHIOp += 2) {
Register InReg = PHI->getOperand(PHIOp).getReg();
MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;
// We need the PHI input to the be the output of a VSET(I)VLI.
MachineInstr *DefMI = MRI->getVRegDef(InReg);
if (!DefMI || !isVectorConfigInstr(*DefMI))
return true;
// We found a VSET(I)VLI make sure it matches the output of the
// predecessor block.
VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, *MRI);
if (DefInfo != PBBExit)
return true;
// Require has the same VL as PBBExit, so if the exit from the
// predecessor has the VTYPE we are looking for we might be able
// to avoid a VSETVLI.
if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require))
return true;
}
// If all the incoming values to the PHI checked out, we don't need
// to insert a VSETVLI.
return false;
}
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
// Track whether the prefix of the block we've scanned is transparent
// (meaning has not yet changed the abstract state).
bool PrefixTransparent = true;
for (MachineInstr &MI : MBB) {
const VSETVLIInfo PrevInfo = CurInfo;
transferBefore(CurInfo, MI);
// If this is an explicit VSETVLI or VSETIVLI, update our state.
if (isVectorConfigInstr(MI)) {
// Conservatively, mark the VL and VTYPE as live.
assert(MI.getOperand(3).getReg() == RISCV::VL &&
MI.getOperand(4).getReg() == RISCV::VTYPE &&
"Unexpected operands where VL and VTYPE should be");
MI.getOperand(3).setIsDead(false);
MI.getOperand(4).setIsDead(false);
PrefixTransparent = false;
}
uint64_t TSFlags = MI.getDesc().TSFlags;
if (RISCVII::hasSEWOp(TSFlags)) {
if (PrevInfo != CurInfo) {
// If this is the first implicit state change, and the state change
// requested can be proven to produce the same register contents, we
// can skip emitting the actual state change and continue as if we
// had since we know the GPR result of the implicit state change
// wouldn't be used and VL/VTYPE registers are correct. Note that
// we *do* need to model the state as if it changed as while the
// register contents are unchanged, the abstract model can change.
if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
PrefixTransparent = false;
}
if (RISCVII::hasVLOp(TSFlags)) {
MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
if (VLOp.isReg()) {
Register Reg = VLOp.getReg();
MachineInstr *VLOpDef = MRI->getVRegDef(Reg);
// Erase the AVL operand from the instruction.
VLOp.setReg(RISCV::NoRegister);
VLOp.setIsKill(false);
// If the AVL was an immediate > 31, then it would have been emitted
// as an ADDI. However, the ADDI might not have been used in the
// vsetvli, or a vsetvli might not have been emitted, so it may be
// dead now.
if (VLOpDef && TII->isAddImmediate(*VLOpDef, Reg) &&
MRI->use_nodbg_empty(Reg))
VLOpDef->eraseFromParent();
}
MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
/*isImp*/ true));
}
MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
/*isImp*/ true));
}
if (MI.isCall() || MI.isInlineAsm() ||
MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
PrefixTransparent = false;
transferAfter(CurInfo, MI);
}
// If we reach the end of the block and our current info doesn't match the
// expected info, insert a vsetvli to correct.
if (!UseStrictAsserts) {
const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
CurInfo != ExitInfo) {
// Note there's an implicit assumption here that terminators never use
// or modify VL or VTYPE. Also, fallthrough will return end().
auto InsertPt = MBB.getFirstInstrTerminator();
insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
CurInfo);
CurInfo = ExitInfo;
}
}
if (UseStrictAsserts && CurInfo.isValid()) {
const auto &Info = BlockInfo[MBB.getNumber()];
if (CurInfo != Info.Exit) {
LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
LLVM_DEBUG(dbgs() << " begin state: " << Info.Pred << "\n");
LLVM_DEBUG(dbgs() << " expected end state: " << Info.Exit << "\n");
LLVM_DEBUG(dbgs() << " actual end state: " << CurInfo << "\n");
}
assert(CurInfo == Info.Exit &&
"InsertVSETVLI dataflow invariant violated");
}
}
/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
return;
MachineBasicBlock *UnavailablePred = nullptr;
VSETVLIInfo AvailableInfo;
for (MachineBasicBlock *P : MBB.predecessors()) {
const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
if (PredInfo.isUnknown()) {
if (UnavailablePred)
return;
UnavailablePred = P;
} else if (!AvailableInfo.isValid()) {
AvailableInfo = PredInfo;
} else if (AvailableInfo != PredInfo) {
return;
}
}
// Unreachable, single pred, or full redundancy. Note that FRE is handled by
// phase 3.
if (!UnavailablePred || !AvailableInfo.isValid())
return;
// If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
// the unavailable pred.
if (AvailableInfo.hasSEWLMULRatioOnly())
return;
// Critical edge - TODO: consider splitting?
if (UnavailablePred->succ_size() != 1)
return;
// If the AVL value is a register (other than our VLMAX sentinel),
// we need to prove the value is available at the point we're going
// to insert the vsetvli at.
if (AvailableInfo.hasAVLReg()) {
const MachineInstr *AVLDefMI = &AvailableInfo.getAVLDefMI();
// This is an inline dominance check which covers the case of
// UnavailablePred being the preheader of a loop.
if (AVLDefMI->getParent() != UnavailablePred)
return;
for (auto &TermMI : UnavailablePred->terminators())
if (&TermMI == AVLDefMI)
return;
}
// If the AVL isn't used in its predecessors then bail, since we have no AVL
// to insert a vsetvli with.
if (AvailableInfo.hasAVLIgnored())
return;
// Model the effect of changing the input state of the block MBB to
// AvailableInfo. We're looking for two issues here; one legality,
// one profitability.
// 1) If the block doesn't use some of the fields from VL or VTYPE, we
// may hit the end of the block with a different end state. We can
// not make this change without reflowing later blocks as well.
// 2) If we don't actually remove a transition, inserting a vsetvli
// into the predecessor block would be correct, but unprofitable.
VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
VSETVLIInfo CurInfo = AvailableInfo;
int TransitionsRemoved = 0;
for (const MachineInstr &MI : MBB) {
const VSETVLIInfo LastInfo = CurInfo;
const VSETVLIInfo LastOldInfo = OldInfo;
transferBefore(CurInfo, MI);
transferBefore(OldInfo, MI);
if (CurInfo == LastInfo)
TransitionsRemoved++;
if (LastOldInfo == OldInfo)
TransitionsRemoved--;
transferAfter(CurInfo, MI);
transferAfter(OldInfo, MI);
if (CurInfo == OldInfo)
// Convergence. All transitions after this must match by construction.
break;
}
if (CurInfo != OldInfo || TransitionsRemoved <= 0)
// Issues 1 and 2 above
return;
// Finally, update both data flow state and insert the actual vsetvli.
// Doing both keeps the code in sync with the dataflow results, which
// is critical for correctness of phase 3.
auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
<< UnavailablePred->getName() << " with state "
<< AvailableInfo << "\n");
BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
BlockInfo[MBB.getNumber()].Pred = AvailableInfo;
// Note there's an implicit assumption here that terminators never use
// or modify VL or VTYPE. Also, fallthrough will return end().
auto InsertPt = UnavailablePred->getFirstInstrTerminator();
insertVSETVLI(*UnavailablePred, InsertPt,
UnavailablePred->findDebugLoc(InsertPt),
AvailableInfo, OldExit);
}
// Return true if we can mutate PrevMI to match MI without changing any the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
const MachineInstr &MI,
const DemandedFields &Used,
const MachineRegisterInfo &MRI) {
// If the VL values aren't equal, return false if either a) the former is
// demanded, or b) we can't rewrite the former to be the later for
// implementation reasons.
if (!isVLPreservingConfig(MI)) {
if (Used.VLAny)
return false;
if (Used.VLZeroness) {
if (isVLPreservingConfig(PrevMI))
return false;
if (!getInfoForVSETVLI(PrevMI, MRI)
.hasEquallyZeroAVL(getInfoForVSETVLI(MI, MRI)))
return false;
}
auto &AVL = MI.getOperand(1);
auto &PrevAVL = PrevMI.getOperand(1);
// If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
// For now just check that PrevMI uses the same virtual register.
if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
(!MRI.hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
PrevAVL.getReg() != AVL.getReg()))
return false;
}
assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
auto PriorVType = PrevMI.getOperand(2).getImm();
auto VType = MI.getOperand(2).getImm();
return areCompatibleVTYPEs(PriorVType, VType, Used);
}
bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
MachineInstr *NextMI = nullptr;
// We can have arbitrary code in successors, so VL and VTYPE
// must be considered demanded.
DemandedFields Used;
Used.demandVL();
Used.demandVTYPE();
SmallVector<MachineInstr*> ToDelete;
for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
if (!isVectorConfigInstr(MI)) {
Used.doUnion(getDemanded(MI, MRI, ST));
if (MI.isCall() || MI.isInlineAsm() ||
MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
NextMI = nullptr;
continue;
}
Register RegDef = MI.getOperand(0).getReg();
assert(RegDef == RISCV::X0 || RegDef.isVirtual());
if (RegDef != RISCV::X0 && !MRI->use_nodbg_empty(RegDef))
Used.demandVL();
if (NextMI) {
if (!Used.usedVL() && !Used.usedVTYPE()) {
ToDelete.push_back(&MI);
// Leave NextMI unchanged
continue;
}
if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
if (!isVLPreservingConfig(*NextMI)) {
Register DefReg = NextMI->getOperand(0).getReg();
MI.getOperand(0).setReg(DefReg);
MI.getOperand(0).setIsDead(false);
// The def of DefReg moved to MI, so extend the LiveInterval up to
// it.
if (DefReg.isVirtual()) {
LiveInterval &DefLI = LIS->getInterval(DefReg);
SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
DefLI.addSegment(S);
DefVNI->def = MISlot;
// Mark DefLI as spillable if it was previously unspillable
DefLI.setWeight(0);
// DefReg may have had no uses, in which case we need to shrink
// the LiveInterval up to MI.
LIS->shrinkToUses(&DefLI);
}
Register OldVLReg;
if (MI.getOperand(1).isReg())
OldVLReg = MI.getOperand(1).getReg();
if (NextMI->getOperand(1).isImm())
MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
else
MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
// Clear NextMI's AVL early so we're not counting it as a use.
if (NextMI->getOperand(1).isReg())
NextMI->getOperand(1).setReg(RISCV::NoRegister);
if (OldVLReg && OldVLReg.isVirtual()) {
// NextMI no longer uses OldVLReg so shrink its LiveInterval.
LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
MRI->use_nodbg_empty(OldVLReg)) {
VLOpDef->eraseFromParent();
LIS->removeInterval(OldVLReg);
}
}
MI.setDesc(NextMI->getDesc());
}
MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
ToDelete.push_back(NextMI);
// fallthrough
}
}
NextMI = &MI;
Used = getDemanded(MI, MRI, ST);
}
NumCoalescedVSETVL += ToDelete.size();
for (auto *MI : ToDelete) {
LIS->RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
}
return !ToDelete.empty();
}
void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
MachineInstr &MI = *I++;
if (RISCV::isFaultFirstLoad(MI)) {
Register VLOutput = MI.getOperand(1).getReg();
if (!MRI->use_nodbg_empty(VLOutput))
BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
VLOutput);
// We don't use the vl output of the VLEFF/VLSEGFF anymore.
MI.getOperand(1).setReg(RISCV::X0);
}
}
}
bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
// Skip if the vector extension is not enabled.
ST = &MF.getSubtarget<RISCVSubtarget>();
if (!ST->hasVInstructions())
return false;
LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");
TII = ST->getInstrInfo();
MRI = &MF.getRegInfo();
assert(BlockInfo.empty() && "Expect empty block infos");
BlockInfo.resize(MF.getNumBlockIDs());
bool HaveVectorOp = false;
// Phase 1 - determine how VL/VTYPE are affected by the each block.
for (const MachineBasicBlock &MBB : MF) {
VSETVLIInfo TmpStatus;
HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
// Initial exit state is whatever change we found in the block.
BlockData &BBInfo = BlockInfo[MBB.getNumber()];
BBInfo.Exit = TmpStatus;
LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
<< " is " << BBInfo.Exit << "\n");
}
// If we didn't find any instructions that need VSETVLI, we're done.
if (!HaveVectorOp) {
BlockInfo.clear();
return false;
}
// Phase 2 - determine the exit VL/VTYPE from each block. We add all
// blocks to the list here, but will also add any that need to be revisited
// during Phase 2 processing.
for (const MachineBasicBlock &MBB : MF) {
WorkList.push(&MBB);
BlockInfo[MBB.getNumber()].InQueue = true;
}
while (!WorkList.empty()) {
const MachineBasicBlock &MBB = *WorkList.front();
WorkList.pop();
computeIncomingVLVTYPE(MBB);
}
// Perform partial redundancy elimination of vsetvli transitions.
for (MachineBasicBlock &MBB : MF)
doPRE(MBB);
// Phase 3 - add any vsetvli instructions needed in the block. Use the
// Phase 2 information to avoid adding vsetvlis before the first vector
// instruction in the block if the VL/VTYPE is satisfied by its
// predecessors.
for (MachineBasicBlock &MBB : MF)
emitVSETVLIs(MBB);
// Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
// of VLEFF/VLSEGFF.
for (MachineBasicBlock &MBB : MF)
insertReadVL(MBB);
BlockInfo.clear();
return HaveVectorOp;
}
/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
return new RISCVInsertVSETVLI();
}
// Now that all vsetvlis are explicit, go through and do block local
// DSE and peephole based demanded fields based transforms. Note that
// this *must* be done outside the main dataflow so long as we allow
// any cross block analysis within the dataflow. We can't have both
// demanded fields based mutation and non-local analysis in the
// dataflow at the same time without introducing inconsistencies.
bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) {
// Skip if the vector extension is not enabled.
ST = &MF.getSubtarget<RISCVSubtarget>();
if (!ST->hasVInstructions())
return false;
TII = ST->getInstrInfo();
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
bool Changed = false;
for (MachineBasicBlock &MBB : MF)
Changed |= coalesceVSETVLIs(MBB);
return Changed;
}
FunctionPass *llvm::createRISCVCoalesceVSETVLIPass() {
return new RISCVCoalesceVSETVLI();
}