//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
using namespace llvm;
/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
: TargetLoweringBase(tm) {}
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
}
bool TargetLowering::isPositionIndependent() const {
return getTargetMachine().isPositionIndependent();
}
/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
SDValue &Chain) const {
const Function &F = DAG.getMachineFunction().getFunction();
// First, check if tail calls have been disabled in this function.
if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
return false;
// Conservatively require the attributes of the call to match those of
// the return. Ignore NoAlias and NonNull because they don't affect the
// call sequence.
AttributeList CallerAttrs = F.getAttributes();
if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
.removeAttribute(Attribute::NoAlias)
.removeAttribute(Attribute::NonNull)
.hasAttributes())
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
return false;
// Check if the only use is a function return node.
return isUsedByReturnOnly(Node, Chain);
}
bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
const uint32_t *CallerPreservedMask,
const SmallVectorImpl<CCValAssign> &ArgLocs,
const SmallVectorImpl<SDValue> &OutVals) const {
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
const CCValAssign &ArgLoc = ArgLocs[I];
if (!ArgLoc.isRegLoc())
continue;
Register Reg = ArgLoc.getLocReg();
// Only look at callee saved registers.
if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
continue;
// Check that we pass the same value that the caller received in Reg.
// (We look for a CopyFromReg reading a virtual register that is used
// for the function live-in value of register Reg)
SDValue Value = OutVals[I];
if (Value->getOpcode() != ISD::CopyFromReg)
return false;
unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
if (MRI.getLiveInPhysReg(ArgReg) != Reg)
return false;
}
return true;
}
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
unsigned ArgIdx) {
IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
Alignment = Call->getParamAlignment(ArgIdx);
ByValType = nullptr;
if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
ByValType = Call->getParamByValType(ArgIdx);
}
/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
ArrayRef<SDValue> Ops,
MakeLibCallOptions CallOptions,
const SDLoc &dl,
SDValue InChain) const {
if (!InChain)
InChain = DAG.getEntryNode();
TargetLowering::ArgListTy Args;
Args.reserve(Ops.size());
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0; i < Ops.size(); ++i) {
SDValue NewOp = Ops[i];
Entry.Node = NewOp;
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
CallOptions.IsSExt);
Entry.IsZExt = !Entry.IsSExt;
if (CallOptions.IsSoften &&
!shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
Entry.IsSExt = Entry.IsZExt = false;
}
Args.push_back(Entry);
}
if (LC == RTLIB::UNKNOWN_LIBCALL)
report_fatal_error("Unsupported library call operation!");
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy(DAG.getDataLayout()));
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
bool zeroExtend = !signExtend;
if (CallOptions.IsSoften &&
!shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
signExtend = zeroExtend = false;
}
CLI.setDebugLoc(dl)
.setChain(InChain)
.setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setNoReturn(CallOptions.DoesNotReturn)
.setDiscardResult(!CallOptions.IsReturnValueUsed)
.setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
.setSExtResult(signExtend)
.setZExtResult(zeroExtend);
return LowerCallTo(CLI);
}
bool
TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned Limit, uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool IsMemset,
bool ZeroMemset,
bool MemcpyStrSrc,
bool AllowOverlap,
unsigned DstAS, unsigned SrcAS,
const AttributeList &FuncAttributes) const {
// If 'SrcAlign' is zero, that means the memory operation does not need to
// load the value, i.e. memset or memcpy from constant string. Otherwise,
// it's the inferred alignment of the source. 'DstAlign', on the other hand,
// is the specified alignment of the memory operation. If it is zero, that
// means it's possible to change the alignment of the destination.
// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
// not need to be loaded.
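// For example (illustrative, target-dependent): a 16-byte memset with
// DstAlign == 8 would typically pick VT == i64 and emit two i64 stores,
// assuming i64 is legal and getOptimalMemOpType does not override the choice.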
if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
return false;
EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
IsMemset, ZeroMemset, MemcpyStrSrc,
FuncAttributes);
if (VT == MVT::Other) {
// Use the largest integer type whose alignment constraints are satisfied.
// We only need to check DstAlign here as SrcAlign is always greater than
// or equal to DstAlign (or zero).
VT = MVT::i64;
while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
!allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
assert(VT.isInteger());
// Find the largest legal integer type.
MVT LVT = MVT::i64;
while (!isTypeLegal(LVT))
LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
assert(LVT.isInteger());
// If the type we've chosen is larger than the largest legal integer type
// then use that instead.
if (VT.bitsGT(LVT))
VT = LVT;
}
unsigned NumMemOps = 0;
while (Size != 0) {
unsigned VTSize = VT.getSizeInBits() / 8;
while (VTSize > Size) {
// For now, only use non-vector loads / stores for the left-over pieces.
EVT NewVT = VT;
unsigned NewVTSize;
bool Found = false;
if (VT.isVector() || VT.isFloatingPoint()) {
NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
isSafeMemOpType(NewVT.getSimpleVT()))
Found = true;
else if (NewVT == MVT::i64 &&
isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
isSafeMemOpType(MVT::f64)) {
// i64 is usually not legal on 32-bit targets, but f64 may be.
NewVT = MVT::f64;
Found = true;
}
}
if (!Found) {
do {
NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
if (NewVT == MVT::i8)
break;
} while (!isSafeMemOpType(NewVT.getSimpleVT()));
}
NewVTSize = NewVT.getSizeInBits() / 8;
// If the new VT cannot cover all of the remaining bits, then consider
// issuing one (or a pair of) unaligned and overlapping loads / stores.
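// For example (illustrative): 7 remaining bytes could be covered by an i32
// op at offset 0 plus an overlapping i32 op at offset 3, rather than by an
// i32 + i16 + i8 sequence.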
bool Fast;
if (NumMemOps && AllowOverlap && NewVTSize < Size &&
allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign,
MachineMemOperand::MONone, &Fast) &&
Fast)
VTSize = Size;
else {
VT = NewVT;
VTSize = NewVTSize;
}
}
if (++NumMemOps > Limit)
return false;
MemOps.push_back(VT);
Size -= VTSize;
}
return true;
}
/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode,
const SDLoc &dl, const SDValue OldLHS,
const SDValue OldRHS) const {
SDValue Chain;
return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
OldRHS, Chain);
}
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode,
const SDLoc &dl, const SDValue OldLHS,
const SDValue OldRHS,
SDValue &Chain,
bool IsSignaling) const {
// FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
// not supporting it. We can update this code when libgcc provides such
// functions.
assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
&& "Unsupported setcc type!");
// Expand into one or more soft-fp libcall(s).
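// For example, on a typical libgcc-style runtime, (setcc oeq f32 x, y)
// becomes a call to __eqsf2 whose result is then compared against zero
// (the exact symbol names and return conventions are target-dependent).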
RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
bool ShouldInvertCC = false;
switch (CCCode) {
case ISD::SETEQ:
case ISD::SETOEQ:
LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
(VT == MVT::f64) ? RTLIB::OEQ_F64 :
(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
break;
case ISD::SETNE:
case ISD::SETUNE:
LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
(VT == MVT::f64) ? RTLIB::UNE_F64 :
(VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
break;
case ISD::SETGE:
case ISD::SETOGE:
LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
(VT == MVT::f64) ? RTLIB::OGE_F64 :
(VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
break;
case ISD::SETLT:
case ISD::SETOLT:
LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
(VT == MVT::f64) ? RTLIB::OLT_F64 :
(VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
break;
case ISD::SETLE:
case ISD::SETOLE:
LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
(VT == MVT::f64) ? RTLIB::OLE_F64 :
(VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
break;
case ISD::SETGT:
case ISD::SETOGT:
LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
(VT == MVT::f64) ? RTLIB::OGT_F64 :
(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
break;
case ISD::SETO:
ShouldInvertCC = true;
LLVM_FALLTHROUGH;
case ISD::SETUO:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
break;
case ISD::SETONE:
// SETONE = O && UNE
ShouldInvertCC = true;
LLVM_FALLTHROUGH;
case ISD::SETUEQ:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
(VT == MVT::f64) ? RTLIB::OEQ_F64 :
(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
break;
default:
// Invert CC for unordered comparisons.
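// e.g. SETULT is the inverse of SETOGE, so emit the OGE libcall and then
// invert the resulting condition code.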
ShouldInvertCC = true;
switch (CCCode) {
case ISD::SETULT:
LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
(VT == MVT::f64) ? RTLIB::OGE_F64 :
(VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
break;
case ISD::SETULE:
LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
(VT == MVT::f64) ? RTLIB::OGT_F64 :
(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
break;
case ISD::SETUGT:
LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
(VT == MVT::f64) ? RTLIB::OLE_F64 :
(VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
break;
case ISD::SETUGE:
LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
(VT == MVT::f64) ? RTLIB::OLT_F64 :
(VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
break;
default: llvm_unreachable("Do not know how to soften this setcc!");
}
}
// Use the target-specific return value for comparison lib calls.
EVT RetVT = getCmpLibcallReturnType();
SDValue Ops[2] = {NewLHS, NewRHS};
TargetLowering::MakeLibCallOptions CallOptions;
EVT OpsVT[2] = { OldLHS.getValueType(),
OldRHS.getValueType() };
CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
NewLHS = Call.first;
NewRHS = DAG.getConstant(0, dl, RetVT);
CCCode = getCmpLibcallCC(LC1);
if (ShouldInvertCC) {
assert(RetVT.isInteger());
CCCode = getSetCCInverse(CCCode, RetVT);
}
if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
// Update Chain.
Chain = Call.second;
} else {
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
CCCode = getCmpLibcallCC(LC2);
if (ShouldInvertCC)
CCCode = getSetCCInverse(CCCode, RetVT);
NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
if (Chain)
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
Call2.second);
NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
Tmp.getValueType(), Tmp, NewLHS);
NewRHS = SDValue();
}
}
/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
// In non-PIC modes, just use the address of a block.
if (!isPositionIndependent())
return MachineJumpTableInfo::EK_BlockAddress;
// In PIC mode, if the target supports a GPRel32 directive, use it.
if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
return MachineJumpTableInfo::EK_GPRel32BlockAddress;
// Otherwise, use a label difference.
return MachineJumpTableInfo::EK_LabelDifference32;
}
SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
// If our PIC model is GP relative, use the global offset table as the base.
unsigned JTEncoding = getJumpTableEncoding();
if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
(JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
return Table;
}
/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI, MCContext &Ctx) const {
// The normal PIC reloc base is the label at the start of the jump table.
return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}
bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
const TargetMachine &TM = getTargetMachine();
const GlobalValue *GV = GA->getGlobal();
// If the address is not even local to this DSO we will have to load it from
// the GOT and then add the offset.
if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
return false;
// If the code is position independent we will have to add a base register.
if (isPositionIndependent())
return false;
// Otherwise we can do it.
return true;
}
//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//
/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
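/// For example, if Op is (and X, 0xFF) and only the low four bits of the
/// result are demanded, the constant can be shrunk to 0x0F.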
bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
TargetLoweringOpt &TLO) const {
SDLoc DL(Op);
unsigned Opcode = Op.getOpcode();
// Do target-specific constant optimization.
if (targetShrinkDemandedConstant(Op, Demanded, TLO))
return TLO.New.getNode();
// FIXME: ISD::SELECT, ISD::SELECT_CC
switch (Opcode) {
default:
break;
case ISD::XOR:
case ISD::AND:
case ISD::OR: {
auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!Op1C)
return false;
// If this is a 'not' op, don't touch it because that's a canonical form.
const APInt &C = Op1C->getAPIntValue();
if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
return false;
if (!C.isSubsetOf(Demanded)) {
EVT VT = Op.getValueType();
SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT);
SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
return TLO.CombineTo(Op, NewOp);
}
break;
}
}
return false;
}
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This checks isTruncateFree/isZExtFree and emits the widening cast as an
/// ANY_EXTEND (the high bits are not demanded), but it could be generalized
/// for targets with other types of implicit widening casts.
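/// For example, if only the low 8 bits of an i32 add are demanded, the add
/// may be performed as (i32 any_extend (i8 add (trunc x), (trunc y))) when
/// the i8 <-> i32 truncation and extension are free.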
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
const APInt &Demanded,
TargetLoweringOpt &TLO) const {
assert(Op.getNumOperands() == 2 &&
"ShrinkDemandedOp only supports binary operators!");
assert(Op.getNode()->getNumValues() == 1 &&
"ShrinkDemandedOp only supports nodes with one result!");
SelectionDAG &DAG = TLO.DAG;
SDLoc dl(Op);
// Early return, as this function cannot handle vector types.
if (Op.getValueType().isVector())
return false;
// Don't do this if the node has another user, which may require the
// full value.
if (!Op.getNode()->hasOneUse())
return false;
// Search for the smallest integer type with free casts to and from
// Op's type. For expedience, just check power-of-2 integer types.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned DemandedSize = Demanded.getActiveBits();
unsigned SmallVTBits = DemandedSize;
if (!isPowerOf2_32(SmallVTBits))
SmallVTBits = NextPowerOf2(SmallVTBits);
for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
TLI.isZExtFree(SmallVT, Op.getValueType())) {
// We found a type with free casts.
SDValue X = DAG.getNode(
Op.getOpcode(), dl, SmallVT,
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
return TLO.CombineTo(Op, Z);
}
}
return false;
}
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
KnownBits Known;
bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
if (Simplified) {
DCI.AddToWorklist(Op.getNode());
DCI.CommitTargetLoweringOpt(TLO);
}
return Simplified;
}
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
KnownBits &Known,
TargetLoweringOpt &TLO,
unsigned Depth,
bool AssumeSingleUse) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
AssumeSingleUse);
}
// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
SelectionDAG &DAG, unsigned Depth) const {
// Limit search depth.
if (Depth >= SelectionDAG::MaxRecursionDepth)
return SDValue();
// Ignore UNDEFs.
if (Op.isUndef())
return SDValue();
// Not demanding any bits/elts from Op.
if (DemandedBits == 0 || DemandedElts == 0)
return DAG.getUNDEF(Op.getValueType());
unsigned NumElts = DemandedElts.getBitWidth();
KnownBits LHSKnown, RHSKnown;
switch (Op.getOpcode()) {
case ISD::BITCAST: {
SDValue Src = peekThroughBitcasts(Op.getOperand(0));
EVT SrcVT = Src.getValueType();
EVT DstVT = Op.getValueType();
unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
if (NumSrcEltBits == NumDstEltBits)
if (SDValue V = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedElts, DAG, Depth + 1))
return DAG.getBitcast(DstVT, V);
// TODO - bigendian once we have test coverage.
if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumDstEltBits / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
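// e.g. for (i32 bitcast v2i16) on a little-endian target, demanding only
// the low 16 bits of the i32 result maps to demanding only element 0 of
// the v2i16 source.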
for (unsigned i = 0; i != Scale; ++i) {
unsigned Offset = i * NumSrcEltBits;
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
if (!Sub.isNullValue()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
if (DemandedElts[j])
DemandedSrcElts.setBit((j * Scale) + i);
}
}
if (SDValue V = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
return DAG.getBitcast(DstVT, V);
}
// TODO - bigendian once we have test coverage.
if ((NumSrcEltBits % NumDstEltBits) == 0 &&
DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumSrcEltBits / NumDstEltBits;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % Scale) * NumDstEltBits;
DemandedSrcBits.insertBits(DemandedBits, Offset);
DemandedSrcElts.setBit(i / Scale);
}
if (SDValue V = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
return DAG.getBitcast(DstVT, V);
}
break;
}
case ISD::AND: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and' in this
// context.
if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
return Op.getOperand(0);
if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
return Op.getOperand(1);
break;
}
case ISD::OR: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If all of the demanded bits are known zero on one side, return the
// other. These bits cannot contribute to the result of the 'or' in this
// context.
if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
return Op.getOperand(0);
if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
return Op.getOperand(1);
break;
}
case ISD::XOR: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If all of the demanded bits are known zero on one side, return the
// other.
if (DemandedBits.isSubsetOf(RHSKnown.Zero))
return Op.getOperand(0);
if (DemandedBits.isSubsetOf(LHSKnown.Zero))
return Op.getOperand(1);
break;
}
case ISD::SETCC: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
// If (1) we only need the sign-bit, (2) the setcc operands are the same
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
// -1, we may be able to bypass the setcc.
if (DemandedBits.isSignMask() &&
Op0.getScalarValueSizeInBits() == DemandedBits.getBitWidth() &&
getBooleanContents(Op0.getValueType()) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
// FIXME: We're limiting to integer types here, but this should also work
// if we don't care about FP signed-zero. The use of SETLT with FP means
// that we don't care about NaNs.
if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
(isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
return Op0;
}
break;
}
case ISD::SIGN_EXTEND_INREG: {
// If none of the extended bits are demanded, eliminate the sextinreg.
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits())
return Op.getOperand(0);
break;
}
case ISD::INSERT_VECTOR_ELT: {
// If we don't demand the inserted element, return the base vector.
SDValue Vec = Op.getOperand(0);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
EVT VecVT = Vec.getValueType();
if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
!DemandedElts[CIdx->getZExtValue()])
return Vec;
break;
}
case ISD::VECTOR_SHUFFLE: {
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
// If all the demanded elts are from one operand and are inline,
// then we can use the operand directly.
bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
for (unsigned i = 0; i != NumElts; ++i) {
int M = ShuffleMask[i];
if (M < 0 || !DemandedElts[i])
continue;
AllUndef = false;
IdentityLHS &= (M == (int)i);
IdentityRHS &= ((M - NumElts) == i);
}
if (AllUndef)
return DAG.getUNDEF(Op.getValueType());
if (IdentityLHS)
return Op.getOperand(0);
if (IdentityRHS)
return Op.getOperand(1);
break;
}
default:
if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
Op, DemandedBits, DemandedElts, DAG, Depth))
return V;
break;
}
return SDValue();
}
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
SDValue Op, const APInt &OriginalDemandedBits,
const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
unsigned Depth, bool AssumeSingleUse) const {
unsigned BitWidth = OriginalDemandedBits.getBitWidth();
assert(Op.getScalarValueSizeInBits() == BitWidth &&
"Mask size mismatches value type size!");
unsigned NumElts = OriginalDemandedElts.getBitWidth();
assert((!Op.getValueType().isVector() ||
NumElts == Op.getValueType().getVectorNumElements()) &&
"Unexpected vector size");
APInt DemandedBits = OriginalDemandedBits;
APInt DemandedElts = OriginalDemandedElts;
SDLoc dl(Op);
auto &DL = TLO.DAG.getDataLayout();
// Don't know anything.
Known = KnownBits(BitWidth);
// Undef operand.
if (Op.isUndef())
return false;
if (Op.getOpcode() == ISD::Constant) {
// We know all of the bits for a constant!
Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
Known.Zero = ~Known.One;
return false;
}
// Other users may use these bits.
EVT VT = Op.getValueType();
if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
if (Depth != 0) {
// If not at the root, just compute the Known bits to
// simplify things downstream.
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false;
}
// If this is the root being simplified, allow it to have multiple uses,
// just set the DemandedBits/Elts to all bits.
DemandedBits = APInt::getAllOnesValue(BitWidth);
DemandedElts = APInt::getAllOnesValue(NumElts);
} else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
// Not demanding any bits/elts from Op.
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
} else if (Depth >= SelectionDAG::MaxRecursionDepth) {
// Limit search depth.
return false;
}
KnownBits Known2, KnownOut;
switch (Op.getOpcode()) {
case ISD::TargetConstant:
llvm_unreachable("Can't simplify this node");
case ISD::SCALAR_TO_VECTOR: {
if (!DemandedElts[0])
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
KnownBits SrcKnown;
SDValue Src = Op.getOperand(0);
unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
return true;
Known = SrcKnown.zextOrTrunc(BitWidth, false);
break;
}
case ISD::BUILD_VECTOR:
// Collect the known bits that are shared by every demanded element.
// TODO: Call SimplifyDemandedBits for non-constant demanded elements.
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false; // Don't fall through, will infinitely loop.
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Op);
if (getTargetConstantFromLoad(LD)) {
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false; // Don't fall through, will infinitely loop.
}
break;
}
case ISD::INSERT_VECTOR_ELT: {
SDValue Vec = Op.getOperand(0);
SDValue Scl = Op.getOperand(1);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
EVT VecVT = Vec.getValueType();
// If index isn't constant, assume we need all vector elements AND the
// inserted element.
APInt DemandedVecElts(DemandedElts);
if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
unsigned Idx = CIdx->getZExtValue();
DemandedVecElts.clearBit(Idx);
// Inserted element is not required.
if (!DemandedElts[Idx])
return TLO.CombineTo(Op, Vec);
}
KnownBits KnownScl;
unsigned NumSclBits = Scl.getScalarValueSizeInBits();
APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
return true;
Known = KnownScl.zextOrTrunc(BitWidth, false);
KnownBits KnownVec;
if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
Depth + 1))
return true;
if (!!DemandedVecElts) {
Known.One &= KnownVec.One;
Known.Zero &= KnownVec.Zero;
}
return false;
}
case ISD::INSERT_SUBVECTOR: {
SDValue Base = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
EVT SubVT = Sub.getValueType();
unsigned NumSubElts = SubVT.getVectorNumElements();
// If index isn't constant, assume we need the original demanded base
// elements and ALL the inserted subvector elements.
APInt BaseElts = DemandedElts;
APInt SubElts = APInt::getAllOnesValue(NumSubElts);
if (isa<ConstantSDNode>(Op.getOperand(2))) {
const APInt &Idx = Op.getConstantOperandAPInt(2);
if (Idx.ule(NumElts - NumSubElts)) {
unsigned SubIdx = Idx.getZExtValue();
SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
}
}
KnownBits KnownSub, KnownBase;
if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO,
Depth + 1))
return true;
Known.Zero.setAllBits();
Known.One.setAllBits();
if (!!SubElts) {
Known.One &= KnownSub.One;
Known.Zero &= KnownSub.Zero;
}
if (!!BaseElts) {
Known.One &= KnownBase.One;
Known.Zero &= KnownBase.Zero;
}
break;
}
case ISD::EXTRACT_SUBVECTOR: {
// If index isn't constant, assume we need all the source vector elements.
SDValue Src = Op.getOperand(0);
ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
APInt SrcElts = APInt::getAllOnesValue(NumSrcElts);
if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
// Offset the demanded elts by the subvector index.
uint64_t Idx = SubIdx->getZExtValue();
SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
}
if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1))
return true;
break;
}
case ISD::CONCAT_VECTORS: {
Known.Zero.setAllBits();
Known.One.setAllBits();
EVT SubVT = Op.getOperand(0).getValueType();
unsigned NumSubVecs = Op.getNumOperands();
unsigned NumSubElts = SubVT.getVectorNumElements();
for (unsigned i = 0; i != NumSubVecs; ++i) {
APInt DemandedSubElts =
DemandedElts.extractBits(NumSubElts, i * NumSubElts);
if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
Known2, TLO, Depth + 1))
return true;
// Known bits are shared by every demanded subvector element.
if (!!DemandedSubElts) {
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
}
break;
}
case ISD::VECTOR_SHUFFLE: {
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
// Collect demanded elements from shuffle operands.
APInt DemandedLHS(NumElts, 0);
APInt DemandedRHS(NumElts, 0);
for (unsigned i = 0; i != NumElts; ++i) {
if (!DemandedElts[i])
continue;
int M = ShuffleMask[i];
if (M < 0) {
// For UNDEF elements, we don't know anything about the common state of
// the shuffle result.
DemandedLHS.clearAllBits();
DemandedRHS.clearAllBits();
break;
}
assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
if (M < (int)NumElts)
DemandedLHS.setBit(M);
else
DemandedRHS.setBit(M - NumElts);
}
if (!!DemandedLHS || !!DemandedRHS) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
Known.Zero.setAllBits();
Known.One.setAllBits();
if (!!DemandedLHS) {
if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
Depth + 1))
return true;
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
if (!!DemandedRHS) {
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
Depth + 1))
return true;
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
// Attempt to avoid multi-use ops if we don't need anything from them.
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
return TLO.CombineTo(Op, NewOp);
}
}
break;
}
case ISD::AND: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// If the RHS is a constant, check to see if the LHS would be zero without
// using the bits from the RHS. Below, we use knowledge about the RHS to
// simplify the LHS; here, we use information from the LHS to simplify the
// RHS.
if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
// Do not increment Depth here; that can cause an infinite loop.
KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
// If the LHS already has zeros where RHSC does, this 'and' is dead.
if ((LHSKnown.Zero & DemandedBits) ==
(~RHSC->getAPIntValue() & DemandedBits))
return TLO.CombineTo(Op, Op0);
// If any of the set bits in the RHS are known zero on the LHS, shrink
// the constant.
if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO))
return true;
// Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
// constant, but if this 'and' is only clearing bits that were just set by
// the xor, then this 'and' can be eliminated by shrinking the mask of
// the xor. For example, for a 32-bit X:
// and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
LHSKnown.One == ~RHSC->getAPIntValue()) {
SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
return TLO.CombineTo(Op, Xor);
}
}
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
return TLO.CombineTo(Op, Op0);
if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
return TLO.CombineTo(Op, Op1);
// If all of the demanded bits in the inputs are known zeros, return zero.
if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
// If the RHS is a constant, see if we can simplify it.
if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO))
return true;
// If the operation can be done in a smaller type, do so.
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// Output known-1 bits are only known if set in both the LHS & RHS.
Known.One &= Known2.One;
// Output known-0 are known to be clear if zero in either the LHS | RHS.
Known.Zero |= Known2.Zero;
break;
}
case ISD::OR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
return TLO.CombineTo(Op, Op0);
if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
return TLO.CombineTo(Op, Op1);
// If the RHS is a constant, see if we can simplify it.
if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
return true;
// If the operation can be done in a smaller type, do so.
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// Output known-0 bits are only known if clear in both the LHS & RHS.
Known.Zero &= Known2.Zero;
// Output known-1 are known to be set if set in either the LHS | RHS.
Known.One |= Known2.One;
break;
}
case ISD::XOR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
if (DemandedBits.isSubsetOf(Known.Zero))
return TLO.CombineTo(Op, Op0);
if (DemandedBits.isSubsetOf(Known2.Zero))
return TLO.CombineTo(Op, Op1);
// If the operation can be done in a smaller type, do so.
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// If all of the unknown bits are known to be zero on one side or the other
// (but not both) turn this into an *inclusive* or.
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
// Output known-0 bits are known if clear or set in both the LHS & RHS.
KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
if (ConstantSDNode *C = isConstOrConstSplat(Op1)) {
// If one side is a constant, and all of the known set bits on the other
// side are also set in the constant, turn this into an AND, as we know
// the bits will be cleared.
// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
// NB: it is okay if more bits are known than are requested
if (C->getAPIntValue() == Known2.One) {
SDValue ANDC =
TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
}
// If the RHS is a constant, see if we can change it. Don't alter a -1
// constant because that's a 'not' op, and that is better for combining
// and codegen.
if (!C->isAllOnesValue()) {
if (DemandedBits.isSubsetOf(C->getAPIntValue())) {
// We're flipping all demanded bits. Flip the undemanded bits too.
SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
return TLO.CombineTo(Op, New);
}
// If we can't turn this into a 'not', try to shrink the constant.
if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
return true;
}
}
Known = std::move(KnownOut);
break;
}
case ISD::SELECT:
if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
return true;
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
break;
case ISD::SELECT_CC:
if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
return true;
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
break;
case ISD::SETCC: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
// If (1) we only need the sign-bit, (2) the setcc operands are the same
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
// -1, we may be able to bypass the setcc.
if (DemandedBits.isSignMask() &&
Op0.getScalarValueSizeInBits() == BitWidth &&
getBooleanContents(Op0.getValueType()) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
// FIXME: We're limiting to integer types here, but this should also work
// if we don't care about FP signed-zero. The use of SETLT with FP means
// that we don't care about NaNs.
if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
(isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
return TLO.CombineTo(Op, Op0);
// TODO: Should we check for other forms of sign-bit comparisons?
// Examples: X <= -1, X >= 0
}
if (getBooleanContents(Op0.getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
Known.Zero.setBitsFrom(1);
break;
}
case ISD::SHL: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
// single shift. We can do this if the bottom bits (which are shifted
// out) are never demanded.
// TODO - support non-uniform vector amounts.
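// e.g. with i32 x, (shl (srl x, 8), 24) can become (shl x, 16) when the
// low 24 bits of the result are not demanded.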
if (Op0.getOpcode() == ISD::SRL) {
if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
if (ConstantSDNode *SA2 =
isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
if (SA2->getAPIntValue().ult(BitWidth)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SHL;
int Diff = ShAmt - C1;
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SRL;
}
SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
}
if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts,
Known, TLO, Depth + 1))
return true;
// Try shrinking the operation as long as the shift amount will still be
// in range.
if ((ShAmt < DemandedBits.getActiveBits()) &&
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
// are not demanded. This will likely allow the anyext to be folded away.
if (Op0.getOpcode() == ISD::ANY_EXTEND) {
SDValue InnerOp = Op0.getOperand(0);
EVT InnerVT = InnerOp.getValueType();
unsigned InnerBits = InnerVT.getScalarSizeInBits();
if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
EVT ShTy = getShiftAmountTy(InnerVT, DL);
if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
ShTy = InnerVT;
SDValue NarrowShl =
TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
TLO.DAG.getConstant(ShAmt, dl, ShTy));
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
}
// Repeat the SHL optimization above in cases where an extension
// intervenes: (shl (anyext (shr x, c1)), c2) to
// (shl (anyext x), c2-c1). This requires that the bottom c1 bits
// aren't demanded (as above) and that the shifted upper c1 bits of
// x aren't demanded.
if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
InnerOp.hasOneUse()) {
if (ConstantSDNode *SA2 =
isConstOrConstSplat(InnerOp.getOperand(1))) {
unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
DemandedBits.getActiveBits() <=
(InnerBits - InnerShAmt + ShAmt) &&
DemandedBits.countTrailingZeros() >= ShAmt) {
SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
Op1.getValueType());
SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
InnerOp.getOperand(0));
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
}
}
}
}
Known.Zero <<= ShAmt;
Known.One <<= ShAmt;
// Low bits known zero.
Known.Zero.setLowBits(ShAmt);
}
break;
}
case ISD::SRL: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
EVT ShiftVT = Op1.getValueType();
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
if (Op->getFlags().hasExact())
InDemandedMask.setLowBits(ShAmt);
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted out)
// are never demanded.
// TODO - support non-uniform vector amounts.
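// e.g. with i32 x, (srl (shl x, 16), 8) can become (shl x, 8) when the
// high 8 bits of the result are not demanded.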
if (Op0.getOpcode() == ISD::SHL) {
if (ConstantSDNode *SA2 =
isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
if (!DemandedBits.intersects(
APInt::getHighBitsSet(BitWidth, ShAmt))) {
if (SA2->getAPIntValue().ult(BitWidth)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SRL;
int Diff = ShAmt - C1;
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SHL;
}
SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
}
// Compute the new bits that are at the top now.
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
Known.Zero.setHighBits(ShAmt); // High bits known zero.
}
break;
}
case ISD::SRA: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// If this is an arithmetic shift right and only the low-bit is set, we can
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
if (DemandedBits.isOneValue())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
if (Op->getFlags().hasExact())
InDemandedMask.setLowBits(ShAmt);
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
if (DemandedBits.countLeadingZeros() < ShAmt)
InDemandedMask.setSignBit();
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
if (Known.Zero[BitWidth - ShAmt - 1] ||
DemandedBits.countLeadingZeros() >= ShAmt) {
SDNodeFlags Flags;
Flags.setExact(Op->getFlags().hasExact());
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
}
int Log2 = DemandedBits.exactLogBase2();
if (Log2 >= 0) {
// The bit must come from the sign.
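// e.g. if only bit 30 of an i32 sra result is demanded, that bit is a copy
// of the sign bit, so it can be produced directly as (srl x, 31 - 30).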
SDValue NewSA =
TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType());
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
}
if (Known.One[BitWidth - ShAmt - 1])
// New bits are known one.
Known.One.setHighBits(ShAmt);
}
break;
}
case ISD::FSHL:
case ISD::FSHR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
unsigned Amt = SA->getAPIntValue().urem(BitWidth);
// For fshl, 0-shift returns the 1st arg.
// For fshr, 0-shift returns the 2nd arg.
if (Amt == 0) {
if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
Known, TLO, Depth + 1))
return true;
break;
}
// fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
// fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
Depth + 1))
return true;
Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
Known.One |= Known2.One;
Known.Zero |= Known2.Zero;
}
break;
}
case ISD::BITREVERSE: {
SDValue Src = Op.getOperand(0);
APInt DemandedSrcBits = DemandedBits.reverseBits();
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
Known.One = Known2.One.reverseBits();
Known.Zero = Known2.Zero.reverseBits();
break;
}
case ISD::BSWAP: {
SDValue Src = Op.getOperand(0);
APInt DemandedSrcBits = DemandedBits.byteSwap();
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
Known.One = Known2.One.byteSwap();
Known.Zero = Known2.Zero.byteSwap();
break;
}
case ISD::SIGN_EXTEND_INREG: {
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned ExVTBits = ExVT.getScalarSizeInBits();
// If we only care about the highest bit, don't bother shifting right.
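// e.g. for i32 (sext_inreg x, i8) with only bit 31 demanded, (shl x, 24)
// places the i8 sign bit (bit 7 of x) directly at bit 31.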
if (DemandedBits.isSignMask()) {
unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0);
bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
// However, if the input is already sign extended, we expect the sign
// extension to be dropped altogether later and do not simplify.
if (!AlreadySignExtended) {
// Compute the correct shift amount type, which must be getShiftAmountTy
// for scalar types after legalization.
EVT ShiftAmtTy = VT;
if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
SDValue ShiftAmt =
TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
}
}
// If none of the extended bits are demanded, eliminate the sextinreg.
if (DemandedBits.getActiveBits() <= ExVTBits)
return TLO.CombineTo(Op, Op0);
APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
// Since the sign extended bits are demanded, we know that the sign
// bit is demanded.
InputDemandedBits.setBit(ExVTBits - 1);
if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
// If the input sign bit is known zero, convert this into a zero extension.
if (Known.Zero[ExVTBits - 1])
return TLO.CombineTo(
Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType()));
APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
if (Known.One[ExVTBits - 1]) { // Input sign bit known set
Known.One.setBitsFrom(ExVTBits);
Known.Zero &= Mask;
} else { // Input sign bit unknown
Known.Zero &= Mask;
Known.One &= Mask;
}
break;
}
case ISD::BUILD_PAIR: {
EVT HalfVT = Op.getOperand(0).getValueType();
unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
KnownBits KnownLo, KnownHi;
if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
return true;
if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
return true;
Known.Zero = KnownLo.Zero.zext(BitWidth) |
KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
Known.One = KnownLo.One.zext(BitWidth) |
KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
break;
}
case ISD::ZERO_EXTEND:
case ISD::ZERO_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
// If none of the top bits are demanded, convert this into an any_extend.
if (DemandedBits.getActiveBits() <= InBits) {
// If we only need the non-extended bits of the bottom element
// then we can just bitcast to the result.
if (IsVecInReg && DemandedElts == 1 &&
VT.getSizeInBits() == SrcVT.getSizeInBits() &&
TLO.DAG.getDataLayout().isLittleEndian())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
unsigned Opc =
IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
APInt InDemandedBits = DemandedBits.trunc(InBits);
APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
break;
}
case ISD::SIGN_EXTEND:
case ISD::SIGN_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
// If none of the top bits are demanded, convert this into an any_extend.
if (DemandedBits.getActiveBits() <= InBits) {
// If we only need the non-extended bits of the bottom element
// then we can just bitcast to the result.
if (IsVecInReg && DemandedElts == 1 &&
VT.getSizeInBits() == SrcVT.getSizeInBits() &&
TLO.DAG.getDataLayout().isLittleEndian())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
unsigned Opc =
IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
APInt InDemandedBits = DemandedBits.trunc(InBits);
APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
InDemandedBits.setBit(InBits - 1);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
// If the sign bit is known one, the top bits match.
Known = Known.sext(BitWidth);
// If the sign bit is known zero, convert this to a zero extend.
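// e.g. if the source is (and x, 0x7f):i8, its sign bit is known zero and
// the sign_extend can be replaced by the cheaper zero_extend.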
if (Known.isNonNegative()) {
unsigned Opc =
IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
break;
}
case ISD::ANY_EXTEND:
case ISD::ANY_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
// If we only need the bottom element then we can just bitcast.
// TODO: Handle ANY_EXTEND?
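// e.g. (any_extend_vector_inreg x:v4i32):v2i64 with only element 0
// demanded is just a bitcast of x on little-endian targets, since the
// extended high bits are undefined anyway.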
if (IsVecInReg && DemandedElts == 1 &&
VT.getSizeInBits() == SrcVT.getSizeInBits() &&
TLO.DAG.getDataLayout().isLittleEndian())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
APInt InDemandedBits = DemandedBits.trunc(InBits);
APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
Known = Known.zext(BitWidth, false /* => any extend */);
break;
}
case ISD::TRUNCATE: {
SDValue Src = Op.getOperand(0);
// Simplify the input, using demanded bit information, and compute the known
// zero/one bits live out.
unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
APInt TruncMask = DemandedBits.zext(OperandBitWidth);
if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
return true;
Known = Known.trunc(BitWidth);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
// If the input is only used by this truncate, see if we can shrink it based
// on the known demanded bits.
if (Src.getNode()->hasOneUse()) {
switch (Src.getOpcode()) {
default:
break;
case ISD::SRL:
// Shrink SRL by a constant if none of the high bits shifted in are
// demanded.
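// e.g. (trunc (srl x:i64, 8)):i32 can become (srl (trunc x):i32, 8) when
// result bits 24-31 (the bits shifted in from x's upper half) are not
// demanded.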
if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
// undesirable.
break;
SDValue ShAmt = Src.getOperand(1);
auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
break;
uint64_t ShVal = ShAmtC->getZExtValue();
APInt HighBits =
APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
HighBits.lshrInPlace(ShVal);
HighBits = HighBits.trunc(BitWidth);
if (!(HighBits & DemandedBits)) {
// None of the shifted-in bits are needed. Truncate the shift input
// first, then shift the narrower value.
if (TLO.LegalTypes())
ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
SDValue NewTrunc =
TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, ShAmt));
}
break;
}
}
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
break;
}
case ISD::AssertZext: {
// AssertZext demands all of the high bits, plus any of the low bits
// demanded by its users.
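// e.g. for (AssertZext x, i8):i32, bits 8-31 are always demanded (they
// are asserted to be zero), plus whichever of bits 0-7 our users demand.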
EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero |= ~InMask;
break;
}
case ISD::EXTRACT_VECTOR_ELT: {
SDValue Src = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
unsigned EltBitWidth = Src.getScalarValueSizeInBits();
// If the index is not a constant, demand the bits from every vector
// element.
APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
if (CIdx->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
// If BitWidth > EltBitWidth the value is implicitly any-extended, so we
// do not know anything about the extended bits.
APInt DemandedSrcBits = DemandedBits;
if (BitWidth > EltBitWidth)
DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
Depth + 1))
return true;
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedSrcBits.isAllOnesValue() ||
!DemandedSrcElts.isAllOnesValue()) {
if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
SDValue NewOp =
TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
return TLO.CombineTo(Op, NewOp);
}
}
Known = Known2;
if (BitWidth > EltBitWidth)
Known = Known.zext(BitWidth, false /* => any extend */);
break;
}
case ISD::BITCAST: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
// If this is an FP->Int bitcast and if the sign bit is the only
// thing demanded, turn this into a FGETSIGN.
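// e.g. (bitcast x:f64):i64 with only bit 63 demanded can be rewritten as
// FGETSIGN (which produces 0 or 1) shifted left by 63.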
if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
SrcVT.isFloatingPoint()) {
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
SrcVT != MVT::f128) {
// Cannot eliminate/lower SHL for f128 yet.
EVT Ty = OpVTLegal ? VT : MVT::i32;
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
// place. We expect the SHL to be eliminated by other optimizations.
SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
unsigned OpVTSizeInBits = Op.getValueSizeInBits();
if (!OpVTLegal && OpVTSizeInBits > 32)
Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
unsigned ShVal = Op.getValueSizeInBits() - 1;
SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
}
}
// Bitcast from a vector using SimplifyDemandedBits/SimplifyDemandedVectorElts.
// Demand the elt/bit if any of the original elts/bits are demanded.
// TODO - bigendian once we have test coverage.
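// e.g. for (bitcast x:v4i32):v2i64 on a little-endian target, each
// demanded i64 element covers two i32 source elements, and each demanded
// i64 bit maps onto a bit of one of those two elements.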
if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = BitWidth / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
unsigned Offset = i * NumSrcEltBits;
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
if (!Sub.isNullValue()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
if (DemandedElts[j])
DemandedSrcElts.setBit((j * Scale) + i);
}
}
APInt KnownSrcUndef, KnownSrcZero;
if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
KnownSrcZero, TLO, Depth + 1))
return true;
KnownBits KnownSrcBits;
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
KnownSrcBits, TLO, Depth + 1))
return true;
} else if ((NumSrcEltBits % BitWidth) == 0 &&
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumSrcEltBits / BitWidth;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % Scale) * BitWidth;
DemandedSrcBits.insertBits(DemandedBits, Offset);
DemandedSrcElts.setBit(i / Scale);
}
if (SrcVT.isVector()) {
APInt KnownSrcUndef, KnownSrcZero;
if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
KnownSrcZero, TLO, Depth + 1))
return true;
}
KnownBits KnownSrcBits;
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
KnownSrcBits, TLO, Depth + 1))
return true;
}
// If this is a bitcast, let computeKnownBits handle it. Only do this on a
// recursive call where Known may be useful to the caller.
if (Depth > 0) {
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false;
}
break;
}
case ISD::ADD:
case ISD::MUL:
case ISD::SUB: {
// Add, Sub, and Mul don't demand any bits of their operands in positions
// above the highest bit demanded of the result.
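// e.g. if only the low 8 bits of an i32 add are demanded, only the low 8
// bits of each operand matter, since carries only propagate upwards.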
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
SDNodeFlags Flags = Op.getNode()->getFlags();
unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
Depth + 1) ||
SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
Depth + 1) ||
// See if the operation should be performed at a smaller bit width.
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
// Disable the nsw and nuw flags. We can no longer guarantee that we
// won't wrap after simplification.
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
SDValue NewOp =
TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
return TLO.CombineTo(Op, NewOp);
}
return true;
}
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp =
TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
return TLO.CombineTo(Op, NewOp);
}
}
// If we have a constant operand, we may be able to turn it into -1 if we
// do not demand the high bits. This can make the constant smaller to
// encode, allow more general folding, or match specialized instruction
// patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because that
// is probably not useful (and could be detrimental).
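// e.g. (add x, 0xFF):i32 with only the low 8 bits demanded can use -1
// instead, turning it into (add x, -1), i.e. the (x - 1) form that
// 'blsr'-style patterns match.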
ConstantSDNode *C = isConstOrConstSplat(Op1);
APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
if (C && !C->isAllOnesValue() && !C->isOne() &&
(C->getAPIntValue() | HighMask).isAllOnesValue()) {
SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
// Disable the nsw and nuw flags. We can no longer guarantee that we
// won't wrap after simplification.
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
return TLO.CombineTo(Op, NewOp);
}
LLVM_FALLTHROUGH;
}
default:
if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
Known, TLO, Depth))
return true;
break;
}
// Just use computeKnownBits to compute output bits.
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
break;
}
// If we know the value of all of the demanded bits, return this as a
// constant.
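// e.g. if only the low nibble is demanded and all four of those bits are
// known, Known.One already holds the correct value for every demanded
// bit, so the whole node can be replaced by that constant.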
if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
// Avoid folding to a constant if any OpaqueConstant is involved.
const SDNode *N = Op.getNode();
for (SDValue OpVal : N->op_values())
if (auto *C = dyn_cast<ConstantSDNode>(OpVal))
if (C->isOpaque())
return false;
// TODO: Handle float bits as well.
if (VT.isInteger())
return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
}
return false;
}
bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
const APInt &DemandedElts,
APInt &KnownUndef,
APInt &KnownZero,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
bool Simplified =
SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
if (Simplified) {
DCI.AddToWorklist(Op.getNode());
DCI.CommitTargetLoweringOpt(TLO);
}
return Simplified;
}
/// Given a vector binary operation and known undefined elements for each input
/// operand, compute whether each element of the output is undefined.
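/// e.g. for an integer add, an element that is undef in either operand
/// constant-folds to undef, so the corresponding result element is known
/// undef.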
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
const APInt &UndefOp0,
const APInt &UndefOp1) {
EVT VT = BO.getValueType();
assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
"Vector binop only");
EVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
assert(UndefOp0.getBitWidth() == NumElts &&
UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
const APInt &UndefVals) {
if (UndefVals[Index])
return DAG.getUNDEF(EltVT);
if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
// Try hard to make sure that the getNode() call is not creating temporary
// nodes. Ignore opaque integers because they do not constant fold.
SDValue Elt = BV->getOperand(Index);
auto *C = dyn_cast<ConstantSDNode>(Elt);
if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
return Elt;
}
return SDValue();
};
APInt KnownUndef = APInt::getNullValue(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
// If both inputs for this element are either constant or undef and match
// the element type, compute the constant/undef result for this element of
// the vector.
// TODO: Ideally we would use FoldConstantArithmetic() here, but that does
// not handle FP constants. The code within getNode() should be refactored
// to avoid the danger of creating a bogus temporary node here.
SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
KnownUndef.setBit(i);
}
return KnownUndef;
}
bool TargetLowering::SimplifyDemandedVectorElts(
SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
bool AssumeSingleUse) const {
EVT VT = Op.getValueType();
APInt DemandedElts = OriginalDemandedElts;
unsigned NumElts = DemandedElts.getBitWidth();
assert(VT.isVector() && "Expected vector op");
assert(VT.getVectorNumElements() == NumElts &&
"Mask size mismatches value type element count!");
KnownUndef = KnownZero = APInt::getNullValue(NumElts);
// Undef operand.
if (Op.isUndef()) {
KnownUndef.setAllBits();
return false;
}
// If Op has other users, assume that all elements are needed.
if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
DemandedElts.setAllBits();
// Not demanding any elements from Op.
if (DemandedElts == 0) {
KnownUndef.setAllBits();
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
}
// Limit search depth.
if (Depth >= SelectionDAG::MaxRecursionDepth)
return false;
SDLoc DL(Op);
unsigned EltSizeInBits = VT.getScalarSizeInBits();
switch (Op.getOpcode()) {
case ISD::SCALAR_TO_VECTOR: {
if (!DemandedElts[0]) {
KnownUndef.setAllBits();
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
}
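// Everything but element 0 of a SCALAR_TO_VECTOR is undef.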
KnownUndef.setHighBits(NumElts - 1);
break;
}
case ISD::BITCAST: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
// We only handle vectors here.
// TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
if (!SrcVT.isVector())
break;
// Fast handling of 'identity' bitcasts.
unsigned NumSrcElts = SrcVT.getVectorNumElements();
if (NumSrcElts == NumElts)
return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
KnownZero, TLO, Depth + 1);
APInt SrcZero, SrcUndef;
APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
// When bitcasting from a 'large element' src vector to a 'small element'
// vector, we must demand a source element if any DemandedElt maps to it.
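// e.g. for (bitcast x:v2i64):v4i32, demanding i32 element 0 or 1 demands
// i64 source element 0, and demanding element 2 or 3 demands element 1.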
if ((NumElts % NumSrcElts) == 0) {
unsigned Scale = NumElts / NumSrcElts;
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i])
SrcDemandedElts.setBit(i / Scale);
if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
// Try calling SimplifyDemandedBits, converting demanded elts to the bits
// of the large element.
// TODO - bigendian once we have test coverage.
if (TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Ofs = (i % Scale) * EltSizeInBits;
SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
}
KnownBits Known;
if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1))
return true;
}
// If a src element is known zero/undef then so are all the output
// elements that map to it - though only the demanded output elements are
// guaranteed to be correct.
for (unsigned i = 0; i != NumSrcElts; ++i) {
if (SrcDemandedElts[i]) {