| //===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements the interfaces that VE uses to lower LLVM code into a |
| // selection DAG. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "VEISelLowering.h" |
| #include "MCTargetDesc/VEMCExpr.h" |
| #include "VEMachineFunctionInfo.h" |
| #include "VERegisterInfo.h" |
| #include "VETargetMachine.h" |
| #include "llvm/ADT/StringSwitch.h" |
| #include "llvm/CodeGen/CallingConvLower.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/MachineFunction.h" |
| #include "llvm/CodeGen/MachineInstrBuilder.h" |
| #include "llvm/CodeGen/MachineModuleInfo.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/SelectionDAG.h" |
| #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/Module.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/KnownBits.h" |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "ve-lower" |
| |
| //===----------------------------------------------------------------------===// |
| // Calling Convention Implementation |
| //===----------------------------------------------------------------------===// |
| |
| #include "VEGenCallingConv.inc" |
| |
| bool VETargetLowering::CanLowerReturn( |
| CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, |
| const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { |
| CCAssignFn *RetCC = RetCC_VE; |
| SmallVector<CCValAssign, 16> RVLocs; |
| CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); |
| return CCInfo.CheckReturn(Outs, RetCC); |
| } |
| |
| SDValue |
| VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, |
| bool IsVarArg, |
| const SmallVectorImpl<ISD::OutputArg> &Outs, |
| const SmallVectorImpl<SDValue> &OutVals, |
| const SDLoc &DL, SelectionDAG &DAG) const { |
| // CCValAssign - represent the assignment of the return value to locations. |
| SmallVector<CCValAssign, 16> RVLocs; |
| |
| // CCState - Info about the registers and stack slot. |
| CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, |
| *DAG.getContext()); |
| |
| // Analyze return values. |
| CCInfo.AnalyzeReturn(Outs, RetCC_VE); |
| |
| SDValue Flag; |
| SmallVector<SDValue, 4> RetOps(1, Chain); |
| |
| // Copy the result values into the output registers. |
| for (unsigned i = 0; i != RVLocs.size(); ++i) { |
| CCValAssign &VA = RVLocs[i]; |
| assert(VA.isRegLoc() && "Can only return in registers!"); |
| SDValue OutVal = OutVals[i]; |
| |
| // Integer return values must be sign or zero extended by the callee. |
| switch (VA.getLocInfo()) { |
| case CCValAssign::Full: |
| break; |
| case CCValAssign::SExt: |
| OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal); |
| break; |
| case CCValAssign::ZExt: |
| OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal); |
| break; |
| case CCValAssign::AExt: |
| OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal); |
| break; |
| case CCValAssign::BCvt: { |
| // Convert a float return value to i64 with padding. |
| // 63 31 0 |
| // +------+------+ |
| // | float| 0 | |
| // +------+------+ |
| assert(VA.getLocVT() == MVT::i64); |
| assert(VA.getValVT() == MVT::f32); |
| SDValue Undef = SDValue( |
| DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0); |
| SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32); |
| OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, |
| MVT::i64, Undef, OutVal, Sub_f32), |
| 0); |
| break; |
| } |
| default: |
| llvm_unreachable("Unknown loc info!"); |
| } |
| |
| assert(!VA.needsCustom() && "Unexpected custom lowering"); |
| |
| Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag); |
| |
| // Guarantee that all emitted copies are stuck together with flags. |
| Flag = Chain.getValue(1); |
| RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); |
| } |
| |
| RetOps[0] = Chain; // Update chain. |
| |
| // Add the flag if we have it. |
| if (Flag.getNode()) |
| RetOps.push_back(Flag); |
| |
| return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps); |
| } |
| |
| SDValue VETargetLowering::LowerFormalArguments( |
| SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
| const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, |
| SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
| MachineFunction &MF = DAG.getMachineFunction(); |
| |
| // Get the base offset of the incoming arguments stack space. |
| unsigned ArgsBaseOffset = 176; |
| // Get the size of the preserved arguments area |
| unsigned ArgsPreserved = 64; |
| |
| // Analyze arguments according to CC_VE. |
| SmallVector<CCValAssign, 16> ArgLocs; |
| CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, |
| *DAG.getContext()); |
| // Allocate the preserved area first. |
| CCInfo.AllocateStack(ArgsPreserved, Align(8)); |
| // We already allocated the preserved area, so the stack offset computed |
| // by CC_VE would be correct now. |
| CCInfo.AnalyzeFormalArguments(Ins, CC_VE); |
| |
| for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { |
| CCValAssign &VA = ArgLocs[i]; |
| if (VA.isRegLoc()) { |
| // This argument is passed in a register. |
| // All integer register arguments are promoted by the caller to i64. |
| |
| // Create a virtual register for the promoted live-in value. |
| unsigned VReg = |
| MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT())); |
| SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT()); |
| |
| // Get the high bits for i32 struct elements. |
| if (VA.getValVT() == MVT::i32 && VA.needsCustom()) |
| Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg, |
| DAG.getConstant(32, DL, MVT::i32)); |
| |
| // The caller promoted the argument, so insert an Assert?ext SDNode so we |
| // won't promote the value again in this function. |
| switch (VA.getLocInfo()) { |
| case CCValAssign::SExt: |
| Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg, |
| DAG.getValueType(VA.getValVT())); |
| break; |
| case CCValAssign::ZExt: |
| Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg, |
| DAG.getValueType(VA.getValVT())); |
| break; |
| case CCValAssign::BCvt: { |
| // Extract a float argument from i64 with padding. |
| // 63 31 0 |
| // +------+------+ |
| // | float| 0 | |
| // +------+------+ |
| assert(VA.getLocVT() == MVT::i64); |
| assert(VA.getValVT() == MVT::f32); |
| SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32); |
| Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, |
| MVT::f32, Arg, Sub_f32), |
| 0); |
| break; |
| } |
| default: |
| break; |
| } |
| |
| // Truncate the register down to the argument type. |
| if (VA.isExtInLoc()) |
| Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg); |
| |
| InVals.push_back(Arg); |
| continue; |
| } |
| |
| // The registers are exhausted. This argument was passed on the stack. |
| assert(VA.isMemLoc()); |
| // The CC_VE_Full/Half functions compute stack offsets relative to the |
| // beginning of the arguments area at %fp+176. |
| unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset; |
| unsigned ValSize = VA.getValVT().getSizeInBits() / 8; |
| |
| // Adjust offset for a float argument by adding 4 since the argument is |
| // stored in 8 bytes buffer with offset like below. LLVM generates |
| // 4 bytes load instruction, so need to adjust offset here. This |
| // adjustment is required in only LowerFormalArguments. In LowerCall, |
| // a float argument is converted to i64 first, and stored as 8 bytes |
| // data, which is required by ABI, so no need for adjustment. |
| // 0 4 |
| // +------+------+ |
| // | empty| float| |
| // +------+------+ |
| if (VA.getValVT() == MVT::f32) |
| Offset += 4; |
| |
| int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true); |
| InVals.push_back( |
| DAG.getLoad(VA.getValVT(), DL, Chain, |
| DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())), |
| MachinePointerInfo::getFixedStack(MF, FI))); |
| } |
| |
| if (!IsVarArg) |
| return Chain; |
| |
| // This function takes variable arguments, some of which may have been passed |
| // in registers %s0-%s8. |
| // |
| // The va_start intrinsic needs to know the offset to the first variable |
| // argument. |
| // TODO: need to calculate offset correctly once we support f128. |
| unsigned ArgOffset = ArgLocs.size() * 8; |
| VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); |
| // Skip the 176 bytes of register save area. |
| FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset); |
| |
| return Chain; |
| } |
| |
| // FIXME? Maybe this could be a TableGen attribute on some registers and |
| // this table could be generated automatically from RegInfo. |
| Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT, |
| const MachineFunction &MF) const { |
| Register Reg = StringSwitch<Register>(RegName) |
| .Case("sp", VE::SX11) // Stack pointer |
| .Case("fp", VE::SX9) // Frame pointer |
| .Case("sl", VE::SX8) // Stack limit |
| .Case("lr", VE::SX10) // Link register |
| .Case("tp", VE::SX14) // Thread pointer |
| .Case("outer", VE::SX12) // Outer regiser |
| .Case("info", VE::SX17) // Info area register |
| .Case("got", VE::SX15) // Global offset table register |
| .Case("plt", VE::SX16) // Procedure linkage table register |
| .Default(0); |
| |
| if (Reg) |
| return Reg; |
| |
| report_fatal_error("Invalid register name global variable"); |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // TargetLowering Implementation |
| //===----------------------------------------------------------------------===// |
| |
| SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, |
| SmallVectorImpl<SDValue> &InVals) const { |
| SelectionDAG &DAG = CLI.DAG; |
| SDLoc DL = CLI.DL; |
| SDValue Chain = CLI.Chain; |
| auto PtrVT = getPointerTy(DAG.getDataLayout()); |
| |
| // VE target does not yet support tail call optimization. |
| CLI.IsTailCall = false; |
| |
| // Get the base offset of the outgoing arguments stack space. |
| unsigned ArgsBaseOffset = 176; |
| // Get the size of the preserved arguments area |
| unsigned ArgsPreserved = 8 * 8u; |
| |
| // Analyze operands of the call, assigning locations to each operand. |
| SmallVector<CCValAssign, 16> ArgLocs; |
| CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs, |
| *DAG.getContext()); |
| // Allocate the preserved area first. |
| CCInfo.AllocateStack(ArgsPreserved, Align(8)); |
| // We already allocated the preserved area, so the stack offset computed |
| // by CC_VE would be correct now. |
| CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE); |
| |
| // VE requires to use both register and stack for varargs or no-prototyped |
| // functions. |
| bool UseBoth = CLI.IsVarArg; |
| |
| // Analyze operands again if it is required to store BOTH. |
| SmallVector<CCValAssign, 16> ArgLocs2; |
| CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), |
| ArgLocs2, *DAG.getContext()); |
| if (UseBoth) |
| CCInfo2.AnalyzeCallOperands(CLI.Outs, CC_VE2); |
| |
| // Get the size of the outgoing arguments stack space requirement. |
| unsigned ArgsSize = CCInfo.getNextStackOffset(); |
| |
| // Keep stack frames 16-byte aligned. |
| ArgsSize = alignTo(ArgsSize, 16); |
| |
| // Adjust the stack pointer to make room for the arguments. |
| // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls |
| // with more than 6 arguments. |
| Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL); |
| |
| // Collect the set of registers to pass to the function and their values. |
| // This will be emitted as a sequence of CopyToReg nodes glued to the call |
| // instruction. |
| SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; |
| |
| // Collect chains from all the memory opeations that copy arguments to the |
| // stack. They must follow the stack pointer adjustment above and precede the |
| // call instruction itself. |
| SmallVector<SDValue, 8> MemOpChains; |
| |
| // VE needs to get address of callee function in a register |
| // So, prepare to copy it to SX12 here. |
| |
| // If the callee is a GlobalAddress node (quite common, every direct call is) |
| // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. |
| // Likewise ExternalSymbol -> TargetExternalSymbol. |
| SDValue Callee = CLI.Callee; |
| |
| bool IsPICCall = isPositionIndependent(); |
| |
| // PC-relative references to external symbols should go through $stub. |
| // If so, we need to prepare GlobalBaseReg first. |
| const TargetMachine &TM = DAG.getTarget(); |
| const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); |
| const GlobalValue *GV = nullptr; |
| auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee); |
| if (CalleeG) |
| GV = CalleeG->getGlobal(); |
| bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); |
| bool UsePlt = !Local; |
| MachineFunction &MF = DAG.getMachineFunction(); |
| |
| // Turn GlobalAddress/ExternalSymbol node into a value node |
| // containing the address of them here. |
| if (CalleeG) { |
| if (IsPICCall) { |
| if (UsePlt) |
| Subtarget->getInstrInfo()->getGlobalBaseReg(&MF); |
| Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0); |
| Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee); |
| } else { |
| Callee = |
| makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG); |
| } |
| } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { |
| if (IsPICCall) { |
| if (UsePlt) |
| Subtarget->getInstrInfo()->getGlobalBaseReg(&MF); |
| Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0); |
| Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee); |
| } else { |
| Callee = |
| makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG); |
| } |
| } |
| |
| RegsToPass.push_back(std::make_pair(VE::SX12, Callee)); |
| |
| for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { |
| CCValAssign &VA = ArgLocs[i]; |
| SDValue Arg = CLI.OutVals[i]; |
| |
| // Promote the value if needed. |
| switch (VA.getLocInfo()) { |
| default: |
| llvm_unreachable("Unknown location info!"); |
| case CCValAssign::Full: |
| break; |
| case CCValAssign::SExt: |
| Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg); |
| break; |
| case CCValAssign::ZExt: |
| Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); |
| break; |
| case CCValAssign::AExt: |
| Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); |
| break; |
| case CCValAssign::BCvt: { |
| // Convert a float argument to i64 with padding. |
| // 63 31 0 |
| // +------+------+ |
| // | float| 0 | |
| // +------+------+ |
| assert(VA.getLocVT() == MVT::i64); |
| assert(VA.getValVT() == MVT::f32); |
| SDValue Undef = SDValue( |
| DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0); |
| SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32); |
| Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, |
| MVT::i64, Undef, Arg, Sub_f32), |
| 0); |
| break; |
| } |
| } |
| |
| if (VA.isRegLoc()) { |
| RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); |
| if (!UseBoth) |
| continue; |
| VA = ArgLocs2[i]; |
| } |
| |
| assert(VA.isMemLoc()); |
| |
| // Create a store off the stack pointer for this argument. |
| SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT); |
| // The argument area starts at %fp+176 in the callee frame, |
| // %sp+176 in ours. |
| SDValue PtrOff = |
| DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL); |
| PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); |
| MemOpChains.push_back( |
| DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo())); |
| } |
| |
| // Emit all stores, make sure they occur before the call. |
| if (!MemOpChains.empty()) |
| Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); |
| |
| // Build a sequence of CopyToReg nodes glued together with token chain and |
| // glue operands which copy the outgoing args into registers. The InGlue is |
| // necessary since all emitted instructions must be stuck together in order |
| // to pass the live physical registers. |
| SDValue InGlue; |
| for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { |
| Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first, |
| RegsToPass[i].second, InGlue); |
| InGlue = Chain.getValue(1); |
| } |
| |
| // Build the operands for the call instruction itself. |
| SmallVector<SDValue, 8> Ops; |
| Ops.push_back(Chain); |
| for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) |
| Ops.push_back(DAG.getRegister(RegsToPass[i].first, |
| RegsToPass[i].second.getValueType())); |
| |
| // Add a register mask operand representing the call-preserved registers. |
| const VERegisterInfo *TRI = Subtarget->getRegisterInfo(); |
| const uint32_t *Mask = |
| TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv); |
| assert(Mask && "Missing call preserved mask for calling convention"); |
| Ops.push_back(DAG.getRegisterMask(Mask)); |
| |
| // Make sure the CopyToReg nodes are glued to the call instruction which |
| // consumes the registers. |
| if (InGlue.getNode()) |
| Ops.push_back(InGlue); |
| |
| // Now the call itself. |
| SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); |
| Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops); |
| InGlue = Chain.getValue(1); |
| |
| // Revert the stack pointer immediately after the call. |
| Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true), |
| DAG.getIntPtrConstant(0, DL, true), InGlue, DL); |
| InGlue = Chain.getValue(1); |
| |
| // Now extract the return values. This is more or less the same as |
| // LowerFormalArguments. |
| |
| // Assign locations to each value returned by this call. |
| SmallVector<CCValAssign, 16> RVLocs; |
| CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs, |
| *DAG.getContext()); |
| |
| // Set inreg flag manually for codegen generated library calls that |
| // return float. |
| if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB) |
| CLI.Ins[0].Flags.setInReg(); |
| |
| RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_VE); |
| |
| // Copy all of the result registers out of their specified physreg. |
| for (unsigned i = 0; i != RVLocs.size(); ++i) { |
| CCValAssign &VA = RVLocs[i]; |
| unsigned Reg = VA.getLocReg(); |
| |
| // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can |
| // reside in the same register in the high and low bits. Reuse the |
| // CopyFromReg previous node to avoid duplicate copies. |
| SDValue RV; |
| if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1))) |
| if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg) |
| RV = Chain.getValue(0); |
| |
| // But usually we'll create a new CopyFromReg for a different register. |
| if (!RV.getNode()) { |
| RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue); |
| Chain = RV.getValue(1); |
| InGlue = Chain.getValue(2); |
| } |
| |
| // Get the high bits for i32 struct elements. |
| if (VA.getValVT() == MVT::i32 && VA.needsCustom()) |
| RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV, |
| DAG.getConstant(32, DL, MVT::i32)); |
| |
| // The callee promoted the return value, so insert an Assert?ext SDNode so |
| // we won't promote the value again in this function. |
| switch (VA.getLocInfo()) { |
| case CCValAssign::SExt: |
| RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV, |
| DAG.getValueType(VA.getValVT())); |
| break; |
| case CCValAssign::ZExt: |
| RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV, |
| DAG.getValueType(VA.getValVT())); |
| break; |
| case CCValAssign::BCvt: { |
| // Extract a float return value from i64 with padding. |
| // 63 31 0 |
| // +------+------+ |
| // | float| 0 | |
| // +------+------+ |
| assert(VA.getLocVT() == MVT::i64); |
| assert(VA.getValVT() == MVT::f32); |
| SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32); |
| RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, |
| MVT::f32, RV, Sub_f32), |
| 0); |
| break; |
| } |
| default: |
| break; |
| } |
| |
| // Truncate the register down to the return value type. |
| if (VA.isExtInLoc()) |
| RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV); |
| |
| InVals.push_back(RV); |
| } |
| |
| return Chain; |
| } |
| |
| /// isFPImmLegal - Returns true if the target can instruction select the |
| /// specified FP immediate natively. If false, the legalizer will |
| /// materialize the FP immediate as a load from a constant pool. |
| bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, |
| bool ForCodeSize) const { |
| return VT == MVT::f32 || VT == MVT::f64; |
| } |
| |
| /// Determine if the target supports unaligned memory accesses. |
| /// |
| /// This function returns true if the target allows unaligned memory accesses |
| /// of the specified type in the given address space. If true, it also returns |
| /// whether the unaligned memory access is "fast" in the last argument by |
| /// reference. This is used, for example, in situations where an array |
| /// copy/move/set is converted to a sequence of store operations. Its use |
| /// helps to ensure that such replacements don't generate code that causes an |
| /// alignment error (trap) on the target machine. |
| bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT, |
| unsigned AddrSpace, |
| unsigned Align, |
| MachineMemOperand::Flags, |
| bool *Fast) const { |
| if (Fast) { |
| // It's fast anytime on VE |
| *Fast = true; |
| } |
| return true; |
| } |
| |
| bool VETargetLowering::hasAndNot(SDValue Y) const { |
| EVT VT = Y.getValueType(); |
| |
| // VE doesn't have vector and not instruction. |
| if (VT.isVector()) |
| return false; |
| |
| // VE allows different immediate values for X and Y where ~X & Y. |
| // Only simm7 works for X, and only mimm works for Y on VE. However, this |
| // function is used to check whether an immediate value is OK for and-not |
| // instruction as both X and Y. Generating additional instruction to |
| // retrieve an immediate value is no good since the purpose of this |
| // function is to convert a series of 3 instructions to another series of |
| // 3 instructions with better parallelism. Therefore, we return false |
| // for all immediate values now. |
| // FIXME: Change hasAndNot function to have two operands to make it work |
| // correctly with Aurora VE. |
| if (isa<ConstantSDNode>(Y)) |
| return false; |
| |
| // It's ok for generic registers. |
| return true; |
| } |
| |
| VETargetLowering::VETargetLowering(const TargetMachine &TM, |
| const VESubtarget &STI) |
| : TargetLowering(TM), Subtarget(&STI) { |
| // Instructions which use registers as conditionals examine all the |
| // bits (as does the pseudo SELECT_CC expansion). I don't think it |
| // matters much whether it's ZeroOrOneBooleanContent, or |
| // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the |
| // former. |
| setBooleanContents(ZeroOrOneBooleanContent); |
| setBooleanVectorContents(ZeroOrOneBooleanContent); |
| |
| // Set up the register classes. |
| addRegisterClass(MVT::i32, &VE::I32RegClass); |
| addRegisterClass(MVT::i64, &VE::I64RegClass); |
| addRegisterClass(MVT::f32, &VE::F32RegClass); |
| addRegisterClass(MVT::f64, &VE::I64RegClass); |
| |
| /// Load & Store { |
| for (MVT FPVT : MVT::fp_valuetypes()) { |
| for (MVT OtherFPVT : MVT::fp_valuetypes()) { |
| // Turn FP extload into load/fpextend |
| setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand); |
| |
| // Turn FP truncstore into trunc + store. |
| setTruncStoreAction(FPVT, OtherFPVT, Expand); |
| } |
| } |
| |
| // VE doesn't have i1 sign extending load |
| for (MVT VT : MVT::integer_valuetypes()) { |
| setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); |
| setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); |
| setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); |
| setTruncStoreAction(VT, MVT::i1, Expand); |
| } |
| /// } Load & Store |
| |
| // Custom legalize address nodes into LO/HI parts. |
| MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); |
| setOperationAction(ISD::BlockAddress, PtrVT, Custom); |
| setOperationAction(ISD::GlobalAddress, PtrVT, Custom); |
| setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); |
| |
| /// VAARG handling { |
| setOperationAction(ISD::VASTART, MVT::Other, Custom); |
| // VAARG needs to be lowered to access with 8 bytes alignment. |
| setOperationAction(ISD::VAARG, MVT::Other, Custom); |
| // Use the default implementation. |
| setOperationAction(ISD::VACOPY, MVT::Other, Expand); |
| setOperationAction(ISD::VAEND, MVT::Other, Expand); |
| /// } VAARG handling |
| |
| /// Stack { |
| setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); |
| setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); |
| /// } Stack |
| |
| /// Int Ops { |
| for (MVT IntVT : {MVT::i32, MVT::i64}) { |
| // VE has no REM or DIVREM operations. |
| setOperationAction(ISD::UREM, IntVT, Expand); |
| setOperationAction(ISD::SREM, IntVT, Expand); |
| setOperationAction(ISD::SDIVREM, IntVT, Expand); |
| setOperationAction(ISD::UDIVREM, IntVT, Expand); |
| |
| // VE has no MULHU/S or U/SMUL_LOHI operations. |
| // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type. |
| setOperationAction(ISD::MULHU, IntVT, Expand); |
| setOperationAction(ISD::MULHS, IntVT, Expand); |
| setOperationAction(ISD::UMUL_LOHI, IntVT, Expand); |
| setOperationAction(ISD::SMUL_LOHI, IntVT, Expand); |
| |
| // VE has no CTTZ, ROTL, ROTR operations. |
| setOperationAction(ISD::CTTZ, IntVT, Expand); |
| setOperationAction(ISD::ROTL, IntVT, Expand); |
| setOperationAction(ISD::ROTR, IntVT, Expand); |
| |
| // VE has 64 bits instruction which works as i64 BSWAP operation. This |
| // instruction works fine as i32 BSWAP operation with an additional |
| // parameter. Use isel patterns to lower BSWAP. |
| setOperationAction(ISD::BSWAP, IntVT, Legal); |
| |
| // VE has only 64 bits instructions which work as i64 BITREVERSE/CTLZ/CTPOP |
| // operations. Use isel patterns for i64, promote for i32. |
| LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal; |
| setOperationAction(ISD::BITREVERSE, IntVT, Act); |
| setOperationAction(ISD::CTLZ, IntVT, Act); |
| setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act); |
| setOperationAction(ISD::CTPOP, IntVT, Act); |
| |
| // VE has only 64 bits instructions which work as i64 AND/OR/XOR operations. |
| // Use isel patterns for i64, promote for i32. |
| setOperationAction(ISD::AND, IntVT, Act); |
| setOperationAction(ISD::OR, IntVT, Act); |
| setOperationAction(ISD::XOR, IntVT, Act); |
| } |
| /// } Int Ops |
| |
| /// Conversion { |
| // VE doesn't have instructions for fp<->uint, so expand them by llvm |
| setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64 |
| setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64 |
| setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); |
| |
| // fp16 not supported |
| for (MVT FPVT : MVT::fp_valuetypes()) { |
| setOperationAction(ISD::FP16_TO_FP, FPVT, Expand); |
| setOperationAction(ISD::FP_TO_FP16, FPVT, Expand); |
| } |
| /// } Conversion |
| |
| setStackPointerRegisterToSaveRestore(VE::SX11); |
| |
| // We have target-specific dag combine patterns for the following nodes: |
| setTargetDAGCombine(ISD::TRUNCATE); |
| |
| // Set function alignment to 16 bytes |
| setMinFunctionAlignment(Align(16)); |
| |
| // VE stores all argument by 8 bytes alignment |
| setMinStackArgumentAlignment(Align(8)); |
| |
| computeRegisterProperties(Subtarget->getRegisterInfo()); |
| } |
| |
| const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const { |
| #define TARGET_NODE_CASE(NAME) \ |
| case VEISD::NAME: \ |
| return "VEISD::" #NAME; |
| switch ((VEISD::NodeType)Opcode) { |
| case VEISD::FIRST_NUMBER: |
| break; |
| TARGET_NODE_CASE(Lo) |
| TARGET_NODE_CASE(Hi) |
| TARGET_NODE_CASE(GETFUNPLT) |
| TARGET_NODE_CASE(GETSTACKTOP) |
| TARGET_NODE_CASE(GETTLSADDR) |
| TARGET_NODE_CASE(CALL) |
| TARGET_NODE_CASE(RET_FLAG) |
| TARGET_NODE_CASE(GLOBAL_BASE_REG) |
| } |
| #undef TARGET_NODE_CASE |
| return nullptr; |
| } |
| |
| EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, |
| EVT VT) const { |
| return MVT::i32; |
| } |
| |
| // Convert to a target node and set target flags. |
| SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF, |
| SelectionDAG &DAG) const { |
| if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) |
| return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), |
| GA->getValueType(0), GA->getOffset(), TF); |
| |
| if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) |
| return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(), |
| 0, TF); |
| |
| if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) |
| return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0), |
| TF); |
| |
| llvm_unreachable("Unhandled address SDNode"); |
| } |
| |
| // Split Op into high and low parts according to HiTF and LoTF. |
| // Return an ADD node combining the parts. |
| SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF, |
| SelectionDAG &DAG) const { |
| SDLoc DL(Op); |
| EVT VT = Op.getValueType(); |
| SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG)); |
| SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG)); |
| return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo); |
| } |
| |
| // Build SDNodes for producing an address from a GlobalAddress, ConstantPool, |
| // or ExternalSymbol SDNode. |
| SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const { |
| SDLoc DL(Op); |
| EVT PtrVT = Op.getValueType(); |
| |
| // Handle PIC mode first. VE needs a got load for every variable! |
| if (isPositionIndependent()) { |
| // GLOBAL_BASE_REG codegen'ed with call. Inform MFI that this |
| // function has calls. |
| MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
| MFI.setHasCalls(true); |
| auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op); |
| |
| if (isa<ConstantPoolSDNode>(Op) || |
| (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) { |
| // Create following instructions for local linkage PIC code. |
| // lea %s35, %gotoff_lo(.LCPI0_0) |
| // and %s35, %s35, (32)0 |
| // lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35) |
| // adds.l %s35, %s15, %s35 ; %s15 is GOT |
| // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15) |
| SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32, |
| VEMCExpr::VK_VE_GOTOFF_LO32, DAG); |
| SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT); |
| return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo); |
| } |
| // Create following instructions for not local linkage PIC code. |
| // lea %s35, %got_lo(.LCPI0_0) |
| // and %s35, %s35, (32)0 |
| // lea.sl %s35, %got_hi(.LCPI0_0)(%s35) |
| // adds.l %s35, %s15, %s35 ; %s15 is GOT |
| // ld %s35, (,%s35) |
| // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15) |
| SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32, |
| VEMCExpr::VK_VE_GOT_LO32, DAG); |
| SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT); |
| SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo); |
| return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr, |
| MachinePointerInfo::getGOT(DAG.getMachineFunction())); |
| } |
| |
| // This is one of the absolute code models. |
| switch (getTargetMachine().getCodeModel()) { |
| default: |
| llvm_unreachable("Unsupported absolute code model"); |
| case CodeModel::Small: |
| case CodeModel::Medium: |
| case CodeModel::Large: |
| // abs64. |
| return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG); |
| } |
| } |
| |
| /// Custom Lower { |
| |
| SDValue VETargetLowering::LowerGlobalAddress(SDValue Op, |
| SelectionDAG &DAG) const { |
| return makeAddress(Op, DAG); |
| } |
| |
| SDValue VETargetLowering::LowerBlockAddress(SDValue Op, |
| SelectionDAG &DAG) const { |
| return makeAddress(Op, DAG); |
| } |
| |
| SDValue |
| VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op, |
| SelectionDAG &DAG) const { |
| SDLoc dl(Op); |
| |
| // Generate the following code: |
| // t1: ch,glue = callseq_start t0, 0, 0 |
| // t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1 |
| // t3: ch,glue = callseq_end t2, 0, 0, t2:2 |
| // t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1 |
| SDValue Label = withTargetFlags(Op, 0, DAG); |
| EVT PtrVT = Op.getValueType(); |
| |
| // Lowering the machine isd will make sure everything is in the right |
| // location. |
| SDValue Chain = DAG.getEntryNode(); |
| SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); |
| const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask( |
| DAG.getMachineFunction(), CallingConv::C); |
| Chain = DAG.getCALLSEQ_START(Chain, 64, 0, dl); |
| SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)}; |
| Chain = DAG.getNode(VEISD::GETTLSADDR, dl, NodeTys, Args); |
| Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, dl, true), |
| DAG.getIntPtrConstant(0, dl, true), |
| Chain.getValue(1), dl); |
| Chain = DAG.getCopyFromReg(Chain, dl, VE::SX0, PtrVT, Chain.getValue(1)); |
| |
| // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls. |
| MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
| MFI.setHasCalls(true); |
| |
| // Also generate code to prepare a GOT register if it is PIC. |
| if (isPositionIndependent()) { |
| MachineFunction &MF = DAG.getMachineFunction(); |
| Subtarget->getInstrInfo()->getGlobalBaseReg(&MF); |
| } |
| |
| return Chain; |
| } |
| |
| SDValue VETargetLowering::LowerGlobalTLSAddress(SDValue Op, |
| SelectionDAG &DAG) const { |
| // The current implementation of nld (2.26) doesn't allow local exec model |
| // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always |
| // generate the general dynamic model code sequence. |
| // |
| // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf |
| return LowerToTLSGeneralDynamicModel(Op, DAG); |
| } |
| |
| SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { |
| MachineFunction &MF = DAG.getMachineFunction(); |
| VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); |
| auto PtrVT = getPointerTy(DAG.getDataLayout()); |
| |
| // Need frame address to find the address of VarArgsFrameIndex. |
| MF.getFrameInfo().setFrameAddressIsTaken(true); |
| |
| // vastart just stores the address of the VarArgsFrameIndex slot into the |
| // memory location argument. |
| SDLoc DL(Op); |
| SDValue Offset = |
| DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT), |
| DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL)); |
| const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); |
| return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1), |
| MachinePointerInfo(SV)); |
| } |
| |
| SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { |
| SDNode *Node = Op.getNode(); |
| EVT VT = Node->getValueType(0); |
| SDValue InChain = Node->getOperand(0); |
| SDValue VAListPtr = Node->getOperand(1); |
| EVT PtrVT = VAListPtr.getValueType(); |
| const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); |
| SDLoc DL(Node); |
| SDValue VAList = |
| DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV)); |
| SDValue Chain = VAList.getValue(1); |
| SDValue NextPtr; |
| |
| if (VT == MVT::f32) { |
| // float --> need special handling like below. |
| // 0 4 |
| // +------+------+ |
| // | empty| float| |
| // +------+------+ |
| // Increment the pointer, VAList, by 8 to the next vaarg. |
| NextPtr = |
| DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL)); |
| // Then, adjust VAList. |
| unsigned InternalOffset = 4; |
| VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, |
| DAG.getConstant(InternalOffset, DL, PtrVT)); |
| } else { |
| // Increment the pointer, VAList, by 8 to the next vaarg. |
| NextPtr = |
| DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL)); |
| } |
| |
| // Store the incremented VAList to the legalized pointer. |
| InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV)); |
| |
| // Load the actual argument out of the pointer VAList. |
| // We can't count on greater alignment than the word size. |
| return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(), |
| std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8); |
| } |
| |
| SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op, |
| SelectionDAG &DAG) const { |
| // Generate following code. |
| // (void)__llvm_grow_stack(size); |
| // ret = GETSTACKTOP; // pseudo instruction |
| SDLoc DL(Op); |
| |
| // Get the inputs. |
| SDNode *Node = Op.getNode(); |
| SDValue Chain = Op.getOperand(0); |
| SDValue Size = Op.getOperand(1); |
| MaybeAlign Alignment(Op.getConstantOperandVal(2)); |
| EVT VT = Node->getValueType(0); |
| |
| // Chain the dynamic stack allocation so that it doesn't modify the stack |
| // pointer when other instructions are using the stack. |
| Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL); |
| |
| const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); |
| Align StackAlign = TFI.getStackAlign(); |
| bool NeedsAlign = Alignment.valueOrOne() > StackAlign; |
| |
| // Prepare arguments |
| TargetLowering::ArgListTy Args; |
| TargetLowering::ArgListEntry Entry; |
| Entry.Node = Size; |
| Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); |
| Args.push_back(Entry); |
| if (NeedsAlign) { |
| Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT); |
| Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); |
| Args.push_back(Entry); |
| } |
| Type *RetTy = Type::getVoidTy(*DAG.getContext()); |
| |
| EVT PtrVT = Op.getValueType(); |
| SDValue Callee; |
| if (NeedsAlign) { |
| Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0); |
| } else { |
| Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0); |
| } |
| |
| TargetLowering::CallLoweringInfo CLI(DAG); |
| CLI.setDebugLoc(DL) |
| .setChain(Chain) |
| .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args)) |
| .setDiscardResult(true); |
| std::pair<SDValue, SDValue> pair = LowerCallTo(CLI); |
| Chain = pair.second; |
| SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain); |
| if (NeedsAlign) { |
| Result = DAG.getNode(ISD::ADD, DL, VT, Result, |
| DAG.getConstant((Alignment->value() - 1ULL), DL, VT)); |
| Result = DAG.getNode(ISD::AND, DL, VT, Result, |
| DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT)); |
| } |
| // Chain = Result.getValue(1); |
| Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true), |
| DAG.getIntPtrConstant(0, DL, true), SDValue(), DL); |
| |
| SDValue Ops[2] = {Result, Chain}; |
| return DAG.getMergeValues(Ops, DL); |
| } |
| |
| SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { |
| switch (Op.getOpcode()) { |
| default: |
| llvm_unreachable("Should not custom lower this!"); |
| case ISD::BlockAddress: |
| return LowerBlockAddress(Op, DAG); |
| case ISD::DYNAMIC_STACKALLOC: |
| return lowerDYNAMIC_STACKALLOC(Op, DAG); |
| case ISD::GlobalAddress: |
| return LowerGlobalAddress(Op, DAG); |
| case ISD::GlobalTLSAddress: |
| return LowerGlobalTLSAddress(Op, DAG); |
| case ISD::VASTART: |
| return LowerVASTART(Op, DAG); |
| case ISD::VAARG: |
| return LowerVAARG(Op, DAG); |
| } |
| } |
| /// } Custom Lower |
| |
| static bool isI32Insn(const SDNode *User, const SDNode *N) { |
| switch (User->getOpcode()) { |
| default: |
| return false; |
| case ISD::ADD: |
| case ISD::SUB: |
| case ISD::MUL: |
| case ISD::SDIV: |
| case ISD::UDIV: |
| case ISD::SETCC: |
| case ISD::SMIN: |
| case ISD::SMAX: |
| case ISD::SHL: |
| case ISD::SRA: |
| case ISD::BSWAP: |
| case ISD::SINT_TO_FP: |
| case ISD::UINT_TO_FP: |
| case ISD::BR_CC: |
| case ISD::BITCAST: |
| case ISD::ATOMIC_CMP_SWAP: |
| case ISD::ATOMIC_SWAP: |
| return true; |
| case ISD::SRL: |
| if (N->getOperand(0).getOpcode() != ISD::SRL) |
| return true; |
| // (srl (trunc (srl ...))) may be optimized by combining srl, so |
| // doesn't optimize trunc now. |
| return false; |
| case ISD::SELECT_CC: |
| if (User->getOperand(2).getNode() != N && |
| User->getOperand(3).getNode() != N) |
| return true; |
| LLVM_FALLTHROUGH; |
| case ISD::AND: |
| case ISD::OR: |
| case ISD::XOR: |
| case ISD::SELECT: |
| case ISD::CopyToReg: |
| // Check all use of selections, bit operations, and copies. If all of them |
| // are safe, optimize truncate to extract_subreg. |
| for (SDNode::use_iterator UI = User->use_begin(), UE = User->use_end(); |
| UI != UE; ++UI) { |
| switch ((*UI)->getOpcode()) { |
| default: |
| // If the use is an instruction which treats the source operand as i32, |
| // it is safe to avoid truncate here. |
| if (isI32Insn(*UI, N)) |
| continue; |
| break; |
| case ISD::ANY_EXTEND: |
| case ISD::SIGN_EXTEND: |
| case ISD::ZERO_EXTEND: { |
| // Special optimizations to the combination of ext and trunc. |
| // (ext ... (select ... (trunc ...))) is safe to avoid truncate here |
| // since this truncate instruction clears higher 32 bits which is filled |
| // by one of ext instructions later. |
| assert(N->getValueType(0) == MVT::i32 && |
| "find truncate to not i32 integer"); |
| if (User->getOpcode() == ISD::SELECT_CC || |
| User->getOpcode() == ISD::SELECT) |
| continue; |
| break; |
| } |
| } |
| return false; |
| } |
| return true; |
| } |
| } |
| |
| // Optimize TRUNCATE in DAG combining. Optimizing it in CUSTOM lower is |
| // sometime too early. Optimizing it in DAG pattern matching in VEInstrInfo.td |
| // is sometime too late. So, doing it at here. |
| SDValue VETargetLowering::combineTRUNCATE(SDNode *N, |
| DAGCombinerInfo &DCI) const { |
| assert(N->getOpcode() == ISD::TRUNCATE && |
| "Should be called with a TRUNCATE node"); |
| |
| SelectionDAG &DAG = DCI.DAG; |
| SDLoc DL(N); |
| EVT VT = N->getValueType(0); |
| |
| // We prefer to do this when all types are legal. |
| if (!DCI.isAfterLegalizeDAG()) |
| return SDValue(); |
| |
| // Skip combine TRUNCATE atm if the operand of TRUNCATE might be a constant. |
| if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC && |
| isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) && |
| isa<ConstantSDNode>(N->getOperand(0)->getOperand(1))) |
| return SDValue(); |
| |
| // Check all use of this TRUNCATE. |
| for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE; |
| ++UI) { |
| SDNode *User = *UI; |
| |
| // Make sure that we're not going to replace TRUNCATE for non i32 |
| // instructions. |
| // |
| // FIXME: Although we could sometimes handle this, and it does occur in |
| // practice that one of the condition inputs to the select is also one of |
| // the outputs, we currently can't deal with this. |
| if (isI32Insn(User, N)) |
| continue; |
| |
| return SDValue(); |
| } |
| |
| SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32); |
| return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, |
| N->getOperand(0), SubI32), |
| 0); |
| } |
| |
| SDValue VETargetLowering::PerformDAGCombine(SDNode *N, |
| DAGCombinerInfo &DCI) const { |
| switch (N->getOpcode()) { |
| default: |
| break; |
| case ISD::TRUNCATE: |
| return combineTRUNCATE(N, DCI); |
| } |
| |
| return SDValue(); |
| } |