//===----- RISCVCodeGenPrepare.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a RISC-V specific version of CodeGenPrepare.
// It munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach.
//
//===----------------------------------------------------------------------===//
#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
using namespace llvm;
#define DEBUG_TYPE "riscv-codegenprepare"
#define PASS_NAME "RISC-V CodeGenPrepare"
namespace {
class RISCVCodeGenPrepare : public FunctionPass,
public InstVisitor<RISCVCodeGenPrepare, bool> {
const DataLayout *DL;
const RISCVSubtarget *ST;
public:
static char ID;
RISCVCodeGenPrepare() : FunctionPass(ID) {}
bool runOnFunction(Function &F) override;
StringRef getPassName() const override { return PASS_NAME; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<TargetPassConfig>();
}
bool visitInstruction(Instruction &I) { return false; }
bool visitAnd(BinaryOperator &BO);
bool visitIntrinsicInst(IntrinsicInst &I);
};
} // end anonymous namespace
// Try to optimize (i64 (and (zext nneg (i32 X)), C1)) if C1 has bit 31 set,
// but bits 63:32 are zero. The nneg flag tells us that bit 31 of X is 0, so
// we can fill the upper 32 bits of C1 with ones.
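//
// For example (an illustrative sketch, not taken from an existing test):
//
// %ext = zext nneg i32 %x to i64
// %and = and i64 %ext, 4294967280 ; 0xFFFFFFF0, does not fit simm12
//
// ->
//
// %ext = zext nneg i32 %x to i64
// %and = and i64 %ext, -16 ; simm12, so ANDI can be used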
bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
if (!ST->is64Bit())
return false;
if (!BO.getType()->isIntegerTy(64))
return false;
using namespace PatternMatch;
// Left hand side should be a zext nneg.
Value *LHSSrc;
if (!match(BO.getOperand(0), m_NNegZExt(m_Value(LHSSrc))))
return false;
if (!LHSSrc->getType()->isIntegerTy(32))
return false;
// Right hand side should be a constant.
Value *RHS = BO.getOperand(1);
auto *CI = dyn_cast<ConstantInt>(RHS);
if (!CI)
return false;
uint64_t C = CI->getZExtValue();
// Look for constants that fit in 32 bits but not simm12, and can be made
// into simm12 by sign extending bit 31. This will allow use of ANDI.
// TODO: Is it worth making simm32?
if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C)))
return false;
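// E.g. C = 0xFFFFF800 passes: it fits in 32 bits, is not simm12 as-is, and
// sign extending bit 31 gives -2048, which is simm12. C = 0x80000000 fails
// the last check because sign extending it gives INT32_MIN. The qualifying
// constants are exactly those in [0xFFFFF800, 0xFFFFFFFF].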
// Sign extend the constant and replace the And operand.
C = SignExtend64<32>(C);
BO.setOperand(1, ConstantInt::get(RHS->getType(), C));
return true;
}
// LLVM vector reduction intrinsics return a scalar result, but on RISC-V,
// vector reduction instructions write the result to the first element of a
// vector register. So when a reduction in a loop uses a scalar phi, we end up
// with unnecessary scalar moves:
//
// loop:
// vfmv.s.f v10, fa0
// vfredosum.vs v8, v8, v10
// vfmv.f.s fa0, v8
//
// This mainly affects ordered fadd reductions, since other types of reduction
// typically use element-wise vectorization in the loop body. This transform
// tries to vectorize any scalar phis that feed into a fadd reduction:
//
// loop:
// %phi = phi float [ ..., %entry ], [ %acc, %loop ]
// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi, <vscale x 2 x float> %vec)
//
// ->
//
// loop:
// %phi = phi <vscale x 2 x float> [ ..., %entry ], [ %acc.vec, %loop ]
// %phi.scalar = extractelement <vscale x 2 x float> %phi, i64 0
// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi.scalar, <vscale x 2 x float> %vec)
// %acc.vec = insertelement <vscale x 2 x float> poison, float %acc, i64 0
//
// This eliminates the scalar -> vector -> scalar crossing during instruction
// selection.
bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
if (I.getIntrinsicID() != Intrinsic::vector_reduce_fadd)
return false;
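// The accumulator must be a scalar phi whose only use is this reduction and
// which receives the reduction result back along a loop edge.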
auto *PHI = dyn_cast<PHINode>(I.getOperand(0));
if (!PHI || !PHI->hasOneUse() ||
!llvm::is_contained(PHI->incoming_values(), &I))
return false;
Type *VecTy = I.getOperand(1)->getType();
IRBuilder<> Builder(PHI);
auto *VecPHI = Builder.CreatePHI(VecTy, PHI->getNumIncomingValues());
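// In each predecessor block, insert the incoming scalar into element 0 of a
// poison vector just before the terminator and use that as the corresponding
// incoming value of the new vector phi.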
for (auto *BB : PHI->blocks()) {
Builder.SetInsertPoint(BB->getTerminator());
Value *InsertElt = Builder.CreateInsertElement(
VecTy, PHI->getIncomingValueForBlock(BB), (uint64_t)0);
VecPHI->addIncoming(InsertElt, BB);
}
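// Read the reduction's start value out of element 0 of the vector phi; the
// scalar phi is now dead.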
Builder.SetInsertPoint(&I);
I.setOperand(0, Builder.CreateExtractElement(VecPHI, (uint64_t)0));
PHI->eraseFromParent();
return true;
}
bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
auto &TPC = getAnalysis<TargetPassConfig>();
auto &TM = TPC.getTM<RISCVTargetMachine>();
ST = &TM.getSubtarget<RISCVSubtarget>(F);
DL = &F.getParent()->getDataLayout();
bool MadeChange = false;
for (auto &BB : F)
for (Instruction &I : llvm::make_early_inc_range(BB))
MadeChange |= visit(I);
return MadeChange;
}
INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
char RISCVCodeGenPrepare::ID = 0;
FunctionPass *llvm::createRISCVCodeGenPreparePass() {
return new RISCVCodeGenPrepare();
}