mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h - third_party/llvm-project - Git at Google

 //===- GPUOpsLowering.h - GPU FuncOp / ReturnOp lowering -------*- C++ -*--===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
 #define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_

 #include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"

 namespace mlir {

 //===----------------------------------------------------------------------===//
 // Helper Functions
 //===----------------------------------------------------------------------===//

 /// Find or create an external function declaration in the given module.
 ///
 /// \param moduleOp GPU module that is searched for, or receives, the function.
 /// \param loc Source location; presumably attached to a newly created
 ///            declaration (the implementation is not in this header).
 /// \param b Builder; presumably used to create the declaration when none
 ///          exists yet.
 /// \param name Symbol name of the function.
 /// \param type LLVM function type of the function.
 /// \returns The LLVM function op that was found or created.
 ///
 /// NOTE(review): the name says "Define" while the comment describes a
 /// declaration; confirm against the implementation which one is produced.
 LLVM::LLVMFuncOp getOrDefineFunction(gpu::GPUModuleOp moduleOp, Location loc,
                                      OpBuilder &b, StringRef name,
                                      LLVM::LLVMFunctionType type);

 /// Create a global that contains the given string. If a global with the same
 /// string already exists in the module, return that global.
 ///
 /// \param b Builder; presumably used to create the global when needed.
 /// \param loc Source location; presumably attached to a new global.
 /// \param moduleOp GPU module that holds the string global.
 /// \param llvmI8 Type argument; judging by its name, the LLVM i8 type used for
 ///               the string's characters (TODO confirm).
 /// \param namePrefix Prefix for the global's symbol name; how the full name is
 ///                   derived is not visible in this header.
 /// \param str String contents of the global.
 /// \param alignment Alignment for the global; defaults to 0, whose exact
 ///                  meaning is decided by the implementation (TODO confirm).
 /// \param addrSpace Address space for the global; defaults to 0.
 /// \returns The existing or newly created global op.
 LLVM::GlobalOp getOrCreateStringConstant(OpBuilder &b, Location loc,
                                          gpu::GPUModuleOp moduleOp, Type llvmI8,
                                          StringRef namePrefix, StringRef str,
                                          uint64_t alignment = 0,
                                          unsigned addrSpace = 0);

 //===----------------------------------------------------------------------===//
 // Lowering Patterns
 //===----------------------------------------------------------------------===//

 /// Lowering for gpu.dynamic.shared.memory to the LLVM dialect. The pattern
 /// first creates a 0-sized global array symbol, similar to what LLVM expects.
 /// It then constructs a memref descriptor with these values and returns it.
 struct GPUDynamicSharedMemoryOpLowering
     : public ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp> {
   // The base-class constructors stay available alongside the one below. They
   // know nothing about `alignmentBit`, which is why that member carries its
   // own default initializer.
   using ConvertOpToLLVMPattern<
       gpu::DynamicSharedMemoryOp>::ConvertOpToLLVMPattern;

   /// Builds the pattern with an explicit alignment setting.
   ///
   /// \param converter Type converter forwarded to the base pattern.
   /// \param alignmentBit Alignment value stored for use by the rewrite
   ///        (presumably expressed in bits, judging by the name; TODO confirm).
   /// \param benefit Pattern benefit forwarded to the base pattern.
   GPUDynamicSharedMemoryOpLowering(const LLVMTypeConverter &converter,
                                    unsigned alignmentBit = 0,
                                    PatternBenefit benefit = 1)
       : ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp>(converter, benefit),
         alignmentBit(alignmentBit) {}

   /// Conversion entry point; the implementation lives outside this header.
   LogicalResult
   matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override;

 private:
   /// Alignment value supplied at construction. Initialized to 0 (the same
   /// value as the constructor default) so it is never indeterminate when the
   /// pattern is created through an inherited base-class constructor.
   unsigned alignmentBit = 0;
 };

 /// Configuration consumed by GPUFuncOpLowering. Every field has a default so
 /// that a default-constructed object holds no indeterminate values.
 struct GPUFuncOpLoweringOptions {
   /// The address space to use for `alloca`s in private memory. Defaults to 0.
   unsigned allocaAddrSpace = 0;
   /// The address space to use declaring workgroup memory. Defaults to 0.
   unsigned workgroupAddrSpace = 0;

   /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
   /// should be used.
   StringAttr kernelAttributeName;
   /// The attribute name to use to set the block size. Null if no attribute
   /// should be used.
   StringAttr kernelBlockSizeAttributeName;

   /// The calling convention to use for kernel functions.
   LLVM::CConv kernelCallingConvention = LLVM::CConv::C;
   /// The calling convention to use for non-kernel functions.
   LLVM::CConv nonKernelCallingConvention = LLVM::CConv::C;

   /// Whether to encode workgroup attributions as additional arguments instead
   /// of a global variable.
   bool encodeWorkgroupAttributionsAsArguments = false;
 };

 /// Conversion pattern for gpu::GPUFuncOp. All configuration is copied out of a
 /// GPUFuncOpLoweringOptions object at construction time; the rewrite itself is
 /// implemented outside this header.
 struct GPUFuncOpLowering : public ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
   /// Builds the pattern, copying every field of `options` into the pattern.
   GPUFuncOpLowering(const LLVMTypeConverter &converter,
                     const GPUFuncOpLoweringOptions &options,
                     PatternBenefit benefit = 1)
       : ConvertOpToLLVMPattern<gpu::GPUFuncOp>(converter, benefit),
         allocaAddrSpace(options.allocaAddrSpace),
         workgroupAddrSpace(options.workgroupAddrSpace),
         kernelAttributeName(options.kernelAttributeName),
         kernelBlockSizeAttributeName(options.kernelBlockSizeAttributeName),
         kernelCallingConvention(options.kernelCallingConvention),
         nonKernelCallingConvention(options.nonKernelCallingConvention),
         encodeWorkgroupAttributionsAsArguments(
             options.encodeWorkgroupAttributionsAsArguments) {}

   /// Conversion entry point for a single gpu.func operation.
   LogicalResult
   matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override;

 private:
   /// Address space used for `alloca`s in private memory.
   unsigned allocaAddrSpace;
   /// Address space used when declaring workgroup memory.
   unsigned workgroupAddrSpace;

   /// Attribute name used in place of `gpu.kernel`; null when no attribute
   /// should be used.
   StringAttr kernelAttributeName;
   /// Attribute name used to set the block size; null when no attribute should
   /// be used.
   StringAttr kernelBlockSizeAttributeName;

   /// Calling convention applied to kernel functions.
   LLVM::CConv kernelCallingConvention;
   /// Calling convention applied to non-kernel functions.
   LLVM::CConv nonKernelCallingConvention;

   /// When true, workgroup attributions are encoded as additional arguments
   /// instead of a global variable.
   bool encodeWorkgroupAttributionsAsArguments;
 };

 /// The lowering of gpu.printf to a call to HIP hostcalls.
 ///
 /// Simplifies llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp, as we don't have
 /// to deal with %s (even if there were first-class strings in MLIR, they're not
 /// legal input to gpu.printf) or non-constant format strings.
 struct GPUPrintfOpToHIPLowering : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
   // Reuse the base-class constructors; this pattern declares no members of its
   // own that would need initialization.
   using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

   /// Conversion entry point; the implementation lives outside this header.
   LogicalResult
   matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override;
 };

 /// The lowering of gpu.printf to a call to an external printf() function.
 ///
 /// This pattern will add a declaration of printf() to the GPUModule if needed
 /// and separate out the format strings into global constants. For some
 /// runtimes, such as OpenCL on AMD, this is sufficient setup, as the compiler
 /// will lower printf calls to appropriate device-side code.
 struct GPUPrintfOpToLLVMCallLowering
     : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
   /// Builds the pattern. Unlike the other patterns in this header, no pattern
   /// benefit can be passed here; the base is constructed from `converter` only.
   ///
   /// \param converter Type converter forwarded to the base pattern.
   /// \param addressSpace Address space stored for the rewrite; defaults to 0.
   ///        Presumably the address space of the format-string globals (TODO
   ///        confirm against the implementation).
   GPUPrintfOpToLLVMCallLowering(const LLVMTypeConverter &converter,
                                 int addressSpace = 0)
       : ConvertOpToLLVMPattern<gpu::PrintfOp>(converter),
         addressSpace(addressSpace) {}

   /// Conversion entry point; the implementation lives outside this header.
   LogicalResult
   matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override;

 private:
   /// Address space value captured at construction.
   int addressSpace;
 };

 /// Lowering of gpu.printf to the vprintf standard library function.
 struct GPUPrintfOpToVPrintfLowering
     : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
   // Reuse the base-class constructors; this pattern declares no members of its
   // own that would need initialization.
   using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

   /// Conversion entry point; the implementation lives outside this header.
   LogicalResult
   matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override;
 };

 /// Conversion pattern for gpu::ReturnOp. Only the interface is declared here;
 /// the rewrite logic is implemented outside this header.
 struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> {
   // Reuse the base-class constructors; this pattern declares no members of its
   // own that would need initialization.
   using ConvertOpToLLVMPattern<gpu::ReturnOp>::ConvertOpToLLVMPattern;

   /// Conversion entry point for a single gpu::ReturnOp.
   LogicalResult
   matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override;
 };

 namespace impl {
 /// Unrolls op to array/vector elements.
 ///
 /// Non-template helper declared here so that the ScalarizeVectorOpLowering
 /// template can forward to it; the implementation is not in this header.
 ///
 /// \param op Operation to unroll.
 /// \param operands Operands to use for `op` (the template wrapper passes the
 ///        adaptor's operands).
 /// \param rewriter Rewriter handed through from the conversion pattern.
 /// \param converter LLVM type converter of the calling pattern.
 /// \returns Success or failure of the rewrite.
 LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands,
                                 ConversionPatternRewriter &rewriter,
                                 const LLVMTypeConverter &converter);
 } // namespace impl

 /// Generic pattern that unrolls a SourceOp to array/vector elements by
 /// forwarding to impl::scalarizeVectorOp.
 template <typename SourceOp>
 struct ScalarizeVectorOpLowering : ConvertOpToLLVMPattern<SourceOp> {
   using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern;

   /// Hands the op, its adapted operands, the rewriter and this pattern's type
   /// converter to the shared non-template helper and returns its result.
   LogicalResult
   matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     const LLVMTypeConverter &typeConverter = *this->getTypeConverter();
     return impl::scalarizeVectorOp(op, adaptor.getOperands(), rewriter,
                                    typeConverter);
   }
 };

 } // namespace mlir

 #endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
	//===- GPUOpsLowering.h - GPU FuncOp / ReturnOp lowering -------*- C++ -*--===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	#ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
	#define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_

	#include "mlir/Conversion/LLVMCommon/Pattern.h"
	#include "mlir/Dialect/GPU/IR/GPUDialect.h"
	#include "mlir/Dialect/LLVMIR/LLVMDialect.h"

	namespace mlir {

	//===----------------------------------------------------------------------===//
	// Helper Functions
	//===----------------------------------------------------------------------===//

	/// Find or create an external function declaration in the given module.
	///
	/// \param moduleOp GPU module that is searched for, or receives, the function.
	/// \param loc Source location; presumably attached to a newly created
	///            declaration (the implementation is not in this header).
	/// \param b Builder; presumably used to create the declaration when none
	///          exists yet.
	/// \param name Symbol name of the function.
	/// \param type LLVM function type of the function.
	/// \returns The LLVM function op that was found or created.
	///
	/// NOTE(review): the name says "Define" while the comment describes a
	/// declaration; confirm against the implementation which one is produced.
	LLVM::LLVMFuncOp getOrDefineFunction(gpu::GPUModuleOp moduleOp, Location loc,
	OpBuilder &b, StringRef name,
	LLVM::LLVMFunctionType type);

	/// Create a global that contains the given string. If a global with the same
	/// string already exists in the module, return that global.
	///
	/// \param b Builder; presumably used to create the global when needed.
	/// \param loc Source location; presumably attached to a new global.
	/// \param moduleOp GPU module that holds the string global.
	/// \param llvmI8 Type argument; judging by its name, the LLVM i8 type used for
	///               the string's characters (TODO confirm).
	/// \param namePrefix Prefix for the global's symbol name; how the full name is
	///                   derived is not visible in this header.
	/// \param str String contents of the global.
	/// \param alignment Alignment for the global; defaults to 0, whose exact
	///                  meaning is decided by the implementation (TODO confirm).
	/// \param addrSpace Address space for the global; defaults to 0.
	/// \returns The existing or newly created global op.
	LLVM::GlobalOp getOrCreateStringConstant(OpBuilder &b, Location loc,
	gpu::GPUModuleOp moduleOp, Type llvmI8,
	StringRef namePrefix, StringRef str,
	uint64_t alignment = 0,
	unsigned addrSpace = 0);

	//===----------------------------------------------------------------------===//
	// Lowering Patterns
	//===----------------------------------------------------------------------===//

	/// Lowering for gpu.dynamic.shared.memory to the LLVM dialect. The pattern
	/// first creates a 0-sized global array symbol, similar to what LLVM expects.
	/// It then constructs a memref descriptor with these values and returns it.
	struct GPUDynamicSharedMemoryOpLowering
	    : public ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp> {
	  // The base-class constructors stay available alongside the one below. They
	  // know nothing about `alignmentBit`, which is why that member carries its
	  // own default initializer.
	  using ConvertOpToLLVMPattern<
	      gpu::DynamicSharedMemoryOp>::ConvertOpToLLVMPattern;

	  /// Builds the pattern with an explicit alignment setting.
	  ///
	  /// \param converter Type converter forwarded to the base pattern.
	  /// \param alignmentBit Alignment value stored for use by the rewrite
	  ///        (presumably expressed in bits, judging by the name; TODO confirm).
	  /// \param benefit Pattern benefit forwarded to the base pattern.
	  GPUDynamicSharedMemoryOpLowering(const LLVMTypeConverter &converter,
	                                   unsigned alignmentBit = 0,
	                                   PatternBenefit benefit = 1)
	      : ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp>(converter, benefit),
	        alignmentBit(alignmentBit) {}

	  /// Conversion entry point; the implementation lives outside this header.
	  LogicalResult
	  matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
	                  ConversionPatternRewriter &rewriter) const override;

	private:
	  /// Alignment value supplied at construction. Initialized to 0 (the same
	  /// value as the constructor default) so it is never indeterminate when the
	  /// pattern is created through an inherited base-class constructor.
	  unsigned alignmentBit = 0;
	};

	/// Configuration consumed by GPUFuncOpLowering. Every field has a default so
	/// that a default-constructed object holds no indeterminate values.
	struct GPUFuncOpLoweringOptions {
	  /// The address space to use for `alloca`s in private memory. Defaults to 0.
	  unsigned allocaAddrSpace = 0;
	  /// The address space to use declaring workgroup memory. Defaults to 0.
	  unsigned workgroupAddrSpace = 0;

	  /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
	  /// should be used.
	  StringAttr kernelAttributeName;
	  /// The attribute name to use to set the block size. Null if no attribute
	  /// should be used.
	  StringAttr kernelBlockSizeAttributeName;

	  /// The calling convention to use for kernel functions.
	  LLVM::CConv kernelCallingConvention = LLVM::CConv::C;
	  /// The calling convention to use for non-kernel functions.
	  LLVM::CConv nonKernelCallingConvention = LLVM::CConv::C;

	  /// Whether to encode workgroup attributions as additional arguments instead
	  /// of a global variable.
	  bool encodeWorkgroupAttributionsAsArguments = false;
	};

	/// Conversion pattern for gpu::GPUFuncOp. All configuration is copied out of a
	/// GPUFuncOpLoweringOptions object at construction time; the rewrite itself is
	/// implemented outside this header.
	struct GPUFuncOpLowering : public ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
	  /// Builds the pattern, copying every field of `options` into the pattern.
	  GPUFuncOpLowering(const LLVMTypeConverter &converter,
	                    const GPUFuncOpLoweringOptions &options,
	                    PatternBenefit benefit = 1)
	      : ConvertOpToLLVMPattern<gpu::GPUFuncOp>(converter, benefit),
	        allocaAddrSpace(options.allocaAddrSpace),
	        workgroupAddrSpace(options.workgroupAddrSpace),
	        kernelAttributeName(options.kernelAttributeName),
	        kernelBlockSizeAttributeName(options.kernelBlockSizeAttributeName),
	        kernelCallingConvention(options.kernelCallingConvention),
	        nonKernelCallingConvention(options.nonKernelCallingConvention),
	        encodeWorkgroupAttributionsAsArguments(
	            options.encodeWorkgroupAttributionsAsArguments) {}

	  /// Conversion entry point for a single gpu.func operation.
	  LogicalResult
	  matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
	                  ConversionPatternRewriter &rewriter) const override;

	private:
	  /// Address space used for `alloca`s in private memory.
	  unsigned allocaAddrSpace;
	  /// Address space used when declaring workgroup memory.
	  unsigned workgroupAddrSpace;

	  /// Attribute name used in place of `gpu.kernel`; null when no attribute
	  /// should be used.
	  StringAttr kernelAttributeName;
	  /// Attribute name used to set the block size; null when no attribute should
	  /// be used.
	  StringAttr kernelBlockSizeAttributeName;

	  /// Calling convention applied to kernel functions.
	  LLVM::CConv kernelCallingConvention;
	  /// Calling convention applied to non-kernel functions.
	  LLVM::CConv nonKernelCallingConvention;

	  /// When true, workgroup attributions are encoded as additional arguments
	  /// instead of a global variable.
	  bool encodeWorkgroupAttributionsAsArguments;
	};

	/// The lowering of gpu.printf to a call to HIP hostcalls.
	///
	/// Simplifies llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp, as we don't have
	/// to deal with %s (even if there were first-class strings in MLIR, they're not
	/// legal input to gpu.printf) or non-constant format strings.
	struct GPUPrintfOpToHIPLowering : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
	// Reuse the base-class constructors; this pattern declares no members of its
	// own that would need initialization.
	using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

	/// Conversion entry point; the implementation lives outside this header.
	LogicalResult
	matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
	ConversionPatternRewriter &rewriter) const override;
	};

	/// The lowering of gpu.printf to a call to an external printf() function.
	///
	/// This pattern will add a declaration of printf() to the GPUModule if needed
	/// and separate out the format strings into global constants. For some
	/// runtimes, such as OpenCL on AMD, this is sufficient setup, as the compiler
	/// will lower printf calls to appropriate device-side code.
	struct GPUPrintfOpToLLVMCallLowering
	: public ConvertOpToLLVMPattern<gpu::PrintfOp> {
	/// Builds the pattern. Unlike the other patterns in this header, no pattern
	/// benefit can be passed here; the base is constructed from `converter` only.
	///
	/// \param converter Type converter forwarded to the base pattern.
	/// \param addressSpace Address space stored for the rewrite; defaults to 0.
	///        Presumably the address space of the format-string globals (TODO
	///        confirm against the implementation).
	GPUPrintfOpToLLVMCallLowering(const LLVMTypeConverter &converter,
	int addressSpace = 0)
	: ConvertOpToLLVMPattern<gpu::PrintfOp>(converter),
	addressSpace(addressSpace) {}

	/// Conversion entry point; the implementation lives outside this header.
	LogicalResult
	matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
	ConversionPatternRewriter &rewriter) const override;

	private:
	/// Address space value captured at construction.
	int addressSpace;
	};

	/// Lowering of gpu.printf to the vprintf standard library function.
	struct GPUPrintfOpToVPrintfLowering
	: public ConvertOpToLLVMPattern<gpu::PrintfOp> {
	// Reuse the base-class constructors; this pattern declares no members of its
	// own that would need initialization.
	using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

	/// Conversion entry point; the implementation lives outside this header.
	LogicalResult
	matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
	ConversionPatternRewriter &rewriter) const override;
	};

	/// Conversion pattern for gpu::ReturnOp. Only the interface is declared here;
	/// the rewrite logic is implemented outside this header.
	struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> {
	// Reuse the base-class constructors; this pattern declares no members of its
	// own that would need initialization.
	using ConvertOpToLLVMPattern<gpu::ReturnOp>::ConvertOpToLLVMPattern;

	/// Conversion entry point for a single gpu::ReturnOp.
	LogicalResult
	matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor,
	ConversionPatternRewriter &rewriter) const override;
	};

	namespace impl {
	/// Unrolls op to array/vector elements.
	///
	/// Non-template helper declared here so that the ScalarizeVectorOpLowering
	/// template can forward to it; the implementation is not in this header.
	///
	/// \param op Operation to unroll.
	/// \param operands Operands to use for `op` (the template wrapper passes the
	///        adaptor's operands).
	/// \param rewriter Rewriter handed through from the conversion pattern.
	/// \param converter LLVM type converter of the calling pattern.
	/// \returns Success or failure of the rewrite.
	LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands,
	ConversionPatternRewriter &rewriter,
	const LLVMTypeConverter &converter);
	} // namespace impl

	/// Generic pattern that unrolls a SourceOp to array/vector elements by
	/// forwarding to impl::scalarizeVectorOp.
	template <typename SourceOp>
	struct ScalarizeVectorOpLowering : ConvertOpToLLVMPattern<SourceOp> {
	  using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern;

	  /// Hands the op, its adapted operands, the rewriter and this pattern's type
	  /// converter to the shared non-template helper and returns its result.
	  LogicalResult
	  matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
	                  ConversionPatternRewriter &rewriter) const override {
	    const LLVMTypeConverter &typeConverter = *this->getTypeConverter();
	    return impl::scalarizeVectorOp(op, adaptor.getOperands(), rewriter,
	                                   typeConverter);
	  }
	};

	} // namespace mlir

	#endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_