| //===- GPUOpsLowering.h - GPU FuncOp / ReturnOp lowering -------*- C++ -*--===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| #ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_ |
| #define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_ |
| |
| #include "mlir/Conversion/LLVMCommon/Pattern.h" |
| #include "mlir/Dialect/GPU/IR/GPUDialect.h" |
| #include "mlir/Dialect/LLVMIR/LLVMDialect.h" |
| |
| namespace mlir { |
| |
| //===----------------------------------------------------------------------===// |
| // Helper Functions |
| //===----------------------------------------------------------------------===// |
| |
| /// Find or create an external function declaration in the given module. |
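| /// |
| /// For example, a minimal sketch that declares (or reuses) a hypothetical |
| /// "foo" function taking an i64 and returning an i32; the symbol name and |
| /// types are illustrative only: |
| /// |
| ///   Type i32 = rewriter.getI32Type(); |
| ///   Type i64 = rewriter.getI64Type(); |
| ///   auto fnType = LLVM::LLVMFunctionType::get(i32, {i64}); |
| ///   LLVM::LLVMFuncOp fn = |
| ///       getOrDefineFunction(moduleOp, loc, rewriter, "foo", fnType); |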
| LLVM::LLVMFuncOp getOrDefineFunction(gpu::GPUModuleOp moduleOp, Location loc, |
| OpBuilder &b, StringRef name, |
| LLVM::LLVMFunctionType type); |
| |
| /// Create a global that contains the given string. If a global with the same |
| /// string already exists in the module, return that global. |
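| /// |
| /// For example, a minimal sketch that materializes (or reuses) a global |
| /// holding a format string; the name prefix and contents are illustrative |
| /// only: |
| /// |
| ///   Type i8 = rewriter.getI8Type(); |
| ///   LLVM::GlobalOp formatGlobal = getOrCreateStringConstant( |
| ///       rewriter, loc, moduleOp, i8, /*namePrefix=*/"printfFormat_", |
| ///       /*str=*/"Hello from %d\n"); |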
| LLVM::GlobalOp getOrCreateStringConstant(OpBuilder &b, Location loc, |
| gpu::GPUModuleOp moduleOp, Type llvmI8, |
| StringRef namePrefix, StringRef str, |
| uint64_t alignment = 0, |
| unsigned addrSpace = 0); |
| |
| //===----------------------------------------------------------------------===// |
| // Lowering Patterns |
| //===----------------------------------------------------------------------===// |
| |
| /// Lowering for gpu.dynamic.shared.memory to the LLVM dialect. The pattern |
| /// first creates a 0-sized global array symbol, as LLVM expects, then |
| /// constructs a memref descriptor from these values and returns it. |
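| /// |
| /// A minimal registration sketch; the 128-bit alignment below is illustrative |
| /// only: |
| /// |
| ///   RewritePatternSet patterns(ctx); |
| ///   patterns.add<GPUDynamicSharedMemoryOpLowering>(converter, |
| ///                                                  /*alignmentBit=*/128); |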
| struct GPUDynamicSharedMemoryOpLowering |
| : public ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp> { |
| using ConvertOpToLLVMPattern< |
| gpu::DynamicSharedMemoryOp>::ConvertOpToLLVMPattern; |
| GPUDynamicSharedMemoryOpLowering(const LLVMTypeConverter &converter, |
| unsigned alignmentBit = 0, |
| PatternBenefit benefit = 1) |
| : ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp>(converter, benefit), |
| alignmentBit(alignmentBit) {} |
| |
| LogicalResult |
| matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor, |
| ConversionPatternRewriter &rewriter) const override; |
| |
| private: |
| // Alignment, in bits, of the generated global symbol. |
| unsigned alignmentBit; |
| }; |
| |
| struct GPUFuncOpLoweringOptions { |
| /// The address space to use for `alloca`s in private memory. |
| unsigned allocaAddrSpace; |
| /// The address space to use for declaring workgroup memory. |
| unsigned workgroupAddrSpace; |
| |
| /// The attribute name to use instead of `gpu.kernel`. Null if no attribute |
| /// should be used. |
| StringAttr kernelAttributeName; |
| /// The attribute name used to set the block size. Null if no attribute |
| /// should be used. |
| StringAttr kernelBlockSizeAttributeName; |
| |
| /// The calling convention to use for kernel functions. |
| LLVM::CConv kernelCallingConvention = LLVM::CConv::C; |
| /// The calling convention to use for non-kernel functions. |
| LLVM::CConv nonKernelCallingConvention = LLVM::CConv::C; |
| |
| /// Whether to encode workgroup attributions as additional arguments instead |
| /// of a global variable. |
| bool encodeWorkgroupAttributionsAsArguments = false; |
| }; |
| |
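| /// Lowers gpu.func to llvm.func. A minimal registration sketch; the address |
| /// spaces and attribute name below are illustrative, not tied to any |
| /// particular target: |
| /// |
| ///   GPUFuncOpLoweringOptions options; |
| ///   options.allocaAddrSpace = 5; |
| ///   options.workgroupAddrSpace = 3; |
| ///   options.kernelAttributeName = StringAttr::get(ctx, "my.kernel"); |
| ///   options.kernelBlockSizeAttributeName = StringAttr(); |
| ///   patterns.add<GPUFuncOpLowering>(converter, options); |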
| struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> { |
| GPUFuncOpLowering(const LLVMTypeConverter &converter, |
| const GPUFuncOpLoweringOptions &options, |
| PatternBenefit benefit = 1) |
| : ConvertOpToLLVMPattern<gpu::GPUFuncOp>(converter, benefit), |
| allocaAddrSpace(options.allocaAddrSpace), |
| workgroupAddrSpace(options.workgroupAddrSpace), |
| kernelAttributeName(options.kernelAttributeName), |
| kernelBlockSizeAttributeName(options.kernelBlockSizeAttributeName), |
| kernelCallingConvention(options.kernelCallingConvention), |
| nonKernelCallingConvention(options.nonKernelCallingConvention), |
| encodeWorkgroupAttributionsAsArguments( |
| options.encodeWorkgroupAttributionsAsArguments) {} |
| |
| LogicalResult |
| matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, |
| ConversionPatternRewriter &rewriter) const override; |
| |
| private: |
| /// The address space to use for `alloca`s in private memory. |
| unsigned allocaAddrSpace; |
| /// The address space to use for declaring workgroup memory. |
| unsigned workgroupAddrSpace; |
| |
| /// The attribute name to use instead of `gpu.kernel`. Null if no attribute |
| /// should be used. |
| StringAttr kernelAttributeName; |
| /// The attribute name used to set the block size. Null if no attribute |
| /// should be used. |
| StringAttr kernelBlockSizeAttributeName; |
| |
| /// The calling convention to use for kernel functions. |
| LLVM::CConv kernelCallingConvention; |
| /// The calling convention to use for non-kernel functions. |
| LLVM::CConv nonKernelCallingConvention; |
| |
| /// Whether to encode workgroup attributions as additional arguments instead |
| /// of a global variable. |
| bool encodeWorkgroupAttributionsAsArguments; |
| }; |
| |
| /// The lowering of gpu.printf to calls to HIP hostcalls. |
| /// |
| /// Simplifies llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp, as we don't have |
| /// to deal with %s (even if there were first-class strings in MLIR, they're not |
| /// legal input to gpu.printf) or non-constant format strings. |
| struct GPUPrintfOpToHIPLowering : public ConvertOpToLLVMPattern<gpu::PrintfOp> { |
| using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern; |
| |
| LogicalResult |
| matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, |
| ConversionPatternRewriter &rewriter) const override; |
| }; |
| |
| /// The lowering of gpu.printf to a call to an external printf() function. |
| /// |
| /// This pattern adds a declaration of printf() to the GPU module if needed |
| /// and separates out the format strings into global constants. For some |
| /// runtimes, such as OpenCL on AMD, this is sufficient setup, as the compiler |
| /// will lower printf calls to appropriate device-side code. |
| struct GPUPrintfOpToLLVMCallLowering |
| : public ConvertOpToLLVMPattern<gpu::PrintfOp> { |
| GPUPrintfOpToLLVMCallLowering(const LLVMTypeConverter &converter, |
| int addressSpace = 0) |
| : ConvertOpToLLVMPattern<gpu::PrintfOp>(converter), |
| addressSpace(addressSpace) {} |
| |
| LogicalResult |
| matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, |
| ConversionPatternRewriter &rewriter) const override; |
| |
| private: |
| int addressSpace; |
| }; |
| |
| /// Lowering of gpu.printf to a call to the vprintf standard library function. |
| struct GPUPrintfOpToVPrintfLowering |
| : public ConvertOpToLLVMPattern<gpu::PrintfOp> { |
| using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern; |
| |
| LogicalResult |
| matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, |
| ConversionPatternRewriter &rewriter) const override; |
| }; |
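| |
| // Typically only one of the three gpu.printf lowerings above is registered, |
| // chosen to match the target runtime. A minimal sketch; the address space |
| // value below is illustrative only: |
| // |
| //   // HIP hostcall-based runtimes: |
| //   patterns.add<GPUPrintfOpToHIPLowering>(converter); |
| //   // Runtimes providing a device-side printf(), e.g. OpenCL on AMD: |
| //   patterns.add<GPUPrintfOpToLLVMCallLowering>(converter, /*addressSpace=*/4); |
| //   // Runtimes providing vprintf: |
| //   patterns.add<GPUPrintfOpToVPrintfLowering>(converter); |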
| |
| struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> { |
| using ConvertOpToLLVMPattern<gpu::ReturnOp>::ConvertOpToLLVMPattern; |
| |
| LogicalResult |
| matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor, |
| ConversionPatternRewriter &rewriter) const override; |
| }; |
| |
| namespace impl { |
| /// Unrolls op to array/vector elements. |
| LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands, |
| ConversionPatternRewriter &rewriter, |
| const LLVMTypeConverter &converter); |
| } // namespace impl |
| |
| /// Unrolls SourceOp to array/vector elements. |
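| /// |
| /// A minimal registration sketch; math::ExpOp is an illustrative choice of |
| /// elementwise op, not a requirement: |
| /// |
| ///   patterns.add<ScalarizeVectorOpLowering<math::ExpOp>>(converter); |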
| template <typename SourceOp> |
| struct ScalarizeVectorOpLowering : public ConvertOpToLLVMPattern<SourceOp> { |
| public: |
| using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern; |
| |
| LogicalResult |
| matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor, |
| ConversionPatternRewriter &rewriter) const override { |
| return impl::scalarizeVectorOp(op, adaptor.getOperands(), rewriter, |
| *this->getTypeConverter()); |
| } |
| }; |
| |
| } // namespace mlir |
| |
| #endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_ |