// mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp - third_party/llvm-project - Git at Google

 //===- TestLowerToNVVM.cpp - Test lowering to NVVM as a sink pass ---------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file implements a pass for testing the lowering to NVVM as a generally
 // usable sink pass.
 //
 //===----------------------------------------------------------------------===//

 #include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
 #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
 #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
 #include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
 #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
 #include "mlir/Conversion/IndexToLLVM/IndexToLLVM.h"
 #include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
 #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
 #include "mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h"
 #include "mlir/Conversion/NVVMToLLVM/NVVMToLLVM.h"
 #include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
 #include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
 #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.h"
 #include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/GPU/Transforms/Passes.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/MemRef/Transforms/Passes.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Pass/PassOptions.h"
 #include "mlir/Transforms/Passes.h"

 using namespace mlir;

 #if MLIR_CUDA_CONVERSIONS_ENABLED
 namespace {
 /// Options understood by the `test-lower-to-nvvm` pipeline.
 ///
 /// Every member binds itself to this option set via the leading `*this`
 /// argument; the string literal that follows is the flag name. The members
 /// are read by the pipeline builder functions further down in this file.
 struct TestLowerToNVVMOptions
     : public PassPipelineOptions<TestLowerToNVVMOptions> {
   /// `index-bitwidth`, defaults to 64.
   PassOptions::Option<int64_t> indexBitWidth{
       *this, "index-bitwidth", llvm::cl::init(64),
       llvm::cl::desc("Bitwidth of the index type for the host (warning this "
                      "should be 64 until the GPU layering is fixed)")};

   /// `cubin-triple`, defaults to "nvptx64-nvidia-cuda".
   PassOptions::Option<std::string> cubinTriple{
       *this, "cubin-triple", llvm::cl::init("nvptx64-nvidia-cuda"),
       llvm::cl::desc("Triple to use to serialize to cubin.")};

   /// `cubin-chip`, defaults to "sm_50".
   PassOptions::Option<std::string> cubinChip{
       *this, "cubin-chip", llvm::cl::init("sm_50"),
       llvm::cl::desc("Chip to use to serialize to cubin.")};

   /// `cubin-features`, defaults to "+ptx60".
   PassOptions::Option<std::string> cubinFeatures{
       *this, "cubin-features", llvm::cl::init("+ptx60"),
       llvm::cl::desc("Features to use to serialize to cubin.")};

   /// `cubin-format`, defaults to "fatbin".
   PassOptions::Option<std::string> cubinFormat{
       *this, "cubin-format", llvm::cl::init("fatbin"),
       llvm::cl::desc("Compilation format to use to serialize to cubin.")};

   /// `opt-level`, defaults to 2.
   PassOptions::Option<int> optLevel{
       *this, "opt-level", llvm::cl::init(2),
       llvm::cl::desc("Optimization level for NVVM compilation")};

   /// `kernel-bare-ptr-calling-convention`, defaults to false.
   PassOptions::Option<bool> kernelUseBarePtrCallConv{
       *this, "kernel-bare-ptr-calling-convention", llvm::cl::init(false),
       llvm::cl::desc(
           "Whether to use the bareptr calling convention on the kernel "
           "(warning this should be false until the GPU layering is fixed)")};

   /// `host-bare-ptr-calling-convention`, defaults to false.
   PassOptions::Option<bool> hostUseBarePtrCallConv{
       *this, "host-bare-ptr-calling-convention", llvm::cl::init(false),
       llvm::cl::desc(
           "Whether to use the bareptr calling convention on the host (warning "
           "this should be false until the GPU layering is fixed)")};
 };

 //===----------------------------------------------------------------------===//
 // Common pipeline
 //===----------------------------------------------------------------------===//
 /// Appends the common (whole-module) passes to `pm`.
 ///
 /// Passes are added in exactly the order written below. Two of them are
 /// configured from `options`: the NVVM target attachment (triple, chip,
 /// features, opt level) and the index-to-LLVM conversion (index bitwidth).
 void buildCommonPassPipeline(OpPassManager &pm,
                              const TestLowerToNVVMOptions &options) {
   // Prepare the option-driven configurations first so that the pass list
   // below reads as a single sequence.
   GpuNVVMAttachTargetOptions attachTargetOpts;
   attachTargetOpts.triple = options.cubinTriple;
   attachTargetOpts.chip = options.cubinChip;
   attachTargetOpts.features = options.cubinFeatures;
   attachTargetOpts.optLevel = options.optLevel;

   ConvertIndexToLLVMPassOptions indexToLLVMOpts;
   indexToLLVMOpts.indexBitwidth = options.indexBitWidth;

   // Passes created with their default configuration.
   pm.addPass(createConvertNVGPUToNVVMPass());
   pm.addPass(createGpuKernelOutliningPass());
   pm.addPass(createConvertLinalgToLoopsPass());
   pm.addPass(createConvertVectorToSCFPass());
   pm.addPass(createConvertSCFToCFPass());
   pm.addPass(createConvertNVVMToLLVMPass());
   pm.addPass(createConvertVectorToLLVMPass());
   pm.addPass(createConvertMathToLLVMPass());
   pm.addPass(createFinalizeMemRefToLLVMConversionPass());
   pm.addPass(createConvertFuncToLLVMPass());
   pm.addPass(memref::createExpandStridedMetadataPass());

   // Target attachment and the remaining lowerings.
   pm.addPass(createGpuNVVMAttachTarget(attachTargetOpts));
   pm.addPass(createLowerAffinePass());
   pm.addPass(createArithToLLVMConversionPass());
   pm.addPass(createConvertIndexToLLVMPass(indexToLLVMOpts));

   // Canonicalize, then CSE.
   pm.addPass(createCanonicalizerPass());
   pm.addPass(createCSEPass());
 }

 //===----------------------------------------------------------------------===//
 // GPUModule-specific stuff.
 //===----------------------------------------------------------------------===//
 /// Appends the passes that are nested on `gpu::GPUModuleOp` to `pm`.
 ///
 /// `options` provides the kernel bare-pointer calling-convention flag and the
 /// index bitwidth used to configure the GPU-to-NVVM conversion.
 void buildGpuPassPipeline(OpPassManager &pm,
                           const TestLowerToNVVMOptions &options) {
   using GpuModule = gpu::GPUModuleOp;

   ConvertGpuOpsToNVVMOpsOptions gpuToNVVMOpts;
   gpuToNVVMOpts.indexBitwidth = options.indexBitWidth;
   gpuToNVVMOpts.useBarePtrCallConv = options.kernelUseBarePtrCallConv;

   pm.addNestedPass<GpuModule>(createStripDebugInfoPass());
   pm.addNestedPass<GpuModule>(createConvertGpuOpsToNVVMOps(gpuToNVVMOpts));
   pm.addNestedPass<GpuModule>(createCanonicalizerPass());
   pm.addNestedPass<GpuModule>(createCSEPass());
   pm.addNestedPass<GpuModule>(createReconcileUnrealizedCastsPass());
 }

 //===----------------------------------------------------------------------===//
 // Host Post-GPU pipeline
 //===----------------------------------------------------------------------===//
 /// Appends the host-side passes that follow the GPU-module passes to `pm`.
 ///
 /// `options` provides both bare-pointer calling-convention flags for the
 /// GPU-to-LLVM conversion and the compilation format (`cubin-format`) for the
 /// GPU-module-to-binary pass.
 void buildHostPostPipeline(OpPassManager &pm,
                            const TestLowerToNVVMOptions &options) {
   GpuToLLVMConversionPassOptions gpuToLLVMOpts;
   gpuToLLVMOpts.kernelBarePtrCallConv = options.kernelUseBarePtrCallConv;
   gpuToLLVMOpts.hostBarePtrCallConv = options.hostUseBarePtrCallConv;

   GpuModuleToBinaryPassOptions toBinaryOpts;
   toBinaryOpts.compilationTarget = options.cubinFormat;

   pm.addPass(createGpuToLLVMConversionPass(gpuToLLVMOpts));
   pm.addPass(createGpuModuleToBinaryPass(toBinaryOpts));

   // Canonicalize, CSE, then reconcile unrealized casts.
   pm.addPass(createCanonicalizerPass());
   pm.addPass(createCSEPass());
   pm.addPass(createReconcileUnrealizedCastsPass());
 }

 /// Populates `pm` with the full lowering pipeline by invoking the three stage
 /// builders in a fixed order: common passes, GPU-module passes, then host
 /// post-GPU passes. `options` is forwarded to every stage unchanged.
 void buildLowerToNVVMPassPipeline(OpPassManager &pm,
                                   const TestLowerToNVVMOptions &options) {
   // All stage builders share one signature, so they can live in a table.
   using StageBuilder = decltype(&buildCommonPassPipeline);
   const StageBuilder stages[] = {
       buildCommonPassPipeline, // Common pipeline.
       buildGpuPassPipeline,    // GPUModule-specific stuff.
       buildHostPostPipeline,   // Host post-GPUModule-specific stuff.
   };

   for (StageBuilder buildStage : stages)
     buildStage(pm, options);
 }
 } // namespace

 namespace mlir {
 namespace test {
 void registerTestLowerToNVVM() {
   PassPipelineRegistration<TestLowerToNVVMOptions>(
       "test-lower-to-nvvm",
       "An example of pipeline to lower the main dialects (arith, linalg, "
       "memref, scf, vector) down to NVVM.",
       buildLowerToNVVMPassPipeline);
 }
 } // namespace test
 } // namespace mlir
 #endif // MLIR_CUDA_CONVERSIONS_ENABLED
	//===- TestLowerToNVVM.cpp - Test lowering to NVVM as a sink pass ---------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// This file implements a pass for testing the lowering to NVVM as a generally
	// usable sink pass.
	//
	//===----------------------------------------------------------------------===//

	#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
	#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
	#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
	#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
	#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
	#include "mlir/Conversion/IndexToLLVM/IndexToLLVM.h"
	#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
	#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
	#include "mlir/Conversion/NVGPUToNVVM/NVGPUToNVVM.h"
	#include "mlir/Conversion/NVVMToLLVM/NVVMToLLVM.h"
	#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
	#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
	#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.h"
	#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
	#include "mlir/Dialect/Func/IR/FuncOps.h"
	#include "mlir/Dialect/GPU/IR/GPUDialect.h"
	#include "mlir/Dialect/GPU/Transforms/Passes.h"
	#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
	#include "mlir/Dialect/Linalg/Passes.h"
	#include "mlir/Dialect/MemRef/Transforms/Passes.h"
	#include "mlir/Pass/PassManager.h"
	#include "mlir/Pass/PassOptions.h"
	#include "mlir/Transforms/Passes.h"

	using namespace mlir;

	#if MLIR_CUDA_CONVERSIONS_ENABLED
	namespace {
	/// Options understood by the `test-lower-to-nvvm` pipeline.
	///
	/// Every member binds itself to this option set via the leading `*this`
	/// argument; the string literal that follows is the flag name. The members
	/// are read by the pipeline builder functions further down in this file.
	struct TestLowerToNVVMOptions
	    : public PassPipelineOptions<TestLowerToNVVMOptions> {
	  /// `index-bitwidth`, defaults to 64.
	  PassOptions::Option<int64_t> indexBitWidth{
	      *this, "index-bitwidth", llvm::cl::init(64),
	      llvm::cl::desc("Bitwidth of the index type for the host (warning this "
	                     "should be 64 until the GPU layering is fixed)")};

	  /// `cubin-triple`, defaults to "nvptx64-nvidia-cuda".
	  PassOptions::Option<std::string> cubinTriple{
	      *this, "cubin-triple", llvm::cl::init("nvptx64-nvidia-cuda"),
	      llvm::cl::desc("Triple to use to serialize to cubin.")};

	  /// `cubin-chip`, defaults to "sm_50".
	  PassOptions::Option<std::string> cubinChip{
	      *this, "cubin-chip", llvm::cl::init("sm_50"),
	      llvm::cl::desc("Chip to use to serialize to cubin.")};

	  /// `cubin-features`, defaults to "+ptx60".
	  PassOptions::Option<std::string> cubinFeatures{
	      *this, "cubin-features", llvm::cl::init("+ptx60"),
	      llvm::cl::desc("Features to use to serialize to cubin.")};

	  /// `cubin-format`, defaults to "fatbin".
	  PassOptions::Option<std::string> cubinFormat{
	      *this, "cubin-format", llvm::cl::init("fatbin"),
	      llvm::cl::desc("Compilation format to use to serialize to cubin.")};

	  /// `opt-level`, defaults to 2.
	  PassOptions::Option<int> optLevel{
	      *this, "opt-level", llvm::cl::init(2),
	      llvm::cl::desc("Optimization level for NVVM compilation")};

	  /// `kernel-bare-ptr-calling-convention`, defaults to false.
	  PassOptions::Option<bool> kernelUseBarePtrCallConv{
	      *this, "kernel-bare-ptr-calling-convention", llvm::cl::init(false),
	      llvm::cl::desc(
	          "Whether to use the bareptr calling convention on the kernel "
	          "(warning this should be false until the GPU layering is fixed)")};

	  /// `host-bare-ptr-calling-convention`, defaults to false.
	  PassOptions::Option<bool> hostUseBarePtrCallConv{
	      *this, "host-bare-ptr-calling-convention", llvm::cl::init(false),
	      llvm::cl::desc(
	          "Whether to use the bareptr calling convention on the host (warning "
	          "this should be false until the GPU layering is fixed)")};
	};

	//===----------------------------------------------------------------------===//
	// Common pipeline
	//===----------------------------------------------------------------------===//
	/// Appends the common (whole-module) passes to `pm`.
	///
	/// Passes are added in exactly the order written below. Two of them are
	/// configured from `options`: the NVVM target attachment (triple, chip,
	/// features, opt level) and the index-to-LLVM conversion (index bitwidth).
	void buildCommonPassPipeline(OpPassManager &pm,
	                             const TestLowerToNVVMOptions &options) {
	  // Prepare the option-driven configurations first so that the pass list
	  // below reads as a single sequence.
	  GpuNVVMAttachTargetOptions attachTargetOpts;
	  attachTargetOpts.triple = options.cubinTriple;
	  attachTargetOpts.chip = options.cubinChip;
	  attachTargetOpts.features = options.cubinFeatures;
	  attachTargetOpts.optLevel = options.optLevel;

	  ConvertIndexToLLVMPassOptions indexToLLVMOpts;
	  indexToLLVMOpts.indexBitwidth = options.indexBitWidth;

	  // Passes created with their default configuration.
	  pm.addPass(createConvertNVGPUToNVVMPass());
	  pm.addPass(createGpuKernelOutliningPass());
	  pm.addPass(createConvertLinalgToLoopsPass());
	  pm.addPass(createConvertVectorToSCFPass());
	  pm.addPass(createConvertSCFToCFPass());
	  pm.addPass(createConvertNVVMToLLVMPass());
	  pm.addPass(createConvertVectorToLLVMPass());
	  pm.addPass(createConvertMathToLLVMPass());
	  pm.addPass(createFinalizeMemRefToLLVMConversionPass());
	  pm.addPass(createConvertFuncToLLVMPass());
	  pm.addPass(memref::createExpandStridedMetadataPass());

	  // Target attachment and the remaining lowerings.
	  pm.addPass(createGpuNVVMAttachTarget(attachTargetOpts));
	  pm.addPass(createLowerAffinePass());
	  pm.addPass(createArithToLLVMConversionPass());
	  pm.addPass(createConvertIndexToLLVMPass(indexToLLVMOpts));

	  // Canonicalize, then CSE.
	  pm.addPass(createCanonicalizerPass());
	  pm.addPass(createCSEPass());
	}

	//===----------------------------------------------------------------------===//
	// GPUModule-specific stuff.
	//===----------------------------------------------------------------------===//
	/// Appends the passes that are nested on `gpu::GPUModuleOp` to `pm`.
	///
	/// `options` provides the kernel bare-pointer calling-convention flag and the
	/// index bitwidth used to configure the GPU-to-NVVM conversion.
	void buildGpuPassPipeline(OpPassManager &pm,
	                          const TestLowerToNVVMOptions &options) {
	  using GpuModule = gpu::GPUModuleOp;

	  ConvertGpuOpsToNVVMOpsOptions gpuToNVVMOpts;
	  gpuToNVVMOpts.indexBitwidth = options.indexBitWidth;
	  gpuToNVVMOpts.useBarePtrCallConv = options.kernelUseBarePtrCallConv;

	  pm.addNestedPass<GpuModule>(createStripDebugInfoPass());
	  pm.addNestedPass<GpuModule>(createConvertGpuOpsToNVVMOps(gpuToNVVMOpts));
	  pm.addNestedPass<GpuModule>(createCanonicalizerPass());
	  pm.addNestedPass<GpuModule>(createCSEPass());
	  pm.addNestedPass<GpuModule>(createReconcileUnrealizedCastsPass());
	}

	//===----------------------------------------------------------------------===//
	// Host Post-GPU pipeline
	//===----------------------------------------------------------------------===//
	/// Appends the host-side passes that follow the GPU-module passes to `pm`.
	///
	/// `options` provides both bare-pointer calling-convention flags for the
	/// GPU-to-LLVM conversion and the compilation format (`cubin-format`) for the
	/// GPU-module-to-binary pass.
	void buildHostPostPipeline(OpPassManager &pm,
	                           const TestLowerToNVVMOptions &options) {
	  GpuToLLVMConversionPassOptions gpuToLLVMOpts;
	  gpuToLLVMOpts.kernelBarePtrCallConv = options.kernelUseBarePtrCallConv;
	  gpuToLLVMOpts.hostBarePtrCallConv = options.hostUseBarePtrCallConv;

	  GpuModuleToBinaryPassOptions toBinaryOpts;
	  toBinaryOpts.compilationTarget = options.cubinFormat;

	  pm.addPass(createGpuToLLVMConversionPass(gpuToLLVMOpts));
	  pm.addPass(createGpuModuleToBinaryPass(toBinaryOpts));

	  // Canonicalize, CSE, then reconcile unrealized casts.
	  pm.addPass(createCanonicalizerPass());
	  pm.addPass(createCSEPass());
	  pm.addPass(createReconcileUnrealizedCastsPass());
	}

	/// Populates `pm` with the full lowering pipeline by invoking the three stage
	/// builders in a fixed order: common passes, GPU-module passes, then host
	/// post-GPU passes. `options` is forwarded to every stage unchanged.
	void buildLowerToNVVMPassPipeline(OpPassManager &pm,
	                                  const TestLowerToNVVMOptions &options) {
	  // All stage builders share one signature, so they can live in a table.
	  using StageBuilder = decltype(&buildCommonPassPipeline);
	  const StageBuilder stages[] = {
	      buildCommonPassPipeline, // Common pipeline.
	      buildGpuPassPipeline,    // GPUModule-specific stuff.
	      buildHostPostPipeline,   // Host post-GPUModule-specific stuff.
	  };

	  for (StageBuilder buildStage : stages)
	    buildStage(pm, options);
	}
	} // namespace

	namespace mlir {
	namespace test {
	void registerTestLowerToNVVM() {
	PassPipelineRegistration<TestLowerToNVVMOptions>(
	"test-lower-to-nvvm",
	"An example of pipeline to lower the main dialects (arith, linalg, "
	"memref, scf, vector) down to NVVM.",
	buildLowerToNVVMPassPipeline);
	}
	} // namespace test
	} // namespace mlir
	#endif // MLIR_CUDA_CONVERSIONS_ENABLED