mlir/lib/Conversion/SCFToGPU/SCFToGPUPass.cpp - third_party/llvm-project - Git at Google

 //===- SCFToGPUPass.cpp - Convert a loop nest to a GPU kernel -----------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h"

 #include "mlir/Conversion/SCFToGPU/SCFToGPU.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Complex/IR/Complex.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/CommandLine.h"

 namespace mlir {
 #define GEN_PASS_DEF_CONVERTAFFINEFORTOGPUPASS
 #define GEN_PASS_DEF_CONVERTPARALLELLOOPTOGPUPASS
 #include "mlir/Conversion/Passes.h.inc"
 } // namespace mlir

 using namespace mlir;
 using namespace mlir::scf;

 namespace {
 // Pass that converts the top-level affine::AffineForOp loop nests of a
 // function into GPU launch operations.  Only the operations sitting directly
 // in the function body are inspected: nested launches are not allowed, so the
 // traversal is intentionally non-recursive and never considers nested loops.
 struct ForLoopMapper
     : public impl::ConvertAffineForToGPUPassBase<ForLoopMapper> {
   using Base::Base;

   void runOnOperation() override {
     Region &body = getOperation().getFunctionBody();
     // The early-increment range advances the iterator before the loop body
     // runs, which keeps the traversal valid if the conversion removes the
     // operation currently being visited.
     for (Operation &candidate : llvm::make_early_inc_range(body.getOps())) {
       auto affineLoop = dyn_cast<affine::AffineForOp>(&candidate);
       if (!affineLoop)
         continue;

       // A failed conversion flags the pass as failed; the remaining top-level
       // operations are still visited afterwards.
       LogicalResult converted = convertAffineLoopNestToGPULaunch(
           affineLoop, numBlockDims, numThreadDims);
       if (failed(converted))
         signalPassFailure();
     }
   }
 };

 // Pass that applies the parallel-loop-to-GPU rewrite patterns to the current
 // operation through a partial dialect conversion.
 struct ParallelLoopToGpuPass
     : public impl::ConvertParallelLoopToGpuPassBase<ParallelLoopToGpuPass> {
   void runOnOperation() override {
     auto root = getOperation();

     // Legality: every operation without an explicit rule is accepted as
     // legal; the parallel-loop specific rules are layered on top.
     ConversionTarget target(getContext());
     target.markUnknownOpDynamicallyLegal([](Operation *) { return true; });
     configureParallelLoopToGPULegality(target);

     RewritePatternSet patterns(&getContext());
     populateParallelLoopToGPUPatterns(patterns);

     LogicalResult status =
         applyPartialConversion(root, target, std::move(patterns));
     if (failed(status))
       signalPassFailure();

     // Finalization runs unconditionally, i.e. also after a failed conversion.
     finalizeParallelLoopToGPUConversion(root);
   }
 };

 } // namespace
	//===- SCFToGPUPass.cpp - Convert a loop nest to a GPU kernel -----------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//

	#include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h"

	#include "mlir/Conversion/SCFToGPU/SCFToGPU.h"
	#include "mlir/Dialect/Affine/IR/AffineOps.h"
	#include "mlir/Dialect/Arith/IR/Arith.h"
	#include "mlir/Dialect/Complex/IR/Complex.h"
	#include "mlir/Dialect/GPU/IR/GPUDialect.h"
	#include "mlir/Dialect/SCF/IR/SCF.h"
	#include "mlir/Pass/Pass.h"
	#include "mlir/Transforms/DialectConversion.h"
	#include "llvm/ADT/ArrayRef.h"
	#include "llvm/Support/CommandLine.h"

	namespace mlir {
	#define GEN_PASS_DEF_CONVERTAFFINEFORTOGPUPASS
	#define GEN_PASS_DEF_CONVERTPARALLELLOOPTOGPUPASS
	#include "mlir/Conversion/Passes.h.inc"
	} // namespace mlir

	using namespace mlir;
	using namespace mlir::scf;

	namespace {
	// Pass that converts the top-level affine::AffineForOp loop nests of a
	// function into GPU launch operations. Only the operations sitting directly
	// in the function body are inspected: nested launches are not allowed, so the
	// traversal is intentionally non-recursive and never considers nested loops.
	struct ForLoopMapper
	    : public impl::ConvertAffineForToGPUPassBase<ForLoopMapper> {
	  using Base::Base;

	  void runOnOperation() override {
	    Region &body = getOperation().getFunctionBody();
	    // The early-increment range advances the iterator before the loop body
	    // runs, which keeps the traversal valid if the conversion removes the
	    // operation currently being visited.
	    for (Operation &candidate : llvm::make_early_inc_range(body.getOps())) {
	      auto affineLoop = dyn_cast<affine::AffineForOp>(&candidate);
	      if (!affineLoop)
	        continue;

	      // A failed conversion flags the pass as failed; the remaining
	      // top-level operations are still visited afterwards.
	      LogicalResult converted = convertAffineLoopNestToGPULaunch(
	          affineLoop, numBlockDims, numThreadDims);
	      if (failed(converted))
	        signalPassFailure();
	    }
	  }
	};

	// Pass that applies the parallel-loop-to-GPU rewrite patterns to the current
	// operation through a partial dialect conversion.
	struct ParallelLoopToGpuPass
	    : public impl::ConvertParallelLoopToGpuPassBase<ParallelLoopToGpuPass> {
	  void runOnOperation() override {
	    auto root = getOperation();

	    // Legality: every operation without an explicit rule is accepted as
	    // legal; the parallel-loop specific rules are layered on top.
	    ConversionTarget target(getContext());
	    target.markUnknownOpDynamicallyLegal([](Operation *) { return true; });
	    configureParallelLoopToGPULegality(target);

	    RewritePatternSet patterns(&getContext());
	    populateParallelLoopToGPUPatterns(patterns);

	    LogicalResult status =
	        applyPartialConversion(root, target, std::move(patterns));
	    if (failed(status))
	      signalPassFailure();

	    // Finalization runs unconditionally, i.e. also after a failed conversion.
	    finalizeParallelLoopToGPUConversion(root);
	  }
	};

	} // namespace