mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp - third_party/llvm-project - Git at Google

 //===- ParallelLoopCollapsing.cpp - Pass collapsing parallel loop indices -===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #include "mlir/Dialect/SCF/Transforms/Passes.h"

 #include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/Dialect/SCF/Utils/Utils.h"
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"

 namespace mlir {
 #define GEN_PASS_DEF_TESTSCFPARALLELLOOPCOLLAPSING
 #include "mlir/Dialect/SCF/Transforms/Passes.h.inc"
 } // namespace mlir

 #define DEBUG_TYPE "parallel-loop-collapsing"

 using namespace mlir;

 namespace {
 /// Test-only pass that forwards the `collapsed-indices-{0,1,2}` options to
 /// `collapseParallelLoops` for the `scf.parallel` ops nested under the root
 /// operation.
 ///
 /// The options must be filled in order (0, then 1, then 2) and, taken
 /// together, must name every loop index in [0,N) exactly once.
 struct TestSCFParallelLoopCollapsing
     : public impl::TestSCFParallelLoopCollapsingBase<
           TestSCFParallelLoopCollapsing> {

   /// Validates the pass options and then walks the root operation, calling
   /// `collapseParallelLoops` on each `scf.parallel` op whose loop count equals
   /// the number of configured indices. Invalid options are reported on
   /// `llvm::errs()` and fail the pass; ops with a mismatching loop count get
   /// an op error and are skipped without stopping the walk.
   void runOnOperation() override {
     Operation *module = getOperation();

     // The common case for GPU dialect will be simplifying the ParallelOp to 3
     // arguments, so we do that here to simplify things.
     llvm::SmallVector<std::vector<unsigned>, 3> combinedLoops;

     // Gather the input args into the format required by
     // `collapseParallelLoops`.
     if (!clCollapsedIndices0.empty())
       combinedLoops.push_back(clCollapsedIndices0);
     if (!clCollapsedIndices1.empty()) {
       if (clCollapsedIndices0.empty()) {
         llvm::errs()
             << "collapsed-indices-1 specified but not collapsed-indices-0";
         signalPassFailure();
         return;
       }
       combinedLoops.push_back(clCollapsedIndices1);
     }
     if (!clCollapsedIndices2.empty()) {
       if (clCollapsedIndices1.empty()) {
         llvm::errs()
             << "collapsed-indices-2 specified but not collapsed-indices-1";
         signalPassFailure();
         return;
       }
       combinedLoops.push_back(clCollapsedIndices2);
     }

     if (combinedLoops.empty()) {
       llvm::errs() << "No collapsed-indices were specified. This pass is only "
                       "for testing and does not automatically collapse all "
                       "parallel loops or similar.";
       signalPassFailure();
       return;
     }

     // Confirm that the specified loops are exactly [0,N): no index may be
     // listed twice, and N distinct values must exist with the maximum value
     // being N-1.
     llvm::SmallSet<unsigned, 8> flattenedCombinedLoops;
     unsigned maxCollapsedIndex = 0;
     for (const auto &loops : combinedLoops) {
       for (unsigned loop : loops) {
         // `insert(...).second` is false when the value was already present.
         // Without this check, a repeated index (e.g. {0,1} and {1}) would
         // still satisfy the size/maximum test below.
         if (!flattenedCombinedLoops.insert(loop).second) {
           llvm::errs() << "collapsed-indices arguments must not contain "
                           "duplicate values.";
           signalPassFailure();
           return;
         }
         maxCollapsedIndex = std::max(maxCollapsedIndex, loop);
       }
     }

     // The set is non-empty here (every gathered list has at least one
     // element), so `size() - 1` cannot wrap around.
     if (maxCollapsedIndex != flattenedCombinedLoops.size() - 1 ||
         !flattenedCombinedLoops.contains(maxCollapsedIndex)) {
       llvm::errs()
           << "collapsed-indices arguments must include all values [0,N).";
       signalPassFailure();
       return;
     }

     // Only apply the transformation on parallel loops where the specified
     // transformation is valid, but do NOT early abort in the case of invalid
     // loops.
     IRRewriter rewriter(&getContext());
     module->walk([&](scf::ParallelOp op) {
       if (flattenedCombinedLoops.size() != op.getNumLoops()) {
         op.emitOpError("has ")
             << op.getNumLoops()
             << " iter args while this limited functionality testing pass was "
                "configured only for loops with exactly "
             << flattenedCombinedLoops.size() << " iter args.";
         return;
       }
       collapseParallelLoops(rewriter, op, combinedLoops);
     });
   }
 };
 } // namespace

 /// Creates a new instance of the `TestSCFParallelLoopCollapsing` pass and
 /// hands ownership to the caller through the generic `Pass` interface.
 std::unique_ptr<Pass> mlir::createTestSCFParallelLoopCollapsingPass() {
   std::unique_ptr<Pass> pass =
       std::make_unique<TestSCFParallelLoopCollapsing>();
   return pass;
 }
	//===- ParallelLoopCollapsing.cpp - Pass collapsing parallel loop indices -===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//

	#include "mlir/Dialect/SCF/Transforms/Passes.h"

	#include "mlir/Dialect/SCF/IR/SCF.h"
	#include "mlir/Dialect/SCF/Utils/Utils.h"
	#include "mlir/Transforms/RegionUtils.h"
	#include "llvm/ADT/SmallSet.h"
	#include "llvm/Support/CommandLine.h"
	#include "llvm/Support/Debug.h"

	namespace mlir {
	#define GEN_PASS_DEF_TESTSCFPARALLELLOOPCOLLAPSING
	#include "mlir/Dialect/SCF/Transforms/Passes.h.inc"
	} // namespace mlir

	#define DEBUG_TYPE "parallel-loop-collapsing"

	using namespace mlir;

	namespace {
	/// Test-only pass that forwards the `collapsed-indices-{0,1,2}` options to
	/// `collapseParallelLoops` for the `scf.parallel` ops nested under the root
	/// operation.
	///
	/// The options must be filled in order (0, then 1, then 2) and, taken
	/// together, must name every loop index in [0,N) exactly once.
	struct TestSCFParallelLoopCollapsing
	    : public impl::TestSCFParallelLoopCollapsingBase<
	          TestSCFParallelLoopCollapsing> {

	  /// Validates the pass options and then walks the root operation, calling
	  /// `collapseParallelLoops` on each `scf.parallel` op whose loop count
	  /// equals the number of configured indices. Invalid options are reported
	  /// on `llvm::errs()` and fail the pass; ops with a mismatching loop count
	  /// get an op error and are skipped without stopping the walk.
	  void runOnOperation() override {
	    Operation *module = getOperation();

	    // The common case for GPU dialect will be simplifying the ParallelOp to
	    // 3 arguments, so we do that here to simplify things.
	    llvm::SmallVector<std::vector<unsigned>, 3> combinedLoops;

	    // Gather the input args into the format required by
	    // `collapseParallelLoops`.
	    if (!clCollapsedIndices0.empty())
	      combinedLoops.push_back(clCollapsedIndices0);
	    if (!clCollapsedIndices1.empty()) {
	      if (clCollapsedIndices0.empty()) {
	        llvm::errs()
	            << "collapsed-indices-1 specified but not collapsed-indices-0";
	        signalPassFailure();
	        return;
	      }
	      combinedLoops.push_back(clCollapsedIndices1);
	    }
	    if (!clCollapsedIndices2.empty()) {
	      if (clCollapsedIndices1.empty()) {
	        llvm::errs()
	            << "collapsed-indices-2 specified but not collapsed-indices-1";
	        signalPassFailure();
	        return;
	      }
	      combinedLoops.push_back(clCollapsedIndices2);
	    }

	    if (combinedLoops.empty()) {
	      llvm::errs() << "No collapsed-indices were specified. This pass is only "
	                      "for testing and does not automatically collapse all "
	                      "parallel loops or similar.";
	      signalPassFailure();
	      return;
	    }

	    // Confirm that the specified loops are exactly [0,N): no index may be
	    // listed twice, and N distinct values must exist with the maximum value
	    // being N-1.
	    llvm::SmallSet<unsigned, 8> flattenedCombinedLoops;
	    unsigned maxCollapsedIndex = 0;
	    for (const auto &loops : combinedLoops) {
	      for (unsigned loop : loops) {
	        // `insert(...).second` is false when the value was already present.
	        // Without this check, a repeated index (e.g. {0,1} and {1}) would
	        // still satisfy the size/maximum test below.
	        if (!flattenedCombinedLoops.insert(loop).second) {
	          llvm::errs() << "collapsed-indices arguments must not contain "
	                          "duplicate values.";
	          signalPassFailure();
	          return;
	        }
	        maxCollapsedIndex = std::max(maxCollapsedIndex, loop);
	      }
	    }

	    // The set is non-empty here (every gathered list has at least one
	    // element), so `size() - 1` cannot wrap around.
	    if (maxCollapsedIndex != flattenedCombinedLoops.size() - 1 ||
	        !flattenedCombinedLoops.contains(maxCollapsedIndex)) {
	      llvm::errs()
	          << "collapsed-indices arguments must include all values [0,N).";
	      signalPassFailure();
	      return;
	    }

	    // Only apply the transformation on parallel loops where the specified
	    // transformation is valid, but do NOT early abort in the case of invalid
	    // loops.
	    IRRewriter rewriter(&getContext());
	    module->walk([&](scf::ParallelOp op) {
	      if (flattenedCombinedLoops.size() != op.getNumLoops()) {
	        op.emitOpError("has ")
	            << op.getNumLoops()
	            << " iter args while this limited functionality testing pass was "
	               "configured only for loops with exactly "
	            << flattenedCombinedLoops.size() << " iter args.";
	        return;
	      }
	      collapseParallelLoops(rewriter, op, combinedLoops);
	    });
	  }
	};
	} // namespace

	/// Creates a new instance of the `TestSCFParallelLoopCollapsing` pass and
	/// hands ownership to the caller through the generic `Pass` interface.
	std::unique_ptr<Pass> mlir::createTestSCFParallelLoopCollapsingPass() {
	  std::unique_ptr<Pass> pass =
	      std::make_unique<TestSCFParallelLoopCollapsing>();
	  return pass;
	}